diff --git a/results/MicroBenchmarks/ImageProcessing/BilateralFiltering/CMakeFiles/BilateralFilter.dir/bilateralFilterKernel.s b/results/MicroBenchmarks/ImageProcessing/BilateralFiltering/CMakeFiles/BilateralFilter.dir/bilateralFilterKernel.s index c2dcac8b..cefa2ab0 100644 --- a/results/MicroBenchmarks/ImageProcessing/BilateralFiltering/CMakeFiles/BilateralFilter.dir/bilateralFilterKernel.s +++ b/results/MicroBenchmarks/ImageProcessing/BilateralFiltering/CMakeFiles/BilateralFilter.dir/bilateralFilterKernel.s @@ -1,16 +1,6 @@ .file "bilateralFilterKernel.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function bilateralFilterKernel -.LCPI0_0: - .dword 0x4072000000000000 # double 288 -.LCPI0_1: - .dword 0x408c463abeccb2bb # double 904.77868423386042 -.LCPI0_2: - .dword 0xbf60000000000000 # double -0.001953125 -.LCPI0_3: - .dword 0x409921fb54442d18 # double 1608.4954386379741 .text - .globl bilateralFilterKernel + .globl bilateralFilterKernel # -- Begin function bilateralFilterKernel .p2align 5 .type bilateralFilterKernel,@function bilateralFilterKernel: # @bilateralFilterKernel @@ -38,32 +28,40 @@ bilateralFilterKernel: # @bilateralFilterKernel move $s8, $a4 bstrpick.d $a4, $a4, 31, 31 add.w $a4, $s8, $a4 - srai.d $s5, $a4, 1 - sub.w $a0, $a0, $s5 + srai.d $s0, $a4, 1 + sub.w $a0, $a0, $s0 st.d $a3, $sp, 16 # 8-byte Folded Spill st.d $a2, $sp, 32 # 8-byte Folded Spill st.d $a0, $sp, 24 # 8-byte Folded Spill - bge $s5, $a0, .LBB0_11 + bge $s0, $a0, .LBB0_11 # %bb.1: # %.preheader77.lr.ph - sub.w $a0, $a1, $s5 + sub.w $a0, $a1, $s0 st.d $a0, $sp, 56 # 8-byte Folded Spill - bge $s5, $a0, .LBB0_11 + bge $s0, $a0, .LBB0_11 # %bb.2: # %.preheader77.lr.ph blez $s8, .LBB0_11 # %bb.3: # %.preheader77.us.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_0) - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI0_1) - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI0_2) - pcalau12i $a0, %pc_hi20(.LCPI0_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI0_3) bstrpick.d $a0, $a1, 31, 0 st.d $a0, $sp, 8 # 8-byte Folded Spill - movgr2fr.d $fs4, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + lu32i.d $a0, 131072 + lu52i.d $a0, $a0, 1031 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -267061 + ori $a0, $a0, 699 + lu32i.d $a0, -244166 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs2, $a0 + lu52i.d $a0, $zero, -1034 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1033 + movgr2fr.d $fs4, $a0 ld.d $s7, $sp, 32 # 8-byte Folded Reload - move $a3, $s5 + move $a3, $s0 .p2align 4, , 16 .LBB0_4: # %.preheader77.us.us # =>This Loop Header: Depth=1 @@ -81,7 +79,7 @@ bilateralFilterKernel: # @bilateralFilterKernel alsl.d $a0, $a0, $a1, 2 st.d $a0, $sp, 64 # 8-byte Folded Spill st.d $s7, $sp, 48 # 8-byte Folded Spill - move $s0, $s5 + move $s5, $s0 .p2align 4, , 16 .LBB0_5: # %.preheader76.us.us.us # Parent Loop BB0_4 Depth=1 @@ -90,21 +88,21 @@ bilateralFilterKernel: # @bilateralFilterKernel # Child Loop BB0_7 Depth 4 move $s3, $zero ld.d $a0, $sp, 72 # 8-byte Folded Reload - alsl.d $s1, $s0, $a0, 2 + alsl.d $s1, $s5, $a0, 2 st.d $s7, $sp, 80 # 8-byte Folded Spill - fmov.d $fs5, $fs4 - fmov.d $fs6, $fs4 + fmov.d $fs5, $fs0 + fmov.d $fs6, $fs0 .p2align 4, , 16 .LBB0_6: # %.preheader.us.us.us.us # Parent Loop BB0_4 Depth=1 # Parent Loop BB0_5 Depth=2 # => This Loop Header: Depth=3 # Child Loop BB0_7 Depth 4 - sub.d $a0, $s5, $s3 + sub.d $a0, $s0, $s3 mul.d $s4, $a0, $a0 move $fp, $s8 move $s6, $s7 - move $s2, $s5 + move $s2, $s0 .p2align 4, , 16 .LBB0_7: # Parent Loop BB0_4 Depth=1 # Parent Loop BB0_5 Depth=2 @@ -117,10 +115,10 @@ bilateralFilterKernel: # @bilateralFilterKernel add.d $a0, $a1, $a0 movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 - fdiv.d $fa0, $fa0, $fs0 + fdiv.d $fa0, $fa0, $fs1 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 - fdiv.d $fs7, $fa0, $fs1 + fdiv.d $fs7, $fa0, $fs2 mul.d $a0, $s2, $s2 add.d $a0, $a0, $s4 bstrpick.d $a0, $a0, 31, 0 @@ -130,11 +128,11 @@ bilateralFilterKernel: # @bilateralFilterKernel fcvt.s.d $fa0, $fa0 fmul.s $fa0, $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fmul.d $fa0, $fa0, $fs2 + fmul.d $fa0, $fa0, $fs3 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 ld.w $a0, $s6, 0 - fdiv.d $fa0, $fa0, $fs3 + fdiv.d $fa0, $fa0, $fs4 fmul.d $fa0, $fs7, $fa0 movgr2fr.w $fa1, $a0 ffint.d.w $fa1, $fa1 @@ -155,14 +153,14 @@ bilateralFilterKernel: # @bilateralFilterKernel fdiv.d $fa0, $fs6, $fs5 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 - slli.d $a1, $s0, 2 + slli.d $a1, $s5, 2 ld.d $a2, $sp, 64 # 8-byte Folded Reload stx.w $a0, $a2, $a1 - addi.d $s0, $s0, 1 + addi.d $s5, $s5, 1 ld.d $s7, $sp, 80 # 8-byte Folded Reload addi.d $s7, $s7, 4 ld.d $a0, $sp, 56 # 8-byte Folded Reload - bne $s0, $a0, .LBB0_5 + bne $s5, $a0, .LBB0_5 # %bb.10: # %._crit_edge.split.us.us.us # in Loop: Header=BB0_4 Depth=1 ld.d $a3, $sp, 40 # 8-byte Folded Reload diff --git a/results/MicroBenchmarks/ImageProcessing/Blur/CMakeFiles/blur.dir/gaussianBlurKernel.s b/results/MicroBenchmarks/ImageProcessing/Blur/CMakeFiles/blur.dir/gaussianBlurKernel.s index d585bb1e..92e786df 100644 --- a/results/MicroBenchmarks/ImageProcessing/Blur/CMakeFiles/blur.dir/gaussianBlurKernel.s +++ b/results/MicroBenchmarks/ImageProcessing/Blur/CMakeFiles/blur.dir/gaussianBlurKernel.s @@ -1,14 +1,6 @@ .file "gaussianBlurKernel.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function gaussianBlurKernel -.LCPI0_0: - .word 0x43220000 # float 162 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI0_1: - .dword 0x407fcf0216a64912 # double 508.93800988154646 .text - .globl gaussianBlurKernel + .globl gaussianBlurKernel # -- Begin function gaussianBlurKernel .p2align 5 .type gaussianBlurKernel,@function gaussianBlurKernel: # @gaussianBlurKernel @@ -44,11 +36,14 @@ gaussianBlurKernel: # @gaussianBlurKernel movgr2fr.w $fs1, $zero addi.w $s3, $zero, -4 addi.d $s4, $sp, 40 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.s $fs4, $a0, %pc_lo12(.LCPI0_0) - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fs5, $a0, %pc_lo12(.LCPI0_1) addi.w $s5, $zero, -16 + lu12i.w $a0, 274976 + movgr2fr.w $fs4, $a0 + lu12i.w $a0, 92772 + ori $a0, $a0, 2322 + lu32i.d $a0, -12542 + lu52i.d $a0, $a0, 1031 + movgr2fr.d $fs5, $a0 addi.w $s6, $zero, -9 ori $s7, $zero, 5 move $s8, $s3 diff --git a/results/MicroBenchmarks/ImageProcessing/Dither/CMakeFiles/Dither.dir/orderedDitherKernel.s b/results/MicroBenchmarks/ImageProcessing/Dither/CMakeFiles/Dither.dir/orderedDitherKernel.s index fa9e6929..0c12d006 100644 --- a/results/MicroBenchmarks/ImageProcessing/Dither/CMakeFiles/Dither.dir/orderedDitherKernel.s +++ b/results/MicroBenchmarks/ImageProcessing/Dither/CMakeFiles/Dither.dir/orderedDitherKernel.s @@ -1,17 +1,13 @@ .file "orderedDitherKernel.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function orderedDitherKernel -.LCPI0_0: - .dword 0x406fe00000000000 # double 255 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI0_1: + .p2align 4, 0x0 # -- Begin function orderedDitherKernel +.LCPI0_0: .dword 2 # 0x2 .dword 3 # 0x3 -.LCPI0_2: +.LCPI0_1: .dword 0 # 0x0 .dword 1 # 0x1 -.LCPI0_3: +.LCPI0_2: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -46,11 +42,10 @@ orderedDitherKernel: # @orderedDitherKernel sltui $t4, $a1, 4 or $t3, $t4, $t3 andi $t3, $t3, 1 - pcalau12i $t4, %pc_hi20(.LCPI0_0) - fld.d $fa0, $t4, %pc_lo12(.LCPI0_0) ori $t4, $zero, 0 lu32i.d $t4, -8192 lu52i.d $t4, $t4, 1030 + movgr2fr.d $fa0, $t4 vreplgr2vr.d $vr1, $t4 move $t4, $a4 b .LBB0_4 @@ -346,8 +341,8 @@ orderedDitherKernel: # @orderedDitherKernel move $t0, $zero ori $t1, $zero, 4 ori $t2, $zero, 255 - pcalau12i $t3, %pc_hi20(.LCPI0_3) - vld $vr0, $t3, %pc_lo12(.LCPI0_3) + pcalau12i $t3, %pc_hi20(.LCPI0_2) + vld $vr0, $t3, %pc_lo12(.LCPI0_2) vreplgr2vr.w $vr1, $a5 lu32i.d $a5, 0 vrepli.w $vr2, 3 @@ -455,10 +450,10 @@ orderedDitherKernel: # @orderedDitherKernel addi.d $a5, $a5, %pc_lo12(.L__const.orderedDitherKernel.dither.3) move $a6, $zero addi.w $t0, $zero, -5 + pcalau12i $t1, %pc_hi20(.LCPI0_0) + vld $vr0, $t1, %pc_lo12(.LCPI0_0) pcalau12i $t1, %pc_hi20(.LCPI0_1) - vld $vr0, $t1, %pc_lo12(.LCPI0_1) - pcalau12i $t1, %pc_hi20(.LCPI0_2) - vld $vr1, $t1, %pc_lo12(.LCPI0_2) + vld $vr1, $t1, %pc_lo12(.LCPI0_1) ori $t1, $zero, 255 vrepli.d $vr2, 7 vrepli.w $vr3, 255 diff --git a/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/LambdaSubsetAbenchmarks.s b/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/LambdaSubsetAbenchmarks.s index ee0866de..8129fd7a 100644 --- a/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/LambdaSubsetAbenchmarks.s +++ b/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/LambdaSubsetAbenchmarks.s @@ -291,37 +291,32 @@ _ZL23BM_PRESSURE_CALC_LAMBDARN9benchmark5StateE: # @_ZL23BM_PRESSURE_CALC_LAMBDA .size _ZL23BM_PRESSURE_CALC_LAMBDARN9benchmark5StateE, .Lfunc_end0-_ZL23BM_PRESSURE_CALC_LAMBDARN9benchmark5StateE .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE -.LCPI1_0: - .dword 0x3c18987cee7f439d # double 3.333333E-19 -.LCPI1_1: - .dword 0x3842e7922a37d1a0 # double 1.1111110000000001E-37 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE .type _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE,@function _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE .cfi_startproc # %bb.0: # %_ZN9benchmark5State13StateIteratorC2EPS0_.exit - addi.d $sp, $sp, -528 - .cfi_def_cfa_offset 528 - st.d $ra, $sp, 520 # 8-byte Folded Spill - st.d $fp, $sp, 512 # 8-byte Folded Spill - st.d $s0, $sp, 504 # 8-byte Folded Spill - st.d $s1, $sp, 496 # 8-byte Folded Spill - st.d $s2, $sp, 488 # 8-byte Folded Spill - st.d $s3, $sp, 480 # 8-byte Folded Spill - st.d $s4, $sp, 472 # 8-byte Folded Spill - st.d $s5, $sp, 464 # 8-byte Folded Spill - st.d $s6, $sp, 456 # 8-byte Folded Spill - st.d $s7, $sp, 448 # 8-byte Folded Spill - st.d $s8, $sp, 440 # 8-byte Folded Spill - fst.d $fs0, $sp, 432 # 8-byte Folded Spill - fst.d $fs1, $sp, 424 # 8-byte Folded Spill - fst.d $fs2, $sp, 416 # 8-byte Folded Spill - fst.d $fs3, $sp, 408 # 8-byte Folded Spill - fst.d $fs4, $sp, 400 # 8-byte Folded Spill - fst.d $fs5, $sp, 392 # 8-byte Folded Spill + addi.d $sp, $sp, -544 + .cfi_def_cfa_offset 544 + st.d $ra, $sp, 536 # 8-byte Folded Spill + st.d $fp, $sp, 528 # 8-byte Folded Spill + st.d $s0, $sp, 520 # 8-byte Folded Spill + st.d $s1, $sp, 512 # 8-byte Folded Spill + st.d $s2, $sp, 504 # 8-byte Folded Spill + st.d $s3, $sp, 496 # 8-byte Folded Spill + st.d $s4, $sp, 488 # 8-byte Folded Spill + st.d $s5, $sp, 480 # 8-byte Folded Spill + st.d $s6, $sp, 472 # 8-byte Folded Spill + st.d $s7, $sp, 464 # 8-byte Folded Spill + st.d $s8, $sp, 456 # 8-byte Folded Spill + fst.d $fs0, $sp, 448 # 8-byte Folded Spill + fst.d $fs1, $sp, 440 # 8-byte Folded Spill + fst.d $fs2, $sp, 432 # 8-byte Folded Spill + fst.d $fs3, $sp, 424 # 8-byte Folded Spill + fst.d $fs4, $sp, 416 # 8-byte Folded Spill + fst.d $fs5, $sp, 408 # 8-byte Folded Spill + fst.d $fs6, $sp, 400 # 8-byte Folded Spill + fst.d $fs7, $sp, 392 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -339,6 +334,8 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b .cfi_offset 59, -120 .cfi_offset 60, -128 .cfi_offset 61, -136 + .cfi_offset 62, -144 + .cfi_offset 63, -152 move $s0, $a0 pcaddu18i $ra, %call36(_Z11getLoopDatav) jirl $ra, $ra, 0 @@ -347,11 +344,11 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b pcaddu18i $ra, %call36(_Z8loopInitj) jirl $ra, $ra, 0 ld.d $a0, $fp, 8 - st.d $a0, $sp, 176 # 8-byte Folded Spill + st.d $a0, $sp, 184 # 8-byte Folded Spill ld.d $a0, $fp, 16 - st.d $a0, $sp, 168 # 8-byte Folded Spill + st.d $a0, $sp, 176 # 8-byte Folded Spill ld.d $a0, $fp, 24 - st.d $a0, $sp, 160 # 8-byte Folded Spill + st.d $a0, $sp, 168 # 8-byte Folded Spill ld.d $s4, $fp, 32 ld.d $s5, $fp, 40 ld.d $s6, $fp, 48 @@ -368,7 +365,7 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b ld.d $a0, $fp, 112 st.d $a0, $sp, 360 # 8-byte Folded Spill ld.d $a0, $fp, 120 - st.d $a0, $sp, 344 # 8-byte Folded Spill + st.d $a0, $sp, 336 # 8-byte Folded Spill fld.d $fs0, $fp, 384 fld.d $fa0, $fp, 392 vst $vr0, $sp, 144 # 16-byte Folded Spill @@ -382,13 +379,13 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b jirl $ra, $ra, 0 vld $vr9, $sp, 128 # 16-byte Folded Reload vld $vr8, $sp, 144 # 16-byte Folded Reload - ld.d $t8, $sp, 160 # 8-byte Folded Reload - ld.d $t7, $sp, 168 # 8-byte Folded Reload - ld.d $t6, $sp, 176 # 8-byte Folded Reload + ld.d $t8, $sp, 168 # 8-byte Folded Reload + ld.d $t7, $sp, 176 # 8-byte Folded Reload + ld.d $t6, $sp, 184 # 8-byte Folded Reload move $t5, $s0 - bnez $fp, .LBB1_52 + bnez $fp, .LBB1_51 # %bb.1: # %_ZN9benchmark5State13StateIteratorC2EPS0_.exit - beqz $s3, .LBB1_52 + beqz $s3, .LBB1_51 # %bb.2: # %.lr.ph200 move $ra, $s4 ld.d $a0, $t5, 32 @@ -400,41 +397,41 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b alsl.d $a4, $s0, $s5, 3 alsl.d $a5, $s0, $s7, 3 alsl.d $a6, $s0, $s2, 3 - st.d $s5, $sp, 200 # 8-byte Folded Spill + st.d $s5, $sp, 208 # 8-byte Folded Spill alsl.d $a7, $s0, $s6, 3 st.d $s1, $sp, 112 # 8-byte Folded Spill sub.d $t0, $t6, $t7 st.d $s6, $sp, 120 # 8-byte Folded Spill sub.d $t1, $t6, $t8 - st.d $s7, $sp, 192 # 8-byte Folded Spill - ld.d $t2, $sp, 200 # 8-byte Folded Reload + st.d $s7, $sp, 200 # 8-byte Folded Spill + ld.d $t2, $sp, 208 # 8-byte Folded Reload sub.d $t2, $t6, $t2 - st.d $s8, $sp, 184 # 8-byte Folded Spill - ld.d $t3, $sp, 192 # 8-byte Folded Reload + st.d $s8, $sp, 192 # 8-byte Folded Spill + ld.d $t3, $sp, 200 # 8-byte Folded Reload sub.d $t3, $t6, $t3 st.d $s2, $sp, 104 # 8-byte Folded Spill - ld.d $t4, $sp, 184 # 8-byte Folded Reload + ld.d $t4, $sp, 192 # 8-byte Folded Reload sub.d $t4, $t6, $t4 sltui $t0, $t0, 32 sltui $t1, $t1, 32 or $t0, $t0, $t1 sltui $t1, $t2, 32 - ld.d $t2, $sp, 192 # 8-byte Folded Reload + ld.d $t2, $sp, 200 # 8-byte Folded Reload or $t0, $t0, $t1 sltui $t1, $t3, 32 - ld.d $t3, $sp, 184 # 8-byte Folded Reload + ld.d $t3, $sp, 192 # 8-byte Folded Reload or $t0, $t0, $t1 sltui $t1, $t4, 32 ld.d $t4, $sp, 104 # 8-byte Folded Reload or $t0, $t0, $t1 bstrpick.d $t1, $a0, 30, 2 slli.d $t1, $t1, 2 - st.d $t1, $sp, 336 # 8-byte Folded Spill + st.d $t1, $sp, 328 # 8-byte Folded Spill sltu $a3, $t6, $a3 sltu $t1, $t8, $a1 and $a3, $a3, $t1 sltu $a4, $t6, $a4 - ld.d $t1, $sp, 200 # 8-byte Folded Reload + ld.d $t1, $sp, 208 # 8-byte Folded Reload sltu $t1, $t1, $a1 and $a4, $a4, $t1 ld.d $t1, $sp, 120 # 8-byte Folded Reload @@ -449,41 +446,41 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b and $a4, $a4, $a5 or $a3, $a3, $a4 sltu $a4, $t6, $a7 - ld.d $a7, $sp, 200 # 8-byte Folded Reload + ld.d $a7, $sp, 208 # 8-byte Folded Reload sltu $a5, $t1, $a1 and $a4, $a4, $a5 or $a3, $a3, $a4 - st.d $a3, $sp, 296 # 8-byte Folded Spill + st.d $a3, $sp, 304 # 8-byte Folded Spill bstrpick.d $a3, $a0, 30, 1 slli.d $a3, $a3, 1 - st.d $a3, $sp, 288 # 8-byte Folded Spill + st.d $a3, $sp, 296 # 8-byte Folded Spill sltu $a2, $t6, $a2 sltu $a1, $t3, $a1 and $a1, $a2, $a1 - st.d $a1, $sp, 280 # 8-byte Folded Spill + st.d $a1, $sp, 288 # 8-byte Folded Spill vreplvei.d $vr10, $vr8, 0 vreplvei.d $vr11, $vr9, 0 addi.d $a1, $t7, 16 - st.d $a1, $sp, 240 # 8-byte Folded Spill + st.d $a1, $sp, 248 # 8-byte Folded Spill addi.d $a1, $t6, 16 - st.d $a1, $sp, 272 # 8-byte Folded Spill + st.d $a1, $sp, 280 # 8-byte Folded Spill addi.d $a1, $t8, 16 - st.d $a1, $sp, 232 # 8-byte Folded Spill + st.d $a1, $sp, 240 # 8-byte Folded Spill addi.d $a1, $t3, 16 - st.d $a1, $sp, 264 # 8-byte Folded Spill + st.d $a1, $sp, 272 # 8-byte Folded Spill addi.d $a1, $a7, 16 - st.d $a1, $sp, 224 # 8-byte Folded Spill + st.d $a1, $sp, 232 # 8-byte Folded Spill addi.d $a1, $t2, 16 - st.d $a1, $sp, 216 # 8-byte Folded Spill + st.d $a1, $sp, 224 # 8-byte Folded Spill addi.w $a0, $a0, 0 - st.d $a0, $sp, 304 # 8-byte Folded Spill + st.d $a0, $sp, 312 # 8-byte Folded Spill st.d $s0, $sp, 352 # 8-byte Folded Spill sltui $a0, $s0, 4 - st.d $a0, $sp, 256 # 8-byte Folded Spill + st.d $a0, $sp, 264 # 8-byte Folded Spill or $a0, $a0, $t0 ld.d $t0, $sp, 112 # 8-byte Folded Reload andi $a0, $a0, 1 - st.d $a0, $sp, 248 # 8-byte Folded Spill + st.d $a0, $sp, 256 # 8-byte Folded Spill vldi $vr12, -800 vldi $vr13, -928 movgr2fr.d $fs2, $zero @@ -492,19 +489,30 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b vldi $vr16, -864 vldi $vr17, -996 vldi $vr18, -872 + lu12i.w $a0, -71692 + ori $a0, $a0, 925 + lu32i.d $a0, -485252 + lu52i.d $a0, $a0, 961 + st.d $a0, $sp, 344 # 8-byte Folded Spill + movgr2fr.d $fs3, $a0 + lu12i.w $a0, 172925 + ori $a0, $a0, 416 + lu32i.d $a0, 190354 + lu52i.d $a0, $a0, 900 + movgr2fr.d $fs4, $a0 lu52i.d $a0, $zero, 1022 vreplgr2vr.d $vr19, $a0 ori $a0, $zero, 0 lu32i.d $a0, -524288 lu52i.d $a0, $a0, 1024 - st.d $a0, $sp, 208 # 8-byte Folded Spill + st.d $a0, $sp, 216 # 8-byte Folded Spill vldi $vr20, -912 b .LBB1_4 .p2align 4, , 16 .LBB1_3: # %"_Z6forallIZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateEE3$_5Ev9simd_execiiT_.exit" # in Loop: Header=BB1_4 Depth=1 addi.d $a6, $a6, -1 - beqz $a6, .LBB1_52 + beqz $a6, .LBB1_51 .LBB1_4: # =>This Loop Header: Depth=1 # Child Loop BB1_8 Depth 2 # Child Loop BB1_11 Depth 2 @@ -515,12 +523,12 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b # Child Loop BB1_33 Depth 2 # Child Loop BB1_37 Depth 2 # Child Loop BB1_45 Depth 2 - ld.d $a0, $sp, 304 # 8-byte Folded Reload + ld.d $a0, $sp, 312 # 8-byte Folded Reload blez $a0, .LBB1_3 # %bb.5: # %.lr.ph.preheader # in Loop: Header=BB1_4 Depth=1 - st.d $a6, $sp, 312 # 8-byte Folded Spill - ld.d $a0, $sp, 248 # 8-byte Folded Reload + st.d $a6, $sp, 320 # 8-byte Folded Spill + ld.d $a0, $sp, 256 # 8-byte Folded Reload beqz $a0, .LBB1_7 # %bb.6: # in Loop: Header=BB1_4 Depth=1 move $a6, $zero @@ -528,13 +536,13 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b .p2align 4, , 16 .LBB1_7: # %vector.body290.preheader # in Loop: Header=BB1_4 Depth=1 - ld.d $a0, $sp, 216 # 8-byte Folded Reload - ld.d $a1, $sp, 224 # 8-byte Folded Reload - ld.d $a2, $sp, 264 # 8-byte Folded Reload - ld.d $a3, $sp, 232 # 8-byte Folded Reload - ld.d $a4, $sp, 272 # 8-byte Folded Reload - ld.d $a5, $sp, 240 # 8-byte Folded Reload - ld.d $a6, $sp, 336 # 8-byte Folded Reload + ld.d $a0, $sp, 224 # 8-byte Folded Reload + ld.d $a1, $sp, 232 # 8-byte Folded Reload + ld.d $a2, $sp, 272 # 8-byte Folded Reload + ld.d $a3, $sp, 240 # 8-byte Folded Reload + ld.d $a4, $sp, 280 # 8-byte Folded Reload + ld.d $a5, $sp, 248 # 8-byte Folded Reload + ld.d $a6, $sp, 328 # 8-byte Folded Reload lu52i.d $fp, $zero, -1026 .p2align 4, , 16 .LBB1_8: # %vector.body290 @@ -571,7 +579,7 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b bnez $a6, .LBB1_8 # %bb.9: # %middle.block303 # in Loop: Header=BB1_4 Depth=1 - ld.d $a1, $sp, 336 # 8-byte Folded Reload + ld.d $a1, $sp, 328 # 8-byte Folded Reload move $a6, $a1 ld.d $a0, $sp, 352 # 8-byte Folded Reload move $fp, $a0 @@ -584,6 +592,7 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b ld.d $s6, $sp, 368 # 8-byte Folded Reload ld.d $s7, $sp, 360 # 8-byte Folded Reload move $s8, $t1 + ld.d $a2, $sp, 344 # 8-byte Folded Reload beq $a0, $a1, .LBB1_16 .LBB1_10: # %.lr.ph.preheader308 # in Loop: Header=BB1_4 Depth=1 @@ -627,10 +636,11 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b ld.d $s6, $sp, 368 # 8-byte Folded Reload ld.d $s7, $sp, 360 # 8-byte Folded Reload move $s8, $t1 + ld.d $a2, $sp, 344 # 8-byte Folded Reload b .LBB1_16 .p2align 4, , 16 .LBB1_13: # in Loop: Header=BB1_16 Depth=2 - fld.d $fa0, $a0, %pc_lo12(.LCPI1_0) + movgr2fr.d $fa0, $a2 .LBB1_14: # in Loop: Header=BB1_16 Depth=2 fld.d $fa1, $s6, 0 fld.d $fa2, $s7, 0 @@ -654,8 +664,6 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b # => This Inner Loop Header: Depth=2 fld.d $fa0, $s0, 0 fcmp.clt.d $fcc0, $fs2, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - pcalau12i $a1, %pc_hi20(.LCPI1_1) fmov.d $fa0, $fs2 bcnez $fcc0, .LBB1_15 # %bb.17: # in Loop: Header=BB1_16 Depth=2 @@ -664,15 +672,14 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b frecip.d $fa0, $fa0 fld.d $fa1, $s2, 0 fld.d $fa2, $s4, 0 - fld.d $fa3, $s3, 0 - fld.d $fa4, $s5, 0 + fld.d $fa3, $s5, 0 + fld.d $fa4, $s3, 0 fmul.d $fa0, $fa0, $fa0 fmul.d $fa0, $fa0, $fa2 - fld.d $fa2, $a1, %pc_lo12(.LCPI1_1) - fmul.d $fa0, $fa0, $fa4 - fmadd.d $fa0, $fa1, $fa3, $fa0 + fmul.d $fa0, $fa0, $fa3 + fmadd.d $fa0, $fa1, $fa4, $fa0 fdiv.d $fa1, $fa0, $fs0 - fcmp.cle.d $fcc0, $fa1, $fa2 + fcmp.cle.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB1_13 # %bb.18: # in Loop: Header=BB1_16 Depth=2 fsqrt.d $fa0, $fa1 @@ -686,14 +693,11 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b vst $vr10, $sp, 64 # 16-byte Folded Spill vst $vr11, $sp, 48 # 16-byte Folded Spill vst $vr19, $sp, 32 # 16-byte Folded Spill - st.d $a0, $sp, 328 # 8-byte Folded Spill - st.d $a1, $sp, 320 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ld.d $a1, $sp, 320 # 8-byte Folded Reload - ld.d $a0, $sp, 328 # 8-byte Folded Reload vldi $vr20, -912 vld $vr19, $sp, 32 # 16-byte Folded Reload + ld.d $a2, $sp, 344 # 8-byte Folded Reload vldi $vr18, -872 vldi $vr17, -996 vldi $vr16, -864 @@ -707,24 +711,22 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b vld $vr8, $sp, 144 # 16-byte Folded Reload ld.d $t4, $sp, 104 # 8-byte Folded Reload ld.d $t0, $sp, 112 # 8-byte Folded Reload - ld.d $t3, $sp, 184 # 8-byte Folded Reload - ld.d $t2, $sp, 192 # 8-byte Folded Reload + ld.d $t3, $sp, 192 # 8-byte Folded Reload + ld.d $t2, $sp, 200 # 8-byte Folded Reload ld.d $t1, $sp, 120 # 8-byte Folded Reload - ld.d $a7, $sp, 200 # 8-byte Folded Reload + ld.d $a7, $sp, 208 # 8-byte Folded Reload ld.d $ra, $sp, 88 # 8-byte Folded Reload - ld.d $t8, $sp, 160 # 8-byte Folded Reload - ld.d $t7, $sp, 168 # 8-byte Folded Reload - ld.d $t6, $sp, 176 # 8-byte Folded Reload + ld.d $t8, $sp, 168 # 8-byte Folded Reload + ld.d $t7, $sp, 176 # 8-byte Folded Reload + ld.d $t6, $sp, 184 # 8-byte Folded Reload ld.d $t5, $sp, 96 # 8-byte Folded Reload b .LBB1_14 .p2align 4, , 16 .LBB1_20: # %"_Z6forallIZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateEE3$_1Ev9simd_execiiT_.exit.preheader" # in Loop: Header=BB1_4 Depth=1 - st.d $a1, $sp, 320 # 8-byte Folded Spill - st.d $a0, $sp, 328 # 8-byte Folded Spill ld.d $s7, $sp, 352 # 8-byte Folded Reload sltui $a0, $s7, 2 - ld.d $a1, $sp, 296 # 8-byte Folded Reload + ld.d $a1, $sp, 304 # 8-byte Folded Reload or $a0, $a0, $a1 andi $a0, $a0, 1 beqz $a0, .LBB1_22 @@ -740,9 +742,9 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b move $a3, $t2 move $a4, $t4 move $a5, $t1 - ld.d $a6, $sp, 288 # 8-byte Folded Reload + ld.d $a6, $sp, 296 # 8-byte Folded Reload lu52i.d $fp, $zero, -1023 - ld.d $s0, $sp, 208 # 8-byte Folded Reload + ld.d $s0, $sp, 216 # 8-byte Folded Reload .p2align 4, , 16 .LBB1_23: # %vector.body264 # Parent Loop BB1_4 Depth=1 @@ -772,7 +774,7 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b bnez $a6, .LBB1_23 # %bb.24: # %middle.block273 # in Loop: Header=BB1_4 Depth=1 - ld.d $a0, $sp, 288 # 8-byte Folded Reload + ld.d $a0, $sp, 296 # 8-byte Folded Reload move $a6, $a0 beq $s7, $a0, .LBB1_27 .LBB1_25: # %"_Z6forallIZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateEE3$_1Ev9simd_execiiT_.exit.preheader307" @@ -811,22 +813,22 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b bnez $a6, .LBB1_26 .LBB1_27: # %"_ZZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateEENK3$_3clEi.exit.preheader" # in Loop: Header=BB1_4 Depth=1 - ld.d $a0, $sp, 280 # 8-byte Folded Reload - ld.d $a1, $sp, 256 # 8-byte Folded Reload + ld.d $a0, $sp, 288 # 8-byte Folded Reload + ld.d $a1, $sp, 264 # 8-byte Folded Reload or $a0, $a1, $a0 andi $a0, $a0, 1 beqz $a0, .LBB1_29 # %bb.28: # in Loop: Header=BB1_4 Depth=1 move $a2, $zero - ld.d $a6, $sp, 312 # 8-byte Folded Reload + ld.d $a6, $sp, 320 # 8-byte Folded Reload b .LBB1_32 .p2align 4, , 16 .LBB1_29: # %vector.body.preheader # in Loop: Header=BB1_4 Depth=1 - ld.d $a0, $sp, 264 # 8-byte Folded Reload - ld.d $a1, $sp, 272 # 8-byte Folded Reload - ld.d $a2, $sp, 336 # 8-byte Folded Reload - ld.d $a6, $sp, 312 # 8-byte Folded Reload + ld.d $a0, $sp, 272 # 8-byte Folded Reload + ld.d $a1, $sp, 280 # 8-byte Folded Reload + ld.d $a2, $sp, 328 # 8-byte Folded Reload + ld.d $a6, $sp, 320 # 8-byte Folded Reload .p2align 4, , 16 .LBB1_30: # %vector.body # Parent Loop BB1_4 Depth=1 @@ -855,7 +857,7 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b bnez $a2, .LBB1_30 # %bb.31: # %middle.block # in Loop: Header=BB1_4 Depth=1 - ld.d $a0, $sp, 336 # 8-byte Folded Reload + ld.d $a0, $sp, 328 # 8-byte Folded Reload move $a2, $a0 beq $s7, $a0, .LBB1_34 .LBB1_32: # %"_ZZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateEENK3$_3clEi.exit.preheader306" @@ -883,80 +885,78 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b .LBB1_34: # %.lr.ph195.preheader # in Loop: Header=BB1_4 Depth=1 move $s3, $t8 - ld.d $a2, $sp, 376 # 8-byte Folded Reload - ld.d $a3, $sp, 344 # 8-byte Folded Reload - ld.d $a4, $sp, 384 # 8-byte Folded Reload - ld.d $s8, $sp, 368 # 8-byte Folded Reload - ld.d $s5, $sp, 360 # 8-byte Folded Reload - move $fp, $a7 - move $s4, $t2 - move $s0, $t4 - move $s1, $t1 - move $s6, $t6 - move $s2, $ra - ld.d $a0, $sp, 328 # 8-byte Folded Reload - ld.d $a1, $sp, 320 # 8-byte Folded Reload + ld.d $a1, $sp, 376 # 8-byte Folded Reload + ld.d $a2, $sp, 336 # 8-byte Folded Reload + ld.d $a3, $sp, 384 # 8-byte Folded Reload + ld.d $fp, $sp, 368 # 8-byte Folded Reload + ld.d $s4, $sp, 360 # 8-byte Folded Reload + move $s5, $a7 + move $s2, $t2 + move $s8, $t4 + move $s6, $t1 + move $s1, $t6 + move $s0, $ra + ld.d $a0, $sp, 344 # 8-byte Folded Reload b .LBB1_37 .p2align 4, , 16 .LBB1_35: # %._crit_edge.i # in Loop: Header=BB1_37 Depth=2 - fld.d $fs4, $s6, 0 - fld.d $fs5, $s2, 0 + fld.d $fs6, $s1, 0 + fld.d $fs7, $s0, 0 fmov.d $fa0, $fs2 .LBB1_36: # %"_ZZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateEENK3$_4clEi.exit" # in Loop: Header=BB1_37 Depth=2 - fld.d $fa1, $fp, 0 - fld.d $fa2, $s4, 0 - fld.d $fa3, $s0, 0 - fld.d $fa4, $s1, 0 + fld.d $fa1, $s5, 0 + fld.d $fa2, $s2, 0 + fld.d $fa3, $s8, 0 + fld.d $fa4, $s6, 0 fadd.d $fa1, $fa1, $fa2 fadd.d $fa2, $fa3, $fa4 fmul.d $fa2, $fa2, $ft8 fmadd.d $fa1, $fa1, $ft9, $fa2 - fadd.d $fa0, $fs5, $fa0 + fadd.d $fa0, $fs7, $fa0 fadd.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fs3, $fa0 + fmul.d $fa0, $fs5, $fa0 fdiv.d $fa0, $fa0, $ft10 - fadd.d $fa0, $fs4, $fa0 + fadd.d $fa0, $fs6, $fa0 fabs.d $fa1, $fa0 fcmp.clt.d $fcc0, $fa1, $ft0 fsel $fa0, $fa0, $fs2, $fcc0 fcmp.clt.d $fcc0, $fa0, $ft1 fsel $fa0, $fa0, $ft1, $fcc0 - fst.d $fa0, $s6, 0 - addi.d $s2, $s2, 8 - addi.d $s6, $s6, 8 - addi.d $s1, $s1, 8 + fst.d $fa0, $s1, 0 addi.d $s0, $s0, 8 + addi.d $s1, $s1, 8 + addi.d $s6, $s6, 8 + addi.d $s8, $s8, 8 + addi.d $s2, $s2, 8 + addi.d $s5, $s5, 8 addi.d $s4, $s4, 8 addi.d $fp, $fp, 8 - addi.d $s5, $s5, 8 - addi.d $s8, $s8, 8 - addi.d $a4, $a4, 8 addi.d $a3, $a3, 8 addi.d $a2, $a2, 8 + addi.d $a1, $a1, 8 addi.d $s7, $s7, -1 addi.d $s3, $s3, 8 beqz $s7, .LBB1_43 .LBB1_37: # %.lr.ph195 # Parent Loop BB1_4 Depth=1 # => This Inner Loop Header: Depth=2 - fld.d $fs3, $s3, 0 - fcmp.cule.d $fcc0, $fs3, $fs2 + fld.d $fs5, $s3, 0 + fcmp.cule.d $fcc0, $fs5, $fs2 bceqz $fcc0, .LBB1_35 # %bb.38: # in Loop: Header=BB1_37 Depth=2 - fld.d $fa0, $a2, 0 - fld.d $fa1, $a3, 0 - fld.d $fa2, $a4, 0 - fld.d $fs4, $s6, 0 - fld.d $fs5, $s2, 0 + fld.d $fa0, $a1, 0 + fld.d $fa1, $a2, 0 + fld.d $fa2, $a3, 0 + fld.d $fs7, $s0, 0 + fld.d $fs6, $s1, 0 fmul.d $fa1, $fa1, $fa1 fmul.d $fa1, $fa1, $fa2 - fld.d $fa2, $a1, %pc_lo12(.LCPI1_1) - fmul.d $fa1, $fa1, $fs5 - fmadd.d $fa0, $fa0, $fs4, $fa1 + fmul.d $fa1, $fa1, $fs7 + fmadd.d $fa0, $fa0, $fs6, $fa1 fdiv.d $fa1, $fa0, $fs0 - fcmp.cle.d $fcc0, $fa1, $fa2 + fcmp.cle.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB1_41 # %bb.39: # in Loop: Header=BB1_37 Depth=2 fsqrt.d $fa0, $fa1 @@ -970,18 +970,17 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b vst $vr10, $sp, 64 # 16-byte Folded Spill vst $vr11, $sp, 48 # 16-byte Folded Spill vst $vr19, $sp, 32 # 16-byte Folded Spill - st.d $a2, $sp, 24 # 8-byte Folded Spill - st.d $a3, $sp, 16 # 8-byte Folded Spill - st.d $a4, $sp, 8 # 8-byte Folded Spill + st.d $a1, $sp, 24 # 8-byte Folded Spill + st.d $a2, $sp, 16 # 8-byte Folded Spill + st.d $a3, $sp, 8 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ld.d $a4, $sp, 8 # 8-byte Folded Reload - ld.d $a3, $sp, 16 # 8-byte Folded Reload - ld.d $a2, $sp, 24 # 8-byte Folded Reload - ld.d $a1, $sp, 320 # 8-byte Folded Reload - ld.d $a0, $sp, 328 # 8-byte Folded Reload + ld.d $a3, $sp, 8 # 8-byte Folded Reload + ld.d $a2, $sp, 16 # 8-byte Folded Reload + ld.d $a1, $sp, 24 # 8-byte Folded Reload vldi $vr20, -912 vld $vr19, $sp, 32 # 16-byte Folded Reload + ld.d $a0, $sp, 344 # 8-byte Folded Reload vldi $vr18, -872 vldi $vr17, -996 vldi $vr16, -864 @@ -991,27 +990,27 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b vldi $vr12, -800 vld $vr11, $sp, 48 # 16-byte Folded Reload vld $vr10, $sp, 64 # 16-byte Folded Reload - ld.d $a6, $sp, 312 # 8-byte Folded Reload + ld.d $a6, $sp, 320 # 8-byte Folded Reload vld $vr9, $sp, 128 # 16-byte Folded Reload vld $vr8, $sp, 144 # 16-byte Folded Reload ld.d $t4, $sp, 104 # 8-byte Folded Reload ld.d $t0, $sp, 112 # 8-byte Folded Reload - ld.d $t3, $sp, 184 # 8-byte Folded Reload - ld.d $t2, $sp, 192 # 8-byte Folded Reload + ld.d $t3, $sp, 192 # 8-byte Folded Reload + ld.d $t2, $sp, 200 # 8-byte Folded Reload ld.d $t1, $sp, 120 # 8-byte Folded Reload - ld.d $a7, $sp, 200 # 8-byte Folded Reload + ld.d $a7, $sp, 208 # 8-byte Folded Reload ld.d $ra, $sp, 88 # 8-byte Folded Reload - ld.d $t8, $sp, 160 # 8-byte Folded Reload - ld.d $t7, $sp, 168 # 8-byte Folded Reload - ld.d $t6, $sp, 176 # 8-byte Folded Reload + ld.d $t8, $sp, 168 # 8-byte Folded Reload + ld.d $t7, $sp, 176 # 8-byte Folded Reload + ld.d $t6, $sp, 184 # 8-byte Folded Reload ld.d $t5, $sp, 96 # 8-byte Folded Reload b .LBB1_42 .p2align 4, , 16 .LBB1_41: # in Loop: Header=BB1_37 Depth=2 - fld.d $fa0, $a0, %pc_lo12(.LCPI1_0) + movgr2fr.d $fa0, $a0 .LBB1_42: # in Loop: Header=BB1_37 Depth=2 - fld.d $fa1, $s8, 0 - fld.d $fa2, $s5, 0 + fld.d $fa1, $fp, 0 + fld.d $fa2, $s4, 0 fmadd.d $fa0, $fa0, $fa1, $fa2 b .LBB1_36 .p2align 4, , 16 @@ -1021,7 +1020,7 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b move $s0, $t8 ld.d $s1, $sp, 376 # 8-byte Folded Reload move $s2, $t6 - ld.d $s3, $sp, 344 # 8-byte Folded Reload + ld.d $s3, $sp, 336 # 8-byte Folded Reload ld.d $s4, $sp, 384 # 8-byte Folded Reload move $s5, $ra ld.d $s6, $sp, 368 # 8-byte Folded Reload @@ -1052,21 +1051,32 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b fld.d $fa0, $s1, 0 fld.d $fa1, $s3, 0 fld.d $fa2, $s4, 0 - fld.d $fa3, $s2, 0 - fld.d $fa4, $s5, 0 + fld.d $fa3, $s5, 0 + fld.d $fa4, $s2, 0 fmul.d $fa1, $fa1, $fa1 fmul.d $fa1, $fa1, $fa2 - fld.d $fa2, $a1, %pc_lo12(.LCPI1_1) - fmul.d $fa1, $fa1, $fa4 - fmadd.d $fa0, $fa0, $fa3, $fa1 + fmul.d $fa1, $fa1, $fa3 + fmadd.d $fa0, $fa0, $fa4, $fa1 fdiv.d $fa1, $fa0, $fs0 - fcmp.cle.d $fcc0, $fa1, $fa2 - bcnez $fcc0, .LBB1_49 + fcmp.cle.d $fcc0, $fa1, $fs4 + fmov.d $fa0, $fs3 + bcnez $fcc0, .LBB1_48 # %bb.47: # in Loop: Header=BB1_45 Depth=2 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - bcnez $fcc0, .LBB1_50 -# %bb.48: # %call.sqrt474 + bceqz $fcc0, .LBB1_50 +.LBB1_48: # in Loop: Header=BB1_45 Depth=2 + fld.d $fa1, $s6, 0 + fld.d $fa2, $s7, 0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + fabs.d $fa1, $fa0 + fcmp.cule.d $fcc0, $fs1, $fa1 + fst.d $fa0, $s8, 0 + bcnez $fcc0, .LBB1_44 +# %bb.49: # in Loop: Header=BB1_45 Depth=2 + st.d $zero, $s8, 0 + b .LBB1_44 +.LBB1_50: # %call.sqrt474 # in Loop: Header=BB1_45 Depth=2 fmov.d $fa0, $fa1 st.d $t5, $sp, 96 # 8-byte Folded Spill @@ -1076,8 +1086,6 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b vst $vr19, $sp, 32 # 16-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ld.d $a1, $sp, 320 # 8-byte Folded Reload - ld.d $a0, $sp, 328 # 8-byte Folded Reload vldi $vr20, -912 vld $vr19, $sp, 32 # 16-byte Folded Reload vldi $vr18, -872 @@ -1089,67 +1097,50 @@ _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE: # @_ZL21BM_ENERGY_CALC_LAMBDARN9b vldi $vr12, -800 vld $vr11, $sp, 48 # 16-byte Folded Reload vld $vr10, $sp, 64 # 16-byte Folded Reload - ld.d $a6, $sp, 312 # 8-byte Folded Reload + ld.d $a6, $sp, 320 # 8-byte Folded Reload vld $vr9, $sp, 128 # 16-byte Folded Reload vld $vr8, $sp, 144 # 16-byte Folded Reload ld.d $t4, $sp, 104 # 8-byte Folded Reload ld.d $t0, $sp, 112 # 8-byte Folded Reload - ld.d $t3, $sp, 184 # 8-byte Folded Reload - ld.d $t2, $sp, 192 # 8-byte Folded Reload + ld.d $t3, $sp, 192 # 8-byte Folded Reload + ld.d $t2, $sp, 200 # 8-byte Folded Reload ld.d $t1, $sp, 120 # 8-byte Folded Reload - ld.d $a7, $sp, 200 # 8-byte Folded Reload + ld.d $a7, $sp, 208 # 8-byte Folded Reload ld.d $ra, $sp, 88 # 8-byte Folded Reload - ld.d $t8, $sp, 160 # 8-byte Folded Reload - ld.d $t7, $sp, 168 # 8-byte Folded Reload - ld.d $t6, $sp, 176 # 8-byte Folded Reload + ld.d $t8, $sp, 168 # 8-byte Folded Reload + ld.d $t7, $sp, 176 # 8-byte Folded Reload + ld.d $t6, $sp, 184 # 8-byte Folded Reload ld.d $t5, $sp, 96 # 8-byte Folded Reload - b .LBB1_50 - .p2align 4, , 16 -.LBB1_49: # in Loop: Header=BB1_45 Depth=2 - fld.d $fa0, $a0, %pc_lo12(.LCPI1_0) -.LBB1_50: # in Loop: Header=BB1_45 Depth=2 - fld.d $fa1, $s6, 0 - fld.d $fa2, $s7, 0 - fmadd.d $fa0, $fa0, $fa1, $fa2 - fabs.d $fa1, $fa0 - fcmp.cule.d $fcc0, $fs1, $fa1 - fst.d $fa0, $s8, 0 - bcnez $fcc0, .LBB1_44 -# %bb.51: # in Loop: Header=BB1_45 Depth=2 - st.d $zero, $s8, 0 - b .LBB1_44 -.LBB1_52: # %._crit_edge + b .LBB1_48 +.LBB1_51: # %._crit_edge move $a0, $t5 - fld.d $fs5, $sp, 392 # 8-byte Folded Reload - fld.d $fs4, $sp, 400 # 8-byte Folded Reload - fld.d $fs3, $sp, 408 # 8-byte Folded Reload - fld.d $fs2, $sp, 416 # 8-byte Folded Reload - fld.d $fs1, $sp, 424 # 8-byte Folded Reload - fld.d $fs0, $sp, 432 # 8-byte Folded Reload - ld.d $s8, $sp, 440 # 8-byte Folded Reload - ld.d $s7, $sp, 448 # 8-byte Folded Reload - ld.d $s6, $sp, 456 # 8-byte Folded Reload - ld.d $s5, $sp, 464 # 8-byte Folded Reload - ld.d $s4, $sp, 472 # 8-byte Folded Reload - ld.d $s3, $sp, 480 # 8-byte Folded Reload - ld.d $s2, $sp, 488 # 8-byte Folded Reload - ld.d $s1, $sp, 496 # 8-byte Folded Reload - ld.d $s0, $sp, 504 # 8-byte Folded Reload - ld.d $fp, $sp, 512 # 8-byte Folded Reload - ld.d $ra, $sp, 520 # 8-byte Folded Reload - addi.d $sp, $sp, 528 + fld.d $fs7, $sp, 392 # 8-byte Folded Reload + fld.d $fs6, $sp, 400 # 8-byte Folded Reload + fld.d $fs5, $sp, 408 # 8-byte Folded Reload + fld.d $fs4, $sp, 416 # 8-byte Folded Reload + fld.d $fs3, $sp, 424 # 8-byte Folded Reload + fld.d $fs2, $sp, 432 # 8-byte Folded Reload + fld.d $fs1, $sp, 440 # 8-byte Folded Reload + fld.d $fs0, $sp, 448 # 8-byte Folded Reload + ld.d $s8, $sp, 456 # 8-byte Folded Reload + ld.d $s7, $sp, 464 # 8-byte Folded Reload + ld.d $s6, $sp, 472 # 8-byte Folded Reload + ld.d $s5, $sp, 480 # 8-byte Folded Reload + ld.d $s4, $sp, 488 # 8-byte Folded Reload + ld.d $s3, $sp, 496 # 8-byte Folded Reload + ld.d $s2, $sp, 504 # 8-byte Folded Reload + ld.d $s1, $sp, 512 # 8-byte Folded Reload + ld.d $s0, $sp, 520 # 8-byte Folded Reload + ld.d $fp, $sp, 528 # 8-byte Folded Reload + ld.d $ra, $sp, 536 # 8-byte Folded Reload + addi.d $sp, $sp, 544 pcaddu18i $t8, %call36(_ZN9benchmark5State17FinishKeepRunningEv) jr $t8 .Lfunc_end1: .size _ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE, .Lfunc_end1-_ZL21BM_ENERGY_CALC_LAMBDARN9benchmark5StateE .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZL20BM_VOL3D_CALC_LAMBDARN9benchmark5StateE -.LCPI2_0: - .dword 0x3fb5555555555555 # double 0.083333333333333329 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZL20BM_VOL3D_CALC_LAMBDARN9benchmark5StateE .type _ZL20BM_VOL3D_CALC_LAMBDARN9benchmark5StateE,@function _ZL20BM_VOL3D_CALC_LAMBDARN9benchmark5StateE: # @_ZL20BM_VOL3D_CALC_LAMBDARN9benchmark5StateE .Lfunc_begin0: @@ -1171,7 +1162,6 @@ _ZL20BM_VOL3D_CALC_LAMBDARN9benchmark5StateE: # @_ZL20BM_VOL3D_CALC_LAMBDARN9ben st.d $s7, $sp, 256 # 8-byte Folded Spill st.d $s8, $sp, 248 # 8-byte Folded Spill fst.d $fs0, $sp, 240 # 8-byte Folded Spill - fst.d $fs1, $sp, 232 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -1184,7 +1174,6 @@ _ZL20BM_VOL3D_CALC_LAMBDARN9benchmark5StateE: # @_ZL20BM_VOL3D_CALC_LAMBDARN9ben .cfi_offset 30, -80 .cfi_offset 31, -88 .cfi_offset 56, -96 - .cfi_offset 57, -104 move $s2, $a0 pcaddu18i $ra, %call36(_Z11getLoopDatav) jirl $ra, $ra, 0 @@ -1194,18 +1183,19 @@ _ZL20BM_VOL3D_CALC_LAMBDARN9benchmark5StateE: # @_ZL20BM_VOL3D_CALC_LAMBDARN9ben jirl $ra, $ra, 0 ld.d $s7, $s0, 8 ld.d $a0, $s2, 32 - ld.d $s6, $s0, 16 + ld.d $a1, $s0, 16 + st.d $a1, $sp, 72 # 8-byte Folded Spill ld.d $a1, $s0, 24 - st.d $a1, $sp, 112 # 8-byte Folded Spill + st.d $a1, $sp, 120 # 8-byte Folded Spill ld.d $a1, $s0, 32 - st.d $a1, $sp, 136 # 8-byte Folded Spill + st.d $a1, $sp, 144 # 8-byte Folded Spill ld.w $a1, $a0, 0 - addi.d $a0, $sp, 144 + addi.d $a0, $sp, 152 ori $a2, $zero, 3 pcaddu18i $ra, %call36(_ZN7ADomainC2Eii) jirl $ra, $ra, 0 - ld.w $s0, $sp, 180 - ld.w $fp, $sp, 184 + ld.w $s0, $sp, 188 + ld.w $fp, $sp, 192 ld.w $s1, $s2, 28 ld.d $s4, $s2, 16 .Ltmp0: # EH_LABEL @@ -1215,13 +1205,15 @@ _ZL20BM_VOL3D_CALC_LAMBDARN9benchmark5StateE: # @_ZL20BM_VOL3D_CALC_LAMBDARN9ben jirl $ra, $ra, 0 .Ltmp1: # EH_LABEL # %bb.1: # %_ZN9benchmark5State3endEv.exit.preheader + ld.d $t8, $sp, 72 # 8-byte Folded Reload bnez $s1, .LBB2_4 # %bb.2: # %_ZN9benchmark5State3endEv.exit.preheader beqz $s4, .LBB2_4 # %bb.3: # %.lr.ph92 - ld.w $t1, $sp, 208 - ld.w $a1, $sp, 212 - bge $a1, $t1, .LBB2_8 + ld.w $a0, $sp, 216 + ld.w $a1, $sp, 220 + st.d $a0, $sp, 128 # 8-byte Folded Spill + bge $a1, $a0, .LBB2_8 .LBB2_4: # %_ZN9benchmark5State3endEv.exit._crit_edge .Ltmp2: # EH_LABEL ld.d $a0, $sp, 8 # 8-byte Folded Reload @@ -1229,13 +1221,12 @@ _ZL20BM_VOL3D_CALC_LAMBDARN9benchmark5StateE: # @_ZL20BM_VOL3D_CALC_LAMBDARN9ben jirl $ra, $ra, 0 .Ltmp3: # EH_LABEL # %bb.5: # %_ZNK9benchmark5State13StateIteratorneERKS1_.exit - ld.d $a0, $sp, 216 + ld.d $a0, $sp, 224 beqz $a0, .LBB2_7 # %bb.6: pcaddu18i $ra, %call36(_ZdaPv) jirl $ra, $ra, 0 .LBB2_7: # %_ZN7ADomainD2Ev.exit - fld.d $fs1, $sp, 232 # 8-byte Folded Reload fld.d $fs0, $sp, 240 # 8-byte Folded Reload ld.d $s8, $sp, 248 # 8-byte Folded Reload ld.d $s7, $sp, 256 # 8-byte Folded Reload @@ -1251,145 +1242,143 @@ _ZL20BM_VOL3D_CALC_LAMBDARN9benchmark5StateE: # @_ZL20BM_VOL3D_CALC_LAMBDARN9ben addi.d $sp, $sp, 336 ret .LBB2_8: # %.lr.ph.preheader - addi.d $a2, $s7, 8 - alsl.d $a3, $s0, $s7, 3 - alsl.d $a0, $s0, $a2, 3 - st.d $a0, $sp, 128 # 8-byte Folded Spill + addi.d $a0, $s7, 8 + alsl.d $a2, $s0, $s7, 3 + alsl.d $a3, $s0, $a0, 3 + st.d $a3, $sp, 136 # 8-byte Folded Spill + alsl.d $t3, $fp, $a0, 3 alsl.d $a0, $fp, $a2, 3 - st.d $a0, $sp, 120 # 8-byte Folded Spill - alsl.d $t7, $fp, $a3, 3 - slli.d $a4, $t1, 3 - ld.d $a0, $sp, 136 # 8-byte Folded Reload - alsl.d $a3, $t1, $a0, 3 - sub.d $a2, $a1, $t1 + addi.d $a6, $t8, 8 + ld.d $s6, $sp, 128 # 8-byte Folded Reload + slli.d $a4, $s6, 3 + ld.d $a2, $sp, 144 # 8-byte Folded Reload + alsl.d $a5, $s6, $a2, 3 + sub.d $a2, $a1, $s6 bstrpick.d $a2, $a2, 31, 0 - alsl.d $a5, $a2, $a3, 3 - addi.d $a5, $a5, 8 - slli.d $a6, $fp, 3 + alsl.d $a3, $a2, $a5, 3 + addi.d $a3, $a3, 8 + slli.d $a7, $fp, 3 slli.d $t4, $s0, 3 - alsl.d $a6, $t1, $a6, 3 - alsl.d $t0, $s0, $a6, 3 - add.d $t5, $s7, $t0 - alsl.d $a7, $a2, $t0, 3 - addi.d $t3, $a7, 16 - add.d $t6, $s7, $t3 - add.d $t8, $s7, $a6 - alsl.d $a7, $a2, $a6, 3 - addi.d $a7, $a7, 16 - add.d $s1, $s7, $a7 - alsl.d $s2, $t1, $s7, 3 - sltu $t6, $a3, $t6 - sltu $t5, $t5, $a5 + alsl.d $a7, $s6, $a7, 3 + alsl.d $t1, $s0, $a7, 3 + add.d $t5, $s7, $t1 + alsl.d $t0, $a2, $t1, 3 + addi.d $t2, $t0, 16 + add.d $t6, $s7, $t2 + add.d $t7, $s7, $a7 + alsl.d $t0, $a2, $a7, 3 + addi.d $t0, $t0, 16 + add.d $s1, $s7, $t0 + alsl.d $s2, $s6, $s7, 3 + sltu $t6, $a5, $t6 + sltu $t5, $t5, $a3 and $t6, $t6, $t5 alsl.d $t5, $a2, $a4, 3 addi.d $t5, $t5, 16 - sltu $s1, $a3, $s1 - sltu $t8, $t8, $a5 - and $t8, $s1, $t8 + sltu $s1, $a5, $s1 + sltu $t7, $t7, $a3 + and $t7, $s1, $t7 add.d $s1, $s7, $t5 - alsl.d $t4, $t1, $t4, 3 - or $t8, $t6, $t8 + alsl.d $t4, $s6, $t4, 3 + or $t7, $t6, $t7 add.d $s3, $s7, $t4 - sltu $t6, $a3, $s1 - sltu $s1, $s2, $a5 + sltu $t6, $a5, $s1 + sltu $s1, $s2, $a3 and $s1, $t6, $s1 alsl.d $t6, $a2, $t4, 3 addi.d $t6, $t6, 16 - or $t8, $t8, $s1 + or $t7, $t7, $s1 add.d $s1, $s7, $t6 - sltu $s1, $a3, $s1 - sltu $s2, $s3, $a5 + sltu $s1, $a5, $s1 + sltu $s2, $s3, $a3 and $s1, $s1, $s2 - add.d $s2, $s6, $t0 - or $t8, $t8, $s1 - add.d $s1, $s6, $t3 - sltu $s1, $a3, $s1 - sltu $s2, $s2, $a5 + add.d $s2, $t8, $t1 + or $t7, $t7, $s1 + add.d $s1, $t8, $t2 + sltu $s1, $a5, $s1 + sltu $s2, $s2, $a3 and $s1, $s1, $s2 - add.d $s2, $s6, $a6 - or $t8, $t8, $s1 - add.d $s1, $s6, $a7 - sltu $s1, $a3, $s1 - sltu $s2, $s2, $a5 + add.d $s2, $t8, $a7 + or $t7, $t7, $s1 + add.d $s1, $t8, $t0 + sltu $s1, $a5, $s1 + sltu $s2, $s2, $a3 and $s1, $s1, $s2 - alsl.d $s2, $t1, $s6, 3 - or $t8, $t8, $s1 - add.d $s1, $s6, $t5 - sltu $s1, $a3, $s1 - sltu $s2, $s2, $a5 + alsl.d $s2, $s6, $t8, 3 + or $t7, $t7, $s1 + add.d $s1, $t8, $t5 + sltu $s1, $a5, $s1 + sltu $s2, $s2, $a3 and $s1, $s1, $s2 - add.d $s2, $s6, $t4 - or $t8, $t8, $s1 - add.d $s1, $s6, $t6 - sltu $s1, $a3, $s1 - sltu $s2, $s2, $a5 - and $s1, $s1, $s2 - addi.d $s2, $s6, 8 - or $s1, $t8, $s1 - alsl.d $t8, $s0, $s2, 3 - ld.d $a0, $sp, 112 # 8-byte Folded Reload - add.d $t0, $a0, $t0 - add.d $t3, $a0, $t3 - sltu $t3, $a3, $t3 - sltu $t0, $t0, $a5 - and $t0, $t3, $t0 - alsl.d $t2, $fp, $s6, 3 - alsl.d $t3, $fp, $s2, 3 - st.d $t3, $sp, 104 # 8-byte Folded Spill - or $t0, $s1, $t0 - st.d $t2, $sp, 64 # 8-byte Folded Spill - alsl.d $t2, $s0, $t2, 3 - st.d $t2, $sp, 96 # 8-byte Folded Spill - add.d $a6, $a0, $a6 - add.d $a7, $a0, $a7 - sltu $a7, $a3, $a7 - sltu $a6, $a6, $a5 - and $a6, $a7, $a6 - alsl.d $a7, $fp, $t8, 3 - st.d $a7, $sp, 56 # 8-byte Folded Spill - or $a6, $t0, $a6 - alsl.d $a7, $t1, $a0, 3 - add.d $t0, $a0, $t5 - sltu $t0, $a3, $t0 - sltu $a7, $a7, $a5 + add.d $s2, $t8, $t4 + or $s1, $t7, $s1 + add.d $t7, $t8, $t6 + sltu $t7, $a5, $t7 + sltu $s2, $s2, $a3 + and $s2, $t7, $s2 + alsl.d $t7, $s0, $a6, 3 + or $s1, $s1, $s2 + alsl.d $s3, $fp, $t8, 3 + alsl.d $a6, $fp, $a6, 3 + st.d $a6, $sp, 112 # 8-byte Folded Spill + ld.d $s2, $sp, 120 # 8-byte Folded Reload + add.d $a6, $s2, $t1 + add.d $t1, $s2, $t2 + sltu $t1, $a5, $t1 + sltu $a6, $a6, $a3 + and $a6, $t1, $a6 + st.d $s3, $sp, 64 # 8-byte Folded Spill + alsl.d $t1, $s0, $s3, 3 + st.d $t1, $sp, 104 # 8-byte Folded Spill + or $a6, $s1, $a6 + alsl.d $t1, $fp, $t7, 3 + st.d $t1, $sp, 56 # 8-byte Folded Spill + add.d $a7, $s2, $a7 + add.d $t0, $s2, $t0 + sltu $t0, $a5, $t0 + sltu $a7, $a7, $a3 + and $a7, $t0, $a7 + addi.d $t1, $s2, 8 + or $a6, $a6, $a7 + alsl.d $a7, $s6, $s2, 3 + add.d $t0, $s2, $t5 + sltu $t0, $a5, $t0 + sltu $a7, $a7, $a3 and $a7, $t0, $a7 - addi.d $t0, $a0, 8 - or $a7, $a6, $a7 - alsl.d $t2, $s0, $a0, 3 - st.d $t0, $sp, 48 # 8-byte Folded Spill - alsl.d $t3, $s0, $t0, 3 - add.d $a6, $a0, $t6 - sltu $a3, $a3, $a6 - alsl.d $a6, $fp, $a0, 3 - add.d $t0, $a0, $t4 - sltu $a5, $t0, $a5 - st.d $t2, $sp, 40 # 8-byte Folded Spill - alsl.d $a0, $fp, $t2, 3 - st.d $a0, $sp, 80 # 8-byte Folded Spill - st.d $t3, $sp, 88 # 8-byte Folded Spill - alsl.d $a0, $fp, $t3, 3 - st.d $a0, $sp, 32 # 8-byte Folded Spill - addi.d $a0, $a2, 1 - and $a2, $a3, $a5 - or $a2, $a7, $a2 - st.d $a0, $sp, 24 # 8-byte Folded Spill - bstrpick.d $a3, $a0, 32, 1 - xor $a5, $a1, $t1 - sltui $a5, $a5, 1 - or $a2, $a5, $a2 - pcalau12i $a5, %pc_hi20(.LCPI2_0) - fld.d $fa0, $a5, %pc_lo12(.LCPI2_0) - lu12i.w $a5, 349525 - ori $a5, $a5, 1365 - lu32i.d $a5, 349525 - lu52i.d $a5, $a5, 1019 - vreplgr2vr.d $vr1, $a5 + alsl.d $t0, $s0, $s2, 3 + st.d $t1, $sp, 48 # 8-byte Folded Spill + alsl.d $t2, $s0, $t1, 3 + or $a6, $a6, $a7 + alsl.d $t1, $fp, $s2, 3 + add.d $a7, $s2, $t6 + sltu $a5, $a5, $a7 + st.d $t0, $sp, 40 # 8-byte Folded Spill + alsl.d $a7, $fp, $t0, 3 + st.d $a7, $sp, 88 # 8-byte Folded Spill + st.d $t2, $sp, 96 # 8-byte Folded Spill + alsl.d $a7, $fp, $t2, 3 + st.d $a7, $sp, 32 # 8-byte Folded Spill + add.d $a7, $s2, $t4 + addi.d $t0, $a2, 1 + sltu $a2, $a7, $a3 + and $a2, $a5, $a2 + or $a2, $a6, $a2 + st.d $t0, $sp, 24 # 8-byte Folded Spill + bstrpick.d $a3, $t0, 32, 1 slli.d $s5, $a3, 1 - alsl.d $a0, $a3, $t1, 1 - st.d $a0, $sp, 16 # 8-byte Folded Spill - addi.d $a0, $a1, 1 - st.d $a0, $sp, 72 # 8-byte Folded Spill - andi $s8, $a2, 1 + alsl.d $a3, $a3, $s6, 1 + st.d $a3, $sp, 16 # 8-byte Folded Spill + addi.d $a3, $a1, 1 + st.d $a3, $sp, 80 # 8-byte Folded Spill + xor $a1, $a1, $s6 + sltui $a1, $a1, 1 + or $a1, $a1, $a2 + andi $s8, $a1, 1 + lu12i.w $a1, 349525 + ori $a1, $a1, 1365 + lu32i.d $a1, 349525 + lu52i.d $ra, $a1, 1019 + movgr2fr.d $fa0, $ra b .LBB2_10 .p2align 4, , 16 .LBB2_9: # %"._Z6forallI9simd_execZL20BM_VOL3D_CALC_LAMBDARN9benchmark5StateEE3$_0EviiT0_.exit_crit_edge" @@ -1400,136 +1389,134 @@ _ZL20BM_VOL3D_CALC_LAMBDARN9benchmark5StateE: # @_ZL20BM_VOL3D_CALC_LAMBDARN9ben # =>This Loop Header: Depth=1 # Child Loop BB2_12 Depth 2 # Child Loop BB2_15 Depth 2 - move $a1, $t1 + ld.d $a1, $sp, 128 # 8-byte Folded Reload bnez $s8, .LBB2_14 # %bb.11: # %vector.body.preheader # in Loop: Header=BB2_10 Depth=1 - move $t2, $t1 + move $a6, $t7 move $t4, $t8 - move $t1, $s6 - move $a7, $s6 - ld.d $t5, $sp, 64 # 8-byte Folded Reload - ld.d $a5, $sp, 56 # 8-byte Folded Reload - ld.d $fp, $sp, 32 # 8-byte Folded Reload - ld.d $t6, $sp, 48 # 8-byte Folded Reload - ld.d $t0, $sp, 40 # 8-byte Folded Reload - move $a2, $a6 - move $s1, $t7 - ld.d $a1, $sp, 120 # 8-byte Folded Reload - move $s6, $s7 - move $s0, $s7 - ld.d $s2, $sp, 128 # 8-byte Folded Reload - ld.d $a3, $sp, 136 # 8-byte Folded Reload - move $s3, $s5 + ld.d $a7, $sp, 64 # 8-byte Folded Reload + ld.d $t5, $sp, 56 # 8-byte Folded Reload + ld.d $a5, $sp, 32 # 8-byte Folded Reload + ld.d $fp, $sp, 48 # 8-byte Folded Reload + ld.d $t6, $sp, 40 # 8-byte Folded Reload + move $t0, $t1 + move $a2, $a0 + move $s1, $t3 + move $t8, $s7 + move $a1, $s7 + ld.d $s0, $sp, 136 # 8-byte Folded Reload + ld.d $s2, $sp, 144 # 8-byte Folded Reload + move $a3, $s5 .p2align 4, , 16 .LBB2_12: # %vector.body # Parent Loop BB2_10 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $ra, $s1, $a4 - vld $vr3, $ra, 8 - add.d $ra, $s0, $a4 - vld $vr2, $ra, 8 - vfsub.d $vr2, $vr3, $vr2 - add.d $ra, $s2, $a4 - vld $vr4, $ra, -8 - add.d $ra, $a1, $a4 - vld $vr5, $ra, -8 - vldx $vr6, $s2, $a4 - vldx $vr8, $s0, $a4 - vldx $vr9, $a1, $a4 - vfsub.d $vr7, $vr3, $vr4 - vfsub.d $vr4, $vr3, $vr5 - vfsub.d $vr5, $vr6, $vr8 - vfsub.d $vr3, $vr9, $vr8 - vldx $vr6, $s1, $a4 - vldx $vr10, $a5, $a4 - add.d $ra, $a7, $a4 - vld $vr11, $ra, 8 - add.d $ra, $t4, $a4 - vld $vr12, $ra, -8 - vfsub.d $vr9, $vr6, $vr8 - add.d $ra, $a5, $a4 - vfsub.d $vr6, $vr10, $vr11 - vfsub.d $vr11, $vr10, $vr12 - add.d $a0, $t5, $a4 - vldx $vr8, $t5, $a4 + add.d $s3, $a2, $a4 + vld $vr2, $s3, 8 + add.d $s3, $a1, $a4 + vld $vr1, $s3, 8 + vfsub.d $vr1, $vr2, $vr1 + add.d $s3, $s0, $a4 + vld $vr3, $s3, -8 + add.d $s3, $s1, $a4 + vld $vr4, $s3, -8 + vldx $vr5, $s0, $a4 + vldx $vr7, $a1, $a4 + vldx $vr8, $s1, $a4 + vfsub.d $vr6, $vr2, $vr3 + vfsub.d $vr3, $vr2, $vr4 + vfsub.d $vr4, $vr5, $vr7 + vfsub.d $vr2, $vr8, $vr7 + vldx $vr5, $a2, $a4 + vldx $vr9, $t5, $a4 + add.d $s3, $t4, $a4 + vld $vr10, $s3, 8 + add.d $s3, $a6, $a4 + vld $vr11, $s3, -8 + vfsub.d $vr8, $vr5, $vr7 + add.d $s3, $t5, $a4 + vfsub.d $vr5, $vr9, $vr10 + vfsub.d $vr10, $vr9, $vr11 + add.d $t2, $a7, $a4 + vldx $vr7, $a7, $a4 + vldx $vr11, $a6, $a4 vldx $vr12, $t4, $a4 - vldx $vr13, $a7, $a4 - vld $vr14, $a0, 8 - vld $vr15, $ra, -8 - vfsub.d $vr10, $vr10, $vr8 - vfsub.d $vr12, $vr12, $vr13 - vfsub.d $vr8, $vr14, $vr13 - vfsub.d $vr13, $vr15, $vr13 + vld $vr13, $t2, 8 + vld $vr14, $s3, -8 + vfsub.d $vr9, $vr9, $vr7 + vfsub.d $vr11, $vr11, $vr12 + vfsub.d $vr7, $vr13, $vr12 + vfsub.d $vr12, $vr14, $vr12 + vldx $vr13, $a5, $a4 vldx $vr14, $fp, $a4 + add.d $t2, $a5, $a4 vldx $vr15, $t6, $a4 - add.d $a0, $fp, $a4 + add.d $s3, $fp, $a4 + vfsub.d $vr14, $vr13, $vr14 + add.d $s7, $t6, $a4 + vfsub.d $vr15, $vr13, $vr15 + add.d $s6, $t0, $a4 vldx $vr16, $t0, $a4 - add.d $ra, $t6, $a4 - vfsub.d $vr15, $vr14, $vr15 - add.d $t3, $t0, $a4 - vfsub.d $vr16, $vr14, $vr16 - add.d $s7, $a2, $a4 - vldx $vr17, $a2, $a4 - vld $vr18, $t3, 8 - vld $vr19, $ra, -8 - vld $vr20, $s7, 8 - vld $vr21, $a0, -8 - vfsub.d $vr14, $vr14, $vr17 - vfsub.d $vr17, $vr18, $vr19 - vfsub.d $vr18, $vr20, $vr19 - vfsub.d $vr19, $vr21, $vr19 - vfadd.d $vr20, $vr2, $vr9 - vfadd.d $vr21, $vr6, $vr13 - vfadd.d $vr22, $vr15, $vr19 - vbitrevi.d $vr23, $vr12, 63 - vfmul.d $vr23, $vr16, $vr23 - vfmadd.d $vr23, $vr11, $vr17, $vr23 - vbitrevi.d $vr24, $vr17, 63 - vfmul.d $vr24, $vr7, $vr24 - vfmadd.d $vr24, $vr16, $vr5, $vr24 - vbitrevi.d $vr25, $vr5, 63 - vfmul.d $vr25, $vr11, $vr25 - vfmadd.d $vr25, $vr7, $vr12, $vr25 - vfmul.d $vr21, $vr21, $vr24 - vfmadd.d $vr20, $vr20, $vr23, $vr21 - vfmadd.d $vr20, $vr22, $vr25, $vr20 - vfadd.d $vr7, $vr7, $vr3 - vfadd.d $vr11, $vr11, $vr8 - vfadd.d $vr16, $vr16, $vr18 - vbitrevi.d $vr21, $vr13, 63 - vfmul.d $vr21, $vr14, $vr21 - vfmadd.d $vr21, $vr10, $vr19, $vr21 - vbitrevi.d $vr19, $vr19, 63 - vfmul.d $vr19, $vr4, $vr19 - vfmadd.d $vr19, $vr14, $vr9, $vr19 - vbitrevi.d $vr9, $vr9, 63 - vfmul.d $vr9, $vr10, $vr9 - vfmadd.d $vr9, $vr4, $vr13, $vr9 - vfmul.d $vr11, $vr11, $vr19 - vfmadd.d $vr7, $vr7, $vr21, $vr11 - vfmadd.d $vr7, $vr16, $vr9, $vr7 - vfadd.d $vr7, $vr20, $vr7 - vfadd.d $vr4, $vr4, $vr5 - vfadd.d $vr5, $vr10, $vr12 - vfadd.d $vr9, $vr14, $vr17 - vbitrevi.d $vr10, $vr8, 63 - vfmul.d $vr10, $vr15, $vr10 - vfmadd.d $vr10, $vr6, $vr18, $vr10 - vbitrevi.d $vr11, $vr18, 63 - vfmul.d $vr11, $vr2, $vr11 - vfmadd.d $vr11, $vr15, $vr3, $vr11 - vbitrevi.d $vr3, $vr3, 63 - vfmul.d $vr3, $vr6, $vr3 - vfmadd.d $vr2, $vr2, $vr8, $vr3 - vfmul.d $vr3, $vr5, $vr11 - vfmadd.d $vr3, $vr4, $vr10, $vr3 - vfmadd.d $vr2, $vr9, $vr2, $vr3 - vfadd.d $vr2, $vr2, $vr7 - vfmul.d $vr2, $vr2, $vr1 - vstx $vr2, $a3, $a4 - addi.d $s3, $s3, -2 - addi.d $a3, $a3, 16 + vld $vr17, $s7, 8 + vld $vr18, $s3, -8 + vld $vr19, $s6, 8 + vld $vr20, $t2, -8 + vfsub.d $vr13, $vr13, $vr16 + vfsub.d $vr16, $vr17, $vr18 + vfsub.d $vr17, $vr19, $vr18 + vfsub.d $vr18, $vr20, $vr18 + vfadd.d $vr19, $vr1, $vr8 + vfadd.d $vr20, $vr5, $vr12 + vfadd.d $vr21, $vr14, $vr18 + vbitrevi.d $vr22, $vr11, 63 + vfmul.d $vr22, $vr15, $vr22 + vfmadd.d $vr22, $vr10, $vr16, $vr22 + vbitrevi.d $vr23, $vr16, 63 + vfmul.d $vr23, $vr6, $vr23 + vfmadd.d $vr23, $vr15, $vr4, $vr23 + vbitrevi.d $vr24, $vr4, 63 + vfmul.d $vr24, $vr10, $vr24 + vfmadd.d $vr24, $vr6, $vr11, $vr24 + vfmul.d $vr20, $vr20, $vr23 + vfmadd.d $vr19, $vr19, $vr22, $vr20 + vfmadd.d $vr19, $vr21, $vr24, $vr19 + vfadd.d $vr6, $vr6, $vr2 + vfadd.d $vr10, $vr10, $vr7 + vfadd.d $vr15, $vr15, $vr17 + vbitrevi.d $vr20, $vr12, 63 + vfmul.d $vr20, $vr13, $vr20 + vfmadd.d $vr20, $vr9, $vr18, $vr20 + vbitrevi.d $vr18, $vr18, 63 + vfmul.d $vr18, $vr3, $vr18 + vfmadd.d $vr18, $vr13, $vr8, $vr18 + vbitrevi.d $vr8, $vr8, 63 + vfmul.d $vr8, $vr9, $vr8 + vfmadd.d $vr8, $vr3, $vr12, $vr8 + vfmul.d $vr10, $vr10, $vr18 + vfmadd.d $vr6, $vr6, $vr20, $vr10 + vfmadd.d $vr6, $vr15, $vr8, $vr6 + vfadd.d $vr6, $vr19, $vr6 + vfadd.d $vr3, $vr3, $vr4 + vfadd.d $vr4, $vr9, $vr11 + vfadd.d $vr8, $vr13, $vr16 + vbitrevi.d $vr9, $vr7, 63 + vfmul.d $vr9, $vr14, $vr9 + vfmadd.d $vr9, $vr5, $vr17, $vr9 + vbitrevi.d $vr10, $vr17, 63 + vfmul.d $vr10, $vr1, $vr10 + vfmadd.d $vr10, $vr14, $vr2, $vr10 + vbitrevi.d $vr2, $vr2, 63 + vfmul.d $vr2, $vr5, $vr2 + vfmadd.d $vr1, $vr1, $vr7, $vr2 + vfmul.d $vr2, $vr4, $vr10 + vfmadd.d $vr2, $vr3, $vr9, $vr2 + vfmadd.d $vr1, $vr8, $vr1, $vr2 + vfadd.d $vr1, $vr1, $vr6 + vreplgr2vr.d $vr2, $ra + vfmul.d $vr1, $vr1, $vr2 + vstx $vr1, $s2, $a4 + addi.d $a3, $a3, -2 addi.d $s2, $s2, 16 addi.d $s0, $s0, 16 addi.d $a1, $a1, 16 @@ -1542,142 +1529,142 @@ _ZL20BM_VOL3D_CALC_LAMBDARN9benchmark5StateE: # @_ZL20BM_VOL3D_CALC_LAMBDARN9ben addi.d $t5, $t5, 16 addi.d $a7, $a7, 16 addi.d $t4, $t4, 16 - bnez $s3, .LBB2_12 + addi.d $a6, $a6, 16 + bnez $a3, .LBB2_12 # %bb.13: # %middle.block # in Loop: Header=BB2_10 Depth=1 ld.d $a1, $sp, 16 # 8-byte Folded Reload - move $s7, $s6 - move $s6, $t1 - move $t1, $t2 - ld.d $a0, $sp, 24 # 8-byte Folded Reload - beq $a0, $s5, .LBB2_9 + move $s7, $t8 + ld.d $t8, $sp, 72 # 8-byte Folded Reload + ld.d $a2, $sp, 24 # 8-byte Folded Reload + beq $a2, $s5, .LBB2_9 .LBB2_14: # %scalar.ph.preheader # in Loop: Header=BB2_10 Depth=1 - slli.d $ra, $a1, 3 - ld.d $a0, $sp, 72 # 8-byte Folded Reload - sub.d $fp, $a0, $a1 - move $t5, $a6 - ld.d $t4, $sp, 88 # 8-byte Folded Reload - ld.d $a7, $sp, 104 # 8-byte Folded Reload - move $s2, $s6 - move $a5, $t8 - ld.d $s3, $sp, 112 # 8-byte Folded Reload - ld.d $t6, $sp, 80 # 8-byte Folded Reload - ld.d $t0, $sp, 96 # 8-byte Folded Reload - ld.d $a2, $sp, 120 # 8-byte Folded Reload + slli.d $fp, $a1, 3 + ld.d $a2, $sp, 80 # 8-byte Folded Reload + sub.d $a6, $a2, $a1 + move $t5, $t1 + ld.d $t4, $sp, 96 # 8-byte Folded Reload + ld.d $a7, $sp, 112 # 8-byte Folded Reload + move $s2, $t8 + move $a5, $t7 + ld.d $s3, $sp, 120 # 8-byte Folded Reload + ld.d $t6, $sp, 88 # 8-byte Folded Reload + ld.d $t0, $sp, 104 # 8-byte Folded Reload + move $a2, $t3 move $s1, $s7 - ld.d $a1, $sp, 128 # 8-byte Folded Reload - ld.d $s0, $sp, 136 # 8-byte Folded Reload - move $a3, $t7 + ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $s0, $sp, 144 # 8-byte Folded Reload + move $a3, $a0 .p2align 4, , 16 .LBB2_15: # %scalar.ph # Parent Loop BB2_10 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $a0, $a3, $ra - add.d $t3, $a1, $ra - fld.d $fa2, $t3, -8 - add.d $t3, $a2, $ra - fld.d $fa3, $t3, -8 - fldx.d $fa4, $a1, $ra - fld.d $fa5, $a0, 8 - fldx.d $fa7, $a2, $ra - vldx $vr8, $a3, $ra - fldx.d $ft1, $s1, $ra - fsub.d $fa6, $fa5, $fa2 - vldx $vr10, $s1, $ra - fsub.d $fa3, $fa5, $fa3 - fsub.d $fa4, $fa4, $ft1 - fsub.d $fa2, $fa7, $ft1 - vfsub.d $vr9, $vr8, $vr10 - add.d $a0, $t0, $ra - add.d $t3, $a5, $ra - fld.d $fa5, $t3, -8 - add.d $t3, $a7, $ra - fld.d $fa7, $t3, -8 - fldx.d $ft0, $a5, $ra - fld.d $ft2, $a0, 8 - fldx.d $ft3, $a7, $ra - vldx $vr12, $t0, $ra - fldx.d $ft5, $s2, $ra - fsub.d $ft6, $ft2, $fa5 - vldx $vr15, $s2, $ra - fsub.d $fa7, $ft2, $fa7 - fsub.d $ft0, $ft0, $ft5 - fsub.d $fa5, $ft3, $ft5 - vfsub.d $vr10, $vr12, $vr15 - add.d $a0, $t6, $ra - add.d $t3, $t4, $ra - fld.d $ft3, $t3, -8 - add.d $t3, $t5, $ra - fldx.d $ft4, $t5, $ra - fldx.d $ft5, $t4, $ra - fld.d $ft7, $a0, 8 - fld.d $ft8, $t3, 8 - vldx $vr17, $t6, $ra - fldx.d $ft10, $s3, $ra - fsub.d $ft3, $ft7, $ft3 - vldx $vr19, $s3, $ra - fsub.d $ft4, $ft7, $ft4 - fsub.d $ft5, $ft5, $ft10 - fsub.d $ft7, $ft8, $ft10 - vfsub.d $vr16, $vr17, $vr19 - vreplvei.d $vr17, $vr9, 0 + add.d $t2, $a3, $fp + add.d $s6, $a1, $fp + fld.d $fa1, $s6, -8 + add.d $s6, $a2, $fp + fld.d $fa2, $s6, -8 + fldx.d $fa3, $a1, $fp + fld.d $fa4, $t2, 8 + fldx.d $fa6, $a2, $fp + vldx $vr7, $a3, $fp + fldx.d $ft0, $s1, $fp + fsub.d $fa5, $fa4, $fa1 + vldx $vr9, $s1, $fp + fsub.d $fa2, $fa4, $fa2 + fsub.d $fa3, $fa3, $ft0 + fsub.d $fa1, $fa6, $ft0 + vfsub.d $vr8, $vr7, $vr9 + add.d $t2, $t0, $fp + add.d $s6, $a5, $fp + fld.d $fa4, $s6, -8 + add.d $s6, $a7, $fp + fld.d $fa6, $s6, -8 + fldx.d $fa7, $a5, $fp + fld.d $ft1, $t2, 8 + fldx.d $ft2, $a7, $fp + vldx $vr11, $t0, $fp + fldx.d $ft4, $s2, $fp + fsub.d $ft5, $ft1, $fa4 + vldx $vr14, $s2, $fp + fsub.d $fa6, $ft1, $fa6 + fsub.d $fa7, $fa7, $ft4 + fsub.d $fa4, $ft2, $ft4 + vfsub.d $vr9, $vr11, $vr14 + add.d $t2, $t6, $fp + add.d $s6, $t4, $fp + fld.d $ft2, $s6, -8 + add.d $s6, $t5, $fp + fldx.d $ft3, $t5, $fp + fldx.d $ft4, $t4, $fp + fld.d $ft6, $t2, 8 + fld.d $ft7, $s6, 8 + vldx $vr16, $t6, $fp + fldx.d $ft9, $s3, $fp + fsub.d $ft2, $ft6, $ft2 + vldx $vr18, $s3, $fp + fsub.d $ft3, $ft6, $ft3 + fsub.d $ft4, $ft4, $ft9 + fsub.d $ft6, $ft7, $ft9 + vfsub.d $vr15, $vr16, $vr18 + vreplvei.d $vr16, $vr8, 0 + vreplvei.d $vr8, $vr8, 1 + fadd.d $ft9, $ft0, $ft8 + vreplvei.d $vr18, $vr9, 0 vreplvei.d $vr9, $vr9, 1 - fadd.d $ft10, $ft1, $ft9 - vreplvei.d $vr19, $vr10, 0 - vreplvei.d $vr10, $vr10, 1 - fadd.d $ft12, $ft2, $ft11 - vreplvei.d $vr21, $vr16, 0 - vreplvei.d $vr16, $vr16, 1 - fadd.d $ft14, $ft8, $ft13 - fneg.d $ft15, $ft0 - fmul.d $ft15, $ft3, $ft15 - fmadd.d $ft15, $ft6, $ft5, $ft15 - fneg.d $fs0, $ft5 - fmul.d $fs0, $fa6, $fs0 - fmadd.d $fs0, $ft3, $fa4, $fs0 - fneg.d $fs1, $fa4 - fmul.d $fs1, $ft6, $fs1 - fmadd.d $fs1, $fa6, $ft0, $fs1 - fmul.d $ft12, $ft12, $fs0 - fmadd.d $ft10, $ft10, $ft15, $ft12 - fmadd.d $ft10, $ft14, $fs1, $ft10 - fadd.d $fa6, $fa6, $fa2 - fadd.d $ft6, $ft6, $fa5 - fadd.d $ft3, $ft3, $ft7 - fneg.d $ft12, $ft11 - fmul.d $ft12, $ft4, $ft12 - fmadd.d $ft12, $fa7, $ft13, $ft12 - fneg.d $ft13, $ft13 - fmul.d $ft13, $fa3, $ft13 - fmadd.d $ft13, $ft4, $ft9, $ft13 - fneg.d $ft9, $ft9 - fmul.d $ft9, $fa7, $ft9 - fmadd.d $ft9, $fa3, $ft11, $ft9 - fmul.d $ft6, $ft6, $ft13 - fmadd.d $fa6, $fa6, $ft12, $ft6 - fmadd.d $fa6, $ft3, $ft9, $fa6 - fadd.d $fa6, $ft10, $fa6 - fadd.d $fa3, $fa3, $fa4 - fadd.d $fa4, $fa7, $ft0 - fadd.d $fa7, $ft4, $ft5 - fneg.d $ft0, $fa5 - fmul.d $ft0, $ft8, $ft0 - fmadd.d $ft0, $ft2, $ft7, $ft0 - fneg.d $ft3, $ft7 - fmul.d $ft3, $ft1, $ft3 - fmadd.d $ft3, $ft8, $fa2, $ft3 - fneg.d $fa2, $fa2 - fmul.d $fa2, $ft2, $fa2 - fmadd.d $fa2, $ft1, $fa5, $fa2 - fmul.d $fa4, $fa4, $ft3 - fmadd.d $fa3, $fa3, $ft0, $fa4 - fmadd.d $fa2, $fa7, $fa2, $fa3 - fadd.d $fa2, $fa2, $fa6 - fmul.d $fa2, $fa2, $fa0 - fstx.d $fa2, $s0, $ra + fadd.d $ft11, $ft1, $ft10 + vreplvei.d $vr20, $vr15, 0 + vreplvei.d $vr15, $vr15, 1 + fadd.d $ft13, $ft7, $ft12 + fneg.d $ft14, $fa7 + fmul.d $ft14, $ft2, $ft14 + fmadd.d $ft14, $ft5, $ft4, $ft14 + fneg.d $ft15, $ft4 + fmul.d $ft15, $fa5, $ft15 + fmadd.d $ft15, $ft2, $fa3, $ft15 + fneg.d $fs0, $fa3 + fmul.d $fs0, $ft5, $fs0 + fmadd.d $fs0, $fa5, $fa7, $fs0 + fmul.d $ft11, $ft11, $ft15 + fmadd.d $ft9, $ft9, $ft14, $ft11 + fmadd.d $ft9, $ft13, $fs0, $ft9 + fadd.d $fa5, $fa5, $fa1 + fadd.d $ft5, $ft5, $fa4 + fadd.d $ft2, $ft2, $ft6 + fneg.d $ft11, $ft10 + fmul.d $ft11, $ft3, $ft11 + fmadd.d $ft11, $fa6, $ft12, $ft11 + fneg.d $ft12, $ft12 + fmul.d $ft12, $fa2, $ft12 + fmadd.d $ft12, $ft3, $ft8, $ft12 + fneg.d $ft8, $ft8 + fmul.d $ft8, $fa6, $ft8 + fmadd.d $ft8, $fa2, $ft10, $ft8 + fmul.d $ft5, $ft5, $ft12 + fmadd.d $fa5, $fa5, $ft11, $ft5 + fmadd.d $fa5, $ft2, $ft8, $fa5 + fadd.d $fa5, $ft9, $fa5 + fadd.d $fa2, $fa2, $fa3 + fadd.d $fa3, $fa6, $fa7 + fadd.d $fa6, $ft3, $ft4 + fneg.d $fa7, $fa4 + fmul.d $fa7, $ft7, $fa7 + fmadd.d $fa7, $ft1, $ft6, $fa7 + fneg.d $ft2, $ft6 + fmul.d $ft2, $ft0, $ft2 + fmadd.d $ft2, $ft7, $fa1, $ft2 + fneg.d $fa1, $fa1 + fmul.d $fa1, $ft1, $fa1 + fmadd.d $fa1, $ft0, $fa4, $fa1 + fmul.d $fa3, $fa3, $ft2 + fmadd.d $fa2, $fa2, $fa7, $fa3 + fmadd.d $fa1, $fa6, $fa1, $fa2 + fadd.d $fa1, $fa1, $fa5 + fmul.d $fa1, $fa1, $fa0 + fstx.d $fa1, $s0, $fp addi.d $a3, $a3, 8 - addi.w $fp, $fp, -1 + addi.w $a6, $a6, -1 addi.d $s0, $s0, 8 addi.d $a1, $a1, 8 addi.d $s1, $s1, 8 @@ -1690,11 +1677,11 @@ _ZL20BM_VOL3D_CALC_LAMBDARN9benchmark5StateE: # @_ZL20BM_VOL3D_CALC_LAMBDARN9ben addi.d $a7, $a7, 8 addi.d $t4, $t4, 8 addi.d $t5, $t5, 8 - bnez $fp, .LBB2_15 + bnez $a6, .LBB2_15 b .LBB2_9 .LBB2_16: .Ltmp4: # EH_LABEL - ld.d $a1, $sp, 216 + ld.d $a1, $sp, 224 move $fp, $a0 beqz $a1, .LBB2_18 # %bb.17: @@ -1737,10 +1724,6 @@ GCC_except_table2: .LCPI3_0: .dword 0xbfe0000000000000 # double -0.5 .dword 0x3fe0000000000000 # double 0.5 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI3_1: - .dword 0x3bc79ca10c924223 # double 9.9999999999999995E-21 .text .p2align 5 .type _ZL24BM_DEL_DOT_VEC_2D_LAMBDARN9benchmark5StateE,@function @@ -1815,13 +1798,16 @@ _ZL24BM_DEL_DOT_VEC_2D_LAMBDARN9benchmark5StateE: # @_ZL24BM_DEL_DOT_VEC_2D_LAMB alsl.d $a7, $s6, $s3, 3 alsl.d $t0, $s6, $s4, 3 ld.d $t1, $sp, 80 - pcalau12i $t2, %pc_hi20(.LCPI3_0) - vld $vr0, $t2, %pc_lo12(.LCPI3_0) - pcalau12i $t2, %pc_hi20(.LCPI3_1) - fld.d $fa1, $t2, %pc_lo12(.LCPI3_1) - vldi $vr2, -928 + vldi $vr0, -928 lu52i.d $t2, $zero, 1022 - vreplgr2vr.d $vr3, $t2 + vreplgr2vr.d $vr1, $t2 + pcalau12i $t2, %pc_hi20(.LCPI3_0) + vld $vr2, $t2, %pc_lo12(.LCPI3_0) + lu12i.w $t2, 51492 + ori $t2, $t2, 547 + lu32i.d $t2, 498849 + lu52i.d $t2, $t2, 956 + movgr2fr.d $fa3, $t2 .p2align 4, , 16 .LBB3_5: # %.lr.ph.us # =>This Loop Header: Depth=1 @@ -1842,7 +1828,7 @@ _ZL24BM_DEL_DOT_VEC_2D_LAMBDARN9benchmark5StateE: # @_ZL24BM_DEL_DOT_VEC_2D_LAMB fadd.d $fa4, $fa4, $fa5 fsub.d $fa4, $fa4, $fa6 fsub.d $fa4, $fa4, $ft0 - fmul.d $fa5, $fa4, $fa2 + fmul.d $fa5, $fa4, $fa0 vldx $vr8, $a4, $t4 fldx.d $fa4, $a4, $t4 vldx $vr10, $t0, $t4 @@ -1858,7 +1844,7 @@ _ZL24BM_DEL_DOT_VEC_2D_LAMBDARN9benchmark5StateE: # @_ZL24BM_DEL_DOT_VEC_2D_LAMB vpackev.d $vr8, $vr14, $vr11 vshuf4i.d $vr11, $vr9, 9 vfsub.d $vr7, $vr7, $vr11 - vfmul.d $vr7, $vr7, $vr3 + vfmul.d $vr7, $vr7, $vr1 vldx $vr9, $a7, $t4 fldx.d $ft7, $a6, $t4 fldx.d $ft8, $a7, $t4 @@ -1868,7 +1854,7 @@ _ZL24BM_DEL_DOT_VEC_2D_LAMBDARN9benchmark5StateE: # @_ZL24BM_DEL_DOT_VEC_2D_LAMB fadd.d $ft7, $ft7, $ft8 fsub.d $ft7, $ft7, $ft9 fsub.d $ft7, $ft7, $ft10 - fmul.d $ft7, $ft7, $fa2 + fmul.d $ft7, $ft7, $fa0 fldx.d $ft8, $t0, $t4 fldx.d $ft9, $s4, $t4 vshuf4i.d $vr11, $vr14, 12 @@ -1884,14 +1870,14 @@ _ZL24BM_DEL_DOT_VEC_2D_LAMBDARN9benchmark5StateE: # @_ZL24BM_DEL_DOT_VEC_2D_LAMB vfsub.d $vr8, $vr10, $vr9 vshuf4i.d $vr19, $vr11, 12 vfsub.d $vr8, $vr8, $vr19 - vfmul.d $vr8, $vr8, $vr3 - vfmul.d $vr9, $vr13, $vr0 + vfmul.d $vr8, $vr8, $vr1 + vfmul.d $vr9, $vr13, $vr2 vreplvei.d $vr10, $vr9, 0 fmul.d $ft2, $fa5, $ft2 vreplvei.d $vr11, $vr7, 0 vreplvei.d $vr13, $vr7, 1 fmadd.d $ft2, $ft5, $ft3, $ft2 - fadd.d $ft2, $ft2, $fa1 + fadd.d $ft2, $ft2, $fa3 frecip.d $ft2, $ft2 fneg.d $fa5, $fa5 vextrins.d $vr15, $vr5, 16 @@ -1983,16 +1969,8 @@ GCC_except_table3: .Lcst_end1: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZL16BM_COUPLE_LAMBDARN9benchmark5StateE -.LCPI4_0: - .dword 0x406e56fd83ba6863 # double 242.71844660194174 -.LCPI4_1: - .dword 0x38e09d8792fb4c49 # double 9.9999999999999992E-35 -.LCPI4_2: - .dword 0x3fca9fbe76c8b439 # double 0.20799999999999999 .text - .p2align 5 + .p2align 5 # -- Begin function _ZL16BM_COUPLE_LAMBDARN9benchmark5StateE .type _ZL16BM_COUPLE_LAMBDARN9benchmark5StateE,@function _ZL16BM_COUPLE_LAMBDARN9benchmark5StateE: # @_ZL16BM_COUPLE_LAMBDARN9benchmark5StateE .Lfunc_begin2: @@ -2000,27 +1978,27 @@ _ZL16BM_COUPLE_LAMBDARN9benchmark5StateE: # @_ZL16BM_COUPLE_LAMBDARN9benchmark5S .cfi_personality 155, DW.ref.__gxx_personality_v0 .cfi_lsda 27, .Lexception2 # %bb.0: - addi.d $sp, $sp, -544 - .cfi_def_cfa_offset 544 - st.d $ra, $sp, 536 # 8-byte Folded Spill - st.d $fp, $sp, 528 # 8-byte Folded Spill - st.d $s0, $sp, 520 # 8-byte Folded Spill - st.d $s1, $sp, 512 # 8-byte Folded Spill - st.d $s2, $sp, 504 # 8-byte Folded Spill - st.d $s3, $sp, 496 # 8-byte Folded Spill - st.d $s4, $sp, 488 # 8-byte Folded Spill - st.d $s5, $sp, 480 # 8-byte Folded Spill - st.d $s6, $sp, 472 # 8-byte Folded Spill - st.d $s7, $sp, 464 # 8-byte Folded Spill - st.d $s8, $sp, 456 # 8-byte Folded Spill - fst.d $fs0, $sp, 448 # 8-byte Folded Spill - fst.d $fs1, $sp, 440 # 8-byte Folded Spill - fst.d $fs2, $sp, 432 # 8-byte Folded Spill - fst.d $fs3, $sp, 424 # 8-byte Folded Spill - fst.d $fs4, $sp, 416 # 8-byte Folded Spill - fst.d $fs5, $sp, 408 # 8-byte Folded Spill - fst.d $fs6, $sp, 400 # 8-byte Folded Spill - fst.d $fs7, $sp, 392 # 8-byte Folded Spill + addi.d $sp, $sp, -576 + .cfi_def_cfa_offset 576 + st.d $ra, $sp, 568 # 8-byte Folded Spill + st.d $fp, $sp, 560 # 8-byte Folded Spill + st.d $s0, $sp, 552 # 8-byte Folded Spill + st.d $s1, $sp, 544 # 8-byte Folded Spill + st.d $s2, $sp, 536 # 8-byte Folded Spill + st.d $s3, $sp, 528 # 8-byte Folded Spill + st.d $s4, $sp, 520 # 8-byte Folded Spill + st.d $s5, $sp, 512 # 8-byte Folded Spill + st.d $s6, $sp, 504 # 8-byte Folded Spill + st.d $s7, $sp, 496 # 8-byte Folded Spill + st.d $s8, $sp, 488 # 8-byte Folded Spill + fst.d $fs0, $sp, 480 # 8-byte Folded Spill + fst.d $fs1, $sp, 472 # 8-byte Folded Spill + fst.d $fs2, $sp, 464 # 8-byte Folded Spill + fst.d $fs3, $sp, 456 # 8-byte Folded Spill + fst.d $fs4, $sp, 448 # 8-byte Folded Spill + fst.d $fs5, $sp, 440 # 8-byte Folded Spill + fst.d $fs6, $sp, 432 # 8-byte Folded Spill + fst.d $fs7, $sp, 424 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -2054,24 +2032,24 @@ _ZL16BM_COUPLE_LAMBDARN9benchmark5StateE: # @_ZL16BM_COUPLE_LAMBDARN9benchmark5S ld.d $s2, $s0, 216 ld.d $s0, $s0, 224 ld.w $a1, $a0, 0 - addi.d $a0, $sp, 304 + addi.d $a0, $sp, 336 ori $a2, $zero, 3 pcaddu18i $ra, %call36(_ZN7ADomainC2Eii) jirl $ra, $ra, 0 - ld.w $s4, $sp, 316 - ld.w $s5, $sp, 328 - ld.w $a0, $sp, 320 - st.d $a0, $sp, 184 # 8-byte Folded Spill - ld.w $a0, $sp, 332 - st.d $a0, $sp, 272 # 8-byte Folded Spill - ld.w $a0, $sp, 324 - st.d $a0, $sp, 152 # 8-byte Folded Spill - ld.w $a0, $sp, 336 + ld.w $s4, $sp, 348 + ld.w $s5, $sp, 360 + ld.w $a0, $sp, 352 st.d $a0, $sp, 192 # 8-byte Folded Spill + ld.w $a0, $sp, 364 + st.d $a0, $sp, 280 # 8-byte Folded Spill + ld.w $a0, $sp, 356 + st.d $a0, $sp, 160 # 8-byte Folded Spill + ld.w $a0, $sp, 368 + st.d $a0, $sp, 200 # 8-byte Folded Spill ld.w $s6, $s7, 28 ld.d $s8, $s7, 16 .Ltmp10: # EH_LABEL - st.d $s7, $sp, 104 # 8-byte Folded Spill + st.d $s7, $sp, 112 # 8-byte Folded Spill move $a0, $s7 pcaddu18i $ra, %call36(_ZN9benchmark5State16StartKeepRunningEv) jirl $ra, $ra, 0 @@ -2081,38 +2059,38 @@ _ZL16BM_COUPLE_LAMBDARN9benchmark5StateE: # @_ZL16BM_COUPLE_LAMBDARN9benchmark5S # %bb.2: # %_ZN9benchmark5State3endEv.exit.preheader beqz $s8, .LBB4_47 # %bb.3: # %.preheader.lr.ph - ld.d $a0, $sp, 152 # 8-byte Folded Reload - ld.d $a1, $sp, 192 # 8-byte Folded Reload + ld.d $a0, $sp, 160 # 8-byte Folded Reload + ld.d $a1, $sp, 200 # 8-byte Folded Reload bge $a0, $a1, .LBB4_47 # %bb.4: # %.preheader.lr.ph.split.us - ld.d $a0, $sp, 184 # 8-byte Folded Reload - ld.d $a1, $sp, 272 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload + ld.d $a1, $sp, 280 # 8-byte Folded Reload bge $a0, $a1, .LBB4_47 # %bb.5: # %.preheader.lr.ph.split.us bge $s4, $s5, .LBB4_47 # %bb.6: # %.preheader.us.us.preheader addi.w $a0, $s5, 2 - ld.d $a3, $sp, 272 # 8-byte Folded Reload + ld.d $a3, $sp, 280 # 8-byte Folded Reload addi.w $a1, $a3, 2 addi.w $a2, $s5, 1 addi.w $a3, $a3, 1 sub.d $a4, $s5, $s4 - st.d $a4, $sp, 264 # 8-byte Folded Spill - ld.d $a6, $sp, 152 # 8-byte Folded Reload + st.d $a4, $sp, 272 # 8-byte Folded Spill + ld.d $a6, $sp, 160 # 8-byte Folded Reload mul.d $a4, $a6, $a3 - ld.d $a5, $sp, 184 # 8-byte Folded Reload + ld.d $a5, $sp, 192 # 8-byte Folded Reload add.d $a4, $a4, $a5 mul.d $a4, $a4, $a2 add.d $a4, $a4, $s4 slli.d $a4, $a4, 4 addi.d $a4, $a4, 8 add.d $a7, $s3, $a4 - st.d $a7, $sp, 144 # 8-byte Folded Spill + st.d $a7, $sp, 152 # 8-byte Folded Spill mul.d $a3, $a3, $a2 slli.d $a3, $a3, 4 - st.d $a3, $sp, 176 # 8-byte Folded Spill + st.d $a3, $sp, 184 # 8-byte Folded Spill slli.d $a2, $a2, 4 - st.d $a2, $sp, 256 # 8-byte Folded Spill + st.d $a2, $sp, 264 # 8-byte Folded Spill mul.d $a2, $a6, $a1 add.d $a2, $a2, $a5 mul.d $a2, $a2, $a0 @@ -2120,23 +2098,41 @@ _ZL16BM_COUPLE_LAMBDARN9benchmark5StateE: # @_ZL16BM_COUPLE_LAMBDARN9benchmark5S alsl.d $a2, $s4, $a2, 4 add.d $a2, $a2, $s2 addi.d $a2, $a2, 8 - st.d $a2, $sp, 136 # 8-byte Folded Spill + st.d $a2, $sp, 144 # 8-byte Folded Spill mul.d $a1, $a1, $a0 slli.d $a1, $a1, 4 - st.d $a1, $sp, 168 # 8-byte Folded Spill + st.d $a1, $sp, 176 # 8-byte Folded Spill slli.d $a0, $a0, 4 - st.d $a0, $sp, 248 # 8-byte Folded Spill + st.d $a0, $sp, 256 # 8-byte Folded Spill add.d $a0, $s1, $a4 - st.d $a0, $sp, 128 # 8-byte Folded Spill + st.d $a0, $sp, 136 # 8-byte Folded Spill add.d $a0, $s0, $a4 - st.d $a0, $sp, 120 # 8-byte Folded Spill + st.d $a0, $sp, 128 # 8-byte Folded Spill add.d $a0, $fp, $a4 - st.d $a0, $sp, 112 # 8-byte Folded Spill + st.d $a0, $sp, 120 # 8-byte Folded Spill + lu12i.w $a0, -509018 + ori $a0, $a0, 2147 + lu32i.d $a0, -108803 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa0, $a0 + fst.d $fa0, $sp, 320 # 8-byte Folded Spill + lu12i.w $a0, -446540 + ori $a0, $a0, 3145 + lu32i.d $a0, 40327 + lu52i.d $a0, $a0, 910 + movgr2fr.d $fa0, $a0 + fst.d $fa0, $sp, 312 # 8-byte Folded Spill + lu12i.w $a0, 486539 + ori $a0, $a0, 1081 + lu32i.d $a0, -352322 + lu52i.d $a0, $a0, 1020 + movgr2fr.d $fa0, $a0 + fst.d $fa0, $sp, 304 # 8-byte Folded Spill b .LBB4_8 .p2align 4, , 16 .LBB4_7: # %"._Z6forallI9simd_execZL16BM_COUPLE_LAMBDARN9benchmark5StateEE3$_0EviiT0_.exit_crit_edge.split.us.us.us" # in Loop: Header=BB4_8 Depth=1 - ld.d $s8, $sp, 160 # 8-byte Folded Reload + ld.d $s8, $sp, 168 # 8-byte Folded Reload addi.d $s8, $s8, -1 beqz $s8, .LBB4_47 .LBB4_8: # %.preheader.us.us @@ -2144,72 +2140,72 @@ _ZL16BM_COUPLE_LAMBDARN9benchmark5StateE: # @_ZL16BM_COUPLE_LAMBDARN9benchmark5S # Child Loop BB4_10 Depth 2 # Child Loop BB4_12 Depth 3 # Child Loop BB4_13 Depth 4 - st.d $s8, $sp, 160 # 8-byte Folded Spill - ld.d $fp, $sp, 112 # 8-byte Folded Reload - ld.d $s4, $sp, 120 # 8-byte Folded Reload - ld.d $s6, $sp, 128 # 8-byte Folded Reload - ld.d $s8, $sp, 136 # 8-byte Folded Reload - ld.d $s1, $sp, 144 # 8-byte Folded Reload - ld.d $a3, $sp, 152 # 8-byte Folded Reload + st.d $s8, $sp, 168 # 8-byte Folded Spill + ld.d $fp, $sp, 120 # 8-byte Folded Reload + ld.d $s4, $sp, 128 # 8-byte Folded Reload + ld.d $s6, $sp, 136 # 8-byte Folded Reload + ld.d $s8, $sp, 144 # 8-byte Folded Reload + ld.d $s1, $sp, 152 # 8-byte Folded Reload + ld.d $a3, $sp, 160 # 8-byte Folded Reload b .LBB4_10 .p2align 4, , 16 .LBB4_9: # %.noexc26.loopexit.us.us.us # in Loop: Header=BB4_10 Depth=2 - ld.d $a3, $sp, 200 # 8-byte Folded Reload + ld.d $a3, $sp, 208 # 8-byte Folded Reload addi.d $a3, $a3, 1 - ld.d $a0, $sp, 176 # 8-byte Folded Reload - ld.d $s1, $sp, 208 # 8-byte Folded Reload + ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $s1, $sp, 216 # 8-byte Folded Reload add.d $s1, $s1, $a0 - ld.d $a1, $sp, 168 # 8-byte Folded Reload - ld.d $s8, $sp, 216 # 8-byte Folded Reload + ld.d $a1, $sp, 176 # 8-byte Folded Reload + ld.d $s8, $sp, 224 # 8-byte Folded Reload add.d $s8, $s8, $a1 - ld.d $s6, $sp, 224 # 8-byte Folded Reload + ld.d $s6, $sp, 232 # 8-byte Folded Reload add.d $s6, $s6, $a0 - ld.d $s4, $sp, 232 # 8-byte Folded Reload + ld.d $s4, $sp, 240 # 8-byte Folded Reload add.d $s4, $s4, $a0 - ld.d $fp, $sp, 240 # 8-byte Folded Reload + ld.d $fp, $sp, 248 # 8-byte Folded Reload add.d $fp, $fp, $a0 - ld.d $a0, $sp, 192 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload beq $a3, $a0, .LBB4_7 .LBB4_10: # %.lr.ph249.split.i.preheader.us.us.us # Parent Loop BB4_8 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB4_12 Depth 3 # Child Loop BB4_13 Depth 4 - st.d $a3, $sp, 200 # 8-byte Folded Spill - st.d $fp, $sp, 240 # 8-byte Folded Spill - st.d $s4, $sp, 232 # 8-byte Folded Spill - st.d $s6, $sp, 224 # 8-byte Folded Spill - st.d $s8, $sp, 216 # 8-byte Folded Spill - st.d $s1, $sp, 208 # 8-byte Folded Spill - ld.d $s7, $sp, 184 # 8-byte Folded Reload + st.d $a3, $sp, 208 # 8-byte Folded Spill + st.d $fp, $sp, 248 # 8-byte Folded Spill + st.d $s4, $sp, 240 # 8-byte Folded Spill + st.d $s6, $sp, 232 # 8-byte Folded Spill + st.d $s8, $sp, 224 # 8-byte Folded Spill + st.d $s1, $sp, 216 # 8-byte Folded Spill + ld.d $s7, $sp, 192 # 8-byte Folded Reload b .LBB4_12 .p2align 4, , 16 .LBB4_11: # %._crit_edge.i.loopexit.us.us.us # in Loop: Header=BB4_12 Depth=3 addi.w $s7, $s7, 1 - ld.d $a0, $sp, 256 # 8-byte Folded Reload + ld.d $a0, $sp, 264 # 8-byte Folded Reload add.d $s1, $s1, $a0 - ld.d $a1, $sp, 248 # 8-byte Folded Reload + ld.d $a1, $sp, 256 # 8-byte Folded Reload add.d $s8, $s8, $a1 add.d $s6, $s6, $a0 - ld.d $s4, $sp, 280 # 8-byte Folded Reload + ld.d $s4, $sp, 288 # 8-byte Folded Reload add.d $s4, $s4, $a0 - ld.d $fp, $sp, 288 # 8-byte Folded Reload + ld.d $fp, $sp, 296 # 8-byte Folded Reload add.d $fp, $fp, $a0 - ld.d $a0, $sp, 272 # 8-byte Folded Reload + ld.d $a0, $sp, 280 # 8-byte Folded Reload beq $a0, $s7, .LBB4_9 .LBB4_12: # %.lr.ph249.split.i.us.us.us # Parent Loop BB4_8 Depth=1 # Parent Loop BB4_10 Depth=2 # => This Loop Header: Depth=3 # Child Loop BB4_13 Depth 4 - st.d $fp, $sp, 288 # 8-byte Folded Spill - st.d $s4, $sp, 280 # 8-byte Folded Spill + st.d $fp, $sp, 296 # 8-byte Folded Spill + st.d $s4, $sp, 288 # 8-byte Folded Spill move $s3, $s6 move $s0, $s8 move $s2, $s1 - ld.d $s5, $sp, 264 # 8-byte Folded Reload + ld.d $s5, $sp, 272 # 8-byte Folded Reload .p2align 4, , 16 .LBB4_13: # %.lr.ph.i.us.us.us # Parent Loop BB4_8 Depth=1 @@ -2217,32 +2213,29 @@ _ZL16BM_COUPLE_LAMBDARN9benchmark5StateE: # @_ZL16BM_COUPLE_LAMBDARN9benchmark5S # Parent Loop BB4_12 Depth=3 # => This Inner Loop Header: Depth=4 fld.d $fa0, $s0, -8 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_0) - fld.d $fa2, $s0, 0 - fld.d $fa3, $s4, -8 - fld.d $fa4, $s4, 0 - fmul.d $fs1, $fa0, $fa1 - fmul.d $fs2, $fa2, $fa1 - fmul.d $fs3, $fa3, $fa1 - fmul.d $fs6, $fa4, $fa1 + fld.d $fa1, $s0, 0 + fld.d $fa2, $s4, -8 + fld.d $fa3, $s4, 0 + fld.d $fa4, $sp, 320 # 8-byte Folded Reload + fmul.d $fs1, $fa0, $fa4 + fmul.d $fs2, $fa1, $fa4 + fmul.d $fs3, $fa2, $fa4 + fmul.d $fs6, $fa3, $fa4 fmul.d $fa0, $fs2, $fs2 fmadd.d $fa0, $fs1, $fs1, $fa0 fmadd.d $fa0, $fs3, $fs3, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_1) - pcalau12i $a0, %pc_hi20(.LCPI4_2) - fld.d $fa2, $a0, %pc_lo12(.LCPI4_2) fmadd.d $fa0, $fs6, $fs6, $fa0 + fld.d $fa1, $sp, 312 # 8-byte Folded Reload fadd.d $fs4, $fa0, $fa1 fsqrt.d $fa0, $fs4 - fmul.d $fa0, $fa0, $fa2 + fld.d $fa1, $sp, 304 # 8-byte Folded Reload + fmul.d $fa0, $fa0, $fa1 vldi $vr1, -928 fmul.d $fs0, $fa0, $fa1 fmov.d $fa0, $fs0 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fst.d $fa0, $sp, 296 # 8-byte Folded Spill + fst.d $fa0, $sp, 328 # 8-byte Folded Spill fmov.d $fa0, $fs0 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 @@ -2283,7 +2276,7 @@ _ZL16BM_COUPLE_LAMBDARN9benchmark5StateE: # @_ZL16BM_COUPLE_LAMBDARN9benchmark5S # in Loop: Header=BB4_13 Depth=4 fadd.d $fa2, $fs4, $fa2 fadd.d $fa1, $fs2, $fa1 - fld.d $ft1, $sp, 296 # 8-byte Folded Reload + fld.d $ft1, $sp, 328 # 8-byte Folded Reload fmul.d $fa2, $ft1, $fa2 fmul.d $fa3, $ft1, $fa1 movgr2fr.d $fs2, $zero @@ -2411,35 +2404,35 @@ _ZL16BM_COUPLE_LAMBDARN9benchmark5StateE: # @_ZL16BM_COUPLE_LAMBDARN9benchmark5S fmov.d $fs2, $ft5 bcnez $fcc0, .LBB4_14 # %bb.26: # in Loop: Header=BB4_13 Depth=4 - fst.d $fa0, $sp, 96 # 8-byte Folded Spill + fst.d $fa0, $sp, 104 # 8-byte Folded Spill fmov.d $fa0, $fs1 fmov.d $fa1, $fs7 fmov.d $fa2, $ft6 fmov.d $fa3, $ft7 - fst.d $fa7, $sp, 32 # 8-byte Folded Spill + fst.d $fa7, $sp, 40 # 8-byte Folded Spill fmov.d $fs5, $ft0 - fst.d $ft2, $sp, 40 # 8-byte Folded Spill + fst.d $ft2, $sp, 48 # 8-byte Folded Spill fmov.d $fs2, $ft3 - fst.d $ft4, $sp, 80 # 8-byte Folded Spill + fst.d $ft4, $sp, 88 # 8-byte Folded Spill fmov.d $fs4, $ft5 - fst.d $ft6, $sp, 72 # 8-byte Folded Spill - fst.d $ft7, $sp, 64 # 8-byte Folded Spill + fst.d $ft6, $sp, 80 # 8-byte Folded Spill + fst.d $ft7, $sp, 72 # 8-byte Folded Spill movcf2gr $a0, $fcc1 - st.d $a0, $sp, 56 + st.d $a0, $sp, 64 pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - ld.d $a0, $sp, 56 + ld.d $a0, $sp, 64 movgr2cf $fcc1, $a0 - fld.d $ft7, $sp, 64 # 8-byte Folded Reload - fld.d $ft6, $sp, 72 # 8-byte Folded Reload + fld.d $ft7, $sp, 72 # 8-byte Folded Reload + fld.d $ft6, $sp, 80 # 8-byte Folded Reload fmov.d $ft5, $fs4 - fld.d $ft4, $sp, 80 # 8-byte Folded Reload + fld.d $ft4, $sp, 88 # 8-byte Folded Reload fmov.d $ft3, $fs2 - fld.d $ft2, $sp, 40 # 8-byte Folded Reload + fld.d $ft2, $sp, 48 # 8-byte Folded Reload fmov.d $ft0, $fs5 - fld.d $fa7, $sp, 32 # 8-byte Folded Reload + fld.d $fa7, $sp, 40 # 8-byte Folded Reload fmov.d $fs4, $fa0 - fld.d $fa0, $sp, 96 # 8-byte Folded Reload + fld.d $fa0, $sp, 104 # 8-byte Folded Reload fmov.d $fs2, $fa1 b .LBB4_14 .LBB4_27: # in Loop: Header=BB4_13 Depth=4 @@ -2448,39 +2441,39 @@ _ZL16BM_COUPLE_LAMBDARN9benchmark5StateE: # @_ZL16BM_COUPLE_LAMBDARN9benchmark5S fmov.d $fa1, $fs3 bcnez $fcc0, .LBB4_15 # %bb.28: # in Loop: Header=BB4_13 Depth=4 - fst.d $fa0, $sp, 96 # 8-byte Folded Spill + fst.d $fa0, $sp, 104 # 8-byte Folded Spill fmov.d $fa0, $fs5 fmov.d $fa1, $fs0 fmov.d $fa2, $fa7 fmov.d $fa3, $ft0 - fst.d $fa7, $sp, 32 # 8-byte Folded Spill - fst.d $ft0, $sp, 16 # 8-byte Folded Spill - fst.d $ft2, $sp, 40 # 8-byte Folded Spill - fst.d $ft3, $sp, 88 # 8-byte Folded Spill - fst.d $ft4, $sp, 80 # 8-byte Folded Spill - fst.d $ft5, $sp, 48 # 8-byte Folded Spill - fst.d $ft6, $sp, 72 # 8-byte Folded Spill - fst.d $ft7, $sp, 64 # 8-byte Folded Spill + fst.d $fa7, $sp, 40 # 8-byte Folded Spill + fst.d $ft0, $sp, 24 # 8-byte Folded Spill + fst.d $ft2, $sp, 48 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 88 # 8-byte Folded Spill + fst.d $ft5, $sp, 56 # 8-byte Folded Spill + fst.d $ft6, $sp, 80 # 8-byte Folded Spill + fst.d $ft7, $sp, 72 # 8-byte Folded Spill movcf2gr $a0, $fcc1 - st.d $a0, $sp, 56 + st.d $a0, $sp, 64 movcf2gr $a0, $fcc2 - st.d $a0, $sp, 24 + st.d $a0, $sp, 32 pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - ld.d $a0, $sp, 24 + ld.d $a0, $sp, 32 movgr2cf $fcc2, $a0 - ld.d $a0, $sp, 56 + ld.d $a0, $sp, 64 movgr2cf $fcc1, $a0 - fld.d $ft7, $sp, 64 # 8-byte Folded Reload - fld.d $ft6, $sp, 72 # 8-byte Folded Reload - fld.d $ft5, $sp, 48 # 8-byte Folded Reload - fld.d $ft4, $sp, 80 # 8-byte Folded Reload - fld.d $ft3, $sp, 88 # 8-byte Folded Reload - fld.d $ft2, $sp, 40 # 8-byte Folded Reload - fld.d $ft0, $sp, 16 # 8-byte Folded Reload - fld.d $fa7, $sp, 32 # 8-byte Folded Reload + fld.d $ft7, $sp, 72 # 8-byte Folded Reload + fld.d $ft6, $sp, 80 # 8-byte Folded Reload + fld.d $ft5, $sp, 56 # 8-byte Folded Reload + fld.d $ft4, $sp, 88 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload + fld.d $ft2, $sp, 48 # 8-byte Folded Reload + fld.d $ft0, $sp, 24 # 8-byte Folded Reload + fld.d $fa7, $sp, 40 # 8-byte Folded Reload fmov.d $fa2, $fa0 - fld.d $fa0, $sp, 96 # 8-byte Folded Reload + fld.d $fa0, $sp, 104 # 8-byte Folded Reload b .LBB4_15 .LBB4_29: # in Loop: Header=BB4_13 Depth=4 fcmp.cor.d $fcc0, $fa1, $fa1 @@ -2488,43 +2481,43 @@ _ZL16BM_COUPLE_LAMBDARN9benchmark5StateE: # @_ZL16BM_COUPLE_LAMBDARN9benchmark5S # %bb.30: # in Loop: Header=BB4_13 Depth=4 movgr2fr.d $fa4, $zero vldi $vr1, -912 - fst.d $fa0, $sp, 96 # 8-byte Folded Spill + fst.d $fa0, $sp, 104 # 8-byte Folded Spill fmov.d $fa0, $fa4 - fst.d $fa7, $sp, 32 # 8-byte Folded Spill + fst.d $fa7, $sp, 40 # 8-byte Folded Spill fmov.d $fs4, $ft0 - fst.d $ft2, $sp, 40 # 8-byte Folded Spill - fst.d $ft3, $sp, 88 # 8-byte Folded Spill - fst.d $ft4, $sp, 80 # 8-byte Folded Spill - fst.d $ft5, $sp, 48 # 8-byte Folded Spill - fst.d $ft6, $sp, 72 # 8-byte Folded Spill - fst.d $ft7, $sp, 64 # 8-byte Folded Spill + fst.d $ft2, $sp, 48 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 88 # 8-byte Folded Spill + fst.d $ft5, $sp, 56 # 8-byte Folded Spill + fst.d $ft6, $sp, 80 # 8-byte Folded Spill + fst.d $ft7, $sp, 72 # 8-byte Folded Spill movcf2gr $a0, $fcc1 - st.d $a0, $sp, 56 + st.d $a0, $sp, 64 movcf2gr $a0, $fcc2 - st.d $a0, $sp, 24 + st.d $a0, $sp, 32 pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - ld.d $a0, $sp, 24 + ld.d $a0, $sp, 32 movgr2cf $fcc2, $a0 - ld.d $a0, $sp, 56 + ld.d $a0, $sp, 64 movgr2cf $fcc1, $a0 - fld.d $ft7, $sp, 64 # 8-byte Folded Reload - fld.d $ft6, $sp, 72 # 8-byte Folded Reload - fld.d $ft5, $sp, 48 # 8-byte Folded Reload - fld.d $ft4, $sp, 80 # 8-byte Folded Reload - fld.d $ft3, $sp, 88 # 8-byte Folded Reload - fld.d $ft2, $sp, 40 # 8-byte Folded Reload - fld.d $ft1, $sp, 296 # 8-byte Folded Reload + fld.d $ft7, $sp, 72 # 8-byte Folded Reload + fld.d $ft6, $sp, 80 # 8-byte Folded Reload + fld.d $ft5, $sp, 56 # 8-byte Folded Reload + fld.d $ft4, $sp, 88 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload + fld.d $ft2, $sp, 48 # 8-byte Folded Reload + fld.d $ft1, $sp, 328 # 8-byte Folded Reload fmov.d $ft0, $fs4 - fld.d $fa7, $sp, 32 # 8-byte Folded Reload + fld.d $fa7, $sp, 40 # 8-byte Folded Reload fmov.d $fa4, $fa0 - fld.d $fa0, $sp, 96 # 8-byte Folded Reload + fld.d $fa0, $sp, 104 # 8-byte Folded Reload b .LBB4_16 .LBB4_31: # in Loop: Header=BB4_13 Depth=4 fcmp.cor.d $fcc0, $fs3, $fs3 bcnez $fcc0, .LBB4_17 # %bb.32: # in Loop: Header=BB4_13 Depth=4 - fst.d $fa0, $sp, 96 # 8-byte Folded Spill + fst.d $fa0, $sp, 104 # 8-byte Folded Spill fmov.d $fa0, $fs5 fmov.d $fa1, $fs0 fmov.d $fa2, $fa7 @@ -2532,35 +2525,35 @@ _ZL16BM_COUPLE_LAMBDARN9benchmark5StateE: # @_ZL16BM_COUPLE_LAMBDARN9benchmark5S fmov.d $fs4, $fa7 fmov.d $fs6, $ft0 fmov.d $fs3, $ft2 - fst.d $ft3, $sp, 88 # 8-byte Folded Spill - fst.d $ft4, $sp, 80 # 8-byte Folded Spill - fst.d $ft5, $sp, 48 # 8-byte Folded Spill - fst.d $ft6, $sp, 72 # 8-byte Folded Spill - fst.d $ft7, $sp, 64 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 88 # 8-byte Folded Spill + fst.d $ft5, $sp, 56 # 8-byte Folded Spill + fst.d $ft6, $sp, 80 # 8-byte Folded Spill + fst.d $ft7, $sp, 72 # 8-byte Folded Spill movcf2gr $a0, $fcc1 - st.d $a0, $sp, 56 + st.d $a0, $sp, 64 pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - ld.d $a0, $sp, 56 + ld.d $a0, $sp, 64 movgr2cf $fcc1, $a0 - fld.d $ft7, $sp, 64 # 8-byte Folded Reload - fld.d $ft6, $sp, 72 # 8-byte Folded Reload - fld.d $ft5, $sp, 48 # 8-byte Folded Reload - fld.d $ft4, $sp, 80 # 8-byte Folded Reload - fld.d $ft3, $sp, 88 # 8-byte Folded Reload + fld.d $ft7, $sp, 72 # 8-byte Folded Reload + fld.d $ft6, $sp, 80 # 8-byte Folded Reload + fld.d $ft5, $sp, 56 # 8-byte Folded Reload + fld.d $ft4, $sp, 88 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload fmov.d $ft2, $fs3 - fld.d $ft1, $sp, 296 # 8-byte Folded Reload + fld.d $ft1, $sp, 328 # 8-byte Folded Reload fmov.d $ft0, $fs6 fmov.d $fa7, $fs4 fmov.d $fs6, $fa0 - fld.d $fa0, $sp, 96 # 8-byte Folded Reload + fld.d $fa0, $sp, 104 # 8-byte Folded Reload fmov.d $fs3, $fa1 b .LBB4_17 .LBB4_33: # in Loop: Header=BB4_13 Depth=4 fcmp.cor.d $fcc0, $fa6, $fa6 bcnez $fcc0, .LBB4_18 # %bb.34: # in Loop: Header=BB4_13 Depth=4 - fst.d $fa0, $sp, 96 # 8-byte Folded Spill + fst.d $fa0, $sp, 104 # 8-byte Folded Spill fmov.d $fa0, $fs1 fmov.d $fa1, $fa4 fmov.d $fa2, $fs6 @@ -2568,68 +2561,68 @@ _ZL16BM_COUPLE_LAMBDARN9benchmark5StateE: # @_ZL16BM_COUPLE_LAMBDARN9benchmark5S fmov.d $fs3, $fa7 fmov.d $fs4, $ft0 fmov.d $fs6, $ft2 - fst.d $ft3, $sp, 88 # 8-byte Folded Spill - fst.d $ft4, $sp, 80 # 8-byte Folded Spill - fst.d $ft5, $sp, 48 # 8-byte Folded Spill - fst.d $ft6, $sp, 72 # 8-byte Folded Spill - fst.d $ft7, $sp, 64 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 88 # 8-byte Folded Spill + fst.d $ft5, $sp, 56 # 8-byte Folded Spill + fst.d $ft6, $sp, 80 # 8-byte Folded Spill + fst.d $ft7, $sp, 72 # 8-byte Folded Spill movcf2gr $a0, $fcc1 - st.d $a0, $sp, 56 - fst.d $fa4, $sp, 40 # 8-byte Folded Spill + st.d $a0, $sp, 64 + fst.d $fa4, $sp, 48 # 8-byte Folded Spill pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - fld.d $fa4, $sp, 40 # 8-byte Folded Reload - ld.d $a0, $sp, 56 + fld.d $fa4, $sp, 48 # 8-byte Folded Reload + ld.d $a0, $sp, 64 movgr2cf $fcc1, $a0 - fld.d $ft7, $sp, 64 # 8-byte Folded Reload - fld.d $ft6, $sp, 72 # 8-byte Folded Reload - fld.d $ft5, $sp, 48 # 8-byte Folded Reload - fld.d $ft4, $sp, 80 # 8-byte Folded Reload - fld.d $ft3, $sp, 88 # 8-byte Folded Reload + fld.d $ft7, $sp, 72 # 8-byte Folded Reload + fld.d $ft6, $sp, 80 # 8-byte Folded Reload + fld.d $ft5, $sp, 56 # 8-byte Folded Reload + fld.d $ft4, $sp, 88 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload fmov.d $ft2, $fs6 - fld.d $ft1, $sp, 296 # 8-byte Folded Reload + fld.d $ft1, $sp, 328 # 8-byte Folded Reload fmov.d $ft0, $fs4 fmov.d $fa7, $fs3 fmov.d $fs4, $fa0 - fld.d $fa0, $sp, 96 # 8-byte Folded Reload + fld.d $fa0, $sp, 104 # 8-byte Folded Reload fmov.d $fa6, $fa1 b .LBB4_18 .LBB4_35: # in Loop: Header=BB4_13 Depth=4 fcmp.cor.d $fcc0, $fa1, $fa1 bcnez $fcc0, .LBB4_19 # %bb.36: # in Loop: Header=BB4_13 Depth=4 - fst.d $fa0, $sp, 96 # 8-byte Folded Spill + fst.d $fa0, $sp, 104 # 8-byte Folded Spill fmov.d $fa0, $fs1 fmov.d $fa1, $fa4 fmov.d $fa2, $ft2 fmov.d $fa3, $ft3 fmov.d $fs6, $fa7 fmov.d $fs3, $ft0 - fst.d $ft2, $sp, 40 # 8-byte Folded Spill - fst.d $ft3, $sp, 88 # 8-byte Folded Spill - fst.d $ft4, $sp, 80 # 8-byte Folded Spill - fst.d $ft5, $sp, 48 # 8-byte Folded Spill - fst.d $ft6, $sp, 72 # 8-byte Folded Spill - fst.d $ft7, $sp, 64 # 8-byte Folded Spill + fst.d $ft2, $sp, 48 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 88 # 8-byte Folded Spill + fst.d $ft5, $sp, 56 # 8-byte Folded Spill + fst.d $ft6, $sp, 80 # 8-byte Folded Spill + fst.d $ft7, $sp, 72 # 8-byte Folded Spill movcf2gr $a0, $fcc1 - st.d $a0, $sp, 56 - fst.d $fa6, $sp, 32 # 8-byte Folded Spill + st.d $a0, $sp, 64 + fst.d $fa6, $sp, 40 # 8-byte Folded Spill pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - fld.d $fa6, $sp, 32 # 8-byte Folded Reload - ld.d $a0, $sp, 56 + fld.d $fa6, $sp, 40 # 8-byte Folded Reload + ld.d $a0, $sp, 64 movgr2cf $fcc1, $a0 - fld.d $ft7, $sp, 64 # 8-byte Folded Reload - fld.d $ft6, $sp, 72 # 8-byte Folded Reload - fld.d $ft5, $sp, 48 # 8-byte Folded Reload - fld.d $ft4, $sp, 80 # 8-byte Folded Reload - fld.d $ft3, $sp, 88 # 8-byte Folded Reload - fld.d $ft2, $sp, 40 # 8-byte Folded Reload - fld.d $ft1, $sp, 296 # 8-byte Folded Reload + fld.d $ft7, $sp, 72 # 8-byte Folded Reload + fld.d $ft6, $sp, 80 # 8-byte Folded Reload + fld.d $ft5, $sp, 56 # 8-byte Folded Reload + fld.d $ft4, $sp, 88 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload + fld.d $ft2, $sp, 48 # 8-byte Folded Reload + fld.d $ft1, $sp, 328 # 8-byte Folded Reload fmov.d $ft0, $fs3 fmov.d $fa7, $fs6 fmov.d $fa2, $fa0 - fld.d $fa0, $sp, 96 # 8-byte Folded Reload + fld.d $fa0, $sp, 104 # 8-byte Folded Reload b .LBB4_19 .LBB4_37: # in Loop: Header=BB4_13 Depth=4 fcmp.cor.d $fcc0, $fa1, $fa1 @@ -2637,41 +2630,41 @@ _ZL16BM_COUPLE_LAMBDARN9benchmark5StateE: # @_ZL16BM_COUPLE_LAMBDARN9benchmark5S # %bb.38: # in Loop: Header=BB4_13 Depth=4 movgr2fr.d $fa4, $zero vldi $vr1, -912 - fst.d $fa0, $sp, 96 # 8-byte Folded Spill + fst.d $fa0, $sp, 104 # 8-byte Folded Spill fmov.d $fa0, $fa4 fmov.d $fs6, $fa7 fmov.d $fs3, $ft0 - fst.d $ft2, $sp, 40 # 8-byte Folded Spill - fst.d $ft3, $sp, 88 # 8-byte Folded Spill - fst.d $ft4, $sp, 80 # 8-byte Folded Spill - fst.d $ft5, $sp, 48 # 8-byte Folded Spill - fst.d $ft6, $sp, 72 # 8-byte Folded Spill - fst.d $ft7, $sp, 64 # 8-byte Folded Spill + fst.d $ft2, $sp, 48 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 88 # 8-byte Folded Spill + fst.d $ft5, $sp, 56 # 8-byte Folded Spill + fst.d $ft6, $sp, 80 # 8-byte Folded Spill + fst.d $ft7, $sp, 72 # 8-byte Folded Spill movcf2gr $a0, $fcc1 - st.d $a0, $sp, 56 - fst.d $fa6, $sp, 32 # 8-byte Folded Spill + st.d $a0, $sp, 64 + fst.d $fa6, $sp, 40 # 8-byte Folded Spill pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - fld.d $fa6, $sp, 32 # 8-byte Folded Reload - ld.d $a0, $sp, 56 + fld.d $fa6, $sp, 40 # 8-byte Folded Reload + ld.d $a0, $sp, 64 movgr2cf $fcc1, $a0 - fld.d $ft7, $sp, 64 # 8-byte Folded Reload - fld.d $ft6, $sp, 72 # 8-byte Folded Reload - fld.d $ft5, $sp, 48 # 8-byte Folded Reload - fld.d $ft4, $sp, 80 # 8-byte Folded Reload - fld.d $ft3, $sp, 88 # 8-byte Folded Reload - fld.d $ft2, $sp, 40 # 8-byte Folded Reload - fld.d $ft1, $sp, 296 # 8-byte Folded Reload + fld.d $ft7, $sp, 72 # 8-byte Folded Reload + fld.d $ft6, $sp, 80 # 8-byte Folded Reload + fld.d $ft5, $sp, 56 # 8-byte Folded Reload + fld.d $ft4, $sp, 88 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload + fld.d $ft2, $sp, 48 # 8-byte Folded Reload + fld.d $ft1, $sp, 328 # 8-byte Folded Reload fmov.d $ft0, $fs3 fmov.d $fa7, $fs6 fmov.d $fa4, $fa0 - fld.d $fa0, $sp, 96 # 8-byte Folded Reload + fld.d $fa0, $sp, 104 # 8-byte Folded Reload b .LBB4_20 .LBB4_39: # in Loop: Header=BB4_13 Depth=4 fcmp.cor.d $fcc0, $ft5, $ft5 bcnez $fcc0, .LBB4_21 # %bb.40: # in Loop: Header=BB4_13 Depth=4 - fst.d $fa0, $sp, 96 # 8-byte Folded Spill + fst.d $fa0, $sp, 104 # 8-byte Folded Spill fmov.d $fa0, $fs1 fmov.d $fa1, $fs7 fmov.d $fa2, $ft6 @@ -2679,51 +2672,51 @@ _ZL16BM_COUPLE_LAMBDARN9benchmark5StateE: # @_ZL16BM_COUPLE_LAMBDARN9benchmark5S fmov.d $fs1, $fa7 fmov.d $fs7, $ft0 fmov.d $fs4, $ft2 - fst.d $ft3, $sp, 88 # 8-byte Folded Spill - fst.d $ft8, $sp, 80 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft8, $sp, 88 # 8-byte Folded Spill pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - fld.d $ft8, $sp, 80 # 8-byte Folded Reload - fld.d $ft3, $sp, 88 # 8-byte Folded Reload + fld.d $ft8, $sp, 88 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload fmov.d $ft2, $fs4 - fld.d $ft1, $sp, 296 # 8-byte Folded Reload + fld.d $ft1, $sp, 328 # 8-byte Folded Reload fmov.d $ft0, $fs7 fmov.d $fa7, $fs1 fmov.d $ft4, $fa0 - fld.d $fa0, $sp, 96 # 8-byte Folded Reload + fld.d $fa0, $sp, 104 # 8-byte Folded Reload fmov.d $ft5, $fa1 b .LBB4_21 .LBB4_41: # in Loop: Header=BB4_13 Depth=4 fcmp.cor.d $fcc0, $fs7, $fs7 bcnez $fcc0, .LBB4_22 # %bb.42: # in Loop: Header=BB4_13 Depth=4 - fst.d $fa0, $sp, 96 # 8-byte Folded Spill + fst.d $fa0, $sp, 104 # 8-byte Folded Spill fmov.d $fa0, $fs5 fmov.d $fa1, $fs4 fmov.d $fa2, $ft4 fmov.d $fa3, $ft5 fmov.d $fs1, $fa7 fmov.d $fs7, $ft0 - fst.d $ft2, $sp, 40 # 8-byte Folded Spill - fst.d $ft3, $sp, 88 # 8-byte Folded Spill - fst.d $ft8, $sp, 80 # 8-byte Folded Spill + fst.d $ft2, $sp, 48 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft8, $sp, 88 # 8-byte Folded Spill pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - fld.d $ft8, $sp, 80 # 8-byte Folded Reload - fld.d $ft3, $sp, 88 # 8-byte Folded Reload - fld.d $ft2, $sp, 40 # 8-byte Folded Reload - fld.d $ft1, $sp, 296 # 8-byte Folded Reload + fld.d $ft8, $sp, 88 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload + fld.d $ft2, $sp, 48 # 8-byte Folded Reload + fld.d $ft1, $sp, 328 # 8-byte Folded Reload fmov.d $ft0, $fs7 fmov.d $fa7, $fs1 fmov.d $fs1, $fa0 - fld.d $fa0, $sp, 96 # 8-byte Folded Reload + fld.d $fa0, $sp, 104 # 8-byte Folded Reload fmov.d $fs7, $fa1 b .LBB4_22 .LBB4_43: # in Loop: Header=BB4_13 Depth=4 fcmp.cor.d $fcc0, $fa1, $fa1 bcnez $fcc0, .LBB4_23 # %bb.44: # in Loop: Header=BB4_13 Depth=4 - fst.d $fa0, $sp, 96 # 8-byte Folded Spill + fst.d $fa0, $sp, 104 # 8-byte Folded Spill fmov.d $fa0, $fs5 fmov.d $fa1, $fs4 fmov.d $fa2, $ft2 @@ -2734,11 +2727,11 @@ _ZL16BM_COUPLE_LAMBDARN9benchmark5StateE: # @_ZL16BM_COUPLE_LAMBDARN9benchmark5S pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 fmov.d $ft8, $fs0 - fld.d $ft1, $sp, 296 # 8-byte Folded Reload + fld.d $ft1, $sp, 328 # 8-byte Folded Reload fmov.d $ft0, $fs5 fmov.d $fa7, $fs4 fmov.d $fa2, $fa0 - fld.d $fa0, $sp, 96 # 8-byte Folded Reload + fld.d $fa0, $sp, 104 # 8-byte Folded Reload b .LBB4_23 .LBB4_45: # in Loop: Header=BB4_13 Depth=4 fcmp.cor.d $fcc0, $fa1, $fa1 @@ -2761,41 +2754,41 @@ _ZL16BM_COUPLE_LAMBDARN9benchmark5StateE: # @_ZL16BM_COUPLE_LAMBDARN9benchmark5S b .LBB4_24 .LBB4_47: # %_ZN9benchmark5State3endEv.exit._crit_edge .Ltmp12: # EH_LABEL - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 112 # 8-byte Folded Reload pcaddu18i $ra, %call36(_ZN9benchmark5State17FinishKeepRunningEv) jirl $ra, $ra, 0 .Ltmp13: # EH_LABEL # %bb.48: # %_ZNK9benchmark5State13StateIteratorneERKS1_.exit - ld.d $a0, $sp, 376 + ld.d $a0, $sp, 408 beqz $a0, .LBB4_50 # %bb.49: pcaddu18i $ra, %call36(_ZdaPv) jirl $ra, $ra, 0 .LBB4_50: # %_ZN7ADomainD2Ev.exit - fld.d $fs7, $sp, 392 # 8-byte Folded Reload - fld.d $fs6, $sp, 400 # 8-byte Folded Reload - fld.d $fs5, $sp, 408 # 8-byte Folded Reload - fld.d $fs4, $sp, 416 # 8-byte Folded Reload - fld.d $fs3, $sp, 424 # 8-byte Folded Reload - fld.d $fs2, $sp, 432 # 8-byte Folded Reload - fld.d $fs1, $sp, 440 # 8-byte Folded Reload - fld.d $fs0, $sp, 448 # 8-byte Folded Reload - ld.d $s8, $sp, 456 # 8-byte Folded Reload - ld.d $s7, $sp, 464 # 8-byte Folded Reload - ld.d $s6, $sp, 472 # 8-byte Folded Reload - ld.d $s5, $sp, 480 # 8-byte Folded Reload - ld.d $s4, $sp, 488 # 8-byte Folded Reload - ld.d $s3, $sp, 496 # 8-byte Folded Reload - ld.d $s2, $sp, 504 # 8-byte Folded Reload - ld.d $s1, $sp, 512 # 8-byte Folded Reload - ld.d $s0, $sp, 520 # 8-byte Folded Reload - ld.d $fp, $sp, 528 # 8-byte Folded Reload - ld.d $ra, $sp, 536 # 8-byte Folded Reload - addi.d $sp, $sp, 544 + fld.d $fs7, $sp, 424 # 8-byte Folded Reload + fld.d $fs6, $sp, 432 # 8-byte Folded Reload + fld.d $fs5, $sp, 440 # 8-byte Folded Reload + fld.d $fs4, $sp, 448 # 8-byte Folded Reload + fld.d $fs3, $sp, 456 # 8-byte Folded Reload + fld.d $fs2, $sp, 464 # 8-byte Folded Reload + fld.d $fs1, $sp, 472 # 8-byte Folded Reload + fld.d $fs0, $sp, 480 # 8-byte Folded Reload + ld.d $s8, $sp, 488 # 8-byte Folded Reload + ld.d $s7, $sp, 496 # 8-byte Folded Reload + ld.d $s6, $sp, 504 # 8-byte Folded Reload + ld.d $s5, $sp, 512 # 8-byte Folded Reload + ld.d $s4, $sp, 520 # 8-byte Folded Reload + ld.d $s3, $sp, 528 # 8-byte Folded Reload + ld.d $s2, $sp, 536 # 8-byte Folded Reload + ld.d $s1, $sp, 544 # 8-byte Folded Reload + ld.d $s0, $sp, 552 # 8-byte Folded Reload + ld.d $fp, $sp, 560 # 8-byte Folded Reload + ld.d $ra, $sp, 568 # 8-byte Folded Reload + addi.d $sp, $sp, 576 ret .LBB4_51: .Ltmp14: # EH_LABEL - ld.d $a1, $sp, 376 + ld.d $a1, $sp, 408 move $fp, $a0 beqz $a1, .LBB4_53 # %bb.52: @@ -3033,15 +3026,9 @@ _ZL13BM_FIR_LAMBDARN9benchmark5StateE: # @_ZL13BM_FIR_LAMBDARN9benchmark5StateE .size _ZL13BM_FIR_LAMBDARN9benchmark5StateE, .Lfunc_end5-_ZL13BM_FIR_LAMBDARN9benchmark5StateE .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN7ADomainC2Eii -.LCPI6_0: - .dword 0x4050000000000000 # double 64 -.LCPI6_1: - .dword 0x4063800000000000 # double 156 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI6_2: + .p2align 4, 0x0 # -- Begin function _ZN7ADomainC2Eii +.LCPI6_0: .word 2 # 0x2 .word 3 # 0x3 .word 4 # 0x4 @@ -3082,79 +3069,80 @@ _ZN7ADomainC2Eii: # @_ZN7ADomainC2Eii ori $a2, $zero, 2 lu32i.d $a2, 1 st.d $a2, $fp, 4 - beq $a1, $a0, .LBB6_9 + beq $a1, $a0, .LBB6_10 # %bb.1: ori $a0, $zero, 1 beq $a1, $a0, .LBB6_6 # %bb.2: # implicit-def: $r29 - bnez $a1, .LBB6_17 + bnez $a1, .LBB6_18 # %bb.3: ori $a0, $zero, 3 - beq $s1, $a0, .LBB6_14 + beq $s1, $a0, .LBB6_15 # %bb.4: ori $a0, $zero, 2 # implicit-def: $r29 - bne $s1, $a0, .LBB6_17 + bne $s1, $a0, .LBB6_18 # %bb.5: pcalau12i $a0, %got_pc_hi20(_ZN7ADomain18loop_length_factorE) ld.d $a0, $a0, %got_pc_lo12(_ZN7ADomain18loop_length_factorE) fld.d $fa0, $a0, 0 - pcalau12i $a0, %pc_hi20(.LCPI6_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI6_1) - fmul.d $fa0, $fa0, $fa1 - b .LBB6_16 + ori $a0, $zero, 0 + lu32i.d $a0, 229376 + lu52i.d $a0, $a0, 1030 + b .LBB6_9 .LBB6_6: ori $a0, $zero, 3 - beq $s1, $a0, .LBB6_12 + beq $s1, $a0, .LBB6_13 # %bb.7: ori $a0, $zero, 2 # implicit-def: $r29 - bne $s1, $a0, .LBB6_17 + bne $s1, $a0, .LBB6_18 # %bb.8: pcalau12i $a0, %got_pc_hi20(_ZN7ADomain18loop_length_factorE) ld.d $a0, $a0, %got_pc_lo12(_ZN7ADomain18loop_length_factorE) fld.d $fa0, $a0, 0 - pcalau12i $a0, %pc_hi20(.LCPI6_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI6_0) - fmul.d $fa0, $fa0, $fa1 - b .LBB6_16 + lu52i.d $a0, $zero, 1029 .LBB6_9: + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 + b .LBB6_17 +.LBB6_10: ori $a0, $zero, 3 - beq $s1, $a0, .LBB6_13 -# %bb.10: + beq $s1, $a0, .LBB6_14 +# %bb.11: ori $a0, $zero, 2 # implicit-def: $r29 - bne $s1, $a0, .LBB6_17 -# %bb.11: + bne $s1, $a0, .LBB6_18 +# %bb.12: pcalau12i $a0, %got_pc_hi20(_ZN7ADomain18loop_length_factorE) ld.d $a0, $a0, %got_pc_lo12(_ZN7ADomain18loop_length_factorE) fld.d $fa0, $a0, 0 vldi $vr1, -992 - b .LBB6_15 -.LBB6_12: + b .LBB6_16 +.LBB6_13: pcalau12i $a0, %got_pc_hi20(_ZN7ADomain18loop_length_factorE) ld.d $a0, $a0, %got_pc_lo12(_ZN7ADomain18loop_length_factorE) fld.d $fa0, $a0, 0 vldi $vr1, -976 - b .LBB6_15 -.LBB6_13: + b .LBB6_16 +.LBB6_14: pcalau12i $a0, %got_pc_hi20(_ZN7ADomain18loop_length_factorE) ld.d $a0, $a0, %got_pc_lo12(_ZN7ADomain18loop_length_factorE) fld.d $fa0, $a0, 0 vldi $vr1, -1008 - b .LBB6_15 -.LBB6_14: + b .LBB6_16 +.LBB6_15: pcalau12i $a0, %got_pc_hi20(_ZN7ADomain18loop_length_factorE) ld.d $a0, $a0, %got_pc_lo12(_ZN7ADomain18loop_length_factorE) fld.d $fa0, $a0, 0 vldi $vr1, -964 -.LBB6_15: - fmul.d $fa0, $fa0, $fa1 .LBB6_16: + fmul.d $fa0, $fa0, $fa1 +.LBB6_17: ftintrz.w.d $fa0, $fa0 movfr2gr.s $s6, $fa0 -.LBB6_17: +.LBB6_18: ori $a0, $zero, 2 ori $a1, $zero, 2 lu32i.d $a1, 2 @@ -3164,31 +3152,31 @@ _ZN7ADomainC2Eii: # @_ZN7ADomainC2Eii st.w $s3, $fp, 28 addi.w $s4, $s6, 3 st.w $s4, $fp, 36 - bne $s1, $a0, .LBB6_19 -# %bb.18: + bne $s1, $a0, .LBB6_20 +# %bb.19: move $s5, $zero st.w $zero, $fp, 20 st.w $zero, $fp, 32 st.w $zero, $fp, 40 mul.w $s7, $s4, $s4 - b .LBB6_21 -.LBB6_19: + b .LBB6_22 +.LBB6_20: ori $a0, $zero, 3 - bne $s1, $a0, .LBB6_22 -# %bb.20: + bne $s1, $a0, .LBB6_23 +# %bb.21: ori $a0, $zero, 2 st.w $a0, $fp, 20 st.w $s3, $fp, 32 mul.d $s5, $s4, $s4 st.w $s5, $fp, 40 mul.w $s7, $s5, $s4 -.LBB6_21: +.LBB6_22: st.w $s7, $fp, 44 - b .LBB6_23 -.LBB6_22: # %._crit_edge89 + b .LBB6_24 +.LBB6_23: # %._crit_edge89 ld.w $s7, $fp, 44 ld.w $s5, $fp, 40 -.LBB6_23: +.LBB6_24: st.w $zero, $fp, 48 addi.d $a0, $s7, -1 st.w $a0, $fp, 52 @@ -3212,21 +3200,21 @@ _ZN7ADomainC2Eii: # @_ZN7ADomainC2Eii jirl $ra, $ra, 0 move $s0, $a0 st.d $a0, $fp, 72 - blez $s7, .LBB6_25 -# %bb.24: # %.lr.ph.preheader + blez $s7, .LBB6_26 +# %bb.25: # %.lr.ph.preheader ori $a1, $zero, 255 move $a0, $s0 move $a2, $s2 pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 -.LBB6_25: # %._crit_edge +.LBB6_26: # %._crit_edge ori $a0, $zero, 2 st.w $zero, $fp, 80 - bne $s1, $a0, .LBB6_36 -# %bb.26: # %.preheader + bne $s1, $a0, .LBB6_37 +# %bb.27: # %.preheader addi.w $a1, $s6, 0 - blt $a1, $a0, .LBB6_51 -# %bb.27: # %.lr.ph72.us.preheader + blt $a1, $a0, .LBB6_52 +# %bb.28: # %.lr.ph72.us.preheader move $a0, $zero addi.d $a4, $s6, -1 bstrpick.d $a2, $a4, 31, 0 @@ -3237,32 +3225,32 @@ _ZN7ADomainC2Eii: # @_ZN7ADomainC2Eii ori $a4, $zero, 2 bstrins.d $a4, $a5, 31, 3 addi.d $a5, $s0, 16 - pcalau12i $a6, %pc_hi20(.LCPI6_2) - vld $vr0, $a6, %pc_lo12(.LCPI6_2) + pcalau12i $a6, %pc_hi20(.LCPI6_0) + vld $vr0, $a6, %pc_lo12(.LCPI6_0) ori $a6, $zero, 6 alsl.w $a6, $s6, $a6, 1 ori $a7, $zero, 9 - b .LBB6_29 + b .LBB6_30 .p2align 4, , 16 -.LBB6_28: # %._crit_edge73.us - # in Loop: Header=BB6_29 Depth=1 +.LBB6_29: # %._crit_edge73.us + # in Loop: Header=BB6_30 Depth=1 addi.w $t1, $t0, 1 add.d $a6, $a6, $s4 - beq $t0, $a1, .LBB6_50 -.LBB6_29: # %.lr.ph72.us + beq $t0, $a1, .LBB6_51 +.LBB6_30: # %.lr.ph72.us # =>This Loop Header: Depth=1 - # Child Loop BB6_32 Depth 2 - # Child Loop BB6_35 Depth 2 + # Child Loop BB6_33 Depth 2 + # Child Loop BB6_36 Depth 2 move $t0, $t1 addi.w $t1, $a0, 0 - bgeu $a1, $a7, .LBB6_31 -# %bb.30: # in Loop: Header=BB6_29 Depth=1 + bgeu $a1, $a7, .LBB6_32 +# %bb.31: # in Loop: Header=BB6_30 Depth=1 move $a0, $t1 ori $t3, $zero, 2 - b .LBB6_34 + b .LBB6_35 .p2align 4, , 16 -.LBB6_31: # %vector.ph106 - # in Loop: Header=BB6_29 Depth=1 +.LBB6_32: # %vector.ph106 + # in Loop: Header=BB6_30 Depth=1 mul.d $t2, $s4, $t0 add.d $a0, $t1, $a3 vreplgr2vr.w $vr1, $t2 @@ -3271,8 +3259,8 @@ _ZN7ADomainC2Eii: # @_ZN7ADomainC2Eii move $t2, $a3 vori.b $vr3, $vr0, 0 .p2align 4, , 16 -.LBB6_32: # %vector.body112 - # Parent Loop BB6_29 Depth=1 +.LBB6_33: # %vector.body112 + # Parent Loop BB6_30 Depth=1 # => This Inner Loop Header: Depth=2 vadd.w $vr4, $vr1, $vr3 vadd.w $vr5, $vr3, $vr2 @@ -3281,39 +3269,39 @@ _ZN7ADomainC2Eii: # @_ZN7ADomainC2Eii vaddi.wu $vr3, $vr3, 8 addi.d $t2, $t2, -8 addi.d $t1, $t1, 32 - bnez $t2, .LBB6_32 -# %bb.33: # %middle.block119 - # in Loop: Header=BB6_29 Depth=1 + bnez $t2, .LBB6_33 +# %bb.34: # %middle.block119 + # in Loop: Header=BB6_30 Depth=1 move $t3, $a4 - beq $a3, $a2, .LBB6_28 -.LBB6_34: # %scalar.ph104.preheader - # in Loop: Header=BB6_29 Depth=1 + beq $a3, $a2, .LBB6_29 +.LBB6_35: # %scalar.ph104.preheader + # in Loop: Header=BB6_30 Depth=1 alsl.d $t1, $a0, $s0, 2 sub.d $t2, $s3, $t3 add.d $t3, $t3, $a6 .p2align 4, , 16 -.LBB6_35: # %scalar.ph104 - # Parent Loop BB6_29 Depth=1 +.LBB6_36: # %scalar.ph104 + # Parent Loop BB6_30 Depth=1 # => This Inner Loop Header: Depth=2 st.w $t3, $t1, 0 addi.d $a0, $a0, 1 addi.d $t1, $t1, 4 addi.w $t2, $t2, -1 addi.d $t3, $t3, 1 - bnez $t2, .LBB6_35 - b .LBB6_28 -.LBB6_36: + bnez $t2, .LBB6_36 + b .LBB6_29 +.LBB6_37: ori $a0, $zero, 3 - bne $s1, $a0, .LBB6_51 -# %bb.37: + bne $s1, $a0, .LBB6_52 +# %bb.38: ld.w $a1, $fp, 20 ld.w $a2, $fp, 32 - bge $a1, $a2, .LBB6_51 -# %bb.38: + bge $a1, $a2, .LBB6_52 +# %bb.39: addi.w $a3, $s6, 0 ori $a4, $zero, 2 - blt $a3, $a4, .LBB6_51 -# %bb.39: # %.lr.ph57.us.us.preheader + blt $a3, $a4, .LBB6_52 +# %bb.40: # %.lr.ph57.us.us.preheader move $a0, $zero addi.d $a7, $s6, -1 bstrpick.d $a5, $a7, 31, 0 @@ -3326,56 +3314,56 @@ _ZN7ADomainC2Eii: # @_ZN7ADomainC2Eii alsl.d $t0, $s6, $t0, 1 addi.d $t0, $t0, 6 ori $t1, $zero, 9 - b .LBB6_41 + b .LBB6_42 .p2align 4, , 16 -.LBB6_40: # %._crit_edge58.split.us.us.us - # in Loop: Header=BB6_41 Depth=1 +.LBB6_41: # %._crit_edge58.split.us.us.us + # in Loop: Header=BB6_42 Depth=1 addi.w $a1, $a1, 1 add.d $t0, $t0, $s5 - beq $a1, $a2, .LBB6_50 -.LBB6_41: # %.lr.ph57.us.us + beq $a1, $a2, .LBB6_51 +.LBB6_42: # %.lr.ph57.us.us # =>This Loop Header: Depth=1 - # Child Loop BB6_43 Depth 2 - # Child Loop BB6_46 Depth 3 - # Child Loop BB6_49 Depth 3 + # Child Loop BB6_44 Depth 2 + # Child Loop BB6_47 Depth 3 + # Child Loop BB6_50 Depth 3 mul.d $t2, $s5, $a1 ori $t5, $zero, 2 move $t3, $t0 - b .LBB6_43 + b .LBB6_44 .p2align 4, , 16 -.LBB6_42: # %._crit_edge50.us.us.us - # in Loop: Header=BB6_43 Depth=2 +.LBB6_43: # %._crit_edge50.us.us.us + # in Loop: Header=BB6_44 Depth=2 addi.w $t5, $t4, 1 add.d $t3, $t3, $s4 - beq $t4, $a3, .LBB6_40 -.LBB6_43: # %.lr.ph49.us.us.us - # Parent Loop BB6_41 Depth=1 + beq $t4, $a3, .LBB6_41 +.LBB6_44: # %.lr.ph49.us.us.us + # Parent Loop BB6_42 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB6_46 Depth 3 - # Child Loop BB6_49 Depth 3 + # Child Loop BB6_47 Depth 3 + # Child Loop BB6_50 Depth 3 move $t4, $t5 addi.w $t5, $a0, 0 - bgeu $a3, $t1, .LBB6_45 -# %bb.44: # in Loop: Header=BB6_43 Depth=2 + bgeu $a3, $t1, .LBB6_46 +# %bb.45: # in Loop: Header=BB6_44 Depth=2 move $a0, $t5 ori $t7, $zero, 2 - b .LBB6_48 + b .LBB6_49 .p2align 4, , 16 -.LBB6_45: # %vector.ph - # in Loop: Header=BB6_43 Depth=2 +.LBB6_46: # %vector.ph + # in Loop: Header=BB6_44 Depth=2 mul.d $a0, $s4, $t4 add.d $t6, $a0, $t2 add.d $a0, $t5, $a6 - pcalau12i $t7, %pc_hi20(.LCPI6_2) - vld $vr0, $t7, %pc_lo12(.LCPI6_2) + pcalau12i $t7, %pc_hi20(.LCPI6_0) + vld $vr0, $t7, %pc_lo12(.LCPI6_0) vreplgr2vr.w $vr1, $t6 vaddi.wu $vr2, $vr1, 4 alsl.d $t5, $t5, $a7, 2 move $t6, $a6 .p2align 4, , 16 -.LBB6_46: # %vector.body - # Parent Loop BB6_41 Depth=1 - # Parent Loop BB6_43 Depth=2 +.LBB6_47: # %vector.body + # Parent Loop BB6_42 Depth=1 + # Parent Loop BB6_44 Depth=2 # => This Inner Loop Header: Depth=3 vadd.w $vr3, $vr0, $vr1 vadd.w $vr4, $vr0, $vr2 @@ -3384,31 +3372,31 @@ _ZN7ADomainC2Eii: # @_ZN7ADomainC2Eii vaddi.wu $vr0, $vr0, 8 addi.d $t6, $t6, -8 addi.d $t5, $t5, 32 - bnez $t6, .LBB6_46 -# %bb.47: # %middle.block - # in Loop: Header=BB6_43 Depth=2 + bnez $t6, .LBB6_47 +# %bb.48: # %middle.block + # in Loop: Header=BB6_44 Depth=2 move $t7, $a4 - beq $a6, $a5, .LBB6_42 -.LBB6_48: # %scalar.ph.preheader - # in Loop: Header=BB6_43 Depth=2 + beq $a6, $a5, .LBB6_43 +.LBB6_49: # %scalar.ph.preheader + # in Loop: Header=BB6_44 Depth=2 alsl.d $t5, $a0, $s0, 2 sub.d $t6, $s3, $t7 add.d $t7, $t7, $t3 .p2align 4, , 16 -.LBB6_49: # %scalar.ph - # Parent Loop BB6_41 Depth=1 - # Parent Loop BB6_43 Depth=2 +.LBB6_50: # %scalar.ph + # Parent Loop BB6_42 Depth=1 + # Parent Loop BB6_44 Depth=2 # => This Inner Loop Header: Depth=3 st.w $t7, $t5, 0 addi.d $a0, $a0, 1 addi.d $t5, $t5, 4 addi.w $t6, $t6, -1 addi.d $t7, $t7, 1 - bnez $t6, .LBB6_49 - b .LBB6_42 -.LBB6_50: # %.loopexit.sink.split + bnez $t6, .LBB6_50 + b .LBB6_43 +.LBB6_51: # %.loopexit.sink.split st.w $a0, $fp, 80 -.LBB6_51: # %.loopexit +.LBB6_52: # %.loopexit ld.d $s7, $sp, 16 # 8-byte Folded Reload ld.d $s6, $sp, 24 # 8-byte Folded Reload ld.d $s5, $sp, 32 # 8-byte Folded Reload diff --git a/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/__/LCALSStats.s b/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/__/LCALSStats.s index 65ccd92d..95272aa5 100644 --- a/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/__/LCALSStats.s +++ b/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/__/LCALSStats.s @@ -868,12 +868,7 @@ _Z19getLoopSuiteRunInfov: # @_Z19getLoopSuiteRunInfov .Lfunc_end1: .size _Z19getLoopSuiteRunInfov, .Lfunc_end1-_Z19getLoopSuiteRunInfov # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm -.LCPI2_0: - .dword 0x3fb999999999999a # double 0.10000000000000001 - .text - .globl _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm + .globl _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm # -- Begin function _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm .p2align 5 .type _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm,@function _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm: # @_Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm @@ -1018,10 +1013,13 @@ _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcE st.d $a0, $s4, 560 beqz $a1, .LBB2_15 # %bb.13: # %.lr.ph18.preheader - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI2_0) move $fp, $zero move $s0, $zero + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1019 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB2_14: # %.lr.ph18 # =>This Inner Loop Header: Depth=1 diff --git a/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/__/LCALSSuite.s b/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/__/LCALSSuite.s index a1e8c653..e35fa2d7 100644 --- a/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/__/LCALSSuite.s +++ b/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/__/LCALSSuite.s @@ -23,33 +23,21 @@ _Z11getLoopDatav: # @_Z11getLoopDatav .LCPI1_1: .dword 0x3ff6666666666666 # double 1.3999999999999999 .dword 0x3ff0000000000000 # double 1 -.LCPI1_5: +.LCPI1_2: .dword 8 # 0x8 .dword 4923084613239392580 # 0x44524f5f43534944 -.LCPI1_6: +.LCPI1_3: .dword 8 # 0x8 .dword 4914094937701898568 # 0x44325f4f52445948 -.LCPI1_7: +.LCPI1_4: .dword 8 # 0x8 .dword 4913813462725187912 # 0x44315f4f52445948 -.LCPI1_8: +.LCPI1_5: .dword 8 # 0x8 .dword 6074873621086556756 # 0x544e495f50415254 -.LCPI1_11: +.LCPI1_6: .dword 8 # 0x8 .dword 5786931235628926290 # 0x504f4f4c5f464552 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI1_2: - .dword 0x40e5972000000000 # double 44217 -.LCPI1_3: - .dword 0x40b3890000000000 # double 5001 -.LCPI1_4: - .dword 0x4065600000000000 # double 171 -.LCPI1_9: - .dword 0x4063800000000000 # double 156 -.LCPI1_10: - .dword 0x4050000000000000 # double 64 .text .globl _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd .p2align 5 @@ -640,21 +628,27 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define .Ltmp34: # EH_LABEL # %bb.92: move $s8, $a0 - pcalau12i $a0, %pc_hi20(.LCPI1_2) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_2) - pcalau12i $a0, %pc_hi20(.LCPI1_3) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_3) + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, 366368 + lu52i.d $a1, $a1, 1038 + movgr2fr.d $fa0, $a1 fmul.d $fa0, $fs0, $fa0 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a0, $fa0 - fmul.d $fa0, $fs0, $fa1 + movfr2gr.s $a1, $fa0 + st.w $a1, $s8, 0 + ori $a1, $zero, 0 + lu32i.d $a1, 231680 + lu52i.d $a1, $a1, 1035 + movgr2fr.d $fa0, $a1 + fmul.d $fa0, $fs0, $fa0 ftintrz.w.d $fa0, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI1_4) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_4) movfr2gr.s $a1, $fa0 - st.w $a0, $s8, 0 st.w $a1, $s8, 4 - fmul.d $fa0, $fs0, $fa1 + lu32i.d $a0, 352256 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa0, $a0 + fmul.d $fa0, $fs0, $fa0 ftintrz.w.d $fa0, $fa0 ld.d $s7, $sp, 96 # 8-byte Folded Reload ld.w $a0, $s7, 32 @@ -732,8 +726,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define jr $a0 .LBB1_99: # %._crit_edge.i.i352 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_11) - vld $vr0, $a0, %pc_lo12(.LCPI1_11) + pcalau12i $a0, %pc_hi20(.LCPI1_6) + vld $vr0, $a0, %pc_lo12(.LCPI1_6) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -936,8 +930,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define b .LBB1_133 .LBB1_112: # %._crit_edge.i.i732 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_8) - vld $vr0, $a0, %pc_lo12(.LCPI1_8) + pcalau12i $a0, %pc_hi20(.LCPI1_5) + vld $vr0, $a0, %pc_lo12(.LCPI1_5) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -1687,8 +1681,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define b .LBB1_205 .LBB1_158: # %._crit_edge.i.i748 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_7) - vld $vr0, $a0, %pc_lo12(.LCPI1_7) + pcalau12i $a0, %pc_hi20(.LCPI1_4) + vld $vr0, $a0, %pc_lo12(.LCPI1_4) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -1761,16 +1755,18 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define # in Loop: Header=BB1_95 Depth=1 ld.d $a0, $sp, 32 # 8-byte Folded Reload ld.d $a0, $a0, 0 - ld.d $a1, $sp, 8 # 8-byte Folded Reload - fld.d $fa0, $a1, %pc_lo12(_ZN7ADomain18loop_length_factorE) - pcalau12i $a1, %pc_hi20(.LCPI1_9) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_9) - fld.d $fa2, $a0, 0 - fmul.d $fa1, $fa0, $fa1 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 + fld.d $fa1, $a0, 0 + ld.d $a0, $sp, 8 # 8-byte Folded Reload + fld.d $fa0, $a0, %pc_lo12(_ZN7ADomain18loop_length_factorE) + ori $a0, $zero, 0 + lu32i.d $a0, 229376 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa2, $a0 + fmul.d $fa2, $fa0, $fa2 + ftintrz.w.d $fa2, $fa2 + movfr2gr.s $a0, $fa2 ori $a2, $zero, 2 - fst.d $fa2, $sp, 168 + fst.d $fa1, $sp, 168 blt $a0, $a2, .LBB1_211 # %bb.164: # %.lr.ph72.us.i.preheader # in Loop: Header=BB1_95 Depth=1 @@ -2202,8 +2198,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define b .LBB1_205 .LBB1_196: # %._crit_edge.i.i988 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_5) - vld $vr0, $a0, %pc_lo12(.LCPI1_5) + pcalau12i $a0, %pc_hi20(.LCPI1_2) + vld $vr0, $a0, %pc_lo12(.LCPI1_2) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -2297,8 +2293,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define b .LBB1_205 .LBB1_202: # %._crit_edge.i.i956 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_6) - vld $vr0, $a0, %pc_lo12(.LCPI1_6) + pcalau12i $a0, %pc_hi20(.LCPI1_3) + vld $vr0, $a0, %pc_lo12(.LCPI1_3) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -2382,9 +2378,9 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define move $a3, $zero .LBB1_212: # %_ZN7ADomainC2Eii.exit527 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a1, %pc_hi20(.LCPI1_10) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_10) ld.d $a1, $sp, 368 + lu52i.d $a4, $zero, 1029 + movgr2fr.d $fa1, $a4 fmul.d $fa1, $fa0, $fa1 ftintrz.w.d $fa1, $fa1 movfr2gr.s $a4, $fa1 @@ -4488,15 +4484,9 @@ GCC_except_table7: .LCPI8_0: .dword 0x3fb999999999999a # double 0.10000000000000001 .dword 0x3fc999999999999a # double 0.20000000000000001 -.LCPI8_3: +.LCPI8_1: .dword 0x3fc999999999999a # double 0.20000000000000001 .dword 0x3fd3333333333333 # double 0.29999999999999999 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI8_1: - .dword 0x3ff199999999999a # double 1.1000000000000001 -.LCPI8_2: - .dword 0x3ff1f9a6b50b0f28 # double 1.1234500000000001 .text .globl _Z8loopInitjR8LoopStat .p2align 5 @@ -4747,8 +4737,8 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat .LBB8_40: pcalau12i $a0, %pc_hi20(.LCPI8_0) addi.d $a0, $a0, %pc_lo12(.LCPI8_0) - pcalau12i $a1, %pc_hi20(.LCPI8_3) - addi.d $a1, $a1, %pc_lo12(.LCPI8_3) + pcalau12i $a1, %pc_hi20(.LCPI8_1) + addi.d $a1, $a1, %pc_lo12(.LCPI8_1) ld.w $a3, $s1, 1032 blez $a3, .LBB8_577 # %bb.41: # %.lr.ph.preheader.i430 @@ -4962,8 +4952,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat pcalau12i $a2, %pc_hi20(.LCPI8_0) addi.d $a2, $a2, %pc_lo12(.LCPI8_0) fldx.d $fa0, $a2, $a0 - ld.d $a2, $s1, 472 + ld.d $a3, $s1, 472 ori $a0, $zero, 4 + lu12i.w $a4, -419431 + lu12i.w $a2, -307024 bgeu $a1, $a0, .LBB8_920 # %bb.78: move $a0, $zero @@ -5152,12 +5144,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.102: # %middle.block3803 beq $a1, $a2, .LBB8_105 .LBB8_103: # %.lr.ph.i184.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_104: # %.lr.ph.i184 # =>This Inner Loop Header: Depth=1 @@ -5246,12 +5244,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.110: # %middle.block3819 beq $a1, $a2, .LBB8_113 .LBB8_111: # %.lr.ph.i192.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_112: # %.lr.ph.i192 # =>This Inner Loop Header: Depth=1 @@ -5340,12 +5344,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.118: # %middle.block3835 beq $a1, $a2, .LBB8_121 .LBB8_119: # %.lr.ph.i200.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_120: # %.lr.ph.i200 # =>This Inner Loop Header: Depth=1 @@ -5434,12 +5444,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.126: # %middle.block3851 beq $a1, $a2, .LBB8_129 .LBB8_127: # %.lr.ph.i208.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_128: # %.lr.ph.i208 # =>This Inner Loop Header: Depth=1 @@ -5528,12 +5544,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.134: # %middle.block3867 beq $a1, $a2, .LBB8_137 .LBB8_135: # %.lr.ph.i216.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_136: # %.lr.ph.i216 # =>This Inner Loop Header: Depth=1 @@ -5622,12 +5644,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.142: # %middle.block3883 beq $a0, $a1, .LBB8_1187 .LBB8_143: # %.lr.ph.i224.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_144: # %.lr.ph.i224 # =>This Inner Loop Header: Depth=1 @@ -5702,12 +5730,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.147: # %middle.block3547 beq $a1, $a2, .LBB8_150 .LBB8_148: # %.lr.ph.i232.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_149: # %.lr.ph.i232 # =>This Inner Loop Header: Depth=1 @@ -5796,12 +5830,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.155: # %middle.block3563 beq $a1, $a2, .LBB8_158 .LBB8_156: # %.lr.ph.i240.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_157: # %.lr.ph.i240 # =>This Inner Loop Header: Depth=1 @@ -5890,12 +5930,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.163: # %middle.block3579 beq $a1, $a2, .LBB8_166 .LBB8_164: # %.lr.ph.i248.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_165: # %.lr.ph.i248 # =>This Inner Loop Header: Depth=1 @@ -5984,12 +6030,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.171: # %middle.block3595 beq $a1, $a2, .LBB8_174 .LBB8_172: # %.lr.ph.i256.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_173: # %.lr.ph.i256 # =>This Inner Loop Header: Depth=1 @@ -6078,12 +6130,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.179: # %middle.block3611 beq $a1, $a2, .LBB8_182 .LBB8_180: # %.lr.ph.i264.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_181: # %.lr.ph.i264 # =>This Inner Loop Header: Depth=1 @@ -6172,12 +6230,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.187: # %middle.block3627 beq $a1, $a2, .LBB8_190 .LBB8_188: # %.lr.ph.i272.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_189: # %.lr.ph.i272 # =>This Inner Loop Header: Depth=1 @@ -6266,12 +6330,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.195: # %middle.block3643 beq $a1, $a2, .LBB8_198 .LBB8_196: # %.lr.ph.i280.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_197: # %.lr.ph.i280 # =>This Inner Loop Header: Depth=1 @@ -6360,12 +6430,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.203: # %middle.block3659 beq $a1, $a2, .LBB8_206 .LBB8_204: # %.lr.ph.i288.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_205: # %.lr.ph.i288 # =>This Inner Loop Header: Depth=1 @@ -6454,12 +6530,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.211: # %middle.block3675 beq $a1, $a2, .LBB8_214 .LBB8_212: # %.lr.ph.i296.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_213: # %.lr.ph.i296 # =>This Inner Loop Header: Depth=1 @@ -6548,12 +6630,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.219: # %middle.block3691 beq $a1, $a2, .LBB8_222 .LBB8_220: # %.lr.ph.i304.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_221: # %.lr.ph.i304 # =>This Inner Loop Header: Depth=1 @@ -6642,12 +6730,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.227: # %middle.block3707 beq $a1, $a2, .LBB8_230 .LBB8_228: # %.lr.ph.i312.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_229: # %.lr.ph.i312 # =>This Inner Loop Header: Depth=1 @@ -6736,12 +6830,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.235: # %middle.block3723 beq $a1, $a2, .LBB8_238 .LBB8_236: # %.lr.ph.i320.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_237: # %.lr.ph.i320 # =>This Inner Loop Header: Depth=1 @@ -6830,12 +6930,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.243: # %middle.block3739 beq $a1, $a2, .LBB8_246 .LBB8_244: # %.lr.ph.i328.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_245: # %.lr.ph.i328 # =>This Inner Loop Header: Depth=1 @@ -6924,12 +7030,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.251: # %middle.block3755 beq $a1, $a2, .LBB8_254 .LBB8_252: # %.lr.ph.i336.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_253: # %.lr.ph.i336 # =>This Inner Loop Header: Depth=1 @@ -7018,12 +7130,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.259: # %middle.block3771 beq $a1, $a2, .LBB8_262 .LBB8_260: # %.lr.ph.i344.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_261: # %.lr.ph.i344 # =>This Inner Loop Header: Depth=1 @@ -7112,12 +7230,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.267: # %middle.block3787 beq $a0, $a1, .LBB8_1187 .LBB8_268: # %.lr.ph.i352.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_269: # %.lr.ph.i352 # =>This Inner Loop Header: Depth=1 @@ -7215,12 +7339,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.274: # %middle.block2006 beq $a1, $a2, .LBB8_277 .LBB8_275: # %.lr.ph.i1118.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_276: # %.lr.ph.i1118 # =>This Inner Loop Header: Depth=1 @@ -7309,12 +7439,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.282: # %middle.block2022 beq $a1, $a2, .LBB8_285 .LBB8_283: # %.lr.ph.i1126.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_284: # %.lr.ph.i1126 # =>This Inner Loop Header: Depth=1 @@ -7403,12 +7539,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.290: # %middle.block2038 beq $a1, $a2, .LBB8_293 .LBB8_291: # %.lr.ph.i1134.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_292: # %.lr.ph.i1134 # =>This Inner Loop Header: Depth=1 @@ -7497,12 +7639,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.298: # %middle.block2054 beq $a0, $a1, .LBB8_1187 .LBB8_299: # %.lr.ph.i1142.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_300: # %.lr.ph.i1142 # =>This Inner Loop Header: Depth=1 @@ -7577,12 +7725,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.303: # %middle.block2870 beq $a1, $a2, .LBB8_306 .LBB8_304: # %.lr.ph.i692.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_305: # %.lr.ph.i692 # =>This Inner Loop Header: Depth=1 @@ -7671,12 +7825,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.311: # %middle.block2886 beq $a0, $a1, .LBB8_1187 .LBB8_312: # %.lr.ph.i700.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_313: # %.lr.ph.i700 # =>This Inner Loop Header: Depth=1 @@ -7751,12 +7911,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.316: # %middle.block2934 beq $a1, $a2, .LBB8_319 .LBB8_317: # %.lr.ph.i660.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_318: # %.lr.ph.i660 # =>This Inner Loop Header: Depth=1 @@ -7845,12 +8011,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.324: # %middle.block2950 beq $a0, $a1, .LBB8_1187 .LBB8_325: # %.lr.ph.i668.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_326: # %.lr.ph.i668 # =>This Inner Loop Header: Depth=1 @@ -7925,12 +8097,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.329: # %middle.block3046 beq $a1, $a2, .LBB8_332 .LBB8_330: # %.lr.ph.i579.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_331: # %.lr.ph.i579 # =>This Inner Loop Header: Depth=1 @@ -8019,13 +8197,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.337: # %middle.block3062 beq $a1, $a2, .LBB8_340 .LBB8_338: # %.lr.ph.i587.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 - .p2align 4, , 16 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 + .p2align 4, , 16 .LBB8_339: # %.lr.ph.i587 # =>This Inner Loop Header: Depth=1 bstrpick.d $a4, $a1, 31, 0 @@ -8113,12 +8297,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.345: # %middle.block3078 beq $a1, $a2, .LBB8_348 .LBB8_346: # %.lr.ph.i595.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_347: # %.lr.ph.i595 # =>This Inner Loop Header: Depth=1 @@ -8207,12 +8397,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.353: # %middle.block3094 beq $a1, $a2, .LBB8_356 .LBB8_354: # %.lr.ph.i603.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_355: # %.lr.ph.i603 # =>This Inner Loop Header: Depth=1 @@ -8301,12 +8497,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.361: # %middle.block3110 beq $a0, $a1, .LBB8_1187 .LBB8_362: # %.lr.ph.i611.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_363: # %.lr.ph.i611 # =>This Inner Loop Header: Depth=1 @@ -8381,12 +8583,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.366: # %middle.block3030 beq $a0, $a1, .LBB8_1187 .LBB8_367: # %.lr.ph.i620.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_368: # %.lr.ph.i620 # =>This Inner Loop Header: Depth=1 @@ -8461,12 +8669,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.371: # %middle.block3899 beq $a1, $a2, .LBB8_374 .LBB8_372: # %.lr.ph.i.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_373: # %.lr.ph.i # =>This Inner Loop Header: Depth=1 @@ -8555,12 +8769,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.379: # %middle.block3915 beq $a1, $a2, .LBB8_382 .LBB8_380: # %.lr.ph.i168.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_381: # %.lr.ph.i168 # =>This Inner Loop Header: Depth=1 @@ -8649,12 +8869,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.387: # %middle.block3931 beq $a0, $a1, .LBB8_1187 .LBB8_388: # %.lr.ph.i176.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_389: # %.lr.ph.i176 # =>This Inner Loop Header: Depth=1 @@ -8729,12 +8955,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.392: # %middle.block3206 beq $a1, $a2, .LBB8_395 .LBB8_393: # %.lr.ph.i499.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_394: # %.lr.ph.i499 # =>This Inner Loop Header: Depth=1 @@ -8823,12 +9055,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.400: # %middle.block3222 beq $a1, $a2, .LBB8_403 .LBB8_401: # %.lr.ph.i507.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_402: # %.lr.ph.i507 # =>This Inner Loop Header: Depth=1 @@ -8917,12 +9155,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.408: # %middle.block3238 beq $a1, $a2, .LBB8_411 .LBB8_409: # %.lr.ph.i515.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_410: # %.lr.ph.i515 # =>This Inner Loop Header: Depth=1 @@ -9011,12 +9255,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.416: # %middle.block3254 beq $a1, $a2, .LBB8_419 .LBB8_417: # %.lr.ph.i523.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_418: # %.lr.ph.i523 # =>This Inner Loop Header: Depth=1 @@ -9105,12 +9355,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.424: # %middle.block3270 beq $a0, $a1, .LBB8_1187 .LBB8_425: # %.lr.ph.i531.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_426: # %.lr.ph.i531 # =>This Inner Loop Header: Depth=1 @@ -9185,12 +9441,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.429: # %middle.block2246 beq $a1, $a2, .LBB8_432 .LBB8_430: # %.lr.ph.i944.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_431: # %.lr.ph.i944 # =>This Inner Loop Header: Depth=1 @@ -9279,12 +9541,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.437: # %middle.block2262 beq $a1, $a2, .LBB8_440 .LBB8_438: # %.lr.ph.i952.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_439: # %.lr.ph.i952 # =>This Inner Loop Header: Depth=1 @@ -9373,12 +9641,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.445: # %middle.block2278 beq $a1, $a2, .LBB8_448 .LBB8_446: # %.lr.ph.i960.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_447: # %.lr.ph.i960 # =>This Inner Loop Header: Depth=1 @@ -9467,12 +9741,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.453: # %middle.block2294 beq $a1, $a2, .LBB8_456 .LBB8_454: # %.lr.ph.i968.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_455: # %.lr.ph.i968 # =>This Inner Loop Header: Depth=1 @@ -9561,12 +9841,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.461: # %middle.block2310 beq $a1, $a2, .LBB8_464 .LBB8_462: # %.lr.ph.i976.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_463: # %.lr.ph.i976 # =>This Inner Loop Header: Depth=1 @@ -9655,12 +9941,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.469: # %middle.block2326 beq $a1, $a2, .LBB8_472 .LBB8_470: # %.lr.ph.i984.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_471: # %.lr.ph.i984 # =>This Inner Loop Header: Depth=1 @@ -9749,12 +10041,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.477: # %middle.block2342 beq $a1, $a2, .LBB8_480 .LBB8_478: # %.lr.ph.i992.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_479: # %.lr.ph.i992 # =>This Inner Loop Header: Depth=1 @@ -9843,12 +10141,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.485: # %middle.block2358 beq $a1, $a2, .LBB8_488 .LBB8_486: # %.lr.ph.i1000.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_487: # %.lr.ph.i1000 # =>This Inner Loop Header: Depth=1 @@ -9937,12 +10241,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.493: # %middle.block2374 beq $a1, $a2, .LBB8_496 .LBB8_494: # %.lr.ph.i1008.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_495: # %.lr.ph.i1008 # =>This Inner Loop Header: Depth=1 @@ -10031,12 +10341,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.501: # %middle.block2390 beq $a0, $a1, .LBB8_504 .LBB8_502: # %.lr.ph.i1016.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_503: # %.lr.ph.i1016 # =>This Inner Loop Header: Depth=1 @@ -10142,12 +10458,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.512: # %middle.block2406 beq $a0, $a1, .LBB8_515 .LBB8_513: # %.lr.ph.i892.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_514: # %.lr.ph.i892 # =>This Inner Loop Header: Depth=1 @@ -10236,12 +10558,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.520: # %middle.block2422 beq $a0, $a1, .LBB8_523 .LBB8_521: # %.lr.ph.i900.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_522: # %.lr.ph.i900 # =>This Inner Loop Header: Depth=1 @@ -10330,12 +10658,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.528: # %middle.block2438 beq $a0, $a1, .LBB8_531 .LBB8_529: # %.lr.ph.i908.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_530: # %.lr.ph.i908 # =>This Inner Loop Header: Depth=1 @@ -10424,12 +10758,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.536: # %middle.block2454 beq $a0, $a1, .LBB8_539 .LBB8_537: # %.lr.ph.i916.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_538: # %.lr.ph.i916 # =>This Inner Loop Header: Depth=1 @@ -10518,12 +10858,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.544: # %middle.block2470 beq $a0, $a1, .LBB8_547 .LBB8_545: # %.lr.ph.i924.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_546: # %.lr.ph.i924 # =>This Inner Loop Header: Depth=1 @@ -10630,12 +10976,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.556: # %middle.block2486 beq $a0, $a1, .LBB8_1187 .LBB8_557: # %.lr.ph.i936.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_558: # %.lr.ph.i936 # =>This Inner Loop Header: Depth=1 @@ -10710,12 +11062,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.561: # %middle.block2902 beq $a1, $a2, .LBB8_564 .LBB8_562: # %.lr.ph.i676.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_563: # %.lr.ph.i676 # =>This Inner Loop Header: Depth=1 @@ -10804,12 +11162,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.569: # %middle.block2918 beq $a0, $a1, .LBB8_1187 .LBB8_570: # %.lr.ph.i684.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_571: # %.lr.ph.i684 # =>This Inner Loop Header: Depth=1 @@ -10875,13 +11239,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.574: # %middle.block3319 beq $a2, $a3, .LBB8_577 .LBB8_575: # %.lr.ph.i432.preheader - pcalau12i $a5, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI8_1) - pcalau12i $a5, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI8_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB8_576: # %.lr.ph.i432 # =>This Inner Loop Header: Depth=1 @@ -10965,13 +11335,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.582: # %middle.block3336 beq $a2, $a3, .LBB8_585 .LBB8_583: # %.lr.ph.i441.preheader - pcalau12i $a5, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI8_1) - pcalau12i $a5, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI8_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB8_584: # %.lr.ph.i441 # =>This Inner Loop Header: Depth=1 @@ -11055,13 +11431,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.590: # %middle.block3353 beq $a2, $a3, .LBB8_593 .LBB8_591: # %.lr.ph.i452.preheader - pcalau12i $a5, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI8_1) - pcalau12i $a5, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI8_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB8_592: # %.lr.ph.i452 # =>This Inner Loop Header: Depth=1 @@ -11145,13 +11527,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.598: # %middle.block3370 beq $a2, $a3, .LBB8_601 .LBB8_599: # %.lr.ph.i463.preheader - pcalau12i $a5, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI8_1) - pcalau12i $a5, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI8_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB8_600: # %.lr.ph.i463 # =>This Inner Loop Header: Depth=1 @@ -11235,13 +11623,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.606: # %middle.block3387 beq $a0, $a2, .LBB8_1187 .LBB8_607: # %.lr.ph.i474.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a3, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a0 alsl.d $a1, $a0, $a1, 4 addi.d $a1, $a1, 8 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa3, $a3 .p2align 4, , 16 .LBB8_608: # %.lr.ph.i474 # =>This Inner Loop Header: Depth=1 @@ -11319,12 +11713,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.611: # %middle.block1622 beq $a1, $a2, .LBB8_614 .LBB8_612: # %.lr.ph.i1294.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_613: # %.lr.ph.i1294 # =>This Inner Loop Header: Depth=1 @@ -11413,12 +11813,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.619: # %middle.block1638 beq $a1, $a2, .LBB8_622 .LBB8_620: # %.lr.ph.i1302.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_621: # %.lr.ph.i1302 # =>This Inner Loop Header: Depth=1 @@ -11507,12 +11913,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.627: # %middle.block1654 beq $a1, $a2, .LBB8_630 .LBB8_628: # %.lr.ph.i1310.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_629: # %.lr.ph.i1310 # =>This Inner Loop Header: Depth=1 @@ -11601,12 +12013,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.635: # %middle.block1670 beq $a1, $a2, .LBB8_638 .LBB8_636: # %.lr.ph.i1318.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_637: # %.lr.ph.i1318 # =>This Inner Loop Header: Depth=1 @@ -11695,12 +12113,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.643: # %middle.block1686 beq $a1, $a2, .LBB8_646 .LBB8_644: # %.lr.ph.i1326.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_645: # %.lr.ph.i1326 # =>This Inner Loop Header: Depth=1 @@ -11789,12 +12213,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.651: # %middle.block1702 beq $a0, $a1, .LBB8_1187 .LBB8_652: # %.lr.ph.i1334.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_653: # %.lr.ph.i1334 # =>This Inner Loop Header: Depth=1 @@ -11869,12 +12299,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.656: # %middle.block2822 beq $a1, $a2, .LBB8_659 .LBB8_657: # %.lr.ph.i708.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_658: # %.lr.ph.i708 # =>This Inner Loop Header: Depth=1 @@ -11963,12 +12399,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.664: # %middle.block2838 beq $a1, $a2, .LBB8_667 .LBB8_665: # %.lr.ph.i716.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_666: # %.lr.ph.i716 # =>This Inner Loop Header: Depth=1 @@ -12057,12 +12499,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.672: # %middle.block2854 beq $a0, $a1, .LBB8_1187 .LBB8_673: # %.lr.ph.i724.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_674: # %.lr.ph.i724 # =>This Inner Loop Header: Depth=1 @@ -12137,12 +12585,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.677: # %middle.block2598 beq $a1, $a2, .LBB8_680 .LBB8_678: # %.lr.ph.i828.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_679: # %.lr.ph.i828 # =>This Inner Loop Header: Depth=1 @@ -12231,12 +12685,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.685: # %middle.block2614 beq $a0, $a1, .LBB8_1187 .LBB8_686: # %.lr.ph.i836.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_687: # %.lr.ph.i836 # =>This Inner Loop Header: Depth=1 @@ -12311,12 +12771,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.690: # %middle.block3483 beq $a1, $a2, .LBB8_693 .LBB8_691: # %.lr.ph.i360.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_692: # %.lr.ph.i360 # =>This Inner Loop Header: Depth=1 @@ -12405,12 +12871,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.698: # %middle.block3499 beq $a1, $a2, .LBB8_701 .LBB8_699: # %.lr.ph.i368.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_700: # %.lr.ph.i368 # =>This Inner Loop Header: Depth=1 @@ -12499,12 +12971,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.706: # %middle.block3515 beq $a1, $a2, .LBB8_709 .LBB8_707: # %.lr.ph.i376.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_708: # %.lr.ph.i376 # =>This Inner Loop Header: Depth=1 @@ -12593,12 +13071,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.714: # %middle.block3531 beq $a0, $a1, .LBB8_1187 .LBB8_715: # %.lr.ph.i384.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_716: # %.lr.ph.i384 # =>This Inner Loop Header: Depth=1 @@ -12673,12 +13157,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.719: # %middle.block2566 beq $a1, $a2, .LBB8_722 .LBB8_720: # %.lr.ph.i844.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_721: # %.lr.ph.i844 # =>This Inner Loop Header: Depth=1 @@ -12767,12 +13257,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.727: # %middle.block2582 beq $a0, $a1, .LBB8_1187 .LBB8_728: # %.lr.ph.i852.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_729: # %.lr.ph.i852 # =>This Inner Loop Header: Depth=1 @@ -12847,12 +13343,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.732: # %middle.block2966 beq $a1, $a2, .LBB8_735 .LBB8_733: # %.lr.ph.i628.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_734: # %.lr.ph.i628 # =>This Inner Loop Header: Depth=1 @@ -12941,12 +13443,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.740: # %middle.block2982 beq $a1, $a2, .LBB8_743 .LBB8_741: # %.lr.ph.i636.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_742: # %.lr.ph.i636 # =>This Inner Loop Header: Depth=1 @@ -13035,12 +13543,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.748: # %middle.block2998 beq $a1, $a2, .LBB8_751 .LBB8_749: # %.lr.ph.i644.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_750: # %.lr.ph.i644 # =>This Inner Loop Header: Depth=1 @@ -13129,12 +13643,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.756: # %middle.block3014 beq $a0, $a1, .LBB8_1187 .LBB8_757: # %.lr.ph.i652.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_758: # %.lr.ph.i652 # =>This Inner Loop Header: Depth=1 @@ -13209,12 +13729,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.761: # %middle.block3403 beq $a1, $a2, .LBB8_764 .LBB8_762: # %.lr.ph.i392.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_763: # %.lr.ph.i392 # =>This Inner Loop Header: Depth=1 @@ -13303,12 +13829,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.769: # %middle.block3419 beq $a1, $a2, .LBB8_772 .LBB8_770: # %.lr.ph.i400.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_771: # %.lr.ph.i400 # =>This Inner Loop Header: Depth=1 @@ -13397,12 +13929,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.777: # %middle.block3435 beq $a1, $a2, .LBB8_780 .LBB8_778: # %.lr.ph.i408.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_779: # %.lr.ph.i408 # =>This Inner Loop Header: Depth=1 @@ -13491,12 +14029,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.785: # %middle.block3451 beq $a1, $a2, .LBB8_788 .LBB8_786: # %.lr.ph.i416.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_787: # %.lr.ph.i416 # =>This Inner Loop Header: Depth=1 @@ -13585,12 +14129,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.793: # %middle.block3467 beq $a0, $a1, .LBB8_1187 .LBB8_794: # %.lr.ph.i424.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_795: # %.lr.ph.i424 # =>This Inner Loop Header: Depth=1 @@ -13665,12 +14215,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.798: # %middle.block2742 beq $a1, $a2, .LBB8_801 .LBB8_799: # %.lr.ph.i732.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_800: # %.lr.ph.i732 # =>This Inner Loop Header: Depth=1 @@ -13759,12 +14315,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.806: # %middle.block2758 beq $a1, $a2, .LBB8_809 .LBB8_807: # %.lr.ph.i740.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_808: # %.lr.ph.i740 # =>This Inner Loop Header: Depth=1 @@ -13853,12 +14415,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.814: # %middle.block2774 beq $a1, $a2, .LBB8_817 .LBB8_815: # %.lr.ph.i748.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_816: # %.lr.ph.i748 # =>This Inner Loop Header: Depth=1 @@ -13947,12 +14515,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.822: # %middle.block2790 beq $a1, $a2, .LBB8_825 .LBB8_823: # %.lr.ph.i756.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_824: # %.lr.ph.i756 # =>This Inner Loop Header: Depth=1 @@ -14041,12 +14615,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.830: # %middle.block2806 beq $a0, $a1, .LBB8_1187 .LBB8_831: # %.lr.ph.i764.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_832: # %.lr.ph.i764 # =>This Inner Loop Header: Depth=1 @@ -14121,12 +14701,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.835: # %middle.block3126 beq $a1, $a2, .LBB8_838 .LBB8_836: # %.lr.ph.i539.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_837: # %.lr.ph.i539 # =>This Inner Loop Header: Depth=1 @@ -14215,12 +14801,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.843: # %middle.block3142 beq $a1, $a2, .LBB8_846 .LBB8_844: # %.lr.ph.i547.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_845: # %.lr.ph.i547 # =>This Inner Loop Header: Depth=1 @@ -14309,12 +14901,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.851: # %middle.block3158 beq $a1, $a2, .LBB8_854 .LBB8_852: # %.lr.ph.i555.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_853: # %.lr.ph.i555 # =>This Inner Loop Header: Depth=1 @@ -14403,12 +15001,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.859: # %middle.block3174 beq $a1, $a2, .LBB8_862 .LBB8_860: # %.lr.ph.i563.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_861: # %.lr.ph.i563 # =>This Inner Loop Header: Depth=1 @@ -14497,12 +15101,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.867: # %middle.block3190 beq $a0, $a1, .LBB8_1187 .LBB8_868: # %.lr.ph.i571.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_869: # %.lr.ph.i571 # =>This Inner Loop Header: Depth=1 @@ -14577,12 +15187,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.872: # %middle.block3286 beq $a1, $a2, .LBB8_875 .LBB8_873: # %.lr.ph.i483.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_874: # %.lr.ph.i483 # =>This Inner Loop Header: Depth=1 @@ -14671,12 +15287,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.880: # %middle.block3302 beq $a0, $a1, .LBB8_1187 .LBB8_881: # %.lr.ph.i491.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_882: # %.lr.ph.i491 # =>This Inner Loop Header: Depth=1 @@ -14751,12 +15373,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.885: # %middle.block1718 beq $a1, $a2, .LBB8_888 .LBB8_886: # %.lr.ph.i1254.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_887: # %.lr.ph.i1254 # =>This Inner Loop Header: Depth=1 @@ -14845,12 +15473,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.893: # %middle.block1734 beq $a1, $a2, .LBB8_896 .LBB8_894: # %.lr.ph.i1262.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_895: # %.lr.ph.i1262 # =>This Inner Loop Header: Depth=1 @@ -14939,12 +15573,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.901: # %middle.block1750 beq $a1, $a2, .LBB8_904 .LBB8_902: # %.lr.ph.i1270.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_903: # %.lr.ph.i1270 # =>This Inner Loop Header: Depth=1 @@ -15033,12 +15673,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.909: # %middle.block1766 beq $a1, $a2, .LBB8_912 .LBB8_910: # %.lr.ph.i1278.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_911: # %.lr.ph.i1278 # =>This Inner Loop Header: Depth=1 @@ -15127,12 +15773,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.917: # %middle.block1782 beq $a0, $a1, .LBB8_1187 .LBB8_918: # %.lr.ph.i1286.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_919: # %.lr.ph.i1286 # =>This Inner Loop Header: Depth=1 @@ -15153,41 +15805,39 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat bstrpick.d $a0, $a1, 30, 2 slli.d $a0, $a0, 2 vreplvei.d $vr1, $vr0, 0 - addi.d $a3, $a2, 16 - ori $a4, $zero, 0 - lu32i.d $a4, 1 - vreplgr2vr.d $vr2, $a4 - lu12i.w $a4, -419431 - ori $a4, $a4, 2458 - lu32i.d $a4, 104857 - lu52i.d $a4, $a4, 1023 - vreplgr2vr.d $vr3, $a4 - lu12i.w $a4, -307024 - ori $a4, $a4, 3880 - lu32i.d $a4, 129446 - lu52i.d $a4, $a4, 1023 - vreplgr2vr.d $vr4, $a4 - move $a4, $a0 + addi.d $a5, $a3, 16 + ori $a6, $zero, 0 + lu32i.d $a6, 1 + vreplgr2vr.d $vr2, $a6 + ori $a6, $a4, 2458 + lu32i.d $a6, 104857 + lu52i.d $a6, $a6, 1023 + vreplgr2vr.d $vr3, $a6 + ori $a6, $a2, 3880 + lu32i.d $a6, 129446 + lu52i.d $a6, $a6, 1023 + vreplgr2vr.d $vr4, $a6 + move $a6, $a0 .p2align 4, , 16 .LBB8_921: # %vector.body # =>This Inner Loop Header: Depth=1 vaddi.wu $vr5, $vr2, 2 - vpickve2gr.w $a5, $vr2, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa6, $a5 + vpickve2gr.w $a7, $vr2, 1 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa6, $a7 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a5, $vr2, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa7, $a5 + vpickve2gr.w $a7, $vr2, 0 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa7, $a7 ffint.d.l $fa7, $fa7 vextrins.d $vr7, $vr6, 16 - vpickve2gr.w $a5, $vr5, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa6, $a5 + vpickve2gr.w $a7, $vr5, 1 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa6, $a7 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a5, $vr5, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa5, $a5 + vpickve2gr.w $a7, $vr5, 0 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa5, $a7 ffint.d.l $fa5, $fa5 vextrins.d $vr5, $vr6, 16 vfadd.d $vr6, $vr7, $vr3 @@ -15198,34 +15848,38 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat vfadd.d $vr5, $vr5, $vr4 vfdiv.d $vr6, $vr6, $vr7 vfdiv.d $vr5, $vr8, $vr5 - vst $vr6, $a3, -16 - vst $vr5, $a3, 0 + vst $vr6, $a5, -16 + vst $vr5, $a5, 0 vaddi.wu $vr2, $vr2, 4 - addi.d $a4, $a4, -4 - addi.d $a3, $a3, 32 - bnez $a4, .LBB8_921 + addi.d $a6, $a6, -4 + addi.d $a5, $a5, 32 + bnez $a6, .LBB8_921 # %bb.922: # %middle.block beq $a0, $a1, .LBB8_1187 .LBB8_923: # %.lr.ph.i1342.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 - alsl.d $a2, $a0, $a2, 3 + alsl.d $a3, $a0, $a3, 3 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + ori $a2, $a2, 3880 + lu32i.d $a2, 129446 + lu52i.d $a2, $a2, 1023 + movgr2fr.d $fa2, $a2 .p2align 4, , 16 .LBB8_924: # %.lr.ph.i1342 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a0, 31, 0 - movgr2fr.d $fa3, $a3 + bstrpick.d $a2, $a0, 31, 0 + movgr2fr.d $fa3, $a2 ffint.d.l $fa3, $fa3 fadd.d $fa4, $fa3, $fa1 fmul.d $fa4, $fa0, $fa4 fadd.d $fa3, $fa3, $fa2 fdiv.d $fa3, $fa4, $fa3 - fst.d $fa3, $a2, 0 + fst.d $fa3, $a3, 0 addi.d $a1, $a1, -1 - addi.d $a2, $a2, 8 + addi.d $a3, $a3, 8 addi.w $a0, $a0, 1 bnez $a1, .LBB8_924 b .LBB8_1187 @@ -15287,12 +15941,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.927: # %middle.block2534 beq $a1, $a2, .LBB8_930 .LBB8_928: # %.lr.ph.i860.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_929: # %.lr.ph.i860 # =>This Inner Loop Header: Depth=1 @@ -15381,12 +16041,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.935: # %middle.block2550 beq $a0, $a1, .LBB8_1187 .LBB8_936: # %.lr.ph.i868.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_937: # %.lr.ph.i868 # =>This Inner Loop Header: Depth=1 @@ -15461,12 +16127,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.940: # %middle.block2630 beq $a1, $a2, .LBB8_943 .LBB8_941: # %.lr.ph.i772.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_942: # %.lr.ph.i772 # =>This Inner Loop Header: Depth=1 @@ -15555,12 +16227,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.948: # %middle.block2646 beq $a1, $a2, .LBB8_951 .LBB8_949: # %.lr.ph.i780.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_950: # %.lr.ph.i780 # =>This Inner Loop Header: Depth=1 @@ -15649,12 +16327,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.956: # %middle.block2662 beq $a1, $a2, .LBB8_959 .LBB8_957: # %.lr.ph.i788.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_958: # %.lr.ph.i788 # =>This Inner Loop Header: Depth=1 @@ -15743,12 +16427,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.964: # %middle.block2678 beq $a1, $a2, .LBB8_967 .LBB8_965: # %.lr.ph.i796.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_966: # %.lr.ph.i796 # =>This Inner Loop Header: Depth=1 @@ -15837,12 +16527,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.972: # %middle.block2694 beq $a1, $a2, .LBB8_975 .LBB8_973: # %.lr.ph.i804.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_974: # %.lr.ph.i804 # =>This Inner Loop Header: Depth=1 @@ -15931,12 +16627,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.980: # %middle.block2710 beq $a1, $a2, .LBB8_983 .LBB8_981: # %.lr.ph.i812.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_982: # %.lr.ph.i812 # =>This Inner Loop Header: Depth=1 @@ -16025,12 +16727,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.988: # %middle.block2726 beq $a0, $a1, .LBB8_1187 .LBB8_989: # %.lr.ph.i820.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_990: # %.lr.ph.i820 # =>This Inner Loop Header: Depth=1 @@ -16105,12 +16813,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.993: # %middle.block2502 beq $a1, $a2, .LBB8_996 .LBB8_994: # %.lr.ph.i876.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_995: # %.lr.ph.i876 # =>This Inner Loop Header: Depth=1 @@ -16199,12 +16913,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1001: # %middle.block2518 beq $a0, $a1, .LBB8_1187 .LBB8_1002: # %.lr.ph.i884.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_1003: # %.lr.ph.i884 # =>This Inner Loop Header: Depth=1 @@ -16279,12 +16999,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1006: # %middle.block1846 beq $a1, $a2, .LBB8_1009 .LBB8_1007: # %.lr.ph.i1150.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1008: # %.lr.ph.i1150 # =>This Inner Loop Header: Depth=1 @@ -16373,12 +17099,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1014: # %middle.block1862 beq $a1, $a2, .LBB8_1017 .LBB8_1015: # %.lr.ph.i1158.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1016: # %.lr.ph.i1158 # =>This Inner Loop Header: Depth=1 @@ -16467,12 +17199,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1022: # %middle.block1878 beq $a1, $a2, .LBB8_1025 .LBB8_1023: # %.lr.ph.i1166.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1024: # %.lr.ph.i1166 # =>This Inner Loop Header: Depth=1 @@ -16561,12 +17299,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1030: # %middle.block1894 beq $a1, $a2, .LBB8_1033 .LBB8_1031: # %.lr.ph.i1174.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1032: # %.lr.ph.i1174 # =>This Inner Loop Header: Depth=1 @@ -16655,12 +17399,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1038: # %middle.block1910 beq $a1, $a2, .LBB8_1041 .LBB8_1039: # %.lr.ph.i1182.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1040: # %.lr.ph.i1182 # =>This Inner Loop Header: Depth=1 @@ -16749,12 +17499,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1046: # %middle.block1926 beq $a1, $a2, .LBB8_1049 .LBB8_1047: # %.lr.ph.i1190.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1048: # %.lr.ph.i1190 # =>This Inner Loop Header: Depth=1 @@ -16843,12 +17599,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1054: # %middle.block1942 beq $a1, $a2, .LBB8_1057 .LBB8_1055: # %.lr.ph.i1198.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1056: # %.lr.ph.i1198 # =>This Inner Loop Header: Depth=1 @@ -16937,12 +17699,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1062: # %middle.block1958 beq $a1, $a2, .LBB8_1065 .LBB8_1063: # %.lr.ph.i1206.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1064: # %.lr.ph.i1206 # =>This Inner Loop Header: Depth=1 @@ -17031,12 +17799,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1070: # %middle.block1974 beq $a1, $a2, .LBB8_1073 .LBB8_1071: # %.lr.ph.i1214.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1072: # %.lr.ph.i1214 # =>This Inner Loop Header: Depth=1 @@ -17125,12 +17899,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1078: # %middle.block1990 beq $a0, $a1, .LBB8_1187 .LBB8_1079: # %.lr.ph.i1222.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_1080: # %.lr.ph.i1222 # =>This Inner Loop Header: Depth=1 @@ -17205,12 +17985,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1083: # %middle.block1798 beq $a1, $a2, .LBB8_1086 .LBB8_1084: # %.lr.ph.i1230.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1085: # %.lr.ph.i1230 # =>This Inner Loop Header: Depth=1 @@ -17299,12 +18085,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1091: # %middle.block1814 beq $a1, $a2, .LBB8_1094 .LBB8_1092: # %.lr.ph.i1238.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1093: # %.lr.ph.i1238 # =>This Inner Loop Header: Depth=1 @@ -17393,12 +18185,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1099: # %middle.block1830 beq $a0, $a1, .LBB8_1187 .LBB8_1100: # %.lr.ph.i1246.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_1101: # %.lr.ph.i1246 # =>This Inner Loop Header: Depth=1 @@ -17473,12 +18271,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1104: # %middle.block2070 beq $a1, $a2, .LBB8_1107 .LBB8_1105: # %.lr.ph.i1030.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1106: # %.lr.ph.i1030 # =>This Inner Loop Header: Depth=1 @@ -17567,12 +18371,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1112: # %middle.block2086 beq $a1, $a2, .LBB8_1115 .LBB8_1113: # %.lr.ph.i1038.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1114: # %.lr.ph.i1038 # =>This Inner Loop Header: Depth=1 @@ -17661,12 +18471,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1120: # %middle.block2102 beq $a1, $a2, .LBB8_1123 .LBB8_1121: # %.lr.ph.i1046.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1122: # %.lr.ph.i1046 # =>This Inner Loop Header: Depth=1 @@ -17755,12 +18571,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1128: # %middle.block2118 beq $a1, $a2, .LBB8_1131 .LBB8_1129: # %.lr.ph.i1054.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1130: # %.lr.ph.i1054 # =>This Inner Loop Header: Depth=1 @@ -17849,12 +18671,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1136: # %middle.block2134 beq $a1, $a2, .LBB8_1139 .LBB8_1137: # %.lr.ph.i1062.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1138: # %.lr.ph.i1062 # =>This Inner Loop Header: Depth=1 @@ -17943,12 +18771,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1144: # %middle.block2150 beq $a1, $a2, .LBB8_1147 .LBB8_1145: # %.lr.ph.i1070.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1146: # %.lr.ph.i1070 # =>This Inner Loop Header: Depth=1 @@ -18037,12 +18871,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1152: # %middle.block2166 beq $a1, $a2, .LBB8_1155 .LBB8_1153: # %.lr.ph.i1078.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1154: # %.lr.ph.i1078 # =>This Inner Loop Header: Depth=1 @@ -18131,12 +18971,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1160: # %middle.block2182 beq $a1, $a2, .LBB8_1163 .LBB8_1161: # %.lr.ph.i1086.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1162: # %.lr.ph.i1086 # =>This Inner Loop Header: Depth=1 @@ -18225,12 +19071,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1168: # %middle.block2198 beq $a1, $a2, .LBB8_1171 .LBB8_1169: # %.lr.ph.i1094.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1170: # %.lr.ph.i1094 # =>This Inner Loop Header: Depth=1 @@ -18319,12 +19171,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1176: # %middle.block2214 beq $a1, $a2, .LBB8_1179 .LBB8_1177: # %.lr.ph.i1102.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1178: # %.lr.ph.i1102 # =>This Inner Loop Header: Depth=1 @@ -18413,12 +19271,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1184: # %middle.block2230 beq $a0, $a1, .LBB8_1187 .LBB8_1185: # %.lr.ph.i1110.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_1186: # %.lr.ph.i1110 # =>This Inner Loop Header: Depth=1 @@ -18489,15 +19353,9 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat .LCPI9_0: .dword 0x3fb999999999999a # double 0.10000000000000001 .dword 0x3fc999999999999a # double 0.20000000000000001 -.LCPI9_3: +.LCPI9_1: .dword 0x3fc999999999999a # double 0.20000000000000001 .dword 0x3fd3333333333333 # double 0.29999999999999999 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI9_1: - .dword 0x3ff199999999999a # double 1.1000000000000001 -.LCPI9_2: - .dword 0x3ff1f9a6b50b0f28 # double 1.1234500000000001 .text .globl _Z8loopInitj .p2align 5 @@ -18743,8 +19601,8 @@ _Z8loopInitj: # @_Z8loopInitj .LBB9_40: pcalau12i $a0, %pc_hi20(.LCPI9_0) addi.d $a0, $a0, %pc_lo12(.LCPI9_0) - pcalau12i $a1, %pc_hi20(.LCPI9_3) - addi.d $a1, $a1, %pc_lo12(.LCPI9_3) + pcalau12i $a1, %pc_hi20(.LCPI9_1) + addi.d $a1, $a1, %pc_lo12(.LCPI9_1) ld.w $a3, $s0, 1032 blez $a3, .LBB9_577 # %bb.41: # %.lr.ph.preheader.i429 @@ -18958,8 +19816,10 @@ _Z8loopInitj: # @_Z8loopInitj pcalau12i $a2, %pc_hi20(.LCPI9_0) addi.d $a2, $a2, %pc_lo12(.LCPI9_0) fldx.d $fa0, $a2, $a0 - ld.d $a2, $s0, 472 + ld.d $a3, $s0, 472 ori $a0, $zero, 4 + lu12i.w $a4, -419431 + lu12i.w $a2, -307024 bgeu $a1, $a0, .LBB9_920 # %bb.78: move $a0, $zero @@ -19148,12 +20008,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.102: # %middle.block3802 beq $a1, $a2, .LBB9_105 .LBB9_103: # %.lr.ph.i183.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_104: # %.lr.ph.i183 # =>This Inner Loop Header: Depth=1 @@ -19242,12 +20108,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.110: # %middle.block3818 beq $a1, $a2, .LBB9_113 .LBB9_111: # %.lr.ph.i191.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_112: # %.lr.ph.i191 # =>This Inner Loop Header: Depth=1 @@ -19336,12 +20208,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.118: # %middle.block3834 beq $a1, $a2, .LBB9_121 .LBB9_119: # %.lr.ph.i199.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_120: # %.lr.ph.i199 # =>This Inner Loop Header: Depth=1 @@ -19430,12 +20308,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.126: # %middle.block3850 beq $a1, $a2, .LBB9_129 .LBB9_127: # %.lr.ph.i207.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_128: # %.lr.ph.i207 # =>This Inner Loop Header: Depth=1 @@ -19524,12 +20408,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.134: # %middle.block3866 beq $a1, $a2, .LBB9_137 .LBB9_135: # %.lr.ph.i215.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_136: # %.lr.ph.i215 # =>This Inner Loop Header: Depth=1 @@ -19618,12 +20508,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.142: # %middle.block3882 beq $a0, $a1, .LBB9_1187 .LBB9_143: # %.lr.ph.i223.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_144: # %.lr.ph.i223 # =>This Inner Loop Header: Depth=1 @@ -19698,12 +20594,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.147: # %middle.block3546 beq $a1, $a2, .LBB9_150 .LBB9_148: # %.lr.ph.i231.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_149: # %.lr.ph.i231 # =>This Inner Loop Header: Depth=1 @@ -19792,12 +20694,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.155: # %middle.block3562 beq $a1, $a2, .LBB9_158 .LBB9_156: # %.lr.ph.i239.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_157: # %.lr.ph.i239 # =>This Inner Loop Header: Depth=1 @@ -19886,12 +20794,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.163: # %middle.block3578 beq $a1, $a2, .LBB9_166 .LBB9_164: # %.lr.ph.i247.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_165: # %.lr.ph.i247 # =>This Inner Loop Header: Depth=1 @@ -19980,12 +20894,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.171: # %middle.block3594 beq $a1, $a2, .LBB9_174 .LBB9_172: # %.lr.ph.i255.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_173: # %.lr.ph.i255 # =>This Inner Loop Header: Depth=1 @@ -20074,12 +20994,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.179: # %middle.block3610 beq $a1, $a2, .LBB9_182 .LBB9_180: # %.lr.ph.i263.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_181: # %.lr.ph.i263 # =>This Inner Loop Header: Depth=1 @@ -20168,12 +21094,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.187: # %middle.block3626 beq $a1, $a2, .LBB9_190 .LBB9_188: # %.lr.ph.i271.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_189: # %.lr.ph.i271 # =>This Inner Loop Header: Depth=1 @@ -20262,12 +21194,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.195: # %middle.block3642 beq $a1, $a2, .LBB9_198 .LBB9_196: # %.lr.ph.i279.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_197: # %.lr.ph.i279 # =>This Inner Loop Header: Depth=1 @@ -20356,12 +21294,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.203: # %middle.block3658 beq $a1, $a2, .LBB9_206 .LBB9_204: # %.lr.ph.i287.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_205: # %.lr.ph.i287 # =>This Inner Loop Header: Depth=1 @@ -20450,12 +21394,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.211: # %middle.block3674 beq $a1, $a2, .LBB9_214 .LBB9_212: # %.lr.ph.i295.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_213: # %.lr.ph.i295 # =>This Inner Loop Header: Depth=1 @@ -20544,12 +21494,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.219: # %middle.block3690 beq $a1, $a2, .LBB9_222 .LBB9_220: # %.lr.ph.i303.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_221: # %.lr.ph.i303 # =>This Inner Loop Header: Depth=1 @@ -20638,12 +21594,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.227: # %middle.block3706 beq $a1, $a2, .LBB9_230 .LBB9_228: # %.lr.ph.i311.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_229: # %.lr.ph.i311 # =>This Inner Loop Header: Depth=1 @@ -20732,12 +21694,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.235: # %middle.block3722 beq $a1, $a2, .LBB9_238 .LBB9_236: # %.lr.ph.i319.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_237: # %.lr.ph.i319 # =>This Inner Loop Header: Depth=1 @@ -20826,12 +21794,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.243: # %middle.block3738 beq $a1, $a2, .LBB9_246 .LBB9_244: # %.lr.ph.i327.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_245: # %.lr.ph.i327 # =>This Inner Loop Header: Depth=1 @@ -20920,12 +21894,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.251: # %middle.block3754 beq $a1, $a2, .LBB9_254 .LBB9_252: # %.lr.ph.i335.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_253: # %.lr.ph.i335 # =>This Inner Loop Header: Depth=1 @@ -21014,12 +21994,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.259: # %middle.block3770 beq $a1, $a2, .LBB9_262 .LBB9_260: # %.lr.ph.i343.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_261: # %.lr.ph.i343 # =>This Inner Loop Header: Depth=1 @@ -21108,12 +22094,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.267: # %middle.block3786 beq $a0, $a1, .LBB9_1187 .LBB9_268: # %.lr.ph.i351.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_269: # %.lr.ph.i351 # =>This Inner Loop Header: Depth=1 @@ -21210,12 +22202,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.274: # %middle.block2005 beq $a1, $a2, .LBB9_277 .LBB9_275: # %.lr.ph.i1117.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_276: # %.lr.ph.i1117 # =>This Inner Loop Header: Depth=1 @@ -21304,12 +22302,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.282: # %middle.block2021 beq $a1, $a2, .LBB9_285 .LBB9_283: # %.lr.ph.i1125.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_284: # %.lr.ph.i1125 # =>This Inner Loop Header: Depth=1 @@ -21398,12 +22402,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.290: # %middle.block2037 beq $a1, $a2, .LBB9_293 .LBB9_291: # %.lr.ph.i1133.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_292: # %.lr.ph.i1133 # =>This Inner Loop Header: Depth=1 @@ -21492,12 +22502,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.298: # %middle.block2053 beq $a0, $a1, .LBB9_1187 .LBB9_299: # %.lr.ph.i1141.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_300: # %.lr.ph.i1141 # =>This Inner Loop Header: Depth=1 @@ -21572,12 +22588,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.303: # %middle.block2869 beq $a1, $a2, .LBB9_306 .LBB9_304: # %.lr.ph.i691.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_305: # %.lr.ph.i691 # =>This Inner Loop Header: Depth=1 @@ -21666,12 +22688,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.311: # %middle.block2885 beq $a0, $a1, .LBB9_1187 .LBB9_312: # %.lr.ph.i699.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_313: # %.lr.ph.i699 # =>This Inner Loop Header: Depth=1 @@ -21746,12 +22774,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.316: # %middle.block2933 beq $a1, $a2, .LBB9_319 .LBB9_317: # %.lr.ph.i659.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_318: # %.lr.ph.i659 # =>This Inner Loop Header: Depth=1 @@ -21840,12 +22874,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.324: # %middle.block2949 beq $a0, $a1, .LBB9_1187 .LBB9_325: # %.lr.ph.i667.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_326: # %.lr.ph.i667 # =>This Inner Loop Header: Depth=1 @@ -21920,12 +22960,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.329: # %middle.block3045 beq $a1, $a2, .LBB9_332 .LBB9_330: # %.lr.ph.i578.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_331: # %.lr.ph.i578 # =>This Inner Loop Header: Depth=1 @@ -22014,12 +23060,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.337: # %middle.block3061 beq $a1, $a2, .LBB9_340 .LBB9_338: # %.lr.ph.i586.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_339: # %.lr.ph.i586 # =>This Inner Loop Header: Depth=1 @@ -22108,12 +23160,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.345: # %middle.block3077 beq $a1, $a2, .LBB9_348 .LBB9_346: # %.lr.ph.i594.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_347: # %.lr.ph.i594 # =>This Inner Loop Header: Depth=1 @@ -22202,12 +23260,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.353: # %middle.block3093 beq $a1, $a2, .LBB9_356 .LBB9_354: # %.lr.ph.i602.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_355: # %.lr.ph.i602 # =>This Inner Loop Header: Depth=1 @@ -22296,12 +23360,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.361: # %middle.block3109 beq $a0, $a1, .LBB9_1187 .LBB9_362: # %.lr.ph.i610.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_363: # %.lr.ph.i610 # =>This Inner Loop Header: Depth=1 @@ -22376,12 +23446,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.366: # %middle.block3029 beq $a0, $a1, .LBB9_1187 .LBB9_367: # %.lr.ph.i619.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_368: # %.lr.ph.i619 # =>This Inner Loop Header: Depth=1 @@ -22456,12 +23532,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.371: # %middle.block3898 beq $a1, $a2, .LBB9_374 .LBB9_372: # %.lr.ph.i.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_373: # %.lr.ph.i # =>This Inner Loop Header: Depth=1 @@ -22550,12 +23632,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.379: # %middle.block3914 beq $a1, $a2, .LBB9_382 .LBB9_380: # %.lr.ph.i167.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_381: # %.lr.ph.i167 # =>This Inner Loop Header: Depth=1 @@ -22644,12 +23732,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.387: # %middle.block3930 beq $a0, $a1, .LBB9_1187 .LBB9_388: # %.lr.ph.i175.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_389: # %.lr.ph.i175 # =>This Inner Loop Header: Depth=1 @@ -22724,12 +23818,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.392: # %middle.block3205 beq $a1, $a2, .LBB9_395 .LBB9_393: # %.lr.ph.i498.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_394: # %.lr.ph.i498 # =>This Inner Loop Header: Depth=1 @@ -22818,12 +23918,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.400: # %middle.block3221 beq $a1, $a2, .LBB9_403 .LBB9_401: # %.lr.ph.i506.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_402: # %.lr.ph.i506 # =>This Inner Loop Header: Depth=1 @@ -22912,12 +24018,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.408: # %middle.block3237 beq $a1, $a2, .LBB9_411 .LBB9_409: # %.lr.ph.i514.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_410: # %.lr.ph.i514 # =>This Inner Loop Header: Depth=1 @@ -23006,12 +24118,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.416: # %middle.block3253 beq $a1, $a2, .LBB9_419 .LBB9_417: # %.lr.ph.i522.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_418: # %.lr.ph.i522 # =>This Inner Loop Header: Depth=1 @@ -23100,12 +24218,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.424: # %middle.block3269 beq $a0, $a1, .LBB9_1187 .LBB9_425: # %.lr.ph.i530.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_426: # %.lr.ph.i530 # =>This Inner Loop Header: Depth=1 @@ -23180,12 +24304,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.429: # %middle.block2245 beq $a1, $a2, .LBB9_432 .LBB9_430: # %.lr.ph.i943.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_431: # %.lr.ph.i943 # =>This Inner Loop Header: Depth=1 @@ -23274,12 +24404,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.437: # %middle.block2261 beq $a1, $a2, .LBB9_440 .LBB9_438: # %.lr.ph.i951.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_439: # %.lr.ph.i951 # =>This Inner Loop Header: Depth=1 @@ -23368,12 +24504,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.445: # %middle.block2277 beq $a1, $a2, .LBB9_448 .LBB9_446: # %.lr.ph.i959.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_447: # %.lr.ph.i959 # =>This Inner Loop Header: Depth=1 @@ -23462,12 +24604,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.453: # %middle.block2293 beq $a1, $a2, .LBB9_456 .LBB9_454: # %.lr.ph.i967.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_455: # %.lr.ph.i967 # =>This Inner Loop Header: Depth=1 @@ -23556,12 +24704,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.461: # %middle.block2309 beq $a1, $a2, .LBB9_464 .LBB9_462: # %.lr.ph.i975.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_463: # %.lr.ph.i975 # =>This Inner Loop Header: Depth=1 @@ -23650,12 +24804,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.469: # %middle.block2325 beq $a1, $a2, .LBB9_472 .LBB9_470: # %.lr.ph.i983.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_471: # %.lr.ph.i983 # =>This Inner Loop Header: Depth=1 @@ -23744,12 +24904,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.477: # %middle.block2341 beq $a1, $a2, .LBB9_480 .LBB9_478: # %.lr.ph.i991.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_479: # %.lr.ph.i991 # =>This Inner Loop Header: Depth=1 @@ -23838,12 +25004,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.485: # %middle.block2357 beq $a1, $a2, .LBB9_488 .LBB9_486: # %.lr.ph.i999.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_487: # %.lr.ph.i999 # =>This Inner Loop Header: Depth=1 @@ -23932,12 +25104,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.493: # %middle.block2373 beq $a1, $a2, .LBB9_496 .LBB9_494: # %.lr.ph.i1007.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_495: # %.lr.ph.i1007 # =>This Inner Loop Header: Depth=1 @@ -24026,12 +25204,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.501: # %middle.block2389 beq $a0, $a1, .LBB9_504 .LBB9_502: # %.lr.ph.i1015.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_503: # %.lr.ph.i1015 # =>This Inner Loop Header: Depth=1 @@ -24136,12 +25320,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.512: # %middle.block2405 beq $a0, $a1, .LBB9_515 .LBB9_513: # %.lr.ph.i891.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_514: # %.lr.ph.i891 # =>This Inner Loop Header: Depth=1 @@ -24230,12 +25420,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.520: # %middle.block2421 beq $a0, $a1, .LBB9_523 .LBB9_521: # %.lr.ph.i899.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_522: # %.lr.ph.i899 # =>This Inner Loop Header: Depth=1 @@ -24324,12 +25520,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.528: # %middle.block2437 beq $a0, $a1, .LBB9_531 .LBB9_529: # %.lr.ph.i907.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_530: # %.lr.ph.i907 # =>This Inner Loop Header: Depth=1 @@ -24418,12 +25620,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.536: # %middle.block2453 beq $a0, $a1, .LBB9_539 .LBB9_537: # %.lr.ph.i915.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_538: # %.lr.ph.i915 # =>This Inner Loop Header: Depth=1 @@ -24512,12 +25720,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.544: # %middle.block2469 beq $a0, $a1, .LBB9_547 .LBB9_545: # %.lr.ph.i923.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_546: # %.lr.ph.i923 # =>This Inner Loop Header: Depth=1 @@ -24624,12 +25838,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.556: # %middle.block2485 beq $a0, $a1, .LBB9_1187 .LBB9_557: # %.lr.ph.i935.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_558: # %.lr.ph.i935 # =>This Inner Loop Header: Depth=1 @@ -24704,12 +25924,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.561: # %middle.block2901 beq $a1, $a2, .LBB9_564 .LBB9_562: # %.lr.ph.i675.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_563: # %.lr.ph.i675 # =>This Inner Loop Header: Depth=1 @@ -24798,12 +26024,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.569: # %middle.block2917 beq $a0, $a1, .LBB9_1187 .LBB9_570: # %.lr.ph.i683.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_571: # %.lr.ph.i683 # =>This Inner Loop Header: Depth=1 @@ -24869,13 +26101,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.574: # %middle.block3318 beq $a2, $a3, .LBB9_577 .LBB9_575: # %.lr.ph.i431.preheader - pcalau12i $a5, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI9_1) - pcalau12i $a5, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI9_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB9_576: # %.lr.ph.i431 # =>This Inner Loop Header: Depth=1 @@ -24959,13 +26197,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.582: # %middle.block3335 beq $a2, $a3, .LBB9_585 .LBB9_583: # %.lr.ph.i440.preheader - pcalau12i $a5, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI9_1) - pcalau12i $a5, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI9_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB9_584: # %.lr.ph.i440 # =>This Inner Loop Header: Depth=1 @@ -25049,13 +26293,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.590: # %middle.block3352 beq $a2, $a3, .LBB9_593 .LBB9_591: # %.lr.ph.i451.preheader - pcalau12i $a5, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI9_1) - pcalau12i $a5, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI9_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB9_592: # %.lr.ph.i451 # =>This Inner Loop Header: Depth=1 @@ -25139,13 +26389,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.598: # %middle.block3369 beq $a2, $a3, .LBB9_601 .LBB9_599: # %.lr.ph.i462.preheader - pcalau12i $a5, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI9_1) - pcalau12i $a5, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI9_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB9_600: # %.lr.ph.i462 # =>This Inner Loop Header: Depth=1 @@ -25229,13 +26485,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.606: # %middle.block3386 beq $a0, $a2, .LBB9_1187 .LBB9_607: # %.lr.ph.i473.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a3, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a0 alsl.d $a1, $a0, $a1, 4 addi.d $a1, $a1, 8 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa3, $a3 .p2align 4, , 16 .LBB9_608: # %.lr.ph.i473 # =>This Inner Loop Header: Depth=1 @@ -25313,12 +26575,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.611: # %middle.block1621 beq $a1, $a2, .LBB9_614 .LBB9_612: # %.lr.ph.i1293.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_613: # %.lr.ph.i1293 # =>This Inner Loop Header: Depth=1 @@ -25407,12 +26675,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.619: # %middle.block1637 beq $a1, $a2, .LBB9_622 .LBB9_620: # %.lr.ph.i1301.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_621: # %.lr.ph.i1301 # =>This Inner Loop Header: Depth=1 @@ -25501,12 +26775,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.627: # %middle.block1653 beq $a1, $a2, .LBB9_630 .LBB9_628: # %.lr.ph.i1309.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_629: # %.lr.ph.i1309 # =>This Inner Loop Header: Depth=1 @@ -25595,12 +26875,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.635: # %middle.block1669 beq $a1, $a2, .LBB9_638 .LBB9_636: # %.lr.ph.i1317.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_637: # %.lr.ph.i1317 # =>This Inner Loop Header: Depth=1 @@ -25689,12 +26975,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.643: # %middle.block1685 beq $a1, $a2, .LBB9_646 .LBB9_644: # %.lr.ph.i1325.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_645: # %.lr.ph.i1325 # =>This Inner Loop Header: Depth=1 @@ -25783,12 +27075,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.651: # %middle.block1701 beq $a0, $a1, .LBB9_1187 .LBB9_652: # %.lr.ph.i1333.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_653: # %.lr.ph.i1333 # =>This Inner Loop Header: Depth=1 @@ -25863,12 +27161,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.656: # %middle.block2821 beq $a1, $a2, .LBB9_659 .LBB9_657: # %.lr.ph.i707.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_658: # %.lr.ph.i707 # =>This Inner Loop Header: Depth=1 @@ -25957,12 +27261,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.664: # %middle.block2837 beq $a1, $a2, .LBB9_667 .LBB9_665: # %.lr.ph.i715.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_666: # %.lr.ph.i715 # =>This Inner Loop Header: Depth=1 @@ -26051,12 +27361,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.672: # %middle.block2853 beq $a0, $a1, .LBB9_1187 .LBB9_673: # %.lr.ph.i723.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_674: # %.lr.ph.i723 # =>This Inner Loop Header: Depth=1 @@ -26131,12 +27447,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.677: # %middle.block2597 beq $a1, $a2, .LBB9_680 .LBB9_678: # %.lr.ph.i827.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_679: # %.lr.ph.i827 # =>This Inner Loop Header: Depth=1 @@ -26225,12 +27547,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.685: # %middle.block2613 beq $a0, $a1, .LBB9_1187 .LBB9_686: # %.lr.ph.i835.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_687: # %.lr.ph.i835 # =>This Inner Loop Header: Depth=1 @@ -26305,12 +27633,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.690: # %middle.block3482 beq $a1, $a2, .LBB9_693 .LBB9_691: # %.lr.ph.i359.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_692: # %.lr.ph.i359 # =>This Inner Loop Header: Depth=1 @@ -26399,12 +27733,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.698: # %middle.block3498 beq $a1, $a2, .LBB9_701 .LBB9_699: # %.lr.ph.i367.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_700: # %.lr.ph.i367 # =>This Inner Loop Header: Depth=1 @@ -26493,12 +27833,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.706: # %middle.block3514 beq $a1, $a2, .LBB9_709 .LBB9_707: # %.lr.ph.i375.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_708: # %.lr.ph.i375 # =>This Inner Loop Header: Depth=1 @@ -26587,12 +27933,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.714: # %middle.block3530 beq $a0, $a1, .LBB9_1187 .LBB9_715: # %.lr.ph.i383.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_716: # %.lr.ph.i383 # =>This Inner Loop Header: Depth=1 @@ -26667,12 +28019,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.719: # %middle.block2565 beq $a1, $a2, .LBB9_722 .LBB9_720: # %.lr.ph.i843.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_721: # %.lr.ph.i843 # =>This Inner Loop Header: Depth=1 @@ -26761,12 +28119,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.727: # %middle.block2581 beq $a0, $a1, .LBB9_1187 .LBB9_728: # %.lr.ph.i851.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_729: # %.lr.ph.i851 # =>This Inner Loop Header: Depth=1 @@ -26841,12 +28205,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.732: # %middle.block2965 beq $a1, $a2, .LBB9_735 .LBB9_733: # %.lr.ph.i627.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_734: # %.lr.ph.i627 # =>This Inner Loop Header: Depth=1 @@ -26935,12 +28305,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.740: # %middle.block2981 beq $a1, $a2, .LBB9_743 .LBB9_741: # %.lr.ph.i635.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_742: # %.lr.ph.i635 # =>This Inner Loop Header: Depth=1 @@ -27029,12 +28405,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.748: # %middle.block2997 beq $a1, $a2, .LBB9_751 .LBB9_749: # %.lr.ph.i643.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_750: # %.lr.ph.i643 # =>This Inner Loop Header: Depth=1 @@ -27123,12 +28505,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.756: # %middle.block3013 beq $a0, $a1, .LBB9_1187 .LBB9_757: # %.lr.ph.i651.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_758: # %.lr.ph.i651 # =>This Inner Loop Header: Depth=1 @@ -27203,12 +28591,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.761: # %middle.block3402 beq $a1, $a2, .LBB9_764 .LBB9_762: # %.lr.ph.i391.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_763: # %.lr.ph.i391 # =>This Inner Loop Header: Depth=1 @@ -27297,12 +28691,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.769: # %middle.block3418 beq $a1, $a2, .LBB9_772 .LBB9_770: # %.lr.ph.i399.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_771: # %.lr.ph.i399 # =>This Inner Loop Header: Depth=1 @@ -27391,12 +28791,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.777: # %middle.block3434 beq $a1, $a2, .LBB9_780 .LBB9_778: # %.lr.ph.i407.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_779: # %.lr.ph.i407 # =>This Inner Loop Header: Depth=1 @@ -27485,12 +28891,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.785: # %middle.block3450 beq $a1, $a2, .LBB9_788 .LBB9_786: # %.lr.ph.i415.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_787: # %.lr.ph.i415 # =>This Inner Loop Header: Depth=1 @@ -27579,12 +28991,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.793: # %middle.block3466 beq $a0, $a1, .LBB9_1187 .LBB9_794: # %.lr.ph.i423.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_795: # %.lr.ph.i423 # =>This Inner Loop Header: Depth=1 @@ -27659,12 +29077,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.798: # %middle.block2741 beq $a1, $a2, .LBB9_801 .LBB9_799: # %.lr.ph.i731.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_800: # %.lr.ph.i731 # =>This Inner Loop Header: Depth=1 @@ -27753,12 +29177,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.806: # %middle.block2757 beq $a1, $a2, .LBB9_809 .LBB9_807: # %.lr.ph.i739.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_808: # %.lr.ph.i739 # =>This Inner Loop Header: Depth=1 @@ -27847,12 +29277,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.814: # %middle.block2773 beq $a1, $a2, .LBB9_817 .LBB9_815: # %.lr.ph.i747.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_816: # %.lr.ph.i747 # =>This Inner Loop Header: Depth=1 @@ -27941,12 +29377,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.822: # %middle.block2789 beq $a1, $a2, .LBB9_825 .LBB9_823: # %.lr.ph.i755.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_824: # %.lr.ph.i755 # =>This Inner Loop Header: Depth=1 @@ -28035,12 +29477,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.830: # %middle.block2805 beq $a0, $a1, .LBB9_1187 .LBB9_831: # %.lr.ph.i763.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_832: # %.lr.ph.i763 # =>This Inner Loop Header: Depth=1 @@ -28115,12 +29563,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.835: # %middle.block3125 beq $a1, $a2, .LBB9_838 .LBB9_836: # %.lr.ph.i538.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_837: # %.lr.ph.i538 # =>This Inner Loop Header: Depth=1 @@ -28209,12 +29663,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.843: # %middle.block3141 beq $a1, $a2, .LBB9_846 .LBB9_844: # %.lr.ph.i546.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_845: # %.lr.ph.i546 # =>This Inner Loop Header: Depth=1 @@ -28303,12 +29763,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.851: # %middle.block3157 beq $a1, $a2, .LBB9_854 .LBB9_852: # %.lr.ph.i554.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_853: # %.lr.ph.i554 # =>This Inner Loop Header: Depth=1 @@ -28397,12 +29863,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.859: # %middle.block3173 beq $a1, $a2, .LBB9_862 .LBB9_860: # %.lr.ph.i562.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_861: # %.lr.ph.i562 # =>This Inner Loop Header: Depth=1 @@ -28491,12 +29963,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.867: # %middle.block3189 beq $a0, $a1, .LBB9_1187 .LBB9_868: # %.lr.ph.i570.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_869: # %.lr.ph.i570 # =>This Inner Loop Header: Depth=1 @@ -28571,12 +30049,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.872: # %middle.block3285 beq $a1, $a2, .LBB9_875 .LBB9_873: # %.lr.ph.i482.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_874: # %.lr.ph.i482 # =>This Inner Loop Header: Depth=1 @@ -28665,12 +30149,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.880: # %middle.block3301 beq $a0, $a1, .LBB9_1187 .LBB9_881: # %.lr.ph.i490.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_882: # %.lr.ph.i490 # =>This Inner Loop Header: Depth=1 @@ -28745,12 +30235,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.885: # %middle.block1717 beq $a1, $a2, .LBB9_888 .LBB9_886: # %.lr.ph.i1253.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_887: # %.lr.ph.i1253 # =>This Inner Loop Header: Depth=1 @@ -28839,12 +30335,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.893: # %middle.block1733 beq $a1, $a2, .LBB9_896 .LBB9_894: # %.lr.ph.i1261.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_895: # %.lr.ph.i1261 # =>This Inner Loop Header: Depth=1 @@ -28933,12 +30435,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.901: # %middle.block1749 beq $a1, $a2, .LBB9_904 .LBB9_902: # %.lr.ph.i1269.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_903: # %.lr.ph.i1269 # =>This Inner Loop Header: Depth=1 @@ -29027,12 +30535,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.909: # %middle.block1765 beq $a1, $a2, .LBB9_912 .LBB9_910: # %.lr.ph.i1277.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_911: # %.lr.ph.i1277 # =>This Inner Loop Header: Depth=1 @@ -29121,12 +30635,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.917: # %middle.block1781 beq $a0, $a1, .LBB9_1187 .LBB9_918: # %.lr.ph.i1285.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_919: # %.lr.ph.i1285 # =>This Inner Loop Header: Depth=1 @@ -29147,41 +30667,39 @@ _Z8loopInitj: # @_Z8loopInitj bstrpick.d $a0, $a1, 30, 2 slli.d $a0, $a0, 2 vreplvei.d $vr1, $vr0, 0 - addi.d $a3, $a2, 16 - ori $a4, $zero, 0 - lu32i.d $a4, 1 - vreplgr2vr.d $vr2, $a4 - lu12i.w $a4, -419431 - ori $a4, $a4, 2458 - lu32i.d $a4, 104857 - lu52i.d $a4, $a4, 1023 - vreplgr2vr.d $vr3, $a4 - lu12i.w $a4, -307024 - ori $a4, $a4, 3880 - lu32i.d $a4, 129446 - lu52i.d $a4, $a4, 1023 - vreplgr2vr.d $vr4, $a4 - move $a4, $a0 + addi.d $a5, $a3, 16 + ori $a6, $zero, 0 + lu32i.d $a6, 1 + vreplgr2vr.d $vr2, $a6 + ori $a6, $a4, 2458 + lu32i.d $a6, 104857 + lu52i.d $a6, $a6, 1023 + vreplgr2vr.d $vr3, $a6 + ori $a6, $a2, 3880 + lu32i.d $a6, 129446 + lu52i.d $a6, $a6, 1023 + vreplgr2vr.d $vr4, $a6 + move $a6, $a0 .p2align 4, , 16 .LBB9_921: # %vector.body # =>This Inner Loop Header: Depth=1 vaddi.wu $vr5, $vr2, 2 - vpickve2gr.w $a5, $vr2, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa6, $a5 + vpickve2gr.w $a7, $vr2, 1 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa6, $a7 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a5, $vr2, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa7, $a5 + vpickve2gr.w $a7, $vr2, 0 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa7, $a7 ffint.d.l $fa7, $fa7 vextrins.d $vr7, $vr6, 16 - vpickve2gr.w $a5, $vr5, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa6, $a5 + vpickve2gr.w $a7, $vr5, 1 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa6, $a7 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a5, $vr5, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa5, $a5 + vpickve2gr.w $a7, $vr5, 0 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa5, $a7 ffint.d.l $fa5, $fa5 vextrins.d $vr5, $vr6, 16 vfadd.d $vr6, $vr7, $vr3 @@ -29192,34 +30710,38 @@ _Z8loopInitj: # @_Z8loopInitj vfadd.d $vr5, $vr5, $vr4 vfdiv.d $vr6, $vr6, $vr7 vfdiv.d $vr5, $vr8, $vr5 - vst $vr6, $a3, -16 - vst $vr5, $a3, 0 + vst $vr6, $a5, -16 + vst $vr5, $a5, 0 vaddi.wu $vr2, $vr2, 4 - addi.d $a4, $a4, -4 - addi.d $a3, $a3, 32 - bnez $a4, .LBB9_921 + addi.d $a6, $a6, -4 + addi.d $a5, $a5, 32 + bnez $a6, .LBB9_921 # %bb.922: # %middle.block beq $a0, $a1, .LBB9_1187 .LBB9_923: # %.lr.ph.i1341.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 - alsl.d $a2, $a0, $a2, 3 + alsl.d $a3, $a0, $a3, 3 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + ori $a2, $a2, 3880 + lu32i.d $a2, 129446 + lu52i.d $a2, $a2, 1023 + movgr2fr.d $fa2, $a2 .p2align 4, , 16 .LBB9_924: # %.lr.ph.i1341 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a0, 31, 0 - movgr2fr.d $fa3, $a3 + bstrpick.d $a2, $a0, 31, 0 + movgr2fr.d $fa3, $a2 ffint.d.l $fa3, $fa3 fadd.d $fa4, $fa3, $fa1 fmul.d $fa4, $fa0, $fa4 fadd.d $fa3, $fa3, $fa2 fdiv.d $fa3, $fa4, $fa3 - fst.d $fa3, $a2, 0 + fst.d $fa3, $a3, 0 addi.d $a1, $a1, -1 - addi.d $a2, $a2, 8 + addi.d $a3, $a3, 8 addi.w $a0, $a0, 1 bnez $a1, .LBB9_924 b .LBB9_1187 @@ -29281,12 +30803,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.927: # %middle.block2533 beq $a1, $a2, .LBB9_930 .LBB9_928: # %.lr.ph.i859.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_929: # %.lr.ph.i859 # =>This Inner Loop Header: Depth=1 @@ -29375,12 +30903,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.935: # %middle.block2549 beq $a0, $a1, .LBB9_1187 .LBB9_936: # %.lr.ph.i867.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_937: # %.lr.ph.i867 # =>This Inner Loop Header: Depth=1 @@ -29455,12 +30989,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.940: # %middle.block2629 beq $a1, $a2, .LBB9_943 .LBB9_941: # %.lr.ph.i771.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_942: # %.lr.ph.i771 # =>This Inner Loop Header: Depth=1 @@ -29549,12 +31089,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.948: # %middle.block2645 beq $a1, $a2, .LBB9_951 .LBB9_949: # %.lr.ph.i779.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_950: # %.lr.ph.i779 # =>This Inner Loop Header: Depth=1 @@ -29643,12 +31189,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.956: # %middle.block2661 beq $a1, $a2, .LBB9_959 .LBB9_957: # %.lr.ph.i787.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_958: # %.lr.ph.i787 # =>This Inner Loop Header: Depth=1 @@ -29737,12 +31289,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.964: # %middle.block2677 beq $a1, $a2, .LBB9_967 .LBB9_965: # %.lr.ph.i795.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_966: # %.lr.ph.i795 # =>This Inner Loop Header: Depth=1 @@ -29831,12 +31389,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.972: # %middle.block2693 beq $a1, $a2, .LBB9_975 .LBB9_973: # %.lr.ph.i803.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_974: # %.lr.ph.i803 # =>This Inner Loop Header: Depth=1 @@ -29925,12 +31489,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.980: # %middle.block2709 beq $a1, $a2, .LBB9_983 .LBB9_981: # %.lr.ph.i811.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_982: # %.lr.ph.i811 # =>This Inner Loop Header: Depth=1 @@ -30019,12 +31589,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.988: # %middle.block2725 beq $a0, $a1, .LBB9_1187 .LBB9_989: # %.lr.ph.i819.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_990: # %.lr.ph.i819 # =>This Inner Loop Header: Depth=1 @@ -30099,12 +31675,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.993: # %middle.block2501 beq $a1, $a2, .LBB9_996 .LBB9_994: # %.lr.ph.i875.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_995: # %.lr.ph.i875 # =>This Inner Loop Header: Depth=1 @@ -30193,12 +31775,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1001: # %middle.block2517 beq $a0, $a1, .LBB9_1187 .LBB9_1002: # %.lr.ph.i883.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_1003: # %.lr.ph.i883 # =>This Inner Loop Header: Depth=1 @@ -30273,12 +31861,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1006: # %middle.block1845 beq $a1, $a2, .LBB9_1009 .LBB9_1007: # %.lr.ph.i1149.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1008: # %.lr.ph.i1149 # =>This Inner Loop Header: Depth=1 @@ -30367,12 +31961,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1014: # %middle.block1861 beq $a1, $a2, .LBB9_1017 .LBB9_1015: # %.lr.ph.i1157.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1016: # %.lr.ph.i1157 # =>This Inner Loop Header: Depth=1 @@ -30461,12 +32061,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1022: # %middle.block1877 beq $a1, $a2, .LBB9_1025 .LBB9_1023: # %.lr.ph.i1165.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1024: # %.lr.ph.i1165 # =>This Inner Loop Header: Depth=1 @@ -30555,12 +32161,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1030: # %middle.block1893 beq $a1, $a2, .LBB9_1033 .LBB9_1031: # %.lr.ph.i1173.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1032: # %.lr.ph.i1173 # =>This Inner Loop Header: Depth=1 @@ -30649,12 +32261,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1038: # %middle.block1909 beq $a1, $a2, .LBB9_1041 .LBB9_1039: # %.lr.ph.i1181.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1040: # %.lr.ph.i1181 # =>This Inner Loop Header: Depth=1 @@ -30743,12 +32361,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1046: # %middle.block1925 beq $a1, $a2, .LBB9_1049 .LBB9_1047: # %.lr.ph.i1189.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1048: # %.lr.ph.i1189 # =>This Inner Loop Header: Depth=1 @@ -30837,12 +32461,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1054: # %middle.block1941 beq $a1, $a2, .LBB9_1057 .LBB9_1055: # %.lr.ph.i1197.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1056: # %.lr.ph.i1197 # =>This Inner Loop Header: Depth=1 @@ -30931,12 +32561,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1062: # %middle.block1957 beq $a1, $a2, .LBB9_1065 .LBB9_1063: # %.lr.ph.i1205.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1064: # %.lr.ph.i1205 # =>This Inner Loop Header: Depth=1 @@ -31025,12 +32661,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1070: # %middle.block1973 beq $a1, $a2, .LBB9_1073 .LBB9_1071: # %.lr.ph.i1213.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1072: # %.lr.ph.i1213 # =>This Inner Loop Header: Depth=1 @@ -31119,12 +32761,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1078: # %middle.block1989 beq $a0, $a1, .LBB9_1187 .LBB9_1079: # %.lr.ph.i1221.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_1080: # %.lr.ph.i1221 # =>This Inner Loop Header: Depth=1 @@ -31199,12 +32847,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1083: # %middle.block1797 beq $a1, $a2, .LBB9_1086 .LBB9_1084: # %.lr.ph.i1229.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1085: # %.lr.ph.i1229 # =>This Inner Loop Header: Depth=1 @@ -31293,12 +32947,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1091: # %middle.block1813 beq $a1, $a2, .LBB9_1094 .LBB9_1092: # %.lr.ph.i1237.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1093: # %.lr.ph.i1237 # =>This Inner Loop Header: Depth=1 @@ -31387,12 +33047,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1099: # %middle.block1829 beq $a0, $a1, .LBB9_1187 .LBB9_1100: # %.lr.ph.i1245.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_1101: # %.lr.ph.i1245 # =>This Inner Loop Header: Depth=1 @@ -31467,12 +33133,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1104: # %middle.block2069 beq $a1, $a2, .LBB9_1107 .LBB9_1105: # %.lr.ph.i1029.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1106: # %.lr.ph.i1029 # =>This Inner Loop Header: Depth=1 @@ -31561,12 +33233,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1112: # %middle.block2085 beq $a1, $a2, .LBB9_1115 .LBB9_1113: # %.lr.ph.i1037.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1114: # %.lr.ph.i1037 # =>This Inner Loop Header: Depth=1 @@ -31655,12 +33333,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1120: # %middle.block2101 beq $a1, $a2, .LBB9_1123 .LBB9_1121: # %.lr.ph.i1045.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1122: # %.lr.ph.i1045 # =>This Inner Loop Header: Depth=1 @@ -31749,12 +33433,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1128: # %middle.block2117 beq $a1, $a2, .LBB9_1131 .LBB9_1129: # %.lr.ph.i1053.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1130: # %.lr.ph.i1053 # =>This Inner Loop Header: Depth=1 @@ -31843,12 +33533,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1136: # %middle.block2133 beq $a1, $a2, .LBB9_1139 .LBB9_1137: # %.lr.ph.i1061.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1138: # %.lr.ph.i1061 # =>This Inner Loop Header: Depth=1 @@ -31937,12 +33633,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1144: # %middle.block2149 beq $a1, $a2, .LBB9_1147 .LBB9_1145: # %.lr.ph.i1069.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1146: # %.lr.ph.i1069 # =>This Inner Loop Header: Depth=1 @@ -32031,12 +33733,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1152: # %middle.block2165 beq $a1, $a2, .LBB9_1155 .LBB9_1153: # %.lr.ph.i1077.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1154: # %.lr.ph.i1077 # =>This Inner Loop Header: Depth=1 @@ -32125,12 +33833,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1160: # %middle.block2181 beq $a1, $a2, .LBB9_1163 .LBB9_1161: # %.lr.ph.i1085.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1162: # %.lr.ph.i1085 # =>This Inner Loop Header: Depth=1 @@ -32219,12 +33933,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1168: # %middle.block2197 beq $a1, $a2, .LBB9_1171 .LBB9_1169: # %.lr.ph.i1093.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1170: # %.lr.ph.i1093 # =>This Inner Loop Header: Depth=1 @@ -32313,12 +34033,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1176: # %middle.block2213 beq $a1, $a2, .LBB9_1179 .LBB9_1177: # %.lr.ph.i1101.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1178: # %.lr.ph.i1101 # =>This Inner Loop Header: Depth=1 @@ -32407,12 +34133,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1184: # %middle.block2229 beq $a0, $a1, .LBB9_1187 .LBB9_1185: # %.lr.ph.i1109.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_1186: # %.lr.ph.i1109 # =>This Inner Loop Header: Depth=1 diff --git a/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/__/runReferenceLoops.s b/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/__/runReferenceLoops.s index b31732f8..26d4daf4 100644 --- a/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/__/runReferenceLoops.s +++ b/results/MicroBenchmarks/LCALS/SubsetALambdaLoops/CMakeFiles/lcalsALambda.dir/__/runReferenceLoops.s @@ -903,14 +903,8 @@ _ZN8LoopStatD2Ev: # @_ZN8LoopStatD2Ev .size _ZN8LoopStatD2Ev, .Lfunc_end3-_ZN8LoopStatD2Ev .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z25computeReferenceLoopTimesv -.LCPI4_0: - .dword 0x3f5426fe718a86d7 # double 0.00123 -.LCPI4_1: - .dword 0xbf5426fe718a86d7 # double -0.00123 .text - .globl _Z25computeReferenceLoopTimesv + .globl _Z25computeReferenceLoopTimesv # -- Begin function _Z25computeReferenceLoopTimesv .p2align 5 .type _Z25computeReferenceLoopTimesv,@function _Z25computeReferenceLoopTimesv: # @_Z25computeReferenceLoopTimesv @@ -1649,13 +1643,16 @@ _Z25computeReferenceLoopTimesv: # @_Z25computeReferenceLoopTimesv pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 st.d $a0, $sp, 640 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI4_0) - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI4_1) ori $a0, $zero, 1 st.b $a0, $sp, 648 + lu12i.w $a0, 465064 + ori $a0, $a0, 1751 + lu32i.d $a0, 272126 + lu52i.d $a1, $a0, 1013 + movgr2fr.d $fs0, $a1 fadd.d $fa0, $fs2, $fs0 + lu52i.d $a0, $a0, -1035 + movgr2fr.d $fs1, $a0 fadd.d $fa1, $fs2, $fs1 fdiv.d $fa0, $fa0, $fa1 fst.d $fa0, $fp, 384 diff --git a/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/RawSubsetAbenchmarks.s b/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/RawSubsetAbenchmarks.s index 1b639180..b901ee1b 100644 --- a/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/RawSubsetAbenchmarks.s +++ b/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/RawSubsetAbenchmarks.s @@ -289,37 +289,32 @@ _ZL20BM_PRESSURE_CALC_RAWRN9benchmark5StateE: # @_ZL20BM_PRESSURE_CALC_RAWRN9ben .size _ZL20BM_PRESSURE_CALC_RAWRN9benchmark5StateE, .Lfunc_end0-_ZL20BM_PRESSURE_CALC_RAWRN9benchmark5StateE .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE -.LCPI1_0: - .dword 0x3c18987cee7f439d # double 3.333333E-19 -.LCPI1_1: - .dword 0x3842e7922a37d1a0 # double 1.1111110000000001E-37 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE .type _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE,@function _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE .cfi_startproc # %bb.0: # %_ZN9benchmark5State13StateIteratorC2EPS0_.exit - addi.d $sp, $sp, -512 - .cfi_def_cfa_offset 512 - st.d $ra, $sp, 504 # 8-byte Folded Spill - st.d $fp, $sp, 496 # 8-byte Folded Spill - st.d $s0, $sp, 488 # 8-byte Folded Spill - st.d $s1, $sp, 480 # 8-byte Folded Spill - st.d $s2, $sp, 472 # 8-byte Folded Spill - st.d $s3, $sp, 464 # 8-byte Folded Spill - st.d $s4, $sp, 456 # 8-byte Folded Spill - st.d $s5, $sp, 448 # 8-byte Folded Spill - st.d $s6, $sp, 440 # 8-byte Folded Spill - st.d $s7, $sp, 432 # 8-byte Folded Spill - st.d $s8, $sp, 424 # 8-byte Folded Spill - fst.d $fs0, $sp, 416 # 8-byte Folded Spill - fst.d $fs1, $sp, 408 # 8-byte Folded Spill - fst.d $fs2, $sp, 400 # 8-byte Folded Spill - fst.d $fs3, $sp, 392 # 8-byte Folded Spill - fst.d $fs4, $sp, 384 # 8-byte Folded Spill - fst.d $fs5, $sp, 376 # 8-byte Folded Spill + addi.d $sp, $sp, -528 + .cfi_def_cfa_offset 528 + st.d $ra, $sp, 520 # 8-byte Folded Spill + st.d $fp, $sp, 512 # 8-byte Folded Spill + st.d $s0, $sp, 504 # 8-byte Folded Spill + st.d $s1, $sp, 496 # 8-byte Folded Spill + st.d $s2, $sp, 488 # 8-byte Folded Spill + st.d $s3, $sp, 480 # 8-byte Folded Spill + st.d $s4, $sp, 472 # 8-byte Folded Spill + st.d $s5, $sp, 464 # 8-byte Folded Spill + st.d $s6, $sp, 456 # 8-byte Folded Spill + st.d $s7, $sp, 448 # 8-byte Folded Spill + st.d $s8, $sp, 440 # 8-byte Folded Spill + fst.d $fs0, $sp, 432 # 8-byte Folded Spill + fst.d $fs1, $sp, 424 # 8-byte Folded Spill + fst.d $fs2, $sp, 416 # 8-byte Folded Spill + fst.d $fs3, $sp, 408 # 8-byte Folded Spill + fst.d $fs4, $sp, 400 # 8-byte Folded Spill + fst.d $fs5, $sp, 392 # 8-byte Folded Spill + fst.d $fs6, $sp, 384 # 8-byte Folded Spill + fst.d $fs7, $sp, 376 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -337,6 +332,8 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma .cfi_offset 59, -120 .cfi_offset 60, -128 .cfi_offset 61, -136 + .cfi_offset 62, -144 + .cfi_offset 63, -152 move $s0, $a0 pcaddu18i $ra, %call36(_Z11getLoopDatav) jirl $ra, $ra, 0 @@ -345,11 +342,11 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma pcaddu18i $ra, %call36(_Z8loopInitj) jirl $ra, $ra, 0 ld.d $a0, $fp, 8 - st.d $a0, $sp, 176 # 8-byte Folded Spill + st.d $a0, $sp, 184 # 8-byte Folded Spill ld.d $a0, $fp, 16 - st.d $a0, $sp, 168 # 8-byte Folded Spill + st.d $a0, $sp, 176 # 8-byte Folded Spill ld.d $a0, $fp, 24 - st.d $a0, $sp, 160 # 8-byte Folded Spill + st.d $a0, $sp, 168 # 8-byte Folded Spill ld.d $s4, $fp, 32 ld.d $s5, $fp, 40 ld.d $s6, $fp, 48 @@ -366,7 +363,7 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma ld.d $a0, $fp, 112 st.d $a0, $sp, 336 # 8-byte Folded Spill ld.d $a0, $fp, 120 - st.d $a0, $sp, 328 # 8-byte Folded Spill + st.d $a0, $sp, 320 # 8-byte Folded Spill fld.d $fs0, $fp, 384 fld.d $fa0, $fp, 392 vst $vr0, $sp, 144 # 16-byte Folded Spill @@ -380,13 +377,13 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma jirl $ra, $ra, 0 vld $vr9, $sp, 128 # 16-byte Folded Reload vld $vr8, $sp, 144 # 16-byte Folded Reload - ld.d $t7, $sp, 160 # 8-byte Folded Reload - ld.d $t6, $sp, 168 # 8-byte Folded Reload - ld.d $t5, $sp, 176 # 8-byte Folded Reload + ld.d $t7, $sp, 168 # 8-byte Folded Reload + ld.d $t6, $sp, 176 # 8-byte Folded Reload + ld.d $t5, $sp, 184 # 8-byte Folded Reload move $t4, $s0 - bnez $fp, .LBB1_51 + bnez $fp, .LBB1_50 # %bb.1: # %_ZN9benchmark5State13StateIteratorC2EPS0_.exit - beqz $s3, .LBB1_51 + beqz $s3, .LBB1_50 # %bb.2: # %.preheader226.lr.ph move $ra, $s5 move $t8, $s4 @@ -403,29 +400,29 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma sub.d $a7, $t5, $t6 st.d $s6, $sp, 120 # 8-byte Folded Spill sub.d $t0, $t5, $t7 - st.d $s7, $sp, 192 # 8-byte Folded Spill + st.d $s7, $sp, 200 # 8-byte Folded Spill sub.d $t1, $t5, $s5 - st.d $s8, $sp, 184 # 8-byte Folded Spill - ld.d $t2, $sp, 192 # 8-byte Folded Reload + st.d $s8, $sp, 192 # 8-byte Folded Spill + ld.d $t2, $sp, 200 # 8-byte Folded Reload sub.d $t2, $t5, $t2 st.d $s2, $sp, 104 # 8-byte Folded Spill - ld.d $t3, $sp, 184 # 8-byte Folded Reload + ld.d $t3, $sp, 192 # 8-byte Folded Reload sub.d $t3, $t5, $t3 sltui $a7, $a7, 32 sltui $t0, $t0, 32 or $a7, $a7, $t0 sltui $t0, $t1, 32 - ld.d $t1, $sp, 192 # 8-byte Folded Reload + ld.d $t1, $sp, 200 # 8-byte Folded Reload or $a7, $a7, $t0 sltui $t0, $t2, 32 - ld.d $t2, $sp, 184 # 8-byte Folded Reload + ld.d $t2, $sp, 192 # 8-byte Folded Reload or $a7, $a7, $t0 sltui $t0, $t3, 32 ld.d $t3, $sp, 104 # 8-byte Folded Reload or $a7, $a7, $t0 bstrpick.d $t0, $s0, 62, 2 slli.d $t0, $t0, 2 - st.d $t0, $sp, 320 # 8-byte Folded Spill + st.d $t0, $sp, 312 # 8-byte Folded Spill sltu $a2, $t5, $a2 sltu $t0, $t7, $a0 and $a2, $a2, $t0 @@ -447,35 +444,35 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma sltu $a4, $t0, $a0 and $a3, $a3, $a4 or $a2, $a2, $a3 - st.d $a2, $sp, 288 # 8-byte Folded Spill + st.d $a2, $sp, 296 # 8-byte Folded Spill move $a3, $s0 bstrpick.d $a2, $s0, 62, 1 slli.d $a2, $a2, 1 - st.d $a2, $sp, 280 # 8-byte Folded Spill + st.d $a2, $sp, 288 # 8-byte Folded Spill sltu $a1, $t5, $a1 sltu $a0, $t2, $a0 and $a0, $a1, $a0 - st.d $a0, $sp, 272 # 8-byte Folded Spill + st.d $a0, $sp, 280 # 8-byte Folded Spill vreplvei.d $vr10, $vr8, 0 vreplvei.d $vr11, $vr9, 0 addi.d $a0, $t6, 16 - st.d $a0, $sp, 232 # 8-byte Folded Spill + st.d $a0, $sp, 240 # 8-byte Folded Spill addi.d $a0, $t5, 16 - st.d $a0, $sp, 264 # 8-byte Folded Spill + st.d $a0, $sp, 272 # 8-byte Folded Spill addi.d $a0, $t7, 16 - st.d $a0, $sp, 224 # 8-byte Folded Spill + st.d $a0, $sp, 232 # 8-byte Folded Spill addi.d $a0, $t2, 16 - st.d $a0, $sp, 256 # 8-byte Folded Spill + st.d $a0, $sp, 264 # 8-byte Folded Spill addi.d $a0, $s5, 16 - st.d $a0, $sp, 216 # 8-byte Folded Spill + st.d $a0, $sp, 224 # 8-byte Folded Spill addi.d $a0, $t1, 16 - st.d $a0, $sp, 208 # 8-byte Folded Spill + st.d $a0, $sp, 216 # 8-byte Folded Spill sltui $a0, $s0, 4 - st.d $a0, $sp, 248 # 8-byte Folded Spill + st.d $a0, $sp, 256 # 8-byte Folded Spill or $a0, $a0, $a7 ld.d $a7, $sp, 112 # 8-byte Folded Reload andi $a0, $a0, 1 - st.d $a0, $sp, 240 # 8-byte Folded Spill + st.d $a0, $sp, 248 # 8-byte Folded Spill vldi $vr12, -800 vldi $vr13, -928 movgr2fr.d $fs2, $zero @@ -484,12 +481,23 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma vldi $vr16, -864 vldi $vr17, -996 vldi $vr18, -872 + lu12i.w $a0, -71692 + ori $a0, $a0, 925 + lu32i.d $a0, -485252 + lu52i.d $a0, $a0, 961 + st.d $a0, $sp, 328 # 8-byte Folded Spill + movgr2fr.d $fs3, $a0 + lu12i.w $a0, 172925 + ori $a0, $a0, 416 + lu32i.d $a0, 190354 + lu52i.d $a0, $a0, 900 + movgr2fr.d $fs4, $a0 lu52i.d $a0, $zero, 1022 vreplgr2vr.d $vr19, $a0 ori $a0, $zero, 0 lu32i.d $a0, -524288 lu52i.d $a0, $a0, 1024 - st.d $a0, $sp, 200 # 8-byte Folded Spill + st.d $a0, $sp, 208 # 8-byte Folded Spill vldi $vr20, -912 st.d $s0, $sp, 368 # 8-byte Folded Spill b .LBB1_4 @@ -497,7 +505,7 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma .LBB1_3: # %._crit_edge # in Loop: Header=BB1_4 Depth=1 addi.d $a5, $a5, -1 - beqz $a5, .LBB1_51 + beqz $a5, .LBB1_50 .LBB1_4: # %.preheader226 # =>This Loop Header: Depth=1 # Child Loop BB1_8 Depth 2 @@ -508,12 +516,12 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma # Child Loop BB1_30 Depth 2 # Child Loop BB1_33 Depth 2 # Child Loop BB1_37 Depth 2 - # Child Loop BB1_47 Depth 2 + # Child Loop BB1_46 Depth 2 blez $a3, .LBB1_3 # %bb.5: # %.lr.ph.preheader # in Loop: Header=BB1_4 Depth=1 - st.d $a5, $sp, 296 # 8-byte Folded Spill - ld.d $a0, $sp, 240 # 8-byte Folded Reload + st.d $a5, $sp, 304 # 8-byte Folded Spill + ld.d $a0, $sp, 248 # 8-byte Folded Reload beqz $a0, .LBB1_7 # %bb.6: # in Loop: Header=BB1_4 Depth=1 move $a6, $zero @@ -521,13 +529,13 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma .p2align 4, , 16 .LBB1_7: # %vector.body327.preheader # in Loop: Header=BB1_4 Depth=1 - ld.d $a0, $sp, 208 # 8-byte Folded Reload - ld.d $a1, $sp, 216 # 8-byte Folded Reload - ld.d $a2, $sp, 256 # 8-byte Folded Reload - ld.d $a3, $sp, 224 # 8-byte Folded Reload - ld.d $a4, $sp, 264 # 8-byte Folded Reload - ld.d $a5, $sp, 232 # 8-byte Folded Reload - ld.d $a6, $sp, 320 # 8-byte Folded Reload + ld.d $a0, $sp, 216 # 8-byte Folded Reload + ld.d $a1, $sp, 224 # 8-byte Folded Reload + ld.d $a2, $sp, 264 # 8-byte Folded Reload + ld.d $a3, $sp, 232 # 8-byte Folded Reload + ld.d $a4, $sp, 272 # 8-byte Folded Reload + ld.d $a5, $sp, 240 # 8-byte Folded Reload + ld.d $a6, $sp, 312 # 8-byte Folded Reload lu52i.d $fp, $zero, -1026 .p2align 4, , 16 .LBB1_8: # %vector.body327 @@ -564,7 +572,7 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma bnez $a6, .LBB1_8 # %bb.9: # %middle.block340 # in Loop: Header=BB1_4 Depth=1 - ld.d $a0, $sp, 320 # 8-byte Folded Reload + ld.d $a0, $sp, 312 # 8-byte Folded Reload move $a6, $a0 ld.d $a3, $sp, 368 # 8-byte Folded Reload move $fp, $a3 @@ -577,6 +585,7 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma ld.d $s6, $sp, 344 # 8-byte Folded Reload ld.d $s7, $sp, 336 # 8-byte Folded Reload move $s8, $t0 + ld.d $a1, $sp, 328 # 8-byte Folded Reload beq $a3, $a0, .LBB1_16 .LBB1_10: # %.lr.ph.preheader344 # in Loop: Header=BB1_4 Depth=1 @@ -622,10 +631,11 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma ld.d $s6, $sp, 344 # 8-byte Folded Reload ld.d $s7, $sp, 336 # 8-byte Folded Reload move $s8, $t0 + ld.d $a1, $sp, 328 # 8-byte Folded Reload b .LBB1_16 .p2align 4, , 16 .LBB1_13: # in Loop: Header=BB1_16 Depth=2 - fld.d $fa0, $a0, %pc_lo12(.LCPI1_0) + movgr2fr.d $fa0, $a1 .LBB1_14: # in Loop: Header=BB1_16 Depth=2 fld.d $fa1, $s6, 0 fld.d $fa2, $s7, 0 @@ -648,8 +658,6 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma # => This Inner Loop Header: Depth=2 fld.d $fa0, $s0, 0 fcmp.clt.d $fcc0, $fs2, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - pcalau12i $a1, %pc_hi20(.LCPI1_1) fmov.d $fa0, $fs2 bcnez $fcc0, .LBB1_15 # %bb.17: # in Loop: Header=BB1_16 Depth=2 @@ -658,15 +666,14 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma frecip.d $fa0, $fa0 fld.d $fa1, $s2, 0 fld.d $fa2, $s4, 0 - fld.d $fa3, $s3, 0 - fld.d $fa4, $s5, 0 + fld.d $fa3, $s5, 0 + fld.d $fa4, $s3, 0 fmul.d $fa0, $fa0, $fa0 fmul.d $fa0, $fa0, $fa2 - fld.d $fa2, $a1, %pc_lo12(.LCPI1_1) - fmul.d $fa0, $fa0, $fa4 - fmadd.d $fa0, $fa1, $fa3, $fa0 + fmul.d $fa0, $fa0, $fa3 + fmadd.d $fa0, $fa1, $fa4, $fa0 fdiv.d $fa1, $fa0, $fs0 - fcmp.cle.d $fcc0, $fa1, $fa2 + fcmp.cle.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB1_13 # %bb.18: # in Loop: Header=BB1_16 Depth=2 fsqrt.d $fa0, $fa1 @@ -681,14 +688,11 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma vst $vr10, $sp, 64 # 16-byte Folded Spill vst $vr11, $sp, 48 # 16-byte Folded Spill vst $vr19, $sp, 32 # 16-byte Folded Spill - st.d $a0, $sp, 312 # 8-byte Folded Spill - st.d $a1, $sp, 304 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ld.d $a1, $sp, 304 # 8-byte Folded Reload - ld.d $a0, $sp, 312 # 8-byte Folded Reload vldi $vr20, -912 vld $vr19, $sp, 32 # 16-byte Folded Reload + ld.d $a1, $sp, 328 # 8-byte Folded Reload vldi $vr18, -872 vldi $vr17, -996 vldi $vr16, -864 @@ -703,24 +707,22 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma vld $vr8, $sp, 144 # 16-byte Folded Reload ld.d $t3, $sp, 104 # 8-byte Folded Reload ld.d $a7, $sp, 112 # 8-byte Folded Reload - ld.d $t2, $sp, 184 # 8-byte Folded Reload - ld.d $t1, $sp, 192 # 8-byte Folded Reload + ld.d $t2, $sp, 192 # 8-byte Folded Reload + ld.d $t1, $sp, 200 # 8-byte Folded Reload ld.d $t0, $sp, 120 # 8-byte Folded Reload ld.d $ra, $sp, 80 # 8-byte Folded Reload ld.d $t8, $sp, 88 # 8-byte Folded Reload - ld.d $t7, $sp, 160 # 8-byte Folded Reload - ld.d $t6, $sp, 168 # 8-byte Folded Reload - ld.d $t5, $sp, 176 # 8-byte Folded Reload + ld.d $t7, $sp, 168 # 8-byte Folded Reload + ld.d $t6, $sp, 176 # 8-byte Folded Reload + ld.d $t5, $sp, 184 # 8-byte Folded Reload ld.d $t4, $sp, 96 # 8-byte Folded Reload b .LBB1_14 .p2align 4, , 16 .LBB1_20: # %.preheader224 # in Loop: Header=BB1_4 Depth=1 - st.d $a1, $sp, 304 # 8-byte Folded Spill - st.d $a0, $sp, 312 # 8-byte Folded Spill addi.d $a0, $a3, -1 sltui $a0, $a0, 1 - ld.d $a1, $sp, 288 # 8-byte Folded Reload + ld.d $a1, $sp, 296 # 8-byte Folded Reload or $a0, $a0, $a1 andi $a0, $a0, 1 beqz $a0, .LBB1_22 @@ -736,9 +738,9 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma move $a3, $t1 move $a4, $t3 move $a5, $t0 - ld.d $a6, $sp, 280 # 8-byte Folded Reload + ld.d $a6, $sp, 288 # 8-byte Folded Reload lu52i.d $fp, $zero, -1023 - ld.d $s0, $sp, 200 # 8-byte Folded Reload + ld.d $s0, $sp, 208 # 8-byte Folded Reload .p2align 4, , 16 .LBB1_23: # %vector.body301 # Parent Loop BB1_4 Depth=1 @@ -768,7 +770,7 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma bnez $a6, .LBB1_23 # %bb.24: # %middle.block310 # in Loop: Header=BB1_4 Depth=1 - ld.d $a1, $sp, 280 # 8-byte Folded Reload + ld.d $a1, $sp, 288 # 8-byte Folded Reload move $a6, $a1 ld.d $a0, $sp, 368 # 8-byte Folded Reload beq $a0, $a1, .LBB1_27 @@ -809,23 +811,23 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma bnez $a6, .LBB1_26 .LBB1_27: # %.lr.ph233.preheader # in Loop: Header=BB1_4 Depth=1 - ld.d $a0, $sp, 272 # 8-byte Folded Reload - ld.d $a1, $sp, 248 # 8-byte Folded Reload + ld.d $a0, $sp, 280 # 8-byte Folded Reload + ld.d $a1, $sp, 256 # 8-byte Folded Reload or $a0, $a1, $a0 andi $a0, $a0, 1 beqz $a0, .LBB1_29 # %bb.28: # in Loop: Header=BB1_4 Depth=1 move $a2, $zero - ld.d $a5, $sp, 296 # 8-byte Folded Reload + ld.d $a5, $sp, 304 # 8-byte Folded Reload ld.d $a3, $sp, 368 # 8-byte Folded Reload b .LBB1_32 .p2align 4, , 16 .LBB1_29: # %vector.body.preheader # in Loop: Header=BB1_4 Depth=1 - ld.d $a0, $sp, 256 # 8-byte Folded Reload - ld.d $a1, $sp, 264 # 8-byte Folded Reload - ld.d $a2, $sp, 320 # 8-byte Folded Reload - ld.d $a5, $sp, 296 # 8-byte Folded Reload + ld.d $a0, $sp, 264 # 8-byte Folded Reload + ld.d $a1, $sp, 272 # 8-byte Folded Reload + ld.d $a2, $sp, 312 # 8-byte Folded Reload + ld.d $a5, $sp, 304 # 8-byte Folded Reload ld.d $a3, $sp, 368 # 8-byte Folded Reload .p2align 4, , 16 .LBB1_30: # %vector.body @@ -855,7 +857,7 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma bnez $a2, .LBB1_30 # %bb.31: # %middle.block # in Loop: Header=BB1_4 Depth=1 - ld.d $a0, $sp, 320 # 8-byte Folded Reload + ld.d $a0, $sp, 312 # 8-byte Folded Reload move $a2, $a0 beq $a3, $a0, .LBB1_34 .LBB1_32: # %.lr.ph233.preheader343 @@ -884,79 +886,77 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma # in Loop: Header=BB1_4 Depth=1 move $s7, $a3 move $s3, $t7 - move $s1, $ra - move $s2, $t1 - move $s8, $t3 - move $s6, $t0 - ld.d $a2, $sp, 352 # 8-byte Folded Reload - ld.d $a4, $sp, 328 # 8-byte Folded Reload - ld.d $a6, $sp, 360 # 8-byte Folded Reload - ld.d $s0, $sp, 344 # 8-byte Folded Reload - ld.d $s5, $sp, 336 # 8-byte Folded Reload - move $fp, $t5 - move $s4, $t8 - ld.d $a0, $sp, 312 # 8-byte Folded Reload - ld.d $a1, $sp, 304 # 8-byte Folded Reload + move $s6, $ra + move $s1, $t1 + move $fp, $t3 + move $s4, $t0 + ld.d $a1, $sp, 352 # 8-byte Folded Reload + ld.d $a2, $sp, 320 # 8-byte Folded Reload + ld.d $a4, $sp, 360 # 8-byte Folded Reload + ld.d $s8, $sp, 344 # 8-byte Folded Reload + ld.d $s0, $sp, 336 # 8-byte Folded Reload + move $s5, $t5 + move $s2, $t8 + ld.d $a0, $sp, 328 # 8-byte Folded Reload b .LBB1_37 .p2align 4, , 16 .LBB1_35: # %.lr.ph235._crit_edge # in Loop: Header=BB1_37 Depth=2 - fld.d $fs4, $fp, 0 - fld.d $fs5, $s4, 0 + fld.d $fs6, $s5, 0 + fld.d $fs7, $s2, 0 fmov.d $fa0, $fs2 .LBB1_36: # in Loop: Header=BB1_37 Depth=2 - fld.d $fa1, $s1, 0 - fld.d $fa2, $s2, 0 - fld.d $fa3, $s8, 0 - fld.d $fa4, $s6, 0 + fld.d $fa1, $s6, 0 + fld.d $fa2, $s1, 0 + fld.d $fa3, $fp, 0 + fld.d $fa4, $s4, 0 fadd.d $fa1, $fa1, $fa2 fadd.d $fa2, $fa3, $fa4 fmul.d $fa2, $fa2, $ft8 fmadd.d $fa1, $fa1, $ft9, $fa2 - fadd.d $fa0, $fa0, $fs5 + fadd.d $fa0, $fa0, $fs7 fadd.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fs3, $fa0 + fmul.d $fa0, $fs5, $fa0 fdiv.d $fa0, $fa0, $ft10 - fadd.d $fa0, $fs4, $fa0 + fadd.d $fa0, $fs6, $fa0 fabs.d $fa1, $fa0 fcmp.clt.d $fcc0, $fa1, $ft0 fsel $fa0, $fa0, $fs2, $fcc0 fcmp.clt.d $fcc0, $fa0, $ft1 fsel $fa0, $fa0, $ft1, $fcc0 - fst.d $fa0, $fp, 0 - addi.d $s4, $s4, 8 - addi.d $fp, $fp, 8 + fst.d $fa0, $s5, 0 + addi.d $s2, $s2, 8 addi.d $s5, $s5, 8 addi.d $s0, $s0, 8 - addi.d $a6, $a6, 8 + addi.d $s8, $s8, 8 addi.d $a4, $a4, 8 addi.d $a2, $a2, 8 - addi.d $s6, $s6, 8 - addi.d $s8, $s8, 8 - addi.d $s2, $s2, 8 + addi.d $a1, $a1, 8 + addi.d $s4, $s4, 8 + addi.d $fp, $fp, 8 addi.d $s1, $s1, 8 + addi.d $s6, $s6, 8 addi.d $s7, $s7, -1 addi.d $s3, $s3, 8 beqz $s7, .LBB1_43 .LBB1_37: # %.lr.ph235 # Parent Loop BB1_4 Depth=1 # => This Inner Loop Header: Depth=2 - fld.d $fs3, $s3, 0 - fcmp.cule.d $fcc0, $fs3, $fs2 + fld.d $fs5, $s3, 0 + fcmp.cule.d $fcc0, $fs5, $fs2 bceqz $fcc0, .LBB1_35 # %bb.38: # in Loop: Header=BB1_37 Depth=2 - fld.d $fa0, $a2, 0 - fld.d $fa1, $a4, 0 - fld.d $fa2, $a6, 0 - fld.d $fs4, $fp, 0 - fld.d $fs5, $s4, 0 + fld.d $fa0, $a1, 0 + fld.d $fa1, $a2, 0 + fld.d $fa2, $a4, 0 + fld.d $fs7, $s2, 0 + fld.d $fs6, $s5, 0 fmul.d $fa1, $fa1, $fa1 fmul.d $fa1, $fa1, $fa2 - fld.d $fa2, $a1, %pc_lo12(.LCPI1_1) - fmul.d $fa1, $fa1, $fs5 - fmadd.d $fa0, $fa0, $fs4, $fa1 + fmul.d $fa1, $fa1, $fs7 + fmadd.d $fa0, $fa0, $fs6, $fa1 fdiv.d $fa1, $fa0, $fs0 - fcmp.cle.d $fcc0, $fa1, $fa2 + fcmp.cle.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB1_41 # %bb.39: # in Loop: Header=BB1_37 Depth=2 fsqrt.d $fa0, $fa1 @@ -971,18 +971,17 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma vst $vr10, $sp, 64 # 16-byte Folded Spill vst $vr11, $sp, 48 # 16-byte Folded Spill vst $vr19, $sp, 32 # 16-byte Folded Spill - st.d $a2, $sp, 24 # 8-byte Folded Spill - st.d $a4, $sp, 16 # 8-byte Folded Spill - st.d $a6, $sp, 8 # 8-byte Folded Spill + st.d $a1, $sp, 24 # 8-byte Folded Spill + st.d $a2, $sp, 16 # 8-byte Folded Spill + st.d $a4, $sp, 8 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ld.d $a6, $sp, 8 # 8-byte Folded Reload - ld.d $a4, $sp, 16 # 8-byte Folded Reload - ld.d $a2, $sp, 24 # 8-byte Folded Reload - ld.d $a1, $sp, 304 # 8-byte Folded Reload - ld.d $a0, $sp, 312 # 8-byte Folded Reload + ld.d $a4, $sp, 8 # 8-byte Folded Reload + ld.d $a2, $sp, 16 # 8-byte Folded Reload + ld.d $a1, $sp, 24 # 8-byte Folded Reload vldi $vr20, -912 vld $vr19, $sp, 32 # 16-byte Folded Reload + ld.d $a0, $sp, 328 # 8-byte Folded Reload vldi $vr18, -872 vldi $vr17, -996 vldi $vr16, -864 @@ -993,27 +992,27 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma vld $vr11, $sp, 48 # 16-byte Folded Reload vld $vr10, $sp, 64 # 16-byte Folded Reload ld.d $a3, $sp, 368 # 8-byte Folded Reload - ld.d $a5, $sp, 296 # 8-byte Folded Reload + ld.d $a5, $sp, 304 # 8-byte Folded Reload vld $vr9, $sp, 128 # 16-byte Folded Reload vld $vr8, $sp, 144 # 16-byte Folded Reload ld.d $t3, $sp, 104 # 8-byte Folded Reload ld.d $a7, $sp, 112 # 8-byte Folded Reload - ld.d $t2, $sp, 184 # 8-byte Folded Reload - ld.d $t1, $sp, 192 # 8-byte Folded Reload + ld.d $t2, $sp, 192 # 8-byte Folded Reload + ld.d $t1, $sp, 200 # 8-byte Folded Reload ld.d $t0, $sp, 120 # 8-byte Folded Reload ld.d $ra, $sp, 80 # 8-byte Folded Reload ld.d $t8, $sp, 88 # 8-byte Folded Reload - ld.d $t7, $sp, 160 # 8-byte Folded Reload - ld.d $t6, $sp, 168 # 8-byte Folded Reload - ld.d $t5, $sp, 176 # 8-byte Folded Reload + ld.d $t7, $sp, 168 # 8-byte Folded Reload + ld.d $t6, $sp, 176 # 8-byte Folded Reload + ld.d $t5, $sp, 184 # 8-byte Folded Reload ld.d $t4, $sp, 96 # 8-byte Folded Reload b .LBB1_42 .p2align 4, , 16 .LBB1_41: # in Loop: Header=BB1_37 Depth=2 - fld.d $fa0, $a0, %pc_lo12(.LCPI1_0) + movgr2fr.d $fa0, $a0 .LBB1_42: # in Loop: Header=BB1_37 Depth=2 - fld.d $fa1, $s0, 0 - fld.d $fa2, $s5, 0 + fld.d $fa1, $s8, 0 + fld.d $fa2, $s0, 0 fmadd.d $fa0, $fa0, $fa1, $fa2 b .LBB1_36 .p2align 4, , 16 @@ -1022,17 +1021,15 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma move $s0, $t7 ld.d $s1, $sp, 352 # 8-byte Folded Reload move $s2, $t5 - ld.d $s3, $sp, 328 # 8-byte Folded Reload + ld.d $s3, $sp, 320 # 8-byte Folded Reload ld.d $s4, $sp, 360 # 8-byte Folded Reload move $s5, $t8 ld.d $s6, $sp, 344 # 8-byte Folded Reload ld.d $s7, $sp, 336 # 8-byte Folded Reload move $s8, $t0 - b .LBB1_47 + b .LBB1_46 .p2align 4, , 16 -.LBB1_44: # in Loop: Header=BB1_47 Depth=2 - fld.d $fa0, $a0, %pc_lo12(.LCPI1_0) -.LBB1_45: # in Loop: Header=BB1_47 Depth=2 +.LBB1_44: # in Loop: Header=BB1_46 Depth=2 fld.d $fa1, $s6, 0 fld.d $fa2, $s7, 0 fmadd.d $fa0, $fa0, $fa1, $fa2 @@ -1040,7 +1037,7 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma fcmp.clt.d $fcc0, $fa1, $fs1 fsel $fa0, $fa0, $fs2, $fcc0 fst.d $fa0, $s8, 0 -.LBB1_46: # in Loop: Header=BB1_47 Depth=2 +.LBB1_45: # in Loop: Header=BB1_46 Depth=2 addi.d $s8, $s8, 8 addi.d $s7, $s7, 8 addi.d $s6, $s6, 8 @@ -1052,32 +1049,32 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma addi.d $fp, $fp, -1 addi.d $s0, $s0, 8 beqz $fp, .LBB1_3 -.LBB1_47: # %.lr.ph237 +.LBB1_46: # %.lr.ph237 # Parent Loop BB1_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa0, $s0, 0 fcmp.cult.d $fcc0, $fs2, $fa0 - bcnez $fcc0, .LBB1_46 -# %bb.48: # in Loop: Header=BB1_47 Depth=2 + bcnez $fcc0, .LBB1_45 +# %bb.47: # in Loop: Header=BB1_46 Depth=2 fld.d $fa0, $s1, 0 fld.d $fa1, $s3, 0 fld.d $fa2, $s4, 0 - fld.d $fa3, $s2, 0 - fld.d $fa4, $s5, 0 + fld.d $fa3, $s5, 0 + fld.d $fa4, $s2, 0 fmul.d $fa1, $fa1, $fa1 fmul.d $fa1, $fa1, $fa2 - fld.d $fa2, $a1, %pc_lo12(.LCPI1_1) - fmul.d $fa1, $fa1, $fa4 - fmadd.d $fa0, $fa0, $fa3, $fa1 + fmul.d $fa1, $fa1, $fa3 + fmadd.d $fa0, $fa0, $fa4, $fa1 fdiv.d $fa1, $fa0, $fs0 - fcmp.cle.d $fcc0, $fa1, $fa2 + fcmp.cle.d $fcc0, $fa1, $fs4 + fmov.d $fa0, $fs3 bcnez $fcc0, .LBB1_44 -# %bb.49: # in Loop: Header=BB1_47 Depth=2 +# %bb.48: # in Loop: Header=BB1_46 Depth=2 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - bcnez $fcc0, .LBB1_45 -# %bb.50: # %call.sqrt510 - # in Loop: Header=BB1_47 Depth=2 + bcnez $fcc0, .LBB1_44 +# %bb.49: # %call.sqrt510 + # in Loop: Header=BB1_46 Depth=2 fmov.d $fa0, $fa1 st.d $t4, $sp, 96 # 8-byte Folded Spill st.d $t8, $sp, 88 # 8-byte Folded Spill @@ -1087,8 +1084,6 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma vst $vr19, $sp, 32 # 16-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ld.d $a1, $sp, 304 # 8-byte Folded Reload - ld.d $a0, $sp, 312 # 8-byte Folded Reload vldi $vr20, -912 vld $vr19, $sp, 32 # 16-byte Folded Reload vldi $vr18, -872 @@ -1101,53 +1096,50 @@ _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE: # @_ZL18BM_ENERGY_CALC_RAWRN9benchma vld $vr11, $sp, 48 # 16-byte Folded Reload vld $vr10, $sp, 64 # 16-byte Folded Reload ld.d $a3, $sp, 368 # 8-byte Folded Reload - ld.d $a5, $sp, 296 # 8-byte Folded Reload + ld.d $a5, $sp, 304 # 8-byte Folded Reload vld $vr9, $sp, 128 # 16-byte Folded Reload vld $vr8, $sp, 144 # 16-byte Folded Reload ld.d $t3, $sp, 104 # 8-byte Folded Reload ld.d $a7, $sp, 112 # 8-byte Folded Reload - ld.d $t2, $sp, 184 # 8-byte Folded Reload - ld.d $t1, $sp, 192 # 8-byte Folded Reload + ld.d $t2, $sp, 192 # 8-byte Folded Reload + ld.d $t1, $sp, 200 # 8-byte Folded Reload ld.d $t0, $sp, 120 # 8-byte Folded Reload ld.d $ra, $sp, 80 # 8-byte Folded Reload ld.d $t8, $sp, 88 # 8-byte Folded Reload - ld.d $t7, $sp, 160 # 8-byte Folded Reload - ld.d $t6, $sp, 168 # 8-byte Folded Reload - ld.d $t5, $sp, 176 # 8-byte Folded Reload + ld.d $t7, $sp, 168 # 8-byte Folded Reload + ld.d $t6, $sp, 176 # 8-byte Folded Reload + ld.d $t5, $sp, 184 # 8-byte Folded Reload ld.d $t4, $sp, 96 # 8-byte Folded Reload - b .LBB1_45 -.LBB1_51: # %._crit_edge240 + b .LBB1_44 +.LBB1_50: # %._crit_edge240 move $a0, $t4 - fld.d $fs5, $sp, 376 # 8-byte Folded Reload - fld.d $fs4, $sp, 384 # 8-byte Folded Reload - fld.d $fs3, $sp, 392 # 8-byte Folded Reload - fld.d $fs2, $sp, 400 # 8-byte Folded Reload - fld.d $fs1, $sp, 408 # 8-byte Folded Reload - fld.d $fs0, $sp, 416 # 8-byte Folded Reload - ld.d $s8, $sp, 424 # 8-byte Folded Reload - ld.d $s7, $sp, 432 # 8-byte Folded Reload - ld.d $s6, $sp, 440 # 8-byte Folded Reload - ld.d $s5, $sp, 448 # 8-byte Folded Reload - ld.d $s4, $sp, 456 # 8-byte Folded Reload - ld.d $s3, $sp, 464 # 8-byte Folded Reload - ld.d $s2, $sp, 472 # 8-byte Folded Reload - ld.d $s1, $sp, 480 # 8-byte Folded Reload - ld.d $s0, $sp, 488 # 8-byte Folded Reload - ld.d $fp, $sp, 496 # 8-byte Folded Reload - ld.d $ra, $sp, 504 # 8-byte Folded Reload - addi.d $sp, $sp, 512 + fld.d $fs7, $sp, 376 # 8-byte Folded Reload + fld.d $fs6, $sp, 384 # 8-byte Folded Reload + fld.d $fs5, $sp, 392 # 8-byte Folded Reload + fld.d $fs4, $sp, 400 # 8-byte Folded Reload + fld.d $fs3, $sp, 408 # 8-byte Folded Reload + fld.d $fs2, $sp, 416 # 8-byte Folded Reload + fld.d $fs1, $sp, 424 # 8-byte Folded Reload + fld.d $fs0, $sp, 432 # 8-byte Folded Reload + ld.d $s8, $sp, 440 # 8-byte Folded Reload + ld.d $s7, $sp, 448 # 8-byte Folded Reload + ld.d $s6, $sp, 456 # 8-byte Folded Reload + ld.d $s5, $sp, 464 # 8-byte Folded Reload + ld.d $s4, $sp, 472 # 8-byte Folded Reload + ld.d $s3, $sp, 480 # 8-byte Folded Reload + ld.d $s2, $sp, 488 # 8-byte Folded Reload + ld.d $s1, $sp, 496 # 8-byte Folded Reload + ld.d $s0, $sp, 504 # 8-byte Folded Reload + ld.d $fp, $sp, 512 # 8-byte Folded Reload + ld.d $ra, $sp, 520 # 8-byte Folded Reload + addi.d $sp, $sp, 528 pcaddu18i $t8, %call36(_ZN9benchmark5State17FinishKeepRunningEv) jr $t8 .Lfunc_end1: .size _ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE, .Lfunc_end1-_ZL18BM_ENERGY_CALC_RAWRN9benchmark5StateE .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZL17BM_VOL3D_CALC_RAWRN9benchmark5StateE -.LCPI2_0: - .dword 0x3fb5555555555555 # double 0.083333333333333329 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZL17BM_VOL3D_CALC_RAWRN9benchmark5StateE .type _ZL17BM_VOL3D_CALC_RAWRN9benchmark5StateE,@function _ZL17BM_VOL3D_CALC_RAWRN9benchmark5StateE: # @_ZL17BM_VOL3D_CALC_RAWRN9benchmark5StateE .Lfunc_begin0: @@ -1169,7 +1161,6 @@ _ZL17BM_VOL3D_CALC_RAWRN9benchmark5StateE: # @_ZL17BM_VOL3D_CALC_RAWRN9benchmark st.d $s7, $sp, 256 # 8-byte Folded Spill st.d $s8, $sp, 248 # 8-byte Folded Spill fst.d $fs0, $sp, 240 # 8-byte Folded Spill - fst.d $fs1, $sp, 232 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -1182,7 +1173,6 @@ _ZL17BM_VOL3D_CALC_RAWRN9benchmark5StateE: # @_ZL17BM_VOL3D_CALC_RAWRN9benchmark .cfi_offset 30, -80 .cfi_offset 31, -88 .cfi_offset 56, -96 - .cfi_offset 57, -104 move $s2, $a0 pcaddu18i $ra, %call36(_Z11getLoopDatav) jirl $ra, $ra, 0 @@ -1192,18 +1182,19 @@ _ZL17BM_VOL3D_CALC_RAWRN9benchmark5StateE: # @_ZL17BM_VOL3D_CALC_RAWRN9benchmark jirl $ra, $ra, 0 ld.d $s7, $s0, 8 ld.d $a0, $s2, 32 - ld.d $s6, $s0, 16 + ld.d $a1, $s0, 16 + st.d $a1, $sp, 72 # 8-byte Folded Spill ld.d $a1, $s0, 24 - st.d $a1, $sp, 112 # 8-byte Folded Spill + st.d $a1, $sp, 120 # 8-byte Folded Spill ld.d $a1, $s0, 32 - st.d $a1, $sp, 136 # 8-byte Folded Spill + st.d $a1, $sp, 144 # 8-byte Folded Spill ld.w $a1, $a0, 0 - addi.d $a0, $sp, 144 + addi.d $a0, $sp, 152 ori $a2, $zero, 3 pcaddu18i $ra, %call36(_ZN7ADomainC2Eii) jirl $ra, $ra, 0 - ld.w $s0, $sp, 180 - ld.w $fp, $sp, 184 + ld.w $s0, $sp, 188 + ld.w $fp, $sp, 192 ld.w $s1, $s2, 28 ld.d $s4, $s2, 16 .Ltmp0: # EH_LABEL @@ -1213,13 +1204,15 @@ _ZL17BM_VOL3D_CALC_RAWRN9benchmark5StateE: # @_ZL17BM_VOL3D_CALC_RAWRN9benchmark jirl $ra, $ra, 0 .Ltmp1: # EH_LABEL # %bb.1: # %_ZN9benchmark5State3endEv.exit.preheader + ld.d $t8, $sp, 72 # 8-byte Folded Reload bnez $s1, .LBB2_4 # %bb.2: # %_ZN9benchmark5State3endEv.exit.preheader beqz $s4, .LBB2_4 # %bb.3: # %.lr.ph213 - ld.w $t1, $sp, 208 - ld.w $a1, $sp, 212 - bge $a1, $t1, .LBB2_8 + ld.w $a0, $sp, 216 + ld.w $a1, $sp, 220 + st.d $a0, $sp, 128 # 8-byte Folded Spill + bge $a1, $a0, .LBB2_8 .LBB2_4: # %_ZN9benchmark5State3endEv.exit._crit_edge .Ltmp2: # EH_LABEL ld.d $a0, $sp, 8 # 8-byte Folded Reload @@ -1227,13 +1220,12 @@ _ZL17BM_VOL3D_CALC_RAWRN9benchmark5StateE: # @_ZL17BM_VOL3D_CALC_RAWRN9benchmark jirl $ra, $ra, 0 .Ltmp3: # EH_LABEL # %bb.5: # %_ZNK9benchmark5State13StateIteratorneERKS1_.exit - ld.d $a0, $sp, 216 + ld.d $a0, $sp, 224 beqz $a0, .LBB2_7 # %bb.6: pcaddu18i $ra, %call36(_ZdaPv) jirl $ra, $ra, 0 .LBB2_7: # %_ZN7ADomainD2Ev.exit - fld.d $fs1, $sp, 232 # 8-byte Folded Reload fld.d $fs0, $sp, 240 # 8-byte Folded Reload ld.d $s8, $sp, 248 # 8-byte Folded Reload ld.d $s7, $sp, 256 # 8-byte Folded Reload @@ -1249,145 +1241,143 @@ _ZL17BM_VOL3D_CALC_RAWRN9benchmark5StateE: # @_ZL17BM_VOL3D_CALC_RAWRN9benchmark addi.d $sp, $sp, 336 ret .LBB2_8: # %.lr.ph.preheader - addi.d $a2, $s7, 8 - alsl.d $a3, $s0, $s7, 3 - alsl.d $a0, $s0, $a2, 3 - st.d $a0, $sp, 128 # 8-byte Folded Spill + addi.d $a0, $s7, 8 + alsl.d $a2, $s0, $s7, 3 + alsl.d $a3, $s0, $a0, 3 + st.d $a3, $sp, 136 # 8-byte Folded Spill + alsl.d $t3, $fp, $a0, 3 alsl.d $a0, $fp, $a2, 3 - st.d $a0, $sp, 120 # 8-byte Folded Spill - alsl.d $t7, $fp, $a3, 3 - slli.d $a4, $t1, 3 - ld.d $a0, $sp, 136 # 8-byte Folded Reload - alsl.d $a3, $t1, $a0, 3 - sub.d $a2, $a1, $t1 + addi.d $a6, $t8, 8 + ld.d $s6, $sp, 128 # 8-byte Folded Reload + slli.d $a4, $s6, 3 + ld.d $a2, $sp, 144 # 8-byte Folded Reload + alsl.d $a5, $s6, $a2, 3 + sub.d $a2, $a1, $s6 bstrpick.d $a2, $a2, 31, 0 - alsl.d $a5, $a2, $a3, 3 - addi.d $a5, $a5, 8 - slli.d $a6, $fp, 3 + alsl.d $a3, $a2, $a5, 3 + addi.d $a3, $a3, 8 + slli.d $a7, $fp, 3 slli.d $t4, $s0, 3 - alsl.d $a6, $t1, $a6, 3 - alsl.d $t0, $s0, $a6, 3 - add.d $t5, $s7, $t0 - alsl.d $a7, $a2, $t0, 3 - addi.d $t3, $a7, 16 - add.d $t6, $s7, $t3 - add.d $t8, $s7, $a6 - alsl.d $a7, $a2, $a6, 3 - addi.d $a7, $a7, 16 - add.d $s1, $s7, $a7 - alsl.d $s2, $t1, $s7, 3 - sltu $t6, $a3, $t6 - sltu $t5, $t5, $a5 + alsl.d $a7, $s6, $a7, 3 + alsl.d $t1, $s0, $a7, 3 + add.d $t5, $s7, $t1 + alsl.d $t0, $a2, $t1, 3 + addi.d $t2, $t0, 16 + add.d $t6, $s7, $t2 + add.d $t7, $s7, $a7 + alsl.d $t0, $a2, $a7, 3 + addi.d $t0, $t0, 16 + add.d $s1, $s7, $t0 + alsl.d $s2, $s6, $s7, 3 + sltu $t6, $a5, $t6 + sltu $t5, $t5, $a3 and $t6, $t6, $t5 alsl.d $t5, $a2, $a4, 3 addi.d $t5, $t5, 16 - sltu $s1, $a3, $s1 - sltu $t8, $t8, $a5 - and $t8, $s1, $t8 + sltu $s1, $a5, $s1 + sltu $t7, $t7, $a3 + and $t7, $s1, $t7 add.d $s1, $s7, $t5 - alsl.d $t4, $t1, $t4, 3 - or $t8, $t6, $t8 + alsl.d $t4, $s6, $t4, 3 + or $t7, $t6, $t7 add.d $s3, $s7, $t4 - sltu $t6, $a3, $s1 - sltu $s1, $s2, $a5 + sltu $t6, $a5, $s1 + sltu $s1, $s2, $a3 and $s1, $t6, $s1 alsl.d $t6, $a2, $t4, 3 addi.d $t6, $t6, 16 - or $t8, $t8, $s1 + or $t7, $t7, $s1 add.d $s1, $s7, $t6 - sltu $s1, $a3, $s1 - sltu $s2, $s3, $a5 - and $s1, $s1, $s2 - add.d $s2, $s6, $t0 - or $t8, $t8, $s1 - add.d $s1, $s6, $t3 - sltu $s1, $a3, $s1 - sltu $s2, $s2, $a5 + sltu $s1, $a5, $s1 + sltu $s2, $s3, $a3 and $s1, $s1, $s2 - add.d $s2, $s6, $a6 - or $t8, $t8, $s1 - add.d $s1, $s6, $a7 - sltu $s1, $a3, $s1 - sltu $s2, $s2, $a5 + add.d $s2, $t8, $t1 + or $t7, $t7, $s1 + add.d $s1, $t8, $t2 + sltu $s1, $a5, $s1 + sltu $s2, $s2, $a3 and $s1, $s1, $s2 - alsl.d $s2, $t1, $s6, 3 - or $t8, $t8, $s1 - add.d $s1, $s6, $t5 - sltu $s1, $a3, $s1 - sltu $s2, $s2, $a5 + add.d $s2, $t8, $a7 + or $t7, $t7, $s1 + add.d $s1, $t8, $t0 + sltu $s1, $a5, $s1 + sltu $s2, $s2, $a3 and $s1, $s1, $s2 - add.d $s2, $s6, $t4 - or $t8, $t8, $s1 - add.d $s1, $s6, $t6 - sltu $s1, $a3, $s1 - sltu $s2, $s2, $a5 + alsl.d $s2, $s6, $t8, 3 + or $t7, $t7, $s1 + add.d $s1, $t8, $t5 + sltu $s1, $a5, $s1 + sltu $s2, $s2, $a3 and $s1, $s1, $s2 - addi.d $s2, $s6, 8 - or $s1, $t8, $s1 - alsl.d $t8, $s0, $s2, 3 - ld.d $a0, $sp, 112 # 8-byte Folded Reload - add.d $t0, $a0, $t0 - add.d $t3, $a0, $t3 - sltu $t3, $a3, $t3 - sltu $t0, $t0, $a5 - and $t0, $t3, $t0 - alsl.d $t2, $fp, $s6, 3 - alsl.d $t3, $fp, $s2, 3 - st.d $t3, $sp, 104 # 8-byte Folded Spill - or $t0, $s1, $t0 - st.d $t2, $sp, 64 # 8-byte Folded Spill - alsl.d $t2, $s0, $t2, 3 - st.d $t2, $sp, 96 # 8-byte Folded Spill - add.d $a6, $a0, $a6 - add.d $a7, $a0, $a7 - sltu $a7, $a3, $a7 - sltu $a6, $a6, $a5 - and $a6, $a7, $a6 - alsl.d $a7, $fp, $t8, 3 - st.d $a7, $sp, 56 # 8-byte Folded Spill - or $a6, $t0, $a6 - alsl.d $a7, $t1, $a0, 3 - add.d $t0, $a0, $t5 - sltu $t0, $a3, $t0 - sltu $a7, $a7, $a5 + add.d $s2, $t8, $t4 + or $s1, $t7, $s1 + add.d $t7, $t8, $t6 + sltu $t7, $a5, $t7 + sltu $s2, $s2, $a3 + and $s2, $t7, $s2 + alsl.d $t7, $s0, $a6, 3 + or $s1, $s1, $s2 + alsl.d $s3, $fp, $t8, 3 + alsl.d $a6, $fp, $a6, 3 + st.d $a6, $sp, 112 # 8-byte Folded Spill + ld.d $s2, $sp, 120 # 8-byte Folded Reload + add.d $a6, $s2, $t1 + add.d $t1, $s2, $t2 + sltu $t1, $a5, $t1 + sltu $a6, $a6, $a3 + and $a6, $t1, $a6 + st.d $s3, $sp, 64 # 8-byte Folded Spill + alsl.d $t1, $s0, $s3, 3 + st.d $t1, $sp, 104 # 8-byte Folded Spill + or $a6, $s1, $a6 + alsl.d $t1, $fp, $t7, 3 + st.d $t1, $sp, 56 # 8-byte Folded Spill + add.d $a7, $s2, $a7 + add.d $t0, $s2, $t0 + sltu $t0, $a5, $t0 + sltu $a7, $a7, $a3 + and $a7, $t0, $a7 + addi.d $t1, $s2, 8 + or $a6, $a6, $a7 + alsl.d $a7, $s6, $s2, 3 + add.d $t0, $s2, $t5 + sltu $t0, $a5, $t0 + sltu $a7, $a7, $a3 and $a7, $t0, $a7 - addi.d $t0, $a0, 8 - or $a7, $a6, $a7 - alsl.d $t2, $s0, $a0, 3 - st.d $t0, $sp, 48 # 8-byte Folded Spill - alsl.d $t3, $s0, $t0, 3 - add.d $a6, $a0, $t6 - sltu $a3, $a3, $a6 - alsl.d $a6, $fp, $a0, 3 - add.d $t0, $a0, $t4 - sltu $a5, $t0, $a5 - st.d $t2, $sp, 40 # 8-byte Folded Spill - alsl.d $a0, $fp, $t2, 3 - st.d $a0, $sp, 80 # 8-byte Folded Spill - st.d $t3, $sp, 88 # 8-byte Folded Spill - alsl.d $a0, $fp, $t3, 3 - st.d $a0, $sp, 32 # 8-byte Folded Spill - addi.d $a0, $a2, 1 - and $a2, $a3, $a5 - or $a2, $a7, $a2 - st.d $a0, $sp, 24 # 8-byte Folded Spill - bstrpick.d $a3, $a0, 32, 1 - xor $a5, $a1, $t1 - sltui $a5, $a5, 1 - or $a2, $a5, $a2 - pcalau12i $a5, %pc_hi20(.LCPI2_0) - fld.d $fa0, $a5, %pc_lo12(.LCPI2_0) - lu12i.w $a5, 349525 - ori $a5, $a5, 1365 - lu32i.d $a5, 349525 - lu52i.d $a5, $a5, 1019 - vreplgr2vr.d $vr1, $a5 + alsl.d $t0, $s0, $s2, 3 + st.d $t1, $sp, 48 # 8-byte Folded Spill + alsl.d $t2, $s0, $t1, 3 + or $a6, $a6, $a7 + alsl.d $t1, $fp, $s2, 3 + add.d $a7, $s2, $t6 + sltu $a5, $a5, $a7 + st.d $t0, $sp, 40 # 8-byte Folded Spill + alsl.d $a7, $fp, $t0, 3 + st.d $a7, $sp, 88 # 8-byte Folded Spill + st.d $t2, $sp, 96 # 8-byte Folded Spill + alsl.d $a7, $fp, $t2, 3 + st.d $a7, $sp, 32 # 8-byte Folded Spill + add.d $a7, $s2, $t4 + addi.d $t0, $a2, 1 + sltu $a2, $a7, $a3 + and $a2, $a5, $a2 + or $a2, $a6, $a2 + st.d $t0, $sp, 24 # 8-byte Folded Spill + bstrpick.d $a3, $t0, 32, 1 slli.d $s5, $a3, 1 - alsl.d $a0, $a3, $t1, 1 - st.d $a0, $sp, 16 # 8-byte Folded Spill - addi.d $a0, $a1, 1 - st.d $a0, $sp, 72 # 8-byte Folded Spill - andi $s8, $a2, 1 + alsl.d $a3, $a3, $s6, 1 + st.d $a3, $sp, 16 # 8-byte Folded Spill + addi.d $a3, $a1, 1 + st.d $a3, $sp, 80 # 8-byte Folded Spill + xor $a1, $a1, $s6 + sltui $a1, $a1, 1 + or $a1, $a1, $a2 + andi $s8, $a1, 1 + lu12i.w $a1, 349525 + ori $a1, $a1, 1365 + lu32i.d $a1, 349525 + lu52i.d $ra, $a1, 1019 + movgr2fr.d $fa0, $ra b .LBB2_10 .p2align 4, , 16 .LBB2_9: # %._crit_edge @@ -1398,136 +1388,134 @@ _ZL17BM_VOL3D_CALC_RAWRN9benchmark5StateE: # @_ZL17BM_VOL3D_CALC_RAWRN9benchmark # =>This Loop Header: Depth=1 # Child Loop BB2_12 Depth 2 # Child Loop BB2_15 Depth 2 - move $a1, $t1 + ld.d $a1, $sp, 128 # 8-byte Folded Reload bnez $s8, .LBB2_14 # %bb.11: # %vector.body.preheader # in Loop: Header=BB2_10 Depth=1 - move $t2, $t1 + move $a6, $t7 move $a7, $t8 - move $t1, $s6 - move $t4, $s6 - ld.d $a5, $sp, 64 # 8-byte Folded Reload - ld.d $t5, $sp, 56 # 8-byte Folded Reload - ld.d $t6, $sp, 32 # 8-byte Folded Reload - ld.d $t0, $sp, 48 # 8-byte Folded Reload - ld.d $a2, $sp, 40 # 8-byte Folded Reload - move $fp, $a6 - move $a1, $t7 - ld.d $s0, $sp, 120 # 8-byte Folded Reload - move $s6, $s7 - move $s1, $s7 - ld.d $a3, $sp, 128 # 8-byte Folded Reload - ld.d $s2, $sp, 136 # 8-byte Folded Reload - move $s3, $s5 + ld.d $t4, $sp, 64 # 8-byte Folded Reload + ld.d $a5, $sp, 56 # 8-byte Folded Reload + ld.d $t5, $sp, 32 # 8-byte Folded Reload + ld.d $t6, $sp, 48 # 8-byte Folded Reload + ld.d $t0, $sp, 40 # 8-byte Folded Reload + move $a2, $t1 + move $fp, $a0 + move $a1, $t3 + move $t8, $s7 + move $s0, $s7 + ld.d $s1, $sp, 136 # 8-byte Folded Reload + ld.d $a3, $sp, 144 # 8-byte Folded Reload + move $s2, $s5 .p2align 4, , 16 .LBB2_12: # %vector.body # Parent Loop BB2_10 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $ra, $a1, $a4 - vld $vr3, $ra, 8 - add.d $ra, $s1, $a4 - vld $vr2, $ra, 8 - vfsub.d $vr2, $vr3, $vr2 - add.d $ra, $a3, $a4 - vld $vr4, $ra, -8 - add.d $ra, $s0, $a4 - vld $vr5, $ra, -8 - vldx $vr6, $a3, $a4 - vldx $vr8, $s1, $a4 - vldx $vr9, $s0, $a4 - vfsub.d $vr7, $vr3, $vr4 - vfsub.d $vr4, $vr3, $vr5 - vfsub.d $vr5, $vr6, $vr8 - vfsub.d $vr3, $vr9, $vr8 - vldx $vr6, $a1, $a4 - vldx $vr10, $t5, $a4 - add.d $ra, $t4, $a4 - vld $vr11, $ra, 8 - add.d $ra, $a7, $a4 - vld $vr12, $ra, -8 - vfsub.d $vr9, $vr6, $vr8 - add.d $ra, $t5, $a4 - vfsub.d $vr6, $vr10, $vr11 - vfsub.d $vr11, $vr10, $vr12 - add.d $a0, $a5, $a4 - vldx $vr8, $a5, $a4 + add.d $s3, $fp, $a4 + vld $vr2, $s3, 8 + add.d $s3, $s0, $a4 + vld $vr1, $s3, 8 + vfsub.d $vr1, $vr2, $vr1 + add.d $s3, $s1, $a4 + vld $vr3, $s3, -8 + add.d $s3, $a1, $a4 + vld $vr4, $s3, -8 + vldx $vr5, $s1, $a4 + vldx $vr7, $s0, $a4 + vldx $vr8, $a1, $a4 + vfsub.d $vr6, $vr2, $vr3 + vfsub.d $vr3, $vr2, $vr4 + vfsub.d $vr4, $vr5, $vr7 + vfsub.d $vr2, $vr8, $vr7 + vldx $vr5, $fp, $a4 + vldx $vr9, $a5, $a4 + add.d $s3, $a7, $a4 + vld $vr10, $s3, 8 + add.d $s3, $a6, $a4 + vld $vr11, $s3, -8 + vfsub.d $vr8, $vr5, $vr7 + add.d $s3, $a5, $a4 + vfsub.d $vr5, $vr9, $vr10 + vfsub.d $vr10, $vr9, $vr11 + add.d $t2, $t4, $a4 + vldx $vr7, $t4, $a4 + vldx $vr11, $a6, $a4 vldx $vr12, $a7, $a4 - vldx $vr13, $t4, $a4 - vld $vr14, $a0, 8 - vld $vr15, $ra, -8 - vfsub.d $vr10, $vr10, $vr8 - vfsub.d $vr12, $vr12, $vr13 - vfsub.d $vr8, $vr14, $vr13 - vfsub.d $vr13, $vr15, $vr13 + vld $vr13, $t2, 8 + vld $vr14, $s3, -8 + vfsub.d $vr9, $vr9, $vr7 + vfsub.d $vr11, $vr11, $vr12 + vfsub.d $vr7, $vr13, $vr12 + vfsub.d $vr12, $vr14, $vr12 + vldx $vr13, $t5, $a4 vldx $vr14, $t6, $a4 + add.d $t2, $t5, $a4 vldx $vr15, $t0, $a4 - add.d $a0, $t6, $a4 + add.d $s3, $t6, $a4 + vfsub.d $vr14, $vr13, $vr14 + add.d $s7, $t0, $a4 + vfsub.d $vr15, $vr13, $vr15 + add.d $s6, $a2, $a4 vldx $vr16, $a2, $a4 - add.d $ra, $t0, $a4 - vfsub.d $vr15, $vr14, $vr15 - add.d $t3, $a2, $a4 - vfsub.d $vr16, $vr14, $vr16 - add.d $s7, $fp, $a4 - vldx $vr17, $fp, $a4 - vld $vr18, $t3, 8 - vld $vr19, $ra, -8 - vld $vr20, $s7, 8 - vld $vr21, $a0, -8 - vfsub.d $vr14, $vr14, $vr17 - vfsub.d $vr17, $vr18, $vr19 - vfsub.d $vr18, $vr20, $vr19 - vfsub.d $vr19, $vr21, $vr19 - vfadd.d $vr20, $vr2, $vr9 - vfadd.d $vr21, $vr6, $vr13 - vfadd.d $vr22, $vr15, $vr19 - vbitrevi.d $vr23, $vr12, 63 - vfmul.d $vr23, $vr16, $vr23 - vfmadd.d $vr23, $vr11, $vr17, $vr23 - vbitrevi.d $vr24, $vr17, 63 - vfmul.d $vr24, $vr7, $vr24 - vfmadd.d $vr24, $vr16, $vr5, $vr24 - vbitrevi.d $vr25, $vr5, 63 - vfmul.d $vr25, $vr11, $vr25 - vfmadd.d $vr25, $vr7, $vr12, $vr25 - vfmul.d $vr21, $vr21, $vr24 - vfmadd.d $vr20, $vr20, $vr23, $vr21 - vfmadd.d $vr20, $vr22, $vr25, $vr20 - vfadd.d $vr7, $vr7, $vr3 - vfadd.d $vr11, $vr11, $vr8 - vfadd.d $vr16, $vr16, $vr18 - vbitrevi.d $vr21, $vr13, 63 - vfmul.d $vr21, $vr14, $vr21 - vfmadd.d $vr21, $vr10, $vr19, $vr21 - vbitrevi.d $vr19, $vr19, 63 - vfmul.d $vr19, $vr4, $vr19 - vfmadd.d $vr19, $vr14, $vr9, $vr19 - vbitrevi.d $vr9, $vr9, 63 - vfmul.d $vr9, $vr10, $vr9 - vfmadd.d $vr9, $vr4, $vr13, $vr9 - vfmul.d $vr11, $vr11, $vr19 - vfmadd.d $vr7, $vr7, $vr21, $vr11 - vfmadd.d $vr7, $vr16, $vr9, $vr7 - vfadd.d $vr7, $vr20, $vr7 - vfadd.d $vr4, $vr4, $vr5 - vfadd.d $vr5, $vr10, $vr12 - vfadd.d $vr9, $vr14, $vr17 - vbitrevi.d $vr10, $vr8, 63 - vfmul.d $vr10, $vr15, $vr10 - vfmadd.d $vr10, $vr6, $vr18, $vr10 - vbitrevi.d $vr11, $vr18, 63 - vfmul.d $vr11, $vr2, $vr11 - vfmadd.d $vr11, $vr15, $vr3, $vr11 - vbitrevi.d $vr3, $vr3, 63 - vfmul.d $vr3, $vr6, $vr3 - vfmadd.d $vr2, $vr2, $vr8, $vr3 - vfmul.d $vr3, $vr5, $vr11 - vfmadd.d $vr3, $vr4, $vr10, $vr3 - vfmadd.d $vr2, $vr9, $vr2, $vr3 - vfadd.d $vr2, $vr2, $vr7 - vfmul.d $vr2, $vr2, $vr1 - vstx $vr2, $s2, $a4 - addi.d $s3, $s3, -2 - addi.d $s2, $s2, 16 + vld $vr17, $s7, 8 + vld $vr18, $s3, -8 + vld $vr19, $s6, 8 + vld $vr20, $t2, -8 + vfsub.d $vr13, $vr13, $vr16 + vfsub.d $vr16, $vr17, $vr18 + vfsub.d $vr17, $vr19, $vr18 + vfsub.d $vr18, $vr20, $vr18 + vfadd.d $vr19, $vr1, $vr8 + vfadd.d $vr20, $vr5, $vr12 + vfadd.d $vr21, $vr14, $vr18 + vbitrevi.d $vr22, $vr11, 63 + vfmul.d $vr22, $vr15, $vr22 + vfmadd.d $vr22, $vr10, $vr16, $vr22 + vbitrevi.d $vr23, $vr16, 63 + vfmul.d $vr23, $vr6, $vr23 + vfmadd.d $vr23, $vr15, $vr4, $vr23 + vbitrevi.d $vr24, $vr4, 63 + vfmul.d $vr24, $vr10, $vr24 + vfmadd.d $vr24, $vr6, $vr11, $vr24 + vfmul.d $vr20, $vr20, $vr23 + vfmadd.d $vr19, $vr19, $vr22, $vr20 + vfmadd.d $vr19, $vr21, $vr24, $vr19 + vfadd.d $vr6, $vr6, $vr2 + vfadd.d $vr10, $vr10, $vr7 + vfadd.d $vr15, $vr15, $vr17 + vbitrevi.d $vr20, $vr12, 63 + vfmul.d $vr20, $vr13, $vr20 + vfmadd.d $vr20, $vr9, $vr18, $vr20 + vbitrevi.d $vr18, $vr18, 63 + vfmul.d $vr18, $vr3, $vr18 + vfmadd.d $vr18, $vr13, $vr8, $vr18 + vbitrevi.d $vr8, $vr8, 63 + vfmul.d $vr8, $vr9, $vr8 + vfmadd.d $vr8, $vr3, $vr12, $vr8 + vfmul.d $vr10, $vr10, $vr18 + vfmadd.d $vr6, $vr6, $vr20, $vr10 + vfmadd.d $vr6, $vr15, $vr8, $vr6 + vfadd.d $vr6, $vr19, $vr6 + vfadd.d $vr3, $vr3, $vr4 + vfadd.d $vr4, $vr9, $vr11 + vfadd.d $vr8, $vr13, $vr16 + vbitrevi.d $vr9, $vr7, 63 + vfmul.d $vr9, $vr14, $vr9 + vfmadd.d $vr9, $vr5, $vr17, $vr9 + vbitrevi.d $vr10, $vr17, 63 + vfmul.d $vr10, $vr1, $vr10 + vfmadd.d $vr10, $vr14, $vr2, $vr10 + vbitrevi.d $vr2, $vr2, 63 + vfmul.d $vr2, $vr5, $vr2 + vfmadd.d $vr1, $vr1, $vr7, $vr2 + vfmul.d $vr2, $vr4, $vr10 + vfmadd.d $vr2, $vr3, $vr9, $vr2 + vfmadd.d $vr1, $vr8, $vr1, $vr2 + vfadd.d $vr1, $vr1, $vr6 + vreplgr2vr.d $vr2, $ra + vfmul.d $vr1, $vr1, $vr2 + vstx $vr1, $a3, $a4 + addi.d $s2, $s2, -2 addi.d $a3, $a3, 16 addi.d $s1, $s1, 16 addi.d $s0, $s0, 16 @@ -1540,142 +1528,142 @@ _ZL17BM_VOL3D_CALC_RAWRN9benchmark5StateE: # @_ZL17BM_VOL3D_CALC_RAWRN9benchmark addi.d $a5, $a5, 16 addi.d $t4, $t4, 16 addi.d $a7, $a7, 16 - bnez $s3, .LBB2_12 + addi.d $a6, $a6, 16 + bnez $s2, .LBB2_12 # %bb.13: # %middle.block # in Loop: Header=BB2_10 Depth=1 ld.d $a1, $sp, 16 # 8-byte Folded Reload - move $s7, $s6 - move $s6, $t1 - move $t1, $t2 - ld.d $a0, $sp, 24 # 8-byte Folded Reload - beq $a0, $s5, .LBB2_9 + move $s7, $t8 + ld.d $t8, $sp, 72 # 8-byte Folded Reload + ld.d $a2, $sp, 24 # 8-byte Folded Reload + beq $a2, $s5, .LBB2_9 .LBB2_14: # %scalar.ph.preheader # in Loop: Header=BB2_10 Depth=1 - slli.d $ra, $a1, 3 - ld.d $a0, $sp, 72 # 8-byte Folded Reload - sub.d $fp, $a0, $a1 - move $t5, $a6 - ld.d $t4, $sp, 88 # 8-byte Folded Reload - ld.d $a7, $sp, 104 # 8-byte Folded Reload - move $s2, $s6 - move $a5, $t8 - ld.d $s3, $sp, 112 # 8-byte Folded Reload - ld.d $t6, $sp, 80 # 8-byte Folded Reload - ld.d $t0, $sp, 96 # 8-byte Folded Reload - ld.d $a2, $sp, 120 # 8-byte Folded Reload + slli.d $fp, $a1, 3 + ld.d $a2, $sp, 80 # 8-byte Folded Reload + sub.d $a6, $a2, $a1 + move $t5, $t1 + ld.d $t4, $sp, 96 # 8-byte Folded Reload + ld.d $a7, $sp, 112 # 8-byte Folded Reload + move $s2, $t8 + move $a5, $t7 + ld.d $s3, $sp, 120 # 8-byte Folded Reload + ld.d $t6, $sp, 88 # 8-byte Folded Reload + ld.d $t0, $sp, 104 # 8-byte Folded Reload + move $a2, $t3 move $s1, $s7 - ld.d $a1, $sp, 128 # 8-byte Folded Reload - ld.d $s0, $sp, 136 # 8-byte Folded Reload - move $a3, $t7 + ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $s0, $sp, 144 # 8-byte Folded Reload + move $a3, $a0 .p2align 4, , 16 .LBB2_15: # %scalar.ph # Parent Loop BB2_10 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $a0, $a3, $ra - add.d $t3, $a1, $ra - fld.d $fa2, $t3, -8 - add.d $t3, $a2, $ra - fld.d $fa3, $t3, -8 - fldx.d $fa4, $a1, $ra - fld.d $fa5, $a0, 8 - fldx.d $fa7, $a2, $ra - vldx $vr8, $a3, $ra - fldx.d $ft1, $s1, $ra - fsub.d $fa6, $fa5, $fa2 - vldx $vr10, $s1, $ra - fsub.d $fa3, $fa5, $fa3 - fsub.d $fa4, $fa4, $ft1 - fsub.d $fa2, $fa7, $ft1 - vfsub.d $vr9, $vr8, $vr10 - add.d $a0, $t0, $ra - add.d $t3, $a5, $ra - fld.d $fa5, $t3, -8 - add.d $t3, $a7, $ra - fld.d $fa7, $t3, -8 - fldx.d $ft0, $a5, $ra - fld.d $ft2, $a0, 8 - fldx.d $ft3, $a7, $ra - vldx $vr12, $t0, $ra - fldx.d $ft5, $s2, $ra - fsub.d $ft6, $ft2, $fa5 - vldx $vr15, $s2, $ra - fsub.d $fa7, $ft2, $fa7 - fsub.d $ft0, $ft0, $ft5 - fsub.d $fa5, $ft3, $ft5 - vfsub.d $vr10, $vr12, $vr15 - add.d $a0, $t6, $ra - add.d $t3, $t4, $ra - fld.d $ft3, $t3, -8 - add.d $t3, $t5, $ra - fldx.d $ft4, $t5, $ra - fldx.d $ft5, $t4, $ra - fld.d $ft7, $a0, 8 - fld.d $ft8, $t3, 8 - vldx $vr17, $t6, $ra - fldx.d $ft10, $s3, $ra - fsub.d $ft3, $ft7, $ft3 - vldx $vr19, $s3, $ra - fsub.d $ft4, $ft7, $ft4 - fsub.d $ft5, $ft5, $ft10 - fsub.d $ft7, $ft8, $ft10 - vfsub.d $vr16, $vr17, $vr19 - vreplvei.d $vr17, $vr9, 0 + add.d $t2, $a3, $fp + add.d $s6, $a1, $fp + fld.d $fa1, $s6, -8 + add.d $s6, $a2, $fp + fld.d $fa2, $s6, -8 + fldx.d $fa3, $a1, $fp + fld.d $fa4, $t2, 8 + fldx.d $fa6, $a2, $fp + vldx $vr7, $a3, $fp + fldx.d $ft0, $s1, $fp + fsub.d $fa5, $fa4, $fa1 + vldx $vr9, $s1, $fp + fsub.d $fa2, $fa4, $fa2 + fsub.d $fa3, $fa3, $ft0 + fsub.d $fa1, $fa6, $ft0 + vfsub.d $vr8, $vr7, $vr9 + add.d $t2, $t0, $fp + add.d $s6, $a5, $fp + fld.d $fa4, $s6, -8 + add.d $s6, $a7, $fp + fld.d $fa6, $s6, -8 + fldx.d $fa7, $a5, $fp + fld.d $ft1, $t2, 8 + fldx.d $ft2, $a7, $fp + vldx $vr11, $t0, $fp + fldx.d $ft4, $s2, $fp + fsub.d $ft5, $ft1, $fa4 + vldx $vr14, $s2, $fp + fsub.d $fa6, $ft1, $fa6 + fsub.d $fa7, $fa7, $ft4 + fsub.d $fa4, $ft2, $ft4 + vfsub.d $vr9, $vr11, $vr14 + add.d $t2, $t6, $fp + add.d $s6, $t4, $fp + fld.d $ft2, $s6, -8 + add.d $s6, $t5, $fp + fldx.d $ft3, $t5, $fp + fldx.d $ft4, $t4, $fp + fld.d $ft6, $t2, 8 + fld.d $ft7, $s6, 8 + vldx $vr16, $t6, $fp + fldx.d $ft9, $s3, $fp + fsub.d $ft2, $ft6, $ft2 + vldx $vr18, $s3, $fp + fsub.d $ft3, $ft6, $ft3 + fsub.d $ft4, $ft4, $ft9 + fsub.d $ft6, $ft7, $ft9 + vfsub.d $vr15, $vr16, $vr18 + vreplvei.d $vr16, $vr8, 0 + vreplvei.d $vr8, $vr8, 1 + fadd.d $ft9, $ft0, $ft8 + vreplvei.d $vr18, $vr9, 0 vreplvei.d $vr9, $vr9, 1 - fadd.d $ft10, $ft1, $ft9 - vreplvei.d $vr19, $vr10, 0 - vreplvei.d $vr10, $vr10, 1 - fadd.d $ft12, $ft2, $ft11 - vreplvei.d $vr21, $vr16, 0 - vreplvei.d $vr16, $vr16, 1 - fadd.d $ft14, $ft8, $ft13 - fneg.d $ft15, $ft0 - fmul.d $ft15, $ft3, $ft15 - fmadd.d $ft15, $ft6, $ft5, $ft15 - fneg.d $fs0, $ft5 - fmul.d $fs0, $fa6, $fs0 - fmadd.d $fs0, $ft3, $fa4, $fs0 - fneg.d $fs1, $fa4 - fmul.d $fs1, $ft6, $fs1 - fmadd.d $fs1, $fa6, $ft0, $fs1 - fmul.d $ft12, $ft12, $fs0 - fmadd.d $ft10, $ft10, $ft15, $ft12 - fmadd.d $ft10, $ft14, $fs1, $ft10 - fadd.d $fa6, $fa6, $fa2 - fadd.d $ft6, $ft6, $fa5 - fadd.d $ft3, $ft3, $ft7 - fneg.d $ft12, $ft11 - fmul.d $ft12, $ft4, $ft12 - fmadd.d $ft12, $fa7, $ft13, $ft12 - fneg.d $ft13, $ft13 - fmul.d $ft13, $fa3, $ft13 - fmadd.d $ft13, $ft4, $ft9, $ft13 - fneg.d $ft9, $ft9 - fmul.d $ft9, $fa7, $ft9 - fmadd.d $ft9, $fa3, $ft11, $ft9 - fmul.d $ft6, $ft6, $ft13 - fmadd.d $fa6, $fa6, $ft12, $ft6 - fmadd.d $fa6, $ft3, $ft9, $fa6 - fadd.d $fa6, $ft10, $fa6 - fadd.d $fa3, $fa3, $fa4 - fadd.d $fa4, $fa7, $ft0 - fadd.d $fa7, $ft4, $ft5 - fneg.d $ft0, $fa5 - fmul.d $ft0, $ft8, $ft0 - fmadd.d $ft0, $ft2, $ft7, $ft0 - fneg.d $ft3, $ft7 - fmul.d $ft3, $ft1, $ft3 - fmadd.d $ft3, $ft8, $fa2, $ft3 - fneg.d $fa2, $fa2 - fmul.d $fa2, $ft2, $fa2 - fmadd.d $fa2, $ft1, $fa5, $fa2 - fmul.d $fa4, $fa4, $ft3 - fmadd.d $fa3, $fa3, $ft0, $fa4 - fmadd.d $fa2, $fa7, $fa2, $fa3 - fadd.d $fa2, $fa2, $fa6 - fmul.d $fa2, $fa2, $fa0 - fstx.d $fa2, $s0, $ra + fadd.d $ft11, $ft1, $ft10 + vreplvei.d $vr20, $vr15, 0 + vreplvei.d $vr15, $vr15, 1 + fadd.d $ft13, $ft7, $ft12 + fneg.d $ft14, $fa7 + fmul.d $ft14, $ft2, $ft14 + fmadd.d $ft14, $ft5, $ft4, $ft14 + fneg.d $ft15, $ft4 + fmul.d $ft15, $fa5, $ft15 + fmadd.d $ft15, $ft2, $fa3, $ft15 + fneg.d $fs0, $fa3 + fmul.d $fs0, $ft5, $fs0 + fmadd.d $fs0, $fa5, $fa7, $fs0 + fmul.d $ft11, $ft11, $ft15 + fmadd.d $ft9, $ft9, $ft14, $ft11 + fmadd.d $ft9, $ft13, $fs0, $ft9 + fadd.d $fa5, $fa5, $fa1 + fadd.d $ft5, $ft5, $fa4 + fadd.d $ft2, $ft2, $ft6 + fneg.d $ft11, $ft10 + fmul.d $ft11, $ft3, $ft11 + fmadd.d $ft11, $fa6, $ft12, $ft11 + fneg.d $ft12, $ft12 + fmul.d $ft12, $fa2, $ft12 + fmadd.d $ft12, $ft3, $ft8, $ft12 + fneg.d $ft8, $ft8 + fmul.d $ft8, $fa6, $ft8 + fmadd.d $ft8, $fa2, $ft10, $ft8 + fmul.d $ft5, $ft5, $ft12 + fmadd.d $fa5, $fa5, $ft11, $ft5 + fmadd.d $fa5, $ft2, $ft8, $fa5 + fadd.d $fa5, $ft9, $fa5 + fadd.d $fa2, $fa2, $fa3 + fadd.d $fa3, $fa6, $fa7 + fadd.d $fa6, $ft3, $ft4 + fneg.d $fa7, $fa4 + fmul.d $fa7, $ft7, $fa7 + fmadd.d $fa7, $ft1, $ft6, $fa7 + fneg.d $ft2, $ft6 + fmul.d $ft2, $ft0, $ft2 + fmadd.d $ft2, $ft7, $fa1, $ft2 + fneg.d $fa1, $fa1 + fmul.d $fa1, $ft1, $fa1 + fmadd.d $fa1, $ft0, $fa4, $fa1 + fmul.d $fa3, $fa3, $ft2 + fmadd.d $fa2, $fa2, $fa7, $fa3 + fmadd.d $fa1, $fa6, $fa1, $fa2 + fadd.d $fa1, $fa1, $fa5 + fmul.d $fa1, $fa1, $fa0 + fstx.d $fa1, $s0, $fp addi.d $a3, $a3, 8 - addi.w $fp, $fp, -1 + addi.w $a6, $a6, -1 addi.d $s0, $s0, 8 addi.d $a1, $a1, 8 addi.d $s1, $s1, 8 @@ -1688,11 +1676,11 @@ _ZL17BM_VOL3D_CALC_RAWRN9benchmark5StateE: # @_ZL17BM_VOL3D_CALC_RAWRN9benchmark addi.d $a7, $a7, 8 addi.d $t4, $t4, 8 addi.d $t5, $t5, 8 - bnez $fp, .LBB2_15 + bnez $a6, .LBB2_15 b .LBB2_9 .LBB2_16: .Ltmp4: # EH_LABEL - ld.d $a1, $sp, 216 + ld.d $a1, $sp, 224 move $fp, $a0 beqz $a1, .LBB2_18 # %bb.17: @@ -1735,10 +1723,6 @@ GCC_except_table2: .LCPI3_0: .dword 0xbfe0000000000000 # double -0.5 .dword 0x3fe0000000000000 # double 0.5 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI3_1: - .dword 0x3bc79ca10c924223 # double 9.9999999999999995E-21 .text .p2align 5 .type _ZL21BM_DEL_DOT_VEC_2D_RAWRN9benchmark5StateE,@function @@ -1813,13 +1797,16 @@ _ZL21BM_DEL_DOT_VEC_2D_RAWRN9benchmark5StateE: # @_ZL21BM_DEL_DOT_VEC_2D_RAWRN9b alsl.d $a7, $s6, $s3, 3 alsl.d $t0, $s6, $s4, 3 ld.d $t1, $sp, 80 - pcalau12i $t2, %pc_hi20(.LCPI3_0) - vld $vr0, $t2, %pc_lo12(.LCPI3_0) - pcalau12i $t2, %pc_hi20(.LCPI3_1) - fld.d $fa1, $t2, %pc_lo12(.LCPI3_1) - vldi $vr2, -928 + vldi $vr0, -928 lu52i.d $t2, $zero, 1022 - vreplgr2vr.d $vr3, $t2 + vreplgr2vr.d $vr1, $t2 + pcalau12i $t2, %pc_hi20(.LCPI3_0) + vld $vr2, $t2, %pc_lo12(.LCPI3_0) + lu12i.w $t2, 51492 + ori $t2, $t2, 547 + lu32i.d $t2, 498849 + lu52i.d $t2, $t2, 956 + movgr2fr.d $fa3, $t2 .p2align 4, , 16 .LBB3_5: # %.preheader.us # =>This Loop Header: Depth=1 @@ -1840,7 +1827,7 @@ _ZL21BM_DEL_DOT_VEC_2D_RAWRN9benchmark5StateE: # @_ZL21BM_DEL_DOT_VEC_2D_RAWRN9b fadd.d $fa4, $fa4, $fa5 fsub.d $fa4, $fa4, $fa6 fsub.d $fa4, $fa4, $ft0 - fmul.d $fa5, $fa4, $fa2 + fmul.d $fa5, $fa4, $fa0 vldx $vr8, $a4, $t4 fldx.d $fa4, $a4, $t4 vldx $vr10, $t0, $t4 @@ -1856,7 +1843,7 @@ _ZL21BM_DEL_DOT_VEC_2D_RAWRN9benchmark5StateE: # @_ZL21BM_DEL_DOT_VEC_2D_RAWRN9b vpackev.d $vr8, $vr14, $vr11 vshuf4i.d $vr11, $vr9, 9 vfsub.d $vr7, $vr7, $vr11 - vfmul.d $vr7, $vr7, $vr3 + vfmul.d $vr7, $vr7, $vr1 vldx $vr9, $a7, $t4 fldx.d $ft7, $a6, $t4 fldx.d $ft8, $a7, $t4 @@ -1866,7 +1853,7 @@ _ZL21BM_DEL_DOT_VEC_2D_RAWRN9benchmark5StateE: # @_ZL21BM_DEL_DOT_VEC_2D_RAWRN9b fadd.d $ft7, $ft7, $ft8 fsub.d $ft7, $ft7, $ft9 fsub.d $ft7, $ft7, $ft10 - fmul.d $ft7, $ft7, $fa2 + fmul.d $ft7, $ft7, $fa0 fldx.d $ft8, $t0, $t4 fldx.d $ft9, $s4, $t4 vshuf4i.d $vr11, $vr14, 12 @@ -1882,14 +1869,14 @@ _ZL21BM_DEL_DOT_VEC_2D_RAWRN9benchmark5StateE: # @_ZL21BM_DEL_DOT_VEC_2D_RAWRN9b vfsub.d $vr8, $vr10, $vr9 vshuf4i.d $vr19, $vr11, 12 vfsub.d $vr8, $vr8, $vr19 - vfmul.d $vr8, $vr8, $vr3 - vfmul.d $vr9, $vr13, $vr0 + vfmul.d $vr8, $vr8, $vr1 + vfmul.d $vr9, $vr13, $vr2 vreplvei.d $vr10, $vr9, 0 fmul.d $ft2, $fa5, $ft2 vreplvei.d $vr11, $vr7, 0 vreplvei.d $vr13, $vr7, 1 fmadd.d $ft2, $ft5, $ft3, $ft2 - fadd.d $ft2, $ft2, $fa1 + fadd.d $ft2, $ft2, $fa3 frecip.d $ft2, $ft2 fneg.d $fa5, $fa5 vextrins.d $vr15, $vr5, 16 @@ -1981,16 +1968,8 @@ GCC_except_table3: .Lcst_end1: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZL13BM_COUPLE_RAWRN9benchmark5StateE -.LCPI4_0: - .dword 0x406e56fd83ba6863 # double 242.71844660194174 -.LCPI4_1: - .dword 0x38e09d8792fb4c49 # double 9.9999999999999992E-35 -.LCPI4_2: - .dword 0x3fca9fbe76c8b439 # double 0.20799999999999999 .text - .p2align 5 + .p2align 5 # -- Begin function _ZL13BM_COUPLE_RAWRN9benchmark5StateE .type _ZL13BM_COUPLE_RAWRN9benchmark5StateE,@function _ZL13BM_COUPLE_RAWRN9benchmark5StateE: # @_ZL13BM_COUPLE_RAWRN9benchmark5StateE .Lfunc_begin2: @@ -1998,27 +1977,27 @@ _ZL13BM_COUPLE_RAWRN9benchmark5StateE: # @_ZL13BM_COUPLE_RAWRN9benchmark5StateE .cfi_personality 155, DW.ref.__gxx_personality_v0 .cfi_lsda 27, .Lexception2 # %bb.0: - addi.d $sp, $sp, -544 - .cfi_def_cfa_offset 544 - st.d $ra, $sp, 536 # 8-byte Folded Spill - st.d $fp, $sp, 528 # 8-byte Folded Spill - st.d $s0, $sp, 520 # 8-byte Folded Spill - st.d $s1, $sp, 512 # 8-byte Folded Spill - st.d $s2, $sp, 504 # 8-byte Folded Spill - st.d $s3, $sp, 496 # 8-byte Folded Spill - st.d $s4, $sp, 488 # 8-byte Folded Spill - st.d $s5, $sp, 480 # 8-byte Folded Spill - st.d $s6, $sp, 472 # 8-byte Folded Spill - st.d $s7, $sp, 464 # 8-byte Folded Spill - st.d $s8, $sp, 456 # 8-byte Folded Spill - fst.d $fs0, $sp, 448 # 8-byte Folded Spill - fst.d $fs1, $sp, 440 # 8-byte Folded Spill - fst.d $fs2, $sp, 432 # 8-byte Folded Spill - fst.d $fs3, $sp, 424 # 8-byte Folded Spill - fst.d $fs4, $sp, 416 # 8-byte Folded Spill - fst.d $fs5, $sp, 408 # 8-byte Folded Spill - fst.d $fs6, $sp, 400 # 8-byte Folded Spill - fst.d $fs7, $sp, 392 # 8-byte Folded Spill + addi.d $sp, $sp, -576 + .cfi_def_cfa_offset 576 + st.d $ra, $sp, 568 # 8-byte Folded Spill + st.d $fp, $sp, 560 # 8-byte Folded Spill + st.d $s0, $sp, 552 # 8-byte Folded Spill + st.d $s1, $sp, 544 # 8-byte Folded Spill + st.d $s2, $sp, 536 # 8-byte Folded Spill + st.d $s3, $sp, 528 # 8-byte Folded Spill + st.d $s4, $sp, 520 # 8-byte Folded Spill + st.d $s5, $sp, 512 # 8-byte Folded Spill + st.d $s6, $sp, 504 # 8-byte Folded Spill + st.d $s7, $sp, 496 # 8-byte Folded Spill + st.d $s8, $sp, 488 # 8-byte Folded Spill + fst.d $fs0, $sp, 480 # 8-byte Folded Spill + fst.d $fs1, $sp, 472 # 8-byte Folded Spill + fst.d $fs2, $sp, 464 # 8-byte Folded Spill + fst.d $fs3, $sp, 456 # 8-byte Folded Spill + fst.d $fs4, $sp, 448 # 8-byte Folded Spill + fst.d $fs5, $sp, 440 # 8-byte Folded Spill + fst.d $fs6, $sp, 432 # 8-byte Folded Spill + fst.d $fs7, $sp, 424 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -2052,24 +2031,24 @@ _ZL13BM_COUPLE_RAWRN9benchmark5StateE: # @_ZL13BM_COUPLE_RAWRN9benchmark5StateE ld.d $s3, $s0, 216 ld.d $s0, $s0, 224 ld.w $a1, $a0, 0 - addi.d $a0, $sp, 304 + addi.d $a0, $sp, 336 ori $a2, $zero, 3 pcaddu18i $ra, %call36(_ZN7ADomainC2Eii) jirl $ra, $ra, 0 - ld.w $s4, $sp, 316 - ld.w $s5, $sp, 328 - ld.w $a0, $sp, 320 + ld.w $s4, $sp, 348 + ld.w $s5, $sp, 360 + ld.w $a0, $sp, 352 + st.d $a0, $sp, 200 # 8-byte Folded Spill + ld.w $a0, $sp, 364 + st.d $a0, $sp, 280 # 8-byte Folded Spill + ld.w $a0, $sp, 356 + st.d $a0, $sp, 160 # 8-byte Folded Spill + ld.w $a0, $sp, 368 st.d $a0, $sp, 192 # 8-byte Folded Spill - ld.w $a0, $sp, 332 - st.d $a0, $sp, 272 # 8-byte Folded Spill - ld.w $a0, $sp, 324 - st.d $a0, $sp, 152 # 8-byte Folded Spill - ld.w $a0, $sp, 336 - st.d $a0, $sp, 184 # 8-byte Folded Spill ld.w $s6, $s7, 28 ld.d $s8, $s7, 16 .Ltmp10: # EH_LABEL - st.d $s7, $sp, 104 # 8-byte Folded Spill + st.d $s7, $sp, 112 # 8-byte Folded Spill move $a0, $s7 pcaddu18i $ra, %call36(_ZN9benchmark5State16StartKeepRunningEv) jirl $ra, $ra, 0 @@ -2079,38 +2058,38 @@ _ZL13BM_COUPLE_RAWRN9benchmark5StateE: # @_ZL13BM_COUPLE_RAWRN9benchmark5StateE # %bb.2: # %_ZN9benchmark5State3endEv.exit.preheader beqz $s8, .LBB4_47 # %bb.3: # %.preheader350.lr.ph - ld.d $a0, $sp, 152 # 8-byte Folded Reload - ld.d $a1, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 160 # 8-byte Folded Reload + ld.d $a1, $sp, 192 # 8-byte Folded Reload bge $a0, $a1, .LBB4_47 # %bb.4: # %.preheader350.lr.ph - ld.d $a0, $sp, 192 # 8-byte Folded Reload - ld.d $a1, $sp, 272 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload + ld.d $a1, $sp, 280 # 8-byte Folded Reload bge $a0, $a1, .LBB4_47 # %bb.5: # %.preheader350.lr.ph bge $s4, $s5, .LBB4_47 # %bb.6: # %.preheader350.us.us.us.preheader addi.w $a0, $s5, 2 - ld.d $a3, $sp, 272 # 8-byte Folded Reload + ld.d $a3, $sp, 280 # 8-byte Folded Reload addi.w $a1, $a3, 2 addi.w $a2, $s5, 1 addi.w $a3, $a3, 1 sub.d $a4, $s5, $s4 - st.d $a4, $sp, 264 # 8-byte Folded Spill - ld.d $a6, $sp, 152 # 8-byte Folded Reload + st.d $a4, $sp, 272 # 8-byte Folded Spill + ld.d $a6, $sp, 160 # 8-byte Folded Reload mul.d $a4, $a6, $a1 - ld.d $a5, $sp, 192 # 8-byte Folded Reload + ld.d $a5, $sp, 200 # 8-byte Folded Reload add.d $a4, $a4, $a5 mul.d $a4, $a4, $a0 slli.d $a4, $a4, 4 alsl.d $a4, $s4, $a4, 4 add.d $a4, $a4, $s3 addi.d $a4, $a4, 8 - st.d $a4, $sp, 144 # 8-byte Folded Spill + st.d $a4, $sp, 152 # 8-byte Folded Spill mulw.d.w $a1, $a1, $a0 slli.d $a1, $a1, 4 - st.d $a1, $sp, 176 # 8-byte Folded Spill + st.d $a1, $sp, 184 # 8-byte Folded Spill slli.d $a0, $a0, 4 - st.d $a0, $sp, 256 # 8-byte Folded Spill + st.d $a0, $sp, 264 # 8-byte Folded Spill mul.d $a0, $a6, $a3 add.d $a0, $a0, $a5 mul.d $a0, $a0, $a2 @@ -2118,23 +2097,41 @@ _ZL13BM_COUPLE_RAWRN9benchmark5StateE: # @_ZL13BM_COUPLE_RAWRN9benchmark5StateE slli.d $a0, $a0, 4 addi.d $a0, $a0, 8 add.d $a1, $s2, $a0 - st.d $a1, $sp, 136 # 8-byte Folded Spill + st.d $a1, $sp, 144 # 8-byte Folded Spill mulw.d.w $a1, $a3, $a2 slli.d $a1, $a1, 4 - st.d $a1, $sp, 168 # 8-byte Folded Spill + st.d $a1, $sp, 176 # 8-byte Folded Spill slli.d $a1, $a2, 4 - st.d $a1, $sp, 248 # 8-byte Folded Spill + st.d $a1, $sp, 256 # 8-byte Folded Spill add.d $a1, $s0, $a0 - st.d $a1, $sp, 128 # 8-byte Folded Spill + st.d $a1, $sp, 136 # 8-byte Folded Spill add.d $a1, $s1, $a0 - st.d $a1, $sp, 120 # 8-byte Folded Spill + st.d $a1, $sp, 128 # 8-byte Folded Spill add.d $a0, $fp, $a0 - st.d $a0, $sp, 112 # 8-byte Folded Spill + st.d $a0, $sp, 120 # 8-byte Folded Spill + lu12i.w $a0, -509018 + ori $a0, $a0, 2147 + lu32i.d $a0, -108803 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa0, $a0 + fst.d $fa0, $sp, 320 # 8-byte Folded Spill + lu12i.w $a0, -446540 + ori $a0, $a0, 3145 + lu32i.d $a0, 40327 + lu52i.d $a0, $a0, 910 + movgr2fr.d $fa0, $a0 + fst.d $fa0, $sp, 312 # 8-byte Folded Spill + lu12i.w $a0, 486539 + ori $a0, $a0, 1081 + lu32i.d $a0, -352322 + lu52i.d $a0, $a0, 1020 + movgr2fr.d $fa0, $a0 + fst.d $fa0, $sp, 304 # 8-byte Folded Spill b .LBB4_8 .p2align 4, , 16 .LBB4_7: # %._crit_edge.split.us.split.us.us.us.us # in Loop: Header=BB4_8 Depth=1 - ld.d $s8, $sp, 160 # 8-byte Folded Reload + ld.d $s8, $sp, 168 # 8-byte Folded Reload addi.d $s8, $s8, -1 beqz $s8, .LBB4_47 .LBB4_8: # %.preheader350.us.us.us @@ -2142,104 +2139,101 @@ _ZL13BM_COUPLE_RAWRN9benchmark5StateE: # @_ZL13BM_COUPLE_RAWRN9benchmark5StateE # Child Loop BB4_10 Depth 2 # Child Loop BB4_12 Depth 3 # Child Loop BB4_13 Depth 4 - st.d $s8, $sp, 160 # 8-byte Folded Spill - ld.d $fp, $sp, 112 # 8-byte Folded Reload - ld.d $s4, $sp, 120 # 8-byte Folded Reload - ld.d $s8, $sp, 128 # 8-byte Folded Reload - ld.d $s6, $sp, 136 # 8-byte Folded Reload - ld.d $s1, $sp, 144 # 8-byte Folded Reload - ld.d $a3, $sp, 152 # 8-byte Folded Reload + st.d $s8, $sp, 168 # 8-byte Folded Spill + ld.d $fp, $sp, 120 # 8-byte Folded Reload + ld.d $s4, $sp, 128 # 8-byte Folded Reload + ld.d $s8, $sp, 136 # 8-byte Folded Reload + ld.d $s6, $sp, 144 # 8-byte Folded Reload + ld.d $s1, $sp, 152 # 8-byte Folded Reload + ld.d $a3, $sp, 160 # 8-byte Folded Reload b .LBB4_10 .p2align 4, , 16 .LBB4_9: # %._crit_edge354.split.us.us.us.us.us.us # in Loop: Header=BB4_10 Depth=2 - ld.d $a3, $sp, 200 # 8-byte Folded Reload + ld.d $a3, $sp, 208 # 8-byte Folded Reload addi.d $a3, $a3, 1 - ld.d $a0, $sp, 176 # 8-byte Folded Reload - ld.d $s1, $sp, 208 # 8-byte Folded Reload + ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $s1, $sp, 216 # 8-byte Folded Reload add.d $s1, $s1, $a0 - ld.d $a0, $sp, 168 # 8-byte Folded Reload - ld.d $s6, $sp, 216 # 8-byte Folded Reload + ld.d $a0, $sp, 176 # 8-byte Folded Reload + ld.d $s6, $sp, 224 # 8-byte Folded Reload add.d $s6, $s6, $a0 - ld.d $s8, $sp, 224 # 8-byte Folded Reload + ld.d $s8, $sp, 232 # 8-byte Folded Reload add.d $s8, $s8, $a0 - ld.d $s4, $sp, 232 # 8-byte Folded Reload + ld.d $s4, $sp, 240 # 8-byte Folded Reload add.d $s4, $s4, $a0 - ld.d $fp, $sp, 240 # 8-byte Folded Reload + ld.d $fp, $sp, 248 # 8-byte Folded Reload add.d $fp, $fp, $a0 - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload beq $a3, $a0, .LBB4_7 .LBB4_10: # %.preheader.us.us.us.us.us # Parent Loop BB4_8 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB4_12 Depth 3 # Child Loop BB4_13 Depth 4 - st.d $a3, $sp, 200 # 8-byte Folded Spill - st.d $fp, $sp, 240 # 8-byte Folded Spill - st.d $s4, $sp, 232 # 8-byte Folded Spill - st.d $s8, $sp, 224 # 8-byte Folded Spill - st.d $s6, $sp, 216 # 8-byte Folded Spill - st.d $s1, $sp, 208 # 8-byte Folded Spill - ld.d $s2, $sp, 192 # 8-byte Folded Reload + st.d $a3, $sp, 208 # 8-byte Folded Spill + st.d $fp, $sp, 248 # 8-byte Folded Spill + st.d $s4, $sp, 240 # 8-byte Folded Spill + st.d $s8, $sp, 232 # 8-byte Folded Spill + st.d $s6, $sp, 224 # 8-byte Folded Spill + st.d $s1, $sp, 216 # 8-byte Folded Spill + ld.d $s2, $sp, 200 # 8-byte Folded Reload b .LBB4_12 .p2align 4, , 16 .LBB4_11: # %._crit_edge.us.us.us.us.us.us # in Loop: Header=BB4_12 Depth=3 addi.d $s2, $s2, 1 - ld.d $a0, $sp, 256 # 8-byte Folded Reload + ld.d $a0, $sp, 264 # 8-byte Folded Reload add.d $s1, $s1, $a0 - ld.d $a0, $sp, 248 # 8-byte Folded Reload + ld.d $a0, $sp, 256 # 8-byte Folded Reload add.d $s6, $s6, $a0 add.d $s8, $s8, $a0 - ld.d $s4, $sp, 280 # 8-byte Folded Reload + ld.d $s4, $sp, 288 # 8-byte Folded Reload add.d $s4, $s4, $a0 - ld.d $fp, $sp, 288 # 8-byte Folded Reload + ld.d $fp, $sp, 296 # 8-byte Folded Reload add.d $fp, $fp, $a0 - ld.d $a0, $sp, 272 # 8-byte Folded Reload + ld.d $a0, $sp, 280 # 8-byte Folded Reload beq $s2, $a0, .LBB4_9 .LBB4_12: # %.lr.ph.us.us.us.us.us.us # Parent Loop BB4_8 Depth=1 # Parent Loop BB4_10 Depth=2 # => This Loop Header: Depth=3 # Child Loop BB4_13 Depth 4 - st.d $fp, $sp, 288 # 8-byte Folded Spill - st.d $s4, $sp, 280 # 8-byte Folded Spill + st.d $fp, $sp, 296 # 8-byte Folded Spill + st.d $s4, $sp, 288 # 8-byte Folded Spill move $s3, $s8 move $s0, $s6 move $s7, $s1 - ld.d $s5, $sp, 264 # 8-byte Folded Reload + ld.d $s5, $sp, 272 # 8-byte Folded Reload .p2align 4, , 16 .LBB4_13: # Parent Loop BB4_8 Depth=1 # Parent Loop BB4_10 Depth=2 # Parent Loop BB4_12 Depth=3 # => This Inner Loop Header: Depth=4 fld.d $fa0, $s7, -8 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_0) - fld.d $fa2, $s7, 0 - fld.d $fa3, $s3, -8 - fld.d $fa4, $s3, 0 - fmul.d $fs1, $fa0, $fa1 - fmul.d $fs2, $fa2, $fa1 - fmul.d $fs3, $fa3, $fa1 - fmul.d $fs6, $fa4, $fa1 + fld.d $fa1, $s7, 0 + fld.d $fa2, $s3, -8 + fld.d $fa3, $s3, 0 + fld.d $fa4, $sp, 320 # 8-byte Folded Reload + fmul.d $fs1, $fa0, $fa4 + fmul.d $fs2, $fa1, $fa4 + fmul.d $fs3, $fa2, $fa4 + fmul.d $fs6, $fa3, $fa4 fmul.d $fa0, $fs2, $fs2 fmadd.d $fa0, $fs1, $fs1, $fa0 fmadd.d $fa0, $fs3, $fs3, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_1) - pcalau12i $a0, %pc_hi20(.LCPI4_2) - fld.d $fa2, $a0, %pc_lo12(.LCPI4_2) fmadd.d $fa0, $fs6, $fs6, $fa0 + fld.d $fa1, $sp, 312 # 8-byte Folded Reload fadd.d $fs4, $fa0, $fa1 fsqrt.d $fa0, $fs4 - fmul.d $fa0, $fa0, $fa2 + fld.d $fa1, $sp, 304 # 8-byte Folded Reload + fmul.d $fa0, $fa0, $fa1 vldi $vr1, -928 fmul.d $fs0, $fa0, $fa1 fmov.d $fa0, $fs0 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fst.d $fa0, $sp, 296 # 8-byte Folded Spill + fst.d $fa0, $sp, 328 # 8-byte Folded Spill fmov.d $fa0, $fs0 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 @@ -2278,7 +2272,7 @@ _ZL13BM_COUPLE_RAWRN9benchmark5StateE: # @_ZL13BM_COUPLE_RAWRN9benchmark5StateE .LBB4_15: # in Loop: Header=BB4_13 Depth=4 fadd.d $fa2, $fs4, $fa2 fadd.d $fa1, $fs2, $fa1 - fld.d $ft1, $sp, 296 # 8-byte Folded Reload + fld.d $ft1, $sp, 328 # 8-byte Folded Reload fmul.d $fa2, $ft1, $fa2 fmul.d $fa3, $ft1, $fa1 movgr2fr.d $fs2, $zero @@ -2397,35 +2391,35 @@ _ZL13BM_COUPLE_RAWRN9benchmark5StateE: # @_ZL13BM_COUPLE_RAWRN9benchmark5StateE fmov.d $fs2, $ft5 bcnez $fcc0, .LBB4_14 # %bb.26: # in Loop: Header=BB4_13 Depth=4 - fst.d $fa0, $sp, 96 # 8-byte Folded Spill + fst.d $fa0, $sp, 104 # 8-byte Folded Spill fmov.d $fa0, $fs1 fmov.d $fa1, $fs7 fmov.d $fa2, $ft6 fmov.d $fa3, $ft7 - fst.d $fa7, $sp, 32 # 8-byte Folded Spill + fst.d $fa7, $sp, 40 # 8-byte Folded Spill fmov.d $fs5, $ft0 - fst.d $ft2, $sp, 48 # 8-byte Folded Spill + fst.d $ft2, $sp, 56 # 8-byte Folded Spill fmov.d $fs2, $ft3 - fst.d $ft4, $sp, 80 # 8-byte Folded Spill + fst.d $ft4, $sp, 88 # 8-byte Folded Spill fmov.d $fs4, $ft5 - fst.d $ft6, $sp, 72 # 8-byte Folded Spill - fst.d $ft7, $sp, 64 # 8-byte Folded Spill + fst.d $ft6, $sp, 80 # 8-byte Folded Spill + fst.d $ft7, $sp, 72 # 8-byte Folded Spill movcf2gr $a0, $fcc1 - st.d $a0, $sp, 56 + st.d $a0, $sp, 64 pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - ld.d $a0, $sp, 56 + ld.d $a0, $sp, 64 movgr2cf $fcc1, $a0 - fld.d $ft7, $sp, 64 # 8-byte Folded Reload - fld.d $ft6, $sp, 72 # 8-byte Folded Reload + fld.d $ft7, $sp, 72 # 8-byte Folded Reload + fld.d $ft6, $sp, 80 # 8-byte Folded Reload fmov.d $ft5, $fs4 - fld.d $ft4, $sp, 80 # 8-byte Folded Reload + fld.d $ft4, $sp, 88 # 8-byte Folded Reload fmov.d $ft3, $fs2 - fld.d $ft2, $sp, 48 # 8-byte Folded Reload + fld.d $ft2, $sp, 56 # 8-byte Folded Reload fmov.d $ft0, $fs5 - fld.d $fa7, $sp, 32 # 8-byte Folded Reload + fld.d $fa7, $sp, 40 # 8-byte Folded Reload fmov.d $fs4, $fa0 - fld.d $fa0, $sp, 96 # 8-byte Folded Reload + fld.d $fa0, $sp, 104 # 8-byte Folded Reload fmov.d $fs2, $fa1 b .LBB4_14 .LBB4_27: # in Loop: Header=BB4_13 Depth=4 @@ -2434,39 +2428,39 @@ _ZL13BM_COUPLE_RAWRN9benchmark5StateE: # @_ZL13BM_COUPLE_RAWRN9benchmark5StateE fmov.d $fa1, $fs3 bcnez $fcc0, .LBB4_15 # %bb.28: # in Loop: Header=BB4_13 Depth=4 - fst.d $fa0, $sp, 96 # 8-byte Folded Spill + fst.d $fa0, $sp, 104 # 8-byte Folded Spill fmov.d $fa0, $fs5 fmov.d $fa1, $fs0 fmov.d $fa2, $fa7 fmov.d $fa3, $ft0 - fst.d $fa7, $sp, 32 # 8-byte Folded Spill - fst.d $ft0, $sp, 16 # 8-byte Folded Spill - fst.d $ft2, $sp, 48 # 8-byte Folded Spill - fst.d $ft3, $sp, 88 # 8-byte Folded Spill - fst.d $ft4, $sp, 80 # 8-byte Folded Spill - fst.d $ft5, $sp, 40 # 8-byte Folded Spill - fst.d $ft6, $sp, 72 # 8-byte Folded Spill - fst.d $ft7, $sp, 64 # 8-byte Folded Spill + fst.d $fa7, $sp, 40 # 8-byte Folded Spill + fst.d $ft0, $sp, 24 # 8-byte Folded Spill + fst.d $ft2, $sp, 56 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 88 # 8-byte Folded Spill + fst.d $ft5, $sp, 48 # 8-byte Folded Spill + fst.d $ft6, $sp, 80 # 8-byte Folded Spill + fst.d $ft7, $sp, 72 # 8-byte Folded Spill movcf2gr $a0, $fcc1 - st.d $a0, $sp, 56 + st.d $a0, $sp, 64 movcf2gr $a0, $fcc2 - st.d $a0, $sp, 24 + st.d $a0, $sp, 32 pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - ld.d $a0, $sp, 24 + ld.d $a0, $sp, 32 movgr2cf $fcc2, $a0 - ld.d $a0, $sp, 56 + ld.d $a0, $sp, 64 movgr2cf $fcc1, $a0 - fld.d $ft7, $sp, 64 # 8-byte Folded Reload - fld.d $ft6, $sp, 72 # 8-byte Folded Reload - fld.d $ft5, $sp, 40 # 8-byte Folded Reload - fld.d $ft4, $sp, 80 # 8-byte Folded Reload - fld.d $ft3, $sp, 88 # 8-byte Folded Reload - fld.d $ft2, $sp, 48 # 8-byte Folded Reload - fld.d $ft0, $sp, 16 # 8-byte Folded Reload - fld.d $fa7, $sp, 32 # 8-byte Folded Reload + fld.d $ft7, $sp, 72 # 8-byte Folded Reload + fld.d $ft6, $sp, 80 # 8-byte Folded Reload + fld.d $ft5, $sp, 48 # 8-byte Folded Reload + fld.d $ft4, $sp, 88 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload + fld.d $ft2, $sp, 56 # 8-byte Folded Reload + fld.d $ft0, $sp, 24 # 8-byte Folded Reload + fld.d $fa7, $sp, 40 # 8-byte Folded Reload fmov.d $fa2, $fa0 - fld.d $fa0, $sp, 96 # 8-byte Folded Reload + fld.d $fa0, $sp, 104 # 8-byte Folded Reload b .LBB4_15 .LBB4_29: # in Loop: Header=BB4_13 Depth=4 fcmp.cor.d $fcc0, $fa1, $fa1 @@ -2474,43 +2468,43 @@ _ZL13BM_COUPLE_RAWRN9benchmark5StateE: # @_ZL13BM_COUPLE_RAWRN9benchmark5StateE # %bb.30: # in Loop: Header=BB4_13 Depth=4 movgr2fr.d $fa4, $zero vldi $vr1, -912 - fst.d $fa0, $sp, 96 # 8-byte Folded Spill + fst.d $fa0, $sp, 104 # 8-byte Folded Spill fmov.d $fa0, $fa4 - fst.d $fa7, $sp, 32 # 8-byte Folded Spill + fst.d $fa7, $sp, 40 # 8-byte Folded Spill fmov.d $fs4, $ft0 - fst.d $ft2, $sp, 48 # 8-byte Folded Spill - fst.d $ft3, $sp, 88 # 8-byte Folded Spill - fst.d $ft4, $sp, 80 # 8-byte Folded Spill - fst.d $ft5, $sp, 40 # 8-byte Folded Spill - fst.d $ft6, $sp, 72 # 8-byte Folded Spill - fst.d $ft7, $sp, 64 # 8-byte Folded Spill + fst.d $ft2, $sp, 56 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 88 # 8-byte Folded Spill + fst.d $ft5, $sp, 48 # 8-byte Folded Spill + fst.d $ft6, $sp, 80 # 8-byte Folded Spill + fst.d $ft7, $sp, 72 # 8-byte Folded Spill movcf2gr $a0, $fcc1 - st.d $a0, $sp, 56 + st.d $a0, $sp, 64 movcf2gr $a0, $fcc2 - st.d $a0, $sp, 24 + st.d $a0, $sp, 32 pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - ld.d $a0, $sp, 24 + ld.d $a0, $sp, 32 movgr2cf $fcc2, $a0 - ld.d $a0, $sp, 56 + ld.d $a0, $sp, 64 movgr2cf $fcc1, $a0 - fld.d $ft7, $sp, 64 # 8-byte Folded Reload - fld.d $ft6, $sp, 72 # 8-byte Folded Reload - fld.d $ft5, $sp, 40 # 8-byte Folded Reload - fld.d $ft4, $sp, 80 # 8-byte Folded Reload - fld.d $ft3, $sp, 88 # 8-byte Folded Reload - fld.d $ft2, $sp, 48 # 8-byte Folded Reload - fld.d $ft1, $sp, 296 # 8-byte Folded Reload + fld.d $ft7, $sp, 72 # 8-byte Folded Reload + fld.d $ft6, $sp, 80 # 8-byte Folded Reload + fld.d $ft5, $sp, 48 # 8-byte Folded Reload + fld.d $ft4, $sp, 88 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload + fld.d $ft2, $sp, 56 # 8-byte Folded Reload + fld.d $ft1, $sp, 328 # 8-byte Folded Reload fmov.d $ft0, $fs4 - fld.d $fa7, $sp, 32 # 8-byte Folded Reload + fld.d $fa7, $sp, 40 # 8-byte Folded Reload fmov.d $fa4, $fa0 - fld.d $fa0, $sp, 96 # 8-byte Folded Reload + fld.d $fa0, $sp, 104 # 8-byte Folded Reload b .LBB4_16 .LBB4_31: # in Loop: Header=BB4_13 Depth=4 fcmp.cor.d $fcc0, $fs3, $fs3 bcnez $fcc0, .LBB4_17 # %bb.32: # in Loop: Header=BB4_13 Depth=4 - fst.d $fa0, $sp, 96 # 8-byte Folded Spill + fst.d $fa0, $sp, 104 # 8-byte Folded Spill fmov.d $fa0, $fs5 fmov.d $fa1, $fs0 fmov.d $fa2, $fa7 @@ -2518,35 +2512,35 @@ _ZL13BM_COUPLE_RAWRN9benchmark5StateE: # @_ZL13BM_COUPLE_RAWRN9benchmark5StateE fmov.d $fs4, $fa7 fmov.d $fs6, $ft0 fmov.d $fs3, $ft2 - fst.d $ft3, $sp, 88 # 8-byte Folded Spill - fst.d $ft4, $sp, 80 # 8-byte Folded Spill - fst.d $ft5, $sp, 40 # 8-byte Folded Spill - fst.d $ft6, $sp, 72 # 8-byte Folded Spill - fst.d $ft7, $sp, 64 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 88 # 8-byte Folded Spill + fst.d $ft5, $sp, 48 # 8-byte Folded Spill + fst.d $ft6, $sp, 80 # 8-byte Folded Spill + fst.d $ft7, $sp, 72 # 8-byte Folded Spill movcf2gr $a0, $fcc1 - st.d $a0, $sp, 56 + st.d $a0, $sp, 64 pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - ld.d $a0, $sp, 56 + ld.d $a0, $sp, 64 movgr2cf $fcc1, $a0 - fld.d $ft7, $sp, 64 # 8-byte Folded Reload - fld.d $ft6, $sp, 72 # 8-byte Folded Reload - fld.d $ft5, $sp, 40 # 8-byte Folded Reload - fld.d $ft4, $sp, 80 # 8-byte Folded Reload - fld.d $ft3, $sp, 88 # 8-byte Folded Reload + fld.d $ft7, $sp, 72 # 8-byte Folded Reload + fld.d $ft6, $sp, 80 # 8-byte Folded Reload + fld.d $ft5, $sp, 48 # 8-byte Folded Reload + fld.d $ft4, $sp, 88 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload fmov.d $ft2, $fs3 - fld.d $ft1, $sp, 296 # 8-byte Folded Reload + fld.d $ft1, $sp, 328 # 8-byte Folded Reload fmov.d $ft0, $fs6 fmov.d $fa7, $fs4 fmov.d $fs6, $fa0 - fld.d $fa0, $sp, 96 # 8-byte Folded Reload + fld.d $fa0, $sp, 104 # 8-byte Folded Reload fmov.d $fs3, $fa1 b .LBB4_17 .LBB4_33: # in Loop: Header=BB4_13 Depth=4 fcmp.cor.d $fcc0, $fa6, $fa6 bcnez $fcc0, .LBB4_18 # %bb.34: # in Loop: Header=BB4_13 Depth=4 - fst.d $fa0, $sp, 96 # 8-byte Folded Spill + fst.d $fa0, $sp, 104 # 8-byte Folded Spill fmov.d $fa0, $fs1 fmov.d $fa1, $fa4 fmov.d $fa2, $fs6 @@ -2554,68 +2548,68 @@ _ZL13BM_COUPLE_RAWRN9benchmark5StateE: # @_ZL13BM_COUPLE_RAWRN9benchmark5StateE fmov.d $fs3, $fa7 fmov.d $fs4, $ft0 fmov.d $fs6, $ft2 - fst.d $ft3, $sp, 88 # 8-byte Folded Spill - fst.d $ft4, $sp, 80 # 8-byte Folded Spill - fst.d $ft5, $sp, 40 # 8-byte Folded Spill - fst.d $ft6, $sp, 72 # 8-byte Folded Spill - fst.d $ft7, $sp, 64 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 88 # 8-byte Folded Spill + fst.d $ft5, $sp, 48 # 8-byte Folded Spill + fst.d $ft6, $sp, 80 # 8-byte Folded Spill + fst.d $ft7, $sp, 72 # 8-byte Folded Spill movcf2gr $a0, $fcc1 - st.d $a0, $sp, 56 - fst.d $fa4, $sp, 48 # 8-byte Folded Spill + st.d $a0, $sp, 64 + fst.d $fa4, $sp, 56 # 8-byte Folded Spill pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - fld.d $fa4, $sp, 48 # 8-byte Folded Reload - ld.d $a0, $sp, 56 + fld.d $fa4, $sp, 56 # 8-byte Folded Reload + ld.d $a0, $sp, 64 movgr2cf $fcc1, $a0 - fld.d $ft7, $sp, 64 # 8-byte Folded Reload - fld.d $ft6, $sp, 72 # 8-byte Folded Reload - fld.d $ft5, $sp, 40 # 8-byte Folded Reload - fld.d $ft4, $sp, 80 # 8-byte Folded Reload - fld.d $ft3, $sp, 88 # 8-byte Folded Reload + fld.d $ft7, $sp, 72 # 8-byte Folded Reload + fld.d $ft6, $sp, 80 # 8-byte Folded Reload + fld.d $ft5, $sp, 48 # 8-byte Folded Reload + fld.d $ft4, $sp, 88 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload fmov.d $ft2, $fs6 - fld.d $ft1, $sp, 296 # 8-byte Folded Reload + fld.d $ft1, $sp, 328 # 8-byte Folded Reload fmov.d $ft0, $fs4 fmov.d $fa7, $fs3 fmov.d $fs4, $fa0 - fld.d $fa0, $sp, 96 # 8-byte Folded Reload + fld.d $fa0, $sp, 104 # 8-byte Folded Reload fmov.d $fa6, $fa1 b .LBB4_18 .LBB4_35: # in Loop: Header=BB4_13 Depth=4 fcmp.cor.d $fcc0, $fa1, $fa1 bcnez $fcc0, .LBB4_19 # %bb.36: # in Loop: Header=BB4_13 Depth=4 - fst.d $fa0, $sp, 96 # 8-byte Folded Spill + fst.d $fa0, $sp, 104 # 8-byte Folded Spill fmov.d $fa0, $fs1 fmov.d $fa1, $fa4 fmov.d $fa2, $ft2 fmov.d $fa3, $ft3 fmov.d $fs6, $fa7 fmov.d $fs3, $ft0 - fst.d $ft2, $sp, 48 # 8-byte Folded Spill - fst.d $ft3, $sp, 88 # 8-byte Folded Spill - fst.d $ft4, $sp, 80 # 8-byte Folded Spill - fst.d $ft5, $sp, 40 # 8-byte Folded Spill - fst.d $ft6, $sp, 72 # 8-byte Folded Spill - fst.d $ft7, $sp, 64 # 8-byte Folded Spill + fst.d $ft2, $sp, 56 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 88 # 8-byte Folded Spill + fst.d $ft5, $sp, 48 # 8-byte Folded Spill + fst.d $ft6, $sp, 80 # 8-byte Folded Spill + fst.d $ft7, $sp, 72 # 8-byte Folded Spill movcf2gr $a0, $fcc1 - st.d $a0, $sp, 56 - fst.d $fa6, $sp, 32 # 8-byte Folded Spill + st.d $a0, $sp, 64 + fst.d $fa6, $sp, 40 # 8-byte Folded Spill pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - fld.d $fa6, $sp, 32 # 8-byte Folded Reload - ld.d $a0, $sp, 56 + fld.d $fa6, $sp, 40 # 8-byte Folded Reload + ld.d $a0, $sp, 64 movgr2cf $fcc1, $a0 - fld.d $ft7, $sp, 64 # 8-byte Folded Reload - fld.d $ft6, $sp, 72 # 8-byte Folded Reload - fld.d $ft5, $sp, 40 # 8-byte Folded Reload - fld.d $ft4, $sp, 80 # 8-byte Folded Reload - fld.d $ft3, $sp, 88 # 8-byte Folded Reload - fld.d $ft2, $sp, 48 # 8-byte Folded Reload - fld.d $ft1, $sp, 296 # 8-byte Folded Reload + fld.d $ft7, $sp, 72 # 8-byte Folded Reload + fld.d $ft6, $sp, 80 # 8-byte Folded Reload + fld.d $ft5, $sp, 48 # 8-byte Folded Reload + fld.d $ft4, $sp, 88 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload + fld.d $ft2, $sp, 56 # 8-byte Folded Reload + fld.d $ft1, $sp, 328 # 8-byte Folded Reload fmov.d $ft0, $fs3 fmov.d $fa7, $fs6 fmov.d $fa2, $fa0 - fld.d $fa0, $sp, 96 # 8-byte Folded Reload + fld.d $fa0, $sp, 104 # 8-byte Folded Reload b .LBB4_19 .LBB4_37: # in Loop: Header=BB4_13 Depth=4 fcmp.cor.d $fcc0, $fa1, $fa1 @@ -2623,41 +2617,41 @@ _ZL13BM_COUPLE_RAWRN9benchmark5StateE: # @_ZL13BM_COUPLE_RAWRN9benchmark5StateE # %bb.38: # in Loop: Header=BB4_13 Depth=4 movgr2fr.d $fa4, $zero vldi $vr1, -912 - fst.d $fa0, $sp, 96 # 8-byte Folded Spill + fst.d $fa0, $sp, 104 # 8-byte Folded Spill fmov.d $fa0, $fa4 fmov.d $fs6, $fa7 fmov.d $fs3, $ft0 - fst.d $ft2, $sp, 48 # 8-byte Folded Spill - fst.d $ft3, $sp, 88 # 8-byte Folded Spill - fst.d $ft4, $sp, 80 # 8-byte Folded Spill - fst.d $ft5, $sp, 40 # 8-byte Folded Spill - fst.d $ft6, $sp, 72 # 8-byte Folded Spill - fst.d $ft7, $sp, 64 # 8-byte Folded Spill + fst.d $ft2, $sp, 56 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 88 # 8-byte Folded Spill + fst.d $ft5, $sp, 48 # 8-byte Folded Spill + fst.d $ft6, $sp, 80 # 8-byte Folded Spill + fst.d $ft7, $sp, 72 # 8-byte Folded Spill movcf2gr $a0, $fcc1 - st.d $a0, $sp, 56 - fst.d $fa6, $sp, 32 # 8-byte Folded Spill + st.d $a0, $sp, 64 + fst.d $fa6, $sp, 40 # 8-byte Folded Spill pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - fld.d $fa6, $sp, 32 # 8-byte Folded Reload - ld.d $a0, $sp, 56 + fld.d $fa6, $sp, 40 # 8-byte Folded Reload + ld.d $a0, $sp, 64 movgr2cf $fcc1, $a0 - fld.d $ft7, $sp, 64 # 8-byte Folded Reload - fld.d $ft6, $sp, 72 # 8-byte Folded Reload - fld.d $ft5, $sp, 40 # 8-byte Folded Reload - fld.d $ft4, $sp, 80 # 8-byte Folded Reload - fld.d $ft3, $sp, 88 # 8-byte Folded Reload - fld.d $ft2, $sp, 48 # 8-byte Folded Reload - fld.d $ft1, $sp, 296 # 8-byte Folded Reload + fld.d $ft7, $sp, 72 # 8-byte Folded Reload + fld.d $ft6, $sp, 80 # 8-byte Folded Reload + fld.d $ft5, $sp, 48 # 8-byte Folded Reload + fld.d $ft4, $sp, 88 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload + fld.d $ft2, $sp, 56 # 8-byte Folded Reload + fld.d $ft1, $sp, 328 # 8-byte Folded Reload fmov.d $ft0, $fs3 fmov.d $fa7, $fs6 fmov.d $fa4, $fa0 - fld.d $fa0, $sp, 96 # 8-byte Folded Reload + fld.d $fa0, $sp, 104 # 8-byte Folded Reload b .LBB4_20 .LBB4_39: # in Loop: Header=BB4_13 Depth=4 fcmp.cor.d $fcc0, $ft5, $ft5 bcnez $fcc0, .LBB4_21 # %bb.40: # in Loop: Header=BB4_13 Depth=4 - fst.d $fa0, $sp, 96 # 8-byte Folded Spill + fst.d $fa0, $sp, 104 # 8-byte Folded Spill fmov.d $fa0, $fs1 fmov.d $fa1, $fs7 fmov.d $fa2, $ft6 @@ -2665,51 +2659,51 @@ _ZL13BM_COUPLE_RAWRN9benchmark5StateE: # @_ZL13BM_COUPLE_RAWRN9benchmark5StateE fmov.d $fs1, $fa7 fmov.d $fs7, $ft0 fmov.d $fs4, $ft2 - fst.d $ft3, $sp, 88 # 8-byte Folded Spill - fst.d $ft8, $sp, 80 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft8, $sp, 88 # 8-byte Folded Spill pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - fld.d $ft8, $sp, 80 # 8-byte Folded Reload - fld.d $ft3, $sp, 88 # 8-byte Folded Reload + fld.d $ft8, $sp, 88 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload fmov.d $ft2, $fs4 - fld.d $ft1, $sp, 296 # 8-byte Folded Reload + fld.d $ft1, $sp, 328 # 8-byte Folded Reload fmov.d $ft0, $fs7 fmov.d $fa7, $fs1 fmov.d $ft4, $fa0 - fld.d $fa0, $sp, 96 # 8-byte Folded Reload + fld.d $fa0, $sp, 104 # 8-byte Folded Reload fmov.d $ft5, $fa1 b .LBB4_21 .LBB4_41: # in Loop: Header=BB4_13 Depth=4 fcmp.cor.d $fcc0, $fs7, $fs7 bcnez $fcc0, .LBB4_22 # %bb.42: # in Loop: Header=BB4_13 Depth=4 - fst.d $fa0, $sp, 96 # 8-byte Folded Spill + fst.d $fa0, $sp, 104 # 8-byte Folded Spill fmov.d $fa0, $fs5 fmov.d $fa1, $fs4 fmov.d $fa2, $ft4 fmov.d $fa3, $ft5 fmov.d $fs1, $fa7 fmov.d $fs7, $ft0 - fst.d $ft2, $sp, 48 # 8-byte Folded Spill - fst.d $ft3, $sp, 88 # 8-byte Folded Spill - fst.d $ft8, $sp, 80 # 8-byte Folded Spill + fst.d $ft2, $sp, 56 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft8, $sp, 88 # 8-byte Folded Spill pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - fld.d $ft8, $sp, 80 # 8-byte Folded Reload - fld.d $ft3, $sp, 88 # 8-byte Folded Reload - fld.d $ft2, $sp, 48 # 8-byte Folded Reload - fld.d $ft1, $sp, 296 # 8-byte Folded Reload + fld.d $ft8, $sp, 88 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload + fld.d $ft2, $sp, 56 # 8-byte Folded Reload + fld.d $ft1, $sp, 328 # 8-byte Folded Reload fmov.d $ft0, $fs7 fmov.d $fa7, $fs1 fmov.d $fs1, $fa0 - fld.d $fa0, $sp, 96 # 8-byte Folded Reload + fld.d $fa0, $sp, 104 # 8-byte Folded Reload fmov.d $fs7, $fa1 b .LBB4_22 .LBB4_43: # in Loop: Header=BB4_13 Depth=4 fcmp.cor.d $fcc0, $fa1, $fa1 bcnez $fcc0, .LBB4_23 # %bb.44: # in Loop: Header=BB4_13 Depth=4 - fst.d $fa0, $sp, 96 # 8-byte Folded Spill + fst.d $fa0, $sp, 104 # 8-byte Folded Spill fmov.d $fa0, $fs5 fmov.d $fa1, $fs4 fmov.d $fa2, $ft2 @@ -2720,11 +2714,11 @@ _ZL13BM_COUPLE_RAWRN9benchmark5StateE: # @_ZL13BM_COUPLE_RAWRN9benchmark5StateE pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 fmov.d $ft8, $fs0 - fld.d $ft1, $sp, 296 # 8-byte Folded Reload + fld.d $ft1, $sp, 328 # 8-byte Folded Reload fmov.d $ft0, $fs5 fmov.d $fa7, $fs4 fmov.d $fa2, $fa0 - fld.d $fa0, $sp, 96 # 8-byte Folded Reload + fld.d $fa0, $sp, 104 # 8-byte Folded Reload b .LBB4_23 .LBB4_45: # in Loop: Header=BB4_13 Depth=4 fcmp.cor.d $fcc0, $fa1, $fa1 @@ -2747,41 +2741,41 @@ _ZL13BM_COUPLE_RAWRN9benchmark5StateE: # @_ZL13BM_COUPLE_RAWRN9benchmark5StateE b .LBB4_24 .LBB4_47: # %_ZN9benchmark5State3endEv.exit._crit_edge .Ltmp12: # EH_LABEL - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 112 # 8-byte Folded Reload pcaddu18i $ra, %call36(_ZN9benchmark5State17FinishKeepRunningEv) jirl $ra, $ra, 0 .Ltmp13: # EH_LABEL # %bb.48: # %_ZNK9benchmark5State13StateIteratorneERKS1_.exit - ld.d $a0, $sp, 376 + ld.d $a0, $sp, 408 beqz $a0, .LBB4_50 # %bb.49: pcaddu18i $ra, %call36(_ZdaPv) jirl $ra, $ra, 0 .LBB4_50: # %_ZN7ADomainD2Ev.exit - fld.d $fs7, $sp, 392 # 8-byte Folded Reload - fld.d $fs6, $sp, 400 # 8-byte Folded Reload - fld.d $fs5, $sp, 408 # 8-byte Folded Reload - fld.d $fs4, $sp, 416 # 8-byte Folded Reload - fld.d $fs3, $sp, 424 # 8-byte Folded Reload - fld.d $fs2, $sp, 432 # 8-byte Folded Reload - fld.d $fs1, $sp, 440 # 8-byte Folded Reload - fld.d $fs0, $sp, 448 # 8-byte Folded Reload - ld.d $s8, $sp, 456 # 8-byte Folded Reload - ld.d $s7, $sp, 464 # 8-byte Folded Reload - ld.d $s6, $sp, 472 # 8-byte Folded Reload - ld.d $s5, $sp, 480 # 8-byte Folded Reload - ld.d $s4, $sp, 488 # 8-byte Folded Reload - ld.d $s3, $sp, 496 # 8-byte Folded Reload - ld.d $s2, $sp, 504 # 8-byte Folded Reload - ld.d $s1, $sp, 512 # 8-byte Folded Reload - ld.d $s0, $sp, 520 # 8-byte Folded Reload - ld.d $fp, $sp, 528 # 8-byte Folded Reload - ld.d $ra, $sp, 536 # 8-byte Folded Reload - addi.d $sp, $sp, 544 + fld.d $fs7, $sp, 424 # 8-byte Folded Reload + fld.d $fs6, $sp, 432 # 8-byte Folded Reload + fld.d $fs5, $sp, 440 # 8-byte Folded Reload + fld.d $fs4, $sp, 448 # 8-byte Folded Reload + fld.d $fs3, $sp, 456 # 8-byte Folded Reload + fld.d $fs2, $sp, 464 # 8-byte Folded Reload + fld.d $fs1, $sp, 472 # 8-byte Folded Reload + fld.d $fs0, $sp, 480 # 8-byte Folded Reload + ld.d $s8, $sp, 488 # 8-byte Folded Reload + ld.d $s7, $sp, 496 # 8-byte Folded Reload + ld.d $s6, $sp, 504 # 8-byte Folded Reload + ld.d $s5, $sp, 512 # 8-byte Folded Reload + ld.d $s4, $sp, 520 # 8-byte Folded Reload + ld.d $s3, $sp, 528 # 8-byte Folded Reload + ld.d $s2, $sp, 536 # 8-byte Folded Reload + ld.d $s1, $sp, 544 # 8-byte Folded Reload + ld.d $s0, $sp, 552 # 8-byte Folded Reload + ld.d $fp, $sp, 560 # 8-byte Folded Reload + ld.d $ra, $sp, 568 # 8-byte Folded Reload + addi.d $sp, $sp, 576 ret .LBB4_51: .Ltmp14: # EH_LABEL - ld.d $a1, $sp, 376 + ld.d $a1, $sp, 408 move $fp, $a0 beqz $a1, .LBB4_53 # %bb.52: @@ -3019,15 +3013,9 @@ _ZL10BM_FIR_RAWRN9benchmark5StateE: # @_ZL10BM_FIR_RAWRN9benchmark5StateE .size _ZL10BM_FIR_RAWRN9benchmark5StateE, .Lfunc_end5-_ZL10BM_FIR_RAWRN9benchmark5StateE .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN7ADomainC2Eii -.LCPI6_0: - .dword 0x4050000000000000 # double 64 -.LCPI6_1: - .dword 0x4063800000000000 # double 156 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI6_2: + .p2align 4, 0x0 # -- Begin function _ZN7ADomainC2Eii +.LCPI6_0: .word 2 # 0x2 .word 3 # 0x3 .word 4 # 0x4 @@ -3068,79 +3056,80 @@ _ZN7ADomainC2Eii: # @_ZN7ADomainC2Eii ori $a2, $zero, 2 lu32i.d $a2, 1 st.d $a2, $fp, 4 - beq $a1, $a0, .LBB6_9 + beq $a1, $a0, .LBB6_10 # %bb.1: ori $a0, $zero, 1 beq $a1, $a0, .LBB6_6 # %bb.2: # implicit-def: $r29 - bnez $a1, .LBB6_17 + bnez $a1, .LBB6_18 # %bb.3: ori $a0, $zero, 3 - beq $s1, $a0, .LBB6_14 + beq $s1, $a0, .LBB6_15 # %bb.4: ori $a0, $zero, 2 # implicit-def: $r29 - bne $s1, $a0, .LBB6_17 + bne $s1, $a0, .LBB6_18 # %bb.5: pcalau12i $a0, %got_pc_hi20(_ZN7ADomain18loop_length_factorE) ld.d $a0, $a0, %got_pc_lo12(_ZN7ADomain18loop_length_factorE) fld.d $fa0, $a0, 0 - pcalau12i $a0, %pc_hi20(.LCPI6_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI6_1) - fmul.d $fa0, $fa0, $fa1 - b .LBB6_16 + ori $a0, $zero, 0 + lu32i.d $a0, 229376 + lu52i.d $a0, $a0, 1030 + b .LBB6_9 .LBB6_6: ori $a0, $zero, 3 - beq $s1, $a0, .LBB6_12 + beq $s1, $a0, .LBB6_13 # %bb.7: ori $a0, $zero, 2 # implicit-def: $r29 - bne $s1, $a0, .LBB6_17 + bne $s1, $a0, .LBB6_18 # %bb.8: pcalau12i $a0, %got_pc_hi20(_ZN7ADomain18loop_length_factorE) ld.d $a0, $a0, %got_pc_lo12(_ZN7ADomain18loop_length_factorE) fld.d $fa0, $a0, 0 - pcalau12i $a0, %pc_hi20(.LCPI6_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI6_0) - fmul.d $fa0, $fa0, $fa1 - b .LBB6_16 + lu52i.d $a0, $zero, 1029 .LBB6_9: + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 + b .LBB6_17 +.LBB6_10: ori $a0, $zero, 3 - beq $s1, $a0, .LBB6_13 -# %bb.10: + beq $s1, $a0, .LBB6_14 +# %bb.11: ori $a0, $zero, 2 # implicit-def: $r29 - bne $s1, $a0, .LBB6_17 -# %bb.11: + bne $s1, $a0, .LBB6_18 +# %bb.12: pcalau12i $a0, %got_pc_hi20(_ZN7ADomain18loop_length_factorE) ld.d $a0, $a0, %got_pc_lo12(_ZN7ADomain18loop_length_factorE) fld.d $fa0, $a0, 0 vldi $vr1, -992 - b .LBB6_15 -.LBB6_12: + b .LBB6_16 +.LBB6_13: pcalau12i $a0, %got_pc_hi20(_ZN7ADomain18loop_length_factorE) ld.d $a0, $a0, %got_pc_lo12(_ZN7ADomain18loop_length_factorE) fld.d $fa0, $a0, 0 vldi $vr1, -976 - b .LBB6_15 -.LBB6_13: + b .LBB6_16 +.LBB6_14: pcalau12i $a0, %got_pc_hi20(_ZN7ADomain18loop_length_factorE) ld.d $a0, $a0, %got_pc_lo12(_ZN7ADomain18loop_length_factorE) fld.d $fa0, $a0, 0 vldi $vr1, -1008 - b .LBB6_15 -.LBB6_14: + b .LBB6_16 +.LBB6_15: pcalau12i $a0, %got_pc_hi20(_ZN7ADomain18loop_length_factorE) ld.d $a0, $a0, %got_pc_lo12(_ZN7ADomain18loop_length_factorE) fld.d $fa0, $a0, 0 vldi $vr1, -964 -.LBB6_15: - fmul.d $fa0, $fa0, $fa1 .LBB6_16: + fmul.d $fa0, $fa0, $fa1 +.LBB6_17: ftintrz.w.d $fa0, $fa0 movfr2gr.s $s6, $fa0 -.LBB6_17: +.LBB6_18: ori $a0, $zero, 2 ori $a1, $zero, 2 lu32i.d $a1, 2 @@ -3150,31 +3139,31 @@ _ZN7ADomainC2Eii: # @_ZN7ADomainC2Eii st.w $s3, $fp, 28 addi.w $s4, $s6, 3 st.w $s4, $fp, 36 - bne $s1, $a0, .LBB6_19 -# %bb.18: + bne $s1, $a0, .LBB6_20 +# %bb.19: move $s5, $zero st.w $zero, $fp, 20 st.w $zero, $fp, 32 st.w $zero, $fp, 40 mul.w $s7, $s4, $s4 - b .LBB6_21 -.LBB6_19: + b .LBB6_22 +.LBB6_20: ori $a0, $zero, 3 - bne $s1, $a0, .LBB6_22 -# %bb.20: + bne $s1, $a0, .LBB6_23 +# %bb.21: ori $a0, $zero, 2 st.w $a0, $fp, 20 st.w $s3, $fp, 32 mul.d $s5, $s4, $s4 st.w $s5, $fp, 40 mul.w $s7, $s5, $s4 -.LBB6_21: +.LBB6_22: st.w $s7, $fp, 44 - b .LBB6_23 -.LBB6_22: # %._crit_edge89 + b .LBB6_24 +.LBB6_23: # %._crit_edge89 ld.w $s7, $fp, 44 ld.w $s5, $fp, 40 -.LBB6_23: +.LBB6_24: st.w $zero, $fp, 48 addi.d $a0, $s7, -1 st.w $a0, $fp, 52 @@ -3198,21 +3187,21 @@ _ZN7ADomainC2Eii: # @_ZN7ADomainC2Eii jirl $ra, $ra, 0 move $s0, $a0 st.d $a0, $fp, 72 - blez $s7, .LBB6_25 -# %bb.24: # %.lr.ph.preheader + blez $s7, .LBB6_26 +# %bb.25: # %.lr.ph.preheader ori $a1, $zero, 255 move $a0, $s0 move $a2, $s2 pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 -.LBB6_25: # %._crit_edge +.LBB6_26: # %._crit_edge ori $a0, $zero, 2 st.w $zero, $fp, 80 - bne $s1, $a0, .LBB6_36 -# %bb.26: # %.preheader + bne $s1, $a0, .LBB6_37 +# %bb.27: # %.preheader addi.w $a1, $s6, 0 - blt $a1, $a0, .LBB6_51 -# %bb.27: # %.lr.ph72.us.preheader + blt $a1, $a0, .LBB6_52 +# %bb.28: # %.lr.ph72.us.preheader move $a0, $zero addi.d $a4, $s6, -1 bstrpick.d $a2, $a4, 31, 0 @@ -3223,32 +3212,32 @@ _ZN7ADomainC2Eii: # @_ZN7ADomainC2Eii ori $a4, $zero, 2 bstrins.d $a4, $a5, 31, 3 addi.d $a5, $s0, 16 - pcalau12i $a6, %pc_hi20(.LCPI6_2) - vld $vr0, $a6, %pc_lo12(.LCPI6_2) + pcalau12i $a6, %pc_hi20(.LCPI6_0) + vld $vr0, $a6, %pc_lo12(.LCPI6_0) ori $a6, $zero, 6 alsl.w $a6, $s6, $a6, 1 ori $a7, $zero, 9 - b .LBB6_29 + b .LBB6_30 .p2align 4, , 16 -.LBB6_28: # %._crit_edge73.us - # in Loop: Header=BB6_29 Depth=1 +.LBB6_29: # %._crit_edge73.us + # in Loop: Header=BB6_30 Depth=1 addi.w $t1, $t0, 1 add.d $a6, $a6, $s4 - beq $t0, $a1, .LBB6_50 -.LBB6_29: # %.lr.ph72.us + beq $t0, $a1, .LBB6_51 +.LBB6_30: # %.lr.ph72.us # =>This Loop Header: Depth=1 - # Child Loop BB6_32 Depth 2 - # Child Loop BB6_35 Depth 2 + # Child Loop BB6_33 Depth 2 + # Child Loop BB6_36 Depth 2 move $t0, $t1 addi.w $t1, $a0, 0 - bgeu $a1, $a7, .LBB6_31 -# %bb.30: # in Loop: Header=BB6_29 Depth=1 + bgeu $a1, $a7, .LBB6_32 +# %bb.31: # in Loop: Header=BB6_30 Depth=1 move $a0, $t1 ori $t3, $zero, 2 - b .LBB6_34 + b .LBB6_35 .p2align 4, , 16 -.LBB6_31: # %vector.ph106 - # in Loop: Header=BB6_29 Depth=1 +.LBB6_32: # %vector.ph106 + # in Loop: Header=BB6_30 Depth=1 mul.d $t2, $s4, $t0 add.d $a0, $t1, $a3 vreplgr2vr.w $vr1, $t2 @@ -3257,8 +3246,8 @@ _ZN7ADomainC2Eii: # @_ZN7ADomainC2Eii move $t2, $a3 vori.b $vr3, $vr0, 0 .p2align 4, , 16 -.LBB6_32: # %vector.body112 - # Parent Loop BB6_29 Depth=1 +.LBB6_33: # %vector.body112 + # Parent Loop BB6_30 Depth=1 # => This Inner Loop Header: Depth=2 vadd.w $vr4, $vr1, $vr3 vadd.w $vr5, $vr3, $vr2 @@ -3267,39 +3256,39 @@ _ZN7ADomainC2Eii: # @_ZN7ADomainC2Eii vaddi.wu $vr3, $vr3, 8 addi.d $t2, $t2, -8 addi.d $t1, $t1, 32 - bnez $t2, .LBB6_32 -# %bb.33: # %middle.block119 - # in Loop: Header=BB6_29 Depth=1 + bnez $t2, .LBB6_33 +# %bb.34: # %middle.block119 + # in Loop: Header=BB6_30 Depth=1 move $t3, $a4 - beq $a3, $a2, .LBB6_28 -.LBB6_34: # %scalar.ph104.preheader - # in Loop: Header=BB6_29 Depth=1 + beq $a3, $a2, .LBB6_29 +.LBB6_35: # %scalar.ph104.preheader + # in Loop: Header=BB6_30 Depth=1 alsl.d $t1, $a0, $s0, 2 sub.d $t2, $s3, $t3 add.d $t3, $t3, $a6 .p2align 4, , 16 -.LBB6_35: # %scalar.ph104 - # Parent Loop BB6_29 Depth=1 +.LBB6_36: # %scalar.ph104 + # Parent Loop BB6_30 Depth=1 # => This Inner Loop Header: Depth=2 st.w $t3, $t1, 0 addi.d $a0, $a0, 1 addi.d $t1, $t1, 4 addi.w $t2, $t2, -1 addi.d $t3, $t3, 1 - bnez $t2, .LBB6_35 - b .LBB6_28 -.LBB6_36: + bnez $t2, .LBB6_36 + b .LBB6_29 +.LBB6_37: ori $a0, $zero, 3 - bne $s1, $a0, .LBB6_51 -# %bb.37: + bne $s1, $a0, .LBB6_52 +# %bb.38: ld.w $a1, $fp, 20 ld.w $a2, $fp, 32 - bge $a1, $a2, .LBB6_51 -# %bb.38: + bge $a1, $a2, .LBB6_52 +# %bb.39: addi.w $a3, $s6, 0 ori $a4, $zero, 2 - blt $a3, $a4, .LBB6_51 -# %bb.39: # %.lr.ph57.us.us.preheader + blt $a3, $a4, .LBB6_52 +# %bb.40: # %.lr.ph57.us.us.preheader move $a0, $zero addi.d $a7, $s6, -1 bstrpick.d $a5, $a7, 31, 0 @@ -3312,56 +3301,56 @@ _ZN7ADomainC2Eii: # @_ZN7ADomainC2Eii alsl.d $t0, $s6, $t0, 1 addi.d $t0, $t0, 6 ori $t1, $zero, 9 - b .LBB6_41 + b .LBB6_42 .p2align 4, , 16 -.LBB6_40: # %._crit_edge58.split.us.us.us - # in Loop: Header=BB6_41 Depth=1 +.LBB6_41: # %._crit_edge58.split.us.us.us + # in Loop: Header=BB6_42 Depth=1 addi.w $a1, $a1, 1 add.d $t0, $t0, $s5 - beq $a1, $a2, .LBB6_50 -.LBB6_41: # %.lr.ph57.us.us + beq $a1, $a2, .LBB6_51 +.LBB6_42: # %.lr.ph57.us.us # =>This Loop Header: Depth=1 - # Child Loop BB6_43 Depth 2 - # Child Loop BB6_46 Depth 3 - # Child Loop BB6_49 Depth 3 + # Child Loop BB6_44 Depth 2 + # Child Loop BB6_47 Depth 3 + # Child Loop BB6_50 Depth 3 mul.d $t2, $s5, $a1 ori $t5, $zero, 2 move $t3, $t0 - b .LBB6_43 + b .LBB6_44 .p2align 4, , 16 -.LBB6_42: # %._crit_edge50.us.us.us - # in Loop: Header=BB6_43 Depth=2 +.LBB6_43: # %._crit_edge50.us.us.us + # in Loop: Header=BB6_44 Depth=2 addi.w $t5, $t4, 1 add.d $t3, $t3, $s4 - beq $t4, $a3, .LBB6_40 -.LBB6_43: # %.lr.ph49.us.us.us - # Parent Loop BB6_41 Depth=1 + beq $t4, $a3, .LBB6_41 +.LBB6_44: # %.lr.ph49.us.us.us + # Parent Loop BB6_42 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB6_46 Depth 3 - # Child Loop BB6_49 Depth 3 + # Child Loop BB6_47 Depth 3 + # Child Loop BB6_50 Depth 3 move $t4, $t5 addi.w $t5, $a0, 0 - bgeu $a3, $t1, .LBB6_45 -# %bb.44: # in Loop: Header=BB6_43 Depth=2 + bgeu $a3, $t1, .LBB6_46 +# %bb.45: # in Loop: Header=BB6_44 Depth=2 move $a0, $t5 ori $t7, $zero, 2 - b .LBB6_48 + b .LBB6_49 .p2align 4, , 16 -.LBB6_45: # %vector.ph - # in Loop: Header=BB6_43 Depth=2 +.LBB6_46: # %vector.ph + # in Loop: Header=BB6_44 Depth=2 mul.d $a0, $s4, $t4 add.d $t6, $a0, $t2 add.d $a0, $t5, $a6 - pcalau12i $t7, %pc_hi20(.LCPI6_2) - vld $vr0, $t7, %pc_lo12(.LCPI6_2) + pcalau12i $t7, %pc_hi20(.LCPI6_0) + vld $vr0, $t7, %pc_lo12(.LCPI6_0) vreplgr2vr.w $vr1, $t6 vaddi.wu $vr2, $vr1, 4 alsl.d $t5, $t5, $a7, 2 move $t6, $a6 .p2align 4, , 16 -.LBB6_46: # %vector.body - # Parent Loop BB6_41 Depth=1 - # Parent Loop BB6_43 Depth=2 +.LBB6_47: # %vector.body + # Parent Loop BB6_42 Depth=1 + # Parent Loop BB6_44 Depth=2 # => This Inner Loop Header: Depth=3 vadd.w $vr3, $vr0, $vr1 vadd.w $vr4, $vr0, $vr2 @@ -3370,31 +3359,31 @@ _ZN7ADomainC2Eii: # @_ZN7ADomainC2Eii vaddi.wu $vr0, $vr0, 8 addi.d $t6, $t6, -8 addi.d $t5, $t5, 32 - bnez $t6, .LBB6_46 -# %bb.47: # %middle.block - # in Loop: Header=BB6_43 Depth=2 + bnez $t6, .LBB6_47 +# %bb.48: # %middle.block + # in Loop: Header=BB6_44 Depth=2 move $t7, $a4 - beq $a6, $a5, .LBB6_42 -.LBB6_48: # %scalar.ph.preheader - # in Loop: Header=BB6_43 Depth=2 + beq $a6, $a5, .LBB6_43 +.LBB6_49: # %scalar.ph.preheader + # in Loop: Header=BB6_44 Depth=2 alsl.d $t5, $a0, $s0, 2 sub.d $t6, $s3, $t7 add.d $t7, $t7, $t3 .p2align 4, , 16 -.LBB6_49: # %scalar.ph - # Parent Loop BB6_41 Depth=1 - # Parent Loop BB6_43 Depth=2 +.LBB6_50: # %scalar.ph + # Parent Loop BB6_42 Depth=1 + # Parent Loop BB6_44 Depth=2 # => This Inner Loop Header: Depth=3 st.w $t7, $t5, 0 addi.d $a0, $a0, 1 addi.d $t5, $t5, 4 addi.w $t6, $t6, -1 addi.d $t7, $t7, 1 - bnez $t6, .LBB6_49 - b .LBB6_42 -.LBB6_50: # %.loopexit.sink.split + bnez $t6, .LBB6_50 + b .LBB6_43 +.LBB6_51: # %.loopexit.sink.split st.w $a0, $fp, 80 -.LBB6_51: # %.loopexit +.LBB6_52: # %.loopexit ld.d $s7, $sp, 16 # 8-byte Folded Reload ld.d $s6, $sp, 24 # 8-byte Folded Reload ld.d $s5, $sp, 32 # 8-byte Folded Reload diff --git a/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/__/LCALSStats.s b/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/__/LCALSStats.s index 65ccd92d..95272aa5 100644 --- a/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/__/LCALSStats.s +++ b/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/__/LCALSStats.s @@ -868,12 +868,7 @@ _Z19getLoopSuiteRunInfov: # @_Z19getLoopSuiteRunInfov .Lfunc_end1: .size _Z19getLoopSuiteRunInfov, .Lfunc_end1-_Z19getLoopSuiteRunInfov # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm -.LCPI2_0: - .dword 0x3fb999999999999a # double 0.10000000000000001 - .text - .globl _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm + .globl _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm # -- Begin function _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm .p2align 5 .type _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm,@function _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm: # @_Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm @@ -1018,10 +1013,13 @@ _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcE st.d $a0, $s4, 560 beqz $a1, .LBB2_15 # %bb.13: # %.lr.ph18.preheader - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI2_0) move $fp, $zero move $s0, $zero + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1019 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB2_14: # %.lr.ph18 # =>This Inner Loop Header: Depth=1 diff --git a/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/__/LCALSSuite.s b/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/__/LCALSSuite.s index a1e8c653..e35fa2d7 100644 --- a/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/__/LCALSSuite.s +++ b/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/__/LCALSSuite.s @@ -23,33 +23,21 @@ _Z11getLoopDatav: # @_Z11getLoopDatav .LCPI1_1: .dword 0x3ff6666666666666 # double 1.3999999999999999 .dword 0x3ff0000000000000 # double 1 -.LCPI1_5: +.LCPI1_2: .dword 8 # 0x8 .dword 4923084613239392580 # 0x44524f5f43534944 -.LCPI1_6: +.LCPI1_3: .dword 8 # 0x8 .dword 4914094937701898568 # 0x44325f4f52445948 -.LCPI1_7: +.LCPI1_4: .dword 8 # 0x8 .dword 4913813462725187912 # 0x44315f4f52445948 -.LCPI1_8: +.LCPI1_5: .dword 8 # 0x8 .dword 6074873621086556756 # 0x544e495f50415254 -.LCPI1_11: +.LCPI1_6: .dword 8 # 0x8 .dword 5786931235628926290 # 0x504f4f4c5f464552 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI1_2: - .dword 0x40e5972000000000 # double 44217 -.LCPI1_3: - .dword 0x40b3890000000000 # double 5001 -.LCPI1_4: - .dword 0x4065600000000000 # double 171 -.LCPI1_9: - .dword 0x4063800000000000 # double 156 -.LCPI1_10: - .dword 0x4050000000000000 # double 64 .text .globl _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd .p2align 5 @@ -640,21 +628,27 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define .Ltmp34: # EH_LABEL # %bb.92: move $s8, $a0 - pcalau12i $a0, %pc_hi20(.LCPI1_2) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_2) - pcalau12i $a0, %pc_hi20(.LCPI1_3) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_3) + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, 366368 + lu52i.d $a1, $a1, 1038 + movgr2fr.d $fa0, $a1 fmul.d $fa0, $fs0, $fa0 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a0, $fa0 - fmul.d $fa0, $fs0, $fa1 + movfr2gr.s $a1, $fa0 + st.w $a1, $s8, 0 + ori $a1, $zero, 0 + lu32i.d $a1, 231680 + lu52i.d $a1, $a1, 1035 + movgr2fr.d $fa0, $a1 + fmul.d $fa0, $fs0, $fa0 ftintrz.w.d $fa0, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI1_4) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_4) movfr2gr.s $a1, $fa0 - st.w $a0, $s8, 0 st.w $a1, $s8, 4 - fmul.d $fa0, $fs0, $fa1 + lu32i.d $a0, 352256 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa0, $a0 + fmul.d $fa0, $fs0, $fa0 ftintrz.w.d $fa0, $fa0 ld.d $s7, $sp, 96 # 8-byte Folded Reload ld.w $a0, $s7, 32 @@ -732,8 +726,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define jr $a0 .LBB1_99: # %._crit_edge.i.i352 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_11) - vld $vr0, $a0, %pc_lo12(.LCPI1_11) + pcalau12i $a0, %pc_hi20(.LCPI1_6) + vld $vr0, $a0, %pc_lo12(.LCPI1_6) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -936,8 +930,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define b .LBB1_133 .LBB1_112: # %._crit_edge.i.i732 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_8) - vld $vr0, $a0, %pc_lo12(.LCPI1_8) + pcalau12i $a0, %pc_hi20(.LCPI1_5) + vld $vr0, $a0, %pc_lo12(.LCPI1_5) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -1687,8 +1681,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define b .LBB1_205 .LBB1_158: # %._crit_edge.i.i748 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_7) - vld $vr0, $a0, %pc_lo12(.LCPI1_7) + pcalau12i $a0, %pc_hi20(.LCPI1_4) + vld $vr0, $a0, %pc_lo12(.LCPI1_4) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -1761,16 +1755,18 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define # in Loop: Header=BB1_95 Depth=1 ld.d $a0, $sp, 32 # 8-byte Folded Reload ld.d $a0, $a0, 0 - ld.d $a1, $sp, 8 # 8-byte Folded Reload - fld.d $fa0, $a1, %pc_lo12(_ZN7ADomain18loop_length_factorE) - pcalau12i $a1, %pc_hi20(.LCPI1_9) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_9) - fld.d $fa2, $a0, 0 - fmul.d $fa1, $fa0, $fa1 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 + fld.d $fa1, $a0, 0 + ld.d $a0, $sp, 8 # 8-byte Folded Reload + fld.d $fa0, $a0, %pc_lo12(_ZN7ADomain18loop_length_factorE) + ori $a0, $zero, 0 + lu32i.d $a0, 229376 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa2, $a0 + fmul.d $fa2, $fa0, $fa2 + ftintrz.w.d $fa2, $fa2 + movfr2gr.s $a0, $fa2 ori $a2, $zero, 2 - fst.d $fa2, $sp, 168 + fst.d $fa1, $sp, 168 blt $a0, $a2, .LBB1_211 # %bb.164: # %.lr.ph72.us.i.preheader # in Loop: Header=BB1_95 Depth=1 @@ -2202,8 +2198,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define b .LBB1_205 .LBB1_196: # %._crit_edge.i.i988 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_5) - vld $vr0, $a0, %pc_lo12(.LCPI1_5) + pcalau12i $a0, %pc_hi20(.LCPI1_2) + vld $vr0, $a0, %pc_lo12(.LCPI1_2) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -2297,8 +2293,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define b .LBB1_205 .LBB1_202: # %._crit_edge.i.i956 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_6) - vld $vr0, $a0, %pc_lo12(.LCPI1_6) + pcalau12i $a0, %pc_hi20(.LCPI1_3) + vld $vr0, $a0, %pc_lo12(.LCPI1_3) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -2382,9 +2378,9 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define move $a3, $zero .LBB1_212: # %_ZN7ADomainC2Eii.exit527 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a1, %pc_hi20(.LCPI1_10) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_10) ld.d $a1, $sp, 368 + lu52i.d $a4, $zero, 1029 + movgr2fr.d $fa1, $a4 fmul.d $fa1, $fa0, $fa1 ftintrz.w.d $fa1, $fa1 movfr2gr.s $a4, $fa1 @@ -4488,15 +4484,9 @@ GCC_except_table7: .LCPI8_0: .dword 0x3fb999999999999a # double 0.10000000000000001 .dword 0x3fc999999999999a # double 0.20000000000000001 -.LCPI8_3: +.LCPI8_1: .dword 0x3fc999999999999a # double 0.20000000000000001 .dword 0x3fd3333333333333 # double 0.29999999999999999 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI8_1: - .dword 0x3ff199999999999a # double 1.1000000000000001 -.LCPI8_2: - .dword 0x3ff1f9a6b50b0f28 # double 1.1234500000000001 .text .globl _Z8loopInitjR8LoopStat .p2align 5 @@ -4747,8 +4737,8 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat .LBB8_40: pcalau12i $a0, %pc_hi20(.LCPI8_0) addi.d $a0, $a0, %pc_lo12(.LCPI8_0) - pcalau12i $a1, %pc_hi20(.LCPI8_3) - addi.d $a1, $a1, %pc_lo12(.LCPI8_3) + pcalau12i $a1, %pc_hi20(.LCPI8_1) + addi.d $a1, $a1, %pc_lo12(.LCPI8_1) ld.w $a3, $s1, 1032 blez $a3, .LBB8_577 # %bb.41: # %.lr.ph.preheader.i430 @@ -4962,8 +4952,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat pcalau12i $a2, %pc_hi20(.LCPI8_0) addi.d $a2, $a2, %pc_lo12(.LCPI8_0) fldx.d $fa0, $a2, $a0 - ld.d $a2, $s1, 472 + ld.d $a3, $s1, 472 ori $a0, $zero, 4 + lu12i.w $a4, -419431 + lu12i.w $a2, -307024 bgeu $a1, $a0, .LBB8_920 # %bb.78: move $a0, $zero @@ -5152,12 +5144,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.102: # %middle.block3803 beq $a1, $a2, .LBB8_105 .LBB8_103: # %.lr.ph.i184.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_104: # %.lr.ph.i184 # =>This Inner Loop Header: Depth=1 @@ -5246,12 +5244,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.110: # %middle.block3819 beq $a1, $a2, .LBB8_113 .LBB8_111: # %.lr.ph.i192.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_112: # %.lr.ph.i192 # =>This Inner Loop Header: Depth=1 @@ -5340,12 +5344,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.118: # %middle.block3835 beq $a1, $a2, .LBB8_121 .LBB8_119: # %.lr.ph.i200.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_120: # %.lr.ph.i200 # =>This Inner Loop Header: Depth=1 @@ -5434,12 +5444,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.126: # %middle.block3851 beq $a1, $a2, .LBB8_129 .LBB8_127: # %.lr.ph.i208.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_128: # %.lr.ph.i208 # =>This Inner Loop Header: Depth=1 @@ -5528,12 +5544,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.134: # %middle.block3867 beq $a1, $a2, .LBB8_137 .LBB8_135: # %.lr.ph.i216.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_136: # %.lr.ph.i216 # =>This Inner Loop Header: Depth=1 @@ -5622,12 +5644,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.142: # %middle.block3883 beq $a0, $a1, .LBB8_1187 .LBB8_143: # %.lr.ph.i224.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_144: # %.lr.ph.i224 # =>This Inner Loop Header: Depth=1 @@ -5702,12 +5730,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.147: # %middle.block3547 beq $a1, $a2, .LBB8_150 .LBB8_148: # %.lr.ph.i232.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_149: # %.lr.ph.i232 # =>This Inner Loop Header: Depth=1 @@ -5796,12 +5830,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.155: # %middle.block3563 beq $a1, $a2, .LBB8_158 .LBB8_156: # %.lr.ph.i240.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_157: # %.lr.ph.i240 # =>This Inner Loop Header: Depth=1 @@ -5890,12 +5930,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.163: # %middle.block3579 beq $a1, $a2, .LBB8_166 .LBB8_164: # %.lr.ph.i248.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_165: # %.lr.ph.i248 # =>This Inner Loop Header: Depth=1 @@ -5984,12 +6030,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.171: # %middle.block3595 beq $a1, $a2, .LBB8_174 .LBB8_172: # %.lr.ph.i256.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_173: # %.lr.ph.i256 # =>This Inner Loop Header: Depth=1 @@ -6078,12 +6130,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.179: # %middle.block3611 beq $a1, $a2, .LBB8_182 .LBB8_180: # %.lr.ph.i264.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_181: # %.lr.ph.i264 # =>This Inner Loop Header: Depth=1 @@ -6172,12 +6230,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.187: # %middle.block3627 beq $a1, $a2, .LBB8_190 .LBB8_188: # %.lr.ph.i272.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_189: # %.lr.ph.i272 # =>This Inner Loop Header: Depth=1 @@ -6266,12 +6330,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.195: # %middle.block3643 beq $a1, $a2, .LBB8_198 .LBB8_196: # %.lr.ph.i280.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_197: # %.lr.ph.i280 # =>This Inner Loop Header: Depth=1 @@ -6360,12 +6430,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.203: # %middle.block3659 beq $a1, $a2, .LBB8_206 .LBB8_204: # %.lr.ph.i288.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_205: # %.lr.ph.i288 # =>This Inner Loop Header: Depth=1 @@ -6454,12 +6530,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.211: # %middle.block3675 beq $a1, $a2, .LBB8_214 .LBB8_212: # %.lr.ph.i296.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_213: # %.lr.ph.i296 # =>This Inner Loop Header: Depth=1 @@ -6548,12 +6630,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.219: # %middle.block3691 beq $a1, $a2, .LBB8_222 .LBB8_220: # %.lr.ph.i304.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_221: # %.lr.ph.i304 # =>This Inner Loop Header: Depth=1 @@ -6642,12 +6730,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.227: # %middle.block3707 beq $a1, $a2, .LBB8_230 .LBB8_228: # %.lr.ph.i312.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_229: # %.lr.ph.i312 # =>This Inner Loop Header: Depth=1 @@ -6736,12 +6830,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.235: # %middle.block3723 beq $a1, $a2, .LBB8_238 .LBB8_236: # %.lr.ph.i320.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_237: # %.lr.ph.i320 # =>This Inner Loop Header: Depth=1 @@ -6830,12 +6930,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.243: # %middle.block3739 beq $a1, $a2, .LBB8_246 .LBB8_244: # %.lr.ph.i328.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_245: # %.lr.ph.i328 # =>This Inner Loop Header: Depth=1 @@ -6924,12 +7030,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.251: # %middle.block3755 beq $a1, $a2, .LBB8_254 .LBB8_252: # %.lr.ph.i336.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_253: # %.lr.ph.i336 # =>This Inner Loop Header: Depth=1 @@ -7018,12 +7130,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.259: # %middle.block3771 beq $a1, $a2, .LBB8_262 .LBB8_260: # %.lr.ph.i344.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_261: # %.lr.ph.i344 # =>This Inner Loop Header: Depth=1 @@ -7112,12 +7230,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.267: # %middle.block3787 beq $a0, $a1, .LBB8_1187 .LBB8_268: # %.lr.ph.i352.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_269: # %.lr.ph.i352 # =>This Inner Loop Header: Depth=1 @@ -7215,12 +7339,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.274: # %middle.block2006 beq $a1, $a2, .LBB8_277 .LBB8_275: # %.lr.ph.i1118.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_276: # %.lr.ph.i1118 # =>This Inner Loop Header: Depth=1 @@ -7309,12 +7439,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.282: # %middle.block2022 beq $a1, $a2, .LBB8_285 .LBB8_283: # %.lr.ph.i1126.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_284: # %.lr.ph.i1126 # =>This Inner Loop Header: Depth=1 @@ -7403,12 +7539,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.290: # %middle.block2038 beq $a1, $a2, .LBB8_293 .LBB8_291: # %.lr.ph.i1134.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_292: # %.lr.ph.i1134 # =>This Inner Loop Header: Depth=1 @@ -7497,12 +7639,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.298: # %middle.block2054 beq $a0, $a1, .LBB8_1187 .LBB8_299: # %.lr.ph.i1142.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_300: # %.lr.ph.i1142 # =>This Inner Loop Header: Depth=1 @@ -7577,12 +7725,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.303: # %middle.block2870 beq $a1, $a2, .LBB8_306 .LBB8_304: # %.lr.ph.i692.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_305: # %.lr.ph.i692 # =>This Inner Loop Header: Depth=1 @@ -7671,12 +7825,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.311: # %middle.block2886 beq $a0, $a1, .LBB8_1187 .LBB8_312: # %.lr.ph.i700.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_313: # %.lr.ph.i700 # =>This Inner Loop Header: Depth=1 @@ -7751,12 +7911,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.316: # %middle.block2934 beq $a1, $a2, .LBB8_319 .LBB8_317: # %.lr.ph.i660.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_318: # %.lr.ph.i660 # =>This Inner Loop Header: Depth=1 @@ -7845,12 +8011,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.324: # %middle.block2950 beq $a0, $a1, .LBB8_1187 .LBB8_325: # %.lr.ph.i668.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_326: # %.lr.ph.i668 # =>This Inner Loop Header: Depth=1 @@ -7925,12 +8097,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.329: # %middle.block3046 beq $a1, $a2, .LBB8_332 .LBB8_330: # %.lr.ph.i579.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_331: # %.lr.ph.i579 # =>This Inner Loop Header: Depth=1 @@ -8019,13 +8197,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.337: # %middle.block3062 beq $a1, $a2, .LBB8_340 .LBB8_338: # %.lr.ph.i587.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 - .p2align 4, , 16 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 + .p2align 4, , 16 .LBB8_339: # %.lr.ph.i587 # =>This Inner Loop Header: Depth=1 bstrpick.d $a4, $a1, 31, 0 @@ -8113,12 +8297,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.345: # %middle.block3078 beq $a1, $a2, .LBB8_348 .LBB8_346: # %.lr.ph.i595.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_347: # %.lr.ph.i595 # =>This Inner Loop Header: Depth=1 @@ -8207,12 +8397,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.353: # %middle.block3094 beq $a1, $a2, .LBB8_356 .LBB8_354: # %.lr.ph.i603.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_355: # %.lr.ph.i603 # =>This Inner Loop Header: Depth=1 @@ -8301,12 +8497,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.361: # %middle.block3110 beq $a0, $a1, .LBB8_1187 .LBB8_362: # %.lr.ph.i611.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_363: # %.lr.ph.i611 # =>This Inner Loop Header: Depth=1 @@ -8381,12 +8583,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.366: # %middle.block3030 beq $a0, $a1, .LBB8_1187 .LBB8_367: # %.lr.ph.i620.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_368: # %.lr.ph.i620 # =>This Inner Loop Header: Depth=1 @@ -8461,12 +8669,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.371: # %middle.block3899 beq $a1, $a2, .LBB8_374 .LBB8_372: # %.lr.ph.i.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_373: # %.lr.ph.i # =>This Inner Loop Header: Depth=1 @@ -8555,12 +8769,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.379: # %middle.block3915 beq $a1, $a2, .LBB8_382 .LBB8_380: # %.lr.ph.i168.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_381: # %.lr.ph.i168 # =>This Inner Loop Header: Depth=1 @@ -8649,12 +8869,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.387: # %middle.block3931 beq $a0, $a1, .LBB8_1187 .LBB8_388: # %.lr.ph.i176.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_389: # %.lr.ph.i176 # =>This Inner Loop Header: Depth=1 @@ -8729,12 +8955,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.392: # %middle.block3206 beq $a1, $a2, .LBB8_395 .LBB8_393: # %.lr.ph.i499.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_394: # %.lr.ph.i499 # =>This Inner Loop Header: Depth=1 @@ -8823,12 +9055,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.400: # %middle.block3222 beq $a1, $a2, .LBB8_403 .LBB8_401: # %.lr.ph.i507.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_402: # %.lr.ph.i507 # =>This Inner Loop Header: Depth=1 @@ -8917,12 +9155,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.408: # %middle.block3238 beq $a1, $a2, .LBB8_411 .LBB8_409: # %.lr.ph.i515.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_410: # %.lr.ph.i515 # =>This Inner Loop Header: Depth=1 @@ -9011,12 +9255,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.416: # %middle.block3254 beq $a1, $a2, .LBB8_419 .LBB8_417: # %.lr.ph.i523.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_418: # %.lr.ph.i523 # =>This Inner Loop Header: Depth=1 @@ -9105,12 +9355,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.424: # %middle.block3270 beq $a0, $a1, .LBB8_1187 .LBB8_425: # %.lr.ph.i531.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_426: # %.lr.ph.i531 # =>This Inner Loop Header: Depth=1 @@ -9185,12 +9441,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.429: # %middle.block2246 beq $a1, $a2, .LBB8_432 .LBB8_430: # %.lr.ph.i944.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_431: # %.lr.ph.i944 # =>This Inner Loop Header: Depth=1 @@ -9279,12 +9541,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.437: # %middle.block2262 beq $a1, $a2, .LBB8_440 .LBB8_438: # %.lr.ph.i952.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_439: # %.lr.ph.i952 # =>This Inner Loop Header: Depth=1 @@ -9373,12 +9641,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.445: # %middle.block2278 beq $a1, $a2, .LBB8_448 .LBB8_446: # %.lr.ph.i960.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_447: # %.lr.ph.i960 # =>This Inner Loop Header: Depth=1 @@ -9467,12 +9741,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.453: # %middle.block2294 beq $a1, $a2, .LBB8_456 .LBB8_454: # %.lr.ph.i968.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_455: # %.lr.ph.i968 # =>This Inner Loop Header: Depth=1 @@ -9561,12 +9841,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.461: # %middle.block2310 beq $a1, $a2, .LBB8_464 .LBB8_462: # %.lr.ph.i976.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_463: # %.lr.ph.i976 # =>This Inner Loop Header: Depth=1 @@ -9655,12 +9941,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.469: # %middle.block2326 beq $a1, $a2, .LBB8_472 .LBB8_470: # %.lr.ph.i984.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_471: # %.lr.ph.i984 # =>This Inner Loop Header: Depth=1 @@ -9749,12 +10041,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.477: # %middle.block2342 beq $a1, $a2, .LBB8_480 .LBB8_478: # %.lr.ph.i992.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_479: # %.lr.ph.i992 # =>This Inner Loop Header: Depth=1 @@ -9843,12 +10141,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.485: # %middle.block2358 beq $a1, $a2, .LBB8_488 .LBB8_486: # %.lr.ph.i1000.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_487: # %.lr.ph.i1000 # =>This Inner Loop Header: Depth=1 @@ -9937,12 +10241,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.493: # %middle.block2374 beq $a1, $a2, .LBB8_496 .LBB8_494: # %.lr.ph.i1008.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_495: # %.lr.ph.i1008 # =>This Inner Loop Header: Depth=1 @@ -10031,12 +10341,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.501: # %middle.block2390 beq $a0, $a1, .LBB8_504 .LBB8_502: # %.lr.ph.i1016.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_503: # %.lr.ph.i1016 # =>This Inner Loop Header: Depth=1 @@ -10142,12 +10458,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.512: # %middle.block2406 beq $a0, $a1, .LBB8_515 .LBB8_513: # %.lr.ph.i892.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_514: # %.lr.ph.i892 # =>This Inner Loop Header: Depth=1 @@ -10236,12 +10558,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.520: # %middle.block2422 beq $a0, $a1, .LBB8_523 .LBB8_521: # %.lr.ph.i900.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_522: # %.lr.ph.i900 # =>This Inner Loop Header: Depth=1 @@ -10330,12 +10658,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.528: # %middle.block2438 beq $a0, $a1, .LBB8_531 .LBB8_529: # %.lr.ph.i908.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_530: # %.lr.ph.i908 # =>This Inner Loop Header: Depth=1 @@ -10424,12 +10758,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.536: # %middle.block2454 beq $a0, $a1, .LBB8_539 .LBB8_537: # %.lr.ph.i916.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_538: # %.lr.ph.i916 # =>This Inner Loop Header: Depth=1 @@ -10518,12 +10858,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.544: # %middle.block2470 beq $a0, $a1, .LBB8_547 .LBB8_545: # %.lr.ph.i924.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_546: # %.lr.ph.i924 # =>This Inner Loop Header: Depth=1 @@ -10630,12 +10976,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.556: # %middle.block2486 beq $a0, $a1, .LBB8_1187 .LBB8_557: # %.lr.ph.i936.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_558: # %.lr.ph.i936 # =>This Inner Loop Header: Depth=1 @@ -10710,12 +11062,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.561: # %middle.block2902 beq $a1, $a2, .LBB8_564 .LBB8_562: # %.lr.ph.i676.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_563: # %.lr.ph.i676 # =>This Inner Loop Header: Depth=1 @@ -10804,12 +11162,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.569: # %middle.block2918 beq $a0, $a1, .LBB8_1187 .LBB8_570: # %.lr.ph.i684.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_571: # %.lr.ph.i684 # =>This Inner Loop Header: Depth=1 @@ -10875,13 +11239,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.574: # %middle.block3319 beq $a2, $a3, .LBB8_577 .LBB8_575: # %.lr.ph.i432.preheader - pcalau12i $a5, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI8_1) - pcalau12i $a5, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI8_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB8_576: # %.lr.ph.i432 # =>This Inner Loop Header: Depth=1 @@ -10965,13 +11335,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.582: # %middle.block3336 beq $a2, $a3, .LBB8_585 .LBB8_583: # %.lr.ph.i441.preheader - pcalau12i $a5, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI8_1) - pcalau12i $a5, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI8_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB8_584: # %.lr.ph.i441 # =>This Inner Loop Header: Depth=1 @@ -11055,13 +11431,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.590: # %middle.block3353 beq $a2, $a3, .LBB8_593 .LBB8_591: # %.lr.ph.i452.preheader - pcalau12i $a5, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI8_1) - pcalau12i $a5, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI8_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB8_592: # %.lr.ph.i452 # =>This Inner Loop Header: Depth=1 @@ -11145,13 +11527,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.598: # %middle.block3370 beq $a2, $a3, .LBB8_601 .LBB8_599: # %.lr.ph.i463.preheader - pcalau12i $a5, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI8_1) - pcalau12i $a5, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI8_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB8_600: # %.lr.ph.i463 # =>This Inner Loop Header: Depth=1 @@ -11235,13 +11623,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.606: # %middle.block3387 beq $a0, $a2, .LBB8_1187 .LBB8_607: # %.lr.ph.i474.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a3, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a0 alsl.d $a1, $a0, $a1, 4 addi.d $a1, $a1, 8 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa3, $a3 .p2align 4, , 16 .LBB8_608: # %.lr.ph.i474 # =>This Inner Loop Header: Depth=1 @@ -11319,12 +11713,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.611: # %middle.block1622 beq $a1, $a2, .LBB8_614 .LBB8_612: # %.lr.ph.i1294.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_613: # %.lr.ph.i1294 # =>This Inner Loop Header: Depth=1 @@ -11413,12 +11813,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.619: # %middle.block1638 beq $a1, $a2, .LBB8_622 .LBB8_620: # %.lr.ph.i1302.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_621: # %.lr.ph.i1302 # =>This Inner Loop Header: Depth=1 @@ -11507,12 +11913,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.627: # %middle.block1654 beq $a1, $a2, .LBB8_630 .LBB8_628: # %.lr.ph.i1310.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_629: # %.lr.ph.i1310 # =>This Inner Loop Header: Depth=1 @@ -11601,12 +12013,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.635: # %middle.block1670 beq $a1, $a2, .LBB8_638 .LBB8_636: # %.lr.ph.i1318.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_637: # %.lr.ph.i1318 # =>This Inner Loop Header: Depth=1 @@ -11695,12 +12113,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.643: # %middle.block1686 beq $a1, $a2, .LBB8_646 .LBB8_644: # %.lr.ph.i1326.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_645: # %.lr.ph.i1326 # =>This Inner Loop Header: Depth=1 @@ -11789,12 +12213,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.651: # %middle.block1702 beq $a0, $a1, .LBB8_1187 .LBB8_652: # %.lr.ph.i1334.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_653: # %.lr.ph.i1334 # =>This Inner Loop Header: Depth=1 @@ -11869,12 +12299,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.656: # %middle.block2822 beq $a1, $a2, .LBB8_659 .LBB8_657: # %.lr.ph.i708.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_658: # %.lr.ph.i708 # =>This Inner Loop Header: Depth=1 @@ -11963,12 +12399,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.664: # %middle.block2838 beq $a1, $a2, .LBB8_667 .LBB8_665: # %.lr.ph.i716.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_666: # %.lr.ph.i716 # =>This Inner Loop Header: Depth=1 @@ -12057,12 +12499,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.672: # %middle.block2854 beq $a0, $a1, .LBB8_1187 .LBB8_673: # %.lr.ph.i724.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_674: # %.lr.ph.i724 # =>This Inner Loop Header: Depth=1 @@ -12137,12 +12585,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.677: # %middle.block2598 beq $a1, $a2, .LBB8_680 .LBB8_678: # %.lr.ph.i828.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_679: # %.lr.ph.i828 # =>This Inner Loop Header: Depth=1 @@ -12231,12 +12685,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.685: # %middle.block2614 beq $a0, $a1, .LBB8_1187 .LBB8_686: # %.lr.ph.i836.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_687: # %.lr.ph.i836 # =>This Inner Loop Header: Depth=1 @@ -12311,12 +12771,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.690: # %middle.block3483 beq $a1, $a2, .LBB8_693 .LBB8_691: # %.lr.ph.i360.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_692: # %.lr.ph.i360 # =>This Inner Loop Header: Depth=1 @@ -12405,12 +12871,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.698: # %middle.block3499 beq $a1, $a2, .LBB8_701 .LBB8_699: # %.lr.ph.i368.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_700: # %.lr.ph.i368 # =>This Inner Loop Header: Depth=1 @@ -12499,12 +12971,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.706: # %middle.block3515 beq $a1, $a2, .LBB8_709 .LBB8_707: # %.lr.ph.i376.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_708: # %.lr.ph.i376 # =>This Inner Loop Header: Depth=1 @@ -12593,12 +13071,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.714: # %middle.block3531 beq $a0, $a1, .LBB8_1187 .LBB8_715: # %.lr.ph.i384.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_716: # %.lr.ph.i384 # =>This Inner Loop Header: Depth=1 @@ -12673,12 +13157,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.719: # %middle.block2566 beq $a1, $a2, .LBB8_722 .LBB8_720: # %.lr.ph.i844.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_721: # %.lr.ph.i844 # =>This Inner Loop Header: Depth=1 @@ -12767,12 +13257,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.727: # %middle.block2582 beq $a0, $a1, .LBB8_1187 .LBB8_728: # %.lr.ph.i852.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_729: # %.lr.ph.i852 # =>This Inner Loop Header: Depth=1 @@ -12847,12 +13343,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.732: # %middle.block2966 beq $a1, $a2, .LBB8_735 .LBB8_733: # %.lr.ph.i628.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_734: # %.lr.ph.i628 # =>This Inner Loop Header: Depth=1 @@ -12941,12 +13443,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.740: # %middle.block2982 beq $a1, $a2, .LBB8_743 .LBB8_741: # %.lr.ph.i636.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_742: # %.lr.ph.i636 # =>This Inner Loop Header: Depth=1 @@ -13035,12 +13543,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.748: # %middle.block2998 beq $a1, $a2, .LBB8_751 .LBB8_749: # %.lr.ph.i644.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_750: # %.lr.ph.i644 # =>This Inner Loop Header: Depth=1 @@ -13129,12 +13643,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.756: # %middle.block3014 beq $a0, $a1, .LBB8_1187 .LBB8_757: # %.lr.ph.i652.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_758: # %.lr.ph.i652 # =>This Inner Loop Header: Depth=1 @@ -13209,12 +13729,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.761: # %middle.block3403 beq $a1, $a2, .LBB8_764 .LBB8_762: # %.lr.ph.i392.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_763: # %.lr.ph.i392 # =>This Inner Loop Header: Depth=1 @@ -13303,12 +13829,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.769: # %middle.block3419 beq $a1, $a2, .LBB8_772 .LBB8_770: # %.lr.ph.i400.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_771: # %.lr.ph.i400 # =>This Inner Loop Header: Depth=1 @@ -13397,12 +13929,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.777: # %middle.block3435 beq $a1, $a2, .LBB8_780 .LBB8_778: # %.lr.ph.i408.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_779: # %.lr.ph.i408 # =>This Inner Loop Header: Depth=1 @@ -13491,12 +14029,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.785: # %middle.block3451 beq $a1, $a2, .LBB8_788 .LBB8_786: # %.lr.ph.i416.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_787: # %.lr.ph.i416 # =>This Inner Loop Header: Depth=1 @@ -13585,12 +14129,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.793: # %middle.block3467 beq $a0, $a1, .LBB8_1187 .LBB8_794: # %.lr.ph.i424.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_795: # %.lr.ph.i424 # =>This Inner Loop Header: Depth=1 @@ -13665,12 +14215,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.798: # %middle.block2742 beq $a1, $a2, .LBB8_801 .LBB8_799: # %.lr.ph.i732.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_800: # %.lr.ph.i732 # =>This Inner Loop Header: Depth=1 @@ -13759,12 +14315,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.806: # %middle.block2758 beq $a1, $a2, .LBB8_809 .LBB8_807: # %.lr.ph.i740.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_808: # %.lr.ph.i740 # =>This Inner Loop Header: Depth=1 @@ -13853,12 +14415,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.814: # %middle.block2774 beq $a1, $a2, .LBB8_817 .LBB8_815: # %.lr.ph.i748.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_816: # %.lr.ph.i748 # =>This Inner Loop Header: Depth=1 @@ -13947,12 +14515,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.822: # %middle.block2790 beq $a1, $a2, .LBB8_825 .LBB8_823: # %.lr.ph.i756.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_824: # %.lr.ph.i756 # =>This Inner Loop Header: Depth=1 @@ -14041,12 +14615,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.830: # %middle.block2806 beq $a0, $a1, .LBB8_1187 .LBB8_831: # %.lr.ph.i764.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_832: # %.lr.ph.i764 # =>This Inner Loop Header: Depth=1 @@ -14121,12 +14701,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.835: # %middle.block3126 beq $a1, $a2, .LBB8_838 .LBB8_836: # %.lr.ph.i539.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_837: # %.lr.ph.i539 # =>This Inner Loop Header: Depth=1 @@ -14215,12 +14801,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.843: # %middle.block3142 beq $a1, $a2, .LBB8_846 .LBB8_844: # %.lr.ph.i547.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_845: # %.lr.ph.i547 # =>This Inner Loop Header: Depth=1 @@ -14309,12 +14901,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.851: # %middle.block3158 beq $a1, $a2, .LBB8_854 .LBB8_852: # %.lr.ph.i555.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_853: # %.lr.ph.i555 # =>This Inner Loop Header: Depth=1 @@ -14403,12 +15001,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.859: # %middle.block3174 beq $a1, $a2, .LBB8_862 .LBB8_860: # %.lr.ph.i563.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_861: # %.lr.ph.i563 # =>This Inner Loop Header: Depth=1 @@ -14497,12 +15101,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.867: # %middle.block3190 beq $a0, $a1, .LBB8_1187 .LBB8_868: # %.lr.ph.i571.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_869: # %.lr.ph.i571 # =>This Inner Loop Header: Depth=1 @@ -14577,12 +15187,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.872: # %middle.block3286 beq $a1, $a2, .LBB8_875 .LBB8_873: # %.lr.ph.i483.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_874: # %.lr.ph.i483 # =>This Inner Loop Header: Depth=1 @@ -14671,12 +15287,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.880: # %middle.block3302 beq $a0, $a1, .LBB8_1187 .LBB8_881: # %.lr.ph.i491.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_882: # %.lr.ph.i491 # =>This Inner Loop Header: Depth=1 @@ -14751,12 +15373,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.885: # %middle.block1718 beq $a1, $a2, .LBB8_888 .LBB8_886: # %.lr.ph.i1254.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_887: # %.lr.ph.i1254 # =>This Inner Loop Header: Depth=1 @@ -14845,12 +15473,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.893: # %middle.block1734 beq $a1, $a2, .LBB8_896 .LBB8_894: # %.lr.ph.i1262.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_895: # %.lr.ph.i1262 # =>This Inner Loop Header: Depth=1 @@ -14939,12 +15573,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.901: # %middle.block1750 beq $a1, $a2, .LBB8_904 .LBB8_902: # %.lr.ph.i1270.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_903: # %.lr.ph.i1270 # =>This Inner Loop Header: Depth=1 @@ -15033,12 +15673,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.909: # %middle.block1766 beq $a1, $a2, .LBB8_912 .LBB8_910: # %.lr.ph.i1278.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_911: # %.lr.ph.i1278 # =>This Inner Loop Header: Depth=1 @@ -15127,12 +15773,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.917: # %middle.block1782 beq $a0, $a1, .LBB8_1187 .LBB8_918: # %.lr.ph.i1286.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_919: # %.lr.ph.i1286 # =>This Inner Loop Header: Depth=1 @@ -15153,41 +15805,39 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat bstrpick.d $a0, $a1, 30, 2 slli.d $a0, $a0, 2 vreplvei.d $vr1, $vr0, 0 - addi.d $a3, $a2, 16 - ori $a4, $zero, 0 - lu32i.d $a4, 1 - vreplgr2vr.d $vr2, $a4 - lu12i.w $a4, -419431 - ori $a4, $a4, 2458 - lu32i.d $a4, 104857 - lu52i.d $a4, $a4, 1023 - vreplgr2vr.d $vr3, $a4 - lu12i.w $a4, -307024 - ori $a4, $a4, 3880 - lu32i.d $a4, 129446 - lu52i.d $a4, $a4, 1023 - vreplgr2vr.d $vr4, $a4 - move $a4, $a0 + addi.d $a5, $a3, 16 + ori $a6, $zero, 0 + lu32i.d $a6, 1 + vreplgr2vr.d $vr2, $a6 + ori $a6, $a4, 2458 + lu32i.d $a6, 104857 + lu52i.d $a6, $a6, 1023 + vreplgr2vr.d $vr3, $a6 + ori $a6, $a2, 3880 + lu32i.d $a6, 129446 + lu52i.d $a6, $a6, 1023 + vreplgr2vr.d $vr4, $a6 + move $a6, $a0 .p2align 4, , 16 .LBB8_921: # %vector.body # =>This Inner Loop Header: Depth=1 vaddi.wu $vr5, $vr2, 2 - vpickve2gr.w $a5, $vr2, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa6, $a5 + vpickve2gr.w $a7, $vr2, 1 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa6, $a7 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a5, $vr2, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa7, $a5 + vpickve2gr.w $a7, $vr2, 0 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa7, $a7 ffint.d.l $fa7, $fa7 vextrins.d $vr7, $vr6, 16 - vpickve2gr.w $a5, $vr5, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa6, $a5 + vpickve2gr.w $a7, $vr5, 1 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa6, $a7 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a5, $vr5, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa5, $a5 + vpickve2gr.w $a7, $vr5, 0 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa5, $a7 ffint.d.l $fa5, $fa5 vextrins.d $vr5, $vr6, 16 vfadd.d $vr6, $vr7, $vr3 @@ -15198,34 +15848,38 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat vfadd.d $vr5, $vr5, $vr4 vfdiv.d $vr6, $vr6, $vr7 vfdiv.d $vr5, $vr8, $vr5 - vst $vr6, $a3, -16 - vst $vr5, $a3, 0 + vst $vr6, $a5, -16 + vst $vr5, $a5, 0 vaddi.wu $vr2, $vr2, 4 - addi.d $a4, $a4, -4 - addi.d $a3, $a3, 32 - bnez $a4, .LBB8_921 + addi.d $a6, $a6, -4 + addi.d $a5, $a5, 32 + bnez $a6, .LBB8_921 # %bb.922: # %middle.block beq $a0, $a1, .LBB8_1187 .LBB8_923: # %.lr.ph.i1342.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 - alsl.d $a2, $a0, $a2, 3 + alsl.d $a3, $a0, $a3, 3 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + ori $a2, $a2, 3880 + lu32i.d $a2, 129446 + lu52i.d $a2, $a2, 1023 + movgr2fr.d $fa2, $a2 .p2align 4, , 16 .LBB8_924: # %.lr.ph.i1342 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a0, 31, 0 - movgr2fr.d $fa3, $a3 + bstrpick.d $a2, $a0, 31, 0 + movgr2fr.d $fa3, $a2 ffint.d.l $fa3, $fa3 fadd.d $fa4, $fa3, $fa1 fmul.d $fa4, $fa0, $fa4 fadd.d $fa3, $fa3, $fa2 fdiv.d $fa3, $fa4, $fa3 - fst.d $fa3, $a2, 0 + fst.d $fa3, $a3, 0 addi.d $a1, $a1, -1 - addi.d $a2, $a2, 8 + addi.d $a3, $a3, 8 addi.w $a0, $a0, 1 bnez $a1, .LBB8_924 b .LBB8_1187 @@ -15287,12 +15941,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.927: # %middle.block2534 beq $a1, $a2, .LBB8_930 .LBB8_928: # %.lr.ph.i860.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_929: # %.lr.ph.i860 # =>This Inner Loop Header: Depth=1 @@ -15381,12 +16041,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.935: # %middle.block2550 beq $a0, $a1, .LBB8_1187 .LBB8_936: # %.lr.ph.i868.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_937: # %.lr.ph.i868 # =>This Inner Loop Header: Depth=1 @@ -15461,12 +16127,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.940: # %middle.block2630 beq $a1, $a2, .LBB8_943 .LBB8_941: # %.lr.ph.i772.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_942: # %.lr.ph.i772 # =>This Inner Loop Header: Depth=1 @@ -15555,12 +16227,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.948: # %middle.block2646 beq $a1, $a2, .LBB8_951 .LBB8_949: # %.lr.ph.i780.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_950: # %.lr.ph.i780 # =>This Inner Loop Header: Depth=1 @@ -15649,12 +16327,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.956: # %middle.block2662 beq $a1, $a2, .LBB8_959 .LBB8_957: # %.lr.ph.i788.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_958: # %.lr.ph.i788 # =>This Inner Loop Header: Depth=1 @@ -15743,12 +16427,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.964: # %middle.block2678 beq $a1, $a2, .LBB8_967 .LBB8_965: # %.lr.ph.i796.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_966: # %.lr.ph.i796 # =>This Inner Loop Header: Depth=1 @@ -15837,12 +16527,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.972: # %middle.block2694 beq $a1, $a2, .LBB8_975 .LBB8_973: # %.lr.ph.i804.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_974: # %.lr.ph.i804 # =>This Inner Loop Header: Depth=1 @@ -15931,12 +16627,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.980: # %middle.block2710 beq $a1, $a2, .LBB8_983 .LBB8_981: # %.lr.ph.i812.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_982: # %.lr.ph.i812 # =>This Inner Loop Header: Depth=1 @@ -16025,12 +16727,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.988: # %middle.block2726 beq $a0, $a1, .LBB8_1187 .LBB8_989: # %.lr.ph.i820.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_990: # %.lr.ph.i820 # =>This Inner Loop Header: Depth=1 @@ -16105,12 +16813,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.993: # %middle.block2502 beq $a1, $a2, .LBB8_996 .LBB8_994: # %.lr.ph.i876.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_995: # %.lr.ph.i876 # =>This Inner Loop Header: Depth=1 @@ -16199,12 +16913,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1001: # %middle.block2518 beq $a0, $a1, .LBB8_1187 .LBB8_1002: # %.lr.ph.i884.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_1003: # %.lr.ph.i884 # =>This Inner Loop Header: Depth=1 @@ -16279,12 +16999,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1006: # %middle.block1846 beq $a1, $a2, .LBB8_1009 .LBB8_1007: # %.lr.ph.i1150.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1008: # %.lr.ph.i1150 # =>This Inner Loop Header: Depth=1 @@ -16373,12 +17099,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1014: # %middle.block1862 beq $a1, $a2, .LBB8_1017 .LBB8_1015: # %.lr.ph.i1158.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1016: # %.lr.ph.i1158 # =>This Inner Loop Header: Depth=1 @@ -16467,12 +17199,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1022: # %middle.block1878 beq $a1, $a2, .LBB8_1025 .LBB8_1023: # %.lr.ph.i1166.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1024: # %.lr.ph.i1166 # =>This Inner Loop Header: Depth=1 @@ -16561,12 +17299,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1030: # %middle.block1894 beq $a1, $a2, .LBB8_1033 .LBB8_1031: # %.lr.ph.i1174.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1032: # %.lr.ph.i1174 # =>This Inner Loop Header: Depth=1 @@ -16655,12 +17399,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1038: # %middle.block1910 beq $a1, $a2, .LBB8_1041 .LBB8_1039: # %.lr.ph.i1182.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1040: # %.lr.ph.i1182 # =>This Inner Loop Header: Depth=1 @@ -16749,12 +17499,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1046: # %middle.block1926 beq $a1, $a2, .LBB8_1049 .LBB8_1047: # %.lr.ph.i1190.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1048: # %.lr.ph.i1190 # =>This Inner Loop Header: Depth=1 @@ -16843,12 +17599,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1054: # %middle.block1942 beq $a1, $a2, .LBB8_1057 .LBB8_1055: # %.lr.ph.i1198.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1056: # %.lr.ph.i1198 # =>This Inner Loop Header: Depth=1 @@ -16937,12 +17699,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1062: # %middle.block1958 beq $a1, $a2, .LBB8_1065 .LBB8_1063: # %.lr.ph.i1206.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1064: # %.lr.ph.i1206 # =>This Inner Loop Header: Depth=1 @@ -17031,12 +17799,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1070: # %middle.block1974 beq $a1, $a2, .LBB8_1073 .LBB8_1071: # %.lr.ph.i1214.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1072: # %.lr.ph.i1214 # =>This Inner Loop Header: Depth=1 @@ -17125,12 +17899,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1078: # %middle.block1990 beq $a0, $a1, .LBB8_1187 .LBB8_1079: # %.lr.ph.i1222.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_1080: # %.lr.ph.i1222 # =>This Inner Loop Header: Depth=1 @@ -17205,12 +17985,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1083: # %middle.block1798 beq $a1, $a2, .LBB8_1086 .LBB8_1084: # %.lr.ph.i1230.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1085: # %.lr.ph.i1230 # =>This Inner Loop Header: Depth=1 @@ -17299,12 +18085,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1091: # %middle.block1814 beq $a1, $a2, .LBB8_1094 .LBB8_1092: # %.lr.ph.i1238.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1093: # %.lr.ph.i1238 # =>This Inner Loop Header: Depth=1 @@ -17393,12 +18185,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1099: # %middle.block1830 beq $a0, $a1, .LBB8_1187 .LBB8_1100: # %.lr.ph.i1246.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_1101: # %.lr.ph.i1246 # =>This Inner Loop Header: Depth=1 @@ -17473,12 +18271,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1104: # %middle.block2070 beq $a1, $a2, .LBB8_1107 .LBB8_1105: # %.lr.ph.i1030.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1106: # %.lr.ph.i1030 # =>This Inner Loop Header: Depth=1 @@ -17567,12 +18371,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1112: # %middle.block2086 beq $a1, $a2, .LBB8_1115 .LBB8_1113: # %.lr.ph.i1038.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1114: # %.lr.ph.i1038 # =>This Inner Loop Header: Depth=1 @@ -17661,12 +18471,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1120: # %middle.block2102 beq $a1, $a2, .LBB8_1123 .LBB8_1121: # %.lr.ph.i1046.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1122: # %.lr.ph.i1046 # =>This Inner Loop Header: Depth=1 @@ -17755,12 +18571,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1128: # %middle.block2118 beq $a1, $a2, .LBB8_1131 .LBB8_1129: # %.lr.ph.i1054.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1130: # %.lr.ph.i1054 # =>This Inner Loop Header: Depth=1 @@ -17849,12 +18671,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1136: # %middle.block2134 beq $a1, $a2, .LBB8_1139 .LBB8_1137: # %.lr.ph.i1062.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1138: # %.lr.ph.i1062 # =>This Inner Loop Header: Depth=1 @@ -17943,12 +18771,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1144: # %middle.block2150 beq $a1, $a2, .LBB8_1147 .LBB8_1145: # %.lr.ph.i1070.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1146: # %.lr.ph.i1070 # =>This Inner Loop Header: Depth=1 @@ -18037,12 +18871,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1152: # %middle.block2166 beq $a1, $a2, .LBB8_1155 .LBB8_1153: # %.lr.ph.i1078.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1154: # %.lr.ph.i1078 # =>This Inner Loop Header: Depth=1 @@ -18131,12 +18971,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1160: # %middle.block2182 beq $a1, $a2, .LBB8_1163 .LBB8_1161: # %.lr.ph.i1086.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1162: # %.lr.ph.i1086 # =>This Inner Loop Header: Depth=1 @@ -18225,12 +19071,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1168: # %middle.block2198 beq $a1, $a2, .LBB8_1171 .LBB8_1169: # %.lr.ph.i1094.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1170: # %.lr.ph.i1094 # =>This Inner Loop Header: Depth=1 @@ -18319,12 +19171,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1176: # %middle.block2214 beq $a1, $a2, .LBB8_1179 .LBB8_1177: # %.lr.ph.i1102.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1178: # %.lr.ph.i1102 # =>This Inner Loop Header: Depth=1 @@ -18413,12 +19271,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1184: # %middle.block2230 beq $a0, $a1, .LBB8_1187 .LBB8_1185: # %.lr.ph.i1110.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_1186: # %.lr.ph.i1110 # =>This Inner Loop Header: Depth=1 @@ -18489,15 +19353,9 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat .LCPI9_0: .dword 0x3fb999999999999a # double 0.10000000000000001 .dword 0x3fc999999999999a # double 0.20000000000000001 -.LCPI9_3: +.LCPI9_1: .dword 0x3fc999999999999a # double 0.20000000000000001 .dword 0x3fd3333333333333 # double 0.29999999999999999 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI9_1: - .dword 0x3ff199999999999a # double 1.1000000000000001 -.LCPI9_2: - .dword 0x3ff1f9a6b50b0f28 # double 1.1234500000000001 .text .globl _Z8loopInitj .p2align 5 @@ -18743,8 +19601,8 @@ _Z8loopInitj: # @_Z8loopInitj .LBB9_40: pcalau12i $a0, %pc_hi20(.LCPI9_0) addi.d $a0, $a0, %pc_lo12(.LCPI9_0) - pcalau12i $a1, %pc_hi20(.LCPI9_3) - addi.d $a1, $a1, %pc_lo12(.LCPI9_3) + pcalau12i $a1, %pc_hi20(.LCPI9_1) + addi.d $a1, $a1, %pc_lo12(.LCPI9_1) ld.w $a3, $s0, 1032 blez $a3, .LBB9_577 # %bb.41: # %.lr.ph.preheader.i429 @@ -18958,8 +19816,10 @@ _Z8loopInitj: # @_Z8loopInitj pcalau12i $a2, %pc_hi20(.LCPI9_0) addi.d $a2, $a2, %pc_lo12(.LCPI9_0) fldx.d $fa0, $a2, $a0 - ld.d $a2, $s0, 472 + ld.d $a3, $s0, 472 ori $a0, $zero, 4 + lu12i.w $a4, -419431 + lu12i.w $a2, -307024 bgeu $a1, $a0, .LBB9_920 # %bb.78: move $a0, $zero @@ -19148,12 +20008,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.102: # %middle.block3802 beq $a1, $a2, .LBB9_105 .LBB9_103: # %.lr.ph.i183.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_104: # %.lr.ph.i183 # =>This Inner Loop Header: Depth=1 @@ -19242,12 +20108,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.110: # %middle.block3818 beq $a1, $a2, .LBB9_113 .LBB9_111: # %.lr.ph.i191.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_112: # %.lr.ph.i191 # =>This Inner Loop Header: Depth=1 @@ -19336,12 +20208,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.118: # %middle.block3834 beq $a1, $a2, .LBB9_121 .LBB9_119: # %.lr.ph.i199.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_120: # %.lr.ph.i199 # =>This Inner Loop Header: Depth=1 @@ -19430,12 +20308,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.126: # %middle.block3850 beq $a1, $a2, .LBB9_129 .LBB9_127: # %.lr.ph.i207.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_128: # %.lr.ph.i207 # =>This Inner Loop Header: Depth=1 @@ -19524,12 +20408,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.134: # %middle.block3866 beq $a1, $a2, .LBB9_137 .LBB9_135: # %.lr.ph.i215.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_136: # %.lr.ph.i215 # =>This Inner Loop Header: Depth=1 @@ -19618,12 +20508,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.142: # %middle.block3882 beq $a0, $a1, .LBB9_1187 .LBB9_143: # %.lr.ph.i223.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_144: # %.lr.ph.i223 # =>This Inner Loop Header: Depth=1 @@ -19698,12 +20594,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.147: # %middle.block3546 beq $a1, $a2, .LBB9_150 .LBB9_148: # %.lr.ph.i231.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_149: # %.lr.ph.i231 # =>This Inner Loop Header: Depth=1 @@ -19792,12 +20694,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.155: # %middle.block3562 beq $a1, $a2, .LBB9_158 .LBB9_156: # %.lr.ph.i239.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_157: # %.lr.ph.i239 # =>This Inner Loop Header: Depth=1 @@ -19886,12 +20794,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.163: # %middle.block3578 beq $a1, $a2, .LBB9_166 .LBB9_164: # %.lr.ph.i247.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_165: # %.lr.ph.i247 # =>This Inner Loop Header: Depth=1 @@ -19980,12 +20894,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.171: # %middle.block3594 beq $a1, $a2, .LBB9_174 .LBB9_172: # %.lr.ph.i255.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_173: # %.lr.ph.i255 # =>This Inner Loop Header: Depth=1 @@ -20074,12 +20994,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.179: # %middle.block3610 beq $a1, $a2, .LBB9_182 .LBB9_180: # %.lr.ph.i263.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_181: # %.lr.ph.i263 # =>This Inner Loop Header: Depth=1 @@ -20168,12 +21094,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.187: # %middle.block3626 beq $a1, $a2, .LBB9_190 .LBB9_188: # %.lr.ph.i271.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_189: # %.lr.ph.i271 # =>This Inner Loop Header: Depth=1 @@ -20262,12 +21194,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.195: # %middle.block3642 beq $a1, $a2, .LBB9_198 .LBB9_196: # %.lr.ph.i279.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_197: # %.lr.ph.i279 # =>This Inner Loop Header: Depth=1 @@ -20356,12 +21294,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.203: # %middle.block3658 beq $a1, $a2, .LBB9_206 .LBB9_204: # %.lr.ph.i287.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_205: # %.lr.ph.i287 # =>This Inner Loop Header: Depth=1 @@ -20450,12 +21394,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.211: # %middle.block3674 beq $a1, $a2, .LBB9_214 .LBB9_212: # %.lr.ph.i295.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_213: # %.lr.ph.i295 # =>This Inner Loop Header: Depth=1 @@ -20544,12 +21494,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.219: # %middle.block3690 beq $a1, $a2, .LBB9_222 .LBB9_220: # %.lr.ph.i303.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_221: # %.lr.ph.i303 # =>This Inner Loop Header: Depth=1 @@ -20638,12 +21594,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.227: # %middle.block3706 beq $a1, $a2, .LBB9_230 .LBB9_228: # %.lr.ph.i311.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_229: # %.lr.ph.i311 # =>This Inner Loop Header: Depth=1 @@ -20732,12 +21694,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.235: # %middle.block3722 beq $a1, $a2, .LBB9_238 .LBB9_236: # %.lr.ph.i319.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_237: # %.lr.ph.i319 # =>This Inner Loop Header: Depth=1 @@ -20826,12 +21794,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.243: # %middle.block3738 beq $a1, $a2, .LBB9_246 .LBB9_244: # %.lr.ph.i327.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_245: # %.lr.ph.i327 # =>This Inner Loop Header: Depth=1 @@ -20920,12 +21894,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.251: # %middle.block3754 beq $a1, $a2, .LBB9_254 .LBB9_252: # %.lr.ph.i335.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_253: # %.lr.ph.i335 # =>This Inner Loop Header: Depth=1 @@ -21014,12 +21994,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.259: # %middle.block3770 beq $a1, $a2, .LBB9_262 .LBB9_260: # %.lr.ph.i343.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_261: # %.lr.ph.i343 # =>This Inner Loop Header: Depth=1 @@ -21108,12 +22094,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.267: # %middle.block3786 beq $a0, $a1, .LBB9_1187 .LBB9_268: # %.lr.ph.i351.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_269: # %.lr.ph.i351 # =>This Inner Loop Header: Depth=1 @@ -21210,12 +22202,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.274: # %middle.block2005 beq $a1, $a2, .LBB9_277 .LBB9_275: # %.lr.ph.i1117.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_276: # %.lr.ph.i1117 # =>This Inner Loop Header: Depth=1 @@ -21304,12 +22302,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.282: # %middle.block2021 beq $a1, $a2, .LBB9_285 .LBB9_283: # %.lr.ph.i1125.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_284: # %.lr.ph.i1125 # =>This Inner Loop Header: Depth=1 @@ -21398,12 +22402,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.290: # %middle.block2037 beq $a1, $a2, .LBB9_293 .LBB9_291: # %.lr.ph.i1133.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_292: # %.lr.ph.i1133 # =>This Inner Loop Header: Depth=1 @@ -21492,12 +22502,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.298: # %middle.block2053 beq $a0, $a1, .LBB9_1187 .LBB9_299: # %.lr.ph.i1141.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_300: # %.lr.ph.i1141 # =>This Inner Loop Header: Depth=1 @@ -21572,12 +22588,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.303: # %middle.block2869 beq $a1, $a2, .LBB9_306 .LBB9_304: # %.lr.ph.i691.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_305: # %.lr.ph.i691 # =>This Inner Loop Header: Depth=1 @@ -21666,12 +22688,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.311: # %middle.block2885 beq $a0, $a1, .LBB9_1187 .LBB9_312: # %.lr.ph.i699.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_313: # %.lr.ph.i699 # =>This Inner Loop Header: Depth=1 @@ -21746,12 +22774,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.316: # %middle.block2933 beq $a1, $a2, .LBB9_319 .LBB9_317: # %.lr.ph.i659.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_318: # %.lr.ph.i659 # =>This Inner Loop Header: Depth=1 @@ -21840,12 +22874,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.324: # %middle.block2949 beq $a0, $a1, .LBB9_1187 .LBB9_325: # %.lr.ph.i667.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_326: # %.lr.ph.i667 # =>This Inner Loop Header: Depth=1 @@ -21920,12 +22960,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.329: # %middle.block3045 beq $a1, $a2, .LBB9_332 .LBB9_330: # %.lr.ph.i578.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_331: # %.lr.ph.i578 # =>This Inner Loop Header: Depth=1 @@ -22014,12 +23060,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.337: # %middle.block3061 beq $a1, $a2, .LBB9_340 .LBB9_338: # %.lr.ph.i586.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_339: # %.lr.ph.i586 # =>This Inner Loop Header: Depth=1 @@ -22108,12 +23160,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.345: # %middle.block3077 beq $a1, $a2, .LBB9_348 .LBB9_346: # %.lr.ph.i594.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_347: # %.lr.ph.i594 # =>This Inner Loop Header: Depth=1 @@ -22202,12 +23260,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.353: # %middle.block3093 beq $a1, $a2, .LBB9_356 .LBB9_354: # %.lr.ph.i602.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_355: # %.lr.ph.i602 # =>This Inner Loop Header: Depth=1 @@ -22296,12 +23360,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.361: # %middle.block3109 beq $a0, $a1, .LBB9_1187 .LBB9_362: # %.lr.ph.i610.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_363: # %.lr.ph.i610 # =>This Inner Loop Header: Depth=1 @@ -22376,12 +23446,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.366: # %middle.block3029 beq $a0, $a1, .LBB9_1187 .LBB9_367: # %.lr.ph.i619.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_368: # %.lr.ph.i619 # =>This Inner Loop Header: Depth=1 @@ -22456,12 +23532,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.371: # %middle.block3898 beq $a1, $a2, .LBB9_374 .LBB9_372: # %.lr.ph.i.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_373: # %.lr.ph.i # =>This Inner Loop Header: Depth=1 @@ -22550,12 +23632,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.379: # %middle.block3914 beq $a1, $a2, .LBB9_382 .LBB9_380: # %.lr.ph.i167.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_381: # %.lr.ph.i167 # =>This Inner Loop Header: Depth=1 @@ -22644,12 +23732,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.387: # %middle.block3930 beq $a0, $a1, .LBB9_1187 .LBB9_388: # %.lr.ph.i175.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_389: # %.lr.ph.i175 # =>This Inner Loop Header: Depth=1 @@ -22724,12 +23818,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.392: # %middle.block3205 beq $a1, $a2, .LBB9_395 .LBB9_393: # %.lr.ph.i498.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_394: # %.lr.ph.i498 # =>This Inner Loop Header: Depth=1 @@ -22818,12 +23918,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.400: # %middle.block3221 beq $a1, $a2, .LBB9_403 .LBB9_401: # %.lr.ph.i506.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_402: # %.lr.ph.i506 # =>This Inner Loop Header: Depth=1 @@ -22912,12 +24018,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.408: # %middle.block3237 beq $a1, $a2, .LBB9_411 .LBB9_409: # %.lr.ph.i514.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_410: # %.lr.ph.i514 # =>This Inner Loop Header: Depth=1 @@ -23006,12 +24118,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.416: # %middle.block3253 beq $a1, $a2, .LBB9_419 .LBB9_417: # %.lr.ph.i522.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_418: # %.lr.ph.i522 # =>This Inner Loop Header: Depth=1 @@ -23100,12 +24218,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.424: # %middle.block3269 beq $a0, $a1, .LBB9_1187 .LBB9_425: # %.lr.ph.i530.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_426: # %.lr.ph.i530 # =>This Inner Loop Header: Depth=1 @@ -23180,12 +24304,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.429: # %middle.block2245 beq $a1, $a2, .LBB9_432 .LBB9_430: # %.lr.ph.i943.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_431: # %.lr.ph.i943 # =>This Inner Loop Header: Depth=1 @@ -23274,12 +24404,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.437: # %middle.block2261 beq $a1, $a2, .LBB9_440 .LBB9_438: # %.lr.ph.i951.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_439: # %.lr.ph.i951 # =>This Inner Loop Header: Depth=1 @@ -23368,12 +24504,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.445: # %middle.block2277 beq $a1, $a2, .LBB9_448 .LBB9_446: # %.lr.ph.i959.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_447: # %.lr.ph.i959 # =>This Inner Loop Header: Depth=1 @@ -23462,12 +24604,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.453: # %middle.block2293 beq $a1, $a2, .LBB9_456 .LBB9_454: # %.lr.ph.i967.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_455: # %.lr.ph.i967 # =>This Inner Loop Header: Depth=1 @@ -23556,12 +24704,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.461: # %middle.block2309 beq $a1, $a2, .LBB9_464 .LBB9_462: # %.lr.ph.i975.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_463: # %.lr.ph.i975 # =>This Inner Loop Header: Depth=1 @@ -23650,12 +24804,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.469: # %middle.block2325 beq $a1, $a2, .LBB9_472 .LBB9_470: # %.lr.ph.i983.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_471: # %.lr.ph.i983 # =>This Inner Loop Header: Depth=1 @@ -23744,12 +24904,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.477: # %middle.block2341 beq $a1, $a2, .LBB9_480 .LBB9_478: # %.lr.ph.i991.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_479: # %.lr.ph.i991 # =>This Inner Loop Header: Depth=1 @@ -23838,12 +25004,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.485: # %middle.block2357 beq $a1, $a2, .LBB9_488 .LBB9_486: # %.lr.ph.i999.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_487: # %.lr.ph.i999 # =>This Inner Loop Header: Depth=1 @@ -23932,12 +25104,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.493: # %middle.block2373 beq $a1, $a2, .LBB9_496 .LBB9_494: # %.lr.ph.i1007.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_495: # %.lr.ph.i1007 # =>This Inner Loop Header: Depth=1 @@ -24026,12 +25204,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.501: # %middle.block2389 beq $a0, $a1, .LBB9_504 .LBB9_502: # %.lr.ph.i1015.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_503: # %.lr.ph.i1015 # =>This Inner Loop Header: Depth=1 @@ -24136,12 +25320,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.512: # %middle.block2405 beq $a0, $a1, .LBB9_515 .LBB9_513: # %.lr.ph.i891.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_514: # %.lr.ph.i891 # =>This Inner Loop Header: Depth=1 @@ -24230,12 +25420,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.520: # %middle.block2421 beq $a0, $a1, .LBB9_523 .LBB9_521: # %.lr.ph.i899.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_522: # %.lr.ph.i899 # =>This Inner Loop Header: Depth=1 @@ -24324,12 +25520,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.528: # %middle.block2437 beq $a0, $a1, .LBB9_531 .LBB9_529: # %.lr.ph.i907.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_530: # %.lr.ph.i907 # =>This Inner Loop Header: Depth=1 @@ -24418,12 +25620,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.536: # %middle.block2453 beq $a0, $a1, .LBB9_539 .LBB9_537: # %.lr.ph.i915.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_538: # %.lr.ph.i915 # =>This Inner Loop Header: Depth=1 @@ -24512,12 +25720,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.544: # %middle.block2469 beq $a0, $a1, .LBB9_547 .LBB9_545: # %.lr.ph.i923.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_546: # %.lr.ph.i923 # =>This Inner Loop Header: Depth=1 @@ -24624,12 +25838,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.556: # %middle.block2485 beq $a0, $a1, .LBB9_1187 .LBB9_557: # %.lr.ph.i935.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_558: # %.lr.ph.i935 # =>This Inner Loop Header: Depth=1 @@ -24704,12 +25924,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.561: # %middle.block2901 beq $a1, $a2, .LBB9_564 .LBB9_562: # %.lr.ph.i675.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_563: # %.lr.ph.i675 # =>This Inner Loop Header: Depth=1 @@ -24798,12 +26024,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.569: # %middle.block2917 beq $a0, $a1, .LBB9_1187 .LBB9_570: # %.lr.ph.i683.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_571: # %.lr.ph.i683 # =>This Inner Loop Header: Depth=1 @@ -24869,13 +26101,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.574: # %middle.block3318 beq $a2, $a3, .LBB9_577 .LBB9_575: # %.lr.ph.i431.preheader - pcalau12i $a5, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI9_1) - pcalau12i $a5, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI9_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB9_576: # %.lr.ph.i431 # =>This Inner Loop Header: Depth=1 @@ -24959,13 +26197,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.582: # %middle.block3335 beq $a2, $a3, .LBB9_585 .LBB9_583: # %.lr.ph.i440.preheader - pcalau12i $a5, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI9_1) - pcalau12i $a5, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI9_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB9_584: # %.lr.ph.i440 # =>This Inner Loop Header: Depth=1 @@ -25049,13 +26293,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.590: # %middle.block3352 beq $a2, $a3, .LBB9_593 .LBB9_591: # %.lr.ph.i451.preheader - pcalau12i $a5, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI9_1) - pcalau12i $a5, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI9_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB9_592: # %.lr.ph.i451 # =>This Inner Loop Header: Depth=1 @@ -25139,13 +26389,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.598: # %middle.block3369 beq $a2, $a3, .LBB9_601 .LBB9_599: # %.lr.ph.i462.preheader - pcalau12i $a5, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI9_1) - pcalau12i $a5, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI9_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB9_600: # %.lr.ph.i462 # =>This Inner Loop Header: Depth=1 @@ -25229,13 +26485,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.606: # %middle.block3386 beq $a0, $a2, .LBB9_1187 .LBB9_607: # %.lr.ph.i473.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a3, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a0 alsl.d $a1, $a0, $a1, 4 addi.d $a1, $a1, 8 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa3, $a3 .p2align 4, , 16 .LBB9_608: # %.lr.ph.i473 # =>This Inner Loop Header: Depth=1 @@ -25313,12 +26575,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.611: # %middle.block1621 beq $a1, $a2, .LBB9_614 .LBB9_612: # %.lr.ph.i1293.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_613: # %.lr.ph.i1293 # =>This Inner Loop Header: Depth=1 @@ -25407,12 +26675,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.619: # %middle.block1637 beq $a1, $a2, .LBB9_622 .LBB9_620: # %.lr.ph.i1301.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_621: # %.lr.ph.i1301 # =>This Inner Loop Header: Depth=1 @@ -25501,12 +26775,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.627: # %middle.block1653 beq $a1, $a2, .LBB9_630 .LBB9_628: # %.lr.ph.i1309.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_629: # %.lr.ph.i1309 # =>This Inner Loop Header: Depth=1 @@ -25595,12 +26875,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.635: # %middle.block1669 beq $a1, $a2, .LBB9_638 .LBB9_636: # %.lr.ph.i1317.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_637: # %.lr.ph.i1317 # =>This Inner Loop Header: Depth=1 @@ -25689,12 +26975,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.643: # %middle.block1685 beq $a1, $a2, .LBB9_646 .LBB9_644: # %.lr.ph.i1325.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_645: # %.lr.ph.i1325 # =>This Inner Loop Header: Depth=1 @@ -25783,12 +27075,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.651: # %middle.block1701 beq $a0, $a1, .LBB9_1187 .LBB9_652: # %.lr.ph.i1333.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_653: # %.lr.ph.i1333 # =>This Inner Loop Header: Depth=1 @@ -25863,12 +27161,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.656: # %middle.block2821 beq $a1, $a2, .LBB9_659 .LBB9_657: # %.lr.ph.i707.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_658: # %.lr.ph.i707 # =>This Inner Loop Header: Depth=1 @@ -25957,12 +27261,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.664: # %middle.block2837 beq $a1, $a2, .LBB9_667 .LBB9_665: # %.lr.ph.i715.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_666: # %.lr.ph.i715 # =>This Inner Loop Header: Depth=1 @@ -26051,12 +27361,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.672: # %middle.block2853 beq $a0, $a1, .LBB9_1187 .LBB9_673: # %.lr.ph.i723.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_674: # %.lr.ph.i723 # =>This Inner Loop Header: Depth=1 @@ -26131,12 +27447,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.677: # %middle.block2597 beq $a1, $a2, .LBB9_680 .LBB9_678: # %.lr.ph.i827.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_679: # %.lr.ph.i827 # =>This Inner Loop Header: Depth=1 @@ -26225,12 +27547,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.685: # %middle.block2613 beq $a0, $a1, .LBB9_1187 .LBB9_686: # %.lr.ph.i835.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_687: # %.lr.ph.i835 # =>This Inner Loop Header: Depth=1 @@ -26305,12 +27633,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.690: # %middle.block3482 beq $a1, $a2, .LBB9_693 .LBB9_691: # %.lr.ph.i359.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_692: # %.lr.ph.i359 # =>This Inner Loop Header: Depth=1 @@ -26399,12 +27733,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.698: # %middle.block3498 beq $a1, $a2, .LBB9_701 .LBB9_699: # %.lr.ph.i367.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_700: # %.lr.ph.i367 # =>This Inner Loop Header: Depth=1 @@ -26493,12 +27833,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.706: # %middle.block3514 beq $a1, $a2, .LBB9_709 .LBB9_707: # %.lr.ph.i375.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_708: # %.lr.ph.i375 # =>This Inner Loop Header: Depth=1 @@ -26587,12 +27933,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.714: # %middle.block3530 beq $a0, $a1, .LBB9_1187 .LBB9_715: # %.lr.ph.i383.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_716: # %.lr.ph.i383 # =>This Inner Loop Header: Depth=1 @@ -26667,12 +28019,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.719: # %middle.block2565 beq $a1, $a2, .LBB9_722 .LBB9_720: # %.lr.ph.i843.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_721: # %.lr.ph.i843 # =>This Inner Loop Header: Depth=1 @@ -26761,12 +28119,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.727: # %middle.block2581 beq $a0, $a1, .LBB9_1187 .LBB9_728: # %.lr.ph.i851.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_729: # %.lr.ph.i851 # =>This Inner Loop Header: Depth=1 @@ -26841,12 +28205,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.732: # %middle.block2965 beq $a1, $a2, .LBB9_735 .LBB9_733: # %.lr.ph.i627.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_734: # %.lr.ph.i627 # =>This Inner Loop Header: Depth=1 @@ -26935,12 +28305,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.740: # %middle.block2981 beq $a1, $a2, .LBB9_743 .LBB9_741: # %.lr.ph.i635.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_742: # %.lr.ph.i635 # =>This Inner Loop Header: Depth=1 @@ -27029,12 +28405,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.748: # %middle.block2997 beq $a1, $a2, .LBB9_751 .LBB9_749: # %.lr.ph.i643.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_750: # %.lr.ph.i643 # =>This Inner Loop Header: Depth=1 @@ -27123,12 +28505,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.756: # %middle.block3013 beq $a0, $a1, .LBB9_1187 .LBB9_757: # %.lr.ph.i651.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_758: # %.lr.ph.i651 # =>This Inner Loop Header: Depth=1 @@ -27203,12 +28591,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.761: # %middle.block3402 beq $a1, $a2, .LBB9_764 .LBB9_762: # %.lr.ph.i391.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_763: # %.lr.ph.i391 # =>This Inner Loop Header: Depth=1 @@ -27297,12 +28691,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.769: # %middle.block3418 beq $a1, $a2, .LBB9_772 .LBB9_770: # %.lr.ph.i399.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_771: # %.lr.ph.i399 # =>This Inner Loop Header: Depth=1 @@ -27391,12 +28791,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.777: # %middle.block3434 beq $a1, $a2, .LBB9_780 .LBB9_778: # %.lr.ph.i407.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_779: # %.lr.ph.i407 # =>This Inner Loop Header: Depth=1 @@ -27485,12 +28891,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.785: # %middle.block3450 beq $a1, $a2, .LBB9_788 .LBB9_786: # %.lr.ph.i415.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_787: # %.lr.ph.i415 # =>This Inner Loop Header: Depth=1 @@ -27579,12 +28991,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.793: # %middle.block3466 beq $a0, $a1, .LBB9_1187 .LBB9_794: # %.lr.ph.i423.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_795: # %.lr.ph.i423 # =>This Inner Loop Header: Depth=1 @@ -27659,12 +29077,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.798: # %middle.block2741 beq $a1, $a2, .LBB9_801 .LBB9_799: # %.lr.ph.i731.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_800: # %.lr.ph.i731 # =>This Inner Loop Header: Depth=1 @@ -27753,12 +29177,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.806: # %middle.block2757 beq $a1, $a2, .LBB9_809 .LBB9_807: # %.lr.ph.i739.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_808: # %.lr.ph.i739 # =>This Inner Loop Header: Depth=1 @@ -27847,12 +29277,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.814: # %middle.block2773 beq $a1, $a2, .LBB9_817 .LBB9_815: # %.lr.ph.i747.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_816: # %.lr.ph.i747 # =>This Inner Loop Header: Depth=1 @@ -27941,12 +29377,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.822: # %middle.block2789 beq $a1, $a2, .LBB9_825 .LBB9_823: # %.lr.ph.i755.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_824: # %.lr.ph.i755 # =>This Inner Loop Header: Depth=1 @@ -28035,12 +29477,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.830: # %middle.block2805 beq $a0, $a1, .LBB9_1187 .LBB9_831: # %.lr.ph.i763.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_832: # %.lr.ph.i763 # =>This Inner Loop Header: Depth=1 @@ -28115,12 +29563,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.835: # %middle.block3125 beq $a1, $a2, .LBB9_838 .LBB9_836: # %.lr.ph.i538.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_837: # %.lr.ph.i538 # =>This Inner Loop Header: Depth=1 @@ -28209,12 +29663,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.843: # %middle.block3141 beq $a1, $a2, .LBB9_846 .LBB9_844: # %.lr.ph.i546.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_845: # %.lr.ph.i546 # =>This Inner Loop Header: Depth=1 @@ -28303,12 +29763,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.851: # %middle.block3157 beq $a1, $a2, .LBB9_854 .LBB9_852: # %.lr.ph.i554.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_853: # %.lr.ph.i554 # =>This Inner Loop Header: Depth=1 @@ -28397,12 +29863,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.859: # %middle.block3173 beq $a1, $a2, .LBB9_862 .LBB9_860: # %.lr.ph.i562.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_861: # %.lr.ph.i562 # =>This Inner Loop Header: Depth=1 @@ -28491,12 +29963,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.867: # %middle.block3189 beq $a0, $a1, .LBB9_1187 .LBB9_868: # %.lr.ph.i570.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_869: # %.lr.ph.i570 # =>This Inner Loop Header: Depth=1 @@ -28571,12 +30049,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.872: # %middle.block3285 beq $a1, $a2, .LBB9_875 .LBB9_873: # %.lr.ph.i482.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_874: # %.lr.ph.i482 # =>This Inner Loop Header: Depth=1 @@ -28665,12 +30149,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.880: # %middle.block3301 beq $a0, $a1, .LBB9_1187 .LBB9_881: # %.lr.ph.i490.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_882: # %.lr.ph.i490 # =>This Inner Loop Header: Depth=1 @@ -28745,12 +30235,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.885: # %middle.block1717 beq $a1, $a2, .LBB9_888 .LBB9_886: # %.lr.ph.i1253.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_887: # %.lr.ph.i1253 # =>This Inner Loop Header: Depth=1 @@ -28839,12 +30335,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.893: # %middle.block1733 beq $a1, $a2, .LBB9_896 .LBB9_894: # %.lr.ph.i1261.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_895: # %.lr.ph.i1261 # =>This Inner Loop Header: Depth=1 @@ -28933,12 +30435,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.901: # %middle.block1749 beq $a1, $a2, .LBB9_904 .LBB9_902: # %.lr.ph.i1269.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_903: # %.lr.ph.i1269 # =>This Inner Loop Header: Depth=1 @@ -29027,12 +30535,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.909: # %middle.block1765 beq $a1, $a2, .LBB9_912 .LBB9_910: # %.lr.ph.i1277.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_911: # %.lr.ph.i1277 # =>This Inner Loop Header: Depth=1 @@ -29121,12 +30635,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.917: # %middle.block1781 beq $a0, $a1, .LBB9_1187 .LBB9_918: # %.lr.ph.i1285.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_919: # %.lr.ph.i1285 # =>This Inner Loop Header: Depth=1 @@ -29147,41 +30667,39 @@ _Z8loopInitj: # @_Z8loopInitj bstrpick.d $a0, $a1, 30, 2 slli.d $a0, $a0, 2 vreplvei.d $vr1, $vr0, 0 - addi.d $a3, $a2, 16 - ori $a4, $zero, 0 - lu32i.d $a4, 1 - vreplgr2vr.d $vr2, $a4 - lu12i.w $a4, -419431 - ori $a4, $a4, 2458 - lu32i.d $a4, 104857 - lu52i.d $a4, $a4, 1023 - vreplgr2vr.d $vr3, $a4 - lu12i.w $a4, -307024 - ori $a4, $a4, 3880 - lu32i.d $a4, 129446 - lu52i.d $a4, $a4, 1023 - vreplgr2vr.d $vr4, $a4 - move $a4, $a0 + addi.d $a5, $a3, 16 + ori $a6, $zero, 0 + lu32i.d $a6, 1 + vreplgr2vr.d $vr2, $a6 + ori $a6, $a4, 2458 + lu32i.d $a6, 104857 + lu52i.d $a6, $a6, 1023 + vreplgr2vr.d $vr3, $a6 + ori $a6, $a2, 3880 + lu32i.d $a6, 129446 + lu52i.d $a6, $a6, 1023 + vreplgr2vr.d $vr4, $a6 + move $a6, $a0 .p2align 4, , 16 .LBB9_921: # %vector.body # =>This Inner Loop Header: Depth=1 vaddi.wu $vr5, $vr2, 2 - vpickve2gr.w $a5, $vr2, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa6, $a5 + vpickve2gr.w $a7, $vr2, 1 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa6, $a7 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a5, $vr2, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa7, $a5 + vpickve2gr.w $a7, $vr2, 0 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa7, $a7 ffint.d.l $fa7, $fa7 vextrins.d $vr7, $vr6, 16 - vpickve2gr.w $a5, $vr5, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa6, $a5 + vpickve2gr.w $a7, $vr5, 1 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa6, $a7 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a5, $vr5, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa5, $a5 + vpickve2gr.w $a7, $vr5, 0 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa5, $a7 ffint.d.l $fa5, $fa5 vextrins.d $vr5, $vr6, 16 vfadd.d $vr6, $vr7, $vr3 @@ -29192,34 +30710,38 @@ _Z8loopInitj: # @_Z8loopInitj vfadd.d $vr5, $vr5, $vr4 vfdiv.d $vr6, $vr6, $vr7 vfdiv.d $vr5, $vr8, $vr5 - vst $vr6, $a3, -16 - vst $vr5, $a3, 0 + vst $vr6, $a5, -16 + vst $vr5, $a5, 0 vaddi.wu $vr2, $vr2, 4 - addi.d $a4, $a4, -4 - addi.d $a3, $a3, 32 - bnez $a4, .LBB9_921 + addi.d $a6, $a6, -4 + addi.d $a5, $a5, 32 + bnez $a6, .LBB9_921 # %bb.922: # %middle.block beq $a0, $a1, .LBB9_1187 .LBB9_923: # %.lr.ph.i1341.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 - alsl.d $a2, $a0, $a2, 3 + alsl.d $a3, $a0, $a3, 3 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + ori $a2, $a2, 3880 + lu32i.d $a2, 129446 + lu52i.d $a2, $a2, 1023 + movgr2fr.d $fa2, $a2 .p2align 4, , 16 .LBB9_924: # %.lr.ph.i1341 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a0, 31, 0 - movgr2fr.d $fa3, $a3 + bstrpick.d $a2, $a0, 31, 0 + movgr2fr.d $fa3, $a2 ffint.d.l $fa3, $fa3 fadd.d $fa4, $fa3, $fa1 fmul.d $fa4, $fa0, $fa4 fadd.d $fa3, $fa3, $fa2 fdiv.d $fa3, $fa4, $fa3 - fst.d $fa3, $a2, 0 + fst.d $fa3, $a3, 0 addi.d $a1, $a1, -1 - addi.d $a2, $a2, 8 + addi.d $a3, $a3, 8 addi.w $a0, $a0, 1 bnez $a1, .LBB9_924 b .LBB9_1187 @@ -29281,12 +30803,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.927: # %middle.block2533 beq $a1, $a2, .LBB9_930 .LBB9_928: # %.lr.ph.i859.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_929: # %.lr.ph.i859 # =>This Inner Loop Header: Depth=1 @@ -29375,12 +30903,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.935: # %middle.block2549 beq $a0, $a1, .LBB9_1187 .LBB9_936: # %.lr.ph.i867.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_937: # %.lr.ph.i867 # =>This Inner Loop Header: Depth=1 @@ -29455,12 +30989,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.940: # %middle.block2629 beq $a1, $a2, .LBB9_943 .LBB9_941: # %.lr.ph.i771.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_942: # %.lr.ph.i771 # =>This Inner Loop Header: Depth=1 @@ -29549,12 +31089,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.948: # %middle.block2645 beq $a1, $a2, .LBB9_951 .LBB9_949: # %.lr.ph.i779.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_950: # %.lr.ph.i779 # =>This Inner Loop Header: Depth=1 @@ -29643,12 +31189,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.956: # %middle.block2661 beq $a1, $a2, .LBB9_959 .LBB9_957: # %.lr.ph.i787.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_958: # %.lr.ph.i787 # =>This Inner Loop Header: Depth=1 @@ -29737,12 +31289,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.964: # %middle.block2677 beq $a1, $a2, .LBB9_967 .LBB9_965: # %.lr.ph.i795.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_966: # %.lr.ph.i795 # =>This Inner Loop Header: Depth=1 @@ -29831,12 +31389,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.972: # %middle.block2693 beq $a1, $a2, .LBB9_975 .LBB9_973: # %.lr.ph.i803.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_974: # %.lr.ph.i803 # =>This Inner Loop Header: Depth=1 @@ -29925,12 +31489,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.980: # %middle.block2709 beq $a1, $a2, .LBB9_983 .LBB9_981: # %.lr.ph.i811.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_982: # %.lr.ph.i811 # =>This Inner Loop Header: Depth=1 @@ -30019,12 +31589,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.988: # %middle.block2725 beq $a0, $a1, .LBB9_1187 .LBB9_989: # %.lr.ph.i819.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_990: # %.lr.ph.i819 # =>This Inner Loop Header: Depth=1 @@ -30099,12 +31675,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.993: # %middle.block2501 beq $a1, $a2, .LBB9_996 .LBB9_994: # %.lr.ph.i875.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_995: # %.lr.ph.i875 # =>This Inner Loop Header: Depth=1 @@ -30193,12 +31775,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1001: # %middle.block2517 beq $a0, $a1, .LBB9_1187 .LBB9_1002: # %.lr.ph.i883.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_1003: # %.lr.ph.i883 # =>This Inner Loop Header: Depth=1 @@ -30273,12 +31861,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1006: # %middle.block1845 beq $a1, $a2, .LBB9_1009 .LBB9_1007: # %.lr.ph.i1149.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1008: # %.lr.ph.i1149 # =>This Inner Loop Header: Depth=1 @@ -30367,12 +31961,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1014: # %middle.block1861 beq $a1, $a2, .LBB9_1017 .LBB9_1015: # %.lr.ph.i1157.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1016: # %.lr.ph.i1157 # =>This Inner Loop Header: Depth=1 @@ -30461,12 +32061,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1022: # %middle.block1877 beq $a1, $a2, .LBB9_1025 .LBB9_1023: # %.lr.ph.i1165.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1024: # %.lr.ph.i1165 # =>This Inner Loop Header: Depth=1 @@ -30555,12 +32161,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1030: # %middle.block1893 beq $a1, $a2, .LBB9_1033 .LBB9_1031: # %.lr.ph.i1173.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1032: # %.lr.ph.i1173 # =>This Inner Loop Header: Depth=1 @@ -30649,12 +32261,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1038: # %middle.block1909 beq $a1, $a2, .LBB9_1041 .LBB9_1039: # %.lr.ph.i1181.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1040: # %.lr.ph.i1181 # =>This Inner Loop Header: Depth=1 @@ -30743,12 +32361,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1046: # %middle.block1925 beq $a1, $a2, .LBB9_1049 .LBB9_1047: # %.lr.ph.i1189.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1048: # %.lr.ph.i1189 # =>This Inner Loop Header: Depth=1 @@ -30837,12 +32461,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1054: # %middle.block1941 beq $a1, $a2, .LBB9_1057 .LBB9_1055: # %.lr.ph.i1197.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1056: # %.lr.ph.i1197 # =>This Inner Loop Header: Depth=1 @@ -30931,12 +32561,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1062: # %middle.block1957 beq $a1, $a2, .LBB9_1065 .LBB9_1063: # %.lr.ph.i1205.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1064: # %.lr.ph.i1205 # =>This Inner Loop Header: Depth=1 @@ -31025,12 +32661,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1070: # %middle.block1973 beq $a1, $a2, .LBB9_1073 .LBB9_1071: # %.lr.ph.i1213.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1072: # %.lr.ph.i1213 # =>This Inner Loop Header: Depth=1 @@ -31119,12 +32761,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1078: # %middle.block1989 beq $a0, $a1, .LBB9_1187 .LBB9_1079: # %.lr.ph.i1221.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_1080: # %.lr.ph.i1221 # =>This Inner Loop Header: Depth=1 @@ -31199,12 +32847,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1083: # %middle.block1797 beq $a1, $a2, .LBB9_1086 .LBB9_1084: # %.lr.ph.i1229.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1085: # %.lr.ph.i1229 # =>This Inner Loop Header: Depth=1 @@ -31293,12 +32947,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1091: # %middle.block1813 beq $a1, $a2, .LBB9_1094 .LBB9_1092: # %.lr.ph.i1237.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1093: # %.lr.ph.i1237 # =>This Inner Loop Header: Depth=1 @@ -31387,12 +33047,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1099: # %middle.block1829 beq $a0, $a1, .LBB9_1187 .LBB9_1100: # %.lr.ph.i1245.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_1101: # %.lr.ph.i1245 # =>This Inner Loop Header: Depth=1 @@ -31467,12 +33133,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1104: # %middle.block2069 beq $a1, $a2, .LBB9_1107 .LBB9_1105: # %.lr.ph.i1029.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1106: # %.lr.ph.i1029 # =>This Inner Loop Header: Depth=1 @@ -31561,12 +33233,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1112: # %middle.block2085 beq $a1, $a2, .LBB9_1115 .LBB9_1113: # %.lr.ph.i1037.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1114: # %.lr.ph.i1037 # =>This Inner Loop Header: Depth=1 @@ -31655,12 +33333,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1120: # %middle.block2101 beq $a1, $a2, .LBB9_1123 .LBB9_1121: # %.lr.ph.i1045.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1122: # %.lr.ph.i1045 # =>This Inner Loop Header: Depth=1 @@ -31749,12 +33433,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1128: # %middle.block2117 beq $a1, $a2, .LBB9_1131 .LBB9_1129: # %.lr.ph.i1053.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1130: # %.lr.ph.i1053 # =>This Inner Loop Header: Depth=1 @@ -31843,12 +33533,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1136: # %middle.block2133 beq $a1, $a2, .LBB9_1139 .LBB9_1137: # %.lr.ph.i1061.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1138: # %.lr.ph.i1061 # =>This Inner Loop Header: Depth=1 @@ -31937,12 +33633,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1144: # %middle.block2149 beq $a1, $a2, .LBB9_1147 .LBB9_1145: # %.lr.ph.i1069.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1146: # %.lr.ph.i1069 # =>This Inner Loop Header: Depth=1 @@ -32031,12 +33733,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1152: # %middle.block2165 beq $a1, $a2, .LBB9_1155 .LBB9_1153: # %.lr.ph.i1077.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1154: # %.lr.ph.i1077 # =>This Inner Loop Header: Depth=1 @@ -32125,12 +33833,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1160: # %middle.block2181 beq $a1, $a2, .LBB9_1163 .LBB9_1161: # %.lr.ph.i1085.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1162: # %.lr.ph.i1085 # =>This Inner Loop Header: Depth=1 @@ -32219,12 +33933,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1168: # %middle.block2197 beq $a1, $a2, .LBB9_1171 .LBB9_1169: # %.lr.ph.i1093.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1170: # %.lr.ph.i1093 # =>This Inner Loop Header: Depth=1 @@ -32313,12 +34033,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1176: # %middle.block2213 beq $a1, $a2, .LBB9_1179 .LBB9_1177: # %.lr.ph.i1101.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1178: # %.lr.ph.i1101 # =>This Inner Loop Header: Depth=1 @@ -32407,12 +34133,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1184: # %middle.block2229 beq $a0, $a1, .LBB9_1187 .LBB9_1185: # %.lr.ph.i1109.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_1186: # %.lr.ph.i1109 # =>This Inner Loop Header: Depth=1 diff --git a/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/__/runReferenceLoops.s b/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/__/runReferenceLoops.s index b31732f8..26d4daf4 100644 --- a/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/__/runReferenceLoops.s +++ b/results/MicroBenchmarks/LCALS/SubsetARawLoops/CMakeFiles/lcalsARaw.dir/__/runReferenceLoops.s @@ -903,14 +903,8 @@ _ZN8LoopStatD2Ev: # @_ZN8LoopStatD2Ev .size _ZN8LoopStatD2Ev, .Lfunc_end3-_ZN8LoopStatD2Ev .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z25computeReferenceLoopTimesv -.LCPI4_0: - .dword 0x3f5426fe718a86d7 # double 0.00123 -.LCPI4_1: - .dword 0xbf5426fe718a86d7 # double -0.00123 .text - .globl _Z25computeReferenceLoopTimesv + .globl _Z25computeReferenceLoopTimesv # -- Begin function _Z25computeReferenceLoopTimesv .p2align 5 .type _Z25computeReferenceLoopTimesv,@function _Z25computeReferenceLoopTimesv: # @_Z25computeReferenceLoopTimesv @@ -1649,13 +1643,16 @@ _Z25computeReferenceLoopTimesv: # @_Z25computeReferenceLoopTimesv pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 st.d $a0, $sp, 640 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI4_0) - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI4_1) ori $a0, $zero, 1 st.b $a0, $sp, 648 + lu12i.w $a0, 465064 + ori $a0, $a0, 1751 + lu32i.d $a0, 272126 + lu52i.d $a1, $a0, 1013 + movgr2fr.d $fs0, $a1 fadd.d $fa0, $fs2, $fs0 + lu52i.d $a0, $a0, -1035 + movgr2fr.d $fs1, $a0 fadd.d $fa1, $fs2, $fs1 fdiv.d $fa0, $fa0, $fa1 fst.d $fa0, $fp, 384 diff --git a/results/MicroBenchmarks/LCALS/SubsetBLambdaLoops/CMakeFiles/lcalsBLambda.dir/__/LCALSStats.s b/results/MicroBenchmarks/LCALS/SubsetBLambdaLoops/CMakeFiles/lcalsBLambda.dir/__/LCALSStats.s index 65ccd92d..95272aa5 100644 --- a/results/MicroBenchmarks/LCALS/SubsetBLambdaLoops/CMakeFiles/lcalsBLambda.dir/__/LCALSStats.s +++ b/results/MicroBenchmarks/LCALS/SubsetBLambdaLoops/CMakeFiles/lcalsBLambda.dir/__/LCALSStats.s @@ -868,12 +868,7 @@ _Z19getLoopSuiteRunInfov: # @_Z19getLoopSuiteRunInfov .Lfunc_end1: .size _Z19getLoopSuiteRunInfov, .Lfunc_end1-_Z19getLoopSuiteRunInfov # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm -.LCPI2_0: - .dword 0x3fb999999999999a # double 0.10000000000000001 - .text - .globl _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm + .globl _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm # -- Begin function _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm .p2align 5 .type _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm,@function _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm: # @_Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm @@ -1018,10 +1013,13 @@ _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcE st.d $a0, $s4, 560 beqz $a1, .LBB2_15 # %bb.13: # %.lr.ph18.preheader - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI2_0) move $fp, $zero move $s0, $zero + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1019 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB2_14: # %.lr.ph18 # =>This Inner Loop Header: Depth=1 diff --git a/results/MicroBenchmarks/LCALS/SubsetBLambdaLoops/CMakeFiles/lcalsBLambda.dir/__/LCALSSuite.s b/results/MicroBenchmarks/LCALS/SubsetBLambdaLoops/CMakeFiles/lcalsBLambda.dir/__/LCALSSuite.s index a1e8c653..e35fa2d7 100644 --- a/results/MicroBenchmarks/LCALS/SubsetBLambdaLoops/CMakeFiles/lcalsBLambda.dir/__/LCALSSuite.s +++ b/results/MicroBenchmarks/LCALS/SubsetBLambdaLoops/CMakeFiles/lcalsBLambda.dir/__/LCALSSuite.s @@ -23,33 +23,21 @@ _Z11getLoopDatav: # @_Z11getLoopDatav .LCPI1_1: .dword 0x3ff6666666666666 # double 1.3999999999999999 .dword 0x3ff0000000000000 # double 1 -.LCPI1_5: +.LCPI1_2: .dword 8 # 0x8 .dword 4923084613239392580 # 0x44524f5f43534944 -.LCPI1_6: +.LCPI1_3: .dword 8 # 0x8 .dword 4914094937701898568 # 0x44325f4f52445948 -.LCPI1_7: +.LCPI1_4: .dword 8 # 0x8 .dword 4913813462725187912 # 0x44315f4f52445948 -.LCPI1_8: +.LCPI1_5: .dword 8 # 0x8 .dword 6074873621086556756 # 0x544e495f50415254 -.LCPI1_11: +.LCPI1_6: .dword 8 # 0x8 .dword 5786931235628926290 # 0x504f4f4c5f464552 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI1_2: - .dword 0x40e5972000000000 # double 44217 -.LCPI1_3: - .dword 0x40b3890000000000 # double 5001 -.LCPI1_4: - .dword 0x4065600000000000 # double 171 -.LCPI1_9: - .dword 0x4063800000000000 # double 156 -.LCPI1_10: - .dword 0x4050000000000000 # double 64 .text .globl _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd .p2align 5 @@ -640,21 +628,27 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define .Ltmp34: # EH_LABEL # %bb.92: move $s8, $a0 - pcalau12i $a0, %pc_hi20(.LCPI1_2) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_2) - pcalau12i $a0, %pc_hi20(.LCPI1_3) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_3) + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, 366368 + lu52i.d $a1, $a1, 1038 + movgr2fr.d $fa0, $a1 fmul.d $fa0, $fs0, $fa0 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a0, $fa0 - fmul.d $fa0, $fs0, $fa1 + movfr2gr.s $a1, $fa0 + st.w $a1, $s8, 0 + ori $a1, $zero, 0 + lu32i.d $a1, 231680 + lu52i.d $a1, $a1, 1035 + movgr2fr.d $fa0, $a1 + fmul.d $fa0, $fs0, $fa0 ftintrz.w.d $fa0, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI1_4) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_4) movfr2gr.s $a1, $fa0 - st.w $a0, $s8, 0 st.w $a1, $s8, 4 - fmul.d $fa0, $fs0, $fa1 + lu32i.d $a0, 352256 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa0, $a0 + fmul.d $fa0, $fs0, $fa0 ftintrz.w.d $fa0, $fa0 ld.d $s7, $sp, 96 # 8-byte Folded Reload ld.w $a0, $s7, 32 @@ -732,8 +726,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define jr $a0 .LBB1_99: # %._crit_edge.i.i352 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_11) - vld $vr0, $a0, %pc_lo12(.LCPI1_11) + pcalau12i $a0, %pc_hi20(.LCPI1_6) + vld $vr0, $a0, %pc_lo12(.LCPI1_6) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -936,8 +930,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define b .LBB1_133 .LBB1_112: # %._crit_edge.i.i732 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_8) - vld $vr0, $a0, %pc_lo12(.LCPI1_8) + pcalau12i $a0, %pc_hi20(.LCPI1_5) + vld $vr0, $a0, %pc_lo12(.LCPI1_5) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -1687,8 +1681,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define b .LBB1_205 .LBB1_158: # %._crit_edge.i.i748 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_7) - vld $vr0, $a0, %pc_lo12(.LCPI1_7) + pcalau12i $a0, %pc_hi20(.LCPI1_4) + vld $vr0, $a0, %pc_lo12(.LCPI1_4) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -1761,16 +1755,18 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define # in Loop: Header=BB1_95 Depth=1 ld.d $a0, $sp, 32 # 8-byte Folded Reload ld.d $a0, $a0, 0 - ld.d $a1, $sp, 8 # 8-byte Folded Reload - fld.d $fa0, $a1, %pc_lo12(_ZN7ADomain18loop_length_factorE) - pcalau12i $a1, %pc_hi20(.LCPI1_9) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_9) - fld.d $fa2, $a0, 0 - fmul.d $fa1, $fa0, $fa1 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 + fld.d $fa1, $a0, 0 + ld.d $a0, $sp, 8 # 8-byte Folded Reload + fld.d $fa0, $a0, %pc_lo12(_ZN7ADomain18loop_length_factorE) + ori $a0, $zero, 0 + lu32i.d $a0, 229376 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa2, $a0 + fmul.d $fa2, $fa0, $fa2 + ftintrz.w.d $fa2, $fa2 + movfr2gr.s $a0, $fa2 ori $a2, $zero, 2 - fst.d $fa2, $sp, 168 + fst.d $fa1, $sp, 168 blt $a0, $a2, .LBB1_211 # %bb.164: # %.lr.ph72.us.i.preheader # in Loop: Header=BB1_95 Depth=1 @@ -2202,8 +2198,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define b .LBB1_205 .LBB1_196: # %._crit_edge.i.i988 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_5) - vld $vr0, $a0, %pc_lo12(.LCPI1_5) + pcalau12i $a0, %pc_hi20(.LCPI1_2) + vld $vr0, $a0, %pc_lo12(.LCPI1_2) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -2297,8 +2293,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define b .LBB1_205 .LBB1_202: # %._crit_edge.i.i956 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_6) - vld $vr0, $a0, %pc_lo12(.LCPI1_6) + pcalau12i $a0, %pc_hi20(.LCPI1_3) + vld $vr0, $a0, %pc_lo12(.LCPI1_3) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -2382,9 +2378,9 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define move $a3, $zero .LBB1_212: # %_ZN7ADomainC2Eii.exit527 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a1, %pc_hi20(.LCPI1_10) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_10) ld.d $a1, $sp, 368 + lu52i.d $a4, $zero, 1029 + movgr2fr.d $fa1, $a4 fmul.d $fa1, $fa0, $fa1 ftintrz.w.d $fa1, $fa1 movfr2gr.s $a4, $fa1 @@ -4488,15 +4484,9 @@ GCC_except_table7: .LCPI8_0: .dword 0x3fb999999999999a # double 0.10000000000000001 .dword 0x3fc999999999999a # double 0.20000000000000001 -.LCPI8_3: +.LCPI8_1: .dword 0x3fc999999999999a # double 0.20000000000000001 .dword 0x3fd3333333333333 # double 0.29999999999999999 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI8_1: - .dword 0x3ff199999999999a # double 1.1000000000000001 -.LCPI8_2: - .dword 0x3ff1f9a6b50b0f28 # double 1.1234500000000001 .text .globl _Z8loopInitjR8LoopStat .p2align 5 @@ -4747,8 +4737,8 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat .LBB8_40: pcalau12i $a0, %pc_hi20(.LCPI8_0) addi.d $a0, $a0, %pc_lo12(.LCPI8_0) - pcalau12i $a1, %pc_hi20(.LCPI8_3) - addi.d $a1, $a1, %pc_lo12(.LCPI8_3) + pcalau12i $a1, %pc_hi20(.LCPI8_1) + addi.d $a1, $a1, %pc_lo12(.LCPI8_1) ld.w $a3, $s1, 1032 blez $a3, .LBB8_577 # %bb.41: # %.lr.ph.preheader.i430 @@ -4962,8 +4952,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat pcalau12i $a2, %pc_hi20(.LCPI8_0) addi.d $a2, $a2, %pc_lo12(.LCPI8_0) fldx.d $fa0, $a2, $a0 - ld.d $a2, $s1, 472 + ld.d $a3, $s1, 472 ori $a0, $zero, 4 + lu12i.w $a4, -419431 + lu12i.w $a2, -307024 bgeu $a1, $a0, .LBB8_920 # %bb.78: move $a0, $zero @@ -5152,12 +5144,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.102: # %middle.block3803 beq $a1, $a2, .LBB8_105 .LBB8_103: # %.lr.ph.i184.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_104: # %.lr.ph.i184 # =>This Inner Loop Header: Depth=1 @@ -5246,12 +5244,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.110: # %middle.block3819 beq $a1, $a2, .LBB8_113 .LBB8_111: # %.lr.ph.i192.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_112: # %.lr.ph.i192 # =>This Inner Loop Header: Depth=1 @@ -5340,12 +5344,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.118: # %middle.block3835 beq $a1, $a2, .LBB8_121 .LBB8_119: # %.lr.ph.i200.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_120: # %.lr.ph.i200 # =>This Inner Loop Header: Depth=1 @@ -5434,12 +5444,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.126: # %middle.block3851 beq $a1, $a2, .LBB8_129 .LBB8_127: # %.lr.ph.i208.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_128: # %.lr.ph.i208 # =>This Inner Loop Header: Depth=1 @@ -5528,12 +5544,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.134: # %middle.block3867 beq $a1, $a2, .LBB8_137 .LBB8_135: # %.lr.ph.i216.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_136: # %.lr.ph.i216 # =>This Inner Loop Header: Depth=1 @@ -5622,12 +5644,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.142: # %middle.block3883 beq $a0, $a1, .LBB8_1187 .LBB8_143: # %.lr.ph.i224.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_144: # %.lr.ph.i224 # =>This Inner Loop Header: Depth=1 @@ -5702,12 +5730,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.147: # %middle.block3547 beq $a1, $a2, .LBB8_150 .LBB8_148: # %.lr.ph.i232.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_149: # %.lr.ph.i232 # =>This Inner Loop Header: Depth=1 @@ -5796,12 +5830,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.155: # %middle.block3563 beq $a1, $a2, .LBB8_158 .LBB8_156: # %.lr.ph.i240.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_157: # %.lr.ph.i240 # =>This Inner Loop Header: Depth=1 @@ -5890,12 +5930,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.163: # %middle.block3579 beq $a1, $a2, .LBB8_166 .LBB8_164: # %.lr.ph.i248.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_165: # %.lr.ph.i248 # =>This Inner Loop Header: Depth=1 @@ -5984,12 +6030,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.171: # %middle.block3595 beq $a1, $a2, .LBB8_174 .LBB8_172: # %.lr.ph.i256.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_173: # %.lr.ph.i256 # =>This Inner Loop Header: Depth=1 @@ -6078,12 +6130,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.179: # %middle.block3611 beq $a1, $a2, .LBB8_182 .LBB8_180: # %.lr.ph.i264.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_181: # %.lr.ph.i264 # =>This Inner Loop Header: Depth=1 @@ -6172,12 +6230,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.187: # %middle.block3627 beq $a1, $a2, .LBB8_190 .LBB8_188: # %.lr.ph.i272.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_189: # %.lr.ph.i272 # =>This Inner Loop Header: Depth=1 @@ -6266,12 +6330,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.195: # %middle.block3643 beq $a1, $a2, .LBB8_198 .LBB8_196: # %.lr.ph.i280.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_197: # %.lr.ph.i280 # =>This Inner Loop Header: Depth=1 @@ -6360,12 +6430,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.203: # %middle.block3659 beq $a1, $a2, .LBB8_206 .LBB8_204: # %.lr.ph.i288.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_205: # %.lr.ph.i288 # =>This Inner Loop Header: Depth=1 @@ -6454,12 +6530,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.211: # %middle.block3675 beq $a1, $a2, .LBB8_214 .LBB8_212: # %.lr.ph.i296.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_213: # %.lr.ph.i296 # =>This Inner Loop Header: Depth=1 @@ -6548,12 +6630,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.219: # %middle.block3691 beq $a1, $a2, .LBB8_222 .LBB8_220: # %.lr.ph.i304.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_221: # %.lr.ph.i304 # =>This Inner Loop Header: Depth=1 @@ -6642,12 +6730,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.227: # %middle.block3707 beq $a1, $a2, .LBB8_230 .LBB8_228: # %.lr.ph.i312.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_229: # %.lr.ph.i312 # =>This Inner Loop Header: Depth=1 @@ -6736,12 +6830,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.235: # %middle.block3723 beq $a1, $a2, .LBB8_238 .LBB8_236: # %.lr.ph.i320.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_237: # %.lr.ph.i320 # =>This Inner Loop Header: Depth=1 @@ -6830,12 +6930,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.243: # %middle.block3739 beq $a1, $a2, .LBB8_246 .LBB8_244: # %.lr.ph.i328.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_245: # %.lr.ph.i328 # =>This Inner Loop Header: Depth=1 @@ -6924,12 +7030,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.251: # %middle.block3755 beq $a1, $a2, .LBB8_254 .LBB8_252: # %.lr.ph.i336.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_253: # %.lr.ph.i336 # =>This Inner Loop Header: Depth=1 @@ -7018,12 +7130,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.259: # %middle.block3771 beq $a1, $a2, .LBB8_262 .LBB8_260: # %.lr.ph.i344.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_261: # %.lr.ph.i344 # =>This Inner Loop Header: Depth=1 @@ -7112,12 +7230,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.267: # %middle.block3787 beq $a0, $a1, .LBB8_1187 .LBB8_268: # %.lr.ph.i352.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_269: # %.lr.ph.i352 # =>This Inner Loop Header: Depth=1 @@ -7215,12 +7339,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.274: # %middle.block2006 beq $a1, $a2, .LBB8_277 .LBB8_275: # %.lr.ph.i1118.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_276: # %.lr.ph.i1118 # =>This Inner Loop Header: Depth=1 @@ -7309,12 +7439,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.282: # %middle.block2022 beq $a1, $a2, .LBB8_285 .LBB8_283: # %.lr.ph.i1126.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_284: # %.lr.ph.i1126 # =>This Inner Loop Header: Depth=1 @@ -7403,12 +7539,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.290: # %middle.block2038 beq $a1, $a2, .LBB8_293 .LBB8_291: # %.lr.ph.i1134.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_292: # %.lr.ph.i1134 # =>This Inner Loop Header: Depth=1 @@ -7497,12 +7639,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.298: # %middle.block2054 beq $a0, $a1, .LBB8_1187 .LBB8_299: # %.lr.ph.i1142.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_300: # %.lr.ph.i1142 # =>This Inner Loop Header: Depth=1 @@ -7577,12 +7725,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.303: # %middle.block2870 beq $a1, $a2, .LBB8_306 .LBB8_304: # %.lr.ph.i692.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_305: # %.lr.ph.i692 # =>This Inner Loop Header: Depth=1 @@ -7671,12 +7825,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.311: # %middle.block2886 beq $a0, $a1, .LBB8_1187 .LBB8_312: # %.lr.ph.i700.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_313: # %.lr.ph.i700 # =>This Inner Loop Header: Depth=1 @@ -7751,12 +7911,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.316: # %middle.block2934 beq $a1, $a2, .LBB8_319 .LBB8_317: # %.lr.ph.i660.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_318: # %.lr.ph.i660 # =>This Inner Loop Header: Depth=1 @@ -7845,12 +8011,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.324: # %middle.block2950 beq $a0, $a1, .LBB8_1187 .LBB8_325: # %.lr.ph.i668.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_326: # %.lr.ph.i668 # =>This Inner Loop Header: Depth=1 @@ -7925,12 +8097,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.329: # %middle.block3046 beq $a1, $a2, .LBB8_332 .LBB8_330: # %.lr.ph.i579.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_331: # %.lr.ph.i579 # =>This Inner Loop Header: Depth=1 @@ -8019,13 +8197,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.337: # %middle.block3062 beq $a1, $a2, .LBB8_340 .LBB8_338: # %.lr.ph.i587.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 - .p2align 4, , 16 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 + .p2align 4, , 16 .LBB8_339: # %.lr.ph.i587 # =>This Inner Loop Header: Depth=1 bstrpick.d $a4, $a1, 31, 0 @@ -8113,12 +8297,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.345: # %middle.block3078 beq $a1, $a2, .LBB8_348 .LBB8_346: # %.lr.ph.i595.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_347: # %.lr.ph.i595 # =>This Inner Loop Header: Depth=1 @@ -8207,12 +8397,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.353: # %middle.block3094 beq $a1, $a2, .LBB8_356 .LBB8_354: # %.lr.ph.i603.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_355: # %.lr.ph.i603 # =>This Inner Loop Header: Depth=1 @@ -8301,12 +8497,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.361: # %middle.block3110 beq $a0, $a1, .LBB8_1187 .LBB8_362: # %.lr.ph.i611.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_363: # %.lr.ph.i611 # =>This Inner Loop Header: Depth=1 @@ -8381,12 +8583,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.366: # %middle.block3030 beq $a0, $a1, .LBB8_1187 .LBB8_367: # %.lr.ph.i620.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_368: # %.lr.ph.i620 # =>This Inner Loop Header: Depth=1 @@ -8461,12 +8669,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.371: # %middle.block3899 beq $a1, $a2, .LBB8_374 .LBB8_372: # %.lr.ph.i.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_373: # %.lr.ph.i # =>This Inner Loop Header: Depth=1 @@ -8555,12 +8769,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.379: # %middle.block3915 beq $a1, $a2, .LBB8_382 .LBB8_380: # %.lr.ph.i168.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_381: # %.lr.ph.i168 # =>This Inner Loop Header: Depth=1 @@ -8649,12 +8869,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.387: # %middle.block3931 beq $a0, $a1, .LBB8_1187 .LBB8_388: # %.lr.ph.i176.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_389: # %.lr.ph.i176 # =>This Inner Loop Header: Depth=1 @@ -8729,12 +8955,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.392: # %middle.block3206 beq $a1, $a2, .LBB8_395 .LBB8_393: # %.lr.ph.i499.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_394: # %.lr.ph.i499 # =>This Inner Loop Header: Depth=1 @@ -8823,12 +9055,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.400: # %middle.block3222 beq $a1, $a2, .LBB8_403 .LBB8_401: # %.lr.ph.i507.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_402: # %.lr.ph.i507 # =>This Inner Loop Header: Depth=1 @@ -8917,12 +9155,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.408: # %middle.block3238 beq $a1, $a2, .LBB8_411 .LBB8_409: # %.lr.ph.i515.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_410: # %.lr.ph.i515 # =>This Inner Loop Header: Depth=1 @@ -9011,12 +9255,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.416: # %middle.block3254 beq $a1, $a2, .LBB8_419 .LBB8_417: # %.lr.ph.i523.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_418: # %.lr.ph.i523 # =>This Inner Loop Header: Depth=1 @@ -9105,12 +9355,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.424: # %middle.block3270 beq $a0, $a1, .LBB8_1187 .LBB8_425: # %.lr.ph.i531.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_426: # %.lr.ph.i531 # =>This Inner Loop Header: Depth=1 @@ -9185,12 +9441,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.429: # %middle.block2246 beq $a1, $a2, .LBB8_432 .LBB8_430: # %.lr.ph.i944.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_431: # %.lr.ph.i944 # =>This Inner Loop Header: Depth=1 @@ -9279,12 +9541,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.437: # %middle.block2262 beq $a1, $a2, .LBB8_440 .LBB8_438: # %.lr.ph.i952.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_439: # %.lr.ph.i952 # =>This Inner Loop Header: Depth=1 @@ -9373,12 +9641,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.445: # %middle.block2278 beq $a1, $a2, .LBB8_448 .LBB8_446: # %.lr.ph.i960.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_447: # %.lr.ph.i960 # =>This Inner Loop Header: Depth=1 @@ -9467,12 +9741,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.453: # %middle.block2294 beq $a1, $a2, .LBB8_456 .LBB8_454: # %.lr.ph.i968.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_455: # %.lr.ph.i968 # =>This Inner Loop Header: Depth=1 @@ -9561,12 +9841,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.461: # %middle.block2310 beq $a1, $a2, .LBB8_464 .LBB8_462: # %.lr.ph.i976.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_463: # %.lr.ph.i976 # =>This Inner Loop Header: Depth=1 @@ -9655,12 +9941,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.469: # %middle.block2326 beq $a1, $a2, .LBB8_472 .LBB8_470: # %.lr.ph.i984.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_471: # %.lr.ph.i984 # =>This Inner Loop Header: Depth=1 @@ -9749,12 +10041,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.477: # %middle.block2342 beq $a1, $a2, .LBB8_480 .LBB8_478: # %.lr.ph.i992.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_479: # %.lr.ph.i992 # =>This Inner Loop Header: Depth=1 @@ -9843,12 +10141,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.485: # %middle.block2358 beq $a1, $a2, .LBB8_488 .LBB8_486: # %.lr.ph.i1000.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_487: # %.lr.ph.i1000 # =>This Inner Loop Header: Depth=1 @@ -9937,12 +10241,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.493: # %middle.block2374 beq $a1, $a2, .LBB8_496 .LBB8_494: # %.lr.ph.i1008.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_495: # %.lr.ph.i1008 # =>This Inner Loop Header: Depth=1 @@ -10031,12 +10341,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.501: # %middle.block2390 beq $a0, $a1, .LBB8_504 .LBB8_502: # %.lr.ph.i1016.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_503: # %.lr.ph.i1016 # =>This Inner Loop Header: Depth=1 @@ -10142,12 +10458,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.512: # %middle.block2406 beq $a0, $a1, .LBB8_515 .LBB8_513: # %.lr.ph.i892.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_514: # %.lr.ph.i892 # =>This Inner Loop Header: Depth=1 @@ -10236,12 +10558,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.520: # %middle.block2422 beq $a0, $a1, .LBB8_523 .LBB8_521: # %.lr.ph.i900.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_522: # %.lr.ph.i900 # =>This Inner Loop Header: Depth=1 @@ -10330,12 +10658,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.528: # %middle.block2438 beq $a0, $a1, .LBB8_531 .LBB8_529: # %.lr.ph.i908.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_530: # %.lr.ph.i908 # =>This Inner Loop Header: Depth=1 @@ -10424,12 +10758,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.536: # %middle.block2454 beq $a0, $a1, .LBB8_539 .LBB8_537: # %.lr.ph.i916.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_538: # %.lr.ph.i916 # =>This Inner Loop Header: Depth=1 @@ -10518,12 +10858,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.544: # %middle.block2470 beq $a0, $a1, .LBB8_547 .LBB8_545: # %.lr.ph.i924.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_546: # %.lr.ph.i924 # =>This Inner Loop Header: Depth=1 @@ -10630,12 +10976,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.556: # %middle.block2486 beq $a0, $a1, .LBB8_1187 .LBB8_557: # %.lr.ph.i936.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_558: # %.lr.ph.i936 # =>This Inner Loop Header: Depth=1 @@ -10710,12 +11062,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.561: # %middle.block2902 beq $a1, $a2, .LBB8_564 .LBB8_562: # %.lr.ph.i676.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_563: # %.lr.ph.i676 # =>This Inner Loop Header: Depth=1 @@ -10804,12 +11162,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.569: # %middle.block2918 beq $a0, $a1, .LBB8_1187 .LBB8_570: # %.lr.ph.i684.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_571: # %.lr.ph.i684 # =>This Inner Loop Header: Depth=1 @@ -10875,13 +11239,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.574: # %middle.block3319 beq $a2, $a3, .LBB8_577 .LBB8_575: # %.lr.ph.i432.preheader - pcalau12i $a5, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI8_1) - pcalau12i $a5, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI8_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB8_576: # %.lr.ph.i432 # =>This Inner Loop Header: Depth=1 @@ -10965,13 +11335,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.582: # %middle.block3336 beq $a2, $a3, .LBB8_585 .LBB8_583: # %.lr.ph.i441.preheader - pcalau12i $a5, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI8_1) - pcalau12i $a5, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI8_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB8_584: # %.lr.ph.i441 # =>This Inner Loop Header: Depth=1 @@ -11055,13 +11431,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.590: # %middle.block3353 beq $a2, $a3, .LBB8_593 .LBB8_591: # %.lr.ph.i452.preheader - pcalau12i $a5, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI8_1) - pcalau12i $a5, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI8_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB8_592: # %.lr.ph.i452 # =>This Inner Loop Header: Depth=1 @@ -11145,13 +11527,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.598: # %middle.block3370 beq $a2, $a3, .LBB8_601 .LBB8_599: # %.lr.ph.i463.preheader - pcalau12i $a5, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI8_1) - pcalau12i $a5, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI8_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB8_600: # %.lr.ph.i463 # =>This Inner Loop Header: Depth=1 @@ -11235,13 +11623,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.606: # %middle.block3387 beq $a0, $a2, .LBB8_1187 .LBB8_607: # %.lr.ph.i474.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a3, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a0 alsl.d $a1, $a0, $a1, 4 addi.d $a1, $a1, 8 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa3, $a3 .p2align 4, , 16 .LBB8_608: # %.lr.ph.i474 # =>This Inner Loop Header: Depth=1 @@ -11319,12 +11713,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.611: # %middle.block1622 beq $a1, $a2, .LBB8_614 .LBB8_612: # %.lr.ph.i1294.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_613: # %.lr.ph.i1294 # =>This Inner Loop Header: Depth=1 @@ -11413,12 +11813,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.619: # %middle.block1638 beq $a1, $a2, .LBB8_622 .LBB8_620: # %.lr.ph.i1302.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_621: # %.lr.ph.i1302 # =>This Inner Loop Header: Depth=1 @@ -11507,12 +11913,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.627: # %middle.block1654 beq $a1, $a2, .LBB8_630 .LBB8_628: # %.lr.ph.i1310.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_629: # %.lr.ph.i1310 # =>This Inner Loop Header: Depth=1 @@ -11601,12 +12013,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.635: # %middle.block1670 beq $a1, $a2, .LBB8_638 .LBB8_636: # %.lr.ph.i1318.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_637: # %.lr.ph.i1318 # =>This Inner Loop Header: Depth=1 @@ -11695,12 +12113,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.643: # %middle.block1686 beq $a1, $a2, .LBB8_646 .LBB8_644: # %.lr.ph.i1326.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_645: # %.lr.ph.i1326 # =>This Inner Loop Header: Depth=1 @@ -11789,12 +12213,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.651: # %middle.block1702 beq $a0, $a1, .LBB8_1187 .LBB8_652: # %.lr.ph.i1334.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_653: # %.lr.ph.i1334 # =>This Inner Loop Header: Depth=1 @@ -11869,12 +12299,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.656: # %middle.block2822 beq $a1, $a2, .LBB8_659 .LBB8_657: # %.lr.ph.i708.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_658: # %.lr.ph.i708 # =>This Inner Loop Header: Depth=1 @@ -11963,12 +12399,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.664: # %middle.block2838 beq $a1, $a2, .LBB8_667 .LBB8_665: # %.lr.ph.i716.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_666: # %.lr.ph.i716 # =>This Inner Loop Header: Depth=1 @@ -12057,12 +12499,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.672: # %middle.block2854 beq $a0, $a1, .LBB8_1187 .LBB8_673: # %.lr.ph.i724.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_674: # %.lr.ph.i724 # =>This Inner Loop Header: Depth=1 @@ -12137,12 +12585,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.677: # %middle.block2598 beq $a1, $a2, .LBB8_680 .LBB8_678: # %.lr.ph.i828.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_679: # %.lr.ph.i828 # =>This Inner Loop Header: Depth=1 @@ -12231,12 +12685,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.685: # %middle.block2614 beq $a0, $a1, .LBB8_1187 .LBB8_686: # %.lr.ph.i836.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_687: # %.lr.ph.i836 # =>This Inner Loop Header: Depth=1 @@ -12311,12 +12771,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.690: # %middle.block3483 beq $a1, $a2, .LBB8_693 .LBB8_691: # %.lr.ph.i360.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_692: # %.lr.ph.i360 # =>This Inner Loop Header: Depth=1 @@ -12405,12 +12871,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.698: # %middle.block3499 beq $a1, $a2, .LBB8_701 .LBB8_699: # %.lr.ph.i368.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_700: # %.lr.ph.i368 # =>This Inner Loop Header: Depth=1 @@ -12499,12 +12971,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.706: # %middle.block3515 beq $a1, $a2, .LBB8_709 .LBB8_707: # %.lr.ph.i376.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_708: # %.lr.ph.i376 # =>This Inner Loop Header: Depth=1 @@ -12593,12 +13071,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.714: # %middle.block3531 beq $a0, $a1, .LBB8_1187 .LBB8_715: # %.lr.ph.i384.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_716: # %.lr.ph.i384 # =>This Inner Loop Header: Depth=1 @@ -12673,12 +13157,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.719: # %middle.block2566 beq $a1, $a2, .LBB8_722 .LBB8_720: # %.lr.ph.i844.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_721: # %.lr.ph.i844 # =>This Inner Loop Header: Depth=1 @@ -12767,12 +13257,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.727: # %middle.block2582 beq $a0, $a1, .LBB8_1187 .LBB8_728: # %.lr.ph.i852.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_729: # %.lr.ph.i852 # =>This Inner Loop Header: Depth=1 @@ -12847,12 +13343,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.732: # %middle.block2966 beq $a1, $a2, .LBB8_735 .LBB8_733: # %.lr.ph.i628.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_734: # %.lr.ph.i628 # =>This Inner Loop Header: Depth=1 @@ -12941,12 +13443,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.740: # %middle.block2982 beq $a1, $a2, .LBB8_743 .LBB8_741: # %.lr.ph.i636.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_742: # %.lr.ph.i636 # =>This Inner Loop Header: Depth=1 @@ -13035,12 +13543,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.748: # %middle.block2998 beq $a1, $a2, .LBB8_751 .LBB8_749: # %.lr.ph.i644.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_750: # %.lr.ph.i644 # =>This Inner Loop Header: Depth=1 @@ -13129,12 +13643,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.756: # %middle.block3014 beq $a0, $a1, .LBB8_1187 .LBB8_757: # %.lr.ph.i652.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_758: # %.lr.ph.i652 # =>This Inner Loop Header: Depth=1 @@ -13209,12 +13729,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.761: # %middle.block3403 beq $a1, $a2, .LBB8_764 .LBB8_762: # %.lr.ph.i392.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_763: # %.lr.ph.i392 # =>This Inner Loop Header: Depth=1 @@ -13303,12 +13829,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.769: # %middle.block3419 beq $a1, $a2, .LBB8_772 .LBB8_770: # %.lr.ph.i400.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_771: # %.lr.ph.i400 # =>This Inner Loop Header: Depth=1 @@ -13397,12 +13929,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.777: # %middle.block3435 beq $a1, $a2, .LBB8_780 .LBB8_778: # %.lr.ph.i408.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_779: # %.lr.ph.i408 # =>This Inner Loop Header: Depth=1 @@ -13491,12 +14029,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.785: # %middle.block3451 beq $a1, $a2, .LBB8_788 .LBB8_786: # %.lr.ph.i416.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_787: # %.lr.ph.i416 # =>This Inner Loop Header: Depth=1 @@ -13585,12 +14129,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.793: # %middle.block3467 beq $a0, $a1, .LBB8_1187 .LBB8_794: # %.lr.ph.i424.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_795: # %.lr.ph.i424 # =>This Inner Loop Header: Depth=1 @@ -13665,12 +14215,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.798: # %middle.block2742 beq $a1, $a2, .LBB8_801 .LBB8_799: # %.lr.ph.i732.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_800: # %.lr.ph.i732 # =>This Inner Loop Header: Depth=1 @@ -13759,12 +14315,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.806: # %middle.block2758 beq $a1, $a2, .LBB8_809 .LBB8_807: # %.lr.ph.i740.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_808: # %.lr.ph.i740 # =>This Inner Loop Header: Depth=1 @@ -13853,12 +14415,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.814: # %middle.block2774 beq $a1, $a2, .LBB8_817 .LBB8_815: # %.lr.ph.i748.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_816: # %.lr.ph.i748 # =>This Inner Loop Header: Depth=1 @@ -13947,12 +14515,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.822: # %middle.block2790 beq $a1, $a2, .LBB8_825 .LBB8_823: # %.lr.ph.i756.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_824: # %.lr.ph.i756 # =>This Inner Loop Header: Depth=1 @@ -14041,12 +14615,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.830: # %middle.block2806 beq $a0, $a1, .LBB8_1187 .LBB8_831: # %.lr.ph.i764.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_832: # %.lr.ph.i764 # =>This Inner Loop Header: Depth=1 @@ -14121,12 +14701,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.835: # %middle.block3126 beq $a1, $a2, .LBB8_838 .LBB8_836: # %.lr.ph.i539.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_837: # %.lr.ph.i539 # =>This Inner Loop Header: Depth=1 @@ -14215,12 +14801,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.843: # %middle.block3142 beq $a1, $a2, .LBB8_846 .LBB8_844: # %.lr.ph.i547.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_845: # %.lr.ph.i547 # =>This Inner Loop Header: Depth=1 @@ -14309,12 +14901,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.851: # %middle.block3158 beq $a1, $a2, .LBB8_854 .LBB8_852: # %.lr.ph.i555.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_853: # %.lr.ph.i555 # =>This Inner Loop Header: Depth=1 @@ -14403,12 +15001,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.859: # %middle.block3174 beq $a1, $a2, .LBB8_862 .LBB8_860: # %.lr.ph.i563.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_861: # %.lr.ph.i563 # =>This Inner Loop Header: Depth=1 @@ -14497,12 +15101,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.867: # %middle.block3190 beq $a0, $a1, .LBB8_1187 .LBB8_868: # %.lr.ph.i571.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_869: # %.lr.ph.i571 # =>This Inner Loop Header: Depth=1 @@ -14577,12 +15187,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.872: # %middle.block3286 beq $a1, $a2, .LBB8_875 .LBB8_873: # %.lr.ph.i483.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_874: # %.lr.ph.i483 # =>This Inner Loop Header: Depth=1 @@ -14671,12 +15287,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.880: # %middle.block3302 beq $a0, $a1, .LBB8_1187 .LBB8_881: # %.lr.ph.i491.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_882: # %.lr.ph.i491 # =>This Inner Loop Header: Depth=1 @@ -14751,12 +15373,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.885: # %middle.block1718 beq $a1, $a2, .LBB8_888 .LBB8_886: # %.lr.ph.i1254.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_887: # %.lr.ph.i1254 # =>This Inner Loop Header: Depth=1 @@ -14845,12 +15473,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.893: # %middle.block1734 beq $a1, $a2, .LBB8_896 .LBB8_894: # %.lr.ph.i1262.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_895: # %.lr.ph.i1262 # =>This Inner Loop Header: Depth=1 @@ -14939,12 +15573,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.901: # %middle.block1750 beq $a1, $a2, .LBB8_904 .LBB8_902: # %.lr.ph.i1270.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_903: # %.lr.ph.i1270 # =>This Inner Loop Header: Depth=1 @@ -15033,12 +15673,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.909: # %middle.block1766 beq $a1, $a2, .LBB8_912 .LBB8_910: # %.lr.ph.i1278.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_911: # %.lr.ph.i1278 # =>This Inner Loop Header: Depth=1 @@ -15127,12 +15773,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.917: # %middle.block1782 beq $a0, $a1, .LBB8_1187 .LBB8_918: # %.lr.ph.i1286.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_919: # %.lr.ph.i1286 # =>This Inner Loop Header: Depth=1 @@ -15153,41 +15805,39 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat bstrpick.d $a0, $a1, 30, 2 slli.d $a0, $a0, 2 vreplvei.d $vr1, $vr0, 0 - addi.d $a3, $a2, 16 - ori $a4, $zero, 0 - lu32i.d $a4, 1 - vreplgr2vr.d $vr2, $a4 - lu12i.w $a4, -419431 - ori $a4, $a4, 2458 - lu32i.d $a4, 104857 - lu52i.d $a4, $a4, 1023 - vreplgr2vr.d $vr3, $a4 - lu12i.w $a4, -307024 - ori $a4, $a4, 3880 - lu32i.d $a4, 129446 - lu52i.d $a4, $a4, 1023 - vreplgr2vr.d $vr4, $a4 - move $a4, $a0 + addi.d $a5, $a3, 16 + ori $a6, $zero, 0 + lu32i.d $a6, 1 + vreplgr2vr.d $vr2, $a6 + ori $a6, $a4, 2458 + lu32i.d $a6, 104857 + lu52i.d $a6, $a6, 1023 + vreplgr2vr.d $vr3, $a6 + ori $a6, $a2, 3880 + lu32i.d $a6, 129446 + lu52i.d $a6, $a6, 1023 + vreplgr2vr.d $vr4, $a6 + move $a6, $a0 .p2align 4, , 16 .LBB8_921: # %vector.body # =>This Inner Loop Header: Depth=1 vaddi.wu $vr5, $vr2, 2 - vpickve2gr.w $a5, $vr2, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa6, $a5 + vpickve2gr.w $a7, $vr2, 1 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa6, $a7 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a5, $vr2, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa7, $a5 + vpickve2gr.w $a7, $vr2, 0 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa7, $a7 ffint.d.l $fa7, $fa7 vextrins.d $vr7, $vr6, 16 - vpickve2gr.w $a5, $vr5, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa6, $a5 + vpickve2gr.w $a7, $vr5, 1 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa6, $a7 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a5, $vr5, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa5, $a5 + vpickve2gr.w $a7, $vr5, 0 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa5, $a7 ffint.d.l $fa5, $fa5 vextrins.d $vr5, $vr6, 16 vfadd.d $vr6, $vr7, $vr3 @@ -15198,34 +15848,38 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat vfadd.d $vr5, $vr5, $vr4 vfdiv.d $vr6, $vr6, $vr7 vfdiv.d $vr5, $vr8, $vr5 - vst $vr6, $a3, -16 - vst $vr5, $a3, 0 + vst $vr6, $a5, -16 + vst $vr5, $a5, 0 vaddi.wu $vr2, $vr2, 4 - addi.d $a4, $a4, -4 - addi.d $a3, $a3, 32 - bnez $a4, .LBB8_921 + addi.d $a6, $a6, -4 + addi.d $a5, $a5, 32 + bnez $a6, .LBB8_921 # %bb.922: # %middle.block beq $a0, $a1, .LBB8_1187 .LBB8_923: # %.lr.ph.i1342.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 - alsl.d $a2, $a0, $a2, 3 + alsl.d $a3, $a0, $a3, 3 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + ori $a2, $a2, 3880 + lu32i.d $a2, 129446 + lu52i.d $a2, $a2, 1023 + movgr2fr.d $fa2, $a2 .p2align 4, , 16 .LBB8_924: # %.lr.ph.i1342 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a0, 31, 0 - movgr2fr.d $fa3, $a3 + bstrpick.d $a2, $a0, 31, 0 + movgr2fr.d $fa3, $a2 ffint.d.l $fa3, $fa3 fadd.d $fa4, $fa3, $fa1 fmul.d $fa4, $fa0, $fa4 fadd.d $fa3, $fa3, $fa2 fdiv.d $fa3, $fa4, $fa3 - fst.d $fa3, $a2, 0 + fst.d $fa3, $a3, 0 addi.d $a1, $a1, -1 - addi.d $a2, $a2, 8 + addi.d $a3, $a3, 8 addi.w $a0, $a0, 1 bnez $a1, .LBB8_924 b .LBB8_1187 @@ -15287,12 +15941,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.927: # %middle.block2534 beq $a1, $a2, .LBB8_930 .LBB8_928: # %.lr.ph.i860.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_929: # %.lr.ph.i860 # =>This Inner Loop Header: Depth=1 @@ -15381,12 +16041,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.935: # %middle.block2550 beq $a0, $a1, .LBB8_1187 .LBB8_936: # %.lr.ph.i868.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_937: # %.lr.ph.i868 # =>This Inner Loop Header: Depth=1 @@ -15461,12 +16127,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.940: # %middle.block2630 beq $a1, $a2, .LBB8_943 .LBB8_941: # %.lr.ph.i772.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_942: # %.lr.ph.i772 # =>This Inner Loop Header: Depth=1 @@ -15555,12 +16227,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.948: # %middle.block2646 beq $a1, $a2, .LBB8_951 .LBB8_949: # %.lr.ph.i780.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_950: # %.lr.ph.i780 # =>This Inner Loop Header: Depth=1 @@ -15649,12 +16327,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.956: # %middle.block2662 beq $a1, $a2, .LBB8_959 .LBB8_957: # %.lr.ph.i788.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_958: # %.lr.ph.i788 # =>This Inner Loop Header: Depth=1 @@ -15743,12 +16427,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.964: # %middle.block2678 beq $a1, $a2, .LBB8_967 .LBB8_965: # %.lr.ph.i796.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_966: # %.lr.ph.i796 # =>This Inner Loop Header: Depth=1 @@ -15837,12 +16527,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.972: # %middle.block2694 beq $a1, $a2, .LBB8_975 .LBB8_973: # %.lr.ph.i804.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_974: # %.lr.ph.i804 # =>This Inner Loop Header: Depth=1 @@ -15931,12 +16627,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.980: # %middle.block2710 beq $a1, $a2, .LBB8_983 .LBB8_981: # %.lr.ph.i812.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_982: # %.lr.ph.i812 # =>This Inner Loop Header: Depth=1 @@ -16025,12 +16727,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.988: # %middle.block2726 beq $a0, $a1, .LBB8_1187 .LBB8_989: # %.lr.ph.i820.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_990: # %.lr.ph.i820 # =>This Inner Loop Header: Depth=1 @@ -16105,12 +16813,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.993: # %middle.block2502 beq $a1, $a2, .LBB8_996 .LBB8_994: # %.lr.ph.i876.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_995: # %.lr.ph.i876 # =>This Inner Loop Header: Depth=1 @@ -16199,12 +16913,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1001: # %middle.block2518 beq $a0, $a1, .LBB8_1187 .LBB8_1002: # %.lr.ph.i884.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_1003: # %.lr.ph.i884 # =>This Inner Loop Header: Depth=1 @@ -16279,12 +16999,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1006: # %middle.block1846 beq $a1, $a2, .LBB8_1009 .LBB8_1007: # %.lr.ph.i1150.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1008: # %.lr.ph.i1150 # =>This Inner Loop Header: Depth=1 @@ -16373,12 +17099,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1014: # %middle.block1862 beq $a1, $a2, .LBB8_1017 .LBB8_1015: # %.lr.ph.i1158.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1016: # %.lr.ph.i1158 # =>This Inner Loop Header: Depth=1 @@ -16467,12 +17199,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1022: # %middle.block1878 beq $a1, $a2, .LBB8_1025 .LBB8_1023: # %.lr.ph.i1166.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1024: # %.lr.ph.i1166 # =>This Inner Loop Header: Depth=1 @@ -16561,12 +17299,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1030: # %middle.block1894 beq $a1, $a2, .LBB8_1033 .LBB8_1031: # %.lr.ph.i1174.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1032: # %.lr.ph.i1174 # =>This Inner Loop Header: Depth=1 @@ -16655,12 +17399,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1038: # %middle.block1910 beq $a1, $a2, .LBB8_1041 .LBB8_1039: # %.lr.ph.i1182.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1040: # %.lr.ph.i1182 # =>This Inner Loop Header: Depth=1 @@ -16749,12 +17499,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1046: # %middle.block1926 beq $a1, $a2, .LBB8_1049 .LBB8_1047: # %.lr.ph.i1190.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1048: # %.lr.ph.i1190 # =>This Inner Loop Header: Depth=1 @@ -16843,12 +17599,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1054: # %middle.block1942 beq $a1, $a2, .LBB8_1057 .LBB8_1055: # %.lr.ph.i1198.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1056: # %.lr.ph.i1198 # =>This Inner Loop Header: Depth=1 @@ -16937,12 +17699,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1062: # %middle.block1958 beq $a1, $a2, .LBB8_1065 .LBB8_1063: # %.lr.ph.i1206.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1064: # %.lr.ph.i1206 # =>This Inner Loop Header: Depth=1 @@ -17031,12 +17799,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1070: # %middle.block1974 beq $a1, $a2, .LBB8_1073 .LBB8_1071: # %.lr.ph.i1214.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1072: # %.lr.ph.i1214 # =>This Inner Loop Header: Depth=1 @@ -17125,12 +17899,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1078: # %middle.block1990 beq $a0, $a1, .LBB8_1187 .LBB8_1079: # %.lr.ph.i1222.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_1080: # %.lr.ph.i1222 # =>This Inner Loop Header: Depth=1 @@ -17205,12 +17985,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1083: # %middle.block1798 beq $a1, $a2, .LBB8_1086 .LBB8_1084: # %.lr.ph.i1230.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1085: # %.lr.ph.i1230 # =>This Inner Loop Header: Depth=1 @@ -17299,12 +18085,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1091: # %middle.block1814 beq $a1, $a2, .LBB8_1094 .LBB8_1092: # %.lr.ph.i1238.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1093: # %.lr.ph.i1238 # =>This Inner Loop Header: Depth=1 @@ -17393,12 +18185,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1099: # %middle.block1830 beq $a0, $a1, .LBB8_1187 .LBB8_1100: # %.lr.ph.i1246.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_1101: # %.lr.ph.i1246 # =>This Inner Loop Header: Depth=1 @@ -17473,12 +18271,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1104: # %middle.block2070 beq $a1, $a2, .LBB8_1107 .LBB8_1105: # %.lr.ph.i1030.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1106: # %.lr.ph.i1030 # =>This Inner Loop Header: Depth=1 @@ -17567,12 +18371,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1112: # %middle.block2086 beq $a1, $a2, .LBB8_1115 .LBB8_1113: # %.lr.ph.i1038.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1114: # %.lr.ph.i1038 # =>This Inner Loop Header: Depth=1 @@ -17661,12 +18471,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1120: # %middle.block2102 beq $a1, $a2, .LBB8_1123 .LBB8_1121: # %.lr.ph.i1046.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1122: # %.lr.ph.i1046 # =>This Inner Loop Header: Depth=1 @@ -17755,12 +18571,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1128: # %middle.block2118 beq $a1, $a2, .LBB8_1131 .LBB8_1129: # %.lr.ph.i1054.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1130: # %.lr.ph.i1054 # =>This Inner Loop Header: Depth=1 @@ -17849,12 +18671,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1136: # %middle.block2134 beq $a1, $a2, .LBB8_1139 .LBB8_1137: # %.lr.ph.i1062.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1138: # %.lr.ph.i1062 # =>This Inner Loop Header: Depth=1 @@ -17943,12 +18771,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1144: # %middle.block2150 beq $a1, $a2, .LBB8_1147 .LBB8_1145: # %.lr.ph.i1070.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1146: # %.lr.ph.i1070 # =>This Inner Loop Header: Depth=1 @@ -18037,12 +18871,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1152: # %middle.block2166 beq $a1, $a2, .LBB8_1155 .LBB8_1153: # %.lr.ph.i1078.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1154: # %.lr.ph.i1078 # =>This Inner Loop Header: Depth=1 @@ -18131,12 +18971,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1160: # %middle.block2182 beq $a1, $a2, .LBB8_1163 .LBB8_1161: # %.lr.ph.i1086.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1162: # %.lr.ph.i1086 # =>This Inner Loop Header: Depth=1 @@ -18225,12 +19071,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1168: # %middle.block2198 beq $a1, $a2, .LBB8_1171 .LBB8_1169: # %.lr.ph.i1094.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1170: # %.lr.ph.i1094 # =>This Inner Loop Header: Depth=1 @@ -18319,12 +19171,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1176: # %middle.block2214 beq $a1, $a2, .LBB8_1179 .LBB8_1177: # %.lr.ph.i1102.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1178: # %.lr.ph.i1102 # =>This Inner Loop Header: Depth=1 @@ -18413,12 +19271,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1184: # %middle.block2230 beq $a0, $a1, .LBB8_1187 .LBB8_1185: # %.lr.ph.i1110.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_1186: # %.lr.ph.i1110 # =>This Inner Loop Header: Depth=1 @@ -18489,15 +19353,9 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat .LCPI9_0: .dword 0x3fb999999999999a # double 0.10000000000000001 .dword 0x3fc999999999999a # double 0.20000000000000001 -.LCPI9_3: +.LCPI9_1: .dword 0x3fc999999999999a # double 0.20000000000000001 .dword 0x3fd3333333333333 # double 0.29999999999999999 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI9_1: - .dword 0x3ff199999999999a # double 1.1000000000000001 -.LCPI9_2: - .dword 0x3ff1f9a6b50b0f28 # double 1.1234500000000001 .text .globl _Z8loopInitj .p2align 5 @@ -18743,8 +19601,8 @@ _Z8loopInitj: # @_Z8loopInitj .LBB9_40: pcalau12i $a0, %pc_hi20(.LCPI9_0) addi.d $a0, $a0, %pc_lo12(.LCPI9_0) - pcalau12i $a1, %pc_hi20(.LCPI9_3) - addi.d $a1, $a1, %pc_lo12(.LCPI9_3) + pcalau12i $a1, %pc_hi20(.LCPI9_1) + addi.d $a1, $a1, %pc_lo12(.LCPI9_1) ld.w $a3, $s0, 1032 blez $a3, .LBB9_577 # %bb.41: # %.lr.ph.preheader.i429 @@ -18958,8 +19816,10 @@ _Z8loopInitj: # @_Z8loopInitj pcalau12i $a2, %pc_hi20(.LCPI9_0) addi.d $a2, $a2, %pc_lo12(.LCPI9_0) fldx.d $fa0, $a2, $a0 - ld.d $a2, $s0, 472 + ld.d $a3, $s0, 472 ori $a0, $zero, 4 + lu12i.w $a4, -419431 + lu12i.w $a2, -307024 bgeu $a1, $a0, .LBB9_920 # %bb.78: move $a0, $zero @@ -19148,12 +20008,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.102: # %middle.block3802 beq $a1, $a2, .LBB9_105 .LBB9_103: # %.lr.ph.i183.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_104: # %.lr.ph.i183 # =>This Inner Loop Header: Depth=1 @@ -19242,12 +20108,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.110: # %middle.block3818 beq $a1, $a2, .LBB9_113 .LBB9_111: # %.lr.ph.i191.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_112: # %.lr.ph.i191 # =>This Inner Loop Header: Depth=1 @@ -19336,12 +20208,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.118: # %middle.block3834 beq $a1, $a2, .LBB9_121 .LBB9_119: # %.lr.ph.i199.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_120: # %.lr.ph.i199 # =>This Inner Loop Header: Depth=1 @@ -19430,12 +20308,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.126: # %middle.block3850 beq $a1, $a2, .LBB9_129 .LBB9_127: # %.lr.ph.i207.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_128: # %.lr.ph.i207 # =>This Inner Loop Header: Depth=1 @@ -19524,12 +20408,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.134: # %middle.block3866 beq $a1, $a2, .LBB9_137 .LBB9_135: # %.lr.ph.i215.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_136: # %.lr.ph.i215 # =>This Inner Loop Header: Depth=1 @@ -19618,12 +20508,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.142: # %middle.block3882 beq $a0, $a1, .LBB9_1187 .LBB9_143: # %.lr.ph.i223.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_144: # %.lr.ph.i223 # =>This Inner Loop Header: Depth=1 @@ -19698,12 +20594,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.147: # %middle.block3546 beq $a1, $a2, .LBB9_150 .LBB9_148: # %.lr.ph.i231.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_149: # %.lr.ph.i231 # =>This Inner Loop Header: Depth=1 @@ -19792,12 +20694,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.155: # %middle.block3562 beq $a1, $a2, .LBB9_158 .LBB9_156: # %.lr.ph.i239.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_157: # %.lr.ph.i239 # =>This Inner Loop Header: Depth=1 @@ -19886,12 +20794,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.163: # %middle.block3578 beq $a1, $a2, .LBB9_166 .LBB9_164: # %.lr.ph.i247.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_165: # %.lr.ph.i247 # =>This Inner Loop Header: Depth=1 @@ -19980,12 +20894,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.171: # %middle.block3594 beq $a1, $a2, .LBB9_174 .LBB9_172: # %.lr.ph.i255.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_173: # %.lr.ph.i255 # =>This Inner Loop Header: Depth=1 @@ -20074,12 +20994,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.179: # %middle.block3610 beq $a1, $a2, .LBB9_182 .LBB9_180: # %.lr.ph.i263.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_181: # %.lr.ph.i263 # =>This Inner Loop Header: Depth=1 @@ -20168,12 +21094,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.187: # %middle.block3626 beq $a1, $a2, .LBB9_190 .LBB9_188: # %.lr.ph.i271.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_189: # %.lr.ph.i271 # =>This Inner Loop Header: Depth=1 @@ -20262,12 +21194,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.195: # %middle.block3642 beq $a1, $a2, .LBB9_198 .LBB9_196: # %.lr.ph.i279.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_197: # %.lr.ph.i279 # =>This Inner Loop Header: Depth=1 @@ -20356,12 +21294,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.203: # %middle.block3658 beq $a1, $a2, .LBB9_206 .LBB9_204: # %.lr.ph.i287.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_205: # %.lr.ph.i287 # =>This Inner Loop Header: Depth=1 @@ -20450,12 +21394,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.211: # %middle.block3674 beq $a1, $a2, .LBB9_214 .LBB9_212: # %.lr.ph.i295.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_213: # %.lr.ph.i295 # =>This Inner Loop Header: Depth=1 @@ -20544,12 +21494,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.219: # %middle.block3690 beq $a1, $a2, .LBB9_222 .LBB9_220: # %.lr.ph.i303.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_221: # %.lr.ph.i303 # =>This Inner Loop Header: Depth=1 @@ -20638,12 +21594,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.227: # %middle.block3706 beq $a1, $a2, .LBB9_230 .LBB9_228: # %.lr.ph.i311.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_229: # %.lr.ph.i311 # =>This Inner Loop Header: Depth=1 @@ -20732,12 +21694,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.235: # %middle.block3722 beq $a1, $a2, .LBB9_238 .LBB9_236: # %.lr.ph.i319.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_237: # %.lr.ph.i319 # =>This Inner Loop Header: Depth=1 @@ -20826,12 +21794,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.243: # %middle.block3738 beq $a1, $a2, .LBB9_246 .LBB9_244: # %.lr.ph.i327.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_245: # %.lr.ph.i327 # =>This Inner Loop Header: Depth=1 @@ -20920,12 +21894,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.251: # %middle.block3754 beq $a1, $a2, .LBB9_254 .LBB9_252: # %.lr.ph.i335.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_253: # %.lr.ph.i335 # =>This Inner Loop Header: Depth=1 @@ -21014,12 +21994,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.259: # %middle.block3770 beq $a1, $a2, .LBB9_262 .LBB9_260: # %.lr.ph.i343.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_261: # %.lr.ph.i343 # =>This Inner Loop Header: Depth=1 @@ -21108,12 +22094,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.267: # %middle.block3786 beq $a0, $a1, .LBB9_1187 .LBB9_268: # %.lr.ph.i351.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_269: # %.lr.ph.i351 # =>This Inner Loop Header: Depth=1 @@ -21210,12 +22202,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.274: # %middle.block2005 beq $a1, $a2, .LBB9_277 .LBB9_275: # %.lr.ph.i1117.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_276: # %.lr.ph.i1117 # =>This Inner Loop Header: Depth=1 @@ -21304,12 +22302,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.282: # %middle.block2021 beq $a1, $a2, .LBB9_285 .LBB9_283: # %.lr.ph.i1125.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_284: # %.lr.ph.i1125 # =>This Inner Loop Header: Depth=1 @@ -21398,12 +22402,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.290: # %middle.block2037 beq $a1, $a2, .LBB9_293 .LBB9_291: # %.lr.ph.i1133.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_292: # %.lr.ph.i1133 # =>This Inner Loop Header: Depth=1 @@ -21492,12 +22502,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.298: # %middle.block2053 beq $a0, $a1, .LBB9_1187 .LBB9_299: # %.lr.ph.i1141.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_300: # %.lr.ph.i1141 # =>This Inner Loop Header: Depth=1 @@ -21572,12 +22588,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.303: # %middle.block2869 beq $a1, $a2, .LBB9_306 .LBB9_304: # %.lr.ph.i691.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_305: # %.lr.ph.i691 # =>This Inner Loop Header: Depth=1 @@ -21666,12 +22688,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.311: # %middle.block2885 beq $a0, $a1, .LBB9_1187 .LBB9_312: # %.lr.ph.i699.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_313: # %.lr.ph.i699 # =>This Inner Loop Header: Depth=1 @@ -21746,12 +22774,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.316: # %middle.block2933 beq $a1, $a2, .LBB9_319 .LBB9_317: # %.lr.ph.i659.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_318: # %.lr.ph.i659 # =>This Inner Loop Header: Depth=1 @@ -21840,12 +22874,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.324: # %middle.block2949 beq $a0, $a1, .LBB9_1187 .LBB9_325: # %.lr.ph.i667.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_326: # %.lr.ph.i667 # =>This Inner Loop Header: Depth=1 @@ -21920,12 +22960,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.329: # %middle.block3045 beq $a1, $a2, .LBB9_332 .LBB9_330: # %.lr.ph.i578.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_331: # %.lr.ph.i578 # =>This Inner Loop Header: Depth=1 @@ -22014,12 +23060,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.337: # %middle.block3061 beq $a1, $a2, .LBB9_340 .LBB9_338: # %.lr.ph.i586.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_339: # %.lr.ph.i586 # =>This Inner Loop Header: Depth=1 @@ -22108,12 +23160,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.345: # %middle.block3077 beq $a1, $a2, .LBB9_348 .LBB9_346: # %.lr.ph.i594.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_347: # %.lr.ph.i594 # =>This Inner Loop Header: Depth=1 @@ -22202,12 +23260,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.353: # %middle.block3093 beq $a1, $a2, .LBB9_356 .LBB9_354: # %.lr.ph.i602.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_355: # %.lr.ph.i602 # =>This Inner Loop Header: Depth=1 @@ -22296,12 +23360,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.361: # %middle.block3109 beq $a0, $a1, .LBB9_1187 .LBB9_362: # %.lr.ph.i610.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_363: # %.lr.ph.i610 # =>This Inner Loop Header: Depth=1 @@ -22376,12 +23446,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.366: # %middle.block3029 beq $a0, $a1, .LBB9_1187 .LBB9_367: # %.lr.ph.i619.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_368: # %.lr.ph.i619 # =>This Inner Loop Header: Depth=1 @@ -22456,12 +23532,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.371: # %middle.block3898 beq $a1, $a2, .LBB9_374 .LBB9_372: # %.lr.ph.i.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_373: # %.lr.ph.i # =>This Inner Loop Header: Depth=1 @@ -22550,12 +23632,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.379: # %middle.block3914 beq $a1, $a2, .LBB9_382 .LBB9_380: # %.lr.ph.i167.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_381: # %.lr.ph.i167 # =>This Inner Loop Header: Depth=1 @@ -22644,12 +23732,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.387: # %middle.block3930 beq $a0, $a1, .LBB9_1187 .LBB9_388: # %.lr.ph.i175.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_389: # %.lr.ph.i175 # =>This Inner Loop Header: Depth=1 @@ -22724,12 +23818,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.392: # %middle.block3205 beq $a1, $a2, .LBB9_395 .LBB9_393: # %.lr.ph.i498.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_394: # %.lr.ph.i498 # =>This Inner Loop Header: Depth=1 @@ -22818,12 +23918,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.400: # %middle.block3221 beq $a1, $a2, .LBB9_403 .LBB9_401: # %.lr.ph.i506.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_402: # %.lr.ph.i506 # =>This Inner Loop Header: Depth=1 @@ -22912,12 +24018,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.408: # %middle.block3237 beq $a1, $a2, .LBB9_411 .LBB9_409: # %.lr.ph.i514.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_410: # %.lr.ph.i514 # =>This Inner Loop Header: Depth=1 @@ -23006,12 +24118,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.416: # %middle.block3253 beq $a1, $a2, .LBB9_419 .LBB9_417: # %.lr.ph.i522.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_418: # %.lr.ph.i522 # =>This Inner Loop Header: Depth=1 @@ -23100,12 +24218,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.424: # %middle.block3269 beq $a0, $a1, .LBB9_1187 .LBB9_425: # %.lr.ph.i530.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_426: # %.lr.ph.i530 # =>This Inner Loop Header: Depth=1 @@ -23180,12 +24304,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.429: # %middle.block2245 beq $a1, $a2, .LBB9_432 .LBB9_430: # %.lr.ph.i943.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_431: # %.lr.ph.i943 # =>This Inner Loop Header: Depth=1 @@ -23274,12 +24404,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.437: # %middle.block2261 beq $a1, $a2, .LBB9_440 .LBB9_438: # %.lr.ph.i951.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_439: # %.lr.ph.i951 # =>This Inner Loop Header: Depth=1 @@ -23368,12 +24504,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.445: # %middle.block2277 beq $a1, $a2, .LBB9_448 .LBB9_446: # %.lr.ph.i959.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_447: # %.lr.ph.i959 # =>This Inner Loop Header: Depth=1 @@ -23462,12 +24604,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.453: # %middle.block2293 beq $a1, $a2, .LBB9_456 .LBB9_454: # %.lr.ph.i967.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_455: # %.lr.ph.i967 # =>This Inner Loop Header: Depth=1 @@ -23556,12 +24704,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.461: # %middle.block2309 beq $a1, $a2, .LBB9_464 .LBB9_462: # %.lr.ph.i975.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_463: # %.lr.ph.i975 # =>This Inner Loop Header: Depth=1 @@ -23650,12 +24804,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.469: # %middle.block2325 beq $a1, $a2, .LBB9_472 .LBB9_470: # %.lr.ph.i983.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_471: # %.lr.ph.i983 # =>This Inner Loop Header: Depth=1 @@ -23744,12 +24904,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.477: # %middle.block2341 beq $a1, $a2, .LBB9_480 .LBB9_478: # %.lr.ph.i991.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_479: # %.lr.ph.i991 # =>This Inner Loop Header: Depth=1 @@ -23838,12 +25004,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.485: # %middle.block2357 beq $a1, $a2, .LBB9_488 .LBB9_486: # %.lr.ph.i999.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_487: # %.lr.ph.i999 # =>This Inner Loop Header: Depth=1 @@ -23932,12 +25104,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.493: # %middle.block2373 beq $a1, $a2, .LBB9_496 .LBB9_494: # %.lr.ph.i1007.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_495: # %.lr.ph.i1007 # =>This Inner Loop Header: Depth=1 @@ -24026,12 +25204,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.501: # %middle.block2389 beq $a0, $a1, .LBB9_504 .LBB9_502: # %.lr.ph.i1015.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_503: # %.lr.ph.i1015 # =>This Inner Loop Header: Depth=1 @@ -24136,12 +25320,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.512: # %middle.block2405 beq $a0, $a1, .LBB9_515 .LBB9_513: # %.lr.ph.i891.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_514: # %.lr.ph.i891 # =>This Inner Loop Header: Depth=1 @@ -24230,12 +25420,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.520: # %middle.block2421 beq $a0, $a1, .LBB9_523 .LBB9_521: # %.lr.ph.i899.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_522: # %.lr.ph.i899 # =>This Inner Loop Header: Depth=1 @@ -24324,12 +25520,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.528: # %middle.block2437 beq $a0, $a1, .LBB9_531 .LBB9_529: # %.lr.ph.i907.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_530: # %.lr.ph.i907 # =>This Inner Loop Header: Depth=1 @@ -24418,12 +25620,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.536: # %middle.block2453 beq $a0, $a1, .LBB9_539 .LBB9_537: # %.lr.ph.i915.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_538: # %.lr.ph.i915 # =>This Inner Loop Header: Depth=1 @@ -24512,12 +25720,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.544: # %middle.block2469 beq $a0, $a1, .LBB9_547 .LBB9_545: # %.lr.ph.i923.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_546: # %.lr.ph.i923 # =>This Inner Loop Header: Depth=1 @@ -24624,12 +25838,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.556: # %middle.block2485 beq $a0, $a1, .LBB9_1187 .LBB9_557: # %.lr.ph.i935.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_558: # %.lr.ph.i935 # =>This Inner Loop Header: Depth=1 @@ -24704,12 +25924,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.561: # %middle.block2901 beq $a1, $a2, .LBB9_564 .LBB9_562: # %.lr.ph.i675.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_563: # %.lr.ph.i675 # =>This Inner Loop Header: Depth=1 @@ -24798,12 +26024,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.569: # %middle.block2917 beq $a0, $a1, .LBB9_1187 .LBB9_570: # %.lr.ph.i683.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_571: # %.lr.ph.i683 # =>This Inner Loop Header: Depth=1 @@ -24869,13 +26101,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.574: # %middle.block3318 beq $a2, $a3, .LBB9_577 .LBB9_575: # %.lr.ph.i431.preheader - pcalau12i $a5, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI9_1) - pcalau12i $a5, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI9_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB9_576: # %.lr.ph.i431 # =>This Inner Loop Header: Depth=1 @@ -24959,13 +26197,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.582: # %middle.block3335 beq $a2, $a3, .LBB9_585 .LBB9_583: # %.lr.ph.i440.preheader - pcalau12i $a5, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI9_1) - pcalau12i $a5, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI9_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB9_584: # %.lr.ph.i440 # =>This Inner Loop Header: Depth=1 @@ -25049,13 +26293,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.590: # %middle.block3352 beq $a2, $a3, .LBB9_593 .LBB9_591: # %.lr.ph.i451.preheader - pcalau12i $a5, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI9_1) - pcalau12i $a5, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI9_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB9_592: # %.lr.ph.i451 # =>This Inner Loop Header: Depth=1 @@ -25139,13 +26389,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.598: # %middle.block3369 beq $a2, $a3, .LBB9_601 .LBB9_599: # %.lr.ph.i462.preheader - pcalau12i $a5, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI9_1) - pcalau12i $a5, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI9_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB9_600: # %.lr.ph.i462 # =>This Inner Loop Header: Depth=1 @@ -25229,13 +26485,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.606: # %middle.block3386 beq $a0, $a2, .LBB9_1187 .LBB9_607: # %.lr.ph.i473.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a3, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a0 alsl.d $a1, $a0, $a1, 4 addi.d $a1, $a1, 8 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa3, $a3 .p2align 4, , 16 .LBB9_608: # %.lr.ph.i473 # =>This Inner Loop Header: Depth=1 @@ -25313,12 +26575,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.611: # %middle.block1621 beq $a1, $a2, .LBB9_614 .LBB9_612: # %.lr.ph.i1293.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_613: # %.lr.ph.i1293 # =>This Inner Loop Header: Depth=1 @@ -25407,12 +26675,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.619: # %middle.block1637 beq $a1, $a2, .LBB9_622 .LBB9_620: # %.lr.ph.i1301.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_621: # %.lr.ph.i1301 # =>This Inner Loop Header: Depth=1 @@ -25501,12 +26775,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.627: # %middle.block1653 beq $a1, $a2, .LBB9_630 .LBB9_628: # %.lr.ph.i1309.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_629: # %.lr.ph.i1309 # =>This Inner Loop Header: Depth=1 @@ -25595,12 +26875,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.635: # %middle.block1669 beq $a1, $a2, .LBB9_638 .LBB9_636: # %.lr.ph.i1317.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_637: # %.lr.ph.i1317 # =>This Inner Loop Header: Depth=1 @@ -25689,12 +26975,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.643: # %middle.block1685 beq $a1, $a2, .LBB9_646 .LBB9_644: # %.lr.ph.i1325.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_645: # %.lr.ph.i1325 # =>This Inner Loop Header: Depth=1 @@ -25783,12 +27075,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.651: # %middle.block1701 beq $a0, $a1, .LBB9_1187 .LBB9_652: # %.lr.ph.i1333.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_653: # %.lr.ph.i1333 # =>This Inner Loop Header: Depth=1 @@ -25863,12 +27161,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.656: # %middle.block2821 beq $a1, $a2, .LBB9_659 .LBB9_657: # %.lr.ph.i707.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_658: # %.lr.ph.i707 # =>This Inner Loop Header: Depth=1 @@ -25957,12 +27261,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.664: # %middle.block2837 beq $a1, $a2, .LBB9_667 .LBB9_665: # %.lr.ph.i715.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_666: # %.lr.ph.i715 # =>This Inner Loop Header: Depth=1 @@ -26051,12 +27361,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.672: # %middle.block2853 beq $a0, $a1, .LBB9_1187 .LBB9_673: # %.lr.ph.i723.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_674: # %.lr.ph.i723 # =>This Inner Loop Header: Depth=1 @@ -26131,12 +27447,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.677: # %middle.block2597 beq $a1, $a2, .LBB9_680 .LBB9_678: # %.lr.ph.i827.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_679: # %.lr.ph.i827 # =>This Inner Loop Header: Depth=1 @@ -26225,12 +27547,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.685: # %middle.block2613 beq $a0, $a1, .LBB9_1187 .LBB9_686: # %.lr.ph.i835.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_687: # %.lr.ph.i835 # =>This Inner Loop Header: Depth=1 @@ -26305,12 +27633,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.690: # %middle.block3482 beq $a1, $a2, .LBB9_693 .LBB9_691: # %.lr.ph.i359.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_692: # %.lr.ph.i359 # =>This Inner Loop Header: Depth=1 @@ -26399,12 +27733,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.698: # %middle.block3498 beq $a1, $a2, .LBB9_701 .LBB9_699: # %.lr.ph.i367.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_700: # %.lr.ph.i367 # =>This Inner Loop Header: Depth=1 @@ -26493,12 +27833,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.706: # %middle.block3514 beq $a1, $a2, .LBB9_709 .LBB9_707: # %.lr.ph.i375.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_708: # %.lr.ph.i375 # =>This Inner Loop Header: Depth=1 @@ -26587,12 +27933,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.714: # %middle.block3530 beq $a0, $a1, .LBB9_1187 .LBB9_715: # %.lr.ph.i383.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_716: # %.lr.ph.i383 # =>This Inner Loop Header: Depth=1 @@ -26667,12 +28019,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.719: # %middle.block2565 beq $a1, $a2, .LBB9_722 .LBB9_720: # %.lr.ph.i843.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_721: # %.lr.ph.i843 # =>This Inner Loop Header: Depth=1 @@ -26761,12 +28119,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.727: # %middle.block2581 beq $a0, $a1, .LBB9_1187 .LBB9_728: # %.lr.ph.i851.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_729: # %.lr.ph.i851 # =>This Inner Loop Header: Depth=1 @@ -26841,12 +28205,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.732: # %middle.block2965 beq $a1, $a2, .LBB9_735 .LBB9_733: # %.lr.ph.i627.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_734: # %.lr.ph.i627 # =>This Inner Loop Header: Depth=1 @@ -26935,12 +28305,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.740: # %middle.block2981 beq $a1, $a2, .LBB9_743 .LBB9_741: # %.lr.ph.i635.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_742: # %.lr.ph.i635 # =>This Inner Loop Header: Depth=1 @@ -27029,12 +28405,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.748: # %middle.block2997 beq $a1, $a2, .LBB9_751 .LBB9_749: # %.lr.ph.i643.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_750: # %.lr.ph.i643 # =>This Inner Loop Header: Depth=1 @@ -27123,12 +28505,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.756: # %middle.block3013 beq $a0, $a1, .LBB9_1187 .LBB9_757: # %.lr.ph.i651.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_758: # %.lr.ph.i651 # =>This Inner Loop Header: Depth=1 @@ -27203,12 +28591,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.761: # %middle.block3402 beq $a1, $a2, .LBB9_764 .LBB9_762: # %.lr.ph.i391.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_763: # %.lr.ph.i391 # =>This Inner Loop Header: Depth=1 @@ -27297,12 +28691,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.769: # %middle.block3418 beq $a1, $a2, .LBB9_772 .LBB9_770: # %.lr.ph.i399.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_771: # %.lr.ph.i399 # =>This Inner Loop Header: Depth=1 @@ -27391,12 +28791,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.777: # %middle.block3434 beq $a1, $a2, .LBB9_780 .LBB9_778: # %.lr.ph.i407.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_779: # %.lr.ph.i407 # =>This Inner Loop Header: Depth=1 @@ -27485,12 +28891,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.785: # %middle.block3450 beq $a1, $a2, .LBB9_788 .LBB9_786: # %.lr.ph.i415.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_787: # %.lr.ph.i415 # =>This Inner Loop Header: Depth=1 @@ -27579,12 +28991,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.793: # %middle.block3466 beq $a0, $a1, .LBB9_1187 .LBB9_794: # %.lr.ph.i423.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_795: # %.lr.ph.i423 # =>This Inner Loop Header: Depth=1 @@ -27659,12 +29077,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.798: # %middle.block2741 beq $a1, $a2, .LBB9_801 .LBB9_799: # %.lr.ph.i731.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_800: # %.lr.ph.i731 # =>This Inner Loop Header: Depth=1 @@ -27753,12 +29177,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.806: # %middle.block2757 beq $a1, $a2, .LBB9_809 .LBB9_807: # %.lr.ph.i739.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_808: # %.lr.ph.i739 # =>This Inner Loop Header: Depth=1 @@ -27847,12 +29277,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.814: # %middle.block2773 beq $a1, $a2, .LBB9_817 .LBB9_815: # %.lr.ph.i747.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_816: # %.lr.ph.i747 # =>This Inner Loop Header: Depth=1 @@ -27941,12 +29377,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.822: # %middle.block2789 beq $a1, $a2, .LBB9_825 .LBB9_823: # %.lr.ph.i755.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_824: # %.lr.ph.i755 # =>This Inner Loop Header: Depth=1 @@ -28035,12 +29477,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.830: # %middle.block2805 beq $a0, $a1, .LBB9_1187 .LBB9_831: # %.lr.ph.i763.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_832: # %.lr.ph.i763 # =>This Inner Loop Header: Depth=1 @@ -28115,12 +29563,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.835: # %middle.block3125 beq $a1, $a2, .LBB9_838 .LBB9_836: # %.lr.ph.i538.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_837: # %.lr.ph.i538 # =>This Inner Loop Header: Depth=1 @@ -28209,12 +29663,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.843: # %middle.block3141 beq $a1, $a2, .LBB9_846 .LBB9_844: # %.lr.ph.i546.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_845: # %.lr.ph.i546 # =>This Inner Loop Header: Depth=1 @@ -28303,12 +29763,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.851: # %middle.block3157 beq $a1, $a2, .LBB9_854 .LBB9_852: # %.lr.ph.i554.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_853: # %.lr.ph.i554 # =>This Inner Loop Header: Depth=1 @@ -28397,12 +29863,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.859: # %middle.block3173 beq $a1, $a2, .LBB9_862 .LBB9_860: # %.lr.ph.i562.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_861: # %.lr.ph.i562 # =>This Inner Loop Header: Depth=1 @@ -28491,12 +29963,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.867: # %middle.block3189 beq $a0, $a1, .LBB9_1187 .LBB9_868: # %.lr.ph.i570.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_869: # %.lr.ph.i570 # =>This Inner Loop Header: Depth=1 @@ -28571,12 +30049,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.872: # %middle.block3285 beq $a1, $a2, .LBB9_875 .LBB9_873: # %.lr.ph.i482.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_874: # %.lr.ph.i482 # =>This Inner Loop Header: Depth=1 @@ -28665,12 +30149,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.880: # %middle.block3301 beq $a0, $a1, .LBB9_1187 .LBB9_881: # %.lr.ph.i490.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_882: # %.lr.ph.i490 # =>This Inner Loop Header: Depth=1 @@ -28745,12 +30235,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.885: # %middle.block1717 beq $a1, $a2, .LBB9_888 .LBB9_886: # %.lr.ph.i1253.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_887: # %.lr.ph.i1253 # =>This Inner Loop Header: Depth=1 @@ -28839,12 +30335,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.893: # %middle.block1733 beq $a1, $a2, .LBB9_896 .LBB9_894: # %.lr.ph.i1261.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_895: # %.lr.ph.i1261 # =>This Inner Loop Header: Depth=1 @@ -28933,12 +30435,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.901: # %middle.block1749 beq $a1, $a2, .LBB9_904 .LBB9_902: # %.lr.ph.i1269.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_903: # %.lr.ph.i1269 # =>This Inner Loop Header: Depth=1 @@ -29027,12 +30535,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.909: # %middle.block1765 beq $a1, $a2, .LBB9_912 .LBB9_910: # %.lr.ph.i1277.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_911: # %.lr.ph.i1277 # =>This Inner Loop Header: Depth=1 @@ -29121,12 +30635,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.917: # %middle.block1781 beq $a0, $a1, .LBB9_1187 .LBB9_918: # %.lr.ph.i1285.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_919: # %.lr.ph.i1285 # =>This Inner Loop Header: Depth=1 @@ -29147,41 +30667,39 @@ _Z8loopInitj: # @_Z8loopInitj bstrpick.d $a0, $a1, 30, 2 slli.d $a0, $a0, 2 vreplvei.d $vr1, $vr0, 0 - addi.d $a3, $a2, 16 - ori $a4, $zero, 0 - lu32i.d $a4, 1 - vreplgr2vr.d $vr2, $a4 - lu12i.w $a4, -419431 - ori $a4, $a4, 2458 - lu32i.d $a4, 104857 - lu52i.d $a4, $a4, 1023 - vreplgr2vr.d $vr3, $a4 - lu12i.w $a4, -307024 - ori $a4, $a4, 3880 - lu32i.d $a4, 129446 - lu52i.d $a4, $a4, 1023 - vreplgr2vr.d $vr4, $a4 - move $a4, $a0 + addi.d $a5, $a3, 16 + ori $a6, $zero, 0 + lu32i.d $a6, 1 + vreplgr2vr.d $vr2, $a6 + ori $a6, $a4, 2458 + lu32i.d $a6, 104857 + lu52i.d $a6, $a6, 1023 + vreplgr2vr.d $vr3, $a6 + ori $a6, $a2, 3880 + lu32i.d $a6, 129446 + lu52i.d $a6, $a6, 1023 + vreplgr2vr.d $vr4, $a6 + move $a6, $a0 .p2align 4, , 16 .LBB9_921: # %vector.body # =>This Inner Loop Header: Depth=1 vaddi.wu $vr5, $vr2, 2 - vpickve2gr.w $a5, $vr2, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa6, $a5 + vpickve2gr.w $a7, $vr2, 1 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa6, $a7 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a5, $vr2, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa7, $a5 + vpickve2gr.w $a7, $vr2, 0 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa7, $a7 ffint.d.l $fa7, $fa7 vextrins.d $vr7, $vr6, 16 - vpickve2gr.w $a5, $vr5, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa6, $a5 + vpickve2gr.w $a7, $vr5, 1 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa6, $a7 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a5, $vr5, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa5, $a5 + vpickve2gr.w $a7, $vr5, 0 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa5, $a7 ffint.d.l $fa5, $fa5 vextrins.d $vr5, $vr6, 16 vfadd.d $vr6, $vr7, $vr3 @@ -29192,34 +30710,38 @@ _Z8loopInitj: # @_Z8loopInitj vfadd.d $vr5, $vr5, $vr4 vfdiv.d $vr6, $vr6, $vr7 vfdiv.d $vr5, $vr8, $vr5 - vst $vr6, $a3, -16 - vst $vr5, $a3, 0 + vst $vr6, $a5, -16 + vst $vr5, $a5, 0 vaddi.wu $vr2, $vr2, 4 - addi.d $a4, $a4, -4 - addi.d $a3, $a3, 32 - bnez $a4, .LBB9_921 + addi.d $a6, $a6, -4 + addi.d $a5, $a5, 32 + bnez $a6, .LBB9_921 # %bb.922: # %middle.block beq $a0, $a1, .LBB9_1187 .LBB9_923: # %.lr.ph.i1341.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 - alsl.d $a2, $a0, $a2, 3 + alsl.d $a3, $a0, $a3, 3 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + ori $a2, $a2, 3880 + lu32i.d $a2, 129446 + lu52i.d $a2, $a2, 1023 + movgr2fr.d $fa2, $a2 .p2align 4, , 16 .LBB9_924: # %.lr.ph.i1341 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a0, 31, 0 - movgr2fr.d $fa3, $a3 + bstrpick.d $a2, $a0, 31, 0 + movgr2fr.d $fa3, $a2 ffint.d.l $fa3, $fa3 fadd.d $fa4, $fa3, $fa1 fmul.d $fa4, $fa0, $fa4 fadd.d $fa3, $fa3, $fa2 fdiv.d $fa3, $fa4, $fa3 - fst.d $fa3, $a2, 0 + fst.d $fa3, $a3, 0 addi.d $a1, $a1, -1 - addi.d $a2, $a2, 8 + addi.d $a3, $a3, 8 addi.w $a0, $a0, 1 bnez $a1, .LBB9_924 b .LBB9_1187 @@ -29281,12 +30803,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.927: # %middle.block2533 beq $a1, $a2, .LBB9_930 .LBB9_928: # %.lr.ph.i859.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_929: # %.lr.ph.i859 # =>This Inner Loop Header: Depth=1 @@ -29375,12 +30903,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.935: # %middle.block2549 beq $a0, $a1, .LBB9_1187 .LBB9_936: # %.lr.ph.i867.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_937: # %.lr.ph.i867 # =>This Inner Loop Header: Depth=1 @@ -29455,12 +30989,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.940: # %middle.block2629 beq $a1, $a2, .LBB9_943 .LBB9_941: # %.lr.ph.i771.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_942: # %.lr.ph.i771 # =>This Inner Loop Header: Depth=1 @@ -29549,12 +31089,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.948: # %middle.block2645 beq $a1, $a2, .LBB9_951 .LBB9_949: # %.lr.ph.i779.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_950: # %.lr.ph.i779 # =>This Inner Loop Header: Depth=1 @@ -29643,12 +31189,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.956: # %middle.block2661 beq $a1, $a2, .LBB9_959 .LBB9_957: # %.lr.ph.i787.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_958: # %.lr.ph.i787 # =>This Inner Loop Header: Depth=1 @@ -29737,12 +31289,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.964: # %middle.block2677 beq $a1, $a2, .LBB9_967 .LBB9_965: # %.lr.ph.i795.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_966: # %.lr.ph.i795 # =>This Inner Loop Header: Depth=1 @@ -29831,12 +31389,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.972: # %middle.block2693 beq $a1, $a2, .LBB9_975 .LBB9_973: # %.lr.ph.i803.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_974: # %.lr.ph.i803 # =>This Inner Loop Header: Depth=1 @@ -29925,12 +31489,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.980: # %middle.block2709 beq $a1, $a2, .LBB9_983 .LBB9_981: # %.lr.ph.i811.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_982: # %.lr.ph.i811 # =>This Inner Loop Header: Depth=1 @@ -30019,12 +31589,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.988: # %middle.block2725 beq $a0, $a1, .LBB9_1187 .LBB9_989: # %.lr.ph.i819.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_990: # %.lr.ph.i819 # =>This Inner Loop Header: Depth=1 @@ -30099,12 +31675,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.993: # %middle.block2501 beq $a1, $a2, .LBB9_996 .LBB9_994: # %.lr.ph.i875.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_995: # %.lr.ph.i875 # =>This Inner Loop Header: Depth=1 @@ -30193,12 +31775,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1001: # %middle.block2517 beq $a0, $a1, .LBB9_1187 .LBB9_1002: # %.lr.ph.i883.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_1003: # %.lr.ph.i883 # =>This Inner Loop Header: Depth=1 @@ -30273,12 +31861,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1006: # %middle.block1845 beq $a1, $a2, .LBB9_1009 .LBB9_1007: # %.lr.ph.i1149.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1008: # %.lr.ph.i1149 # =>This Inner Loop Header: Depth=1 @@ -30367,12 +31961,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1014: # %middle.block1861 beq $a1, $a2, .LBB9_1017 .LBB9_1015: # %.lr.ph.i1157.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1016: # %.lr.ph.i1157 # =>This Inner Loop Header: Depth=1 @@ -30461,12 +32061,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1022: # %middle.block1877 beq $a1, $a2, .LBB9_1025 .LBB9_1023: # %.lr.ph.i1165.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1024: # %.lr.ph.i1165 # =>This Inner Loop Header: Depth=1 @@ -30555,12 +32161,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1030: # %middle.block1893 beq $a1, $a2, .LBB9_1033 .LBB9_1031: # %.lr.ph.i1173.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1032: # %.lr.ph.i1173 # =>This Inner Loop Header: Depth=1 @@ -30649,12 +32261,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1038: # %middle.block1909 beq $a1, $a2, .LBB9_1041 .LBB9_1039: # %.lr.ph.i1181.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1040: # %.lr.ph.i1181 # =>This Inner Loop Header: Depth=1 @@ -30743,12 +32361,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1046: # %middle.block1925 beq $a1, $a2, .LBB9_1049 .LBB9_1047: # %.lr.ph.i1189.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1048: # %.lr.ph.i1189 # =>This Inner Loop Header: Depth=1 @@ -30837,12 +32461,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1054: # %middle.block1941 beq $a1, $a2, .LBB9_1057 .LBB9_1055: # %.lr.ph.i1197.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1056: # %.lr.ph.i1197 # =>This Inner Loop Header: Depth=1 @@ -30931,12 +32561,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1062: # %middle.block1957 beq $a1, $a2, .LBB9_1065 .LBB9_1063: # %.lr.ph.i1205.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1064: # %.lr.ph.i1205 # =>This Inner Loop Header: Depth=1 @@ -31025,12 +32661,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1070: # %middle.block1973 beq $a1, $a2, .LBB9_1073 .LBB9_1071: # %.lr.ph.i1213.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1072: # %.lr.ph.i1213 # =>This Inner Loop Header: Depth=1 @@ -31119,12 +32761,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1078: # %middle.block1989 beq $a0, $a1, .LBB9_1187 .LBB9_1079: # %.lr.ph.i1221.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_1080: # %.lr.ph.i1221 # =>This Inner Loop Header: Depth=1 @@ -31199,12 +32847,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1083: # %middle.block1797 beq $a1, $a2, .LBB9_1086 .LBB9_1084: # %.lr.ph.i1229.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1085: # %.lr.ph.i1229 # =>This Inner Loop Header: Depth=1 @@ -31293,12 +32947,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1091: # %middle.block1813 beq $a1, $a2, .LBB9_1094 .LBB9_1092: # %.lr.ph.i1237.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1093: # %.lr.ph.i1237 # =>This Inner Loop Header: Depth=1 @@ -31387,12 +33047,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1099: # %middle.block1829 beq $a0, $a1, .LBB9_1187 .LBB9_1100: # %.lr.ph.i1245.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_1101: # %.lr.ph.i1245 # =>This Inner Loop Header: Depth=1 @@ -31467,12 +33133,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1104: # %middle.block2069 beq $a1, $a2, .LBB9_1107 .LBB9_1105: # %.lr.ph.i1029.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1106: # %.lr.ph.i1029 # =>This Inner Loop Header: Depth=1 @@ -31561,12 +33233,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1112: # %middle.block2085 beq $a1, $a2, .LBB9_1115 .LBB9_1113: # %.lr.ph.i1037.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1114: # %.lr.ph.i1037 # =>This Inner Loop Header: Depth=1 @@ -31655,12 +33333,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1120: # %middle.block2101 beq $a1, $a2, .LBB9_1123 .LBB9_1121: # %.lr.ph.i1045.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1122: # %.lr.ph.i1045 # =>This Inner Loop Header: Depth=1 @@ -31749,12 +33433,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1128: # %middle.block2117 beq $a1, $a2, .LBB9_1131 .LBB9_1129: # %.lr.ph.i1053.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1130: # %.lr.ph.i1053 # =>This Inner Loop Header: Depth=1 @@ -31843,12 +33533,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1136: # %middle.block2133 beq $a1, $a2, .LBB9_1139 .LBB9_1137: # %.lr.ph.i1061.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1138: # %.lr.ph.i1061 # =>This Inner Loop Header: Depth=1 @@ -31937,12 +33633,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1144: # %middle.block2149 beq $a1, $a2, .LBB9_1147 .LBB9_1145: # %.lr.ph.i1069.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1146: # %.lr.ph.i1069 # =>This Inner Loop Header: Depth=1 @@ -32031,12 +33733,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1152: # %middle.block2165 beq $a1, $a2, .LBB9_1155 .LBB9_1153: # %.lr.ph.i1077.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1154: # %.lr.ph.i1077 # =>This Inner Loop Header: Depth=1 @@ -32125,12 +33833,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1160: # %middle.block2181 beq $a1, $a2, .LBB9_1163 .LBB9_1161: # %.lr.ph.i1085.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1162: # %.lr.ph.i1085 # =>This Inner Loop Header: Depth=1 @@ -32219,12 +33933,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1168: # %middle.block2197 beq $a1, $a2, .LBB9_1171 .LBB9_1169: # %.lr.ph.i1093.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1170: # %.lr.ph.i1093 # =>This Inner Loop Header: Depth=1 @@ -32313,12 +34033,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1176: # %middle.block2213 beq $a1, $a2, .LBB9_1179 .LBB9_1177: # %.lr.ph.i1101.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1178: # %.lr.ph.i1101 # =>This Inner Loop Header: Depth=1 @@ -32407,12 +34133,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1184: # %middle.block2229 beq $a0, $a1, .LBB9_1187 .LBB9_1185: # %.lr.ph.i1109.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_1186: # %.lr.ph.i1109 # =>This Inner Loop Header: Depth=1 diff --git a/results/MicroBenchmarks/LCALS/SubsetBLambdaLoops/CMakeFiles/lcalsBLambda.dir/__/runReferenceLoops.s b/results/MicroBenchmarks/LCALS/SubsetBLambdaLoops/CMakeFiles/lcalsBLambda.dir/__/runReferenceLoops.s index b31732f8..26d4daf4 100644 --- a/results/MicroBenchmarks/LCALS/SubsetBLambdaLoops/CMakeFiles/lcalsBLambda.dir/__/runReferenceLoops.s +++ b/results/MicroBenchmarks/LCALS/SubsetBLambdaLoops/CMakeFiles/lcalsBLambda.dir/__/runReferenceLoops.s @@ -903,14 +903,8 @@ _ZN8LoopStatD2Ev: # @_ZN8LoopStatD2Ev .size _ZN8LoopStatD2Ev, .Lfunc_end3-_ZN8LoopStatD2Ev .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z25computeReferenceLoopTimesv -.LCPI4_0: - .dword 0x3f5426fe718a86d7 # double 0.00123 -.LCPI4_1: - .dword 0xbf5426fe718a86d7 # double -0.00123 .text - .globl _Z25computeReferenceLoopTimesv + .globl _Z25computeReferenceLoopTimesv # -- Begin function _Z25computeReferenceLoopTimesv .p2align 5 .type _Z25computeReferenceLoopTimesv,@function _Z25computeReferenceLoopTimesv: # @_Z25computeReferenceLoopTimesv @@ -1649,13 +1643,16 @@ _Z25computeReferenceLoopTimesv: # @_Z25computeReferenceLoopTimesv pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 st.d $a0, $sp, 640 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI4_0) - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI4_1) ori $a0, $zero, 1 st.b $a0, $sp, 648 + lu12i.w $a0, 465064 + ori $a0, $a0, 1751 + lu32i.d $a0, 272126 + lu52i.d $a1, $a0, 1013 + movgr2fr.d $fs0, $a1 fadd.d $fa0, $fs2, $fs0 + lu52i.d $a0, $a0, -1035 + movgr2fr.d $fs1, $a0 fadd.d $fa1, $fs2, $fs1 fdiv.d $fa0, $fa0, $fa1 fst.d $fa0, $fp, 384 diff --git a/results/MicroBenchmarks/LCALS/SubsetBRawLoops/CMakeFiles/lcalsBRaw.dir/__/LCALSStats.s b/results/MicroBenchmarks/LCALS/SubsetBRawLoops/CMakeFiles/lcalsBRaw.dir/__/LCALSStats.s index 65ccd92d..95272aa5 100644 --- a/results/MicroBenchmarks/LCALS/SubsetBRawLoops/CMakeFiles/lcalsBRaw.dir/__/LCALSStats.s +++ b/results/MicroBenchmarks/LCALS/SubsetBRawLoops/CMakeFiles/lcalsBRaw.dir/__/LCALSStats.s @@ -868,12 +868,7 @@ _Z19getLoopSuiteRunInfov: # @_Z19getLoopSuiteRunInfov .Lfunc_end1: .size _Z19getLoopSuiteRunInfov, .Lfunc_end1-_Z19getLoopSuiteRunInfov # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm -.LCPI2_0: - .dword 0x3fb999999999999a # double 0.10000000000000001 - .text - .globl _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm + .globl _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm # -- Begin function _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm .p2align 5 .type _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm,@function _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm: # @_Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm @@ -1018,10 +1013,13 @@ _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcE st.d $a0, $s4, 560 beqz $a1, .LBB2_15 # %bb.13: # %.lr.ph18.preheader - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI2_0) move $fp, $zero move $s0, $zero + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1019 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB2_14: # %.lr.ph18 # =>This Inner Loop Header: Depth=1 diff --git a/results/MicroBenchmarks/LCALS/SubsetBRawLoops/CMakeFiles/lcalsBRaw.dir/__/LCALSSuite.s b/results/MicroBenchmarks/LCALS/SubsetBRawLoops/CMakeFiles/lcalsBRaw.dir/__/LCALSSuite.s index a1e8c653..e35fa2d7 100644 --- a/results/MicroBenchmarks/LCALS/SubsetBRawLoops/CMakeFiles/lcalsBRaw.dir/__/LCALSSuite.s +++ b/results/MicroBenchmarks/LCALS/SubsetBRawLoops/CMakeFiles/lcalsBRaw.dir/__/LCALSSuite.s @@ -23,33 +23,21 @@ _Z11getLoopDatav: # @_Z11getLoopDatav .LCPI1_1: .dword 0x3ff6666666666666 # double 1.3999999999999999 .dword 0x3ff0000000000000 # double 1 -.LCPI1_5: +.LCPI1_2: .dword 8 # 0x8 .dword 4923084613239392580 # 0x44524f5f43534944 -.LCPI1_6: +.LCPI1_3: .dword 8 # 0x8 .dword 4914094937701898568 # 0x44325f4f52445948 -.LCPI1_7: +.LCPI1_4: .dword 8 # 0x8 .dword 4913813462725187912 # 0x44315f4f52445948 -.LCPI1_8: +.LCPI1_5: .dword 8 # 0x8 .dword 6074873621086556756 # 0x544e495f50415254 -.LCPI1_11: +.LCPI1_6: .dword 8 # 0x8 .dword 5786931235628926290 # 0x504f4f4c5f464552 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI1_2: - .dword 0x40e5972000000000 # double 44217 -.LCPI1_3: - .dword 0x40b3890000000000 # double 5001 -.LCPI1_4: - .dword 0x4065600000000000 # double 171 -.LCPI1_9: - .dword 0x4063800000000000 # double 156 -.LCPI1_10: - .dword 0x4050000000000000 # double 64 .text .globl _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd .p2align 5 @@ -640,21 +628,27 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define .Ltmp34: # EH_LABEL # %bb.92: move $s8, $a0 - pcalau12i $a0, %pc_hi20(.LCPI1_2) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_2) - pcalau12i $a0, %pc_hi20(.LCPI1_3) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_3) + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, 366368 + lu52i.d $a1, $a1, 1038 + movgr2fr.d $fa0, $a1 fmul.d $fa0, $fs0, $fa0 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a0, $fa0 - fmul.d $fa0, $fs0, $fa1 + movfr2gr.s $a1, $fa0 + st.w $a1, $s8, 0 + ori $a1, $zero, 0 + lu32i.d $a1, 231680 + lu52i.d $a1, $a1, 1035 + movgr2fr.d $fa0, $a1 + fmul.d $fa0, $fs0, $fa0 ftintrz.w.d $fa0, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI1_4) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_4) movfr2gr.s $a1, $fa0 - st.w $a0, $s8, 0 st.w $a1, $s8, 4 - fmul.d $fa0, $fs0, $fa1 + lu32i.d $a0, 352256 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa0, $a0 + fmul.d $fa0, $fs0, $fa0 ftintrz.w.d $fa0, $fa0 ld.d $s7, $sp, 96 # 8-byte Folded Reload ld.w $a0, $s7, 32 @@ -732,8 +726,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define jr $a0 .LBB1_99: # %._crit_edge.i.i352 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_11) - vld $vr0, $a0, %pc_lo12(.LCPI1_11) + pcalau12i $a0, %pc_hi20(.LCPI1_6) + vld $vr0, $a0, %pc_lo12(.LCPI1_6) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -936,8 +930,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define b .LBB1_133 .LBB1_112: # %._crit_edge.i.i732 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_8) - vld $vr0, $a0, %pc_lo12(.LCPI1_8) + pcalau12i $a0, %pc_hi20(.LCPI1_5) + vld $vr0, $a0, %pc_lo12(.LCPI1_5) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -1687,8 +1681,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define b .LBB1_205 .LBB1_158: # %._crit_edge.i.i748 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_7) - vld $vr0, $a0, %pc_lo12(.LCPI1_7) + pcalau12i $a0, %pc_hi20(.LCPI1_4) + vld $vr0, $a0, %pc_lo12(.LCPI1_4) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -1761,16 +1755,18 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define # in Loop: Header=BB1_95 Depth=1 ld.d $a0, $sp, 32 # 8-byte Folded Reload ld.d $a0, $a0, 0 - ld.d $a1, $sp, 8 # 8-byte Folded Reload - fld.d $fa0, $a1, %pc_lo12(_ZN7ADomain18loop_length_factorE) - pcalau12i $a1, %pc_hi20(.LCPI1_9) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_9) - fld.d $fa2, $a0, 0 - fmul.d $fa1, $fa0, $fa1 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 + fld.d $fa1, $a0, 0 + ld.d $a0, $sp, 8 # 8-byte Folded Reload + fld.d $fa0, $a0, %pc_lo12(_ZN7ADomain18loop_length_factorE) + ori $a0, $zero, 0 + lu32i.d $a0, 229376 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa2, $a0 + fmul.d $fa2, $fa0, $fa2 + ftintrz.w.d $fa2, $fa2 + movfr2gr.s $a0, $fa2 ori $a2, $zero, 2 - fst.d $fa2, $sp, 168 + fst.d $fa1, $sp, 168 blt $a0, $a2, .LBB1_211 # %bb.164: # %.lr.ph72.us.i.preheader # in Loop: Header=BB1_95 Depth=1 @@ -2202,8 +2198,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define b .LBB1_205 .LBB1_196: # %._crit_edge.i.i988 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_5) - vld $vr0, $a0, %pc_lo12(.LCPI1_5) + pcalau12i $a0, %pc_hi20(.LCPI1_2) + vld $vr0, $a0, %pc_lo12(.LCPI1_2) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -2297,8 +2293,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define b .LBB1_205 .LBB1_202: # %._crit_edge.i.i956 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_6) - vld $vr0, $a0, %pc_lo12(.LCPI1_6) + pcalau12i $a0, %pc_hi20(.LCPI1_3) + vld $vr0, $a0, %pc_lo12(.LCPI1_3) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -2382,9 +2378,9 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define move $a3, $zero .LBB1_212: # %_ZN7ADomainC2Eii.exit527 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a1, %pc_hi20(.LCPI1_10) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_10) ld.d $a1, $sp, 368 + lu52i.d $a4, $zero, 1029 + movgr2fr.d $fa1, $a4 fmul.d $fa1, $fa0, $fa1 ftintrz.w.d $fa1, $fa1 movfr2gr.s $a4, $fa1 @@ -4488,15 +4484,9 @@ GCC_except_table7: .LCPI8_0: .dword 0x3fb999999999999a # double 0.10000000000000001 .dword 0x3fc999999999999a # double 0.20000000000000001 -.LCPI8_3: +.LCPI8_1: .dword 0x3fc999999999999a # double 0.20000000000000001 .dword 0x3fd3333333333333 # double 0.29999999999999999 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI8_1: - .dword 0x3ff199999999999a # double 1.1000000000000001 -.LCPI8_2: - .dword 0x3ff1f9a6b50b0f28 # double 1.1234500000000001 .text .globl _Z8loopInitjR8LoopStat .p2align 5 @@ -4747,8 +4737,8 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat .LBB8_40: pcalau12i $a0, %pc_hi20(.LCPI8_0) addi.d $a0, $a0, %pc_lo12(.LCPI8_0) - pcalau12i $a1, %pc_hi20(.LCPI8_3) - addi.d $a1, $a1, %pc_lo12(.LCPI8_3) + pcalau12i $a1, %pc_hi20(.LCPI8_1) + addi.d $a1, $a1, %pc_lo12(.LCPI8_1) ld.w $a3, $s1, 1032 blez $a3, .LBB8_577 # %bb.41: # %.lr.ph.preheader.i430 @@ -4962,8 +4952,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat pcalau12i $a2, %pc_hi20(.LCPI8_0) addi.d $a2, $a2, %pc_lo12(.LCPI8_0) fldx.d $fa0, $a2, $a0 - ld.d $a2, $s1, 472 + ld.d $a3, $s1, 472 ori $a0, $zero, 4 + lu12i.w $a4, -419431 + lu12i.w $a2, -307024 bgeu $a1, $a0, .LBB8_920 # %bb.78: move $a0, $zero @@ -5152,12 +5144,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.102: # %middle.block3803 beq $a1, $a2, .LBB8_105 .LBB8_103: # %.lr.ph.i184.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_104: # %.lr.ph.i184 # =>This Inner Loop Header: Depth=1 @@ -5246,12 +5244,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.110: # %middle.block3819 beq $a1, $a2, .LBB8_113 .LBB8_111: # %.lr.ph.i192.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_112: # %.lr.ph.i192 # =>This Inner Loop Header: Depth=1 @@ -5340,12 +5344,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.118: # %middle.block3835 beq $a1, $a2, .LBB8_121 .LBB8_119: # %.lr.ph.i200.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_120: # %.lr.ph.i200 # =>This Inner Loop Header: Depth=1 @@ -5434,12 +5444,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.126: # %middle.block3851 beq $a1, $a2, .LBB8_129 .LBB8_127: # %.lr.ph.i208.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_128: # %.lr.ph.i208 # =>This Inner Loop Header: Depth=1 @@ -5528,12 +5544,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.134: # %middle.block3867 beq $a1, $a2, .LBB8_137 .LBB8_135: # %.lr.ph.i216.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_136: # %.lr.ph.i216 # =>This Inner Loop Header: Depth=1 @@ -5622,12 +5644,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.142: # %middle.block3883 beq $a0, $a1, .LBB8_1187 .LBB8_143: # %.lr.ph.i224.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_144: # %.lr.ph.i224 # =>This Inner Loop Header: Depth=1 @@ -5702,12 +5730,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.147: # %middle.block3547 beq $a1, $a2, .LBB8_150 .LBB8_148: # %.lr.ph.i232.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_149: # %.lr.ph.i232 # =>This Inner Loop Header: Depth=1 @@ -5796,12 +5830,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.155: # %middle.block3563 beq $a1, $a2, .LBB8_158 .LBB8_156: # %.lr.ph.i240.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_157: # %.lr.ph.i240 # =>This Inner Loop Header: Depth=1 @@ -5890,12 +5930,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.163: # %middle.block3579 beq $a1, $a2, .LBB8_166 .LBB8_164: # %.lr.ph.i248.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_165: # %.lr.ph.i248 # =>This Inner Loop Header: Depth=1 @@ -5984,12 +6030,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.171: # %middle.block3595 beq $a1, $a2, .LBB8_174 .LBB8_172: # %.lr.ph.i256.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_173: # %.lr.ph.i256 # =>This Inner Loop Header: Depth=1 @@ -6078,12 +6130,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.179: # %middle.block3611 beq $a1, $a2, .LBB8_182 .LBB8_180: # %.lr.ph.i264.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_181: # %.lr.ph.i264 # =>This Inner Loop Header: Depth=1 @@ -6172,12 +6230,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.187: # %middle.block3627 beq $a1, $a2, .LBB8_190 .LBB8_188: # %.lr.ph.i272.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_189: # %.lr.ph.i272 # =>This Inner Loop Header: Depth=1 @@ -6266,12 +6330,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.195: # %middle.block3643 beq $a1, $a2, .LBB8_198 .LBB8_196: # %.lr.ph.i280.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_197: # %.lr.ph.i280 # =>This Inner Loop Header: Depth=1 @@ -6360,12 +6430,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.203: # %middle.block3659 beq $a1, $a2, .LBB8_206 .LBB8_204: # %.lr.ph.i288.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_205: # %.lr.ph.i288 # =>This Inner Loop Header: Depth=1 @@ -6454,12 +6530,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.211: # %middle.block3675 beq $a1, $a2, .LBB8_214 .LBB8_212: # %.lr.ph.i296.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_213: # %.lr.ph.i296 # =>This Inner Loop Header: Depth=1 @@ -6548,12 +6630,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.219: # %middle.block3691 beq $a1, $a2, .LBB8_222 .LBB8_220: # %.lr.ph.i304.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_221: # %.lr.ph.i304 # =>This Inner Loop Header: Depth=1 @@ -6642,12 +6730,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.227: # %middle.block3707 beq $a1, $a2, .LBB8_230 .LBB8_228: # %.lr.ph.i312.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_229: # %.lr.ph.i312 # =>This Inner Loop Header: Depth=1 @@ -6736,12 +6830,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.235: # %middle.block3723 beq $a1, $a2, .LBB8_238 .LBB8_236: # %.lr.ph.i320.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_237: # %.lr.ph.i320 # =>This Inner Loop Header: Depth=1 @@ -6830,12 +6930,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.243: # %middle.block3739 beq $a1, $a2, .LBB8_246 .LBB8_244: # %.lr.ph.i328.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_245: # %.lr.ph.i328 # =>This Inner Loop Header: Depth=1 @@ -6924,12 +7030,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.251: # %middle.block3755 beq $a1, $a2, .LBB8_254 .LBB8_252: # %.lr.ph.i336.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_253: # %.lr.ph.i336 # =>This Inner Loop Header: Depth=1 @@ -7018,12 +7130,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.259: # %middle.block3771 beq $a1, $a2, .LBB8_262 .LBB8_260: # %.lr.ph.i344.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_261: # %.lr.ph.i344 # =>This Inner Loop Header: Depth=1 @@ -7112,12 +7230,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.267: # %middle.block3787 beq $a0, $a1, .LBB8_1187 .LBB8_268: # %.lr.ph.i352.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_269: # %.lr.ph.i352 # =>This Inner Loop Header: Depth=1 @@ -7215,12 +7339,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.274: # %middle.block2006 beq $a1, $a2, .LBB8_277 .LBB8_275: # %.lr.ph.i1118.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_276: # %.lr.ph.i1118 # =>This Inner Loop Header: Depth=1 @@ -7309,12 +7439,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.282: # %middle.block2022 beq $a1, $a2, .LBB8_285 .LBB8_283: # %.lr.ph.i1126.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_284: # %.lr.ph.i1126 # =>This Inner Loop Header: Depth=1 @@ -7403,12 +7539,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.290: # %middle.block2038 beq $a1, $a2, .LBB8_293 .LBB8_291: # %.lr.ph.i1134.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_292: # %.lr.ph.i1134 # =>This Inner Loop Header: Depth=1 @@ -7497,12 +7639,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.298: # %middle.block2054 beq $a0, $a1, .LBB8_1187 .LBB8_299: # %.lr.ph.i1142.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_300: # %.lr.ph.i1142 # =>This Inner Loop Header: Depth=1 @@ -7577,12 +7725,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.303: # %middle.block2870 beq $a1, $a2, .LBB8_306 .LBB8_304: # %.lr.ph.i692.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_305: # %.lr.ph.i692 # =>This Inner Loop Header: Depth=1 @@ -7671,12 +7825,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.311: # %middle.block2886 beq $a0, $a1, .LBB8_1187 .LBB8_312: # %.lr.ph.i700.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_313: # %.lr.ph.i700 # =>This Inner Loop Header: Depth=1 @@ -7751,12 +7911,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.316: # %middle.block2934 beq $a1, $a2, .LBB8_319 .LBB8_317: # %.lr.ph.i660.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_318: # %.lr.ph.i660 # =>This Inner Loop Header: Depth=1 @@ -7845,12 +8011,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.324: # %middle.block2950 beq $a0, $a1, .LBB8_1187 .LBB8_325: # %.lr.ph.i668.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_326: # %.lr.ph.i668 # =>This Inner Loop Header: Depth=1 @@ -7925,12 +8097,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.329: # %middle.block3046 beq $a1, $a2, .LBB8_332 .LBB8_330: # %.lr.ph.i579.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_331: # %.lr.ph.i579 # =>This Inner Loop Header: Depth=1 @@ -8019,13 +8197,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.337: # %middle.block3062 beq $a1, $a2, .LBB8_340 .LBB8_338: # %.lr.ph.i587.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 - .p2align 4, , 16 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 + .p2align 4, , 16 .LBB8_339: # %.lr.ph.i587 # =>This Inner Loop Header: Depth=1 bstrpick.d $a4, $a1, 31, 0 @@ -8113,12 +8297,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.345: # %middle.block3078 beq $a1, $a2, .LBB8_348 .LBB8_346: # %.lr.ph.i595.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_347: # %.lr.ph.i595 # =>This Inner Loop Header: Depth=1 @@ -8207,12 +8397,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.353: # %middle.block3094 beq $a1, $a2, .LBB8_356 .LBB8_354: # %.lr.ph.i603.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_355: # %.lr.ph.i603 # =>This Inner Loop Header: Depth=1 @@ -8301,12 +8497,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.361: # %middle.block3110 beq $a0, $a1, .LBB8_1187 .LBB8_362: # %.lr.ph.i611.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_363: # %.lr.ph.i611 # =>This Inner Loop Header: Depth=1 @@ -8381,12 +8583,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.366: # %middle.block3030 beq $a0, $a1, .LBB8_1187 .LBB8_367: # %.lr.ph.i620.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_368: # %.lr.ph.i620 # =>This Inner Loop Header: Depth=1 @@ -8461,12 +8669,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.371: # %middle.block3899 beq $a1, $a2, .LBB8_374 .LBB8_372: # %.lr.ph.i.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_373: # %.lr.ph.i # =>This Inner Loop Header: Depth=1 @@ -8555,12 +8769,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.379: # %middle.block3915 beq $a1, $a2, .LBB8_382 .LBB8_380: # %.lr.ph.i168.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_381: # %.lr.ph.i168 # =>This Inner Loop Header: Depth=1 @@ -8649,12 +8869,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.387: # %middle.block3931 beq $a0, $a1, .LBB8_1187 .LBB8_388: # %.lr.ph.i176.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_389: # %.lr.ph.i176 # =>This Inner Loop Header: Depth=1 @@ -8729,12 +8955,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.392: # %middle.block3206 beq $a1, $a2, .LBB8_395 .LBB8_393: # %.lr.ph.i499.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_394: # %.lr.ph.i499 # =>This Inner Loop Header: Depth=1 @@ -8823,12 +9055,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.400: # %middle.block3222 beq $a1, $a2, .LBB8_403 .LBB8_401: # %.lr.ph.i507.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_402: # %.lr.ph.i507 # =>This Inner Loop Header: Depth=1 @@ -8917,12 +9155,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.408: # %middle.block3238 beq $a1, $a2, .LBB8_411 .LBB8_409: # %.lr.ph.i515.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_410: # %.lr.ph.i515 # =>This Inner Loop Header: Depth=1 @@ -9011,12 +9255,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.416: # %middle.block3254 beq $a1, $a2, .LBB8_419 .LBB8_417: # %.lr.ph.i523.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_418: # %.lr.ph.i523 # =>This Inner Loop Header: Depth=1 @@ -9105,12 +9355,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.424: # %middle.block3270 beq $a0, $a1, .LBB8_1187 .LBB8_425: # %.lr.ph.i531.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_426: # %.lr.ph.i531 # =>This Inner Loop Header: Depth=1 @@ -9185,12 +9441,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.429: # %middle.block2246 beq $a1, $a2, .LBB8_432 .LBB8_430: # %.lr.ph.i944.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_431: # %.lr.ph.i944 # =>This Inner Loop Header: Depth=1 @@ -9279,12 +9541,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.437: # %middle.block2262 beq $a1, $a2, .LBB8_440 .LBB8_438: # %.lr.ph.i952.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_439: # %.lr.ph.i952 # =>This Inner Loop Header: Depth=1 @@ -9373,12 +9641,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.445: # %middle.block2278 beq $a1, $a2, .LBB8_448 .LBB8_446: # %.lr.ph.i960.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_447: # %.lr.ph.i960 # =>This Inner Loop Header: Depth=1 @@ -9467,12 +9741,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.453: # %middle.block2294 beq $a1, $a2, .LBB8_456 .LBB8_454: # %.lr.ph.i968.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_455: # %.lr.ph.i968 # =>This Inner Loop Header: Depth=1 @@ -9561,12 +9841,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.461: # %middle.block2310 beq $a1, $a2, .LBB8_464 .LBB8_462: # %.lr.ph.i976.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_463: # %.lr.ph.i976 # =>This Inner Loop Header: Depth=1 @@ -9655,12 +9941,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.469: # %middle.block2326 beq $a1, $a2, .LBB8_472 .LBB8_470: # %.lr.ph.i984.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_471: # %.lr.ph.i984 # =>This Inner Loop Header: Depth=1 @@ -9749,12 +10041,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.477: # %middle.block2342 beq $a1, $a2, .LBB8_480 .LBB8_478: # %.lr.ph.i992.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_479: # %.lr.ph.i992 # =>This Inner Loop Header: Depth=1 @@ -9843,12 +10141,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.485: # %middle.block2358 beq $a1, $a2, .LBB8_488 .LBB8_486: # %.lr.ph.i1000.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_487: # %.lr.ph.i1000 # =>This Inner Loop Header: Depth=1 @@ -9937,12 +10241,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.493: # %middle.block2374 beq $a1, $a2, .LBB8_496 .LBB8_494: # %.lr.ph.i1008.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_495: # %.lr.ph.i1008 # =>This Inner Loop Header: Depth=1 @@ -10031,12 +10341,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.501: # %middle.block2390 beq $a0, $a1, .LBB8_504 .LBB8_502: # %.lr.ph.i1016.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_503: # %.lr.ph.i1016 # =>This Inner Loop Header: Depth=1 @@ -10142,12 +10458,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.512: # %middle.block2406 beq $a0, $a1, .LBB8_515 .LBB8_513: # %.lr.ph.i892.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_514: # %.lr.ph.i892 # =>This Inner Loop Header: Depth=1 @@ -10236,12 +10558,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.520: # %middle.block2422 beq $a0, $a1, .LBB8_523 .LBB8_521: # %.lr.ph.i900.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_522: # %.lr.ph.i900 # =>This Inner Loop Header: Depth=1 @@ -10330,12 +10658,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.528: # %middle.block2438 beq $a0, $a1, .LBB8_531 .LBB8_529: # %.lr.ph.i908.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_530: # %.lr.ph.i908 # =>This Inner Loop Header: Depth=1 @@ -10424,12 +10758,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.536: # %middle.block2454 beq $a0, $a1, .LBB8_539 .LBB8_537: # %.lr.ph.i916.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_538: # %.lr.ph.i916 # =>This Inner Loop Header: Depth=1 @@ -10518,12 +10858,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.544: # %middle.block2470 beq $a0, $a1, .LBB8_547 .LBB8_545: # %.lr.ph.i924.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_546: # %.lr.ph.i924 # =>This Inner Loop Header: Depth=1 @@ -10630,12 +10976,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.556: # %middle.block2486 beq $a0, $a1, .LBB8_1187 .LBB8_557: # %.lr.ph.i936.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_558: # %.lr.ph.i936 # =>This Inner Loop Header: Depth=1 @@ -10710,12 +11062,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.561: # %middle.block2902 beq $a1, $a2, .LBB8_564 .LBB8_562: # %.lr.ph.i676.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_563: # %.lr.ph.i676 # =>This Inner Loop Header: Depth=1 @@ -10804,12 +11162,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.569: # %middle.block2918 beq $a0, $a1, .LBB8_1187 .LBB8_570: # %.lr.ph.i684.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_571: # %.lr.ph.i684 # =>This Inner Loop Header: Depth=1 @@ -10875,13 +11239,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.574: # %middle.block3319 beq $a2, $a3, .LBB8_577 .LBB8_575: # %.lr.ph.i432.preheader - pcalau12i $a5, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI8_1) - pcalau12i $a5, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI8_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB8_576: # %.lr.ph.i432 # =>This Inner Loop Header: Depth=1 @@ -10965,13 +11335,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.582: # %middle.block3336 beq $a2, $a3, .LBB8_585 .LBB8_583: # %.lr.ph.i441.preheader - pcalau12i $a5, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI8_1) - pcalau12i $a5, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI8_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB8_584: # %.lr.ph.i441 # =>This Inner Loop Header: Depth=1 @@ -11055,13 +11431,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.590: # %middle.block3353 beq $a2, $a3, .LBB8_593 .LBB8_591: # %.lr.ph.i452.preheader - pcalau12i $a5, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI8_1) - pcalau12i $a5, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI8_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB8_592: # %.lr.ph.i452 # =>This Inner Loop Header: Depth=1 @@ -11145,13 +11527,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.598: # %middle.block3370 beq $a2, $a3, .LBB8_601 .LBB8_599: # %.lr.ph.i463.preheader - pcalau12i $a5, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI8_1) - pcalau12i $a5, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI8_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB8_600: # %.lr.ph.i463 # =>This Inner Loop Header: Depth=1 @@ -11235,13 +11623,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.606: # %middle.block3387 beq $a0, $a2, .LBB8_1187 .LBB8_607: # %.lr.ph.i474.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a3, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a0 alsl.d $a1, $a0, $a1, 4 addi.d $a1, $a1, 8 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa3, $a3 .p2align 4, , 16 .LBB8_608: # %.lr.ph.i474 # =>This Inner Loop Header: Depth=1 @@ -11319,12 +11713,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.611: # %middle.block1622 beq $a1, $a2, .LBB8_614 .LBB8_612: # %.lr.ph.i1294.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_613: # %.lr.ph.i1294 # =>This Inner Loop Header: Depth=1 @@ -11413,12 +11813,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.619: # %middle.block1638 beq $a1, $a2, .LBB8_622 .LBB8_620: # %.lr.ph.i1302.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_621: # %.lr.ph.i1302 # =>This Inner Loop Header: Depth=1 @@ -11507,12 +11913,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.627: # %middle.block1654 beq $a1, $a2, .LBB8_630 .LBB8_628: # %.lr.ph.i1310.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_629: # %.lr.ph.i1310 # =>This Inner Loop Header: Depth=1 @@ -11601,12 +12013,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.635: # %middle.block1670 beq $a1, $a2, .LBB8_638 .LBB8_636: # %.lr.ph.i1318.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_637: # %.lr.ph.i1318 # =>This Inner Loop Header: Depth=1 @@ -11695,12 +12113,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.643: # %middle.block1686 beq $a1, $a2, .LBB8_646 .LBB8_644: # %.lr.ph.i1326.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_645: # %.lr.ph.i1326 # =>This Inner Loop Header: Depth=1 @@ -11789,12 +12213,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.651: # %middle.block1702 beq $a0, $a1, .LBB8_1187 .LBB8_652: # %.lr.ph.i1334.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_653: # %.lr.ph.i1334 # =>This Inner Loop Header: Depth=1 @@ -11869,12 +12299,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.656: # %middle.block2822 beq $a1, $a2, .LBB8_659 .LBB8_657: # %.lr.ph.i708.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_658: # %.lr.ph.i708 # =>This Inner Loop Header: Depth=1 @@ -11963,12 +12399,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.664: # %middle.block2838 beq $a1, $a2, .LBB8_667 .LBB8_665: # %.lr.ph.i716.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_666: # %.lr.ph.i716 # =>This Inner Loop Header: Depth=1 @@ -12057,12 +12499,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.672: # %middle.block2854 beq $a0, $a1, .LBB8_1187 .LBB8_673: # %.lr.ph.i724.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_674: # %.lr.ph.i724 # =>This Inner Loop Header: Depth=1 @@ -12137,12 +12585,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.677: # %middle.block2598 beq $a1, $a2, .LBB8_680 .LBB8_678: # %.lr.ph.i828.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_679: # %.lr.ph.i828 # =>This Inner Loop Header: Depth=1 @@ -12231,12 +12685,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.685: # %middle.block2614 beq $a0, $a1, .LBB8_1187 .LBB8_686: # %.lr.ph.i836.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_687: # %.lr.ph.i836 # =>This Inner Loop Header: Depth=1 @@ -12311,12 +12771,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.690: # %middle.block3483 beq $a1, $a2, .LBB8_693 .LBB8_691: # %.lr.ph.i360.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_692: # %.lr.ph.i360 # =>This Inner Loop Header: Depth=1 @@ -12405,12 +12871,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.698: # %middle.block3499 beq $a1, $a2, .LBB8_701 .LBB8_699: # %.lr.ph.i368.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_700: # %.lr.ph.i368 # =>This Inner Loop Header: Depth=1 @@ -12499,12 +12971,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.706: # %middle.block3515 beq $a1, $a2, .LBB8_709 .LBB8_707: # %.lr.ph.i376.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_708: # %.lr.ph.i376 # =>This Inner Loop Header: Depth=1 @@ -12593,12 +13071,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.714: # %middle.block3531 beq $a0, $a1, .LBB8_1187 .LBB8_715: # %.lr.ph.i384.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_716: # %.lr.ph.i384 # =>This Inner Loop Header: Depth=1 @@ -12673,12 +13157,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.719: # %middle.block2566 beq $a1, $a2, .LBB8_722 .LBB8_720: # %.lr.ph.i844.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_721: # %.lr.ph.i844 # =>This Inner Loop Header: Depth=1 @@ -12767,12 +13257,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.727: # %middle.block2582 beq $a0, $a1, .LBB8_1187 .LBB8_728: # %.lr.ph.i852.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_729: # %.lr.ph.i852 # =>This Inner Loop Header: Depth=1 @@ -12847,12 +13343,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.732: # %middle.block2966 beq $a1, $a2, .LBB8_735 .LBB8_733: # %.lr.ph.i628.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_734: # %.lr.ph.i628 # =>This Inner Loop Header: Depth=1 @@ -12941,12 +13443,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.740: # %middle.block2982 beq $a1, $a2, .LBB8_743 .LBB8_741: # %.lr.ph.i636.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_742: # %.lr.ph.i636 # =>This Inner Loop Header: Depth=1 @@ -13035,12 +13543,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.748: # %middle.block2998 beq $a1, $a2, .LBB8_751 .LBB8_749: # %.lr.ph.i644.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_750: # %.lr.ph.i644 # =>This Inner Loop Header: Depth=1 @@ -13129,12 +13643,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.756: # %middle.block3014 beq $a0, $a1, .LBB8_1187 .LBB8_757: # %.lr.ph.i652.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_758: # %.lr.ph.i652 # =>This Inner Loop Header: Depth=1 @@ -13209,12 +13729,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.761: # %middle.block3403 beq $a1, $a2, .LBB8_764 .LBB8_762: # %.lr.ph.i392.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_763: # %.lr.ph.i392 # =>This Inner Loop Header: Depth=1 @@ -13303,12 +13829,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.769: # %middle.block3419 beq $a1, $a2, .LBB8_772 .LBB8_770: # %.lr.ph.i400.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_771: # %.lr.ph.i400 # =>This Inner Loop Header: Depth=1 @@ -13397,12 +13929,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.777: # %middle.block3435 beq $a1, $a2, .LBB8_780 .LBB8_778: # %.lr.ph.i408.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_779: # %.lr.ph.i408 # =>This Inner Loop Header: Depth=1 @@ -13491,12 +14029,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.785: # %middle.block3451 beq $a1, $a2, .LBB8_788 .LBB8_786: # %.lr.ph.i416.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_787: # %.lr.ph.i416 # =>This Inner Loop Header: Depth=1 @@ -13585,12 +14129,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.793: # %middle.block3467 beq $a0, $a1, .LBB8_1187 .LBB8_794: # %.lr.ph.i424.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_795: # %.lr.ph.i424 # =>This Inner Loop Header: Depth=1 @@ -13665,12 +14215,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.798: # %middle.block2742 beq $a1, $a2, .LBB8_801 .LBB8_799: # %.lr.ph.i732.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_800: # %.lr.ph.i732 # =>This Inner Loop Header: Depth=1 @@ -13759,12 +14315,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.806: # %middle.block2758 beq $a1, $a2, .LBB8_809 .LBB8_807: # %.lr.ph.i740.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_808: # %.lr.ph.i740 # =>This Inner Loop Header: Depth=1 @@ -13853,12 +14415,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.814: # %middle.block2774 beq $a1, $a2, .LBB8_817 .LBB8_815: # %.lr.ph.i748.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_816: # %.lr.ph.i748 # =>This Inner Loop Header: Depth=1 @@ -13947,12 +14515,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.822: # %middle.block2790 beq $a1, $a2, .LBB8_825 .LBB8_823: # %.lr.ph.i756.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_824: # %.lr.ph.i756 # =>This Inner Loop Header: Depth=1 @@ -14041,12 +14615,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.830: # %middle.block2806 beq $a0, $a1, .LBB8_1187 .LBB8_831: # %.lr.ph.i764.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_832: # %.lr.ph.i764 # =>This Inner Loop Header: Depth=1 @@ -14121,12 +14701,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.835: # %middle.block3126 beq $a1, $a2, .LBB8_838 .LBB8_836: # %.lr.ph.i539.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_837: # %.lr.ph.i539 # =>This Inner Loop Header: Depth=1 @@ -14215,12 +14801,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.843: # %middle.block3142 beq $a1, $a2, .LBB8_846 .LBB8_844: # %.lr.ph.i547.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_845: # %.lr.ph.i547 # =>This Inner Loop Header: Depth=1 @@ -14309,12 +14901,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.851: # %middle.block3158 beq $a1, $a2, .LBB8_854 .LBB8_852: # %.lr.ph.i555.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_853: # %.lr.ph.i555 # =>This Inner Loop Header: Depth=1 @@ -14403,12 +15001,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.859: # %middle.block3174 beq $a1, $a2, .LBB8_862 .LBB8_860: # %.lr.ph.i563.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_861: # %.lr.ph.i563 # =>This Inner Loop Header: Depth=1 @@ -14497,12 +15101,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.867: # %middle.block3190 beq $a0, $a1, .LBB8_1187 .LBB8_868: # %.lr.ph.i571.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_869: # %.lr.ph.i571 # =>This Inner Loop Header: Depth=1 @@ -14577,12 +15187,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.872: # %middle.block3286 beq $a1, $a2, .LBB8_875 .LBB8_873: # %.lr.ph.i483.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_874: # %.lr.ph.i483 # =>This Inner Loop Header: Depth=1 @@ -14671,12 +15287,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.880: # %middle.block3302 beq $a0, $a1, .LBB8_1187 .LBB8_881: # %.lr.ph.i491.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_882: # %.lr.ph.i491 # =>This Inner Loop Header: Depth=1 @@ -14751,12 +15373,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.885: # %middle.block1718 beq $a1, $a2, .LBB8_888 .LBB8_886: # %.lr.ph.i1254.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_887: # %.lr.ph.i1254 # =>This Inner Loop Header: Depth=1 @@ -14845,12 +15473,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.893: # %middle.block1734 beq $a1, $a2, .LBB8_896 .LBB8_894: # %.lr.ph.i1262.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_895: # %.lr.ph.i1262 # =>This Inner Loop Header: Depth=1 @@ -14939,12 +15573,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.901: # %middle.block1750 beq $a1, $a2, .LBB8_904 .LBB8_902: # %.lr.ph.i1270.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_903: # %.lr.ph.i1270 # =>This Inner Loop Header: Depth=1 @@ -15033,12 +15673,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.909: # %middle.block1766 beq $a1, $a2, .LBB8_912 .LBB8_910: # %.lr.ph.i1278.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_911: # %.lr.ph.i1278 # =>This Inner Loop Header: Depth=1 @@ -15127,12 +15773,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.917: # %middle.block1782 beq $a0, $a1, .LBB8_1187 .LBB8_918: # %.lr.ph.i1286.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_919: # %.lr.ph.i1286 # =>This Inner Loop Header: Depth=1 @@ -15153,41 +15805,39 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat bstrpick.d $a0, $a1, 30, 2 slli.d $a0, $a0, 2 vreplvei.d $vr1, $vr0, 0 - addi.d $a3, $a2, 16 - ori $a4, $zero, 0 - lu32i.d $a4, 1 - vreplgr2vr.d $vr2, $a4 - lu12i.w $a4, -419431 - ori $a4, $a4, 2458 - lu32i.d $a4, 104857 - lu52i.d $a4, $a4, 1023 - vreplgr2vr.d $vr3, $a4 - lu12i.w $a4, -307024 - ori $a4, $a4, 3880 - lu32i.d $a4, 129446 - lu52i.d $a4, $a4, 1023 - vreplgr2vr.d $vr4, $a4 - move $a4, $a0 + addi.d $a5, $a3, 16 + ori $a6, $zero, 0 + lu32i.d $a6, 1 + vreplgr2vr.d $vr2, $a6 + ori $a6, $a4, 2458 + lu32i.d $a6, 104857 + lu52i.d $a6, $a6, 1023 + vreplgr2vr.d $vr3, $a6 + ori $a6, $a2, 3880 + lu32i.d $a6, 129446 + lu52i.d $a6, $a6, 1023 + vreplgr2vr.d $vr4, $a6 + move $a6, $a0 .p2align 4, , 16 .LBB8_921: # %vector.body # =>This Inner Loop Header: Depth=1 vaddi.wu $vr5, $vr2, 2 - vpickve2gr.w $a5, $vr2, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa6, $a5 + vpickve2gr.w $a7, $vr2, 1 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa6, $a7 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a5, $vr2, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa7, $a5 + vpickve2gr.w $a7, $vr2, 0 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa7, $a7 ffint.d.l $fa7, $fa7 vextrins.d $vr7, $vr6, 16 - vpickve2gr.w $a5, $vr5, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa6, $a5 + vpickve2gr.w $a7, $vr5, 1 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa6, $a7 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a5, $vr5, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa5, $a5 + vpickve2gr.w $a7, $vr5, 0 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa5, $a7 ffint.d.l $fa5, $fa5 vextrins.d $vr5, $vr6, 16 vfadd.d $vr6, $vr7, $vr3 @@ -15198,34 +15848,38 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat vfadd.d $vr5, $vr5, $vr4 vfdiv.d $vr6, $vr6, $vr7 vfdiv.d $vr5, $vr8, $vr5 - vst $vr6, $a3, -16 - vst $vr5, $a3, 0 + vst $vr6, $a5, -16 + vst $vr5, $a5, 0 vaddi.wu $vr2, $vr2, 4 - addi.d $a4, $a4, -4 - addi.d $a3, $a3, 32 - bnez $a4, .LBB8_921 + addi.d $a6, $a6, -4 + addi.d $a5, $a5, 32 + bnez $a6, .LBB8_921 # %bb.922: # %middle.block beq $a0, $a1, .LBB8_1187 .LBB8_923: # %.lr.ph.i1342.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 - alsl.d $a2, $a0, $a2, 3 + alsl.d $a3, $a0, $a3, 3 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + ori $a2, $a2, 3880 + lu32i.d $a2, 129446 + lu52i.d $a2, $a2, 1023 + movgr2fr.d $fa2, $a2 .p2align 4, , 16 .LBB8_924: # %.lr.ph.i1342 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a0, 31, 0 - movgr2fr.d $fa3, $a3 + bstrpick.d $a2, $a0, 31, 0 + movgr2fr.d $fa3, $a2 ffint.d.l $fa3, $fa3 fadd.d $fa4, $fa3, $fa1 fmul.d $fa4, $fa0, $fa4 fadd.d $fa3, $fa3, $fa2 fdiv.d $fa3, $fa4, $fa3 - fst.d $fa3, $a2, 0 + fst.d $fa3, $a3, 0 addi.d $a1, $a1, -1 - addi.d $a2, $a2, 8 + addi.d $a3, $a3, 8 addi.w $a0, $a0, 1 bnez $a1, .LBB8_924 b .LBB8_1187 @@ -15287,12 +15941,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.927: # %middle.block2534 beq $a1, $a2, .LBB8_930 .LBB8_928: # %.lr.ph.i860.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_929: # %.lr.ph.i860 # =>This Inner Loop Header: Depth=1 @@ -15381,12 +16041,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.935: # %middle.block2550 beq $a0, $a1, .LBB8_1187 .LBB8_936: # %.lr.ph.i868.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_937: # %.lr.ph.i868 # =>This Inner Loop Header: Depth=1 @@ -15461,12 +16127,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.940: # %middle.block2630 beq $a1, $a2, .LBB8_943 .LBB8_941: # %.lr.ph.i772.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_942: # %.lr.ph.i772 # =>This Inner Loop Header: Depth=1 @@ -15555,12 +16227,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.948: # %middle.block2646 beq $a1, $a2, .LBB8_951 .LBB8_949: # %.lr.ph.i780.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_950: # %.lr.ph.i780 # =>This Inner Loop Header: Depth=1 @@ -15649,12 +16327,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.956: # %middle.block2662 beq $a1, $a2, .LBB8_959 .LBB8_957: # %.lr.ph.i788.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_958: # %.lr.ph.i788 # =>This Inner Loop Header: Depth=1 @@ -15743,12 +16427,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.964: # %middle.block2678 beq $a1, $a2, .LBB8_967 .LBB8_965: # %.lr.ph.i796.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_966: # %.lr.ph.i796 # =>This Inner Loop Header: Depth=1 @@ -15837,12 +16527,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.972: # %middle.block2694 beq $a1, $a2, .LBB8_975 .LBB8_973: # %.lr.ph.i804.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_974: # %.lr.ph.i804 # =>This Inner Loop Header: Depth=1 @@ -15931,12 +16627,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.980: # %middle.block2710 beq $a1, $a2, .LBB8_983 .LBB8_981: # %.lr.ph.i812.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_982: # %.lr.ph.i812 # =>This Inner Loop Header: Depth=1 @@ -16025,12 +16727,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.988: # %middle.block2726 beq $a0, $a1, .LBB8_1187 .LBB8_989: # %.lr.ph.i820.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_990: # %.lr.ph.i820 # =>This Inner Loop Header: Depth=1 @@ -16105,12 +16813,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.993: # %middle.block2502 beq $a1, $a2, .LBB8_996 .LBB8_994: # %.lr.ph.i876.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_995: # %.lr.ph.i876 # =>This Inner Loop Header: Depth=1 @@ -16199,12 +16913,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1001: # %middle.block2518 beq $a0, $a1, .LBB8_1187 .LBB8_1002: # %.lr.ph.i884.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_1003: # %.lr.ph.i884 # =>This Inner Loop Header: Depth=1 @@ -16279,12 +16999,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1006: # %middle.block1846 beq $a1, $a2, .LBB8_1009 .LBB8_1007: # %.lr.ph.i1150.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1008: # %.lr.ph.i1150 # =>This Inner Loop Header: Depth=1 @@ -16373,12 +17099,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1014: # %middle.block1862 beq $a1, $a2, .LBB8_1017 .LBB8_1015: # %.lr.ph.i1158.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1016: # %.lr.ph.i1158 # =>This Inner Loop Header: Depth=1 @@ -16467,12 +17199,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1022: # %middle.block1878 beq $a1, $a2, .LBB8_1025 .LBB8_1023: # %.lr.ph.i1166.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1024: # %.lr.ph.i1166 # =>This Inner Loop Header: Depth=1 @@ -16561,12 +17299,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1030: # %middle.block1894 beq $a1, $a2, .LBB8_1033 .LBB8_1031: # %.lr.ph.i1174.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1032: # %.lr.ph.i1174 # =>This Inner Loop Header: Depth=1 @@ -16655,12 +17399,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1038: # %middle.block1910 beq $a1, $a2, .LBB8_1041 .LBB8_1039: # %.lr.ph.i1182.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1040: # %.lr.ph.i1182 # =>This Inner Loop Header: Depth=1 @@ -16749,12 +17499,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1046: # %middle.block1926 beq $a1, $a2, .LBB8_1049 .LBB8_1047: # %.lr.ph.i1190.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1048: # %.lr.ph.i1190 # =>This Inner Loop Header: Depth=1 @@ -16843,12 +17599,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1054: # %middle.block1942 beq $a1, $a2, .LBB8_1057 .LBB8_1055: # %.lr.ph.i1198.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1056: # %.lr.ph.i1198 # =>This Inner Loop Header: Depth=1 @@ -16937,12 +17699,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1062: # %middle.block1958 beq $a1, $a2, .LBB8_1065 .LBB8_1063: # %.lr.ph.i1206.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1064: # %.lr.ph.i1206 # =>This Inner Loop Header: Depth=1 @@ -17031,12 +17799,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1070: # %middle.block1974 beq $a1, $a2, .LBB8_1073 .LBB8_1071: # %.lr.ph.i1214.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1072: # %.lr.ph.i1214 # =>This Inner Loop Header: Depth=1 @@ -17125,12 +17899,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1078: # %middle.block1990 beq $a0, $a1, .LBB8_1187 .LBB8_1079: # %.lr.ph.i1222.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_1080: # %.lr.ph.i1222 # =>This Inner Loop Header: Depth=1 @@ -17205,12 +17985,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1083: # %middle.block1798 beq $a1, $a2, .LBB8_1086 .LBB8_1084: # %.lr.ph.i1230.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1085: # %.lr.ph.i1230 # =>This Inner Loop Header: Depth=1 @@ -17299,12 +18085,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1091: # %middle.block1814 beq $a1, $a2, .LBB8_1094 .LBB8_1092: # %.lr.ph.i1238.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1093: # %.lr.ph.i1238 # =>This Inner Loop Header: Depth=1 @@ -17393,12 +18185,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1099: # %middle.block1830 beq $a0, $a1, .LBB8_1187 .LBB8_1100: # %.lr.ph.i1246.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_1101: # %.lr.ph.i1246 # =>This Inner Loop Header: Depth=1 @@ -17473,12 +18271,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1104: # %middle.block2070 beq $a1, $a2, .LBB8_1107 .LBB8_1105: # %.lr.ph.i1030.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1106: # %.lr.ph.i1030 # =>This Inner Loop Header: Depth=1 @@ -17567,12 +18371,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1112: # %middle.block2086 beq $a1, $a2, .LBB8_1115 .LBB8_1113: # %.lr.ph.i1038.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1114: # %.lr.ph.i1038 # =>This Inner Loop Header: Depth=1 @@ -17661,12 +18471,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1120: # %middle.block2102 beq $a1, $a2, .LBB8_1123 .LBB8_1121: # %.lr.ph.i1046.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1122: # %.lr.ph.i1046 # =>This Inner Loop Header: Depth=1 @@ -17755,12 +18571,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1128: # %middle.block2118 beq $a1, $a2, .LBB8_1131 .LBB8_1129: # %.lr.ph.i1054.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1130: # %.lr.ph.i1054 # =>This Inner Loop Header: Depth=1 @@ -17849,12 +18671,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1136: # %middle.block2134 beq $a1, $a2, .LBB8_1139 .LBB8_1137: # %.lr.ph.i1062.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1138: # %.lr.ph.i1062 # =>This Inner Loop Header: Depth=1 @@ -17943,12 +18771,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1144: # %middle.block2150 beq $a1, $a2, .LBB8_1147 .LBB8_1145: # %.lr.ph.i1070.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1146: # %.lr.ph.i1070 # =>This Inner Loop Header: Depth=1 @@ -18037,12 +18871,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1152: # %middle.block2166 beq $a1, $a2, .LBB8_1155 .LBB8_1153: # %.lr.ph.i1078.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1154: # %.lr.ph.i1078 # =>This Inner Loop Header: Depth=1 @@ -18131,12 +18971,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1160: # %middle.block2182 beq $a1, $a2, .LBB8_1163 .LBB8_1161: # %.lr.ph.i1086.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1162: # %.lr.ph.i1086 # =>This Inner Loop Header: Depth=1 @@ -18225,12 +19071,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1168: # %middle.block2198 beq $a1, $a2, .LBB8_1171 .LBB8_1169: # %.lr.ph.i1094.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1170: # %.lr.ph.i1094 # =>This Inner Loop Header: Depth=1 @@ -18319,12 +19171,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1176: # %middle.block2214 beq $a1, $a2, .LBB8_1179 .LBB8_1177: # %.lr.ph.i1102.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1178: # %.lr.ph.i1102 # =>This Inner Loop Header: Depth=1 @@ -18413,12 +19271,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1184: # %middle.block2230 beq $a0, $a1, .LBB8_1187 .LBB8_1185: # %.lr.ph.i1110.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_1186: # %.lr.ph.i1110 # =>This Inner Loop Header: Depth=1 @@ -18489,15 +19353,9 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat .LCPI9_0: .dword 0x3fb999999999999a # double 0.10000000000000001 .dword 0x3fc999999999999a # double 0.20000000000000001 -.LCPI9_3: +.LCPI9_1: .dword 0x3fc999999999999a # double 0.20000000000000001 .dword 0x3fd3333333333333 # double 0.29999999999999999 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI9_1: - .dword 0x3ff199999999999a # double 1.1000000000000001 -.LCPI9_2: - .dword 0x3ff1f9a6b50b0f28 # double 1.1234500000000001 .text .globl _Z8loopInitj .p2align 5 @@ -18743,8 +19601,8 @@ _Z8loopInitj: # @_Z8loopInitj .LBB9_40: pcalau12i $a0, %pc_hi20(.LCPI9_0) addi.d $a0, $a0, %pc_lo12(.LCPI9_0) - pcalau12i $a1, %pc_hi20(.LCPI9_3) - addi.d $a1, $a1, %pc_lo12(.LCPI9_3) + pcalau12i $a1, %pc_hi20(.LCPI9_1) + addi.d $a1, $a1, %pc_lo12(.LCPI9_1) ld.w $a3, $s0, 1032 blez $a3, .LBB9_577 # %bb.41: # %.lr.ph.preheader.i429 @@ -18958,8 +19816,10 @@ _Z8loopInitj: # @_Z8loopInitj pcalau12i $a2, %pc_hi20(.LCPI9_0) addi.d $a2, $a2, %pc_lo12(.LCPI9_0) fldx.d $fa0, $a2, $a0 - ld.d $a2, $s0, 472 + ld.d $a3, $s0, 472 ori $a0, $zero, 4 + lu12i.w $a4, -419431 + lu12i.w $a2, -307024 bgeu $a1, $a0, .LBB9_920 # %bb.78: move $a0, $zero @@ -19148,12 +20008,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.102: # %middle.block3802 beq $a1, $a2, .LBB9_105 .LBB9_103: # %.lr.ph.i183.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_104: # %.lr.ph.i183 # =>This Inner Loop Header: Depth=1 @@ -19242,12 +20108,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.110: # %middle.block3818 beq $a1, $a2, .LBB9_113 .LBB9_111: # %.lr.ph.i191.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_112: # %.lr.ph.i191 # =>This Inner Loop Header: Depth=1 @@ -19336,12 +20208,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.118: # %middle.block3834 beq $a1, $a2, .LBB9_121 .LBB9_119: # %.lr.ph.i199.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_120: # %.lr.ph.i199 # =>This Inner Loop Header: Depth=1 @@ -19430,12 +20308,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.126: # %middle.block3850 beq $a1, $a2, .LBB9_129 .LBB9_127: # %.lr.ph.i207.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_128: # %.lr.ph.i207 # =>This Inner Loop Header: Depth=1 @@ -19524,12 +20408,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.134: # %middle.block3866 beq $a1, $a2, .LBB9_137 .LBB9_135: # %.lr.ph.i215.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_136: # %.lr.ph.i215 # =>This Inner Loop Header: Depth=1 @@ -19618,12 +20508,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.142: # %middle.block3882 beq $a0, $a1, .LBB9_1187 .LBB9_143: # %.lr.ph.i223.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_144: # %.lr.ph.i223 # =>This Inner Loop Header: Depth=1 @@ -19698,12 +20594,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.147: # %middle.block3546 beq $a1, $a2, .LBB9_150 .LBB9_148: # %.lr.ph.i231.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_149: # %.lr.ph.i231 # =>This Inner Loop Header: Depth=1 @@ -19792,12 +20694,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.155: # %middle.block3562 beq $a1, $a2, .LBB9_158 .LBB9_156: # %.lr.ph.i239.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_157: # %.lr.ph.i239 # =>This Inner Loop Header: Depth=1 @@ -19886,12 +20794,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.163: # %middle.block3578 beq $a1, $a2, .LBB9_166 .LBB9_164: # %.lr.ph.i247.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_165: # %.lr.ph.i247 # =>This Inner Loop Header: Depth=1 @@ -19980,12 +20894,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.171: # %middle.block3594 beq $a1, $a2, .LBB9_174 .LBB9_172: # %.lr.ph.i255.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_173: # %.lr.ph.i255 # =>This Inner Loop Header: Depth=1 @@ -20074,12 +20994,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.179: # %middle.block3610 beq $a1, $a2, .LBB9_182 .LBB9_180: # %.lr.ph.i263.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_181: # %.lr.ph.i263 # =>This Inner Loop Header: Depth=1 @@ -20168,12 +21094,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.187: # %middle.block3626 beq $a1, $a2, .LBB9_190 .LBB9_188: # %.lr.ph.i271.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_189: # %.lr.ph.i271 # =>This Inner Loop Header: Depth=1 @@ -20262,12 +21194,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.195: # %middle.block3642 beq $a1, $a2, .LBB9_198 .LBB9_196: # %.lr.ph.i279.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_197: # %.lr.ph.i279 # =>This Inner Loop Header: Depth=1 @@ -20356,12 +21294,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.203: # %middle.block3658 beq $a1, $a2, .LBB9_206 .LBB9_204: # %.lr.ph.i287.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_205: # %.lr.ph.i287 # =>This Inner Loop Header: Depth=1 @@ -20450,12 +21394,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.211: # %middle.block3674 beq $a1, $a2, .LBB9_214 .LBB9_212: # %.lr.ph.i295.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_213: # %.lr.ph.i295 # =>This Inner Loop Header: Depth=1 @@ -20544,12 +21494,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.219: # %middle.block3690 beq $a1, $a2, .LBB9_222 .LBB9_220: # %.lr.ph.i303.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_221: # %.lr.ph.i303 # =>This Inner Loop Header: Depth=1 @@ -20638,12 +21594,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.227: # %middle.block3706 beq $a1, $a2, .LBB9_230 .LBB9_228: # %.lr.ph.i311.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_229: # %.lr.ph.i311 # =>This Inner Loop Header: Depth=1 @@ -20732,12 +21694,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.235: # %middle.block3722 beq $a1, $a2, .LBB9_238 .LBB9_236: # %.lr.ph.i319.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_237: # %.lr.ph.i319 # =>This Inner Loop Header: Depth=1 @@ -20826,12 +21794,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.243: # %middle.block3738 beq $a1, $a2, .LBB9_246 .LBB9_244: # %.lr.ph.i327.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_245: # %.lr.ph.i327 # =>This Inner Loop Header: Depth=1 @@ -20920,12 +21894,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.251: # %middle.block3754 beq $a1, $a2, .LBB9_254 .LBB9_252: # %.lr.ph.i335.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_253: # %.lr.ph.i335 # =>This Inner Loop Header: Depth=1 @@ -21014,12 +21994,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.259: # %middle.block3770 beq $a1, $a2, .LBB9_262 .LBB9_260: # %.lr.ph.i343.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_261: # %.lr.ph.i343 # =>This Inner Loop Header: Depth=1 @@ -21108,12 +22094,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.267: # %middle.block3786 beq $a0, $a1, .LBB9_1187 .LBB9_268: # %.lr.ph.i351.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_269: # %.lr.ph.i351 # =>This Inner Loop Header: Depth=1 @@ -21210,12 +22202,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.274: # %middle.block2005 beq $a1, $a2, .LBB9_277 .LBB9_275: # %.lr.ph.i1117.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_276: # %.lr.ph.i1117 # =>This Inner Loop Header: Depth=1 @@ -21304,12 +22302,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.282: # %middle.block2021 beq $a1, $a2, .LBB9_285 .LBB9_283: # %.lr.ph.i1125.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_284: # %.lr.ph.i1125 # =>This Inner Loop Header: Depth=1 @@ -21398,12 +22402,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.290: # %middle.block2037 beq $a1, $a2, .LBB9_293 .LBB9_291: # %.lr.ph.i1133.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_292: # %.lr.ph.i1133 # =>This Inner Loop Header: Depth=1 @@ -21492,12 +22502,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.298: # %middle.block2053 beq $a0, $a1, .LBB9_1187 .LBB9_299: # %.lr.ph.i1141.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_300: # %.lr.ph.i1141 # =>This Inner Loop Header: Depth=1 @@ -21572,12 +22588,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.303: # %middle.block2869 beq $a1, $a2, .LBB9_306 .LBB9_304: # %.lr.ph.i691.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_305: # %.lr.ph.i691 # =>This Inner Loop Header: Depth=1 @@ -21666,12 +22688,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.311: # %middle.block2885 beq $a0, $a1, .LBB9_1187 .LBB9_312: # %.lr.ph.i699.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_313: # %.lr.ph.i699 # =>This Inner Loop Header: Depth=1 @@ -21746,12 +22774,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.316: # %middle.block2933 beq $a1, $a2, .LBB9_319 .LBB9_317: # %.lr.ph.i659.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_318: # %.lr.ph.i659 # =>This Inner Loop Header: Depth=1 @@ -21840,12 +22874,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.324: # %middle.block2949 beq $a0, $a1, .LBB9_1187 .LBB9_325: # %.lr.ph.i667.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_326: # %.lr.ph.i667 # =>This Inner Loop Header: Depth=1 @@ -21920,12 +22960,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.329: # %middle.block3045 beq $a1, $a2, .LBB9_332 .LBB9_330: # %.lr.ph.i578.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_331: # %.lr.ph.i578 # =>This Inner Loop Header: Depth=1 @@ -22014,12 +23060,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.337: # %middle.block3061 beq $a1, $a2, .LBB9_340 .LBB9_338: # %.lr.ph.i586.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_339: # %.lr.ph.i586 # =>This Inner Loop Header: Depth=1 @@ -22108,12 +23160,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.345: # %middle.block3077 beq $a1, $a2, .LBB9_348 .LBB9_346: # %.lr.ph.i594.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_347: # %.lr.ph.i594 # =>This Inner Loop Header: Depth=1 @@ -22202,12 +23260,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.353: # %middle.block3093 beq $a1, $a2, .LBB9_356 .LBB9_354: # %.lr.ph.i602.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_355: # %.lr.ph.i602 # =>This Inner Loop Header: Depth=1 @@ -22296,12 +23360,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.361: # %middle.block3109 beq $a0, $a1, .LBB9_1187 .LBB9_362: # %.lr.ph.i610.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_363: # %.lr.ph.i610 # =>This Inner Loop Header: Depth=1 @@ -22376,12 +23446,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.366: # %middle.block3029 beq $a0, $a1, .LBB9_1187 .LBB9_367: # %.lr.ph.i619.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_368: # %.lr.ph.i619 # =>This Inner Loop Header: Depth=1 @@ -22456,12 +23532,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.371: # %middle.block3898 beq $a1, $a2, .LBB9_374 .LBB9_372: # %.lr.ph.i.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_373: # %.lr.ph.i # =>This Inner Loop Header: Depth=1 @@ -22550,12 +23632,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.379: # %middle.block3914 beq $a1, $a2, .LBB9_382 .LBB9_380: # %.lr.ph.i167.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_381: # %.lr.ph.i167 # =>This Inner Loop Header: Depth=1 @@ -22644,12 +23732,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.387: # %middle.block3930 beq $a0, $a1, .LBB9_1187 .LBB9_388: # %.lr.ph.i175.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_389: # %.lr.ph.i175 # =>This Inner Loop Header: Depth=1 @@ -22724,12 +23818,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.392: # %middle.block3205 beq $a1, $a2, .LBB9_395 .LBB9_393: # %.lr.ph.i498.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_394: # %.lr.ph.i498 # =>This Inner Loop Header: Depth=1 @@ -22818,12 +23918,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.400: # %middle.block3221 beq $a1, $a2, .LBB9_403 .LBB9_401: # %.lr.ph.i506.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_402: # %.lr.ph.i506 # =>This Inner Loop Header: Depth=1 @@ -22912,12 +24018,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.408: # %middle.block3237 beq $a1, $a2, .LBB9_411 .LBB9_409: # %.lr.ph.i514.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_410: # %.lr.ph.i514 # =>This Inner Loop Header: Depth=1 @@ -23006,12 +24118,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.416: # %middle.block3253 beq $a1, $a2, .LBB9_419 .LBB9_417: # %.lr.ph.i522.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_418: # %.lr.ph.i522 # =>This Inner Loop Header: Depth=1 @@ -23100,12 +24218,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.424: # %middle.block3269 beq $a0, $a1, .LBB9_1187 .LBB9_425: # %.lr.ph.i530.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_426: # %.lr.ph.i530 # =>This Inner Loop Header: Depth=1 @@ -23180,12 +24304,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.429: # %middle.block2245 beq $a1, $a2, .LBB9_432 .LBB9_430: # %.lr.ph.i943.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_431: # %.lr.ph.i943 # =>This Inner Loop Header: Depth=1 @@ -23274,12 +24404,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.437: # %middle.block2261 beq $a1, $a2, .LBB9_440 .LBB9_438: # %.lr.ph.i951.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_439: # %.lr.ph.i951 # =>This Inner Loop Header: Depth=1 @@ -23368,12 +24504,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.445: # %middle.block2277 beq $a1, $a2, .LBB9_448 .LBB9_446: # %.lr.ph.i959.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_447: # %.lr.ph.i959 # =>This Inner Loop Header: Depth=1 @@ -23462,12 +24604,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.453: # %middle.block2293 beq $a1, $a2, .LBB9_456 .LBB9_454: # %.lr.ph.i967.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_455: # %.lr.ph.i967 # =>This Inner Loop Header: Depth=1 @@ -23556,12 +24704,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.461: # %middle.block2309 beq $a1, $a2, .LBB9_464 .LBB9_462: # %.lr.ph.i975.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_463: # %.lr.ph.i975 # =>This Inner Loop Header: Depth=1 @@ -23650,12 +24804,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.469: # %middle.block2325 beq $a1, $a2, .LBB9_472 .LBB9_470: # %.lr.ph.i983.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_471: # %.lr.ph.i983 # =>This Inner Loop Header: Depth=1 @@ -23744,12 +24904,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.477: # %middle.block2341 beq $a1, $a2, .LBB9_480 .LBB9_478: # %.lr.ph.i991.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_479: # %.lr.ph.i991 # =>This Inner Loop Header: Depth=1 @@ -23838,12 +25004,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.485: # %middle.block2357 beq $a1, $a2, .LBB9_488 .LBB9_486: # %.lr.ph.i999.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_487: # %.lr.ph.i999 # =>This Inner Loop Header: Depth=1 @@ -23932,12 +25104,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.493: # %middle.block2373 beq $a1, $a2, .LBB9_496 .LBB9_494: # %.lr.ph.i1007.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_495: # %.lr.ph.i1007 # =>This Inner Loop Header: Depth=1 @@ -24026,12 +25204,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.501: # %middle.block2389 beq $a0, $a1, .LBB9_504 .LBB9_502: # %.lr.ph.i1015.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_503: # %.lr.ph.i1015 # =>This Inner Loop Header: Depth=1 @@ -24136,12 +25320,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.512: # %middle.block2405 beq $a0, $a1, .LBB9_515 .LBB9_513: # %.lr.ph.i891.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_514: # %.lr.ph.i891 # =>This Inner Loop Header: Depth=1 @@ -24230,12 +25420,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.520: # %middle.block2421 beq $a0, $a1, .LBB9_523 .LBB9_521: # %.lr.ph.i899.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_522: # %.lr.ph.i899 # =>This Inner Loop Header: Depth=1 @@ -24324,12 +25520,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.528: # %middle.block2437 beq $a0, $a1, .LBB9_531 .LBB9_529: # %.lr.ph.i907.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_530: # %.lr.ph.i907 # =>This Inner Loop Header: Depth=1 @@ -24418,12 +25620,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.536: # %middle.block2453 beq $a0, $a1, .LBB9_539 .LBB9_537: # %.lr.ph.i915.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_538: # %.lr.ph.i915 # =>This Inner Loop Header: Depth=1 @@ -24512,12 +25720,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.544: # %middle.block2469 beq $a0, $a1, .LBB9_547 .LBB9_545: # %.lr.ph.i923.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_546: # %.lr.ph.i923 # =>This Inner Loop Header: Depth=1 @@ -24624,12 +25838,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.556: # %middle.block2485 beq $a0, $a1, .LBB9_1187 .LBB9_557: # %.lr.ph.i935.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_558: # %.lr.ph.i935 # =>This Inner Loop Header: Depth=1 @@ -24704,12 +25924,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.561: # %middle.block2901 beq $a1, $a2, .LBB9_564 .LBB9_562: # %.lr.ph.i675.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_563: # %.lr.ph.i675 # =>This Inner Loop Header: Depth=1 @@ -24798,12 +26024,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.569: # %middle.block2917 beq $a0, $a1, .LBB9_1187 .LBB9_570: # %.lr.ph.i683.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_571: # %.lr.ph.i683 # =>This Inner Loop Header: Depth=1 @@ -24869,13 +26101,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.574: # %middle.block3318 beq $a2, $a3, .LBB9_577 .LBB9_575: # %.lr.ph.i431.preheader - pcalau12i $a5, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI9_1) - pcalau12i $a5, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI9_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB9_576: # %.lr.ph.i431 # =>This Inner Loop Header: Depth=1 @@ -24959,13 +26197,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.582: # %middle.block3335 beq $a2, $a3, .LBB9_585 .LBB9_583: # %.lr.ph.i440.preheader - pcalau12i $a5, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI9_1) - pcalau12i $a5, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI9_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB9_584: # %.lr.ph.i440 # =>This Inner Loop Header: Depth=1 @@ -25049,13 +26293,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.590: # %middle.block3352 beq $a2, $a3, .LBB9_593 .LBB9_591: # %.lr.ph.i451.preheader - pcalau12i $a5, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI9_1) - pcalau12i $a5, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI9_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB9_592: # %.lr.ph.i451 # =>This Inner Loop Header: Depth=1 @@ -25139,13 +26389,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.598: # %middle.block3369 beq $a2, $a3, .LBB9_601 .LBB9_599: # %.lr.ph.i462.preheader - pcalau12i $a5, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI9_1) - pcalau12i $a5, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI9_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB9_600: # %.lr.ph.i462 # =>This Inner Loop Header: Depth=1 @@ -25229,13 +26485,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.606: # %middle.block3386 beq $a0, $a2, .LBB9_1187 .LBB9_607: # %.lr.ph.i473.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a3, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a0 alsl.d $a1, $a0, $a1, 4 addi.d $a1, $a1, 8 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa3, $a3 .p2align 4, , 16 .LBB9_608: # %.lr.ph.i473 # =>This Inner Loop Header: Depth=1 @@ -25313,12 +26575,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.611: # %middle.block1621 beq $a1, $a2, .LBB9_614 .LBB9_612: # %.lr.ph.i1293.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_613: # %.lr.ph.i1293 # =>This Inner Loop Header: Depth=1 @@ -25407,12 +26675,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.619: # %middle.block1637 beq $a1, $a2, .LBB9_622 .LBB9_620: # %.lr.ph.i1301.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_621: # %.lr.ph.i1301 # =>This Inner Loop Header: Depth=1 @@ -25501,12 +26775,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.627: # %middle.block1653 beq $a1, $a2, .LBB9_630 .LBB9_628: # %.lr.ph.i1309.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_629: # %.lr.ph.i1309 # =>This Inner Loop Header: Depth=1 @@ -25595,12 +26875,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.635: # %middle.block1669 beq $a1, $a2, .LBB9_638 .LBB9_636: # %.lr.ph.i1317.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_637: # %.lr.ph.i1317 # =>This Inner Loop Header: Depth=1 @@ -25689,12 +26975,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.643: # %middle.block1685 beq $a1, $a2, .LBB9_646 .LBB9_644: # %.lr.ph.i1325.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_645: # %.lr.ph.i1325 # =>This Inner Loop Header: Depth=1 @@ -25783,12 +27075,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.651: # %middle.block1701 beq $a0, $a1, .LBB9_1187 .LBB9_652: # %.lr.ph.i1333.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_653: # %.lr.ph.i1333 # =>This Inner Loop Header: Depth=1 @@ -25863,12 +27161,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.656: # %middle.block2821 beq $a1, $a2, .LBB9_659 .LBB9_657: # %.lr.ph.i707.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_658: # %.lr.ph.i707 # =>This Inner Loop Header: Depth=1 @@ -25957,12 +27261,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.664: # %middle.block2837 beq $a1, $a2, .LBB9_667 .LBB9_665: # %.lr.ph.i715.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_666: # %.lr.ph.i715 # =>This Inner Loop Header: Depth=1 @@ -26051,12 +27361,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.672: # %middle.block2853 beq $a0, $a1, .LBB9_1187 .LBB9_673: # %.lr.ph.i723.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_674: # %.lr.ph.i723 # =>This Inner Loop Header: Depth=1 @@ -26131,12 +27447,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.677: # %middle.block2597 beq $a1, $a2, .LBB9_680 .LBB9_678: # %.lr.ph.i827.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_679: # %.lr.ph.i827 # =>This Inner Loop Header: Depth=1 @@ -26225,12 +27547,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.685: # %middle.block2613 beq $a0, $a1, .LBB9_1187 .LBB9_686: # %.lr.ph.i835.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_687: # %.lr.ph.i835 # =>This Inner Loop Header: Depth=1 @@ -26305,12 +27633,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.690: # %middle.block3482 beq $a1, $a2, .LBB9_693 .LBB9_691: # %.lr.ph.i359.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_692: # %.lr.ph.i359 # =>This Inner Loop Header: Depth=1 @@ -26399,12 +27733,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.698: # %middle.block3498 beq $a1, $a2, .LBB9_701 .LBB9_699: # %.lr.ph.i367.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_700: # %.lr.ph.i367 # =>This Inner Loop Header: Depth=1 @@ -26493,12 +27833,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.706: # %middle.block3514 beq $a1, $a2, .LBB9_709 .LBB9_707: # %.lr.ph.i375.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_708: # %.lr.ph.i375 # =>This Inner Loop Header: Depth=1 @@ -26587,12 +27933,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.714: # %middle.block3530 beq $a0, $a1, .LBB9_1187 .LBB9_715: # %.lr.ph.i383.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_716: # %.lr.ph.i383 # =>This Inner Loop Header: Depth=1 @@ -26667,12 +28019,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.719: # %middle.block2565 beq $a1, $a2, .LBB9_722 .LBB9_720: # %.lr.ph.i843.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_721: # %.lr.ph.i843 # =>This Inner Loop Header: Depth=1 @@ -26761,12 +28119,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.727: # %middle.block2581 beq $a0, $a1, .LBB9_1187 .LBB9_728: # %.lr.ph.i851.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_729: # %.lr.ph.i851 # =>This Inner Loop Header: Depth=1 @@ -26841,12 +28205,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.732: # %middle.block2965 beq $a1, $a2, .LBB9_735 .LBB9_733: # %.lr.ph.i627.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_734: # %.lr.ph.i627 # =>This Inner Loop Header: Depth=1 @@ -26935,12 +28305,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.740: # %middle.block2981 beq $a1, $a2, .LBB9_743 .LBB9_741: # %.lr.ph.i635.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_742: # %.lr.ph.i635 # =>This Inner Loop Header: Depth=1 @@ -27029,12 +28405,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.748: # %middle.block2997 beq $a1, $a2, .LBB9_751 .LBB9_749: # %.lr.ph.i643.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_750: # %.lr.ph.i643 # =>This Inner Loop Header: Depth=1 @@ -27123,12 +28505,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.756: # %middle.block3013 beq $a0, $a1, .LBB9_1187 .LBB9_757: # %.lr.ph.i651.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_758: # %.lr.ph.i651 # =>This Inner Loop Header: Depth=1 @@ -27203,12 +28591,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.761: # %middle.block3402 beq $a1, $a2, .LBB9_764 .LBB9_762: # %.lr.ph.i391.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_763: # %.lr.ph.i391 # =>This Inner Loop Header: Depth=1 @@ -27297,12 +28691,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.769: # %middle.block3418 beq $a1, $a2, .LBB9_772 .LBB9_770: # %.lr.ph.i399.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_771: # %.lr.ph.i399 # =>This Inner Loop Header: Depth=1 @@ -27391,12 +28791,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.777: # %middle.block3434 beq $a1, $a2, .LBB9_780 .LBB9_778: # %.lr.ph.i407.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_779: # %.lr.ph.i407 # =>This Inner Loop Header: Depth=1 @@ -27485,12 +28891,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.785: # %middle.block3450 beq $a1, $a2, .LBB9_788 .LBB9_786: # %.lr.ph.i415.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_787: # %.lr.ph.i415 # =>This Inner Loop Header: Depth=1 @@ -27579,12 +28991,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.793: # %middle.block3466 beq $a0, $a1, .LBB9_1187 .LBB9_794: # %.lr.ph.i423.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_795: # %.lr.ph.i423 # =>This Inner Loop Header: Depth=1 @@ -27659,12 +29077,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.798: # %middle.block2741 beq $a1, $a2, .LBB9_801 .LBB9_799: # %.lr.ph.i731.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_800: # %.lr.ph.i731 # =>This Inner Loop Header: Depth=1 @@ -27753,12 +29177,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.806: # %middle.block2757 beq $a1, $a2, .LBB9_809 .LBB9_807: # %.lr.ph.i739.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_808: # %.lr.ph.i739 # =>This Inner Loop Header: Depth=1 @@ -27847,12 +29277,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.814: # %middle.block2773 beq $a1, $a2, .LBB9_817 .LBB9_815: # %.lr.ph.i747.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_816: # %.lr.ph.i747 # =>This Inner Loop Header: Depth=1 @@ -27941,12 +29377,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.822: # %middle.block2789 beq $a1, $a2, .LBB9_825 .LBB9_823: # %.lr.ph.i755.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_824: # %.lr.ph.i755 # =>This Inner Loop Header: Depth=1 @@ -28035,12 +29477,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.830: # %middle.block2805 beq $a0, $a1, .LBB9_1187 .LBB9_831: # %.lr.ph.i763.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_832: # %.lr.ph.i763 # =>This Inner Loop Header: Depth=1 @@ -28115,12 +29563,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.835: # %middle.block3125 beq $a1, $a2, .LBB9_838 .LBB9_836: # %.lr.ph.i538.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_837: # %.lr.ph.i538 # =>This Inner Loop Header: Depth=1 @@ -28209,12 +29663,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.843: # %middle.block3141 beq $a1, $a2, .LBB9_846 .LBB9_844: # %.lr.ph.i546.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_845: # %.lr.ph.i546 # =>This Inner Loop Header: Depth=1 @@ -28303,12 +29763,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.851: # %middle.block3157 beq $a1, $a2, .LBB9_854 .LBB9_852: # %.lr.ph.i554.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_853: # %.lr.ph.i554 # =>This Inner Loop Header: Depth=1 @@ -28397,12 +29863,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.859: # %middle.block3173 beq $a1, $a2, .LBB9_862 .LBB9_860: # %.lr.ph.i562.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_861: # %.lr.ph.i562 # =>This Inner Loop Header: Depth=1 @@ -28491,12 +29963,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.867: # %middle.block3189 beq $a0, $a1, .LBB9_1187 .LBB9_868: # %.lr.ph.i570.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_869: # %.lr.ph.i570 # =>This Inner Loop Header: Depth=1 @@ -28571,12 +30049,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.872: # %middle.block3285 beq $a1, $a2, .LBB9_875 .LBB9_873: # %.lr.ph.i482.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_874: # %.lr.ph.i482 # =>This Inner Loop Header: Depth=1 @@ -28665,12 +30149,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.880: # %middle.block3301 beq $a0, $a1, .LBB9_1187 .LBB9_881: # %.lr.ph.i490.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_882: # %.lr.ph.i490 # =>This Inner Loop Header: Depth=1 @@ -28745,12 +30235,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.885: # %middle.block1717 beq $a1, $a2, .LBB9_888 .LBB9_886: # %.lr.ph.i1253.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_887: # %.lr.ph.i1253 # =>This Inner Loop Header: Depth=1 @@ -28839,12 +30335,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.893: # %middle.block1733 beq $a1, $a2, .LBB9_896 .LBB9_894: # %.lr.ph.i1261.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_895: # %.lr.ph.i1261 # =>This Inner Loop Header: Depth=1 @@ -28933,12 +30435,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.901: # %middle.block1749 beq $a1, $a2, .LBB9_904 .LBB9_902: # %.lr.ph.i1269.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_903: # %.lr.ph.i1269 # =>This Inner Loop Header: Depth=1 @@ -29027,12 +30535,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.909: # %middle.block1765 beq $a1, $a2, .LBB9_912 .LBB9_910: # %.lr.ph.i1277.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_911: # %.lr.ph.i1277 # =>This Inner Loop Header: Depth=1 @@ -29121,12 +30635,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.917: # %middle.block1781 beq $a0, $a1, .LBB9_1187 .LBB9_918: # %.lr.ph.i1285.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_919: # %.lr.ph.i1285 # =>This Inner Loop Header: Depth=1 @@ -29147,41 +30667,39 @@ _Z8loopInitj: # @_Z8loopInitj bstrpick.d $a0, $a1, 30, 2 slli.d $a0, $a0, 2 vreplvei.d $vr1, $vr0, 0 - addi.d $a3, $a2, 16 - ori $a4, $zero, 0 - lu32i.d $a4, 1 - vreplgr2vr.d $vr2, $a4 - lu12i.w $a4, -419431 - ori $a4, $a4, 2458 - lu32i.d $a4, 104857 - lu52i.d $a4, $a4, 1023 - vreplgr2vr.d $vr3, $a4 - lu12i.w $a4, -307024 - ori $a4, $a4, 3880 - lu32i.d $a4, 129446 - lu52i.d $a4, $a4, 1023 - vreplgr2vr.d $vr4, $a4 - move $a4, $a0 + addi.d $a5, $a3, 16 + ori $a6, $zero, 0 + lu32i.d $a6, 1 + vreplgr2vr.d $vr2, $a6 + ori $a6, $a4, 2458 + lu32i.d $a6, 104857 + lu52i.d $a6, $a6, 1023 + vreplgr2vr.d $vr3, $a6 + ori $a6, $a2, 3880 + lu32i.d $a6, 129446 + lu52i.d $a6, $a6, 1023 + vreplgr2vr.d $vr4, $a6 + move $a6, $a0 .p2align 4, , 16 .LBB9_921: # %vector.body # =>This Inner Loop Header: Depth=1 vaddi.wu $vr5, $vr2, 2 - vpickve2gr.w $a5, $vr2, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa6, $a5 + vpickve2gr.w $a7, $vr2, 1 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa6, $a7 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a5, $vr2, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa7, $a5 + vpickve2gr.w $a7, $vr2, 0 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa7, $a7 ffint.d.l $fa7, $fa7 vextrins.d $vr7, $vr6, 16 - vpickve2gr.w $a5, $vr5, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa6, $a5 + vpickve2gr.w $a7, $vr5, 1 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa6, $a7 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a5, $vr5, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa5, $a5 + vpickve2gr.w $a7, $vr5, 0 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa5, $a7 ffint.d.l $fa5, $fa5 vextrins.d $vr5, $vr6, 16 vfadd.d $vr6, $vr7, $vr3 @@ -29192,34 +30710,38 @@ _Z8loopInitj: # @_Z8loopInitj vfadd.d $vr5, $vr5, $vr4 vfdiv.d $vr6, $vr6, $vr7 vfdiv.d $vr5, $vr8, $vr5 - vst $vr6, $a3, -16 - vst $vr5, $a3, 0 + vst $vr6, $a5, -16 + vst $vr5, $a5, 0 vaddi.wu $vr2, $vr2, 4 - addi.d $a4, $a4, -4 - addi.d $a3, $a3, 32 - bnez $a4, .LBB9_921 + addi.d $a6, $a6, -4 + addi.d $a5, $a5, 32 + bnez $a6, .LBB9_921 # %bb.922: # %middle.block beq $a0, $a1, .LBB9_1187 .LBB9_923: # %.lr.ph.i1341.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 - alsl.d $a2, $a0, $a2, 3 + alsl.d $a3, $a0, $a3, 3 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + ori $a2, $a2, 3880 + lu32i.d $a2, 129446 + lu52i.d $a2, $a2, 1023 + movgr2fr.d $fa2, $a2 .p2align 4, , 16 .LBB9_924: # %.lr.ph.i1341 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a0, 31, 0 - movgr2fr.d $fa3, $a3 + bstrpick.d $a2, $a0, 31, 0 + movgr2fr.d $fa3, $a2 ffint.d.l $fa3, $fa3 fadd.d $fa4, $fa3, $fa1 fmul.d $fa4, $fa0, $fa4 fadd.d $fa3, $fa3, $fa2 fdiv.d $fa3, $fa4, $fa3 - fst.d $fa3, $a2, 0 + fst.d $fa3, $a3, 0 addi.d $a1, $a1, -1 - addi.d $a2, $a2, 8 + addi.d $a3, $a3, 8 addi.w $a0, $a0, 1 bnez $a1, .LBB9_924 b .LBB9_1187 @@ -29281,12 +30803,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.927: # %middle.block2533 beq $a1, $a2, .LBB9_930 .LBB9_928: # %.lr.ph.i859.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_929: # %.lr.ph.i859 # =>This Inner Loop Header: Depth=1 @@ -29375,12 +30903,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.935: # %middle.block2549 beq $a0, $a1, .LBB9_1187 .LBB9_936: # %.lr.ph.i867.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_937: # %.lr.ph.i867 # =>This Inner Loop Header: Depth=1 @@ -29455,12 +30989,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.940: # %middle.block2629 beq $a1, $a2, .LBB9_943 .LBB9_941: # %.lr.ph.i771.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_942: # %.lr.ph.i771 # =>This Inner Loop Header: Depth=1 @@ -29549,12 +31089,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.948: # %middle.block2645 beq $a1, $a2, .LBB9_951 .LBB9_949: # %.lr.ph.i779.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_950: # %.lr.ph.i779 # =>This Inner Loop Header: Depth=1 @@ -29643,12 +31189,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.956: # %middle.block2661 beq $a1, $a2, .LBB9_959 .LBB9_957: # %.lr.ph.i787.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_958: # %.lr.ph.i787 # =>This Inner Loop Header: Depth=1 @@ -29737,12 +31289,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.964: # %middle.block2677 beq $a1, $a2, .LBB9_967 .LBB9_965: # %.lr.ph.i795.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_966: # %.lr.ph.i795 # =>This Inner Loop Header: Depth=1 @@ -29831,12 +31389,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.972: # %middle.block2693 beq $a1, $a2, .LBB9_975 .LBB9_973: # %.lr.ph.i803.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_974: # %.lr.ph.i803 # =>This Inner Loop Header: Depth=1 @@ -29925,12 +31489,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.980: # %middle.block2709 beq $a1, $a2, .LBB9_983 .LBB9_981: # %.lr.ph.i811.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_982: # %.lr.ph.i811 # =>This Inner Loop Header: Depth=1 @@ -30019,12 +31589,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.988: # %middle.block2725 beq $a0, $a1, .LBB9_1187 .LBB9_989: # %.lr.ph.i819.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_990: # %.lr.ph.i819 # =>This Inner Loop Header: Depth=1 @@ -30099,12 +31675,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.993: # %middle.block2501 beq $a1, $a2, .LBB9_996 .LBB9_994: # %.lr.ph.i875.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_995: # %.lr.ph.i875 # =>This Inner Loop Header: Depth=1 @@ -30193,12 +31775,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1001: # %middle.block2517 beq $a0, $a1, .LBB9_1187 .LBB9_1002: # %.lr.ph.i883.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_1003: # %.lr.ph.i883 # =>This Inner Loop Header: Depth=1 @@ -30273,12 +31861,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1006: # %middle.block1845 beq $a1, $a2, .LBB9_1009 .LBB9_1007: # %.lr.ph.i1149.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1008: # %.lr.ph.i1149 # =>This Inner Loop Header: Depth=1 @@ -30367,12 +31961,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1014: # %middle.block1861 beq $a1, $a2, .LBB9_1017 .LBB9_1015: # %.lr.ph.i1157.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1016: # %.lr.ph.i1157 # =>This Inner Loop Header: Depth=1 @@ -30461,12 +32061,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1022: # %middle.block1877 beq $a1, $a2, .LBB9_1025 .LBB9_1023: # %.lr.ph.i1165.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1024: # %.lr.ph.i1165 # =>This Inner Loop Header: Depth=1 @@ -30555,12 +32161,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1030: # %middle.block1893 beq $a1, $a2, .LBB9_1033 .LBB9_1031: # %.lr.ph.i1173.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1032: # %.lr.ph.i1173 # =>This Inner Loop Header: Depth=1 @@ -30649,12 +32261,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1038: # %middle.block1909 beq $a1, $a2, .LBB9_1041 .LBB9_1039: # %.lr.ph.i1181.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1040: # %.lr.ph.i1181 # =>This Inner Loop Header: Depth=1 @@ -30743,12 +32361,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1046: # %middle.block1925 beq $a1, $a2, .LBB9_1049 .LBB9_1047: # %.lr.ph.i1189.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1048: # %.lr.ph.i1189 # =>This Inner Loop Header: Depth=1 @@ -30837,12 +32461,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1054: # %middle.block1941 beq $a1, $a2, .LBB9_1057 .LBB9_1055: # %.lr.ph.i1197.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1056: # %.lr.ph.i1197 # =>This Inner Loop Header: Depth=1 @@ -30931,12 +32561,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1062: # %middle.block1957 beq $a1, $a2, .LBB9_1065 .LBB9_1063: # %.lr.ph.i1205.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1064: # %.lr.ph.i1205 # =>This Inner Loop Header: Depth=1 @@ -31025,12 +32661,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1070: # %middle.block1973 beq $a1, $a2, .LBB9_1073 .LBB9_1071: # %.lr.ph.i1213.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1072: # %.lr.ph.i1213 # =>This Inner Loop Header: Depth=1 @@ -31119,12 +32761,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1078: # %middle.block1989 beq $a0, $a1, .LBB9_1187 .LBB9_1079: # %.lr.ph.i1221.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_1080: # %.lr.ph.i1221 # =>This Inner Loop Header: Depth=1 @@ -31199,12 +32847,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1083: # %middle.block1797 beq $a1, $a2, .LBB9_1086 .LBB9_1084: # %.lr.ph.i1229.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1085: # %.lr.ph.i1229 # =>This Inner Loop Header: Depth=1 @@ -31293,12 +32947,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1091: # %middle.block1813 beq $a1, $a2, .LBB9_1094 .LBB9_1092: # %.lr.ph.i1237.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1093: # %.lr.ph.i1237 # =>This Inner Loop Header: Depth=1 @@ -31387,12 +33047,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1099: # %middle.block1829 beq $a0, $a1, .LBB9_1187 .LBB9_1100: # %.lr.ph.i1245.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_1101: # %.lr.ph.i1245 # =>This Inner Loop Header: Depth=1 @@ -31467,12 +33133,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1104: # %middle.block2069 beq $a1, $a2, .LBB9_1107 .LBB9_1105: # %.lr.ph.i1029.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1106: # %.lr.ph.i1029 # =>This Inner Loop Header: Depth=1 @@ -31561,12 +33233,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1112: # %middle.block2085 beq $a1, $a2, .LBB9_1115 .LBB9_1113: # %.lr.ph.i1037.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1114: # %.lr.ph.i1037 # =>This Inner Loop Header: Depth=1 @@ -31655,12 +33333,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1120: # %middle.block2101 beq $a1, $a2, .LBB9_1123 .LBB9_1121: # %.lr.ph.i1045.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1122: # %.lr.ph.i1045 # =>This Inner Loop Header: Depth=1 @@ -31749,12 +33433,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1128: # %middle.block2117 beq $a1, $a2, .LBB9_1131 .LBB9_1129: # %.lr.ph.i1053.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1130: # %.lr.ph.i1053 # =>This Inner Loop Header: Depth=1 @@ -31843,12 +33533,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1136: # %middle.block2133 beq $a1, $a2, .LBB9_1139 .LBB9_1137: # %.lr.ph.i1061.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1138: # %.lr.ph.i1061 # =>This Inner Loop Header: Depth=1 @@ -31937,12 +33633,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1144: # %middle.block2149 beq $a1, $a2, .LBB9_1147 .LBB9_1145: # %.lr.ph.i1069.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1146: # %.lr.ph.i1069 # =>This Inner Loop Header: Depth=1 @@ -32031,12 +33733,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1152: # %middle.block2165 beq $a1, $a2, .LBB9_1155 .LBB9_1153: # %.lr.ph.i1077.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1154: # %.lr.ph.i1077 # =>This Inner Loop Header: Depth=1 @@ -32125,12 +33833,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1160: # %middle.block2181 beq $a1, $a2, .LBB9_1163 .LBB9_1161: # %.lr.ph.i1085.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1162: # %.lr.ph.i1085 # =>This Inner Loop Header: Depth=1 @@ -32219,12 +33933,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1168: # %middle.block2197 beq $a1, $a2, .LBB9_1171 .LBB9_1169: # %.lr.ph.i1093.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1170: # %.lr.ph.i1093 # =>This Inner Loop Header: Depth=1 @@ -32313,12 +34033,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1176: # %middle.block2213 beq $a1, $a2, .LBB9_1179 .LBB9_1177: # %.lr.ph.i1101.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1178: # %.lr.ph.i1101 # =>This Inner Loop Header: Depth=1 @@ -32407,12 +34133,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1184: # %middle.block2229 beq $a0, $a1, .LBB9_1187 .LBB9_1185: # %.lr.ph.i1109.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_1186: # %.lr.ph.i1109 # =>This Inner Loop Header: Depth=1 diff --git a/results/MicroBenchmarks/LCALS/SubsetBRawLoops/CMakeFiles/lcalsBRaw.dir/__/runReferenceLoops.s b/results/MicroBenchmarks/LCALS/SubsetBRawLoops/CMakeFiles/lcalsBRaw.dir/__/runReferenceLoops.s index b31732f8..26d4daf4 100644 --- a/results/MicroBenchmarks/LCALS/SubsetBRawLoops/CMakeFiles/lcalsBRaw.dir/__/runReferenceLoops.s +++ b/results/MicroBenchmarks/LCALS/SubsetBRawLoops/CMakeFiles/lcalsBRaw.dir/__/runReferenceLoops.s @@ -903,14 +903,8 @@ _ZN8LoopStatD2Ev: # @_ZN8LoopStatD2Ev .size _ZN8LoopStatD2Ev, .Lfunc_end3-_ZN8LoopStatD2Ev .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z25computeReferenceLoopTimesv -.LCPI4_0: - .dword 0x3f5426fe718a86d7 # double 0.00123 -.LCPI4_1: - .dword 0xbf5426fe718a86d7 # double -0.00123 .text - .globl _Z25computeReferenceLoopTimesv + .globl _Z25computeReferenceLoopTimesv # -- Begin function _Z25computeReferenceLoopTimesv .p2align 5 .type _Z25computeReferenceLoopTimesv,@function _Z25computeReferenceLoopTimesv: # @_Z25computeReferenceLoopTimesv @@ -1649,13 +1643,16 @@ _Z25computeReferenceLoopTimesv: # @_Z25computeReferenceLoopTimesv pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 st.d $a0, $sp, 640 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI4_0) - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI4_1) ori $a0, $zero, 1 st.b $a0, $sp, 648 + lu12i.w $a0, 465064 + ori $a0, $a0, 1751 + lu32i.d $a0, 272126 + lu52i.d $a1, $a0, 1013 + movgr2fr.d $fs0, $a1 fadd.d $fa0, $fs2, $fs0 + lu52i.d $a0, $a0, -1035 + movgr2fr.d $fs1, $a0 fadd.d $fa1, $fs2, $fs1 fdiv.d $fa0, $fa0, $fa1 fst.d $fa0, $fp, 384 diff --git a/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/LambdaSubsetCbenchmarks.s b/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/LambdaSubsetCbenchmarks.s index e89f5666..49d50ab5 100644 --- a/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/LambdaSubsetCbenchmarks.s +++ b/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/LambdaSubsetCbenchmarks.s @@ -2083,31 +2083,24 @@ _ZL16BM_PIC_1D_LAMBDARN9benchmark5StateE: # @_ZL16BM_PIC_1D_LAMBDARN9benchmark5S .size _ZL16BM_PIC_1D_LAMBDARN9benchmark5StateE, .Lfunc_end12-_ZL16BM_PIC_1D_LAMBDARN9benchmark5StateE .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZL18BM_HYDRO_2D_LAMBDARN9benchmark5StateE -.LCPI13_0: - .dword 0x3f70cb295e9e1b09 # double 0.0041000000000000003 -.LCPI13_1: - .dword 0x3f6e4f765fd8adac # double 0.0037000000000000002 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZL18BM_HYDRO_2D_LAMBDARN9benchmark5StateE .type _ZL18BM_HYDRO_2D_LAMBDARN9benchmark5StateE,@function _ZL18BM_HYDRO_2D_LAMBDARN9benchmark5StateE: # @_ZL18BM_HYDRO_2D_LAMBDARN9benchmark5StateE .cfi_startproc # %bb.0: # %_ZN9benchmark5State13StateIteratorC2EPS0_.exit - addi.d $sp, $sp, -304 - .cfi_def_cfa_offset 304 - st.d $ra, $sp, 296 # 8-byte Folded Spill - st.d $fp, $sp, 288 # 8-byte Folded Spill - st.d $s0, $sp, 280 # 8-byte Folded Spill - st.d $s1, $sp, 272 # 8-byte Folded Spill - st.d $s2, $sp, 264 # 8-byte Folded Spill - st.d $s3, $sp, 256 # 8-byte Folded Spill - st.d $s4, $sp, 248 # 8-byte Folded Spill - st.d $s5, $sp, 240 # 8-byte Folded Spill - st.d $s6, $sp, 232 # 8-byte Folded Spill - st.d $s7, $sp, 224 # 8-byte Folded Spill - st.d $s8, $sp, 216 # 8-byte Folded Spill + addi.d $sp, $sp, -320 + .cfi_def_cfa_offset 320 + st.d $ra, $sp, 312 # 8-byte Folded Spill + st.d $fp, $sp, 304 # 8-byte Folded Spill + st.d $s0, $sp, 296 # 8-byte Folded Spill + st.d $s1, $sp, 288 # 8-byte Folded Spill + st.d $s2, $sp, 280 # 8-byte Folded Spill + st.d $s3, $sp, 272 # 8-byte Folded Spill + st.d $s4, $sp, 264 # 8-byte Folded Spill + st.d $s5, $sp, 256 # 8-byte Folded Spill + st.d $s6, $sp, 248 # 8-byte Folded Spill + st.d $s7, $sp, 240 # 8-byte Folded Spill + st.d $s8, $sp, 232 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -2129,20 +2122,20 @@ _ZL18BM_HYDRO_2D_LAMBDARN9benchmark5StateE: # @_ZL18BM_HYDRO_2D_LAMBDARN9benchma ld.d $s7, $fp, 264 ld.d $s8, $fp, 272 ld.d $a0, $fp, 280 - st.d $a0, $sp, 48 # 8-byte Folded Spill + st.d $a0, $sp, 56 # 8-byte Folded Spill ld.d $a0, $fp, 288 - st.d $a0, $sp, 112 # 8-byte Folded Spill + st.d $a0, $sp, 128 # 8-byte Folded Spill ld.d $a0, $fp, 296 - st.d $a0, $sp, 104 # 8-byte Folded Spill + st.d $a0, $sp, 120 # 8-byte Folded Spill ld.d $a0, $fp, 304 - st.d $a0, $sp, 168 # 8-byte Folded Spill + st.d $a0, $sp, 184 # 8-byte Folded Spill ld.d $a0, $fp, 312 - st.d $a0, $sp, 128 # 8-byte Folded Spill + st.d $a0, $sp, 144 # 8-byte Folded Spill ld.d $a0, $fp, 320 - st.d $a0, $sp, 120 # 8-byte Folded Spill + st.d $a0, $sp, 136 # 8-byte Folded Spill ld.d $a0, $s1, 32 ld.d $a1, $fp, 328 - st.d $a1, $sp, 136 # 8-byte Folded Spill + st.d $a1, $sp, 152 # 8-byte Folded Spill ld.d $s2, $fp, 336 ld.d $s3, $fp, 344 ld.d $fp, $a0, 0 @@ -2158,36 +2151,38 @@ _ZL18BM_HYDRO_2D_LAMBDARN9benchmark5StateE: # @_ZL18BM_HYDRO_2D_LAMBDARN9benchma # %bb.2: # %.preheader99.lr.ph bstrpick.d $a0, $fp, 30, 0 addi.d $a1, $a0, -1 - st.d $a0, $sp, 152 # 8-byte Folded Spill + st.d $a0, $sp, 168 # 8-byte Folded Spill slli.d $a0, $a0, 3 addi.d $a2, $a0, 8 - st.d $a2, $sp, 80 # 8-byte Folded Spill - st.d $a0, $sp, 208 # 8-byte Folded Spill + st.d $a2, $sp, 96 # 8-byte Folded Spill + st.d $a0, $sp, 224 # 8-byte Folded Spill addi.d $a0, $a0, -8 - st.d $a0, $sp, 144 # 8-byte Folded Spill - move $t6, $a1 - bstrins.d $t6, $zero, 0, 0 - st.d $a1, $sp, 176 # 8-byte Folded Spill + st.d $a0, $sp, 160 # 8-byte Folded Spill + move $t5, $a1 + bstrins.d $t5, $zero, 0, 0 + st.d $a1, $sp, 192 # 8-byte Folded Spill ori $a0, $a1, 1 - st.d $a0, $sp, 72 # 8-byte Folded Spill - addi.w $t7, $fp, 0 - ori $t8, $zero, 2 - lu12i.w $a0, 392586 - ori $a0, $a0, 3500 - lu32i.d $a0, -110730 - lu52i.d $a0, $a0, 1014 - vreplgr2vr.d $vr0, $a0 + st.d $a0, $sp, 88 # 8-byte Folded Spill + addi.w $t6, $fp, 0 + ori $t7, $zero, 2 lu12i.w $a0, 387553 ori $a0, $a0, 2825 lu32i.d $a0, 52009 lu52i.d $a0, $a0, 1015 - vreplgr2vr.d $vr1, $a0 - st.d $s7, $sp, 96 # 8-byte Folded Spill - st.d $s8, $sp, 88 # 8-byte Folded Spill - st.d $s2, $sp, 40 # 8-byte Folded Spill - st.d $s3, $sp, 32 # 8-byte Folded Spill - st.d $t6, $sp, 64 # 8-byte Folded Spill - st.d $t7, $sp, 24 # 8-byte Folded Spill + st.d $a0, $sp, 72 # 8-byte Folded Spill + movgr2fr.d $fa0, $a0 + lu12i.w $a0, 392586 + ori $a0, $a0, 3500 + lu32i.d $a0, -110730 + lu52i.d $t8, $a0, 1014 + movgr2fr.d $fa1, $t8 + st.d $s7, $sp, 112 # 8-byte Folded Spill + st.d $s8, $sp, 104 # 8-byte Folded Spill + st.d $s2, $sp, 48 # 8-byte Folded Spill + st.d $s3, $sp, 40 # 8-byte Folded Spill + st.d $t5, $sp, 80 # 8-byte Folded Spill + st.d $t6, $sp, 32 # 8-byte Folded Spill + st.d $t8, $sp, 24 # 8-byte Folded Spill b .LBB13_4 .p2align 4, , 16 .LBB13_3: # %.split.us @@ -2212,22 +2207,22 @@ _ZL18BM_HYDRO_2D_LAMBDARN9benchmark5StateE: # @_ZL18BM_HYDRO_2D_LAMBDARN9benchma # Child Loop BB13_69 Depth 2 # Child Loop BB13_131 Depth 2 # Child Loop BB13_72 Depth 2 - blt $t7, $t8, .LBB13_3 + blt $t6, $t7, .LBB13_3 # %bb.5: # %.preheader96.us.preheader # in Loop: Header=BB13_4 Depth=1 - st.d $s4, $sp, 56 # 8-byte Folded Spill - ld.d $a0, $sp, 112 # 8-byte Folded Reload + st.d $s4, $sp, 64 # 8-byte Folded Spill + ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $t8, $a0, 8 - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 120 # 8-byte Folded Reload ld.d $t7, $a0, 8 - ld.d $ra, $sp, 48 # 8-byte Folded Reload + ld.d $ra, $sp, 56 # 8-byte Folded Reload ld.d $a3, $ra, 8 ori $a6, $zero, 1 b .LBB13_7 .p2align 4, , 16 .LBB13_6: # %"._Z6forallIZL18BM_HYDRO_2D_LAMBDARN9benchmark5StateEE3$_0Ev9simd_execiiT_.exit_crit_edge.us" # in Loop: Header=BB13_7 Depth=2 - move $a3, $t3 + move $a3, $t1 move $t7, $t0 move $t8, $a7 ori $a0, $zero, 6 @@ -2237,31 +2232,31 @@ _ZL18BM_HYDRO_2D_LAMBDARN9benchmark5StateE: # @_ZL18BM_HYDRO_2D_LAMBDARN9benchma # => This Loop Header: Depth=2 # Child Loop BB13_28 Depth 3 # Child Loop BB13_9 Depth 3 - ld.d $a5, $sp, 168 # 8-byte Folded Reload + ld.d $a5, $sp, 184 # 8-byte Folded Reload alsl.d $a0, $a6, $a5, 3 slli.d $a1, $a6, 3 addi.d $a6, $a6, 1 slli.d $a2, $a6, 3 - ld.d $a4, $sp, 112 # 8-byte Folded Reload + ld.d $a4, $sp, 128 # 8-byte Folded Reload ldx.d $a7, $a4, $a2 - ld.d $a4, $sp, 104 # 8-byte Folded Reload + ld.d $a4, $sp, 120 # 8-byte Folded Reload ldx.d $t0, $a4, $a2 ldx.d $s0, $a5, $a1 - ldx.d $t3, $ra, $a2 - ldx.d $t4, $s7, $a1 + ldx.d $t1, $ra, $a2 + ldx.d $t2, $s7, $a1 ld.d $t5, $a0, -8 ldx.d $t6, $s8, $a1 ori $s5, $zero, 1 - ld.d $a0, $sp, 176 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload ori $a1, $zero, 4 bgeu $a0, $a1, .LBB13_10 .LBB13_8: # %scalar.ph501.preheader # in Loop: Header=BB13_7 Depth=2 - ld.d $a0, $sp, 152 # 8-byte Folded Reload - sub.d $a4, $a0, $s5 - slli.d $a5, $s5, 3 - addi.d $t1, $t3, -8 - addi.d $t2, $s0, -8 + ld.d $a0, $sp, 168 # 8-byte Folded Reload + sub.d $a0, $a0, $s5 + slli.d $a4, $s5, 3 + addi.d $a5, $t1, -8 + addi.d $t4, $s0, -8 addi.d $s0, $a7, -8 addi.d $s3, $t0, -8 addi.d $t8, $t8, -8 @@ -2271,46 +2266,46 @@ _ZL18BM_HYDRO_2D_LAMBDARN9benchmark5StateE: # @_ZL18BM_HYDRO_2D_LAMBDARN9benchma # Parent Loop BB13_4 Depth=1 # Parent Loop BB13_7 Depth=2 # => This Inner Loop Header: Depth=3 - fldx.d $fa2, $s0, $a5 - fldx.d $fa3, $s3, $a5 - fldx.d $fa4, $t8, $a5 + fldx.d $fa2, $s0, $a4 + fldx.d $fa3, $s3, $a4 + fldx.d $fa4, $t8, $a4 fadd.d $fa2, $fa2, $fa3 - add.d $a0, $t8, $a5 + add.d $a1, $t8, $a4 fsub.d $fa2, $fa2, $fa4 - fldx.d $fa3, $t7, $a5 - add.d $a1, $t2, $a5 - fld.d $fa4, $a1, 8 - fldx.d $fa5, $t2, $a5 - add.d $a2, $a3, $a5 - fld.d $fa6, $a2, -8 - fldx.d $fa7, $t1, $a5 + fldx.d $fa3, $t7, $a4 + add.d $a2, $t4, $a4 + fld.d $fa4, $a2, 8 + fldx.d $fa5, $t4, $a4 + add.d $t3, $a3, $a4 + fld.d $fa6, $t3, -8 + fldx.d $fa7, $a5, $a4 fsub.d $fa2, $fa2, $fa3 fadd.d $fa3, $fa4, $fa5 fmul.d $fa2, $fa2, $fa3 fadd.d $fa3, $fa6, $fa7 fdiv.d $fa2, $fa2, $fa3 - fstx.d $fa2, $t4, $a5 - fldx.d $fa2, $t8, $a5 - fldx.d $fa3, $t7, $a5 - fld.d $fa4, $a0, 8 - add.d $a0, $t7, $a5 + fstx.d $fa2, $t2, $a4 + fldx.d $fa2, $t8, $a4 + fldx.d $fa3, $t7, $a4 + fld.d $fa4, $a1, 8 + add.d $a1, $t7, $a4 fadd.d $fa2, $fa2, $fa3 fsub.d $fa2, $fa2, $fa4 - fld.d $fa3, $a0, 8 - fld.d $fa4, $a1, 8 - fldx.d $fa5, $t5, $a5 - fldx.d $fa6, $a3, $a5 - fld.d $fa7, $a2, -8 + fld.d $fa3, $a1, 8 + fld.d $fa4, $a2, 8 + fldx.d $fa5, $t5, $a4 + fldx.d $fa6, $a3, $a4 + fld.d $fa7, $t3, -8 fsub.d $fa2, $fa2, $fa3 fadd.d $fa3, $fa4, $fa5 fmul.d $fa2, $fa2, $fa3 fadd.d $fa3, $fa6, $fa7 fdiv.d $fa2, $fa2, $fa3 - fstx.d $fa2, $t6, $a5 - addi.d $a4, $a4, -1 - addi.d $t4, $t4, 8 - addi.d $t1, $t1, 8 + fstx.d $fa2, $t6, $a4 + addi.d $a0, $a0, -1 addi.d $t2, $t2, 8 + addi.d $a5, $a5, 8 + addi.d $t4, $t4, 8 addi.d $t5, $t5, 8 addi.d $a3, $a3, 8 addi.d $t6, $t6, 8 @@ -2318,232 +2313,232 @@ _ZL18BM_HYDRO_2D_LAMBDARN9benchmark5StateE: # @_ZL18BM_HYDRO_2D_LAMBDARN9benchma addi.d $s3, $s3, 8 addi.d $t8, $t8, 8 addi.d $t7, $t7, 8 - bnez $a4, .LBB13_9 + bnez $a0, .LBB13_9 b .LBB13_6 .p2align 4, , 16 .LBB13_10: # %vector.memcheck420 # in Loop: Header=BB13_7 Depth=2 - addi.d $t1, $t4, 8 - ld.d $a1, $sp, 208 # 8-byte Folded Reload - add.d $a0, $t4, $a1 - addi.d $t2, $t6, 8 - add.d $a4, $t6, $a1 - sltu $a1, $t1, $a4 - sltu $a2, $t2, $a0 - and $a1, $a1, $a2 + addi.d $s3, $t2, 8 + ld.d $a0, $sp, 224 # 8-byte Folded Reload + add.d $a1, $t2, $a0 + addi.d $s4, $t6, 8 + add.d $a0, $t6, $a0 + sltu $a2, $s3, $a0 + sltu $a4, $s4, $a1 + and $a2, $a2, $a4 ori $s5, $zero, 1 - bnez $a1, .LBB13_8 + bnez $a2, .LBB13_8 # %bb.11: # %vector.memcheck420 # in Loop: Header=BB13_7 Depth=2 - ld.d $a1, $sp, 144 # 8-byte Folded Reload - add.d $a1, $a7, $a1 - sltu $a2, $t1, $a1 - sltu $a5, $a7, $a0 - and $a2, $a2, $a5 + ld.d $a2, $sp, 160 # 8-byte Folded Reload + add.d $a2, $a7, $a2 + sltu $a4, $s3, $a2 + sltu $a5, $a7, $a1 + and $a4, $a4, $a5 ori $s5, $zero, 1 - bnez $a2, .LBB13_8 + bnez $a4, .LBB13_8 # %bb.12: # %vector.memcheck420 # in Loop: Header=BB13_7 Depth=2 - ld.d $a2, $sp, 144 # 8-byte Folded Reload - add.d $a2, $t0, $a2 - sltu $a5, $t1, $a2 - sltu $fp, $t0, $a0 - and $a5, $a5, $fp + ld.d $a4, $sp, 160 # 8-byte Folded Reload + add.d $a5, $t0, $a4 + sltu $a4, $s3, $a5 + sltu $t3, $t0, $a1 + and $a4, $a4, $t3 ori $s5, $zero, 1 - bnez $a5, .LBB13_8 + bnez $a4, .LBB13_8 # %bb.13: # %vector.memcheck420 # in Loop: Header=BB13_7 Depth=2 - ld.d $a5, $sp, 208 # 8-byte Folded Reload - add.d $fp, $t8, $a5 - sltu $a5, $t1, $fp - sltu $s1, $t8, $a0 - and $a5, $a5, $s1 + ld.d $a4, $sp, 224 # 8-byte Folded Reload + add.d $t3, $t8, $a4 + sltu $a4, $s3, $t3 + sltu $t4, $t8, $a1 + and $a4, $a4, $t4 ori $s5, $zero, 1 - bnez $a5, .LBB13_8 + bnez $a4, .LBB13_8 # %bb.14: # %vector.memcheck420 # in Loop: Header=BB13_7 Depth=2 - ld.d $a5, $sp, 208 # 8-byte Folded Reload - add.d $s1, $t7, $a5 - sltu $a5, $t1, $s1 - sltu $s2, $t7, $a0 - and $a5, $a5, $s2 + ld.d $a4, $sp, 224 # 8-byte Folded Reload + add.d $t4, $t7, $a4 + sltu $a4, $s3, $t4 + sltu $fp, $t7, $a1 + and $a4, $a4, $fp ori $s5, $zero, 1 - bnez $a5, .LBB13_8 + bnez $a4, .LBB13_8 # %bb.15: # %vector.memcheck420 # in Loop: Header=BB13_7 Depth=2 - ld.d $a5, $sp, 208 # 8-byte Folded Reload - add.d $s2, $s0, $a5 - sltu $a5, $t1, $s2 - sltu $s3, $s0, $a0 - and $a5, $a5, $s3 + ld.d $a4, $sp, 224 # 8-byte Folded Reload + add.d $fp, $s0, $a4 + sltu $a4, $s3, $fp + sltu $s1, $s0, $a1 + and $a4, $a4, $s1 ori $s5, $zero, 1 - bnez $a5, .LBB13_8 + bnez $a4, .LBB13_8 # %bb.16: # %vector.memcheck420 # in Loop: Header=BB13_7 Depth=2 - ld.d $a5, $sp, 208 # 8-byte Folded Reload - add.d $s3, $a3, $a5 - sltu $a5, $t1, $s3 - sltu $s4, $a3, $a0 - and $a5, $a5, $s4 + ld.d $a4, $sp, 224 # 8-byte Folded Reload + add.d $s1, $a3, $a4 + sltu $a4, $s3, $s1 + sltu $s2, $a3, $a1 + and $a4, $a4, $s2 ori $s5, $zero, 1 - bnez $a5, .LBB13_8 + bnez $a4, .LBB13_8 # %bb.17: # %vector.memcheck420 # in Loop: Header=BB13_7 Depth=2 - ld.d $a5, $sp, 144 # 8-byte Folded Reload - add.d $s4, $t3, $a5 - sltu $a5, $t1, $s4 - sltu $s5, $t3, $a0 - and $a5, $a5, $s5 + ld.d $a4, $sp, 160 # 8-byte Folded Reload + add.d $s2, $t1, $a4 + sltu $a4, $s3, $s2 + sltu $s5, $t1, $a1 + and $a4, $a4, $s5 ori $s5, $zero, 1 - bnez $a5, .LBB13_8 + bnez $a4, .LBB13_8 # %bb.18: # %vector.memcheck420 # in Loop: Header=BB13_7 Depth=2 - addi.d $a5, $t5, 8 - ld.d $s5, $sp, 208 # 8-byte Folded Reload + addi.d $a4, $t5, 8 + ld.d $s5, $sp, 224 # 8-byte Folded Reload add.d $s6, $t5, $s5 - sltu $s5, $t1, $s6 - sltu $a0, $a5, $a0 - and $a0, $s5, $a0 + sltu $s5, $s3, $s6 + sltu $a1, $a4, $a1 + and $a1, $s5, $a1 ori $s5, $zero, 1 - bnez $a0, .LBB13_8 + bnez $a1, .LBB13_8 # %bb.19: # %vector.memcheck420 # in Loop: Header=BB13_7 Depth=2 - sltu $a0, $t2, $a1 - sltu $a1, $a7, $a4 - and $a0, $a0, $a1 + sltu $a1, $s4, $a2 + sltu $a2, $a7, $a0 + and $a1, $a1, $a2 ori $s5, $zero, 1 - bnez $a0, .LBB13_8 + bnez $a1, .LBB13_8 # %bb.20: # %vector.memcheck420 # in Loop: Header=BB13_7 Depth=2 - sltu $a0, $t2, $a2 - sltu $a1, $t0, $a4 - and $a0, $a0, $a1 + sltu $a1, $s4, $a5 + sltu $a2, $t0, $a0 + and $a1, $a1, $a2 ori $s5, $zero, 1 - bnez $a0, .LBB13_8 + bnez $a1, .LBB13_8 # %bb.21: # %vector.memcheck420 # in Loop: Header=BB13_7 Depth=2 - sltu $a0, $t2, $fp - sltu $a1, $t8, $a4 - and $a0, $a0, $a1 + sltu $a1, $s4, $t3 + sltu $a2, $t8, $a0 + and $a1, $a1, $a2 ori $s5, $zero, 1 - bnez $a0, .LBB13_8 + bnez $a1, .LBB13_8 # %bb.22: # %vector.memcheck420 # in Loop: Header=BB13_7 Depth=2 - sltu $a0, $t2, $s1 - sltu $a1, $t7, $a4 - and $a0, $a0, $a1 + sltu $a1, $s4, $t4 + sltu $a2, $t7, $a0 + and $a1, $a1, $a2 ori $s5, $zero, 1 - bnez $a0, .LBB13_8 + bnez $a1, .LBB13_8 # %bb.23: # %vector.memcheck420 # in Loop: Header=BB13_7 Depth=2 - sltu $a0, $t2, $s2 - sltu $a1, $s0, $a4 - and $a0, $a0, $a1 + sltu $a1, $s4, $fp + sltu $a2, $s0, $a0 + and $a1, $a1, $a2 ori $s5, $zero, 1 - bnez $a0, .LBB13_8 + bnez $a1, .LBB13_8 # %bb.24: # %vector.memcheck420 # in Loop: Header=BB13_7 Depth=2 - sltu $a0, $t2, $s3 - sltu $a1, $a3, $a4 - and $a0, $a0, $a1 + sltu $a1, $s4, $s1 + sltu $a2, $a3, $a0 + and $a1, $a1, $a2 ori $s5, $zero, 1 - bnez $a0, .LBB13_8 + bnez $a1, .LBB13_8 # %bb.25: # %vector.memcheck420 # in Loop: Header=BB13_7 Depth=2 - sltu $a0, $t2, $s4 - sltu $a1, $t3, $a4 - and $a0, $a0, $a1 + sltu $a1, $s4, $s2 + sltu $a2, $t1, $a0 + and $a1, $a1, $a2 ori $s5, $zero, 1 - bnez $a0, .LBB13_8 + bnez $a1, .LBB13_8 # %bb.26: # %vector.memcheck420 # in Loop: Header=BB13_7 Depth=2 - sltu $a0, $t2, $s6 - sltu $a1, $a5, $a4 - and $a0, $a0, $a1 + sltu $a1, $s4, $s6 + sltu $a0, $a4, $a0 + and $a0, $a1, $a0 ori $s5, $zero, 1 bnez $a0, .LBB13_8 # %bb.27: # %vector.body506.preheader # in Loop: Header=BB13_7 Depth=2 - addi.d $a4, $s0, 8 - move $s3, $t7 - move $s4, $t8 - move $a0, $a7 + addi.d $a0, $s0, 8 + move $a5, $t7 + move $t4, $t8 + move $s1, $a7 move $s5, $t0 - move $s1, $a3 - ld.d $a1, $sp, 64 # 8-byte Folded Reload - move $fp, $a1 - move $s2, $t3 + move $fp, $a3 + ld.d $a2, $sp, 80 # 8-byte Folded Reload + move $a1, $a2 + move $s2, $t1 .p2align 4, , 16 .LBB13_28: # %vector.body506 # Parent Loop BB13_4 Depth=1 # Parent Loop BB13_7 Depth=2 # => This Inner Loop Header: Depth=3 - vld $vr2, $a0, 0 + vld $vr2, $s1, 0 vld $vr3, $s5, 0 - vld $vr4, $s4, 0 + vld $vr4, $t4, 0 vfadd.d $vr2, $vr2, $vr3 vfsub.d $vr2, $vr2, $vr4 - vld $vr3, $s3, 0 - vld $vr5, $a4, 0 - vld $vr6, $a4, -8 - vld $vr7, $s1, 0 + vld $vr3, $a5, 0 + vld $vr5, $a0, 0 + vld $vr6, $a0, -8 + vld $vr7, $fp, 0 vld $vr8, $s2, 0 vfsub.d $vr2, $vr2, $vr3 vfadd.d $vr6, $vr5, $vr6 vfmul.d $vr2, $vr2, $vr6 vfadd.d $vr6, $vr7, $vr8 vfdiv.d $vr2, $vr2, $vr6 - vst $vr2, $t1, 0 - vld $vr2, $s4, 8 + vst $vr2, $s3, 0 + vld $vr2, $t4, 8 vfadd.d $vr3, $vr4, $vr3 - vld $vr4, $s3, 8 - vld $vr6, $a5, 0 + vld $vr4, $a5, 8 + vld $vr6, $a4, 0 vfsub.d $vr2, $vr3, $vr2 - vld $vr3, $s1, 8 + vld $vr3, $fp, 8 vfsub.d $vr2, $vr2, $vr4 vfadd.d $vr4, $vr5, $vr6 vfmul.d $vr2, $vr2, $vr4 vfadd.d $vr3, $vr3, $vr7 vfdiv.d $vr2, $vr2, $vr3 - vst $vr2, $t2, 0 + vst $vr2, $s4, 0 addi.d $s2, $s2, 16 - addi.d $fp, $fp, -2 - addi.d $t2, $t2, 16 - addi.d $s1, $s1, 16 - addi.d $a5, $a5, 16 + addi.d $a1, $a1, -2 + addi.d $s4, $s4, 16 + addi.d $fp, $fp, 16 + addi.d $a4, $a4, 16 addi.d $s5, $s5, 16 + addi.d $s1, $s1, 16 addi.d $a0, $a0, 16 - addi.d $a4, $a4, 16 - addi.d $t1, $t1, 16 - addi.d $s4, $s4, 16 addi.d $s3, $s3, 16 - bnez $fp, .LBB13_28 + addi.d $t4, $t4, 16 + addi.d $a5, $a5, 16 + bnez $a1, .LBB13_28 # %bb.29: # %middle.block526 # in Loop: Header=BB13_7 Depth=2 - ld.d $s5, $sp, 72 # 8-byte Folded Reload - ld.d $a0, $sp, 176 # 8-byte Folded Reload - beq $a0, $a1, .LBB13_6 + ld.d $s5, $sp, 88 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload + beq $a0, $a2, .LBB13_6 b .LBB13_8 .p2align 4, , 16 .LBB13_30: # %.preheader95.us.preheader # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 136 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $t8, $a0, 8 + ld.d $t7, $s8, 8 + ld.d $a0, $sp, 184 # 8-byte Folded Reload ld.d $t6, $a0, 8 - ld.d $t5, $s8, 8 - ld.d $a0, $sp, 168 # 8-byte Folded Reload - ld.d $t4, $a0, 8 ori $a1, $zero, 1 b .LBB13_32 .p2align 4, , 16 .LBB13_31: # %"._Z6forallIZL18BM_HYDRO_2D_LAMBDARN9benchmark5StateEE3$_1Ev9simd_execiiT_.exit_crit_edge.us" # in Loop: Header=BB13_32 Depth=2 - ld.d $t4, $sp, 184 # 8-byte Folded Reload - ld.d $t5, $sp, 200 # 8-byte Folded Reload - ld.d $t6, $sp, 192 # 8-byte Folded Reload - ld.d $s7, $sp, 96 # 8-byte Folded Reload - ld.d $s8, $sp, 88 # 8-byte Folded Reload + ld.d $t6, $sp, 200 # 8-byte Folded Reload + ld.d $t7, $sp, 216 # 8-byte Folded Reload + ld.d $t8, $sp, 208 # 8-byte Folded Reload + ld.d $s7, $sp, 112 # 8-byte Folded Reload + ld.d $s8, $sp, 104 # 8-byte Folded Reload ori $a0, $zero, 6 - ld.d $a1, $sp, 160 # 8-byte Folded Reload + ld.d $a1, $sp, 176 # 8-byte Folded Reload beq $a1, $a0, .LBB13_57 .LBB13_32: # %.preheader95.us # Parent Loop BB13_4 Depth=1 @@ -2552,955 +2547,956 @@ _ZL18BM_HYDRO_2D_LAMBDARN9benchmark5StateE: # @_ZL18BM_HYDRO_2D_LAMBDARN9benchma # Child Loop BB13_34 Depth 3 move $a0, $a1 slli.d $a1, $a1, 3 - ldx.d $t8, $s7, $a1 - ld.d $a4, $sp, 136 # 8-byte Folded Reload + ldx.d $a7, $s7, $a1 + ld.d $a4, $sp, 152 # 8-byte Folded Reload alsl.d $a2, $a0, $a4, 3 - ld.d $ra, $a2, -8 - ld.d $a3, $sp, 168 # 8-byte Folded Reload + ld.d $t0, $a2, -8 + ld.d $a3, $sp, 184 # 8-byte Folded Reload alsl.d $a2, $a0, $a3, 3 addi.d $a0, $a0, 1 - st.d $a0, $sp, 160 # 8-byte Folded Spill + st.d $a0, $sp, 176 # 8-byte Folded Spill slli.d $a0, $a0, 3 ldx.d $a5, $s8, $a0 - st.d $a5, $sp, 200 # 8-byte Folded Spill + st.d $a5, $sp, 216 # 8-byte Folded Spill ldx.d $a4, $a4, $a0 - st.d $a4, $sp, 192 # 8-byte Folded Spill - ld.d $a4, $sp, 128 # 8-byte Folded Reload - ldx.d $t3, $a4, $a1 - ld.d $a6, $a2, -8 + st.d $a4, $sp, 208 # 8-byte Folded Spill + ld.d $a4, $sp, 144 # 8-byte Folded Reload + ldx.d $a6, $a4, $a1 + ld.d $t1, $a2, -8 ldx.d $a0, $a3, $a0 - st.d $a0, $sp, 184 # 8-byte Folded Spill - ld.d $a0, $sp, 120 # 8-byte Folded Reload - ldx.d $a3, $a0, $a1 - ori $s0, $zero, 1 - ld.d $a0, $sp, 176 # 8-byte Folded Reload + st.d $a0, $sp, 200 # 8-byte Folded Spill + ld.d $a0, $sp, 136 # 8-byte Folded Reload + ldx.d $t2, $a0, $a1 + ori $a3, $zero, 1 + ld.d $a0, $sp, 192 # 8-byte Folded Reload ori $a1, $zero, 4 bgeu $a0, $a1, .LBB13_35 .LBB13_33: # %scalar.ph388.preheader # in Loop: Header=BB13_32 Depth=2 - ld.d $a0, $sp, 152 # 8-byte Folded Reload - sub.d $a4, $a0, $s0 - slli.d $a5, $s0, 3 - ld.d $t1, $sp, 192 # 8-byte Folded Reload - ld.d $t2, $sp, 200 # 8-byte Folded Reload - ld.d $s3, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 168 # 8-byte Folded Reload + sub.d $a0, $a0, $a3 + slli.d $a4, $a3, 3 + ld.d $a5, $sp, 208 # 8-byte Folded Reload + ld.d $t4, $sp, 216 # 8-byte Folded Reload + ld.d $s3, $sp, 200 # 8-byte Folded Reload .p2align 4, , 16 .LBB13_34: # %scalar.ph388 # Parent Loop BB13_4 Depth=1 # Parent Loop BB13_32 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $a0, $t6, $a5 - fldx.d $fa2, $t6, $a5 - fld.d $fa3, $a0, 8 - add.d $a1, $t8, $a5 - fld.d $fa4, $a0, -8 - fldx.d $fa5, $t8, $a5 + add.d $a1, $t8, $a4 + fldx.d $fa2, $t8, $a4 + fld.d $fa3, $a1, 8 + add.d $a2, $a7, $a4 + fld.d $fa4, $a1, -8 + fldx.d $fa5, $a7, $a4 fsub.d $fa3, $fa2, $fa3 - fld.d $fa6, $a1, -8 + fld.d $fa6, $a2, -8 fsub.d $fa4, $fa2, $fa4 fneg.d $fa4, $fa4 - fldx.d $fa7, $ra, $a5 - fldx.d $ft0, $t5, $a5 + fldx.d $fa7, $t0, $a4 + fldx.d $ft0, $t7, $a4 fmul.d $fa4, $fa6, $fa4 fmadd.d $fa3, $fa5, $fa3, $fa4 fsub.d $fa4, $fa2, $fa7 fneg.d $fa5, $ft0 - fldx.d $fa6, $t2, $a5 - fldx.d $fa7, $t1, $a5 - fldx.d $ft0, $t3, $a5 - pcalau12i $a0, %pc_hi20(.LCPI13_0) - fld.d $ft1, $a0, %pc_lo12(.LCPI13_0) + fldx.d $fa6, $a5, $a4 + fldx.d $fa7, $t4, $a4 + fldx.d $ft0, $a6, $a4 fmadd.d $fa3, $fa5, $fa4, $fa3 - fsub.d $fa2, $fa2, $fa7 - fmadd.d $fa2, $fa6, $fa2, $fa3 - fmadd.d $fa2, $fa2, $ft1, $ft0 - fstx.d $fa2, $t3, $a5 - add.d $a0, $t4, $a5 - fldx.d $fa2, $t4, $a5 - fld.d $fa3, $a0, 8 - fld.d $fa4, $a0, -8 - fldx.d $fa5, $t8, $a5 + fsub.d $fa2, $fa2, $fa6 + fmadd.d $fa2, $fa7, $fa2, $fa3 + fmadd.d $fa2, $fa2, $fa0, $ft0 + fstx.d $fa2, $a6, $a4 + add.d $a1, $t6, $a4 + fldx.d $fa2, $t6, $a4 + fld.d $fa3, $a1, 8 + fld.d $fa4, $a1, -8 + fldx.d $fa5, $a7, $a4 fsub.d $fa3, $fa2, $fa3 - fld.d $fa6, $a1, -8 + fld.d $fa6, $a2, -8 fsub.d $fa4, $fa2, $fa4 fneg.d $fa4, $fa4 - fldx.d $fa7, $a6, $a5 - fldx.d $ft0, $t5, $a5 + fldx.d $fa7, $t1, $a4 + fldx.d $ft0, $t7, $a4 fmul.d $fa4, $fa6, $fa4 fmadd.d $fa3, $fa5, $fa3, $fa4 fsub.d $fa4, $fa2, $fa7 fneg.d $fa5, $ft0 - fldx.d $fa6, $s3, $a5 - fldx.d $fa7, $t2, $a5 - fldx.d $ft0, $a3, $a5 + fldx.d $fa6, $s3, $a4 + fldx.d $fa7, $t4, $a4 + fldx.d $ft0, $t2, $a4 fmadd.d $fa3, $fa5, $fa4, $fa3 fsub.d $fa2, $fa2, $fa6 fmadd.d $fa2, $fa7, $fa2, $fa3 - fmadd.d $fa2, $fa2, $ft1, $ft0 - fstx.d $fa2, $a3, $a5 - addi.d $a4, $a4, -1 - addi.d $a3, $a3, 8 + fmadd.d $fa2, $fa2, $fa0, $ft0 + fstx.d $fa2, $t2, $a4 + addi.d $a0, $a0, -1 + addi.d $t2, $t2, 8 addi.d $s3, $s3, 8 + addi.d $a7, $a7, 8 addi.d $t8, $t8, 8 - addi.d $t6, $t6, 8 - addi.d $t5, $t5, 8 - addi.d $ra, $ra, 8 - addi.d $t2, $t2, 8 - addi.d $t1, $t1, 8 - addi.d $t3, $t3, 8 + addi.d $t7, $t7, 8 + addi.d $t0, $t0, 8 addi.d $t4, $t4, 8 + addi.d $a5, $a5, 8 addi.d $a6, $a6, 8 - bnez $a4, .LBB13_34 + addi.d $t6, $t6, 8 + addi.d $t1, $t1, 8 + bnez $a0, .LBB13_34 b .LBB13_31 .p2align 4, , 16 .LBB13_35: # %vector.memcheck297 # in Loop: Header=BB13_32 Depth=2 - addi.d $t1, $t3, 8 - ld.d $a1, $sp, 208 # 8-byte Folded Reload - add.d $a0, $t3, $a1 - addi.d $t2, $a3, 8 - add.d $a4, $a3, $a1 - sltu $a1, $t1, $a4 - sltu $a2, $t2, $a0 + addi.d $s3, $a6, 8 + ld.d $a0, $sp, 224 # 8-byte Folded Reload + add.d $s1, $a6, $a0 + addi.d $s4, $t2, 8 + add.d $a0, $t2, $a0 + sltu $a1, $s3, $a0 + sltu $a2, $s4, $s1 and $a1, $a1, $a2 - ori $s0, $zero, 1 + ori $a3, $zero, 1 bnez $a1, .LBB13_33 # %bb.36: # %vector.memcheck297 # in Loop: Header=BB13_32 Depth=2 - ld.d $a1, $sp, 208 # 8-byte Folded Reload - add.d $s1, $t8, $a1 - sltu $a1, $t1, $s1 - sltu $a2, $t8, $a0 + ld.d $a1, $sp, 224 # 8-byte Folded Reload + add.d $s2, $a7, $a1 + sltu $a1, $s3, $s2 + sltu $a2, $a7, $s1 and $a1, $a1, $a2 - ori $s0, $zero, 1 + ori $a3, $zero, 1 bnez $a1, .LBB13_33 # %bb.37: # %vector.memcheck297 # in Loop: Header=BB13_32 Depth=2 - ld.d $a1, $sp, 80 # 8-byte Folded Reload - add.d $s2, $t6, $a1 - sltu $a1, $t1, $s2 - sltu $a2, $t6, $a0 + ld.d $a1, $sp, 96 # 8-byte Folded Reload + add.d $fp, $t8, $a1 + sltu $a1, $s3, $fp + sltu $a2, $t8, $s1 and $a1, $a1, $a2 - ori $s0, $zero, 1 + ori $a3, $zero, 1 bnez $a1, .LBB13_33 # %bb.38: # %vector.memcheck297 # in Loop: Header=BB13_32 Depth=2 - addi.d $s3, $t5, 8 - ld.d $a1, $sp, 208 # 8-byte Folded Reload - add.d $fp, $t5, $a1 - sltu $a1, $t1, $fp - sltu $a2, $s3, $a0 - and $a1, $a1, $a2 - ori $s0, $zero, 1 - bnez $a1, .LBB13_33 + addi.d $s5, $t7, 8 + ld.d $a1, $sp, 224 # 8-byte Folded Reload + add.d $a1, $t7, $a1 + sltu $a2, $s3, $a1 + sltu $a3, $s5, $s1 + and $a2, $a2, $a3 + ori $a3, $zero, 1 + bnez $a2, .LBB13_33 # %bb.39: # %vector.memcheck297 # in Loop: Header=BB13_32 Depth=2 - addi.d $s4, $ra, 8 - ld.d $a1, $sp, 208 # 8-byte Folded Reload - add.d $a1, $ra, $a1 - sltu $a2, $t1, $a1 - sltu $a5, $s4, $a0 - and $a2, $a2, $a5 - ori $s0, $zero, 1 + addi.d $a5, $t0, 8 + ld.d $a2, $sp, 224 # 8-byte Folded Reload + add.d $s6, $t0, $a2 + sltu $a2, $s3, $s6 + sltu $a3, $a5, $s1 + and $a2, $a2, $a3 + ori $a3, $zero, 1 bnez $a2, .LBB13_33 # %bb.40: # %vector.memcheck297 # in Loop: Header=BB13_32 Depth=2 - ld.d $a5, $sp, 200 # 8-byte Folded Reload - addi.d $s5, $a5, 8 - ld.d $a2, $sp, 208 # 8-byte Folded Reload - add.d $s6, $a5, $a2 - sltu $a2, $t1, $s6 - sltu $a5, $s5, $a0 - and $a2, $a2, $a5 - ori $s0, $zero, 1 - bnez $a2, .LBB13_33 + ld.d $a3, $sp, 216 # 8-byte Folded Reload + addi.d $s7, $a3, 8 + ld.d $a2, $sp, 224 # 8-byte Folded Reload + add.d $a2, $a3, $a2 + sltu $a3, $s3, $a2 + sltu $a4, $s7, $s1 + and $a4, $a3, $a4 + ori $a3, $zero, 1 + bnez $a4, .LBB13_33 # %bb.41: # %vector.memcheck297 # in Loop: Header=BB13_32 Depth=2 - ld.d $a7, $sp, 192 # 8-byte Folded Reload - addi.d $a5, $a7, 8 - ld.d $a2, $sp, 208 # 8-byte Folded Reload - add.d $a2, $a7, $a2 - sltu $a7, $t1, $a2 - sltu $t0, $a5, $a0 - and $a7, $a7, $t0 - ori $s0, $zero, 1 - bnez $a7, .LBB13_33 + ld.d $a4, $sp, 208 # 8-byte Folded Reload + addi.d $s8, $a4, 8 + ld.d $a3, $sp, 224 # 8-byte Folded Reload + add.d $t3, $a4, $a3 + sltu $a3, $s3, $t3 + sltu $a4, $s8, $s1 + and $a4, $a3, $a4 + ori $a3, $zero, 1 + bnez $a4, .LBB13_33 # %bb.42: # %vector.memcheck297 # in Loop: Header=BB13_32 Depth=2 - ld.d $a7, $sp, 80 # 8-byte Folded Reload - add.d $t0, $t4, $a7 - sltu $a7, $t1, $t0 - sltu $t7, $t4, $a0 - and $a7, $a7, $t7 - ori $s0, $zero, 1 - bnez $a7, .LBB13_33 + ld.d $a3, $sp, 96 # 8-byte Folded Reload + add.d $s0, $t6, $a3 + sltu $a3, $s3, $s0 + sltu $a4, $t6, $s1 + and $a4, $a3, $a4 + ori $a3, $zero, 1 + bnez $a4, .LBB13_33 # %bb.43: # %vector.memcheck297 # in Loop: Header=BB13_32 Depth=2 - addi.d $s7, $a6, 8 - ld.d $a7, $sp, 208 # 8-byte Folded Reload - add.d $a7, $a6, $a7 - sltu $t7, $t1, $a7 - sltu $s0, $s7, $a0 - and $t7, $t7, $s0 - ori $s0, $zero, 1 - bnez $t7, .LBB13_33 + addi.d $t4, $t1, 8 + ld.d $a3, $sp, 224 # 8-byte Folded Reload + add.d $t5, $t1, $a3 + sltu $a3, $s3, $t5 + sltu $a4, $t4, $s1 + and $a4, $a3, $a4 + ori $a3, $zero, 1 + bnez $a4, .LBB13_33 # %bb.44: # %vector.memcheck297 # in Loop: Header=BB13_32 Depth=2 - ld.d $s0, $sp, 184 # 8-byte Folded Reload - addi.d $s8, $s0, 8 - ld.d $t7, $sp, 208 # 8-byte Folded Reload - add.d $t7, $s0, $t7 - sltu $s0, $t1, $t7 - sltu $a0, $s8, $a0 - and $a0, $s0, $a0 - ori $s0, $zero, 1 - bnez $a0, .LBB13_33 + ld.d $ra, $sp, 200 # 8-byte Folded Reload + addi.d $a4, $ra, 8 + ld.d $a3, $sp, 224 # 8-byte Folded Reload + add.d $ra, $ra, $a3 + sltu $a3, $s3, $ra + sltu $s1, $a4, $s1 + and $s1, $a3, $s1 + ori $a3, $zero, 1 + bnez $s1, .LBB13_33 # %bb.45: # %vector.memcheck297 # in Loop: Header=BB13_32 Depth=2 - sltu $a0, $t2, $s1 - sltu $s0, $t8, $a4 - and $a0, $a0, $s0 - ori $s0, $zero, 1 - bnez $a0, .LBB13_33 + sltu $a3, $s4, $s2 + sltu $s1, $a7, $a0 + and $s1, $a3, $s1 + ori $a3, $zero, 1 + bnez $s1, .LBB13_33 # %bb.46: # %vector.memcheck297 # in Loop: Header=BB13_32 Depth=2 - sltu $a0, $t2, $s2 - sltu $s0, $t6, $a4 - and $a0, $a0, $s0 - ori $s0, $zero, 1 - bnez $a0, .LBB13_33 + sltu $a3, $s4, $fp + sltu $fp, $t8, $a0 + and $fp, $a3, $fp + ori $a3, $zero, 1 + bnez $fp, .LBB13_33 # %bb.47: # %vector.memcheck297 # in Loop: Header=BB13_32 Depth=2 - sltu $a0, $t2, $fp - sltu $fp, $s3, $a4 - and $a0, $a0, $fp - ori $s0, $zero, 1 - bnez $a0, .LBB13_33 + sltu $a1, $s4, $a1 + sltu $a3, $s5, $a0 + and $a1, $a1, $a3 + ori $a3, $zero, 1 + bnez $a1, .LBB13_33 # %bb.48: # %vector.memcheck297 # in Loop: Header=BB13_32 Depth=2 - sltu $a0, $t2, $a1 - sltu $a1, $s4, $a4 - and $a0, $a0, $a1 - ori $s0, $zero, 1 - bnez $a0, .LBB13_33 + sltu $a1, $s4, $s6 + sltu $a3, $a5, $a0 + and $a1, $a1, $a3 + ori $a3, $zero, 1 + bnez $a1, .LBB13_33 # %bb.49: # %vector.memcheck297 # in Loop: Header=BB13_32 Depth=2 - sltu $a0, $t2, $s6 - sltu $a1, $s5, $a4 - and $a0, $a0, $a1 - ori $s0, $zero, 1 - bnez $a0, .LBB13_33 + sltu $a1, $s4, $a2 + sltu $a2, $s7, $a0 + and $a1, $a1, $a2 + ori $a3, $zero, 1 + bnez $a1, .LBB13_33 # %bb.50: # %vector.memcheck297 # in Loop: Header=BB13_32 Depth=2 - sltu $a0, $t2, $a2 - sltu $a1, $a5, $a4 - and $a0, $a0, $a1 - ori $s0, $zero, 1 - bnez $a0, .LBB13_33 + sltu $a1, $s4, $t3 + sltu $a2, $s8, $a0 + and $a1, $a1, $a2 + ori $a3, $zero, 1 + bnez $a1, .LBB13_33 # %bb.51: # %vector.memcheck297 # in Loop: Header=BB13_32 Depth=2 - sltu $a0, $t2, $t0 - sltu $a1, $t4, $a4 - and $a0, $a0, $a1 - ori $s0, $zero, 1 - bnez $a0, .LBB13_33 + sltu $a1, $s4, $s0 + sltu $a2, $t6, $a0 + and $a1, $a1, $a2 + ori $a3, $zero, 1 + bnez $a1, .LBB13_33 # %bb.52: # %vector.memcheck297 # in Loop: Header=BB13_32 Depth=2 - sltu $a0, $t2, $a7 - sltu $a1, $s7, $a4 - and $a0, $a0, $a1 - ori $s0, $zero, 1 - bnez $a0, .LBB13_33 + sltu $a1, $s4, $t5 + sltu $a2, $t4, $a0 + and $a1, $a1, $a2 + ori $a3, $zero, 1 + bnez $a1, .LBB13_33 # %bb.53: # %vector.memcheck297 # in Loop: Header=BB13_32 Depth=2 - sltu $a0, $t2, $t7 - sltu $a1, $s8, $a4 - and $a0, $a0, $a1 - ori $s0, $zero, 1 + sltu $a1, $s4, $ra + sltu $a0, $a4, $a0 + and $a0, $a1, $a0 + ori $a3, $zero, 1 bnez $a0, .LBB13_33 # %bb.54: # %vector.body393.preheader # in Loop: Header=BB13_32 Depth=2 move $a0, $zero - addi.d $a4, $t4, 8 - addi.d $a1, $t8, 8 - addi.d $fp, $t6, 8 - ld.d $t0, $sp, 64 # 8-byte Folded Reload - move $s1, $t0 + addi.d $a1, $t6, 8 + addi.d $fp, $a7, 8 + addi.d $s1, $t8, 8 + ld.d $t3, $sp, 80 # 8-byte Folded Reload + move $s2, $t3 + ld.d $t5, $sp, 72 # 8-byte Folded Reload .p2align 4, , 16 .LBB13_55: # %vector.body393 # Parent Loop BB13_4 Depth=1 # Parent Loop BB13_32 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $a2, $fp, $a0 - vldx $vr2, $fp, $a0 + add.d $a2, $s1, $a0 + vldx $vr2, $s1, $a0 vld $vr3, $a2, 8 - add.d $a7, $a1, $a0 + add.d $a3, $fp, $a0 vld $vr4, $a2, -8 - vldx $vr5, $a1, $a0 + vldx $vr5, $fp, $a0 vfsub.d $vr3, $vr2, $vr3 - vld $vr6, $a7, -8 + vld $vr6, $a3, -8 vfsub.d $vr4, $vr2, $vr4 vbitrevi.d $vr4, $vr4, 63 - vldx $vr7, $s4, $a0 + vldx $vr7, $a5, $a0 vfmul.d $vr4, $vr6, $vr4 vfmadd.d $vr3, $vr5, $vr3, $vr4 - vldx $vr4, $s3, $a0 + vldx $vr4, $s5, $a0 vfsub.d $vr7, $vr2, $vr7 - vldx $vr8, $a5, $a0 - vldx $vr9, $s5, $a0 + vldx $vr8, $s8, $a0 + vldx $vr9, $s7, $a0 vbitrevi.d $vr4, $vr4, 63 vfmadd.d $vr3, $vr4, $vr7, $vr3 vfsub.d $vr2, $vr2, $vr8 vfmadd.d $vr2, $vr9, $vr2, $vr3 - vldx $vr3, $t1, $a0 - add.d $a2, $a4, $a0 - vldx $vr7, $a4, $a0 - vld $vr8, $a2, -8 - vfmadd.d $vr2, $vr2, $vr1, $vr3 - vstx $vr2, $t1, $a0 + vldx $vr3, $s3, $a0 + vreplgr2vr.d $vr7, $t5 + add.d $a2, $a1, $a0 + vldx $vr8, $a1, $a0 + vld $vr10, $a2, -8 + vfmadd.d $vr2, $vr2, $vr7, $vr3 + vstx $vr2, $s3, $a0 vld $vr2, $a2, 8 - vfsub.d $vr3, $vr7, $vr8 + vfsub.d $vr3, $vr8, $vr10 vbitrevi.d $vr3, $vr3, 63 vfmul.d $vr3, $vr6, $vr3 - vldx $vr6, $s7, $a0 - vfsub.d $vr2, $vr7, $vr2 + vldx $vr6, $t4, $a0 + vfsub.d $vr2, $vr8, $vr2 vfmadd.d $vr2, $vr5, $vr2, $vr3 - vldx $vr3, $s8, $a0 - vfsub.d $vr5, $vr7, $vr6 - vldx $vr6, $t2, $a0 + vldx $vr3, $a4, $a0 + vfsub.d $vr5, $vr8, $vr6 + vldx $vr6, $s4, $a0 vfmadd.d $vr2, $vr4, $vr5, $vr2 - vfsub.d $vr3, $vr7, $vr3 + vfsub.d $vr3, $vr8, $vr3 vfmadd.d $vr2, $vr9, $vr3, $vr2 - vfmadd.d $vr2, $vr2, $vr1, $vr6 - vstx $vr2, $t2, $a0 - addi.d $s1, $s1, -2 + vfmadd.d $vr2, $vr2, $vr7, $vr6 + vstx $vr2, $s4, $a0 + addi.d $s2, $s2, -2 addi.d $a0, $a0, 16 - bnez $s1, .LBB13_55 + bnez $s2, .LBB13_55 # %bb.56: # %middle.block417 # in Loop: Header=BB13_32 Depth=2 - ld.d $s0, $sp, 72 # 8-byte Folded Reload - ld.d $a0, $sp, 176 # 8-byte Folded Reload - beq $a0, $t0, .LBB13_31 + ld.d $a3, $sp, 88 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload + beq $a0, $t3, .LBB13_31 b .LBB13_33 .p2align 4, , 16 .LBB13_57: # %.preheader.us.preheader # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 168 # 8-byte Folded Reload - ld.d $a3, $a0, 8 - ld.d $a0, $sp, 128 # 8-byte Folded Reload - ld.d $a4, $a0, 8 - ld.d $s2, $sp, 40 # 8-byte Folded Reload - ld.d $a5, $s2, 8 - ld.d $a0, $sp, 136 # 8-byte Folded Reload - ld.d $a6, $a0, 8 - ld.d $a0, $sp, 120 # 8-byte Folded Reload - ld.d $a7, $a0, 8 - ld.d $s3, $sp, 32 # 8-byte Folded Reload - ld.d $t0, $s3, 8 - ori $a0, $zero, 1 - pcalau12i $t4, %pc_hi20(.LCPI13_1) - ld.d $a1, $sp, 176 # 8-byte Folded Reload - ori $t8, $zero, 2 - bgeu $a1, $t8, .LBB13_73 + ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $a0, 8 + ld.d $a1, $sp, 144 # 8-byte Folded Reload + ld.d $a3, $a1, 8 + ld.d $s2, $sp, 48 # 8-byte Folded Reload + ld.d $a4, $s2, 8 + ld.d $a1, $sp, 152 # 8-byte Folded Reload + ld.d $a5, $a1, 8 + ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $a6, $a1, 8 + ld.d $s3, $sp, 40 # 8-byte Folded Reload + ld.d $a7, $s3, 8 + ori $a1, $zero, 1 + ld.d $a2, $sp, 192 # 8-byte Folded Reload + ori $t7, $zero, 2 + bgeu $a2, $t7, .LBB13_73 # %bb.58: # in Loop: Header=BB13_4 Depth=1 - ld.d $s4, $sp, 56 # 8-byte Folded Reload - ld.d $t6, $sp, 64 # 8-byte Folded Reload - ld.d $t7, $sp, 24 # 8-byte Folded Reload + ld.d $s4, $sp, 64 # 8-byte Folded Reload + ld.d $t5, $sp, 80 # 8-byte Folded Reload + ld.d $t6, $sp, 32 # 8-byte Folded Reload + ld.d $t8, $sp, 24 # 8-byte Folded Reload ori $fp, $zero, 16 .LBB13_59: # %scalar.ph281.preheader # in Loop: Header=BB13_4 Depth=1 - alsl.d $a1, $a0, $a3, 3 - alsl.d $a2, $a0, $a4, 3 - alsl.d $a3, $a0, $a5, 3 - alsl.d $a4, $a0, $a6, 3 - alsl.d $a5, $a0, $a7, 3 - alsl.d $a6, $a0, $t0, 3 - ld.d $a7, $sp, 152 # 8-byte Folded Reload - sub.d $a0, $a7, $a0 + alsl.d $a0, $a1, $a0, 3 + alsl.d $a2, $a1, $a3, 3 + alsl.d $a3, $a1, $a4, 3 + alsl.d $a4, $a1, $a5, 3 + alsl.d $a5, $a1, $a6, 3 + alsl.d $a6, $a1, $a7, 3 + ld.d $a7, $sp, 168 # 8-byte Folded Reload + sub.d $a1, $a7, $a1 .p2align 4, , 16 .LBB13_60: # %scalar.ph281 # Parent Loop BB13_4 Depth=1 # => This Inner Loop Header: Depth=2 - fld.d $fa2, $a1, 0 + fld.d $fa2, $a0, 0 fld.d $fa3, $a2, 0 - fld.d $fa4, $t4, %pc_lo12(.LCPI13_1) - fmadd.d $fa2, $fa3, $fa4, $fa2 + fmadd.d $fa2, $fa3, $fa1, $fa2 fst.d $fa2, $a3, 0 fld.d $fa2, $a4, 0 fld.d $fa3, $a5, 0 - fmadd.d $fa2, $fa3, $fa4, $fa2 + fmadd.d $fa2, $fa3, $fa1, $fa2 fst.d $fa2, $a6, 0 - addi.d $a1, $a1, 8 + addi.d $a0, $a0, 8 addi.d $a2, $a2, 8 addi.d $a3, $a3, 8 addi.d $a4, $a4, 8 addi.d $a5, $a5, 8 - addi.d $a0, $a0, -1 + addi.d $a1, $a1, -1 addi.d $a6, $a6, 8 - bnez $a0, .LBB13_60 + bnez $a1, .LBB13_60 .LBB13_61: # %"._Z6forallIZL18BM_HYDRO_2D_LAMBDARN9benchmark5StateEE3$_2Ev9simd_execiiT_.exit_crit_edge.us" # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 168 # 8-byte Folded Reload - ld.d $a3, $a0, 16 - ld.d $a0, $sp, 128 # 8-byte Folded Reload - ld.d $a4, $a0, 16 - ld.d $a5, $s2, 16 - ld.d $a0, $sp, 136 # 8-byte Folded Reload - ld.d $a6, $a0, 16 - ld.d $a0, $sp, 120 # 8-byte Folded Reload - ld.d $a7, $a0, 16 - ld.d $t0, $s3, 16 - ori $a0, $zero, 1 - ld.d $a1, $sp, 176 # 8-byte Folded Reload - bgeu $a1, $t8, .LBB13_85 + ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $a0, 16 + ld.d $a1, $sp, 144 # 8-byte Folded Reload + ld.d $a3, $a1, 16 + ld.d $a4, $s2, 16 + ld.d $a1, $sp, 152 # 8-byte Folded Reload + ld.d $a5, $a1, 16 + ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $a6, $a1, 16 + ld.d $a7, $s3, 16 + ori $a1, $zero, 1 + ld.d $a2, $sp, 192 # 8-byte Folded Reload + bgeu $a2, $t7, .LBB13_85 .LBB13_62: # %scalar.ph247.preheader # in Loop: Header=BB13_4 Depth=1 - ld.d $a1, $sp, 152 # 8-byte Folded Reload - sub.d $a1, $a1, $a0 - alsl.d $a2, $a0, $t0, 3 - alsl.d $a7, $a0, $a7, 3 - alsl.d $a6, $a0, $a6, 3 - alsl.d $a5, $a0, $a5, 3 - alsl.d $a4, $a0, $a4, 3 - alsl.d $a0, $a0, $a3, 3 + ld.d $a2, $sp, 168 # 8-byte Folded Reload + sub.d $a2, $a2, $a1 + alsl.d $a7, $a1, $a7, 3 + alsl.d $a6, $a1, $a6, 3 + alsl.d $a5, $a1, $a5, 3 + alsl.d $a4, $a1, $a4, 3 + alsl.d $a3, $a1, $a3, 3 + alsl.d $a0, $a1, $a0, 3 .p2align 4, , 16 .LBB13_63: # %scalar.ph247 # Parent Loop BB13_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa2, $a0, 0 - fld.d $fa3, $a4, 0 - fld.d $fa4, $t4, %pc_lo12(.LCPI13_1) - fmadd.d $fa2, $fa3, $fa4, $fa2 - fst.d $fa2, $a5, 0 - fld.d $fa2, $a6, 0 - fld.d $fa3, $a7, 0 - fmadd.d $fa2, $fa3, $fa4, $fa2 - fst.d $fa2, $a2, 0 - addi.d $a1, $a1, -1 - addi.d $a2, $a2, 8 + fld.d $fa3, $a3, 0 + fmadd.d $fa2, $fa3, $fa1, $fa2 + fst.d $fa2, $a4, 0 + fld.d $fa2, $a5, 0 + fld.d $fa3, $a6, 0 + fmadd.d $fa2, $fa3, $fa1, $fa2 + fst.d $fa2, $a7, 0 + addi.d $a2, $a2, -1 addi.d $a7, $a7, 8 addi.d $a6, $a6, 8 addi.d $a5, $a5, 8 addi.d $a4, $a4, 8 + addi.d $a3, $a3, 8 addi.d $a0, $a0, 8 - bnez $a1, .LBB13_63 + bnez $a2, .LBB13_63 .LBB13_64: # %"._Z6forallIZL18BM_HYDRO_2D_LAMBDARN9benchmark5StateEE3$_2Ev9simd_execiiT_.exit_crit_edge.us.1" # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 168 # 8-byte Folded Reload - ld.d $a3, $a0, 24 - ld.d $a0, $sp, 128 # 8-byte Folded Reload - ld.d $a4, $a0, 24 - ld.d $a5, $s2, 24 - ld.d $a0, $sp, 136 # 8-byte Folded Reload - ld.d $a6, $a0, 24 - ld.d $a0, $sp, 120 # 8-byte Folded Reload - ld.d $a7, $a0, 24 - ld.d $t0, $s3, 24 - ori $a0, $zero, 1 - ld.d $a1, $sp, 176 # 8-byte Folded Reload - bgeu $a1, $t8, .LBB13_97 + ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $a0, 24 + ld.d $a1, $sp, 144 # 8-byte Folded Reload + ld.d $a3, $a1, 24 + ld.d $a4, $s2, 24 + ld.d $a1, $sp, 152 # 8-byte Folded Reload + ld.d $a5, $a1, 24 + ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $a6, $a1, 24 + ld.d $a7, $s3, 24 + ori $a1, $zero, 1 + ld.d $a2, $sp, 192 # 8-byte Folded Reload + bgeu $a2, $t7, .LBB13_97 .LBB13_65: # %scalar.ph213.preheader # in Loop: Header=BB13_4 Depth=1 - ld.d $a1, $sp, 152 # 8-byte Folded Reload - sub.d $a1, $a1, $a0 - alsl.d $a2, $a0, $t0, 3 - alsl.d $a7, $a0, $a7, 3 - alsl.d $a6, $a0, $a6, 3 - alsl.d $a5, $a0, $a5, 3 - alsl.d $a4, $a0, $a4, 3 - alsl.d $a0, $a0, $a3, 3 + ld.d $a2, $sp, 168 # 8-byte Folded Reload + sub.d $a2, $a2, $a1 + alsl.d $a7, $a1, $a7, 3 + alsl.d $a6, $a1, $a6, 3 + alsl.d $a5, $a1, $a5, 3 + alsl.d $a4, $a1, $a4, 3 + alsl.d $a3, $a1, $a3, 3 + alsl.d $a0, $a1, $a0, 3 .p2align 4, , 16 .LBB13_66: # %scalar.ph213 # Parent Loop BB13_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa2, $a0, 0 - fld.d $fa3, $a4, 0 - fld.d $fa4, $t4, %pc_lo12(.LCPI13_1) - fmadd.d $fa2, $fa3, $fa4, $fa2 - fst.d $fa2, $a5, 0 - fld.d $fa2, $a6, 0 - fld.d $fa3, $a7, 0 - fmadd.d $fa2, $fa3, $fa4, $fa2 - fst.d $fa2, $a2, 0 - addi.d $a1, $a1, -1 - addi.d $a2, $a2, 8 + fld.d $fa3, $a3, 0 + fmadd.d $fa2, $fa3, $fa1, $fa2 + fst.d $fa2, $a4, 0 + fld.d $fa2, $a5, 0 + fld.d $fa3, $a6, 0 + fmadd.d $fa2, $fa3, $fa1, $fa2 + fst.d $fa2, $a7, 0 + addi.d $a2, $a2, -1 addi.d $a7, $a7, 8 addi.d $a6, $a6, 8 addi.d $a5, $a5, 8 addi.d $a4, $a4, 8 + addi.d $a3, $a3, 8 addi.d $a0, $a0, 8 - bnez $a1, .LBB13_66 + bnez $a2, .LBB13_66 .LBB13_67: # %"._Z6forallIZL18BM_HYDRO_2D_LAMBDARN9benchmark5StateEE3$_2Ev9simd_execiiT_.exit_crit_edge.us.2" # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 168 # 8-byte Folded Reload - ld.d $a3, $a0, 32 - ld.d $a0, $sp, 128 # 8-byte Folded Reload - ld.d $a4, $a0, 32 - ld.d $a5, $s2, 32 - ld.d $a0, $sp, 136 # 8-byte Folded Reload - ld.d $a6, $a0, 32 - ld.d $a0, $sp, 120 # 8-byte Folded Reload - ld.d $a7, $a0, 32 - ld.d $t0, $s3, 32 - ori $a0, $zero, 1 - ld.d $a1, $sp, 176 # 8-byte Folded Reload - bgeu $a1, $t8, .LBB13_109 + ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $a0, 32 + ld.d $a1, $sp, 144 # 8-byte Folded Reload + ld.d $a3, $a1, 32 + ld.d $a4, $s2, 32 + ld.d $a1, $sp, 152 # 8-byte Folded Reload + ld.d $a5, $a1, 32 + ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $a6, $a1, 32 + ld.d $a7, $s3, 32 + ori $a1, $zero, 1 + ld.d $a2, $sp, 192 # 8-byte Folded Reload + bgeu $a2, $t7, .LBB13_109 .LBB13_68: # %scalar.ph179.preheader # in Loop: Header=BB13_4 Depth=1 - ld.d $a1, $sp, 152 # 8-byte Folded Reload - sub.d $a1, $a1, $a0 - alsl.d $a2, $a0, $t0, 3 - alsl.d $a7, $a0, $a7, 3 - alsl.d $a6, $a0, $a6, 3 - alsl.d $a5, $a0, $a5, 3 - alsl.d $a4, $a0, $a4, 3 - alsl.d $a0, $a0, $a3, 3 + ld.d $a2, $sp, 168 # 8-byte Folded Reload + sub.d $a2, $a2, $a1 + alsl.d $a7, $a1, $a7, 3 + alsl.d $a6, $a1, $a6, 3 + alsl.d $a5, $a1, $a5, 3 + alsl.d $a4, $a1, $a4, 3 + alsl.d $a3, $a1, $a3, 3 + alsl.d $a0, $a1, $a0, 3 .p2align 4, , 16 .LBB13_69: # %scalar.ph179 # Parent Loop BB13_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa2, $a0, 0 - fld.d $fa3, $a4, 0 - fld.d $fa4, $t4, %pc_lo12(.LCPI13_1) - fmadd.d $fa2, $fa3, $fa4, $fa2 - fst.d $fa2, $a5, 0 - fld.d $fa2, $a6, 0 - fld.d $fa3, $a7, 0 - fmadd.d $fa2, $fa3, $fa4, $fa2 - fst.d $fa2, $a2, 0 - addi.d $a1, $a1, -1 - addi.d $a2, $a2, 8 + fld.d $fa3, $a3, 0 + fmadd.d $fa2, $fa3, $fa1, $fa2 + fst.d $fa2, $a4, 0 + fld.d $fa2, $a5, 0 + fld.d $fa3, $a6, 0 + fmadd.d $fa2, $fa3, $fa1, $fa2 + fst.d $fa2, $a7, 0 + addi.d $a2, $a2, -1 addi.d $a7, $a7, 8 addi.d $a6, $a6, 8 addi.d $a5, $a5, 8 addi.d $a4, $a4, 8 + addi.d $a3, $a3, 8 addi.d $a0, $a0, 8 - bnez $a1, .LBB13_69 + bnez $a2, .LBB13_69 .LBB13_70: # %"._Z6forallIZL18BM_HYDRO_2D_LAMBDARN9benchmark5StateEE3$_2Ev9simd_execiiT_.exit_crit_edge.us.3" # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 168 # 8-byte Folded Reload - ld.d $a3, $a0, 40 - ld.d $a0, $sp, 128 # 8-byte Folded Reload - ld.d $a4, $a0, 40 - ld.d $a5, $s2, 40 - ld.d $a0, $sp, 136 # 8-byte Folded Reload - ld.d $a6, $a0, 40 - ld.d $a0, $sp, 120 # 8-byte Folded Reload - ld.d $a7, $a0, 40 - ld.d $t0, $s3, 40 - ori $a0, $zero, 1 - ld.d $a1, $sp, 176 # 8-byte Folded Reload - bgeu $a1, $t8, .LBB13_121 + ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $a0, 40 + ld.d $a1, $sp, 144 # 8-byte Folded Reload + ld.d $a3, $a1, 40 + ld.d $a4, $s2, 40 + ld.d $a1, $sp, 152 # 8-byte Folded Reload + ld.d $a5, $a1, 40 + ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $a6, $a1, 40 + ld.d $a7, $s3, 40 + ori $a1, $zero, 1 + ld.d $a2, $sp, 192 # 8-byte Folded Reload + bgeu $a2, $t7, .LBB13_121 .LBB13_71: # %scalar.ph.preheader # in Loop: Header=BB13_4 Depth=1 - ld.d $a1, $sp, 152 # 8-byte Folded Reload - sub.d $a1, $a1, $a0 - alsl.d $a2, $a0, $t0, 3 - alsl.d $a7, $a0, $a7, 3 - alsl.d $a6, $a0, $a6, 3 - alsl.d $a5, $a0, $a5, 3 - alsl.d $a4, $a0, $a4, 3 - alsl.d $a0, $a0, $a3, 3 + ld.d $a2, $sp, 168 # 8-byte Folded Reload + sub.d $a2, $a2, $a1 + alsl.d $a7, $a1, $a7, 3 + alsl.d $a6, $a1, $a6, 3 + alsl.d $a5, $a1, $a5, 3 + alsl.d $a4, $a1, $a4, 3 + alsl.d $a3, $a1, $a3, 3 + alsl.d $a0, $a1, $a0, 3 .p2align 4, , 16 .LBB13_72: # %scalar.ph # Parent Loop BB13_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa2, $a0, 0 - fld.d $fa3, $a4, 0 - fld.d $fa4, $t4, %pc_lo12(.LCPI13_1) - fmadd.d $fa2, $fa3, $fa4, $fa2 - fst.d $fa2, $a5, 0 - fld.d $fa2, $a6, 0 - fld.d $fa3, $a7, 0 - fmadd.d $fa2, $fa3, $fa4, $fa2 - fst.d $fa2, $a2, 0 - addi.d $a1, $a1, -1 - addi.d $a2, $a2, 8 + fld.d $fa3, $a3, 0 + fmadd.d $fa2, $fa3, $fa1, $fa2 + fst.d $fa2, $a4, 0 + fld.d $fa2, $a5, 0 + fld.d $fa3, $a6, 0 + fmadd.d $fa2, $fa3, $fa1, $fa2 + fst.d $fa2, $a7, 0 + addi.d $a2, $a2, -1 addi.d $a7, $a7, 8 addi.d $a6, $a6, 8 addi.d $a5, $a5, 8 addi.d $a4, $a4, 8 + addi.d $a3, $a3, 8 addi.d $a0, $a0, 8 - bnez $a1, .LBB13_72 + bnez $a2, .LBB13_72 b .LBB13_3 .p2align 4, , 16 .LBB13_73: # %vector.memcheck263 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a5 - ld.d $s4, $sp, 56 # 8-byte Folded Reload - ld.d $t6, $sp, 64 # 8-byte Folded Reload - ld.d $t7, $sp, 24 # 8-byte Folded Reload + sub.d $a2, $a7, $a4 + ld.d $s4, $sp, 64 # 8-byte Folded Reload + ld.d $t5, $sp, 80 # 8-byte Folded Reload + ld.d $t6, $sp, 32 # 8-byte Folded Reload + ld.d $t8, $sp, 24 # 8-byte Folded Reload ori $fp, $zero, 16 - bltu $a1, $fp, .LBB13_59 + bltu $a2, $fp, .LBB13_59 # %bb.74: # %vector.memcheck263 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a5, $a3 - bltu $a1, $fp, .LBB13_59 + sub.d $a2, $a4, $a0 + bltu $a2, $fp, .LBB13_59 # %bb.75: # %vector.memcheck263 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a5, $a4 - bltu $a1, $fp, .LBB13_59 + sub.d $a2, $a4, $a3 + bltu $a2, $fp, .LBB13_59 # %bb.76: # %vector.memcheck263 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a6, $a5 - bltu $a1, $fp, .LBB13_59 + sub.d $a2, $a5, $a4 + bltu $a2, $fp, .LBB13_59 # %bb.77: # %vector.memcheck263 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a7, $a5 - bltu $a1, $fp, .LBB13_59 + sub.d $a2, $a6, $a4 + bltu $a2, $fp, .LBB13_59 # %bb.78: # %vector.memcheck263 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a3 - bltu $a1, $fp, .LBB13_59 + sub.d $a2, $a7, $a0 + bltu $a2, $fp, .LBB13_59 # %bb.79: # %vector.memcheck263 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a4 - bltu $a1, $fp, .LBB13_59 + sub.d $a2, $a7, $a3 + bltu $a2, $fp, .LBB13_59 # %bb.80: # %vector.memcheck263 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a6 - bltu $a1, $fp, .LBB13_59 + sub.d $a2, $a7, $a5 + bltu $a2, $fp, .LBB13_59 # %bb.81: # %vector.memcheck263 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a7 - bltu $a1, $fp, .LBB13_59 + sub.d $a2, $a7, $a6 + bltu $a2, $fp, .LBB13_59 # %bb.82: # %vector.body286.preheader # in Loop: Header=BB13_4 Depth=1 - addi.d $a0, $a3, 8 - addi.d $a1, $a4, 8 - addi.d $a2, $a5, 8 - addi.d $t1, $a6, 8 - addi.d $t2, $a7, 8 - addi.d $t3, $t0, 8 - move $t5, $t6 + addi.d $a1, $a0, 8 + addi.d $a2, $a3, 8 + addi.d $t0, $a4, 8 + addi.d $t1, $a5, 8 + addi.d $t2, $a6, 8 + addi.d $t3, $a7, 8 + move $t4, $t5 .p2align 4, , 16 .LBB13_83: # %vector.body286 # Parent Loop BB13_4 Depth=1 # => This Inner Loop Header: Depth=2 - vld $vr2, $a0, 0 - vld $vr3, $a1, 0 - vfmadd.d $vr2, $vr3, $vr0, $vr2 - vst $vr2, $a2, 0 + vld $vr2, $a1, 0 + vld $vr3, $a2, 0 + vreplgr2vr.d $vr4, $t8 + vfmadd.d $vr2, $vr3, $vr4, $vr2 + vst $vr2, $t0, 0 vld $vr2, $t1, 0 vld $vr3, $t2, 0 - vfmadd.d $vr2, $vr3, $vr0, $vr2 + vfmadd.d $vr2, $vr3, $vr4, $vr2 vst $vr2, $t3, 0 - addi.d $a0, $a0, 16 addi.d $a1, $a1, 16 addi.d $a2, $a2, 16 + addi.d $t0, $t0, 16 addi.d $t1, $t1, 16 addi.d $t2, $t2, 16 - addi.d $t5, $t5, -2 + addi.d $t4, $t4, -2 addi.d $t3, $t3, 16 - bnez $t5, .LBB13_83 + bnez $t4, .LBB13_83 # %bb.84: # %middle.block294 # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 72 # 8-byte Folded Reload - ld.d $a1, $sp, 176 # 8-byte Folded Reload - bne $a1, $t6, .LBB13_59 + ld.d $a1, $sp, 88 # 8-byte Folded Reload + ld.d $a2, $sp, 192 # 8-byte Folded Reload + bne $a2, $t5, .LBB13_59 b .LBB13_61 .p2align 4, , 16 .LBB13_85: # %vector.memcheck229 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a5 - bltu $a1, $fp, .LBB13_62 + sub.d $a2, $a7, $a4 + bltu $a2, $fp, .LBB13_62 # %bb.86: # %vector.memcheck229 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a5, $a3 - bltu $a1, $fp, .LBB13_62 + sub.d $a2, $a4, $a0 + bltu $a2, $fp, .LBB13_62 # %bb.87: # %vector.memcheck229 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a5, $a4 - bltu $a1, $fp, .LBB13_62 + sub.d $a2, $a4, $a3 + bltu $a2, $fp, .LBB13_62 # %bb.88: # %vector.memcheck229 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a6, $a5 - bltu $a1, $fp, .LBB13_62 + sub.d $a2, $a5, $a4 + bltu $a2, $fp, .LBB13_62 # %bb.89: # %vector.memcheck229 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a7, $a5 - bltu $a1, $fp, .LBB13_62 + sub.d $a2, $a6, $a4 + bltu $a2, $fp, .LBB13_62 # %bb.90: # %vector.memcheck229 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a3 - bltu $a1, $fp, .LBB13_62 + sub.d $a2, $a7, $a0 + bltu $a2, $fp, .LBB13_62 # %bb.91: # %vector.memcheck229 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a4 - bltu $a1, $fp, .LBB13_62 + sub.d $a2, $a7, $a3 + bltu $a2, $fp, .LBB13_62 # %bb.92: # %vector.memcheck229 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a6 - bltu $a1, $fp, .LBB13_62 + sub.d $a2, $a7, $a5 + bltu $a2, $fp, .LBB13_62 # %bb.93: # %vector.memcheck229 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a7 - bltu $a1, $fp, .LBB13_62 + sub.d $a2, $a7, $a6 + bltu $a2, $fp, .LBB13_62 # %bb.94: # %vector.body252.preheader # in Loop: Header=BB13_4 Depth=1 - addi.d $a0, $a3, 8 - addi.d $a1, $a4, 8 - addi.d $a2, $a5, 8 - addi.d $t1, $a6, 8 - addi.d $t2, $a7, 8 - addi.d $t3, $t0, 8 - move $t5, $t6 + addi.d $a1, $a0, 8 + addi.d $a2, $a3, 8 + addi.d $t0, $a4, 8 + addi.d $t1, $a5, 8 + addi.d $t2, $a6, 8 + addi.d $t3, $a7, 8 + move $t4, $t5 .p2align 4, , 16 .LBB13_95: # %vector.body252 # Parent Loop BB13_4 Depth=1 # => This Inner Loop Header: Depth=2 - vld $vr2, $a0, 0 - vld $vr3, $a1, 0 - vfmadd.d $vr2, $vr3, $vr0, $vr2 - vst $vr2, $a2, 0 + vld $vr2, $a1, 0 + vld $vr3, $a2, 0 + vreplgr2vr.d $vr4, $t8 + vfmadd.d $vr2, $vr3, $vr4, $vr2 + vst $vr2, $t0, 0 vld $vr2, $t1, 0 vld $vr3, $t2, 0 - vfmadd.d $vr2, $vr3, $vr0, $vr2 + vfmadd.d $vr2, $vr3, $vr4, $vr2 vst $vr2, $t3, 0 - addi.d $a0, $a0, 16 addi.d $a1, $a1, 16 addi.d $a2, $a2, 16 + addi.d $t0, $t0, 16 addi.d $t1, $t1, 16 addi.d $t2, $t2, 16 - addi.d $t5, $t5, -2 + addi.d $t4, $t4, -2 addi.d $t3, $t3, 16 - bnez $t5, .LBB13_95 + bnez $t4, .LBB13_95 # %bb.96: # %middle.block260 # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 72 # 8-byte Folded Reload - ld.d $a1, $sp, 176 # 8-byte Folded Reload - bne $a1, $t6, .LBB13_62 + ld.d $a1, $sp, 88 # 8-byte Folded Reload + ld.d $a2, $sp, 192 # 8-byte Folded Reload + bne $a2, $t5, .LBB13_62 b .LBB13_64 .p2align 4, , 16 .LBB13_97: # %vector.memcheck195 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a5 - bltu $a1, $fp, .LBB13_65 + sub.d $a2, $a7, $a4 + bltu $a2, $fp, .LBB13_65 # %bb.98: # %vector.memcheck195 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a5, $a3 - bltu $a1, $fp, .LBB13_65 + sub.d $a2, $a4, $a0 + bltu $a2, $fp, .LBB13_65 # %bb.99: # %vector.memcheck195 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a5, $a4 - bltu $a1, $fp, .LBB13_65 + sub.d $a2, $a4, $a3 + bltu $a2, $fp, .LBB13_65 # %bb.100: # %vector.memcheck195 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a6, $a5 - bltu $a1, $fp, .LBB13_65 + sub.d $a2, $a5, $a4 + bltu $a2, $fp, .LBB13_65 # %bb.101: # %vector.memcheck195 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a7, $a5 - bltu $a1, $fp, .LBB13_65 + sub.d $a2, $a6, $a4 + bltu $a2, $fp, .LBB13_65 # %bb.102: # %vector.memcheck195 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a3 - bltu $a1, $fp, .LBB13_65 + sub.d $a2, $a7, $a0 + bltu $a2, $fp, .LBB13_65 # %bb.103: # %vector.memcheck195 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a4 - bltu $a1, $fp, .LBB13_65 + sub.d $a2, $a7, $a3 + bltu $a2, $fp, .LBB13_65 # %bb.104: # %vector.memcheck195 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a6 - bltu $a1, $fp, .LBB13_65 + sub.d $a2, $a7, $a5 + bltu $a2, $fp, .LBB13_65 # %bb.105: # %vector.memcheck195 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a7 - bltu $a1, $fp, .LBB13_65 + sub.d $a2, $a7, $a6 + bltu $a2, $fp, .LBB13_65 # %bb.106: # %vector.body218.preheader # in Loop: Header=BB13_4 Depth=1 - addi.d $a0, $a3, 8 - addi.d $a1, $a4, 8 - addi.d $a2, $a5, 8 - addi.d $t1, $a6, 8 - addi.d $t2, $a7, 8 - addi.d $t3, $t0, 8 - move $t5, $t6 + addi.d $a1, $a0, 8 + addi.d $a2, $a3, 8 + addi.d $t0, $a4, 8 + addi.d $t1, $a5, 8 + addi.d $t2, $a6, 8 + addi.d $t3, $a7, 8 + move $t4, $t5 .p2align 4, , 16 .LBB13_107: # %vector.body218 # Parent Loop BB13_4 Depth=1 # => This Inner Loop Header: Depth=2 - vld $vr2, $a0, 0 - vld $vr3, $a1, 0 - vfmadd.d $vr2, $vr3, $vr0, $vr2 - vst $vr2, $a2, 0 + vld $vr2, $a1, 0 + vld $vr3, $a2, 0 + vreplgr2vr.d $vr4, $t8 + vfmadd.d $vr2, $vr3, $vr4, $vr2 + vst $vr2, $t0, 0 vld $vr2, $t1, 0 vld $vr3, $t2, 0 - vfmadd.d $vr2, $vr3, $vr0, $vr2 + vfmadd.d $vr2, $vr3, $vr4, $vr2 vst $vr2, $t3, 0 - addi.d $a0, $a0, 16 addi.d $a1, $a1, 16 addi.d $a2, $a2, 16 + addi.d $t0, $t0, 16 addi.d $t1, $t1, 16 addi.d $t2, $t2, 16 - addi.d $t5, $t5, -2 + addi.d $t4, $t4, -2 addi.d $t3, $t3, 16 - bnez $t5, .LBB13_107 + bnez $t4, .LBB13_107 # %bb.108: # %middle.block226 # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 72 # 8-byte Folded Reload - ld.d $a1, $sp, 176 # 8-byte Folded Reload - bne $a1, $t6, .LBB13_65 + ld.d $a1, $sp, 88 # 8-byte Folded Reload + ld.d $a2, $sp, 192 # 8-byte Folded Reload + bne $a2, $t5, .LBB13_65 b .LBB13_67 .p2align 4, , 16 .LBB13_109: # %vector.memcheck161 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a5 - bltu $a1, $fp, .LBB13_68 + sub.d $a2, $a7, $a4 + bltu $a2, $fp, .LBB13_68 # %bb.110: # %vector.memcheck161 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a5, $a3 - bltu $a1, $fp, .LBB13_68 + sub.d $a2, $a4, $a0 + bltu $a2, $fp, .LBB13_68 # %bb.111: # %vector.memcheck161 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a5, $a4 - bltu $a1, $fp, .LBB13_68 + sub.d $a2, $a4, $a3 + bltu $a2, $fp, .LBB13_68 # %bb.112: # %vector.memcheck161 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a6, $a5 - bltu $a1, $fp, .LBB13_68 + sub.d $a2, $a5, $a4 + bltu $a2, $fp, .LBB13_68 # %bb.113: # %vector.memcheck161 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a7, $a5 - bltu $a1, $fp, .LBB13_68 + sub.d $a2, $a6, $a4 + bltu $a2, $fp, .LBB13_68 # %bb.114: # %vector.memcheck161 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a3 - bltu $a1, $fp, .LBB13_68 + sub.d $a2, $a7, $a0 + bltu $a2, $fp, .LBB13_68 # %bb.115: # %vector.memcheck161 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a4 - bltu $a1, $fp, .LBB13_68 + sub.d $a2, $a7, $a3 + bltu $a2, $fp, .LBB13_68 # %bb.116: # %vector.memcheck161 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a6 - bltu $a1, $fp, .LBB13_68 + sub.d $a2, $a7, $a5 + bltu $a2, $fp, .LBB13_68 # %bb.117: # %vector.memcheck161 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a7 - bltu $a1, $fp, .LBB13_68 + sub.d $a2, $a7, $a6 + bltu $a2, $fp, .LBB13_68 # %bb.118: # %vector.body184.preheader # in Loop: Header=BB13_4 Depth=1 - addi.d $a0, $a3, 8 - addi.d $a1, $a4, 8 - addi.d $a2, $a5, 8 - addi.d $t1, $a6, 8 - addi.d $t2, $a7, 8 - addi.d $t3, $t0, 8 - move $t5, $t6 + addi.d $a1, $a0, 8 + addi.d $a2, $a3, 8 + addi.d $t0, $a4, 8 + addi.d $t1, $a5, 8 + addi.d $t2, $a6, 8 + addi.d $t3, $a7, 8 + move $t4, $t5 .p2align 4, , 16 .LBB13_119: # %vector.body184 # Parent Loop BB13_4 Depth=1 # => This Inner Loop Header: Depth=2 - vld $vr2, $a0, 0 - vld $vr3, $a1, 0 - vfmadd.d $vr2, $vr3, $vr0, $vr2 - vst $vr2, $a2, 0 + vld $vr2, $a1, 0 + vld $vr3, $a2, 0 + vreplgr2vr.d $vr4, $t8 + vfmadd.d $vr2, $vr3, $vr4, $vr2 + vst $vr2, $t0, 0 vld $vr2, $t1, 0 vld $vr3, $t2, 0 - vfmadd.d $vr2, $vr3, $vr0, $vr2 + vfmadd.d $vr2, $vr3, $vr4, $vr2 vst $vr2, $t3, 0 - addi.d $a0, $a0, 16 addi.d $a1, $a1, 16 addi.d $a2, $a2, 16 + addi.d $t0, $t0, 16 addi.d $t1, $t1, 16 addi.d $t2, $t2, 16 - addi.d $t5, $t5, -2 + addi.d $t4, $t4, -2 addi.d $t3, $t3, 16 - bnez $t5, .LBB13_119 + bnez $t4, .LBB13_119 # %bb.120: # %middle.block192 # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 72 # 8-byte Folded Reload - ld.d $a1, $sp, 176 # 8-byte Folded Reload - bne $a1, $t6, .LBB13_68 + ld.d $a1, $sp, 88 # 8-byte Folded Reload + ld.d $a2, $sp, 192 # 8-byte Folded Reload + bne $a2, $t5, .LBB13_68 b .LBB13_70 .p2align 4, , 16 .LBB13_121: # %vector.memcheck # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a5 - bltu $a1, $fp, .LBB13_71 + sub.d $a2, $a7, $a4 + bltu $a2, $fp, .LBB13_71 # %bb.122: # %vector.memcheck # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a5, $a3 - bltu $a1, $fp, .LBB13_71 + sub.d $a2, $a4, $a0 + bltu $a2, $fp, .LBB13_71 # %bb.123: # %vector.memcheck # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a5, $a4 - bltu $a1, $fp, .LBB13_71 + sub.d $a2, $a4, $a3 + bltu $a2, $fp, .LBB13_71 # %bb.124: # %vector.memcheck # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a6, $a5 - bltu $a1, $fp, .LBB13_71 + sub.d $a2, $a5, $a4 + bltu $a2, $fp, .LBB13_71 # %bb.125: # %vector.memcheck # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a7, $a5 - bltu $a1, $fp, .LBB13_71 + sub.d $a2, $a6, $a4 + bltu $a2, $fp, .LBB13_71 # %bb.126: # %vector.memcheck # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a3 - bltu $a1, $fp, .LBB13_71 + sub.d $a2, $a7, $a0 + bltu $a2, $fp, .LBB13_71 # %bb.127: # %vector.memcheck # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a4 - bltu $a1, $fp, .LBB13_71 + sub.d $a2, $a7, $a3 + bltu $a2, $fp, .LBB13_71 # %bb.128: # %vector.memcheck # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a6 - bltu $a1, $fp, .LBB13_71 + sub.d $a2, $a7, $a5 + bltu $a2, $fp, .LBB13_71 # %bb.129: # %vector.memcheck # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a7 - bltu $a1, $fp, .LBB13_71 + sub.d $a2, $a7, $a6 + bltu $a2, $fp, .LBB13_71 # %bb.130: # %vector.body.preheader # in Loop: Header=BB13_4 Depth=1 - addi.d $a0, $a3, 8 - addi.d $a1, $a4, 8 - addi.d $a2, $a5, 8 - addi.d $t1, $a6, 8 - addi.d $t2, $a7, 8 - addi.d $t3, $t0, 8 - move $t5, $t6 + addi.d $a1, $a0, 8 + addi.d $a2, $a3, 8 + addi.d $t0, $a4, 8 + addi.d $t1, $a5, 8 + addi.d $t2, $a6, 8 + addi.d $t3, $a7, 8 + move $t4, $t5 .p2align 4, , 16 .LBB13_131: # %vector.body # Parent Loop BB13_4 Depth=1 # => This Inner Loop Header: Depth=2 - vld $vr2, $a0, 0 - vld $vr3, $a1, 0 - vfmadd.d $vr2, $vr3, $vr0, $vr2 - vst $vr2, $a2, 0 + vld $vr2, $a1, 0 + vld $vr3, $a2, 0 + vreplgr2vr.d $vr4, $t8 + vfmadd.d $vr2, $vr3, $vr4, $vr2 + vst $vr2, $t0, 0 vld $vr2, $t1, 0 vld $vr3, $t2, 0 - vfmadd.d $vr2, $vr3, $vr0, $vr2 + vfmadd.d $vr2, $vr3, $vr4, $vr2 vst $vr2, $t3, 0 - addi.d $a0, $a0, 16 addi.d $a1, $a1, 16 addi.d $a2, $a2, 16 + addi.d $t0, $t0, 16 addi.d $t1, $t1, 16 addi.d $t2, $t2, 16 - addi.d $t5, $t5, -2 + addi.d $t4, $t4, -2 addi.d $t3, $t3, 16 - bnez $t5, .LBB13_131 + bnez $t4, .LBB13_131 # %bb.132: # %middle.block # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 72 # 8-byte Folded Reload - ld.d $a1, $sp, 176 # 8-byte Folded Reload - beq $a1, $t6, .LBB13_3 + ld.d $a1, $sp, 88 # 8-byte Folded Reload + ld.d $a2, $sp, 192 # 8-byte Folded Reload + beq $a2, $t5, .LBB13_3 b .LBB13_71 .LBB13_133: # %._crit_edge ld.d $a0, $sp, 16 # 8-byte Folded Reload - ld.d $s8, $sp, 216 # 8-byte Folded Reload - ld.d $s7, $sp, 224 # 8-byte Folded Reload - ld.d $s6, $sp, 232 # 8-byte Folded Reload - ld.d $s5, $sp, 240 # 8-byte Folded Reload - ld.d $s4, $sp, 248 # 8-byte Folded Reload - ld.d $s3, $sp, 256 # 8-byte Folded Reload - ld.d $s2, $sp, 264 # 8-byte Folded Reload - ld.d $s1, $sp, 272 # 8-byte Folded Reload - ld.d $s0, $sp, 280 # 8-byte Folded Reload - ld.d $fp, $sp, 288 # 8-byte Folded Reload - ld.d $ra, $sp, 296 # 8-byte Folded Reload - addi.d $sp, $sp, 304 + ld.d $s8, $sp, 232 # 8-byte Folded Reload + ld.d $s7, $sp, 240 # 8-byte Folded Reload + ld.d $s6, $sp, 248 # 8-byte Folded Reload + ld.d $s5, $sp, 256 # 8-byte Folded Reload + ld.d $s4, $sp, 264 # 8-byte Folded Reload + ld.d $s3, $sp, 272 # 8-byte Folded Reload + ld.d $s2, $sp, 280 # 8-byte Folded Reload + ld.d $s1, $sp, 288 # 8-byte Folded Reload + ld.d $s0, $sp, 296 # 8-byte Folded Reload + ld.d $fp, $sp, 304 # 8-byte Folded Reload + ld.d $ra, $sp, 312 # 8-byte Folded Reload + addi.d $sp, $sp, 320 pcaddu18i $t8, %call36(_ZN9benchmark5State17FinishKeepRunningEv) jr $t8 .Lfunc_end13: @@ -3656,12 +3652,7 @@ _ZL23BM_GEN_LIN_RECUR_LAMBDARN9benchmark5StateE: # @_ZL23BM_GEN_LIN_RECUR_LAMBDA .size _ZL23BM_GEN_LIN_RECUR_LAMBDARN9benchmark5StateE, .Lfunc_end14-_ZL23BM_GEN_LIN_RECUR_LAMBDARN9benchmark5StateE .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZL18BM_DISC_ORD_LAMBDARN9benchmark5StateE -.LCPI15_0: - .dword 0x3fc999999999999a # double 0.20000000000000001 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZL18BM_DISC_ORD_LAMBDARN9benchmark5StateE .type _ZL18BM_DISC_ORD_LAMBDARN9benchmark5StateE,@function _ZL18BM_DISC_ORD_LAMBDARN9benchmark5StateE: # @_ZL18BM_DISC_ORD_LAMBDARN9benchmark5StateE .cfi_startproc @@ -3732,10 +3723,13 @@ _ZL18BM_DISC_ORD_LAMBDARN9benchmark5StateE: # @_ZL18BM_DISC_ORD_LAMBDARN9benchma addi.w $a1, $a2, 0 blez $a1, .LBB15_10 # %bb.3: # %.lr.ph.us.preheader - pcalau12i $a1, %pc_hi20(.LCPI15_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI15_0) bstrpick.d $t4, $a2, 30, 0 addi.d $a1, $s8, 8 + lu12i.w $a2, -419431 + ori $a2, $a2, 2458 + lu32i.d $a2, -419431 + lu52i.d $a2, $a2, 1020 + movgr2fr.d $fa0, $a2 movgr2fr.d $fa1, $zero b .LBB15_5 .p2align 4, , 16 @@ -3945,12 +3939,7 @@ _ZL19BM_MAT_X_MAT_LAMBDARN9benchmark5StateE: # @_ZL19BM_MAT_X_MAT_LAMBDARN9bench .size _ZL19BM_MAT_X_MAT_LAMBDARN9benchmark5StateE, .Lfunc_end16-_ZL19BM_MAT_X_MAT_LAMBDARN9benchmark5StateE .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZL19BM_PLANCKIAN_LAMBDARN9benchmark5StateE -.LCPI17_0: - .dword 0x4033cccccccccccd # double 19.800000000000001 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZL19BM_PLANCKIAN_LAMBDARN9benchmark5StateE .type _ZL19BM_PLANCKIAN_LAMBDARN9benchmark5StateE,@function _ZL19BM_PLANCKIAN_LAMBDARN9benchmark5StateE: # @_ZL19BM_PLANCKIAN_LAMBDARN9benchmark5StateE .cfi_startproc @@ -3989,30 +3978,34 @@ _ZL19BM_PLANCKIAN_LAMBDARN9benchmark5StateE: # @_ZL19BM_PLANCKIAN_LAMBDARN9bench pcaddu18i $ra, %call36(_Z8loopInitj) jirl $ra, $ra, 0 ld.d $a0, $s2, 32 + ld.d $a1, $s0, 8 + st.d $a1, $sp, 40 # 8-byte Folded Spill ld.d $a1, $s0, 32 ld.d $a0, $a0, 0 - ld.d $a2, $s0, 8 + ld.d $a2, $s0, 16 st.d $a2, $sp, 32 # 8-byte Folded Spill - st.d $a1, $sp, 40 # 8-byte Folded Spill + ld.d $a2, $s0, 24 + ld.d $a3, $s0, 40 + st.d $a3, $sp, 24 # 8-byte Folded Spill + move $fp, $a1 alsl.d $a1, $a0, $a1, 3 fld.d $fa0, $a1, -8 - pcalau12i $a1, %pc_hi20(.LCPI17_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI17_0) - ld.d $a1, $s0, 24 - ld.d $a2, $s0, 16 - st.d $a2, $sp, 24 # 8-byte Folded Spill - ld.d $s0, $s0, 40 + lu12i.w $a1, -209716 + ori $a1, $a1, 3277 + lu32i.d $a1, 249036 + lu52i.d $a1, $a1, 1027 + movgr2fr.d $fa1, $a1 fmul.d $fa0, $fa0, $fa1 - move $s1, $a1 - alsl.d $a0, $a0, $a1, 3 + move $s0, $a2 + alsl.d $a0, $a0, $a2, 3 fst.d $fa0, $a0, -8 - ld.w $fp, $s2, 28 + ld.w $s1, $s2, 28 ld.d $s5, $s2, 16 st.d $s2, $sp, 16 # 8-byte Folded Spill move $a0, $s2 pcaddu18i $ra, %call36(_ZN9benchmark5State16StartKeepRunningEv) jirl $ra, $ra, 0 - bnez $fp, .LBB17_7 + bnez $s1, .LBB17_7 # %bb.1: # %_ZN9benchmark5State13StateIteratorC2EPS0_.exit beqz $s5, .LBB17_7 # %bb.2: # %.lr.ph40 @@ -4028,29 +4021,29 @@ _ZL19BM_PLANCKIAN_LAMBDARN9benchmark5StateE: # @_ZL19BM_PLANCKIAN_LAMBDARN9bench # =>This Loop Header: Depth=1 # Child Loop BB17_5 Depth 2 move $s7, $s6 - move $s8, $s0 - ld.d $s2, $sp, 32 # 8-byte Folded Reload - ld.d $s4, $sp, 24 # 8-byte Folded Reload - ld.d $fp, $sp, 40 # 8-byte Folded Reload - move $s3, $s1 + ld.d $s8, $sp, 24 # 8-byte Folded Reload + ld.d $s1, $sp, 40 # 8-byte Folded Reload + ld.d $s3, $sp, 32 # 8-byte Folded Reload + move $s2, $fp + move $s4, $s0 .p2align 4, , 16 .LBB17_5: # Parent Loop BB17_4 Depth=1 # => This Inner Loop Header: Depth=2 - fld.d $fa0, $s3, 0 - fld.d $fa1, $fp, 0 + fld.d $fa0, $s4, 0 + fld.d $fa1, $s2, 0 fdiv.d $fa0, $fa0, $fa1 - fst.d $fa0, $s4, 0 - fld.d $fs0, $s2, 0 + fst.d $fa0, $s3, 0 + fld.d $fs0, $s1, 0 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 vldi $vr1, -784 fadd.d $fa0, $fa0, $fa1 fdiv.d $fa0, $fs0, $fa0 fst.d $fa0, $s8, 0 - addi.d $s3, $s3, 8 - addi.d $fp, $fp, 8 addi.d $s4, $s4, 8 addi.d $s2, $s2, 8 + addi.d $s3, $s3, 8 + addi.d $s1, $s1, 8 addi.d $s7, $s7, -1 addi.d $s8, $s8, 8 bnez $s7, .LBB17_5 @@ -4079,12 +4072,7 @@ _ZL19BM_PLANCKIAN_LAMBDARN9benchmark5StateE: # @_ZL19BM_PLANCKIAN_LAMBDARN9bench .size _ZL19BM_PLANCKIAN_LAMBDARN9benchmark5StateE, .Lfunc_end17-_ZL19BM_PLANCKIAN_LAMBDARN9benchmark5StateE .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZL22BM_IMP_HYDRO_2D_LAMBDARN9benchmark5StateE -.LCPI18_0: - .dword 0x3fc6666666666666 # double 0.17499999999999999 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZL22BM_IMP_HYDRO_2D_LAMBDARN9benchmark5StateE .type _ZL22BM_IMP_HYDRO_2D_LAMBDARN9benchmark5StateE,@function _ZL22BM_IMP_HYDRO_2D_LAMBDARN9benchmark5StateE: # @_ZL22BM_IMP_HYDRO_2D_LAMBDARN9benchmark5StateE .cfi_startproc @@ -4128,17 +4116,17 @@ _ZL22BM_IMP_HYDRO_2D_LAMBDARN9benchmark5StateE: # @_ZL22BM_IMP_HYDRO_2D_LAMBDARN ld.d $fp, $s0, 304 ld.w $s0, $s6, 28 ld.d $a0, $s6, 16 - st.d $a0, $sp, 200 # 8-byte Folded Spill - st.d $s6, $sp, 8 # 8-byte Folded Spill + st.d $a0, $sp, 208 # 8-byte Folded Spill + st.d $s6, $sp, 16 # 8-byte Folded Spill move $a0, $s6 pcaddu18i $ra, %call36(_ZN9benchmark5State16StartKeepRunningEv) jirl $ra, $ra, 0 bnez $s0, .LBB18_15 # %bb.1: # %_ZN9benchmark5State13StateIteratorC2EPS0_.exit - ld.d $a0, $sp, 200 # 8-byte Folded Reload + ld.d $a0, $sp, 208 # 8-byte Folded Reload beqz $a0, .LBB18_15 # %bb.2: # %.preheader.lr.ph - ld.d $a0, $sp, 8 # 8-byte Folded Reload + ld.d $a0, $sp, 16 # 8-byte Folded Reload ld.d $a0, $a0, 32 ld.d $a0, $a0, 0 addi.w $a1, $a0, 0 @@ -4146,115 +4134,119 @@ _ZL22BM_IMP_HYDRO_2D_LAMBDARN9benchmark5StateE: # @_ZL22BM_IMP_HYDRO_2D_LAMBDARN blt $a1, $a2, .LBB18_15 # %bb.3: # %.preheader.us.preheader ld.d $a6, $s3, 16 - ld.d $s7, $s5, 8 - ld.d $a2, $s3, 0 - ld.d $t6, $s1, 8 + ld.d $a1, $s5, 8 + st.d $a1, $sp, 200 # 8-byte Folded Spill + ld.d $a1, $s3, 0 + st.d $a1, $sp, 184 # 8-byte Folded Spill + ld.d $a1, $s1, 8 + st.d $a1, $sp, 176 # 8-byte Folded Spill ld.d $a5, $s3, 8 - ld.d $t7, $s4, 8 - ld.d $s0, $s2, 8 - bstrpick.d $s6, $a0, 30, 0 + ld.d $a1, $s4, 8 + st.d $a1, $sp, 168 # 8-byte Folded Spill + ld.d $a1, $s2, 8 + st.d $a1, $sp, 160 # 8-byte Folded Spill + bstrpick.d $a0, $a0, 30, 0 + st.d $a0, $sp, 104 # 8-byte Folded Spill ld.d $a4, $s3, 24 ld.d $a0, $s5, 16 st.d $a0, $sp, 192 # 8-byte Folded Spill - ld.d $ra, $s1, 16 - ld.d $t0, $s4, 16 - ld.d $t1, $s2, 16 + ld.d $a0, $s1, 16 + st.d $a0, $sp, 80 # 8-byte Folded Spill + ld.d $t6, $s4, 16 + ld.d $t5, $s2, 16 ld.d $a3, $s3, 32 - ld.d $a0, $s5, 24 - st.d $a0, $sp, 184 # 8-byte Folded Spill - ld.d $a0, $s1, 24 - st.d $a0, $sp, 56 # 8-byte Folded Spill - ld.d $t4, $s4, 24 - ld.d $t5, $s2, 24 - ld.d $a7, $s3, 40 - ld.d $a0, $s5, 32 - st.d $a0, $sp, 176 # 8-byte Folded Spill - ld.d $a0, $s1, 32 - st.d $a0, $sp, 48 # 8-byte Folded Spill - ld.d $t8, $s4, 32 - ld.d $a0, $s3, 48 - st.d $a0, $sp, 160 # 8-byte Folded Spill - ld.d $a0, $s5, 40 - st.d $a0, $sp, 168 # 8-byte Folded Spill - ld.d $a0, $s1, 40 - st.d $a0, $sp, 24 # 8-byte Folded Spill - ld.d $t2, $s4, 40 - ld.d $s8, $s2, 32 - ld.d $t3, $s2, 40 + ld.d $s0, $s5, 24 + ld.d $s6, $s1, 24 + ld.d $s7, $s4, 24 + ld.d $s8, $s2, 24 + ld.d $t3, $s3, 40 + ld.d $t8, $s5, 32 + ld.d $ra, $s1, 32 + lu12i.w $a7, 419430 + ori $a7, $a7, 1638 + lu32i.d $a7, 419430 + lu52i.d $a7, $a7, 1020 + movgr2fr.d $fa0, $a7 + ld.d $a7, $s4, 32 + ld.d $t0, $s3, 48 + ld.d $t1, $s5, 40 + ld.d $t2, $s1, 40 + ld.d $t4, $s4, 40 + ld.d $t7, $s2, 32 + ld.d $a2, $s2, 40 ld.d $s1, $fp, 8 ld.d $s2, $fp, 16 ld.d $s3, $fp, 24 ld.d $a0, $fp, 32 ld.d $a1, $fp, 40 - addi.d $fp, $s7, 8 + ld.d $fp, $sp, 200 # 8-byte Folded Reload + addi.d $fp, $fp, 8 st.d $fp, $sp, 152 # 8-byte Folded Spill - addi.d $a2, $a2, 8 - st.d $a2, $sp, 144 # 8-byte Folded Spill - addi.d $t6, $t6, 8 - addi.d $t7, $t7, 8 - addi.d $a2, $s0, 8 - st.d $a2, $sp, 120 # 8-byte Folded Spill - addi.d $a2, $s1, 8 - st.d $a2, $sp, 112 # 8-byte Folded Spill - addi.d $a2, $s6, -1 - st.d $a2, $sp, 208 # 8-byte Folded Spill - addi.d $a2, $s2, 8 - st.d $a2, $sp, 104 # 8-byte Folded Spill - addi.d $a2, $t1, 8 - st.d $a2, $sp, 96 # 8-byte Folded Spill - addi.d $a2, $t0, 8 - st.d $a2, $sp, 88 # 8-byte Folded Spill - addi.d $a2, $ra, 8 - st.d $a2, $sp, 80 # 8-byte Folded Spill - ld.d $a2, $sp, 192 # 8-byte Folded Reload - addi.d $a2, $a2, 8 - st.d $a2, $sp, 72 # 8-byte Folded Spill + ld.d $fp, $sp, 184 # 8-byte Folded Reload + addi.d $fp, $fp, 8 + st.d $fp, $sp, 144 # 8-byte Folded Spill + ld.d $fp, $sp, 176 # 8-byte Folded Reload + addi.d $fp, $fp, 8 + st.d $fp, $sp, 136 # 8-byte Folded Spill + ld.d $fp, $sp, 168 # 8-byte Folded Reload + addi.d $fp, $fp, 8 + st.d $fp, $sp, 128 # 8-byte Folded Spill + ld.d $fp, $sp, 160 # 8-byte Folded Reload + addi.d $fp, $fp, 8 + st.d $fp, $sp, 120 # 8-byte Folded Spill + addi.d $fp, $s1, 8 + st.d $fp, $sp, 112 # 8-byte Folded Spill + ld.d $fp, $sp, 104 # 8-byte Folded Reload + addi.d $fp, $fp, -1 + st.d $fp, $sp, 200 # 8-byte Folded Spill + addi.d $fp, $s2, 8 + st.d $fp, $sp, 104 # 8-byte Folded Spill + addi.d $t5, $t5, 8 + st.d $t5, $sp, 96 # 8-byte Folded Spill + addi.d $t5, $t6, 8 + st.d $t5, $sp, 88 # 8-byte Folded Spill + ld.d $t5, $sp, 80 # 8-byte Folded Reload + addi.d $t5, $t5, 8 + st.d $t5, $sp, 80 # 8-byte Folded Spill + ld.d $t5, $sp, 192 # 8-byte Folded Reload + addi.d $t5, $t5, 8 + st.d $t5, $sp, 72 # 8-byte Folded Spill addi.d $s1, $s3, 8 - addi.d $s2, $t5, 8 - addi.d $s3, $t4, 8 - ld.d $a2, $sp, 56 # 8-byte Folded Reload - addi.d $s4, $a2, 8 - ld.d $a2, $sp, 184 # 8-byte Folded Reload - addi.d $s5, $a2, 8 + addi.d $s2, $s8, 8 + addi.d $s3, $s7, 8 + addi.d $s4, $s6, 8 + addi.d $s5, $s0, 8 addi.d $s6, $a0, 8 - addi.d $s7, $s8, 8 - addi.d $s8, $t8, 8 - ld.d $a0, $sp, 48 # 8-byte Folded Reload - addi.d $ra, $a0, 8 - ld.d $a0, $sp, 176 # 8-byte Folded Reload - addi.d $fp, $a0, 8 + addi.d $s7, $t7, 8 + addi.d $s8, $a7, 8 + addi.d $ra, $ra, 8 + addi.d $fp, $t8, 8 addi.d $a0, $a1, 8 + st.d $a0, $sp, 64 # 8-byte Folded Spill + addi.d $a0, $a2, 8 st.d $a0, $sp, 56 # 8-byte Folded Spill - addi.d $a0, $t3, 8 + addi.d $a0, $t4, 8 st.d $a0, $sp, 48 # 8-byte Folded Spill - addi.d $a0, $t2, 8 + addi.d $a0, $t0, 8 st.d $a0, $sp, 40 # 8-byte Folded Spill - ld.d $a0, $sp, 160 # 8-byte Folded Reload - addi.d $a0, $a0, 8 + addi.d $a0, $t2, 8 st.d $a0, $sp, 32 # 8-byte Folded Spill - ld.d $a0, $sp, 24 # 8-byte Folded Reload - addi.d $a0, $a0, 8 + addi.d $a0, $t1, 8 st.d $a0, $sp, 24 # 8-byte Folded Spill - ld.d $a0, $sp, 168 # 8-byte Folded Reload - addi.d $a0, $a0, 8 - st.d $a0, $sp, 16 # 8-byte Folded Spill addi.d $a1, $a6, 8 addi.d $a2, $a4, 8 addi.d $t0, $a3, 8 - st.d $a7, $sp, 160 # 8-byte Folded Spill - addi.d $t8, $a7, 8 + st.d $t3, $sp, 160 # 8-byte Folded Spill + addi.d $t8, $t3, 8 addi.d $a0, $a5, 16 st.d $a6, $sp, 192 # 8-byte Folded Spill - addi.d $t3, $a6, 16 + addi.d $s0, $a6, 16 st.d $a5, $sp, 184 # 8-byte Folded Spill - addi.d $s0, $a5, 8 + addi.d $t3, $a5, 8 st.d $a4, $sp, 176 # 8-byte Folded Spill addi.d $a5, $a4, 16 st.d $a3, $sp, 168 # 8-byte Folded Spill addi.d $a6, $a3, 16 - st.d $t6, $sp, 136 # 8-byte Folded Spill - st.d $t7, $sp, 128 # 8-byte Folded Spill - st.d $s1, $sp, 64 # 8-byte Folded Spill .p2align 4, , 16 .LBB18_4: # %.preheader.us # =>This Loop Header: Depth=1 @@ -4266,33 +4258,33 @@ _ZL22BM_IMP_HYDRO_2D_LAMBDARN9benchmark5StateE: # @_ZL22BM_IMP_HYDRO_2D_LAMBDARN ld.d $a3, $sp, 184 # 8-byte Folded Reload fld.d $fa1, $a3, 0 move $a3, $zero - ld.d $a4, $sp, 208 # 8-byte Folded Reload - ld.d $t2, $sp, 152 # 8-byte Folded Reload - ld.d $t4, $sp, 144 # 8-byte Folded Reload - ld.d $t5, $sp, 120 # 8-byte Folded Reload - ld.d $s1, $sp, 112 # 8-byte Folded Reload + ld.d $a4, $sp, 200 # 8-byte Folded Reload + ld.d $t1, $sp, 152 # 8-byte Folded Reload + ld.d $t2, $sp, 144 # 8-byte Folded Reload + ld.d $t4, $sp, 136 # 8-byte Folded Reload + ld.d $t5, $sp, 128 # 8-byte Folded Reload + ld.d $t6, $sp, 120 # 8-byte Folded Reload + ld.d $t7, $sp, 112 # 8-byte Folded Reload .p2align 4, , 16 .LBB18_5: # Parent Loop BB18_4 Depth=1 # => This Inner Loop Header: Depth=2 - fldx.d $fa0, $a1, $a3 - fldx.d $fa2, $t4, $a3 - fldx.d $fa3, $t6, $a3 - fldx.d $fa4, $t2, $a3 - fldx.d $fa5, $a0, $a3 - fldx.d $fa6, $t7, $a3 - fmul.d $fa2, $fa2, $fa3 - fmadd.d $fa0, $fa0, $fa4, $fa2 + fldx.d $fa2, $a1, $a3 + fldx.d $fa3, $t2, $a3 + fldx.d $fa4, $t4, $a3 + fldx.d $fa5, $t1, $a3 + fmul.d $fa3, $fa3, $fa4 + fmadd.d $fa2, $fa2, $fa5, $fa3 add.d $a7, $a0, $a3 - fmadd.d $fa2, $fa5, $fa6, $fa0 - fldx.d $fa3, $t5, $a3 - fldx.d $fa4, $s1, $a3 - fld.d $fa5, $a7, -8 - pcalau12i $t1, %pc_hi20(.LCPI18_0) - fld.d $fa0, $t1, %pc_lo12(.LCPI18_0) - fmadd.d $fa1, $fa1, $fa3, $fa2 - fadd.d $fa1, $fa1, $fa4 - fsub.d $fa1, $fa1, $fa5 - fmadd.d $fa1, $fa1, $fa0, $fa5 + fldx.d $fa3, $a0, $a3 + fldx.d $fa4, $t5, $a3 + fldx.d $fa5, $t6, $a3 + fldx.d $fa6, $t7, $a3 + fld.d $fa7, $a7, -8 + fmadd.d $fa2, $fa3, $fa4, $fa2 + fmadd.d $fa1, $fa1, $fa5, $fa2 + fadd.d $fa1, $fa1, $fa6 + fsub.d $fa1, $fa1, $fa7 + fmadd.d $fa1, $fa1, $fa0, $fa7 fst.d $fa1, $a7, -8 addi.d $a4, $a4, -1 addi.d $a3, $a3, 8 @@ -4302,24 +4294,25 @@ _ZL22BM_IMP_HYDRO_2D_LAMBDARN9benchmark5StateE: # @_ZL22BM_IMP_HYDRO_2D_LAMBDARN ld.d $a3, $sp, 192 # 8-byte Folded Reload fld.d $fa1, $a3, 0 move $a3, $zero - ld.d $a4, $sp, 208 # 8-byte Folded Reload + ld.d $t7, $sp, 200 # 8-byte Folded Reload + move $a4, $t7 ld.d $t1, $sp, 104 # 8-byte Folded Reload ld.d $t2, $sp, 96 # 8-byte Folded Reload ld.d $t4, $sp, 88 # 8-byte Folded Reload ld.d $t5, $sp, 80 # 8-byte Folded Reload - ld.d $s1, $sp, 72 # 8-byte Folded Reload + ld.d $t6, $sp, 72 # 8-byte Folded Reload .p2align 4, , 16 .LBB18_7: # %"._Z6forallIZL22BM_IMP_HYDRO_2D_LAMBDARN9benchmark5StateEE3$_0Ev9simd_execiiT_.exit_crit_edge.us.us" # Parent Loop BB18_4 Depth=1 # => This Inner Loop Header: Depth=2 fldx.d $fa2, $a2, $a3 - fldx.d $fa3, $s0, $a3 + fldx.d $fa3, $t3, $a3 fldx.d $fa4, $t5, $a3 - fldx.d $fa5, $s1, $a3 + fldx.d $fa5, $t6, $a3 fmul.d $fa3, $fa3, $fa4 fmadd.d $fa2, $fa2, $fa5, $fa3 - add.d $a7, $t3, $a3 - fldx.d $fa3, $t3, $a3 + add.d $a7, $s0, $a3 + fldx.d $fa3, $s0, $a3 fldx.d $fa4, $t4, $a3 fldx.d $fa5, $t2, $a3 fldx.d $fa6, $t1, $a3 @@ -4338,8 +4331,7 @@ _ZL22BM_IMP_HYDRO_2D_LAMBDARN9benchmark5StateE: # @_ZL22BM_IMP_HYDRO_2D_LAMBDARN ld.d $a3, $sp, 176 # 8-byte Folded Reload fld.d $fa1, $a3, 0 move $a3, $zero - ld.d $a4, $sp, 208 # 8-byte Folded Reload - ld.d $s1, $sp, 64 # 8-byte Folded Reload + move $a4, $t7 .p2align 4, , 16 .LBB18_9: # %"._Z6forallIZL22BM_IMP_HYDRO_2D_LAMBDARN9benchmark5StateEE3$_0Ev9simd_execiiT_.exit_crit_edge.us.us.1" # Parent Loop BB18_4 Depth=1 @@ -4370,7 +4362,7 @@ _ZL22BM_IMP_HYDRO_2D_LAMBDARN9benchmark5StateE: # @_ZL22BM_IMP_HYDRO_2D_LAMBDARN ld.d $a3, $sp, 168 # 8-byte Folded Reload fld.d $fa1, $a3, 0 move $a3, $zero - ld.d $a4, $sp, 208 # 8-byte Folded Reload + move $a4, $t7 .p2align 4, , 16 .LBB18_11: # %"._Z6forallIZL22BM_IMP_HYDRO_2D_LAMBDARN9benchmark5StateEE3$_0Ev9simd_execiiT_.exit_crit_edge.us.us.2" # Parent Loop BB18_4 Depth=1 @@ -4400,15 +4392,14 @@ _ZL22BM_IMP_HYDRO_2D_LAMBDARN9benchmark5StateE: # @_ZL22BM_IMP_HYDRO_2D_LAMBDARN # in Loop: Header=BB18_4 Depth=1 ld.d $a3, $sp, 160 # 8-byte Folded Reload fld.d $fa1, $a3, 0 - ld.d $a7, $sp, 16 # 8-byte Folded Reload + ld.d $a7, $sp, 24 # 8-byte Folded Reload move $a3, $t0 - ld.d $t1, $sp, 24 # 8-byte Folded Reload + ld.d $t1, $sp, 32 # 8-byte Folded Reload move $a4, $t8 - ld.d $t2, $sp, 32 # 8-byte Folded Reload - ld.d $t4, $sp, 40 # 8-byte Folded Reload - ld.d $t5, $sp, 48 # 8-byte Folded Reload - ld.d $t6, $sp, 56 # 8-byte Folded Reload - ld.d $t7, $sp, 208 # 8-byte Folded Reload + ld.d $t2, $sp, 40 # 8-byte Folded Reload + ld.d $t4, $sp, 48 # 8-byte Folded Reload + ld.d $t5, $sp, 56 # 8-byte Folded Reload + ld.d $t6, $sp, 64 # 8-byte Folded Reload .p2align 4, , 16 .LBB18_13: # %"._Z6forallIZL22BM_IMP_HYDRO_2D_LAMBDARN9benchmark5StateEE3$_0Ev9simd_execiiT_.exit_crit_edge.us.us.3" # Parent Loop BB18_4 Depth=1 @@ -4442,14 +4433,12 @@ _ZL22BM_IMP_HYDRO_2D_LAMBDARN9benchmark5StateE: # @_ZL22BM_IMP_HYDRO_2D_LAMBDARN bnez $t7, .LBB18_13 # %bb.14: # %"._Z6forallIZL22BM_IMP_HYDRO_2D_LAMBDARN9benchmark5StateEE3$_0Ev9simd_execiiT_.exit_crit_edge.us.us.4" # in Loop: Header=BB18_4 Depth=1 - ld.d $a3, $sp, 200 # 8-byte Folded Reload + ld.d $a3, $sp, 208 # 8-byte Folded Reload addi.d $a3, $a3, -1 - st.d $a3, $sp, 200 # 8-byte Folded Spill - ld.d $t6, $sp, 136 # 8-byte Folded Reload - ld.d $t7, $sp, 128 # 8-byte Folded Reload + st.d $a3, $sp, 208 # 8-byte Folded Spill bnez $a3, .LBB18_4 .LBB18_15: # %._crit_edge - ld.d $a0, $sp, 8 # 8-byte Folded Reload + ld.d $a0, $sp, 16 # 8-byte Folded Reload ld.d $s8, $sp, 216 # 8-byte Folded Reload ld.d $s7, $sp, 224 # 8-byte Folded Reload ld.d $s6, $sp, 232 # 8-byte Folded Reload diff --git a/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/__/LCALSStats.s b/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/__/LCALSStats.s index 65ccd92d..95272aa5 100644 --- a/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/__/LCALSStats.s +++ b/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/__/LCALSStats.s @@ -868,12 +868,7 @@ _Z19getLoopSuiteRunInfov: # @_Z19getLoopSuiteRunInfov .Lfunc_end1: .size _Z19getLoopSuiteRunInfov, .Lfunc_end1-_Z19getLoopSuiteRunInfov # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm -.LCPI2_0: - .dword 0x3fb999999999999a # double 0.10000000000000001 - .text - .globl _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm + .globl _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm # -- Begin function _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm .p2align 5 .type _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm,@function _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm: # @_Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm @@ -1018,10 +1013,13 @@ _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcE st.d $a0, $s4, 560 beqz $a1, .LBB2_15 # %bb.13: # %.lr.ph18.preheader - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI2_0) move $fp, $zero move $s0, $zero + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1019 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB2_14: # %.lr.ph18 # =>This Inner Loop Header: Depth=1 diff --git a/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/__/LCALSSuite.s b/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/__/LCALSSuite.s index a1e8c653..e35fa2d7 100644 --- a/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/__/LCALSSuite.s +++ b/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/__/LCALSSuite.s @@ -23,33 +23,21 @@ _Z11getLoopDatav: # @_Z11getLoopDatav .LCPI1_1: .dword 0x3ff6666666666666 # double 1.3999999999999999 .dword 0x3ff0000000000000 # double 1 -.LCPI1_5: +.LCPI1_2: .dword 8 # 0x8 .dword 4923084613239392580 # 0x44524f5f43534944 -.LCPI1_6: +.LCPI1_3: .dword 8 # 0x8 .dword 4914094937701898568 # 0x44325f4f52445948 -.LCPI1_7: +.LCPI1_4: .dword 8 # 0x8 .dword 4913813462725187912 # 0x44315f4f52445948 -.LCPI1_8: +.LCPI1_5: .dword 8 # 0x8 .dword 6074873621086556756 # 0x544e495f50415254 -.LCPI1_11: +.LCPI1_6: .dword 8 # 0x8 .dword 5786931235628926290 # 0x504f4f4c5f464552 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI1_2: - .dword 0x40e5972000000000 # double 44217 -.LCPI1_3: - .dword 0x40b3890000000000 # double 5001 -.LCPI1_4: - .dword 0x4065600000000000 # double 171 -.LCPI1_9: - .dword 0x4063800000000000 # double 156 -.LCPI1_10: - .dword 0x4050000000000000 # double 64 .text .globl _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd .p2align 5 @@ -640,21 +628,27 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define .Ltmp34: # EH_LABEL # %bb.92: move $s8, $a0 - pcalau12i $a0, %pc_hi20(.LCPI1_2) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_2) - pcalau12i $a0, %pc_hi20(.LCPI1_3) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_3) + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, 366368 + lu52i.d $a1, $a1, 1038 + movgr2fr.d $fa0, $a1 fmul.d $fa0, $fs0, $fa0 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a0, $fa0 - fmul.d $fa0, $fs0, $fa1 + movfr2gr.s $a1, $fa0 + st.w $a1, $s8, 0 + ori $a1, $zero, 0 + lu32i.d $a1, 231680 + lu52i.d $a1, $a1, 1035 + movgr2fr.d $fa0, $a1 + fmul.d $fa0, $fs0, $fa0 ftintrz.w.d $fa0, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI1_4) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_4) movfr2gr.s $a1, $fa0 - st.w $a0, $s8, 0 st.w $a1, $s8, 4 - fmul.d $fa0, $fs0, $fa1 + lu32i.d $a0, 352256 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa0, $a0 + fmul.d $fa0, $fs0, $fa0 ftintrz.w.d $fa0, $fa0 ld.d $s7, $sp, 96 # 8-byte Folded Reload ld.w $a0, $s7, 32 @@ -732,8 +726,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define jr $a0 .LBB1_99: # %._crit_edge.i.i352 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_11) - vld $vr0, $a0, %pc_lo12(.LCPI1_11) + pcalau12i $a0, %pc_hi20(.LCPI1_6) + vld $vr0, $a0, %pc_lo12(.LCPI1_6) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -936,8 +930,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define b .LBB1_133 .LBB1_112: # %._crit_edge.i.i732 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_8) - vld $vr0, $a0, %pc_lo12(.LCPI1_8) + pcalau12i $a0, %pc_hi20(.LCPI1_5) + vld $vr0, $a0, %pc_lo12(.LCPI1_5) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -1687,8 +1681,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define b .LBB1_205 .LBB1_158: # %._crit_edge.i.i748 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_7) - vld $vr0, $a0, %pc_lo12(.LCPI1_7) + pcalau12i $a0, %pc_hi20(.LCPI1_4) + vld $vr0, $a0, %pc_lo12(.LCPI1_4) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -1761,16 +1755,18 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define # in Loop: Header=BB1_95 Depth=1 ld.d $a0, $sp, 32 # 8-byte Folded Reload ld.d $a0, $a0, 0 - ld.d $a1, $sp, 8 # 8-byte Folded Reload - fld.d $fa0, $a1, %pc_lo12(_ZN7ADomain18loop_length_factorE) - pcalau12i $a1, %pc_hi20(.LCPI1_9) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_9) - fld.d $fa2, $a0, 0 - fmul.d $fa1, $fa0, $fa1 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 + fld.d $fa1, $a0, 0 + ld.d $a0, $sp, 8 # 8-byte Folded Reload + fld.d $fa0, $a0, %pc_lo12(_ZN7ADomain18loop_length_factorE) + ori $a0, $zero, 0 + lu32i.d $a0, 229376 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa2, $a0 + fmul.d $fa2, $fa0, $fa2 + ftintrz.w.d $fa2, $fa2 + movfr2gr.s $a0, $fa2 ori $a2, $zero, 2 - fst.d $fa2, $sp, 168 + fst.d $fa1, $sp, 168 blt $a0, $a2, .LBB1_211 # %bb.164: # %.lr.ph72.us.i.preheader # in Loop: Header=BB1_95 Depth=1 @@ -2202,8 +2198,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define b .LBB1_205 .LBB1_196: # %._crit_edge.i.i988 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_5) - vld $vr0, $a0, %pc_lo12(.LCPI1_5) + pcalau12i $a0, %pc_hi20(.LCPI1_2) + vld $vr0, $a0, %pc_lo12(.LCPI1_2) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -2297,8 +2293,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define b .LBB1_205 .LBB1_202: # %._crit_edge.i.i956 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_6) - vld $vr0, $a0, %pc_lo12(.LCPI1_6) + pcalau12i $a0, %pc_hi20(.LCPI1_3) + vld $vr0, $a0, %pc_lo12(.LCPI1_3) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -2382,9 +2378,9 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define move $a3, $zero .LBB1_212: # %_ZN7ADomainC2Eii.exit527 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a1, %pc_hi20(.LCPI1_10) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_10) ld.d $a1, $sp, 368 + lu52i.d $a4, $zero, 1029 + movgr2fr.d $fa1, $a4 fmul.d $fa1, $fa0, $fa1 ftintrz.w.d $fa1, $fa1 movfr2gr.s $a4, $fa1 @@ -4488,15 +4484,9 @@ GCC_except_table7: .LCPI8_0: .dword 0x3fb999999999999a # double 0.10000000000000001 .dword 0x3fc999999999999a # double 0.20000000000000001 -.LCPI8_3: +.LCPI8_1: .dword 0x3fc999999999999a # double 0.20000000000000001 .dword 0x3fd3333333333333 # double 0.29999999999999999 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI8_1: - .dword 0x3ff199999999999a # double 1.1000000000000001 -.LCPI8_2: - .dword 0x3ff1f9a6b50b0f28 # double 1.1234500000000001 .text .globl _Z8loopInitjR8LoopStat .p2align 5 @@ -4747,8 +4737,8 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat .LBB8_40: pcalau12i $a0, %pc_hi20(.LCPI8_0) addi.d $a0, $a0, %pc_lo12(.LCPI8_0) - pcalau12i $a1, %pc_hi20(.LCPI8_3) - addi.d $a1, $a1, %pc_lo12(.LCPI8_3) + pcalau12i $a1, %pc_hi20(.LCPI8_1) + addi.d $a1, $a1, %pc_lo12(.LCPI8_1) ld.w $a3, $s1, 1032 blez $a3, .LBB8_577 # %bb.41: # %.lr.ph.preheader.i430 @@ -4962,8 +4952,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat pcalau12i $a2, %pc_hi20(.LCPI8_0) addi.d $a2, $a2, %pc_lo12(.LCPI8_0) fldx.d $fa0, $a2, $a0 - ld.d $a2, $s1, 472 + ld.d $a3, $s1, 472 ori $a0, $zero, 4 + lu12i.w $a4, -419431 + lu12i.w $a2, -307024 bgeu $a1, $a0, .LBB8_920 # %bb.78: move $a0, $zero @@ -5152,12 +5144,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.102: # %middle.block3803 beq $a1, $a2, .LBB8_105 .LBB8_103: # %.lr.ph.i184.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_104: # %.lr.ph.i184 # =>This Inner Loop Header: Depth=1 @@ -5246,12 +5244,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.110: # %middle.block3819 beq $a1, $a2, .LBB8_113 .LBB8_111: # %.lr.ph.i192.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_112: # %.lr.ph.i192 # =>This Inner Loop Header: Depth=1 @@ -5340,12 +5344,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.118: # %middle.block3835 beq $a1, $a2, .LBB8_121 .LBB8_119: # %.lr.ph.i200.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_120: # %.lr.ph.i200 # =>This Inner Loop Header: Depth=1 @@ -5434,12 +5444,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.126: # %middle.block3851 beq $a1, $a2, .LBB8_129 .LBB8_127: # %.lr.ph.i208.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_128: # %.lr.ph.i208 # =>This Inner Loop Header: Depth=1 @@ -5528,12 +5544,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.134: # %middle.block3867 beq $a1, $a2, .LBB8_137 .LBB8_135: # %.lr.ph.i216.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_136: # %.lr.ph.i216 # =>This Inner Loop Header: Depth=1 @@ -5622,12 +5644,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.142: # %middle.block3883 beq $a0, $a1, .LBB8_1187 .LBB8_143: # %.lr.ph.i224.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_144: # %.lr.ph.i224 # =>This Inner Loop Header: Depth=1 @@ -5702,12 +5730,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.147: # %middle.block3547 beq $a1, $a2, .LBB8_150 .LBB8_148: # %.lr.ph.i232.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_149: # %.lr.ph.i232 # =>This Inner Loop Header: Depth=1 @@ -5796,12 +5830,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.155: # %middle.block3563 beq $a1, $a2, .LBB8_158 .LBB8_156: # %.lr.ph.i240.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_157: # %.lr.ph.i240 # =>This Inner Loop Header: Depth=1 @@ -5890,12 +5930,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.163: # %middle.block3579 beq $a1, $a2, .LBB8_166 .LBB8_164: # %.lr.ph.i248.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_165: # %.lr.ph.i248 # =>This Inner Loop Header: Depth=1 @@ -5984,12 +6030,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.171: # %middle.block3595 beq $a1, $a2, .LBB8_174 .LBB8_172: # %.lr.ph.i256.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_173: # %.lr.ph.i256 # =>This Inner Loop Header: Depth=1 @@ -6078,12 +6130,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.179: # %middle.block3611 beq $a1, $a2, .LBB8_182 .LBB8_180: # %.lr.ph.i264.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_181: # %.lr.ph.i264 # =>This Inner Loop Header: Depth=1 @@ -6172,12 +6230,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.187: # %middle.block3627 beq $a1, $a2, .LBB8_190 .LBB8_188: # %.lr.ph.i272.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_189: # %.lr.ph.i272 # =>This Inner Loop Header: Depth=1 @@ -6266,12 +6330,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.195: # %middle.block3643 beq $a1, $a2, .LBB8_198 .LBB8_196: # %.lr.ph.i280.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_197: # %.lr.ph.i280 # =>This Inner Loop Header: Depth=1 @@ -6360,12 +6430,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.203: # %middle.block3659 beq $a1, $a2, .LBB8_206 .LBB8_204: # %.lr.ph.i288.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_205: # %.lr.ph.i288 # =>This Inner Loop Header: Depth=1 @@ -6454,12 +6530,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.211: # %middle.block3675 beq $a1, $a2, .LBB8_214 .LBB8_212: # %.lr.ph.i296.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_213: # %.lr.ph.i296 # =>This Inner Loop Header: Depth=1 @@ -6548,12 +6630,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.219: # %middle.block3691 beq $a1, $a2, .LBB8_222 .LBB8_220: # %.lr.ph.i304.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_221: # %.lr.ph.i304 # =>This Inner Loop Header: Depth=1 @@ -6642,12 +6730,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.227: # %middle.block3707 beq $a1, $a2, .LBB8_230 .LBB8_228: # %.lr.ph.i312.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_229: # %.lr.ph.i312 # =>This Inner Loop Header: Depth=1 @@ -6736,12 +6830,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.235: # %middle.block3723 beq $a1, $a2, .LBB8_238 .LBB8_236: # %.lr.ph.i320.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_237: # %.lr.ph.i320 # =>This Inner Loop Header: Depth=1 @@ -6830,12 +6930,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.243: # %middle.block3739 beq $a1, $a2, .LBB8_246 .LBB8_244: # %.lr.ph.i328.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_245: # %.lr.ph.i328 # =>This Inner Loop Header: Depth=1 @@ -6924,12 +7030,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.251: # %middle.block3755 beq $a1, $a2, .LBB8_254 .LBB8_252: # %.lr.ph.i336.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_253: # %.lr.ph.i336 # =>This Inner Loop Header: Depth=1 @@ -7018,12 +7130,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.259: # %middle.block3771 beq $a1, $a2, .LBB8_262 .LBB8_260: # %.lr.ph.i344.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_261: # %.lr.ph.i344 # =>This Inner Loop Header: Depth=1 @@ -7112,12 +7230,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.267: # %middle.block3787 beq $a0, $a1, .LBB8_1187 .LBB8_268: # %.lr.ph.i352.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_269: # %.lr.ph.i352 # =>This Inner Loop Header: Depth=1 @@ -7215,12 +7339,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.274: # %middle.block2006 beq $a1, $a2, .LBB8_277 .LBB8_275: # %.lr.ph.i1118.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_276: # %.lr.ph.i1118 # =>This Inner Loop Header: Depth=1 @@ -7309,12 +7439,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.282: # %middle.block2022 beq $a1, $a2, .LBB8_285 .LBB8_283: # %.lr.ph.i1126.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_284: # %.lr.ph.i1126 # =>This Inner Loop Header: Depth=1 @@ -7403,12 +7539,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.290: # %middle.block2038 beq $a1, $a2, .LBB8_293 .LBB8_291: # %.lr.ph.i1134.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_292: # %.lr.ph.i1134 # =>This Inner Loop Header: Depth=1 @@ -7497,12 +7639,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.298: # %middle.block2054 beq $a0, $a1, .LBB8_1187 .LBB8_299: # %.lr.ph.i1142.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_300: # %.lr.ph.i1142 # =>This Inner Loop Header: Depth=1 @@ -7577,12 +7725,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.303: # %middle.block2870 beq $a1, $a2, .LBB8_306 .LBB8_304: # %.lr.ph.i692.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_305: # %.lr.ph.i692 # =>This Inner Loop Header: Depth=1 @@ -7671,12 +7825,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.311: # %middle.block2886 beq $a0, $a1, .LBB8_1187 .LBB8_312: # %.lr.ph.i700.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_313: # %.lr.ph.i700 # =>This Inner Loop Header: Depth=1 @@ -7751,12 +7911,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.316: # %middle.block2934 beq $a1, $a2, .LBB8_319 .LBB8_317: # %.lr.ph.i660.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_318: # %.lr.ph.i660 # =>This Inner Loop Header: Depth=1 @@ -7845,12 +8011,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.324: # %middle.block2950 beq $a0, $a1, .LBB8_1187 .LBB8_325: # %.lr.ph.i668.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_326: # %.lr.ph.i668 # =>This Inner Loop Header: Depth=1 @@ -7925,12 +8097,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.329: # %middle.block3046 beq $a1, $a2, .LBB8_332 .LBB8_330: # %.lr.ph.i579.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_331: # %.lr.ph.i579 # =>This Inner Loop Header: Depth=1 @@ -8019,13 +8197,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.337: # %middle.block3062 beq $a1, $a2, .LBB8_340 .LBB8_338: # %.lr.ph.i587.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 - .p2align 4, , 16 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 + .p2align 4, , 16 .LBB8_339: # %.lr.ph.i587 # =>This Inner Loop Header: Depth=1 bstrpick.d $a4, $a1, 31, 0 @@ -8113,12 +8297,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.345: # %middle.block3078 beq $a1, $a2, .LBB8_348 .LBB8_346: # %.lr.ph.i595.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_347: # %.lr.ph.i595 # =>This Inner Loop Header: Depth=1 @@ -8207,12 +8397,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.353: # %middle.block3094 beq $a1, $a2, .LBB8_356 .LBB8_354: # %.lr.ph.i603.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_355: # %.lr.ph.i603 # =>This Inner Loop Header: Depth=1 @@ -8301,12 +8497,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.361: # %middle.block3110 beq $a0, $a1, .LBB8_1187 .LBB8_362: # %.lr.ph.i611.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_363: # %.lr.ph.i611 # =>This Inner Loop Header: Depth=1 @@ -8381,12 +8583,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.366: # %middle.block3030 beq $a0, $a1, .LBB8_1187 .LBB8_367: # %.lr.ph.i620.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_368: # %.lr.ph.i620 # =>This Inner Loop Header: Depth=1 @@ -8461,12 +8669,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.371: # %middle.block3899 beq $a1, $a2, .LBB8_374 .LBB8_372: # %.lr.ph.i.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_373: # %.lr.ph.i # =>This Inner Loop Header: Depth=1 @@ -8555,12 +8769,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.379: # %middle.block3915 beq $a1, $a2, .LBB8_382 .LBB8_380: # %.lr.ph.i168.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_381: # %.lr.ph.i168 # =>This Inner Loop Header: Depth=1 @@ -8649,12 +8869,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.387: # %middle.block3931 beq $a0, $a1, .LBB8_1187 .LBB8_388: # %.lr.ph.i176.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_389: # %.lr.ph.i176 # =>This Inner Loop Header: Depth=1 @@ -8729,12 +8955,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.392: # %middle.block3206 beq $a1, $a2, .LBB8_395 .LBB8_393: # %.lr.ph.i499.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_394: # %.lr.ph.i499 # =>This Inner Loop Header: Depth=1 @@ -8823,12 +9055,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.400: # %middle.block3222 beq $a1, $a2, .LBB8_403 .LBB8_401: # %.lr.ph.i507.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_402: # %.lr.ph.i507 # =>This Inner Loop Header: Depth=1 @@ -8917,12 +9155,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.408: # %middle.block3238 beq $a1, $a2, .LBB8_411 .LBB8_409: # %.lr.ph.i515.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_410: # %.lr.ph.i515 # =>This Inner Loop Header: Depth=1 @@ -9011,12 +9255,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.416: # %middle.block3254 beq $a1, $a2, .LBB8_419 .LBB8_417: # %.lr.ph.i523.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_418: # %.lr.ph.i523 # =>This Inner Loop Header: Depth=1 @@ -9105,12 +9355,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.424: # %middle.block3270 beq $a0, $a1, .LBB8_1187 .LBB8_425: # %.lr.ph.i531.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_426: # %.lr.ph.i531 # =>This Inner Loop Header: Depth=1 @@ -9185,12 +9441,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.429: # %middle.block2246 beq $a1, $a2, .LBB8_432 .LBB8_430: # %.lr.ph.i944.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_431: # %.lr.ph.i944 # =>This Inner Loop Header: Depth=1 @@ -9279,12 +9541,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.437: # %middle.block2262 beq $a1, $a2, .LBB8_440 .LBB8_438: # %.lr.ph.i952.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_439: # %.lr.ph.i952 # =>This Inner Loop Header: Depth=1 @@ -9373,12 +9641,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.445: # %middle.block2278 beq $a1, $a2, .LBB8_448 .LBB8_446: # %.lr.ph.i960.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_447: # %.lr.ph.i960 # =>This Inner Loop Header: Depth=1 @@ -9467,12 +9741,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.453: # %middle.block2294 beq $a1, $a2, .LBB8_456 .LBB8_454: # %.lr.ph.i968.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_455: # %.lr.ph.i968 # =>This Inner Loop Header: Depth=1 @@ -9561,12 +9841,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.461: # %middle.block2310 beq $a1, $a2, .LBB8_464 .LBB8_462: # %.lr.ph.i976.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_463: # %.lr.ph.i976 # =>This Inner Loop Header: Depth=1 @@ -9655,12 +9941,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.469: # %middle.block2326 beq $a1, $a2, .LBB8_472 .LBB8_470: # %.lr.ph.i984.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_471: # %.lr.ph.i984 # =>This Inner Loop Header: Depth=1 @@ -9749,12 +10041,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.477: # %middle.block2342 beq $a1, $a2, .LBB8_480 .LBB8_478: # %.lr.ph.i992.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_479: # %.lr.ph.i992 # =>This Inner Loop Header: Depth=1 @@ -9843,12 +10141,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.485: # %middle.block2358 beq $a1, $a2, .LBB8_488 .LBB8_486: # %.lr.ph.i1000.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_487: # %.lr.ph.i1000 # =>This Inner Loop Header: Depth=1 @@ -9937,12 +10241,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.493: # %middle.block2374 beq $a1, $a2, .LBB8_496 .LBB8_494: # %.lr.ph.i1008.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_495: # %.lr.ph.i1008 # =>This Inner Loop Header: Depth=1 @@ -10031,12 +10341,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.501: # %middle.block2390 beq $a0, $a1, .LBB8_504 .LBB8_502: # %.lr.ph.i1016.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_503: # %.lr.ph.i1016 # =>This Inner Loop Header: Depth=1 @@ -10142,12 +10458,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.512: # %middle.block2406 beq $a0, $a1, .LBB8_515 .LBB8_513: # %.lr.ph.i892.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_514: # %.lr.ph.i892 # =>This Inner Loop Header: Depth=1 @@ -10236,12 +10558,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.520: # %middle.block2422 beq $a0, $a1, .LBB8_523 .LBB8_521: # %.lr.ph.i900.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_522: # %.lr.ph.i900 # =>This Inner Loop Header: Depth=1 @@ -10330,12 +10658,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.528: # %middle.block2438 beq $a0, $a1, .LBB8_531 .LBB8_529: # %.lr.ph.i908.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_530: # %.lr.ph.i908 # =>This Inner Loop Header: Depth=1 @@ -10424,12 +10758,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.536: # %middle.block2454 beq $a0, $a1, .LBB8_539 .LBB8_537: # %.lr.ph.i916.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_538: # %.lr.ph.i916 # =>This Inner Loop Header: Depth=1 @@ -10518,12 +10858,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.544: # %middle.block2470 beq $a0, $a1, .LBB8_547 .LBB8_545: # %.lr.ph.i924.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_546: # %.lr.ph.i924 # =>This Inner Loop Header: Depth=1 @@ -10630,12 +10976,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.556: # %middle.block2486 beq $a0, $a1, .LBB8_1187 .LBB8_557: # %.lr.ph.i936.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_558: # %.lr.ph.i936 # =>This Inner Loop Header: Depth=1 @@ -10710,12 +11062,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.561: # %middle.block2902 beq $a1, $a2, .LBB8_564 .LBB8_562: # %.lr.ph.i676.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_563: # %.lr.ph.i676 # =>This Inner Loop Header: Depth=1 @@ -10804,12 +11162,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.569: # %middle.block2918 beq $a0, $a1, .LBB8_1187 .LBB8_570: # %.lr.ph.i684.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_571: # %.lr.ph.i684 # =>This Inner Loop Header: Depth=1 @@ -10875,13 +11239,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.574: # %middle.block3319 beq $a2, $a3, .LBB8_577 .LBB8_575: # %.lr.ph.i432.preheader - pcalau12i $a5, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI8_1) - pcalau12i $a5, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI8_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB8_576: # %.lr.ph.i432 # =>This Inner Loop Header: Depth=1 @@ -10965,13 +11335,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.582: # %middle.block3336 beq $a2, $a3, .LBB8_585 .LBB8_583: # %.lr.ph.i441.preheader - pcalau12i $a5, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI8_1) - pcalau12i $a5, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI8_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB8_584: # %.lr.ph.i441 # =>This Inner Loop Header: Depth=1 @@ -11055,13 +11431,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.590: # %middle.block3353 beq $a2, $a3, .LBB8_593 .LBB8_591: # %.lr.ph.i452.preheader - pcalau12i $a5, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI8_1) - pcalau12i $a5, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI8_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB8_592: # %.lr.ph.i452 # =>This Inner Loop Header: Depth=1 @@ -11145,13 +11527,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.598: # %middle.block3370 beq $a2, $a3, .LBB8_601 .LBB8_599: # %.lr.ph.i463.preheader - pcalau12i $a5, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI8_1) - pcalau12i $a5, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI8_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB8_600: # %.lr.ph.i463 # =>This Inner Loop Header: Depth=1 @@ -11235,13 +11623,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.606: # %middle.block3387 beq $a0, $a2, .LBB8_1187 .LBB8_607: # %.lr.ph.i474.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a3, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a0 alsl.d $a1, $a0, $a1, 4 addi.d $a1, $a1, 8 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa3, $a3 .p2align 4, , 16 .LBB8_608: # %.lr.ph.i474 # =>This Inner Loop Header: Depth=1 @@ -11319,12 +11713,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.611: # %middle.block1622 beq $a1, $a2, .LBB8_614 .LBB8_612: # %.lr.ph.i1294.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_613: # %.lr.ph.i1294 # =>This Inner Loop Header: Depth=1 @@ -11413,12 +11813,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.619: # %middle.block1638 beq $a1, $a2, .LBB8_622 .LBB8_620: # %.lr.ph.i1302.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_621: # %.lr.ph.i1302 # =>This Inner Loop Header: Depth=1 @@ -11507,12 +11913,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.627: # %middle.block1654 beq $a1, $a2, .LBB8_630 .LBB8_628: # %.lr.ph.i1310.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_629: # %.lr.ph.i1310 # =>This Inner Loop Header: Depth=1 @@ -11601,12 +12013,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.635: # %middle.block1670 beq $a1, $a2, .LBB8_638 .LBB8_636: # %.lr.ph.i1318.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_637: # %.lr.ph.i1318 # =>This Inner Loop Header: Depth=1 @@ -11695,12 +12113,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.643: # %middle.block1686 beq $a1, $a2, .LBB8_646 .LBB8_644: # %.lr.ph.i1326.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_645: # %.lr.ph.i1326 # =>This Inner Loop Header: Depth=1 @@ -11789,12 +12213,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.651: # %middle.block1702 beq $a0, $a1, .LBB8_1187 .LBB8_652: # %.lr.ph.i1334.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_653: # %.lr.ph.i1334 # =>This Inner Loop Header: Depth=1 @@ -11869,12 +12299,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.656: # %middle.block2822 beq $a1, $a2, .LBB8_659 .LBB8_657: # %.lr.ph.i708.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_658: # %.lr.ph.i708 # =>This Inner Loop Header: Depth=1 @@ -11963,12 +12399,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.664: # %middle.block2838 beq $a1, $a2, .LBB8_667 .LBB8_665: # %.lr.ph.i716.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_666: # %.lr.ph.i716 # =>This Inner Loop Header: Depth=1 @@ -12057,12 +12499,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.672: # %middle.block2854 beq $a0, $a1, .LBB8_1187 .LBB8_673: # %.lr.ph.i724.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_674: # %.lr.ph.i724 # =>This Inner Loop Header: Depth=1 @@ -12137,12 +12585,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.677: # %middle.block2598 beq $a1, $a2, .LBB8_680 .LBB8_678: # %.lr.ph.i828.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_679: # %.lr.ph.i828 # =>This Inner Loop Header: Depth=1 @@ -12231,12 +12685,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.685: # %middle.block2614 beq $a0, $a1, .LBB8_1187 .LBB8_686: # %.lr.ph.i836.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_687: # %.lr.ph.i836 # =>This Inner Loop Header: Depth=1 @@ -12311,12 +12771,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.690: # %middle.block3483 beq $a1, $a2, .LBB8_693 .LBB8_691: # %.lr.ph.i360.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_692: # %.lr.ph.i360 # =>This Inner Loop Header: Depth=1 @@ -12405,12 +12871,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.698: # %middle.block3499 beq $a1, $a2, .LBB8_701 .LBB8_699: # %.lr.ph.i368.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_700: # %.lr.ph.i368 # =>This Inner Loop Header: Depth=1 @@ -12499,12 +12971,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.706: # %middle.block3515 beq $a1, $a2, .LBB8_709 .LBB8_707: # %.lr.ph.i376.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_708: # %.lr.ph.i376 # =>This Inner Loop Header: Depth=1 @@ -12593,12 +13071,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.714: # %middle.block3531 beq $a0, $a1, .LBB8_1187 .LBB8_715: # %.lr.ph.i384.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_716: # %.lr.ph.i384 # =>This Inner Loop Header: Depth=1 @@ -12673,12 +13157,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.719: # %middle.block2566 beq $a1, $a2, .LBB8_722 .LBB8_720: # %.lr.ph.i844.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_721: # %.lr.ph.i844 # =>This Inner Loop Header: Depth=1 @@ -12767,12 +13257,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.727: # %middle.block2582 beq $a0, $a1, .LBB8_1187 .LBB8_728: # %.lr.ph.i852.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_729: # %.lr.ph.i852 # =>This Inner Loop Header: Depth=1 @@ -12847,12 +13343,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.732: # %middle.block2966 beq $a1, $a2, .LBB8_735 .LBB8_733: # %.lr.ph.i628.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_734: # %.lr.ph.i628 # =>This Inner Loop Header: Depth=1 @@ -12941,12 +13443,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.740: # %middle.block2982 beq $a1, $a2, .LBB8_743 .LBB8_741: # %.lr.ph.i636.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_742: # %.lr.ph.i636 # =>This Inner Loop Header: Depth=1 @@ -13035,12 +13543,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.748: # %middle.block2998 beq $a1, $a2, .LBB8_751 .LBB8_749: # %.lr.ph.i644.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_750: # %.lr.ph.i644 # =>This Inner Loop Header: Depth=1 @@ -13129,12 +13643,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.756: # %middle.block3014 beq $a0, $a1, .LBB8_1187 .LBB8_757: # %.lr.ph.i652.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_758: # %.lr.ph.i652 # =>This Inner Loop Header: Depth=1 @@ -13209,12 +13729,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.761: # %middle.block3403 beq $a1, $a2, .LBB8_764 .LBB8_762: # %.lr.ph.i392.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_763: # %.lr.ph.i392 # =>This Inner Loop Header: Depth=1 @@ -13303,12 +13829,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.769: # %middle.block3419 beq $a1, $a2, .LBB8_772 .LBB8_770: # %.lr.ph.i400.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_771: # %.lr.ph.i400 # =>This Inner Loop Header: Depth=1 @@ -13397,12 +13929,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.777: # %middle.block3435 beq $a1, $a2, .LBB8_780 .LBB8_778: # %.lr.ph.i408.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_779: # %.lr.ph.i408 # =>This Inner Loop Header: Depth=1 @@ -13491,12 +14029,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.785: # %middle.block3451 beq $a1, $a2, .LBB8_788 .LBB8_786: # %.lr.ph.i416.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_787: # %.lr.ph.i416 # =>This Inner Loop Header: Depth=1 @@ -13585,12 +14129,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.793: # %middle.block3467 beq $a0, $a1, .LBB8_1187 .LBB8_794: # %.lr.ph.i424.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_795: # %.lr.ph.i424 # =>This Inner Loop Header: Depth=1 @@ -13665,12 +14215,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.798: # %middle.block2742 beq $a1, $a2, .LBB8_801 .LBB8_799: # %.lr.ph.i732.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_800: # %.lr.ph.i732 # =>This Inner Loop Header: Depth=1 @@ -13759,12 +14315,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.806: # %middle.block2758 beq $a1, $a2, .LBB8_809 .LBB8_807: # %.lr.ph.i740.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_808: # %.lr.ph.i740 # =>This Inner Loop Header: Depth=1 @@ -13853,12 +14415,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.814: # %middle.block2774 beq $a1, $a2, .LBB8_817 .LBB8_815: # %.lr.ph.i748.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_816: # %.lr.ph.i748 # =>This Inner Loop Header: Depth=1 @@ -13947,12 +14515,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.822: # %middle.block2790 beq $a1, $a2, .LBB8_825 .LBB8_823: # %.lr.ph.i756.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_824: # %.lr.ph.i756 # =>This Inner Loop Header: Depth=1 @@ -14041,12 +14615,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.830: # %middle.block2806 beq $a0, $a1, .LBB8_1187 .LBB8_831: # %.lr.ph.i764.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_832: # %.lr.ph.i764 # =>This Inner Loop Header: Depth=1 @@ -14121,12 +14701,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.835: # %middle.block3126 beq $a1, $a2, .LBB8_838 .LBB8_836: # %.lr.ph.i539.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_837: # %.lr.ph.i539 # =>This Inner Loop Header: Depth=1 @@ -14215,12 +14801,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.843: # %middle.block3142 beq $a1, $a2, .LBB8_846 .LBB8_844: # %.lr.ph.i547.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_845: # %.lr.ph.i547 # =>This Inner Loop Header: Depth=1 @@ -14309,12 +14901,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.851: # %middle.block3158 beq $a1, $a2, .LBB8_854 .LBB8_852: # %.lr.ph.i555.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_853: # %.lr.ph.i555 # =>This Inner Loop Header: Depth=1 @@ -14403,12 +15001,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.859: # %middle.block3174 beq $a1, $a2, .LBB8_862 .LBB8_860: # %.lr.ph.i563.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_861: # %.lr.ph.i563 # =>This Inner Loop Header: Depth=1 @@ -14497,12 +15101,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.867: # %middle.block3190 beq $a0, $a1, .LBB8_1187 .LBB8_868: # %.lr.ph.i571.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_869: # %.lr.ph.i571 # =>This Inner Loop Header: Depth=1 @@ -14577,12 +15187,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.872: # %middle.block3286 beq $a1, $a2, .LBB8_875 .LBB8_873: # %.lr.ph.i483.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_874: # %.lr.ph.i483 # =>This Inner Loop Header: Depth=1 @@ -14671,12 +15287,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.880: # %middle.block3302 beq $a0, $a1, .LBB8_1187 .LBB8_881: # %.lr.ph.i491.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_882: # %.lr.ph.i491 # =>This Inner Loop Header: Depth=1 @@ -14751,12 +15373,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.885: # %middle.block1718 beq $a1, $a2, .LBB8_888 .LBB8_886: # %.lr.ph.i1254.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_887: # %.lr.ph.i1254 # =>This Inner Loop Header: Depth=1 @@ -14845,12 +15473,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.893: # %middle.block1734 beq $a1, $a2, .LBB8_896 .LBB8_894: # %.lr.ph.i1262.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_895: # %.lr.ph.i1262 # =>This Inner Loop Header: Depth=1 @@ -14939,12 +15573,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.901: # %middle.block1750 beq $a1, $a2, .LBB8_904 .LBB8_902: # %.lr.ph.i1270.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_903: # %.lr.ph.i1270 # =>This Inner Loop Header: Depth=1 @@ -15033,12 +15673,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.909: # %middle.block1766 beq $a1, $a2, .LBB8_912 .LBB8_910: # %.lr.ph.i1278.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_911: # %.lr.ph.i1278 # =>This Inner Loop Header: Depth=1 @@ -15127,12 +15773,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.917: # %middle.block1782 beq $a0, $a1, .LBB8_1187 .LBB8_918: # %.lr.ph.i1286.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_919: # %.lr.ph.i1286 # =>This Inner Loop Header: Depth=1 @@ -15153,41 +15805,39 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat bstrpick.d $a0, $a1, 30, 2 slli.d $a0, $a0, 2 vreplvei.d $vr1, $vr0, 0 - addi.d $a3, $a2, 16 - ori $a4, $zero, 0 - lu32i.d $a4, 1 - vreplgr2vr.d $vr2, $a4 - lu12i.w $a4, -419431 - ori $a4, $a4, 2458 - lu32i.d $a4, 104857 - lu52i.d $a4, $a4, 1023 - vreplgr2vr.d $vr3, $a4 - lu12i.w $a4, -307024 - ori $a4, $a4, 3880 - lu32i.d $a4, 129446 - lu52i.d $a4, $a4, 1023 - vreplgr2vr.d $vr4, $a4 - move $a4, $a0 + addi.d $a5, $a3, 16 + ori $a6, $zero, 0 + lu32i.d $a6, 1 + vreplgr2vr.d $vr2, $a6 + ori $a6, $a4, 2458 + lu32i.d $a6, 104857 + lu52i.d $a6, $a6, 1023 + vreplgr2vr.d $vr3, $a6 + ori $a6, $a2, 3880 + lu32i.d $a6, 129446 + lu52i.d $a6, $a6, 1023 + vreplgr2vr.d $vr4, $a6 + move $a6, $a0 .p2align 4, , 16 .LBB8_921: # %vector.body # =>This Inner Loop Header: Depth=1 vaddi.wu $vr5, $vr2, 2 - vpickve2gr.w $a5, $vr2, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa6, $a5 + vpickve2gr.w $a7, $vr2, 1 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa6, $a7 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a5, $vr2, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa7, $a5 + vpickve2gr.w $a7, $vr2, 0 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa7, $a7 ffint.d.l $fa7, $fa7 vextrins.d $vr7, $vr6, 16 - vpickve2gr.w $a5, $vr5, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa6, $a5 + vpickve2gr.w $a7, $vr5, 1 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa6, $a7 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a5, $vr5, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa5, $a5 + vpickve2gr.w $a7, $vr5, 0 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa5, $a7 ffint.d.l $fa5, $fa5 vextrins.d $vr5, $vr6, 16 vfadd.d $vr6, $vr7, $vr3 @@ -15198,34 +15848,38 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat vfadd.d $vr5, $vr5, $vr4 vfdiv.d $vr6, $vr6, $vr7 vfdiv.d $vr5, $vr8, $vr5 - vst $vr6, $a3, -16 - vst $vr5, $a3, 0 + vst $vr6, $a5, -16 + vst $vr5, $a5, 0 vaddi.wu $vr2, $vr2, 4 - addi.d $a4, $a4, -4 - addi.d $a3, $a3, 32 - bnez $a4, .LBB8_921 + addi.d $a6, $a6, -4 + addi.d $a5, $a5, 32 + bnez $a6, .LBB8_921 # %bb.922: # %middle.block beq $a0, $a1, .LBB8_1187 .LBB8_923: # %.lr.ph.i1342.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 - alsl.d $a2, $a0, $a2, 3 + alsl.d $a3, $a0, $a3, 3 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + ori $a2, $a2, 3880 + lu32i.d $a2, 129446 + lu52i.d $a2, $a2, 1023 + movgr2fr.d $fa2, $a2 .p2align 4, , 16 .LBB8_924: # %.lr.ph.i1342 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a0, 31, 0 - movgr2fr.d $fa3, $a3 + bstrpick.d $a2, $a0, 31, 0 + movgr2fr.d $fa3, $a2 ffint.d.l $fa3, $fa3 fadd.d $fa4, $fa3, $fa1 fmul.d $fa4, $fa0, $fa4 fadd.d $fa3, $fa3, $fa2 fdiv.d $fa3, $fa4, $fa3 - fst.d $fa3, $a2, 0 + fst.d $fa3, $a3, 0 addi.d $a1, $a1, -1 - addi.d $a2, $a2, 8 + addi.d $a3, $a3, 8 addi.w $a0, $a0, 1 bnez $a1, .LBB8_924 b .LBB8_1187 @@ -15287,12 +15941,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.927: # %middle.block2534 beq $a1, $a2, .LBB8_930 .LBB8_928: # %.lr.ph.i860.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_929: # %.lr.ph.i860 # =>This Inner Loop Header: Depth=1 @@ -15381,12 +16041,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.935: # %middle.block2550 beq $a0, $a1, .LBB8_1187 .LBB8_936: # %.lr.ph.i868.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_937: # %.lr.ph.i868 # =>This Inner Loop Header: Depth=1 @@ -15461,12 +16127,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.940: # %middle.block2630 beq $a1, $a2, .LBB8_943 .LBB8_941: # %.lr.ph.i772.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_942: # %.lr.ph.i772 # =>This Inner Loop Header: Depth=1 @@ -15555,12 +16227,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.948: # %middle.block2646 beq $a1, $a2, .LBB8_951 .LBB8_949: # %.lr.ph.i780.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_950: # %.lr.ph.i780 # =>This Inner Loop Header: Depth=1 @@ -15649,12 +16327,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.956: # %middle.block2662 beq $a1, $a2, .LBB8_959 .LBB8_957: # %.lr.ph.i788.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_958: # %.lr.ph.i788 # =>This Inner Loop Header: Depth=1 @@ -15743,12 +16427,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.964: # %middle.block2678 beq $a1, $a2, .LBB8_967 .LBB8_965: # %.lr.ph.i796.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_966: # %.lr.ph.i796 # =>This Inner Loop Header: Depth=1 @@ -15837,12 +16527,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.972: # %middle.block2694 beq $a1, $a2, .LBB8_975 .LBB8_973: # %.lr.ph.i804.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_974: # %.lr.ph.i804 # =>This Inner Loop Header: Depth=1 @@ -15931,12 +16627,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.980: # %middle.block2710 beq $a1, $a2, .LBB8_983 .LBB8_981: # %.lr.ph.i812.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_982: # %.lr.ph.i812 # =>This Inner Loop Header: Depth=1 @@ -16025,12 +16727,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.988: # %middle.block2726 beq $a0, $a1, .LBB8_1187 .LBB8_989: # %.lr.ph.i820.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_990: # %.lr.ph.i820 # =>This Inner Loop Header: Depth=1 @@ -16105,12 +16813,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.993: # %middle.block2502 beq $a1, $a2, .LBB8_996 .LBB8_994: # %.lr.ph.i876.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_995: # %.lr.ph.i876 # =>This Inner Loop Header: Depth=1 @@ -16199,12 +16913,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1001: # %middle.block2518 beq $a0, $a1, .LBB8_1187 .LBB8_1002: # %.lr.ph.i884.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_1003: # %.lr.ph.i884 # =>This Inner Loop Header: Depth=1 @@ -16279,12 +16999,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1006: # %middle.block1846 beq $a1, $a2, .LBB8_1009 .LBB8_1007: # %.lr.ph.i1150.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1008: # %.lr.ph.i1150 # =>This Inner Loop Header: Depth=1 @@ -16373,12 +17099,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1014: # %middle.block1862 beq $a1, $a2, .LBB8_1017 .LBB8_1015: # %.lr.ph.i1158.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1016: # %.lr.ph.i1158 # =>This Inner Loop Header: Depth=1 @@ -16467,12 +17199,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1022: # %middle.block1878 beq $a1, $a2, .LBB8_1025 .LBB8_1023: # %.lr.ph.i1166.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1024: # %.lr.ph.i1166 # =>This Inner Loop Header: Depth=1 @@ -16561,12 +17299,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1030: # %middle.block1894 beq $a1, $a2, .LBB8_1033 .LBB8_1031: # %.lr.ph.i1174.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1032: # %.lr.ph.i1174 # =>This Inner Loop Header: Depth=1 @@ -16655,12 +17399,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1038: # %middle.block1910 beq $a1, $a2, .LBB8_1041 .LBB8_1039: # %.lr.ph.i1182.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1040: # %.lr.ph.i1182 # =>This Inner Loop Header: Depth=1 @@ -16749,12 +17499,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1046: # %middle.block1926 beq $a1, $a2, .LBB8_1049 .LBB8_1047: # %.lr.ph.i1190.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1048: # %.lr.ph.i1190 # =>This Inner Loop Header: Depth=1 @@ -16843,12 +17599,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1054: # %middle.block1942 beq $a1, $a2, .LBB8_1057 .LBB8_1055: # %.lr.ph.i1198.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1056: # %.lr.ph.i1198 # =>This Inner Loop Header: Depth=1 @@ -16937,12 +17699,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1062: # %middle.block1958 beq $a1, $a2, .LBB8_1065 .LBB8_1063: # %.lr.ph.i1206.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1064: # %.lr.ph.i1206 # =>This Inner Loop Header: Depth=1 @@ -17031,12 +17799,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1070: # %middle.block1974 beq $a1, $a2, .LBB8_1073 .LBB8_1071: # %.lr.ph.i1214.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1072: # %.lr.ph.i1214 # =>This Inner Loop Header: Depth=1 @@ -17125,12 +17899,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1078: # %middle.block1990 beq $a0, $a1, .LBB8_1187 .LBB8_1079: # %.lr.ph.i1222.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_1080: # %.lr.ph.i1222 # =>This Inner Loop Header: Depth=1 @@ -17205,12 +17985,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1083: # %middle.block1798 beq $a1, $a2, .LBB8_1086 .LBB8_1084: # %.lr.ph.i1230.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1085: # %.lr.ph.i1230 # =>This Inner Loop Header: Depth=1 @@ -17299,12 +18085,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1091: # %middle.block1814 beq $a1, $a2, .LBB8_1094 .LBB8_1092: # %.lr.ph.i1238.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1093: # %.lr.ph.i1238 # =>This Inner Loop Header: Depth=1 @@ -17393,12 +18185,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1099: # %middle.block1830 beq $a0, $a1, .LBB8_1187 .LBB8_1100: # %.lr.ph.i1246.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_1101: # %.lr.ph.i1246 # =>This Inner Loop Header: Depth=1 @@ -17473,12 +18271,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1104: # %middle.block2070 beq $a1, $a2, .LBB8_1107 .LBB8_1105: # %.lr.ph.i1030.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1106: # %.lr.ph.i1030 # =>This Inner Loop Header: Depth=1 @@ -17567,12 +18371,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1112: # %middle.block2086 beq $a1, $a2, .LBB8_1115 .LBB8_1113: # %.lr.ph.i1038.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1114: # %.lr.ph.i1038 # =>This Inner Loop Header: Depth=1 @@ -17661,12 +18471,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1120: # %middle.block2102 beq $a1, $a2, .LBB8_1123 .LBB8_1121: # %.lr.ph.i1046.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1122: # %.lr.ph.i1046 # =>This Inner Loop Header: Depth=1 @@ -17755,12 +18571,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1128: # %middle.block2118 beq $a1, $a2, .LBB8_1131 .LBB8_1129: # %.lr.ph.i1054.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1130: # %.lr.ph.i1054 # =>This Inner Loop Header: Depth=1 @@ -17849,12 +18671,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1136: # %middle.block2134 beq $a1, $a2, .LBB8_1139 .LBB8_1137: # %.lr.ph.i1062.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1138: # %.lr.ph.i1062 # =>This Inner Loop Header: Depth=1 @@ -17943,12 +18771,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1144: # %middle.block2150 beq $a1, $a2, .LBB8_1147 .LBB8_1145: # %.lr.ph.i1070.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1146: # %.lr.ph.i1070 # =>This Inner Loop Header: Depth=1 @@ -18037,12 +18871,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1152: # %middle.block2166 beq $a1, $a2, .LBB8_1155 .LBB8_1153: # %.lr.ph.i1078.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1154: # %.lr.ph.i1078 # =>This Inner Loop Header: Depth=1 @@ -18131,12 +18971,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1160: # %middle.block2182 beq $a1, $a2, .LBB8_1163 .LBB8_1161: # %.lr.ph.i1086.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1162: # %.lr.ph.i1086 # =>This Inner Loop Header: Depth=1 @@ -18225,12 +19071,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1168: # %middle.block2198 beq $a1, $a2, .LBB8_1171 .LBB8_1169: # %.lr.ph.i1094.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1170: # %.lr.ph.i1094 # =>This Inner Loop Header: Depth=1 @@ -18319,12 +19171,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1176: # %middle.block2214 beq $a1, $a2, .LBB8_1179 .LBB8_1177: # %.lr.ph.i1102.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1178: # %.lr.ph.i1102 # =>This Inner Loop Header: Depth=1 @@ -18413,12 +19271,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1184: # %middle.block2230 beq $a0, $a1, .LBB8_1187 .LBB8_1185: # %.lr.ph.i1110.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_1186: # %.lr.ph.i1110 # =>This Inner Loop Header: Depth=1 @@ -18489,15 +19353,9 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat .LCPI9_0: .dword 0x3fb999999999999a # double 0.10000000000000001 .dword 0x3fc999999999999a # double 0.20000000000000001 -.LCPI9_3: +.LCPI9_1: .dword 0x3fc999999999999a # double 0.20000000000000001 .dword 0x3fd3333333333333 # double 0.29999999999999999 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI9_1: - .dword 0x3ff199999999999a # double 1.1000000000000001 -.LCPI9_2: - .dword 0x3ff1f9a6b50b0f28 # double 1.1234500000000001 .text .globl _Z8loopInitj .p2align 5 @@ -18743,8 +19601,8 @@ _Z8loopInitj: # @_Z8loopInitj .LBB9_40: pcalau12i $a0, %pc_hi20(.LCPI9_0) addi.d $a0, $a0, %pc_lo12(.LCPI9_0) - pcalau12i $a1, %pc_hi20(.LCPI9_3) - addi.d $a1, $a1, %pc_lo12(.LCPI9_3) + pcalau12i $a1, %pc_hi20(.LCPI9_1) + addi.d $a1, $a1, %pc_lo12(.LCPI9_1) ld.w $a3, $s0, 1032 blez $a3, .LBB9_577 # %bb.41: # %.lr.ph.preheader.i429 @@ -18958,8 +19816,10 @@ _Z8loopInitj: # @_Z8loopInitj pcalau12i $a2, %pc_hi20(.LCPI9_0) addi.d $a2, $a2, %pc_lo12(.LCPI9_0) fldx.d $fa0, $a2, $a0 - ld.d $a2, $s0, 472 + ld.d $a3, $s0, 472 ori $a0, $zero, 4 + lu12i.w $a4, -419431 + lu12i.w $a2, -307024 bgeu $a1, $a0, .LBB9_920 # %bb.78: move $a0, $zero @@ -19148,12 +20008,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.102: # %middle.block3802 beq $a1, $a2, .LBB9_105 .LBB9_103: # %.lr.ph.i183.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_104: # %.lr.ph.i183 # =>This Inner Loop Header: Depth=1 @@ -19242,12 +20108,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.110: # %middle.block3818 beq $a1, $a2, .LBB9_113 .LBB9_111: # %.lr.ph.i191.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_112: # %.lr.ph.i191 # =>This Inner Loop Header: Depth=1 @@ -19336,12 +20208,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.118: # %middle.block3834 beq $a1, $a2, .LBB9_121 .LBB9_119: # %.lr.ph.i199.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_120: # %.lr.ph.i199 # =>This Inner Loop Header: Depth=1 @@ -19430,12 +20308,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.126: # %middle.block3850 beq $a1, $a2, .LBB9_129 .LBB9_127: # %.lr.ph.i207.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_128: # %.lr.ph.i207 # =>This Inner Loop Header: Depth=1 @@ -19524,12 +20408,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.134: # %middle.block3866 beq $a1, $a2, .LBB9_137 .LBB9_135: # %.lr.ph.i215.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_136: # %.lr.ph.i215 # =>This Inner Loop Header: Depth=1 @@ -19618,12 +20508,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.142: # %middle.block3882 beq $a0, $a1, .LBB9_1187 .LBB9_143: # %.lr.ph.i223.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_144: # %.lr.ph.i223 # =>This Inner Loop Header: Depth=1 @@ -19698,12 +20594,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.147: # %middle.block3546 beq $a1, $a2, .LBB9_150 .LBB9_148: # %.lr.ph.i231.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_149: # %.lr.ph.i231 # =>This Inner Loop Header: Depth=1 @@ -19792,12 +20694,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.155: # %middle.block3562 beq $a1, $a2, .LBB9_158 .LBB9_156: # %.lr.ph.i239.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_157: # %.lr.ph.i239 # =>This Inner Loop Header: Depth=1 @@ -19886,12 +20794,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.163: # %middle.block3578 beq $a1, $a2, .LBB9_166 .LBB9_164: # %.lr.ph.i247.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_165: # %.lr.ph.i247 # =>This Inner Loop Header: Depth=1 @@ -19980,12 +20894,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.171: # %middle.block3594 beq $a1, $a2, .LBB9_174 .LBB9_172: # %.lr.ph.i255.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_173: # %.lr.ph.i255 # =>This Inner Loop Header: Depth=1 @@ -20074,12 +20994,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.179: # %middle.block3610 beq $a1, $a2, .LBB9_182 .LBB9_180: # %.lr.ph.i263.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_181: # %.lr.ph.i263 # =>This Inner Loop Header: Depth=1 @@ -20168,12 +21094,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.187: # %middle.block3626 beq $a1, $a2, .LBB9_190 .LBB9_188: # %.lr.ph.i271.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_189: # %.lr.ph.i271 # =>This Inner Loop Header: Depth=1 @@ -20262,12 +21194,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.195: # %middle.block3642 beq $a1, $a2, .LBB9_198 .LBB9_196: # %.lr.ph.i279.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_197: # %.lr.ph.i279 # =>This Inner Loop Header: Depth=1 @@ -20356,12 +21294,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.203: # %middle.block3658 beq $a1, $a2, .LBB9_206 .LBB9_204: # %.lr.ph.i287.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_205: # %.lr.ph.i287 # =>This Inner Loop Header: Depth=1 @@ -20450,12 +21394,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.211: # %middle.block3674 beq $a1, $a2, .LBB9_214 .LBB9_212: # %.lr.ph.i295.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_213: # %.lr.ph.i295 # =>This Inner Loop Header: Depth=1 @@ -20544,12 +21494,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.219: # %middle.block3690 beq $a1, $a2, .LBB9_222 .LBB9_220: # %.lr.ph.i303.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_221: # %.lr.ph.i303 # =>This Inner Loop Header: Depth=1 @@ -20638,12 +21594,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.227: # %middle.block3706 beq $a1, $a2, .LBB9_230 .LBB9_228: # %.lr.ph.i311.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_229: # %.lr.ph.i311 # =>This Inner Loop Header: Depth=1 @@ -20732,12 +21694,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.235: # %middle.block3722 beq $a1, $a2, .LBB9_238 .LBB9_236: # %.lr.ph.i319.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_237: # %.lr.ph.i319 # =>This Inner Loop Header: Depth=1 @@ -20826,12 +21794,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.243: # %middle.block3738 beq $a1, $a2, .LBB9_246 .LBB9_244: # %.lr.ph.i327.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_245: # %.lr.ph.i327 # =>This Inner Loop Header: Depth=1 @@ -20920,12 +21894,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.251: # %middle.block3754 beq $a1, $a2, .LBB9_254 .LBB9_252: # %.lr.ph.i335.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_253: # %.lr.ph.i335 # =>This Inner Loop Header: Depth=1 @@ -21014,12 +21994,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.259: # %middle.block3770 beq $a1, $a2, .LBB9_262 .LBB9_260: # %.lr.ph.i343.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_261: # %.lr.ph.i343 # =>This Inner Loop Header: Depth=1 @@ -21108,12 +22094,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.267: # %middle.block3786 beq $a0, $a1, .LBB9_1187 .LBB9_268: # %.lr.ph.i351.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_269: # %.lr.ph.i351 # =>This Inner Loop Header: Depth=1 @@ -21210,12 +22202,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.274: # %middle.block2005 beq $a1, $a2, .LBB9_277 .LBB9_275: # %.lr.ph.i1117.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_276: # %.lr.ph.i1117 # =>This Inner Loop Header: Depth=1 @@ -21304,12 +22302,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.282: # %middle.block2021 beq $a1, $a2, .LBB9_285 .LBB9_283: # %.lr.ph.i1125.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_284: # %.lr.ph.i1125 # =>This Inner Loop Header: Depth=1 @@ -21398,12 +22402,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.290: # %middle.block2037 beq $a1, $a2, .LBB9_293 .LBB9_291: # %.lr.ph.i1133.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_292: # %.lr.ph.i1133 # =>This Inner Loop Header: Depth=1 @@ -21492,12 +22502,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.298: # %middle.block2053 beq $a0, $a1, .LBB9_1187 .LBB9_299: # %.lr.ph.i1141.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_300: # %.lr.ph.i1141 # =>This Inner Loop Header: Depth=1 @@ -21572,12 +22588,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.303: # %middle.block2869 beq $a1, $a2, .LBB9_306 .LBB9_304: # %.lr.ph.i691.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_305: # %.lr.ph.i691 # =>This Inner Loop Header: Depth=1 @@ -21666,12 +22688,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.311: # %middle.block2885 beq $a0, $a1, .LBB9_1187 .LBB9_312: # %.lr.ph.i699.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_313: # %.lr.ph.i699 # =>This Inner Loop Header: Depth=1 @@ -21746,12 +22774,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.316: # %middle.block2933 beq $a1, $a2, .LBB9_319 .LBB9_317: # %.lr.ph.i659.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_318: # %.lr.ph.i659 # =>This Inner Loop Header: Depth=1 @@ -21840,12 +22874,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.324: # %middle.block2949 beq $a0, $a1, .LBB9_1187 .LBB9_325: # %.lr.ph.i667.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_326: # %.lr.ph.i667 # =>This Inner Loop Header: Depth=1 @@ -21920,12 +22960,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.329: # %middle.block3045 beq $a1, $a2, .LBB9_332 .LBB9_330: # %.lr.ph.i578.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_331: # %.lr.ph.i578 # =>This Inner Loop Header: Depth=1 @@ -22014,12 +23060,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.337: # %middle.block3061 beq $a1, $a2, .LBB9_340 .LBB9_338: # %.lr.ph.i586.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_339: # %.lr.ph.i586 # =>This Inner Loop Header: Depth=1 @@ -22108,12 +23160,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.345: # %middle.block3077 beq $a1, $a2, .LBB9_348 .LBB9_346: # %.lr.ph.i594.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_347: # %.lr.ph.i594 # =>This Inner Loop Header: Depth=1 @@ -22202,12 +23260,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.353: # %middle.block3093 beq $a1, $a2, .LBB9_356 .LBB9_354: # %.lr.ph.i602.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_355: # %.lr.ph.i602 # =>This Inner Loop Header: Depth=1 @@ -22296,12 +23360,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.361: # %middle.block3109 beq $a0, $a1, .LBB9_1187 .LBB9_362: # %.lr.ph.i610.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_363: # %.lr.ph.i610 # =>This Inner Loop Header: Depth=1 @@ -22376,12 +23446,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.366: # %middle.block3029 beq $a0, $a1, .LBB9_1187 .LBB9_367: # %.lr.ph.i619.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_368: # %.lr.ph.i619 # =>This Inner Loop Header: Depth=1 @@ -22456,12 +23532,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.371: # %middle.block3898 beq $a1, $a2, .LBB9_374 .LBB9_372: # %.lr.ph.i.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_373: # %.lr.ph.i # =>This Inner Loop Header: Depth=1 @@ -22550,12 +23632,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.379: # %middle.block3914 beq $a1, $a2, .LBB9_382 .LBB9_380: # %.lr.ph.i167.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_381: # %.lr.ph.i167 # =>This Inner Loop Header: Depth=1 @@ -22644,12 +23732,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.387: # %middle.block3930 beq $a0, $a1, .LBB9_1187 .LBB9_388: # %.lr.ph.i175.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_389: # %.lr.ph.i175 # =>This Inner Loop Header: Depth=1 @@ -22724,12 +23818,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.392: # %middle.block3205 beq $a1, $a2, .LBB9_395 .LBB9_393: # %.lr.ph.i498.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_394: # %.lr.ph.i498 # =>This Inner Loop Header: Depth=1 @@ -22818,12 +23918,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.400: # %middle.block3221 beq $a1, $a2, .LBB9_403 .LBB9_401: # %.lr.ph.i506.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_402: # %.lr.ph.i506 # =>This Inner Loop Header: Depth=1 @@ -22912,12 +24018,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.408: # %middle.block3237 beq $a1, $a2, .LBB9_411 .LBB9_409: # %.lr.ph.i514.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_410: # %.lr.ph.i514 # =>This Inner Loop Header: Depth=1 @@ -23006,12 +24118,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.416: # %middle.block3253 beq $a1, $a2, .LBB9_419 .LBB9_417: # %.lr.ph.i522.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_418: # %.lr.ph.i522 # =>This Inner Loop Header: Depth=1 @@ -23100,12 +24218,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.424: # %middle.block3269 beq $a0, $a1, .LBB9_1187 .LBB9_425: # %.lr.ph.i530.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_426: # %.lr.ph.i530 # =>This Inner Loop Header: Depth=1 @@ -23180,12 +24304,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.429: # %middle.block2245 beq $a1, $a2, .LBB9_432 .LBB9_430: # %.lr.ph.i943.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_431: # %.lr.ph.i943 # =>This Inner Loop Header: Depth=1 @@ -23274,12 +24404,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.437: # %middle.block2261 beq $a1, $a2, .LBB9_440 .LBB9_438: # %.lr.ph.i951.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_439: # %.lr.ph.i951 # =>This Inner Loop Header: Depth=1 @@ -23368,12 +24504,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.445: # %middle.block2277 beq $a1, $a2, .LBB9_448 .LBB9_446: # %.lr.ph.i959.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_447: # %.lr.ph.i959 # =>This Inner Loop Header: Depth=1 @@ -23462,12 +24604,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.453: # %middle.block2293 beq $a1, $a2, .LBB9_456 .LBB9_454: # %.lr.ph.i967.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_455: # %.lr.ph.i967 # =>This Inner Loop Header: Depth=1 @@ -23556,12 +24704,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.461: # %middle.block2309 beq $a1, $a2, .LBB9_464 .LBB9_462: # %.lr.ph.i975.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_463: # %.lr.ph.i975 # =>This Inner Loop Header: Depth=1 @@ -23650,12 +24804,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.469: # %middle.block2325 beq $a1, $a2, .LBB9_472 .LBB9_470: # %.lr.ph.i983.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_471: # %.lr.ph.i983 # =>This Inner Loop Header: Depth=1 @@ -23744,12 +24904,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.477: # %middle.block2341 beq $a1, $a2, .LBB9_480 .LBB9_478: # %.lr.ph.i991.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_479: # %.lr.ph.i991 # =>This Inner Loop Header: Depth=1 @@ -23838,12 +25004,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.485: # %middle.block2357 beq $a1, $a2, .LBB9_488 .LBB9_486: # %.lr.ph.i999.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_487: # %.lr.ph.i999 # =>This Inner Loop Header: Depth=1 @@ -23932,12 +25104,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.493: # %middle.block2373 beq $a1, $a2, .LBB9_496 .LBB9_494: # %.lr.ph.i1007.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_495: # %.lr.ph.i1007 # =>This Inner Loop Header: Depth=1 @@ -24026,12 +25204,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.501: # %middle.block2389 beq $a0, $a1, .LBB9_504 .LBB9_502: # %.lr.ph.i1015.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_503: # %.lr.ph.i1015 # =>This Inner Loop Header: Depth=1 @@ -24136,12 +25320,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.512: # %middle.block2405 beq $a0, $a1, .LBB9_515 .LBB9_513: # %.lr.ph.i891.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_514: # %.lr.ph.i891 # =>This Inner Loop Header: Depth=1 @@ -24230,12 +25420,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.520: # %middle.block2421 beq $a0, $a1, .LBB9_523 .LBB9_521: # %.lr.ph.i899.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_522: # %.lr.ph.i899 # =>This Inner Loop Header: Depth=1 @@ -24324,12 +25520,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.528: # %middle.block2437 beq $a0, $a1, .LBB9_531 .LBB9_529: # %.lr.ph.i907.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_530: # %.lr.ph.i907 # =>This Inner Loop Header: Depth=1 @@ -24418,12 +25620,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.536: # %middle.block2453 beq $a0, $a1, .LBB9_539 .LBB9_537: # %.lr.ph.i915.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_538: # %.lr.ph.i915 # =>This Inner Loop Header: Depth=1 @@ -24512,12 +25720,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.544: # %middle.block2469 beq $a0, $a1, .LBB9_547 .LBB9_545: # %.lr.ph.i923.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_546: # %.lr.ph.i923 # =>This Inner Loop Header: Depth=1 @@ -24624,12 +25838,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.556: # %middle.block2485 beq $a0, $a1, .LBB9_1187 .LBB9_557: # %.lr.ph.i935.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_558: # %.lr.ph.i935 # =>This Inner Loop Header: Depth=1 @@ -24704,12 +25924,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.561: # %middle.block2901 beq $a1, $a2, .LBB9_564 .LBB9_562: # %.lr.ph.i675.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_563: # %.lr.ph.i675 # =>This Inner Loop Header: Depth=1 @@ -24798,12 +26024,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.569: # %middle.block2917 beq $a0, $a1, .LBB9_1187 .LBB9_570: # %.lr.ph.i683.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_571: # %.lr.ph.i683 # =>This Inner Loop Header: Depth=1 @@ -24869,13 +26101,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.574: # %middle.block3318 beq $a2, $a3, .LBB9_577 .LBB9_575: # %.lr.ph.i431.preheader - pcalau12i $a5, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI9_1) - pcalau12i $a5, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI9_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB9_576: # %.lr.ph.i431 # =>This Inner Loop Header: Depth=1 @@ -24959,13 +26197,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.582: # %middle.block3335 beq $a2, $a3, .LBB9_585 .LBB9_583: # %.lr.ph.i440.preheader - pcalau12i $a5, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI9_1) - pcalau12i $a5, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI9_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB9_584: # %.lr.ph.i440 # =>This Inner Loop Header: Depth=1 @@ -25049,13 +26293,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.590: # %middle.block3352 beq $a2, $a3, .LBB9_593 .LBB9_591: # %.lr.ph.i451.preheader - pcalau12i $a5, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI9_1) - pcalau12i $a5, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI9_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB9_592: # %.lr.ph.i451 # =>This Inner Loop Header: Depth=1 @@ -25139,13 +26389,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.598: # %middle.block3369 beq $a2, $a3, .LBB9_601 .LBB9_599: # %.lr.ph.i462.preheader - pcalau12i $a5, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI9_1) - pcalau12i $a5, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI9_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB9_600: # %.lr.ph.i462 # =>This Inner Loop Header: Depth=1 @@ -25229,13 +26485,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.606: # %middle.block3386 beq $a0, $a2, .LBB9_1187 .LBB9_607: # %.lr.ph.i473.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a3, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a0 alsl.d $a1, $a0, $a1, 4 addi.d $a1, $a1, 8 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa3, $a3 .p2align 4, , 16 .LBB9_608: # %.lr.ph.i473 # =>This Inner Loop Header: Depth=1 @@ -25313,12 +26575,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.611: # %middle.block1621 beq $a1, $a2, .LBB9_614 .LBB9_612: # %.lr.ph.i1293.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_613: # %.lr.ph.i1293 # =>This Inner Loop Header: Depth=1 @@ -25407,12 +26675,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.619: # %middle.block1637 beq $a1, $a2, .LBB9_622 .LBB9_620: # %.lr.ph.i1301.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_621: # %.lr.ph.i1301 # =>This Inner Loop Header: Depth=1 @@ -25501,12 +26775,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.627: # %middle.block1653 beq $a1, $a2, .LBB9_630 .LBB9_628: # %.lr.ph.i1309.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_629: # %.lr.ph.i1309 # =>This Inner Loop Header: Depth=1 @@ -25595,12 +26875,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.635: # %middle.block1669 beq $a1, $a2, .LBB9_638 .LBB9_636: # %.lr.ph.i1317.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_637: # %.lr.ph.i1317 # =>This Inner Loop Header: Depth=1 @@ -25689,12 +26975,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.643: # %middle.block1685 beq $a1, $a2, .LBB9_646 .LBB9_644: # %.lr.ph.i1325.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_645: # %.lr.ph.i1325 # =>This Inner Loop Header: Depth=1 @@ -25783,12 +27075,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.651: # %middle.block1701 beq $a0, $a1, .LBB9_1187 .LBB9_652: # %.lr.ph.i1333.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_653: # %.lr.ph.i1333 # =>This Inner Loop Header: Depth=1 @@ -25863,12 +27161,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.656: # %middle.block2821 beq $a1, $a2, .LBB9_659 .LBB9_657: # %.lr.ph.i707.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_658: # %.lr.ph.i707 # =>This Inner Loop Header: Depth=1 @@ -25957,12 +27261,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.664: # %middle.block2837 beq $a1, $a2, .LBB9_667 .LBB9_665: # %.lr.ph.i715.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_666: # %.lr.ph.i715 # =>This Inner Loop Header: Depth=1 @@ -26051,12 +27361,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.672: # %middle.block2853 beq $a0, $a1, .LBB9_1187 .LBB9_673: # %.lr.ph.i723.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_674: # %.lr.ph.i723 # =>This Inner Loop Header: Depth=1 @@ -26131,12 +27447,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.677: # %middle.block2597 beq $a1, $a2, .LBB9_680 .LBB9_678: # %.lr.ph.i827.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_679: # %.lr.ph.i827 # =>This Inner Loop Header: Depth=1 @@ -26225,12 +27547,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.685: # %middle.block2613 beq $a0, $a1, .LBB9_1187 .LBB9_686: # %.lr.ph.i835.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_687: # %.lr.ph.i835 # =>This Inner Loop Header: Depth=1 @@ -26305,12 +27633,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.690: # %middle.block3482 beq $a1, $a2, .LBB9_693 .LBB9_691: # %.lr.ph.i359.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_692: # %.lr.ph.i359 # =>This Inner Loop Header: Depth=1 @@ -26399,12 +27733,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.698: # %middle.block3498 beq $a1, $a2, .LBB9_701 .LBB9_699: # %.lr.ph.i367.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_700: # %.lr.ph.i367 # =>This Inner Loop Header: Depth=1 @@ -26493,12 +27833,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.706: # %middle.block3514 beq $a1, $a2, .LBB9_709 .LBB9_707: # %.lr.ph.i375.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_708: # %.lr.ph.i375 # =>This Inner Loop Header: Depth=1 @@ -26587,12 +27933,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.714: # %middle.block3530 beq $a0, $a1, .LBB9_1187 .LBB9_715: # %.lr.ph.i383.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_716: # %.lr.ph.i383 # =>This Inner Loop Header: Depth=1 @@ -26667,12 +28019,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.719: # %middle.block2565 beq $a1, $a2, .LBB9_722 .LBB9_720: # %.lr.ph.i843.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_721: # %.lr.ph.i843 # =>This Inner Loop Header: Depth=1 @@ -26761,12 +28119,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.727: # %middle.block2581 beq $a0, $a1, .LBB9_1187 .LBB9_728: # %.lr.ph.i851.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_729: # %.lr.ph.i851 # =>This Inner Loop Header: Depth=1 @@ -26841,12 +28205,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.732: # %middle.block2965 beq $a1, $a2, .LBB9_735 .LBB9_733: # %.lr.ph.i627.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_734: # %.lr.ph.i627 # =>This Inner Loop Header: Depth=1 @@ -26935,12 +28305,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.740: # %middle.block2981 beq $a1, $a2, .LBB9_743 .LBB9_741: # %.lr.ph.i635.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_742: # %.lr.ph.i635 # =>This Inner Loop Header: Depth=1 @@ -27029,12 +28405,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.748: # %middle.block2997 beq $a1, $a2, .LBB9_751 .LBB9_749: # %.lr.ph.i643.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_750: # %.lr.ph.i643 # =>This Inner Loop Header: Depth=1 @@ -27123,12 +28505,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.756: # %middle.block3013 beq $a0, $a1, .LBB9_1187 .LBB9_757: # %.lr.ph.i651.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_758: # %.lr.ph.i651 # =>This Inner Loop Header: Depth=1 @@ -27203,12 +28591,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.761: # %middle.block3402 beq $a1, $a2, .LBB9_764 .LBB9_762: # %.lr.ph.i391.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_763: # %.lr.ph.i391 # =>This Inner Loop Header: Depth=1 @@ -27297,12 +28691,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.769: # %middle.block3418 beq $a1, $a2, .LBB9_772 .LBB9_770: # %.lr.ph.i399.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_771: # %.lr.ph.i399 # =>This Inner Loop Header: Depth=1 @@ -27391,12 +28791,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.777: # %middle.block3434 beq $a1, $a2, .LBB9_780 .LBB9_778: # %.lr.ph.i407.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_779: # %.lr.ph.i407 # =>This Inner Loop Header: Depth=1 @@ -27485,12 +28891,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.785: # %middle.block3450 beq $a1, $a2, .LBB9_788 .LBB9_786: # %.lr.ph.i415.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_787: # %.lr.ph.i415 # =>This Inner Loop Header: Depth=1 @@ -27579,12 +28991,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.793: # %middle.block3466 beq $a0, $a1, .LBB9_1187 .LBB9_794: # %.lr.ph.i423.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_795: # %.lr.ph.i423 # =>This Inner Loop Header: Depth=1 @@ -27659,12 +29077,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.798: # %middle.block2741 beq $a1, $a2, .LBB9_801 .LBB9_799: # %.lr.ph.i731.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_800: # %.lr.ph.i731 # =>This Inner Loop Header: Depth=1 @@ -27753,12 +29177,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.806: # %middle.block2757 beq $a1, $a2, .LBB9_809 .LBB9_807: # %.lr.ph.i739.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_808: # %.lr.ph.i739 # =>This Inner Loop Header: Depth=1 @@ -27847,12 +29277,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.814: # %middle.block2773 beq $a1, $a2, .LBB9_817 .LBB9_815: # %.lr.ph.i747.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_816: # %.lr.ph.i747 # =>This Inner Loop Header: Depth=1 @@ -27941,12 +29377,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.822: # %middle.block2789 beq $a1, $a2, .LBB9_825 .LBB9_823: # %.lr.ph.i755.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_824: # %.lr.ph.i755 # =>This Inner Loop Header: Depth=1 @@ -28035,12 +29477,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.830: # %middle.block2805 beq $a0, $a1, .LBB9_1187 .LBB9_831: # %.lr.ph.i763.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_832: # %.lr.ph.i763 # =>This Inner Loop Header: Depth=1 @@ -28115,12 +29563,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.835: # %middle.block3125 beq $a1, $a2, .LBB9_838 .LBB9_836: # %.lr.ph.i538.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_837: # %.lr.ph.i538 # =>This Inner Loop Header: Depth=1 @@ -28209,12 +29663,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.843: # %middle.block3141 beq $a1, $a2, .LBB9_846 .LBB9_844: # %.lr.ph.i546.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_845: # %.lr.ph.i546 # =>This Inner Loop Header: Depth=1 @@ -28303,12 +29763,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.851: # %middle.block3157 beq $a1, $a2, .LBB9_854 .LBB9_852: # %.lr.ph.i554.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_853: # %.lr.ph.i554 # =>This Inner Loop Header: Depth=1 @@ -28397,12 +29863,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.859: # %middle.block3173 beq $a1, $a2, .LBB9_862 .LBB9_860: # %.lr.ph.i562.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_861: # %.lr.ph.i562 # =>This Inner Loop Header: Depth=1 @@ -28491,12 +29963,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.867: # %middle.block3189 beq $a0, $a1, .LBB9_1187 .LBB9_868: # %.lr.ph.i570.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_869: # %.lr.ph.i570 # =>This Inner Loop Header: Depth=1 @@ -28571,12 +30049,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.872: # %middle.block3285 beq $a1, $a2, .LBB9_875 .LBB9_873: # %.lr.ph.i482.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_874: # %.lr.ph.i482 # =>This Inner Loop Header: Depth=1 @@ -28665,12 +30149,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.880: # %middle.block3301 beq $a0, $a1, .LBB9_1187 .LBB9_881: # %.lr.ph.i490.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_882: # %.lr.ph.i490 # =>This Inner Loop Header: Depth=1 @@ -28745,12 +30235,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.885: # %middle.block1717 beq $a1, $a2, .LBB9_888 .LBB9_886: # %.lr.ph.i1253.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_887: # %.lr.ph.i1253 # =>This Inner Loop Header: Depth=1 @@ -28839,12 +30335,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.893: # %middle.block1733 beq $a1, $a2, .LBB9_896 .LBB9_894: # %.lr.ph.i1261.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_895: # %.lr.ph.i1261 # =>This Inner Loop Header: Depth=1 @@ -28933,12 +30435,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.901: # %middle.block1749 beq $a1, $a2, .LBB9_904 .LBB9_902: # %.lr.ph.i1269.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_903: # %.lr.ph.i1269 # =>This Inner Loop Header: Depth=1 @@ -29027,12 +30535,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.909: # %middle.block1765 beq $a1, $a2, .LBB9_912 .LBB9_910: # %.lr.ph.i1277.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_911: # %.lr.ph.i1277 # =>This Inner Loop Header: Depth=1 @@ -29121,12 +30635,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.917: # %middle.block1781 beq $a0, $a1, .LBB9_1187 .LBB9_918: # %.lr.ph.i1285.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_919: # %.lr.ph.i1285 # =>This Inner Loop Header: Depth=1 @@ -29147,41 +30667,39 @@ _Z8loopInitj: # @_Z8loopInitj bstrpick.d $a0, $a1, 30, 2 slli.d $a0, $a0, 2 vreplvei.d $vr1, $vr0, 0 - addi.d $a3, $a2, 16 - ori $a4, $zero, 0 - lu32i.d $a4, 1 - vreplgr2vr.d $vr2, $a4 - lu12i.w $a4, -419431 - ori $a4, $a4, 2458 - lu32i.d $a4, 104857 - lu52i.d $a4, $a4, 1023 - vreplgr2vr.d $vr3, $a4 - lu12i.w $a4, -307024 - ori $a4, $a4, 3880 - lu32i.d $a4, 129446 - lu52i.d $a4, $a4, 1023 - vreplgr2vr.d $vr4, $a4 - move $a4, $a0 + addi.d $a5, $a3, 16 + ori $a6, $zero, 0 + lu32i.d $a6, 1 + vreplgr2vr.d $vr2, $a6 + ori $a6, $a4, 2458 + lu32i.d $a6, 104857 + lu52i.d $a6, $a6, 1023 + vreplgr2vr.d $vr3, $a6 + ori $a6, $a2, 3880 + lu32i.d $a6, 129446 + lu52i.d $a6, $a6, 1023 + vreplgr2vr.d $vr4, $a6 + move $a6, $a0 .p2align 4, , 16 .LBB9_921: # %vector.body # =>This Inner Loop Header: Depth=1 vaddi.wu $vr5, $vr2, 2 - vpickve2gr.w $a5, $vr2, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa6, $a5 + vpickve2gr.w $a7, $vr2, 1 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa6, $a7 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a5, $vr2, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa7, $a5 + vpickve2gr.w $a7, $vr2, 0 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa7, $a7 ffint.d.l $fa7, $fa7 vextrins.d $vr7, $vr6, 16 - vpickve2gr.w $a5, $vr5, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa6, $a5 + vpickve2gr.w $a7, $vr5, 1 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa6, $a7 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a5, $vr5, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa5, $a5 + vpickve2gr.w $a7, $vr5, 0 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa5, $a7 ffint.d.l $fa5, $fa5 vextrins.d $vr5, $vr6, 16 vfadd.d $vr6, $vr7, $vr3 @@ -29192,34 +30710,38 @@ _Z8loopInitj: # @_Z8loopInitj vfadd.d $vr5, $vr5, $vr4 vfdiv.d $vr6, $vr6, $vr7 vfdiv.d $vr5, $vr8, $vr5 - vst $vr6, $a3, -16 - vst $vr5, $a3, 0 + vst $vr6, $a5, -16 + vst $vr5, $a5, 0 vaddi.wu $vr2, $vr2, 4 - addi.d $a4, $a4, -4 - addi.d $a3, $a3, 32 - bnez $a4, .LBB9_921 + addi.d $a6, $a6, -4 + addi.d $a5, $a5, 32 + bnez $a6, .LBB9_921 # %bb.922: # %middle.block beq $a0, $a1, .LBB9_1187 .LBB9_923: # %.lr.ph.i1341.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 - alsl.d $a2, $a0, $a2, 3 + alsl.d $a3, $a0, $a3, 3 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + ori $a2, $a2, 3880 + lu32i.d $a2, 129446 + lu52i.d $a2, $a2, 1023 + movgr2fr.d $fa2, $a2 .p2align 4, , 16 .LBB9_924: # %.lr.ph.i1341 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a0, 31, 0 - movgr2fr.d $fa3, $a3 + bstrpick.d $a2, $a0, 31, 0 + movgr2fr.d $fa3, $a2 ffint.d.l $fa3, $fa3 fadd.d $fa4, $fa3, $fa1 fmul.d $fa4, $fa0, $fa4 fadd.d $fa3, $fa3, $fa2 fdiv.d $fa3, $fa4, $fa3 - fst.d $fa3, $a2, 0 + fst.d $fa3, $a3, 0 addi.d $a1, $a1, -1 - addi.d $a2, $a2, 8 + addi.d $a3, $a3, 8 addi.w $a0, $a0, 1 bnez $a1, .LBB9_924 b .LBB9_1187 @@ -29281,12 +30803,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.927: # %middle.block2533 beq $a1, $a2, .LBB9_930 .LBB9_928: # %.lr.ph.i859.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_929: # %.lr.ph.i859 # =>This Inner Loop Header: Depth=1 @@ -29375,12 +30903,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.935: # %middle.block2549 beq $a0, $a1, .LBB9_1187 .LBB9_936: # %.lr.ph.i867.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_937: # %.lr.ph.i867 # =>This Inner Loop Header: Depth=1 @@ -29455,12 +30989,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.940: # %middle.block2629 beq $a1, $a2, .LBB9_943 .LBB9_941: # %.lr.ph.i771.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_942: # %.lr.ph.i771 # =>This Inner Loop Header: Depth=1 @@ -29549,12 +31089,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.948: # %middle.block2645 beq $a1, $a2, .LBB9_951 .LBB9_949: # %.lr.ph.i779.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_950: # %.lr.ph.i779 # =>This Inner Loop Header: Depth=1 @@ -29643,12 +31189,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.956: # %middle.block2661 beq $a1, $a2, .LBB9_959 .LBB9_957: # %.lr.ph.i787.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_958: # %.lr.ph.i787 # =>This Inner Loop Header: Depth=1 @@ -29737,12 +31289,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.964: # %middle.block2677 beq $a1, $a2, .LBB9_967 .LBB9_965: # %.lr.ph.i795.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_966: # %.lr.ph.i795 # =>This Inner Loop Header: Depth=1 @@ -29831,12 +31389,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.972: # %middle.block2693 beq $a1, $a2, .LBB9_975 .LBB9_973: # %.lr.ph.i803.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_974: # %.lr.ph.i803 # =>This Inner Loop Header: Depth=1 @@ -29925,12 +31489,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.980: # %middle.block2709 beq $a1, $a2, .LBB9_983 .LBB9_981: # %.lr.ph.i811.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_982: # %.lr.ph.i811 # =>This Inner Loop Header: Depth=1 @@ -30019,12 +31589,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.988: # %middle.block2725 beq $a0, $a1, .LBB9_1187 .LBB9_989: # %.lr.ph.i819.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_990: # %.lr.ph.i819 # =>This Inner Loop Header: Depth=1 @@ -30099,12 +31675,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.993: # %middle.block2501 beq $a1, $a2, .LBB9_996 .LBB9_994: # %.lr.ph.i875.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_995: # %.lr.ph.i875 # =>This Inner Loop Header: Depth=1 @@ -30193,12 +31775,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1001: # %middle.block2517 beq $a0, $a1, .LBB9_1187 .LBB9_1002: # %.lr.ph.i883.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_1003: # %.lr.ph.i883 # =>This Inner Loop Header: Depth=1 @@ -30273,12 +31861,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1006: # %middle.block1845 beq $a1, $a2, .LBB9_1009 .LBB9_1007: # %.lr.ph.i1149.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1008: # %.lr.ph.i1149 # =>This Inner Loop Header: Depth=1 @@ -30367,12 +31961,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1014: # %middle.block1861 beq $a1, $a2, .LBB9_1017 .LBB9_1015: # %.lr.ph.i1157.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1016: # %.lr.ph.i1157 # =>This Inner Loop Header: Depth=1 @@ -30461,12 +32061,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1022: # %middle.block1877 beq $a1, $a2, .LBB9_1025 .LBB9_1023: # %.lr.ph.i1165.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1024: # %.lr.ph.i1165 # =>This Inner Loop Header: Depth=1 @@ -30555,12 +32161,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1030: # %middle.block1893 beq $a1, $a2, .LBB9_1033 .LBB9_1031: # %.lr.ph.i1173.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1032: # %.lr.ph.i1173 # =>This Inner Loop Header: Depth=1 @@ -30649,12 +32261,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1038: # %middle.block1909 beq $a1, $a2, .LBB9_1041 .LBB9_1039: # %.lr.ph.i1181.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1040: # %.lr.ph.i1181 # =>This Inner Loop Header: Depth=1 @@ -30743,12 +32361,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1046: # %middle.block1925 beq $a1, $a2, .LBB9_1049 .LBB9_1047: # %.lr.ph.i1189.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1048: # %.lr.ph.i1189 # =>This Inner Loop Header: Depth=1 @@ -30837,12 +32461,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1054: # %middle.block1941 beq $a1, $a2, .LBB9_1057 .LBB9_1055: # %.lr.ph.i1197.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1056: # %.lr.ph.i1197 # =>This Inner Loop Header: Depth=1 @@ -30931,12 +32561,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1062: # %middle.block1957 beq $a1, $a2, .LBB9_1065 .LBB9_1063: # %.lr.ph.i1205.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1064: # %.lr.ph.i1205 # =>This Inner Loop Header: Depth=1 @@ -31025,12 +32661,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1070: # %middle.block1973 beq $a1, $a2, .LBB9_1073 .LBB9_1071: # %.lr.ph.i1213.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1072: # %.lr.ph.i1213 # =>This Inner Loop Header: Depth=1 @@ -31119,12 +32761,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1078: # %middle.block1989 beq $a0, $a1, .LBB9_1187 .LBB9_1079: # %.lr.ph.i1221.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_1080: # %.lr.ph.i1221 # =>This Inner Loop Header: Depth=1 @@ -31199,12 +32847,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1083: # %middle.block1797 beq $a1, $a2, .LBB9_1086 .LBB9_1084: # %.lr.ph.i1229.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1085: # %.lr.ph.i1229 # =>This Inner Loop Header: Depth=1 @@ -31293,12 +32947,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1091: # %middle.block1813 beq $a1, $a2, .LBB9_1094 .LBB9_1092: # %.lr.ph.i1237.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1093: # %.lr.ph.i1237 # =>This Inner Loop Header: Depth=1 @@ -31387,12 +33047,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1099: # %middle.block1829 beq $a0, $a1, .LBB9_1187 .LBB9_1100: # %.lr.ph.i1245.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_1101: # %.lr.ph.i1245 # =>This Inner Loop Header: Depth=1 @@ -31467,12 +33133,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1104: # %middle.block2069 beq $a1, $a2, .LBB9_1107 .LBB9_1105: # %.lr.ph.i1029.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1106: # %.lr.ph.i1029 # =>This Inner Loop Header: Depth=1 @@ -31561,12 +33233,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1112: # %middle.block2085 beq $a1, $a2, .LBB9_1115 .LBB9_1113: # %.lr.ph.i1037.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1114: # %.lr.ph.i1037 # =>This Inner Loop Header: Depth=1 @@ -31655,12 +33333,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1120: # %middle.block2101 beq $a1, $a2, .LBB9_1123 .LBB9_1121: # %.lr.ph.i1045.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1122: # %.lr.ph.i1045 # =>This Inner Loop Header: Depth=1 @@ -31749,12 +33433,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1128: # %middle.block2117 beq $a1, $a2, .LBB9_1131 .LBB9_1129: # %.lr.ph.i1053.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1130: # %.lr.ph.i1053 # =>This Inner Loop Header: Depth=1 @@ -31843,12 +33533,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1136: # %middle.block2133 beq $a1, $a2, .LBB9_1139 .LBB9_1137: # %.lr.ph.i1061.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1138: # %.lr.ph.i1061 # =>This Inner Loop Header: Depth=1 @@ -31937,12 +33633,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1144: # %middle.block2149 beq $a1, $a2, .LBB9_1147 .LBB9_1145: # %.lr.ph.i1069.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1146: # %.lr.ph.i1069 # =>This Inner Loop Header: Depth=1 @@ -32031,12 +33733,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1152: # %middle.block2165 beq $a1, $a2, .LBB9_1155 .LBB9_1153: # %.lr.ph.i1077.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1154: # %.lr.ph.i1077 # =>This Inner Loop Header: Depth=1 @@ -32125,12 +33833,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1160: # %middle.block2181 beq $a1, $a2, .LBB9_1163 .LBB9_1161: # %.lr.ph.i1085.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1162: # %.lr.ph.i1085 # =>This Inner Loop Header: Depth=1 @@ -32219,12 +33933,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1168: # %middle.block2197 beq $a1, $a2, .LBB9_1171 .LBB9_1169: # %.lr.ph.i1093.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1170: # %.lr.ph.i1093 # =>This Inner Loop Header: Depth=1 @@ -32313,12 +34033,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1176: # %middle.block2213 beq $a1, $a2, .LBB9_1179 .LBB9_1177: # %.lr.ph.i1101.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1178: # %.lr.ph.i1101 # =>This Inner Loop Header: Depth=1 @@ -32407,12 +34133,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1184: # %middle.block2229 beq $a0, $a1, .LBB9_1187 .LBB9_1185: # %.lr.ph.i1109.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_1186: # %.lr.ph.i1109 # =>This Inner Loop Header: Depth=1 diff --git a/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/__/runReferenceLoops.s b/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/__/runReferenceLoops.s index b31732f8..26d4daf4 100644 --- a/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/__/runReferenceLoops.s +++ b/results/MicroBenchmarks/LCALS/SubsetCLambdaLoops/CMakeFiles/lcalsCLambda.dir/__/runReferenceLoops.s @@ -903,14 +903,8 @@ _ZN8LoopStatD2Ev: # @_ZN8LoopStatD2Ev .size _ZN8LoopStatD2Ev, .Lfunc_end3-_ZN8LoopStatD2Ev .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z25computeReferenceLoopTimesv -.LCPI4_0: - .dword 0x3f5426fe718a86d7 # double 0.00123 -.LCPI4_1: - .dword 0xbf5426fe718a86d7 # double -0.00123 .text - .globl _Z25computeReferenceLoopTimesv + .globl _Z25computeReferenceLoopTimesv # -- Begin function _Z25computeReferenceLoopTimesv .p2align 5 .type _Z25computeReferenceLoopTimesv,@function _Z25computeReferenceLoopTimesv: # @_Z25computeReferenceLoopTimesv @@ -1649,13 +1643,16 @@ _Z25computeReferenceLoopTimesv: # @_Z25computeReferenceLoopTimesv pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 st.d $a0, $sp, 640 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI4_0) - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI4_1) ori $a0, $zero, 1 st.b $a0, $sp, 648 + lu12i.w $a0, 465064 + ori $a0, $a0, 1751 + lu32i.d $a0, 272126 + lu52i.d $a1, $a0, 1013 + movgr2fr.d $fs0, $a1 fadd.d $fa0, $fs2, $fs0 + lu52i.d $a0, $a0, -1035 + movgr2fr.d $fs1, $a0 fadd.d $fa1, $fs2, $fs1 fdiv.d $fa0, $fa0, $fa1 fst.d $fa0, $fp, 384 diff --git a/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/RawSubsetCbenchmarks.s b/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/RawSubsetCbenchmarks.s index 2865e3bb..62479529 100644 --- a/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/RawSubsetCbenchmarks.s +++ b/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/RawSubsetCbenchmarks.s @@ -2070,31 +2070,24 @@ _ZL13BM_PIC_1D_RAWRN9benchmark5StateE: # @_ZL13BM_PIC_1D_RAWRN9benchmark5StateE .size _ZL13BM_PIC_1D_RAWRN9benchmark5StateE, .Lfunc_end12-_ZL13BM_PIC_1D_RAWRN9benchmark5StateE .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZL15BM_HYDRO_2D_RAWRN9benchmark5StateE -.LCPI13_0: - .dword 0x3f70cb295e9e1b09 # double 0.0041000000000000003 -.LCPI13_1: - .dword 0x3f6e4f765fd8adac # double 0.0037000000000000002 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZL15BM_HYDRO_2D_RAWRN9benchmark5StateE .type _ZL15BM_HYDRO_2D_RAWRN9benchmark5StateE,@function _ZL15BM_HYDRO_2D_RAWRN9benchmark5StateE: # @_ZL15BM_HYDRO_2D_RAWRN9benchmark5StateE .cfi_startproc # %bb.0: # %_ZN9benchmark5State13StateIteratorC2EPS0_.exit - addi.d $sp, $sp, -304 - .cfi_def_cfa_offset 304 - st.d $ra, $sp, 296 # 8-byte Folded Spill - st.d $fp, $sp, 288 # 8-byte Folded Spill - st.d $s0, $sp, 280 # 8-byte Folded Spill - st.d $s1, $sp, 272 # 8-byte Folded Spill - st.d $s2, $sp, 264 # 8-byte Folded Spill - st.d $s3, $sp, 256 # 8-byte Folded Spill - st.d $s4, $sp, 248 # 8-byte Folded Spill - st.d $s5, $sp, 240 # 8-byte Folded Spill - st.d $s6, $sp, 232 # 8-byte Folded Spill - st.d $s7, $sp, 224 # 8-byte Folded Spill - st.d $s8, $sp, 216 # 8-byte Folded Spill + addi.d $sp, $sp, -320 + .cfi_def_cfa_offset 320 + st.d $ra, $sp, 312 # 8-byte Folded Spill + st.d $fp, $sp, 304 # 8-byte Folded Spill + st.d $s0, $sp, 296 # 8-byte Folded Spill + st.d $s1, $sp, 288 # 8-byte Folded Spill + st.d $s2, $sp, 280 # 8-byte Folded Spill + st.d $s3, $sp, 272 # 8-byte Folded Spill + st.d $s4, $sp, 264 # 8-byte Folded Spill + st.d $s5, $sp, 256 # 8-byte Folded Spill + st.d $s6, $sp, 248 # 8-byte Folded Spill + st.d $s7, $sp, 240 # 8-byte Folded Spill + st.d $s8, $sp, 232 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -2116,20 +2109,20 @@ _ZL15BM_HYDRO_2D_RAWRN9benchmark5StateE: # @_ZL15BM_HYDRO_2D_RAWRN9benchmark5Sta ld.d $s7, $fp, 264 ld.d $s8, $fp, 272 ld.d $a0, $fp, 280 - st.d $a0, $sp, 48 # 8-byte Folded Spill + st.d $a0, $sp, 56 # 8-byte Folded Spill ld.d $a0, $fp, 288 - st.d $a0, $sp, 112 # 8-byte Folded Spill + st.d $a0, $sp, 128 # 8-byte Folded Spill ld.d $a0, $fp, 296 - st.d $a0, $sp, 104 # 8-byte Folded Spill + st.d $a0, $sp, 120 # 8-byte Folded Spill ld.d $a0, $fp, 304 - st.d $a0, $sp, 168 # 8-byte Folded Spill + st.d $a0, $sp, 184 # 8-byte Folded Spill ld.d $a0, $fp, 312 - st.d $a0, $sp, 128 # 8-byte Folded Spill + st.d $a0, $sp, 144 # 8-byte Folded Spill ld.d $a0, $fp, 320 - st.d $a0, $sp, 120 # 8-byte Folded Spill + st.d $a0, $sp, 136 # 8-byte Folded Spill ld.d $a0, $s1, 32 ld.d $a1, $fp, 328 - st.d $a1, $sp, 136 # 8-byte Folded Spill + st.d $a1, $sp, 152 # 8-byte Folded Spill ld.d $s2, $fp, 336 ld.d $s3, $fp, 344 ld.d $fp, $a0, 0 @@ -2145,36 +2138,38 @@ _ZL15BM_HYDRO_2D_RAWRN9benchmark5StateE: # @_ZL15BM_HYDRO_2D_RAWRN9benchmark5Sta # %bb.2: # %.preheader203.lr.ph bstrpick.d $a0, $fp, 30, 0 addi.d $a1, $a0, -1 - st.d $a0, $sp, 152 # 8-byte Folded Spill + st.d $a0, $sp, 168 # 8-byte Folded Spill slli.d $a0, $a0, 3 addi.d $a2, $a0, 8 - st.d $a2, $sp, 80 # 8-byte Folded Spill - st.d $a0, $sp, 208 # 8-byte Folded Spill + st.d $a2, $sp, 96 # 8-byte Folded Spill + st.d $a0, $sp, 224 # 8-byte Folded Spill addi.d $a0, $a0, -8 - st.d $a0, $sp, 144 # 8-byte Folded Spill - move $t6, $a1 - bstrins.d $t6, $zero, 0, 0 - st.d $a1, $sp, 176 # 8-byte Folded Spill + st.d $a0, $sp, 160 # 8-byte Folded Spill + move $t5, $a1 + bstrins.d $t5, $zero, 0, 0 + st.d $a1, $sp, 192 # 8-byte Folded Spill ori $a0, $a1, 1 - st.d $a0, $sp, 72 # 8-byte Folded Spill - addi.w $t7, $fp, 0 - ori $t8, $zero, 2 - lu12i.w $a0, 392586 - ori $a0, $a0, 3500 - lu32i.d $a0, -110730 - lu52i.d $a0, $a0, 1014 - vreplgr2vr.d $vr0, $a0 + st.d $a0, $sp, 88 # 8-byte Folded Spill + addi.w $t6, $fp, 0 + ori $t7, $zero, 2 lu12i.w $a0, 387553 ori $a0, $a0, 2825 lu32i.d $a0, 52009 lu52i.d $a0, $a0, 1015 - vreplgr2vr.d $vr1, $a0 - st.d $s7, $sp, 96 # 8-byte Folded Spill - st.d $s8, $sp, 88 # 8-byte Folded Spill - st.d $s2, $sp, 40 # 8-byte Folded Spill - st.d $s3, $sp, 32 # 8-byte Folded Spill - st.d $t6, $sp, 64 # 8-byte Folded Spill - st.d $t7, $sp, 24 # 8-byte Folded Spill + st.d $a0, $sp, 72 # 8-byte Folded Spill + movgr2fr.d $fa0, $a0 + lu12i.w $a0, 392586 + ori $a0, $a0, 3500 + lu32i.d $a0, -110730 + lu52i.d $t8, $a0, 1014 + movgr2fr.d $fa1, $t8 + st.d $s7, $sp, 112 # 8-byte Folded Spill + st.d $s8, $sp, 104 # 8-byte Folded Spill + st.d $s2, $sp, 48 # 8-byte Folded Spill + st.d $s3, $sp, 40 # 8-byte Folded Spill + st.d $t5, $sp, 80 # 8-byte Folded Spill + st.d $t6, $sp, 32 # 8-byte Folded Spill + st.d $t8, $sp, 24 # 8-byte Folded Spill b .LBB13_4 .p2align 4, , 16 .LBB13_3: # %.split.us @@ -2199,22 +2194,22 @@ _ZL15BM_HYDRO_2D_RAWRN9benchmark5StateE: # @_ZL15BM_HYDRO_2D_RAWRN9benchmark5Sta # Child Loop BB13_69 Depth 2 # Child Loop BB13_131 Depth 2 # Child Loop BB13_72 Depth 2 - blt $t7, $t8, .LBB13_3 + blt $t6, $t7, .LBB13_3 # %bb.5: # %.preheader200.us.preheader # in Loop: Header=BB13_4 Depth=1 - st.d $s4, $sp, 56 # 8-byte Folded Spill - ld.d $a0, $sp, 112 # 8-byte Folded Reload + st.d $s4, $sp, 64 # 8-byte Folded Spill + ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $t8, $a0, 8 - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 120 # 8-byte Folded Reload ld.d $t7, $a0, 8 - ld.d $ra, $sp, 48 # 8-byte Folded Reload + ld.d $ra, $sp, 56 # 8-byte Folded Reload ld.d $a3, $ra, 8 ori $a6, $zero, 1 b .LBB13_7 .p2align 4, , 16 .LBB13_6: # %._crit_edge.us # in Loop: Header=BB13_7 Depth=2 - move $a3, $t3 + move $a3, $t1 move $t7, $t0 move $t8, $a7 ori $a0, $zero, 6 @@ -2224,31 +2219,31 @@ _ZL15BM_HYDRO_2D_RAWRN9benchmark5StateE: # @_ZL15BM_HYDRO_2D_RAWRN9benchmark5Sta # => This Loop Header: Depth=2 # Child Loop BB13_28 Depth 3 # Child Loop BB13_9 Depth 3 - ld.d $a5, $sp, 168 # 8-byte Folded Reload + ld.d $a5, $sp, 184 # 8-byte Folded Reload alsl.d $a0, $a6, $a5, 3 slli.d $a1, $a6, 3 addi.d $a6, $a6, 1 slli.d $a2, $a6, 3 - ld.d $a4, $sp, 112 # 8-byte Folded Reload + ld.d $a4, $sp, 128 # 8-byte Folded Reload ldx.d $a7, $a4, $a2 - ld.d $a4, $sp, 104 # 8-byte Folded Reload + ld.d $a4, $sp, 120 # 8-byte Folded Reload ldx.d $t0, $a4, $a2 ldx.d $s0, $a5, $a1 - ldx.d $t3, $ra, $a2 - ldx.d $t4, $s7, $a1 + ldx.d $t1, $ra, $a2 + ldx.d $t2, $s7, $a1 ld.d $t5, $a0, -8 ldx.d $t6, $s8, $a1 ori $s5, $zero, 1 - ld.d $a0, $sp, 176 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload ori $a1, $zero, 4 bgeu $a0, $a1, .LBB13_10 .LBB13_8: # %scalar.ph607.preheader # in Loop: Header=BB13_7 Depth=2 - ld.d $a0, $sp, 152 # 8-byte Folded Reload - sub.d $a4, $a0, $s5 - slli.d $a5, $s5, 3 - addi.d $t1, $t3, -8 - addi.d $t2, $s0, -8 + ld.d $a0, $sp, 168 # 8-byte Folded Reload + sub.d $a0, $a0, $s5 + slli.d $a4, $s5, 3 + addi.d $a5, $t1, -8 + addi.d $t4, $s0, -8 addi.d $s0, $a7, -8 addi.d $s3, $t0, -8 addi.d $t8, $t8, -8 @@ -2258,46 +2253,46 @@ _ZL15BM_HYDRO_2D_RAWRN9benchmark5StateE: # @_ZL15BM_HYDRO_2D_RAWRN9benchmark5Sta # Parent Loop BB13_4 Depth=1 # Parent Loop BB13_7 Depth=2 # => This Inner Loop Header: Depth=3 - fldx.d $fa2, $s0, $a5 - fldx.d $fa3, $s3, $a5 - fldx.d $fa4, $t8, $a5 + fldx.d $fa2, $s0, $a4 + fldx.d $fa3, $s3, $a4 + fldx.d $fa4, $t8, $a4 fadd.d $fa2, $fa2, $fa3 - add.d $a0, $t8, $a5 + add.d $a1, $t8, $a4 fsub.d $fa2, $fa2, $fa4 - fldx.d $fa3, $t7, $a5 - add.d $a1, $t2, $a5 - fld.d $fa4, $a1, 8 - fldx.d $fa5, $t2, $a5 - add.d $a2, $a3, $a5 - fld.d $fa6, $a2, -8 - fldx.d $fa7, $t1, $a5 + fldx.d $fa3, $t7, $a4 + add.d $a2, $t4, $a4 + fld.d $fa4, $a2, 8 + fldx.d $fa5, $t4, $a4 + add.d $t3, $a3, $a4 + fld.d $fa6, $t3, -8 + fldx.d $fa7, $a5, $a4 fsub.d $fa2, $fa2, $fa3 fadd.d $fa3, $fa4, $fa5 fmul.d $fa2, $fa2, $fa3 fadd.d $fa3, $fa6, $fa7 fdiv.d $fa2, $fa2, $fa3 - fstx.d $fa2, $t4, $a5 - fldx.d $fa2, $t8, $a5 - fldx.d $fa3, $t7, $a5 - fld.d $fa4, $a0, 8 - add.d $a0, $t7, $a5 + fstx.d $fa2, $t2, $a4 + fldx.d $fa2, $t8, $a4 + fldx.d $fa3, $t7, $a4 + fld.d $fa4, $a1, 8 + add.d $a1, $t7, $a4 fadd.d $fa2, $fa2, $fa3 fsub.d $fa2, $fa2, $fa4 - fld.d $fa3, $a0, 8 - fld.d $fa4, $a1, 8 - fldx.d $fa5, $t5, $a5 - fldx.d $fa6, $a3, $a5 - fld.d $fa7, $a2, -8 + fld.d $fa3, $a1, 8 + fld.d $fa4, $a2, 8 + fldx.d $fa5, $t5, $a4 + fldx.d $fa6, $a3, $a4 + fld.d $fa7, $t3, -8 fsub.d $fa2, $fa2, $fa3 fadd.d $fa3, $fa4, $fa5 fmul.d $fa2, $fa2, $fa3 fadd.d $fa3, $fa6, $fa7 fdiv.d $fa2, $fa2, $fa3 - fstx.d $fa2, $t6, $a5 - addi.d $a4, $a4, -1 - addi.d $t4, $t4, 8 - addi.d $t1, $t1, 8 + fstx.d $fa2, $t6, $a4 + addi.d $a0, $a0, -1 addi.d $t2, $t2, 8 + addi.d $a5, $a5, 8 + addi.d $t4, $t4, 8 addi.d $t5, $t5, 8 addi.d $a3, $a3, 8 addi.d $t6, $t6, 8 @@ -2305,232 +2300,232 @@ _ZL15BM_HYDRO_2D_RAWRN9benchmark5StateE: # @_ZL15BM_HYDRO_2D_RAWRN9benchmark5Sta addi.d $s3, $s3, 8 addi.d $t8, $t8, 8 addi.d $t7, $t7, 8 - bnez $a4, .LBB13_9 + bnez $a0, .LBB13_9 b .LBB13_6 .p2align 4, , 16 .LBB13_10: # %vector.memcheck526 # in Loop: Header=BB13_7 Depth=2 - addi.d $t1, $t4, 8 - ld.d $a1, $sp, 208 # 8-byte Folded Reload - add.d $a0, $t4, $a1 - addi.d $t2, $t6, 8 - add.d $a4, $t6, $a1 - sltu $a1, $t1, $a4 - sltu $a2, $t2, $a0 - and $a1, $a1, $a2 + addi.d $s3, $t2, 8 + ld.d $a0, $sp, 224 # 8-byte Folded Reload + add.d $a1, $t2, $a0 + addi.d $s4, $t6, 8 + add.d $a0, $t6, $a0 + sltu $a2, $s3, $a0 + sltu $a4, $s4, $a1 + and $a2, $a2, $a4 ori $s5, $zero, 1 - bnez $a1, .LBB13_8 + bnez $a2, .LBB13_8 # %bb.11: # %vector.memcheck526 # in Loop: Header=BB13_7 Depth=2 - ld.d $a1, $sp, 144 # 8-byte Folded Reload - add.d $a1, $a7, $a1 - sltu $a2, $t1, $a1 - sltu $a5, $a7, $a0 - and $a2, $a2, $a5 + ld.d $a2, $sp, 160 # 8-byte Folded Reload + add.d $a2, $a7, $a2 + sltu $a4, $s3, $a2 + sltu $a5, $a7, $a1 + and $a4, $a4, $a5 ori $s5, $zero, 1 - bnez $a2, .LBB13_8 + bnez $a4, .LBB13_8 # %bb.12: # %vector.memcheck526 # in Loop: Header=BB13_7 Depth=2 - ld.d $a2, $sp, 144 # 8-byte Folded Reload - add.d $a2, $t0, $a2 - sltu $a5, $t1, $a2 - sltu $fp, $t0, $a0 - and $a5, $a5, $fp + ld.d $a4, $sp, 160 # 8-byte Folded Reload + add.d $a5, $t0, $a4 + sltu $a4, $s3, $a5 + sltu $t3, $t0, $a1 + and $a4, $a4, $t3 ori $s5, $zero, 1 - bnez $a5, .LBB13_8 + bnez $a4, .LBB13_8 # %bb.13: # %vector.memcheck526 # in Loop: Header=BB13_7 Depth=2 - ld.d $a5, $sp, 208 # 8-byte Folded Reload - add.d $fp, $t8, $a5 - sltu $a5, $t1, $fp - sltu $s1, $t8, $a0 - and $a5, $a5, $s1 + ld.d $a4, $sp, 224 # 8-byte Folded Reload + add.d $t3, $t8, $a4 + sltu $a4, $s3, $t3 + sltu $t4, $t8, $a1 + and $a4, $a4, $t4 ori $s5, $zero, 1 - bnez $a5, .LBB13_8 + bnez $a4, .LBB13_8 # %bb.14: # %vector.memcheck526 # in Loop: Header=BB13_7 Depth=2 - ld.d $a5, $sp, 208 # 8-byte Folded Reload - add.d $s1, $t7, $a5 - sltu $a5, $t1, $s1 - sltu $s2, $t7, $a0 - and $a5, $a5, $s2 + ld.d $a4, $sp, 224 # 8-byte Folded Reload + add.d $t4, $t7, $a4 + sltu $a4, $s3, $t4 + sltu $fp, $t7, $a1 + and $a4, $a4, $fp ori $s5, $zero, 1 - bnez $a5, .LBB13_8 + bnez $a4, .LBB13_8 # %bb.15: # %vector.memcheck526 # in Loop: Header=BB13_7 Depth=2 - ld.d $a5, $sp, 208 # 8-byte Folded Reload - add.d $s2, $s0, $a5 - sltu $a5, $t1, $s2 - sltu $s3, $s0, $a0 - and $a5, $a5, $s3 + ld.d $a4, $sp, 224 # 8-byte Folded Reload + add.d $fp, $s0, $a4 + sltu $a4, $s3, $fp + sltu $s1, $s0, $a1 + and $a4, $a4, $s1 ori $s5, $zero, 1 - bnez $a5, .LBB13_8 + bnez $a4, .LBB13_8 # %bb.16: # %vector.memcheck526 # in Loop: Header=BB13_7 Depth=2 - ld.d $a5, $sp, 208 # 8-byte Folded Reload - add.d $s3, $a3, $a5 - sltu $a5, $t1, $s3 - sltu $s4, $a3, $a0 - and $a5, $a5, $s4 + ld.d $a4, $sp, 224 # 8-byte Folded Reload + add.d $s1, $a3, $a4 + sltu $a4, $s3, $s1 + sltu $s2, $a3, $a1 + and $a4, $a4, $s2 ori $s5, $zero, 1 - bnez $a5, .LBB13_8 + bnez $a4, .LBB13_8 # %bb.17: # %vector.memcheck526 # in Loop: Header=BB13_7 Depth=2 - ld.d $a5, $sp, 144 # 8-byte Folded Reload - add.d $s4, $t3, $a5 - sltu $a5, $t1, $s4 - sltu $s5, $t3, $a0 - and $a5, $a5, $s5 + ld.d $a4, $sp, 160 # 8-byte Folded Reload + add.d $s2, $t1, $a4 + sltu $a4, $s3, $s2 + sltu $s5, $t1, $a1 + and $a4, $a4, $s5 ori $s5, $zero, 1 - bnez $a5, .LBB13_8 + bnez $a4, .LBB13_8 # %bb.18: # %vector.memcheck526 # in Loop: Header=BB13_7 Depth=2 - addi.d $a5, $t5, 8 - ld.d $s5, $sp, 208 # 8-byte Folded Reload + addi.d $a4, $t5, 8 + ld.d $s5, $sp, 224 # 8-byte Folded Reload add.d $s6, $t5, $s5 - sltu $s5, $t1, $s6 - sltu $a0, $a5, $a0 - and $a0, $s5, $a0 + sltu $s5, $s3, $s6 + sltu $a1, $a4, $a1 + and $a1, $s5, $a1 ori $s5, $zero, 1 - bnez $a0, .LBB13_8 + bnez $a1, .LBB13_8 # %bb.19: # %vector.memcheck526 # in Loop: Header=BB13_7 Depth=2 - sltu $a0, $t2, $a1 - sltu $a1, $a7, $a4 - and $a0, $a0, $a1 + sltu $a1, $s4, $a2 + sltu $a2, $a7, $a0 + and $a1, $a1, $a2 ori $s5, $zero, 1 - bnez $a0, .LBB13_8 + bnez $a1, .LBB13_8 # %bb.20: # %vector.memcheck526 # in Loop: Header=BB13_7 Depth=2 - sltu $a0, $t2, $a2 - sltu $a1, $t0, $a4 - and $a0, $a0, $a1 + sltu $a1, $s4, $a5 + sltu $a2, $t0, $a0 + and $a1, $a1, $a2 ori $s5, $zero, 1 - bnez $a0, .LBB13_8 + bnez $a1, .LBB13_8 # %bb.21: # %vector.memcheck526 # in Loop: Header=BB13_7 Depth=2 - sltu $a0, $t2, $fp - sltu $a1, $t8, $a4 - and $a0, $a0, $a1 + sltu $a1, $s4, $t3 + sltu $a2, $t8, $a0 + and $a1, $a1, $a2 ori $s5, $zero, 1 - bnez $a0, .LBB13_8 + bnez $a1, .LBB13_8 # %bb.22: # %vector.memcheck526 # in Loop: Header=BB13_7 Depth=2 - sltu $a0, $t2, $s1 - sltu $a1, $t7, $a4 - and $a0, $a0, $a1 + sltu $a1, $s4, $t4 + sltu $a2, $t7, $a0 + and $a1, $a1, $a2 ori $s5, $zero, 1 - bnez $a0, .LBB13_8 + bnez $a1, .LBB13_8 # %bb.23: # %vector.memcheck526 # in Loop: Header=BB13_7 Depth=2 - sltu $a0, $t2, $s2 - sltu $a1, $s0, $a4 - and $a0, $a0, $a1 + sltu $a1, $s4, $fp + sltu $a2, $s0, $a0 + and $a1, $a1, $a2 ori $s5, $zero, 1 - bnez $a0, .LBB13_8 + bnez $a1, .LBB13_8 # %bb.24: # %vector.memcheck526 # in Loop: Header=BB13_7 Depth=2 - sltu $a0, $t2, $s3 - sltu $a1, $a3, $a4 - and $a0, $a0, $a1 + sltu $a1, $s4, $s1 + sltu $a2, $a3, $a0 + and $a1, $a1, $a2 ori $s5, $zero, 1 - bnez $a0, .LBB13_8 + bnez $a1, .LBB13_8 # %bb.25: # %vector.memcheck526 # in Loop: Header=BB13_7 Depth=2 - sltu $a0, $t2, $s4 - sltu $a1, $t3, $a4 - and $a0, $a0, $a1 + sltu $a1, $s4, $s2 + sltu $a2, $t1, $a0 + and $a1, $a1, $a2 ori $s5, $zero, 1 - bnez $a0, .LBB13_8 + bnez $a1, .LBB13_8 # %bb.26: # %vector.memcheck526 # in Loop: Header=BB13_7 Depth=2 - sltu $a0, $t2, $s6 - sltu $a1, $a5, $a4 - and $a0, $a0, $a1 + sltu $a1, $s4, $s6 + sltu $a0, $a4, $a0 + and $a0, $a1, $a0 ori $s5, $zero, 1 bnez $a0, .LBB13_8 # %bb.27: # %vector.body612.preheader # in Loop: Header=BB13_7 Depth=2 - addi.d $a4, $s0, 8 - move $s3, $t7 - move $s4, $t8 - move $a0, $a7 + addi.d $a0, $s0, 8 + move $a5, $t7 + move $t4, $t8 + move $s1, $a7 move $s5, $t0 - move $s1, $a3 - ld.d $a1, $sp, 64 # 8-byte Folded Reload - move $fp, $a1 - move $s2, $t3 + move $fp, $a3 + ld.d $a2, $sp, 80 # 8-byte Folded Reload + move $a1, $a2 + move $s2, $t1 .p2align 4, , 16 .LBB13_28: # %vector.body612 # Parent Loop BB13_4 Depth=1 # Parent Loop BB13_7 Depth=2 # => This Inner Loop Header: Depth=3 - vld $vr2, $a0, 0 + vld $vr2, $s1, 0 vld $vr3, $s5, 0 - vld $vr4, $s4, 0 + vld $vr4, $t4, 0 vfadd.d $vr2, $vr2, $vr3 vfsub.d $vr2, $vr2, $vr4 - vld $vr3, $s3, 0 - vld $vr5, $a4, 0 - vld $vr6, $a4, -8 - vld $vr7, $s1, 0 + vld $vr3, $a5, 0 + vld $vr5, $a0, 0 + vld $vr6, $a0, -8 + vld $vr7, $fp, 0 vld $vr8, $s2, 0 vfsub.d $vr2, $vr2, $vr3 vfadd.d $vr6, $vr5, $vr6 vfmul.d $vr2, $vr2, $vr6 vfadd.d $vr6, $vr7, $vr8 vfdiv.d $vr2, $vr2, $vr6 - vst $vr2, $t1, 0 - vld $vr2, $s4, 8 + vst $vr2, $s3, 0 + vld $vr2, $t4, 8 vfadd.d $vr3, $vr4, $vr3 - vld $vr4, $s3, 8 - vld $vr6, $a5, 0 + vld $vr4, $a5, 8 + vld $vr6, $a4, 0 vfsub.d $vr2, $vr3, $vr2 - vld $vr3, $s1, 8 + vld $vr3, $fp, 8 vfsub.d $vr2, $vr2, $vr4 vfadd.d $vr4, $vr5, $vr6 vfmul.d $vr2, $vr2, $vr4 vfadd.d $vr3, $vr3, $vr7 vfdiv.d $vr2, $vr2, $vr3 - vst $vr2, $t2, 0 + vst $vr2, $s4, 0 addi.d $s2, $s2, 16 - addi.d $fp, $fp, -2 - addi.d $t2, $t2, 16 - addi.d $s1, $s1, 16 - addi.d $a5, $a5, 16 + addi.d $a1, $a1, -2 + addi.d $s4, $s4, 16 + addi.d $fp, $fp, 16 + addi.d $a4, $a4, 16 addi.d $s5, $s5, 16 + addi.d $s1, $s1, 16 addi.d $a0, $a0, 16 - addi.d $a4, $a4, 16 - addi.d $t1, $t1, 16 - addi.d $s4, $s4, 16 addi.d $s3, $s3, 16 - bnez $fp, .LBB13_28 + addi.d $t4, $t4, 16 + addi.d $a5, $a5, 16 + bnez $a1, .LBB13_28 # %bb.29: # %middle.block632 # in Loop: Header=BB13_7 Depth=2 - ld.d $s5, $sp, 72 # 8-byte Folded Reload - ld.d $a0, $sp, 176 # 8-byte Folded Reload - beq $a0, $a1, .LBB13_6 + ld.d $s5, $sp, 88 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload + beq $a0, $a2, .LBB13_6 b .LBB13_8 .p2align 4, , 16 .LBB13_30: # %.preheader199.us.preheader # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 136 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $ra, $a0, 8 + ld.d $t8, $s8, 8 + ld.d $a0, $sp, 184 # 8-byte Folded Reload ld.d $t7, $a0, 8 - ld.d $t6, $s8, 8 - ld.d $a0, $sp, 168 # 8-byte Folded Reload - ld.d $t5, $a0, 8 ori $a2, $zero, 1 b .LBB13_32 .p2align 4, , 16 .LBB13_31: # %._crit_edge.us209 # in Loop: Header=BB13_32 Depth=2 - ld.d $t5, $sp, 184 # 8-byte Folded Reload - ld.d $t6, $sp, 200 # 8-byte Folded Reload - ld.d $t7, $sp, 192 # 8-byte Folded Reload - ld.d $s7, $sp, 96 # 8-byte Folded Reload - ld.d $s8, $sp, 88 # 8-byte Folded Reload + ld.d $t7, $sp, 200 # 8-byte Folded Reload + ld.d $t8, $sp, 216 # 8-byte Folded Reload + ld.d $ra, $sp, 208 # 8-byte Folded Reload + ld.d $s7, $sp, 112 # 8-byte Folded Reload + ld.d $s8, $sp, 104 # 8-byte Folded Reload ori $a0, $zero, 6 - ld.d $a2, $sp, 160 # 8-byte Folded Reload + ld.d $a2, $sp, 176 # 8-byte Folded Reload beq $a2, $a0, .LBB13_57 .LBB13_32: # %.preheader199.us # Parent Loop BB13_4 Depth=1 @@ -2538,954 +2533,955 @@ _ZL15BM_HYDRO_2D_RAWRN9benchmark5StateE: # @_ZL15BM_HYDRO_2D_RAWRN9benchmark5Sta # Child Loop BB13_55 Depth 3 # Child Loop BB13_34 Depth 3 slli.d $a0, $a2, 3 - ldx.d $t8, $s7, $a0 + ldx.d $a7, $s7, $a0 addi.d $a1, $a0, -8 - ld.d $a3, $sp, 136 # 8-byte Folded Reload - ldx.d $ra, $a3, $a1 + ld.d $a3, $sp, 152 # 8-byte Folded Reload + ldx.d $t0, $a3, $a1 addi.d $a2, $a2, 1 - st.d $a2, $sp, 160 # 8-byte Folded Spill + st.d $a2, $sp, 176 # 8-byte Folded Spill slli.d $a2, $a2, 3 ldx.d $a4, $s8, $a2 - st.d $a4, $sp, 200 # 8-byte Folded Spill + st.d $a4, $sp, 216 # 8-byte Folded Spill ldx.d $a3, $a3, $a2 - st.d $a3, $sp, 192 # 8-byte Folded Spill - ld.d $a3, $sp, 128 # 8-byte Folded Reload - ldx.d $t3, $a3, $a0 - ld.d $a3, $sp, 168 # 8-byte Folded Reload - ldx.d $a6, $a3, $a1 + st.d $a3, $sp, 208 # 8-byte Folded Spill + ld.d $a3, $sp, 144 # 8-byte Folded Reload + ldx.d $a6, $a3, $a0 + ld.d $a3, $sp, 184 # 8-byte Folded Reload + ldx.d $t1, $a3, $a1 ldx.d $a1, $a3, $a2 - st.d $a1, $sp, 184 # 8-byte Folded Spill - ld.d $a1, $sp, 120 # 8-byte Folded Reload - ldx.d $a3, $a1, $a0 - ori $t4, $zero, 1 - ld.d $a0, $sp, 176 # 8-byte Folded Reload + st.d $a1, $sp, 200 # 8-byte Folded Spill + ld.d $a1, $sp, 136 # 8-byte Folded Reload + ldx.d $t2, $a1, $a0 + ori $t6, $zero, 1 + ld.d $a0, $sp, 192 # 8-byte Folded Reload ori $a1, $zero, 4 bgeu $a0, $a1, .LBB13_35 .LBB13_33: # %scalar.ph494.preheader # in Loop: Header=BB13_32 Depth=2 - ld.d $a0, $sp, 152 # 8-byte Folded Reload - sub.d $a4, $a0, $t4 - slli.d $a5, $t4, 3 - ld.d $t1, $sp, 192 # 8-byte Folded Reload - ld.d $t2, $sp, 200 # 8-byte Folded Reload - ld.d $s3, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 168 # 8-byte Folded Reload + sub.d $a0, $a0, $t6 + slli.d $a4, $t6, 3 + ld.d $a5, $sp, 208 # 8-byte Folded Reload + ld.d $t4, $sp, 216 # 8-byte Folded Reload + ld.d $s3, $sp, 200 # 8-byte Folded Reload .p2align 4, , 16 .LBB13_34: # %scalar.ph494 # Parent Loop BB13_4 Depth=1 # Parent Loop BB13_32 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $a0, $t7, $a5 - fldx.d $fa2, $t7, $a5 - fld.d $fa3, $a0, 8 - add.d $a1, $t8, $a5 - fld.d $fa4, $a0, -8 - fldx.d $fa5, $t8, $a5 + add.d $a1, $ra, $a4 + fldx.d $fa2, $ra, $a4 + fld.d $fa3, $a1, 8 + add.d $a2, $a7, $a4 + fld.d $fa4, $a1, -8 + fldx.d $fa5, $a7, $a4 fsub.d $fa3, $fa2, $fa3 - fld.d $fa6, $a1, -8 + fld.d $fa6, $a2, -8 fsub.d $fa4, $fa2, $fa4 fneg.d $fa4, $fa4 - fldx.d $fa7, $ra, $a5 - fldx.d $ft0, $t6, $a5 + fldx.d $fa7, $t0, $a4 + fldx.d $ft0, $t8, $a4 fmul.d $fa4, $fa6, $fa4 fmadd.d $fa3, $fa5, $fa3, $fa4 fsub.d $fa4, $fa2, $fa7 fneg.d $fa5, $ft0 - fldx.d $fa6, $t2, $a5 - fldx.d $fa7, $t1, $a5 - fldx.d $ft0, $t3, $a5 - pcalau12i $a0, %pc_hi20(.LCPI13_0) - fld.d $ft1, $a0, %pc_lo12(.LCPI13_0) + fldx.d $fa6, $a5, $a4 + fldx.d $fa7, $t4, $a4 + fldx.d $ft0, $a6, $a4 fmadd.d $fa3, $fa5, $fa4, $fa3 - fsub.d $fa2, $fa2, $fa7 - fmadd.d $fa2, $fa6, $fa2, $fa3 - fmadd.d $fa2, $fa2, $ft1, $ft0 - fstx.d $fa2, $t3, $a5 - add.d $a0, $t5, $a5 - fldx.d $fa2, $t5, $a5 - fld.d $fa3, $a0, 8 - fld.d $fa4, $a0, -8 - fldx.d $fa5, $t8, $a5 + fsub.d $fa2, $fa2, $fa6 + fmadd.d $fa2, $fa7, $fa2, $fa3 + fmadd.d $fa2, $fa2, $fa0, $ft0 + fstx.d $fa2, $a6, $a4 + add.d $a1, $t7, $a4 + fldx.d $fa2, $t7, $a4 + fld.d $fa3, $a1, 8 + fld.d $fa4, $a1, -8 + fldx.d $fa5, $a7, $a4 fsub.d $fa3, $fa2, $fa3 - fld.d $fa6, $a1, -8 + fld.d $fa6, $a2, -8 fsub.d $fa4, $fa2, $fa4 fneg.d $fa4, $fa4 - fldx.d $fa7, $a6, $a5 - fldx.d $ft0, $t6, $a5 + fldx.d $fa7, $t1, $a4 + fldx.d $ft0, $t8, $a4 fmul.d $fa4, $fa6, $fa4 fmadd.d $fa3, $fa5, $fa3, $fa4 fsub.d $fa4, $fa2, $fa7 fneg.d $fa5, $ft0 - fldx.d $fa6, $s3, $a5 - fldx.d $fa7, $t2, $a5 - fldx.d $ft0, $a3, $a5 + fldx.d $fa6, $s3, $a4 + fldx.d $fa7, $t4, $a4 + fldx.d $ft0, $t2, $a4 fmadd.d $fa3, $fa5, $fa4, $fa3 fsub.d $fa2, $fa2, $fa6 fmadd.d $fa2, $fa7, $fa2, $fa3 - fmadd.d $fa2, $fa2, $ft1, $ft0 - fstx.d $fa2, $a3, $a5 - addi.d $a4, $a4, -1 - addi.d $a3, $a3, 8 + fmadd.d $fa2, $fa2, $fa0, $ft0 + fstx.d $fa2, $t2, $a4 + addi.d $a0, $a0, -1 + addi.d $t2, $t2, 8 addi.d $s3, $s3, 8 + addi.d $a7, $a7, 8 + addi.d $ra, $ra, 8 addi.d $t8, $t8, 8 + addi.d $t0, $t0, 8 + addi.d $t4, $t4, 8 + addi.d $a5, $a5, 8 + addi.d $a6, $a6, 8 addi.d $t7, $t7, 8 - addi.d $t6, $t6, 8 - addi.d $ra, $ra, 8 - addi.d $t2, $t2, 8 addi.d $t1, $t1, 8 - addi.d $t3, $t3, 8 - addi.d $t5, $t5, 8 - addi.d $a6, $a6, 8 - bnez $a4, .LBB13_34 + bnez $a0, .LBB13_34 b .LBB13_31 .p2align 4, , 16 .LBB13_35: # %vector.memcheck403 # in Loop: Header=BB13_32 Depth=2 - addi.d $t1, $t3, 8 - ld.d $a1, $sp, 208 # 8-byte Folded Reload - add.d $a0, $t3, $a1 - addi.d $t2, $a3, 8 - add.d $a4, $a3, $a1 - sltu $a1, $t1, $a4 - sltu $a2, $t2, $a0 + addi.d $s3, $a6, 8 + ld.d $a0, $sp, 224 # 8-byte Folded Reload + add.d $s1, $a6, $a0 + addi.d $s4, $t2, 8 + add.d $a0, $t2, $a0 + sltu $a1, $s3, $a0 + sltu $a2, $s4, $s1 and $a1, $a1, $a2 - ori $t4, $zero, 1 + ori $t6, $zero, 1 bnez $a1, .LBB13_33 # %bb.36: # %vector.memcheck403 # in Loop: Header=BB13_32 Depth=2 - ld.d $a1, $sp, 208 # 8-byte Folded Reload - add.d $s1, $t8, $a1 - sltu $a1, $t1, $s1 - sltu $a2, $t8, $a0 + ld.d $a1, $sp, 224 # 8-byte Folded Reload + add.d $s2, $a7, $a1 + sltu $a1, $s3, $s2 + sltu $a2, $a7, $s1 and $a1, $a1, $a2 - ori $t4, $zero, 1 + ori $t6, $zero, 1 bnez $a1, .LBB13_33 # %bb.37: # %vector.memcheck403 # in Loop: Header=BB13_32 Depth=2 - ld.d $a1, $sp, 80 # 8-byte Folded Reload - add.d $s2, $t7, $a1 - sltu $a1, $t1, $s2 - sltu $a2, $t7, $a0 + ld.d $a1, $sp, 96 # 8-byte Folded Reload + add.d $fp, $ra, $a1 + sltu $a1, $s3, $fp + sltu $a2, $ra, $s1 and $a1, $a1, $a2 - ori $t4, $zero, 1 + ori $t6, $zero, 1 bnez $a1, .LBB13_33 # %bb.38: # %vector.memcheck403 # in Loop: Header=BB13_32 Depth=2 - addi.d $s3, $t6, 8 - ld.d $a1, $sp, 208 # 8-byte Folded Reload - add.d $fp, $t6, $a1 - sltu $a1, $t1, $fp - sltu $a2, $s3, $a0 - and $a1, $a1, $a2 - ori $t4, $zero, 1 - bnez $a1, .LBB13_33 + addi.d $s5, $t8, 8 + ld.d $a1, $sp, 224 # 8-byte Folded Reload + add.d $a1, $t8, $a1 + sltu $a2, $s3, $a1 + sltu $a3, $s5, $s1 + and $a2, $a2, $a3 + ori $t6, $zero, 1 + bnez $a2, .LBB13_33 # %bb.39: # %vector.memcheck403 # in Loop: Header=BB13_32 Depth=2 - addi.d $s4, $ra, 8 - ld.d $a1, $sp, 208 # 8-byte Folded Reload - add.d $a1, $ra, $a1 - sltu $a2, $t1, $a1 - sltu $a5, $s4, $a0 - and $a2, $a2, $a5 - ori $t4, $zero, 1 + addi.d $a5, $t0, 8 + ld.d $a2, $sp, 224 # 8-byte Folded Reload + add.d $s6, $t0, $a2 + sltu $a2, $s3, $s6 + sltu $a3, $a5, $s1 + and $a2, $a2, $a3 + ori $t6, $zero, 1 bnez $a2, .LBB13_33 # %bb.40: # %vector.memcheck403 # in Loop: Header=BB13_32 Depth=2 - ld.d $a5, $sp, 200 # 8-byte Folded Reload - addi.d $s5, $a5, 8 - ld.d $a2, $sp, 208 # 8-byte Folded Reload - add.d $s6, $a5, $a2 - sltu $a2, $t1, $s6 - sltu $a5, $s5, $a0 - and $a2, $a2, $a5 - ori $t4, $zero, 1 - bnez $a2, .LBB13_33 + ld.d $a3, $sp, 216 # 8-byte Folded Reload + addi.d $s7, $a3, 8 + ld.d $a2, $sp, 224 # 8-byte Folded Reload + add.d $a2, $a3, $a2 + sltu $a3, $s3, $a2 + sltu $a4, $s7, $s1 + and $a3, $a3, $a4 + ori $t6, $zero, 1 + bnez $a3, .LBB13_33 # %bb.41: # %vector.memcheck403 # in Loop: Header=BB13_32 Depth=2 - ld.d $a7, $sp, 192 # 8-byte Folded Reload - addi.d $a5, $a7, 8 - ld.d $a2, $sp, 208 # 8-byte Folded Reload - add.d $a2, $a7, $a2 - sltu $a7, $t1, $a2 - sltu $t0, $a5, $a0 - and $a7, $a7, $t0 - ori $t4, $zero, 1 - bnez $a7, .LBB13_33 + ld.d $a4, $sp, 208 # 8-byte Folded Reload + addi.d $s8, $a4, 8 + ld.d $a3, $sp, 224 # 8-byte Folded Reload + add.d $t3, $a4, $a3 + sltu $a3, $s3, $t3 + sltu $a4, $s8, $s1 + and $a3, $a3, $a4 + ori $t6, $zero, 1 + bnez $a3, .LBB13_33 # %bb.42: # %vector.memcheck403 # in Loop: Header=BB13_32 Depth=2 - ld.d $a7, $sp, 80 # 8-byte Folded Reload - add.d $t0, $t5, $a7 - sltu $a7, $t1, $t0 - sltu $t4, $t5, $a0 - and $a7, $a7, $t4 - ori $t4, $zero, 1 - bnez $a7, .LBB13_33 + ld.d $a3, $sp, 96 # 8-byte Folded Reload + add.d $s0, $t7, $a3 + sltu $a3, $s3, $s0 + sltu $a4, $t7, $s1 + and $a3, $a3, $a4 + ori $t6, $zero, 1 + bnez $a3, .LBB13_33 # %bb.43: # %vector.memcheck403 # in Loop: Header=BB13_32 Depth=2 - addi.d $s7, $a6, 8 - ld.d $a7, $sp, 208 # 8-byte Folded Reload - add.d $a7, $a6, $a7 - sltu $t4, $t1, $a7 - sltu $s0, $s7, $a0 - and $s0, $t4, $s0 - ori $t4, $zero, 1 - bnez $s0, .LBB13_33 + addi.d $t4, $t1, 8 + ld.d $a3, $sp, 224 # 8-byte Folded Reload + add.d $t5, $t1, $a3 + sltu $a3, $s3, $t5 + sltu $a4, $t4, $s1 + and $a3, $a3, $a4 + ori $t6, $zero, 1 + bnez $a3, .LBB13_33 # %bb.44: # %vector.memcheck403 # in Loop: Header=BB13_32 Depth=2 - ld.d $s0, $sp, 184 # 8-byte Folded Reload - addi.d $s8, $s0, 8 - ld.d $t4, $sp, 208 # 8-byte Folded Reload - add.d $s0, $s0, $t4 - sltu $t4, $t1, $s0 - sltu $a0, $s8, $a0 - and $a0, $t4, $a0 - ori $t4, $zero, 1 - bnez $a0, .LBB13_33 + ld.d $t6, $sp, 200 # 8-byte Folded Reload + addi.d $a4, $t6, 8 + ld.d $a3, $sp, 224 # 8-byte Folded Reload + add.d $a3, $t6, $a3 + sltu $t6, $s3, $a3 + sltu $s1, $a4, $s1 + and $s1, $t6, $s1 + ori $t6, $zero, 1 + bnez $s1, .LBB13_33 # %bb.45: # %vector.memcheck403 # in Loop: Header=BB13_32 Depth=2 - sltu $a0, $t2, $s1 - sltu $t4, $t8, $a4 - and $a0, $a0, $t4 - ori $t4, $zero, 1 - bnez $a0, .LBB13_33 + sltu $t6, $s4, $s2 + sltu $s1, $a7, $a0 + and $s1, $t6, $s1 + ori $t6, $zero, 1 + bnez $s1, .LBB13_33 # %bb.46: # %vector.memcheck403 # in Loop: Header=BB13_32 Depth=2 - sltu $a0, $t2, $s2 - sltu $t4, $t7, $a4 - and $a0, $a0, $t4 - ori $t4, $zero, 1 - bnez $a0, .LBB13_33 + sltu $t6, $s4, $fp + sltu $fp, $ra, $a0 + and $fp, $t6, $fp + ori $t6, $zero, 1 + bnez $fp, .LBB13_33 # %bb.47: # %vector.memcheck403 # in Loop: Header=BB13_32 Depth=2 - sltu $a0, $t2, $fp - sltu $t4, $s3, $a4 - and $a0, $a0, $t4 - ori $t4, $zero, 1 - bnez $a0, .LBB13_33 + sltu $a1, $s4, $a1 + sltu $t6, $s5, $a0 + and $a1, $a1, $t6 + ori $t6, $zero, 1 + bnez $a1, .LBB13_33 # %bb.48: # %vector.memcheck403 # in Loop: Header=BB13_32 Depth=2 - sltu $a0, $t2, $a1 - sltu $a1, $s4, $a4 - and $a0, $a0, $a1 - ori $t4, $zero, 1 - bnez $a0, .LBB13_33 + sltu $a1, $s4, $s6 + sltu $t6, $a5, $a0 + and $a1, $a1, $t6 + ori $t6, $zero, 1 + bnez $a1, .LBB13_33 # %bb.49: # %vector.memcheck403 # in Loop: Header=BB13_32 Depth=2 - sltu $a0, $t2, $s6 - sltu $a1, $s5, $a4 - and $a0, $a0, $a1 - ori $t4, $zero, 1 - bnez $a0, .LBB13_33 + sltu $a1, $s4, $a2 + sltu $a2, $s7, $a0 + and $a1, $a1, $a2 + ori $t6, $zero, 1 + bnez $a1, .LBB13_33 # %bb.50: # %vector.memcheck403 # in Loop: Header=BB13_32 Depth=2 - sltu $a0, $t2, $a2 - sltu $a1, $a5, $a4 - and $a0, $a0, $a1 - ori $t4, $zero, 1 - bnez $a0, .LBB13_33 + sltu $a1, $s4, $t3 + sltu $a2, $s8, $a0 + and $a1, $a1, $a2 + ori $t6, $zero, 1 + bnez $a1, .LBB13_33 # %bb.51: # %vector.memcheck403 # in Loop: Header=BB13_32 Depth=2 - sltu $a0, $t2, $t0 - sltu $a1, $t5, $a4 - and $a0, $a0, $a1 - ori $t4, $zero, 1 - bnez $a0, .LBB13_33 + sltu $a1, $s4, $s0 + sltu $a2, $t7, $a0 + and $a1, $a1, $a2 + ori $t6, $zero, 1 + bnez $a1, .LBB13_33 # %bb.52: # %vector.memcheck403 # in Loop: Header=BB13_32 Depth=2 - sltu $a0, $t2, $a7 - sltu $a1, $s7, $a4 - and $a0, $a0, $a1 - ori $t4, $zero, 1 - bnez $a0, .LBB13_33 + sltu $a1, $s4, $t5 + sltu $a2, $t4, $a0 + and $a1, $a1, $a2 + ori $t6, $zero, 1 + bnez $a1, .LBB13_33 # %bb.53: # %vector.memcheck403 # in Loop: Header=BB13_32 Depth=2 - sltu $a0, $t2, $s0 - sltu $a1, $s8, $a4 - and $a0, $a0, $a1 - ori $t4, $zero, 1 + sltu $a1, $s4, $a3 + sltu $a0, $a4, $a0 + and $a0, $a1, $a0 + ori $t6, $zero, 1 bnez $a0, .LBB13_33 # %bb.54: # %vector.body499.preheader # in Loop: Header=BB13_32 Depth=2 move $a0, $zero - addi.d $a4, $t5, 8 - addi.d $a1, $t8, 8 - addi.d $fp, $t7, 8 - ld.d $t0, $sp, 64 # 8-byte Folded Reload - move $s1, $t0 + addi.d $a1, $t7, 8 + addi.d $fp, $a7, 8 + addi.d $s1, $ra, 8 + ld.d $t3, $sp, 80 # 8-byte Folded Reload + move $s2, $t3 + ld.d $t5, $sp, 72 # 8-byte Folded Reload .p2align 4, , 16 .LBB13_55: # %vector.body499 # Parent Loop BB13_4 Depth=1 # Parent Loop BB13_32 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $a2, $fp, $a0 - vldx $vr2, $fp, $a0 + add.d $a2, $s1, $a0 + vldx $vr2, $s1, $a0 vld $vr3, $a2, 8 - add.d $a7, $a1, $a0 + add.d $a3, $fp, $a0 vld $vr4, $a2, -8 - vldx $vr5, $a1, $a0 + vldx $vr5, $fp, $a0 vfsub.d $vr3, $vr2, $vr3 - vld $vr6, $a7, -8 + vld $vr6, $a3, -8 vfsub.d $vr4, $vr2, $vr4 vbitrevi.d $vr4, $vr4, 63 - vldx $vr7, $s4, $a0 + vldx $vr7, $a5, $a0 vfmul.d $vr4, $vr6, $vr4 vfmadd.d $vr3, $vr5, $vr3, $vr4 - vldx $vr4, $s3, $a0 + vldx $vr4, $s5, $a0 vfsub.d $vr7, $vr2, $vr7 - vldx $vr8, $a5, $a0 - vldx $vr9, $s5, $a0 + vldx $vr8, $s8, $a0 + vldx $vr9, $s7, $a0 vbitrevi.d $vr4, $vr4, 63 vfmadd.d $vr3, $vr4, $vr7, $vr3 vfsub.d $vr2, $vr2, $vr8 vfmadd.d $vr2, $vr9, $vr2, $vr3 - vldx $vr3, $t1, $a0 - add.d $a2, $a4, $a0 - vldx $vr7, $a4, $a0 - vld $vr8, $a2, -8 - vfmadd.d $vr2, $vr2, $vr1, $vr3 - vstx $vr2, $t1, $a0 + vldx $vr3, $s3, $a0 + vreplgr2vr.d $vr7, $t5 + add.d $a2, $a1, $a0 + vldx $vr8, $a1, $a0 + vld $vr10, $a2, -8 + vfmadd.d $vr2, $vr2, $vr7, $vr3 + vstx $vr2, $s3, $a0 vld $vr2, $a2, 8 - vfsub.d $vr3, $vr7, $vr8 + vfsub.d $vr3, $vr8, $vr10 vbitrevi.d $vr3, $vr3, 63 vfmul.d $vr3, $vr6, $vr3 - vldx $vr6, $s7, $a0 - vfsub.d $vr2, $vr7, $vr2 + vldx $vr6, $t4, $a0 + vfsub.d $vr2, $vr8, $vr2 vfmadd.d $vr2, $vr5, $vr2, $vr3 - vldx $vr3, $s8, $a0 - vfsub.d $vr5, $vr7, $vr6 - vldx $vr6, $t2, $a0 + vldx $vr3, $a4, $a0 + vfsub.d $vr5, $vr8, $vr6 + vldx $vr6, $s4, $a0 vfmadd.d $vr2, $vr4, $vr5, $vr2 - vfsub.d $vr3, $vr7, $vr3 + vfsub.d $vr3, $vr8, $vr3 vfmadd.d $vr2, $vr9, $vr3, $vr2 - vfmadd.d $vr2, $vr2, $vr1, $vr6 - vstx $vr2, $t2, $a0 - addi.d $s1, $s1, -2 + vfmadd.d $vr2, $vr2, $vr7, $vr6 + vstx $vr2, $s4, $a0 + addi.d $s2, $s2, -2 addi.d $a0, $a0, 16 - bnez $s1, .LBB13_55 + bnez $s2, .LBB13_55 # %bb.56: # %middle.block523 # in Loop: Header=BB13_32 Depth=2 - ld.d $t4, $sp, 72 # 8-byte Folded Reload - ld.d $a0, $sp, 176 # 8-byte Folded Reload - beq $a0, $t0, .LBB13_31 + ld.d $t6, $sp, 88 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload + beq $a0, $t3, .LBB13_31 b .LBB13_33 .p2align 4, , 16 .LBB13_57: # %.preheader.us.preheader # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 168 # 8-byte Folded Reload - ld.d $a3, $a0, 8 - ld.d $a0, $sp, 128 # 8-byte Folded Reload - ld.d $a4, $a0, 8 - ld.d $s2, $sp, 40 # 8-byte Folded Reload - ld.d $a5, $s2, 8 - ld.d $a0, $sp, 136 # 8-byte Folded Reload - ld.d $a6, $a0, 8 - ld.d $a0, $sp, 120 # 8-byte Folded Reload - ld.d $a7, $a0, 8 - ld.d $s3, $sp, 32 # 8-byte Folded Reload - ld.d $t0, $s3, 8 - ori $a0, $zero, 1 - pcalau12i $t4, %pc_hi20(.LCPI13_1) - ld.d $a1, $sp, 176 # 8-byte Folded Reload - ori $t8, $zero, 2 - bgeu $a1, $t8, .LBB13_73 + ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $a0, 8 + ld.d $a1, $sp, 144 # 8-byte Folded Reload + ld.d $a3, $a1, 8 + ld.d $s2, $sp, 48 # 8-byte Folded Reload + ld.d $a4, $s2, 8 + ld.d $a1, $sp, 152 # 8-byte Folded Reload + ld.d $a5, $a1, 8 + ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $a6, $a1, 8 + ld.d $s3, $sp, 40 # 8-byte Folded Reload + ld.d $a7, $s3, 8 + ori $a1, $zero, 1 + ld.d $a2, $sp, 192 # 8-byte Folded Reload + ori $t7, $zero, 2 + bgeu $a2, $t7, .LBB13_73 # %bb.58: # in Loop: Header=BB13_4 Depth=1 - ld.d $s4, $sp, 56 # 8-byte Folded Reload - ld.d $t6, $sp, 64 # 8-byte Folded Reload - ld.d $t7, $sp, 24 # 8-byte Folded Reload + ld.d $s4, $sp, 64 # 8-byte Folded Reload + ld.d $t5, $sp, 80 # 8-byte Folded Reload + ld.d $t6, $sp, 32 # 8-byte Folded Reload + ld.d $t8, $sp, 24 # 8-byte Folded Reload ori $fp, $zero, 16 .LBB13_59: # %scalar.ph387.preheader # in Loop: Header=BB13_4 Depth=1 - alsl.d $a1, $a0, $a3, 3 - alsl.d $a2, $a0, $a4, 3 - alsl.d $a3, $a0, $a5, 3 - alsl.d $a4, $a0, $a6, 3 - alsl.d $a5, $a0, $a7, 3 - alsl.d $a6, $a0, $t0, 3 - ld.d $a7, $sp, 152 # 8-byte Folded Reload - sub.d $a0, $a7, $a0 + alsl.d $a0, $a1, $a0, 3 + alsl.d $a2, $a1, $a3, 3 + alsl.d $a3, $a1, $a4, 3 + alsl.d $a4, $a1, $a5, 3 + alsl.d $a5, $a1, $a6, 3 + alsl.d $a6, $a1, $a7, 3 + ld.d $a7, $sp, 168 # 8-byte Folded Reload + sub.d $a1, $a7, $a1 .p2align 4, , 16 .LBB13_60: # %scalar.ph387 # Parent Loop BB13_4 Depth=1 # => This Inner Loop Header: Depth=2 - fld.d $fa2, $a1, 0 + fld.d $fa2, $a0, 0 fld.d $fa3, $a2, 0 - fld.d $fa4, $t4, %pc_lo12(.LCPI13_1) - fmadd.d $fa2, $fa3, $fa4, $fa2 + fmadd.d $fa2, $fa3, $fa1, $fa2 fst.d $fa2, $a3, 0 fld.d $fa2, $a4, 0 fld.d $fa3, $a5, 0 - fmadd.d $fa2, $fa3, $fa4, $fa2 + fmadd.d $fa2, $fa3, $fa1, $fa2 fst.d $fa2, $a6, 0 - addi.d $a1, $a1, 8 + addi.d $a0, $a0, 8 addi.d $a2, $a2, 8 addi.d $a3, $a3, 8 addi.d $a4, $a4, 8 addi.d $a5, $a5, 8 - addi.d $a0, $a0, -1 + addi.d $a1, $a1, -1 addi.d $a6, $a6, 8 - bnez $a0, .LBB13_60 + bnez $a1, .LBB13_60 .LBB13_61: # %._crit_edge.us213 # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 168 # 8-byte Folded Reload - ld.d $a3, $a0, 16 - ld.d $a0, $sp, 128 # 8-byte Folded Reload - ld.d $a4, $a0, 16 - ld.d $a5, $s2, 16 - ld.d $a0, $sp, 136 # 8-byte Folded Reload - ld.d $a6, $a0, 16 - ld.d $a0, $sp, 120 # 8-byte Folded Reload - ld.d $a7, $a0, 16 - ld.d $t0, $s3, 16 - ori $a0, $zero, 1 - ld.d $a1, $sp, 176 # 8-byte Folded Reload - bgeu $a1, $t8, .LBB13_85 + ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $a0, 16 + ld.d $a1, $sp, 144 # 8-byte Folded Reload + ld.d $a3, $a1, 16 + ld.d $a4, $s2, 16 + ld.d $a1, $sp, 152 # 8-byte Folded Reload + ld.d $a5, $a1, 16 + ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $a6, $a1, 16 + ld.d $a7, $s3, 16 + ori $a1, $zero, 1 + ld.d $a2, $sp, 192 # 8-byte Folded Reload + bgeu $a2, $t7, .LBB13_85 .LBB13_62: # %scalar.ph353.preheader # in Loop: Header=BB13_4 Depth=1 - ld.d $a1, $sp, 152 # 8-byte Folded Reload - sub.d $a1, $a1, $a0 - alsl.d $a2, $a0, $t0, 3 - alsl.d $a7, $a0, $a7, 3 - alsl.d $a6, $a0, $a6, 3 - alsl.d $a5, $a0, $a5, 3 - alsl.d $a4, $a0, $a4, 3 - alsl.d $a0, $a0, $a3, 3 + ld.d $a2, $sp, 168 # 8-byte Folded Reload + sub.d $a2, $a2, $a1 + alsl.d $a7, $a1, $a7, 3 + alsl.d $a6, $a1, $a6, 3 + alsl.d $a5, $a1, $a5, 3 + alsl.d $a4, $a1, $a4, 3 + alsl.d $a3, $a1, $a3, 3 + alsl.d $a0, $a1, $a0, 3 .p2align 4, , 16 .LBB13_63: # %scalar.ph353 # Parent Loop BB13_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa2, $a0, 0 - fld.d $fa3, $a4, 0 - fld.d $fa4, $t4, %pc_lo12(.LCPI13_1) - fmadd.d $fa2, $fa3, $fa4, $fa2 - fst.d $fa2, $a5, 0 - fld.d $fa2, $a6, 0 - fld.d $fa3, $a7, 0 - fmadd.d $fa2, $fa3, $fa4, $fa2 - fst.d $fa2, $a2, 0 - addi.d $a1, $a1, -1 - addi.d $a2, $a2, 8 + fld.d $fa3, $a3, 0 + fmadd.d $fa2, $fa3, $fa1, $fa2 + fst.d $fa2, $a4, 0 + fld.d $fa2, $a5, 0 + fld.d $fa3, $a6, 0 + fmadd.d $fa2, $fa3, $fa1, $fa2 + fst.d $fa2, $a7, 0 + addi.d $a2, $a2, -1 addi.d $a7, $a7, 8 addi.d $a6, $a6, 8 addi.d $a5, $a5, 8 addi.d $a4, $a4, 8 + addi.d $a3, $a3, 8 addi.d $a0, $a0, 8 - bnez $a1, .LBB13_63 + bnez $a2, .LBB13_63 .LBB13_64: # %._crit_edge.us213.1 # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 168 # 8-byte Folded Reload - ld.d $a3, $a0, 24 - ld.d $a0, $sp, 128 # 8-byte Folded Reload - ld.d $a4, $a0, 24 - ld.d $a5, $s2, 24 - ld.d $a0, $sp, 136 # 8-byte Folded Reload - ld.d $a6, $a0, 24 - ld.d $a0, $sp, 120 # 8-byte Folded Reload - ld.d $a7, $a0, 24 - ld.d $t0, $s3, 24 - ori $a0, $zero, 1 - ld.d $a1, $sp, 176 # 8-byte Folded Reload - bgeu $a1, $t8, .LBB13_97 + ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $a0, 24 + ld.d $a1, $sp, 144 # 8-byte Folded Reload + ld.d $a3, $a1, 24 + ld.d $a4, $s2, 24 + ld.d $a1, $sp, 152 # 8-byte Folded Reload + ld.d $a5, $a1, 24 + ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $a6, $a1, 24 + ld.d $a7, $s3, 24 + ori $a1, $zero, 1 + ld.d $a2, $sp, 192 # 8-byte Folded Reload + bgeu $a2, $t7, .LBB13_97 .LBB13_65: # %scalar.ph319.preheader # in Loop: Header=BB13_4 Depth=1 - ld.d $a1, $sp, 152 # 8-byte Folded Reload - sub.d $a1, $a1, $a0 - alsl.d $a2, $a0, $t0, 3 - alsl.d $a7, $a0, $a7, 3 - alsl.d $a6, $a0, $a6, 3 - alsl.d $a5, $a0, $a5, 3 - alsl.d $a4, $a0, $a4, 3 - alsl.d $a0, $a0, $a3, 3 + ld.d $a2, $sp, 168 # 8-byte Folded Reload + sub.d $a2, $a2, $a1 + alsl.d $a7, $a1, $a7, 3 + alsl.d $a6, $a1, $a6, 3 + alsl.d $a5, $a1, $a5, 3 + alsl.d $a4, $a1, $a4, 3 + alsl.d $a3, $a1, $a3, 3 + alsl.d $a0, $a1, $a0, 3 .p2align 4, , 16 .LBB13_66: # %scalar.ph319 # Parent Loop BB13_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa2, $a0, 0 - fld.d $fa3, $a4, 0 - fld.d $fa4, $t4, %pc_lo12(.LCPI13_1) - fmadd.d $fa2, $fa3, $fa4, $fa2 - fst.d $fa2, $a5, 0 - fld.d $fa2, $a6, 0 - fld.d $fa3, $a7, 0 - fmadd.d $fa2, $fa3, $fa4, $fa2 - fst.d $fa2, $a2, 0 - addi.d $a1, $a1, -1 - addi.d $a2, $a2, 8 + fld.d $fa3, $a3, 0 + fmadd.d $fa2, $fa3, $fa1, $fa2 + fst.d $fa2, $a4, 0 + fld.d $fa2, $a5, 0 + fld.d $fa3, $a6, 0 + fmadd.d $fa2, $fa3, $fa1, $fa2 + fst.d $fa2, $a7, 0 + addi.d $a2, $a2, -1 addi.d $a7, $a7, 8 addi.d $a6, $a6, 8 addi.d $a5, $a5, 8 addi.d $a4, $a4, 8 + addi.d $a3, $a3, 8 addi.d $a0, $a0, 8 - bnez $a1, .LBB13_66 + bnez $a2, .LBB13_66 .LBB13_67: # %._crit_edge.us213.2 # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 168 # 8-byte Folded Reload - ld.d $a3, $a0, 32 - ld.d $a0, $sp, 128 # 8-byte Folded Reload - ld.d $a4, $a0, 32 - ld.d $a5, $s2, 32 - ld.d $a0, $sp, 136 # 8-byte Folded Reload - ld.d $a6, $a0, 32 - ld.d $a0, $sp, 120 # 8-byte Folded Reload - ld.d $a7, $a0, 32 - ld.d $t0, $s3, 32 - ori $a0, $zero, 1 - ld.d $a1, $sp, 176 # 8-byte Folded Reload - bgeu $a1, $t8, .LBB13_109 + ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $a0, 32 + ld.d $a1, $sp, 144 # 8-byte Folded Reload + ld.d $a3, $a1, 32 + ld.d $a4, $s2, 32 + ld.d $a1, $sp, 152 # 8-byte Folded Reload + ld.d $a5, $a1, 32 + ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $a6, $a1, 32 + ld.d $a7, $s3, 32 + ori $a1, $zero, 1 + ld.d $a2, $sp, 192 # 8-byte Folded Reload + bgeu $a2, $t7, .LBB13_109 .LBB13_68: # %scalar.ph285.preheader # in Loop: Header=BB13_4 Depth=1 - ld.d $a1, $sp, 152 # 8-byte Folded Reload - sub.d $a1, $a1, $a0 - alsl.d $a2, $a0, $t0, 3 - alsl.d $a7, $a0, $a7, 3 - alsl.d $a6, $a0, $a6, 3 - alsl.d $a5, $a0, $a5, 3 - alsl.d $a4, $a0, $a4, 3 - alsl.d $a0, $a0, $a3, 3 + ld.d $a2, $sp, 168 # 8-byte Folded Reload + sub.d $a2, $a2, $a1 + alsl.d $a7, $a1, $a7, 3 + alsl.d $a6, $a1, $a6, 3 + alsl.d $a5, $a1, $a5, 3 + alsl.d $a4, $a1, $a4, 3 + alsl.d $a3, $a1, $a3, 3 + alsl.d $a0, $a1, $a0, 3 .p2align 4, , 16 .LBB13_69: # %scalar.ph285 # Parent Loop BB13_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa2, $a0, 0 - fld.d $fa3, $a4, 0 - fld.d $fa4, $t4, %pc_lo12(.LCPI13_1) - fmadd.d $fa2, $fa3, $fa4, $fa2 - fst.d $fa2, $a5, 0 - fld.d $fa2, $a6, 0 - fld.d $fa3, $a7, 0 - fmadd.d $fa2, $fa3, $fa4, $fa2 - fst.d $fa2, $a2, 0 - addi.d $a1, $a1, -1 - addi.d $a2, $a2, 8 + fld.d $fa3, $a3, 0 + fmadd.d $fa2, $fa3, $fa1, $fa2 + fst.d $fa2, $a4, 0 + fld.d $fa2, $a5, 0 + fld.d $fa3, $a6, 0 + fmadd.d $fa2, $fa3, $fa1, $fa2 + fst.d $fa2, $a7, 0 + addi.d $a2, $a2, -1 addi.d $a7, $a7, 8 addi.d $a6, $a6, 8 addi.d $a5, $a5, 8 addi.d $a4, $a4, 8 + addi.d $a3, $a3, 8 addi.d $a0, $a0, 8 - bnez $a1, .LBB13_69 + bnez $a2, .LBB13_69 .LBB13_70: # %._crit_edge.us213.3 # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 168 # 8-byte Folded Reload - ld.d $a3, $a0, 40 - ld.d $a0, $sp, 128 # 8-byte Folded Reload - ld.d $a4, $a0, 40 - ld.d $a5, $s2, 40 - ld.d $a0, $sp, 136 # 8-byte Folded Reload - ld.d $a6, $a0, 40 - ld.d $a0, $sp, 120 # 8-byte Folded Reload - ld.d $a7, $a0, 40 - ld.d $t0, $s3, 40 - ori $a0, $zero, 1 - ld.d $a1, $sp, 176 # 8-byte Folded Reload - bgeu $a1, $t8, .LBB13_121 + ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $a0, 40 + ld.d $a1, $sp, 144 # 8-byte Folded Reload + ld.d $a3, $a1, 40 + ld.d $a4, $s2, 40 + ld.d $a1, $sp, 152 # 8-byte Folded Reload + ld.d $a5, $a1, 40 + ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $a6, $a1, 40 + ld.d $a7, $s3, 40 + ori $a1, $zero, 1 + ld.d $a2, $sp, 192 # 8-byte Folded Reload + bgeu $a2, $t7, .LBB13_121 .LBB13_71: # %scalar.ph.preheader # in Loop: Header=BB13_4 Depth=1 - ld.d $a1, $sp, 152 # 8-byte Folded Reload - sub.d $a1, $a1, $a0 - alsl.d $a2, $a0, $t0, 3 - alsl.d $a7, $a0, $a7, 3 - alsl.d $a6, $a0, $a6, 3 - alsl.d $a5, $a0, $a5, 3 - alsl.d $a4, $a0, $a4, 3 - alsl.d $a0, $a0, $a3, 3 + ld.d $a2, $sp, 168 # 8-byte Folded Reload + sub.d $a2, $a2, $a1 + alsl.d $a7, $a1, $a7, 3 + alsl.d $a6, $a1, $a6, 3 + alsl.d $a5, $a1, $a5, 3 + alsl.d $a4, $a1, $a4, 3 + alsl.d $a3, $a1, $a3, 3 + alsl.d $a0, $a1, $a0, 3 .p2align 4, , 16 .LBB13_72: # %scalar.ph # Parent Loop BB13_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa2, $a0, 0 - fld.d $fa3, $a4, 0 - fld.d $fa4, $t4, %pc_lo12(.LCPI13_1) - fmadd.d $fa2, $fa3, $fa4, $fa2 - fst.d $fa2, $a5, 0 - fld.d $fa2, $a6, 0 - fld.d $fa3, $a7, 0 - fmadd.d $fa2, $fa3, $fa4, $fa2 - fst.d $fa2, $a2, 0 - addi.d $a1, $a1, -1 - addi.d $a2, $a2, 8 + fld.d $fa3, $a3, 0 + fmadd.d $fa2, $fa3, $fa1, $fa2 + fst.d $fa2, $a4, 0 + fld.d $fa2, $a5, 0 + fld.d $fa3, $a6, 0 + fmadd.d $fa2, $fa3, $fa1, $fa2 + fst.d $fa2, $a7, 0 + addi.d $a2, $a2, -1 addi.d $a7, $a7, 8 addi.d $a6, $a6, 8 addi.d $a5, $a5, 8 addi.d $a4, $a4, 8 + addi.d $a3, $a3, 8 addi.d $a0, $a0, 8 - bnez $a1, .LBB13_72 + bnez $a2, .LBB13_72 b .LBB13_3 .p2align 4, , 16 .LBB13_73: # %vector.memcheck369 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a5 - ld.d $s4, $sp, 56 # 8-byte Folded Reload - ld.d $t6, $sp, 64 # 8-byte Folded Reload - ld.d $t7, $sp, 24 # 8-byte Folded Reload + sub.d $a2, $a7, $a4 + ld.d $s4, $sp, 64 # 8-byte Folded Reload + ld.d $t5, $sp, 80 # 8-byte Folded Reload + ld.d $t6, $sp, 32 # 8-byte Folded Reload + ld.d $t8, $sp, 24 # 8-byte Folded Reload ori $fp, $zero, 16 - bltu $a1, $fp, .LBB13_59 + bltu $a2, $fp, .LBB13_59 # %bb.74: # %vector.memcheck369 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a5, $a3 - bltu $a1, $fp, .LBB13_59 + sub.d $a2, $a4, $a0 + bltu $a2, $fp, .LBB13_59 # %bb.75: # %vector.memcheck369 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a5, $a4 - bltu $a1, $fp, .LBB13_59 + sub.d $a2, $a4, $a3 + bltu $a2, $fp, .LBB13_59 # %bb.76: # %vector.memcheck369 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a6, $a5 - bltu $a1, $fp, .LBB13_59 + sub.d $a2, $a5, $a4 + bltu $a2, $fp, .LBB13_59 # %bb.77: # %vector.memcheck369 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a7, $a5 - bltu $a1, $fp, .LBB13_59 + sub.d $a2, $a6, $a4 + bltu $a2, $fp, .LBB13_59 # %bb.78: # %vector.memcheck369 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a3 - bltu $a1, $fp, .LBB13_59 + sub.d $a2, $a7, $a0 + bltu $a2, $fp, .LBB13_59 # %bb.79: # %vector.memcheck369 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a4 - bltu $a1, $fp, .LBB13_59 + sub.d $a2, $a7, $a3 + bltu $a2, $fp, .LBB13_59 # %bb.80: # %vector.memcheck369 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a6 - bltu $a1, $fp, .LBB13_59 + sub.d $a2, $a7, $a5 + bltu $a2, $fp, .LBB13_59 # %bb.81: # %vector.memcheck369 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a7 - bltu $a1, $fp, .LBB13_59 + sub.d $a2, $a7, $a6 + bltu $a2, $fp, .LBB13_59 # %bb.82: # %vector.body392.preheader # in Loop: Header=BB13_4 Depth=1 - addi.d $a0, $a3, 8 - addi.d $a1, $a4, 8 - addi.d $a2, $a5, 8 - addi.d $t1, $a6, 8 - addi.d $t2, $a7, 8 - addi.d $t3, $t0, 8 - move $t5, $t6 + addi.d $a1, $a0, 8 + addi.d $a2, $a3, 8 + addi.d $t0, $a4, 8 + addi.d $t1, $a5, 8 + addi.d $t2, $a6, 8 + addi.d $t3, $a7, 8 + move $t4, $t5 .p2align 4, , 16 .LBB13_83: # %vector.body392 # Parent Loop BB13_4 Depth=1 # => This Inner Loop Header: Depth=2 - vld $vr2, $a0, 0 - vld $vr3, $a1, 0 - vfmadd.d $vr2, $vr3, $vr0, $vr2 - vst $vr2, $a2, 0 + vld $vr2, $a1, 0 + vld $vr3, $a2, 0 + vreplgr2vr.d $vr4, $t8 + vfmadd.d $vr2, $vr3, $vr4, $vr2 + vst $vr2, $t0, 0 vld $vr2, $t1, 0 vld $vr3, $t2, 0 - vfmadd.d $vr2, $vr3, $vr0, $vr2 + vfmadd.d $vr2, $vr3, $vr4, $vr2 vst $vr2, $t3, 0 - addi.d $a0, $a0, 16 addi.d $a1, $a1, 16 addi.d $a2, $a2, 16 + addi.d $t0, $t0, 16 addi.d $t1, $t1, 16 addi.d $t2, $t2, 16 - addi.d $t5, $t5, -2 + addi.d $t4, $t4, -2 addi.d $t3, $t3, 16 - bnez $t5, .LBB13_83 + bnez $t4, .LBB13_83 # %bb.84: # %middle.block400 # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 72 # 8-byte Folded Reload - ld.d $a1, $sp, 176 # 8-byte Folded Reload - bne $a1, $t6, .LBB13_59 + ld.d $a1, $sp, 88 # 8-byte Folded Reload + ld.d $a2, $sp, 192 # 8-byte Folded Reload + bne $a2, $t5, .LBB13_59 b .LBB13_61 .p2align 4, , 16 .LBB13_85: # %vector.memcheck335 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a5 - bltu $a1, $fp, .LBB13_62 + sub.d $a2, $a7, $a4 + bltu $a2, $fp, .LBB13_62 # %bb.86: # %vector.memcheck335 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a5, $a3 - bltu $a1, $fp, .LBB13_62 + sub.d $a2, $a4, $a0 + bltu $a2, $fp, .LBB13_62 # %bb.87: # %vector.memcheck335 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a5, $a4 - bltu $a1, $fp, .LBB13_62 + sub.d $a2, $a4, $a3 + bltu $a2, $fp, .LBB13_62 # %bb.88: # %vector.memcheck335 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a6, $a5 - bltu $a1, $fp, .LBB13_62 + sub.d $a2, $a5, $a4 + bltu $a2, $fp, .LBB13_62 # %bb.89: # %vector.memcheck335 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a7, $a5 - bltu $a1, $fp, .LBB13_62 + sub.d $a2, $a6, $a4 + bltu $a2, $fp, .LBB13_62 # %bb.90: # %vector.memcheck335 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a3 - bltu $a1, $fp, .LBB13_62 + sub.d $a2, $a7, $a0 + bltu $a2, $fp, .LBB13_62 # %bb.91: # %vector.memcheck335 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a4 - bltu $a1, $fp, .LBB13_62 + sub.d $a2, $a7, $a3 + bltu $a2, $fp, .LBB13_62 # %bb.92: # %vector.memcheck335 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a6 - bltu $a1, $fp, .LBB13_62 + sub.d $a2, $a7, $a5 + bltu $a2, $fp, .LBB13_62 # %bb.93: # %vector.memcheck335 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a7 - bltu $a1, $fp, .LBB13_62 + sub.d $a2, $a7, $a6 + bltu $a2, $fp, .LBB13_62 # %bb.94: # %vector.body358.preheader # in Loop: Header=BB13_4 Depth=1 - addi.d $a0, $a3, 8 - addi.d $a1, $a4, 8 - addi.d $a2, $a5, 8 - addi.d $t1, $a6, 8 - addi.d $t2, $a7, 8 - addi.d $t3, $t0, 8 - move $t5, $t6 + addi.d $a1, $a0, 8 + addi.d $a2, $a3, 8 + addi.d $t0, $a4, 8 + addi.d $t1, $a5, 8 + addi.d $t2, $a6, 8 + addi.d $t3, $a7, 8 + move $t4, $t5 .p2align 4, , 16 .LBB13_95: # %vector.body358 # Parent Loop BB13_4 Depth=1 # => This Inner Loop Header: Depth=2 - vld $vr2, $a0, 0 - vld $vr3, $a1, 0 - vfmadd.d $vr2, $vr3, $vr0, $vr2 - vst $vr2, $a2, 0 + vld $vr2, $a1, 0 + vld $vr3, $a2, 0 + vreplgr2vr.d $vr4, $t8 + vfmadd.d $vr2, $vr3, $vr4, $vr2 + vst $vr2, $t0, 0 vld $vr2, $t1, 0 vld $vr3, $t2, 0 - vfmadd.d $vr2, $vr3, $vr0, $vr2 + vfmadd.d $vr2, $vr3, $vr4, $vr2 vst $vr2, $t3, 0 - addi.d $a0, $a0, 16 addi.d $a1, $a1, 16 addi.d $a2, $a2, 16 + addi.d $t0, $t0, 16 addi.d $t1, $t1, 16 addi.d $t2, $t2, 16 - addi.d $t5, $t5, -2 + addi.d $t4, $t4, -2 addi.d $t3, $t3, 16 - bnez $t5, .LBB13_95 + bnez $t4, .LBB13_95 # %bb.96: # %middle.block366 # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 72 # 8-byte Folded Reload - ld.d $a1, $sp, 176 # 8-byte Folded Reload - bne $a1, $t6, .LBB13_62 + ld.d $a1, $sp, 88 # 8-byte Folded Reload + ld.d $a2, $sp, 192 # 8-byte Folded Reload + bne $a2, $t5, .LBB13_62 b .LBB13_64 .p2align 4, , 16 .LBB13_97: # %vector.memcheck301 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a5 - bltu $a1, $fp, .LBB13_65 + sub.d $a2, $a7, $a4 + bltu $a2, $fp, .LBB13_65 # %bb.98: # %vector.memcheck301 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a5, $a3 - bltu $a1, $fp, .LBB13_65 + sub.d $a2, $a4, $a0 + bltu $a2, $fp, .LBB13_65 # %bb.99: # %vector.memcheck301 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a5, $a4 - bltu $a1, $fp, .LBB13_65 + sub.d $a2, $a4, $a3 + bltu $a2, $fp, .LBB13_65 # %bb.100: # %vector.memcheck301 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a6, $a5 - bltu $a1, $fp, .LBB13_65 + sub.d $a2, $a5, $a4 + bltu $a2, $fp, .LBB13_65 # %bb.101: # %vector.memcheck301 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a7, $a5 - bltu $a1, $fp, .LBB13_65 + sub.d $a2, $a6, $a4 + bltu $a2, $fp, .LBB13_65 # %bb.102: # %vector.memcheck301 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a3 - bltu $a1, $fp, .LBB13_65 + sub.d $a2, $a7, $a0 + bltu $a2, $fp, .LBB13_65 # %bb.103: # %vector.memcheck301 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a4 - bltu $a1, $fp, .LBB13_65 + sub.d $a2, $a7, $a3 + bltu $a2, $fp, .LBB13_65 # %bb.104: # %vector.memcheck301 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a6 - bltu $a1, $fp, .LBB13_65 + sub.d $a2, $a7, $a5 + bltu $a2, $fp, .LBB13_65 # %bb.105: # %vector.memcheck301 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a7 - bltu $a1, $fp, .LBB13_65 + sub.d $a2, $a7, $a6 + bltu $a2, $fp, .LBB13_65 # %bb.106: # %vector.body324.preheader # in Loop: Header=BB13_4 Depth=1 - addi.d $a0, $a3, 8 - addi.d $a1, $a4, 8 - addi.d $a2, $a5, 8 - addi.d $t1, $a6, 8 - addi.d $t2, $a7, 8 - addi.d $t3, $t0, 8 - move $t5, $t6 + addi.d $a1, $a0, 8 + addi.d $a2, $a3, 8 + addi.d $t0, $a4, 8 + addi.d $t1, $a5, 8 + addi.d $t2, $a6, 8 + addi.d $t3, $a7, 8 + move $t4, $t5 .p2align 4, , 16 .LBB13_107: # %vector.body324 # Parent Loop BB13_4 Depth=1 # => This Inner Loop Header: Depth=2 - vld $vr2, $a0, 0 - vld $vr3, $a1, 0 - vfmadd.d $vr2, $vr3, $vr0, $vr2 - vst $vr2, $a2, 0 + vld $vr2, $a1, 0 + vld $vr3, $a2, 0 + vreplgr2vr.d $vr4, $t8 + vfmadd.d $vr2, $vr3, $vr4, $vr2 + vst $vr2, $t0, 0 vld $vr2, $t1, 0 vld $vr3, $t2, 0 - vfmadd.d $vr2, $vr3, $vr0, $vr2 + vfmadd.d $vr2, $vr3, $vr4, $vr2 vst $vr2, $t3, 0 - addi.d $a0, $a0, 16 addi.d $a1, $a1, 16 addi.d $a2, $a2, 16 + addi.d $t0, $t0, 16 addi.d $t1, $t1, 16 addi.d $t2, $t2, 16 - addi.d $t5, $t5, -2 + addi.d $t4, $t4, -2 addi.d $t3, $t3, 16 - bnez $t5, .LBB13_107 + bnez $t4, .LBB13_107 # %bb.108: # %middle.block332 # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 72 # 8-byte Folded Reload - ld.d $a1, $sp, 176 # 8-byte Folded Reload - bne $a1, $t6, .LBB13_65 + ld.d $a1, $sp, 88 # 8-byte Folded Reload + ld.d $a2, $sp, 192 # 8-byte Folded Reload + bne $a2, $t5, .LBB13_65 b .LBB13_67 .p2align 4, , 16 .LBB13_109: # %vector.memcheck267 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a5 - bltu $a1, $fp, .LBB13_68 + sub.d $a2, $a7, $a4 + bltu $a2, $fp, .LBB13_68 # %bb.110: # %vector.memcheck267 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a5, $a3 - bltu $a1, $fp, .LBB13_68 + sub.d $a2, $a4, $a0 + bltu $a2, $fp, .LBB13_68 # %bb.111: # %vector.memcheck267 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a5, $a4 - bltu $a1, $fp, .LBB13_68 + sub.d $a2, $a4, $a3 + bltu $a2, $fp, .LBB13_68 # %bb.112: # %vector.memcheck267 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a6, $a5 - bltu $a1, $fp, .LBB13_68 + sub.d $a2, $a5, $a4 + bltu $a2, $fp, .LBB13_68 # %bb.113: # %vector.memcheck267 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a7, $a5 - bltu $a1, $fp, .LBB13_68 + sub.d $a2, $a6, $a4 + bltu $a2, $fp, .LBB13_68 # %bb.114: # %vector.memcheck267 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a3 - bltu $a1, $fp, .LBB13_68 + sub.d $a2, $a7, $a0 + bltu $a2, $fp, .LBB13_68 # %bb.115: # %vector.memcheck267 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a4 - bltu $a1, $fp, .LBB13_68 + sub.d $a2, $a7, $a3 + bltu $a2, $fp, .LBB13_68 # %bb.116: # %vector.memcheck267 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a6 - bltu $a1, $fp, .LBB13_68 + sub.d $a2, $a7, $a5 + bltu $a2, $fp, .LBB13_68 # %bb.117: # %vector.memcheck267 # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a7 - bltu $a1, $fp, .LBB13_68 + sub.d $a2, $a7, $a6 + bltu $a2, $fp, .LBB13_68 # %bb.118: # %vector.body290.preheader # in Loop: Header=BB13_4 Depth=1 - addi.d $a0, $a3, 8 - addi.d $a1, $a4, 8 - addi.d $a2, $a5, 8 - addi.d $t1, $a6, 8 - addi.d $t2, $a7, 8 - addi.d $t3, $t0, 8 - move $t5, $t6 + addi.d $a1, $a0, 8 + addi.d $a2, $a3, 8 + addi.d $t0, $a4, 8 + addi.d $t1, $a5, 8 + addi.d $t2, $a6, 8 + addi.d $t3, $a7, 8 + move $t4, $t5 .p2align 4, , 16 .LBB13_119: # %vector.body290 # Parent Loop BB13_4 Depth=1 # => This Inner Loop Header: Depth=2 - vld $vr2, $a0, 0 - vld $vr3, $a1, 0 - vfmadd.d $vr2, $vr3, $vr0, $vr2 - vst $vr2, $a2, 0 + vld $vr2, $a1, 0 + vld $vr3, $a2, 0 + vreplgr2vr.d $vr4, $t8 + vfmadd.d $vr2, $vr3, $vr4, $vr2 + vst $vr2, $t0, 0 vld $vr2, $t1, 0 vld $vr3, $t2, 0 - vfmadd.d $vr2, $vr3, $vr0, $vr2 + vfmadd.d $vr2, $vr3, $vr4, $vr2 vst $vr2, $t3, 0 - addi.d $a0, $a0, 16 addi.d $a1, $a1, 16 addi.d $a2, $a2, 16 + addi.d $t0, $t0, 16 addi.d $t1, $t1, 16 addi.d $t2, $t2, 16 - addi.d $t5, $t5, -2 + addi.d $t4, $t4, -2 addi.d $t3, $t3, 16 - bnez $t5, .LBB13_119 + bnez $t4, .LBB13_119 # %bb.120: # %middle.block298 # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 72 # 8-byte Folded Reload - ld.d $a1, $sp, 176 # 8-byte Folded Reload - bne $a1, $t6, .LBB13_68 + ld.d $a1, $sp, 88 # 8-byte Folded Reload + ld.d $a2, $sp, 192 # 8-byte Folded Reload + bne $a2, $t5, .LBB13_68 b .LBB13_70 .p2align 4, , 16 .LBB13_121: # %vector.memcheck # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a5 - bltu $a1, $fp, .LBB13_71 + sub.d $a2, $a7, $a4 + bltu $a2, $fp, .LBB13_71 # %bb.122: # %vector.memcheck # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a5, $a3 - bltu $a1, $fp, .LBB13_71 + sub.d $a2, $a4, $a0 + bltu $a2, $fp, .LBB13_71 # %bb.123: # %vector.memcheck # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a5, $a4 - bltu $a1, $fp, .LBB13_71 + sub.d $a2, $a4, $a3 + bltu $a2, $fp, .LBB13_71 # %bb.124: # %vector.memcheck # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a6, $a5 - bltu $a1, $fp, .LBB13_71 + sub.d $a2, $a5, $a4 + bltu $a2, $fp, .LBB13_71 # %bb.125: # %vector.memcheck # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $a7, $a5 - bltu $a1, $fp, .LBB13_71 + sub.d $a2, $a6, $a4 + bltu $a2, $fp, .LBB13_71 # %bb.126: # %vector.memcheck # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a3 - bltu $a1, $fp, .LBB13_71 + sub.d $a2, $a7, $a0 + bltu $a2, $fp, .LBB13_71 # %bb.127: # %vector.memcheck # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a4 - bltu $a1, $fp, .LBB13_71 + sub.d $a2, $a7, $a3 + bltu $a2, $fp, .LBB13_71 # %bb.128: # %vector.memcheck # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a6 - bltu $a1, $fp, .LBB13_71 + sub.d $a2, $a7, $a5 + bltu $a2, $fp, .LBB13_71 # %bb.129: # %vector.memcheck # in Loop: Header=BB13_4 Depth=1 - sub.d $a1, $t0, $a7 - bltu $a1, $fp, .LBB13_71 + sub.d $a2, $a7, $a6 + bltu $a2, $fp, .LBB13_71 # %bb.130: # %vector.body.preheader # in Loop: Header=BB13_4 Depth=1 - addi.d $a0, $a3, 8 - addi.d $a1, $a4, 8 - addi.d $a2, $a5, 8 - addi.d $t1, $a6, 8 - addi.d $t2, $a7, 8 - addi.d $t3, $t0, 8 - move $t5, $t6 + addi.d $a1, $a0, 8 + addi.d $a2, $a3, 8 + addi.d $t0, $a4, 8 + addi.d $t1, $a5, 8 + addi.d $t2, $a6, 8 + addi.d $t3, $a7, 8 + move $t4, $t5 .p2align 4, , 16 .LBB13_131: # %vector.body # Parent Loop BB13_4 Depth=1 # => This Inner Loop Header: Depth=2 - vld $vr2, $a0, 0 - vld $vr3, $a1, 0 - vfmadd.d $vr2, $vr3, $vr0, $vr2 - vst $vr2, $a2, 0 + vld $vr2, $a1, 0 + vld $vr3, $a2, 0 + vreplgr2vr.d $vr4, $t8 + vfmadd.d $vr2, $vr3, $vr4, $vr2 + vst $vr2, $t0, 0 vld $vr2, $t1, 0 vld $vr3, $t2, 0 - vfmadd.d $vr2, $vr3, $vr0, $vr2 + vfmadd.d $vr2, $vr3, $vr4, $vr2 vst $vr2, $t3, 0 - addi.d $a0, $a0, 16 addi.d $a1, $a1, 16 addi.d $a2, $a2, 16 + addi.d $t0, $t0, 16 addi.d $t1, $t1, 16 addi.d $t2, $t2, 16 - addi.d $t5, $t5, -2 + addi.d $t4, $t4, -2 addi.d $t3, $t3, 16 - bnez $t5, .LBB13_131 + bnez $t4, .LBB13_131 # %bb.132: # %middle.block # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 72 # 8-byte Folded Reload - ld.d $a1, $sp, 176 # 8-byte Folded Reload - beq $a1, $t6, .LBB13_3 + ld.d $a1, $sp, 88 # 8-byte Folded Reload + ld.d $a2, $sp, 192 # 8-byte Folded Reload + beq $a2, $t5, .LBB13_3 b .LBB13_71 .LBB13_133: # %._crit_edge ld.d $a0, $sp, 16 # 8-byte Folded Reload - ld.d $s8, $sp, 216 # 8-byte Folded Reload - ld.d $s7, $sp, 224 # 8-byte Folded Reload - ld.d $s6, $sp, 232 # 8-byte Folded Reload - ld.d $s5, $sp, 240 # 8-byte Folded Reload - ld.d $s4, $sp, 248 # 8-byte Folded Reload - ld.d $s3, $sp, 256 # 8-byte Folded Reload - ld.d $s2, $sp, 264 # 8-byte Folded Reload - ld.d $s1, $sp, 272 # 8-byte Folded Reload - ld.d $s0, $sp, 280 # 8-byte Folded Reload - ld.d $fp, $sp, 288 # 8-byte Folded Reload - ld.d $ra, $sp, 296 # 8-byte Folded Reload - addi.d $sp, $sp, 304 + ld.d $s8, $sp, 232 # 8-byte Folded Reload + ld.d $s7, $sp, 240 # 8-byte Folded Reload + ld.d $s6, $sp, 248 # 8-byte Folded Reload + ld.d $s5, $sp, 256 # 8-byte Folded Reload + ld.d $s4, $sp, 264 # 8-byte Folded Reload + ld.d $s3, $sp, 272 # 8-byte Folded Reload + ld.d $s2, $sp, 280 # 8-byte Folded Reload + ld.d $s1, $sp, 288 # 8-byte Folded Reload + ld.d $s0, $sp, 296 # 8-byte Folded Reload + ld.d $fp, $sp, 304 # 8-byte Folded Reload + ld.d $ra, $sp, 312 # 8-byte Folded Reload + addi.d $sp, $sp, 320 pcaddu18i $t8, %call36(_ZN9benchmark5State17FinishKeepRunningEv) jr $t8 .Lfunc_end13: @@ -3602,12 +3598,7 @@ _ZL20BM_GEN_LIN_RECUR_RAWRN9benchmark5StateE: # @_ZL20BM_GEN_LIN_RECUR_RAWRN9ben .size _ZL20BM_GEN_LIN_RECUR_RAWRN9benchmark5StateE, .Lfunc_end14-_ZL20BM_GEN_LIN_RECUR_RAWRN9benchmark5StateE .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZL15BM_DISC_ORD_RAWRN9benchmark5StateE -.LCPI15_0: - .dword 0x3fc999999999999a # double 0.20000000000000001 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZL15BM_DISC_ORD_RAWRN9benchmark5StateE .type _ZL15BM_DISC_ORD_RAWRN9benchmark5StateE,@function _ZL15BM_DISC_ORD_RAWRN9benchmark5StateE: # @_ZL15BM_DISC_ORD_RAWRN9benchmark5StateE .cfi_startproc @@ -3677,9 +3668,12 @@ _ZL15BM_DISC_ORD_RAWRN9benchmark5StateE: # @_ZL15BM_DISC_ORD_RAWRN9benchmark5Sta ld.d $t4, $a1, 0 blez $t4, .LBB15_10 # %bb.3: # %.preheader.us.preheader - pcalau12i $a1, %pc_hi20(.LCPI15_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI15_0) addi.d $a1, $s8, 8 + lu12i.w $a2, -419431 + ori $a2, $a2, 2458 + lu32i.d $a2, -419431 + lu52i.d $a2, $a2, 1020 + movgr2fr.d $fa0, $a2 movgr2fr.d $fa1, $zero b .LBB15_5 .p2align 4, , 16 @@ -3886,12 +3880,7 @@ _ZL16BM_MAT_X_MAT_RAWRN9benchmark5StateE: # @_ZL16BM_MAT_X_MAT_RAWRN9benchmark5S .size _ZL16BM_MAT_X_MAT_RAWRN9benchmark5StateE, .Lfunc_end16-_ZL16BM_MAT_X_MAT_RAWRN9benchmark5StateE .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZL16BM_PLANCKIAN_RAWRN9benchmark5StateE -.LCPI17_0: - .dword 0x4033cccccccccccd # double 19.800000000000001 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZL16BM_PLANCKIAN_RAWRN9benchmark5StateE .type _ZL16BM_PLANCKIAN_RAWRN9benchmark5StateE,@function _ZL16BM_PLANCKIAN_RAWRN9benchmark5StateE: # @_ZL16BM_PLANCKIAN_RAWRN9benchmark5StateE .cfi_startproc @@ -3930,30 +3919,34 @@ _ZL16BM_PLANCKIAN_RAWRN9benchmark5StateE: # @_ZL16BM_PLANCKIAN_RAWRN9benchmark5S pcaddu18i $ra, %call36(_Z8loopInitj) jirl $ra, $ra, 0 ld.d $a0, $s2, 32 + ld.d $a1, $s0, 8 + st.d $a1, $sp, 40 # 8-byte Folded Spill ld.d $a1, $s0, 32 ld.d $a0, $a0, 0 - ld.d $a2, $s0, 8 + ld.d $a2, $s0, 16 st.d $a2, $sp, 32 # 8-byte Folded Spill - st.d $a1, $sp, 40 # 8-byte Folded Spill + ld.d $a2, $s0, 24 + ld.d $a3, $s0, 40 + st.d $a3, $sp, 24 # 8-byte Folded Spill + move $fp, $a1 alsl.d $a1, $a0, $a1, 3 fld.d $fa0, $a1, -8 - pcalau12i $a1, %pc_hi20(.LCPI17_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI17_0) - ld.d $a1, $s0, 24 - ld.d $a2, $s0, 16 - st.d $a2, $sp, 24 # 8-byte Folded Spill - ld.d $s0, $s0, 40 + lu12i.w $a1, -209716 + ori $a1, $a1, 3277 + lu32i.d $a1, 249036 + lu52i.d $a1, $a1, 1027 + movgr2fr.d $fa1, $a1 fmul.d $fa0, $fa0, $fa1 - move $s1, $a1 - alsl.d $a0, $a0, $a1, 3 + move $s0, $a2 + alsl.d $a0, $a0, $a2, 3 fst.d $fa0, $a0, -8 - ld.w $fp, $s2, 28 + ld.w $s1, $s2, 28 ld.d $s5, $s2, 16 st.d $s2, $sp, 16 # 8-byte Folded Spill move $a0, $s2 pcaddu18i $ra, %call36(_ZN9benchmark5State16StartKeepRunningEv) jirl $ra, $ra, 0 - bnez $fp, .LBB17_6 + bnez $s1, .LBB17_6 # %bb.1: # %_ZN9benchmark5State13StateIteratorC2EPS0_.exit beqz $s5, .LBB17_6 # %bb.2: # %.preheader.lr.ph @@ -3966,29 +3959,29 @@ _ZL16BM_PLANCKIAN_RAWRN9benchmark5StateE: # @_ZL16BM_PLANCKIAN_RAWRN9benchmark5S # =>This Loop Header: Depth=1 # Child Loop BB17_4 Depth 2 move $s7, $s6 - move $s8, $s0 - ld.d $s2, $sp, 32 # 8-byte Folded Reload - ld.d $s4, $sp, 24 # 8-byte Folded Reload - ld.d $fp, $sp, 40 # 8-byte Folded Reload - move $s3, $s1 + ld.d $s8, $sp, 24 # 8-byte Folded Reload + ld.d $s1, $sp, 40 # 8-byte Folded Reload + ld.d $s3, $sp, 32 # 8-byte Folded Reload + move $s2, $fp + move $s4, $s0 .p2align 4, , 16 .LBB17_4: # Parent Loop BB17_3 Depth=1 # => This Inner Loop Header: Depth=2 - fld.d $fa0, $s3, 0 - fld.d $fa1, $fp, 0 + fld.d $fa0, $s4, 0 + fld.d $fa1, $s2, 0 fdiv.d $fa0, $fa0, $fa1 - fst.d $fa0, $s4, 0 - fld.d $fs0, $s2, 0 + fst.d $fa0, $s3, 0 + fld.d $fs0, $s1, 0 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 vldi $vr1, -784 fadd.d $fa0, $fa0, $fa1 fdiv.d $fa0, $fs0, $fa0 fst.d $fa0, $s8, 0 - addi.d $s3, $s3, 8 - addi.d $fp, $fp, 8 addi.d $s4, $s4, 8 addi.d $s2, $s2, 8 + addi.d $s3, $s3, 8 + addi.d $s1, $s1, 8 addi.d $s7, $s7, -1 addi.d $s8, $s8, 8 bnez $s7, .LBB17_4 @@ -4017,12 +4010,7 @@ _ZL16BM_PLANCKIAN_RAWRN9benchmark5StateE: # @_ZL16BM_PLANCKIAN_RAWRN9benchmark5S .size _ZL16BM_PLANCKIAN_RAWRN9benchmark5StateE, .Lfunc_end17-_ZL16BM_PLANCKIAN_RAWRN9benchmark5StateE .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZL19BM_IMP_HYDRO_2D_RAWRN9benchmark5StateE -.LCPI18_0: - .dword 0x3fc6666666666666 # double 0.17499999999999999 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZL19BM_IMP_HYDRO_2D_RAWRN9benchmark5StateE .type _ZL19BM_IMP_HYDRO_2D_RAWRN9benchmark5StateE,@function _ZL19BM_IMP_HYDRO_2D_RAWRN9benchmark5StateE: # @_ZL19BM_IMP_HYDRO_2D_RAWRN9benchmark5StateE .cfi_startproc @@ -4083,95 +4071,100 @@ _ZL19BM_IMP_HYDRO_2D_RAWRN9benchmark5StateE: # @_ZL19BM_IMP_HYDRO_2D_RAWRN9bench blt $a0, $a1, .LBB18_15 # %bb.3: # %.preheader63.us.preheader ld.d $t1, $s2, 16 - ld.d $s8, $s5, 8 + ld.d $a1, $s5, 8 + st.d $a1, $sp, 216 # 8-byte Folded Spill ld.d $a1, $s2, 0 - st.d $a1, $sp, 184 # 8-byte Folded Spill + st.d $a1, $sp, 208 # 8-byte Folded Spill ld.d $a1, $s1, 8 - st.d $a1, $sp, 160 # 8-byte Folded Spill + st.d $a1, $sp, 200 # 8-byte Folded Spill ld.d $a6, $s2, 8 - ld.d $t6, $s3, 8 - ld.d $s6, $s4, 8 + ld.d $a1, $s3, 8 + st.d $a1, $sp, 184 # 8-byte Folded Spill + ld.d $a1, $s4, 8 + st.d $a1, $sp, 144 # 8-byte Folded Spill ld.d $a7, $s2, 24 ld.d $a1, $s5, 16 - st.d $a1, $sp, 216 # 8-byte Folded Spill - ld.d $s7, $s1, 16 - ld.d $a5, $s3, 16 - ld.d $a4, $s4, 16 + st.d $a1, $sp, 192 # 8-byte Folded Spill + ld.d $a1, $s1, 16 + st.d $a1, $sp, 104 # 8-byte Folded Spill + ld.d $t6, $s3, 16 + ld.d $t5, $s4, 16 ld.d $t0, $s2, 32 - ld.d $a1, $s5, 24 - st.d $a1, $sp, 200 # 8-byte Folded Spill - ld.d $a1, $s1, 24 - st.d $a1, $sp, 80 # 8-byte Folded Spill - ld.d $t4, $s3, 24 - ld.d $t5, $s4, 24 + ld.d $a5, $s5, 24 + ld.d $s0, $s1, 24 + ld.d $s6, $s3, 24 + ld.d $s7, $s4, 24 ld.d $a3, $s2, 40 - ld.d $a1, $s5, 32 - st.d $a1, $sp, 192 # 8-byte Folded Spill - ld.d $t7, $s1, 32 - ld.d $t8, $s3, 32 - ld.d $s0, $s4, 32 + ld.d $a4, $s5, 32 + ld.d $ra, $s1, 32 + ld.d $s8, $s3, 32 + lu12i.w $t2, 419430 + ori $t2, $t2, 1638 + lu32i.d $t2, 419430 + lu52i.d $t2, $t2, 1020 + movgr2fr.d $fa0, $t2 + ld.d $t2, $s4, 32 ld.d $t3, $s2, 48 - ld.d $a1, $s5, 40 - st.d $a1, $sp, 208 # 8-byte Folded Spill - ld.d $s1, $s1, 40 - ld.d $t2, $s3, 40 - ld.d $a2, $s4, 40 + ld.d $t4, $s5, 40 + ld.d $t7, $s1, 40 + ld.d $t8, $s3, 40 + ld.d $s1, $s4, 40 ld.d $s2, $fp, 8 ld.d $s3, $fp, 16 ld.d $s4, $fp, 24 - ld.d $ra, $fp, 32 - ld.d $a1, $fp, 40 - addi.d $fp, $s8, 8 + ld.d $a1, $fp, 32 + ld.d $a2, $fp, 40 + ld.d $fp, $sp, 216 # 8-byte Folded Reload + addi.d $fp, $fp, 8 st.d $fp, $sp, 176 # 8-byte Folded Spill - ld.d $fp, $sp, 184 # 8-byte Folded Reload + ld.d $fp, $sp, 208 # 8-byte Folded Reload addi.d $fp, $fp, 8 st.d $fp, $sp, 168 # 8-byte Folded Spill - ld.d $fp, $sp, 160 # 8-byte Folded Reload + ld.d $fp, $sp, 200 # 8-byte Folded Reload addi.d $fp, $fp, 8 st.d $fp, $sp, 160 # 8-byte Folded Spill - addi.d $t6, $t6, 8 - st.d $t6, $sp, 152 # 8-byte Folded Spill - addi.d $t6, $s6, 8 - st.d $t6, $sp, 144 # 8-byte Folded Spill - addi.d $t6, $s2, 8 - st.d $t6, $sp, 136 # 8-byte Folded Spill - addi.d $t6, $s3, 8 - st.d $t6, $sp, 128 # 8-byte Folded Spill - addi.d $a4, $a4, 8 - st.d $a4, $sp, 120 # 8-byte Folded Spill - addi.d $a4, $a5, 8 - st.d $a4, $sp, 112 # 8-byte Folded Spill - addi.d $a4, $s7, 8 - st.d $a4, $sp, 104 # 8-byte Folded Spill - ld.d $a4, $sp, 216 # 8-byte Folded Reload - addi.d $a4, $a4, 8 - st.d $a4, $sp, 96 # 8-byte Folded Spill - addi.d $a4, $s4, 8 - st.d $a4, $sp, 88 # 8-byte Folded Spill - addi.d $s2, $t5, 8 - addi.d $s3, $t4, 8 - ld.d $a4, $sp, 80 # 8-byte Folded Reload - addi.d $s4, $a4, 8 - ld.d $a4, $sp, 200 # 8-byte Folded Reload - addi.d $s5, $a4, 8 - addi.d $s6, $ra, 8 - addi.d $s7, $s0, 8 - addi.d $s8, $t8, 8 - addi.d $ra, $t7, 8 - ld.d $a4, $sp, 192 # 8-byte Folded Reload + ld.d $fp, $sp, 184 # 8-byte Folded Reload + addi.d $fp, $fp, 8 + st.d $fp, $sp, 152 # 8-byte Folded Spill + ld.d $fp, $sp, 144 # 8-byte Folded Reload + addi.d $fp, $fp, 8 + st.d $fp, $sp, 144 # 8-byte Folded Spill + addi.d $fp, $s2, 8 + st.d $fp, $sp, 136 # 8-byte Folded Spill + addi.d $fp, $s3, 8 + st.d $fp, $sp, 128 # 8-byte Folded Spill + addi.d $t5, $t5, 8 + st.d $t5, $sp, 120 # 8-byte Folded Spill + addi.d $t5, $t6, 8 + st.d $t5, $sp, 112 # 8-byte Folded Spill + ld.d $t5, $sp, 104 # 8-byte Folded Reload + addi.d $t5, $t5, 8 + st.d $t5, $sp, 104 # 8-byte Folded Spill + ld.d $t5, $sp, 192 # 8-byte Folded Reload + addi.d $t5, $t5, 8 + st.d $t5, $sp, 96 # 8-byte Folded Spill + addi.d $t5, $s4, 8 + st.d $t5, $sp, 88 # 8-byte Folded Spill + addi.d $s2, $s7, 8 + addi.d $s3, $s6, 8 + addi.d $s4, $s0, 8 + addi.d $s5, $a5, 8 + addi.d $s6, $a1, 8 + addi.d $s7, $t2, 8 + addi.d $s8, $s8, 8 + addi.d $ra, $ra, 8 addi.d $fp, $a4, 8 - addi.d $a1, $a1, 8 - st.d $a1, $sp, 80 # 8-byte Folded Spill addi.d $a1, $a2, 8 + st.d $a1, $sp, 80 # 8-byte Folded Spill + addi.d $a1, $s1, 8 st.d $a1, $sp, 72 # 8-byte Folded Spill addi.d $a1, $t3, 8 st.d $a1, $sp, 64 # 8-byte Folded Spill - addi.d $a1, $t2, 8 + addi.d $a1, $t8, 8 st.d $a1, $sp, 56 # 8-byte Folded Spill - addi.d $a1, $s1, 8 + addi.d $a1, $t7, 8 st.d $a1, $sp, 48 # 8-byte Folded Spill - ld.d $a1, $sp, 208 # 8-byte Folded Reload - addi.d $a1, $a1, 8 + addi.d $a1, $t4, 8 st.d $a1, $sp, 40 # 8-byte Folded Spill addi.d $a1, $t1, 8 addi.d $a2, $a7, 8 @@ -4200,8 +4193,8 @@ _ZL19BM_IMP_HYDRO_2D_RAWRN9benchmark5StateE: # @_ZL19BM_IMP_HYDRO_2D_RAWRN9bench # Child Loop BB18_11 Depth 2 # Child Loop BB18_13 Depth 2 ld.d $a3, $sp, 208 # 8-byte Folded Reload - fld.d $fa1, $a3, 8 - fld.d $fa2, $a3, 0 + fld.d $fa2, $a3, 8 + fld.d $fa1, $a3, 0 move $a3, $zero ori $t1, $zero, 1 ld.d $t4, $sp, 176 # 8-byte Folded Reload @@ -4213,28 +4206,26 @@ _ZL19BM_IMP_HYDRO_2D_RAWRN9benchmark5StateE: # @_ZL19BM_IMP_HYDRO_2D_RAWRN9bench .p2align 4, , 16 .LBB18_5: # Parent Loop BB18_4 Depth=1 # => This Inner Loop Header: Depth=2 - fldx.d $fa0, $t5, $a3 - fldx.d $fa3, $t6, $a3 - fldx.d $fa4, $a1, $a3 - fldx.d $fa5, $t4, $a3 - fmul.d $fa0, $fa0, $fa3 - fldx.d $fa3, $a5, $a3 - fldx.d $fa6, $t7, $a3 - fldx.d $fa7, $t8, $a3 - fmadd.d $fa0, $fa4, $fa5, $fa0 + fldx.d $fa3, $a1, $a3 + fldx.d $fa4, $t4, $a3 + fldx.d $fa5, $t5, $a3 + fldx.d $fa6, $t6, $a3 + fldx.d $fa7, $a5, $a3 + fldx.d $ft0, $t7, $a3 + fmul.d $fa5, $fa5, $fa6 + fmadd.d $fa3, $fa3, $fa4, $fa5 + fmadd.d $fa3, $fa7, $ft0, $fa3 + fldx.d $fa4, $t8, $a3 + fldx.d $fa5, $s1, $a3 addi.d $t1, $t1, 1 - fmadd.d $fa0, $fa3, $fa6, $fa0 - fmadd.d $fa2, $fa2, $fa7, $fa0 - fldx.d $fa4, $s1, $a3 - pcalau12i $t3, %pc_hi20(.LCPI18_0) - fld.d $fa0, $t3, %pc_lo12(.LCPI18_0) add.d $t3, $a5, $a3 - fadd.d $fa2, $fa2, $fa4 - fsub.d $fa2, $fa2, $fa1 - fmadd.d $fa2, $fa2, $fa0, $fa1 - fst.d $fa2, $t3, -8 + fmadd.d $fa1, $fa1, $fa4, $fa3 + fadd.d $fa1, $fa1, $fa5 + fsub.d $fa1, $fa1, $fa2 + fmadd.d $fa1, $fa1, $fa0, $fa2 + fst.d $fa1, $t3, -8 addi.d $a3, $a3, 8 - fmov.d $fa1, $fa3 + fmov.d $fa2, $fa7 bne $a0, $t1, .LBB18_5 # %bb.6: # %._crit_edge.us.us # in Loop: Header=BB18_4 Depth=1 diff --git a/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/__/LCALSStats.s b/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/__/LCALSStats.s index 65ccd92d..95272aa5 100644 --- a/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/__/LCALSStats.s +++ b/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/__/LCALSStats.s @@ -868,12 +868,7 @@ _Z19getLoopSuiteRunInfov: # @_Z19getLoopSuiteRunInfov .Lfunc_end1: .size _Z19getLoopSuiteRunInfov, .Lfunc_end1-_Z19getLoopSuiteRunInfov # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm -.LCPI2_0: - .dword 0x3fb999999999999a # double 0.10000000000000001 - .text - .globl _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm + .globl _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm # -- Begin function _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm .p2align 5 .type _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm,@function _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm: # @_Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEjjjPbm @@ -1018,10 +1013,13 @@ _Z24allocateLoopSuiteRunInfoRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcE st.d $a0, $s4, 560 beqz $a1, .LBB2_15 # %bb.13: # %.lr.ph18.preheader - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI2_0) move $fp, $zero move $s0, $zero + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1019 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB2_14: # %.lr.ph18 # =>This Inner Loop Header: Depth=1 diff --git a/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/__/LCALSSuite.s b/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/__/LCALSSuite.s index a1e8c653..e35fa2d7 100644 --- a/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/__/LCALSSuite.s +++ b/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/__/LCALSSuite.s @@ -23,33 +23,21 @@ _Z11getLoopDatav: # @_Z11getLoopDatav .LCPI1_1: .dword 0x3ff6666666666666 # double 1.3999999999999999 .dword 0x3ff0000000000000 # double 1 -.LCPI1_5: +.LCPI1_2: .dword 8 # 0x8 .dword 4923084613239392580 # 0x44524f5f43534944 -.LCPI1_6: +.LCPI1_3: .dword 8 # 0x8 .dword 4914094937701898568 # 0x44325f4f52445948 -.LCPI1_7: +.LCPI1_4: .dword 8 # 0x8 .dword 4913813462725187912 # 0x44315f4f52445948 -.LCPI1_8: +.LCPI1_5: .dword 8 # 0x8 .dword 6074873621086556756 # 0x544e495f50415254 -.LCPI1_11: +.LCPI1_6: .dword 8 # 0x8 .dword 5786931235628926290 # 0x504f4f4c5f464552 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI1_2: - .dword 0x40e5972000000000 # double 44217 -.LCPI1_3: - .dword 0x40b3890000000000 # double 5001 -.LCPI1_4: - .dword 0x4065600000000000 # double 171 -.LCPI1_9: - .dword 0x4063800000000000 # double 156 -.LCPI1_10: - .dword 0x4050000000000000 # double 64 .text .globl _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd .p2align 5 @@ -640,21 +628,27 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define .Ltmp34: # EH_LABEL # %bb.92: move $s8, $a0 - pcalau12i $a0, %pc_hi20(.LCPI1_2) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_2) - pcalau12i $a0, %pc_hi20(.LCPI1_3) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_3) + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, 366368 + lu52i.d $a1, $a1, 1038 + movgr2fr.d $fa0, $a1 fmul.d $fa0, $fs0, $fa0 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a0, $fa0 - fmul.d $fa0, $fs0, $fa1 + movfr2gr.s $a1, $fa0 + st.w $a1, $s8, 0 + ori $a1, $zero, 0 + lu32i.d $a1, 231680 + lu52i.d $a1, $a1, 1035 + movgr2fr.d $fa0, $a1 + fmul.d $fa0, $fs0, $fa0 ftintrz.w.d $fa0, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI1_4) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_4) movfr2gr.s $a1, $fa0 - st.w $a0, $s8, 0 st.w $a1, $s8, 4 - fmul.d $fa0, $fs0, $fa1 + lu32i.d $a0, 352256 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa0, $a0 + fmul.d $fa0, $fs0, $fa0 ftintrz.w.d $fa0, $fa0 ld.d $s7, $sp, 96 # 8-byte Folded Reload ld.w $a0, $s7, 32 @@ -732,8 +726,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define jr $a0 .LBB1_99: # %._crit_edge.i.i352 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_11) - vld $vr0, $a0, %pc_lo12(.LCPI1_11) + pcalau12i $a0, %pc_hi20(.LCPI1_6) + vld $vr0, $a0, %pc_lo12(.LCPI1_6) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -936,8 +930,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define b .LBB1_133 .LBB1_112: # %._crit_edge.i.i732 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_8) - vld $vr0, $a0, %pc_lo12(.LCPI1_8) + pcalau12i $a0, %pc_hi20(.LCPI1_5) + vld $vr0, $a0, %pc_lo12(.LCPI1_5) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -1687,8 +1681,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define b .LBB1_205 .LBB1_158: # %._crit_edge.i.i748 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_7) - vld $vr0, $a0, %pc_lo12(.LCPI1_7) + pcalau12i $a0, %pc_hi20(.LCPI1_4) + vld $vr0, $a0, %pc_lo12(.LCPI1_4) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -1761,16 +1755,18 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define # in Loop: Header=BB1_95 Depth=1 ld.d $a0, $sp, 32 # 8-byte Folded Reload ld.d $a0, $a0, 0 - ld.d $a1, $sp, 8 # 8-byte Folded Reload - fld.d $fa0, $a1, %pc_lo12(_ZN7ADomain18loop_length_factorE) - pcalau12i $a1, %pc_hi20(.LCPI1_9) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_9) - fld.d $fa2, $a0, 0 - fmul.d $fa1, $fa0, $fa1 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 + fld.d $fa1, $a0, 0 + ld.d $a0, $sp, 8 # 8-byte Folded Reload + fld.d $fa0, $a0, %pc_lo12(_ZN7ADomain18loop_length_factorE) + ori $a0, $zero, 0 + lu32i.d $a0, 229376 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa2, $a0 + fmul.d $fa2, $fa0, $fa2 + ftintrz.w.d $fa2, $fa2 + movfr2gr.s $a0, $fa2 ori $a2, $zero, 2 - fst.d $fa2, $sp, 168 + fst.d $fa1, $sp, 168 blt $a0, $a2, .LBB1_211 # %bb.164: # %.lr.ph72.us.i.preheader # in Loop: Header=BB1_95 Depth=1 @@ -2202,8 +2198,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define b .LBB1_205 .LBB1_196: # %._crit_edge.i.i988 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_5) - vld $vr0, $a0, %pc_lo12(.LCPI1_5) + pcalau12i $a0, %pc_hi20(.LCPI1_2) + vld $vr0, $a0, %pc_lo12(.LCPI1_2) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -2297,8 +2293,8 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define b .LBB1_205 .LBB1_202: # %._crit_edge.i.i956 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_6) - vld $vr0, $a0, %pc_lo12(.LCPI1_6) + pcalau12i $a0, %pc_hi20(.LCPI1_3) + vld $vr0, $a0, %pc_lo12(.LCPI1_3) vst $vr0, $sp, 136 ld.d $a0, $sp, 440 ld.d $a1, $sp, 144 @@ -2382,9 +2378,9 @@ _Z22defineLoopSuiteRunInfoRKSt6vectorI13LoopVariantIDSaIS0_EEPbdd: # @_Z22define move $a3, $zero .LBB1_212: # %_ZN7ADomainC2Eii.exit527 # in Loop: Header=BB1_95 Depth=1 - pcalau12i $a1, %pc_hi20(.LCPI1_10) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_10) ld.d $a1, $sp, 368 + lu52i.d $a4, $zero, 1029 + movgr2fr.d $fa1, $a4 fmul.d $fa1, $fa0, $fa1 ftintrz.w.d $fa1, $fa1 movfr2gr.s $a4, $fa1 @@ -4488,15 +4484,9 @@ GCC_except_table7: .LCPI8_0: .dword 0x3fb999999999999a # double 0.10000000000000001 .dword 0x3fc999999999999a # double 0.20000000000000001 -.LCPI8_3: +.LCPI8_1: .dword 0x3fc999999999999a # double 0.20000000000000001 .dword 0x3fd3333333333333 # double 0.29999999999999999 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI8_1: - .dword 0x3ff199999999999a # double 1.1000000000000001 -.LCPI8_2: - .dword 0x3ff1f9a6b50b0f28 # double 1.1234500000000001 .text .globl _Z8loopInitjR8LoopStat .p2align 5 @@ -4747,8 +4737,8 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat .LBB8_40: pcalau12i $a0, %pc_hi20(.LCPI8_0) addi.d $a0, $a0, %pc_lo12(.LCPI8_0) - pcalau12i $a1, %pc_hi20(.LCPI8_3) - addi.d $a1, $a1, %pc_lo12(.LCPI8_3) + pcalau12i $a1, %pc_hi20(.LCPI8_1) + addi.d $a1, $a1, %pc_lo12(.LCPI8_1) ld.w $a3, $s1, 1032 blez $a3, .LBB8_577 # %bb.41: # %.lr.ph.preheader.i430 @@ -4962,8 +4952,10 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat pcalau12i $a2, %pc_hi20(.LCPI8_0) addi.d $a2, $a2, %pc_lo12(.LCPI8_0) fldx.d $fa0, $a2, $a0 - ld.d $a2, $s1, 472 + ld.d $a3, $s1, 472 ori $a0, $zero, 4 + lu12i.w $a4, -419431 + lu12i.w $a2, -307024 bgeu $a1, $a0, .LBB8_920 # %bb.78: move $a0, $zero @@ -5152,12 +5144,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.102: # %middle.block3803 beq $a1, $a2, .LBB8_105 .LBB8_103: # %.lr.ph.i184.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_104: # %.lr.ph.i184 # =>This Inner Loop Header: Depth=1 @@ -5246,12 +5244,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.110: # %middle.block3819 beq $a1, $a2, .LBB8_113 .LBB8_111: # %.lr.ph.i192.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_112: # %.lr.ph.i192 # =>This Inner Loop Header: Depth=1 @@ -5340,12 +5344,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.118: # %middle.block3835 beq $a1, $a2, .LBB8_121 .LBB8_119: # %.lr.ph.i200.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_120: # %.lr.ph.i200 # =>This Inner Loop Header: Depth=1 @@ -5434,12 +5444,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.126: # %middle.block3851 beq $a1, $a2, .LBB8_129 .LBB8_127: # %.lr.ph.i208.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_128: # %.lr.ph.i208 # =>This Inner Loop Header: Depth=1 @@ -5528,12 +5544,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.134: # %middle.block3867 beq $a1, $a2, .LBB8_137 .LBB8_135: # %.lr.ph.i216.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_136: # %.lr.ph.i216 # =>This Inner Loop Header: Depth=1 @@ -5622,12 +5644,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.142: # %middle.block3883 beq $a0, $a1, .LBB8_1187 .LBB8_143: # %.lr.ph.i224.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_144: # %.lr.ph.i224 # =>This Inner Loop Header: Depth=1 @@ -5702,12 +5730,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.147: # %middle.block3547 beq $a1, $a2, .LBB8_150 .LBB8_148: # %.lr.ph.i232.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_149: # %.lr.ph.i232 # =>This Inner Loop Header: Depth=1 @@ -5796,12 +5830,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.155: # %middle.block3563 beq $a1, $a2, .LBB8_158 .LBB8_156: # %.lr.ph.i240.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_157: # %.lr.ph.i240 # =>This Inner Loop Header: Depth=1 @@ -5890,12 +5930,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.163: # %middle.block3579 beq $a1, $a2, .LBB8_166 .LBB8_164: # %.lr.ph.i248.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_165: # %.lr.ph.i248 # =>This Inner Loop Header: Depth=1 @@ -5984,12 +6030,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.171: # %middle.block3595 beq $a1, $a2, .LBB8_174 .LBB8_172: # %.lr.ph.i256.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_173: # %.lr.ph.i256 # =>This Inner Loop Header: Depth=1 @@ -6078,12 +6130,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.179: # %middle.block3611 beq $a1, $a2, .LBB8_182 .LBB8_180: # %.lr.ph.i264.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_181: # %.lr.ph.i264 # =>This Inner Loop Header: Depth=1 @@ -6172,12 +6230,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.187: # %middle.block3627 beq $a1, $a2, .LBB8_190 .LBB8_188: # %.lr.ph.i272.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_189: # %.lr.ph.i272 # =>This Inner Loop Header: Depth=1 @@ -6266,12 +6330,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.195: # %middle.block3643 beq $a1, $a2, .LBB8_198 .LBB8_196: # %.lr.ph.i280.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_197: # %.lr.ph.i280 # =>This Inner Loop Header: Depth=1 @@ -6360,12 +6430,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.203: # %middle.block3659 beq $a1, $a2, .LBB8_206 .LBB8_204: # %.lr.ph.i288.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_205: # %.lr.ph.i288 # =>This Inner Loop Header: Depth=1 @@ -6454,12 +6530,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.211: # %middle.block3675 beq $a1, $a2, .LBB8_214 .LBB8_212: # %.lr.ph.i296.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_213: # %.lr.ph.i296 # =>This Inner Loop Header: Depth=1 @@ -6548,12 +6630,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.219: # %middle.block3691 beq $a1, $a2, .LBB8_222 .LBB8_220: # %.lr.ph.i304.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_221: # %.lr.ph.i304 # =>This Inner Loop Header: Depth=1 @@ -6642,12 +6730,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.227: # %middle.block3707 beq $a1, $a2, .LBB8_230 .LBB8_228: # %.lr.ph.i312.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_229: # %.lr.ph.i312 # =>This Inner Loop Header: Depth=1 @@ -6736,12 +6830,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.235: # %middle.block3723 beq $a1, $a2, .LBB8_238 .LBB8_236: # %.lr.ph.i320.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_237: # %.lr.ph.i320 # =>This Inner Loop Header: Depth=1 @@ -6830,12 +6930,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.243: # %middle.block3739 beq $a1, $a2, .LBB8_246 .LBB8_244: # %.lr.ph.i328.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_245: # %.lr.ph.i328 # =>This Inner Loop Header: Depth=1 @@ -6924,12 +7030,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.251: # %middle.block3755 beq $a1, $a2, .LBB8_254 .LBB8_252: # %.lr.ph.i336.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_253: # %.lr.ph.i336 # =>This Inner Loop Header: Depth=1 @@ -7018,12 +7130,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.259: # %middle.block3771 beq $a1, $a2, .LBB8_262 .LBB8_260: # %.lr.ph.i344.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_261: # %.lr.ph.i344 # =>This Inner Loop Header: Depth=1 @@ -7112,12 +7230,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.267: # %middle.block3787 beq $a0, $a1, .LBB8_1187 .LBB8_268: # %.lr.ph.i352.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_269: # %.lr.ph.i352 # =>This Inner Loop Header: Depth=1 @@ -7215,12 +7339,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.274: # %middle.block2006 beq $a1, $a2, .LBB8_277 .LBB8_275: # %.lr.ph.i1118.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_276: # %.lr.ph.i1118 # =>This Inner Loop Header: Depth=1 @@ -7309,12 +7439,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.282: # %middle.block2022 beq $a1, $a2, .LBB8_285 .LBB8_283: # %.lr.ph.i1126.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_284: # %.lr.ph.i1126 # =>This Inner Loop Header: Depth=1 @@ -7403,12 +7539,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.290: # %middle.block2038 beq $a1, $a2, .LBB8_293 .LBB8_291: # %.lr.ph.i1134.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_292: # %.lr.ph.i1134 # =>This Inner Loop Header: Depth=1 @@ -7497,12 +7639,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.298: # %middle.block2054 beq $a0, $a1, .LBB8_1187 .LBB8_299: # %.lr.ph.i1142.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_300: # %.lr.ph.i1142 # =>This Inner Loop Header: Depth=1 @@ -7577,12 +7725,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.303: # %middle.block2870 beq $a1, $a2, .LBB8_306 .LBB8_304: # %.lr.ph.i692.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_305: # %.lr.ph.i692 # =>This Inner Loop Header: Depth=1 @@ -7671,12 +7825,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.311: # %middle.block2886 beq $a0, $a1, .LBB8_1187 .LBB8_312: # %.lr.ph.i700.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_313: # %.lr.ph.i700 # =>This Inner Loop Header: Depth=1 @@ -7751,12 +7911,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.316: # %middle.block2934 beq $a1, $a2, .LBB8_319 .LBB8_317: # %.lr.ph.i660.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_318: # %.lr.ph.i660 # =>This Inner Loop Header: Depth=1 @@ -7845,12 +8011,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.324: # %middle.block2950 beq $a0, $a1, .LBB8_1187 .LBB8_325: # %.lr.ph.i668.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_326: # %.lr.ph.i668 # =>This Inner Loop Header: Depth=1 @@ -7925,12 +8097,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.329: # %middle.block3046 beq $a1, $a2, .LBB8_332 .LBB8_330: # %.lr.ph.i579.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_331: # %.lr.ph.i579 # =>This Inner Loop Header: Depth=1 @@ -8019,13 +8197,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.337: # %middle.block3062 beq $a1, $a2, .LBB8_340 .LBB8_338: # %.lr.ph.i587.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 - .p2align 4, , 16 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 + .p2align 4, , 16 .LBB8_339: # %.lr.ph.i587 # =>This Inner Loop Header: Depth=1 bstrpick.d $a4, $a1, 31, 0 @@ -8113,12 +8297,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.345: # %middle.block3078 beq $a1, $a2, .LBB8_348 .LBB8_346: # %.lr.ph.i595.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_347: # %.lr.ph.i595 # =>This Inner Loop Header: Depth=1 @@ -8207,12 +8397,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.353: # %middle.block3094 beq $a1, $a2, .LBB8_356 .LBB8_354: # %.lr.ph.i603.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_355: # %.lr.ph.i603 # =>This Inner Loop Header: Depth=1 @@ -8301,12 +8497,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.361: # %middle.block3110 beq $a0, $a1, .LBB8_1187 .LBB8_362: # %.lr.ph.i611.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_363: # %.lr.ph.i611 # =>This Inner Loop Header: Depth=1 @@ -8381,12 +8583,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.366: # %middle.block3030 beq $a0, $a1, .LBB8_1187 .LBB8_367: # %.lr.ph.i620.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_368: # %.lr.ph.i620 # =>This Inner Loop Header: Depth=1 @@ -8461,12 +8669,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.371: # %middle.block3899 beq $a1, $a2, .LBB8_374 .LBB8_372: # %.lr.ph.i.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_373: # %.lr.ph.i # =>This Inner Loop Header: Depth=1 @@ -8555,12 +8769,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.379: # %middle.block3915 beq $a1, $a2, .LBB8_382 .LBB8_380: # %.lr.ph.i168.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_381: # %.lr.ph.i168 # =>This Inner Loop Header: Depth=1 @@ -8649,12 +8869,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.387: # %middle.block3931 beq $a0, $a1, .LBB8_1187 .LBB8_388: # %.lr.ph.i176.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_389: # %.lr.ph.i176 # =>This Inner Loop Header: Depth=1 @@ -8729,12 +8955,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.392: # %middle.block3206 beq $a1, $a2, .LBB8_395 .LBB8_393: # %.lr.ph.i499.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_394: # %.lr.ph.i499 # =>This Inner Loop Header: Depth=1 @@ -8823,12 +9055,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.400: # %middle.block3222 beq $a1, $a2, .LBB8_403 .LBB8_401: # %.lr.ph.i507.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_402: # %.lr.ph.i507 # =>This Inner Loop Header: Depth=1 @@ -8917,12 +9155,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.408: # %middle.block3238 beq $a1, $a2, .LBB8_411 .LBB8_409: # %.lr.ph.i515.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_410: # %.lr.ph.i515 # =>This Inner Loop Header: Depth=1 @@ -9011,12 +9255,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.416: # %middle.block3254 beq $a1, $a2, .LBB8_419 .LBB8_417: # %.lr.ph.i523.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_418: # %.lr.ph.i523 # =>This Inner Loop Header: Depth=1 @@ -9105,12 +9355,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.424: # %middle.block3270 beq $a0, $a1, .LBB8_1187 .LBB8_425: # %.lr.ph.i531.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_426: # %.lr.ph.i531 # =>This Inner Loop Header: Depth=1 @@ -9185,12 +9441,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.429: # %middle.block2246 beq $a1, $a2, .LBB8_432 .LBB8_430: # %.lr.ph.i944.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_431: # %.lr.ph.i944 # =>This Inner Loop Header: Depth=1 @@ -9279,12 +9541,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.437: # %middle.block2262 beq $a1, $a2, .LBB8_440 .LBB8_438: # %.lr.ph.i952.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_439: # %.lr.ph.i952 # =>This Inner Loop Header: Depth=1 @@ -9373,12 +9641,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.445: # %middle.block2278 beq $a1, $a2, .LBB8_448 .LBB8_446: # %.lr.ph.i960.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_447: # %.lr.ph.i960 # =>This Inner Loop Header: Depth=1 @@ -9467,12 +9741,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.453: # %middle.block2294 beq $a1, $a2, .LBB8_456 .LBB8_454: # %.lr.ph.i968.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_455: # %.lr.ph.i968 # =>This Inner Loop Header: Depth=1 @@ -9561,12 +9841,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.461: # %middle.block2310 beq $a1, $a2, .LBB8_464 .LBB8_462: # %.lr.ph.i976.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_463: # %.lr.ph.i976 # =>This Inner Loop Header: Depth=1 @@ -9655,12 +9941,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.469: # %middle.block2326 beq $a1, $a2, .LBB8_472 .LBB8_470: # %.lr.ph.i984.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_471: # %.lr.ph.i984 # =>This Inner Loop Header: Depth=1 @@ -9749,12 +10041,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.477: # %middle.block2342 beq $a1, $a2, .LBB8_480 .LBB8_478: # %.lr.ph.i992.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_479: # %.lr.ph.i992 # =>This Inner Loop Header: Depth=1 @@ -9843,12 +10141,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.485: # %middle.block2358 beq $a1, $a2, .LBB8_488 .LBB8_486: # %.lr.ph.i1000.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_487: # %.lr.ph.i1000 # =>This Inner Loop Header: Depth=1 @@ -9937,12 +10241,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.493: # %middle.block2374 beq $a1, $a2, .LBB8_496 .LBB8_494: # %.lr.ph.i1008.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_495: # %.lr.ph.i1008 # =>This Inner Loop Header: Depth=1 @@ -10031,12 +10341,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.501: # %middle.block2390 beq $a0, $a1, .LBB8_504 .LBB8_502: # %.lr.ph.i1016.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_503: # %.lr.ph.i1016 # =>This Inner Loop Header: Depth=1 @@ -10142,12 +10458,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.512: # %middle.block2406 beq $a0, $a1, .LBB8_515 .LBB8_513: # %.lr.ph.i892.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_514: # %.lr.ph.i892 # =>This Inner Loop Header: Depth=1 @@ -10236,12 +10558,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.520: # %middle.block2422 beq $a0, $a1, .LBB8_523 .LBB8_521: # %.lr.ph.i900.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_522: # %.lr.ph.i900 # =>This Inner Loop Header: Depth=1 @@ -10330,12 +10658,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.528: # %middle.block2438 beq $a0, $a1, .LBB8_531 .LBB8_529: # %.lr.ph.i908.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_530: # %.lr.ph.i908 # =>This Inner Loop Header: Depth=1 @@ -10424,12 +10758,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.536: # %middle.block2454 beq $a0, $a1, .LBB8_539 .LBB8_537: # %.lr.ph.i916.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_538: # %.lr.ph.i916 # =>This Inner Loop Header: Depth=1 @@ -10518,12 +10858,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.544: # %middle.block2470 beq $a0, $a1, .LBB8_547 .LBB8_545: # %.lr.ph.i924.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_546: # %.lr.ph.i924 # =>This Inner Loop Header: Depth=1 @@ -10630,12 +10976,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.556: # %middle.block2486 beq $a0, $a1, .LBB8_1187 .LBB8_557: # %.lr.ph.i936.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_558: # %.lr.ph.i936 # =>This Inner Loop Header: Depth=1 @@ -10710,12 +11062,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.561: # %middle.block2902 beq $a1, $a2, .LBB8_564 .LBB8_562: # %.lr.ph.i676.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_563: # %.lr.ph.i676 # =>This Inner Loop Header: Depth=1 @@ -10804,12 +11162,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.569: # %middle.block2918 beq $a0, $a1, .LBB8_1187 .LBB8_570: # %.lr.ph.i684.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_571: # %.lr.ph.i684 # =>This Inner Loop Header: Depth=1 @@ -10875,13 +11239,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.574: # %middle.block3319 beq $a2, $a3, .LBB8_577 .LBB8_575: # %.lr.ph.i432.preheader - pcalau12i $a5, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI8_1) - pcalau12i $a5, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI8_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB8_576: # %.lr.ph.i432 # =>This Inner Loop Header: Depth=1 @@ -10965,13 +11335,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.582: # %middle.block3336 beq $a2, $a3, .LBB8_585 .LBB8_583: # %.lr.ph.i441.preheader - pcalau12i $a5, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI8_1) - pcalau12i $a5, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI8_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB8_584: # %.lr.ph.i441 # =>This Inner Loop Header: Depth=1 @@ -11055,13 +11431,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.590: # %middle.block3353 beq $a2, $a3, .LBB8_593 .LBB8_591: # %.lr.ph.i452.preheader - pcalau12i $a5, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI8_1) - pcalau12i $a5, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI8_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB8_592: # %.lr.ph.i452 # =>This Inner Loop Header: Depth=1 @@ -11145,13 +11527,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.598: # %middle.block3370 beq $a2, $a3, .LBB8_601 .LBB8_599: # %.lr.ph.i463.preheader - pcalau12i $a5, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI8_1) - pcalau12i $a5, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI8_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB8_600: # %.lr.ph.i463 # =>This Inner Loop Header: Depth=1 @@ -11235,13 +11623,19 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.606: # %middle.block3387 beq $a0, $a2, .LBB8_1187 .LBB8_607: # %.lr.ph.i474.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa3, $a3, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a0 alsl.d $a1, $a0, $a1, 4 addi.d $a1, $a1, 8 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa3, $a3 .p2align 4, , 16 .LBB8_608: # %.lr.ph.i474 # =>This Inner Loop Header: Depth=1 @@ -11319,12 +11713,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.611: # %middle.block1622 beq $a1, $a2, .LBB8_614 .LBB8_612: # %.lr.ph.i1294.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_613: # %.lr.ph.i1294 # =>This Inner Loop Header: Depth=1 @@ -11413,12 +11813,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.619: # %middle.block1638 beq $a1, $a2, .LBB8_622 .LBB8_620: # %.lr.ph.i1302.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_621: # %.lr.ph.i1302 # =>This Inner Loop Header: Depth=1 @@ -11507,12 +11913,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.627: # %middle.block1654 beq $a1, $a2, .LBB8_630 .LBB8_628: # %.lr.ph.i1310.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_629: # %.lr.ph.i1310 # =>This Inner Loop Header: Depth=1 @@ -11601,12 +12013,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.635: # %middle.block1670 beq $a1, $a2, .LBB8_638 .LBB8_636: # %.lr.ph.i1318.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_637: # %.lr.ph.i1318 # =>This Inner Loop Header: Depth=1 @@ -11695,12 +12113,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.643: # %middle.block1686 beq $a1, $a2, .LBB8_646 .LBB8_644: # %.lr.ph.i1326.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_645: # %.lr.ph.i1326 # =>This Inner Loop Header: Depth=1 @@ -11789,12 +12213,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.651: # %middle.block1702 beq $a0, $a1, .LBB8_1187 .LBB8_652: # %.lr.ph.i1334.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_653: # %.lr.ph.i1334 # =>This Inner Loop Header: Depth=1 @@ -11869,12 +12299,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.656: # %middle.block2822 beq $a1, $a2, .LBB8_659 .LBB8_657: # %.lr.ph.i708.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_658: # %.lr.ph.i708 # =>This Inner Loop Header: Depth=1 @@ -11963,12 +12399,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.664: # %middle.block2838 beq $a1, $a2, .LBB8_667 .LBB8_665: # %.lr.ph.i716.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_666: # %.lr.ph.i716 # =>This Inner Loop Header: Depth=1 @@ -12057,12 +12499,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.672: # %middle.block2854 beq $a0, $a1, .LBB8_1187 .LBB8_673: # %.lr.ph.i724.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_674: # %.lr.ph.i724 # =>This Inner Loop Header: Depth=1 @@ -12137,12 +12585,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.677: # %middle.block2598 beq $a1, $a2, .LBB8_680 .LBB8_678: # %.lr.ph.i828.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_679: # %.lr.ph.i828 # =>This Inner Loop Header: Depth=1 @@ -12231,12 +12685,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.685: # %middle.block2614 beq $a0, $a1, .LBB8_1187 .LBB8_686: # %.lr.ph.i836.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_687: # %.lr.ph.i836 # =>This Inner Loop Header: Depth=1 @@ -12311,12 +12771,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.690: # %middle.block3483 beq $a1, $a2, .LBB8_693 .LBB8_691: # %.lr.ph.i360.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_692: # %.lr.ph.i360 # =>This Inner Loop Header: Depth=1 @@ -12405,12 +12871,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.698: # %middle.block3499 beq $a1, $a2, .LBB8_701 .LBB8_699: # %.lr.ph.i368.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_700: # %.lr.ph.i368 # =>This Inner Loop Header: Depth=1 @@ -12499,12 +12971,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.706: # %middle.block3515 beq $a1, $a2, .LBB8_709 .LBB8_707: # %.lr.ph.i376.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_708: # %.lr.ph.i376 # =>This Inner Loop Header: Depth=1 @@ -12593,12 +13071,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.714: # %middle.block3531 beq $a0, $a1, .LBB8_1187 .LBB8_715: # %.lr.ph.i384.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_716: # %.lr.ph.i384 # =>This Inner Loop Header: Depth=1 @@ -12673,12 +13157,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.719: # %middle.block2566 beq $a1, $a2, .LBB8_722 .LBB8_720: # %.lr.ph.i844.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_721: # %.lr.ph.i844 # =>This Inner Loop Header: Depth=1 @@ -12767,12 +13257,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.727: # %middle.block2582 beq $a0, $a1, .LBB8_1187 .LBB8_728: # %.lr.ph.i852.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_729: # %.lr.ph.i852 # =>This Inner Loop Header: Depth=1 @@ -12847,12 +13343,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.732: # %middle.block2966 beq $a1, $a2, .LBB8_735 .LBB8_733: # %.lr.ph.i628.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_734: # %.lr.ph.i628 # =>This Inner Loop Header: Depth=1 @@ -12941,12 +13443,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.740: # %middle.block2982 beq $a1, $a2, .LBB8_743 .LBB8_741: # %.lr.ph.i636.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_742: # %.lr.ph.i636 # =>This Inner Loop Header: Depth=1 @@ -13035,12 +13543,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.748: # %middle.block2998 beq $a1, $a2, .LBB8_751 .LBB8_749: # %.lr.ph.i644.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_750: # %.lr.ph.i644 # =>This Inner Loop Header: Depth=1 @@ -13129,12 +13643,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.756: # %middle.block3014 beq $a0, $a1, .LBB8_1187 .LBB8_757: # %.lr.ph.i652.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_758: # %.lr.ph.i652 # =>This Inner Loop Header: Depth=1 @@ -13209,12 +13729,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.761: # %middle.block3403 beq $a1, $a2, .LBB8_764 .LBB8_762: # %.lr.ph.i392.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_763: # %.lr.ph.i392 # =>This Inner Loop Header: Depth=1 @@ -13303,12 +13829,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.769: # %middle.block3419 beq $a1, $a2, .LBB8_772 .LBB8_770: # %.lr.ph.i400.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_771: # %.lr.ph.i400 # =>This Inner Loop Header: Depth=1 @@ -13397,12 +13929,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.777: # %middle.block3435 beq $a1, $a2, .LBB8_780 .LBB8_778: # %.lr.ph.i408.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_779: # %.lr.ph.i408 # =>This Inner Loop Header: Depth=1 @@ -13491,12 +14029,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.785: # %middle.block3451 beq $a1, $a2, .LBB8_788 .LBB8_786: # %.lr.ph.i416.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_787: # %.lr.ph.i416 # =>This Inner Loop Header: Depth=1 @@ -13585,12 +14129,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.793: # %middle.block3467 beq $a0, $a1, .LBB8_1187 .LBB8_794: # %.lr.ph.i424.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_795: # %.lr.ph.i424 # =>This Inner Loop Header: Depth=1 @@ -13665,12 +14215,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.798: # %middle.block2742 beq $a1, $a2, .LBB8_801 .LBB8_799: # %.lr.ph.i732.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_800: # %.lr.ph.i732 # =>This Inner Loop Header: Depth=1 @@ -13759,12 +14315,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.806: # %middle.block2758 beq $a1, $a2, .LBB8_809 .LBB8_807: # %.lr.ph.i740.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_808: # %.lr.ph.i740 # =>This Inner Loop Header: Depth=1 @@ -13853,12 +14415,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.814: # %middle.block2774 beq $a1, $a2, .LBB8_817 .LBB8_815: # %.lr.ph.i748.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_816: # %.lr.ph.i748 # =>This Inner Loop Header: Depth=1 @@ -13947,12 +14515,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.822: # %middle.block2790 beq $a1, $a2, .LBB8_825 .LBB8_823: # %.lr.ph.i756.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_824: # %.lr.ph.i756 # =>This Inner Loop Header: Depth=1 @@ -14041,12 +14615,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.830: # %middle.block2806 beq $a0, $a1, .LBB8_1187 .LBB8_831: # %.lr.ph.i764.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_832: # %.lr.ph.i764 # =>This Inner Loop Header: Depth=1 @@ -14121,12 +14701,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.835: # %middle.block3126 beq $a1, $a2, .LBB8_838 .LBB8_836: # %.lr.ph.i539.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_837: # %.lr.ph.i539 # =>This Inner Loop Header: Depth=1 @@ -14215,12 +14801,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.843: # %middle.block3142 beq $a1, $a2, .LBB8_846 .LBB8_844: # %.lr.ph.i547.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_845: # %.lr.ph.i547 # =>This Inner Loop Header: Depth=1 @@ -14309,12 +14901,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.851: # %middle.block3158 beq $a1, $a2, .LBB8_854 .LBB8_852: # %.lr.ph.i555.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_853: # %.lr.ph.i555 # =>This Inner Loop Header: Depth=1 @@ -14403,12 +15001,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.859: # %middle.block3174 beq $a1, $a2, .LBB8_862 .LBB8_860: # %.lr.ph.i563.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_861: # %.lr.ph.i563 # =>This Inner Loop Header: Depth=1 @@ -14497,12 +15101,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.867: # %middle.block3190 beq $a0, $a1, .LBB8_1187 .LBB8_868: # %.lr.ph.i571.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_869: # %.lr.ph.i571 # =>This Inner Loop Header: Depth=1 @@ -14577,12 +15187,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.872: # %middle.block3286 beq $a1, $a2, .LBB8_875 .LBB8_873: # %.lr.ph.i483.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_874: # %.lr.ph.i483 # =>This Inner Loop Header: Depth=1 @@ -14671,12 +15287,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.880: # %middle.block3302 beq $a0, $a1, .LBB8_1187 .LBB8_881: # %.lr.ph.i491.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_882: # %.lr.ph.i491 # =>This Inner Loop Header: Depth=1 @@ -14751,12 +15373,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.885: # %middle.block1718 beq $a1, $a2, .LBB8_888 .LBB8_886: # %.lr.ph.i1254.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_887: # %.lr.ph.i1254 # =>This Inner Loop Header: Depth=1 @@ -14845,12 +15473,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.893: # %middle.block1734 beq $a1, $a2, .LBB8_896 .LBB8_894: # %.lr.ph.i1262.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_895: # %.lr.ph.i1262 # =>This Inner Loop Header: Depth=1 @@ -14939,12 +15573,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.901: # %middle.block1750 beq $a1, $a2, .LBB8_904 .LBB8_902: # %.lr.ph.i1270.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_903: # %.lr.ph.i1270 # =>This Inner Loop Header: Depth=1 @@ -15033,12 +15673,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.909: # %middle.block1766 beq $a1, $a2, .LBB8_912 .LBB8_910: # %.lr.ph.i1278.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_911: # %.lr.ph.i1278 # =>This Inner Loop Header: Depth=1 @@ -15127,12 +15773,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.917: # %middle.block1782 beq $a0, $a1, .LBB8_1187 .LBB8_918: # %.lr.ph.i1286.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_919: # %.lr.ph.i1286 # =>This Inner Loop Header: Depth=1 @@ -15153,41 +15805,39 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat bstrpick.d $a0, $a1, 30, 2 slli.d $a0, $a0, 2 vreplvei.d $vr1, $vr0, 0 - addi.d $a3, $a2, 16 - ori $a4, $zero, 0 - lu32i.d $a4, 1 - vreplgr2vr.d $vr2, $a4 - lu12i.w $a4, -419431 - ori $a4, $a4, 2458 - lu32i.d $a4, 104857 - lu52i.d $a4, $a4, 1023 - vreplgr2vr.d $vr3, $a4 - lu12i.w $a4, -307024 - ori $a4, $a4, 3880 - lu32i.d $a4, 129446 - lu52i.d $a4, $a4, 1023 - vreplgr2vr.d $vr4, $a4 - move $a4, $a0 + addi.d $a5, $a3, 16 + ori $a6, $zero, 0 + lu32i.d $a6, 1 + vreplgr2vr.d $vr2, $a6 + ori $a6, $a4, 2458 + lu32i.d $a6, 104857 + lu52i.d $a6, $a6, 1023 + vreplgr2vr.d $vr3, $a6 + ori $a6, $a2, 3880 + lu32i.d $a6, 129446 + lu52i.d $a6, $a6, 1023 + vreplgr2vr.d $vr4, $a6 + move $a6, $a0 .p2align 4, , 16 .LBB8_921: # %vector.body # =>This Inner Loop Header: Depth=1 vaddi.wu $vr5, $vr2, 2 - vpickve2gr.w $a5, $vr2, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa6, $a5 + vpickve2gr.w $a7, $vr2, 1 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa6, $a7 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a5, $vr2, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa7, $a5 + vpickve2gr.w $a7, $vr2, 0 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa7, $a7 ffint.d.l $fa7, $fa7 vextrins.d $vr7, $vr6, 16 - vpickve2gr.w $a5, $vr5, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa6, $a5 + vpickve2gr.w $a7, $vr5, 1 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa6, $a7 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a5, $vr5, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa5, $a5 + vpickve2gr.w $a7, $vr5, 0 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa5, $a7 ffint.d.l $fa5, $fa5 vextrins.d $vr5, $vr6, 16 vfadd.d $vr6, $vr7, $vr3 @@ -15198,34 +15848,38 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat vfadd.d $vr5, $vr5, $vr4 vfdiv.d $vr6, $vr6, $vr7 vfdiv.d $vr5, $vr8, $vr5 - vst $vr6, $a3, -16 - vst $vr5, $a3, 0 + vst $vr6, $a5, -16 + vst $vr5, $a5, 0 vaddi.wu $vr2, $vr2, 4 - addi.d $a4, $a4, -4 - addi.d $a3, $a3, 32 - bnez $a4, .LBB8_921 + addi.d $a6, $a6, -4 + addi.d $a5, $a5, 32 + bnez $a6, .LBB8_921 # %bb.922: # %middle.block beq $a0, $a1, .LBB8_1187 .LBB8_923: # %.lr.ph.i1342.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 - alsl.d $a2, $a0, $a2, 3 + alsl.d $a3, $a0, $a3, 3 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + ori $a2, $a2, 3880 + lu32i.d $a2, 129446 + lu52i.d $a2, $a2, 1023 + movgr2fr.d $fa2, $a2 .p2align 4, , 16 .LBB8_924: # %.lr.ph.i1342 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a0, 31, 0 - movgr2fr.d $fa3, $a3 + bstrpick.d $a2, $a0, 31, 0 + movgr2fr.d $fa3, $a2 ffint.d.l $fa3, $fa3 fadd.d $fa4, $fa3, $fa1 fmul.d $fa4, $fa0, $fa4 fadd.d $fa3, $fa3, $fa2 fdiv.d $fa3, $fa4, $fa3 - fst.d $fa3, $a2, 0 + fst.d $fa3, $a3, 0 addi.d $a1, $a1, -1 - addi.d $a2, $a2, 8 + addi.d $a3, $a3, 8 addi.w $a0, $a0, 1 bnez $a1, .LBB8_924 b .LBB8_1187 @@ -15287,12 +15941,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.927: # %middle.block2534 beq $a1, $a2, .LBB8_930 .LBB8_928: # %.lr.ph.i860.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_929: # %.lr.ph.i860 # =>This Inner Loop Header: Depth=1 @@ -15381,12 +16041,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.935: # %middle.block2550 beq $a0, $a1, .LBB8_1187 .LBB8_936: # %.lr.ph.i868.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_937: # %.lr.ph.i868 # =>This Inner Loop Header: Depth=1 @@ -15461,12 +16127,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.940: # %middle.block2630 beq $a1, $a2, .LBB8_943 .LBB8_941: # %.lr.ph.i772.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_942: # %.lr.ph.i772 # =>This Inner Loop Header: Depth=1 @@ -15555,12 +16227,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.948: # %middle.block2646 beq $a1, $a2, .LBB8_951 .LBB8_949: # %.lr.ph.i780.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_950: # %.lr.ph.i780 # =>This Inner Loop Header: Depth=1 @@ -15649,12 +16327,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.956: # %middle.block2662 beq $a1, $a2, .LBB8_959 .LBB8_957: # %.lr.ph.i788.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_958: # %.lr.ph.i788 # =>This Inner Loop Header: Depth=1 @@ -15743,12 +16427,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.964: # %middle.block2678 beq $a1, $a2, .LBB8_967 .LBB8_965: # %.lr.ph.i796.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_966: # %.lr.ph.i796 # =>This Inner Loop Header: Depth=1 @@ -15837,12 +16527,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.972: # %middle.block2694 beq $a1, $a2, .LBB8_975 .LBB8_973: # %.lr.ph.i804.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_974: # %.lr.ph.i804 # =>This Inner Loop Header: Depth=1 @@ -15931,12 +16627,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.980: # %middle.block2710 beq $a1, $a2, .LBB8_983 .LBB8_981: # %.lr.ph.i812.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_982: # %.lr.ph.i812 # =>This Inner Loop Header: Depth=1 @@ -16025,12 +16727,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.988: # %middle.block2726 beq $a0, $a1, .LBB8_1187 .LBB8_989: # %.lr.ph.i820.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_990: # %.lr.ph.i820 # =>This Inner Loop Header: Depth=1 @@ -16105,12 +16813,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.993: # %middle.block2502 beq $a1, $a2, .LBB8_996 .LBB8_994: # %.lr.ph.i876.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_995: # %.lr.ph.i876 # =>This Inner Loop Header: Depth=1 @@ -16199,12 +16913,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1001: # %middle.block2518 beq $a0, $a1, .LBB8_1187 .LBB8_1002: # %.lr.ph.i884.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_1003: # %.lr.ph.i884 # =>This Inner Loop Header: Depth=1 @@ -16279,12 +16999,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1006: # %middle.block1846 beq $a1, $a2, .LBB8_1009 .LBB8_1007: # %.lr.ph.i1150.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1008: # %.lr.ph.i1150 # =>This Inner Loop Header: Depth=1 @@ -16373,12 +17099,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1014: # %middle.block1862 beq $a1, $a2, .LBB8_1017 .LBB8_1015: # %.lr.ph.i1158.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1016: # %.lr.ph.i1158 # =>This Inner Loop Header: Depth=1 @@ -16467,12 +17199,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1022: # %middle.block1878 beq $a1, $a2, .LBB8_1025 .LBB8_1023: # %.lr.ph.i1166.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1024: # %.lr.ph.i1166 # =>This Inner Loop Header: Depth=1 @@ -16561,12 +17299,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1030: # %middle.block1894 beq $a1, $a2, .LBB8_1033 .LBB8_1031: # %.lr.ph.i1174.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1032: # %.lr.ph.i1174 # =>This Inner Loop Header: Depth=1 @@ -16655,12 +17399,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1038: # %middle.block1910 beq $a1, $a2, .LBB8_1041 .LBB8_1039: # %.lr.ph.i1182.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1040: # %.lr.ph.i1182 # =>This Inner Loop Header: Depth=1 @@ -16749,12 +17499,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1046: # %middle.block1926 beq $a1, $a2, .LBB8_1049 .LBB8_1047: # %.lr.ph.i1190.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1048: # %.lr.ph.i1190 # =>This Inner Loop Header: Depth=1 @@ -16843,12 +17599,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1054: # %middle.block1942 beq $a1, $a2, .LBB8_1057 .LBB8_1055: # %.lr.ph.i1198.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1056: # %.lr.ph.i1198 # =>This Inner Loop Header: Depth=1 @@ -16937,12 +17699,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1062: # %middle.block1958 beq $a1, $a2, .LBB8_1065 .LBB8_1063: # %.lr.ph.i1206.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1064: # %.lr.ph.i1206 # =>This Inner Loop Header: Depth=1 @@ -17031,12 +17799,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1070: # %middle.block1974 beq $a1, $a2, .LBB8_1073 .LBB8_1071: # %.lr.ph.i1214.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1072: # %.lr.ph.i1214 # =>This Inner Loop Header: Depth=1 @@ -17125,12 +17899,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1078: # %middle.block1990 beq $a0, $a1, .LBB8_1187 .LBB8_1079: # %.lr.ph.i1222.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_1080: # %.lr.ph.i1222 # =>This Inner Loop Header: Depth=1 @@ -17205,12 +17985,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1083: # %middle.block1798 beq $a1, $a2, .LBB8_1086 .LBB8_1084: # %.lr.ph.i1230.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1085: # %.lr.ph.i1230 # =>This Inner Loop Header: Depth=1 @@ -17299,12 +18085,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1091: # %middle.block1814 beq $a1, $a2, .LBB8_1094 .LBB8_1092: # %.lr.ph.i1238.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1093: # %.lr.ph.i1238 # =>This Inner Loop Header: Depth=1 @@ -17393,12 +18185,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1099: # %middle.block1830 beq $a0, $a1, .LBB8_1187 .LBB8_1100: # %.lr.ph.i1246.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_1101: # %.lr.ph.i1246 # =>This Inner Loop Header: Depth=1 @@ -17473,12 +18271,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1104: # %middle.block2070 beq $a1, $a2, .LBB8_1107 .LBB8_1105: # %.lr.ph.i1030.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1106: # %.lr.ph.i1030 # =>This Inner Loop Header: Depth=1 @@ -17567,12 +18371,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1112: # %middle.block2086 beq $a1, $a2, .LBB8_1115 .LBB8_1113: # %.lr.ph.i1038.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1114: # %.lr.ph.i1038 # =>This Inner Loop Header: Depth=1 @@ -17661,12 +18471,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1120: # %middle.block2102 beq $a1, $a2, .LBB8_1123 .LBB8_1121: # %.lr.ph.i1046.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1122: # %.lr.ph.i1046 # =>This Inner Loop Header: Depth=1 @@ -17755,12 +18571,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1128: # %middle.block2118 beq $a1, $a2, .LBB8_1131 .LBB8_1129: # %.lr.ph.i1054.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1130: # %.lr.ph.i1054 # =>This Inner Loop Header: Depth=1 @@ -17849,12 +18671,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1136: # %middle.block2134 beq $a1, $a2, .LBB8_1139 .LBB8_1137: # %.lr.ph.i1062.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1138: # %.lr.ph.i1062 # =>This Inner Loop Header: Depth=1 @@ -17943,12 +18771,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1144: # %middle.block2150 beq $a1, $a2, .LBB8_1147 .LBB8_1145: # %.lr.ph.i1070.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1146: # %.lr.ph.i1070 # =>This Inner Loop Header: Depth=1 @@ -18037,12 +18871,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1152: # %middle.block2166 beq $a1, $a2, .LBB8_1155 .LBB8_1153: # %.lr.ph.i1078.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1154: # %.lr.ph.i1078 # =>This Inner Loop Header: Depth=1 @@ -18131,12 +18971,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1160: # %middle.block2182 beq $a1, $a2, .LBB8_1163 .LBB8_1161: # %.lr.ph.i1086.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1162: # %.lr.ph.i1086 # =>This Inner Loop Header: Depth=1 @@ -18225,12 +19071,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1168: # %middle.block2198 beq $a1, $a2, .LBB8_1171 .LBB8_1169: # %.lr.ph.i1094.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1170: # %.lr.ph.i1094 # =>This Inner Loop Header: Depth=1 @@ -18319,12 +19171,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1176: # %middle.block2214 beq $a1, $a2, .LBB8_1179 .LBB8_1177: # %.lr.ph.i1102.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI8_1) - pcalau12i $a4, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI8_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB8_1178: # %.lr.ph.i1102 # =>This Inner Loop Header: Depth=1 @@ -18413,12 +19271,18 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat # %bb.1184: # %middle.block2230 beq $a0, $a1, .LBB8_1187 .LBB8_1185: # %.lr.ph.i1110.preheader - pcalau12i $a3, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI8_1) - pcalau12i $a3, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI8_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB8_1186: # %.lr.ph.i1110 # =>This Inner Loop Header: Depth=1 @@ -18489,15 +19353,9 @@ _Z8loopInitjR8LoopStat: # @_Z8loopInitjR8LoopStat .LCPI9_0: .dword 0x3fb999999999999a # double 0.10000000000000001 .dword 0x3fc999999999999a # double 0.20000000000000001 -.LCPI9_3: +.LCPI9_1: .dword 0x3fc999999999999a # double 0.20000000000000001 .dword 0x3fd3333333333333 # double 0.29999999999999999 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI9_1: - .dword 0x3ff199999999999a # double 1.1000000000000001 -.LCPI9_2: - .dword 0x3ff1f9a6b50b0f28 # double 1.1234500000000001 .text .globl _Z8loopInitj .p2align 5 @@ -18743,8 +19601,8 @@ _Z8loopInitj: # @_Z8loopInitj .LBB9_40: pcalau12i $a0, %pc_hi20(.LCPI9_0) addi.d $a0, $a0, %pc_lo12(.LCPI9_0) - pcalau12i $a1, %pc_hi20(.LCPI9_3) - addi.d $a1, $a1, %pc_lo12(.LCPI9_3) + pcalau12i $a1, %pc_hi20(.LCPI9_1) + addi.d $a1, $a1, %pc_lo12(.LCPI9_1) ld.w $a3, $s0, 1032 blez $a3, .LBB9_577 # %bb.41: # %.lr.ph.preheader.i429 @@ -18958,8 +19816,10 @@ _Z8loopInitj: # @_Z8loopInitj pcalau12i $a2, %pc_hi20(.LCPI9_0) addi.d $a2, $a2, %pc_lo12(.LCPI9_0) fldx.d $fa0, $a2, $a0 - ld.d $a2, $s0, 472 + ld.d $a3, $s0, 472 ori $a0, $zero, 4 + lu12i.w $a4, -419431 + lu12i.w $a2, -307024 bgeu $a1, $a0, .LBB9_920 # %bb.78: move $a0, $zero @@ -19148,12 +20008,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.102: # %middle.block3802 beq $a1, $a2, .LBB9_105 .LBB9_103: # %.lr.ph.i183.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_104: # %.lr.ph.i183 # =>This Inner Loop Header: Depth=1 @@ -19242,12 +20108,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.110: # %middle.block3818 beq $a1, $a2, .LBB9_113 .LBB9_111: # %.lr.ph.i191.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_112: # %.lr.ph.i191 # =>This Inner Loop Header: Depth=1 @@ -19336,12 +20208,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.118: # %middle.block3834 beq $a1, $a2, .LBB9_121 .LBB9_119: # %.lr.ph.i199.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_120: # %.lr.ph.i199 # =>This Inner Loop Header: Depth=1 @@ -19430,12 +20308,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.126: # %middle.block3850 beq $a1, $a2, .LBB9_129 .LBB9_127: # %.lr.ph.i207.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_128: # %.lr.ph.i207 # =>This Inner Loop Header: Depth=1 @@ -19524,12 +20408,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.134: # %middle.block3866 beq $a1, $a2, .LBB9_137 .LBB9_135: # %.lr.ph.i215.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_136: # %.lr.ph.i215 # =>This Inner Loop Header: Depth=1 @@ -19618,12 +20508,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.142: # %middle.block3882 beq $a0, $a1, .LBB9_1187 .LBB9_143: # %.lr.ph.i223.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_144: # %.lr.ph.i223 # =>This Inner Loop Header: Depth=1 @@ -19698,12 +20594,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.147: # %middle.block3546 beq $a1, $a2, .LBB9_150 .LBB9_148: # %.lr.ph.i231.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_149: # %.lr.ph.i231 # =>This Inner Loop Header: Depth=1 @@ -19792,12 +20694,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.155: # %middle.block3562 beq $a1, $a2, .LBB9_158 .LBB9_156: # %.lr.ph.i239.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_157: # %.lr.ph.i239 # =>This Inner Loop Header: Depth=1 @@ -19886,12 +20794,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.163: # %middle.block3578 beq $a1, $a2, .LBB9_166 .LBB9_164: # %.lr.ph.i247.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_165: # %.lr.ph.i247 # =>This Inner Loop Header: Depth=1 @@ -19980,12 +20894,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.171: # %middle.block3594 beq $a1, $a2, .LBB9_174 .LBB9_172: # %.lr.ph.i255.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_173: # %.lr.ph.i255 # =>This Inner Loop Header: Depth=1 @@ -20074,12 +20994,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.179: # %middle.block3610 beq $a1, $a2, .LBB9_182 .LBB9_180: # %.lr.ph.i263.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_181: # %.lr.ph.i263 # =>This Inner Loop Header: Depth=1 @@ -20168,12 +21094,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.187: # %middle.block3626 beq $a1, $a2, .LBB9_190 .LBB9_188: # %.lr.ph.i271.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_189: # %.lr.ph.i271 # =>This Inner Loop Header: Depth=1 @@ -20262,12 +21194,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.195: # %middle.block3642 beq $a1, $a2, .LBB9_198 .LBB9_196: # %.lr.ph.i279.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_197: # %.lr.ph.i279 # =>This Inner Loop Header: Depth=1 @@ -20356,12 +21294,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.203: # %middle.block3658 beq $a1, $a2, .LBB9_206 .LBB9_204: # %.lr.ph.i287.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_205: # %.lr.ph.i287 # =>This Inner Loop Header: Depth=1 @@ -20450,12 +21394,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.211: # %middle.block3674 beq $a1, $a2, .LBB9_214 .LBB9_212: # %.lr.ph.i295.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_213: # %.lr.ph.i295 # =>This Inner Loop Header: Depth=1 @@ -20544,12 +21494,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.219: # %middle.block3690 beq $a1, $a2, .LBB9_222 .LBB9_220: # %.lr.ph.i303.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_221: # %.lr.ph.i303 # =>This Inner Loop Header: Depth=1 @@ -20638,12 +21594,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.227: # %middle.block3706 beq $a1, $a2, .LBB9_230 .LBB9_228: # %.lr.ph.i311.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_229: # %.lr.ph.i311 # =>This Inner Loop Header: Depth=1 @@ -20732,12 +21694,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.235: # %middle.block3722 beq $a1, $a2, .LBB9_238 .LBB9_236: # %.lr.ph.i319.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_237: # %.lr.ph.i319 # =>This Inner Loop Header: Depth=1 @@ -20826,12 +21794,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.243: # %middle.block3738 beq $a1, $a2, .LBB9_246 .LBB9_244: # %.lr.ph.i327.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_245: # %.lr.ph.i327 # =>This Inner Loop Header: Depth=1 @@ -20920,12 +21894,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.251: # %middle.block3754 beq $a1, $a2, .LBB9_254 .LBB9_252: # %.lr.ph.i335.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_253: # %.lr.ph.i335 # =>This Inner Loop Header: Depth=1 @@ -21014,12 +21994,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.259: # %middle.block3770 beq $a1, $a2, .LBB9_262 .LBB9_260: # %.lr.ph.i343.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_261: # %.lr.ph.i343 # =>This Inner Loop Header: Depth=1 @@ -21108,12 +22094,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.267: # %middle.block3786 beq $a0, $a1, .LBB9_1187 .LBB9_268: # %.lr.ph.i351.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_269: # %.lr.ph.i351 # =>This Inner Loop Header: Depth=1 @@ -21210,12 +22202,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.274: # %middle.block2005 beq $a1, $a2, .LBB9_277 .LBB9_275: # %.lr.ph.i1117.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_276: # %.lr.ph.i1117 # =>This Inner Loop Header: Depth=1 @@ -21304,12 +22302,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.282: # %middle.block2021 beq $a1, $a2, .LBB9_285 .LBB9_283: # %.lr.ph.i1125.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_284: # %.lr.ph.i1125 # =>This Inner Loop Header: Depth=1 @@ -21398,12 +22402,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.290: # %middle.block2037 beq $a1, $a2, .LBB9_293 .LBB9_291: # %.lr.ph.i1133.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_292: # %.lr.ph.i1133 # =>This Inner Loop Header: Depth=1 @@ -21492,12 +22502,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.298: # %middle.block2053 beq $a0, $a1, .LBB9_1187 .LBB9_299: # %.lr.ph.i1141.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_300: # %.lr.ph.i1141 # =>This Inner Loop Header: Depth=1 @@ -21572,12 +22588,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.303: # %middle.block2869 beq $a1, $a2, .LBB9_306 .LBB9_304: # %.lr.ph.i691.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_305: # %.lr.ph.i691 # =>This Inner Loop Header: Depth=1 @@ -21666,12 +22688,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.311: # %middle.block2885 beq $a0, $a1, .LBB9_1187 .LBB9_312: # %.lr.ph.i699.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_313: # %.lr.ph.i699 # =>This Inner Loop Header: Depth=1 @@ -21746,12 +22774,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.316: # %middle.block2933 beq $a1, $a2, .LBB9_319 .LBB9_317: # %.lr.ph.i659.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_318: # %.lr.ph.i659 # =>This Inner Loop Header: Depth=1 @@ -21840,12 +22874,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.324: # %middle.block2949 beq $a0, $a1, .LBB9_1187 .LBB9_325: # %.lr.ph.i667.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_326: # %.lr.ph.i667 # =>This Inner Loop Header: Depth=1 @@ -21920,12 +22960,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.329: # %middle.block3045 beq $a1, $a2, .LBB9_332 .LBB9_330: # %.lr.ph.i578.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_331: # %.lr.ph.i578 # =>This Inner Loop Header: Depth=1 @@ -22014,12 +23060,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.337: # %middle.block3061 beq $a1, $a2, .LBB9_340 .LBB9_338: # %.lr.ph.i586.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_339: # %.lr.ph.i586 # =>This Inner Loop Header: Depth=1 @@ -22108,12 +23160,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.345: # %middle.block3077 beq $a1, $a2, .LBB9_348 .LBB9_346: # %.lr.ph.i594.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_347: # %.lr.ph.i594 # =>This Inner Loop Header: Depth=1 @@ -22202,12 +23260,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.353: # %middle.block3093 beq $a1, $a2, .LBB9_356 .LBB9_354: # %.lr.ph.i602.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_355: # %.lr.ph.i602 # =>This Inner Loop Header: Depth=1 @@ -22296,12 +23360,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.361: # %middle.block3109 beq $a0, $a1, .LBB9_1187 .LBB9_362: # %.lr.ph.i610.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_363: # %.lr.ph.i610 # =>This Inner Loop Header: Depth=1 @@ -22376,12 +23446,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.366: # %middle.block3029 beq $a0, $a1, .LBB9_1187 .LBB9_367: # %.lr.ph.i619.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_368: # %.lr.ph.i619 # =>This Inner Loop Header: Depth=1 @@ -22456,12 +23532,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.371: # %middle.block3898 beq $a1, $a2, .LBB9_374 .LBB9_372: # %.lr.ph.i.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_373: # %.lr.ph.i # =>This Inner Loop Header: Depth=1 @@ -22550,12 +23632,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.379: # %middle.block3914 beq $a1, $a2, .LBB9_382 .LBB9_380: # %.lr.ph.i167.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_381: # %.lr.ph.i167 # =>This Inner Loop Header: Depth=1 @@ -22644,12 +23732,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.387: # %middle.block3930 beq $a0, $a1, .LBB9_1187 .LBB9_388: # %.lr.ph.i175.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_389: # %.lr.ph.i175 # =>This Inner Loop Header: Depth=1 @@ -22724,12 +23818,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.392: # %middle.block3205 beq $a1, $a2, .LBB9_395 .LBB9_393: # %.lr.ph.i498.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_394: # %.lr.ph.i498 # =>This Inner Loop Header: Depth=1 @@ -22818,12 +23918,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.400: # %middle.block3221 beq $a1, $a2, .LBB9_403 .LBB9_401: # %.lr.ph.i506.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_402: # %.lr.ph.i506 # =>This Inner Loop Header: Depth=1 @@ -22912,12 +24018,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.408: # %middle.block3237 beq $a1, $a2, .LBB9_411 .LBB9_409: # %.lr.ph.i514.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_410: # %.lr.ph.i514 # =>This Inner Loop Header: Depth=1 @@ -23006,12 +24118,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.416: # %middle.block3253 beq $a1, $a2, .LBB9_419 .LBB9_417: # %.lr.ph.i522.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_418: # %.lr.ph.i522 # =>This Inner Loop Header: Depth=1 @@ -23100,12 +24218,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.424: # %middle.block3269 beq $a0, $a1, .LBB9_1187 .LBB9_425: # %.lr.ph.i530.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_426: # %.lr.ph.i530 # =>This Inner Loop Header: Depth=1 @@ -23180,12 +24304,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.429: # %middle.block2245 beq $a1, $a2, .LBB9_432 .LBB9_430: # %.lr.ph.i943.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_431: # %.lr.ph.i943 # =>This Inner Loop Header: Depth=1 @@ -23274,12 +24404,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.437: # %middle.block2261 beq $a1, $a2, .LBB9_440 .LBB9_438: # %.lr.ph.i951.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_439: # %.lr.ph.i951 # =>This Inner Loop Header: Depth=1 @@ -23368,12 +24504,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.445: # %middle.block2277 beq $a1, $a2, .LBB9_448 .LBB9_446: # %.lr.ph.i959.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_447: # %.lr.ph.i959 # =>This Inner Loop Header: Depth=1 @@ -23462,12 +24604,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.453: # %middle.block2293 beq $a1, $a2, .LBB9_456 .LBB9_454: # %.lr.ph.i967.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_455: # %.lr.ph.i967 # =>This Inner Loop Header: Depth=1 @@ -23556,12 +24704,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.461: # %middle.block2309 beq $a1, $a2, .LBB9_464 .LBB9_462: # %.lr.ph.i975.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_463: # %.lr.ph.i975 # =>This Inner Loop Header: Depth=1 @@ -23650,12 +24804,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.469: # %middle.block2325 beq $a1, $a2, .LBB9_472 .LBB9_470: # %.lr.ph.i983.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_471: # %.lr.ph.i983 # =>This Inner Loop Header: Depth=1 @@ -23744,12 +24904,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.477: # %middle.block2341 beq $a1, $a2, .LBB9_480 .LBB9_478: # %.lr.ph.i991.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_479: # %.lr.ph.i991 # =>This Inner Loop Header: Depth=1 @@ -23838,12 +25004,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.485: # %middle.block2357 beq $a1, $a2, .LBB9_488 .LBB9_486: # %.lr.ph.i999.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_487: # %.lr.ph.i999 # =>This Inner Loop Header: Depth=1 @@ -23932,12 +25104,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.493: # %middle.block2373 beq $a1, $a2, .LBB9_496 .LBB9_494: # %.lr.ph.i1007.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_495: # %.lr.ph.i1007 # =>This Inner Loop Header: Depth=1 @@ -24026,12 +25204,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.501: # %middle.block2389 beq $a0, $a1, .LBB9_504 .LBB9_502: # %.lr.ph.i1015.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_503: # %.lr.ph.i1015 # =>This Inner Loop Header: Depth=1 @@ -24136,12 +25320,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.512: # %middle.block2405 beq $a0, $a1, .LBB9_515 .LBB9_513: # %.lr.ph.i891.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_514: # %.lr.ph.i891 # =>This Inner Loop Header: Depth=1 @@ -24230,12 +25420,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.520: # %middle.block2421 beq $a0, $a1, .LBB9_523 .LBB9_521: # %.lr.ph.i899.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_522: # %.lr.ph.i899 # =>This Inner Loop Header: Depth=1 @@ -24324,12 +25520,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.528: # %middle.block2437 beq $a0, $a1, .LBB9_531 .LBB9_529: # %.lr.ph.i907.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_530: # %.lr.ph.i907 # =>This Inner Loop Header: Depth=1 @@ -24418,12 +25620,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.536: # %middle.block2453 beq $a0, $a1, .LBB9_539 .LBB9_537: # %.lr.ph.i915.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_538: # %.lr.ph.i915 # =>This Inner Loop Header: Depth=1 @@ -24512,12 +25720,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.544: # %middle.block2469 beq $a0, $a1, .LBB9_547 .LBB9_545: # %.lr.ph.i923.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_546: # %.lr.ph.i923 # =>This Inner Loop Header: Depth=1 @@ -24624,12 +25838,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.556: # %middle.block2485 beq $a0, $a1, .LBB9_1187 .LBB9_557: # %.lr.ph.i935.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_558: # %.lr.ph.i935 # =>This Inner Loop Header: Depth=1 @@ -24704,12 +25924,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.561: # %middle.block2901 beq $a1, $a2, .LBB9_564 .LBB9_562: # %.lr.ph.i675.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_563: # %.lr.ph.i675 # =>This Inner Loop Header: Depth=1 @@ -24798,12 +26024,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.569: # %middle.block2917 beq $a0, $a1, .LBB9_1187 .LBB9_570: # %.lr.ph.i683.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_571: # %.lr.ph.i683 # =>This Inner Loop Header: Depth=1 @@ -24869,13 +26101,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.574: # %middle.block3318 beq $a2, $a3, .LBB9_577 .LBB9_575: # %.lr.ph.i431.preheader - pcalau12i $a5, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI9_1) - pcalau12i $a5, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI9_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB9_576: # %.lr.ph.i431 # =>This Inner Loop Header: Depth=1 @@ -24959,13 +26197,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.582: # %middle.block3335 beq $a2, $a3, .LBB9_585 .LBB9_583: # %.lr.ph.i440.preheader - pcalau12i $a5, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI9_1) - pcalau12i $a5, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI9_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB9_584: # %.lr.ph.i440 # =>This Inner Loop Header: Depth=1 @@ -25049,13 +26293,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.590: # %middle.block3352 beq $a2, $a3, .LBB9_593 .LBB9_591: # %.lr.ph.i451.preheader - pcalau12i $a5, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI9_1) - pcalau12i $a5, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI9_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB9_592: # %.lr.ph.i451 # =>This Inner Loop Header: Depth=1 @@ -25139,13 +26389,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.598: # %middle.block3369 beq $a2, $a3, .LBB9_601 .LBB9_599: # %.lr.ph.i462.preheader - pcalau12i $a5, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI9_1) - pcalau12i $a5, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a5, %pc_lo12(.LCPI9_2) sub.d $a3, $a3, $a2 alsl.d $a4, $a2, $a4, 4 addi.d $a4, $a4, 8 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, 104857 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa2, $a5 + lu12i.w $a5, -307024 + ori $a5, $a5, 3880 + lu32i.d $a5, 129446 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa3, $a5 .p2align 4, , 16 .LBB9_600: # %.lr.ph.i462 # =>This Inner Loop Header: Depth=1 @@ -25229,13 +26485,19 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.606: # %middle.block3386 beq $a0, $a2, .LBB9_1187 .LBB9_607: # %.lr.ph.i473.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa3, $a3, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a0 alsl.d $a1, $a0, $a1, 4 addi.d $a1, $a1, 8 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa3, $a3 .p2align 4, , 16 .LBB9_608: # %.lr.ph.i473 # =>This Inner Loop Header: Depth=1 @@ -25313,12 +26575,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.611: # %middle.block1621 beq $a1, $a2, .LBB9_614 .LBB9_612: # %.lr.ph.i1293.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_613: # %.lr.ph.i1293 # =>This Inner Loop Header: Depth=1 @@ -25407,12 +26675,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.619: # %middle.block1637 beq $a1, $a2, .LBB9_622 .LBB9_620: # %.lr.ph.i1301.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_621: # %.lr.ph.i1301 # =>This Inner Loop Header: Depth=1 @@ -25501,12 +26775,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.627: # %middle.block1653 beq $a1, $a2, .LBB9_630 .LBB9_628: # %.lr.ph.i1309.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_629: # %.lr.ph.i1309 # =>This Inner Loop Header: Depth=1 @@ -25595,12 +26875,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.635: # %middle.block1669 beq $a1, $a2, .LBB9_638 .LBB9_636: # %.lr.ph.i1317.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_637: # %.lr.ph.i1317 # =>This Inner Loop Header: Depth=1 @@ -25689,12 +26975,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.643: # %middle.block1685 beq $a1, $a2, .LBB9_646 .LBB9_644: # %.lr.ph.i1325.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_645: # %.lr.ph.i1325 # =>This Inner Loop Header: Depth=1 @@ -25783,12 +27075,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.651: # %middle.block1701 beq $a0, $a1, .LBB9_1187 .LBB9_652: # %.lr.ph.i1333.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_653: # %.lr.ph.i1333 # =>This Inner Loop Header: Depth=1 @@ -25863,12 +27161,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.656: # %middle.block2821 beq $a1, $a2, .LBB9_659 .LBB9_657: # %.lr.ph.i707.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_658: # %.lr.ph.i707 # =>This Inner Loop Header: Depth=1 @@ -25957,12 +27261,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.664: # %middle.block2837 beq $a1, $a2, .LBB9_667 .LBB9_665: # %.lr.ph.i715.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_666: # %.lr.ph.i715 # =>This Inner Loop Header: Depth=1 @@ -26051,12 +27361,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.672: # %middle.block2853 beq $a0, $a1, .LBB9_1187 .LBB9_673: # %.lr.ph.i723.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_674: # %.lr.ph.i723 # =>This Inner Loop Header: Depth=1 @@ -26131,12 +27447,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.677: # %middle.block2597 beq $a1, $a2, .LBB9_680 .LBB9_678: # %.lr.ph.i827.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_679: # %.lr.ph.i827 # =>This Inner Loop Header: Depth=1 @@ -26225,12 +27547,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.685: # %middle.block2613 beq $a0, $a1, .LBB9_1187 .LBB9_686: # %.lr.ph.i835.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_687: # %.lr.ph.i835 # =>This Inner Loop Header: Depth=1 @@ -26305,12 +27633,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.690: # %middle.block3482 beq $a1, $a2, .LBB9_693 .LBB9_691: # %.lr.ph.i359.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_692: # %.lr.ph.i359 # =>This Inner Loop Header: Depth=1 @@ -26399,12 +27733,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.698: # %middle.block3498 beq $a1, $a2, .LBB9_701 .LBB9_699: # %.lr.ph.i367.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_700: # %.lr.ph.i367 # =>This Inner Loop Header: Depth=1 @@ -26493,12 +27833,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.706: # %middle.block3514 beq $a1, $a2, .LBB9_709 .LBB9_707: # %.lr.ph.i375.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_708: # %.lr.ph.i375 # =>This Inner Loop Header: Depth=1 @@ -26587,12 +27933,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.714: # %middle.block3530 beq $a0, $a1, .LBB9_1187 .LBB9_715: # %.lr.ph.i383.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_716: # %.lr.ph.i383 # =>This Inner Loop Header: Depth=1 @@ -26667,12 +28019,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.719: # %middle.block2565 beq $a1, $a2, .LBB9_722 .LBB9_720: # %.lr.ph.i843.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_721: # %.lr.ph.i843 # =>This Inner Loop Header: Depth=1 @@ -26761,12 +28119,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.727: # %middle.block2581 beq $a0, $a1, .LBB9_1187 .LBB9_728: # %.lr.ph.i851.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_729: # %.lr.ph.i851 # =>This Inner Loop Header: Depth=1 @@ -26841,12 +28205,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.732: # %middle.block2965 beq $a1, $a2, .LBB9_735 .LBB9_733: # %.lr.ph.i627.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_734: # %.lr.ph.i627 # =>This Inner Loop Header: Depth=1 @@ -26935,12 +28305,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.740: # %middle.block2981 beq $a1, $a2, .LBB9_743 .LBB9_741: # %.lr.ph.i635.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_742: # %.lr.ph.i635 # =>This Inner Loop Header: Depth=1 @@ -27029,12 +28405,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.748: # %middle.block2997 beq $a1, $a2, .LBB9_751 .LBB9_749: # %.lr.ph.i643.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_750: # %.lr.ph.i643 # =>This Inner Loop Header: Depth=1 @@ -27123,12 +28505,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.756: # %middle.block3013 beq $a0, $a1, .LBB9_1187 .LBB9_757: # %.lr.ph.i651.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_758: # %.lr.ph.i651 # =>This Inner Loop Header: Depth=1 @@ -27203,12 +28591,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.761: # %middle.block3402 beq $a1, $a2, .LBB9_764 .LBB9_762: # %.lr.ph.i391.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_763: # %.lr.ph.i391 # =>This Inner Loop Header: Depth=1 @@ -27297,12 +28691,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.769: # %middle.block3418 beq $a1, $a2, .LBB9_772 .LBB9_770: # %.lr.ph.i399.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_771: # %.lr.ph.i399 # =>This Inner Loop Header: Depth=1 @@ -27391,12 +28791,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.777: # %middle.block3434 beq $a1, $a2, .LBB9_780 .LBB9_778: # %.lr.ph.i407.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_779: # %.lr.ph.i407 # =>This Inner Loop Header: Depth=1 @@ -27485,12 +28891,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.785: # %middle.block3450 beq $a1, $a2, .LBB9_788 .LBB9_786: # %.lr.ph.i415.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_787: # %.lr.ph.i415 # =>This Inner Loop Header: Depth=1 @@ -27579,12 +28991,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.793: # %middle.block3466 beq $a0, $a1, .LBB9_1187 .LBB9_794: # %.lr.ph.i423.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_795: # %.lr.ph.i423 # =>This Inner Loop Header: Depth=1 @@ -27659,12 +29077,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.798: # %middle.block2741 beq $a1, $a2, .LBB9_801 .LBB9_799: # %.lr.ph.i731.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_800: # %.lr.ph.i731 # =>This Inner Loop Header: Depth=1 @@ -27753,12 +29177,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.806: # %middle.block2757 beq $a1, $a2, .LBB9_809 .LBB9_807: # %.lr.ph.i739.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_808: # %.lr.ph.i739 # =>This Inner Loop Header: Depth=1 @@ -27847,12 +29277,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.814: # %middle.block2773 beq $a1, $a2, .LBB9_817 .LBB9_815: # %.lr.ph.i747.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_816: # %.lr.ph.i747 # =>This Inner Loop Header: Depth=1 @@ -27941,12 +29377,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.822: # %middle.block2789 beq $a1, $a2, .LBB9_825 .LBB9_823: # %.lr.ph.i755.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_824: # %.lr.ph.i755 # =>This Inner Loop Header: Depth=1 @@ -28035,12 +29477,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.830: # %middle.block2805 beq $a0, $a1, .LBB9_1187 .LBB9_831: # %.lr.ph.i763.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_832: # %.lr.ph.i763 # =>This Inner Loop Header: Depth=1 @@ -28115,12 +29563,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.835: # %middle.block3125 beq $a1, $a2, .LBB9_838 .LBB9_836: # %.lr.ph.i538.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_837: # %.lr.ph.i538 # =>This Inner Loop Header: Depth=1 @@ -28209,12 +29663,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.843: # %middle.block3141 beq $a1, $a2, .LBB9_846 .LBB9_844: # %.lr.ph.i546.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_845: # %.lr.ph.i546 # =>This Inner Loop Header: Depth=1 @@ -28303,12 +29763,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.851: # %middle.block3157 beq $a1, $a2, .LBB9_854 .LBB9_852: # %.lr.ph.i554.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_853: # %.lr.ph.i554 # =>This Inner Loop Header: Depth=1 @@ -28397,12 +29863,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.859: # %middle.block3173 beq $a1, $a2, .LBB9_862 .LBB9_860: # %.lr.ph.i562.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_861: # %.lr.ph.i562 # =>This Inner Loop Header: Depth=1 @@ -28491,12 +29963,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.867: # %middle.block3189 beq $a0, $a1, .LBB9_1187 .LBB9_868: # %.lr.ph.i570.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_869: # %.lr.ph.i570 # =>This Inner Loop Header: Depth=1 @@ -28571,12 +30049,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.872: # %middle.block3285 beq $a1, $a2, .LBB9_875 .LBB9_873: # %.lr.ph.i482.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_874: # %.lr.ph.i482 # =>This Inner Loop Header: Depth=1 @@ -28665,12 +30149,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.880: # %middle.block3301 beq $a0, $a1, .LBB9_1187 .LBB9_881: # %.lr.ph.i490.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_882: # %.lr.ph.i490 # =>This Inner Loop Header: Depth=1 @@ -28745,12 +30235,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.885: # %middle.block1717 beq $a1, $a2, .LBB9_888 .LBB9_886: # %.lr.ph.i1253.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_887: # %.lr.ph.i1253 # =>This Inner Loop Header: Depth=1 @@ -28839,12 +30335,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.893: # %middle.block1733 beq $a1, $a2, .LBB9_896 .LBB9_894: # %.lr.ph.i1261.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_895: # %.lr.ph.i1261 # =>This Inner Loop Header: Depth=1 @@ -28933,12 +30435,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.901: # %middle.block1749 beq $a1, $a2, .LBB9_904 .LBB9_902: # %.lr.ph.i1269.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_903: # %.lr.ph.i1269 # =>This Inner Loop Header: Depth=1 @@ -29027,12 +30535,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.909: # %middle.block1765 beq $a1, $a2, .LBB9_912 .LBB9_910: # %.lr.ph.i1277.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_911: # %.lr.ph.i1277 # =>This Inner Loop Header: Depth=1 @@ -29121,12 +30635,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.917: # %middle.block1781 beq $a0, $a1, .LBB9_1187 .LBB9_918: # %.lr.ph.i1285.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_919: # %.lr.ph.i1285 # =>This Inner Loop Header: Depth=1 @@ -29147,41 +30667,39 @@ _Z8loopInitj: # @_Z8loopInitj bstrpick.d $a0, $a1, 30, 2 slli.d $a0, $a0, 2 vreplvei.d $vr1, $vr0, 0 - addi.d $a3, $a2, 16 - ori $a4, $zero, 0 - lu32i.d $a4, 1 - vreplgr2vr.d $vr2, $a4 - lu12i.w $a4, -419431 - ori $a4, $a4, 2458 - lu32i.d $a4, 104857 - lu52i.d $a4, $a4, 1023 - vreplgr2vr.d $vr3, $a4 - lu12i.w $a4, -307024 - ori $a4, $a4, 3880 - lu32i.d $a4, 129446 - lu52i.d $a4, $a4, 1023 - vreplgr2vr.d $vr4, $a4 - move $a4, $a0 + addi.d $a5, $a3, 16 + ori $a6, $zero, 0 + lu32i.d $a6, 1 + vreplgr2vr.d $vr2, $a6 + ori $a6, $a4, 2458 + lu32i.d $a6, 104857 + lu52i.d $a6, $a6, 1023 + vreplgr2vr.d $vr3, $a6 + ori $a6, $a2, 3880 + lu32i.d $a6, 129446 + lu52i.d $a6, $a6, 1023 + vreplgr2vr.d $vr4, $a6 + move $a6, $a0 .p2align 4, , 16 .LBB9_921: # %vector.body # =>This Inner Loop Header: Depth=1 vaddi.wu $vr5, $vr2, 2 - vpickve2gr.w $a5, $vr2, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa6, $a5 + vpickve2gr.w $a7, $vr2, 1 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa6, $a7 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a5, $vr2, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa7, $a5 + vpickve2gr.w $a7, $vr2, 0 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa7, $a7 ffint.d.l $fa7, $fa7 vextrins.d $vr7, $vr6, 16 - vpickve2gr.w $a5, $vr5, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa6, $a5 + vpickve2gr.w $a7, $vr5, 1 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa6, $a7 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a5, $vr5, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa5, $a5 + vpickve2gr.w $a7, $vr5, 0 + bstrpick.d $a7, $a7, 31, 0 + movgr2fr.d $fa5, $a7 ffint.d.l $fa5, $fa5 vextrins.d $vr5, $vr6, 16 vfadd.d $vr6, $vr7, $vr3 @@ -29192,34 +30710,38 @@ _Z8loopInitj: # @_Z8loopInitj vfadd.d $vr5, $vr5, $vr4 vfdiv.d $vr6, $vr6, $vr7 vfdiv.d $vr5, $vr8, $vr5 - vst $vr6, $a3, -16 - vst $vr5, $a3, 0 + vst $vr6, $a5, -16 + vst $vr5, $a5, 0 vaddi.wu $vr2, $vr2, 4 - addi.d $a4, $a4, -4 - addi.d $a3, $a3, 32 - bnez $a4, .LBB9_921 + addi.d $a6, $a6, -4 + addi.d $a5, $a5, 32 + bnez $a6, .LBB9_921 # %bb.922: # %middle.block beq $a0, $a1, .LBB9_1187 .LBB9_923: # %.lr.ph.i1341.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 - alsl.d $a2, $a0, $a2, 3 + alsl.d $a3, $a0, $a3, 3 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + ori $a2, $a2, 3880 + lu32i.d $a2, 129446 + lu52i.d $a2, $a2, 1023 + movgr2fr.d $fa2, $a2 .p2align 4, , 16 .LBB9_924: # %.lr.ph.i1341 # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a0, 31, 0 - movgr2fr.d $fa3, $a3 + bstrpick.d $a2, $a0, 31, 0 + movgr2fr.d $fa3, $a2 ffint.d.l $fa3, $fa3 fadd.d $fa4, $fa3, $fa1 fmul.d $fa4, $fa0, $fa4 fadd.d $fa3, $fa3, $fa2 fdiv.d $fa3, $fa4, $fa3 - fst.d $fa3, $a2, 0 + fst.d $fa3, $a3, 0 addi.d $a1, $a1, -1 - addi.d $a2, $a2, 8 + addi.d $a3, $a3, 8 addi.w $a0, $a0, 1 bnez $a1, .LBB9_924 b .LBB9_1187 @@ -29281,12 +30803,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.927: # %middle.block2533 beq $a1, $a2, .LBB9_930 .LBB9_928: # %.lr.ph.i859.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_929: # %.lr.ph.i859 # =>This Inner Loop Header: Depth=1 @@ -29375,12 +30903,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.935: # %middle.block2549 beq $a0, $a1, .LBB9_1187 .LBB9_936: # %.lr.ph.i867.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_937: # %.lr.ph.i867 # =>This Inner Loop Header: Depth=1 @@ -29455,12 +30989,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.940: # %middle.block2629 beq $a1, $a2, .LBB9_943 .LBB9_941: # %.lr.ph.i771.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_942: # %.lr.ph.i771 # =>This Inner Loop Header: Depth=1 @@ -29549,12 +31089,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.948: # %middle.block2645 beq $a1, $a2, .LBB9_951 .LBB9_949: # %.lr.ph.i779.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_950: # %.lr.ph.i779 # =>This Inner Loop Header: Depth=1 @@ -29643,12 +31189,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.956: # %middle.block2661 beq $a1, $a2, .LBB9_959 .LBB9_957: # %.lr.ph.i787.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_958: # %.lr.ph.i787 # =>This Inner Loop Header: Depth=1 @@ -29737,12 +31289,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.964: # %middle.block2677 beq $a1, $a2, .LBB9_967 .LBB9_965: # %.lr.ph.i795.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_966: # %.lr.ph.i795 # =>This Inner Loop Header: Depth=1 @@ -29831,12 +31389,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.972: # %middle.block2693 beq $a1, $a2, .LBB9_975 .LBB9_973: # %.lr.ph.i803.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_974: # %.lr.ph.i803 # =>This Inner Loop Header: Depth=1 @@ -29925,12 +31489,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.980: # %middle.block2709 beq $a1, $a2, .LBB9_983 .LBB9_981: # %.lr.ph.i811.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_982: # %.lr.ph.i811 # =>This Inner Loop Header: Depth=1 @@ -30019,12 +31589,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.988: # %middle.block2725 beq $a0, $a1, .LBB9_1187 .LBB9_989: # %.lr.ph.i819.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_990: # %.lr.ph.i819 # =>This Inner Loop Header: Depth=1 @@ -30099,12 +31675,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.993: # %middle.block2501 beq $a1, $a2, .LBB9_996 .LBB9_994: # %.lr.ph.i875.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_995: # %.lr.ph.i875 # =>This Inner Loop Header: Depth=1 @@ -30193,12 +31775,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1001: # %middle.block2517 beq $a0, $a1, .LBB9_1187 .LBB9_1002: # %.lr.ph.i883.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_1003: # %.lr.ph.i883 # =>This Inner Loop Header: Depth=1 @@ -30273,12 +31861,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1006: # %middle.block1845 beq $a1, $a2, .LBB9_1009 .LBB9_1007: # %.lr.ph.i1149.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1008: # %.lr.ph.i1149 # =>This Inner Loop Header: Depth=1 @@ -30367,12 +31961,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1014: # %middle.block1861 beq $a1, $a2, .LBB9_1017 .LBB9_1015: # %.lr.ph.i1157.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1016: # %.lr.ph.i1157 # =>This Inner Loop Header: Depth=1 @@ -30461,12 +32061,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1022: # %middle.block1877 beq $a1, $a2, .LBB9_1025 .LBB9_1023: # %.lr.ph.i1165.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1024: # %.lr.ph.i1165 # =>This Inner Loop Header: Depth=1 @@ -30555,12 +32161,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1030: # %middle.block1893 beq $a1, $a2, .LBB9_1033 .LBB9_1031: # %.lr.ph.i1173.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1032: # %.lr.ph.i1173 # =>This Inner Loop Header: Depth=1 @@ -30649,12 +32261,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1038: # %middle.block1909 beq $a1, $a2, .LBB9_1041 .LBB9_1039: # %.lr.ph.i1181.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1040: # %.lr.ph.i1181 # =>This Inner Loop Header: Depth=1 @@ -30743,12 +32361,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1046: # %middle.block1925 beq $a1, $a2, .LBB9_1049 .LBB9_1047: # %.lr.ph.i1189.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1048: # %.lr.ph.i1189 # =>This Inner Loop Header: Depth=1 @@ -30837,12 +32461,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1054: # %middle.block1941 beq $a1, $a2, .LBB9_1057 .LBB9_1055: # %.lr.ph.i1197.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1056: # %.lr.ph.i1197 # =>This Inner Loop Header: Depth=1 @@ -30931,12 +32561,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1062: # %middle.block1957 beq $a1, $a2, .LBB9_1065 .LBB9_1063: # %.lr.ph.i1205.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1064: # %.lr.ph.i1205 # =>This Inner Loop Header: Depth=1 @@ -31025,12 +32661,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1070: # %middle.block1973 beq $a1, $a2, .LBB9_1073 .LBB9_1071: # %.lr.ph.i1213.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1072: # %.lr.ph.i1213 # =>This Inner Loop Header: Depth=1 @@ -31119,12 +32761,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1078: # %middle.block1989 beq $a0, $a1, .LBB9_1187 .LBB9_1079: # %.lr.ph.i1221.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_1080: # %.lr.ph.i1221 # =>This Inner Loop Header: Depth=1 @@ -31199,12 +32847,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1083: # %middle.block1797 beq $a1, $a2, .LBB9_1086 .LBB9_1084: # %.lr.ph.i1229.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1085: # %.lr.ph.i1229 # =>This Inner Loop Header: Depth=1 @@ -31293,12 +32947,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1091: # %middle.block1813 beq $a1, $a2, .LBB9_1094 .LBB9_1092: # %.lr.ph.i1237.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1093: # %.lr.ph.i1237 # =>This Inner Loop Header: Depth=1 @@ -31387,12 +33047,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1099: # %middle.block1829 beq $a0, $a1, .LBB9_1187 .LBB9_1100: # %.lr.ph.i1245.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_1101: # %.lr.ph.i1245 # =>This Inner Loop Header: Depth=1 @@ -31467,12 +33133,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1104: # %middle.block2069 beq $a1, $a2, .LBB9_1107 .LBB9_1105: # %.lr.ph.i1029.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1106: # %.lr.ph.i1029 # =>This Inner Loop Header: Depth=1 @@ -31561,12 +33233,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1112: # %middle.block2085 beq $a1, $a2, .LBB9_1115 .LBB9_1113: # %.lr.ph.i1037.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1114: # %.lr.ph.i1037 # =>This Inner Loop Header: Depth=1 @@ -31655,12 +33333,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1120: # %middle.block2101 beq $a1, $a2, .LBB9_1123 .LBB9_1121: # %.lr.ph.i1045.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1122: # %.lr.ph.i1045 # =>This Inner Loop Header: Depth=1 @@ -31749,12 +33433,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1128: # %middle.block2117 beq $a1, $a2, .LBB9_1131 .LBB9_1129: # %.lr.ph.i1053.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1130: # %.lr.ph.i1053 # =>This Inner Loop Header: Depth=1 @@ -31843,12 +33533,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1136: # %middle.block2133 beq $a1, $a2, .LBB9_1139 .LBB9_1137: # %.lr.ph.i1061.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1138: # %.lr.ph.i1061 # =>This Inner Loop Header: Depth=1 @@ -31937,12 +33633,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1144: # %middle.block2149 beq $a1, $a2, .LBB9_1147 .LBB9_1145: # %.lr.ph.i1069.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1146: # %.lr.ph.i1069 # =>This Inner Loop Header: Depth=1 @@ -32031,12 +33733,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1152: # %middle.block2165 beq $a1, $a2, .LBB9_1155 .LBB9_1153: # %.lr.ph.i1077.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1154: # %.lr.ph.i1077 # =>This Inner Loop Header: Depth=1 @@ -32125,12 +33833,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1160: # %middle.block2181 beq $a1, $a2, .LBB9_1163 .LBB9_1161: # %.lr.ph.i1085.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1162: # %.lr.ph.i1085 # =>This Inner Loop Header: Depth=1 @@ -32219,12 +33933,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1168: # %middle.block2197 beq $a1, $a2, .LBB9_1171 .LBB9_1169: # %.lr.ph.i1093.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1170: # %.lr.ph.i1093 # =>This Inner Loop Header: Depth=1 @@ -32313,12 +34033,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1176: # %middle.block2213 beq $a1, $a2, .LBB9_1179 .LBB9_1177: # %.lr.ph.i1101.preheader - pcalau12i $a4, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI9_1) - pcalau12i $a4, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a4, %pc_lo12(.LCPI9_2) sub.d $a2, $a2, $a1 alsl.d $a3, $a1, $a3, 3 + lu12i.w $a4, -419431 + ori $a4, $a4, 2458 + lu32i.d $a4, 104857 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa1, $a4 + lu12i.w $a4, -307024 + ori $a4, $a4, 3880 + lu32i.d $a4, 129446 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa2, $a4 .p2align 4, , 16 .LBB9_1178: # %.lr.ph.i1101 # =>This Inner Loop Header: Depth=1 @@ -32407,12 +34133,18 @@ _Z8loopInitj: # @_Z8loopInitj # %bb.1184: # %middle.block2229 beq $a0, $a1, .LBB9_1187 .LBB9_1185: # %.lr.ph.i1109.preheader - pcalau12i $a3, %pc_hi20(.LCPI9_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI9_1) - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI9_2) sub.d $a1, $a1, $a0 alsl.d $a2, $a0, $a2, 3 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, -307024 + ori $a3, $a3, 3880 + lu32i.d $a3, 129446 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa2, $a3 .p2align 4, , 16 .LBB9_1186: # %.lr.ph.i1109 # =>This Inner Loop Header: Depth=1 diff --git a/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/__/runReferenceLoops.s b/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/__/runReferenceLoops.s index b31732f8..26d4daf4 100644 --- a/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/__/runReferenceLoops.s +++ b/results/MicroBenchmarks/LCALS/SubsetCRawLoops/CMakeFiles/lcalsCRaw.dir/__/runReferenceLoops.s @@ -903,14 +903,8 @@ _ZN8LoopStatD2Ev: # @_ZN8LoopStatD2Ev .size _ZN8LoopStatD2Ev, .Lfunc_end3-_ZN8LoopStatD2Ev .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z25computeReferenceLoopTimesv -.LCPI4_0: - .dword 0x3f5426fe718a86d7 # double 0.00123 -.LCPI4_1: - .dword 0xbf5426fe718a86d7 # double -0.00123 .text - .globl _Z25computeReferenceLoopTimesv + .globl _Z25computeReferenceLoopTimesv # -- Begin function _Z25computeReferenceLoopTimesv .p2align 5 .type _Z25computeReferenceLoopTimesv,@function _Z25computeReferenceLoopTimesv: # @_Z25computeReferenceLoopTimesv @@ -1649,13 +1643,16 @@ _Z25computeReferenceLoopTimesv: # @_Z25computeReferenceLoopTimesv pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 st.d $a0, $sp, 640 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI4_0) - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI4_1) ori $a0, $zero, 1 st.b $a0, $sp, 648 + lu12i.w $a0, 465064 + ori $a0, $a0, 1751 + lu32i.d $a0, 272126 + lu52i.d $a1, $a0, 1013 + movgr2fr.d $fs0, $a1 fadd.d $fa0, $fs2, $fs0 + lu52i.d $a0, $a0, -1035 + movgr2fr.d $fs1, $a0 fadd.d $fa1, $fs2, $fs1 fdiv.d $fa0, $fa0, $fa1 fst.d $fa0, $fp, 384 diff --git a/results/MicroBenchmarks/LoopVectorization/CMakeFiles/LoopVectorizationBenchmarks.dir/MathFunctions.s b/results/MicroBenchmarks/LoopVectorization/CMakeFiles/LoopVectorizationBenchmarks.dir/MathFunctions.s index 9297e7df..34a09e46 100644 --- a/results/MicroBenchmarks/LoopVectorization/CMakeFiles/LoopVectorizationBenchmarks.dir/MathFunctions.s +++ b/results/MicroBenchmarks/LoopVectorization/CMakeFiles/LoopVectorizationBenchmarks.dir/MathFunctions.s @@ -3,12 +3,8 @@ .globl _ZSt21ios_base_library_initv # End of file scope inline assembly - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z29BENCHMARK_expf_autovec_float_RN9benchmark5StateE -.LCPI0_0: - .word 0x00800000 # float 1.17549435E-38 .text - .globl _Z29BENCHMARK_expf_autovec_float_RN9benchmark5StateE + .globl _Z29BENCHMARK_expf_autovec_float_RN9benchmark5StateE # -- Begin function _Z29BENCHMARK_expf_autovec_float_RN9benchmark5StateE .p2align 5 .type _Z29BENCHMARK_expf_autovec_float_RN9benchmark5StateE,@function _Z29BENCHMARK_expf_autovec_float_RN9benchmark5StateE: # @_Z29BENCHMARK_expf_autovec_float_RN9benchmark5StateE @@ -127,9 +123,9 @@ _Z29BENCHMARK_expf_autovec_float_RN9benchmark5StateE: # @_Z29BENCHMARK_expf_auto bne $s5, $s4, .LBB0_8 # %bb.9: # %_ZL14run_fn_autovecIfEvPT_S1_S1_PFS0_S0_E.exit.preheader.preheader move $s2, $zero - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI0_0) - movgr2fr.w $fa1, $zero + movgr2fr.w $fa0, $zero + lu12i.w $a0, 2048 + movgr2fr.w $fa1, $a0 lu12i.w $a0, 2 ori $s3, $a0, 1808 move $s4, $s1 @@ -155,7 +151,7 @@ _Z29BENCHMARK_expf_autovec_float_RN9benchmark5StateE: # @_Z29BENCHMARK_expf_auto bcnez $fcc0, .LBB0_12 # %bb.14: # in Loop: Header=BB0_13 Depth=1 ori $a0, $zero, 2 - fcmp.ceq.s $fcc0, $fa3, $fa1 + fcmp.ceq.s $fcc0, $fa3, $fa0 ori $a1, $zero, 2 bcnez $fcc0, .LBB0_18 # %bb.15: # in Loop: Header=BB0_13 Depth=1 @@ -171,13 +167,13 @@ _Z29BENCHMARK_expf_autovec_float_RN9benchmark5StateE: # @_Z29BENCHMARK_expf_auto bnez $a2, .LBB0_18 # %bb.17: # in Loop: Header=BB0_13 Depth=1 fabs.s $fa3, $fa3 - fcmp.cule.s $fcc0, $fa0, $fa3 + fcmp.cule.s $fcc0, $fa1, $fa3 movcf2gr $a1, $fcc0 addi.d $a1, $a1, 3 .p2align 4, , 16 .LBB0_18: # %_ZSt10fpclassifyf.exit # in Loop: Header=BB0_13 Depth=1 - fcmp.ceq.s $fcc0, $fa2, $fa1 + fcmp.ceq.s $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB0_11 .LBB0_19: # in Loop: Header=BB0_13 Depth=1 fcmp.cun.s $fcc0, $fa2, $fa2 @@ -192,13 +188,13 @@ _Z29BENCHMARK_expf_autovec_float_RN9benchmark5StateE: # @_Z29BENCHMARK_expf_auto bnez $a2, .LBB0_11 # %bb.21: # in Loop: Header=BB0_13 Depth=1 fabs.s $fa2, $fa2 - fcmp.cule.s $fcc0, $fa0, $fa2 + fcmp.cule.s $fcc0, $fa1, $fa2 movcf2gr $a0, $fcc0 addi.d $a0, $a0, 3 b .LBB0_11 .LBB0_22: # in Loop: Header=BB0_13 Depth=1 move $a1, $zero - fcmp.ceq.s $fcc0, $fa2, $fa1 + fcmp.ceq.s $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB0_11 b .LBB0_19 .LBB0_23: # %_ZNSt10unique_ptrIA_fSt14default_deleteIS0_EED2Ev.exit @@ -731,12 +727,8 @@ GCC_except_table1: .Lcst_end1: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29BENCHMARK_exp_autovec_double_RN9benchmark5StateE -.LCPI2_0: - .dword 0x0010000000000000 # double 2.2250738585072014E-308 .text - .globl _Z29BENCHMARK_exp_autovec_double_RN9benchmark5StateE + .globl _Z29BENCHMARK_exp_autovec_double_RN9benchmark5StateE # -- Begin function _Z29BENCHMARK_exp_autovec_double_RN9benchmark5StateE .p2align 5 .type _Z29BENCHMARK_exp_autovec_double_RN9benchmark5StateE,@function _Z29BENCHMARK_exp_autovec_double_RN9benchmark5StateE: # @_Z29BENCHMARK_exp_autovec_double_RN9benchmark5StateE @@ -855,9 +847,9 @@ _Z29BENCHMARK_exp_autovec_double_RN9benchmark5StateE: # @_Z29BENCHMARK_exp_autov bne $s5, $s4, .LBB2_8 # %bb.9: # %_ZL14run_fn_autovecIdEvPT_S1_S1_PFS0_S0_E.exit.preheader.preheader move $s2, $zero - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI2_0) - movgr2fr.d $fa1, $zero + movgr2fr.d $fa0, $zero + lu52i.d $a0, $zero, 1 + movgr2fr.d $fa1, $a0 lu12i.w $a0, 2 ori $s3, $a0, 1808 move $s4, $s1 @@ -883,7 +875,7 @@ _Z29BENCHMARK_exp_autovec_double_RN9benchmark5StateE: # @_Z29BENCHMARK_exp_autov bcnez $fcc0, .LBB2_12 # %bb.14: # in Loop: Header=BB2_13 Depth=1 ori $a0, $zero, 2 - fcmp.ceq.d $fcc0, $fa3, $fa1 + fcmp.ceq.d $fcc0, $fa3, $fa0 ori $a1, $zero, 2 bcnez $fcc0, .LBB2_18 # %bb.15: # in Loop: Header=BB2_13 Depth=1 @@ -899,13 +891,13 @@ _Z29BENCHMARK_exp_autovec_double_RN9benchmark5StateE: # @_Z29BENCHMARK_exp_autov bnez $a2, .LBB2_18 # %bb.17: # in Loop: Header=BB2_13 Depth=1 fabs.d $fa3, $fa3 - fcmp.cule.d $fcc0, $fa0, $fa3 + fcmp.cule.d $fcc0, $fa1, $fa3 movcf2gr $a1, $fcc0 addi.d $a1, $a1, 3 .p2align 4, , 16 .LBB2_18: # %_ZSt10fpclassifyd.exit # in Loop: Header=BB2_13 Depth=1 - fcmp.ceq.d $fcc0, $fa2, $fa1 + fcmp.ceq.d $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB2_11 .LBB2_19: # in Loop: Header=BB2_13 Depth=1 fcmp.cun.d $fcc0, $fa2, $fa2 @@ -920,13 +912,13 @@ _Z29BENCHMARK_exp_autovec_double_RN9benchmark5StateE: # @_Z29BENCHMARK_exp_autov bnez $a2, .LBB2_11 # %bb.21: # in Loop: Header=BB2_13 Depth=1 fabs.d $fa2, $fa2 - fcmp.cule.d $fcc0, $fa0, $fa2 + fcmp.cule.d $fcc0, $fa1, $fa2 movcf2gr $a0, $fcc0 addi.d $a0, $a0, 3 b .LBB2_11 .LBB2_22: # in Loop: Header=BB2_13 Depth=1 move $a1, $zero - fcmp.ceq.d $fcc0, $fa2, $fa1 + fcmp.ceq.d $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB2_11 b .LBB2_19 .LBB2_23: # %_ZNSt10unique_ptrIA_dSt14default_deleteIS0_EED2Ev.exit @@ -1457,12 +1449,8 @@ GCC_except_table3: .Lcst_end3: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z30BENCHMARK_acosf_autovec_float_RN9benchmark5StateE -.LCPI4_0: - .word 0x00800000 # float 1.17549435E-38 .text - .globl _Z30BENCHMARK_acosf_autovec_float_RN9benchmark5StateE + .globl _Z30BENCHMARK_acosf_autovec_float_RN9benchmark5StateE # -- Begin function _Z30BENCHMARK_acosf_autovec_float_RN9benchmark5StateE .p2align 5 .type _Z30BENCHMARK_acosf_autovec_float_RN9benchmark5StateE,@function _Z30BENCHMARK_acosf_autovec_float_RN9benchmark5StateE: # @_Z30BENCHMARK_acosf_autovec_float_RN9benchmark5StateE @@ -1581,9 +1569,9 @@ _Z30BENCHMARK_acosf_autovec_float_RN9benchmark5StateE: # @_Z30BENCHMARK_acosf_au bne $s5, $s4, .LBB4_8 # %bb.9: # %_ZL14run_fn_autovecIfEvPT_S1_S1_PFS0_S0_E.exit.preheader.preheader move $s2, $zero - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI4_0) - movgr2fr.w $fa1, $zero + movgr2fr.w $fa0, $zero + lu12i.w $a0, 2048 + movgr2fr.w $fa1, $a0 lu12i.w $a0, 2 ori $s3, $a0, 1808 move $s4, $s1 @@ -1609,7 +1597,7 @@ _Z30BENCHMARK_acosf_autovec_float_RN9benchmark5StateE: # @_Z30BENCHMARK_acosf_au bcnez $fcc0, .LBB4_12 # %bb.14: # in Loop: Header=BB4_13 Depth=1 ori $a0, $zero, 2 - fcmp.ceq.s $fcc0, $fa3, $fa1 + fcmp.ceq.s $fcc0, $fa3, $fa0 ori $a1, $zero, 2 bcnez $fcc0, .LBB4_18 # %bb.15: # in Loop: Header=BB4_13 Depth=1 @@ -1625,13 +1613,13 @@ _Z30BENCHMARK_acosf_autovec_float_RN9benchmark5StateE: # @_Z30BENCHMARK_acosf_au bnez $a2, .LBB4_18 # %bb.17: # in Loop: Header=BB4_13 Depth=1 fabs.s $fa3, $fa3 - fcmp.cule.s $fcc0, $fa0, $fa3 + fcmp.cule.s $fcc0, $fa1, $fa3 movcf2gr $a1, $fcc0 addi.d $a1, $a1, 3 .p2align 4, , 16 .LBB4_18: # %_ZSt10fpclassifyf.exit # in Loop: Header=BB4_13 Depth=1 - fcmp.ceq.s $fcc0, $fa2, $fa1 + fcmp.ceq.s $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB4_11 .LBB4_19: # in Loop: Header=BB4_13 Depth=1 fcmp.cun.s $fcc0, $fa2, $fa2 @@ -1646,13 +1634,13 @@ _Z30BENCHMARK_acosf_autovec_float_RN9benchmark5StateE: # @_Z30BENCHMARK_acosf_au bnez $a2, .LBB4_11 # %bb.21: # in Loop: Header=BB4_13 Depth=1 fabs.s $fa2, $fa2 - fcmp.cule.s $fcc0, $fa0, $fa2 + fcmp.cule.s $fcc0, $fa1, $fa2 movcf2gr $a0, $fcc0 addi.d $a0, $a0, 3 b .LBB4_11 .LBB4_22: # in Loop: Header=BB4_13 Depth=1 move $a1, $zero - fcmp.ceq.s $fcc0, $fa2, $fa1 + fcmp.ceq.s $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB4_11 b .LBB4_19 .LBB4_23: # %_ZNSt10unique_ptrIA_fSt14default_deleteIS0_EED2Ev.exit @@ -2185,12 +2173,8 @@ GCC_except_table5: .Lcst_end5: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z30BENCHMARK_acos_autovec_double_RN9benchmark5StateE -.LCPI6_0: - .dword 0x0010000000000000 # double 2.2250738585072014E-308 .text - .globl _Z30BENCHMARK_acos_autovec_double_RN9benchmark5StateE + .globl _Z30BENCHMARK_acos_autovec_double_RN9benchmark5StateE # -- Begin function _Z30BENCHMARK_acos_autovec_double_RN9benchmark5StateE .p2align 5 .type _Z30BENCHMARK_acos_autovec_double_RN9benchmark5StateE,@function _Z30BENCHMARK_acos_autovec_double_RN9benchmark5StateE: # @_Z30BENCHMARK_acos_autovec_double_RN9benchmark5StateE @@ -2309,9 +2293,9 @@ _Z30BENCHMARK_acos_autovec_double_RN9benchmark5StateE: # @_Z30BENCHMARK_acos_aut bne $s5, $s4, .LBB6_8 # %bb.9: # %_ZL14run_fn_autovecIdEvPT_S1_S1_PFS0_S0_E.exit.preheader.preheader move $s2, $zero - pcalau12i $a0, %pc_hi20(.LCPI6_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI6_0) - movgr2fr.d $fa1, $zero + movgr2fr.d $fa0, $zero + lu52i.d $a0, $zero, 1 + movgr2fr.d $fa1, $a0 lu12i.w $a0, 2 ori $s3, $a0, 1808 move $s4, $s1 @@ -2337,7 +2321,7 @@ _Z30BENCHMARK_acos_autovec_double_RN9benchmark5StateE: # @_Z30BENCHMARK_acos_aut bcnez $fcc0, .LBB6_12 # %bb.14: # in Loop: Header=BB6_13 Depth=1 ori $a0, $zero, 2 - fcmp.ceq.d $fcc0, $fa3, $fa1 + fcmp.ceq.d $fcc0, $fa3, $fa0 ori $a1, $zero, 2 bcnez $fcc0, .LBB6_18 # %bb.15: # in Loop: Header=BB6_13 Depth=1 @@ -2353,13 +2337,13 @@ _Z30BENCHMARK_acos_autovec_double_RN9benchmark5StateE: # @_Z30BENCHMARK_acos_aut bnez $a2, .LBB6_18 # %bb.17: # in Loop: Header=BB6_13 Depth=1 fabs.d $fa3, $fa3 - fcmp.cule.d $fcc0, $fa0, $fa3 + fcmp.cule.d $fcc0, $fa1, $fa3 movcf2gr $a1, $fcc0 addi.d $a1, $a1, 3 .p2align 4, , 16 .LBB6_18: # %_ZSt10fpclassifyd.exit # in Loop: Header=BB6_13 Depth=1 - fcmp.ceq.d $fcc0, $fa2, $fa1 + fcmp.ceq.d $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB6_11 .LBB6_19: # in Loop: Header=BB6_13 Depth=1 fcmp.cun.d $fcc0, $fa2, $fa2 @@ -2374,13 +2358,13 @@ _Z30BENCHMARK_acos_autovec_double_RN9benchmark5StateE: # @_Z30BENCHMARK_acos_aut bnez $a2, .LBB6_11 # %bb.21: # in Loop: Header=BB6_13 Depth=1 fabs.d $fa2, $fa2 - fcmp.cule.d $fcc0, $fa0, $fa2 + fcmp.cule.d $fcc0, $fa1, $fa2 movcf2gr $a0, $fcc0 addi.d $a0, $a0, 3 b .LBB6_11 .LBB6_22: # in Loop: Header=BB6_13 Depth=1 move $a1, $zero - fcmp.ceq.d $fcc0, $fa2, $fa1 + fcmp.ceq.d $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB6_11 b .LBB6_19 .LBB6_23: # %_ZNSt10unique_ptrIA_dSt14default_deleteIS0_EED2Ev.exit @@ -2911,12 +2895,8 @@ GCC_except_table7: .Lcst_end7: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z30BENCHMARK_asinf_autovec_float_RN9benchmark5StateE -.LCPI8_0: - .word 0x00800000 # float 1.17549435E-38 .text - .globl _Z30BENCHMARK_asinf_autovec_float_RN9benchmark5StateE + .globl _Z30BENCHMARK_asinf_autovec_float_RN9benchmark5StateE # -- Begin function _Z30BENCHMARK_asinf_autovec_float_RN9benchmark5StateE .p2align 5 .type _Z30BENCHMARK_asinf_autovec_float_RN9benchmark5StateE,@function _Z30BENCHMARK_asinf_autovec_float_RN9benchmark5StateE: # @_Z30BENCHMARK_asinf_autovec_float_RN9benchmark5StateE @@ -3035,9 +3015,9 @@ _Z30BENCHMARK_asinf_autovec_float_RN9benchmark5StateE: # @_Z30BENCHMARK_asinf_au bne $s5, $s4, .LBB8_8 # %bb.9: # %_ZL14run_fn_autovecIfEvPT_S1_S1_PFS0_S0_E.exit.preheader.preheader move $s2, $zero - pcalau12i $a0, %pc_hi20(.LCPI8_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI8_0) - movgr2fr.w $fa1, $zero + movgr2fr.w $fa0, $zero + lu12i.w $a0, 2048 + movgr2fr.w $fa1, $a0 lu12i.w $a0, 2 ori $s3, $a0, 1808 move $s4, $s1 @@ -3063,7 +3043,7 @@ _Z30BENCHMARK_asinf_autovec_float_RN9benchmark5StateE: # @_Z30BENCHMARK_asinf_au bcnez $fcc0, .LBB8_12 # %bb.14: # in Loop: Header=BB8_13 Depth=1 ori $a0, $zero, 2 - fcmp.ceq.s $fcc0, $fa3, $fa1 + fcmp.ceq.s $fcc0, $fa3, $fa0 ori $a1, $zero, 2 bcnez $fcc0, .LBB8_18 # %bb.15: # in Loop: Header=BB8_13 Depth=1 @@ -3079,13 +3059,13 @@ _Z30BENCHMARK_asinf_autovec_float_RN9benchmark5StateE: # @_Z30BENCHMARK_asinf_au bnez $a2, .LBB8_18 # %bb.17: # in Loop: Header=BB8_13 Depth=1 fabs.s $fa3, $fa3 - fcmp.cule.s $fcc0, $fa0, $fa3 + fcmp.cule.s $fcc0, $fa1, $fa3 movcf2gr $a1, $fcc0 addi.d $a1, $a1, 3 .p2align 4, , 16 .LBB8_18: # %_ZSt10fpclassifyf.exit # in Loop: Header=BB8_13 Depth=1 - fcmp.ceq.s $fcc0, $fa2, $fa1 + fcmp.ceq.s $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB8_11 .LBB8_19: # in Loop: Header=BB8_13 Depth=1 fcmp.cun.s $fcc0, $fa2, $fa2 @@ -3100,13 +3080,13 @@ _Z30BENCHMARK_asinf_autovec_float_RN9benchmark5StateE: # @_Z30BENCHMARK_asinf_au bnez $a2, .LBB8_11 # %bb.21: # in Loop: Header=BB8_13 Depth=1 fabs.s $fa2, $fa2 - fcmp.cule.s $fcc0, $fa0, $fa2 + fcmp.cule.s $fcc0, $fa1, $fa2 movcf2gr $a0, $fcc0 addi.d $a0, $a0, 3 b .LBB8_11 .LBB8_22: # in Loop: Header=BB8_13 Depth=1 move $a1, $zero - fcmp.ceq.s $fcc0, $fa2, $fa1 + fcmp.ceq.s $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB8_11 b .LBB8_19 .LBB8_23: # %_ZNSt10unique_ptrIA_fSt14default_deleteIS0_EED2Ev.exit @@ -3639,12 +3619,8 @@ GCC_except_table9: .Lcst_end9: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z30BENCHMARK_asin_autovec_double_RN9benchmark5StateE -.LCPI10_0: - .dword 0x0010000000000000 # double 2.2250738585072014E-308 .text - .globl _Z30BENCHMARK_asin_autovec_double_RN9benchmark5StateE + .globl _Z30BENCHMARK_asin_autovec_double_RN9benchmark5StateE # -- Begin function _Z30BENCHMARK_asin_autovec_double_RN9benchmark5StateE .p2align 5 .type _Z30BENCHMARK_asin_autovec_double_RN9benchmark5StateE,@function _Z30BENCHMARK_asin_autovec_double_RN9benchmark5StateE: # @_Z30BENCHMARK_asin_autovec_double_RN9benchmark5StateE @@ -3763,9 +3739,9 @@ _Z30BENCHMARK_asin_autovec_double_RN9benchmark5StateE: # @_Z30BENCHMARK_asin_aut bne $s5, $s4, .LBB10_8 # %bb.9: # %_ZL14run_fn_autovecIdEvPT_S1_S1_PFS0_S0_E.exit.preheader.preheader move $s2, $zero - pcalau12i $a0, %pc_hi20(.LCPI10_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI10_0) - movgr2fr.d $fa1, $zero + movgr2fr.d $fa0, $zero + lu52i.d $a0, $zero, 1 + movgr2fr.d $fa1, $a0 lu12i.w $a0, 2 ori $s3, $a0, 1808 move $s4, $s1 @@ -3791,7 +3767,7 @@ _Z30BENCHMARK_asin_autovec_double_RN9benchmark5StateE: # @_Z30BENCHMARK_asin_aut bcnez $fcc0, .LBB10_12 # %bb.14: # in Loop: Header=BB10_13 Depth=1 ori $a0, $zero, 2 - fcmp.ceq.d $fcc0, $fa3, $fa1 + fcmp.ceq.d $fcc0, $fa3, $fa0 ori $a1, $zero, 2 bcnez $fcc0, .LBB10_18 # %bb.15: # in Loop: Header=BB10_13 Depth=1 @@ -3807,13 +3783,13 @@ _Z30BENCHMARK_asin_autovec_double_RN9benchmark5StateE: # @_Z30BENCHMARK_asin_aut bnez $a2, .LBB10_18 # %bb.17: # in Loop: Header=BB10_13 Depth=1 fabs.d $fa3, $fa3 - fcmp.cule.d $fcc0, $fa0, $fa3 + fcmp.cule.d $fcc0, $fa1, $fa3 movcf2gr $a1, $fcc0 addi.d $a1, $a1, 3 .p2align 4, , 16 .LBB10_18: # %_ZSt10fpclassifyd.exit # in Loop: Header=BB10_13 Depth=1 - fcmp.ceq.d $fcc0, $fa2, $fa1 + fcmp.ceq.d $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB10_11 .LBB10_19: # in Loop: Header=BB10_13 Depth=1 fcmp.cun.d $fcc0, $fa2, $fa2 @@ -3828,13 +3804,13 @@ _Z30BENCHMARK_asin_autovec_double_RN9benchmark5StateE: # @_Z30BENCHMARK_asin_aut bnez $a2, .LBB10_11 # %bb.21: # in Loop: Header=BB10_13 Depth=1 fabs.d $fa2, $fa2 - fcmp.cule.d $fcc0, $fa0, $fa2 + fcmp.cule.d $fcc0, $fa1, $fa2 movcf2gr $a0, $fcc0 addi.d $a0, $a0, 3 b .LBB10_11 .LBB10_22: # in Loop: Header=BB10_13 Depth=1 move $a1, $zero - fcmp.ceq.d $fcc0, $fa2, $fa1 + fcmp.ceq.d $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB10_11 b .LBB10_19 .LBB10_23: # %_ZNSt10unique_ptrIA_dSt14default_deleteIS0_EED2Ev.exit @@ -4365,12 +4341,8 @@ GCC_except_table11: .Lcst_end11: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z30BENCHMARK_atanf_autovec_float_RN9benchmark5StateE -.LCPI12_0: - .word 0x00800000 # float 1.17549435E-38 .text - .globl _Z30BENCHMARK_atanf_autovec_float_RN9benchmark5StateE + .globl _Z30BENCHMARK_atanf_autovec_float_RN9benchmark5StateE # -- Begin function _Z30BENCHMARK_atanf_autovec_float_RN9benchmark5StateE .p2align 5 .type _Z30BENCHMARK_atanf_autovec_float_RN9benchmark5StateE,@function _Z30BENCHMARK_atanf_autovec_float_RN9benchmark5StateE: # @_Z30BENCHMARK_atanf_autovec_float_RN9benchmark5StateE @@ -4489,9 +4461,9 @@ _Z30BENCHMARK_atanf_autovec_float_RN9benchmark5StateE: # @_Z30BENCHMARK_atanf_au bne $s5, $s4, .LBB12_8 # %bb.9: # %_ZL14run_fn_autovecIfEvPT_S1_S1_PFS0_S0_E.exit.preheader.preheader move $s2, $zero - pcalau12i $a0, %pc_hi20(.LCPI12_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI12_0) - movgr2fr.w $fa1, $zero + movgr2fr.w $fa0, $zero + lu12i.w $a0, 2048 + movgr2fr.w $fa1, $a0 lu12i.w $a0, 2 ori $s3, $a0, 1808 move $s4, $s1 @@ -4517,7 +4489,7 @@ _Z30BENCHMARK_atanf_autovec_float_RN9benchmark5StateE: # @_Z30BENCHMARK_atanf_au bcnez $fcc0, .LBB12_12 # %bb.14: # in Loop: Header=BB12_13 Depth=1 ori $a0, $zero, 2 - fcmp.ceq.s $fcc0, $fa3, $fa1 + fcmp.ceq.s $fcc0, $fa3, $fa0 ori $a1, $zero, 2 bcnez $fcc0, .LBB12_18 # %bb.15: # in Loop: Header=BB12_13 Depth=1 @@ -4533,13 +4505,13 @@ _Z30BENCHMARK_atanf_autovec_float_RN9benchmark5StateE: # @_Z30BENCHMARK_atanf_au bnez $a2, .LBB12_18 # %bb.17: # in Loop: Header=BB12_13 Depth=1 fabs.s $fa3, $fa3 - fcmp.cule.s $fcc0, $fa0, $fa3 + fcmp.cule.s $fcc0, $fa1, $fa3 movcf2gr $a1, $fcc0 addi.d $a1, $a1, 3 .p2align 4, , 16 .LBB12_18: # %_ZSt10fpclassifyf.exit # in Loop: Header=BB12_13 Depth=1 - fcmp.ceq.s $fcc0, $fa2, $fa1 + fcmp.ceq.s $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB12_11 .LBB12_19: # in Loop: Header=BB12_13 Depth=1 fcmp.cun.s $fcc0, $fa2, $fa2 @@ -4554,13 +4526,13 @@ _Z30BENCHMARK_atanf_autovec_float_RN9benchmark5StateE: # @_Z30BENCHMARK_atanf_au bnez $a2, .LBB12_11 # %bb.21: # in Loop: Header=BB12_13 Depth=1 fabs.s $fa2, $fa2 - fcmp.cule.s $fcc0, $fa0, $fa2 + fcmp.cule.s $fcc0, $fa1, $fa2 movcf2gr $a0, $fcc0 addi.d $a0, $a0, 3 b .LBB12_11 .LBB12_22: # in Loop: Header=BB12_13 Depth=1 move $a1, $zero - fcmp.ceq.s $fcc0, $fa2, $fa1 + fcmp.ceq.s $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB12_11 b .LBB12_19 .LBB12_23: # %_ZNSt10unique_ptrIA_fSt14default_deleteIS0_EED2Ev.exit @@ -5093,12 +5065,8 @@ GCC_except_table13: .Lcst_end13: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z30BENCHMARK_atan_autovec_double_RN9benchmark5StateE -.LCPI14_0: - .dword 0x0010000000000000 # double 2.2250738585072014E-308 .text - .globl _Z30BENCHMARK_atan_autovec_double_RN9benchmark5StateE + .globl _Z30BENCHMARK_atan_autovec_double_RN9benchmark5StateE # -- Begin function _Z30BENCHMARK_atan_autovec_double_RN9benchmark5StateE .p2align 5 .type _Z30BENCHMARK_atan_autovec_double_RN9benchmark5StateE,@function _Z30BENCHMARK_atan_autovec_double_RN9benchmark5StateE: # @_Z30BENCHMARK_atan_autovec_double_RN9benchmark5StateE @@ -5217,9 +5185,9 @@ _Z30BENCHMARK_atan_autovec_double_RN9benchmark5StateE: # @_Z30BENCHMARK_atan_aut bne $s5, $s4, .LBB14_8 # %bb.9: # %_ZL14run_fn_autovecIdEvPT_S1_S1_PFS0_S0_E.exit.preheader.preheader move $s2, $zero - pcalau12i $a0, %pc_hi20(.LCPI14_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI14_0) - movgr2fr.d $fa1, $zero + movgr2fr.d $fa0, $zero + lu52i.d $a0, $zero, 1 + movgr2fr.d $fa1, $a0 lu12i.w $a0, 2 ori $s3, $a0, 1808 move $s4, $s1 @@ -5245,7 +5213,7 @@ _Z30BENCHMARK_atan_autovec_double_RN9benchmark5StateE: # @_Z30BENCHMARK_atan_aut bcnez $fcc0, .LBB14_12 # %bb.14: # in Loop: Header=BB14_13 Depth=1 ori $a0, $zero, 2 - fcmp.ceq.d $fcc0, $fa3, $fa1 + fcmp.ceq.d $fcc0, $fa3, $fa0 ori $a1, $zero, 2 bcnez $fcc0, .LBB14_18 # %bb.15: # in Loop: Header=BB14_13 Depth=1 @@ -5261,13 +5229,13 @@ _Z30BENCHMARK_atan_autovec_double_RN9benchmark5StateE: # @_Z30BENCHMARK_atan_aut bnez $a2, .LBB14_18 # %bb.17: # in Loop: Header=BB14_13 Depth=1 fabs.d $fa3, $fa3 - fcmp.cule.d $fcc0, $fa0, $fa3 + fcmp.cule.d $fcc0, $fa1, $fa3 movcf2gr $a1, $fcc0 addi.d $a1, $a1, 3 .p2align 4, , 16 .LBB14_18: # %_ZSt10fpclassifyd.exit # in Loop: Header=BB14_13 Depth=1 - fcmp.ceq.d $fcc0, $fa2, $fa1 + fcmp.ceq.d $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB14_11 .LBB14_19: # in Loop: Header=BB14_13 Depth=1 fcmp.cun.d $fcc0, $fa2, $fa2 @@ -5282,13 +5250,13 @@ _Z30BENCHMARK_atan_autovec_double_RN9benchmark5StateE: # @_Z30BENCHMARK_atan_aut bnez $a2, .LBB14_11 # %bb.21: # in Loop: Header=BB14_13 Depth=1 fabs.d $fa2, $fa2 - fcmp.cule.d $fcc0, $fa0, $fa2 + fcmp.cule.d $fcc0, $fa1, $fa2 movcf2gr $a0, $fcc0 addi.d $a0, $a0, 3 b .LBB14_11 .LBB14_22: # in Loop: Header=BB14_13 Depth=1 move $a1, $zero - fcmp.ceq.d $fcc0, $fa2, $fa1 + fcmp.ceq.d $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB14_11 b .LBB14_19 .LBB14_23: # %_ZNSt10unique_ptrIA_dSt14default_deleteIS0_EED2Ev.exit @@ -5819,12 +5787,8 @@ GCC_except_table15: .Lcst_end15: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z30BENCHMARK_cbrtf_autovec_float_RN9benchmark5StateE -.LCPI16_0: - .word 0x00800000 # float 1.17549435E-38 .text - .globl _Z30BENCHMARK_cbrtf_autovec_float_RN9benchmark5StateE + .globl _Z30BENCHMARK_cbrtf_autovec_float_RN9benchmark5StateE # -- Begin function _Z30BENCHMARK_cbrtf_autovec_float_RN9benchmark5StateE .p2align 5 .type _Z30BENCHMARK_cbrtf_autovec_float_RN9benchmark5StateE,@function _Z30BENCHMARK_cbrtf_autovec_float_RN9benchmark5StateE: # @_Z30BENCHMARK_cbrtf_autovec_float_RN9benchmark5StateE @@ -5943,9 +5907,9 @@ _Z30BENCHMARK_cbrtf_autovec_float_RN9benchmark5StateE: # @_Z30BENCHMARK_cbrtf_au bne $s5, $s4, .LBB16_8 # %bb.9: # %_ZL14run_fn_autovecIfEvPT_S1_S1_PFS0_S0_E.exit.preheader.preheader move $s2, $zero - pcalau12i $a0, %pc_hi20(.LCPI16_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI16_0) - movgr2fr.w $fa1, $zero + movgr2fr.w $fa0, $zero + lu12i.w $a0, 2048 + movgr2fr.w $fa1, $a0 lu12i.w $a0, 2 ori $s3, $a0, 1808 move $s4, $s1 @@ -5971,7 +5935,7 @@ _Z30BENCHMARK_cbrtf_autovec_float_RN9benchmark5StateE: # @_Z30BENCHMARK_cbrtf_au bcnez $fcc0, .LBB16_12 # %bb.14: # in Loop: Header=BB16_13 Depth=1 ori $a0, $zero, 2 - fcmp.ceq.s $fcc0, $fa3, $fa1 + fcmp.ceq.s $fcc0, $fa3, $fa0 ori $a1, $zero, 2 bcnez $fcc0, .LBB16_18 # %bb.15: # in Loop: Header=BB16_13 Depth=1 @@ -5987,13 +5951,13 @@ _Z30BENCHMARK_cbrtf_autovec_float_RN9benchmark5StateE: # @_Z30BENCHMARK_cbrtf_au bnez $a2, .LBB16_18 # %bb.17: # in Loop: Header=BB16_13 Depth=1 fabs.s $fa3, $fa3 - fcmp.cule.s $fcc0, $fa0, $fa3 + fcmp.cule.s $fcc0, $fa1, $fa3 movcf2gr $a1, $fcc0 addi.d $a1, $a1, 3 .p2align 4, , 16 .LBB16_18: # %_ZSt10fpclassifyf.exit # in Loop: Header=BB16_13 Depth=1 - fcmp.ceq.s $fcc0, $fa2, $fa1 + fcmp.ceq.s $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB16_11 .LBB16_19: # in Loop: Header=BB16_13 Depth=1 fcmp.cun.s $fcc0, $fa2, $fa2 @@ -6008,13 +5972,13 @@ _Z30BENCHMARK_cbrtf_autovec_float_RN9benchmark5StateE: # @_Z30BENCHMARK_cbrtf_au bnez $a2, .LBB16_11 # %bb.21: # in Loop: Header=BB16_13 Depth=1 fabs.s $fa2, $fa2 - fcmp.cule.s $fcc0, $fa0, $fa2 + fcmp.cule.s $fcc0, $fa1, $fa2 movcf2gr $a0, $fcc0 addi.d $a0, $a0, 3 b .LBB16_11 .LBB16_22: # in Loop: Header=BB16_13 Depth=1 move $a1, $zero - fcmp.ceq.s $fcc0, $fa2, $fa1 + fcmp.ceq.s $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB16_11 b .LBB16_19 .LBB16_23: # %_ZNSt10unique_ptrIA_fSt14default_deleteIS0_EED2Ev.exit @@ -6547,12 +6511,8 @@ GCC_except_table17: .Lcst_end17: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z30BENCHMARK_cbrt_autovec_double_RN9benchmark5StateE -.LCPI18_0: - .dword 0x0010000000000000 # double 2.2250738585072014E-308 .text - .globl _Z30BENCHMARK_cbrt_autovec_double_RN9benchmark5StateE + .globl _Z30BENCHMARK_cbrt_autovec_double_RN9benchmark5StateE # -- Begin function _Z30BENCHMARK_cbrt_autovec_double_RN9benchmark5StateE .p2align 5 .type _Z30BENCHMARK_cbrt_autovec_double_RN9benchmark5StateE,@function _Z30BENCHMARK_cbrt_autovec_double_RN9benchmark5StateE: # @_Z30BENCHMARK_cbrt_autovec_double_RN9benchmark5StateE @@ -6671,9 +6631,9 @@ _Z30BENCHMARK_cbrt_autovec_double_RN9benchmark5StateE: # @_Z30BENCHMARK_cbrt_aut bne $s5, $s4, .LBB18_8 # %bb.9: # %_ZL14run_fn_autovecIdEvPT_S1_S1_PFS0_S0_E.exit.preheader.preheader move $s2, $zero - pcalau12i $a0, %pc_hi20(.LCPI18_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI18_0) - movgr2fr.d $fa1, $zero + movgr2fr.d $fa0, $zero + lu52i.d $a0, $zero, 1 + movgr2fr.d $fa1, $a0 lu12i.w $a0, 2 ori $s3, $a0, 1808 move $s4, $s1 @@ -6699,7 +6659,7 @@ _Z30BENCHMARK_cbrt_autovec_double_RN9benchmark5StateE: # @_Z30BENCHMARK_cbrt_aut bcnez $fcc0, .LBB18_12 # %bb.14: # in Loop: Header=BB18_13 Depth=1 ori $a0, $zero, 2 - fcmp.ceq.d $fcc0, $fa3, $fa1 + fcmp.ceq.d $fcc0, $fa3, $fa0 ori $a1, $zero, 2 bcnez $fcc0, .LBB18_18 # %bb.15: # in Loop: Header=BB18_13 Depth=1 @@ -6715,13 +6675,13 @@ _Z30BENCHMARK_cbrt_autovec_double_RN9benchmark5StateE: # @_Z30BENCHMARK_cbrt_aut bnez $a2, .LBB18_18 # %bb.17: # in Loop: Header=BB18_13 Depth=1 fabs.d $fa3, $fa3 - fcmp.cule.d $fcc0, $fa0, $fa3 + fcmp.cule.d $fcc0, $fa1, $fa3 movcf2gr $a1, $fcc0 addi.d $a1, $a1, 3 .p2align 4, , 16 .LBB18_18: # %_ZSt10fpclassifyd.exit # in Loop: Header=BB18_13 Depth=1 - fcmp.ceq.d $fcc0, $fa2, $fa1 + fcmp.ceq.d $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB18_11 .LBB18_19: # in Loop: Header=BB18_13 Depth=1 fcmp.cun.d $fcc0, $fa2, $fa2 @@ -6736,13 +6696,13 @@ _Z30BENCHMARK_cbrt_autovec_double_RN9benchmark5StateE: # @_Z30BENCHMARK_cbrt_aut bnez $a2, .LBB18_11 # %bb.21: # in Loop: Header=BB18_13 Depth=1 fabs.d $fa2, $fa2 - fcmp.cule.d $fcc0, $fa0, $fa2 + fcmp.cule.d $fcc0, $fa1, $fa2 movcf2gr $a0, $fcc0 addi.d $a0, $a0, 3 b .LBB18_11 .LBB18_22: # in Loop: Header=BB18_13 Depth=1 move $a1, $zero - fcmp.ceq.d $fcc0, $fa2, $fa1 + fcmp.ceq.d $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB18_11 b .LBB18_19 .LBB18_23: # %_ZNSt10unique_ptrIA_dSt14default_deleteIS0_EED2Ev.exit @@ -7273,12 +7233,8 @@ GCC_except_table19: .Lcst_end19: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z29BENCHMARK_erff_autovec_float_RN9benchmark5StateE -.LCPI20_0: - .word 0x00800000 # float 1.17549435E-38 .text - .globl _Z29BENCHMARK_erff_autovec_float_RN9benchmark5StateE + .globl _Z29BENCHMARK_erff_autovec_float_RN9benchmark5StateE # -- Begin function _Z29BENCHMARK_erff_autovec_float_RN9benchmark5StateE .p2align 5 .type _Z29BENCHMARK_erff_autovec_float_RN9benchmark5StateE,@function _Z29BENCHMARK_erff_autovec_float_RN9benchmark5StateE: # @_Z29BENCHMARK_erff_autovec_float_RN9benchmark5StateE @@ -7397,9 +7353,9 @@ _Z29BENCHMARK_erff_autovec_float_RN9benchmark5StateE: # @_Z29BENCHMARK_erff_auto bne $s5, $s4, .LBB20_8 # %bb.9: # %_ZL14run_fn_autovecIfEvPT_S1_S1_PFS0_S0_E.exit.preheader.preheader move $s2, $zero - pcalau12i $a0, %pc_hi20(.LCPI20_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI20_0) - movgr2fr.w $fa1, $zero + movgr2fr.w $fa0, $zero + lu12i.w $a0, 2048 + movgr2fr.w $fa1, $a0 lu12i.w $a0, 2 ori $s3, $a0, 1808 move $s4, $s1 @@ -7425,7 +7381,7 @@ _Z29BENCHMARK_erff_autovec_float_RN9benchmark5StateE: # @_Z29BENCHMARK_erff_auto bcnez $fcc0, .LBB20_12 # %bb.14: # in Loop: Header=BB20_13 Depth=1 ori $a0, $zero, 2 - fcmp.ceq.s $fcc0, $fa3, $fa1 + fcmp.ceq.s $fcc0, $fa3, $fa0 ori $a1, $zero, 2 bcnez $fcc0, .LBB20_18 # %bb.15: # in Loop: Header=BB20_13 Depth=1 @@ -7441,13 +7397,13 @@ _Z29BENCHMARK_erff_autovec_float_RN9benchmark5StateE: # @_Z29BENCHMARK_erff_auto bnez $a2, .LBB20_18 # %bb.17: # in Loop: Header=BB20_13 Depth=1 fabs.s $fa3, $fa3 - fcmp.cule.s $fcc0, $fa0, $fa3 + fcmp.cule.s $fcc0, $fa1, $fa3 movcf2gr $a1, $fcc0 addi.d $a1, $a1, 3 .p2align 4, , 16 .LBB20_18: # %_ZSt10fpclassifyf.exit # in Loop: Header=BB20_13 Depth=1 - fcmp.ceq.s $fcc0, $fa2, $fa1 + fcmp.ceq.s $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB20_11 .LBB20_19: # in Loop: Header=BB20_13 Depth=1 fcmp.cun.s $fcc0, $fa2, $fa2 @@ -7462,13 +7418,13 @@ _Z29BENCHMARK_erff_autovec_float_RN9benchmark5StateE: # @_Z29BENCHMARK_erff_auto bnez $a2, .LBB20_11 # %bb.21: # in Loop: Header=BB20_13 Depth=1 fabs.s $fa2, $fa2 - fcmp.cule.s $fcc0, $fa0, $fa2 + fcmp.cule.s $fcc0, $fa1, $fa2 movcf2gr $a0, $fcc0 addi.d $a0, $a0, 3 b .LBB20_11 .LBB20_22: # in Loop: Header=BB20_13 Depth=1 move $a1, $zero - fcmp.ceq.s $fcc0, $fa2, $fa1 + fcmp.ceq.s $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB20_11 b .LBB20_19 .LBB20_23: # %_ZNSt10unique_ptrIA_fSt14default_deleteIS0_EED2Ev.exit @@ -8001,12 +7957,8 @@ GCC_except_table21: .Lcst_end21: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29BENCHMARK_erf_autovec_double_RN9benchmark5StateE -.LCPI22_0: - .dword 0x0010000000000000 # double 2.2250738585072014E-308 .text - .globl _Z29BENCHMARK_erf_autovec_double_RN9benchmark5StateE + .globl _Z29BENCHMARK_erf_autovec_double_RN9benchmark5StateE # -- Begin function _Z29BENCHMARK_erf_autovec_double_RN9benchmark5StateE .p2align 5 .type _Z29BENCHMARK_erf_autovec_double_RN9benchmark5StateE,@function _Z29BENCHMARK_erf_autovec_double_RN9benchmark5StateE: # @_Z29BENCHMARK_erf_autovec_double_RN9benchmark5StateE @@ -8125,9 +8077,9 @@ _Z29BENCHMARK_erf_autovec_double_RN9benchmark5StateE: # @_Z29BENCHMARK_erf_autov bne $s5, $s4, .LBB22_8 # %bb.9: # %_ZL14run_fn_autovecIdEvPT_S1_S1_PFS0_S0_E.exit.preheader.preheader move $s2, $zero - pcalau12i $a0, %pc_hi20(.LCPI22_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI22_0) - movgr2fr.d $fa1, $zero + movgr2fr.d $fa0, $zero + lu52i.d $a0, $zero, 1 + movgr2fr.d $fa1, $a0 lu12i.w $a0, 2 ori $s3, $a0, 1808 move $s4, $s1 @@ -8153,7 +8105,7 @@ _Z29BENCHMARK_erf_autovec_double_RN9benchmark5StateE: # @_Z29BENCHMARK_erf_autov bcnez $fcc0, .LBB22_12 # %bb.14: # in Loop: Header=BB22_13 Depth=1 ori $a0, $zero, 2 - fcmp.ceq.d $fcc0, $fa3, $fa1 + fcmp.ceq.d $fcc0, $fa3, $fa0 ori $a1, $zero, 2 bcnez $fcc0, .LBB22_18 # %bb.15: # in Loop: Header=BB22_13 Depth=1 @@ -8169,13 +8121,13 @@ _Z29BENCHMARK_erf_autovec_double_RN9benchmark5StateE: # @_Z29BENCHMARK_erf_autov bnez $a2, .LBB22_18 # %bb.17: # in Loop: Header=BB22_13 Depth=1 fabs.d $fa3, $fa3 - fcmp.cule.d $fcc0, $fa0, $fa3 + fcmp.cule.d $fcc0, $fa1, $fa3 movcf2gr $a1, $fcc0 addi.d $a1, $a1, 3 .p2align 4, , 16 .LBB22_18: # %_ZSt10fpclassifyd.exit # in Loop: Header=BB22_13 Depth=1 - fcmp.ceq.d $fcc0, $fa2, $fa1 + fcmp.ceq.d $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB22_11 .LBB22_19: # in Loop: Header=BB22_13 Depth=1 fcmp.cun.d $fcc0, $fa2, $fa2 @@ -8190,13 +8142,13 @@ _Z29BENCHMARK_erf_autovec_double_RN9benchmark5StateE: # @_Z29BENCHMARK_erf_autov bnez $a2, .LBB22_11 # %bb.21: # in Loop: Header=BB22_13 Depth=1 fabs.d $fa2, $fa2 - fcmp.cule.d $fcc0, $fa0, $fa2 + fcmp.cule.d $fcc0, $fa1, $fa2 movcf2gr $a0, $fcc0 addi.d $a0, $a0, 3 b .LBB22_11 .LBB22_22: # in Loop: Header=BB22_13 Depth=1 move $a1, $zero - fcmp.ceq.d $fcc0, $fa2, $fa1 + fcmp.ceq.d $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB22_11 b .LBB22_19 .LBB22_23: # %_ZNSt10unique_ptrIA_dSt14default_deleteIS0_EED2Ev.exit @@ -8727,12 +8679,8 @@ GCC_except_table23: .Lcst_end23: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z29BENCHMARK_cosf_autovec_float_RN9benchmark5StateE -.LCPI24_0: - .word 0x00800000 # float 1.17549435E-38 .text - .globl _Z29BENCHMARK_cosf_autovec_float_RN9benchmark5StateE + .globl _Z29BENCHMARK_cosf_autovec_float_RN9benchmark5StateE # -- Begin function _Z29BENCHMARK_cosf_autovec_float_RN9benchmark5StateE .p2align 5 .type _Z29BENCHMARK_cosf_autovec_float_RN9benchmark5StateE,@function _Z29BENCHMARK_cosf_autovec_float_RN9benchmark5StateE: # @_Z29BENCHMARK_cosf_autovec_float_RN9benchmark5StateE @@ -8851,9 +8799,9 @@ _Z29BENCHMARK_cosf_autovec_float_RN9benchmark5StateE: # @_Z29BENCHMARK_cosf_auto bne $s5, $s4, .LBB24_8 # %bb.9: # %_ZL14run_fn_autovecIfEvPT_S1_S1_PFS0_S0_E.exit.preheader.preheader move $s2, $zero - pcalau12i $a0, %pc_hi20(.LCPI24_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI24_0) - movgr2fr.w $fa1, $zero + movgr2fr.w $fa0, $zero + lu12i.w $a0, 2048 + movgr2fr.w $fa1, $a0 lu12i.w $a0, 2 ori $s3, $a0, 1808 move $s4, $s1 @@ -8879,7 +8827,7 @@ _Z29BENCHMARK_cosf_autovec_float_RN9benchmark5StateE: # @_Z29BENCHMARK_cosf_auto bcnez $fcc0, .LBB24_12 # %bb.14: # in Loop: Header=BB24_13 Depth=1 ori $a0, $zero, 2 - fcmp.ceq.s $fcc0, $fa3, $fa1 + fcmp.ceq.s $fcc0, $fa3, $fa0 ori $a1, $zero, 2 bcnez $fcc0, .LBB24_18 # %bb.15: # in Loop: Header=BB24_13 Depth=1 @@ -8895,13 +8843,13 @@ _Z29BENCHMARK_cosf_autovec_float_RN9benchmark5StateE: # @_Z29BENCHMARK_cosf_auto bnez $a2, .LBB24_18 # %bb.17: # in Loop: Header=BB24_13 Depth=1 fabs.s $fa3, $fa3 - fcmp.cule.s $fcc0, $fa0, $fa3 + fcmp.cule.s $fcc0, $fa1, $fa3 movcf2gr $a1, $fcc0 addi.d $a1, $a1, 3 .p2align 4, , 16 .LBB24_18: # %_ZSt10fpclassifyf.exit # in Loop: Header=BB24_13 Depth=1 - fcmp.ceq.s $fcc0, $fa2, $fa1 + fcmp.ceq.s $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB24_11 .LBB24_19: # in Loop: Header=BB24_13 Depth=1 fcmp.cun.s $fcc0, $fa2, $fa2 @@ -8916,13 +8864,13 @@ _Z29BENCHMARK_cosf_autovec_float_RN9benchmark5StateE: # @_Z29BENCHMARK_cosf_auto bnez $a2, .LBB24_11 # %bb.21: # in Loop: Header=BB24_13 Depth=1 fabs.s $fa2, $fa2 - fcmp.cule.s $fcc0, $fa0, $fa2 + fcmp.cule.s $fcc0, $fa1, $fa2 movcf2gr $a0, $fcc0 addi.d $a0, $a0, 3 b .LBB24_11 .LBB24_22: # in Loop: Header=BB24_13 Depth=1 move $a1, $zero - fcmp.ceq.s $fcc0, $fa2, $fa1 + fcmp.ceq.s $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB24_11 b .LBB24_19 .LBB24_23: # %_ZNSt10unique_ptrIA_fSt14default_deleteIS0_EED2Ev.exit @@ -9455,12 +9403,8 @@ GCC_except_table25: .Lcst_end25: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29BENCHMARK_cos_autovec_double_RN9benchmark5StateE -.LCPI26_0: - .dword 0x0010000000000000 # double 2.2250738585072014E-308 .text - .globl _Z29BENCHMARK_cos_autovec_double_RN9benchmark5StateE + .globl _Z29BENCHMARK_cos_autovec_double_RN9benchmark5StateE # -- Begin function _Z29BENCHMARK_cos_autovec_double_RN9benchmark5StateE .p2align 5 .type _Z29BENCHMARK_cos_autovec_double_RN9benchmark5StateE,@function _Z29BENCHMARK_cos_autovec_double_RN9benchmark5StateE: # @_Z29BENCHMARK_cos_autovec_double_RN9benchmark5StateE @@ -9579,9 +9523,9 @@ _Z29BENCHMARK_cos_autovec_double_RN9benchmark5StateE: # @_Z29BENCHMARK_cos_autov bne $s5, $s4, .LBB26_8 # %bb.9: # %_ZL14run_fn_autovecIdEvPT_S1_S1_PFS0_S0_E.exit.preheader.preheader move $s2, $zero - pcalau12i $a0, %pc_hi20(.LCPI26_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI26_0) - movgr2fr.d $fa1, $zero + movgr2fr.d $fa0, $zero + lu52i.d $a0, $zero, 1 + movgr2fr.d $fa1, $a0 lu12i.w $a0, 2 ori $s3, $a0, 1808 move $s4, $s1 @@ -9607,7 +9551,7 @@ _Z29BENCHMARK_cos_autovec_double_RN9benchmark5StateE: # @_Z29BENCHMARK_cos_autov bcnez $fcc0, .LBB26_12 # %bb.14: # in Loop: Header=BB26_13 Depth=1 ori $a0, $zero, 2 - fcmp.ceq.d $fcc0, $fa3, $fa1 + fcmp.ceq.d $fcc0, $fa3, $fa0 ori $a1, $zero, 2 bcnez $fcc0, .LBB26_18 # %bb.15: # in Loop: Header=BB26_13 Depth=1 @@ -9623,13 +9567,13 @@ _Z29BENCHMARK_cos_autovec_double_RN9benchmark5StateE: # @_Z29BENCHMARK_cos_autov bnez $a2, .LBB26_18 # %bb.17: # in Loop: Header=BB26_13 Depth=1 fabs.d $fa3, $fa3 - fcmp.cule.d $fcc0, $fa0, $fa3 + fcmp.cule.d $fcc0, $fa1, $fa3 movcf2gr $a1, $fcc0 addi.d $a1, $a1, 3 .p2align 4, , 16 .LBB26_18: # %_ZSt10fpclassifyd.exit # in Loop: Header=BB26_13 Depth=1 - fcmp.ceq.d $fcc0, $fa2, $fa1 + fcmp.ceq.d $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB26_11 .LBB26_19: # in Loop: Header=BB26_13 Depth=1 fcmp.cun.d $fcc0, $fa2, $fa2 @@ -9644,13 +9588,13 @@ _Z29BENCHMARK_cos_autovec_double_RN9benchmark5StateE: # @_Z29BENCHMARK_cos_autov bnez $a2, .LBB26_11 # %bb.21: # in Loop: Header=BB26_13 Depth=1 fabs.d $fa2, $fa2 - fcmp.cule.d $fcc0, $fa0, $fa2 + fcmp.cule.d $fcc0, $fa1, $fa2 movcf2gr $a0, $fcc0 addi.d $a0, $a0, 3 b .LBB26_11 .LBB26_22: # in Loop: Header=BB26_13 Depth=1 move $a1, $zero - fcmp.ceq.d $fcc0, $fa2, $fa1 + fcmp.ceq.d $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB26_11 b .LBB26_19 .LBB26_23: # %_ZNSt10unique_ptrIA_dSt14default_deleteIS0_EED2Ev.exit @@ -10181,12 +10125,8 @@ GCC_except_table27: .Lcst_end27: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z29BENCHMARK_sinf_autovec_float_RN9benchmark5StateE -.LCPI28_0: - .word 0x00800000 # float 1.17549435E-38 .text - .globl _Z29BENCHMARK_sinf_autovec_float_RN9benchmark5StateE + .globl _Z29BENCHMARK_sinf_autovec_float_RN9benchmark5StateE # -- Begin function _Z29BENCHMARK_sinf_autovec_float_RN9benchmark5StateE .p2align 5 .type _Z29BENCHMARK_sinf_autovec_float_RN9benchmark5StateE,@function _Z29BENCHMARK_sinf_autovec_float_RN9benchmark5StateE: # @_Z29BENCHMARK_sinf_autovec_float_RN9benchmark5StateE @@ -10305,9 +10245,9 @@ _Z29BENCHMARK_sinf_autovec_float_RN9benchmark5StateE: # @_Z29BENCHMARK_sinf_auto bne $s5, $s4, .LBB28_8 # %bb.9: # %_ZL14run_fn_autovecIfEvPT_S1_S1_PFS0_S0_E.exit.preheader.preheader move $s2, $zero - pcalau12i $a0, %pc_hi20(.LCPI28_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI28_0) - movgr2fr.w $fa1, $zero + movgr2fr.w $fa0, $zero + lu12i.w $a0, 2048 + movgr2fr.w $fa1, $a0 lu12i.w $a0, 2 ori $s3, $a0, 1808 move $s4, $s1 @@ -10333,7 +10273,7 @@ _Z29BENCHMARK_sinf_autovec_float_RN9benchmark5StateE: # @_Z29BENCHMARK_sinf_auto bcnez $fcc0, .LBB28_12 # %bb.14: # in Loop: Header=BB28_13 Depth=1 ori $a0, $zero, 2 - fcmp.ceq.s $fcc0, $fa3, $fa1 + fcmp.ceq.s $fcc0, $fa3, $fa0 ori $a1, $zero, 2 bcnez $fcc0, .LBB28_18 # %bb.15: # in Loop: Header=BB28_13 Depth=1 @@ -10349,13 +10289,13 @@ _Z29BENCHMARK_sinf_autovec_float_RN9benchmark5StateE: # @_Z29BENCHMARK_sinf_auto bnez $a2, .LBB28_18 # %bb.17: # in Loop: Header=BB28_13 Depth=1 fabs.s $fa3, $fa3 - fcmp.cule.s $fcc0, $fa0, $fa3 + fcmp.cule.s $fcc0, $fa1, $fa3 movcf2gr $a1, $fcc0 addi.d $a1, $a1, 3 .p2align 4, , 16 .LBB28_18: # %_ZSt10fpclassifyf.exit # in Loop: Header=BB28_13 Depth=1 - fcmp.ceq.s $fcc0, $fa2, $fa1 + fcmp.ceq.s $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB28_11 .LBB28_19: # in Loop: Header=BB28_13 Depth=1 fcmp.cun.s $fcc0, $fa2, $fa2 @@ -10370,13 +10310,13 @@ _Z29BENCHMARK_sinf_autovec_float_RN9benchmark5StateE: # @_Z29BENCHMARK_sinf_auto bnez $a2, .LBB28_11 # %bb.21: # in Loop: Header=BB28_13 Depth=1 fabs.s $fa2, $fa2 - fcmp.cule.s $fcc0, $fa0, $fa2 + fcmp.cule.s $fcc0, $fa1, $fa2 movcf2gr $a0, $fcc0 addi.d $a0, $a0, 3 b .LBB28_11 .LBB28_22: # in Loop: Header=BB28_13 Depth=1 move $a1, $zero - fcmp.ceq.s $fcc0, $fa2, $fa1 + fcmp.ceq.s $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB28_11 b .LBB28_19 .LBB28_23: # %_ZNSt10unique_ptrIA_fSt14default_deleteIS0_EED2Ev.exit @@ -10909,12 +10849,8 @@ GCC_except_table29: .Lcst_end29: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29BENCHMARK_sin_autovec_double_RN9benchmark5StateE -.LCPI30_0: - .dword 0x0010000000000000 # double 2.2250738585072014E-308 .text - .globl _Z29BENCHMARK_sin_autovec_double_RN9benchmark5StateE + .globl _Z29BENCHMARK_sin_autovec_double_RN9benchmark5StateE # -- Begin function _Z29BENCHMARK_sin_autovec_double_RN9benchmark5StateE .p2align 5 .type _Z29BENCHMARK_sin_autovec_double_RN9benchmark5StateE,@function _Z29BENCHMARK_sin_autovec_double_RN9benchmark5StateE: # @_Z29BENCHMARK_sin_autovec_double_RN9benchmark5StateE @@ -11033,9 +10969,9 @@ _Z29BENCHMARK_sin_autovec_double_RN9benchmark5StateE: # @_Z29BENCHMARK_sin_autov bne $s5, $s4, .LBB30_8 # %bb.9: # %_ZL14run_fn_autovecIdEvPT_S1_S1_PFS0_S0_E.exit.preheader.preheader move $s2, $zero - pcalau12i $a0, %pc_hi20(.LCPI30_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI30_0) - movgr2fr.d $fa1, $zero + movgr2fr.d $fa0, $zero + lu52i.d $a0, $zero, 1 + movgr2fr.d $fa1, $a0 lu12i.w $a0, 2 ori $s3, $a0, 1808 move $s4, $s1 @@ -11061,7 +10997,7 @@ _Z29BENCHMARK_sin_autovec_double_RN9benchmark5StateE: # @_Z29BENCHMARK_sin_autov bcnez $fcc0, .LBB30_12 # %bb.14: # in Loop: Header=BB30_13 Depth=1 ori $a0, $zero, 2 - fcmp.ceq.d $fcc0, $fa3, $fa1 + fcmp.ceq.d $fcc0, $fa3, $fa0 ori $a1, $zero, 2 bcnez $fcc0, .LBB30_18 # %bb.15: # in Loop: Header=BB30_13 Depth=1 @@ -11077,13 +11013,13 @@ _Z29BENCHMARK_sin_autovec_double_RN9benchmark5StateE: # @_Z29BENCHMARK_sin_autov bnez $a2, .LBB30_18 # %bb.17: # in Loop: Header=BB30_13 Depth=1 fabs.d $fa3, $fa3 - fcmp.cule.d $fcc0, $fa0, $fa3 + fcmp.cule.d $fcc0, $fa1, $fa3 movcf2gr $a1, $fcc0 addi.d $a1, $a1, 3 .p2align 4, , 16 .LBB30_18: # %_ZSt10fpclassifyd.exit # in Loop: Header=BB30_13 Depth=1 - fcmp.ceq.d $fcc0, $fa2, $fa1 + fcmp.ceq.d $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB30_11 .LBB30_19: # in Loop: Header=BB30_13 Depth=1 fcmp.cun.d $fcc0, $fa2, $fa2 @@ -11098,13 +11034,13 @@ _Z29BENCHMARK_sin_autovec_double_RN9benchmark5StateE: # @_Z29BENCHMARK_sin_autov bnez $a2, .LBB30_11 # %bb.21: # in Loop: Header=BB30_13 Depth=1 fabs.d $fa2, $fa2 - fcmp.cule.d $fcc0, $fa0, $fa2 + fcmp.cule.d $fcc0, $fa1, $fa2 movcf2gr $a0, $fcc0 addi.d $a0, $a0, 3 b .LBB30_11 .LBB30_22: # in Loop: Header=BB30_13 Depth=1 move $a1, $zero - fcmp.ceq.d $fcc0, $fa2, $fa1 + fcmp.ceq.d $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB30_11 b .LBB30_19 .LBB30_23: # %_ZNSt10unique_ptrIA_dSt14default_deleteIS0_EED2Ev.exit @@ -11635,12 +11571,8 @@ GCC_except_table31: .Lcst_end31: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z30BENCHMARK_sinhf_autovec_float_RN9benchmark5StateE -.LCPI32_0: - .word 0x00800000 # float 1.17549435E-38 .text - .globl _Z30BENCHMARK_sinhf_autovec_float_RN9benchmark5StateE + .globl _Z30BENCHMARK_sinhf_autovec_float_RN9benchmark5StateE # -- Begin function _Z30BENCHMARK_sinhf_autovec_float_RN9benchmark5StateE .p2align 5 .type _Z30BENCHMARK_sinhf_autovec_float_RN9benchmark5StateE,@function _Z30BENCHMARK_sinhf_autovec_float_RN9benchmark5StateE: # @_Z30BENCHMARK_sinhf_autovec_float_RN9benchmark5StateE @@ -11759,9 +11691,9 @@ _Z30BENCHMARK_sinhf_autovec_float_RN9benchmark5StateE: # @_Z30BENCHMARK_sinhf_au bne $s5, $s4, .LBB32_8 # %bb.9: # %_ZL14run_fn_autovecIfEvPT_S1_S1_PFS0_S0_E.exit.preheader.preheader move $s2, $zero - pcalau12i $a0, %pc_hi20(.LCPI32_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI32_0) - movgr2fr.w $fa1, $zero + movgr2fr.w $fa0, $zero + lu12i.w $a0, 2048 + movgr2fr.w $fa1, $a0 lu12i.w $a0, 2 ori $s3, $a0, 1808 move $s4, $s1 @@ -11787,7 +11719,7 @@ _Z30BENCHMARK_sinhf_autovec_float_RN9benchmark5StateE: # @_Z30BENCHMARK_sinhf_au bcnez $fcc0, .LBB32_12 # %bb.14: # in Loop: Header=BB32_13 Depth=1 ori $a0, $zero, 2 - fcmp.ceq.s $fcc0, $fa3, $fa1 + fcmp.ceq.s $fcc0, $fa3, $fa0 ori $a1, $zero, 2 bcnez $fcc0, .LBB32_18 # %bb.15: # in Loop: Header=BB32_13 Depth=1 @@ -11803,13 +11735,13 @@ _Z30BENCHMARK_sinhf_autovec_float_RN9benchmark5StateE: # @_Z30BENCHMARK_sinhf_au bnez $a2, .LBB32_18 # %bb.17: # in Loop: Header=BB32_13 Depth=1 fabs.s $fa3, $fa3 - fcmp.cule.s $fcc0, $fa0, $fa3 + fcmp.cule.s $fcc0, $fa1, $fa3 movcf2gr $a1, $fcc0 addi.d $a1, $a1, 3 .p2align 4, , 16 .LBB32_18: # %_ZSt10fpclassifyf.exit # in Loop: Header=BB32_13 Depth=1 - fcmp.ceq.s $fcc0, $fa2, $fa1 + fcmp.ceq.s $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB32_11 .LBB32_19: # in Loop: Header=BB32_13 Depth=1 fcmp.cun.s $fcc0, $fa2, $fa2 @@ -11824,13 +11756,13 @@ _Z30BENCHMARK_sinhf_autovec_float_RN9benchmark5StateE: # @_Z30BENCHMARK_sinhf_au bnez $a2, .LBB32_11 # %bb.21: # in Loop: Header=BB32_13 Depth=1 fabs.s $fa2, $fa2 - fcmp.cule.s $fcc0, $fa0, $fa2 + fcmp.cule.s $fcc0, $fa1, $fa2 movcf2gr $a0, $fcc0 addi.d $a0, $a0, 3 b .LBB32_11 .LBB32_22: # in Loop: Header=BB32_13 Depth=1 move $a1, $zero - fcmp.ceq.s $fcc0, $fa2, $fa1 + fcmp.ceq.s $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB32_11 b .LBB32_19 .LBB32_23: # %_ZNSt10unique_ptrIA_fSt14default_deleteIS0_EED2Ev.exit @@ -12363,12 +12295,8 @@ GCC_except_table33: .Lcst_end33: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z30BENCHMARK_sinh_autovec_double_RN9benchmark5StateE -.LCPI34_0: - .dword 0x0010000000000000 # double 2.2250738585072014E-308 .text - .globl _Z30BENCHMARK_sinh_autovec_double_RN9benchmark5StateE + .globl _Z30BENCHMARK_sinh_autovec_double_RN9benchmark5StateE # -- Begin function _Z30BENCHMARK_sinh_autovec_double_RN9benchmark5StateE .p2align 5 .type _Z30BENCHMARK_sinh_autovec_double_RN9benchmark5StateE,@function _Z30BENCHMARK_sinh_autovec_double_RN9benchmark5StateE: # @_Z30BENCHMARK_sinh_autovec_double_RN9benchmark5StateE @@ -12487,9 +12415,9 @@ _Z30BENCHMARK_sinh_autovec_double_RN9benchmark5StateE: # @_Z30BENCHMARK_sinh_aut bne $s5, $s4, .LBB34_8 # %bb.9: # %_ZL14run_fn_autovecIdEvPT_S1_S1_PFS0_S0_E.exit.preheader.preheader move $s2, $zero - pcalau12i $a0, %pc_hi20(.LCPI34_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI34_0) - movgr2fr.d $fa1, $zero + movgr2fr.d $fa0, $zero + lu52i.d $a0, $zero, 1 + movgr2fr.d $fa1, $a0 lu12i.w $a0, 2 ori $s3, $a0, 1808 move $s4, $s1 @@ -12515,7 +12443,7 @@ _Z30BENCHMARK_sinh_autovec_double_RN9benchmark5StateE: # @_Z30BENCHMARK_sinh_aut bcnez $fcc0, .LBB34_12 # %bb.14: # in Loop: Header=BB34_13 Depth=1 ori $a0, $zero, 2 - fcmp.ceq.d $fcc0, $fa3, $fa1 + fcmp.ceq.d $fcc0, $fa3, $fa0 ori $a1, $zero, 2 bcnez $fcc0, .LBB34_18 # %bb.15: # in Loop: Header=BB34_13 Depth=1 @@ -12531,13 +12459,13 @@ _Z30BENCHMARK_sinh_autovec_double_RN9benchmark5StateE: # @_Z30BENCHMARK_sinh_aut bnez $a2, .LBB34_18 # %bb.17: # in Loop: Header=BB34_13 Depth=1 fabs.d $fa3, $fa3 - fcmp.cule.d $fcc0, $fa0, $fa3 + fcmp.cule.d $fcc0, $fa1, $fa3 movcf2gr $a1, $fcc0 addi.d $a1, $a1, 3 .p2align 4, , 16 .LBB34_18: # %_ZSt10fpclassifyd.exit # in Loop: Header=BB34_13 Depth=1 - fcmp.ceq.d $fcc0, $fa2, $fa1 + fcmp.ceq.d $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB34_11 .LBB34_19: # in Loop: Header=BB34_13 Depth=1 fcmp.cun.d $fcc0, $fa2, $fa2 @@ -12552,13 +12480,13 @@ _Z30BENCHMARK_sinh_autovec_double_RN9benchmark5StateE: # @_Z30BENCHMARK_sinh_aut bnez $a2, .LBB34_11 # %bb.21: # in Loop: Header=BB34_13 Depth=1 fabs.d $fa2, $fa2 - fcmp.cule.d $fcc0, $fa0, $fa2 + fcmp.cule.d $fcc0, $fa1, $fa2 movcf2gr $a0, $fcc0 addi.d $a0, $a0, 3 b .LBB34_11 .LBB34_22: # in Loop: Header=BB34_13 Depth=1 move $a1, $zero - fcmp.ceq.d $fcc0, $fa2, $fa1 + fcmp.ceq.d $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB34_11 b .LBB34_19 .LBB34_23: # %_ZNSt10unique_ptrIA_dSt14default_deleteIS0_EED2Ev.exit @@ -13089,16 +13017,8 @@ GCC_except_table35: .Lcst_end35: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZL9init_dataIfEvPT_S1_S1_ -.LCPI36_0: - .word 0x4f800000 # float 4.2949673E+9 -.LCPI36_1: - .word 0xc2c80000 # float -100 -.LCPI36_2: - .word 0x43480000 # float 200 .text - .p2align 5 + .p2align 5 # -- Begin function _ZL9init_dataIfEvPT_S1_S1_ .type _ZL9init_dataIfEvPT_S1_S1_,@function _ZL9init_dataIfEvPT_S1_S1_: # @_ZL9init_dataIfEvPT_S1_S1_ # %bb.0: @@ -13204,7 +13124,14 @@ _ZL9init_dataIfEvPT_S1_S1_: # @_ZL9init_dataIfEvPT_S1_S1_ lu32i.d $fp, 0 lu12i.w $s0, -66464 lu32i.d $s0, 0 + lu12i.w $a0, 325632 + movgr2fr.w $fs1, $a0 vldi $vr11, -1168 + lu12i.w $a0, 275584 + movgr2fr.w $fs2, $a0 + lu12i.w $a0, -250752 + lu32i.d $a0, 0 + movgr2fr.w $fs3, $a0 lu12i.w $a0, 2 ori $t5, $a0, 1808 ori $s4, $zero, 624 @@ -13256,8 +13183,6 @@ _ZL9init_dataIfEvPT_S1_S1_: # @_ZL9init_dataIfEvPT_S1_S1_ movgr2fr.d $fa3, $a1 ffint.s.l $fa3, $fa3 movgr2cf $fcc0, $a2 - pcalau12i $a1, %pc_hi20(.LCPI36_0) - fld.s $fs1, $a1, %pc_lo12(.LCPI36_0) fsel $fa2, $fa3, $fa2, $fcc0 fmadd.s $fa1, $fa2, $fa0, $fa1 addi.d $a0, $a0, -1 @@ -13372,11 +13297,7 @@ _ZL9init_dataIfEvPT_S1_S1_: # @_ZL9init_dataIfEvPT_S1_S1_ bceqz $fcc0, .LBB36_31 .LBB36_12: # %_ZNSt25uniform_real_distributionIfEclISt23mersenne_twister_engineImLm32ELm624ELm397ELm31ELm2567483615ELm11ELm4294967295ELm7ELm2636928640ELm15ELm4022730752ELm18ELm1812433253EEEEfRT_.exit # in Loop: Header=BB36_3 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI36_1) - fld.s $fs2, $a0, %pc_lo12(.LCPI36_1) - pcalau12i $a0, %pc_hi20(.LCPI36_2) - fld.s $fs3, $a0, %pc_lo12(.LCPI36_2) - fmadd.s $fa0, $fa0, $fs3, $fs2 + fmadd.s $fa0, $fa0, $fs2, $fs3 slli.d $t6, $t1, 2 fstx.s $fa0, $a7, $t6 vldi $vr0, -1168 @@ -13527,7 +13448,7 @@ _ZL9init_dataIfEvPT_S1_S1_: # @_ZL9init_dataIfEvPT_S1_S1_ bceqz $fcc0, .LBB36_32 .LBB36_21: # %_ZNSt25uniform_real_distributionIfEclISt23mersenne_twister_engineImLm32ELm624ELm397ELm31ELm2567483615ELm11ELm4294967295ELm7ELm2636928640ELm15ELm4022730752ELm18ELm1812433253EEEEfRT_.exit14 # in Loop: Header=BB36_3 Depth=1 - fmadd.s $fa0, $fa0, $fs3, $fs2 + fmadd.s $fa0, $fa0, $fs2, $fs3 fstx.s $fa0, $a6, $t6 vldi $vr0, -1168 move $a0, $t2 @@ -13677,7 +13598,7 @@ _ZL9init_dataIfEvPT_S1_S1_: # @_ZL9init_dataIfEvPT_S1_S1_ bceqz $fcc0, .LBB36_33 # %bb.30: # %_ZNSt25uniform_real_distributionIfEclISt23mersenne_twister_engineImLm32ELm624ELm397ELm31ELm2567483615ELm11ELm4294967295ELm7ELm2636928640ELm15ELm4022730752ELm18ELm1812433253EEEEfRT_.exit22 # in Loop: Header=BB36_3 Depth=1 - fmadd.s $fa0, $fa0, $fs3, $fs2 + fmadd.s $fa0, $fa0, $fs2, $fs3 addi.d $t1, $t1, 1 fstx.s $fa0, $a5, $t6 bne $t1, $t5, .LBB36_3 @@ -13775,7 +13696,7 @@ _ZL9init_dataIfEvPT_S1_S1_: # @_ZL9init_dataIfEvPT_S1_S1_ ld.d $a7, $sp, 136 # 8-byte Folded Reload ld.d $a6, $sp, 144 # 8-byte Folded Reload ld.d $a5, $sp, 152 # 8-byte Folded Reload - fmadd.s $fa0, $fa0, $fs3, $fs2 + fmadd.s $fa0, $fa0, $fs2, $fs3 addi.d $t1, $t1, 1 fstx.s $fa0, $a5, $t6 bne $t1, $t5, .LBB36_3 @@ -13802,18 +13723,7 @@ _ZL9init_dataIfEvPT_S1_S1_: # @_ZL9init_dataIfEvPT_S1_S1_ .Lfunc_end36: .size _ZL9init_dataIfEvPT_S1_S1_, .Lfunc_end36-_ZL9init_dataIfEvPT_S1_S1_ # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZL9init_dataIdEvPT_S1_S1_ -.LCPI37_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI37_1: - .dword 0x41f0000000000000 # double 4294967296 -.LCPI37_2: - .dword 0xc059000000000000 # double -100 -.LCPI37_3: - .dword 0x4069000000000000 # double 200 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZL9init_dataIdEvPT_S1_S1_ .type _ZL9init_dataIdEvPT_S1_S1_,@function _ZL9init_dataIdEvPT_S1_S1_: # @_ZL9init_dataIdEvPT_S1_S1_ # %bb.0: @@ -13921,7 +13831,12 @@ _ZL9init_dataIdEvPT_S1_S1_: # @_ZL9init_dataIdEvPT_S1_S1_ lu12i.w $s0, -66464 lu32i.d $s0, 0 lu52i.d $t5, $zero, 1107 + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 + movgr2fr.d $fs1, $a0 lu12i.w $t6, 275200 + lu52i.d $a0, $zero, 1055 + movgr2fr.d $fs2, $a0 vldi $vr11, -912 lu12i.w $a0, 2 ori $t7, $a0, 1808 @@ -13964,16 +13879,12 @@ _ZL9init_dataIdEvPT_S1_S1_: # @_ZL9init_dataIdEvPT_S1_S1_ xor $a1, $a2, $a1 srli.d $a2, $a1, 18 xor $a1, $a2, $a1 - pcalau12i $a2, %pc_hi20(.LCPI37_0) - fld.d $fs1, $a2, %pc_lo12(.LCPI37_0) srli.d $a2, $a1, 32 or $a2, $a2, $t5 movgr2fr.d $fa2, $a2 fsub.d $fa2, $fa2, $fs1 bstrins.d $a1, $t6, 63, 32 movgr2fr.d $fa3, $a1 - pcalau12i $a1, %pc_hi20(.LCPI37_1) - fld.d $fs2, $a1, %pc_lo12(.LCPI37_1) fadd.d $fa2, $fa3, $fa2 fmadd.d $fa1, $fa2, $fa0, $fa1 addi.d $a0, $a0, -1 @@ -14088,10 +13999,12 @@ _ZL9init_dataIdEvPT_S1_S1_: # @_ZL9init_dataIdEvPT_S1_S1_ bceqz $fcc0, .LBB37_31 .LBB37_12: # %_ZNSt25uniform_real_distributionIdEclISt23mersenne_twister_engineImLm32ELm624ELm397ELm31ELm2567483615ELm11ELm4294967295ELm7ELm2636928640ELm15ELm4022730752ELm18ELm1812433253EEEEdRT_.exit # in Loop: Header=BB37_3 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI37_2) - fld.d $fs3, $a0, %pc_lo12(.LCPI37_2) - pcalau12i $a0, %pc_hi20(.LCPI37_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI37_3) + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a1, $a0, -1019 + movgr2fr.d $fs3, $a1 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fs4, $a0 fmadd.d $fa0, $fa0, $fs4, $fs3 slli.d $t8, $t1, 3 fstx.d $fa0, $a7, $t8 diff --git a/results/MicroBenchmarks/SLPVectorization/CMakeFiles/SLPVectorizationBenchmarks.dir/Versioning.s b/results/MicroBenchmarks/SLPVectorization/CMakeFiles/SLPVectorizationBenchmarks.dir/Versioning.s index bdb6e072..bcd6f320 100644 --- a/results/MicroBenchmarks/SLPVectorization/CMakeFiles/SLPVectorizationBenchmarks.dir/Versioning.s +++ b/results/MicroBenchmarks/SLPVectorization/CMakeFiles/SLPVectorizationBenchmarks.dir/Versioning.s @@ -1483,18 +1483,8 @@ _Z54benchmark_multiply_accumulate_no_runtime_checks_neededILj4EdEvRN9benchmark5S .size _Z54benchmark_multiply_accumulate_no_runtime_checks_neededILj4EdEvRN9benchmark5StateE, .Lfunc_end20-_Z54benchmark_multiply_accumulate_no_runtime_checks_neededILj4EdEvRN9benchmark5StateE .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZL9init_dataIiEvPT_j -.LCPI21_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI21_1: - .dword 0x41f0000000000000 # double 4294967296 -.LCPI21_2: - .dword 0xc059000000000000 # double -100 -.LCPI21_3: - .dword 0x4069000000000000 # double 200 .text - .p2align 5 + .p2align 5 # -- Begin function _ZL9init_dataIiEvPT_j .type _ZL9init_dataIiEvPT_j,@function _ZL9init_dataIiEvPT_j: # @_ZL9init_dataIiEvPT_j # %bb.0: @@ -1511,8 +1501,10 @@ _ZL9init_dataIiEvPT_j: # @_ZL9init_dataIiEvPT_j st.d $s7, $sp, 1952 # 8-byte Folded Spill st.d $s8, $sp, 1944 # 8-byte Folded Spill fst.d $fs0, $sp, 1936 # 8-byte Folded Spill + fst.d $fs1, $sp, 1928 # 8-byte Folded Spill + fst.d $fs2, $sp, 1920 # 8-byte Folded Spill addi.d $sp, $sp, -2048 - addi.d $sp, $sp, -1152 + addi.d $sp, $sp, -1168 st.d $a1, $sp, 128 # 8-byte Folded Spill st.d $a0, $sp, 120 # 8-byte Folded Spill lu12i.w $a0, 3 @@ -1597,7 +1589,12 @@ _ZL9init_dataIiEvPT_j: # @_ZL9init_dataIiEvPT_j lu12i.w $s3, -66464 lu32i.d $s3, 0 lu52i.d $t3, $zero, 1107 + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 + movgr2fr.d $fs1, $a0 lu12i.w $t4, 275200 + lu52i.d $a0, $zero, 1055 + movgr2fr.d $fs2, $a0 vldi $vr11, -912 ori $s8, $zero, 624 ld.d $a5, $sp, 128 # 8-byte Folded Reload @@ -1631,20 +1628,16 @@ _ZL9init_dataIiEvPT_j: # @_ZL9init_dataIiEvPT_j xor $a1, $a2, $a1 srli.d $a2, $a1, 18 xor $a1, $a2, $a1 - pcalau12i $a2, %pc_hi20(.LCPI21_0) - fld.d $fa2, $a2, %pc_lo12(.LCPI21_0) srli.d $a2, $a1, 32 or $a2, $a2, $t3 - movgr2fr.d $fa3, $a2 - fsub.d $fa2, $fa3, $fa2 + movgr2fr.d $fa2, $a2 + fsub.d $fa2, $fa2, $fs1 bstrins.d $a1, $t4, 63, 32 movgr2fr.d $fa3, $a1 - pcalau12i $a1, %pc_hi20(.LCPI21_1) - fld.d $fa4, $a1, %pc_lo12(.LCPI21_1) fadd.d $fa2, $fa3, $fa2 fmadd.d $fa1, $fa2, $fa0, $fa1 addi.d $a0, $a0, -1 - fmul.d $fa0, $fa0, $fa4 + fmul.d $fa0, $fa0, $fs2 beqz $a0, .LBB21_11 .LBB21_5: # %select.unfold.i.i.i.i # Parent Loop BB21_3 Depth=1 @@ -1755,10 +1748,12 @@ _ZL9init_dataIiEvPT_j: # @_ZL9init_dataIiEvPT_j bceqz $fcc0, .LBB21_13 .LBB21_12: # %_ZNSt25uniform_real_distributionIdEclISt23mersenne_twister_engineImLm32ELm624ELm397ELm31ELm2567483615ELm11ELm4294967295ELm7ELm2636928640ELm15ELm4022730752ELm18ELm1812433253EEEEdRT_.exit # in Loop: Header=BB21_3 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI21_2) - fld.d $fa1, $a0, %pc_lo12(.LCPI21_2) - pcalau12i $a0, %pc_hi20(.LCPI21_3) - fld.d $fa2, $a0, %pc_lo12(.LCPI21_3) + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a1, $a0, -1019 + movgr2fr.d $fa1, $a1 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa2, $a0 fmadd.d $fa0, $fa0, $fa2, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 @@ -1797,7 +1792,9 @@ _ZL9init_dataIiEvPT_j: # @_ZL9init_dataIiEvPT_j b .LBB21_12 .LBB21_14: addi.d $sp, $sp, 2032 - addi.d $sp, $sp, 1168 + addi.d $sp, $sp, 1184 + fld.d $fs2, $sp, 1920 # 8-byte Folded Reload + fld.d $fs1, $sp, 1928 # 8-byte Folded Reload fld.d $fs0, $sp, 1936 # 8-byte Folded Reload ld.d $s8, $sp, 1944 # 8-byte Folded Reload ld.d $s7, $sp, 1952 # 8-byte Folded Reload @@ -2055,18 +2052,8 @@ _Z10do_add_xorILj16EiEvPT0_S1_S1_: # @_Z10do_add_xorILj16EiEvPT0_S1_S1_ .Lfunc_end25: .size _Z10do_add_xorILj16EiEvPT0_S1_S1_, .Lfunc_end25-_Z10do_add_xorILj16EiEvPT0_S1_S1_ # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZL9init_dataIdEvPT_j -.LCPI26_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI26_1: - .dword 0x41f0000000000000 # double 4294967296 -.LCPI26_2: - .dword 0xc059000000000000 # double -100 -.LCPI26_3: - .dword 0x4069000000000000 # double 200 .text - .p2align 5 + .p2align 5 # -- Begin function _ZL9init_dataIdEvPT_j .type _ZL9init_dataIdEvPT_j,@function _ZL9init_dataIdEvPT_j: # @_ZL9init_dataIdEvPT_j # %bb.0: @@ -2083,8 +2070,10 @@ _ZL9init_dataIdEvPT_j: # @_ZL9init_dataIdEvPT_j st.d $s7, $sp, 1952 # 8-byte Folded Spill st.d $s8, $sp, 1944 # 8-byte Folded Spill fst.d $fs0, $sp, 1936 # 8-byte Folded Spill + fst.d $fs1, $sp, 1928 # 8-byte Folded Spill + fst.d $fs2, $sp, 1920 # 8-byte Folded Spill addi.d $sp, $sp, -2048 - addi.d $sp, $sp, -1152 + addi.d $sp, $sp, -1168 st.d $a1, $sp, 128 # 8-byte Folded Spill st.d $a0, $sp, 120 # 8-byte Folded Spill lu12i.w $a0, 3 @@ -2169,7 +2158,12 @@ _ZL9init_dataIdEvPT_j: # @_ZL9init_dataIdEvPT_j lu12i.w $s3, -66464 lu32i.d $s3, 0 lu52i.d $t3, $zero, 1107 + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 + movgr2fr.d $fs1, $a0 lu12i.w $t4, 275200 + lu52i.d $a0, $zero, 1055 + movgr2fr.d $fs2, $a0 vldi $vr11, -912 ori $s8, $zero, 624 ld.d $a5, $sp, 128 # 8-byte Folded Reload @@ -2203,20 +2197,16 @@ _ZL9init_dataIdEvPT_j: # @_ZL9init_dataIdEvPT_j xor $a1, $a2, $a1 srli.d $a2, $a1, 18 xor $a1, $a2, $a1 - pcalau12i $a2, %pc_hi20(.LCPI26_0) - fld.d $fa2, $a2, %pc_lo12(.LCPI26_0) srli.d $a2, $a1, 32 or $a2, $a2, $t3 - movgr2fr.d $fa3, $a2 - fsub.d $fa2, $fa3, $fa2 + movgr2fr.d $fa2, $a2 + fsub.d $fa2, $fa2, $fs1 bstrins.d $a1, $t4, 63, 32 movgr2fr.d $fa3, $a1 - pcalau12i $a1, %pc_hi20(.LCPI26_1) - fld.d $fa4, $a1, %pc_lo12(.LCPI26_1) fadd.d $fa2, $fa3, $fa2 fmadd.d $fa1, $fa2, $fa0, $fa1 addi.d $a0, $a0, -1 - fmul.d $fa0, $fa0, $fa4 + fmul.d $fa0, $fa0, $fs2 beqz $a0, .LBB26_11 .LBB26_5: # %select.unfold.i.i.i.i # Parent Loop BB26_3 Depth=1 @@ -2327,10 +2317,12 @@ _ZL9init_dataIdEvPT_j: # @_ZL9init_dataIdEvPT_j bceqz $fcc0, .LBB26_13 .LBB26_12: # %_ZNSt25uniform_real_distributionIdEclISt23mersenne_twister_engineImLm32ELm624ELm397ELm31ELm2567483615ELm11ELm4294967295ELm7ELm2636928640ELm15ELm4022730752ELm18ELm1812433253EEEEdRT_.exit # in Loop: Header=BB26_3 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI26_2) - fld.d $fa1, $a0, %pc_lo12(.LCPI26_2) - pcalau12i $a0, %pc_hi20(.LCPI26_3) - fld.d $fa2, $a0, %pc_lo12(.LCPI26_3) + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a1, $a0, -1019 + movgr2fr.d $fa1, $a1 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa2, $a0 fmadd.d $fa0, $fa0, $fa2, $fa1 slli.d $a0, $t0, 3 addi.d $t0, $t0, 1 @@ -2367,7 +2359,9 @@ _ZL9init_dataIdEvPT_j: # @_ZL9init_dataIdEvPT_j b .LBB26_12 .LBB26_14: addi.d $sp, $sp, 2032 - addi.d $sp, $sp, 1168 + addi.d $sp, $sp, 1184 + fld.d $fs2, $sp, 1920 # 8-byte Folded Reload + fld.d $fs1, $sp, 1928 # 8-byte Folded Reload fld.d $fs0, $sp, 1936 # 8-byte Folded Reload ld.d $s8, $sp, 1944 # 8-byte Folded Reload ld.d $s7, $sp, 1952 # 8-byte Folded Reload diff --git a/results/MicroBenchmarks/harris/CMakeFiles/harris.dir/harrisKernel.s b/results/MicroBenchmarks/harris/CMakeFiles/harris.dir/harrisKernel.s index c8dc1068..e7e8fde8 100644 --- a/results/MicroBenchmarks/harris/CMakeFiles/harris.dir/harrisKernel.s +++ b/results/MicroBenchmarks/harris/CMakeFiles/harris.dir/harrisKernel.s @@ -3,16 +3,8 @@ .globl _ZSt21ios_base_library_initv # End of file scope inline assembly - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_ -.LCPI0_0: - .word 0x3e2aaaab # float 0.166666672 -.LCPI0_1: - .word 0x3daaaaab # float 0.0833333358 -.LCPI0_2: - .word 0xbd23d70a # float -0.0399999991 .text - .globl _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_ + .globl _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_ # -- Begin function _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_ .p2align 5 .type _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_,@function _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_ @@ -20,127 +12,127 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha addi.w $t1, $zero, -1 blt $a0, $t1, .LBB0_105 # %bb.1: # %.preheader317.lr.ph - addi.d $sp, $sp, -144 - st.d $ra, $sp, 136 # 8-byte Folded Spill - st.d $fp, $sp, 128 # 8-byte Folded Spill - st.d $s0, $sp, 120 # 8-byte Folded Spill - st.d $s1, $sp, 112 # 8-byte Folded Spill - st.d $s2, $sp, 104 # 8-byte Folded Spill - st.d $s3, $sp, 96 # 8-byte Folded Spill - st.d $s4, $sp, 88 # 8-byte Folded Spill - st.d $s5, $sp, 80 # 8-byte Folded Spill - st.d $s6, $sp, 72 # 8-byte Folded Spill - st.d $s7, $sp, 64 # 8-byte Folded Spill - st.d $s8, $sp, 56 # 8-byte Folded Spill - ld.d $t0, $sp, 144 - st.d $a3, $sp, 48 # 8-byte Folded Spill + addi.d $sp, $sp, -160 + st.d $ra, $sp, 152 # 8-byte Folded Spill + st.d $fp, $sp, 144 # 8-byte Folded Spill + st.d $s0, $sp, 136 # 8-byte Folded Spill + st.d $s1, $sp, 128 # 8-byte Folded Spill + st.d $s2, $sp, 120 # 8-byte Folded Spill + st.d $s3, $sp, 112 # 8-byte Folded Spill + st.d $s4, $sp, 104 # 8-byte Folded Spill + st.d $s5, $sp, 96 # 8-byte Folded Spill + st.d $s6, $sp, 88 # 8-byte Folded Spill + st.d $s7, $sp, 80 # 8-byte Folded Spill + st.d $s8, $sp, 72 # 8-byte Folded Spill + ld.d $t0, $sp, 160 + st.d $a3, $sp, 64 # 8-byte Folded Spill blt $a1, $t1, .LBB0_47 # %bb.2: # %.preheader317.us.preheader - move $s8, $a5 - move $t4, $a2 - move $s2, $zero + move $s5, $a5 + move $s4, $zero addi.w $a5, $a0, 2 addi.w $t2, $a1, 2 + move $t3, $a2 ori $a2, $zero, 1 slt $a3, $a2, $a5 masknez $a2, $a2, $a3 - st.d $a5, $sp, 40 # 8-byte Folded Spill + st.d $a5, $sp, 56 # 8-byte Folded Spill maskeqz $a3, $a5, $a3 - or $s3, $a3, $a2 + or $s1, $a3, $a2 bstrpick.d $t1, $t2, 31, 0 - slli.d $a2, $s3, 13 - alsl.d $a3, $s3, $a2, 3 - st.d $a3, $sp, 24 # 8-byte Folded Spill - add.d $a3, $s8, $a3 + slli.d $a2, $s1, 13 + alsl.d $a3, $s1, $a2, 3 + st.d $a3, $sp, 32 # 8-byte Folded Spill + add.d $a3, $s5, $a3 alsl.d $a3, $t1, $a3, 2 - lu12i.w $s4, -3 - ori $a5, $s4, 4088 - st.d $a5, $sp, 16 # 8-byte Folded Spill + lu12i.w $s6, -3 + ori $a5, $s6, 4088 + st.d $a5, $sp, 24 # 8-byte Folded Spill add.d $a3, $a3, $a5 - alsl.d $a2, $s3, $a2, 4 - add.d $a2, $t4, $a2 + alsl.d $a2, $s1, $a2, 4 + add.d $a2, $t3, $a2 alsl.d $a2, $t1, $a2, 2 lu12i.w $a5, 2 - ori $s5, $a5, 24 - add.d $a2, $a2, $s5 - st.d $a2, $sp, 8 # 8-byte Folded Spill - sltu $a2, $s8, $a2 - sltu $a3, $t4, $a3 + ori $s7, $a5, 24 + add.d $a2, $a2, $s7 + st.d $a2, $sp, 16 # 8-byte Folded Spill + sltu $a2, $s5, $a2 + sltu $a3, $t3, $a3 and $a2, $a2, $a3 bstrpick.d $a3, $t2, 31, 2 - slli.d $t6, $a3, 2 - add.d $s7, $t4, $s5 - st.d $t2, $sp, 32 # 8-byte Folded Spill - sltui $fp, $t2, 4 - or $a2, $fp, $a2 - andi $t7, $a2, 1 + slli.d $s0, $a3, 2 + add.d $ra, $t3, $s7 + st.d $t2, $sp, 48 # 8-byte Folded Spill + sltui $a3, $t2, 4 + st.d $a3, $sp, 8 # 8-byte Folded Spill + or $a2, $a3, $a2 + andi $t4, $a2, 1 ori $t5, $a5, 16 lu12i.w $a2, 4 - ori $s0, $a2, 32 - ori $s1, $a2, 40 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.s $fa0, $a2, %pc_lo12(.LCPI0_0) - pcalau12i $a2, %pc_hi20(.LCPI0_1) - fld.s $fa1, $a2, %pc_lo12(.LCPI0_1) - ori $t2, $a5, 8 - ori $ra, $s4, 4072 - lu12i.w $a2, 252586 - ori $a2, $a2, 2731 - vreplgr2vr.w $vr2, $a2 + ori $s2, $a2, 32 + ori $s3, $a2, 40 lu12i.w $a2, 254634 - ori $a2, $a2, 2731 - vreplgr2vr.w $vr3, $a2 - move $a2, $t4 - move $t3, $s8 + ori $t6, $a2, 2731 + movgr2fr.w $fa0, $t6 + lu12i.w $a2, 252586 + ori $t7, $a2, 2731 + movgr2fr.w $fa1, $t7 + ori $t2, $a5, 8 + ori $t8, $s6, 4072 + st.d $t3, $sp, 40 # 8-byte Folded Spill + move $a2, $t3 + move $t3, $s5 b .LBB0_4 .p2align 4, , 16 .LBB0_3: # %._crit_edge.us # in Loop: Header=BB0_4 Depth=1 - addi.d $s2, $s2, 1 + addi.d $s4, $s4, 1 add.d $t3, $t3, $t2 - add.d $s7, $s7, $t5 + add.d $ra, $ra, $t5 add.d $a2, $a2, $t5 - beq $s2, $s3, .LBB0_11 + beq $s4, $s1, .LBB0_11 .LBB0_4: # %.preheader317.us # =>This Loop Header: Depth=1 # Child Loop BB0_7 Depth 2 # Child Loop BB0_10 Depth 2 - beqz $t7, .LBB0_6 + beqz $t4, .LBB0_6 # %bb.5: # in Loop: Header=BB0_4 Depth=1 move $a5, $zero b .LBB0_9 .p2align 4, , 16 .LBB0_6: # %vector.body.preheader # in Loop: Header=BB0_4 Depth=1 - move $a3, $s7 - move $s6, $t6 + move $a3, $ra + move $s8, $s0 move $a5, $t3 .p2align 4, , 16 .LBB0_7: # %vector.body # Parent Loop BB0_4 Depth=1 # => This Inner Loop Header: Depth=2 - vldx $vr4, $a3, $ra - ori $t8, $s4, 4080 - vldx $vr5, $a3, $t8 - vldx $vr6, $a3, $t2 - vld $vr7, $a3, -8 - vldx $vr8, $a3, $t5 - vld $vr9, $a3, 0 - vfadd.s $vr4, $vr4, $vr6 - vfsub.s $vr4, $vr5, $vr4 - vfadd.s $vr4, $vr4, $vr8 - vfsub.s $vr5, $vr9, $vr7 - vfmul.s $vr5, $vr5, $vr3 - vfmadd.s $vr4, $vr4, $vr2, $vr5 - vst $vr4, $a5, 0 + vldx $vr2, $a3, $t8 + ori $fp, $s6, 4080 + vldx $vr3, $a3, $fp + vldx $vr4, $a3, $t2 + vld $vr5, $a3, -8 + vldx $vr6, $a3, $t5 + vld $vr7, $a3, 0 + vfadd.s $vr2, $vr2, $vr4 + vfsub.s $vr2, $vr3, $vr2 + vfadd.s $vr2, $vr2, $vr6 + vreplgr2vr.w $vr3, $t7 + vfsub.s $vr4, $vr7, $vr5 + vreplgr2vr.w $vr5, $t6 + vfmul.s $vr4, $vr4, $vr5 + vfmadd.s $vr2, $vr2, $vr3, $vr4 + vst $vr2, $a5, 0 addi.d $a5, $a5, 16 - addi.d $s6, $s6, -4 + addi.d $s8, $s8, -4 addi.d $a3, $a3, 16 - bnez $s6, .LBB0_7 + bnez $s8, .LBB0_7 # %bb.8: # %middle.block # in Loop: Header=BB0_4 Depth=1 - move $a5, $t6 - beq $t6, $t1, .LBB0_3 + move $a5, $s0 + beq $s0, $t1, .LBB0_3 .LBB0_9: # %scalar.ph.preheader # in Loop: Header=BB0_4 Depth=1 sub.d $a3, $t1, $a5 @@ -149,156 +141,160 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha .LBB0_10: # %scalar.ph # Parent Loop BB0_4 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $t8, $a2, $a5 - fldx.s $fa4, $a2, $a5 - fld.s $fa5, $t8, 8 - fldx.s $fa6, $t8, $s0 - fldx.s $fa7, $t8, $t5 - fldx.s $ft0, $t8, $s1 - fldx.s $ft1, $t8, $s5 - fadd.s $fa4, $fa4, $fa6 - fsub.s $fa4, $fa5, $fa4 - fadd.s $fa4, $fa4, $ft0 - fsub.s $fa5, $ft1, $fa7 - fmul.s $fa5, $fa5, $fa0 - fmadd.s $fa4, $fa4, $fa1, $fa5 - fstx.s $fa4, $t3, $a5 + add.d $fp, $a2, $a5 + fldx.s $fa2, $a2, $a5 + fld.s $fa3, $fp, 8 + fldx.s $fa4, $fp, $s2 + fldx.s $fa5, $fp, $t5 + fldx.s $fa6, $fp, $s3 + fldx.s $fa7, $fp, $s7 + fadd.s $fa2, $fa2, $fa4 + fsub.s $fa2, $fa3, $fa2 + fadd.s $fa2, $fa2, $fa6 + fsub.s $fa3, $fa7, $fa5 + fmul.s $fa3, $fa3, $fa0 + fmadd.s $fa2, $fa2, $fa1, $fa3 + fstx.s $fa2, $t3, $a5 addi.d $a3, $a3, -1 addi.d $a5, $a5, 4 bnez $a3, .LBB0_10 b .LBB0_3 .LBB0_11: # %.preheader315.us.preheader - move $s2, $zero + move $s4, $zero ori $a2, $zero, 1 - ld.d $s5, $sp, 40 # 8-byte Folded Reload - slt $a3, $a2, $s5 + ld.d $s7, $sp, 56 # 8-byte Folded Reload + slt $a3, $a2, $s7 masknez $a2, $a2, $a3 - maskeqz $a3, $s5, $a3 - or $s3, $a3, $a2 - ld.d $a2, $sp, 24 # 8-byte Folded Reload + maskeqz $a3, $s7, $a3 + or $t3, $a3, $a2 + ld.d $a2, $sp, 32 # 8-byte Folded Reload add.d $a2, $a4, $a2 alsl.d $a2, $t1, $a2, 2 - ld.d $a3, $sp, 16 # 8-byte Folded Reload + ld.d $a3, $sp, 24 # 8-byte Folded Reload add.d $a2, $a2, $a3 - ld.d $a3, $sp, 8 # 8-byte Folded Reload + ld.d $a3, $sp, 16 # 8-byte Folded Reload sltu $a3, $a4, $a3 - sltu $a2, $t4, $a2 + ld.d $a5, $sp, 40 # 8-byte Folded Reload + sltu $a2, $a5, $a2 and $a3, $a3, $a2 bstrpick.d $a2, $t1, 31, 2 - slli.d $t6, $a2, 2 - add.d $t7, $t4, $s1 - add.d $a2, $t4, $s0 - or $a3, $fp, $a3 - andi $t8, $a3, 1 - lu12i.w $fp, -5 - move $s0, $a4 + slli.d $t4, $a2, 2 + add.d $t8, $a5, $s3 + add.d $a2, $a5, $s2 + ld.d $a5, $sp, 8 # 8-byte Folded Reload + or $a3, $a5, $a3 + andi $fp, $a3, 1 + lu12i.w $s0, -5 + move $s1, $a4 b .LBB0_13 .p2align 4, , 16 .LBB0_12: # %._crit_edge.us322 # in Loop: Header=BB0_13 Depth=1 - addi.d $s2, $s2, 1 - add.d $s0, $s0, $t2 - add.d $t7, $t7, $t5 + addi.d $s4, $s4, 1 + add.d $s1, $s1, $t2 + add.d $t8, $t8, $t5 add.d $a2, $a2, $t5 - beq $s2, $s3, .LBB0_20 + beq $s4, $t3, .LBB0_20 .LBB0_13: # %.preheader315.us # =>This Loop Header: Depth=1 # Child Loop BB0_16 Depth 2 # Child Loop BB0_19 Depth 2 - ori $a5, $fp, 4064 - beqz $t8, .LBB0_15 + ori $a5, $s0, 4064 + beqz $fp, .LBB0_15 # %bb.14: # in Loop: Header=BB0_13 Depth=1 - move $t3, $zero + move $s2, $zero b .LBB0_18 .p2align 4, , 16 .LBB0_15: # %vector.body502.preheader # in Loop: Header=BB0_13 Depth=1 - move $a3, $t6 - move $t4, $t7 - move $s1, $s0 + move $a3, $t4 + move $s2, $t8 + move $s3, $s1 .p2align 4, , 16 .LBB0_16: # %vector.body502 # Parent Loop BB0_13 Depth=1 # => This Inner Loop Header: Depth=2 - ori $t3, $fp, 4056 - vldx $vr4, $t4, $t3 - vld $vr5, $t4, -8 - ori $t3, $fp, 4060 - vldx $vr6, $t4, $a5 - vldx $vr7, $t4, $t3 - vld $vr8, $t4, 0 - vld $vr9, $t4, -4 - vfadd.s $vr4, $vr4, $vr6 - vfsub.s $vr4, $vr5, $vr4 - vfadd.s $vr4, $vr4, $vr8 - vfsub.s $vr5, $vr9, $vr7 - vfmul.s $vr5, $vr5, $vr3 - vfmadd.s $vr4, $vr4, $vr2, $vr5 - vst $vr4, $s1, 0 - addi.d $s1, $s1, 16 + ori $s6, $s0, 4056 + vldx $vr2, $s2, $s6 + vld $vr3, $s2, -8 + ori $s6, $s0, 4060 + vldx $vr4, $s2, $a5 + vldx $vr5, $s2, $s6 + vld $vr6, $s2, 0 + vld $vr7, $s2, -4 + vfadd.s $vr2, $vr2, $vr4 + vfsub.s $vr2, $vr3, $vr2 + vfadd.s $vr2, $vr2, $vr6 + vreplgr2vr.w $vr3, $t7 + vfsub.s $vr4, $vr7, $vr5 + vreplgr2vr.w $vr5, $t6 + vfmul.s $vr4, $vr4, $vr5 + vfmadd.s $vr2, $vr2, $vr3, $vr4 + vst $vr2, $s3, 0 + addi.d $s3, $s3, 16 addi.d $a3, $a3, -4 - addi.d $t4, $t4, 16 + addi.d $s2, $s2, 16 bnez $a3, .LBB0_16 # %bb.17: # %middle.block511 # in Loop: Header=BB0_13 Depth=1 - move $t3, $t6 - beq $t6, $t1, .LBB0_12 + move $s2, $t4 + beq $t4, $t1, .LBB0_12 .LBB0_18: # %scalar.ph497.preheader # in Loop: Header=BB0_13 Depth=1 - sub.d $a3, $t1, $t3 - slli.d $t4, $t3, 2 + sub.d $a3, $t1, $s2 + slli.d $s2, $s2, 2 .p2align 4, , 16 .LBB0_19: # %scalar.ph497 # Parent Loop BB0_13 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $t3, $a2, $t4 - fldx.s $fa4, $t3, $a5 - fldx.s $fa5, $a2, $t4 - ori $s1, $fp, 4072 - fldx.s $fa6, $t3, $s1 - ori $s1, $fp, 4068 - fldx.s $fa7, $t3, $s1 - fld.s $ft0, $t3, 8 - fld.s $ft1, $t3, 4 - fadd.s $fa4, $fa4, $fa6 - fsub.s $fa4, $fa5, $fa4 - fadd.s $fa4, $fa4, $ft0 - fsub.s $fa5, $ft1, $fa7 - fmul.s $fa5, $fa5, $fa0 - fmadd.s $fa4, $fa4, $fa1, $fa5 - fstx.s $fa4, $s0, $t4 + add.d $s3, $a2, $s2 + fldx.s $fa2, $s3, $a5 + fldx.s $fa3, $a2, $s2 + ori $s6, $s0, 4072 + fldx.s $fa4, $s3, $s6 + ori $s6, $s0, 4068 + fldx.s $fa5, $s3, $s6 + fld.s $fa6, $s3, 8 + fld.s $fa7, $s3, 4 + fadd.s $fa2, $fa2, $fa4 + fsub.s $fa2, $fa3, $fa2 + fadd.s $fa2, $fa2, $fa6 + fsub.s $fa3, $fa7, $fa5 + fmul.s $fa3, $fa3, $fa0 + fmadd.s $fa2, $fa2, $fa1, $fa3 + fstx.s $fa2, $s1, $s2 addi.d $a3, $a3, -1 - addi.d $t4, $t4, 4 + addi.d $s2, $s2, 4 bnez $a3, .LBB0_19 b .LBB0_12 .LBB0_20: # %.preheader313.us.preheader - move $t5, $zero + move $a5, $zero ori $a2, $zero, 1 - slt $a3, $a2, $s5 + slt $a3, $a2, $s7 masknez $a2, $a2, $a3 - maskeqz $a3, $s5, $a3 - or $a5, $a3, $a2 - sub.d $a3, $t0, $s8 - ld.d $a2, $sp, 32 # 8-byte Folded Reload + maskeqz $a3, $s7, $a3 + or $t5, $a3, $a2 + sub.d $a3, $t0, $s5 + ld.d $a2, $sp, 48 # 8-byte Folded Reload sltui $a2, $a2, 8 sltui $a3, $a3, 32 or $t6, $a2, $a3 bstrpick.d $t4, $t1, 31, 3 slli.d $t7, $t4, 3 addi.d $t8, $t0, 16 - addi.d $fp, $s8, 16 + addi.d $fp, $s5, 16 move $s0, $t0 - move $s1, $s8 + move $s1, $s5 b .LBB0_22 .p2align 4, , 16 .LBB0_21: # %._crit_edge.us325 # in Loop: Header=BB0_22 Depth=1 - addi.d $t5, $t5, 1 + addi.d $a5, $a5, 1 add.d $t8, $t8, $t2 add.d $fp, $fp, $t2 add.d $s1, $s1, $t2 add.d $s0, $s0, $t2 - beq $t5, $a5, .LBB0_29 + beq $a5, $t5, .LBB0_29 .LBB0_22: # %.preheader313.us # =>This Loop Header: Depth=1 # Child Loop BB0_25 Depth 2 @@ -351,19 +347,19 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha .LBB0_29: # %.preheader311.us.preheader move $t5, $zero ori $a3, $zero, 1 - slt $a5, $a3, $s5 + slt $a5, $a3, $s7 masknez $a3, $a3, $a5 - maskeqz $a5, $s5, $a5 + maskeqz $a5, $s7, $a5 or $t6, $a5, $a3 sub.d $a3, $a7, $a4 - sub.d $a5, $a7, $s8 + sub.d $a5, $a7, $s5 sltui $a3, $a3, 32 sltui $a5, $a5, 32 or $a3, $a3, $a5 slli.d $t7, $t4, 3 addi.d $t8, $a4, 16 addi.d $fp, $a7, 16 - addi.d $s0, $s8, 16 + addi.d $s0, $s5, 16 or $s1, $a2, $a3 move $s2, $a7 move $s3, $a4 @@ -376,7 +372,7 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha add.d $fp, $fp, $t2 add.d $s0, $s0, $t2 add.d $s3, $s3, $t2 - add.d $s8, $s8, $t2 + add.d $s5, $s5, $t2 add.d $s2, $s2, $t2 beq $t5, $t6, .LBB0_38 .LBB0_31: # %.preheader311.us @@ -418,7 +414,7 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha .LBB0_36: # %scalar.ph531.preheader # in Loop: Header=BB0_31 Depth=1 alsl.d $a3, $s4, $s3, 2 - alsl.d $a5, $s4, $s8, 2 + alsl.d $a5, $s4, $s5, 2 alsl.d $t3, $s4, $s2, 2 sub.d $s4, $t1, $s4 .p2align 4, , 16 @@ -438,9 +434,9 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha .LBB0_38: # %.preheader309.us.preheader move $a5, $zero ori $a3, $zero, 1 - slt $t3, $a3, $s5 + slt $t3, $a3, $s7 masknez $a3, $a3, $t3 - maskeqz $t3, $s5, $t3 + maskeqz $t3, $s7, $t3 or $t3, $t3, $a3 sub.d $a3, $a6, $a4 sltui $a3, $a3, 32 @@ -514,11 +510,11 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha blez $a1, .LBB0_104 # %bb.49: # %.preheader307.us.preheader move $s5, $zero - ld.d $a3, $sp, 184 - ld.d $a4, $sp, 176 - ld.d $t1, $sp, 168 - ld.d $a2, $sp, 160 - ld.d $t3, $sp, 152 + ld.d $a3, $sp, 200 + ld.d $a4, $sp, 192 + ld.d $t1, $sp, 184 + ld.d $a2, $sp, 176 + ld.d $t3, $sp, 168 slli.d $a5, $a0, 13 add.d $t2, $t1, $a5 alsl.d $t2, $a1, $t2, 2 @@ -539,7 +535,7 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha ori $t5, $a5, 12 add.d $s4, $t0, $t5 sltui $t6, $a1, 4 - st.d $t6, $sp, 40 # 8-byte Folded Spill + st.d $t6, $sp, 56 # 8-byte Folded Spill or $t2, $t6, $t2 andi $t2, $t2, 1 ori $t6, $a5, 8 @@ -644,9 +640,9 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha alsl.d $t0, $a1, $t0, 2 lu12i.w $t2, -2 add.d $t0, $t0, $t2 - st.d $s3, $sp, 32 # 8-byte Folded Spill + st.d $s3, $sp, 48 # 8-byte Folded Spill alsl.d $t2, $a0, $s3, 3 - st.d $t2, $sp, 24 # 8-byte Folded Spill + st.d $t2, $sp, 40 # 8-byte Folded Spill add.d $t2, $a7, $t2 alsl.d $t2, $a1, $t2, 2 add.d $t2, $t2, $t4 @@ -654,10 +650,10 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha sltu $t0, $a7, $t0 and $t0, $t2, $t0 bstrpick.d $t2, $a1, 30, 2 - st.d $t2, $sp, 16 # 8-byte Folded Spill + st.d $t2, $sp, 32 # 8-byte Folded Spill slli.d $s5, $t2, 2 add.d $s8, $a7, $t5 - ld.d $t2, $sp, 40 # 8-byte Folded Reload + ld.d $t2, $sp, 56 # 8-byte Folded Reload or $t0, $t2, $t0 andi $s7, $t0, 1 lu12i.w $t0, -3 @@ -749,22 +745,22 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha b .LBB0_60 .LBB0_68: # %.preheader303.us.preheader move $a7, $zero - ld.d $t2, $sp, 32 # 8-byte Folded Reload + ld.d $t2, $sp, 48 # 8-byte Folded Reload add.d $t2, $t3, $t2 alsl.d $t2, $a1, $t2, 2 lu12i.w $s3, -2 add.d $t2, $t2, $s3 - ld.d $s3, $sp, 24 # 8-byte Folded Reload + ld.d $s3, $sp, 40 # 8-byte Folded Reload add.d $s3, $a6, $s3 alsl.d $s3, $a1, $s3, 2 add.d $s3, $s3, $t4 sltu $s3, $t3, $s3 sltu $t2, $a6, $t2 and $t2, $s3, $t2 - ld.d $s3, $sp, 16 # 8-byte Folded Reload + ld.d $s3, $sp, 32 # 8-byte Folded Reload slli.d $s3, $s3, 2 add.d $s4, $a6, $t5 - ld.d $s5, $sp, 40 # 8-byte Folded Reload + ld.d $s5, $sp, 56 # 8-byte Folded Reload or $t2, $s5, $t2 andi $s5, $t2, 1 move $t2, $t3 @@ -1036,21 +1032,24 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha b .LBB0_87 .LBB0_95: # %.preheader.us.preheader move $t0, $zero - ld.d $t8, $sp, 48 # 8-byte Folded Reload - sub.d $a2, $t8, $a4 - sub.d $t1, $t8, $a3 + ld.d $t7, $sp, 64 # 8-byte Folded Reload + sub.d $a2, $t7, $a4 + sub.d $t1, $t7, $a3 sltui $a2, $a2, 32 sltui $t1, $t1, 32 or $a2, $a2, $t1 slli.d $a6, $a6, 3 addi.d $t1, $a4, 16 - addi.d $t2, $t8, 16 - addi.d $t3, $a3, 16 + addi.d $t2, $t7, 16 or $a7, $a7, $a2 + lu12i.w $a2, -273859 + ori $a2, $a2, 1802 + lu32i.d $a2, 0 + movgr2fr.w $fa0, $a2 lu12i.w $a2, 250429 ori $a2, $a2, 1802 - vreplgr2vr.w $vr0, $a2 - pcalau12i $t7, %pc_hi20(.LCPI0_2) + vreplgr2vr.w $vr1, $a2 + addi.d $t3, $a3, 16 b .LBB0_97 .p2align 4, , 16 .LBB0_96: # %._crit_edge.us350 @@ -1061,7 +1060,7 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha add.d $t3, $t3, $a5 add.d $a4, $a4, $a5 add.d $a3, $a3, $a5 - add.d $t8, $t8, $a5 + add.d $t7, $t7, $a5 beq $t0, $a0, .LBB0_104 .LBB0_97: # %.preheader.us # =>This Loop Header: Depth=1 @@ -1082,16 +1081,16 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha .LBB0_100: # %vector.body690 # Parent Loop BB0_97 Depth=1 # => This Inner Loop Header: Depth=2 - vld $vr1, $a2, -16 - vld $vr2, $a2, 0 - vld $vr3, $t5, -16 - vld $vr4, $t5, 0 - vfmul.s $vr1, $vr1, $vr1 + vld $vr2, $a2, -16 + vld $vr3, $a2, 0 + vld $vr4, $t5, -16 + vld $vr5, $t5, 0 vfmul.s $vr2, $vr2, $vr2 - vfnmsub.s $vr1, $vr1, $vr0, $vr3 - vfnmsub.s $vr2, $vr2, $vr0, $vr4 - vst $vr1, $t4, -16 - vst $vr2, $t4, 0 + vfmul.s $vr3, $vr3, $vr3 + vfnmsub.s $vr2, $vr2, $vr1, $vr4 + vfnmsub.s $vr3, $vr3, $vr1, $vr5 + vst $vr2, $t4, -16 + vst $vr3, $t4, 0 addi.d $t6, $t6, -8 addi.d $t5, $t5, 32 addi.d $t4, $t4, 32 @@ -1105,18 +1104,17 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha # in Loop: Header=BB0_97 Depth=1 alsl.d $a2, $t6, $a4, 2 alsl.d $t4, $t6, $a3, 2 - alsl.d $t5, $t6, $t8, 2 + alsl.d $t5, $t6, $t7, 2 sub.d $t6, $a1, $t6 .p2align 4, , 16 .LBB0_103: # %scalar.ph685 # Parent Loop BB0_97 Depth=1 # => This Inner Loop Header: Depth=2 - fld.s $fa1, $a2, 0 fld.s $fa2, $t4, 0 - fld.s $fa3, $t7, %pc_lo12(.LCPI0_2) + fld.s $fa3, $a2, 0 fmul.s $fa2, $fa2, $fa2 - fmadd.s $fa1, $fa2, $fa3, $fa1 - fst.s $fa1, $t5, 0 + fmadd.s $fa2, $fa2, $fa0, $fa3 + fst.s $fa2, $t5, 0 addi.d $a2, $a2, 4 addi.d $t4, $t4, 4 addi.d $t6, $t6, -1 @@ -1124,18 +1122,18 @@ _Z12harrisKerneliiPA2052_fPA2048_fPA2050_fS4_S4_S4_S4_S2_S2_S2_S2_S2_: # @_Z12ha bnez $t6, .LBB0_103 b .LBB0_96 .LBB0_104: - ld.d $s8, $sp, 56 # 8-byte Folded Reload - ld.d $s7, $sp, 64 # 8-byte Folded Reload - ld.d $s6, $sp, 72 # 8-byte Folded Reload - ld.d $s5, $sp, 80 # 8-byte Folded Reload - ld.d $s4, $sp, 88 # 8-byte Folded Reload - ld.d $s3, $sp, 96 # 8-byte Folded Reload - ld.d $s2, $sp, 104 # 8-byte Folded Reload - ld.d $s1, $sp, 112 # 8-byte Folded Reload - ld.d $s0, $sp, 120 # 8-byte Folded Reload - ld.d $fp, $sp, 128 # 8-byte Folded Reload - ld.d $ra, $sp, 136 # 8-byte Folded Reload - addi.d $sp, $sp, 144 + ld.d $s8, $sp, 72 # 8-byte Folded Reload + ld.d $s7, $sp, 80 # 8-byte Folded Reload + ld.d $s6, $sp, 88 # 8-byte Folded Reload + ld.d $s5, $sp, 96 # 8-byte Folded Reload + ld.d $s4, $sp, 104 # 8-byte Folded Reload + ld.d $s3, $sp, 112 # 8-byte Folded Reload + ld.d $s2, $sp, 120 # 8-byte Folded Reload + ld.d $s1, $sp, 128 # 8-byte Folded Reload + ld.d $s0, $sp, 136 # 8-byte Folded Reload + ld.d $fp, $sp, 144 # 8-byte Folded Reload + ld.d $ra, $sp, 152 # 8-byte Folded Reload + addi.d $sp, $sp, 160 .LBB0_105: # %._crit_edge349 ret .Lfunc_end0: diff --git a/results/MicroBenchmarks/harris/CMakeFiles/harris.dir/main.s b/results/MicroBenchmarks/harris/CMakeFiles/harris.dir/main.s index bcc1caf0..3c7cea9a 100644 --- a/results/MicroBenchmarks/harris/CMakeFiles/harris.dir/main.s +++ b/results/MicroBenchmarks/harris/CMakeFiles/harris.dir/main.s @@ -3,12 +3,8 @@ .globl _ZSt21ios_base_library_initv # End of file scope inline assembly - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z19initCheckboardImageiiPA2052_f -.LCPI0_0: - .word 0x437f0000 # float 255 .text - .globl _Z19initCheckboardImageiiPA2052_f + .globl _Z19initCheckboardImageiiPA2052_f # -- Begin function _Z19initCheckboardImageiiPA2052_f .p2align 5 .type _Z19initCheckboardImageiiPA2052_f,@function _Z19initCheckboardImageiiPA2052_f: # @_Z19initCheckboardImageiiPA2052_f @@ -25,8 +21,8 @@ _Z19initCheckboardImageiiPA2052_f: # @_Z19initCheckboardImageiiPA2052_f ori $a6, $a6, 2458 move $a7, $a5 lu32i.d $a7, 0 - pcalau12i $t0, %pc_hi20(.LCPI0_0) - fld.s $fa0, $t0, %pc_lo12(.LCPI0_0) + lu12i.w $t0, 276464 + movgr2fr.w $fa0, $t0 movgr2fr.w $fa1, $zero lu12i.w $t0, 2 ori $t0, $t0, 16 @@ -74,12 +70,7 @@ _Z19initCheckboardImageiiPA2052_f: # @_Z19initCheckboardImageiiPA2052_f .Lfunc_end0: .size _Z19initCheckboardImageiiPA2052_f, .Lfunc_end0-_Z19initCheckboardImageiiPA2052_f # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z10printImageiiPA2048_fi -.LCPI1_0: - .word 0x437f0000 # float 255 - .text - .globl _Z10printImageiiPA2048_fi + .globl _Z10printImageiiPA2048_fi # -- Begin function _Z10printImageiiPA2048_fi .p2align 5 .type _Z10printImageiiPA2048_fi,@function _Z10printImageiiPA2048_fi: # @_Z10printImageiiPA2048_fi @@ -158,10 +149,10 @@ _Z10printImageiiPA2048_fi: # @_Z10printImageiiPA2048_fi movgr2fr.w $fs0, $zero pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $s4, $a0, %pc_lo12(.L.str.1) - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI1_0) move $s5, $zero lu12i.w $s6, 2 + lu12i.w $a0, 276464 + movgr2fr.w $fs1, $a0 .p2align 4, , 16 .LBB1_8: # %.preheader.us # =>This Loop Header: Depth=1 @@ -330,12 +321,8 @@ GCC_except_table1: .Lcst_end0: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z16BENCHMARK_HARRISRN9benchmark5StateE -.LCPI2_0: - .word 0x437f0000 # float 255 .text - .globl _Z16BENCHMARK_HARRISRN9benchmark5StateE + .globl _Z16BENCHMARK_HARRISRN9benchmark5StateE # -- Begin function _Z16BENCHMARK_HARRISRN9benchmark5StateE .p2align 5 .type _Z16BENCHMARK_HARRISRN9benchmark5StateE,@function _Z16BENCHMARK_HARRISRN9benchmark5StateE: # @_Z16BENCHMARK_HARRISRN9benchmark5StateE @@ -389,8 +376,8 @@ _Z16BENCHMARK_HARRISRN9benchmark5StateE: # @_Z16BENCHMARK_HARRISRN9benchmark5Sta ori $a4, $a4, 4080 move $a5, $a2 lu32i.d $a5, 0 - pcalau12i $a6, %pc_hi20(.LCPI2_0) - fld.s $fa0, $a6, %pc_lo12(.LCPI2_0) + lu12i.w $a6, 276464 + movgr2fr.w $fa0, $a6 movgr2fr.w $fa1, $zero lu12i.w $a6, 2 ori $a6, $a6, 16 @@ -736,12 +723,8 @@ GCC_except_table2: .Lcst_end1: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function main -.LCPI3_0: - .word 0x437f0000 # float 255 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -805,8 +788,8 @@ main: # @main ori $a4, $a4, 4080 move $a5, $a2 lu32i.d $a5, 0 - pcalau12i $a6, %pc_hi20(.LCPI3_0) - fld.s $fa0, $a6, %pc_lo12(.LCPI3_0) + lu12i.w $a6, 276464 + movgr2fr.w $fa0, $a6 movgr2fr.w $fa1, $zero lu12i.w $a6, 2 ori $a6, $a6, 16 diff --git a/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/benchmark.s b/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/benchmark.s index 11265aa2..b77d317e 100644 --- a/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/benchmark.s +++ b/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/benchmark.s @@ -862,14 +862,8 @@ GCC_except_table7: .Lttbase1: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN9benchmark5State11PauseTimingEv -.LCPI8_0: - .dword 0x41cdcd6500000000 # double 1.0E+9 -.LCPI8_1: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 .text - .hidden _ZN9benchmark5State11PauseTimingEv + .hidden _ZN9benchmark5State11PauseTimingEv # -- Begin function _ZN9benchmark5State11PauseTimingEv .globl _ZN9benchmark5State11PauseTimingEv .p2align 5 .type _ZN9benchmark5State11PauseTimingEv,@function @@ -921,8 +915,10 @@ _ZN9benchmark5State11PauseTimingEv: # @_ZN9benchmark5State11PauseTimingEv jirl $ra, $ra, 0 movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI8_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI8_0) + ori $a0, $zero, 0 + lu32i.d $a0, -144027 + lu52i.d $a0, $a0, 1052 + movgr2fr.d $fa1, $a0 fld.d $fa2, $fp, 8 fld.d $fa3, $fp, 24 ld.bu $a0, $fp, 0 @@ -985,8 +981,9 @@ _ZN9benchmark5State11PauseTimingEv: # @_ZN9benchmark5State11PauseTimingEv move $s6, $zero move $s7, $zero addi.d $s1, $sp, 64 - pcalau12i $a1, %pc_hi20(.LCPI8_1) - fld.d $fs0, $a1, %pc_lo12(.LCPI8_1) + lu12i.w $a1, 256 + lu52i.d $a1, $a1, 1107 + movgr2fr.d $fs0, $a1 lu12i.w $fp, 275200 b .LBB8_13 .p2align 4, , 16 @@ -1402,12 +1399,8 @@ _ZNSt6vectorISt4pairINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEdESaIS7 .size _ZNSt6vectorISt4pairINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEdESaIS7_EED2Ev, .Lfunc_end9-_ZNSt6vectorISt4pairINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEdESaIS7_EED2Ev .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN9benchmark5State12ResumeTimingEv -.LCPI10_0: - .dword 0x41cdcd6500000000 # double 1.0E+9 .text - .hidden _ZN9benchmark5State12ResumeTimingEv + .hidden _ZN9benchmark5State12ResumeTimingEv # -- Begin function _ZN9benchmark5State12ResumeTimingEv .globl _ZN9benchmark5State12ResumeTimingEv .p2align 5 .type _ZN9benchmark5State12ResumeTimingEv,@function @@ -1433,12 +1426,14 @@ _ZN9benchmark5State12ResumeTimingEv: # @_ZN9benchmark5State12ResumeTimingEv st.b $a0, $s0, 1 pcaddu18i $ra, %call36(_ZNSt6chrono3_V212steady_clock3nowEv) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI10_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI10_0) + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -144027 ld.bu $a1, $s0, 0 + lu52i.d $a0, $a0, 1052 movgr2fr.d $fa1, $a0 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 fst.d $fa0, $s0, 8 beqz $a1, .LBB10_3 # %bb.2: @@ -1499,12 +1494,7 @@ _ZN9benchmark5State12ResumeTimingEv: # @_ZN9benchmark5State12ResumeTimingEv .size _ZN9benchmark5State12ResumeTimingEv, .Lfunc_end10-_ZN9benchmark5State12ResumeTimingEv .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN9benchmark5State15SkipWithMessageERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE -.LCPI11_0: - .dword 0x41cdcd6500000000 # double 1.0E+9 - .text - .hidden _ZN9benchmark5State15SkipWithMessageERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE + .hidden _ZN9benchmark5State15SkipWithMessageERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE # -- Begin function _ZN9benchmark5State15SkipWithMessageERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE .globl _ZN9benchmark5State15SkipWithMessageERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE .p2align 5 .type _ZN9benchmark5State15SkipWithMessageERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE,@function @@ -1568,8 +1558,10 @@ _ZN9benchmark5State15SkipWithMessageERKNSt7__cxx1112basic_stringIcSt11char_trait jirl $ra, $ra, 0 movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI11_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI11_0) + ori $a0, $zero, 0 + lu32i.d $a0, -144027 + lu52i.d $a0, $a0, 1052 + movgr2fr.d $fa1, $a0 fld.d $fa2, $s0, 8 fld.d $fa3, $s0, 24 ld.bu $a0, $s0, 0 @@ -1655,12 +1647,8 @@ GCC_except_table11: .Lcst_end5: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN9benchmark5State13SkipWithErrorERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE -.LCPI12_0: - .dword 0x41cdcd6500000000 # double 1.0E+9 .text - .hidden _ZN9benchmark5State13SkipWithErrorERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE + .hidden _ZN9benchmark5State13SkipWithErrorERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE # -- Begin function _ZN9benchmark5State13SkipWithErrorERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE .globl _ZN9benchmark5State13SkipWithErrorERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE .p2align 5 .type _ZN9benchmark5State13SkipWithErrorERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE,@function @@ -1724,8 +1712,10 @@ _ZN9benchmark5State13SkipWithErrorERKNSt7__cxx1112basic_stringIcSt11char_traitsI jirl $ra, $ra, 0 movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI12_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI12_0) + ori $a0, $zero, 0 + lu32i.d $a0, -144027 + lu52i.d $a0, $a0, 1052 + movgr2fr.d $fa1, $a0 fld.d $fa2, $s0, 8 fld.d $fa3, $s0, 24 ld.bu $a0, $s0, 0 @@ -2048,20 +2038,15 @@ _ZN9benchmark5State17FinishKeepRunningEv: # @_ZN9benchmark5State17FinishKeepRunn .size _ZN9benchmark5State17FinishKeepRunningEv, .Lfunc_end16-_ZN9benchmark5State17FinishKeepRunningEv .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN9benchmark8internal6IsZeroEd -.LCPI17_0: - .dword 0x3cb0000000000000 # double 2.2204460492503131E-16 - .text - .hidden _ZN9benchmark8internal6IsZeroEd + .hidden _ZN9benchmark8internal6IsZeroEd # -- Begin function _ZN9benchmark8internal6IsZeroEd .globl _ZN9benchmark8internal6IsZeroEd .p2align 5 .type _ZN9benchmark8internal6IsZeroEd,@function _ZN9benchmark8internal6IsZeroEd: # @_ZN9benchmark8internal6IsZeroEd # %bb.0: - pcalau12i $a0, %pc_hi20(.LCPI17_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI17_0) fabs.d $fa0, $fa0 + lu52i.d $a0, $zero, 971 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 movcf2gr $a0, $fcc0 ret diff --git a/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/benchmark_runner.s b/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/benchmark_runner.s index 430c0c92..ba0e9c3b 100644 --- a/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/benchmark_runner.s +++ b/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/benchmark_runner.s @@ -1995,16 +1995,8 @@ GCC_except_table7: .Lttbase2: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZNK9benchmark8internal15BenchmarkRunner21PredictNumItersNeededERKNS1_16IterationResultsE -.LCPI8_0: - .dword 0x3ff6666666666666 # double 1.3999999999999999 -.LCPI8_1: - .dword 0x3e112e0be826d695 # double 1.0000000000000001E-9 -.LCPI8_2: - .dword 0x3fb999999999999a # double 0.10000000000000001 .text - .hidden _ZNK9benchmark8internal15BenchmarkRunner21PredictNumItersNeededERKNS1_16IterationResultsE + .hidden _ZNK9benchmark8internal15BenchmarkRunner21PredictNumItersNeededERKNS1_16IterationResultsE # -- Begin function _ZNK9benchmark8internal15BenchmarkRunner21PredictNumItersNeededERKNS1_16IterationResultsE .globl _ZNK9benchmark8internal15BenchmarkRunner21PredictNumItersNeededERKNS1_16IterationResultsE .p2align 5 .type _ZNK9benchmark8internal15BenchmarkRunner21PredictNumItersNeededERKNS1_16IterationResultsE,@function @@ -2032,20 +2024,29 @@ _ZNK9benchmark8internal15BenchmarkRunner21PredictNumItersNeededERKNS1_16Iteratio maskeqz $a2, $a4, $a2 or $a2, $a2, $a3 fldx.d $fa0, $a0, $a2 - pcalau12i $a0, %pc_hi20(.LCPI8_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI8_0) - fld.d $fa2, $a1, 168 - pcalau12i $a0, %pc_hi20(.LCPI8_1) - fld.d $fa3, $a0, %pc_lo12(.LCPI8_1) + lu12i.w $a0, 419430 + ori $a0, $a0, 1638 + lu32i.d $a0, 419430 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa1, $a0 fmul.d $fa1, $fa0, $fa1 + fld.d $fa2, $a1, 168 + lu12i.w $a0, -97683 + ori $a0, $a0, 1685 + lu32i.d $a0, 77323 + lu52i.d $a0, $a0, 993 + movgr2fr.d $fa3, $a0 fcmp.clt.d $fcc0, $fa2, $fa3 fsel $fa3, $fa2, $fa3, $fcc0 - pcalau12i $a0, %pc_hi20(.LCPI8_2) - fld.d $fa4, $a0, %pc_lo12(.LCPI8_2) fdiv.d $fa1, $fa1, $fa3 fdiv.d $fa0, $fa2, $fa0 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1019 + movgr2fr.d $fa2, $a0 ld.d $a0, $a1, 160 - fcmp.clt.d $fcc0, $fa4, $fa0 + fcmp.clt.d $fcc0, $fa2, $fa0 vldi $vr0, -988 fsel $fs0, $fa0, $fa1, $fcc0 movgr2fr.d $fa0, $a0 diff --git a/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/complexity.s b/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/complexity.s index 08eda905..898e7819 100644 --- a/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/complexity.s +++ b/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/complexity.s @@ -105,12 +105,8 @@ _ZN9benchmark13GetBigOStringB5cxx11ENS_4BigOE: # @_ZN9benchmark13GetBigOStringB5 .word .LBB1_3-.LJTI1_0 .word .LBB1_8-.LJTI1_0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEEPFdlE -.LCPI2_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 .text - .hidden _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEEPFdlE + .hidden _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEEPFdlE # -- Begin function _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEEPFdlE .globl _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEEPFdlE .p2align 5 .type _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEEPFdlE,@function @@ -215,12 +211,13 @@ _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEEPFdlE: # @_ZN9benc movgr2fr.d $fs1, $zero .LBB2_8: # %._crit_edge48 srli.d $a1, $a0, 32 - pcalau12i $a2, %pc_hi20(.LCPI2_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI2_0) lu52i.d $a2, $zero, 1107 or $a1, $a1, $a2 + movgr2fr.d $fa0, $a1 + lu12i.w $a1, 256 + lu52i.d $a1, $a1, 1107 movgr2fr.d $fa1, $a1 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 lu12i.w $a1, 275200 bstrins.d $a0, $a1, 63, 32 movgr2fr.d $fa1, $a0 @@ -254,14 +251,7 @@ _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEEPFdlE: # @_ZN9benc .size _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEEPFdlE, .Lfunc_end2-_ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEEPFdlE .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEENS_4BigOE -.LCPI3_0: - .dword 0x7ff8000000000000 # double NaN -.LCPI3_1: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 - .text - .hidden _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEENS_4BigOE + .hidden _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEENS_4BigOE # -- Begin function _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEENS_4BigOE .globl _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEENS_4BigOE .p2align 5 .type _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEENS_4BigOE,@function @@ -421,12 +411,13 @@ _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEENS_4BigOE: # @_ZN9 bltu $s4, $a1, .LBB3_14 # %bb.15: # %_ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEEPFdlE.exit70.loopexit srli.d $a0, $a1, 32 - pcalau12i $a2, %pc_hi20(.LCPI3_1) - fld.d $fa0, $a2, %pc_lo12(.LCPI3_1) lu52i.d $a2, $zero, 1107 or $a0, $a0, $a2 + movgr2fr.d $fa0, $a0 + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 movgr2fr.d $fa1, $a0 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 lu12i.w $a0, 275200 bstrins.d $a1, $a0, 63, 32 movgr2fr.d $fa1, $a1 @@ -445,12 +436,11 @@ _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEENS_4BigOE: # @_ZN9 addi.d $a0, $a0, %pc_lo12(.Lconstinit) vld $vr0, $a0, 0 ld.w $a1, $a0, 16 - vst $vr0, $s2, 0 ld.d $a0, $s1, 8 ld.d $a2, $s1, 0 + vst $vr0, $s2, 0 st.w $a1, $s2, 16 - pcalau12i $s4, %pc_hi20(.LCPI3_0) - pcalau12i $s3, %pc_hi20(.LCPI3_1) + lu12i.w $s4, 256 beq $a0, $a2, .LBB3_28 # %bb.18: # %.lr.ph.i.preheader ld.d $a1, $s0, 0 @@ -488,11 +478,12 @@ _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEENS_4BigOE: # @_ZN9 bnez $a2, .LBB3_21 # %bb.22: # %.loopexit88.loopexit srli.d $a1, $a0, 32 - fld.d $fa1, $s3, %pc_lo12(.LCPI3_1) lu52i.d $a2, $zero, 1107 or $a1, $a1, $a2 + movgr2fr.d $fa1, $a1 + lu52i.d $a1, $s4, 1107 movgr2fr.d $fa3, $a1 - fsub.d $fa1, $fa3, $fa1 + fsub.d $fa1, $fa1, $fa3 lu12i.w $a1, 275200 bstrins.d $a0, $a1, 63, 32 movgr2fr.d $fa3, $a0 @@ -505,9 +496,11 @@ _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEENS_4BigOE: # @_ZN9 ld.d $a0, $s1, 0 bne $a1, $a0, .LBB3_10 .LBB3_24: - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI3_0) movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -524288 + lu52i.d $a0, $a0, 2047 + movgr2fr.d $fs1, $a0 fmov.d $fs2, $fs0 fmov.d $fa2, $fs0 .LBB3_25: # %_ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEEPFdlE.exit70 @@ -544,8 +537,11 @@ _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEENS_4BigOE: # @_ZN9 addi.d $sp, $sp, 192 ret .LBB3_28: - fld.d $fs0, $s4, %pc_lo12(.LCPI3_0) movgr2fr.d $fa2, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -524288 + lu52i.d $a0, $a0, 2047 + movgr2fr.d $fs0, $a0 fmov.d $fa0, $fa2 fmov.d $fa3, $fa2 .LBB3_29: # %.loopexit88 @@ -564,11 +560,15 @@ _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEENS_4BigOE: # @_ZN9 pcalau12i $a0, %pc_hi20(.Lswitch.table._ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEENS_4BigOE) addi.d $s7, $a0, %pc_lo12(.Lswitch.table._ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEENS_4BigOE) move $s8, $zero - fld.d $fs2, $s4, %pc_lo12(.LCPI3_0) - fld.d $fa0, $s3, %pc_lo12(.LCPI3_1) - fst.d $fa0, $sp, 24 # 8-byte Folded Spill - movgr2fr.d $fs3, $zero + movgr2fr.d $fs4, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -524288 + lu52i.d $a0, $a0, 2047 + movgr2fr.d $fs3, $a0 ori $s3, $zero, 20 + lu52i.d $a0, $s4, 1107 + movgr2fr.d $fa0, $a0 + fst.d $fa0, $sp, 24 # 8-byte Folded Spill b .LBB3_32 .p2align 4, , 16 .LBB3_31: # in Loop: Header=BB3_32 Depth=1 @@ -590,18 +590,18 @@ _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEENS_4BigOE: # @_ZN9 # in Loop: Header=BB3_32 Depth=1 ld.d $a1, $s1, 8 ld.d $a0, $s1, 0 - fmov.d $fs6, $fs2 - fmov.d $fs5, $fs3 - fmov.d $fs7, $fs3 - fmov.d $fs4, $fs3 + fmov.d $fs6, $fs3 + fmov.d $fs5, $fs4 + fmov.d $fs7, $fs4 + fmov.d $fs2, $fs4 beq $a1, $a0, .LBB3_43 # %bb.35: # %.lr.ph.i26.preheader # in Loop: Header=BB3_32 Depth=1 move $s4, $zero move $s6, $zero - fmov.d $fs6, $fs3 - fmov.d $fs5, $fs3 - fmov.d $fs7, $fs3 + fmov.d $fs6, $fs4 + fmov.d $fs5, $fs4 + fmov.d $fs7, $fs4 .p2align 4, , 16 .LBB3_36: # %.lr.ph.i26 # Parent Loop BB3_32 Depth=1 @@ -632,7 +632,7 @@ _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEENS_4BigOE: # @_ZN9 # in Loop: Header=BB3_32 Depth=1 move $s4, $zero move $s6, $zero - fmov.d $fs7, $fs3 + fmov.d $fs7, $fs4 .p2align 4, , 16 .LBB3_40: # %.lr.ph47.i32 # Parent Loop BB3_32 Depth=1 @@ -667,16 +667,16 @@ _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEENS_4BigOE: # @_ZN9 lu12i.w $a0, 275200 bstrins.d $a1, $a0, 63, 32 movgr2fr.d $fa1, $a1 - fadd.d $fs4, $fa1, $fa0 + fadd.d $fs2, $fa1, $fa0 .LBB3_43: # %.loopexit # in Loop: Header=BB3_32 Depth=1 - fdiv.d $fa1, $fs7, $fs4 + fdiv.d $fa1, $fs7, $fs2 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB3_47 .LBB3_44: # %.loopexit.split # in Loop: Header=BB3_32 Depth=1 - fdiv.d $fa1, $fs5, $fs4 + fdiv.d $fa1, $fs5, $fs2 fdiv.d $fa0, $fa0, $fa1 fcmp.cule.d $fcc0, $fs1, $fa0 bcnez $fcc0, .LBB3_31 @@ -688,9 +688,9 @@ _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEENS_4BigOE: # @_ZN9 b .LBB3_31 .p2align 4, , 16 .LBB3_46: # in Loop: Header=BB3_32 Depth=1 - fmov.d $fs7, $fs3 - fmov.d $fs4, $fs3 - fdiv.d $fa1, $fs7, $fs4 + fmov.d $fs7, $fs4 + fmov.d $fs2, $fs4 + fdiv.d $fa1, $fs7, $fs2 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 bcnez $fcc0, .LBB3_44 @@ -699,7 +699,7 @@ _ZN9benchmark14MinimalLeastSqERKSt6vectorIlSaIlEERKS0_IdSaIdEENS_4BigOE: # @_ZN9 fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - fdiv.d $fa1, $fs5, $fs4 + fdiv.d $fa1, $fs5, $fs2 fdiv.d $fa0, $fa0, $fa1 fcmp.cule.d $fcc0, $fs1, $fa0 bcnez $fcc0, .LBB3_31 @@ -829,14 +829,8 @@ GCC_except_table3: .Lcst_end0: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN9benchmark11ComputeBigOERKSt6vectorINS_17BenchmarkReporter3RunESaIS2_EE -.LCPI4_0: - .dword 0x7ff8000000000000 # double NaN -.LCPI4_1: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 .text - .hidden _ZN9benchmark11ComputeBigOERKSt6vectorINS_17BenchmarkReporter3RunESaIS2_EE + .hidden _ZN9benchmark11ComputeBigOERKSt6vectorINS_17BenchmarkReporter3RunESaIS2_EE # -- Begin function _ZN9benchmark11ComputeBigOERKSt6vectorINS_17BenchmarkReporter3RunESaIS2_EE .globl _ZN9benchmark11ComputeBigOERKSt6vectorINS_17BenchmarkReporter3RunESaIS2_EE .p2align 5 .type _ZN9benchmark11ComputeBigOERKSt6vectorINS_17BenchmarkReporter3RunESaIS2_EE,@function @@ -1171,148 +1165,146 @@ _ZN9benchmark11ComputeBigOERKSt6vectorINS_17BenchmarkReporter3RunESaIS2_EE: # @_ ori $a0, $zero, 8 bne $a3, $a0, .LBB4_53 .LBB4_36: - ld.d $fp, $sp, 1480 + ld.d $s1, $sp, 1480 ld.d $s2, $s7, 464 - pcalau12i $s0, %pc_hi20(.LCPI4_0) - sub.d $s5, $s8, $fp - pcalau12i $a0, %pc_hi20(.LCPI4_1) - st.d $a0, $sp, 32 # 8-byte Folded Spill - beq $s8, $fp, .LBB4_61 + sub.d $s4, $s8, $s1 + beq $s8, $s1, .LBB4_61 # %bb.37: # %.lr.ph.i.preheader - st.d $s0, $sp, 24 # 8-byte Folded Spill - ld.d $s0, $sp, 1432 - srai.d $s4, $s5, 3 + ld.d $fp, $sp, 1432 + srai.d $s0, $s4, 3 movgr2fr.d $fs0, $zero - move $s1, $fp - move $s7, $s0 - move $s6, $s4 + move $s7, $s1 + move $s6, $fp + move $s5, $s0 fmov.d $fs2, $fs0 fmov.d $fs1, $fs0 .p2align 4, , 16 .LBB4_38: # %.lr.ph.i # =>This Inner Loop Header: Depth=1 - ld.d $a0, $s1, 0 + ld.d $a0, $s7, 0 .Ltmp35: # EH_LABEL jirl $ra, $s2, 0 .Ltmp36: # EH_LABEL # %bb.39: # %.noexc71 # in Loop: Header=BB4_38 Depth=1 - fld.d $fa1, $s7, 0 + fld.d $fa1, $s6, 0 fmadd.d $fs0, $fa0, $fa0, $fs0 fadd.d $fs2, $fs2, $fa1 fmadd.d $fs1, $fa1, $fa0, $fs1 - addi.d $s6, $s6, -1 + addi.d $s5, $s5, -1 + addi.d $s6, $s6, 8 addi.d $s7, $s7, 8 - addi.d $s1, $s1, 8 - bnez $s6, .LBB4_38 + bnez $s5, .LBB4_38 # %bb.40: # %._crit_edge.i fdiv.d $fs1, $fs1, $fs0 movgr2fr.d $fs0, $zero - move $s1, $fp - move $s6, $s4 + move $s5, $s1 + move $s6, $s0 .p2align 4, , 16 .LBB4_41: # %.lr.ph47.i # =>This Inner Loop Header: Depth=1 - ld.d $a0, $s1, 0 + ld.d $a0, $s5, 0 .Ltmp38: # EH_LABEL jirl $ra, $s2, 0 .Ltmp39: # EH_LABEL # %bb.42: # %.noexc72 # in Loop: Header=BB4_41 Depth=1 - fld.d $fa1, $s0, 0 + fld.d $fa1, $fp, 0 fmul.d $fa0, $fs1, $fa0 fsub.d $fa0, $fa1, $fa0 fmul.d $fa0, $fa0, $fa0 fadd.d $fs0, $fs0, $fa0 addi.d $s6, $s6, -1 - addi.d $s0, $s0, 8 - addi.d $s1, $s1, 8 + addi.d $fp, $fp, 8 + addi.d $s5, $s5, 8 bnez $s6, .LBB4_41 # %bb.43: # %.loopexit172.loopexit ld.d $s6, $sp, 8 # 8-byte Folded Reload ld.d $a0, $s6, 0 ld.d $s2, $a0, 464 - srli.d $a0, $s4, 32 - ld.d $a1, $sp, 32 # 8-byte Folded Reload - fld.d $fa0, $a1, %pc_lo12(.LCPI4_1) + srli.d $a0, $s0, 32 lu52i.d $a1, $zero, 1107 or $a0, $a0, $a1 + movgr2fr.d $fa0, $a0 + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 movgr2fr.d $fa1, $a0 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 lu12i.w $a0, 275200 - bstrins.d $s4, $a0, 63, 32 - movgr2fr.d $fa1, $s4 + bstrins.d $s0, $a0, 63, 32 + movgr2fr.d $fa1, $s0 fadd.d $fs4, $fa1, $fa0 - ld.d $s0, $sp, 24 # 8-byte Folded Reload + ld.d $s5, $sp, 16 # 8-byte Folded Reload fdiv.d $fa0, $fs0, $fs4 fsqrt.d $fs0, $fa0 fcmp.cor.d $fcc0, $fs0, $fs0 bceqz $fcc0, .LBB4_62 .LBB4_44: # %.loopexit172.split - beq $s8, $fp, .LBB4_63 + beq $s8, $s1, .LBB4_63 .LBB4_45: # %.lr.ph.i74.preheader move $s7, $s6 - ld.d $s6, $sp, 1456 - srai.d $s4, $s5, 3 + ld.d $s5, $sp, 1456 + srai.d $s4, $s4, 3 movgr2fr.d $fs3, $zero - move $s1, $fp - move $s0, $s6 - move $s5, $s4 + move $s6, $s1 + move $fp, $s5 + move $s0, $s4 fmov.d $fs5, $fs3 fmov.d $fs6, $fs3 .p2align 4, , 16 .LBB4_46: # %.lr.ph.i74 # =>This Inner Loop Header: Depth=1 - ld.d $a0, $s1, 0 + ld.d $a0, $s6, 0 .Ltmp41: # EH_LABEL jirl $ra, $s2, 0 .Ltmp42: # EH_LABEL # %bb.47: # %.noexc92 # in Loop: Header=BB4_46 Depth=1 - fld.d $fa1, $s0, 0 + fld.d $fa1, $fp, 0 fmadd.d $fs3, $fa0, $fa0, $fs3 fadd.d $fs5, $fs5, $fa1 fmadd.d $fs6, $fa1, $fa0, $fs6 - addi.d $s5, $s5, -1 - addi.d $s0, $s0, 8 - addi.d $s1, $s1, 8 - bnez $s5, .LBB4_46 + addi.d $s0, $s0, -1 + addi.d $fp, $fp, 8 + addi.d $s6, $s6, 8 + bnez $s0, .LBB4_46 # %bb.48: # %._crit_edge.i79 fdiv.d $fs3, $fs6, $fs3 movgr2fr.d $fs6, $zero - move $s0, $s4 - ld.d $s5, $sp, 16 # 8-byte Folded Reload + move $fp, $s4 + move $s6, $s7 .p2align 4, , 16 .LBB4_49: # %.lr.ph47.i80 # =>This Inner Loop Header: Depth=1 - ld.d $a0, $fp, 0 + ld.d $a0, $s1, 0 .Ltmp44: # EH_LABEL jirl $ra, $s2, 0 .Ltmp45: # EH_LABEL # %bb.50: # %.noexc93 # in Loop: Header=BB4_49 Depth=1 - fld.d $fa1, $s6, 0 + fld.d $fa1, $s5, 0 fmul.d $fa0, $fs3, $fa0 fsub.d $fa0, $fa1, $fa0 fmul.d $fa0, $fa0, $fa0 fadd.d $fs6, $fs6, $fa0 - addi.d $s0, $s0, -1 - addi.d $s6, $s6, 8 - addi.d $fp, $fp, 8 - bnez $s0, .LBB4_49 + addi.d $fp, $fp, -1 + addi.d $s5, $s5, 8 + addi.d $s1, $s1, 8 + bnez $fp, .LBB4_49 # %bb.51: # %.loopexit166.loopexit srli.d $a0, $s4, 32 - ld.d $a1, $sp, 32 # 8-byte Folded Reload - fld.d $fa0, $a1, %pc_lo12(.LCPI4_1) lu52i.d $a1, $zero, 1107 or $a0, $a0, $a1 + movgr2fr.d $fa0, $a0 + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 movgr2fr.d $fa1, $a0 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 lu12i.w $a0, 275200 bstrins.d $s4, $a0, 63, 32 movgr2fr.d $fa1, $s4 fadd.d $fa2, $fa1, $fa0 - move $s6, $s7 + ld.d $s5, $sp, 16 # 8-byte Folded Reload b .LBB4_64 .LBB4_52: st.d $zero, $sp, 48 # 8-byte Folded Spill @@ -1368,8 +1360,11 @@ _ZN9benchmark11ComputeBigOERKSt6vectorINS_17BenchmarkReporter3RunESaIS2_EE: # @_ fld.d $fs0, $sp, 624 b .LBB4_66 .LBB4_61: - fld.d $fs1, $s0, %pc_lo12(.LCPI4_0) movgr2fr.d $fs2, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -524288 + lu52i.d $a0, $a0, 2047 + movgr2fr.d $fs1, $a0 fmov.d $fs0, $fs2 fmov.d $fs4, $fs2 fdiv.d $fa0, $fs0, $fs4 @@ -1380,13 +1375,15 @@ _ZN9benchmark11ComputeBigOERKSt6vectorINS_17BenchmarkReporter3RunESaIS2_EE: # @_ pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 fmov.d $fs0, $fa0 - bne $s8, $fp, .LBB4_45 + bne $s8, $s1, .LBB4_45 .LBB4_63: - fld.d $fs3, $s0, %pc_lo12(.LCPI4_0) movgr2fr.d $fs5, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -524288 + lu52i.d $a0, $a0, 2047 + movgr2fr.d $fs3, $a0 fmov.d $fs6, $fs5 fmov.d $fa2, $fs5 - ld.d $s5, $sp, 16 # 8-byte Folded Reload .LBB4_64: # %.loopexit166 fdiv.d $fs2, $fs2, $fs4 fdiv.d $fa1, $fs6, $fa2 diff --git a/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/console_reporter.s b/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/console_reporter.s index e58809ae..3819bedc 100644 --- a/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/console_reporter.s +++ b/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/console_reporter.s @@ -510,14 +510,7 @@ _ZN9benchmark15ConsoleReporter10ReportRunsERKSt6vectorINS_17BenchmarkReporter3Ru .size _ZN9benchmark15ConsoleReporter10ReportRunsERKSt6vectorINS_17BenchmarkReporter3RunESaIS3_EE, .Lfunc_end2-_ZN9benchmark15ConsoleReporter10ReportRunsERKSt6vectorINS_17BenchmarkReporter3RunESaIS3_EE .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN9benchmark15ConsoleReporter12PrintRunDataERKNS_17BenchmarkReporter3RunE -.LCPI3_0: - .dword 0x4059000000000000 # double 100 -.LCPI3_1: - .dword 0x4202a05f1ff80000 # double 9999999999 - .text - .hidden _ZN9benchmark15ConsoleReporter12PrintRunDataERKNS_17BenchmarkReporter3RunE + .hidden _ZN9benchmark15ConsoleReporter12PrintRunDataERKNS_17BenchmarkReporter3RunE # -- Begin function _ZN9benchmark15ConsoleReporter12PrintRunDataERKNS_17BenchmarkReporter3RunE .globl _ZN9benchmark15ConsoleReporter12PrintRunDataERKNS_17BenchmarkReporter3RunE .p2align 5 .type _ZN9benchmark15ConsoleReporter12PrintRunDataERKNS_17BenchmarkReporter3RunE,@function @@ -652,8 +645,10 @@ _ZN9benchmark15ConsoleReporter12PrintRunDataERKNS_17BenchmarkReporter3RunE: # @_ addi.d $a1, $a0, %pc_lo12(.L.str.24) b .LBB3_14 .LBB3_11: - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI3_0) + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa0, $a0 fcmp.clt.d $fcc0, $fs0, $fa0 bceqz $fcc0, .LBB3_13 # %bb.12: @@ -661,8 +656,10 @@ _ZN9benchmark15ConsoleReporter12PrintRunDataERKNS_17BenchmarkReporter3RunE: # @_ addi.d $a1, $a0, %pc_lo12(.L.str.25) b .LBB3_14 .LBB3_13: - pcalau12i $a0, %pc_hi20(.LCPI3_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI3_1) + lu12i.w $a0, 130944 + lu32i.d $a0, 172127 + lu52i.d $a0, $a0, 1056 + movgr2fr.d $fa0, $a0 fcmp.clt.d $fcc0, $fa0, $fs0 pcalau12i $a0, %pc_hi20(.L.str.27) addi.d $a0, $a0, %pc_lo12(.L.str.27) @@ -690,8 +687,10 @@ _ZN9benchmark15ConsoleReporter12PrintRunDataERKNS_17BenchmarkReporter3RunE: # @_ addi.d $s3, $a0, %pc_lo12(.L.str.24) b .LBB3_20 .LBB3_17: - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI3_0) + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa0, $a0 fcmp.clt.d $fcc0, $fs1, $fa0 bceqz $fcc0, .LBB3_19 # %bb.18: @@ -699,8 +698,10 @@ _ZN9benchmark15ConsoleReporter12PrintRunDataERKNS_17BenchmarkReporter3RunE: # @_ addi.d $s3, $a0, %pc_lo12(.L.str.25) b .LBB3_20 .LBB3_19: - pcalau12i $a0, %pc_hi20(.LCPI3_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI3_1) + lu12i.w $a0, 130944 + lu32i.d $a0, 172127 + lu52i.d $a0, $a0, 1056 + movgr2fr.d $fa0, $a0 fcmp.clt.d $fcc0, $fa0, $fs1 pcalau12i $a0, %pc_hi20(.L.str.27) addi.d $a0, $a0, %pc_lo12(.L.str.27) @@ -771,8 +772,10 @@ _ZN9benchmark15ConsoleReporter12PrintRunDataERKNS_17BenchmarkReporter3RunE: # @_ pcalau12i $a0, %pc_hi20(.L.str.11) addi.d $a2, $a0, %pc_lo12(.L.str.11) .LBB3_31: # %.invoke - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI3_0) + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa0, $a0 fmul.d $fa1, $fs0, $fa0 fmul.d $fa0, $fs1, $fa0 movfr2gr.d $a3, $fa1 @@ -824,9 +827,11 @@ _ZN9benchmark15ConsoleReporter12PrintRunDataERKNS_17BenchmarkReporter3RunE: # @_ beq $s2, $s3, .LBB3_77 # %bb.37: # %.lr.ph addi.d $s5, $sp, 56 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI3_0) ori $s4, $zero, 1 + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fs0, $a0 pcalau12i $a0, %pc_hi20(.L.str.17) addi.d $a0, $a0, %pc_lo12(.L.str.17) st.d $a0, $sp, 32 # 8-byte Folded Spill diff --git a/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/json_reporter.s b/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/json_reporter.s index 14be0723..ee1cef17 100644 --- a/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/json_reporter.s +++ b/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/json_reporter.s @@ -8,10 +8,6 @@ .LCPI0_0: .dword 8 # 0x8 .dword 8319679458741941614 # 0x737570635f6d756e - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI0_1: - .dword 0x412e848000000000 # double 1.0E+6 .text .hidden _ZN9benchmark12JSONReporter13ReportContextERKNS_17BenchmarkReporter7ContextE .globl _ZN9benchmark12JSONReporter13ReportContextERKNS_17BenchmarkReporter7ContextE @@ -350,8 +346,10 @@ _ZN9benchmark12JSONReporter13ReportContextERKNS_17BenchmarkReporter7ContextE: # st.d $a0, $sp, 128 st.b $zero, $sp, 147 fld.d $fa0, $s8, 8 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_1) + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa1, $a0 fdiv.d $fa0, $fa0, $fa1 pcaddu18i $ra, %call36(lround) jirl $ra, $ra, 0 diff --git a/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/reporter.s b/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/reporter.s index 163347d9..3cb1e6a3 100644 --- a/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/reporter.s +++ b/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/reporter.s @@ -44,12 +44,8 @@ _ZN9benchmark17BenchmarkReporterD0Ev: # @_ZN9benchmark17BenchmarkReporterD0Ev .Lfunc_end2: .size _ZN9benchmark17BenchmarkReporterD0Ev, .Lfunc_end2-_ZN9benchmark17BenchmarkReporterD0Ev # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN9benchmark17BenchmarkReporter17PrintBasicContextEPSoRKNS0_7ContextE -.LCPI3_0: - .dword 0x412e848000000000 # double 1.0E+6 .text - .hidden _ZN9benchmark17BenchmarkReporter17PrintBasicContextEPSoRKNS0_7ContextE + .hidden _ZN9benchmark17BenchmarkReporter17PrintBasicContextEPSoRKNS0_7ContextE # -- Begin function _ZN9benchmark17BenchmarkReporter17PrintBasicContextEPSoRKNS0_7ContextE .globl _ZN9benchmark17BenchmarkReporter17PrintBasicContextEPSoRKNS0_7ContextE .p2align 5 .type _ZN9benchmark17BenchmarkReporter17PrintBasicContextEPSoRKNS0_7ContextE,@function @@ -185,8 +181,10 @@ _ZN9benchmark17BenchmarkReporter17PrintBasicContextEPSoRKNS0_7ContextE: # @_ZN9b pcaddu18i $ra, %call36(_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l) jirl $ra, $ra, 0 fld.d $fa0, $s7, 8 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI3_0) + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa1, $a0 fdiv.d $fa0, $fa0, $fa1 move $a0, $s0 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) diff --git a/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/statistics.s b/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/statistics.s index e2fca5ed..5970beb4 100644 --- a/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/statistics.s +++ b/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/statistics.s @@ -3,12 +3,8 @@ .globl _ZSt21ios_base_library_initv # End of file scope inline assembly - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN9benchmark14StatisticsMeanERKSt6vectorIdSaIdEE -.LCPI0_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 .text - .hidden _ZN9benchmark14StatisticsMeanERKSt6vectorIdSaIdEE + .hidden _ZN9benchmark14StatisticsMeanERKSt6vectorIdSaIdEE # -- Begin function _ZN9benchmark14StatisticsMeanERKSt6vectorIdSaIdEE .globl _ZN9benchmark14StatisticsMeanERKSt6vectorIdSaIdEE .p2align 5 .type _ZN9benchmark14StatisticsMeanERKSt6vectorIdSaIdEE,@function @@ -31,12 +27,13 @@ _ZN9benchmark14StatisticsMeanERKSt6vectorIdSaIdEE: # @_ZN9benchmark14StatisticsM sub.d $a0, $a0, $a1 srai.d $a0, $a0, 3 srli.d $a1, $a0, 32 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a2, %pc_lo12(.LCPI0_0) lu52i.d $a2, $zero, 1107 or $a1, $a1, $a2 + movgr2fr.d $fa1, $a1 + lu12i.w $a1, 256 + lu52i.d $a1, $a1, 1107 movgr2fr.d $fa2, $a1 - fsub.d $fa1, $fa2, $fa1 + fsub.d $fa1, $fa1, $fa2 lu12i.w $a1, 275200 bstrins.d $a0, $a1, 63, 32 movgr2fr.d $fa2, $a0 @@ -262,12 +259,8 @@ GCC_except_table1: .Lcst_end0: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN9benchmark16StatisticsStdDevERKSt6vectorIdSaIdEE -.LCPI2_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 .text - .hidden _ZN9benchmark16StatisticsStdDevERKSt6vectorIdSaIdEE + .hidden _ZN9benchmark16StatisticsStdDevERKSt6vectorIdSaIdEE # -- Begin function _ZN9benchmark16StatisticsStdDevERKSt6vectorIdSaIdEE .globl _ZN9benchmark16StatisticsStdDevERKSt6vectorIdSaIdEE .p2align 5 .type _ZN9benchmark16StatisticsStdDevERKSt6vectorIdSaIdEE,@function @@ -297,12 +290,13 @@ _ZN9benchmark16StatisticsStdDevERKSt6vectorIdSaIdEE: # @_ZN9benchmark16Statistic .LBB2_6: # %.lr.ph.i.i.preheader srai.d $a2, $a2, 3 srli.d $a3, $a2, 32 - pcalau12i $a4, %pc_hi20(.LCPI2_0) - fld.d $fa1, $a4, %pc_lo12(.LCPI2_0) lu52i.d $a4, $zero, 1107 or $a3, $a3, $a4 + movgr2fr.d $fa1, $a3 + lu12i.w $a3, 256 + lu52i.d $a3, $a3, 1107 movgr2fr.d $fa2, $a3 - fsub.d $fa1, $fa2, $fa1 + fsub.d $fa1, $fa1, $fa2 lu12i.w $a3, 275200 bstrins.d $a2, $a3, 63, 32 movgr2fr.d $fa2, $a2 @@ -339,12 +333,7 @@ _ZN9benchmark16StatisticsStdDevERKSt6vectorIdSaIdEE: # @_ZN9benchmark16Statistic .Lfunc_end2: .size _ZN9benchmark16StatisticsStdDevERKSt6vectorIdSaIdEE, .Lfunc_end2-_ZN9benchmark16StatisticsStdDevERKSt6vectorIdSaIdEE # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN9benchmark12StatisticsCVERKSt6vectorIdSaIdEE -.LCPI3_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 - .text - .hidden _ZN9benchmark12StatisticsCVERKSt6vectorIdSaIdEE + .hidden _ZN9benchmark12StatisticsCVERKSt6vectorIdSaIdEE # -- Begin function _ZN9benchmark12StatisticsCVERKSt6vectorIdSaIdEE .globl _ZN9benchmark12StatisticsCVERKSt6vectorIdSaIdEE .p2align 5 .type _ZN9benchmark12StatisticsCVERKSt6vectorIdSaIdEE,@function @@ -377,12 +366,13 @@ _ZN9benchmark12StatisticsCVERKSt6vectorIdSaIdEE: # @_ZN9benchmark12StatisticsCVE bne $a2, $fp, .LBB3_3 # %bb.4: srli.d $a2, $a1, 32 - pcalau12i $a3, %pc_hi20(.LCPI3_0) - fld.d $fa0, $a3, %pc_lo12(.LCPI3_0) lu52i.d $a3, $zero, 1107 or $a2, $a2, $a3 + movgr2fr.d $fa0, $a2 + lu12i.w $a2, 256 + lu52i.d $a2, $a2, 1107 movgr2fr.d $fa2, $a2 - fsub.d $fa0, $fa2, $fa0 + fsub.d $fa0, $fa0, $fa2 lu12i.w $a2, 275200 bstrins.d $a1, $a2, 63, 32 movgr2fr.d $fa2, $a1 @@ -449,12 +439,7 @@ _ZN9benchmark12StatisticsCVERKSt6vectorIdSaIdEE: # @_ZN9benchmark12StatisticsCVE .Lfunc_end3: .size _ZN9benchmark12StatisticsCVERKSt6vectorIdSaIdEE, .Lfunc_end3-_ZN9benchmark12StatisticsCVERKSt6vectorIdSaIdEE # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN9benchmark12ComputeStatsERKSt6vectorINS_17BenchmarkReporter3RunESaIS2_EE -.LCPI4_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 - .text - .hidden _ZN9benchmark12ComputeStatsERKSt6vectorINS_17BenchmarkReporter3RunESaIS2_EE + .hidden _ZN9benchmark12ComputeStatsERKSt6vectorINS_17BenchmarkReporter3RunESaIS2_EE # -- Begin function _ZN9benchmark12ComputeStatsERKSt6vectorINS_17BenchmarkReporter3RunESaIS2_EE .globl _ZN9benchmark12ComputeStatsERKSt6vectorINS_17BenchmarkReporter3RunESaIS2_EE .p2align 5 .type _ZN9benchmark12ComputeStatsERKSt6vectorINS_17BenchmarkReporter3RunESaIS2_EE,@function @@ -1589,12 +1574,13 @@ _ZN9benchmark12ComputeStatsERKSt6vectorINS_17BenchmarkReporter3RunESaIS2_EE: # @ beq $a1, $a0, .LBB4_201 # %bb.142: # %.lr.ph356 srli.d $a0, $s1, 32 - pcalau12i $a1, %pc_hi20(.LCPI4_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI4_0) lu52i.d $a1, $zero, 1107 or $a0, $a0, $a1 + movgr2fr.d $fa0, $a0 + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 movgr2fr.d $fa1, $a0 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 lu12i.w $a0, 275200 bstrins.d $s1, $a0, 63, 32 movgr2fr.d $fa1, $s1 diff --git a/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/string_util.s b/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/string_util.s index 0a77a427..7c4ce517 100644 --- a/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/string_util.s +++ b/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/string_util.s @@ -8,10 +8,6 @@ .LCPI0_0: .dword 0x408f400000000000 # double 1000 .dword 0x4090000000000000 # double 1024 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI0_1: - .dword 0x3f847ae147ae147b # double 0.01 .text .hidden _ZN9benchmark19HumanReadableNumberB5cxx11EdNS_7Counter4OneKE .globl _ZN9benchmark19HumanReadableNumberB5cxx11EdNS_7Counter4OneKE @@ -132,8 +128,11 @@ _ZN9benchmark19HumanReadableNumberB5cxx11EdNS_7Counter4OneKE: # @_ZN9benchmark19 fcmp.cule.d $fcc0, $fa0, $fs0 bcnez $fcc0, .LBB0_24 # %bb.15: - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_1) + lu12i.w $a0, 293601 + ori $a0, $a0, 1147 + lu32i.d $a0, 293601 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa0, $a0 fcmp.cule.d $fcc0, $fa0, $fs0 bcnez $fcc0, .LBB0_24 # %bb.16: # %.preheader129.preheader.i.i diff --git a/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/sysinfo.s b/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/sysinfo.s index aabcde39..df044365 100644 --- a/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/sysinfo.s +++ b/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/sysinfo.s @@ -110,17 +110,9 @@ GCC_except_table0: .Lcst_end0: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN9benchmark7CPUInfoC2Ev -.LCPI1_0: - .dword 0x41cdcd6500000000 # double 1.0E+9 -.LCPI1_2: - .dword 0x412e848000000000 # double 1.0E+6 -.LCPI1_3: - .dword 0x408f400000000000 # double 1000 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI1_1: + .p2align 4, 0x0 # -- Begin function _ZN9benchmark7CPUInfoC2Ev +.LCPI1_0: .dword 8 # 0x8 .dword 8318264430494707554 # 0x7370696d6f676f62 .text @@ -827,11 +819,13 @@ _ZN9benchmark7CPUInfoC2Ev: # @_ZN9benchmark7CPUInfoC2Ev beqz $fp, .LBB1_99 # %bb.98: ld.d $a0, $sp, 920 - pcalau12i $a1, %pc_hi20(.LCPI1_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI1_3) + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 movgr2fr.d $fa1, $a0 - ffint.d.l $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + fmul.d $fa0, $fa0, $fa1 b .LBB1_180 .LBB1_99: pcalau12i $a0, %pc_hi20(.L.str.1) @@ -855,11 +849,13 @@ _ZN9benchmark7CPUInfoC2Ev: # @_ZN9benchmark7CPUInfoC2Ev pcalau12i $a0, %pc_hi20(.L.str.20) addi.d $s8, $a0, %pc_lo12(.L.str.20) ori $s7, $zero, 6 - pcalau12i $a0, %pc_hi20(.LCPI1_2) - fld.d $fs1, $a0, %pc_lo12(.LCPI1_2) + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fs1, $a0 movgr2fr.d $fs0, $zero - pcalau12i $a0, %pc_hi20(.LCPI1_1) - vld $vr0, $a0, %pc_lo12(.LCPI1_1) + pcalau12i $a0, %pc_hi20(.LCPI1_0) + vld $vr0, $a0, %pc_lo12(.LCPI1_0) vst $vr0, $sp, 80 # 16-byte Folded Spill # implicit-def: $f0_64 vst $vr0, $sp, 96 # 16-byte Folded Spill @@ -2316,10 +2312,12 @@ _ZN9benchmark7CPUInfoC2Ev: # @_ZN9benchmark7CPUInfoC2Ev .LBB1_322: pcaddu18i $ra, %call36(_ZNSt6chrono3_V212steady_clock3nowEv) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI1_0) - fld.d $fs1, $a1, %pc_lo12(.LCPI1_0) movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -144027 + lu52i.d $a0, $a0, 1052 + movgr2fr.d $fs1, $a0 fdiv.d $fs0, $fa0, $fs1 addi.d $a0, $sp, 144 move $a1, $zero @@ -2417,7 +2415,11 @@ _ZN9benchmark7CPUInfoC2Ev: # @_ZN9benchmark7CPUInfoC2Ev mul.d $a0, $s5, $s2 movgr2fr.d $fa0, $s1 ffint.d.l $fa0, $fa0 - fdiv.d $fa0, $fa0, $fs1 + ori $a1, $zero, 0 + lu32i.d $a1, -144027 + lu52i.d $a1, $a1, 1052 + movgr2fr.d $fa1, $a1 + fdiv.d $fa0, $fa0, $fa1 sub.d $a1, $s4, $fp add.d $a0, $a1, $a0 movgr2fr.d $fa1, $a0 diff --git a/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/timers.s b/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/timers.s index 65787514..46732671 100644 --- a/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/timers.s +++ b/results/MicroBenchmarks/libs/benchmark/src/CMakeFiles/benchmark.dir/timers.s @@ -3,12 +3,8 @@ .globl _ZSt21ios_base_library_initv # End of file scope inline assembly - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN9benchmark15ProcessCPUUsageEv -.LCPI0_0: - .dword 0x3e112e0be826d695 # double 1.0000000000000001E-9 .text - .hidden _ZN9benchmark15ProcessCPUUsageEv + .hidden _ZN9benchmark15ProcessCPUUsageEv # -- Begin function _ZN9benchmark15ProcessCPUUsageEv .globl _ZN9benchmark15ProcessCPUUsageEv .p2align 5 .type _ZN9benchmark15ProcessCPUUsageEv,@function @@ -28,12 +24,15 @@ _ZN9benchmark15ProcessCPUUsageEv: # @_ZN9benchmark15ProcessCPUUsageEv ld.d $a0, $sp, 8 ld.d $a1, $sp, 16 movgr2fr.d $fa0, $a0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) ffint.d.l $fa0, $fa0 - movgr2fr.d $fa2, $a1 - ffint.d.l $fa2, $fa2 - fmadd.d $fa0, $fa2, $fa1, $fa0 + movgr2fr.d $fa1, $a1 + ffint.d.l $fa1, $fa1 + lu12i.w $a0, -97683 + ori $a0, $a0, 1685 + lu32i.d $a0, 77323 + lu52i.d $a0, $a0, 993 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa2, $fa0 ld.d $ra, $sp, 24 # 8-byte Folded Reload addi.d $sp, $sp, 32 ret @@ -77,12 +76,8 @@ _ZN9benchmark12_GLOBAL__N_115DiagnoseAndExitEPKc: # @_ZN9benchmark12_GLOBAL__N_1 .size _ZN9benchmark12_GLOBAL__N_115DiagnoseAndExitEPKc, .Lfunc_end1-_ZN9benchmark12_GLOBAL__N_115DiagnoseAndExitEPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN9benchmark14ThreadCPUUsageEv -.LCPI2_0: - .dword 0x3e112e0be826d695 # double 1.0000000000000001E-9 .text - .hidden _ZN9benchmark14ThreadCPUUsageEv + .hidden _ZN9benchmark14ThreadCPUUsageEv # -- Begin function _ZN9benchmark14ThreadCPUUsageEv .globl _ZN9benchmark14ThreadCPUUsageEv .p2align 5 .type _ZN9benchmark14ThreadCPUUsageEv,@function @@ -102,12 +97,15 @@ _ZN9benchmark14ThreadCPUUsageEv: # @_ZN9benchmark14ThreadCPUUsageEv ld.d $a0, $sp, 8 ld.d $a1, $sp, 16 movgr2fr.d $fa0, $a0 - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI2_0) ffint.d.l $fa0, $fa0 - movgr2fr.d $fa2, $a1 - ffint.d.l $fa2, $fa2 - fmadd.d $fa0, $fa2, $fa1, $fa0 + movgr2fr.d $fa1, $a1 + ffint.d.l $fa1, $fa1 + lu12i.w $a0, -97683 + ori $a0, $a0, 1685 + lu32i.d $a0, 77323 + lu52i.d $a0, $a0, 993 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa2, $fa0 ld.d $ra, $sp, 24 # 8-byte Folded Reload addi.d $sp, $sp, 32 ret diff --git a/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/benchmark_min_time_flag_time_test.dir/benchmark_min_time_flag_time_test.s b/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/benchmark_min_time_flag_time_test.dir/benchmark_min_time_flag_time_test.s index 33c7934b..b4383c9c 100644 --- a/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/benchmark_min_time_flag_time_test.dir/benchmark_min_time_flag_time_test.s +++ b/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/benchmark_min_time_flag_time_test.dir/benchmark_min_time_flag_time_test.s @@ -125,12 +125,7 @@ main: # @main .size main, .Lfunc_end1-main .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN12_GLOBAL__N_112DoTestHelperEPiPPKcd -.LCPI2_0: - .dword 0x3cb0000000000000 # double 2.2204460492503131E-16 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZN12_GLOBAL__N_112DoTestHelperEPiPPKcd .type _ZN12_GLOBAL__N_112DoTestHelperEPiPPKcd,@function _ZN12_GLOBAL__N_112DoTestHelperEPiPPKcd: # @_ZN12_GLOBAL__N_112DoTestHelperEPiPPKcd .Lfunc_begin0: @@ -206,11 +201,11 @@ _ZN12_GLOBAL__N_112DoTestHelperEPiPPKcd: # @_ZN12_GLOBAL__N_112DoTestHelperEPiPP beq $a0, $a1, .LBB2_8 # %bb.5: fld.d $fa0, $a0, 0 - pcalau12i $a1, %pc_hi20(.LCPI2_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI2_0) - vldi $vr2, -880 - fadd.d $fa0, $fa0, $fa2 + vldi $vr1, -880 + fadd.d $fa0, $fa0, $fa1 fabs.d $fa0, $fa0 + lu52i.d $a1, $zero, 971 + movgr2fr.d $fa1, $a1 fcmp.clt.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB2_8 # %bb.6: # %_ZNSt6vectorIdSaIdEED2Ev.exit.i diff --git a/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/benchmark_test.dir/benchmark_test.s b/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/benchmark_test.dir/benchmark_test.s index 54d3df4e..d37f5592 100644 --- a/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/benchmark_test.dir/benchmark_test.s +++ b/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/benchmark_test.dir/benchmark_test.s @@ -2781,14 +2781,8 @@ GCC_except_table9: .Lcst_end7: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZL15BM_ManualTimingRN9benchmark5StateE -.LCPI10_0: - .dword 0x408f400000000000 # double 1000 -.LCPI10_1: - .dword 0x41cdcd6500000000 # double 1.0E+9 .text - .p2align 5 + .p2align 5 # -- Begin function _ZL15BM_ManualTimingRN9benchmark5StateE .type _ZL15BM_ManualTimingRN9benchmark5StateE,@function _ZL15BM_ManualTimingRN9benchmark5StateE: # @_ZL15BM_ManualTimingRN9benchmark5StateE .Lfunc_begin8: @@ -2825,41 +2819,45 @@ _ZL15BM_ManualTimingRN9benchmark5StateE: # @_ZL15BM_ManualTimingRN9benchmark5Sta ld.d $a0, $a0, 32 beq $a1, $a0, .LBB10_18 # %bb.1: # %_ZNK9benchmark5State5rangeEm.exit - ld.d $s2, $a0, 0 + ld.d $s1, $a0, 0 ld.w $s0, $fp, 28 - ld.d $s3, $fp, 16 + ld.d $s2, $fp, 16 move $a0, $fp pcaddu18i $ra, %call36(_ZN9benchmark5State16StartKeepRunningEv) jirl $ra, $ra, 0 - move $s1, $zero + move $s3, $zero bnez $s0, .LBB10_13 # %bb.2: # %_ZNK9benchmark5State5rangeEm.exit - beqz $s3, .LBB10_13 + beqz $s2, .LBB10_13 # %bb.3: # %.lr.ph - pcalau12i $a0, %pc_hi20(.LCPI10_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI10_0) - movgr2fr.d $fa1, $s2 - ffint.d.l $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.d $fa0, $s1 + ffint.d.l $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 ftintrz.l.d $fa0, $fa0 - movfr2gr.d $a0, $fa0 - blez $a0, .LBB10_10 + movfr2gr.d $a1, $fa0 + ori $a0, $zero, 0 + move $s3, $zero + blez $a1, .LBB10_10 # %bb.4: # %.lr.ph.split.preheader - move $s1, $zero - srli.d $a1, $a0, 9 - lu12i.w $a2, -390731 - ori $a2, $a2, 2643 - lu32i.d $a2, 309295 - lu52i.d $a2, $a2, 4 - mulh.du $a1, $a1, $a2 - srli.d $s4, $a1, 11 - lu12i.w $a1, -244141 - ori $a1, $a1, 1536 - mul.d $a1, $s4, $a1 - pcalau12i $a2, %pc_hi20(.LCPI10_1) - fld.d $fs0, $a2, %pc_lo12(.LCPI10_1) - add.d $s5, $a1, $a0 + srli.d $a2, $a1, 9 + lu12i.w $a3, -390731 + ori $a3, $a3, 2643 + lu32i.d $a3, 309295 + lu52i.d $a3, $a3, 4 + mulh.du $a2, $a2, $a3 + srli.d $s4, $a2, 11 + lu12i.w $a2, -244141 + ori $a2, $a2, 1536 + mul.d $a2, $s4, $a2 + add.d $s5, $a2, $a1 addi.w $s6, $zero, -1 + lu32i.d $a0, -144027 + lu52i.d $a0, $a0, 1052 + movgr2fr.d $fs0, $a0 ori $s7, $zero, 4 .p2align 4, , 16 .LBB10_5: # %.lr.ph.split @@ -2894,17 +2892,17 @@ _ZL15BM_ManualTimingRN9benchmark5StateE: # @_ZL15BM_ManualTimingRN9benchmark5Sta move $a0, $fp pcaddu18i $ra, %call36(_ZN9benchmark5State16SetIterationTimeEd) jirl $ra, $ra, 0 - blez $s3, .LBB10_17 + blez $s2, .LBB10_17 # %bb.9: # %_ZN9benchmark5State13StateIteratorppEv.exit # in Loop: Header=BB10_5 Depth=1 - addi.d $s3, $s3, -1 - add.d $s1, $s1, $s2 - bnez $s3, .LBB10_5 + addi.d $s2, $s2, -1 + add.d $s3, $s3, $s1 + bnez $s2, .LBB10_5 b .LBB10_13 .LBB10_10: # %_ZNSt11this_thread9sleep_forIlSt5ratioILl1ELl1000000000EEEEvRKNSt6chrono8durationIT_T0_EE.exit.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI10_1) - fld.d $fs0, $a0, %pc_lo12(.LCPI10_1) - move $s1, $zero + lu32i.d $a0, -144027 + lu52i.d $a0, $a0, 1052 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB10_11: # %_ZNSt11this_thread9sleep_forIlSt5ratioILl1ELl1000000000EEEEvRKNSt6chrono8durationIT_T0_EE.exit.us # =>This Inner Loop Header: Depth=1 @@ -2920,12 +2918,12 @@ _ZL15BM_ManualTimingRN9benchmark5StateE: # @_ZL15BM_ManualTimingRN9benchmark5Sta move $a0, $fp pcaddu18i $ra, %call36(_ZN9benchmark5State16SetIterationTimeEd) jirl $ra, $ra, 0 - blez $s3, .LBB10_17 + blez $s2, .LBB10_17 # %bb.12: # %_ZN9benchmark5State13StateIteratorppEv.exit.us # in Loop: Header=BB10_11 Depth=1 - addi.d $s3, $s3, -1 - add.d $s1, $s1, $s2 - bnez $s3, .LBB10_11 + addi.d $s2, $s2, -1 + add.d $s3, $s3, $s1 + bnez $s2, .LBB10_11 .LBB10_13: # %.noexc.i move $a0, $fp pcaddu18i $ra, %call36(_ZN9benchmark5State17FinishKeepRunningEv) @@ -2955,7 +2953,7 @@ _ZL15BM_ManualTimingRN9benchmark5StateE: # @_ZL15BM_ManualTimingRN9benchmark5Sta jirl $ra, $ra, 0 .Ltmp114: # EH_LABEL # %bb.14: # %_ZN9benchmark5State17SetItemsProcessedEl.exit - movgr2fr.d $fa0, $s1 + movgr2fr.d $fa0, $s3 ffint.d.l $fa0, $fa0 fst.d $fa0, $a0, 0 ld.d $a2, $sp, 16 diff --git a/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/complexity_test.dir/complexity_test.s b/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/complexity_test.dir/complexity_test.s index 9f7bb513..f1f8bdaa 100644 --- a/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/complexity_test.dir/complexity_test.s +++ b/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/complexity_test.dir/complexity_test.s @@ -3,12 +3,8 @@ .globl _ZSt21ios_base_library_initv # End of file scope inline assembly - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z16BM_Complexity_O1RN9benchmark5StateE -.LCPI0_0: - .dword 0x3e668c6fa0b2f9a4 # double 4.2000000000000006E-8 .text - .hidden _Z16BM_Complexity_O1RN9benchmark5StateE + .hidden _Z16BM_Complexity_O1RN9benchmark5StateE # -- Begin function _Z16BM_Complexity_O1RN9benchmark5StateE .globl _Z16BM_Complexity_O1RN9benchmark5StateE .p2align 5 .type _Z16BM_Complexity_O1RN9benchmark5StateE,@function @@ -43,11 +39,14 @@ _Z16BM_Complexity_O1RN9benchmark5StateE: # @_Z16BM_Complexity_O1RN9benchmark5Sta beqz $s0, .LBB0_16 # %bb.2: addi.d $s1, $sp, 24 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_0) movgr2fr.d $fs1, $zero addi.w $s2, $zero, -1 addi.d $s3, $sp, 16 + lu12i.w $a0, -390353 + ori $a0, $a0, 2468 + lu32i.d $a0, 429167 + lu52i.d $a0, $a0, 998 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB0_3: # =>This Loop Header: Depth=1 # Child Loop BB0_9 Depth 2 @@ -3921,14 +3920,8 @@ GCC_except_table1: .Lttbase0: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z17BM_Complexity_O_NRN9benchmark5StateE -.LCPI2_0: - .dword 0x4045000000000000 # double 42 -.LCPI2_1: - .dword 0x3e112e0be826d695 # double 1.0000000000000001E-9 .text - .hidden _Z17BM_Complexity_O_NRN9benchmark5StateE + .hidden _Z17BM_Complexity_O_NRN9benchmark5StateE # -- Begin function _Z17BM_Complexity_O_NRN9benchmark5StateE .globl _Z17BM_Complexity_O_NRN9benchmark5StateE .p2align 5 .type _Z17BM_Complexity_O_NRN9benchmark5StateE,@function @@ -3965,13 +3958,18 @@ _Z17BM_Complexity_O_NRN9benchmark5StateE: # @_Z17BM_Complexity_O_NRN9benchmark5S beqz $s0, .LBB2_17 # %bb.2: addi.d $s1, $sp, 16 - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI2_0) - pcalau12i $a0, %pc_hi20(.LCPI2_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI2_1) - movgr2fr.d $fs2, $zero + movgr2fr.d $fs0, $zero addi.w $s2, $zero, -1 addi.d $s3, $sp, 8 + ori $a0, $zero, 0 + lu32i.d $a0, 327680 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -97683 + ori $a0, $a0, 1685 + lu32i.d $a0, 77323 + lu52i.d $a0, $a0, 993 + movgr2fr.d $fs2, $a0 .p2align 4, , 16 .LBB2_3: # =>This Loop Header: Depth=1 # Child Loop BB2_9 Depth 2 @@ -3989,7 +3987,7 @@ _Z17BM_Complexity_O_NRN9benchmark5StateE: # @_Z17BM_Complexity_O_NRN9benchmark5S #APP #NO_APP ld.bu $a0, $fp, 24 - fmov.d $fa0, $fs2 + fmov.d $fa0, $fs0 beqz $a0, .LBB2_7 # %bb.6: # in Loop: Header=BB2_3 Depth=1 ld.d $a0, $fp, 16 @@ -4026,7 +4024,7 @@ _Z17BM_Complexity_O_NRN9benchmark5StateE: # @_Z17BM_Complexity_O_NRN9benchmark5S #APP #NO_APP ld.bu $a1, $fp, 24 - fmov.d $fa0, $fs2 + fmov.d $fa0, $fs0 beqz $a1, .LBB2_12 # %bb.11: # in Loop: Header=BB2_9 Depth=2 ld.d $a1, $fp, 16 @@ -4056,8 +4054,8 @@ _Z17BM_Complexity_O_NRN9benchmark5StateE: # @_Z17BM_Complexity_O_NRN9benchmark5S ld.d $a0, $a0, 0 movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 - fmul.d $fa0, $fa0, $fs0 fmul.d $fa0, $fa0, $fs1 + fmul.d $fa0, $fa0, $fs2 move $a0, $fp pcaddu18i $ra, %call36(_ZN9benchmark5State16SetIterationTimeEd) jirl $ra, $ra, 0 @@ -4114,16 +4112,7 @@ _Z17BM_Complexity_O_NRN9benchmark5StateE: # @_Z17BM_Complexity_O_NRN9benchmark5S .size _Z17BM_Complexity_O_NRN9benchmark5StateE, .Lfunc_end2-_Z17BM_Complexity_O_NRN9benchmark5StateE .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZL23BM_Complexity_O_N_log_NRN9benchmark5StateE -.LCPI3_0: - .dword 0x3ff71547652b82fe # double 1.4426950408889634 -.LCPI3_1: - .dword 0x4045000000000000 # double 42 -.LCPI3_2: - .dword 0x3e112e0be826d695 # double 1.0000000000000001E-9 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZL23BM_Complexity_O_N_log_NRN9benchmark5StateE .type _ZL23BM_Complexity_O_N_log_NRN9benchmark5StateE,@function _ZL23BM_Complexity_O_N_log_NRN9benchmark5StateE: # @_ZL23BM_Complexity_O_N_log_NRN9benchmark5StateE .cfi_startproc @@ -4162,15 +4151,23 @@ _ZL23BM_Complexity_O_N_log_NRN9benchmark5StateE: # @_ZL23BM_Complexity_O_N_log_N beqz $s0, .LBB3_17 # %bb.2: addi.d $s1, $sp, 16 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI3_0) - pcalau12i $a0, %pc_hi20(.LCPI3_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI3_1) - pcalau12i $a0, %pc_hi20(.LCPI3_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI3_2) - movgr2fr.d $fs3, $zero + movgr2fr.d $fs0, $zero addi.w $s2, $zero, -1 addi.d $s3, $sp, 8 + lu12i.w $a0, 414392 + ori $a0, $a0, 766 + lu32i.d $a0, 464199 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fs1, $a0 + ori $a0, $zero, 0 + lu32i.d $a0, 327680 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -97683 + ori $a0, $a0, 1685 + lu32i.d $a0, 77323 + lu52i.d $a0, $a0, 993 + movgr2fr.d $fs3, $a0 .p2align 4, , 16 .LBB3_3: # =>This Loop Header: Depth=1 # Child Loop BB3_9 Depth 2 @@ -4188,7 +4185,7 @@ _ZL23BM_Complexity_O_N_log_NRN9benchmark5StateE: # @_ZL23BM_Complexity_O_N_log_N #APP #NO_APP ld.bu $a0, $fp, 24 - fmov.d $fa0, $fs3 + fmov.d $fa0, $fs0 beqz $a0, .LBB3_7 # %bb.6: # in Loop: Header=BB3_3 Depth=1 ld.d $a0, $fp, 16 @@ -4225,7 +4222,7 @@ _ZL23BM_Complexity_O_N_log_NRN9benchmark5StateE: # @_ZL23BM_Complexity_O_N_log_N #APP #NO_APP ld.bu $a1, $fp, 24 - fmov.d $fa0, $fs3 + fmov.d $fa0, $fs0 beqz $a1, .LBB3_12 # %bb.11: # in Loop: Header=BB3_9 Depth=2 ld.d $a1, $fp, 16 @@ -4255,12 +4252,12 @@ _ZL23BM_Complexity_O_N_log_NRN9benchmark5StateE: # @_ZL23BM_Complexity_O_N_log_N ld.d $a0, $a0, 0 movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 - fmul.d $fs4, $fa0, $fs0 + fmul.d $fs4, $fa0, $fs1 pcaddu18i $ra, %call36(log) jirl $ra, $ra, 0 fmul.d $fa0, $fa0, $fs4 - fmul.d $fa0, $fa0, $fs1 fmul.d $fa0, $fa0, $fs2 + fmul.d $fa0, $fa0, $fs3 move $a0, $fp pcaddu18i $ra, %call36(_ZN9benchmark5State16SetIterationTimeEd) jirl $ra, $ra, 0 @@ -4319,14 +4316,7 @@ _ZL23BM_Complexity_O_N_log_NRN9benchmark5StateE: # @_ZL23BM_Complexity_O_N_log_N .size _ZL23BM_Complexity_O_N_log_NRN9benchmark5StateE, .Lfunc_end3-_ZL23BM_Complexity_O_N_log_NRN9benchmark5StateE .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z24BM_ComplexityCaptureArgsRN9benchmark5StateEi -.LCPI4_0: - .dword 0x4045000000000000 # double 42 -.LCPI4_1: - .dword 0x3e112e0be826d695 # double 1.0000000000000001E-9 - .text - .hidden _Z24BM_ComplexityCaptureArgsRN9benchmark5StateEi + .hidden _Z24BM_ComplexityCaptureArgsRN9benchmark5StateEi # -- Begin function _Z24BM_ComplexityCaptureArgsRN9benchmark5StateEi .globl _Z24BM_ComplexityCaptureArgsRN9benchmark5StateEi .p2align 5 .type _Z24BM_ComplexityCaptureArgsRN9benchmark5StateEi,@function @@ -4366,13 +4356,18 @@ _Z24BM_ComplexityCaptureArgsRN9benchmark5StateEi: # @_Z24BM_ComplexityCaptureArg beqz $s1, .LBB4_17 # %bb.2: addi.d $s2, $sp, 24 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI4_0) - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI4_1) - movgr2fr.d $fs2, $zero + movgr2fr.d $fs0, $zero addi.w $s3, $zero, -1 addi.d $s4, $sp, 16 + ori $a0, $zero, 0 + lu32i.d $a0, 327680 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -97683 + ori $a0, $a0, 1685 + lu32i.d $a0, 77323 + lu52i.d $a0, $a0, 993 + movgr2fr.d $fs2, $a0 .p2align 4, , 16 .LBB4_3: # =>This Loop Header: Depth=1 # Child Loop BB4_9 Depth 2 @@ -4390,7 +4385,7 @@ _Z24BM_ComplexityCaptureArgsRN9benchmark5StateEi: # @_Z24BM_ComplexityCaptureArg #APP #NO_APP ld.bu $a0, $fp, 24 - fmov.d $fa0, $fs2 + fmov.d $fa0, $fs0 beqz $a0, .LBB4_7 # %bb.6: # in Loop: Header=BB4_3 Depth=1 ld.d $a0, $fp, 16 @@ -4427,7 +4422,7 @@ _Z24BM_ComplexityCaptureArgsRN9benchmark5StateEi: # @_Z24BM_ComplexityCaptureArg #APP #NO_APP ld.bu $a1, $fp, 24 - fmov.d $fa0, $fs2 + fmov.d $fa0, $fs0 beqz $a1, .LBB4_12 # %bb.11: # in Loop: Header=BB4_9 Depth=2 ld.d $a1, $fp, 16 @@ -4457,8 +4452,8 @@ _Z24BM_ComplexityCaptureArgsRN9benchmark5StateEi: # @_Z24BM_ComplexityCaptureArg ld.d $a0, $a0, 0 movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 - fmul.d $fa0, $fa0, $fs0 fmul.d $fa0, $fa0, $fs1 + fmul.d $fa0, $fa0, $fs2 move $a0, $fp pcaddu18i $ra, %call36(_ZN9benchmark5State16SetIterationTimeEd) jirl $ra, $ra, 0 @@ -5403,22 +5398,20 @@ _ZN3$_18__invokeEl: # @"_ZN3$_18__invokeEl" .Lfunc_end15: .size _ZN3$_18__invokeEl, .Lfunc_end15-_ZN3$_18__invokeEl # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN3$_28__invokeEl -.LCPI16_0: - .dword 0x3ff71547652b82fe # double 1.4426950408889634 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZN3$_28__invokeEl .type _ZN3$_28__invokeEl,@function _ZN3$_28__invokeEl: # @"_ZN3$_28__invokeEl" # %bb.0: addi.d $sp, $sp, -16 st.d $ra, $sp, 8 # 8-byte Folded Spill fst.d $fs0, $sp, 0 # 8-byte Folded Spill - pcalau12i $a1, %pc_hi20(.LCPI16_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI16_0) movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 + lu12i.w $a0, 414392 + ori $a0, $a0, 766 + lu32i.d $a0, 464199 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa1, $a0 fmul.d $fs0, $fa0, $fa1 pcaddu18i $ra, %call36(log) jirl $ra, $ra, 0 diff --git a/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/internal_threading_test.dir/internal_threading_test.s b/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/internal_threading_test.dir/internal_threading_test.s index ecd17643..d163f6ce 100644 --- a/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/internal_threading_test.dir/internal_threading_test.s +++ b/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/internal_threading_test.dir/internal_threading_test.s @@ -3,16 +3,8 @@ .globl _ZSt21ios_base_library_initv # End of file scope inline assembly - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14MyBusySpinwaitv -.LCPI0_0: - .dword 0x41cdcd6500000000 # double 1.0E+9 -.LCPI0_1: - .dword 0x408f400000000000 # double 1000 -.LCPI0_2: - .dword 0x4049000000000000 # double 50 .text - .hidden _Z14MyBusySpinwaitv + .hidden _Z14MyBusySpinwaitv # -- Begin function _Z14MyBusySpinwaitv .globl _Z14MyBusySpinwaitv .p2align 5 .type _Z14MyBusySpinwaitv,@function @@ -26,15 +18,21 @@ _Z14MyBusySpinwaitv: # @_Z14MyBusySpinwaitv fst.d $fs3, $sp, 8 # 8-byte Folded Spill pcaddu18i $ra, %call36(_ZNSt6chrono3_V212steady_clock3nowEv) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fs0, $a1, %pc_lo12(.LCPI0_0) - pcalau12i $a1, %pc_hi20(.LCPI0_1) - fld.d $fs1, $a1, %pc_lo12(.LCPI0_1) - pcalau12i $a1, %pc_hi20(.LCPI0_2) - fld.d $fs2, $a1, %pc_lo12(.LCPI0_2) movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 - fdiv.d $fs3, $fa0, $fs0 + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -144027 + lu52i.d $a1, $a1, 1052 + movgr2fr.d $fs0, $a1 + fdiv.d $fs1, $fa0, $fs0 + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1032 + movgr2fr.d $fs2, $a1 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fs3, $a0 .p2align 4, , 16 .LBB0_1: # =>This Inner Loop Header: Depth=1 pcaddu18i $ra, %call36(_ZNSt6chrono3_V212steady_clock3nowEv) @@ -42,9 +40,9 @@ _Z14MyBusySpinwaitv: # @_Z14MyBusySpinwaitv movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 fdiv.d $fa0, $fa0, $fs0 - fsub.d $fa0, $fa0, $fs3 - fmul.d $fa0, $fa0, $fs1 - fcmp.clt.d $fcc0, $fa0, $fs2 + fsub.d $fa0, $fa0, $fs1 + fmul.d $fa0, $fa0, $fs2 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB0_1 # %bb.2: fld.d $fs3, $sp, 8 # 8-byte Folded Reload @@ -57,18 +55,7 @@ _Z14MyBusySpinwaitv: # @_Z14MyBusySpinwaitv .Lfunc_end0: .size _Z14MyBusySpinwaitv, .Lfunc_end0-_Z14MyBusySpinwaitv # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13BM_MainThreadRN9benchmark5StateE -.LCPI1_0: - .dword 0x41cdcd6500000000 # double 1.0E+9 -.LCPI1_1: - .dword 0x408f400000000000 # double 1000 -.LCPI1_2: - .dword 0x4049000000000000 # double 50 -.LCPI1_3: - .dword 0x3fa999999999999a # double 0.050000000000000003 - .text - .hidden _Z13BM_MainThreadRN9benchmark5StateE + .hidden _Z13BM_MainThreadRN9benchmark5StateE # -- Begin function _Z13BM_MainThreadRN9benchmark5StateE .globl _Z13BM_MainThreadRN9benchmark5StateE .p2align 5 .type _Z13BM_MainThreadRN9benchmark5StateE,@function @@ -107,14 +94,23 @@ _Z13BM_MainThreadRN9benchmark5StateE: # @_Z13BM_MainThreadRN9benchmark5StateE # %bb.1: # %_ZN9benchmark5State13StateIteratorC2EPS0_.exit beqz $s0, .LBB1_7 # %bb.2: - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI1_0) - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI1_1) - pcalau12i $a0, %pc_hi20(.LCPI1_2) - fld.d $fs3, $a0, %pc_lo12(.LCPI1_2) - pcalau12i $a0, %pc_hi20(.LCPI1_3) - fld.d $fs0, $a0, %pc_lo12(.LCPI1_3) + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -144027 + lu52i.d $a1, $a1, 1052 + movgr2fr.d $fs1, $a1 + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1032 + movgr2fr.d $fs2, $a1 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1018 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB1_3: # %.lr.ph # =>This Loop Header: Depth=1 @@ -394,12 +390,8 @@ _ZNSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEN9benchmark7Counte .size _ZNSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEN9benchmark7CounterESt4lessIS5_ESaISt4pairIKS5_S7_EEEixEOS5_, .Lfunc_end2-_ZNSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEN9benchmark7CounterESt4lessIS5_ESaISt4pairIKS5_S7_EEEixEOS5_ .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z15BM_WorkerThreadRN9benchmark5StateE -.LCPI3_0: - .dword 0x3fa999999999999a # double 0.050000000000000003 .text - .hidden _Z15BM_WorkerThreadRN9benchmark5StateE + .hidden _Z15BM_WorkerThreadRN9benchmark5StateE # -- Begin function _Z15BM_WorkerThreadRN9benchmark5StateE .globl _Z15BM_WorkerThreadRN9benchmark5StateE .p2align 5 .type _Z15BM_WorkerThreadRN9benchmark5StateE,@function @@ -440,8 +432,11 @@ _Z15BM_WorkerThreadRN9benchmark5StateE: # @_Z15BM_WorkerThreadRN9benchmark5State addi.d $s3, $a0, %pc_lo12(_Z14MyBusySpinwaitv) pcalau12i $a0, %pc_hi20(_ZNSt6thread24_M_thread_deps_never_runEv) addi.d $s0, $a0, %pc_lo12(_ZNSt6thread24_M_thread_deps_never_runEv) - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI3_0) + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1018 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB3_3: # %.lr.ph # =>This Inner Loop Header: Depth=1 @@ -624,18 +619,8 @@ GCC_except_table3: .Lcst_end1: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28BM_MainThreadAndWorkerThreadRN9benchmark5StateE -.LCPI4_0: - .dword 0x41cdcd6500000000 # double 1.0E+9 -.LCPI4_1: - .dword 0x408f400000000000 # double 1000 -.LCPI4_2: - .dword 0x4049000000000000 # double 50 -.LCPI4_3: - .dword 0x3fa999999999999a # double 0.050000000000000003 .text - .hidden _Z28BM_MainThreadAndWorkerThreadRN9benchmark5StateE + .hidden _Z28BM_MainThreadAndWorkerThreadRN9benchmark5StateE # -- Begin function _Z28BM_MainThreadAndWorkerThreadRN9benchmark5StateE .globl _Z28BM_MainThreadAndWorkerThreadRN9benchmark5StateE .p2align 5 .type _Z28BM_MainThreadAndWorkerThreadRN9benchmark5StateE,@function @@ -684,14 +669,23 @@ _Z28BM_MainThreadAndWorkerThreadRN9benchmark5StateE: # @_Z28BM_MainThreadAndWork addi.d $s3, $a0, %pc_lo12(_Z14MyBusySpinwaitv) pcalau12i $a0, %pc_hi20(_ZNSt6thread24_M_thread_deps_never_runEv) addi.d $s0, $a0, %pc_lo12(_ZNSt6thread24_M_thread_deps_never_runEv) - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI4_0) - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI4_1) - pcalau12i $a0, %pc_hi20(.LCPI4_2) - fld.d $fs3, $a0, %pc_lo12(.LCPI4_2) - pcalau12i $a0, %pc_hi20(.LCPI4_3) - fld.d $fs0, $a0, %pc_lo12(.LCPI4_3) + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -144027 + lu52i.d $a1, $a1, 1052 + movgr2fr.d $fs1, $a1 + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1032 + movgr2fr.d $fs2, $a1 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1018 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB4_3: # %.lr.ph # =>This Loop Header: Depth=1 diff --git a/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/options_test.dir/options_test.s b/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/options_test.dir/options_test.s index fa49af63..9e477b44 100644 --- a/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/options_test.dir/options_test.s +++ b/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/options_test.dir/options_test.s @@ -420,22 +420,12 @@ main: # @main .LCPI5_1: .dword 64 # 0x40 .dword 512 # 0x200 -.LCPI5_6: +.LCPI5_2: .dword -64 # 0xffffffffffffffc0 .dword 1 # 0x1 -.LCPI5_7: +.LCPI5_3: .dword -8 # 0xfffffffffffffff8 .dword -1 # 0xffffffffffffffff - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI5_2: - .dword 0x3fe6666666666666 # double 0.69999999999999996 -.LCPI5_3: - .dword 0x3fe999999999999a # double 0.80000000000000004 -.LCPI5_4: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI5_5: - .dword 0x3fc999999999999a # double 0.20000000000000001 .section .text.startup,"ax",@progbits .p2align 5 .type _GLOBAL__sub_I_options_test.cc,@function @@ -1058,8 +1048,11 @@ _GLOBAL__sub_I_options_test.cc: # @_GLOBAL__sub_I_options_test.cc .Ltmp96: # EH_LABEL # %bb.67: .Ltmp97: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI5_2) - fld.d $fa0, $a1, %pc_lo12(.LCPI5_2) + lu12i.w $a1, 419430 + ori $a1, $a1, 1638 + lu32i.d $a1, 419430 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fa0, $a1 move $s4, $zero pcaddu18i $ra, %call36(_ZN9benchmark8internal9Benchmark7MinTimeEd) jirl $ra, $ra, 0 @@ -1084,7 +1077,7 @@ _GLOBAL__sub_I_options_test.cc: # @_GLOBAL__sub_I_options_test.cc vld $vr0, $sp, 16 # 16-byte Folded Reload vst $vr0, $sp, 72 st.b $zero, $sp, 88 - ori $s4, $zero, 1 + ori $s5, $zero, 1 .Ltmp100: # EH_LABEL addi.d $a1, $sp, 64 pcaddu18i $ra, %call36(_ZN9benchmark8internal9BenchmarkC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) @@ -1094,16 +1087,19 @@ _GLOBAL__sub_I_options_test.cc: # @_GLOBAL__sub_I_options_test.cc st.d $s2, $s0, 0 st.d $s3, $s0, 224 .Ltmp102: # EH_LABEL - move $s4, $zero + move $s5, $zero move $a0, $s0 pcaddu18i $ra, %call36(_ZN9benchmark8internal25RegisterBenchmarkInternalEPNS0_9BenchmarkE) jirl $ra, $ra, 0 .Ltmp103: # EH_LABEL # %bb.72: .Ltmp104: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI5_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI5_3) - move $s4, $zero + lu12i.w $a1, -419431 + ori $s4, $a1, 2458 + lu32i.d $s4, -419431 + lu52i.d $a1, $s4, 1022 + movgr2fr.d $fa0, $a1 + move $s5, $zero pcaddu18i $ra, %call36(_ZN9benchmark8internal9Benchmark13MinWarmUpTimeEd) jirl $ra, $ra, 0 .Ltmp105: # EH_LABEL @@ -1127,7 +1123,7 @@ _GLOBAL__sub_I_options_test.cc: # @_GLOBAL__sub_I_options_test.cc vld $vr0, $sp, 16 # 16-byte Folded Reload vst $vr0, $sp, 72 st.b $zero, $sp, 88 - ori $s4, $zero, 1 + ori $s5, $zero, 1 .Ltmp107: # EH_LABEL addi.d $a1, $sp, 64 pcaddu18i $ra, %call36(_ZN9benchmark8internal9BenchmarkC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) @@ -1137,24 +1133,24 @@ _GLOBAL__sub_I_options_test.cc: # @_GLOBAL__sub_I_options_test.cc st.d $s2, $s0, 0 st.d $s3, $s0, 224 .Ltmp109: # EH_LABEL - move $s4, $zero + move $s5, $zero move $a0, $s0 pcaddu18i $ra, %call36(_ZN9benchmark8internal25RegisterBenchmarkInternalEPNS0_9BenchmarkE) jirl $ra, $ra, 0 .Ltmp110: # EH_LABEL # %bb.77: .Ltmp111: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI5_4) - fld.d $fa0, $a1, %pc_lo12(.LCPI5_4) - move $s4, $zero + lu52i.d $a1, $s4, 1019 + movgr2fr.d $fa0, $a1 + move $s5, $zero pcaddu18i $ra, %call36(_ZN9benchmark8internal9Benchmark7MinTimeEd) jirl $ra, $ra, 0 .Ltmp112: # EH_LABEL # %bb.78: .Ltmp113: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI5_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI5_5) - move $s4, $zero + lu52i.d $a1, $s4, 1020 + movgr2fr.d $fa0, $a1 + move $s5, $zero pcaddu18i $ra, %call36(_ZN9benchmark8internal9Benchmark13MinWarmUpTimeEd) jirl $ra, $ra, 0 .Ltmp114: # EH_LABEL @@ -1558,10 +1554,10 @@ _GLOBAL__sub_I_options_test.cc: # @_GLOBAL__sub_I_options_test.cc .Ltmp182: # EH_LABEL # %bb.126: st.d $a0, $sp, 40 - pcalau12i $a1, %pc_hi20(.LCPI5_6) - vld $vr0, $a1, %pc_lo12(.LCPI5_6) - pcalau12i $a1, %pc_hi20(.LCPI5_7) - vld $vr1, $a1, %pc_lo12(.LCPI5_7) + pcalau12i $a1, %pc_hi20(.LCPI5_2) + vld $vr0, $a1, %pc_lo12(.LCPI5_2) + pcalau12i $a1, %pc_hi20(.LCPI5_3) + vld $vr1, $a1, %pc_lo12(.LCPI5_3) addi.d $a1, $a0, 32 st.d $a1, $sp, 56 vst $vr0, $a0, 0 @@ -1717,7 +1713,7 @@ _GLOBAL__sub_I_options_test.cc: # @_GLOBAL__sub_I_options_test.cc .LBB5_143: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit8.thread.i .Ltmp196: # EH_LABEL move $fp, $a0 - b .LBB5_181 + b .LBB5_183 .LBB5_144: .Ltmp186: # EH_LABEL b .LBB5_149 @@ -1749,198 +1745,207 @@ _GLOBAL__sub_I_options_test.cc: # @_GLOBAL__sub_I_options_test.cc move $fp, $a0 .LBB5_153: # %.thread.i ld.d $a0, $sp, 64 - beq $a0, $s1, .LBB5_183 + beq $a0, $s1, .LBB5_185 # %bb.154: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit19.thread33.i ld.d $a1, $sp, 80 addi.d $a1, $a1, 1 - b .LBB5_182 + b .LBB5_184 .LBB5_155: .Ltmp180: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - beq $a2, $s1, .LBB5_180 - b .LBB5_178 + beq $a2, $s1, .LBB5_182 + b .LBB5_180 .LBB5_156: .Ltmp86: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - beq $a2, $s1, .LBB5_180 - b .LBB5_178 + beq $a2, $s1, .LBB5_182 + b .LBB5_180 .LBB5_157: .Ltmp75: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - beq $a2, $s1, .LBB5_180 - b .LBB5_178 + beq $a2, $s1, .LBB5_182 + b .LBB5_180 .LBB5_158: .Ltmp4: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - bne $a2, $s1, .LBB5_178 - b .LBB5_180 + bne $a2, $s1, .LBB5_180 + b .LBB5_182 .LBB5_159: .Ltmp203: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 bne $a2, $s1, .LBB5_161 # %bb.160: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.thread.i.i7.i132 - bnez $s3, .LBB5_181 - b .LBB5_183 + bnez $s3, .LBB5_183 + b .LBB5_185 .LBB5_161: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit8.i131 ld.d $a0, $sp, 80 addi.d $a1, $a0, 1 move $a0, $a2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - bnez $s3, .LBB5_181 - b .LBB5_183 + bnez $s3, .LBB5_183 + b .LBB5_185 .LBB5_162: .Ltmp193: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - beq $a2, $s1, .LBB5_180 - b .LBB5_178 + beq $a2, $s1, .LBB5_182 + b .LBB5_180 .LBB5_163: .Ltmp175: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - beq $a2, $s1, .LBB5_180 - b .LBB5_178 + beq $a2, $s1, .LBB5_182 + b .LBB5_180 .LBB5_164: .Ltmp159: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - beq $a2, $s1, .LBB5_180 - b .LBB5_178 + beq $a2, $s1, .LBB5_182 + b .LBB5_180 .LBB5_165: .Ltmp143: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - beq $a2, $s1, .LBB5_180 - b .LBB5_178 + beq $a2, $s1, .LBB5_182 + b .LBB5_180 .LBB5_166: .Ltmp136: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - beq $a2, $s1, .LBB5_180 - b .LBB5_178 + beq $a2, $s1, .LBB5_182 + b .LBB5_180 .LBB5_167: .Ltmp129: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - beq $a2, $s1, .LBB5_180 - b .LBB5_178 + beq $a2, $s1, .LBB5_182 + b .LBB5_180 .LBB5_168: .Ltmp122: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - beq $a2, $s1, .LBB5_180 - b .LBB5_178 + beq $a2, $s1, .LBB5_182 + b .LBB5_180 .LBB5_169: .Ltmp106: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - beq $a2, $s1, .LBB5_180 - b .LBB5_178 -.LBB5_170: + beq $a2, $s1, .LBB5_178 +.LBB5_170: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit8.i60 + ld.d $a0, $sp, 80 + addi.d $a1, $a0, 1 + move $a0, $a2 + pcaddu18i $ra, %call36(_ZdlPvm) + jirl $ra, $ra, 0 + bnez $s5, .LBB5_183 + b .LBB5_185 +.LBB5_171: .Ltmp99: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - beq $a2, $s1, .LBB5_180 - b .LBB5_178 -.LBB5_171: + beq $a2, $s1, .LBB5_182 + b .LBB5_180 +.LBB5_172: .Ltmp70: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - beq $a2, $s1, .LBB5_180 - b .LBB5_178 -.LBB5_172: + beq $a2, $s1, .LBB5_182 + b .LBB5_180 +.LBB5_173: .Ltmp54: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - beq $a2, $s1, .LBB5_180 - b .LBB5_178 -.LBB5_173: + beq $a2, $s1, .LBB5_182 + b .LBB5_180 +.LBB5_174: .Ltmp11: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - beq $a2, $s1, .LBB5_180 - b .LBB5_178 -.LBB5_174: + beq $a2, $s1, .LBB5_182 + b .LBB5_180 +.LBB5_175: .Ltmp168: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - beq $a2, $s1, .LBB5_180 - b .LBB5_178 -.LBB5_175: + beq $a2, $s1, .LBB5_182 + b .LBB5_180 +.LBB5_176: .Ltmp152: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - beq $a2, $s1, .LBB5_180 - b .LBB5_178 -.LBB5_176: + beq $a2, $s1, .LBB5_182 + b .LBB5_180 +.LBB5_177: .Ltmp115: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - beq $a2, $s1, .LBB5_180 - b .LBB5_178 -.LBB5_177: + bne $a2, $s1, .LBB5_170 +.LBB5_178: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.thread.i.i7.i61 + bnez $s5, .LBB5_183 + b .LBB5_185 +.LBB5_179: .Ltmp63: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - beq $a2, $s1, .LBB5_180 -.LBB5_178: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit8.i + beq $a2, $s1, .LBB5_182 +.LBB5_180: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit8.i ld.d $a0, $sp, 80 addi.d $a1, $a0, 1 move $a0, $a2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - beqz $s4, .LBB5_183 - b .LBB5_181 -.LBB5_179: + beqz $s4, .LBB5_185 + b .LBB5_183 +.LBB5_181: .Ltmp47: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - bne $a2, $s1, .LBB5_178 -.LBB5_180: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.thread.i.i7.i - beqz $s4, .LBB5_183 -.LBB5_181: + bne $a2, $s1, .LBB5_180 +.LBB5_182: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.thread.i.i7.i + beqz $s4, .LBB5_185 +.LBB5_183: ori $a1, $zero, 232 move $a0, $s0 -.LBB5_182: # %common.resume +.LBB5_184: # %common.resume pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 -.LBB5_183: # %common.resume +.LBB5_185: # %common.resume move $a0, $fp pcaddu18i $ra, %call36(_Unwind_Resume) jirl $ra, $ra, 0 -.LBB5_184: +.LBB5_186: .Ltmp38: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - bne $a2, $s1, .LBB5_186 - b .LBB5_188 -.LBB5_185: + bne $a2, $s1, .LBB5_188 + b .LBB5_190 +.LBB5_187: .Ltmp29: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - beq $a2, $s1, .LBB5_188 -.LBB5_186: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit8.i7 + beq $a2, $s1, .LBB5_190 +.LBB5_188: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit8.i7 ld.d $a0, $sp, 80 addi.d $a1, $a0, 1 move $a0, $a2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - bnez $s7, .LBB5_181 - b .LBB5_183 -.LBB5_187: + bnez $s7, .LBB5_183 + b .LBB5_185 +.LBB5_189: .Ltmp20: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - bne $a2, $s1, .LBB5_186 -.LBB5_188: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.thread.i.i7.i8 - bnez $s7, .LBB5_181 - b .LBB5_183 + bne $a2, $s1, .LBB5_188 +.LBB5_190: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.thread.i.i7.i8 + bnez $s7, .LBB5_183 + b .LBB5_185 .Lfunc_end5: .size _GLOBAL__sub_I_options_test.cc, .Lfunc_end5-_GLOBAL__sub_I_options_test.cc .cfi_endproc diff --git a/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/output_test_helper.dir/output_test_helper.s b/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/output_test_helper.dir/output_test_helper.s index cba1c593..30e6332c 100644 --- a/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/output_test_helper.dir/output_test_helper.s +++ b/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/output_test_helper.dir/output_test_helper.s @@ -4219,16 +4219,8 @@ _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC2IS3_EEPKcRKS3_: # @_ZNSt7 .size _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC2IS3_EEPKcRKS3_, .Lfunc_end20-_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC2IS3_EEPKcRKS3_ .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZNK7Results7GetTimeENS_13BenchmarkTimeE -.LCPI21_0: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI21_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI21_2: - .dword 0x3e112e0be826d695 # double 1.0000000000000001E-9 .text - .hidden _ZNK7Results7GetTimeENS_13BenchmarkTimeE + .hidden _ZNK7Results7GetTimeENS_13BenchmarkTimeE # -- Begin function _ZNK7Results7GetTimeENS_13BenchmarkTimeE .globl _ZNK7Results7GetTimeENS_13BenchmarkTimeE .p2align 5 .type _ZNK7Results7GetTimeENS_13BenchmarkTimeE,@function @@ -4431,8 +4423,10 @@ _ZNK7Results7GetTimeENS_13BenchmarkTimeE: # @_ZNK7Results7GetTimeENS_13Benchmark ori $a1, $a1, 877 bne $a0, $a1, .LBB21_24 # %bb.22: # %_ZSteqIcSt11char_traitsIcESaIcEEbRKNSt7__cxx1112basic_stringIT_T0_T1_EEPKS5_.exit52.thread - pcalau12i $a0, %pc_hi20(.LCPI21_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI21_0) + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 b .LBB21_29 .LBB21_23: # %_ZSteqIcSt11char_traitsIcESaIcEEbRKNSt7__cxx1112basic_stringIT_T0_T1_EEPKS5_.exit54 ld.d $a0, $fp, 0 @@ -4470,13 +4464,18 @@ _ZNK7Results7GetTimeENS_13BenchmarkTimeE: # @_ZNK7Results7GetTimeENS_13Benchmark pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerD2Ev) jirl $ra, $ra, 0 .LBB21_27: # %_ZSteqIcSt11char_traitsIcESaIcEEbRKNSt7__cxx1112basic_stringIT_T0_T1_EEPKS5_.exit.thread - pcalau12i $a0, %pc_hi20(.LCPI21_2) - fld.d $fa0, $a0, %pc_lo12(.LCPI21_2) + lu12i.w $a0, -97683 + ori $a0, $a0, 1685 + lu32i.d $a0, 77323 + lu52i.d $a0, $a0, 993 b .LBB21_29 .LBB21_28: # %_ZSteqIcSt11char_traitsIcESaIcEEbRKNSt7__cxx1112basic_stringIT_T0_T1_EEPKS5_.exit50.thread - pcalau12i $a0, %pc_hi20(.LCPI21_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI21_1) + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 .LBB21_29: # %_ZSteqIcSt11char_traitsIcESaIcEEbRKNSt7__cxx1112basic_stringIT_T0_T1_EEPKS5_.exit54.thread + movgr2fr.d $fa0, $a0 fmul.d $fs0, $fs0, $fa0 .LBB21_30: # %_ZSteqIcSt11char_traitsIcESaIcEEbRKNSt7__cxx1112basic_stringIT_T0_T1_EEPKS5_.exit54.thread fmov.d $fa0, $fs0 diff --git a/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/perf_counters_test.dir/perf_counters_test.s b/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/perf_counters_test.dir/perf_counters_test.s index 8e5d04bf..8ec38035 100644 --- a/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/perf_counters_test.dir/perf_counters_test.s +++ b/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/perf_counters_test.dir/perf_counters_test.s @@ -1014,12 +1014,8 @@ GCC_except_table6: .Lcst_end2: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI7_0: - .dword 0x412e848000000000 # double 1.0E+6 .text - .hidden main + .hidden main # -- Begin function main .globl main .p2align 5 .type main,@function @@ -1032,12 +1028,10 @@ main: # @main st.d $fp, $sp, 32 # 8-byte Folded Spill st.d $s0, $sp, 24 # 8-byte Folded Spill st.d $s1, $sp, 16 # 8-byte Folded Spill - fst.d $fs0, $sp, 8 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 .cfi_offset 24, -32 - .cfi_offset 56, -40 pcalau12i $a2, %got_pc_hi20(_ZN9benchmark8internal12PerfCounters10kSupportedE) ld.d $a2, $a2, %got_pc_lo12(_ZN9benchmark8internal12PerfCounters10kSupportedE) ld.bu $a2, $a2, 0 @@ -1063,9 +1057,11 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $fp, %pc_hi20(withPauseResumeInstrCount) fld.d $fa0, $fp, %pc_lo12(withPauseResumeInstrCount) - pcalau12i $a0, %pc_hi20(.LCPI7_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI7_0) - fcmp.cule.d $fcc0, $fa0, $fs0 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa1, $a0 + fcmp.cule.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB7_8 # %bb.2: pcalau12i $s0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) @@ -1075,7 +1071,11 @@ main: # @main .LBB7_3: pcalau12i $s1, %pc_hi20(withoutPauseResumeInstrCount) fld.d $fa0, $s1, %pc_lo12(withoutPauseResumeInstrCount) - fcmp.cule.d $fcc0, $fa0, $fs0 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa1, $a0 + fcmp.cule.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB7_11 # %bb.4: ld.b $a0, $s0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) @@ -1094,7 +1094,6 @@ main: # @main beqz $a0, .LBB7_15 .LBB7_7: # %_ZN9benchmark8internal18GetNullLogInstanceEv.exit17.thread move $a0, $zero - fld.d $fs0, $sp, 8 # 8-byte Folded Reload ld.d $s1, $sp, 16 # 8-byte Folded Reload ld.d $s0, $sp, 24 # 8-byte Folded Reload ld.d $fp, $sp, 32 # 8-byte Folded Reload @@ -1108,11 +1107,11 @@ main: # @main addi.d $a2, $a0, %pc_lo12(.L.str.17) pcalau12i $a0, %pc_hi20(.L__func__.main) addi.d $a3, $a0, %pc_lo12(.L__func__.main) - addi.d $a0, $sp, 0 + addi.d $a0, $sp, 8 ori $a4, $zero, 89 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerC2EPKcS3_S3_i) jirl $ra, $ra, 0 - addi.d $a0, $sp, 0 + addi.d $a0, $sp, 8 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerD2Ev) jirl $ra, $ra, 0 .LBB7_9: @@ -1137,11 +1136,11 @@ main: # @main addi.d $a2, $a0, %pc_lo12(.L.str.17) pcalau12i $a0, %pc_hi20(.L__func__.main) addi.d $a3, $a0, %pc_lo12(.L__func__.main) - addi.d $a0, $sp, 0 + addi.d $a0, $sp, 8 ori $a4, $zero, 90 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerC2EPKcS3_S3_i) jirl $ra, $ra, 0 - addi.d $a0, $sp, 0 + addi.d $a0, $sp, 8 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerD2Ev) jirl $ra, $ra, 0 .LBB7_12: @@ -1166,11 +1165,11 @@ main: # @main addi.d $a2, $a0, %pc_lo12(.L.str.17) pcalau12i $a0, %pc_hi20(.L__func__.main) addi.d $a3, $a0, %pc_lo12(.L__func__.main) - addi.d $a0, $sp, 0 + addi.d $a0, $sp, 8 ori $a4, $zero, 91 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerC2EPKcS3_S3_i) jirl $ra, $ra, 0 - addi.d $a0, $sp, 0 + addi.d $a0, $sp, 8 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerD2Ev) jirl $ra, $ra, 0 .LBB7_15: diff --git a/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/reporter_output_test.dir/reporter_output_test.s b/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/reporter_output_test.dir/reporter_output_test.s index 28f27444..ac071314 100644 --- a/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/reporter_output_test.dir/reporter_output_test.s +++ b/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/reporter_output_test.dir/reporter_output_test.s @@ -12091,12 +12091,7 @@ _Z17BM_RepeatTimeUnitRN9benchmark5StateE: # @_Z17BM_RepeatTimeUnitRN9benchmark5S .size _Z17BM_RepeatTimeUnitRN9benchmark5StateE, .Lfunc_end26-_Z17BM_RepeatTimeUnitRN9benchmark5StateE .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z12BM_UserStatsRN9benchmark5StateE -.LCPI27_0: - .dword 0x3e8421f5f40d8376 # double 1.4999999999999999E-7 - .text - .hidden _Z12BM_UserStatsRN9benchmark5StateE + .hidden _Z12BM_UserStatsRN9benchmark5StateE # -- Begin function _Z12BM_UserStatsRN9benchmark5StateE .globl _Z12BM_UserStatsRN9benchmark5StateE .p2align 5 .type _Z12BM_UserStatsRN9benchmark5StateE,@function @@ -12124,8 +12119,11 @@ _Z12BM_UserStatsRN9benchmark5StateE: # @_Z12BM_UserStatsRN9benchmark5StateE # %bb.1: # %_ZN9benchmark5State13StateIteratorC2EPS0_.exit beqz $s0, .LBB27_5 # %bb.2: - pcalau12i $a0, %pc_hi20(.LCPI27_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI27_0) + lu12i.w $a0, -48936 + ori $a0, $a0, 886 + lu32i.d $a0, 270837 + lu52i.d $a0, $a0, 1000 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB27_3: # %_ZN9benchmark5State13StateIteratorppEv.exit # =>This Inner Loop Header: Depth=1 @@ -19095,12 +19093,8 @@ GCC_except_table28: .Lcst_end8: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z19BM_UserPercentStatsRN9benchmark5StateE -.LCPI29_0: - .dword 0x3e8421f5f40d8376 # double 1.4999999999999999E-7 .text - .hidden _Z19BM_UserPercentStatsRN9benchmark5StateE + .hidden _Z19BM_UserPercentStatsRN9benchmark5StateE # -- Begin function _Z19BM_UserPercentStatsRN9benchmark5StateE .globl _Z19BM_UserPercentStatsRN9benchmark5StateE .p2align 5 .type _Z19BM_UserPercentStatsRN9benchmark5StateE,@function @@ -19128,8 +19122,11 @@ _Z19BM_UserPercentStatsRN9benchmark5StateE: # @_Z19BM_UserPercentStatsRN9benchma # %bb.1: # %_ZN9benchmark5State13StateIteratorC2EPS0_.exit beqz $s0, .LBB29_5 # %bb.2: - pcalau12i $a0, %pc_hi20(.LCPI29_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI29_0) + lu12i.w $a0, -48936 + ori $a0, $a0, 886 + lu32i.d $a0, 270837 + lu52i.d $a0, $a0, 1000 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB29_3: # %_ZN9benchmark5State13StateIteratorppEv.exit # =>This Inner Loop Header: Depth=1 @@ -27058,17 +27055,15 @@ _ZN3$_08__invokeERKSt6vectorIdSaIdEE: # @"_ZN3$_08__invokeERKSt6vectorIdSaIdEE .Lfunc_end40: .size _ZN3$_08__invokeERKSt6vectorIdSaIdEE, .Lfunc_end40-_ZN3$_08__invokeERKSt6vectorIdSaIdEE # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN3$_18__invokeERKSt6vectorIdSaIdEE -.LCPI41_0: - .dword 0x3f847ae147ae147b # double 0.01 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZN3$_18__invokeERKSt6vectorIdSaIdEE .type _ZN3$_18__invokeERKSt6vectorIdSaIdEE,@function _ZN3$_18__invokeERKSt6vectorIdSaIdEE: # @"_ZN3$_18__invokeERKSt6vectorIdSaIdEE" # %bb.0: - pcalau12i $a0, %pc_hi20(.LCPI41_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI41_0) + lu12i.w $a0, 293601 + ori $a0, $a0, 1147 + lu32i.d $a0, 293601 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa0, $a0 ret .Lfunc_end41: .size _ZN3$_18__invokeERKSt6vectorIdSaIdEE, .Lfunc_end41-_ZN3$_18__invokeERKSt6vectorIdSaIdEE diff --git a/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/user_counters_tabular_test.dir/user_counters_tabular_test.s b/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/user_counters_tabular_test.dir/user_counters_tabular_test.s index 33bd26d3..c5cb852e 100644 --- a/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/user_counters_tabular_test.dir/user_counters_tabular_test.s +++ b/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/user_counters_tabular_test.dir/user_counters_tabular_test.s @@ -3842,20 +3842,8 @@ GCC_except_table6: .Lcst_end2: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z16CheckTabularRateRK7Results -.LCPI7_0: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI7_1: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI7_2: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 -.LCPI7_3: - .dword 0x4059000000000000 # double 100 -.LCPI7_4: - .dword 0x4040000000000000 # double 32 .text - .hidden _Z16CheckTabularRateRK7Results + .hidden _Z16CheckTabularRateRK7Results # -- Begin function _Z16CheckTabularRateRK7Results .globl _Z16CheckTabularRateRK7Results .p2align 5 .type _Z16CheckTabularRateRK7Results,@function @@ -3865,24 +3853,23 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results .cfi_personality 155, DW.ref.__gxx_personality_v0 .cfi_lsda 27, .Lexception3 # %bb.0: # %._crit_edge.i.i - addi.d $sp, $sp, -336 - .cfi_def_cfa_offset 336 - st.d $ra, $sp, 328 # 8-byte Folded Spill - st.d $fp, $sp, 320 # 8-byte Folded Spill - st.d $s0, $sp, 312 # 8-byte Folded Spill - st.d $s1, $sp, 304 # 8-byte Folded Spill - st.d $s2, $sp, 296 # 8-byte Folded Spill - st.d $s3, $sp, 288 # 8-byte Folded Spill - st.d $s4, $sp, 280 # 8-byte Folded Spill - st.d $s5, $sp, 272 # 8-byte Folded Spill - st.d $s6, $sp, 264 # 8-byte Folded Spill - st.d $s7, $sp, 256 # 8-byte Folded Spill - st.d $s8, $sp, 248 # 8-byte Folded Spill - fst.d $fs0, $sp, 240 # 8-byte Folded Spill - fst.d $fs1, $sp, 232 # 8-byte Folded Spill - fst.d $fs2, $sp, 224 # 8-byte Folded Spill - fst.d $fs3, $sp, 216 # 8-byte Folded Spill - fst.d $fs4, $sp, 208 # 8-byte Folded Spill + addi.d $sp, $sp, -320 + .cfi_def_cfa_offset 320 + st.d $ra, $sp, 312 # 8-byte Folded Spill + st.d $fp, $sp, 304 # 8-byte Folded Spill + st.d $s0, $sp, 296 # 8-byte Folded Spill + st.d $s1, $sp, 288 # 8-byte Folded Spill + st.d $s2, $sp, 280 # 8-byte Folded Spill + st.d $s3, $sp, 272 # 8-byte Folded Spill + st.d $s4, $sp, 264 # 8-byte Folded Spill + st.d $s5, $sp, 256 # 8-byte Folded Spill + st.d $s6, $sp, 248 # 8-byte Folded Spill + st.d $s7, $sp, 240 # 8-byte Folded Spill + fst.d $fs0, $sp, 232 # 8-byte Folded Spill + fst.d $fs1, $sp, 224 # 8-byte Folded Spill + fst.d $fs2, $sp, 216 # 8-byte Folded Spill + fst.d $fs3, $sp, 208 # 8-byte Folded Spill + fst.d $fs4, $sp, 200 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -3893,12 +3880,11 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results .cfi_offset 28, -64 .cfi_offset 29, -72 .cfi_offset 30, -80 - .cfi_offset 31, -88 - .cfi_offset 56, -96 - .cfi_offset 57, -104 - .cfi_offset 58, -112 - .cfi_offset 59, -120 - .cfi_offset 60, -128 + .cfi_offset 56, -88 + .cfi_offset 57, -96 + .cfi_offset 58, -104 + .cfi_offset 59, -112 + .cfi_offset 60, -120 move $fp, $a0 pcaddu18i $ra, %call36(_ZNK7Results13NumIterationsEv) jirl $ra, $ra, 0 @@ -3908,34 +3894,37 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results pcaddu18i $ra, %call36(_ZNK7Results7GetTimeENS_13BenchmarkTimeE) jirl $ra, $ra, 0 fmov.d $fs1, $fa0 - addi.d $s0, $sp, 192 - st.d $s0, $sp, 176 + addi.d $s0, $sp, 184 + st.d $s0, $sp, 168 pcalau12i $a0, %pc_hi20(.L.str.21) addi.d $a0, $a0, %pc_lo12(.L.str.21) ld.h $s4, $a0, 0 ld.b $s5, $a0, 2 - st.h $s4, $sp, 192 - st.b $s5, $sp, 194 + st.h $s4, $sp, 184 + st.b $s5, $sp, 186 ori $a0, $zero, 3 - st.d $a0, $sp, 184 - st.b $zero, $sp, 195 + st.d $a0, $sp, 176 + st.b $zero, $sp, 187 .Ltmp359: # EH_LABEL - addi.d $a1, $sp, 176 + addi.d $a1, $sp, 168 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp360: # EH_LABEL # %bb.1: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit fmul.d $fs2, $fs0, $fs1 - pcalau12i $a0, %pc_hi20(.LCPI7_0) - fld.d $fs3, $a0, %pc_lo12(.LCPI7_0) frecip.d $fs1, $fs2 fsub.d $fa0, $fa0, $fs1 fabs.d $fs4, $fa0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 fmul.d $fs0, $fs1, $fs3 fcmp.cule.d $fcc0, $fs0, $fs4 movcf2gr $a0, $fcc0 - st.d $a0, $sp, 24 + st.d $a0, $sp, 16 bceqz $fcc0, .LBB7_4 # %bb.2: .Ltmp362: # EH_LABEL @@ -3945,13 +3934,13 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results addi.d $a2, $a0, %pc_lo12(.L.str.101) pcalau12i $a0, %pc_hi20(.L__func__._Z16CheckTabularRateRK7Results) addi.d $a3, $a0, %pc_lo12(.L__func__._Z16CheckTabularRateRK7Results) - addi.d $a0, $sp, 168 + addi.d $a0, $sp, 160 ori $a4, $zero, 419 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerC2EPKcS3_S3_i) jirl $ra, $ra, 0 .Ltmp363: # EH_LABEL # %bb.3: - ld.d $s6, $sp, 168 + ld.d $s6, $sp, 160 b .LBB7_5 .LBB7_4: pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) @@ -4132,15 +4121,15 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp397: # EH_LABEL .LBB7_37: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit378 - addi.d $s1, $sp, 152 - st.d $s1, $sp, 136 - st.h $s4, $sp, 152 - st.b $s5, $sp, 154 + addi.d $s1, $sp, 144 + st.d $s1, $sp, 128 + st.h $s4, $sp, 144 + st.b $s5, $sp, 146 ori $a0, $zero, 3 - st.d $a0, $sp, 144 - st.b $zero, $sp, 155 + st.d $a0, $sp, 136 + st.b $zero, $sp, 147 .Ltmp399: # EH_LABEL - addi.d $a1, $sp, 136 + addi.d $a1, $sp, 128 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 @@ -4262,8 +4251,11 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results beqz $a0, .LBB7_66 # %bb.61: .Ltmp427: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI7_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI7_1) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, 1019 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp428: # EH_LABEL @@ -4290,15 +4282,15 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp432: # EH_LABEL .LBB7_66: # %_ZN9benchmark8internallsIA15_cEERNS0_7LogTypeES4_RKT_.exit - addi.d $s2, $sp, 120 - st.d $s2, $sp, 104 - st.h $s4, $sp, 120 - st.b $s5, $sp, 122 + addi.d $s2, $sp, 112 + st.d $s2, $sp, 96 + st.h $s4, $sp, 112 + st.b $s5, $sp, 114 ori $a0, $zero, 3 - st.d $a0, $sp, 112 - st.b $zero, $sp, 123 + st.d $a0, $sp, 104 + st.b $zero, $sp, 115 .Ltmp434: # EH_LABEL - addi.d $a1, $sp, 104 + addi.d $a1, $sp, 96 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 @@ -4324,33 +4316,38 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp439: # EH_LABEL .LBB7_71: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit433 - addi.d $s3, $sp, 88 - st.d $s3, $sp, 72 - st.h $s4, $sp, 88 - st.b $s5, $sp, 90 + addi.d $s3, $sp, 80 + st.d $s3, $sp, 64 + st.h $s4, $sp, 80 + st.b $s5, $sp, 82 ori $a0, $zero, 3 - st.d $a0, $sp, 80 - st.b $zero, $sp, 91 + st.d $a0, $sp, 72 + st.b $zero, $sp, 83 .Ltmp441: # EH_LABEL - addi.d $a1, $sp, 72 + addi.d $a1, $sp, 64 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp442: # EH_LABEL # %bb.72: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit439 ld.d $a0, $s6, 0 - pcalau12i $s5, %pc_hi20(.LCPI7_2) - pcalau12i $s4, %pc_hi20(.LCPI7_3) + lu12i.w $s4, -487882 beqz $a0, .LBB7_76 # %bb.73: - fld.d $fa1, $s5, %pc_lo12(.LCPI7_2) fsub.d $fa0, $fa0, $fs1 - fabs.d $fa2, $fs1 - fld.d $fa3, $s4, %pc_lo12(.LCPI7_3) - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fs1, $fcc0 + fabs.d $fa1, $fs1 + ori $a1, $s4, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa2, $a1 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fa2, $fs1, $fcc0 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa3 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp443: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 @@ -4367,26 +4364,26 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp446: # EH_LABEL .LBB7_76: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit445 - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 beq $a0, $s3, .LBB7_78 # %bb.77: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i - ld.d $a1, $sp, 88 + ld.d $a1, $sp, 80 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB7_78: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s2, .LBB7_80 # %bb.79: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i446 - ld.d $a1, $sp, 120 + ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB7_80: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit448 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_82 # %bb.81: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i449 - ld.d $a1, $sp, 152 + ld.d $a1, $sp, 144 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -4394,26 +4391,26 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results fcmp.clt.d $fcc0, $fs4, $fs0 bceqz $fcc0, .LBB7_511 # %bb.83: - ld.d $a0, $sp, 176 + ld.d $a0, $sp, 168 beq $a0, $s0, .LBB7_85 # %bb.84: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i461 - ld.d $a1, $sp, 192 + ld.d $a1, $sp, 184 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB7_85: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit463 - st.d $s0, $sp, 176 + st.d $s0, $sp, 168 pcalau12i $a0, %pc_hi20(.L.str.22) addi.d $a0, $a0, %pc_lo12(.L.str.22) - ld.h $s6, $a0, 0 - ld.b $s7, $a0, 2 - st.h $s6, $sp, 192 - st.b $s7, $sp, 194 + ld.h $s5, $a0, 0 + ld.b $s6, $a0, 2 + st.h $s5, $sp, 184 + st.b $s6, $sp, 186 ori $a0, $zero, 3 - st.d $a0, $sp, 184 - st.b $zero, $sp, 195 + st.d $a0, $sp, 176 + st.b $zero, $sp, 187 .Ltmp448: # EH_LABEL - addi.d $a1, $sp, 176 + addi.d $a1, $sp, 168 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 @@ -4426,7 +4423,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results fmul.d $fs0, $fs1, $fs3 fcmp.cule.d $fcc0, $fs0, $fs4 movcf2gr $a0, $fcc0 - st.d $a0, $sp, 24 + st.d $a0, $sp, 16 bceqz $fcc0, .LBB7_89 # %bb.87: .Ltmp451: # EH_LABEL @@ -4436,23 +4433,23 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results addi.d $a2, $a0, %pc_lo12(.L.str.101) pcalau12i $a0, %pc_hi20(.L__func__._Z16CheckTabularRateRK7Results) addi.d $a3, $a0, %pc_lo12(.L__func__._Z16CheckTabularRateRK7Results) - addi.d $a0, $sp, 64 + addi.d $a0, $sp, 56 ori $a4, $zero, 420 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerC2EPKcS3_S3_i) jirl $ra, $ra, 0 .Ltmp452: # EH_LABEL # %bb.88: - ld.d $s8, $sp, 64 + ld.d $s7, $sp, 56 b .LBB7_90 .LBB7_89: pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) ld.b $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) dbar 20 pcalau12i $a1, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s8, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + addi.d $s7, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) beqz $a0, .LBB7_525 .LBB7_90: # %_ZN9benchmark8internal18GetNullLogInstanceEv.exit471 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_122 # %bb.91: .Ltmp453: # EH_LABEL @@ -4463,7 +4460,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp454: # EH_LABEL # %bb.92: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit474 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_122 # %bb.93: .Ltmp455: # EH_LABEL @@ -4474,7 +4471,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp456: # EH_LABEL # %bb.94: # %_ZN9benchmark8internallsIA149_cEERNS0_7LogTypeES4_RKT_.exit477 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_122 # %bb.95: .Ltmp457: # EH_LABEL @@ -4485,7 +4482,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp458: # EH_LABEL # %bb.96: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit480 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_122 # %bb.97: .Ltmp460: # EH_LABEL @@ -4494,7 +4491,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp461: # EH_LABEL # %bb.98: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit483 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_122 # %bb.99: .Ltmp462: # EH_LABEL @@ -4505,7 +4502,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp463: # EH_LABEL # %bb.100: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit486 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_122 # %bb.101: ld.d $a1, $fp, 0 @@ -4515,7 +4512,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp465: # EH_LABEL # %bb.102: # %_ZN9benchmark8internallsINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEERNS0_7LogTypeES9_RKT_.exit489 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_122 # %bb.103: .Ltmp466: # EH_LABEL @@ -4526,7 +4523,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp467: # EH_LABEL # %bb.104: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit492 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_122 # %bb.105: .Ltmp468: # EH_LABEL @@ -4537,7 +4534,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp469: # EH_LABEL # %bb.106: # %_ZN9benchmark8internallsIA149_cEERNS0_7LogTypeES4_RKT_.exit495 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_122 # %bb.107: .Ltmp470: # EH_LABEL @@ -4548,7 +4545,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp471: # EH_LABEL # %bb.108: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit498 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_122 # %bb.109: .Ltmp473: # EH_LABEL @@ -4557,7 +4554,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp474: # EH_LABEL # %bb.110: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit501 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_122 # %bb.111: .Ltmp475: # EH_LABEL @@ -4568,7 +4565,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp476: # EH_LABEL # %bb.112: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit504 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_122 # %bb.113: .Ltmp477: # EH_LABEL @@ -4579,7 +4576,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp478: # EH_LABEL # %bb.114: # %_ZN9benchmark8internallsIA11_cEERNS0_7LogTypeES4_RKT_.exit507 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_122 # %bb.115: .Ltmp479: # EH_LABEL @@ -4590,7 +4587,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp480: # EH_LABEL # %bb.116: # %_ZN9benchmark8internallsIA7_cEERNS0_7LogTypeES4_RKT_.exit510 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_122 # %bb.117: .Ltmp481: # EH_LABEL @@ -4601,7 +4598,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp482: # EH_LABEL # %bb.118: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit513 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_122 # %bb.119: .Ltmp483: # EH_LABEL @@ -4612,7 +4609,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp484: # EH_LABEL # %bb.120: # %_ZN9benchmark8internallsIA4_cEERNS0_7LogTypeES4_RKT_.exit516 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_122 # %bb.121: .Ltmp485: # EH_LABEL @@ -4623,20 +4620,20 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp486: # EH_LABEL .LBB7_122: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit519 - st.d $s1, $sp, 136 - st.h $s6, $sp, 152 - st.b $s7, $sp, 154 + st.d $s1, $sp, 128 + st.h $s5, $sp, 144 + st.b $s6, $sp, 146 ori $a0, $zero, 3 - st.d $a0, $sp, 144 - st.b $zero, $sp, 155 + st.d $a0, $sp, 136 + st.b $zero, $sp, 147 .Ltmp488: # EH_LABEL - addi.d $a1, $sp, 136 + addi.d $a1, $sp, 128 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp489: # EH_LABEL # %bb.123: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit525 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_151 # %bb.124: .Ltmp490: # EH_LABEL @@ -4644,7 +4641,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp491: # EH_LABEL # %bb.125: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit528 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_151 # %bb.126: .Ltmp492: # EH_LABEL @@ -4655,7 +4652,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp493: # EH_LABEL # %bb.127: # %_ZN9benchmark8internallsIA14_cEERNS0_7LogTypeES4_RKT_.exit531 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_151 # %bb.128: .Ltmp495: # EH_LABEL @@ -4664,7 +4661,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp496: # EH_LABEL # %bb.129: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit534 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_151 # %bb.130: .Ltmp497: # EH_LABEL @@ -4675,7 +4672,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp498: # EH_LABEL # %bb.131: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit537 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_151 # %bb.132: .Ltmp499: # EH_LABEL @@ -4686,7 +4683,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp500: # EH_LABEL # %bb.133: # %_ZN9benchmark8internallsIA149_cEERNS0_7LogTypeES4_RKT_.exit540 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_151 # %bb.134: .Ltmp501: # EH_LABEL @@ -4697,7 +4694,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp502: # EH_LABEL # %bb.135: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit543 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_151 # %bb.136: .Ltmp504: # EH_LABEL @@ -4706,7 +4703,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp505: # EH_LABEL # %bb.137: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit546 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_151 # %bb.138: .Ltmp506: # EH_LABEL @@ -4717,7 +4714,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp507: # EH_LABEL # %bb.139: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit549 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_151 # %bb.140: .Ltmp508: # EH_LABEL @@ -4728,7 +4725,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp509: # EH_LABEL # %bb.141: # %_ZN9benchmark8internallsIA19_cEERNS0_7LogTypeES4_RKT_.exit552 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_151 # %bb.142: .Ltmp511: # EH_LABEL @@ -4737,7 +4734,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp512: # EH_LABEL # %bb.143: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit555 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_151 # %bb.144: .Ltmp513: # EH_LABEL @@ -4748,17 +4745,20 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp514: # EH_LABEL # %bb.145: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit558 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_151 # %bb.146: .Ltmp516: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI7_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI7_1) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, 1019 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp517: # EH_LABEL # %bb.147: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit561 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_151 # %bb.148: .Ltmp518: # EH_LABEL @@ -4769,7 +4769,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp519: # EH_LABEL # %bb.149: # %_ZN9benchmark8internallsIA5_cEERNS0_7LogTypeES4_RKT_.exit564 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_151 # %bb.150: .Ltmp520: # EH_LABEL @@ -4780,20 +4780,20 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp521: # EH_LABEL .LBB7_151: # %_ZN9benchmark8internallsIA15_cEERNS0_7LogTypeES4_RKT_.exit567 - st.d $s2, $sp, 104 - st.h $s6, $sp, 120 - st.b $s7, $sp, 122 + st.d $s2, $sp, 96 + st.h $s5, $sp, 112 + st.b $s6, $sp, 114 ori $a0, $zero, 3 - st.d $a0, $sp, 112 - st.b $zero, $sp, 123 + st.d $a0, $sp, 104 + st.b $zero, $sp, 115 .Ltmp523: # EH_LABEL - addi.d $a1, $sp, 104 + addi.d $a1, $sp, 96 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp524: # EH_LABEL # %bb.152: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit573 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_156 # %bb.153: fsub.d $fa0, $fa0, $fs1 @@ -4802,7 +4802,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp526: # EH_LABEL # %bb.154: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit576 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_156 # %bb.155: .Ltmp527: # EH_LABEL @@ -4813,36 +4813,42 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp528: # EH_LABEL .LBB7_156: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit579 - st.d $s3, $sp, 72 - st.h $s6, $sp, 88 - st.b $s7, $sp, 90 + st.d $s3, $sp, 64 + st.h $s5, $sp, 80 + st.b $s6, $sp, 82 ori $a0, $zero, 3 - st.d $a0, $sp, 80 - st.b $zero, $sp, 91 + st.d $a0, $sp, 72 + st.b $zero, $sp, 83 .Ltmp530: # EH_LABEL - addi.d $a1, $sp, 72 + addi.d $a1, $sp, 64 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp531: # EH_LABEL # %bb.157: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit585 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_161 # %bb.158: - fld.d $fa1, $s5, %pc_lo12(.LCPI7_2) fsub.d $fa0, $fa0, $fs1 - fabs.d $fa2, $fs1 - fld.d $fa3, $s4, %pc_lo12(.LCPI7_3) - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fs1, $fcc0 + fabs.d $fa1, $fs1 + ori $a1, $s4, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa2, $a1 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fa2, $fs1, $fcc0 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa3 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp532: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp533: # EH_LABEL # %bb.159: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit588 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_161 # %bb.160: .Ltmp534: # EH_LABEL @@ -4853,26 +4859,26 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp535: # EH_LABEL .LBB7_161: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit591 - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 beq $a0, $s3, .LBB7_163 # %bb.162: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i592 - ld.d $a1, $sp, 88 + ld.d $a1, $sp, 80 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB7_163: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit594 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s2, .LBB7_165 # %bb.164: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i595 - ld.d $a1, $sp, 120 + ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB7_165: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit597 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_167 # %bb.166: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i598 - ld.d $a1, $sp, 152 + ld.d $a1, $sp, 144 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -4880,26 +4886,26 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results fcmp.clt.d $fcc0, $fs4, $fs0 bceqz $fcc0, .LBB7_513 # %bb.168: - ld.d $a0, $sp, 176 + ld.d $a0, $sp, 168 beq $a0, $s0, .LBB7_170 # %bb.169: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i613 - ld.d $a1, $sp, 192 + ld.d $a1, $sp, 184 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB7_170: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit615 - st.d $s0, $sp, 176 + st.d $s0, $sp, 168 pcalau12i $a0, %pc_hi20(.L.str.23) addi.d $a0, $a0, %pc_lo12(.L.str.23) - ld.h $s6, $a0, 0 - ld.b $s7, $a0, 2 - st.h $s6, $sp, 192 - st.b $s7, $sp, 194 + ld.h $s5, $a0, 0 + ld.b $s6, $a0, 2 + st.h $s5, $sp, 184 + st.b $s6, $sp, 186 ori $a0, $zero, 3 - st.d $a0, $sp, 184 - st.b $zero, $sp, 195 + st.d $a0, $sp, 176 + st.b $zero, $sp, 187 .Ltmp537: # EH_LABEL - addi.d $a1, $sp, 176 + addi.d $a1, $sp, 168 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 @@ -4912,7 +4918,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results fmul.d $fs0, $fs1, $fs3 fcmp.cule.d $fcc0, $fs0, $fs4 movcf2gr $a0, $fcc0 - st.d $a0, $sp, 24 + st.d $a0, $sp, 16 bceqz $fcc0, .LBB7_174 # %bb.172: .Ltmp540: # EH_LABEL @@ -4922,23 +4928,23 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results addi.d $a2, $a0, %pc_lo12(.L.str.101) pcalau12i $a0, %pc_hi20(.L__func__._Z16CheckTabularRateRK7Results) addi.d $a3, $a0, %pc_lo12(.L__func__._Z16CheckTabularRateRK7Results) - addi.d $a0, $sp, 56 + addi.d $a0, $sp, 48 ori $a4, $zero, 421 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerC2EPKcS3_S3_i) jirl $ra, $ra, 0 .Ltmp541: # EH_LABEL # %bb.173: - ld.d $s8, $sp, 56 + ld.d $s7, $sp, 48 b .LBB7_175 .LBB7_174: pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) ld.b $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) dbar 20 pcalau12i $a1, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s8, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + addi.d $s7, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) beqz $a0, .LBB7_527 .LBB7_175: # %_ZN9benchmark8internal18GetNullLogInstanceEv.exit623 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_207 # %bb.176: .Ltmp542: # EH_LABEL @@ -4949,7 +4955,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp543: # EH_LABEL # %bb.177: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit626 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_207 # %bb.178: .Ltmp544: # EH_LABEL @@ -4960,7 +4966,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp545: # EH_LABEL # %bb.179: # %_ZN9benchmark8internallsIA149_cEERNS0_7LogTypeES4_RKT_.exit629 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_207 # %bb.180: .Ltmp546: # EH_LABEL @@ -4971,7 +4977,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp547: # EH_LABEL # %bb.181: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit632 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_207 # %bb.182: .Ltmp549: # EH_LABEL @@ -4980,7 +4986,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp550: # EH_LABEL # %bb.183: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit635 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_207 # %bb.184: .Ltmp551: # EH_LABEL @@ -4991,7 +4997,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp552: # EH_LABEL # %bb.185: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit638 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_207 # %bb.186: ld.d $a1, $fp, 0 @@ -5001,7 +5007,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp554: # EH_LABEL # %bb.187: # %_ZN9benchmark8internallsINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEERNS0_7LogTypeES9_RKT_.exit641 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_207 # %bb.188: .Ltmp555: # EH_LABEL @@ -5012,7 +5018,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp556: # EH_LABEL # %bb.189: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit644 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_207 # %bb.190: .Ltmp557: # EH_LABEL @@ -5023,7 +5029,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp558: # EH_LABEL # %bb.191: # %_ZN9benchmark8internallsIA149_cEERNS0_7LogTypeES4_RKT_.exit647 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_207 # %bb.192: .Ltmp559: # EH_LABEL @@ -5034,7 +5040,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp560: # EH_LABEL # %bb.193: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit650 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_207 # %bb.194: .Ltmp562: # EH_LABEL @@ -5043,7 +5049,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp563: # EH_LABEL # %bb.195: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit653 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_207 # %bb.196: .Ltmp564: # EH_LABEL @@ -5054,7 +5060,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp565: # EH_LABEL # %bb.197: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit656 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_207 # %bb.198: .Ltmp566: # EH_LABEL @@ -5065,7 +5071,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp567: # EH_LABEL # %bb.199: # %_ZN9benchmark8internallsIA11_cEERNS0_7LogTypeES4_RKT_.exit659 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_207 # %bb.200: .Ltmp568: # EH_LABEL @@ -5076,7 +5082,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp569: # EH_LABEL # %bb.201: # %_ZN9benchmark8internallsIA7_cEERNS0_7LogTypeES4_RKT_.exit662 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_207 # %bb.202: .Ltmp570: # EH_LABEL @@ -5087,7 +5093,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp571: # EH_LABEL # %bb.203: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit665 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_207 # %bb.204: .Ltmp572: # EH_LABEL @@ -5098,7 +5104,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp573: # EH_LABEL # %bb.205: # %_ZN9benchmark8internallsIA4_cEERNS0_7LogTypeES4_RKT_.exit668 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_207 # %bb.206: .Ltmp574: # EH_LABEL @@ -5109,20 +5115,20 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp575: # EH_LABEL .LBB7_207: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit671 - st.d $s1, $sp, 136 - st.h $s6, $sp, 152 - st.b $s7, $sp, 154 + st.d $s1, $sp, 128 + st.h $s5, $sp, 144 + st.b $s6, $sp, 146 ori $a0, $zero, 3 - st.d $a0, $sp, 144 - st.b $zero, $sp, 155 + st.d $a0, $sp, 136 + st.b $zero, $sp, 147 .Ltmp577: # EH_LABEL - addi.d $a1, $sp, 136 + addi.d $a1, $sp, 128 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp578: # EH_LABEL # %bb.208: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit677 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_236 # %bb.209: .Ltmp579: # EH_LABEL @@ -5130,7 +5136,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp580: # EH_LABEL # %bb.210: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit680 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_236 # %bb.211: .Ltmp581: # EH_LABEL @@ -5141,7 +5147,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp582: # EH_LABEL # %bb.212: # %_ZN9benchmark8internallsIA14_cEERNS0_7LogTypeES4_RKT_.exit683 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_236 # %bb.213: .Ltmp584: # EH_LABEL @@ -5150,7 +5156,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp585: # EH_LABEL # %bb.214: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit686 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_236 # %bb.215: .Ltmp586: # EH_LABEL @@ -5161,7 +5167,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp587: # EH_LABEL # %bb.216: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit689 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_236 # %bb.217: .Ltmp588: # EH_LABEL @@ -5172,7 +5178,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp589: # EH_LABEL # %bb.218: # %_ZN9benchmark8internallsIA149_cEERNS0_7LogTypeES4_RKT_.exit692 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_236 # %bb.219: .Ltmp590: # EH_LABEL @@ -5183,7 +5189,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp591: # EH_LABEL # %bb.220: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit695 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_236 # %bb.221: .Ltmp593: # EH_LABEL @@ -5192,7 +5198,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp594: # EH_LABEL # %bb.222: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit698 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_236 # %bb.223: .Ltmp595: # EH_LABEL @@ -5203,7 +5209,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp596: # EH_LABEL # %bb.224: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit701 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_236 # %bb.225: .Ltmp597: # EH_LABEL @@ -5214,7 +5220,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp598: # EH_LABEL # %bb.226: # %_ZN9benchmark8internallsIA19_cEERNS0_7LogTypeES4_RKT_.exit704 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_236 # %bb.227: .Ltmp600: # EH_LABEL @@ -5223,7 +5229,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp601: # EH_LABEL # %bb.228: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit707 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_236 # %bb.229: .Ltmp602: # EH_LABEL @@ -5234,17 +5240,20 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp603: # EH_LABEL # %bb.230: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit710 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_236 # %bb.231: .Ltmp605: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI7_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI7_1) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, 1019 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp606: # EH_LABEL # %bb.232: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit713 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_236 # %bb.233: .Ltmp607: # EH_LABEL @@ -5255,7 +5264,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp608: # EH_LABEL # %bb.234: # %_ZN9benchmark8internallsIA5_cEERNS0_7LogTypeES4_RKT_.exit716 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_236 # %bb.235: .Ltmp609: # EH_LABEL @@ -5266,20 +5275,20 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp610: # EH_LABEL .LBB7_236: # %_ZN9benchmark8internallsIA15_cEERNS0_7LogTypeES4_RKT_.exit719 - st.d $s2, $sp, 104 - st.h $s6, $sp, 120 - st.b $s7, $sp, 122 + st.d $s2, $sp, 96 + st.h $s5, $sp, 112 + st.b $s6, $sp, 114 ori $a0, $zero, 3 - st.d $a0, $sp, 112 - st.b $zero, $sp, 123 + st.d $a0, $sp, 104 + st.b $zero, $sp, 115 .Ltmp612: # EH_LABEL - addi.d $a1, $sp, 104 + addi.d $a1, $sp, 96 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp613: # EH_LABEL # %bb.237: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit725 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_241 # %bb.238: fsub.d $fa0, $fa0, $fs1 @@ -5288,7 +5297,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp615: # EH_LABEL # %bb.239: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit728 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_241 # %bb.240: .Ltmp616: # EH_LABEL @@ -5299,36 +5308,42 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp617: # EH_LABEL .LBB7_241: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit731 - st.d $s3, $sp, 72 - st.h $s6, $sp, 88 - st.b $s7, $sp, 90 + st.d $s3, $sp, 64 + st.h $s5, $sp, 80 + st.b $s6, $sp, 82 ori $a0, $zero, 3 - st.d $a0, $sp, 80 - st.b $zero, $sp, 91 + st.d $a0, $sp, 72 + st.b $zero, $sp, 83 .Ltmp619: # EH_LABEL - addi.d $a1, $sp, 72 + addi.d $a1, $sp, 64 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp620: # EH_LABEL # %bb.242: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit737 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_246 # %bb.243: - fld.d $fa1, $s5, %pc_lo12(.LCPI7_2) fsub.d $fa0, $fa0, $fs1 - fabs.d $fa2, $fs1 - fld.d $fa3, $s4, %pc_lo12(.LCPI7_3) - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fs1, $fcc0 + fabs.d $fa1, $fs1 + ori $a1, $s4, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa2, $a1 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fa2, $fs1, $fcc0 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa3 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp621: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp622: # EH_LABEL # %bb.244: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit740 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_246 # %bb.245: .Ltmp623: # EH_LABEL @@ -5339,26 +5354,26 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp624: # EH_LABEL .LBB7_246: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit743 - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 beq $a0, $s3, .LBB7_248 # %bb.247: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i744 - ld.d $a1, $sp, 88 + ld.d $a1, $sp, 80 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB7_248: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit746 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s2, .LBB7_250 # %bb.249: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i747 - ld.d $a1, $sp, 120 + ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB7_250: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit749 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_252 # %bb.251: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i750 - ld.d $a1, $sp, 152 + ld.d $a1, $sp, 144 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -5366,26 +5381,26 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results fcmp.clt.d $fcc0, $fs4, $fs0 bceqz $fcc0, .LBB7_515 # %bb.253: - ld.d $a0, $sp, 176 + ld.d $a0, $sp, 168 beq $a0, $s0, .LBB7_255 # %bb.254: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i765 - ld.d $a1, $sp, 192 + ld.d $a1, $sp, 184 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB7_255: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit767 - st.d $s0, $sp, 176 + st.d $s0, $sp, 168 pcalau12i $a0, %pc_hi20(.L.str.24) addi.d $a0, $a0, %pc_lo12(.L.str.24) - ld.h $s6, $a0, 0 - ld.b $s7, $a0, 2 - st.h $s6, $sp, 192 - st.b $s7, $sp, 194 + ld.h $s5, $a0, 0 + ld.b $s6, $a0, 2 + st.h $s5, $sp, 184 + st.b $s6, $sp, 186 ori $a0, $zero, 3 - st.d $a0, $sp, 184 - st.b $zero, $sp, 195 + st.d $a0, $sp, 176 + st.b $zero, $sp, 187 .Ltmp626: # EH_LABEL - addi.d $a1, $sp, 176 + addi.d $a1, $sp, 168 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 @@ -5398,7 +5413,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results fmul.d $fs0, $fs1, $fs3 fcmp.cule.d $fcc0, $fs0, $fs4 movcf2gr $a0, $fcc0 - st.d $a0, $sp, 24 + st.d $a0, $sp, 16 bceqz $fcc0, .LBB7_259 # %bb.257: .Ltmp629: # EH_LABEL @@ -5408,23 +5423,23 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results addi.d $a2, $a0, %pc_lo12(.L.str.101) pcalau12i $a0, %pc_hi20(.L__func__._Z16CheckTabularRateRK7Results) addi.d $a3, $a0, %pc_lo12(.L__func__._Z16CheckTabularRateRK7Results) - addi.d $a0, $sp, 48 + addi.d $a0, $sp, 40 ori $a4, $zero, 422 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerC2EPKcS3_S3_i) jirl $ra, $ra, 0 .Ltmp630: # EH_LABEL # %bb.258: - ld.d $s8, $sp, 48 + ld.d $s7, $sp, 40 b .LBB7_260 .LBB7_259: pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) ld.b $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) dbar 20 pcalau12i $a1, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s8, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + addi.d $s7, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) beqz $a0, .LBB7_529 .LBB7_260: # %_ZN9benchmark8internal18GetNullLogInstanceEv.exit775 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_292 # %bb.261: .Ltmp631: # EH_LABEL @@ -5435,7 +5450,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp632: # EH_LABEL # %bb.262: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit778 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_292 # %bb.263: .Ltmp633: # EH_LABEL @@ -5446,7 +5461,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp634: # EH_LABEL # %bb.264: # %_ZN9benchmark8internallsIA149_cEERNS0_7LogTypeES4_RKT_.exit781 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_292 # %bb.265: .Ltmp635: # EH_LABEL @@ -5457,7 +5472,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp636: # EH_LABEL # %bb.266: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit784 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_292 # %bb.267: .Ltmp638: # EH_LABEL @@ -5466,7 +5481,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp639: # EH_LABEL # %bb.268: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit787 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_292 # %bb.269: .Ltmp640: # EH_LABEL @@ -5477,7 +5492,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp641: # EH_LABEL # %bb.270: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit790 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_292 # %bb.271: ld.d $a1, $fp, 0 @@ -5487,7 +5502,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp643: # EH_LABEL # %bb.272: # %_ZN9benchmark8internallsINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEERNS0_7LogTypeES9_RKT_.exit793 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_292 # %bb.273: .Ltmp644: # EH_LABEL @@ -5498,7 +5513,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp645: # EH_LABEL # %bb.274: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit796 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_292 # %bb.275: .Ltmp646: # EH_LABEL @@ -5509,7 +5524,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp647: # EH_LABEL # %bb.276: # %_ZN9benchmark8internallsIA149_cEERNS0_7LogTypeES4_RKT_.exit799 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_292 # %bb.277: .Ltmp648: # EH_LABEL @@ -5520,7 +5535,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp649: # EH_LABEL # %bb.278: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit802 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_292 # %bb.279: .Ltmp651: # EH_LABEL @@ -5529,7 +5544,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp652: # EH_LABEL # %bb.280: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit805 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_292 # %bb.281: .Ltmp653: # EH_LABEL @@ -5540,7 +5555,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp654: # EH_LABEL # %bb.282: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit808 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_292 # %bb.283: .Ltmp655: # EH_LABEL @@ -5551,7 +5566,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp656: # EH_LABEL # %bb.284: # %_ZN9benchmark8internallsIA11_cEERNS0_7LogTypeES4_RKT_.exit811 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_292 # %bb.285: .Ltmp657: # EH_LABEL @@ -5562,7 +5577,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp658: # EH_LABEL # %bb.286: # %_ZN9benchmark8internallsIA7_cEERNS0_7LogTypeES4_RKT_.exit814 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_292 # %bb.287: .Ltmp659: # EH_LABEL @@ -5573,7 +5588,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp660: # EH_LABEL # %bb.288: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit817 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_292 # %bb.289: .Ltmp661: # EH_LABEL @@ -5584,7 +5599,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp662: # EH_LABEL # %bb.290: # %_ZN9benchmark8internallsIA4_cEERNS0_7LogTypeES4_RKT_.exit820 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_292 # %bb.291: .Ltmp663: # EH_LABEL @@ -5595,20 +5610,20 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp664: # EH_LABEL .LBB7_292: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit823 - st.d $s1, $sp, 136 - st.h $s6, $sp, 152 - st.b $s7, $sp, 154 + st.d $s1, $sp, 128 + st.h $s5, $sp, 144 + st.b $s6, $sp, 146 ori $a0, $zero, 3 - st.d $a0, $sp, 144 - st.b $zero, $sp, 155 + st.d $a0, $sp, 136 + st.b $zero, $sp, 147 .Ltmp666: # EH_LABEL - addi.d $a1, $sp, 136 + addi.d $a1, $sp, 128 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp667: # EH_LABEL # %bb.293: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit829 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_321 # %bb.294: .Ltmp668: # EH_LABEL @@ -5616,7 +5631,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp669: # EH_LABEL # %bb.295: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit832 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_321 # %bb.296: .Ltmp670: # EH_LABEL @@ -5627,7 +5642,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp671: # EH_LABEL # %bb.297: # %_ZN9benchmark8internallsIA14_cEERNS0_7LogTypeES4_RKT_.exit835 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_321 # %bb.298: .Ltmp673: # EH_LABEL @@ -5636,7 +5651,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp674: # EH_LABEL # %bb.299: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit838 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_321 # %bb.300: .Ltmp675: # EH_LABEL @@ -5647,7 +5662,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp676: # EH_LABEL # %bb.301: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit841 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_321 # %bb.302: .Ltmp677: # EH_LABEL @@ -5658,7 +5673,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp678: # EH_LABEL # %bb.303: # %_ZN9benchmark8internallsIA149_cEERNS0_7LogTypeES4_RKT_.exit844 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_321 # %bb.304: .Ltmp679: # EH_LABEL @@ -5669,7 +5684,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp680: # EH_LABEL # %bb.305: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit847 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_321 # %bb.306: .Ltmp682: # EH_LABEL @@ -5678,7 +5693,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp683: # EH_LABEL # %bb.307: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit850 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_321 # %bb.308: .Ltmp684: # EH_LABEL @@ -5689,7 +5704,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp685: # EH_LABEL # %bb.309: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit853 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_321 # %bb.310: .Ltmp686: # EH_LABEL @@ -5700,7 +5715,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp687: # EH_LABEL # %bb.311: # %_ZN9benchmark8internallsIA19_cEERNS0_7LogTypeES4_RKT_.exit856 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_321 # %bb.312: .Ltmp689: # EH_LABEL @@ -5709,7 +5724,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp690: # EH_LABEL # %bb.313: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit859 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_321 # %bb.314: .Ltmp691: # EH_LABEL @@ -5720,17 +5735,20 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp692: # EH_LABEL # %bb.315: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit862 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_321 # %bb.316: .Ltmp694: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI7_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI7_1) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, 1019 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp695: # EH_LABEL # %bb.317: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit865 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_321 # %bb.318: .Ltmp696: # EH_LABEL @@ -5741,7 +5759,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp697: # EH_LABEL # %bb.319: # %_ZN9benchmark8internallsIA5_cEERNS0_7LogTypeES4_RKT_.exit868 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_321 # %bb.320: .Ltmp698: # EH_LABEL @@ -5752,20 +5770,20 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp699: # EH_LABEL .LBB7_321: # %_ZN9benchmark8internallsIA15_cEERNS0_7LogTypeES4_RKT_.exit871 - st.d $s2, $sp, 104 - st.h $s6, $sp, 120 - st.b $s7, $sp, 122 + st.d $s2, $sp, 96 + st.h $s5, $sp, 112 + st.b $s6, $sp, 114 ori $a0, $zero, 3 - st.d $a0, $sp, 112 - st.b $zero, $sp, 123 + st.d $a0, $sp, 104 + st.b $zero, $sp, 115 .Ltmp701: # EH_LABEL - addi.d $a1, $sp, 104 + addi.d $a1, $sp, 96 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp702: # EH_LABEL # %bb.322: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit877 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_326 # %bb.323: fsub.d $fa0, $fa0, $fs1 @@ -5774,7 +5792,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp704: # EH_LABEL # %bb.324: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit880 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_326 # %bb.325: .Ltmp705: # EH_LABEL @@ -5785,36 +5803,42 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp706: # EH_LABEL .LBB7_326: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit883 - st.d $s3, $sp, 72 - st.h $s6, $sp, 88 - st.b $s7, $sp, 90 + st.d $s3, $sp, 64 + st.h $s5, $sp, 80 + st.b $s6, $sp, 82 ori $a0, $zero, 3 - st.d $a0, $sp, 80 - st.b $zero, $sp, 91 + st.d $a0, $sp, 72 + st.b $zero, $sp, 83 .Ltmp708: # EH_LABEL - addi.d $a1, $sp, 72 + addi.d $a1, $sp, 64 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp709: # EH_LABEL # %bb.327: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit889 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_331 # %bb.328: - fld.d $fa1, $s5, %pc_lo12(.LCPI7_2) fsub.d $fa0, $fa0, $fs1 - fabs.d $fa2, $fs1 - fld.d $fa3, $s4, %pc_lo12(.LCPI7_3) - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fs1, $fcc0 + fabs.d $fa1, $fs1 + ori $a1, $s4, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa2, $a1 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fa2, $fs1, $fcc0 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa3 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp710: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp711: # EH_LABEL # %bb.329: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit892 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_331 # %bb.330: .Ltmp712: # EH_LABEL @@ -5825,26 +5849,26 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp713: # EH_LABEL .LBB7_331: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit895 - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 beq $a0, $s3, .LBB7_333 # %bb.332: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i896 - ld.d $a1, $sp, 88 + ld.d $a1, $sp, 80 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB7_333: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit898 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s2, .LBB7_335 # %bb.334: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i899 - ld.d $a1, $sp, 120 + ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB7_335: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit901 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_337 # %bb.336: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i902 - ld.d $a1, $sp, 152 + ld.d $a1, $sp, 144 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -5852,23 +5876,23 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results fcmp.clt.d $fcc0, $fs4, $fs0 bceqz $fcc0, .LBB7_517 # %bb.338: - ld.d $a0, $sp, 176 + ld.d $a0, $sp, 168 beq $a0, $s0, .LBB7_340 # %bb.339: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i917 - ld.d $a1, $sp, 192 + ld.d $a1, $sp, 184 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB7_340: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit919 - st.d $s0, $sp, 176 + st.d $s0, $sp, 168 lu12i.w $a0, 403191 - ori $s6, $a0, 582 - st.w $s6, $sp, 192 + ori $s5, $a0, 582 + st.w $s5, $sp, 184 ori $a0, $zero, 4 - st.d $a0, $sp, 184 - st.b $zero, $sp, 196 + st.d $a0, $sp, 176 + st.b $zero, $sp, 188 .Ltmp715: # EH_LABEL - addi.d $a1, $sp, 176 + addi.d $a1, $sp, 168 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 @@ -5881,7 +5905,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results fmul.d $fs0, $fs1, $fs3 fcmp.cule.d $fcc0, $fs0, $fs4 movcf2gr $a0, $fcc0 - st.d $a0, $sp, 24 + st.d $a0, $sp, 16 bceqz $fcc0, .LBB7_344 # %bb.342: .Ltmp718: # EH_LABEL @@ -5891,23 +5915,23 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results addi.d $a2, $a0, %pc_lo12(.L.str.101) pcalau12i $a0, %pc_hi20(.L__func__._Z16CheckTabularRateRK7Results) addi.d $a3, $a0, %pc_lo12(.L__func__._Z16CheckTabularRateRK7Results) - addi.d $a0, $sp, 40 + addi.d $a0, $sp, 32 ori $a4, $zero, 423 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerC2EPKcS3_S3_i) jirl $ra, $ra, 0 .Ltmp719: # EH_LABEL # %bb.343: - ld.d $s7, $sp, 40 + ld.d $s6, $sp, 32 b .LBB7_345 .LBB7_344: pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) ld.b $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) dbar 20 pcalau12i $a1, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s7, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + addi.d $s6, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) beqz $a0, .LBB7_531 .LBB7_345: # %_ZN9benchmark8internal18GetNullLogInstanceEv.exit927 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_377 # %bb.346: .Ltmp720: # EH_LABEL @@ -5918,7 +5942,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp721: # EH_LABEL # %bb.347: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit930 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_377 # %bb.348: .Ltmp722: # EH_LABEL @@ -5929,7 +5953,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp723: # EH_LABEL # %bb.349: # %_ZN9benchmark8internallsIA149_cEERNS0_7LogTypeES4_RKT_.exit933 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_377 # %bb.350: .Ltmp724: # EH_LABEL @@ -5940,7 +5964,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp725: # EH_LABEL # %bb.351: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit936 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_377 # %bb.352: .Ltmp727: # EH_LABEL @@ -5949,7 +5973,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp728: # EH_LABEL # %bb.353: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit939 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_377 # %bb.354: .Ltmp729: # EH_LABEL @@ -5960,7 +5984,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp730: # EH_LABEL # %bb.355: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit942 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_377 # %bb.356: ld.d $a1, $fp, 0 @@ -5970,7 +5994,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp732: # EH_LABEL # %bb.357: # %_ZN9benchmark8internallsINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEERNS0_7LogTypeES9_RKT_.exit945 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_377 # %bb.358: .Ltmp733: # EH_LABEL @@ -5981,7 +6005,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp734: # EH_LABEL # %bb.359: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit948 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_377 # %bb.360: .Ltmp735: # EH_LABEL @@ -5992,7 +6016,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp736: # EH_LABEL # %bb.361: # %_ZN9benchmark8internallsIA149_cEERNS0_7LogTypeES4_RKT_.exit951 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_377 # %bb.362: .Ltmp737: # EH_LABEL @@ -6003,7 +6027,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp738: # EH_LABEL # %bb.363: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit954 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_377 # %bb.364: .Ltmp740: # EH_LABEL @@ -6012,7 +6036,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp741: # EH_LABEL # %bb.365: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit957 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_377 # %bb.366: .Ltmp742: # EH_LABEL @@ -6023,7 +6047,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp743: # EH_LABEL # %bb.367: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit960 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_377 # %bb.368: .Ltmp744: # EH_LABEL @@ -6034,7 +6058,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp745: # EH_LABEL # %bb.369: # %_ZN9benchmark8internallsIA11_cEERNS0_7LogTypeES4_RKT_.exit963 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_377 # %bb.370: .Ltmp746: # EH_LABEL @@ -6045,7 +6069,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp747: # EH_LABEL # %bb.371: # %_ZN9benchmark8internallsIA7_cEERNS0_7LogTypeES4_RKT_.exit966 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_377 # %bb.372: .Ltmp748: # EH_LABEL @@ -6056,7 +6080,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp749: # EH_LABEL # %bb.373: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit969 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_377 # %bb.374: .Ltmp750: # EH_LABEL @@ -6067,7 +6091,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp751: # EH_LABEL # %bb.375: # %_ZN9benchmark8internallsIA5_cEERNS0_7LogTypeES4_RKT_.exit972 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_377 # %bb.376: .Ltmp752: # EH_LABEL @@ -6078,19 +6102,19 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp753: # EH_LABEL .LBB7_377: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit975 - st.d $s1, $sp, 136 - st.w $s6, $sp, 152 + st.d $s1, $sp, 128 + st.w $s5, $sp, 144 ori $a0, $zero, 4 - st.d $a0, $sp, 144 - st.b $zero, $sp, 156 + st.d $a0, $sp, 136 + st.b $zero, $sp, 148 .Ltmp755: # EH_LABEL - addi.d $a1, $sp, 136 + addi.d $a1, $sp, 128 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp756: # EH_LABEL # %bb.378: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit981 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_406 # %bb.379: .Ltmp757: # EH_LABEL @@ -6098,7 +6122,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp758: # EH_LABEL # %bb.380: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit984 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_406 # %bb.381: .Ltmp759: # EH_LABEL @@ -6109,7 +6133,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp760: # EH_LABEL # %bb.382: # %_ZN9benchmark8internallsIA14_cEERNS0_7LogTypeES4_RKT_.exit987 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_406 # %bb.383: .Ltmp762: # EH_LABEL @@ -6118,7 +6142,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp763: # EH_LABEL # %bb.384: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit990 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_406 # %bb.385: .Ltmp764: # EH_LABEL @@ -6129,7 +6153,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp765: # EH_LABEL # %bb.386: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit993 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_406 # %bb.387: .Ltmp766: # EH_LABEL @@ -6140,7 +6164,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp767: # EH_LABEL # %bb.388: # %_ZN9benchmark8internallsIA149_cEERNS0_7LogTypeES4_RKT_.exit996 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_406 # %bb.389: .Ltmp768: # EH_LABEL @@ -6151,7 +6175,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp769: # EH_LABEL # %bb.390: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit999 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_406 # %bb.391: .Ltmp771: # EH_LABEL @@ -6160,7 +6184,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp772: # EH_LABEL # %bb.392: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit1002 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_406 # %bb.393: .Ltmp773: # EH_LABEL @@ -6171,7 +6195,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp774: # EH_LABEL # %bb.394: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit1005 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_406 # %bb.395: .Ltmp775: # EH_LABEL @@ -6182,7 +6206,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp776: # EH_LABEL # %bb.396: # %_ZN9benchmark8internallsIA19_cEERNS0_7LogTypeES4_RKT_.exit1008 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_406 # %bb.397: .Ltmp778: # EH_LABEL @@ -6191,7 +6215,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp779: # EH_LABEL # %bb.398: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit1011 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_406 # %bb.399: .Ltmp780: # EH_LABEL @@ -6202,17 +6226,20 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp781: # EH_LABEL # %bb.400: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit1014 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_406 # %bb.401: .Ltmp783: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI7_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI7_1) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, 1019 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp784: # EH_LABEL # %bb.402: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit1017 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_406 # %bb.403: .Ltmp785: # EH_LABEL @@ -6223,7 +6250,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp786: # EH_LABEL # %bb.404: # %_ZN9benchmark8internallsIA5_cEERNS0_7LogTypeES4_RKT_.exit1020 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_406 # %bb.405: .Ltmp787: # EH_LABEL @@ -6234,19 +6261,19 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp788: # EH_LABEL .LBB7_406: # %_ZN9benchmark8internallsIA15_cEERNS0_7LogTypeES4_RKT_.exit1023 - st.d $s2, $sp, 104 - st.w $s6, $sp, 120 + st.d $s2, $sp, 96 + st.w $s5, $sp, 112 ori $a0, $zero, 4 - st.d $a0, $sp, 112 - st.b $zero, $sp, 124 + st.d $a0, $sp, 104 + st.b $zero, $sp, 116 .Ltmp790: # EH_LABEL - addi.d $a1, $sp, 104 + addi.d $a1, $sp, 96 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp791: # EH_LABEL # %bb.407: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit1029 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_411 # %bb.408: fsub.d $fa0, $fa0, $fs1 @@ -6255,7 +6282,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp793: # EH_LABEL # %bb.409: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit1032 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_411 # %bb.410: .Ltmp794: # EH_LABEL @@ -6266,35 +6293,41 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp795: # EH_LABEL .LBB7_411: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit1035 - st.d $s3, $sp, 72 - st.w $s6, $sp, 88 + st.d $s3, $sp, 64 + st.w $s5, $sp, 80 ori $a0, $zero, 4 - st.d $a0, $sp, 80 - st.b $zero, $sp, 92 + st.d $a0, $sp, 72 + st.b $zero, $sp, 84 .Ltmp797: # EH_LABEL - addi.d $a1, $sp, 72 + addi.d $a1, $sp, 64 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp798: # EH_LABEL # %bb.412: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit1041 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_416 # %bb.413: - fld.d $fa1, $s5, %pc_lo12(.LCPI7_2) fsub.d $fa0, $fa0, $fs1 - fabs.d $fa2, $fs1 - fld.d $fa3, $s4, %pc_lo12(.LCPI7_3) - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fs1, $fcc0 + fabs.d $fa1, $fs1 + ori $a1, $s4, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa2, $a1 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fa2, $fs1, $fcc0 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa3 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp799: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp800: # EH_LABEL # %bb.414: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit1044 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB7_416 # %bb.415: .Ltmp801: # EH_LABEL @@ -6305,26 +6338,26 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp802: # EH_LABEL .LBB7_416: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit1047 - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 beq $a0, $s3, .LBB7_418 # %bb.417: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i1048 - ld.d $a1, $sp, 88 + ld.d $a1, $sp, 80 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB7_418: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit1050 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s2, .LBB7_420 # %bb.419: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i1051 - ld.d $a1, $sp, 120 + ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB7_420: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit1053 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_422 # %bb.421: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i1054 - ld.d $a1, $sp, 152 + ld.d $a1, $sp, 144 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -6332,33 +6365,33 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results fcmp.clt.d $fcc0, $fs4, $fs0 bceqz $fcc0, .LBB7_519 # %bb.423: - ld.d $a0, $sp, 176 + ld.d $a0, $sp, 168 beq $a0, $s0, .LBB7_425 # %bb.424: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i1069 - ld.d $a1, $sp, 192 + ld.d $a1, $sp, 184 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB7_425: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit1071 - st.d $s0, $sp, 176 + st.d $s0, $sp, 168 pcalau12i $a0, %pc_hi20(.L.str.26) addi.d $a0, $a0, %pc_lo12(.L.str.26) - ld.h $s6, $a0, 0 - ld.b $s7, $a0, 2 - st.h $s6, $sp, 192 - st.b $s7, $sp, 194 + ld.h $s5, $a0, 0 + ld.b $s6, $a0, 2 + st.h $s5, $sp, 184 + st.b $s6, $sp, 186 ori $a0, $zero, 3 - st.d $a0, $sp, 184 - st.b $zero, $sp, 195 + st.d $a0, $sp, 176 + st.b $zero, $sp, 187 .Ltmp804: # EH_LABEL - addi.d $a1, $sp, 176 + addi.d $a1, $sp, 168 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp805: # EH_LABEL # %bb.426: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit1077 - pcalau12i $a0, %pc_hi20(.LCPI7_4) - fld.d $fa1, $a0, %pc_lo12(.LCPI7_4) + lu52i.d $a0, $zero, 1028 + movgr2fr.d $fa1, $a0 fdiv.d $fs1, $fa1, $fs2 fsub.d $fa0, $fa0, $fs1 fabs.d $fs2, $fa0 @@ -6373,23 +6406,23 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results addi.d $a2, $a0, %pc_lo12(.L.str.101) pcalau12i $a0, %pc_hi20(.L__func__._Z16CheckTabularRateRK7Results) addi.d $a3, $a0, %pc_lo12(.L__func__._Z16CheckTabularRateRK7Results) - addi.d $a0, $sp, 32 + addi.d $a0, $sp, 24 ori $a4, $zero, 424 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerC2EPKcS3_S3_i) jirl $ra, $ra, 0 .Ltmp808: # EH_LABEL # %bb.428: - ld.d $s8, $sp, 32 + ld.d $s7, $sp, 24 b .LBB7_430 .LBB7_429: pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) ld.b $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) dbar 20 pcalau12i $a1, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s8, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + addi.d $s7, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) beqz $a0, .LBB7_533 .LBB7_430: # %_ZN9benchmark8internal18GetNullLogInstanceEv.exit1079 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_462 # %bb.431: .Ltmp809: # EH_LABEL @@ -6400,7 +6433,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp810: # EH_LABEL # %bb.432: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit1082 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_462 # %bb.433: .Ltmp811: # EH_LABEL @@ -6411,7 +6444,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp812: # EH_LABEL # %bb.434: # %_ZN9benchmark8internallsIA149_cEERNS0_7LogTypeES4_RKT_.exit1085 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_462 # %bb.435: .Ltmp813: # EH_LABEL @@ -6422,7 +6455,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp814: # EH_LABEL # %bb.436: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit1088 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_462 # %bb.437: .Ltmp816: # EH_LABEL @@ -6431,7 +6464,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp817: # EH_LABEL # %bb.438: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit1091 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_462 # %bb.439: .Ltmp818: # EH_LABEL @@ -6442,7 +6475,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp819: # EH_LABEL # %bb.440: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit1094 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_462 # %bb.441: ld.d $a1, $fp, 0 @@ -6452,7 +6485,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp821: # EH_LABEL # %bb.442: # %_ZN9benchmark8internallsINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEERNS0_7LogTypeES9_RKT_.exit1097 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_462 # %bb.443: .Ltmp822: # EH_LABEL @@ -6463,7 +6496,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp823: # EH_LABEL # %bb.444: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit1100 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_462 # %bb.445: .Ltmp824: # EH_LABEL @@ -6474,7 +6507,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp825: # EH_LABEL # %bb.446: # %_ZN9benchmark8internallsIA149_cEERNS0_7LogTypeES4_RKT_.exit1103 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_462 # %bb.447: .Ltmp826: # EH_LABEL @@ -6485,7 +6518,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp827: # EH_LABEL # %bb.448: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit1106 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_462 # %bb.449: .Ltmp829: # EH_LABEL @@ -6494,7 +6527,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp830: # EH_LABEL # %bb.450: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit1109 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_462 # %bb.451: .Ltmp831: # EH_LABEL @@ -6505,7 +6538,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp832: # EH_LABEL # %bb.452: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit1112 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_462 # %bb.453: .Ltmp833: # EH_LABEL @@ -6516,7 +6549,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp834: # EH_LABEL # %bb.454: # %_ZN9benchmark8internallsIA11_cEERNS0_7LogTypeES4_RKT_.exit1115 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_462 # %bb.455: .Ltmp835: # EH_LABEL @@ -6527,7 +6560,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp836: # EH_LABEL # %bb.456: # %_ZN9benchmark8internallsIA7_cEERNS0_7LogTypeES4_RKT_.exit1118 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_462 # %bb.457: .Ltmp837: # EH_LABEL @@ -6538,7 +6571,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp838: # EH_LABEL # %bb.458: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit1121 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_462 # %bb.459: .Ltmp839: # EH_LABEL @@ -6549,7 +6582,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp840: # EH_LABEL # %bb.460: # %_ZN9benchmark8internallsIA4_cEERNS0_7LogTypeES4_RKT_.exit1124 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_462 # %bb.461: .Ltmp841: # EH_LABEL @@ -6560,20 +6593,20 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp842: # EH_LABEL .LBB7_462: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit1127 - st.d $s1, $sp, 136 - st.h $s6, $sp, 152 - st.b $s7, $sp, 154 + st.d $s1, $sp, 128 + st.h $s5, $sp, 144 + st.b $s6, $sp, 146 ori $a0, $zero, 3 - st.d $a0, $sp, 144 - st.b $zero, $sp, 155 + st.d $a0, $sp, 136 + st.b $zero, $sp, 147 .Ltmp844: # EH_LABEL - addi.d $a1, $sp, 136 + addi.d $a1, $sp, 128 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp845: # EH_LABEL # %bb.463: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit1133 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_491 # %bb.464: .Ltmp846: # EH_LABEL @@ -6581,7 +6614,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp847: # EH_LABEL # %bb.465: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit1136 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_491 # %bb.466: .Ltmp848: # EH_LABEL @@ -6592,7 +6625,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp849: # EH_LABEL # %bb.467: # %_ZN9benchmark8internallsIA14_cEERNS0_7LogTypeES4_RKT_.exit1139 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_491 # %bb.468: .Ltmp851: # EH_LABEL @@ -6601,7 +6634,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp852: # EH_LABEL # %bb.469: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit1142 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_491 # %bb.470: .Ltmp853: # EH_LABEL @@ -6612,7 +6645,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp854: # EH_LABEL # %bb.471: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit1145 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_491 # %bb.472: .Ltmp855: # EH_LABEL @@ -6623,7 +6656,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp856: # EH_LABEL # %bb.473: # %_ZN9benchmark8internallsIA149_cEERNS0_7LogTypeES4_RKT_.exit1148 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_491 # %bb.474: .Ltmp857: # EH_LABEL @@ -6634,7 +6667,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp858: # EH_LABEL # %bb.475: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit1151 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_491 # %bb.476: .Ltmp860: # EH_LABEL @@ -6643,7 +6676,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp861: # EH_LABEL # %bb.477: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit1154 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_491 # %bb.478: .Ltmp862: # EH_LABEL @@ -6654,7 +6687,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp863: # EH_LABEL # %bb.479: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit1157 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_491 # %bb.480: .Ltmp864: # EH_LABEL @@ -6665,7 +6698,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp865: # EH_LABEL # %bb.481: # %_ZN9benchmark8internallsIA19_cEERNS0_7LogTypeES4_RKT_.exit1160 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_491 # %bb.482: .Ltmp867: # EH_LABEL @@ -6674,7 +6707,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp868: # EH_LABEL # %bb.483: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit1163 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_491 # %bb.484: .Ltmp869: # EH_LABEL @@ -6685,17 +6718,20 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp870: # EH_LABEL # %bb.485: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit1166 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_491 # %bb.486: .Ltmp872: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI7_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI7_1) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, 1019 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp873: # EH_LABEL # %bb.487: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit1169 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_491 # %bb.488: .Ltmp874: # EH_LABEL @@ -6706,7 +6742,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp875: # EH_LABEL # %bb.489: # %_ZN9benchmark8internallsIA5_cEERNS0_7LogTypeES4_RKT_.exit1172 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_491 # %bb.490: .Ltmp876: # EH_LABEL @@ -6717,20 +6753,20 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp877: # EH_LABEL .LBB7_491: # %_ZN9benchmark8internallsIA15_cEERNS0_7LogTypeES4_RKT_.exit1175 - st.d $s2, $sp, 104 - st.h $s6, $sp, 120 - st.b $s7, $sp, 122 + st.d $s2, $sp, 96 + st.h $s5, $sp, 112 + st.b $s6, $sp, 114 ori $a0, $zero, 3 - st.d $a0, $sp, 112 - st.b $zero, $sp, 123 + st.d $a0, $sp, 104 + st.b $zero, $sp, 115 .Ltmp879: # EH_LABEL - addi.d $a1, $sp, 104 + addi.d $a1, $sp, 96 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp880: # EH_LABEL # %bb.492: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit1181 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_496 # %bb.493: fsub.d $fa0, $fa0, $fs1 @@ -6739,7 +6775,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp882: # EH_LABEL # %bb.494: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit1184 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_496 # %bb.495: .Ltmp883: # EH_LABEL @@ -6750,36 +6786,42 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp884: # EH_LABEL .LBB7_496: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit1187 - st.d $s3, $sp, 72 - st.h $s6, $sp, 88 - st.b $s7, $sp, 90 + st.d $s3, $sp, 64 + st.h $s5, $sp, 80 + st.b $s6, $sp, 82 ori $a0, $zero, 3 - st.d $a0, $sp, 80 - st.b $zero, $sp, 91 + st.d $a0, $sp, 72 + st.b $zero, $sp, 83 .Ltmp886: # EH_LABEL - addi.d $a1, $sp, 72 + addi.d $a1, $sp, 64 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp887: # EH_LABEL # %bb.497: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit1193 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_501 # %bb.498: - fld.d $fa1, $s5, %pc_lo12(.LCPI7_2) fsub.d $fa0, $fa0, $fs1 - fabs.d $fa2, $fs1 - fld.d $fa3, $s4, %pc_lo12(.LCPI7_3) - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fs1, $fcc0 + fabs.d $fa1, $fs1 + ori $a1, $s4, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa2, $a1 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fa2, $fs1, $fcc0 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa3 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp888: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp889: # EH_LABEL # %bb.499: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit1196 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB7_501 # %bb.500: .Ltmp890: # EH_LABEL @@ -6790,26 +6832,26 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results jirl $ra, $ra, 0 .Ltmp891: # EH_LABEL .LBB7_501: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit1199 - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 beq $a0, $s3, .LBB7_503 # %bb.502: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i1200 - ld.d $a1, $sp, 88 + ld.d $a1, $sp, 80 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB7_503: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit1202 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s2, .LBB7_505 # %bb.504: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i1203 - ld.d $a1, $sp, 120 + ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB7_505: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit1205 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_507 # %bb.506: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i1206 - ld.d $a1, $sp, 152 + ld.d $a1, $sp, 144 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -6817,70 +6859,69 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results fcmp.clt.d $fcc0, $fs2, $fs0 bceqz $fcc0, .LBB7_521 # %bb.508: - ld.d $a0, $sp, 176 + ld.d $a0, $sp, 168 beq $a0, $s0, .LBB7_510 # %bb.509: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i1221 - ld.d $a1, $sp, 192 + ld.d $a1, $sp, 184 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB7_510: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit1223 - fld.d $fs4, $sp, 208 # 8-byte Folded Reload - fld.d $fs3, $sp, 216 # 8-byte Folded Reload - fld.d $fs2, $sp, 224 # 8-byte Folded Reload - fld.d $fs1, $sp, 232 # 8-byte Folded Reload - fld.d $fs0, $sp, 240 # 8-byte Folded Reload - ld.d $s8, $sp, 248 # 8-byte Folded Reload - ld.d $s7, $sp, 256 # 8-byte Folded Reload - ld.d $s6, $sp, 264 # 8-byte Folded Reload - ld.d $s5, $sp, 272 # 8-byte Folded Reload - ld.d $s4, $sp, 280 # 8-byte Folded Reload - ld.d $s3, $sp, 288 # 8-byte Folded Reload - ld.d $s2, $sp, 296 # 8-byte Folded Reload - ld.d $s1, $sp, 304 # 8-byte Folded Reload - ld.d $s0, $sp, 312 # 8-byte Folded Reload - ld.d $fp, $sp, 320 # 8-byte Folded Reload - ld.d $ra, $sp, 328 # 8-byte Folded Reload - addi.d $sp, $sp, 336 + fld.d $fs4, $sp, 200 # 8-byte Folded Reload + fld.d $fs3, $sp, 208 # 8-byte Folded Reload + fld.d $fs2, $sp, 216 # 8-byte Folded Reload + fld.d $fs1, $sp, 224 # 8-byte Folded Reload + fld.d $fs0, $sp, 232 # 8-byte Folded Reload + ld.d $s7, $sp, 240 # 8-byte Folded Reload + ld.d $s6, $sp, 248 # 8-byte Folded Reload + ld.d $s5, $sp, 256 # 8-byte Folded Reload + ld.d $s4, $sp, 264 # 8-byte Folded Reload + ld.d $s3, $sp, 272 # 8-byte Folded Reload + ld.d $s2, $sp, 280 # 8-byte Folded Reload + ld.d $s1, $sp, 288 # 8-byte Folded Reload + ld.d $s0, $sp, 296 # 8-byte Folded Reload + ld.d $fp, $sp, 304 # 8-byte Folded Reload + ld.d $ra, $sp, 312 # 8-byte Folded Reload + addi.d $sp, $sp, 320 ret .LBB7_511: .Ltmp911: # EH_LABEL - addi.d $a0, $sp, 168 + addi.d $a0, $sp, 160 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerD2Ev) jirl $ra, $ra, 0 .Ltmp912: # EH_LABEL # %bb.512: .LBB7_513: .Ltmp908: # EH_LABEL - addi.d $a0, $sp, 64 + addi.d $a0, $sp, 56 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerD2Ev) jirl $ra, $ra, 0 .Ltmp909: # EH_LABEL # %bb.514: .LBB7_515: .Ltmp905: # EH_LABEL - addi.d $a0, $sp, 56 + addi.d $a0, $sp, 48 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerD2Ev) jirl $ra, $ra, 0 .Ltmp906: # EH_LABEL # %bb.516: .LBB7_517: .Ltmp902: # EH_LABEL - addi.d $a0, $sp, 48 + addi.d $a0, $sp, 40 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerD2Ev) jirl $ra, $ra, 0 .Ltmp903: # EH_LABEL # %bb.518: .LBB7_519: .Ltmp899: # EH_LABEL - addi.d $a0, $sp, 40 + addi.d $a0, $sp, 32 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerD2Ev) jirl $ra, $ra, 0 .Ltmp900: # EH_LABEL # %bb.520: .LBB7_521: .Ltmp896: # EH_LABEL - addi.d $a0, $sp, 32 + addi.d $a0, $sp, 24 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerD2Ev) jirl $ra, $ra, 0 .Ltmp897: # EH_LABEL @@ -6910,8 +6951,8 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results beqz $a0, .LBB7_90 # %bb.526: pcalau12i $a0, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s8, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - st.d $zero, $s8, 0 + addi.d $s7, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + st.d $zero, $s7, 0 pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) addi.d $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) pcaddu18i $ra, %call36(__cxa_guard_release) @@ -6926,8 +6967,8 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results beqz $a0, .LBB7_175 # %bb.528: pcalau12i $a0, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s8, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - st.d $zero, $s8, 0 + addi.d $s7, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + st.d $zero, $s7, 0 pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) addi.d $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) pcaddu18i $ra, %call36(__cxa_guard_release) @@ -6942,8 +6983,8 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results beqz $a0, .LBB7_260 # %bb.530: pcalau12i $a0, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s8, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - st.d $zero, $s8, 0 + addi.d $s7, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + st.d $zero, $s7, 0 pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) addi.d $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) pcaddu18i $ra, %call36(__cxa_guard_release) @@ -6958,8 +6999,8 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results beqz $a0, .LBB7_345 # %bb.532: pcalau12i $a0, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s7, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - st.d $zero, $s7, 0 + addi.d $s6, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + st.d $zero, $s6, 0 pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) addi.d $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) pcaddu18i $ra, %call36(__cxa_guard_release) @@ -6974,8 +7015,8 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results beqz $a0, .LBB7_430 # %bb.534: pcalau12i $a0, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s8, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - st.d $zero, $s8, 0 + addi.d $s7, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + st.d $zero, $s7, 0 pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) addi.d $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) pcaddu18i $ra, %call36(__cxa_guard_release) @@ -6984,73 +7025,73 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results .LBB7_535: .Ltmp878: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_599 b .LBB7_607 .LBB7_536: .Ltmp789: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_611 b .LBB7_616 .LBB7_537: .Ltmp700: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_620 b .LBB7_625 .LBB7_538: .Ltmp611: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_629 b .LBB7_634 .LBB7_539: .Ltmp522: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_638 b .LBB7_643 .LBB7_540: .Ltmp433: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 bne $a0, $s1, .LBB7_647 b .LBB7_648 .LBB7_541: .Ltmp871: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_599 b .LBB7_607 .LBB7_542: .Ltmp782: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_611 b .LBB7_616 .LBB7_543: .Ltmp693: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_620 b .LBB7_625 .LBB7_544: .Ltmp604: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_629 b .LBB7_634 .LBB7_545: .Ltmp515: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_638 b .LBB7_643 .LBB7_546: .Ltmp426: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 bne $a0, $s1, .LBB7_647 b .LBB7_648 .LBB7_547: @@ -7074,37 +7115,37 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results .LBB7_553: .Ltmp866: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_599 b .LBB7_607 .LBB7_554: .Ltmp777: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_611 b .LBB7_616 .LBB7_555: .Ltmp688: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_620 b .LBB7_625 .LBB7_556: .Ltmp599: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_629 b .LBB7_634 .LBB7_557: .Ltmp510: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_638 b .LBB7_643 .LBB7_558: .Ltmp421: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 bne $a0, $s1, .LBB7_647 b .LBB7_648 .LBB7_559: @@ -7128,37 +7169,37 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results .LBB7_565: .Ltmp859: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_599 b .LBB7_607 .LBB7_566: .Ltmp770: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_611 b .LBB7_616 .LBB7_567: .Ltmp681: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_620 b .LBB7_625 .LBB7_568: .Ltmp592: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_629 b .LBB7_634 .LBB7_569: .Ltmp503: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_638 b .LBB7_643 .LBB7_570: .Ltmp414: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 bne $a0, $s1, .LBB7_647 b .LBB7_648 .LBB7_571: @@ -7184,7 +7225,7 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results .LBB7_578: move $fp, $a0 fcmp.clt.d $fcc0, $fs2, $fs0 - addi.d $a0, $sp, 32 + addi.d $a0, $sp, 24 bcnez $fcc0, .LBB7_600 b .LBB7_650 .LBB7_579: @@ -7234,24 +7275,24 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results b .LBB7_600 .LBB7_596: .Ltmp892: # EH_LABEL - ld.d $a2, $sp, 72 + ld.d $a2, $sp, 64 move $fp, $a0 bne $a2, $s3, .LBB7_603 # %bb.597: - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s2, .LBB7_605 .LBB7_598: - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 bne $a0, $s1, .LBB7_607 .LBB7_599: fcmp.clt.d $fcc0, $fs2, $fs0 - addi.d $a0, $sp, 32 + addi.d $a0, $sp, 24 bceqz $fcc0, .LBB7_650 .LBB7_600: - ld.d $a0, $sp, 176 + ld.d $a0, $sp, 168 beq $a0, $s0, .LBB7_602 # %bb.601: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i1224 - ld.d $a1, $sp, 192 + ld.d $a1, $sp, 184 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -7260,253 +7301,253 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results pcaddu18i $ra, %call36(_Unwind_Resume) jirl $ra, $ra, 0 .LBB7_603: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i1212 - ld.d $a0, $sp, 88 + ld.d $a0, $sp, 80 addi.d $a1, $a0, 1 move $a0, $a2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s2, .LBB7_598 b .LBB7_605 .LBB7_604: .Ltmp885: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s2, .LBB7_598 .LBB7_605: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i1215 - ld.d $a1, $sp, 120 + ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_599 b .LBB7_607 .LBB7_606: .Ltmp850: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_599 .LBB7_607: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i1218 - ld.d $a1, $sp, 152 + ld.d $a1, $sp, 144 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 fcmp.clt.d $fcc0, $fs2, $fs0 - addi.d $a0, $sp, 32 + addi.d $a0, $sp, 24 bcnez $fcc0, .LBB7_600 b .LBB7_650 .LBB7_608: .Ltmp803: # EH_LABEL - ld.d $a2, $sp, 72 + ld.d $a2, $sp, 64 move $fp, $a0 bne $a2, $s3, .LBB7_612 # %bb.609: - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s2, .LBB7_614 .LBB7_610: - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 bne $a0, $s1, .LBB7_616 .LBB7_611: - addi.d $a0, $sp, 40 + addi.d $a0, $sp, 32 b .LBB7_649 .LBB7_612: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i1060 - ld.d $a0, $sp, 88 + ld.d $a0, $sp, 80 addi.d $a1, $a0, 1 move $a0, $a2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s2, .LBB7_610 b .LBB7_614 .LBB7_613: .Ltmp796: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s2, .LBB7_610 .LBB7_614: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i1063 - ld.d $a1, $sp, 120 + ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_611 b .LBB7_616 .LBB7_615: .Ltmp761: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_611 .LBB7_616: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i1066 - ld.d $a1, $sp, 152 + ld.d $a1, $sp, 144 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - addi.d $a0, $sp, 40 + addi.d $a0, $sp, 32 b .LBB7_649 .LBB7_617: .Ltmp714: # EH_LABEL - ld.d $a2, $sp, 72 + ld.d $a2, $sp, 64 move $fp, $a0 bne $a2, $s3, .LBB7_621 # %bb.618: - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s2, .LBB7_623 .LBB7_619: - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 bne $a0, $s1, .LBB7_625 .LBB7_620: - addi.d $a0, $sp, 48 + addi.d $a0, $sp, 40 b .LBB7_649 .LBB7_621: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i908 - ld.d $a0, $sp, 88 + ld.d $a0, $sp, 80 addi.d $a1, $a0, 1 move $a0, $a2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s2, .LBB7_619 b .LBB7_623 .LBB7_622: .Ltmp707: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s2, .LBB7_619 .LBB7_623: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i911 - ld.d $a1, $sp, 120 + ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_620 b .LBB7_625 .LBB7_624: .Ltmp672: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_620 .LBB7_625: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i914 - ld.d $a1, $sp, 152 + ld.d $a1, $sp, 144 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - addi.d $a0, $sp, 48 + addi.d $a0, $sp, 40 b .LBB7_649 .LBB7_626: .Ltmp625: # EH_LABEL - ld.d $a2, $sp, 72 + ld.d $a2, $sp, 64 move $fp, $a0 bne $a2, $s3, .LBB7_630 # %bb.627: - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s2, .LBB7_632 .LBB7_628: - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 bne $a0, $s1, .LBB7_634 .LBB7_629: - addi.d $a0, $sp, 56 + addi.d $a0, $sp, 48 b .LBB7_649 .LBB7_630: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i756 - ld.d $a0, $sp, 88 + ld.d $a0, $sp, 80 addi.d $a1, $a0, 1 move $a0, $a2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s2, .LBB7_628 b .LBB7_632 .LBB7_631: .Ltmp618: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s2, .LBB7_628 .LBB7_632: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i759 - ld.d $a1, $sp, 120 + ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_629 b .LBB7_634 .LBB7_633: .Ltmp583: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_629 .LBB7_634: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i762 - ld.d $a1, $sp, 152 + ld.d $a1, $sp, 144 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - addi.d $a0, $sp, 56 + addi.d $a0, $sp, 48 b .LBB7_649 .LBB7_635: .Ltmp536: # EH_LABEL - ld.d $a2, $sp, 72 + ld.d $a2, $sp, 64 move $fp, $a0 bne $a2, $s3, .LBB7_639 # %bb.636: - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s2, .LBB7_641 .LBB7_637: - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 bne $a0, $s1, .LBB7_643 .LBB7_638: - addi.d $a0, $sp, 64 + addi.d $a0, $sp, 56 b .LBB7_649 .LBB7_639: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i604 - ld.d $a0, $sp, 88 + ld.d $a0, $sp, 80 addi.d $a1, $a0, 1 move $a0, $a2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s2, .LBB7_637 b .LBB7_641 .LBB7_640: .Ltmp529: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s2, .LBB7_637 .LBB7_641: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i607 - ld.d $a1, $sp, 120 + ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_638 b .LBB7_643 .LBB7_642: .Ltmp494: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_638 .LBB7_643: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i610 - ld.d $a1, $sp, 152 + ld.d $a1, $sp, 144 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - addi.d $a0, $sp, 64 + addi.d $a0, $sp, 56 b .LBB7_649 .LBB7_644: .Ltmp447: # EH_LABEL - ld.d $a2, $sp, 72 + ld.d $a2, $sp, 64 move $fp, $a0 bne $a2, $s3, .LBB7_652 # %bb.645: - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s2, .LBB7_654 .LBB7_646: - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 beq $a0, $s1, .LBB7_648 .LBB7_647: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i458 - ld.d $a1, $sp, 152 + ld.d $a1, $sp, 144 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB7_648: - addi.d $a0, $sp, 168 + addi.d $a0, $sp, 160 .LBB7_649: - ld.d $a1, $sp, 24 + ld.d $a1, $sp, 16 movgr2cf $fcc0, $a1 bceqz $fcc0, .LBB7_600 .LBB7_650: # %.invoke @@ -7516,31 +7557,31 @@ _Z16CheckTabularRateRK7Results: # @_Z16CheckTabularRateRK7Results .Ltmp894: # EH_LABEL # %bb.651: # %.cont .LBB7_652: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i452 - ld.d $a0, $sp, 88 + ld.d $a0, $sp, 80 addi.d $a1, $a0, 1 move $a0, $a2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s2, .LBB7_646 b .LBB7_654 .LBB7_653: .Ltmp440: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s2, .LBB7_646 .LBB7_654: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i455 - ld.d $a1, $sp, 120 + ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 bne $a0, $s1, .LBB7_647 b .LBB7_648 .LBB7_655: .Ltmp405: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 128 bne $a0, $s1, .LBB7_647 b .LBB7_648 .LBB7_656: diff --git a/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/user_counters_test.dir/user_counters_test.s b/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/user_counters_test.dir/user_counters_test.s index 12f113db..504c39cb 100644 --- a/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/user_counters_test.dir/user_counters_test.s +++ b/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/user_counters_test.dir/user_counters_test.s @@ -438,18 +438,8 @@ _ZNSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEN9benchmark7Counte .size _ZNSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEN9benchmark7CounterESt4lessIS5_ESaISt4pairIKS5_S7_EEEixEOS5_, .Lfunc_end2-_ZNSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEN9benchmark7CounterESt4lessIS5_ESaISt4pairIKS5_S7_EEEixEOS5_ .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z11CheckSimpleRK7Results -.LCPI3_0: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI3_1: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI3_2: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 -.LCPI3_3: - .dword 0x4059000000000000 # double 100 .text - .hidden _Z11CheckSimpleRK7Results + .hidden _Z11CheckSimpleRK7Results # -- Begin function _Z11CheckSimpleRK7Results .globl _Z11CheckSimpleRK7Results .p2align 5 .type _Z11CheckSimpleRK7Results,@function @@ -815,12 +805,15 @@ _Z11CheckSimpleRK7Results: # @_Z11CheckSimpleRK7Results .Ltmp62: # EH_LABEL # %bb.54: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit vldi $vr1, -896 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI3_0) fmadd.d $fa0, $fs0, $fa1, $fa0 fabs.d $fs3, $fa0 fadd.d $fs2, $fs0, $fs0 - fmul.d $fs1, $fs2, $fa2 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fa0, $a0 + fmul.d $fs1, $fs2, $fa0 fcmp.cule.d $fcc0, $fs1, $fs3 bceqz $fcc0, .LBB3_57 # %bb.55: @@ -1147,8 +1140,11 @@ _Z11CheckSimpleRK7Results: # @_Z11CheckSimpleRK7Results beqz $a0, .LBB3_119 # %bb.114: .Ltmp129: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI3_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI3_1) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, 1019 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp130: # EH_LABEL @@ -1229,15 +1225,20 @@ _Z11CheckSimpleRK7Results: # @_Z11CheckSimpleRK7Results # %bb.126: vldi $vr1, -896 fmadd.d $fa0, $fs0, $fa1, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI3_2) - fld.d $fa1, $a1, %pc_lo12(.LCPI3_2) - fabs.d $fa2, $fs2 - pcalau12i $a1, %pc_hi20(.LCPI3_3) - fld.d $fa3, $a1, %pc_lo12(.LCPI3_3) - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fs2, $fcc0 + fabs.d $fa1, $fs2 + lu12i.w $a1, -487882 + ori $a1, $a1, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa2, $a1 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fa2, $fs2, $fcc0 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa3 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp145: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 @@ -2210,22 +2211,8 @@ GCC_except_table7: .Lcst_end2: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z22CheckBytesAndItemsPSecRK7Results -.LCPI8_0: - .dword 0x4076c00000000000 # double 364 -.LCPI8_1: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI8_2: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI8_3: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 -.LCPI8_4: - .dword 0x4059000000000000 # double 100 -.LCPI8_5: - .dword 0x4062c00000000000 # double 150 .text - .hidden _Z22CheckBytesAndItemsPSecRK7Results + .hidden _Z22CheckBytesAndItemsPSecRK7Results # -- Begin function _Z22CheckBytesAndItemsPSecRK7Results .globl _Z22CheckBytesAndItemsPSecRK7Results .p2align 5 .type _Z22CheckBytesAndItemsPSecRK7Results,@function @@ -2913,14 +2900,19 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp287: # EH_LABEL # %bb.108: - pcalau12i $a0, %pc_hi20(.LCPI8_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI8_0) fmul.d $fs2, $fs0, $fs1 - pcalau12i $a0, %pc_hi20(.LCPI8_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI8_1) + ori $a0, $zero, 0 + lu32i.d $a0, 442368 + lu52i.d $a0, $a0, 1031 + movgr2fr.d $fa1, $a0 fdiv.d $fs1, $fa1, $fs2 fsub.d $fa0, $fa0, $fs1 fabs.d $fs4, $fa0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 fmul.d $fs0, $fs1, $fs3 fcmp.cule.d $fcc0, $fs0, $fs4 movcf2gr $a0, $fcc0 @@ -2940,17 +2932,17 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp290: # EH_LABEL # %bb.110: - ld.d $s6, $sp, 104 + ld.d $s5, $sp, 104 b .LBB8_112 .LBB8_111: pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) ld.b $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) dbar 20 pcalau12i $a1, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s6, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + addi.d $s5, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) beqz $a0, .LBB8_293 .LBB8_112: # %_ZN9benchmark8internal18GetNullLogInstanceEv.exit327 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_144 # %bb.113: .Ltmp291: # EH_LABEL @@ -2961,7 +2953,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp292: # EH_LABEL # %bb.114: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit330 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_144 # %bb.115: .Ltmp293: # EH_LABEL @@ -2972,7 +2964,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp294: # EH_LABEL # %bb.116: # %_ZN9benchmark8internallsIA141_cEERNS0_7LogTypeES4_RKT_.exit333 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_144 # %bb.117: .Ltmp295: # EH_LABEL @@ -2983,7 +2975,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp296: # EH_LABEL # %bb.118: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit336 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_144 # %bb.119: .Ltmp298: # EH_LABEL @@ -2992,7 +2984,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp299: # EH_LABEL # %bb.120: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit339 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_144 # %bb.121: .Ltmp300: # EH_LABEL @@ -3003,7 +2995,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp301: # EH_LABEL # %bb.122: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit342 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_144 # %bb.123: ld.d $a1, $fp, 0 @@ -3013,7 +3005,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp303: # EH_LABEL # %bb.124: # %_ZN9benchmark8internallsINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEERNS0_7LogTypeES9_RKT_.exit345 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_144 # %bb.125: .Ltmp304: # EH_LABEL @@ -3024,7 +3016,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp305: # EH_LABEL # %bb.126: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit348 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_144 # %bb.127: .Ltmp306: # EH_LABEL @@ -3035,7 +3027,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp307: # EH_LABEL # %bb.128: # %_ZN9benchmark8internallsIA141_cEERNS0_7LogTypeES4_RKT_.exit351 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_144 # %bb.129: .Ltmp308: # EH_LABEL @@ -3046,7 +3038,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp309: # EH_LABEL # %bb.130: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit354 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_144 # %bb.131: .Ltmp311: # EH_LABEL @@ -3055,7 +3047,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp312: # EH_LABEL # %bb.132: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit357 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_144 # %bb.133: .Ltmp313: # EH_LABEL @@ -3066,7 +3058,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp314: # EH_LABEL # %bb.134: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit360 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_144 # %bb.135: .Ltmp315: # EH_LABEL @@ -3077,7 +3069,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp316: # EH_LABEL # %bb.136: # %_ZN9benchmark8internallsIA11_cEERNS0_7LogTypeES4_RKT_.exit363 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_144 # %bb.137: .Ltmp317: # EH_LABEL @@ -3088,7 +3080,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp318: # EH_LABEL # %bb.138: # %_ZN9benchmark8internallsIA7_cEERNS0_7LogTypeES4_RKT_.exit - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_144 # %bb.139: .Ltmp319: # EH_LABEL @@ -3099,7 +3091,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp320: # EH_LABEL # %bb.140: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit368 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_144 # %bb.141: .Ltmp321: # EH_LABEL @@ -3110,7 +3102,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp322: # EH_LABEL # %bb.142: # %_ZN9benchmark8internallsIA17_cEERNS0_7LogTypeES4_RKT_.exit - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_144 # %bb.143: .Ltmp323: # EH_LABEL @@ -3147,7 +3139,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp330: # EH_LABEL # %bb.146: - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_174 # %bb.147: .Ltmp331: # EH_LABEL @@ -3155,7 +3147,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp332: # EH_LABEL # %bb.148: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_174 # %bb.149: .Ltmp333: # EH_LABEL @@ -3166,7 +3158,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp334: # EH_LABEL # %bb.150: # %_ZN9benchmark8internallsIA14_cEERNS0_7LogTypeES4_RKT_.exit382 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_174 # %bb.151: .Ltmp336: # EH_LABEL @@ -3175,7 +3167,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp337: # EH_LABEL # %bb.152: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit385 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_174 # %bb.153: .Ltmp338: # EH_LABEL @@ -3186,7 +3178,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp339: # EH_LABEL # %bb.154: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit388 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_174 # %bb.155: .Ltmp340: # EH_LABEL @@ -3197,7 +3189,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp341: # EH_LABEL # %bb.156: # %_ZN9benchmark8internallsIA141_cEERNS0_7LogTypeES4_RKT_.exit391 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_174 # %bb.157: .Ltmp342: # EH_LABEL @@ -3208,7 +3200,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp343: # EH_LABEL # %bb.158: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit394 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_174 # %bb.159: .Ltmp345: # EH_LABEL @@ -3217,7 +3209,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp346: # EH_LABEL # %bb.160: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit397 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_174 # %bb.161: .Ltmp347: # EH_LABEL @@ -3228,7 +3220,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp348: # EH_LABEL # %bb.162: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit400 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_174 # %bb.163: .Ltmp349: # EH_LABEL @@ -3239,7 +3231,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp350: # EH_LABEL # %bb.164: # %_ZN9benchmark8internallsIA19_cEERNS0_7LogTypeES4_RKT_.exit - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_174 # %bb.165: .Ltmp352: # EH_LABEL @@ -3248,7 +3240,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp353: # EH_LABEL # %bb.166: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit405 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_174 # %bb.167: .Ltmp354: # EH_LABEL @@ -3259,17 +3251,20 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp355: # EH_LABEL # %bb.168: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit408 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_174 # %bb.169: .Ltmp357: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI8_2) - fld.d $fa0, $a1, %pc_lo12(.LCPI8_2) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, 1019 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp358: # EH_LABEL # %bb.170: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit411 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_174 # %bb.171: .Ltmp359: # EH_LABEL @@ -3280,7 +3275,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp360: # EH_LABEL # %bb.172: # %_ZN9benchmark8internallsIA5_cEERNS0_7LogTypeES4_RKT_.exit - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_174 # %bb.173: .Ltmp361: # EH_LABEL @@ -3318,7 +3313,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp368: # EH_LABEL # %bb.176: - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_180 # %bb.177: fsub.d $fa0, $fa0, $fs1 @@ -3327,7 +3322,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp370: # EH_LABEL # %bb.178: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit422 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_180 # %bb.179: .Ltmp371: # EH_LABEL @@ -3365,25 +3360,30 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp378: # EH_LABEL # %bb.182: - ld.d $a0, $s6, 0 - pcalau12i $s5, %pc_hi20(.LCPI8_3) - pcalau12i $s4, %pc_hi20(.LCPI8_4) + ld.d $a0, $s5, 0 + lu12i.w $s4, -487882 beqz $a0, .LBB8_186 # %bb.183: - fld.d $fa1, $s5, %pc_lo12(.LCPI8_3) fsub.d $fa0, $fa0, $fs1 - fabs.d $fa2, $fs1 - fld.d $fa3, $s4, %pc_lo12(.LCPI8_4) - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fs1, $fcc0 + fabs.d $fa1, $fs1 + ori $a1, $s4, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa2, $a1 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fa2, $fs1, $fcc0 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa3 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp379: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp380: # EH_LABEL # %bb.184: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit432 - ld.d $a0, $s6, 0 + ld.d $a0, $s5, 0 beqz $a0, .LBB8_186 # %bb.185: .Ltmp381: # EH_LABEL @@ -3441,8 +3441,8 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results .Ltmp385: # EH_LABEL # %bb.196: # %.noexc462 ld.d $a1, $sp, 120 - pcalau12i $s6, %pc_hi20(.L.str.64) - vld $vr0, $s6, %pc_lo12(.L.str.64) + pcalau12i $s5, %pc_hi20(.L.str.64) + vld $vr0, $s5, %pc_lo12(.L.str.64) st.d $a0, $sp, 160 st.d $a1, $sp, 176 vst $vr0, $a0, 0 @@ -3456,8 +3456,10 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp388: # EH_LABEL # %bb.197: - pcalau12i $a0, %pc_hi20(.LCPI8_5) - fld.d $fa1, $a0, %pc_lo12(.LCPI8_5) + ori $a0, $zero, 0 + lu32i.d $a0, 180224 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa1, $a0 fdiv.d $fs1, $fa1, $fs2 fsub.d $fa0, $fa0, $fs1 fabs.d $fs2, $fa0 @@ -3478,17 +3480,17 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp391: # EH_LABEL # %bb.199: - ld.d $s7, $sp, 32 + ld.d $s6, $sp, 32 b .LBB8_201 .LBB8_200: pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) ld.b $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) dbar 20 pcalau12i $a1, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s7, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + addi.d $s6, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) beqz $a0, .LBB8_295 .LBB8_201: # %_ZN9benchmark8internal18GetNullLogInstanceEv.exit465 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_233 # %bb.202: .Ltmp392: # EH_LABEL @@ -3499,7 +3501,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp393: # EH_LABEL # %bb.203: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit468 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_233 # %bb.204: .Ltmp394: # EH_LABEL @@ -3510,7 +3512,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp395: # EH_LABEL # %bb.205: # %_ZN9benchmark8internallsIA141_cEERNS0_7LogTypeES4_RKT_.exit471 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_233 # %bb.206: .Ltmp396: # EH_LABEL @@ -3521,7 +3523,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp397: # EH_LABEL # %bb.207: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit474 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_233 # %bb.208: .Ltmp399: # EH_LABEL @@ -3530,7 +3532,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp400: # EH_LABEL # %bb.209: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit477 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_233 # %bb.210: .Ltmp401: # EH_LABEL @@ -3541,7 +3543,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp402: # EH_LABEL # %bb.211: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit480 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_233 # %bb.212: ld.d $a1, $fp, 0 @@ -3551,7 +3553,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp404: # EH_LABEL # %bb.213: # %_ZN9benchmark8internallsINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEERNS0_7LogTypeES9_RKT_.exit483 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_233 # %bb.214: .Ltmp405: # EH_LABEL @@ -3562,7 +3564,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp406: # EH_LABEL # %bb.215: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit486 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_233 # %bb.216: .Ltmp407: # EH_LABEL @@ -3573,7 +3575,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp408: # EH_LABEL # %bb.217: # %_ZN9benchmark8internallsIA141_cEERNS0_7LogTypeES4_RKT_.exit489 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_233 # %bb.218: .Ltmp409: # EH_LABEL @@ -3584,7 +3586,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp410: # EH_LABEL # %bb.219: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit492 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_233 # %bb.220: .Ltmp412: # EH_LABEL @@ -3593,7 +3595,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp413: # EH_LABEL # %bb.221: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit495 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_233 # %bb.222: .Ltmp414: # EH_LABEL @@ -3604,7 +3606,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp415: # EH_LABEL # %bb.223: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit498 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_233 # %bb.224: .Ltmp416: # EH_LABEL @@ -3615,7 +3617,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp417: # EH_LABEL # %bb.225: # %_ZN9benchmark8internallsIA11_cEERNS0_7LogTypeES4_RKT_.exit501 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_233 # %bb.226: .Ltmp418: # EH_LABEL @@ -3626,7 +3628,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp419: # EH_LABEL # %bb.227: # %_ZN9benchmark8internallsIA7_cEERNS0_7LogTypeES4_RKT_.exit504 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_233 # %bb.228: .Ltmp420: # EH_LABEL @@ -3637,7 +3639,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp421: # EH_LABEL # %bb.229: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit507 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_233 # %bb.230: .Ltmp422: # EH_LABEL @@ -3648,7 +3650,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp423: # EH_LABEL # %bb.231: # %_ZN9benchmark8internallsIA17_cEERNS0_7LogTypeES4_RKT_.exit510 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_233 # %bb.232: .Ltmp424: # EH_LABEL @@ -3671,7 +3673,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results .Ltmp428: # EH_LABEL # %bb.234: # %.noexc516 ld.d $a1, $sp, 72 - vld $vr0, $s6, %pc_lo12(.L.str.64) + vld $vr0, $s5, %pc_lo12(.L.str.64) st.d $a0, $sp, 120 st.d $a1, $sp, 136 vst $vr0, $a0, 0 @@ -3685,7 +3687,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp431: # EH_LABEL # %bb.235: - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_263 # %bb.236: .Ltmp432: # EH_LABEL @@ -3693,7 +3695,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp433: # EH_LABEL # %bb.237: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit520 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_263 # %bb.238: .Ltmp434: # EH_LABEL @@ -3704,7 +3706,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp435: # EH_LABEL # %bb.239: # %_ZN9benchmark8internallsIA14_cEERNS0_7LogTypeES4_RKT_.exit523 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_263 # %bb.240: .Ltmp437: # EH_LABEL @@ -3713,7 +3715,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp438: # EH_LABEL # %bb.241: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit526 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_263 # %bb.242: .Ltmp439: # EH_LABEL @@ -3724,7 +3726,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp440: # EH_LABEL # %bb.243: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit529 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_263 # %bb.244: .Ltmp441: # EH_LABEL @@ -3735,7 +3737,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp442: # EH_LABEL # %bb.245: # %_ZN9benchmark8internallsIA141_cEERNS0_7LogTypeES4_RKT_.exit532 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_263 # %bb.246: .Ltmp443: # EH_LABEL @@ -3746,7 +3748,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp444: # EH_LABEL # %bb.247: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit535 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_263 # %bb.248: .Ltmp446: # EH_LABEL @@ -3755,7 +3757,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp447: # EH_LABEL # %bb.249: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit538 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_263 # %bb.250: .Ltmp448: # EH_LABEL @@ -3766,7 +3768,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp449: # EH_LABEL # %bb.251: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit541 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_263 # %bb.252: .Ltmp450: # EH_LABEL @@ -3777,7 +3779,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp451: # EH_LABEL # %bb.253: # %_ZN9benchmark8internallsIA19_cEERNS0_7LogTypeES4_RKT_.exit544 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_263 # %bb.254: .Ltmp453: # EH_LABEL @@ -3786,7 +3788,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp454: # EH_LABEL # %bb.255: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit547 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_263 # %bb.256: .Ltmp455: # EH_LABEL @@ -3797,17 +3799,20 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp456: # EH_LABEL # %bb.257: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit550 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_263 # %bb.258: .Ltmp458: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI8_2) - fld.d $fa0, $a1, %pc_lo12(.LCPI8_2) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, 1019 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp459: # EH_LABEL # %bb.259: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit553 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_263 # %bb.260: .Ltmp460: # EH_LABEL @@ -3818,7 +3823,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp461: # EH_LABEL # %bb.261: # %_ZN9benchmark8internallsIA5_cEERNS0_7LogTypeES4_RKT_.exit556 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_263 # %bb.262: .Ltmp462: # EH_LABEL @@ -3841,7 +3846,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results .Ltmp466: # EH_LABEL # %bb.264: # %.noexc562 ld.d $a1, $sp, 40 - vld $vr0, $s6, %pc_lo12(.L.str.64) + vld $vr0, $s5, %pc_lo12(.L.str.64) st.d $a0, $sp, 72 st.d $a1, $sp, 88 vst $vr0, $a0, 0 @@ -3855,7 +3860,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp469: # EH_LABEL # %bb.265: - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_269 # %bb.266: fsub.d $fa0, $fa0, $fs1 @@ -3864,7 +3869,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp471: # EH_LABEL # %bb.267: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit566 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_269 # %bb.268: .Ltmp472: # EH_LABEL @@ -3887,7 +3892,7 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results .Ltmp476: # EH_LABEL # %bb.270: # %.noexc572 ld.d $a1, $sp, 192 - vld $vr0, $s6, %pc_lo12(.L.str.64) + vld $vr0, $s5, %pc_lo12(.L.str.64) st.d $a0, $sp, 40 st.d $a1, $sp, 56 vst $vr0, $a0, 0 @@ -3901,23 +3906,29 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results jirl $ra, $ra, 0 .Ltmp479: # EH_LABEL # %bb.271: - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_275 # %bb.272: - fld.d $fa1, $s5, %pc_lo12(.LCPI8_3) fsub.d $fa0, $fa0, $fs1 - fabs.d $fa2, $fs1 - fld.d $fa3, $s4, %pc_lo12(.LCPI8_4) - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fs1, $fcc0 + fabs.d $fa1, $fs1 + ori $a1, $s4, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa2, $a1 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fa2, $fs1, $fcc0 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa3 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp480: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp481: # EH_LABEL # %bb.273: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit576 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB8_275 # %bb.274: .Ltmp482: # EH_LABEL @@ -4017,8 +4028,8 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results beqz $a0, .LBB8_112 # %bb.294: pcalau12i $a0, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s6, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - st.d $zero, $s6, 0 + addi.d $s5, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + st.d $zero, $s5, 0 pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) addi.d $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) pcaddu18i $ra, %call36(__cxa_guard_release) @@ -4033,8 +4044,8 @@ _Z22CheckBytesAndItemsPSecRK7Results: # @_Z22CheckBytesAndItemsPSecRK7Results beqz $a0, .LBB8_201 # %bb.296: pcalau12i $a0, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s7, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - st.d $zero, $s7, 0 + addi.d $s6, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + st.d $zero, $s6, 0 pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) addi.d $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) pcaddu18i $ra, %call36(__cxa_guard_release) @@ -5142,18 +5153,8 @@ GCC_except_table10: .Lcst_end5: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z9CheckRateRK7Results -.LCPI11_0: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI11_1: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI11_2: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 -.LCPI11_3: - .dword 0x4059000000000000 # double 100 .text - .hidden _Z9CheckRateRK7Results + .hidden _Z9CheckRateRK7Results # -- Begin function _Z9CheckRateRK7Results .globl _Z9CheckRateRK7Results .p2align 5 .type _Z9CheckRateRK7Results,@function @@ -5163,24 +5164,23 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results .cfi_personality 155, DW.ref.__gxx_personality_v0 .cfi_lsda 27, .Lexception6 # %bb.0: # %._crit_edge.i.i - addi.d $sp, $sp, -304 - .cfi_def_cfa_offset 304 - st.d $ra, $sp, 296 # 8-byte Folded Spill - st.d $fp, $sp, 288 # 8-byte Folded Spill - st.d $s0, $sp, 280 # 8-byte Folded Spill - st.d $s1, $sp, 272 # 8-byte Folded Spill - st.d $s2, $sp, 264 # 8-byte Folded Spill - st.d $s3, $sp, 256 # 8-byte Folded Spill - st.d $s4, $sp, 248 # 8-byte Folded Spill - st.d $s5, $sp, 240 # 8-byte Folded Spill - st.d $s6, $sp, 232 # 8-byte Folded Spill - st.d $s7, $sp, 224 # 8-byte Folded Spill - st.d $s8, $sp, 216 # 8-byte Folded Spill - fst.d $fs0, $sp, 208 # 8-byte Folded Spill - fst.d $fs1, $sp, 200 # 8-byte Folded Spill - fst.d $fs2, $sp, 192 # 8-byte Folded Spill - fst.d $fs3, $sp, 184 # 8-byte Folded Spill - fst.d $fs4, $sp, 176 # 8-byte Folded Spill + addi.d $sp, $sp, -288 + .cfi_def_cfa_offset 288 + st.d $ra, $sp, 280 # 8-byte Folded Spill + st.d $fp, $sp, 272 # 8-byte Folded Spill + st.d $s0, $sp, 264 # 8-byte Folded Spill + st.d $s1, $sp, 256 # 8-byte Folded Spill + st.d $s2, $sp, 248 # 8-byte Folded Spill + st.d $s3, $sp, 240 # 8-byte Folded Spill + st.d $s4, $sp, 232 # 8-byte Folded Spill + st.d $s5, $sp, 224 # 8-byte Folded Spill + st.d $s6, $sp, 216 # 8-byte Folded Spill + st.d $s7, $sp, 208 # 8-byte Folded Spill + fst.d $fs0, $sp, 200 # 8-byte Folded Spill + fst.d $fs1, $sp, 192 # 8-byte Folded Spill + fst.d $fs2, $sp, 184 # 8-byte Folded Spill + fst.d $fs3, $sp, 176 # 8-byte Folded Spill + fst.d $fs4, $sp, 168 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -5191,12 +5191,11 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results .cfi_offset 28, -64 .cfi_offset 29, -72 .cfi_offset 30, -80 - .cfi_offset 31, -88 - .cfi_offset 56, -96 - .cfi_offset 57, -104 - .cfi_offset 58, -112 - .cfi_offset 59, -120 - .cfi_offset 60, -128 + .cfi_offset 56, -88 + .cfi_offset 57, -96 + .cfi_offset 58, -104 + .cfi_offset 59, -112 + .cfi_offset 60, -120 move $fp, $a0 pcaddu18i $ra, %call36(_ZNK7Results13NumIterationsEv) jirl $ra, $ra, 0 @@ -5206,34 +5205,37 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results pcaddu18i $ra, %call36(_ZNK7Results7GetTimeENS_13BenchmarkTimeE) jirl $ra, $ra, 0 fmov.d $fs1, $fa0 - addi.d $s0, $sp, 160 - st.d $s0, $sp, 144 + addi.d $s0, $sp, 152 + st.d $s0, $sp, 136 pcalau12i $a0, %pc_hi20(.L.str.5) addi.d $a0, $a0, %pc_lo12(.L.str.5) ld.h $s4, $a0, 0 ld.b $s5, $a0, 2 - st.h $s4, $sp, 160 - st.b $s5, $sp, 162 + st.h $s4, $sp, 152 + st.b $s5, $sp, 154 ori $a0, $zero, 3 - st.d $a0, $sp, 152 - st.b $zero, $sp, 163 + st.d $a0, $sp, 144 + st.b $zero, $sp, 155 .Ltmp511: # EH_LABEL - addi.d $a1, $sp, 144 + addi.d $a1, $sp, 136 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp512: # EH_LABEL # %bb.1: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit fmul.d $fs2, $fs0, $fs1 - pcalau12i $a0, %pc_hi20(.LCPI11_0) - fld.d $fs3, $a0, %pc_lo12(.LCPI11_0) frecip.d $fs1, $fs2 fsub.d $fa0, $fa0, $fs1 fabs.d $fs4, $fa0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 fmul.d $fs0, $fs1, $fs3 fcmp.cule.d $fcc0, $fs0, $fs4 movcf2gr $a0, $fcc0 - st.d $a0, $sp, 24 + st.d $a0, $sp, 16 bceqz $fcc0, .LBB11_4 # %bb.2: .Ltmp514: # EH_LABEL @@ -5243,13 +5245,13 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results addi.d $a2, $a0, %pc_lo12(.L.str.30) pcalau12i $a0, %pc_hi20(.L__func__._Z9CheckRateRK7Results) addi.d $a3, $a0, %pc_lo12(.L__func__._Z9CheckRateRK7Results) - addi.d $a0, $sp, 136 + addi.d $a0, $sp, 128 ori $a4, $zero, 154 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerC2EPKcS3_S3_i) jirl $ra, $ra, 0 .Ltmp515: # EH_LABEL # %bb.3: - ld.d $s6, $sp, 136 + ld.d $s6, $sp, 128 b .LBB11_5 .LBB11_4: pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) @@ -5430,15 +5432,15 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp549: # EH_LABEL .LBB11_37: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit154 - addi.d $s1, $sp, 120 - st.d $s1, $sp, 104 - st.h $s4, $sp, 120 - st.b $s5, $sp, 122 + addi.d $s1, $sp, 112 + st.d $s1, $sp, 96 + st.h $s4, $sp, 112 + st.b $s5, $sp, 114 ori $a0, $zero, 3 - st.d $a0, $sp, 112 - st.b $zero, $sp, 123 + st.d $a0, $sp, 104 + st.b $zero, $sp, 115 .Ltmp551: # EH_LABEL - addi.d $a1, $sp, 104 + addi.d $a1, $sp, 96 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 @@ -5560,8 +5562,11 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results beqz $a0, .LBB11_66 # %bb.61: .Ltmp579: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI11_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI11_1) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, 1019 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp580: # EH_LABEL @@ -5588,15 +5593,15 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp584: # EH_LABEL .LBB11_66: # %_ZN9benchmark8internallsIA15_cEERNS0_7LogTypeES4_RKT_.exit - addi.d $s2, $sp, 88 - st.d $s2, $sp, 72 - st.h $s4, $sp, 88 - st.b $s5, $sp, 90 + addi.d $s2, $sp, 80 + st.d $s2, $sp, 64 + st.h $s4, $sp, 80 + st.b $s5, $sp, 82 ori $a0, $zero, 3 - st.d $a0, $sp, 80 - st.b $zero, $sp, 91 + st.d $a0, $sp, 72 + st.b $zero, $sp, 83 .Ltmp586: # EH_LABEL - addi.d $a1, $sp, 72 + addi.d $a1, $sp, 64 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 @@ -5622,33 +5627,38 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp591: # EH_LABEL .LBB11_71: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit209 - addi.d $s3, $sp, 56 - st.d $s3, $sp, 40 - st.h $s4, $sp, 56 - st.b $s5, $sp, 58 + addi.d $s3, $sp, 48 + st.d $s3, $sp, 32 + st.h $s4, $sp, 48 + st.b $s5, $sp, 50 ori $a0, $zero, 3 - st.d $a0, $sp, 48 - st.b $zero, $sp, 59 + st.d $a0, $sp, 40 + st.b $zero, $sp, 51 .Ltmp593: # EH_LABEL - addi.d $a1, $sp, 40 + addi.d $a1, $sp, 32 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp594: # EH_LABEL # %bb.72: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit215 ld.d $a0, $s6, 0 - pcalau12i $s5, %pc_hi20(.LCPI11_2) - pcalau12i $s4, %pc_hi20(.LCPI11_3) + lu12i.w $s4, -487882 beqz $a0, .LBB11_76 # %bb.73: - fld.d $fa1, $s5, %pc_lo12(.LCPI11_2) fsub.d $fa0, $fa0, $fs1 - fabs.d $fa2, $fs1 - fld.d $fa3, $s4, %pc_lo12(.LCPI11_3) - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fs1, $fcc0 + fabs.d $fa1, $fs1 + ori $a1, $s4, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa2, $a1 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fa2, $fs1, $fcc0 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa3 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp595: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 @@ -5665,26 +5675,26 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp598: # EH_LABEL .LBB11_76: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit221 - ld.d $a0, $sp, 40 + ld.d $a0, $sp, 32 beq $a0, $s3, .LBB11_78 # %bb.77: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i - ld.d $a1, $sp, 56 + ld.d $a1, $sp, 48 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB11_78: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 beq $a0, $s2, .LBB11_80 # %bb.79: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i222 - ld.d $a1, $sp, 88 + ld.d $a1, $sp, 80 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB11_80: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit224 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB11_82 # %bb.81: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i225 - ld.d $a1, $sp, 120 + ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -5692,26 +5702,26 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results fcmp.clt.d $fcc0, $fs4, $fs0 bceqz $fcc0, .LBB11_171 # %bb.83: - ld.d $a0, $sp, 144 + ld.d $a0, $sp, 136 beq $a0, $s0, .LBB11_85 # %bb.84: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i237 - ld.d $a1, $sp, 160 + ld.d $a1, $sp, 152 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB11_85: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit239 - st.d $s0, $sp, 144 + st.d $s0, $sp, 136 pcalau12i $a0, %pc_hi20(.L.str.6) addi.d $a0, $a0, %pc_lo12(.L.str.6) - ld.h $s6, $a0, 0 - ld.b $s7, $a0, 2 - st.h $s6, $sp, 160 - st.b $s7, $sp, 162 + ld.h $s5, $a0, 0 + ld.b $s6, $a0, 2 + st.h $s5, $sp, 152 + st.b $s6, $sp, 154 ori $a0, $zero, 3 - st.d $a0, $sp, 152 - st.b $zero, $sp, 163 + st.d $a0, $sp, 144 + st.b $zero, $sp, 155 .Ltmp600: # EH_LABEL - addi.d $a1, $sp, 144 + addi.d $a1, $sp, 136 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 @@ -5732,23 +5742,23 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results addi.d $a2, $a0, %pc_lo12(.L.str.30) pcalau12i $a0, %pc_hi20(.L__func__._Z9CheckRateRK7Results) addi.d $a3, $a0, %pc_lo12(.L__func__._Z9CheckRateRK7Results) - addi.d $a0, $sp, 32 + addi.d $a0, $sp, 24 ori $a4, $zero, 155 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerC2EPKcS3_S3_i) jirl $ra, $ra, 0 .Ltmp604: # EH_LABEL # %bb.88: - ld.d $s8, $sp, 32 + ld.d $s7, $sp, 24 b .LBB11_90 .LBB11_89: pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) ld.b $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) dbar 20 pcalau12i $a1, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s8, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + addi.d $s7, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) beqz $a0, .LBB11_177 .LBB11_90: # %_ZN9benchmark8internal18GetNullLogInstanceEv.exit247 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_122 # %bb.91: .Ltmp605: # EH_LABEL @@ -5759,7 +5769,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp606: # EH_LABEL # %bb.92: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit250 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_122 # %bb.93: .Ltmp607: # EH_LABEL @@ -5770,7 +5780,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp608: # EH_LABEL # %bb.94: # %_ZN9benchmark8internallsIA141_cEERNS0_7LogTypeES4_RKT_.exit253 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_122 # %bb.95: .Ltmp609: # EH_LABEL @@ -5781,7 +5791,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp610: # EH_LABEL # %bb.96: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit256 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_122 # %bb.97: .Ltmp612: # EH_LABEL @@ -5790,7 +5800,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp613: # EH_LABEL # %bb.98: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit259 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_122 # %bb.99: .Ltmp614: # EH_LABEL @@ -5801,7 +5811,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp615: # EH_LABEL # %bb.100: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit262 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_122 # %bb.101: ld.d $a1, $fp, 0 @@ -5811,7 +5821,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp617: # EH_LABEL # %bb.102: # %_ZN9benchmark8internallsINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEERNS0_7LogTypeES9_RKT_.exit265 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_122 # %bb.103: .Ltmp618: # EH_LABEL @@ -5822,7 +5832,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp619: # EH_LABEL # %bb.104: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit268 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_122 # %bb.105: .Ltmp620: # EH_LABEL @@ -5833,7 +5843,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp621: # EH_LABEL # %bb.106: # %_ZN9benchmark8internallsIA141_cEERNS0_7LogTypeES4_RKT_.exit271 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_122 # %bb.107: .Ltmp622: # EH_LABEL @@ -5844,7 +5854,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp623: # EH_LABEL # %bb.108: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit274 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_122 # %bb.109: .Ltmp625: # EH_LABEL @@ -5853,7 +5863,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp626: # EH_LABEL # %bb.110: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit277 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_122 # %bb.111: .Ltmp627: # EH_LABEL @@ -5864,7 +5874,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp628: # EH_LABEL # %bb.112: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit280 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_122 # %bb.113: .Ltmp629: # EH_LABEL @@ -5875,7 +5885,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp630: # EH_LABEL # %bb.114: # %_ZN9benchmark8internallsIA11_cEERNS0_7LogTypeES4_RKT_.exit283 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_122 # %bb.115: .Ltmp631: # EH_LABEL @@ -5886,7 +5896,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp632: # EH_LABEL # %bb.116: # %_ZN9benchmark8internallsIA7_cEERNS0_7LogTypeES4_RKT_.exit286 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_122 # %bb.117: .Ltmp633: # EH_LABEL @@ -5897,7 +5907,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp634: # EH_LABEL # %bb.118: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit289 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_122 # %bb.119: .Ltmp635: # EH_LABEL @@ -5908,7 +5918,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp636: # EH_LABEL # %bb.120: # %_ZN9benchmark8internallsIA4_cEERNS0_7LogTypeES4_RKT_.exit292 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_122 # %bb.121: .Ltmp637: # EH_LABEL @@ -5919,20 +5929,20 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp638: # EH_LABEL .LBB11_122: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit295 - st.d $s1, $sp, 104 - st.h $s6, $sp, 120 - st.b $s7, $sp, 122 + st.d $s1, $sp, 96 + st.h $s5, $sp, 112 + st.b $s6, $sp, 114 ori $a0, $zero, 3 - st.d $a0, $sp, 112 - st.b $zero, $sp, 123 + st.d $a0, $sp, 104 + st.b $zero, $sp, 115 .Ltmp640: # EH_LABEL - addi.d $a1, $sp, 104 + addi.d $a1, $sp, 96 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp641: # EH_LABEL # %bb.123: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit301 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_151 # %bb.124: .Ltmp642: # EH_LABEL @@ -5940,7 +5950,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp643: # EH_LABEL # %bb.125: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit304 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_151 # %bb.126: .Ltmp644: # EH_LABEL @@ -5951,7 +5961,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp645: # EH_LABEL # %bb.127: # %_ZN9benchmark8internallsIA14_cEERNS0_7LogTypeES4_RKT_.exit307 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_151 # %bb.128: .Ltmp647: # EH_LABEL @@ -5960,7 +5970,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp648: # EH_LABEL # %bb.129: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit310 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_151 # %bb.130: .Ltmp649: # EH_LABEL @@ -5971,7 +5981,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp650: # EH_LABEL # %bb.131: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit313 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_151 # %bb.132: .Ltmp651: # EH_LABEL @@ -5982,7 +5992,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp652: # EH_LABEL # %bb.133: # %_ZN9benchmark8internallsIA141_cEERNS0_7LogTypeES4_RKT_.exit316 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_151 # %bb.134: .Ltmp653: # EH_LABEL @@ -5993,7 +6003,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp654: # EH_LABEL # %bb.135: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit319 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_151 # %bb.136: .Ltmp656: # EH_LABEL @@ -6002,7 +6012,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp657: # EH_LABEL # %bb.137: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit322 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_151 # %bb.138: .Ltmp658: # EH_LABEL @@ -6013,7 +6023,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp659: # EH_LABEL # %bb.139: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit325 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_151 # %bb.140: .Ltmp660: # EH_LABEL @@ -6024,7 +6034,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp661: # EH_LABEL # %bb.141: # %_ZN9benchmark8internallsIA19_cEERNS0_7LogTypeES4_RKT_.exit328 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_151 # %bb.142: .Ltmp663: # EH_LABEL @@ -6033,7 +6043,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp664: # EH_LABEL # %bb.143: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit331 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_151 # %bb.144: .Ltmp665: # EH_LABEL @@ -6044,17 +6054,20 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp666: # EH_LABEL # %bb.145: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit334 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_151 # %bb.146: .Ltmp668: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI11_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI11_1) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, 1019 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp669: # EH_LABEL # %bb.147: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit337 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_151 # %bb.148: .Ltmp670: # EH_LABEL @@ -6065,7 +6078,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp671: # EH_LABEL # %bb.149: # %_ZN9benchmark8internallsIA5_cEERNS0_7LogTypeES4_RKT_.exit340 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_151 # %bb.150: .Ltmp672: # EH_LABEL @@ -6076,20 +6089,20 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp673: # EH_LABEL .LBB11_151: # %_ZN9benchmark8internallsIA15_cEERNS0_7LogTypeES4_RKT_.exit343 - st.d $s2, $sp, 72 - st.h $s6, $sp, 88 - st.b $s7, $sp, 90 + st.d $s2, $sp, 64 + st.h $s5, $sp, 80 + st.b $s6, $sp, 82 ori $a0, $zero, 3 - st.d $a0, $sp, 80 - st.b $zero, $sp, 91 + st.d $a0, $sp, 72 + st.b $zero, $sp, 83 .Ltmp675: # EH_LABEL - addi.d $a1, $sp, 72 + addi.d $a1, $sp, 64 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp676: # EH_LABEL # %bb.152: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit349 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_156 # %bb.153: fsub.d $fa0, $fa0, $fs1 @@ -6098,7 +6111,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp678: # EH_LABEL # %bb.154: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit352 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_156 # %bb.155: .Ltmp679: # EH_LABEL @@ -6109,36 +6122,42 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp680: # EH_LABEL .LBB11_156: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit355 - st.d $s3, $sp, 40 - st.h $s6, $sp, 56 - st.b $s7, $sp, 58 + st.d $s3, $sp, 32 + st.h $s5, $sp, 48 + st.b $s6, $sp, 50 ori $a0, $zero, 3 - st.d $a0, $sp, 48 - st.b $zero, $sp, 59 + st.d $a0, $sp, 40 + st.b $zero, $sp, 51 .Ltmp682: # EH_LABEL - addi.d $a1, $sp, 40 + addi.d $a1, $sp, 32 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp683: # EH_LABEL # %bb.157: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit361 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_161 # %bb.158: - fld.d $fa1, $s5, %pc_lo12(.LCPI11_2) fsub.d $fa0, $fa0, $fs1 - fabs.d $fa2, $fs1 - fld.d $fa3, $s4, %pc_lo12(.LCPI11_3) - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fs1, $fcc0 + fabs.d $fa1, $fs1 + ori $a1, $s4, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa2, $a1 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fa2, $fs1, $fcc0 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa3 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp684: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp685: # EH_LABEL # %bb.159: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit364 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB11_161 # %bb.160: .Ltmp686: # EH_LABEL @@ -6149,26 +6168,26 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results jirl $ra, $ra, 0 .Ltmp687: # EH_LABEL .LBB11_161: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit367 - ld.d $a0, $sp, 40 + ld.d $a0, $sp, 32 beq $a0, $s3, .LBB11_163 # %bb.162: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i368 - ld.d $a1, $sp, 56 + ld.d $a1, $sp, 48 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB11_163: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit370 - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 beq $a0, $s2, .LBB11_165 # %bb.164: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i371 - ld.d $a1, $sp, 88 + ld.d $a1, $sp, 80 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB11_165: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit373 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB11_167 # %bb.166: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i374 - ld.d $a1, $sp, 120 + ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -6176,42 +6195,41 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results fcmp.clt.d $fcc0, $fs2, $fs0 bceqz $fcc0, .LBB11_173 # %bb.168: - ld.d $a0, $sp, 144 + ld.d $a0, $sp, 136 beq $a0, $s0, .LBB11_170 # %bb.169: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i389 - ld.d $a1, $sp, 160 + ld.d $a1, $sp, 152 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB11_170: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit391 - fld.d $fs4, $sp, 176 # 8-byte Folded Reload - fld.d $fs3, $sp, 184 # 8-byte Folded Reload - fld.d $fs2, $sp, 192 # 8-byte Folded Reload - fld.d $fs1, $sp, 200 # 8-byte Folded Reload - fld.d $fs0, $sp, 208 # 8-byte Folded Reload - ld.d $s8, $sp, 216 # 8-byte Folded Reload - ld.d $s7, $sp, 224 # 8-byte Folded Reload - ld.d $s6, $sp, 232 # 8-byte Folded Reload - ld.d $s5, $sp, 240 # 8-byte Folded Reload - ld.d $s4, $sp, 248 # 8-byte Folded Reload - ld.d $s3, $sp, 256 # 8-byte Folded Reload - ld.d $s2, $sp, 264 # 8-byte Folded Reload - ld.d $s1, $sp, 272 # 8-byte Folded Reload - ld.d $s0, $sp, 280 # 8-byte Folded Reload - ld.d $fp, $sp, 288 # 8-byte Folded Reload - ld.d $ra, $sp, 296 # 8-byte Folded Reload - addi.d $sp, $sp, 304 + fld.d $fs4, $sp, 168 # 8-byte Folded Reload + fld.d $fs3, $sp, 176 # 8-byte Folded Reload + fld.d $fs2, $sp, 184 # 8-byte Folded Reload + fld.d $fs1, $sp, 192 # 8-byte Folded Reload + fld.d $fs0, $sp, 200 # 8-byte Folded Reload + ld.d $s7, $sp, 208 # 8-byte Folded Reload + ld.d $s6, $sp, 216 # 8-byte Folded Reload + ld.d $s5, $sp, 224 # 8-byte Folded Reload + ld.d $s4, $sp, 232 # 8-byte Folded Reload + ld.d $s3, $sp, 240 # 8-byte Folded Reload + ld.d $s2, $sp, 248 # 8-byte Folded Reload + ld.d $s1, $sp, 256 # 8-byte Folded Reload + ld.d $s0, $sp, 264 # 8-byte Folded Reload + ld.d $fp, $sp, 272 # 8-byte Folded Reload + ld.d $ra, $sp, 280 # 8-byte Folded Reload + addi.d $sp, $sp, 288 ret .LBB11_171: .Ltmp695: # EH_LABEL - addi.d $a0, $sp, 136 + addi.d $a0, $sp, 128 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerD2Ev) jirl $ra, $ra, 0 .Ltmp696: # EH_LABEL # %bb.172: .LBB11_173: .Ltmp692: # EH_LABEL - addi.d $a0, $sp, 32 + addi.d $a0, $sp, 24 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerD2Ev) jirl $ra, $ra, 0 .Ltmp693: # EH_LABEL @@ -6241,8 +6259,8 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results beqz $a0, .LBB11_90 # %bb.178: pcalau12i $a0, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s8, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - st.d $zero, $s8, 0 + addi.d $s7, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + st.d $zero, $s7, 0 pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) addi.d $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) pcaddu18i $ra, %call36(__cxa_guard_release) @@ -6251,25 +6269,25 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results .LBB11_179: .Ltmp674: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB11_203 b .LBB11_211 .LBB11_180: .Ltmp585: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s1, .LBB11_215 b .LBB11_216 .LBB11_181: .Ltmp667: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB11_203 b .LBB11_211 .LBB11_182: .Ltmp578: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s1, .LBB11_215 b .LBB11_216 .LBB11_183: @@ -6281,13 +6299,13 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results .LBB11_185: .Ltmp662: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB11_203 b .LBB11_211 .LBB11_186: .Ltmp573: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s1, .LBB11_215 b .LBB11_216 .LBB11_187: @@ -6299,13 +6317,13 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results .LBB11_189: .Ltmp655: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB11_203 b .LBB11_211 .LBB11_190: .Ltmp566: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s1, .LBB11_215 b .LBB11_216 .LBB11_191: @@ -6319,7 +6337,7 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results .LBB11_194: move $fp, $a0 fcmp.clt.d $fcc0, $fs2, $fs0 - addi.d $a0, $sp, 32 + addi.d $a0, $sp, 24 bcnez $fcc0, .LBB11_204 b .LBB11_217 .LBB11_195: @@ -6337,24 +6355,24 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results b .LBB11_204 .LBB11_200: .Ltmp688: # EH_LABEL - ld.d $a2, $sp, 40 + ld.d $a2, $sp, 32 move $fp, $a0 bne $a2, $s3, .LBB11_207 # %bb.201: - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 bne $a0, $s2, .LBB11_209 .LBB11_202: - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s1, .LBB11_211 .LBB11_203: fcmp.clt.d $fcc0, $fs2, $fs0 - addi.d $a0, $sp, 32 + addi.d $a0, $sp, 24 bceqz $fcc0, .LBB11_217 .LBB11_204: - ld.d $a0, $sp, 144 + ld.d $a0, $sp, 136 beq $a0, $s0, .LBB11_206 # %bb.205: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i392 - ld.d $a1, $sp, 160 + ld.d $a1, $sp, 152 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -6363,60 +6381,60 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results pcaddu18i $ra, %call36(_Unwind_Resume) jirl $ra, $ra, 0 .LBB11_207: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i380 - ld.d $a0, $sp, 56 + ld.d $a0, $sp, 48 addi.d $a1, $a0, 1 move $a0, $a2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 beq $a0, $s2, .LBB11_202 b .LBB11_209 .LBB11_208: .Ltmp681: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 beq $a0, $s2, .LBB11_202 .LBB11_209: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i383 - ld.d $a1, $sp, 88 + ld.d $a1, $sp, 80 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB11_203 b .LBB11_211 .LBB11_210: .Ltmp646: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB11_203 .LBB11_211: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i386 - ld.d $a1, $sp, 120 + ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 fcmp.clt.d $fcc0, $fs2, $fs0 - addi.d $a0, $sp, 32 + addi.d $a0, $sp, 24 bcnez $fcc0, .LBB11_204 b .LBB11_217 .LBB11_212: .Ltmp599: # EH_LABEL - ld.d $a2, $sp, 40 + ld.d $a2, $sp, 32 move $fp, $a0 bne $a2, $s3, .LBB11_219 # %bb.213: - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 bne $a0, $s2, .LBB11_221 .LBB11_214: - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB11_216 .LBB11_215: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i234 - ld.d $a1, $sp, 120 + ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB11_216: - addi.d $a0, $sp, 136 - ld.d $a1, $sp, 24 + addi.d $a0, $sp, 128 + ld.d $a1, $sp, 16 movgr2cf $fcc0, $a1 bceqz $fcc0, .LBB11_204 .LBB11_217: # %.invoke @@ -6426,31 +6444,31 @@ _Z9CheckRateRK7Results: # @_Z9CheckRateRK7Results .Ltmp690: # EH_LABEL # %bb.218: # %.cont .LBB11_219: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i228 - ld.d $a0, $sp, 56 + ld.d $a0, $sp, 48 addi.d $a1, $a0, 1 move $a0, $a2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 beq $a0, $s2, .LBB11_214 b .LBB11_221 .LBB11_220: .Ltmp592: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 beq $a0, $s2, .LBB11_214 .LBB11_221: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i231 - ld.d $a1, $sp, 88 + ld.d $a1, $sp, 80 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s1, .LBB11_215 b .LBB11_216 .LBB11_222: .Ltmp557: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s1, .LBB11_215 b .LBB11_216 .LBB11_223: @@ -6805,24 +6823,8 @@ GCC_except_table12: .Lcst_end7: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z11CheckInvertRK7Results -.LCPI13_0: - .dword 0xc0c3880000000000 # double -1.0E+4 -.LCPI13_1: - .dword 0x3f847ae147ae147b # double 0.01 -.LCPI13_2: - .dword 0x40c3880000000000 # double 1.0E+4 -.LCPI13_3: - .dword 0x4059000000000000 # double 100 -.LCPI13_4: - .dword 0xbf1a36e2eb1c432d # double -1.0E-4 -.LCPI13_5: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI13_6: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 .text - .hidden _Z11CheckInvertRK7Results + .hidden _Z11CheckInvertRK7Results # -- Begin function _Z11CheckInvertRK7Results .globl _Z11CheckInvertRK7Results .p2align 5 .type _Z11CheckInvertRK7Results,@function @@ -6879,9 +6881,11 @@ _Z11CheckInvertRK7Results: # @_Z11CheckInvertRK7Results jirl $ra, $ra, 0 .Ltmp705: # EH_LABEL # %bb.1: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit - pcalau12i $a0, %pc_hi20(.LCPI13_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI13_0) - fadd.d $fa0, $fa0, $fs1 + ori $a0, $zero, 0 + lu32i.d $a0, 231424 + lu52i.d $a0, $a0, -1012 + movgr2fr.d $fa1, $a0 + fadd.d $fa0, $fa0, $fa1 fabs.d $fs0, $fa0 vldi $vr0, -912 fcmp.cule.d $fcc0, $fa0, $fs0 @@ -7212,8 +7216,11 @@ _Z11CheckInvertRK7Results: # @_Z11CheckInvertRK7Results beqz $a0, .LBB13_66 # %bb.61: .Ltmp772: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI13_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI13_1) + lu12i.w $a1, 293601 + ori $a1, $a1, 1147 + lu32i.d $a1, 293601 + lu52i.d $a1, $a1, 1016 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp773: # EH_LABEL @@ -7257,7 +7264,11 @@ _Z11CheckInvertRK7Results: # @_Z11CheckInvertRK7Results ld.d $a0, $s6, 0 beqz $a0, .LBB13_71 # %bb.68: - fadd.d $fa0, $fa0, $fs1 + ori $a1, $zero, 0 + lu32i.d $a1, 231424 + lu52i.d $a1, $a1, -1012 + movgr2fr.d $fa1, $a1 + fadd.d $fa0, $fa0, $fa1 .Ltmp781: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 @@ -7289,15 +7300,21 @@ _Z11CheckInvertRK7Results: # @_Z11CheckInvertRK7Results .Ltmp787: # EH_LABEL # %bb.72: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit193 ld.d $a0, $s6, 0 - pcalau12i $s4, %pc_hi20(.LCPI13_3) beqz $a0, .LBB13_76 # %bb.73: - pcalau12i $a1, %pc_hi20(.LCPI13_2) - fld.d $fa1, $a1, %pc_lo12(.LCPI13_2) - fld.d $fa2, $s4, %pc_lo12(.LCPI13_3) - fadd.d $fa0, $fa0, $fs1 + ori $a1, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, 231424 + lu52i.d $a3, $a2, -1012 + movgr2fr.d $fa1, $a3 + fadd.d $fa0, $fa0, $fa1 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fa1, $a2 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa2 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp788: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 @@ -7367,12 +7384,18 @@ _Z11CheckInvertRK7Results: # @_Z11CheckInvertRK7Results jirl $ra, $ra, 0 .Ltmp794: # EH_LABEL # %bb.86: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit223 - pcalau12i $a0, %pc_hi20(.LCPI13_4) - fld.d $fs2, $a0, %pc_lo12(.LCPI13_4) - pcalau12i $a0, %pc_hi20(.LCPI13_5) - fld.d $fs0, $a0, %pc_lo12(.LCPI13_5) + lu12i.w $a0, -85564 + ori $s4, $a0, 813 + lu32i.d $s4, -379166 + lu52i.d $a0, $s4, -1039 + movgr2fr.d $fs2, $a0 fadd.d $fa0, $fa0, $fs2 fabs.d $fs1, $fa0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs0, $a0 fcmp.cule.d $fcc0, $fs0, $fs1 bceqz $fcc0, .LBB13_89 # %bb.87: @@ -7606,8 +7629,8 @@ _Z11CheckInvertRK7Results: # @_Z11CheckInvertRK7Results beqz $a0, .LBB13_151 # %bb.128: .Ltmp840: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI13_6) - fld.d $fa0, $a1, %pc_lo12(.LCPI13_6) + lu52i.d $a1, $s4, 1009 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp841: # EH_LABEL @@ -7700,8 +7723,11 @@ _Z11CheckInvertRK7Results: # @_Z11CheckInvertRK7Results beqz $a0, .LBB13_151 # %bb.146: .Ltmp861: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI13_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI13_1) + lu12i.w $a1, 293601 + ori $a1, $a1, 1147 + lu32i.d $a1, 293601 + lu52i.d $a1, $a1, 1016 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp862: # EH_LABEL @@ -7777,12 +7803,15 @@ _Z11CheckInvertRK7Results: # @_Z11CheckInvertRK7Results ld.d $a0, $s7, 0 beqz $a0, .LBB13_161 # %bb.158: - pcalau12i $a1, %pc_hi20(.LCPI13_6) - fld.d $fa1, $a1, %pc_lo12(.LCPI13_6) - fld.d $fa2, $s4, %pc_lo12(.LCPI13_3) fadd.d $fa0, $fa0, $fs2 + lu52i.d $a1, $s4, 1009 + movgr2fr.d $fa1, $a1 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa2 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp877: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 @@ -8444,20 +8473,8 @@ GCC_except_table14: .Lcst_end9: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z17CheckInvertedRateRK7Results -.LCPI15_0: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI15_1: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI15_2: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 -.LCPI15_3: - .dword 0x4059000000000000 # double 100 -.LCPI15_4: - .dword 0x3f20000000000000 # double 1.220703125E-4 .text - .hidden _Z17CheckInvertedRateRK7Results + .hidden _Z17CheckInvertedRateRK7Results # -- Begin function _Z17CheckInvertedRateRK7Results .globl _Z17CheckInvertedRateRK7Results .p2align 5 .type _Z17CheckInvertedRateRK7Results,@function @@ -8479,11 +8496,10 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results st.d $s5, $sp, 224 # 8-byte Folded Spill st.d $s6, $sp, 216 # 8-byte Folded Spill st.d $s7, $sp, 208 # 8-byte Folded Spill - st.d $s8, $sp, 200 # 8-byte Folded Spill - fst.d $fs0, $sp, 192 # 8-byte Folded Spill - fst.d $fs1, $sp, 184 # 8-byte Folded Spill - fst.d $fs2, $sp, 176 # 8-byte Folded Spill - fst.d $fs3, $sp, 168 # 8-byte Folded Spill + fst.d $fs0, $sp, 200 # 8-byte Folded Spill + fst.d $fs1, $sp, 192 # 8-byte Folded Spill + fst.d $fs2, $sp, 184 # 8-byte Folded Spill + fst.d $fs3, $sp, 176 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -8494,11 +8510,10 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results .cfi_offset 28, -64 .cfi_offset 29, -72 .cfi_offset 30, -80 - .cfi_offset 31, -88 - .cfi_offset 56, -96 - .cfi_offset 57, -104 - .cfi_offset 58, -112 - .cfi_offset 59, -120 + .cfi_offset 56, -88 + .cfi_offset 57, -96 + .cfi_offset 58, -104 + .cfi_offset 59, -112 move $fp, $a0 pcaddu18i $ra, %call36(_ZNK7Results13NumIterationsEv) jirl $ra, $ra, 0 @@ -8508,33 +8523,36 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results pcaddu18i $ra, %call36(_ZNK7Results7GetTimeENS_13BenchmarkTimeE) jirl $ra, $ra, 0 fmov.d $fs1, $fa0 - addi.d $s0, $sp, 152 - st.d $s0, $sp, 136 + addi.d $s0, $sp, 160 + st.d $s0, $sp, 144 pcalau12i $a0, %pc_hi20(.L.str.5) addi.d $a0, $a0, %pc_lo12(.L.str.5) ld.h $s4, $a0, 0 ld.b $s5, $a0, 2 - st.h $s4, $sp, 152 - st.b $s5, $sp, 154 + st.h $s4, $sp, 160 + st.b $s5, $sp, 162 ori $a0, $zero, 3 - st.d $a0, $sp, 144 - st.b $zero, $sp, 155 + st.d $a0, $sp, 152 + st.b $zero, $sp, 163 .Ltmp897: # EH_LABEL - addi.d $a1, $sp, 136 + addi.d $a1, $sp, 144 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp898: # EH_LABEL # %bb.1: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit - pcalau12i $a0, %pc_hi20(.LCPI15_0) - fld.d $fs3, $a0, %pc_lo12(.LCPI15_0) fmul.d $fs0, $fs0, $fs1 fsub.d $fa0, $fa0, $fs0 fabs.d $fs2, $fa0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 fmul.d $fs1, $fs0, $fs3 fcmp.cule.d $fcc0, $fs1, $fs2 movcf2gr $a0, $fcc0 - st.d $a0, $sp, 16 + st.d $a0, $sp, 24 bceqz $fcc0, .LBB15_4 # %bb.2: .Ltmp900: # EH_LABEL @@ -8544,13 +8562,13 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results addi.d $a2, $a0, %pc_lo12(.L.str.30) pcalau12i $a0, %pc_hi20(.L__func__._Z17CheckInvertedRateRK7Results) addi.d $a3, $a0, %pc_lo12(.L__func__._Z17CheckInvertedRateRK7Results) - addi.d $a0, $sp, 128 + addi.d $a0, $sp, 136 ori $a4, $zero, 242 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerC2EPKcS3_S3_i) jirl $ra, $ra, 0 .Ltmp901: # EH_LABEL # %bb.3: - ld.d $s6, $sp, 128 + ld.d $s6, $sp, 136 b .LBB15_5 .LBB15_4: pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) @@ -8731,15 +8749,15 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp935: # EH_LABEL .LBB15_37: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit134 - addi.d $s1, $sp, 112 - st.d $s1, $sp, 96 - st.h $s4, $sp, 112 - st.b $s5, $sp, 114 + addi.d $s1, $sp, 120 + st.d $s1, $sp, 104 + st.h $s4, $sp, 120 + st.b $s5, $sp, 122 ori $a0, $zero, 3 - st.d $a0, $sp, 104 - st.b $zero, $sp, 115 + st.d $a0, $sp, 112 + st.b $zero, $sp, 123 .Ltmp937: # EH_LABEL - addi.d $a1, $sp, 96 + addi.d $a1, $sp, 104 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 @@ -8861,8 +8879,11 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results beqz $a0, .LBB15_66 # %bb.61: .Ltmp964: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI15_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI15_1) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, 1019 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp965: # EH_LABEL @@ -8889,15 +8910,15 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp969: # EH_LABEL .LBB15_66: # %_ZN9benchmark8internallsIA15_cEERNS0_7LogTypeES4_RKT_.exit - addi.d $s2, $sp, 80 - st.d $s2, $sp, 64 - st.h $s4, $sp, 80 - st.b $s5, $sp, 82 + addi.d $s2, $sp, 88 + st.d $s2, $sp, 72 + st.h $s4, $sp, 88 + st.b $s5, $sp, 90 ori $a0, $zero, 3 - st.d $a0, $sp, 72 - st.b $zero, $sp, 83 + st.d $a0, $sp, 80 + st.b $zero, $sp, 91 .Ltmp971: # EH_LABEL - addi.d $a1, $sp, 64 + addi.d $a1, $sp, 72 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 @@ -8923,33 +8944,38 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp976: # EH_LABEL .LBB15_71: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit189 - addi.d $s3, $sp, 48 - st.d $s3, $sp, 32 - st.h $s4, $sp, 48 - st.b $s5, $sp, 50 + addi.d $s3, $sp, 56 + st.d $s3, $sp, 40 + st.h $s4, $sp, 56 + st.b $s5, $sp, 58 ori $a0, $zero, 3 - st.d $a0, $sp, 40 - st.b $zero, $sp, 51 + st.d $a0, $sp, 48 + st.b $zero, $sp, 59 .Ltmp978: # EH_LABEL - addi.d $a1, $sp, 32 + addi.d $a1, $sp, 40 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp979: # EH_LABEL # %bb.72: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit195 ld.d $a0, $s6, 0 - pcalau12i $s5, %pc_hi20(.LCPI15_2) - pcalau12i $s4, %pc_hi20(.LCPI15_3) + lu12i.w $s4, -487882 beqz $a0, .LBB15_76 # %bb.73: - fld.d $fa1, $s5, %pc_lo12(.LCPI15_2) fsub.d $fa0, $fa0, $fs0 - fabs.d $fa2, $fs0 - fld.d $fa3, $s4, %pc_lo12(.LCPI15_3) - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fs0, $fcc0 + fabs.d $fa1, $fs0 + ori $a1, $s4, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa2, $a1 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fa2, $fs0, $fcc0 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa3 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp980: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 @@ -8966,26 +8992,26 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp983: # EH_LABEL .LBB15_76: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit201 - ld.d $a0, $sp, 32 + ld.d $a0, $sp, 40 beq $a0, $s3, .LBB15_78 # %bb.77: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i - ld.d $a1, $sp, 48 + ld.d $a1, $sp, 56 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB15_78: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit - ld.d $a0, $sp, 64 + ld.d $a0, $sp, 72 beq $a0, $s2, .LBB15_80 # %bb.79: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i202 - ld.d $a1, $sp, 80 + ld.d $a1, $sp, 88 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB15_80: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit204 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB15_82 # %bb.81: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i205 - ld.d $a1, $sp, 112 + ld.d $a1, $sp, 120 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -8993,33 +9019,33 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results fcmp.clt.d $fcc0, $fs2, $fs1 bceqz $fcc0, .LBB15_171 # %bb.83: - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 144 beq $a0, $s0, .LBB15_85 # %bb.84: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i217 - ld.d $a1, $sp, 152 + ld.d $a1, $sp, 160 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB15_85: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit219 - st.d $s0, $sp, 136 + st.d $s0, $sp, 144 pcalau12i $a0, %pc_hi20(.L.str.6) addi.d $a0, $a0, %pc_lo12(.L.str.6) - ld.h $s6, $a0, 0 - ld.b $s7, $a0, 2 - st.h $s6, $sp, 152 - st.b $s7, $sp, 154 + ld.h $s5, $a0, 0 + ld.b $s6, $a0, 2 + st.h $s5, $sp, 160 + st.b $s6, $sp, 162 ori $a0, $zero, 3 - st.d $a0, $sp, 144 - st.b $zero, $sp, 155 + st.d $a0, $sp, 152 + st.b $zero, $sp, 163 .Ltmp985: # EH_LABEL - addi.d $a1, $sp, 136 + addi.d $a1, $sp, 144 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp986: # EH_LABEL # %bb.86: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit225 - pcalau12i $a0, %pc_hi20(.LCPI15_4) - fld.d $fa1, $a0, %pc_lo12(.LCPI15_4) + lu52i.d $a0, $zero, 1010 + movgr2fr.d $fa1, $a0 fmul.d $fs1, $fs0, $fa1 fsub.d $fa0, $fa0, $fs1 fabs.d $fs2, $fa0 @@ -9034,23 +9060,23 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results addi.d $a2, $a0, %pc_lo12(.L.str.30) pcalau12i $a0, %pc_hi20(.L__func__._Z17CheckInvertedRateRK7Results) addi.d $a3, $a0, %pc_lo12(.L__func__._Z17CheckInvertedRateRK7Results) - addi.d $a0, $sp, 24 + addi.d $a0, $sp, 32 ori $a4, $zero, 243 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerC2EPKcS3_S3_i) jirl $ra, $ra, 0 .Ltmp989: # EH_LABEL # %bb.88: - ld.d $s8, $sp, 24 + ld.d $s7, $sp, 32 b .LBB15_90 .LBB15_89: pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) ld.b $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) dbar 20 pcalau12i $a1, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s8, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + addi.d $s7, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) beqz $a0, .LBB15_177 .LBB15_90: # %_ZN9benchmark8internal18GetNullLogInstanceEv.exit227 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_122 # %bb.91: .Ltmp990: # EH_LABEL @@ -9061,7 +9087,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp991: # EH_LABEL # %bb.92: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit230 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_122 # %bb.93: .Ltmp992: # EH_LABEL @@ -9072,7 +9098,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp993: # EH_LABEL # %bb.94: # %_ZN9benchmark8internallsIA141_cEERNS0_7LogTypeES4_RKT_.exit233 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_122 # %bb.95: .Ltmp994: # EH_LABEL @@ -9083,7 +9109,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp995: # EH_LABEL # %bb.96: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit236 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_122 # %bb.97: .Ltmp997: # EH_LABEL @@ -9092,7 +9118,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp998: # EH_LABEL # %bb.98: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit239 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_122 # %bb.99: .Ltmp999: # EH_LABEL @@ -9103,7 +9129,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1000: # EH_LABEL # %bb.100: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit242 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_122 # %bb.101: ld.d $a1, $fp, 0 @@ -9113,7 +9139,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1002: # EH_LABEL # %bb.102: # %_ZN9benchmark8internallsINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEERNS0_7LogTypeES9_RKT_.exit245 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_122 # %bb.103: .Ltmp1003: # EH_LABEL @@ -9124,7 +9150,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1004: # EH_LABEL # %bb.104: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit248 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_122 # %bb.105: .Ltmp1005: # EH_LABEL @@ -9135,7 +9161,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1006: # EH_LABEL # %bb.106: # %_ZN9benchmark8internallsIA141_cEERNS0_7LogTypeES4_RKT_.exit251 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_122 # %bb.107: .Ltmp1007: # EH_LABEL @@ -9146,7 +9172,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1008: # EH_LABEL # %bb.108: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit254 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_122 # %bb.109: .Ltmp1010: # EH_LABEL @@ -9155,7 +9181,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1011: # EH_LABEL # %bb.110: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit257 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_122 # %bb.111: .Ltmp1012: # EH_LABEL @@ -9166,7 +9192,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1013: # EH_LABEL # %bb.112: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit260 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_122 # %bb.113: .Ltmp1014: # EH_LABEL @@ -9177,7 +9203,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1015: # EH_LABEL # %bb.114: # %_ZN9benchmark8internallsIA11_cEERNS0_7LogTypeES4_RKT_.exit263 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_122 # %bb.115: .Ltmp1016: # EH_LABEL @@ -9188,7 +9214,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1017: # EH_LABEL # %bb.116: # %_ZN9benchmark8internallsIA7_cEERNS0_7LogTypeES4_RKT_.exit266 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_122 # %bb.117: .Ltmp1018: # EH_LABEL @@ -9199,7 +9225,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1019: # EH_LABEL # %bb.118: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit269 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_122 # %bb.119: .Ltmp1020: # EH_LABEL @@ -9210,7 +9236,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1021: # EH_LABEL # %bb.120: # %_ZN9benchmark8internallsIA4_cEERNS0_7LogTypeES4_RKT_.exit272 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_122 # %bb.121: .Ltmp1022: # EH_LABEL @@ -9221,20 +9247,20 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1023: # EH_LABEL .LBB15_122: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit275 - st.d $s1, $sp, 96 - st.h $s6, $sp, 112 - st.b $s7, $sp, 114 + st.d $s1, $sp, 104 + st.h $s5, $sp, 120 + st.b $s6, $sp, 122 ori $a0, $zero, 3 - st.d $a0, $sp, 104 - st.b $zero, $sp, 115 + st.d $a0, $sp, 112 + st.b $zero, $sp, 123 .Ltmp1025: # EH_LABEL - addi.d $a1, $sp, 96 + addi.d $a1, $sp, 104 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp1026: # EH_LABEL # %bb.123: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit281 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_151 # %bb.124: .Ltmp1027: # EH_LABEL @@ -9242,7 +9268,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1028: # EH_LABEL # %bb.125: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit284 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_151 # %bb.126: .Ltmp1029: # EH_LABEL @@ -9253,7 +9279,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1030: # EH_LABEL # %bb.127: # %_ZN9benchmark8internallsIA14_cEERNS0_7LogTypeES4_RKT_.exit287 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_151 # %bb.128: .Ltmp1032: # EH_LABEL @@ -9262,7 +9288,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1033: # EH_LABEL # %bb.129: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit290 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_151 # %bb.130: .Ltmp1034: # EH_LABEL @@ -9273,7 +9299,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1035: # EH_LABEL # %bb.131: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit293 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_151 # %bb.132: .Ltmp1036: # EH_LABEL @@ -9284,7 +9310,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1037: # EH_LABEL # %bb.133: # %_ZN9benchmark8internallsIA141_cEERNS0_7LogTypeES4_RKT_.exit296 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_151 # %bb.134: .Ltmp1038: # EH_LABEL @@ -9295,7 +9321,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1039: # EH_LABEL # %bb.135: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit299 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_151 # %bb.136: .Ltmp1041: # EH_LABEL @@ -9304,7 +9330,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1042: # EH_LABEL # %bb.137: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit302 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_151 # %bb.138: .Ltmp1043: # EH_LABEL @@ -9315,7 +9341,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1044: # EH_LABEL # %bb.139: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit305 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_151 # %bb.140: .Ltmp1045: # EH_LABEL @@ -9326,7 +9352,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1046: # EH_LABEL # %bb.141: # %_ZN9benchmark8internallsIA19_cEERNS0_7LogTypeES4_RKT_.exit308 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_151 # %bb.142: .Ltmp1048: # EH_LABEL @@ -9335,7 +9361,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1049: # EH_LABEL # %bb.143: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit311 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_151 # %bb.144: .Ltmp1050: # EH_LABEL @@ -9346,17 +9372,20 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1051: # EH_LABEL # %bb.145: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit314 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_151 # %bb.146: .Ltmp1053: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI15_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI15_1) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, 1019 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp1054: # EH_LABEL # %bb.147: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit317 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_151 # %bb.148: .Ltmp1055: # EH_LABEL @@ -9367,7 +9396,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1056: # EH_LABEL # %bb.149: # %_ZN9benchmark8internallsIA5_cEERNS0_7LogTypeES4_RKT_.exit320 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_151 # %bb.150: .Ltmp1057: # EH_LABEL @@ -9378,20 +9407,20 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1058: # EH_LABEL .LBB15_151: # %_ZN9benchmark8internallsIA15_cEERNS0_7LogTypeES4_RKT_.exit323 - st.d $s2, $sp, 64 - st.h $s6, $sp, 80 - st.b $s7, $sp, 82 + st.d $s2, $sp, 72 + st.h $s5, $sp, 88 + st.b $s6, $sp, 90 ori $a0, $zero, 3 - st.d $a0, $sp, 72 - st.b $zero, $sp, 83 + st.d $a0, $sp, 80 + st.b $zero, $sp, 91 .Ltmp1060: # EH_LABEL - addi.d $a1, $sp, 64 + addi.d $a1, $sp, 72 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp1061: # EH_LABEL # %bb.152: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit329 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_156 # %bb.153: fsub.d $fa0, $fa0, $fs1 @@ -9400,7 +9429,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1063: # EH_LABEL # %bb.154: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit332 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_156 # %bb.155: .Ltmp1064: # EH_LABEL @@ -9411,36 +9440,42 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1065: # EH_LABEL .LBB15_156: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit335 - st.d $s3, $sp, 32 - st.h $s6, $sp, 48 - st.b $s7, $sp, 50 + st.d $s3, $sp, 40 + st.h $s5, $sp, 56 + st.b $s6, $sp, 58 ori $a0, $zero, 3 - st.d $a0, $sp, 40 - st.b $zero, $sp, 51 + st.d $a0, $sp, 48 + st.b $zero, $sp, 59 .Ltmp1067: # EH_LABEL - addi.d $a1, $sp, 32 + addi.d $a1, $sp, 40 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp1068: # EH_LABEL # %bb.157: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit341 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_161 # %bb.158: - fld.d $fa1, $s5, %pc_lo12(.LCPI15_2) fsub.d $fa0, $fa0, $fs1 - fabs.d $fa2, $fs1 - fld.d $fa3, $s4, %pc_lo12(.LCPI15_3) - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fs1, $fcc0 + fabs.d $fa1, $fs1 + ori $a1, $s4, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa2, $a1 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fa2, $fs1, $fcc0 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa3 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp1069: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp1070: # EH_LABEL # %bb.159: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit344 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB15_161 # %bb.160: .Ltmp1071: # EH_LABEL @@ -9451,26 +9486,26 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results jirl $ra, $ra, 0 .Ltmp1072: # EH_LABEL .LBB15_161: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit347 - ld.d $a0, $sp, 32 + ld.d $a0, $sp, 40 beq $a0, $s3, .LBB15_163 # %bb.162: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i348 - ld.d $a1, $sp, 48 + ld.d $a1, $sp, 56 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB15_163: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit350 - ld.d $a0, $sp, 64 + ld.d $a0, $sp, 72 beq $a0, $s2, .LBB15_165 # %bb.164: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i351 - ld.d $a1, $sp, 80 + ld.d $a1, $sp, 88 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB15_165: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit353 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB15_167 # %bb.166: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i354 - ld.d $a1, $sp, 112 + ld.d $a1, $sp, 120 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -9478,19 +9513,18 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results fcmp.clt.d $fcc0, $fs2, $fs0 bceqz $fcc0, .LBB15_173 # %bb.168: - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 144 beq $a0, $s0, .LBB15_170 # %bb.169: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i369 - ld.d $a1, $sp, 152 + ld.d $a1, $sp, 160 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB15_170: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit371 - fld.d $fs3, $sp, 168 # 8-byte Folded Reload - fld.d $fs2, $sp, 176 # 8-byte Folded Reload - fld.d $fs1, $sp, 184 # 8-byte Folded Reload - fld.d $fs0, $sp, 192 # 8-byte Folded Reload - ld.d $s8, $sp, 200 # 8-byte Folded Reload + fld.d $fs3, $sp, 176 # 8-byte Folded Reload + fld.d $fs2, $sp, 184 # 8-byte Folded Reload + fld.d $fs1, $sp, 192 # 8-byte Folded Reload + fld.d $fs0, $sp, 200 # 8-byte Folded Reload ld.d $s7, $sp, 208 # 8-byte Folded Reload ld.d $s6, $sp, 216 # 8-byte Folded Reload ld.d $s5, $sp, 224 # 8-byte Folded Reload @@ -9505,14 +9539,14 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results ret .LBB15_171: .Ltmp1080: # EH_LABEL - addi.d $a0, $sp, 128 + addi.d $a0, $sp, 136 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerD2Ev) jirl $ra, $ra, 0 .Ltmp1081: # EH_LABEL # %bb.172: .LBB15_173: .Ltmp1077: # EH_LABEL - addi.d $a0, $sp, 24 + addi.d $a0, $sp, 32 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerD2Ev) jirl $ra, $ra, 0 .Ltmp1078: # EH_LABEL @@ -9542,8 +9576,8 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results beqz $a0, .LBB15_90 # %bb.178: pcalau12i $a0, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s8, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - st.d $zero, $s8, 0 + addi.d $s7, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + st.d $zero, $s7, 0 pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) addi.d $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) pcaddu18i $ra, %call36(__cxa_guard_release) @@ -9552,25 +9586,25 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results .LBB15_179: .Ltmp1059: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB15_202 b .LBB15_210 .LBB15_180: .Ltmp970: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 bne $a0, $s1, .LBB15_214 b .LBB15_215 .LBB15_181: .Ltmp1052: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB15_202 b .LBB15_210 .LBB15_182: .Ltmp963: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 bne $a0, $s1, .LBB15_214 b .LBB15_215 .LBB15_183: @@ -9582,13 +9616,13 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results .LBB15_185: .Ltmp1047: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB15_202 b .LBB15_210 .LBB15_186: .Ltmp958: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 bne $a0, $s1, .LBB15_214 b .LBB15_215 .LBB15_187: @@ -9600,7 +9634,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results .LBB15_189: .Ltmp1040: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB15_202 b .LBB15_210 .LBB15_190: @@ -9614,7 +9648,7 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results .LBB15_193: move $fp, $a0 fcmp.clt.d $fcc0, $fs2, $fs0 - addi.d $a0, $sp, 24 + addi.d $a0, $sp, 32 bcnez $fcc0, .LBB15_203 b .LBB15_216 .LBB15_194: @@ -9632,24 +9666,24 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results b .LBB15_203 .LBB15_199: .Ltmp1073: # EH_LABEL - ld.d $a2, $sp, 32 + ld.d $a2, $sp, 40 move $fp, $a0 bne $a2, $s3, .LBB15_206 # %bb.200: - ld.d $a0, $sp, 64 + ld.d $a0, $sp, 72 bne $a0, $s2, .LBB15_208 .LBB15_201: - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 bne $a0, $s1, .LBB15_210 .LBB15_202: fcmp.clt.d $fcc0, $fs2, $fs0 - addi.d $a0, $sp, 24 + addi.d $a0, $sp, 32 bceqz $fcc0, .LBB15_216 .LBB15_203: - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 144 beq $a0, $s0, .LBB15_205 # %bb.204: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i372 - ld.d $a1, $sp, 152 + ld.d $a1, $sp, 160 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -9658,60 +9692,60 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results pcaddu18i $ra, %call36(_Unwind_Resume) jirl $ra, $ra, 0 .LBB15_206: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i360 - ld.d $a0, $sp, 48 + ld.d $a0, $sp, 56 addi.d $a1, $a0, 1 move $a0, $a2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 64 + ld.d $a0, $sp, 72 beq $a0, $s2, .LBB15_201 b .LBB15_208 .LBB15_207: .Ltmp1066: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 64 + ld.d $a0, $sp, 72 beq $a0, $s2, .LBB15_201 .LBB15_208: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i363 - ld.d $a1, $sp, 80 + ld.d $a1, $sp, 88 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB15_202 b .LBB15_210 .LBB15_209: .Ltmp1031: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB15_202 .LBB15_210: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i366 - ld.d $a1, $sp, 112 + ld.d $a1, $sp, 120 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 fcmp.clt.d $fcc0, $fs2, $fs0 - addi.d $a0, $sp, 24 + addi.d $a0, $sp, 32 bcnez $fcc0, .LBB15_203 b .LBB15_216 .LBB15_211: .Ltmp984: # EH_LABEL - ld.d $a2, $sp, 32 + ld.d $a2, $sp, 40 move $fp, $a0 bne $a2, $s3, .LBB15_218 # %bb.212: - ld.d $a0, $sp, 64 + ld.d $a0, $sp, 72 bne $a0, $s2, .LBB15_220 .LBB15_213: - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB15_215 .LBB15_214: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i214 - ld.d $a1, $sp, 112 + ld.d $a1, $sp, 120 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB15_215: - addi.d $a0, $sp, 128 - ld.d $a1, $sp, 16 + addi.d $a0, $sp, 136 + ld.d $a1, $sp, 24 movgr2cf $fcc0, $a1 bceqz $fcc0, .LBB15_203 .LBB15_216: # %.invoke @@ -9721,31 +9755,31 @@ _Z17CheckInvertedRateRK7Results: # @_Z17CheckInvertedRateRK7Results .Ltmp1075: # EH_LABEL # %bb.217: # %.cont .LBB15_218: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i208 - ld.d $a0, $sp, 48 + ld.d $a0, $sp, 56 addi.d $a1, $a0, 1 move $a0, $a2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 64 + ld.d $a0, $sp, 72 beq $a0, $s2, .LBB15_213 b .LBB15_220 .LBB15_219: .Ltmp977: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 64 + ld.d $a0, $sp, 72 beq $a0, $s2, .LBB15_213 .LBB15_220: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i211 - ld.d $a1, $sp, 80 + ld.d $a1, $sp, 88 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 bne $a0, $s1, .LBB15_214 b .LBB15_215 .LBB15_221: .Ltmp951: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 bne $a0, $s1, .LBB15_214 b .LBB15_215 .LBB15_222: @@ -12212,20 +12246,8 @@ GCC_except_table20: .Lcst_end15: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z19CheckAvgThreadsRateRK7Results -.LCPI21_0: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI21_1: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI21_2: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 -.LCPI21_3: - .dword 0xbee4f8b588e368f1 # double -1.0000000000000001E-5 -.LCPI21_4: - .dword 0x4059000000000000 # double 100 .text - .hidden _Z19CheckAvgThreadsRateRK7Results + .hidden _Z19CheckAvgThreadsRateRK7Results # -- Begin function _Z19CheckAvgThreadsRateRK7Results .globl _Z19CheckAvgThreadsRateRK7Results .p2align 5 .type _Z19CheckAvgThreadsRateRK7Results,@function @@ -12327,11 +12349,14 @@ _Z19CheckAvgThreadsRateRK7Results: # @_Z19CheckAvgThreadsRateRK7Results fmul.d $fa1, $fs1, $fs2 fmul.d $fa0, $fs3, $fa0 frecip.d $fa1, $fa1 - pcalau12i $a0, %pc_hi20(.LCPI21_0) - fld.d $fs4, $a0, %pc_lo12(.LCPI21_0) fsub.d $fa1, $fs0, $fa1 fabs.d $fs5, $fa1 frecip.d $fa0, $fa0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs4, $a0 fmul.d $fs6, $fa0, $fs4 fcmp.cule.d $fcc0, $fs6, $fs5 movcf2gr $a0, $fcc0 @@ -12693,8 +12718,11 @@ _Z19CheckAvgThreadsRateRK7Results: # @_Z19CheckAvgThreadsRateRK7Results beqz $a0, .LBB21_74 # %bb.69: .Ltmp1419: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI21_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI21_1) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, 1019 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp1420: # EH_LABEL @@ -12817,10 +12845,13 @@ _Z19CheckAvgThreadsRateRK7Results: # @_Z19CheckAvgThreadsRateRK7Results jirl $ra, $ra, 0 .Ltmp1446: # EH_LABEL # %bb.86: - pcalau12i $s5, %pc_hi20(.LCPI21_2) - fld.d $fs7, $s5, %pc_lo12(.LCPI21_2) fmul.d $fa0, $fs3, $fa0 frecip.d $fa0, $fa0 + lu12i.w $a0, -487882 + ori $s4, $a0, 2289 + lu32i.d $s4, 325813 + lu52i.d $s5, $s4, 1006 + movgr2fr.d $fs7, $s5 fcmp.clt.d $fcc0, $fs7, $fa0 bcnez $fcc0, .LBB21_90 # %bb.87: @@ -12838,10 +12869,10 @@ _Z19CheckAvgThreadsRateRK7Results: # @_Z19CheckAvgThreadsRateRK7Results jirl $ra, $ra, 0 .Ltmp1450: # EH_LABEL # %bb.89: - pcalau12i $a0, %pc_hi20(.LCPI21_3) - fld.d $fa1, $a0, %pc_lo12(.LCPI21_3) fmul.d $fa0, $fs3, $fa0 frecip.d $fa0, $fa0 + lu52i.d $a0, $s4, -1042 + movgr2fr.d $fa1, $a0 fcmp.cule.d $fcc0, $fa1, $fa0 bcnez $fcc0, .LBB21_93 .LBB21_90: @@ -12863,16 +12894,18 @@ _Z19CheckAvgThreadsRateRK7Results: # @_Z19CheckAvgThreadsRateRK7Results frecip.d $fs7, $fa0 .LBB21_93: ld.d $a0, $s6, 0 - pcalau12i $s4, %pc_hi20(.LCPI21_4) beqz $a0, .LBB21_97 # %bb.94: fmul.d $fa0, $fs1, $fs2 vldi $vr1, -784 - fld.d $fa2, $s4, %pc_lo12(.LCPI21_4) fdiv.d $fa0, $fa1, $fa0 fadd.d $fa0, $fs0, $fa0 fdiv.d $fa0, $fa0, $fs7 - fmul.d $fa0, $fa0, $fa2 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp1455: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 @@ -13338,8 +13371,11 @@ _Z19CheckAvgThreadsRateRK7Results: # @_Z19CheckAvgThreadsRateRK7Results beqz $a0, .LBB21_180 # %bb.175: .Ltmp1544: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI21_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI21_1) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, 1019 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp1545: # EH_LABEL @@ -13460,10 +13496,10 @@ _Z19CheckAvgThreadsRateRK7Results: # @_Z19CheckAvgThreadsRateRK7Results jirl $ra, $ra, 0 .Ltmp1571: # EH_LABEL # %bb.192: - fld.d $fs4, $s5, %pc_lo12(.LCPI21_2) fmul.d $fa0, $fs3, $fa0 vldi $vr1, -1024 fdiv.d $fa0, $fa1, $fa0 + movgr2fr.d $fs4, $s5 fcmp.clt.d $fcc0, $fs4, $fa0 bcnez $fcc0, .LBB21_196 # %bb.193: @@ -13481,11 +13517,11 @@ _Z19CheckAvgThreadsRateRK7Results: # @_Z19CheckAvgThreadsRateRK7Results jirl $ra, $ra, 0 .Ltmp1575: # EH_LABEL # %bb.195: - pcalau12i $a0, %pc_hi20(.LCPI21_3) - fld.d $fa1, $a0, %pc_lo12(.LCPI21_3) fmul.d $fa0, $fs3, $fa0 - vldi $vr2, -1024 - fdiv.d $fa0, $fa2, $fa0 + vldi $vr1, -1024 + fdiv.d $fa0, $fa1, $fa0 + lu52i.d $a0, $s4, -1042 + movgr2fr.d $fa1, $a0 fcmp.cule.d $fcc0, $fa1, $fa0 bcnez $fcc0, .LBB21_199 .LBB21_196: @@ -13512,11 +13548,14 @@ _Z19CheckAvgThreadsRateRK7Results: # @_Z19CheckAvgThreadsRateRK7Results # %bb.200: fmul.d $fa0, $fs1, $fs2 vldi $vr1, -896 - fld.d $fa2, $s4, %pc_lo12(.LCPI21_4) fdiv.d $fa0, $fa1, $fa0 fadd.d $fa0, $fs0, $fa0 fdiv.d $fa0, $fa0, $fs4 - fmul.d $fa0, $fa0, $fa2 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp1580: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 @@ -14164,18 +14203,8 @@ GCC_except_table22: .Lcst_end17: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z23CheckIterationInvariantRK7Results -.LCPI23_0: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI23_1: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI23_2: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 -.LCPI23_3: - .dword 0x4059000000000000 # double 100 .text - .hidden _Z23CheckIterationInvariantRK7Results + .hidden _Z23CheckIterationInvariantRK7Results # -- Begin function _Z23CheckIterationInvariantRK7Results .globl _Z23CheckIterationInvariantRK7Results .p2align 5 .type _Z23CheckIterationInvariantRK7Results,@function @@ -14185,70 +14214,71 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results .cfi_personality 155, DW.ref.__gxx_personality_v0 .cfi_lsda 27, .Lexception18 # %bb.0: # %._crit_edge.i.i - addi.d $sp, $sp, -304 - .cfi_def_cfa_offset 304 - st.d $ra, $sp, 296 # 8-byte Folded Spill - st.d $fp, $sp, 288 # 8-byte Folded Spill - st.d $s0, $sp, 280 # 8-byte Folded Spill - st.d $s1, $sp, 272 # 8-byte Folded Spill - st.d $s2, $sp, 264 # 8-byte Folded Spill - st.d $s3, $sp, 256 # 8-byte Folded Spill - st.d $s4, $sp, 248 # 8-byte Folded Spill - st.d $s5, $sp, 240 # 8-byte Folded Spill - st.d $s6, $sp, 232 # 8-byte Folded Spill - st.d $s7, $sp, 224 # 8-byte Folded Spill - st.d $s8, $sp, 216 # 8-byte Folded Spill - fst.d $fs0, $sp, 208 # 8-byte Folded Spill - fst.d $fs1, $sp, 200 # 8-byte Folded Spill - fst.d $fs2, $sp, 192 # 8-byte Folded Spill - fst.d $fs3, $sp, 184 # 8-byte Folded Spill - fst.d $fs4, $sp, 176 # 8-byte Folded Spill - .cfi_offset 1, -8 - .cfi_offset 22, -16 - .cfi_offset 23, -24 - .cfi_offset 24, -32 - .cfi_offset 25, -40 - .cfi_offset 26, -48 - .cfi_offset 27, -56 - .cfi_offset 28, -64 - .cfi_offset 29, -72 - .cfi_offset 30, -80 - .cfi_offset 31, -88 - .cfi_offset 56, -96 - .cfi_offset 57, -104 - .cfi_offset 58, -112 - .cfi_offset 59, -120 - .cfi_offset 60, -128 + addi.d $sp, $sp, -288 + .cfi_def_cfa_offset 288 + st.d $ra, $sp, 280 # 8-byte Folded Spill + st.d $fp, $sp, 272 # 8-byte Folded Spill + st.d $s0, $sp, 264 # 8-byte Folded Spill + st.d $s1, $sp, 256 # 8-byte Folded Spill + st.d $s2, $sp, 248 # 8-byte Folded Spill + st.d $s3, $sp, 240 # 8-byte Folded Spill + st.d $s4, $sp, 232 # 8-byte Folded Spill + st.d $s5, $sp, 224 # 8-byte Folded Spill + st.d $s6, $sp, 216 # 8-byte Folded Spill + st.d $s7, $sp, 208 # 8-byte Folded Spill + fst.d $fs0, $sp, 200 # 8-byte Folded Spill + fst.d $fs1, $sp, 192 # 8-byte Folded Spill + fst.d $fs2, $sp, 184 # 8-byte Folded Spill + fst.d $fs3, $sp, 176 # 8-byte Folded Spill + fst.d $fs4, $sp, 168 # 8-byte Folded Spill + .cfi_offset 1, -8 + .cfi_offset 22, -16 + .cfi_offset 23, -24 + .cfi_offset 24, -32 + .cfi_offset 25, -40 + .cfi_offset 26, -48 + .cfi_offset 27, -56 + .cfi_offset 28, -64 + .cfi_offset 29, -72 + .cfi_offset 30, -80 + .cfi_offset 56, -88 + .cfi_offset 57, -96 + .cfi_offset 58, -104 + .cfi_offset 59, -112 + .cfi_offset 60, -120 move $fp, $a0 pcaddu18i $ra, %call36(_ZNK7Results13NumIterationsEv) jirl $ra, $ra, 0 fmov.d $fs0, $fa0 - addi.d $s0, $sp, 160 - st.d $s0, $sp, 144 + addi.d $s0, $sp, 152 + st.d $s0, $sp, 136 pcalau12i $a0, %pc_hi20(.L.str.5) addi.d $a0, $a0, %pc_lo12(.L.str.5) ld.h $s4, $a0, 0 ld.b $s5, $a0, 2 - st.h $s4, $sp, 160 - st.b $s5, $sp, 162 + st.h $s4, $sp, 152 + st.b $s5, $sp, 154 ori $a0, $zero, 3 - st.d $a0, $sp, 152 - st.b $zero, $sp, 163 + st.d $a0, $sp, 144 + st.b $zero, $sp, 155 .Ltmp1600: # EH_LABEL - addi.d $a1, $sp, 144 + addi.d $a1, $sp, 136 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp1601: # EH_LABEL # %bb.1: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit - pcalau12i $a0, %pc_hi20(.LCPI23_0) - fld.d $fs4, $a0, %pc_lo12(.LCPI23_0) fsub.d $fa0, $fa0, $fs0 fabs.d $fs2, $fa0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs4, $a0 fmul.d $fs1, $fs0, $fs4 fcmp.cule.d $fcc0, $fs1, $fs2 movcf2gr $a0, $fcc0 - st.d $a0, $sp, 24 + st.d $a0, $sp, 16 bceqz $fcc0, .LBB23_4 # %bb.2: .Ltmp1603: # EH_LABEL @@ -14258,13 +14288,13 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results addi.d $a2, $a0, %pc_lo12(.L.str.30) pcalau12i $a0, %pc_hi20(.L__func__._Z23CheckIterationInvariantRK7Results) addi.d $a3, $a0, %pc_lo12(.L__func__._Z23CheckIterationInvariantRK7Results) - addi.d $a0, $sp, 136 + addi.d $a0, $sp, 128 ori $a4, $zero, 411 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerC2EPKcS3_S3_i) jirl $ra, $ra, 0 .Ltmp1604: # EH_LABEL # %bb.3: - ld.d $s6, $sp, 136 + ld.d $s6, $sp, 128 b .LBB23_5 .LBB23_4: pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) @@ -14445,15 +14475,15 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1638: # EH_LABEL .LBB23_37: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit134 - addi.d $s1, $sp, 120 - st.d $s1, $sp, 104 - st.h $s4, $sp, 120 - st.b $s5, $sp, 122 + addi.d $s1, $sp, 112 + st.d $s1, $sp, 96 + st.h $s4, $sp, 112 + st.b $s5, $sp, 114 ori $a0, $zero, 3 - st.d $a0, $sp, 112 - st.b $zero, $sp, 123 + st.d $a0, $sp, 104 + st.b $zero, $sp, 115 .Ltmp1640: # EH_LABEL - addi.d $a1, $sp, 104 + addi.d $a1, $sp, 96 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 @@ -14575,8 +14605,11 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results beqz $a0, .LBB23_66 # %bb.61: .Ltmp1667: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI23_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI23_1) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, 1019 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp1668: # EH_LABEL @@ -14603,15 +14636,15 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1672: # EH_LABEL .LBB23_66: # %_ZN9benchmark8internallsIA15_cEERNS0_7LogTypeES4_RKT_.exit - addi.d $s2, $sp, 88 - st.d $s2, $sp, 72 - st.h $s4, $sp, 88 - st.b $s5, $sp, 90 + addi.d $s2, $sp, 80 + st.d $s2, $sp, 64 + st.h $s4, $sp, 80 + st.b $s5, $sp, 82 ori $a0, $zero, 3 - st.d $a0, $sp, 80 - st.b $zero, $sp, 91 + st.d $a0, $sp, 72 + st.b $zero, $sp, 83 .Ltmp1674: # EH_LABEL - addi.d $a1, $sp, 72 + addi.d $a1, $sp, 64 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 @@ -14637,33 +14670,38 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1679: # EH_LABEL .LBB23_71: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit189 - addi.d $s3, $sp, 56 - st.d $s3, $sp, 40 - st.h $s4, $sp, 56 - st.b $s5, $sp, 58 + addi.d $s3, $sp, 48 + st.d $s3, $sp, 32 + st.h $s4, $sp, 48 + st.b $s5, $sp, 50 ori $a0, $zero, 3 - st.d $a0, $sp, 48 - st.b $zero, $sp, 59 + st.d $a0, $sp, 40 + st.b $zero, $sp, 51 .Ltmp1681: # EH_LABEL - addi.d $a1, $sp, 40 + addi.d $a1, $sp, 32 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp1682: # EH_LABEL # %bb.72: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit195 ld.d $a0, $s6, 0 - pcalau12i $s5, %pc_hi20(.LCPI23_2) - pcalau12i $s4, %pc_hi20(.LCPI23_3) + lu12i.w $s4, -487882 beqz $a0, .LBB23_76 # %bb.73: - fld.d $fa1, $s5, %pc_lo12(.LCPI23_2) fsub.d $fa0, $fa0, $fs0 - fabs.d $fa2, $fs0 - fld.d $fa3, $s4, %pc_lo12(.LCPI23_3) - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fs0, $fcc0 + fabs.d $fa1, $fs0 + ori $a1, $s4, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa2, $a1 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fa2, $fs0, $fcc0 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa3 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp1683: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 @@ -14680,26 +14718,26 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1686: # EH_LABEL .LBB23_76: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit201 - ld.d $a0, $sp, 40 + ld.d $a0, $sp, 32 beq $a0, $s3, .LBB23_78 # %bb.77: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i - ld.d $a1, $sp, 56 + ld.d $a1, $sp, 48 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB23_78: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 beq $a0, $s2, .LBB23_80 # %bb.79: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i202 - ld.d $a1, $sp, 88 + ld.d $a1, $sp, 80 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB23_80: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit204 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB23_82 # %bb.81: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i205 - ld.d $a1, $sp, 120 + ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -14707,26 +14745,26 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results fcmp.clt.d $fcc0, $fs2, $fs1 bceqz $fcc0, .LBB23_171 # %bb.83: - ld.d $a0, $sp, 144 + ld.d $a0, $sp, 136 beq $a0, $s0, .LBB23_85 # %bb.84: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i217 - ld.d $a1, $sp, 160 + ld.d $a1, $sp, 152 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB23_85: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit219 - st.d $s0, $sp, 144 + st.d $s0, $sp, 136 pcalau12i $a0, %pc_hi20(.L.str.6) addi.d $a0, $a0, %pc_lo12(.L.str.6) - ld.h $s6, $a0, 0 - ld.b $s7, $a0, 2 - st.h $s6, $sp, 160 - st.b $s7, $sp, 162 + ld.h $s5, $a0, 0 + ld.b $s6, $a0, 2 + st.h $s5, $sp, 152 + st.b $s6, $sp, 154 ori $a0, $zero, 3 - st.d $a0, $sp, 152 - st.b $zero, $sp, 163 + st.d $a0, $sp, 144 + st.b $zero, $sp, 155 .Ltmp1688: # EH_LABEL - addi.d $a1, $sp, 144 + addi.d $a1, $sp, 136 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 @@ -14747,23 +14785,23 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results addi.d $a2, $a0, %pc_lo12(.L.str.30) pcalau12i $a0, %pc_hi20(.L__func__._Z23CheckIterationInvariantRK7Results) addi.d $a3, $a0, %pc_lo12(.L__func__._Z23CheckIterationInvariantRK7Results) - addi.d $a0, $sp, 32 + addi.d $a0, $sp, 24 ori $a4, $zero, 412 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerC2EPKcS3_S3_i) jirl $ra, $ra, 0 .Ltmp1692: # EH_LABEL # %bb.88: - ld.d $s8, $sp, 32 + ld.d $s7, $sp, 24 b .LBB23_90 .LBB23_89: pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) ld.b $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) dbar 20 pcalau12i $a1, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s8, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + addi.d $s7, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) beqz $a0, .LBB23_177 .LBB23_90: # %_ZN9benchmark8internal18GetNullLogInstanceEv.exit227 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_122 # %bb.91: .Ltmp1693: # EH_LABEL @@ -14774,7 +14812,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1694: # EH_LABEL # %bb.92: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit230 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_122 # %bb.93: .Ltmp1695: # EH_LABEL @@ -14785,7 +14823,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1696: # EH_LABEL # %bb.94: # %_ZN9benchmark8internallsIA141_cEERNS0_7LogTypeES4_RKT_.exit233 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_122 # %bb.95: .Ltmp1697: # EH_LABEL @@ -14796,7 +14834,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1698: # EH_LABEL # %bb.96: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit236 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_122 # %bb.97: .Ltmp1700: # EH_LABEL @@ -14805,7 +14843,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1701: # EH_LABEL # %bb.98: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit239 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_122 # %bb.99: .Ltmp1702: # EH_LABEL @@ -14816,7 +14854,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1703: # EH_LABEL # %bb.100: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit242 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_122 # %bb.101: ld.d $a1, $fp, 0 @@ -14826,7 +14864,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1705: # EH_LABEL # %bb.102: # %_ZN9benchmark8internallsINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEERNS0_7LogTypeES9_RKT_.exit245 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_122 # %bb.103: .Ltmp1706: # EH_LABEL @@ -14837,7 +14875,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1707: # EH_LABEL # %bb.104: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit248 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_122 # %bb.105: .Ltmp1708: # EH_LABEL @@ -14848,7 +14886,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1709: # EH_LABEL # %bb.106: # %_ZN9benchmark8internallsIA141_cEERNS0_7LogTypeES4_RKT_.exit251 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_122 # %bb.107: .Ltmp1710: # EH_LABEL @@ -14859,7 +14897,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1711: # EH_LABEL # %bb.108: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit254 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_122 # %bb.109: .Ltmp1713: # EH_LABEL @@ -14868,7 +14906,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1714: # EH_LABEL # %bb.110: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit257 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_122 # %bb.111: .Ltmp1715: # EH_LABEL @@ -14879,7 +14917,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1716: # EH_LABEL # %bb.112: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit260 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_122 # %bb.113: .Ltmp1717: # EH_LABEL @@ -14890,7 +14928,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1718: # EH_LABEL # %bb.114: # %_ZN9benchmark8internallsIA11_cEERNS0_7LogTypeES4_RKT_.exit263 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_122 # %bb.115: .Ltmp1719: # EH_LABEL @@ -14901,7 +14939,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1720: # EH_LABEL # %bb.116: # %_ZN9benchmark8internallsIA7_cEERNS0_7LogTypeES4_RKT_.exit266 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_122 # %bb.117: .Ltmp1721: # EH_LABEL @@ -14912,7 +14950,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1722: # EH_LABEL # %bb.118: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit269 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_122 # %bb.119: .Ltmp1723: # EH_LABEL @@ -14923,7 +14961,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1724: # EH_LABEL # %bb.120: # %_ZN9benchmark8internallsIA4_cEERNS0_7LogTypeES4_RKT_.exit272 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_122 # %bb.121: .Ltmp1725: # EH_LABEL @@ -14934,20 +14972,20 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1726: # EH_LABEL .LBB23_122: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit275 - st.d $s1, $sp, 104 - st.h $s6, $sp, 120 - st.b $s7, $sp, 122 + st.d $s1, $sp, 96 + st.h $s5, $sp, 112 + st.b $s6, $sp, 114 ori $a0, $zero, 3 - st.d $a0, $sp, 112 - st.b $zero, $sp, 123 + st.d $a0, $sp, 104 + st.b $zero, $sp, 115 .Ltmp1728: # EH_LABEL - addi.d $a1, $sp, 104 + addi.d $a1, $sp, 96 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp1729: # EH_LABEL # %bb.123: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit281 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_151 # %bb.124: .Ltmp1730: # EH_LABEL @@ -14955,7 +14993,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1731: # EH_LABEL # %bb.125: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit284 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_151 # %bb.126: .Ltmp1732: # EH_LABEL @@ -14966,7 +15004,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1733: # EH_LABEL # %bb.127: # %_ZN9benchmark8internallsIA14_cEERNS0_7LogTypeES4_RKT_.exit287 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_151 # %bb.128: .Ltmp1735: # EH_LABEL @@ -14975,7 +15013,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1736: # EH_LABEL # %bb.129: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit290 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_151 # %bb.130: .Ltmp1737: # EH_LABEL @@ -14986,7 +15024,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1738: # EH_LABEL # %bb.131: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit293 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_151 # %bb.132: .Ltmp1739: # EH_LABEL @@ -14997,7 +15035,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1740: # EH_LABEL # %bb.133: # %_ZN9benchmark8internallsIA141_cEERNS0_7LogTypeES4_RKT_.exit296 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_151 # %bb.134: .Ltmp1741: # EH_LABEL @@ -15008,7 +15046,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1742: # EH_LABEL # %bb.135: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit299 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_151 # %bb.136: .Ltmp1744: # EH_LABEL @@ -15017,7 +15055,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1745: # EH_LABEL # %bb.137: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit302 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_151 # %bb.138: .Ltmp1746: # EH_LABEL @@ -15028,7 +15066,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1747: # EH_LABEL # %bb.139: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit305 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_151 # %bb.140: .Ltmp1748: # EH_LABEL @@ -15039,7 +15077,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1749: # EH_LABEL # %bb.141: # %_ZN9benchmark8internallsIA19_cEERNS0_7LogTypeES4_RKT_.exit308 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_151 # %bb.142: .Ltmp1751: # EH_LABEL @@ -15048,7 +15086,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1752: # EH_LABEL # %bb.143: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit311 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_151 # %bb.144: .Ltmp1753: # EH_LABEL @@ -15059,17 +15097,20 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1754: # EH_LABEL # %bb.145: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit314 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_151 # %bb.146: .Ltmp1756: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI23_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI23_1) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, 1019 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp1757: # EH_LABEL # %bb.147: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit317 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_151 # %bb.148: .Ltmp1758: # EH_LABEL @@ -15080,7 +15121,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1759: # EH_LABEL # %bb.149: # %_ZN9benchmark8internallsIA5_cEERNS0_7LogTypeES4_RKT_.exit320 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_151 # %bb.150: .Ltmp1760: # EH_LABEL @@ -15091,20 +15132,20 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1761: # EH_LABEL .LBB23_151: # %_ZN9benchmark8internallsIA15_cEERNS0_7LogTypeES4_RKT_.exit323 - st.d $s2, $sp, 72 - st.h $s6, $sp, 88 - st.b $s7, $sp, 90 + st.d $s2, $sp, 64 + st.h $s5, $sp, 80 + st.b $s6, $sp, 82 ori $a0, $zero, 3 - st.d $a0, $sp, 80 - st.b $zero, $sp, 91 + st.d $a0, $sp, 72 + st.b $zero, $sp, 83 .Ltmp1763: # EH_LABEL - addi.d $a1, $sp, 72 + addi.d $a1, $sp, 64 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp1764: # EH_LABEL # %bb.152: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit329 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_156 # %bb.153: vldi $vr1, -896 @@ -15114,7 +15155,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1766: # EH_LABEL # %bb.154: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit332 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_156 # %bb.155: .Ltmp1767: # EH_LABEL @@ -15125,37 +15166,43 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1768: # EH_LABEL .LBB23_156: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit335 - st.d $s3, $sp, 40 - st.h $s6, $sp, 56 - st.b $s7, $sp, 58 + st.d $s3, $sp, 32 + st.h $s5, $sp, 48 + st.b $s6, $sp, 50 ori $a0, $zero, 3 - st.d $a0, $sp, 48 - st.b $zero, $sp, 59 + st.d $a0, $sp, 40 + st.b $zero, $sp, 51 .Ltmp1770: # EH_LABEL - addi.d $a1, $sp, 40 + addi.d $a1, $sp, 32 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp1771: # EH_LABEL # %bb.157: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit341 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_161 # %bb.158: vldi $vr1, -896 - fld.d $fa2, $s5, %pc_lo12(.LCPI23_2) fmadd.d $fa0, $fs0, $fa1, $fa0 fabs.d $fa1, $fs2 - fld.d $fa3, $s4, %pc_lo12(.LCPI23_3) + ori $a1, $s4, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa2, $a1 fcmp.clt.d $fcc0, $fa2, $fa1 fsel $fa1, $fa2, $fs2, $fcc0 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa3 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp1772: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp1773: # EH_LABEL # %bb.159: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit344 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB23_161 # %bb.160: .Ltmp1774: # EH_LABEL @@ -15166,26 +15213,26 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results jirl $ra, $ra, 0 .Ltmp1775: # EH_LABEL .LBB23_161: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit347 - ld.d $a0, $sp, 40 + ld.d $a0, $sp, 32 beq $a0, $s3, .LBB23_163 # %bb.162: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i348 - ld.d $a1, $sp, 56 + ld.d $a1, $sp, 48 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB23_163: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit350 - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 beq $a0, $s2, .LBB23_165 # %bb.164: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i351 - ld.d $a1, $sp, 88 + ld.d $a1, $sp, 80 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB23_165: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit353 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB23_167 # %bb.166: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i354 - ld.d $a1, $sp, 120 + ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -15193,42 +15240,41 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results fcmp.clt.d $fcc0, $fs3, $fs1 bceqz $fcc0, .LBB23_173 # %bb.168: - ld.d $a0, $sp, 144 + ld.d $a0, $sp, 136 beq $a0, $s0, .LBB23_170 # %bb.169: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i369 - ld.d $a1, $sp, 160 + ld.d $a1, $sp, 152 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB23_170: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit371 - fld.d $fs4, $sp, 176 # 8-byte Folded Reload - fld.d $fs3, $sp, 184 # 8-byte Folded Reload - fld.d $fs2, $sp, 192 # 8-byte Folded Reload - fld.d $fs1, $sp, 200 # 8-byte Folded Reload - fld.d $fs0, $sp, 208 # 8-byte Folded Reload - ld.d $s8, $sp, 216 # 8-byte Folded Reload - ld.d $s7, $sp, 224 # 8-byte Folded Reload - ld.d $s6, $sp, 232 # 8-byte Folded Reload - ld.d $s5, $sp, 240 # 8-byte Folded Reload - ld.d $s4, $sp, 248 # 8-byte Folded Reload - ld.d $s3, $sp, 256 # 8-byte Folded Reload - ld.d $s2, $sp, 264 # 8-byte Folded Reload - ld.d $s1, $sp, 272 # 8-byte Folded Reload - ld.d $s0, $sp, 280 # 8-byte Folded Reload - ld.d $fp, $sp, 288 # 8-byte Folded Reload - ld.d $ra, $sp, 296 # 8-byte Folded Reload - addi.d $sp, $sp, 304 + fld.d $fs4, $sp, 168 # 8-byte Folded Reload + fld.d $fs3, $sp, 176 # 8-byte Folded Reload + fld.d $fs2, $sp, 184 # 8-byte Folded Reload + fld.d $fs1, $sp, 192 # 8-byte Folded Reload + fld.d $fs0, $sp, 200 # 8-byte Folded Reload + ld.d $s7, $sp, 208 # 8-byte Folded Reload + ld.d $s6, $sp, 216 # 8-byte Folded Reload + ld.d $s5, $sp, 224 # 8-byte Folded Reload + ld.d $s4, $sp, 232 # 8-byte Folded Reload + ld.d $s3, $sp, 240 # 8-byte Folded Reload + ld.d $s2, $sp, 248 # 8-byte Folded Reload + ld.d $s1, $sp, 256 # 8-byte Folded Reload + ld.d $s0, $sp, 264 # 8-byte Folded Reload + ld.d $fp, $sp, 272 # 8-byte Folded Reload + ld.d $ra, $sp, 280 # 8-byte Folded Reload + addi.d $sp, $sp, 288 ret .LBB23_171: .Ltmp1783: # EH_LABEL - addi.d $a0, $sp, 136 + addi.d $a0, $sp, 128 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerD2Ev) jirl $ra, $ra, 0 .Ltmp1784: # EH_LABEL # %bb.172: .LBB23_173: .Ltmp1780: # EH_LABEL - addi.d $a0, $sp, 32 + addi.d $a0, $sp, 24 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerD2Ev) jirl $ra, $ra, 0 .Ltmp1781: # EH_LABEL @@ -15258,8 +15304,8 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results beqz $a0, .LBB23_90 # %bb.178: pcalau12i $a0, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s8, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - st.d $zero, $s8, 0 + addi.d $s7, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + st.d $zero, $s7, 0 pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) addi.d $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) pcaddu18i $ra, %call36(__cxa_guard_release) @@ -15268,25 +15314,25 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results .LBB23_179: .Ltmp1762: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB23_202 b .LBB23_210 .LBB23_180: .Ltmp1673: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s1, .LBB23_214 b .LBB23_215 .LBB23_181: .Ltmp1755: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB23_202 b .LBB23_210 .LBB23_182: .Ltmp1666: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s1, .LBB23_214 b .LBB23_215 .LBB23_183: @@ -15298,13 +15344,13 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results .LBB23_185: .Ltmp1750: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB23_202 b .LBB23_210 .LBB23_186: .Ltmp1661: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s1, .LBB23_214 b .LBB23_215 .LBB23_187: @@ -15316,7 +15362,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results .LBB23_189: .Ltmp1743: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB23_202 b .LBB23_210 .LBB23_190: @@ -15330,7 +15376,7 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results .LBB23_193: move $fp, $a0 fcmp.clt.d $fcc0, $fs3, $fs1 - addi.d $a0, $sp, 32 + addi.d $a0, $sp, 24 bcnez $fcc0, .LBB23_203 b .LBB23_216 .LBB23_194: @@ -15348,24 +15394,24 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results b .LBB23_203 .LBB23_199: .Ltmp1776: # EH_LABEL - ld.d $a2, $sp, 40 + ld.d $a2, $sp, 32 move $fp, $a0 bne $a2, $s3, .LBB23_206 # %bb.200: - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 bne $a0, $s2, .LBB23_208 .LBB23_201: - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s1, .LBB23_210 .LBB23_202: fcmp.clt.d $fcc0, $fs3, $fs1 - addi.d $a0, $sp, 32 + addi.d $a0, $sp, 24 bceqz $fcc0, .LBB23_216 .LBB23_203: - ld.d $a0, $sp, 144 + ld.d $a0, $sp, 136 beq $a0, $s0, .LBB23_205 # %bb.204: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i372 - ld.d $a1, $sp, 160 + ld.d $a1, $sp, 152 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -15374,60 +15420,60 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results pcaddu18i $ra, %call36(_Unwind_Resume) jirl $ra, $ra, 0 .LBB23_206: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i360 - ld.d $a0, $sp, 56 + ld.d $a0, $sp, 48 addi.d $a1, $a0, 1 move $a0, $a2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 beq $a0, $s2, .LBB23_201 b .LBB23_208 .LBB23_207: .Ltmp1769: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 beq $a0, $s2, .LBB23_201 .LBB23_208: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i363 - ld.d $a1, $sp, 88 + ld.d $a1, $sp, 80 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB23_202 b .LBB23_210 .LBB23_209: .Ltmp1734: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB23_202 .LBB23_210: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i366 - ld.d $a1, $sp, 120 + ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 fcmp.clt.d $fcc0, $fs3, $fs1 - addi.d $a0, $sp, 32 + addi.d $a0, $sp, 24 bcnez $fcc0, .LBB23_203 b .LBB23_216 .LBB23_211: .Ltmp1687: # EH_LABEL - ld.d $a2, $sp, 40 + ld.d $a2, $sp, 32 move $fp, $a0 bne $a2, $s3, .LBB23_218 # %bb.212: - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 bne $a0, $s2, .LBB23_220 .LBB23_213: - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB23_215 .LBB23_214: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i214 - ld.d $a1, $sp, 120 + ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB23_215: - addi.d $a0, $sp, 136 - ld.d $a1, $sp, 24 + addi.d $a0, $sp, 128 + ld.d $a1, $sp, 16 movgr2cf $fcc0, $a1 bceqz $fcc0, .LBB23_203 .LBB23_216: # %.invoke @@ -15437,31 +15483,31 @@ _Z23CheckIterationInvariantRK7Results: # @_Z23CheckIterationInvariantRK7Results .Ltmp1778: # EH_LABEL # %bb.217: # %.cont .LBB23_218: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i208 - ld.d $a0, $sp, 56 + ld.d $a0, $sp, 48 addi.d $a1, $a0, 1 move $a0, $a2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 beq $a0, $s2, .LBB23_213 b .LBB23_220 .LBB23_219: .Ltmp1680: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 beq $a0, $s2, .LBB23_213 .LBB23_220: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i211 - ld.d $a1, $sp, 88 + ld.d $a1, $sp, 80 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s1, .LBB23_214 b .LBB23_215 .LBB23_221: .Ltmp1654: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s1, .LBB23_214 b .LBB23_215 .LBB23_222: @@ -15809,18 +15855,8 @@ GCC_except_table24: .Lcst_end19: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29CheckIsIterationInvariantRateRK7Results -.LCPI25_0: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI25_1: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI25_2: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 -.LCPI25_3: - .dword 0x4059000000000000 # double 100 .text - .hidden _Z29CheckIsIterationInvariantRateRK7Results + .hidden _Z29CheckIsIterationInvariantRateRK7Results # -- Begin function _Z29CheckIsIterationInvariantRateRK7Results .globl _Z29CheckIsIterationInvariantRateRK7Results .p2align 5 .type _Z29CheckIsIterationInvariantRateRK7Results,@function @@ -15842,13 +15878,12 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat st.d $s5, $sp, 240 # 8-byte Folded Spill st.d $s6, $sp, 232 # 8-byte Folded Spill st.d $s7, $sp, 224 # 8-byte Folded Spill - st.d $s8, $sp, 216 # 8-byte Folded Spill - fst.d $fs0, $sp, 208 # 8-byte Folded Spill - fst.d $fs1, $sp, 200 # 8-byte Folded Spill - fst.d $fs2, $sp, 192 # 8-byte Folded Spill - fst.d $fs3, $sp, 184 # 8-byte Folded Spill - fst.d $fs4, $sp, 176 # 8-byte Folded Spill - fst.d $fs5, $sp, 168 # 8-byte Folded Spill + fst.d $fs0, $sp, 216 # 8-byte Folded Spill + fst.d $fs1, $sp, 208 # 8-byte Folded Spill + fst.d $fs2, $sp, 200 # 8-byte Folded Spill + fst.d $fs3, $sp, 192 # 8-byte Folded Spill + fst.d $fs4, $sp, 184 # 8-byte Folded Spill + fst.d $fs5, $sp, 176 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -15859,13 +15894,12 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat .cfi_offset 28, -64 .cfi_offset 29, -72 .cfi_offset 30, -80 - .cfi_offset 31, -88 - .cfi_offset 56, -96 - .cfi_offset 57, -104 - .cfi_offset 58, -112 - .cfi_offset 59, -120 - .cfi_offset 60, -128 - .cfi_offset 61, -136 + .cfi_offset 56, -88 + .cfi_offset 57, -96 + .cfi_offset 58, -104 + .cfi_offset 59, -112 + .cfi_offset 60, -120 + .cfi_offset 61, -128 move $fp, $a0 pcaddu18i $ra, %call36(_ZNK7Results13NumIterationsEv) jirl $ra, $ra, 0 @@ -15879,34 +15913,37 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat pcaddu18i $ra, %call36(_ZNK7Results7GetTimeENS_13BenchmarkTimeE) jirl $ra, $ra, 0 fmov.d $fs2, $fa0 - addi.d $s0, $sp, 152 - st.d $s0, $sp, 136 + addi.d $s0, $sp, 160 + st.d $s0, $sp, 144 pcalau12i $a0, %pc_hi20(.L.str.5) addi.d $a0, $a0, %pc_lo12(.L.str.5) ld.h $s4, $a0, 0 ld.b $s5, $a0, 2 - st.h $s4, $sp, 152 - st.b $s5, $sp, 154 + st.h $s4, $sp, 160 + st.b $s5, $sp, 162 ori $a0, $zero, 3 - st.d $a0, $sp, 144 - st.b $zero, $sp, 155 + st.d $a0, $sp, 152 + st.b $zero, $sp, 163 .Ltmp1792: # EH_LABEL - addi.d $a1, $sp, 136 + addi.d $a1, $sp, 144 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp1793: # EH_LABEL # %bb.1: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit fmul.d $fs3, $fs1, $fs2 - pcalau12i $a0, %pc_hi20(.LCPI25_0) - fld.d $fs4, $a0, %pc_lo12(.LCPI25_0) fdiv.d $fs2, $fs0, $fs3 fsub.d $fa0, $fa0, $fs2 fabs.d $fs5, $fa0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs4, $a0 fmul.d $fs1, $fs2, $fs4 fcmp.cule.d $fcc0, $fs1, $fs5 movcf2gr $a0, $fcc0 - st.d $a0, $sp, 16 + st.d $a0, $sp, 24 bceqz $fcc0, .LBB25_4 # %bb.2: .Ltmp1795: # EH_LABEL @@ -15916,13 +15953,13 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat addi.d $a2, $a0, %pc_lo12(.L.str.30) pcalau12i $a0, %pc_hi20(.L__func__._Z29CheckIsIterationInvariantRateRK7Results) addi.d $a3, $a0, %pc_lo12(.L__func__._Z29CheckIsIterationInvariantRateRK7Results) - addi.d $a0, $sp, 128 + addi.d $a0, $sp, 136 ori $a4, $zero, 461 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerC2EPKcS3_S3_i) jirl $ra, $ra, 0 .Ltmp1796: # EH_LABEL # %bb.3: - ld.d $s6, $sp, 128 + ld.d $s6, $sp, 136 b .LBB25_5 .LBB25_4: pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) @@ -16103,15 +16140,15 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1830: # EH_LABEL .LBB25_37: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit173 - addi.d $s1, $sp, 112 - st.d $s1, $sp, 96 - st.h $s4, $sp, 112 - st.b $s5, $sp, 114 + addi.d $s1, $sp, 120 + st.d $s1, $sp, 104 + st.h $s4, $sp, 120 + st.b $s5, $sp, 122 ori $a0, $zero, 3 - st.d $a0, $sp, 104 - st.b $zero, $sp, 115 + st.d $a0, $sp, 112 + st.b $zero, $sp, 123 .Ltmp1832: # EH_LABEL - addi.d $a1, $sp, 96 + addi.d $a1, $sp, 104 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 @@ -16233,8 +16270,11 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat beqz $a0, .LBB25_66 # %bb.61: .Ltmp1860: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI25_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI25_1) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, 1019 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp1861: # EH_LABEL @@ -16261,15 +16301,15 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1865: # EH_LABEL .LBB25_66: # %_ZN9benchmark8internallsIA15_cEERNS0_7LogTypeES4_RKT_.exit - addi.d $s2, $sp, 80 - st.d $s2, $sp, 64 - st.h $s4, $sp, 80 - st.b $s5, $sp, 82 + addi.d $s2, $sp, 88 + st.d $s2, $sp, 72 + st.h $s4, $sp, 88 + st.b $s5, $sp, 90 ori $a0, $zero, 3 - st.d $a0, $sp, 72 - st.b $zero, $sp, 83 + st.d $a0, $sp, 80 + st.b $zero, $sp, 91 .Ltmp1867: # EH_LABEL - addi.d $a1, $sp, 64 + addi.d $a1, $sp, 72 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 @@ -16295,33 +16335,38 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1872: # EH_LABEL .LBB25_71: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit228 - addi.d $s3, $sp, 48 - st.d $s3, $sp, 32 - st.h $s4, $sp, 48 - st.b $s5, $sp, 50 + addi.d $s3, $sp, 56 + st.d $s3, $sp, 40 + st.h $s4, $sp, 56 + st.b $s5, $sp, 58 ori $a0, $zero, 3 - st.d $a0, $sp, 40 - st.b $zero, $sp, 51 + st.d $a0, $sp, 48 + st.b $zero, $sp, 59 .Ltmp1874: # EH_LABEL - addi.d $a1, $sp, 32 + addi.d $a1, $sp, 40 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp1875: # EH_LABEL # %bb.72: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit234 ld.d $a0, $s6, 0 - pcalau12i $s5, %pc_hi20(.LCPI25_2) - pcalau12i $s4, %pc_hi20(.LCPI25_3) + lu12i.w $s4, -487882 beqz $a0, .LBB25_76 # %bb.73: - fld.d $fa1, $s5, %pc_lo12(.LCPI25_2) fsub.d $fa0, $fa0, $fs2 - fabs.d $fa2, $fs2 - fld.d $fa3, $s4, %pc_lo12(.LCPI25_3) - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fs2, $fcc0 + fabs.d $fa1, $fs2 + ori $a1, $s4, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa2, $a1 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fa2, $fs2, $fcc0 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa3 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp1876: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 @@ -16338,26 +16383,26 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1879: # EH_LABEL .LBB25_76: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit240 - ld.d $a0, $sp, 32 + ld.d $a0, $sp, 40 beq $a0, $s3, .LBB25_78 # %bb.77: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i - ld.d $a1, $sp, 48 + ld.d $a1, $sp, 56 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB25_78: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit - ld.d $a0, $sp, 64 + ld.d $a0, $sp, 72 beq $a0, $s2, .LBB25_80 # %bb.79: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i241 - ld.d $a1, $sp, 80 + ld.d $a1, $sp, 88 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB25_80: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit243 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB25_82 # %bb.81: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i244 - ld.d $a1, $sp, 112 + ld.d $a1, $sp, 120 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -16365,26 +16410,26 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat fcmp.clt.d $fcc0, $fs5, $fs1 bceqz $fcc0, .LBB25_171 # %bb.83: - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 144 beq $a0, $s0, .LBB25_85 # %bb.84: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i256 - ld.d $a1, $sp, 152 + ld.d $a1, $sp, 160 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB25_85: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit258 - st.d $s0, $sp, 136 + st.d $s0, $sp, 144 pcalau12i $a0, %pc_hi20(.L.str.6) addi.d $a0, $a0, %pc_lo12(.L.str.6) - ld.h $s6, $a0, 0 - ld.b $s7, $a0, 2 - st.h $s6, $sp, 152 - st.b $s7, $sp, 154 + ld.h $s5, $a0, 0 + ld.b $s6, $a0, 2 + st.h $s5, $sp, 160 + st.b $s6, $sp, 162 ori $a0, $zero, 3 - st.d $a0, $sp, 144 - st.b $zero, $sp, 155 + st.d $a0, $sp, 152 + st.b $zero, $sp, 163 .Ltmp1881: # EH_LABEL - addi.d $a1, $sp, 136 + addi.d $a1, $sp, 144 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 @@ -16405,23 +16450,23 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat addi.d $a2, $a0, %pc_lo12(.L.str.30) pcalau12i $a0, %pc_hi20(.L__func__._Z29CheckIsIterationInvariantRateRK7Results) addi.d $a3, $a0, %pc_lo12(.L__func__._Z29CheckIsIterationInvariantRateRK7Results) - addi.d $a0, $sp, 24 + addi.d $a0, $sp, 32 ori $a4, $zero, 462 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerC2EPKcS3_S3_i) jirl $ra, $ra, 0 .Ltmp1885: # EH_LABEL # %bb.88: - ld.d $s8, $sp, 24 + ld.d $s7, $sp, 32 b .LBB25_90 .LBB25_89: pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) ld.b $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) dbar 20 pcalau12i $a1, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s8, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + addi.d $s7, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) beqz $a0, .LBB25_177 .LBB25_90: # %_ZN9benchmark8internal18GetNullLogInstanceEv.exit266 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_122 # %bb.91: .Ltmp1886: # EH_LABEL @@ -16432,7 +16477,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1887: # EH_LABEL # %bb.92: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit269 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_122 # %bb.93: .Ltmp1888: # EH_LABEL @@ -16443,7 +16488,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1889: # EH_LABEL # %bb.94: # %_ZN9benchmark8internallsIA141_cEERNS0_7LogTypeES4_RKT_.exit272 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_122 # %bb.95: .Ltmp1890: # EH_LABEL @@ -16454,7 +16499,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1891: # EH_LABEL # %bb.96: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit275 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_122 # %bb.97: .Ltmp1893: # EH_LABEL @@ -16463,7 +16508,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1894: # EH_LABEL # %bb.98: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit278 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_122 # %bb.99: .Ltmp1895: # EH_LABEL @@ -16474,7 +16519,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1896: # EH_LABEL # %bb.100: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit281 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_122 # %bb.101: ld.d $a1, $fp, 0 @@ -16484,7 +16529,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1898: # EH_LABEL # %bb.102: # %_ZN9benchmark8internallsINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEERNS0_7LogTypeES9_RKT_.exit284 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_122 # %bb.103: .Ltmp1899: # EH_LABEL @@ -16495,7 +16540,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1900: # EH_LABEL # %bb.104: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit287 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_122 # %bb.105: .Ltmp1901: # EH_LABEL @@ -16506,7 +16551,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1902: # EH_LABEL # %bb.106: # %_ZN9benchmark8internallsIA141_cEERNS0_7LogTypeES4_RKT_.exit290 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_122 # %bb.107: .Ltmp1903: # EH_LABEL @@ -16517,7 +16562,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1904: # EH_LABEL # %bb.108: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit293 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_122 # %bb.109: .Ltmp1906: # EH_LABEL @@ -16526,7 +16571,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1907: # EH_LABEL # %bb.110: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit296 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_122 # %bb.111: .Ltmp1908: # EH_LABEL @@ -16537,7 +16582,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1909: # EH_LABEL # %bb.112: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit299 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_122 # %bb.113: .Ltmp1910: # EH_LABEL @@ -16548,7 +16593,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1911: # EH_LABEL # %bb.114: # %_ZN9benchmark8internallsIA11_cEERNS0_7LogTypeES4_RKT_.exit302 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_122 # %bb.115: .Ltmp1912: # EH_LABEL @@ -16559,7 +16604,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1913: # EH_LABEL # %bb.116: # %_ZN9benchmark8internallsIA7_cEERNS0_7LogTypeES4_RKT_.exit305 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_122 # %bb.117: .Ltmp1914: # EH_LABEL @@ -16570,7 +16615,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1915: # EH_LABEL # %bb.118: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit308 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_122 # %bb.119: .Ltmp1916: # EH_LABEL @@ -16581,7 +16626,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1917: # EH_LABEL # %bb.120: # %_ZN9benchmark8internallsIA4_cEERNS0_7LogTypeES4_RKT_.exit311 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_122 # %bb.121: .Ltmp1918: # EH_LABEL @@ -16592,20 +16637,20 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1919: # EH_LABEL .LBB25_122: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit314 - st.d $s1, $sp, 96 - st.h $s6, $sp, 112 - st.b $s7, $sp, 114 + st.d $s1, $sp, 104 + st.h $s5, $sp, 120 + st.b $s6, $sp, 122 ori $a0, $zero, 3 - st.d $a0, $sp, 104 - st.b $zero, $sp, 115 + st.d $a0, $sp, 112 + st.b $zero, $sp, 123 .Ltmp1921: # EH_LABEL - addi.d $a1, $sp, 96 + addi.d $a1, $sp, 104 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp1922: # EH_LABEL # %bb.123: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit320 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_151 # %bb.124: .Ltmp1923: # EH_LABEL @@ -16613,7 +16658,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1924: # EH_LABEL # %bb.125: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit323 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_151 # %bb.126: .Ltmp1925: # EH_LABEL @@ -16624,7 +16669,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1926: # EH_LABEL # %bb.127: # %_ZN9benchmark8internallsIA14_cEERNS0_7LogTypeES4_RKT_.exit326 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_151 # %bb.128: .Ltmp1928: # EH_LABEL @@ -16633,7 +16678,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1929: # EH_LABEL # %bb.129: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit329 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_151 # %bb.130: .Ltmp1930: # EH_LABEL @@ -16644,7 +16689,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1931: # EH_LABEL # %bb.131: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit332 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_151 # %bb.132: .Ltmp1932: # EH_LABEL @@ -16655,7 +16700,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1933: # EH_LABEL # %bb.133: # %_ZN9benchmark8internallsIA141_cEERNS0_7LogTypeES4_RKT_.exit335 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_151 # %bb.134: .Ltmp1934: # EH_LABEL @@ -16666,7 +16711,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1935: # EH_LABEL # %bb.135: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit338 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_151 # %bb.136: .Ltmp1937: # EH_LABEL @@ -16675,7 +16720,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1938: # EH_LABEL # %bb.137: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit341 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_151 # %bb.138: .Ltmp1939: # EH_LABEL @@ -16686,7 +16731,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1940: # EH_LABEL # %bb.139: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit344 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_151 # %bb.140: .Ltmp1941: # EH_LABEL @@ -16697,7 +16742,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1942: # EH_LABEL # %bb.141: # %_ZN9benchmark8internallsIA19_cEERNS0_7LogTypeES4_RKT_.exit347 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_151 # %bb.142: .Ltmp1944: # EH_LABEL @@ -16706,7 +16751,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1945: # EH_LABEL # %bb.143: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit350 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_151 # %bb.144: .Ltmp1946: # EH_LABEL @@ -16717,17 +16762,20 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1947: # EH_LABEL # %bb.145: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit353 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_151 # %bb.146: .Ltmp1949: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI25_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI25_1) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, 1019 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp1950: # EH_LABEL # %bb.147: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit356 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_151 # %bb.148: .Ltmp1951: # EH_LABEL @@ -16738,7 +16786,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1952: # EH_LABEL # %bb.149: # %_ZN9benchmark8internallsIA5_cEERNS0_7LogTypeES4_RKT_.exit359 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_151 # %bb.150: .Ltmp1953: # EH_LABEL @@ -16749,20 +16797,20 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1954: # EH_LABEL .LBB25_151: # %_ZN9benchmark8internallsIA15_cEERNS0_7LogTypeES4_RKT_.exit362 - st.d $s2, $sp, 64 - st.h $s6, $sp, 80 - st.b $s7, $sp, 82 + st.d $s2, $sp, 72 + st.h $s5, $sp, 88 + st.b $s6, $sp, 90 ori $a0, $zero, 3 - st.d $a0, $sp, 72 - st.b $zero, $sp, 83 + st.d $a0, $sp, 80 + st.b $zero, $sp, 91 .Ltmp1956: # EH_LABEL - addi.d $a1, $sp, 64 + addi.d $a1, $sp, 72 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp1957: # EH_LABEL # %bb.152: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit368 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_156 # %bb.153: fsub.d $fa0, $fa0, $fs1 @@ -16771,7 +16819,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1959: # EH_LABEL # %bb.154: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit371 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_156 # %bb.155: .Ltmp1960: # EH_LABEL @@ -16782,36 +16830,42 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1961: # EH_LABEL .LBB25_156: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit374 - st.d $s3, $sp, 32 - st.h $s6, $sp, 48 - st.b $s7, $sp, 50 + st.d $s3, $sp, 40 + st.h $s5, $sp, 56 + st.b $s6, $sp, 58 ori $a0, $zero, 3 - st.d $a0, $sp, 40 - st.b $zero, $sp, 51 + st.d $a0, $sp, 48 + st.b $zero, $sp, 59 .Ltmp1963: # EH_LABEL - addi.d $a1, $sp, 32 + addi.d $a1, $sp, 40 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp1964: # EH_LABEL # %bb.157: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit380 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_161 # %bb.158: - fld.d $fa1, $s5, %pc_lo12(.LCPI25_2) fsub.d $fa0, $fa0, $fs1 - fabs.d $fa2, $fs1 - fld.d $fa3, $s4, %pc_lo12(.LCPI25_3) - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fs1, $fcc0 + fabs.d $fa1, $fs1 + ori $a1, $s4, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa2, $a1 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fa2, $fs1, $fcc0 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa3 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp1965: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp1966: # EH_LABEL # %bb.159: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit383 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB25_161 # %bb.160: .Ltmp1967: # EH_LABEL @@ -16822,26 +16876,26 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat jirl $ra, $ra, 0 .Ltmp1968: # EH_LABEL .LBB25_161: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit386 - ld.d $a0, $sp, 32 + ld.d $a0, $sp, 40 beq $a0, $s3, .LBB25_163 # %bb.162: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i387 - ld.d $a1, $sp, 48 + ld.d $a1, $sp, 56 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB25_163: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit389 - ld.d $a0, $sp, 64 + ld.d $a0, $sp, 72 beq $a0, $s2, .LBB25_165 # %bb.164: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i390 - ld.d $a1, $sp, 80 + ld.d $a1, $sp, 88 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB25_165: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit392 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB25_167 # %bb.166: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i393 - ld.d $a1, $sp, 112 + ld.d $a1, $sp, 120 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -16849,21 +16903,20 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat fcmp.clt.d $fcc0, $fs2, $fs0 bceqz $fcc0, .LBB25_173 # %bb.168: - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 144 beq $a0, $s0, .LBB25_170 # %bb.169: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i408 - ld.d $a1, $sp, 152 + ld.d $a1, $sp, 160 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB25_170: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit410 - fld.d $fs5, $sp, 168 # 8-byte Folded Reload - fld.d $fs4, $sp, 176 # 8-byte Folded Reload - fld.d $fs3, $sp, 184 # 8-byte Folded Reload - fld.d $fs2, $sp, 192 # 8-byte Folded Reload - fld.d $fs1, $sp, 200 # 8-byte Folded Reload - fld.d $fs0, $sp, 208 # 8-byte Folded Reload - ld.d $s8, $sp, 216 # 8-byte Folded Reload + fld.d $fs5, $sp, 176 # 8-byte Folded Reload + fld.d $fs4, $sp, 184 # 8-byte Folded Reload + fld.d $fs3, $sp, 192 # 8-byte Folded Reload + fld.d $fs2, $sp, 200 # 8-byte Folded Reload + fld.d $fs1, $sp, 208 # 8-byte Folded Reload + fld.d $fs0, $sp, 216 # 8-byte Folded Reload ld.d $s7, $sp, 224 # 8-byte Folded Reload ld.d $s6, $sp, 232 # 8-byte Folded Reload ld.d $s5, $sp, 240 # 8-byte Folded Reload @@ -16878,14 +16931,14 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat ret .LBB25_171: .Ltmp1976: # EH_LABEL - addi.d $a0, $sp, 128 + addi.d $a0, $sp, 136 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerD2Ev) jirl $ra, $ra, 0 .Ltmp1977: # EH_LABEL # %bb.172: .LBB25_173: .Ltmp1973: # EH_LABEL - addi.d $a0, $sp, 24 + addi.d $a0, $sp, 32 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerD2Ev) jirl $ra, $ra, 0 .Ltmp1974: # EH_LABEL @@ -16915,8 +16968,8 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat beqz $a0, .LBB25_90 # %bb.178: pcalau12i $a0, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s8, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - st.d $zero, $s8, 0 + addi.d $s7, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + st.d $zero, $s7, 0 pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) addi.d $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) pcaddu18i $ra, %call36(__cxa_guard_release) @@ -16925,25 +16978,25 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat .LBB25_179: .Ltmp1955: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB25_203 b .LBB25_211 .LBB25_180: .Ltmp1866: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 bne $a0, $s1, .LBB25_215 b .LBB25_216 .LBB25_181: .Ltmp1948: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB25_203 b .LBB25_211 .LBB25_182: .Ltmp1859: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 bne $a0, $s1, .LBB25_215 b .LBB25_216 .LBB25_183: @@ -16955,13 +17008,13 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat .LBB25_185: .Ltmp1943: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB25_203 b .LBB25_211 .LBB25_186: .Ltmp1854: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 bne $a0, $s1, .LBB25_215 b .LBB25_216 .LBB25_187: @@ -16973,13 +17026,13 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat .LBB25_189: .Ltmp1936: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB25_203 b .LBB25_211 .LBB25_190: .Ltmp1847: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 bne $a0, $s1, .LBB25_215 b .LBB25_216 .LBB25_191: @@ -16993,7 +17046,7 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat .LBB25_194: move $fp, $a0 fcmp.clt.d $fcc0, $fs2, $fs0 - addi.d $a0, $sp, 24 + addi.d $a0, $sp, 32 bcnez $fcc0, .LBB25_204 b .LBB25_217 .LBB25_195: @@ -17011,24 +17064,24 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat b .LBB25_204 .LBB25_200: .Ltmp1969: # EH_LABEL - ld.d $a2, $sp, 32 + ld.d $a2, $sp, 40 move $fp, $a0 bne $a2, $s3, .LBB25_207 # %bb.201: - ld.d $a0, $sp, 64 + ld.d $a0, $sp, 72 bne $a0, $s2, .LBB25_209 .LBB25_202: - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 bne $a0, $s1, .LBB25_211 .LBB25_203: fcmp.clt.d $fcc0, $fs2, $fs0 - addi.d $a0, $sp, 24 + addi.d $a0, $sp, 32 bceqz $fcc0, .LBB25_217 .LBB25_204: - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 144 beq $a0, $s0, .LBB25_206 # %bb.205: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i411 - ld.d $a1, $sp, 152 + ld.d $a1, $sp, 160 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -17037,60 +17090,60 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat pcaddu18i $ra, %call36(_Unwind_Resume) jirl $ra, $ra, 0 .LBB25_207: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i399 - ld.d $a0, $sp, 48 + ld.d $a0, $sp, 56 addi.d $a1, $a0, 1 move $a0, $a2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 64 + ld.d $a0, $sp, 72 beq $a0, $s2, .LBB25_202 b .LBB25_209 .LBB25_208: .Ltmp1962: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 64 + ld.d $a0, $sp, 72 beq $a0, $s2, .LBB25_202 .LBB25_209: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i402 - ld.d $a1, $sp, 80 + ld.d $a1, $sp, 88 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB25_203 b .LBB25_211 .LBB25_210: .Ltmp1927: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB25_203 .LBB25_211: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i405 - ld.d $a1, $sp, 112 + ld.d $a1, $sp, 120 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 fcmp.clt.d $fcc0, $fs2, $fs0 - addi.d $a0, $sp, 24 + addi.d $a0, $sp, 32 bcnez $fcc0, .LBB25_204 b .LBB25_217 .LBB25_212: .Ltmp1880: # EH_LABEL - ld.d $a2, $sp, 32 + ld.d $a2, $sp, 40 move $fp, $a0 bne $a2, $s3, .LBB25_219 # %bb.213: - ld.d $a0, $sp, 64 + ld.d $a0, $sp, 72 bne $a0, $s2, .LBB25_221 .LBB25_214: - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB25_216 .LBB25_215: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i253 - ld.d $a1, $sp, 112 + ld.d $a1, $sp, 120 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB25_216: - addi.d $a0, $sp, 128 - ld.d $a1, $sp, 16 + addi.d $a0, $sp, 136 + ld.d $a1, $sp, 24 movgr2cf $fcc0, $a1 bceqz $fcc0, .LBB25_204 .LBB25_217: # %.invoke @@ -17100,31 +17153,31 @@ _Z29CheckIsIterationInvariantRateRK7Results: # @_Z29CheckIsIterationInvariantRat .Ltmp1971: # EH_LABEL # %bb.218: # %.cont .LBB25_219: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i247 - ld.d $a0, $sp, 48 + ld.d $a0, $sp, 56 addi.d $a1, $a0, 1 move $a0, $a2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 64 + ld.d $a0, $sp, 72 beq $a0, $s2, .LBB25_214 b .LBB25_221 .LBB25_220: .Ltmp1873: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 64 + ld.d $a0, $sp, 72 beq $a0, $s2, .LBB25_214 .LBB25_221: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i250 - ld.d $a1, $sp, 80 + ld.d $a1, $sp, 88 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 bne $a0, $s1, .LBB25_215 b .LBB25_216 .LBB25_222: .Ltmp1838: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 bne $a0, $s1, .LBB25_215 b .LBB25_216 .LBB25_223: @@ -17455,18 +17508,8 @@ GCC_except_table26: .Lcst_end21: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z18CheckAvgIterationsRK7Results -.LCPI27_0: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI27_1: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI27_2: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 -.LCPI27_3: - .dword 0x4059000000000000 # double 100 .text - .hidden _Z18CheckAvgIterationsRK7Results + .hidden _Z18CheckAvgIterationsRK7Results # -- Begin function _Z18CheckAvgIterationsRK7Results .globl _Z18CheckAvgIterationsRK7Results .p2align 5 .type _Z18CheckAvgIterationsRK7Results,@function @@ -17476,24 +17519,23 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results .cfi_personality 155, DW.ref.__gxx_personality_v0 .cfi_lsda 27, .Lexception22 # %bb.0: # %._crit_edge.i.i - addi.d $sp, $sp, -304 - .cfi_def_cfa_offset 304 - st.d $ra, $sp, 296 # 8-byte Folded Spill - st.d $fp, $sp, 288 # 8-byte Folded Spill - st.d $s0, $sp, 280 # 8-byte Folded Spill - st.d $s1, $sp, 272 # 8-byte Folded Spill - st.d $s2, $sp, 264 # 8-byte Folded Spill - st.d $s3, $sp, 256 # 8-byte Folded Spill - st.d $s4, $sp, 248 # 8-byte Folded Spill - st.d $s5, $sp, 240 # 8-byte Folded Spill - st.d $s6, $sp, 232 # 8-byte Folded Spill - st.d $s7, $sp, 224 # 8-byte Folded Spill - st.d $s8, $sp, 216 # 8-byte Folded Spill - fst.d $fs0, $sp, 208 # 8-byte Folded Spill - fst.d $fs1, $sp, 200 # 8-byte Folded Spill - fst.d $fs2, $sp, 192 # 8-byte Folded Spill - fst.d $fs3, $sp, 184 # 8-byte Folded Spill - fst.d $fs4, $sp, 176 # 8-byte Folded Spill + addi.d $sp, $sp, -288 + .cfi_def_cfa_offset 288 + st.d $ra, $sp, 280 # 8-byte Folded Spill + st.d $fp, $sp, 272 # 8-byte Folded Spill + st.d $s0, $sp, 264 # 8-byte Folded Spill + st.d $s1, $sp, 256 # 8-byte Folded Spill + st.d $s2, $sp, 248 # 8-byte Folded Spill + st.d $s3, $sp, 240 # 8-byte Folded Spill + st.d $s4, $sp, 232 # 8-byte Folded Spill + st.d $s5, $sp, 224 # 8-byte Folded Spill + st.d $s6, $sp, 216 # 8-byte Folded Spill + st.d $s7, $sp, 208 # 8-byte Folded Spill + fst.d $fs0, $sp, 200 # 8-byte Folded Spill + fst.d $fs1, $sp, 192 # 8-byte Folded Spill + fst.d $fs2, $sp, 184 # 8-byte Folded Spill + fst.d $fs3, $sp, 176 # 8-byte Folded Spill + fst.d $fs4, $sp, 168 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -17504,43 +17546,45 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results .cfi_offset 28, -64 .cfi_offset 29, -72 .cfi_offset 30, -80 - .cfi_offset 31, -88 - .cfi_offset 56, -96 - .cfi_offset 57, -104 - .cfi_offset 58, -112 - .cfi_offset 59, -120 - .cfi_offset 60, -128 + .cfi_offset 56, -88 + .cfi_offset 57, -96 + .cfi_offset 58, -104 + .cfi_offset 59, -112 + .cfi_offset 60, -120 move $fp, $a0 pcaddu18i $ra, %call36(_ZNK7Results13NumIterationsEv) jirl $ra, $ra, 0 fmov.d $fs0, $fa0 - addi.d $s0, $sp, 160 - st.d $s0, $sp, 144 + addi.d $s0, $sp, 152 + st.d $s0, $sp, 136 pcalau12i $a0, %pc_hi20(.L.str.5) addi.d $a0, $a0, %pc_lo12(.L.str.5) ld.h $s4, $a0, 0 ld.b $s5, $a0, 2 - st.h $s4, $sp, 160 - st.b $s5, $sp, 162 + st.h $s4, $sp, 152 + st.b $s5, $sp, 154 ori $a0, $zero, 3 - st.d $a0, $sp, 152 - st.b $zero, $sp, 163 + st.d $a0, $sp, 144 + st.b $zero, $sp, 155 .Ltmp1985: # EH_LABEL - addi.d $a1, $sp, 144 + addi.d $a1, $sp, 136 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp1986: # EH_LABEL # %bb.1: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit - pcalau12i $a0, %pc_hi20(.LCPI27_0) - fld.d $fs3, $a0, %pc_lo12(.LCPI27_0) frecip.d $fs2, $fs0 fsub.d $fa0, $fa0, $fs2 fabs.d $fs4, $fa0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 fmul.d $fs1, $fs2, $fs3 fcmp.cule.d $fcc0, $fs1, $fs4 movcf2gr $a0, $fcc0 - st.d $a0, $sp, 24 + st.d $a0, $sp, 16 bceqz $fcc0, .LBB27_4 # %bb.2: .Ltmp1988: # EH_LABEL @@ -17550,13 +17594,13 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results addi.d $a2, $a0, %pc_lo12(.L.str.30) pcalau12i $a0, %pc_hi20(.L__func__._Z18CheckAvgIterationsRK7Results) addi.d $a3, $a0, %pc_lo12(.L__func__._Z18CheckAvgIterationsRK7Results) - addi.d $a0, $sp, 136 + addi.d $a0, $sp, 128 ori $a4, $zero, 504 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerC2EPKcS3_S3_i) jirl $ra, $ra, 0 .Ltmp1989: # EH_LABEL # %bb.3: - ld.d $s6, $sp, 136 + ld.d $s6, $sp, 128 b .LBB27_5 .LBB27_4: pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) @@ -17737,15 +17781,15 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2023: # EH_LABEL .LBB27_37: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit154 - addi.d $s1, $sp, 120 - st.d $s1, $sp, 104 - st.h $s4, $sp, 120 - st.b $s5, $sp, 122 + addi.d $s1, $sp, 112 + st.d $s1, $sp, 96 + st.h $s4, $sp, 112 + st.b $s5, $sp, 114 ori $a0, $zero, 3 - st.d $a0, $sp, 112 - st.b $zero, $sp, 123 + st.d $a0, $sp, 104 + st.b $zero, $sp, 115 .Ltmp2025: # EH_LABEL - addi.d $a1, $sp, 104 + addi.d $a1, $sp, 96 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 @@ -17867,8 +17911,11 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results beqz $a0, .LBB27_66 # %bb.61: .Ltmp2053: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI27_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI27_1) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, 1019 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp2054: # EH_LABEL @@ -17895,15 +17942,15 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2058: # EH_LABEL .LBB27_66: # %_ZN9benchmark8internallsIA15_cEERNS0_7LogTypeES4_RKT_.exit - addi.d $s2, $sp, 88 - st.d $s2, $sp, 72 - st.h $s4, $sp, 88 - st.b $s5, $sp, 90 + addi.d $s2, $sp, 80 + st.d $s2, $sp, 64 + st.h $s4, $sp, 80 + st.b $s5, $sp, 82 ori $a0, $zero, 3 - st.d $a0, $sp, 80 - st.b $zero, $sp, 91 + st.d $a0, $sp, 72 + st.b $zero, $sp, 83 .Ltmp2060: # EH_LABEL - addi.d $a1, $sp, 72 + addi.d $a1, $sp, 64 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 @@ -17929,33 +17976,38 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2065: # EH_LABEL .LBB27_71: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit209 - addi.d $s3, $sp, 56 - st.d $s3, $sp, 40 - st.h $s4, $sp, 56 - st.b $s5, $sp, 58 + addi.d $s3, $sp, 48 + st.d $s3, $sp, 32 + st.h $s4, $sp, 48 + st.b $s5, $sp, 50 ori $a0, $zero, 3 - st.d $a0, $sp, 48 - st.b $zero, $sp, 59 + st.d $a0, $sp, 40 + st.b $zero, $sp, 51 .Ltmp2067: # EH_LABEL - addi.d $a1, $sp, 40 + addi.d $a1, $sp, 32 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp2068: # EH_LABEL # %bb.72: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit215 ld.d $a0, $s6, 0 - pcalau12i $s5, %pc_hi20(.LCPI27_2) - pcalau12i $s4, %pc_hi20(.LCPI27_3) + lu12i.w $s4, -487882 beqz $a0, .LBB27_76 # %bb.73: - fld.d $fa1, $s5, %pc_lo12(.LCPI27_2) fsub.d $fa0, $fa0, $fs2 - fabs.d $fa2, $fs2 - fld.d $fa3, $s4, %pc_lo12(.LCPI27_3) - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fs2, $fcc0 + fabs.d $fa1, $fs2 + ori $a1, $s4, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa2, $a1 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fa2, $fs2, $fcc0 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa3 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp2069: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 @@ -17972,26 +18024,26 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2072: # EH_LABEL .LBB27_76: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit221 - ld.d $a0, $sp, 40 + ld.d $a0, $sp, 32 beq $a0, $s3, .LBB27_78 # %bb.77: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i - ld.d $a1, $sp, 56 + ld.d $a1, $sp, 48 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB27_78: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 beq $a0, $s2, .LBB27_80 # %bb.79: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i222 - ld.d $a1, $sp, 88 + ld.d $a1, $sp, 80 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB27_80: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit224 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB27_82 # %bb.81: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i225 - ld.d $a1, $sp, 120 + ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -17999,26 +18051,26 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results fcmp.clt.d $fcc0, $fs4, $fs1 bceqz $fcc0, .LBB27_171 # %bb.83: - ld.d $a0, $sp, 144 + ld.d $a0, $sp, 136 beq $a0, $s0, .LBB27_85 # %bb.84: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i237 - ld.d $a1, $sp, 160 + ld.d $a1, $sp, 152 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB27_85: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit239 - st.d $s0, $sp, 144 + st.d $s0, $sp, 136 pcalau12i $a0, %pc_hi20(.L.str.6) addi.d $a0, $a0, %pc_lo12(.L.str.6) - ld.h $s6, $a0, 0 - ld.b $s7, $a0, 2 - st.h $s6, $sp, 160 - st.b $s7, $sp, 162 + ld.h $s5, $a0, 0 + ld.b $s6, $a0, 2 + st.h $s5, $sp, 152 + st.b $s6, $sp, 154 ori $a0, $zero, 3 - st.d $a0, $sp, 152 - st.b $zero, $sp, 163 + st.d $a0, $sp, 144 + st.b $zero, $sp, 155 .Ltmp2074: # EH_LABEL - addi.d $a1, $sp, 144 + addi.d $a1, $sp, 136 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 @@ -18039,23 +18091,23 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results addi.d $a2, $a0, %pc_lo12(.L.str.30) pcalau12i $a0, %pc_hi20(.L__func__._Z18CheckAvgIterationsRK7Results) addi.d $a3, $a0, %pc_lo12(.L__func__._Z18CheckAvgIterationsRK7Results) - addi.d $a0, $sp, 32 + addi.d $a0, $sp, 24 ori $a4, $zero, 505 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerC2EPKcS3_S3_i) jirl $ra, $ra, 0 .Ltmp2078: # EH_LABEL # %bb.88: - ld.d $s8, $sp, 32 + ld.d $s7, $sp, 24 b .LBB27_90 .LBB27_89: pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) ld.b $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) dbar 20 pcalau12i $a1, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s8, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + addi.d $s7, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) beqz $a0, .LBB27_177 .LBB27_90: # %_ZN9benchmark8internal18GetNullLogInstanceEv.exit247 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_122 # %bb.91: .Ltmp2079: # EH_LABEL @@ -18066,7 +18118,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2080: # EH_LABEL # %bb.92: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit250 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_122 # %bb.93: .Ltmp2081: # EH_LABEL @@ -18077,7 +18129,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2082: # EH_LABEL # %bb.94: # %_ZN9benchmark8internallsIA141_cEERNS0_7LogTypeES4_RKT_.exit253 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_122 # %bb.95: .Ltmp2083: # EH_LABEL @@ -18088,7 +18140,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2084: # EH_LABEL # %bb.96: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit256 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_122 # %bb.97: .Ltmp2086: # EH_LABEL @@ -18097,7 +18149,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2087: # EH_LABEL # %bb.98: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit259 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_122 # %bb.99: .Ltmp2088: # EH_LABEL @@ -18108,7 +18160,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2089: # EH_LABEL # %bb.100: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit262 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_122 # %bb.101: ld.d $a1, $fp, 0 @@ -18118,7 +18170,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2091: # EH_LABEL # %bb.102: # %_ZN9benchmark8internallsINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEERNS0_7LogTypeES9_RKT_.exit265 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_122 # %bb.103: .Ltmp2092: # EH_LABEL @@ -18129,7 +18181,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2093: # EH_LABEL # %bb.104: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit268 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_122 # %bb.105: .Ltmp2094: # EH_LABEL @@ -18140,7 +18192,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2095: # EH_LABEL # %bb.106: # %_ZN9benchmark8internallsIA141_cEERNS0_7LogTypeES4_RKT_.exit271 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_122 # %bb.107: .Ltmp2096: # EH_LABEL @@ -18151,7 +18203,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2097: # EH_LABEL # %bb.108: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit274 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_122 # %bb.109: .Ltmp2099: # EH_LABEL @@ -18160,7 +18212,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2100: # EH_LABEL # %bb.110: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit277 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_122 # %bb.111: .Ltmp2101: # EH_LABEL @@ -18171,7 +18223,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2102: # EH_LABEL # %bb.112: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit280 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_122 # %bb.113: .Ltmp2103: # EH_LABEL @@ -18182,7 +18234,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2104: # EH_LABEL # %bb.114: # %_ZN9benchmark8internallsIA11_cEERNS0_7LogTypeES4_RKT_.exit283 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_122 # %bb.115: .Ltmp2105: # EH_LABEL @@ -18193,7 +18245,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2106: # EH_LABEL # %bb.116: # %_ZN9benchmark8internallsIA7_cEERNS0_7LogTypeES4_RKT_.exit286 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_122 # %bb.117: .Ltmp2107: # EH_LABEL @@ -18204,7 +18256,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2108: # EH_LABEL # %bb.118: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit289 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_122 # %bb.119: .Ltmp2109: # EH_LABEL @@ -18215,7 +18267,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2110: # EH_LABEL # %bb.120: # %_ZN9benchmark8internallsIA4_cEERNS0_7LogTypeES4_RKT_.exit292 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_122 # %bb.121: .Ltmp2111: # EH_LABEL @@ -18226,20 +18278,20 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2112: # EH_LABEL .LBB27_122: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit295 - st.d $s1, $sp, 104 - st.h $s6, $sp, 120 - st.b $s7, $sp, 122 + st.d $s1, $sp, 96 + st.h $s5, $sp, 112 + st.b $s6, $sp, 114 ori $a0, $zero, 3 - st.d $a0, $sp, 112 - st.b $zero, $sp, 123 + st.d $a0, $sp, 104 + st.b $zero, $sp, 115 .Ltmp2114: # EH_LABEL - addi.d $a1, $sp, 104 + addi.d $a1, $sp, 96 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp2115: # EH_LABEL # %bb.123: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit301 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_151 # %bb.124: .Ltmp2116: # EH_LABEL @@ -18247,7 +18299,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2117: # EH_LABEL # %bb.125: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit304 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_151 # %bb.126: .Ltmp2118: # EH_LABEL @@ -18258,7 +18310,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2119: # EH_LABEL # %bb.127: # %_ZN9benchmark8internallsIA14_cEERNS0_7LogTypeES4_RKT_.exit307 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_151 # %bb.128: .Ltmp2121: # EH_LABEL @@ -18267,7 +18319,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2122: # EH_LABEL # %bb.129: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit310 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_151 # %bb.130: .Ltmp2123: # EH_LABEL @@ -18278,7 +18330,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2124: # EH_LABEL # %bb.131: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit313 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_151 # %bb.132: .Ltmp2125: # EH_LABEL @@ -18289,7 +18341,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2126: # EH_LABEL # %bb.133: # %_ZN9benchmark8internallsIA141_cEERNS0_7LogTypeES4_RKT_.exit316 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_151 # %bb.134: .Ltmp2127: # EH_LABEL @@ -18300,7 +18352,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2128: # EH_LABEL # %bb.135: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit319 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_151 # %bb.136: .Ltmp2130: # EH_LABEL @@ -18309,7 +18361,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2131: # EH_LABEL # %bb.137: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit322 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_151 # %bb.138: .Ltmp2132: # EH_LABEL @@ -18320,7 +18372,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2133: # EH_LABEL # %bb.139: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit325 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_151 # %bb.140: .Ltmp2134: # EH_LABEL @@ -18331,7 +18383,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2135: # EH_LABEL # %bb.141: # %_ZN9benchmark8internallsIA19_cEERNS0_7LogTypeES4_RKT_.exit328 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_151 # %bb.142: .Ltmp2137: # EH_LABEL @@ -18340,7 +18392,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2138: # EH_LABEL # %bb.143: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit331 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_151 # %bb.144: .Ltmp2139: # EH_LABEL @@ -18351,17 +18403,20 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2140: # EH_LABEL # %bb.145: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit334 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_151 # %bb.146: .Ltmp2142: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI27_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI27_1) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, 1019 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp2143: # EH_LABEL # %bb.147: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit337 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_151 # %bb.148: .Ltmp2144: # EH_LABEL @@ -18372,7 +18427,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2145: # EH_LABEL # %bb.149: # %_ZN9benchmark8internallsIA5_cEERNS0_7LogTypeES4_RKT_.exit340 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_151 # %bb.150: .Ltmp2146: # EH_LABEL @@ -18383,20 +18438,20 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2147: # EH_LABEL .LBB27_151: # %_ZN9benchmark8internallsIA15_cEERNS0_7LogTypeES4_RKT_.exit343 - st.d $s2, $sp, 72 - st.h $s6, $sp, 88 - st.b $s7, $sp, 90 + st.d $s2, $sp, 64 + st.h $s5, $sp, 80 + st.b $s6, $sp, 82 ori $a0, $zero, 3 - st.d $a0, $sp, 80 - st.b $zero, $sp, 91 + st.d $a0, $sp, 72 + st.b $zero, $sp, 83 .Ltmp2149: # EH_LABEL - addi.d $a1, $sp, 72 + addi.d $a1, $sp, 64 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp2150: # EH_LABEL # %bb.152: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit349 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_156 # %bb.153: fsub.d $fa0, $fa0, $fs1 @@ -18405,7 +18460,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2152: # EH_LABEL # %bb.154: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit352 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_156 # %bb.155: .Ltmp2153: # EH_LABEL @@ -18416,36 +18471,42 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2154: # EH_LABEL .LBB27_156: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit355 - st.d $s3, $sp, 40 - st.h $s6, $sp, 56 - st.b $s7, $sp, 58 + st.d $s3, $sp, 32 + st.h $s5, $sp, 48 + st.b $s6, $sp, 50 ori $a0, $zero, 3 - st.d $a0, $sp, 48 - st.b $zero, $sp, 59 + st.d $a0, $sp, 40 + st.b $zero, $sp, 51 .Ltmp2156: # EH_LABEL - addi.d $a1, $sp, 40 + addi.d $a1, $sp, 32 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp2157: # EH_LABEL # %bb.157: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit361 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_161 # %bb.158: - fld.d $fa1, $s5, %pc_lo12(.LCPI27_2) fsub.d $fa0, $fa0, $fs1 - fabs.d $fa2, $fs1 - fld.d $fa3, $s4, %pc_lo12(.LCPI27_3) - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fs1, $fcc0 + fabs.d $fa1, $fs1 + ori $a1, $s4, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa2, $a1 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fa2, $fs1, $fcc0 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa3 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp2158: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp2159: # EH_LABEL # %bb.159: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit364 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB27_161 # %bb.160: .Ltmp2160: # EH_LABEL @@ -18456,26 +18517,26 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results jirl $ra, $ra, 0 .Ltmp2161: # EH_LABEL .LBB27_161: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit367 - ld.d $a0, $sp, 40 + ld.d $a0, $sp, 32 beq $a0, $s3, .LBB27_163 # %bb.162: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i368 - ld.d $a1, $sp, 56 + ld.d $a1, $sp, 48 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB27_163: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit370 - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 beq $a0, $s2, .LBB27_165 # %bb.164: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i371 - ld.d $a1, $sp, 88 + ld.d $a1, $sp, 80 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB27_165: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit373 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB27_167 # %bb.166: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i374 - ld.d $a1, $sp, 120 + ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -18483,42 +18544,41 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results fcmp.clt.d $fcc0, $fs2, $fs0 bceqz $fcc0, .LBB27_173 # %bb.168: - ld.d $a0, $sp, 144 + ld.d $a0, $sp, 136 beq $a0, $s0, .LBB27_170 # %bb.169: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i389 - ld.d $a1, $sp, 160 + ld.d $a1, $sp, 152 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB27_170: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit391 - fld.d $fs4, $sp, 176 # 8-byte Folded Reload - fld.d $fs3, $sp, 184 # 8-byte Folded Reload - fld.d $fs2, $sp, 192 # 8-byte Folded Reload - fld.d $fs1, $sp, 200 # 8-byte Folded Reload - fld.d $fs0, $sp, 208 # 8-byte Folded Reload - ld.d $s8, $sp, 216 # 8-byte Folded Reload - ld.d $s7, $sp, 224 # 8-byte Folded Reload - ld.d $s6, $sp, 232 # 8-byte Folded Reload - ld.d $s5, $sp, 240 # 8-byte Folded Reload - ld.d $s4, $sp, 248 # 8-byte Folded Reload - ld.d $s3, $sp, 256 # 8-byte Folded Reload - ld.d $s2, $sp, 264 # 8-byte Folded Reload - ld.d $s1, $sp, 272 # 8-byte Folded Reload - ld.d $s0, $sp, 280 # 8-byte Folded Reload - ld.d $fp, $sp, 288 # 8-byte Folded Reload - ld.d $ra, $sp, 296 # 8-byte Folded Reload - addi.d $sp, $sp, 304 + fld.d $fs4, $sp, 168 # 8-byte Folded Reload + fld.d $fs3, $sp, 176 # 8-byte Folded Reload + fld.d $fs2, $sp, 184 # 8-byte Folded Reload + fld.d $fs1, $sp, 192 # 8-byte Folded Reload + fld.d $fs0, $sp, 200 # 8-byte Folded Reload + ld.d $s7, $sp, 208 # 8-byte Folded Reload + ld.d $s6, $sp, 216 # 8-byte Folded Reload + ld.d $s5, $sp, 224 # 8-byte Folded Reload + ld.d $s4, $sp, 232 # 8-byte Folded Reload + ld.d $s3, $sp, 240 # 8-byte Folded Reload + ld.d $s2, $sp, 248 # 8-byte Folded Reload + ld.d $s1, $sp, 256 # 8-byte Folded Reload + ld.d $s0, $sp, 264 # 8-byte Folded Reload + ld.d $fp, $sp, 272 # 8-byte Folded Reload + ld.d $ra, $sp, 280 # 8-byte Folded Reload + addi.d $sp, $sp, 288 ret .LBB27_171: .Ltmp2169: # EH_LABEL - addi.d $a0, $sp, 136 + addi.d $a0, $sp, 128 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerD2Ev) jirl $ra, $ra, 0 .Ltmp2170: # EH_LABEL # %bb.172: .LBB27_173: .Ltmp2166: # EH_LABEL - addi.d $a0, $sp, 32 + addi.d $a0, $sp, 24 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerD2Ev) jirl $ra, $ra, 0 .Ltmp2167: # EH_LABEL @@ -18548,8 +18608,8 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results beqz $a0, .LBB27_90 # %bb.178: pcalau12i $a0, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s8, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - st.d $zero, $s8, 0 + addi.d $s7, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + st.d $zero, $s7, 0 pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) addi.d $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) pcaddu18i $ra, %call36(__cxa_guard_release) @@ -18558,25 +18618,25 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results .LBB27_179: .Ltmp2148: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB27_203 b .LBB27_211 .LBB27_180: .Ltmp2059: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s1, .LBB27_215 b .LBB27_216 .LBB27_181: .Ltmp2141: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB27_203 b .LBB27_211 .LBB27_182: .Ltmp2052: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s1, .LBB27_215 b .LBB27_216 .LBB27_183: @@ -18588,13 +18648,13 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results .LBB27_185: .Ltmp2136: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB27_203 b .LBB27_211 .LBB27_186: .Ltmp2047: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s1, .LBB27_215 b .LBB27_216 .LBB27_187: @@ -18606,13 +18666,13 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results .LBB27_189: .Ltmp2129: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB27_203 b .LBB27_211 .LBB27_190: .Ltmp2040: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s1, .LBB27_215 b .LBB27_216 .LBB27_191: @@ -18626,7 +18686,7 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results .LBB27_194: move $fp, $a0 fcmp.clt.d $fcc0, $fs2, $fs0 - addi.d $a0, $sp, 32 + addi.d $a0, $sp, 24 bcnez $fcc0, .LBB27_204 b .LBB27_217 .LBB27_195: @@ -18644,24 +18704,24 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results b .LBB27_204 .LBB27_200: .Ltmp2162: # EH_LABEL - ld.d $a2, $sp, 40 + ld.d $a2, $sp, 32 move $fp, $a0 bne $a2, $s3, .LBB27_207 # %bb.201: - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 bne $a0, $s2, .LBB27_209 .LBB27_202: - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s1, .LBB27_211 .LBB27_203: fcmp.clt.d $fcc0, $fs2, $fs0 - addi.d $a0, $sp, 32 + addi.d $a0, $sp, 24 bceqz $fcc0, .LBB27_217 .LBB27_204: - ld.d $a0, $sp, 144 + ld.d $a0, $sp, 136 beq $a0, $s0, .LBB27_206 # %bb.205: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i392 - ld.d $a1, $sp, 160 + ld.d $a1, $sp, 152 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -18670,60 +18730,60 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results pcaddu18i $ra, %call36(_Unwind_Resume) jirl $ra, $ra, 0 .LBB27_207: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i380 - ld.d $a0, $sp, 56 + ld.d $a0, $sp, 48 addi.d $a1, $a0, 1 move $a0, $a2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 beq $a0, $s2, .LBB27_202 b .LBB27_209 .LBB27_208: .Ltmp2155: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 beq $a0, $s2, .LBB27_202 .LBB27_209: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i383 - ld.d $a1, $sp, 88 + ld.d $a1, $sp, 80 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB27_203 b .LBB27_211 .LBB27_210: .Ltmp2120: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB27_203 .LBB27_211: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i386 - ld.d $a1, $sp, 120 + ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 fcmp.clt.d $fcc0, $fs2, $fs0 - addi.d $a0, $sp, 32 + addi.d $a0, $sp, 24 bcnez $fcc0, .LBB27_204 b .LBB27_217 .LBB27_212: .Ltmp2073: # EH_LABEL - ld.d $a2, $sp, 40 + ld.d $a2, $sp, 32 move $fp, $a0 bne $a2, $s3, .LBB27_219 # %bb.213: - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 bne $a0, $s2, .LBB27_221 .LBB27_214: - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 beq $a0, $s1, .LBB27_216 .LBB27_215: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i234 - ld.d $a1, $sp, 120 + ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB27_216: - addi.d $a0, $sp, 136 - ld.d $a1, $sp, 24 + addi.d $a0, $sp, 128 + ld.d $a1, $sp, 16 movgr2cf $fcc0, $a1 bceqz $fcc0, .LBB27_204 .LBB27_217: # %.invoke @@ -18733,31 +18793,31 @@ _Z18CheckAvgIterationsRK7Results: # @_Z18CheckAvgIterationsRK7Results .Ltmp2164: # EH_LABEL # %bb.218: # %.cont .LBB27_219: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i228 - ld.d $a0, $sp, 56 + ld.d $a0, $sp, 48 addi.d $a1, $a0, 1 move $a0, $a2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 beq $a0, $s2, .LBB27_214 b .LBB27_221 .LBB27_220: .Ltmp2066: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 72 + ld.d $a0, $sp, 64 beq $a0, $s2, .LBB27_214 .LBB27_221: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i231 - ld.d $a1, $sp, 88 + ld.d $a1, $sp, 80 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s1, .LBB27_215 b .LBB27_216 .LBB27_222: .Ltmp2031: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 96 bne $a0, $s1, .LBB27_215 b .LBB27_216 .LBB27_223: @@ -19109,18 +19169,8 @@ GCC_except_table28: .Lcst_end23: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z22CheckAvgIterationsRateRK7Results -.LCPI29_0: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI29_1: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI29_2: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 -.LCPI29_3: - .dword 0x4059000000000000 # double 100 .text - .hidden _Z22CheckAvgIterationsRateRK7Results + .hidden _Z22CheckAvgIterationsRateRK7Results # -- Begin function _Z22CheckAvgIterationsRateRK7Results .globl _Z22CheckAvgIterationsRateRK7Results .p2align 5 .type _Z22CheckAvgIterationsRateRK7Results,@function @@ -19142,13 +19192,12 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results st.d $s5, $sp, 240 # 8-byte Folded Spill st.d $s6, $sp, 232 # 8-byte Folded Spill st.d $s7, $sp, 224 # 8-byte Folded Spill - st.d $s8, $sp, 216 # 8-byte Folded Spill - fst.d $fs0, $sp, 208 # 8-byte Folded Spill - fst.d $fs1, $sp, 200 # 8-byte Folded Spill - fst.d $fs2, $sp, 192 # 8-byte Folded Spill - fst.d $fs3, $sp, 184 # 8-byte Folded Spill - fst.d $fs4, $sp, 176 # 8-byte Folded Spill - fst.d $fs5, $sp, 168 # 8-byte Folded Spill + fst.d $fs0, $sp, 216 # 8-byte Folded Spill + fst.d $fs1, $sp, 208 # 8-byte Folded Spill + fst.d $fs2, $sp, 200 # 8-byte Folded Spill + fst.d $fs3, $sp, 192 # 8-byte Folded Spill + fst.d $fs4, $sp, 184 # 8-byte Folded Spill + fst.d $fs5, $sp, 176 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -19159,13 +19208,12 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results .cfi_offset 28, -64 .cfi_offset 29, -72 .cfi_offset 30, -80 - .cfi_offset 31, -88 - .cfi_offset 56, -96 - .cfi_offset 57, -104 - .cfi_offset 58, -112 - .cfi_offset 59, -120 - .cfi_offset 60, -128 - .cfi_offset 61, -136 + .cfi_offset 56, -88 + .cfi_offset 57, -96 + .cfi_offset 58, -104 + .cfi_offset 59, -112 + .cfi_offset 60, -120 + .cfi_offset 61, -128 move $fp, $a0 pcaddu18i $ra, %call36(_ZNK7Results13NumIterationsEv) jirl $ra, $ra, 0 @@ -19179,19 +19227,19 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results pcaddu18i $ra, %call36(_ZNK7Results7GetTimeENS_13BenchmarkTimeE) jirl $ra, $ra, 0 fmov.d $fs2, $fa0 - addi.d $s0, $sp, 152 - st.d $s0, $sp, 136 + addi.d $s0, $sp, 160 + st.d $s0, $sp, 144 pcalau12i $a0, %pc_hi20(.L.str.5) addi.d $a0, $a0, %pc_lo12(.L.str.5) ld.h $s4, $a0, 0 ld.b $s5, $a0, 2 - st.h $s4, $sp, 152 - st.b $s5, $sp, 154 + st.h $s4, $sp, 160 + st.b $s5, $sp, 162 ori $a0, $zero, 3 - st.d $a0, $sp, 144 - st.b $zero, $sp, 155 + st.d $a0, $sp, 152 + st.b $zero, $sp, 163 .Ltmp2178: # EH_LABEL - addi.d $a1, $sp, 136 + addi.d $a1, $sp, 144 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 @@ -19199,15 +19247,18 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results # %bb.1: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit fmul.d $fs3, $fs1, $fs2 frecip.d $fa1, $fs0 - pcalau12i $a0, %pc_hi20(.LCPI29_0) - fld.d $fs4, $a0, %pc_lo12(.LCPI29_0) fdiv.d $fs2, $fa1, $fs3 fsub.d $fa0, $fa0, $fs2 fabs.d $fs5, $fa0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs4, $a0 fmul.d $fs1, $fs2, $fs4 fcmp.cule.d $fcc0, $fs1, $fs5 movcf2gr $a0, $fcc0 - st.d $a0, $sp, 16 + st.d $a0, $sp, 24 bceqz $fcc0, .LBB29_4 # %bb.2: .Ltmp2181: # EH_LABEL @@ -19217,13 +19268,13 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results addi.d $a2, $a0, %pc_lo12(.L.str.30) pcalau12i $a0, %pc_hi20(.L__func__._Z22CheckAvgIterationsRateRK7Results) addi.d $a3, $a0, %pc_lo12(.L__func__._Z22CheckAvgIterationsRateRK7Results) - addi.d $a0, $sp, 128 + addi.d $a0, $sp, 136 ori $a4, $zero, 551 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerC2EPKcS3_S3_i) jirl $ra, $ra, 0 .Ltmp2182: # EH_LABEL # %bb.3: - ld.d $s6, $sp, 128 + ld.d $s6, $sp, 136 b .LBB29_5 .LBB29_4: pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) @@ -19404,15 +19455,15 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2216: # EH_LABEL .LBB29_37: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit173 - addi.d $s1, $sp, 112 - st.d $s1, $sp, 96 - st.h $s4, $sp, 112 - st.b $s5, $sp, 114 + addi.d $s1, $sp, 120 + st.d $s1, $sp, 104 + st.h $s4, $sp, 120 + st.b $s5, $sp, 122 ori $a0, $zero, 3 - st.d $a0, $sp, 104 - st.b $zero, $sp, 115 + st.d $a0, $sp, 112 + st.b $zero, $sp, 123 .Ltmp2218: # EH_LABEL - addi.d $a1, $sp, 96 + addi.d $a1, $sp, 104 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 @@ -19534,8 +19585,11 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results beqz $a0, .LBB29_66 # %bb.61: .Ltmp2246: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI29_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI29_1) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, 1019 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp2247: # EH_LABEL @@ -19562,15 +19616,15 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2251: # EH_LABEL .LBB29_66: # %_ZN9benchmark8internallsIA15_cEERNS0_7LogTypeES4_RKT_.exit - addi.d $s2, $sp, 80 - st.d $s2, $sp, 64 - st.h $s4, $sp, 80 - st.b $s5, $sp, 82 + addi.d $s2, $sp, 88 + st.d $s2, $sp, 72 + st.h $s4, $sp, 88 + st.b $s5, $sp, 90 ori $a0, $zero, 3 - st.d $a0, $sp, 72 - st.b $zero, $sp, 83 + st.d $a0, $sp, 80 + st.b $zero, $sp, 91 .Ltmp2253: # EH_LABEL - addi.d $a1, $sp, 64 + addi.d $a1, $sp, 72 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 @@ -19596,33 +19650,38 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2258: # EH_LABEL .LBB29_71: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit228 - addi.d $s3, $sp, 48 - st.d $s3, $sp, 32 - st.h $s4, $sp, 48 - st.b $s5, $sp, 50 + addi.d $s3, $sp, 56 + st.d $s3, $sp, 40 + st.h $s4, $sp, 56 + st.b $s5, $sp, 58 ori $a0, $zero, 3 - st.d $a0, $sp, 40 - st.b $zero, $sp, 51 + st.d $a0, $sp, 48 + st.b $zero, $sp, 59 .Ltmp2260: # EH_LABEL - addi.d $a1, $sp, 32 + addi.d $a1, $sp, 40 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp2261: # EH_LABEL # %bb.72: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit234 ld.d $a0, $s6, 0 - pcalau12i $s5, %pc_hi20(.LCPI29_2) - pcalau12i $s4, %pc_hi20(.LCPI29_3) + lu12i.w $s4, -487882 beqz $a0, .LBB29_76 # %bb.73: - fld.d $fa1, $s5, %pc_lo12(.LCPI29_2) fsub.d $fa0, $fa0, $fs2 - fabs.d $fa2, $fs2 - fld.d $fa3, $s4, %pc_lo12(.LCPI29_3) - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fs2, $fcc0 + fabs.d $fa1, $fs2 + ori $a1, $s4, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa2, $a1 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fa2, $fs2, $fcc0 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa3 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp2262: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 @@ -19639,26 +19698,26 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2265: # EH_LABEL .LBB29_76: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit240 - ld.d $a0, $sp, 32 + ld.d $a0, $sp, 40 beq $a0, $s3, .LBB29_78 # %bb.77: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i - ld.d $a1, $sp, 48 + ld.d $a1, $sp, 56 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB29_78: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit - ld.d $a0, $sp, 64 + ld.d $a0, $sp, 72 beq $a0, $s2, .LBB29_80 # %bb.79: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i241 - ld.d $a1, $sp, 80 + ld.d $a1, $sp, 88 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB29_80: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit243 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB29_82 # %bb.81: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i244 - ld.d $a1, $sp, 112 + ld.d $a1, $sp, 120 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -19666,26 +19725,26 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results fcmp.clt.d $fcc0, $fs5, $fs1 bceqz $fcc0, .LBB29_171 # %bb.83: - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 144 beq $a0, $s0, .LBB29_85 # %bb.84: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i256 - ld.d $a1, $sp, 152 + ld.d $a1, $sp, 160 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB29_85: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit258 - st.d $s0, $sp, 136 + st.d $s0, $sp, 144 pcalau12i $a0, %pc_hi20(.L.str.6) addi.d $a0, $a0, %pc_lo12(.L.str.6) - ld.h $s6, $a0, 0 - ld.b $s7, $a0, 2 - st.h $s6, $sp, 152 - st.b $s7, $sp, 154 + ld.h $s5, $a0, 0 + ld.b $s6, $a0, 2 + st.h $s5, $sp, 160 + st.b $s6, $sp, 162 ori $a0, $zero, 3 - st.d $a0, $sp, 144 - st.b $zero, $sp, 155 + st.d $a0, $sp, 152 + st.b $zero, $sp, 163 .Ltmp2267: # EH_LABEL - addi.d $a1, $sp, 136 + addi.d $a1, $sp, 144 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 @@ -19707,23 +19766,23 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results addi.d $a2, $a0, %pc_lo12(.L.str.30) pcalau12i $a0, %pc_hi20(.L__func__._Z22CheckAvgIterationsRateRK7Results) addi.d $a3, $a0, %pc_lo12(.L__func__._Z22CheckAvgIterationsRateRK7Results) - addi.d $a0, $sp, 24 + addi.d $a0, $sp, 32 ori $a4, $zero, 552 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerC2EPKcS3_S3_i) jirl $ra, $ra, 0 .Ltmp2271: # EH_LABEL # %bb.88: - ld.d $s8, $sp, 24 + ld.d $s7, $sp, 32 b .LBB29_90 .LBB29_89: pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) ld.b $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) dbar 20 pcalau12i $a1, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s8, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + addi.d $s7, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) beqz $a0, .LBB29_177 .LBB29_90: # %_ZN9benchmark8internal18GetNullLogInstanceEv.exit266 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_122 # %bb.91: .Ltmp2272: # EH_LABEL @@ -19734,7 +19793,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2273: # EH_LABEL # %bb.92: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit269 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_122 # %bb.93: .Ltmp2274: # EH_LABEL @@ -19745,7 +19804,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2275: # EH_LABEL # %bb.94: # %_ZN9benchmark8internallsIA141_cEERNS0_7LogTypeES4_RKT_.exit272 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_122 # %bb.95: .Ltmp2276: # EH_LABEL @@ -19756,7 +19815,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2277: # EH_LABEL # %bb.96: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit275 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_122 # %bb.97: .Ltmp2279: # EH_LABEL @@ -19765,7 +19824,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2280: # EH_LABEL # %bb.98: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit278 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_122 # %bb.99: .Ltmp2281: # EH_LABEL @@ -19776,7 +19835,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2282: # EH_LABEL # %bb.100: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit281 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_122 # %bb.101: ld.d $a1, $fp, 0 @@ -19786,7 +19845,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2284: # EH_LABEL # %bb.102: # %_ZN9benchmark8internallsINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEERNS0_7LogTypeES9_RKT_.exit284 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_122 # %bb.103: .Ltmp2285: # EH_LABEL @@ -19797,7 +19856,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2286: # EH_LABEL # %bb.104: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit287 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_122 # %bb.105: .Ltmp2287: # EH_LABEL @@ -19808,7 +19867,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2288: # EH_LABEL # %bb.106: # %_ZN9benchmark8internallsIA141_cEERNS0_7LogTypeES4_RKT_.exit290 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_122 # %bb.107: .Ltmp2289: # EH_LABEL @@ -19819,7 +19878,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2290: # EH_LABEL # %bb.108: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit293 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_122 # %bb.109: .Ltmp2292: # EH_LABEL @@ -19828,7 +19887,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2293: # EH_LABEL # %bb.110: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit296 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_122 # %bb.111: .Ltmp2294: # EH_LABEL @@ -19839,7 +19898,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2295: # EH_LABEL # %bb.112: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit299 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_122 # %bb.113: .Ltmp2296: # EH_LABEL @@ -19850,7 +19909,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2297: # EH_LABEL # %bb.114: # %_ZN9benchmark8internallsIA11_cEERNS0_7LogTypeES4_RKT_.exit302 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_122 # %bb.115: .Ltmp2298: # EH_LABEL @@ -19861,7 +19920,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2299: # EH_LABEL # %bb.116: # %_ZN9benchmark8internallsIA7_cEERNS0_7LogTypeES4_RKT_.exit305 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_122 # %bb.117: .Ltmp2300: # EH_LABEL @@ -19872,7 +19931,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2301: # EH_LABEL # %bb.118: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit308 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_122 # %bb.119: .Ltmp2302: # EH_LABEL @@ -19883,7 +19942,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2303: # EH_LABEL # %bb.120: # %_ZN9benchmark8internallsIA4_cEERNS0_7LogTypeES4_RKT_.exit311 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_122 # %bb.121: .Ltmp2304: # EH_LABEL @@ -19894,20 +19953,20 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2305: # EH_LABEL .LBB29_122: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit314 - st.d $s1, $sp, 96 - st.h $s6, $sp, 112 - st.b $s7, $sp, 114 + st.d $s1, $sp, 104 + st.h $s5, $sp, 120 + st.b $s6, $sp, 122 ori $a0, $zero, 3 - st.d $a0, $sp, 104 - st.b $zero, $sp, 115 + st.d $a0, $sp, 112 + st.b $zero, $sp, 123 .Ltmp2307: # EH_LABEL - addi.d $a1, $sp, 96 + addi.d $a1, $sp, 104 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp2308: # EH_LABEL # %bb.123: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit320 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_151 # %bb.124: .Ltmp2309: # EH_LABEL @@ -19915,7 +19974,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2310: # EH_LABEL # %bb.125: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit323 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_151 # %bb.126: .Ltmp2311: # EH_LABEL @@ -19926,7 +19985,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2312: # EH_LABEL # %bb.127: # %_ZN9benchmark8internallsIA14_cEERNS0_7LogTypeES4_RKT_.exit326 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_151 # %bb.128: .Ltmp2314: # EH_LABEL @@ -19935,7 +19994,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2315: # EH_LABEL # %bb.129: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit329 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_151 # %bb.130: .Ltmp2316: # EH_LABEL @@ -19946,7 +20005,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2317: # EH_LABEL # %bb.131: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit332 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_151 # %bb.132: .Ltmp2318: # EH_LABEL @@ -19957,7 +20016,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2319: # EH_LABEL # %bb.133: # %_ZN9benchmark8internallsIA141_cEERNS0_7LogTypeES4_RKT_.exit335 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_151 # %bb.134: .Ltmp2320: # EH_LABEL @@ -19968,7 +20027,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2321: # EH_LABEL # %bb.135: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit338 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_151 # %bb.136: .Ltmp2323: # EH_LABEL @@ -19977,7 +20036,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2324: # EH_LABEL # %bb.137: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit341 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_151 # %bb.138: .Ltmp2325: # EH_LABEL @@ -19988,7 +20047,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2326: # EH_LABEL # %bb.139: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit344 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_151 # %bb.140: .Ltmp2327: # EH_LABEL @@ -19999,7 +20058,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2328: # EH_LABEL # %bb.141: # %_ZN9benchmark8internallsIA19_cEERNS0_7LogTypeES4_RKT_.exit347 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_151 # %bb.142: .Ltmp2330: # EH_LABEL @@ -20008,7 +20067,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2331: # EH_LABEL # %bb.143: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit350 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_151 # %bb.144: .Ltmp2332: # EH_LABEL @@ -20019,17 +20078,20 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2333: # EH_LABEL # %bb.145: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit353 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_151 # %bb.146: .Ltmp2335: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI29_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI29_1) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, 1019 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp2336: # EH_LABEL # %bb.147: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit356 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_151 # %bb.148: .Ltmp2337: # EH_LABEL @@ -20040,7 +20102,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2338: # EH_LABEL # %bb.149: # %_ZN9benchmark8internallsIA5_cEERNS0_7LogTypeES4_RKT_.exit359 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_151 # %bb.150: .Ltmp2339: # EH_LABEL @@ -20051,20 +20113,20 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2340: # EH_LABEL .LBB29_151: # %_ZN9benchmark8internallsIA15_cEERNS0_7LogTypeES4_RKT_.exit362 - st.d $s2, $sp, 64 - st.h $s6, $sp, 80 - st.b $s7, $sp, 82 + st.d $s2, $sp, 72 + st.h $s5, $sp, 88 + st.b $s6, $sp, 90 ori $a0, $zero, 3 - st.d $a0, $sp, 72 - st.b $zero, $sp, 83 + st.d $a0, $sp, 80 + st.b $zero, $sp, 91 .Ltmp2342: # EH_LABEL - addi.d $a1, $sp, 64 + addi.d $a1, $sp, 72 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp2343: # EH_LABEL # %bb.152: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit368 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_156 # %bb.153: fsub.d $fa0, $fa0, $fs1 @@ -20073,7 +20135,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2345: # EH_LABEL # %bb.154: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit371 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_156 # %bb.155: .Ltmp2346: # EH_LABEL @@ -20084,36 +20146,42 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2347: # EH_LABEL .LBB29_156: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit374 - st.d $s3, $sp, 32 - st.h $s6, $sp, 48 - st.b $s7, $sp, 50 + st.d $s3, $sp, 40 + st.h $s5, $sp, 56 + st.b $s6, $sp, 58 ori $a0, $zero, 3 - st.d $a0, $sp, 40 - st.b $zero, $sp, 51 + st.d $a0, $sp, 48 + st.b $zero, $sp, 59 .Ltmp2349: # EH_LABEL - addi.d $a1, $sp, 32 + addi.d $a1, $sp, 40 move $a0, $fp pcaddu18i $ra, %call36(_ZNK7Results5GetAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE) jirl $ra, $ra, 0 .Ltmp2350: # EH_LABEL # %bb.157: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit380 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_161 # %bb.158: - fld.d $fa1, $s5, %pc_lo12(.LCPI29_2) fsub.d $fa0, $fa0, $fs1 - fabs.d $fa2, $fs1 - fld.d $fa3, $s4, %pc_lo12(.LCPI29_3) - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fs1, $fcc0 + fabs.d $fa1, $fs1 + ori $a1, $s4, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa2, $a1 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fa2, $fs1, $fcc0 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa3 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp2351: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp2352: # EH_LABEL # %bb.159: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit383 - ld.d $a0, $s8, 0 + ld.d $a0, $s7, 0 beqz $a0, .LBB29_161 # %bb.160: .Ltmp2353: # EH_LABEL @@ -20124,26 +20192,26 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results jirl $ra, $ra, 0 .Ltmp2354: # EH_LABEL .LBB29_161: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit386 - ld.d $a0, $sp, 32 + ld.d $a0, $sp, 40 beq $a0, $s3, .LBB29_163 # %bb.162: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i387 - ld.d $a1, $sp, 48 + ld.d $a1, $sp, 56 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB29_163: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit389 - ld.d $a0, $sp, 64 + ld.d $a0, $sp, 72 beq $a0, $s2, .LBB29_165 # %bb.164: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i390 - ld.d $a1, $sp, 80 + ld.d $a1, $sp, 88 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB29_165: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit392 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB29_167 # %bb.166: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i393 - ld.d $a1, $sp, 112 + ld.d $a1, $sp, 120 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -20151,21 +20219,20 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results fcmp.clt.d $fcc0, $fs2, $fs0 bceqz $fcc0, .LBB29_173 # %bb.168: - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 144 beq $a0, $s0, .LBB29_170 # %bb.169: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i408 - ld.d $a1, $sp, 152 + ld.d $a1, $sp, 160 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB29_170: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit410 - fld.d $fs5, $sp, 168 # 8-byte Folded Reload - fld.d $fs4, $sp, 176 # 8-byte Folded Reload - fld.d $fs3, $sp, 184 # 8-byte Folded Reload - fld.d $fs2, $sp, 192 # 8-byte Folded Reload - fld.d $fs1, $sp, 200 # 8-byte Folded Reload - fld.d $fs0, $sp, 208 # 8-byte Folded Reload - ld.d $s8, $sp, 216 # 8-byte Folded Reload + fld.d $fs5, $sp, 176 # 8-byte Folded Reload + fld.d $fs4, $sp, 184 # 8-byte Folded Reload + fld.d $fs3, $sp, 192 # 8-byte Folded Reload + fld.d $fs2, $sp, 200 # 8-byte Folded Reload + fld.d $fs1, $sp, 208 # 8-byte Folded Reload + fld.d $fs0, $sp, 216 # 8-byte Folded Reload ld.d $s7, $sp, 224 # 8-byte Folded Reload ld.d $s6, $sp, 232 # 8-byte Folded Reload ld.d $s5, $sp, 240 # 8-byte Folded Reload @@ -20180,14 +20247,14 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results ret .LBB29_171: .Ltmp2362: # EH_LABEL - addi.d $a0, $sp, 128 + addi.d $a0, $sp, 136 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerD2Ev) jirl $ra, $ra, 0 .Ltmp2363: # EH_LABEL # %bb.172: .LBB29_173: .Ltmp2359: # EH_LABEL - addi.d $a0, $sp, 24 + addi.d $a0, $sp, 32 pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerD2Ev) jirl $ra, $ra, 0 .Ltmp2360: # EH_LABEL @@ -20217,8 +20284,8 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results beqz $a0, .LBB29_90 # %bb.178: pcalau12i $a0, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s8, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - st.d $zero, $s8, 0 + addi.d $s7, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + st.d $zero, $s7, 0 pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) addi.d $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) pcaddu18i $ra, %call36(__cxa_guard_release) @@ -20227,25 +20294,25 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results .LBB29_179: .Ltmp2341: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB29_203 b .LBB29_211 .LBB29_180: .Ltmp2252: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 bne $a0, $s1, .LBB29_215 b .LBB29_216 .LBB29_181: .Ltmp2334: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB29_203 b .LBB29_211 .LBB29_182: .Ltmp2245: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 bne $a0, $s1, .LBB29_215 b .LBB29_216 .LBB29_183: @@ -20257,13 +20324,13 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results .LBB29_185: .Ltmp2329: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB29_203 b .LBB29_211 .LBB29_186: .Ltmp2240: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 bne $a0, $s1, .LBB29_215 b .LBB29_216 .LBB29_187: @@ -20275,13 +20342,13 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results .LBB29_189: .Ltmp2322: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB29_203 b .LBB29_211 .LBB29_190: .Ltmp2233: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 bne $a0, $s1, .LBB29_215 b .LBB29_216 .LBB29_191: @@ -20295,7 +20362,7 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results .LBB29_194: move $fp, $a0 fcmp.clt.d $fcc0, $fs2, $fs0 - addi.d $a0, $sp, 24 + addi.d $a0, $sp, 32 bcnez $fcc0, .LBB29_204 b .LBB29_217 .LBB29_195: @@ -20313,24 +20380,24 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results b .LBB29_204 .LBB29_200: .Ltmp2355: # EH_LABEL - ld.d $a2, $sp, 32 + ld.d $a2, $sp, 40 move $fp, $a0 bne $a2, $s3, .LBB29_207 # %bb.201: - ld.d $a0, $sp, 64 + ld.d $a0, $sp, 72 bne $a0, $s2, .LBB29_209 .LBB29_202: - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 bne $a0, $s1, .LBB29_211 .LBB29_203: fcmp.clt.d $fcc0, $fs2, $fs0 - addi.d $a0, $sp, 24 + addi.d $a0, $sp, 32 bceqz $fcc0, .LBB29_217 .LBB29_204: - ld.d $a0, $sp, 136 + ld.d $a0, $sp, 144 beq $a0, $s0, .LBB29_206 # %bb.205: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i411 - ld.d $a1, $sp, 152 + ld.d $a1, $sp, 160 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -20339,60 +20406,60 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results pcaddu18i $ra, %call36(_Unwind_Resume) jirl $ra, $ra, 0 .LBB29_207: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i399 - ld.d $a0, $sp, 48 + ld.d $a0, $sp, 56 addi.d $a1, $a0, 1 move $a0, $a2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 64 + ld.d $a0, $sp, 72 beq $a0, $s2, .LBB29_202 b .LBB29_209 .LBB29_208: .Ltmp2348: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 64 + ld.d $a0, $sp, 72 beq $a0, $s2, .LBB29_202 .LBB29_209: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i402 - ld.d $a1, $sp, 80 + ld.d $a1, $sp, 88 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB29_203 b .LBB29_211 .LBB29_210: .Ltmp2313: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB29_203 .LBB29_211: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i405 - ld.d $a1, $sp, 112 + ld.d $a1, $sp, 120 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 fcmp.clt.d $fcc0, $fs2, $fs0 - addi.d $a0, $sp, 24 + addi.d $a0, $sp, 32 bcnez $fcc0, .LBB29_204 b .LBB29_217 .LBB29_212: .Ltmp2266: # EH_LABEL - ld.d $a2, $sp, 32 + ld.d $a2, $sp, 40 move $fp, $a0 bne $a2, $s3, .LBB29_219 # %bb.213: - ld.d $a0, $sp, 64 + ld.d $a0, $sp, 72 bne $a0, $s2, .LBB29_221 .LBB29_214: - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 beq $a0, $s1, .LBB29_216 .LBB29_215: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i253 - ld.d $a1, $sp, 112 + ld.d $a1, $sp, 120 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB29_216: - addi.d $a0, $sp, 128 - ld.d $a1, $sp, 16 + addi.d $a0, $sp, 136 + ld.d $a1, $sp, 24 movgr2cf $fcc0, $a1 bceqz $fcc0, .LBB29_204 .LBB29_217: # %.invoke @@ -20402,31 +20469,31 @@ _Z22CheckAvgIterationsRateRK7Results: # @_Z22CheckAvgIterationsRateRK7Results .Ltmp2357: # EH_LABEL # %bb.218: # %.cont .LBB29_219: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i247 - ld.d $a0, $sp, 48 + ld.d $a0, $sp, 56 addi.d $a1, $a0, 1 move $a0, $a2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 64 + ld.d $a0, $sp, 72 beq $a0, $s2, .LBB29_214 b .LBB29_221 .LBB29_220: .Ltmp2259: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 64 + ld.d $a0, $sp, 72 beq $a0, $s2, .LBB29_214 .LBB29_221: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i250 - ld.d $a1, $sp, 80 + ld.d $a1, $sp, 88 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 bne $a0, $s1, .LBB29_215 b .LBB29_216 .LBB29_222: .Ltmp2224: # EH_LABEL move $fp, $a0 - ld.d $a0, $sp, 96 + ld.d $a0, $sp, 104 bne $a0, $s1, .LBB29_215 b .LBB29_216 .LBB29_223: diff --git a/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/user_counters_thousands_test.dir/user_counters_thousands_test.s b/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/user_counters_thousands_test.dir/user_counters_thousands_test.s index 913e2553..6465df8c 100644 --- a/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/user_counters_thousands_test.dir/user_counters_thousands_test.s +++ b/results/MicroBenchmarks/libs/benchmark/test/CMakeFiles/user_counters_thousands_test.dir/user_counters_thousands_test.s @@ -551,24 +551,8 @@ _ZN8TestCaseD2Ev: # @_ZN8TestCaseD2Ev .size _ZN8TestCaseD2Ev, .Lfunc_end1-_ZN8TestCaseD2Ev .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14CheckThousandsRK7Results -.LCPI2_0: - .dword 0xc12e848000000000 # double -1.0E+6 -.LCPI2_1: - .dword 0x4059000000000000 # double 100 -.LCPI2_2: - .dword 0x3f847ae147ae147b # double 0.01 -.LCPI2_3: - .dword 0x412e848000000000 # double 1.0E+6 -.LCPI2_4: - .dword 0xc130000000000000 # double -1048576 -.LCPI2_5: - .dword 0x405a36e2eb1c432d # double 104.85760000000001 -.LCPI2_6: - .dword 0x3eb0000000000000 # double 9.5367431640625E-7 .text - .hidden _Z14CheckThousandsRK7Results + .hidden _Z14CheckThousandsRK7Results # -- Begin function _Z14CheckThousandsRK7Results .globl _Z14CheckThousandsRK7Results .p2align 5 .type _Z14CheckThousandsRK7Results,@function @@ -578,22 +562,20 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results .cfi_personality 155, DW.ref.__gxx_personality_v0 .cfi_lsda 27, .Lexception1 # %bb.0: - addi.d $sp, $sp, -320 - .cfi_def_cfa_offset 320 - st.d $ra, $sp, 312 # 8-byte Folded Spill - st.d $fp, $sp, 304 # 8-byte Folded Spill - st.d $s0, $sp, 296 # 8-byte Folded Spill - st.d $s1, $sp, 288 # 8-byte Folded Spill - st.d $s2, $sp, 280 # 8-byte Folded Spill - st.d $s3, $sp, 272 # 8-byte Folded Spill - st.d $s4, $sp, 264 # 8-byte Folded Spill - st.d $s5, $sp, 256 # 8-byte Folded Spill - st.d $s6, $sp, 248 # 8-byte Folded Spill - st.d $s7, $sp, 240 # 8-byte Folded Spill - fst.d $fs0, $sp, 232 # 8-byte Folded Spill - fst.d $fs1, $sp, 224 # 8-byte Folded Spill - fst.d $fs2, $sp, 216 # 8-byte Folded Spill - fst.d $fs3, $sp, 208 # 8-byte Folded Spill + addi.d $sp, $sp, -304 + .cfi_def_cfa_offset 304 + st.d $ra, $sp, 296 # 8-byte Folded Spill + st.d $fp, $sp, 288 # 8-byte Folded Spill + st.d $s0, $sp, 280 # 8-byte Folded Spill + st.d $s1, $sp, 272 # 8-byte Folded Spill + st.d $s2, $sp, 264 # 8-byte Folded Spill + st.d $s3, $sp, 256 # 8-byte Folded Spill + st.d $s4, $sp, 248 # 8-byte Folded Spill + st.d $s5, $sp, 240 # 8-byte Folded Spill + st.d $s6, $sp, 232 # 8-byte Folded Spill + fst.d $fs0, $sp, 224 # 8-byte Folded Spill + fst.d $fs1, $sp, 216 # 8-byte Folded Spill + fst.d $fs2, $sp, 208 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -603,11 +585,9 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results .cfi_offset 27, -56 .cfi_offset 28, -64 .cfi_offset 29, -72 - .cfi_offset 30, -80 - .cfi_offset 56, -88 - .cfi_offset 57, -96 - .cfi_offset 58, -104 - .cfi_offset 59, -112 + .cfi_offset 56, -80 + .cfi_offset 57, -88 + .cfi_offset 58, -96 move $fp, $a0 ld.d $a0, $a0, 8 ori $a1, $zero, 31 @@ -652,15 +632,17 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp27: # EH_LABEL # %bb.4: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI2_0) - pcalau12i $a0, %pc_hi20(.LCPI2_1) - fld.d $fs0, $a0, %pc_lo12(.LCPI2_1) - fadd.d $fa0, $fa0, $fs1 - fabs.d $fs2, $fa0 - fcmp.cule.d $fcc0, $fs0, $fs2 - movcf2gr $a0, $fcc0 - st.d $a0, $sp, 24 + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, -1006 + movgr2fr.d $fa1, $a1 + fadd.d $fa0, $fa0, $fa1 + fabs.d $fs0, $fa0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa0, $a0 + fcmp.cule.d $fcc0, $fa0, $fs0 bceqz $fcc0, .LBB2_7 # %bb.5: .Ltmp29: # EH_LABEL @@ -982,7 +964,10 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results beqz $a0, .LBB2_70 # %bb.61: .Ltmp92: # EH_LABEL - fmov.d $fa0, $fs0 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp93: # EH_LABEL @@ -1002,8 +987,11 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results beqz $a0, .LBB2_70 # %bb.65: .Ltmp97: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI2_2) - fld.d $fa0, $a1, %pc_lo12(.LCPI2_2) + lu12i.w $a1, 293601 + ori $a1, $a1, 1147 + lu32i.d $a1, 293601 + lu52i.d $a1, $a1, 1016 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp98: # EH_LABEL @@ -1061,7 +1049,11 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results ld.d $a0, $s6, 0 beqz $a0, .LBB2_76 # %bb.73: - fadd.d $fa0, $fa0, $fs1 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, -1006 + movgr2fr.d $fa1, $a1 + fadd.d $fa0, $fa0, $fa1 .Ltmp109: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 @@ -1107,13 +1099,21 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results .Ltmp118: # EH_LABEL # %bb.78: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit329 ld.d $a0, $s6, 0 - pcalau12i $s4, %pc_hi20(.LCPI2_3) beqz $a0, .LBB2_82 # %bb.79: - fld.d $fa1, $s4, %pc_lo12(.LCPI2_3) - fadd.d $fa0, $fa0, $fs1 + ori $a1, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a3, $a2, -1006 + movgr2fr.d $fa1, $a3 + fadd.d $fa0, $fa0, $fa1 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fa1, $a2 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fs0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp119: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 @@ -1154,7 +1154,11 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB2_88: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit341 - fcmp.clt.d $fcc0, $fs2, $fs0 + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa0, $a0 + fcmp.clt.d $fcc0, $fs0, $fa0 bceqz $fcc0, .LBB2_448 # %bb.89: ld.d $a0, $sp, 168 @@ -1180,11 +1184,11 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results st.d $a0, $sp, 168 st.d $a1, $sp, 184 pcalau12i $a2, %pc_hi20(.L.str.1) - addi.d $s6, $a2, %pc_lo12(.L.str.1) - vld $vr0, $s6, 0 - ld.h $s5, $s6, 16 + addi.d $s5, $a2, %pc_lo12(.L.str.1) + vld $vr0, $s5, 0 + ld.h $s4, $s5, 16 vst $vr0, $a0, 0 - st.h $s5, $a0, 16 + st.h $s4, $a0, 16 ld.d $a0, $sp, 168 st.d $a1, $sp, 176 stx.b $zero, $a0, $a1 @@ -1195,11 +1199,17 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp128: # EH_LABEL # %bb.93: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit359 - fadd.d $fa0, $fa0, $fs1 - fabs.d $fs2, $fa0 - fcmp.cule.d $fcc0, $fs0, $fs2 - movcf2gr $a0, $fcc0 - st.d $a0, $sp, 24 + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, -1006 + movgr2fr.d $fa1, $a1 + fadd.d $fa0, $fa0, $fa1 + fabs.d $fs0, $fa0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa0, $a0 + fcmp.cule.d $fcc0, $fa0, $fs0 bceqz $fcc0, .LBB2_96 # %bb.94: .Ltmp130: # EH_LABEL @@ -1215,17 +1225,17 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp131: # EH_LABEL # %bb.95: - ld.d $s7, $sp, 56 + ld.d $s6, $sp, 56 b .LBB2_97 .LBB2_96: pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) ld.b $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) dbar 20 pcalau12i $a1, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s7, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + addi.d $s6, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) beqz $a0, .LBB2_460 .LBB2_97: # %_ZN9benchmark8internal18GetNullLogInstanceEv.exit361 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_129 # %bb.98: .Ltmp132: # EH_LABEL @@ -1236,7 +1246,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp133: # EH_LABEL # %bb.99: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit364 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_129 # %bb.100: .Ltmp134: # EH_LABEL @@ -1247,7 +1257,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp135: # EH_LABEL # %bb.101: # %_ZN9benchmark8internallsIA151_cEERNS0_7LogTypeES4_RKT_.exit367 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_129 # %bb.102: .Ltmp136: # EH_LABEL @@ -1258,7 +1268,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp137: # EH_LABEL # %bb.103: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit370 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_129 # %bb.104: .Ltmp139: # EH_LABEL @@ -1267,7 +1277,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp140: # EH_LABEL # %bb.105: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit373 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_129 # %bb.106: .Ltmp141: # EH_LABEL @@ -1278,7 +1288,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp142: # EH_LABEL # %bb.107: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit376 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_129 # %bb.108: ld.d $a1, $fp, 0 @@ -1288,7 +1298,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp144: # EH_LABEL # %bb.109: # %_ZN9benchmark8internallsINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEERNS0_7LogTypeES9_RKT_.exit379 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_129 # %bb.110: .Ltmp145: # EH_LABEL @@ -1299,7 +1309,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp146: # EH_LABEL # %bb.111: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit382 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_129 # %bb.112: .Ltmp147: # EH_LABEL @@ -1310,7 +1320,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp148: # EH_LABEL # %bb.113: # %_ZN9benchmark8internallsIA151_cEERNS0_7LogTypeES4_RKT_.exit385 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_129 # %bb.114: .Ltmp149: # EH_LABEL @@ -1321,7 +1331,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp150: # EH_LABEL # %bb.115: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit388 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_129 # %bb.116: .Ltmp152: # EH_LABEL @@ -1330,7 +1340,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp153: # EH_LABEL # %bb.117: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit391 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_129 # %bb.118: .Ltmp154: # EH_LABEL @@ -1341,7 +1351,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp155: # EH_LABEL # %bb.119: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit394 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_129 # %bb.120: .Ltmp156: # EH_LABEL @@ -1352,7 +1362,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp157: # EH_LABEL # %bb.121: # %_ZN9benchmark8internallsIA11_cEERNS0_7LogTypeES4_RKT_.exit397 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_129 # %bb.122: .Ltmp158: # EH_LABEL @@ -1363,7 +1373,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp159: # EH_LABEL # %bb.123: # %_ZN9benchmark8internallsIA7_cEERNS0_7LogTypeES4_RKT_.exit400 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_129 # %bb.124: .Ltmp160: # EH_LABEL @@ -1374,7 +1384,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp161: # EH_LABEL # %bb.125: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit403 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_129 # %bb.126: .Ltmp162: # EH_LABEL @@ -1385,7 +1395,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp163: # EH_LABEL # %bb.127: # %_ZN9benchmark8internallsIA19_cEERNS0_7LogTypeES4_RKT_.exit406 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_129 # %bb.128: .Ltmp164: # EH_LABEL @@ -1408,11 +1418,11 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results .Ltmp168: # EH_LABEL # %bb.130: # %.noexc412 ld.d $a1, $sp, 96 - vld $vr0, $s6, 0 + vld $vr0, $s5, 0 st.d $a0, $sp, 128 st.d $a1, $sp, 144 vst $vr0, $a0, 0 - st.h $s5, $a0, 16 + st.h $s4, $a0, 16 ld.d $a0, $sp, 128 st.d $a1, $sp, 136 stx.b $zero, $a0, $a1 @@ -1423,7 +1433,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp171: # EH_LABEL # %bb.131: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit415 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_159 # %bb.132: .Ltmp172: # EH_LABEL @@ -1431,7 +1441,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp173: # EH_LABEL # %bb.133: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit418 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_159 # %bb.134: .Ltmp174: # EH_LABEL @@ -1442,7 +1452,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp175: # EH_LABEL # %bb.135: # %_ZN9benchmark8internallsIA14_cEERNS0_7LogTypeES4_RKT_.exit421 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_159 # %bb.136: .Ltmp177: # EH_LABEL @@ -1452,7 +1462,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp178: # EH_LABEL # %bb.137: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit424 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_159 # %bb.138: .Ltmp179: # EH_LABEL @@ -1463,7 +1473,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp180: # EH_LABEL # %bb.139: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit427 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_159 # %bb.140: .Ltmp181: # EH_LABEL @@ -1474,7 +1484,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp182: # EH_LABEL # %bb.141: # %_ZN9benchmark8internallsIA151_cEERNS0_7LogTypeES4_RKT_.exit430 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_159 # %bb.142: .Ltmp183: # EH_LABEL @@ -1485,7 +1495,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp184: # EH_LABEL # %bb.143: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit433 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_159 # %bb.144: .Ltmp186: # EH_LABEL @@ -1494,7 +1504,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp187: # EH_LABEL # %bb.145: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit436 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_159 # %bb.146: .Ltmp188: # EH_LABEL @@ -1505,7 +1515,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp189: # EH_LABEL # %bb.147: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit439 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_159 # %bb.148: .Ltmp190: # EH_LABEL @@ -1516,16 +1526,19 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp191: # EH_LABEL # %bb.149: # %_ZN9benchmark8internallsIA19_cEERNS0_7LogTypeES4_RKT_.exit442 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_159 # %bb.150: .Ltmp193: # EH_LABEL - fmov.d $fa0, $fs0 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp194: # EH_LABEL # %bb.151: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit445 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_159 # %bb.152: .Ltmp195: # EH_LABEL @@ -1536,17 +1549,20 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp196: # EH_LABEL # %bb.153: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit448 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_159 # %bb.154: .Ltmp198: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI2_2) - fld.d $fa0, $a1, %pc_lo12(.LCPI2_2) + lu12i.w $a1, 293601 + ori $a1, $a1, 1147 + lu32i.d $a1, 293601 + lu52i.d $a1, $a1, 1016 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp199: # EH_LABEL # %bb.155: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit451 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_159 # %bb.156: .Ltmp200: # EH_LABEL @@ -1557,7 +1573,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp201: # EH_LABEL # %bb.157: # %_ZN9benchmark8internallsIA5_cEERNS0_7LogTypeES4_RKT_.exit454 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_159 # %bb.158: .Ltmp202: # EH_LABEL @@ -1580,11 +1596,11 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results .Ltmp206: # EH_LABEL # %bb.160: # %.noexc460 ld.d $a1, $sp, 64 - vld $vr0, $s6, 0 + vld $vr0, $s5, 0 st.d $a0, $sp, 96 st.d $a1, $sp, 112 vst $vr0, $a0, 0 - st.h $s5, $a0, 16 + st.h $s4, $a0, 16 ld.d $a0, $sp, 96 st.d $a1, $sp, 104 stx.b $zero, $a0, $a1 @@ -1595,16 +1611,20 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp209: # EH_LABEL # %bb.161: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit463 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_165 # %bb.162: - fadd.d $fa0, $fa0, $fs1 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, -1006 + movgr2fr.d $fa1, $a1 + fadd.d $fa0, $fa0, $fa1 .Ltmp210: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp211: # EH_LABEL # %bb.163: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit466 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_165 # %bb.164: .Ltmp212: # EH_LABEL @@ -1627,11 +1647,11 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results .Ltmp216: # EH_LABEL # %bb.166: # %.noexc472 ld.d $a1, $sp, 200 - vld $vr0, $s6, 0 + vld $vr0, $s5, 0 st.d $a0, $sp, 64 st.d $a1, $sp, 80 vst $vr0, $a0, 0 - st.h $s5, $a0, 16 + st.h $s4, $a0, 16 ld.d $a0, $sp, 64 st.d $a1, $sp, 72 stx.b $zero, $a0, $a1 @@ -1642,19 +1662,28 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp219: # EH_LABEL # %bb.167: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit475 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_171 # %bb.168: - fld.d $fa1, $s4, %pc_lo12(.LCPI2_3) - fadd.d $fa0, $fa0, $fs1 + ori $a1, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a3, $a2, -1006 + movgr2fr.d $fa1, $a3 + fadd.d $fa0, $fa0, $fa1 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fa1, $a2 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fs0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp220: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp221: # EH_LABEL # %bb.169: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit478 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_171 # %bb.170: .Ltmp222: # EH_LABEL @@ -1689,7 +1718,11 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB2_177: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit490 - fcmp.clt.d $fcc0, $fs2, $fs0 + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa0, $a0 + fcmp.clt.d $fcc0, $fs0, $fa0 bceqz $fcc0, .LBB2_450 # %bb.178: ld.d $a0, $sp, 168 @@ -1715,11 +1748,11 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results st.d $a0, $sp, 168 st.d $a1, $sp, 184 pcalau12i $a2, %pc_hi20(.L.str.2) - addi.d $s6, $a2, %pc_lo12(.L.str.2) - vld $vr0, $s6, 0 - ld.h $s5, $s6, 16 + addi.d $s5, $a2, %pc_lo12(.L.str.2) + vld $vr0, $s5, 0 + ld.h $s4, $s5, 16 vst $vr0, $a0, 0 - st.h $s5, $a0, 16 + st.h $s4, $a0, 16 ld.d $a0, $sp, 168 st.d $a1, $sp, 176 stx.b $zero, $a0, $a1 @@ -1730,11 +1763,17 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp229: # EH_LABEL # %bb.182: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit511 - fadd.d $fa0, $fa0, $fs1 - fabs.d $fs2, $fa0 - fcmp.cule.d $fcc0, $fs0, $fs2 - movcf2gr $a0, $fcc0 - st.d $a0, $sp, 24 + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, -1006 + movgr2fr.d $fa1, $a1 + fadd.d $fa0, $fa0, $fa1 + fabs.d $fs0, $fa0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa0, $a0 + fcmp.cule.d $fcc0, $fa0, $fs0 bceqz $fcc0, .LBB2_185 # %bb.183: .Ltmp231: # EH_LABEL @@ -1750,17 +1789,17 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp232: # EH_LABEL # %bb.184: - ld.d $s7, $sp, 48 + ld.d $s6, $sp, 48 b .LBB2_186 .LBB2_185: pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) ld.b $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) dbar 20 pcalau12i $a1, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s7, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + addi.d $s6, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) beqz $a0, .LBB2_462 .LBB2_186: # %_ZN9benchmark8internal18GetNullLogInstanceEv.exit513 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_218 # %bb.187: .Ltmp233: # EH_LABEL @@ -1771,7 +1810,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp234: # EH_LABEL # %bb.188: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit516 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_218 # %bb.189: .Ltmp235: # EH_LABEL @@ -1782,7 +1821,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp236: # EH_LABEL # %bb.190: # %_ZN9benchmark8internallsIA151_cEERNS0_7LogTypeES4_RKT_.exit519 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_218 # %bb.191: .Ltmp237: # EH_LABEL @@ -1793,7 +1832,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp238: # EH_LABEL # %bb.192: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit522 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_218 # %bb.193: .Ltmp240: # EH_LABEL @@ -1802,7 +1841,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp241: # EH_LABEL # %bb.194: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit525 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_218 # %bb.195: .Ltmp242: # EH_LABEL @@ -1813,7 +1852,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp243: # EH_LABEL # %bb.196: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit528 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_218 # %bb.197: ld.d $a1, $fp, 0 @@ -1823,7 +1862,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp245: # EH_LABEL # %bb.198: # %_ZN9benchmark8internallsINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEERNS0_7LogTypeES9_RKT_.exit531 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_218 # %bb.199: .Ltmp246: # EH_LABEL @@ -1834,7 +1873,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp247: # EH_LABEL # %bb.200: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit534 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_218 # %bb.201: .Ltmp248: # EH_LABEL @@ -1845,7 +1884,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp249: # EH_LABEL # %bb.202: # %_ZN9benchmark8internallsIA151_cEERNS0_7LogTypeES4_RKT_.exit537 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_218 # %bb.203: .Ltmp250: # EH_LABEL @@ -1856,7 +1895,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp251: # EH_LABEL # %bb.204: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit540 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_218 # %bb.205: .Ltmp253: # EH_LABEL @@ -1865,7 +1904,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp254: # EH_LABEL # %bb.206: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit543 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_218 # %bb.207: .Ltmp255: # EH_LABEL @@ -1876,7 +1915,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp256: # EH_LABEL # %bb.208: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit546 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_218 # %bb.209: .Ltmp257: # EH_LABEL @@ -1887,7 +1926,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp258: # EH_LABEL # %bb.210: # %_ZN9benchmark8internallsIA11_cEERNS0_7LogTypeES4_RKT_.exit549 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_218 # %bb.211: .Ltmp259: # EH_LABEL @@ -1898,7 +1937,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp260: # EH_LABEL # %bb.212: # %_ZN9benchmark8internallsIA7_cEERNS0_7LogTypeES4_RKT_.exit552 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_218 # %bb.213: .Ltmp261: # EH_LABEL @@ -1909,7 +1948,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp262: # EH_LABEL # %bb.214: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit555 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_218 # %bb.215: .Ltmp263: # EH_LABEL @@ -1920,7 +1959,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp264: # EH_LABEL # %bb.216: # %_ZN9benchmark8internallsIA19_cEERNS0_7LogTypeES4_RKT_.exit558 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_218 # %bb.217: .Ltmp265: # EH_LABEL @@ -1943,11 +1982,11 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results .Ltmp269: # EH_LABEL # %bb.219: # %.noexc564 ld.d $a1, $sp, 96 - vld $vr0, $s6, 0 + vld $vr0, $s5, 0 st.d $a0, $sp, 128 st.d $a1, $sp, 144 vst $vr0, $a0, 0 - st.h $s5, $a0, 16 + st.h $s4, $a0, 16 ld.d $a0, $sp, 128 st.d $a1, $sp, 136 stx.b $zero, $a0, $a1 @@ -1958,7 +1997,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp272: # EH_LABEL # %bb.220: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit567 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_248 # %bb.221: .Ltmp273: # EH_LABEL @@ -1966,7 +2005,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp274: # EH_LABEL # %bb.222: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit570 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_248 # %bb.223: .Ltmp275: # EH_LABEL @@ -1977,7 +2016,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp276: # EH_LABEL # %bb.224: # %_ZN9benchmark8internallsIA14_cEERNS0_7LogTypeES4_RKT_.exit573 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_248 # %bb.225: .Ltmp278: # EH_LABEL @@ -1987,7 +2026,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp279: # EH_LABEL # %bb.226: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit576 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_248 # %bb.227: .Ltmp280: # EH_LABEL @@ -1998,7 +2037,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp281: # EH_LABEL # %bb.228: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit579 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_248 # %bb.229: .Ltmp282: # EH_LABEL @@ -2009,7 +2048,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp283: # EH_LABEL # %bb.230: # %_ZN9benchmark8internallsIA151_cEERNS0_7LogTypeES4_RKT_.exit582 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_248 # %bb.231: .Ltmp284: # EH_LABEL @@ -2020,7 +2059,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp285: # EH_LABEL # %bb.232: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit585 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_248 # %bb.233: .Ltmp287: # EH_LABEL @@ -2029,7 +2068,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp288: # EH_LABEL # %bb.234: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit588 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_248 # %bb.235: .Ltmp289: # EH_LABEL @@ -2040,7 +2079,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp290: # EH_LABEL # %bb.236: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit591 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_248 # %bb.237: .Ltmp291: # EH_LABEL @@ -2051,16 +2090,19 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp292: # EH_LABEL # %bb.238: # %_ZN9benchmark8internallsIA19_cEERNS0_7LogTypeES4_RKT_.exit594 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_248 # %bb.239: .Ltmp294: # EH_LABEL - fmov.d $fa0, $fs0 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp295: # EH_LABEL # %bb.240: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit597 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_248 # %bb.241: .Ltmp296: # EH_LABEL @@ -2071,17 +2113,20 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp297: # EH_LABEL # %bb.242: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit600 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_248 # %bb.243: .Ltmp299: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI2_2) - fld.d $fa0, $a1, %pc_lo12(.LCPI2_2) + lu12i.w $a1, 293601 + ori $a1, $a1, 1147 + lu32i.d $a1, 293601 + lu52i.d $a1, $a1, 1016 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp300: # EH_LABEL # %bb.244: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit603 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_248 # %bb.245: .Ltmp301: # EH_LABEL @@ -2092,7 +2137,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp302: # EH_LABEL # %bb.246: # %_ZN9benchmark8internallsIA5_cEERNS0_7LogTypeES4_RKT_.exit606 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_248 # %bb.247: .Ltmp303: # EH_LABEL @@ -2115,11 +2160,11 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results .Ltmp307: # EH_LABEL # %bb.249: # %.noexc612 ld.d $a1, $sp, 64 - vld $vr0, $s6, 0 + vld $vr0, $s5, 0 st.d $a0, $sp, 96 st.d $a1, $sp, 112 vst $vr0, $a0, 0 - st.h $s5, $a0, 16 + st.h $s4, $a0, 16 ld.d $a0, $sp, 96 st.d $a1, $sp, 104 stx.b $zero, $a0, $a1 @@ -2130,16 +2175,20 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp310: # EH_LABEL # %bb.250: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit615 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_254 # %bb.251: - fadd.d $fa0, $fa0, $fs1 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, -1006 + movgr2fr.d $fa1, $a1 + fadd.d $fa0, $fa0, $fa1 .Ltmp311: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp312: # EH_LABEL # %bb.252: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit618 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_254 # %bb.253: .Ltmp313: # EH_LABEL @@ -2162,11 +2211,11 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results .Ltmp317: # EH_LABEL # %bb.255: # %.noexc624 ld.d $a1, $sp, 200 - vld $vr0, $s6, 0 + vld $vr0, $s5, 0 st.d $a0, $sp, 64 st.d $a1, $sp, 80 vst $vr0, $a0, 0 - st.h $s5, $a0, 16 + st.h $s4, $a0, 16 ld.d $a0, $sp, 64 st.d $a1, $sp, 72 stx.b $zero, $a0, $a1 @@ -2177,19 +2226,28 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp320: # EH_LABEL # %bb.256: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit627 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_260 # %bb.257: - fld.d $fa1, $s4, %pc_lo12(.LCPI2_3) - fadd.d $fa0, $fa0, $fs1 + ori $a1, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a3, $a2, -1006 + movgr2fr.d $fa1, $a3 + fadd.d $fa0, $fa0, $fa1 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fa1, $a2 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fs0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 .Ltmp321: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp322: # EH_LABEL # %bb.258: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit630 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_260 # %bb.259: .Ltmp323: # EH_LABEL @@ -2224,7 +2282,11 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB2_266: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit642 - fcmp.clt.d $fcc0, $fs2, $fs0 + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa0, $a0 + fcmp.clt.d $fcc0, $fs0, $fa0 bceqz $fcc0, .LBB2_452 # %bb.267: ld.d $a0, $sp, 168 @@ -2265,13 +2327,16 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp330: # EH_LABEL # %bb.271: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit663 - pcalau12i $a0, %pc_hi20(.LCPI2_4) - fld.d $fs2, $a0, %pc_lo12(.LCPI2_4) - pcalau12i $a0, %pc_hi20(.LCPI2_5) - fld.d $fs1, $a0, %pc_lo12(.LCPI2_5) - fadd.d $fa0, $fa0, $fs2 - fabs.d $fs3, $fa0 - fcmp.cule.d $fcc0, $fs1, $fs3 + lu52i.d $a0, $zero, -1005 + movgr2fr.d $fs1, $a0 + fadd.d $fa0, $fa0, $fs1 + fabs.d $fs2, $fa0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fs0, $a0 + fcmp.cule.d $fcc0, $fs0, $fs2 movcf2gr $a0, $fcc0 st.d $a0, $sp, 24 bceqz $fcc0, .LBB2_274 @@ -2593,7 +2658,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results beqz $a0, .LBB2_337 # %bb.328: .Ltmp395: # EH_LABEL - fmov.d $fa0, $fs1 + fmov.d $fa0, $fs0 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp396: # EH_LABEL @@ -2613,8 +2678,11 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results beqz $a0, .LBB2_337 # %bb.332: .Ltmp400: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI2_2) - fld.d $fa0, $a1, %pc_lo12(.LCPI2_2) + lu12i.w $a1, 293601 + ori $a1, $a1, 1147 + lu32i.d $a1, 293601 + lu52i.d $a1, $a1, 1016 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp401: # EH_LABEL @@ -2671,7 +2739,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results ld.d $a0, $s6, 0 beqz $a0, .LBB2_343 # %bb.340: - fadd.d $fa0, $fa0, $fs2 + fadd.d $fa0, $fa0, $fs1 .Ltmp412: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 @@ -2716,13 +2784,17 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results .Ltmp421: # EH_LABEL # %bb.345: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit779 ld.d $a0, $s6, 0 - pcalau12i $s4, %pc_hi20(.LCPI2_6) beqz $a0, .LBB2_349 # %bb.346: - fld.d $fa1, $s4, %pc_lo12(.LCPI2_6) - fadd.d $fa0, $fa0, $fs2 + fadd.d $fa0, $fa0, $fs1 + lu52i.d $a1, $zero, 1003 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 fmul.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fs0 .Ltmp422: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 @@ -2763,7 +2835,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB2_355: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit794 - fcmp.clt.d $fcc0, $fs3, $fs1 + fcmp.clt.d $fcc0, $fs2, $fs0 bceqz $fcc0, .LBB2_454 # %bb.356: ld.d $a0, $sp, 168 @@ -2789,11 +2861,11 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results st.d $a0, $sp, 168 st.d $a1, $sp, 184 pcalau12i $a2, %pc_hi20(.L.str.4) - addi.d $s6, $a2, %pc_lo12(.L.str.4) - vld $vr0, $s6, 0 - ld.h $s5, $s6, 16 + addi.d $s5, $a2, %pc_lo12(.L.str.4) + vld $vr0, $s5, 0 + ld.h $s4, $s5, 16 vst $vr0, $a0, 0 - st.h $s5, $a0, 16 + st.h $s4, $a0, 16 ld.d $a0, $sp, 168 st.d $a1, $sp, 176 stx.b $zero, $a0, $a1 @@ -2804,9 +2876,9 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp431: # EH_LABEL # %bb.360: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit815 - fadd.d $fa0, $fa0, $fs2 - fabs.d $fs3, $fa0 - fcmp.cule.d $fcc0, $fs1, $fs3 + fadd.d $fa0, $fa0, $fs1 + fabs.d $fs2, $fa0 + fcmp.cule.d $fcc0, $fs0, $fs2 bceqz $fcc0, .LBB2_363 # %bb.361: .Ltmp433: # EH_LABEL @@ -2822,17 +2894,17 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp434: # EH_LABEL # %bb.362: - ld.d $s7, $sp, 32 + ld.d $s6, $sp, 32 b .LBB2_364 .LBB2_363: pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) ld.b $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) dbar 20 pcalau12i $a1, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s7, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + addi.d $s6, $a1, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) beqz $a0, .LBB2_466 .LBB2_364: # %_ZN9benchmark8internal18GetNullLogInstanceEv.exit817 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_396 # %bb.365: .Ltmp435: # EH_LABEL @@ -2843,7 +2915,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp436: # EH_LABEL # %bb.366: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit820 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_396 # %bb.367: .Ltmp437: # EH_LABEL @@ -2854,7 +2926,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp438: # EH_LABEL # %bb.368: # %_ZN9benchmark8internallsIA151_cEERNS0_7LogTypeES4_RKT_.exit823 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_396 # %bb.369: .Ltmp439: # EH_LABEL @@ -2865,7 +2937,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp440: # EH_LABEL # %bb.370: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit826 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_396 # %bb.371: .Ltmp442: # EH_LABEL @@ -2874,7 +2946,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp443: # EH_LABEL # %bb.372: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit829 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_396 # %bb.373: .Ltmp444: # EH_LABEL @@ -2885,7 +2957,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp445: # EH_LABEL # %bb.374: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit832 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_396 # %bb.375: ld.d $a1, $fp, 0 @@ -2895,7 +2967,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp447: # EH_LABEL # %bb.376: # %_ZN9benchmark8internallsINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEERNS0_7LogTypeES9_RKT_.exit835 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_396 # %bb.377: .Ltmp448: # EH_LABEL @@ -2906,7 +2978,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp449: # EH_LABEL # %bb.378: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit838 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_396 # %bb.379: .Ltmp450: # EH_LABEL @@ -2917,7 +2989,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp451: # EH_LABEL # %bb.380: # %_ZN9benchmark8internallsIA151_cEERNS0_7LogTypeES4_RKT_.exit841 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_396 # %bb.381: .Ltmp452: # EH_LABEL @@ -2928,7 +3000,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp453: # EH_LABEL # %bb.382: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit844 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_396 # %bb.383: .Ltmp455: # EH_LABEL @@ -2937,7 +3009,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp456: # EH_LABEL # %bb.384: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit847 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_396 # %bb.385: .Ltmp457: # EH_LABEL @@ -2948,7 +3020,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp458: # EH_LABEL # %bb.386: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit850 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_396 # %bb.387: .Ltmp459: # EH_LABEL @@ -2959,7 +3031,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp460: # EH_LABEL # %bb.388: # %_ZN9benchmark8internallsIA11_cEERNS0_7LogTypeES4_RKT_.exit853 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_396 # %bb.389: .Ltmp461: # EH_LABEL @@ -2970,7 +3042,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp462: # EH_LABEL # %bb.390: # %_ZN9benchmark8internallsIA7_cEERNS0_7LogTypeES4_RKT_.exit856 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_396 # %bb.391: .Ltmp463: # EH_LABEL @@ -2981,7 +3053,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp464: # EH_LABEL # %bb.392: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit859 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_396 # %bb.393: .Ltmp465: # EH_LABEL @@ -2992,7 +3064,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp466: # EH_LABEL # %bb.394: # %_ZN9benchmark8internallsIA19_cEERNS0_7LogTypeES4_RKT_.exit862 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_396 # %bb.395: .Ltmp467: # EH_LABEL @@ -3015,11 +3087,11 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results .Ltmp471: # EH_LABEL # %bb.397: # %.noexc868 ld.d $a1, $sp, 96 - vld $vr0, $s6, 0 + vld $vr0, $s5, 0 st.d $a0, $sp, 128 st.d $a1, $sp, 144 vst $vr0, $a0, 0 - st.h $s5, $a0, 16 + st.h $s4, $a0, 16 ld.d $a0, $sp, 128 st.d $a1, $sp, 136 stx.b $zero, $a0, $a1 @@ -3030,7 +3102,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp474: # EH_LABEL # %bb.398: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit871 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_426 # %bb.399: .Ltmp475: # EH_LABEL @@ -3038,7 +3110,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp476: # EH_LABEL # %bb.400: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit874 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_426 # %bb.401: .Ltmp477: # EH_LABEL @@ -3049,7 +3121,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp478: # EH_LABEL # %bb.402: # %_ZN9benchmark8internallsIA14_cEERNS0_7LogTypeES4_RKT_.exit877 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_426 # %bb.403: .Ltmp480: # EH_LABEL @@ -3058,7 +3130,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp481: # EH_LABEL # %bb.404: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit880 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_426 # %bb.405: .Ltmp482: # EH_LABEL @@ -3069,7 +3141,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp483: # EH_LABEL # %bb.406: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit883 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_426 # %bb.407: .Ltmp484: # EH_LABEL @@ -3080,7 +3152,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp485: # EH_LABEL # %bb.408: # %_ZN9benchmark8internallsIA151_cEERNS0_7LogTypeES4_RKT_.exit886 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_426 # %bb.409: .Ltmp486: # EH_LABEL @@ -3091,7 +3163,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp487: # EH_LABEL # %bb.410: # %_ZN9benchmark8internallsIA2_cEERNS0_7LogTypeES4_RKT_.exit889 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_426 # %bb.411: .Ltmp489: # EH_LABEL @@ -3100,7 +3172,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp490: # EH_LABEL # %bb.412: # %_ZN9benchmark8internallsIiEERNS0_7LogTypeES3_RKT_.exit892 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_426 # %bb.413: .Ltmp491: # EH_LABEL @@ -3111,7 +3183,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp492: # EH_LABEL # %bb.414: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit895 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_426 # %bb.415: .Ltmp493: # EH_LABEL @@ -3122,16 +3194,16 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp494: # EH_LABEL # %bb.416: # %_ZN9benchmark8internallsIA19_cEERNS0_7LogTypeES4_RKT_.exit898 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_426 # %bb.417: .Ltmp496: # EH_LABEL - fmov.d $fa0, $fs1 + fmov.d $fa0, $fs0 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp497: # EH_LABEL # %bb.418: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit901 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_426 # %bb.419: .Ltmp498: # EH_LABEL @@ -3142,17 +3214,20 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp499: # EH_LABEL # %bb.420: # %_ZN9benchmark8internallsIA3_cEERNS0_7LogTypeES4_RKT_.exit904 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_426 # %bb.421: .Ltmp501: # EH_LABEL - pcalau12i $a1, %pc_hi20(.LCPI2_2) - fld.d $fa0, $a1, %pc_lo12(.LCPI2_2) + lu12i.w $a1, 293601 + ori $a1, $a1, 1147 + lu32i.d $a1, 293601 + lu52i.d $a1, $a1, 1016 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp502: # EH_LABEL # %bb.422: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit907 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_426 # %bb.423: .Ltmp503: # EH_LABEL @@ -3163,7 +3238,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp504: # EH_LABEL # %bb.424: # %_ZN9benchmark8internallsIA5_cEERNS0_7LogTypeES4_RKT_.exit910 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_426 # %bb.425: .Ltmp505: # EH_LABEL @@ -3186,11 +3261,11 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results .Ltmp509: # EH_LABEL # %bb.427: # %.noexc916 ld.d $a1, $sp, 64 - vld $vr0, $s6, 0 + vld $vr0, $s5, 0 st.d $a0, $sp, 96 st.d $a1, $sp, 112 vst $vr0, $a0, 0 - st.h $s5, $a0, 16 + st.h $s4, $a0, 16 ld.d $a0, $sp, 96 st.d $a1, $sp, 104 stx.b $zero, $a0, $a1 @@ -3201,16 +3276,16 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp512: # EH_LABEL # %bb.428: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit919 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_432 # %bb.429: - fadd.d $fa0, $fa0, $fs2 + fadd.d $fa0, $fa0, $fs1 .Ltmp513: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp514: # EH_LABEL # %bb.430: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit922 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_432 # %bb.431: .Ltmp515: # EH_LABEL @@ -3233,11 +3308,11 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results .Ltmp519: # EH_LABEL # %bb.433: # %.noexc928 ld.d $a1, $sp, 200 - vld $vr0, $s6, 0 + vld $vr0, $s5, 0 st.d $a0, $sp, 64 st.d $a1, $sp, 80 vst $vr0, $a0, 0 - st.h $s5, $a0, 16 + st.h $s4, $a0, 16 ld.d $a0, $sp, 64 st.d $a1, $sp, 72 stx.b $zero, $a0, $a1 @@ -3248,19 +3323,24 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 .Ltmp522: # EH_LABEL # %bb.434: # %_ZNK7Results12GetCounterAsIdEET_RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.exit931 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_438 # %bb.435: - fld.d $fa1, $s4, %pc_lo12(.LCPI2_6) - fadd.d $fa0, $fa0, $fs2 + fadd.d $fa0, $fa0, $fs1 + lu52i.d $a1, $zero, 1003 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 fmul.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fs0 .Ltmp523: # EH_LABEL pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp524: # EH_LABEL # %bb.436: # %_ZN9benchmark8internallsIdEERNS0_7LogTypeES3_RKT_.exit934 - ld.d $a0, $s7, 0 + ld.d $a0, $s6, 0 beqz $a0, .LBB2_438 # %bb.437: .Ltmp525: # EH_LABEL @@ -3295,7 +3375,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB2_444: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit946 - fcmp.clt.d $fcc0, $fs3, $fs1 + fcmp.clt.d $fcc0, $fs2, $fs0 bceqz $fcc0, .LBB2_456 # %bb.445: ld.d $a0, $sp, 168 @@ -3306,21 +3386,19 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB2_447: # %_ZStneIcSt11char_traitsIcESaIcEEbRKNSt7__cxx1112basic_stringIT_T0_T1_EEPKS5_.exit.thread - fld.d $fs3, $sp, 208 # 8-byte Folded Reload - fld.d $fs2, $sp, 216 # 8-byte Folded Reload - fld.d $fs1, $sp, 224 # 8-byte Folded Reload - fld.d $fs0, $sp, 232 # 8-byte Folded Reload - ld.d $s7, $sp, 240 # 8-byte Folded Reload - ld.d $s6, $sp, 248 # 8-byte Folded Reload - ld.d $s5, $sp, 256 # 8-byte Folded Reload - ld.d $s4, $sp, 264 # 8-byte Folded Reload - ld.d $s3, $sp, 272 # 8-byte Folded Reload - ld.d $s2, $sp, 280 # 8-byte Folded Reload - ld.d $s1, $sp, 288 # 8-byte Folded Reload - ld.d $s0, $sp, 296 # 8-byte Folded Reload - ld.d $fp, $sp, 304 # 8-byte Folded Reload - ld.d $ra, $sp, 312 # 8-byte Folded Reload - addi.d $sp, $sp, 320 + fld.d $fs2, $sp, 208 # 8-byte Folded Reload + fld.d $fs1, $sp, 216 # 8-byte Folded Reload + fld.d $fs0, $sp, 224 # 8-byte Folded Reload + ld.d $s6, $sp, 232 # 8-byte Folded Reload + ld.d $s5, $sp, 240 # 8-byte Folded Reload + ld.d $s4, $sp, 248 # 8-byte Folded Reload + ld.d $s3, $sp, 256 # 8-byte Folded Reload + ld.d $s2, $sp, 264 # 8-byte Folded Reload + ld.d $s1, $sp, 272 # 8-byte Folded Reload + ld.d $s0, $sp, 280 # 8-byte Folded Reload + ld.d $fp, $sp, 288 # 8-byte Folded Reload + ld.d $ra, $sp, 296 # 8-byte Folded Reload + addi.d $sp, $sp, 304 ret .LBB2_448: .Ltmp543: # EH_LABEL @@ -3382,8 +3460,8 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results beqz $a0, .LBB2_97 # %bb.461: pcalau12i $a0, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s7, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - st.d $zero, $s7, 0 + addi.d $s6, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + st.d $zero, $s6, 0 pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) addi.d $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) pcaddu18i $ra, %call36(__cxa_guard_release) @@ -3398,8 +3476,8 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results beqz $a0, .LBB2_186 # %bb.463: pcalau12i $a0, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s7, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - st.d $zero, $s7, 0 + addi.d $s6, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + st.d $zero, $s6, 0 pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) addi.d $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) pcaddu18i $ra, %call36(__cxa_guard_release) @@ -3430,8 +3508,8 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results beqz $a0, .LBB2_364 # %bb.467: pcalau12i $a0, %pc_hi20(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - addi.d $s7, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) - st.d $zero, $s7, 0 + addi.d $s6, $a0, %pc_lo12(_ZZN9benchmark8internal18GetNullLogInstanceEvE8null_log) + st.d $zero, $s6, 0 pcalau12i $a0, %pc_hi20(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) addi.d $a0, $a0, %pc_lo12(_ZGVZN9benchmark8internal18GetNullLogInstanceEvE8null_log) pcaddu18i $ra, %call36(__cxa_guard_release) @@ -3442,25 +3520,25 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results move $fp, $a0 ld.d $a0, $sp, 128 beq $a0, $s1, .LBB2_542 - b .LBB2_550 + b .LBB2_547 .LBB2_469: .Ltmp406: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 128 - beq $a0, $s1, .LBB2_554 - b .LBB2_558 + bne $a0, $s1, .LBB2_551 + b .LBB2_552 .LBB2_470: .Ltmp305: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 128 - beq $a0, $s1, .LBB2_563 - b .LBB2_567 + bne $a0, $s1, .LBB2_560 + b .LBB2_561 .LBB2_471: .Ltmp204: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 128 - beq $a0, $s1, .LBB2_572 - b .LBB2_576 + bne $a0, $s1, .LBB2_569 + b .LBB2_570 .LBB2_472: .Ltmp103: # EH_LABEL move $fp, $a0 @@ -3472,25 +3550,25 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results move $fp, $a0 ld.d $a0, $sp, 128 beq $a0, $s1, .LBB2_542 - b .LBB2_550 + b .LBB2_547 .LBB2_474: .Ltmp399: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 128 - beq $a0, $s1, .LBB2_554 - b .LBB2_558 + bne $a0, $s1, .LBB2_551 + b .LBB2_552 .LBB2_475: .Ltmp298: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 128 - beq $a0, $s1, .LBB2_563 - b .LBB2_567 + bne $a0, $s1, .LBB2_560 + b .LBB2_561 .LBB2_476: .Ltmp197: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 128 - beq $a0, $s1, .LBB2_572 - b .LBB2_576 + bne $a0, $s1, .LBB2_569 + b .LBB2_570 .LBB2_477: .Ltmp96: # EH_LABEL move $fp, $a0 @@ -3517,25 +3595,25 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results move $fp, $a0 ld.d $a0, $sp, 128 beq $a0, $s1, .LBB2_542 - b .LBB2_550 + b .LBB2_547 .LBB2_484: .Ltmp394: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 128 - beq $a0, $s1, .LBB2_554 - b .LBB2_558 + bne $a0, $s1, .LBB2_551 + b .LBB2_552 .LBB2_485: .Ltmp293: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 128 - beq $a0, $s1, .LBB2_563 - b .LBB2_567 + bne $a0, $s1, .LBB2_560 + b .LBB2_561 .LBB2_486: .Ltmp192: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 128 - beq $a0, $s1, .LBB2_572 - b .LBB2_576 + bne $a0, $s1, .LBB2_569 + b .LBB2_570 .LBB2_487: .Ltmp91: # EH_LABEL move $fp, $a0 @@ -3562,25 +3640,25 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results move $fp, $a0 ld.d $a0, $sp, 128 beq $a0, $s1, .LBB2_542 - b .LBB2_550 + b .LBB2_547 .LBB2_494: .Ltmp387: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 128 - beq $a0, $s1, .LBB2_554 - b .LBB2_558 + bne $a0, $s1, .LBB2_551 + b .LBB2_552 .LBB2_495: .Ltmp286: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 128 - beq $a0, $s1, .LBB2_563 - b .LBB2_567 + bne $a0, $s1, .LBB2_560 + b .LBB2_561 .LBB2_496: .Ltmp185: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 128 - beq $a0, $s1, .LBB2_572 - b .LBB2_576 + bne $a0, $s1, .LBB2_569 + b .LBB2_570 .LBB2_497: .Ltmp84: # EH_LABEL move $fp, $a0 @@ -3592,13 +3670,13 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results move $fp, $a0 ld.d $a0, $sp, 96 beq $a0, $s2, .LBB2_541 - b .LBB2_548 + b .LBB2_545 .LBB2_499: .Ltmp510: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 128 beq $a0, $s1, .LBB2_542 - b .LBB2_550 + b .LBB2_547 .LBB2_500: .Ltmp472: # EH_LABEL b .LBB2_524 @@ -3613,14 +3691,14 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results .Ltmp419: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 96 - beq $a0, $s2, .LBB2_553 - b .LBB2_557 + beq $a0, $s2, .LBB2_550 + b .LBB2_555 .LBB2_504: .Ltmp409: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 128 - beq $a0, $s1, .LBB2_554 - b .LBB2_558 + bne $a0, $s1, .LBB2_551 + b .LBB2_552 .LBB2_505: .Ltmp371: # EH_LABEL b .LBB2_526 @@ -3635,14 +3713,14 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results .Ltmp318: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 96 - beq $a0, $s2, .LBB2_562 - b .LBB2_566 + beq $a0, $s2, .LBB2_559 + b .LBB2_564 .LBB2_509: .Ltmp308: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 128 - beq $a0, $s1, .LBB2_563 - b .LBB2_567 + bne $a0, $s1, .LBB2_560 + b .LBB2_561 .LBB2_510: .Ltmp270: # EH_LABEL b .LBB2_528 @@ -3657,14 +3735,14 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results .Ltmp217: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 96 - beq $a0, $s2, .LBB2_571 - b .LBB2_575 + beq $a0, $s2, .LBB2_568 + b .LBB2_576 .LBB2_514: .Ltmp207: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 128 - beq $a0, $s1, .LBB2_572 - b .LBB2_576 + bne $a0, $s1, .LBB2_569 + b .LBB2_570 .LBB2_515: .Ltmp169: # EH_LABEL b .LBB2_530 @@ -3680,7 +3758,7 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results move $fp, $a0 ld.d $a0, $sp, 96 beq $a0, $s2, .LBB2_580 - b .LBB2_588 + b .LBB2_587 .LBB2_519: .Ltmp106: # EH_LABEL move $fp, $a0 @@ -3701,25 +3779,25 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results .Ltmp441: # EH_LABEL .LBB2_524: move $fp, $a0 - fcmp.clt.d $fcc0, $fs3, $fs1 + fcmp.clt.d $fcc0, $fs2, $fs0 addi.d $a0, $sp, 32 - bcnez $fcc0, .LBB2_543 - b .LBB2_584 + bcnez $fcc0, .LBB2_571 + b .LBB2_583 .LBB2_525: .Ltmp340: # EH_LABEL .LBB2_526: move $fp, $a0 - b .LBB2_554 + b .LBB2_552 .LBB2_527: .Ltmp239: # EH_LABEL .LBB2_528: move $fp, $a0 - b .LBB2_563 + b .LBB2_561 .LBB2_529: .Ltmp138: # EH_LABEL .LBB2_530: move $fp, $a0 - b .LBB2_572 + b .LBB2_570 .LBB2_531: .Ltmp37: # EH_LABEL .LBB2_532: @@ -3741,35 +3819,24 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results .Ltmp545: # EH_LABEL .LBB2_538: move $fp, $a0 - b .LBB2_543 + b .LBB2_571 .LBB2_539: .Ltmp527: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - bne $a2, $s3, .LBB2_546 + bne $a2, $s3, .LBB2_543 # %bb.540: ld.d $a0, $sp, 96 - bne $a0, $s2, .LBB2_548 + bne $a0, $s2, .LBB2_545 .LBB2_541: ld.d $a0, $sp, 128 - bne $a0, $s1, .LBB2_550 + bne $a0, $s1, .LBB2_547 .LBB2_542: - fcmp.clt.d $fcc0, $fs3, $fs1 + fcmp.clt.d $fcc0, $fs2, $fs0 addi.d $a0, $sp, 32 - bceqz $fcc0, .LBB2_584 -.LBB2_543: - ld.d $a0, $sp, 168 - beq $a0, $s0, .LBB2_545 -# %bb.544: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i962 - ld.d $a1, $sp, 184 - addi.d $a1, $a1, 1 - pcaddu18i $ra, %call36(_ZdlPvm) - jirl $ra, $ra, 0 -.LBB2_545: - move $a0, $fp - pcaddu18i $ra, %call36(_Unwind_Resume) - jirl $ra, $ra, 0 -.LBB2_546: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i950 + bcnez $fcc0, .LBB2_571 + b .LBB2_583 +.LBB2_543: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i950 ld.d $a0, $sp, 80 addi.d $a1, $a0, 1 move $a0, $a2 @@ -3777,186 +3844,209 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 ld.d $a0, $sp, 96 beq $a0, $s2, .LBB2_541 - b .LBB2_548 -.LBB2_547: + b .LBB2_545 +.LBB2_544: .Ltmp517: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 96 beq $a0, $s2, .LBB2_541 -.LBB2_548: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i953 +.LBB2_545: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i953 ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 ld.d $a0, $sp, 128 beq $a0, $s1, .LBB2_542 - b .LBB2_550 -.LBB2_549: + b .LBB2_547 +.LBB2_546: .Ltmp479: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 128 beq $a0, $s1, .LBB2_542 -.LBB2_550: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i956 +.LBB2_547: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i956 ld.d $a1, $sp, 144 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - fcmp.clt.d $fcc0, $fs3, $fs1 + fcmp.clt.d $fcc0, $fs2, $fs0 addi.d $a0, $sp, 32 - bcnez $fcc0, .LBB2_543 - b .LBB2_584 -.LBB2_551: + bcnez $fcc0, .LBB2_571 + b .LBB2_583 +.LBB2_548: .Ltmp426: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - bne $a2, $s3, .LBB2_555 -# %bb.552: + bne $a2, $s3, .LBB2_553 +# %bb.549: ld.d $a0, $sp, 96 - bne $a0, $s2, .LBB2_557 -.LBB2_553: + bne $a0, $s2, .LBB2_555 +.LBB2_550: ld.d $a0, $sp, 128 - bne $a0, $s1, .LBB2_558 -.LBB2_554: + beq $a0, $s1, .LBB2_552 +.LBB2_551: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i804 + ld.d $a1, $sp, 144 + addi.d $a1, $a1, 1 + pcaddu18i $ra, %call36(_ZdlPvm) + jirl $ra, $ra, 0 +.LBB2_552: addi.d $a0, $sp, 40 + ld.d $a1, $sp, 24 + movgr2cf $fcc0, $a1 + bceqz $fcc0, .LBB2_571 b .LBB2_583 -.LBB2_555: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i798 +.LBB2_553: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i798 ld.d $a0, $sp, 80 addi.d $a1, $a0, 1 move $a0, $a2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 ld.d $a0, $sp, 96 - beq $a0, $s2, .LBB2_553 - b .LBB2_557 -.LBB2_556: + beq $a0, $s2, .LBB2_550 + b .LBB2_555 +.LBB2_554: .Ltmp416: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 96 - beq $a0, $s2, .LBB2_553 -.LBB2_557: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i801 + beq $a0, $s2, .LBB2_550 +.LBB2_555: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i801 ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 ld.d $a0, $sp, 128 - beq $a0, $s1, .LBB2_554 -.LBB2_558: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i804 - ld.d $a1, $sp, 144 - addi.d $a1, $a1, 1 - pcaddu18i $ra, %call36(_ZdlPvm) - jirl $ra, $ra, 0 - addi.d $a0, $sp, 40 - b .LBB2_583 -.LBB2_559: + bne $a0, $s1, .LBB2_551 + b .LBB2_552 +.LBB2_556: .Ltmp378: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 128 - beq $a0, $s1, .LBB2_554 - b .LBB2_558 -.LBB2_560: + bne $a0, $s1, .LBB2_551 + b .LBB2_552 +.LBB2_557: .Ltmp325: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - bne $a2, $s3, .LBB2_564 -# %bb.561: + bne $a2, $s3, .LBB2_562 +# %bb.558: ld.d $a0, $sp, 96 - bne $a0, $s2, .LBB2_566 -.LBB2_562: + bne $a0, $s2, .LBB2_564 +.LBB2_559: ld.d $a0, $sp, 128 - bne $a0, $s1, .LBB2_567 -.LBB2_563: + beq $a0, $s1, .LBB2_561 +.LBB2_560: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i652 + ld.d $a1, $sp, 144 + addi.d $a1, $a1, 1 + pcaddu18i $ra, %call36(_ZdlPvm) + jirl $ra, $ra, 0 +.LBB2_561: + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa0, $a0 + fcmp.cule.d $fcc0, $fa0, $fs0 addi.d $a0, $sp, 48 + bceqz $fcc0, .LBB2_571 b .LBB2_583 -.LBB2_564: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i646 +.LBB2_562: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i646 ld.d $a0, $sp, 80 addi.d $a1, $a0, 1 move $a0, $a2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 ld.d $a0, $sp, 96 - beq $a0, $s2, .LBB2_562 - b .LBB2_566 -.LBB2_565: + beq $a0, $s2, .LBB2_559 + b .LBB2_564 +.LBB2_563: .Ltmp315: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 96 - beq $a0, $s2, .LBB2_562 -.LBB2_566: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i649 + beq $a0, $s2, .LBB2_559 +.LBB2_564: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i649 ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 ld.d $a0, $sp, 128 - beq $a0, $s1, .LBB2_563 -.LBB2_567: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i652 - ld.d $a1, $sp, 144 - addi.d $a1, $a1, 1 - pcaddu18i $ra, %call36(_ZdlPvm) - jirl $ra, $ra, 0 - addi.d $a0, $sp, 48 - b .LBB2_583 -.LBB2_568: + bne $a0, $s1, .LBB2_560 + b .LBB2_561 +.LBB2_565: .Ltmp277: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 128 - beq $a0, $s1, .LBB2_563 - b .LBB2_567 -.LBB2_569: + bne $a0, $s1, .LBB2_560 + b .LBB2_561 +.LBB2_566: .Ltmp224: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - bne $a2, $s3, .LBB2_573 -# %bb.570: + bne $a2, $s3, .LBB2_574 +# %bb.567: ld.d $a0, $sp, 96 - bne $a0, $s2, .LBB2_575 -.LBB2_571: + bne $a0, $s2, .LBB2_576 +.LBB2_568: ld.d $a0, $sp, 128 - bne $a0, $s1, .LBB2_576 -.LBB2_572: + beq $a0, $s1, .LBB2_570 +.LBB2_569: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i500 + ld.d $a1, $sp, 144 + addi.d $a1, $a1, 1 + pcaddu18i $ra, %call36(_ZdlPvm) + jirl $ra, $ra, 0 +.LBB2_570: + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa0, $a0 + fcmp.cule.d $fcc0, $fa0, $fs0 addi.d $a0, $sp, 56 - b .LBB2_583 -.LBB2_573: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i494 + bcnez $fcc0, .LBB2_583 +.LBB2_571: + ld.d $a0, $sp, 168 + beq $a0, $s0, .LBB2_573 +# %bb.572: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i962 + ld.d $a1, $sp, 184 + addi.d $a1, $a1, 1 + pcaddu18i $ra, %call36(_ZdlPvm) + jirl $ra, $ra, 0 +.LBB2_573: + move $a0, $fp + pcaddu18i $ra, %call36(_Unwind_Resume) + jirl $ra, $ra, 0 +.LBB2_574: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i494 ld.d $a0, $sp, 80 addi.d $a1, $a0, 1 move $a0, $a2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 ld.d $a0, $sp, 96 - beq $a0, $s2, .LBB2_571 - b .LBB2_575 -.LBB2_574: + beq $a0, $s2, .LBB2_568 + b .LBB2_576 +.LBB2_575: .Ltmp214: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 96 - beq $a0, $s2, .LBB2_571 -.LBB2_575: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i497 + beq $a0, $s2, .LBB2_568 +.LBB2_576: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i497 ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 ld.d $a0, $sp, 128 - beq $a0, $s1, .LBB2_572 -.LBB2_576: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i500 - ld.d $a1, $sp, 144 - addi.d $a1, $a1, 1 - pcaddu18i $ra, %call36(_ZdlPvm) - jirl $ra, $ra, 0 - addi.d $a0, $sp, 56 - b .LBB2_583 + bne $a0, $s1, .LBB2_569 + b .LBB2_570 .LBB2_577: .Ltmp176: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 128 - beq $a0, $s1, .LBB2_572 - b .LBB2_576 + bne $a0, $s1, .LBB2_569 + b .LBB2_570 .LBB2_578: .Ltmp123: # EH_LABEL ld.d $a2, $sp, 64 move $fp, $a0 - bne $a2, $s3, .LBB2_586 + bne $a2, $s3, .LBB2_585 # %bb.579: ld.d $a0, $sp, 96 - bne $a0, $s2, .LBB2_588 + bne $a0, $s2, .LBB2_587 .LBB2_580: ld.d $a0, $sp, 128 beq $a0, $s1, .LBB2_582 @@ -3966,18 +4056,20 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB2_582: + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa0, $a0 + fcmp.cule.d $fcc0, $fa0, $fs0 addi.d $a0, $sp, 160 -.LBB2_583: - ld.d $a1, $sp, 24 - movgr2cf $fcc0, $a1 - bceqz $fcc0, .LBB2_543 -.LBB2_584: # %.invoke + bceqz $fcc0, .LBB2_571 +.LBB2_583: # %.invoke .Ltmp528: # EH_LABEL pcaddu18i $ra, %call36(_ZN9benchmark8internal12CheckHandlerD2Ev) jirl $ra, $ra, 0 .Ltmp529: # EH_LABEL -# %bb.585: # %.cont -.LBB2_586: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i342 +# %bb.584: # %.cont +.LBB2_585: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i342 ld.d $a0, $sp, 80 addi.d $a1, $a0, 1 move $a0, $a2 @@ -3985,13 +4077,13 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results jirl $ra, $ra, 0 ld.d $a0, $sp, 96 beq $a0, $s2, .LBB2_580 - b .LBB2_588 -.LBB2_587: + b .LBB2_587 +.LBB2_586: .Ltmp113: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 96 beq $a0, $s2, .LBB2_580 -.LBB2_588: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i345 +.LBB2_587: # %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i345 ld.d $a1, $sp, 112 addi.d $a1, $a1, 1 pcaddu18i $ra, %call36(_ZdlPvm) @@ -3999,13 +4091,13 @@ _Z14CheckThousandsRK7Results: # @_Z14CheckThousandsRK7Results ld.d $a0, $sp, 128 bne $a0, $s1, .LBB2_581 b .LBB2_582 -.LBB2_589: +.LBB2_588: .Ltmp75: # EH_LABEL move $fp, $a0 ld.d $a0, $sp, 128 bne $a0, $s1, .LBB2_581 b .LBB2_582 -.LBB2_590: +.LBB2_589: .Ltmp530: # EH_LABEL pcaddu18i $ra, %call36(__clang_call_terminate) jirl $ra, $ra, 0 diff --git a/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/clamscan_clamscan.s b/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/clamscan_clamscan.s index 4a5b14d0..bf019e9f 100644 --- a/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/clamscan_clamscan.s +++ b/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/clamscan_clamscan.s @@ -1,12 +1,6 @@ .file "clamscan_clamscan.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI0_1: - .dword 0x3f50000000000000 # double 9.765625E-4 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -419,20 +413,21 @@ main: # @main ld.d $a0, $s1, 24 slli.d $a0, $a0, 2 srli.d $a1, $a0, 32 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI0_0) lu52i.d $a2, $zero, 1107 or $a1, $a1, $a2 + movgr2fr.d $fa0, $a1 + lu12i.w $a1, 256 + lu52i.d $a1, $a1, 1107 movgr2fr.d $fa1, $a1 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 bstrpick.d $a0, $a0, 31, 2 slli.d $a0, $a0, 2 lu52i.d $a1, $zero, 1075 - pcalau12i $a2, %pc_hi20(.LCPI0_1) - fld.d $fa1, $a2, %pc_lo12(.LCPI0_1) or $a0, $a0, $a1 - movgr2fr.d $fa2, $a0 - fadd.d $fa0, $fa2, $fa0 + movgr2fr.d $fa1, $a0 + fadd.d $fa0, $fa1, $fa0 + lu52i.d $a0, $zero, 1013 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str.41) diff --git a/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_autoit.s b/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_autoit.s index 28a92bcd..7bdfb654 100644 --- a/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_autoit.s +++ b/results/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_autoit.s @@ -4955,12 +4955,7 @@ MT_decrypt: # @MT_decrypt .Lfunc_end1: .size MT_decrypt, .Lfunc_end1-MT_decrypt # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function LAME_decrypt -.LCPI2_0: - .dword 0x4070000000000000 # double 256 - .text - .p2align 5 + .p2align 5 # -- Begin function LAME_decrypt .type LAME_decrypt,@function LAME_decrypt: # @LAME_decrypt # %bb.0: @@ -5057,10 +5052,10 @@ LAME_decrypt: # @LAME_decrypt vrepli.w $vr1, 16 lu12i.w $a3, -1 lu32i.d $a3, 0 - pcalau12i $a4, %pc_hi20(.LCPI2_0) - fld.d $fa2, $a4, %pc_lo12(.LCPI2_0) lu52i.d $a4, $zero, 1023 - vldi $vr3, -784 + vldi $vr2, -784 + lu52i.d $a5, $zero, 1031 + movgr2fr.d $fa3, $a5 addi.d $a5, $zero, -1 .p2align 4, , 16 .LBB2_2: # %.lr.ph @@ -5098,8 +5093,8 @@ LAME_decrypt: # @LAME_decrypt bstrins.d $a6, $a7, 31, 20 or $a6, $a6, $a4 movgr2fr.d $fa5, $a6 - fadd.d $fa5, $fa5, $fa3 - fmul.d $fa5, $fa5, $fa2 + fadd.d $fa5, $fa5, $fa2 + fmul.d $fa5, $fa5, $fa3 ftintrz.w.d $fa6, $fa5 movfr2gr.s $a6, $fa6 vadd.w $vr6, $vr4, $vr0 diff --git a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/block.s b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/block.s index 4c99e1f2..6c192a0e 100644 --- a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/block.s +++ b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/block.s @@ -8507,12 +8507,7 @@ dct_chroma: # @dct_chroma .Lfunc_end4: .size dct_chroma, .Lfunc_end4-dct_chroma # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function dct_luma_sp -.LCPI5_0: - .dword 0x3feb333333333333 # double 0.84999999999999998 - .text - .globl dct_luma_sp + .globl dct_luma_sp # -- Begin function dct_luma_sp .p2align 5 .type dct_luma_sp,@function dct_luma_sp: # @dct_luma_sp @@ -8844,11 +8839,14 @@ dct_luma_sp: # @dct_luma_sp st.w $a3, $sp, 424 alsl.d $a3, $a2, $a4, 1 st.w $a3, $sp, 420 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - fld.d $fa1, $a3, %pc_lo12(.LCPI5_0) slli.d $a3, $a4, 1 sub.d $a2, $a2, $a3 st.w $a2, $sp, 428 + lu12i.w $a2, 209715 + ori $a2, $a2, 819 + lu32i.d $a2, -314573 + lu52i.d $a2, $a2, 1022 + movgr2fr.d $fa1, $a2 fmul.d $fa0, $fa0, $fa1 vldi $vr1, -1008 fmul.d $fs0, $fa0, $fa1 @@ -9546,23 +9544,19 @@ dct_luma_sp: # @dct_luma_sp .Lfunc_end5: .size dct_luma_sp, .Lfunc_end5-dct_luma_sp # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function dct_chroma_sp -.LCPI6_0: - .dword 0x3feb333333333333 # double 0.84999999999999998 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI6_1: + .p2align 4, 0x0 # -- Begin function dct_chroma_sp +.LCPI6_0: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 .word 4 # 0x4 -.LCPI6_2: +.LCPI6_1: .word 0 # 0x0 .word 4 # 0x4 .word 5 # 0x5 .word 4294967295 # 0xffffffff -.LCPI6_3: +.LCPI6_2: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -9613,7 +9607,10 @@ dct_chroma_sp: # @dct_chroma_sp fdiv.d $fa0, $fa0, $fa1 pcaddu18i $ra, %call36(exp2) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI6_0) + lu12i.w $a0, 209715 + ori $a0, $a0, 819 + lu32i.d $a0, -314573 + lu52i.d $a0, $a0, 1022 ld.d $s7, $fp, 0 lu12i.w $a6, 3 pcalau12i $a1, %pc_hi20(active_pps) @@ -9622,7 +9619,7 @@ dct_chroma_sp: # @dct_chroma_sp ldx.w $a2, $s7, $a2 ld.w $a3, $s0, 8 ld.w $a1, $a1, 208 - fld.d $fa2, $a0, %pc_lo12(.LCPI6_0) + movgr2fr.d $fa2, $a0 vldi $vr1, -1008 sub.w $a2, $zero, $a2 add.w $a0, $a1, $a3 @@ -9793,8 +9790,8 @@ dct_chroma_sp: # @dct_chroma_sp ldptr.d $a0, $s7, 12720 addi.d $a1, $sp, 744 vstelm.w $vr8, $a1, 0, 3 - pcalau12i $a1, %pc_hi20(.LCPI6_1) - vld $vr1, $a1, %pc_lo12(.LCPI6_1) + pcalau12i $a1, %pc_hi20(.LCPI6_0) + vld $vr1, $a1, %pc_lo12(.LCPI6_0) vinsgr2vr.d $vr7, $a0, 0 vilvl.h $vr7, $vr0, $vr7 vpackev.w $vr3, $vr4, $vr3 @@ -10788,7 +10785,6 @@ dct_chroma_sp: # @dct_chroma_sp ld.d $a0, $sp, 440 # 8-byte Folded Reload ld.d $a0, $a0, 0 ldptr.w $a0, $a0, 4008 - ld.d $ra, $sp, 320 # 8-byte Folded Reload ld.d $s3, $sp, 472 # 8-byte Folded Reload beqz $a0, .LBB6_21 # %bb.19: # in Loop: Header=BB6_12 Depth=1 @@ -10825,6 +10821,7 @@ dct_chroma_sp: # @dct_chroma_sp sll.w $a0, $a0, $t2 srai.d $a0, $a0, 5 addi.d $a6, $zero, -1 + ld.d $ra, $sp, 320 # 8-byte Folded Reload b .LBB6_23 .p2align 4, , 16 .LBB6_21: # in Loop: Header=BB6_12 Depth=1 @@ -10842,6 +10839,7 @@ dct_chroma_sp: # @dct_chroma_sp bnez $s2, .LBB6_20 .LBB6_22: # in Loop: Header=BB6_12 Depth=1 move $a0, $zero + ld.d $ra, $sp, 320 # 8-byte Folded Reload move $a6, $s6 .LBB6_23: # in Loop: Header=BB6_12 Depth=1 add.w $a0, $a0, $fp @@ -11002,7 +11000,7 @@ dct_chroma_sp: # @dct_chroma_sp ori $a3, $zero, 1 sll.d $a2, $a3, $a2 st.d $a2, $sp, 328 # 8-byte Folded Spill - addi.d $s7, $zero, -1 + addi.d $s8, $zero, -1 st.d $s1, $sp, 432 # 8-byte Folded Spill b .LBB6_32 .p2align 4, , 16 @@ -11054,7 +11052,7 @@ dct_chroma_sp: # @dct_chroma_sp add.d $a2, $a2, $s2 ld.bu $fp, $a2, 2 ld.bu $a6, $a2, 3 - addi.w $s7, $s7, 1 + addi.w $s8, $s8, 1 add.d $a5, $s1, $fp slli.d $a2, $a5, 5 addi.d $a3, $sp, 512 @@ -11089,13 +11087,13 @@ dct_chroma_sp: # @dct_chroma_sp maskeqz $a2, $a2, $a3 masknez $a3, $a4, $a3 or $a2, $a2, $a3 - add.w $s8, $a2, $a1 + add.w $s7, $a2, $a1 ld.d $a2, $sp, 376 # 8-byte Folded Reload alsl.d $a2, $fp, $a2, 4 move $s0, $a6 ldx.w $a2, $a2, $a6 - srai.d $a3, $s8, 31 - xor $a4, $s8, $a3 + srai.d $a3, $s7, 31 + xor $a4, $s7, $a3 sub.d $a3, $a4, $a3 mul.d $a3, $a3, $a2 ld.d $a6, $sp, 400 # 8-byte Folded Reload @@ -11119,7 +11117,7 @@ dct_chroma_sp: # @dct_chroma_sp beqz $s3, .LBB6_36 # %bb.35: # %.thread614 # in Loop: Header=BB6_32 Depth=3 - slti $a0, $s8, 0 + slti $a0, $s7, 0 srai.d $a2, $s3, 31 xor $a3, $s3, $a2 sub.w $a7, $a3, $a2 @@ -11142,7 +11140,7 @@ dct_chroma_sp: # @dct_chroma_sp addi.d $a2, $sp, 492 addi.d $a3, $sp, 488 move $a0, $s3 - move $a1, $s7 + move $a1, $s8 st.d $a5, $sp, 368 # 8-byte Folded Spill st.d $a6, $sp, 360 # 8-byte Folded Spill pcaddu18i $ra, %call36(levrun_linfo_inter) @@ -11179,7 +11177,7 @@ dct_chroma_sp: # @dct_chroma_sp addi.d $a2, $sp, 492 addi.d $a3, $sp, 488 move $a0, $s2 - move $a1, $s7 + move $a1, $s8 pcaddu18i $ra, %call36(levrun_linfo_inter) jirl $ra, $ra, 0 ld.d $a6, $sp, 360 # 8-byte Folded Reload @@ -11219,7 +11217,7 @@ dct_chroma_sp: # @dct_chroma_sp xor $a0, $s2, $s3 sltui $a0, $a0, 1 masknez $a1, $s1, $a0 - maskeqz $a0, $s8, $a0 + maskeqz $a0, $s7, $a0 or $s1, $a0, $a1 b .LBB6_40 .p2align 4, , 16 @@ -11227,7 +11225,7 @@ dct_chroma_sp: # @dct_chroma_sp bne $s3, $s2, .LBB6_38 # %bb.37: # in Loop: Header=BB6_32 Depth=3 move $s2, $s3 - move $s1, $s8 + move $s1, $s7 b .LBB6_39 .p2align 4, , 16 .LBB6_38: # in Loop: Header=BB6_32 Depth=3 @@ -11251,7 +11249,7 @@ dct_chroma_sp: # @dct_chroma_sp ld.d $a3, $sp, 344 # 8-byte Folded Reload stx.w $a1, $a3, $a2 ld.d $a3, $sp, 336 # 8-byte Folded Reload - stx.w $s7, $a3, $a2 + stx.w $s8, $a3, $a2 ldx.w $a2, $a5, $s0 alsl.d $a3, $fp, $a6, 4 ldx.w $a3, $a3, $s0 @@ -11262,7 +11260,7 @@ dct_chroma_sp: # @dct_chroma_sp mul.d $a0, $a0, $a3 sll.w $a0, $a0, $t2 srai.d $a1, $a0, 6 - addi.d $s7, $zero, -1 + addi.d $s8, $zero, -1 ori $a0, $zero, 2 b .LBB6_42 .p2align 4, , 16 @@ -11326,10 +11324,10 @@ dct_chroma_sp: # @dct_chroma_sp add.d $a6, $a1, $a6 ld.d $a7, $sp, 24 # 8-byte Folded Reload add.d $a7, $a1, $a7 + pcalau12i $t0, %pc_hi20(.LCPI6_1) + vld $vr0, $t0, %pc_lo12(.LCPI6_1) pcalau12i $t0, %pc_hi20(.LCPI6_2) - vld $vr0, $t0, %pc_lo12(.LCPI6_2) - pcalau12i $t0, %pc_hi20(.LCPI6_3) - vld $vr1, $t0, %pc_lo12(.LCPI6_3) + vld $vr1, $t0, %pc_lo12(.LCPI6_2) ori $t1, $zero, 1 lu12i.w $t0, 3 ori $t0, $t0, 3236 diff --git a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/context_ini.s b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/context_ini.s index eae3446e..50a7d748 100644 --- a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/context_ini.s +++ b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/context_ini.s @@ -1,10 +1,6 @@ .file "context_ini.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function create_context_memory -.LCPI0_0: - .dword 0x3fd34413509f79ff # double 0.3010299956639812 .text - .globl create_context_memory + .globl create_context_memory # -- Begin function create_context_memory .p2align 5 .type create_context_memory,@function create_context_memory: # @create_context_memory @@ -375,8 +371,11 @@ create_context_memory: # @create_context_memory ori $fp, $zero, 1016 pcalau12i $a0, %pc_hi20(probability) addi.d $s0, $a0, %pc_lo12(probability) - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_0) + lu12i.w $a0, 330231 + ori $a0, $a0, 2559 + lu32i.d $a0, 214035 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fs0, $a0 pcalau12i $a0, %pc_hi20(entropy) addi.d $s1, $a0, %pc_lo12(entropy) move $s2, $zero @@ -9331,14 +9330,7 @@ init_contexts: # @init_contexts .Lfunc_end3: .size init_contexts, .Lfunc_end3-init_contexts # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function XRate -.LCPI4_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI4_1: - .dword 0x3fa0000000000000 # double 0.03125 - .text - .globl XRate + .globl XRate # -- Begin function XRate .p2align 5 .type XRate,@function XRate: # @XRate @@ -9368,30 +9360,31 @@ XRate: # @XRate andn $a1, $a1, $a2 slti $a2, $a1, 127 maskeqz $a1, $a1, $a2 - srli.d $a4, $a3, 32 - pcalau12i $a5, %pc_hi20(.LCPI4_0) - fld.d $fa0, $a5, %pc_lo12(.LCPI4_0) + ori $a4, $zero, 127 + masknez $a2, $a4, $a2 + or $a1, $a1, $a2 + srli.d $a2, $a3, 32 lu52i.d $a5, $zero, 1107 - or $a4, $a4, $a5 - movgr2fr.d $fa1, $a4 - fsub.d $fa0, $fa1, $fa0 - lu12i.w $a4, 275200 - bstrins.d $a3, $a4, 63, 32 + or $a2, $a2, $a5 + movgr2fr.d $fa0, $a2 + lu12i.w $a2, 256 + lu52i.d $a2, $a2, 1107 + movgr2fr.d $fa1, $a2 + fsub.d $fa0, $fa0, $fa1 + lu12i.w $a2, 275200 + bstrins.d $a3, $a2, 63, 32 movgr2fr.d $fa1, $a3 fadd.d $fa0, $fa1, $fa0 - pcalau12i $a3, %pc_hi20(.LCPI4_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI4_1) - ori $a3, $zero, 127 - masknez $a2, $a3, $a2 - or $a1, $a1, $a2 + lu52i.d $a2, $zero, 1018 + movgr2fr.d $fa1, $a2 fmul.d $fa0, $fa0, $fa1 vldi $vr1, -912 fcmp.clt.d $fcc0, $fa1, $fa0 fsel $fa0, $fa0, $fa1, $fcc0 slli.d $a2, $a0, 3 - pcalau12i $a4, %pc_hi20(probability) - addi.d $a4, $a4, %pc_lo12(probability) - fldx.d $fa1, $a4, $a2 + pcalau12i $a3, %pc_hi20(probability) + addi.d $a3, $a3, %pc_lo12(probability) + fldx.d $fa1, $a3, $a2 slli.d $a2, $a1, 3 pcalau12i $a5, %pc_hi20(entropy) addi.d $a5, $a5, %pc_lo12(entropy) @@ -9399,9 +9392,9 @@ XRate: # @XRate fneg.d $fa1, $fa1 fmul.d $fa1, $fa0, $fa1 movgr2fr.d $fa3, $zero - sub.d $a0, $a3, $a0 + sub.d $a0, $a4, $a0 slli.d $a0, $a0, 3 - fldx.d $fa4, $a4, $a0 + fldx.d $fa4, $a3, $a0 xori $a0, $a1, 127 slli.d $a0, $a0, 3 fldx.d $fa5, $a5, $a0 @@ -9413,16 +9406,7 @@ XRate: # @XRate .Lfunc_end4: .size XRate, .Lfunc_end4-XRate # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function GetCtxModelNumber -.LCPI5_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI5_1: - .dword 0x3fa0000000000000 # double 0.03125 -.LCPI5_2: - .dword 0x46293e5939a08cea # double 1.0E+30 - .text - .globl GetCtxModelNumber + .globl GetCtxModelNumber # -- Begin function GetCtxModelNumber .p2align 5 .type GetCtxModelNumber,@function GetCtxModelNumber: # @GetCtxModelNumber @@ -9484,18 +9468,19 @@ GetCtxModelNumber: # @GetCtxModelNumber maskeqz $a4, $a5, $a4 or $a4, $a4, $a6 srli.d $a5, $a0, 32 - pcalau12i $a6, %pc_hi20(.LCPI5_0) - fld.d $fa0, $a6, %pc_lo12(.LCPI5_0) lu52i.d $t8, $zero, 1107 or $a5, $a5, $t8 movgr2fr.d $fa1, $a5 - fsub.d $fa2, $fa1, $fa0 + lu12i.w $a5, 256 + lu52i.d $a5, $a5, 1107 + movgr2fr.d $fa0, $a5 + fsub.d $fa1, $fa1, $fa0 lu12i.w $fp, 275200 - pcalau12i $a5, %pc_hi20(.LCPI5_1) - fld.d $fa1, $a5, %pc_lo12(.LCPI5_1) bstrins.d $a0, $fp, 63, 32 - movgr2fr.d $fa3, $a0 - fadd.d $fa2, $fa3, $fa2 + movgr2fr.d $fa2, $a0 + fadd.d $fa2, $fa2, $fa1 + lu52i.d $a0, $zero, 1018 + movgr2fr.d $fa1, $a0 fmul.d $fa3, $fa2, $fa1 vldi $vr2, -912 fcmp.clt.d $fcc0, $fa2, $fa3 @@ -9742,8 +9727,11 @@ GetCtxModelNumber: # @GetCtxModelNumber addi.d $a0, $a0, %pc_lo12(INIT_FLD_LAST_I) st.d $a0, $sp, 128 # 8-byte Folded Spill move $a0, $zero - pcalau12i $a4, %pc_hi20(.LCPI5_2) - fld.d $ft10, $a4, %pc_lo12(.LCPI5_2) + lu12i.w $a4, 236040 + ori $a4, $a4, 3306 + lu32i.d $a4, -442791 + lu52i.d $a4, $a4, 1122 + movgr2fr.d $ft10, $a4 ori $s5, $zero, 2 ori $s6, $zero, 176 ori $t0, $zero, 120 diff --git a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/decoder.s b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/decoder.s index bb74f21a..320fcc76 100644 --- a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/decoder.s +++ b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/decoder.s @@ -2031,14 +2031,7 @@ UpdateDecoders: # @UpdateDecoders .Lfunc_end4: .size UpdateDecoders, .Lfunc_end4-UpdateDecoders # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function Build_Status_Map -.LCPI5_0: - .dword 0x41dfffffffc00000 # double 2147483647 -.LCPI5_1: - .dword 0x4059000000000000 # double 100 - .text - .globl Build_Status_Map + .globl Build_Status_Map # -- Begin function Build_Status_Map .p2align 5 .type Build_Status_Map,@function Build_Status_Map: # @Build_Status_Map @@ -2079,13 +2072,16 @@ Build_Status_Map: # @Build_Status_Map addi.w $s6, $zero, -1 pcalau12i $a0, %got_pc_hi20(input) ld.d $s8, $a0, %got_pc_lo12(input) - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI5_0) - pcalau12i $a0, %pc_hi20(.LCPI5_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI5_1) move $a2, $zero move $a0, $zero move $a3, $zero + lu12i.w $a1, -1024 + lu52i.d $a1, $a1, 1053 + movgr2fr.d $fs0, $a1 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fs1, $a1 b .LBB5_4 .p2align 4, , 16 .LBB5_3: # %._crit_edge.us diff --git a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/header.s b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/header.s index 9aca44a7..b052c4d1 100644 --- a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/header.s +++ b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/header.s @@ -1,10 +1,6 @@ .file "header.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function SliceHeader -.LCPI0_0: - .dword 0x3fe62e42fefa39ef # double 0.69314718055994529 .text - .globl SliceHeader + .globl SliceHeader # -- Begin function SliceHeader .p2align 5 .type SliceHeader,@function SliceHeader: # @SliceHeader @@ -1054,8 +1050,11 @@ SliceHeader: # @SliceHeader fcvt.d.s $fa0, $fa0 pcaddu18i $ra, %call36(log) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) + lu12i.w $a0, -4189 + ori $a0, $a0, 2543 + lu32i.d $a0, 405058 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fdiv.d $fa0, $fa0, $fa1 vreplvei.d $vr0, $vr0, 0 ldptr.w $a2, $s1, 15436 diff --git a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/image.s b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/image.s index 51a6b3d2..f7ffd356 100644 --- a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/image.s +++ b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/image.s @@ -988,18 +988,7 @@ code_a_picture: # @code_a_picture .Lfunc_end1: .size code_a_picture, .Lfunc_end1-code_a_picture # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function encode_one_frame -.LCPI2_0: - .word 0x3f59999a # float 0.850000023 -.LCPI2_1: - .word 0x3f933333 # float 1.14999998 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI2_2: - .dword 0x3fe5c28f5c28f5c3 # double 0.68000000000000005 - .text - .globl encode_one_frame + .globl encode_one_frame # -- Begin function encode_one_frame .p2align 5 .type encode_one_frame,@function encode_one_frame: # @encode_one_frame @@ -2387,13 +2376,14 @@ encode_one_frame: # @encode_one_frame ld.d $a0, $sp, 304 # 8-byte Folded Reload ld.d $a0, $a0, 0 ld.w $a1, $a0, 1576 - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI2_0) ld.d $a0, $s1, %pc_lo12(quadratic_RC) sltui $a1, $a1, 1 - vldi $vr1, -1168 + vldi $vr0, -1168 + lu12i.w $a2, 259481 + ori $a2, $a2, 2458 + movgr2fr.w $fa1, $a2 movgr2cf $fcc0, $a1 - fsel $fa0, $fa1, $fa0, $fcc0 + fsel $fa0, $fa0, $fa1, $fcc0 ori $a1, $zero, 1 ori $a3, $zero, 1 move $a2, $zero @@ -2653,13 +2643,14 @@ encode_one_frame: # @encode_one_frame jirl $ra, $ra, 0 ld.d $a0, $s6, 0 ld.w $a1, $a0, 1576 - pcalau12i $a0, %pc_hi20(.LCPI2_1) - fld.s $fa0, $a0, %pc_lo12(.LCPI2_1) ld.d $a0, $s0, %pc_lo12(quadratic_RC) sltui $a1, $a1, 1 - vldi $vr1, -1168 + vldi $vr0, -1168 + lu12i.w $a2, 260403 + ori $a2, $a2, 819 + movgr2fr.w $fa1, $a2 movgr2cf $fcc0, $a1 - fsel $fa0, $fa1, $fa0, $fcc0 + fsel $fa0, $fa0, $fa1, $fcc0 ori $a1, $zero, 1 ori $a3, $zero, 1 move $a2, $zero @@ -2877,14 +2868,17 @@ encode_one_frame: # @encode_one_frame fadd.d $fa0, $fa1, $fa0 pcaddu18i $ra, %call36(exp2) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI2_2) - fld.d $fa1, $a0, %pc_lo12(.LCPI2_2) - fld.s $fa3, $s7, 812 - fld.s $fa4, $s7, 816 - fld.s $fa5, $s7, 820 - fmul.d $fa2, $fa0, $fa1 - fadd.s $fa0, $fa3, $fa4 - fadd.s $fa0, $fa0, $fa5 + lu12i.w $a0, 377487 + ori $a0, $a0, 1475 + lu32i.d $a0, 377487 + lu52i.d $a0, $a0, 1022 + fld.s $fa1, $s7, 812 + fld.s $fa3, $s7, 816 + fld.s $fa4, $s7, 820 + movgr2fr.d $fa2, $a0 + fmul.d $fa2, $fa0, $fa2 + fadd.s $fa0, $fa1, $fa3 + fadd.s $fa0, $fa0, $fa4 fld.s $fa1, $s1, 812 fld.s $fa3, $s1, 816 fld.s $fa4, $s1, 820 diff --git a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/leaky_bucket.s b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/leaky_bucket.s index 0eac32be..fe345707 100644 --- a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/leaky_bucket.s +++ b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/leaky_bucket.s @@ -305,12 +305,7 @@ Sort: # @Sort .Lfunc_end3: .size Sort, .Lfunc_end3-Sort # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function calc_buffer -.LCPI4_0: - .word 0x5f000000 # float 9.22337203E+18 - .text - .globl calc_buffer + .globl calc_buffer # -- Begin function calc_buffer .p2align 5 .type calc_buffer,@function calc_buffer: # @calc_buffer @@ -455,12 +450,12 @@ calc_buffer: # @calc_buffer fadd.s $fa1, $fa1, $fa1 slti $a1, $a0, 0 movgr2fr.d $fa2, $a0 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI4_0) ffint.s.l $fa2, $fa2 movgr2cf $fcc0, $a1 fsel $fa1, $fa2, $fa1, $fcc0 fdiv.s $fa0, $fa0, $fa1 + lu12i.w $a0, 389120 + movgr2fr.w $fs0, $a0 fcmp.clt.s $fcc0, $fa0, $fs0 ftintrz.l.s $fa1, $fa0 movfr2gr.d $a0, $fa1 diff --git a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/lencod.s b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/lencod.s index bd61f012..72689dad 100644 --- a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/lencod.s +++ b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/lencod.s @@ -26,12 +26,7 @@ init_stats: # @init_stats .Lfunc_end0: .size init_stats, .Lfunc_end0-init_stats # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI1_0: - .dword 0x3fdffffbce4217d3 # double 0.49999900000000003 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -48,11 +43,12 @@ main: # @main st.d $s6, $sp, 200 # 8-byte Folded Spill st.d $s7, $sp, 192 # 8-byte Folded Spill st.d $s8, $sp, 184 # 8-byte Folded Spill + fst.d $fs0, $sp, 176 # 8-byte Folded Spill pcalau12i $a2, %pc_hi20(giRDOpt_B8OnlyFlag) st.w $zero, $a2, %pc_lo12(giRDOpt_B8OnlyFlag) addi.w $a2, $zero, -1 pcalau12i $a3, %pc_hi20(p_in) - st.d $a2, $sp, 136 # 8-byte Folded Spill + st.d $a2, $sp, 128 # 8-byte Folded Spill lu32i.d $a2, 0 st.d $a3, $sp, 64 # 8-byte Folded Spill st.w $a2, $a3, %pc_lo12(p_in) @@ -326,9 +322,9 @@ main: # @main vld $vr0, $a0, 16 vld $vr1, $a0, 0 ldptr.w $a0, $a1, 5112 - st.d $a2, $sp, 176 - vst $vr0, $sp, 160 - vst $vr1, $sp, 144 + st.d $a2, $sp, 168 + vst $vr0, $sp, 152 + vst $vr1, $sp, 136 sltui $a0, $a0, 2 pcalau12i $a1, %pc_hi20(.L.str.214) addi.d $a1, $a1, %pc_lo12(.L.str.214) @@ -365,7 +361,7 @@ main: # @main pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 ld.w $a0, $s2, %pc_lo12(p_dec) - ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $a1, $sp, 128 # 8-byte Folded Reload beq $a0, $a1, .LBB1_30 # %bb.29: ld.d $a0, $s0, %pc_lo12(input) @@ -379,7 +375,7 @@ main: # @main ldptr.w $a0, $a0, 15536 slli.d $a1, $a0, 3 alsl.d $a0, $a0, $a1, 1 - addi.d $a1, $sp, 144 + addi.d $a1, $sp, 136 add.d $a1, $a1, $a0 pcalau12i $a0, %pc_hi20(.L.str.218) addi.d $a0, $a0, %pc_lo12(.L.str.218) @@ -592,6 +588,11 @@ main: # @main lu12i.w $a5, 3 ori $s6, $a5, 3044 ori $s2, $a3, 640 + lu12i.w $a3, -203743 + ori $a3, $a3, 2003 + lu32i.d $a3, -5 + lu52i.d $a3, $a3, 1021 + movgr2fr.d $fs0, $a3 lu12i.w $a3, 524287 ori $a3, $a3, 4095 st.d $a3, $sp, 112 # 8-byte Folded Spill @@ -692,7 +693,7 @@ main: # @main ld.w $a4, $fp, %pc_lo12(log2_max_frame_num_minus4) ldx.w $a5, $a0, $s6 addi.d $a4, $a4, 4 - ld.d $a6, $sp, 136 # 8-byte Folded Reload + ld.d $a6, $sp, 128 # 8-byte Folded Reload sll.w $a4, $a6, $a4 andn $a4, $a5, $a4 b .LBB1_78 @@ -784,17 +785,17 @@ main: # @main stptr.w $a0, $a2, 2256 .LBB1_90: # %.thread102 # in Loop: Header=BB1_53 Depth=1 - ldptr.w $a4, $a1, 2096 - beqz $a4, .LBB1_97 + ldptr.w $a3, $a1, 2096 + beqz $a3, .LBB1_97 .LBB1_91: # in Loop: Header=BB1_53 Depth=1 ldptr.w $a2, $a1, 4144 ld.d $a0, $s5, %pc_lo12(img) beqz $a2, .LBB1_94 # %bb.92: # in Loop: Header=BB1_53 Depth=1 - ld.w $a3, $a0, 0 + ld.w $a4, $a0, 0 ld.w $a5, $s4, %pc_lo12(start_frame_no_in_this_IGOP) ld.w $a6, $a1, 8 - sub.d $a5, $a3, $a5 + sub.d $a5, $a4, $a5 addi.w $a5, $a5, 1 bne $a5, $a6, .LBB1_94 # %bb.93: # in Loop: Header=BB1_53 Depth=1 @@ -803,25 +804,23 @@ main: # @main movgr2fr.w $fa0, $a5 ffint.s.w $fa0, $fa0 fcvt.d.s $fa0, $fa0 - movgr2fr.w $fa1, $a4 - pcalau12i $a4, %pc_hi20(.LCPI1_0) - fld.d $fa2, $a4, %pc_lo12(.LCPI1_0) + movgr2fr.w $fa1, $a3 ffint.d.w $fa1, $fa1 - vldi $vr3, -912 - fadd.d $fa1, $fa1, $fa3 + vldi $vr2, -912 + fadd.d $fa1, $fa1, $fa2 fdiv.d $fa0, $fa0, $fa1 - fadd.d $fa0, $fa0, $fa2 + fadd.d $fa0, $fa0, $fs0 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a4, $fa0 - addi.d $a3, $a3, -1 - mul.d $a3, $a5, $a3 - sub.w $a2, $a2, $a3 - ld.d $a3, $sp, 88 # 8-byte Folded Reload - ld.w $a3, $a3, %pc_lo12(initial_Bframes) - div.w $a2, $a2, $a4 + movfr2gr.s $a3, $fa0 + addi.d $a4, $a4, -1 + mul.d $a4, $a5, $a4 + sub.w $a2, $a2, $a4 + ld.d $a4, $sp, 88 # 8-byte Folded Reload + ld.w $a4, $a4, %pc_lo12(initial_Bframes) + div.w $a2, $a2, $a3 addi.d $a2, $a2, -1 stptr.w $a2, $a1, 2096 - sub.w $a2, $a2, $a3 + sub.w $a2, $a2, $a4 slli.d $a3, $a2, 1 lu12i.w $a5, 3 ori $a4, $a5, 3028 @@ -851,8 +850,8 @@ main: # @main .LBB1_96: # %.thread102 # in Loop: Header=BB1_53 Depth=1 st.w $a2, $a0, 20 - ldptr.w $a4, $a1, 2096 - bnez $a4, .LBB1_91 + ldptr.w $a3, $a1, 2096 + bnez $a3, .LBB1_91 .LBB1_97: # %.thread102._crit_edge # in Loop: Header=BB1_53 Depth=1 ld.d $a0, $s5, %pc_lo12(img) @@ -992,7 +991,7 @@ main: # @main ld.d $a3, $sp, 112 # 8-byte Folded Reload add.d $a1, $a1, $a3 addi.d $a2, $a2, 4 - ld.d $a3, $sp, 136 # 8-byte Folded Reload + ld.d $a3, $sp, 128 # 8-byte Folded Reload sll.w $a2, $a3, $a2 andn $a1, $a1, $a2 stptr.w $a1, $a0, 15332 @@ -1036,7 +1035,7 @@ main: # @main jirl $ra, $ra, 0 ld.d $a0, $sp, 72 # 8-byte Folded Reload ld.w $a0, $a0, %pc_lo12(p_dec) - ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $a1, $sp, 128 # 8-byte Folded Reload beq $a0, $a1, .LBB1_132 # %bb.131: pcaddu18i $ra, %call36(close) @@ -1176,6 +1175,7 @@ main: # @main pcaddu18i $ra, %call36(FreeParameterSets) jirl $ra, $ra, 0 move $a0, $zero + fld.d $fs0, $sp, 176 # 8-byte Folded Reload ld.d $s8, $sp, 184 # 8-byte Folded Reload ld.d $s7, $sp, 192 # 8-byte Folded Reload ld.d $s6, $sp, 200 # 8-byte Folded Reload @@ -3811,16 +3811,7 @@ process_2nd_IGOP: # @process_2nd_IGOP .Lfunc_end13: .size process_2nd_IGOP, .Lfunc_end13-process_2nd_IGOP # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function report -.LCPI14_0: - .word 0x447a0000 # float 1000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI14_1: - .dword 0x3f50624dd2f1a9fc # double 0.001 - .text - .globl report + .globl report # -- Begin function report .p2align 5 .type report,@function report: # @report @@ -4745,11 +4736,11 @@ report: # @report ld.d $a1, $sp, 344 # 8-byte Folded Reload add.d $s4, $s2, $a1 add.d $s2, $s0, $s8 - pcalau12i $a1, %pc_hi20(.LCPI14_0) - fld.s $fs0, $a1, %pc_lo12(.LCPI14_0) fst.s $fa1, $a0, 12 ld.d $a0, $s7, 0 fcvt.d.s $fa0, $fa0 + lu12i.w $a1, 280480 + movgr2fr.w $fs0, $a1 fdiv.s $fa1, $fa1, $fs0 fcvt.d.s $fa1, $fa1 movfr2gr.d $a3, $fa1 @@ -6658,16 +6649,19 @@ report: # @report fcvt.d.s $fa5, $fa5 add.d $a6, $a6, $a7 add.d $t0, $a2, $a5 - addi.w $t1, $t0, 0 - div.d $a6, $a6, $t1 ld.d $a1, $a1, 736 ld.d $t1, $s2, %pc_lo12(tot_time) - pcalau12i $t2, %pc_hi20(.LCPI14_1) - fld.d $fa6, $t2, %pc_lo12(.LCPI14_1) + addi.w $t2, $t0, 0 + div.d $a6, $a6, $t2 div.d $a1, $a1, $a5 - movgr2fr.d $fa7, $t1 - ffint.d.l $fa7, $fa7 - fmul.d $fa6, $fa7, $fa6 + movgr2fr.d $fa6, $t1 + ffint.d.l $fa6, $fa6 + lu12i.w $a5, -184550 + ori $a5, $a5, 2556 + lu32i.d $a5, 25165 + lu52i.d $a5, $a5, 1013 + movgr2fr.d $fa7, $a5 + fmul.d $fa6, $fa6, $fa7 movgr2fr.w $fa7, $t0 ffint.d.w $fa7, $fa7 fdiv.d $fa6, $fa6, $fa7 @@ -6694,27 +6688,30 @@ report: # @report ld.w $a3, $a4, 12 fld.s $fa0, $a5, 12 fld.s $fa1, $a5, 16 - fld.s $fa2, $a5, 20 ld.w $a4, $a4, 16 + fld.s $fa2, $a5, 20 fcvt.d.s $fa0, $fa0 fcvt.d.s $fa1, $fa1 + fld.s $fa3, $a5, 84 fcvt.d.s $fa2, $fa2 ld.d $a6, $a1, 720 - fld.s $fa3, $a5, 84 fld.s $fa4, $a5, 88 + fcvt.d.s $fa3, $fa3 fld.s $fa5, $a5, 92 ld.d $a1, $a1, 16 - fcvt.d.s $fa3, $fa3 fcvt.d.s $fa4, $fa4 + ld.d $a5, $s2, %pc_lo12(tot_time) fcvt.d.s $fa5, $fa5 add.d $a1, $a1, $a6 - ld.d $a5, $s2, %pc_lo12(tot_time) - pcalau12i $a7, %pc_hi20(.LCPI14_1) - fld.d $fa6, $a7, %pc_lo12(.LCPI14_1) div.d $a1, $a1, $a2 + movgr2fr.d $fa6, $a5 + ffint.d.l $fa6, $fa6 + lu12i.w $a5, -184550 + ori $a5, $a5, 2556 + lu32i.d $a5, 25165 + lu52i.d $a5, $a5, 1013 movgr2fr.d $fa7, $a5 - ffint.d.l $fa7, $fa7 - fmul.d $fa6, $fa7, $fa6 + fmul.d $fa6, $fa6, $fa7 movgr2fr.w $fa7, $a2 ffint.d.w $fa7, $fa7 fdiv.d $fa6, $fa6, $fa7 diff --git a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/md_highfast.s b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/md_highfast.s index 332b2d52..0008c8cc 100644 --- a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/md_highfast.s +++ b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/md_highfast.s @@ -1,12 +1,6 @@ .file "md_highfast.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function encode_one_macroblock_highfast -.LCPI0_0: - .dword 0x7fefffffffffffff # double 1.7976931348623157E+308 -.LCPI0_1: - .dword 0x46293e5939a08cea # double 1.0E+30 .text - .globl encode_one_macroblock_highfast + .globl encode_one_macroblock_highfast # -- Begin function encode_one_macroblock_highfast .p2align 5 .type encode_one_macroblock_highfast,@function encode_one_macroblock_highfast: # @encode_one_macroblock_highfast @@ -65,7 +59,7 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast .LBB0_6: ld.w $a0, $a1, 12 ldptr.d $s0, $a1, 14224 - addi.d $s4, $s8, -1 + addi.d $s5, $s8, -1 ori $s2, $zero, 536 mul.d $s1, $a0, $s2 pcaddu18i $ra, %call36(FmoGetPreviousMBNr) @@ -77,17 +71,17 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ldptr.d $a2, $a1, 14224 mul.d $a0, $a0, $s2 add.d $a0, $a2, $a0 - st.d $a0, $sp, 48 # 8-byte Folded Spill + st.d $a0, $sp, 56 # 8-byte Folded Spill b .LBB0_9 .LBB0_8: - st.d $zero, $sp, 48 # 8-byte Folded Spill + st.d $zero, $sp, 56 # 8-byte Folded Spill .LBB0_9: ldptr.d $a0, $a1, 14384 ld.d $a0, $a0, 0 ld.d $a0, $a0, 0 ld.d $a0, $a0, 0 ld.d $a0, $a0, 0 - sltui $a1, $s4, 1 + sltui $a1, $s5, 1 st.d $a1, $sp, 152 # 8-byte Folded Spill add.d $s3, $s0, $s1 ld.d $s0, $a0, 0 @@ -100,7 +94,6 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ld.d $a0, $a0, 0 ldptr.w $a0, $a0, 5244 ori $a1, $zero, 2 - st.d $s4, $sp, 56 # 8-byte Folded Spill beq $a0, $a1, .LBB0_12 # %bb.10: ori $a1, $zero, 1 @@ -133,14 +126,15 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ld.d $a0, $a0, 0 pcaddu18i $ra, %call36(store_coding_state) jirl $ra, $ra, 0 - move $t7, $fp - pcalau12i $fp, %pc_hi20(.LCPI0_0) + addi.w $a0, $zero, -1 + lu52i.d $s2, $a0, 2046 st.d $s3, $sp, 144 # 8-byte Folded Spill - st.d $t7, $sp, 112 # 8-byte Folded Spill - beqz $t7, .LBB0_16 + st.d $fp, $sp, 112 # 8-byte Folded Spill + beqz $fp, .LBB0_16 # %bb.14: - fld.d $fs1, $fp, %pc_lo12(.LCPI0_0) + move $t7, $fp move $a3, $zero + movgr2fr.d $fs1, $s2 ld.d $t6, $sp, 160 # 8-byte Folded Reload ld.hu $a0, $sp, 290 beqz $a0, .LBB0_84 @@ -148,17 +142,17 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ld.w $a0, $s3, 72 ori $a1, $zero, 13 lu12i.w $fp, 2 - bgeu $a1, $a0, .LBB0_172 - b .LBB0_173 + bgeu $a1, $a0, .LBB0_171 + b .LBB0_172 .LBB0_16: - st.d $s0, $sp, 40 # 8-byte Folded Spill + st.d $a0, $sp, 40 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(best_mode) ld.d $a0, $a0, %got_pc_lo12(best_mode) - ori $s0, $zero, 1 + ori $fp, $zero, 1 st.d $a0, $sp, 96 # 8-byte Folded Spill - st.h $s0, $a0, 0 + st.h $fp, $a0, 0 lu12i.w $s1, 236040 - bne $s8, $s0, .LBB0_19 + bne $s8, $fp, .LBB0_19 # %bb.17: pcaddu18i $ra, %call36(Get_Direct_Motion_Vectors) jirl $ra, $ra, 0 @@ -188,27 +182,29 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast pcaddu18i $ra, %call36(compute_mode_RD_cost) jirl $ra, $ra, 0 .LBB0_19: + st.d $s0, $sp, 32 # 8-byte Folded Spill + st.d $s5, $sp, 48 # 8-byte Folded Spill st.d $s4, $sp, 64 # 8-byte Folded Spill ld.d $t6, $sp, 160 # 8-byte Folded Reload ld.d $a0, $t6, 0 ldptr.w $a0, $a0, 4172 - bne $a0, $s0, .LBB0_21 + bne $a0, $fp, .LBB0_21 # %bb.20: pcaddu18i $ra, %call36(get_initial_mb16x16_cost) jirl $ra, $ra, 0 ld.d $t6, $sp, 160 # 8-byte Folded Reload .LBB0_21: - fld.d $fs1, $fp, %pc_lo12(.LCPI0_0) ori $fp, $zero, 1 lu12i.w $a0, 524287 ori $s0, $a0, 4095 + movgr2fr.d $fs1, $s2 pcalau12i $a0, %got_pc_hi20(bi_pred_me) ld.d $a0, $a0, %got_pc_lo12(bi_pred_me) st.d $a0, $sp, 136 # 8-byte Folded Spill lu12i.w $a0, 3 - ori $s5, $a0, 2120 + ori $s6, $a0, 2120 pcalau12i $a0, %got_pc_hi20(lambda_mf_factor) - ld.d $s6, $a0, %got_pc_lo12(lambda_mf_factor) + ld.d $s5, $a0, %got_pc_lo12(lambda_mf_factor) st.d $zero, $sp, 80 # 8-byte Folded Spill move $a0, $s0 bstrins.d $a0, $s0, 62, 32 @@ -221,7 +217,7 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ori $s1, $a0, 257 ori $s3, $zero, 1 move $s4, $s0 - st.d $s5, $sp, 88 # 8-byte Folded Spill + st.d $s6, $sp, 88 # 8-byte Folded Spill b .LBB0_24 .LBB0_22: # in Loop: Header=BB0_24 Depth=1 move $s4, $a0 @@ -240,7 +236,7 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ld.d $a2, $sp, 136 # 8-byte Folded Reload st.h $zero, $a2, 0 alsl.d $a0, $s3, $a0, 1 - stx.h $zero, $a0, $s5 + stx.h $zero, $a0, $s6 beqz $a1, .LBB0_23 # %bb.25: # in Loop: Header=BB0_24 Depth=1 ld.hu $a0, $sp, 290 @@ -249,7 +245,7 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast st.d $s4, $sp, 104 # 8-byte Folded Spill move $s4, $zero st.w $zero, $sp, 316 - ori $s5, $zero, 1 + ori $s6, $zero, 1 b .LBB0_29 .p2align 4, , 16 .LBB0_27: # in Loop: Header=BB0_29 Depth=2 @@ -261,7 +257,7 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast pcaddu18i $ra, %call36(SetRefAndMotionVectors) jirl $ra, $ra, 0 .LBB0_28: # in Loop: Header=BB0_29 Depth=2 - move $s5, $zero + move $s6, $zero addi.d $a0, $s3, -1 sltu $a0, $zero, $a0 xori $a1, $s4, 1 @@ -277,7 +273,7 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ld.w $s7, $sp, 384 beqz $a0, .LBB0_36 # %bb.30: # in Loop: Header=BB0_29 Depth=2 - fld.d $fs0, $s6, 0 + fld.d $fs0, $s5, 0 fsqrt.d $fa0, $fs0 fcmp.cor.d $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB0_51 @@ -473,7 +469,7 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast stx.b $a4, $a3, $a2 stx.b $a4, $a3, $s4 sltui $a2, $s3, 2 - orn $a2, $a2, $s5 + orn $a2, $a2, $s6 andi $a2, $a2, 1 bnez $a2, .LBB0_28 b .LBB0_27 @@ -499,7 +495,7 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast stx.b $a4, $a5, $a3 stx.b $a4, $a5, $a2 sltui $a2, $s3, 2 - orn $a2, $a2, $s5 + orn $a2, $a2, $s6 andi $a2, $a2, 1 beqz $a2, .LBB0_27 b .LBB0_28 @@ -522,7 +518,7 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast st.b $a1, $a2, 5 st.b $a1, $a2, 4 sltui $a2, $s3, 2 - orn $a2, $a2, $s5 + orn $a2, $a2, $s6 andi $a2, $a2, 1 bnez $a2, .LBB0_28 b .LBB0_27 @@ -546,7 +542,7 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast b .LBB0_35 .p2align 4, , 16 .LBB0_54: # in Loop: Header=BB0_24 Depth=1 - ld.d $s5, $sp, 88 # 8-byte Folded Reload + ld.d $s6, $sp, 88 # 8-byte Folded Reload bne $s3, $fp, .LBB0_69 # %bb.55: # in Loop: Header=BB0_24 Depth=1 ori $a0, $zero, 3 @@ -635,12 +631,12 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast slli.d $a1, $a2, 3 ldx.d $a0, $a0, $a1 ld.hu $a1, $a0, 0 - ld.d $a2, $sp, 40 # 8-byte Folded Reload + ld.d $a2, $sp, 32 # 8-byte Folded Reload ld.hu $a2, $a2, 0 bne $a1, $a2, .LBB0_68 # %bb.66: # in Loop: Header=BB0_24 Depth=1 ld.hu $a0, $a0, 2 - ld.d $a1, $sp, 40 # 8-byte Folded Reload + ld.d $a1, $sp, 32 # 8-byte Folded Reload ld.hu $a1, $a1, 2 bne $a0, $a1, .LBB0_68 # %bb.67: # in Loop: Header=BB0_24 Depth=1 @@ -702,7 +698,7 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast move $s7, $t6 pcaddu18i $ra, %call36(store_coding_state) jirl $ra, $ra, 0 - addi.w $a0, $zero, -1 + ld.d $a0, $sp, 40 # 8-byte Folded Reload lu32i.d $a0, 0 ld.d $s6, $sp, 144 # 8-byte Folded Reload st.w $a0, $s6, 468 @@ -1105,6 +1101,7 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ld.d $s3, $sp, 144 # 8-byte Folded Reload ld.d $s4, $sp, 64 # 8-byte Folded Reload ld.d $t7, $sp, 112 # 8-byte Folded Reload + ld.d $s5, $sp, 48 # 8-byte Folded Reload ld.d $a3, $sp, 80 # 8-byte Folded Reload ld.hu $a0, $sp, 290 bnez $a0, .LBB0_15 @@ -1113,23 +1110,27 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ld.d $a0, $a0, 0 ld.w $a1, $a0, 20 ori $a2, $zero, 2 - st.d $s4, $sp, 64 # 8-byte Folded Spill bne $a1, $a2, .LBB0_86 # %bb.85: - pcalau12i $a1, %pc_hi20(.LCPI0_1) - fld.d $fs1, $a1, %pc_lo12(.LCPI0_1) + lu12i.w $a1, 236040 + ori $a1, $a1, 3306 + lu32i.d $a1, -442791 + lu52i.d $a1, $a1, 1122 + movgr2fr.d $fs1, $a1 + ld.d $a1, $t6, 0 + fst.d $fs1, $sp, 344 + ori $s6, $zero, 9 + bnez $t7, .LBB0_88 b .LBB0_87 .LBB0_86: pcalau12i $a1, %got_pc_hi20(best_mode) ld.d $a1, $a1, %got_pc_lo12(best_mode) st.h $a3, $a1, 0 -.LBB0_87: ld.d $a1, $t6, 0 fst.d $fs1, $sp, 344 ori $s6, $zero, 9 - ld.d $fp, $sp, 56 # 8-byte Folded Reload - bnez $t7, .LBB0_89 -# %bb.88: + bnez $t7, .LBB0_88 +.LBB0_87: ldptr.w $a2, $a1, 5748 sltui $a2, $a2, 1 ori $a3, $zero, 5 @@ -1137,22 +1138,22 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ori $a4, $zero, 9 maskeqz $a2, $a4, $a2 or $s6, $a2, $a3 -.LBB0_89: +.LBB0_88: ldptr.w $a1, $a1, 2120 - beqz $a1, .LBB0_91 -# %bb.90: + beqz $a1, .LBB0_90 +# %bb.89: lu12i.w $a1, 3 ori $a1, $a1, 2122 stx.h $zero, $a0, $a1 -.LBB0_91: +.LBB0_90: ldptr.w $a0, $a0, 15536 move $a1, $zero - beqz $a0, .LBB0_95 -# %bb.92: + beqz $a0, .LBB0_94 +# %bb.91: ori $a0, $zero, 5 st.d $a1, $sp, 72 # 8-byte Folded Spill - beq $s6, $a0, .LBB0_98 -# %bb.93: + beq $s6, $a0, .LBB0_97 +# %bb.92: addi.d $a0, $sp, 180 addi.d $a1, $sp, 176 addi.d $a2, $sp, 172 @@ -1161,8 +1162,8 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ld.d $t6, $sp, 160 # 8-byte Folded Reload ld.d $a0, $t6, 0 ldptr.w $a0, $a0, 4176 - beqz $a0, .LBB0_96 -# %bb.94: + beqz $a0, .LBB0_95 +# %bb.93: addi.d $a0, $sp, 184 addi.d $a1, $sp, 352 ori $a2, $zero, 96 @@ -1173,22 +1174,23 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast jirl $ra, $ra, 0 ld.d $t6, $sp, 160 # 8-byte Folded Reload ld.h $a1, $s3, 416 + st.d $a1, $sp, 72 # 8-byte Folded Spill + b .LBB0_96 +.LBB0_94: st.d $a1, $sp, 72 # 8-byte Folded Spill b .LBB0_97 .LBB0_95: - st.d $a1, $sp, 72 # 8-byte Folded Spill - b .LBB0_98 -.LBB0_96: move $a1, $zero ori $a0, $zero, 3 st.d $a0, $sp, 72 # 8-byte Folded Spill -.LBB0_97: # %.lr.ph.preheader +.LBB0_96: # %.lr.ph.preheader ld.d $t7, $sp, 112 # 8-byte Folded Reload -.LBB0_98: # %.lr.ph.preheader +.LBB0_97: # %.lr.ph.preheader + st.d $s4, $sp, 64 # 8-byte Folded Spill st.w $a1, $s3, 416 ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $a0, $a0, 0 - sltu $t8, $zero, $fp + sltu $t8, $zero, $s5 pcalau12i $a2, %got_pc_hi20(mb_mode_table) ld.d $s5, $a2, %got_pc_lo12(mb_mode_table) ori $s0, $zero, 1 @@ -1212,154 +1214,154 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast pcalau12i $a2, %pc_hi20(wbp_weight) st.d $a2, $sp, 80 # 8-byte Folded Spill move $s7, $t8 - b .LBB0_101 + b .LBB0_100 .p2align 4, , 16 -.LBB0_99: # %.loopexit503.loopexit - # in Loop: Header=BB0_101 Depth=1 +.LBB0_98: # %.loopexit503.loopexit + # in Loop: Header=BB0_100 Depth=1 ld.d $s3, $sp, 144 # 8-byte Folded Reload ld.w $a2, $s3, 416 -.LBB0_100: # %.loopexit503 - # in Loop: Header=BB0_101 Depth=1 +.LBB0_99: # %.loopexit503 + # in Loop: Header=BB0_100 Depth=1 addi.w $a1, $a2, 1 st.w $a1, $s3, 416 ld.d $a3, $sp, 72 # 8-byte Folded Reload - bge $a2, $a3, .LBB0_167 -.LBB0_101: # %.lr.ph + bge $a2, $a3, .LBB0_166 +.LBB0_100: # %.lr.ph # =>This Loop Header: Depth=1 - # Child Loop BB0_118 Depth 2 - # Child Loop BB0_137 Depth 3 - # Child Loop BB0_142 Depth 3 - # Child Loop BB0_147 Depth 3 - # Child Loop BB0_152 Depth 3 + # Child Loop BB0_117 Depth 2 + # Child Loop BB0_136 Depth 3 + # Child Loop BB0_141 Depth 3 + # Child Loop BB0_146 Depth 3 + # Child Loop BB0_151 Depth 3 ldptr.w $a2, $a0, 15536 - beqz $a2, .LBB0_115 -# %bb.102: # in Loop: Header=BB0_101 Depth=1 + beqz $a2, .LBB0_114 +# %bb.101: # in Loop: Header=BB0_100 Depth=1 ld.d $a2, $t6, 0 - beqz $t7, .LBB0_104 -# %bb.103: # in Loop: Header=BB0_101 Depth=1 + beqz $t7, .LBB0_103 +# %bb.102: # in Loop: Header=BB0_100 Depth=1 ldptr.w $a3, $a2, 4048 - bnez $a3, .LBB0_107 -.LBB0_104: # in Loop: Header=BB0_101 Depth=1 + bnez $a3, .LBB0_106 +.LBB0_103: # in Loop: Header=BB0_100 Depth=1 ldptr.w $a2, $a2, 4072 - bne $a2, $s0, .LBB0_107 -# %bb.105: # in Loop: Header=BB0_101 Depth=1 - beqz $a1, .LBB0_115 -# %bb.106: # in Loop: Header=BB0_101 Depth=1 + bne $a2, $s0, .LBB0_106 +# %bb.104: # in Loop: Header=BB0_100 Depth=1 + beqz $a1, .LBB0_114 +# %bb.105: # in Loop: Header=BB0_100 Depth=1 move $a2, $a1 - b .LBB0_100 + b .LBB0_99 .p2align 4, , 16 -.LBB0_107: # in Loop: Header=BB0_101 Depth=1 +.LBB0_106: # in Loop: Header=BB0_100 Depth=1 ld.w $a3, $sp, 180 ori $a2, $zero, 2 - bne $a1, $a2, .LBB0_109 -# %bb.108: # in Loop: Header=BB0_101 Depth=1 - beqz $a3, .LBB0_100 -.LBB0_109: # in Loop: Header=BB0_101 Depth=1 + bne $a1, $a2, .LBB0_108 +# %bb.107: # in Loop: Header=BB0_100 Depth=1 + beqz $a3, .LBB0_99 +.LBB0_108: # in Loop: Header=BB0_100 Depth=1 ld.w $a4, $sp, 176 ori $a2, $zero, 1 - bne $a1, $a2, .LBB0_111 -# %bb.110: # in Loop: Header=BB0_101 Depth=1 - beqz $a4, .LBB0_100 -.LBB0_111: # in Loop: Header=BB0_101 Depth=1 + bne $a1, $a2, .LBB0_110 +# %bb.109: # in Loop: Header=BB0_100 Depth=1 + beqz $a4, .LBB0_99 +.LBB0_110: # in Loop: Header=BB0_100 Depth=1 ori $a2, $zero, 3 - bne $a1, $a2, .LBB0_115 -# %bb.112: # in Loop: Header=BB0_101 Depth=1 + bne $a1, $a2, .LBB0_114 +# %bb.111: # in Loop: Header=BB0_100 Depth=1 ori $a2, $zero, 3 - beqz $a4, .LBB0_100 -# %bb.113: # in Loop: Header=BB0_101 Depth=1 - beqz $a3, .LBB0_100 -# %bb.114: # in Loop: Header=BB0_101 Depth=1 + beqz $a4, .LBB0_99 +# %bb.112: # in Loop: Header=BB0_100 Depth=1 + beqz $a3, .LBB0_99 +# %bb.113: # in Loop: Header=BB0_100 Depth=1 ld.w $a1, $sp, 172 - beqz $a1, .LBB0_100 + beqz $a1, .LBB0_99 .p2align 4, , 16 -.LBB0_115: # %.thread489.preheader - # in Loop: Header=BB0_101 Depth=1 +.LBB0_114: # %.thread489.preheader + # in Loop: Header=BB0_100 Depth=1 move $fp, $zero move $s3, $zero - b .LBB0_118 -.LBB0_116: # in Loop: Header=BB0_118 Depth=2 + b .LBB0_117 +.LBB0_115: # in Loop: Header=BB0_117 Depth=2 add.d $a1, $a1, $a3 addi.d $a2, $a2, 1 st.h $a2, $a1, 0 .p2align 4, , 16 -.LBB0_117: # %.critedge483.thread - # in Loop: Header=BB0_118 Depth=2 +.LBB0_116: # %.critedge483.thread + # in Loop: Header=BB0_117 Depth=2 addi.w $fp, $fp, 1 - bge $fp, $s6, .LBB0_99 -.LBB0_118: # %.thread489 - # Parent Loop BB0_101 Depth=1 + bge $fp, $s6, .LBB0_98 +.LBB0_117: # %.thread489 + # Parent Loop BB0_100 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_137 Depth 3 - # Child Loop BB0_142 Depth 3 - # Child Loop BB0_147 Depth 3 - # Child Loop BB0_152 Depth 3 + # Child Loop BB0_136 Depth 3 + # Child Loop BB0_141 Depth 3 + # Child Loop BB0_146 Depth 3 + # Child Loop BB0_151 Depth 3 slli.d $a1, $fp, 2 ldptr.w $a2, $a0, 15536 ldx.w $s2, $s5, $a1 - beqz $a2, .LBB0_121 -# %bb.119: # in Loop: Header=BB0_118 Depth=2 + beqz $a2, .LBB0_120 +# %bb.118: # in Loop: Header=BB0_117 Depth=2 sltu $a1, $zero, $s2 or $a1, $t8, $a1 - beqz $a1, .LBB0_117 -# %bb.120: # in Loop: Header=BB0_118 Depth=2 + beqz $a1, .LBB0_116 +# %bb.119: # in Loop: Header=BB0_117 Depth=2 addi.d $a1, $s8, -2 sltui $a1, $a1, 1 addi.d $a2, $s2, -1 sltu $a2, $zero, $a2 or $a1, $a1, $a2 - beqz $a1, .LBB0_117 -.LBB0_121: # in Loop: Header=BB0_118 Depth=2 + beqz $a1, .LBB0_116 +.LBB0_120: # in Loop: Header=BB0_117 Depth=2 ld.d $a1, $t6, 0 - bne $s8, $s0, .LBB0_127 -# %bb.122: # in Loop: Header=BB0_118 Depth=2 - bne $s2, $s0, .LBB0_127 -# %bb.123: # in Loop: Header=BB0_118 Depth=2 + bne $s8, $s0, .LBB0_126 +# %bb.121: # in Loop: Header=BB0_117 Depth=2 + bne $s2, $s0, .LBB0_126 +# %bb.122: # in Loop: Header=BB0_117 Depth=2 ldptr.w $a2, $a1, 2120 andi $a3, $s3, 255 ld.d $a4, $sp, 120 # 8-byte Folded Reload mul.d $a3, $a3, $a4 st.w $a3, $s4, 4 - beqz $a2, .LBB0_126 -# %bb.124: # in Loop: Header=BB0_118 Depth=2 - bne $s3, $s1, .LBB0_126 -# %bb.125: # in Loop: Header=BB0_118 Depth=2 + beqz $a2, .LBB0_125 +# %bb.123: # in Loop: Header=BB0_117 Depth=2 + bne $s3, $s1, .LBB0_125 +# %bb.124: # in Loop: Header=BB0_117 Depth=2 ld.d $a2, $sp, 96 # 8-byte Folded Reload ldx.h $a2, $a0, $a2 slti $a2, $a2, 2 sub.w $s3, $s1, $a2 -.LBB0_126: # in Loop: Header=BB0_118 Depth=2 +.LBB0_125: # in Loop: Header=BB0_117 Depth=2 slti $a2, $s3, 2 sub.d $fp, $fp, $a2 addi.w $s3, $s3, 1 -.LBB0_127: # in Loop: Header=BB0_118 Depth=2 - bnez $t7, .LBB0_132 -# %bb.128: # in Loop: Header=BB0_118 Depth=2 +.LBB0_126: # in Loop: Header=BB0_117 Depth=2 + bnez $t7, .LBB0_131 +# %bb.127: # in Loop: Header=BB0_117 Depth=2 ldx.w $a2, $a1, $ra - beqz $a2, .LBB0_132 -# %bb.129: # in Loop: Header=BB0_118 Depth=2 + beqz $a2, .LBB0_131 +# %bb.128: # in Loop: Header=BB0_117 Depth=2 ori $a2, $zero, 10 - blt $s2, $a2, .LBB0_132 -# %bb.130: # in Loop: Header=BB0_118 Depth=2 + blt $s2, $a2, .LBB0_131 +# %bb.129: # in Loop: Header=BB0_117 Depth=2 ld.d $a2, $sp, 136 # 8-byte Folded Reload ld.h $a2, $a2, 0 ori $a3, $zero, 3 - blt $a3, $a2, .LBB0_132 -# %bb.131: # in Loop: Header=BB0_118 Depth=2 + blt $a3, $a2, .LBB0_131 +# %bb.130: # in Loop: Header=BB0_117 Depth=2 ld.d $a2, $sp, 144 # 8-byte Folded Reload ld.w $a2, $a2, 364 - beqz $a2, .LBB0_117 -.LBB0_132: # in Loop: Header=BB0_118 Depth=2 - bne $s8, $s0, .LBB0_155 -# %bb.133: # in Loop: Header=BB0_118 Depth=2 + beqz $a2, .LBB0_116 +.LBB0_131: # in Loop: Header=BB0_117 Depth=2 + bne $s8, $s0, .LBB0_154 +# %bb.132: # in Loop: Header=BB0_117 Depth=2 ld.d $a2, $sp, 104 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(active_pps) ld.w $a2, $a2, 196 - bne $a2, $s0, .LBB0_155 -# %bb.134: # in Loop: Header=BB0_118 Depth=2 + bne $a2, $s0, .LBB0_154 +# %bb.133: # in Loop: Header=BB0_117 Depth=2 ori $a2, $zero, 7 - blt $a2, $s2, .LBB0_155 -# %bb.135: # %.preheader501 - # in Loop: Header=BB0_118 Depth=2 + blt $a2, $s2, .LBB0_154 +# %bb.134: # %.preheader501 + # in Loop: Header=BB0_117 Depth=2 ld.d $a2, $sp, 88 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(active_sps) ld.d $a3, $sp, 80 # 8-byte Folded Reload @@ -1372,9 +1374,9 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ld.d $a6, $a6, %got_pc_lo12(best8x8bwref) ldx.bu $a7, $s4, $a4 alsl.d $a4, $s2, $a6, 2 - bne $a7, $s1, .LBB0_140 -# %bb.136: # %.preheader - # in Loop: Header=BB0_118 Depth=2 + bne $a7, $s1, .LBB0_139 +# %bb.135: # %.preheader + # in Loop: Header=BB0_117 Depth=2 ld.b $a6, $a5, 0 ld.d $a7, $a3, 0 ld.d $t0, $a3, 8 @@ -1388,30 +1390,30 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ldx.d $t0, $t0, $t1 move $t1, $zero .p2align 4, , 16 -.LBB0_137: # Parent Loop BB0_101 Depth=1 - # Parent Loop BB0_118 Depth=2 +.LBB0_136: # Parent Loop BB0_100 Depth=1 + # Parent Loop BB0_117 Depth=2 # => This Inner Loop Header: Depth=3 ld.w $t2, $a7, 0 ld.w $t3, $t0, 0 add.d $t2, $t2, $t3 addi.w $t3, $t2, -128 addi.w $t4, $zero, -256 - bltu $t3, $t4, .LBB0_163 -# %bb.138: # in Loop: Header=BB0_137 Depth=3 + bltu $t3, $t4, .LBB0_162 +# %bb.137: # in Loop: Header=BB0_136 Depth=3 move $t2, $t1 - beqz $a6, .LBB0_140 -# %bb.139: # in Loop: Header=BB0_137 Depth=3 + beqz $a6, .LBB0_139 +# %bb.138: # in Loop: Header=BB0_136 Depth=3 addi.d $t1, $t2, 1 addi.d $t0, $t0, 4 addi.d $a7, $a7, 4 - bgeu $s0, $t2, .LBB0_137 -.LBB0_140: # %.critedge - # in Loop: Header=BB0_118 Depth=2 + bgeu $s0, $t2, .LBB0_136 +.LBB0_139: # %.critedge + # in Loop: Header=BB0_117 Depth=2 alsl.d $a6, $s2, $s4, 2 ld.bu $a7, $a6, 1 - bne $a7, $s1, .LBB0_145 -# %bb.141: # %.preheader.1 - # in Loop: Header=BB0_118 Depth=2 + bne $a7, $s1, .LBB0_144 +# %bb.140: # %.preheader.1 + # in Loop: Header=BB0_117 Depth=2 ld.b $a7, $a5, 1 ld.d $t0, $a3, 0 ld.d $t1, $a3, 8 @@ -1425,29 +1427,29 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ldx.d $t1, $t1, $t2 move $t2, $zero .p2align 4, , 16 -.LBB0_142: # Parent Loop BB0_101 Depth=1 - # Parent Loop BB0_118 Depth=2 +.LBB0_141: # Parent Loop BB0_100 Depth=1 + # Parent Loop BB0_117 Depth=2 # => This Inner Loop Header: Depth=3 ld.w $t3, $t0, 0 ld.w $t4, $t1, 0 add.d $t3, $t3, $t4 addi.w $t4, $t3, -128 addi.w $t5, $zero, -256 - bltu $t4, $t5, .LBB0_163 -# %bb.143: # in Loop: Header=BB0_142 Depth=3 - beqz $a7, .LBB0_145 -# %bb.144: # in Loop: Header=BB0_142 Depth=3 + bltu $t4, $t5, .LBB0_162 +# %bb.142: # in Loop: Header=BB0_141 Depth=3 + beqz $a7, .LBB0_144 +# %bb.143: # in Loop: Header=BB0_141 Depth=3 move $t3, $t2 addi.d $t2, $t2, 1 addi.d $t1, $t1, 4 addi.d $t0, $t0, 4 - bltu $t3, $s1, .LBB0_142 -.LBB0_145: # %.critedge.1 - # in Loop: Header=BB0_118 Depth=2 + bltu $t3, $s1, .LBB0_141 +.LBB0_144: # %.critedge.1 + # in Loop: Header=BB0_117 Depth=2 ld.bu $a7, $a6, 2 - bne $a7, $s1, .LBB0_150 -# %bb.146: # %.preheader.2 - # in Loop: Header=BB0_118 Depth=2 + bne $a7, $s1, .LBB0_149 +# %bb.145: # %.preheader.2 + # in Loop: Header=BB0_117 Depth=2 ld.b $a7, $a5, 2 ld.d $t0, $a3, 0 ld.d $t1, $a3, 8 @@ -1461,29 +1463,29 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ldx.d $t1, $t1, $t2 move $t2, $zero .p2align 4, , 16 -.LBB0_147: # Parent Loop BB0_101 Depth=1 - # Parent Loop BB0_118 Depth=2 +.LBB0_146: # Parent Loop BB0_100 Depth=1 + # Parent Loop BB0_117 Depth=2 # => This Inner Loop Header: Depth=3 ld.w $t3, $t0, 0 ld.w $t4, $t1, 0 add.d $t3, $t3, $t4 addi.w $t4, $t3, -128 addi.w $t5, $zero, -256 - bltu $t4, $t5, .LBB0_163 -# %bb.148: # in Loop: Header=BB0_147 Depth=3 - beqz $a7, .LBB0_150 -# %bb.149: # in Loop: Header=BB0_147 Depth=3 + bltu $t4, $t5, .LBB0_162 +# %bb.147: # in Loop: Header=BB0_146 Depth=3 + beqz $a7, .LBB0_149 +# %bb.148: # in Loop: Header=BB0_146 Depth=3 move $t3, $t2 addi.d $t2, $t2, 1 addi.d $t1, $t1, 4 addi.d $t0, $t0, 4 - bltu $t3, $s1, .LBB0_147 -.LBB0_150: # %.critedge.2 - # in Loop: Header=BB0_118 Depth=2 + bltu $t3, $s1, .LBB0_146 +.LBB0_149: # %.critedge.2 + # in Loop: Header=BB0_117 Depth=2 ld.bu $a6, $a6, 3 - bne $a6, $s1, .LBB0_155 -# %bb.151: # %.preheader.3 - # in Loop: Header=BB0_118 Depth=2 + bne $a6, $s1, .LBB0_154 +# %bb.150: # %.preheader.3 + # in Loop: Header=BB0_117 Depth=2 ld.b $a5, $a5, 3 ld.d $a6, $a3, 0 ld.d $a3, $a3, 8 @@ -1497,31 +1499,31 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ldx.d $a4, $a5, $a4 move $a5, $zero .p2align 4, , 16 -.LBB0_152: # Parent Loop BB0_101 Depth=1 - # Parent Loop BB0_118 Depth=2 +.LBB0_151: # Parent Loop BB0_100 Depth=1 + # Parent Loop BB0_117 Depth=2 # => This Inner Loop Header: Depth=3 ld.w $a6, $a3, 0 ld.w $a7, $a4, 0 add.d $a6, $a6, $a7 addi.w $a7, $a6, -128 addi.w $t0, $zero, -256 - bltu $a7, $t0, .LBB0_163 -# %bb.153: # in Loop: Header=BB0_152 Depth=3 - beqz $a2, .LBB0_155 -# %bb.154: # in Loop: Header=BB0_152 Depth=3 + bltu $a7, $t0, .LBB0_162 +# %bb.152: # in Loop: Header=BB0_151 Depth=3 + beqz $a2, .LBB0_154 +# %bb.153: # in Loop: Header=BB0_151 Depth=3 move $a6, $a5 addi.d $a5, $a5, 1 addi.d $a4, $a4, 4 addi.d $a3, $a3, 4 - bltu $a6, $s1, .LBB0_152 + bltu $a6, $s1, .LBB0_151 .p2align 4, , 16 -.LBB0_155: # %.critedge483 - # in Loop: Header=BB0_118 Depth=2 +.LBB0_154: # %.critedge483 + # in Loop: Header=BB0_117 Depth=2 slli.d $a2, $s2, 1 addi.d $a3, $sp, 396 ldx.hu $a2, $a3, $a2 - beqz $a2, .LBB0_157 -# %bb.156: # in Loop: Header=BB0_118 Depth=2 + beqz $a2, .LBB0_156 +# %bb.155: # in Loop: Header=BB0_117 Depth=2 addi.d $a0, $sp, 184 addi.d $a1, $sp, 352 ori $a2, $zero, 96 @@ -1544,57 +1546,57 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $a0, $a0, 0 ld.d $a1, $t6, 0 -.LBB0_157: # in Loop: Header=BB0_118 Depth=2 - bne $s8, $s0, .LBB0_117 -# %bb.158: # in Loop: Header=BB0_118 Depth=2 +.LBB0_156: # in Loop: Header=BB0_117 Depth=2 + bne $s8, $s0, .LBB0_116 +# %bb.157: # in Loop: Header=BB0_117 Depth=2 ori $a2, $zero, 2120 ldx.w $a1, $a1, $a2 - beqz $a1, .LBB0_117 -# %bb.159: # in Loop: Header=BB0_118 Depth=2 - bne $s3, $s1, .LBB0_117 -# %bb.160: # in Loop: Header=BB0_118 Depth=2 + beqz $a1, .LBB0_116 +# %bb.158: # in Loop: Header=BB0_117 Depth=2 + bne $s3, $s1, .LBB0_116 +# %bb.159: # in Loop: Header=BB0_117 Depth=2 ori $s3, $zero, 2 - bne $s2, $s0, .LBB0_117 -# %bb.161: # in Loop: Header=BB0_118 Depth=2 + bne $s2, $s0, .LBB0_116 +# %bb.160: # in Loop: Header=BB0_117 Depth=2 alsl.d $a1, $s2, $a0, 1 lu12i.w $a2, 3 ori $a3, $a2, 2120 ldx.h $a2, $a1, $a3 - blt $s0, $a2, .LBB0_117 -# %bb.162: # in Loop: Header=BB0_118 Depth=2 + blt $s0, $a2, .LBB0_116 +# %bb.161: # in Loop: Header=BB0_117 Depth=2 ld.bu $a4, $s4, 4 - beq $a4, $s1, .LBB0_116 - b .LBB0_117 -.LBB0_163: # %.loopexit552 - # in Loop: Header=BB0_118 Depth=2 + beq $a4, $s1, .LBB0_115 + b .LBB0_116 +.LBB0_162: # %.loopexit552 + # in Loop: Header=BB0_117 Depth=2 ldptr.w $a1, $a1, 2120 - beqz $a1, .LBB0_117 -# %bb.164: # %.loopexit552 - # in Loop: Header=BB0_118 Depth=2 - bne $s3, $s1, .LBB0_117 -# %bb.165: # in Loop: Header=BB0_118 Depth=2 + beqz $a1, .LBB0_116 +# %bb.163: # %.loopexit552 + # in Loop: Header=BB0_117 Depth=2 + bne $s3, $s1, .LBB0_116 +# %bb.164: # in Loop: Header=BB0_117 Depth=2 ori $s3, $zero, 2 - bne $s2, $s0, .LBB0_117 -# %bb.166: # in Loop: Header=BB0_118 Depth=2 + bne $s2, $s0, .LBB0_116 +# %bb.165: # in Loop: Header=BB0_117 Depth=2 alsl.d $a1, $s2, $a0, 1 lu12i.w $a2, 3 ori $a3, $a2, 2120 ldx.h $a2, $a1, $a3 - bge $s0, $a2, .LBB0_116 - b .LBB0_117 -.LBB0_167: # %._crit_edge + bge $s0, $a2, .LBB0_115 + b .LBB0_116 +.LBB0_166: # %._crit_edge ld.w $a0, $a0, 20 ori $a1, $zero, 2 - beq $a0, $a1, .LBB0_171 -# %bb.168: + beq $a0, $a1, .LBB0_170 +# %bb.167: ld.d $a0, $t6, 0 ldptr.w $a1, $a0, 5748 - beqz $a1, .LBB0_171 -# %bb.169: + beqz $a1, .LBB0_170 +# %bb.168: ld.w $a0, $a0, 0 ori $a1, $zero, 99 - blt $a1, $a0, .LBB0_171 -# %bb.170: + blt $a1, $a0, .LBB0_170 +# %bb.169: fld.d $fa0, $sp, 280 addi.d $a0, $sp, 288 pcaddu18i $ra, %call36(fast_mode_intra_decision) @@ -1602,25 +1604,25 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ld.d $a6, $sp, 112 # 8-byte Folded Reload ld.d $t6, $sp, 160 # 8-byte Folded Reload ld.hu $a0, $sp, 288 - beqz $a0, .LBB0_207 -.LBB0_171: # %.loopexit504 + beqz $a0, .LBB0_206 +.LBB0_170: # %.loopexit504 ld.d $s3, $sp, 144 # 8-byte Folded Reload ld.d $s4, $sp, 64 # 8-byte Folded Reload ld.w $a0, $s3, 72 ori $a1, $zero, 13 lu12i.w $fp, 2 - bltu $a1, $a0, .LBB0_173 -.LBB0_172: + bltu $a1, $a0, .LBB0_172 +.LBB0_171: ori $a1, $zero, 1 sll.d $a1, $a1, $a0 ori $a2, $fp, 1536 and $a1, $a1, $a2 ori $s1, $zero, 1 - bnez $a1, .LBB0_174 -.LBB0_173: + bnez $a1, .LBB0_173 +.LBB0_172: addi.d $a0, $a0, -14 sltui $s1, $a0, 1 -.LBB0_174: +.LBB0_173: pcalau12i $a0, %got_pc_hi20(cbp) ld.d $a0, $a0, %got_pc_lo12(cbp) ld.w $a1, $a0, 0 @@ -1632,21 +1634,21 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast addi.d $a3, $a0, -10 sltui $a3, $a3, 1 or $a2, $a2, $a3 - beqz $a2, .LBB0_176 -# %bb.175: + beqz $a2, .LBB0_175 +# %bb.174: ori $a3, $zero, 14 ori $a2, $zero, 1 - bne $a0, $a3, .LBB0_180 -.LBB0_176: - bnez $a1, .LBB0_178 -# %bb.177: + bne $a0, $a3, .LBB0_179 +.LBB0_175: + bnez $a1, .LBB0_177 +# %bb.176: ld.d $a1, $t6, 0 ldptr.w $a1, $a1, 5116 - beqz $a1, .LBB0_179 -.LBB0_178: + beqz $a1, .LBB0_178 +.LBB0_177: ori $a1, $zero, 14 - bne $a0, $a1, .LBB0_181 -.LBB0_179: + bne $a0, $a1, .LBB0_180 +.LBB0_178: ld.w $a0, $s3, 496 st.w $zero, $s3, 4 st.w $a0, $s3, 8 @@ -1658,22 +1660,22 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ld.d $a1, $a1, 0 move $a2, $zero st.w $a0, $a1, 36 -.LBB0_180: # %.sink.split +.LBB0_179: # %.sink.split st.w $a2, $s3, 504 -.LBB0_181: +.LBB0_180: pcaddu18i $ra, %call36(set_stored_macroblock_parameters) jirl $ra, $ra, 0 ld.d $a2, $sp, 160 # 8-byte Folded Reload ld.d $a0, $a2, 0 ldptr.w $a0, $a0, 5116 - beqz $a0, .LBB0_183 -# %bb.182: + beqz $a0, .LBB0_182 +# %bb.181: ld.h $a1, $s0, 0 move $a0, $s3 pcaddu18i $ra, %call36(update_rc) jirl $ra, $ra, 0 ld.d $a2, $sp, 160 # 8-byte Folded Reload -.LBB0_183: +.LBB0_182: fld.d $fa0, $sp, 344 pcalau12i $s2, %pc_hi20(rdopt) ld.d $a0, $s2, %pc_lo12(rdopt) @@ -1681,19 +1683,19 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $a0, $a0, 0 ldptr.w $a1, $a0, 15268 - beqz $a1, .LBB0_186 -# %bb.184: + beqz $a1, .LBB0_185 +# %bb.183: ld.bu $a0, $a0, 12 andi $a0, $a0, 1 - beqz $a0, .LBB0_186 -# %bb.185: + beqz $a0, .LBB0_185 +# %bb.184: ld.w $a0, $s3, 72 - beqz $a0, .LBB0_200 -.LBB0_186: + beqz $a0, .LBB0_199 +.LBB0_185: ld.d $a0, $a2, 0 ldptr.w $a1, $a0, 4732 - beqz $a1, .LBB0_188 -# %bb.187: + beqz $a1, .LBB0_187 +# %bb.186: move $a0, $s4 move $a1, $s1 move $a2, $s3 @@ -1701,14 +1703,14 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast jirl $ra, $ra, 0 ld.d $a2, $sp, 160 # 8-byte Folded Reload ld.d $a0, $a2, 0 -.LBB0_188: +.LBB0_187: ldptr.w $a0, $a0, 5244 ori $a1, $zero, 2 - beq $a0, $a1, .LBB0_191 -# %bb.189: + beq $a0, $a1, .LBB0_190 +# %bb.188: ori $a1, $zero, 1 - bne $a0, $a1, .LBB0_193 -# %bb.190: + bne $a0, $a1, .LBB0_192 +# %bb.189: ld.h $a1, $sp, 426 ld.h $a0, $s0, 0 slli.d $a1, $a1, 2 @@ -1717,8 +1719,8 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ldx.w $a1, $a2, $a1 pcaddu18i $ra, %call36(UMHEX_skip_intrabk_SAD) jirl $ra, $ra, 0 - b .LBB0_192 -.LBB0_191: + b .LBB0_191 +.LBB0_190: ld.h $a1, $sp, 426 ld.h $a0, $s0, 0 slli.d $a1, $a1, 2 @@ -1727,37 +1729,37 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ldx.w $a1, $a2, $a1 pcaddu18i $ra, %call36(smpUMHEX_skip_intrabk_SAD) jirl $ra, $ra, 0 -.LBB0_192: +.LBB0_191: ld.d $a2, $sp, 160 # 8-byte Folded Reload -.LBB0_193: +.LBB0_192: ld.d $a0, $a2, 0 ld.w $a0, $a0, 272 - beqz $a0, .LBB0_199 -# %bb.194: + beqz $a0, .LBB0_198 +# %bb.193: ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $a0, $a0, 0 ld.w $a2, $a0, 20 ori $a1, $zero, 1 - bltu $a1, $a2, .LBB0_199 -# %bb.195: + bltu $a1, $a2, .LBB0_198 +# %bb.194: ld.w $a2, $s3, 72 ori $a3, $zero, 13 - bltu $a3, $a2, .LBB0_197 -# %bb.196: + bltu $a3, $a2, .LBB0_196 +# %bb.195: ori $a3, $zero, 1 sll.d $a3, $a3, $a2 ori $a4, $fp, 1536 and $a3, $a3, $a4 - bnez $a3, .LBB0_198 -.LBB0_197: + bnez $a3, .LBB0_197 +.LBB0_196: addi.d $a1, $a2, -14 sltui $a1, $a1, 1 -.LBB0_198: +.LBB0_197: ld.w $a2, $a0, 12 ldptr.d $a0, $a0, 14240 slli.d $a2, $a2, 2 stx.w $a1, $a0, $a2 -.LBB0_199: +.LBB0_198: fld.d $fs1, $sp, 456 # 8-byte Folded Reload fld.d $fs0, $sp, 464 # 8-byte Folded Reload ld.d $s8, $sp, 472 # 8-byte Folded Reload @@ -1773,45 +1775,45 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ld.d $ra, $sp, 552 # 8-byte Folded Reload addi.d $sp, $sp, 560 ret -.LBB0_200: +.LBB0_199: ori $a0, $zero, 1 - bne $s8, $a0, .LBB0_204 -# %bb.201: + bne $s8, $a0, .LBB0_203 +# %bb.200: ld.w $a0, $s3, 364 - bnez $a0, .LBB0_186 -# %bb.202: - ld.d $a0, $sp, 48 # 8-byte Folded Reload + bnez $a0, .LBB0_185 +# %bb.201: + ld.d $a0, $sp, 56 # 8-byte Folded Reload ld.w $a0, $a0, 72 - bnez $a0, .LBB0_186 -# %bb.203: - ld.d $a0, $sp, 48 # 8-byte Folded Reload + bnez $a0, .LBB0_185 +# %bb.202: + ld.d $a0, $sp, 56 # 8-byte Folded Reload ld.w $a0, $a0, 364 - bnez $a0, .LBB0_186 - b .LBB0_205 -.LBB0_204: # %.thread497 - ld.d $a0, $sp, 48 # 8-byte Folded Reload + bnez $a0, .LBB0_185 + b .LBB0_204 +.LBB0_203: # %.thread497 + ld.d $a0, $sp, 56 # 8-byte Folded Reload ld.w $a0, $a0, 72 - bnez $a0, .LBB0_186 -.LBB0_205: # %.thread499 + bnez $a0, .LBB0_185 +.LBB0_204: # %.thread499 pcaddu18i $ra, %call36(field_flag_inference) jirl $ra, $ra, 0 ld.d $a2, $sp, 160 # 8-byte Folded Reload ld.h $a1, $sp, 430 - beq $a0, $a1, .LBB0_186 -# %bb.206: + beq $a0, $a1, .LBB0_185 +# %bb.205: ld.d $a0, $s2, %pc_lo12(rdopt) lu12i.w $a1, 236040 ori $a1, $a1, 3306 lu32i.d $a1, -442791 lu52i.d $a1, $a1, 1122 st.d $a1, $a0, 0 - b .LBB0_186 -.LBB0_207: + b .LBB0_185 +.LBB0_206: ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $a0, $a0, 0 ldptr.w $a0, $a0, 15536 - beqz $a0, .LBB0_210 -# %bb.208: + beqz $a0, .LBB0_209 +# %bb.207: addi.d $a0, $sp, 180 addi.d $a1, $sp, 176 addi.d $a2, $sp, 172 @@ -1821,8 +1823,8 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ld.d $a0, $t6, 0 ldptr.w $a0, $a0, 4176 ld.d $s0, $sp, 144 # 8-byte Folded Reload - beqz $a0, .LBB0_211 -# %bb.209: + beqz $a0, .LBB0_210 +# %bb.208: addi.d $a0, $sp, 184 addi.d $a1, $sp, 352 ori $a2, $zero, 96 @@ -1834,20 +1836,20 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ld.d $t6, $sp, 160 # 8-byte Folded Reload ld.h $a1, $s0, 416 move $fp, $a1 - b .LBB0_212 -.LBB0_210: + b .LBB0_211 +.LBB0_209: move $a7, $s7 move $a1, $zero move $fp, $zero ld.d $s0, $sp, 144 # 8-byte Folded Reload - b .LBB0_213 -.LBB0_211: + b .LBB0_212 +.LBB0_210: move $a1, $zero ori $fp, $zero, 3 -.LBB0_212: # %.lr.ph530.preheader +.LBB0_211: # %.lr.ph530.preheader ld.d $a6, $sp, 112 # 8-byte Folded Reload move $a7, $s7 -.LBB0_213: # %.lr.ph530.preheader +.LBB0_212: # %.lr.ph530.preheader addi.d $a0, $s8, -2 ld.w $s7, $s5, 20 sltui $a2, $a0, 1 @@ -1893,98 +1895,98 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast and $a2, $a3, $a2 st.d $a2, $sp, 96 # 8-byte Folded Spill alsl.d $a2, $s2, $a5, 1 - st.d $a2, $sp, 56 # 8-byte Folded Spill + st.d $a2, $sp, 48 # 8-byte Folded Spill ori $s6, $zero, 2960 ori $s1, $zero, 10 ori $s3, $zero, 3 - b .LBB0_216 + b .LBB0_215 .p2align 4, , 16 -.LBB0_214: # %.loopexit.loopexit - # in Loop: Header=BB0_216 Depth=1 +.LBB0_213: # %.loopexit.loopexit + # in Loop: Header=BB0_215 Depth=1 ld.d $a3, $sp, 144 # 8-byte Folded Reload ld.w $a2, $a3, 416 -.LBB0_215: # %.loopexit - # in Loop: Header=BB0_216 Depth=1 +.LBB0_214: # %.loopexit + # in Loop: Header=BB0_215 Depth=1 addi.w $a1, $a2, 1 st.w $a1, $a3, 416 - bge $a2, $fp, .LBB0_171 -.LBB0_216: # %.lr.ph530 + bge $a2, $fp, .LBB0_170 +.LBB0_215: # %.lr.ph530 # =>This Inner Loop Header: Depth=1 ld.d $a2, $sp, 128 # 8-byte Folded Reload ld.d $a2, $a2, 0 ldptr.w $a3, $a2, 15536 - beqz $a3, .LBB0_231 -# %bb.217: # in Loop: Header=BB0_216 Depth=1 - beqz $a6, .LBB0_219 -# %bb.218: # in Loop: Header=BB0_216 Depth=1 + beqz $a3, .LBB0_230 +# %bb.216: # in Loop: Header=BB0_215 Depth=1 + beqz $a6, .LBB0_218 +# %bb.217: # in Loop: Header=BB0_215 Depth=1 ldptr.w $a2, $a0, 4048 - bnez $a2, .LBB0_222 -.LBB0_219: # in Loop: Header=BB0_216 Depth=1 + bnez $a2, .LBB0_221 +.LBB0_218: # in Loop: Header=BB0_215 Depth=1 ldptr.w $a2, $a0, 4072 ori $a4, $zero, 1 - bne $a2, $a4, .LBB0_222 -# %bb.220: # in Loop: Header=BB0_216 Depth=1 - beqz $a1, .LBB0_231 -# %bb.221: # in Loop: Header=BB0_216 Depth=1 + bne $a2, $a4, .LBB0_221 +# %bb.219: # in Loop: Header=BB0_215 Depth=1 + beqz $a1, .LBB0_230 +# %bb.220: # in Loop: Header=BB0_215 Depth=1 move $a2, $a1 ld.d $a3, $sp, 144 # 8-byte Folded Reload - b .LBB0_215 + b .LBB0_214 .p2align 4, , 16 -.LBB0_222: # in Loop: Header=BB0_216 Depth=1 +.LBB0_221: # in Loop: Header=BB0_215 Depth=1 ld.w $a4, $sp, 180 ori $a2, $zero, 2 - bne $a1, $a2, .LBB0_224 -# %bb.223: # in Loop: Header=BB0_216 Depth=1 - beqz $a4, .LBB0_226 -.LBB0_224: # in Loop: Header=BB0_216 Depth=1 + bne $a1, $a2, .LBB0_223 +# %bb.222: # in Loop: Header=BB0_215 Depth=1 + beqz $a4, .LBB0_225 +.LBB0_223: # in Loop: Header=BB0_215 Depth=1 ld.w $a5, $sp, 176 ori $a2, $zero, 1 - bne $a1, $a2, .LBB0_227 -# %bb.225: # in Loop: Header=BB0_216 Depth=1 - bnez $a5, .LBB0_227 -.LBB0_226: # in Loop: Header=BB0_216 Depth=1 + bne $a1, $a2, .LBB0_226 +# %bb.224: # in Loop: Header=BB0_215 Depth=1 + bnez $a5, .LBB0_226 +.LBB0_225: # in Loop: Header=BB0_215 Depth=1 ld.d $a3, $sp, 144 # 8-byte Folded Reload - b .LBB0_215 -.LBB0_227: # in Loop: Header=BB0_216 Depth=1 - bne $a1, $s3, .LBB0_231 -# %bb.228: # in Loop: Header=BB0_216 Depth=1 + b .LBB0_214 +.LBB0_226: # in Loop: Header=BB0_215 Depth=1 + bne $a1, $s3, .LBB0_230 +# %bb.227: # in Loop: Header=BB0_215 Depth=1 ori $a2, $zero, 3 - beqz $a5, .LBB0_226 -# %bb.229: # in Loop: Header=BB0_216 Depth=1 - beqz $a4, .LBB0_226 -# %bb.230: # in Loop: Header=BB0_216 Depth=1 + beqz $a5, .LBB0_225 +# %bb.228: # in Loop: Header=BB0_215 Depth=1 + beqz $a4, .LBB0_225 +# %bb.229: # in Loop: Header=BB0_215 Depth=1 ld.w $a1, $sp, 172 - beqz $a1, .LBB0_226 + beqz $a1, .LBB0_225 .p2align 4, , 16 -.LBB0_231: # %.thread496 - # in Loop: Header=BB0_216 Depth=1 +.LBB0_230: # %.thread496 + # in Loop: Header=BB0_215 Depth=1 ldx.w $a2, $a0, $s6 ld.d $a1, $sp, 136 # 8-byte Folded Reload ld.h $a1, $a1, 0 sltui $a3, $a3, 1 - bnez $a6, .LBB0_236 + bnez $a6, .LBB0_235 +# %bb.231: # %.thread496 + # in Loop: Header=BB0_215 Depth=1 + beqz $a2, .LBB0_235 # %bb.232: # %.thread496 - # in Loop: Header=BB0_216 Depth=1 - beqz $a2, .LBB0_236 + # in Loop: Header=BB0_215 Depth=1 + blt $s7, $s1, .LBB0_235 # %bb.233: # %.thread496 - # in Loop: Header=BB0_216 Depth=1 - blt $s7, $s1, .LBB0_236 -# %bb.234: # %.thread496 - # in Loop: Header=BB0_216 Depth=1 - blt $s3, $a1, .LBB0_236 -# %bb.235: # in Loop: Header=BB0_216 Depth=1 + # in Loop: Header=BB0_215 Depth=1 + blt $s3, $a1, .LBB0_235 +# %bb.234: # in Loop: Header=BB0_215 Depth=1 ld.d $a4, $sp, 144 # 8-byte Folded Reload ld.w $a4, $a4, 364 - beqz $a4, .LBB0_239 -.LBB0_236: # in Loop: Header=BB0_216 Depth=1 + beqz $a4, .LBB0_238 +.LBB0_235: # in Loop: Header=BB0_215 Depth=1 or $a3, $a3, $s0 andi $a3, $a3, 1 - beqz $a3, .LBB0_239 -# %bb.237: # in Loop: Header=BB0_216 Depth=1 + beqz $a3, .LBB0_238 +# %bb.236: # in Loop: Header=BB0_215 Depth=1 ld.d $a3, $sp, 88 # 8-byte Folded Reload ld.hu $a3, $a3, 0 - beqz $a3, .LBB0_239 -# %bb.238: # in Loop: Header=BB0_216 Depth=1 + beqz $a3, .LBB0_238 +# %bb.237: # in Loop: Header=BB0_215 Depth=1 addi.d $a0, $sp, 184 addi.d $a1, $sp, 352 ori $a2, $zero, 96 @@ -2006,20 +2008,20 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ldx.w $a2, $a0, $s6 ld.d $a1, $sp, 136 # 8-byte Folded Reload ld.hu $a1, $a1, 0 -.LBB0_239: # in Loop: Header=BB0_216 Depth=1 - bnez $a6, .LBB0_244 -# %bb.240: # in Loop: Header=BB0_216 Depth=1 - beqz $a2, .LBB0_244 -# %bb.241: # in Loop: Header=BB0_216 Depth=1 - blt $s4, $s1, .LBB0_244 -# %bb.242: # in Loop: Header=BB0_216 Depth=1 +.LBB0_238: # in Loop: Header=BB0_215 Depth=1 + bnez $a6, .LBB0_243 +# %bb.239: # in Loop: Header=BB0_215 Depth=1 + beqz $a2, .LBB0_243 +# %bb.240: # in Loop: Header=BB0_215 Depth=1 + blt $s4, $s1, .LBB0_243 +# %bb.241: # in Loop: Header=BB0_215 Depth=1 ext.w.h $a3, $a1 - blt $s3, $a3, .LBB0_244 -# %bb.243: # in Loop: Header=BB0_216 Depth=1 + blt $s3, $a3, .LBB0_243 +# %bb.242: # in Loop: Header=BB0_215 Depth=1 ld.d $a3, $sp, 144 # 8-byte Folded Reload ld.w $a3, $a3, 364 - beqz $a3, .LBB0_247 -.LBB0_244: # in Loop: Header=BB0_216 Depth=1 + beqz $a3, .LBB0_246 +.LBB0_243: # in Loop: Header=BB0_215 Depth=1 ld.d $a3, $sp, 128 # 8-byte Folded Reload ld.d $a3, $a3, 0 ldptr.w $a3, $a3, 15536 @@ -2027,12 +2029,12 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ld.d $a4, $sp, 120 # 8-byte Folded Reload or $a3, $a3, $a4 andi $a3, $a3, 1 - beqz $a3, .LBB0_247 -# %bb.245: # in Loop: Header=BB0_216 Depth=1 + beqz $a3, .LBB0_246 +# %bb.244: # in Loop: Header=BB0_215 Depth=1 ld.d $a3, $sp, 80 # 8-byte Folded Reload ld.hu $a3, $a3, 0 - beqz $a3, .LBB0_247 -# %bb.246: # in Loop: Header=BB0_216 Depth=1 + beqz $a3, .LBB0_246 +# %bb.245: # in Loop: Header=BB0_215 Depth=1 addi.d $a0, $sp, 184 addi.d $a1, $sp, 352 ori $a2, $zero, 96 @@ -2054,20 +2056,20 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ldx.w $a2, $a0, $s6 ld.d $a1, $sp, 136 # 8-byte Folded Reload ld.hu $a1, $a1, 0 -.LBB0_247: # in Loop: Header=BB0_216 Depth=1 - bnez $a6, .LBB0_252 -# %bb.248: # in Loop: Header=BB0_216 Depth=1 - beqz $a2, .LBB0_252 -# %bb.249: # in Loop: Header=BB0_216 Depth=1 - blt $s5, $s1, .LBB0_252 -# %bb.250: # in Loop: Header=BB0_216 Depth=1 +.LBB0_246: # in Loop: Header=BB0_215 Depth=1 + bnez $a6, .LBB0_251 +# %bb.247: # in Loop: Header=BB0_215 Depth=1 + beqz $a2, .LBB0_251 +# %bb.248: # in Loop: Header=BB0_215 Depth=1 + blt $s5, $s1, .LBB0_251 +# %bb.249: # in Loop: Header=BB0_215 Depth=1 ext.w.h $a3, $a1 - blt $s3, $a3, .LBB0_252 -# %bb.251: # in Loop: Header=BB0_216 Depth=1 + blt $s3, $a3, .LBB0_251 +# %bb.250: # in Loop: Header=BB0_215 Depth=1 ld.d $a3, $sp, 144 # 8-byte Folded Reload ld.w $a3, $a3, 364 - beqz $a3, .LBB0_255 -.LBB0_252: # in Loop: Header=BB0_216 Depth=1 + beqz $a3, .LBB0_254 +.LBB0_251: # in Loop: Header=BB0_215 Depth=1 ld.d $a3, $sp, 128 # 8-byte Folded Reload ld.d $a3, $a3, 0 ldptr.w $a3, $a3, 15536 @@ -2075,12 +2077,12 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ld.d $a4, $sp, 104 # 8-byte Folded Reload or $a3, $a3, $a4 andi $a3, $a3, 1 - beqz $a3, .LBB0_255 -# %bb.253: # in Loop: Header=BB0_216 Depth=1 + beqz $a3, .LBB0_254 +# %bb.252: # in Loop: Header=BB0_215 Depth=1 ld.d $a3, $sp, 72 # 8-byte Folded Reload ld.hu $a3, $a3, 0 - beqz $a3, .LBB0_255 -# %bb.254: # in Loop: Header=BB0_216 Depth=1 + beqz $a3, .LBB0_254 +# %bb.253: # in Loop: Header=BB0_215 Depth=1 addi.d $a0, $sp, 184 addi.d $a1, $sp, 352 ori $a2, $zero, 96 @@ -2102,20 +2104,20 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ldx.w $a2, $a0, $s6 ld.d $a1, $sp, 136 # 8-byte Folded Reload ld.hu $a1, $a1, 0 -.LBB0_255: # in Loop: Header=BB0_216 Depth=1 - bnez $a6, .LBB0_260 -# %bb.256: # in Loop: Header=BB0_216 Depth=1 - beqz $a2, .LBB0_260 -# %bb.257: # in Loop: Header=BB0_216 Depth=1 - blt $s2, $s1, .LBB0_260 -# %bb.258: # in Loop: Header=BB0_216 Depth=1 +.LBB0_254: # in Loop: Header=BB0_215 Depth=1 + bnez $a6, .LBB0_259 +# %bb.255: # in Loop: Header=BB0_215 Depth=1 + beqz $a2, .LBB0_259 +# %bb.256: # in Loop: Header=BB0_215 Depth=1 + blt $s2, $s1, .LBB0_259 +# %bb.257: # in Loop: Header=BB0_215 Depth=1 ext.w.h $a1, $a1 - blt $s3, $a1, .LBB0_260 -# %bb.259: # in Loop: Header=BB0_216 Depth=1 + blt $s3, $a1, .LBB0_259 +# %bb.258: # in Loop: Header=BB0_215 Depth=1 ld.d $a1, $sp, 144 # 8-byte Folded Reload ld.w $a1, $a1, 364 - beqz $a1, .LBB0_214 -.LBB0_260: # in Loop: Header=BB0_216 Depth=1 + beqz $a1, .LBB0_213 +.LBB0_259: # in Loop: Header=BB0_215 Depth=1 ld.d $a1, $sp, 128 # 8-byte Folded Reload ld.d $a1, $a1, 0 ldptr.w $a1, $a1, 15536 @@ -2123,12 +2125,12 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ld.d $a2, $sp, 96 # 8-byte Folded Reload or $a1, $a1, $a2 andi $a1, $a1, 1 - beqz $a1, .LBB0_214 -# %bb.261: # in Loop: Header=BB0_216 Depth=1 - ld.d $a1, $sp, 56 # 8-byte Folded Reload + beqz $a1, .LBB0_213 +# %bb.260: # in Loop: Header=BB0_215 Depth=1 + ld.d $a1, $sp, 48 # 8-byte Folded Reload ld.hu $a1, $a1, 0 - beqz $a1, .LBB0_214 -# %bb.262: # in Loop: Header=BB0_216 Depth=1 + beqz $a1, .LBB0_213 +# %bb.261: # in Loop: Header=BB0_215 Depth=1 addi.d $a0, $sp, 184 addi.d $a1, $sp, 352 ori $a2, $zero, 96 @@ -2147,7 +2149,7 @@ encode_one_macroblock_highfast: # @encode_one_macroblock_highfast ld.d $a6, $sp, 112 # 8-byte Folded Reload ld.d $t6, $sp, 160 # 8-byte Folded Reload ld.d $a0, $t6, 0 - b .LBB0_214 + b .LBB0_213 .Lfunc_end0: .size encode_one_macroblock_highfast, .Lfunc_end0-encode_one_macroblock_highfast # -- End function diff --git a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/md_low.s b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/md_low.s index fcdbb71d..a930e4e6 100644 --- a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/md_low.s +++ b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/md_low.s @@ -1,10 +1,6 @@ .file "md_low.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function encode_one_macroblock_low -.LCPI0_0: - .dword 0x3fdffe5c91d14e3c # double 0.49990000000000001 .text - .globl encode_one_macroblock_low + .globl encode_one_macroblock_low # -- Begin function encode_one_macroblock_low .p2align 5 .type encode_one_macroblock_low,@function encode_one_macroblock_low: # @encode_one_macroblock_low @@ -1070,10 +1066,13 @@ encode_one_macroblock_low: # @encode_one_macroblock_low beq $a0, $a1, .LBB0_90 .LBB0_89: fld.d $fa0, $sp, 328 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI0_0) - vldi $vr2, -976 - fmadd.d $fa0, $fa0, $fa2, $fa1 + vldi $vr1, -976 + lu12i.w $a1, -451308 + ori $a1, $a1, 3644 + lu32i.d $a1, -420 + lu52i.d $a1, $a1, 1021 + movgr2fr.d $fa2, $a1 + fmadd.d $fa0, $fa0, $fa1, $fa2 vreplvei.d $vr0, $vr0, 0 vfrintrm.d $vr0, $vr0 ftintrz.w.d $fa0, $fa0 diff --git a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/me_umhex.s b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/me_umhex.s index e34b996b..1c3c6045 100644 --- a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/me_umhex.s +++ b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/me_umhex.s @@ -47,52 +47,24 @@ UMHEX_DefineThreshold: # @UMHEX_DefineThreshold .Lfunc_end0: .size UMHEX_DefineThreshold, .Lfunc_end0-UMHEX_DefineThreshold # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function UMHEX_DefineThresholdMB -.LCPI1_0: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI1_2: - .dword 0xbfeccccccccccccd # double -0.90000000000000002 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI1_1: - .word 0x424c0000 # float 51 -.LCPI1_3: - .word 0x41b3851f # float 22.4400005 -.LCPI1_4: - .word 0x43800000 # float 256 -.LCPI1_5: - .word 0x43960000 # float 300 -.LCPI1_6: - .word 0x42f00000 # float 120 -.LCPI1_10: - .word 0x42a00000 # float 80 -.LCPI1_11: - .word 0x43c80000 # float 400 -.LCPI1_12: - .word 0x437a0000 # float 250 -.LCPI1_13: - .word 0x42200000 # float 40 -.LCPI1_14: - .word 0x43480000 # float 200 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI1_7: + .p2align 4, 0x0 # -- Begin function UMHEX_DefineThresholdMB +.LCPI1_0: .word 0x443b8000 # float 750 .word 0x43af0000 # float 350 .word 0x43af0000 # float 350 .word 0x432a0000 # float 170 -.LCPI1_8: +.LCPI1_1: .word 0x453b8000 # float 3000 .word 0x44bb8000 # float 1500 .word 0x44bb8000 # float 1500 .word 0x44480000 # float 800 -.LCPI1_9: +.LCPI1_2: .word 0x45098000 # float 2200 .word 0x447a0000 # float 1000 .word 0x447a0000 # float 1000 .word 0x43fa0000 # float 500 -.LCPI1_15: +.LCPI1_3: .word 0x42700000 # float 60 .word 0x41f00000 # float 30 .word 0x41f00000 # float 30 @@ -119,13 +91,16 @@ UMHEX_DefineThresholdMB: # @UMHEX_DefineThresholdMB sub.w $a3, $a1, $a3 addi.d $a2, $a2, 15 movgr2fr.w $fa0, $a0 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_0) ffint.d.w $fa0, $fa0 - fneg.d $fa2, $fa0 - vldi $vr3, -912 - fmadd.d $fa2, $fa2, $fa1, $fa3 - fmul.d $fa0, $fa0, $fa1 + fneg.d $fa1, $fa0 + vldi $vr2, -912 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1019 + movgr2fr.d $fa3, $a0 + fmadd.d $fa1, $fa1, $fa3, $fa2 + fmul.d $fa0, $fa0, $fa3 pcalau12i $a0, %got_pc_hi20(img) ld.d $a0, $a0, %got_pc_lo12(img) ld.d $a0, $a0, 0 @@ -136,19 +111,22 @@ UMHEX_DefineThresholdMB: # @UMHEX_DefineThresholdMB srli.d $a4, $a0, 63 srai.d $a0, $a0, 37 add.d $a0, $a0, $a4 - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fmadd.d $fa0, $fa0, $fa1, $fa2 + movgr2fr.w $fa3, $a0 + ffint.d.w $fa3, $fa3 + fmadd.d $fa0, $fa0, $fa3, $fa1 + fcvt.s.d $fa0, $fa0 movgr2fr.w $fa1, $a1 - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.s $fa2, $a0, %pc_lo12(.LCPI1_1) ffint.s.w $fa1, $fa1 - pcalau12i $a0, %pc_hi20(.LCPI1_2) - fld.d $fa4, $a0, %pc_lo12(.LCPI1_2) - fdiv.s $fa1, $fa1, $fa2 - fcvt.s.d $fa0, $fa0 + lu12i.w $a0, 271552 + movgr2fr.w $fa3, $a0 + fdiv.s $fa1, $fa1, $fa3 fcvt.d.s $fa1, $fa1 - fmadd.d $fa1, $fa1, $fa4, $fa3 + lu12i.w $a0, -209716 + ori $a0, $a0, 3277 + lu32i.d $a0, -209716 + lu52i.d $a0, $a0, -1026 + movgr2fr.d $fa3, $a0 + fmadd.d $fa1, $fa1, $fa3, $fa2 fcvt.s.d $fa1, $fa1 ori $a0, $zero, 1 sll.w $a0, $a0, $a2 @@ -165,15 +143,16 @@ UMHEX_DefineThresholdMB: # @UMHEX_DefineThresholdMB ldx.w $a1, $a2, $a1 div.w $a0, $a0, $a1 movgr2fr.w $fa2, $a0 - pcalau12i $a0, %pc_hi20(.LCPI1_3) - fld.s $fa3, $a0, %pc_lo12(.LCPI1_3) ffint.s.w $fa2, $fa2 - pcalau12i $a0, %pc_hi20(.LCPI1_4) - fld.s $fa4, $a0, %pc_lo12(.LCPI1_4) + lu12i.w $a0, 269112 + ori $a0, $a0, 1311 + movgr2fr.w $fa3, $a0 fdiv.s $fa2, $fa2, $fa3 fadd.s $fa2, $fa2, $fa2 fmul.s $fa2, $fa2, $fa0 - fmul.s $fa2, $fa2, $fa4 + lu12i.w $a0, 276480 + movgr2fr.w $fa3, $a0 + fmul.s $fa2, $fa2, $fa3 pcalau12i $a0, %pc_hi20(Bsize) addi.d $a0, $a0, %pc_lo12(Bsize) fst.s $fa2, $a0, 28 @@ -185,26 +164,26 @@ UMHEX_DefineThresholdMB: # @UMHEX_DefineThresholdMB fst.s $fa2, $a0, 16 fmul.s $fa2, $fa2, $fa3 fst.s $fa2, $a0, 12 - pcalau12i $a1, %pc_hi20(.LCPI1_5) - fld.s $fa4, $a1, %pc_lo12(.LCPI1_5) fst.s $fa2, $a0, 8 fmul.s $fa2, $fa2, $fa3 fst.s $fa2, $a0, 4 - fmul.s $fa2, $fa0, $fa4 + lu12i.w $a0, 276832 + movgr2fr.w $fa2, $a0 + fmul.s $fa2, $fa0, $fa2 fmul.s $fa2, $fa2, $fa1 ftintrz.w.s $fa2, $fa2 movfr2gr.s $a1, $fa2 pcalau12i $a0, %pc_hi20(Multi_Ref_Thd_MB) addi.d $a0, $a0, %pc_lo12(Multi_Ref_Thd_MB) - pcalau12i $a2, %pc_hi20(.LCPI1_6) - fld.s $fa2, $a2, %pc_lo12(.LCPI1_6) st.w $a1, $a0, 4 + lu12i.w $a1, 274176 + movgr2fr.w $fa2, $a1 fmul.s $fa2, $fa0, $fa2 fmul.s $fa2, $fa2, $fa1 ftintrz.w.s $fa2, $fa2 movfr2gr.s $a1, $fa2 - pcalau12i $a2, %pc_hi20(.LCPI1_7) - vld $vr3, $a2, %pc_lo12(.LCPI1_7) + pcalau12i $a2, %pc_hi20(.LCPI1_0) + vld $vr3, $a2, %pc_lo12(.LCPI1_0) st.w $a1, $a0, 8 st.w $a1, $a0, 12 vreplvei.w $vr2, $vr0, 0 @@ -213,64 +192,64 @@ UMHEX_DefineThresholdMB: # @UMHEX_DefineThresholdMB vfmul.s $vr4, $vr4, $vr3 vftintrz.w.s $vr4, $vr4 pcalau12i $a2, %pc_hi20(Median_Pred_Thd_MB) - addi.d $a3, $a2, %pc_lo12(Median_Pred_Thd_MB) - pcalau12i $a2, %pc_hi20(.LCPI1_8) - vld $vr5, $a2, %pc_lo12(.LCPI1_8) - vst $vr4, $a3, 4 + addi.d $a2, $a2, %pc_lo12(Median_Pred_Thd_MB) + pcalau12i $a3, %pc_hi20(.LCPI1_1) + vld $vr5, $a3, %pc_lo12(.LCPI1_1) + vst $vr4, $a2, 4 vfmul.s $vr4, $vr2, $vr5 vfmul.s $vr4, $vr4, $vr3 vftintrz.w.s $vr4, $vr4 - pcalau12i $a2, %pc_hi20(Big_Hexagon_Thd_MB) - addi.d $a2, $a2, %pc_lo12(Big_Hexagon_Thd_MB) - pcalau12i $a4, %pc_hi20(.LCPI1_9) - vld $vr5, $a4, %pc_lo12(.LCPI1_9) - vst $vr4, $a2, 4 + pcalau12i $a3, %pc_hi20(Big_Hexagon_Thd_MB) + addi.d $a3, $a3, %pc_lo12(Big_Hexagon_Thd_MB) + pcalau12i $a4, %pc_hi20(.LCPI1_2) + vld $vr5, $a4, %pc_lo12(.LCPI1_2) + vst $vr4, $a3, 4 vfmul.s $vr4, $vr2, $vr5 vfmul.s $vr4, $vr4, $vr3 vftintrz.w.s $vr4, $vr4 pcalau12i $a4, %pc_hi20(Threshold_DSR_MB) addi.d $a4, $a4, %pc_lo12(Threshold_DSR_MB) - pcalau12i $a5, %pc_hi20(.LCPI1_10) - fld.s $fa5, $a5, %pc_lo12(.LCPI1_10) vst $vr4, $a4, 4 - fmul.s $fa4, $fa0, $fa5 - pcalau12i $a5, %pc_hi20(.LCPI1_11) - fld.s $fa5, $a5, %pc_lo12(.LCPI1_11) + lu12i.w $a5, 272896 + movgr2fr.w $fa4, $a5 + fmul.s $fa4, $fa0, $fa4 fmul.s $fa4, $fa4, $fa1 ftintrz.w.s $fa4, $fa4 movfr2gr.s $a5, $fa4 - fmul.s $fa4, $fa0, $fa5 - pcalau12i $a6, %pc_hi20(.LCPI1_12) - fld.s $fa5, $a6, %pc_lo12(.LCPI1_12) + st.w $a5, $a2, 20 + lu12i.w $a6, 277632 + movgr2fr.w $fa4, $a6 + fmul.s $fa4, $fa0, $fa4 fmul.s $fa4, $fa4, $fa1 ftintrz.w.s $fa4, $fa4 movfr2gr.s $a6, $fa4 - fmul.s $fa4, $fa0, $fa5 + st.w $a6, $a3, 20 + lu12i.w $a7, 276384 + movgr2fr.w $fa4, $a7 + fmul.s $fa4, $fa0, $fa4 fmul.s $fa4, $fa4, $fa1 ftintrz.w.s $fa4, $fa4 - pcalau12i $a7, %pc_hi20(.LCPI1_13) - fld.s $fa5, $a7, %pc_lo12(.LCPI1_13) movfr2gr.s $a7, $fa4 - st.w $a5, $a3, 20 - st.w $a5, $a3, 24 - fmul.s $fa4, $fa0, $fa5 + st.w $a7, $a4, 20 + st.w $a5, $a2, 24 + st.w $a6, $a3, 24 + st.w $a7, $a4, 24 + lu12i.w $a5, 270848 + movgr2fr.w $fa4, $a5 + fmul.s $fa4, $fa0, $fa4 fmul.s $fa4, $fa4, $fa1 ftintrz.w.s $fa4, $fa4 movfr2gr.s $a5, $fa4 - pcalau12i $t0, %pc_hi20(.LCPI1_14) - fld.s $fa4, $t0, %pc_lo12(.LCPI1_14) - st.w $a6, $a2, 20 - st.w $a7, $a4, 20 - st.w $a6, $a2, 24 - st.w $a7, $a4, 24 - st.w $a5, $a3, 28 + st.w $a5, $a2, 28 + lu12i.w $a2, 275584 + movgr2fr.w $fa4, $a2 fmul.s $fa0, $fa0, $fa4 fmul.s $fa0, $fa0, $fa1 - pcalau12i $a3, %pc_hi20(.LCPI1_15) - vld $vr1, $a3, %pc_lo12(.LCPI1_15) + pcalau12i $a2, %pc_hi20(.LCPI1_3) + vld $vr1, $a2, %pc_lo12(.LCPI1_3) ftintrz.w.s $fa0, $fa0 - movfr2gr.s $a3, $fa0 - st.w $a3, $a2, 28 + movfr2gr.s $a2, $fa0 + st.w $a2, $a3, 28 vfmul.s $vr0, $vr2, $vr1 vfmul.s $vr0, $vr0, $vr3 vftintrz.w.s $vr0, $vr0 diff --git a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/mode_decision.s b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/mode_decision.s index 627074ed..7e739d15 100644 --- a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/mode_decision.s +++ b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/mode_decision.s @@ -60,14 +60,7 @@ rc_store_diff: # @rc_store_diff .Lfunc_end0: .size rc_store_diff, .Lfunc_end0-rc_store_diff # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function fast_mode_intra_decision -.LCPI1_0: - .dword 0x3f65555555555555 # double 0.0026041666666666665 -.LCPI1_1: - .dword 0x3f90000000000000 # double 0.015625 - .text - .globl fast_mode_intra_decision + .globl fast_mode_intra_decision # -- Begin function fast_mode_intra_decision .p2align 5 .type fast_mode_intra_decision,@function fast_mode_intra_decision: # @fast_mode_intra_decision @@ -109,17 +102,20 @@ fast_mode_intra_decision: # @fast_mode_intra_decision move $a2, $s0 move $a3, $zero jirl $ra, $a5, 0 - pcalau12i $a1, %pc_hi20(.LCPI1_0) + lu12i.w $a0, 349525 + ori $a1, $a0, 1365 + lu32i.d $a1, 349525 ld.d $a0, $s2, 0 - fld.d $fa0, $a1, %pc_lo12(.LCPI1_0) + lu52i.d $a2, $a1, 1014 lu12i.w $a1, 3 - ori $a2, $a1, 3056 - ldx.w $a2, $a0, $a2 - ld.w $a3, $a0, 164 + ori $a3, $a1, 3056 + ldx.w $a3, $a0, $a3 + ld.w $a4, $a0, 164 + movgr2fr.d $fa0, $a2 fmul.d $fa0, $fs0, $fa0 - addi.w $a2, $a2, -1 + addi.w $a2, $a3, -1 movgr2fr.d $fa1, $zero - beq $a3, $a2, .LBB1_7 + beq $a4, $a2, .LBB1_7 # %bb.1: ori $a1, $a1, 3048 ldx.w $a1, $a0, $a1 @@ -447,11 +443,11 @@ fast_mode_intra_decision: # @fast_mode_intra_decision bne $a2, $t3, .LBB1_5 # %bb.6: # %middle.block78 vhaddw.q.d $vr1, $vr2, $vr2 - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI1_1) vpickve2gr.d $a0, $vr1, 0 movgr2fr.d $fa1, $a0 ffint.d.l $fa1, $fa1 + lu52i.d $a0, $zero, 1017 + movgr2fr.d $fa2, $a0 fmul.d $fa1, $fa1, $fa2 .LBB1_7: fcmp.cult.d $fcc0, $fa1, $fa0 @@ -2005,12 +2001,7 @@ compute_mode_RD_cost: # @compute_mode_RD_cost .Lfunc_end7: .size compute_mode_RD_cost, .Lfunc_end7-compute_mode_RD_cost # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function submacroblock_mode_decision -.LCPI8_0: - .dword 0x46293e5939a08cea # double 1.0E+30 - .text - .globl submacroblock_mode_decision + .globl submacroblock_mode_decision # -- Begin function submacroblock_mode_decision .p2align 5 .type submacroblock_mode_decision,@function submacroblock_mode_decision: # @submacroblock_mode_decision @@ -2195,11 +2186,14 @@ submacroblock_mode_decision: # @submacroblock_mode_decision add.d $a0, $a0, $a3 st.d $a0, $sp, 112 # 8-byte Folded Spill movgr2fr.d $fs0, $zero - lu12i.w $a1, 524287 - pcalau12i $a0, %pc_hi20(.LCPI8_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI8_0) - ori $a0, $a1, 4095 + lu12i.w $a0, 524287 + ori $a0, $a0, 4095 st.d $a0, $sp, 360 # 8-byte Folded Spill + lu12i.w $a0, 236040 + ori $a0, $a0, 3306 + lu32i.d $a0, -442791 + lu52i.d $a0, $a0, 1122 + movgr2fr.d $fs1, $a0 st.d $s7, $sp, 328 # 8-byte Folded Spill st.d $s5, $sp, 440 # 8-byte Folded Spill st.d $s4, $sp, 368 # 8-byte Folded Spill @@ -2713,8 +2707,8 @@ submacroblock_mode_decision: # @submacroblock_mode_decision st.w $a4, $a1, 376 ld.d $a1, $s5, 0 ldptr.w $a1, $a1, 4168 - ld.w $s4, $sp, 564 - st.d $s4, $sp, 280 # 8-byte Folded Spill + ld.w $a2, $sp, 564 + st.d $a2, $sp, 280 # 8-byte Folded Spill beqz $a1, .LBB8_79 # %bb.50: # %.preheader362 # in Loop: Header=BB8_6 Depth=1 @@ -2823,8 +2817,8 @@ submacroblock_mode_decision: # @submacroblock_mode_decision ld.d $t8, $sp, 184 # 8-byte Folded Reload ld.d $fp, $sp, 176 # 8-byte Folded Reload ld.d $s0, $sp, 168 # 8-byte Folded Reload - ld.d $s5, $sp, 160 # 8-byte Folded Reload - ld.d $s7, $sp, 152 # 8-byte Folded Reload + ld.d $s4, $sp, 160 # 8-byte Folded Reload + ld.d $s5, $sp, 152 # 8-byte Folded Reload b .LBB8_52 .p2align 4, , 16 .LBB8_51: # in Loop: Header=BB8_52 Depth=2 @@ -2953,7 +2947,7 @@ submacroblock_mode_decision: # @submacroblock_mode_decision ld.w $t1, $a0, 176 ld.w $t2, $a0, 20 .LBB8_70: # in Loop: Header=BB8_52 Depth=2 - add.w $t3, $t1, $s5 + add.w $t3, $t1, $s4 slli.d $t4, $t3, 1 ldx.h $t4, $a7, $t4 st.h $t4, $a2, -514 @@ -2971,7 +2965,7 @@ submacroblock_mode_decision: # @submacroblock_mode_decision ld.w $t1, $a0, 176 ld.w $t2, $a0, 20 .LBB8_73: # in Loop: Header=BB8_52 Depth=2 - add.w $t0, $t1, $s7 + add.w $t0, $t1, $s5 slli.d $t1, $t0, 1 ldx.h $a7, $a7, $t1 st.h $a7, $a2, -512 @@ -3100,7 +3094,7 @@ submacroblock_mode_decision: # @submacroblock_mode_decision bge $a1, $a0, .LBB8_4 b .LBB8_49 .LBB8_89: - move $s2, $s1 + move $s8, $s1 ld.d $a0, $sp, 376 # 8-byte Folded Reload beqz $a0, .LBB8_92 # %bb.90: # %.thread354 @@ -3110,7 +3104,7 @@ submacroblock_mode_decision: # @submacroblock_mode_decision ldptr.w $a0, $a0, 4168 lu12i.w $s0, 3 ld.d $s1, $sp, 56 # 8-byte Folded Reload - ld.d $s8, $sp, 280 # 8-byte Folded Reload + ld.d $s4, $sp, 280 # 8-byte Folded Reload bnez $a0, .LBB8_99 # %bb.91: # %.thread356 ld.w $a0, $s1, 0 @@ -3129,7 +3123,7 @@ submacroblock_mode_decision: # @submacroblock_mode_decision ld.d $a0, $a0, 0 ldptr.w $a0, $a0, 4168 lu12i.w $s0, 3 - ld.d $s8, $sp, 280 # 8-byte Folded Reload + ld.d $s4, $sp, 280 # 8-byte Folded Reload bnez $a0, .LBB8_99 .LBB8_93: # %.preheader361 ld.d $a0, $sp, 304 # 8-byte Folded Reload @@ -3153,7 +3147,7 @@ submacroblock_mode_decision: # @submacroblock_mode_decision move $a2, $fp pcaddu18i $ra, %call36(LumaResidualCoding8x8) jirl $ra, $ra, 0 - move $s8, $a0 + move $s4, $a0 pcalau12i $a0, %got_pc_hi20(cbp_blk8x8) ld.d $a0, $a0, %got_pc_lo12(cbp_blk8x8) ld.w $a1, $a0, 0 @@ -3166,8 +3160,8 @@ submacroblock_mode_decision: # @submacroblock_mode_decision st.w $a1, $a0, 0 slli.d $fp, $fp, 3 ldx.d $a0, $a2, $fp - ld.d $s4, $sp, 232 # 8-byte Folded Reload - ld.d $a1, $s4, 0 + ld.d $s2, $sp, 232 # 8-byte Folded Reload + ld.d $a1, $s2, 0 ld.d $a2, $a0, 0 ld.d $a0, $a1, 0 ld.d $a1, $a2, 0 @@ -3177,7 +3171,7 @@ submacroblock_mode_decision: # @submacroblock_mode_decision ld.d $a0, $s7, 0 ldptr.d $a0, $a0, 14160 ldx.d $a0, $a0, $fp - ld.d $a1, $s4, 0 + ld.d $a1, $s2, 0 ld.d $a2, $a0, 0 ld.d $a0, $a1, 8 ld.d $a1, $a2, 8 @@ -3187,7 +3181,7 @@ submacroblock_mode_decision: # @submacroblock_mode_decision ld.d $a0, $s7, 0 ldptr.d $a0, $a0, 14160 ldx.d $a0, $a0, $fp - ld.d $a1, $s4, 8 + ld.d $a1, $s2, 8 ld.d $a2, $a0, 8 ld.d $a0, $a1, 0 ld.d $a1, $a2, 0 @@ -3197,7 +3191,7 @@ submacroblock_mode_decision: # @submacroblock_mode_decision ld.d $a0, $s7, 0 ldptr.d $a0, $a0, 14160 ldx.d $a0, $a0, $fp - ld.d $a1, $s4, 8 + ld.d $a1, $s2, 8 ld.d $a2, $a0, 8 ld.d $a0, $a1, 8 ld.d $a1, $a2, 8 @@ -3207,7 +3201,7 @@ submacroblock_mode_decision: # @submacroblock_mode_decision ld.d $a0, $s7, 0 ldptr.d $a0, $a0, 14160 ldx.d $a0, $a0, $fp - ld.d $a1, $s4, 16 + ld.d $a1, $s2, 16 ld.d $a2, $a0, 16 ld.d $a0, $a1, 0 ld.d $a1, $a2, 0 @@ -3217,7 +3211,7 @@ submacroblock_mode_decision: # @submacroblock_mode_decision ld.d $a0, $s7, 0 ldptr.d $a0, $a0, 14160 ldx.d $a0, $a0, $fp - ld.d $a1, $s4, 16 + ld.d $a1, $s2, 16 ld.d $a2, $a0, 16 ld.d $a0, $a1, 8 ld.d $a1, $a2, 8 @@ -3227,7 +3221,7 @@ submacroblock_mode_decision: # @submacroblock_mode_decision ld.d $a0, $s7, 0 ldptr.d $a0, $a0, 14160 ldx.d $a0, $a0, $fp - ld.d $a1, $s4, 24 + ld.d $a1, $s2, 24 ld.d $a2, $a0, 24 ld.d $a0, $a1, 0 ld.d $a1, $a2, 0 @@ -3237,7 +3231,7 @@ submacroblock_mode_decision: # @submacroblock_mode_decision ld.d $a0, $s7, 0 ldptr.d $a0, $a0, 14160 ldx.d $a0, $a0, $fp - ld.d $a1, $s4, 24 + ld.d $a1, $s2, 24 ld.d $a2, $a0, 24 ld.d $a0, $a1, 8 ld.d $a1, $a2, 8 @@ -3292,7 +3286,7 @@ submacroblock_mode_decision: # @submacroblock_mode_decision ld.w $t1, $s6, %pc_lo12(si_frame_indicator) bnez $t1, .LBB8_94 # %bb.97: # in Loop: Header=BB8_95 Depth=1 - ld.d $t1, $s2, %pc_lo12(lrec) + ld.d $t1, $s8, %pc_lo12(lrec) ld.w $t2, $t0, 180 ld.w $t0, $t0, 176 alsl.d $t1, $t2, $t1, 3 @@ -3311,7 +3305,7 @@ submacroblock_mode_decision: # @submacroblock_mode_decision .LBB8_98: ld.d $fp, $sp, 320 # 8-byte Folded Reload .LBB8_99: # %.loopexit360 - beqz $s8, .LBB8_101 + beqz $s4, .LBB8_101 # %bb.100: ori $a0, $zero, 1 sll.w $a0, $a0, $fp @@ -3323,7 +3317,7 @@ submacroblock_mode_decision: # @submacroblock_mode_decision pcalau12i $a0, %got_pc_hi20(cnt_nonz_8x8) ld.d $a0, $a0, %got_pc_lo12(cnt_nonz_8x8) ld.w $a1, $a0, 0 - add.d $a1, $a1, $s8 + add.d $a1, $a1, $s4 st.w $a1, $a0, 0 .LBB8_101: ld.d $a0, $sp, 376 # 8-byte Folded Reload @@ -3500,7 +3494,7 @@ submacroblock_mode_decision: # @submacroblock_mode_decision ld.w $a7, $s6, %pc_lo12(si_frame_indicator) bnez $a7, .LBB8_114 # %bb.117: # in Loop: Header=BB8_115 Depth=1 - ld.d $a7, $s2, %pc_lo12(lrec) + ld.d $a7, $s8, %pc_lo12(lrec) ld.w $t0, $a5, 180 alsl.d $a7, $t0, $a7, 3 ldx.d $a7, $a7, $a0 @@ -3544,12 +3538,7 @@ submacroblock_mode_decision: # @submacroblock_mode_decision .Lfunc_end8: .size submacroblock_mode_decision, .Lfunc_end8-submacroblock_mode_decision # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function get_initial_mb16x16_cost -.LCPI9_0: - .dword 0x4080000000000000 # double 512 - .text - .globl get_initial_mb16x16_cost + .globl get_initial_mb16x16_cost # -- Begin function get_initial_mb16x16_cost .p2align 5 .type get_initial_mb16x16_cost,@function get_initial_mb16x16_cost: # @get_initial_mb16x16_cost @@ -3559,27 +3548,26 @@ get_initial_mb16x16_cost: # @get_initial_mb16x16_cost pcalau12i $a0, %got_pc_hi20(img) ld.d $a0, $a0, %got_pc_lo12(img) ld.d $a0, $a0, 0 - ld.w $a2, $a0, 12 - ldptr.d $a1, $a0, 14224 + ld.w $a1, $a0, 12 + ldptr.d $a2, $a0, 14224 ori $a3, $zero, 536 - mul.d $a3, $a2, $a3 - add.d $a1, $a1, $a3 - ld.d $a4, $a1, 64 - ld.d $a3, $a1, 56 - pcalau12i $a1, %pc_hi20(.LCPI9_0) - beqz $a4, .LBB9_3 + mul.d $a3, $a1, $a3 + add.d $a2, $a2, $a3 + ld.d $a3, $a2, 64 + ld.d $a2, $a2, 56 + beqz $a3, .LBB9_3 # %bb.1: - pcalau12i $a4, %pc_hi20(mb16x16_cost_frame) - ld.d $a4, $a4, %pc_lo12(mb16x16_cost_frame) - alsl.d $a5, $a2, $a4, 3 - fld.d $fa0, $a5, -8 - beqz $a3, .LBB9_5 + pcalau12i $a3, %pc_hi20(mb16x16_cost_frame) + ld.d $a3, $a3, %pc_lo12(mb16x16_cost_frame) + alsl.d $a4, $a1, $a3, 3 + fld.d $fa0, $a4, -8 + beqz $a2, .LBB9_5 # %bb.2: - ld.w $a3, $a0, 52 - srli.d $a3, $a3, 4 - sub.w $a2, $a2, $a3 - slli.d $a2, $a2, 3 - fldx.d $fa1, $a4, $a2 + ld.w $a2, $a0, 52 + srli.d $a2, $a2, 4 + sub.w $a1, $a1, $a2 + slli.d $a1, $a1, 3 + fldx.d $fa1, $a3, $a1 fadd.d $fa0, $fa0, $fa1 vldi $vr1, -912 fadd.d $fa0, $fa0, $fa1 @@ -3587,40 +3575,42 @@ get_initial_mb16x16_cost: # @get_initial_mb16x16_cost fmul.d $fa0, $fa0, $fa1 b .LBB9_5 .LBB9_3: - beqz $a3, .LBB9_7 + beqz $a2, .LBB9_7 # %bb.4: - ld.w $a3, $a0, 52 - pcalau12i $a4, %pc_hi20(mb16x16_cost_frame) - ld.d $a4, $a4, %pc_lo12(mb16x16_cost_frame) - srli.d $a3, $a3, 4 - sub.w $a2, $a2, $a3 - slli.d $a2, $a2, 3 - fldx.d $fa0, $a4, $a2 + ld.w $a2, $a0, 52 + pcalau12i $a3, %pc_hi20(mb16x16_cost_frame) + ld.d $a3, $a3, %pc_lo12(mb16x16_cost_frame) + srli.d $a2, $a2, 4 + sub.w $a1, $a1, $a2 + slli.d $a1, $a1, 3 + fldx.d $fa0, $a3, $a1 .LBB9_5: - pcalau12i $a2, %got_pc_hi20(mb16x16_cost) - ld.d $a2, $a2, %got_pc_lo12(mb16x16_cost) - fld.d $fa1, $a1, %pc_lo12(.LCPI9_0) + pcalau12i $a1, %got_pc_hi20(mb16x16_cost) + ld.d $a1, $a1, %got_pc_lo12(mb16x16_cost) + lu52i.d $a2, $zero, 1032 + movgr2fr.d $fa1, $a2 fcmp.clt.d $fcc0, $fa0, $fa1 - fst.d $fa0, $a2, 0 + fst.d $fa0, $a1, 0 bceqz $fcc0, .LBB9_8 # %bb.6: vldi $vr0, -912 b .LBB9_9 .LBB9_7: # %.thread7 - pcalau12i $a2, %got_pc_hi20(mb16x16_cost) - ld.d $a2, $a2, %got_pc_lo12(mb16x16_cost) - fld.d $fa0, $a1, %pc_lo12(.LCPI9_0) - lu52i.d $a3, $zero, 1032 - st.d $a3, $a2, 0 + pcalau12i $a1, %got_pc_hi20(mb16x16_cost) + ld.d $a1, $a1, %got_pc_lo12(mb16x16_cost) + lu52i.d $a2, $zero, 1032 + st.d $a2, $a1, 0 + movgr2fr.d $fa0, $a2 .LBB9_8: - ld.w $a2, $a0, 20 - ldptr.d $a3, $a0, 15504 - slli.d $a2, $a2, 3 + ld.w $a1, $a0, 20 + ldptr.d $a2, $a0, 15504 + slli.d $a1, $a1, 3 ld.w $a0, $a0, 36 - ldx.d $a2, $a3, $a2 + ldx.d $a1, $a2, $a1 slli.d $a0, $a0, 3 - fldx.d $fa1, $a2, $a0 - fld.d $fa2, $a1, %pc_lo12(.LCPI9_0) + fldx.d $fa1, $a1, $a0 + lu52i.d $a0, $zero, 1032 + movgr2fr.d $fa2, $a0 fmul.d $fa1, $fa1, $fa2 fdiv.d $fa1, $fa0, $fa1 fsqrt.d $fa0, $fa1 @@ -3642,12 +3632,7 @@ get_initial_mb16x16_cost: # @get_initial_mb16x16_cost .Lfunc_end9: .size get_initial_mb16x16_cost, .Lfunc_end9-get_initial_mb16x16_cost # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function adjust_mb16x16_cost -.LCPI10_0: - .dword 0x4080000000000000 # double 512 - .text - .globl adjust_mb16x16_cost + .globl adjust_mb16x16_cost # -- Begin function adjust_mb16x16_cost .p2align 5 .type adjust_mb16x16_cost,@function adjust_mb16x16_cost: # @adjust_mb16x16_cost @@ -3668,8 +3653,8 @@ adjust_mb16x16_cost: # @adjust_mb16x16_cost slli.d $a3, $a3, 3 fstx.d $fa0, $a2, $a3 fld.d $fa0, $a1, 0 - pcalau12i $a1, %pc_hi20(.LCPI10_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI10_0) + lu52i.d $a1, $zero, 1032 + movgr2fr.d $fa1, $a1 fcmp.clt.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB10_2 # %bb.1: diff --git a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/mv-search.s b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/mv-search.s index 04a644de..b91e5673 100644 --- a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/mv-search.s +++ b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/mv-search.s @@ -640,14 +640,8 @@ SetMotionVectorPredictor: # @SetMotionVectorPredictor .word .LBB0_91-.LJTI0_1 .word .LBB0_92-.LJTI0_1 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function Init_Motion_Search_Module -.LCPI1_0: - .dword 0x3fe62e42fefa39ef # double 0.69314718055994529 -.LCPI1_1: - .dword 0x3ddb7cdfd9d7bdbb # double 1.0E-10 .text - .globl Init_Motion_Search_Module + .globl Init_Motion_Search_Module # -- Begin function Init_Motion_Search_Module .p2align 5 .type Init_Motion_Search_Module,@function Init_Motion_Search_Module: # @Init_Motion_Search_Module @@ -694,11 +688,17 @@ Init_Motion_Search_Module: # @Init_Motion_Search_Module ffint.d.l $fa0, $fa0 pcaddu18i $ra, %call36(log) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI1_0) - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI1_1) + lu12i.w $a0, -4189 + ori $a0, $a0, 2543 + lu32i.d $a0, 405058 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fs1, $a0 fdiv.d $fa0, $fa0, $fs1 + lu12i.w $a0, -156293 + ori $a0, $a0, 3515 + lu32i.d $a0, -295713 + lu52i.d $a0, $a0, 989 + movgr2fr.d $fs2, $a0 fadd.d $fa0, $fa0, $fs2 vreplvei.d $vr0, $vr0, 0 vfrintrm.d $vr0, $vr0 diff --git a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/ratectl.s b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/ratectl.s index cc4ba66a..d38d2cd0 100644 --- a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/ratectl.s +++ b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/ratectl.s @@ -854,12 +854,7 @@ QP2Qstep: # @QP2Qstep .Lfunc_end2: .size QP2Qstep, .Lfunc_end2-QP2Qstep # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function Qstep2QP -.LCPI3_0: - .dword 0x406c000000000000 # double 224 - .text - .globl Qstep2QP + .globl Qstep2QP # -- Begin function Qstep2QP .p2align 5 .type Qstep2QP,@function Qstep2QP: # @Qstep2QP @@ -871,67 +866,63 @@ Qstep2QP: # @Qstep2QP move $a0, $zero ret .LBB3_2: # %.lr.ph.i.preheader - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI3_0) + ori $a0, $zero, 0 + lu32i.d $a0, -262144 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa1, $fa0 ori $a0, $zero, 51 - bceqz $fcc0, .LBB3_4 -# %bb.3: - ret -.LBB3_4: # %.preheader + bcnez $fcc0, .LBB3_13 +# %bb.3: # %.preheader vldi $vr1, -910 move $a0, $zero fcmp.cule.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB3_7 -# %bb.5: # %.lr.ph.preheader + bcnez $fcc0, .LBB3_6 +# %bb.4: # %.lr.ph.preheader vldi $vr1, -928 vldi $vr2, -910 .p2align 4, , 16 -.LBB3_6: # %.lr.ph +.LBB3_5: # %.lr.ph # =>This Inner Loop Header: Depth=1 fmul.d $fa0, $fa0, $fa1 fcmp.clt.d $fcc0, $fa2, $fa0 addi.d $a0, $a0, 6 - bcnez $fcc0, .LBB3_6 -.LBB3_7: # %._crit_edge + bcnez $fcc0, .LBB3_5 +.LBB3_6: # %._crit_edge vldi $vr1, -923 fcmp.cle.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB3_13 -# %bb.8: + bcnez $fcc0, .LBB3_11 +# %bb.7: vldi $vr1, -920 fcmp.cle.d $fcc0, $fa0, $fa1 ori $a1, $zero, 1 bcnez $fcc0, .LBB3_12 -# %bb.9: +# %bb.8: vldi $vr1, -917 fcmp.cle.d $fcc0, $fa0, $fa1 ori $a1, $zero, 2 bcnez $fcc0, .LBB3_12 -# %bb.10: +# %bb.9: vldi $vr1, -914 fcmp.cle.d $fcc0, $fa0, $fa1 ori $a1, $zero, 3 bcnez $fcc0, .LBB3_12 -# %bb.11: +# %bb.10: vldi $vr1, -911 fcmp.cult.d $fcc0, $fa1, $fa0 movcf2gr $a1, $fcc0 addi.d $a1, $a1, 4 + b .LBB3_12 +.LBB3_11: + move $a1, $zero .LBB3_12: add.w $a0, $a1, $a0 - ret .LBB3_13: - add.w $a0, $zero, $a0 ret .Lfunc_end3: .size Qstep2QP, .Lfunc_end3-Qstep2QP # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function ComputeFrameMAD -.LCPI4_0: - .dword 0x4070000000000000 # double 256 - .text - .globl ComputeFrameMAD + .globl ComputeFrameMAD # -- Begin function ComputeFrameMAD .p2align 5 .type ComputeFrameMAD,@function ComputeFrameMAD: # @ComputeFrameMAD @@ -1000,11 +991,11 @@ ComputeFrameMAD: # @ComputeFrameMAD movgr2fr.d $fa0, $a3 ffint.d.l $fa0, $fa0 .LBB4_10: # %._crit_edge - pcalau12i $a1, %pc_hi20(.LCPI4_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI4_0) + movgr2fr.d $fa1, $a0 + ffint.d.l $fa1, $fa1 + lu52i.d $a0, $zero, 1031 movgr2fr.d $fa2, $a0 - ffint.d.l $fa2, $fa2 - fmul.d $fa1, $fa2, $fa1 + fmul.d $fa1, $fa1, $fa2 fdiv.d $fa0, $fa0, $fa1 ret .Lfunc_end4: diff --git a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/rc_quadratic.s b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/rc_quadratic.s index 4db29e77..d67c9793 100644 --- a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/rc_quadratic.s +++ b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/rc_quadratic.s @@ -257,26 +257,7 @@ rc_free: # @rc_free .Lfunc_end2: .size rc_free, .Lfunc_end2-rc_free # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function rc_init_seq -.LCPI3_0: - .dword 0x3feccccccccccccd # double 0.90000000000000002 -.LCPI3_1: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI3_2: - .dword 0x3fe3333333333333 # double 0.59999999999999998 -.LCPI3_3: - .dword 0x3fd3333333333333 # double 0.29999999999999999 -.LCPI3_4: - .dword 0x3fc999999999999a # double 0.20000000000000001 -.LCPI3_5: - .dword 0x3ff3333333333333 # double 1.2 -.LCPI3_6: - .dword 0x4003333333333333 # double 2.3999999999999999 -.LCPI3_7: - .dword 0x3ff6666666666666 # double 1.3999999999999999 - .text - .globl rc_init_seq + .globl rc_init_seq # -- Begin function rc_init_seq .p2align 5 .type rc_init_seq,@function rc_init_seq: # @rc_init_seq @@ -355,15 +336,18 @@ rc_init_seq: # @rc_init_seq st.w $zero, $s3, 40 st.w $zero, $s3, 80 slt $a0, $zero, $a0 - pcalau12i $a1, %pc_hi20(.LCPI3_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI3_0) - vldi $vr2, -944 - vldi $vr3, -928 + vldi $vr1, -944 + vldi $vr2, -928 movgr2cf $fcc0, $a0 - fsel $fa2, $fa3, $fa2, $fcc0 - fsel $fa1, $fa3, $fa1, $fcc0 - fst.d $fa2, $fp, 16 - fst.d $fa1, $fp, 24 + fsel $fa1, $fa2, $fa1, $fcc0 + lu12i.w $a0, -209716 + ori $a0, $a0, 3277 + lu32i.d $a0, -209716 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa3, $a0 + fsel $fa2, $fa2, $fa3, $fcc0 + fst.d $fa1, $fp, 16 + fst.d $fa2, $fp, 24 st.w $zero, $fp, 1348 fcvt.d.s $fs1, $fa0 fst.d $fs1, $fp, 1312 @@ -383,49 +367,58 @@ rc_init_seq: # @rc_init_seq pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $zero, 2 - st.w $a0, $fp, 1596 ld.w $a1, $fp, 1388 + st.w $a0, $fp, 1596 st.w $zero, $fp, 1372 - st.w $zero, $fp, 1380 ori $a2, $zero, 8 slt $a1, $a2, $a1 lu12i.w $a2, 3 ori $a2, $a2, 3048 ldx.w $a2, $s2, $a2 + st.w $zero, $fp, 1380 sub.d $a0, $a0, $a1 - ld.w $a1, $s2, 52 st.w $a0, $fp, 1440 st.w $a2, $fp, 1444 st.w $zero, $s3, 4 - pcalau12i $a0, %pc_hi20(.LCPI3_2) - beq $a1, $s4, .LBB3_10 + ld.w $a3, $s2, 52 + lu12i.w $a0, -419431 + ori $a2, $a0, 2458 + lu32i.d $a2, -419431 + lu12i.w $a0, 209715 + ori $a0, $a0, 819 + lu32i.d $a0, 209715 + lu52i.d $a1, $a0, 1022 + beq $a3, $s4, .LBB3_10 # %bb.8: - ori $a2, $zero, 352 - bne $a1, $a2, .LBB3_11 + ori $a4, $zero, 352 + bne $a3, $a4, .LBB3_11 # %bb.9: - pcalau12i $a1, %pc_hi20(.LCPI3_4) - fld.d $fa1, $a1, %pc_lo12(.LCPI3_4) - pcalau12i $a1, %pc_hi20(.LCPI3_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI3_5) - fld.d $fa2, $a0, %pc_lo12(.LCPI3_2) + lu52i.d $a2, $a2, 1020 + movgr2fr.d $fa1, $a2 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa0, $a0 + movgr2fr.d $fa2, $a1 ldptr.w $a0, $s1, 5124 bnez $a0, .LBB3_16 b .LBB3_12 .LBB3_10: - pcalau12i $a1, %pc_hi20(.LCPI3_1) - fld.d $fa1, $a1, %pc_lo12(.LCPI3_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI3_2) - pcalau12i $a0, %pc_hi20(.LCPI3_3) - fld.d $fa2, $a0, %pc_lo12(.LCPI3_3) + lu52i.d $a2, $a2, 1019 + movgr2fr.d $fa1, $a2 + movgr2fr.d $fa0, $a1 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa2, $a0 ldptr.w $a0, $s1, 5124 bnez $a0, .LBB3_16 b .LBB3_12 .LBB3_11: - fld.d $fa1, $a0, %pc_lo12(.LCPI3_2) - pcalau12i $a0, %pc_hi20(.LCPI3_6) - fld.d $fa0, $a0, %pc_lo12(.LCPI3_6) - pcalau12i $a0, %pc_hi20(.LCPI3_7) - fld.d $fa2, $a0, %pc_lo12(.LCPI3_7) + movgr2fr.d $fa1, $a1 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fa0, $a0 + lu12i.w $a0, 419430 + ori $a0, $a0, 1638 + lu32i.d $a0, 419430 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa2, $a0 ldptr.w $a0, $s1, 5124 bnez $a0, .LBB3_16 .LBB3_12: @@ -2831,12 +2824,7 @@ updateQPRC3: # @updateQPRC3 .Lfunc_end7: .size updateQPRC3, .Lfunc_end7-updateQPRC3 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function rc_init_GOP -.LCPI8_0: - .dword 0x4000624dd2f1a9fc # double 2.048 - .text - .globl rc_init_GOP + .globl rc_init_GOP # -- Begin function rc_init_GOP .p2align 5 .type rc_init_GOP,@function rc_init_GOP: # @rc_init_GOP @@ -3113,12 +3101,15 @@ rc_init_GOP: # @rc_init_GOP fadd.s $fa3, $fa4, $fa3 ftintrz.w.s $fa3, $fa3 movfr2gr.s $a5, $fa3 - pcalau12i $a7, %pc_hi20(.LCPI8_0) - fld.d $fa3, $a7, %pc_lo12(.LCPI8_0) st.w $a5, $a0, 1564 ffint.d.w $fa2, $fa2 - fcvt.d.s $fa4, $fa0 - fmadd.d $fa2, $fa4, $fa3, $fa2 + fcvt.d.s $fa3, $fa0 + lu12i.w $a5, -184550 + ori $a5, $a5, 2556 + lu32i.d $a5, 25165 + lu52i.d $a5, $a5, 1024 + movgr2fr.d $fa4, $a5 + fmadd.d $fa2, $fa3, $fa4, $fa2 ftintrz.w.d $fa2, $fa2 movfr2gr.s $a5, $fa2 st.w $a5, $a0, 1556 @@ -3285,12 +3276,7 @@ rc_init_GOP: # @rc_init_GOP .Lfunc_end8: .size rc_init_GOP, .Lfunc_end8-rc_init_GOP # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function rc_init_pict -.LCPI9_0: - .dword 0x3fe3333333333333 # double 0.59999999999999998 - .text - .globl rc_init_pict + .globl rc_init_pict # -- Begin function rc_init_pict .p2align 5 .type rc_init_pict,@function rc_init_pict: # @rc_init_pict @@ -4022,12 +4008,15 @@ rc_init_pict: # @rc_init_pict beqz $a2, .LBB9_111 # %bb.110: ld.w $a1, $a0, 1540 - pcalau12i $a2, %pc_hi20(.LCPI9_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI9_0) st.w $zero, $a0, 1552 - movgr2fr.w $fa1, $a1 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a1 + ffint.d.w $fa0, $fa0 + lu12i.w $a1, 209715 + ori $a1, $a1, 819 + lu32i.d $a1, 209715 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a1, $fa0 ori $a2, $zero, 1536 @@ -4058,12 +4047,7 @@ rc_init_pict: # @rc_init_pict .Lfunc_end9: .size rc_init_pict, .Lfunc_end9-rc_init_pict # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function rc_update_pict -.LCPI10_0: - .word 0x3f666666 # float 0.899999976 - .text - .globl rc_update_pict + .globl rc_update_pict # -- Begin function rc_update_pict .p2align 5 .type rc_update_pict,@function rc_update_pict: # @rc_update_pict @@ -4092,12 +4076,13 @@ rc_update_pict: # @rc_update_pict sub.d $a1, $a1, $a4 st.w $a1, $a0, 1564 sub.d $a1, $a5, $a4 - pcalau12i $a2, %pc_hi20(.LCPI10_0) - fld.s $fa0, $a2, %pc_lo12(.LCPI10_0) st.w $a1, $a0, 1556 + movgr2fr.w $fa0, $a1 + ffint.s.w $fa0, $fa0 + lu12i.w $a1, 259686 + ori $a1, $a1, 1638 movgr2fr.w $fa1, $a1 - ffint.s.w $fa1, $fa1 - fmul.s $fa0, $fa1, $fa0 + fmul.s $fa0, $fa0, $fa1 ftintrz.w.s $fa0, $fa0 movfr2gr.s $a1, $fa0 st.w $a1, $a0, 1560 @@ -4195,12 +4180,7 @@ updatePparams: # @updatePparams .Lfunc_end12: .size updatePparams, .Lfunc_end12-updatePparams # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function updateBparams -.LCPI13_0: - .word 0x3fae8a72 # float 1.36360002 - .text - .globl updateBparams + .globl updateBparams # -- Begin function updateBparams .p2align 5 .type updateBparams,@function updateBparams: # @updateBparams @@ -4210,8 +4190,9 @@ updateBparams: # @updateBparams addi.d $a2, $a2, -1 st.w $a2, $a0, 1548 movgr2fr.w $fa0, $a1 - pcalau12i $a1, %pc_hi20(.LCPI13_0) - fld.s $fa1, $a1, %pc_lo12(.LCPI13_0) + lu12i.w $a1, 260840 + ori $a1, $a1, 2674 + movgr2fr.w $fa1, $a1 ld.w $a1, $a0, 1360 pcalau12i $a2, %pc_hi20(generic_RC) ld.d $a2, $a2, %pc_lo12(generic_RC) @@ -4228,12 +4209,7 @@ updateBparams: # @updateBparams .Lfunc_end13: .size updateBparams, .Lfunc_end13-updateBparams # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function rc_update_pict_frame -.LCPI14_0: - .word 0x3fae8a72 # float 1.36360002 - .text - .globl rc_update_pict_frame + .globl rc_update_pict_frame # -- Begin function rc_update_pict_frame .p2align 5 .type rc_update_pict_frame,@function rc_update_pict_frame: # @rc_update_pict_frame @@ -4373,8 +4349,9 @@ rc_update_pict_frame: # @rc_update_pict_frame addi.d $a2, $a2, -1 st.w $a2, $a0, 1548 movgr2fr.w $fa0, $a1 - pcalau12i $a1, %pc_hi20(.LCPI14_0) - fld.s $fa1, $a1, %pc_lo12(.LCPI14_0) + lu12i.w $a1, 260840 + ori $a1, $a1, 2674 + movgr2fr.w $fa1, $a1 ld.w $a1, $a0, 1360 pcalau12i $a2, %pc_hi20(generic_RC) ld.d $a2, $a2, %pc_lo12(generic_RC) @@ -4410,8 +4387,9 @@ rc_update_pict_frame: # @rc_update_pict_frame st.w $a4, $a0, 1548 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI14_0) - fld.s $fa1, $a1, %pc_lo12(.LCPI14_0) + lu12i.w $a1, 260840 + ori $a1, $a1, 2674 + movgr2fr.w $fa1, $a1 ld.w $a4, $a0, 1360 pcalau12i $a1, %pc_hi20(generic_RC) ld.d $a1, $a1, %pc_lo12(generic_RC) @@ -5161,12 +5139,7 @@ updateRCModel: # @updateRCModel .Lfunc_end15: .size updateRCModel, .Lfunc_end15-updateRCModel # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function RCModelEstimator -.LCPI16_0: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 - .text - .globl RCModelEstimator + .globl RCModelEstimator # -- Begin function RCModelEstimator .p2align 5 .type RCModelEstimator,@function RCModelEstimator: # @RCModelEstimator @@ -5323,12 +5296,15 @@ RCModelEstimator: # @RCModelEstimator b .LBB16_20 .LBB16_23: # %._crit_edge128 fneg.d $fa5, $fa4 - pcalau12i $a1, %pc_hi20(.LCPI16_0) - fld.d $fa7, $a1, %pc_lo12(.LCPI16_0) fmul.d $fa6, $fa4, $fa5 fmadd.d $fa6, $fa3, $fa0, $fa6 - fabs.d $ft0, $fa6 - fcmp.cule.d $fcc0, $ft0, $fa7 + fabs.d $fa7, $fa6 + lu12i.w $a1, -390306 + ori $a1, $a1, 3469 + lu32i.d $a1, 50935 + lu52i.d $a1, $a1, 1003 + movgr2fr.d $ft0, $a1 + fcmp.cule.d $fcc0, $fa7, $ft0 bcnez $fcc0, .LBB16_25 # %bb.24: fneg.d $fa4, $fa4 @@ -5741,12 +5717,7 @@ updateMADModel: # @updateMADModel .Lfunc_end17: .size updateMADModel, .Lfunc_end17-updateMADModel # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function MADModelEstimator -.LCPI18_0: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 - .text - .globl MADModelEstimator + .globl MADModelEstimator # -- Begin function MADModelEstimator .p2align 5 .type MADModelEstimator,@function MADModelEstimator: # @MADModelEstimator @@ -5900,12 +5871,15 @@ MADModelEstimator: # @MADModelEstimator b .LBB18_20 .LBB18_23: # %._crit_edge128 fneg.d $fa5, $fa4 - pcalau12i $a1, %pc_hi20(.LCPI18_0) - fld.d $fa7, $a1, %pc_lo12(.LCPI18_0) fmul.d $fa6, $fa4, $fa5 fmadd.d $fa6, $fa2, $fa3, $fa6 - fabs.d $ft0, $fa6 - fcmp.cule.d $fcc0, $ft0, $fa7 + fabs.d $fa7, $fa6 + lu12i.w $a1, -390306 + ori $a1, $a1, 3469 + lu32i.d $a1, 50935 + lu52i.d $a1, $a1, 1003 + movgr2fr.d $ft0, $a1 + fcmp.cule.d $fcc0, $fa7, $ft0 bcnez $fcc0, .LBB18_25 # %bb.24: fneg.d $fa4, $fa4 diff --git a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/rdopt.s b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/rdopt.s index b23f8de8..d2b36417 100644 --- a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/rdopt.s +++ b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/rdopt.s @@ -2865,12 +2865,7 @@ RDCost_for_4x4IntraBlocks: # @RDCost_for_4x4IntraBlocks .Lfunc_end4: .size RDCost_for_4x4IntraBlocks, .Lfunc_end4-RDCost_for_4x4IntraBlocks # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function Mode_Decision_for_4x4IntraBlocks -.LCPI5_0: - .dword 0x46293e5939a08cea # double 1.0E+30 - .text - .globl Mode_Decision_for_4x4IntraBlocks + .globl Mode_Decision_for_4x4IntraBlocks # -- Begin function Mode_Decision_for_4x4IntraBlocks .p2align 5 .type Mode_Decision_for_4x4IntraBlocks,@function Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks @@ -2985,7 +2980,7 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks ld.w $a2, $sp, 324 add.w $a3, $s2, $s0 st.d $a3, $sp, 216 # 8-byte Folded Spill - add.w $fp, $s3, $fp + add.w $s4, $s3, $fp vld $vr0, $sp, 192 # 16-byte Folded Reload ftintrz.w.d $fa0, $fa0 beqz $a2, .LBB5_11 @@ -3004,7 +2999,7 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks srai.d $a2, $s2, 2 st.d $a2, $sp, 8 # 8-byte Folded Spill movfr2gr.s $a2, $fa0 - st.d $a2, $sp, 112 # 8-byte Folded Spill + st.d $a2, $sp, 104 # 8-byte Folded Spill or $a2, $a1, $a4 slti $a2, $a2, 0 slt $a3, $a1, $a4 @@ -3022,15 +3017,15 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks addi.d $a2, $sp, 356 addi.d $a3, $sp, 352 addi.d $a4, $sp, 348 - st.d $a0, $sp, 128 # 8-byte Folded Spill + st.d $a0, $sp, 120 # 8-byte Folded Spill move $a1, $s2 pcaddu18i $ra, %call36(intrapred_luma) jirl $ra, $ra, 0 move $s5, $zero move $s7, $zero st.d $zero, $sp, 152 # 8-byte Folded Spill - st.d $zero, $sp, 120 # 8-byte Folded Spill - addi.w $s4, $s0, 0 + st.d $zero, $sp, 112 # 8-byte Folded Spill + addi.w $fp, $s0, 0 st.d $s3, $sp, 168 # 8-byte Folded Spill bstrpick.d $a0, $s3, 31, 0 ld.d $s3, $sp, 136 # 8-byte Folded Reload @@ -3043,10 +3038,13 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks addi.d $a2, $s2, 2 addi.d $a3, $s2, 3 slli.d $a0, $a0, 9 - st.d $a0, $sp, 104 # 8-byte Folded Spill + st.d $a0, $sp, 96 # 8-byte Folded Spill addi.w $s2, $zero, -3 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.d $fs2, $a0, %pc_lo12(.LCPI5_0) + lu12i.w $a0, 236040 + ori $a0, $a0, 3306 + lu32i.d $a0, -442791 + lu52i.d $a0, $a0, 1122 + movgr2fr.d $fs2, $a0 ori $a4, $zero, 1024 ori $a5, $zero, 9 vrepli.b $vr0, 0 @@ -3065,7 +3063,7 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks # kill: killed $r4 # implicit-def: $r4 # kill: killed $r4 - st.d $s4, $sp, 96 # 8-byte Folded Spill + st.d $fp, $sp, 128 # 8-byte Folded Spill st.d $s0, $sp, 88 # 8-byte Folded Spill b .LBB5_16 .LBB5_12: # %.loopexit247 @@ -3140,7 +3138,7 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks # %bb.29: # in Loop: Header=BB5_16 Depth=1 ldptr.w $a3, $a1, 4168 pcalau12i $a0, %pc_hi20(imgY_org) - slli.d $a2, $fp, 3 + slli.d $a2, $s4, 3 ld.d $a1, $sp, 216 # 8-byte Folded Reload slli.d $a1, $a1, 1 beqz $a3, .LBB5_36 @@ -3151,7 +3149,7 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks ld.d $a4, $a4, 208 slli.d $a5, $s3, 5 add.d $a3, $a3, $a5 - alsl.d $a3, $s4, $a3, 1 + alsl.d $a3, $fp, $a3, 1 stptr.d $a4, $a3, 12624 ld.d $a3, $a0, %pc_lo12(imgY_org) ldx.d $a2, $a3, $a2 @@ -3172,10 +3170,10 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks ld.d $a4, $sp, 176 # 8-byte Folded Reload slli.d $a4, $a4, 5 add.d $a3, $a3, $a4 - alsl.d $a3, $s4, $a3, 1 + alsl.d $a3, $fp, $a3, 1 stptr.d $a2, $a3, 12624 ld.d $a2, $a0, %pc_lo12(imgY_org) - alsl.d $a2, $fp, $a2, 3 + alsl.d $a2, $s4, $a2, 3 ld.d $a2, $a2, 8 ld.d $a3, $s6, 0 ldx.d $a2, $a2, $a1 @@ -3192,10 +3190,10 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks ld.d $a4, $sp, 184 # 8-byte Folded Reload slli.d $a4, $a4, 5 add.d $a3, $a3, $a4 - alsl.d $a3, $s4, $a3, 1 + alsl.d $a3, $fp, $a3, 1 stptr.d $a2, $a3, 12624 ld.d $a2, $a0, %pc_lo12(imgY_org) - alsl.d $a2, $fp, $a2, 3 + alsl.d $a2, $s4, $a2, 3 ld.d $a2, $a2, 16 ld.d $a3, $s6, 0 ldx.d $a2, $a2, $a1 @@ -3211,10 +3209,10 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks ld.d $a2, $a4, 304 slli.d $a4, $s0, 5 add.d $a3, $a3, $a4 - alsl.d $a3, $s4, $a3, 1 + alsl.d $a3, $fp, $a3, 1 stptr.d $a2, $a3, 12624 ld.d $a0, $a0, %pc_lo12(imgY_org) - alsl.d $a0, $fp, $a0, 3 + alsl.d $a0, $s4, $a0, 3 ld.d $a0, $a0, 24 ld.d $a2, $s6, 0 ldx.d $a0, $a0, $a1 @@ -3243,8 +3241,8 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks ld.d $a0, $s6, 0 ldptr.d $a0, $a0, 14160 ld.d $a1, $sp, 208 # 8-byte Folded Reload - slli.d $s4, $a1, 3 - ldx.d $a0, $a0, $s4 + slli.d $fp, $a1, 3 + ldx.d $a0, $a0, $fp pcalau12i $a1, %pc_hi20(cofAC4x4) ld.d $s0, $a1, %pc_lo12(cofAC4x4) ld.d $a1, $sp, 224 # 8-byte Folded Reload @@ -3257,7 +3255,7 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks jirl $ra, $ra, 0 ld.d $a0, $s6, 0 ldptr.d $a0, $a0, 14160 - ldx.d $a0, $a0, $s4 + ldx.d $a0, $a0, $fp ldx.d $a1, $a0, $s3 ld.d $a0, $s0, 8 ld.d $a1, $a1, 8 @@ -3272,7 +3270,7 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks ldptr.d $a1, $a1, 6440 ld.d $a2, $sp, 160 # 8-byte Folded Reload slli.d $a2, $a2, 3 - ld.d $a6, $sp, 128 # 8-byte Folded Reload + ld.d $a6, $sp, 120 # 8-byte Folded Reload slli.d $a3, $a6, 1 ori $a5, $zero, 3 bne $a4, $a5, .LBB5_38 @@ -3328,7 +3326,7 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks ldx.d $a2, $a2, $a1 add.d $a3, $a3, $s5 ld.d $a4, $a3, 208 - alsl.d $a5, $fp, $a0, 3 + alsl.d $a5, $s4, $a0, 3 vinsgr2vr.d $vr0, $a2, 0 vld $vr2, $sp, 192 # 16-byte Folded Reload vilvl.h $vr0, $vr2, $vr0 @@ -3365,10 +3363,10 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks vilvl.h $vr1, $vr2, $vr1 vsub.w $vr0, $vr0, $vr1 vst $vr0, $a0, 48 - ld.d $a1, $sp, 104 # 8-byte Folded Reload + ld.d $a1, $sp, 96 # 8-byte Folded Reload xor $a1, $a1, $s5 sltui $a1, $a1, 1 - ld.d $a2, $sp, 112 # 8-byte Folded Reload + ld.d $a2, $sp, 104 # 8-byte Folded Reload masknez $s8, $a2, $a1 pcaddu18i $ra, %call36(distortion4x4) jirl $ra, $ra, 0 @@ -3400,9 +3398,9 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks st.d $a1, $sp, 32 # 8-byte Folded Spill ldptr.w $a1, $a0, 15260 ld.w $a2, $sp, 360 - st.d $a2, $sp, 120 # 8-byte Folded Spill + st.d $a2, $sp, 112 # 8-byte Folded Spill ld.d $s3, $sp, 136 # 8-byte Folded Reload - ld.d $s4, $sp, 96 # 8-byte Folded Reload + ld.d $fp, $sp, 128 # 8-byte Folded Reload ld.d $s0, $sp, 88 # 8-byte Folded Reload beqz $a1, .LBB5_12 # %bb.40: # %.preheader246 @@ -3414,12 +3412,12 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks slli.d $a2, $s3, 3 ldx.d $a0, $a0, $a2 ldx.d $a2, $a1, $a2 - slli.d $a3, $s4, 2 + slli.d $a3, $fp, 2 ldx.d $a4, $a0, $a3 - alsl.d $a0, $s4, $a0, 2 + alsl.d $a0, $fp, $a0, 2 stx.d $a4, $a2, $a3 ld.d $a0, $a0, 8 - alsl.d $a2, $s4, $a2, 2 + alsl.d $a2, $fp, $a2, 2 st.d $a0, $a2, 8 ld.d $a0, $s6, 0 ldptr.d $a0, $a0, 14176 @@ -3500,6 +3498,7 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks ld.d $a0, $s6, 0 ldptr.d $a0, $a0, 14160 ldx.d $a0, $a0, $fp + ld.d $fp, $sp, 128 # 8-byte Folded Reload ldx.d $a0, $a0, $s0 ld.d $a0, $a0, 8 ld.d $a1, $s1, 8 @@ -3513,7 +3512,7 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks ld.d $a2, $sp, 160 # 8-byte Folded Reload slli.d $a6, $a2, 3 ldx.d $a2, $a1, $a6 - ld.d $t1, $sp, 128 # 8-byte Folded Reload + ld.d $t1, $sp, 120 # 8-byte Folded Reload slli.d $a1, $t1, 1 ld.d $a3, $sp, 32 # 8-byte Folded Reload stx.d $a3, $a2, $a1 @@ -3523,7 +3522,7 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks ld.d $a3, $a3, 208 slli.d $a5, $s3, 5 add.d $a2, $a2, $a5 - alsl.d $a2, $s4, $a2, 1 + alsl.d $a2, $fp, $a2, 1 stptr.d $a3, $a2, 12624 ld.d $a2, $s6, 0 ld.w $a7, $a2, 20 @@ -3557,7 +3556,7 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks ld.d $t0, $sp, 176 # 8-byte Folded Reload slli.d $t0, $t0, 5 add.d $a6, $a6, $t0 - alsl.d $a6, $s4, $a6, 1 + alsl.d $a6, $fp, $a6, 1 stptr.d $a7, $a6, 12624 ld.d $a6, $s6, 0 ld.w $a6, $a6, 20 @@ -3590,7 +3589,7 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks ld.d $a7, $sp, 184 # 8-byte Folded Reload slli.d $a7, $a7, 5 add.d $a5, $a5, $a7 - alsl.d $a5, $s4, $a5, 1 + alsl.d $a5, $fp, $a5, 1 stptr.d $a6, $a5, 12624 ld.d $a5, $s6, 0 ld.w $a6, $a5, 20 @@ -3622,7 +3621,7 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks ld.d $a1, $a1, 304 slli.d $a4, $t0, 5 add.d $a0, $a0, $a4 - alsl.d $a0, $s4, $a0, 1 + alsl.d $a0, $fp, $a0, 1 stptr.d $a1, $a0, 12624 ld.d $a0, $s6, 0 ld.w $a1, $a0, 20 @@ -3653,12 +3652,12 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks slli.d $a2, $s3, 3 ldx.d $a3, $a1, $a2 ldx.d $a0, $a0, $a2 - slli.d $a2, $s4, 2 + slli.d $a2, $fp, 2 ldx.d $a4, $a3, $a2 - alsl.d $a3, $s4, $a3, 2 + alsl.d $a3, $fp, $a3, 2 stx.d $a4, $a0, $a2 ld.d $a3, $a3, 8 - alsl.d $a0, $s4, $a0, 2 + alsl.d $a0, $fp, $a0, 2 st.d $a3, $a0, 8 ld.d $a0, $s6, 0 ldptr.d $a0, $a0, 14176 @@ -3687,31 +3686,31 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks vldx $vr0, $a1, $a2 vstx $vr0, $a0, $a2 .LBB5_60: # %.loopexit - ld.d $a0, $sp, 120 # 8-byte Folded Reload + ld.d $a0, $sp, 112 # 8-byte Folded Reload b .LBB5_62 .LBB5_61: # %.preheader - ld.d $a0, $s6, 0 + ld.d $s1, $s6, 0 slli.d $a1, $s5, 9 - add.d $a1, $a0, $a1 + add.d $a1, $s1, $a1 slli.d $a2, $s3, 5 - add.d $a2, $a0, $a2 + add.d $a2, $s1, $a2 pcalau12i $a3, %pc_hi20(imgY_org) ld.d $a4, $a3, %pc_lo12(imgY_org) - lu12i.w $s1, 3 - ori $a3, $s1, 336 + lu12i.w $s2, 3 + ori $a3, $s2, 336 add.d $t3, $a2, $a3 ld.hu $a6, $a1, 208 - slli.d $a3, $fp, 3 + slli.d $a3, $s4, 3 ldx.d $t4, $a4, $a3 - slli.d $a5, $s4, 1 + slli.d $a5, $fp, 1 stx.h $a6, $t3, $a5 - ld.d $a3, $sp, 216 # 8-byte Folded Reload - slli.d $a3, $a3, 1 + ld.d $a0, $sp, 216 # 8-byte Folded Reload + slli.d $a3, $a0, 1 ldx.hu $a7, $t4, $a3 - alsl.d $t1, $fp, $a4, 3 + alsl.d $t1, $s4, $a4, 3 ld.hu $t2, $a1, 210 sub.d $a4, $a7, $a6 - stptr.w $a4, $a0, 13136 + stptr.w $a4, $s1, 13136 addi.d $a6, $a5, 2 stx.h $t2, $t3, $a6 ld.hu $t5, $a1, 212 @@ -3722,88 +3721,88 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks addi.d $a4, $a3, 4 ldx.hu $t7, $t4, $a4 sub.d $t2, $t6, $t2 - stptr.w $t2, $a0, 13140 + stptr.w $t2, $s1, 13140 ld.hu $t6, $a1, 214 sub.d $t2, $t7, $t5 - stptr.w $t2, $a0, 13144 + stptr.w $t2, $s1, 13144 addi.d $t2, $a5, 6 stx.h $t6, $t3, $t2 addi.d $t3, $a3, 6 ldx.hu $t4, $t4, $t3 ld.hu $t5, $a1, 240 - ori $t7, $s1, 368 + ori $t7, $s2, 368 add.d $t7, $a2, $t7 ld.d $t8, $t1, 8 stx.h $t5, $t7, $a5 + move $a0, $fp ld.hu $fp, $a1, 242 sub.d $t4, $t4, $t6 ldx.hu $t6, $t8, $a3 - stptr.w $t4, $a0, 13148 + stptr.w $t4, $s1, 13148 stx.h $fp, $t7, $a6 ld.hu $t4, $a1, 244 sub.d $t5, $t6, $t5 ldx.hu $t6, $t8, $a7 - stptr.w $t5, $a0, 13200 + stptr.w $t5, $s1, 13200 stx.h $t4, $t7, $t0 ldx.hu $t5, $t8, $a4 sub.d $t6, $t6, $fp ld.hu $fp, $a1, 246 - stptr.w $t6, $a0, 13204 + stptr.w $t6, $s1, 13204 sub.d $t4, $t5, $t4 - stptr.w $t4, $a0, 13208 + stptr.w $t4, $s1, 13208 stx.h $fp, $t7, $t2 ldx.hu $t4, $t8, $t3 ld.hu $t5, $a1, 272 - ori $t6, $s1, 400 + ori $t6, $s2, 400 add.d $t6, $a2, $t6 ld.d $t7, $t1, 16 stx.h $t5, $t6, $a5 ld.hu $t8, $a1, 274 sub.d $t4, $t4, $fp ldx.hu $fp, $t7, $a3 - stptr.w $t4, $a0, 13212 + stptr.w $t4, $s1, 13212 stx.h $t8, $t6, $a6 ld.hu $t4, $a1, 276 sub.d $t5, $fp, $t5 - stptr.w $t5, $a0, 13264 + stptr.w $t5, $s1, 13264 ldx.hu $t5, $t7, $a7 stx.h $t4, $t6, $t0 ldx.hu $fp, $t7, $a4 ld.hu $s0, $a1, 278 sub.d $t5, $t5, $t8 - stptr.w $t5, $a0, 13268 + stptr.w $t5, $s1, 13268 sub.d $t4, $fp, $t4 stx.h $s0, $t6, $t2 ldx.hu $t5, $t7, $t3 ld.hu $t6, $a1, 304 - ori $t7, $s1, 432 + ori $t7, $s2, 432 add.d $a2, $a2, $t7 ld.d $t1, $t1, 24 stx.h $t6, $a2, $a5 ld.hu $a5, $a1, 306 - stptr.w $t4, $a0, 13272 + stptr.w $t4, $s1, 13272 sub.d $t4, $t5, $s0 ldx.hu $a3, $t1, $a3 stx.h $a5, $a2, $a6 ld.hu $a6, $a1, 308 - stptr.w $t4, $a0, 13276 + stptr.w $t4, $s1, 13276 sub.d $a3, $a3, $t6 ldx.hu $a7, $t1, $a7 stx.h $a6, $a2, $t0 ld.hu $a1, $a1, 310 - stptr.w $a3, $a0, 13328 + stptr.w $a3, $s1, 13328 sub.d $a3, $a7, $a5 ldx.hu $a4, $t1, $a4 stx.h $a1, $a2, $t2 ldx.hu $a2, $t1, $t3 - stptr.w $a3, $a0, 13332 + stptr.w $a3, $s1, 13332 sub.d $a3, $a4, $a6 - stptr.w $a3, $a0, 13336 + stptr.w $a3, $s1, 13336 sub.d $a1, $a2, $a1 - stptr.w $a1, $a0, 13340 + stptr.w $a1, $s1, 13340 addi.d $a2, $sp, 364 ori $a3, $zero, 1 - move $a0, $s4 move $a1, $s3 pcaddu18i $ra, %call36(dct_luma) jirl $ra, $ra, 0 @@ -3827,12 +3826,7 @@ Mode_Decision_for_4x4IntraBlocks: # @Mode_Decision_for_4x4IntraBlocks .Lfunc_end5: .size Mode_Decision_for_4x4IntraBlocks, .Lfunc_end5-Mode_Decision_for_4x4IntraBlocks # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function Mode_Decision_for_8x8IntraBlocks -.LCPI6_0: - .dword 0x3fdffe5c91d14e3c # double 0.49990000000000001 - .text - .globl Mode_Decision_for_8x8IntraBlocks + .globl Mode_Decision_for_8x8IntraBlocks # -- Begin function Mode_Decision_for_8x8IntraBlocks .p2align 5 .type Mode_Decision_for_8x8IntraBlocks,@function Mode_Decision_for_8x8IntraBlocks: # @Mode_Decision_for_8x8IntraBlocks @@ -3845,12 +3839,15 @@ Mode_Decision_for_8x8IntraBlocks: # @Mode_Decision_for_8x8IntraBlocks st.d $s2, $sp, 40 # 8-byte Folded Spill st.d $s3, $sp, 32 # 8-byte Folded Spill fst.d $fs0, $sp, 24 # 8-byte Folded Spill - pcalau12i $a2, %pc_hi20(.LCPI6_0) - fld.d $fa1, $a2, %pc_lo12(.LCPI6_0) move $fp, $a1 fmov.d $fs0, $fa0 move $s0, $a0 vldi $vr0, -1000 + lu12i.w $a0, -451308 + ori $a0, $a0, 3644 + lu32i.d $a0, -420 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa1, $a0 fmadd.d $fa0, $fs0, $fa0, $fa1 vreplvei.d $vr0, $vr0, 0 vfrintrm.d $vr0, $vr0 @@ -3916,12 +3913,7 @@ Mode_Decision_for_8x8IntraBlocks: # @Mode_Decision_for_8x8IntraBlocks .Lfunc_end6: .size Mode_Decision_for_8x8IntraBlocks, .Lfunc_end6-Mode_Decision_for_8x8IntraBlocks # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function Mode_Decision_for_Intra4x4Macroblock -.LCPI7_0: - .dword 0x3fdffe5c91d14e3c # double 0.49990000000000001 - .text - .globl Mode_Decision_for_Intra4x4Macroblock + .globl Mode_Decision_for_Intra4x4Macroblock # -- Begin function Mode_Decision_for_Intra4x4Macroblock .p2align 5 .type Mode_Decision_for_Intra4x4Macroblock,@function Mode_Decision_for_Intra4x4Macroblock: # @Mode_Decision_for_Intra4x4Macroblock @@ -3937,12 +3929,15 @@ Mode_Decision_for_Intra4x4Macroblock: # @Mode_Decision_for_Intra4x4Macroblock st.d $s5, $sp, 32 # 8-byte Folded Spill st.d $s6, $sp, 24 # 8-byte Folded Spill fst.d $fs0, $sp, 16 # 8-byte Folded Spill - pcalau12i $a1, %pc_hi20(.LCPI7_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI7_0) move $fp, $a0 fmov.d $fs0, $fa0 st.w $zero, $a0, 0 vldi $vr0, -1000 + lu12i.w $a0, -451308 + ori $a0, $a0, 3644 + lu32i.d $a0, -420 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa1, $a0 fmadd.d $fa0, $fs0, $fa0, $fa1 vreplvei.d $vr0, $vr0, 0 vfrintrm.d $vr0, $vr0 @@ -4146,12 +4141,7 @@ Mode_Decision_for_Intra4x4Macroblock: # @Mode_Decision_for_Intra4x4Macroblock .Lfunc_end7: .size Mode_Decision_for_Intra4x4Macroblock, .Lfunc_end7-Mode_Decision_for_Intra4x4Macroblock # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function RDCost_for_8x8blocks -.LCPI8_0: - .dword 0x4415af1d78b58c40 # double 1.0E+20 - .text - .globl RDCost_for_8x8blocks + .globl RDCost_for_8x8blocks # -- Begin function RDCost_for_8x8blocks .p2align 5 .type RDCost_for_8x8blocks,@function RDCost_for_8x8blocks: # @RDCost_for_8x8blocks @@ -4436,8 +4426,11 @@ RDCost_for_8x8blocks: # @RDCost_for_8x8blocks beqz $s3, .LBB8_24 b .LBB8_36 .LBB8_15: - pcalau12i $a0, %pc_hi20(.LCPI8_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI8_0) + lu12i.w $a0, 494424 + ori $a0, $a0, 3136 + lu32i.d $a0, 372509 + lu52i.d $a0, $a0, 1089 + movgr2fr.d $fa0, $a0 b .LBB8_41 .LBB8_16: # %.preheader st.d $s4, $sp, 24 # 8-byte Folded Spill diff --git a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/rdpicdecision.s b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/rdpicdecision.s index 7afa695a..98f021f6 100644 --- a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/rdpicdecision.s +++ b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/rdpicdecision.s @@ -23,12 +23,7 @@ rd_pic_decision: # @rd_pic_decision .Lfunc_end0: .size rd_pic_decision, .Lfunc_end0-rd_pic_decision # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function picture_coding_decision -.LCPI1_0: - .dword 0x3fe5c28f5c28f5c3 # double 0.68000000000000005 - .text - .globl picture_coding_decision + .globl picture_coding_decision # -- Begin function picture_coding_decision .p2align 5 .type picture_coding_decision,@function picture_coding_decision: # @picture_coding_decision @@ -57,8 +52,11 @@ picture_coding_decision: # @picture_coding_decision fdiv.d $fa0, $fa0, $fa1 pcaddu18i $ra, %call36(exp2) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_0) + lu12i.w $a0, 377487 + ori $a0, $a0, 1475 + lu32i.d $a0, 377487 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 addi.w $a0, $zero, -3 lu32i.d $a0, 0 @@ -78,8 +76,11 @@ picture_coding_decision: # @picture_coding_decision fdiv.d $fa0, $fa0, $fa1 pcaddu18i $ra, %call36(exp2) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_0) + lu12i.w $a0, 377487 + ori $a0, $a0, 1475 + lu32i.d $a0, 377487 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 .LBB1_3: fmul.d $fa0, $fa0, $fa1 fld.s $fa1, $s0, 812 diff --git a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/slice.s b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/slice.s index d81776b5..61e2cba8 100644 --- a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/slice.s +++ b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/slice.s @@ -376,12 +376,7 @@ terminate_slice: # @terminate_slice .Lfunc_end2: .size terminate_slice, .Lfunc_end2-terminate_slice # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function encode_one_slice -.LCPI3_0: - .dword 0x7fefffffffffffff # double 1.7976931348623157E+308 - .text - .globl encode_one_slice + .globl encode_one_slice # -- Begin function encode_one_slice .p2align 5 .type encode_one_slice,@function encode_one_slice: # @encode_one_slice @@ -588,7 +583,7 @@ encode_one_slice: # @encode_one_slice ld.d $a2, $a0, %got_pc_lo12(dummy_slice_too_big) ld.w $a0, $s1, 16 st.d $a2, $s1, 112 - ld.d $fp, $sp, 104 # 8-byte Folded Reload + ld.d $s6, $sp, 104 # 8-byte Folded Reload blez $a0, .LBB3_28 # %bb.20: # %.lr.ph.i ld.d $a2, $s1, 24 @@ -647,7 +642,7 @@ encode_one_slice: # @encode_one_slice .LBB3_28: # %._crit_edge.i pcalau12i $s5, %pc_hi20(active_pps) ld.d $a0, $s5, %pc_lo12(active_pps) - ld.d $a3, $fp, 0 + ld.d $a3, $s6, 0 ld.w $a2, $a0, 184 ld.w $a0, $a0, 188 ldptr.w $a5, $a3, 5084 @@ -671,7 +666,6 @@ encode_one_slice: # @encode_one_slice .LBB3_31: # %.sink.split.i stptr.w $a4, $a1, 14456 .LBB3_32: - lu12i.w $s7, 3 ld.w $a0, $a1, 20 ori $a5, $zero, 3 beq $a0, $a5, .LBB3_35 @@ -728,7 +722,7 @@ encode_one_slice: # @encode_one_slice ld.d $a0, $s8, 0 ld.d $a2, $s3, 0 ldptr.w $a3, $a0, 15360 - ld.d $a1, $fp, 0 + ld.d $a1, $s6, 0 stptr.d $a2, $a0, 14456 pcalau12i $s4, %pc_hi20(active_sps) beqz $a3, .LBB3_46 @@ -746,8 +740,7 @@ encode_one_slice: # @encode_one_slice ldptr.w $a0, $a0, 15332 pcaddu18i $ra, %call36(poc_based_ref_management) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 # 8-byte Folded Reload - ld.d $a1, $a0, 0 + ld.d $a1, $s6, 0 .LBB3_46: ld.w $a0, $a1, 1564 beqz $a0, .LBB3_59 @@ -825,12 +818,13 @@ encode_one_slice: # @encode_one_slice beqz $a3, .LBB3_63 .LBB3_61: ld.w $a1, $a0, 24 - beqz $a1, .LBB3_80 + beqz $a1, .LBB3_81 # %bb.62: ld.w $a0, $a0, 20 ori $a1, $zero, 2 - bne $a0, $a1, .LBB3_81 - b .LBB3_91 + lu12i.w $s1, 3 + bne $a0, $a1, .LBB3_82 + b .LBB3_92 .LBB3_63: ldptr.w $a1, $a1, 4000 beqz $a1, .LBB3_61 @@ -871,7 +865,9 @@ encode_one_slice: # @encode_one_slice ori $a4, $zero, 1 bne $a3, $a4, .LBB3_75 # %bb.71: # %.preheader.i + move $t0, $s6 ldptr.w $a3, $a0, 14460 + lu12i.w $s6, 3 bltz $a3, .LBB3_78 # %bb.72: # %.lr.ph136.i ld.d $a1, $s1, 88 @@ -892,13 +888,15 @@ encode_one_slice: # @encode_one_slice blt $a5, $a7, .LBB3_73 # %bb.74: # %thread-pre-split.loopexit.i ld.w $a3, $a0, 20 + move $s6, $t0 .LBB3_75: # %thread-pre-split.i beq $a3, $a2, .LBB3_61 # %bb.76: # %thread-pre-split.i ori $a1, $zero, 4 beq $a3, $a1, .LBB3_61 # %bb.77: # %thread-pre-split.i.thread-pre-split.thread.i_crit_edge - ori $a1, $s7, 2168 + lu12i.w $s6, 3 + ori $a1, $s6, 2168 ldx.w $a1, $a0, $a1 .LBB3_78: # %thread-pre-split.thread.i pcalau12i $a0, %got_pc_hi20(listX) @@ -911,7 +909,7 @@ encode_one_slice: # @encode_one_slice jirl $ra, $ra, 0 ld.d $a1, $s8, 0 ld.d $a0, $fp, 0 - ori $a2, $s7, 2168 + ori $a2, $s6, 2168 ldx.w $a1, $a1, $a2 ld.d $a3, $s1, 56 ld.d $a4, $s1, 64 @@ -925,7 +923,7 @@ encode_one_slice: # @encode_one_slice ld.d $a0, $s8, 0 ld.w $a1, $a0, 20 ori $a2, $zero, 1 - bne $a1, $a2, .LBB3_61 + bne $a1, $a2, .LBB3_80 # %bb.79: ldptr.w $a1, $a0, 14460 ld.d $a0, $fp, 8 @@ -936,7 +934,7 @@ encode_one_slice: # @encode_one_slice jirl $ra, $ra, 0 ld.d $a1, $s8, 0 ld.d $a0, $fp, 8 - ori $a2, $s7, 2172 + ori $a2, $s6, 2172 ldx.w $a1, $a1, $a2 ld.d $a3, $s1, 88 ld.d $a4, $s1, 96 @@ -946,55 +944,57 @@ encode_one_slice: # @encode_one_slice pcaddu18i $ra, %call36(reorder_ref_pic_list) jirl $ra, $ra, 0 ld.d $a0, $s8, 0 - b .LBB3_61 .LBB3_80: + ld.d $s6, $sp, 104 # 8-byte Folded Reload + b .LBB3_61 +.LBB3_81: pcaddu18i $ra, %call36(init_mbaff_lists) jirl $ra, $ra, 0 ld.d $a0, $s8, 0 ld.w $a0, $a0, 20 ori $a1, $zero, 2 - beq $a0, $a1, .LBB3_91 -.LBB3_81: + lu12i.w $s1, 3 + beq $a0, $a1, .LBB3_92 +.LBB3_82: ld.d $a1, $s5, %pc_lo12(active_pps) ld.w $a3, $a1, 192 ori $a2, $zero, 1 - bne $a3, $a2, .LBB3_88 -# %bb.82: - ori $a1, $zero, 3 - beq $a0, $a1, .LBB3_84 + bne $a3, $a2, .LBB3_89 # %bb.83: - bnez $a0, .LBB3_90 -.LBB3_84: - ld.d $a0, $sp, 104 # 8-byte Folded Reload - ld.d $a0, $a0, 0 + ori $a1, $zero, 3 + beq $a0, $a1, .LBB3_85 +# %bb.84: + bnez $a0, .LBB3_91 +.LBB3_85: + ld.d $a0, $s6, 0 ld.w $a1, $a0, 1576 - beqz $a1, .LBB3_212 -# %bb.85: - ldptr.w $a0, $a0, 2940 - beqz $a0, .LBB3_212 + beqz $a1, .LBB3_214 # %bb.86: + ldptr.w $a0, $a0, 2940 + beqz $a0, .LBB3_214 +# %bb.87: pcalau12i $a0, %got_pc_hi20(enc_picture) ld.d $a0, $a0, %got_pc_lo12(enc_picture) ld.d $a0, $a0, 0 pcalau12i $a1, %got_pc_hi20(enc_frame_picture2) ld.d $a1, $a1, %got_pc_lo12(enc_frame_picture2) ld.d $a1, $a1, 0 - beq $a0, $a1, .LBB3_212 -# %bb.87: + beq $a0, $a1, .LBB3_214 +# %bb.88: ori $a0, $zero, 1 - b .LBB3_213 -.LBB3_88: - bne $a0, $a2, .LBB3_91 -# %bb.89: + b .LBB3_215 +.LBB3_89: + bne $a0, $a2, .LBB3_92 +# %bb.90: ld.w $a0, $a1, 196 - beqz $a0, .LBB3_91 -.LBB3_90: # %.thread.i + beqz $a0, .LBB3_92 +.LBB3_91: # %.thread.i pcaddu18i $ra, %call36(estimate_weighting_factor_B_slice) jirl $ra, $ra, 0 -.LBB3_91: +.LBB3_92: ld.w $a0, $s3, 0 - blez $a0, .LBB3_94 -.LBB3_92: # %.lr.ph.i.i + blez $a0, .LBB3_95 +.LBB3_93: # %.lr.ph.i.i pcalau12i $a1, %got_pc_hi20(listX) ld.d $a1, $a1, %got_pc_lo12(listX) ld.d $a1, $a1, 0 @@ -1004,7 +1004,7 @@ encode_one_slice: # @encode_one_slice addi.d $a2, $a2, 2047 addi.d $a2, $a2, 1145 .p2align 4, , 16 -.LBB3_93: # =>This Inner Loop Header: Depth=1 +.LBB3_94: # =>This Inner Loop Header: Depth=1 ld.d $a3, $a1, 0 ld.w $a4, $a3, 4 ld.w $a5, $a3, 0 @@ -1026,11 +1026,11 @@ encode_one_slice: # @encode_one_slice addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 addi.d $a1, $a1, 8 - bnez $a0, .LBB3_93 -.LBB3_94: # %.preheader45.i.i + bnez $a0, .LBB3_94 +.LBB3_95: # %.preheader45.i.i ld.w $a0, $s3, 4 - blez $a0, .LBB3_97 -# %bb.95: # %.lr.ph48.i.i + blez $a0, .LBB3_98 +# %bb.96: # %.lr.ph48.i.i pcalau12i $a1, %got_pc_hi20(listX) ld.d $a1, $a1, %got_pc_lo12(listX) ld.d $a1, $a1, 8 @@ -1040,7 +1040,7 @@ encode_one_slice: # @encode_one_slice addi.d $a2, $a2, 2047 addi.d $a2, $a2, 1409 .p2align 4, , 16 -.LBB3_96: # =>This Inner Loop Header: Depth=1 +.LBB3_97: # =>This Inner Loop Header: Depth=1 ld.d $a3, $a1, 0 ld.w $a4, $a3, 4 ld.w $a5, $a3, 0 @@ -1062,30 +1062,29 @@ encode_one_slice: # @encode_one_slice addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 addi.d $a1, $a1, 8 - bnez $a0, .LBB3_96 -.LBB3_97: # %._crit_edge.i.i + bnez $a0, .LBB3_97 +.LBB3_98: # %._crit_edge.i.i ld.d $a0, $s4, %pc_lo12(active_sps) ld.w $a1, $a0, 1148 ld.d $a0, $s8, 0 - ld.d $s4, $sp, 104 # 8-byte Folded Reload - bnez $a1, .LBB3_111 -# %bb.98: + bnez $a1, .LBB3_112 +# %bb.99: ld.w $a1, $a0, 24 - bnez $a1, .LBB3_111 -# %bb.99: # %.preheader44.i.i + bnez $a1, .LBB3_112 +# %bb.100: # %.preheader44.i.i pcalau12i $a1, %got_pc_hi20(enc_picture) ld.d $a1, $a1, %got_pc_lo12(enc_picture) ld.d $a1, $a1, 0 ld.w $a3, $s3, 8 addi.d $a2, $a1, 2047 - blez $a3, .LBB3_102 -# %bb.100: # %.lr.ph50.i.i + blez $a3, .LBB3_103 +# %bb.101: # %.lr.ph50.i.i pcalau12i $a4, %got_pc_hi20(listX) ld.d $a4, $a4, %got_pc_lo12(listX) ld.d $a4, $a4, 16 addi.d $a5, $a2, 1673 .p2align 4, , 16 -.LBB3_101: # =>This Inner Loop Header: Depth=1 +.LBB3_102: # =>This Inner Loop Header: Depth=1 ld.d $a6, $a4, 0 ld.w $a7, $a6, 4 ld.w $t0, $a6, 0 @@ -1107,17 +1106,17 @@ encode_one_slice: # @encode_one_slice addi.d $a3, $a3, -1 addi.d $a5, $a5, 8 addi.d $a4, $a4, 8 - bnez $a3, .LBB3_101 -.LBB3_102: # %._crit_edge51.i.i + bnez $a3, .LBB3_102 +.LBB3_103: # %._crit_edge51.i.i ld.w $a3, $s3, 12 - blez $a3, .LBB3_105 -# %bb.103: # %.lr.ph50.1.i.i + blez $a3, .LBB3_106 +# %bb.104: # %.lr.ph50.1.i.i pcalau12i $a4, %got_pc_hi20(listX) ld.d $a4, $a4, %got_pc_lo12(listX) ld.d $a4, $a4, 24 addi.d $a2, $a2, 1937 .p2align 4, , 16 -.LBB3_104: # =>This Inner Loop Header: Depth=1 +.LBB3_105: # =>This Inner Loop Header: Depth=1 ld.d $a5, $a4, 0 ld.w $a6, $a5, 4 ld.w $a7, $a5, 0 @@ -1139,11 +1138,11 @@ encode_one_slice: # @encode_one_slice addi.d $a3, $a3, -1 addi.d $a2, $a2, 8 addi.d $a4, $a4, 8 - bnez $a3, .LBB3_104 -.LBB3_105: # %._crit_edge51.1.i.i + bnez $a3, .LBB3_105 +.LBB3_106: # %._crit_edge51.1.i.i ld.w $a2, $s3, 16 - blez $a2, .LBB3_108 -# %bb.106: # %.lr.ph50.2.i.i + blez $a2, .LBB3_109 +# %bb.107: # %.lr.ph50.2.i.i pcalau12i $a3, %got_pc_hi20(listX) ld.d $a3, $a3, %got_pc_lo12(listX) ld.d $a3, $a3, 32 @@ -1151,7 +1150,7 @@ encode_one_slice: # @encode_one_slice ori $a4, $a4, 152 add.d $a4, $a1, $a4 .p2align 4, , 16 -.LBB3_107: # =>This Inner Loop Header: Depth=1 +.LBB3_108: # =>This Inner Loop Header: Depth=1 ld.d $a5, $a3, 0 ld.w $a6, $a5, 4 ld.w $a7, $a5, 0 @@ -1173,11 +1172,11 @@ encode_one_slice: # @encode_one_slice addi.d $a2, $a2, -1 addi.d $a4, $a4, 8 addi.d $a3, $a3, 8 - bnez $a2, .LBB3_107 -.LBB3_108: # %._crit_edge51.2.i.i + bnez $a2, .LBB3_108 +.LBB3_109: # %._crit_edge51.2.i.i ld.w $a2, $s3, 20 - blez $a2, .LBB3_111 -# %bb.109: # %.lr.ph50.3.i.i + blez $a2, .LBB3_112 +# %bb.110: # %.lr.ph50.3.i.i pcalau12i $a3, %got_pc_hi20(listX) ld.d $a3, $a3, %got_pc_lo12(listX) ld.d $a3, $a3, 40 @@ -1185,7 +1184,7 @@ encode_one_slice: # @encode_one_slice ori $a4, $a4, 416 add.d $a1, $a1, $a4 .p2align 4, , 16 -.LBB3_110: # =>This Inner Loop Header: Depth=1 +.LBB3_111: # =>This Inner Loop Header: Depth=1 ld.d $a4, $a3, 0 ld.w $a5, $a4, 4 ld.w $a6, $a4, 0 @@ -1207,12 +1206,12 @@ encode_one_slice: # @encode_one_slice addi.d $a2, $a2, -1 addi.d $a1, $a1, 8 addi.d $a3, $a3, 8 - bnez $a2, .LBB3_110 -.LBB3_111: # %set_ref_pic_num.exit.i + bnez $a2, .LBB3_111 +.LBB3_112: # %set_ref_pic_num.exit.i ld.w $a1, $a0, 20 ori $a0, $zero, 1 - bne $a1, $a0, .LBB3_113 -# %bb.112: + bne $a1, $a0, .LBB3_114 +# %bb.113: pcalau12i $a0, %got_pc_hi20(Co_located) ld.d $a0, $a0, %got_pc_lo12(Co_located) ld.d $a0, $a0, 0 @@ -1222,15 +1221,15 @@ encode_one_slice: # @encode_one_slice jirl $ra, $ra, 0 ld.d $a0, $s8, 0 ld.w $a1, $a0, 20 -.LBB3_113: - ld.d $a0, $s4, 0 +.LBB3_114: + ld.d $a0, $s6, 0 ori $a2, $zero, 2 - beq $a1, $a2, .LBB3_116 -# %bb.114: + beq $a1, $a2, .LBB3_117 +# %bb.115: ldptr.w $a1, $a0, 5244 ori $a2, $zero, 3 - bne $a1, $a2, .LBB3_116 -# %bb.115: + bne $a1, $a2, .LBB3_117 +# %bb.116: pcalau12i $a0, %got_pc_hi20(EPZSCo_located) ld.d $a0, $a0, %got_pc_lo12(EPZSCo_located) ld.d $a0, $a0, 0 @@ -1238,11 +1237,11 @@ encode_one_slice: # @encode_one_slice ld.d $a1, $a1, %got_pc_lo12(listX) pcaddu18i $ra, %call36(EPZSSliceInit) jirl $ra, $ra, 0 - ld.d $a0, $s4, 0 -.LBB3_116: + ld.d $a0, $s6, 0 +.LBB3_117: ldptr.w $a0, $a0, 4008 - beqz $a0, .LBB3_118 -# %bb.117: # %.loopexit.loopexit139.i + beqz $a0, .LBB3_119 +# %bb.118: # %.loopexit.loopexit139.i pcalau12i $a0, %got_pc_hi20(writeMB_typeInfo) ld.d $a0, $a0, %got_pc_lo12(writeMB_typeInfo) pcalau12i $a1, %got_pc_hi20(writeMB_typeInfo_CABAC) @@ -1313,8 +1312,8 @@ encode_one_slice: # @encode_one_slice ld.d $a3, $a3, %got_pc_lo12(writeCBP_CABAC) pcalau12i $a5, %got_pc_hi20(writeMVD_CABAC) ld.d $a5, $a5, %got_pc_lo12(writeMVD_CABAC) - b .LBB3_132 -.LBB3_118: + b .LBB3_133 +.LBB3_119: pcalau12i $a0, %got_pc_hi20(writeMB_typeInfo) ld.d $a1, $a0, %got_pc_lo12(writeMB_typeInfo) pcalau12i $a0, %got_pc_hi20(writeSE_UVLC) @@ -1333,47 +1332,47 @@ encode_one_slice: # @encode_one_slice pcalau12i $a1, %pc_hi20(.Lswitch.table.encode_one_slice.5) addi.d $a1, $a1, %pc_lo12(.Lswitch.table.encode_one_slice.5) move $a4, $a0 - bltu $a3, $a2, .LBB3_120 -# %bb.119: # %switch.lookup + bltu $a3, $a2, .LBB3_121 +# %bb.120: # %switch.lookup slli.d $a2, $a2, 3 ldx.d $a4, $a1, $a2 -.LBB3_120: +.LBB3_121: pcalau12i $a2, %got_pc_hi20(writeRefFrame) ld.d $a2, $a2, %got_pc_lo12(writeRefFrame) ld.w $a5, $s3, 4 st.d $a4, $a2, 0 move $a4, $a0 - bltu $a3, $a5, .LBB3_122 -# %bb.121: # %switch.lookup175 + bltu $a3, $a5, .LBB3_123 +# %bb.122: # %switch.lookup175 slli.d $a3, $a5, 3 ldx.d $a4, $a1, $a3 -.LBB3_122: +.LBB3_123: ld.w $a5, $s3, 8 ori $a3, $zero, 2 st.d $a4, $a2, 8 move $a4, $a0 - bltu $a3, $a5, .LBB3_124 -# %bb.123: # %switch.lookup178 + bltu $a3, $a5, .LBB3_125 +# %bb.124: # %switch.lookup178 slli.d $a4, $a5, 3 ldx.d $a4, $a1, $a4 -.LBB3_124: +.LBB3_125: ld.w $a5, $s3, 12 st.d $a4, $a2, 16 move $a4, $a0 - bltu $a3, $a5, .LBB3_126 -# %bb.125: # %switch.lookup181 + bltu $a3, $a5, .LBB3_127 +# %bb.126: # %switch.lookup181 slli.d $a3, $a5, 3 ldx.d $a4, $a1, $a3 -.LBB3_126: +.LBB3_127: ld.w $a5, $s3, 16 ori $a6, $zero, 2 st.d $a4, $a2, 24 move $a3, $a0 - bltu $a6, $a5, .LBB3_128 -# %bb.127: # %switch.lookup184 + bltu $a6, $a5, .LBB3_129 +# %bb.128: # %switch.lookup184 slli.d $a3, $a5, 3 ldx.d $a3, $a1, $a3 -.LBB3_128: +.LBB3_129: ld.w $a4, $s3, 20 st.d $a3, $a2, 32 pcalau12i $a2, %got_pc_hi20(writeSE_Flag) @@ -1383,17 +1382,17 @@ encode_one_slice: # @encode_one_slice pcalau12i $a5, %got_pc_hi20(writeSE_SVLC) ld.d $a5, $a5, %got_pc_lo12(writeSE_SVLC) ori $a6, $zero, 3 - bgeu $a4, $a6, .LBB3_130 -# %bb.129: # %switch.lookup187 + bgeu $a4, $a6, .LBB3_131 +# %bb.130: # %switch.lookup187 slli.d $a4, $a4, 3 ldx.d $a4, $a1, $a4 - b .LBB3_131 -.LBB3_130: + b .LBB3_132 +.LBB3_131: move $a4, $a0 -.LBB3_131: # %init_slice.exit +.LBB3_132: # %init_slice.exit move $a6, $a5 move $a1, $a2 -.LBB3_132: # %init_slice.exit +.LBB3_133: # %init_slice.exit pcalau12i $a7, %got_pc_hi20(writeRefFrame) ld.d $a7, $a7, %got_pc_lo12(writeRefFrame) st.d $a4, $a7, 40 @@ -1424,57 +1423,57 @@ encode_one_slice: # @encode_one_slice st.w $a0, $a1, %pc_lo12(Bytes_After_Header) pcaddu18i $ra, %call36(SetLagrangianMultipliers) jirl $ra, $ra, 0 - ld.d $a0, $s4, 0 + ld.d $a0, $s6, 0 ldptr.w $a1, $a0, 4008 ori $a2, $zero, 1 - bne $a1, $a2, .LBB3_134 -# %bb.133: + bne $a1, $a2, .LBB3_135 +# %bb.134: pcaddu18i $ra, %call36(SetCtxModelNumber) jirl $ra, $ra, 0 - ld.d $a0, $s4, 0 -.LBB3_134: + ld.d $a0, $s6, 0 +.LBB3_135: ldptr.w $a1, $a0, 4168 - beqz $a1, .LBB3_137 -# %bb.135: - ldptr.w $a1, $a0, 4732 - ld.d $a0, $s8, 0 beqz $a1, .LBB3_138 # %bb.136: + ldptr.w $a1, $a0, 4732 + ld.d $a0, $s8, 0 + beqz $a1, .LBB3_139 +# %bb.137: ld.w $a1, $a0, 20 sltui $a2, $a1, 1 addi.d $a1, $a1, -3 sltui $a1, $a1, 1 or $a1, $a2, $a1 - b .LBB3_139 -.LBB3_137: # %._crit_edge83 + b .LBB3_140 +.LBB3_138: # %._crit_edge83 ld.d $a0, $s8, 0 -.LBB3_138: - move $a1, $zero .LBB3_139: - ori $a2, $s7, 3304 + move $a1, $zero +.LBB3_140: + ori $a2, $s1, 3304 stx.h $a1, $a0, $a2 pcaddu18i $ra, %call36(start_slice) jirl $ra, $ra, 0 - ld.d $a1, $s4, 0 + ld.d $a1, $s6, 0 ldptr.w $a2, $a1, 5116 ld.d $a1, $s8, 0 pcalau12i $a3, %pc_hi20(generic_RC) - st.d $a3, $sp, 56 # 8-byte Folded Spill - beqz $a2, .LBB3_142 -# %bb.140: - ld.d $a2, $sp, 56 # 8-byte Folded Reload + st.d $a3, $sp, 64 # 8-byte Folded Spill + beqz $a2, .LBB3_143 +# %bb.141: + ld.d $a2, $sp, 64 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(generic_RC) ld.w $a3, $a2, 16 ldptr.w $a4, $a1, 15404 ldptr.w $a5, $a1, 15352 add.d $a3, $a3, $a0 st.w $a3, $a2, 16 - bgeu $a4, $a5, .LBB3_142 -# %bb.141: + bgeu $a4, $a5, .LBB3_143 +# %bb.142: ld.w $a3, $a2, 24 add.d $a3, $a3, $a0 st.w $a3, $a2, 24 -.LBB3_142: +.LBB3_143: pcalau12i $a2, %got_pc_hi20(stats) ld.d $a2, $a2, %got_pc_lo12(stats) ld.d $a2, $a2, 0 @@ -1487,21 +1486,20 @@ encode_one_slice: # @encode_one_slice st.w $a4, $a2, 32 add.d $a0, $a5, $a0 st.d $a0, $a3, 2040 - beqz $a6, .LBB3_144 -# %bb.143: + beqz $a6, .LBB3_145 +# %bb.144: move $fp, $zero - b .LBB3_216 -.LBB3_144: # %.lr.ph.preheader - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI3_0) - pcalau12i $s6, %pc_hi20(rdopt) + b .LBB3_218 +.LBB3_145: # %.lr.ph.preheader + addi.w $s3, $zero, -1 + lu52i.d $a0, $s3, 2046 + movgr2fr.d $fs0, $a0 + pcalau12i $s7, %pc_hi20(rdopt) pcalau12i $a0, %pc_hi20(rddata_top_frame_mb) addi.d $a0, $a0, %pc_lo12(rddata_top_frame_mb) st.d $a0, $sp, 96 # 8-byte Folded Spill - pcalau12i $s3, %pc_hi20(encode_one_macroblock) + pcalau12i $s2, %pc_hi20(encode_one_macroblock) ori $fp, $zero, 1 - addi.w $a0, $zero, -1 - st.d $a0, $sp, 88 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(errortext) addi.d $a0, $a0, %pc_lo12(errortext) st.d $a0, $sp, 24 # 8-byte Folded Spill @@ -1510,10 +1508,10 @@ encode_one_slice: # @encode_one_slice st.d $a0, $sp, 16 # 8-byte Folded Spill lu12i.w $a0, 1 ori $a1, $a0, 612 - st.d $a1, $sp, 80 # 8-byte Folded Spill - ori $s2, $zero, 2 + st.d $a1, $sp, 88 # 8-byte Folded Spill + ori $s5, $zero, 2 ori $a0, $a0, 1020 - st.d $a0, $sp, 64 # 8-byte Folded Spill + st.d $a0, $sp, 72 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(quadratic_RC_init) st.d $a0, $sp, 8 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(quadratic_RC) @@ -1521,112 +1519,110 @@ encode_one_slice: # @encode_one_slice pcalau12i $a0, %pc_hi20(rddata_bot_frame_mb) addi.d $a0, $a0, %pc_lo12(rddata_bot_frame_mb) st.d $a0, $sp, 48 # 8-byte Folded Spill - st.d $zero, $sp, 72 # 8-byte Folded Spill - pcalau12i $s5, %pc_hi20(MBPairIsField) - b .LBB3_148 -.LBB3_145: # in Loop: Header=BB3_148 Depth=1 + st.d $zero, $sp, 80 # 8-byte Folded Spill + st.d $s3, $sp, 56 # 8-byte Folded Spill + b .LBB3_149 +.LBB3_146: # in Loop: Header=BB3_149 Depth=1 move $a0, $s0 pcaddu18i $ra, %call36(FmoGetNextMBNr) jirl $ra, $ra, 0 move $s0, $a0 - ld.d $a0, $sp, 88 # 8-byte Folded Reload - beq $s0, $a0, .LBB3_211 -.LBB3_146: # in Loop: Header=BB3_148 Depth=1 - ld.d $a0, $sp, 72 # 8-byte Folded Reload + beq $a0, $s3, .LBB3_213 +.LBB3_147: # in Loop: Header=BB3_149 Depth=1 + ld.d $a0, $sp, 80 # 8-byte Folded Reload addi.w $a0, $a0, 1 - st.d $a0, $sp, 72 # 8-byte Folded Spill + st.d $a0, $sp, 80 # 8-byte Folded Spill pcaddu18i $ra, %call36(proceed2nextMacroblock) jirl $ra, $ra, 0 .p2align 4, , 16 -.LBB3_147: # in Loop: Header=BB3_148 Depth=1 +.LBB3_148: # in Loop: Header=BB3_149 Depth=1 ld.w $a0, $sp, 116 - bnez $a0, .LBB3_215 -.LBB3_148: # %.lr.ph + bnez $a0, .LBB3_217 +.LBB3_149: # %.lr.ph # =>This Inner Loop Header: Depth=1 ld.d $a0, $s8, 0 ldptr.w $a1, $a0, 15260 - beqz $a1, .LBB3_153 -# %bb.149: # in Loop: Header=BB3_148 Depth=1 - ld.d $a1, $s4, 0 + beqz $a1, .LBB3_154 +# %bb.150: # in Loop: Header=BB3_149 Depth=1 + ld.d $a1, $s6, 0 ldptr.w $a1, $a1, 5656 - beqz $a1, .LBB3_153 -# %bb.150: # in Loop: Header=BB3_148 Depth=1 + beqz $a1, .LBB3_154 +# %bb.151: # in Loop: Header=BB3_149 Depth=1 ld.w $a0, $a0, 12 mod.w $a0, $a0, $a1 - bnez $a0, .LBB3_153 -# %bb.151: # in Loop: Header=BB3_148 Depth=1 + bnez $a0, .LBB3_154 +# %bb.152: # in Loop: Header=BB3_149 Depth=1 pcaddu18i $ra, %call36(CalculateOffsetParam) jirl $ra, $ra, 0 - ld.d $a0, $s4, 0 + ld.d $a0, $s6, 0 ldptr.w $a0, $a0, 5100 - beqz $a0, .LBB3_153 -# %bb.152: # in Loop: Header=BB3_148 Depth=1 + beqz $a0, .LBB3_154 +# %bb.153: # in Loop: Header=BB3_149 Depth=1 pcaddu18i $ra, %call36(CalculateOffset8Param) jirl $ra, $ra, 0 .p2align 4, , 16 -.LBB3_153: # in Loop: Header=BB3_148 Depth=1 +.LBB3_154: # in Loop: Header=BB3_149 Depth=1 ld.d $a2, $s8, 0 ldptr.w $a0, $a2, 15268 - beqz $a0, .LBB3_183 -# %bb.154: # in Loop: Header=BB3_148 Depth=1 - ld.d $a0, $s4, 0 - ld.d $a1, $sp, 80 # 8-byte Folded Reload + beqz $a0, .LBB3_184 +# %bb.155: # in Loop: Header=BB3_149 Depth=1 + ld.d $a0, $s6, 0 + ld.d $a1, $sp, 88 # 8-byte Folded Reload ldx.wu $a1, $a0, $a1 stptr.w $zero, $a2, 15408 bstrpick.d $a3, $a1, 31, 1 slli.w $a3, $a3, 1 addi.w $s4, $s0, 1 - bne $a3, $s2, .LBB3_171 -# %bb.155: # in Loop: Header=BB3_148 Depth=1 + bne $a3, $s5, .LBB3_172 +# %bb.156: # in Loop: Header=BB3_149 Depth=1 stptr.d $zero, $a2, 14464 - ld.d $a3, $sp, 64 # 8-byte Folded Reload + ld.d $a3, $sp, 72 # 8-byte Folded Reload ldx.w $a3, $a0, $a3 st.w $zero, $sp, 112 stptr.w $zero, $a2, 15412 - bne $a1, $s2, .LBB3_160 -# %bb.156: # in Loop: Header=BB3_148 Depth=1 - beqz $a3, .LBB3_160 -# %bb.157: # in Loop: Header=BB3_148 Depth=1 + bne $a1, $s5, .LBB3_161 +# %bb.157: # in Loop: Header=BB3_149 Depth=1 + beqz $a3, .LBB3_161 +# %bb.158: # in Loop: Header=BB3_149 Depth=1 ldptr.w $a1, $a2, 15388 - blez $a1, .LBB3_161 -# %bb.158: # in Loop: Header=BB3_148 Depth=1 + blez $a1, .LBB3_162 +# %bb.159: # in Loop: Header=BB3_149 Depth=1 ldptr.w $a2, $a2, 15404 mod.wu $a1, $a1, $a2 - bnez $a1, .LBB3_161 -# %bb.159: # in Loop: Header=BB3_148 Depth=1 + bnez $a1, .LBB3_162 +# %bb.160: # in Loop: Header=BB3_149 Depth=1 ld.d $a0, $sp, 8 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(quadratic_RC_init) ld.d $a1, $sp, 32 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(quadratic_RC) pcaddu18i $ra, %call36(copy_rc_jvt) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 # 8-byte Folded Reload - ld.d $a0, $a0, 0 - ld.d $a1, $sp, 64 # 8-byte Folded Reload + ld.d $a0, $s6, 0 + ld.d $a1, $sp, 72 # 8-byte Folded Reload ldx.w $a3, $a0, $a1 -.LBB3_160: # in Loop: Header=BB3_148 Depth=1 - beqz $a3, .LBB3_163 -.LBB3_161: # %.thread138 - # in Loop: Header=BB3_148 Depth=1 +.LBB3_161: # in Loop: Header=BB3_149 Depth=1 + beqz $a3, .LBB3_164 +.LBB3_162: # %.thread138 + # in Loop: Header=BB3_149 Depth=1 ldptr.w $a0, $a0, 4708 - bne $a0, $s2, .LBB3_163 -# %bb.162: # in Loop: Header=BB3_148 Depth=1 + bne $a0, $s5, .LBB3_164 +# %bb.163: # in Loop: Header=BB3_149 Depth=1 pcalau12i $a0, %pc_hi20(generic_RC_init) ld.d $a0, $a0, %pc_lo12(generic_RC_init) - ld.d $a1, $sp, 56 # 8-byte Folded Reload + ld.d $a1, $sp, 64 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(generic_RC) pcaddu18i $ra, %call36(copy_rc_generic) jirl $ra, $ra, 0 -.LBB3_163: # in Loop: Header=BB3_148 Depth=1 +.LBB3_164: # in Loop: Header=BB3_149 Depth=1 move $a0, $s0 move $a1, $zero pcaddu18i $ra, %call36(start_macroblock) jirl $ra, $ra, 0 - ld.d $a0, $s3, %pc_lo12(encode_one_macroblock) + ld.d $a0, $s2, %pc_lo12(encode_one_macroblock) ld.d $a1, $sp, 96 # 8-byte Folded Reload - st.d $a1, $s6, %pc_lo12(rdopt) + st.d $a1, $s7, %pc_lo12(rdopt) jirl $ra, $a0, 0 - ld.d $a0, $s6, %pc_lo12(rdopt) + ld.d $a0, $s7, %pc_lo12(rdopt) ld.d $a1, $s8, 0 fld.d $fs0, $a0, 0 stptr.w $fp, $a1, 15412 @@ -1635,65 +1631,63 @@ encode_one_slice: # @encode_one_slice move $a1, $zero pcaddu18i $ra, %call36(start_macroblock) jirl $ra, $ra, 0 - ld.d $a0, $s3, %pc_lo12(encode_one_macroblock) + ld.d $a0, $s2, %pc_lo12(encode_one_macroblock) ld.d $a1, $sp, 48 # 8-byte Folded Reload - st.d $a1, $s6, %pc_lo12(rdopt) + st.d $a1, $s7, %pc_lo12(rdopt) jirl $ra, $a0, 0 - ld.d $a0, $sp, 104 # 8-byte Folded Reload - ld.d $a0, $a0, 0 + ld.d $a0, $s6, 0 ldptr.w $a1, $a0, 5116 - beqz $a1, .LBB3_170 -# %bb.164: # in Loop: Header=BB3_148 Depth=1 + beqz $a1, .LBB3_171 +# %bb.165: # in Loop: Header=BB3_149 Depth=1 ldptr.w $a1, $a0, 4708 - bne $a1, $s2, .LBB3_168 -# %bb.165: # in Loop: Header=BB3_148 Depth=1 + bne $a1, $s5, .LBB3_169 +# %bb.166: # in Loop: Header=BB3_149 Depth=1 ld.d $a1, $s8, 0 ldptr.w $a2, $a1, 15388 - blez $a2, .LBB3_168 -# %bb.166: # in Loop: Header=BB3_148 Depth=1 + blez $a2, .LBB3_169 +# %bb.167: # in Loop: Header=BB3_149 Depth=1 ldptr.w $a1, $a1, 15404 mod.wu $a1, $a2, $a1 - bnez $a1, .LBB3_168 -# %bb.167: # in Loop: Header=BB3_148 Depth=1 + bnez $a1, .LBB3_169 +# %bb.168: # in Loop: Header=BB3_149 Depth=1 pcalau12i $a0, %pc_hi20(quadratic_RC_best) ld.d $a0, $a0, %pc_lo12(quadratic_RC_best) ld.d $a1, $sp, 32 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(quadratic_RC) pcaddu18i $ra, %call36(copy_rc_jvt) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 # 8-byte Folded Reload - ld.d $a0, $a0, 0 + ld.d $a0, $s6, 0 ldptr.w $a1, $a0, 5116 - beqz $a1, .LBB3_170 -.LBB3_168: # %.thread141 - # in Loop: Header=BB3_148 Depth=1 + beqz $a1, .LBB3_171 +.LBB3_169: # %.thread141 + # in Loop: Header=BB3_149 Depth=1 ldptr.w $a1, $a0, 4708 - bne $a1, $s2, .LBB3_170 -# %bb.169: # in Loop: Header=BB3_148 Depth=1 + bne $a1, $s5, .LBB3_171 +# %bb.170: # in Loop: Header=BB3_149 Depth=1 pcalau12i $a0, %pc_hi20(generic_RC_best) ld.d $a0, $a0, %pc_lo12(generic_RC_best) - ld.d $a1, $sp, 56 # 8-byte Folded Reload + ld.d $a1, $sp, 64 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(generic_RC) pcaddu18i $ra, %call36(copy_rc_generic) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 # 8-byte Folded Reload - ld.d $a0, $a0, 0 -.LBB3_170: # %.thread143 - # in Loop: Header=BB3_148 Depth=1 - ld.d $a1, $s6, %pc_lo12(rdopt) + ld.d $a0, $s6, 0 +.LBB3_171: # %.thread143 + # in Loop: Header=BB3_149 Depth=1 + ld.d $a1, $s7, %pc_lo12(rdopt) fld.d $fa0, $a1, 0 - ld.d $a1, $sp, 80 # 8-byte Folded Reload + ld.d $a1, $sp, 88 # 8-byte Folded Reload ldx.w $a1, $a0, $a1 ld.d $a2, $s8, 0 fadd.d $fs0, $fs0, $fa0 -.LBB3_171: # in Loop: Header=BB3_148 Depth=1 +.LBB3_172: # in Loop: Header=BB3_149 Depth=1 addi.w $a3, $a1, -1 - ori $s1, $s7, 2168 + pcalau12i $s6, %pc_hi20(MBPairIsField) + move $a5, $s1 + ori $s1, $s1, 2168 pcalau12i $a4, %pc_hi20(rddata_top_field_mb) - move $a5, $s7 - addi.d $s7, $a4, %pc_lo12(rddata_top_field_mb) - bgeu $a3, $s2, .LBB3_186 -# %bb.172: # in Loop: Header=BB3_148 Depth=1 + addi.d $s3, $a4, %pc_lo12(rddata_top_field_mb) + bgeu $a3, $s5, .LBB3_187 +# %bb.173: # in Loop: Header=BB3_149 Depth=1 ori $a3, $zero, 1 ori $a4, $a5, 2952 ldx.w $a4, $a2, $a4 @@ -1707,22 +1701,22 @@ encode_one_slice: # @encode_one_slice st.w $a3, $a0, 32 slli.d $a3, $a5, 1 addi.d $a4, $a3, 1 - ld.d $a3, $sp, 64 # 8-byte Folded Reload + ld.d $a3, $sp, 72 # 8-byte Folded Reload ldx.w $a3, $a0, $a3 stptr.w $a4, $a2, 14456 addi.w $a1, $a1, 0 stptr.w $zero, $a2, 15412 - bne $a1, $s2, .LBB3_177 -# %bb.173: # in Loop: Header=BB3_148 Depth=1 - beqz $a3, .LBB3_177 -# %bb.174: # in Loop: Header=BB3_148 Depth=1 + bne $a1, $s5, .LBB3_178 +# %bb.174: # in Loop: Header=BB3_149 Depth=1 + beqz $a3, .LBB3_178 +# %bb.175: # in Loop: Header=BB3_149 Depth=1 ldptr.w $a1, $a2, 15388 - blez $a1, .LBB3_178 -# %bb.175: # in Loop: Header=BB3_148 Depth=1 + blez $a1, .LBB3_179 +# %bb.176: # in Loop: Header=BB3_149 Depth=1 ldptr.w $a2, $a2, 15404 mod.wu $a1, $a1, $a2 - bnez $a1, .LBB3_178 -# %bb.176: # in Loop: Header=BB3_148 Depth=1 + bnez $a1, .LBB3_179 +# %bb.177: # in Loop: Header=BB3_149 Depth=1 ld.d $a0, $sp, 32 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(quadratic_RC) ld.d $a1, $sp, 8 # 8-byte Folded Reload @@ -1731,30 +1725,30 @@ encode_one_slice: # @encode_one_slice jirl $ra, $ra, 0 ld.d $a0, $sp, 104 # 8-byte Folded Reload ld.d $a0, $a0, 0 - ld.d $a1, $sp, 64 # 8-byte Folded Reload + ld.d $a1, $sp, 72 # 8-byte Folded Reload ldx.w $a3, $a0, $a1 -.LBB3_177: # in Loop: Header=BB3_148 Depth=1 - beqz $a3, .LBB3_180 -.LBB3_178: # %.thread146 - # in Loop: Header=BB3_148 Depth=1 +.LBB3_178: # in Loop: Header=BB3_149 Depth=1 + beqz $a3, .LBB3_181 +.LBB3_179: # %.thread146 + # in Loop: Header=BB3_149 Depth=1 ldptr.w $a0, $a0, 4708 - bne $a0, $s2, .LBB3_180 -# %bb.179: # in Loop: Header=BB3_148 Depth=1 - ld.d $a0, $sp, 56 # 8-byte Folded Reload + bne $a0, $s5, .LBB3_181 +# %bb.180: # in Loop: Header=BB3_149 Depth=1 + ld.d $a0, $sp, 64 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(generic_RC) pcalau12i $a1, %pc_hi20(generic_RC_init) ld.d $a1, $a1, %pc_lo12(generic_RC_init) pcaddu18i $ra, %call36(copy_rc_generic) jirl $ra, $ra, 0 -.LBB3_180: # in Loop: Header=BB3_148 Depth=1 +.LBB3_181: # in Loop: Header=BB3_149 Depth=1 ori $a1, $zero, 1 move $a0, $s0 pcaddu18i $ra, %call36(start_macroblock) jirl $ra, $ra, 0 - ld.d $a0, $s3, %pc_lo12(encode_one_macroblock) - st.d $s7, $s6, %pc_lo12(rdopt) + ld.d $a0, $s2, %pc_lo12(encode_one_macroblock) + st.d $s3, $s7, %pc_lo12(rdopt) jirl $ra, $a0, 0 - ld.d $a0, $s6, %pc_lo12(rdopt) + ld.d $a0, $s7, %pc_lo12(rdopt) ld.d $a1, $s8, 0 fld.d $fs1, $a0, 0 stptr.w $fp, $a1, 15412 @@ -1765,27 +1759,27 @@ encode_one_slice: # @encode_one_slice jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(rddata_bot_field_mb) addi.d $a0, $a0, %pc_lo12(rddata_bot_field_mb) - ld.d $a1, $s3, %pc_lo12(encode_one_macroblock) - st.d $a0, $s6, %pc_lo12(rdopt) + ld.d $a1, $s2, %pc_lo12(encode_one_macroblock) + st.d $a0, $s7, %pc_lo12(rdopt) jirl $ra, $a1, 0 - ld.d $s4, $sp, 104 # 8-byte Folded Reload - ld.d $a0, $s4, 0 - ld.d $a3, $s6, %pc_lo12(rdopt) + ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $a0, 0 + ld.d $a3, $s7, %pc_lo12(rdopt) ld.d $a2, $s8, 0 ldptr.w $a1, $a0, 4708 fld.d $fa0, $a3, 0 stptr.w $zero, $a2, 15416 addi.d $a3, $a1, -2 sltui $a3, $a3, 1 - bne $a1, $s2, .LBB3_187 -# %bb.181: # in Loop: Header=BB3_148 Depth=1 + bne $a1, $s5, .LBB3_188 +# %bb.182: # in Loop: Header=BB3_149 Depth=1 fadd.d $fa0, $fs1, $fa0 fcmp.clt.d $fcc0, $fs0, $fa0 - bceqz $fcc0, .LBB3_187 -# %bb.182: # in Loop: Header=BB3_148 Depth=1 + bceqz $fcc0, .LBB3_188 +# %bb.183: # in Loop: Header=BB3_149 Depth=1 ldptr.w $a1, $a2, 15240 stptr.w $zero, $a2, 14464 - st.w $zero, $s5, %pc_lo12(MBPairIsField) + st.w $zero, $s6, %pc_lo12(MBPairIsField) ld.w $a4, $a0, 32 srli.d $a1, $a1, 1 ldx.w $a5, $a2, $s1 @@ -1796,18 +1790,18 @@ encode_one_slice: # @encode_one_slice srli.d $a1, $a1, 1 stptr.w $a1, $a2, 14456 ldptr.w $a1, $a0, 5116 - bnez $a1, .LBB3_189 - b .LBB3_195 + bnez $a1, .LBB3_190 + b .LBB3_196 .p2align 4, , 16 -.LBB3_183: # in Loop: Header=BB3_148 Depth=1 +.LBB3_184: # in Loop: Header=BB3_149 Depth=1 st.w $zero, $sp, 112 ld.d $a0, $sp, 96 # 8-byte Folded Reload - st.d $a0, $s6, %pc_lo12(rdopt) + st.d $a0, $s7, %pc_lo12(rdopt) move $a0, $s0 move $a1, $zero pcaddu18i $ra, %call36(start_macroblock) jirl $ra, $ra, 0 - ld.d $a0, $s3, %pc_lo12(encode_one_macroblock) + ld.d $a0, $s2, %pc_lo12(encode_one_macroblock) jirl $ra, $a0, 0 ori $a0, $zero, 1 pcaddu18i $ra, %call36(write_one_macroblock) @@ -1817,93 +1811,92 @@ encode_one_slice: # @encode_one_slice pcaddu18i $ra, %call36(terminate_macroblock) jirl $ra, $ra, 0 ld.w $a0, $sp, 112 - beqz $a0, .LBB3_145 -# %bb.184: # in Loop: Header=BB3_148 Depth=1 + beqz $a0, .LBB3_146 +# %bb.185: # in Loop: Header=BB3_149 Depth=1 ld.d $a0, $s8, 0 ld.w $a0, $a0, 12 pcaddu18i $ra, %call36(FmoGetPreviousMBNr) jirl $ra, $ra, 0 ld.d $a1, $s8, 0 st.w $a0, $a1, 12 - ld.d $a1, $sp, 88 # 8-byte Folded Reload - bne $a0, $a1, .LBB3_147 -# %bb.185: # in Loop: Header=BB3_148 Depth=1 + bne $a0, $s3, .LBB3_148 +# %bb.186: # in Loop: Header=BB3_149 Depth=1 ori $a2, $zero, 84 - ld.d $s1, $sp, 24 # 8-byte Folded Reload - move $a0, $s1 + ld.d $s4, $sp, 24 # 8-byte Folded Reload + move $a0, $s4 ld.d $a1, $sp, 16 # 8-byte Folded Reload pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 ori $a1, $zero, 300 - move $a0, $s1 + move $a0, $s4 pcaddu18i $ra, %call36(error) jirl $ra, $ra, 0 - b .LBB3_147 + b .LBB3_148 .p2align 4, , 16 -.LBB3_186: # %.thread148 - # in Loop: Header=BB3_148 Depth=1 +.LBB3_187: # %.thread148 + # in Loop: Header=BB3_149 Depth=1 move $a3, $zero stptr.w $zero, $a2, 15416 - ld.d $s4, $sp, 104 # 8-byte Folded Reload -.LBB3_187: # in Loop: Header=BB3_148 Depth=1 +.LBB3_188: # in Loop: Header=BB3_149 Depth=1 addi.w $a1, $a1, 0 ori $a4, $zero, 3 - bne $a1, $a4, .LBB3_196 -# %bb.188: # %.thread - # in Loop: Header=BB3_148 Depth=1 + bne $a1, $a4, .LBB3_197 +# %bb.189: # %.thread + # in Loop: Header=BB3_149 Depth=1 stptr.w $zero, $a2, 14464 - st.w $zero, $s5, %pc_lo12(MBPairIsField) + st.w $zero, $s6, %pc_lo12(MBPairIsField) ldptr.w $a1, $a0, 5116 - beqz $a1, .LBB3_195 -.LBB3_189: # in Loop: Header=BB3_148 Depth=1 - beqz $a3, .LBB3_193 -# %bb.190: # in Loop: Header=BB3_148 Depth=1 + beqz $a1, .LBB3_196 +.LBB3_190: # in Loop: Header=BB3_149 Depth=1 + beqz $a3, .LBB3_194 +# %bb.191: # in Loop: Header=BB3_149 Depth=1 ldptr.w $a1, $a2, 15388 - blez $a1, .LBB3_193 -# %bb.191: # in Loop: Header=BB3_148 Depth=1 + blez $a1, .LBB3_194 +# %bb.192: # in Loop: Header=BB3_149 Depth=1 ldptr.w $a2, $a2, 15404 mod.wu $a1, $a1, $a2 - bnez $a1, .LBB3_193 -# %bb.192: # in Loop: Header=BB3_148 Depth=1 + bnez $a1, .LBB3_194 +# %bb.193: # in Loop: Header=BB3_149 Depth=1 ld.d $a0, $sp, 32 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(quadratic_RC) pcalau12i $a1, %pc_hi20(quadratic_RC_best) ld.d $a1, $a1, %pc_lo12(quadratic_RC_best) pcaddu18i $ra, %call36(copy_rc_jvt) jirl $ra, $ra, 0 - ld.d $a0, $s4, 0 + ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $a0, 0 ldptr.w $a1, $a0, 5116 - beqz $a1, .LBB3_195 + beqz $a1, .LBB3_196 .p2align 4, , 16 -.LBB3_193: # %.thread156 - # in Loop: Header=BB3_148 Depth=1 +.LBB3_194: # %.thread156 + # in Loop: Header=BB3_149 Depth=1 ldptr.w $a0, $a0, 4708 - bne $a0, $s2, .LBB3_195 -# %bb.194: # in Loop: Header=BB3_148 Depth=1 - ld.d $a0, $sp, 56 # 8-byte Folded Reload + bne $a0, $s5, .LBB3_196 +# %bb.195: # in Loop: Header=BB3_149 Depth=1 + ld.d $a0, $sp, 64 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(generic_RC) pcalau12i $a1, %pc_hi20(generic_RC_best) ld.d $a1, $a1, %pc_lo12(generic_RC_best) pcaddu18i $ra, %call36(copy_rc_generic) jirl $ra, $ra, 0 -.LBB3_195: # %.thread158 - # in Loop: Header=BB3_148 Depth=1 +.LBB3_196: # %.thread158 + # in Loop: Header=BB3_149 Depth=1 ld.d $a2, $s8, 0 - ld.w $a0, $s5, %pc_lo12(MBPairIsField) + ld.w $a0, $s6, %pc_lo12(MBPairIsField) ori $a1, $zero, 1 stptr.w $a1, $a2, 15416 ori $fp, $zero, 1 stptr.w $a1, $a2, 15408 sltu $a0, $zero, $a0 - b .LBB3_197 + b .LBB3_198 .p2align 4, , 16 -.LBB3_196: # %.thread70 - # in Loop: Header=BB3_148 Depth=1 +.LBB3_197: # %.thread70 + # in Loop: Header=BB3_149 Depth=1 ori $a0, $zero, 1 stptr.w $a0, $a2, 14464 - st.w $a0, $s5, %pc_lo12(MBPairIsField) + st.w $a0, $s6, %pc_lo12(MBPairIsField) stptr.w $a0, $a2, 15408 -.LBB3_197: # in Loop: Header=BB3_148 Depth=1 +.LBB3_198: # in Loop: Header=BB3_149 Depth=1 ldptr.w $a1, $a2, 14464 stptr.w $a0, $a2, 14468 stptr.w $zero, $a2, 15412 @@ -1913,11 +1906,11 @@ encode_one_slice: # @encode_one_slice ld.d $a0, $s8, 0 ldptr.w $a0, $a0, 14464 sltui $a0, $a0, 1 - masknez $a1, $s7, $a0 + masknez $a1, $s3, $a0 ld.d $a2, $sp, 96 # 8-byte Folded Reload maskeqz $a0, $a2, $a0 or $a0, $a0, $a1 - st.d $a0, $s6, %pc_lo12(rdopt) + st.d $a0, $s7, %pc_lo12(rdopt) move $a0, $zero pcaddu18i $ra, %call36(copy_rdopt_data) jirl $ra, $ra, 0 @@ -1929,18 +1922,17 @@ encode_one_slice: # @encode_one_slice pcaddu18i $ra, %call36(terminate_macroblock) jirl $ra, $ra, 0 ld.w $a0, $sp, 112 - beqz $a0, .LBB3_204 -# %bb.198: # in Loop: Header=BB3_148 Depth=1 + beqz $a0, .LBB3_206 +# %bb.199: # in Loop: Header=BB3_149 Depth=1 ld.d $a0, $s8, 0 ld.w $a0, $a0, 12 pcaddu18i $ra, %call36(FmoGetPreviousMBNr) jirl $ra, $ra, 0 ld.d $a1, $s8, 0 st.w $a0, $a1, 12 - lu12i.w $s7, 3 - ld.d $a1, $sp, 88 # 8-byte Folded Reload - bne $a0, $a1, .LBB3_200 -.LBB3_199: # in Loop: Header=BB3_148 Depth=1 + ld.d $s3, $sp, 56 # 8-byte Folded Reload + bne $a0, $s3, .LBB3_201 +.LBB3_200: # in Loop: Header=BB3_149 Depth=1 pcalau12i $a0, %pc_hi20(errortext) addi.d $s4, $a0, %pc_lo12(errortext) pcalau12i $a0, %pc_hi20(.L.str.1) @@ -1951,15 +1943,15 @@ encode_one_slice: # @encode_one_slice jirl $ra, $ra, 0 ori $a1, $zero, 300 move $a0, $s4 - ld.d $s4, $sp, 104 # 8-byte Folded Reload pcaddu18i $ra, %call36(error) jirl $ra, $ra, 0 -.LBB3_200: # in Loop: Header=BB3_148 Depth=1 - ld.w $a1, $s5, %pc_lo12(MBPairIsField) +.LBB3_201: # in Loop: Header=BB3_149 Depth=1 + ld.w $a1, $s6, %pc_lo12(MBPairIsField) ld.d $a0, $s8, 0 - beqz $a1, .LBB3_202 -.LBB3_201: # in Loop: Header=BB3_148 Depth=1 - ld.d $a1, $s4, 0 + ld.d $s6, $sp, 104 # 8-byte Folded Reload + beqz $a1, .LBB3_203 +.LBB3_202: # in Loop: Header=BB3_149 Depth=1 + ld.d $a1, $s6, 0 ldptr.w $a2, $a0, 15240 ld.w $a3, $a1, 32 srli.d $a2, $a2, 1 @@ -1970,30 +1962,31 @@ encode_one_slice: # @encode_one_slice addi.w $a1, $a4, -1 srli.d $a1, $a1, 1 stptr.w $a1, $a0, 14456 -.LBB3_202: # in Loop: Header=BB3_148 Depth=1 +.LBB3_203: # in Loop: Header=BB3_149 Depth=1 ld.w $a1, $sp, 116 stptr.d $zero, $a0, 14464 - bnez $a1, .LBB3_215 -# %bb.203: # in Loop: Header=BB3_148 Depth=1 + bnez $a1, .LBB3_217 +# %bb.204: # in Loop: Header=BB3_149 Depth=1 move $a0, $s0 pcaddu18i $ra, %call36(FmoMB2SliceGroup) jirl $ra, $ra, 0 pcaddu18i $ra, %call36(FmoGetLastCodedMBOfSliceGroup) jirl $ra, $ra, 0 - bne $s0, $a0, .LBB3_147 - b .LBB3_214 + beq $s0, $a0, .LBB3_216 +# %bb.205: # in Loop: Header=BB3_149 Depth=1 + lu12i.w $s1, 3 + b .LBB3_148 .p2align 4, , 16 -.LBB3_204: # in Loop: Header=BB3_148 Depth=1 +.LBB3_206: # in Loop: Header=BB3_149 Depth=1 move $a0, $s0 pcaddu18i $ra, %call36(FmoGetNextMBNr) jirl $ra, $ra, 0 move $s0, $a0 - lu12i.w $s7, 3 - ld.d $a0, $sp, 88 # 8-byte Folded Reload - bne $s0, $a0, .LBB3_206 -# %bb.205: # in Loop: Header=BB3_148 Depth=1 + ld.d $s3, $sp, 56 # 8-byte Folded Reload + bne $a0, $s3, .LBB3_208 +# %bb.207: # in Loop: Header=BB3_149 Depth=1 st.w $fp, $sp, 116 -.LBB3_206: # in Loop: Header=BB3_148 Depth=1 +.LBB3_208: # in Loop: Header=BB3_149 Depth=1 pcaddu18i $ra, %call36(proceed2nextMacroblock) jirl $ra, $ra, 0 ld.d $a0, $s8, 0 @@ -2012,7 +2005,7 @@ encode_one_slice: # @encode_one_slice ld.d $a2, $sp, 48 # 8-byte Folded Reload maskeqz $a0, $a2, $a0 or $a0, $a0, $a1 - st.d $a0, $s6, %pc_lo12(rdopt) + st.d $a0, $s7, %pc_lo12(rdopt) ori $a0, $zero, 1 pcaddu18i $ra, %call36(copy_rdopt_data) jirl $ra, $ra, 0 @@ -2024,13 +2017,13 @@ encode_one_slice: # @encode_one_slice pcaddu18i $ra, %call36(terminate_macroblock) jirl $ra, $ra, 0 ld.w $a0, $sp, 112 - beqz $a0, .LBB3_208 -# %bb.207: # in Loop: Header=BB3_148 Depth=1 + beqz $a0, .LBB3_210 +# %bb.209: # in Loop: Header=BB3_149 Depth=1 ld.d $a0, $s8, 0 ld.w $a0, $a0, 12 - ld.d $a1, $sp, 72 # 8-byte Folded Reload + ld.d $a1, $sp, 80 # 8-byte Folded Reload addi.w $a1, $a1, 1 - st.d $a1, $sp, 72 # 8-byte Folded Spill + st.d $a1, $sp, 80 # 8-byte Folded Spill pcaddu18i $ra, %call36(FmoGetPreviousMBNr) jirl $ra, $ra, 0 ld.d $a1, $s8, 0 @@ -2039,46 +2032,45 @@ encode_one_slice: # @encode_one_slice jirl $ra, $ra, 0 ld.d $a1, $s8, 0 st.w $a0, $a1, 12 - ld.d $a1, $sp, 88 # 8-byte Folded Reload - bne $a0, $a1, .LBB3_200 - b .LBB3_199 -.LBB3_208: # in Loop: Header=BB3_148 Depth=1 + bne $a0, $s3, .LBB3_201 + b .LBB3_200 +.LBB3_210: # in Loop: Header=BB3_149 Depth=1 move $a0, $s0 pcaddu18i $ra, %call36(FmoGetNextMBNr) jirl $ra, $ra, 0 move $s0, $a0 - ld.d $a0, $sp, 88 # 8-byte Folded Reload - bne $s0, $a0, .LBB3_210 -# %bb.209: # in Loop: Header=BB3_148 Depth=1 + bne $a0, $s3, .LBB3_212 +# %bb.211: # in Loop: Header=BB3_149 Depth=1 st.w $fp, $sp, 116 -.LBB3_210: # in Loop: Header=BB3_148 Depth=1 - ld.d $a0, $sp, 72 # 8-byte Folded Reload +.LBB3_212: # in Loop: Header=BB3_149 Depth=1 + ld.d $a0, $sp, 80 # 8-byte Folded Reload addi.w $a0, $a0, 2 - st.d $a0, $sp, 72 # 8-byte Folded Spill + st.d $a0, $sp, 80 # 8-byte Folded Spill pcaddu18i $ra, %call36(proceed2nextMacroblock) jirl $ra, $ra, 0 - ld.w $a1, $s5, %pc_lo12(MBPairIsField) + ld.w $a1, $s6, %pc_lo12(MBPairIsField) ld.d $a0, $s8, 0 - bnez $a1, .LBB3_201 - b .LBB3_202 -.LBB3_211: # in Loop: Header=BB3_148 Depth=1 + ld.d $s6, $sp, 104 # 8-byte Folded Reload + bnez $a1, .LBB3_202 + b .LBB3_203 +.LBB3_213: # in Loop: Header=BB3_149 Depth=1 st.w $fp, $sp, 116 - b .LBB3_146 -.LBB3_212: + b .LBB3_147 +.LBB3_214: move $a0, $zero -.LBB3_213: +.LBB3_215: pcaddu18i $ra, %call36(estimate_weighting_factor_P_slice) jirl $ra, $ra, 0 ld.w $a0, $s3, 0 - bgtz $a0, .LBB3_92 - b .LBB3_94 -.LBB3_214: + bgtz $a0, .LBB3_93 + b .LBB3_95 +.LBB3_216: ori $a0, $zero, 1 st.w $a0, $sp, 116 -.LBB3_215: # %._crit_edge.loopexit +.LBB3_217: # %._crit_edge.loopexit ld.d $a1, $s8, 0 - ld.d $fp, $sp, 72 # 8-byte Folded Reload -.LBB3_216: # %._crit_edge + ld.d $fp, $sp, 80 # 8-byte Folded Reload +.LBB3_218: # %._crit_edge ldptr.w $a0, $a1, 15348 ld.d $a1, $sp, 40 # 8-byte Folded Reload add.w $a1, $fp, $a1 @@ -2632,133 +2624,133 @@ poc_ref_pic_reorder: # @poc_ref_pic_reorder .Lfunc_end5: .size poc_ref_pic_reorder, .Lfunc_end5-poc_ref_pic_reorder # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function SetLagrangianMultipliers -.LCPI6_0: - .dword 0x3fa999999999999a # double 0.050000000000000003 -.LCPI6_1: - .dword 0x3fee666666666666 # double 0.94999999999999996 -.LCPI6_2: - .dword 0x40f0000000000000 # double 65536 -.LCPI6_3: - .dword 0x3feb333333333333 # double 0.84999999999999998 -.LCPI6_4: - .dword 0x3ff6666666666666 # double 1.3999999999999999 -.LCPI6_5: - .dword 0x3fe5c28f5c28f5c3 # double 0.68000000000000005 -.LCPI6_6: - .dword 0x3fe999999999999a # double 0.80000000000000004 -.LCPI6_7: - .dword 0x3fc999999999999a # double 0.20000000000000001 -.LCPI6_8: - .dword 0x3fd999999999999a # double 0.40000000000000002 -.LCPI6_9: - .dword 0x3fe62e42fefa39ef # double 0.69314718055994529 - .text - .globl SetLagrangianMultipliers + .globl SetLagrangianMultipliers # -- Begin function SetLagrangianMultipliers .p2align 5 .type SetLagrangianMultipliers,@function SetLagrangianMultipliers: # @SetLagrangianMultipliers # %bb.0: - addi.d $sp, $sp, -224 - st.d $ra, $sp, 216 # 8-byte Folded Spill - st.d $fp, $sp, 208 # 8-byte Folded Spill - st.d $s0, $sp, 200 # 8-byte Folded Spill - st.d $s1, $sp, 192 # 8-byte Folded Spill - st.d $s2, $sp, 184 # 8-byte Folded Spill - st.d $s3, $sp, 176 # 8-byte Folded Spill - st.d $s4, $sp, 168 # 8-byte Folded Spill - st.d $s5, $sp, 160 # 8-byte Folded Spill - st.d $s6, $sp, 152 # 8-byte Folded Spill - st.d $s7, $sp, 144 # 8-byte Folded Spill - st.d $s8, $sp, 136 # 8-byte Folded Spill - fst.d $fs0, $sp, 128 # 8-byte Folded Spill - fst.d $fs1, $sp, 120 # 8-byte Folded Spill - fst.d $fs2, $sp, 112 # 8-byte Folded Spill - fst.d $fs3, $sp, 104 # 8-byte Folded Spill - fst.d $fs4, $sp, 96 # 8-byte Folded Spill + addi.d $sp, $sp, -256 + st.d $ra, $sp, 248 # 8-byte Folded Spill + st.d $fp, $sp, 240 # 8-byte Folded Spill + st.d $s0, $sp, 232 # 8-byte Folded Spill + st.d $s1, $sp, 224 # 8-byte Folded Spill + st.d $s2, $sp, 216 # 8-byte Folded Spill + st.d $s3, $sp, 208 # 8-byte Folded Spill + st.d $s4, $sp, 200 # 8-byte Folded Spill + st.d $s5, $sp, 192 # 8-byte Folded Spill + st.d $s6, $sp, 184 # 8-byte Folded Spill + st.d $s7, $sp, 176 # 8-byte Folded Spill + st.d $s8, $sp, 168 # 8-byte Folded Spill + fst.d $fs0, $sp, 160 # 8-byte Folded Spill + fst.d $fs1, $sp, 152 # 8-byte Folded Spill + fst.d $fs2, $sp, 144 # 8-byte Folded Spill + fst.d $fs3, $sp, 136 # 8-byte Folded Spill + fst.d $fs4, $sp, 128 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(input) ld.d $a0, $a0, %got_pc_lo12(input) - st.d $a0, $sp, 80 # 8-byte Folded Spill + st.d $a0, $sp, 104 # 8-byte Folded Spill ld.d $s0, $a0, 0 ldptr.w $a1, $s0, 4168 pcalau12i $a0, %got_pc_hi20(img) ld.d $a0, $a0, %got_pc_lo12(img) - st.d $a0, $sp, 72 # 8-byte Folded Spill + st.d $a0, $sp, 96 # 8-byte Folded Spill ld.d $a4, $a0, 0 addi.w $a0, $zero, -51 - st.d $a0, $sp, 16 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI6_2) - pcalau12i $a2, %pc_hi20(.LCPI6_9) - beqz $a1, .LBB6_128 + st.d $a0, $sp, 24 # 8-byte Folded Spill + lu12i.w $a0, -4189 + beqz $a1, .LBB6_126 # %bb.1: # %.preheader196.preheader - st.d $a2, $sp, 32 # 8-byte Folded Spill ld.w $a1, $s0, 20 - pcalau12i $a2, %pc_hi20(.LCPI6_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI6_0) - move $s2, $zero - movgr2fr.w $fa1, $a1 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + move $s1, $zero + movgr2fr.w $fa0, $a1 + ffint.d.w $fa0, $fa0 + lu12i.w $a1, -419431 + ori $a2, $a1, 2458 + lu32i.d $a2, -419431 + lu52i.d $a1, $a2, 1018 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 movgr2fr.d $fa1, $zero fmax.d $fa0, $fa0, $fa1 vldi $vr1, -928 fmin.d $fa0, $fa0, $fa1 vldi $vr1, -912 fsub.d $fs1, $fa1, $fa0 - ori $s5, $zero, 2 - lu12i.w $a1, 1 - ori $a2, $a1, 1248 - st.d $a2, $sp, 48 # 8-byte Folded Spill - pcalau12i $a2, %pc_hi20(.LCPI6_1) - st.d $a2, $sp, 64 # 8-byte Folded Spill - fld.d $fs2, $a2, %pc_lo12(.LCPI6_1) + ori $s6, $zero, 2 + lu12i.w $a3, 1 + lu12i.w $a1, 419430 + ori $a1, $a1, 1638 + ori $a5, $a3, 1248 + st.d $a5, $sp, 80 # 8-byte Folded Spill + st.d $a1, $sp, 32 # 8-byte Folded Spill + lu32i.d $a1, -104858 + lu52i.d $a1, $a1, 1022 + st.d $a1, $sp, 88 # 8-byte Folded Spill + movgr2fr.d $fs2, $a1 ori $a5, $zero, 1 - vldi $vr4, -928 - fld.d $fs3, $a0, %pc_lo12(.LCPI6_2) - ori $a0, $a1, 1288 - st.d $a0, $sp, 24 # 8-byte Folded Spill + vldi $vr3, -928 + lu52i.d $a1, $zero, 1039 + movgr2fr.d $fs3, $a1 + ori $a1, $a3, 1288 + st.d $a1, $sp, 56 # 8-byte Folded Spill + lu12i.w $a1, 377487 + ori $a1, $a1, 1475 + lu32i.d $a1, 377487 + lu52i.d $a1, $a1, 1022 + st.d $a1, $sp, 64 # 8-byte Folded Spill + st.d $a2, $sp, 16 # 8-byte Folded Spill + lu52i.d $a1, $a2, 1022 + st.d $a1, $sp, 8 # 8-byte Folded Spill + ori $a0, $a0, 2543 + lu32i.d $a0, 405058 + lu52i.d $a0, $a0, 1022 + st.d $a0, $sp, 48 # 8-byte Folded Spill + lu12i.w $a0, 209715 + ori $a0, $a0, 819 + lu32i.d $a0, -314573 + lu52i.d $a0, $a0, 1022 + st.d $a0, $sp, 40 # 8-byte Folded Spill b .LBB6_3 .p2align 4, , 16 .LBB6_2: # %._crit_edge # in Loop: Header=BB6_3 Depth=1 - addi.d $s2, $s2, 1 + addi.d $s1, $s1, 1 ori $a0, $zero, 5 - beq $s2, $a0, .LBB6_139 + beq $s1, $a0, .LBB6_137 .LBB6_3: # %.preheader196 # =>This Loop Header: Depth=1 # Child Loop BB6_7 Depth 2 ldptr.w $a0, $a4, 15452 - ld.d $a1, $sp, 16 # 8-byte Folded Reload + ld.d $a1, $sp, 24 # 8-byte Folded Reload blt $a0, $a1, .LBB6_2 # %bb.4: # %.lr.ph # in Loop: Header=BB6_3 Depth=1 - sub.w $s4, $zero, $a0 + sub.w $s3, $zero, $a0 slli.d $a0, $a0, 3 sub.d $s7, $zero, $a0 - slli.d $a0, $s2, 3 - st.d $a0, $sp, 88 # 8-byte Folded Spill - move $s8, $s4 - st.d $s2, $sp, 56 # 8-byte Folded Spill + slli.d $a0, $s1, 3 + st.d $a0, $sp, 120 # 8-byte Folded Spill + move $fp, $s3 + st.d $s1, $sp, 112 # 8-byte Folded Spill b .LBB6_7 - .p2align 4, , 16 .LBB6_5: # in Loop: Header=BB6_7 Depth=2 - ori $s5, $zero, 2 + ori $s6, $zero, 2 + .p2align 4, , 16 .LBB6_6: # %.loopexit194 # in Loop: Header=BB6_7 Depth=2 - addi.d $s8, $s8, 1 - bstrpick.d $a0, $s8, 31, 0 + addi.d $fp, $fp, 1 + bstrpick.d $a0, $fp, 31, 0 addi.d $s7, $s7, 8 - addi.w $s4, $s4, 1 + addi.w $s3, $s3, 1 ori $a1, $zero, 52 beq $a0, $a1, .LBB6_2 .LBB6_7: # Parent Loop BB6_3 Depth=1 # => This Inner Loop Header: Depth=2 ldptr.w $a0, $s0, 5288 - beq $a0, $s5, .LBB6_11 + beq $a0, $s6, .LBB6_11 # %bb.8: # in Loop: Header=BB6_7 Depth=2 ldptr.w $a1, $a4, 15452 - movgr2fr.w $fa0, $s4 + movgr2fr.w $fa0, $s3 ffint.d.w $fa0, $fa0 movgr2fr.w $fa1, $a1 ffint.d.w $fa1, $fa1 @@ -2767,7 +2759,7 @@ SetLagrangianMultipliers: # @SetLagrangianMultipliers fadd.d $fs0, $fa0, $fa1 bne $a0, $a5, .LBB6_15 # %bb.9: # in Loop: Header=BB6_7 Depth=2 - alsl.d $a0, $s2, $s0, 3 + alsl.d $a0, $s1, $s0, 3 lu12i.w $a1, 1 ori $a1, $a1, 1200 fldx.d $fs4, $a0, $a1 @@ -2776,16 +2768,16 @@ SetLagrangianMultipliers: # @SetLagrangianMultipliers fmov.d $fa0, $fs0 pcaddu18i $ra, %call36(exp2) jirl $ra, $ra, 0 - ld.d $a0, $sp, 80 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload ld.d $s0, $a0, 0 ldptr.w $a0, $s0, 5784 - bne $a0, $s5, .LBB6_19 + bne $a0, $s6, .LBB6_19 # %bb.10: # in Loop: Header=BB6_7 Depth=2 ldptr.w $a0, $s0, 5788 - ld.d $a1, $sp, 64 # 8-byte Folded Reload - fld.d $fa1, $a1, %pc_lo12(.LCPI6_1) addi.d $a0, $a0, -2 sltui $a0, $a0, 1 + ld.d $a1, $sp, 88 # 8-byte Folded Reload + movgr2fr.d $fa1, $a1 movgr2cf $fcc0, $a0 vldi $vr2, -912 fsel $fa1, $fa1, $fa2, $fcc0 @@ -2794,7 +2786,7 @@ SetLagrangianMultipliers: # @SetLagrangianMultipliers .LBB6_11: # in Loop: Header=BB6_7 Depth=2 ldptr.w $a0, $s0, 5784 fmov.d $fa0, $fs2 - bne $a0, $s5, .LBB6_13 + bne $a0, $s6, .LBB6_13 # %bb.12: # in Loop: Header=BB6_7 Depth=2 ldptr.w $a0, $s0, 5788 addi.d $a0, $a0, -2 @@ -2803,40 +2795,40 @@ SetLagrangianMultipliers: # @SetLagrangianMultipliers vldi $vr0, -912 fsel $fa0, $fs2, $fa0, $fcc0 .LBB6_13: # in Loop: Header=BB6_7 Depth=2 - ldptr.d $fp, $a4, 15488 - alsl.d $a0, $s2, $s0, 3 - ldptr.d $s5, $a4, 15496 + ldptr.d $s8, $a4, 15488 + alsl.d $a0, $s1, $s0, 3 + ldptr.d $s4, $a4, 15496 ldptr.d $s6, $a4, 15480 - ld.d $a3, $sp, 88 # 8-byte Folded Reload - ldx.d $a1, $fp, $a3 - ld.d $a2, $sp, 48 # 8-byte Folded Reload + ld.d $a3, $sp, 120 # 8-byte Folded Reload + ldx.d $a1, $s8, $a3 + ld.d $a2, $sp, 80 # 8-byte Folded Reload fldx.d $fa1, $a0, $a2 - ldx.d $a0, $s5, $a3 - ldx.d $s2, $s6, $a3 - ldx.d $s3, $a1, $s7 + ldx.d $a0, $s4, $a3 + ldx.d $s1, $s6, $a3 + ldx.d $s5, $a1, $s7 ldptr.w $a1, $s0, 5780 - ldx.d $s1, $a0, $s7 + ldx.d $s2, $a0, $s7 fmul.d $fa1, $fa1, $fa0 - fstx.d $fa1, $s2, $s7 + fstx.d $fa1, $s1, $s7 bne $a1, $a5, .LBB6_32 # %bb.14: # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 b .LBB6_33 .p2align 4, , 16 .LBB6_15: # in Loop: Header=BB6_7 Depth=2 - ldptr.w $fp, $s0, 2096 + ldptr.w $s0, $s0, 2096 vldi $vr0, -1016 fdiv.d $fa0, $fs0, $fa0 pcaddu18i $ra, %call36(exp2) jirl $ra, $ra, 0 ori $a5, $zero, 1 - blez $fp, .LBB6_45 + blez $s0, .LBB6_45 # %bb.16: # in Loop: Header=BB6_7 Depth=2 ori $a0, $zero, 3 - beq $s2, $a0, .LBB6_69 + beq $s1, $a0, .LBB6_68 # %bb.17: # in Loop: Header=BB6_7 Depth=2 - vldi $vr4, -928 - bne $s2, $a5, .LBB6_70 + vldi $vr3, -928 + bne $s1, $a5, .LBB6_69 # %bb.18: # in Loop: Header=BB6_7 Depth=2 vldi $vr1, -1000 fdiv.d $fa1, $fs0, $fa1 @@ -2844,28 +2836,29 @@ SetLagrangianMultipliers: # @SetLagrangianMultipliers fmax.d $fa1, $fa1, $fa2 vldi $vr2, -1008 fmin.d $fa1, $fa1, $fa2 - b .LBB6_71 -.LBB6_19: # in Loop: Header=BB6_7 Depth=2 ld.d $a0, $sp, 64 # 8-byte Folded Reload - fld.d $fa1, $a0, %pc_lo12(.LCPI6_1) + b .LBB6_73 +.LBB6_19: # in Loop: Header=BB6_7 Depth=2 + ld.d $a0, $sp, 88 # 8-byte Folded Reload + movgr2fr.d $fa1, $a0 .LBB6_20: # in Loop: Header=BB6_7 Depth=2 ori $a5, $zero, 1 - vldi $vr4, -928 - ld.d $a0, $sp, 72 # 8-byte Folded Reload + vldi $vr3, -928 + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.d $a4, $a0, 0 ldptr.d $a0, $a4, 15488 ldptr.d $a1, $a4, 15496 ldptr.d $a2, $a4, 15480 - ld.d $a3, $sp, 88 # 8-byte Folded Reload + ld.d $a3, $sp, 120 # 8-byte Folded Reload ldx.d $a0, $a0, $a3 ldx.d $a1, $a1, $a3 - ldx.d $s1, $a2, $a3 - ldx.d $s3, $a0, $s7 + ldx.d $s5, $a2, $a3 + ldx.d $s2, $a0, $s7 ldptr.w $a0, $s0, 5780 - ldx.d $fp, $a1, $s7 + ldx.d $s4, $a1, $s7 fmul.d $fa0, $fs4, $fa0 fmul.d $fa1, $fa0, $fa1 - fstx.d $fa1, $s1, $s7 + fstx.d $fa1, $s5, $s7 bne $a0, $a5, .LBB6_22 # %bb.21: # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 @@ -2873,15 +2866,15 @@ SetLagrangianMultipliers: # @SetLagrangianMultipliers .LBB6_22: # in Loop: Header=BB6_7 Depth=2 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB6_110 + bceqz $fcc0, .LBB6_108 .LBB6_23: # in Loop: Header=BB6_7 Depth=2 - fst.d $fa0, $s3, 0 - fmadd.d $fa0, $fa0, $fs3, $fa4 + fst.d $fa0, $s2, 0 + fmadd.d $fa0, $fa0, $fs3, $fa3 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 - st.w $a0, $fp, 0 + st.w $a0, $s4, 0 ldptr.w $a0, $s0, 5784 - fldx.d $fa1, $s1, $s7 + fldx.d $fa1, $s5, $s7 bne $a0, $a5, .LBB6_25 # %bb.24: # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 @@ -2889,15 +2882,15 @@ SetLagrangianMultipliers: # @SetLagrangianMultipliers .LBB6_25: # in Loop: Header=BB6_7 Depth=2 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB6_111 + bceqz $fcc0, .LBB6_109 .LBB6_26: # in Loop: Header=BB6_7 Depth=2 - fst.d $fa0, $s3, 8 - fmadd.d $fa0, $fa0, $fs3, $fa4 + fst.d $fa0, $s2, 8 + fmadd.d $fa0, $fa0, $fs3, $fa3 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 - st.w $a0, $fp, 4 + st.w $a0, $s4, 4 ldptr.w $a0, $s0, 5788 - fldx.d $fa1, $s1, $s7 + fldx.d $fa1, $s5, $s7 bne $a0, $a5, .LBB6_28 # %bb.27: # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 @@ -2905,14 +2898,14 @@ SetLagrangianMultipliers: # @SetLagrangianMultipliers .LBB6_28: # in Loop: Header=BB6_7 Depth=2 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB6_112 + bceqz $fcc0, .LBB6_110 .LBB6_29: # in Loop: Header=BB6_7 Depth=2 - fst.d $fa0, $s3, 16 - fmadd.d $fa0, $fa0, $fs3, $fa4 + fst.d $fa0, $s2, 16 + fmadd.d $fa0, $fa0, $fs3, $fa3 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 - st.w $a0, $fp, 8 - bne $s2, $a5, .LBB6_6 + st.w $a0, $s4, 8 + bne $s1, $a5, .LBB6_6 # %bb.30: # in Loop: Header=BB6_7 Depth=2 lu12i.w $a0, 1 ori $a0, $a0, 1240 @@ -2920,16 +2913,16 @@ SetLagrangianMultipliers: # @SetLagrangianMultipliers fmov.d $fa0, $fs0 pcaddu18i $ra, %call36(exp2) jirl $ra, $ra, 0 - ld.d $a0, $sp, 80 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload ld.d $s0, $a0, 0 ldptr.w $a0, $s0, 5784 - bne $a0, $s5, .LBB6_48 + bne $a0, $s6, .LBB6_48 # %bb.31: # in Loop: Header=BB6_7 Depth=2 ldptr.w $a0, $s0, 5788 - ld.d $a1, $sp, 64 # 8-byte Folded Reload - fld.d $fa1, $a1, %pc_lo12(.LCPI6_1) addi.d $a0, $a0, -2 sltui $a0, $a0, 1 + ld.d $a1, $sp, 88 # 8-byte Folded Reload + movgr2fr.d $fa1, $a1 movgr2cf $fcc0, $a0 vldi $vr2, -912 fsel $fa1, $fa1, $fa2, $fcc0 @@ -2937,15 +2930,15 @@ SetLagrangianMultipliers: # @SetLagrangianMultipliers .LBB6_32: # in Loop: Header=BB6_7 Depth=2 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB6_113 + bceqz $fcc0, .LBB6_111 .LBB6_33: # in Loop: Header=BB6_7 Depth=2 - fst.d $fa0, $s3, 0 - fmadd.d $fa0, $fa0, $fs3, $fa4 + fst.d $fa0, $s5, 0 + fmadd.d $fa0, $fa0, $fs3, $fa3 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 - st.w $a0, $s1, 0 + st.w $a0, $s2, 0 ldptr.w $a0, $s0, 5784 - fldx.d $fa1, $s2, $s7 + fldx.d $fa1, $s1, $s7 bne $a0, $a5, .LBB6_35 # %bb.34: # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 @@ -2953,33 +2946,33 @@ SetLagrangianMultipliers: # @SetLagrangianMultipliers .LBB6_35: # in Loop: Header=BB6_7 Depth=2 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB6_114 + bceqz $fcc0, .LBB6_112 .LBB6_36: # in Loop: Header=BB6_7 Depth=2 - fst.d $fa0, $s3, 8 - fmadd.d $fa0, $fa0, $fs3, $fa4 + fst.d $fa0, $s5, 8 + fmadd.d $fa0, $fa0, $fs3, $fa3 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 - st.w $a0, $s1, 4 + st.w $a0, $s2, 4 ldptr.w $a0, $s0, 5788 - fldx.d $fa1, $s2, $s7 + fldx.d $fa1, $s1, $s7 bne $a0, $a5, .LBB6_39 # %bb.37: # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 .LBB6_38: # in Loop: Header=BB6_7 Depth=2 - ld.d $s2, $sp, 56 # 8-byte Folded Reload + ld.d $s1, $sp, 112 # 8-byte Folded Reload b .LBB6_40 .LBB6_39: # in Loop: Header=BB6_7 Depth=2 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - ld.d $s2, $sp, 56 # 8-byte Folded Reload - bceqz $fcc0, .LBB6_115 + ld.d $s1, $sp, 112 # 8-byte Folded Reload + bceqz $fcc0, .LBB6_113 .LBB6_40: # in Loop: Header=BB6_7 Depth=2 - fst.d $fa0, $s3, 16 - fmadd.d $fa0, $fa0, $fs3, $fa4 + fst.d $fa0, $s5, 16 + fmadd.d $fa0, $fa0, $fs3, $fa3 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 - st.w $a0, $s1, 8 - bne $s2, $a5, .LBB6_5 + st.w $a0, $s2, 8 + bne $s1, $a5, .LBB6_5 # %bb.41: # in Loop: Header=BB6_7 Depth=2 ldptr.w $a0, $s0, 5784 fmov.d $fa0, $fs2 @@ -2993,56 +2986,58 @@ SetLagrangianMultipliers: # @SetLagrangianMultipliers vldi $vr0, -912 fsel $fa0, $fs2, $fa0, $fcc0 .LBB6_43: # in Loop: Header=BB6_7 Depth=2 - ld.d $a0, $fp, 40 - ld.d $a1, $sp, 24 # 8-byte Folded Reload + ld.d $a0, $s8, 40 + ld.d $a1, $sp, 56 # 8-byte Folded Reload fldx.d $fa1, $s0, $a1 - ld.d $a1, $s5, 40 - ld.d $s2, $s6, 40 - ldx.d $s1, $a0, $s7 + ld.d $a1, $s4, 40 + ld.d $s1, $s6, 40 + ldx.d $s4, $a0, $s7 ldptr.w $a0, $s0, 5780 - ldx.d $fp, $a1, $s7 + ldx.d $s2, $a1, $s7 fmul.d $fa1, $fa1, $fa0 - fstx.d $fa1, $s2, $s7 + fstx.d $fa1, $s1, $s7 bne $a0, $a5, .LBB6_59 # %bb.44: # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 - ori $s5, $zero, 2 + ori $s6, $zero, 2 b .LBB6_60 .LBB6_45: # in Loop: Header=BB6_7 Depth=2 - beq $s2, $a5, .LBB6_72 + beq $s1, $a5, .LBB6_70 # %bb.46: # in Loop: Header=BB6_7 Depth=2 - vldi $vr4, -928 + vldi $vr3, -928 ori $a0, $zero, 3 - bne $s2, $a0, .LBB6_73 + bne $s1, $a0, .LBB6_71 # %bb.47: # in Loop: Header=BB6_7 Depth=2 - pcalau12i $a0, %pc_hi20(.LCPI6_4) - fld.d $fa1, $a0, %pc_lo12(.LCPI6_4) - vldi $vr2, -984 - fdiv.d $fa2, $fs0, $fa2 - fmax.d $fa1, $fa2, $fa1 + vldi $vr1, -984 + fdiv.d $fa1, $fs0, $fa1 + ld.d $a0, $sp, 32 # 8-byte Folded Reload + lu32i.d $a0, 419430 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa2, $a0 + fmax.d $fa1, $fa1, $fa2 vldi $vr2, -1016 fmin.d $fa1, $fa1, $fa2 - b .LBB6_74 + b .LBB6_72 .LBB6_48: # in Loop: Header=BB6_7 Depth=2 - ld.d $a0, $sp, 64 # 8-byte Folded Reload - fld.d $fa1, $a0, %pc_lo12(.LCPI6_1) + ld.d $a0, $sp, 88 # 8-byte Folded Reload + movgr2fr.d $fa1, $a0 .LBB6_49: # in Loop: Header=BB6_7 Depth=2 ori $a5, $zero, 1 - ld.d $a0, $sp, 72 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.d $a4, $a0, 0 ldptr.d $a0, $a4, 15488 ldptr.d $a1, $a4, 15496 ldptr.d $a2, $a4, 15480 ld.d $a0, $a0, 40 ld.d $a1, $a1, 40 - ld.d $s1, $a2, 40 - ldx.d $s3, $a0, $s7 + ld.d $s5, $a2, 40 + ldx.d $s2, $a0, $s7 ldptr.w $a0, $s0, 5780 - ldx.d $fp, $a1, $s7 + ldx.d $s4, $a1, $s7 fmul.d $fa0, $fs4, $fa0 fmul.d $fa1, $fa0, $fa1 - fstx.d $fa1, $s1, $s7 - vldi $vr4, -928 + fstx.d $fa1, $s5, $s7 + vldi $vr3, -928 bne $a0, $a5, .LBB6_51 # %bb.50: # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 @@ -3050,15 +3045,15 @@ SetLagrangianMultipliers: # @SetLagrangianMultipliers .LBB6_51: # in Loop: Header=BB6_7 Depth=2 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB6_119 + bceqz $fcc0, .LBB6_117 .LBB6_52: # in Loop: Header=BB6_7 Depth=2 - fst.d $fa0, $s3, 0 - fmadd.d $fa0, $fa0, $fs3, $fa4 + fst.d $fa0, $s2, 0 + fmadd.d $fa0, $fa0, $fs3, $fa3 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 - st.w $a0, $fp, 0 + st.w $a0, $s4, 0 ldptr.w $a0, $s0, 5784 - fldx.d $fa1, $s1, $s7 + fldx.d $fa1, $s5, $s7 bne $a0, $a5, .LBB6_54 # %bb.53: # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 @@ -3066,15 +3061,15 @@ SetLagrangianMultipliers: # @SetLagrangianMultipliers .LBB6_54: # in Loop: Header=BB6_7 Depth=2 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB6_120 + bceqz $fcc0, .LBB6_118 .LBB6_55: # in Loop: Header=BB6_7 Depth=2 - fst.d $fa0, $s3, 8 - fmadd.d $fa0, $fa0, $fs3, $fa4 + fst.d $fa0, $s2, 8 + fmadd.d $fa0, $fa0, $fs3, $fa3 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 - st.w $a0, $fp, 4 + st.w $a0, $s4, 4 ldptr.w $a0, $s0, 5788 - fldx.d $fa1, $s1, $s7 + fldx.d $fa1, $s5, $s7 bne $a0, $a5, .LBB6_57 # %bb.56: # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 @@ -3082,24 +3077,28 @@ SetLagrangianMultipliers: # @SetLagrangianMultipliers .LBB6_57: # in Loop: Header=BB6_7 Depth=2 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB6_121 + bceqz $fcc0, .LBB6_119 .LBB6_58: # %.loopexit194.loopexit # in Loop: Header=BB6_7 Depth=2 - fst.d $fa0, $s3, 16 - b .LBB6_68 + fst.d $fa0, $s2, 16 + fmadd.d $fa0, $fa0, $fs3, $fa3 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a0, $fa0 + st.w $a0, $s4, 8 + b .LBB6_6 .LBB6_59: # in Loop: Header=BB6_7 Depth=2 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - ori $s5, $zero, 2 - bceqz $fcc0, .LBB6_122 + ori $s6, $zero, 2 + bceqz $fcc0, .LBB6_120 .LBB6_60: # in Loop: Header=BB6_7 Depth=2 - fst.d $fa0, $s1, 0 - fmadd.d $fa0, $fa0, $fs3, $fa4 + fst.d $fa0, $s4, 0 + fmadd.d $fa0, $fa0, $fs3, $fa3 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 - st.w $a0, $fp, 0 + st.w $a0, $s2, 0 ldptr.w $a0, $s0, 5784 - fldx.d $fa1, $s2, $s7 + fldx.d $fa1, $s1, $s7 bne $a0, $a5, .LBB6_62 # %bb.61: # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 @@ -3107,122 +3106,121 @@ SetLagrangianMultipliers: # @SetLagrangianMultipliers .LBB6_62: # in Loop: Header=BB6_7 Depth=2 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB6_123 + bceqz $fcc0, .LBB6_121 .LBB6_63: # in Loop: Header=BB6_7 Depth=2 - fst.d $fa0, $s1, 8 - fmadd.d $fa0, $fa0, $fs3, $fa4 + fst.d $fa0, $s4, 8 + fmadd.d $fa0, $fa0, $fs3, $fa3 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 - st.w $a0, $fp, 4 + st.w $a0, $s2, 4 ldptr.w $a0, $s0, 5788 - fldx.d $fa1, $s2, $s7 + fldx.d $fa1, $s1, $s7 bne $a0, $a5, .LBB6_66 # %bb.64: # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 .LBB6_65: # %.loopexit194.loopexit214 # in Loop: Header=BB6_7 Depth=2 - ld.d $s2, $sp, 56 # 8-byte Folded Reload + ld.d $s1, $sp, 112 # 8-byte Folded Reload b .LBB6_67 .LBB6_66: # in Loop: Header=BB6_7 Depth=2 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - ld.d $s2, $sp, 56 # 8-byte Folded Reload - bceqz $fcc0, .LBB6_124 + ld.d $s1, $sp, 112 # 8-byte Folded Reload + bceqz $fcc0, .LBB6_122 .LBB6_67: # %.loopexit194.loopexit214 # in Loop: Header=BB6_7 Depth=2 - fst.d $fa0, $s1, 16 -.LBB6_68: # %.loopexit194 - # in Loop: Header=BB6_7 Depth=2 - fmadd.d $fa0, $fa0, $fs3, $fa4 + fst.d $fa0, $s4, 16 + fmadd.d $fa0, $fa0, $fs3, $fa3 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 - st.w $a0, $fp, 8 + st.w $a0, $s2, 8 b .LBB6_6 -.LBB6_69: # in Loop: Header=BB6_7 Depth=2 - pcalau12i $a0, %pc_hi20(.LCPI6_4) - fld.d $fa1, $a0, %pc_lo12(.LCPI6_4) - vldi $vr2, -984 - fdiv.d $fa2, $fs0, $fa2 - fmax.d $fa1, $fa2, $fa1 +.LBB6_68: # in Loop: Header=BB6_7 Depth=2 + vldi $vr1, -984 + fdiv.d $fa1, $fs0, $fa1 + ld.d $a0, $sp, 32 # 8-byte Folded Reload + lu32i.d $a0, 419430 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa2, $a0 + fmax.d $fa1, $fa1, $fa2 vldi $vr2, -1016 fmin.d $fa1, $fa1, $fa2 - vldi $vr4, -928 - b .LBB6_71 -.LBB6_70: # in Loop: Header=BB6_7 Depth=2 + vldi $vr3, -928 + ld.d $a0, $sp, 64 # 8-byte Folded Reload + b .LBB6_73 +.LBB6_69: # in Loop: Header=BB6_7 Depth=2 vldi $vr1, -912 -.LBB6_71: # in Loop: Header=BB6_7 Depth=2 - pcalau12i $a0, %pc_hi20(.LCPI6_5) - fld.d $fa2, $a0, %pc_lo12(.LCPI6_5) - b .LBB6_75 -.LBB6_72: # in Loop: Header=BB6_7 Depth=2 + ld.d $a0, $sp, 64 # 8-byte Folded Reload + b .LBB6_73 +.LBB6_70: # in Loop: Header=BB6_7 Depth=2 vldi $vr1, -1008 - vldi $vr4, -928 - b .LBB6_74 -.LBB6_73: # %.fold.split + vldi $vr3, -928 + b .LBB6_72 +.LBB6_71: # %.fold.split # in Loop: Header=BB6_7 Depth=2 vldi $vr1, -912 -.LBB6_74: # in Loop: Header=BB6_7 Depth=2 - pcalau12i $a0, %pc_hi20(.LCPI6_3) - fld.d $fa2, $a0, %pc_lo12(.LCPI6_3) -.LBB6_75: # in Loop: Header=BB6_7 Depth=2 +.LBB6_72: # in Loop: Header=BB6_7 Depth=2 + ld.d $a0, $sp, 40 # 8-byte Folded Reload +.LBB6_73: # in Loop: Header=BB6_7 Depth=2 + movgr2fr.d $fa2, $a0 fmul.d $fa0, $fa0, $fa2 fmul.d $fa0, $fa0, $fa1 - ld.d $a0, $sp, 72 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.d $a4, $a0, 0 ldptr.d $a0, $a4, 15480 - ld.d $a1, $sp, 80 # 8-byte Folded Reload + ld.d $a1, $sp, 104 # 8-byte Folded Reload ld.d $s0, $a1, 0 - ld.d $a1, $sp, 88 # 8-byte Folded Reload - ldx.d $fp, $a0, $a1 + ld.d $a1, $sp, 120 # 8-byte Folded Reload + ldx.d $s8, $a0, $a1 ldptr.w $a1, $s0, 5784 - fstx.d $fa0, $fp, $s7 - bne $a1, $s5, .LBB6_78 -# %bb.76: # in Loop: Header=BB6_7 Depth=2 + fstx.d $fa0, $s8, $s7 + bne $a1, $s6, .LBB6_76 +# %bb.74: # in Loop: Header=BB6_7 Depth=2 ldptr.w $a1, $s0, 5788 - ld.d $a2, $sp, 64 # 8-byte Folded Reload - fld.d $fa1, $a2, %pc_lo12(.LCPI6_1) addi.d $a1, $a1, -2 sltui $a1, $a1, 1 + ld.d $a2, $sp, 88 # 8-byte Folded Reload + movgr2fr.d $fa1, $a2 movgr2cf $fcc0, $a1 vldi $vr2, -912 fsel $fa1, $fa1, $fa2, $fcc0 fmul.d $fa0, $fa1, $fa0 - beq $s2, $a5, .LBB6_79 -.LBB6_77: # in Loop: Header=BB6_7 Depth=2 - ldptr.d $s5, $a4, 15488 + beq $s1, $a5, .LBB6_77 +.LBB6_75: # in Loop: Header=BB6_7 Depth=2 + ldptr.d $s4, $a4, 15488 ldptr.d $s6, $a4, 15496 fmul.d $fa0, $fs1, $fa0 - fstx.d $fa0, $fp, $s7 - b .LBB6_95 -.LBB6_78: # in Loop: Header=BB6_7 Depth=2 - ld.d $a1, $sp, 64 # 8-byte Folded Reload - fld.d $fa1, $a1, %pc_lo12(.LCPI6_1) + fstx.d $fa0, $s8, $s7 + b .LBB6_93 +.LBB6_76: # in Loop: Header=BB6_7 Depth=2 + ld.d $a1, $sp, 88 # 8-byte Folded Reload + movgr2fr.d $fa1, $a1 fmul.d $fa0, $fa1, $fa0 - bne $s2, $a5, .LBB6_77 -.LBB6_79: # in Loop: Header=BB6_7 Depth=2 + bne $s1, $a5, .LBB6_75 +.LBB6_77: # in Loop: Header=BB6_7 Depth=2 ldptr.w $a1, $s0, 2964 - bne $a1, $s5, .LBB6_82 -# %bb.80: # in Loop: Header=BB6_7 Depth=2 + bne $a1, $s6, .LBB6_80 +# %bb.78: # in Loop: Header=BB6_7 Depth=2 ldptr.w $a1, $a4, 14364 sltui $a1, $a1, 1 movgr2cf $fcc0, $a1 vldi $vr1, -912 - fsel $fa1, $fa1, $fa4, $fcc0 + fsel $fa1, $fa1, $fa3, $fcc0 ldptr.w $a1, $s0, 2968 fmul.d $fa0, $fa0, $fa1 - fstx.d $fa0, $fp, $s7 - beq $a1, $s5, .LBB6_83 -.LBB6_81: # in Loop: Header=BB6_7 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI6_6) - fld.d $fa1, $a1, %pc_lo12(.LCPI6_6) - b .LBB6_84 -.LBB6_82: # in Loop: Header=BB6_7 Depth=2 + fstx.d $fa0, $s8, $s7 + beq $a1, $s6, .LBB6_81 +.LBB6_79: # in Loop: Header=BB6_7 Depth=2 + ld.d $a1, $sp, 8 # 8-byte Folded Reload + movgr2fr.d $fa1, $a1 + b .LBB6_82 +.LBB6_80: # in Loop: Header=BB6_7 Depth=2 vldi $vr1, -912 ldptr.w $a1, $s0, 2968 fmul.d $fa0, $fa0, $fa1 - fstx.d $fa0, $fp, $s7 - bne $a1, $s5, .LBB6_81 -.LBB6_83: # in Loop: Header=BB6_7 Depth=2 + fstx.d $fa0, $s8, $s7 + bne $a1, $s6, .LBB6_79 +.LBB6_81: # in Loop: Header=BB6_7 Depth=2 ldptr.w $a1, $a4, 14364 pcalau12i $a2, %pc_hi20(gop_structure) ld.d $a2, $a2, %pc_lo12(gop_structure) @@ -3230,410 +3228,421 @@ SetLagrangianMultipliers: # @SetLagrangianMultipliers alsl.d $a1, $a1, $a3, 3 add.d $a1, $a2, $a1 ld.w $a1, $a1, -8 - pcalau12i $a2, %pc_hi20(.LCPI6_7) - fld.d $fa1, $a2, %pc_lo12(.LCPI6_7) - pcalau12i $a2, %pc_hi20(.LCPI6_8) - fld.d $fa2, $a2, %pc_lo12(.LCPI6_8) - movgr2fr.w $fa3, $a1 - ffint.d.w $fa3, $fa3 - fmul.d $fa1, $fa3, $fa1 + movgr2fr.w $fa1, $a1 + ffint.d.w $fa1, $fa1 + ld.d $a2, $sp, 16 # 8-byte Folded Reload + lu52i.d $a1, $a2, 1020 + movgr2fr.d $fa2, $a1 + fmul.d $fa1, $fa1, $fa2 + lu52i.d $a1, $a2, 1021 + movgr2fr.d $fa2, $a1 fcmp.clt.d $fcc0, $fa2, $fa1 fsel $fa1, $fa1, $fa2, $fcc0 vldi $vr2, -912 fsub.d $fa1, $fa2, $fa1 -.LBB6_84: # in Loop: Header=BB6_7 Depth=2 - ldptr.d $s5, $a4, 15488 +.LBB6_82: # in Loop: Header=BB6_7 Depth=2 + ldptr.d $s4, $a4, 15488 ldptr.d $s6, $a4, 15496 - ld.d $a1, $s5, 40 + ld.d $a1, $s4, 40 ld.d $a2, $s6, 40 - ld.d $s2, $a0, 40 - ldx.d $s1, $a1, $s7 + ld.d $s1, $a0, 40 + ldx.d $s5, $a1, $s7 ldptr.w $a0, $s0, 5780 - ldx.d $s3, $a2, $s7 + ldx.d $s2, $a2, $s7 fmul.d $fa0, $fa0, $fa1 fmul.d $fa1, $fs1, $fa0 - fstx.d $fa1, $s2, $s7 - bne $a0, $a5, .LBB6_86 -# %bb.85: # in Loop: Header=BB6_7 Depth=2 + fstx.d $fa1, $s1, $s7 + bne $a0, $a5, .LBB6_84 +# %bb.83: # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 - b .LBB6_87 -.LBB6_86: # in Loop: Header=BB6_7 Depth=2 + b .LBB6_85 +.LBB6_84: # in Loop: Header=BB6_7 Depth=2 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB6_125 -.LBB6_87: # in Loop: Header=BB6_7 Depth=2 - fst.d $fa0, $s1, 0 - fmadd.d $fa0, $fa0, $fs3, $fa4 + bceqz $fcc0, .LBB6_123 +.LBB6_85: # in Loop: Header=BB6_7 Depth=2 + fst.d $fa0, $s5, 0 + fmadd.d $fa0, $fa0, $fs3, $fa3 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 - st.w $a0, $s3, 0 + st.w $a0, $s2, 0 ldptr.w $a0, $s0, 5784 - fldx.d $fa1, $s2, $s7 - bne $a0, $a5, .LBB6_89 -# %bb.88: # in Loop: Header=BB6_7 Depth=2 + fldx.d $fa1, $s1, $s7 + bne $a0, $a5, .LBB6_87 +# %bb.86: # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 - b .LBB6_90 -.LBB6_89: # in Loop: Header=BB6_7 Depth=2 + b .LBB6_88 +.LBB6_87: # in Loop: Header=BB6_7 Depth=2 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB6_126 -.LBB6_90: # in Loop: Header=BB6_7 Depth=2 - fst.d $fa0, $s1, 8 - fmadd.d $fa0, $fa0, $fs3, $fa4 + bceqz $fcc0, .LBB6_124 +.LBB6_88: # in Loop: Header=BB6_7 Depth=2 + fst.d $fa0, $s5, 8 + fmadd.d $fa0, $fa0, $fs3, $fa3 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 - st.w $a0, $s3, 4 + st.w $a0, $s2, 4 ldptr.w $a0, $s0, 5788 - fldx.d $fa1, $s2, $s7 - bne $a0, $a5, .LBB6_93 -# %bb.91: # in Loop: Header=BB6_7 Depth=2 + fldx.d $fa1, $s1, $s7 + bne $a0, $a5, .LBB6_91 +# %bb.89: # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 -.LBB6_92: # %.loopexit193.loopexit +.LBB6_90: # %.loopexit193.loopexit # in Loop: Header=BB6_7 Depth=2 - ld.d $s2, $sp, 56 # 8-byte Folded Reload - b .LBB6_94 -.LBB6_93: # in Loop: Header=BB6_7 Depth=2 + ld.d $s1, $sp, 112 # 8-byte Folded Reload + b .LBB6_92 +.LBB6_91: # in Loop: Header=BB6_7 Depth=2 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - ld.d $s2, $sp, 56 # 8-byte Folded Reload - bceqz $fcc0, .LBB6_127 -.LBB6_94: # %.loopexit193.loopexit + ld.d $s1, $sp, 112 # 8-byte Folded Reload + bceqz $fcc0, .LBB6_125 +.LBB6_92: # %.loopexit193.loopexit # in Loop: Header=BB6_7 Depth=2 - fst.d $fa0, $s1, 16 - fmadd.d $fa0, $fa0, $fs3, $fa4 + fst.d $fa0, $s5, 16 + fmadd.d $fa0, $fa0, $fs3, $fa3 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 - st.w $a0, $s3, 8 -.LBB6_95: # %.loopexit193 + st.w $a0, $s2, 8 +.LBB6_93: # %.loopexit193 # in Loop: Header=BB6_7 Depth=2 - ld.d $a0, $sp, 88 # 8-byte Folded Reload - ldx.d $s5, $s5, $a0 + ld.d $a0, $sp, 120 # 8-byte Folded Reload + ldx.d $s4, $s4, $a0 ldx.d $a0, $s6, $a0 - ldx.d $s6, $s5, $s7 - ldx.d $s3, $a0, $s7 + ldx.d $s6, $s4, $s7 + ldx.d $s2, $a0, $s7 ldptr.w $a0, $s0, 5780 - fldx.d $fa1, $fp, $s7 - bne $a0, $a5, .LBB6_97 -# %bb.96: # in Loop: Header=BB6_7 Depth=2 + fldx.d $fa1, $s8, $s7 + bne $a0, $a5, .LBB6_95 +# %bb.94: # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 - b .LBB6_98 -.LBB6_97: # in Loop: Header=BB6_7 Depth=2 + b .LBB6_96 +.LBB6_95: # in Loop: Header=BB6_7 Depth=2 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB6_116 -.LBB6_98: # in Loop: Header=BB6_7 Depth=2 + bceqz $fcc0, .LBB6_114 +.LBB6_96: # in Loop: Header=BB6_7 Depth=2 fst.d $fa0, $s6, 0 - fmadd.d $fa0, $fa0, $fs3, $fa4 + fmadd.d $fa0, $fa0, $fs3, $fa3 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 - st.w $a0, $s3, 0 + st.w $a0, $s2, 0 ldptr.w $a0, $s0, 5784 - fldx.d $fa1, $fp, $s7 - bne $a0, $a5, .LBB6_100 -# %bb.99: # in Loop: Header=BB6_7 Depth=2 + fldx.d $fa1, $s8, $s7 + bne $a0, $a5, .LBB6_98 +# %bb.97: # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 - b .LBB6_101 -.LBB6_100: # in Loop: Header=BB6_7 Depth=2 + b .LBB6_99 +.LBB6_98: # in Loop: Header=BB6_7 Depth=2 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB6_117 -.LBB6_101: # in Loop: Header=BB6_7 Depth=2 + bceqz $fcc0, .LBB6_115 +.LBB6_99: # in Loop: Header=BB6_7 Depth=2 fst.d $fa0, $s6, 8 - fmadd.d $fa0, $fa0, $fs3, $fa4 + fmadd.d $fa0, $fa0, $fs3, $fa3 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 - st.w $a0, $s3, 4 + st.w $a0, $s2, 4 ldptr.w $a0, $s0, 5788 - fldx.d $fa1, $fp, $s7 - bne $a0, $a5, .LBB6_103 -# %bb.102: # in Loop: Header=BB6_7 Depth=2 + fldx.d $fa1, $s8, $s7 + bne $a0, $a5, .LBB6_101 +# %bb.100: # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 - b .LBB6_104 -.LBB6_103: # in Loop: Header=BB6_7 Depth=2 + b .LBB6_102 +.LBB6_101: # in Loop: Header=BB6_7 Depth=2 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB6_118 -.LBB6_104: # in Loop: Header=BB6_7 Depth=2 - fmadd.d $fa1, $fa0, $fs3, $fa4 + bceqz $fcc0, .LBB6_116 +.LBB6_102: # in Loop: Header=BB6_7 Depth=2 + fmadd.d $fa1, $fa0, $fs3, $fa3 ftintrz.w.d $fa1, $fa1 movfr2gr.s $a0, $fa1 - st.w $a0, $s3, 8 + st.w $a0, $s2, 8 ldptr.w $a0, $s0, 4172 fst.d $fa0, $s6, 16 - bne $a0, $a5, .LBB6_5 -# %bb.105: # in Loop: Header=BB6_7 Depth=2 + ori $s6, $zero, 2 + bne $a0, $a5, .LBB6_6 +# %bb.103: # in Loop: Header=BB6_7 Depth=2 ori $a0, $zero, 32 - blt $s8, $a0, .LBB6_107 -# %bb.106: # in Loop: Header=BB6_7 Depth=2 + blt $fp, $a0, .LBB6_105 +# %bb.104: # in Loop: Header=BB6_7 Depth=2 ldptr.w $a0, $s0, 5116 - beqz $a0, .LBB6_109 -.LBB6_107: # in Loop: Header=BB6_7 Depth=2 - move $fp, $a4 + beqz $a0, .LBB6_107 +.LBB6_105: # in Loop: Header=BB6_7 Depth=2 + move $s1, $a4 ori $a1, $zero, 6 - slt $a0, $a1, $s4 + slt $a0, $a1, $s3 masknez $a1, $a1, $a0 - maskeqz $a0, $s4, $a0 + maskeqz $a0, $s3, $a0 or $a0, $a0, $a1 addi.w $a0, $a0, -6 -.LBB6_108: # in Loop: Header=BB6_7 Depth=2 +.LBB6_106: # in Loop: Header=BB6_7 Depth=2 slli.d $a0, $a0, 3 - ldx.d $a0, $s5, $a0 + ldx.d $a0, $s4, $a0 fld.d $fa0, $a0, 16 vldi $vr1, -912 fadd.d $fa0, $fa0, $fa1 pcaddu18i $ra, %call36(log) jirl $ra, $ra, 0 - move $a4, $fp - ldptr.d $a0, $fp, 15504 - ld.d $a1, $sp, 32 # 8-byte Folded Reload - fld.d $fa1, $a1, %pc_lo12(.LCPI6_9) - ld.d $a1, $sp, 88 # 8-byte Folded Reload + move $a4, $s1 + ldptr.d $a0, $s1, 15504 + ld.d $a1, $sp, 120 # 8-byte Folded Reload ldx.d $a0, $a0, $a1 + ld.d $a1, $sp, 48 # 8-byte Folded Reload + movgr2fr.d $fa1, $a1 fdiv.d $fa0, $fa0, $fa1 fstx.d $fa0, $a0, $s7 - ori $s5, $zero, 2 + ld.d $s1, $sp, 112 # 8-byte Folded Reload ori $a5, $zero, 1 - vldi $vr4, -928 + vldi $vr3, -928 b .LBB6_6 -.LBB6_109: # in Loop: Header=BB6_7 Depth=2 - move $fp, $a4 - addi.w $a0, $s4, -4 - b .LBB6_108 -.LBB6_110: # %call.sqrt +.LBB6_107: # in Loop: Header=BB6_7 Depth=2 + move $s1, $a4 + addi.w $a0, $s3, -4 + b .LBB6_106 +.LBB6_108: # %call.sqrt # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 - move $s2, $a4 + move $s1, $a4 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - vldi $vr4, -928 + vldi $vr3, -928 ori $a5, $zero, 1 - move $a4, $s2 - ld.d $s2, $sp, 56 # 8-byte Folded Reload + move $a4, $s1 + ld.d $s1, $sp, 112 # 8-byte Folded Reload b .LBB6_23 -.LBB6_111: # %call.sqrt278 +.LBB6_109: # %call.sqrt278 # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 - move $s2, $a4 + move $s1, $a4 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - vldi $vr4, -928 + vldi $vr3, -928 ori $a5, $zero, 1 - move $a4, $s2 - ld.d $s2, $sp, 56 # 8-byte Folded Reload + move $a4, $s1 + ld.d $s1, $sp, 112 # 8-byte Folded Reload b .LBB6_26 -.LBB6_112: # %call.sqrt280 +.LBB6_110: # %call.sqrt280 # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 move $s1, $a4 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - vldi $vr4, -928 + vldi $vr3, -928 ori $a5, $zero, 1 move $a4, $s1 + ld.d $s1, $sp, 112 # 8-byte Folded Reload b .LBB6_29 -.LBB6_113: # %call.sqrt288 +.LBB6_111: # %call.sqrt288 # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 - st.d $a4, $sp, 40 # 8-byte Folded Spill + st.d $a4, $sp, 72 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - vldi $vr4, -928 + vldi $vr3, -928 ori $a5, $zero, 1 - ld.d $a4, $sp, 40 # 8-byte Folded Reload + ld.d $a4, $sp, 72 # 8-byte Folded Reload b .LBB6_33 -.LBB6_114: # %call.sqrt290 +.LBB6_112: # %call.sqrt290 # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 - st.d $a4, $sp, 40 # 8-byte Folded Spill + st.d $a4, $sp, 72 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - vldi $vr4, -928 + vldi $vr3, -928 ori $a5, $zero, 1 - ld.d $a4, $sp, 40 # 8-byte Folded Reload + ld.d $a4, $sp, 72 # 8-byte Folded Reload b .LBB6_36 -.LBB6_115: # %call.sqrt292 +.LBB6_113: # %call.sqrt292 # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 - move $s2, $a4 + move $s1, $a4 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - vldi $vr4, -928 + vldi $vr3, -928 ori $a5, $zero, 1 - move $a4, $s2 + move $a4, $s1 b .LBB6_38 -.LBB6_116: # %call.sqrt306 +.LBB6_114: # %call.sqrt306 # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 move $s1, $a4 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - vldi $vr4, -928 + vldi $vr3, -928 ori $a5, $zero, 1 move $a4, $s1 - b .LBB6_98 -.LBB6_117: # %call.sqrt308 + ld.d $s1, $sp, 112 # 8-byte Folded Reload + b .LBB6_96 +.LBB6_115: # %call.sqrt308 # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 move $s1, $a4 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - vldi $vr4, -928 + vldi $vr3, -928 ori $a5, $zero, 1 move $a4, $s1 - b .LBB6_101 -.LBB6_118: # %call.sqrt310 + ld.d $s1, $sp, 112 # 8-byte Folded Reload + b .LBB6_99 +.LBB6_116: # %call.sqrt310 # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 - move $fp, $a4 + move $s1, $a4 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - vldi $vr4, -928 + vldi $vr3, -928 ori $a5, $zero, 1 - move $a4, $fp - b .LBB6_104 -.LBB6_119: # %call.sqrt282 + move $a4, $s1 + ld.d $s1, $sp, 112 # 8-byte Folded Reload + b .LBB6_102 +.LBB6_117: # %call.sqrt282 # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 - move $s2, $a4 + move $s1, $a4 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - vldi $vr4, -928 + vldi $vr3, -928 ori $a5, $zero, 1 - move $a4, $s2 - ld.d $s2, $sp, 56 # 8-byte Folded Reload + move $a4, $s1 + ld.d $s1, $sp, 112 # 8-byte Folded Reload b .LBB6_52 -.LBB6_120: # %call.sqrt284 +.LBB6_118: # %call.sqrt284 # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 - move $s2, $a4 + move $s1, $a4 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - vldi $vr4, -928 + vldi $vr3, -928 ori $a5, $zero, 1 - move $a4, $s2 - ld.d $s2, $sp, 56 # 8-byte Folded Reload + move $a4, $s1 + ld.d $s1, $sp, 112 # 8-byte Folded Reload b .LBB6_55 -.LBB6_121: # %call.sqrt286 +.LBB6_119: # %call.sqrt286 # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 move $s1, $a4 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - vldi $vr4, -928 + vldi $vr3, -928 ori $a5, $zero, 1 move $a4, $s1 + ld.d $s1, $sp, 112 # 8-byte Folded Reload b .LBB6_58 -.LBB6_122: # %call.sqrt294 +.LBB6_120: # %call.sqrt294 # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 - move $s3, $a4 + move $s5, $a4 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - vldi $vr4, -928 + vldi $vr3, -928 ori $a5, $zero, 1 - move $a4, $s3 + move $a4, $s5 b .LBB6_60 -.LBB6_123: # %call.sqrt296 +.LBB6_121: # %call.sqrt296 # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 - move $s3, $a4 + move $s5, $a4 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - vldi $vr4, -928 + vldi $vr3, -928 ori $a5, $zero, 1 - move $a4, $s3 + move $a4, $s5 b .LBB6_63 -.LBB6_124: # %call.sqrt298 +.LBB6_122: # %call.sqrt298 # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 - move $s2, $a4 + move $s1, $a4 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - vldi $vr4, -928 + vldi $vr3, -928 ori $a5, $zero, 1 - move $a4, $s2 + move $a4, $s1 b .LBB6_65 -.LBB6_125: # %call.sqrt300 +.LBB6_123: # %call.sqrt300 # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 - st.d $a4, $sp, 40 # 8-byte Folded Spill + st.d $a4, $sp, 72 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - vldi $vr4, -928 + vldi $vr3, -928 ori $a5, $zero, 1 - ld.d $a4, $sp, 40 # 8-byte Folded Reload - b .LBB6_87 -.LBB6_126: # %call.sqrt302 + ld.d $a4, $sp, 72 # 8-byte Folded Reload + b .LBB6_85 +.LBB6_124: # %call.sqrt302 # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 - st.d $a4, $sp, 40 # 8-byte Folded Spill + st.d $a4, $sp, 72 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - vldi $vr4, -928 + vldi $vr3, -928 ori $a5, $zero, 1 - ld.d $a4, $sp, 40 # 8-byte Folded Reload - b .LBB6_90 -.LBB6_127: # %call.sqrt304 + ld.d $a4, $sp, 72 # 8-byte Folded Reload + b .LBB6_88 +.LBB6_125: # %call.sqrt304 # in Loop: Header=BB6_7 Depth=2 fmov.d $fa0, $fa1 - move $s2, $a4 + move $s1, $a4 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - vldi $vr4, -928 + vldi $vr3, -928 ori $a5, $zero, 1 - move $a4, $s2 - b .LBB6_92 -.LBB6_128: # %.preheader + move $a4, $s1 + b .LBB6_90 +.LBB6_126: # %.preheader ori $fp, $zero, 12 pcalau12i $a1, %got_pc_hi20(QP2QUANT) - ld.d $s1, $a1, %got_pc_lo12(QP2QUANT) - st.d $zero, $sp, 88 # 8-byte Folded Spill + ld.d $s2, $a1, %got_pc_lo12(QP2QUANT) + st.d $zero, $sp, 120 # 8-byte Folded Spill vldi $vr3, -912 vldi $vr4, -928 - fld.d $fs0, $a0, %pc_lo12(.LCPI6_2) + lu52i.d $a1, $zero, 1039 + movgr2fr.d $fs0, $a1 ori $a6, $zero, 1 - fld.d $fs1, $a2, %pc_lo12(.LCPI6_9) + ori $a0, $a0, 2543 + lu32i.d $a0, 405058 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fs1, $a0 ori $a7, $zero, 52 - st.d $a4, $sp, 40 # 8-byte Folded Spill - b .LBB6_130 + st.d $a4, $sp, 72 # 8-byte Folded Spill + b .LBB6_128 .p2align 4, , 16 -.LBB6_129: # %._crit_edge212 - # in Loop: Header=BB6_130 Depth=1 - ld.d $a0, $sp, 88 # 8-byte Folded Reload +.LBB6_127: # %._crit_edge212 + # in Loop: Header=BB6_128 Depth=1 + ld.d $a0, $sp, 120 # 8-byte Folded Reload addi.d $a0, $a0, 1 - st.d $a0, $sp, 88 # 8-byte Folded Spill + st.d $a0, $sp, 120 # 8-byte Folded Spill ori $a1, $zero, 6 - beq $a0, $a1, .LBB6_139 -.LBB6_130: # =>This Loop Header: Depth=1 - # Child Loop BB6_135 Depth 2 + beq $a0, $a1, .LBB6_137 +.LBB6_128: # =>This Loop Header: Depth=1 + # Child Loop BB6_133 Depth 2 ldptr.w $a0, $a4, 15452 - ld.d $a1, $sp, 16 # 8-byte Folded Reload - blt $a0, $a1, .LBB6_129 -# %bb.131: # %.lr.ph211 - # in Loop: Header=BB6_130 Depth=1 + ld.d $a1, $sp, 24 # 8-byte Folded Reload + blt $a0, $a1, .LBB6_127 +# %bb.129: # %.lr.ph211 + # in Loop: Header=BB6_128 Depth=1 ldptr.d $a1, $a4, 15480 ldptr.d $a2, $a4, 15488 ldptr.d $a3, $a4, 15496 - ld.d $a5, $sp, 88 # 8-byte Folded Reload + ld.d $a5, $sp, 120 # 8-byte Folded Reload slli.d $a5, $a5, 3 ldx.d $s5, $a1, $a5 ldx.d $s6, $a2, $a5 - move $s2, $a5 + move $s1, $a5 ldx.d $s3, $a3, $a5 sub.w $s4, $zero, $a0 slli.d $a0, $a0, 3 sub.d $s7, $zero, $a0 move $s8, $s4 - b .LBB6_135 + b .LBB6_133 .p2align 4, , 16 -.LBB6_132: # in Loop: Header=BB6_135 Depth=2 +.LBB6_130: # in Loop: Header=BB6_133 Depth=2 ori $a1, $zero, 6 slt $a0, $a1, $s4 masknez $a1, $a1, $a0 maskeqz $a0, $s4, $a0 or $a0, $a0, $a1 addi.w $a0, $a0, -6 -.LBB6_133: # in Loop: Header=BB6_135 Depth=2 +.LBB6_131: # in Loop: Header=BB6_133 Depth=2 slli.d $a0, $a0, 3 ldx.d $a0, $s6, $a0 fld.d $fa0, $a0, 16 @@ -3641,27 +3650,27 @@ SetLagrangianMultipliers: # @SetLagrangianMultipliers pcaddu18i $ra, %call36(log) jirl $ra, $ra, 0 vldi $vr3, -912 - ld.d $a4, $sp, 40 # 8-byte Folded Reload + ld.d $a4, $sp, 72 # 8-byte Folded Reload ldptr.d $a0, $a4, 15504 - ldx.d $a0, $a0, $s2 + ldx.d $a0, $a0, $s1 fdiv.d $fa0, $fa0, $fs1 fstx.d $fa0, $a0, $s7 vldi $vr4, -928 ori $a6, $zero, 1 ori $a7, $zero, 52 -.LBB6_134: # in Loop: Header=BB6_135 Depth=2 +.LBB6_132: # in Loop: Header=BB6_133 Depth=2 addi.d $s8, $s8, 1 bstrpick.d $a0, $s8, 31, 0 addi.w $s4, $s4, 1 addi.d $s7, $s7, 8 - beq $a0, $a7, .LBB6_129 -.LBB6_135: # Parent Loop BB6_130 Depth=1 + beq $a0, $a7, .LBB6_127 +.LBB6_133: # Parent Loop BB6_128 Depth=1 # => This Inner Loop Header: Depth=2 slt $a0, $fp, $s4 masknez $a1, $fp, $a0 maskeqz $a0, $s4, $a0 or $a0, $a0, $a1 - alsl.d $a0, $a0, $s1, 2 + alsl.d $a0, $a0, $s2, 2 ld.w $a0, $a0, -48 ldptr.w $a1, $s0, 5780 movgr2fr.w $fa0, $a0 @@ -3704,34 +3713,34 @@ SetLagrangianMultipliers: # @SetLagrangianMultipliers st.w $a0, $a1, 8 ldptr.w $a0, $s0, 4172 fst.d $fa0, $a2, 16 - bne $a0, $a6, .LBB6_134 -# %bb.136: # in Loop: Header=BB6_135 Depth=2 + bne $a0, $a6, .LBB6_132 +# %bb.134: # in Loop: Header=BB6_133 Depth=2 ori $a0, $zero, 32 - blt $s8, $a0, .LBB6_132 -# %bb.137: # in Loop: Header=BB6_135 Depth=2 + blt $s8, $a0, .LBB6_130 +# %bb.135: # in Loop: Header=BB6_133 Depth=2 ldptr.w $a0, $s0, 5116 - bnez $a0, .LBB6_132 -# %bb.138: # in Loop: Header=BB6_135 Depth=2 + bnez $a0, .LBB6_130 +# %bb.136: # in Loop: Header=BB6_133 Depth=2 addi.w $a0, $s4, -4 - b .LBB6_133 -.LBB6_139: # %.loopexit - fld.d $fs4, $sp, 96 # 8-byte Folded Reload - fld.d $fs3, $sp, 104 # 8-byte Folded Reload - fld.d $fs2, $sp, 112 # 8-byte Folded Reload - fld.d $fs1, $sp, 120 # 8-byte Folded Reload - fld.d $fs0, $sp, 128 # 8-byte Folded Reload - ld.d $s8, $sp, 136 # 8-byte Folded Reload - ld.d $s7, $sp, 144 # 8-byte Folded Reload - ld.d $s6, $sp, 152 # 8-byte Folded Reload - ld.d $s5, $sp, 160 # 8-byte Folded Reload - ld.d $s4, $sp, 168 # 8-byte Folded Reload - ld.d $s3, $sp, 176 # 8-byte Folded Reload - ld.d $s2, $sp, 184 # 8-byte Folded Reload - ld.d $s1, $sp, 192 # 8-byte Folded Reload - ld.d $s0, $sp, 200 # 8-byte Folded Reload - ld.d $fp, $sp, 208 # 8-byte Folded Reload - ld.d $ra, $sp, 216 # 8-byte Folded Reload - addi.d $sp, $sp, 224 + b .LBB6_131 +.LBB6_137: # %.loopexit + fld.d $fs4, $sp, 128 # 8-byte Folded Reload + fld.d $fs3, $sp, 136 # 8-byte Folded Reload + fld.d $fs2, $sp, 144 # 8-byte Folded Reload + fld.d $fs1, $sp, 152 # 8-byte Folded Reload + fld.d $fs0, $sp, 160 # 8-byte Folded Reload + ld.d $s8, $sp, 168 # 8-byte Folded Reload + ld.d $s7, $sp, 176 # 8-byte Folded Reload + ld.d $s6, $sp, 184 # 8-byte Folded Reload + ld.d $s5, $sp, 192 # 8-byte Folded Reload + ld.d $s4, $sp, 200 # 8-byte Folded Reload + ld.d $s3, $sp, 208 # 8-byte Folded Reload + ld.d $s2, $sp, 216 # 8-byte Folded Reload + ld.d $s1, $sp, 224 # 8-byte Folded Reload + ld.d $s0, $sp, 232 # 8-byte Folded Reload + ld.d $fp, $sp, 240 # 8-byte Folded Reload + ld.d $ra, $sp, 248 # 8-byte Folded Reload + addi.d $sp, $sp, 256 ret .Lfunc_end6: .size SetLagrangianMultipliers, .Lfunc_end6-SetLagrangianMultipliers diff --git a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/transform8x8.s b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/transform8x8.s index 03283c2d..92e412b2 100644 --- a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/transform8x8.s +++ b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/transform8x8.s @@ -1,10 +1,6 @@ .file "transform8x8.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function Mode_Decision_for_new_Intra8x8Macroblock -.LCPI0_0: - .dword 0x3fdffe5c91d14e3c # double 0.49990000000000001 .text - .globl Mode_Decision_for_new_Intra8x8Macroblock + .globl Mode_Decision_for_new_Intra8x8Macroblock # -- Begin function Mode_Decision_for_new_Intra8x8Macroblock .p2align 5 .type Mode_Decision_for_new_Intra8x8Macroblock,@function Mode_Decision_for_new_Intra8x8Macroblock: # @Mode_Decision_for_new_Intra8x8Macroblock @@ -14,11 +10,14 @@ Mode_Decision_for_new_Intra8x8Macroblock: # @Mode_Decision_for_new_Intra8x8Macro st.d $fp, $sp, 32 # 8-byte Folded Spill st.d $s0, $sp, 24 # 8-byte Folded Spill fst.d $fs0, $sp, 16 # 8-byte Folded Spill - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI0_0) move $fp, $a0 fmov.d $fs0, $fa0 vldi $vr0, -1000 + lu12i.w $a0, -451308 + ori $a0, $a0, 3644 + lu32i.d $a0, -420 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa1, $a0 fmadd.d $fa0, $fs0, $fa0, $fa1 vreplvei.d $vr0, $vr0, 0 vfrintrm.d $vr0, $vr0 @@ -80,12 +79,7 @@ Mode_Decision_for_new_Intra8x8Macroblock: # @Mode_Decision_for_new_Intra8x8Macro .Lfunc_end0: .size Mode_Decision_for_new_Intra8x8Macroblock, .Lfunc_end0-Mode_Decision_for_new_Intra8x8Macroblock # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function Mode_Decision_for_new_8x8IntraBlocks -.LCPI1_0: - .dword 0x46293e5939a08cea # double 1.0E+30 - .text - .globl Mode_Decision_for_new_8x8IntraBlocks + .globl Mode_Decision_for_new_8x8IntraBlocks # -- Begin function Mode_Decision_for_new_8x8IntraBlocks .p2align 5 .type Mode_Decision_for_new_8x8IntraBlocks,@function Mode_Decision_for_new_8x8IntraBlocks: # @Mode_Decision_for_new_8x8IntraBlocks @@ -292,8 +286,11 @@ Mode_Decision_for_new_8x8IntraBlocks: # @Mode_Decision_for_new_8x8IntraBlocks bstrins.d $a2, $a3, 5, 5 add.d $a1, $a1, $a2 st.d $a1, $sp, 96 # 8-byte Folded Spill - pcalau12i $a1, %pc_hi20(.LCPI1_0) - fld.d $fs2, $a1, %pc_lo12(.LCPI1_0) + lu12i.w $a1, 236040 + ori $a1, $a1, 3306 + lu32i.d $a1, -442791 + lu52i.d $a1, $a1, 1122 + movgr2fr.d $fs2, $a1 vrepli.b $vr0, 0 vst $vr0, $sp, 192 # 16-byte Folded Spill lu12i.w $s4, 3 diff --git a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/weighted_prediction.s b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/weighted_prediction.s index 75611f37..31b9c1e9 100644 --- a/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/weighted_prediction.s +++ b/results/MultiSource/Applications/JM/lencod/CMakeFiles/lencod.dir/weighted_prediction.s @@ -1,10 +1,6 @@ .file "weighted_prediction.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function estimate_weighting_factor_P_slice -.LCPI0_0: - .dword 0x4040000000000000 # double 32 .text - .globl estimate_weighting_factor_P_slice + .globl estimate_weighting_factor_P_slice # -- Begin function estimate_weighting_factor_P_slice .p2align 5 .type estimate_weighting_factor_P_slice,@function estimate_weighting_factor_P_slice: # @estimate_weighting_factor_P_slice @@ -276,8 +272,8 @@ estimate_weighting_factor_P_slice: # @estimate_weighting_factor_P_slice bne $s5, $a7, .LBB0_24 b .LBB0_20 .LBB0_27: # %.preheader117.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) + lu52i.d $a0, $zero, 1028 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 addi.d $a0, $sp, 792 addi.d $t0, $sp, 812 @@ -523,12 +519,7 @@ estimate_weighting_factor_P_slice: # @estimate_weighting_factor_P_slice .Lfunc_end0: .size estimate_weighting_factor_P_slice, .Lfunc_end0-estimate_weighting_factor_P_slice # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function estimate_weighting_factor_B_slice -.LCPI1_0: - .dword 0x4040000000000000 # double 32 - .text - .globl estimate_weighting_factor_B_slice + .globl estimate_weighting_factor_B_slice # -- Begin function estimate_weighting_factor_B_slice .p2align 5 .type estimate_weighting_factor_B_slice,@function estimate_weighting_factor_B_slice: # @estimate_weighting_factor_B_slice @@ -916,8 +907,8 @@ estimate_weighting_factor_B_slice: # @estimate_weighting_factor_B_slice ld.d $a3, $sp, 96 # 8-byte Folded Reload bne $a2, $a3, .LBB1_35 # %bb.38: # %.preheader260.loopexit - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_0) + lu52i.d $a0, $zero, 1028 + movgr2fr.d $fa1, $a0 fmul.d $fs1, $fa0, $fa1 .LBB1_39: # %.preheader260 st.d $a6, $sp, 8 # 8-byte Folded Spill @@ -1316,12 +1307,7 @@ estimate_weighting_factor_B_slice: # @estimate_weighting_factor_B_slice .Lfunc_end1: .size estimate_weighting_factor_B_slice, .Lfunc_end1-estimate_weighting_factor_B_slice # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function test_wp_P_slice -.LCPI2_0: - .dword 0x4040000000000000 # double 32 - .text - .globl test_wp_P_slice + .globl test_wp_P_slice # -- Begin function test_wp_P_slice .p2align 5 .type test_wp_P_slice,@function test_wp_P_slice: # @test_wp_P_slice @@ -1591,8 +1577,8 @@ test_wp_P_slice: # @test_wp_P_slice bne $s4, $a5, .LBB2_24 b .LBB2_20 .LBB2_27: # %.preheader136.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI2_0) + lu52i.d $a0, $zero, 1028 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 addi.d $a0, $sp, 776 addi.d $a7, $sp, 796 diff --git a/results/MultiSource/Applications/SIBsim4/CMakeFiles/SIBsim4.dir/sim4b1.s b/results/MultiSource/Applications/SIBsim4/CMakeFiles/SIBsim4.dir/sim4b1.s index 418ab7fc..e38f7c8f 100644 --- a/results/MultiSource/Applications/SIBsim4/CMakeFiles/SIBsim4.dir/sim4b1.s +++ b/results/MultiSource/Applications/SIBsim4/CMakeFiles/SIBsim4.dir/sim4b1.s @@ -1,17 +1,7 @@ .file "sim4b1.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function SIM4 -.LCPI0_0: - .dword 0x3fc3333333333334 # double 0.15000000000000002 -.LCPI0_1: - .dword 0x3fc999999999999a # double 0.20000000000000001 -.LCPI0_2: - .dword 0x3ff3333333333333 # double 1.2 -.LCPI0_4: - .dword 0x408f400000000000 # double 1000 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI0_3: + .p2align 4, 0x0 # -- Begin function SIM4 +.LCPI0_0: .word 1 # 0x1 .word 1 # 0x1 .word 0 # 0x0 @@ -37,42 +27,44 @@ SIM4: # @SIM4 fst.d $fs0, $sp, 544 # 8-byte Folded Spill fst.d $fs1, $sp, 536 # 8-byte Folded Spill fst.d $fs2, $sp, 528 # 8-byte Folded Spill + fst.d $fs3, $sp, 520 # 8-byte Folded Spill + fst.d $fs4, $sp, 512 # 8-byte Folded Spill move $s4, $a0 ld.w $a0, $a0, 16 - beqz $a0, .LBB0_593 + beqz $a0, .LBB0_592 # %bb.1: + move $s6, $a1 ldptr.w $a0, $a1, 4148 - beqz $a0, .LBB0_593 + beqz $a0, .LBB0_592 # %bb.2: - move $s6, $a2 + move $s5, $a2 + ori $fp, $zero, 0 ori $a0, $zero, 0 lu32i.d $a0, 5 - st.d $a0, $sp, 496 + st.d $a0, $sp, 480 ori $a0, $zero, 40 - move $fp, $a1 pcaddu18i $ra, %call36(xmalloc) jirl $ra, $ra, 0 - ld.d $a1, $fp, 16 - st.d $fp, $sp, 280 # 8-byte Folded Spill - ldptr.w $a2, $fp, 4148 - st.d $a0, $sp, 488 + ld.d $a1, $s6, 16 + ldptr.w $a2, $s6, 4148 + st.d $a0, $sp, 472 pcalau12i $a0, %got_pc_hi20(options) ld.d $a0, $a0, %got_pc_lo12(options) - st.d $a0, $sp, 376 # 8-byte Folded Spill + st.d $a0, $sp, 360 # 8-byte Folded Spill ld.w $a5, $a0, 40 st.d $zero, $sp, 0 ori $a3, $zero, 1 ori $a4, $zero, 1 - addi.d $a6, $sp, 488 + addi.d $a6, $sp, 472 move $a0, $s4 - move $a7, $s6 + move $a7, $s5 pcaddu18i $ra, %call36(exon_cores) jirl $ra, $ra, 0 - ld.w $a0, $s6, 8 + ld.w $a0, $s5, 8 vrepli.b $vr0, 0 - vst $vr0, $sp, 256 # 16-byte Folded Spill - vst $vr0, $sp, 472 - beqz $a0, .LBB0_591 + vst $vr0, $sp, 240 # 16-byte Folded Spill + vst $vr0, $sp, 456 + beqz $a0, .LBB0_590 # %bb.3: # %.lr.ph682 ori $s8, $zero, 2 addi.w $a0, $zero, -62 @@ -80,32 +72,47 @@ SIM4: # @SIM4 addi.w $a0, $zero, -1 st.d $a0, $sp, 184 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(free) - ld.d $s5, $a0, %got_pc_lo12(free) - move $s7, $zero + ld.d $s7, $a0, %got_pc_lo12(free) + move $a1, $zero + lu12i.w $a0, 209715 + ori $a0, $a0, 820 + lu32i.d $a0, 209715 + lu52i.d $a0, $a0, 1020 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 244 ori $a0, $a0, 575 - st.d $a0, $sp, 144 # 8-byte Folded Spill + st.d $a0, $sp, 136 # 8-byte Folded Spill + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1020 + movgr2fr.d $fs1, $a0 vrepli.b $vr0, -1 vst $vr0, $sp, 48 # 16-byte Folded Spill - movgr2fr.d $fs0, $zero + movgr2fr.d $fs2, $zero + lu32i.d $fp, -49152 + lu52i.d $a0, $fp, 1032 + movgr2fr.d $fs3, $a0 vrepli.w $vr0, 1 vst $vr0, $sp, 192 # 16-byte Folded Spill lu12i.w $a0, -4096 lu32i.d $a0, 0 st.d $a0, $sp, 40 # 8-byte Folded Spill st.d $s4, $sp, 152 # 8-byte Folded Spill - st.d $s6, $sp, 80 # 8-byte Folded Spill - st.d $s5, $sp, 168 # 8-byte Folded Spill + st.d $s5, $sp, 80 # 8-byte Folded Spill + st.d $s6, $sp, 264 # 8-byte Folded Spill + st.d $s7, $sp, 168 # 8-byte Folded Spill b .LBB0_6 .p2align 4, , 16 .LBB0_4: # %free_align.exit # in Loop: Header=BB0_6 Depth=1 - ld.d $a0, $sp, 408 # 8-byte Folded Reload + ld.d $a0, $sp, 392 # 8-byte Folded Reload st.d $zero, $a0, 0 .LBB0_5: # in Loop: Header=BB0_6 Depth=1 - ld.wu $a0, $s6, 8 - addi.d $s7, $s7, 1 - bgeu $s7, $a0, .LBB0_590 + ld.wu $a0, $s5, 8 + ld.d $a1, $sp, 144 # 8-byte Folded Reload + addi.d $a1, $a1, 1 + bgeu $a1, $a0, .LBB0_589 .LBB0_6: # =>This Loop Header: Depth=1 # Child Loop BB0_12 Depth 2 # Child Loop BB0_15 Depth 2 @@ -122,75 +129,75 @@ SIM4: # @SIM4 # Child Loop BB0_83 Depth 4 # Child Loop BB0_92 Depth 3 # Child Loop BB0_97 Depth 3 - # Child Loop BB0_101 Depth 3 - # Child Loop BB0_109 Depth 2 - # Child Loop BB0_120 Depth 2 - # Child Loop BB0_123 Depth 2 - # Child Loop BB0_127 Depth 2 - # Child Loop BB0_133 Depth 2 - # Child Loop BB0_140 Depth 2 - # Child Loop BB0_147 Depth 2 - # Child Loop BB0_157 Depth 2 - # Child Loop BB0_172 Depth 3 - # Child Loop BB0_184 Depth 4 - # Child Loop BB0_195 Depth 3 - # Child Loop BB0_200 Depth 3 - # Child Loop BB0_202 Depth 3 - # Child Loop BB0_586 Depth 2 - # Child Loop BB0_215 Depth 2 - # Child Loop BB0_224 Depth 3 - # Child Loop BB0_238 Depth 3 - # Child Loop BB0_245 Depth 3 - # Child Loop BB0_248 Depth 3 - # Child Loop BB0_258 Depth 3 - # Child Loop BB0_262 Depth 3 - # Child Loop BB0_267 Depth 3 - # Child Loop BB0_317 Depth 3 - # Child Loop BB0_321 Depth 3 - # Child Loop BB0_332 Depth 4 - # Child Loop BB0_342 Depth 5 - # Child Loop BB0_347 Depth 4 - # Child Loop BB0_325 Depth 4 - # Child Loop BB0_354 Depth 4 - # Child Loop BB0_366 Depth 5 - # Child Loop BB0_371 Depth 4 - # Child Loop BB0_379 Depth 4 - # Child Loop BB0_307 Depth 3 - # Child Loop BB0_312 Depth 3 - # Child Loop BB0_287 Depth 3 - # Child Loop BB0_292 Depth 3 - # Child Loop BB0_297 Depth 3 - # Child Loop BB0_388 Depth 2 - # Child Loop BB0_401 Depth 2 - # Child Loop BB0_410 Depth 2 - # Child Loop BB0_417 Depth 2 - # Child Loop BB0_427 Depth 2 - # Child Loop BB0_430 Depth 3 - # Child Loop BB0_433 Depth 3 - # Child Loop BB0_439 Depth 3 - # Child Loop BB0_447 Depth 2 - # Child Loop BB0_452 Depth 3 - # Child Loop BB0_454 Depth 4 - # Child Loop BB0_457 Depth 4 - # Child Loop BB0_463 Depth 4 - # Child Loop BB0_472 Depth 3 - # Child Loop BB0_480 Depth 2 - # Child Loop BB0_487 Depth 3 - # Child Loop BB0_489 Depth 4 - # Child Loop BB0_510 Depth 2 - # Child Loop BB0_518 Depth 3 - # Child Loop BB0_528 Depth 2 - # Child Loop BB0_559 Depth 3 - # Child Loop BB0_567 Depth 4 - # Child Loop BB0_570 Depth 4 - # Child Loop BB0_583 Depth 2 - ld.d $a0, $s6, 0 - slli.d $a1, $s7, 3 + # Child Loop BB0_100 Depth 3 + # Child Loop BB0_108 Depth 2 + # Child Loop BB0_119 Depth 2 + # Child Loop BB0_122 Depth 2 + # Child Loop BB0_126 Depth 2 + # Child Loop BB0_132 Depth 2 + # Child Loop BB0_139 Depth 2 + # Child Loop BB0_146 Depth 2 + # Child Loop BB0_156 Depth 2 + # Child Loop BB0_171 Depth 3 + # Child Loop BB0_183 Depth 4 + # Child Loop BB0_194 Depth 3 + # Child Loop BB0_199 Depth 3 + # Child Loop BB0_201 Depth 3 + # Child Loop BB0_585 Depth 2 + # Child Loop BB0_214 Depth 2 + # Child Loop BB0_223 Depth 3 + # Child Loop BB0_237 Depth 3 + # Child Loop BB0_244 Depth 3 + # Child Loop BB0_247 Depth 3 + # Child Loop BB0_257 Depth 3 + # Child Loop BB0_261 Depth 3 + # Child Loop BB0_266 Depth 3 + # Child Loop BB0_316 Depth 3 + # Child Loop BB0_320 Depth 3 + # Child Loop BB0_331 Depth 4 + # Child Loop BB0_341 Depth 5 + # Child Loop BB0_346 Depth 4 + # Child Loop BB0_324 Depth 4 + # Child Loop BB0_353 Depth 4 + # Child Loop BB0_365 Depth 5 + # Child Loop BB0_370 Depth 4 + # Child Loop BB0_378 Depth 4 + # Child Loop BB0_306 Depth 3 + # Child Loop BB0_311 Depth 3 + # Child Loop BB0_286 Depth 3 + # Child Loop BB0_291 Depth 3 + # Child Loop BB0_296 Depth 3 + # Child Loop BB0_387 Depth 2 + # Child Loop BB0_400 Depth 2 + # Child Loop BB0_409 Depth 2 + # Child Loop BB0_416 Depth 2 + # Child Loop BB0_426 Depth 2 + # Child Loop BB0_429 Depth 3 + # Child Loop BB0_432 Depth 3 + # Child Loop BB0_438 Depth 3 + # Child Loop BB0_446 Depth 2 + # Child Loop BB0_451 Depth 3 + # Child Loop BB0_453 Depth 4 + # Child Loop BB0_456 Depth 4 + # Child Loop BB0_462 Depth 4 + # Child Loop BB0_471 Depth 3 + # Child Loop BB0_479 Depth 2 + # Child Loop BB0_486 Depth 3 + # Child Loop BB0_488 Depth 4 + # Child Loop BB0_509 Depth 2 + # Child Loop BB0_517 Depth 3 + # Child Loop BB0_527 Depth 2 + # Child Loop BB0_558 Depth 3 + # Child Loop BB0_566 Depth 4 + # Child Loop BB0_569 Depth 4 + # Child Loop BB0_582 Depth 2 + ld.d $a0, $s5, 0 + st.d $a1, $sp, 144 # 8-byte Folded Spill + slli.d $a1, $a1, 3 ldx.d $fp, $a0, $a1 st.d $zero, $fp, 40 ld.d $a1, $s4, 8 - ld.d $a0, $sp, 280 # 8-byte Folded Reload - ld.d $a2, $a0, 16 + ld.d $a2, $s6, 16 move $a0, $fp pcaddu18i $ra, %call36(kill_polyA) jirl $ra, $ra, 0 @@ -198,23 +205,22 @@ SIM4: # @SIM4 beqz $s0, .LBB0_5 # %bb.7: # in Loop: Header=BB0_6 Depth=1 ld.w $a0, $fp, 44 - st.d $fp, $sp, 408 # 8-byte Folded Spill + st.d $fp, $sp, 392 # 8-byte Folded Spill addi.d $t7, $fp, 8 - st.d $s7, $sp, 136 # 8-byte Folded Spill - st.d $t7, $sp, 400 # 8-byte Folded Spill - bnez $a0, .LBB0_115 + st.d $t7, $sp, 384 # 8-byte Folded Spill + bnez $a0, .LBB0_114 # %bb.8: # in Loop: Header=BB0_6 Depth=1 ld.d $a0, $t7, 0 ld.d $t2, $a0, 0 ld.w $a0, $t2, 4 - bltu $a0, $s8, .LBB0_115 + bltu $a0, $s8, .LBB0_114 # %bb.9: # in Loop: Header=BB0_6 Depth=1 addi.w $a1, $a0, -62 ld.d $a2, $sp, 32 # 8-byte Folded Reload bltu $a2, $a1, .LBB0_23 # %bb.10: # in Loop: Header=BB0_6 Depth=1 ld.w $a1, $t2, 0 - ld.d $s1, $sp, 408 # 8-byte Folded Reload + ld.d $s1, $sp, 392 # 8-byte Folded Reload ld.w $a2, $s1, 24 addi.w $a1, $a1, -1 bgeu $a2, $a1, .LBB0_23 @@ -225,13 +231,12 @@ SIM4: # @SIM4 ori $a4, $zero, 10 masknez $a4, $a4, $a3 maskeqz $a1, $a1, $a3 - ld.d $a3, $sp, 280 # 8-byte Folded Reload - ld.d $a3, $a3, 16 + ld.d $a3, $s6, 16 or $a1, $a1, $a4 addi.d $a0, $a0, -1 - st.w $a1, $sp, 468 - st.d $a3, $sp, 440 - st.w $a0, $sp, 448 + st.w $a1, $sp, 452 + st.d $a3, $sp, 424 + st.w $a0, $sp, 432 slli.d $a0, $a1, 1 addi.d $a0, $a0, -2 bstrpick.d $a0, $a0, 31, 1 @@ -239,20 +244,20 @@ SIM4: # @SIM4 ld.d $a1, $sp, 184 # 8-byte Folded Reload sll.w $a0, $a1, $a0 nor $a0, $a0, $zero - st.w $a0, $sp, 464 + st.w $a0, $sp, 448 slli.d $a0, $a2, 2 move $s3, $t2 pcaddu18i $ra, %call36(xmalloc) jirl $ra, $ra, 0 move $s0, $a0 - st.d $a0, $sp, 456 + st.d $a0, $sp, 440 ori $a1, $zero, 8 lu12i.w $a0, 128 pcaddu18i $ra, %call36(xcalloc) jirl $ra, $ra, 0 move $fp, $a0 - st.d $a0, $sp, 432 - addi.d $a0, $sp, 432 + st.d $a0, $sp, 416 + addi.d $a0, $sp, 416 pcaddu18i $ra, %call36(bld_table) jirl $ra, $ra, 0 ld.d $a0, $s4, 8 @@ -260,15 +265,15 @@ SIM4: # @SIM4 move $s2, $s3 ld.w $a2, $s3, 0 add.d $a1, $a0, $a3 - ld.d $a0, $sp, 376 # 8-byte Folded Reload + ld.d $a0, $sp, 360 # 8-byte Folded Reload ld.w $a5, $a0, 24 nor $a0, $a3, $zero add.w $a2, $a2, $a0 addi.w $a4, $a3, 1 - addi.d $a0, $sp, 432 + addi.d $a0, $sp, 416 ori $a3, $zero, 1 - addi.d $a6, $sp, 488 - addi.d $a7, $sp, 472 + addi.d $a6, $sp, 472 + addi.d $a7, $sp, 456 st.d $a7, $sp, 0 move $a7, $zero pcaddu18i $ra, %call36(exon_cores) @@ -282,7 +287,7 @@ SIM4: # @SIM4 .LBB0_12: # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 ldx.d $a0, $fp, $s0 - move $a1, $s5 + move $a1, $s7 pcaddu18i $ra, %call36(tdestroy) jirl $ra, $ra, 0 addi.d $s0, $s0, 8 @@ -292,13 +297,13 @@ SIM4: # @SIM4 move $a0, $fp pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.w $a0, $sp, 480 - ld.d $t7, $sp, 400 # 8-byte Folded Reload + ld.w $a0, $sp, 464 + ld.d $t7, $sp, 384 # 8-byte Folded Reload move $t2, $s2 beqz $a0, .LBB0_22 # %bb.14: # %.lr.ph.preheader.i # in Loop: Header=BB0_6 Depth=1 - ld.d $a1, $sp, 472 + ld.d $a1, $sp, 456 bstrpick.d $a2, $a0, 31, 0 move $a3, $a1 .p2align 4, , 16 @@ -328,9 +333,8 @@ SIM4: # @SIM4 # in Loop: Header=BB0_6 Depth=1 ld.d $a3, $s4, 8 ld.w $a5, $a0, 12 - ld.d $a6, $sp, 280 # 8-byte Folded Reload - ldptr.w $a4, $a6, 4148 - ld.d $a6, $a6, 16 + ldptr.w $a4, $s6, 4148 + ld.d $a6, $s6, 16 bstrpick.d $a7, $a2, 31, 0 bstrpick.d $t0, $a5, 31, 0 sltu $t1, $a4, $a5 @@ -362,25 +366,24 @@ SIM4: # @SIM4 .LBB0_21: # %grow_exon_right.exit # in Loop: Header=BB0_6 Depth=1 ld.w $a3, $s4, 36 - addi.d $a1, $sp, 472 + addi.d $a1, $sp, 456 move $a0, $t7 move $a2, $zero pcaddu18i $ra, %call36(merge) jirl $ra, $ra, 0 - ld.d $t7, $sp, 400 # 8-byte Folded Reload + ld.d $t7, $sp, 384 # 8-byte Folded Reload ld.d $a0, $t7, 0 ld.d $t2, $a0, 0 - st.w $zero, $sp, 480 + st.w $zero, $sp, 464 .LBB0_22: # in Loop: Header=BB0_6 Depth=1 - ld.d $a0, $sp, 408 # 8-byte Folded Reload + ld.d $a0, $sp, 392 # 8-byte Folded Reload ld.w $s0, $a0, 16 beqz $s0, .LBB0_32 .LBB0_23: # %.lr.ph # in Loop: Header=BB0_6 Depth=1 - move $s5, $t2 + move $s7, $t2 ld.d $s2, $t7, 0 - ld.d $a0, $sp, 280 # 8-byte Folded Reload - ld.d $fp, $a0, 16 + ld.d $fp, $s6, 16 move $s1, $zero bstrpick.d $s3, $s0, 31, 0 .p2align 4, , 16 @@ -402,9 +405,9 @@ SIM4: # @SIM4 move $s1, $s0 .LBB0_27: # %.critedge # in Loop: Header=BB0_6 Depth=1 - move $t2, $s5 - ld.d $t7, $sp, 400 # 8-byte Folded Reload - ld.d $s5, $sp, 168 # 8-byte Folded Reload + move $t2, $s7 + ld.d $t7, $sp, 384 # 8-byte Folded Reload + ld.d $s7, $sp, 168 # 8-byte Folded Reload beqz $s1, .LBB0_32 # %bb.28: # %.preheader601.preheader # in Loop: Header=BB0_6 Depth=1 @@ -423,7 +426,7 @@ SIM4: # @SIM4 addi.d $s0, $s0, 8 bne $s2, $s0, .LBB0_29 # %bb.30: # in Loop: Header=BB0_6 Depth=1 - ld.d $s0, $sp, 408 # 8-byte Folded Reload + ld.d $s0, $sp, 392 # 8-byte Folded Reload ld.d $a0, $s0, 8 ld.w $a2, $s0, 16 alsl.d $a1, $fp, $a0, 3 @@ -437,14 +440,14 @@ SIM4: # @SIM4 st.w $a1, $s0, 16 beq $a0, $s1, .LBB0_5 # %bb.31: # in Loop: Header=BB0_6 Depth=1 - ld.d $t7, $sp, 400 # 8-byte Folded Reload + ld.d $t7, $sp, 384 # 8-byte Folded Reload ld.d $a0, $t7, 0 ld.d $t2, $a0, 0 .LBB0_32: # %.critedge.thread # in Loop: Header=BB0_6 Depth=1 ld.wu $a0, $t2, 4 addi.w $s2, $a0, -1 - beqz $s2, .LBB0_115 + beqz $s2, .LBB0_114 # %bb.33: # in Loop: Header=BB0_6 Depth=1 ori $t1, $zero, 1 sltui $a2, $s2, 250 @@ -452,31 +455,30 @@ SIM4: # @SIM4 masknez $a3, $a1, $a2 ld.w $a1, $t2, 0 maskeqz $a2, $s2, $a2 - or $t3, $a2, $a3 - slli.d $a2, $t3, 2 + or $s5, $a2, $a3 + slli.d $a2, $s5, 2 addi.w $a3, $a1, -1 slt $a4, $a2, $a3 - ld.d $a5, $sp, 280 # 8-byte Folded Reload - ld.d $a5, $a5, 16 + ld.d $a5, $s6, 16 maskeqz $a2, $a2, $a4 masknez $a3, $a3, $a4 - or $t4, $a2, $a3 + or $t3, $a2, $a3 add.d $a2, $a5, $a0 ld.d $a4, $s4, 8 - nor $a3, $t3, $zero + nor $a3, $s5, $zero ori $a5, $zero, 2 add.d $s8, $a3, $a2 bstrpick.d $a2, $a1, 31, 0 add.d $a2, $a4, $a2 - nor $a4, $t4, $zero + nor $a4, $t3, $zero add.d $s6, $a4, $a2 ori $s7, $zero, 2 blt $a1, $a5, .LBB0_38 # %bb.34: # %.lr.ph.i367.preheader # in Loop: Header=BB0_6 Depth=1 - move $a2, $t4 - move $s3, $t3 - ld.d $fp, $sp, 376 # 8-byte Folded Reload + move $a2, $t3 + move $s3, $s5 + ld.d $fp, $sp, 360 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_35: # %.lr.ph.i367 # Parent Loop BB0_6 Depth=1 @@ -496,9 +498,9 @@ SIM4: # @SIM4 bne $a5, $t1, .LBB0_35 b .LBB0_39 .LBB0_38: # in Loop: Header=BB0_6 Depth=1 - move $s3, $t3 - move $a2, $t4 - ld.d $fp, $sp, 376 # 8-byte Folded Reload + move $s3, $s5 + move $a2, $t3 + ld.d $fp, $sp, 360 # 8-byte Folded Reload .LBB0_39: # %.critedge.i # in Loop: Header=BB0_6 Depth=1 add.d $s4, $a0, $a3 @@ -515,25 +517,25 @@ SIM4: # @SIM4 addi.w $a0, $a2, 0 beqz $a0, .LBB0_45 # %bb.42: # in Loop: Header=BB0_6 Depth=1 - st.d $a4, $sp, 312 # 8-byte Folded Spill - st.d $s4, $sp, 320 # 8-byte Folded Spill - st.d $t2, $sp, 328 # 8-byte Folded Spill + st.d $a4, $sp, 304 # 8-byte Folded Spill + st.d $s4, $sp, 312 # 8-byte Folded Spill + st.d $t2, $sp, 320 # 8-byte Folded Spill ld.d $a0, $sp, 152 # 8-byte Folded Reload - ld.w $s5, $a0, 36 - addi.w $s4, $t3, 1 - st.d $t3, $sp, 416 # 8-byte Folded Spill - st.d $t4, $sp, 360 # 8-byte Folded Spill - add.w $s1, $t4, $t3 + ld.w $a0, $a0, 36 + st.d $a0, $sp, 400 # 8-byte Folded Spill + addi.w $s4, $s5, 1 + st.d $t3, $sp, 344 # 8-byte Folded Spill + add.w $s1, $t3, $s5 addi.w $s0, $s1, 1 slli.d $fp, $s0, 2 move $a0, $fp pcaddu18i $ra, %call36(xmalloc) jirl $ra, $ra, 0 - st.d $a0, $sp, 392 # 8-byte Folded Spill + st.d $a0, $sp, 408 # 8-byte Folded Spill move $a0, $fp pcaddu18i $ra, %call36(xmalloc) jirl $ra, $ra, 0 - st.d $a0, $sp, 424 # 8-byte Folded Spill + move $a3, $a0 bltz $s1, .LBB0_51 # %bb.43: # %.lr.ph279.preheader.i # in Loop: Header=BB0_6 Depth=1 @@ -547,12 +549,11 @@ SIM4: # @SIM4 add.d $s4, $s3, $s4 add.d $a4, $a2, $a4 ori $s8, $zero, 2 - ld.d $s5, $sp, 168 # 8-byte Folded Reload - ld.d $s7, $sp, 136 # 8-byte Folded Reload - b .LBB0_113 + ld.d $s7, $sp, 168 # 8-byte Folded Reload + b .LBB0_112 .LBB0_46: # %vector.ph1145 # in Loop: Header=BB0_6 Depth=1 - ld.d $a0, $sp, 392 # 8-byte Folded Reload + ld.d $a0, $sp, 408 # 8-byte Folded Reload addi.d $a1, $a0, 16 bstrpick.d $a0, $s0, 30, 3 slli.d $a0, $a0, 3 @@ -573,7 +574,7 @@ SIM4: # @SIM4 .LBB0_49: # %.lr.ph279.i.preheader # in Loop: Header=BB0_6 Depth=1 sub.d $a1, $a0, $s0 - ld.d $a2, $sp, 392 # 8-byte Folded Reload + ld.d $a2, $sp, 408 # 8-byte Folded Reload alsl.d $a0, $a0, $a2, 2 .p2align 4, , 16 .LBB0_50: # %.lr.ph279.i @@ -585,12 +586,13 @@ SIM4: # @SIM4 bnez $a1, .LBB0_50 .LBB0_51: # %._crit_edge.i # in Loop: Header=BB0_6 Depth=1 - ld.d $a0, $sp, 360 # 8-byte Folded Reload + ld.d $a0, $sp, 344 # 8-byte Folded Reload slli.d $a0, $a0, 2 - ld.d $a1, $sp, 392 # 8-byte Folded Reload + ld.d $a1, $sp, 408 # 8-byte Folded Reload stx.w $s3, $a1, $a0 slli.w $s1, $s4, 2 move $a0, $s1 + st.d $a3, $sp, 352 # 8-byte Folded Spill pcaddu18i $ra, %call36(xmalloc) jirl $ra, $ra, 0 move $fp, $a0 @@ -598,15 +600,15 @@ SIM4: # @SIM4 pcaddu18i $ra, %call36(xmalloc) jirl $ra, $ra, 0 move $t1, $fp + ld.d $t3, $sp, 352 # 8-byte Folded Reload move $t5, $a0 ori $a0, $zero, 1 ori $a1, $zero, 8 - ld.d $t2, $sp, 416 # 8-byte Folded Reload bltu $s2, $a1, .LBB0_55 # %bb.52: # %vector.ph1131 # in Loop: Header=BB0_6 Depth=1 - andi $a1, $t2, 248 - srli.d $a2, $t2, 3 + andi $a1, $s5, 248 + srli.d $a2, $s5, 3 ori $a0, $zero, 1 bstrins.d $a0, $a2, 7, 3 vreplgr2vr.w $vr0, $s4 @@ -623,10 +625,10 @@ SIM4: # @SIM4 bnez $a3, .LBB0_53 # %bb.54: # %middle.block1140 # in Loop: Header=BB0_6 Depth=1 - beq $a1, $t2, .LBB0_57 + beq $a1, $s5, .LBB0_57 .LBB0_55: # %scalar.ph1129.preheader # in Loop: Header=BB0_6 Depth=1 - sub.d $a1, $t2, $a0 + sub.d $a1, $s5, $a0 addi.d $a1, $a1, 1 alsl.d $a0, $a0, $t1, 2 .p2align 4, , 16 @@ -639,144 +641,136 @@ SIM4: # @SIM4 bnez $a1, .LBB0_56 .LBB0_57: # %.loopexit1157 # in Loop: Header=BB0_6 Depth=1 - ld.d $t4, $sp, 360 # 8-byte Folded Reload - ld.d $a3, $sp, 392 # 8-byte Folded Reload - alsl.d $t7, $t4, $a3, 2 - ld.w $a0, $t7, 0 + ld.d $t4, $sp, 344 # 8-byte Folded Reload + ld.d $a3, $sp, 408 # 8-byte Folded Reload + alsl.d $t2, $t4, $a3, 2 + ld.w $a0, $t2, 0 move $a1, $zero move $a2, $zero st.w $a0, $t1, 0 st.w $t4, $t5, 0 - bstrpick.d $a0, $s5, 31, 31 - add.w $a0, $s5, $a0 - srai.d $t8, $a0, 1 - slli.w $fp, $s5, 1 + ld.d $a4, $sp, 400 # 8-byte Folded Reload + bstrpick.d $a0, $a4, 31, 31 + add.w $a0, $a4, $a0 + srai.d $t7, $a0, 1 + slli.w $t8, $a4, 1 addi.w $a5, $t4, 2 - addi.d $s1, $t2, 2 - ld.d $a0, $sp, 424 # 8-byte Folded Reload - sub.d $a4, $a3, $a0 - st.d $a4, $sp, 232 # 8-byte Folded Spill + addi.d $s1, $s5, 2 + sub.d $a0, $a3, $t3 + st.d $a0, $sp, 216 # 8-byte Folded Spill addi.w $a7, $t4, -1 - sub.d $s0, $a7, $t2 - addi.d $ra, $a3, 16 - addi.d $a0, $a0, 16 - st.d $a0, $sp, 336 # 8-byte Folded Spill + sub.d $t0, $a7, $s5 + addi.d $fp, $a3, 16 + addi.d $s0, $t3, 16 ori $s2, $zero, 1 - ori $t3, $zero, 2 + ori $a0, $zero, 2 + st.d $a0, $sp, 400 # 8-byte Folded Spill move $s3, $t4 - st.d $t4, $sp, 384 # 8-byte Folded Spill - st.d $t5, $sp, 344 # 8-byte Folded Spill - st.d $t1, $sp, 352 # 8-byte Folded Spill - st.d $t7, $sp, 304 # 8-byte Folded Spill - st.d $t8, $sp, 272 # 8-byte Folded Spill - st.d $fp, $sp, 248 # 8-byte Folded Spill - st.d $ra, $sp, 240 # 8-byte Folded Spill + move $ra, $t4 + st.d $t5, $sp, 328 # 8-byte Folded Spill + st.d $t1, $sp, 336 # 8-byte Folded Spill + st.d $t2, $sp, 296 # 8-byte Folded Spill + st.d $t7, $sp, 272 # 8-byte Folded Spill + st.d $t8, $sp, 256 # 8-byte Folded Spill + st.d $fp, $sp, 232 # 8-byte Folded Spill + st.d $s0, $sp, 224 # 8-byte Folded Spill .LBB0_58: # Parent Loop BB0_6 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB0_74 Depth 3 # Child Loop BB0_83 Depth 4 # Child Loop BB0_92 Depth 3 # Child Loop BB0_97 Depth 3 - # Child Loop BB0_101 Depth 3 + # Child Loop BB0_100 Depth 3 slli.d $a0, $a2, 2 ldx.w $a0, $t1, $a0 move $t6, $s3 - sub.w $a0, $t2, $a0 - ori $a4, $zero, 2 - bge $t8, $a0, .LBB0_61 + sub.w $a4, $s5, $a0 + ori $a0, $zero, 2 + bge $t7, $a4, .LBB0_61 # %bb.59: # in Loop: Header=BB0_58 Depth=2 - bge $a0, $fp, .LBB0_66 + bge $a4, $t8, .LBB0_66 # %bb.60: # in Loop: Header=BB0_58 Depth=2 - ld.d $a0, $sp, 376 # 8-byte Folded Reload - ld.w $a4, $a0, 28 + ld.d $a0, $sp, 360 # 8-byte Folded Reload + ld.w $a0, $a0, 28 .LBB0_61: # %good_ratio.exit.i # in Loop: Header=BB0_58 Depth=2 - ori $t0, $zero, 1 - bge $a4, $a2, .LBB0_69 + ori $a3, $zero, 1 + bge $a0, $a2, .LBB0_69 .LBB0_62: # in Loop: Header=BB0_58 Depth=2 - beqz $a2, .LBB0_107 + beqz $a2, .LBB0_106 # %bb.63: # in Loop: Header=BB0_58 Depth=2 addi.d $a0, $a2, -1 slli.d $a4, $a0, 2 ldx.w $a4, $t1, $a4 - sub.w $a4, $t2, $a4 - ori $a6, $zero, 2 - bge $t8, $a4, .LBB0_68 + sub.w $a6, $s5, $a4 + ori $a4, $zero, 2 + bge $t7, $a6, .LBB0_68 # %bb.64: # in Loop: Header=BB0_58 Depth=2 - bge $a4, $fp, .LBB0_67 + bge $a6, $t8, .LBB0_67 # %bb.65: # in Loop: Header=BB0_58 Depth=2 - ld.d $a3, $sp, 376 # 8-byte Folded Reload - ld.w $a6, $a3, 28 + ld.d $a4, $sp, 360 # 8-byte Folded Reload + ld.w $a4, $a4, 28 b .LBB0_68 .LBB0_66: # in Loop: Header=BB0_58 Depth=2 - pcalau12i $a4, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a4, %pc_lo12(.LCPI0_0) - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - vldi $vr2, -912 - fmadd.d $fa0, $fa1, $fa0, $fa2 + movgr2fr.w $fa0, $a4 + ffint.d.w $fa0, $fa0 + vldi $vr1, -912 + fmadd.d $fa0, $fa0, $fs0, $fa1 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a4, $fa0 - ori $t0, $zero, 1 - blt $a4, $a2, .LBB0_62 + movfr2gr.s $a0, $fa0 + ori $a3, $zero, 1 + blt $a0, $a2, .LBB0_62 b .LBB0_69 .LBB0_67: # in Loop: Header=BB0_58 Depth=2 - pcalau12i $a6, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a6, %pc_lo12(.LCPI0_0) - movgr2fr.w $fa1, $a4 - ffint.d.w $fa1, $fa1 - vldi $vr2, -912 - fmadd.d $fa0, $fa1, $fa0, $fa2 + movgr2fr.w $fa0, $a6 + ffint.d.w $fa0, $fa0 + vldi $vr1, -912 + fmadd.d $fa0, $fa0, $fs0, $fa1 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a6, $fa0 + movfr2gr.s $a4, $fa0 .LBB0_68: # %good_ratio.exit259.i # in Loop: Header=BB0_58 Depth=2 - blt $a6, $a0, .LBB0_107 + blt $a4, $a0, .LBB0_106 .LBB0_69: # %.critedge7.i # in Loop: Header=BB0_58 Depth=2 - ori $s5, $zero, 1 + ori $a3, $zero, 1 addi.w $s3, $t6, -1 - ld.d $a0, $sp, 384 # 8-byte Folded Reload - addi.w $a0, $a0, 1 - st.d $a0, $sp, 384 # 8-byte Folded Spill - bge $a0, $s3, .LBB0_71 + addi.w $ra, $ra, 1 + st.d $t0, $sp, 368 # 8-byte Folded Spill + bge $ra, $s3, .LBB0_71 # %bb.70: # %._crit_edge291.thread.i # in Loop: Header=BB0_58 Depth=2 - ld.w $a0, $t7, 0 + ld.w $a0, $t2, 0 slli.d $a4, $s2, 2 stx.w $a0, $t1, $a4 stx.w $t4, $t5, $a4 - b .LBB0_102 + b .LBB0_101 .p2align 4, , 16 .LBB0_71: # %.lr.ph290.i # in Loop: Header=BB0_58 Depth=2 - move $t7, $t3 + st.d $ra, $sp, 288 # 8-byte Folded Spill + ld.d $t7, $sp, 400 # 8-byte Folded Reload bstrins.d $t7, $zero, 2, 0 - alsl.d $t8, $a7, $ra, 2 - move $t0, $s0 - ld.d $a0, $sp, 336 # 8-byte Folded Reload - alsl.d $ra, $a7, $a0, 2 - ld.d $a0, $sp, 424 # 8-byte Folded Reload - alsl.d $t1, $a7, $a0, 2 + alsl.d $t8, $a7, $fp, 2 + alsl.d $ra, $a7, $s0, 2 + alsl.d $t1, $a7, $t3, 2 slli.d $a0, $a2, 1 addi.d $a0, $a0, 3 - st.d $a0, $sp, 288 # 8-byte Folded Spill + st.d $a0, $sp, 280 # 8-byte Folded Spill nor $a0, $a2, $zero add.w $t2, $t4, $a0 add.d $a4, $s2, $t4 - ld.d $a0, $sp, 392 # 8-byte Folded Reload - alsl.d $a3, $a4, $a0, 2 - st.d $a3, $sp, 368 # 8-byte Folded Spill + ld.d $a0, $sp, 408 # 8-byte Folded Reload + alsl.d $a6, $a4, $a0, 2 + st.d $a6, $sp, 376 # 8-byte Folded Spill alsl.d $a6, $t2, $a0, 2 - st.d $s0, $sp, 296 # 8-byte Folded Spill move $t5, $s3 b .LBB0_74 .p2align 4, , 16 .LBB0_72: # %.critedge11.thread.i # in Loop: Header=BB0_74 Depth=3 slli.d $a0, $t5, 2 - ld.d $a3, $sp, 424 # 8-byte Folded Reload - stx.w $fp, $a3, $a0 + stx.w $fp, $t3, $a0 .LBB0_73: # in Loop: Header=BB0_74 Depth=3 addi.d $t5, $t5, 1 addi.w $a0, $t5, 0 @@ -794,16 +788,16 @@ SIM4: # @SIM4 .LBB0_76: # in Loop: Header=BB0_74 Depth=3 bne $t5, $a4, .LBB0_78 # %bb.77: # in Loop: Header=BB0_74 Depth=3 - ld.d $a0, $sp, 368 # 8-byte Folded Reload + ld.d $a0, $sp, 376 # 8-byte Folded Reload ld.w $a0, $a0, -4 addi.w $s0, $a0, -1 b .LBB0_81 .p2align 4, , 16 .LBB0_78: # in Loop: Header=BB0_74 Depth=3 - ld.d $a3, $sp, 392 # 8-byte Folded Reload - alsl.d $a0, $t5, $a3, 2 + ld.d $fp, $sp, 408 # 8-byte Folded Reload + alsl.d $a0, $t5, $fp, 2 slli.d $t4, $t5, 2 - ldx.w $t4, $a3, $t4 + ldx.w $t4, $fp, $t4 ld.w $fp, $a0, 4 ld.w $a0, $a0, -4 addi.w $s0, $t4, -1 @@ -815,16 +809,18 @@ SIM4: # @SIM4 addi.w $s0, $a0, -1 slt $a0, $t4, $a0 slt $t4, $s0, $fp - masknez $a3, $fp, $t4 + move $t3, $s5 + masknez $s5, $fp, $t4 maskeqz $t4, $s0, $t4 - or $a3, $t4, $a3 - masknez $a3, $a3, $a0 + or $t4, $t4, $s5 + move $s5, $t3 + ld.d $t3, $sp, 352 # 8-byte Folded Reload + masknez $t4, $t4, $a0 maskeqz $a0, $fp, $a0 - or $s0, $a0, $a3 + or $s0, $a0, $t4 .p2align 4, , 16 .LBB0_81: # in Loop: Header=BB0_74 Depth=3 - ld.d $a0, $sp, 416 # 8-byte Folded Reload - sub.d $a0, $t5, $a0 + sub.d $a0, $t5, $s5 add.w $fp, $s0, $a0 blez $s0, .LBB0_87 # %bb.82: # in Loop: Header=BB0_74 Depth=3 @@ -837,45 +833,43 @@ SIM4: # @SIM4 # => This Inner Loop Header: Depth=4 move $fp, $s0 add.w $a0, $t0, $s0 - add.d $a3, $s8, $s0 - ld.bu $a3, $a3, -1 - add.d $t4, $s6, $a0 + add.d $t4, $s8, $s0 ld.bu $t4, $t4, -1 - bne $a3, $t4, .LBB0_72 + add.d $s0, $s6, $a0 + ld.bu $s0, $s0, -1 + bne $t4, $s0, .LBB0_72 # %bb.84: # in Loop: Header=BB0_83 Depth=4 addi.w $s0, $fp, -1 blt $a0, $s7, .LBB0_86 # %bb.85: # in Loop: Header=BB0_83 Depth=4 - blt $s5, $fp, .LBB0_83 + blt $a3, $fp, .LBB0_83 .LBB0_86: # %.critedge11.i.loopexit # in Loop: Header=BB0_74 Depth=3 add.w $fp, $t0, $s0 .LBB0_87: # %.critedge11.i # in Loop: Header=BB0_74 Depth=3 slli.d $a0, $t5, 2 - or $a3, $s0, $fp - ld.d $t4, $sp, 424 # 8-byte Folded Reload - stx.w $s0, $t4, $a0 - beqz $a3, .LBB0_104 + or $t4, $s0, $fp + stx.w $s0, $t3, $a0 + beqz $t4, .LBB0_103 # %bb.88: # in Loop: Header=BB0_74 Depth=3 - beqz $s0, .LBB0_105 + beqz $s0, .LBB0_104 # %bb.89: # in Loop: Header=BB0_74 Depth=3 bnez $fp, .LBB0_73 - b .LBB0_106 + b .LBB0_105 .LBB0_90: # %._crit_edge291.i # in Loop: Header=BB0_58 Depth=2 - ld.d $a0, $sp, 304 # 8-byte Folded Reload - ld.w $a3, $a0, 0 - ld.d $a4, $sp, 352 # 8-byte Folded Reload - alsl.d $a0, $s2, $a4, 2 + ld.d $a0, $sp, 296 # 8-byte Folded Reload + ld.w $a4, $a0, 0 + ld.d $a3, $sp, 336 # 8-byte Folded Reload + alsl.d $a0, $s2, $a3, 2 slli.d $a6, $s2, 2 - stx.w $a3, $a4, $a6 - ld.d $t5, $sp, 344 # 8-byte Folded Reload + stx.w $a4, $a3, $a6 + ld.d $t5, $sp, 328 # 8-byte Folded Reload alsl.d $a4, $s2, $t5, 2 - ld.d $t4, $sp, 360 # 8-byte Folded Reload + ld.d $t4, $sp, 344 # 8-byte Folded Reload stx.w $t4, $t5, $a6 addi.d $a6, $zero, -3 - ld.d $t2, $sp, 416 # 8-byte Folded Reload b .LBB0_92 .p2align 4, , 16 .LBB0_91: # in Loop: Header=BB0_92 Depth=3 @@ -887,27 +881,26 @@ SIM4: # @SIM4 # Parent Loop BB0_58 Depth=2 # => This Inner Loop Header: Depth=3 ld.w $t0, $t1, 0 - ld.w $a3, $a0, 0 - bge $t0, $a3, .LBB0_91 + ld.w $t2, $a0, 0 + bge $t0, $t2, .LBB0_91 # %bb.93: # in Loop: Header=BB0_92 Depth=3 st.w $t0, $a0, 0 - add.d $a3, $t6, $a6 - addi.d $a3, $a3, 2 - st.w $a3, $a4, 0 + add.d $t0, $t6, $a6 + addi.d $t0, $t0, 2 + st.w $t0, $a4, 0 b .LBB0_91 .LBB0_94: # %.lr.ph298.i.preheader # in Loop: Header=BB0_58 Depth=2 ori $a0, $zero, 3 - bltu $a2, $a0, .LBB0_99 + bltu $a2, $a0, .LBB0_98 # %bb.95: # %.lr.ph298.i.preheader # in Loop: Header=BB0_58 Depth=2 ori $a0, $zero, 32 - ld.d $t1, $sp, 352 # 8-byte Folded Reload - ld.d $a3, $sp, 232 # 8-byte Folded Reload + ld.d $a3, $sp, 216 # 8-byte Folded Reload bltu $a3, $a0, .LBB0_98 # %bb.96: # %vector.ph1117 # in Loop: Header=BB0_58 Depth=2 - ld.d $a0, $sp, 288 # 8-byte Folded Reload + ld.d $a0, $sp, 280 # 8-byte Folded Reload bstrins.d $a0, $zero, 2, 0 add.d $a0, $a0, $s3 .p2align 4, , 16 @@ -923,132 +916,130 @@ SIM4: # @SIM4 addi.d $t8, $t8, 32 addi.d $ra, $ra, 32 bnez $t7, .LBB0_97 - b .LBB0_100 + b .LBB0_99 .LBB0_98: # in Loop: Header=BB0_58 Depth=2 move $a0, $s3 - b .LBB0_100 .LBB0_99: # in Loop: Header=BB0_58 Depth=2 - move $a0, $s3 - ld.d $t1, $sp, 352 # 8-byte Folded Reload -.LBB0_100: # %.lr.ph298.i.preheader1168 - # in Loop: Header=BB0_58 Depth=2 - ld.d $a3, $sp, 392 # 8-byte Folded Reload + ld.d $t1, $sp, 336 # 8-byte Folded Reload + ld.d $t2, $sp, 296 # 8-byte Folded Reload + ld.d $a3, $sp, 408 # 8-byte Folded Reload alsl.d $a4, $a0, $a3, 2 - ld.d $a3, $sp, 424 # 8-byte Folded Reload - alsl.d $a6, $a0, $a3, 2 - ld.d $t7, $sp, 304 # 8-byte Folded Reload - ld.d $t8, $sp, 272 # 8-byte Folded Reload - ld.d $fp, $sp, 248 # 8-byte Folded Reload - ld.d $s0, $sp, 296 # 8-byte Folded Reload - ld.d $ra, $sp, 240 # 8-byte Folded Reload - .p2align 4, , 16 -.LBB0_101: # %.lr.ph298.i + alsl.d $a6, $a0, $t3, 2 + ld.d $t7, $sp, 272 # 8-byte Folded Reload + ld.d $t8, $sp, 256 # 8-byte Folded Reload + ld.d $fp, $sp, 232 # 8-byte Folded Reload + ld.d $s0, $sp, 224 # 8-byte Folded Reload + ld.d $ra, $sp, 288 # 8-byte Folded Reload + .p2align 4, , 16 +.LBB0_100: # %.lr.ph298.i # Parent Loop BB0_6 Depth=1 # Parent Loop BB0_58 Depth=2 # => This Inner Loop Header: Depth=3 - ld.w $a3, $a6, 0 - st.w $a3, $a4, 0 + ld.w $t0, $a6, 0 + st.w $t0, $a4, 0 addi.w $a0, $a0, 1 addi.d $a4, $a4, 4 addi.d $a6, $a6, 4 - bne $a5, $a0, .LBB0_101 -.LBB0_102: # %.loopexit.i + bne $a5, $a0, .LBB0_100 +.LBB0_101: # %.loopexit.i # in Loop: Header=BB0_58 Depth=2 addi.d $s2, $s2, 1 addi.w $a5, $a5, 1 addi.d $a2, $a2, 1 - addi.d $s0, $s0, -1 + ld.d $t0, $sp, 368 # 8-byte Folded Reload + addi.d $t0, $t0, -1 addi.w $a1, $a1, 2 addi.w $a7, $a7, -1 - addi.d $t3, $t3, 2 + ld.d $a0, $sp, 400 # 8-byte Folded Reload + addi.d $a0, $a0, 2 + st.d $a0, $sp, 400 # 8-byte Folded Spill bne $a2, $s4, .LBB0_58 -# %bb.103: # in Loop: Header=BB0_6 Depth=1 - ori $t0, $zero, 1 - b .LBB0_108 -.LBB0_104: # in Loop: Header=BB0_6 Depth=1 - ld.d $a0, $sp, 392 # 8-byte Folded Reload +# %bb.102: # in Loop: Header=BB0_6 Depth=1 + ori $a3, $zero, 1 + b .LBB0_107 +.LBB0_103: # in Loop: Header=BB0_6 Depth=1 + ld.d $a0, $sp, 408 # 8-byte Folded Reload + move $fp, $t3 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 424 # 8-byte Folded Reload + move $a0, $fp pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 352 # 8-byte Folded Reload + ld.d $a0, $sp, 336 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 344 # 8-byte Folded Reload + ld.d $a0, $sp, 328 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 move $s1, $s2 - ld.d $fp, $sp, 376 # 8-byte Folded Reload + ld.d $fp, $sp, 360 # 8-byte Folded Reload ori $s8, $zero, 2 - ld.d $s5, $sp, 168 # 8-byte Folded Reload - ld.d $s7, $sp, 136 # 8-byte Folded Reload - ld.d $t7, $sp, 400 # 8-byte Folded Reload - ld.d $t2, $sp, 328 # 8-byte Folded Reload - ld.d $s4, $sp, 320 # 8-byte Folded Reload - ld.d $a4, $sp, 312 # 8-byte Folded Reload - b .LBB0_113 -.LBB0_105: # in Loop: Header=BB0_6 Depth=1 - ld.d $a0, $sp, 392 # 8-byte Folded Reload + ld.d $s7, $sp, 168 # 8-byte Folded Reload + ld.d $t7, $sp, 384 # 8-byte Folded Reload + ld.d $t2, $sp, 320 # 8-byte Folded Reload + ld.d $s4, $sp, 312 # 8-byte Folded Reload + ld.d $a4, $sp, 304 # 8-byte Folded Reload + b .LBB0_112 +.LBB0_104: # in Loop: Header=BB0_6 Depth=1 + ld.d $a0, $sp, 408 # 8-byte Folded Reload + move $s0, $t3 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 424 # 8-byte Folded Reload + move $a0, $s0 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 352 # 8-byte Folded Reload + ld.d $a0, $sp, 336 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 344 # 8-byte Folded Reload + ld.d $a0, $sp, 328 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a4, $sp, 312 # 8-byte Folded Reload + ld.d $a4, $sp, 304 # 8-byte Folded Reload add.d $a4, $fp, $a4 move $s1, $s2 - ld.d $fp, $sp, 376 # 8-byte Folded Reload + ld.d $fp, $sp, 360 # 8-byte Folded Reload ori $s8, $zero, 2 - ld.d $s5, $sp, 168 # 8-byte Folded Reload - ld.d $s7, $sp, 136 # 8-byte Folded Reload - ld.d $t7, $sp, 400 # 8-byte Folded Reload - ld.d $t2, $sp, 328 # 8-byte Folded Reload - ld.d $s4, $sp, 320 # 8-byte Folded Reload - b .LBB0_113 -.LBB0_106: # in Loop: Header=BB0_6 Depth=1 - ld.d $a0, $sp, 392 # 8-byte Folded Reload + ld.d $s7, $sp, 168 # 8-byte Folded Reload + ld.d $t7, $sp, 384 # 8-byte Folded Reload + ld.d $t2, $sp, 320 # 8-byte Folded Reload + ld.d $s4, $sp, 312 # 8-byte Folded Reload + b .LBB0_112 +.LBB0_105: # in Loop: Header=BB0_6 Depth=1 + ld.d $a0, $sp, 408 # 8-byte Folded Reload + move $fp, $t3 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 424 # 8-byte Folded Reload + move $a0, $fp pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 352 # 8-byte Folded Reload + ld.d $a0, $sp, 336 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 344 # 8-byte Folded Reload + ld.d $a0, $sp, 328 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $s4, $sp, 320 # 8-byte Folded Reload + ld.d $s4, $sp, 312 # 8-byte Folded Reload add.d $s4, $s0, $s4 move $s1, $s2 - ld.d $fp, $sp, 376 # 8-byte Folded Reload + ld.d $fp, $sp, 360 # 8-byte Folded Reload ori $s8, $zero, 2 - ld.d $s5, $sp, 168 # 8-byte Folded Reload - ld.d $s7, $sp, 136 # 8-byte Folded Reload - ld.d $t7, $sp, 400 # 8-byte Folded Reload - ld.d $t2, $sp, 328 # 8-byte Folded Reload - ld.d $a4, $sp, 312 # 8-byte Folded Reload - b .LBB0_113 -.LBB0_107: # %.critedge5.split.loop.exit365.i + ld.d $s7, $sp, 168 # 8-byte Folded Reload + ld.d $t7, $sp, 384 # 8-byte Folded Reload + ld.d $t2, $sp, 320 # 8-byte Folded Reload + ld.d $a4, $sp, 304 # 8-byte Folded Reload + b .LBB0_112 +.LBB0_106: # %.critedge5.split.loop.exit365.i # in Loop: Header=BB0_6 Depth=1 move $s1, $s2 -.LBB0_108: # %.critedge5.i.preheader +.LBB0_107: # %.critedge5.i.preheader # in Loop: Header=BB0_6 Depth=1 - ld.d $fp, $sp, 376 # 8-byte Folded Reload + ld.d $fp, $sp, 360 # 8-byte Folded Reload ori $s8, $zero, 2 - ld.d $s5, $sp, 168 # 8-byte Folded Reload - ld.d $s7, $sp, 136 # 8-byte Folded Reload - ld.d $s4, $sp, 320 # 8-byte Folded Reload - ld.d $a3, $sp, 312 # 8-byte Folded Reload + ld.d $s7, $sp, 168 # 8-byte Folded Reload + ld.d $s4, $sp, 312 # 8-byte Folded Reload + ld.d $a4, $sp, 304 # 8-byte Folded Reload .p2align 4, , 16 -.LBB0_109: # %.critedge5.i +.LBB0_108: # %.critedge5.i # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 addi.w $a0, $s1, 0 @@ -1057,20 +1048,20 @@ SIM4: # @SIM4 ld.w $a1, $a1, -4 ldx.w $a2, $t1, $a2 sub.w $a1, $a1, $a2 - blt $s8, $a1, .LBB0_112 -# %bb.110: # in Loop: Header=BB0_109 Depth=2 + blt $s8, $a1, .LBB0_111 +# %bb.109: # in Loop: Header=BB0_108 Depth=2 addi.d $s1, $s1, -1 - blt $t0, $a0, .LBB0_109 -# %bb.111: # in Loop: Header=BB0_6 Depth=1 + blt $a3, $a0, .LBB0_108 +# %bb.110: # in Loop: Header=BB0_6 Depth=1 move $s1, $zero -.LBB0_112: # %.critedge15.i +.LBB0_111: # %.critedge15.i # in Loop: Header=BB0_6 Depth=1 addi.w $a0, $s1, 0 slli.d $a0, $a0, 2 ldx.w $a1, $t1, $a0 ldx.w $a0, $t5, $a0 add.d $s4, $a1, $s4 - sub.d $a2, $a3, $t2 + sub.d $a2, $a4, $s5 add.d $a1, $a2, $a1 add.d $s0, $a1, $a0 move $a0, $t1 @@ -1080,16 +1071,16 @@ SIM4: # @SIM4 move $a0, $s2 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 392 # 8-byte Folded Reload + ld.d $a0, $sp, 408 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 424 # 8-byte Folded Reload + ld.d $a0, $sp, 352 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 move $a4, $s0 - ld.d $t7, $sp, 400 # 8-byte Folded Reload - ld.d $t2, $sp, 328 # 8-byte Folded Reload -.LBB0_113: # %extend_bw.exit + ld.d $t7, $sp, 384 # 8-byte Folded Reload + ld.d $t2, $sp, 320 # 8-byte Folded Reload +.LBB0_112: # %extend_bw.exit # in Loop: Header=BB0_6 Depth=1 ld.w $a0, $t2, 4 ld.w $a1, $fp, 56 @@ -1099,19 +1090,19 @@ SIM4: # @SIM4 mul.d $a0, $a0, $a1 mul.d $a1, $a2, $s1 add.w $a0, $a1, $a0 - bltz $a0, .LBB0_115 -# %bb.114: # in Loop: Header=BB0_6 Depth=1 + bltz $a0, .LBB0_114 +# %bb.113: # in Loop: Header=BB0_6 Depth=1 addi.d $a0, $s4, 1 st.w $a0, $t2, 4 addi.d $a0, $a4, 1 st.w $a0, $t2, 0 .p2align 4, , 16 -.LBB0_115: # %.thread549 +.LBB0_114: # %.thread549 # in Loop: Header=BB0_6 Depth=1 - ld.d $s4, $sp, 408 # 8-byte Folded Reload + ld.d $s4, $sp, 392 # 8-byte Folded Reload ld.w $a0, $s4, 40 - bnez $a0, .LBB0_211 -# %bb.116: # in Loop: Header=BB0_6 Depth=1 + bnez $a0, .LBB0_210 +# %bb.115: # in Loop: Header=BB0_6 Depth=1 ld.w $s0, $s4, 16 ld.d $a0, $s4, 8 addi.d $a1, $s0, -1 @@ -1119,35 +1110,35 @@ SIM4: # @SIM4 slli.d $a1, $a1, 3 ldx.d $s6, $a0, $a1 ld.w $a1, $s6, 12 - ld.d $a0, $sp, 280 # 8-byte Folded Reload + ld.d $a0, $sp, 264 # 8-byte Folded Reload ldptr.w $a0, $a0, 4148 - bgeu $a1, $a0, .LBB0_211 -# %bb.117: # in Loop: Header=BB0_6 Depth=1 + bgeu $a1, $a0, .LBB0_210 +# %bb.116: # in Loop: Header=BB0_6 Depth=1 sub.w $a0, $a0, $a1 ori $a2, $zero, 61 - bltu $a0, $a2, .LBB0_131 -# %bb.118: # in Loop: Header=BB0_6 Depth=1 + bltu $a0, $a2, .LBB0_130 +# %bb.117: # in Loop: Header=BB0_6 Depth=1 ld.w $a2, $s4, 24 ld.w $a3, $s4, 28 ld.w $a4, $s6, 8 add.w $a2, $a3, $a2 - ld.d $s2, $sp, 376 # 8-byte Folded Reload - bgeu $a4, $a2, .LBB0_131 -# %bb.119: # in Loop: Header=BB0_6 Depth=1 + ld.d $s2, $sp, 360 # 8-byte Folded Reload + bgeu $a4, $a2, .LBB0_130 +# %bb.118: # in Loop: Header=BB0_6 Depth=1 ld.d $s1, $sp, 152 # 8-byte Folded Reload ld.w $a2, $s1, 36 bstrpick.d $a1, $a1, 31, 0 sltui $a3, $a2, 10 - ld.d $a4, $sp, 280 # 8-byte Folded Reload + ld.d $a4, $sp, 264 # 8-byte Folded Reload ld.d $a4, $a4, 16 maskeqz $a2, $a2, $a3 ori $a5, $zero, 10 masknez $a3, $a5, $a3 or $a2, $a2, $a3 add.d $a1, $a4, $a1 - st.w $a2, $sp, 468 - st.d $a1, $sp, 440 - st.w $a0, $sp, 448 + st.w $a2, $sp, 452 + st.d $a1, $sp, 424 + st.w $a0, $sp, 432 slli.d $a1, $a2, 1 addi.d $a1, $a1, -2 bstrpick.d $a1, $a1, 31, 1 @@ -1155,21 +1146,21 @@ SIM4: # @SIM4 addi.d $a2, $zero, -1 sll.w $a1, $a2, $a1 nor $a1, $a1, $zero - st.w $a1, $sp, 464 + st.w $a1, $sp, 448 addi.d $a0, $a0, 1 bstrpick.d $a0, $a0, 31, 0 slli.d $a0, $a0, 2 pcaddu18i $ra, %call36(xmalloc) jirl $ra, $ra, 0 move $s0, $a0 - st.d $a0, $sp, 456 + st.d $a0, $sp, 440 ori $a1, $zero, 8 lu12i.w $a0, 128 pcaddu18i $ra, %call36(xcalloc) jirl $ra, $ra, 0 move $fp, $a0 - st.d $a0, $sp, 432 - addi.d $a0, $sp, 432 + st.d $a0, $sp, 416 + addi.d $a0, $sp, 416 pcaddu18i $ra, %call36(bld_table) jirl $ra, $ra, 0 ld.d $a0, $s1, 8 @@ -1183,9 +1174,9 @@ SIM4: # @SIM4 add.w $a2, $a2, $a0 addi.w $a3, $a3, 1 addi.w $a4, $a4, 1 - addi.d $a0, $sp, 432 - addi.d $a6, $sp, 488 - addi.d $a7, $sp, 472 + addi.d $a0, $sp, 416 + addi.d $a6, $sp, 472 + addi.d $a7, $sp, 456 st.d $a7, $sp, 0 move $a7, $zero pcaddu18i $ra, %call36(exon_cores) @@ -1196,29 +1187,29 @@ SIM4: # @SIM4 move $s0, $zero lu12i.w $s1, 1024 .p2align 4, , 16 -.LBB0_120: # Parent Loop BB0_6 Depth=1 +.LBB0_119: # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 ldx.d $a0, $fp, $s0 - move $a1, $s5 + move $a1, $s7 pcaddu18i $ra, %call36(tdestroy) jirl $ra, $ra, 0 addi.d $s0, $s0, 8 - bne $s0, $s1, .LBB0_120 -# %bb.121: # %free_hash_env.exit372 + bne $s0, $s1, .LBB0_119 +# %bb.120: # %free_hash_env.exit372 # in Loop: Header=BB0_6 Depth=1 move $a0, $fp pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.wu $a2, $sp, 480 - beqz $a2, .LBB0_203 -# %bb.122: # %.lr.ph.preheader.i374 + ld.wu $a2, $sp, 464 + beqz $a2, .LBB0_202 +# %bb.121: # %.lr.ph.preheader.i374 # in Loop: Header=BB0_6 Depth=1 - ld.d $a6, $sp, 472 + ld.d $a6, $sp, 456 move $a1, $zero slli.d $a2, $a2, 3 - ld.d $a0, $sp, 400 # 8-byte Folded Reload + ld.d $a0, $sp, 384 # 8-byte Folded Reload .p2align 4, , 16 -.LBB0_123: # %.lr.ph.i376 +.LBB0_122: # %.lr.ph.i376 # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 ldx.d $a3, $a6, $a1 @@ -1229,20 +1220,20 @@ SIM4: # @SIM4 rotri.d $a4, $a5, 32 addi.d $a1, $a1, 8 st.d $a4, $a3, 8 - bne $a2, $a1, .LBB0_123 -# %bb.124: # %swap_seqs.exit381 + bne $a2, $a1, .LBB0_122 +# %bb.123: # %swap_seqs.exit381 # in Loop: Header=BB0_6 Depth=1 ld.d $t5, $a6, 0 ld.w $a7, $t5, 0 ld.d $t4, $sp, 152 # 8-byte Folded Reload - bltu $a7, $s8, .LBB0_130 -# %bb.125: # %swap_seqs.exit381 + bltu $a7, $s8, .LBB0_129 +# %bb.124: # %swap_seqs.exit381 # in Loop: Header=BB0_6 Depth=1 ld.wu $a6, $t5, 4 - bltu $a6, $s8, .LBB0_130 -# %bb.126: # %.lr.ph.preheader.i382 + bltu $a6, $s8, .LBB0_129 +# %bb.125: # %.lr.ph.preheader.i382 # in Loop: Header=BB0_6 Depth=1 - ld.d $a1, $sp, 280 # 8-byte Folded Reload + ld.d $a1, $sp, 264 # 8-byte Folded Reload ld.d $a1, $a1, 16 ld.d $a2, $t4, 8 move $a3, $zero @@ -1252,52 +1243,52 @@ SIM4: # @SIM4 addi.d $a6, $a6, -1 addi.d $a7, $a7, -1 .p2align 4, , 16 -.LBB0_127: # %.lr.ph.i383 +.LBB0_126: # %.lr.ph.i383 # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 add.d $t0, $a4, $a3 add.d $t1, $a5, $a3 ld.bu $t2, $t1, -2 ld.bu $t3, $t0, -2 - bne $t2, $t3, .LBB0_130 -# %bb.128: # in Loop: Header=BB0_127 Depth=2 + bne $t2, $t3, .LBB0_129 +# %bb.127: # in Loop: Header=BB0_126 Depth=2 addi.d $t1, $t1, -3 st.w $a7, $t5, 0 st.w $a6, $t5, 4 - bltu $t1, $a2, .LBB0_130 -# %bb.129: # in Loop: Header=BB0_127 Depth=2 + bltu $t1, $a2, .LBB0_129 +# %bb.128: # in Loop: Header=BB0_126 Depth=2 addi.d $t0, $t0, -3 addi.d $a6, $a6, -1 addi.d $a7, $a7, -1 addi.d $a3, $a3, -1 - bgeu $t0, $a1, .LBB0_127 -.LBB0_130: # %grow_exon_left.exit + bgeu $t0, $a1, .LBB0_126 +.LBB0_129: # %grow_exon_left.exit # in Loop: Header=BB0_6 Depth=1 ld.w $a2, $s4, 16 ld.w $a3, $t4, 36 - addi.d $a1, $sp, 472 + addi.d $a1, $sp, 456 pcaddu18i $ra, %call36(merge) jirl $ra, $ra, 0 - ld.d $t7, $sp, 400 # 8-byte Folded Reload + ld.d $t7, $sp, 384 # 8-byte Folded Reload ld.w $s0, $s4, 16 ld.d $a0, $s4, 8 addi.d $a1, $s0, -1 bstrpick.d $a1, $a1, 31, 0 slli.d $a1, $a1, 3 ldx.d $s6, $a0, $a1 - st.w $zero, $sp, 480 -.LBB0_131: # in Loop: Header=BB0_6 Depth=1 - ld.d $a0, $sp, 280 # 8-byte Folded Reload - beqz $s0, .LBB0_144 -.LBB0_132: # %.lr.ph660 + st.w $zero, $sp, 464 +.LBB0_130: # in Loop: Header=BB0_6 Depth=1 + beqz $s0, .LBB0_143 +.LBB0_131: # %.lr.ph660 # in Loop: Header=BB0_6 Depth=1 ld.d $s2, $t7, 0 + ld.d $a0, $sp, 264 # 8-byte Folded Reload ld.d $fp, $a0, 16 move $s1, $zero bstrpick.d $s3, $s0, 31, 0 addi.w $s4, $s0, -1 .p2align 4, , 16 -.LBB0_133: # Parent Loop BB0_6 Depth=1 +.LBB0_132: # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 bstrpick.d $a0, $s4, 31, 0 slli.d $a0, $a0, 3 @@ -1307,36 +1298,36 @@ SIM4: # @SIM4 move $a2, $fp pcaddu18i $ra, %call36(is_polyAT_exon_p) jirl $ra, $ra, 0 - beqz $a0, .LBB0_136 -# %bb.134: # in Loop: Header=BB0_133 Depth=2 + beqz $a0, .LBB0_135 +# %bb.133: # in Loop: Header=BB0_132 Depth=2 addi.w $s1, $s1, 1 addi.d $s3, $s3, -1 addi.w $s4, $s4, -1 - bnez $s3, .LBB0_133 -# %bb.135: # in Loop: Header=BB0_6 Depth=1 + bnez $s3, .LBB0_132 +# %bb.134: # in Loop: Header=BB0_6 Depth=1 move $s1, $s0 -.LBB0_136: # %.critedge7 +.LBB0_135: # %.critedge7 # in Loop: Header=BB0_6 Depth=1 - ld.d $a0, $sp, 280 # 8-byte Folded Reload - ld.d $s4, $sp, 408 # 8-byte Folded Reload - ld.d $t7, $sp, 400 # 8-byte Folded Reload - beqz $s1, .LBB0_144 -# %bb.137: # in Loop: Header=BB0_6 Depth=1 + ld.d $s4, $sp, 392 # 8-byte Folded Reload + ld.d $t7, $sp, 384 # 8-byte Folded Reload + beqz $s1, .LBB0_143 +# %bb.136: # in Loop: Header=BB0_6 Depth=1 sub.w $a0, $s0, $s1 - bgeu $s0, $s1, .LBB0_139 -# %bb.138: # %._crit_edge.thread + bgeu $s0, $s1, .LBB0_138 +# %bb.137: # %._crit_edge.thread # in Loop: Header=BB0_6 Depth=1 st.w $a0, $s4, 16 - b .LBB0_143 -.LBB0_139: # %.lr.ph667.preheader + b .LBB0_142 +.LBB0_138: # %.lr.ph667.preheader # in Loop: Header=BB0_6 Depth=1 bstrpick.d $fp, $a0, 31, 0 slli.d $s0, $fp, 3 move $s2, $s4 ld.d $s4, $sp, 152 # 8-byte Folded Reload - ld.d $s6, $sp, 80 # 8-byte Folded Reload + ld.d $s5, $sp, 80 # 8-byte Folded Reload + ld.d $s6, $sp, 264 # 8-byte Folded Reload .p2align 4, , 16 -.LBB0_140: # %.lr.ph667 +.LBB0_139: # %.lr.ph667 # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 ld.d $a0, $s2, 8 @@ -1346,29 +1337,29 @@ SIM4: # @SIM4 ld.wu $a1, $s2, 16 addi.d $fp, $fp, 1 addi.d $s0, $s0, 8 - bltu $fp, $a1, .LBB0_140 -# %bb.141: # %._crit_edge + bltu $fp, $a1, .LBB0_139 +# %bb.140: # %._crit_edge # in Loop: Header=BB0_6 Depth=1 sub.w $a0, $a1, $s1 addi.w $a1, $a1, 0 st.w $a0, $s2, 16 beq $a1, $s1, .LBB0_5 -# %bb.142: # in Loop: Header=BB0_6 Depth=1 +# %bb.141: # in Loop: Header=BB0_6 Depth=1 move $s4, $s2 - ld.d $t7, $sp, 400 # 8-byte Folded Reload -.LBB0_143: # in Loop: Header=BB0_6 Depth=1 + ld.d $t7, $sp, 384 # 8-byte Folded Reload +.LBB0_142: # in Loop: Header=BB0_6 Depth=1 ld.d $a1, $t7, 0 addi.d $a0, $a0, -1 bstrpick.d $a0, $a0, 31, 0 slli.d $a0, $a0, 3 ldx.d $s6, $a1, $a0 - ld.d $a0, $sp, 280 # 8-byte Folded Reload -.LBB0_144: # %.critedge7.thread +.LBB0_143: # %.critedge7.thread # in Loop: Header=BB0_6 Depth=1 + ld.d $a0, $sp, 264 # 8-byte Folded Reload ldptr.w $a0, $a0, 4148 ld.w $a6, $s6, 12 - beq $a0, $a6, .LBB0_211 -# %bb.145: # in Loop: Header=BB0_6 Depth=1 + beq $a0, $a6, .LBB0_210 +# %bb.144: # in Loop: Header=BB0_6 Depth=1 sub.w $a0, $a0, $a6 sltui $a1, $a0, 250 maskeqz $a0, $a0, $a1 @@ -1378,7 +1369,7 @@ SIM4: # @SIM4 ld.wu $a7, $s6, 8 ld.d $a4, $sp, 152 # 8-byte Folded Reload ld.w $a0, $a4, 16 - ld.d $a1, $sp, 280 # 8-byte Folded Reload + ld.d $a1, $sp, 264 # 8-byte Folded Reload ld.d $t1, $a1, 16 ld.d $a1, $a4, 8 slli.d $s5, $s4, 2 @@ -1391,8 +1382,8 @@ SIM4: # @SIM4 bstrpick.d $t2, $a6, 31, 0 add.d $s8, $a1, $a7 move $s7, $zero - blez $a0, .LBB0_152 -# %bb.146: # %.lr.ph.preheader.i403 + blez $a0, .LBB0_151 +# %bb.145: # %.lr.ph.preheader.i403 # in Loop: Header=BB0_6 Depth=1 add.d $a0, $t1, $t2 addi.w $a1, $t0, -1 @@ -1406,45 +1397,45 @@ SIM4: # @SIM4 sub.d $a2, $zero, $a2 move $a3, $s8 .p2align 4, , 16 -.LBB0_147: # %.lr.ph.i405 +.LBB0_146: # %.lr.ph.i405 # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 ld.bu $a4, $a0, 0 ld.bu $a5, $a3, 0 - bne $a4, $a5, .LBB0_150 -# %bb.148: # in Loop: Header=BB0_147 Depth=2 + bne $a4, $a5, .LBB0_149 +# %bb.147: # in Loop: Header=BB0_146 Depth=2 addi.w $s7, $s7, 1 addi.d $a2, $a2, 1 addi.d $a3, $a3, 1 addi.d $a0, $a0, 1 - bnez $a2, .LBB0_147 -# %bb.149: # in Loop: Header=BB0_6 Depth=1 + bnez $a2, .LBB0_146 +# %bb.148: # in Loop: Header=BB0_6 Depth=1 move $s7, $a1 -.LBB0_150: # %.critedge.i407 +.LBB0_149: # %.critedge.i407 # in Loop: Header=BB0_6 Depth=1 - bne $s7, $s4, .LBB0_152 -# %bb.151: # in Loop: Header=BB0_6 Depth=1 + bne $s7, $s4, .LBB0_151 +# %bb.150: # in Loop: Header=BB0_6 Depth=1 move $s5, $zero add.d $fp, $a7, $s4 ori $s8, $zero, 2 - b .LBB0_209 -.LBB0_152: # %.critedge.thread.i + b .LBB0_208 +.LBB0_151: # %.critedge.thread.i # in Loop: Header=BB0_6 Depth=1 addi.w $s3, $t0, 0 - bne $s7, $s3, .LBB0_154 -# %bb.153: # in Loop: Header=BB0_6 Depth=1 + bne $s7, $s3, .LBB0_153 +# %bb.152: # in Loop: Header=BB0_6 Depth=1 move $s5, $zero add.d $fp, $t0, $a7 move $s4, $t0 ori $s8, $zero, 2 - b .LBB0_209 -.LBB0_154: # in Loop: Header=BB0_6 Depth=1 - st.d $t3, $sp, 384 # 8-byte Folded Spill - st.d $t2, $sp, 392 # 8-byte Folded Spill - st.d $t1, $sp, 416 # 8-byte Folded Spill - st.d $a7, $sp, 320 # 8-byte Folded Spill - st.d $a6, $sp, 328 # 8-byte Folded Spill - st.d $t0, $sp, 312 # 8-byte Folded Spill + b .LBB0_208 +.LBB0_153: # in Loop: Header=BB0_6 Depth=1 + st.d $t3, $sp, 368 # 8-byte Folded Spill + st.d $t2, $sp, 376 # 8-byte Folded Spill + st.d $t1, $sp, 400 # 8-byte Folded Spill + st.d $a7, $sp, 304 # 8-byte Folded Spill + st.d $a6, $sp, 312 # 8-byte Folded Spill + st.d $t0, $sp, 296 # 8-byte Folded Spill add.w $s2, $t0, $s4 addi.w $a0, $s2, 1 slli.d $s1, $a0, 2 @@ -1455,20 +1446,20 @@ SIM4: # @SIM4 move $a0, $s1 pcaddu18i $ra, %call36(xmalloc) jirl $ra, $ra, 0 - st.d $a0, $sp, 424 # 8-byte Folded Spill - bltz $s2, .LBB0_156 -# %bb.155: # %.lr.ph276.preheader.i + st.d $a0, $sp, 408 # 8-byte Folded Spill + bltz $s2, .LBB0_155 +# %bb.154: # %.lr.ph276.preheader.i # in Loop: Header=BB0_6 Depth=1 ori $a1, $zero, 255 move $a0, $fp move $a2, $s1 pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 -.LBB0_156: # %._crit_edge.i387 +.LBB0_155: # %._crit_edge.i387 # in Loop: Header=BB0_6 Depth=1 addi.w $s0, $s4, 1 alsl.d $a0, $s4, $fp, 2 - st.d $a0, $sp, 352 # 8-byte Folded Spill + st.d $a0, $sp, 336 # 8-byte Folded Spill stx.w $s7, $fp, $s5 slli.w $s1, $s0, 2 move $a0, $s1 @@ -1488,116 +1479,112 @@ SIM4: # @SIM4 ldx.w $a1, $fp, $s5 move $t0, $zero st.w $a1, $s2, 0 - st.d $s1, $sp, 360 # 8-byte Folded Spill + st.d $s1, $sp, 344 # 8-byte Folded Spill st.w $s4, $s1, 0 - ld.d $a2, $sp, 384 # 8-byte Folded Reload + ld.d $a2, $sp, 368 # 8-byte Folded Reload bstrpick.d $a1, $a2, 31, 31 add.w $a1, $a2, $a1 srai.d $t8, $a1, 1 slli.w $s0, $a2, 1 addi.d $s5, $s4, 2 - ld.d $a1, $sp, 424 # 8-byte Folded Reload + ld.d $a1, $sp, 408 # 8-byte Folded Reload sub.d $a2, $fp, $a1 - st.d $a2, $sp, 240 # 8-byte Folded Spill - ld.d $a2, $sp, 416 # 8-byte Folded Reload - ld.d $a3, $sp, 392 # 8-byte Folded Reload + st.d $a2, $sp, 224 # 8-byte Folded Spill + ld.d $a2, $sp, 400 # 8-byte Folded Reload + ld.d $a3, $sp, 376 # 8-byte Folded Reload add.d $a2, $a2, $a3 - st.d $a2, $sp, 336 # 8-byte Folded Spill + st.d $a2, $sp, 320 # 8-byte Folded Spill addi.d $a5, $s4, -1 addi.d $a2, $zero, -1 alsl.d $a1, $s4, $a1, 2 addi.d $a7, $a1, -4 addi.d $a1, $a1, 12 - ld.d $a3, $sp, 352 # 8-byte Folded Reload + ld.d $a3, $sp, 336 # 8-byte Folded Reload addi.d $t1, $a3, 12 ori $s1, $zero, 1 ori $t2, $zero, 3 ori $t3, $zero, 2 move $t4, $s5 - st.d $s4, $sp, 416 # 8-byte Folded Spill move $a6, $s4 + st.d $s4, $sp, 400 # 8-byte Folded Spill move $t7, $t6 move $t5, $fp move $fp, $a1 - st.d $t5, $sp, 392 # 8-byte Folded Spill - st.d $s2, $sp, 344 # 8-byte Folded Spill - st.d $t6, $sp, 288 # 8-byte Folded Spill - st.d $t8, $sp, 272 # 8-byte Folded Spill - st.d $s0, $sp, 248 # 8-byte Folded Spill -.LBB0_157: # Parent Loop BB0_6 Depth=1 + st.d $t5, $sp, 376 # 8-byte Folded Spill + st.d $s2, $sp, 328 # 8-byte Folded Spill + st.d $t6, $sp, 272 # 8-byte Folded Spill + st.d $t8, $sp, 256 # 8-byte Folded Spill + st.d $s0, $sp, 232 # 8-byte Folded Spill +.LBB0_156: # Parent Loop BB0_6 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_172 Depth 3 - # Child Loop BB0_184 Depth 4 - # Child Loop BB0_195 Depth 3 - # Child Loop BB0_200 Depth 3 - # Child Loop BB0_202 Depth 3 + # Child Loop BB0_171 Depth 3 + # Child Loop BB0_183 Depth 4 + # Child Loop BB0_194 Depth 3 + # Child Loop BB0_199 Depth 3 + # Child Loop BB0_201 Depth 3 slli.d $a1, $t0, 2 - ldx.w $a1, $s2, $a1 - ori $a3, $zero, 2 - bge $t8, $a1, .LBB0_160 -# %bb.158: # in Loop: Header=BB0_157 Depth=2 - bge $a1, $s0, .LBB0_165 -# %bb.159: # in Loop: Header=BB0_157 Depth=2 - ld.d $a1, $sp, 376 # 8-byte Folded Reload - ld.w $a3, $a1, 28 -.LBB0_160: # %good_ratio.exit.i389 - # in Loop: Header=BB0_157 Depth=2 - bge $a3, $t0, .LBB0_168 -.LBB0_161: # in Loop: Header=BB0_157 Depth=2 - beqz $t0, .LBB0_584 -# %bb.162: # in Loop: Header=BB0_157 Depth=2 + ldx.w $a3, $s2, $a1 + ori $a1, $zero, 2 + bge $t8, $a3, .LBB0_159 +# %bb.157: # in Loop: Header=BB0_156 Depth=2 + bge $a3, $s0, .LBB0_164 +# %bb.158: # in Loop: Header=BB0_156 Depth=2 + ld.d $a1, $sp, 360 # 8-byte Folded Reload + ld.w $a1, $a1, 28 +.LBB0_159: # %good_ratio.exit.i389 + # in Loop: Header=BB0_156 Depth=2 + bge $a1, $t0, .LBB0_167 +.LBB0_160: # in Loop: Header=BB0_156 Depth=2 + beqz $t0, .LBB0_583 +# %bb.161: # in Loop: Header=BB0_156 Depth=2 addi.d $a1, $t0, -1 slli.d $a3, $a1, 2 - ldx.w $a3, $s2, $a3 - ori $a4, $zero, 2 - bge $t8, $a3, .LBB0_167 -# %bb.163: # in Loop: Header=BB0_157 Depth=2 - bge $a3, $s0, .LBB0_166 -# %bb.164: # in Loop: Header=BB0_157 Depth=2 - ld.d $a3, $sp, 376 # 8-byte Folded Reload - ld.w $a4, $a3, 28 + ldx.w $a4, $s2, $a3 + ori $a3, $zero, 2 + bge $t8, $a4, .LBB0_166 +# %bb.162: # in Loop: Header=BB0_156 Depth=2 + bge $a4, $s0, .LBB0_165 +# %bb.163: # in Loop: Header=BB0_156 Depth=2 + ld.d $a3, $sp, 360 # 8-byte Folded Reload + ld.w $a3, $a3, 28 + b .LBB0_166 +.LBB0_164: # in Loop: Header=BB0_156 Depth=2 + movgr2fr.w $fa0, $a3 + ffint.d.w $fa0, $fa0 + vldi $vr1, -912 + fmadd.d $fa0, $fa0, $fs0, $fa1 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a1, $fa0 + blt $a1, $t0, .LBB0_160 b .LBB0_167 -.LBB0_165: # in Loop: Header=BB0_157 Depth=2 - pcalau12i $a3, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a3, %pc_lo12(.LCPI0_0) - movgr2fr.w $fa1, $a1 - ffint.d.w $fa1, $fa1 - vldi $vr2, -912 - fmadd.d $fa0, $fa1, $fa0, $fa2 +.LBB0_165: # in Loop: Header=BB0_156 Depth=2 + movgr2fr.w $fa0, $a4 + ffint.d.w $fa0, $fa0 + vldi $vr1, -912 + fmadd.d $fa0, $fa0, $fs0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a3, $fa0 - blt $a3, $t0, .LBB0_161 - b .LBB0_168 -.LBB0_166: # in Loop: Header=BB0_157 Depth=2 - pcalau12i $a4, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a4, %pc_lo12(.LCPI0_0) - movgr2fr.w $fa1, $a3 - ffint.d.w $fa1, $fa1 - vldi $vr2, -912 - fmadd.d $fa0, $fa1, $fa0, $fa2 - ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a4, $fa0 -.LBB0_167: # %good_ratio.exit258.i - # in Loop: Header=BB0_157 Depth=2 - blt $a4, $a1, .LBB0_584 -.LBB0_168: # %.critedge4.i - # in Loop: Header=BB0_157 Depth=2 - addi.w $a6, $a6, -1 - ld.d $a4, $sp, 416 # 8-byte Folded Reload - addi.d $a4, $a4, -1 - st.d $a6, $sp, 384 # 8-byte Folded Spill - st.d $a4, $sp, 416 # 8-byte Folded Spill - bge $t7, $a6, .LBB0_171 -# %bb.169: # %._crit_edge290.thread.i - # in Loop: Header=BB0_157 Depth=2 - ld.d $a1, $sp, 352 # 8-byte Folded Reload +.LBB0_166: # %good_ratio.exit258.i + # in Loop: Header=BB0_156 Depth=2 + blt $a3, $a1, .LBB0_583 +.LBB0_167: # %.critedge4.i + # in Loop: Header=BB0_156 Depth=2 + ld.d $a1, $sp, 400 # 8-byte Folded Reload + addi.w $a1, $a1, -1 + addi.d $a4, $a6, -1 + st.d $a1, $sp, 400 # 8-byte Folded Spill + st.d $a4, $sp, 368 # 8-byte Folded Spill + bge $t7, $a1, .LBB0_170 +# %bb.168: # %._crit_edge290.thread.i + # in Loop: Header=BB0_156 Depth=2 + ld.d $a1, $sp, 336 # 8-byte Folded Reload ld.w $a1, $a1, 0 slli.d $a3, $s1, 2 stx.w $a1, $s2, $a3 - ld.d $a1, $sp, 360 # 8-byte Folded Reload + ld.d $a1, $sp, 344 # 8-byte Folded Reload stx.w $s4, $a1, $a3 -.LBB0_170: # %._crit_edge298.i - # in Loop: Header=BB0_157 Depth=2 +.LBB0_169: # %._crit_edge298.i + # in Loop: Header=BB0_156 Depth=2 addi.w $t7, $t7, 1 addi.d $s1, $s1, 1 addi.d $t4, $t4, 1 @@ -1609,60 +1596,60 @@ SIM4: # @SIM4 addi.d $t3, $t3, 2 addi.d $fp, $fp, -4 addi.d $t1, $t1, -4 - ld.d $a6, $sp, 384 # 8-byte Folded Reload - bne $t0, $t6, .LBB0_157 - b .LBB0_585 -.LBB0_171: # %.lr.ph289.i - # in Loop: Header=BB0_157 Depth=2 - st.d $fp, $sp, 304 # 8-byte Folded Spill + ld.d $a6, $sp, 368 # 8-byte Folded Reload + bne $t0, $t6, .LBB0_156 + b .LBB0_584 +.LBB0_170: # %.lr.ph289.i + # in Loop: Header=BB0_156 Depth=2 + st.d $fp, $sp, 288 # 8-byte Folded Spill move $t8, $t3 bstrins.d $t8, $zero, 2, 0 slli.d $a1, $t0, 1 addi.d $a0, $a1, 3 - st.d $a0, $sp, 296 # 8-byte Folded Spill + st.d $a0, $sp, 280 # 8-byte Folded Spill nor $a1, $t0, $zero add.d $fp, $a1, $s4 add.d $s0, $s1, $s4 alsl.d $a0, $s0, $t5, 2 - st.d $a0, $sp, 368 # 8-byte Folded Spill + st.d $a0, $sp, 352 # 8-byte Folded Spill alsl.d $a3, $fp, $t5, 2 move $a6, $a2 move $t5, $a4 - ld.d $a0, $sp, 336 # 8-byte Folded Reload + ld.d $a0, $sp, 320 # 8-byte Folded Reload .p2align 4, , 16 -.LBB0_172: # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_157 Depth=2 +.LBB0_171: # Parent Loop BB0_6 Depth=1 + # Parent Loop BB0_156 Depth=2 # => This Loop Header: Depth=3 - # Child Loop BB0_184 Depth 4 + # Child Loop BB0_183 Depth 4 slli.d $a4, $t5, 2 - bne $t5, $fp, .LBB0_174 -# %bb.173: # in Loop: Header=BB0_172 Depth=3 + bne $t5, $fp, .LBB0_173 +# %bb.172: # in Loop: Header=BB0_171 Depth=3 ld.w $t6, $a3, 4 addi.w $s7, $t6, 1 - b .LBB0_180 + b .LBB0_179 .p2align 4, , 16 -.LBB0_174: # in Loop: Header=BB0_172 Depth=3 - bne $t5, $s0, .LBB0_176 -# %bb.175: # in Loop: Header=BB0_172 Depth=3 - ld.d $a1, $sp, 368 # 8-byte Folded Reload +.LBB0_173: # in Loop: Header=BB0_171 Depth=3 + bne $t5, $s0, .LBB0_175 +# %bb.174: # in Loop: Header=BB0_171 Depth=3 + ld.d $a1, $sp, 352 # 8-byte Folded Reload ld.w $s7, $a1, -4 - b .LBB0_180 + b .LBB0_179 .p2align 4, , 16 -.LBB0_176: # in Loop: Header=BB0_172 Depth=3 - ld.d $a1, $sp, 392 # 8-byte Folded Reload +.LBB0_175: # in Loop: Header=BB0_171 Depth=3 + ld.d $a1, $sp, 376 # 8-byte Folded Reload alsl.d $t6, $t5, $a1, 2 ldx.w $s2, $a1, $a4 ld.w $ra, $t6, 4 - bge $s2, $ra, .LBB0_178 -# %bb.177: # %._crit_edge353.i - # in Loop: Header=BB0_172 Depth=3 + bge $s2, $ra, .LBB0_177 +# %bb.176: # %._crit_edge353.i + # in Loop: Header=BB0_171 Depth=3 ld.w $t6, $t6, -4 - b .LBB0_179 -.LBB0_178: # in Loop: Header=BB0_172 Depth=3 + b .LBB0_178 +.LBB0_177: # in Loop: Header=BB0_171 Depth=3 ld.w $t6, $t6, -4 addi.w $s7, $s2, 1 - bge $s7, $t6, .LBB0_180 -.LBB0_179: # in Loop: Header=BB0_172 Depth=3 + bge $s7, $t6, .LBB0_179 +.LBB0_178: # in Loop: Header=BB0_171 Depth=3 addi.w $s7, $ra, 1 slt $s2, $ra, $s2 slt $ra, $t6, $s7 @@ -1673,114 +1660,114 @@ SIM4: # @SIM4 maskeqz $t6, $t6, $s2 or $s7, $t6, $a1 .p2align 4, , 16 -.LBB0_180: # in Loop: Header=BB0_172 Depth=3 +.LBB0_179: # in Loop: Header=BB0_171 Depth=3 sub.d $a1, $t5, $s4 add.w $s2, $s7, $a1 - bltz $s7, .LBB0_188 + bltz $s7, .LBB0_187 +# %bb.180: # %.preheader.i396 + # in Loop: Header=BB0_171 Depth=3 + bge $s7, $s4, .LBB0_187 # %bb.181: # %.preheader.i396 - # in Loop: Header=BB0_172 Depth=3 - bge $s7, $s4, .LBB0_188 -# %bb.182: # %.preheader.i396 - # in Loop: Header=BB0_172 Depth=3 - bge $s2, $s3, .LBB0_188 -# %bb.183: # %.lr.ph281.preheader.i - # in Loop: Header=BB0_172 Depth=3 + # in Loop: Header=BB0_171 Depth=3 + bge $s2, $s3, .LBB0_187 +# %bb.182: # %.lr.ph281.preheader.i + # in Loop: Header=BB0_171 Depth=3 add.w $s2, $s7, $a6 bstrpick.d $a1, $s7, 31, 0 add.d $ra, $a0, $a1 .p2align 4, , 16 -.LBB0_184: # %.lr.ph281.i +.LBB0_183: # %.lr.ph281.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_157 Depth=2 - # Parent Loop BB0_172 Depth=3 + # Parent Loop BB0_156 Depth=2 + # Parent Loop BB0_171 Depth=3 # => This Inner Loop Header: Depth=4 ld.bu $a1, $ra, 0 ldx.bu $t6, $s8, $s2 - bne $a1, $t6, .LBB0_187 -# %bb.185: # in Loop: Header=BB0_184 Depth=4 + bne $a1, $t6, .LBB0_186 +# %bb.184: # in Loop: Header=BB0_183 Depth=4 addi.w $s7, $s7, 1 - bge $s7, $s4, .LBB0_187 -# %bb.186: # in Loop: Header=BB0_184 Depth=4 + bge $s7, $s4, .LBB0_186 +# %bb.185: # in Loop: Header=BB0_183 Depth=4 addi.d $s2, $s2, 1 addi.d $ra, $ra, 1 - blt $s2, $s3, .LBB0_184 -.LBB0_187: # %.critedge6.i.loopexit - # in Loop: Header=BB0_172 Depth=3 + blt $s2, $s3, .LBB0_183 +.LBB0_186: # %.critedge6.i.loopexit + # in Loop: Header=BB0_171 Depth=3 add.w $s2, $a6, $s7 -.LBB0_188: # %.critedge6.i - # in Loop: Header=BB0_172 Depth=3 - ld.d $a1, $sp, 424 # 8-byte Folded Reload +.LBB0_187: # %.critedge6.i + # in Loop: Header=BB0_171 Depth=3 + ld.d $a1, $sp, 408 # 8-byte Folded Reload stx.w $s7, $a1, $a4 - bne $s7, $s4, .LBB0_190 -# %bb.189: # %.critedge6.i - # in Loop: Header=BB0_172 Depth=3 - beq $s2, $s3, .LBB0_204 -.LBB0_190: # in Loop: Header=BB0_172 Depth=3 - beq $s7, $s4, .LBB0_205 -# %bb.191: # in Loop: Header=BB0_172 Depth=3 - beq $s2, $s3, .LBB0_206 -# %bb.192: # in Loop: Header=BB0_172 Depth=3 + bne $s7, $s4, .LBB0_189 +# %bb.188: # %.critedge6.i + # in Loop: Header=BB0_171 Depth=3 + beq $s2, $s3, .LBB0_203 +.LBB0_189: # in Loop: Header=BB0_171 Depth=3 + beq $s7, $s4, .LBB0_204 +# %bb.190: # in Loop: Header=BB0_171 Depth=3 + beq $s2, $s3, .LBB0_205 +# %bb.191: # in Loop: Header=BB0_171 Depth=3 addi.d $t5, $t5, 1 addi.d $a6, $a6, 1 - bne $t5, $t4, .LBB0_172 -# %bb.193: # %._crit_edge290.i - # in Loop: Header=BB0_157 Depth=2 - ld.d $a0, $sp, 352 # 8-byte Folded Reload + bne $t5, $t4, .LBB0_171 +# %bb.192: # %._crit_edge290.i + # in Loop: Header=BB0_156 Depth=2 + ld.d $a0, $sp, 336 # 8-byte Folded Reload ld.w $a3, $a0, 0 - ld.d $a0, $sp, 344 # 8-byte Folded Reload + ld.d $a0, $sp, 328 # 8-byte Folded Reload alsl.d $a1, $s1, $a0, 2 slli.d $a4, $s1, 2 stx.w $a3, $a0, $a4 - ld.d $a6, $sp, 360 # 8-byte Folded Reload + ld.d $a6, $sp, 344 # 8-byte Folded Reload alsl.d $a3, $s1, $a6, 2 stx.w $s4, $a6, $a4 move $a4, $a7 move $a6, $t2 move $t5, $a5 - ld.d $s0, $sp, 248 # 8-byte Folded Reload - b .LBB0_195 + ld.d $s0, $sp, 232 # 8-byte Folded Reload + b .LBB0_194 .p2align 4, , 16 -.LBB0_194: # in Loop: Header=BB0_195 Depth=3 +.LBB0_193: # in Loop: Header=BB0_194 Depth=3 addi.d $t5, $t5, 1 addi.d $a6, $a6, -1 addi.d $a4, $a4, 4 - beqz $a6, .LBB0_197 -.LBB0_195: # %.lr.ph294.i + beqz $a6, .LBB0_196 +.LBB0_194: # %.lr.ph294.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_157 Depth=2 + # Parent Loop BB0_156 Depth=2 # => This Inner Loop Header: Depth=3 ld.w $t6, $a4, 0 ld.w $fp, $a1, 0 - bge $fp, $t6, .LBB0_194 -# %bb.196: # in Loop: Header=BB0_195 Depth=3 + bge $fp, $t6, .LBB0_193 +# %bb.195: # in Loop: Header=BB0_194 Depth=3 st.w $t6, $a1, 0 st.w $t5, $a3, 0 - b .LBB0_194 -.LBB0_197: # %.lr.ph297.i.preheader - # in Loop: Header=BB0_157 Depth=2 - ld.d $a4, $sp, 416 # 8-byte Folded Reload + b .LBB0_193 +.LBB0_196: # %.lr.ph297.i.preheader + # in Loop: Header=BB0_156 Depth=2 + ld.d $a4, $sp, 368 # 8-byte Folded Reload move $a1, $a4 ori $a3, $zero, 3 - ld.d $t5, $sp, 392 # 8-byte Folded Reload - ld.d $fp, $sp, 304 # 8-byte Folded Reload - bltu $t0, $a3, .LBB0_201 -# %bb.198: # %.lr.ph297.i.preheader - # in Loop: Header=BB0_157 Depth=2 + ld.d $t5, $sp, 376 # 8-byte Folded Reload + ld.d $fp, $sp, 288 # 8-byte Folded Reload + bltu $t0, $a3, .LBB0_200 +# %bb.197: # %.lr.ph297.i.preheader + # in Loop: Header=BB0_156 Depth=2 move $a1, $a4 ori $a3, $zero, 32 - ld.d $a0, $sp, 240 # 8-byte Folded Reload - bltu $a0, $a3, .LBB0_201 -# %bb.199: # %vector.ph1101 - # in Loop: Header=BB0_157 Depth=2 - ld.d $a0, $sp, 296 # 8-byte Folded Reload + ld.d $a0, $sp, 224 # 8-byte Folded Reload + bltu $a0, $a3, .LBB0_200 +# %bb.198: # %vector.ph1101 + # in Loop: Header=BB0_156 Depth=2 + ld.d $a0, $sp, 280 # 8-byte Folded Reload bstrins.d $a0, $zero, 2, 0 add.d $a1, $a4, $a0 move $a3, $t1 move $a4, $fp .p2align 4, , 16 -.LBB0_200: # %vector.body1104 +.LBB0_199: # %vector.body1104 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_157 Depth=2 + # Parent Loop BB0_156 Depth=2 # => This Inner Loop Header: Depth=3 vld $vr0, $a4, -16 vld $vr1, $a4, 0 @@ -1789,96 +1776,95 @@ SIM4: # @SIM4 addi.d $t8, $t8, -8 addi.d $a4, $a4, 32 addi.d $a3, $a3, 32 - bnez $t8, .LBB0_200 -.LBB0_201: # %.lr.ph297.i.preheader1167 - # in Loop: Header=BB0_157 Depth=2 + bnez $t8, .LBB0_199 +.LBB0_200: # %.lr.ph297.i.preheader1167 + # in Loop: Header=BB0_156 Depth=2 alsl.d $a3, $a1, $t5, 2 - ld.d $a4, $sp, 424 # 8-byte Folded Reload + ld.d $a4, $sp, 408 # 8-byte Folded Reload alsl.d $a4, $a1, $a4, 2 - ld.d $s2, $sp, 344 # 8-byte Folded Reload - ld.d $t6, $sp, 288 # 8-byte Folded Reload - ld.d $t8, $sp, 272 # 8-byte Folded Reload + ld.d $s2, $sp, 328 # 8-byte Folded Reload + ld.d $t6, $sp, 272 # 8-byte Folded Reload + ld.d $t8, $sp, 256 # 8-byte Folded Reload .p2align 4, , 16 -.LBB0_202: # %.lr.ph297.i +.LBB0_201: # %.lr.ph297.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_157 Depth=2 + # Parent Loop BB0_156 Depth=2 # => This Inner Loop Header: Depth=3 ld.w $a6, $a4, 0 st.w $a6, $a3, 0 addi.d $a1, $a1, 1 addi.d $a3, $a3, 4 addi.d $a4, $a4, 4 - bne $t4, $a1, .LBB0_202 - b .LBB0_170 -.LBB0_203: # %free_hash_env.exit372._crit_edge + bne $t4, $a1, .LBB0_201 + b .LBB0_169 +.LBB0_202: # %free_hash_env.exit372._crit_edge # in Loop: Header=BB0_6 Depth=1 ld.w $s0, $s4, 16 - ld.d $t7, $sp, 400 # 8-byte Folded Reload - ld.d $a0, $sp, 280 # 8-byte Folded Reload - bnez $s0, .LBB0_132 - b .LBB0_144 -.LBB0_204: # in Loop: Header=BB0_6 Depth=1 - ld.d $a0, $sp, 392 # 8-byte Folded Reload + ld.d $t7, $sp, 384 # 8-byte Folded Reload + bnez $s0, .LBB0_131 + b .LBB0_143 +.LBB0_203: # in Loop: Header=BB0_6 Depth=1 + ld.d $a0, $sp, 376 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 424 # 8-byte Folded Reload + ld.d $a0, $sp, 408 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 344 # 8-byte Folded Reload + ld.d $a0, $sp, 328 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 360 # 8-byte Folded Reload + ld.d $a0, $sp, 344 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 320 # 8-byte Folded Reload - ld.d $a1, $sp, 312 # 8-byte Folded Reload + ld.d $a0, $sp, 304 # 8-byte Folded Reload + ld.d $a1, $sp, 296 # 8-byte Folded Reload add.d $fp, $a1, $a0 - b .LBB0_207 -.LBB0_205: # in Loop: Header=BB0_6 Depth=1 - ld.d $a0, $sp, 424 # 8-byte Folded Reload + b .LBB0_206 +.LBB0_204: # in Loop: Header=BB0_6 Depth=1 + ld.d $a0, $sp, 408 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 392 # 8-byte Folded Reload + ld.d $a0, $sp, 376 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 344 # 8-byte Folded Reload + ld.d $a0, $sp, 328 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 360 # 8-byte Folded Reload + ld.d $a0, $sp, 344 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 320 # 8-byte Folded Reload + ld.d $a0, $sp, 304 # 8-byte Folded Reload add.d $fp, $s2, $a0 - b .LBB0_207 -.LBB0_206: # in Loop: Header=BB0_6 Depth=1 - ld.d $a0, $sp, 424 # 8-byte Folded Reload + b .LBB0_206 +.LBB0_205: # in Loop: Header=BB0_6 Depth=1 + ld.d $a0, $sp, 408 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 392 # 8-byte Folded Reload + ld.d $a0, $sp, 376 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 344 # 8-byte Folded Reload + ld.d $a0, $sp, 328 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 360 # 8-byte Folded Reload + ld.d $a0, $sp, 344 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 320 # 8-byte Folded Reload - ld.d $a1, $sp, 312 # 8-byte Folded Reload + ld.d $a0, $sp, 304 # 8-byte Folded Reload + ld.d $a1, $sp, 296 # 8-byte Folded Reload add.d $fp, $a1, $a0 move $s4, $s7 -.LBB0_207: # %extend_fw.exit +.LBB0_206: # %extend_fw.exit # in Loop: Header=BB0_6 Depth=1 move $s5, $s1 ori $s8, $zero, 2 -.LBB0_208: # %extend_fw.exit +.LBB0_207: # %extend_fw.exit # in Loop: Header=BB0_6 Depth=1 - ld.d $t7, $sp, 400 # 8-byte Folded Reload - ld.d $a6, $sp, 328 # 8-byte Folded Reload -.LBB0_209: # %extend_fw.exit + ld.d $t7, $sp, 384 # 8-byte Folded Reload + ld.d $a6, $sp, 312 # 8-byte Folded Reload +.LBB0_208: # %extend_fw.exit # in Loop: Header=BB0_6 Depth=1 ld.w $a1, $s6, 12 - ld.d $a0, $sp, 376 # 8-byte Folded Reload + ld.d $a0, $sp, 360 # 8-byte Folded Reload ld.w $a2, $a0, 56 ld.w $a3, $a0, 48 add.d $a0, $s4, $a6 @@ -1886,55 +1872,53 @@ SIM4: # @SIM4 mul.d $a1, $a1, $a2 mul.d $a2, $a3, $s5 add.w $a1, $a2, $a1 - ld.d $s4, $sp, 408 # 8-byte Folded Reload - bltz $a1, .LBB0_211 -# %bb.210: # in Loop: Header=BB0_6 Depth=1 + ld.d $s4, $sp, 392 # 8-byte Folded Reload + bltz $a1, .LBB0_210 +# %bb.209: # in Loop: Header=BB0_6 Depth=1 st.w $a0, $s6, 12 st.w $fp, $s6, 8 .p2align 4, , 16 -.LBB0_211: # %.thread556 +.LBB0_210: # %.thread556 # in Loop: Header=BB0_6 Depth=1 ld.w $a0, $s4, 16 - pcalau12i $a1, %pc_hi20(.LCPI0_1) - st.d $a1, $sp, 248 # 8-byte Folded Spill ld.d $s6, $sp, 152 # 8-byte Folded Reload lu12i.w $s1, 128 - bltu $a0, $s8, .LBB0_383 -# %bb.212: # %.lr.ph670.preheader + bltu $a0, $s8, .LBB0_382 +# %bb.211: # %.lr.ph670.preheader # in Loop: Header=BB0_6 Depth=1 ori $s5, $zero, 1 - b .LBB0_215 + b .LBB0_214 .p2align 4, , 16 -.LBB0_213: # in Loop: Header=BB0_215 Depth=2 +.LBB0_212: # in Loop: Header=BB0_214 Depth=2 ori $s8, $zero, 2 -.LBB0_214: # in Loop: Header=BB0_215 Depth=2 +.LBB0_213: # in Loop: Header=BB0_214 Depth=2 addi.w $s5, $s5, 1 - bgeu $s5, $a0, .LBB0_383 -.LBB0_215: # %.lr.ph670 + bgeu $s5, $a0, .LBB0_382 +.LBB0_214: # %.lr.ph670 # Parent Loop BB0_6 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_224 Depth 3 - # Child Loop BB0_238 Depth 3 - # Child Loop BB0_245 Depth 3 - # Child Loop BB0_248 Depth 3 - # Child Loop BB0_258 Depth 3 - # Child Loop BB0_262 Depth 3 - # Child Loop BB0_267 Depth 3 - # Child Loop BB0_317 Depth 3 - # Child Loop BB0_321 Depth 3 - # Child Loop BB0_332 Depth 4 - # Child Loop BB0_342 Depth 5 - # Child Loop BB0_347 Depth 4 - # Child Loop BB0_325 Depth 4 - # Child Loop BB0_354 Depth 4 - # Child Loop BB0_366 Depth 5 - # Child Loop BB0_371 Depth 4 - # Child Loop BB0_379 Depth 4 - # Child Loop BB0_307 Depth 3 - # Child Loop BB0_312 Depth 3 - # Child Loop BB0_287 Depth 3 - # Child Loop BB0_292 Depth 3 - # Child Loop BB0_297 Depth 3 + # Child Loop BB0_223 Depth 3 + # Child Loop BB0_237 Depth 3 + # Child Loop BB0_244 Depth 3 + # Child Loop BB0_247 Depth 3 + # Child Loop BB0_257 Depth 3 + # Child Loop BB0_261 Depth 3 + # Child Loop BB0_266 Depth 3 + # Child Loop BB0_316 Depth 3 + # Child Loop BB0_320 Depth 3 + # Child Loop BB0_331 Depth 4 + # Child Loop BB0_341 Depth 5 + # Child Loop BB0_346 Depth 4 + # Child Loop BB0_324 Depth 4 + # Child Loop BB0_353 Depth 4 + # Child Loop BB0_365 Depth 5 + # Child Loop BB0_370 Depth 4 + # Child Loop BB0_378 Depth 4 + # Child Loop BB0_306 Depth 3 + # Child Loop BB0_311 Depth 3 + # Child Loop BB0_286 Depth 3 + # Child Loop BB0_291 Depth 3 + # Child Loop BB0_296 Depth 3 ld.d $a1, $t7, 0 addi.w $a3, $s5, -1 bstrpick.d $a2, $a3, 31, 0 @@ -1947,38 +1931,36 @@ SIM4: # @SIM4 ld.w $t0, $a5, 4 nor $a1, $t1, $zero add.w $s7, $t0, $a1 - blez $s7, .LBB0_214 -# %bb.216: # in Loop: Header=BB0_215 Depth=2 + blez $s7, .LBB0_213 +# %bb.215: # in Loop: Header=BB0_214 Depth=2 ori $a4, $zero, 2 ld.w $s3, $a5, 0 ld.w $s0, $s2, 8 addi.w $a7, $s3, -1 - bgeu $s0, $a7, .LBB0_213 -# %bb.217: # in Loop: Header=BB0_215 Depth=2 + bgeu $s0, $a7, .LBB0_212 +# %bb.216: # in Loop: Header=BB0_214 Depth=2 st.d $a3, $sp, 176 # 8-byte Folded Spill ld.w $a3, $s6, 36 ld.d $a0, $s6, 8 ori $a1, $zero, 500 - bltu $a1, $s7, .LBB0_220 -# %bb.218: # in Loop: Header=BB0_215 Depth=2 + bltu $a1, $s7, .LBB0_219 +# %bb.217: # in Loop: Header=BB0_214 Depth=2 nor $a1, $s0, $zero add.w $fp, $s3, $a1 bstrpick.d $a6, $s7, 31, 0 - ld.d $s4, $sp, 376 # 8-byte Folded Reload - ld.d $a1, $sp, 144 # 8-byte Folded Reload + ld.d $s4, $sp, 360 # 8-byte Folded Reload + ld.d $a1, $sp, 136 # 8-byte Folded Reload st.d $s2, $sp, 160 # 8-byte Folded Spill - bgeu $a1, $fp, .LBB0_221 -# %bb.219: # in Loop: Header=BB0_215 Depth=2 + bgeu $a1, $fp, .LBB0_220 +# %bb.218: # in Loop: Header=BB0_214 Depth=2 move $s2, $zero ori $s8, $zero, 2 - b .LBB0_284 -.LBB0_220: # in Loop: Header=BB0_215 Depth=2 - ld.d $s4, $sp, 376 # 8-byte Folded Reload + b .LBB0_283 +.LBB0_219: # in Loop: Header=BB0_214 Depth=2 + ld.d $s4, $sp, 360 # 8-byte Folded Reload ori $s8, $zero, 2 - b .LBB0_286 -.LBB0_221: # in Loop: Header=BB0_215 Depth=2 - ld.d $a1, $sp, 248 # 8-byte Folded Reload - fld.d $fs1, $a1, %pc_lo12(.LCPI0_1) + b .LBB0_285 +.LBB0_220: # in Loop: Header=BB0_214 Depth=2 sub.w $t3, $fp, $s7 movgr2fr.d $fa0, $a6 ffint.d.l $fa0, $fa0 @@ -1991,42 +1973,42 @@ SIM4: # @SIM4 masknez $a1, $a1, $a2 maskeqz $a2, $a3, $a2 or $t2, $a2, $a1 - bltz $t3, .LBB0_232 -# %bb.222: # in Loop: Header=BB0_215 Depth=2 - ld.d $a1, $sp, 280 # 8-byte Folded Reload + bltz $t3, .LBB0_231 +# %bb.221: # in Loop: Header=BB0_214 Depth=2 + ld.d $a1, $sp, 264 # 8-byte Folded Reload ld.d $a1, $a1, 16 bstrpick.d $a2, $s0, 31, 0 add.d $s6, $a1, $t1 add.d $s1, $a0, $a2 move $s2, $s7 st.d $a6, $sp, 120 # 8-byte Folded Spill - beqz $fp, .LBB0_235 -# %bb.223: # %.lr.ph.i410.preheader - # in Loop: Header=BB0_215 Depth=2 + beqz $fp, .LBB0_234 +# %bb.222: # %.lr.ph.i410.preheader + # in Loop: Header=BB0_214 Depth=2 move $a0, $fp move $s2, $s7 .p2align 4, , 16 -.LBB0_224: # %.lr.ph.i410 +.LBB0_223: # %.lr.ph.i410 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_215 Depth=2 + # Parent Loop BB0_214 Depth=2 # => This Inner Loop Header: Depth=3 move $a1, $s2 add.d $a2, $s6, $s2 ld.bu $a2, $a2, -1 add.d $a3, $s1, $a0 ld.bu $a3, $a3, -1 - bne $a2, $a3, .LBB0_234 -# %bb.225: # in Loop: Header=BB0_224 Depth=3 + bne $a2, $a3, .LBB0_233 +# %bb.224: # in Loop: Header=BB0_223 Depth=3 addi.w $s2, $a1, -1 - blt $a0, $a4, .LBB0_227 -# %bb.226: # in Loop: Header=BB0_224 Depth=3 + blt $a0, $a4, .LBB0_226 +# %bb.225: # in Loop: Header=BB0_223 Depth=3 addi.w $a0, $a0, -1 ori $a2, $zero, 1 - bne $a1, $a2, .LBB0_224 -.LBB0_227: # %.critedge.i428 - # in Loop: Header=BB0_215 Depth=2 - bnez $s2, .LBB0_235 -# %bb.228: # in Loop: Header=BB0_215 Depth=2 + bne $a1, $a2, .LBB0_223 +.LBB0_226: # %.critedge.i428 + # in Loop: Header=BB0_214 Depth=2 + bnez $s2, .LBB0_234 +# %bb.227: # in Loop: Header=BB0_214 Depth=2 move $s3, $a5 add.d $a0, $s0, $t3 addi.d $s0, $a0, 1 @@ -2038,70 +2020,73 @@ SIM4: # @SIM4 jirl $ra, $ra, 0 move $fp, $a0 st.w $s0, $a0, 0 - ld.w $a1, $sp, 484 - ld.w $s0, $sp, 480 - ld.d $a0, $sp, 472 + ld.w $a1, $sp, 468 + ld.w $s0, $sp, 464 + ld.d $a0, $sp, 456 st.w $s1, $fp, 4 st.w $s6, $fp, 8 st.w $s2, $fp, 12 ld.d $s6, $sp, 152 # 8-byte Folded Reload - bltu $s0, $a1, .LBB0_230 -# %bb.229: # in Loop: Header=BB0_215 Depth=2 + bltu $s0, $a1, .LBB0_229 +# %bb.228: # in Loop: Header=BB0_214 Depth=2 addi.d $a1, $a1, 5 - st.w $a1, $sp, 484 + st.w $a1, $sp, 468 bstrpick.d $a1, $a1, 31, 0 slli.d $a1, $a1, 3 pcaddu18i $ra, %call36(xrealloc) jirl $ra, $ra, 0 - st.d $a0, $sp, 472 -.LBB0_230: # %add_col_elt.exit508.i - # in Loop: Header=BB0_215 Depth=2 + st.d $a0, $sp, 456 +.LBB0_229: # %add_col_elt.exit508.i + # in Loop: Header=BB0_214 Depth=2 ori $s8, $zero, 2 lu12i.w $s1, 128 ld.d $s2, $sp, 160 # 8-byte Folded Reload move $a5, $s3 bstrpick.d $a2, $s0, 31, 0 addi.w $a1, $s0, 1 - st.w $a1, $sp, 480 + st.w $a1, $sp, 464 slli.d $a2, $a2, 3 stx.d $fp, $a0, $a2 - fmov.d $fa0, $fs0 - ld.d $t7, $sp, 400 # 8-byte Folded Reload + fmov.d $fa0, $fs2 + ld.d $t7, $sp, 384 # 8-byte Folded Reload ld.d $a6, $sp, 120 # 8-byte Folded Reload ld.w $a3, $s6, 36 - bnez $a1, .LBB0_285 -.LBB0_231: # %greedy.exit..thread560_crit_edge - # in Loop: Header=BB0_215 Depth=2 + bnez $a1, .LBB0_284 +.LBB0_230: # %greedy.exit..thread560_crit_edge + # in Loop: Header=BB0_214 Depth=2 ld.d $a0, $s6, 8 - b .LBB0_286 -.LBB0_232: # in Loop: Header=BB0_215 Depth=2 + b .LBB0_285 +.LBB0_231: # in Loop: Header=BB0_214 Depth=2 bstrpick.d $a0, $a3, 31, 0 movgr2fr.d $fa1, $a0 ffint.d.l $fa1, $fa1 - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_2) bstrpick.d $a0, $fp, 31, 0 - movgr2fr.d $fa3, $a0 - ffint.d.l $fs2, $fa3 - fmul.d $fa2, $fs2, $fa2 + movgr2fr.d $fa2, $a0 + ffint.d.l $fs4, $fa2 + lu12i.w $a0, 209715 + ori $a0, $a0, 819 + lu32i.d $a0, 209715 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa2, $a0 + fmul.d $fa2, $fs4, $fa2 fcmp.clt.d $fcc0, $fa2, $fa1 fsel $fa1, $fa1, $fa2, $fcc0 fcmp.cult.d $fcc0, $fa1, $fa0 - bceqz $fcc0, .LBB0_240 -# %bb.233: # in Loop: Header=BB0_215 Depth=2 + bceqz $fcc0, .LBB0_239 +# %bb.232: # in Loop: Header=BB0_214 Depth=2 addi.w $s2, $t2, 1 ori $s8, $zero, 2 - b .LBB0_284 -.LBB0_234: # in Loop: Header=BB0_215 Depth=2 + b .LBB0_283 +.LBB0_233: # in Loop: Header=BB0_214 Depth=2 move $s2, $a1 -.LBB0_235: # %.critedge.thread.i411 - # in Loop: Header=BB0_215 Depth=2 - st.d $t3, $sp, 424 # 8-byte Folded Spill +.LBB0_234: # %.critedge.thread.i411 + # in Loop: Header=BB0_214 Depth=2 + st.d $t3, $sp, 408 # 8-byte Folded Spill st.d $s0, $sp, 96 # 8-byte Folded Spill st.d $a7, $sp, 72 # 8-byte Folded Spill st.d $a5, $sp, 104 # 8-byte Folded Spill st.d $s5, $sp, 88 # 8-byte Folded Spill - st.d $t2, $sp, 416 # 8-byte Folded Spill + st.d $t2, $sp, 400 # 8-byte Folded Spill add.d $a0, $t2, $fp addi.w $s8, $a0, 0 addi.d $a0, $a0, 1 @@ -2123,28 +2108,28 @@ SIM4: # @SIM4 bstrpick.d $t0, $s8, 31, 0 addi.d $t1, $t0, 1 ori $a1, $zero, 7 - st.d $a0, $sp, 368 # 8-byte Folded Spill - st.d $s8, $sp, 384 # 8-byte Folded Spill - bltu $s8, $a1, .LBB0_243 -# %bb.236: # %.critedge.thread.i411 - # in Loop: Header=BB0_215 Depth=2 + st.d $a0, $sp, 352 # 8-byte Folded Spill + st.d $s8, $sp, 368 # 8-byte Folded Spill + bltu $s8, $a1, .LBB0_242 +# %bb.235: # %.critedge.thread.i411 + # in Loop: Header=BB0_214 Depth=2 sub.d $a1, $a0, $s5 ori $a2, $zero, 32 - ld.d $a5, $sp, 416 # 8-byte Folded Reload - bltu $a1, $a2, .LBB0_244 -# %bb.237: # %vector.ph1086 - # in Loop: Header=BB0_215 Depth=2 + ld.d $a5, $sp, 400 # 8-byte Folded Reload + bltu $a1, $a2, .LBB0_243 +# %bb.236: # %vector.ph1086 + # in Loop: Header=BB0_214 Depth=2 bstrpick.d $a0, $t1, 32, 3 slli.d $a4, $a0, 3 vreplgr2vr.w $vr0, $s4 - ld.d $a0, $sp, 368 # 8-byte Folded Reload + ld.d $a0, $sp, 352 # 8-byte Folded Reload addi.d $a1, $a0, 16 addi.d $a2, $s5, 16 move $a3, $a4 .p2align 4, , 16 -.LBB0_238: # %vector.body1091 +.LBB0_237: # %vector.body1091 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_215 Depth=2 + # Parent Loop BB0_214 Depth=2 # => This Inner Loop Header: Depth=3 vst $vr0, $a2, -16 vst $vr0, $a2, 0 @@ -2153,12 +2138,12 @@ SIM4: # @SIM4 addi.d $a3, $a3, -8 addi.d $a1, $a1, 32 addi.d $a2, $a2, 32 - bnez $a3, .LBB0_238 -# %bb.239: # %middle.block1094 - # in Loop: Header=BB0_215 Depth=2 - bne $t1, $a4, .LBB0_244 - b .LBB0_246 -.LBB0_240: # in Loop: Header=BB0_215 Depth=2 + bnez $a3, .LBB0_237 +# %bb.238: # %middle.block1094 + # in Loop: Header=BB0_214 Depth=2 + bne $t1, $a4, .LBB0_243 + b .LBB0_245 +.LBB0_239: # in Loop: Header=BB0_214 Depth=2 st.d $a6, $sp, 120 # 8-byte Folded Spill st.d $a5, $sp, 104 # 8-byte Folded Spill ori $a0, $zero, 32 @@ -2172,70 +2157,70 @@ SIM4: # @SIM4 vinsgr2vr.w $vr0, $a1, 0 vinsgr2vr.w $vr0, $s3, 1 vinsgr2vr.w $vr0, $s1, 2 - pcalau12i $a0, %pc_hi20(.LCPI0_3) - vld $vr1, $a0, %pc_lo12(.LCPI0_3) - ld.w $a1, $sp, 484 - ld.w $s1, $sp, 480 - ld.d $a0, $sp, 472 + pcalau12i $a0, %pc_hi20(.LCPI0_0) + vld $vr1, $a0, %pc_lo12(.LCPI0_0) + ld.w $a1, $sp, 468 + ld.w $s1, $sp, 464 + ld.d $a0, $sp, 456 vinsgr2vr.w $vr0, $s8, 3 vadd.w $vr0, $vr0, $vr1 vst $vr0, $s0, 0 ori $s8, $zero, 2 - bltu $s1, $a1, .LBB0_242 -# %bb.241: # in Loop: Header=BB0_215 Depth=2 + bltu $s1, $a1, .LBB0_241 +# %bb.240: # in Loop: Header=BB0_214 Depth=2 addi.d $a1, $a1, 5 - st.w $a1, $sp, 484 + st.w $a1, $sp, 468 bstrpick.d $a1, $a1, 31, 0 slli.d $a1, $a1, 3 pcaddu18i $ra, %call36(xrealloc) jirl $ra, $ra, 0 - st.d $a0, $sp, 472 -.LBB0_242: # %add_col_elt.exit.i - # in Loop: Header=BB0_215 Depth=2 - ld.d $t7, $sp, 400 # 8-byte Folded Reload + st.d $a0, $sp, 456 +.LBB0_241: # %add_col_elt.exit.i + # in Loop: Header=BB0_214 Depth=2 + ld.d $t7, $sp, 384 # 8-byte Folded Reload ld.d $a5, $sp, 104 # 8-byte Folded Reload bstrpick.d $a1, $s1, 31, 0 addi.d $a2, $s1, 1 - st.w $a2, $sp, 480 + st.w $a2, $sp, 464 slli.d $a1, $a1, 3 stx.d $s0, $a0, $a1 sub.d $a0, $s7, $fp vldi $vr0, -912 - fmadd.d $fa0, $fs2, $fs1, $fa0 + fmadd.d $fa0, $fs4, $fs1, $fa0 ftintrz.l.d $fa0, $fa0 movfr2gr.d $a1, $fa0 add.w $s2, $a0, $a1 lu12i.w $s1, 128 - b .LBB0_283 -.LBB0_243: # in Loop: Header=BB0_215 Depth=2 - ld.d $a5, $sp, 416 # 8-byte Folded Reload -.LBB0_244: # %scalar.ph1084.preheader - # in Loop: Header=BB0_215 Depth=2 + b .LBB0_282 +.LBB0_242: # in Loop: Header=BB0_214 Depth=2 + ld.d $a5, $sp, 400 # 8-byte Folded Reload +.LBB0_243: # %scalar.ph1084.preheader + # in Loop: Header=BB0_214 Depth=2 sub.d $a1, $t0, $a4 addi.d $a1, $a1, 1 - ld.d $a0, $sp, 368 # 8-byte Folded Reload + ld.d $a0, $sp, 352 # 8-byte Folded Reload alsl.d $a2, $a4, $a0, 2 alsl.d $a0, $a4, $s5, 2 .p2align 4, , 16 -.LBB0_245: # %scalar.ph1084 +.LBB0_244: # %scalar.ph1084 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_215 Depth=2 + # Parent Loop BB0_214 Depth=2 # => This Inner Loop Header: Depth=3 st.w $s4, $a0, 0 st.w $s4, $a2, 0 addi.d $a1, $a1, -1 addi.d $a2, $a2, 4 addi.d $a0, $a0, 4 - bnez $a1, .LBB0_245 -.LBB0_246: # %.loopexit1156 - # in Loop: Header=BB0_215 Depth=2 - st.d $s5, $sp, 392 # 8-byte Folded Spill + bnez $a1, .LBB0_244 +.LBB0_245: # %.loopexit1156 + # in Loop: Header=BB0_214 Depth=2 + st.d $s5, $sp, 376 # 8-byte Folded Spill slli.d $t3, $a5, 2 stx.w $s2, $s5, $t3 move $s2, $zero - beqz $fp, .LBB0_255 -# %bb.247: # %.lr.ph536.preheader.i - # in Loop: Header=BB0_215 Depth=2 + beqz $fp, .LBB0_254 +# %bb.246: # %.lr.ph536.preheader.i + # in Loop: Header=BB0_214 Depth=2 sltu $a0, $fp, $s7 masknez $a1, $s7, $a0 maskeqz $a0, $fp, $a0 @@ -2244,25 +2229,25 @@ SIM4: # @SIM4 move $a2, $s6 move $a3, $s1 .p2align 4, , 16 -.LBB0_248: # %.lr.ph536.i +.LBB0_247: # %.lr.ph536.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_215 Depth=2 + # Parent Loop BB0_214 Depth=2 # => This Inner Loop Header: Depth=3 ld.bu $a4, $a2, 0 ld.bu $a5, $a3, 0 - bne $a4, $a5, .LBB0_251 -# %bb.249: # in Loop: Header=BB0_248 Depth=3 + bne $a4, $a5, .LBB0_250 +# %bb.248: # in Loop: Header=BB0_247 Depth=3 addi.w $s2, $s2, 1 addi.d $a1, $a1, -1 addi.d $a3, $a3, 1 addi.d $a2, $a2, 1 - bnez $a1, .LBB0_248 -# %bb.250: # in Loop: Header=BB0_215 Depth=2 + bnez $a1, .LBB0_247 +# %bb.249: # in Loop: Header=BB0_214 Depth=2 move $s2, $a0 -.LBB0_251: # %.critedge3.i - # in Loop: Header=BB0_215 Depth=2 - bne $s2, $s7, .LBB0_255 -# %bb.252: # in Loop: Header=BB0_215 Depth=2 +.LBB0_250: # %.critedge3.i + # in Loop: Header=BB0_214 Depth=2 + bne $s2, $s7, .LBB0_254 +# %bb.251: # in Loop: Header=BB0_214 Depth=2 ld.d $a0, $sp, 96 # 8-byte Folded Reload addi.d $s0, $a0, 1 addi.d $s1, $a7, 1 @@ -2273,33 +2258,33 @@ SIM4: # @SIM4 jirl $ra, $ra, 0 move $fp, $a0 st.w $s0, $a0, 0 - ld.w $a1, $sp, 484 - ld.w $s0, $sp, 480 - ld.d $a0, $sp, 472 + ld.w $a1, $sp, 468 + ld.w $s0, $sp, 464 + ld.d $a0, $sp, 456 st.w $s1, $fp, 4 st.w $s2, $fp, 8 st.w $s3, $fp, 12 - bltu $s0, $a1, .LBB0_254 -# %bb.253: # in Loop: Header=BB0_215 Depth=2 + bltu $s0, $a1, .LBB0_253 +# %bb.252: # in Loop: Header=BB0_214 Depth=2 addi.d $a1, $a1, 5 - st.w $a1, $sp, 484 + st.w $a1, $sp, 468 bstrpick.d $a1, $a1, 31, 0 slli.d $a1, $a1, 3 pcaddu18i $ra, %call36(xrealloc) jirl $ra, $ra, 0 - st.d $a0, $sp, 472 -.LBB0_254: # %add_col_elt.exit512.i - # in Loop: Header=BB0_215 Depth=2 + st.d $a0, $sp, 456 +.LBB0_253: # %add_col_elt.exit512.i + # in Loop: Header=BB0_214 Depth=2 ld.d $s6, $sp, 152 # 8-byte Folded Reload - ld.d $s4, $sp, 376 # 8-byte Folded Reload + ld.d $s4, $sp, 360 # 8-byte Folded Reload ori $s8, $zero, 2 lu12i.w $s1, 128 ld.d $s5, $sp, 88 # 8-byte Folded Reload - ld.d $s2, $sp, 368 # 8-byte Folded Reload - ld.d $a3, $sp, 392 # 8-byte Folded Reload + ld.d $s2, $sp, 352 # 8-byte Folded Reload + ld.d $a3, $sp, 376 # 8-byte Folded Reload bstrpick.d $a1, $s0, 31, 0 addi.d $a2, $s0, 1 - st.w $a2, $sp, 480 + st.w $a2, $sp, 464 slli.d $a1, $a1, 3 stx.d $fp, $a0, $a1 move $a0, $a3 @@ -2309,12 +2294,12 @@ SIM4: # @SIM4 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 move $s2, $zero - ld.d $t7, $sp, 400 # 8-byte Folded Reload - b .LBB0_282 -.LBB0_255: # %.critedge3.thread.i - # in Loop: Header=BB0_215 Depth=2 - st.d $t3, $sp, 360 # 8-byte Folded Spill - st.d $t1, $sp, 352 # 8-byte Folded Spill + ld.d $t7, $sp, 384 # 8-byte Folded Reload + b .LBB0_281 +.LBB0_254: # %.critedge3.thread.i + # in Loop: Header=BB0_214 Depth=2 + st.d $t3, $sp, 344 # 8-byte Folded Spill + st.d $t1, $sp, 336 # 8-byte Folded Spill move $s8, $t0 move $a0, $s0 pcaddu18i $ra, %call36(xmalloc) @@ -2326,20 +2311,20 @@ SIM4: # @SIM4 move $s0, $a0 move $a0, $zero ori $a1, $zero, 7 - ld.d $a2, $sp, 384 # 8-byte Folded Reload - bltu $a2, $a1, .LBB0_260 -# %bb.256: # %.critedge3.thread.i - # in Loop: Header=BB0_215 Depth=2 + ld.d $a2, $sp, 368 # 8-byte Folded Reload + bltu $a2, $a1, .LBB0_259 +# %bb.255: # %.critedge3.thread.i + # in Loop: Header=BB0_214 Depth=2 sub.d $a1, $s0, $s5 ori $a2, $zero, 32 - ld.d $a4, $sp, 392 # 8-byte Folded Reload - ld.d $a5, $sp, 416 # 8-byte Folded Reload - ld.d $a7, $sp, 424 # 8-byte Folded Reload - ld.d $t1, $sp, 360 # 8-byte Folded Reload - bltu $a1, $a2, .LBB0_261 -# %bb.257: # %vector.ph1073 - # in Loop: Header=BB0_215 Depth=2 - ld.d $t0, $sp, 352 # 8-byte Folded Reload + ld.d $a4, $sp, 376 # 8-byte Folded Reload + ld.d $a5, $sp, 400 # 8-byte Folded Reload + ld.d $a7, $sp, 408 # 8-byte Folded Reload + ld.d $t1, $sp, 344 # 8-byte Folded Reload + bltu $a1, $a2, .LBB0_260 +# %bb.256: # %vector.ph1073 + # in Loop: Header=BB0_214 Depth=2 + ld.d $t0, $sp, 336 # 8-byte Folded Reload bstrpick.d $a0, $t0, 32, 3 slli.d $a0, $a0, 3 addi.d $a1, $s0, 16 @@ -2347,9 +2332,9 @@ SIM4: # @SIM4 move $a3, $a0 vld $vr0, $sp, 48 # 16-byte Folded Reload .p2align 4, , 16 -.LBB0_258: # %vector.body1076 +.LBB0_257: # %vector.body1076 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_215 Depth=2 + # Parent Loop BB0_214 Depth=2 # => This Inner Loop Header: Depth=3 vst $vr0, $a2, -16 vst $vr0, $a2, 0 @@ -2358,26 +2343,26 @@ SIM4: # @SIM4 addi.d $a3, $a3, -8 addi.d $a1, $a1, 32 addi.d $a2, $a2, 32 - bnez $a3, .LBB0_258 -# %bb.259: # %middle.block1079 - # in Loop: Header=BB0_215 Depth=2 - bne $t0, $a0, .LBB0_261 - b .LBB0_263 -.LBB0_260: # in Loop: Header=BB0_215 Depth=2 - ld.d $a4, $sp, 392 # 8-byte Folded Reload - ld.d $a5, $sp, 416 # 8-byte Folded Reload - ld.d $a7, $sp, 424 # 8-byte Folded Reload - ld.d $t1, $sp, 360 # 8-byte Folded Reload -.LBB0_261: # %scalar.ph1071.preheader - # in Loop: Header=BB0_215 Depth=2 + bnez $a3, .LBB0_257 +# %bb.258: # %middle.block1079 + # in Loop: Header=BB0_214 Depth=2 + bne $t0, $a0, .LBB0_260 + b .LBB0_262 +.LBB0_259: # in Loop: Header=BB0_214 Depth=2 + ld.d $a4, $sp, 376 # 8-byte Folded Reload + ld.d $a5, $sp, 400 # 8-byte Folded Reload + ld.d $a7, $sp, 408 # 8-byte Folded Reload + ld.d $t1, $sp, 344 # 8-byte Folded Reload +.LBB0_260: # %scalar.ph1071.preheader + # in Loop: Header=BB0_214 Depth=2 sub.d $a1, $s8, $a0 addi.d $a1, $a1, 1 alsl.d $a2, $a0, $s0, 2 alsl.d $a0, $a0, $s5, 2 .p2align 4, , 16 -.LBB0_262: # %scalar.ph1071 +.LBB0_261: # %scalar.ph1071 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_215 Depth=2 + # Parent Loop BB0_214 Depth=2 # => This Inner Loop Header: Depth=3 addi.w $a3, $zero, -1 lu32i.d $a3, 0 @@ -2386,25 +2371,25 @@ SIM4: # @SIM4 addi.d $a1, $a1, -1 addi.d $a2, $a2, 4 addi.d $a0, $a0, 4 - bnez $a1, .LBB0_262 -.LBB0_263: # %.loopexit - # in Loop: Header=BB0_215 Depth=2 + bnez $a1, .LBB0_261 +.LBB0_262: # %.loopexit + # in Loop: Header=BB0_214 Depth=2 sub.d $a0, $a7, $a5 - st.d $a0, $sp, 424 # 8-byte Folded Spill + st.d $a0, $sp, 408 # 8-byte Folded Spill alsl.d $a0, $a5, $a4, 2 - st.d $a0, $sp, 360 # 8-byte Folded Spill + st.d $a0, $sp, 344 # 8-byte Folded Spill addi.w $a1, $a5, 1 alsl.d $a0, $a5, $s5, 2 - st.d $a0, $sp, 344 # 8-byte Folded Spill - st.d $s5, $sp, 384 # 8-byte Folded Spill + st.d $a0, $sp, 328 # 8-byte Folded Spill + st.d $s5, $sp, 368 # 8-byte Folded Spill stx.w $s2, $s5, $t1 - st.d $a1, $sp, 352 # 8-byte Folded Spill + st.d $a1, $sp, 336 # 8-byte Folded Spill bstrpick.d $a0, $a1, 31, 0 slli.d $s8, $a0, 2 move $a0, $s8 pcaddu18i $ra, %call36(xmalloc) jirl $ra, $ra, 0 - st.d $a0, $sp, 288 # 8-byte Folded Spill + st.d $a0, $sp, 272 # 8-byte Folded Spill move $a0, $s8 pcaddu18i $ra, %call36(xmalloc) jirl $ra, $ra, 0 @@ -2416,17 +2401,17 @@ SIM4: # @SIM4 move $a0, $s8 pcaddu18i $ra, %call36(xmalloc) jirl $ra, $ra, 0 - ld.d $a2, $sp, 288 # 8-byte Folded Reload - ld.d $t5, $sp, 416 # 8-byte Folded Reload - st.d $s2, $sp, 232 # 8-byte Folded Spill - st.d $a0, $sp, 240 # 8-byte Folded Spill - beqz $t5, .LBB0_269 -# %bb.264: # %.lr.ph543.i - # in Loop: Header=BB0_215 Depth=2 - ld.d $t7, $sp, 360 # 8-byte Folded Reload - ld.d $t6, $sp, 344 # 8-byte Folded Reload + ld.d $a2, $sp, 272 # 8-byte Folded Reload + ld.d $t5, $sp, 400 # 8-byte Folded Reload + st.d $s2, $sp, 224 # 8-byte Folded Spill + st.d $a0, $sp, 232 # 8-byte Folded Spill + beqz $t5, .LBB0_268 +# %bb.263: # %.lr.ph543.i + # in Loop: Header=BB0_214 Depth=2 + ld.d $t7, $sp, 344 # 8-byte Folded Reload + ld.d $t6, $sp, 328 # 8-byte Folded Reload ori $a1, $zero, 2 - ld.d $a6, $sp, 352 # 8-byte Folded Reload + ld.d $a6, $sp, 336 # 8-byte Folded Reload sltu $a2, $a1, $a6 masknez $a1, $a1, $a2 maskeqz $a2, $a6, $a2 @@ -2434,20 +2419,20 @@ SIM4: # @SIM4 bstrpick.d $t3, $a1, 31, 0 ori $a1, $zero, 1 ori $a2, $zero, 9 - bltu $a6, $a2, .LBB0_315 -# %bb.265: # %.lr.ph543.i - # in Loop: Header=BB0_215 Depth=2 - ld.d $a2, $sp, 288 # 8-byte Folded Reload + bltu $a6, $a2, .LBB0_314 +# %bb.264: # %.lr.ph543.i + # in Loop: Header=BB0_214 Depth=2 + ld.d $a2, $sp, 272 # 8-byte Folded Reload sub.d $a2, $a2, $s5 ori $a3, $zero, 32 vld $vr1, $sp, 48 # 16-byte Folded Reload ld.d $a7, $sp, 112 # 8-byte Folded Reload ld.d $t0, $sp, 128 # 8-byte Folded Reload - ld.d $t1, $sp, 392 # 8-byte Folded Reload - ld.d $t2, $sp, 232 # 8-byte Folded Reload - bltu $a2, $a3, .LBB0_316 -# %bb.266: # %vector.ph1060 - # in Loop: Header=BB0_215 Depth=2 + ld.d $t1, $sp, 376 # 8-byte Folded Reload + ld.d $t2, $sp, 224 # 8-byte Folded Reload + bltu $a2, $a3, .LBB0_315 +# %bb.265: # %vector.ph1060 + # in Loop: Header=BB0_214 Depth=2 move $t4, $a6 addi.d $a2, $t3, -1 move $a3, $a2 @@ -2457,13 +2442,13 @@ SIM4: # @SIM4 bstrins.d $a1, $a4, 2, 0 vreplgr2vr.w $vr0, $s4 addi.d $a4, $s5, 20 - ld.d $a5, $sp, 288 # 8-byte Folded Reload + ld.d $a5, $sp, 272 # 8-byte Folded Reload addi.d $a5, $a5, 20 move $a6, $a3 .p2align 4, , 16 -.LBB0_267: # %vector.body1063 +.LBB0_266: # %vector.body1063 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_215 Depth=2 + # Parent Loop BB0_214 Depth=2 # => This Inner Loop Header: Depth=3 vst $vr0, $a4, -16 vst $vr0, $a4, 0 @@ -2472,38 +2457,38 @@ SIM4: # @SIM4 addi.d $a4, $a4, 32 addi.d $a6, $a6, -8 addi.d $a5, $a5, 32 - bnez $a6, .LBB0_267 -# %bb.268: # %middle.block1066 - # in Loop: Header=BB0_215 Depth=2 + bnez $a6, .LBB0_266 +# %bb.267: # %middle.block1066 + # in Loop: Header=BB0_214 Depth=2 move $a6, $t4 - bne $a2, $a3, .LBB0_316 - b .LBB0_318 -.LBB0_269: # %._crit_edge.thread.i - # in Loop: Header=BB0_215 Depth=2 + bne $a2, $a3, .LBB0_315 + b .LBB0_317 +.LBB0_268: # %._crit_edge.thread.i + # in Loop: Header=BB0_214 Depth=2 move $t0, $s2 - ld.d $a1, $sp, 360 # 8-byte Folded Reload + ld.d $a1, $sp, 344 # 8-byte Folded Reload ld.w $a1, $a1, 0 st.w $a1, $s5, 0 st.w $zero, $a0, 0 - ld.d $a0, $sp, 344 # 8-byte Folded Reload + ld.d $a0, $sp, 328 # 8-byte Folded Reload ld.w $a0, $a0, 0 st.w $a0, $a2, 0 st.w $zero, $s2, 0 addi.w $s3, $zero, -1 ori $s2, $zero, 1 move $s8, $s3 -.LBB0_270: # %._crit_edge600.i - # in Loop: Header=BB0_215 Depth=2 - bgeu $t5, $s2, .LBB0_272 -# %bb.271: # in Loop: Header=BB0_215 Depth=2 - ld.d $a0, $sp, 392 # 8-byte Folded Reload +.LBB0_269: # %._crit_edge600.i + # in Loop: Header=BB0_214 Depth=2 + bgeu $t5, $s2, .LBB0_271 +# %bb.270: # in Loop: Header=BB0_214 Depth=2 + ld.d $a0, $sp, 376 # 8-byte Folded Reload move $fp, $t0 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 368 # 8-byte Folded Reload + ld.d $a0, $sp, 352 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 384 # 8-byte Folded Reload + ld.d $a0, $sp, 368 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 move $a0, $s0 @@ -2512,38 +2497,38 @@ SIM4: # @SIM4 move $a0, $s5 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 240 # 8-byte Folded Reload + ld.d $a0, $sp, 232 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 288 # 8-byte Folded Reload + ld.d $a0, $sp, 272 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 move $a0, $fp pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 ld.d $s6, $sp, 152 # 8-byte Folded Reload - ld.d $s4, $sp, 376 # 8-byte Folded Reload - b .LBB0_281 -.LBB0_272: # in Loop: Header=BB0_215 Depth=2 + ld.d $s4, $sp, 360 # 8-byte Folded Reload + b .LBB0_280 +.LBB0_271: # in Loop: Header=BB0_214 Depth=2 slli.d $a1, $s3, 2 ldx.w $a2, $s5, $a1 slli.d $a5, $s8, 2 - ld.d $a0, $sp, 288 # 8-byte Folded Reload + ld.d $a0, $sp, 272 # 8-byte Folded Reload ldx.w $a3, $a0, $a5 sub.w $a4, $s7, $a2 slt $a4, $a4, $a3 - ld.d $a0, $sp, 240 # 8-byte Folded Reload + ld.d $a0, $sp, 232 # 8-byte Folded Reload ldx.w $a6, $a0, $a1 masknez $a1, $a2, $a4 maskeqz $a2, $a3, $a4 or $s1, $a2, $a1 ld.d $s6, $sp, 152 # 8-byte Folded Reload - ld.d $s4, $sp, 376 # 8-byte Folded Reload + ld.d $s4, $sp, 360 # 8-byte Folded Reload ld.d $a2, $sp, 128 # 8-byte Folded Reload - ld.d $a0, $sp, 392 # 8-byte Folded Reload - beqz $s1, .LBB0_276 -# %bb.273: # in Loop: Header=BB0_215 Depth=2 - st.d $a6, $sp, 416 # 8-byte Folded Spill + ld.d $a0, $sp, 376 # 8-byte Folded Reload + beqz $s1, .LBB0_275 +# %bb.272: # in Loop: Header=BB0_214 Depth=2 + st.d $a6, $sp, 400 # 8-byte Folded Spill ldx.w $a0, $t0, $a5 ld.d $a1, $sp, 96 # 8-byte Folded Reload addi.d $s2, $a1, 1 @@ -2557,41 +2542,41 @@ SIM4: # @SIM4 jirl $ra, $ra, 0 move $a3, $a0 st.w $s2, $a0, 0 - ld.w $a1, $sp, 484 - ld.w $s2, $sp, 480 - ld.d $a0, $sp, 472 + ld.w $a1, $sp, 468 + ld.w $s2, $sp, 464 + ld.d $a0, $sp, 456 st.w $s4, $a3, 4 st.w $s6, $a3, 8 st.w $fp, $a3, 12 - bltu $s2, $a1, .LBB0_275 -# %bb.274: # in Loop: Header=BB0_215 Depth=2 + bltu $s2, $a1, .LBB0_274 +# %bb.273: # in Loop: Header=BB0_214 Depth=2 addi.d $a1, $a1, 5 - st.w $a1, $sp, 484 + st.w $a1, $sp, 468 bstrpick.d $a1, $a1, 31, 0 slli.d $a1, $a1, 3 move $fp, $a3 pcaddu18i $ra, %call36(xrealloc) jirl $ra, $ra, 0 move $a3, $fp - st.d $a0, $sp, 472 -.LBB0_275: # %add_col_elt.exit516.i - # in Loop: Header=BB0_215 Depth=2 + st.d $a0, $sp, 456 +.LBB0_274: # %add_col_elt.exit516.i + # in Loop: Header=BB0_214 Depth=2 bstrpick.d $a1, $s2, 31, 0 addi.d $a2, $s2, 1 - st.w $a2, $sp, 480 + st.w $a2, $sp, 464 slli.d $a1, $a1, 3 stx.d $a3, $a0, $a1 ld.d $s6, $sp, 152 # 8-byte Folded Reload - ld.d $s4, $sp, 376 # 8-byte Folded Reload + ld.d $s4, $sp, 360 # 8-byte Folded Reload ld.d $a2, $sp, 128 # 8-byte Folded Reload - ld.d $a0, $sp, 392 # 8-byte Folded Reload - ld.d $a6, $sp, 416 # 8-byte Folded Reload -.LBB0_276: # in Loop: Header=BB0_215 Depth=2 + ld.d $a0, $sp, 376 # 8-byte Folded Reload + ld.d $a6, $sp, 400 # 8-byte Folded Reload +.LBB0_275: # in Loop: Header=BB0_214 Depth=2 ld.d $a1, $sp, 112 # 8-byte Folded Reload - bgeu $s1, $s7, .LBB0_280 -# %bb.277: # in Loop: Header=BB0_215 Depth=2 + bgeu $s1, $s7, .LBB0_279 +# %bb.276: # in Loop: Header=BB0_214 Depth=2 ld.d $a0, $sp, 96 # 8-byte Folded Reload - ld.d $a3, $sp, 424 # 8-byte Folded Reload + ld.d $a3, $sp, 408 # 8-byte Folded Reload add.d $a0, $a0, $a3 add.d $a0, $a0, $a6 add.d $a0, $a0, $s1 @@ -2604,39 +2589,39 @@ SIM4: # @SIM4 jirl $ra, $ra, 0 move $a3, $a0 st.w $fp, $a0, 0 - ld.w $a1, $sp, 484 - ld.w $fp, $sp, 480 - ld.d $a0, $sp, 472 + ld.w $a1, $sp, 468 + ld.w $fp, $sp, 464 + ld.d $a0, $sp, 456 st.w $s1, $a3, 4 ld.d $a2, $sp, 72 # 8-byte Folded Reload st.w $a2, $a3, 8 st.w $s2, $a3, 12 - bltu $fp, $a1, .LBB0_279 -# %bb.278: # in Loop: Header=BB0_215 Depth=2 + bltu $fp, $a1, .LBB0_278 +# %bb.277: # in Loop: Header=BB0_214 Depth=2 addi.d $a1, $a1, 5 - st.w $a1, $sp, 484 + st.w $a1, $sp, 468 bstrpick.d $a1, $a1, 31, 0 slli.d $a1, $a1, 3 move $s1, $a3 pcaddu18i $ra, %call36(xrealloc) jirl $ra, $ra, 0 move $a3, $s1 - st.d $a0, $sp, 472 -.LBB0_279: # %add_col_elt.exit520.i - # in Loop: Header=BB0_215 Depth=2 + st.d $a0, $sp, 456 +.LBB0_278: # %add_col_elt.exit520.i + # in Loop: Header=BB0_214 Depth=2 bstrpick.d $a1, $fp, 31, 0 addi.d $a2, $fp, 1 - st.w $a2, $sp, 480 + st.w $a2, $sp, 464 slli.d $a1, $a1, 3 stx.d $a3, $a0, $a1 - ld.d $a0, $sp, 392 # 8-byte Folded Reload -.LBB0_280: # in Loop: Header=BB0_215 Depth=2 + ld.d $a0, $sp, 376 # 8-byte Folded Reload +.LBB0_279: # in Loop: Header=BB0_214 Depth=2 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 368 # 8-byte Folded Reload + ld.d $a0, $sp, 352 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 384 # 8-byte Folded Reload + ld.d $a0, $sp, 368 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 move $a0, $s0 @@ -2645,54 +2630,52 @@ SIM4: # @SIM4 move $a0, $s5 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 240 # 8-byte Folded Reload + ld.d $a0, $sp, 232 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 288 # 8-byte Folded Reload + ld.d $a0, $sp, 272 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 232 # 8-byte Folded Reload + ld.d $a0, $sp, 224 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 add.w $s2, $s8, $s3 -.LBB0_281: # %greedy.exitthread-pre-split - # in Loop: Header=BB0_215 Depth=2 +.LBB0_280: # %greedy.exitthread-pre-split + # in Loop: Header=BB0_214 Depth=2 ori $s8, $zero, 2 lu12i.w $s1, 128 - ld.d $t7, $sp, 400 # 8-byte Folded Reload + ld.d $t7, $sp, 384 # 8-byte Folded Reload ld.d $s5, $sp, 88 # 8-byte Folded Reload -.LBB0_282: # %greedy.exitthread-pre-split - # in Loop: Header=BB0_215 Depth=2 +.LBB0_281: # %greedy.exitthread-pre-split + # in Loop: Header=BB0_214 Depth=2 ld.d $a5, $sp, 104 # 8-byte Folded Reload -.LBB0_283: # %greedy.exitthread-pre-split - # in Loop: Header=BB0_215 Depth=2 +.LBB0_282: # %greedy.exitthread-pre-split + # in Loop: Header=BB0_214 Depth=2 ld.d $a6, $sp, 120 # 8-byte Folded Reload -.LBB0_284: # %greedy.exitthread-pre-split - # in Loop: Header=BB0_215 Depth=2 - ld.w $a1, $sp, 480 +.LBB0_283: # %greedy.exitthread-pre-split + # in Loop: Header=BB0_214 Depth=2 + ld.w $a1, $sp, 464 movgr2fr.w $fa0, $s2 ffint.d.w $fa0, $fa0 ld.d $s2, $sp, 160 # 8-byte Folded Reload ld.w $a3, $s6, 36 - beqz $a1, .LBB0_231 -.LBB0_285: # in Loop: Header=BB0_215 Depth=2 + beqz $a1, .LBB0_230 +.LBB0_284: # in Loop: Header=BB0_214 Depth=2 bstrpick.d $a0, $a3, 31, 0 movgr2fr.d $fa1, $a0 - ld.d $a0, $sp, 248 # 8-byte Folded Reload - fld.d $fa2, $a0, %pc_lo12(.LCPI0_1) ffint.d.l $fa1, $fa1 - movgr2fr.d $fa3, $a6 - ffint.d.l $fa3, $fa3 - vldi $vr4, -912 - fmadd.d $fa2, $fa3, $fa2, $fa4 + movgr2fr.d $fa2, $a6 + ffint.d.l $fa2, $fa2 + vldi $vr3, -912 + fmadd.d $fa2, $fa2, $fs1, $fa3 fcmp.clt.d $fcc0, $fa1, $fa2 ld.d $a0, $s6, 8 fsel $fa1, $fa1, $fa2, $fcc0 fcmp.cult.d $fcc0, $fa1, $fa0 - bceqz $fcc0, .LBB0_304 + bceqz $fcc0, .LBB0_303 .p2align 4, , 16 -.LBB0_286: # %.thread560 - # in Loop: Header=BB0_215 Depth=2 +.LBB0_285: # %.thread560 + # in Loop: Header=BB0_214 Depth=2 sltui $a1, $a3, 8 maskeqz $a2, $a3, $a1 ld.wu $a3, $s2, 8 @@ -2703,9 +2686,9 @@ SIM4: # @SIM4 add.d $a0, $a0, $a3 nor $a2, $a3, $zero add.d $a2, $a4, $a2 - st.w $a1, $sp, 468 - st.d $a0, $sp, 440 - st.w $a2, $sp, 448 + st.w $a1, $sp, 452 + st.d $a0, $sp, 424 + st.w $a2, $sp, 432 slli.d $a0, $a1, 1 addi.d $a0, $a0, -2 bstrpick.d $a0, $a0, 31, 1 @@ -2713,24 +2696,24 @@ SIM4: # @SIM4 addi.d $a1, $zero, -1 sll.w $a0, $a1, $a0 nor $a0, $a0, $zero - st.w $a0, $sp, 464 + st.w $a0, $sp, 448 sub.d $a0, $a4, $a3 bstrpick.d $a0, $a0, 31, 0 slli.d $a0, $a0, 2 pcaddu18i $ra, %call36(xmalloc) jirl $ra, $ra, 0 move $s0, $a0 - st.d $a0, $sp, 456 + st.d $a0, $sp, 440 ori $a1, $zero, 8 move $a0, $s1 pcaddu18i $ra, %call36(xcalloc) jirl $ra, $ra, 0 move $fp, $a0 - st.d $a0, $sp, 432 - addi.d $a0, $sp, 432 + st.d $a0, $sp, 416 + addi.d $a0, $sp, 416 pcaddu18i $ra, %call36(bld_table) jirl $ra, $ra, 0 - ld.d $a0, $sp, 280 # 8-byte Folded Reload + ld.d $a0, $sp, 264 # 8-byte Folded Reload ld.d $a0, $a0, 16 ld.wu $a2, $s2, 12 ld.w $a3, $s2, 8 @@ -2738,9 +2721,9 @@ SIM4: # @SIM4 add.d $a1, $a0, $a2 addi.w $a3, $a3, 1 addi.w $a4, $a2, 1 - addi.d $a0, $sp, 432 - addi.d $a6, $sp, 488 - addi.d $a2, $sp, 472 + addi.d $a0, $sp, 416 + addi.d $a6, $sp, 472 + addi.d $a2, $sp, 456 st.d $a2, $sp, 0 move $a2, $s7 move $a7, $zero @@ -2753,37 +2736,37 @@ SIM4: # @SIM4 ld.d $s2, $sp, 168 # 8-byte Folded Reload lu12i.w $s3, 1024 .p2align 4, , 16 -.LBB0_287: # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_215 Depth=2 +.LBB0_286: # Parent Loop BB0_6 Depth=1 + # Parent Loop BB0_214 Depth=2 # => This Inner Loop Header: Depth=3 ldx.d $a0, $fp, $s0 move $a1, $s2 pcaddu18i $ra, %call36(tdestroy) jirl $ra, $ra, 0 addi.d $s0, $s0, 8 - bne $s0, $s3, .LBB0_287 -# %bb.288: # %free_hash_env.exit459 - # in Loop: Header=BB0_215 Depth=2 + bne $s0, $s3, .LBB0_286 +# %bb.287: # %free_hash_env.exit459 + # in Loop: Header=BB0_214 Depth=2 move $a0, $fp pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.w $a2, $sp, 480 - ld.d $s4, $sp, 408 # 8-byte Folded Reload - beqz $a2, .LBB0_302 -# %bb.289: # in Loop: Header=BB0_215 Depth=2 - ld.d $a3, $sp, 472 + ld.w $a2, $sp, 464 + ld.d $s4, $sp, 392 # 8-byte Folded Reload + beqz $a2, .LBB0_301 +# %bb.288: # in Loop: Header=BB0_214 Depth=2 + ld.d $a3, $sp, 456 ld.d $a4, $a3, 0 ld.d $a0, $s6, 8 ld.w $t1, $a4, 0 - ld.d $a1, $sp, 280 # 8-byte Folded Reload + ld.d $a1, $sp, 264 # 8-byte Folded Reload ld.d $a1, $a1, 16 - ld.d $t7, $sp, 400 # 8-byte Folded Reload - bltu $t1, $s8, .LBB0_295 -# %bb.290: # in Loop: Header=BB0_215 Depth=2 + ld.d $t7, $sp, 384 # 8-byte Folded Reload + bltu $t1, $s8, .LBB0_294 +# %bb.289: # in Loop: Header=BB0_214 Depth=2 ld.wu $t0, $a4, 4 - bltu $t0, $s8, .LBB0_295 -# %bb.291: # %.lr.ph.preheader.i463 - # in Loop: Header=BB0_215 Depth=2 + bltu $t0, $s8, .LBB0_294 +# %bb.290: # %.lr.ph.preheader.i463 + # in Loop: Header=BB0_214 Depth=2 move $a5, $zero bstrpick.d $a7, $t1, 31, 0 add.d $a6, $a1, $t0 @@ -2791,39 +2774,39 @@ SIM4: # @SIM4 addi.d $t0, $t0, -1 addi.d $t1, $t1, -1 .p2align 4, , 16 -.LBB0_292: # %.lr.ph.i464 +.LBB0_291: # %.lr.ph.i464 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_215 Depth=2 + # Parent Loop BB0_214 Depth=2 # => This Inner Loop Header: Depth=3 add.d $t2, $a6, $a5 add.d $t3, $a7, $a5 ld.bu $t4, $t3, -2 ld.bu $t5, $t2, -2 - bne $t4, $t5, .LBB0_295 -# %bb.293: # in Loop: Header=BB0_292 Depth=3 + bne $t4, $t5, .LBB0_294 +# %bb.292: # in Loop: Header=BB0_291 Depth=3 addi.d $t3, $t3, -3 st.w $t1, $a4, 0 st.w $t0, $a4, 4 - bltu $t3, $a0, .LBB0_295 -# %bb.294: # in Loop: Header=BB0_292 Depth=3 + bltu $t3, $a0, .LBB0_294 +# %bb.293: # in Loop: Header=BB0_291 Depth=3 addi.d $t2, $t2, -3 addi.d $t0, $t0, -1 addi.d $t1, $t1, -1 addi.d $a5, $a5, -1 - bgeu $t2, $a1, .LBB0_292 -.LBB0_295: # %grow_exon_left.exit471 - # in Loop: Header=BB0_215 Depth=2 + bgeu $t2, $a1, .LBB0_291 +.LBB0_294: # %grow_exon_left.exit471 + # in Loop: Header=BB0_214 Depth=2 addi.d $a2, $a2, -1 bstrpick.d $a2, $a2, 31, 0 slli.d $a2, $a2, 3 ldx.d $a2, $a3, $a2 ld.w $a3, $s6, 16 ld.w $a4, $a2, 8 - bgeu $a4, $a3, .LBB0_300 -# %bb.296: # %.lr.ph.i474 - # in Loop: Header=BB0_215 Depth=2 + bgeu $a4, $a3, .LBB0_299 +# %bb.295: # %.lr.ph.i474 + # in Loop: Header=BB0_214 Depth=2 ld.w $a5, $a2, 12 - ld.d $a6, $sp, 280 # 8-byte Folded Reload + ld.d $a6, $sp, 264 # 8-byte Folded Reload ldptr.w $a6, $a6, 4148 bstrpick.d $a7, $a4, 31, 0 bstrpick.d $t0, $a5, 31, 0 @@ -2837,15 +2820,15 @@ SIM4: # @SIM4 addi.d $a5, $a5, 1 sub.d $a6, $a6, $t0 .p2align 4, , 16 -.LBB0_297: # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_215 Depth=2 +.LBB0_296: # Parent Loop BB0_6 Depth=1 + # Parent Loop BB0_214 Depth=2 # => This Inner Loop Header: Depth=3 - beqz $a6, .LBB0_300 -# %bb.298: # in Loop: Header=BB0_297 Depth=3 + beqz $a6, .LBB0_299 +# %bb.297: # in Loop: Header=BB0_296 Depth=3 ld.bu $a7, $a0, 0 ld.bu $t0, $a1, 0 - bne $a7, $t0, .LBB0_300 -# %bb.299: # in Loop: Header=BB0_297 Depth=3 + bne $a7, $t0, .LBB0_299 +# %bb.298: # in Loop: Header=BB0_296 Depth=3 addi.w $a4, $a4, 1 st.w $a4, $a2, 8 st.w $a5, $a2, 12 @@ -2853,38 +2836,38 @@ SIM4: # @SIM4 addi.d $a1, $a1, 1 addi.d $a5, $a5, 1 addi.d $a6, $a6, -1 - bne $a3, $a4, .LBB0_297 -.LBB0_300: # %grow_exon_right.exit484 - # in Loop: Header=BB0_215 Depth=2 + bne $a3, $a4, .LBB0_296 +.LBB0_299: # %grow_exon_right.exit484 + # in Loop: Header=BB0_214 Depth=2 ld.w $a3, $s6, 36 -.LBB0_301: # %.sink.split - # in Loop: Header=BB0_215 Depth=2 - addi.d $a1, $sp, 472 +.LBB0_300: # %.sink.split + # in Loop: Header=BB0_214 Depth=2 + addi.d $a1, $sp, 456 move $a0, $t7 move $a2, $s5 pcaddu18i $ra, %call36(merge) jirl $ra, $ra, 0 - ld.d $t7, $sp, 400 # 8-byte Folded Reload - st.w $zero, $sp, 480 + ld.d $t7, $sp, 384 # 8-byte Folded Reload + st.w $zero, $sp, 464 ld.d $s5, $sp, 176 # 8-byte Folded Reload - b .LBB0_303 -.LBB0_302: # in Loop: Header=BB0_215 Depth=2 - ld.d $t7, $sp, 400 # 8-byte Folded Reload -.LBB0_303: # in Loop: Header=BB0_215 Depth=2 + b .LBB0_302 +.LBB0_301: # in Loop: Header=BB0_214 Depth=2 + ld.d $t7, $sp, 384 # 8-byte Folded Reload +.LBB0_302: # in Loop: Header=BB0_214 Depth=2 ld.w $a0, $s4, 16 - b .LBB0_214 -.LBB0_304: # in Loop: Header=BB0_215 Depth=2 - ld.d $a4, $sp, 472 + b .LBB0_213 +.LBB0_303: # in Loop: Header=BB0_214 Depth=2 + ld.d $a4, $sp, 456 ld.d $a5, $a4, 0 ld.w $t2, $a5, 0 - ld.d $a2, $sp, 280 # 8-byte Folded Reload + ld.d $a2, $sp, 264 # 8-byte Folded Reload ld.d $a2, $a2, 16 - bltu $t2, $s8, .LBB0_310 -# %bb.305: # in Loop: Header=BB0_215 Depth=2 + bltu $t2, $s8, .LBB0_309 +# %bb.304: # in Loop: Header=BB0_214 Depth=2 ld.wu $t1, $a5, 4 - bltu $t1, $s8, .LBB0_310 -# %bb.306: # %.lr.ph.preheader.i433 - # in Loop: Header=BB0_215 Depth=2 + bltu $t1, $s8, .LBB0_309 +# %bb.305: # %.lr.ph.preheader.i433 + # in Loop: Header=BB0_214 Depth=2 move $a6, $zero bstrpick.d $t0, $t2, 31, 0 add.d $a7, $a2, $t1 @@ -2892,40 +2875,40 @@ SIM4: # @SIM4 addi.d $t1, $t1, -1 addi.d $t2, $t2, -1 .p2align 4, , 16 -.LBB0_307: # %.lr.ph.i434 +.LBB0_306: # %.lr.ph.i434 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_215 Depth=2 + # Parent Loop BB0_214 Depth=2 # => This Inner Loop Header: Depth=3 add.d $t3, $a7, $a6 add.d $t4, $t0, $a6 ld.bu $t5, $t4, -2 ld.bu $t6, $t3, -2 - bne $t5, $t6, .LBB0_310 -# %bb.308: # in Loop: Header=BB0_307 Depth=3 + bne $t5, $t6, .LBB0_309 +# %bb.307: # in Loop: Header=BB0_306 Depth=3 addi.d $t4, $t4, -3 st.w $t2, $a5, 0 st.w $t1, $a5, 4 - bltu $t4, $a0, .LBB0_310 -# %bb.309: # in Loop: Header=BB0_307 Depth=3 + bltu $t4, $a0, .LBB0_309 +# %bb.308: # in Loop: Header=BB0_306 Depth=3 addi.d $t3, $t3, -3 addi.d $t1, $t1, -1 addi.d $t2, $t2, -1 addi.d $a6, $a6, -1 - bgeu $t3, $a2, .LBB0_307 -.LBB0_310: # %grow_exon_left.exit441 - # in Loop: Header=BB0_215 Depth=2 + bgeu $t3, $a2, .LBB0_306 +.LBB0_309: # %grow_exon_left.exit441 + # in Loop: Header=BB0_214 Depth=2 addi.d $a1, $a1, -1 bstrpick.d $a1, $a1, 31, 0 slli.d $a1, $a1, 3 ldx.d $a1, $a4, $a1 ld.w $a4, $s6, 16 ld.w $a5, $a1, 8 - ld.d $s4, $sp, 408 # 8-byte Folded Reload - bgeu $a5, $a4, .LBB0_301 -# %bb.311: # %.lr.ph.i444 - # in Loop: Header=BB0_215 Depth=2 + ld.d $s4, $sp, 392 # 8-byte Folded Reload + bgeu $a5, $a4, .LBB0_300 +# %bb.310: # %.lr.ph.i444 + # in Loop: Header=BB0_214 Depth=2 ld.w $a6, $a1, 12 - ld.d $a7, $sp, 280 # 8-byte Folded Reload + ld.d $a7, $sp, 264 # 8-byte Folded Reload ldptr.w $a7, $a7, 4148 bstrpick.d $t0, $a5, 31, 0 bstrpick.d $t1, $a6, 31, 0 @@ -2939,15 +2922,15 @@ SIM4: # @SIM4 addi.d $a6, $a6, 1 sub.d $a7, $a7, $t1 .p2align 4, , 16 -.LBB0_312: # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_215 Depth=2 +.LBB0_311: # Parent Loop BB0_6 Depth=1 + # Parent Loop BB0_214 Depth=2 # => This Inner Loop Header: Depth=3 - beqz $a7, .LBB0_301 -# %bb.313: # in Loop: Header=BB0_312 Depth=3 + beqz $a7, .LBB0_300 +# %bb.312: # in Loop: Header=BB0_311 Depth=3 ld.bu $t0, $a0, 0 ld.bu $t1, $a2, 0 - bne $t0, $t1, .LBB0_301 -# %bb.314: # in Loop: Header=BB0_312 Depth=3 + bne $t0, $t1, .LBB0_300 +# %bb.313: # in Loop: Header=BB0_311 Depth=3 addi.w $a5, $a5, 1 st.w $a5, $a1, 8 st.w $a6, $a1, 12 @@ -2955,23 +2938,23 @@ SIM4: # @SIM4 addi.d $a2, $a2, 1 addi.d $a6, $a6, 1 addi.d $a7, $a7, -1 - bne $a4, $a5, .LBB0_312 - b .LBB0_301 -.LBB0_315: # in Loop: Header=BB0_215 Depth=2 + bne $a4, $a5, .LBB0_311 + b .LBB0_300 +.LBB0_314: # in Loop: Header=BB0_214 Depth=2 ld.d $a7, $sp, 112 # 8-byte Folded Reload ld.d $t0, $sp, 128 # 8-byte Folded Reload - ld.d $t1, $sp, 392 # 8-byte Folded Reload - ld.d $t2, $sp, 232 # 8-byte Folded Reload -.LBB0_316: # %scalar.ph1058.preheader - # in Loop: Header=BB0_215 Depth=2 + ld.d $t1, $sp, 376 # 8-byte Folded Reload + ld.d $t2, $sp, 224 # 8-byte Folded Reload +.LBB0_315: # %scalar.ph1058.preheader + # in Loop: Header=BB0_214 Depth=2 sub.d $a4, $t3, $a1 - ld.d $a2, $sp, 288 # 8-byte Folded Reload + ld.d $a2, $sp, 272 # 8-byte Folded Reload alsl.d $a2, $a1, $a2, 2 alsl.d $a1, $a1, $s5, 2 .p2align 4, , 16 -.LBB0_317: # %scalar.ph1058 +.LBB0_316: # %scalar.ph1058 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_215 Depth=2 + # Parent Loop BB0_214 Depth=2 # => This Inner Loop Header: Depth=3 st.w $s4, $a1, 0 addi.w $a3, $zero, -1 @@ -2980,22 +2963,22 @@ SIM4: # @SIM4 addi.d $a4, $a4, -1 addi.d $a2, $a2, 4 addi.d $a1, $a1, 4 - bnez $a4, .LBB0_317 -.LBB0_318: # %.preheader528.lr.ph.i - # in Loop: Header=BB0_215 Depth=2 + bnez $a4, .LBB0_316 +.LBB0_317: # %.preheader528.lr.ph.i + # in Loop: Header=BB0_214 Depth=2 ld.w $a1, $t7, 0 st.w $a1, $s5, 0 st.w $t5, $a0, 0 ld.w $a0, $t6, 0 addi.w $t4, $t5, -1 - ld.d $a1, $sp, 288 # 8-byte Folded Reload + ld.d $a1, $sp, 272 # 8-byte Folded Reload st.w $a0, $a1, 0 st.w $t5, $t2, 0 - ld.d $a2, $sp, 368 # 8-byte Folded Reload + ld.d $a2, $sp, 352 # 8-byte Folded Reload alsl.d $a0, $t5, $a2, 2 - st.d $a0, $sp, 224 # 8-byte Folded Spill - alsl.d $a0, $t5, $s0, 2 st.d $a0, $sp, 216 # 8-byte Folded Spill + alsl.d $a0, $t5, $s0, 2 + st.d $a0, $sp, 208 # 8-byte Folded Spill addi.w $a3, $t5, 2 add.d $a0, $t0, $s3 ld.d $a1, $sp, 96 # 8-byte Folded Reload @@ -3003,195 +2986,195 @@ SIM4: # @SIM4 nor $a1, $a1, $zero add.d $a0, $a1, $a0 alsl.d $a1, $t4, $t1, 2 - st.d $a1, $sp, 344 # 8-byte Folded Spill + st.d $a1, $sp, 328 # 8-byte Folded Spill alsl.d $a1, $t4, $a2, 2 - st.d $a1, $sp, 336 # 8-byte Folded Spill + st.d $a1, $sp, 320 # 8-byte Folded Spill move $a2, $a0 - ld.d $a0, $sp, 384 # 8-byte Folded Reload + ld.d $a0, $sp, 368 # 8-byte Folded Reload alsl.d $t6, $t4, $a0, 2 alsl.d $t7, $t4, $s0, 2 ori $s2, $zero, 1 addi.w $a0, $zero, -1 ori $t2, $zero, 2 ori $t3, $zero, 3 - st.d $t4, $sp, 360 # 8-byte Folded Spill + st.d $t4, $sp, 344 # 8-byte Folded Spill move $t1, $a0 - st.d $a0, $sp, 328 # 8-byte Folded Spill - st.d $a0, $sp, 272 # 8-byte Folded Spill + st.d $a0, $sp, 312 # 8-byte Folded Spill + st.d $a0, $sp, 256 # 8-byte Folded Spill move $ra, $t5 - b .LBB0_321 + b .LBB0_320 .p2align 4, , 16 -.LBB0_319: # in Loop: Header=BB0_321 Depth=3 +.LBB0_318: # in Loop: Header=BB0_320 Depth=3 move $ra, $a2 - ld.d $s3, $sp, 272 # 8-byte Folded Reload -.LBB0_320: # in Loop: Header=BB0_321 Depth=3 + ld.d $s3, $sp, 256 # 8-byte Folded Reload +.LBB0_319: # in Loop: Header=BB0_320 Depth=3 addi.w $s2, $s2, 1 - ld.d $a0, $sp, 360 # 8-byte Folded Reload + ld.d $a0, $sp, 344 # 8-byte Folded Reload addi.d $a0, $a0, -1 - st.d $a0, $sp, 360 # 8-byte Folded Spill + st.d $a0, $sp, 344 # 8-byte Folded Spill addi.w $a3, $a3, 1 - ld.d $a0, $sp, 328 # 8-byte Folded Reload + ld.d $a0, $sp, 312 # 8-byte Folded Reload addi.w $a0, $a0, -1 - st.d $a0, $sp, 328 # 8-byte Folded Spill + st.d $a0, $sp, 312 # 8-byte Folded Spill addi.w $t2, $t2, 1 - ld.d $a2, $sp, 312 # 8-byte Folded Reload + ld.d $a2, $sp, 296 # 8-byte Folded Reload addi.d $a2, $a2, -1 addi.w $t3, $t3, 2 - ld.d $a0, $sp, 344 # 8-byte Folded Reload + ld.d $a0, $sp, 328 # 8-byte Folded Reload addi.d $a0, $a0, -4 - st.d $a0, $sp, 344 # 8-byte Folded Spill - ld.d $a0, $sp, 336 # 8-byte Folded Reload + st.d $a0, $sp, 328 # 8-byte Folded Spill + ld.d $a0, $sp, 320 # 8-byte Folded Reload addi.d $a0, $a0, -4 - st.d $a0, $sp, 336 # 8-byte Folded Spill + st.d $a0, $sp, 320 # 8-byte Folded Spill addi.d $t6, $t6, -4 addi.d $t7, $t7, -4 move $t1, $s8 - bltu $ra, $s2, .LBB0_270 -.LBB0_321: # %.preheader528.i + bltu $ra, $s2, .LBB0_269 +.LBB0_320: # %.preheader528.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_215 Depth=2 + # Parent Loop BB0_214 Depth=2 # => This Loop Header: Depth=3 - # Child Loop BB0_332 Depth 4 - # Child Loop BB0_342 Depth 5 - # Child Loop BB0_347 Depth 4 - # Child Loop BB0_325 Depth 4 - # Child Loop BB0_354 Depth 4 - # Child Loop BB0_366 Depth 5 - # Child Loop BB0_371 Depth 4 - # Child Loop BB0_379 Depth 4 - st.d $t7, $sp, 296 # 8-byte Folded Spill - st.d $t6, $sp, 304 # 8-byte Folded Spill + # Child Loop BB0_331 Depth 4 + # Child Loop BB0_341 Depth 5 + # Child Loop BB0_346 Depth 4 + # Child Loop BB0_324 Depth 4 + # Child Loop BB0_353 Depth 4 + # Child Loop BB0_365 Depth 5 + # Child Loop BB0_370 Depth 4 + # Child Loop BB0_378 Depth 4 + st.d $t7, $sp, 280 # 8-byte Folded Spill + st.d $t6, $sp, 288 # 8-byte Folded Spill sub.w $t7, $t5, $s2 add.w $t8, $s2, $t5 - st.d $a6, $sp, 352 # 8-byte Folded Spill - st.d $a2, $sp, 312 # 8-byte Folded Spill - bge $a6, $t4, .LBB0_329 -# %bb.322: # %._crit_edge555.thread.i - # in Loop: Header=BB0_321 Depth=3 - ld.d $a0, $sp, 224 # 8-byte Folded Reload + st.d $a6, $sp, 336 # 8-byte Folded Spill + st.d $a2, $sp, 296 # 8-byte Folded Spill + bge $a6, $t4, .LBB0_328 +# %bb.321: # %._crit_edge555.thread.i + # in Loop: Header=BB0_320 Depth=3 + ld.d $a0, $sp, 216 # 8-byte Folded Reload ld.w $a1, $a0, 0 bstrpick.d $t6, $s2, 31, 0 alsl.d $a0, $t6, $s5, 2 slli.d $a2, $t6, 2 stx.w $a1, $s5, $a2 - ld.d $a1, $sp, 240 # 8-byte Folded Reload + ld.d $a1, $sp, 232 # 8-byte Folded Reload stx.w $t5, $a1, $a2 - ld.d $t0, $sp, 288 # 8-byte Folded Reload - beqz $s2, .LBB0_350 -.LBB0_323: # %.lr.ph561.i - # in Loop: Header=BB0_321 Depth=3 + ld.d $t0, $sp, 272 # 8-byte Folded Reload + beqz $s2, .LBB0_349 +.LBB0_322: # %.lr.ph561.i + # in Loop: Header=BB0_320 Depth=3 ld.w $a0, $a0, 0 move $s8, $zero sub.d $a1, $s2, $ra move $a6, $t0 move $a7, $t6 - b .LBB0_325 + b .LBB0_324 .p2align 4, , 16 -.LBB0_324: # in Loop: Header=BB0_325 Depth=4 +.LBB0_323: # in Loop: Header=BB0_324 Depth=4 addi.d $a7, $a7, -1 addi.w $s8, $s8, 1 addi.d $a6, $a6, 4 - beqz $a7, .LBB0_374 -.LBB0_325: # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_215 Depth=2 - # Parent Loop BB0_321 Depth=3 + beqz $a7, .LBB0_373 +.LBB0_324: # Parent Loop BB0_6 Depth=1 + # Parent Loop BB0_214 Depth=2 + # Parent Loop BB0_320 Depth=3 # => This Inner Loop Header: Depth=4 ld.w $a2, $a6, 0 - blt $a2, $a0, .LBB0_324 -# %bb.326: # in Loop: Header=BB0_325 Depth=4 + blt $a2, $a0, .LBB0_323 +# %bb.325: # in Loop: Header=BB0_324 Depth=4 add.w $a2, $s2, $s8 - bltu $a2, $ra, .LBB0_328 -# %bb.327: # in Loop: Header=BB0_325 Depth=4 + bltu $a2, $ra, .LBB0_327 +# %bb.326: # in Loop: Header=BB0_324 Depth=4 slti $a4, $t1, 0 add.w $a5, $a1, $s8 sltui $a5, $a5, 1 and $a4, $a5, $a4 - beqz $a4, .LBB0_324 -.LBB0_328: # %.._crit_edge562.i.loopexit_crit_edge - # in Loop: Header=BB0_321 Depth=3 - st.d $s2, $sp, 272 # 8-byte Folded Spill - b .LBB0_375 - .p2align 4, , 16 -.LBB0_329: # %.lr.ph554.i - # in Loop: Header=BB0_321 Depth=3 + beqz $a4, .LBB0_323 +.LBB0_327: # %.._crit_edge562.i.loopexit_crit_edge + # in Loop: Header=BB0_320 Depth=3 + st.d $s2, $sp, 256 # 8-byte Folded Spill + b .LBB0_374 + .p2align 4, , 16 +.LBB0_328: # %.lr.ph554.i + # in Loop: Header=BB0_320 Depth=3 bstrpick.d $a0, $s2, 31, 0 - st.d $a0, $sp, 320 # 8-byte Folded Spill - ld.d $a1, $sp, 392 # 8-byte Folded Reload + st.d $a0, $sp, 304 # 8-byte Folded Spill + ld.d $a1, $sp, 376 # 8-byte Folded Reload alsl.d $a0, $t8, $a1, 2 alsl.d $a1, $t7, $a1, 2 - ld.d $t5, $sp, 360 # 8-byte Folded Reload + ld.d $t5, $sp, 344 # 8-byte Folded Reload ori $t6, $zero, 1 ori $s8, $zero, 2 - ld.d $s3, $sp, 368 # 8-byte Folded Reload - b .LBB0_332 + ld.d $s3, $sp, 352 # 8-byte Folded Reload + b .LBB0_331 .p2align 4, , 16 -.LBB0_330: # in Loop: Header=BB0_332 Depth=4 +.LBB0_329: # in Loop: Header=BB0_331 Depth=4 move $a7, $a4 -.LBB0_331: # %.critedge7.i416 - # in Loop: Header=BB0_332 Depth=4 +.LBB0_330: # %.critedge7.i416 + # in Loop: Header=BB0_331 Depth=4 stx.w $a7, $s3, $a6 addi.d $t5, $t5, 1 addi.w $a4, $t5, 0 addi.d $a2, $a2, 1 - beq $a3, $a4, .LBB0_345 -.LBB0_332: # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_215 Depth=2 - # Parent Loop BB0_321 Depth=3 + beq $a3, $a4, .LBB0_344 +.LBB0_331: # Parent Loop BB0_6 Depth=1 + # Parent Loop BB0_214 Depth=2 + # Parent Loop BB0_320 Depth=3 # => This Loop Header: Depth=4 - # Child Loop BB0_342 Depth 5 + # Child Loop BB0_341 Depth 5 slli.d $a6, $t5, 2 - bne $t5, $t7, .LBB0_334 -# %bb.333: # in Loop: Header=BB0_332 Depth=4 + bne $t5, $t7, .LBB0_333 +# %bb.332: # in Loop: Header=BB0_331 Depth=4 ld.w $a7, $a1, 4 - b .LBB0_340 + b .LBB0_339 .p2align 4, , 16 -.LBB0_334: # in Loop: Header=BB0_332 Depth=4 - bne $t5, $t8, .LBB0_336 -# %bb.335: # in Loop: Header=BB0_332 Depth=4 +.LBB0_333: # in Loop: Header=BB0_331 Depth=4 + bne $t5, $t8, .LBB0_335 +# %bb.334: # in Loop: Header=BB0_331 Depth=4 ld.w $a4, $a0, -4 addi.w $a7, $a4, -1 - b .LBB0_340 + b .LBB0_339 .p2align 4, , 16 -.LBB0_336: # in Loop: Header=BB0_332 Depth=4 - ld.d $a5, $sp, 392 # 8-byte Folded Reload +.LBB0_335: # in Loop: Header=BB0_331 Depth=4 + ld.d $a5, $sp, 376 # 8-byte Folded Reload alsl.d $a4, $t5, $a5, 2 ldx.w $a5, $a5, $a6 ld.w $t0, $a4, 4 - bge $t0, $a5, .LBB0_338 -# %bb.337: # %._crit_edge653.i - # in Loop: Header=BB0_332 Depth=4 + bge $t0, $a5, .LBB0_337 +# %bb.336: # %._crit_edge653.i + # in Loop: Header=BB0_331 Depth=4 ld.w $a4, $a4, -4 - b .LBB0_339 -.LBB0_338: # in Loop: Header=BB0_332 Depth=4 + b .LBB0_338 +.LBB0_337: # in Loop: Header=BB0_331 Depth=4 ld.w $a4, $a4, -4 addi.w $a7, $a5, -1 - bge $a4, $a7, .LBB0_340 -.LBB0_339: # in Loop: Header=BB0_332 Depth=4 + bge $a4, $a7, .LBB0_339 +.LBB0_338: # in Loop: Header=BB0_331 Depth=4 slt $a7, $a4, $t0 slt $a5, $a4, $a5 addi.w $a4, $a4, -1 masknez $s3, $t0, $a5 maskeqz $a4, $a4, $a5 or $a4, $a4, $s3 - ld.d $s3, $sp, 368 # 8-byte Folded Reload + ld.d $s3, $sp, 352 # 8-byte Folded Reload ori $t6, $zero, 1 maskeqz $a4, $a4, $a7 masknez $a5, $t0, $a7 or $a7, $a4, $a5 .p2align 4, , 16 -.LBB0_340: # in Loop: Header=BB0_332 Depth=4 - blez $a7, .LBB0_331 -# %bb.341: # in Loop: Header=BB0_332 Depth=4 - ld.d $a4, $sp, 424 # 8-byte Folded Reload +.LBB0_339: # in Loop: Header=BB0_331 Depth=4 + blez $a7, .LBB0_330 +# %bb.340: # in Loop: Header=BB0_331 Depth=4 + ld.d $a4, $sp, 408 # 8-byte Folded Reload add.d $a4, $a4, $t5 add.w $a4, $a4, $a7 - blez $a4, .LBB0_331 + blez $a4, .LBB0_330 .p2align 4, , 16 -.LBB0_342: # %.lr.ph548.i +.LBB0_341: # %.lr.ph548.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_215 Depth=2 - # Parent Loop BB0_321 Depth=3 - # Parent Loop BB0_332 Depth=4 + # Parent Loop BB0_214 Depth=2 + # Parent Loop BB0_320 Depth=3 + # Parent Loop BB0_331 Depth=4 # => This Inner Loop Header: Depth=5 move $a4, $a7 add.w $a5, $a2, $a7 @@ -3199,114 +3182,114 @@ SIM4: # @SIM4 ld.bu $a7, $a7, -1 add.d $t0, $s1, $a5 ld.bu $t0, $t0, -1 - bne $a7, $t0, .LBB0_330 -# %bb.343: # in Loop: Header=BB0_342 Depth=5 + bne $a7, $t0, .LBB0_329 +# %bb.342: # in Loop: Header=BB0_341 Depth=5 addi.w $a7, $a4, -1 - blt $a5, $s8, .LBB0_331 -# %bb.344: # in Loop: Header=BB0_342 Depth=5 - blt $t6, $a4, .LBB0_342 - b .LBB0_331 + blt $a5, $s8, .LBB0_330 +# %bb.343: # in Loop: Header=BB0_341 Depth=5 + blt $t6, $a4, .LBB0_341 + b .LBB0_330 .p2align 4, , 16 -.LBB0_345: # %._crit_edge555.i - # in Loop: Header=BB0_321 Depth=3 - ld.d $a0, $sp, 224 # 8-byte Folded Reload +.LBB0_344: # %._crit_edge555.i + # in Loop: Header=BB0_320 Depth=3 + ld.d $a0, $sp, 216 # 8-byte Folded Reload ld.w $a2, $a0, 0 move $a1, $zero - ld.d $t6, $sp, 320 # 8-byte Folded Reload + ld.d $t6, $sp, 304 # 8-byte Folded Reload alsl.d $a0, $t6, $s5, 2 slli.d $a4, $t6, 2 stx.w $a2, $s5, $a4 - ld.d $a5, $sp, 240 # 8-byte Folded Reload + ld.d $a5, $sp, 232 # 8-byte Folded Reload alsl.d $a2, $t6, $a5, 2 - ld.d $t5, $sp, 416 # 8-byte Folded Reload + ld.d $t5, $sp, 400 # 8-byte Folded Reload stx.w $t5, $a5, $a4 - ld.d $a6, $sp, 336 # 8-byte Folded Reload - ld.d $a7, $sp, 344 # 8-byte Folded Reload - ld.d $t0, $sp, 288 # 8-byte Folded Reload - b .LBB0_347 + ld.d $a6, $sp, 320 # 8-byte Folded Reload + ld.d $a7, $sp, 328 # 8-byte Folded Reload + ld.d $t0, $sp, 272 # 8-byte Folded Reload + b .LBB0_346 .p2align 4, , 16 -.LBB0_346: # in Loop: Header=BB0_347 Depth=4 +.LBB0_345: # in Loop: Header=BB0_346 Depth=4 addi.w $a1, $a1, 1 addi.d $a7, $a7, 4 addi.d $a6, $a6, 4 - beq $t3, $a1, .LBB0_349 -.LBB0_347: # %.lr.ph559.i + beq $t3, $a1, .LBB0_348 +.LBB0_346: # %.lr.ph559.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_215 Depth=2 - # Parent Loop BB0_321 Depth=3 + # Parent Loop BB0_214 Depth=2 + # Parent Loop BB0_320 Depth=3 # => This Inner Loop Header: Depth=4 ld.w $a4, $a6, 0 st.w $a4, $a7, 0 st.w $s4, $a6, 0 ld.w $a4, $a7, 0 ld.w $a5, $a0, 0 - bge $a4, $a5, .LBB0_346 -# %bb.348: # in Loop: Header=BB0_347 Depth=4 + bge $a4, $a5, .LBB0_345 +# %bb.347: # in Loop: Header=BB0_346 Depth=4 st.w $a4, $a0, 0 add.d $a4, $t4, $a1 st.w $a4, $a2, 0 - b .LBB0_346 + b .LBB0_345 .p2align 4, , 16 -.LBB0_349: # in Loop: Header=BB0_321 Depth=3 - ld.d $a6, $sp, 352 # 8-byte Folded Reload - bnez $s2, .LBB0_323 -.LBB0_350: # in Loop: Header=BB0_321 Depth=3 +.LBB0_348: # in Loop: Header=BB0_320 Depth=3 + ld.d $a6, $sp, 336 # 8-byte Folded Reload + bnez $s2, .LBB0_322 +.LBB0_349: # in Loop: Header=BB0_320 Depth=3 move $a2, $ra move $s8, $t1 alsl.d $t1, $t6, $t0, 2 - st.d $t6, $sp, 320 # 8-byte Folded Spill + st.d $t6, $sp, 304 # 8-byte Folded Spill slli.d $s3, $t6, 2 - blt $a6, $t4, .LBB0_376 -.LBB0_351: # %.lr.ph584.i - # in Loop: Header=BB0_321 Depth=3 - ld.d $a0, $sp, 384 # 8-byte Folded Reload + blt $a6, $t4, .LBB0_375 +.LBB0_350: # %.lr.ph584.i + # in Loop: Header=BB0_320 Depth=3 + ld.d $a0, $sp, 368 # 8-byte Folded Reload alsl.d $ra, $t8, $a0, 2 alsl.d $a1, $t7, $a0, 2 - ld.d $a0, $sp, 360 # 8-byte Folded Reload - ld.d $t5, $sp, 328 # 8-byte Folded Reload - b .LBB0_354 + ld.d $a0, $sp, 344 # 8-byte Folded Reload + ld.d $t5, $sp, 312 # 8-byte Folded Reload + b .LBB0_353 .p2align 4, , 16 -.LBB0_352: # in Loop: Header=BB0_354 Depth=4 +.LBB0_351: # in Loop: Header=BB0_353 Depth=4 move $t0, $a7 -.LBB0_353: # %.critedge11.i419 - # in Loop: Header=BB0_354 Depth=4 +.LBB0_352: # %.critedge11.i419 + # in Loop: Header=BB0_353 Depth=4 stx.w $t0, $s0, $a6 addi.w $t5, $t5, 1 addi.d $a0, $a0, 1 - beq $t5, $t2, .LBB0_369 -.LBB0_354: # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_215 Depth=2 - # Parent Loop BB0_321 Depth=3 + beq $t5, $t2, .LBB0_368 +.LBB0_353: # Parent Loop BB0_6 Depth=1 + # Parent Loop BB0_214 Depth=2 + # Parent Loop BB0_320 Depth=3 # => This Loop Header: Depth=4 - # Child Loop BB0_366 Depth 5 + # Child Loop BB0_365 Depth 5 slli.d $a6, $a0, 2 - bne $a0, $t7, .LBB0_356 -# %bb.355: # in Loop: Header=BB0_354 Depth=4 + bne $a0, $t7, .LBB0_355 +# %bb.354: # in Loop: Header=BB0_353 Depth=4 ld.w $a4, $a1, 4 addi.w $a7, $a4, 1 - b .LBB0_362 + b .LBB0_361 .p2align 4, , 16 -.LBB0_356: # in Loop: Header=BB0_354 Depth=4 - bne $a0, $t8, .LBB0_358 -# %bb.357: # in Loop: Header=BB0_354 Depth=4 +.LBB0_355: # in Loop: Header=BB0_353 Depth=4 + bne $a0, $t8, .LBB0_357 +# %bb.356: # in Loop: Header=BB0_353 Depth=4 ld.w $a7, $ra, -4 - b .LBB0_362 + b .LBB0_361 .p2align 4, , 16 -.LBB0_358: # in Loop: Header=BB0_354 Depth=4 - ld.d $a5, $sp, 384 # 8-byte Folded Reload +.LBB0_357: # in Loop: Header=BB0_353 Depth=4 + ld.d $a5, $sp, 368 # 8-byte Folded Reload alsl.d $a4, $a0, $a5, 2 ldx.w $a5, $a5, $a6 ld.w $t0, $a4, 4 - bge $a5, $t0, .LBB0_360 -# %bb.359: # %._crit_edge654.i - # in Loop: Header=BB0_354 Depth=4 + bge $a5, $t0, .LBB0_359 +# %bb.358: # %._crit_edge654.i + # in Loop: Header=BB0_353 Depth=4 ld.w $a4, $a4, -4 - b .LBB0_361 -.LBB0_360: # in Loop: Header=BB0_354 Depth=4 + b .LBB0_360 +.LBB0_359: # in Loop: Header=BB0_353 Depth=4 ld.w $a4, $a4, -4 addi.w $a7, $a5, 1 - bge $a7, $a4, .LBB0_362 -.LBB0_361: # in Loop: Header=BB0_354 Depth=4 + bge $a7, $a4, .LBB0_361 +.LBB0_360: # in Loop: Header=BB0_353 Depth=4 addi.w $a7, $t0, 1 slt $a5, $t0, $a5 slt $t0, $a4, $a7 @@ -3317,68 +3300,68 @@ SIM4: # @SIM4 maskeqz $a4, $a4, $a5 or $a7, $a4, $a7 .p2align 4, , 16 -.LBB0_362: # in Loop: Header=BB0_354 Depth=4 - bltz $a7, .LBB0_352 +.LBB0_361: # in Loop: Header=BB0_353 Depth=4 + bltz $a7, .LBB0_351 +# %bb.362: # %.preheader.i422 + # in Loop: Header=BB0_353 Depth=4 + bgeu $a7, $s7, .LBB0_351 # %bb.363: # %.preheader.i422 - # in Loop: Header=BB0_354 Depth=4 - bgeu $a7, $s7, .LBB0_352 -# %bb.364: # %.preheader.i422 - # in Loop: Header=BB0_354 Depth=4 - ld.d $a4, $sp, 416 # 8-byte Folded Reload + # in Loop: Header=BB0_353 Depth=4 + ld.d $a4, $sp, 400 # 8-byte Folded Reload sub.d $a4, $a0, $a4 add.w $a4, $a4, $a7 - bgeu $a4, $fp, .LBB0_352 -# %bb.365: # %.lr.ph577.preheader.i - # in Loop: Header=BB0_354 Depth=4 + bgeu $a4, $fp, .LBB0_351 +# %bb.364: # %.lr.ph577.preheader.i + # in Loop: Header=BB0_353 Depth=4 add.d $a4, $a7, $t5 bstrpick.d $a5, $a4, 31, 0 move $t0, $a7 .p2align 4, , 16 -.LBB0_366: # %.lr.ph577.i +.LBB0_365: # %.lr.ph577.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_215 Depth=2 - # Parent Loop BB0_321 Depth=3 - # Parent Loop BB0_354 Depth=4 + # Parent Loop BB0_214 Depth=2 + # Parent Loop BB0_320 Depth=3 + # Parent Loop BB0_353 Depth=4 # => This Inner Loop Header: Depth=5 ldx.bu $a4, $s6, $a7 ldx.bu $t6, $s1, $a5 - bne $a4, $t6, .LBB0_353 -# %bb.367: # in Loop: Header=BB0_366 Depth=5 + bne $a4, $t6, .LBB0_352 +# %bb.366: # in Loop: Header=BB0_365 Depth=5 addi.d $a7, $a7, 1 addi.d $t0, $t0, 1 - bgeu $a7, $s7, .LBB0_353 -# %bb.368: # in Loop: Header=BB0_366 Depth=5 + bgeu $a7, $s7, .LBB0_352 +# %bb.367: # in Loop: Header=BB0_365 Depth=5 addi.d $a5, $a5, 1 - bltu $a5, $fp, .LBB0_366 - b .LBB0_353 + bltu $a5, $fp, .LBB0_365 + b .LBB0_352 .p2align 4, , 16 -.LBB0_369: # %._crit_edge585.i - # in Loop: Header=BB0_321 Depth=3 - ld.d $a0, $sp, 216 # 8-byte Folded Reload +.LBB0_368: # %._crit_edge585.i + # in Loop: Header=BB0_320 Depth=3 + ld.d $a0, $sp, 208 # 8-byte Folded Reload ld.w $a1, $a0, 0 move $a0, $zero - ld.d $a4, $sp, 288 # 8-byte Folded Reload + ld.d $a4, $sp, 272 # 8-byte Folded Reload stx.w $a1, $a4, $s3 - ld.d $t0, $sp, 232 # 8-byte Folded Reload - ld.d $a1, $sp, 320 # 8-byte Folded Reload + ld.d $t0, $sp, 224 # 8-byte Folded Reload + ld.d $a1, $sp, 304 # 8-byte Folded Reload alsl.d $a1, $a1, $t0, 2 - ld.d $t5, $sp, 416 # 8-byte Folded Reload + ld.d $t5, $sp, 400 # 8-byte Folded Reload stx.w $t5, $t0, $s3 - ld.d $t7, $sp, 296 # 8-byte Folded Reload + ld.d $t7, $sp, 280 # 8-byte Folded Reload move $a6, $t7 - ld.d $t6, $sp, 304 # 8-byte Folded Reload + ld.d $t6, $sp, 288 # 8-byte Folded Reload move $a7, $t6 - b .LBB0_371 + b .LBB0_370 .p2align 4, , 16 -.LBB0_370: # in Loop: Header=BB0_371 Depth=4 +.LBB0_369: # in Loop: Header=BB0_370 Depth=4 addi.w $a0, $a0, 1 addi.d $a7, $a7, 4 addi.d $a6, $a6, 4 - beq $t3, $a0, .LBB0_373 -.LBB0_371: # %.lr.ph589.i + beq $t3, $a0, .LBB0_372 +.LBB0_370: # %.lr.ph589.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_215 Depth=2 - # Parent Loop BB0_321 Depth=3 + # Parent Loop BB0_214 Depth=2 + # Parent Loop BB0_320 Depth=3 # => This Inner Loop Header: Depth=4 ld.w $a4, $a6, 0 st.w $a4, $a7, 0 @@ -3387,100 +3370,100 @@ SIM4: # @SIM4 st.w $a4, $a6, 0 ld.w $a4, $a7, 0 ld.w $a5, $t1, 0 - bge $a5, $a4, .LBB0_370 -# %bb.372: # in Loop: Header=BB0_371 Depth=4 + bge $a5, $a4, .LBB0_369 +# %bb.371: # in Loop: Header=BB0_370 Depth=4 st.w $a4, $t1, 0 add.d $a4, $t4, $a0 st.w $a4, $a1, 0 - b .LBB0_370 + b .LBB0_369 .p2align 4, , 16 -.LBB0_373: # in Loop: Header=BB0_321 Depth=3 - ld.d $a6, $sp, 352 # 8-byte Folded Reload - b .LBB0_377 +.LBB0_372: # in Loop: Header=BB0_320 Depth=3 + ld.d $a6, $sp, 336 # 8-byte Folded Reload + b .LBB0_376 .p2align 4, , 16 -.LBB0_374: # in Loop: Header=BB0_321 Depth=3 +.LBB0_373: # in Loop: Header=BB0_320 Depth=3 move $a2, $ra move $s8, $t1 -.LBB0_375: # %._crit_edge562.i - # in Loop: Header=BB0_321 Depth=3 - ld.d $a6, $sp, 352 # 8-byte Folded Reload +.LBB0_374: # %._crit_edge562.i + # in Loop: Header=BB0_320 Depth=3 + ld.d $a6, $sp, 336 # 8-byte Folded Reload alsl.d $t1, $t6, $t0, 2 - st.d $t6, $sp, 320 # 8-byte Folded Spill + st.d $t6, $sp, 304 # 8-byte Folded Spill slli.d $s3, $t6, 2 - bge $a6, $t4, .LBB0_351 -.LBB0_376: # %._crit_edge585.thread.i - # in Loop: Header=BB0_321 Depth=3 - ld.d $a0, $sp, 216 # 8-byte Folded Reload + bge $a6, $t4, .LBB0_350 +.LBB0_375: # %._crit_edge585.thread.i + # in Loop: Header=BB0_320 Depth=3 + ld.d $a0, $sp, 208 # 8-byte Folded Reload ld.w $a0, $a0, 0 stx.w $a0, $t0, $s3 - ld.d $t0, $sp, 232 # 8-byte Folded Reload + ld.d $t0, $sp, 224 # 8-byte Folded Reload stx.w $t5, $t0, $s3 - ld.d $t6, $sp, 304 # 8-byte Folded Reload - ld.d $t7, $sp, 296 # 8-byte Folded Reload -.LBB0_377: # %.preheader526.i - # in Loop: Header=BB0_321 Depth=3 + ld.d $t6, $sp, 288 # 8-byte Folded Reload + ld.d $t7, $sp, 280 # 8-byte Folded Reload +.LBB0_376: # %.preheader526.i + # in Loop: Header=BB0_320 Depth=3 move $s3, $zero ld.w $a0, $t1, 0 addi.w $t4, $t4, -1 addi.w $a6, $a6, 1 sub.d $a1, $s2, $a2 - b .LBB0_379 + b .LBB0_378 .p2align 4, , 16 -.LBB0_378: # in Loop: Header=BB0_379 Depth=4 +.LBB0_377: # in Loop: Header=BB0_378 Depth=4 addi.w $s3, $s3, 1 - bltu $s2, $s3, .LBB0_319 -.LBB0_379: # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_215 Depth=2 - # Parent Loop BB0_321 Depth=3 + bltu $s2, $s3, .LBB0_318 +.LBB0_378: # Parent Loop BB0_6 Depth=1 + # Parent Loop BB0_214 Depth=2 + # Parent Loop BB0_320 Depth=3 # => This Inner Loop Header: Depth=4 bstrpick.d $a4, $s3, 31, 0 slli.d $a4, $a4, 2 ldx.w $a4, $s5, $a4 - blt $a0, $a4, .LBB0_378 -# %bb.380: # in Loop: Header=BB0_379 Depth=4 + blt $a0, $a4, .LBB0_377 +# %bb.379: # in Loop: Header=BB0_378 Depth=4 add.w $ra, $s2, $s3 - bltu $ra, $a2, .LBB0_382 -# %bb.381: # in Loop: Header=BB0_379 Depth=4 + bltu $ra, $a2, .LBB0_381 +# %bb.380: # in Loop: Header=BB0_378 Depth=4 slti $a4, $s8, 0 add.w $a5, $a1, $s3 sltui $a5, $a5, 1 and $a4, $a5, $a4 - beqz $a4, .LBB0_378 -.LBB0_382: # %._crit_edge1583 - # in Loop: Header=BB0_321 Depth=3 + beqz $a4, .LBB0_377 +.LBB0_381: # %._crit_edge1583 + # in Loop: Header=BB0_320 Depth=3 move $s8, $s2 - st.d $s3, $sp, 272 # 8-byte Folded Spill - b .LBB0_320 + st.d $s3, $sp, 256 # 8-byte Folded Spill + b .LBB0_319 .p2align 4, , 16 -.LBB0_383: # %.loopexit600 +.LBB0_382: # %.loopexit600 # in Loop: Header=BB0_6 Depth=1 ld.d $a1, $s6, 8 - ld.d $a0, $sp, 280 # 8-byte Folded Reload + ld.d $a0, $sp, 264 # 8-byte Folded Reload ld.d $a2, $a0, 16 move $a0, $s4 pcaddu18i $ra, %call36(kill_polyA) jirl $ra, $ra, 0 ld.w $a2, $s4, 16 ori $s7, $zero, 28 - bltu $a2, $s8, .LBB0_396 -# %bb.384: # %.lr.ph.i486.preheader + bltu $a2, $s8, .LBB0_395 +# %bb.383: # %.lr.ph.i486.preheader # in Loop: Header=BB0_6 Depth=1 ld.w $a0, $s6, 36 - st.d $a0, $sp, 424 # 8-byte Folded Spill + st.d $a0, $sp, 408 # 8-byte Folded Spill ori $s2, $zero, 1 - ld.d $ra, $sp, 400 # 8-byte Folded Reload - b .LBB0_388 + ld.d $ra, $sp, 384 # 8-byte Folded Reload + b .LBB0_387 .p2align 4, , 16 -.LBB0_385: # in Loop: Header=BB0_388 Depth=2 +.LBB0_384: # in Loop: Header=BB0_387 Depth=2 addi.w $s2, $s2, 1 +.LBB0_385: # %thread-pre-split + # in Loop: Header=BB0_387 Depth=2 + ld.d $s4, $sp, 392 # 8-byte Folded Reload .LBB0_386: # %thread-pre-split - # in Loop: Header=BB0_388 Depth=2 - ld.d $s4, $sp, 408 # 8-byte Folded Reload -.LBB0_387: # %thread-pre-split - # in Loop: Header=BB0_388 Depth=2 + # in Loop: Header=BB0_387 Depth=2 ld.w $a2, $s4, 16 - bgeu $s2, $a2, .LBB0_397 -.LBB0_388: # %.lr.ph.i486 + bgeu $s2, $a2, .LBB0_396 +.LBB0_387: # %.lr.ph.i486 # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 ld.d $a0, $ra, 0 @@ -3493,18 +3476,18 @@ SIM4: # @SIM4 ldx.d $s0, $a0, $a1 ld.w $a0, $fp, 4 ld.w $a1, $s0, 4 - ld.d $a3, $sp, 376 # 8-byte Folded Reload + ld.d $a3, $sp, 360 # 8-byte Folded Reload ld.w $a3, $a3, 36 sub.w $s3, $a0, $a1 - bltu $a3, $s3, .LBB0_385 -# %bb.389: # in Loop: Header=BB0_388 Depth=2 + bltu $a3, $s3, .LBB0_384 +# %bb.388: # in Loop: Header=BB0_387 Depth=2 addi.d $a0, $a2, -1 - ld.d $s4, $sp, 408 # 8-byte Folded Reload + ld.d $s4, $sp, 392 # 8-byte Folded Reload st.w $a0, $s4, 16 ld.w $a0, $s0, 12 ld.w $a1, $fp, 12 - bgeu $a1, $a0, .LBB0_392 -# %bb.390: # in Loop: Header=BB0_388 Depth=2 + bgeu $a1, $a0, .LBB0_391 +# %bb.389: # in Loop: Header=BB0_387 Depth=2 move $a0, $fp pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 @@ -3518,9 +3501,9 @@ SIM4: # @SIM4 pcaddu18i $ra, %call36(memmove) jirl $ra, $ra, 0 ld.w $a2, $s4, 16 - bgeu $s2, $a2, .LBB0_394 -# %bb.391: # in Loop: Header=BB0_388 Depth=2 - ld.d $ra, $sp, 400 # 8-byte Folded Reload + bgeu $s2, $a2, .LBB0_393 +# %bb.390: # in Loop: Header=BB0_387 Depth=2 + ld.d $ra, $sp, 384 # 8-byte Folded Reload ld.d $a0, $ra, 0 ld.w $a1, $s0, 8 ld.w $a2, $s0, 12 @@ -3535,9 +3518,9 @@ SIM4: # @SIM4 st.w $a1, $a0, 0 sub.d $a1, $a2, $s3 st.w $a1, $a0, 4 - b .LBB0_386 + b .LBB0_385 .p2align 4, , 16 -.LBB0_392: # in Loop: Header=BB0_388 Depth=2 +.LBB0_391: # in Loop: Header=BB0_387 Depth=2 move $a0, $s0 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 @@ -3551,9 +3534,9 @@ SIM4: # @SIM4 slli.d $a2, $a2, 3 pcaddu18i $ra, %call36(memmove) jirl $ra, $ra, 0 - bltu $s2, $s8, .LBB0_395 -# %bb.393: # in Loop: Header=BB0_388 Depth=2 - ld.d $ra, $sp, 400 # 8-byte Folded Reload + bltu $s2, $s8, .LBB0_394 +# %bb.392: # in Loop: Header=BB0_387 Depth=2 + ld.d $ra, $sp, 384 # 8-byte Folded Reload ld.d $a0, $ra, 0 addi.d $a1, $s2, -2 bstrpick.d $a1, $a1, 31, 0 @@ -3571,38 +3554,38 @@ SIM4: # @SIM4 st.w $a0, $fp, 0 sub.d $a0, $a1, $s3 st.w $a0, $fp, 4 - b .LBB0_387 -.LBB0_394: # in Loop: Header=BB0_388 Depth=2 - ld.d $s4, $sp, 408 # 8-byte Folded Reload - ld.d $ra, $sp, 400 # 8-byte Folded Reload - bltu $s2, $a2, .LBB0_388 - b .LBB0_397 -.LBB0_395: # in Loop: Header=BB0_388 Depth=2 - ld.d $ra, $sp, 400 # 8-byte Folded Reload - b .LBB0_387 + b .LBB0_386 +.LBB0_393: # in Loop: Header=BB0_387 Depth=2 + ld.d $s4, $sp, 392 # 8-byte Folded Reload + ld.d $ra, $sp, 384 # 8-byte Folded Reload + bltu $s2, $a2, .LBB0_387 + b .LBB0_396 +.LBB0_394: # in Loop: Header=BB0_387 Depth=2 + ld.d $ra, $sp, 384 # 8-byte Folded Reload + b .LBB0_386 .p2align 4, , 16 -.LBB0_396: # in Loop: Header=BB0_6 Depth=1 - ld.d $ra, $sp, 400 # 8-byte Folded Reload - b .LBB0_407 +.LBB0_395: # in Loop: Header=BB0_6 Depth=1 + ld.d $ra, $sp, 384 # 8-byte Folded Reload + b .LBB0_406 .p2align 4, , 16 -.LBB0_397: # %.preheader.i487 +.LBB0_396: # %.preheader.i487 # in Loop: Header=BB0_6 Depth=1 - bltu $a2, $s8, .LBB0_407 -# %bb.398: # %.lr.ph86.i + bltu $a2, $s8, .LBB0_406 +# %bb.397: # %.lr.ph86.i # in Loop: Header=BB0_6 Depth=1 - ld.d $a0, $sp, 424 # 8-byte Folded Reload + ld.d $a0, $sp, 408 # 8-byte Folded Reload addi.d $fp, $a0, 1 ori $s1, $zero, 1 - ld.d $s3, $sp, 376 # 8-byte Folded Reload - b .LBB0_401 + ld.d $s3, $sp, 360 # 8-byte Folded Reload + b .LBB0_400 .p2align 4, , 16 -.LBB0_399: # in Loop: Header=BB0_401 Depth=2 +.LBB0_398: # in Loop: Header=BB0_400 Depth=2 move $s0, $s1 addi.w $s1, $s0, 1 - bltu $s1, $a2, .LBB0_401 - b .LBB0_407 + bltu $s1, $a2, .LBB0_400 + b .LBB0_406 .p2align 4, , 16 -.LBB0_400: # in Loop: Header=BB0_401 Depth=2 +.LBB0_399: # in Loop: Header=BB0_400 Depth=2 ld.d $a2, $a0, 8 st.d $a2, $a1, 8 pcaddu18i $ra, %call36(free) @@ -3618,11 +3601,11 @@ SIM4: # @SIM4 slli.d $a2, $a2, 3 pcaddu18i $ra, %call36(memmove) jirl $ra, $ra, 0 - ld.d $ra, $sp, 400 # 8-byte Folded Reload + ld.d $ra, $sp, 384 # 8-byte Folded Reload ld.w $a2, $s4, 16 addi.w $s1, $s0, 1 - bgeu $s1, $a2, .LBB0_407 -.LBB0_401: # Parent Loop BB0_6 Depth=1 + bgeu $s1, $a2, .LBB0_406 +.LBB0_400: # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 ld.d $a0, $ra, 0 addi.w $s0, $s1, -1 @@ -3635,25 +3618,25 @@ SIM4: # @SIM4 ld.w $a4, $a1, 8 ld.w $a3, $a0, 0 addi.w $a5, $a4, 31 - bgeu $a3, $a5, .LBB0_404 -# %bb.402: # in Loop: Header=BB0_401 Depth=2 + bgeu $a3, $a5, .LBB0_403 +# %bb.401: # in Loop: Header=BB0_400 Depth=2 ld.w $a5, $a1, 12 ld.w $a6, $a0, 4 add.w $a7, $fp, $a5 - bgeu $a7, $a6, .LBB0_400 -# %bb.403: # in Loop: Header=BB0_401 Depth=2 - bltu $a4, $a3, .LBB0_405 - b .LBB0_399 + bgeu $a7, $a6, .LBB0_399 +# %bb.402: # in Loop: Header=BB0_400 Depth=2 + bltu $a4, $a3, .LBB0_404 + b .LBB0_398 .p2align 4, , 16 -.LBB0_404: # %._crit_edge87.i - # in Loop: Header=BB0_401 Depth=2 +.LBB0_403: # %._crit_edge87.i + # in Loop: Header=BB0_400 Depth=2 ld.w $a5, $a1, 12 ld.w $a6, $a0, 4 - bgeu $a4, $a3, .LBB0_399 -.LBB0_405: # in Loop: Header=BB0_401 Depth=2 - bgeu $a5, $a6, .LBB0_399 -# %bb.406: # %about_same_gap_p.exit.i - # in Loop: Header=BB0_401 Depth=2 + bgeu $a4, $a3, .LBB0_398 +.LBB0_404: # in Loop: Header=BB0_400 Depth=2 + bgeu $a5, $a6, .LBB0_398 +# %bb.405: # %about_same_gap_p.exit.i + # in Loop: Header=BB0_400 Depth=2 nor $a4, $a4, $zero add.w $a3, $a3, $a4 bstrpick.d $a4, $a3, 31, 0 @@ -3672,13 +3655,13 @@ SIM4: # @SIM4 ori $a5, $zero, 100 mul.w $a4, $a4, $a5 div.wu $a3, $a4, $a3 - bgeu $a6, $a3, .LBB0_400 - b .LBB0_399 + bgeu $a6, $a3, .LBB0_399 + b .LBB0_398 .p2align 4, , 16 -.LBB0_407: # %compact_exons.exit +.LBB0_406: # %compact_exons.exit # in Loop: Header=BB0_6 Depth=1 - beqz $a2, .LBB0_419 -# %bb.408: # %.lr.ph672.preheader + beqz $a2, .LBB0_418 +# %bb.407: # %.lr.ph672.preheader # in Loop: Header=BB0_6 Depth=1 ld.d $a0, $ra, 0 ld.d $a0, $a0, 0 @@ -3689,22 +3672,22 @@ SIM4: # @SIM4 addi.w $a1, $a1, 1 ori $s1, $zero, 1 move $fp, $zero - bgeu $a1, $a4, .LBB0_412 -# %bb.409: # %.lr.ph1040.preheader + bgeu $a1, $a4, .LBB0_411 +# %bb.408: # %.lr.ph1040.preheader # in Loop: Header=BB0_6 Depth=1 ori $s0, $zero, 8 .p2align 4, , 16 -.LBB0_410: # %.lr.ph1040 +.LBB0_409: # %.lr.ph1040 # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 ld.w $a2, $s4, 16 addi.w $fp, $fp, 1 - bgeu $fp, $a2, .LBB0_413 -# %bb.411: # %.lr.ph672 - # in Loop: Header=BB0_410 Depth=2 - ld.d $ra, $sp, 400 # 8-byte Folded Reload + bgeu $fp, $a2, .LBB0_412 +# %bb.410: # %.lr.ph672 + # in Loop: Header=BB0_409 Depth=2 + ld.d $ra, $sp, 384 # 8-byte Folded Reload ld.d $a0, $ra, 0 ldx.d $a0, $a0, $s0 ld.w $a1, $a0, 12 @@ -3713,11 +3696,11 @@ SIM4: # @SIM4 sub.d $a1, $a1, $a3 addi.w $a1, $a1, 1 addi.d $s0, $s0, 8 - bltu $a1, $a4, .LBB0_410 -.LBB0_412: # %.thread564 + bltu $a1, $a4, .LBB0_409 +.LBB0_411: # %.thread564 # in Loop: Header=BB0_6 Depth=1 - beqz $fp, .LBB0_414 -.LBB0_413: # %.thread564.thread + beqz $fp, .LBB0_413 +.LBB0_412: # %.thread564.thread # in Loop: Header=BB0_6 Depth=1 ld.d $a0, $s4, 8 bstrpick.d $a1, $fp, 31, 0 @@ -3727,15 +3710,15 @@ SIM4: # @SIM4 slli.d $a2, $a2, 3 pcaddu18i $ra, %call36(memmove) jirl $ra, $ra, 0 - ld.d $ra, $sp, 400 # 8-byte Folded Reload + ld.d $ra, $sp, 384 # 8-byte Folded Reload ld.w $a0, $s4, 16 sub.w $a2, $a0, $fp st.w $a2, $s4, 16 -.LBB0_414: # %thread-pre-split568 +.LBB0_413: # %thread-pre-split568 # in Loop: Header=BB0_6 Depth=1 addi.w $a0, $a2, -1 - bltz $a0, .LBB0_422 -# %bb.415: # %.lr.ph678.preheader + bltz $a0, .LBB0_421 +# %bb.414: # %.lr.ph678.preheader # in Loop: Header=BB0_6 Depth=1 ld.d $a1, $ra, 0 slli.d $a0, $a0, 3 @@ -3745,12 +3728,12 @@ SIM4: # @SIM4 ld.w $a4, $s6, 36 sub.d $a1, $a1, $a3 addi.w $a1, $a1, 1 - bgeu $a1, $a4, .LBB0_422 -# %bb.416: # %.lr.ph1044.preheader + bgeu $a1, $a4, .LBB0_421 +# %bb.415: # %.lr.ph1044.preheader # in Loop: Header=BB0_6 Depth=1 addi.w $fp, $a2, -2 .p2align 4, , 16 -.LBB0_417: # %.lr.ph1044 +.LBB0_416: # %.lr.ph1044 # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 addi.w $s0, $fp, 1 @@ -3759,10 +3742,10 @@ SIM4: # @SIM4 ld.w $a0, $s4, 16 addi.w $a2, $a0, -1 st.w $a2, $s4, 16 - blez $s0, .LBB0_421 -# %bb.418: # %.lr.ph678 - # in Loop: Header=BB0_417 Depth=2 - ld.d $ra, $sp, 400 # 8-byte Folded Reload + blez $s0, .LBB0_420 +# %bb.417: # %.lr.ph678 + # in Loop: Header=BB0_416 Depth=2 + ld.d $ra, $sp, 384 # 8-byte Folded Reload ld.d $a0, $ra, 0 slli.d $a1, $fp, 3 ldx.d $a0, $a0, $a1 @@ -3772,47 +3755,48 @@ SIM4: # @SIM4 sub.d $a1, $a1, $a3 addi.w $a1, $a1, 1 addi.w $fp, $fp, -1 - bltu $a1, $a4, .LBB0_417 - b .LBB0_422 + bltu $a1, $a4, .LBB0_416 + b .LBB0_421 .p2align 4, , 16 -.LBB0_419: # %._crit_edge225.i.thread +.LBB0_418: # %._crit_edge225.i.thread # in Loop: Header=BB0_6 Depth=1 ld.w $a0, $s4, 32 move $a2, $zero - bnez $a0, .LBB0_524 -# %bb.420: # in Loop: Header=BB0_6 Depth=1 + bnez $a0, .LBB0_523 +# %bb.419: # in Loop: Header=BB0_6 Depth=1 ld.d $s6, $s6, 8 - ld.d $a0, $sp, 280 # 8-byte Folded Reload + ld.d $a0, $sp, 264 # 8-byte Folded Reload ld.d $s7, $a0, 16 addi.d $a5, $s4, 32 ori $a0, $zero, 1 - b .LBB0_505 -.LBB0_421: # in Loop: Header=BB0_6 Depth=1 - ld.d $ra, $sp, 400 # 8-byte Folded Reload + b .LBB0_504 +.LBB0_420: # in Loop: Header=BB0_6 Depth=1 + ld.d $ra, $sp, 384 # 8-byte Folded Reload .p2align 4, , 16 -.LBB0_422: # %.thread572 +.LBB0_421: # %.thread572 # in Loop: Header=BB0_6 Depth=1 ld.d $s6, $s6, 8 - ld.d $a0, $sp, 280 # 8-byte Folded Reload + ld.d $a0, $sp, 264 # 8-byte Folded Reload ld.d $a0, $a0, 16 - st.d $a0, $sp, 392 # 8-byte Folded Spill - bltu $a2, $s8, .LBB0_474 -# %bb.423: # %.lr.ph.i496 + st.d $a0, $sp, 376 # 8-byte Folded Spill + bltu $a2, $s8, .LBB0_473 +# %bb.422: # %.lr.ph.i496 # in Loop: Header=BB0_6 Depth=1 - ld.d $s0, $sp, 376 # 8-byte Folded Reload + ld.d $s0, $sp, 360 # 8-byte Folded Reload ld.wu $a0, $s0, 16 ld.d $a1, $s0, 8 - ld.d $a2, $sp, 392 # 8-byte Folded Reload + ld.d $a2, $sp, 376 # 8-byte Folded Reload addi.d $a2, $a2, 1 - st.d $a2, $sp, 384 # 8-byte Folded Spill + st.d $a2, $sp, 368 # 8-byte Folded Spill ori $a3, $zero, 1 + lu52i.d $fp, $zero, -16 ld.d $s2, $sp, 40 # 8-byte Folded Reload - ld.d $s3, $sp, 408 # 8-byte Folded Reload - st.d $s6, $sp, 416 # 8-byte Folded Spill - b .LBB0_427 -.LBB0_424: # in Loop: Header=BB0_427 Depth=2 + ld.d $s3, $sp, 392 # 8-byte Folded Reload + st.d $s6, $sp, 400 # 8-byte Folded Spill + b .LBB0_426 +.LBB0_423: # in Loop: Header=BB0_426 Depth=2 ori $a5, $zero, 1 -.LBB0_425: # in Loop: Header=BB0_427 Depth=2 +.LBB0_424: # in Loop: Header=BB0_426 Depth=2 ld.w $a6, $s3, 32 add.d $a6, $a6, $a5 st.w $a6, $s3, 32 @@ -3833,15 +3817,15 @@ SIM4: # @SIM4 or $a4, $a5, $a4 st.d $a4, $a2, 24 .p2align 4, , 16 -.LBB0_426: # in Loop: Header=BB0_427 Depth=2 +.LBB0_425: # in Loop: Header=BB0_426 Depth=2 ld.wu $a2, $s3, 16 addi.d $a3, $a3, 1 - bgeu $a3, $a2, .LBB0_443 -.LBB0_427: # Parent Loop BB0_6 Depth=1 + bgeu $a3, $a2, .LBB0_442 +.LBB0_426: # Parent Loop BB0_6 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_430 Depth 3 - # Child Loop BB0_433 Depth 3 - # Child Loop BB0_439 Depth 3 + # Child Loop BB0_429 Depth 3 + # Child Loop BB0_432 Depth 3 + # Child Loop BB0_438 Depth 3 ld.d $a4, $ra, 0 alsl.d $a2, $a3, $a4, 3 slli.d $a5, $a3, 3 @@ -3851,13 +3835,13 @@ SIM4: # @SIM4 ld.w $a4, $a5, 4 ld.wu $a7, $a2, 12 sub.w $a4, $a4, $a7 - bne $a4, $s1, .LBB0_426 -# %bb.428: # in Loop: Header=BB0_427 Depth=2 + bne $a4, $s1, .LBB0_425 +# %bb.427: # in Loop: Header=BB0_426 Depth=2 ld.wu $a4, $a2, 8 ld.wu $t0, $s0, 44 add.d $a4, $s6, $a4 sub.d $a6, $a4, $t0 - ld.d $t1, $sp, 392 # 8-byte Folded Reload + ld.d $t1, $sp, 376 # 8-byte Folded Reload add.d $t1, $t1, $a7 sub.d $t3, $t1, $t0 ld.bu $t2, $a6, 0 @@ -3866,16 +3850,16 @@ SIM4: # @SIM4 addi.w $a6, $t0, 0 xor $t3, $t2, $t6 sltui $t3, $t3, 1 - bltu $a6, $s8, .LBB0_435 -# %bb.429: # %.lr.ph.i.i533.preheader - # in Loop: Header=BB0_427 Depth=2 + bltu $a6, $s8, .LBB0_434 +# %bb.428: # %.lr.ph.i.i533.preheader + # in Loop: Header=BB0_426 Depth=2 move $t7, $zero move $t5, $zero sub.d $t4, $s1, $t0 .p2align 4, , 16 -.LBB0_430: # %.lr.ph.i.i533 +.LBB0_429: # %.lr.ph.i.i533 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_427 Depth=2 + # Parent Loop BB0_426 Depth=2 # => This Inner Loop Header: Depth=3 ldx.bu $t8, $t1, $t4 xor $t2, $t2, $t8 @@ -3907,12 +3891,13 @@ SIM4: # @SIM4 addi.d $t4, $t4, 1 or $t3, $t6, $t3 move $t6, $t8 - bnez $t4, .LBB0_430 -# %bb.431: # %SWscore.exit.i - # in Loop: Header=BB0_427 Depth=2 - ld.d $s6, $sp, 416 # 8-byte Folded Reload - bltu $t3, $a6, .LBB0_426 -# %bb.432: # in Loop: Header=BB0_427 Depth=2 + bnez $t4, .LBB0_429 +# %bb.430: # %SWscore.exit.i + # in Loop: Header=BB0_426 Depth=2 + lu52i.d $fp, $zero, -16 + ld.d $s6, $sp, 400 # 8-byte Folded Reload + bltu $t3, $a6, .LBB0_425 +# %bb.431: # in Loop: Header=BB0_426 Depth=2 add.d $t2, $s6, $a5 ld.bu $t3, $t2, -1 ld.bu $t6, $t1, 0 @@ -3921,12 +3906,12 @@ SIM4: # @SIM4 xor $t1, $t3, $t6 sltui $t1, $t1, 1 addi.d $t0, $t0, -1 - ld.d $t7, $sp, 384 # 8-byte Folded Reload + ld.d $t7, $sp, 368 # 8-byte Folded Reload add.d $a7, $t7, $a7 .p2align 4, , 16 -.LBB0_433: # %.lr.ph.i30.i +.LBB0_432: # %.lr.ph.i30.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_427 Depth=2 + # Parent Loop BB0_426 Depth=2 # => This Inner Loop Header: Depth=3 ld.bu $t7, $a7, 0 xor $t3, $t3, $t7 @@ -3960,88 +3945,87 @@ SIM4: # @SIM4 addi.d $t2, $t2, 1 addi.d $a7, $a7, 1 move $t6, $t7 - bnez $t0, .LBB0_433 -# %bb.434: # %SWscore.exit39.i - # in Loop: Header=BB0_427 Depth=2 - ld.d $s6, $sp, 416 # 8-byte Folded Reload - bltu $t1, $a6, .LBB0_426 - b .LBB0_437 - .p2align 4, , 16 -.LBB0_435: # %SWscore.exit.thread.i - # in Loop: Header=BB0_427 Depth=2 - bltu $t3, $t0, .LBB0_426 -# %bb.436: # %.thread.i525 - # in Loop: Header=BB0_427 Depth=2 + bnez $t0, .LBB0_432 +# %bb.433: # %SWscore.exit39.i + # in Loop: Header=BB0_426 Depth=2 + ld.d $s6, $sp, 400 # 8-byte Folded Reload + bltu $t1, $a6, .LBB0_425 + b .LBB0_436 + .p2align 4, , 16 +.LBB0_434: # %SWscore.exit.thread.i + # in Loop: Header=BB0_426 Depth=2 + bltu $t3, $t0, .LBB0_425 +# %bb.435: # %.thread.i525 + # in Loop: Header=BB0_426 Depth=2 add.d $a7, $s6, $a5 ld.bu $a7, $a7, -1 ld.bu $t0, $t1, 0 xor $a7, $a7, $t0 sltui $t1, $a7, 1 - ld.d $s6, $sp, 416 # 8-byte Folded Reload - bltu $t1, $a6, .LBB0_426 -.LBB0_437: # in Loop: Header=BB0_427 Depth=2 + ld.d $s6, $sp, 400 # 8-byte Folded Reload + bltu $t1, $a6, .LBB0_425 +.LBB0_436: # in Loop: Header=BB0_426 Depth=2 ld.h $a4, $a4, 0 add.d $a5, $s6, $a5 - st.h $a4, $sp, 432 + st.h $a4, $sp, 416 ld.h $a4, $a5, -3 - st.h $a4, $sp, 434 - beqz $a0, .LBB0_426 -# %bb.438: # %.lr.ph.i527.preheader - # in Loop: Header=BB0_427 Depth=2 + st.h $a4, $sp, 418 + beqz $a0, .LBB0_425 +# %bb.437: # %.lr.ph.i527.preheader + # in Loop: Header=BB0_426 Depth=2 move $a4, $zero move $a5, $a1 move $a6, $a0 .p2align 4, , 16 -.LBB0_439: # %.lr.ph.i527 +.LBB0_438: # %.lr.ph.i527 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_427 Depth=2 + # Parent Loop BB0_426 Depth=2 # => This Inner Loop Header: Depth=3 ld.w $a7, $a5, 0 - ld.w $t0, $sp, 432 - beq $t0, $a7, .LBB0_424 -# %bb.440: # in Loop: Header=BB0_439 Depth=3 + ld.w $t0, $sp, 416 + beq $t0, $a7, .LBB0_423 +# %bb.439: # in Loop: Header=BB0_438 Depth=3 ld.w $a7, $a5, 4 - ld.w $t0, $sp, 432 - beq $t0, $a7, .LBB0_442 -# %bb.441: # in Loop: Header=BB0_439 Depth=3 - lu52i.d $a7, $zero, -16 - add.d $a4, $a4, $a7 + ld.w $t0, $sp, 416 + beq $t0, $a7, .LBB0_441 +# %bb.440: # in Loop: Header=BB0_438 Depth=3 + add.d $a4, $a4, $fp addi.d $a6, $a6, -1 addi.d $a5, $a5, 8 - bnez $a6, .LBB0_439 - b .LBB0_426 -.LBB0_442: # in Loop: Header=BB0_427 Depth=2 - addi.w $a5, $zero, -1 + bnez $a6, .LBB0_438 b .LBB0_425 -.LBB0_443: # %.preheader209.i +.LBB0_441: # in Loop: Header=BB0_426 Depth=2 + addi.w $a5, $zero, -1 + b .LBB0_424 +.LBB0_442: # %.preheader209.i # in Loop: Header=BB0_6 Depth=1 - bltu $a2, $s8, .LBB0_474 -# %bb.444: # %.lr.ph224.i.preheader + bltu $a2, $s8, .LBB0_473 +# %bb.443: # %.lr.ph224.i.preheader # in Loop: Header=BB0_6 Depth=1 ori $a4, $zero, 1 - b .LBB0_447 + b .LBB0_446 .p2align 4, , 16 -.LBB0_445: # %.loopexit208.i - # in Loop: Header=BB0_447 Depth=2 +.LBB0_444: # %.loopexit208.i + # in Loop: Header=BB0_446 Depth=2 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $s6, $sp, 416 # 8-byte Folded Reload - ld.d $a0, $sp, 408 # 8-byte Folded Reload + ld.d $s6, $sp, 400 # 8-byte Folded Reload + ld.d $a0, $sp, 392 # 8-byte Folded Reload ld.w $a2, $a0, 16 - ld.d $ra, $sp, 400 # 8-byte Folded Reload - ld.d $a4, $sp, 320 # 8-byte Folded Reload -.LBB0_446: # in Loop: Header=BB0_447 Depth=2 + ld.d $ra, $sp, 384 # 8-byte Folded Reload + ld.d $a4, $sp, 304 # 8-byte Folded Reload +.LBB0_445: # in Loop: Header=BB0_446 Depth=2 addi.d $a4, $a4, 1 bstrpick.d $a0, $a2, 31, 0 - bgeu $a4, $a0, .LBB0_474 -.LBB0_447: # %.lr.ph224.i + bgeu $a4, $a0, .LBB0_473 +.LBB0_446: # %.lr.ph224.i # Parent Loop BB0_6 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_452 Depth 3 - # Child Loop BB0_454 Depth 4 - # Child Loop BB0_457 Depth 4 - # Child Loop BB0_463 Depth 4 - # Child Loop BB0_472 Depth 3 + # Child Loop BB0_451 Depth 3 + # Child Loop BB0_453 Depth 4 + # Child Loop BB0_456 Depth 4 + # Child Loop BB0_462 Depth 4 + # Child Loop BB0_471 Depth 3 ld.d $a0, $ra, 0 alsl.d $a1, $a4, $a0, 3 slli.d $a3, $a4, 3 @@ -4049,72 +4033,72 @@ SIM4: # @SIM4 ld.d $s2, $a1, -8 ld.w $s4, $a3, 4 ld.w $fp, $s2, 12 - bltu $fp, $s4, .LBB0_446 -# %bb.448: # in Loop: Header=BB0_447 Depth=2 - st.d $a4, $sp, 320 # 8-byte Folded Spill + bltu $fp, $s4, .LBB0_445 +# %bb.447: # in Loop: Header=BB0_446 Depth=2 + st.d $a4, $sp, 304 # 8-byte Folded Spill sub.d $s0, $fp, $s4 addi.w $s1, $s0, 2 bstrpick.d $s3, $s1, 31, 0 mul.d $a0, $s3, $s7 - st.d $a3, $sp, 328 # 8-byte Folded Spill + st.d $a3, $sp, 312 # 8-byte Folded Spill pcaddu18i $ra, %call36(xmalloc) jirl $ra, $ra, 0 - ld.d $t5, $sp, 416 # 8-byte Folded Reload - ld.d $a7, $sp, 328 # 8-byte Folded Reload - beqz $s1, .LBB0_445 -# %bb.449: # %.lr.ph217.i - # in Loop: Header=BB0_447 Depth=2 + ld.d $t5, $sp, 400 # 8-byte Folded Reload + ld.d $a7, $sp, 312 # 8-byte Folded Reload + beqz $s1, .LBB0_444 +# %bb.448: # %.lr.ph217.i + # in Loop: Header=BB0_446 Depth=2 move $a2, $zero - st.d $zero, $sp, 344 # 8-byte Folded Spill + st.d $zero, $sp, 328 # 8-byte Folded Spill ld.w $a1, $s2, 8 ld.w $a4, $s2, 12 nor $a5, $s0, $zero - ld.d $a6, $sp, 376 # 8-byte Folded Reload + ld.d $a6, $sp, 360 # 8-byte Folded Reload ld.wu $a3, $a6, 44 add.w $s0, $a1, $a5 add.w $a5, $a4, $a5 - st.d $a5, $sp, 424 # 8-byte Folded Spill + st.d $a5, $sp, 408 # 8-byte Folded Spill ld.w $t6, $a7, 0 sub.d $a7, $zero, $a3 ld.wu $a5, $a6, 16 - st.d $a5, $sp, 360 # 8-byte Folded Spill + st.d $a5, $sp, 344 # 8-byte Folded Spill ld.d $a5, $a6, 8 - st.d $a5, $sp, 336 # 8-byte Folded Spill + st.d $a5, $sp, 320 # 8-byte Folded Spill ori $a5, $zero, 1 sub.d $a5, $a5, $a3 - st.d $a5, $sp, 368 # 8-byte Folded Spill + st.d $a5, $sp, 352 # 8-byte Folded Spill addi.d $t0, $s4, -1 add.d $a1, $t0, $a1 sub.w $t3, $a1, $fp add.d $a1, $t0, $a4 sub.w $t4, $a1, $fp addi.d $a1, $a3, -1 - st.d $a1, $sp, 352 # 8-byte Folded Spill + st.d $a1, $sp, 336 # 8-byte Folded Spill move $s6, $t6 - ld.d $s1, $sp, 392 # 8-byte Folded Reload - b .LBB0_452 + ld.d $s1, $sp, 376 # 8-byte Folded Reload + b .LBB0_451 .p2align 4, , 16 -.LBB0_450: # %.loopexit.i500 - # in Loop: Header=BB0_452 Depth=3 +.LBB0_449: # %.loopexit.i500 + # in Loop: Header=BB0_451 Depth=3 st.w $zero, $t7, 24 -.LBB0_451: # in Loop: Header=BB0_452 Depth=3 +.LBB0_450: # in Loop: Header=BB0_451 Depth=3 ori $s8, $zero, 2 addi.d $a2, $a2, 1 addi.w $t3, $t3, 1 addi.w $t4, $t4, 1 addi.w $t6, $t6, 1 - beq $a2, $s3, .LBB0_469 -.LBB0_452: # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_447 Depth=2 + beq $a2, $s3, .LBB0_468 +.LBB0_451: # Parent Loop BB0_6 Depth=1 + # Parent Loop BB0_446 Depth=2 # => This Loop Header: Depth=3 - # Child Loop BB0_454 Depth 4 - # Child Loop BB0_457 Depth 4 - # Child Loop BB0_463 Depth 4 + # Child Loop BB0_453 Depth 4 + # Child Loop BB0_456 Depth 4 + # Child Loop BB0_462 Depth 4 add.d $a1, $s0, $a2 mul.d $a4, $a2, $s7 add.d $t7, $a0, $a4 stx.w $a1, $a0, $a4 - ld.d $a4, $sp, 424 # 8-byte Folded Reload + ld.d $a4, $sp, 408 # 8-byte Folded Reload add.d $a4, $a4, $a2 st.w $a4, $t7, 4 add.w $t1, $s6, $a2 @@ -4128,26 +4112,26 @@ SIM4: # @SIM4 xor $a1, $fp, $a5 sltui $a1, $a1, 1 addi.w $s4, $a3, 0 - bltu $a3, $s8, .LBB0_459 -# %bb.453: # %.lr.ph.i.i.i.preheader - # in Loop: Header=BB0_452 Depth=3 + bltu $a3, $s8, .LBB0_458 +# %bb.452: # %.lr.ph.i.i.i.preheader + # in Loop: Header=BB0_451 Depth=3 move $s7, $s0 move $s0, $zero move $t0, $zero bstrpick.d $a4, $t6, 31, 0 add.d $s5, $t5, $a4 bstrpick.d $a4, $t4, 31, 0 - ld.d $a6, $sp, 384 # 8-byte Folded Reload + ld.d $a6, $sp, 368 # 8-byte Folded Reload add.d $s8, $a6, $a4 bstrpick.d $t2, $t3, 31, 0 add.d $t2, $t5, $t2 add.d $s1, $s1, $a4 - ld.d $a4, $sp, 368 # 8-byte Folded Reload + ld.d $a4, $sp, 352 # 8-byte Folded Reload .p2align 4, , 16 -.LBB0_454: # %.lr.ph.i.i.i +.LBB0_453: # %.lr.ph.i.i.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_447 Depth=2 - # Parent Loop BB0_452 Depth=3 + # Parent Loop BB0_446 Depth=2 + # Parent Loop BB0_451 Depth=3 # => This Inner Loop Header: Depth=4 ldx.bu $t5, $s1, $a4 xor $fp, $fp, $t5 @@ -4179,15 +4163,15 @@ SIM4: # @SIM4 addi.d $a4, $a4, 1 or $a1, $a5, $a1 move $a5, $t5 - bnez $a4, .LBB0_454 -# %bb.455: # %SWscore.exit.i.i - # in Loop: Header=BB0_452 Depth=3 - ld.d $s1, $sp, 392 # 8-byte Folded Reload + bnez $a4, .LBB0_453 +# %bb.454: # %SWscore.exit.i.i + # in Loop: Header=BB0_451 Depth=3 + ld.d $s1, $sp, 376 # 8-byte Folded Reload move $s0, $s7 ori $s7, $zero, 28 - ld.d $t5, $sp, 416 # 8-byte Folded Reload - bltu $a1, $s4, .LBB0_450 -# %bb.456: # in Loop: Header=BB0_452 Depth=3 + ld.d $t5, $sp, 400 # 8-byte Folded Reload + bltu $a1, $s4, .LBB0_449 +# %bb.455: # in Loop: Header=BB0_451 Depth=3 bstrpick.d $t1, $t1, 31, 0 add.d $a1, $t5, $t1 ld.bu $a1, $a1, -1 @@ -4196,12 +4180,12 @@ SIM4: # @SIM4 move $t5, $zero xor $a5, $a1, $a4 sltui $fp, $a5, 1 - ld.d $t0, $sp, 352 # 8-byte Folded Reload + ld.d $t0, $sp, 336 # 8-byte Folded Reload .p2align 4, , 16 -.LBB0_457: # %.lr.ph.i30.i.i +.LBB0_456: # %.lr.ph.i30.i.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_447 Depth=2 - # Parent Loop BB0_452 Depth=3 + # Parent Loop BB0_446 Depth=2 + # Parent Loop BB0_451 Depth=3 # => This Inner Loop Header: Depth=4 ld.bu $a5, $s8, 0 xor $a1, $a1, $a5 @@ -4235,91 +4219,91 @@ SIM4: # @SIM4 addi.d $s5, $s5, 1 addi.d $s8, $s8, 1 move $a4, $a5 - bnez $t0, .LBB0_457 -# %bb.458: # in Loop: Header=BB0_452 Depth=3 - ld.d $t5, $sp, 416 # 8-byte Folded Reload - bltu $fp, $s4, .LBB0_450 - b .LBB0_461 - .p2align 4, , 16 -.LBB0_459: # %SWscore.exit.thread.i.i - # in Loop: Header=BB0_452 Depth=3 - bltu $a1, $a3, .LBB0_450 -# %bb.460: # %.thread.i.i - # in Loop: Header=BB0_452 Depth=3 + bnez $t0, .LBB0_456 +# %bb.457: # in Loop: Header=BB0_451 Depth=3 + ld.d $t5, $sp, 400 # 8-byte Folded Reload + bltu $fp, $s4, .LBB0_449 + b .LBB0_460 + .p2align 4, , 16 +.LBB0_458: # %SWscore.exit.thread.i.i + # in Loop: Header=BB0_451 Depth=3 + bltu $a1, $a3, .LBB0_449 +# %bb.459: # %.thread.i.i + # in Loop: Header=BB0_451 Depth=3 bstrpick.d $t1, $t1, 31, 0 add.d $a1, $t5, $t1 ld.bu $a1, $a1, -1 ld.bu $a4, $ra, 0 xor $a1, $a1, $a4 sltui $fp, $a1, 1 - bltu $fp, $s4, .LBB0_450 -.LBB0_461: # in Loop: Header=BB0_452 Depth=3 + bltu $fp, $s4, .LBB0_449 +.LBB0_460: # in Loop: Header=BB0_451 Depth=3 ld.h $a1, $t8, 0 add.d $a4, $t5, $t1 - st.h $a1, $sp, 432 + st.h $a1, $sp, 416 ld.h $a1, $a4, -3 - st.h $a1, $sp, 434 - ld.d $a1, $sp, 360 # 8-byte Folded Reload - beqz $a1, .LBB0_450 -# %bb.462: # %.lr.ph.i.i.preheader - # in Loop: Header=BB0_452 Depth=3 + st.h $a1, $sp, 418 + ld.d $a1, $sp, 344 # 8-byte Folded Reload + beqz $a1, .LBB0_449 +# %bb.461: # %.lr.ph.i.i.preheader + # in Loop: Header=BB0_451 Depth=3 move $a1, $zero - ld.d $a4, $sp, 336 # 8-byte Folded Reload - ld.d $t0, $sp, 360 # 8-byte Folded Reload + ld.d $a4, $sp, 320 # 8-byte Folded Reload + ld.d $t0, $sp, 344 # 8-byte Folded Reload .p2align 4, , 16 -.LBB0_463: # %.lr.ph.i.i +.LBB0_462: # %.lr.ph.i.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_447 Depth=2 - # Parent Loop BB0_452 Depth=3 + # Parent Loop BB0_446 Depth=2 + # Parent Loop BB0_451 Depth=3 # => This Inner Loop Header: Depth=4 ld.w $a5, $a4, 0 - ld.w $a6, $sp, 432 - beq $a6, $a5, .LBB0_466 -# %bb.464: # in Loop: Header=BB0_463 Depth=4 + ld.w $a6, $sp, 416 + beq $a6, $a5, .LBB0_465 +# %bb.463: # in Loop: Header=BB0_462 Depth=4 ld.w $a5, $a4, 4 - ld.w $a6, $sp, 432 - beq $a6, $a5, .LBB0_467 -# %bb.465: # in Loop: Header=BB0_463 Depth=4 + ld.w $a6, $sp, 416 + beq $a6, $a5, .LBB0_466 +# %bb.464: # in Loop: Header=BB0_462 Depth=4 addi.d $a1, $a1, 1 addi.d $t0, $t0, -1 addi.d $a4, $a4, 8 - bnez $t0, .LBB0_463 - b .LBB0_450 -.LBB0_466: # in Loop: Header=BB0_452 Depth=3 + bnez $t0, .LBB0_462 + b .LBB0_449 +.LBB0_465: # in Loop: Header=BB0_451 Depth=3 ori $a4, $zero, 1 - b .LBB0_468 -.LBB0_467: # in Loop: Header=BB0_452 Depth=3 + b .LBB0_467 +.LBB0_466: # in Loop: Header=BB0_451 Depth=3 addi.d $a4, $zero, -1 -.LBB0_468: # in Loop: Header=BB0_452 Depth=3 +.LBB0_467: # in Loop: Header=BB0_451 Depth=3 st.w $a1, $t7, 12 st.w $a4, $t7, 24 - ld.d $a1, $sp, 344 # 8-byte Folded Reload + ld.d $a1, $sp, 328 # 8-byte Folded Reload addi.w $a1, $a1, 1 - st.d $a1, $sp, 344 # 8-byte Folded Spill - b .LBB0_451 + st.d $a1, $sp, 328 # 8-byte Folded Spill + b .LBB0_450 .p2align 4, , 16 -.LBB0_469: # %._crit_edge.i502 - # in Loop: Header=BB0_447 Depth=2 +.LBB0_468: # %._crit_edge.i502 + # in Loop: Header=BB0_446 Depth=2 ori $a1, $zero, 1 - ld.d $a5, $sp, 328 # 8-byte Folded Reload - ld.d $a2, $sp, 344 # 8-byte Folded Reload - bne $a2, $a1, .LBB0_445 -# %bb.470: # %.lr.ph222.i - # in Loop: Header=BB0_447 Depth=2 + ld.d $a5, $sp, 312 # 8-byte Folded Reload + ld.d $a2, $sp, 328 # 8-byte Folded Reload + bne $a2, $a1, .LBB0_444 +# %bb.469: # %.lr.ph222.i + # in Loop: Header=BB0_446 Depth=2 addi.d $a1, $a0, 12 - b .LBB0_472 + b .LBB0_471 .p2align 4, , 16 -.LBB0_471: # in Loop: Header=BB0_472 Depth=3 +.LBB0_470: # in Loop: Header=BB0_471 Depth=3 addi.d $s3, $s3, -1 addi.d $a1, $a1, 28 - beqz $s3, .LBB0_445 -.LBB0_472: # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_447 Depth=2 + beqz $s3, .LBB0_444 +.LBB0_471: # Parent Loop BB0_6 Depth=1 + # Parent Loop BB0_446 Depth=2 # => This Inner Loop Header: Depth=3 ld.w $a2, $a1, 12 - beqz $a2, .LBB0_471 -# %bb.473: # in Loop: Header=BB0_472 Depth=3 - ld.d $a4, $sp, 408 # 8-byte Folded Reload + beqz $a2, .LBB0_470 +# %bb.472: # in Loop: Header=BB0_471 Depth=3 + ld.d $a4, $sp, 392 # 8-byte Folded Reload ld.w $a3, $a4, 32 add.d $a3, $a3, $a2 st.w $a3, $a4, 32 @@ -4329,7 +4313,7 @@ SIM4: # @SIM4 ld.w $a2, $a1, 0 bstrins.d $a3, $a2, 63, 56 st.d $a3, $s2, 24 - ld.d $a2, $sp, 376 # 8-byte Folded Reload + ld.d $a2, $sp, 360 # 8-byte Folded Reload ld.w $a2, $a2, 44 slli.d $a2, $a2, 1 addi.d $a2, $a2, 4 @@ -4346,114 +4330,114 @@ SIM4: # @SIM4 st.w $a2, $a5, 4 ld.w $a2, $a1, -4 st.w $a2, $a5, 0 - b .LBB0_471 + b .LBB0_470 .p2align 4, , 16 -.LBB0_474: # %._crit_edge225.i +.LBB0_473: # %._crit_edge225.i # in Loop: Header=BB0_6 Depth=1 - ld.d $s4, $sp, 408 # 8-byte Folded Reload + ld.d $s4, $sp, 392 # 8-byte Folded Reload ld.w $a0, $s4, 32 addi.d $a5, $s4, 32 - beqz $a0, .LBB0_476 -# %bb.475: # in Loop: Header=BB0_6 Depth=1 - ld.d $s7, $sp, 392 # 8-byte Folded Reload + beqz $a0, .LBB0_475 +# %bb.474: # in Loop: Header=BB0_6 Depth=1 + ld.d $s7, $sp, 376 # 8-byte Folded Reload addi.w $a0, $a2, 0 - bgeu $a0, $s8, .LBB0_506 - b .LBB0_524 -.LBB0_476: # %.preheader.i493 + bgeu $a0, $s8, .LBB0_505 + b .LBB0_523 +.LBB0_475: # %.preheader.i493 # in Loop: Header=BB0_6 Depth=1 addi.w $a1, $a2, 0 ori $a0, $zero, 1 - ld.d $s7, $sp, 392 # 8-byte Folded Reload - bltu $a1, $s8, .LBB0_505 -# %bb.477: # %.lr.ph240.i.preheader + ld.d $s7, $sp, 376 # 8-byte Folded Reload + bltu $a1, $s8, .LBB0_504 +# %bb.476: # %.lr.ph240.i.preheader # in Loop: Header=BB0_6 Depth=1 - st.d $a5, $sp, 296 # 8-byte Folded Spill + st.d $a5, $sp, 280 # 8-byte Folded Spill move $a5, $zero move $a6, $zero ori $a7, $zero, 1 # implicit-def: $r4 # kill: killed $r4 - ld.d $s3, $sp, 376 # 8-byte Folded Reload - b .LBB0_480 + ld.d $s3, $sp, 360 # 8-byte Folded Reload + b .LBB0_479 .p2align 4, , 16 -.LBB0_478: # in Loop: Header=BB0_480 Depth=2 +.LBB0_477: # in Loop: Header=BB0_479 Depth=2 bstrpick.d $a0, $a3, 55, 34 add.w $a5, $a0, $a5 -.LBB0_479: # in Loop: Header=BB0_480 Depth=2 +.LBB0_478: # in Loop: Header=BB0_479 Depth=2 addi.d $a7, $a7, 1 bstrpick.d $a0, $a2, 31, 0 - bgeu $a7, $a0, .LBB0_504 -.LBB0_480: # %.lr.ph240.i + bgeu $a7, $a0, .LBB0_503 +.LBB0_479: # %.lr.ph240.i # Parent Loop BB0_6 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_487 Depth 3 - # Child Loop BB0_489 Depth 4 + # Child Loop BB0_486 Depth 3 + # Child Loop BB0_488 Depth 4 ld.d $a0, $ra, 0 alsl.d $a1, $a7, $a0, 3 slli.d $a3, $a7, 3 ld.d $s5, $a1, -8 ldx.d $a1, $a0, $a3 ld.w $a0, $s5, 12 - st.d $a1, $sp, 368 # 8-byte Folded Spill + st.d $a1, $sp, 352 # 8-byte Folded Spill ld.w $a1, $a1, 4 addi.w $a3, $a0, 1 - bltu $a3, $a1, .LBB0_479 -# %bb.481: # in Loop: Header=BB0_480 Depth=2 + bltu $a3, $a1, .LBB0_478 +# %bb.480: # in Loop: Header=BB0_479 Depth=2 ld.d $a3, $s5, 24 slli.d $a4, $a3, 30 srai.d $a4, $a4, 62 - bgtz $a4, .LBB0_478 -# %bb.482: # in Loop: Header=BB0_480 Depth=2 - bltz $a4, .LBB0_500 -# %bb.483: # in Loop: Header=BB0_480 Depth=2 + bgtz $a4, .LBB0_477 +# %bb.481: # in Loop: Header=BB0_479 Depth=2 + bltz $a4, .LBB0_499 +# %bb.482: # in Loop: Header=BB0_479 Depth=2 sub.d $a0, $a0, $a1 addi.w $a1, $a0, 2 - st.d $a1, $sp, 360 # 8-byte Folded Spill - beqz $a1, .LBB0_501 -# %bb.484: # %.lr.ph232.i - # in Loop: Header=BB0_480 Depth=2 - st.d $a7, $sp, 304 # 8-byte Folded Spill - st.d $a6, $sp, 312 # 8-byte Folded Spill - st.d $a5, $sp, 320 # 8-byte Folded Spill + st.d $a1, $sp, 344 # 8-byte Folded Spill + beqz $a1, .LBB0_500 +# %bb.483: # %.lr.ph232.i + # in Loop: Header=BB0_479 Depth=2 + st.d $a7, $sp, 288 # 8-byte Folded Spill + st.d $a6, $sp, 296 # 8-byte Folded Spill + st.d $a5, $sp, 304 # 8-byte Folded Spill move $a4, $zero move $a3, $zero move $a2, $zero nor $a0, $a0, $zero - st.d $a0, $sp, 352 # 8-byte Folded Spill - ld.d $a0, $sp, 184 # 8-byte Folded Reload st.d $a0, $sp, 336 # 8-byte Folded Spill - st.d $s5, $sp, 344 # 8-byte Folded Spill - b .LBB0_487 - .p2align 4, , 16 -.LBB0_485: # in Loop: Header=BB0_487 Depth=3 - ld.d $ra, $sp, 400 # 8-byte Folded Reload - ld.d $a4, $sp, 416 # 8-byte Folded Reload - ld.d $a2, $sp, 384 # 8-byte Folded Reload -.LBB0_486: # %splice_score_compare.exit.thread.i - # in Loop: Header=BB0_487 Depth=3 + ld.d $a0, $sp, 184 # 8-byte Folded Reload + st.d $a0, $sp, 320 # 8-byte Folded Spill + st.d $s5, $sp, 328 # 8-byte Folded Spill + b .LBB0_486 + .p2align 4, , 16 +.LBB0_484: # in Loop: Header=BB0_486 Depth=3 + ld.d $ra, $sp, 384 # 8-byte Folded Reload + ld.d $a4, $sp, 400 # 8-byte Folded Reload + ld.d $a2, $sp, 368 # 8-byte Folded Reload +.LBB0_485: # %splice_score_compare.exit.thread.i + # in Loop: Header=BB0_486 Depth=3 addi.w $a4, $a4, 1 - ld.d $a0, $sp, 360 # 8-byte Folded Reload - beq $a4, $a0, .LBB0_499 -.LBB0_487: # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_480 Depth=2 + ld.d $a0, $sp, 344 # 8-byte Folded Reload + beq $a4, $a0, .LBB0_498 +.LBB0_486: # Parent Loop BB0_6 Depth=1 + # Parent Loop BB0_479 Depth=2 # => This Loop Header: Depth=3 - # Child Loop BB0_489 Depth 4 - st.d $a2, $sp, 384 # 8-byte Folded Spill - st.d $a3, $sp, 424 # 8-byte Folded Spill + # Child Loop BB0_488 Depth 4 + st.d $a2, $sp, 368 # 8-byte Folded Spill + st.d $a3, $sp, 408 # 8-byte Folded Spill ld.w $a0, $s5, 8 ld.w $a1, $s5, 12 - ld.d $a2, $sp, 352 # 8-byte Folded Reload + ld.d $a2, $sp, 336 # 8-byte Folded Reload add.d $a2, $a4, $a2 add.w $fp, $a0, $a2 - st.w $fp, $sp, 432 + st.w $fp, $sp, 416 add.w $s0, $a1, $a2 - st.w $s0, $sp, 436 - ld.d $a0, $sp, 368 # 8-byte Folded Reload + st.w $s0, $sp, 420 + ld.d $a0, $sp, 352 # 8-byte Folded Reload ld.w $a0, $a0, 0 ld.w $a1, $s3, 44 - st.d $a4, $sp, 416 # 8-byte Folded Spill + st.d $a4, $sp, 400 # 8-byte Folded Spill add.w $s1, $a0, $a4 - st.w $s1, $sp, 440 + st.w $s1, $sp, 424 slli.d $a0, $a1, 1 addi.d $a0, $a0, 4 bstrpick.d $a0, $a0, 31, 1 @@ -4462,31 +4446,31 @@ SIM4: # @SIM4 jirl $ra, $ra, 0 ld.w $a1, $s3, 16 move $s2, $a0 - st.w $zero, $sp, 452 + st.w $zero, $sp, 436 ld.d $a0, $sp, 184 # 8-byte Folded Reload lu32i.d $a0, 0 - st.d $a0, $sp, 444 - beqz $a1, .LBB0_491 -# %bb.488: # %.thread.us.us.i.preheader - # in Loop: Header=BB0_487 Depth=3 + st.d $a0, $sp, 428 + beqz $a1, .LBB0_490 +# %bb.487: # %.thread.us.us.i.preheader + # in Loop: Header=BB0_486 Depth=3 move $s5, $zero move $s8, $s3 move $s4, $zero ori $s3, $zero, 4 move $s7, $s6 - ld.d $s6, $sp, 392 # 8-byte Folded Reload + ld.d $s6, $sp, 376 # 8-byte Folded Reload .p2align 4, , 16 -.LBB0_489: # %.thread.us.us.i +.LBB0_488: # %.thread.us.us.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_480 Depth=2 - # Parent Loop BB0_487 Depth=3 + # Parent Loop BB0_479 Depth=2 + # Parent Loop BB0_486 Depth=3 # => This Inner Loop Header: Depth=4 ld.d $a0, $s8, 8 add.d $a0, $a0, $s3 addi.d $a0, $a0, -4 ori $a1, $zero, 1 st.d $a1, $sp, 8 - addi.d $a2, $sp, 432 + addi.d $a2, $sp, 416 st.d $a0, $sp, 0 move $a0, $s7 move $a1, $s6 @@ -4501,7 +4485,7 @@ SIM4: # @SIM4 add.d $a0, $a0, $s3 addi.w $a1, $zero, -1 st.d $a1, $sp, 8 - addi.d $a2, $sp, 432 + addi.d $a2, $sp, 416 st.d $a0, $sp, 0 move $a0, $s7 move $a1, $s6 @@ -4516,91 +4500,91 @@ SIM4: # @SIM4 addi.d $s4, $s4, 1 addi.d $s3, $s3, 8 addi.w $s5, $s5, 1 - bltu $s4, $a0, .LBB0_489 -# %bb.490: # %compute_max_score.exit.loopexit - # in Loop: Header=BB0_487 Depth=3 - ld.w $fp, $sp, 448 + bltu $s4, $a0, .LBB0_488 +# %bb.489: # %compute_max_score.exit.loopexit + # in Loop: Header=BB0_486 Depth=3 + ld.w $fp, $sp, 432 move $s3, $s8 ori $s8, $zero, 2 - ld.d $s4, $sp, 408 # 8-byte Folded Reload + ld.d $s4, $sp, 392 # 8-byte Folded Reload move $s6, $s7 - ld.d $s5, $sp, 344 # 8-byte Folded Reload - b .LBB0_492 + ld.d $s5, $sp, 328 # 8-byte Folded Reload + b .LBB0_491 .p2align 4, , 16 -.LBB0_491: # in Loop: Header=BB0_487 Depth=3 +.LBB0_490: # in Loop: Header=BB0_486 Depth=3 move $fp, $zero -.LBB0_492: # %compute_max_score.exit - # in Loop: Header=BB0_487 Depth=3 +.LBB0_491: # %compute_max_score.exit + # in Loop: Header=BB0_486 Depth=3 move $a0, $s2 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $s7, $sp, 392 # 8-byte Folded Reload - ld.d $a3, $sp, 424 # 8-byte Folded Reload - bltu $fp, $a3, .LBB0_485 -# %bb.493: # in Loop: Header=BB0_487 Depth=3 - ld.d $ra, $sp, 400 # 8-byte Folded Reload - ld.d $a4, $sp, 416 # 8-byte Folded Reload - ld.d $a2, $sp, 384 # 8-byte Folded Reload - bgeu $a3, $fp, .LBB0_496 -# %bb.494: # %.splice_score_compare.exit.thread198_crit_edge.i - # in Loop: Header=BB0_487 Depth=3 - ld.w $a1, $sp, 444 - ld.w $a0, $sp, 452 -.LBB0_495: # %splice_score_compare.exit.thread198.i - # in Loop: Header=BB0_487 Depth=3 - ld.w $a2, $sp, 456 - st.d $a2, $sp, 328 # 8-byte Folded Spill + ld.d $s7, $sp, 376 # 8-byte Folded Reload + ld.d $a3, $sp, 408 # 8-byte Folded Reload + bltu $fp, $a3, .LBB0_484 +# %bb.492: # in Loop: Header=BB0_486 Depth=3 + ld.d $ra, $sp, 384 # 8-byte Folded Reload + ld.d $a4, $sp, 400 # 8-byte Folded Reload + ld.d $a2, $sp, 368 # 8-byte Folded Reload + bgeu $a3, $fp, .LBB0_495 +# %bb.493: # %.splice_score_compare.exit.thread198_crit_edge.i + # in Loop: Header=BB0_486 Depth=3 + ld.w $a1, $sp, 428 + ld.w $a0, $sp, 436 +.LBB0_494: # %splice_score_compare.exit.thread198.i + # in Loop: Header=BB0_486 Depth=3 + ld.w $a2, $sp, 440 + st.d $a2, $sp, 312 # 8-byte Folded Spill move $a2, $a0 move $a3, $fp - st.d $a1, $sp, 336 # 8-byte Folded Spill - b .LBB0_486 - .p2align 4, , 16 -.LBB0_496: # in Loop: Header=BB0_487 Depth=3 - ld.w $a0, $sp, 452 - bltu $a0, $a2, .LBB0_486 -# %bb.497: # in Loop: Header=BB0_487 Depth=3 - ld.w $a1, $sp, 444 - bltu $a2, $a0, .LBB0_495 -# %bb.498: # in Loop: Header=BB0_487 Depth=3 - ld.d $a5, $sp, 336 # 8-byte Folded Reload - bgeu $a1, $a5, .LBB0_486 - b .LBB0_495 -.LBB0_499: # %._crit_edge233.loopexit.i - # in Loop: Header=BB0_480 Depth=2 - ld.w $a2, $s4, 16 + st.d $a1, $sp, 320 # 8-byte Folded Spill + b .LBB0_485 + .p2align 4, , 16 +.LBB0_495: # in Loop: Header=BB0_486 Depth=3 + ld.w $a0, $sp, 436 + bltu $a0, $a2, .LBB0_485 +# %bb.496: # in Loop: Header=BB0_486 Depth=3 + ld.w $a1, $sp, 428 + bltu $a2, $a0, .LBB0_494 +# %bb.497: # in Loop: Header=BB0_486 Depth=3 ld.d $a5, $sp, 320 # 8-byte Folded Reload - ld.d $a6, $sp, 312 # 8-byte Folded Reload - ld.d $a7, $sp, 304 # 8-byte Folded Reload - b .LBB0_502 -.LBB0_500: # in Loop: Header=BB0_480 Depth=2 + bgeu $a1, $a5, .LBB0_485 + b .LBB0_494 +.LBB0_498: # %._crit_edge233.loopexit.i + # in Loop: Header=BB0_479 Depth=2 + ld.w $a2, $s4, 16 + ld.d $a5, $sp, 304 # 8-byte Folded Reload + ld.d $a6, $sp, 296 # 8-byte Folded Reload + ld.d $a7, $sp, 288 # 8-byte Folded Reload + b .LBB0_501 +.LBB0_499: # in Loop: Header=BB0_479 Depth=2 bstrpick.d $a0, $a3, 55, 34 - b .LBB0_503 -.LBB0_501: # in Loop: Header=BB0_480 Depth=2 + b .LBB0_502 +.LBB0_500: # in Loop: Header=BB0_479 Depth=2 move $a3, $zero -.LBB0_502: # %._crit_edge233.i - # in Loop: Header=BB0_480 Depth=2 - ld.d $a0, $sp, 328 # 8-byte Folded Reload +.LBB0_501: # %._crit_edge233.i + # in Loop: Header=BB0_479 Depth=2 + ld.d $a0, $sp, 312 # 8-byte Folded Reload addi.w $a0, $a0, 0 slt $a1, $zero, $a0 maskeqz $a1, $a3, $a1 add.w $a5, $a1, $a5 srli.d $a0, $a0, 31 and $a0, $a0, $a3 -.LBB0_503: # in Loop: Header=BB0_480 Depth=2 +.LBB0_502: # in Loop: Header=BB0_479 Depth=2 add.w $a6, $a0, $a6 - b .LBB0_479 -.LBB0_504: # %._crit_edge241.loopexit.i + b .LBB0_478 +.LBB0_503: # %._crit_edge241.loopexit.i # in Loop: Header=BB0_6 Depth=1 sltu $a0, $a5, $a6 sub.d $a0, $zero, $a0 ori $a0, $a0, 1 - ld.d $a5, $sp, 296 # 8-byte Folded Reload -.LBB0_505: # %._crit_edge241.i + ld.d $a5, $sp, 280 # 8-byte Folded Reload +.LBB0_504: # %._crit_edge241.i # in Loop: Header=BB0_6 Depth=1 st.w $a0, $a5, 0 addi.w $a0, $a2, 0 - bltu $a0, $s8, .LBB0_524 -.LBB0_506: # %.lr.ph267.i.preheader + bltu $a0, $s8, .LBB0_523 +.LBB0_505: # %.lr.ph267.i.preheader # in Loop: Header=BB0_6 Depth=1 ori $fp, $zero, 1 # implicit-def: $r4 @@ -4611,173 +4595,173 @@ SIM4: # @SIM4 # kill: killed $r4 # implicit-def: $r4 # kill: killed $r4 - b .LBB0_510 -.LBB0_507: # %._crit_edge254.loopexit.i - # in Loop: Header=BB0_510 Depth=2 + b .LBB0_509 +.LBB0_506: # %._crit_edge254.loopexit.i + # in Loop: Header=BB0_509 Depth=2 ld.wu $a0, $s5, 24 - ld.d $a1, $sp, 424 # 8-byte Folded Reload + ld.d $a1, $sp, 408 # 8-byte Folded Reload slli.d $a2, $a1, 56 bstrpick.d $a1, $s3, 21, 0 slli.d $a1, $a1, 34 - ld.d $a3, $sp, 408 # 8-byte Folded Reload - ld.d $ra, $sp, 400 # 8-byte Folded Reload + ld.d $a3, $sp, 392 # 8-byte Folded Reload + ld.d $ra, $sp, 384 # 8-byte Folded Reload move $a5, $s6 move $s6, $s4 move $s4, $a3 -.LBB0_508: # %._crit_edge254.i - # in Loop: Header=BB0_510 Depth=2 - ld.d $a3, $sp, 368 # 8-byte Folded Reload +.LBB0_507: # %._crit_edge254.i + # in Loop: Header=BB0_509 Depth=2 + ld.d $a3, $sp, 352 # 8-byte Folded Reload andi $a3, $a3, 3 bstrins.d $a0, $a3, 63, 32 or $a0, $a0, $a2 add.d $a0, $a0, $a1 st.d $a0, $s5, 24 - ld.d $a0, $sp, 416 # 8-byte Folded Reload + ld.d $a0, $sp, 400 # 8-byte Folded Reload st.w $a0, $s5, 8 - ld.d $a0, $sp, 392 # 8-byte Folded Reload + ld.d $a0, $sp, 376 # 8-byte Folded Reload st.w $a0, $s5, 12 addi.d $a0, $a0, 1 st.w $a0, $s8, 4 - ld.d $a0, $sp, 384 # 8-byte Folded Reload + ld.d $a0, $sp, 368 # 8-byte Folded Reload st.w $a0, $s8, 0 ld.w $a2, $s4, 16 -.LBB0_509: # in Loop: Header=BB0_510 Depth=2 +.LBB0_508: # in Loop: Header=BB0_509 Depth=2 addi.d $fp, $fp, 1 bstrpick.d $a0, $a2, 31, 0 - bgeu $fp, $a0, .LBB0_524 -.LBB0_510: # %.lr.ph267.i + bgeu $fp, $a0, .LBB0_523 +.LBB0_509: # %.lr.ph267.i # Parent Loop BB0_6 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_518 Depth 3 + # Child Loop BB0_517 Depth 3 ld.d $a1, $ra, 0 alsl.d $a0, $fp, $a1, 3 ld.d $s5, $a0, -8 slli.d $a3, $fp, 3 ld.d $a0, $s5, 24 ldx.d $s8, $a1, $a3 - bltz $a0, .LBB0_512 -# %bb.511: # in Loop: Header=BB0_510 Depth=2 + bltz $a0, .LBB0_511 +# %bb.510: # in Loop: Header=BB0_509 Depth=2 ld.w $a1, $a5, 0 slli.d $a3, $a0, 30 srai.d $a3, $a3, 62 mul.w $a1, $a1, $a3 - bgtz $a1, .LBB0_509 -.LBB0_512: # in Loop: Header=BB0_510 Depth=2 + bgtz $a1, .LBB0_508 +.LBB0_511: # in Loop: Header=BB0_509 Depth=2 ld.w $a1, $s5, 12 ld.w $a3, $s8, 4 addi.w $a4, $a1, 1 - bltu $a4, $a3, .LBB0_509 -# %bb.513: # in Loop: Header=BB0_510 Depth=2 + bltu $a4, $a3, .LBB0_508 +# %bb.512: # in Loop: Header=BB0_509 Depth=2 sub.w $a2, $a1, $a3 addi.w $a4, $zero, -2 - beq $a2, $a4, .LBB0_523 -# %bb.514: # %.lr.ph253.i - # in Loop: Header=BB0_510 Depth=2 + beq $a2, $a4, .LBB0_522 +# %bb.513: # %.lr.ph253.i + # in Loop: Header=BB0_509 Depth=2 move $s4, $s6 move $s6, $a5 move $s3, $zero move $s1, $zero addi.w $s0, $zero, -1 sub.d $s2, $a3, $a1 - st.d $s0, $sp, 424 # 8-byte Folded Spill - b .LBB0_518 - .p2align 4, , 16 -.LBB0_515: # %.splice_score_compare.exit185.thread202_crit_edge.i - # in Loop: Header=BB0_518 Depth=3 - ld.w $a2, $sp, 444 - ld.w $a1, $sp, 452 -.LBB0_516: # %splice_score_compare.exit185.thread202.i - # in Loop: Header=BB0_518 Depth=3 - ld.w $a3, $sp, 432 - st.d $a3, $sp, 416 # 8-byte Folded Spill - ld.w $a3, $sp, 436 - st.d $a3, $sp, 392 # 8-byte Folded Spill - ld.w $a3, $sp, 440 - st.d $a3, $sp, 384 # 8-byte Folded Spill - ld.w $a3, $sp, 456 + st.d $s0, $sp, 408 # 8-byte Folded Spill + b .LBB0_517 + .p2align 4, , 16 +.LBB0_514: # %.splice_score_compare.exit185.thread202_crit_edge.i + # in Loop: Header=BB0_517 Depth=3 + ld.w $a2, $sp, 428 + ld.w $a1, $sp, 436 +.LBB0_515: # %splice_score_compare.exit185.thread202.i + # in Loop: Header=BB0_517 Depth=3 + ld.w $a3, $sp, 416 + st.d $a3, $sp, 400 # 8-byte Folded Spill + ld.w $a3, $sp, 420 + st.d $a3, $sp, 376 # 8-byte Folded Spill + ld.w $a3, $sp, 424 st.d $a3, $sp, 368 # 8-byte Folded Spill + ld.w $a3, $sp, 440 + st.d $a3, $sp, 352 # 8-byte Folded Spill move $s1, $a1 move $s3, $a0 - st.d $a2, $sp, 424 # 8-byte Folded Spill -.LBB0_517: # %splice_score_compare.exit185.thread.i - # in Loop: Header=BB0_518 Depth=3 + st.d $a2, $sp, 408 # 8-byte Folded Spill +.LBB0_516: # %splice_score_compare.exit185.thread.i + # in Loop: Header=BB0_517 Depth=3 addi.d $s0, $s0, 1 add.w $a0, $s2, $s0 ori $a1, $zero, 1 - beq $a0, $a1, .LBB0_507 -.LBB0_518: # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_510 Depth=2 + beq $a0, $a1, .LBB0_506 +.LBB0_517: # Parent Loop BB0_6 Depth=1 + # Parent Loop BB0_509 Depth=2 # => This Inner Loop Header: Depth=3 ld.w $a0, $s5, 8 ld.w $a1, $s5, 12 add.d $a2, $s2, $s0 add.d $a0, $a2, $a0 - st.w $a0, $sp, 432 + st.w $a0, $sp, 416 add.d $a0, $a2, $a1 - st.w $a0, $sp, 436 + st.w $a0, $sp, 420 ld.w $a0, $s8, 0 add.d $a0, $s0, $a0 addi.d $a0, $a0, 1 - st.w $a0, $sp, 440 + st.w $a0, $sp, 424 ld.w $a3, $s6, 0 - addi.d $a2, $sp, 432 + addi.d $a2, $sp, 416 move $a0, $s4 move $a1, $s7 pcaddu18i $ra, %call36(compute_max_score) jirl $ra, $ra, 0 - ld.w $a0, $sp, 448 - bltu $a0, $s3, .LBB0_517 -# %bb.519: # in Loop: Header=BB0_518 Depth=3 - bltu $s3, $a0, .LBB0_515 -# %bb.520: # in Loop: Header=BB0_518 Depth=3 - ld.w $a1, $sp, 452 - bltu $a1, $s1, .LBB0_517 -# %bb.521: # in Loop: Header=BB0_518 Depth=3 - ld.w $a2, $sp, 444 - bltu $s1, $a1, .LBB0_516 -# %bb.522: # in Loop: Header=BB0_518 Depth=3 - ld.d $a3, $sp, 424 # 8-byte Folded Reload - bgeu $a2, $a3, .LBB0_517 - b .LBB0_516 -.LBB0_523: # in Loop: Header=BB0_510 Depth=2 + ld.w $a0, $sp, 432 + bltu $a0, $s3, .LBB0_516 +# %bb.518: # in Loop: Header=BB0_517 Depth=3 + bltu $s3, $a0, .LBB0_514 +# %bb.519: # in Loop: Header=BB0_517 Depth=3 + ld.w $a1, $sp, 436 + bltu $a1, $s1, .LBB0_516 +# %bb.520: # in Loop: Header=BB0_517 Depth=3 + ld.w $a2, $sp, 428 + bltu $s1, $a1, .LBB0_515 +# %bb.521: # in Loop: Header=BB0_517 Depth=3 + ld.d $a3, $sp, 408 # 8-byte Folded Reload + bgeu $a2, $a3, .LBB0_516 + b .LBB0_515 +.LBB0_522: # in Loop: Header=BB0_509 Depth=2 move $a1, $zero lu52i.d $a2, $zero, -16 - b .LBB0_508 + b .LBB0_507 .p2align 4, , 16 -.LBB0_524: # %slide_intron.exit +.LBB0_523: # %slide_intron.exit # in Loop: Header=BB0_6 Depth=1 ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $fp, $a0, 8 ld.w $a4, $a0, 16 - ld.d $a0, $sp, 280 # 8-byte Folded Reload + ld.d $a0, $sp, 264 # 8-byte Folded Reload ld.d $s0, $a0, 16 ldptr.w $a3, $a0, 4148 st.d $zero, $s4, 0 addi.d $a0, $a4, 1 - st.w $a0, $sp, 432 + st.w $a0, $sp, 416 addi.d $a0, $a3, 1 - st.w $a0, $sp, 436 + st.w $a0, $sp, 420 addi.w $a1, $a2, -1 - st.d $zero, $sp, 440 - bltz $a1, .LBB0_573 -# %bb.525: # %.lr.ph259.preheader.i + st.d $zero, $sp, 424 + bltz $a1, .LBB0_572 +# %bb.524: # %.lr.ph259.preheader.i # in Loop: Header=BB0_6 Depth=1 ori $s2, $zero, 1 move $a0, $zero move $s7, $zero move $s4, $zero move $s6, $zero - addi.d $s3, $sp, 432 - st.d $a4, $sp, 392 # 8-byte Folded Spill - st.d $a4, $sp, 424 # 8-byte Folded Spill - st.d $a3, $sp, 416 # 8-byte Folded Spill - st.d $a3, $sp, 384 # 8-byte Folded Spill - b .LBB0_528 - .p2align 4, , 16 -.LBB0_526: # in Loop: Header=BB0_528 Depth=2 + addi.d $s3, $sp, 416 + st.d $a4, $sp, 376 # 8-byte Folded Spill + st.d $a4, $sp, 408 # 8-byte Folded Spill + st.d $a3, $sp, 400 # 8-byte Folded Spill + st.d $a3, $sp, 368 # 8-byte Folded Spill + b .LBB0_527 + .p2align 4, , 16 +.LBB0_525: # in Loop: Header=BB0_527 Depth=2 ori $s2, $zero, 1 -.LBB0_527: # %._crit_edge.i513 - # in Loop: Header=BB0_528 Depth=2 +.LBB0_526: # %._crit_edge.i513 + # in Loop: Header=BB0_527 Depth=2 ld.w $a0, $s8, 8 ld.w $a4, $s8, 12 add.d $a1, $a2, $a1 @@ -4793,22 +4777,22 @@ SIM4: # @SIM4 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a1, $fa0 sub.d $a2, $a1, $a6 - ld.d $a3, $sp, 512 + ld.d $a3, $sp, 496 ori $a4, $zero, 100 mul.w $a2, $a2, $a4 div.w $a1, $a2, $a1 st.w $a1, $s8, 16 st.d $s4, $a3, 0 - ld.d $s4, $sp, 520 + ld.d $s4, $sp, 504 addi.d $a1, $s5, -1 move $s3, $s8 - blez $s5, .LBB0_574 -.LBB0_528: # %.lr.ph259.i + blez $s5, .LBB0_573 +.LBB0_527: # %.lr.ph259.i # Parent Loop BB0_6 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_559 Depth 3 - # Child Loop BB0_567 Depth 4 - # Child Loop BB0_570 Depth 4 + # Child Loop BB0_558 Depth 3 + # Child Loop BB0_566 Depth 4 + # Child Loop BB0_569 Depth 4 ld.d $a2, $ra, 0 move $s5, $a1 slli.d $a1, $a1, 3 @@ -4816,16 +4800,16 @@ SIM4: # @SIM4 ld.w $a5, $s8, 12 ld.w $a1, $s3, 4 addi.w $a2, $a5, 1 - bne $a1, $a2, .LBB0_532 -# %bb.529: # in Loop: Header=BB0_528 Depth=2 + bne $a1, $a2, .LBB0_531 +# %bb.528: # in Loop: Header=BB0_527 Depth=2 ld.w $a4, $s8, 8 ld.w $a1, $s3, 0 nor $a2, $a4, $zero add.w $s1, $a1, $a2 - beqz $s1, .LBB0_536 -# %bb.530: # in Loop: Header=BB0_528 Depth=2 - beqz $a0, .LBB0_535 -# %bb.531: # in Loop: Header=BB0_528 Depth=2 + beqz $s1, .LBB0_535 +# %bb.529: # in Loop: Header=BB0_527 Depth=2 + beqz $a0, .LBB0_534 +# %bb.530: # in Loop: Header=BB0_527 Depth=2 ori $a0, $zero, 16 pcaddu18i $ra, %call36(xmalloc) jirl $ra, $ra, 0 @@ -4835,15 +4819,15 @@ SIM4: # @SIM4 ld.w $a4, $s8, 8 ld.w $a5, $s8, 12 move $s4, $a0 - b .LBB0_536 + b .LBB0_535 .p2align 4, , 16 -.LBB0_532: # in Loop: Header=BB0_528 Depth=2 - beqz $a0, .LBB0_534 -# %bb.533: # in Loop: Header=BB0_528 Depth=2 +.LBB0_531: # in Loop: Header=BB0_527 Depth=2 + beqz $a0, .LBB0_533 +# %bb.532: # in Loop: Header=BB0_527 Depth=2 ori $a0, $zero, 40 pcaddu18i $ra, %call36(xmalloc) jirl $ra, $ra, 0 - ld.d $a2, $sp, 408 # 8-byte Folded Reload + ld.d $a2, $sp, 392 # 8-byte Folded Reload ld.d $a1, $a2, 0 st.d $a1, $a0, 0 st.d $a0, $a2, 0 @@ -4852,11 +4836,11 @@ SIM4: # @SIM4 st.w $a1, $a0, 16 ld.w $a2, $s3, 4 st.w $a2, $a0, 20 - ld.d $a3, $sp, 424 # 8-byte Folded Reload + ld.d $a3, $sp, 408 # 8-byte Folded Reload sub.d $a1, $a3, $a1 addi.d $a1, $a1, 1 st.w $a1, $a0, 24 - ld.d $a1, $sp, 384 # 8-byte Folded Reload + ld.d $a1, $sp, 368 # 8-byte Folded Reload sub.d $a1, $a1, $a2 addi.d $a1, $a1, 1 st.w $a1, $a0, 28 @@ -4864,37 +4848,33 @@ SIM4: # @SIM4 ld.w $a5, $s8, 12 move $s4, $zero move $s7, $zero -.LBB0_534: # in Loop: Header=BB0_528 Depth=2 +.LBB0_533: # in Loop: Header=BB0_527 Depth=2 ld.w $a4, $s8, 8 - st.d $a5, $sp, 384 # 8-byte Folded Spill + st.d $a5, $sp, 368 # 8-byte Folded Spill +.LBB0_534: # %.critedge.i507 + # in Loop: Header=BB0_527 Depth=2 + st.d $a4, $sp, 408 # 8-byte Folded Spill .LBB0_535: # %.critedge.i507 - # in Loop: Header=BB0_528 Depth=2 - st.d $a4, $sp, 424 # 8-byte Folded Spill -.LBB0_536: # %.critedge.i507 - # in Loop: Header=BB0_528 Depth=2 - ld.w $a0, $s8, 4 - ld.w $a1, $s8, 0 - sub.d $a2, $a5, $a0 - addi.d $a2, $a2, 1 - bstrpick.d $a2, $a2, 31, 0 - movgr2fr.d $fa0, $a2 - ld.d $a2, $sp, 248 # 8-byte Folded Reload - fld.d $fa1, $a2, %pc_lo12(.LCPI0_1) + # in Loop: Header=BB0_527 Depth=2 + ld.w $a0, $s8, 0 + ld.w $a1, $s8, 4 + addi.w $a2, $a0, -1 + addi.w $a3, $a1, -1 + sub.d $a0, $a5, $a1 + addi.d $a0, $a0, 1 + bstrpick.d $a0, $a0, 31, 0 + movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 - pcalau12i $a2, %pc_hi20(.LCPI0_4) - fld.d $fa2, $a2, %pc_lo12(.LCPI0_4) - fmul.d $fa0, $fa0, $fa1 - addi.w $a2, $a1, -1 - addi.w $a3, $a0, -1 - fmax.d $fa0, $fa0, $fa2 + fmul.d $fa0, $fa0, $fs1 + fmax.d $fa0, $fa0, $fs3 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a6, $fa0 move $a0, $fp move $a1, $s0 pcaddu18i $ra, %call36(align_get_dist) jirl $ra, $ra, 0 - bltz $a0, .LBB0_572 -# %bb.537: # in Loop: Header=BB0_528 Depth=2 + bltz $a0, .LBB0_571 +# %bb.536: # in Loop: Header=BB0_527 Depth=2 move $s1, $a0 ld.w $a0, $s8, 0 ld.w $a1, $s8, 4 @@ -4902,120 +4882,120 @@ SIM4: # @SIM4 ld.w $a5, $s8, 12 addi.w $a2, $a0, -1 addi.w $a3, $a1, -1 - ld.d $a0, $sp, 416 # 8-byte Folded Reload + ld.d $a0, $sp, 400 # 8-byte Folded Reload st.d $a0, $sp, 16 - ld.d $a0, $sp, 392 # 8-byte Folded Reload + ld.d $a0, $sp, 376 # 8-byte Folded Reload st.d $a0, $sp, 8 - addi.d $a7, $sp, 520 - addi.d $a0, $sp, 512 + addi.d $a7, $sp, 504 + addi.d $a0, $sp, 496 st.d $a0, $sp, 0 move $a0, $fp move $a1, $s0 move $a6, $s1 pcaddu18i $ra, %call36(align_path) jirl $ra, $ra, 0 - ld.d $a0, $sp, 512 - beqz $a0, .LBB0_572 -# %bb.538: # in Loop: Header=BB0_528 Depth=2 - addi.d $a0, $sp, 520 - addi.d $a1, $sp, 512 - addi.d $a2, $sp, 504 + ld.d $a0, $sp, 496 + beqz $a0, .LBB0_571 +# %bb.537: # in Loop: Header=BB0_527 Depth=2 + addi.d $a0, $sp, 504 + addi.d $a1, $sp, 496 + addi.d $a2, $sp, 488 pcaddu18i $ra, %call36(Condense_both_Ends) jirl $ra, $ra, 0 ld.w $a0, $s3, 8 - beqz $a0, .LBB0_540 -# %bb.539: # in Loop: Header=BB0_528 Depth=2 + beqz $a0, .LBB0_539 +# %bb.538: # in Loop: Header=BB0_527 Depth=2 ori $s3, $zero, 1 - b .LBB0_545 + b .LBB0_544 .p2align 4, , 16 -.LBB0_540: # in Loop: Header=BB0_528 Depth=2 - ld.d $a0, $sp, 512 +.LBB0_539: # in Loop: Header=BB0_527 Depth=2 + ld.d $a0, $sp, 496 ld.bu $a1, $a0, 12 ori $s3, $zero, 1 vld $vr8, $sp, 192 # 16-byte Folded Reload - ld.d $ra, $sp, 400 # 8-byte Folded Reload - bne $a1, $s3, .LBB0_546 -# %bb.541: # in Loop: Header=BB0_528 Depth=2 + ld.d $ra, $sp, 384 # 8-byte Folded Reload + bne $a1, $s3, .LBB0_545 +# %bb.540: # in Loop: Header=BB0_527 Depth=2 ld.w $a1, $a0, 8 ld.w $a2, $s8, 8 sub.d $a2, $a2, $a1 st.w $a2, $s8, 8 - beqz $s4, .LBB0_544 -# %bb.542: # in Loop: Header=BB0_528 Depth=2 + beqz $s4, .LBB0_543 +# %bb.541: # in Loop: Header=BB0_527 Depth=2 ld.bu $a2, $s4, 12 - bne $a2, $s3, .LBB0_544 -# %bb.543: # in Loop: Header=BB0_528 Depth=2 + bne $a2, $s3, .LBB0_543 +# %bb.542: # in Loop: Header=BB0_527 Depth=2 ld.w $a2, $s4, 8 add.d $a2, $a2, $a1 st.w $a2, $s4, 8 -.LBB0_544: # in Loop: Header=BB0_528 Depth=2 +.LBB0_543: # in Loop: Header=BB0_527 Depth=2 sub.d $s1, $s1, $a1 - ld.d $a2, $sp, 424 # 8-byte Folded Reload + ld.d $a2, $sp, 408 # 8-byte Folded Reload sub.d $a2, $a2, $a1 - st.d $a2, $sp, 424 # 8-byte Folded Spill + st.d $a2, $sp, 408 # 8-byte Folded Spill pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 504 + ld.d $a0, $sp, 488 st.d $zero, $a0, 0 - ld.d $a0, $sp, 504 - st.d $a0, $sp, 512 -.LBB0_545: # in Loop: Header=BB0_528 Depth=2 + ld.d $a0, $sp, 488 + st.d $a0, $sp, 496 +.LBB0_544: # in Loop: Header=BB0_527 Depth=2 vld $vr8, $sp, 192 # 16-byte Folded Reload - ld.d $ra, $sp, 400 # 8-byte Folded Reload -.LBB0_546: # in Loop: Header=BB0_528 Depth=2 - ld.d $a0, $sp, 520 - bnez $s5, .LBB0_553 -# %bb.547: # in Loop: Header=BB0_528 Depth=2 - beqz $a0, .LBB0_553 -# %bb.548: # in Loop: Header=BB0_528 Depth=2 + ld.d $ra, $sp, 384 # 8-byte Folded Reload +.LBB0_545: # in Loop: Header=BB0_527 Depth=2 + ld.d $a0, $sp, 504 + bnez $s5, .LBB0_552 +# %bb.546: # in Loop: Header=BB0_527 Depth=2 + beqz $a0, .LBB0_552 +# %bb.547: # in Loop: Header=BB0_527 Depth=2 ld.bu $a1, $a0, 12 - bne $a1, $s3, .LBB0_555 -# %bb.549: # in Loop: Header=BB0_528 Depth=2 + bne $a1, $s3, .LBB0_554 +# %bb.548: # in Loop: Header=BB0_527 Depth=2 ori $s2, $zero, 1 ld.w $a1, $a0, 8 ld.w $a2, $s8, 0 add.d $a2, $a2, $a1 st.w $a2, $s8, 0 - ld.d $a2, $sp, 512 + ld.d $a2, $sp, 496 ld.d $s3, $a0, 0 - bne $a2, $a0, .LBB0_551 -# %bb.550: # in Loop: Header=BB0_528 Depth=2 - st.d $s3, $sp, 512 -.LBB0_551: # in Loop: Header=BB0_528 Depth=2 + bne $a2, $a0, .LBB0_550 +# %bb.549: # in Loop: Header=BB0_527 Depth=2 + st.d $s3, $sp, 496 +.LBB0_550: # in Loop: Header=BB0_527 Depth=2 sub.d $s1, $s1, $a1 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - st.d $s3, $sp, 520 + st.d $s3, $sp, 504 vld $vr8, $sp, 192 # 16-byte Folded Reload - ld.d $ra, $sp, 400 # 8-byte Folded Reload + ld.d $ra, $sp, 384 # 8-byte Folded Reload ld.w $a1, $s8, 0 ld.w $a2, $s8, 4 add.d $s7, $s1, $s7 - bnez $s3, .LBB0_554 -.LBB0_552: # in Loop: Header=BB0_528 Depth=2 + bnez $s3, .LBB0_553 +.LBB0_551: # in Loop: Header=BB0_527 Depth=2 move $a6, $zero move $a3, $zero - b .LBB0_527 + b .LBB0_526 .p2align 4, , 16 -.LBB0_553: # in Loop: Header=BB0_528 Depth=2 +.LBB0_552: # in Loop: Header=BB0_527 Depth=2 ori $s2, $zero, 1 move $s3, $a0 ld.w $a1, $s8, 0 ld.w $a2, $s8, 4 add.d $s7, $s1, $s7 - beqz $s3, .LBB0_552 -.LBB0_554: # in Loop: Header=BB0_528 Depth=2 + beqz $s3, .LBB0_551 +.LBB0_553: # in Loop: Header=BB0_527 Depth=2 move $a0, $s3 ori $s3, $zero, 1 - b .LBB0_556 + b .LBB0_555 .p2align 4, , 16 -.LBB0_555: # %.thread.i - # in Loop: Header=BB0_528 Depth=2 +.LBB0_554: # %.thread.i + # in Loop: Header=BB0_527 Depth=2 ld.w $a1, $s8, 0 ld.w $a2, $s8, 4 add.d $s7, $s1, $s7 -.LBB0_556: # %.lr.ph246.preheader.i - # in Loop: Header=BB0_528 Depth=2 +.LBB0_555: # %.lr.ph246.preheader.i + # in Loop: Header=BB0_527 Depth=2 ori $s1, $zero, 3 move $a3, $zero move $a6, $zero @@ -5025,59 +5005,59 @@ SIM4: # @SIM4 bstrpick.d $a5, $a1, 31, 0 add.d $a5, $fp, $a5 addi.d $a5, $a5, -1 - b .LBB0_559 + b .LBB0_558 .p2align 4, , 16 -.LBB0_557: # in Loop: Header=BB0_559 Depth=3 +.LBB0_556: # in Loop: Header=BB0_558 Depth=3 ld.w $a7, $a0, 8 add.w $a3, $a7, $a3 add.d $a6, $a7, $a6 add.d $a4, $a4, $a7 -.LBB0_558: # %.loopexit.i511 - # in Loop: Header=BB0_559 Depth=3 +.LBB0_557: # %.loopexit.i511 + # in Loop: Header=BB0_558 Depth=3 ld.d $a0, $a0, 0 - beqz $a0, .LBB0_526 -.LBB0_559: # %.lr.ph246.i + beqz $a0, .LBB0_525 +.LBB0_558: # %.lr.ph246.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_528 Depth=2 + # Parent Loop BB0_527 Depth=2 # => This Loop Header: Depth=3 - # Child Loop BB0_567 Depth 4 - # Child Loop BB0_570 Depth 4 + # Child Loop BB0_566 Depth 4 + # Child Loop BB0_569 Depth 4 ld.bu $a7, $a0, 12 - beq $a7, $s3, .LBB0_565 -# %bb.560: # %.lr.ph246.i - # in Loop: Header=BB0_559 Depth=3 + beq $a7, $s3, .LBB0_564 +# %bb.559: # %.lr.ph246.i + # in Loop: Header=BB0_558 Depth=3 ori $t0, $zero, 2 - beq $a7, $t0, .LBB0_557 -# %bb.561: # %.lr.ph246.i - # in Loop: Header=BB0_559 Depth=3 - bne $a7, $s1, .LBB0_558 -# %bb.562: # %.preheader.i510 - # in Loop: Header=BB0_559 Depth=3 + beq $a7, $t0, .LBB0_556 +# %bb.560: # %.lr.ph246.i + # in Loop: Header=BB0_558 Depth=3 + bne $a7, $s1, .LBB0_557 +# %bb.561: # %.preheader.i510 + # in Loop: Header=BB0_558 Depth=3 ld.w $a7, $a0, 8 - blez $a7, .LBB0_558 -# %bb.563: # %.lr.ph.i516.preheader - # in Loop: Header=BB0_559 Depth=3 + blez $a7, .LBB0_557 +# %bb.562: # %.lr.ph.i516.preheader + # in Loop: Header=BB0_558 Depth=3 ori $t0, $zero, 8 - bgeu $a7, $t0, .LBB0_566 -# %bb.564: # in Loop: Header=BB0_559 Depth=3 + bgeu $a7, $t0, .LBB0_565 +# %bb.563: # in Loop: Header=BB0_558 Depth=3 move $t2, $zero move $t0, $a4 move $t1, $a5 - b .LBB0_569 + b .LBB0_568 .p2align 4, , 16 -.LBB0_565: # in Loop: Header=BB0_559 Depth=3 +.LBB0_564: # in Loop: Header=BB0_558 Depth=3 ld.w $a7, $a0, 8 add.w $a3, $a7, $a3 add.d $a6, $a7, $a6 add.d $a5, $a5, $a7 - b .LBB0_558 -.LBB0_566: # %vector.ph - # in Loop: Header=BB0_559 Depth=3 + b .LBB0_557 +.LBB0_565: # %vector.ph + # in Loop: Header=BB0_558 Depth=3 bstrpick.d $t1, $a7, 30, 3 slli.d $t2, $t1, 3 alsl.d $t0, $t1, $a4, 3 alsl.d $t1, $t1, $a5, 3 - vld $vr2, $sp, 256 # 16-byte Folded Reload + vld $vr2, $sp, 240 # 16-byte Folded Reload vori.b $vr1, $vr2, 0 vinsgr2vr.w $vr1, $a6, 0 vori.b $vr0, $vr2, 0 @@ -5087,10 +5067,10 @@ SIM4: # @SIM4 move $t4, $t2 vori.b $vr3, $vr2, 0 .p2align 4, , 16 -.LBB0_567: # %vector.body +.LBB0_566: # %vector.body # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_528 Depth=2 - # Parent Loop BB0_559 Depth=3 + # Parent Loop BB0_527 Depth=2 + # Parent Loop BB0_558 Depth=3 # => This Inner Loop Header: Depth=4 ld.w $t5, $a6, -4 ld.w $t6, $a6, 0 @@ -5123,9 +5103,9 @@ SIM4: # @SIM4 addi.d $t4, $t4, -8 addi.d $a6, $a6, 8 addi.d $t3, $t3, 8 - bnez $t4, .LBB0_567 -# %bb.568: # %middle.block - # in Loop: Header=BB0_559 Depth=3 + bnez $t4, .LBB0_566 +# %bb.567: # %middle.block + # in Loop: Header=BB0_558 Depth=3 vadd.w $vr1, $vr3, $vr1 vhaddw.d.w $vr1, $vr1, $vr1 vhaddw.q.d $vr1, $vr1, $vr1 @@ -5134,15 +5114,15 @@ SIM4: # @SIM4 vhaddw.d.w $vr0, $vr0, $vr0 vhaddw.q.d $vr0, $vr0, $vr0 vpickve2gr.d $s6, $vr0, 0 - beq $t2, $a7, .LBB0_571 -.LBB0_569: # %.lr.ph.i516.preheader1166 - # in Loop: Header=BB0_559 Depth=3 + beq $t2, $a7, .LBB0_570 +.LBB0_568: # %.lr.ph.i516.preheader1166 + # in Loop: Header=BB0_558 Depth=3 sub.d $t2, $a7, $t2 .p2align 4, , 16 -.LBB0_570: # %.lr.ph.i516 +.LBB0_569: # %.lr.ph.i516 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_528 Depth=2 - # Parent Loop BB0_559 Depth=3 + # Parent Loop BB0_527 Depth=2 + # Parent Loop BB0_558 Depth=3 # => This Inner Loop Header: Depth=4 ld.bu $t3, $t1, 0 ld.bu $t4, $t0, 0 @@ -5154,48 +5134,47 @@ SIM4: # @SIM4 addi.d $t1, $t1, 1 addi.w $t2, $t2, -1 addi.d $t0, $t0, 1 - bnez $t2, .LBB0_570 -.LBB0_571: # %.loopexit.loopexit.i - # in Loop: Header=BB0_559 Depth=3 + bnez $t2, .LBB0_569 +.LBB0_570: # %.loopexit.loopexit.i + # in Loop: Header=BB0_558 Depth=3 addi.w $a7, $a7, -1 add.d $a5, $a5, $a7 addi.d $a5, $a5, 1 add.d $a4, $a4, $a7 addi.d $a4, $a4, 1 - b .LBB0_558 + b .LBB0_557 .p2align 4, , 16 -.LBB0_572: # %pluri_align.exit.thread +.LBB0_571: # %pluri_align.exit.thread # in Loop: Header=BB0_6 Depth=1 ld.d $s4, $sp, 152 # 8-byte Folded Reload - ld.d $s6, $sp, 80 # 8-byte Folded Reload + ld.d $s5, $sp, 80 # 8-byte Folded Reload + ld.d $s6, $sp, 264 # 8-byte Folded Reload ori $s8, $zero, 2 - ld.d $s5, $sp, 168 # 8-byte Folded Reload - ld.d $s7, $sp, 136 # 8-byte Folded Reload - ld.d $a1, $sp, 408 # 8-byte Folded Reload - b .LBB0_582 + ld.d $s7, $sp, 168 # 8-byte Folded Reload + ld.d $a1, $sp, 392 # 8-byte Folded Reload + b .LBB0_581 .p2align 4, , 16 -.LBB0_573: # in Loop: Header=BB0_6 Depth=1 +.LBB0_572: # in Loop: Header=BB0_6 Depth=1 move $s6, $zero ori $s8, $zero, 2 - ld.d $s5, $sp, 168 # 8-byte Folded Reload - ld.d $s7, $sp, 136 # 8-byte Folded Reload - b .LBB0_581 -.LBB0_574: # %._crit_edge260.i + ld.d $s7, $sp, 168 # 8-byte Folded Reload + b .LBB0_580 +.LBB0_573: # %._crit_edge260.i # in Loop: Header=BB0_6 Depth=1 move $s0, $s7 ld.w $a0, $s8, 4 addi.w $a0, $a0, -1 ori $fp, $zero, 1 - beqz $a0, .LBB0_577 -# %bb.575: # %._crit_edge260.i + ld.d $a1, $sp, 400 # 8-byte Folded Reload + beqz $a0, .LBB0_576 +# %bb.574: # %._crit_edge260.i # in Loop: Header=BB0_6 Depth=1 - ld.d $a1, $sp, 416 # 8-byte Folded Reload - beq $a0, $a1, .LBB0_577 -# %bb.576: # in Loop: Header=BB0_6 Depth=1 + beq $a0, $a1, .LBB0_576 +# %bb.575: # in Loop: Header=BB0_6 Depth=1 ori $a0, $zero, 40 pcaddu18i $ra, %call36(xmalloc) jirl $ra, $ra, 0 - ld.d $a2, $sp, 408 # 8-byte Folded Reload + ld.d $a2, $sp, 392 # 8-byte Folded Reload ld.d $a1, $a2, 0 st.d $a1, $a0, 0 st.d $a0, $a2, 0 @@ -5203,59 +5182,57 @@ SIM4: # @SIM4 st.w $a1, $a0, 16 ld.w $a2, $s8, 4 st.w $a2, $a0, 20 - ld.d $a3, $sp, 424 # 8-byte Folded Reload + ld.d $a3, $sp, 408 # 8-byte Folded Reload sub.d $a1, $a3, $a1 addi.d $a1, $a1, 1 st.w $a1, $a0, 24 - ld.d $a1, $sp, 384 # 8-byte Folded Reload + ld.d $a1, $sp, 368 # 8-byte Folded Reload sub.d $a1, $a1, $a2 addi.d $a1, $a1, 1 - ld.d $s5, $sp, 168 # 8-byte Folded Reload - ld.d $s7, $sp, 136 # 8-byte Folded Reload - b .LBB0_579 -.LBB0_577: # in Loop: Header=BB0_6 Depth=1 - ld.d $s5, $sp, 168 # 8-byte Folded Reload - ld.d $s7, $sp, 136 # 8-byte Folded Reload - ld.d $a1, $sp, 416 # 8-byte Folded Reload - beq $a0, $a1, .LBB0_580 -# %bb.578: # in Loop: Header=BB0_6 Depth=1 + ld.d $s7, $sp, 168 # 8-byte Folded Reload + b .LBB0_578 +.LBB0_576: # in Loop: Header=BB0_6 Depth=1 + ld.d $s7, $sp, 168 # 8-byte Folded Reload + beq $a0, $a1, .LBB0_579 +# %bb.577: # in Loop: Header=BB0_6 Depth=1 ori $a0, $zero, 40 pcaddu18i $ra, %call36(xmalloc) jirl $ra, $ra, 0 - ld.d $a2, $sp, 408 # 8-byte Folded Reload + ld.d $a2, $sp, 392 # 8-byte Folded Reload ld.d $a1, $a2, 0 st.d $a1, $a0, 0 st.d $a0, $a2, 0 ld.w $a1, $s8, 0 st.w $a1, $a0, 16 st.w $fp, $a0, 20 - ld.d $a2, $sp, 424 # 8-byte Folded Reload + ld.d $a2, $sp, 408 # 8-byte Folded Reload sub.d $a1, $a2, $a1 addi.d $a1, $a1, 1 st.w $a1, $a0, 24 - ld.d $a1, $sp, 384 # 8-byte Folded Reload -.LBB0_579: # %.thread305.sink.split.i + ld.d $a1, $sp, 368 # 8-byte Folded Reload +.LBB0_578: # %.thread305.sink.split.i # in Loop: Header=BB0_6 Depth=1 st.w $a1, $a0, 28 st.d $s4, $a0, 8 st.w $s0, $a0, 32 -.LBB0_580: # in Loop: Header=BB0_6 Depth=1 +.LBB0_579: # in Loop: Header=BB0_6 Depth=1 ori $s8, $zero, 2 - ld.d $s4, $sp, 408 # 8-byte Folded Reload -.LBB0_581: # %pluri_align.exit + ld.d $s4, $sp, 392 # 8-byte Folded Reload +.LBB0_580: # %pluri_align.exit # in Loop: Header=BB0_6 Depth=1 - ld.d $a0, $sp, 376 # 8-byte Folded Reload + ld.d $a0, $sp, 360 # 8-byte Folded Reload ld.w $a0, $a0, 20 st.w $s6, $s4, 36 move $a1, $s4 ld.d $s4, $sp, 152 # 8-byte Folded Reload - ld.d $s6, $sp, 80 # 8-byte Folded Reload + ld.d $s5, $sp, 80 # 8-byte Folded Reload + ld.d $s6, $sp, 264 # 8-byte Folded Reload bnez $a0, .LBB0_5 -.LBB0_582: # in Loop: Header=BB0_6 Depth=1 +.LBB0_581: # in Loop: Header=BB0_6 Depth=1 ld.d $fp, $a1, 0 beqz $fp, .LBB0_4 .p2align 4, , 16 -.LBB0_583: # %.lr.ph.i519 +.LBB0_582: # %.lr.ph.i519 # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 ld.d $s0, $fp, 0 @@ -5266,18 +5243,18 @@ SIM4: # @SIM4 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 move $fp, $s0 - bnez $s0, .LBB0_583 + bnez $s0, .LBB0_582 b .LBB0_4 -.LBB0_584: # %.critedge2.split.loop.exit385.i +.LBB0_583: # %.critedge2.split.loop.exit385.i # in Loop: Header=BB0_6 Depth=1 move $s5, $s1 -.LBB0_585: # %.critedge2.i.preheader +.LBB0_584: # %.critedge2.i.preheader # in Loop: Header=BB0_6 Depth=1 ori $a3, $zero, 1 ori $s8, $zero, 2 - ld.d $a4, $sp, 320 # 8-byte Folded Reload + ld.d $a4, $sp, 304 # 8-byte Folded Reload .p2align 4, , 16 -.LBB0_586: # %.critedge2.i +.LBB0_585: # %.critedge2.i # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 addi.w $a5, $s5, 0 @@ -5286,18 +5263,18 @@ SIM4: # @SIM4 ldx.w $a2, $s2, $a2 ld.w $a1, $a1, -4 sub.w $a1, $a2, $a1 - blt $s8, $a1, .LBB0_589 -# %bb.587: # in Loop: Header=BB0_586 Depth=2 + blt $s8, $a1, .LBB0_588 +# %bb.586: # in Loop: Header=BB0_585 Depth=2 addi.d $s5, $s5, -1 - blt $a3, $a5, .LBB0_586 -# %bb.588: # in Loop: Header=BB0_6 Depth=1 + blt $a3, $a5, .LBB0_585 +# %bb.587: # in Loop: Header=BB0_6 Depth=1 move $s5, $zero -.LBB0_589: # %.critedge8.i +.LBB0_588: # %.critedge8.i # in Loop: Header=BB0_6 Depth=1 addi.w $a1, $s5, 0 slli.d $a1, $a1, 2 ldx.w $s0, $s2, $a1 - ld.d $s1, $sp, 360 # 8-byte Folded Reload + ld.d $s1, $sp, 344 # 8-byte Folded Reload ldx.w $a2, $s1, $a1 sub.d $a1, $a4, $s4 add.d $a1, $a1, $s0 @@ -5308,27 +5285,29 @@ SIM4: # @SIM4 move $a0, $s1 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 392 # 8-byte Folded Reload + ld.d $a0, $sp, 376 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 424 # 8-byte Folded Reload + ld.d $a0, $sp, 408 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 move $s4, $s0 - b .LBB0_208 -.LBB0_590: # %._crit_edge683.loopexit - ld.d $fp, $sp, 472 - b .LBB0_592 -.LBB0_591: + b .LBB0_207 +.LBB0_589: # %._crit_edge683.loopexit + ld.d $fp, $sp, 456 + b .LBB0_591 +.LBB0_590: move $fp, $zero -.LBB0_592: # %._crit_edge683 - ld.d $a0, $sp, 488 +.LBB0_591: # %._crit_edge683 + ld.d $a0, $sp, 472 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 move $a0, $fp pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 -.LBB0_593: +.LBB0_592: + fld.d $fs4, $sp, 512 # 8-byte Folded Reload + fld.d $fs3, $sp, 520 # 8-byte Folded Reload fld.d $fs2, $sp, 528 # 8-byte Folded Reload fld.d $fs1, $sp, 536 # 8-byte Folded Reload fld.d $fs0, $sp, 544 # 8-byte Folded Reload diff --git a/results/MultiSource/Applications/lua/CMakeFiles/lua.dir/lbaselib.s b/results/MultiSource/Applications/lua/CMakeFiles/lua.dir/lbaselib.s index db3b1a9e..c0788281 100644 --- a/results/MultiSource/Applications/lua/CMakeFiles/lua.dir/lbaselib.s +++ b/results/MultiSource/Applications/lua/CMakeFiles/lua.dir/lbaselib.s @@ -418,12 +418,7 @@ luaB_assert: # @luaB_assert .Lfunc_end6: .size luaB_assert, .Lfunc_end6-luaB_assert # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function luaB_collectgarbage -.LCPI7_0: - .dword 0x3f50000000000000 # double 9.765625E-4 - .text - .p2align 5 + .p2align 5 # -- Begin function luaB_collectgarbage .type luaB_collectgarbage,@function luaB_collectgarbage: # @luaB_collectgarbage # %bb.0: @@ -469,12 +464,12 @@ luaB_collectgarbage: # @luaB_collectgarbage pcaddu18i $ra, %call36(lua_gc) jirl $ra, $ra, 0 movgr2fr.w $fa0, $s0 - pcalau12i $a1, %pc_hi20(.LCPI7_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI7_0) ffint.d.w $fa0, $fa0 - movgr2fr.w $fa2, $a0 - ffint.d.w $fa2, $fa2 - fmul.d $fa1, $fa2, $fa1 + movgr2fr.w $fa1, $a0 + ffint.d.w $fa1, $fa1 + lu52i.d $a0, $zero, 1013 + movgr2fr.d $fa2, $a0 + fmul.d $fa1, $fa1, $fa2 fadd.d $fa0, $fa1, $fa0 b .LBB7_5 .LBB7_3: @@ -1327,12 +1322,7 @@ luaB_setmetatable: # @luaB_setmetatable .Lfunc_end23: .size luaB_setmetatable, .Lfunc_end23-luaB_setmetatable # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function luaB_tonumber -.LCPI24_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 - .text - .p2align 5 + .p2align 5 # -- Begin function luaB_tonumber .type luaB_tonumber,@function luaB_tonumber: # @luaB_tonumber # %bb.0: @@ -1414,12 +1404,13 @@ luaB_tonumber: # @luaB_tonumber b .LBB24_12 .LBB24_10: # %.critedge srli.d $a0, $s0, 32 - pcalau12i $a1, %pc_hi20(.LCPI24_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI24_0) lu52i.d $a1, $zero, 1107 or $a0, $a0, $a1 + movgr2fr.d $fa0, $a0 + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 movgr2fr.d $fa1, $a0 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 lu12i.w $a0, 275200 bstrins.d $s0, $a0, 63, 32 movgr2fr.d $fa1, $s0 diff --git a/results/MultiSource/Applications/lua/CMakeFiles/lua.dir/lmathlib.s b/results/MultiSource/Applications/lua/CMakeFiles/lua.dir/lmathlib.s index 5680a519..2c05fe50 100644 --- a/results/MultiSource/Applications/lua/CMakeFiles/lua.dir/lmathlib.s +++ b/results/MultiSource/Applications/lua/CMakeFiles/lua.dir/lmathlib.s @@ -1,12 +1,6 @@ .file "lmathlib.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function luaopen_math -.LCPI0_0: - .dword 0x400921fb54442d18 # double 3.1415926535897931 -.LCPI0_1: - .dword 0x7ff0000000000000 # double +Inf .text - .globl luaopen_math + .globl luaopen_math # -- Begin function luaopen_math .p2align 5 .type luaopen_math,@function luaopen_math: # @luaopen_math @@ -23,8 +17,11 @@ luaopen_math: # @luaopen_math move $a0, $fp pcaddu18i $ra, %call36(luaL_register) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_0) + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fa0, $a0 move $a0, $fp pcaddu18i $ra, %call36(lua_pushnumber) jirl $ra, $ra, 0 @@ -35,8 +32,8 @@ luaopen_math: # @luaopen_math move $a1, $s0 pcaddu18i $ra, %call36(lua_setfield) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_1) + lu52i.d $a0, $zero, 2047 + movgr2fr.d $fa0, $a0 move $a0, $fp pcaddu18i $ra, %call36(lua_pushnumber) jirl $ra, $ra, 0 @@ -269,12 +266,7 @@ math_cos: # @math_cos .Lfunc_end8: .size math_cos, .Lfunc_end8-math_cos # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function math_deg -.LCPI9_0: - .dword 0x3f91df46a2529d39 # double 0.017453292519943295 - .text - .p2align 5 + .p2align 5 # -- Begin function math_deg .type math_deg,@function math_deg: # @math_deg # %bb.0: @@ -285,8 +277,11 @@ math_deg: # @math_deg ori $a1, $zero, 1 pcaddu18i $ra, %call36(luaL_checknumber) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI9_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI9_0) + lu12i.w $a0, -383703 + ori $a0, $a0, 3385 + lu32i.d $a0, 122694 + lu52i.d $a0, $a0, 1017 + movgr2fr.d $fa1, $a0 fdiv.d $fa0, $fa0, $fa1 move $a0, $fp pcaddu18i $ra, %call36(lua_pushnumber) @@ -657,12 +652,7 @@ math_pow: # @math_pow .Lfunc_end20: .size math_pow, .Lfunc_end20-math_pow # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function math_rad -.LCPI21_0: - .dword 0x3f91df46a2529d39 # double 0.017453292519943295 - .text - .p2align 5 + .p2align 5 # -- Begin function math_rad .type math_rad,@function math_rad: # @math_rad # %bb.0: @@ -673,8 +663,11 @@ math_rad: # @math_rad ori $a1, $zero, 1 pcaddu18i $ra, %call36(luaL_checknumber) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI21_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI21_0) + lu12i.w $a0, -383703 + ori $a0, $a0, 3385 + lu32i.d $a0, 122694 + lu52i.d $a0, $a0, 1017 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 move $a0, $fp pcaddu18i $ra, %call36(lua_pushnumber) @@ -687,12 +680,7 @@ math_rad: # @math_rad .Lfunc_end21: .size math_rad, .Lfunc_end21-math_rad # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function math_random -.LCPI22_0: - .dword 0x41dfffffffc00000 # double 2147483647 - .text - .p2align 5 + .p2align 5 # -- Begin function math_random .type math_random,@function math_random: # @math_random # %bb.0: @@ -713,12 +701,13 @@ math_random: # @math_random add.d $a1, $a1, $a2 slli.d $a2, $a1, 31 add.d $a1, $a2, $a1 - pcalau12i $a2, %pc_hi20(.LCPI22_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI22_0) add.d $a0, $a0, $a1 - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fdiv.d $fs0, $fa1, $fa0 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + lu12i.w $a0, -1024 + lu52i.d $a0, $a0, 1053 + movgr2fr.d $fa1, $a0 + fdiv.d $fs0, $fa0, $fa1 move $a0, $fp pcaddu18i $ra, %call36(lua_gettop) jirl $ra, $ra, 0 diff --git a/results/MultiSource/Applications/lua/CMakeFiles/lua.dir/lobject.s b/results/MultiSource/Applications/lua/CMakeFiles/lua.dir/lobject.s index 154ecd27..c25c7e2b 100644 --- a/results/MultiSource/Applications/lua/CMakeFiles/lua.dir/lobject.s +++ b/results/MultiSource/Applications/lua/CMakeFiles/lua.dir/lobject.s @@ -139,12 +139,8 @@ luaO_rawequalObj: # @luaO_rawequalObj .word .LBB3_5-.LJTI3_0 .word .LBB3_6-.LJTI3_0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function luaO_str2d -.LCPI4_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 .text - .hidden luaO_str2d + .hidden luaO_str2d # -- Begin function luaO_str2d .globl luaO_str2d .p2align 5 .type luaO_str2d,@function @@ -174,12 +170,13 @@ luaO_str2d: # @luaO_str2d pcaddu18i $ra, %call36(strtoul) jirl $ra, $ra, 0 srli.d $a1, $a0, 32 - pcalau12i $a2, %pc_hi20(.LCPI4_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI4_0) lu52i.d $a2, $zero, 1107 or $a1, $a1, $a2 + movgr2fr.d $fa0, $a1 + lu12i.w $a1, 256 + lu52i.d $a1, $a1, 1107 movgr2fr.d $fa1, $a1 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 lu12i.w $a1, 275200 bstrins.d $a0, $a1, 63, 32 ld.d $s0, $sp, 0 diff --git a/results/MultiSource/Applications/lua/CMakeFiles/lua.dir/loslib.s b/results/MultiSource/Applications/lua/CMakeFiles/lua.dir/loslib.s index 3c27cc4e..6c650a4a 100644 --- a/results/MultiSource/Applications/lua/CMakeFiles/lua.dir/loslib.s +++ b/results/MultiSource/Applications/lua/CMakeFiles/lua.dir/loslib.s @@ -20,12 +20,7 @@ luaopen_os: # @luaopen_os .Lfunc_end0: .size luaopen_os, .Lfunc_end0-luaopen_os # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function os_clock -.LCPI1_0: - .dword 0x412e848000000000 # double 1.0E+6 - .text - .p2align 5 + .p2align 5 # -- Begin function os_clock .type os_clock,@function os_clock: # @os_clock # %bb.0: @@ -35,11 +30,13 @@ os_clock: # @os_clock move $fp, $a0 pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI1_0) + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 movgr2fr.d $fa1, $a0 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 move $a0, $fp pcaddu18i $ra, %call36(lua_pushnumber) jirl $ra, $ra, 0 diff --git a/results/MultiSource/Applications/lua/CMakeFiles/lua.dir/lstrlib.s b/results/MultiSource/Applications/lua/CMakeFiles/lua.dir/lstrlib.s index 8a942008..c089a52f 100644 --- a/results/MultiSource/Applications/lua/CMakeFiles/lua.dir/lstrlib.s +++ b/results/MultiSource/Applications/lua/CMakeFiles/lua.dir/lstrlib.s @@ -322,12 +322,7 @@ str_find: # @str_find .Lfunc_end4: .size str_find, .Lfunc_end4-str_find # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function str_format -.LCPI5_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 - .text - .p2align 5 + .p2align 5 # -- Begin function str_format .type str_format,@function str_format: # @str_format # %bb.0: @@ -343,24 +338,25 @@ str_format: # @str_format st.d $s6, $sp, 1960 # 8-byte Folded Spill st.d $s7, $sp, 1952 # 8-byte Folded Spill st.d $s8, $sp, 1944 # 8-byte Folded Spill + fst.d $fs0, $sp, 1936 # 8-byte Folded Spill lu12i.w $a1, 1 ori $a1, $a1, 2752 sub.d $sp, $sp, $a1 move $fp, $a0 ori $a1, $zero, 1 lu12i.w $a2, 2 - ori $a2, $a2, 584 + ori $a2, $a2, 576 add.d $a2, $sp, $a2 ori $s5, $zero, 1 pcaddu18i $ra, %call36(luaL_checklstring) jirl $ra, $ra, 0 lu12i.w $a1, 2 - ori $a1, $a1, 584 + ori $a1, $a1, 576 add.d $a1, $sp, $a1 ld.d $s0, $a1, 0 move $s3, $a0 - addi.d $a1, $sp, 560 - addi.d $s1, $sp, 560 + addi.d $a1, $sp, 552 + addi.d $s1, $sp, 552 move $a0, $fp pcaddu18i $ra, %call36(luaL_buffinit) jirl $ra, $ra, 0 @@ -373,7 +369,9 @@ str_format: # @str_format ori $s8, $zero, 37 pcalau12i $a0, %pc_hi20(.L.str.30) addi.d $a0, $a0, %pc_lo12(.L.str.30) - st.d $a0, $sp, 16 # 8-byte Folded Spill + st.d $a0, $sp, 8 # 8-byte Folded Spill + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 ori $s1, $zero, 63 ori $s2, $zero, 1 b .LBB5_4 @@ -381,7 +379,7 @@ str_format: # @str_format .LBB5_2: # in Loop: Header=BB5_4 Depth=1 addi.d $s3, $s3, 1 addi.d $a2, $a1, 1 - st.d $a2, $sp, 560 + st.d $a2, $sp, 552 st.b $a0, $a1, 0 .LBB5_3: # %.backedge # in Loop: Header=BB5_4 Depth=1 @@ -396,31 +394,31 @@ str_format: # @str_format addi.d $s4, $s3, 1 bne $s0, $s8, .LBB5_11 # %bb.6: # in Loop: Header=BB5_4 Depth=1 - ld.d $a0, $sp, 560 + ld.d $a0, $sp, 552 ori $a1, $zero, 37 bltu $a0, $s7, .LBB5_8 # %bb.7: # in Loop: Header=BB5_4 Depth=1 - addi.d $a0, $sp, 560 + addi.d $a0, $sp, 552 pcaddu18i $ra, %call36(luaL_prepbuffer) jirl $ra, $ra, 0 ld.bu $a1, $s4, 0 - ld.d $a0, $sp, 560 + ld.d $a0, $sp, 552 .LBB5_8: # in Loop: Header=BB5_4 Depth=1 addi.d $s3, $s3, 2 addi.d $a2, $a0, 1 - st.d $a2, $sp, 560 + st.d $a2, $sp, 552 st.b $a1, $a0, 0 b .LBB5_3 .p2align 4, , 16 .LBB5_9: # in Loop: Header=BB5_4 Depth=1 - ld.d $a1, $sp, 560 + ld.d $a1, $sp, 552 bltu $a1, $s7, .LBB5_2 # %bb.10: # in Loop: Header=BB5_4 Depth=1 - addi.d $a0, $sp, 560 + addi.d $a0, $sp, 552 pcaddu18i $ra, %call36(luaL_prepbuffer) jirl $ra, $ra, 0 ld.bu $a0, $s3, 0 - ld.d $a1, $sp, 560 + ld.d $a1, $sp, 552 b .LBB5_2 .p2align 4, , 16 .LBB5_11: # in Loop: Header=BB5_4 Depth=1 @@ -459,7 +457,7 @@ str_format: # @str_format bltu $a0, $a1, .LBB5_19 .LBB5_18: # in Loop: Header=BB5_4 Depth=1 move $a0, $fp - ld.d $a1, $sp, 16 # 8-byte Folded Reload + ld.d $a1, $sp, 8 # 8-byte Folded Reload pcaddu18i $ra, %call36(luaL_error) jirl $ra, $ra, 0 ld.bu $s0, $s3, 0 @@ -512,10 +510,10 @@ str_format: # @str_format .LBB5_23: # %scanformat.exit # in Loop: Header=BB5_4 Depth=1 move $s8, $fp - st.b $s3, $sp, 542 + st.b $s3, $sp, 534 sub.d $a0, $s0, $s4 addi.d $s3, $a0, 1 - addi.d $fp, $sp, 543 + addi.d $fp, $sp, 535 move $a0, $fp move $a1, $s4 move $a2, $s3 @@ -550,10 +548,10 @@ str_format: # @str_format bltu $a0, $a1, .LBB5_19 b .LBB5_18 .LBB5_27: # in Loop: Header=BB5_4 Depth=1 - addi.d $a0, $sp, 542 + addi.d $a0, $sp, 534 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 - addi.d $a3, $sp, 542 + addi.d $a3, $sp, 534 add.d $a1, $a3, $a0 ld.b $a2, $a1, -1 ori $a4, $zero, 108 @@ -565,14 +563,12 @@ str_format: # @str_format move $a1, $s2 pcaddu18i $ra, %call36(luaL_checknumber) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI5_0) - fcmp.clt.d $fcc0, $fa0, $fa1 - ftintrz.l.d $fa2, $fa0 - movfr2gr.d $a0, $fa2 + fcmp.clt.d $fcc0, $fa0, $fs0 + ftintrz.l.d $fa1, $fa0 + movfr2gr.d $a0, $fa1 movcf2gr $a1, $fcc0 maskeqz $a0, $a0, $a1 - fsub.d $fa0, $fa0, $fa1 + fsub.d $fa0, $fa0, $fs0 ftintrz.l.d $fa0, $fa0 movfr2gr.d $a2, $fa0 lu52i.d $a3, $zero, -2048 @@ -581,10 +577,10 @@ str_format: # @str_format or $a2, $a0, $a1 b .LBB5_30 .LBB5_28: # in Loop: Header=BB5_4 Depth=1 - addi.d $a0, $sp, 542 + addi.d $a0, $sp, 534 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 - addi.d $a3, $sp, 542 + addi.d $a3, $sp, 534 add.d $a1, $a3, $a0 ld.b $a2, $a1, -1 ori $a4, $zero, 108 @@ -602,25 +598,25 @@ str_format: # @str_format movfr2gr.d $a2, $fa0 .LBB5_30: # %.thread49 # in Loop: Header=BB5_4 Depth=1 - addi.d $a0, $sp, 30 - addi.d $a1, $sp, 542 + addi.d $a0, $sp, 22 + addi.d $a1, $sp, 534 .LBB5_31: # %.thread49 # in Loop: Header=BB5_4 Depth=1 pcaddu18i $ra, %call36(sprintf) jirl $ra, $ra, 0 ori $s8, $zero, 37 - addi.d $a0, $sp, 30 + addi.d $a0, $sp, 22 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 move $a2, $a0 - addi.d $a0, $sp, 560 - addi.d $a1, $sp, 30 + addi.d $a0, $sp, 552 + addi.d $a1, $sp, 22 pcaddu18i $ra, %call36(luaL_addlstring) jirl $ra, $ra, 0 b .LBB5_3 .LBB5_32: # in Loop: Header=BB5_4 Depth=1 lu12i.w $a0, 2 - ori $a0, $a0, 592 + ori $a0, $a0, 584 add.d $a2, $sp, $a0 move $fp, $s8 move $a0, $s8 @@ -628,14 +624,14 @@ str_format: # @str_format pcaddu18i $ra, %call36(luaL_checklstring) jirl $ra, $ra, 0 move $s4, $a0 - addi.d $a0, $sp, 542 + addi.d $a0, $sp, 534 ori $a1, $zero, 46 pcaddu18i $ra, %call36(strchr) jirl $ra, $ra, 0 bnez $a0, .LBB5_56 # %bb.33: # in Loop: Header=BB5_4 Depth=1 lu12i.w $a0, 2 - ori $a0, $a0, 592 + ori $a0, $a0, 584 add.d $a0, $sp, $a0 ld.d $a0, $a0, 0 ori $a1, $zero, 99 @@ -645,57 +641,57 @@ str_format: # @str_format move $a1, $s2 pcaddu18i $ra, %call36(lua_pushvalue) jirl $ra, $ra, 0 - addi.d $a0, $sp, 560 + addi.d $a0, $sp, 552 pcaddu18i $ra, %call36(luaL_addvalue) jirl $ra, $ra, 0 ori $s8, $zero, 37 b .LBB5_3 .LBB5_35: # in Loop: Header=BB5_4 Depth=1 lu12i.w $a0, 2 - ori $a0, $a0, 592 + ori $a0, $a0, 584 add.d $a2, $sp, $a0 move $fp, $s8 move $a0, $s8 move $a1, $s2 pcaddu18i $ra, %call36(luaL_checklstring) jirl $ra, $ra, 0 - ld.d $a1, $sp, 560 + ld.d $a1, $sp, 552 move $s4, $a0 bltu $a1, $s7, .LBB5_37 # %bb.36: # in Loop: Header=BB5_4 Depth=1 - addi.d $a0, $sp, 560 + addi.d $a0, $sp, 552 pcaddu18i $ra, %call36(luaL_prepbuffer) jirl $ra, $ra, 0 - ld.d $a1, $sp, 560 + ld.d $a1, $sp, 552 .LBB5_37: # in Loop: Header=BB5_4 Depth=1 addi.d $a0, $a1, 1 - st.d $a0, $sp, 560 + st.d $a0, $sp, 552 ori $a0, $zero, 34 st.b $a0, $a1, 0 lu12i.w $a0, 2 - ori $a0, $a0, 592 + ori $a0, $a0, 584 add.d $a0, $sp, $a0 ld.d $a0, $a0, 0 addi.d $a1, $a0, -1 lu12i.w $a2, 2 - ori $a2, $a2, 592 + ori $a2, $a2, 584 add.d $a2, $sp, $a2 st.d $a1, $a2, 0 ori $s8, $zero, 37 bnez $a0, .LBB5_43 .LBB5_38: # %._crit_edge.i # in Loop: Header=BB5_4 Depth=1 - ld.d $a0, $sp, 560 + ld.d $a0, $sp, 552 bltu $a0, $s7, .LBB5_40 # %bb.39: # in Loop: Header=BB5_4 Depth=1 - addi.d $a0, $sp, 560 + addi.d $a0, $sp, 552 pcaddu18i $ra, %call36(luaL_prepbuffer) jirl $ra, $ra, 0 - ld.d $a0, $sp, 560 + ld.d $a0, $sp, 552 .LBB5_40: # %addquoted.exit # in Loop: Header=BB5_4 Depth=1 addi.d $a1, $a0, 1 - st.d $a1, $sp, 560 + st.d $a1, $sp, 552 ori $a1, $zero, 34 st.b $a1, $a0, 0 b .LBB5_3 @@ -703,17 +699,17 @@ str_format: # @str_format .LBB5_41: # in Loop: Header=BB5_43 Depth=2 ld.b $a1, $s4, 0 addi.d $a2, $a0, 1 - st.d $a2, $sp, 560 + st.d $a2, $sp, 552 st.b $a1, $a0, 0 .LBB5_42: # in Loop: Header=BB5_43 Depth=2 lu12i.w $a0, 2 - ori $a0, $a0, 592 + ori $a0, $a0, 584 add.d $a0, $sp, $a0 ld.d $a0, $a0, 0 addi.d $s4, $s4, 1 addi.d $a1, $a0, -1 lu12i.w $a2, 2 - ori $a2, $a2, 592 + ori $a2, $a2, 584 add.d $a2, $sp, $a2 st.d $a1, $a2, 0 beqz $a0, .LBB5_38 @@ -734,7 +730,7 @@ str_format: # @str_format .LBB5_45: # in Loop: Header=BB5_43 Depth=2 pcalau12i $a0, %pc_hi20(.L.str.34) addi.d $a1, $a0, %pc_lo12(.L.str.34) - addi.d $a0, $sp, 560 + addi.d $a0, $sp, 552 ori $a2, $zero, 4 pcaddu18i $ra, %call36(luaL_addlstring) jirl $ra, $ra, 0 @@ -745,44 +741,44 @@ str_format: # @str_format ori $a1, $zero, 92 bne $a0, $a1, .LBB5_51 .LBB5_47: # in Loop: Header=BB5_43 Depth=2 - ld.d $a0, $sp, 560 + ld.d $a0, $sp, 552 bltu $a0, $s7, .LBB5_49 # %bb.48: # in Loop: Header=BB5_43 Depth=2 - addi.d $a0, $sp, 560 + addi.d $a0, $sp, 552 pcaddu18i $ra, %call36(luaL_prepbuffer) jirl $ra, $ra, 0 - ld.d $a0, $sp, 560 + ld.d $a0, $sp, 552 .LBB5_49: # in Loop: Header=BB5_43 Depth=2 addi.d $a1, $a0, 1 - st.d $a1, $sp, 560 + st.d $a1, $sp, 552 ori $a1, $zero, 92 st.b $a1, $a0, 0 - ld.d $a0, $sp, 560 + ld.d $a0, $sp, 552 bltu $a0, $s7, .LBB5_41 # %bb.50: # in Loop: Header=BB5_43 Depth=2 - addi.d $a0, $sp, 560 + addi.d $a0, $sp, 552 pcaddu18i $ra, %call36(luaL_prepbuffer) jirl $ra, $ra, 0 - ld.d $a0, $sp, 560 + ld.d $a0, $sp, 552 b .LBB5_41 .LBB5_51: # in Loop: Header=BB5_43 Depth=2 - ld.d $a1, $sp, 560 + ld.d $a1, $sp, 552 bltu $a1, $s7, .LBB5_53 # %bb.52: # in Loop: Header=BB5_43 Depth=2 - addi.d $a0, $sp, 560 + addi.d $a0, $sp, 552 pcaddu18i $ra, %call36(luaL_prepbuffer) jirl $ra, $ra, 0 ld.bu $a0, $s4, 0 - ld.d $a1, $sp, 560 + ld.d $a1, $sp, 552 .LBB5_53: # in Loop: Header=BB5_43 Depth=2 addi.d $a2, $a1, 1 - st.d $a2, $sp, 560 + st.d $a2, $sp, 552 st.b $a0, $a1, 0 b .LBB5_42 .LBB5_54: # in Loop: Header=BB5_43 Depth=2 pcalau12i $a0, %pc_hi20(.L.str.33) addi.d $a1, $a0, %pc_lo12(.L.str.33) - addi.d $a0, $sp, 560 + addi.d $a0, $sp, 552 ori $a2, $zero, 2 pcaddu18i $ra, %call36(luaL_addlstring) jirl $ra, $ra, 0 @@ -798,12 +794,12 @@ str_format: # @str_format b .LBB5_30 .LBB5_56: # %.thread # in Loop: Header=BB5_4 Depth=1 - addi.d $a0, $sp, 30 - addi.d $a1, $sp, 542 + addi.d $a0, $sp, 22 + addi.d $a1, $sp, 534 move $a2, $s4 b .LBB5_31 .LBB5_57: # %._crit_edge - addi.d $a0, $sp, 560 + addi.d $a0, $sp, 552 pcaddu18i $ra, %call36(luaL_pushresult) jirl $ra, $ra, 0 ori $a0, $zero, 1 @@ -811,6 +807,7 @@ str_format: # @str_format lu12i.w $a1, 1 ori $a1, $a1, 2752 add.d $sp, $sp, $a1 + fld.d $fs0, $sp, 1936 # 8-byte Folded Reload ld.d $s8, $sp, 1944 # 8-byte Folded Reload ld.d $s7, $sp, 1952 # 8-byte Folded Reload ld.d $s6, $sp, 1960 # 8-byte Folded Reload diff --git a/results/MultiSource/Applications/lua/CMakeFiles/lua.dir/lvm.s b/results/MultiSource/Applications/lua/CMakeFiles/lua.dir/lvm.s index 4c90e1bd..b881abcb 100644 --- a/results/MultiSource/Applications/lua/CMakeFiles/lua.dir/lvm.s +++ b/results/MultiSource/Applications/lua/CMakeFiles/lua.dir/lvm.s @@ -1207,12 +1207,7 @@ call_binTM: # @call_binTM .Lfunc_end8: .size call_binTM, .Lfunc_end8-call_binTM # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function luaV_execute -.LCPI9_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 - .text - .hidden luaV_execute + .hidden luaV_execute # -- Begin function luaV_execute .globl luaV_execute .p2align 5 .type luaV_execute,@function @@ -2531,12 +2526,13 @@ luaV_execute: # @luaV_execute ld.d $a0, $s4, 0 ld.d $a0, $a0, 16 srli.d $a1, $a0, 32 - pcalau12i $a2, %pc_hi20(.LCPI9_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI9_0) lu52i.d $a2, $zero, 1107 or $a1, $a1, $a2 + movgr2fr.d $fa0, $a1 + lu12i.w $a1, 256 + lu52i.d $a1, $a1, 1107 movgr2fr.d $fa1, $a1 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 lu12i.w $a1, 275200 bstrins.d $a0, $a1, 63, 32 movgr2fr.d $fa1, $a0 diff --git a/results/MultiSource/Applications/minisat/CMakeFiles/minisat.dir/Main.s b/results/MultiSource/Applications/minisat/CMakeFiles/minisat.dir/Main.s index 0905a36d..a6a7cd82 100644 --- a/results/MultiSource/Applications/minisat/CMakeFiles/minisat.dir/Main.s +++ b/results/MultiSource/Applications/minisat/CMakeFiles/minisat.dir/Main.s @@ -1,14 +1,6 @@ .file "Main.cpp" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z10printStatsR6Solver -.LCPI0_0: - .word 0x42c80000 # float 100 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI0_1: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 .text - .globl _Z10printStatsR6Solver + .globl _Z10printStatsR6Solver # -- Begin function _Z10printStatsR6Solver .p2align 5 .type _Z10printStatsR6Solver,@function _Z10printStatsR6Solver: # @_Z10printStatsR6Solver @@ -95,12 +87,12 @@ _Z10printStatsR6Solver: # @_Z10printStatsR6Solver ffint.s.l $fa0, $fa0 fadd.s $fa0, $fa0, $fa0 slti $a3, $a1, 0 - pcalau12i $a4, %pc_hi20(.LCPI0_0) - fld.s $fa1, $a4, %pc_lo12(.LCPI0_0) - movgr2fr.d $fa2, $a1 - ffint.s.l $fa2, $fa2 + movgr2fr.d $fa1, $a1 + ffint.s.l $fa1, $fa1 movgr2cf $fcc0, $a3 - fsel $fa0, $fa2, $fa0, $fcc0 + fsel $fa0, $fa1, $fa0, $fcc0 + lu12i.w $a1, 273536 + movgr2fr.w $fa1, $a1 fmul.s $fa0, $fa0, $fa1 srli.d $a1, $a2, 1 andi $a3, $a2, 1 @@ -145,27 +137,28 @@ _Z10printStatsR6Solver: # @_Z10printStatsR6Solver ori $a4, $zero, 100 mul.d $a3, $a3, $a4 srli.d $a4, $a3, 32 - pcalau12i $a5, %pc_hi20(.LCPI0_1) - fld.d $fa0, $a5, %pc_lo12(.LCPI0_1) lu52i.d $a5, $zero, 1107 or $a4, $a4, $a5 + movgr2fr.d $fa0, $a4 + lu12i.w $a4, 256 + lu52i.d $a4, $a4, 1107 movgr2fr.d $fa1, $a4 - fsub.d $fa1, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 bstrpick.d $a3, $a3, 31, 2 slli.d $a3, $a3, 2 lu52i.d $a4, $zero, 1075 or $a3, $a3, $a4 movgr2fr.d $fa2, $a3 - fadd.d $fa1, $fa2, $fa1 + fadd.d $fa0, $fa2, $fa0 srli.d $a3, $a1, 32 or $a3, $a3, $a5 movgr2fr.d $fa2, $a3 - fsub.d $fa0, $fa2, $fa0 + fsub.d $fa1, $fa2, $fa1 lu12i.w $a3, 275200 bstrins.d $a1, $a3, 63, 32 movgr2fr.d $fa2, $a1 - fadd.d $fa0, $fa2, $fa0 - fdiv.d $fa0, $fa1, $fa0 + fadd.d $fa1, $fa2, $fa1 + fdiv.d $fa0, $fa0, $fa1 movfr2gr.d $a3, $fa0 pcalau12i $a1, %pc_hi20(.L.str.8) addi.d $a1, $a1, %pc_lo12(.L.str.8) diff --git a/results/MultiSource/Applications/minisat/CMakeFiles/minisat.dir/Solver.s b/results/MultiSource/Applications/minisat/CMakeFiles/minisat.dir/Solver.s index 7e74c0f6..36ffacd5 100644 --- a/results/MultiSource/Applications/minisat/CMakeFiles/minisat.dir/Solver.s +++ b/results/MultiSource/Applications/minisat/CMakeFiles/minisat.dir/Solver.s @@ -1713,43 +1713,37 @@ _ZN6Solver11cancelUntilEi: # @_ZN6Solver11cancelUntilEi .size _ZN6Solver11cancelUntilEi, .Lfunc_end10-_ZN6Solver11cancelUntilEi .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN6Solver13pickBranchLitEid -.LCPI11_0: - .dword 0x413534e400000000 # double 1389796 -.LCPI11_1: - .dword 0x41dfffffffc00000 # double 2147483647 - .text - .globl _ZN6Solver13pickBranchLitEid + .globl _ZN6Solver13pickBranchLitEid # -- Begin function _ZN6Solver13pickBranchLitEid .p2align 5 .type _ZN6Solver13pickBranchLitEid,@function _ZN6Solver13pickBranchLitEid: # @_ZN6Solver13pickBranchLitEid .cfi_startproc # %bb.0: - addi.d $sp, $sp, -64 - .cfi_def_cfa_offset 64 - st.d $ra, $sp, 56 # 8-byte Folded Spill - st.d $fp, $sp, 48 # 8-byte Folded Spill - st.d $s0, $sp, 40 # 8-byte Folded Spill - st.d $s1, $sp, 32 # 8-byte Folded Spill - st.d $s2, $sp, 24 # 8-byte Folded Spill - fst.d $fs0, $sp, 16 # 8-byte Folded Spill - fst.d $fs1, $sp, 8 # 8-byte Folded Spill + addi.d $sp, $sp, -48 + .cfi_def_cfa_offset 48 + st.d $ra, $sp, 40 # 8-byte Folded Spill + st.d $fp, $sp, 32 # 8-byte Folded Spill + st.d $s0, $sp, 24 # 8-byte Folded Spill + st.d $s1, $sp, 16 # 8-byte Folded Spill + st.d $s2, $sp, 8 # 8-byte Folded Spill + fst.d $fs0, $sp, 0 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 .cfi_offset 24, -32 .cfi_offset 25, -40 .cfi_offset 56, -48 - .cfi_offset 57, -56 + move $s0, $a1 move $fp, $a0 fld.d $fa1, $a0, 448 - pcalau12i $a0, %pc_hi20(.LCPI11_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI11_0) - pcalau12i $a0, %pc_hi20(.LCPI11_1) - fld.d $fs0, $a0, %pc_lo12(.LCPI11_1) - move $s0, $a1 - fmul.d $fa1, $fa1, $fs1 + ori $a0, $zero, 0 + lu32i.d $a0, 341220 + lu52i.d $a0, $a0, 1043 + movgr2fr.d $fa2, $a0 + fmul.d $fa1, $fa1, $fa2 + lu12i.w $a0, -1024 + lu52i.d $a0, $a0, 1053 + movgr2fr.d $fs0, $a0 fdiv.d $fa2, $fa1, $fs0 ftintrz.w.d $fa2, $fa2 movfr2gr.s $a0, $fa2 @@ -1768,7 +1762,11 @@ _ZN6Solver13pickBranchLitEid: # @_ZN6Solver13pickBranchLitEid move $a0, $s2 beqz $a1, .LBB11_5 # %bb.2: - fmul.d $fa0, $fa1, $fs1 + ori $a0, $zero, 0 + lu32i.d $a0, 341220 + lu52i.d $a0, $a0, 1043 + movgr2fr.d $fa0, $a0 + fmul.d $fa0, $fa1, $fa0 fdiv.d $fa1, $fa0, $fs0 ftintrz.w.d $fa1, $fa1 movfr2gr.s $a0, $fa1 @@ -1839,7 +1837,11 @@ _ZN6Solver13pickBranchLitEid: # @_ZN6Solver13pickBranchLitEid b .LBB11_17 .LBB11_16: fld.d $fa0, $fp, 448 - fmul.d $fa0, $fa0, $fs1 + ori $a1, $zero, 0 + lu32i.d $a1, 341220 + lu52i.d $a1, $a1, 1043 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 fdiv.d $fa1, $fa0, $fs0 ftintrz.w.d $fa1, $fa1 movfr2gr.s $a1, $fa1 @@ -1864,14 +1866,13 @@ _ZN6Solver13pickBranchLitEid: # @_ZN6Solver13pickBranchLitEid maskeqz $a1, $a1, $a2 or $a0, $a1, $a0 bstrpick.d $a0, $a0, 31, 0 - fld.d $fs1, $sp, 8 # 8-byte Folded Reload - fld.d $fs0, $sp, 16 # 8-byte Folded Reload - ld.d $s2, $sp, 24 # 8-byte Folded Reload - ld.d $s1, $sp, 32 # 8-byte Folded Reload - ld.d $s0, $sp, 40 # 8-byte Folded Reload - ld.d $fp, $sp, 48 # 8-byte Folded Reload - ld.d $ra, $sp, 56 # 8-byte Folded Reload - addi.d $sp, $sp, 64 + fld.d $fs0, $sp, 0 # 8-byte Folded Reload + ld.d $s2, $sp, 8 # 8-byte Folded Reload + ld.d $s1, $sp, 16 # 8-byte Folded Reload + ld.d $s0, $sp, 24 # 8-byte Folded Reload + ld.d $fp, $sp, 32 # 8-byte Folded Reload + ld.d $ra, $sp, 40 # 8-byte Folded Reload + addi.d $sp, $sp, 48 ret .Lfunc_end11: .size _ZN6Solver13pickBranchLitEid, .Lfunc_end11-_ZN6Solver13pickBranchLitEid @@ -1968,18 +1969,8 @@ _ZN4HeapIN6Solver10VarOrderLtEE9removeMinEv: # @_ZN4HeapIN6Solver10VarOrderLtEE9 .size _ZN4HeapIN6Solver10VarOrderLtEE9removeMinEv, .Lfunc_end12-_ZN4HeapIN6Solver10VarOrderLtEE9removeMinEv .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi -.LCPI13_0: - .dword 0x4415af1d78b58c40 # double 1.0E+20 -.LCPI13_1: - .dword 0x3bc79ca10c924223 # double 9.9999999999999995E-21 -.LCPI13_2: - .dword 0x54b249ad2594c37d # double 1.0E+100 -.LCPI13_3: - .dword 0x2b2bff2ee48e0530 # double 1.0E-100 .text - .globl _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi + .globl _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi # -- Begin function _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi .p2align 5 .type _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi,@function _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3LitERi @@ -1998,13 +1989,16 @@ _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3L st.d $s8, $sp, 56 # 8-byte Folded Spill fst.d $fs0, $sp, 48 # 8-byte Folded Spill fst.d $fs1, $sp, 40 # 8-byte Folded Spill - move $s1, $a2 + fst.d $fs2, $sp, 32 # 8-byte Folded Spill + fst.d $fs3, $sp, 24 # 8-byte Folded Spill + move $ra, $a2 ld.w $a4, $a2, 8 ld.w $a5, $a2, 12 ld.d $a2, $a2, 0 move $s2, $a1 move $fp, $a0 - st.d $a3, $sp, 32 # 8-byte Folded Spill + st.d $a3, $sp, 16 # 8-byte Folded Spill + st.d $ra, $sp, 8 # 8-byte Folded Spill bne $a4, $a5, .LBB13_2 # %bb.1: alsl.d $a0, $a4, $a4, 1 @@ -2015,15 +2009,16 @@ _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3L masknez $a1, $a1, $a3 maskeqz $a0, $a0, $a3 or $a0, $a0, $a1 - st.w $a0, $s1, 12 + st.w $a0, $ra, 12 slli.d $a1, $a0, 2 move $a0, $a2 pcaddu18i $ra, %call36(realloc) jirl $ra, $ra, 0 - ld.d $t8, $sp, 32 # 8-byte Folded Reload - ld.w $a4, $s1, 8 + ld.d $ra, $sp, 8 # 8-byte Folded Reload + ld.d $t8, $sp, 16 # 8-byte Folded Reload + ld.w $a4, $ra, 8 move $a2, $a0 - st.d $a0, $s1, 0 + st.d $a0, $ra, 0 b .LBB13_3 .LBB13_2: move $t8, $a3 @@ -2031,7 +2026,7 @@ _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3L move $a1, $zero addi.w $a0, $zero, -2 addi.d $a3, $a4, 1 - st.w $a3, $s1, 8 + st.w $a3, $ra, 8 ld.w $a3, $fp, 320 slli.d $a4, $a4, 2 move $a5, $a0 @@ -2039,17 +2034,27 @@ _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3L stx.w $a5, $a2, $a4 addi.w $s3, $a3, -1 st.w $zero, $t8, 0 - pcalau12i $a2, %pc_hi20(.LCPI13_2) - fld.d $fs0, $a2, %pc_lo12(.LCPI13_2) - pcalau12i $a2, %pc_hi20(.LCPI13_3) - fld.d $fs1, $a2, %pc_lo12(.LCPI13_3) + lu12i.w $a2, 153932 + ori $a2, $a2, 893 + lu32i.d $a2, 149933 + lu52i.d $a2, $a2, 1355 + movgr2fr.d $fs0, $a2 lu12i.w $a2, -112416 ori $a2, $a2, 1328 lu32i.d $a2, -262354 - lu52i.d $a2, $a2, 690 - vreplgr2vr.d $vr4, $a2 - ori $s5, $zero, 1 - vst $vr4, $sp, 16 # 16-byte Folded Spill + lu52i.d $s5, $a2, 690 + movgr2fr.d $fs1, $s5 + ori $s6, $zero, 1 + lu12i.w $a2, 494424 + ori $a2, $a2, 3136 + lu32i.d $a2, 372509 + lu52i.d $a2, $a2, 1089 + movgr2fr.d $fs2, $a2 + lu12i.w $a2, 51492 + ori $a2, $a2, 547 + lu32i.d $a2, 498849 + lu52i.d $a2, $a2, 956 + movgr2fr.d $fs3, $a2 .p2align 4, , 16 .LBB13_4: # =>This Loop Header: Depth=1 # Child Loop BB13_8 Depth 2 @@ -2065,74 +2070,69 @@ _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3L fld.s $fa1, $s2, 4 fld.d $fa0, $fp, 216 fcvt.d.s $fa1, $fa1 - pcalau12i $a3, %pc_hi20(.LCPI13_0) - fld.d $fa2, $a3, %pc_lo12(.LCPI13_0) fadd.d $fa1, $fa0, $fa1 fcvt.s.d $fa1, $fa1 - fcvt.d.s $fa3, $fa1 - fcmp.cule.d $fcc0, $fa3, $fa2 + fcvt.d.s $fa2, $fa1 + fcmp.cule.d $fcc0, $fa2, $fs2 fst.s $fa1, $s2, 4 bcnez $fcc0, .LBB13_10 # %bb.6: # %.preheader.i # in Loop: Header=BB13_4 Depth=1 ld.w $a3, $fp, 208 - pcalau12i $a4, %pc_hi20(.LCPI13_1) blez $a3, .LBB13_9 # %bb.7: # %.lr.ph.i # in Loop: Header=BB13_4 Depth=1 - ld.d $a5, $fp, 200 + ld.d $a4, $fp, 200 .p2align 4, , 16 .LBB13_8: # Parent Loop BB13_4 Depth=1 # => This Inner Loop Header: Depth=2 - ld.d $a6, $a5, 0 - fld.s $fa1, $a6, 4 - fld.d $fa2, $a4, %pc_lo12(.LCPI13_1) + ld.d $a5, $a4, 0 + fld.s $fa1, $a5, 4 fcvt.d.s $fa1, $fa1 - fmul.d $fa1, $fa1, $fa2 + fmul.d $fa1, $fa1, $fs3 fcvt.s.d $fa1, $fa1 - fst.s $fa1, $a6, 4 + fst.s $fa1, $a5, 4 addi.d $a3, $a3, -1 - addi.d $a5, $a5, 8 + addi.d $a4, $a4, 8 bnez $a3, .LBB13_8 .LBB13_9: # %._crit_edge.i # in Loop: Header=BB13_4 Depth=1 - fld.d $fa1, $a4, %pc_lo12(.LCPI13_1) - fmul.d $fa0, $fa0, $fa1 + fmul.d $fa0, $fa0, $fs3 fst.d $fa0, $fp, 216 .LBB13_10: # %_ZN6Solver15claBumpActivityER6Clause.exit # in Loop: Header=BB13_4 Depth=1 addi.d $a0, $a0, 2 - sltu $s8, $zero, $a0 + sltu $s4, $zero, $a0 srli.d $a0, $a2, 3 - bgeu $s8, $a0, .LBB13_41 + bgeu $s4, $a0, .LBB13_41 # %bb.11: # %.lr.ph # in Loop: Header=BB13_4 Depth=1 - addi.d $s4, $s2, 8 - move $s7, $a1 + addi.d $s0, $s2, 8 + move $s8, $a1 b .LBB13_14 .LBB13_12: # in Loop: Header=BB13_14 Depth=2 - addi.w $s7, $s7, 1 + addi.w $s8, $s8, 1 .p2align 4, , 16 .LBB13_13: # in Loop: Header=BB13_14 Depth=2 ld.wu $a0, $s2, 0 - addi.d $s8, $s8, 1 + addi.d $s4, $s4, 1 srli.d $a0, $a0, 3 - bgeu $s8, $a0, .LBB13_42 + bgeu $s4, $a0, .LBB13_42 .LBB13_14: # Parent Loop BB13_4 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB13_21 Depth 3 # Child Loop BB13_24 Depth 3 # Child Loop BB13_30 Depth 3 - slli.d $a0, $s8, 2 - ldx.w $s0, $s4, $a0 + slli.d $a0, $s4, 2 + ldx.w $s7, $s0, $a0 ld.d $a0, $fp, 472 - srai.d $a1, $s0, 1 + srai.d $a1, $s7, 1 ldx.bu $a2, $a0, $a1 bnez $a2, .LBB13_13 # %bb.15: # in Loop: Header=BB13_14 Depth=2 ld.d $a2, $fp, 360 - slli.d $s6, $a1, 2 - ldx.w $a2, $a2, $s6 + slli.d $s1, $a1, 2 + ldx.w $a2, $a2, $s1 blez $a2, .LBB13_13 # %bb.16: # in Loop: Header=BB13_14 Depth=2 ld.d $a2, $fp, 224 @@ -2167,8 +2167,9 @@ _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3L # => This Inner Loop Header: Depth=3 vld $vr0, $a5, -16 vld $vr1, $a5, 0 - vfmul.d $vr0, $vr0, $vr4 - vfmul.d $vr1, $vr1, $vr4 + vreplgr2vr.d $vr2, $s5 + vfmul.d $vr0, $vr0, $vr2 + vfmul.d $vr1, $vr1, $vr2 vst $vr0, $a5, -16 vst $vr1, $a5, 0 addi.d $a6, $a6, -4 @@ -2203,7 +2204,7 @@ _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3L # %bb.27: # %_ZNK4HeapIN6Solver10VarOrderLtEE6inHeapEi.exit.i # in Loop: Header=BB13_14 Depth=2 ld.d $a2, $fp, 432 - ldx.w $a3, $a2, $s6 + ldx.w $a3, $a2, $s1 bltz $a3, .LBB13_36 # %bb.28: # in Loop: Header=BB13_14 Depth=2 ld.d $a5, $fp, 416 @@ -2235,7 +2236,7 @@ _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3L slli.d $t1, $t1, 2 stx.w $a3, $a2, $t1 move $a3, $a7 - bltu $s5, $t0, .LBB13_30 + bltu $s6, $t0, .LBB13_30 # %bb.32: # in Loop: Header=BB13_14 Depth=2 move $a3, $a7 b .LBB13_35 @@ -2251,15 +2252,15 @@ _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3L stx.w $a3, $a2, $a4 .LBB13_36: # %_ZN6Solver15varBumpActivityEi.exit # in Loop: Header=BB13_14 Depth=2 - stx.b $s5, $a0, $a1 + stx.b $s6, $a0, $a1 ld.d $a1, $fp, 360 - ldx.w $a0, $a1, $s6 + ldx.w $a0, $a1, $s1 ld.w $a2, $fp, 336 bge $a0, $a2, .LBB13_12 # %bb.37: # in Loop: Header=BB13_14 Depth=2 - ld.w $a2, $s1, 8 - ld.w $a3, $s1, 12 - ld.d $a0, $s1, 0 + ld.w $a2, $ra, 8 + ld.w $a3, $ra, 12 + ld.d $a0, $ra, 0 bne $a2, $a3, .LBB13_39 # %bb.38: # in Loop: Header=BB13_14 Depth=2 alsl.d $a1, $a2, $a2, 1 @@ -2270,22 +2271,22 @@ _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3L masknez $a3, $a3, $a2 maskeqz $a1, $a1, $a2 or $a1, $a1, $a3 - st.w $a1, $s1, 12 + st.w $a1, $ra, 12 slli.d $a1, $a1, 2 pcaddu18i $ra, %call36(realloc) jirl $ra, $ra, 0 - vld $vr4, $sp, 16 # 16-byte Folded Reload - ld.d $t8, $sp, 32 # 8-byte Folded Reload - ld.w $a2, $s1, 8 + ld.d $ra, $sp, 8 # 8-byte Folded Reload + ld.d $t8, $sp, 16 # 8-byte Folded Reload + ld.w $a2, $ra, 8 ld.d $a1, $fp, 360 - st.d $a0, $s1, 0 + st.d $a0, $ra, 0 .LBB13_39: # %_ZN3vecI3LitE4pushERKS0_.exit # in Loop: Header=BB13_14 Depth=2 addi.d $a3, $a2, 1 - st.w $a3, $s1, 8 + st.w $a3, $ra, 8 slli.d $a2, $a2, 2 - stx.w $s0, $a0, $a2 - ldx.w $a0, $a1, $s6 + stx.w $s7, $a0, $a2 + ldx.w $a0, $a1, $s1 ld.w $a1, $t8, 0 bge $a1, $a0, .LBB13_13 # %bb.40: # in Loop: Header=BB13_14 Depth=2 @@ -2293,7 +2294,7 @@ _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3L b .LBB13_13 .p2align 4, , 16 .LBB13_41: # in Loop: Header=BB13_4 Depth=1 - move $s7, $a1 + move $s8, $a1 .LBB13_42: # %.preheader179 # in Loop: Header=BB13_4 Depth=1 ld.d $a0, $fp, 312 @@ -2313,16 +2314,16 @@ _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3L slli.d $a4, $a3, 3 ldx.d $s2, $a2, $a4 stx.b $zero, $a1, $a3 - addi.w $a1, $s7, -1 - blt $s5, $s7, .LBB13_4 + addi.w $a1, $s8, -1 + blt $s6, $s8, .LBB13_4 # %bb.45: - ld.d $a2, $s1, 0 + ld.d $a2, $ra, 0 ld.bu $a1, $fp, 88 xori $a0, $a0, 1 st.w $a0, $a2, 0 beqz $a1, .LBB13_49 # %bb.46: # %.preheader176 - ld.w $s3, $s1, 8 + ld.w $s3, $ra, 8 ori $a0, $zero, 2 blt $s3, $a0, .LBB13_54 # %bb.47: # %.lr.ph201 @@ -2340,7 +2341,7 @@ _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3L # %bb.50: # %.preheader.i.i141 move $a1, $zero st.w $zero, $fp, 512 - ld.w $s3, $s1, 8 + ld.w $s3, $ra, 8 bge $a1, $s3, .LBB13_56 .LBB13_51: ld.w $a2, $fp, 516 @@ -2360,10 +2361,10 @@ _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3L b .LBB13_62 .LBB13_55: # %._ZN3vecI3LitE5clearEb.exit_crit_edge.i162 ld.w $a1, $fp, 512 - ld.w $s3, $s1, 8 + ld.w $s3, $ra, 8 blt $a1, $s3, .LBB13_51 .LBB13_56: # %_ZN3vecI3LitE6growToEi.exit.i144 - ori $s4, $zero, 1 + ori $s0, $zero, 1 bgtz $s3, .LBB13_80 b .LBB13_112 .LBB13_57: # %vector.ph316 @@ -2461,7 +2462,7 @@ _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3L beqz $a0, .LBB13_67 # %bb.63: # %.preheader.i.i st.w $zero, $fp, 512 - ld.w $s3, $s1, 8 + ld.w $s3, $ra, 8 move $a1, $zero bge $a1, $s3, .LBB13_68 .LBB13_64: @@ -2481,7 +2482,7 @@ _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3L ld.w $a1, $fp, 512 blt $a1, $s3, .LBB13_64 .LBB13_68: # %_ZN3vecI3LitE6growToEi.exit.i - ori $s4, $zero, 1 + ori $s0, $zero, 1 bgtz $s3, .LBB13_102 b .LBB13_112 .LBB13_69: @@ -2497,7 +2498,8 @@ _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3L jirl $ra, $ra, 0 ld.w $a1, $fp, 512 st.d $a0, $fp, 504 - ld.d $t8, $sp, 32 # 8-byte Folded Reload + ld.d $t8, $sp, 16 # 8-byte Folded Reload + ld.d $ra, $sp, 8 # 8-byte Folded Reload .LBB13_71: # %_ZN3vecI3LitE4growEi.exit.i.i150 bge $a1, $s3, .LBB13_79 # %bb.72: # %.lr.ph.i.i153 @@ -2539,17 +2541,17 @@ _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3L bnez $a1, .LBB13_78 .LBB13_79: # %._crit_edge.i.i151 st.w $s3, $fp, 512 - ld.w $s3, $s1, 8 - ori $s4, $zero, 1 + ld.w $s3, $ra, 8 + ori $s0, $zero, 1 blez $s3, .LBB13_112 .LBB13_80: # %.lr.ph.i146 - ld.d $a1, $s1, 0 + ld.d $a1, $ra, 0 move $a2, $zero .p2align 4, , 16 .LBB13_81: # =>This Inner Loop Header: Depth=1 ld.w $a3, $a1, 0 st.w $a3, $a0, 0 - ld.w $s3, $s1, 8 + ld.w $s3, $ra, 8 addi.d $a2, $a2, 1 addi.d $a1, $a1, 4 addi.d $a0, $a0, 4 @@ -2558,21 +2560,21 @@ _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3L ori $a0, $zero, 2 blt $s3, $a0, .LBB13_112 # %bb.83: # %.lr.ph196 - ld.d $a0, $s1, 0 + ld.d $a0, $ra, 0 ld.d $a1, $fp, 344 ld.d $a2, $fp, 472 - ori $s0, $zero, 1 + ori $s1, $zero, 1 ori $a3, $zero, 16 - ori $s4, $zero, 1 + ori $s0, $zero, 1 b .LBB13_85 .p2align 4, , 16 .LBB13_84: # %.loopexit177 # in Loop: Header=BB13_85 Depth=1 - addi.d $s0, $s0, 1 - bge $s0, $s3, .LBB13_113 + addi.d $s1, $s1, 1 + bge $s1, $s3, .LBB13_113 .LBB13_85: # =>This Loop Header: Depth=1 # Child Loop BB13_88 Depth 2 - slli.d $a4, $s0, 2 + slli.d $a4, $s1, 2 ldx.w $a4, $a0, $a4 slli.d $a5, $a4, 2 bstrins.d $a5, $zero, 2, 0 @@ -2602,10 +2604,10 @@ _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3L ldx.w $t0, $a5, $t0 blez $t0, .LBB13_87 # %bb.90: # in Loop: Header=BB13_85 Depth=1 - slli.d $a5, $s4, 2 + slli.d $a5, $s0, 2 stx.w $a4, $a0, $a5 - ld.w $s3, $s1, 8 - addi.w $s4, $s4, 1 + ld.w $s3, $ra, 8 + addi.w $s0, $s0, 1 b .LBB13_84 .LBB13_91: ori $a1, $zero, 2 @@ -2620,7 +2622,8 @@ _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3L jirl $ra, $ra, 0 ld.w $a1, $fp, 512 st.d $a0, $fp, 504 - ld.d $t8, $sp, 32 # 8-byte Folded Reload + ld.d $t8, $sp, 16 # 8-byte Folded Reload + ld.d $ra, $sp, 8 # 8-byte Folded Reload .LBB13_93: # %_ZN3vecI3LitE4growEi.exit.i.i bge $a1, $s3, .LBB13_101 # %bb.94: # %.lr.ph.i.i @@ -2662,51 +2665,52 @@ _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3L bnez $a1, .LBB13_100 .LBB13_101: # %._crit_edge.i.i st.w $s3, $fp, 512 - ld.w $s3, $s1, 8 - ori $s4, $zero, 1 + ld.w $s3, $ra, 8 + ori $s0, $zero, 1 blez $s3, .LBB13_112 .LBB13_102: # %.lr.ph.i134 - ld.d $a1, $s1, 0 + ld.d $a1, $ra, 0 move $a2, $zero .p2align 4, , 16 .LBB13_103: # =>This Inner Loop Header: Depth=1 ld.w $a3, $a1, 0 st.w $a3, $a0, 0 - ld.w $s3, $s1, 8 + ld.w $s3, $ra, 8 addi.d $a2, $a2, 1 addi.d $a1, $a1, 4 addi.d $a0, $a0, 4 blt $a2, $s3, .LBB13_103 # %bb.104: # %_ZNK3vecI3LitE6copyToERS1_.exit ori $a0, $zero, 2 - ori $s0, $zero, 1 + ori $s1, $zero, 1 blt $s3, $a0, .LBB13_113 # %bb.105: # %.lr.ph205.preheader + ori $s1, $zero, 1 + ori $s4, $zero, 4 + ori $s5, $zero, 1 ori $s0, $zero, 1 - ori $s5, $zero, 4 - ori $s6, $zero, 1 - ori $s4, $zero, 1 b .LBB13_109 .p2align 4, , 16 .LBB13_106: # %._crit_edge247 # in Loop: Header=BB13_109 Depth=1 - ld.d $a0, $s1, 0 - ldx.w $a1, $a0, $s5 - ld.d $t8, $sp, 32 # 8-byte Folded Reload + ld.d $ra, $sp, 8 # 8-byte Folded Reload + ld.d $a0, $ra, 0 + ldx.w $a1, $a0, $s4 + ld.d $t8, $sp, 16 # 8-byte Folded Reload .LBB13_107: # in Loop: Header=BB13_109 Depth=1 - slli.d $a2, $s4, 2 - addi.w $s4, $s4, 1 + slli.d $a2, $s0, 2 + addi.w $s0, $s0, 1 stx.w $a1, $a0, $a2 .LBB13_108: # in Loop: Header=BB13_109 Depth=1 - ld.w $s3, $s1, 8 - addi.d $s6, $s6, 1 - addi.d $s5, $s5, 4 - addi.d $s0, $s0, 1 - bge $s6, $s3, .LBB13_113 + ld.w $s3, $ra, 8 + addi.d $s5, $s5, 1 + addi.d $s4, $s4, 4 + addi.d $s1, $s1, 1 + bge $s5, $s3, .LBB13_113 .LBB13_109: # %.lr.ph205 # =>This Inner Loop Header: Depth=1 - ld.d $a0, $s1, 0 - ldx.w $a1, $a0, $s5 + ld.d $a0, $ra, 0 + ldx.w $a1, $a0, $s4 ld.d $a2, $fp, 344 slli.d $a3, $a1, 2 bstrins.d $a3, $zero, 2, 0 @@ -2720,19 +2724,20 @@ _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3L jirl $ra, $ra, 0 beqz $a0, .LBB13_106 # %bb.111: # in Loop: Header=BB13_109 Depth=1 - ld.d $t8, $sp, 32 # 8-byte Folded Reload + ld.d $t8, $sp, 16 # 8-byte Folded Reload + ld.d $ra, $sp, 8 # 8-byte Folded Reload b .LBB13_108 .LBB13_112: - ori $s0, $zero, 1 + ori $s1, $zero, 1 .LBB13_113: # %.loopexit ld.d $a0, $fp, 160 add.d $a1, $a0, $s3 - sub.w $a0, $s0, $s4 + sub.w $a0, $s1, $s0 st.d $a1, $fp, 160 blez $a0, .LBB13_115 # %bb.114: # %.lr.ph.i165 sub.w $s3, $s3, $a0 - st.w $s3, $s1, 8 + st.w $s3, $ra, 8 .LBB13_115: # %_ZN3vecI3LitE6shrinkEi.exit ld.d $a0, $fp, 168 add.d $a0, $a0, $s3 @@ -2746,7 +2751,7 @@ _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3L bgtz $a0, .LBB13_121 b .LBB13_123 .LBB13_117: # %.preheader - ld.d $a1, $s1, 0 + ld.d $a1, $ra, 0 ld.d $a0, $fp, 360 ori $a3, $zero, 3 blt $s3, $a3, .LBB13_120 @@ -2802,6 +2807,8 @@ _ZN6Solver7analyzeEP6ClauseR3vecI3LitERi: # @_ZN6Solver7analyzeEP6ClauseR3vecI3L addi.d $a0, $a0, 4 blt $a1, $a2, .LBB13_122 .LBB13_123: # %._crit_edge216 + fld.d $fs3, $sp, 24 # 8-byte Folded Reload + fld.d $fs2, $sp, 32 # 8-byte Folded Reload fld.d $fs1, $sp, 40 # 8-byte Folded Reload fld.d $fs0, $sp, 48 # 8-byte Folded Reload ld.d $s8, $sp, 56 # 8-byte Folded Reload @@ -3780,14 +3787,8 @@ _ZN4HeapIN6Solver10VarOrderLtEE6filterINS0_9VarFilterEEEvRKT_: # @_ZN4HeapIN6Sol .size _ZN4HeapIN6Solver10VarOrderLtEE6filterINS0_9VarFilterEEEvRKT_, .Lfunc_end19-_ZN4HeapIN6Solver10VarOrderLtEE6filterINS0_9VarFilterEEEvRKT_ .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN6Solver6searchEii -.LCPI20_0: - .dword 0x4415af1d78b58c40 # double 1.0E+20 -.LCPI20_1: - .dword 0x3bc79ca10c924223 # double 9.9999999999999995E-21 .text - .globl _ZN6Solver6searchEii + .globl _ZN6Solver6searchEii # -- Begin function _ZN6Solver6searchEii .p2align 5 .type _ZN6Solver6searchEii,@function _ZN6Solver6searchEii: # @_ZN6Solver6searchEii @@ -3839,6 +3840,16 @@ _ZN6Solver6searchEii: # @_ZN6Solver6searchEii addi.d $s0, $fp, 408 ori $s7, $zero, 1 ori $s8, $zero, 2 + lu12i.w $a0, 494424 + ori $a0, $a0, 3136 + lu32i.d $a0, 372509 + lu52i.d $a0, $a0, 1089 + movgr2fr.d $fs0, $a0 + lu12i.w $a0, 51492 + ori $a0, $a0, 547 + lu32i.d $a0, 498849 + lu52i.d $a0, $a0, 956 + movgr2fr.d $fs1, $a0 move $a0, $fp pcaddu18i $ra, %call36(_ZN6Solver9propagateEv) jirl $ra, $ra, 0 @@ -4189,39 +4200,34 @@ _ZN6Solver6searchEii: # @_ZN6Solver6searchEii fld.s $fa1, $s4, 4 fld.d $fa0, $fp, 216 fcvt.d.s $fa1, $fa1 - pcalau12i $a0, %pc_hi20(.LCPI20_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI20_0) fadd.d $fa1, $fa0, $fa1 fcvt.s.d $fa1, $fa1 - fcvt.d.s $fa3, $fa1 - fcmp.cule.d $fcc0, $fa3, $fa2 + fcvt.d.s $fa2, $fa1 + fcmp.cule.d $fcc0, $fa2, $fs0 fst.s $fa1, $s4, 4 bcnez $fcc0, .LBB20_51 # %bb.47: # %.preheader.i56 # in Loop: Header=BB20_24 Depth=1 ld.w $a0, $fp, 208 - pcalau12i $a1, %pc_hi20(.LCPI20_1) blez $a0, .LBB20_50 # %bb.48: # %.lr.ph.i58 # in Loop: Header=BB20_24 Depth=1 - ld.d $a2, $fp, 200 + ld.d $a1, $fp, 200 .p2align 4, , 16 .LBB20_49: # Parent Loop BB20_24 Depth=1 # => This Inner Loop Header: Depth=2 - ld.d $a3, $a2, 0 - fld.s $fa1, $a3, 4 - fld.d $fa2, $a1, %pc_lo12(.LCPI20_1) + ld.d $a2, $a1, 0 + fld.s $fa1, $a2, 4 fcvt.d.s $fa1, $fa1 - fmul.d $fa1, $fa1, $fa2 + fmul.d $fa1, $fa1, $fs1 fcvt.s.d $fa1, $fa1 - fst.s $fa1, $a3, 4 + fst.s $fa1, $a2, 4 addi.d $a0, $a0, -1 - addi.d $a2, $a2, 8 + addi.d $a1, $a1, 8 bnez $a0, .LBB20_49 .LBB20_50: # %._crit_edge.i57 # in Loop: Header=BB20_24 Depth=1 - fld.d $fa1, $a1, %pc_lo12(.LCPI20_1) - fmul.d $fa0, $fa0, $fa1 + fmul.d $fa0, $fa0, $fs1 fst.d $fa0, $fp, 216 .LBB20_51: # %_ZN6Solver15claBumpActivityER6Clause.exit # in Loop: Header=BB20_24 Depth=1 diff --git a/results/MultiSource/Applications/oggenc/CMakeFiles/oggenc.dir/oggenc.s b/results/MultiSource/Applications/oggenc/CMakeFiles/oggenc.dir/oggenc.s index c8ac2de8..4b3ac3b1 100644 --- a/results/MultiSource/Applications/oggenc/CMakeFiles/oggenc.dir/oggenc.s +++ b/results/MultiSource/Applications/oggenc/CMakeFiles/oggenc.dir/oggenc.s @@ -1,12 +1,6 @@ .file "oggenc.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI0_1: - .dword 0x3ff0000010000000 # double 1.0000000596046448 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -472,17 +466,21 @@ main: # @main ori $a1, $zero, 1 bne $a0, $a1, .LBB0_74 # %bb.54: # in Loop: Header=BB0_3 Depth=1 - st.w $a1, $sp, 580 fld.s $fa0, $sp, 576 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_1) + st.w $a1, $sp, 580 fcvt.d.s $fa0, $fa0 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1019 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 - fcvt.s.d $fa1, $fa0 - fcmp.cule.d $fcc0, $fa0, $fa2 - fst.s $fa1, $sp, 576 + lu12i.w $a0, 65536 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa1, $a0 + fcmp.cule.d $fcc0, $fa0, $fa1 + fcvt.s.d $fa0, $fa0 + fst.s $fa0, $sp, 576 bcnez $fcc0, .LBB0_3 # %bb.55: # in Loop: Header=BB0_3 Depth=1 lu12i.w $a0, 260096 @@ -2183,25 +2181,20 @@ main: # @main .word .LBB0_152-.LJTI0_1 .word .LBB0_146-.LJTI0_1 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function update_statistics_full -.LCPI1_0: - .dword 0xc04e000000000000 # double -60 -.LCPI1_1: - .dword 0x4059000000000000 # double 100 .section .text.unlikely.,"ax",@progbits - .globl update_statistics_full + .globl update_statistics_full # -- Begin function update_statistics_full .p2align 5 .type update_statistics_full,@function update_statistics_full: # @update_statistics_full # %bb.0: - addi.d $sp, $sp, -48 - st.d $ra, $sp, 40 # 8-byte Folded Spill - st.d $fp, $sp, 32 # 8-byte Folded Spill - st.d $s0, $sp, 24 # 8-byte Folded Spill - st.d $s1, $sp, 16 # 8-byte Folded Spill - fst.d $fs0, $sp, 8 # 8-byte Folded Spill - fst.d $fs1, $sp, 0 # 8-byte Folded Spill + addi.d $sp, $sp, -64 + st.d $ra, $sp, 56 # 8-byte Folded Spill + st.d $fp, $sp, 48 # 8-byte Folded Spill + st.d $s0, $sp, 40 # 8-byte Folded Spill + st.d $s1, $sp, 32 # 8-byte Folded Spill + st.d $s2, $sp, 24 # 8-byte Folded Spill + fst.d $fs0, $sp, 16 # 8-byte Folded Spill + fst.d $fs1, $sp, 8 # 8-byte Folded Spill movgr2fr.d $fa1, $a2 ffint.d.l $fs0, $fa1 movgr2fr.d $fa1, $a1 @@ -2218,35 +2211,39 @@ update_statistics_full: # @update_statistics_full add.w $a0, $a1, $a0 bstrpick.d $a1, $a0, 31, 31 srai.d $a0, $a0, 5 - pcalau12i $a2, %pc_hi20(.LCPI1_0) - fld.d $fa1, $a2, %pc_lo12(.LCPI1_0) add.d $fp, $a0, $a1 - movgr2fr.d $fa2, $fp - ffint.d.l $fa2, $fa2 - fmul.d $fa1, $fa2, $fa1 + movgr2fr.d $fa1, $fp + ffint.d.l $fa1, $fa1 + ori $s1, $zero, 0 + ori $a0, $zero, 0 + lu32i.d $a0, -131072 + lu52i.d $a0, $a0, -1020 + movgr2fr.d $fa2, $a0 + fmul.d $fa1, $fa1, $fa2 fadd.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $s0, $fa0 pcalau12i $a0, %got_pc_hi20(stderr) - ld.d $s1, $a0, %got_pc_lo12(stderr) - ld.d $a1, $s1, 0 + ld.d $s2, $a0, %got_pc_lo12(stderr) + ld.d $a1, $s2, 0 ori $a0, $zero, 13 pcaddu18i $ra, %call36(fputc) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_1) - pcalau12i $a1, %pc_hi20(update_statistics_full.spinpoint) - ld.w $a2, $a1, %pc_lo12(update_statistics_full.spinpoint) - ld.d $a0, $s1, 0 + ld.d $a0, $s2, 0 + lu32i.d $s1, -458752 + lu52i.d $a1, $s1, 1029 + pcalau12i $a2, %pc_hi20(update_statistics_full.spinpoint) + ld.w $a3, $a2, %pc_lo12(update_statistics_full.spinpoint) + movgr2fr.d $fa0, $a1 fmul.d $fa0, $fs0, $fa0 fdiv.d $fa0, $fa0, $fs1 - addi.d $a3, $a2, 1 - st.w $a3, $a1, %pc_lo12(update_statistics_full.spinpoint) - bstrpick.d $a1, $a2, 62, 61 - add.d $a1, $a2, $a1 + addi.d $a1, $a3, 1 + st.w $a1, $a2, %pc_lo12(update_statistics_full.spinpoint) + bstrpick.d $a1, $a3, 62, 61 + add.d $a1, $a3, $a1 bstrpick.d $a1, $a1, 31, 2 slli.d $a1, $a1, 2 - sub.w $a1, $a2, $a1 + sub.w $a1, $a3, $a1 pcalau12i $a2, %pc_hi20(.L.str.90) addi.d $a2, $a2, %pc_lo12(.L.str.90) ldx.b $a5, $a2, $a1 @@ -2255,13 +2252,14 @@ update_statistics_full: # @update_statistics_full addi.d $a1, $a1, %pc_lo12(.L.str.92) move $a3, $fp move $a4, $s0 - fld.d $fs1, $sp, 0 # 8-byte Folded Reload - fld.d $fs0, $sp, 8 # 8-byte Folded Reload - ld.d $s1, $sp, 16 # 8-byte Folded Reload - ld.d $s0, $sp, 24 # 8-byte Folded Reload - ld.d $fp, $sp, 32 # 8-byte Folded Reload - ld.d $ra, $sp, 40 # 8-byte Folded Reload - addi.d $sp, $sp, 48 + fld.d $fs1, $sp, 8 # 8-byte Folded Reload + fld.d $fs0, $sp, 16 # 8-byte Folded Reload + ld.d $s2, $sp, 24 # 8-byte Folded Reload + ld.d $s1, $sp, 32 # 8-byte Folded Reload + ld.d $s0, $sp, 40 # 8-byte Folded Reload + ld.d $fp, $sp, 48 # 8-byte Folded Reload + ld.d $ra, $sp, 56 # 8-byte Folded Reload + addi.d $sp, $sp, 64 pcaddu18i $t8, %call36(fprintf) jr $t8 .Lfunc_end1: @@ -2445,14 +2443,7 @@ start_encode_full: # @start_encode_full .Lfunc_end2: .size start_encode_full, .Lfunc_end2-start_encode_full # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function final_statistics -.LCPI3_0: - .dword 0x404e000000000000 # double 60 -.LCPI3_1: - .dword 0x3f80624dd2f1a9fc # double 0.0080000000000000002 - .section .text.unlikely.,"ax",@progbits - .globl final_statistics + .globl final_statistics # -- Begin function final_statistics .p2align 5 .type final_statistics,@function final_statistics: # @final_statistics @@ -2509,11 +2500,13 @@ final_statistics: # @final_statistics srai.d $a2, $a2, 5 add.d $a2, $a2, $a3 movgr2fr.d $fa0, $a1 - pcalau12i $a1, %pc_hi20(.LCPI3_0) - fld.d $fs3, $a1, %pc_lo12(.LCPI3_0) ffint.d.l $fa0, $fa0 movgr2fr.d $fa1, $a2 ffint.d.l $fa1, $fa1 + ori $a1, $zero, 0 + lu32i.d $a1, -131072 + lu52i.d $a1, $a1, 1028 + movgr2fr.d $fs3, $a1 fmul.d $fa1, $fa1, $fs3 fsub.d $fa0, $fa0, $fa1 addi.w $a2, $a2, 0 @@ -2542,12 +2535,15 @@ final_statistics: # @final_statistics pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 ld.d $a0, $s2, 0 - pcalau12i $a1, %pc_hi20(.LCPI3_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI3_1) - movgr2fr.d $fa1, $fp - ffint.d.l $fa1, $fa1 - fdiv.d $fa1, $fa1, $fs1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.d $fa0, $fp + ffint.d.l $fa0, $fa0 + fdiv.d $fa0, $fa0, $fs1 + lu12i.w $a1, -184550 + ori $a1, $a1, 2556 + lu32i.d $a1, 25165 + lu52i.d $a1, $a1, 1016 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 movfr2gr.d $a2, $fa0 pcalau12i $a1, %pc_hi20(.L.str.99) addi.d $a1, $a1, %pc_lo12(.L.str.99) @@ -3105,12 +3101,8 @@ setup_scaler: # @setup_scaler .Lfunc_end11: .size setup_scaler, .Lfunc_end11-setup_scaler # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function update_statistics_notime -.LCPI12_0: - .dword 0xc04e000000000000 # double -60 .section .text.unlikely.,"ax",@progbits - .globl update_statistics_notime + .globl update_statistics_notime # -- Begin function update_statistics_notime .p2align 5 .type update_statistics_notime,@function update_statistics_notime: # @update_statistics_notime @@ -3136,12 +3128,14 @@ update_statistics_notime: # @update_statistics_notime add.w $a1, $a2, $a1 bstrpick.d $a2, $a1, 31, 31 srai.d $a1, $a1, 5 - pcalau12i $a3, %pc_hi20(.LCPI12_0) - fld.d $fa0, $a3, %pc_lo12(.LCPI12_0) add.d $a2, $a1, $a2 - movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -131072 + lu52i.d $a1, $a1, -1020 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 pcalau12i $a1, %pc_hi20(update_statistics_notime.spinpoint) ld.w $a4, $a1, %pc_lo12(update_statistics_notime.spinpoint) fadd.d $fa0, $fs0, $fa0 @@ -3196,33 +3190,29 @@ final_statistics_null: # @final_statistics_null .Lfunc_end15: .size final_statistics_null, .Lfunc_end15-final_statistics_null # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function oe_encode -.LCPI16_0: - .dword 0x4058c00000000000 # double 99 - .text - .globl oe_encode + .globl oe_encode # -- Begin function oe_encode .p2align 5 .type oe_encode,@function oe_encode: # @oe_encode # %bb.0: - addi.d $sp, $sp, -1248 - st.d $ra, $sp, 1240 # 8-byte Folded Spill - st.d $fp, $sp, 1232 # 8-byte Folded Spill - st.d $s0, $sp, 1224 # 8-byte Folded Spill - st.d $s1, $sp, 1216 # 8-byte Folded Spill - st.d $s2, $sp, 1208 # 8-byte Folded Spill - st.d $s3, $sp, 1200 # 8-byte Folded Spill - st.d $s4, $sp, 1192 # 8-byte Folded Spill - st.d $s5, $sp, 1184 # 8-byte Folded Spill - st.d $s6, $sp, 1176 # 8-byte Folded Spill - st.d $s7, $sp, 1168 # 8-byte Folded Spill - st.d $s8, $sp, 1160 # 8-byte Folded Spill - fst.d $fs0, $sp, 1152 # 8-byte Folded Spill - fst.d $fs1, $sp, 1144 # 8-byte Folded Spill - fst.d $fs2, $sp, 1136 # 8-byte Folded Spill - fst.d $fs3, $sp, 1128 # 8-byte Folded Spill - fst.d $fs4, $sp, 1120 # 8-byte Folded Spill + addi.d $sp, $sp, -1264 + st.d $ra, $sp, 1256 # 8-byte Folded Spill + st.d $fp, $sp, 1248 # 8-byte Folded Spill + st.d $s0, $sp, 1240 # 8-byte Folded Spill + st.d $s1, $sp, 1232 # 8-byte Folded Spill + st.d $s2, $sp, 1224 # 8-byte Folded Spill + st.d $s3, $sp, 1216 # 8-byte Folded Spill + st.d $s4, $sp, 1208 # 8-byte Folded Spill + st.d $s5, $sp, 1200 # 8-byte Folded Spill + st.d $s6, $sp, 1192 # 8-byte Folded Spill + st.d $s7, $sp, 1184 # 8-byte Folded Spill + st.d $s8, $sp, 1176 # 8-byte Folded Spill + fst.d $fs0, $sp, 1168 # 8-byte Folded Spill + fst.d $fs1, $sp, 1160 # 8-byte Folded Spill + fst.d $fs2, $sp, 1152 # 8-byte Folded Spill + fst.d $fs3, $sp, 1144 # 8-byte Folded Spill + fst.d $fs4, $sp, 1136 # 8-byte Folded Spill + fst.d $fs5, $sp, 1128 # 8-byte Folded Spill move $fp, $a0 ld.w $a0, $a0, 72 ori $a1, $zero, 256 @@ -3257,10 +3247,10 @@ oe_encode: # @oe_encode ld.w $a6, $fp, 116 jirl $ra, $a7, 0 vrepli.b $vr0, 0 - vst $vr0, $sp, 272 - vst $vr0, $sp, 256 + vst $vr0, $sp, 280 + vst $vr0, $sp, 264 vst $vr0, $sp, 128 # 16-byte Folded Spill - vst $vr0, $sp, 240 + vst $vr0, $sp, 248 lu12i.w $s7, 1 ori $a1, $s7, 1720 ori $a0, $zero, 1 @@ -3269,11 +3259,11 @@ oe_encode: # @oe_encode ld.w $a3, $fp, 124 ld.w $a1, $fp, 72 ld.d $a2, $fp, 80 - st.d $a0, $sp, 288 + st.d $a0, $sp, 296 blez $a3, .LBB16_13 # %bb.9: fld.s $fa0, $fp, 120 - addi.d $a0, $sp, 240 + addi.d $a0, $sp, 248 pcaddu18i $ra, %call36(vorbis_encode_setup_vbr) jirl $ra, $ra, 0 bnez $a0, .LBB16_148 @@ -3284,23 +3274,23 @@ oe_encode: # @oe_encode # %bb.11: blez $a1, .LBB16_14 .LBB16_12: - ld.d $a2, $sp, 288 + ld.d $a2, $sp, 296 ori $a3, $s7, 1512 vldx $vr0, $a2, $a3 ori $a3, $s7, 1504 fldx.d $fa1, $a2, $a3 ori $a3, $s7, 1480 vldx $vr2, $a2, $a3 - vst $vr0, $sp, 760 - fst.d $fa1, $sp, 736 - vst $vr2, $sp, 744 - st.d $a1, $sp, 720 - st.d $a0, $sp, 728 + vst $vr0, $sp, 768 + fst.d $fa1, $sp, 744 + vst $vr2, $sp, 752 + st.d $a1, $sp, 728 + st.d $a0, $sp, 736 ori $a0, $zero, 1 - st.w $a0, $sp, 712 - addi.d $a0, $sp, 240 + st.w $a0, $sp, 720 + addi.d $a0, $sp, 248 ori $a1, $zero, 17 - addi.d $a2, $sp, 712 + addi.d $a2, $sp, 720 pcaddu18i $ra, %call36(vorbis_encode_ctl) jirl $ra, $ra, 0 b .LBB16_14 @@ -3321,7 +3311,7 @@ oe_encode: # @oe_encode maskeqz $a5, $a5, $a0 masknez $a0, $a6, $a0 or $a5, $a5, $a0 - addi.d $a0, $sp, 240 + addi.d $a0, $sp, 248 pcaddu18i $ra, %call36(vorbis_encode_setup_managed) jirl $ra, $ra, 0 bnez $a0, .LBB16_149 @@ -3332,7 +3322,7 @@ oe_encode: # @oe_encode ld.w $a0, $fp, 108 bgez $a0, .LBB16_25 # %bb.16: - ld.d $a0, $sp, 288 + ld.d $a0, $sp, 296 ldptr.w $a1, $a0, 5520 bnez $a1, .LBB16_25 # %bb.17: @@ -3369,7 +3359,7 @@ oe_encode: # @oe_encode bceqz $fcc0, .LBB16_24 b .LBB16_25 .LBB16_23: - ld.d $a0, $sp, 288 + ld.d $a0, $sp, 296 ldptr.w $a1, $a0, 5520 bnez $a1, .LBB16_25 .LBB16_24: # %vorbis_encode_ctl.exit.sink.split @@ -3378,7 +3368,7 @@ oe_encode: # @oe_encode ld.w $s8, $fp, 136 blez $s8, .LBB16_97 # %bb.26: # %.lr.ph.preheader.i - ld.d $a0, $sp, 288 + ld.d $a0, $sp, 296 ld.d $a1, $fp, 128 ldptr.d $a2, $a0, 5584 st.d $a2, $sp, 72 # 8-byte Folded Spill @@ -3419,8 +3409,12 @@ oe_encode: # @oe_encode st.d $zero, $sp, 112 # 8-byte Folded Spill movgr2fr.d $fa0, $zero vst $vr0, $sp, 32 # 16-byte Folded Spill - # implicit-def: $f27_64 + ori $a0, $zero, 0 + lu32i.d $a0, -475136 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fs3, $a0 # implicit-def: $f28_64 + # implicit-def: $f29_64 .LBB16_27: # %.lr.ph.i # =>This Inner Loop Header: Depth=1 ld.d $a0, $s3, 0 @@ -3486,20 +3480,20 @@ oe_encode: # @oe_encode ld.d $a0, $s5, 0 pcalau12i $a1, %pc_hi20(.L.str.179) addi.d $a1, $a1, %pc_lo12(.L.str.179) - addi.d $a2, $sp, 712 + addi.d $a2, $sp, 720 pcaddu18i $ra, %call36(__isoc99_sscanf) jirl $ra, $ra, 0 ori $a1, $zero, 1 bne $a0, $a1, .LBB16_62 # %bb.36: # in Loop: Header=BB16_27 Depth=1 - fld.d $fs4, $sp, 712 - ld.d $a0, $sp, 288 + fld.d $fs5, $sp, 720 + ld.d $a0, $sp, 296 ldptr.w $a1, $a0, 5520 bnez $a1, .LBB16_58 .LBB16_37: # in Loop: Header=BB16_27 Depth=1 vld $vr0, $sp, 32 # 16-byte Folded Reload - fcmp.clt.d $fcc0, $fa0, $fs4 - fsel $fa0, $fs4, $fa0, $fcc0 + fcmp.clt.d $fcc0, $fa0, $fs5 + fsel $fa0, $fs5, $fa0, $fcc0 vldi $vr1, -850 fcmp.clt.d $fcc0, $fa0, $fa1 fsel $fa0, $fa0, $fa1, $fcc0 @@ -3509,40 +3503,40 @@ oe_encode: # @oe_encode .p2align 4, , 16 .LBB16_38: # in Loop: Header=BB16_27 Depth=1 ld.d $a0, $s5, 0 - addi.d $a2, $sp, 712 + addi.d $a2, $sp, 720 ld.d $a1, $sp, 120 # 8-byte Folded Reload pcaddu18i $ra, %call36(__isoc99_sscanf) jirl $ra, $ra, 0 ori $a1, $zero, 1 bne $a0, $a1, .LBB16_48 # %bb.39: # in Loop: Header=BB16_27 Depth=1 - fld.d $fs1, $sp, 712 + fld.d $fs1, $sp, 720 ori $s2, $zero, 1 b .LBB16_58 .p2align 4, , 16 .LBB16_40: # in Loop: Header=BB16_27 Depth=1 ld.d $a0, $s5, 0 - addi.d $a2, $sp, 712 + addi.d $a2, $sp, 720 ld.d $a1, $sp, 120 # 8-byte Folded Reload pcaddu18i $ra, %call36(__isoc99_sscanf) jirl $ra, $ra, 0 ori $a1, $zero, 1 bne $a0, $a1, .LBB16_48 # %bb.41: # in Loop: Header=BB16_27 Depth=1 - fld.d $fs0, $sp, 712 + fld.d $fs0, $sp, 720 ori $s2, $zero, 1 b .LBB16_58 .LBB16_42: # in Loop: Header=BB16_27 Depth=1 ld.d $a0, $s5, 0 pcalau12i $a1, %pc_hi20(.L.str.183) addi.d $a1, $a1, %pc_lo12(.L.str.183) - addi.d $a2, $sp, 296 + addi.d $a2, $sp, 304 pcaddu18i $ra, %call36(__isoc99_sscanf) jirl $ra, $ra, 0 ori $a1, $zero, 1 bne $a0, $a1, .LBB16_53 # %bb.43: # in Loop: Header=BB16_27 Depth=1 - ld.d $a0, $sp, 296 + ld.d $a0, $sp, 304 st.d $a0, $sp, 96 # 8-byte Folded Spill ori $s2, $zero, 1 b .LBB16_58 @@ -3550,13 +3544,13 @@ oe_encode: # @oe_encode ld.d $a0, $s5, 0 pcalau12i $a1, %pc_hi20(.L.str.183) addi.d $a1, $a1, %pc_lo12(.L.str.183) - addi.d $a2, $sp, 296 + addi.d $a2, $sp, 304 pcaddu18i $ra, %call36(__isoc99_sscanf) jirl $ra, $ra, 0 ori $a1, $zero, 1 bne $a0, $a1, .LBB16_53 # %bb.45: # in Loop: Header=BB16_27 Depth=1 - ld.d $a0, $sp, 296 + ld.d $a0, $sp, 304 st.d $a0, $sp, 72 # 8-byte Folded Spill ori $s2, $zero, 1 b .LBB16_58 @@ -3564,13 +3558,13 @@ oe_encode: # @oe_encode ld.d $a0, $s5, 0 pcalau12i $a1, %pc_hi20(.L.str.183) addi.d $a1, $a1, %pc_lo12(.L.str.183) - addi.d $a2, $sp, 296 + addi.d $a2, $sp, 304 pcaddu18i $ra, %call36(__isoc99_sscanf) jirl $ra, $ra, 0 ori $a1, $zero, 1 bne $a0, $a1, .LBB16_55 # %bb.47: # in Loop: Header=BB16_27 Depth=1 - ld.d $a0, $sp, 296 + ld.d $a0, $sp, 304 st.d $a0, $sp, 88 # 8-byte Folded Spill b .LBB16_57 .LBB16_48: # in Loop: Header=BB16_27 Depth=1 @@ -3583,26 +3577,26 @@ oe_encode: # @oe_encode ld.d $a0, $s5, 0 pcalau12i $a1, %pc_hi20(.L.str.183) addi.d $a1, $a1, %pc_lo12(.L.str.183) - addi.d $a2, $sp, 296 + addi.d $a2, $sp, 304 pcaddu18i $ra, %call36(__isoc99_sscanf) jirl $ra, $ra, 0 ori $a1, $zero, 1 bne $a0, $a1, .LBB16_55 # %bb.50: # in Loop: Header=BB16_27 Depth=1 - ld.d $a0, $sp, 296 + ld.d $a0, $sp, 304 st.d $a0, $sp, 64 # 8-byte Folded Spill b .LBB16_57 .LBB16_51: # in Loop: Header=BB16_27 Depth=1 ld.d $a0, $s5, 0 pcalau12i $a1, %pc_hi20(.L.str.179) addi.d $a1, $a1, %pc_lo12(.L.str.179) - addi.d $a2, $sp, 712 + addi.d $a2, $sp, 720 pcaddu18i $ra, %call36(__isoc99_sscanf) jirl $ra, $ra, 0 ori $a1, $zero, 1 bne $a0, $a1, .LBB16_63 # %bb.52: # in Loop: Header=BB16_27 Depth=1 - fld.d $fs2, $sp, 712 + fld.d $fs2, $sp, 720 b .LBB16_57 .LBB16_53: # in Loop: Header=BB16_27 Depth=1 ld.d $a0, $s3, 0 @@ -3648,13 +3642,13 @@ oe_encode: # @oe_encode ld.d $a0, $s5, 0 pcalau12i $a1, %pc_hi20(.L.str.179) addi.d $a1, $a1, %pc_lo12(.L.str.179) - addi.d $a2, $sp, 712 + addi.d $a2, $sp, 720 pcaddu18i $ra, %call36(__isoc99_sscanf) jirl $ra, $ra, 0 ori $a1, $zero, 1 bne $a0, $a1, .LBB16_65 # %bb.61: # in Loop: Header=BB16_27 Depth=1 - fld.d $fs3, $sp, 712 + fld.d $fs4, $sp, 720 b .LBB16_66 .LBB16_62: # in Loop: Header=BB16_27 Depth=1 ld.d $a0, $s3, 0 @@ -3664,7 +3658,7 @@ oe_encode: # @oe_encode addi.d $a1, $a1, %pc_lo12(.L.str.180) pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 - ld.d $a0, $sp, 288 + ld.d $a0, $sp, 296 ldptr.w $a1, $a0, 5520 bnez $a1, .LBB16_58 b .LBB16_37 @@ -3692,25 +3686,23 @@ oe_encode: # @oe_encode pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 .LBB16_66: # in Loop: Header=BB16_27 Depth=1 - ld.d $a0, $sp, 288 + ld.d $a0, $sp, 296 ori $a1, $s7, 1544 ldptr.w $a2, $a0, 5520 fldx.d $fa0, $a0, $a1 bnez $a2, .LBB16_68 # %bb.67: # in Loop: Header=BB16_27 Depth=1 - pcalau12i $a2, %pc_hi20(.LCPI16_0) - fld.d $fa1, $a2, %pc_lo12(.LCPI16_0) - vldi $vr2, -1024 - fcmp.clt.d $fcc0, $fs3, $fa2 - fsel $fa2, $fs3, $fa2, $fcc0 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa2, $fa1, $fcc0 + vldi $vr1, -1024 + fcmp.clt.d $fcc0, $fs4, $fa1 + fsel $fa1, $fs4, $fa1, $fcc0 + fcmp.clt.d $fcc0, $fs3, $fa1 + fsel $fa1, $fa1, $fs3, $fcc0 fstx.d $fa1, $a0, $a1 .LBB16_68: # %vorbis_encode_ctl.exit113.i # in Loop: Header=BB16_27 Depth=1 ld.d $a0, $s3, 0 movfr2gr.d $a2, $fa0 - movfr2gr.d $a3, $fs3 + movfr2gr.d $a3, $fs4 pcalau12i $a1, %pc_hi20(.L.str.191) addi.d $a1, $a1, %pc_lo12(.L.str.191) pcaddu18i $ra, %call36(fprintf) @@ -3720,7 +3712,7 @@ oe_encode: # @oe_encode ld.d $a0, $sp, 112 # 8-byte Folded Reload beqz $a0, .LBB16_79 # %bb.70: - ld.d $a0, $sp, 288 + ld.d $a0, $sp, 296 ldptr.w $a1, $a0, 5520 bnez $a1, .LBB16_79 # %bb.71: @@ -3767,7 +3759,7 @@ oe_encode: # @oe_encode .LBB16_79: # %vorbis_encode_ctl.exit116.i beqz $s2, .LBB16_97 # %bb.80: - ld.d $a0, $sp, 288 + ld.d $a0, $sp, 296 ldptr.w $a1, $a0, 5520 bnez $a1, .LBB16_97 .LBB16_81: # %.thread @@ -3855,26 +3847,26 @@ oe_encode: # @oe_encode stptr.w $zero, $a0, 5560 bnez $s2, .LBB16_81 .LBB16_97: # %set_advanced_encoder_options.exit - addi.d $a0, $sp, 240 + addi.d $a0, $sp, 248 pcaddu18i $ra, %call36(vorbis_encode_setup_init) jirl $ra, $ra, 0 - addi.d $a0, $sp, 488 - addi.d $a1, $sp, 240 + addi.d $a0, $sp, 496 + addi.d $a1, $sp, 248 ori $a2, $zero, 1 - addi.d $s0, $sp, 488 + addi.d $s0, $sp, 496 pcaddu18i $ra, %call36(_vds_shared_init) jirl $ra, $ra, 0 bnez $a0, .LBB16_99 # %bb.98: - ld.d $a0, $sp, 288 - ld.d $s1, $sp, 624 + ld.d $a0, $sp, 296 + ld.d $s1, $sp, 632 ori $a1, $s7, 848 add.d $s2, $a0, $a1 ori $a0, $zero, 1 ori $a1, $zero, 40 pcaddu18i $ra, %call36(calloc) jirl $ra, $ra, 0 - ld.w $a1, $sp, 244 + ld.w $a1, $sp, 252 st.w $a1, $a0, 4 lu12i.w $a1, -237117 ori $a1, $a1, 3072 @@ -3887,21 +3879,21 @@ oe_encode: # @oe_encode pcaddu18i $ra, %call36(calloc) jirl $ra, $ra, 0 st.d $a0, $s1, 0 - addi.d $a1, $sp, 240 + addi.d $a1, $sp, 248 pcaddu18i $ra, %call36(_ve_envelope_init) jirl $ra, $ra, 0 addi.d $a1, $s1, 144 - addi.d $a0, $sp, 240 + addi.d $a0, $sp, 248 pcaddu18i $ra, %call36(vorbis_bitrate_init) jirl $ra, $ra, 0 .LBB16_99: # %vorbis_analysis_init.exit - addi.d $a0, $sp, 296 + addi.d $a0, $sp, 304 ori $a2, $zero, 192 move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ld.w $a0, $sp, 488 - st.d $s0, $sp, 400 + ld.w $a0, $sp, 496 + st.d $s0, $sp, 408 beqz $a0, .LBB16_101 # %bb.100: ori $a0, $zero, 1 @@ -3909,81 +3901,81 @@ oe_encode: # @oe_encode pcaddu18i $ra, %call36(calloc) jirl $ra, $ra, 0 move $s0, $a0 - st.d $a0, $sp, 480 + st.d $a0, $sp, 488 vld $vr0, $sp, 128 # 16-byte Folded Reload - vst $vr0, $sp, 304 + vst $vr0, $sp, 312 ori $a0, $zero, 256 ori $s1, $zero, 256 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 - st.d $a0, $sp, 320 st.d $a0, $sp, 328 + st.d $a0, $sp, 336 st.b $zero, $a0, 0 - st.d $s1, $sp, 336 + st.d $s1, $sp, 344 lu12i.w $a0, -237117 ori $a0, $a0, 3072 lu32i.d $a0, 0 st.w $a0, $s0, 8 .LBB16_101: # %vorbis_block_init.exit ld.w $s0, $fp, 8 - addi.d $a0, $sp, 728 + addi.d $a0, $sp, 736 ori $a2, $zero, 392 move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $a0, 4 - st.d $a0, $sp, 720 + st.d $a0, $sp, 728 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 - st.d $a0, $sp, 712 + st.d $a0, $sp, 720 ori $a0, $zero, 1024 - st.d $a0, $sp, 760 + st.d $a0, $sp, 768 lu12i.w $a0, 1 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 - st.d $a0, $sp, 744 + st.d $a0, $sp, 752 lu12i.w $a0, 2 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 ld.d $a1, $fp, 0 - st.d $a0, $sp, 752 - st.d $s0, $sp, 1088 - addi.d $a0, $sp, 488 - addi.d $a2, $sp, 632 - addi.d $a3, $sp, 192 - addi.d $a4, $sp, 144 + st.d $a0, $sp, 760 + st.d $s0, $sp, 1096 + addi.d $a0, $sp, 496 + addi.d $a2, $sp, 640 + addi.d $a3, $sp, 200 + addi.d $a4, $sp, 152 pcaddu18i $ra, %call36(vorbis_analysis_headerout) jirl $ra, $ra, 0 - addi.d $a0, $sp, 712 - addi.d $a1, $sp, 632 + addi.d $a0, $sp, 720 + addi.d $a1, $sp, 640 pcaddu18i $ra, %call36(ogg_stream_packetin) jirl $ra, $ra, 0 - addi.d $a0, $sp, 712 - addi.d $a1, $sp, 192 + addi.d $a0, $sp, 720 + addi.d $a1, $sp, 200 pcaddu18i $ra, %call36(ogg_stream_packetin) jirl $ra, $ra, 0 - addi.d $a0, $sp, 712 - addi.d $a1, $sp, 144 + addi.d $a0, $sp, 720 + addi.d $a1, $sp, 152 pcaddu18i $ra, %call36(ogg_stream_packetin) jirl $ra, $ra, 0 .p2align 4, , 16 .LBB16_102: # =>This Inner Loop Header: Depth=1 - addi.d $a0, $sp, 712 - addi.d $a1, $sp, 680 + addi.d $a0, $sp, 720 + addi.d $a1, $sp, 688 pcaddu18i $ra, %call36(ogg_stream_flush) jirl $ra, $ra, 0 beqz $a0, .LBB16_105 # %bb.103: # in Loop: Header=BB16_102 Depth=1 ld.d $s1, $fp, 144 - ld.d $a0, $sp, 680 - ld.d $s2, $sp, 688 + ld.d $a0, $sp, 688 + ld.d $s2, $sp, 696 ori $a1, $zero, 1 move $a2, $s2 move $a3, $s1 pcaddu18i $ra, %call36(fwrite) jirl $ra, $ra, 0 - ld.d $a2, $sp, 696 - ld.d $s3, $sp, 704 + ld.d $a2, $sp, 704 + ld.d $s3, $sp, 712 move $s4, $a0 ori $a1, $zero, 1 move $a0, $a2 @@ -4015,11 +4007,11 @@ oe_encode: # @oe_encode move $a0, $zero .LBB16_107: # in Loop: Header=BB16_108 Depth=1 addi.w $a1, $a0, 0 - addi.d $a0, $sp, 488 + addi.d $a0, $sp, 496 pcaddu18i $ra, %call36(vorbis_analysis_wrote) jirl $ra, $ra, 0 - addi.d $a0, $sp, 488 - addi.d $a1, $sp, 296 + addi.d $a0, $sp, 496 + addi.d $a1, $sp, 304 pcaddu18i $ra, %call36(vorbis_analysis_blockout) jirl $ra, $ra, 0 bnez $a0, .LBB16_111 @@ -4029,7 +4021,7 @@ oe_encode: # @oe_encode # Child Loop BB16_116 Depth 3 # Child Loop BB16_120 Depth 4 # Child Loop BB16_124 Depth 4 - addi.d $a0, $sp, 488 + addi.d $a0, $sp, 496 ori $a1, $zero, 1024 pcaddu18i $ra, %call36(vorbis_analysis_buffer) jirl $ra, $ra, 0 @@ -4070,8 +4062,8 @@ oe_encode: # @oe_encode .p2align 4, , 16 .LBB16_112: # %vorbis_bitrate_flushpacket.exit # in Loop: Header=BB16_113 Depth=2 - addi.d $a0, $sp, 488 - addi.d $a1, $sp, 296 + addi.d $a0, $sp, 496 + addi.d $a1, $sp, 304 pcaddu18i $ra, %call36(vorbis_analysis_blockout) jirl $ra, $ra, 0 beqz $a0, .LBB16_137 @@ -4081,21 +4073,21 @@ oe_encode: # @oe_encode # Child Loop BB16_116 Depth 3 # Child Loop BB16_120 Depth 4 # Child Loop BB16_124 Depth 4 - ld.d $a0, $sp, 320 + ld.d $a0, $sp, 328 vld $vr0, $sp, 128 # 16-byte Folded Reload - addi.d $a1, $sp, 448 + addi.d $a1, $sp, 456 vst $vr0, $a1, 16 vst $vr0, $a1, 0 - st.d $a0, $sp, 328 + st.d $a0, $sp, 336 st.b $zero, $a0, 0 ld.d $a0, $sp, 120 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(_mapping_P) ld.d $a1, $a0, 24 - st.d $zero, $sp, 304 - st.w $zero, $sp, 312 - addi.d $a0, $sp, 296 + st.d $zero, $sp, 312 + st.w $zero, $sp, 320 + addi.d $a0, $sp, 304 jirl $ra, $a1, 0 - addi.d $a0, $sp, 296 + addi.d $a0, $sp, 304 pcaddu18i $ra, %call36(vorbis_bitrate_addblock) jirl $ra, $ra, 0 move $a2, $s4 @@ -4103,14 +4095,14 @@ oe_encode: # @oe_encode .p2align 4, , 16 .LBB16_114: # %._crit_edge.i138 # in Loop: Header=BB16_116 Depth=3 - st.d $a5, $sp, 640 + st.d $a5, $sp, 648 addi.w $a2, $a2, 1 slt $a1, $a2, $a1 maskeqz $a1, $a2, $a1 st.w $a1, $a0, 260 .LBB16_115: # in Loop: Header=BB16_116 Depth=3 - addi.d $a0, $sp, 712 - addi.d $a1, $sp, 632 + addi.d $a0, $sp, 720 + addi.d $a1, $sp, 640 pcaddu18i $ra, %call36(ogg_stream_packetin) jirl $ra, $ra, 0 addi.d $s0, $s0, 1 @@ -4122,7 +4114,7 @@ oe_encode: # @oe_encode # => This Loop Header: Depth=3 # Child Loop BB16_120 Depth 4 # Child Loop BB16_124 Depth 4 - ld.d $a0, $sp, 624 + ld.d $a0, $sp, 632 ld.w $a1, $a0, 160 move $s4, $a2 beqz $a1, .LBB16_122 @@ -4148,13 +4140,13 @@ oe_encode: # @oe_encode vld $vr1, $t0, 16 vld $vr2, $t0, 32 ldx.wu $a5, $a4, $a6 - vst $vr0, $sp, 632 - vst $vr1, $sp, 648 - vst $vr2, $sp, 664 + vst $vr0, $sp, 640 + vst $vr1, $sp, 656 + vst $vr2, $sp, 672 beqz $a3, .LBB16_114 # %bb.119: # %.lr.ph.i136 # in Loop: Header=BB16_116 Depth=3 - ld.d $a6, $sp, 632 + ld.d $a6, $sp, 640 .p2align 4, , 16 .LBB16_120: # Parent Loop BB16_108 Depth=1 # Parent Loop BB16_113 Depth=2 @@ -4167,7 +4159,7 @@ oe_encode: # @oe_encode bnez $a3, .LBB16_120 # %bb.121: # %._crit_edge.i138.loopexit # in Loop: Header=BB16_116 Depth=3 - st.d $a6, $sp, 632 + st.d $a6, $sp, 640 b .LBB16_114 .p2align 4, , 16 .LBB16_122: # in Loop: Header=BB16_116 Depth=3 @@ -4178,9 +4170,9 @@ oe_encode: # @oe_encode vld $vr0, $a1, 0 vld $vr1, $a1, 16 vld $vr2, $a1, 32 - vst $vr0, $sp, 632 - vst $vr1, $sp, 648 - vst $vr2, $sp, 664 + vst $vr0, $sp, 640 + vst $vr1, $sp, 656 + vst $vr2, $sp, 672 st.w $zero, $a0, 164 b .LBB16_115 .p2align 4, , 16 @@ -4189,15 +4181,15 @@ oe_encode: # @oe_encode # Parent Loop BB16_113 Depth=2 # Parent Loop BB16_116 Depth=3 # => This Inner Loop Header: Depth=4 - ld.w $a1, $sp, 1080 - ld.d $a0, $sp, 768 + ld.w $a1, $sp, 1088 + ld.d $a0, $sp, 776 beqz $a1, .LBB16_126 # %bb.125: # %.lr.ph # in Loop: Header=BB16_124 Depth=4 bnez $a0, .LBB16_130 .LBB16_126: # in Loop: Header=BB16_124 Depth=4 - ld.d $a1, $sp, 728 - ld.d $a2, $sp, 736 + ld.d $a1, $sp, 736 + ld.d $a2, $sp, 744 sub.d $a1, $a1, $a2 lu12i.w $a2, 1 blt $a2, $a1, .LBB16_130 @@ -4205,29 +4197,29 @@ oe_encode: # @oe_encode ori $a1, $zero, 254 blt $a1, $a0, .LBB16_130 # %bb.128: # in Loop: Header=BB16_124 Depth=4 - ld.w $a1, $sp, 1084 + ld.w $a1, $sp, 1092 beqz $a0, .LBB16_136 # %bb.129: # in Loop: Header=BB16_124 Depth=4 bnez $a1, .LBB16_135 .LBB16_130: # %ogg_stream_pageout.exit # in Loop: Header=BB16_124 Depth=4 - addi.d $a0, $sp, 712 - addi.d $a1, $sp, 680 + addi.d $a0, $sp, 720 + addi.d $a1, $sp, 688 pcaddu18i $ra, %call36(ogg_stream_flush) jirl $ra, $ra, 0 beqz $a0, .LBB16_134 # %bb.131: # in Loop: Header=BB16_124 Depth=4 ld.d $s4, $fp, 144 - ld.d $s3, $sp, 680 - ld.d $s5, $sp, 688 + ld.d $s3, $sp, 688 + ld.d $s5, $sp, 696 ori $a1, $zero, 1 move $a0, $s3 move $a2, $s5 move $a3, $s4 pcaddu18i $ra, %call36(fwrite) jirl $ra, $ra, 0 - ld.d $a2, $sp, 696 - ld.d $s6, $sp, 704 + ld.d $a2, $sp, 704 + ld.d $s6, $sp, 712 move $s7, $a0 ori $a1, $zero, 1 move $a0, $a2 @@ -4270,36 +4262,36 @@ oe_encode: # @oe_encode .LBB16_139: # %.loopexit177 ori $s3, $zero, 1 .LBB16_140: # %.loopexit177 - ld.d $a0, $sp, 712 + ld.d $a0, $sp, 720 beqz $a0, .LBB16_142 # %bb.141: pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 .LBB16_142: - ld.d $a0, $sp, 744 + ld.d $a0, $sp, 752 beqz $a0, .LBB16_144 # %bb.143: pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 .LBB16_144: - ld.d $a0, $sp, 752 + ld.d $a0, $sp, 760 beqz $a0, .LBB16_146 # %bb.145: pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 .LBB16_146: # %ogg_stream_clear.exit - addi.d $a0, $sp, 712 + addi.d $a0, $sp, 720 ori $a2, $zero, 408 move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - addi.d $a0, $sp, 296 + addi.d $a0, $sp, 304 pcaddu18i $ra, %call36(vorbis_block_clear) jirl $ra, $ra, 0 - addi.d $a0, $sp, 488 + addi.d $a0, $sp, 496 pcaddu18i $ra, %call36(vorbis_dsp_clear) jirl $ra, $ra, 0 - addi.d $a0, $sp, 240 + addi.d $a0, $sp, 248 pcaddu18i $ra, %call36(vorbis_info_clear) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(timer_time.now.0) @@ -4318,23 +4310,24 @@ oe_encode: # @oe_encode jirl $ra, $a4, 0 .LBB16_147: move $a0, $s3 - fld.d $fs4, $sp, 1120 # 8-byte Folded Reload - fld.d $fs3, $sp, 1128 # 8-byte Folded Reload - fld.d $fs2, $sp, 1136 # 8-byte Folded Reload - fld.d $fs1, $sp, 1144 # 8-byte Folded Reload - fld.d $fs0, $sp, 1152 # 8-byte Folded Reload - ld.d $s8, $sp, 1160 # 8-byte Folded Reload - ld.d $s7, $sp, 1168 # 8-byte Folded Reload - ld.d $s6, $sp, 1176 # 8-byte Folded Reload - ld.d $s5, $sp, 1184 # 8-byte Folded Reload - ld.d $s4, $sp, 1192 # 8-byte Folded Reload - ld.d $s3, $sp, 1200 # 8-byte Folded Reload - ld.d $s2, $sp, 1208 # 8-byte Folded Reload - ld.d $s1, $sp, 1216 # 8-byte Folded Reload - ld.d $s0, $sp, 1224 # 8-byte Folded Reload - ld.d $fp, $sp, 1232 # 8-byte Folded Reload - ld.d $ra, $sp, 1240 # 8-byte Folded Reload - addi.d $sp, $sp, 1248 + fld.d $fs5, $sp, 1128 # 8-byte Folded Reload + fld.d $fs4, $sp, 1136 # 8-byte Folded Reload + fld.d $fs3, $sp, 1144 # 8-byte Folded Reload + fld.d $fs2, $sp, 1152 # 8-byte Folded Reload + fld.d $fs1, $sp, 1160 # 8-byte Folded Reload + fld.d $fs0, $sp, 1168 # 8-byte Folded Reload + ld.d $s8, $sp, 1176 # 8-byte Folded Reload + ld.d $s7, $sp, 1184 # 8-byte Folded Reload + ld.d $s6, $sp, 1192 # 8-byte Folded Reload + ld.d $s5, $sp, 1200 # 8-byte Folded Reload + ld.d $s4, $sp, 1208 # 8-byte Folded Reload + ld.d $s3, $sp, 1216 # 8-byte Folded Reload + ld.d $s2, $sp, 1224 # 8-byte Folded Reload + ld.d $s1, $sp, 1232 # 8-byte Folded Reload + ld.d $s0, $sp, 1240 # 8-byte Folded Reload + ld.d $fp, $sp, 1248 # 8-byte Folded Reload + ld.d $ra, $sp, 1256 # 8-byte Folded Reload + addi.d $sp, $sp, 1264 ret .LBB16_148: pcalau12i $a0, %got_pc_hi20(stderr) @@ -4355,7 +4348,7 @@ oe_encode: # @oe_encode ori $s3, $zero, 1 pcaddu18i $ra, %call36(fwrite) jirl $ra, $ra, 0 - addi.d $a0, $sp, 240 + addi.d $a0, $sp, 248 pcaddu18i $ra, %call36(vorbis_info_clear) jirl $ra, $ra, 0 b .LBB16_147 @@ -4815,13 +4808,9 @@ aiff_id: # @aiff_id .Lfunc_end23: .size aiff_id, .Lfunc_end23-aiff_id # -- End function - .section .rodata.cst8,"aM",@progbits,8 + .section .rodata.cst16,"aM",@progbits,16 .p2align 3, 0x0 # -- Begin function aiff_open .LCPI24_0: - .dword 0x7ff0000000000000 # double +Inf - .section .rodata.cst16,"aM",@progbits,16 - .p2align 3, 0x0 -.LCPI24_1: .dword 0xfff0000000000000 # double -Inf .dword 0x7ff0000000000000 # double +Inf .text @@ -4894,8 +4883,8 @@ aiff_open: # @aiff_open # %bb.5: sltui $a0, $a0, 1 slli.d $a0, $a0, 3 - pcalau12i $a1, %pc_hi20(.LCPI24_1) - addi.d $a1, $a1, %pc_lo12(.LCPI24_1) + pcalau12i $a1, %pc_hi20(.LCPI24_0) + addi.d $a1, $a1, %pc_lo12(.LCPI24_0) fldx.d $fs0, $a1, $a0 ori $a0, $zero, 1 ori $a1, $zero, 67 @@ -4934,8 +4923,8 @@ aiff_open: # @aiff_open beq $s7, $a1, .LBB24_8 b .LBB24_12 .LBB24_7: - pcalau12i $a0, %pc_hi20(.LCPI24_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI24_0) + lu52i.d $a0, $zero, 2047 + movgr2fr.d $fs0, $a0 ori $a0, $zero, 1 ori $a1, $zero, 67 st.h $a0, $s2, 32 @@ -5148,13 +5137,9 @@ aiff_open: # @aiff_open .Lfunc_end24: .size aiff_open, .Lfunc_end24-aiff_open # -- End function - .section .rodata.cst8,"aM",@progbits,8 + .section .rodata.cst16,"aM",@progbits,16 .p2align 3, 0x0 # -- Begin function read_IEEE80 .LCPI25_0: - .dword 0x7ff0000000000000 # double +Inf - .section .rodata.cst16,"aM",@progbits,16 - .p2align 3, 0x0 -.LCPI25_1: .dword 0xfff0000000000000 # double -Inf .dword 0x7ff0000000000000 # double +Inf .text @@ -5180,8 +5165,8 @@ read_IEEE80: # @read_IEEE80 # %bb.2: sltui $a0, $a1, 1 slli.d $a0, $a0, 3 - pcalau12i $a1, %pc_hi20(.LCPI25_1) - addi.d $a1, $a1, %pc_lo12(.LCPI25_1) + pcalau12i $a1, %pc_hi20(.LCPI25_0) + addi.d $a1, $a1, %pc_lo12(.LCPI25_0) fldx.d $fa0, $a1, $a0 b .LBB25_5 .LBB25_3: @@ -5215,8 +5200,8 @@ read_IEEE80: # @read_IEEE80 pcaddu18i $t8, %call36(ldexp) jr $t8 .LBB25_4: - pcalau12i $a0, %pc_hi20(.LCPI25_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI25_0) + lu52i.d $a0, $zero, 2047 + movgr2fr.d $fa0, $a0 .LBB25_5: ld.d $s0, $sp, 8 # 8-byte Folded Reload ld.d $fp, $sp, 16 # 8-byte Folded Reload @@ -5342,16 +5327,7 @@ find_aiff_chunk: # @find_aiff_chunk .Lfunc_end26: .size find_aiff_chunk, .Lfunc_end26-find_aiff_chunk # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function wav_read -.LCPI27_0: - .word 0x34000000 # float 1.1920929E-7 -.LCPI27_1: - .word 0x38000000 # float 3.05175781E-5 -.LCPI27_2: - .word 0x3c000000 # float 0.0078125 - .text - .globl wav_read + .globl wav_read # -- Begin function wav_read .p2align 5 .type wav_read,@function wav_read: # @wav_read @@ -5412,9 +5388,9 @@ wav_read: # @wav_read # %bb.6: # %.preheader123 blez $a0, .LBB27_35 # %bb.7: # %.preheader.preheader - pcalau12i $a2, %pc_hi20(.LCPI27_2) - fld.s $fa0, $a2, %pc_lo12(.LCPI27_2) move $a2, $zero + lu12i.w $a3, 245760 + movgr2fr.w $fa0, $a3 b .LBB27_9 .p2align 4, , 16 .LBB27_8: # %._crit_edge145 @@ -5457,9 +5433,9 @@ wav_read: # @wav_read # %bb.13: # %.preheader131 blez $a0, .LBB27_35 # %bb.14: # %.preheader130.preheader - pcalau12i $a2, %pc_hi20(.LCPI27_0) - fld.s $fa0, $a2, %pc_lo12(.LCPI27_0) move $a2, $zero + lu12i.w $a3, 212992 + movgr2fr.w $fa0, $a3 b .LBB27_16 .p2align 4, , 16 .LBB27_15: # %._crit_edge @@ -5511,10 +5487,10 @@ wav_read: # @wav_read # %bb.20: # %.preheader128 blez $a0, .LBB27_35 # %bb.21: # %.preheader127.preheader - pcalau12i $a2, %pc_hi20(.LCPI27_1) - fld.s $fa0, $a2, %pc_lo12(.LCPI27_1) move $a2, $zero move $a3, $zero + lu12i.w $a4, 229376 + movgr2fr.w $fa0, $a4 b .LBB27_23 .p2align 4, , 16 .LBB27_22: # %._crit_edge137 @@ -5561,10 +5537,10 @@ wav_read: # @wav_read .LBB27_26: # %.preheader125 blez $a0, .LBB27_35 # %bb.27: # %.preheader124.preheader - pcalau12i $a2, %pc_hi20(.LCPI27_1) - fld.s $fa0, $a2, %pc_lo12(.LCPI27_1) move $a2, $zero move $a3, $zero + lu12i.w $a4, 229376 + movgr2fr.w $fa0, $a4 b .LBB27_29 .p2align 4, , 16 .LBB27_28: # %._crit_edge141 @@ -5847,16 +5823,7 @@ wav_ieee_read: # @wav_ieee_read .Lfunc_end29: .size wav_ieee_read, .Lfunc_end29-wav_ieee_read # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function res_init -.LCPI30_0: - .dword 0x3fe999999999999a # double 0.80000000000000004 -.LCPI30_1: - .dword 0x400921fb54442d18 # double 3.1415926535897931 -.LCPI30_2: - .dword 0x3b92e3b40a0e9b4f # double 9.9999999999999991E-22 - .text - .globl res_init + .globl res_init # -- Begin function res_init .p2align 5 .type res_init,@function res_init: # @res_init @@ -5891,16 +5858,19 @@ res_init: # @res_init # %bb.3: blez $a3, .LBB30_50 # %bb.4: - pcalau12i $a6, %pc_hi20(.LCPI30_0) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a6, $a1, 1022 beqz $a4, .LBB30_14 # %bb.5: addi.d $a1, $sp, 184 st.d $a1, $sp, 48 ori $s3, $zero, 45 addi.d $a5, $sp, 192 - fld.d $fs0, $a6, %pc_lo12(.LCPI30_0) vldi $vr1, -912 vldi $vr0, -976 + movgr2fr.d $fs0, $a6 ori $a6, $zero, 3 pcalau12i $a7, %pc_hi20(.LJTI30_0) addi.d $a7, $a7, %pc_lo12(.LJTI30_0) @@ -5942,10 +5912,10 @@ res_init: # @res_init fld.d $fs0, $a5, -8 b .LBB30_7 .LBB30_14: - fld.d $fs0, $a6, %pc_lo12(.LCPI30_0) ori $s3, $zero, 45 vldi $vr1, -912 vldi $vr0, -976 + movgr2fr.d $fs0, $a6 .LBB30_15: or $a1, $a3, $a2 andi $a1, $a1, 1 @@ -6034,11 +6004,14 @@ res_init: # @res_init fdiv.d $fs2, $fs0, $fs1 alsl.d $s2, $s1, $s2, 2 bstrpick.d $a0, $s5, 31, 31 - pcalau12i $a1, %pc_hi20(.LCPI30_1) - fld.d $fs3, $a1, %pc_lo12(.LCPI30_1) add.w $a0, $s5, $a0 srai.d $a0, $a0, 1 sub.d $s7, $zero, $a0 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fs3, $a0 vld $vr1, $sp, 32 # 16-byte Folded Reload b .LBB30_27 .p2align 4, , 16 @@ -6123,10 +6096,13 @@ res_init: # @res_init addi.w $s0, $s1, -1 .LBB30_38: alsl.d $s1, $s1, $a0, 2 - pcalau12i $a0, %pc_hi20(.LCPI30_2) - fld.d $fs0, $a0, %pc_lo12(.LCPI30_2) ori $a0, $zero, 2 vldi $vr0, -912 + lu12i.w $a1, 41193 + ori $a1, $a1, 2895 + lu32i.d $a1, 189364 + lu52i.d $a1, $a1, 953 + movgr2fr.d $fs0, $a1 vldi $vr5, -912 vld $vr4, $sp, 16 # 16-byte Folded Reload .p2align 4, , 16 @@ -6591,16 +6567,7 @@ vorbis_info_init: # @vorbis_info_init .Lfunc_end36: .size vorbis_info_init, .Lfunc_end36-vorbis_info_init # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function vorbis_encode_setup_vbr -.LCPI37_0: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 -.LCPI37_1: - .dword 0x3feffffff0000000 # double 0.99999997019767761 -.LCPI37_2: - .dword 0x3fefff2e40000000 # double 0.99989998340606689 - .text - .globl vorbis_encode_setup_vbr + .globl vorbis_encode_setup_vbr # -- Begin function vorbis_encode_setup_vbr .p2align 5 .type vorbis_encode_setup_vbr,@function vorbis_encode_setup_vbr: # @vorbis_encode_setup_vbr @@ -6615,18 +6582,24 @@ vorbis_encode_setup_vbr: # @vorbis_encode_setup_vbr move $s1, $a1 move $s0, $a0 ld.d $s2, $a0, 48 - pcalau12i $a0, %pc_hi20(.LCPI37_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI37_0) fcvt.d.s $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI37_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI37_1) + lu12i.w $a0, -487882 + ori $a0, $a0, 2289 + lu32i.d $a0, 325813 + lu52i.d $a0, $a0, 1006 + movgr2fr.d $fa1, $a0 fadd.d $fa0, $fa0, $fa1 - pcalau12i $a0, %pc_hi20(.LCPI37_2) - fld.d $fa1, $a0, %pc_lo12(.LCPI37_2) + fcvt.s.d $fa1, $fa0 + fcvt.d.s $fa1, $fa1 + lu12i.w $a0, -65536 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa2, $a0 fcmp.cult.d $fcc0, $fa0, $fa2 - fcvt.s.d $fa0, $fa0 - fcvt.d.s $fa0, $fa0 - fsel $fa0, $fa1, $fa0, $fcc0 + lu12i.w $a0, 262144 + lu32i.d $a0, -210 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa0, $a0 + fsel $fa0, $fa0, $fa1, $fcc0 move $a0, $s2 move $a3, $zero pcaddu18i $ra, %call36(get_setup_template) @@ -6976,12 +6949,7 @@ vorbis_info_clear: # @vorbis_info_clear .Lfunc_end38: .size vorbis_info_clear, .Lfunc_end38-vorbis_info_clear # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function vorbis_encode_ctl -.LCPI39_0: - .dword 0x4058c00000000000 # double 99 - .text - .globl vorbis_encode_ctl + .globl vorbis_encode_ctl # -- Begin function vorbis_encode_ctl .p2align 5 .type vorbis_encode_ctl,@function vorbis_encode_ctl: # @vorbis_encode_ctl @@ -7035,13 +7003,15 @@ vorbis_encode_ctl: # @vorbis_encode_ctl move $a0, $zero ret .LBB39_7: - move $a3, $zero fld.d $fa0, $a2, 0 - pcalau12i $a0, %pc_hi20(.LCPI39_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI39_0) - vldi $vr2, -1024 - fcmp.clt.d $fcc0, $fa0, $fa2 - fsel $fa0, $fa0, $fa2, $fcc0 + move $a3, $zero + vldi $vr1, -1024 + fcmp.clt.d $fcc0, $fa0, $fa1 + fsel $fa0, $fa0, $fa1, $fcc0 + ori $a0, $zero, 0 + lu32i.d $a0, -475136 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa1, $fa0 fsel $fa0, $fa0, $fa1, $fcc0 lu12i.w $a0, 1 @@ -7595,21 +7565,9 @@ vorbis_encode_setup_managed: # @vorbis_encode_setup_managed .Lfunc_end40: .size vorbis_encode_setup_managed, .Lfunc_end40-vorbis_encode_setup_managed # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function vorbis_encode_setup_init -.LCPI41_0: - .dword 0xc054000000000000 # double -80 -.LCPI41_1: - .dword 0xc069000000000000 # double -200 -.LCPI41_2: - .dword 0xc0f869f000000000 # double -99999 -.LCPI41_3: - .dword 0x408f400000000000 # double 1000 -.LCPI41_4: - .dword 0x3feccccccccccccd # double 0.90000000000000002 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI41_5: + .p2align 4, 0x0 # -- Begin function vorbis_encode_setup_init +.LCPI41_0: .dword 0xc12e847e00000000 # double -999999 .dword 0x412e847e00000000 # double 999999 .text @@ -7618,20 +7576,20 @@ vorbis_encode_setup_managed: # @vorbis_encode_setup_managed .type vorbis_encode_setup_init,@function vorbis_encode_setup_init: # @vorbis_encode_setup_init # %bb.0: - addi.d $sp, $sp, -224 - st.d $ra, $sp, 216 # 8-byte Folded Spill - st.d $fp, $sp, 208 # 8-byte Folded Spill - st.d $s0, $sp, 200 # 8-byte Folded Spill - st.d $s1, $sp, 192 # 8-byte Folded Spill - st.d $s2, $sp, 184 # 8-byte Folded Spill - st.d $s3, $sp, 176 # 8-byte Folded Spill - st.d $s4, $sp, 168 # 8-byte Folded Spill - st.d $s5, $sp, 160 # 8-byte Folded Spill - st.d $s6, $sp, 152 # 8-byte Folded Spill - st.d $s7, $sp, 144 # 8-byte Folded Spill - st.d $s8, $sp, 136 # 8-byte Folded Spill - fst.d $fs0, $sp, 128 # 8-byte Folded Spill - fst.d $fs1, $sp, 120 # 8-byte Folded Spill + addi.d $sp, $sp, -208 + st.d $ra, $sp, 200 # 8-byte Folded Spill + st.d $fp, $sp, 192 # 8-byte Folded Spill + st.d $s0, $sp, 184 # 8-byte Folded Spill + st.d $s1, $sp, 176 # 8-byte Folded Spill + st.d $s2, $sp, 168 # 8-byte Folded Spill + st.d $s3, $sp, 160 # 8-byte Folded Spill + st.d $s4, $sp, 152 # 8-byte Folded Spill + st.d $s5, $sp, 144 # 8-byte Folded Spill + st.d $s6, $sp, 136 # 8-byte Folded Spill + st.d $s7, $sp, 128 # 8-byte Folded Spill + st.d $s8, $sp, 120 # 8-byte Folded Spill + fst.d $fs0, $sp, 112 # 8-byte Folded Spill + fst.d $fs1, $sp, 104 # 8-byte Folded Spill move $fp, $a0 ld.d $s1, $a0, 48 addi.w $a0, $zero, -131 @@ -7639,43 +7597,49 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init # %bb.1: lu12i.w $s7, 1 ori $a2, $s7, 1552 - fldx.d $fa1, $s1, $a2 - pcalau12i $a1, %pc_hi20(.LCPI41_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI41_0) - fcmp.clt.d $fcc0, $fa0, $fa1 + fldx.d $fa0, $s1, $a2 + ori $a1, $zero, 0 + lu32i.d $a1, 262144 + lu52i.d $a1, $a1, -1019 + movgr2fr.d $fa1, $a1 + fcmp.clt.d $fcc0, $fa1, $fa0 + ori $a1, $zero, 0 bcnez $fcc0, .LBB41_3 # %bb.2: - pcalau12i $a1, %pc_hi20(.LCPI41_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI41_1) - fcmp.cule.d $fcc0, $fa0, $fa1 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, -1018 + movgr2fr.d $fa1, $a1 + fcmp.cule.d $fcc0, $fa1, $fa0 bcnez $fcc0, .LBB41_4 .LBB41_3: # %.sink.split - fstx.d $fa0, $s1, $a2 + fstx.d $fa1, $s1, $a2 .LBB41_4: ori $s0, $s7, 1568 - fldx.d $fa1, $s1, $s0 - movgr2fr.d $fa0, $zero - fcmp.clt.d $fcc0, $fa0, $fa1 + fldx.d $fa0, $s1, $s0 + movgr2fr.d $fa1, $zero + fcmp.clt.d $fcc0, $fa1, $fa0 bcnez $fcc0, .LBB41_6 # %bb.5: - pcalau12i $a1, %pc_hi20(.LCPI41_2) - fld.d $fa0, $a1, %pc_lo12(.LCPI41_2) - fcmp.cule.d $fcc0, $fa0, $fa1 + ori $a1, $zero, 0 + lu32i.d $a1, -497168 + lu52i.d $a1, $a1, -1009 + movgr2fr.d $fa1, $a1 + fcmp.cule.d $fcc0, $fa1, $fa0 bcnez $fcc0, .LBB41_7 .LBB41_6: # %.sink.split317 - fstx.d $fa0, $s1, $s0 + fstx.d $fa1, $s1, $s0 .LBB41_7: - ldptr.d $s6, $s1, 5512 - beqz $s6, .LBB41_190 + ldptr.d $s5, $s1, 5512 + beqz $s5, .LBB41_190 # %bb.8: - st.d $a2, $sp, 80 # 8-byte Folded Spill + st.d $a2, $sp, 64 # 8-byte Folded Spill ori $a0, $zero, 1 stptr.w $a0, $s1, 5520 ori $a0, $s7, 1432 - st.d $a0, $sp, 16 # 8-byte Folded Spill + st.d $a0, $sp, 8 # 8-byte Folded Spill fldx.d $fa0, $s1, $a0 - ld.d $a0, $s6, 48 - ld.d $a1, $s6, 56 + ld.d $a0, $s5, 48 + ld.d $a1, $s5, 56 ld.d $a2, $fp, 48 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a3, $fa0 @@ -7683,30 +7647,30 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init ldx.w $a0, $a0, $a3 ldx.w $a1, $a1, $a3 ldptr.w $a3, $s1, 5624 - st.d $a3, $sp, 96 # 8-byte Folded Spill + st.d $a3, $sp, 88 # 8-byte Folded Spill st.d $a0, $a2, 0 st.d $a1, $a2, 8 ld.d $s2, $s1, 0 ld.d $s3, $s1, 8 ori $a0, $s7, 1448 - st.d $a0, $sp, 112 # 8-byte Folded Spill + st.d $a0, $sp, 96 # 8-byte Folded Spill fldx.d $fa0, $s1, $a0 - ld.d $a2, $s6, 272 - ld.d $a3, $s6, 280 - ld.d $a4, $s6, 288 + ld.d $a2, $s5, 272 + ld.d $a3, $s5, 280 + ld.d $a4, $s5, 288 ld.d $a0, $fp, 48 move $a1, $zero pcaddu18i $ra, %call36(vorbis_encode_floor_setup) jirl $ra, $ra, 0 - st.d $s2, $sp, 72 # 8-byte Folded Spill - st.d $s3, $sp, 88 # 8-byte Folded Spill + st.d $s2, $sp, 80 # 8-byte Folded Spill + st.d $s3, $sp, 72 # 8-byte Folded Spill beq $s2, $s3, .LBB41_10 # %bb.9: ori $a0, $s7, 1440 fldx.d $fa0, $s1, $a0 - ld.d $a2, $s6, 272 - ld.d $a3, $s6, 280 - ld.d $a4, $s6, 296 + ld.d $a2, $s5, 272 + ld.d $a3, $s5, 280 + ld.d $a4, $s5, 296 ld.d $a0, $fp, 48 ori $a1, $zero, 1 pcaddu18i $ra, %call36(vorbis_encode_floor_setup) @@ -7714,29 +7678,29 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init .LBB41_10: ori $a0, $s7, 1576 fldx.d $fa0, $s1, $a0 - ld.d $s3, $s6, 248 - ld.d $s4, $s6, 256 + ld.d $s3, $s5, 248 + ld.d $s4, $s5, 256 ld.d $s2, $fp, 48 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a1, $fa1 movgr2fr.w $fa1, $a1 ffint.d.w $fa1, $fa1 fsub.d $fs0, $fa0, $fa1 - slli.d $s5, $a1, 3 - fldx.d $fa0, $s4, $s5 + slli.d $s6, $a1, 3 + fldx.d $fa0, $s4, $s6 ori $a0, $s7, 848 add.d $a0, $s2, $a0 - alsl.d $s7, $a1, $s4, 3 + alsl.d $s8, $a1, $s4, 3 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a1, $fa0 - ori $s8, $zero, 492 - mul.d $a1, $a1, $s8 + ori $s7, $zero, 492 + mul.d $a1, $a1, $s7 add.d $a1, $s3, $a1 ori $a2, $zero, 492 pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 - fldx.d $fa0, $s4, $s5 - fld.d $fa1, $s7, 8 + fldx.d $fa0, $s4, $s6 + fld.d $fa1, $s8, 8 vldi $vr2, -912 fsub.d $fa3, $fa2, $fs0 fmul.d $fa0, $fa3, $fa0 @@ -7754,8 +7718,8 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init and $a1, $a1, $a2 sub.w $a0, $a0, $a1 movgr2cf $fcc0, $a1 - mul.d $a0, $a0, $s8 - lu12i.w $s8, 1 + mul.d $a0, $a0, $s7 + lu12i.w $a4, 1 add.d $a0, $s3, $a0 fld.s $fa3, $a0, 4 fsel $fa0, $fa0, $fa2, $fcc0 @@ -7767,7 +7731,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fmul.d $fa3, $fa0, $fa3 fadd.d $fa2, $fa3, $fa2 fcvt.s.d $fa2, $fa2 - ori $a1, $s8, 852 + ori $a1, $a4, 852 fstx.s $fa2, $s2, $a1 fld.s $fa2, $a0, 32 fld.s $fa3, $a0, 524 @@ -7777,7 +7741,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fmul.d $fa3, $fa0, $fa3 fadd.d $fa2, $fa3, $fa2 fcvt.s.d $fa2, $fa2 - ori $a1, $s8, 880 + ori $a1, $a4, 880 fstx.s $fa2, $s2, $a1 fld.s $fa2, $a0, 8 fld.s $fa3, $a0, 500 @@ -7787,7 +7751,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fmul.d $fa3, $fa0, $fa3 fadd.d $fa2, $fa2, $fa3 fcvt.s.d $fa2, $fa2 - ori $a1, $s8, 856 + ori $a1, $a4, 856 fstx.s $fa2, $s2, $a1 fld.s $fa2, $a0, 36 fld.s $fa3, $a0, 528 @@ -7797,7 +7761,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fmul.d $fa3, $fa0, $fa3 fadd.d $fa2, $fa2, $fa3 fcvt.s.d $fa2, $fa2 - ori $a1, $s8, 884 + ori $a1, $a4, 884 fstx.s $fa2, $s2, $a1 fld.s $fa2, $a0, 12 fld.s $fa3, $a0, 504 @@ -7807,7 +7771,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fmul.d $fa3, $fa0, $fa3 fadd.d $fa2, $fa2, $fa3 fcvt.s.d $fa2, $fa2 - ori $a1, $s8, 860 + ori $a1, $a4, 860 fstx.s $fa2, $s2, $a1 fld.s $fa2, $a0, 40 fld.s $fa3, $a0, 532 @@ -7817,7 +7781,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fmul.d $fa3, $fa0, $fa3 fadd.d $fa2, $fa2, $fa3 fcvt.s.d $fa2, $fa2 - ori $a1, $s8, 888 + ori $a1, $a4, 888 fstx.s $fa2, $s2, $a1 fld.s $fa2, $a0, 16 fld.s $fa3, $a0, 508 @@ -7827,7 +7791,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fmul.d $fa3, $fa0, $fa3 fadd.d $fa2, $fa2, $fa3 fcvt.s.d $fa2, $fa2 - ori $a1, $s8, 864 + ori $a1, $a4, 864 fstx.s $fa2, $s2, $a1 fld.s $fa2, $a0, 44 fld.s $fa3, $a0, 536 @@ -7838,19 +7802,17 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fadd.d $fa0, $fa1, $fa0 fldx.d $fa1, $s2, $s0 fcvt.s.d $fa0, $fa0 - ori $a0, $s8, 892 + ori $a0, $a4, 892 fstx.s $fa0, $s2, $a0 fcvt.s.d $fa0, $fa1 - ori $a0, $s8, 916 + ori $a0, $a4, 916 fstx.s $fa0, $s2, $a0 - ld.d $a1, $s6, 264 + ld.d $a1, $s5, 264 ld.d $a0, $fp, 48 - pcalau12i $a2, %pc_hi20(.LCPI41_3) - st.d $a2, $sp, 104 # 8-byte Folded Spill move $a7, $fp beqz $a1, .LBB41_14 # %bb.11: - ori $a2, $s8, 1536 + ori $a2, $a4, 1536 fldx.d $fa0, $s1, $a2 fcvt.s.d $fa0, $fa0 ftintrz.w.s $fa1, $fa0 @@ -7859,26 +7821,26 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init mul.d $a3, $a2, $a3 vldx $vr1, $a1, $a3 add.d $a1, $a1, $a3 - ori $a3, $s8, 1100 + ori $a3, $a4, 1100 vstx $vr1, $a0, $a3 vld $vr1, $a1, 32 - ori $a3, $s8, 1132 + ori $a3, $a4, 1132 vstx $vr1, $a0, $a3 vld $vr1, $a1, 16 - ori $a3, $s8, 1116 + ori $a3, $a4, 1116 vstx $vr1, $a0, $a3 ld.w $a3, $a1, 56 stptr.w $a3, $a0, 5252 ld.d $a3, $a1, 48 stptr.d $a3, $a0, 5244 vld $vr1, $a1, 100 - ori $a3, $s8, 1200 + ori $a3, $a4, 1200 vstx $vr1, $a0, $a3 vld $vr1, $a1, 84 - ori $a3, $s8, 1184 + ori $a3, $a4, 1184 vstx $vr1, $a0, $a3 vld $vr1, $a1, 68 - ori $a3, $s8, 1168 + ori $a3, $a4, 1168 vstx $vr1, $a0, $a3 ld.w $a3, $a1, 116 stptr.w $a3, $a0, 5312 @@ -7889,28 +7851,30 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init ffint.s.w $fa1, $fa1 fsub.s $fa0, $fa0, $fa1 fcvt.d.s $fa0, $fa0 - ld.d $s7, $sp, 88 # 8-byte Folded Reload - ld.d $fp, $sp, 112 # 8-byte Folded Reload + ld.d $s6, $sp, 80 # 8-byte Folded Reload + ld.d $fp, $sp, 96 # 8-byte Folded Reload beqz $a3, .LBB41_15 # %bb.12: # %.preheader104.i ld.d $a3, $a0, 0 move $a2, $zero vldi $vr1, -912 - fsub.d $fa1, $fa1, $fa0 movgr2fr.d $fa2, $a3 - ffint.d.l $fa2, $fa2 - ori $a3, $s8, 920 + ori $a3, $a4, 920 + add.d $a3, $a0, $a3 ld.d $a4, $a0, 8 - add.d $a0, $a0, $a3 - addi.d $a1, $a1, 420 - ld.d $a3, $sp, 104 # 8-byte Folded Reload - fld.d $fa3, $a3, %pc_lo12(.LCPI41_3) - movgr2fr.d $fa4, $a4 - ffint.d.l $fa4, $fa4 - ori $a3, $zero, 60 + fsub.d $fa1, $fa1, $fa0 + ffint.d.l $fa2, $fa2 + addi.d $a0, $a1, 420 + movgr2fr.d $fa3, $a4 + ffint.d.l $fa3, $fa3 + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1032 + movgr2fr.d $fa4, $a1 + ori $a1, $zero, 60 .p2align 4, , 16 .LBB41_13: # =>This Inner Loop Header: Depth=1 - add.d $a4, $a1, $a2 + add.d $a4, $a0, $a2 fld.s $fa5, $a4, -300 fld.s $fa6, $a4, -60 fcvt.d.s $fa5, $fa5 @@ -7921,28 +7885,28 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init ld.d $a5, $a7, 8 fcvt.s.d $fa5, $fa5 fcvt.d.s $fa6, $fa5 - fmul.d $fa6, $fa6, $fa3 + fmul.d $fa6, $fa6, $fa4 movgr2fr.d $fa7, $a5 ffint.d.l $fa7, $fa7 fdiv.d $fa7, $fa6, $fa7 fmul.d $fa7, $fa7, $fa2 ftintrz.w.d $fa7, $fa7 movfr2gr.s $a5, $fa7 - add.d $a6, $a0, $a2 + add.d $a6, $a3, $a2 st.w $a5, $a6, 60 ld.d $a5, $a7, 8 movgr2fr.d $fa7, $a5 ffint.d.l $fa7, $fa7 fdiv.d $fa6, $fa6, $fa7 - fmul.d $fa6, $fa6, $fa4 + fmul.d $fa6, $fa6, $fa3 ftintrz.w.d $fa6, $fa6 movfr2gr.s $a5, $fa6 st.w $a5, $a6, 120 ftintrz.w.s $fa5, $fa5 movfr2gr.s $a5, $fa5 - stx.w $a5, $a0, $a2 + stx.w $a5, $a3, $a2 fld.s $fa5, $a4, -240 - fldx.s $fa6, $a1, $a2 + fldx.s $fa6, $a0, $a2 fcvt.d.s $fa5, $fa5 fmul.d $fa5, $fa1, $fa5 fcvt.d.s $fa6, $fa6 @@ -7951,7 +7915,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init ld.d $a4, $a7, 8 fcvt.s.d $fa5, $fa5 fcvt.d.s $fa5, $fa5 - fmul.d $fa5, $fa5, $fa3 + fmul.d $fa5, $fa5, $fa4 movgr2fr.d $fa6, $a4 ffint.d.l $fa6, $fa6 fdiv.d $fa6, $fa5, $fa6 @@ -7963,12 +7927,12 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init movgr2fr.d $fa6, $a4 ffint.d.l $fa6, $fa6 fdiv.d $fa5, $fa5, $fa6 - fmul.d $fa5, $fa5, $fa4 + fmul.d $fa5, $fa5, $fa3 ftintrz.w.d $fa5, $fa5 movfr2gr.s $a4, $fa5 addi.d $a2, $a2, 4 st.w $a4, $a6, 360 - bne $a2, $a3, .LBB41_13 + bne $a2, $a1, .LBB41_13 b .LBB41_19 .LBB41_14: # %.preheader.i ld.w $a1, $a0, 0 @@ -8003,8 +7967,8 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init stptr.w $a2, $a0, 5428 stptr.w $a1, $a0, 5372 stptr.w $a2, $a0, 5432 - ld.d $s7, $sp, 88 # 8-byte Folded Reload - ld.d $fp, $sp, 112 # 8-byte Folded Reload + ld.d $s6, $sp, 80 # 8-byte Folded Reload + ld.d $fp, $sp, 96 # 8-byte Folded Reload b .LBB41_19 .LBB41_15: fld.s $fa1, $a1, 148 @@ -8016,13 +7980,15 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fcvt.d.s $fa2, $fa2 fmul.d $fa2, $fa0, $fa2 fadd.d $fa1, $fa1, $fa2 - ld.d $a2, $sp, 104 # 8-byte Folded Reload - fld.d $fa4, $a2, %pc_lo12(.LCPI41_3) - ld.d $a2, $a0, 0 fcvt.s.d $fa2, $fa1 fcvt.d.s $fa1, $fa2 - fmul.d $fa5, $fa1, $fa4 - movgr2fr.d $fa1, $a2 + ori $a2, $zero, 0 + lu32i.d $a2, -49152 + ld.d $a3, $a0, 0 + lu52i.d $a2, $a2, 1032 + movgr2fr.d $fa4, $a2 + fmul.d $fa4, $fa1, $fa4 + movgr2fr.d $fa1, $a3 ld.d $a3, $a0, 8 ffint.d.l $fa1, $fa1 ftintrz.w.s $fa2, $fa2 @@ -8033,36 +7999,40 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init .p2align 4, , 16 .LBB41_16: # =>This Inner Loop Header: Depth=1 ld.d $a4, $a7, 8 - movgr2fr.d $fa6, $a4 - ffint.d.l $fa6, $fa6 - fdiv.d $fa6, $fa5, $fa6 - fmul.d $fa6, $fa6, $fa1 - ftintrz.w.d $fa6, $fa6 - movfr2gr.s $a4, $fa6 + movgr2fr.d $fa5, $a4 + ffint.d.l $fa5, $fa5 + fdiv.d $fa5, $fa4, $fa5 + fmul.d $fa5, $fa5, $fa1 + ftintrz.w.d $fa5, $fa5 + movfr2gr.s $a4, $fa5 add.d $a5, $a0, $a3 stptr.w $a4, $a5, 5136 ld.d $a4, $a7, 8 - movgr2fr.d $fa6, $a4 - ffint.d.l $fa6, $fa6 - fdiv.d $fa6, $fa5, $fa6 - fmul.d $fa6, $fa6, $fa2 - ftintrz.w.d $fa6, $fa6 - movfr2gr.s $a4, $fa6 + movgr2fr.d $fa5, $a4 + ffint.d.l $fa5, $fa5 + fdiv.d $fa5, $fa4, $fa5 + fmul.d $fa5, $fa5, $fa2 + ftintrz.w.d $fa5, $fa5 + movfr2gr.s $a4, $fa5 stptr.w $a4, $a5, 5196 addi.d $a3, $a3, 4 stptr.w $a2, $a5, 5076 bnez $a3, .LBB41_16 # %bb.17: - fld.s $fa5, $a1, 208 - fld.s $fa6, $a1, 448 - fcvt.d.s $fa5, $fa5 - fmul.d $fa3, $fa3, $fa5 - fcvt.d.s $fa5, $fa6 - fmul.d $fa0, $fa0, $fa5 + fld.s $fa4, $a1, 208 + fld.s $fa5, $a1, 448 + fcvt.d.s $fa4, $fa4 + fmul.d $fa3, $fa3, $fa4 + fcvt.d.s $fa4, $fa5 + fmul.d $fa0, $fa0, $fa4 fadd.d $fa0, $fa3, $fa0 fcvt.s.d $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fmul.d $fa0, $fa0, $fa4 + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1032 + movgr2fr.d $fa3, $a1 + fmul.d $fa0, $fa0, $fa3 addi.w $a1, $zero, -60 .p2align 4, , 16 .LBB41_18: # =>This Inner Loop Header: Depth=1 @@ -8086,18 +8056,17 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init stptr.w $a2, $a3, 5436 bnez $a1, .LBB41_18 .LBB41_19: # %vorbis_encode_global_stereo.exit - st.d $s1, $sp, 24 # 8-byte Folded Spill + st.d $s1, $sp, 16 # 8-byte Folded Spill fldx.d $fs1, $s1, $fp - ld.d $s4, $a7, 48 - ld.d $s5, $s6, 184 - ld.d $s3, $s6, 200 - ld.d $s2, $s6, 216 - ld.w $a0, $s4, 36 - ldptr.d $s0, $s4, 4912 + ld.d $s8, $a7, 48 + ld.d $s4, $s5, 184 + ld.d $s3, $s5, 200 + ld.d $s2, $s5, 216 + ld.w $a0, $s8, 36 + ldptr.d $s0, $s8, 4912 blez $a0, .LBB41_62 # %bb.20: - st.d $a7, $sp, 112 # 8-byte Folded Spill - ld.d $s1, $sp, 72 # 8-byte Folded Reload + st.d $a7, $sp, 96 # 8-byte Folded Spill beqz $s0, .LBB41_63 .LBB41_21: pcalau12i $a0, %pc_hi20(_psy_info_template) @@ -8107,7 +8076,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 st.w $zero, $s0, 0 - ldptr.w $a0, $s4, 5628 + ldptr.w $a0, $s8, 5628 beqz $a0, .LBB41_23 # %bb.22: ftintrz.w.d $fa0, $fs1 @@ -8116,7 +8085,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init lu32i.d $a1, 1 st.d $a1, $s0, 500 slli.d $a1, $a0, 2 - ldx.w $a2, $s5, $a1 + ldx.w $a2, $s4, $a1 st.w $a2, $s0, 508 ldx.w $a1, $s3, $a1 st.w $a1, $s0, 512 @@ -8124,16 +8093,17 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fldx.d $fa0, $s2, $a0 fst.d $fa0, $s0, 520 .LBB41_23: # %vorbis_encode_psyset_setup.exit - ld.d $s5, $sp, 24 # 8-byte Folded Reload - fldx.d $fs1, $s5, $fp - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $s7, $sp, 16 # 8-byte Folded Reload + fldx.d $fs1, $s7, $fp + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.d $s4, $a0, 48 - ld.d $s3, $s6, 184 - ld.d $s2, $s6, 200 - ld.d $fp, $s6, 216 + ld.d $s3, $s5, 184 + ld.d $s2, $s5, 200 + ld.d $fp, $s5, 216 ld.w $a0, $s4, 36 ldptr.d $s0, $s4, 4920 ori $a1, $zero, 1 + ld.d $s8, $sp, 72 # 8-byte Folded Reload blt $a1, $a0, .LBB41_25 # %bb.24: ori $a0, $zero, 2 @@ -8172,24 +8142,25 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fldx.d $fa0, $fp, $a0 fst.d $fa0, $s0, 520 .LBB41_29: # %vorbis_encode_psyset_setup.exit215 - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 88 # 8-byte Folded Reload sltui $fp, $a0, 1 - ld.d $a6, $sp, 112 # 8-byte Folded Reload - beq $s1, $s7, .LBB41_42 + ld.d $s1, $sp, 96 # 8-byte Folded Reload + beq $s6, $s8, .LBB41_42 # %bb.30: - ori $s2, $s8, 1440 - fldx.d $fs1, $s5, $s2 - ld.d $s7, $a6, 48 - ld.d $s4, $s6, 192 - ld.d $s5, $s6, 208 - ld.d $s3, $s6, 216 - ld.w $a0, $s7, 36 - ldptr.d $s0, $s7, 4928 + lu12i.w $a0, 1 + ori $s2, $a0, 1440 + fldx.d $fs1, $s7, $s2 + ld.d $s6, $s1, 48 + ld.d $s8, $s5, 192 + ld.d $s4, $s5, 208 + ld.d $s3, $s5, 216 + ld.w $a0, $s6, 36 + ldptr.d $s0, $s6, 4928 ori $a1, $zero, 2 blt $a1, $a0, .LBB41_32 # %bb.31: ori $a0, $zero, 3 - st.w $a0, $s7, 36 + st.w $a0, $s6, 36 .LBB41_32: bnez $s0, .LBB41_34 # %bb.33: @@ -8198,7 +8169,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init pcaddu18i $ra, %call36(calloc) jirl $ra, $ra, 0 move $s0, $a0 - stptr.d $a0, $s7, 4928 + stptr.d $a0, $s6, 4928 .LBB41_34: pcalau12i $a0, %pc_hi20(_psy_info_template) addi.d $a1, $a0, %pc_lo12(_psy_info_template) @@ -8208,7 +8179,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init jirl $ra, $ra, 0 ori $a0, $zero, 1 st.w $a0, $s0, 0 - ldptr.w $a1, $s7, 5628 + ldptr.w $a1, $s6, 5628 beqz $a1, .LBB41_36 # %bb.35: ftintrz.w.d $fa0, $fs1 @@ -8216,30 +8187,30 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init lu32i.d $a0, 1 st.d $a0, $s0, 500 slli.d $a0, $a1, 2 - ldx.w $a2, $s4, $a0 + ldx.w $a2, $s8, $a0 st.w $a2, $s0, 508 - ldx.w $a0, $s5, $a0 + ldx.w $a0, $s4, $a0 st.w $a0, $s0, 512 slli.d $a0, $a1, 3 fldx.d $fa0, $s3, $a0 fst.d $fa0, $s0, 520 .LBB41_36: # %vorbis_encode_psyset_setup.exit220 - ld.d $a0, $sp, 24 # 8-byte Folded Reload - fldx.d $fs1, $a0, $s2 - ld.d $a0, $sp, 112 # 8-byte Folded Reload - ld.d $s5, $a0, 48 - ld.d $s4, $s6, 192 - ld.d $s3, $s6, 208 - ld.d $s2, $s6, 216 - ld.w $a0, $s5, 36 - ldptr.d $s0, $s5, 4936 + ld.d $s7, $sp, 16 # 8-byte Folded Reload + fldx.d $fs1, $s7, $s2 + ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $s6, $a0, 48 + ld.d $s4, $s5, 192 + ld.d $s3, $s5, 208 + ld.d $s2, $s5, 216 + ld.w $a0, $s6, 36 + ldptr.d $s0, $s6, 4936 ori $a1, $zero, 3 - ld.d $s7, $sp, 88 # 8-byte Folded Reload blt $a1, $a0, .LBB41_38 # %bb.37: ori $a0, $zero, 4 - st.w $a0, $s5, 36 + st.w $a0, $s6, 36 .LBB41_38: + ld.d $s8, $sp, 72 # 8-byte Folded Reload bnez $s0, .LBB41_40 # %bb.39: ori $a0, $zero, 1 @@ -8247,7 +8218,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init pcaddu18i $ra, %call36(calloc) jirl $ra, $ra, 0 move $s0, $a0 - stptr.d $a0, $s5, 4936 + stptr.d $a0, $s6, 4936 .LBB41_40: pcalau12i $a0, %pc_hi20(_psy_info_template) addi.d $a1, $a0, %pc_lo12(_psy_info_template) @@ -8257,9 +8228,9 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init jirl $ra, $ra, 0 ori $a0, $zero, 1 st.w $a0, $s0, 0 - ldptr.w $a1, $s5, 5628 - ld.d $s5, $sp, 24 # 8-byte Folded Reload - ld.d $a6, $sp, 112 # 8-byte Folded Reload + ldptr.w $a1, $s6, 5628 + ld.d $s1, $sp, 96 # 8-byte Folded Reload + ld.d $s6, $sp, 80 # 8-byte Folded Reload beqz $a1, .LBB41_42 # %bb.41: ftintrz.w.d $fa0, $fs1 @@ -8276,51 +8247,52 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fst.d $fa0, $s0, 520 .LBB41_42: # %vorbis_encode_psyset_setup.exit225 slli.d $a0, $fp, 5 - add.d $a1, $s5, $a0 - ori $a5, $s8, 1584 + add.d $a1, $s7, $a0 + lu12i.w $fp, 1 + ori $a5, $fp, 1584 fldx.d $fa0, $a1, $a5 - ld.d $a2, $s6, 64 - ld.d $a3, $s6, 72 - ld.d $a4, $s6, 88 - ld.d $a0, $a6, 48 + ld.d $a2, $s5, 64 + ld.d $a3, $s5, 72 + ld.d $a4, $s5, 88 + ld.d $a0, $s1, 48 add.d $s0, $a1, $a5 move $a1, $zero - move $fp, $a6 pcaddu18i $ra, %call36(vorbis_encode_tonemask_setup) jirl $ra, $ra, 0 - ori $a0, $s8, 1616 - fldx.d $fa0, $s5, $a0 - ld.d $a2, $s6, 64 - ld.d $a3, $s6, 72 - ld.d $a4, $s6, 104 - ld.d $a0, $fp, 48 + ori $a0, $fp, 1616 + fldx.d $fa0, $s7, $a0 + ld.d $a2, $s5, 64 + ld.d $a3, $s5, 72 + ld.d $a4, $s5, 104 + ld.d $a0, $s1, 48 ori $a1, $zero, 1 pcaddu18i $ra, %call36(vorbis_encode_tonemask_setup) jirl $ra, $ra, 0 - beq $s1, $s7, .LBB41_44 + beq $s6, $s8, .LBB41_44 # %bb.43: - ori $a0, $s8, 1648 - fldx.d $fa0, $s5, $a0 - ld.d $a2, $s6, 64 - ld.d $a3, $s6, 72 - ld.d $a4, $s6, 104 - ld.d $a0, $fp, 48 + lu12i.w $fp, 1 + ori $a0, $fp, 1648 + fldx.d $fa0, $s7, $a0 + ld.d $a2, $s5, 64 + ld.d $a3, $s5, 72 + ld.d $a4, $s5, 104 + ld.d $a0, $s1, 48 ori $a1, $zero, 2 pcaddu18i $ra, %call36(vorbis_encode_tonemask_setup) jirl $ra, $ra, 0 - ori $a0, $s8, 1680 - fldx.d $fa0, $s5, $a0 - ld.d $a2, $s6, 64 - ld.d $a3, $s6, 72 - ld.d $a4, $s6, 96 - ld.d $a0, $fp, 48 + ori $a0, $fp, 1680 + fldx.d $fa0, $s7, $a0 + ld.d $a2, $s5, 64 + ld.d $a3, $s5, 72 + ld.d $a4, $s5, 96 + ld.d $a0, $s1, 48 ori $a1, $zero, 3 pcaddu18i $ra, %call36(vorbis_encode_tonemask_setup) jirl $ra, $ra, 0 .LBB41_44: # %vector.memcheck fld.d $fa0, $s0, 24 - ld.d $a0, $s6, 168 - ld.d $a1, $fp, 48 + ld.d $a0, $s5, 168 + ld.d $a1, $s1, 48 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a2, $fa1 movgr2fr.w $fa1, $a2 @@ -8333,7 +8305,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fsub.d $fa3, $fa2, $fa0 fld.d $fa4, $a3, 8 fmul.d $fa1, $fa3, $fa1 - ld.d $a0, $s6, 160 + ld.d $a0, $s5, 160 ldptr.d $a3, $a1, 4912 fmul.d $fa0, $fa0, $fa4 fadd.d $fa0, $fa1, $fa0 @@ -8355,7 +8327,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init addi.d $a1, $a3, 336 addi.d $a4, $a0, 320 fsub.d $fa1, $fa2, $fa0 - move $a6, $fp + move $a6, $s1 bgeu $a1, $a4, .LBB41_48 # %bb.45: # %vector.memcheck addi.d $a3, $a3, 496 @@ -8440,9 +8412,10 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init addi.d $a3, $a3, 16 bne $a3, $a2, .LBB41_49 .LBB41_50: # %vorbis_encode_compand_setup.exit - ori $a0, $s8, 1640 - fldx.d $fa0, $s5, $a0 - ld.d $a0, $s6, 168 + lu12i.w $a0, 1 + ori $a0, $a0, 1640 + fldx.d $fa0, $s7, $a0 + ld.d $a0, $s5, 168 ld.d $a1, $a6, 48 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a2, $fa1 @@ -8456,7 +8429,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fsub.d $fa3, $fa2, $fa0 fld.d $fa4, $a3, 8 fmul.d $fa1, $fa3, $fa1 - ld.d $a0, $s6, 160 + ld.d $a0, $s5, 160 ldptr.d $a3, $a1, 4920 fmul.d $fa0, $fa0, $fa4 fadd.d $fa0, $fa1, $fa0 @@ -8562,17 +8535,19 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init addi.d $a3, $a3, 16 bne $a3, $a2, .LBB41_55 .LBB41_56: # %vorbis_encode_compand_setup.exit237 - bne $s1, $s7, .LBB41_58 + bne $s6, $s8, .LBB41_58 # %bb.57: # %.critedge addi.d $a3, $s0, 8 - ori $a0, $s8, 824 - ori $a1, $s8, 1624 - ori $a2, $s8, 816 + lu12i.w $a2, 1 + ori $a0, $a2, 824 + ori $a1, $a2, 1624 + ori $a2, $a2, 816 b .LBB41_73 .LBB41_58: # %vector.memcheck405 - ori $a0, $s8, 1672 - fldx.d $fa0, $s5, $a0 - ld.d $a0, $s6, 176 + lu12i.w $a0, 1 + ori $a0, $a0, 1672 + fldx.d $fa0, $s7, $a0 + ld.d $a0, $s5, 176 ld.d $a1, $a6, 48 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a2, $fa1 @@ -8586,7 +8561,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fsub.d $fa3, $fa2, $fa0 fld.d $fa4, $a3, 8 fmul.d $fa1, $fa3, $fa1 - ld.d $a0, $s6, 160 + ld.d $a0, $s5, 160 ldptr.d $a3, $a1, 4928 fmul.d $fa0, $fa0, $fa4 fadd.d $fa0, $fa1, $fa0 @@ -8635,9 +8610,8 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init b .LBB41_66 .LBB41_62: ori $a0, $zero, 1 - st.w $a0, $s4, 36 - st.d $a7, $sp, 112 # 8-byte Folded Spill - ld.d $s1, $sp, 72 # 8-byte Folded Reload + st.w $a0, $s8, 36 + st.d $a7, $sp, 96 # 8-byte Folded Spill bnez $s0, .LBB41_21 .LBB41_63: ori $a0, $zero, 1 @@ -8645,7 +8619,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init pcaddu18i $ra, %call36(calloc) jirl $ra, $ra, 0 move $s0, $a0 - stptr.d $a0, $s4, 4912 + stptr.d $a0, $s8, 4912 b .LBB41_21 .LBB41_64: # %vector.ph412 move $a3, $zero @@ -8706,9 +8680,10 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init addi.d $a3, $a3, 16 bne $a3, $a2, .LBB41_65 .LBB41_66: # %vorbis_encode_compand_setup.exit244 - ori $a0, $s8, 1704 - fldx.d $fa0, $s5, $a0 - ld.d $a0, $s6, 176 + lu12i.w $a0, 1 + ori $a0, $a0, 1704 + fldx.d $fa0, $s7, $a0 + ld.d $a0, $s5, 176 ld.d $a1, $a6, 48 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a2, $fa1 @@ -8722,7 +8697,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fsub.d $fa3, $fa2, $fa0 fld.d $fa4, $a3, 8 fmul.d $fa1, $fa3, $fa1 - ld.d $a0, $s6, 160 + ld.d $a0, $s5, 160 ldptr.d $a3, $a1, 4936 fmul.d $fa0, $fa0, $fa4 fadd.d $fa0, $fa1, $fa0 @@ -8829,7 +8804,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init bne $a3, $a2, .LBB41_71 .LBB41_72: # %vorbis_encode_compand_setup.exit251 fld.d $fa0, $s0, 8 - ld.d $a0, $s6, 80 + ld.d $a0, $s5, 80 ld.d $a1, $a6, 48 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a2, $fa1 @@ -8852,9 +8827,10 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fadd.d $fa0, $fa1, $fa0 fcvt.s.d $fa0, $fa0 fst.s $fa0, $a1, 32 - ori $a0, $s8, 1624 - fldx.d $fa0, $s5, $a0 - ld.d $a0, $s6, 80 + lu12i.w $a4, 1 + ori $a0, $a4, 1624 + fldx.d $fa0, $s7, $a0 + ld.d $a0, $s5, 80 ld.d $a1, $a6, 48 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a2, $fa1 @@ -8876,14 +8852,14 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fadd.d $fa0, $fa1, $fa0 fcvt.s.d $fa0, $fa0 fst.s $fa0, $a1, 32 - ori $a0, $s8, 1656 - add.d $a3, $s5, $a0 - ori $a0, $s8, 840 - ori $a1, $s8, 1688 - ori $a2, $s8, 832 + ori $a0, $a4, 1656 + add.d $a3, $s7, $a0 + ori $a0, $a4, 840 + ori $a1, $a4, 1688 + ori $a2, $a4, 832 .LBB41_73: fld.d $fa0, $a3, 0 - ld.d $a3, $s6, 80 + ld.d $a3, $s5, 80 ld.d $a4, $a6, 48 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a5, $fa1 @@ -8906,8 +8882,8 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fadd.d $fa0, $fa1, $fa0 fcvt.s.d $fa0, $fa0 fst.s $fa0, $a2, 32 - fldx.d $fa0, $s5, $a1 - ld.d $a1, $s6, 80 + fldx.d $fa0, $s7, $a1 + ld.d $a1, $s5, 80 ld.d $a2, $a6, 48 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a3, $fa1 @@ -8930,71 +8906,72 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fcvt.s.d $fa0, $fa0 fst.s $fa0, $a0, 32 fld.d $fa0, $s0, 16 - ld.d $a2, $s6, 152 - ld.d $a3, $s6, 120 - ld.d $a4, $s6, 112 - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a2, $s5, 152 + ld.d $a3, $s5, 120 + ld.d $a4, $s5, 112 + lu12i.w $fp, 1 + ld.d $a0, $sp, 88 # 8-byte Folded Reload beqz $a0, .LBB41_75 # %bb.74: - ori $a0, $s8, 1456 - fldx.d $fs0, $s5, $a0 + ori $a0, $fp, 1456 + fldx.d $fs0, $s7, $a0 .LBB41_75: ld.d $a0, $a6, 48 move $a1, $zero fmov.d $fa1, $fs0 - move $fp, $a6 + move $s0, $a6 pcaddu18i $ra, %call36(vorbis_encode_noisebias_setup) jirl $ra, $ra, 0 - ori $a0, $s8, 1632 - fldx.d $fa0, $s5, $a0 - ld.d $a2, $s6, 152 - ld.d $a3, $s6, 128 - ld.d $a4, $s6, 112 - ld.d $a0, $fp, 48 + ori $a0, $fp, 1632 + fldx.d $fa0, $s7, $a0 + ld.d $a2, $s5, 152 + ld.d $a3, $s5, 128 + ld.d $a4, $s5, 112 + ld.d $a0, $s0, 48 movgr2fr.d $fa1, $zero ori $a1, $zero, 1 - ori $s0, $zero, 1 + ori $s1, $zero, 1 pcaddu18i $ra, %call36(vorbis_encode_noisebias_setup) jirl $ra, $ra, 0 - bne $s1, $s7, .LBB41_77 + bne $s6, $s8, .LBB41_77 # %bb.76: - ori $a0, $s8, 824 - ori $a1, $s8, 816 - ld.d $a4, $sp, 80 # 8-byte Folded Reload + ori $a0, $fp, 824 + ori $a1, $fp, 816 + ld.d $a4, $sp, 64 # 8-byte Folded Reload b .LBB41_78 .LBB41_77: - ori $a0, $s8, 1664 - fldx.d $fa0, $s5, $a0 - ld.d $a2, $s6, 152 - ld.d $a3, $s6, 136 - ld.d $a4, $s6, 112 - ld.d $a0, $fp, 48 + ori $a0, $fp, 1664 + fldx.d $fa0, $s7, $a0 + ld.d $a2, $s5, 152 + ld.d $a3, $s5, 136 + ld.d $a4, $s5, 112 + ld.d $a0, $s0, 48 movgr2fr.d $fs0, $zero ori $a1, $zero, 2 fmov.d $fa1, $fs0 pcaddu18i $ra, %call36(vorbis_encode_noisebias_setup) jirl $ra, $ra, 0 - ori $a0, $s8, 1696 - fldx.d $fa0, $s5, $a0 - ld.d $a2, $s6, 152 - ld.d $a3, $s6, 144 - ld.d $a4, $s6, 112 - ld.d $a0, $fp, 48 + ori $a0, $fp, 1696 + fldx.d $fa0, $s7, $a0 + ld.d $a2, $s5, 152 + ld.d $a3, $s5, 144 + ld.d $a4, $s5, 112 + ld.d $a0, $s0, 48 ori $a1, $zero, 3 fmov.d $fa1, $fs0 pcaddu18i $ra, %call36(vorbis_encode_noisebias_setup) jirl $ra, $ra, 0 - ld.d $a0, $fp, 48 - ld.d $a4, $sp, 80 # 8-byte Folded Reload + ld.d $a0, $s0, 48 + ld.d $a4, $sp, 64 # 8-byte Folded Reload fldx.d $fa0, $a0, $a4 ldptr.d $a1, $a0, 4912 fcvt.s.d $fa0, $fa0 fst.s $fa0, $a1, 4 - ori $a2, $s8, 1560 + ori $a2, $fp, 1560 fldx.d $fa0, $a0, $a2 fcvt.s.d $fa0, $fa0 fst.s $fa0, $a1, 8 - ld.d $a0, $fp, 48 + ld.d $a0, $s0, 48 fldx.d $fa0, $a0, $a4 ldptr.d $a1, $a0, 4920 fcvt.s.d $fa0, $fa0 @@ -9002,19 +8979,19 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fldx.d $fa0, $a0, $a2 fcvt.s.d $fa0, $fa0 fst.s $fa0, $a1, 8 - ori $a0, $s8, 840 - ori $a1, $s8, 832 + ori $a0, $fp, 840 + ori $a1, $fp, 832 .LBB41_78: # %.critedge178 - ld.d $a2, $fp, 48 + ld.d $a2, $s0, 48 fldx.d $fa0, $a2, $a4 ldx.d $a1, $a2, $a1 fcvt.s.d $fa0, $fa0 fst.s $fa0, $a1, 4 - ori $a3, $s8, 1560 + ori $a3, $fp, 1560 fldx.d $fa0, $a2, $a3 fcvt.s.d $fa0, $fa0 fst.s $fa0, $a1, 8 - ld.d $a1, $fp, 48 + ld.d $a1, $s0, 48 fldx.d $fa0, $a1, $a4 ldx.d $a0, $a1, $a0 fcvt.s.d $fa0, $fa0 @@ -9022,45 +8999,54 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init fldx.d $fa0, $a1, $a3 fcvt.s.d $fa0, $fa0 fst.s $fa0, $a0, 8 - ld.d $a0, $sp, 16 # 8-byte Folded Reload - fldx.d $fa0, $s5, $a0 - ld.d $a0, $s6, 304 - ld.d $fp, $fp, 48 + ld.d $a0, $sp, 8 # 8-byte Folded Reload + fldx.d $fa0, $s7, $a0 + ld.d $a0, $s5, 304 + ld.d $fp, $s0, 48 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a1, $fa0 alsl.d $a2, $a1, $a0, 4 slli.d $a1, $a1, 4 ldx.d $a0, $a0, $a1 - st.d $a0, $sp, 72 # 8-byte Folded Spill + st.d $a0, $sp, 64 # 8-byte Folded Spill ld.d $a0, $a2, 8 - st.d $a0, $sp, 96 # 8-byte Folded Spill + st.d $a0, $sp, 88 # 8-byte Folded Spill ld.d $a0, $fp, 0 ld.d $a1, $fp, 8 - addi.d $s1, $fp, 808 + addi.d $s0, $fp, 808 addi.d $a2, $fp, 40 - st.d $a2, $sp, 56 # 8-byte Folded Spill - addi.d $a2, $fp, 552 st.d $a2, $sp, 48 # 8-byte Folded Spill + addi.d $a2, $fp, 552 + st.d $a2, $sp, 40 # 8-byte Folded Spill pcalau12i $a2, %pc_hi20(_mode_template) addi.d $a2, $a2, %pc_lo12(_mode_template) - st.d $a2, $sp, 40 # 8-byte Folded Spill - move $s6, $zero + st.d $a2, $sp, 32 # 8-byte Folded Spill + move $s4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -49152 + lu52i.d $a2, $a2, 1032 + movgr2fr.d $fs0, $a2 + lu12i.w $a2, -209716 + ori $a2, $a2, 3277 + lu32i.d $a2, -209716 + lu52i.d $a2, $a2, 1022 + movgr2fr.d $fs1, $a2 xor $a0, $a0, $a1 sltu $a0, $zero, $a0 - st.d $a0, $sp, 32 # 8-byte Folded Spill - st.d $fp, $sp, 80 # 8-byte Folded Spill - st.d $s1, $sp, 64 # 8-byte Folded Spill + st.d $a0, $sp, 24 # 8-byte Folded Spill + st.d $fp, $sp, 72 # 8-byte Folded Spill + st.d $s0, $sp, 56 # 8-byte Folded Spill b .LBB41_80 .p2align 4, , 16 .LBB41_79: # %._crit_edge.i # in Loop: Header=BB41_80 Depth=1 - ld.d $a0, $sp, 88 # 8-byte Folded Reload - ld.d $a1, $sp, 32 # 8-byte Folded Reload + ld.d $a0, $sp, 80 # 8-byte Folded Reload + ld.d $a1, $sp, 24 # 8-byte Folded Reload and $a0, $a1, $a0 - ori $s6, $zero, 1 - move $s0, $zero - ld.d $fp, $sp, 80 # 8-byte Folded Reload - ld.d $s1, $sp, 64 # 8-byte Folded Reload + ori $s4, $zero, 1 + move $s1, $zero + ld.d $fp, $sp, 72 # 8-byte Folded Reload + ld.d $s0, $sp, 56 # 8-byte Folded Reload beqz $a0, .LBB41_184 .LBB41_80: # =>This Loop Header: Depth=1 # Child Loop BB41_87 Depth 2 @@ -9076,81 +9062,79 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init # Child Loop BB41_162 Depth 4 # Child Loop BB41_170 Depth 4 # Child Loop BB41_178 Depth 4 - st.d $s0, $sp, 88 # 8-byte Folded Spill ori $a0, $zero, 1 ori $a1, $zero, 3208 pcaddu18i $ra, %call36(calloc) jirl $ra, $ra, 0 - slli.d $s3, $s6, 3 - stx.d $a0, $s1, $s3 + slli.d $s3, $s4, 3 + stx.d $a0, $s0, $s3 ori $a0, $zero, 1 ori $a1, $zero, 16 pcaddu18i $ra, %call36(calloc) jirl $ra, $ra, 0 - ld.d $a1, $sp, 56 # 8-byte Folded Reload + ld.d $a1, $sp, 48 # 8-byte Folded Reload stx.d $a0, $a1, $s3 - slli.d $a1, $s6, 4 - ld.d $a2, $sp, 40 # 8-byte Folded Reload + slli.d $a1, $s4, 4 + ld.d $a2, $sp, 32 # 8-byte Folded Reload vldx $vr0, $a2, $a1 vst $vr0, $a0, 0 ld.w $a0, $fp, 16 - blt $s6, $a0, .LBB41_82 + blt $s4, $a0, .LBB41_82 # %bb.81: # in Loop: Header=BB41_80 Depth=1 - addi.d $a0, $s6, 1 + addi.d $a0, $s4, 1 st.w $a0, $fp, 16 .LBB41_82: # in Loop: Header=BB41_80 Depth=1 - alsl.d $a0, $s6, $s1, 3 - slli.d $s5, $s6, 2 + st.d $s1, $sp, 80 # 8-byte Folded Spill + alsl.d $a0, $s4, $s0, 3 + slli.d $s8, $s4, 2 ld.d $a0, $a0, 0 - ld.d $a1, $sp, 48 # 8-byte Folded Reload - stx.w $zero, $a1, $s5 + ld.d $a1, $sp, 40 # 8-byte Folded Reload + stx.w $zero, $a1, $s8 ori $a1, $zero, 3208 - mul.d $a1, $s6, $a1 - ld.d $a2, $sp, 72 # 8-byte Folded Reload + mul.d $a1, $s4, $a1 + ld.d $a2, $sp, 64 # 8-byte Folded Reload add.d $s0, $a2, $a1 ori $a2, $zero, 3208 move $a1, $s0 pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 ld.w $a0, $fp, 20 - blt $s6, $a0, .LBB41_84 + blt $s4, $a0, .LBB41_84 # %bb.83: # in Loop: Header=BB41_80 Depth=1 - addi.d $a0, $s6, 1 + addi.d $a0, $s4, 1 st.w $a0, $fp, 20 .LBB41_84: # in Loop: Header=BB41_80 Depth=1 ld.w $a0, $s0, 0 - ld.d $t6, $sp, 112 # 8-byte Folded Reload + ld.d $t7, $sp, 96 # 8-byte Folded Reload blez $a0, .LBB41_79 # %bb.85: # %.lr.ph.i # in Loop: Header=BB41_80 Depth=1 - move $s7, $zero + move $s6, $zero addi.d $s2, $s0, 1092 b .LBB41_87 .p2align 4, , 16 .LBB41_86: # %vorbis_encode_residue_setup.exit.i # in Loop: Header=BB41_87 Depth=2 - ldx.w $a0, $a0, $s5 + ldx.w $a0, $a0, $s8 addi.d $a0, $a0, -2 sltui $a0, $a0, 1 - fdiv.d $fa0, $fa3, $fa0 - fmul.d $fa0, $fa0, $fa2 + fdiv.d $fa0, $fa2, $fa0 + ld.w $a1, $s1, 16 + fmul.d $fa0, $fa0, $fa1 fadd.d $fa1, $fa0, $fa0 movgr2cf $fcc0, $a0 - ld.w $a0, $s1, 16 fsel $fa0, $fa0, $fa1, $fcc0 - pcalau12i $a1, %pc_hi20(.LCPI41_4) - fld.d $fa1, $a1, %pc_lo12(.LCPI41_4) - movgr2fr.w $fa2, $a0 - ffint.d.w $fa2, $fa2 - fdiv.d $fa0, $fa0, $fa2 - fadd.d $fa0, $fa0, $fa1 + movgr2fr.w $fa1, $a1 + ffint.d.w $fa1, $fa1 + fdiv.d $fa0, $fa0, $fa1 + fadd.d $fa0, $fa0, $fs1 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a1, $fa0 - mul.w $a0, $a0, $a1 + movfr2gr.s $a0, $fa0 + mul.w $a0, $a1, $a0 st.d $a0, $s1, 8 ld.w $a0, $s0, 0 - addi.d $s7, $s7, 1 - bge $s7, $a0, .LBB41_79 + addi.d $s6, $s6, 1 + bge $s6, $a0, .LBB41_79 .LBB41_87: # Parent Loop BB41_80 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB41_100 Depth 3 @@ -9165,36 +9149,36 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init # Child Loop BB41_162 Depth 4 # Child Loop BB41_170 Depth 4 # Child Loop BB41_178 Depth 4 - slli.d $a0, $s7, 2 - ldx.w $s8, $s2, $a0 - ld.d $s4, $t6, 48 + slli.d $a0, $s6, 2 + ldx.w $s7, $s2, $a0 + ld.d $s5, $t7, 48 ori $a0, $zero, 48 - mul.d $a0, $s8, $a0 - ld.d $a1, $sp, 96 # 8-byte Folded Reload + mul.d $a0, $s7, $a0 + ld.d $a1, $sp, 88 # 8-byte Folded Reload add.d $fp, $a1, $a0 ori $a0, $zero, 1824 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 move $s1, $a0 - alsl.d $a0, $s8, $s4, 3 + alsl.d $a0, $s7, $s5, 3 stptr.d $s1, $a0, 2344 ld.d $a1, $fp, 8 ori $a2, $zero, 1824 move $a0, $s1 pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 - ld.w $a0, $s4, 28 - blt $s8, $a0, .LBB41_89 + ld.w $a0, $s5, 28 + blt $s7, $a0, .LBB41_89 # %bb.88: # in Loop: Header=BB41_87 Depth=2 - addi.d $a0, $s8, 1 - st.w $a0, $s4, 28 + addi.d $a0, $s7, 1 + st.w $a0, $s5, 28 .LBB41_89: # in Loop: Header=BB41_87 Depth=2 - ldx.d $a1, $s4, $s3 + ldx.d $a1, $s5, $s3 ori $a0, $zero, 16 ori $a2, $zero, 64 beq $a1, $a2, .LBB41_93 # %bb.90: # in Loop: Header=BB41_87 Depth=2 - ld.d $t6, $sp, 112 # 8-byte Folded Reload + ld.d $t7, $sp, 96 # 8-byte Folded Reload ori $a2, $zero, 128 beq $a1, $a2, .LBB41_94 # %bb.91: # in Loop: Header=BB41_87 Depth=2 @@ -9205,14 +9189,14 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init b .LBB41_94 .p2align 4, , 16 .LBB41_93: # in Loop: Header=BB41_87 Depth=2 - ld.d $t6, $sp, 112 # 8-byte Folded Reload + ld.d $t7, $sp, 96 # 8-byte Folded Reload .LBB41_94: # in Loop: Header=BB41_87 Depth=2 - alsl.d $a1, $s6, $s4, 3 + alsl.d $a1, $s4, $s5, 3 st.w $a0, $s1, 16 ld.w $a4, $fp, 0 - addi.d $a2, $s4, 2047 + addi.d $a2, $s5, 2047 addi.d $a0, $a2, 41 - slli.d $a3, $s8, 2 + slli.d $a3, $s7, 2 stx.w $a4, $a0, $a3 ld.d $a3, $a1, 0 srai.d $a3, $a3, 1 @@ -9220,13 +9204,13 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init ori $a5, $zero, 2 bne $a4, $a5, .LBB41_96 # %bb.95: # in Loop: Header=BB41_87 Depth=2 - ld.w $a4, $t6, 4 + ld.w $a4, $t7, 4 mul.d $a3, $a3, $a4 st.d $a3, $s1, 8 .LBB41_96: # in Loop: Header=BB41_87 Depth=2 - ldptr.w $a4, $s4, 5560 + ldptr.w $a4, $s5, 5560 ld.w $a3, $s1, 20 - lu12i.w $s8, 1 + lu12i.w $t6, 1 beqz $a4, .LBB41_111 # %bb.97: # %.preheader186.i.i # in Loop: Header=BB41_87 Depth=2 @@ -9272,7 +9256,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init .p2align 4, , 16 .LBB41_106: # %._crit_edge.i.i # in Loop: Header=BB41_87 Depth=2 - ld.w $a4, $s4, 32 + ld.w $a4, $s5, 32 blez $a4, .LBB41_110 # %bb.107: # %.lr.ph.i.i.i # in Loop: Header=BB41_87 Depth=2 @@ -9294,7 +9278,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init .LBB41_110: # %._crit_edge.i.i.i # in Loop: Header=BB41_87 Depth=2 addi.d $a5, $a4, 1 - st.w $a5, $s4, 32 + st.w $a5, $s5, 32 ld.d $a5, $fp, 24 b .LBB41_126 .p2align 4, , 16 @@ -9342,7 +9326,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init .p2align 4, , 16 .LBB41_120: # %._crit_edge200.i.i # in Loop: Header=BB41_87 Depth=2 - ld.w $a4, $s4, 32 + ld.w $a4, $s5, 32 blez $a4, .LBB41_124 # %bb.121: # %.lr.ph.i165.i.i # in Loop: Header=BB41_87 Depth=2 @@ -9364,7 +9348,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init .LBB41_124: # %._crit_edge.i163.i.i # in Loop: Header=BB41_87 Depth=2 addi.d $a5, $a4, 1 - st.w $a5, $s4, 32 + st.w $a5, $s5, 32 ld.d $a5, $fp, 16 b .LBB41_154 .LBB41_125: # in Loop: Header=BB41_87 Depth=2 @@ -9386,7 +9370,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init .LBB41_128: # %._crit_edge.i154.2.i.i # in Loop: Header=BB41_131 Depth=3 addi.d $a7, $t0, 1 - st.w $a7, $s4, 32 + st.w $a7, $s5, 32 ld.d $a7, $fp, 40 .LBB41_129: # %book_dup_or_new.exit162.2.i.i # in Loop: Header=BB41_131 Depth=3 @@ -9414,7 +9398,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init ldx.d $t1, $a7, $a6 beqz $t1, .LBB41_139 # %bb.132: # in Loop: Header=BB41_131 Depth=3 - ld.w $t0, $s4, 32 + ld.w $t0, $s5, 32 blez $t0, .LBB41_136 # %bb.133: # %.lr.ph.i156.i.i # in Loop: Header=BB41_131 Depth=3 @@ -9436,7 +9420,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init .LBB41_136: # %._crit_edge.i154.i.i # in Loop: Header=BB41_131 Depth=3 addi.d $a7, $t0, 1 - st.w $a7, $s4, 32 + st.w $a7, $s5, 32 ld.d $a7, $fp, 40 b .LBB41_138 .LBB41_137: # in Loop: Header=BB41_131 Depth=3 @@ -9455,7 +9439,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init ld.d $t1, $t0, 8 beqz $t1, .LBB41_147 # %bb.140: # in Loop: Header=BB41_131 Depth=3 - ld.w $t0, $s4, 32 + ld.w $t0, $s5, 32 blez $t0, .LBB41_144 # %bb.141: # %.lr.ph.i156.1.i.i # in Loop: Header=BB41_131 Depth=3 @@ -9477,7 +9461,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init .LBB41_144: # %._crit_edge.i154.1.i.i # in Loop: Header=BB41_131 Depth=3 addi.d $a7, $t0, 1 - st.w $a7, $s4, 32 + st.w $a7, $s5, 32 ld.d $a7, $fp, 40 b .LBB41_146 .LBB41_145: # in Loop: Header=BB41_131 Depth=3 @@ -9497,7 +9481,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init ld.d $t1, $t0, 16 beqz $t1, .LBB41_130 # %bb.148: # in Loop: Header=BB41_131 Depth=3 - ld.w $t0, $s4, 32 + ld.w $t0, $s5, 32 blez $t0, .LBB41_128 # %bb.149: # %.lr.ph.i156.2.i.i # in Loop: Header=BB41_131 Depth=3 @@ -9539,7 +9523,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init .LBB41_156: # %._crit_edge.i172.2.i.i # in Loop: Header=BB41_159 Depth=3 addi.d $a7, $t0, 1 - st.w $a7, $s4, 32 + st.w $a7, $s5, 32 ld.d $a7, $fp, 32 .LBB41_157: # %book_dup_or_new.exit180.2.i.i # in Loop: Header=BB41_159 Depth=3 @@ -9567,7 +9551,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init ldx.d $t1, $a7, $a6 beqz $t1, .LBB41_167 # %bb.160: # in Loop: Header=BB41_159 Depth=3 - ld.w $t0, $s4, 32 + ld.w $t0, $s5, 32 blez $t0, .LBB41_164 # %bb.161: # %.lr.ph.i174.i.i # in Loop: Header=BB41_159 Depth=3 @@ -9589,7 +9573,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init .LBB41_164: # %._crit_edge.i172.i.i # in Loop: Header=BB41_159 Depth=3 addi.d $a7, $t0, 1 - st.w $a7, $s4, 32 + st.w $a7, $s5, 32 ld.d $a7, $fp, 32 b .LBB41_166 .LBB41_165: # in Loop: Header=BB41_159 Depth=3 @@ -9608,7 +9592,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init ld.d $t1, $t0, 8 beqz $t1, .LBB41_175 # %bb.168: # in Loop: Header=BB41_159 Depth=3 - ld.w $t0, $s4, 32 + ld.w $t0, $s5, 32 blez $t0, .LBB41_172 # %bb.169: # %.lr.ph.i174.1.i.i # in Loop: Header=BB41_159 Depth=3 @@ -9630,7 +9614,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init .LBB41_172: # %._crit_edge.i172.1.i.i # in Loop: Header=BB41_159 Depth=3 addi.d $a7, $t0, 1 - st.w $a7, $s4, 32 + st.w $a7, $s5, 32 ld.d $a7, $fp, 32 b .LBB41_174 .LBB41_173: # in Loop: Header=BB41_159 Depth=3 @@ -9650,7 +9634,7 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init ld.d $t1, $t0, 16 beqz $t1, .LBB41_158 # %bb.176: # in Loop: Header=BB41_159 Depth=3 - ld.w $t0, $s4, 32 + ld.w $t0, $s5, 32 blez $t0, .LBB41_156 # %bb.177: # %.lr.ph.i174.2.i.i # in Loop: Header=BB41_159 Depth=3 @@ -9676,56 +9660,54 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init .p2align 4, , 16 .LBB41_181: # %.loopexit.i.i # in Loop: Header=BB41_87 Depth=2 - ori $a2, $s8, 1544 - fldx.d $fa0, $s4, $a2 - ld.d $a2, $sp, 104 # 8-byte Folded Reload - fld.d $fa1, $a2, %pc_lo12(.LCPI41_3) - ld.d $a2, $t6, 8 - fmul.d $fa2, $fa0, $fa1 + ori $a2, $t6, 1544 + fldx.d $fa0, $s5, $a2 + ld.d $a2, $t7, 8 + fmul.d $fa1, $fa0, $fs0 ld.d $a3, $a1, 1576 ld.d $a1, $a1, 0 movgr2fr.d $fa0, $a2 ffint.d.l $fa0, $fa0 - vldi $vr3, -928 - fmul.d $fa0, $fa0, $fa3 + vldi $vr2, -928 + fmul.d $fa0, $fa0, $fa2 srai.d $a1, $a1, 1 - fcmp.clt.d $fcc0, $fa0, $fa2 - fsel $fa3, $fa2, $fa0, $fcc0 - fdiv.d $fa4, $fa3, $fa0 - movgr2fr.d $fa2, $a1 - ffint.d.l $fa2, $fa2 - fmul.d $fa4, $fa4, $fa2 - ftintrz.w.d $fa4, $fa4 - movfr2gr.s $a1, $fa4 + fcmp.clt.d $fcc0, $fa0, $fa1 + fsel $fa2, $fa1, $fa0, $fcc0 + fdiv.d $fa3, $fa2, $fa0 + movgr2fr.d $fa1, $a1 + ffint.d.l $fa1, $fa1 + fmul.d $fa3, $fa3, $fa1 + ftintrz.w.d $fa3, $fa3 + movfr2gr.s $a1, $fa3 st.w $a1, $a3, 1116 ld.w $a1, $fp, 4 beqz $a1, .LBB41_86 # %bb.182: # in Loop: Header=BB41_87 Depth=2 - ldptr.w $a1, $s4, 5560 + ldptr.w $a1, $s5, 5560 sltui $a1, $a1, 1 - ori $a2, $s8, 976 + ori $a2, $t6, 976 masknez $a2, $a2, $a1 - ori $a3, $s8, 948 + ori $a3, $t6, 948 maskeqz $a1, $a3, $a1 or $a1, $a1, $a2 - ldx.w $a1, $s4, $a1 - movgr2fr.w $fa3, $a1 - ffint.d.w $fa3, $fa3 - fmul.d $fa3, $fa3, $fa1 - fcmp.cule.d $fcc0, $fa3, $fa0 + ldx.w $a1, $s5, $a1 + movgr2fr.w $fa2, $a1 + ffint.d.w $fa2, $fa2 + fmul.d $fa2, $fa2, $fs0 + fcmp.cule.d $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB41_86 # %bb.183: # in Loop: Header=BB41_87 Depth=2 - fmov.d $fa3, $fa0 + fmov.d $fa2, $fa0 b .LBB41_86 .LBB41_184: # %vorbis_encode_map_n_res_setup.exit - ld.d $a1, $t6, 48 + ld.d $a1, $t7, 48 ldptr.d $a0, $a1, 5512 ld.d $a0, $a0, 8 beqz $a0, .LBB41_186 # %bb.185: - ld.d $a2, $sp, 16 # 8-byte Folded Reload + ld.d $a2, $sp, 8 # 8-byte Folded Reload fldx.d $fa0, $a1, $a2 - ld.w $a1, $t6, 4 + ld.w $a1, $t7, 4 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a2, $fa1 movgr2fr.w $fa1, $a2 @@ -9749,69 +9731,70 @@ vorbis_encode_setup_init: # @vorbis_encode_setup_init .LBB41_186: addi.w $a0, $zero, -1 .LBB41_187: # %setting_to_approx_bitrate.exit - ld.d $a3, $sp, 24 # 8-byte Folded Reload - st.d $a0, $t6, 24 + ld.d $a3, $sp, 16 # 8-byte Folded Reload + lu12i.w $a4, 1 + st.d $a0, $t7, 24 ldptr.d $a0, $a3, 5568 - st.d $a0, $t6, 32 + st.d $a0, $t7, 32 ldptr.d $a0, $a3, 5592 - st.d $a0, $t6, 16 - ori $a1, $s8, 1504 + st.d $a0, $t7, 16 + ori $a1, $a4, 1504 fldx.d $fa0, $a3, $a1 ftintrz.l.d $fa0, $fa0 - fst.d $fa0, $t6, 40 + fst.d $fa0, $t7, 40 ldptr.w $a0, $a3, 5560 beqz $a0, .LBB41_189 # %bb.188: move $a0, $zero - ori $a2, $s8, 1512 + ori $a2, $a4, 1512 vldx $vr0, $a3, $a2 - ori $a2, $s8, 1344 + ori $a2, $a4, 1344 fldx.d $fa1, $a3, $a1 ldptr.d $a1, $a3, 5568 vstx $vr0, $a3, $a2 - ori $a2, $s8, 1360 + ori $a2, $a4, 1360 fstx.d $fa1, $a3, $a2 movgr2fr.d $fa0, $a1 ldptr.d $a1, $a3, 5592 ffint.d.l $fa0, $fa0 - ori $a2, $s8, 1368 + ori $a2, $a4, 1368 fstx.d $fa0, $a3, $a2 movgr2fr.d $fa0, $a1 ldptr.d $a1, $a3, 5576 ffint.d.l $fa0, $fa0 - ori $a2, $s8, 1376 + ori $a2, $a4, 1376 fstx.d $fa0, $a3, $a2 movgr2fr.d $fa0, $a1 ldptr.d $a1, $a3, 5584 ffint.d.l $fa0, $fa0 - ori $a2, $s8, 1384 + ori $a2, $a4, 1384 fstx.d $fa0, $a3, $a2 movgr2fr.d $fa0, $a1 ffint.d.l $fa0, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI41_5) - vld $vr1, $a1, %pc_lo12(.LCPI41_5) - ori $a1, $s8, 1392 + pcalau12i $a1, %pc_hi20(.LCPI41_0) + vld $vr1, $a1, %pc_lo12(.LCPI41_0) + ori $a1, $a4, 1392 fstx.d $fa0, $a3, $a1 - ori $a1, $s8, 1400 + ori $a1, $a4, 1400 vstx $vr1, $a3, $a1 b .LBB41_190 .LBB41_189: move $a0, $zero .LBB41_190: - fld.d $fs1, $sp, 120 # 8-byte Folded Reload - fld.d $fs0, $sp, 128 # 8-byte Folded Reload - ld.d $s8, $sp, 136 # 8-byte Folded Reload - ld.d $s7, $sp, 144 # 8-byte Folded Reload - ld.d $s6, $sp, 152 # 8-byte Folded Reload - ld.d $s5, $sp, 160 # 8-byte Folded Reload - ld.d $s4, $sp, 168 # 8-byte Folded Reload - ld.d $s3, $sp, 176 # 8-byte Folded Reload - ld.d $s2, $sp, 184 # 8-byte Folded Reload - ld.d $s1, $sp, 192 # 8-byte Folded Reload - ld.d $s0, $sp, 200 # 8-byte Folded Reload - ld.d $fp, $sp, 208 # 8-byte Folded Reload - ld.d $ra, $sp, 216 # 8-byte Folded Reload - addi.d $sp, $sp, 224 + fld.d $fs1, $sp, 104 # 8-byte Folded Reload + fld.d $fs0, $sp, 112 # 8-byte Folded Reload + ld.d $s8, $sp, 120 # 8-byte Folded Reload + ld.d $s7, $sp, 128 # 8-byte Folded Reload + ld.d $s6, $sp, 136 # 8-byte Folded Reload + ld.d $s5, $sp, 144 # 8-byte Folded Reload + ld.d $s4, $sp, 152 # 8-byte Folded Reload + ld.d $s3, $sp, 160 # 8-byte Folded Reload + ld.d $s2, $sp, 168 # 8-byte Folded Reload + ld.d $s1, $sp, 176 # 8-byte Folded Reload + ld.d $s0, $sp, 184 # 8-byte Folded Reload + ld.d $fp, $sp, 192 # 8-byte Folded Reload + ld.d $ra, $sp, 200 # 8-byte Folded Reload + addi.d $sp, $sp, 208 ret .Lfunc_end41: .size vorbis_encode_setup_init, .Lfunc_end41-vorbis_encode_setup_init @@ -12388,12 +12371,7 @@ vorbis_analysis_wrote: # @vorbis_analysis_wrote .Lfunc_end50: .size vorbis_analysis_wrote, .Lfunc_end50-vorbis_analysis_wrote # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function timer_time -.LCPI51_0: - .dword 0x412e848000000000 # double 1.0E+6 - .text - .globl timer_time + .globl timer_time # -- Begin function timer_time .p2align 5 .type timer_time,@function timer_time: # @timer_time @@ -12409,11 +12387,13 @@ timer_time: # @timer_time ffint.d.l $fa1, $fa1 fsub.d $fa0, $fa0, $fa1 movgr2fr.d $fa1, $a0 - pcalau12i $a0, %pc_hi20(.LCPI51_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI51_0) ffint.d.l $fa1, $fa1 - movgr2fr.d $fa3, $zero - fsub.d $fa1, $fa3, $fa1 + movgr2fr.d $fa2, $zero + fsub.d $fa1, $fa2, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa2, $a0 fdiv.d $fa1, $fa1, $fa2 fadd.d $fa0, $fa1, $fa0 st.d $a2, $a1, %pc_lo12(timer_time.now.0) @@ -12421,12 +12401,7 @@ timer_time: # @timer_time .Lfunc_end51: .size timer_time, .Lfunc_end51-timer_time # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function vorbis_analysis_blockout -.LCPI52_0: - .word 0xc61c3c00 # float -9999 - .text - .globl vorbis_analysis_blockout + .globl vorbis_analysis_blockout # -- Begin function vorbis_analysis_blockout .p2align 5 .type vorbis_analysis_blockout,@function vorbis_analysis_blockout: # @vorbis_analysis_blockout @@ -12634,22 +12609,24 @@ vorbis_analysis_blockout: # @vorbis_analysis_blockout ldx.d $a1, $a2, $a1 srli.d $a3, $a1, 63 add.d $a1, $a1, $a3 - ld.d $a0, $a0, 8 srli.d $a1, $a1, 1 movgr2fr.w $fa1, $a1 ffint.s.w $fa1, $fa1 - movgr2fr.d $fa2, $a0 - ffint.s.l $fa2, $fa2 - lu12i.w $a0, 1 - ori $a0, $a0, 916 - fldx.s $fa3, $a2, $a0 - pcalau12i $a0, %pc_hi20(.LCPI52_0) - fld.s $fa4, $a0, %pc_lo12(.LCPI52_0) - fdiv.s $fa1, $fa1, $fa2 - fmul.s $fa1, $fa3, $fa1 + ld.d $a0, $a0, 8 + lu12i.w $a1, 1 + ori $a1, $a1, 916 + fldx.s $fa2, $a2, $a1 + movgr2fr.d $fa3, $a0 + ffint.s.l $fa3, $fa3 + fdiv.s $fa1, $fa1, $fa3 + fmul.s $fa1, $fa2, $fa1 fadd.s $fa0, $fa0, $fa1 - fcmp.clt.s $fcc0, $fa0, $fa4 - fsel $fa0, $fa0, $fa4, $fcc0 + lu12i.w $a0, -237117 + ori $a0, $a0, 3072 + lu32i.d $a0, 0 + movgr2fr.w $fa1, $a0 + fcmp.clt.s $fcc0, $fa0, $fa1 + fsel $fa0, $fa0, $fa1, $fcc0 fst.s $fa0, $s2, 0 fst.s $fa0, $s7, 8 ld.w $a1, $s4, 4 @@ -17592,12 +17569,7 @@ vorbis_encode_noisebias_setup: # @vorbis_encode_noisebias_setup .Lfunc_end76: .size vorbis_encode_noisebias_setup, .Lfunc_end76-vorbis_encode_noisebias_setup # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function get_setup_template -.LCPI77_0: - .dword 0xbf50624dd2f1a9fc # double -0.001 - .text - .p2align 5 + .p2align 5 # -- Begin function get_setup_template .type get_setup_template,@function get_setup_template: # @get_setup_template # %bb.0: @@ -17698,12 +17670,15 @@ get_setup_template: # @get_setup_template .LBB77_19: # %._crit_edge.thread stptr.d $a3, $a0, 5512 .LBB77_20: - pcalau12i $a1, %pc_hi20(.LCPI77_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI77_0) bstrpick.d $a1, $a5, 31, 0 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + lu12i.w $a1, -184550 + ori $a1, $a1, 2556 + lu32i.d $a1, 25165 + lu52i.d $a1, $a1, -1035 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fadd.d $fa0, $fa1, $fa0 + fadd.d $fa0, $fa0, $fa1 lu12i.w $a1, 1 ori $a1, $a1, 1432 fstx.d $fa0, $a0, $a1 @@ -19460,12 +19435,7 @@ ov_bitrate: # @ov_bitrate .Lfunc_end96: .size ov_bitrate, .Lfunc_end96-ov_bitrate # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function ov_time_total -.LCPI97_0: - .dword 0xc060600000000000 # double -131 - .text - .globl ov_time_total + .globl ov_time_total # -- Begin function ov_time_total .p2align 5 .type ov_time_total,@function ov_time_total: # @ov_time_total @@ -19478,18 +19448,20 @@ ov_time_total: # @ov_time_total fst.d $fs0, $sp, 8 # 8-byte Folded Spill move $fp, $a0 ld.w $a0, $a0, 128 - pcalau12i $a2, %pc_hi20(.LCPI97_0) - fld.d $fs0, $a2, %pc_lo12(.LCPI97_0) - ori $a2, $zero, 2 - blt $a0, $a2, .LBB97_9 + ori $a2, $zero, 0 + lu32i.d $a2, 24576 + lu52i.d $a2, $a2, -1018 + ori $a3, $zero, 2 + movgr2fr.d $fs0, $a2 + blt $a0, $a3, .LBB97_6 # %bb.1: ld.w $a0, $fp, 8 - beqz $a0, .LBB97_9 + beqz $a0, .LBB97_6 # %bb.2: ld.w $s1, $fp, 64 - bge $a1, $s1, .LBB97_9 + bge $a1, $s1, .LBB97_5 # %bb.3: - bltz $a1, .LBB97_5 + bltz $a1, .LBB97_7 # %bb.4: ld.d $a0, $fp, 96 slli.w $a2, $a1, 1 @@ -19505,14 +19477,28 @@ ov_time_total: # @ov_time_total movgr2fr.d $fa1, $a1 ffint.d.l $fa1, $fa1 fdiv.d $fs0, $fa0, $fa1 - b .LBB97_9 -.LBB97_5: # %.preheader - blez $s1, .LBB97_8 -# %bb.6: # %.lr.ph.preheader + b .LBB97_6 +.LBB97_5: + ori $a0, $zero, 0 + lu32i.d $a0, 24576 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fs0, $a0 +.LBB97_6: # %.loopexit + fmov.d $fa0, $fs0 + fld.d $fs0, $sp, 8 # 8-byte Folded Reload + ld.d $s1, $sp, 16 # 8-byte Folded Reload + ld.d $s0, $sp, 24 # 8-byte Folded Reload + ld.d $fp, $sp, 32 # 8-byte Folded Reload + ld.d $ra, $sp, 40 # 8-byte Folded Reload + addi.d $sp, $sp, 48 + ret +.LBB97_7: # %.preheader + blez $s1, .LBB97_10 +# %bb.8: # %.lr.ph.preheader move $s0, $zero movgr2fr.d $fs0, $zero .p2align 4, , 16 -.LBB97_7: # %.lr.ph +.LBB97_9: # %.lr.ph # =>This Inner Loop Header: Depth=1 move $a0, $fp move $a1, $s0 @@ -19520,19 +19506,11 @@ ov_time_total: # @ov_time_total jirl $ra, $ra, 0 addi.w $s0, $s0, 1 fadd.d $fs0, $fs0, $fa0 - bne $s1, $s0, .LBB97_7 - b .LBB97_9 -.LBB97_8: + bne $s1, $s0, .LBB97_9 + b .LBB97_6 +.LBB97_10: movgr2fr.d $fs0, $zero -.LBB97_9: # %.loopexit - fmov.d $fa0, $fs0 - fld.d $fs0, $sp, 8 # 8-byte Folded Reload - ld.d $s1, $sp, 16 # 8-byte Folded Reload - ld.d $s0, $sp, 24 # 8-byte Folded Reload - ld.d $fp, $sp, 32 # 8-byte Folded Reload - ld.d $ra, $sp, 40 # 8-byte Folded Reload - addi.d $sp, $sp, 48 - ret + b .LBB97_6 .Lfunc_end97: .size ov_time_total, .Lfunc_end97-ov_time_total # -- End function @@ -24585,12 +24563,7 @@ ov_pcm_tell: # @ov_pcm_tell .Lfunc_end126: .size ov_pcm_tell, .Lfunc_end126-ov_pcm_tell # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function ov_time_tell -.LCPI127_0: - .dword 0xc060600000000000 # double -131 - .text - .globl ov_time_tell + .globl ov_time_tell # -- Begin function ov_time_tell .p2align 5 .type ov_time_tell,@function ov_time_tell: # @ov_time_tell @@ -24608,8 +24581,10 @@ ov_time_tell: # @ov_time_tell ori $a1, $zero, 2 bge $a0, $a1, .LBB127_2 # %bb.1: - pcalau12i $a0, %pc_hi20(.LCPI127_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI127_0) + ori $a0, $zero, 0 + lu32i.d $a0, 24576 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa0, $a0 b .LBB127_8 .LBB127_2: ld.w $a0, $fp, 8 @@ -24759,14 +24734,7 @@ ov_comment: # @ov_comment .Lfunc_end129: .size ov_comment, .Lfunc_end129-ov_comment # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function ov_read -.LCPI130_0: - .word 0x47000000 # float 32768 -.LCPI130_1: - .word 0x43000000 # float 128 - .text - .globl ov_read + .globl ov_read # -- Begin function ov_read .p2align 5 .type ov_read,@function ov_read: # @ov_read @@ -24906,8 +24874,8 @@ ov_read: # @ov_read move $a4, $zero sltui $a5, $s2, 1 slli.d $a5, $a5, 7 - pcalau12i $a6, %pc_hi20(.LCPI130_1) - fld.s $fa0, $a6, %pc_lo12(.LCPI130_1) + lu12i.w $a6, 274432 + movgr2fr.w $fa0, $a6 vldi $vr1, -928 addi.w $a6, $zero, -128 ori $a7, $zero, 127 @@ -24954,8 +24922,8 @@ ov_read: # @ov_read blez $a2, .LBB130_45 # %bb.30: # %.preheader188.us.preheader move $a5, $zero - pcalau12i $a6, %pc_hi20(.LCPI130_0) - fld.s $fa0, $a6, %pc_lo12(.LCPI130_0) + lu12i.w $a6, 290816 + movgr2fr.w $fa0, $a6 vldi $vr1, -928 lu12i.w $a6, -8 lu12i.w $a7, 7 @@ -25004,8 +24972,8 @@ ov_read: # @ov_read # %bb.36: # %.lr.ph.us.preheader move $a4, $zero slli.d $a5, $a2, 1 - pcalau12i $a6, %pc_hi20(.LCPI130_0) - fld.s $fa0, $a6, %pc_lo12(.LCPI130_0) + lu12i.w $a6, 290816 + movgr2fr.w $fa0, $a6 vldi $vr1, -928 lu12i.w $a6, -8 lu12i.w $a7, 7 @@ -25050,8 +25018,8 @@ ov_read: # @ov_read # %bb.41: # %.lr.ph.us209.preheader move $a5, $zero slli.d $a6, $a2, 1 - pcalau12i $a7, %pc_hi20(.LCPI130_0) - fld.s $fa0, $a7, %pc_lo12(.LCPI130_0) + lu12i.w $a7, 290816 + movgr2fr.w $fa0, $a7 vldi $vr1, -928 lu12i.w $a7, -8 lu12i.w $t0, 7 @@ -26245,32 +26213,7 @@ vorbis_synthesis_lapout: # @vorbis_synthesis_lapout .Lfunc_end135: .size vorbis_synthesis_lapout, .Lfunc_end135-vorbis_synthesis_lapout # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _analysis_output_always -.LCPI136_0: - .word 0x457a0000 # float 4000 -.LCPI136_7: - .word 0x3540a8c1 # float 7.1771143E-7 -.LCPI136_8: - .word 0xc43f115b # float -764.271179 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI136_1: - .dword 0x3f483f91e0000000 # double 7.3999998858198524E-4 -.LCPI136_2: - .dword 0x402a333340000000 # double 13.100000381469727 -.LCPI136_3: - .dword 0x3e53dd3dc0000000 # double 1.8499999754340024E-8 -.LCPI136_4: - .dword 0x4001eb8520000000 # double 2.2400000095367432 -.LCPI136_5: - .dword 0x3f1a36e2e0000000 # double 9.9999997473787516E-5 -.LCPI136_6: - .dword 0xc061800000000000 # double -140 -.LCPI136_9: - .dword 0x40bf400000000000 # double 8000 - .text - .globl _analysis_output_always + .globl _analysis_output_always # -- Begin function _analysis_output_always .p2align 5 .type _analysis_output_always,@function _analysis_output_always: # @_analysis_output_always @@ -26319,36 +26262,51 @@ _analysis_output_always: # @_analysis_output_always movgr2fr.d $fa0, $a0 ffint.s.l $fa1, $fa0 fst.s $fa1, $sp, 36 # 4-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI136_0) + lu12i.w $a0, 284576 beqz $s3, .LBB136_14 # %bb.4: # %.lr.ph.split.split.preheader - fld.s $fa0, $a0, %pc_lo12(.LCPI136_0) + movgr2fr.w $fa0, $a0 fst.s $fa0, $sp, 32 # 4-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI136_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI136_1) + lu12i.w $a0, -131072 + move $a1, $a0 + lu32i.d $a1, -508015 + lu52i.d $a1, $a1, 1012 + movgr2fr.d $fa0, $a1 fst.d $fa0, $sp, 24 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI136_2) - fld.d $fs3, $a0, %pc_lo12(.LCPI136_2) - pcalau12i $a0, %pc_hi20(.LCPI136_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI136_3) - pcalau12i $a0, %pc_hi20(.LCPI136_4) - fld.d $fs5, $a0, %pc_lo12(.LCPI136_4) - pcalau12i $a0, %pc_hi20(.LCPI136_5) - fld.d $fs6, $a0, %pc_lo12(.LCPI136_5) + lu12i.w $a1, 262144 + lu32i.d $a1, -380109 + lu52i.d $a1, $a1, 1026 + movgr2fr.d $fs3, $a1 + lu12i.w $a1, -262144 + lu32i.d $a1, 253245 + lu52i.d $a1, $a1, 997 + movgr2fr.d $fs4, $a1 + lu12i.w $a1, 131072 + lu32i.d $a1, 125829 + lu52i.d $a1, $a1, 1024 + movgr2fr.d $fs5, $a1 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs6, $a0 pcalau12i $a0, %pc_hi20(.L.str.134) addi.d $s2, $a0, %pc_lo12(.L.str.134) - pcalau12i $a0, %pc_hi20(.LCPI136_6) - fld.d $fs7, $a0, %pc_lo12(.LCPI136_6) + ori $a0, $zero, 0 + lu32i.d $a0, 98304 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fs7, $a0 movgr2fr.w $fs0, $zero pcalau12i $a0, %pc_hi20(.L.str.135) addi.d $s3, $a0, %pc_lo12(.L.str.135) - pcalau12i $a0, %pc_hi20(.LCPI136_7) - fld.s $fa0, $a0, %pc_lo12(.LCPI136_7) + move $s4, $zero + lu12i.w $a0, 218122 + ori $a0, $a0, 2241 + movgr2fr.w $fa0, $a0 fst.s $fa0, $sp, 20 # 4-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI136_8) - fld.s $fa0, $a0, %pc_lo12(.LCPI136_8) + lu12i.w $a0, -244751 + ori $a0, $a0, 347 + lu32i.d $a0, 0 + movgr2fr.w $fa0, $a0 fst.s $fa0, $sp, 16 # 4-byte Folded Spill - move $s4, $zero b .LBB136_6 .p2align 4, , 16 .LBB136_5: # in Loop: Header=BB136_6 Depth=1 @@ -26413,19 +26371,26 @@ _analysis_output_always: # @_analysis_output_always # %bb.9: # %.lr.ph.split.us.split beqz $s2, .LBB136_20 # %bb.10: - pcalau12i $a0, %pc_hi20(.LCPI136_9) - fld.d $fs0, $a0, %pc_lo12(.LCPI136_9) - pcalau12i $a0, %pc_hi20(.L.str.134) - addi.d $s3, $a0, %pc_lo12(.L.str.134) - pcalau12i $a0, %pc_hi20(.LCPI136_6) - fld.d $fs1, $a0, %pc_lo12(.LCPI136_6) + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1035 + movgr2fr.d $fs0, $a1 + pcalau12i $a1, %pc_hi20(.L.str.134) + addi.d $s3, $a1, %pc_lo12(.L.str.134) + lu32i.d $a0, 98304 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fs1, $a0 movgr2fr.w $fs2, $zero pcalau12i $a0, %pc_hi20(.L.str.135) addi.d $s4, $a0, %pc_lo12(.L.str.135) - pcalau12i $a0, %pc_hi20(.LCPI136_7) - fld.s $fs3, $a0, %pc_lo12(.LCPI136_7) - pcalau12i $a0, %pc_hi20(.LCPI136_8) - fld.s $fs4, $a0, %pc_lo12(.LCPI136_8) + lu12i.w $a0, 218122 + ori $a0, $a0, 2241 + movgr2fr.w $fs3, $a0 + lu12i.w $a0, -244751 + ori $a0, $a0, 347 + lu32i.d $a0, 0 + movgr2fr.w $fs4, $a0 b .LBB136_12 .p2align 4, , 16 .LBB136_11: # in Loop: Header=BB136_12 Depth=1 @@ -26462,17 +26427,27 @@ _analysis_output_always: # @_analysis_output_always fcvt.d.s $fa0, $fa0 b .LBB136_11 .LBB136_14: # %.lr.ph.split.split.us.preheader - fld.s $fs0, $a0, %pc_lo12(.LCPI136_0) - pcalau12i $a0, %pc_hi20(.LCPI136_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI136_1) - pcalau12i $a0, %pc_hi20(.LCPI136_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI136_2) - pcalau12i $a0, %pc_hi20(.LCPI136_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI136_3) - pcalau12i $a0, %pc_hi20(.LCPI136_4) - fld.d $fs4, $a0, %pc_lo12(.LCPI136_4) - pcalau12i $a0, %pc_hi20(.LCPI136_5) - fld.d $fs5, $a0, %pc_lo12(.LCPI136_5) + movgr2fr.w $fs0, $a0 + lu12i.w $a0, -131072 + move $a1, $a0 + lu32i.d $a1, -508015 + lu52i.d $a1, $a1, 1012 + movgr2fr.d $fs1, $a1 + lu12i.w $a1, 262144 + lu32i.d $a1, -380109 + lu52i.d $a1, $a1, 1026 + movgr2fr.d $fs2, $a1 + lu12i.w $a1, -262144 + lu32i.d $a1, 253245 + lu52i.d $a1, $a1, 997 + movgr2fr.d $fs3, $a1 + lu12i.w $a1, 131072 + lu32i.d $a1, 125829 + lu52i.d $a1, $a1, 1024 + movgr2fr.d $fs4, $a1 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.134) addi.d $s2, $a0, %pc_lo12(.L.str.134) pcalau12i $a0, %pc_hi20(.L.str.135) @@ -26524,8 +26499,10 @@ _analysis_output_always: # @_analysis_output_always .LBB136_16: # %.lr.ph.split.us.split.us beqz $s2, .LBB136_24 # %bb.17: - pcalau12i $a0, %pc_hi20(.LCPI136_9) - fld.d $fs0, $a0, %pc_lo12(.LCPI136_9) + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1035 + movgr2fr.d $fs0, $a0 pcalau12i $a0, %pc_hi20(.L.str.134) addi.d $s3, $a0, %pc_lo12(.L.str.134) pcalau12i $a0, %pc_hi20(.L.str.135) @@ -26563,16 +26540,21 @@ _analysis_output_always: # @_analysis_output_always .LBB136_20: # %.lr.ph.split.us.split.split.us.preheader pcalau12i $a0, %pc_hi20(.L.str.134) addi.d $s2, $a0, %pc_lo12(.L.str.134) - pcalau12i $a0, %pc_hi20(.LCPI136_6) - fld.d $fs0, $a0, %pc_lo12(.LCPI136_6) + ori $a0, $zero, 0 + lu32i.d $a0, 98304 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fs0, $a0 movgr2fr.w $fs1, $zero pcalau12i $a0, %pc_hi20(.L.str.135) addi.d $s3, $a0, %pc_lo12(.L.str.135) - pcalau12i $a0, %pc_hi20(.LCPI136_7) - fld.s $fs2, $a0, %pc_lo12(.LCPI136_7) - pcalau12i $a0, %pc_hi20(.LCPI136_8) - fld.s $fs3, $a0, %pc_lo12(.LCPI136_8) move $s4, $zero + lu12i.w $a0, 218122 + ori $a0, $a0, 2241 + movgr2fr.w $fs2, $a0 + lu12i.w $a0, -244751 + ori $a0, $a0, 347 + lu32i.d $a0, 0 + movgr2fr.w $fs3, $a0 b .LBB136_22 .p2align 4, , 16 .LBB136_21: # in Loop: Header=BB136_22 Depth=1 @@ -31365,14 +31347,7 @@ _vp_global_look: # @_vp_global_look .Lfunc_end166: .size _vp_global_look, .Lfunc_end166-_vp_global_look # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ve_envelope_init -.LCPI167_0: - .dword 0x405fc00000000000 # double 127 -.LCPI167_1: - .dword 0x400921fb54442d18 # double 3.1415926535897931 - .text - .globl _ve_envelope_init + .globl _ve_envelope_init # -- Begin function _ve_envelope_init .p2align 5 .type _ve_envelope_init,@function _ve_envelope_init: # @_ve_envelope_init @@ -31414,12 +31389,17 @@ _ve_envelope_init: # @_ve_envelope_init ori $a1, $zero, 128 pcaddu18i $ra, %call36(mdct_init) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI167_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI167_0) - pcalau12i $a0, %pc_hi20(.LCPI167_1) - fld.d $fs0, $a0, %pc_lo12(.LCPI167_1) move $s0, $zero move $s2, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -16384 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fs0, $a0 ori $s3, $zero, 512 .p2align 4, , 16 .LBB167_1: # =>This Inner Loop Header: Depth=1 @@ -33185,12 +33165,7 @@ _ve_envelope_mark: # @_ve_envelope_mark .Lfunc_end179: .size _ve_envelope_mark, .Lfunc_end179-_ve_envelope_mark # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _vp_ampmax_decay -.LCPI180_0: - .word 0xc61c3c00 # float -9999 - .text - .globl _vp_ampmax_decay + .globl _vp_ampmax_decay # -- Begin function _vp_ampmax_decay .p2align 5 .type _vp_ampmax_decay,@function _vp_ampmax_decay: # @_vp_ampmax_decay @@ -33202,22 +33177,24 @@ _vp_ampmax_decay: # @_vp_ampmax_decay ldx.d $a0, $a2, $a0 srli.d $a3, $a0, 63 add.d $a0, $a0, $a3 - ld.d $a1, $a1, 8 srli.d $a0, $a0, 1 movgr2fr.w $fa1, $a0 ffint.s.w $fa1, $fa1 - movgr2fr.d $fa2, $a1 - ffint.s.l $fa2, $fa2 - lu12i.w $a0, 1 - ori $a0, $a0, 916 - fldx.s $fa3, $a2, $a0 - pcalau12i $a0, %pc_hi20(.LCPI180_0) - fld.s $fa4, $a0, %pc_lo12(.LCPI180_0) - fdiv.s $fa1, $fa1, $fa2 - fmul.s $fa1, $fa3, $fa1 + ld.d $a0, $a1, 8 + lu12i.w $a1, 1 + ori $a1, $a1, 916 + fldx.s $fa2, $a2, $a1 + movgr2fr.d $fa3, $a0 + ffint.s.l $fa3, $fa3 + fdiv.s $fa1, $fa1, $fa3 + fmul.s $fa1, $fa2, $fa1 fadd.s $fa0, $fa0, $fa1 - fcmp.clt.s $fcc0, $fa0, $fa4 - fsel $fa0, $fa0, $fa4, $fcc0 + lu12i.w $a0, -237117 + ori $a0, $a0, 3072 + lu32i.d $a0, 0 + movgr2fr.w $fa1, $a0 + fcmp.clt.s $fcc0, $fa0, $fa1 + fsel $fa0, $fa0, $fa1, $fcc0 ret .Lfunc_end180: .size _vp_ampmax_decay, .Lfunc_end180-_vp_ampmax_decay @@ -33739,12 +33716,7 @@ _ilog: # @_ilog .Lfunc_end189: .size _ilog, .Lfunc_end189-_ilog # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _float32_pack -.LCPI190_0: - .dword 0x3fe62e42fefa39ef # double 0.69314718055994529 - .text - .globl _float32_pack + .globl _float32_pack # -- Begin function _float32_pack .p2align 5 .type _float32_pack,@function _float32_pack: # @_float32_pack @@ -33765,8 +33737,11 @@ _float32_pack: # @_float32_pack fmov.d $fa0, $fs0 pcaddu18i $ra, %call36(log) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI190_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI190_0) + lu12i.w $a0, -4189 + ori $a0, $a0, 2543 + lu32i.d $a0, 405058 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fdiv.d $fa0, $fa0, $fa1 vreplvei.d $vr0, $vr0, 0 vfrintrm.d $vr0, $vr0 @@ -38363,15 +38338,9 @@ vorbis_book_decodevv_add: # @vorbis_book_decodevv_add .Lfunc_end213: .size vorbis_book_decodevv_add, .Lfunc_end213-vorbis_book_decodevv_add # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function mdct_init -.LCPI214_0: - .dword 0x3fe62e42fefa39ef # double 0.69314718055994529 -.LCPI214_1: - .dword 0x400921fb54442d18 # double 3.1415926535897931 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI214_2: + .p2align 4, 0x0 # -- Begin function mdct_init +.LCPI214_0: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -38417,8 +38386,11 @@ mdct_init: # @mdct_init fcvt.d.s $fa0, $fs1 pcaddu18i $ra, %call36(log) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI214_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI214_0) + lu12i.w $a0, -4189 + ori $a0, $a0, 2543 + lu32i.d $a0, 405058 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fdiv.d $fa0, $fa0, $fa1 frint.d $fa0, $fa0 ftintrz.w.d $fa0, $fa0 @@ -38432,18 +38404,21 @@ mdct_init: # @mdct_init # %bb.1: # %.lr.ph st.d $a1, $sp, 16 # 8-byte Folded Spill move $s5, $zero - pcalau12i $a0, %pc_hi20(.LCPI214_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI214_1) bstrpick.d $a0, $s1, 31, 0 + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1024 movgr2fr.d $fa1, $a0 - ffint.d.l $fa1, $fa1 - fdiv.d $fs2, $fa0, $fa1 + fdiv.d $fs2, $fa1, $fa0 slli.d $a0, $s1, 1 bstrpick.d $a1, $a0, 31, 1 slli.d $a1, $a1, 1 - movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fs3, $fa0, $fa1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + fdiv.d $fs3, $fa1, $fa0 bstrpick.d $s4, $s4, 31, 0 bstrpick.d $a0, $a0, 32, 2 slli.d $s6, $a0, 2 @@ -38537,8 +38512,8 @@ mdct_init: # @mdct_init vreplgr2vr.w $vr0, $a4 vinsgr2vr.w $vr1, $a4, 3 ori $t1, $zero, 7 - pcalau12i $t2, %pc_hi20(.LCPI214_2) - vld $vr2, $t2, %pc_lo12(.LCPI214_2) + pcalau12i $t2, %pc_hi20(.LCPI214_0) + vld $vr2, $t2, %pc_lo12(.LCPI214_0) addi.d $t2, $zero, -2 vrepli.b $vr3, 0 vrepli.w $vr4, 1 @@ -39011,14 +38986,6 @@ mdct_backward: # @mdct_backward .word 1 # 0x1 .word 2 # 0x2 .word 4 # 0x4 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI216_2: - .word 0x3f6c835e # float 0.923879504 -.LCPI216_3: - .word 0x3ec3ef15 # float 0.382683426 -.LCPI216_4: - .word 0x3f3504f3 # float 0.707106769 .text .p2align 5 .type mdct_butterflies,@function @@ -39156,67 +39123,67 @@ mdct_butterflies: # @mdct_butterflies blez $a3, .LBB216_12 # %bb.5: # %.lr.ph5.preheader ori $a0, $zero, 97 + lu12i.w $a5, 259784 + lu12i.w $a4, 257086 + lu12i.w $a1, 258896 bgeu $a3, $a0, .LBB216_7 # %bb.6: move $a0, $zero b .LBB216_10 .LBB216_7: # %vector.ph - move $a1, $zero + move $a6, $zero addi.d $a0, $a3, -1 srli.d $a0, $a0, 5 - addi.d $a4, $a0, 1 - bstrpick.d $a0, $a4, 59, 2 - lu12i.w $a5, 259784 - ori $a5, $a5, 862 - vreplgr2vr.w $vr0, $a5 + addi.d $a7, $a0, 1 + bstrpick.d $a0, $a7, 59, 2 + ori $t0, $a5, 862 + vreplgr2vr.w $vr0, $t0 vst $vr0, $sp, 80 # 16-byte Folded Spill - lu12i.w $a5, 257086 - ori $a5, $a5, 3861 - vreplgr2vr.w $vr0, $a5 + ori $t0, $a4, 3861 + vreplgr2vr.w $vr0, $t0 vst $vr0, $sp, 64 # 16-byte Folded Spill - pcalau12i $a5, %pc_hi20(.LCPI216_0) - vld $vr0, $a5, %pc_lo12(.LCPI216_0) + pcalau12i $t0, %pc_hi20(.LCPI216_0) + vld $vr0, $t0, %pc_lo12(.LCPI216_0) vst $vr0, $sp, 48 # 16-byte Folded Spill - pcalau12i $a5, %pc_hi20(.LCPI216_1) - vld $vr0, $a5, %pc_lo12(.LCPI216_1) + pcalau12i $t0, %pc_hi20(.LCPI216_1) + vld $vr0, $t0, %pc_lo12(.LCPI216_1) vst $vr0, $sp, 32 # 16-byte Folded Spill - lu12i.w $a5, 258896 - ori $a5, $a5, 1267 - vreplgr2vr.w $vr0, $a5 + ori $t0, $a1, 1267 + vreplgr2vr.w $vr0, $t0 vst $vr0, $sp, 16 # 16-byte Folded Spill - slli.d $a5, $a0, 2 + slli.d $t0, $a0, 2 slli.d $a0, $a0, 7 .p2align 4, , 16 .LBB216_8: # %vector.body # =>This Inner Loop Header: Depth=1 - slli.d $a7, $a1, 7 - add.d $a6, $a2, $a7 - fld.s $fa0, $a6, 120 - fld.s $fa2, $a6, 248 - fld.s $fa5, $a6, 376 - fld.s $fa6, $a6, 504 + slli.d $t2, $a6, 7 + add.d $t1, $a2, $t2 + fld.s $fa0, $t1, 120 + fld.s $fa2, $t1, 248 + fld.s $fa5, $t1, 376 + fld.s $fa6, $t1, 504 vextrins.w $vr0, $vr2, 16 vextrins.w $vr0, $vr5, 32 - fld.s $fa2, $a6, 56 - fld.s $fa5, $a6, 184 - fld.s $fa7, $a6, 312 - fld.s $ft0, $a6, 440 + fld.s $fa2, $t1, 56 + fld.s $fa5, $t1, 184 + fld.s $fa7, $t1, 312 + fld.s $ft0, $t1, 440 vextrins.w $vr0, $vr6, 48 vextrins.w $vr2, $vr5, 16 vextrins.w $vr2, $vr7, 32 vextrins.w $vr2, $vr8, 48 - fld.s $fa5, $a6, 124 - fld.s $fa6, $a6, 252 - fld.s $fa7, $a6, 380 + fld.s $fa5, $t1, 124 + fld.s $fa6, $t1, 252 + fld.s $fa7, $t1, 380 vfsub.s $vr1, $vr0, $vr2 vst $vr1, $sp, 128 # 16-byte Folded Spill - fld.s $ft0, $a6, 508 + fld.s $ft0, $t1, 508 vextrins.w $vr5, $vr6, 16 vextrins.w $vr5, $vr7, 32 - fld.s $fa6, $a6, 60 - fld.s $fa7, $a6, 188 - fld.s $ft2, $a6, 316 - fld.s $ft3, $a6, 444 + fld.s $fa6, $t1, 60 + fld.s $fa7, $t1, 188 + fld.s $ft2, $t1, 316 + fld.s $ft3, $t1, 444 vextrins.w $vr5, $vr8, 48 vextrins.w $vr6, $vr7, 16 vextrins.w $vr6, $vr10, 32 @@ -39224,33 +39191,33 @@ mdct_butterflies: # @mdct_butterflies vfsub.s $vr12, $vr5, $vr6 vfadd.s $vr0, $vr0, $vr2 vst $vr0, $sp, 192 # 16-byte Folded Spill - fld.s $fa0, $a6, 112 - fld.s $fa2, $a6, 240 - fld.s $fa7, $a6, 368 + fld.s $fa0, $t1, 112 + fld.s $fa2, $t1, 240 + fld.s $fa7, $t1, 368 vfadd.s $vr1, $vr5, $vr6 vst $vr1, $sp, 176 # 16-byte Folded Spill - fld.s $fa5, $a6, 496 + fld.s $fa5, $t1, 496 vextrins.w $vr0, $vr2, 16 vextrins.w $vr0, $vr7, 32 - fld.s $fa2, $a6, 48 - fld.s $fa6, $a6, 176 - fld.s $fa7, $a6, 304 - fld.s $ft0, $a6, 432 + fld.s $fa2, $t1, 48 + fld.s $fa6, $t1, 176 + fld.s $fa7, $t1, 304 + fld.s $ft0, $t1, 432 vextrins.w $vr0, $vr5, 48 vextrins.w $vr2, $vr6, 16 vextrins.w $vr2, $vr7, 32 vextrins.w $vr2, $vr8, 48 - fld.s $fa5, $a6, 116 - fld.s $fa6, $a6, 244 - fld.s $fa7, $a6, 372 + fld.s $fa5, $t1, 116 + fld.s $fa6, $t1, 244 + fld.s $fa7, $t1, 372 vfsub.s $vr8, $vr0, $vr2 - fld.s $ft2, $a6, 500 + fld.s $ft2, $t1, 500 vextrins.w $vr5, $vr6, 16 vextrins.w $vr5, $vr7, 32 - fld.s $fa6, $a6, 52 - fld.s $fa7, $a6, 180 - fld.s $ft3, $a6, 308 - fld.s $ft5, $a6, 436 + fld.s $fa6, $t1, 52 + fld.s $fa7, $t1, 180 + fld.s $ft3, $t1, 308 + fld.s $ft5, $t1, 436 vextrins.w $vr5, $vr10, 48 vextrins.w $vr6, $vr7, 16 vextrins.w $vr6, $vr11, 32 @@ -39267,32 +39234,32 @@ mdct_butterflies: # @mdct_butterflies vfsub.s $vr15, $vr0, $vr2 vfmul.s $vr0, $vr8, $vr7 vfmul.s $vr2, $vr10, $vr19 - fld.s $fa5, $a6, 104 - fld.s $fa6, $a6, 232 - fld.s $ft0, $a6, 360 + fld.s $fa5, $t1, 104 + fld.s $fa6, $t1, 232 + fld.s $ft0, $t1, 360 vfadd.s $vr25, $vr0, $vr2 - fld.s $fa0, $a6, 488 + fld.s $fa0, $t1, 488 vextrins.w $vr5, $vr6, 16 vextrins.w $vr5, $vr8, 32 - fld.s $fa2, $a6, 40 - fld.s $fa6, $a6, 168 - fld.s $ft0, $a6, 296 - fld.s $ft2, $a6, 424 + fld.s $fa2, $t1, 40 + fld.s $fa6, $t1, 168 + fld.s $ft0, $t1, 296 + fld.s $ft2, $t1, 424 vextrins.w $vr5, $vr0, 48 vextrins.w $vr2, $vr6, 16 vextrins.w $vr2, $vr8, 32 vextrins.w $vr2, $vr10, 48 - fld.s $fa0, $a6, 108 - fld.s $fa6, $a6, 236 - fld.s $ft0, $a6, 364 + fld.s $fa0, $t1, 108 + fld.s $fa6, $t1, 236 + fld.s $ft0, $t1, 364 vfsub.s $vr13, $vr5, $vr2 - fld.s $ft2, $a6, 492 + fld.s $ft2, $t1, 492 vextrins.w $vr0, $vr6, 16 vextrins.w $vr0, $vr8, 32 - fld.s $fa6, $a6, 44 - fld.s $ft0, $a6, 172 - fld.s $ft3, $a6, 300 - fld.s $ft6, $a6, 428 + fld.s $fa6, $t1, 44 + fld.s $ft0, $t1, 172 + fld.s $ft3, $t1, 300 + fld.s $ft6, $t1, 428 vextrins.w $vr0, $vr10, 48 vextrins.w $vr6, $vr8, 16 vextrins.w $vr6, $vr11, 32 @@ -39303,32 +39270,32 @@ mdct_butterflies: # @mdct_butterflies vfadd.s $vr0, $vr0, $vr6 vst $vr0, $sp, 96 # 16-byte Folded Spill vfsub.s $vr11, $vr13, $vr8 - fld.s $fa0, $a6, 96 - fld.s $fa2, $a6, 224 - fld.s $fa5, $a6, 352 + fld.s $fa0, $t1, 96 + fld.s $fa2, $t1, 224 + fld.s $fa5, $t1, 352 vfadd.s $vr10, $vr13, $vr8 - fld.s $ft0, $a6, 480 + fld.s $ft0, $t1, 480 vextrins.w $vr0, $vr2, 16 vextrins.w $vr0, $vr5, 32 - fld.s $fa2, $a6, 32 - fld.s $fa5, $a6, 160 - fld.s $ft5, $a6, 288 - fld.s $ft6, $a6, 416 + fld.s $fa2, $t1, 32 + fld.s $fa5, $t1, 160 + fld.s $ft5, $t1, 288 + fld.s $ft6, $t1, 416 vextrins.w $vr0, $vr8, 48 vextrins.w $vr2, $vr5, 16 vextrins.w $vr2, $vr13, 32 vextrins.w $vr2, $vr14, 48 - fld.s $fa5, $a6, 100 - fld.s $ft0, $a6, 228 - fld.s $ft5, $a6, 356 + fld.s $fa5, $t1, 100 + fld.s $ft0, $t1, 228 + fld.s $ft5, $t1, 356 vfsub.s $vr17, $vr0, $vr2 - fld.s $ft6, $a6, 484 + fld.s $ft6, $t1, 484 vextrins.w $vr5, $vr8, 16 vextrins.w $vr5, $vr13, 32 - fld.s $ft0, $a6, 36 - fld.s $ft5, $a6, 164 - fld.s $ft10, $a6, 292 - fld.s $ft13, $a6, 420 + fld.s $ft0, $t1, 36 + fld.s $ft5, $t1, 164 + fld.s $ft10, $t1, 292 + fld.s $ft13, $t1, 420 vextrins.w $vr5, $vr14, 48 vextrins.w $vr8, $vr13, 16 vextrins.w $vr8, $vr18, 32 @@ -39341,64 +39308,64 @@ mdct_butterflies: # @mdct_butterflies vfsub.s $vr23, $vr0, $vr2 vfmul.s $vr0, $vr18, $vr7 vfmul.s $vr2, $vr17, $vr19 - fld.s $fa5, $a6, 88 - fld.s $ft0, $a6, 216 - fld.s $ft9, $a6, 344 + fld.s $fa5, $t1, 88 + fld.s $ft0, $t1, 216 + fld.s $ft9, $t1, 344 vfadd.s $vr24, $vr2, $vr0 - fld.s $fa0, $a6, 472 + fld.s $fa0, $t1, 472 vextrins.w $vr5, $vr8, 16 vextrins.w $vr5, $vr17, 32 - fld.s $fa2, $a6, 24 - fld.s $ft0, $a6, 152 - fld.s $ft9, $a6, 280 - fld.s $ft10, $a6, 408 + fld.s $fa2, $t1, 24 + fld.s $ft0, $t1, 152 + fld.s $ft9, $t1, 280 + fld.s $ft10, $t1, 408 vextrins.w $vr5, $vr0, 48 vextrins.w $vr2, $vr8, 16 vextrins.w $vr2, $vr17, 32 vextrins.w $vr2, $vr18, 48 - fld.s $fa0, $a6, 28 - fld.s $ft0, $a6, 156 - fld.s $ft9, $a6, 284 + fld.s $fa0, $t1, 28 + fld.s $ft0, $t1, 156 + fld.s $ft9, $t1, 284 vfsub.s $vr6, $vr5, $vr2 - fld.s $ft10, $a6, 412 + fld.s $ft10, $t1, 412 vextrins.w $vr0, $vr8, 16 vextrins.w $vr0, $vr17, 32 - fld.s $ft0, $a6, 92 - fld.s $ft9, $a6, 220 - fld.s $ft13, $a6, 348 - fld.s $ft14, $a6, 476 + fld.s $ft0, $t1, 92 + fld.s $ft9, $t1, 220 + fld.s $ft13, $t1, 348 + fld.s $ft14, $t1, 476 vextrins.w $vr0, $vr18, 48 vextrins.w $vr8, $vr17, 16 vextrins.w $vr8, $vr21, 32 vextrins.w $vr8, $vr22, 48 vfsub.s $vr28, $vr0, $vr8 vfadd.s $vr17, $vr5, $vr2 - fld.s $fa2, $a6, 16 - fld.s $fa5, $a6, 144 - fld.s $ft13, $a6, 272 + fld.s $fa2, $t1, 16 + fld.s $fa5, $t1, 144 + fld.s $ft13, $t1, 272 vfadd.s $vr18, $vr0, $vr8 - fld.s $fa0, $a6, 400 + fld.s $fa0, $t1, 400 vextrins.w $vr2, $vr5, 16 vextrins.w $vr2, $vr21, 32 - fld.s $fa5, $a6, 80 - fld.s $ft0, $a6, 208 - fld.s $ft13, $a6, 336 - fld.s $ft14, $a6, 464 + fld.s $fa5, $t1, 80 + fld.s $ft0, $t1, 208 + fld.s $ft13, $t1, 336 + fld.s $ft14, $t1, 464 vextrins.w $vr2, $vr0, 48 vextrins.w $vr5, $vr8, 16 vextrins.w $vr5, $vr21, 32 vextrins.w $vr5, $vr22, 48 - fld.s $fa0, $a6, 20 - fld.s $ft0, $a6, 148 - fld.s $ft13, $a6, 276 + fld.s $fa0, $t1, 20 + fld.s $ft0, $t1, 148 + fld.s $ft13, $t1, 276 vfsub.s $vr26, $vr2, $vr5 - fld.s $ft14, $a6, 404 + fld.s $ft14, $t1, 404 vextrins.w $vr0, $vr8, 16 vextrins.w $vr0, $vr21, 32 - fld.s $ft0, $a6, 84 - fld.s $ft13, $a6, 212 - fld.s $fs3, $a6, 340 - fld.s $fs5, $a6, 468 + fld.s $ft0, $t1, 84 + fld.s $ft13, $t1, 212 + fld.s $fs3, $t1, 340 + fld.s $fs5, $t1, 468 vextrins.w $vr0, $vr22, 48 vextrins.w $vr8, $vr21, 16 vextrins.w $vr8, $vr27, 32 @@ -39411,32 +39378,32 @@ mdct_butterflies: # @mdct_butterflies vfadd.s $vr31, $vr2, $vr0 vfmul.s $vr0, $vr27, $vr7 vfmul.s $vr2, $vr26, $vr19 - fld.s $fa5, $a6, 8 - fld.s $ft0, $a6, 136 - fld.s $fs2, $a6, 264 + fld.s $fa5, $t1, 8 + fld.s $ft0, $t1, 136 + fld.s $fs2, $t1, 264 vfsub.s $vr2, $vr0, $vr2 - fld.s $fa0, $a6, 392 + fld.s $fa0, $t1, 392 vextrins.w $vr5, $vr8, 16 vextrins.w $vr5, $vr26, 32 - fld.s $ft0, $a6, 72 - fld.s $fs2, $a6, 200 - fld.s $fs3, $a6, 328 + fld.s $ft0, $t1, 72 + fld.s $fs2, $t1, 200 + fld.s $fs3, $t1, 328 vextrins.w $vr5, $vr0, 48 - fld.s $fa0, $a6, 456 + fld.s $fa0, $t1, 456 vextrins.w $vr8, $vr26, 16 vextrins.w $vr8, $vr27, 32 - fld.s $fs3, $a6, 12 - fld.s $fs2, $a6, 140 - fld.s $fs5, $a6, 268 - fld.s $fs6, $a6, 396 + fld.s $fs3, $t1, 12 + fld.s $fs2, $t1, 140 + fld.s $fs5, $t1, 268 + fld.s $fs6, $t1, 396 vextrins.w $vr8, $vr0, 48 vextrins.w $vr27, $vr26, 16 vextrins.w $vr27, $vr29, 32 vextrins.w $vr27, $vr30, 48 - fld.s $fa0, $a6, 76 - fld.s $fs2, $a6, 204 - fld.s $fs5, $a6, 332 - fld.s $fs6, $a6, 460 + fld.s $fa0, $t1, 76 + fld.s $fs2, $t1, 204 + fld.s $fs5, $t1, 332 + fld.s $fs6, $t1, 460 vfsub.s $vr20, $vr5, $vr8 vextrins.w $vr0, $vr26, 16 vextrins.w $vr0, $vr29, 32 @@ -39445,31 +39412,31 @@ mdct_butterflies: # @mdct_butterflies vfadd.s $vr26, $vr5, $vr8 vfadd.s $vr27, $vr27, $vr0 vfadd.s $vr5, $vr20, $vr29 - fldx.s $fa0, $a2, $a7 - fld.s $fs6, $a6, 128 + fldx.s $fa0, $a2, $t2 + fld.s $fs6, $t1, 128 vfsub.s $vr8, $vr29, $vr20 - fld.s $ft12, $a6, 256 - fld.s $fs5, $a6, 384 + fld.s $ft12, $t1, 256 + fld.s $fs5, $t1, 384 vextrins.w $vr0, $vr30, 16 - fld.s $fs6, $a6, 64 - fld.s $ft8, $a6, 192 + fld.s $fs6, $t1, 64 + fld.s $ft8, $t1, 192 vextrins.w $vr0, $vr20, 32 vextrins.w $vr0, $vr29, 48 - fld.s $ft12, $a6, 320 + fld.s $ft12, $t1, 320 vextrins.w $vr30, $vr16, 16 - fld.s $ft8, $a6, 448 - fld.s $fa1, $a6, 4 - fld.s $fs5, $a6, 132 - fld.s $fa3, $a6, 260 + fld.s $ft8, $t1, 448 + fld.s $fa1, $t1, 4 + fld.s $fs5, $t1, 132 + fld.s $fa3, $t1, 260 vextrins.w $vr30, $vr20, 32 vextrins.w $vr30, $vr16, 48 vextrins.w $vr1, $vr29, 16 vextrins.w $vr1, $vr3, 32 - fld.s $fa3, $a6, 388 - fld.s $ft8, $a6, 68 - fld.s $ft12, $a6, 196 - fld.s $fs5, $a6, 324 - fld.s $fa4, $a6, 452 + fld.s $fa3, $t1, 388 + fld.s $ft8, $t1, 68 + fld.s $ft12, $t1, 196 + fld.s $fs5, $t1, 324 + fld.s $fa4, $t1, 452 vextrins.w $vr1, $vr3, 48 vextrins.w $vr16, $vr20, 16 vextrins.w $vr16, $vr29, 32 @@ -39527,17 +39494,17 @@ mdct_butterflies: # @mdct_butterflies vilvl.w $vr20, $vr31, $vr7 vpackod.w $vr10, $vr12, $vr6 vshuf4i.d $vr10, $vr20, 12 - vst $vr10, $a6, 144 + vst $vr10, $t1, 144 vpackev.w $vr10, $vr31, $vr7 vilvh.w $vr20, $vr12, $vr6 vshuf4i.d $vr20, $vr10, 12 - vst $vr20, $a6, 272 + vst $vr20, $t1, 272 vld $vr25, $sp, 48 # 16-byte Folded Reload vori.b $vr10, $vr25, 0 vshuf.w $vr10, $vr12, $vr6 vpackod.w $vr20, $vr31, $vr7 vshuf4i.d $vr10, $vr20, 12 - vst $vr10, $a6, 400 + vst $vr10, $t1, 400 vfsub.s $vr1, $vr4, $vr1 vfsub.s $vr4, $vr28, $vr16 vfadd.s $vr10, $vr8, $vr1 @@ -39547,7 +39514,7 @@ mdct_butterflies: # @mdct_butterflies vilvl.w $vr4, $vr8, $vr1 vpackod.w $vr16, $vr3, $vr10 vshuf4i.d $vr16, $vr4, 12 - vst $vr16, $a6, 128 + vst $vr16, $t1, 128 vpackev.w $vr4, $vr12, $vr6 vpackev.d $vr4, $vr7, $vr4 vld $vr20, $sp, 32 # 16-byte Folded Reload @@ -39556,7 +39523,7 @@ mdct_butterflies: # @mdct_butterflies vpackev.w $vr4, $vr8, $vr1 vilvh.w $vr7, $vr3, $vr10 vshuf4i.d $vr7, $vr4, 12 - vst $vr7, $a6, 256 + vst $vr7, $t1, 256 vshuf.w $vr25, $vr3, $vr10 vpackev.w $vr3, $vr3, $vr10 vpackev.d $vr3, $vr1, $vr3 @@ -39564,9 +39531,9 @@ mdct_butterflies: # @mdct_butterflies vshuf.w $vr7, $vr8, $vr3 vpackod.w $vr1, $vr8, $vr1 vshuf4i.d $vr25, $vr1, 12 - vst $vr25, $a6, 384 - vstx $vr7, $a2, $a7 - vst $vr6, $a6, 16 + vst $vr25, $t1, 384 + vstx $vr7, $a2, $t2 + vst $vr6, $t1, 16 vfadd.s $vr1, $vr5, $vr24 vfsub.s $vr3, $vr5, $vr24 vfadd.s $vr4, $vr15, $vr0 @@ -39581,42 +39548,42 @@ mdct_butterflies: # @mdct_butterflies vfadd.s $vr7, $vr11, $vr19 vfadd.s $vr8, $vr4, $vr0 vfsub.s $vr0, $vr4, $vr0 - vstelm.w $vr6, $a6, 160, 1 - vstelm.w $vr6, $a6, 288, 2 - vstelm.w $vr6, $a6, 416, 3 - vstelm.w $vr3, $a6, 168, 1 - vstelm.w $vr3, $a6, 296, 2 - vstelm.w $vr3, $a6, 424, 3 + vstelm.w $vr6, $t1, 160, 1 + vstelm.w $vr6, $t1, 288, 2 + vstelm.w $vr6, $t1, 416, 3 + vstelm.w $vr3, $t1, 168, 1 + vstelm.w $vr3, $t1, 296, 2 + vstelm.w $vr3, $t1, 424, 3 vpackev.w $vr4, $vr0, $vr6 vpackev.d $vr3, $vr3, $vr4 vori.b $vr4, $vr20, 0 vshuf.w $vr4, $vr8, $vr3 - vstelm.w $vr8, $a6, 172, 1 - vstelm.w $vr8, $a6, 300, 2 - vstelm.w $vr8, $a6, 428, 3 - vst $vr4, $a6, 32 - vstelm.w $vr0, $a6, 164, 1 - vstelm.w $vr0, $a6, 292, 2 - vstelm.w $vr0, $a6, 420, 3 + vstelm.w $vr8, $t1, 172, 1 + vstelm.w $vr8, $t1, 300, 2 + vstelm.w $vr8, $t1, 428, 3 + vst $vr4, $t1, 32 + vstelm.w $vr0, $t1, 164, 1 + vstelm.w $vr0, $t1, 292, 2 + vstelm.w $vr0, $t1, 420, 3 vfadd.s $vr0, $vr7, $vr2 vfsub.s $vr2, $vr7, $vr2 - vstelm.w $vr5, $a6, 184, 1 - vstelm.w $vr5, $a6, 312, 2 - vstelm.w $vr5, $a6, 440, 3 - vstelm.w $vr1, $a6, 176, 1 - vstelm.w $vr1, $a6, 304, 2 - vstelm.w $vr1, $a6, 432, 3 + vstelm.w $vr5, $t1, 184, 1 + vstelm.w $vr5, $t1, 312, 2 + vstelm.w $vr5, $t1, 440, 3 + vstelm.w $vr1, $t1, 176, 1 + vstelm.w $vr1, $t1, 304, 2 + vstelm.w $vr1, $t1, 432, 3 vpackev.w $vr1, $vr2, $vr1 vpackev.d $vr1, $vr5, $vr1 vori.b $vr3, $vr20, 0 vshuf.w $vr3, $vr0, $vr1 - vstelm.w $vr0, $a6, 188, 1 - vstelm.w $vr0, $a6, 316, 2 - vstelm.w $vr0, $a6, 444, 3 - vst $vr3, $a6, 48 - vstelm.w $vr2, $a6, 180, 1 - vstelm.w $vr2, $a6, 308, 2 - vstelm.w $vr2, $a6, 436, 3 + vstelm.w $vr0, $t1, 188, 1 + vstelm.w $vr0, $t1, 316, 2 + vstelm.w $vr0, $t1, 444, 3 + vst $vr3, $t1, 48 + vstelm.w $vr2, $t1, 180, 1 + vstelm.w $vr2, $t1, 308, 2 + vstelm.w $vr2, $t1, 436, 3 vfsub.s $vr0, $vr30, $vr14 vfsub.s $vr1, $vr29, $vr13 vfadd.s $vr12, $vr13, $vr29 @@ -39663,12 +39630,12 @@ mdct_butterflies: # @mdct_butterflies vextrins.w $vr17, $vr16, 17 vextrins.w $vr17, $vr1, 33 vextrins.w $vr17, $vr3, 49 - vst $vr17, $a6, 192 + vst $vr17, $t1, 192 vreplvei.w $vr17, $vr18, 2 vextrins.w $vr17, $vr16, 18 vextrins.w $vr17, $vr1, 34 vextrins.w $vr17, $vr3, 50 - vst $vr17, $a6, 320 + vst $vr17, $t1, 320 vreplvei.w $vr17, $vr18, 3 vextrins.w $vr17, $vr16, 19 vpackev.w $vr16, $vr16, $vr18 @@ -39677,29 +39644,29 @@ mdct_butterflies: # @mdct_butterflies vori.b $vr16, $vr20, 0 vshuf.w $vr16, $vr3, $vr1 vextrins.w $vr17, $vr3, 51 - vst $vr17, $a6, 448 + vst $vr17, $t1, 448 vfadd.s $vr1, $vr4, $vr8 vfadd.s $vr3, $vr15, $vr13 - vst $vr16, $a6, 64 + vst $vr16, $t1, 64 vfadd.s $vr4, $vr3, $vr1 vfsub.s $vr1, $vr3, $vr1 - vstelm.w $vr11, $a6, 216, 1 - vstelm.w $vr11, $a6, 344, 2 - vstelm.w $vr11, $a6, 472, 3 - vstelm.w $vr14, $a6, 208, 1 - vstelm.w $vr14, $a6, 336, 2 - vstelm.w $vr14, $a6, 464, 3 + vstelm.w $vr11, $t1, 216, 1 + vstelm.w $vr11, $t1, 344, 2 + vstelm.w $vr11, $t1, 472, 3 + vstelm.w $vr14, $t1, 208, 1 + vstelm.w $vr14, $t1, 336, 2 + vstelm.w $vr14, $t1, 464, 3 vpackev.w $vr3, $vr1, $vr14 vpackev.d $vr3, $vr11, $vr3 vori.b $vr8, $vr20, 0 vshuf.w $vr8, $vr4, $vr3 - vstelm.w $vr4, $a6, 220, 1 - vstelm.w $vr4, $a6, 348, 2 - vstelm.w $vr4, $a6, 476, 3 - vst $vr8, $a6, 80 - vstelm.w $vr1, $a6, 212, 1 - vstelm.w $vr1, $a6, 340, 2 - vstelm.w $vr1, $a6, 468, 3 + vstelm.w $vr4, $t1, 220, 1 + vstelm.w $vr4, $t1, 348, 2 + vstelm.w $vr4, $t1, 476, 3 + vst $vr8, $t1, 80 + vstelm.w $vr1, $t1, 212, 1 + vstelm.w $vr1, $t1, 340, 2 + vstelm.w $vr1, $t1, 468, 3 vfadd.s $vr1, $vr5, $vr10 vfsub.s $vr3, $vr5, $vr10 vfadd.s $vr4, $vr7, $vr12 @@ -39714,71 +39681,71 @@ mdct_butterflies: # @mdct_butterflies vfadd.s $vr4, $vr6, $vr19 vfadd.s $vr6, $vr2, $vr5 vfsub.s $vr2, $vr2, $vr5 - vstelm.w $vr8, $a6, 224, 1 - vstelm.w $vr8, $a6, 352, 2 - vstelm.w $vr8, $a6, 480, 3 - vstelm.w $vr3, $a6, 232, 1 - vstelm.w $vr3, $a6, 360, 2 - vstelm.w $vr3, $a6, 488, 3 + vstelm.w $vr8, $t1, 224, 1 + vstelm.w $vr8, $t1, 352, 2 + vstelm.w $vr8, $t1, 480, 3 + vstelm.w $vr3, $t1, 232, 1 + vstelm.w $vr3, $t1, 360, 2 + vstelm.w $vr3, $t1, 488, 3 vpackev.w $vr5, $vr2, $vr8 vpackev.d $vr3, $vr3, $vr5 vori.b $vr5, $vr20, 0 vshuf.w $vr5, $vr6, $vr3 - vstelm.w $vr6, $a6, 236, 1 - vstelm.w $vr6, $a6, 364, 2 - vstelm.w $vr6, $a6, 492, 3 - vst $vr5, $a6, 96 - vstelm.w $vr2, $a6, 228, 1 - vstelm.w $vr2, $a6, 356, 2 - vstelm.w $vr2, $a6, 484, 3 + vstelm.w $vr6, $t1, 236, 1 + vstelm.w $vr6, $t1, 364, 2 + vstelm.w $vr6, $t1, 492, 3 + vst $vr5, $t1, 96 + vstelm.w $vr2, $t1, 228, 1 + vstelm.w $vr2, $t1, 356, 2 + vstelm.w $vr2, $t1, 484, 3 vfadd.s $vr2, $vr4, $vr0 vfsub.s $vr0, $vr4, $vr0 - vstelm.w $vr7, $a6, 248, 1 - vstelm.w $vr7, $a6, 376, 2 - vstelm.w $vr7, $a6, 504, 3 - vstelm.w $vr1, $a6, 240, 1 - vstelm.w $vr1, $a6, 368, 2 - vstelm.w $vr1, $a6, 496, 3 + vstelm.w $vr7, $t1, 248, 1 + vstelm.w $vr7, $t1, 376, 2 + vstelm.w $vr7, $t1, 504, 3 + vstelm.w $vr1, $t1, 240, 1 + vstelm.w $vr1, $t1, 368, 2 + vstelm.w $vr1, $t1, 496, 3 vpackev.w $vr1, $vr0, $vr1 vpackev.d $vr1, $vr7, $vr1 vori.b $vr3, $vr20, 0 vshuf.w $vr3, $vr2, $vr1 - vstelm.w $vr2, $a6, 252, 1 - vstelm.w $vr2, $a6, 380, 2 - vstelm.w $vr2, $a6, 508, 3 - vst $vr3, $a6, 112 - vstelm.w $vr0, $a6, 244, 1 - vstelm.w $vr0, $a6, 372, 2 - addi.d $a1, $a1, 4 - vstelm.w $vr0, $a6, 500, 3 - bne $a1, $a5, .LBB216_8 + vstelm.w $vr2, $t1, 252, 1 + vstelm.w $vr2, $t1, 380, 2 + vstelm.w $vr2, $t1, 508, 3 + vst $vr3, $t1, 112 + vstelm.w $vr0, $t1, 244, 1 + vstelm.w $vr0, $t1, 372, 2 + addi.d $a6, $a6, 4 + vstelm.w $vr0, $t1, 500, 3 + bne $a6, $t0, .LBB216_8 # %bb.9: # %middle.block - beq $a4, $a5, .LBB216_12 + beq $a7, $t0, .LBB216_12 .LBB216_10: # %.lr.ph5.preheader12 - pcalau12i $a1, %pc_hi20(.LCPI216_2) - fld.s $fa0, $a1, %pc_lo12(.LCPI216_2) + alsl.d $a2, $a0, $a2, 2 + addi.d $a2, $a2, 64 + ori $a5, $a5, 862 + movgr2fr.w $fa0, $a5 fst.s $fa0, $sp, 112 # 4-byte Folded Spill - pcalau12i $a1, %pc_hi20(.LCPI216_3) - fld.s $fa1, $a1, %pc_lo12(.LCPI216_3) - pcalau12i $a1, %pc_hi20(.LCPI216_4) - fld.s $fa2, $a1, %pc_lo12(.LCPI216_4) - alsl.d $a1, $a0, $a2, 2 - addi.d $a1, $a1, 64 + ori $a4, $a4, 3861 + movgr2fr.w $fa1, $a4 + ori $a1, $a1, 1267 + movgr2fr.w $fa2, $a1 .p2align 4, , 16 .LBB216_11: # %.lr.ph5 # =>This Inner Loop Header: Depth=1 - fld.s $fa3, $a1, 56 - fld.s $fa4, $a1, -8 - fld.s $fa5, $a1, 60 - fld.s $fa6, $a1, -4 + fld.s $fa3, $a2, 56 + fld.s $fa4, $a2, -8 + fld.s $fa5, $a2, 60 + fld.s $fa6, $a2, -4 fsub.s $fa0, $fa3, $fa4 fst.s $fa0, $sp, 128 # 4-byte Folded Spill fsub.s $fa0, $fa5, $fa6 fst.s $fa0, $sp, 144 # 4-byte Folded Spill - fld.s $fa7, $a1, 48 - fld.s $ft0, $a1, -16 - fld.s $ft1, $a1, 52 - fld.s $ft2, $a1, -12 + fld.s $fa7, $a2, 48 + fld.s $ft0, $a2, -16 + fld.s $ft1, $a2, 52 + fld.s $ft2, $a2, -12 fadd.s $fa0, $fa3, $fa4 fst.s $fa0, $sp, 176 # 4-byte Folded Spill fadd.s $fa0, $fa5, $fa6 @@ -39788,165 +39755,165 @@ mdct_butterflies: # @mdct_butterflies fadd.s $fa6, $fa7, $ft0 fadd.s $fa0, $ft1, $ft2 fst.s $fa0, $sp, 160 # 4-byte Folded Spill - fld.s $ft11, $sp, 112 # 4-byte Folded Reload - fmul.s $fa7, $ft3, $ft11 + fld.s $fs2, $sp, 112 # 4-byte Folded Reload + fmul.s $fa7, $ft3, $fs2 fmul.s $ft0, $ft4, $fa1 - fsub.s $fa3, $fa7, $ft0 + fsub.s $ft11, $fa7, $ft0 fmul.s $fa7, $ft3, $fa1 - fld.s $ft0, $a1, 40 - fld.s $ft1, $a1, -24 - fld.s $ft2, $a1, 44 - fld.s $ft3, $a1, -20 - fmul.s $ft4, $ft4, $ft11 + fld.s $ft0, $a2, 40 + fld.s $ft1, $a2, -24 + fld.s $ft2, $a2, 44 + fld.s $ft3, $a2, -20 + fmul.s $ft4, $ft4, $fs2 fadd.s $ft12, $fa7, $ft4 fsub.s $ft4, $ft0, $ft1 fsub.s $ft6, $ft2, $ft3 fadd.s $ft0, $ft0, $ft1 fadd.s $fa7, $ft2, $ft3 - fld.s $ft1, $a1, 32 - fld.s $ft2, $a1, -32 - fld.s $ft3, $a1, 36 - fld.s $ft8, $a1, -28 - fsub.s $fa4, $ft4, $ft6 - fadd.s $fa5, $ft4, $ft6 + fld.s $ft1, $a2, 32 + fld.s $ft2, $a2, -32 + fld.s $ft3, $a2, 36 + fld.s $ft8, $a2, -28 + fsub.s $fa3, $ft4, $ft6 + fadd.s $fa4, $ft4, $ft6 fsub.s $ft4, $ft1, $ft2 fsub.s $ft6, $ft3, $ft8 fadd.s $ft2, $ft1, $ft2 fadd.s $ft1, $ft3, $ft8 fmul.s $ft3, $ft4, $fa1 - fmul.s $ft8, $ft6, $ft11 + fmul.s $ft8, $ft6, $fs2 fsub.s $fs3, $ft3, $ft8 fmul.s $ft3, $ft6, $fa1 - fld.s $ft6, $a1, 24 - fld.s $ft8, $a1, -40 - fld.s $ft9, $a1, -36 - fld.s $ft10, $a1, 28 - fmul.s $ft4, $ft4, $ft11 + fld.s $ft6, $a2, 24 + fld.s $ft8, $a2, -40 + fld.s $ft9, $a2, -36 + fld.s $ft10, $a2, 28 + fmul.s $ft4, $ft4, $fs2 fadd.s $fs4, $ft4, $ft3 fsub.s $fs0, $ft6, $ft8 fsub.s $fs1, $ft9, $ft10 - fld.s $ft13, $a1, -48 - fld.s $ft14, $a1, 16 - fld.s $fs5, $a1, -44 - fld.s $fs6, $a1, 20 + fld.s $ft13, $a2, -48 + fld.s $ft14, $a2, 16 + fld.s $fs5, $a2, -44 + fld.s $fs6, $a2, 20 fadd.s $ft4, $ft6, $ft8 fadd.s $ft3, $ft9, $ft10 fsub.s $ft9, $ft13, $ft14 fsub.s $ft10, $fs5, $fs6 fadd.s $ft8, $ft13, $ft14 fadd.s $ft6, $fs5, $fs6 - fmul.s $ft13, $ft10, $ft11 + fmul.s $ft13, $ft10, $fs2 fmul.s $ft14, $ft9, $fa1 fadd.s $fs5, $ft14, $ft13 fmul.s $ft10, $ft10, $fa1 - fld.s $ft13, $a1, -56 - fld.s $ft14, $a1, 8 - fld.s $fs6, $a1, -52 - fld.s $fs7, $a1, 12 - fmul.s $ft9, $ft9, $ft11 + fld.s $ft13, $a2, -56 + fld.s $ft14, $a2, 8 + fld.s $fs6, $a2, -52 + fld.s $fs7, $a2, 12 + fmul.s $ft9, $ft9, $fs2 fsub.s $ft9, $ft10, $ft9 fsub.s $fa0, $ft13, $ft14 fsub.s $ft5, $fs6, $fs7 fadd.s $ft10, $ft13, $ft14 fadd.s $fs6, $fs6, $fs7 fadd.s $fs7, $fa0, $ft5 - fld.s $ft13, $a1, -64 - fld.s $ft14, $a1, 0 - fld.s $ft7, $a1, -60 - fld.s $ft15, $a1, 4 + fld.s $ft13, $a2, -64 + fld.s $ft14, $a2, 0 + fld.s $ft7, $a2, -60 + fld.s $ft15, $a2, 4 fsub.s $fa0, $ft5, $fa0 fsub.s $ft5, $ft13, $ft14 fadd.s $ft13, $ft13, $ft14 - fsub.s $fs2, $ft7, $ft15 - fadd.s $ft14, $ft7, $ft15 - fmul.s $ft7, $fs2, $fa1 - fmul.s $ft15, $ft5, $ft11 - fadd.s $ft7, $ft15, $ft7 - fmul.s $ft15, $fs2, $ft11 + fsub.s $ft14, $ft7, $ft15 + fadd.s $ft7, $ft7, $ft15 + fmul.s $ft15, $ft14, $fa1 + fmul.s $fa5, $ft5, $fs2 + fadd.s $fa5, $fa5, $ft15 + fmul.s $ft14, $ft14, $fs2 fmul.s $ft5, $ft5, $fa1 - fsub.s $ft5, $ft15, $ft5 - fsub.s $ft15, $ft7, $fs3 - fadd.s $ft7, $fs3, $ft7 - fsub.s $fs2, $ft5, $fs4 + fsub.s $ft5, $ft14, $ft5 + fsub.s $ft14, $fa5, $fs3 + fadd.s $fa5, $fs3, $fa5 + fsub.s $ft15, $ft5, $fs4 fadd.s $fs3, $fs4, $ft5 - fadd.s $ft5, $fs2, $ft15 - fsub.s $ft15, $fs2, $ft15 + fadd.s $ft5, $ft15, $ft14 + fsub.s $ft14, $ft15, $ft14 + fmul.s $fa3, $fa3, $fa2 + fmul.s $ft15, $fs7, $fa2 + fsub.s $fs4, $fa3, $ft15 + fadd.s $fa3, $fa3, $ft15 fmul.s $fa4, $fa4, $fa2 - fmul.s $fs2, $fs7, $fa2 - fsub.s $fs4, $fa4, $fs2 - fadd.s $fa4, $fa4, $fs2 - fmul.s $fa5, $fa5, $fa2 fmul.s $fa0, $fa0, $fa2 - fsub.s $fs2, $fa0, $fa5 - fadd.s $fa0, $fa5, $fa0 - fsub.s $fa5, $fa3, $fs5 - fadd.s $fa3, $fa3, $fs5 + fsub.s $ft15, $fa0, $fa4 + fadd.s $fa0, $fa4, $fa0 + fsub.s $fa4, $ft11, $fs5 + fadd.s $ft11, $ft11, $fs5 fsub.s $fs5, $ft12, $ft9 fadd.s $ft9, $ft12, $ft9 - fsub.s $ft12, $fa5, $fs5 - fadd.s $fa5, $fa5, $fs5 - fld.s $ft11, $sp, 128 # 4-byte Folded Reload - fsub.s $fs5, $ft11, $fs1 - fadd.s $fs1, $ft11, $fs1 - fld.s $ft11, $sp, 144 # 4-byte Folded Reload - fsub.s $fs7, $ft11, $fs0 - fadd.s $fs0, $ft11, $fs0 - fadd.s $ft11, $fs5, $fs2 - fsub.s $fs2, $fs5, $fs2 + fsub.s $ft12, $fa4, $fs5 + fadd.s $fa4, $fa4, $fs5 + fld.s $fs2, $sp, 128 # 4-byte Folded Reload + fsub.s $fs5, $fs2, $fs1 + fadd.s $fs1, $fs2, $fs1 + fld.s $fs2, $sp, 144 # 4-byte Folded Reload + fsub.s $fs7, $fs2, $fs0 + fadd.s $fs0, $fs2, $fs0 + fadd.s $fs2, $fs5, $ft15 + fsub.s $ft15, $fs5, $ft15 fmul.s $ft5, $ft5, $fa2 fmul.s $ft12, $ft12, $fa2 fadd.s $fs5, $ft12, $ft5 fsub.s $ft5, $ft12, $ft5 - fadd.s $ft12, $ft11, $fs5 - fst.s $ft12, $a1, -40 - fmul.s $ft12, $ft15, $fa2 - fmul.s $fa5, $fa5, $fa2 - fsub.s $ft11, $ft11, $fs5 - fst.s $ft11, $a1, -48 - fsub.s $ft11, $fa5, $ft12 - fadd.s $ft15, $fs2, $ft11 - fst.s $ft15, $a1, -64 - fsub.s $ft11, $fs2, $ft11 - fst.s $ft11, $a1, -56 - fadd.s $fa5, $fa5, $ft12 - fsub.s $ft11, $fs7, $fs4 - fadd.s $ft12, $fs7, $fs4 - fadd.s $ft15, $ft11, $ft5 - fst.s $ft15, $a1, -52 - fsub.s $ft5, $ft11, $ft5 - fst.s $ft5, $a1, -60 - fadd.s $ft5, $ft12, $fa5 - fst.s $ft5, $a1, -36 - fsub.s $fa5, $ft12, $fa5 - fst.s $fa5, $a1, -44 - fadd.s $fa5, $fs1, $fa4 - fsub.s $fa4, $fs1, $fa4 - fadd.s $ft5, $fa3, $ft7 - fsub.s $fa3, $fa3, $ft7 - fadd.s $ft7, $fa5, $ft5 - fst.s $ft7, $a1, -8 - fsub.s $fa5, $fa5, $ft5 - fst.s $fa5, $a1, -16 - fsub.s $fa5, $ft9, $fs3 + fadd.s $ft12, $fs2, $fs5 + fst.s $ft12, $a2, -40 + fmul.s $ft12, $ft14, $fa2 + fmul.s $fa4, $fa4, $fa2 + fsub.s $ft14, $fs2, $fs5 + fst.s $ft14, $a2, -48 + fsub.s $ft14, $fa4, $ft12 + fadd.s $fs2, $ft15, $ft14 + fst.s $fs2, $a2, -64 + fsub.s $ft14, $ft15, $ft14 + fst.s $ft14, $a2, -56 + fadd.s $fa4, $fa4, $ft12 + fsub.s $ft12, $fs7, $fs4 + fadd.s $ft14, $fs7, $fs4 + fadd.s $ft15, $ft12, $ft5 + fst.s $ft15, $a2, -52 + fsub.s $ft5, $ft12, $ft5 + fst.s $ft5, $a2, -60 + fadd.s $ft5, $ft14, $fa4 + fst.s $ft5, $a2, -36 + fsub.s $fa4, $ft14, $fa4 + fst.s $fa4, $a2, -44 + fadd.s $fa4, $fs1, $fa3 + fsub.s $fa3, $fs1, $fa3 + fadd.s $ft5, $ft11, $fa5 + fsub.s $fa5, $ft11, $fa5 + fadd.s $ft11, $fa4, $ft5 + fst.s $ft11, $a2, -8 + fsub.s $fa4, $fa4, $ft5 + fst.s $fa4, $a2, -16 + fsub.s $fa4, $ft9, $fs3 + fadd.s $ft5, $fa3, $fa4 + fst.s $ft5, $a2, -32 + fsub.s $fa3, $fa3, $fa4 + fst.s $fa3, $a2, -24 + fadd.s $fa3, $ft9, $fs3 + fsub.s $fa4, $fs0, $fa0 + fadd.s $fa0, $fs0, $fa0 fadd.s $ft5, $fa4, $fa5 - fst.s $ft5, $a1, -32 + fst.s $ft5, $a2, -20 fsub.s $fa4, $fa4, $fa5 - fst.s $fa4, $a1, -24 - fadd.s $fa4, $ft9, $fs3 - fsub.s $fa5, $fs0, $fa0 - fadd.s $fa0, $fs0, $fa0 - fadd.s $ft5, $fa5, $fa3 - fst.s $ft5, $a1, -20 - fsub.s $fa3, $fa5, $fa3 - fst.s $fa3, $a1, -28 - fadd.s $fa3, $fa0, $fa4 - fst.s $fa3, $a1, -4 - fsub.s $fa0, $fa0, $fa4 - fst.s $fa0, $a1, -12 + fst.s $fa4, $a2, -28 + fadd.s $fa4, $fa0, $fa3 + fst.s $fa4, $a2, -4 + fsub.s $fa0, $fa0, $fa3 + fst.s $fa0, $a2, -12 fsub.s $fa0, $ft13, $ft2 fadd.s $fa3, $ft2, $ft13 - fsub.s $fa4, $ft14, $ft1 - fadd.s $ft1, $ft1, $ft14 + fsub.s $fa4, $ft7, $ft1 + fadd.s $ft1, $ft1, $ft7 fadd.s $fa5, $fa0, $fa4 fmul.s $fa5, $fa5, $fa2 fsub.s $fa0, $fa4, $fa0 @@ -39973,53 +39940,53 @@ mdct_butterflies: # @mdct_butterflies fadd.s $ft7, $ft8, $fa5 fsub.s $fa5, $ft8, $fa5 fadd.s $ft8, $ft10, $ft7 - fst.s $ft8, $a1, 24 + fst.s $ft8, $a2, 24 fmul.s $fa0, $fa0, $fa2 fmul.s $ft5, $ft5, $fa2 fsub.s $ft7, $ft10, $ft7 - fst.s $ft7, $a1, 16 + fst.s $ft7, $a2, 16 fsub.s $ft7, $ft5, $fa0 fadd.s $ft8, $ft2, $ft7 - fst.s $ft8, $a1, 0 + fst.s $ft8, $a2, 0 fsub.s $ft2, $ft2, $ft7 - fst.s $ft2, $a1, 8 + fst.s $ft2, $a2, 8 fadd.s $fa0, $ft5, $fa0 fsub.s $ft2, $ft9, $fa4 fadd.s $fa4, $ft9, $fa4 fadd.s $ft5, $ft2, $fa5 - fst.s $ft5, $a1, 12 + fst.s $ft5, $a2, 12 fsub.s $fa5, $ft2, $fa5 - fst.s $fa5, $a1, 4 + fst.s $fa5, $a2, 4 fadd.s $fa5, $fa4, $fa0 - fst.s $fa5, $a1, 28 + fst.s $fa5, $a2, 28 fsub.s $fa0, $fa4, $fa0 - fst.s $fa0, $a1, 20 + fst.s $fa0, $a2, 20 fadd.s $fa0, $ft4, $ft0 fsub.s $fa4, $ft4, $ft0 fadd.s $fa5, $fa6, $fa3 fsub.s $fa3, $fa6, $fa3 fadd.s $fa6, $fa0, $fa5 - fst.s $fa6, $a1, 56 + fst.s $fa6, $a2, 56 fsub.s $fa0, $fa0, $fa5 - fst.s $fa0, $a1, 48 + fst.s $fa0, $a2, 48 fsub.s $fa0, $ft6, $ft1 fadd.s $fa5, $fa4, $fa0 - fst.s $fa5, $a1, 32 + fst.s $fa5, $a2, 32 fsub.s $fa0, $fa4, $fa0 - fst.s $fa0, $a1, 40 + fst.s $fa0, $a2, 40 fadd.s $fa0, $ft6, $ft1 fsub.s $fa4, $ft3, $fa7 fadd.s $fa5, $ft3, $fa7 fadd.s $fa6, $fa4, $fa3 - fst.s $fa6, $a1, 44 + fst.s $fa6, $a2, 44 fsub.s $fa3, $fa4, $fa3 - fst.s $fa3, $a1, 36 + fst.s $fa3, $a2, 36 fadd.s $fa3, $fa5, $fa0 - fst.s $fa3, $a1, 60 + fst.s $fa3, $a2, 60 fsub.s $fa0, $fa5, $fa0 - fst.s $fa0, $a1, 52 + fst.s $fa0, $a2, 52 addi.d $a0, $a0, 32 - addi.d $a1, $a1, 128 + addi.d $a2, $a2, 128 bltu $a0, $a3, .LBB216_11 .LBB216_12: # %._crit_edge6 fld.d $fs7, $sp, 216 # 8-byte Folded Reload @@ -40778,36 +40745,7 @@ _vi_gpsy_free: # @_vi_gpsy_free .Lfunc_end218: .size _vi_gpsy_free, .Lfunc_end218-_vi_gpsy_free # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _vp_psy_init -.LCPI219_0: - .dword 0x3fe62e42fefa39ef # double 0.69314718055994529 -.LCPI219_1: - .dword 0x3ff7154760000000 # double 1.4426950216293335 -.LCPI219_2: - .dword 0xc017dcf680000000 # double -5.9657840728759766 -.LCPI219_3: - .dword 0x4017dcf680000000 # double 5.9657840728759766 -.LCPI219_4: - .dword 0x3fe62e42a0000000 # double 0.69314700365066528 -.LCPI219_7: - .dword 0x402a333340000000 # double 13.100000381469727 -.LCPI219_9: - .dword 0x4001eb8520000000 # double 2.2400000095367432 -.LCPI219_11: - .dword 0xb690000000000000 # double -7.0064923216240854E-46 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI219_5: - .word 0x42c80000 # float 100 -.LCPI219_6: - .word 0x3a41fc8f # float 7.39999989E-4 -.LCPI219_8: - .word 0x329ee9ee # float 1.84999998E-8 -.LCPI219_10: - .word 0x38d1b717 # float 9.99999974E-5 - .text - .globl _vp_psy_init + .globl _vp_psy_init # -- Begin function _vp_psy_init .p2align 5 .type _vp_psy_init,@function _vp_psy_init: # @_vp_psy_init @@ -40850,8 +40788,11 @@ _vp_psy_init: # @_vp_psy_init fcvt.d.s $fa0, $fa0 pcaddu18i $ra, %call36(log) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI219_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI219_0) + lu12i.w $a0, -4189 + ori $a0, $a0, 2543 + lu32i.d $a0, 405058 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fdiv.d $fa0, $fa0, $fa1 frint.d $fa0, $fa0 vldi $vr1, -784 @@ -40870,12 +40811,16 @@ _vp_psy_init: # @_vp_psy_init fdiv.d $fa0, $fa0, $fs3 pcaddu18i $ra, %call36(log) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI219_1) - fld.d $fs5, $a0, %pc_lo12(.LCPI219_1) - pcalau12i $a0, %pc_hi20(.LCPI219_2) - fld.d $fs6, $a0, %pc_lo12(.LCPI219_2) - ld.w $a0, $s0, 64 + lu12i.w $a0, 393216 + lu32i.d $a0, 464199 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fs5, $a0 fmul.d $fa0, $fa0, $fs5 + lu12i.w $s5, -524288 + lu32i.d $s5, 515318 + ld.w $a0, $s0, 64 + lu52i.d $a1, $s5, -1023 + movgr2fr.d $fs6, $a1 fadd.d $fa0, $fa0, $fs6 addi.d $a0, $a0, 1 ori $s4, $zero, 1 @@ -40932,17 +40877,19 @@ _vp_psy_init: # @_vp_psy_init st.d $s1, $s0, 8 st.w $fp, $s0, 0 st.d $s3, $s0, 80 - pcalau12i $a0, %pc_hi20(.LCPI219_3) - fld.d $fs1, $a0, %pc_lo12(.LCPI219_3) - pcalau12i $a0, %pc_hi20(.LCPI219_4) - fld.d $fs4, $a0, %pc_lo12(.LCPI219_4) ffint.d.l $fs2, $fs0 + lu52i.d $a0, $s5, 1025 + movgr2fr.d $fs0, $a0 + lu12i.w $a0, -393216 + lu32i.d $a0, 405058 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fs1, $a0 pcalau12i $a0, %pc_hi20(ATH) addi.d $s4, $a0, %pc_lo12(ATH) - pcalau12i $a0, %pc_hi20(.LCPI219_5) - fld.s $fs0, $a0, %pc_lo12(.LCPI219_5) move $s5, $zero move $s6, $zero + lu12i.w $a0, 273536 + movgr2fr.w $fs4, $a0 ori $s7, $zero, 87 b .LBB219_2 .p2align 4, , 16 @@ -40959,8 +40906,8 @@ _vp_psy_init: # @_vp_psy_init fmul.d $fa0, $fa0, $fa1 vldi $vr1, -896 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa0, $fa0, $fs1 - fmul.d $fa0, $fa0, $fs4 + fadd.d $fa0, $fa0, $fs0 + fmul.d $fa0, $fa0, $fs1 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 fadd.d $fa0, $fa0, $fa0 @@ -40993,7 +40940,7 @@ _vp_psy_init: # @_vp_psy_init # Parent Loop BB219_2 Depth=1 # => This Inner Loop Header: Depth=2 ld.d $a2, $s0, 32 - fadd.s $fa2, $fa0, $fs0 + fadd.s $fa2, $fa0, $fs4 fstx.s $fa2, $a2, $a1 fadd.s $fa0, $fa1, $fa0 addi.d $s6, $s6, 1 @@ -41010,27 +40957,34 @@ _vp_psy_init: # @_vp_psy_init move $s4, $zero slli.w $a0, $fp, 1 div.d $s3, $s3, $a0 - pcalau12i $a0, %pc_hi20(.LCPI219_6) - fld.s $fs4, $a0, %pc_lo12(.LCPI219_6) - pcalau12i $a0, %pc_hi20(.LCPI219_7) - fld.d $fs5, $a0, %pc_lo12(.LCPI219_7) - pcalau12i $a0, %pc_hi20(.LCPI219_8) - fld.s $fs6, $a0, %pc_lo12(.LCPI219_8) - pcalau12i $a0, %pc_hi20(.LCPI219_9) - fld.d $fs7, $a0, %pc_lo12(.LCPI219_9) - pcalau12i $a0, %pc_hi20(.LCPI219_10) - fld.s $fs0, $a0, %pc_lo12(.LCPI219_10) addi.d $s5, $fp, 1 - ori $s7, $zero, 1 - addi.w $s6, $zero, -99 + ori $s6, $zero, 1 + lu12i.w $a0, 238623 + ori $a0, $a0, 3215 + movgr2fr.w $fs4, $a0 + lu12i.w $a0, 262144 + lu32i.d $a0, -380109 + lu52i.d $a0, $a0, 1026 + movgr2fr.d $fs5, $a0 + lu12i.w $a0, 207342 + ori $a0, $a0, 2542 + movgr2fr.w $fs6, $a0 + lu12i.w $a0, 131072 + lu32i.d $a0, 125829 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fs7, $a0 + lu12i.w $a0, 232731 + ori $a0, $a0, 1815 + movgr2fr.w $fs0, $a0 + addi.w $s7, $zero, -99 b .LBB219_10 .p2align 4, , 16 .LBB219_8: # in Loop: Header=BB219_10 Depth=1 - move $s7, $s5 + move $s6, $s5 .LBB219_9: # %.critedge2 # in Loop: Header=BB219_10 Depth=1 - slli.d $a0, $s6, 16 - add.d $a0, $a0, $s7 + slli.d $a0, $s7, 16 + add.d $a0, $a0, $s6 addu16i.d $a0, $a0, -1 ld.d $a1, $s0, 48 addi.d $a0, $a0, -1 @@ -41062,12 +41016,12 @@ _vp_psy_init: # @_vp_psy_init fmul.s $fa1, $fs1, $fs0 fcvt.d.s $fa1, $fa1 fadd.d $fa0, $fa0, $fa1 - add.d $a0, $s6, $a0 + add.d $a0, $s7, $a0 fcvt.s.d $fs1, $fa0 bge $a0, $s4, .LBB219_14 # %bb.11: # %.lr.ph179.preheader # in Loop: Header=BB219_10 Depth=1 - mul.d $s8, $s3, $s6 + mul.d $s8, $s3, $s7 .p2align 4, , 16 .LBB219_12: # %.lr.ph179 # Parent Loop BB219_10 Depth=1 @@ -41098,29 +41052,29 @@ _vp_psy_init: # @_vp_psy_init bcnez $fcc0, .LBB219_14 # %bb.13: # in Loop: Header=BB219_12 Depth=2 ld.w $a0, $s1, 120 - addi.d $s6, $s6, 1 - add.d $a0, $a0, $s6 + addi.d $s7, $s7, 1 + add.d $a0, $a0, $s7 add.d $s8, $s8, $s3 blt $a0, $s4, .LBB219_12 .LBB219_14: # %.critedge # in Loop: Header=BB219_10 Depth=1 - blt $fp, $s7, .LBB219_9 + blt $fp, $s6, .LBB219_9 # %bb.15: # %.lr.ph185.preheader # in Loop: Header=BB219_10 Depth=1 - mul.d $s8, $s3, $s7 + mul.d $s8, $s3, $s6 b .LBB219_17 .p2align 4, , 16 .LBB219_16: # %.critedge4 # in Loop: Header=BB219_17 Depth=2 - addi.d $s7, $s7, 1 + addi.d $s6, $s6, 1 add.d $s8, $s8, $s3 - beq $s5, $s7, .LBB219_8 + beq $s5, $s6, .LBB219_8 .LBB219_17: # %.lr.ph185 # Parent Loop BB219_10 Depth=1 # => This Inner Loop Header: Depth=2 ld.w $a0, $s1, 124 add.d $a0, $s4, $a0 - blt $s7, $a0, .LBB219_16 + blt $s6, $a0, .LBB219_16 # %bb.18: # in Loop: Header=BB219_17 Depth=2 movgr2fr.d $fa0, $s8 ffint.s.l $fs2, $fa0 @@ -41219,8 +41173,8 @@ _vp_psy_init: # @_vp_psy_init move $s1, $zero move $s2, $zero fadd.d $fs0, $fs3, $fs3 - pcalau12i $a0, %pc_hi20(.LCPI219_11) - fld.d $fs1, $a0, %pc_lo12(.LCPI219_11) + lu52i.d $a0, $zero, -1175 + movgr2fr.d $fs1, $a0 movgr2fr.w $fs3, $zero .p2align 4, , 16 .LBB219_23: # =>This Inner Loop Header: Depth=1 @@ -41323,76 +41277,6 @@ _vp_psy_init: # @_vp_psy_init .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI220_1: - .dword 0x4059000000000000 # double 100 -.LCPI220_2: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI220_3: - .dword 0x4017dcf680000000 # double 5.9657840728759766 -.LCPI220_4: - .dword 0x3fe62e42a0000000 # double 0.69314700365066528 -.LCPI220_5: - .dword 0x3ff7154760000000 # double 1.4426950216293335 -.LCPI220_6: - .dword 0xc017dcf680000000 # double -5.9657840728759766 -.LCPI220_7: - .dword 0xc000800000000000 # double -2.0625 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI220_8: - .word 0xc479c000 # float -999 -.LCPI220_9: - .word 0xc3480000 # float -200 -.LCPI220_10: - .word 0x425c0000 # float 55 -.LCPI220_11: - .word 0x42580000 # float 54 -.LCPI220_12: - .word 0x42540000 # float 53 -.LCPI220_13: - .word 0x42500000 # float 52 -.LCPI220_14: - .word 0x424c0000 # float 51 -.LCPI220_15: - .word 0x42480000 # float 50 -.LCPI220_16: - .word 0x42440000 # float 49 -.LCPI220_17: - .word 0x42400000 # float 48 -.LCPI220_18: - .word 0x423c0000 # float 47 -.LCPI220_19: - .word 0x42380000 # float 46 -.LCPI220_20: - .word 0x42340000 # float 45 -.LCPI220_21: - .word 0x42300000 # float 44 -.LCPI220_22: - .word 0x422c0000 # float 43 -.LCPI220_23: - .word 0x42280000 # float 42 -.LCPI220_24: - .word 0x42240000 # float 41 -.LCPI220_25: - .word 0x42200000 # float 40 -.LCPI220_26: - .word 0x421c0000 # float 39 -.LCPI220_27: - .word 0x42180000 # float 38 -.LCPI220_28: - .word 0x42140000 # float 37 -.LCPI220_29: - .word 0x42100000 # float 36 -.LCPI220_30: - .word 0x420c0000 # float 35 -.LCPI220_31: - .word 0x42080000 # float 34 -.LCPI220_32: - .word 0x42040000 # float 33 -.LCPI220_33: - .word 0x42000000 # float 32 .text .p2align 5 .type setup_tone_curves,@function @@ -41420,7 +41304,7 @@ setup_tone_curves: # @setup_tone_curves fst.d $fs7, $sp, 1880 # 8-byte Folded Spill addi.d $fp, $sp, 2032 lu12i.w $a2, 7 - ori $a2, $a2, 2176 + ori $a2, $a2, 2272 sub.d $sp, $sp, $a2 # kill: def $f2 killed $f2 def $vr2 lu12i.w $a2, -8 @@ -41434,7 +41318,7 @@ setup_tone_curves: # @setup_tone_curves vst $vr1, $a2, 0 # 16-byte Folded Spill move $s0, $a1 lu12i.w $a2, -9 - ori $a2, $a2, 4020 + ori $a2, $a2, 3964 add.d $a2, $fp, $a2 fst.s $fa0, $a2, 0 # 4-byte Folded Spill lu12i.w $a2, -8 @@ -41454,7 +41338,7 @@ setup_tone_curves: # @setup_tone_curves pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 lu12i.w $a1, -9 - ori $a1, $a1, 4008 + ori $a1, $a1, 3952 add.d $a1, $fp, $a1 st.d $a0, $a1, 0 # 8-byte Folded Spill lu12i.w $a0, 7 @@ -41469,8 +41353,12 @@ setup_tone_curves: # @setup_tone_curves ori $a0, $a0, 48 add.d $a0, $fp, $a0 vld $vr3, $a0, 0 # 16-byte Folded Reload - movgr2fr.w $fs1, $zero - fcmp.clt.s $fcc0, $fa3, $fs1 + movgr2fr.w $fa0, $zero + lu12i.w $a0, -9 + ori $a0, $a0, 4092 + add.d $a0, $fp, $a0 + fst.s $fa0, $a0, 0 # 4-byte Folded Spill + fcmp.clt.s $fcc0, $fa3, $fa0 lu12i.w $a0, -8 ori $a0, $a0, 96 add.d $a0, $fp, $a0 @@ -41486,7 +41374,7 @@ setup_tone_curves: # @setup_tone_curves pcalau12i $a0, %pc_hi20(tonemasks) addi.d $a0, $a0, %pc_lo12(tonemasks) lu12i.w $a1, -9 - ori $a1, $a1, 4088 + ori $a1, $a1, 4080 add.d $a1, $fp, $a1 st.d $a0, $a1, 0 # 8-byte Folded Spill move $s7, $zero @@ -41496,7 +41384,14 @@ setup_tone_curves: # @setup_tone_curves ori $a7, $zero, 224 pcalau12i $a0, %pc_hi20(.LCPI220_0) lu12i.w $a1, -9 - ori $a1, $a1, 4080 + ori $a1, $a1, 4072 + add.d $a1, $fp, $a1 + st.d $a0, $a1, 0 # 8-byte Folded Spill + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + lu12i.w $a1, -9 + ori $a1, $a1, 4064 add.d $a1, $fp, $a1 st.d $a0, $a1, 0 # 8-byte Folded Spill lu12i.w $a0, -8 @@ -41510,7 +41405,7 @@ setup_tone_curves: # @setup_tone_curves add.d $a0, $fp, $a0 vst $vr6, $a0, 0 # 16-byte Folded Spill lu12i.w $a0, -9 - ori $a0, $a0, 4064 + ori $a0, $a0, 4048 add.d $a0, $fp, $a0 vst $vr7, $a0, 0 # 16-byte Folded Spill b .LBB220_2 @@ -41565,7 +41460,7 @@ setup_tone_curves: # @setup_tone_curves st.d $a3, $a2, 0 # 8-byte Folded Spill mul.d $a2, $a3, $s6 lu12i.w $a3, -9 - ori $a3, $a3, 4088 + ori $a3, $a3, 4080 add.d $a3, $fp, $a3 ld.d $a3, $a3, 0 # 8-byte Folded Reload add.d $s5, $a3, $a2 @@ -41646,11 +41541,15 @@ setup_tone_curves: # @setup_tone_curves move $a1, $s5 pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 + lu12i.w $a0, -9 + ori $a0, $a0, 4092 + add.d $a0, $fp, $a0 + fld.s $fa0, $a0, 0 # 4-byte Folded Reload lu12i.w $a0, -8 ori $a0, $a0, 48 add.d $a0, $fp, $a0 vld $vr3, $a0, 0 # 16-byte Folded Reload - fcmp.clt.s $fcc0, $fs1, $fa3 + fcmp.clt.s $fcc0, $fa0, $fa3 vrepli.w $vr0, 16 vrepli.b $vr1, 0 lu12i.w $a0, -8 @@ -41682,7 +41581,7 @@ setup_tone_curves: # @setup_tone_curves add.d $a0, $fp, $a0 ld.d $a1, $a0, 0 # 8-byte Folded Reload lu12i.w $a0, -9 - ori $a0, $a0, 4080 + ori $a0, $a0, 4072 add.d $a0, $fp, $a0 ld.d $a3, $a0, 0 # 8-byte Folded Reload ori $a4, $zero, 448 @@ -42061,7 +41960,7 @@ setup_tone_curves: # @setup_tone_curves .LBB220_46: # %vector.body1177.preheader # in Loop: Header=BB220_2 Depth=1 lu12i.w $a0, -9 - ori $a0, $a0, 4080 + ori $a0, $a0, 4072 add.d $a0, $fp, $a0 ld.d $a3, $a0, 0 # 8-byte Folded Reload vld $vr2, $a3, %pc_lo12(.LCPI220_0) @@ -42074,7 +41973,7 @@ setup_tone_curves: # @setup_tone_curves add.d $a1, $fp, $a1 vld $vr7, $a1, 0 # 16-byte Folded Reload lu12i.w $a1, -9 - ori $a1, $a1, 4064 + ori $a1, $a1, 4048 add.d $a1, $fp, $a1 vld $vr8, $a1, 0 # 16-byte Folded Reload lu12i.w $a1, -8 @@ -42291,12 +42190,15 @@ setup_tone_curves: # @setup_tone_curves add.d $a1, $fp, $a1 ld.d $a1, $a1, 0 # 8-byte Folded Reload fldx.s $fa0, $a1, $a0 - pcalau12i $a0, %pc_hi20(.LCPI220_1) - fld.d $fs0, $a0, %pc_lo12(.LCPI220_1) move $s2, $zero move $s8, $zero fcvt.d.s $fa0, $fa0 - fadd.d $fs2, $fa0, $fs0 + lu12i.w $a0, -9 + ori $a0, $a0, 4064 + add.d $a0, $fp, $a0 + ld.d $a0, $a0, 0 # 8-byte Folded Reload + movgr2fr.d $fs0, $a0 + fadd.d $fs1, $fa0, $fs0 b .LBB220_64 .p2align 4, , 16 .LBB220_63: # %pred.store.continue857.6 @@ -42320,7 +42222,7 @@ setup_tone_curves: # @setup_tone_curves slli.d $a0, $a0, 1 movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 - fsub.d $fa0, $fs2, $fa0 + fsub.d $fa0, $fs1, $fa0 vldi $vr1, -834 fadd.d $fa0, $fa0, $fa1 fcvt.s.d $fa0, $fa0 @@ -54935,19 +54837,19 @@ setup_tone_curves: # @setup_tone_curves .LBB220_1744: # %.preheader288 move $s7, $zero lu12i.w $a0, -9 - ori $a0, $a0, 4020 + ori $a0, $a0, 3964 add.d $a0, $fp, $a0 fld.s $fa0, $a0, 0 # 4-byte Folded Reload fcvt.d.s $fs2, $fa0 bstrpick.d $a0, $s0, 31, 0 lu12i.w $a1, -9 - ori $a1, $a1, 4080 + ori $a1, $a1, 4048 add.d $a1, $fp, $a1 st.d $a0, $a1, 0 # 8-byte Folded Spill bstrpick.d $a0, $s0, 30, 3 slli.d $a0, $a0, 3 lu12i.w $a1, -9 - ori $a1, $a1, 4064 + ori $a1, $a1, 4040 add.d $a1, $fp, $a1 st.d $a0, $a1, 0 # 8-byte Folded Spill lu12i.w $a0, -8 @@ -54959,22 +54861,28 @@ setup_tone_curves: # @setup_tone_curves ori $a1, $a1, 96 add.d $a1, $fp, $a1 st.d $a0, $a1, 0 # 8-byte Folded Spill - lu52i.d $s6, $zero, 1107 - pcalau12i $a0, %pc_hi20(.LCPI220_2) - fld.d $fs3, $a0, %pc_lo12(.LCPI220_2) + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 + movgr2fr.d $fs3, $a0 lu12i.w $s8, 275200 - pcalau12i $a0, %pc_hi20(.LCPI220_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI220_3) - pcalau12i $a0, %pc_hi20(.LCPI220_4) - fld.d $fs5, $a0, %pc_lo12(.LCPI220_4) - pcalau12i $a0, %pc_hi20(.LCPI220_5) - fld.d $fa0, $a0, %pc_lo12(.LCPI220_5) - lu12i.w $a0, -9 - ori $a0, $a0, 3992 - add.d $a0, $fp, $a0 - fst.d $fa0, $a0, 0 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI220_6) - fld.d $fa0, $a0, %pc_lo12(.LCPI220_6) + lu12i.w $a0, -524288 + lu32i.d $a0, 515318 + lu52i.d $a1, $a0, 1025 + movgr2fr.d $fs4, $a1 + lu12i.w $a1, -393216 + lu32i.d $a1, 405058 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fs5, $a1 + lu12i.w $a1, 393216 + lu32i.d $a1, 464199 + lu52i.d $a1, $a1, 1023 + movgr2fr.d $fa0, $a1 + lu12i.w $a1, -9 + ori $a1, $a1, 3944 + add.d $a1, $fp, $a1 + fst.d $fa0, $a1, 0 # 8-byte Folded Spill + lu52i.d $a0, $a0, -1023 + movgr2fr.d $fa0, $a0 lu12i.w $a0, -9 ori $a0, $a0, 3984 add.d $a0, $fp, $a0 @@ -54982,45 +54890,151 @@ setup_tone_curves: # @setup_tone_curves lu12i.w $a0, 280476 vreplgr2vr.w $vr0, $a0 lu12i.w $a0, -9 - ori $a0, $a0, 4032 + ori $a0, $a0, 4000 add.d $a0, $fp, $a0 vst $vr0, $a0, 0 # 16-byte Folded Spill + lu12i.w $a0, -243812 + lu32i.d $a0, 0 + movgr2fr.w $fs0, $a0 + lu12i.w $a0, -248704 + lu32i.d $a0, 0 + movgr2fr.w $fa0, $a0 + lu12i.w $a0, -9 + ori $a0, $a0, 4072 + add.d $a0, $fp, $a0 + fst.s $fa0, $a0, 0 # 4-byte Folded Spill + lu12i.w $a0, 271808 + movgr2fr.w $fa0, $a0 + lu12i.w $a0, -9 + ori $a0, $a0, 4064 + add.d $a0, $fp, $a0 + fst.s $fa0, $a0, 0 # 4-byte Folded Spill + lu12i.w $a0, 271744 + movgr2fr.w $fa0, $a0 + lu12i.w $a0, -9 + ori $a0, $a0, 4028 + add.d $a0, $fp, $a0 + fst.s $fa0, $a0, 0 # 4-byte Folded Spill + lu12i.w $a0, 271680 + movgr2fr.w $fa0, $a0 + lu12i.w $a0, -9 + ori $a0, $a0, 3996 + add.d $a0, $fp, $a0 + fst.s $fa0, $a0, 0 # 4-byte Folded Spill + lu12i.w $a0, 271616 + movgr2fr.w $fa0, $a0 + lu12i.w $a0, -9 + ori $a0, $a0, 3992 + add.d $a0, $fp, $a0 + fst.s $fa0, $a0, 0 # 4-byte Folded Spill + lu12i.w $a0, 271552 + movgr2fr.w $fa0, $a0 + lu12i.w $a0, -9 + ori $a0, $a0, 3980 + add.d $a0, $fp, $a0 + fst.s $fa0, $a0, 0 # 4-byte Folded Spill + lu12i.w $a0, 271488 + movgr2fr.w $fa0, $a0 + lu12i.w $a0, -9 + ori $a0, $a0, 3940 + add.d $a0, $fp, $a0 + fst.s $fa0, $a0, 0 # 4-byte Folded Spill + lu12i.w $a0, 271424 + movgr2fr.w $fa0, $a0 + lu12i.w $a0, -9 + ori $a0, $a0, 3936 + add.d $a0, $fp, $a0 + fst.s $fa0, $a0, 0 # 4-byte Folded Spill + lu12i.w $a0, 271360 + movgr2fr.w $fa0, $a0 + lu12i.w $a0, -9 + ori $a0, $a0, 3932 + add.d $a0, $fp, $a0 + fst.s $fa0, $a0, 0 # 4-byte Folded Spill + lu12i.w $a0, 271296 + movgr2fr.w $fa0, $a0 + lu12i.w $a0, -9 + ori $a0, $a0, 3928 + add.d $a0, $fp, $a0 + fst.s $fa0, $a0, 0 # 4-byte Folded Spill + lu12i.w $a0, 271232 + movgr2fr.w $fa0, $a0 + lu12i.w $a0, -9 + ori $a0, $a0, 3924 + add.d $a0, $fp, $a0 + fst.s $fa0, $a0, 0 # 4-byte Folded Spill + lu12i.w $a0, 271168 + movgr2fr.w $fa0, $a0 + lu12i.w $a0, -9 + ori $a0, $a0, 3920 + add.d $a0, $fp, $a0 + fst.s $fa0, $a0, 0 # 4-byte Folded Spill + lu12i.w $a0, 271104 + movgr2fr.w $fa0, $a0 + lu12i.w $a0, -9 + ori $a0, $a0, 3916 + add.d $a0, $fp, $a0 + fst.s $fa0, $a0, 0 # 4-byte Folded Spill + lu12i.w $a0, 271040 + movgr2fr.w $fa0, $a0 + lu12i.w $a0, -9 + ori $a0, $a0, 3912 + add.d $a0, $fp, $a0 + fst.s $fa0, $a0, 0 # 4-byte Folded Spill + lu12i.w $a0, 270976 + movgr2fr.w $fa0, $a0 + lu12i.w $a0, -9 + ori $a0, $a0, 3908 + add.d $a0, $fp, $a0 + fst.s $fa0, $a0, 0 # 4-byte Folded Spill + lu12i.w $a0, 270912 + movgr2fr.w $fa0, $a0 + lu12i.w $a0, -9 + ori $a0, $a0, 3904 + add.d $a0, $fp, $a0 + fst.s $fa0, $a0, 0 # 4-byte Folded Spill + lu12i.w $a0, 270848 + movgr2fr.w $fa0, $a0 lu12i.w $a0, -9 - ori $a0, $a0, 4004 + ori $a0, $a0, 3900 add.d $a0, $fp, $a0 - fst.s $fs1, $a0, 0 # 4-byte Folded Spill + fst.s $fa0, $a0, 0 # 4-byte Folded Spill + ori $a0, $zero, 0 + lu32i.d $a0, 32768 + lu52i.d $s1, $a0, -1024 + ori $s3, $zero, 56 b .LBB220_1746 .p2align 4, , 16 .LBB220_1745: # in Loop: Header=BB220_1746 Depth=1 lu12i.w $a0, -9 - ori $a0, $a0, 4024 + ori $a0, $a0, 3968 add.d $a0, $fp, $a0 ld.d $a1, $a0, 0 # 8-byte Folded Reload move $s7, $a1 ori $a0, $zero, 17 - beq $a1, $a0, .LBB220_1983 + beq $a1, $a0, .LBB220_1968 .LBB220_1746: # =>This Loop Header: Depth=1 - # Child Loop BB220_1749 Depth 2 - # Child Loop BB220_1753 Depth 3 - # Child Loop BB220_1756 Depth 3 - # Child Loop BB220_1790 Depth 3 - # Child Loop BB220_1793 Depth 4 - # Child Loop BB220_1798 Depth 5 - # Child Loop BB220_1817 Depth 5 - # Child Loop BB220_1823 Depth 4 - # Child Loop BB220_1842 Depth 4 - # Child Loop BB220_1762 Depth 3 - # Child Loop BB220_1767 Depth 4 - # Child Loop BB220_1786 Depth 4 - # Child Loop BB220_1848 Depth 3 - # Child Loop BB220_1877 Depth 3 - # Child Loop BB220_1867 Depth 3 + # Child Loop BB220_1750 Depth 2 + # Child Loop BB220_1754 Depth 3 + # Child Loop BB220_1757 Depth 3 + # Child Loop BB220_1791 Depth 3 + # Child Loop BB220_1794 Depth 4 + # Child Loop BB220_1799 Depth 5 + # Child Loop BB220_1818 Depth 5 + # Child Loop BB220_1824 Depth 4 + # Child Loop BB220_1843 Depth 4 + # Child Loop BB220_1763 Depth 3 + # Child Loop BB220_1768 Depth 4 + # Child Loop BB220_1787 Depth 4 + # Child Loop BB220_1849 Depth 3 + # Child Loop BB220_1878 Depth 3 + # Child Loop BB220_1868 Depth 3 ori $a0, $zero, 64 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 slli.d $a2, $s7, 3 lu12i.w $a1, -9 - ori $a1, $a1, 4008 + ori $a1, $a1, 3952 add.d $a1, $fp, $a1 ld.d $a1, $a1, 0 # 8-byte Folded Reload lu12i.w $a3, -8 @@ -55028,9 +55042,10 @@ setup_tone_curves: # @setup_tone_curves add.d $a3, $fp, $a3 st.d $a0, $a3, 0 # 8-byte Folded Spill stx.d $a0, $a1, $a2 - addi.w $s1, $s7, 0 + addi.w $s2, $s7, 0 srli.d $a0, $s7, 32 - or $a0, $a0, $s6 + lu52i.d $a1, $zero, 1107 + or $a0, $a0, $a1 movgr2fr.d $fa0, $a0 fsub.d $fa0, $fa0, $fs3 move $a0, $s7 @@ -55038,8 +55053,8 @@ setup_tone_curves: # @setup_tone_curves movgr2fr.d $fa1, $a0 fadd.d $fa0, $fa1, $fa0 vldi $vr1, -928 - fmul.d $fs0, $fa0, $fa1 - fadd.d $fa0, $fs0, $fs4 + fmul.d $fs1, $fa0, $fa1 + fadd.d $fa0, $fs1, $fs4 fmul.d $fa0, $fa0, $fs5 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 @@ -55047,11 +55062,11 @@ setup_tone_curves: # @setup_tone_curves vreplvei.d $vr0, $vr0, 0 vfrintrm.d $vr0, $vr0 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $s3, $fa0 - movgr2fr.w $fa0, $s3 + movfr2gr.s $s4, $fa0 + movgr2fr.w $fa0, $s4 ffint.s.w $fa0, $fa0 lu12i.w $a0, -9 - ori $a0, $a0, 4020 + ori $a0, $a0, 3964 add.d $a0, $fp, $a0 fld.s $fs6, $a0, 0 # 4-byte Folded Reload fmul.s $fa0, $fs6, $fa0 @@ -55061,42 +55076,42 @@ setup_tone_curves: # @setup_tone_curves pcaddu18i $ra, %call36(log) jirl $ra, $ra, 0 lu12i.w $a0, -9 - ori $a0, $a0, 3992 + ori $a0, $a0, 3944 add.d $a0, $fp, $a0 fld.d $fs7, $a0, 0 # 8-byte Folded Reload fmul.d $fa0, $fa0, $fs7 lu12i.w $a0, -9 ori $a0, $a0, 3984 add.d $a0, $fp, $a0 - fld.d $fs1, $a0, 0 # 8-byte Folded Reload - fadd.d $fa0, $fa0, $fs1 + fld.d $fa1, $a0, 0 # 8-byte Folded Reload + fadd.d $fa0, $fa0, $fa1 fadd.d $fa0, $fa0, $fa0 vreplvei.d $vr0, $vr0, 0 vfrintrp.d $vr0, $vr0 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $s4, $fa0 - addi.d $a0, $s3, 1 + movfr2gr.s $s5, $fa0 + addi.d $a0, $s4, 1 movgr2fr.w $fa0, $a0 ffint.s.w $fa0, $fa0 fmul.s $fa0, $fs6, $fa0 fcvt.d.s $fa0, $fa0 pcaddu18i $ra, %call36(log) jirl $ra, $ra, 0 - move $s2, $zero + move $s4, $zero fmul.d $fa0, $fa0, $fs7 - fadd.d $fa0, $fa0, $fs1 lu12i.w $a0, -9 - ori $a0, $a0, 4004 + ori $a0, $a0, 3984 add.d $a0, $fp, $a0 - fld.s $fs1, $a0, 0 # 4-byte Folded Reload + fld.d $fa1, $a0, 0 # 8-byte Folded Reload + fadd.d $fa0, $fa0, $fa1 fadd.d $fa0, $fa0, $fa0 vreplvei.d $vr0, $vr0, 0 vfrintrm.d $vr0, $vr0 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 - slt $a1, $s1, $s4 - masknez $a2, $s4, $a1 - maskeqz $a1, $s1, $a1 + slt $a1, $s2, $s5 + masknez $a2, $s5, $a1 + maskeqz $a1, $s2, $a1 or $a1, $a1, $a2 srai.d $a2, $a1, 63 andn $a1, $a1, $a2 @@ -55115,7 +55130,7 @@ setup_tone_curves: # @setup_tone_curves addi.d $a1, $s7, 1 ori $a0, $zero, 1792 lu12i.w $a2, -9 - ori $a2, $a2, 4024 + ori $a2, $a2, 3968 add.d $a2, $fp, $a2 st.d $a1, $a2, 0 # 8-byte Folded Spill mul.d $a0, $a1, $a0 @@ -55124,128 +55139,128 @@ setup_tone_curves: # @setup_tone_curves add.d $a1, $fp, $a1 add.d $a0, $a1, $a0 lu12i.w $a1, -9 - ori $a1, $a1, 4056 + ori $a1, $a1, 4032 add.d $a1, $fp, $a1 st.d $a0, $a1, 0 # 8-byte Folded Spill lu12i.w $a0, -9 - ori $a0, $a0, 4088 + ori $a0, $a0, 4080 add.d $a0, $fp, $a0 st.d $s7, $a0, 0 # 8-byte Folded Spill - b .LBB220_1749 + b .LBB220_1750 +.LBB220_1747: # in Loop: Header=BB220_1750 Depth=2 + lu12i.w $a1, 270784 +.LBB220_1748: # in Loop: Header=BB220_1750 Depth=2 + movgr2fr.w $fa0, $a1 .p2align 4, , 16 -.LBB220_1747: # in Loop: Header=BB220_1749 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI220_10) - fld.s $fa0, $a1, %pc_lo12(.LCPI220_10) -.LBB220_1748: # in Loop: Header=BB220_1749 Depth=2 - addi.d $s2, $s2, 1 +.LBB220_1749: # in Loop: Header=BB220_1750 Depth=2 + addi.d $s4, $s4, 1 fst.s $fa0, $a0, 4 ori $a0, $zero, 8 - beq $s2, $a0, .LBB220_1745 -.LBB220_1749: # Parent Loop BB220_1746 Depth=1 + beq $s4, $a0, .LBB220_1745 +.LBB220_1750: # Parent Loop BB220_1746 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB220_1753 Depth 3 - # Child Loop BB220_1756 Depth 3 - # Child Loop BB220_1790 Depth 3 - # Child Loop BB220_1793 Depth 4 - # Child Loop BB220_1798 Depth 5 - # Child Loop BB220_1817 Depth 5 - # Child Loop BB220_1823 Depth 4 - # Child Loop BB220_1842 Depth 4 - # Child Loop BB220_1762 Depth 3 - # Child Loop BB220_1767 Depth 4 - # Child Loop BB220_1786 Depth 4 - # Child Loop BB220_1848 Depth 3 - # Child Loop BB220_1877 Depth 3 - # Child Loop BB220_1867 Depth 3 + # Child Loop BB220_1754 Depth 3 + # Child Loop BB220_1757 Depth 3 + # Child Loop BB220_1791 Depth 3 + # Child Loop BB220_1794 Depth 4 + # Child Loop BB220_1799 Depth 5 + # Child Loop BB220_1818 Depth 5 + # Child Loop BB220_1824 Depth 4 + # Child Loop BB220_1843 Depth 4 + # Child Loop BB220_1763 Depth 3 + # Child Loop BB220_1768 Depth 4 + # Child Loop BB220_1787 Depth 4 + # Child Loop BB220_1849 Depth 3 + # Child Loop BB220_1878 Depth 3 + # Child Loop BB220_1868 Depth 3 ori $a0, $zero, 232 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 - slli.d $a1, $s2, 3 + slli.d $a1, $s4, 3 lu12i.w $a2, -8 ori $a2, $a2, 40 add.d $a2, $fp, $a2 ld.d $a2, $a2, 0 # 8-byte Folded Reload stx.d $a0, $a2, $a1 lu12i.w $a3, 280476 - blez $s0, .LBB220_1757 -# %bb.1750: # %.lr.ph.preheader - # in Loop: Header=BB220_1749 Depth=2 + blez $s0, .LBB220_1758 +# %bb.1751: # %.lr.ph.preheader + # in Loop: Header=BB220_1750 Depth=2 ori $a0, $zero, 8 - bgeu $s0, $a0, .LBB220_1752 -# %bb.1751: # in Loop: Header=BB220_1749 Depth=2 + bgeu $s0, $a0, .LBB220_1753 +# %bb.1752: # in Loop: Header=BB220_1750 Depth=2 move $a1, $zero - b .LBB220_1755 + b .LBB220_1756 .p2align 4, , 16 -.LBB220_1752: # %vector.body1311.preheader - # in Loop: Header=BB220_1749 Depth=2 +.LBB220_1753: # %vector.body1311.preheader + # in Loop: Header=BB220_1750 Depth=2 lu12i.w $a0, -8 ori $a0, $a0, 96 add.d $a0, $fp, $a0 ld.d $a0, $a0, 0 # 8-byte Folded Reload lu12i.w $a1, -9 - ori $a1, $a1, 4064 + ori $a1, $a1, 4040 add.d $a1, $fp, $a1 ld.d $a1, $a1, 0 # 8-byte Folded Reload lu12i.w $a2, -9 - ori $a2, $a2, 4032 + ori $a2, $a2, 4000 add.d $a2, $fp, $a2 vld $vr0, $a2, 0 # 16-byte Folded Reload .p2align 4, , 16 -.LBB220_1753: # %vector.body1311 +.LBB220_1754: # %vector.body1311 # Parent Loop BB220_1746 Depth=1 - # Parent Loop BB220_1749 Depth=2 + # Parent Loop BB220_1750 Depth=2 # => This Inner Loop Header: Depth=3 vst $vr0, $a0, -16 vst $vr0, $a0, 0 addi.d $a1, $a1, -8 addi.d $a0, $a0, 32 - bnez $a1, .LBB220_1753 -# %bb.1754: # %middle.block1314 - # in Loop: Header=BB220_1749 Depth=2 + bnez $a1, .LBB220_1754 +# %bb.1755: # %middle.block1314 + # in Loop: Header=BB220_1750 Depth=2 lu12i.w $a0, -9 - ori $a0, $a0, 4064 + ori $a0, $a0, 4040 add.d $a0, $fp, $a0 ld.d $a2, $a0, 0 # 8-byte Folded Reload move $a1, $a2 lu12i.w $a0, -9 - ori $a0, $a0, 4080 + ori $a0, $a0, 4048 add.d $a0, $fp, $a0 ld.d $a0, $a0, 0 # 8-byte Folded Reload - beq $a2, $a0, .LBB220_1757 -.LBB220_1755: # %.lr.ph.preheader1317 - # in Loop: Header=BB220_1749 Depth=2 + beq $a2, $a0, .LBB220_1758 +.LBB220_1756: # %.lr.ph.preheader1317 + # in Loop: Header=BB220_1750 Depth=2 lu12i.w $a0, -8 ori $a0, $a0, 120 add.d $a0, $fp, $a0 ld.d $a0, $a0, 0 # 8-byte Folded Reload alsl.d $a0, $a1, $a0, 2 lu12i.w $a2, -9 - ori $a2, $a2, 4080 + ori $a2, $a2, 4048 add.d $a2, $fp, $a2 ld.d $a2, $a2, 0 # 8-byte Folded Reload sub.d $a1, $a2, $a1 .p2align 4, , 16 -.LBB220_1756: # %.lr.ph +.LBB220_1757: # %.lr.ph # Parent Loop BB220_1746 Depth=1 - # Parent Loop BB220_1749 Depth=2 + # Parent Loop BB220_1750 Depth=2 # => This Inner Loop Header: Depth=3 st.w $a3, $a0, 0 addi.d $a1, $a1, -1 addi.d $a0, $a0, 4 - bnez $a1, .LBB220_1756 -.LBB220_1757: # %.preheader287 - # in Loop: Header=BB220_1749 Depth=2 + bnez $a1, .LBB220_1757 +.LBB220_1758: # %.preheader287 + # in Loop: Header=BB220_1750 Depth=2 ori $a0, $zero, 224 lu12i.w $a1, -8 ori $a1, $a1, 48 add.d $a1, $fp, $a1 - st.d $s2, $a1, 0 # 8-byte Folded Spill - mul.d $a0, $s2, $a0 + st.d $s4, $a1, 0 # 8-byte Folded Spill + mul.d $a0, $s4, $a0 lu12i.w $a1, -8 ori $a1, $a1, 16 add.d $a1, $fp, $a1 st.d $a0, $a1, 0 # 8-byte Folded Spill - pcalau12i $s4, %pc_hi20(.LCPI220_7) lu12i.w $a0, -8 add.d $a0, $fp, $a0 ld.d $a0, $a0, 0 # 8-byte Folded Reload @@ -55253,62 +55268,62 @@ setup_tone_curves: # @setup_tone_curves ori $a1, $a1, 80 add.d $a1, $fp, $a1 ld.d $a1, $a1, 0 # 8-byte Folded Reload - bge $a1, $a0, .LBB220_1788 -.LBB220_1758: # %._crit_edge325 - # in Loop: Header=BB220_1749 Depth=2 + bge $a1, $a0, .LBB220_1789 +.LBB220_1759: # %._crit_edge325 + # in Loop: Header=BB220_1750 Depth=2 lu12i.w $a0, -9 - ori $a0, $a0, 4088 + ori $a0, $a0, 4080 add.d $a0, $fp, $a0 ld.d $s7, $a0, 0 # 8-byte Folded Reload ori $a0, $zero, 16 - beq $s7, $a0, .LBB220_1865 -# %bb.1759: # %.preheader286 - # in Loop: Header=BB220_1749 Depth=2 + beq $s7, $a0, .LBB220_1866 +# %bb.1760: # %.preheader286 + # in Loop: Header=BB220_1750 Depth=2 move $s5, $zero - move $s1, $zero + move $s2, $zero lu12i.w $a0, -9 - ori $a0, $a0, 4056 + ori $a0, $a0, 4032 add.d $a0, $fp, $a0 ld.d $a0, $a0, 0 # 8-byte Folded Reload lu12i.w $a1, -8 ori $a1, $a1, 16 add.d $a1, $fp, $a1 ld.d $a1, $a1, 0 # 8-byte Folded Reload - add.d $s3, $a0, $a1 - b .LBB220_1762 + add.d $s4, $a0, $a1 + b .LBB220_1763 .p2align 4, , 16 -.LBB220_1760: # in Loop: Header=BB220_1762 Depth=3 - move $s1, $a0 -.LBB220_1761: # %._crit_edge331 - # in Loop: Header=BB220_1762 Depth=3 +.LBB220_1761: # in Loop: Header=BB220_1763 Depth=3 + move $s2, $a0 +.LBB220_1762: # %._crit_edge331 + # in Loop: Header=BB220_1763 Depth=3 addi.d $s5, $s5, 1 - ori $a0, $zero, 56 - beq $s5, $a0, .LBB220_1844 -.LBB220_1762: # Parent Loop BB220_1746 Depth=1 - # Parent Loop BB220_1749 Depth=2 + beq $s5, $s3, .LBB220_1845 +.LBB220_1763: # Parent Loop BB220_1746 Depth=1 + # Parent Loop BB220_1750 Depth=2 # => This Loop Header: Depth=3 - # Child Loop BB220_1767 Depth 4 - # Child Loop BB220_1786 Depth 4 + # Child Loop BB220_1768 Depth 4 + # Child Loop BB220_1787 Depth 4 srli.d $a0, $s5, 32 - or $a0, $a0, $s6 + lu52i.d $a1, $zero, 1107 + or $a0, $a0, $a1 movgr2fr.d $fa0, $a0 fsub.d $fa0, $fa0, $fs3 move $a0, $s5 bstrins.d $a0, $s8, 63, 32 movgr2fr.d $fa1, $a0 - fld.d $fa2, $s4, %pc_lo12(.LCPI220_7) fadd.d $fa0, $fa1, $fa0 vldi $vr1, -960 fmul.d $fa0, $fa0, $fa1 - fadd.d $fs6, $fs0, $fa0 - fadd.d $fa0, $fs6, $fa2 + fadd.d $fs6, $fs1, $fa0 + movgr2fr.d $fa0, $s1 + fadd.d $fa0, $fs6, $fa0 fadd.d $fa0, $fa0, $fs4 fmul.d $fa0, $fa0, $fs5 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 fdiv.d $fa0, $fa0, $fs2 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $s2, $fa0 + movfr2gr.s $s6, $fa0 vldi $vr0, -769 fadd.d $fa0, $fs6, $fa0 fadd.d $fa0, $fa0, $fs4 @@ -55320,15 +55335,15 @@ setup_tone_curves: # @setup_tone_curves fadd.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a2, $fa0 - srai.d $a0, $s2, 63 - andn $a1, $s2, $a0 + srai.d $a0, $s6, 63 + andn $a1, $s6, $a0 slt $a0, $a1, $s0 maskeqz $a3, $a1, $a0 masknez $a0, $s0, $a0 or $a0, $a3, $a0 - slt $a3, $a0, $s1 + slt $a3, $a0, $s2 maskeqz $a0, $a0, $a3 - masknez $a3, $s1, $a3 + masknez $a3, $s2, $a3 or $a0, $a0, $a3 srai.d $a3, $a2, 63 andn $a2, $a2, $a3 @@ -55336,14 +55351,14 @@ setup_tone_curves: # @setup_tone_curves maskeqz $a2, $a2, $a3 masknez $a3, $s0, $a3 or $a2, $a2, $a3 - bge $a0, $a2, .LBB220_1760 -# %bb.1763: # %.lr.ph330 - # in Loop: Header=BB220_1762 Depth=3 + bge $a0, $a2, .LBB220_1761 +# %bb.1764: # %.lr.ph330 + # in Loop: Header=BB220_1763 Depth=3 slli.d $a3, $s5, 2 - fldx.s $fa0, $s3, $a3 - slt $a3, $s1, $s0 + fldx.s $fa0, $s4, $a3 + slt $a3, $s2, $s0 masknez $a4, $s0, $a3 - maskeqz $a3, $s1, $a3 + maskeqz $a3, $s2, $a3 or $a3, $a3, $a4 bstrpick.d $a1, $a1, 31, 0 slt $a4, $a3, $a1 @@ -55353,15 +55368,15 @@ setup_tone_curves: # @setup_tone_curves add.d $a4, $a2, $a3 nor $a1, $a0, $zero add.w $a1, $a2, $a1 - sub.w $s1, $a4, $a0 + sub.w $s2, $a4, $a0 ori $a0, $zero, 7 - bgeu $a1, $a0, .LBB220_1765 -# %bb.1764: # in Loop: Header=BB220_1762 Depth=3 + bgeu $a1, $a0, .LBB220_1766 +# %bb.1765: # in Loop: Header=BB220_1763 Depth=3 move $a0, $a3 - b .LBB220_1784 + b .LBB220_1785 .p2align 4, , 16 -.LBB220_1765: # %vector.ph1212 - # in Loop: Header=BB220_1762 Depth=3 +.LBB220_1766: # %vector.ph1212 + # in Loop: Header=BB220_1763 Depth=3 bstrpick.d $a0, $a1, 31, 0 addi.d $a1, $a0, 1 bstrpick.d $a0, $a1, 32, 3 @@ -55374,139 +55389,139 @@ setup_tone_curves: # @setup_tone_curves ld.d $a4, $a4, 0 # 8-byte Folded Reload alsl.d $a3, $a3, $a4, 2 move $a4, $a2 - b .LBB220_1767 + b .LBB220_1768 .p2align 4, , 16 -.LBB220_1766: # %pred.store.continue1237 - # in Loop: Header=BB220_1767 Depth=4 +.LBB220_1767: # %pred.store.continue1237 + # in Loop: Header=BB220_1768 Depth=4 addi.d $a4, $a4, -8 addi.d $a3, $a3, 32 - beqz $a4, .LBB220_1783 -.LBB220_1767: # %vector.body1217 + beqz $a4, .LBB220_1784 +.LBB220_1768: # %vector.body1217 # Parent Loop BB220_1746 Depth=1 - # Parent Loop BB220_1749 Depth=2 - # Parent Loop BB220_1762 Depth=3 + # Parent Loop BB220_1750 Depth=2 + # Parent Loop BB220_1763 Depth=3 # => This Inner Loop Header: Depth=4 vld $vr2, $a3, -16 vfcmp.clt.s $vr2, $vr1, $vr2 vpickve2gr.w $a5, $vr2, 0 andi $a5, $a5, 1 - beqz $a5, .LBB220_1771 -# %bb.1768: # %pred.store.if1222 - # in Loop: Header=BB220_1767 Depth=4 + beqz $a5, .LBB220_1772 +# %bb.1769: # %pred.store.if1222 + # in Loop: Header=BB220_1768 Depth=4 fst.s $fa0, $a3, -16 vpickve2gr.w $a5, $vr2, 1 andi $a5, $a5, 1 - bnez $a5, .LBB220_1772 -.LBB220_1769: # %pred.store.continue1225 - # in Loop: Header=BB220_1767 Depth=4 + bnez $a5, .LBB220_1773 +.LBB220_1770: # %pred.store.continue1225 + # in Loop: Header=BB220_1768 Depth=4 vpickve2gr.w $a5, $vr2, 2 andi $a5, $a5, 1 - beqz $a5, .LBB220_1773 -.LBB220_1770: # %pred.store.if1226 - # in Loop: Header=BB220_1767 Depth=4 + beqz $a5, .LBB220_1774 +.LBB220_1771: # %pred.store.if1226 + # in Loop: Header=BB220_1768 Depth=4 fst.s $fa0, $a3, -8 vpickve2gr.w $a5, $vr2, 3 andi $a5, $a5, 1 - bnez $a5, .LBB220_1774 - b .LBB220_1775 + bnez $a5, .LBB220_1775 + b .LBB220_1776 .p2align 4, , 16 -.LBB220_1771: # %pred.store.continue1223 - # in Loop: Header=BB220_1767 Depth=4 +.LBB220_1772: # %pred.store.continue1223 + # in Loop: Header=BB220_1768 Depth=4 vpickve2gr.w $a5, $vr2, 1 andi $a5, $a5, 1 - beqz $a5, .LBB220_1769 -.LBB220_1772: # %pred.store.if1224 - # in Loop: Header=BB220_1767 Depth=4 + beqz $a5, .LBB220_1770 +.LBB220_1773: # %pred.store.if1224 + # in Loop: Header=BB220_1768 Depth=4 fst.s $fa0, $a3, -12 vpickve2gr.w $a5, $vr2, 2 andi $a5, $a5, 1 - bnez $a5, .LBB220_1770 -.LBB220_1773: # %pred.store.continue1227 - # in Loop: Header=BB220_1767 Depth=4 + bnez $a5, .LBB220_1771 +.LBB220_1774: # %pred.store.continue1227 + # in Loop: Header=BB220_1768 Depth=4 vpickve2gr.w $a5, $vr2, 3 andi $a5, $a5, 1 - beqz $a5, .LBB220_1775 -.LBB220_1774: # %pred.store.if1228 - # in Loop: Header=BB220_1767 Depth=4 + beqz $a5, .LBB220_1776 +.LBB220_1775: # %pred.store.if1228 + # in Loop: Header=BB220_1768 Depth=4 fst.s $fa0, $a3, -4 -.LBB220_1775: # %pred.store.continue1229 - # in Loop: Header=BB220_1767 Depth=4 +.LBB220_1776: # %pred.store.continue1229 + # in Loop: Header=BB220_1768 Depth=4 vld $vr2, $a3, 0 vfcmp.clt.s $vr2, $vr1, $vr2 vpickve2gr.w $a5, $vr2, 0 andi $a5, $a5, 1 - beqz $a5, .LBB220_1779 -# %bb.1776: # %pred.store.if1230 - # in Loop: Header=BB220_1767 Depth=4 + beqz $a5, .LBB220_1780 +# %bb.1777: # %pred.store.if1230 + # in Loop: Header=BB220_1768 Depth=4 fst.s $fa0, $a3, 0 vpickve2gr.w $a5, $vr2, 1 andi $a5, $a5, 1 - bnez $a5, .LBB220_1780 -.LBB220_1777: # %pred.store.continue1233 - # in Loop: Header=BB220_1767 Depth=4 + bnez $a5, .LBB220_1781 +.LBB220_1778: # %pred.store.continue1233 + # in Loop: Header=BB220_1768 Depth=4 vpickve2gr.w $a5, $vr2, 2 andi $a5, $a5, 1 - beqz $a5, .LBB220_1781 -.LBB220_1778: # %pred.store.if1234 - # in Loop: Header=BB220_1767 Depth=4 + beqz $a5, .LBB220_1782 +.LBB220_1779: # %pred.store.if1234 + # in Loop: Header=BB220_1768 Depth=4 fst.s $fa0, $a3, 8 vpickve2gr.w $a5, $vr2, 3 andi $a5, $a5, 1 - beqz $a5, .LBB220_1766 - b .LBB220_1782 + beqz $a5, .LBB220_1767 + b .LBB220_1783 .p2align 4, , 16 -.LBB220_1779: # %pred.store.continue1231 - # in Loop: Header=BB220_1767 Depth=4 +.LBB220_1780: # %pred.store.continue1231 + # in Loop: Header=BB220_1768 Depth=4 vpickve2gr.w $a5, $vr2, 1 andi $a5, $a5, 1 - beqz $a5, .LBB220_1777 -.LBB220_1780: # %pred.store.if1232 - # in Loop: Header=BB220_1767 Depth=4 + beqz $a5, .LBB220_1778 +.LBB220_1781: # %pred.store.if1232 + # in Loop: Header=BB220_1768 Depth=4 fst.s $fa0, $a3, 4 vpickve2gr.w $a5, $vr2, 2 andi $a5, $a5, 1 - bnez $a5, .LBB220_1778 -.LBB220_1781: # %pred.store.continue1235 - # in Loop: Header=BB220_1767 Depth=4 + bnez $a5, .LBB220_1779 +.LBB220_1782: # %pred.store.continue1235 + # in Loop: Header=BB220_1768 Depth=4 vpickve2gr.w $a5, $vr2, 3 andi $a5, $a5, 1 - beqz $a5, .LBB220_1766 -.LBB220_1782: # %pred.store.if1236 - # in Loop: Header=BB220_1767 Depth=4 + beqz $a5, .LBB220_1767 +.LBB220_1783: # %pred.store.if1236 + # in Loop: Header=BB220_1768 Depth=4 fst.s $fa0, $a3, 12 - b .LBB220_1766 + b .LBB220_1767 .p2align 4, , 16 -.LBB220_1783: # %middle.block1239 - # in Loop: Header=BB220_1762 Depth=3 - beq $a1, $a2, .LBB220_1761 -.LBB220_1784: # %scalar.ph1210.preheader - # in Loop: Header=BB220_1762 Depth=3 +.LBB220_1784: # %middle.block1239 + # in Loop: Header=BB220_1763 Depth=3 + beq $a1, $a2, .LBB220_1762 +.LBB220_1785: # %scalar.ph1210.preheader + # in Loop: Header=BB220_1763 Depth=3 lu12i.w $a1, -8 ori $a1, $a1, 120 add.d $a1, $fp, $a1 ld.d $a1, $a1, 0 # 8-byte Folded Reload alsl.d $a1, $a0, $a1, 2 - sub.d $a0, $s1, $a0 - b .LBB220_1786 + sub.d $a0, $s2, $a0 + b .LBB220_1787 .p2align 4, , 16 -.LBB220_1785: # in Loop: Header=BB220_1786 Depth=4 +.LBB220_1786: # in Loop: Header=BB220_1787 Depth=4 addi.w $a0, $a0, -1 addi.d $a1, $a1, 4 - beqz $a0, .LBB220_1761 -.LBB220_1786: # %scalar.ph1210 + beqz $a0, .LBB220_1762 +.LBB220_1787: # %scalar.ph1210 # Parent Loop BB220_1746 Depth=1 - # Parent Loop BB220_1749 Depth=2 - # Parent Loop BB220_1762 Depth=3 + # Parent Loop BB220_1750 Depth=2 + # Parent Loop BB220_1763 Depth=3 # => This Inner Loop Header: Depth=4 fld.s $fa1, $a1, 0 fcmp.cule.s $fcc0, $fa1, $fa0 - bcnez $fcc0, .LBB220_1785 -# %bb.1787: # in Loop: Header=BB220_1786 Depth=4 + bcnez $fcc0, .LBB220_1786 +# %bb.1788: # in Loop: Header=BB220_1787 Depth=4 fst.s $fa0, $a1, 0 - b .LBB220_1785 + b .LBB220_1786 .p2align 4, , 16 -.LBB220_1788: # %.preheader283.lr.ph - # in Loop: Header=BB220_1749 Depth=2 +.LBB220_1789: # %.preheader283.lr.ph + # in Loop: Header=BB220_1750 Depth=2 lu12i.w $a0, -8 ori $a0, $a0, 1920 add.d $a0, $fp, $a0 @@ -55527,30 +55542,31 @@ setup_tone_curves: # @setup_tone_curves lu12i.w $a0, -8 add.d $a0, $fp, $a0 ld.d $a0, $a0, 0 # 8-byte Folded Reload - b .LBB220_1790 + b .LBB220_1791 .p2align 4, , 16 -.LBB220_1789: # %._crit_edge322 - # in Loop: Header=BB220_1790 Depth=3 +.LBB220_1790: # %._crit_edge322 + # in Loop: Header=BB220_1791 Depth=3 addi.d $a0, $s7, 1 lu12i.w $a1, -8 ori $a1, $a1, 80 add.d $a1, $fp, $a1 ld.d $a1, $a1, 0 # 8-byte Folded Reload - bge $s7, $a1, .LBB220_1758 -.LBB220_1790: # %.preheader283 + bge $s7, $a1, .LBB220_1759 +.LBB220_1791: # %.preheader283 # Parent Loop BB220_1746 Depth=1 - # Parent Loop BB220_1749 Depth=2 + # Parent Loop BB220_1750 Depth=2 # => This Loop Header: Depth=3 - # Child Loop BB220_1793 Depth 4 - # Child Loop BB220_1798 Depth 5 - # Child Loop BB220_1817 Depth 5 - # Child Loop BB220_1823 Depth 4 - # Child Loop BB220_1842 Depth 4 - move $s1, $zero - move $s3, $zero + # Child Loop BB220_1794 Depth 4 + # Child Loop BB220_1799 Depth 5 + # Child Loop BB220_1818 Depth 5 + # Child Loop BB220_1824 Depth 4 + # Child Loop BB220_1843 Depth 4 + move $s5, $zero + move $s4, $zero move $s7, $a0 srli.d $a0, $a0, 32 - or $a0, $a0, $s6 + lu52i.d $a1, $zero, 1107 + or $a0, $a0, $a1 movgr2fr.d $fa0, $a0 fsub.d $fa0, $fa0, $fs3 move $a0, $s7 @@ -55558,7 +55574,7 @@ setup_tone_curves: # @setup_tone_curves movgr2fr.d $fa1, $a0 fadd.d $fa0, $fa1, $fa0 vldi $vr1, -928 - fmul.d $fs6, $fa0, $fa1 + fmul.d $fs7, $fa0, $fa1 ori $a0, $zero, 1792 mul.d $a1, $s7, $a0 lu12i.w $a0, -8 @@ -55569,44 +55585,44 @@ setup_tone_curves: # @setup_tone_curves ori $a2, $a2, 88 add.d $a2, $fp, $a2 st.d $a1, $a2, 0 # 8-byte Folded Spill - add.d $s5, $a0, $a1 - b .LBB220_1793 + add.d $s2, $a0, $a1 + b .LBB220_1794 .p2align 4, , 16 -.LBB220_1791: # in Loop: Header=BB220_1793 Depth=4 - move $s3, $a0 -.LBB220_1792: # %._crit_edge - # in Loop: Header=BB220_1793 Depth=4 - addi.d $s1, $s1, 1 - ori $a0, $zero, 56 - beq $s1, $a0, .LBB220_1819 -.LBB220_1793: # Parent Loop BB220_1746 Depth=1 - # Parent Loop BB220_1749 Depth=2 - # Parent Loop BB220_1790 Depth=3 +.LBB220_1792: # in Loop: Header=BB220_1794 Depth=4 + move $s4, $a0 +.LBB220_1793: # %._crit_edge + # in Loop: Header=BB220_1794 Depth=4 + addi.d $s5, $s5, 1 + beq $s5, $s3, .LBB220_1820 +.LBB220_1794: # Parent Loop BB220_1746 Depth=1 + # Parent Loop BB220_1750 Depth=2 + # Parent Loop BB220_1791 Depth=3 # => This Loop Header: Depth=4 - # Child Loop BB220_1798 Depth 5 - # Child Loop BB220_1817 Depth 5 - srli.d $a0, $s1, 32 - or $a0, $a0, $s6 + # Child Loop BB220_1799 Depth 5 + # Child Loop BB220_1818 Depth 5 + srli.d $a0, $s5, 32 + lu52i.d $a1, $zero, 1107 + or $a0, $a0, $a1 movgr2fr.d $fa0, $a0 fsub.d $fa0, $fa0, $fs3 - move $a0, $s1 + move $a0, $s5 bstrins.d $a0, $s8, 63, 32 movgr2fr.d $fa1, $a0 - fld.d $fa2, $s4, %pc_lo12(.LCPI220_7) fadd.d $fa0, $fa1, $fa0 vldi $vr1, -960 fmul.d $fa0, $fa0, $fa1 - fadd.d $fs7, $fs6, $fa0 - fadd.d $fa0, $fs7, $fa2 + fadd.d $fs6, $fs7, $fa0 + movgr2fr.d $fa0, $s1 + fadd.d $fa0, $fs6, $fa0 fadd.d $fa0, $fa0, $fs4 fmul.d $fa0, $fa0, $fs5 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 fdiv.d $fa0, $fa0, $fs2 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $s2, $fa0 + movfr2gr.s $s6, $fa0 vldi $vr0, -769 - fadd.d $fa0, $fs7, $fa0 + fadd.d $fa0, $fs6, $fa0 fadd.d $fa0, $fa0, $fs4 fmul.d $fa0, $fa0, $fs5 pcaddu18i $ra, %call36(exp) @@ -55616,15 +55632,15 @@ setup_tone_curves: # @setup_tone_curves fadd.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a2, $fa0 - srai.d $a0, $s2, 63 - andn $a1, $s2, $a0 + srai.d $a0, $s6, 63 + andn $a1, $s6, $a0 slt $a0, $a1, $s0 maskeqz $a3, $a1, $a0 masknez $a0, $s0, $a0 or $a0, $a3, $a0 - slt $a3, $a0, $s3 + slt $a3, $a0, $s4 maskeqz $a0, $a0, $a3 - masknez $a3, $s3, $a3 + masknez $a3, $s4, $a3 or $a0, $a0, $a3 srai.d $a3, $a2, 63 andn $a2, $a2, $a3 @@ -55632,14 +55648,14 @@ setup_tone_curves: # @setup_tone_curves maskeqz $a2, $a2, $a3 masknez $a3, $s0, $a3 or $a2, $a2, $a3 - bge $a0, $a2, .LBB220_1791 -# %bb.1794: # %.lr.ph315 - # in Loop: Header=BB220_1793 Depth=4 - slli.d $a3, $s1, 2 - fldx.s $fa0, $s5, $a3 - slt $a3, $s3, $s0 + bge $a0, $a2, .LBB220_1792 +# %bb.1795: # %.lr.ph315 + # in Loop: Header=BB220_1794 Depth=4 + slli.d $a3, $s5, 2 + fldx.s $fa0, $s2, $a3 + slt $a3, $s4, $s0 masknez $a4, $s0, $a3 - maskeqz $a3, $s3, $a3 + maskeqz $a3, $s4, $a3 or $a3, $a3, $a4 bstrpick.d $a1, $a1, 31, 0 slt $a4, $a3, $a1 @@ -55649,15 +55665,15 @@ setup_tone_curves: # @setup_tone_curves add.d $a4, $a2, $a3 nor $a1, $a0, $zero add.w $a1, $a2, $a1 - sub.w $s3, $a4, $a0 + sub.w $s4, $a4, $a0 ori $a0, $zero, 7 - bgeu $a1, $a0, .LBB220_1796 -# %bb.1795: # in Loop: Header=BB220_1793 Depth=4 + bgeu $a1, $a0, .LBB220_1797 +# %bb.1796: # in Loop: Header=BB220_1794 Depth=4 move $a0, $a3 - b .LBB220_1815 + b .LBB220_1816 .p2align 4, , 16 -.LBB220_1796: # %vector.ph1276 - # in Loop: Header=BB220_1793 Depth=4 +.LBB220_1797: # %vector.ph1276 + # in Loop: Header=BB220_1794 Depth=4 bstrpick.d $a0, $a1, 31, 0 addi.d $a1, $a0, 1 bstrpick.d $a0, $a1, 32, 3 @@ -55670,144 +55686,144 @@ setup_tone_curves: # @setup_tone_curves ld.d $a4, $a4, 0 # 8-byte Folded Reload alsl.d $a3, $a3, $a4, 2 move $a4, $a2 - b .LBB220_1798 + b .LBB220_1799 .p2align 4, , 16 -.LBB220_1797: # %pred.store.continue1301 - # in Loop: Header=BB220_1798 Depth=5 +.LBB220_1798: # %pred.store.continue1301 + # in Loop: Header=BB220_1799 Depth=5 addi.d $a4, $a4, -8 addi.d $a3, $a3, 32 - beqz $a4, .LBB220_1814 -.LBB220_1798: # %vector.body1281 + beqz $a4, .LBB220_1815 +.LBB220_1799: # %vector.body1281 # Parent Loop BB220_1746 Depth=1 - # Parent Loop BB220_1749 Depth=2 - # Parent Loop BB220_1790 Depth=3 - # Parent Loop BB220_1793 Depth=4 + # Parent Loop BB220_1750 Depth=2 + # Parent Loop BB220_1791 Depth=3 + # Parent Loop BB220_1794 Depth=4 # => This Inner Loop Header: Depth=5 vld $vr2, $a3, -16 vfcmp.clt.s $vr2, $vr1, $vr2 vpickve2gr.w $a5, $vr2, 0 andi $a5, $a5, 1 - beqz $a5, .LBB220_1802 -# %bb.1799: # %pred.store.if1286 - # in Loop: Header=BB220_1798 Depth=5 + beqz $a5, .LBB220_1803 +# %bb.1800: # %pred.store.if1286 + # in Loop: Header=BB220_1799 Depth=5 fst.s $fa0, $a3, -16 vpickve2gr.w $a5, $vr2, 1 andi $a5, $a5, 1 - bnez $a5, .LBB220_1803 -.LBB220_1800: # %pred.store.continue1289 - # in Loop: Header=BB220_1798 Depth=5 + bnez $a5, .LBB220_1804 +.LBB220_1801: # %pred.store.continue1289 + # in Loop: Header=BB220_1799 Depth=5 vpickve2gr.w $a5, $vr2, 2 andi $a5, $a5, 1 - beqz $a5, .LBB220_1804 -.LBB220_1801: # %pred.store.if1290 - # in Loop: Header=BB220_1798 Depth=5 + beqz $a5, .LBB220_1805 +.LBB220_1802: # %pred.store.if1290 + # in Loop: Header=BB220_1799 Depth=5 fst.s $fa0, $a3, -8 vpickve2gr.w $a5, $vr2, 3 andi $a5, $a5, 1 - bnez $a5, .LBB220_1805 - b .LBB220_1806 + bnez $a5, .LBB220_1806 + b .LBB220_1807 .p2align 4, , 16 -.LBB220_1802: # %pred.store.continue1287 - # in Loop: Header=BB220_1798 Depth=5 +.LBB220_1803: # %pred.store.continue1287 + # in Loop: Header=BB220_1799 Depth=5 vpickve2gr.w $a5, $vr2, 1 andi $a5, $a5, 1 - beqz $a5, .LBB220_1800 -.LBB220_1803: # %pred.store.if1288 - # in Loop: Header=BB220_1798 Depth=5 + beqz $a5, .LBB220_1801 +.LBB220_1804: # %pred.store.if1288 + # in Loop: Header=BB220_1799 Depth=5 fst.s $fa0, $a3, -12 vpickve2gr.w $a5, $vr2, 2 andi $a5, $a5, 1 - bnez $a5, .LBB220_1801 -.LBB220_1804: # %pred.store.continue1291 - # in Loop: Header=BB220_1798 Depth=5 + bnez $a5, .LBB220_1802 +.LBB220_1805: # %pred.store.continue1291 + # in Loop: Header=BB220_1799 Depth=5 vpickve2gr.w $a5, $vr2, 3 andi $a5, $a5, 1 - beqz $a5, .LBB220_1806 -.LBB220_1805: # %pred.store.if1292 - # in Loop: Header=BB220_1798 Depth=5 + beqz $a5, .LBB220_1807 +.LBB220_1806: # %pred.store.if1292 + # in Loop: Header=BB220_1799 Depth=5 fst.s $fa0, $a3, -4 -.LBB220_1806: # %pred.store.continue1293 - # in Loop: Header=BB220_1798 Depth=5 +.LBB220_1807: # %pred.store.continue1293 + # in Loop: Header=BB220_1799 Depth=5 vld $vr2, $a3, 0 vfcmp.clt.s $vr2, $vr1, $vr2 vpickve2gr.w $a5, $vr2, 0 andi $a5, $a5, 1 - beqz $a5, .LBB220_1810 -# %bb.1807: # %pred.store.if1294 - # in Loop: Header=BB220_1798 Depth=5 + beqz $a5, .LBB220_1811 +# %bb.1808: # %pred.store.if1294 + # in Loop: Header=BB220_1799 Depth=5 fst.s $fa0, $a3, 0 vpickve2gr.w $a5, $vr2, 1 andi $a5, $a5, 1 - bnez $a5, .LBB220_1811 -.LBB220_1808: # %pred.store.continue1297 - # in Loop: Header=BB220_1798 Depth=5 + bnez $a5, .LBB220_1812 +.LBB220_1809: # %pred.store.continue1297 + # in Loop: Header=BB220_1799 Depth=5 vpickve2gr.w $a5, $vr2, 2 andi $a5, $a5, 1 - beqz $a5, .LBB220_1812 -.LBB220_1809: # %pred.store.if1298 - # in Loop: Header=BB220_1798 Depth=5 + beqz $a5, .LBB220_1813 +.LBB220_1810: # %pred.store.if1298 + # in Loop: Header=BB220_1799 Depth=5 fst.s $fa0, $a3, 8 vpickve2gr.w $a5, $vr2, 3 andi $a5, $a5, 1 - beqz $a5, .LBB220_1797 - b .LBB220_1813 + beqz $a5, .LBB220_1798 + b .LBB220_1814 .p2align 4, , 16 -.LBB220_1810: # %pred.store.continue1295 - # in Loop: Header=BB220_1798 Depth=5 +.LBB220_1811: # %pred.store.continue1295 + # in Loop: Header=BB220_1799 Depth=5 vpickve2gr.w $a5, $vr2, 1 andi $a5, $a5, 1 - beqz $a5, .LBB220_1808 -.LBB220_1811: # %pred.store.if1296 - # in Loop: Header=BB220_1798 Depth=5 + beqz $a5, .LBB220_1809 +.LBB220_1812: # %pred.store.if1296 + # in Loop: Header=BB220_1799 Depth=5 fst.s $fa0, $a3, 4 vpickve2gr.w $a5, $vr2, 2 andi $a5, $a5, 1 - bnez $a5, .LBB220_1809 -.LBB220_1812: # %pred.store.continue1299 - # in Loop: Header=BB220_1798 Depth=5 + bnez $a5, .LBB220_1810 +.LBB220_1813: # %pred.store.continue1299 + # in Loop: Header=BB220_1799 Depth=5 vpickve2gr.w $a5, $vr2, 3 andi $a5, $a5, 1 - beqz $a5, .LBB220_1797 -.LBB220_1813: # %pred.store.if1300 - # in Loop: Header=BB220_1798 Depth=5 + beqz $a5, .LBB220_1798 +.LBB220_1814: # %pred.store.if1300 + # in Loop: Header=BB220_1799 Depth=5 fst.s $fa0, $a3, 12 - b .LBB220_1797 + b .LBB220_1798 .p2align 4, , 16 -.LBB220_1814: # %middle.block1303 - # in Loop: Header=BB220_1793 Depth=4 - beq $a1, $a2, .LBB220_1792 -.LBB220_1815: # %scalar.ph1274.preheader - # in Loop: Header=BB220_1793 Depth=4 +.LBB220_1815: # %middle.block1303 + # in Loop: Header=BB220_1794 Depth=4 + beq $a1, $a2, .LBB220_1793 +.LBB220_1816: # %scalar.ph1274.preheader + # in Loop: Header=BB220_1794 Depth=4 lu12i.w $a1, -8 ori $a1, $a1, 120 add.d $a1, $fp, $a1 ld.d $a1, $a1, 0 # 8-byte Folded Reload alsl.d $a1, $a0, $a1, 2 - sub.d $a0, $s3, $a0 - b .LBB220_1817 + sub.d $a0, $s4, $a0 + b .LBB220_1818 .p2align 4, , 16 -.LBB220_1816: # in Loop: Header=BB220_1817 Depth=5 +.LBB220_1817: # in Loop: Header=BB220_1818 Depth=5 addi.w $a0, $a0, -1 addi.d $a1, $a1, 4 - beqz $a0, .LBB220_1792 -.LBB220_1817: # %scalar.ph1274 + beqz $a0, .LBB220_1793 +.LBB220_1818: # %scalar.ph1274 # Parent Loop BB220_1746 Depth=1 - # Parent Loop BB220_1749 Depth=2 - # Parent Loop BB220_1790 Depth=3 - # Parent Loop BB220_1793 Depth=4 + # Parent Loop BB220_1750 Depth=2 + # Parent Loop BB220_1791 Depth=3 + # Parent Loop BB220_1794 Depth=4 # => This Inner Loop Header: Depth=5 fld.s $fa1, $a1, 0 fcmp.cule.s $fcc0, $fa1, $fa0 - bcnez $fcc0, .LBB220_1816 -# %bb.1818: # in Loop: Header=BB220_1817 Depth=5 + bcnez $fcc0, .LBB220_1817 +# %bb.1819: # in Loop: Header=BB220_1818 Depth=5 fst.s $fa0, $a1, 0 - b .LBB220_1816 + b .LBB220_1817 .p2align 4, , 16 -.LBB220_1819: # %.preheader - # in Loop: Header=BB220_1790 Depth=3 - bge $s3, $s0, .LBB220_1789 -# %bb.1820: # %.lr.ph321 - # in Loop: Header=BB220_1790 Depth=3 +.LBB220_1820: # %.preheader + # in Loop: Header=BB220_1791 Depth=3 + bge $s4, $s0, .LBB220_1790 +# %bb.1821: # %.lr.ph321 + # in Loop: Header=BB220_1791 Depth=3 lu12i.w $a0, -8 ori $a0, $a0, 64 add.d $a0, $fp, $a0 @@ -55817,282 +55833,282 @@ setup_tone_curves: # @setup_tone_curves add.d $a1, $fp, $a1 ld.d $a1, $a1, 0 # 8-byte Folded Reload fldx.s $fa0, $a0, $a1 - sub.d $a1, $s0, $s3 - move $a0, $s3 + sub.d $a1, $s0, $s4 + move $a0, $s4 ori $a2, $zero, 8 - bltu $a1, $a2, .LBB220_1840 -# %bb.1821: # %vector.ph1244 - # in Loop: Header=BB220_1790 Depth=3 + bltu $a1, $a2, .LBB220_1841 +# %bb.1822: # %vector.ph1244 + # in Loop: Header=BB220_1791 Depth=3 move $a2, $a1 bstrins.d $a2, $zero, 2, 0 - add.d $a0, $a2, $s3 + add.d $a0, $a2, $s4 vreplvei.w $vr1, $vr0, 0 lu12i.w $a3, -8 ori $a3, $a3, 96 add.d $a3, $fp, $a3 ld.d $a3, $a3, 0 # 8-byte Folded Reload - alsl.d $a3, $s3, $a3, 2 + alsl.d $a3, $s4, $a3, 2 move $a4, $a2 - b .LBB220_1823 + b .LBB220_1824 .p2align 4, , 16 -.LBB220_1822: # %pred.store.continue1269 - # in Loop: Header=BB220_1823 Depth=4 +.LBB220_1823: # %pred.store.continue1269 + # in Loop: Header=BB220_1824 Depth=4 addi.d $a4, $a4, -8 addi.d $a3, $a3, 32 - beqz $a4, .LBB220_1839 -.LBB220_1823: # %vector.body1249 + beqz $a4, .LBB220_1840 +.LBB220_1824: # %vector.body1249 # Parent Loop BB220_1746 Depth=1 - # Parent Loop BB220_1749 Depth=2 - # Parent Loop BB220_1790 Depth=3 + # Parent Loop BB220_1750 Depth=2 + # Parent Loop BB220_1791 Depth=3 # => This Inner Loop Header: Depth=4 vld $vr2, $a3, -16 vfcmp.clt.s $vr2, $vr1, $vr2 vpickve2gr.w $a5, $vr2, 0 andi $a5, $a5, 1 - beqz $a5, .LBB220_1827 -# %bb.1824: # %pred.store.if1254 - # in Loop: Header=BB220_1823 Depth=4 + beqz $a5, .LBB220_1828 +# %bb.1825: # %pred.store.if1254 + # in Loop: Header=BB220_1824 Depth=4 fst.s $fa0, $a3, -16 vpickve2gr.w $a5, $vr2, 1 andi $a5, $a5, 1 - bnez $a5, .LBB220_1828 -.LBB220_1825: # %pred.store.continue1257 - # in Loop: Header=BB220_1823 Depth=4 + bnez $a5, .LBB220_1829 +.LBB220_1826: # %pred.store.continue1257 + # in Loop: Header=BB220_1824 Depth=4 vpickve2gr.w $a5, $vr2, 2 andi $a5, $a5, 1 - beqz $a5, .LBB220_1829 -.LBB220_1826: # %pred.store.if1258 - # in Loop: Header=BB220_1823 Depth=4 + beqz $a5, .LBB220_1830 +.LBB220_1827: # %pred.store.if1258 + # in Loop: Header=BB220_1824 Depth=4 fst.s $fa0, $a3, -8 vpickve2gr.w $a5, $vr2, 3 andi $a5, $a5, 1 - bnez $a5, .LBB220_1830 - b .LBB220_1831 + bnez $a5, .LBB220_1831 + b .LBB220_1832 .p2align 4, , 16 -.LBB220_1827: # %pred.store.continue1255 - # in Loop: Header=BB220_1823 Depth=4 +.LBB220_1828: # %pred.store.continue1255 + # in Loop: Header=BB220_1824 Depth=4 vpickve2gr.w $a5, $vr2, 1 andi $a5, $a5, 1 - beqz $a5, .LBB220_1825 -.LBB220_1828: # %pred.store.if1256 - # in Loop: Header=BB220_1823 Depth=4 + beqz $a5, .LBB220_1826 +.LBB220_1829: # %pred.store.if1256 + # in Loop: Header=BB220_1824 Depth=4 fst.s $fa0, $a3, -12 vpickve2gr.w $a5, $vr2, 2 andi $a5, $a5, 1 - bnez $a5, .LBB220_1826 -.LBB220_1829: # %pred.store.continue1259 - # in Loop: Header=BB220_1823 Depth=4 + bnez $a5, .LBB220_1827 +.LBB220_1830: # %pred.store.continue1259 + # in Loop: Header=BB220_1824 Depth=4 vpickve2gr.w $a5, $vr2, 3 andi $a5, $a5, 1 - beqz $a5, .LBB220_1831 -.LBB220_1830: # %pred.store.if1260 - # in Loop: Header=BB220_1823 Depth=4 + beqz $a5, .LBB220_1832 +.LBB220_1831: # %pred.store.if1260 + # in Loop: Header=BB220_1824 Depth=4 fst.s $fa0, $a3, -4 -.LBB220_1831: # %pred.store.continue1261 - # in Loop: Header=BB220_1823 Depth=4 +.LBB220_1832: # %pred.store.continue1261 + # in Loop: Header=BB220_1824 Depth=4 vld $vr2, $a3, 0 vfcmp.clt.s $vr2, $vr1, $vr2 vpickve2gr.w $a5, $vr2, 0 andi $a5, $a5, 1 - beqz $a5, .LBB220_1835 -# %bb.1832: # %pred.store.if1262 - # in Loop: Header=BB220_1823 Depth=4 + beqz $a5, .LBB220_1836 +# %bb.1833: # %pred.store.if1262 + # in Loop: Header=BB220_1824 Depth=4 fst.s $fa0, $a3, 0 vpickve2gr.w $a5, $vr2, 1 andi $a5, $a5, 1 - bnez $a5, .LBB220_1836 -.LBB220_1833: # %pred.store.continue1265 - # in Loop: Header=BB220_1823 Depth=4 + bnez $a5, .LBB220_1837 +.LBB220_1834: # %pred.store.continue1265 + # in Loop: Header=BB220_1824 Depth=4 vpickve2gr.w $a5, $vr2, 2 andi $a5, $a5, 1 - beqz $a5, .LBB220_1837 -.LBB220_1834: # %pred.store.if1266 - # in Loop: Header=BB220_1823 Depth=4 + beqz $a5, .LBB220_1838 +.LBB220_1835: # %pred.store.if1266 + # in Loop: Header=BB220_1824 Depth=4 fst.s $fa0, $a3, 8 vpickve2gr.w $a5, $vr2, 3 andi $a5, $a5, 1 - beqz $a5, .LBB220_1822 - b .LBB220_1838 + beqz $a5, .LBB220_1823 + b .LBB220_1839 .p2align 4, , 16 -.LBB220_1835: # %pred.store.continue1263 - # in Loop: Header=BB220_1823 Depth=4 +.LBB220_1836: # %pred.store.continue1263 + # in Loop: Header=BB220_1824 Depth=4 vpickve2gr.w $a5, $vr2, 1 andi $a5, $a5, 1 - beqz $a5, .LBB220_1833 -.LBB220_1836: # %pred.store.if1264 - # in Loop: Header=BB220_1823 Depth=4 + beqz $a5, .LBB220_1834 +.LBB220_1837: # %pred.store.if1264 + # in Loop: Header=BB220_1824 Depth=4 fst.s $fa0, $a3, 4 vpickve2gr.w $a5, $vr2, 2 andi $a5, $a5, 1 - bnez $a5, .LBB220_1834 -.LBB220_1837: # %pred.store.continue1267 - # in Loop: Header=BB220_1823 Depth=4 + bnez $a5, .LBB220_1835 +.LBB220_1838: # %pred.store.continue1267 + # in Loop: Header=BB220_1824 Depth=4 vpickve2gr.w $a5, $vr2, 3 andi $a5, $a5, 1 - beqz $a5, .LBB220_1822 -.LBB220_1838: # %pred.store.if1268 - # in Loop: Header=BB220_1823 Depth=4 + beqz $a5, .LBB220_1823 +.LBB220_1839: # %pred.store.if1268 + # in Loop: Header=BB220_1824 Depth=4 fst.s $fa0, $a3, 12 - b .LBB220_1822 + b .LBB220_1823 .p2align 4, , 16 -.LBB220_1839: # %middle.block1271 - # in Loop: Header=BB220_1790 Depth=3 - beq $a1, $a2, .LBB220_1789 -.LBB220_1840: # %scalar.ph1242.preheader - # in Loop: Header=BB220_1790 Depth=3 +.LBB220_1840: # %middle.block1271 + # in Loop: Header=BB220_1791 Depth=3 + beq $a1, $a2, .LBB220_1790 +.LBB220_1841: # %scalar.ph1242.preheader + # in Loop: Header=BB220_1791 Depth=3 lu12i.w $a1, -8 ori $a1, $a1, 120 add.d $a1, $fp, $a1 ld.d $a1, $a1, 0 # 8-byte Folded Reload alsl.d $a1, $a0, $a1, 2 sub.d $a0, $s0, $a0 - b .LBB220_1842 + b .LBB220_1843 .p2align 4, , 16 -.LBB220_1841: # in Loop: Header=BB220_1842 Depth=4 +.LBB220_1842: # in Loop: Header=BB220_1843 Depth=4 addi.d $a0, $a0, -1 addi.d $a1, $a1, 4 - beqz $a0, .LBB220_1789 -.LBB220_1842: # %scalar.ph1242 + beqz $a0, .LBB220_1790 +.LBB220_1843: # %scalar.ph1242 # Parent Loop BB220_1746 Depth=1 - # Parent Loop BB220_1749 Depth=2 - # Parent Loop BB220_1790 Depth=3 + # Parent Loop BB220_1750 Depth=2 + # Parent Loop BB220_1791 Depth=3 # => This Inner Loop Header: Depth=4 fld.s $fa1, $a1, 0 fcmp.cule.s $fcc0, $fa1, $fa0 - bcnez $fcc0, .LBB220_1841 -# %bb.1843: # in Loop: Header=BB220_1842 Depth=4 + bcnez $fcc0, .LBB220_1842 +# %bb.1844: # in Loop: Header=BB220_1843 Depth=4 fst.s $fa0, $a1, 0 - b .LBB220_1841 - .p2align 4, , 16 -.LBB220_1844: # %.preheader285 - # in Loop: Header=BB220_1749 Depth=2 - bge $s1, $s0, .LBB220_1865 -# %bb.1845: # %.lr.ph338 - # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa0, $s3, 220 - sub.d $a1, $s0, $s1 - move $a0, $s1 + b .LBB220_1842 + .p2align 4, , 16 +.LBB220_1845: # %.preheader285 + # in Loop: Header=BB220_1750 Depth=2 + bge $s2, $s0, .LBB220_1866 +# %bb.1846: # %.lr.ph338 + # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $s4, 220 + sub.d $a1, $s0, $s2 + move $a0, $s2 ori $a2, $zero, 8 - bltu $a1, $a2, .LBB220_1875 -# %bb.1846: # %vector.ph1185 - # in Loop: Header=BB220_1749 Depth=2 + bltu $a1, $a2, .LBB220_1876 +# %bb.1847: # %vector.ph1185 + # in Loop: Header=BB220_1750 Depth=2 move $a2, $a1 bstrins.d $a2, $zero, 2, 0 - add.d $a0, $a2, $s1 + add.d $a0, $a2, $s2 vreplvei.w $vr1, $vr0, 0 lu12i.w $a3, -8 ori $a3, $a3, 96 add.d $a3, $fp, $a3 ld.d $a3, $a3, 0 # 8-byte Folded Reload - alsl.d $a3, $s1, $a3, 2 + alsl.d $a3, $s2, $a3, 2 move $a4, $a2 - b .LBB220_1848 + b .LBB220_1849 .p2align 4, , 16 -.LBB220_1847: # %pred.store.continue1207 - # in Loop: Header=BB220_1848 Depth=3 +.LBB220_1848: # %pred.store.continue1207 + # in Loop: Header=BB220_1849 Depth=3 addi.d $a4, $a4, -8 addi.d $a3, $a3, 32 - beqz $a4, .LBB220_1864 -.LBB220_1848: # %vector.body1188 + beqz $a4, .LBB220_1865 +.LBB220_1849: # %vector.body1188 # Parent Loop BB220_1746 Depth=1 - # Parent Loop BB220_1749 Depth=2 + # Parent Loop BB220_1750 Depth=2 # => This Inner Loop Header: Depth=3 vld $vr2, $a3, -16 vfcmp.clt.s $vr2, $vr1, $vr2 vpickve2gr.w $a5, $vr2, 0 andi $a5, $a5, 1 - beqz $a5, .LBB220_1852 -# %bb.1849: # %pred.store.if1192 - # in Loop: Header=BB220_1848 Depth=3 + beqz $a5, .LBB220_1853 +# %bb.1850: # %pred.store.if1192 + # in Loop: Header=BB220_1849 Depth=3 fst.s $fa0, $a3, -16 vpickve2gr.w $a5, $vr2, 1 andi $a5, $a5, 1 - bnez $a5, .LBB220_1853 -.LBB220_1850: # %pred.store.continue1195 - # in Loop: Header=BB220_1848 Depth=3 + bnez $a5, .LBB220_1854 +.LBB220_1851: # %pred.store.continue1195 + # in Loop: Header=BB220_1849 Depth=3 vpickve2gr.w $a5, $vr2, 2 andi $a5, $a5, 1 - beqz $a5, .LBB220_1854 -.LBB220_1851: # %pred.store.if1196 - # in Loop: Header=BB220_1848 Depth=3 + beqz $a5, .LBB220_1855 +.LBB220_1852: # %pred.store.if1196 + # in Loop: Header=BB220_1849 Depth=3 fst.s $fa0, $a3, -8 vpickve2gr.w $a5, $vr2, 3 andi $a5, $a5, 1 - bnez $a5, .LBB220_1855 - b .LBB220_1856 + bnez $a5, .LBB220_1856 + b .LBB220_1857 .p2align 4, , 16 -.LBB220_1852: # %pred.store.continue1193 - # in Loop: Header=BB220_1848 Depth=3 +.LBB220_1853: # %pred.store.continue1193 + # in Loop: Header=BB220_1849 Depth=3 vpickve2gr.w $a5, $vr2, 1 andi $a5, $a5, 1 - beqz $a5, .LBB220_1850 -.LBB220_1853: # %pred.store.if1194 - # in Loop: Header=BB220_1848 Depth=3 + beqz $a5, .LBB220_1851 +.LBB220_1854: # %pred.store.if1194 + # in Loop: Header=BB220_1849 Depth=3 fst.s $fa0, $a3, -12 vpickve2gr.w $a5, $vr2, 2 andi $a5, $a5, 1 - bnez $a5, .LBB220_1851 -.LBB220_1854: # %pred.store.continue1197 - # in Loop: Header=BB220_1848 Depth=3 + bnez $a5, .LBB220_1852 +.LBB220_1855: # %pred.store.continue1197 + # in Loop: Header=BB220_1849 Depth=3 vpickve2gr.w $a5, $vr2, 3 andi $a5, $a5, 1 - beqz $a5, .LBB220_1856 -.LBB220_1855: # %pred.store.if1198 - # in Loop: Header=BB220_1848 Depth=3 + beqz $a5, .LBB220_1857 +.LBB220_1856: # %pred.store.if1198 + # in Loop: Header=BB220_1849 Depth=3 fst.s $fa0, $a3, -4 -.LBB220_1856: # %pred.store.continue1199 - # in Loop: Header=BB220_1848 Depth=3 +.LBB220_1857: # %pred.store.continue1199 + # in Loop: Header=BB220_1849 Depth=3 vld $vr2, $a3, 0 vfcmp.clt.s $vr2, $vr1, $vr2 vpickve2gr.w $a5, $vr2, 0 andi $a5, $a5, 1 - beqz $a5, .LBB220_1860 -# %bb.1857: # %pred.store.if1200 - # in Loop: Header=BB220_1848 Depth=3 + beqz $a5, .LBB220_1861 +# %bb.1858: # %pred.store.if1200 + # in Loop: Header=BB220_1849 Depth=3 fst.s $fa0, $a3, 0 vpickve2gr.w $a5, $vr2, 1 andi $a5, $a5, 1 - bnez $a5, .LBB220_1861 -.LBB220_1858: # %pred.store.continue1203 - # in Loop: Header=BB220_1848 Depth=3 + bnez $a5, .LBB220_1862 +.LBB220_1859: # %pred.store.continue1203 + # in Loop: Header=BB220_1849 Depth=3 vpickve2gr.w $a5, $vr2, 2 andi $a5, $a5, 1 - beqz $a5, .LBB220_1862 -.LBB220_1859: # %pred.store.if1204 - # in Loop: Header=BB220_1848 Depth=3 + beqz $a5, .LBB220_1863 +.LBB220_1860: # %pred.store.if1204 + # in Loop: Header=BB220_1849 Depth=3 fst.s $fa0, $a3, 8 vpickve2gr.w $a5, $vr2, 3 andi $a5, $a5, 1 - beqz $a5, .LBB220_1847 - b .LBB220_1863 + beqz $a5, .LBB220_1848 + b .LBB220_1864 .p2align 4, , 16 -.LBB220_1860: # %pred.store.continue1201 - # in Loop: Header=BB220_1848 Depth=3 +.LBB220_1861: # %pred.store.continue1201 + # in Loop: Header=BB220_1849 Depth=3 vpickve2gr.w $a5, $vr2, 1 andi $a5, $a5, 1 - beqz $a5, .LBB220_1858 -.LBB220_1861: # %pred.store.if1202 - # in Loop: Header=BB220_1848 Depth=3 + beqz $a5, .LBB220_1859 +.LBB220_1862: # %pred.store.if1202 + # in Loop: Header=BB220_1849 Depth=3 fst.s $fa0, $a3, 4 vpickve2gr.w $a5, $vr2, 2 andi $a5, $a5, 1 - bnez $a5, .LBB220_1859 -.LBB220_1862: # %pred.store.continue1205 - # in Loop: Header=BB220_1848 Depth=3 + bnez $a5, .LBB220_1860 +.LBB220_1863: # %pred.store.continue1205 + # in Loop: Header=BB220_1849 Depth=3 vpickve2gr.w $a5, $vr2, 3 andi $a5, $a5, 1 - beqz $a5, .LBB220_1847 -.LBB220_1863: # %pred.store.if1206 - # in Loop: Header=BB220_1848 Depth=3 + beqz $a5, .LBB220_1848 +.LBB220_1864: # %pred.store.if1206 + # in Loop: Header=BB220_1849 Depth=3 fst.s $fa0, $a3, 12 - b .LBB220_1847 -.LBB220_1864: # %middle.block1209 - # in Loop: Header=BB220_1749 Depth=2 - bne $a1, $a2, .LBB220_1875 + b .LBB220_1848 +.LBB220_1865: # %middle.block1209 + # in Loop: Header=BB220_1750 Depth=2 + bne $a1, $a2, .LBB220_1876 .p2align 4, , 16 -.LBB220_1865: # %.loopexit.preheader - # in Loop: Header=BB220_1749 Depth=2 - move $s3, $zero +.LBB220_1866: # %.loopexit.preheader + # in Loop: Header=BB220_1750 Depth=2 + move $s4, $zero lu12i.w $a0, -8 ori $a0, $a0, 40 add.d $a0, $fp, $a0 @@ -56101,508 +56117,570 @@ setup_tone_curves: # @setup_tone_curves ori $a1, $a1, 48 add.d $a1, $fp, $a1 ld.d $a1, $a1, 0 # 8-byte Folded Reload - alsl.d $s1, $a1, $a0, 3 - ori $s4, $zero, 8 - b .LBB220_1867 + alsl.d $s2, $a1, $a0, 3 + ori $s5, $zero, 8 + b .LBB220_1868 .p2align 4, , 16 -.LBB220_1866: # in Loop: Header=BB220_1867 Depth=3 - ld.d $a0, $s1, 0 - fstx.s $fa0, $a0, $s4 - addi.d $s4, $s4, 4 - addi.w $s3, $s3, 1 +.LBB220_1867: # in Loop: Header=BB220_1868 Depth=3 + ld.d $a0, $s2, 0 + fstx.s $fa0, $a0, $s5 + addi.d $s5, $s5, 4 + addi.w $s4, $s4, 1 ori $a0, $zero, 232 - beq $s4, $a0, .LBB220_1870 -.LBB220_1867: # %.loopexit + beq $s5, $a0, .LBB220_1871 +.LBB220_1868: # %.loopexit # Parent Loop BB220_1746 Depth=1 - # Parent Loop BB220_1749 Depth=2 + # Parent Loop BB220_1750 Depth=2 # => This Inner Loop Header: Depth=3 - bstrpick.d $a0, $s3, 31, 0 + bstrpick.d $a0, $s4, 31, 0 movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 vldi $vr1, -960 fmul.d $fa0, $fa0, $fa1 - fadd.d $fa0, $fs0, $fa0 + fadd.d $fa0, $fs1, $fa0 vldi $vr1, -896 fadd.d $fa0, $fa0, $fa1 fadd.d $fa0, $fa0, $fs4 fmul.d $fa0, $fa0, $fs5 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 - fdiv.d $fa1, $fa0, $fs2 - pcalau12i $a0, %pc_hi20(.LCPI220_8) - fld.s $fa0, $a0, %pc_lo12(.LCPI220_8) - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 - bltz $a0, .LBB220_1866 -# %bb.1868: # %.loopexit - # in Loop: Header=BB220_1867 Depth=3 - bge $a0, $s0, .LBB220_1866 -# %bb.1869: # in Loop: Header=BB220_1867 Depth=3 + fdiv.d $fa0, $fa0, $fs2 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a0, $fa0 + fmov.s $fa0, $fs0 + bltz $a0, .LBB220_1867 +# %bb.1869: # %.loopexit + # in Loop: Header=BB220_1868 Depth=3 + fmov.s $fa0, $fs0 + bge $a0, $s0, .LBB220_1867 +# %bb.1870: # in Loop: Header=BB220_1868 Depth=3 slli.d $a0, $a0, 2 lu12i.w $a1, -8 ori $a1, $a1, 120 add.d $a1, $fp, $a1 ld.d $a1, $a1, 0 # 8-byte Folded Reload fldx.s $fa0, $a1, $a0 - b .LBB220_1866 + b .LBB220_1867 .p2align 4, , 16 -.LBB220_1870: # %.preheader284 - # in Loop: Header=BB220_1749 Depth=2 - ld.d $a0, $s1, 0 - fld.s $fa1, $a0, 8 - pcalau12i $a1, %pc_hi20(.LCPI220_9) - fld.s $fa0, $a1, %pc_lo12(.LCPI220_9) - fcmp.clt.s $fcc0, $fa0, $fa1 - fmov.s $fa1, $fs1 +.LBB220_1871: # %.preheader284 + # in Loop: Header=BB220_1750 Depth=2 + ld.d $a0, $s2, 0 + fld.s $fa0, $a0, 8 + lu12i.w $a1, -9 + ori $a1, $a1, 4072 + add.d $a1, $fp, $a1 + fld.s $fa1, $a1, 0 # 4-byte Folded Reload + fcmp.clt.s $fcc0, $fa1, $fa0 + lu12i.w $a1, -9 + ori $a1, $a1, 4092 + add.d $a1, $fp, $a1 + fld.s $fa0, $a1, 0 # 4-byte Folded Reload + # kill: def $f0 killed $f0 def $vr0 lu12i.w $a1, -8 ori $a1, $a1, 48 add.d $a1, $fp, $a1 - ld.d $s2, $a1, 0 # 8-byte Folded Reload - bcnez $fcc0, .LBB220_1906 -# %bb.1871: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 12 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1873 -# %bb.1872: # in Loop: Header=BB220_1749 Depth=2 - vldi $vr1, -1168 - b .LBB220_1906 + ld.d $s4, $a1, 0 # 8-byte Folded Reload + bcnez $fcc0, .LBB220_1907 +# %bb.1872: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 12 + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1874 +# %bb.1873: # in Loop: Header=BB220_1750 Depth=2 + vldi $vr0, -1168 + b .LBB220_1907 .p2align 4, , 16 -.LBB220_1873: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 16 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1879 -# %bb.1874: # in Loop: Header=BB220_1749 Depth=2 - vldi $vr1, -1280 - b .LBB220_1906 -.LBB220_1875: # %scalar.ph1184.preheader - # in Loop: Header=BB220_1749 Depth=2 +.LBB220_1874: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 16 + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1880 +# %bb.1875: # in Loop: Header=BB220_1750 Depth=2 + vldi $vr0, -1280 + b .LBB220_1907 +.LBB220_1876: # %scalar.ph1184.preheader + # in Loop: Header=BB220_1750 Depth=2 lu12i.w $a1, -8 ori $a1, $a1, 120 add.d $a1, $fp, $a1 ld.d $a1, $a1, 0 # 8-byte Folded Reload alsl.d $a1, $a0, $a1, 2 sub.d $a0, $s0, $a0 - b .LBB220_1877 + b .LBB220_1878 .p2align 4, , 16 -.LBB220_1876: # in Loop: Header=BB220_1877 Depth=3 +.LBB220_1877: # in Loop: Header=BB220_1878 Depth=3 addi.d $a0, $a0, -1 addi.d $a1, $a1, 4 - beqz $a0, .LBB220_1865 -.LBB220_1877: # %scalar.ph1184 + beqz $a0, .LBB220_1866 +.LBB220_1878: # %scalar.ph1184 # Parent Loop BB220_1746 Depth=1 - # Parent Loop BB220_1749 Depth=2 + # Parent Loop BB220_1750 Depth=2 # => This Inner Loop Header: Depth=3 fld.s $fa1, $a1, 0 fcmp.cule.s $fcc0, $fa1, $fa0 - bcnez $fcc0, .LBB220_1876 -# %bb.1878: # in Loop: Header=BB220_1877 Depth=3 + bcnez $fcc0, .LBB220_1877 +# %bb.1879: # in Loop: Header=BB220_1878 Depth=3 fst.s $fa0, $a1, 0 - b .LBB220_1876 -.LBB220_1879: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 20 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1881 -# %bb.1880: # in Loop: Header=BB220_1749 Depth=2 - vldi $vr1, -1272 - b .LBB220_1906 -.LBB220_1881: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 24 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1883 -# %bb.1882: # in Loop: Header=BB220_1749 Depth=2 - vldi $vr1, -1264 - b .LBB220_1906 -.LBB220_1883: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 28 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1885 -# %bb.1884: # in Loop: Header=BB220_1749 Depth=2 - vldi $vr1, -1260 - b .LBB220_1906 -.LBB220_1885: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 32 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1887 -# %bb.1886: # in Loop: Header=BB220_1749 Depth=2 - vldi $vr1, -1256 - b .LBB220_1906 -.LBB220_1887: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 36 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1889 -# %bb.1888: # in Loop: Header=BB220_1749 Depth=2 - vldi $vr1, -1252 - b .LBB220_1906 -.LBB220_1889: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 40 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1891 -# %bb.1890: # in Loop: Header=BB220_1749 Depth=2 - vldi $vr1, -1248 - b .LBB220_1906 -.LBB220_1891: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 44 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1893 -# %bb.1892: # in Loop: Header=BB220_1749 Depth=2 - vldi $vr1, -1246 - b .LBB220_1906 -.LBB220_1893: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 48 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1895 -# %bb.1894: # in Loop: Header=BB220_1749 Depth=2 - vldi $vr1, -1244 - b .LBB220_1906 -.LBB220_1895: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 52 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1897 -# %bb.1896: # in Loop: Header=BB220_1749 Depth=2 - vldi $vr1, -1242 - b .LBB220_1906 -.LBB220_1897: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 56 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1899 -# %bb.1898: # in Loop: Header=BB220_1749 Depth=2 - vldi $vr1, -1240 - b .LBB220_1906 -.LBB220_1899: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 60 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1901 -# %bb.1900: # in Loop: Header=BB220_1749 Depth=2 - vldi $vr1, -1238 - b .LBB220_1906 -.LBB220_1901: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 64 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1903 -# %bb.1902: # in Loop: Header=BB220_1749 Depth=2 - vldi $vr1, -1236 - b .LBB220_1906 -.LBB220_1903: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 68 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1905 -# %bb.1904: # in Loop: Header=BB220_1749 Depth=2 - vldi $vr1, -1234 - b .LBB220_1906 -.LBB220_1905: # in Loop: Header=BB220_1749 Depth=2 - vldi $vr1, -1232 - .p2align 4, , 16 -.LBB220_1906: # in Loop: Header=BB220_1749 Depth=2 - fst.s $fa1, $a0, 0 - ld.d $a0, $s1, 0 - fld.s $fa1, $a0, 228 - fcmp.clt.s $fcc0, $fa0, $fa1 + b .LBB220_1877 +.LBB220_1880: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 20 + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1882 +# %bb.1881: # in Loop: Header=BB220_1750 Depth=2 + vldi $vr0, -1272 + b .LBB220_1907 +.LBB220_1882: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 24 + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1884 +# %bb.1883: # in Loop: Header=BB220_1750 Depth=2 + vldi $vr0, -1264 + b .LBB220_1907 +.LBB220_1884: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 28 + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1886 +# %bb.1885: # in Loop: Header=BB220_1750 Depth=2 + vldi $vr0, -1260 + b .LBB220_1907 +.LBB220_1886: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 32 + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1888 +# %bb.1887: # in Loop: Header=BB220_1750 Depth=2 + vldi $vr0, -1256 + b .LBB220_1907 +.LBB220_1888: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 36 + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1890 +# %bb.1889: # in Loop: Header=BB220_1750 Depth=2 + vldi $vr0, -1252 + b .LBB220_1907 +.LBB220_1890: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 40 + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1892 +# %bb.1891: # in Loop: Header=BB220_1750 Depth=2 + vldi $vr0, -1248 + b .LBB220_1907 +.LBB220_1892: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 44 + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1894 +# %bb.1893: # in Loop: Header=BB220_1750 Depth=2 + vldi $vr0, -1246 + b .LBB220_1907 +.LBB220_1894: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 48 + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1896 +# %bb.1895: # in Loop: Header=BB220_1750 Depth=2 + vldi $vr0, -1244 + b .LBB220_1907 +.LBB220_1896: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 52 + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1898 +# %bb.1897: # in Loop: Header=BB220_1750 Depth=2 + vldi $vr0, -1242 + b .LBB220_1907 +.LBB220_1898: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 56 + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1900 +# %bb.1899: # in Loop: Header=BB220_1750 Depth=2 + vldi $vr0, -1240 + b .LBB220_1907 +.LBB220_1900: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 60 + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1902 +# %bb.1901: # in Loop: Header=BB220_1750 Depth=2 + vldi $vr0, -1238 + b .LBB220_1907 +.LBB220_1902: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 64 + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1904 +# %bb.1903: # in Loop: Header=BB220_1750 Depth=2 + vldi $vr0, -1236 + b .LBB220_1907 +.LBB220_1904: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 68 + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1906 +# %bb.1905: # in Loop: Header=BB220_1750 Depth=2 + vldi $vr0, -1234 + b .LBB220_1907 +.LBB220_1906: # in Loop: Header=BB220_1750 Depth=2 + vldi $vr0, -1232 + .p2align 4, , 16 +.LBB220_1907: # in Loop: Header=BB220_1750 Depth=2 + fst.s $fa0, $a0, 0 + ld.d $a0, $s2, 0 + fld.s $fa0, $a0, 228 + fcmp.clt.s $fcc0, $fa1, $fa0 + lu12i.w $a1, -9 + ori $a1, $a1, 4064 + add.d $a1, $fp, $a1 + fld.s $fa0, $a1, 0 # 4-byte Folded Reload + # kill: def $f0 killed $f0 def $vr0 + bcnez $fcc0, .LBB220_1749 +# %bb.1908: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 224 + fcmp.clt.s $fcc0, $fa1, $fa0 + lu12i.w $a1, -9 + ori $a1, $a1, 4028 + add.d $a1, $fp, $a1 + fld.s $fa0, $a1, 0 # 4-byte Folded Reload + # kill: def $f0 killed $f0 def $vr0 + bcnez $fcc0, .LBB220_1749 +# %bb.1909: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 220 + fcmp.clt.s $fcc0, $fa1, $fa0 + lu12i.w $a1, -9 + ori $a1, $a1, 3996 + add.d $a1, $fp, $a1 + fld.s $fa0, $a1, 0 # 4-byte Folded Reload + # kill: def $f0 killed $f0 def $vr0 + bcnez $fcc0, .LBB220_1749 +# %bb.1910: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 216 + fcmp.clt.s $fcc0, $fa1, $fa0 + lu12i.w $a1, -9 + ori $a1, $a1, 3992 + add.d $a1, $fp, $a1 + fld.s $fa0, $a1, 0 # 4-byte Folded Reload + # kill: def $f0 killed $f0 def $vr0 + bcnez $fcc0, .LBB220_1749 +# %bb.1911: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 212 + fcmp.clt.s $fcc0, $fa1, $fa0 + lu12i.w $a1, -9 + ori $a1, $a1, 3980 + add.d $a1, $fp, $a1 + fld.s $fa0, $a1, 0 # 4-byte Folded Reload + # kill: def $f0 killed $f0 def $vr0 + bcnez $fcc0, .LBB220_1749 +# %bb.1912: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 208 + fcmp.clt.s $fcc0, $fa1, $fa0 + lu12i.w $a1, -9 + ori $a1, $a1, 3940 + add.d $a1, $fp, $a1 + fld.s $fa0, $a1, 0 # 4-byte Folded Reload + # kill: def $f0 killed $f0 def $vr0 + bcnez $fcc0, .LBB220_1749 +# %bb.1913: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 204 + fcmp.clt.s $fcc0, $fa1, $fa0 + lu12i.w $a1, -9 + ori $a1, $a1, 3936 + add.d $a1, $fp, $a1 + fld.s $fa0, $a1, 0 # 4-byte Folded Reload + # kill: def $f0 killed $f0 def $vr0 + bcnez $fcc0, .LBB220_1749 +# %bb.1914: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 200 + fcmp.clt.s $fcc0, $fa1, $fa0 + lu12i.w $a1, -9 + ori $a1, $a1, 3932 + add.d $a1, $fp, $a1 + fld.s $fa0, $a1, 0 # 4-byte Folded Reload + # kill: def $f0 killed $f0 def $vr0 + bcnez $fcc0, .LBB220_1749 +# %bb.1915: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 196 + fcmp.clt.s $fcc0, $fa1, $fa0 + lu12i.w $a1, -9 + ori $a1, $a1, 3928 + add.d $a1, $fp, $a1 + fld.s $fa0, $a1, 0 # 4-byte Folded Reload + # kill: def $f0 killed $f0 def $vr0 + bcnez $fcc0, .LBB220_1749 +# %bb.1916: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 192 + fcmp.clt.s $fcc0, $fa1, $fa0 + lu12i.w $a1, -9 + ori $a1, $a1, 3924 + add.d $a1, $fp, $a1 + fld.s $fa0, $a1, 0 # 4-byte Folded Reload + # kill: def $f0 killed $f0 def $vr0 + bcnez $fcc0, .LBB220_1749 +# %bb.1917: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 188 + fcmp.clt.s $fcc0, $fa1, $fa0 + lu12i.w $a1, -9 + ori $a1, $a1, 3920 + add.d $a1, $fp, $a1 + fld.s $fa0, $a1, 0 # 4-byte Folded Reload + # kill: def $f0 killed $f0 def $vr0 + bcnez $fcc0, .LBB220_1749 +# %bb.1918: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 184 + fcmp.clt.s $fcc0, $fa1, $fa0 + lu12i.w $a1, -9 + ori $a1, $a1, 3916 + add.d $a1, $fp, $a1 + fld.s $fa0, $a1, 0 # 4-byte Folded Reload + # kill: def $f0 killed $f0 def $vr0 + bcnez $fcc0, .LBB220_1749 +# %bb.1919: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 180 + fcmp.clt.s $fcc0, $fa1, $fa0 + lu12i.w $a1, -9 + ori $a1, $a1, 3912 + add.d $a1, $fp, $a1 + fld.s $fa0, $a1, 0 # 4-byte Folded Reload + # kill: def $f0 killed $f0 def $vr0 + bcnez $fcc0, .LBB220_1749 +# %bb.1920: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 176 + fcmp.clt.s $fcc0, $fa1, $fa0 + lu12i.w $a1, -9 + ori $a1, $a1, 3908 + add.d $a1, $fp, $a1 + fld.s $fa0, $a1, 0 # 4-byte Folded Reload + # kill: def $f0 killed $f0 def $vr0 + bcnez $fcc0, .LBB220_1749 +# %bb.1921: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 172 + fcmp.clt.s $fcc0, $fa1, $fa0 + lu12i.w $a1, -9 + ori $a1, $a1, 3904 + add.d $a1, $fp, $a1 + fld.s $fa0, $a1, 0 # 4-byte Folded Reload + # kill: def $f0 killed $f0 def $vr0 + bcnez $fcc0, .LBB220_1749 +# %bb.1922: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 168 + fcmp.clt.s $fcc0, $fa1, $fa0 + lu12i.w $a1, -9 + ori $a1, $a1, 3900 + add.d $a1, $fp, $a1 + fld.s $fa0, $a1, 0 # 4-byte Folded Reload + # kill: def $f0 killed $f0 def $vr0 + bcnez $fcc0, .LBB220_1749 +# %bb.1923: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 164 + fcmp.clt.s $fcc0, $fa1, $fa0 bcnez $fcc0, .LBB220_1747 -# %bb.1907: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 224 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1909 -# %bb.1908: # in Loop: Header=BB220_1749 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI220_11) - fld.s $fa0, $a1, %pc_lo12(.LCPI220_11) - b .LBB220_1748 - .p2align 4, , 16 -.LBB220_1909: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 220 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1911 -# %bb.1910: # in Loop: Header=BB220_1749 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI220_12) - fld.s $fa0, $a1, %pc_lo12(.LCPI220_12) - b .LBB220_1748 -.LBB220_1911: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 216 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1913 -# %bb.1912: # in Loop: Header=BB220_1749 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI220_13) - fld.s $fa0, $a1, %pc_lo12(.LCPI220_13) - b .LBB220_1748 -.LBB220_1913: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 212 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1915 -# %bb.1914: # in Loop: Header=BB220_1749 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI220_14) - fld.s $fa0, $a1, %pc_lo12(.LCPI220_14) - b .LBB220_1748 -.LBB220_1915: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 208 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1917 -# %bb.1916: # in Loop: Header=BB220_1749 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI220_15) - fld.s $fa0, $a1, %pc_lo12(.LCPI220_15) - b .LBB220_1748 -.LBB220_1917: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 204 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1919 -# %bb.1918: # in Loop: Header=BB220_1749 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI220_16) - fld.s $fa0, $a1, %pc_lo12(.LCPI220_16) - b .LBB220_1748 -.LBB220_1919: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 200 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1921 -# %bb.1920: # in Loop: Header=BB220_1749 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI220_17) - fld.s $fa0, $a1, %pc_lo12(.LCPI220_17) - b .LBB220_1748 -.LBB220_1921: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 196 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1923 -# %bb.1922: # in Loop: Header=BB220_1749 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI220_18) - fld.s $fa0, $a1, %pc_lo12(.LCPI220_18) - b .LBB220_1748 -.LBB220_1923: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 192 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1925 -# %bb.1924: # in Loop: Header=BB220_1749 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI220_19) - fld.s $fa0, $a1, %pc_lo12(.LCPI220_19) - b .LBB220_1748 -.LBB220_1925: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 188 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1927 -# %bb.1926: # in Loop: Header=BB220_1749 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI220_20) - fld.s $fa0, $a1, %pc_lo12(.LCPI220_20) - b .LBB220_1748 -.LBB220_1927: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 184 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1929 -# %bb.1928: # in Loop: Header=BB220_1749 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI220_21) - fld.s $fa0, $a1, %pc_lo12(.LCPI220_21) - b .LBB220_1748 -.LBB220_1929: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 180 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1931 -# %bb.1930: # in Loop: Header=BB220_1749 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI220_22) - fld.s $fa0, $a1, %pc_lo12(.LCPI220_22) - b .LBB220_1748 -.LBB220_1931: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 176 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1933 -# %bb.1932: # in Loop: Header=BB220_1749 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI220_23) - fld.s $fa0, $a1, %pc_lo12(.LCPI220_23) - b .LBB220_1748 -.LBB220_1933: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 172 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1935 -# %bb.1934: # in Loop: Header=BB220_1749 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI220_24) - fld.s $fa0, $a1, %pc_lo12(.LCPI220_24) - b .LBB220_1748 -.LBB220_1935: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 168 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1937 -# %bb.1936: # in Loop: Header=BB220_1749 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI220_25) - fld.s $fa0, $a1, %pc_lo12(.LCPI220_25) - b .LBB220_1748 -.LBB220_1937: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 164 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1939 -# %bb.1938: # in Loop: Header=BB220_1749 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI220_26) - fld.s $fa0, $a1, %pc_lo12(.LCPI220_26) - b .LBB220_1748 -.LBB220_1939: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 160 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1941 -# %bb.1940: # in Loop: Header=BB220_1749 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI220_27) - fld.s $fa0, $a1, %pc_lo12(.LCPI220_27) +# %bb.1924: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 160 + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1926 +# %bb.1925: # in Loop: Header=BB220_1750 Depth=2 + lu12i.w $a1, 270720 b .LBB220_1748 -.LBB220_1941: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 156 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1943 -# %bb.1942: # in Loop: Header=BB220_1749 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI220_28) - fld.s $fa0, $a1, %pc_lo12(.LCPI220_28) +.LBB220_1926: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 156 + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1928 +# %bb.1927: # in Loop: Header=BB220_1750 Depth=2 + lu12i.w $a1, 270656 b .LBB220_1748 -.LBB220_1943: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 152 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1945 -# %bb.1944: # in Loop: Header=BB220_1749 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI220_29) - fld.s $fa0, $a1, %pc_lo12(.LCPI220_29) +.LBB220_1928: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 152 + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1930 +# %bb.1929: # in Loop: Header=BB220_1750 Depth=2 + lu12i.w $a1, 270592 b .LBB220_1748 -.LBB220_1945: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 148 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1947 -# %bb.1946: # in Loop: Header=BB220_1749 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI220_30) - fld.s $fa0, $a1, %pc_lo12(.LCPI220_30) +.LBB220_1930: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 148 + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1932 +# %bb.1931: # in Loop: Header=BB220_1750 Depth=2 + lu12i.w $a1, 270528 b .LBB220_1748 -.LBB220_1947: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 144 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1949 -# %bb.1948: # in Loop: Header=BB220_1749 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI220_31) - fld.s $fa0, $a1, %pc_lo12(.LCPI220_31) +.LBB220_1932: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 144 + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1934 +# %bb.1933: # in Loop: Header=BB220_1750 Depth=2 + lu12i.w $a1, 270464 b .LBB220_1748 -.LBB220_1949: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 140 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1951 -# %bb.1950: # in Loop: Header=BB220_1749 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI220_32) - fld.s $fa0, $a1, %pc_lo12(.LCPI220_32) +.LBB220_1934: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 140 + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1936 +# %bb.1935: # in Loop: Header=BB220_1750 Depth=2 + lu12i.w $a1, 270400 b .LBB220_1748 -.LBB220_1951: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 136 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1953 -# %bb.1952: # in Loop: Header=BB220_1749 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI220_33) - fld.s $fa0, $a1, %pc_lo12(.LCPI220_33) +.LBB220_1936: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 136 + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1938 +# %bb.1937: # in Loop: Header=BB220_1750 Depth=2 + lu12i.w $a1, 270336 b .LBB220_1748 -.LBB220_1953: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 132 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1955 -# %bb.1954: # in Loop: Header=BB220_1749 Depth=2 +.LBB220_1938: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 132 + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1940 +# %bb.1939: # in Loop: Header=BB220_1750 Depth=2 vldi $vr0, -1217 - b .LBB220_1982 -.LBB220_1955: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 128 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1957 -# %bb.1956: # in Loop: Header=BB220_1749 Depth=2 + b .LBB220_1967 +.LBB220_1940: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 128 + lu12i.w $a1, -9 + ori $a1, $a1, 4072 + add.d $a1, $fp, $a1 + fld.s $fa1, $a1, 0 # 4-byte Folded Reload + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1942 +# %bb.1941: # in Loop: Header=BB220_1750 Depth=2 vldi $vr0, -1218 - b .LBB220_1982 -.LBB220_1957: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 124 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1959 -# %bb.1958: # in Loop: Header=BB220_1749 Depth=2 + b .LBB220_1967 +.LBB220_1942: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 124 + lu12i.w $a1, -9 + ori $a1, $a1, 4072 + add.d $a1, $fp, $a1 + fld.s $fa1, $a1, 0 # 4-byte Folded Reload + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1944 +# %bb.1943: # in Loop: Header=BB220_1750 Depth=2 vldi $vr0, -1219 - b .LBB220_1982 -.LBB220_1959: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 120 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1961 -# %bb.1960: # in Loop: Header=BB220_1749 Depth=2 + b .LBB220_1967 +.LBB220_1944: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 120 + lu12i.w $a1, -9 + ori $a1, $a1, 4072 + add.d $a1, $fp, $a1 + fld.s $fa1, $a1, 0 # 4-byte Folded Reload + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1946 +# %bb.1945: # in Loop: Header=BB220_1750 Depth=2 vldi $vr0, -1220 - b .LBB220_1982 -.LBB220_1961: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 116 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1963 -# %bb.1962: # in Loop: Header=BB220_1749 Depth=2 + b .LBB220_1967 +.LBB220_1946: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 116 + lu12i.w $a1, -9 + ori $a1, $a1, 4072 + add.d $a1, $fp, $a1 + fld.s $fa1, $a1, 0 # 4-byte Folded Reload + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1948 +# %bb.1947: # in Loop: Header=BB220_1750 Depth=2 vldi $vr0, -1221 - b .LBB220_1982 -.LBB220_1963: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 112 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1965 -# %bb.1964: # in Loop: Header=BB220_1749 Depth=2 + b .LBB220_1967 +.LBB220_1948: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 112 + lu12i.w $a1, -9 + ori $a1, $a1, 4072 + add.d $a1, $fp, $a1 + fld.s $fa1, $a1, 0 # 4-byte Folded Reload + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1950 +# %bb.1949: # in Loop: Header=BB220_1750 Depth=2 vldi $vr0, -1222 - b .LBB220_1982 -.LBB220_1965: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 108 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1967 -# %bb.1966: # in Loop: Header=BB220_1749 Depth=2 + b .LBB220_1967 +.LBB220_1950: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 108 + lu12i.w $a1, -9 + ori $a1, $a1, 4072 + add.d $a1, $fp, $a1 + fld.s $fa1, $a1, 0 # 4-byte Folded Reload + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1952 +# %bb.1951: # in Loop: Header=BB220_1750 Depth=2 vldi $vr0, -1223 - b .LBB220_1982 -.LBB220_1967: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 104 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1969 -# %bb.1968: # in Loop: Header=BB220_1749 Depth=2 + b .LBB220_1967 +.LBB220_1952: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 104 + lu12i.w $a1, -9 + ori $a1, $a1, 4072 + add.d $a1, $fp, $a1 + fld.s $fa1, $a1, 0 # 4-byte Folded Reload + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1954 +# %bb.1953: # in Loop: Header=BB220_1750 Depth=2 vldi $vr0, -1224 - b .LBB220_1982 -.LBB220_1969: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 100 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1971 -# %bb.1970: # in Loop: Header=BB220_1749 Depth=2 + b .LBB220_1967 +.LBB220_1954: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 100 + lu12i.w $a1, -9 + ori $a1, $a1, 4072 + add.d $a1, $fp, $a1 + fld.s $fa1, $a1, 0 # 4-byte Folded Reload + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1956 +# %bb.1955: # in Loop: Header=BB220_1750 Depth=2 vldi $vr0, -1225 - b .LBB220_1982 -.LBB220_1971: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 96 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1973 -# %bb.1972: # in Loop: Header=BB220_1749 Depth=2 + b .LBB220_1967 +.LBB220_1956: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 96 + lu12i.w $a1, -9 + ori $a1, $a1, 4072 + add.d $a1, $fp, $a1 + fld.s $fa1, $a1, 0 # 4-byte Folded Reload + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1958 +# %bb.1957: # in Loop: Header=BB220_1750 Depth=2 vldi $vr0, -1226 - b .LBB220_1982 -.LBB220_1973: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 92 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1975 -# %bb.1974: # in Loop: Header=BB220_1749 Depth=2 + b .LBB220_1967 +.LBB220_1958: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 92 + lu12i.w $a1, -9 + ori $a1, $a1, 4072 + add.d $a1, $fp, $a1 + fld.s $fa1, $a1, 0 # 4-byte Folded Reload + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1960 +# %bb.1959: # in Loop: Header=BB220_1750 Depth=2 vldi $vr0, -1227 - b .LBB220_1982 -.LBB220_1975: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 88 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1977 -# %bb.1976: # in Loop: Header=BB220_1749 Depth=2 + b .LBB220_1967 +.LBB220_1960: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 88 + lu12i.w $a1, -9 + ori $a1, $a1, 4072 + add.d $a1, $fp, $a1 + fld.s $fa1, $a1, 0 # 4-byte Folded Reload + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1962 +# %bb.1961: # in Loop: Header=BB220_1750 Depth=2 vldi $vr0, -1228 - b .LBB220_1982 -.LBB220_1977: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 84 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1979 -# %bb.1978: # in Loop: Header=BB220_1749 Depth=2 + b .LBB220_1967 +.LBB220_1962: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 84 + lu12i.w $a1, -9 + ori $a1, $a1, 4072 + add.d $a1, $fp, $a1 + fld.s $fa1, $a1, 0 # 4-byte Folded Reload + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1964 +# %bb.1963: # in Loop: Header=BB220_1750 Depth=2 vldi $vr0, -1229 - b .LBB220_1982 -.LBB220_1979: # in Loop: Header=BB220_1749 Depth=2 - fld.s $fa1, $a0, 80 - fcmp.clt.s $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB220_1981 -# %bb.1980: # in Loop: Header=BB220_1749 Depth=2 - vldi $vr0, -1230 - b .LBB220_1982 -.LBB220_1981: # in Loop: Header=BB220_1749 Depth=2 - vldi $vr0, -1231 -.LBB220_1982: # in Loop: Header=BB220_1749 Depth=2 + b .LBB220_1967 +.LBB220_1964: # in Loop: Header=BB220_1750 Depth=2 + fld.s $fa0, $a0, 80 lu12i.w $a1, -9 - ori $a1, $a1, 4004 + ori $a1, $a1, 4072 add.d $a1, $fp, $a1 - fld.s $fs1, $a1, 0 # 4-byte Folded Reload + fld.s $fa1, $a1, 0 # 4-byte Folded Reload + fcmp.clt.s $fcc0, $fa1, $fa0 + bceqz $fcc0, .LBB220_1966 +# %bb.1965: # in Loop: Header=BB220_1750 Depth=2 + vldi $vr0, -1230 + b .LBB220_1967 +.LBB220_1966: # in Loop: Header=BB220_1750 Depth=2 + vldi $vr0, -1231 +.LBB220_1967: # in Loop: Header=BB220_1750 Depth=2 lu12i.w $a1, -8 ori $a1, $a1, 48 add.d $a1, $fp, $a1 - ld.d $s2, $a1, 0 # 8-byte Folded Reload - b .LBB220_1748 -.LBB220_1983: + ld.d $s4, $a1, 0 # 8-byte Folded Reload + b .LBB220_1749 +.LBB220_1968: lu12i.w $a0, -9 - ori $a0, $a0, 4008 + ori $a0, $a0, 3952 add.d $a0, $fp, $a0 ld.d $a0, $a0, 0 # 8-byte Folded Reload lu12i.w $a1, 8 - ori $a1, $a1, 112 + ori $a1, $a1, 208 sub.d $sp, $fp, $a1 lu12i.w $a1, 7 - ori $a1, $a1, 2176 + ori $a1, $a1, 2272 add.d $sp, $sp, $a1 fld.d $fs7, $sp, 1880 # 8-byte Folded Reload fld.d $fs6, $sp, 1888 # 8-byte Folded Reload @@ -56736,12 +56814,7 @@ _vp_remove_floor: # @_vp_remove_floor .Lfunc_end221: .size _vp_remove_floor, .Lfunc_end221-_vp_remove_floor # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _vp_noisemask -.LCPI222_0: - .word 0x430c0000 # float 140 - .text - .globl _vp_noisemask + .globl _vp_noisemask # -- Begin function _vp_noisemask .p2align 5 .type _vp_noisemask,@function _vp_noisemask: # @_vp_noisemask @@ -56766,8 +56839,8 @@ _vp_noisemask: # @_vp_noisemask sub.d $s3, $sp, $a0 move $sp, $s3 ld.d $a1, $s2, 48 - pcalau12i $a0, %pc_hi20(.LCPI222_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI222_0) + lu12i.w $a0, 274624 + movgr2fr.w $fa0, $a0 addi.w $a4, $zero, -1 move $a0, $s1 move $a2, $s4 @@ -57342,16 +57415,7 @@ bark_noise_hybridmp: # @bark_noise_hybridmp .Lfunc_end223: .size bark_noise_hybridmp, .Lfunc_end223-bark_noise_hybridmp # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _vp_tonemask -.LCPI224_0: - .dword 0x3fb99999a0000000 # double 0.10000000149011612 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI224_1: - .word 0xc61c3c00 # float -9999 - .text - .globl _vp_tonemask + .globl _vp_tonemask # -- Begin function _vp_tonemask .p2align 5 .type _vp_tonemask,@function _vp_tonemask: # @_vp_tonemask @@ -57377,40 +57441,41 @@ _vp_tonemask: # @_vp_tonemask move $sp, $s2 blez $a0, .LBB224_8 # %bb.1: # %.lr.ph.preheader - ori $a3, $zero, 8 - lu12i.w $a2, -237117 - bgeu $a0, $a3, .LBB224_3 + ori $a2, $zero, 8 + bgeu $a0, $a2, .LBB224_3 # %bb.2: - move $a3, $zero + move $a2, $zero b .LBB224_6 .LBB224_3: # %vector.ph - bstrpick.d $a3, $a0, 30, 3 - slli.d $a3, $a3, 3 - addi.d $a4, $s2, 16 - ori $a5, $a2, 3072 - vreplgr2vr.w $vr2, $a5 - move $a5, $a3 + bstrpick.d $a2, $a0, 30, 3 + slli.d $a2, $a2, 3 + addi.d $a3, $s2, 16 + lu12i.w $a4, -237117 + ori $a4, $a4, 3072 + vreplgr2vr.w $vr2, $a4 + move $a4, $a2 .p2align 4, , 16 .LBB224_4: # %vector.body # =>This Inner Loop Header: Depth=1 - vst $vr2, $a4, -16 - vst $vr2, $a4, 0 - addi.d $a5, $a5, -8 - addi.d $a4, $a4, 32 - bnez $a5, .LBB224_4 + vst $vr2, $a3, -16 + vst $vr2, $a3, 0 + addi.d $a4, $a4, -8 + addi.d $a3, $a3, 32 + bnez $a4, .LBB224_4 # %bb.5: # %middle.block - beq $a3, $a0, .LBB224_8 + beq $a2, $a0, .LBB224_8 .LBB224_6: # %.lr.ph.preheader95 - alsl.d $a4, $a3, $s2, 2 - sub.d $a0, $a0, $a3 + alsl.d $a3, $a2, $s2, 2 + sub.d $a0, $a0, $a2 + lu12i.w $a2, -237117 ori $a2, $a2, 3072 lu32i.d $a2, 0 .p2align 4, , 16 .LBB224_7: # %.lr.ph # =>This Inner Loop Header: Depth=1 - st.w $a2, $a4, 0 + st.w $a2, $a3, 0 addi.d $a0, $a0, -1 - addi.d $a4, $a4, 4 + addi.d $a3, $a3, 4 bnez $a0, .LBB224_7 .LBB224_8: # %._crit_edge blez $s5, .LBB224_30 @@ -57447,9 +57512,11 @@ _vp_tonemask: # @_vp_tonemask addi.d $a6, $s5, -2 vldi $vr1, -1256 ori $a7, $zero, 16 - pcalau12i $t1, %pc_hi20(.LCPI224_0) - fld.d $fa2, $t1, %pc_lo12(.LCPI224_0) - vldi $vr3, -834 + vldi $vr2, -834 + lu12i.w $t1, -393216 + lu32i.d $t1, -419431 + lu52i.d $t1, $t1, 1019 + movgr2fr.d $fa3, $t1 ori $t1, $zero, 7 vldi $vr4, -1104 b .LBB224_14 @@ -57518,8 +57585,8 @@ _vp_tonemask: # @_vp_tonemask ldx.d $t2, $a0, $t2 fadd.s $fa6, $fa0, $fa5 fcvt.d.s $fa6, $fa6 - fadd.d $fa6, $fa6, $fa3 - fmul.d $fa6, $fa6, $fa2 + fadd.d $fa6, $fa6, $fa2 + fmul.d $fa6, $fa6, $fa3 ftintrz.w.d $fa6, $fa6 movfr2gr.s $t4, $fa6 srai.d $t5, $t4, 63 @@ -57603,15 +57670,17 @@ _vp_tonemask: # @_vp_tonemask # %bb.31: # %.lr.ph81.i ld.d $a0, $s0, 40 ld.d $a1, $a0, 0 + ld.d $a2, $s0, 56 move $a0, $zero - srai.d $a2, $s4, 1 - ld.d $a3, $s0, 56 - sub.d $a1, $a1, $a2 - pcalau12i $a2, %pc_hi20(.LCPI224_1) - fld.s $fa0, $a2, %pc_lo12(.LCPI224_1) - sub.d $a2, $a1, $a3 + srai.d $a3, $s4, 1 + sub.d $a1, $a1, $a3 + sub.d $a2, $a1, $a2 addi.d $a1, $s2, 4 ori $a4, $zero, 1 + lu12i.w $a3, -237117 + ori $a3, $a3, 3072 + lu32i.d $a3, 0 + movgr2fr.w $fa0, $a3 b .LBB224_33 .p2align 4, , 16 .LBB224_32: # %.critedge.i32 @@ -59010,12 +59079,7 @@ _vp_noise_normalize: # @_vp_noise_normalize .Lfunc_end230: .size _vp_noise_normalize, .Lfunc_end230-_vp_noise_normalize # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _vp_couple -.LCPI231_0: - .dword 0x3fffff9720000000 # double 1.9998999834060669 - .text - .globl _vp_couple + .globl _vp_couple # -- Begin function _vp_couple .p2align 5 .type _vp_couple,@function _vp_couple: # @_vp_couple @@ -59062,9 +59126,13 @@ _vp_couple: # @_vp_couple movgr2fr.w $fa0, $zero ori $s0, $zero, 31 vldi $vr1, -1168 - vldi $vr2, -1152 - vldi $vr3, -1040 - movgr2fr.d $fa4, $zero + lu12i.w $a0, 131072 + lu32i.d $a0, -105 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa2, $a0 + vldi $vr3, -1152 + vldi $vr4, -1040 + movgr2fr.d $fa5, $zero b .LBB231_3 .p2align 4, , 16 .LBB231_2: # %.loopexit @@ -59105,11 +59173,11 @@ _vp_couple: # @_vp_couple slli.d $t2, $t2, 3 ld.d $a7, $sp, 40 # 8-byte Folded Reload ld.w $t5, $a7, 0 - fldx.d $fa5, $a4, $t2 + fldx.d $fa6, $a4, $t2 ld.w $t2, $a1, 504 ldx.d $s8, $a3, $a5 slli.d $a5, $t5, 3 - fldx.d $fa6, $a4, $a5 + fldx.d $fa7, $a4, $a5 sltui $a5, $t2, 1 addi.d $t2, $a1, 512 masknez $t2, $t2, $a5 @@ -59141,8 +59209,8 @@ _vp_couple: # @_vp_couple move $t8, $zero move $a5, $zero alsl.d $a0, $a1, $s6, 2 - fcvt.s.d $fa5, $fa5 fcvt.s.d $fa6, $fa6 + fcvt.s.d $fa7, $fa7 ld.d $a1, $sp, 8 # 8-byte Folded Reload alsl.d $a6, $t7, $a1, 3 slli.d $a1, $s1, 2 @@ -59169,7 +59237,7 @@ _vp_couple: # @_vp_couple # Child Loop BB231_29 Depth 3 move $t5, $zero move $ra, $fp - fmov.s $fa7, $fa0 + fmov.s $ft0, $fa0 b .LBB231_12 .p2align 4, , 16 .LBB231_10: # in Loop: Header=BB231_12 Depth=3 @@ -59185,33 +59253,33 @@ _vp_couple: # @_vp_couple # => This Inner Loop Header: Depth=3 bge $ra, $t0, .LBB231_10 # %bb.13: # in Loop: Header=BB231_12 Depth=3 - fldx.s $ft0, $s5, $t5 - fabs.s $ft1, $ft0 + fldx.s $ft1, $s5, $t5 + fabs.s $ft2, $ft1 blt $ra, $s2, .LBB231_16 # %bb.14: # in Loop: Header=BB231_12 Depth=3 - fcmp.cule.s $fcc0, $fa6, $ft1 + fcmp.cule.s $fcc0, $fa7, $ft2 bcnez $fcc0, .LBB231_16 # %bb.15: # in Loop: Header=BB231_12 Depth=3 - fldx.s $ft0, $s6, $t5 - fabs.s $ft0, $ft0 - fcmp.clt.s $fcc0, $ft0, $fa6 + fldx.s $ft1, $s6, $t5 + fabs.s $ft1, $ft1 + fcmp.clt.s $fcc0, $ft1, $fa7 bcnez $fcc0, .LBB231_18 .LBB231_16: # %._crit_edge168 # in Loop: Header=BB231_12 Depth=3 - fldx.s $ft0, $s6, $t5 - fcmp.cule.s $fcc0, $fa5, $ft1 - fabs.s $ft4, $ft0 + fldx.s $ft1, $s6, $t5 + fcmp.cule.s $fcc0, $fa6, $ft2 + fabs.s $ft5, $ft1 bcnez $fcc0, .LBB231_20 # %bb.17: # %._crit_edge168 # in Loop: Header=BB231_12 Depth=3 - fcmp.clt.s $fcc0, $ft4, $fa5 + fcmp.clt.s $fcc0, $ft5, $fa6 bceqz $fcc0, .LBB231_20 .LBB231_18: # in Loop: Header=BB231_12 Depth=3 ld.d $a3, $a6, 0 ldx.w $t3, $s7, $t5 ldx.w $t6, $s8, $t5 add.d $a3, $a3, $t8 - fldx.s $ft0, $a3, $t5 + fldx.s $ft1, $a3, $t5 sub.w $a3, $t3, $t6 srai.d $a7, $a3, 31 xor $a3, $a3, $a7 @@ -59222,8 +59290,8 @@ _vp_couple: # @_vp_couple slli.d $a3, $a3, 2 pcalau12i $a7, %pc_hi20(hypot_lookup) addi.d $a7, $a7, %pc_lo12(hypot_lookup) - fldx.s $ft1, $a7, $a3 - fadd.s $ft1, $ft1, $fa1 + fldx.s $ft2, $a7, $a3 + fadd.s $ft2, $ft2, $fa1 slt $a3, $t6, $t3 masknez $a7, $t6, $a3 maskeqz $a3, $t3, $a3 @@ -59231,68 +59299,66 @@ _vp_couple: # @_vp_couple slli.d $a3, $a3, 2 pcalau12i $a7, %pc_hi20(FLOOR1_fromdB_INV_LOOKUP) addi.d $a7, $a7, %pc_lo12(FLOOR1_fromdB_INV_LOOKUP) - fldx.s $ft2, $a7, $a3 + fldx.s $ft3, $a7, $a3 + fmul.s $ft2, $ft2, $ft3 fmul.s $ft1, $ft1, $ft2 - fmul.s $ft0, $ft0, $ft1 - fstx.s $ft0, $s3, $t5 + fstx.s $ft1, $s3, $t5 stx.w $zero, $a0, $t5 - fldx.s $ft0, $s3, $t5 - frint.s $ft1, $ft0 - fcmp.cune.s $fcc0, $ft1, $fa0 + fldx.s $ft1, $s3, $t5 + frint.s $ft2, $ft1 + fcmp.cune.s $fcc0, $ft2, $fa0 bcnez $fcc0, .LBB231_11 # %bb.19: # in Loop: Header=BB231_12 Depth=3 - fmul.s $ft0, $ft0, $ft0 - fadd.s $fa7, $fa7, $ft0 + fmul.s $ft1, $ft1, $ft1 + fadd.s $ft0, $ft0, $ft1 b .LBB231_11 .LBB231_20: # %._crit_edge171 # in Loop: Header=BB231_12 Depth=3 - fldx.s $ft3, $s3, $t5 - fldx.s $ft2, $a0, $t5 - fabs.s $ft5, $ft3 - fabs.s $ft0, $ft2 - fcmp.clt.s $fcc0, $ft0, $ft5 - fcmp.clt.s $fcc1, $ft5, $ft0 + fldx.s $ft4, $s3, $t5 + fldx.s $ft3, $a0, $t5 + fabs.s $ft6, $ft4 + fabs.s $ft1, $ft3 + fcmp.clt.s $fcc0, $ft1, $ft6 + fcmp.clt.s $fcc1, $ft6, $ft1 movcf2gr $t3, $fcc0 movcf2gr $t6, $fcc1 sub.d $t3, $t3, $t6 sltui $t6, $t3, 1 - fcmp.clt.s $fcc0, $ft4, $ft1 + fcmp.clt.s $fcc0, $ft5, $ft2 addi.d $t3, $t3, -1 sltui $t3, $t3, 1 masknez $t3, $t3, $t6 movcf2gr $a3, $fcc0 maskeqz $a3, $a3, $t6 or $a3, $a3, $t3 - fsub.s $ft1, $ft3, $ft2 - fsub.s $ft4, $ft2, $ft3 + fsub.s $ft2, $ft4, $ft3 + fsub.s $ft5, $ft3, $ft4 beqz $a3, .LBB231_22 # %bb.21: # in Loop: Header=BB231_12 Depth=3 - fcmp.clt.s $fcc0, $fa0, $ft3 - fsel $ft1, $ft4, $ft1, $fcc0 - fstx.s $ft1, $a0, $t5 - fldx.s $ft0, $s3, $t5 - fabs.s $ft0, $ft0 + fcmp.clt.s $fcc0, $fa0, $ft4 + fsel $ft2, $ft5, $ft2, $fcc0 + fstx.s $ft2, $a0, $t5 + fldx.s $ft1, $s3, $t5 + fabs.s $ft1, $ft1 b .LBB231_23 .LBB231_22: # in Loop: Header=BB231_12 Depth=3 - fcmp.clt.s $fcc0, $fa0, $ft2 - fsel $ft1, $ft4, $ft1, $fcc0 - fstx.s $ft1, $a0, $t5 - fstx.s $ft2, $s3, $t5 - fldx.s $ft1, $a0, $t5 + fcmp.clt.s $fcc0, $fa0, $ft3 + fsel $ft2, $ft5, $ft2, $fcc0 + fstx.s $ft2, $a0, $t5 + fstx.s $ft3, $s3, $t5 + fldx.s $ft2, $a0, $t5 .LBB231_23: # in Loop: Header=BB231_12 Depth=3 - pcalau12i $a3, %pc_hi20(.LCPI231_0) - fld.d $ft2, $a3, %pc_lo12(.LCPI231_0) - fcvt.d.s $ft1, $ft1 - fcvt.d.s $ft3, $ft0 - fmul.d $ft2, $ft3, $ft2 - fcmp.cule.d $fcc0, $ft1, $ft2 + fcvt.d.s $ft2, $ft2 + fcvt.d.s $ft3, $ft1 + fmul.d $ft3, $ft3, $fa2 + fcmp.cule.d $fcc0, $ft2, $ft3 bcnez $fcc0, .LBB231_11 # %bb.24: # in Loop: Header=BB231_12 Depth=3 - fmul.s $ft0, $ft0, $fa2 - fstx.s $ft0, $a0, $t5 - fldx.s $ft0, $s3, $t5 - fneg.s $ft0, $ft0 - fstx.s $ft0, $s3, $t5 + fmul.s $ft1, $ft1, $fa3 + fstx.s $ft1, $a0, $t5 + fldx.s $ft1, $s3, $t5 + fneg.s $ft1, $ft1 + fstx.s $ft1, $s3, $t5 b .LBB231_11 .p2align 4, , 16 .LBB231_25: # %._crit_edge.us @@ -59316,9 +59382,9 @@ _vp_couple: # @_vp_couple # Parent Loop BB231_9 Depth=2 # => This Inner Loop Header: Depth=3 ld.d $ra, $a2, 8 - fld.d $ft0, $ra, 520 - fcvt.d.s $ft1, $fa7 - fcmp.cult.d $fcc0, $ft1, $ft0 + fld.d $ft1, $ra, 520 + fcvt.d.s $ft2, $ft0 + fcmp.cult.d $fcc0, $ft2, $ft1 bcnez $fcc0, .LBB231_8 # %bb.30: # in Loop: Header=BB231_29 Depth=3 ld.d $ra, $s4, 0 @@ -59329,15 +59395,15 @@ _vp_couple: # @_vp_couple blt $ra, $s2, .LBB231_28 # %bb.32: # in Loop: Header=BB231_29 Depth=3 slli.d $t3, $ra, 2 - fldx.s $ft0, $t2, $t3 - frint.s $ft1, $ft0 - fcmp.cune.s $fcc0, $ft1, $fa0 + fldx.s $ft1, $t2, $t3 + frint.s $ft2, $ft1 + fcmp.cune.s $fcc0, $ft2, $fa0 bcnez $fcc0, .LBB231_28 # %bb.33: # in Loop: Header=BB231_29 Depth=3 alsl.d $t3, $ra, $t2, 2 - fcopysign.s $ft0, $fa1, $ft0 - fst.s $ft0, $t3, 0 - fadd.s $fa7, $fa7, $fa3 + fcopysign.s $ft1, $fa1, $ft1 + fst.s $ft1, $t3, 0 + fadd.s $ft0, $ft0, $fa4 b .LBB231_28 .p2align 4, , 16 .LBB231_34: # %.preheader133.lr.ph.split @@ -59372,8 +59438,8 @@ _vp_couple: # @_vp_couple # %bb.40: # %.lr.ph138 # in Loop: Header=BB231_38 Depth=2 ld.d $a3, $a2, 8 - fld.d $fa5, $a3, 520 - fcmp.cult.d $fcc0, $fa4, $fa5 + fld.d $fa6, $a3, 520 + fcmp.cult.d $fcc0, $fa5, $fa6 bcnez $fcc0, .LBB231_36 # %bb.41: # in Loop: Header=BB231_38 Depth=2 ld.d $a3, $s4, 0 @@ -59383,14 +59449,14 @@ _vp_couple: # @_vp_couple blt $a6, $s2, .LBB231_36 # %bb.43: # in Loop: Header=BB231_38 Depth=2 slli.d $a3, $a6, 2 - fldx.s $fa5, $t2, $a3 - frint.s $fa6, $fa5 - fcmp.cune.s $fcc0, $fa6, $fa0 + fldx.s $fa6, $t2, $a3 + frint.s $fa7, $fa6 + fcmp.cune.s $fcc0, $fa7, $fa0 bcnez $fcc0, .LBB231_36 # %bb.44: # in Loop: Header=BB231_38 Depth=2 alsl.d $a3, $a6, $t2, 2 - fcopysign.s $fa5, $fa1, $fa5 - fst.s $fa5, $a3, 0 + fcopysign.s $fa6, $fa1, $fa6 + fst.s $fa6, $a3, 0 b .LBB231_36 .LBB231_45: # %._crit_edge ld.d $s8, $sp, 88 # 8-byte Folded Reload @@ -59828,12 +59894,7 @@ drft_backward: # @drft_backward .Lfunc_end233: .size drft_backward, .Lfunc_end233-drft_backward # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function drft_init -.LCPI234_0: - .word 0x40c90fdb # float 6.28318548 - .text - .globl drft_init + .globl drft_init # -- Begin function drft_init .p2align 5 .type drft_init,@function drft_init: # @drft_init @@ -60048,13 +60109,14 @@ drft_init: # @drft_init addi.w $a1, $s6, 0 blez $a1, .LBB234_1 # %bb.25: # %.lr.ph119.preheader.i.i - pcalau12i $a1, %pc_hi20(.LCPI234_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI234_0) move $s2, $zero move $a3, $zero - movgr2fr.w $fa1, $s5 - ffint.s.w $fa1, $fa1 - fdiv.s $fs1, $fa0, $fa1 + movgr2fr.w $fa0, $s5 + ffint.s.w $fa0, $fa0 + lu12i.w $a1, 265360 + ori $a1, $a1, 4059 + movgr2fr.w $fa1, $a1 + fdiv.s $fs1, $fa1, $fa0 bstrpick.d $a4, $s6, 30, 0 alsl.d $a1, $s5, $s1, 2 addi.d $a1, $a1, 4 @@ -60154,26 +60216,7 @@ drft_init: # @drft_init .Lfunc_end234: .size drft_init, .Lfunc_end234-drft_init # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ve_amp -.LCPI235_0: - .dword 0x3fe6666666666666 # double 0.69999999999999996 -.LCPI235_1: - .dword 0x3fc999999999999a # double 0.20000000000000001 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI235_2: - .word 0x3d800000 # float 0.0625 -.LCPI235_3: - .word 0x3540a8c1 # float 7.1771143E-7 -.LCPI235_4: - .word 0xc43f115b # float -764.271179 -.LCPI235_5: - .word 0xc7c34f80 # float -99999 -.LCPI235_6: - .word 0x47c34f80 # float 99999 - .text - .p2align 5 + .p2align 5 # -- Begin function _ve_amp .type _ve_amp,@function _ve_amp: # @_ve_amp # %bb.0: @@ -60275,22 +60318,28 @@ _ve_amp: # @_ve_amp move $a2, $s2 pcaddu18i $ra, %call36(mdct_forward) jirl $ra, $ra, 0 - fld.s $fa0, $s2, 0 - fld.s $fa1, $s2, 4 - pcalau12i $a0, %pc_hi20(.LCPI235_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI235_0) - fmul.s $fa0, $fa0, $fa0 + fld.s $fa0, $s2, 4 + fld.s $fa1, $s2, 0 fcvt.d.s $fa0, $fa0 - fcvt.d.s $fa1, $fa1 - fmul.d $fa2, $fa1, $fa2 - fmul.d $fa1, $fa2, $fa1 + lu12i.w $a0, 419430 + ori $a0, $a0, 1638 + lu32i.d $a0, 419430 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa2, $a0 + fmul.d $fa2, $fa0, $fa2 + fmul.d $fa0, $fa2, $fa0 fld.s $fa2, $s2, 8 - pcalau12i $a0, %pc_hi20(.LCPI235_1) - fld.d $fa3, $a0, %pc_lo12(.LCPI235_1) - fadd.d $fa0, $fa1, $fa0 + fmul.s $fa1, $fa1, $fa1 + fcvt.d.s $fa1, $fa1 + fadd.d $fa0, $fa0, $fa1 fcvt.d.s $fa1, $fa2 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1020 + movgr2fr.d $fa2, $a0 ld.w $a0, $s0, 140 - fmul.d $fa2, $fa1, $fa3 + fmul.d $fa2, $fa1, $fa2 fmul.d $fa1, $fa2, $fa1 fadd.d $fa0, $fa0, $fa1 fcvt.s.d $fa1, $fa0 @@ -60326,21 +60375,24 @@ _ve_amp: # @_ve_amp blt $s4, $a1, .LBB235_14 # %bb.12: # %.lr.ph197.preheader move $a0, $zero - pcalau12i $a1, %pc_hi20(.LCPI235_2) - fld.s $fa1, $a1, %pc_lo12(.LCPI235_2) bstrpick.d $a1, $s4, 31, 31 add.w $a1, $s4, $a1 srai.d $a1, $a1, 1 + lu12i.w $a3, 251904 + movgr2fr.w $fa1, $a3 fmul.s $fa0, $fa0, $fa1 movfr2gr.s $a3, $fa0 bstrpick.d $a3, $a3, 30, 0 - pcalau12i $a4, %pc_hi20(.LCPI235_3) - fld.s $fa0, $a4, %pc_lo12(.LCPI235_3) - pcalau12i $a4, %pc_hi20(.LCPI235_4) - fld.s $fa1, $a4, %pc_lo12(.LCPI235_4) - movgr2fr.w $fa2, $a3 - ffint.s.w $fa2, $fa2 - fmul.s $fa2, $fa2, $fa0 + movgr2fr.w $fa0, $a3 + ffint.s.w $fa1, $fa0 + lu12i.w $a3, 218122 + ori $a3, $a3, 2241 + movgr2fr.w $fa0, $a3 + fmul.s $fa2, $fa1, $fa0 + lu12i.w $a3, -244751 + ori $a3, $a3, 347 + lu32i.d $a3, 0 + movgr2fr.w $fa1, $a3 fadd.s $fa2, $fa2, $fa1 fcvt.d.s $fa2, $fa2 vldi $vr3, -928 @@ -60383,11 +60435,14 @@ _ve_amp: # @_ve_amp srli.d $a2, $a2, 1 fsel $fa0, $fs2, $fs1, $fcc0 addi.d $a3, $s3, 4 - pcalau12i $a4, %pc_hi20(.LCPI235_5) - fld.s $fa1, $a4, %pc_lo12(.LCPI235_5) - pcalau12i $a4, %pc_hi20(.LCPI235_6) - fld.s $fa2, $a4, %pc_lo12(.LCPI235_6) addi.d $a4, $s3, 32 + lu12i.w $a5, 293940 + ori $a5, $a5, 3968 + movgr2fr.w $fa1, $a5 + lu12i.w $a5, -230348 + ori $a5, $a5, 3968 + lu32i.d $a5, 0 + movgr2fr.w $fa2, $a5 ori $a5, $zero, 15 ori $a6, $zero, 7 .p2align 4, , 16 @@ -60437,8 +60492,8 @@ _ve_amp: # @_ve_amp fcmp.clt.s $fcc0, $fa5, $fa7 fsel $fa4, $fa7, $fa5, $fcc0 move $t2, $a2 - fmov.s $fa5, $fa2 - fmov.s $fa6, $fa1 + fmov.s $fa5, $fa1 + fmov.s $fa6, $fa2 .p2align 4, , 16 .LBB235_19: # %.lr.ph208 # Parent Loop BB235_15 Depth=1 @@ -61431,22 +61486,7 @@ floor0_inverse1: # @floor0_inverse1 .Lfunc_end241: .size floor0_inverse1, .Lfunc_end241-floor0_inverse1 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function floor0_inverse2 -.LCPI242_0: - .word 0x3a41fc8f # float 7.39999989E-4 -.LCPI242_1: - .word 0x329ee9ee # float 1.84999998E-8 -.LCPI242_4: - .word 0x38d1b717 # float 9.99999974E-5 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI242_2: - .dword 0x402a333340000000 # double 13.100000381469727 -.LCPI242_3: - .dword 0x4001eb8520000000 # double 2.2400000095367432 - .text - .p2align 5 + .p2align 5 # -- Begin function floor0_inverse2 .type floor0_inverse2,@function floor0_inverse2: # @floor0_inverse2 # %bb.0: @@ -61517,11 +61557,12 @@ floor0_inverse2: # @floor0_inverse2 addi.w $s7, $s6, 0 ld.w $s4, $fp, 0 movgr2fr.d $fa0, $a1 - pcalau12i $a0, %pc_hi20(.LCPI242_0) - fld.s $fs2, $a0, %pc_lo12(.LCPI242_0) ffint.s.l $fa0, $fa0 vldi $vr1, -1184 fmul.s $fa0, $fa0, $fa1 + lu12i.w $a0, 238623 + ori $a0, $a0, 3215 + movgr2fr.w $fs2, $a0 fmul.s $fa0, $fa0, $fs2 fcvt.d.s $fa0, $fa0 pcaddu18i $ra, %call36(atan) @@ -61529,12 +61570,13 @@ floor0_inverse2: # @floor0_inverse2 ld.d $a0, $s3, 8 fmov.d $fs0, $fa0 movgr2fr.d $fa0, $a0 - pcalau12i $a0, %pc_hi20(.LCPI242_1) - fld.s $fs4, $a0, %pc_lo12(.LCPI242_1) ffint.s.l $fa0, $fa0 vldi $vr1, -1184 fmul.s $fa0, $fa0, $fa1 fmul.s $fa0, $fa0, $fa0 + lu12i.w $a0, 207342 + ori $a0, $a0, 2542 + movgr2fr.w $fs4, $a0 fmul.s $fa0, $fa0, $fs4 fcvt.d.s $fa0, $fa0 pcaddu18i $ra, %call36(atan) @@ -61556,10 +61598,14 @@ floor0_inverse2: # @floor0_inverse2 move $s7, $zero move $s8, $zero movgr2fr.w $fa0, $s4 - pcalau12i $a0, %pc_hi20(.LCPI242_2) - fld.d $fa1, $a0, %pc_lo12(.LCPI242_2) - pcalau12i $a0, %pc_hi20(.LCPI242_3) - fld.d $fs5, $a0, %pc_lo12(.LCPI242_3) + lu12i.w $a0, 262144 + lu32i.d $a0, -380109 + lu52i.d $a0, $a0, 1026 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 131072 + lu32i.d $a0, 125829 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fs5, $a0 bstrpick.d $a0, $s6, 31, 0 ffint.d.w $fa0, $fa0 fst.d $fa1, $sp, 32 # 8-byte Folded Spill @@ -61567,11 +61613,12 @@ floor0_inverse2: # @floor0_inverse2 fmul.d $fa2, $fs1, $fs5 fadd.d $fa1, $fa1, $fa2 movgr2fr.d $fa2, $s0 - pcalau12i $a1, %pc_hi20(.LCPI242_4) - fld.s $fs0, $a1, %pc_lo12(.LCPI242_4) ffint.s.l $fa2, $fa2 vldi $vr3, -1184 fmul.s $fa2, $fa2, $fa3 + lu12i.w $a1, 232731 + ori $a1, $a1, 1815 + movgr2fr.w $fs0, $a1 fmul.s $fa2, $fa2, $fs0 fcvt.d.s $fa2, $fa2 fadd.d $fa1, $fa1, $fa2 @@ -61695,19 +61742,17 @@ floor0_inverse2: # @floor0_inverse2 .Lfunc_end242: .size floor0_inverse2, .Lfunc_end242-floor0_inverse2 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function vorbis_coslook -.LCPI243_0: - .dword 0x40445f3071e2c65e # double 40.743665919999998 - .text - .globl vorbis_coslook + .globl vorbis_coslook # -- Begin function vorbis_coslook .p2align 5 .type vorbis_coslook,@function vorbis_coslook: # @vorbis_coslook # %bb.0: - pcalau12i $a0, %pc_hi20(.LCPI243_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI243_0) fcvt.d.s $fa0, $fa0 + lu12i.w $a0, 466476 + ori $a0, $a0, 1630 + lu32i.d $a0, 286512 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 vldi $vr1, -800 fadd.d $fa1, $fa0, $fa1 @@ -61734,24 +61779,18 @@ vorbis_coslook: # @vorbis_coslook .Lfunc_end243: .size vorbis_coslook, .Lfunc_end243-vorbis_coslook # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function vorbis_invsqlook -.LCPI244_0: - .word 0x42800000 # float 64 -.LCPI244_1: - .word 0xc2000000 # float -32 - .text - .globl vorbis_invsqlook + .globl vorbis_invsqlook # -- Begin function vorbis_invsqlook .p2align 5 .type vorbis_invsqlook,@function vorbis_invsqlook: # @vorbis_invsqlook # %bb.0: - pcalau12i $a0, %pc_hi20(.LCPI244_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI244_0) - pcalau12i $a0, %pc_hi20(.LCPI244_1) - fld.s $fa2, $a0, %pc_lo12(.LCPI244_1) + lu12i.w $a0, 272384 + movgr2fr.w $fa1, $a0 fmul.s $fa0, $fa0, $fa1 - fadd.s $fa0, $fa0, $fa2 + lu12i.w $a0, -253952 + lu32i.d $a0, 0 + movgr2fr.w $fa1, $a0 + fadd.s $fa0, $fa0, $fa1 fcvt.d.s $fa0, $fa0 vldi $vr1, -800 fadd.d $fa1, $fa0, $fa1 @@ -61836,22 +61875,7 @@ vorbis_fromdBlook: # @vorbis_fromdBlook .Lfunc_end246: .size vorbis_fromdBlook, .Lfunc_end246-vorbis_fromdBlook # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function vorbis_lsp_to_curve -.LCPI247_0: - .dword 0x400921fb54442d18 # double 3.1415926535897931 -.LCPI247_1: - .dword 0x40445f3071e2c65e # double 40.743665919999998 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI247_2: - .word 0x3f3504f3 # float 0.707106769 -.LCPI247_3: - .word 0x42800000 # float 64 -.LCPI247_4: - .word 0xc2000000 # float -32 - .text - .globl vorbis_lsp_to_curve + .globl vorbis_lsp_to_curve # -- Begin function vorbis_lsp_to_curve .p2align 5 .type vorbis_lsp_to_curve,@function vorbis_lsp_to_curve: # @vorbis_lsp_to_curve @@ -61883,12 +61907,15 @@ vorbis_lsp_to_curve: # @vorbis_lsp_to_curve move $s1, $a2 st.d $a1, $sp, 32 # 8-byte Folded Spill st.d $a0, $sp, 24 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI247_1) + lu12i.w $a0, 466476 pcalau12i $a1, %pc_hi20(COS_LOOKUP) addi.d $s4, $a1, %pc_lo12(COS_LOOKUP) blez $a5, .LBB247_3 # %bb.1: # %.lr.ph.preheader - fld.d $fa0, $a0, %pc_lo12(.LCPI247_1) + ori $a1, $a0, 1630 + lu32i.d $a1, 286512 + lu52i.d $a1, $a1, 1028 + movgr2fr.d $fa0, $a1 vldi $vr1, -800 vldi $vr2, -928 move $a1, $fp @@ -61923,25 +61950,33 @@ vorbis_lsp_to_curve: # @vorbis_lsp_to_curve .LBB247_3: # %.preheader blez $s1, .LBB247_16 # %bb.4: # %.lr.ph66 - pcalau12i $a1, %pc_hi20(.LCPI247_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI247_0) - movgr2fr.w $fa1, $a3 - ffint.d.w $fa1, $fa1 - fdiv.d $fa0, $fa0, $fa1 + lu12i.w $a1, 345154 + ori $a1, $a1, 3352 + lu32i.d $a1, -450053 + movgr2fr.w $fa0, $a3 + lu52i.d $a1, $a1, 1024 + ffint.d.w $fa0, $fa0 + movgr2fr.d $fa1, $a1 + fdiv.d $fa0, $fa1, $fa0 fcvt.s.d $fs2, $fa0 srai.d $s5, $fp, 1 andi $s6, $fp, 1 ld.d $a1, $sp, 32 # 8-byte Folded Reload addi.d $s7, $a1, 4 - fld.d $fs3, $a0, %pc_lo12(.LCPI247_1) - pcalau12i $a0, %pc_hi20(.LCPI247_2) - fld.s $fs4, $a0, %pc_lo12(.LCPI247_2) - pcalau12i $a0, %pc_hi20(.LCPI247_3) - fld.s $fs5, $a0, %pc_lo12(.LCPI247_3) - pcalau12i $a0, %pc_hi20(.LCPI247_4) - fld.s $fs6, $a0, %pc_lo12(.LCPI247_4) + ori $a0, $a0, 1630 + lu32i.d $a0, 286512 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fs3, $a0 vldi $vr4, -800 vldi $vr5, -928 + lu12i.w $a0, 258896 + ori $a0, $a0, 1267 + movgr2fr.w $fs4, $a0 + lu12i.w $a0, 272384 + movgr2fr.w $fs5, $a0 + lu12i.w $a0, -253952 + lu32i.d $a0, 0 + movgr2fr.w $fs6, $a0 pcalau12i $a0, %pc_hi20(INVSQ_LOOKUP) addi.d $s8, $a0, %pc_lo12(INVSQ_LOOKUP) pcalau12i $a0, %pc_hi20(INVSQ2EXP_LOOKUP) @@ -62482,18 +62517,7 @@ vorbis_lpc_to_lsp: # @vorbis_lpc_to_lsp .Lfunc_end248: .size vorbis_lpc_to_lsp, .Lfunc_end248-vorbis_lpc_to_lsp # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function Laguerre_With_Deflation -.LCPI249_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI249_1: - .dword 0xbeb0c6f7a0b5ed8d # double -9.9999999999999995E-7 -.LCPI249_2: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI249_3: - .dword 0x3da5fd7fe1796495 # double 9.9999999999999993E-12 - .text - .p2align 5 + .p2align 5 # -- Begin function Laguerre_With_Deflation .type Laguerre_With_Deflation,@function Laguerre_With_Deflation: # @Laguerre_With_Deflation # %bb.0: @@ -62573,18 +62597,25 @@ Laguerre_With_Deflation: # @Laguerre_With_Deflation alsl.d $a0, $a1, $s1, 3 addi.d $s3, $a0, -8 lu52i.d $s4, $zero, 1107 - pcalau12i $a0, %pc_hi20(.LCPI249_0) - fld.d $fa4, $a0, %pc_lo12(.LCPI249_0) + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 + movgr2fr.d $fa4, $a0 lu12i.w $s5, 275200 - pcalau12i $a0, %pc_hi20(.LCPI249_2) - fld.d $fa5, $a0, %pc_lo12(.LCPI249_2) - pcalau12i $a0, %pc_hi20(.LCPI249_3) - fld.d $fs2, $a0, %pc_lo12(.LCPI249_3) - pcalau12i $a0, %pc_hi20(.LCPI249_1) - fld.d $fa6, $a0, %pc_lo12(.LCPI249_1) - movgr2fr.d $fs1, $zero + movgr2fr.d $fs2, $zero ori $s7, $zero, 1 addi.w $s6, $zero, -1 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a3, $a0, 1003 + movgr2fr.d $fa5, $a3 + lu12i.w $a3, -125034 + ori $a3, $a3, 1173 + lu32i.d $a3, 392575 + lu52i.d $a3, $a3, 986 + movgr2fr.d $fs3, $a3 + lu52i.d $a0, $a0, -1045 + movgr2fr.d $fa6, $a0 .LBB249_10: # %.preheader # =>This Loop Header: Depth=1 # Child Loop BB249_12 Depth 2 @@ -62609,18 +62640,18 @@ Laguerre_With_Deflation: # @Laguerre_With_Deflation bstrins.d $a0, $s5, 63, 32 movgr2fr.d $fa1, $a0 fadd.d $fs0, $fa1, $fa0 - fmov.d $fs5, $fs1 + fmov.d $fs5, $fs2 b .LBB249_12 .p2align 4, , 16 .LBB249_11: # in Loop: Header=BB249_12 Depth=2 - fdiv.d $fa0, $fs3, $fa0 + fdiv.d $fa0, $fs4, $fa0 fsub.d $fs5, $fs5, $fa0 fneg.d $fa1, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs1 + fcmp.clt.d $fcc0, $fa0, $fs2 fsel $fa0, $fa0, $fa1, $fcc0 fdiv.d $fa0, $fa0, $fs5 fabs.d $fa0, $fa0 - fcmp.cule.d $fcc0, $fs2, $fa0 + fcmp.cule.d $fcc0, $fs3, $fa0 bceqz $fcc0, .LBB249_22 .LBB249_12: # %.lr.ph107 # Parent Loop BB249_10 Depth=1 @@ -62628,18 +62659,18 @@ Laguerre_With_Deflation: # @Laguerre_With_Deflation # Child Loop BB249_13 Depth 3 move $a0, $s3 move $a3, $s0 - fmov.d $fa0, $fs1 - fmov.d $fs4, $fs1 + fmov.d $fa0, $fs2 + fmov.d $fs1, $fs2 fmov.d $fa1, $fs7 .p2align 4, , 16 .LBB249_13: # Parent Loop BB249_10 Depth=1 # Parent Loop BB249_12 Depth=2 # => This Inner Loop Header: Depth=3 fmul.d $fa0, $fs5, $fa0 - fadd.d $fa0, $fs4, $fa0 + fadd.d $fa0, $fs1, $fa0 fld.d $fa2, $a0, 0 - fmul.d $fa3, $fs5, $fs4 - fadd.d $fs4, $fa1, $fa3 + fmul.d $fa3, $fs5, $fs1 + fadd.d $fs1, $fa1, $fa3 fmul.d $fa1, $fs5, $fa1 fadd.d $fa1, $fa1, $fa2 addi.w $a3, $a3, -1 @@ -62647,13 +62678,13 @@ Laguerre_With_Deflation: # @Laguerre_With_Deflation blt $s7, $a3, .LBB249_13 # %bb.14: # %._crit_edge # in Loop: Header=BB249_12 Depth=2 - fmul.d $fa2, $fs4, $fs6 - fmul.d $fa2, $fs4, $fa2 - fmul.d $fs3, $fa1, $fs0 - fmul.d $fa0, $fs3, $fa0 + fmul.d $fa2, $fs1, $fs6 + fmul.d $fa2, $fs1, $fa2 + fmul.d $fs4, $fa1, $fs0 + fmul.d $fa0, $fs4, $fa0 fsub.d $fa0, $fa2, $fa0 fmul.d $fa1, $fa0, $fs6 - fcmp.clt.d $fcc0, $fa1, $fs1 + fcmp.clt.d $fcc0, $fa1, $fs2 bcnez $fcc0, .LBB249_26 # %bb.15: # in Loop: Header=BB249_12 Depth=2 fsqrt.d $fa0, $fa1 @@ -62661,10 +62692,10 @@ Laguerre_With_Deflation: # @Laguerre_With_Deflation bceqz $fcc0, .LBB249_19 # %bb.16: # %.split # in Loop: Header=BB249_12 Depth=2 - fcmp.cule.d $fcc0, $fs4, $fs1 + fcmp.cule.d $fcc0, $fs1, $fs2 bcnez $fcc0, .LBB249_20 .LBB249_17: # in Loop: Header=BB249_12 Depth=2 - fadd.d $fa0, $fs4, $fa0 + fadd.d $fa0, $fs1, $fa0 fcmp.cule.d $fcc0, $fa5, $fa0 bcnez $fcc0, .LBB249_11 # %bb.18: # in Loop: Header=BB249_12 Depth=2 @@ -62685,11 +62716,11 @@ Laguerre_With_Deflation: # @Laguerre_With_Deflation fld.d $fa4, $fp, -184 # 8-byte Folded Reload ld.d $a1, $fp, -176 # 8-byte Folded Reload ld.d $a2, $fp, -168 # 8-byte Folded Reload - fcmp.cule.d $fcc0, $fs4, $fs1 + fcmp.cule.d $fcc0, $fs1, $fs2 bceqz $fcc0, .LBB249_17 .p2align 4, , 16 .LBB249_20: # in Loop: Header=BB249_12 Depth=2 - fsub.d $fa0, $fs4, $fa0 + fsub.d $fa0, $fs1, $fa0 fcmp.cule.d $fcc0, $fa0, $fa6 bcnez $fcc0, .LBB249_11 # %bb.21: # in Loop: Header=BB249_12 Depth=2 @@ -62752,12 +62783,7 @@ Laguerre_With_Deflation: # @Laguerre_With_Deflation .Lfunc_end249: .size Laguerre_With_Deflation, .Lfunc_end249-Laguerre_With_Deflation # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function Newton_Raphson -.LCPI250_0: - .dword 0x3bc79ca10c924223 # double 9.9999999999999995E-21 - .text - .p2align 5 + .p2align 5 # -- Begin function Newton_Raphson .type Newton_Raphson,@function Newton_Raphson: # @Newton_Raphson # %bb.0: @@ -62820,10 +62846,13 @@ Newton_Raphson: # @Newton_Raphson addi.d $a5, $a1, 1 alsl.d $a0, $a1, $a0, 2 addi.d $a0, $a0, -4 - pcalau12i $a6, %pc_hi20(.LCPI250_0) - fld.d $fa1, $a6, %pc_lo12(.LCPI250_0) - movgr2fr.d $fa2, $zero + movgr2fr.d $fa1, $zero ori $a6, $zero, 1 + lu12i.w $a7, 51492 + ori $a7, $a7, 547 + lu32i.d $a7, 498849 + lu52i.d $a7, $a7, 956 + movgr2fr.d $fa2, $a7 ori $a7, $zero, 41 .p2align 4, , 16 .LBB250_9: # %.preheader51.us @@ -62831,7 +62860,7 @@ Newton_Raphson: # @Newton_Raphson # Child Loop BB250_10 Depth 2 # Child Loop BB250_11 Depth 3 move $t0, $zero - fmov.d $fa3, $fa2 + fmov.d $fa3, $fa1 .p2align 4, , 16 .LBB250_10: # %.lr.ph58.us.us # Parent Loop BB250_9 Depth=1 @@ -62843,7 +62872,7 @@ Newton_Raphson: # @Newton_Raphson move $t2, $a0 move $t3, $a5 fmov.d $fa5, $fa0 - fmov.d $fa6, $fa2 + fmov.d $fa6, $fa1 .p2align 4, , 16 .LBB250_11: # Parent Loop BB250_9 Depth=1 # Parent Loop BB250_10 Depth=2 @@ -62870,7 +62899,7 @@ Newton_Raphson: # @Newton_Raphson # in Loop: Header=BB250_9 Depth=1 beq $a4, $a7, .LBB250_22 # %bb.14: # in Loop: Header=BB250_9 Depth=1 - fcmp.clt.d $fcc0, $fa1, $fa3 + fcmp.clt.d $fcc0, $fa2, $fa3 addi.w $a4, $a4, 1 bcnez $fcc0, .LBB250_9 # %bb.15: # %.lr.ph67.preheader @@ -62938,14 +62967,7 @@ comp: # @comp .Lfunc_end251: .size comp, .Lfunc_end251-comp # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function floor1_fit -.LCPI252_0: - .word 0x40ea0ea1 # float 7.31428576 -.LCPI252_1: - .word 0x447fe000 # float 1023.5 - .text - .globl floor1_fit + .globl floor1_fit # -- Begin function floor1_fit .p2align 5 .type floor1_fit,@function floor1_fit: # @floor1_fit @@ -62962,8 +62984,10 @@ floor1_fit: # @floor1_fit st.d $s6, $sp, 1960 # 8-byte Folded Spill st.d $s7, $sp, 1952 # 8-byte Folded Spill st.d $s8, $sp, 1944 # 8-byte Folded Spill + fst.d $fs0, $sp, 1936 # 8-byte Folded Spill + fst.d $fs1, $sp, 1928 # 8-byte Folded Spill addi.d $sp, $sp, -2048 - addi.d $sp, $sp, -1504 + addi.d $sp, $sp, -1520 move $s1, $a1 ld.d $fp, $a1, 1296 ld.w $s8, $a1, 1284 @@ -63163,6 +63187,11 @@ floor1_fit: # @floor1_fit addi.d $ra, $sp, 96 addi.d $a6, $sp, 1140 addi.d $a7, $sp, 880 + lu12i.w $a0, 265888 + ori $a0, $a0, 3745 + movgr2fr.w $fs0, $a0 + lu12i.w $a0, 280574 + movgr2fr.w $fs1, $a0 st.d $t7, $sp, 32 # 8-byte Folded Spill b .LBB252_31 .p2align 4, , 16 @@ -63186,16 +63215,16 @@ floor1_fit: # @floor1_fit slli.d $a0, $s7, 2 ldx.w $s1, $a0, $a4 ldx.w $s0, $a0, $s6 - slli.d $a3, $s1, 2 - ldx.w $a0, $a3, $ra + slli.d $a2, $s1, 2 + ldx.w $a0, $a2, $ra beq $a0, $s0, .LBB252_30 # %bb.32: # in Loop: Header=BB252_31 Depth=1 - ldx.w $a1, $t7, $a3 - slli.d $a2, $s0, 2 - ldx.w $a0, $t7, $a2 + ldx.w $a1, $t7, $a2 + slli.d $a3, $s0, 2 + ldx.w $a0, $t7, $a3 st.d $a0, $sp, 64 # 8-byte Folded Spill - ldx.w $a4, $a3, $a6 - ldx.w $a0, $a3, $a7 + ldx.w $a4, $a2, $a6 + ldx.w $a0, $a2, $a7 alsl.d $a5, $s1, $ra, 2 st.w $s0, $a5, 0 bltz $a4, .LBB252_35 @@ -63206,8 +63235,8 @@ floor1_fit: # @floor1_fit bstrpick.d $a0, $a0, 31, 1 .LBB252_35: # %post_Y.exit # in Loop: Header=BB252_31 Depth=1 - ldx.w $a5, $a2, $a6 - ldx.w $a4, $a2, $a7 + ldx.w $a5, $a3, $a6 + ldx.w $a4, $a3, $a7 bltz $a5, .LBB252_40 .LBB252_36: # in Loop: Header=BB252_31 Depth=1 bltz $a4, .LBB252_39 @@ -63217,8 +63246,8 @@ floor1_fit: # @floor1_fit b .LBB252_40 .LBB252_38: # in Loop: Header=BB252_31 Depth=1 move $a0, $a4 - ldx.w $a5, $a2, $a6 - ldx.w $a4, $a2, $a7 + ldx.w $a5, $a3, $a6 + ldx.w $a4, $a3, $a7 bgez $a5, .LBB252_36 b .LBB252_40 .LBB252_39: # in Loop: Header=BB252_31 Depth=1 @@ -63233,30 +63262,26 @@ floor1_fit: # @floor1_fit addi.w $a6, $a4, 0 beq $a6, $a5, .LBB252_90 # %bb.42: # in Loop: Header=BB252_31 Depth=1 - ld.d $a7, $sp, 40 # 8-byte Folded Reload - ldx.w $a5, $a7, $a3 - slli.d $a3, $a5, 2 - fldx.s $fa3, $s2, $a3 - pcalau12i $a6, %pc_hi20(.LCPI252_0) - fld.s $fa0, $a6, %pc_lo12(.LCPI252_0) - pcalau12i $a6, %pc_hi20(.LCPI252_1) - fld.s $fa1, $a6, %pc_lo12(.LCPI252_1) - ldx.w $a6, $a7, $a2 - fmul.s $fa2, $fa3, $fa0 - fadd.s $fa2, $fa2, $fa1 - ftintrz.w.s $fa2, $fa2 - movfr2gr.s $a2, $fa2 - srai.d $a7, $a2, 63 - andn $a2, $a2, $a7 - fldx.s $fa4, $s3, $a3 - fld.s $fa2, $fp, 1112 - slti $a3, $a2, 1023 - maskeqz $a2, $a2, $a3 + ld.d $a6, $sp, 40 # 8-byte Folded Reload + ldx.w $a5, $a6, $a2 + slli.d $a2, $a5, 2 + fldx.s $fa1, $s2, $a2 + ldx.w $a6, $a6, $a3 + fmul.s $fa0, $fa1, $fs0 + fadd.s $fa0, $fa0, $fs1 + ftintrz.w.s $fa0, $fa0 + movfr2gr.s $a3, $fa0 + srai.d $a7, $a3, 63 + andn $a3, $a3, $a7 + fldx.s $fa2, $s3, $a2 + fld.s $fa0, $fp, 1112 + slti $a2, $a3, 1023 + maskeqz $a3, $a3, $a2 ori $a7, $zero, 1023 - masknez $a3, $a7, $a3 - fadd.s $fa4, $fa4, $fa2 - fcmp.cult.s $fcc0, $fa4, $fa3 - or $a2, $a2, $a3 + masknez $a2, $a7, $a2 + fadd.s $fa2, $fa2, $fa0 + fcmp.cult.s $fcc0, $fa2, $fa1 + or $a2, $a3, $a2 bceqz $fcc0, .LBB252_50 .LBB252_43: # in Loop: Header=BB252_31 Depth=1 sub.d $a2, $a0, $a2 @@ -63301,58 +63326,58 @@ floor1_fit: # @floor1_fit # => This Inner Loop Header: Depth=2 add.w $t2, $t2, $t0 slt $t3, $t2, $a3 - fld.s $fa3, $t1, 0 + fld.s $fa1, $t1, 0 masknez $t4, $a7, $t3 add.d $a0, $a0, $a4 add.w $a0, $a0, $t4 - fmul.s $fa4, $fa3, $fa0 - fadd.s $fa4, $fa4, $fa1 - ftintrz.w.s $fa4, $fa4 - movfr2gr.s $t5, $fa4 + fmul.s $fa2, $fa1, $fs0 + fadd.s $fa2, $fa2, $fs1 + ftintrz.w.s $fa2, $fa2 + movfr2gr.s $t5, $fa2 srai.d $t4, $t5, 63 andn $t4, $t5, $t4 - fld.s $fa4, $a6, 0 + fld.s $fa2, $a6, 0 slti $t6, $t4, 1023 maskeqz $t4, $t4, $t6 ori $t8, $zero, 1023 masknez $t6, $t8, $t6 - fadd.s $fa4, $fa2, $fa4 - fcmp.cult.s $fcc0, $fa4, $fa3 + fadd.s $fa2, $fa0, $fa2 + fcmp.cult.s $fcc0, $fa2, $fa1 or $t4, $t4, $t6 bcnez $fcc0, .LBB252_45 # %bb.47: # in Loop: Header=BB252_46 Depth=2 blez $t5, .LBB252_45 # %bb.48: # in Loop: Header=BB252_46 Depth=2 - fld.s $fa4, $fp, 1096 - movgr2fr.w $fa3, $a0 - ffint.s.w $fa3, $fa3 - fadd.s $fa5, $fa4, $fa3 + fld.s $fa2, $fp, 1096 + movgr2fr.w $fa1, $a0 + ffint.s.w $fa1, $fa1 + fadd.s $fa3, $fa2, $fa1 bstrpick.d $t5, $t4, 31, 0 - movgr2fr.d $fa4, $t5 - ffint.s.l $fa4, $fa4 - fcmp.clt.s $fcc0, $fa5, $fa4 + movgr2fr.d $fa2, $t5 + ffint.s.l $fa2, $fa2 + fcmp.clt.s $fcc0, $fa3, $fa2 bcnez $fcc0, .LBB252_55 # %bb.49: # in Loop: Header=BB252_46 Depth=2 - fld.s $fa5, $fp, 1100 - fsub.s $fa3, $fa3, $fa5 - fcmp.clt.s $fcc0, $fa4, $fa3 + fld.s $fa3, $fp, 1100 + fsub.s $fa1, $fa1, $fa3 + fcmp.clt.s $fcc0, $fa2, $fa1 bceqz $fcc0, .LBB252_45 b .LBB252_55 .p2align 4, , 16 .LBB252_50: # in Loop: Header=BB252_31 Depth=1 - fld.s $fa4, $fp, 1096 - movgr2fr.w $fa3, $a0 - ffint.s.w $fa3, $fa3 - fadd.s $fa5, $fa4, $fa3 + fld.s $fa2, $fp, 1096 + movgr2fr.w $fa1, $a0 + ffint.s.w $fa1, $fa1 + fadd.s $fa3, $fa2, $fa1 bstrpick.d $a3, $a2, 31, 0 - movgr2fr.d $fa4, $a3 - ffint.s.l $fa4, $fa4 - fcmp.clt.s $fcc0, $fa5, $fa4 + movgr2fr.d $fa2, $a3 + ffint.s.l $fa2, $fa2 + fcmp.clt.s $fcc0, $fa3, $fa2 bcnez $fcc0, .LBB252_55 # %bb.51: # in Loop: Header=BB252_31 Depth=1 - fld.s $fa5, $fp, 1100 - fsub.s $fa3, $fa3, $fa5 - fcmp.clt.s $fcc0, $fa4, $fa3 + fld.s $fa3, $fp, 1100 + fsub.s $fa1, $fa1, $fa3 + fcmp.clt.s $fcc0, $fa2, $fa1 bceqz $fcc0, .LBB252_43 b .LBB252_55 .p2align 4, , 16 @@ -63484,7 +63509,9 @@ floor1_fit: # @floor1_fit move $a0, $zero .LBB252_70: # %.loopexit175.thread addi.d $sp, $sp, 2032 - addi.d $sp, $sp, 1520 + addi.d $sp, $sp, 1536 + fld.d $fs1, $sp, 1928 # 8-byte Folded Reload + fld.d $fs0, $sp, 1936 # 8-byte Folded Reload ld.d $s8, $sp, 1944 # 8-byte Folded Reload ld.d $s7, $sp, 1952 # 8-byte Folded Reload ld.d $s6, $sp, 1960 # 8-byte Folded Reload @@ -63656,14 +63683,7 @@ floor1_fit: # @floor1_fit .Lfunc_end252: .size floor1_fit, .Lfunc_end252-floor1_fit # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function accumulate_fit -.LCPI253_0: - .word 0x40ea0ea1 # float 7.31428576 -.LCPI253_1: - .word 0x447fe000 # float 1023.5 - .text - .p2align 5 + .p2align 5 # -- Begin function accumulate_fit .type accumulate_fit,@function accumulate_fit: # @accumulate_fit # %bb.0: @@ -63757,12 +63777,13 @@ accumulate_fit: # @accumulate_fit move $t1, $zero move $t2, $zero addi.d $fp, $fp, 1 - pcalau12i $s0, %pc_hi20(.LCPI253_0) - fld.s $fa0, $s0, %pc_lo12(.LCPI253_0) - pcalau12i $s0, %pc_hi20(.LCPI253_1) - fld.s $fa1, $s0, %pc_lo12(.LCPI253_1) alsl.d $a1, $a2, $a1, 2 alsl.d $a0, $a2, $a0, 2 + lu12i.w $s0, 265888 + ori $s0, $s0, 3745 + movgr2fr.w $fa0, $s0 + lu12i.w $s0, 280574 + movgr2fr.w $fa1, $s0 ori $s0, $zero, 1023 b .LBB253_6 .p2align 4, , 16 @@ -69263,12 +69284,7 @@ _encodepart: # @_encodepart .Lfunc_end273: .size _encodepart, .Lfunc_end273-_encodepart # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function res1_class -.LCPI274_0: - .dword 0x4059000000000000 # double 100 - .text - .globl res1_class + .globl res1_class # -- Begin function res1_class .p2align 5 .type res1_class,@function res1_class: # @res1_class @@ -69355,12 +69371,14 @@ res1_class: # @res1_class # %bb.11: # %.lr.ph.i st.d $s7, $sp, 16 # 8-byte Folded Spill sub.w $a1, $fp, $s4 - pcalau12i $a2, %pc_hi20(.LCPI274_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI274_0) div.w $s5, $a1, $s6 - movgr2fr.w $fa1, $s6 - ffint.d.w $fa1, $fa1 - fdiv.d $fa0, $fa0, $fa1 + movgr2fr.w $fa0, $s6 + ffint.d.w $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fdiv.d $fa0, $fa1, $fa0 fcvt.s.d $fs0, $fa0 slli.d $s2, $s5, 3 st.d $a0, $sp, 24 # 8-byte Folded Spill @@ -71349,35 +71367,28 @@ mapping0_free_info: # @mapping0_free_info .Lfunc_end281: .size mapping0_free_info, .Lfunc_end281-mapping0_free_info # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function mapping0_forward -.LCPI282_0: - .word 0x3540a8c1 # float 7.1771143E-7 -.LCPI282_1: - .word 0xc43f115b # float -764.271179 - .text - .p2align 5 + .p2align 5 # -- Begin function mapping0_forward .type mapping0_forward,@function mapping0_forward: # @mapping0_forward # %bb.0: - addi.d $sp, $sp, -496 - st.d $ra, $sp, 488 # 8-byte Folded Spill - st.d $fp, $sp, 480 # 8-byte Folded Spill - st.d $s0, $sp, 472 # 8-byte Folded Spill - st.d $s1, $sp, 464 # 8-byte Folded Spill - st.d $s2, $sp, 456 # 8-byte Folded Spill - st.d $s3, $sp, 448 # 8-byte Folded Spill - st.d $s4, $sp, 440 # 8-byte Folded Spill - st.d $s5, $sp, 432 # 8-byte Folded Spill - st.d $s6, $sp, 424 # 8-byte Folded Spill - st.d $s7, $sp, 416 # 8-byte Folded Spill - st.d $s8, $sp, 408 # 8-byte Folded Spill - fst.d $fs0, $sp, 400 # 8-byte Folded Spill - fst.d $fs1, $sp, 392 # 8-byte Folded Spill - fst.d $fs2, $sp, 384 # 8-byte Folded Spill - fst.d $fs3, $sp, 376 # 8-byte Folded Spill - fst.d $fs4, $sp, 368 # 8-byte Folded Spill - addi.d $fp, $sp, 496 + addi.d $sp, $sp, -464 + st.d $ra, $sp, 456 # 8-byte Folded Spill + st.d $fp, $sp, 448 # 8-byte Folded Spill + st.d $s0, $sp, 440 # 8-byte Folded Spill + st.d $s1, $sp, 432 # 8-byte Folded Spill + st.d $s2, $sp, 424 # 8-byte Folded Spill + st.d $s3, $sp, 416 # 8-byte Folded Spill + st.d $s4, $sp, 408 # 8-byte Folded Spill + st.d $s5, $sp, 400 # 8-byte Folded Spill + st.d $s6, $sp, 392 # 8-byte Folded Spill + st.d $s7, $sp, 384 # 8-byte Folded Spill + st.d $s8, $sp, 376 # 8-byte Folded Spill + fst.d $fs0, $sp, 368 # 8-byte Folded Spill + fst.d $fs1, $sp, 360 # 8-byte Folded Spill + fst.d $fs2, $sp, 352 # 8-byte Folded Spill + fst.d $fs3, $sp, 344 # 8-byte Folded Spill + fst.d $fs4, $sp, 336 # 8-byte Folded Spill + addi.d $fp, $sp, 464 move $s5, $a0 ld.d $a0, $a0, 104 ld.d $s7, $a0, 8 @@ -71387,7 +71398,7 @@ mapping0_forward: # @mapping0_forward st.d $a0, $fp, -152 # 8-byte Folded Spill ld.d $s4, $s5, 184 ld.w $a1, $s7, 4 - ld.w $s8, $s5, 72 + ld.w $s6, $s5, 72 slli.d $a0, $a1, 2 addi.d $a0, $a0, 15 bstrins.d $a0, $zero, 3, 0 @@ -71395,11 +71406,11 @@ mapping0_forward: # @mapping0_forward st.d $a0, $fp, -192 # 8-byte Folded Spill move $sp, $a0 ld.d $s0, $s5, 120 - ld.d $a0, $s5, 128 + ld.d $a4, $s5, 128 ld.d $s1, $s5, 112 alsl.d $a2, $a1, $s0, 3 slli.d $s2, $a1, 3 - bge $a0, $a2, .LBB282_4 + bge $a4, $a2, .LBB282_4 # %bb.1: beqz $s1, .LBB282_3 # %bb.2: @@ -71421,14 +71432,14 @@ mapping0_forward: # @mapping0_forward move $s1, $a0 move $s0, $zero st.d $a0, $s5, 112 - move $a0, $s2 + move $a4, $s2 .LBB282_4: # %_vorbis_block_alloc.exit add.d $s3, $s0, $s2 st.d $s3, $s5, 120 ld.w $a1, $s7, 4 alsl.d $a2, $a1, $s3, 3 slli.d $s2, $a1, 3 - bge $a0, $a2, .LBB282_8 + bge $a4, $a2, .LBB282_8 # %bb.5: beqz $s1, .LBB282_7 # %bb.6: @@ -71447,49 +71458,49 @@ mapping0_forward: # @mapping0_forward move $a0, $s2 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 - move $s6, $a0 move $s3, $zero st.d $a0, $s5, 112 - move $a0, $s2 + move $a4, $s2 b .LBB282_9 .LBB282_8: - move $s6, $s1 + move $a0, $s1 .LBB282_9: # %_vorbis_block_alloc.exit371 - st.d $s3, $fp, -488 # 8-byte Folded Spill - add.d $s2, $s3, $s2 - st.d $s2, $s5, 120 + st.d $s3, $fp, -456 # 8-byte Folded Spill + add.d $s8, $s3, $s2 + st.d $s8, $s5, 120 ld.w $a1, $s7, 4 - alsl.d $a2, $a1, $s2, 3 + alsl.d $a2, $a1, $s8, 3 slli.d $s3, $a1, 3 - bge $a0, $a2, .LBB282_13 + st.d $a0, $fp, -464 # 8-byte Folded Spill + bge $a4, $a2, .LBB282_13 # %bb.10: - beqz $s6, .LBB282_12 + beqz $a0, .LBB282_12 # %bb.11: + move $s2, $a0 ori $a0, $zero, 16 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 ld.d $a1, $s5, 136 ld.d $a2, $s5, 144 - add.d $a1, $a1, $s2 + add.d $a1, $a1, $s8 st.d $a1, $s5, 136 st.d $a2, $a0, 8 - st.d $s6, $a0, 0 + st.d $s2, $a0, 0 st.d $a0, $s5, 144 .LBB282_12: st.d $s3, $s5, 128 move $a0, $s3 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 - move $s2, $zero + move $s8, $zero st.d $a0, $fp, -200 # 8-byte Folded Spill st.d $a0, $s5, 112 - move $a0, $s3 + move $a4, $s3 b .LBB282_14 .LBB282_13: - st.d $s6, $fp, -200 # 8-byte Folded Spill + st.d $a0, $fp, -200 # 8-byte Folded Spill .LBB282_14: # %_vorbis_block_alloc.exit373 - st.d $s6, $fp, -496 # 8-byte Folded Spill - add.d $s3, $s2, $s3 + add.d $s3, $s8, $s3 st.d $s3, $s5, 120 ld.w $a1, $s7, 4 fld.s $fs0, $s4, 8 @@ -71498,61 +71509,63 @@ mapping0_forward: # @mapping0_forward slli.d $a1, $a1, 2 addi.d $a1, $a1, 15 bstrins.d $a1, $zero, 3, 0 - sub.d $a1, $sp, $a1 - st.d $a1, $fp, -344 # 8-byte Folded Spill - move $sp, $a1 - ld.d $a4, $s5, 56 - st.d $s4, $fp, -448 # 8-byte Folded Spill + sub.d $a0, $sp, $a1 + st.d $a0, $fp, -344 # 8-byte Folded Spill + move $sp, $a0 + ld.d $a0, $s5, 56 + st.d $s4, $fp, -432 # 8-byte Folded Spill ld.w $a1, $s4, 12 st.d $a1, $fp, -184 # 8-byte Folded Spill - addi.w $a1, $a4, 0 + addi.w $a1, $a0, 0 st.d $a1, $fp, -376 # 8-byte Folded Spill ld.d $a2, $fp, -368 # 8-byte Folded Reload alsl.d $a1, $a1, $a2, 3 ld.d $a1, $a1, 808 st.d $a1, $fp, -232 # 8-byte Folded Spill ld.d $a3, $fp, -152 # 8-byte Folded Reload - ld.d $a1, $a3, 104 - st.d $a1, $fp, -208 # 8-byte Folded Spill - st.d $a4, $fp, -392 # 8-byte Folded Spill - st.w $a4, $s5, 76 + ld.d $s2, $a3, 104 + st.d $a0, $fp, -392 # 8-byte Folded Spill + st.w $a0, $s5, 76 ld.w $a2, $s7, 4 - bstrpick.d $a1, $s8, 31, 31 - pcalau12i $a4, %pc_hi20(.LCPI282_0) - pcalau12i $a5, %pc_hi20(.LCPI282_1) + bstrpick.d $a1, $s6, 31, 31 + lu12i.w $a5, 218122 + lu12i.w $a6, -244751 st.d $s5, $fp, -144 # 8-byte Folded Spill st.d $s7, $fp, -160 # 8-byte Folded Spill - st.d $a4, $fp, -400 # 8-byte Folded Spill - st.d $a5, $fp, -408 # 8-byte Folded Spill - st.d $s8, $fp, -328 # 8-byte Folded Spill + st.d $s6, $fp, -328 # 8-byte Folded Spill blez $a2, .LBB282_29 # %bb.15: # %.lr.ph445 st.d $s2, $fp, -224 # 8-byte Folded Spill + st.d $s8, $fp, -208 # 8-byte Folded Spill + move $a4, $s6 move $s6, $zero - movgr2fr.w $fa0, $s8 + movgr2fr.w $fa0, $a4 ffint.s.w $fa0, $fa0 vldi $vr1, -1264 fdiv.s $fa0, $fa1, $fa0 movfr2gr.s $a0, $fa0 - add.w $a1, $s8, $a1 + add.w $a1, $a4, $a1 srai.d $a1, $a1, 1 st.d $a1, $fp, -336 # 8-byte Folded Spill slli.d $a1, $a1, 2 addi.d $a2, $a1, 7 bstrins.d $a2, $zero, 2, 0 bstrpick.d $a0, $a0, 30, 0 - fld.s $fs1, $a4, %pc_lo12(.LCPI282_0) - fld.s $fs2, $a5, %pc_lo12(.LCPI282_1) movgr2fr.w $fa0, $a0 ffint.s.w $fa0, $fa0 + ori $a0, $a5, 2241 + movgr2fr.w $fs1, $a0 fmul.s $fa0, $fa0, $fs1 + ori $a0, $a6, 347 + lu32i.d $a0, 0 + movgr2fr.w $fs2, $a0 fadd.s $fs3, $fa0, $fs2 addi.d $a0, $a3, 8 st.d $a0, $fp, -168 # 8-byte Folded Spill addi.d $a0, $a3, 16 st.d $a0, $fp, -176 # 8-byte Folded Spill - addi.d $s2, $a3, 32 - addi.w $s8, $s8, -1 + addi.d $s8, $a3, 32 + addi.w $s2, $a4, -1 movgr2fr.w $fs4, $zero move $s5, $a2 b .LBB282_17 @@ -71630,7 +71643,7 @@ mapping0_forward: # @mapping0_forward ld.d $a0, $s4, 56 slli.d $a1, $a0, 4 alsl.d $a0, $a0, $a1, 3 - add.d $a0, $s2, $a0 + add.d $a0, $s8, $a0 move $a1, $s1 pcaddu18i $ra, %call36(drft_forward) jirl $ra, $ra, 0 @@ -71678,7 +71691,7 @@ mapping0_forward: # @mapping0_forward addi.d $a3, $a3, 2 addi.d $a1, $a1, 4 addi.d $a2, $a2, 8 - blt $a3, $s8, .LBB282_24 + blt $a3, $s2, .LBB282_24 # %bb.25: # %._crit_edge # in Loop: Header=BB282_17 Depth=1 fst.s $fa0, $a0, 0 @@ -71693,13 +71706,14 @@ mapping0_forward: # @mapping0_forward .LBB282_28: # %._crit_edge446.loopexit ld.d $s5, $fp, -144 # 8-byte Folded Reload ld.d $s3, $s5, 120 - ld.d $a0, $s5, 128 + ld.d $a4, $s5, 128 ld.d $s6, $s5, 112 ld.d $s7, $fp, -160 # 8-byte Folded Reload + ld.d $s8, $fp, -208 # 8-byte Folded Reload ld.d $s2, $fp, -224 # 8-byte Folded Reload b .LBB282_30 .LBB282_29: # %_vorbis_block_alloc.exit373.._crit_edge446_crit_edge - add.w $a1, $s8, $a1 + add.w $a1, $s6, $a1 srai.d $a1, $a1, 1 st.d $a1, $fp, -336 # 8-byte Folded Spill slli.d $a1, $a1, 2 @@ -71707,15 +71721,15 @@ mapping0_forward: # @mapping0_forward bstrins.d $a2, $zero, 2, 0 ld.d $s6, $fp, -200 # 8-byte Folded Reload .LBB282_30: # %._crit_edge446 - ld.d $a1, $fp, -184 # 8-byte Folded Reload - ori $a3, $zero, 88 - mul.d $s0, $a1, $a3 - ld.d $a1, $fp, -392 # 8-byte Folded Reload - sltui $s1, $a1, 1 + ld.d $a0, $fp, -184 # 8-byte Folded Reload + ori $a1, $zero, 88 + mul.d $s0, $a0, $a1 + ld.d $a0, $fp, -392 # 8-byte Folded Reload + sltui $s1, $a0, 1 add.d $a1, $s3, $a2 - ori $a3, $zero, 176 + ori $s4, $zero, 176 st.d $a2, $fp, -184 # 8-byte Folded Spill - bge $a0, $a1, .LBB282_34 + bge $a4, $a1, .LBB282_34 # %bb.31: beqz $s6, .LBB282_33 # %bb.32: @@ -71731,7 +71745,6 @@ mapping0_forward: # @mapping0_forward st.d $s6, $a0, 0 st.d $a0, $s5, 144 .LBB282_33: - move $s4, $s2 st.d $a2, $s5, 128 move $a0, $a2 pcaddu18i $ra, %call36(malloc) @@ -71740,21 +71753,16 @@ mapping0_forward: # @mapping0_forward move $s6, $a0 move $s3, $zero st.d $a0, $s5, 112 - move $a0, $a2 - ori $a3, $zero, 176 - b .LBB282_35 -.LBB282_34: - move $s4, $s2 -.LBB282_35: # %_vorbis_block_alloc.exit377 - ld.d $a1, $fp, -208 # 8-byte Folded Reload - add.d $s2, $a1, $s0 + move $a4, $a2 +.LBB282_34: # %_vorbis_block_alloc.exit377 + add.d $s2, $s2, $s0 add.d $s0, $s3, $a2 add.d $a1, $s0, $a2 - masknez $s1, $a3, $s1 - bge $a0, $a1, .LBB282_39 + masknez $s1, $s4, $s1 + bge $a4, $a1, .LBB282_38 +# %bb.35: + beqz $s6, .LBB282_37 # %bb.36: - beqz $s6, .LBB282_38 -# %bb.37: ori $a0, $zero, 16 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 @@ -71766,7 +71774,7 @@ mapping0_forward: # @mapping0_forward ld.d $a2, $fp, -184 # 8-byte Folded Reload st.d $s6, $a0, 0 st.d $a0, $s5, 144 -.LBB282_38: +.LBB282_37: st.d $a2, $s5, 128 move $a0, $a2 pcaddu18i $ra, %call36(malloc) @@ -71774,15 +71782,15 @@ mapping0_forward: # @mapping0_forward ld.d $a2, $fp, -184 # 8-byte Folded Reload move $s0, $zero st.d $a0, $s5, 112 - b .LBB282_40 -.LBB282_39: + b .LBB282_39 +.LBB282_38: move $a0, $s6 -.LBB282_40: # %_vorbis_block_alloc.exit379 +.LBB282_39: # %_vorbis_block_alloc.exit379 add.d $a1, $s0, $a2 st.d $a1, $s5, 120 ld.w $a1, $s7, 4 ld.d $a2, $fp, -200 # 8-byte Folded Reload - add.d $a2, $a2, $s4 + add.d $a2, $a2, $s8 st.d $a2, $fp, -240 # 8-byte Folded Spill add.d $a2, $s2, $s1 st.d $a2, $fp, -224 # 8-byte Folded Spill @@ -71791,31 +71799,34 @@ mapping0_forward: # @mapping0_forward st.d $a2, $fp, -248 # 8-byte Folded Spill ld.d $a2, $fp, -336 # 8-byte Folded Reload slli.d $s2, $a2, 2 - blez $a1, .LBB282_131 -# %bb.41: # %.lr.ph454 + blez $a1, .LBB282_130 +# %bb.40: # %.lr.ph454 move $a1, $zero add.d $a2, $s6, $s3 st.d $a2, $fp, -384 # 8-byte Folded Spill add.d $t1, $a0, $s0 ld.d $a0, $fp, -232 # 8-byte Folded Reload addi.d $a0, $a0, 4 - st.d $a0, $fp, -416 # 8-byte Folded Spill + st.d $a0, $fp, -400 # 8-byte Folded Spill ld.d $a0, $fp, -368 # 8-byte Folded Reload addi.d $a0, $a0, 1320 - st.d $a0, $fp, -424 # 8-byte Folded Spill + st.d $a0, $fp, -408 # 8-byte Folded Spill ld.d $a0, $fp, -336 # 8-byte Folded Reload bstrpick.d $a2, $a0, 31, 0 - st.d $a2, $fp, -432 # 8-byte Folded Spill + st.d $a2, $fp, -416 # 8-byte Folded Spill bstrpick.d $a0, $a0, 31, 2 slli.d $a0, $a0, 2 - st.d $a0, $fp, -440 # 8-byte Folded Spill + st.d $a0, $fp, -424 # 8-byte Folded Spill lu12i.w $a0, 218122 ori $a0, $a0, 2241 - vreplgr2vr.w $vr6, $a0 - lu12i.w $a0, -244751 - ori $a0, $a0, 347 - vreplgr2vr.w $vr0, $a0 - vst $vr0, $fp, -480 # 16-byte Folded Spill + lu12i.w $a2, -244751 + ori $a2, $a2, 347 + st.d $a0, $fp, -440 # 8-byte Folded Spill + movgr2fr.w $fs1, $a0 + st.d $a2, $fp, -448 # 8-byte Folded Spill + move $a0, $a2 + lu32i.d $a0, 0 + movgr2fr.w $fs2, $a0 lu12i.w $a0, -16 lu32i.d $a0, 0 st.d $a0, $fp, -176 # 8-byte Folded Spill @@ -71825,39 +71836,38 @@ mapping0_forward: # @mapping0_forward lu12i.w $s8, 8 lu12i.w $a0, 7 ori $a0, $a0, 4095 - vreplgr2vr.w $vr7, $a0 - vreplgr2vr.w $vr8, $s8 - vrepli.b $vr9, -1 + vreplgr2vr.w $vr6, $a0 + vreplgr2vr.w $vr7, $s8 + vrepli.b $vr8, -1 st.d $t1, $fp, -360 # 8-byte Folded Spill - vst $vr6, $fp, -464 # 16-byte Folded Spill - vst $vr7, $fp, -272 # 16-byte Folded Spill - vst $vr8, $fp, -288 # 16-byte Folded Spill - vst $vr9, $fp, -304 # 16-byte Folded Spill - b .LBB282_43 - .p2align 4, , 16 -.LBB282_42: # %.loopexit434 - # in Loop: Header=BB282_43 Depth=1 + vst $vr6, $fp, -272 # 16-byte Folded Spill + vst $vr7, $fp, -288 # 16-byte Folded Spill + vst $vr8, $fp, -304 # 16-byte Folded Spill + b .LBB282_42 + .p2align 4, , 16 +.LBB282_41: # %.loopexit434 + # in Loop: Header=BB282_42 Depth=1 ld.d $a0, $fp, -160 # 8-byte Folded Reload ld.w $a0, $a0, 4 ld.d $a1, $fp, -352 # 8-byte Folded Reload addi.d $a1, $a1, 1 - bge $a1, $a0, .LBB282_131 -.LBB282_43: # =>This Loop Header: Depth=1 - # Child Loop BB282_129 Depth 2 - # Child Loop BB282_53 Depth 2 - # Child Loop BB282_56 Depth 2 - # Child Loop BB282_62 Depth 2 - # Child Loop BB282_65 Depth 2 - # Child Loop BB282_69 Depth 2 - # Child Loop BB282_88 Depth 3 - # Child Loop BB282_81 Depth 3 - # Child Loop BB282_100 Depth 2 - # Child Loop BB282_119 Depth 3 - # Child Loop BB282_112 Depth 3 + bge $a1, $a0, .LBB282_130 +.LBB282_42: # =>This Loop Header: Depth=1 + # Child Loop BB282_128 Depth 2 + # Child Loop BB282_52 Depth 2 + # Child Loop BB282_55 Depth 2 + # Child Loop BB282_61 Depth 2 + # Child Loop BB282_64 Depth 2 + # Child Loop BB282_68 Depth 2 + # Child Loop BB282_87 Depth 3 + # Child Loop BB282_80 Depth 3 + # Child Loop BB282_99 Depth 2 + # Child Loop BB282_118 Depth 3 + # Child Loop BB282_111 Depth 3 slli.d $s0, $a1, 2 ld.d $a2, $fp, -144 # 8-byte Folded Reload ld.d $a0, $a2, 0 - ld.d $a3, $fp, -416 # 8-byte Folded Reload + ld.d $a3, $fp, -400 # 8-byte Folded Reload ldx.w $a3, $a3, $s0 st.d $a3, $fp, -168 # 8-byte Folded Spill st.d $a1, $fp, -352 # 8-byte Folded Spill @@ -71871,11 +71881,11 @@ mapping0_forward: # @mapping0_forward addi.d $a1, $s7, 120 ld.d $a3, $fp, -392 # 8-byte Folded Reload st.w $a3, $a2, 76 - bge $a0, $a1, .LBB282_47 -# %bb.44: # in Loop: Header=BB282_43 Depth=1 + bge $a0, $a1, .LBB282_46 +# %bb.43: # in Loop: Header=BB282_42 Depth=1 ld.d $s4, $fp, -144 # 8-byte Folded Reload - beqz $s1, .LBB282_46 -# %bb.45: # in Loop: Header=BB282_43 Depth=1 + beqz $s1, .LBB282_45 +# %bb.44: # in Loop: Header=BB282_42 Depth=1 ori $a0, $zero, 16 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 @@ -71886,7 +71896,7 @@ mapping0_forward: # @mapping0_forward st.d $a2, $a0, 8 st.d $s1, $a0, 0 st.d $a0, $s4, 144 -.LBB282_46: # in Loop: Header=BB282_43 Depth=1 +.LBB282_45: # in Loop: Header=BB282_42 Depth=1 ori $a0, $zero, 120 st.d $a0, $s4, 128 ori $a0, $zero, 120 @@ -71895,12 +71905,12 @@ mapping0_forward: # @mapping0_forward move $s1, $a0 move $s7, $zero st.d $a0, $s4, 112 - b .LBB282_48 + b .LBB282_47 .p2align 4, , 16 -.LBB282_47: # in Loop: Header=BB282_43 Depth=1 +.LBB282_46: # in Loop: Header=BB282_42 Depth=1 ld.d $s4, $fp, -144 # 8-byte Folded Reload -.LBB282_48: # %_vorbis_block_alloc.exit381 - # in Loop: Header=BB282_43 Depth=1 +.LBB282_47: # %_vorbis_block_alloc.exit381 + # in Loop: Header=BB282_42 Depth=1 ld.d $a0, $fp, -336 # 8-byte Folded Reload alsl.d $a2, $a0, $s6, 2 add.d $a0, $s1, $s7 @@ -71916,50 +71926,46 @@ mapping0_forward: # @mapping0_forward ld.d $s1, $fp, -152 # 8-byte Folded Reload ld.d $s4, $fp, -224 # 8-byte Folded Reload ld.d $a0, $fp, -328 # 8-byte Folded Reload - ld.d $a4, $fp, -400 # 8-byte Folded Reload - ld.d $a5, $fp, -408 # 8-byte Folded Reload ld.d $s5, $fp, -384 # 8-byte Folded Reload ori $a1, $zero, 2 - blt $a0, $a1, .LBB282_54 -# %bb.49: # %.lr.ph448.preheader - # in Loop: Header=BB282_43 Depth=1 + blt $a0, $a1, .LBB282_53 +# %bb.48: # %.lr.ph448.preheader + # in Loop: Header=BB282_42 Depth=1 ld.d $a0, $fp, -336 # 8-byte Folded Reload ori $a1, $zero, 4 - bltu $a0, $a1, .LBB282_51 -# %bb.50: # %vector.memcheck621 - # in Loop: Header=BB282_43 Depth=1 + bltu $a0, $a1, .LBB282_50 +# %bb.49: # %vector.memcheck621 + # in Loop: Header=BB282_42 Depth=1 add.d $a0, $s2, $s6 sub.d $a0, $a0, $s3 ori $a1, $zero, 16 - bgeu $a0, $a1, .LBB282_128 -.LBB282_51: # in Loop: Header=BB282_43 Depth=1 + bgeu $a0, $a1, .LBB282_127 +.LBB282_50: # in Loop: Header=BB282_42 Depth=1 move $a2, $zero -.LBB282_52: # %.lr.ph448.preheader688 - # in Loop: Header=BB282_43 Depth=1 +.LBB282_51: # %.lr.ph448.preheader688 + # in Loop: Header=BB282_42 Depth=1 add.d $a0, $s6, $s2 alsl.d $a0, $a2, $a0, 2 alsl.d $a1, $a2, $s3, 2 - ld.d $a3, $fp, -432 # 8-byte Folded Reload + ld.d $a3, $fp, -416 # 8-byte Folded Reload sub.d $a2, $a3, $a2 .p2align 4, , 16 -.LBB282_53: # %.lr.ph448 - # Parent Loop BB282_43 Depth=1 +.LBB282_52: # %.lr.ph448 + # Parent Loop BB282_42 Depth=1 # => This Inner Loop Header: Depth=2 ld.wu $a3, $a1, 0 bstrpick.d $a3, $a3, 30, 0 - fld.s $fa0, $a4, %pc_lo12(.LCPI282_0) - fld.s $fa1, $a5, %pc_lo12(.LCPI282_1) - movgr2fr.w $fa2, $a3 - ffint.s.w $fa2, $fa2 - fmul.s $fa0, $fa2, $fa0 - fadd.s $fa0, $fa0, $fa1 + movgr2fr.w $fa0, $a3 + ffint.s.w $fa0, $fa0 + fmul.s $fa0, $fa0, $fs1 + fadd.s $fa0, $fa0, $fs2 fst.s $fa0, $a0, 0 addi.d $a0, $a0, 4 addi.d $a2, $a2, -1 addi.d $a1, $a1, 4 - bnez $a2, .LBB282_53 -.LBB282_54: # %._crit_edge449 - # in Loop: Header=BB282_43 Depth=1 + bnez $a2, .LBB282_52 +.LBB282_53: # %._crit_edge449 + # in Loop: Header=BB282_42 Depth=1 move $a0, $s4 move $a1, $s7 move $a2, $s5 @@ -71975,15 +71981,15 @@ mapping0_forward: # @mapping0_forward pcaddu18i $ra, %call36(_vp_tonemask) jirl $ra, $ra, 0 ld.w $a1, $s4, 0 - blez $a1, .LBB282_57 -# %bb.55: # %.lr.ph.i - # in Loop: Header=BB282_43 Depth=1 + blez $a1, .LBB282_56 +# %bb.54: # %.lr.ph.i + # in Loop: Header=BB282_42 Depth=1 ld.d $a0, $s4, 8 fld.s $fa0, $a0, 16 move $a0, $zero slli.d $a1, $a1, 2 .p2align 4, , 16 -.LBB282_56: # Parent Loop BB282_43 Depth=1 +.LBB282_55: # Parent Loop BB282_42 Depth=1 # => This Inner Loop Header: Depth=2 ld.d $a2, $s4, 24 ld.d $a2, $a2, 8 @@ -72000,19 +72006,19 @@ mapping0_forward: # @mapping0_forward fsel $fa1, $fa1, $fa2, $fcc0 fstx.s $fa1, $s6, $a0 addi.d $a0, $a0, 4 - bne $a1, $a0, .LBB282_56 -.LBB282_57: # %_vp_offset_and_mix.exit - # in Loop: Header=BB282_43 Depth=1 + bne $a1, $a0, .LBB282_55 +.LBB282_56: # %_vp_offset_and_mix.exit + # in Loop: Header=BB282_42 Depth=1 ld.d $s5, $fp, -168 # 8-byte Folded Reload slli.d $a0, $s5, 2 ld.d $a1, $fp, -248 # 8-byte Folded Reload ldx.w $a0, $a1, $a0 slli.d $a1, $a0, 2 - ld.d $a2, $fp, -424 # 8-byte Folded Reload + ld.d $a2, $fp, -408 # 8-byte Folded Reload ldx.w $a1, $a2, $a1 ori $a2, $zero, 1 - bne $a1, $a2, .LBB282_133 -# %bb.58: # in Loop: Header=BB282_43 Depth=1 + bne $a1, $a2, .LBB282_132 +# %bb.57: # in Loop: Header=BB282_42 Depth=1 ld.d $a1, $s1, 88 slli.d $a0, $a0, 3 ldx.d $a1, $a1, $a0 @@ -72031,25 +72037,25 @@ mapping0_forward: # @mapping0_forward ld.d $a0, $s0, 104 ld.d $a0, $a0, 136 ld.d $a0, $a0, 144 - beqz $a0, .LBB282_42 -# %bb.59: # in Loop: Header=BB282_43 Depth=1 + beqz $a0, .LBB282_41 +# %bb.58: # in Loop: Header=BB282_42 Depth=1 move $a2, $s3 ld.d $a0, $s3, 0 ld.d $a0, $a0, 56 - beqz $a0, .LBB282_42 -# %bb.60: # in Loop: Header=BB282_43 Depth=1 + beqz $a0, .LBB282_41 +# %bb.59: # in Loop: Header=BB282_42 Depth=1 st.d $a2, $fp, -208 # 8-byte Folded Spill ld.w $a1, $s4, 0 ld.d $s0, $fp, -384 # 8-byte Folded Reload - blez $a1, .LBB282_63 -# %bb.61: # %.lr.ph.i383 - # in Loop: Header=BB282_43 Depth=1 + blez $a1, .LBB282_62 +# %bb.60: # %.lr.ph.i383 + # in Loop: Header=BB282_42 Depth=1 ld.d $a0, $s4, 8 fld.s $fa0, $a0, 20 move $a0, $zero slli.d $a1, $a1, 2 .p2align 4, , 16 -.LBB282_62: # Parent Loop BB282_43 Depth=1 +.LBB282_61: # Parent Loop BB282_42 Depth=1 # => This Inner Loop Header: Depth=2 ld.d $a2, $s4, 24 ld.d $a2, $a2, 16 @@ -72066,9 +72072,9 @@ mapping0_forward: # @mapping0_forward fsel $fa1, $fa1, $fa2, $fcc0 fstx.s $fa1, $s6, $a0 addi.d $a0, $a0, 4 - bne $a1, $a0, .LBB282_62 -.LBB282_63: # %_vp_offset_and_mix.exit389 - # in Loop: Header=BB282_43 Depth=1 + bne $a1, $a0, .LBB282_61 +.LBB282_62: # %_vp_offset_and_mix.exit389 + # in Loop: Header=BB282_42 Depth=1 ld.d $a0, $fp, -248 # 8-byte Folded Reload alsl.d $a0, $s5, $a0, 2 st.d $a0, $fp, -168 # 8-byte Folded Spill @@ -72086,15 +72092,15 @@ mapping0_forward: # @mapping0_forward ld.d $a1, $a1, 0 st.d $a0, $a1, 112 ld.w $a1, $s4, 0 - blez $a1, .LBB282_66 -# %bb.64: # %.lr.ph.i390 - # in Loop: Header=BB282_43 Depth=1 + blez $a1, .LBB282_65 +# %bb.63: # %.lr.ph.i390 + # in Loop: Header=BB282_42 Depth=1 ld.d $a0, $s4, 8 fld.s $fa0, $a0, 12 move $a0, $zero slli.d $a1, $a1, 2 .p2align 4, , 16 -.LBB282_65: # Parent Loop BB282_43 Depth=1 +.LBB282_64: # Parent Loop BB282_42 Depth=1 # => This Inner Loop Header: Depth=2 ld.d $a2, $s4, 24 ld.d $a2, $a2, 0 @@ -72111,9 +72117,9 @@ mapping0_forward: # @mapping0_forward fsel $fa1, $fa1, $fa2, $fcc0 fstx.s $fa1, $s6, $a0 addi.d $a0, $a0, 4 - bne $a1, $a0, .LBB282_65 -.LBB282_66: # %_vp_offset_and_mix.exit396 - # in Loop: Header=BB282_43 Depth=1 + bne $a1, $a0, .LBB282_64 +.LBB282_65: # %_vp_offset_and_mix.exit396 + # in Loop: Header=BB282_42 Depth=1 ld.d $a0, $fp, -168 # 8-byte Folded Reload ld.w $a0, $a0, 0 ld.d $a1, $s1, 88 @@ -72128,33 +72134,33 @@ mapping0_forward: # @mapping0_forward ld.d $a1, $t2, 0 st.d $a0, $a1, 0 ori $s4, $zero, 1 - vld $vr7, $fp, -272 # 16-byte Folded Reload - vld $vr8, $fp, -288 # 16-byte Folded Reload - vld $vr9, $fp, -304 # 16-byte Folded Reload + vld $vr6, $fp, -272 # 16-byte Folded Reload + vld $vr7, $fp, -288 # 16-byte Folded Reload + vld $vr8, $fp, -304 # 16-byte Folded Reload ori $t1, $zero, 7 - b .LBB282_69 -.LBB282_67: # in Loop: Header=BB282_69 Depth=2 + b .LBB282_68 +.LBB282_66: # in Loop: Header=BB282_68 Depth=2 ld.d $s1, $fp, -152 # 8-byte Folded Reload .p2align 4, , 16 -.LBB282_68: # %floor1_interpolate_fit.exit - # in Loop: Header=BB282_69 Depth=2 +.LBB282_67: # %floor1_interpolate_fit.exit + # in Loop: Header=BB282_68 Depth=2 ld.d $a1, $t2, 0 slli.d $a2, $s4, 3 addi.d $s4, $s4, 1 stx.d $a0, $a1, $a2 - beq $s4, $t1, .LBB282_97 -.LBB282_69: # Parent Loop BB282_43 Depth=1 + beq $s4, $t1, .LBB282_96 +.LBB282_68: # Parent Loop BB282_42 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB282_88 Depth 3 - # Child Loop BB282_81 Depth 3 + # Child Loop BB282_87 Depth 3 + # Child Loop BB282_80 Depth 3 ld.d $a1, $t2, 0 ld.d $s5, $a1, 0 move $a0, $zero - beqz $s5, .LBB282_68 -# %bb.70: # in Loop: Header=BB282_69 Depth=2 + beqz $s5, .LBB282_67 +# %bb.69: # in Loop: Header=BB282_68 Depth=2 ld.d $t3, $a1, 56 - beqz $t3, .LBB282_68 -# %bb.71: # in Loop: Header=BB282_69 Depth=2 + beqz $t3, .LBB282_67 +# %bb.70: # in Loop: Header=BB282_68 Depth=2 ld.d $a0, $fp, -168 # 8-byte Folded Reload ld.w $a0, $a0, 0 ld.d $a1, $s1, 88 @@ -72169,13 +72175,13 @@ mapping0_forward: # @mapping0_forward ld.d $s6, $a1, 112 bstrins.d $s1, $zero, 2, 0 add.d $a1, $s0, $s1 - bge $a0, $a1, .LBB282_75 -# %bb.72: # in Loop: Header=BB282_69 Depth=2 + bge $a0, $a1, .LBB282_74 +# %bb.71: # in Loop: Header=BB282_68 Depth=2 st.d $t4, $fp, -320 # 8-byte Folded Spill st.d $t3, $fp, -312 # 8-byte Folded Spill ld.d $s3, $fp, -144 # 8-byte Folded Reload - beqz $s6, .LBB282_74 -# %bb.73: # in Loop: Header=BB282_69 Depth=2 + beqz $s6, .LBB282_73 +# %bb.72: # in Loop: Header=BB282_68 Depth=2 ori $a0, $zero, 16 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 @@ -72186,7 +72192,7 @@ mapping0_forward: # @mapping0_forward st.d $a2, $a0, 8 st.d $s6, $a0, 0 st.d $a0, $s3, 144 -.LBB282_74: # in Loop: Header=BB282_69 Depth=2 +.LBB282_73: # in Loop: Header=BB282_68 Depth=2 st.d $s1, $s3, 128 move $a0, $s1 pcaddu18i $ra, %call36(malloc) @@ -72194,9 +72200,9 @@ mapping0_forward: # @mapping0_forward move $s6, $a0 move $s0, $zero st.d $a0, $s3, 112 - vld $vr7, $fp, -272 # 16-byte Folded Reload - vld $vr8, $fp, -288 # 16-byte Folded Reload - vld $vr9, $fp, -304 # 16-byte Folded Reload + vld $vr6, $fp, -272 # 16-byte Folded Reload + vld $vr7, $fp, -288 # 16-byte Folded Reload + vld $vr8, $fp, -304 # 16-byte Folded Reload ori $t1, $zero, 7 ld.d $t2, $fp, -208 # 8-byte Folded Reload ld.d $t3, $fp, -312 # 8-byte Folded Reload @@ -72204,17 +72210,17 @@ mapping0_forward: # @mapping0_forward add.d $a0, $s6, $s0 add.d $a1, $s0, $s1 st.d $a1, $s3, 120 - bgtz $t4, .LBB282_76 - b .LBB282_67 + bgtz $t4, .LBB282_75 + b .LBB282_66 .p2align 4, , 16 -.LBB282_75: # in Loop: Header=BB282_69 Depth=2 +.LBB282_74: # in Loop: Header=BB282_68 Depth=2 ld.d $s3, $fp, -144 # 8-byte Folded Reload add.d $a0, $s6, $s0 add.d $a1, $s0, $s1 st.d $a1, $s3, 120 - blez $t4, .LBB282_67 -.LBB282_76: # %.lr.ph.i398 - # in Loop: Header=BB282_69 Depth=2 + blez $t4, .LBB282_66 +.LBB282_75: # %.lr.ph.i398 + # in Loop: Header=BB282_68 Depth=2 slli.d $a1, $s4, 16 ld.d $a2, $fp, -176 # 8-byte Folded Reload and $a2, $a1, $a2 @@ -72228,30 +72234,30 @@ mapping0_forward: # @mapping0_forward lu12i.w $a2, 16 sub.d $a2, $a2, $a1 ori $a3, $zero, 4 - bgeu $t4, $a3, .LBB282_84 -# %bb.77: # in Loop: Header=BB282_69 Depth=2 + bgeu $t4, $a3, .LBB282_83 +# %bb.76: # in Loop: Header=BB282_68 Depth=2 move $a3, $zero -.LBB282_78: # %scalar.ph594.preheader - # in Loop: Header=BB282_69 Depth=2 +.LBB282_77: # %scalar.ph594.preheader + # in Loop: Header=BB282_68 Depth=2 ld.d $s1, $fp, -152 # 8-byte Folded Reload -.LBB282_79: # %scalar.ph594.preheader - # in Loop: Header=BB282_69 Depth=2 +.LBB282_78: # %scalar.ph594.preheader + # in Loop: Header=BB282_68 Depth=2 sub.d $a4, $t4, $a3 alsl.d $a5, $a3, $s0, 2 add.d $a5, $s6, $a5 alsl.d $a6, $a3, $t3, 2 alsl.d $a3, $a3, $s5, 2 - b .LBB282_81 + b .LBB282_80 .p2align 4, , 16 -.LBB282_80: # in Loop: Header=BB282_81 Depth=3 +.LBB282_79: # in Loop: Header=BB282_80 Depth=3 addi.d $a4, $a4, -1 addi.d $a5, $a5, 4 addi.d $a6, $a6, 4 addi.d $a3, $a3, 4 - beqz $a4, .LBB282_68 -.LBB282_81: # %scalar.ph594 - # Parent Loop BB282_43 Depth=1 - # Parent Loop BB282_69 Depth=2 + beqz $a4, .LBB282_67 +.LBB282_80: # %scalar.ph594 + # Parent Loop BB282_42 Depth=1 + # Parent Loop BB282_68 Depth=2 # => This Inner Loop Header: Depth=3 ld.wu $a7, $a3, 0 ld.wu $t0, $a6, 0 @@ -72265,34 +72271,34 @@ mapping0_forward: # @mapping0_forward st.w $a7, $a5, 0 ld.bu $t0, $a3, 1 andi $t0, $t0, 128 - beqz $t0, .LBB282_80 -# %bb.82: # in Loop: Header=BB282_81 Depth=3 + beqz $t0, .LBB282_79 +# %bb.81: # in Loop: Header=BB282_80 Depth=3 ld.bu $t0, $a6, 1 andi $t0, $t0, 128 - beqz $t0, .LBB282_80 -# %bb.83: # in Loop: Header=BB282_81 Depth=3 + beqz $t0, .LBB282_79 +# %bb.82: # in Loop: Header=BB282_80 Depth=3 or $a7, $a7, $s8 st.w $a7, $a5, 0 - b .LBB282_80 -.LBB282_84: # %vector.memcheck583 - # in Loop: Header=BB282_69 Depth=2 + b .LBB282_79 +.LBB282_83: # %vector.memcheck583 + # in Loop: Header=BB282_68 Depth=2 add.d $a4, $a0, $s7 add.d $a3, $s5, $s7 sltu $a3, $a0, $a3 sltu $a5, $s5, $a4 and $a5, $a3, $a5 move $a3, $zero - bnez $a5, .LBB282_78 -# %bb.85: # %vector.memcheck583 - # in Loop: Header=BB282_69 Depth=2 + bnez $a5, .LBB282_77 +# %bb.84: # %vector.memcheck583 + # in Loop: Header=BB282_68 Depth=2 add.d $a5, $t3, $s7 sltu $a5, $a0, $a5 sltu $a4, $t3, $a4 and $a4, $a5, $a4 ld.d $s1, $fp, -152 # 8-byte Folded Reload - bnez $a4, .LBB282_79 -# %bb.86: # %vector.ph596 - # in Loop: Header=BB282_69 Depth=2 + bnez $a4, .LBB282_78 +# %bb.85: # %vector.ph596 + # in Loop: Header=BB282_68 Depth=2 bstrpick.d $a3, $t4, 30, 2 slli.d $a3, $a3, 2 vreplgr2vr.w $vr0, $a2 @@ -72301,119 +72307,119 @@ mapping0_forward: # @mapping0_forward move $a5, $t3 move $a6, $a0 move $a7, $s5 - b .LBB282_88 + b .LBB282_87 .p2align 4, , 16 -.LBB282_87: # %pred.store.continue616 - # in Loop: Header=BB282_88 Depth=3 +.LBB282_86: # %pred.store.continue616 + # in Loop: Header=BB282_87 Depth=3 addi.d $a7, $a7, 16 addi.d $a6, $a6, 16 addi.d $a4, $a4, -4 addi.d $a5, $a5, 16 - beqz $a4, .LBB282_96 -.LBB282_88: # %vector.body603 - # Parent Loop BB282_43 Depth=1 - # Parent Loop BB282_69 Depth=2 + beqz $a4, .LBB282_95 +.LBB282_87: # %vector.body603 + # Parent Loop BB282_42 Depth=1 + # Parent Loop BB282_68 Depth=2 # => This Inner Loop Header: Depth=3 vld $vr3, $a7, 0 vld $vr4, $a5, 0 - vand.v $vr2, $vr3, $vr7 + vand.v $vr2, $vr3, $vr6 vmul.w $vr2, $vr2, $vr0 - vand.v $vr5, $vr4, $vr7 + vand.v $vr5, $vr4, $vr6 vmadd.w $vr2, $vr5, $vr1 - vadd.w $vr2, $vr2, $vr8 + vadd.w $vr2, $vr2, $vr7 vsrai.w $vr2, $vr2, 16 - vand.v $vr3, $vr3, $vr8 + vand.v $vr3, $vr3, $vr7 vseqi.w $vr3, $vr3, 0 - vxor.v $vr3, $vr3, $vr9 - vand.v $vr4, $vr4, $vr8 + vxor.v $vr3, $vr3, $vr8 + vand.v $vr4, $vr4, $vr7 vseqi.w $vr4, $vr4, 0 - vxor.v $vr4, $vr4, $vr9 + vxor.v $vr4, $vr4, $vr8 vand.v $vr3, $vr3, $vr4 vpickve2gr.w $t0, $vr3, 0 andi $t0, $t0, 1 vst $vr2, $a6, 0 - beqz $t0, .LBB282_92 -# %bb.89: # %pred.store.if609 - # in Loop: Header=BB282_88 Depth=3 + beqz $t0, .LBB282_91 +# %bb.88: # %pred.store.if609 + # in Loop: Header=BB282_87 Depth=3 vpickve2gr.w $t0, $vr2, 0 or $t0, $t0, $s8 st.w $t0, $a6, 0 vpickve2gr.w $t0, $vr3, 1 andi $t0, $t0, 1 - bnez $t0, .LBB282_93 -.LBB282_90: # %pred.store.continue612 - # in Loop: Header=BB282_88 Depth=3 + bnez $t0, .LBB282_92 +.LBB282_89: # %pred.store.continue612 + # in Loop: Header=BB282_87 Depth=3 vpickve2gr.w $t0, $vr3, 2 andi $t0, $t0, 1 - beqz $t0, .LBB282_94 -.LBB282_91: # %pred.store.if613 - # in Loop: Header=BB282_88 Depth=3 + beqz $t0, .LBB282_93 +.LBB282_90: # %pred.store.if613 + # in Loop: Header=BB282_87 Depth=3 vpickve2gr.w $t0, $vr2, 2 or $t0, $t0, $s8 st.w $t0, $a6, 8 vpickve2gr.w $t0, $vr3, 3 andi $t0, $t0, 1 - beqz $t0, .LBB282_87 - b .LBB282_95 + beqz $t0, .LBB282_86 + b .LBB282_94 .p2align 4, , 16 -.LBB282_92: # %pred.store.continue610 - # in Loop: Header=BB282_88 Depth=3 +.LBB282_91: # %pred.store.continue610 + # in Loop: Header=BB282_87 Depth=3 vpickve2gr.w $t0, $vr3, 1 andi $t0, $t0, 1 - beqz $t0, .LBB282_90 -.LBB282_93: # %pred.store.if611 - # in Loop: Header=BB282_88 Depth=3 + beqz $t0, .LBB282_89 +.LBB282_92: # %pred.store.if611 + # in Loop: Header=BB282_87 Depth=3 vpickve2gr.w $t0, $vr2, 1 or $t0, $t0, $s8 st.w $t0, $a6, 4 vpickve2gr.w $t0, $vr3, 2 andi $t0, $t0, 1 - bnez $t0, .LBB282_91 -.LBB282_94: # %pred.store.continue614 - # in Loop: Header=BB282_88 Depth=3 + bnez $t0, .LBB282_90 +.LBB282_93: # %pred.store.continue614 + # in Loop: Header=BB282_87 Depth=3 vpickve2gr.w $t0, $vr3, 3 andi $t0, $t0, 1 - beqz $t0, .LBB282_87 -.LBB282_95: # %pred.store.if615 - # in Loop: Header=BB282_88 Depth=3 + beqz $t0, .LBB282_86 +.LBB282_94: # %pred.store.if615 + # in Loop: Header=BB282_87 Depth=3 vpickve2gr.w $t0, $vr2, 3 or $t0, $t0, $s8 st.w $t0, $a6, 12 - b .LBB282_87 -.LBB282_96: # %middle.block618 - # in Loop: Header=BB282_69 Depth=2 - beq $a3, $t4, .LBB282_68 - b .LBB282_79 - .p2align 4, , 16 -.LBB282_97: # %.preheader433.preheader - # in Loop: Header=BB282_43 Depth=1 + b .LBB282_86 +.LBB282_95: # %middle.block618 + # in Loop: Header=BB282_68 Depth=2 + beq $a3, $t4, .LBB282_67 + b .LBB282_78 + .p2align 4, , 16 +.LBB282_96: # %.preheader433.preheader + # in Loop: Header=BB282_42 Depth=1 ori $s4, $zero, 8 - b .LBB282_100 -.LBB282_98: # in Loop: Header=BB282_100 Depth=2 + b .LBB282_99 +.LBB282_97: # in Loop: Header=BB282_99 Depth=2 ld.d $s1, $fp, -152 # 8-byte Folded Reload .p2align 4, , 16 -.LBB282_99: # %floor1_interpolate_fit.exit410 - # in Loop: Header=BB282_100 Depth=2 +.LBB282_98: # %floor1_interpolate_fit.exit410 + # in Loop: Header=BB282_99 Depth=2 ld.d $a1, $t2, 0 slli.d $a2, $s4, 3 addi.d $s4, $s4, 1 stx.d $a0, $a1, $a2 ori $a0, $zero, 14 - beq $s4, $a0, .LBB282_42 -.LBB282_100: # %.preheader433 - # Parent Loop BB282_43 Depth=1 + beq $s4, $a0, .LBB282_41 +.LBB282_99: # %.preheader433 + # Parent Loop BB282_42 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB282_119 Depth 3 - # Child Loop BB282_112 Depth 3 + # Child Loop BB282_118 Depth 3 + # Child Loop BB282_111 Depth 3 ld.d $a1, $t2, 0 ld.d $s5, $a1, 56 move $a0, $zero - beqz $s5, .LBB282_99 -# %bb.101: # %.preheader433 - # in Loop: Header=BB282_100 Depth=2 + beqz $s5, .LBB282_98 +# %bb.100: # %.preheader433 + # in Loop: Header=BB282_99 Depth=2 ld.d $t3, $a1, 112 - beqz $t3, .LBB282_99 -# %bb.102: # in Loop: Header=BB282_100 Depth=2 + beqz $t3, .LBB282_98 +# %bb.101: # in Loop: Header=BB282_99 Depth=2 ld.d $a0, $fp, -168 # 8-byte Folded Reload ld.w $a0, $a0, 0 ld.d $a1, $s1, 88 @@ -72428,13 +72434,13 @@ mapping0_forward: # @mapping0_forward ld.d $s6, $a1, 112 bstrins.d $s1, $zero, 2, 0 add.d $a1, $s0, $s1 - bge $a0, $a1, .LBB282_106 -# %bb.103: # in Loop: Header=BB282_100 Depth=2 + bge $a0, $a1, .LBB282_105 +# %bb.102: # in Loop: Header=BB282_99 Depth=2 st.d $t4, $fp, -320 # 8-byte Folded Spill st.d $t3, $fp, -312 # 8-byte Folded Spill ld.d $s3, $fp, -144 # 8-byte Folded Reload - beqz $s6, .LBB282_105 -# %bb.104: # in Loop: Header=BB282_100 Depth=2 + beqz $s6, .LBB282_104 +# %bb.103: # in Loop: Header=BB282_99 Depth=2 ori $a0, $zero, 16 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 @@ -72445,7 +72451,7 @@ mapping0_forward: # @mapping0_forward st.d $a2, $a0, 8 st.d $s6, $a0, 0 st.d $a0, $s3, 144 -.LBB282_105: # in Loop: Header=BB282_100 Depth=2 +.LBB282_104: # in Loop: Header=BB282_99 Depth=2 st.d $s1, $s3, 128 move $a0, $s1 pcaddu18i $ra, %call36(malloc) @@ -72453,26 +72459,26 @@ mapping0_forward: # @mapping0_forward move $s6, $a0 move $s0, $zero st.d $a0, $s3, 112 - vld $vr7, $fp, -272 # 16-byte Folded Reload - vld $vr8, $fp, -288 # 16-byte Folded Reload - vld $vr9, $fp, -304 # 16-byte Folded Reload + vld $vr6, $fp, -272 # 16-byte Folded Reload + vld $vr7, $fp, -288 # 16-byte Folded Reload + vld $vr8, $fp, -304 # 16-byte Folded Reload ld.d $t2, $fp, -208 # 8-byte Folded Reload ld.d $t3, $fp, -312 # 8-byte Folded Reload ld.d $t4, $fp, -320 # 8-byte Folded Reload add.d $a0, $s6, $s0 add.d $a1, $s0, $s1 st.d $a1, $s3, 120 - bgtz $t4, .LBB282_107 - b .LBB282_98 + bgtz $t4, .LBB282_106 + b .LBB282_97 .p2align 4, , 16 -.LBB282_106: # in Loop: Header=BB282_100 Depth=2 +.LBB282_105: # in Loop: Header=BB282_99 Depth=2 ld.d $s3, $fp, -144 # 8-byte Folded Reload add.d $a0, $s6, $s0 add.d $a1, $s0, $s1 st.d $a1, $s3, 120 - blez $t4, .LBB282_98 -.LBB282_107: # %.lr.ph.i404 - # in Loop: Header=BB282_100 Depth=2 + blez $t4, .LBB282_97 +.LBB282_106: # %.lr.ph.i404 + # in Loop: Header=BB282_99 Depth=2 slli.d $a1, $s4, 16 addu16i.d $a1, $a1, -7 ld.d $a2, $fp, -176 # 8-byte Folded Reload @@ -72487,30 +72493,30 @@ mapping0_forward: # @mapping0_forward lu12i.w $a2, 16 sub.d $a2, $a2, $a1 ori $a3, $zero, 4 - bgeu $t4, $a3, .LBB282_115 -# %bb.108: # in Loop: Header=BB282_100 Depth=2 + bgeu $t4, $a3, .LBB282_114 +# %bb.107: # in Loop: Header=BB282_99 Depth=2 move $a3, $zero -.LBB282_109: # %scalar.ph.preheader - # in Loop: Header=BB282_100 Depth=2 +.LBB282_108: # %scalar.ph.preheader + # in Loop: Header=BB282_99 Depth=2 ld.d $s1, $fp, -152 # 8-byte Folded Reload -.LBB282_110: # %scalar.ph.preheader - # in Loop: Header=BB282_100 Depth=2 +.LBB282_109: # %scalar.ph.preheader + # in Loop: Header=BB282_99 Depth=2 sub.d $a4, $t4, $a3 alsl.d $a5, $a3, $s0, 2 add.d $a5, $s6, $a5 alsl.d $a6, $a3, $t3, 2 alsl.d $a3, $a3, $s5, 2 - b .LBB282_112 + b .LBB282_111 .p2align 4, , 16 -.LBB282_111: # in Loop: Header=BB282_112 Depth=3 +.LBB282_110: # in Loop: Header=BB282_111 Depth=3 addi.d $a4, $a4, -1 addi.d $a5, $a5, 4 addi.d $a6, $a6, 4 addi.d $a3, $a3, 4 - beqz $a4, .LBB282_99 -.LBB282_112: # %scalar.ph - # Parent Loop BB282_43 Depth=1 - # Parent Loop BB282_100 Depth=2 + beqz $a4, .LBB282_98 +.LBB282_111: # %scalar.ph + # Parent Loop BB282_42 Depth=1 + # Parent Loop BB282_99 Depth=2 # => This Inner Loop Header: Depth=3 ld.wu $a7, $a3, 0 ld.wu $t0, $a6, 0 @@ -72524,34 +72530,34 @@ mapping0_forward: # @mapping0_forward st.w $a7, $a5, 0 ld.bu $t0, $a3, 1 andi $t0, $t0, 128 - beqz $t0, .LBB282_111 -# %bb.113: # in Loop: Header=BB282_112 Depth=3 + beqz $t0, .LBB282_110 +# %bb.112: # in Loop: Header=BB282_111 Depth=3 ld.bu $t0, $a6, 1 andi $t0, $t0, 128 - beqz $t0, .LBB282_111 -# %bb.114: # in Loop: Header=BB282_112 Depth=3 + beqz $t0, .LBB282_110 +# %bb.113: # in Loop: Header=BB282_111 Depth=3 or $a7, $a7, $s8 st.w $a7, $a5, 0 - b .LBB282_111 -.LBB282_115: # %vector.memcheck - # in Loop: Header=BB282_100 Depth=2 + b .LBB282_110 +.LBB282_114: # %vector.memcheck + # in Loop: Header=BB282_99 Depth=2 add.d $a4, $a0, $s7 add.d $a3, $s5, $s7 sltu $a3, $a0, $a3 sltu $a5, $s5, $a4 and $a5, $a3, $a5 move $a3, $zero - bnez $a5, .LBB282_109 -# %bb.116: # %vector.memcheck - # in Loop: Header=BB282_100 Depth=2 + bnez $a5, .LBB282_108 +# %bb.115: # %vector.memcheck + # in Loop: Header=BB282_99 Depth=2 add.d $a5, $t3, $s7 sltu $a5, $a0, $a5 sltu $a4, $t3, $a4 and $a4, $a5, $a4 ld.d $s1, $fp, -152 # 8-byte Folded Reload - bnez $a4, .LBB282_110 -# %bb.117: # %vector.ph - # in Loop: Header=BB282_100 Depth=2 + bnez $a4, .LBB282_109 +# %bb.116: # %vector.ph + # in Loop: Header=BB282_99 Depth=2 bstrpick.d $a3, $t4, 30, 2 slli.d $a3, $a3, 2 vreplgr2vr.w $vr0, $a2 @@ -72560,119 +72566,121 @@ mapping0_forward: # @mapping0_forward move $a5, $t3 move $a6, $a0 move $a7, $s5 - b .LBB282_119 + b .LBB282_118 .p2align 4, , 16 -.LBB282_118: # %pred.store.continue582 - # in Loop: Header=BB282_119 Depth=3 +.LBB282_117: # %pred.store.continue582 + # in Loop: Header=BB282_118 Depth=3 addi.d $a7, $a7, 16 addi.d $a6, $a6, 16 addi.d $a4, $a4, -4 addi.d $a5, $a5, 16 - beqz $a4, .LBB282_127 -.LBB282_119: # %vector.body - # Parent Loop BB282_43 Depth=1 - # Parent Loop BB282_100 Depth=2 + beqz $a4, .LBB282_126 +.LBB282_118: # %vector.body + # Parent Loop BB282_42 Depth=1 + # Parent Loop BB282_99 Depth=2 # => This Inner Loop Header: Depth=3 vld $vr3, $a7, 0 vld $vr4, $a5, 0 - vand.v $vr2, $vr3, $vr7 + vand.v $vr2, $vr3, $vr6 vmul.w $vr2, $vr2, $vr0 - vand.v $vr5, $vr4, $vr7 + vand.v $vr5, $vr4, $vr6 vmadd.w $vr2, $vr5, $vr1 - vadd.w $vr2, $vr2, $vr8 + vadd.w $vr2, $vr2, $vr7 vsrai.w $vr2, $vr2, 16 - vand.v $vr3, $vr3, $vr8 + vand.v $vr3, $vr3, $vr7 vseqi.w $vr3, $vr3, 0 - vxor.v $vr3, $vr3, $vr9 - vand.v $vr4, $vr4, $vr8 + vxor.v $vr3, $vr3, $vr8 + vand.v $vr4, $vr4, $vr7 vseqi.w $vr4, $vr4, 0 - vxor.v $vr4, $vr4, $vr9 + vxor.v $vr4, $vr4, $vr8 vand.v $vr3, $vr3, $vr4 vpickve2gr.w $t0, $vr3, 0 andi $t0, $t0, 1 vst $vr2, $a6, 0 - beqz $t0, .LBB282_123 -# %bb.120: # %pred.store.if - # in Loop: Header=BB282_119 Depth=3 + beqz $t0, .LBB282_122 +# %bb.119: # %pred.store.if + # in Loop: Header=BB282_118 Depth=3 vpickve2gr.w $t0, $vr2, 0 or $t0, $t0, $s8 st.w $t0, $a6, 0 vpickve2gr.w $t0, $vr3, 1 andi $t0, $t0, 1 - bnez $t0, .LBB282_124 -.LBB282_121: # %pred.store.continue578 - # in Loop: Header=BB282_119 Depth=3 + bnez $t0, .LBB282_123 +.LBB282_120: # %pred.store.continue578 + # in Loop: Header=BB282_118 Depth=3 vpickve2gr.w $t0, $vr3, 2 andi $t0, $t0, 1 - beqz $t0, .LBB282_125 -.LBB282_122: # %pred.store.if579 - # in Loop: Header=BB282_119 Depth=3 + beqz $t0, .LBB282_124 +.LBB282_121: # %pred.store.if579 + # in Loop: Header=BB282_118 Depth=3 vpickve2gr.w $t0, $vr2, 2 or $t0, $t0, $s8 st.w $t0, $a6, 8 vpickve2gr.w $t0, $vr3, 3 andi $t0, $t0, 1 - beqz $t0, .LBB282_118 - b .LBB282_126 + beqz $t0, .LBB282_117 + b .LBB282_125 .p2align 4, , 16 -.LBB282_123: # %pred.store.continue - # in Loop: Header=BB282_119 Depth=3 +.LBB282_122: # %pred.store.continue + # in Loop: Header=BB282_118 Depth=3 vpickve2gr.w $t0, $vr3, 1 andi $t0, $t0, 1 - beqz $t0, .LBB282_121 -.LBB282_124: # %pred.store.if577 - # in Loop: Header=BB282_119 Depth=3 + beqz $t0, .LBB282_120 +.LBB282_123: # %pred.store.if577 + # in Loop: Header=BB282_118 Depth=3 vpickve2gr.w $t0, $vr2, 1 or $t0, $t0, $s8 st.w $t0, $a6, 4 vpickve2gr.w $t0, $vr3, 2 andi $t0, $t0, 1 - bnez $t0, .LBB282_122 -.LBB282_125: # %pred.store.continue580 - # in Loop: Header=BB282_119 Depth=3 + bnez $t0, .LBB282_121 +.LBB282_124: # %pred.store.continue580 + # in Loop: Header=BB282_118 Depth=3 vpickve2gr.w $t0, $vr3, 3 andi $t0, $t0, 1 - beqz $t0, .LBB282_118 -.LBB282_126: # %pred.store.if581 - # in Loop: Header=BB282_119 Depth=3 + beqz $t0, .LBB282_117 +.LBB282_125: # %pred.store.if581 + # in Loop: Header=BB282_118 Depth=3 vpickve2gr.w $t0, $vr2, 3 or $t0, $t0, $s8 st.w $t0, $a6, 12 - b .LBB282_118 -.LBB282_127: # %middle.block - # in Loop: Header=BB282_100 Depth=2 - beq $a3, $t4, .LBB282_99 - b .LBB282_110 -.LBB282_128: # %vector.body627.preheader - # in Loop: Header=BB282_43 Depth=1 + b .LBB282_117 +.LBB282_126: # %middle.block + # in Loop: Header=BB282_99 Depth=2 + beq $a3, $t4, .LBB282_98 + b .LBB282_109 +.LBB282_127: # %vector.body627.preheader + # in Loop: Header=BB282_42 Depth=1 move $a0, $s3 move $a1, $s7 - ld.d $a2, $fp, -440 # 8-byte Folded Reload - vld $vr1, $fp, -464 # 16-byte Folded Reload - vld $vr2, $fp, -480 # 16-byte Folded Reload + ld.d $a2, $fp, -424 # 8-byte Folded Reload + ld.d $a3, $fp, -440 # 8-byte Folded Reload + ld.d $a4, $fp, -448 # 8-byte Folded Reload .p2align 4, , 16 -.LBB282_129: # %vector.body627 - # Parent Loop BB282_43 Depth=1 +.LBB282_128: # %vector.body627 + # Parent Loop BB282_42 Depth=1 # => This Inner Loop Header: Depth=2 vld $vr0, $a0, 0 vbitclri.w $vr0, $vr0, 31 vffint.s.wu $vr0, $vr0 + vreplgr2vr.w $vr1, $a3 vfmul.s $vr0, $vr0, $vr1 - vfadd.s $vr0, $vr0, $vr2 + vreplgr2vr.w $vr1, $a4 + vfadd.s $vr0, $vr0, $vr1 vst $vr0, $a1, 0 addi.d $a2, $a2, -4 addi.d $a1, $a1, 16 addi.d $a0, $a0, 16 - bnez $a2, .LBB282_129 -# %bb.130: # %middle.block631 - # in Loop: Header=BB282_43 Depth=1 - ld.d $a1, $fp, -440 # 8-byte Folded Reload + bnez $a2, .LBB282_128 +# %bb.129: # %middle.block631 + # in Loop: Header=BB282_42 Depth=1 + ld.d $a1, $fp, -424 # 8-byte Folded Reload move $a2, $a1 + ld.d $a0, $fp, -416 # 8-byte Folded Reload + beq $a1, $a0, .LBB282_53 + b .LBB282_51 +.LBB282_130: # %.critedge366 ld.d $a0, $fp, -432 # 8-byte Folded Reload - beq $a1, $a0, .LBB282_54 - b .LBB282_52 -.LBB282_131: # %.critedge366 - ld.d $a0, $fp, -448 # 8-byte Folded Reload fst.s $fs0, $a0, 8 ld.d $s8, $fp, -160 # 8-byte Folded Reload ld.w $s1, $s8, 4 @@ -72695,8 +72703,8 @@ mapping0_forward: # @mapping0_forward ld.d $s6, $fp, -232 # 8-byte Folded Reload ld.w $a0, $s6, 1156 lu12i.w $a1, 1 - beqz $a0, .LBB282_134 -# %bb.132: + beqz $a0, .LBB282_133 +# %bb.131: ori $a0, $a1, 848 ld.d $a1, $fp, -368 # 8-byte Folded Reload add.d $a1, $a1, $a0 @@ -72718,28 +72726,28 @@ mapping0_forward: # @mapping0_forward ld.w $s1, $s8, 4 st.d $a0, $fp, -408 # 8-byte Folded Spill slli.d $a2, $s1, 3 - b .LBB282_135 -.LBB282_133: + b .LBB282_134 +.LBB282_132: addi.w $a0, $zero, -1 - b .LBB282_216 -.LBB282_134: + b .LBB282_215 +.LBB282_133: # implicit-def: $r4 # kill: killed $r4 # implicit-def: $r4 # kill: killed $r4 ld.d $s7, $fp, -144 # 8-byte Folded Reload ld.d $s0, $fp, -224 # 8-byte Folded Reload -.LBB282_135: +.LBB282_134: ld.d $a0, $fp, -352 # 8-byte Folded Reload move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ld.d $a0, $s0, 8 ld.w $a0, $a0, 500 - beqz $a0, .LBB282_139 -# %bb.136: - blez $s1, .LBB282_139 -# %bb.137: # %.lr.ph456 + beqz $a0, .LBB282_138 +# %bb.135: + blez $s1, .LBB282_138 +# %bb.136: # %.lr.ph456 move $s1, $zero ld.d $a0, $fp, -328 # 8-byte Folded Reload slli.d $a0, $a0, 1 @@ -72750,7 +72758,7 @@ mapping0_forward: # @mapping0_forward ld.d $s4, $fp, -216 # 8-byte Folded Reload ld.d $s5, $fp, -352 # 8-byte Folded Reload .p2align 4, , 16 -.LBB282_138: # =>This Inner Loop Header: Depth=1 +.LBB282_137: # =>This Inner Loop Header: Depth=1 ld.d $a1, $s4, 0 sub.d $a2, $sp, $s3 move $sp, $a2 @@ -72762,13 +72770,13 @@ mapping0_forward: # @mapping0_forward addi.d $s1, $s1, 1 addi.d $s5, $s5, 8 addi.d $s4, $s4, 8 - blt $s1, $a0, .LBB282_138 -.LBB282_139: # %.loopexit + blt $s1, $a0, .LBB282_137 +.LBB282_138: # %.loopexit ld.d $a0, $s7, 104 ld.d $a0, $a0, 136 ld.d $a0, $a0, 144 - ld.d $a1, $fp, -488 # 8-byte Folded Reload - ld.d $a2, $fp, -496 # 8-byte Folded Reload + ld.d $a1, $fp, -456 # 8-byte Folded Reload + ld.d $a2, $fp, -464 # 8-byte Folded Reload add.d $a1, $a2, $a1 st.d $a1, $fp, -272 # 8-byte Folded Spill addi.d $a1, $s6, 4 @@ -72788,7 +72796,7 @@ mapping0_forward: # @mapping0_forward addi.d $a1, $a2, 2047 addi.d $a1, $a1, 41 st.d $a1, $fp, -328 # 8-byte Folded Spill - ld.d $a1, $fp, -448 # 8-byte Folded Reload + ld.d $a1, $fp, -432 # 8-byte Folded Reload addi.d $a1, $a1, 16 st.d $a1, $fp, -368 # 8-byte Folded Spill ori $a1, $zero, 7 @@ -72800,10 +72808,10 @@ mapping0_forward: # @mapping0_forward movgr2fr.w $fs0, $zero st.d $s2, $fp, -312 # 8-byte Folded Spill st.d $s0, $fp, -384 # 8-byte Folded Spill - b .LBB282_141 + b .LBB282_140 .p2align 4, , 16 -.LBB282_140: # %oggpack_writealign.exit - # in Loop: Header=BB282_141 Depth=1 +.LBB282_139: # %oggpack_writealign.exit + # in Loop: Header=BB282_140 Depth=1 ld.d $a3, $fp, -176 # 8-byte Folded Reload ld.w $a1, $s7, 8 addi.w $a0, $a0, 7 @@ -72824,21 +72832,21 @@ mapping0_forward: # @mapping0_forward or $a1, $a0, $a1 ld.d $a2, $fp, -200 # 8-byte Folded Reload addi.d $a0, $a2, 1 - bgeu $a2, $a1, .LBB282_215 -.LBB282_141: # =>This Loop Header: Depth=1 - # Child Loop BB282_146 Depth 2 - # Child Loop BB282_175 Depth 3 - # Child Loop BB282_156 Depth 3 - # Child Loop BB282_165 Depth 3 - # Child Loop BB282_168 Depth 3 - # Child Loop BB282_181 Depth 3 - # Child Loop BB282_183 Depth 4 - # Child Loop BB282_187 Depth 4 - # Child Loop BB282_191 Depth 4 - # Child Loop BB282_197 Depth 3 - # Child Loop BB282_201 Depth 3 - # Child Loop BB282_208 Depth 2 - # Child Loop BB282_211 Depth 3 + bgeu $a2, $a1, .LBB282_214 +.LBB282_140: # =>This Loop Header: Depth=1 + # Child Loop BB282_145 Depth 2 + # Child Loop BB282_174 Depth 3 + # Child Loop BB282_155 Depth 3 + # Child Loop BB282_164 Depth 3 + # Child Loop BB282_167 Depth 3 + # Child Loop BB282_180 Depth 3 + # Child Loop BB282_182 Depth 4 + # Child Loop BB282_186 Depth 4 + # Child Loop BB282_190 Depth 4 + # Child Loop BB282_196 Depth 3 + # Child Loop BB282_200 Depth 3 + # Child Loop BB282_207 Depth 2 + # Child Loop BB282_210 Depth 3 st.d $a0, $fp, -200 # 8-byte Folded Spill ori $a2, $zero, 1 move $a0, $s0 @@ -72852,8 +72860,8 @@ mapping0_forward: # @mapping0_forward pcaddu18i $ra, %call36(oggpack_write) jirl $ra, $ra, 0 ld.d $a0, $s7, 56 - beqz $a0, .LBB282_143 -# %bb.142: # in Loop: Header=BB282_141 Depth=1 + beqz $a0, .LBB282_142 +# %bb.141: # in Loop: Header=BB282_140 Depth=1 ld.d $a1, $s7, 48 ori $a2, $zero, 1 move $a0, $s0 @@ -72864,37 +72872,37 @@ mapping0_forward: # @mapping0_forward move $a0, $s0 pcaddu18i $ra, %call36(oggpack_write) jirl $ra, $ra, 0 -.LBB282_143: # in Loop: Header=BB282_141 Depth=1 +.LBB282_142: # in Loop: Header=BB282_140 Depth=1 ld.w $a0, $s8, 4 ld.d $a3, $fp, -200 # 8-byte Folded Reload - blez $a0, .LBB282_202 -# %bb.144: # %.lr.ph459 - # in Loop: Header=BB282_141 Depth=1 + blez $a0, .LBB282_201 +# %bb.143: # %.lr.ph459 + # in Loop: Header=BB282_140 Depth=1 move $a4, $zero ld.d $a0, $fp, -360 # 8-byte Folded Reload alsl.d $a0, $a3, $a0, 2 st.d $a0, $fp, -288 # 8-byte Folded Spill - b .LBB282_146 + b .LBB282_145 .p2align 4, , 16 -.LBB282_145: # %_vp_noise_normalize.exit - # in Loop: Header=BB282_146 Depth=2 +.LBB282_144: # %_vp_noise_normalize.exit + # in Loop: Header=BB282_145 Depth=2 ld.w $a0, $s8, 4 ld.d $a4, $fp, -176 # 8-byte Folded Reload addi.d $a4, $a4, 1 ld.d $a3, $fp, -200 # 8-byte Folded Reload - bge $a4, $a0, .LBB282_202 -.LBB282_146: # Parent Loop BB282_141 Depth=1 + bge $a4, $a0, .LBB282_201 +.LBB282_145: # Parent Loop BB282_140 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB282_175 Depth 3 - # Child Loop BB282_156 Depth 3 - # Child Loop BB282_165 Depth 3 - # Child Loop BB282_168 Depth 3 - # Child Loop BB282_181 Depth 3 - # Child Loop BB282_183 Depth 4 - # Child Loop BB282_187 Depth 4 - # Child Loop BB282_191 Depth 4 - # Child Loop BB282_197 Depth 3 - # Child Loop BB282_201 Depth 3 + # Child Loop BB282_174 Depth 3 + # Child Loop BB282_155 Depth 3 + # Child Loop BB282_164 Depth 3 + # Child Loop BB282_167 Depth 3 + # Child Loop BB282_180 Depth 3 + # Child Loop BB282_182 Depth 4 + # Child Loop BB282_186 Depth 4 + # Child Loop BB282_190 Depth 4 + # Child Loop BB282_196 Depth 3 + # Child Loop BB282_200 Depth 3 slli.d $s8, $a4, 2 ld.d $a1, $fp, -144 # 8-byte Folded Reload ld.d $a0, $a1, 0 @@ -72910,12 +72918,12 @@ mapping0_forward: # @mapping0_forward ld.d $s1, $a1, 112 ld.d $a2, $fp, -184 # 8-byte Folded Reload add.d $a1, $s0, $a2 - bge $a0, $a1, .LBB282_150 -# %bb.147: # in Loop: Header=BB282_146 Depth=2 + bge $a0, $a1, .LBB282_149 +# %bb.146: # in Loop: Header=BB282_145 Depth=2 move $s3, $a4 ld.d $s4, $fp, -144 # 8-byte Folded Reload - beqz $s1, .LBB282_149 -# %bb.148: # in Loop: Header=BB282_146 Depth=2 + beqz $s1, .LBB282_148 +# %bb.147: # in Loop: Header=BB282_145 Depth=2 ori $a0, $zero, 16 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 @@ -72927,7 +72935,7 @@ mapping0_forward: # @mapping0_forward ld.d $a2, $fp, -184 # 8-byte Folded Reload st.d $s1, $a0, 0 st.d $a0, $s4, 144 -.LBB282_149: # in Loop: Header=BB282_146 Depth=2 +.LBB282_148: # in Loop: Header=BB282_145 Depth=2 st.d $a2, $s4, 128 move $a0, $a2 pcaddu18i $ra, %call36(malloc) @@ -72938,12 +72946,12 @@ mapping0_forward: # @mapping0_forward st.d $a0, $s4, 112 ld.d $a3, $fp, -200 # 8-byte Folded Reload move $a4, $s3 - b .LBB282_151 + b .LBB282_150 .p2align 4, , 16 -.LBB282_150: # in Loop: Header=BB282_146 Depth=2 +.LBB282_149: # in Loop: Header=BB282_145 Depth=2 ld.d $s4, $fp, -144 # 8-byte Folded Reload -.LBB282_151: # %_vorbis_block_alloc.exit416 - # in Loop: Header=BB282_146 Depth=2 +.LBB282_150: # %_vorbis_block_alloc.exit416 + # in Loop: Header=BB282_145 Depth=2 add.d $s3, $s1, $s0 add.d $a0, $s0, $a2 st.d $a0, $s4, 120 @@ -72978,29 +72986,29 @@ mapping0_forward: # @mapping0_forward or $a2, $a2, $a3 ld.d $a3, $fp, -192 # 8-byte Folded Reload stx.w $a0, $a3, $s8 - blez $a2, .LBB282_157 -# %bb.152: # %.lr.ph.preheader.i - # in Loop: Header=BB282_146 Depth=2 + blez $a2, .LBB282_156 +# %bb.151: # %.lr.ph.preheader.i + # in Loop: Header=BB282_145 Depth=2 pcalau12i $a0, %pc_hi20(FLOOR1_fromdB_INV_LOOKUP) addi.d $a0, $a0, %pc_lo12(FLOOR1_fromdB_INV_LOOKUP) ori $a3, $zero, 4 - bgeu $a2, $a3, .LBB282_172 -# %bb.153: # in Loop: Header=BB282_146 Depth=2 + bgeu $a2, $a3, .LBB282_171 +# %bb.152: # in Loop: Header=BB282_145 Depth=2 move $a3, $zero -.LBB282_154: # %.lr.ph.i418.preheader - # in Loop: Header=BB282_146 Depth=2 +.LBB282_153: # %.lr.ph.i418.preheader + # in Loop: Header=BB282_145 Depth=2 ld.d $s8, $fp, -160 # 8-byte Folded Reload -.LBB282_155: # %.lr.ph.i418.preheader - # in Loop: Header=BB282_146 Depth=2 +.LBB282_154: # %.lr.ph.i418.preheader + # in Loop: Header=BB282_145 Depth=2 sub.d $a4, $a2, $a3 alsl.d $a5, $a3, $s5, 2 alsl.d $a6, $a3, $s0, 2 add.d $a6, $s1, $a6 alsl.d $a3, $a3, $s6, 2 .p2align 4, , 16 -.LBB282_156: # %.lr.ph.i418 - # Parent Loop BB282_141 Depth=1 - # Parent Loop BB282_146 Depth=2 +.LBB282_155: # %.lr.ph.i418 + # Parent Loop BB282_140 Depth=1 + # Parent Loop BB282_145 Depth=2 # => This Inner Loop Header: Depth=3 ld.w $a7, $a6, 0 fld.s $fa0, $a3, 0 @@ -73012,17 +73020,17 @@ mapping0_forward: # @mapping0_forward addi.d $a5, $a5, 4 addi.d $a6, $a6, 4 addi.d $a3, $a3, 4 - bnez $a4, .LBB282_156 - b .LBB282_158 + bnez $a4, .LBB282_155 + b .LBB282_157 .p2align 4, , 16 -.LBB282_157: # in Loop: Header=BB282_146 Depth=2 +.LBB282_156: # in Loop: Header=BB282_145 Depth=2 move $a2, $zero ld.d $s8, $fp, -160 # 8-byte Folded Reload -.LBB282_158: # %.preheader.i - # in Loop: Header=BB282_146 Depth=2 - bge $a2, $a1, .LBB282_160 -# %bb.159: # %.lr.ph22.preheader.i - # in Loop: Header=BB282_146 Depth=2 +.LBB282_157: # %.preheader.i + # in Loop: Header=BB282_145 Depth=2 + bge $a2, $a1, .LBB282_159 +# %bb.158: # %.lr.ph22.preheader.i + # in Loop: Header=BB282_145 Depth=2 alsl.d $a0, $a2, $s5, 2 sub.d $a1, $a2, $a1 nor $a1, $a1, $zero @@ -73032,43 +73040,43 @@ mapping0_forward: # @mapping0_forward move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 -.LBB282_160: # %_vp_remove_floor.exit - # in Loop: Header=BB282_146 Depth=2 +.LBB282_159: # %_vp_remove_floor.exit + # in Loop: Header=BB282_145 Depth=2 ld.d $a2, $s4, 8 ld.w $a1, $a2, 500 ld.w $a0, $s4, 0 vldi $vr4, -1200 vldi $vr5, -1168 vldi $vr6, -1040 - beqz $a1, .LBB282_171 -# %bb.161: # %.preheader87.i - # in Loop: Header=BB282_146 Depth=2 + beqz $a1, .LBB282_170 +# %bb.160: # %.preheader87.i + # in Loop: Header=BB282_145 Depth=2 ld.w $a3, $a2, 508 ld.w $a1, $a2, 512 slt $a4, $a3, $a0 masknez $a5, $a0, $a4 maskeqz $a3, $a3, $a4 or $a6, $a3, $a5 - blez $a6, .LBB282_177 -# %bb.162: # %.lr.ph.preheader.i424 - # in Loop: Header=BB282_146 Depth=2 + blez $a6, .LBB282_176 +# %bb.161: # %.lr.ph.preheader.i424 + # in Loop: Header=BB282_145 Depth=2 move $a3, $zero ori $a4, $zero, 8 - bltu $a6, $a4, .LBB282_167 -# %bb.163: # %.lr.ph.preheader.i424 - # in Loop: Header=BB282_146 Depth=2 + bltu $a6, $a4, .LBB282_166 +# %bb.162: # %.lr.ph.preheader.i424 + # in Loop: Header=BB282_145 Depth=2 ld.d $a4, $fp, -344 # 8-byte Folded Reload - beqz $a4, .LBB282_167 -# %bb.164: # %vector.ph653 - # in Loop: Header=BB282_146 Depth=2 + beqz $a4, .LBB282_166 +# %bb.163: # %vector.ph653 + # in Loop: Header=BB282_145 Depth=2 bstrpick.d $a3, $a6, 30, 3 slli.d $a3, $a3, 3 move $a4, $s5 move $a5, $a3 .p2align 4, , 16 -.LBB282_165: # %vector.body656 - # Parent Loop BB282_141 Depth=1 - # Parent Loop BB282_146 Depth=2 +.LBB282_164: # %vector.body656 + # Parent Loop BB282_140 Depth=1 + # Parent Loop BB282_145 Depth=2 # => This Inner Loop Header: Depth=3 vld $vr0, $a4, 0 vld $vr1, $a4, 16 @@ -73099,61 +73107,61 @@ mapping0_forward: # @mapping0_forward vst $vr2, $a7, 16 addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 - bnez $a5, .LBB282_165 -# %bb.166: # %middle.block661 - # in Loop: Header=BB282_146 Depth=2 + bnez $a5, .LBB282_164 +# %bb.165: # %middle.block661 + # in Loop: Header=BB282_145 Depth=2 move $a7, $a6 - beq $a3, $a6, .LBB282_170 -.LBB282_167: # %.lr.ph.i426.preheader - # in Loop: Header=BB282_146 Depth=2 + beq $a3, $a6, .LBB282_169 +.LBB282_166: # %.lr.ph.i426.preheader + # in Loop: Header=BB282_145 Depth=2 sub.d $a4, $a6, $a3 alsl.d $a3, $a3, $s5, 2 .p2align 4, , 16 -.LBB282_168: # %.lr.ph.i426 - # Parent Loop BB282_141 Depth=1 - # Parent Loop BB282_146 Depth=2 +.LBB282_167: # %.lr.ph.i426 + # Parent Loop BB282_140 Depth=1 + # Parent Loop BB282_145 Depth=2 # => This Inner Loop Header: Depth=3 fld.s $fa0, $a3, 0 frint.s $fa0, $fa0 fstx.s $fa0, $a3, $s2 addi.d $a4, $a4, -1 addi.d $a3, $a3, 4 - bnez $a4, .LBB282_168 -# %bb.169: # in Loop: Header=BB282_146 Depth=2 + bnez $a4, .LBB282_167 +# %bb.168: # in Loop: Header=BB282_145 Depth=2 move $a7, $a6 -.LBB282_170: # %.preheader85.i - # in Loop: Header=BB282_146 Depth=2 +.LBB282_169: # %.preheader85.i + # in Loop: Header=BB282_145 Depth=2 add.w $a3, $a7, $a1 - bge $a0, $a3, .LBB282_178 - b .LBB282_193 + bge $a0, $a3, .LBB282_177 + b .LBB282_192 .p2align 4, , 16 -.LBB282_171: # in Loop: Header=BB282_146 Depth=2 +.LBB282_170: # in Loop: Header=BB282_145 Depth=2 move $a7, $zero - b .LBB282_193 + b .LBB282_192 .p2align 4, , 16 -.LBB282_172: # %vector.memcheck664 - # in Loop: Header=BB282_146 Depth=2 +.LBB282_171: # %vector.memcheck664 + # in Loop: Header=BB282_145 Depth=2 sub.d $a4, $s5, $s6 move $a3, $zero ori $a5, $zero, 16 - bltu $a4, $a5, .LBB282_154 -# %bb.173: # %vector.memcheck664 - # in Loop: Header=BB282_146 Depth=2 + bltu $a4, $a5, .LBB282_153 +# %bb.172: # %vector.memcheck664 + # in Loop: Header=BB282_145 Depth=2 add.d $a4, $s0, $s1 sub.d $a4, $s5, $a4 ld.d $s8, $fp, -160 # 8-byte Folded Reload - bltu $a4, $a5, .LBB282_155 -# %bb.174: # %vector.ph670 - # in Loop: Header=BB282_146 Depth=2 + bltu $a4, $a5, .LBB282_154 +# %bb.173: # %vector.ph670 + # in Loop: Header=BB282_145 Depth=2 bstrpick.d $a3, $a2, 30, 2 slli.d $a3, $a3, 2 move $a4, $s6 move $a5, $s5 move $a6, $a3 .p2align 4, , 16 -.LBB282_175: # %vector.body673 - # Parent Loop BB282_141 Depth=1 - # Parent Loop BB282_146 Depth=2 +.LBB282_174: # %vector.body673 + # Parent Loop BB282_140 Depth=1 + # Parent Loop BB282_145 Depth=2 # => This Inner Loop Header: Depth=3 vld $vr0, $s3, 0 vshuf4i.w $vr1, $vr0, 50 @@ -73184,85 +73192,85 @@ mapping0_forward: # @mapping0_forward addi.d $a5, $a5, 16 addi.d $s3, $s3, 16 addi.d $a4, $a4, 16 - bnez $a6, .LBB282_175 -# %bb.176: # %middle.block678 - # in Loop: Header=BB282_146 Depth=2 - bne $a3, $a2, .LBB282_155 - b .LBB282_158 + bnez $a6, .LBB282_174 +# %bb.175: # %middle.block678 + # in Loop: Header=BB282_145 Depth=2 + bne $a3, $a2, .LBB282_154 + b .LBB282_157 .p2align 4, , 16 -.LBB282_177: # in Loop: Header=BB282_146 Depth=2 +.LBB282_176: # in Loop: Header=BB282_145 Depth=2 move $a7, $zero add.w $a3, $a7, $a1 - blt $a0, $a3, .LBB282_193 -.LBB282_178: # %.preheader84.lr.ph.i - # in Loop: Header=BB282_146 Depth=2 - blez $a1, .LBB282_192 -# %bb.179: # %.preheader84.us.preheader.i - # in Loop: Header=BB282_146 Depth=2 + blt $a0, $a3, .LBB282_192 +.LBB282_177: # %.preheader84.lr.ph.i + # in Loop: Header=BB282_145 Depth=2 + blez $a1, .LBB282_191 +# %bb.178: # %.preheader84.us.preheader.i + # in Loop: Header=BB282_145 Depth=2 ld.d $a4, $fp, -352 # 8-byte Folded Reload ldx.d $a4, $a4, $s7 ld.d $a5, $fp, -336 # 8-byte Folded Reload alsl.d $a5, $a5, $s5, 2 sub.d $a6, $zero, $a6 - b .LBB282_181 + b .LBB282_180 .p2align 4, , 16 -.LBB282_180: # %.loopexit.us.i - # in Loop: Header=BB282_181 Depth=3 +.LBB282_179: # %.loopexit.us.i + # in Loop: Header=BB282_180 Depth=3 add.d $t0, $a3, $a1 move $a7, $a3 move $a3, $t0 - blt $a0, $t0, .LBB282_193 -.LBB282_181: # %.preheader84.us.i - # Parent Loop BB282_141 Depth=1 - # Parent Loop BB282_146 Depth=2 + blt $a0, $t0, .LBB282_192 +.LBB282_180: # %.preheader84.us.i + # Parent Loop BB282_140 Depth=1 + # Parent Loop BB282_145 Depth=2 # => This Loop Header: Depth=3 - # Child Loop BB282_183 Depth 4 - # Child Loop BB282_187 Depth 4 - # Child Loop BB282_191 Depth 4 + # Child Loop BB282_182 Depth 4 + # Child Loop BB282_186 Depth 4 + # Child Loop BB282_190 Depth 4 addi.w $t0, $a7, 0 fmov.s $fa0, $fs0 - bge $t0, $a3, .LBB282_184 -# %bb.182: # %.lr.ph91.us.i.preheader - # in Loop: Header=BB282_181 Depth=3 + bge $t0, $a3, .LBB282_183 +# %bb.181: # %.lr.ph91.us.i.preheader + # in Loop: Header=BB282_180 Depth=3 alsl.d $t1, $t0, $s5, 2 fmov.s $fa0, $fs0 .p2align 4, , 16 -.LBB282_183: # %.lr.ph91.us.i - # Parent Loop BB282_141 Depth=1 - # Parent Loop BB282_146 Depth=2 - # Parent Loop BB282_181 Depth=3 +.LBB282_182: # %.lr.ph91.us.i + # Parent Loop BB282_140 Depth=1 + # Parent Loop BB282_145 Depth=2 + # Parent Loop BB282_180 Depth=3 # => This Inner Loop Header: Depth=4 fld.s $fa1, $t1, 0 fmul.s $fa1, $fa1, $fa1 fadd.s $fa0, $fa0, $fa1 addi.d $t0, $t0, 1 addi.d $t1, $t1, 4 - blt $t0, $a3, .LBB282_183 -.LBB282_184: # %.preheader.us.i - # in Loop: Header=BB282_181 Depth=3 + blt $t0, $a3, .LBB282_182 +.LBB282_183: # %.preheader.us.i + # in Loop: Header=BB282_180 Depth=3 add.w $t1, $a6, $a7 move $t2, $zero move $t3, $zero move $a7, $t1 move $t0, $a1 - b .LBB282_187 + b .LBB282_186 .p2align 4, , 16 -.LBB282_185: # in Loop: Header=BB282_187 Depth=4 +.LBB282_184: # in Loop: Header=BB282_186 Depth=4 alsl.d $t5, $t5, $s5, 2 frint.s $fa1, $fa1 fstx.s $fa1, $a5, $t4 fld.s $fa1, $t5, 0 fmul.s $fa1, $fa1, $fa1 fsub.s $fa0, $fa0, $fa1 -.LBB282_186: # in Loop: Header=BB282_187 Depth=4 +.LBB282_185: # in Loop: Header=BB282_186 Depth=4 addi.d $t3, $t3, 1 addi.w $t2, $t2, 1 addi.d $t0, $t0, -1 addi.w $a7, $a7, 1 - beq $a1, $t3, .LBB282_180 -.LBB282_187: # Parent Loop BB282_141 Depth=1 - # Parent Loop BB282_146 Depth=2 - # Parent Loop BB282_181 Depth=3 + beq $a1, $t3, .LBB282_179 +.LBB282_186: # Parent Loop BB282_140 Depth=1 + # Parent Loop BB282_145 Depth=2 + # Parent Loop BB282_180 Depth=3 # => This Inner Loop Header: Depth=4 add.w $t4, $t1, $t2 slli.d $t4, $t4, 2 @@ -73271,25 +73279,25 @@ mapping0_forward: # @mapping0_forward fldx.s $fa1, $s5, $t4 fmul.s $fa2, $fa1, $fa1 fcmp.cult.s $fcc0, $fa2, $fa4 - bceqz $fcc0, .LBB282_185 -# %bb.188: # in Loop: Header=BB282_187 Depth=4 + bceqz $fcc0, .LBB282_184 +# %bb.187: # in Loop: Header=BB282_186 Depth=4 fld.d $fa2, $a2, 520 fcvt.d.s $fa3, $fa0 fcmp.clt.d $fcc0, $fa3, $fa2 - bcnez $fcc0, .LBB282_190 -# %bb.189: # in Loop: Header=BB282_187 Depth=4 + bcnez $fcc0, .LBB282_189 +# %bb.188: # in Loop: Header=BB282_186 Depth=4 fcopysign.s $fa1, $fa5, $fa1 fstx.s $fa1, $a5, $t4 fadd.s $fa0, $fa0, $fa6 - b .LBB282_186 + b .LBB282_185 .p2align 4, , 16 -.LBB282_190: # %._crit_edge.us.i - # in Loop: Header=BB282_181 Depth=3 - bge $t2, $a1, .LBB282_180 +.LBB282_189: # %._crit_edge.us.i + # in Loop: Header=BB282_180 Depth=3 + bge $t2, $a1, .LBB282_179 .p2align 4, , 16 -.LBB282_191: # Parent Loop BB282_141 Depth=1 - # Parent Loop BB282_146 Depth=2 - # Parent Loop BB282_181 Depth=3 +.LBB282_190: # Parent Loop BB282_140 Depth=1 + # Parent Loop BB282_145 Depth=2 + # Parent Loop BB282_180 Depth=3 # => This Inner Loop Header: Depth=4 slli.d $t1, $a7, 2 ldx.w $t1, $a4, $t1 @@ -73297,10 +73305,10 @@ mapping0_forward: # @mapping0_forward stx.w $zero, $a5, $t1 addi.d $t0, $t0, -1 addi.w $a7, $a7, 1 - bnez $t0, .LBB282_191 - b .LBB282_180 -.LBB282_192: # %.preheader84.i.preheader - # in Loop: Header=BB282_146 Depth=2 + bnez $t0, .LBB282_190 + b .LBB282_179 +.LBB282_191: # %.preheader84.i.preheader + # in Loop: Header=BB282_145 Depth=2 alsl.d $a2, $a1, $a7, 1 alsl.w $a3, $a1, $a7, 1 addi.w $a4, $a0, 1 @@ -73322,30 +73330,30 @@ mapping0_forward: # @mapping0_forward mul.d $a1, $a1, $a2 add.d $a7, $a7, $a1 .p2align 4, , 16 -.LBB282_193: # %.loopexit86.i - # in Loop: Header=BB282_146 Depth=2 +.LBB282_192: # %.loopexit86.i + # in Loop: Header=BB282_145 Depth=2 addi.w $a4, $a7, 0 - bge $a4, $a0, .LBB282_145 -# %bb.194: # %.lr.ph120.preheader.i - # in Loop: Header=BB282_146 Depth=2 + bge $a4, $a0, .LBB282_144 +# %bb.193: # %.lr.ph120.preheader.i + # in Loop: Header=BB282_145 Depth=2 sub.d $a2, $a0, $a4 ori $a1, $zero, 8 - bltu $a2, $a1, .LBB282_199 -# %bb.195: # %.lr.ph120.preheader.i - # in Loop: Header=BB282_146 Depth=2 + bltu $a2, $a1, .LBB282_198 +# %bb.194: # %.lr.ph120.preheader.i + # in Loop: Header=BB282_145 Depth=2 ld.d $a1, $fp, -344 # 8-byte Folded Reload - beqz $a1, .LBB282_199 -# %bb.196: # %vector.ph638 - # in Loop: Header=BB282_146 Depth=2 + beqz $a1, .LBB282_198 +# %bb.195: # %vector.ph638 + # in Loop: Header=BB282_145 Depth=2 move $a3, $a2 bstrins.d $a3, $zero, 2, 0 add.d $a1, $a3, $a4 alsl.d $a4, $a4, $s5, 2 move $a5, $a3 .p2align 4, , 16 -.LBB282_197: # %vector.body641 - # Parent Loop BB282_141 Depth=1 - # Parent Loop BB282_146 Depth=2 +.LBB282_196: # %vector.body641 + # Parent Loop BB282_140 Depth=1 + # Parent Loop BB282_145 Depth=2 # => This Inner Loop Header: Depth=3 vld $vr0, $a4, 0 vld $vr1, $a4, 16 @@ -73376,39 +73384,39 @@ mapping0_forward: # @mapping0_forward vst $vr2, $a6, 16 addi.d $a5, $a5, -8 addi.d $a4, $a4, 32 - bnez $a5, .LBB282_197 -# %bb.198: # %middle.block646 - # in Loop: Header=BB282_146 Depth=2 - beq $a2, $a3, .LBB282_145 - b .LBB282_200 + bnez $a5, .LBB282_196 +# %bb.197: # %middle.block646 + # in Loop: Header=BB282_145 Depth=2 + beq $a2, $a3, .LBB282_144 + b .LBB282_199 .p2align 4, , 16 -.LBB282_199: # in Loop: Header=BB282_146 Depth=2 +.LBB282_198: # in Loop: Header=BB282_145 Depth=2 move $a1, $a4 -.LBB282_200: # %.lr.ph120.i.preheader - # in Loop: Header=BB282_146 Depth=2 +.LBB282_199: # %.lr.ph120.i.preheader + # in Loop: Header=BB282_145 Depth=2 sub.d $a0, $a0, $a1 alsl.d $a1, $a1, $s5, 2 .p2align 4, , 16 -.LBB282_201: # %.lr.ph120.i - # Parent Loop BB282_141 Depth=1 - # Parent Loop BB282_146 Depth=2 +.LBB282_200: # %.lr.ph120.i + # Parent Loop BB282_140 Depth=1 + # Parent Loop BB282_145 Depth=2 # => This Inner Loop Header: Depth=3 fld.s $fa0, $a1, 0 frint.s $fa0, $fa0 fstx.s $fa0, $a1, $s2 addi.d $a0, $a0, -1 addi.d $a1, $a1, 4 - bnez $a0, .LBB282_201 - b .LBB282_145 + bnez $a0, .LBB282_200 + b .LBB282_144 .p2align 4, , 16 -.LBB282_202: # %._crit_edge460 - # in Loop: Header=BB282_141 Depth=1 +.LBB282_201: # %._crit_edge460 + # in Loop: Header=BB282_140 Depth=1 ld.d $s0, $fp, -232 # 8-byte Folded Reload ld.w $a0, $s0, 1156 slli.d $s1, $a3, 2 ld.d $s7, $fp, -144 # 8-byte Folded Reload - beqz $a0, .LBB282_204 -# %bb.203: # in Loop: Header=BB282_141 Depth=1 + beqz $a0, .LBB282_203 +# %bb.202: # in Loop: Header=BB282_140 Depth=1 ld.d $a0, $s7, 56 ld.d $a4, $s7, 0 ori $a1, $zero, 60 @@ -73430,21 +73438,21 @@ mapping0_forward: # @mapping0_forward pcaddu18i $ra, %call36(_vp_couple) jirl $ra, $ra, 0 addi.d $sp, $sp, 16 -.LBB282_204: # in Loop: Header=BB282_141 Depth=1 +.LBB282_203: # in Loop: Header=BB282_140 Depth=1 st.d $s1, $fp, -176 # 8-byte Folded Spill ld.w $a0, $s0, 0 - blez $a0, .LBB282_213 -# %bb.205: # %.lr.ph469.preheader - # in Loop: Header=BB282_141 Depth=1 + blez $a0, .LBB282_212 +# %bb.204: # %.lr.ph469.preheader + # in Loop: Header=BB282_140 Depth=1 move $s3, $zero - b .LBB282_208 + b .LBB282_207 .p2align 4, , 16 -.LBB282_206: # in Loop: Header=BB282_208 Depth=2 +.LBB282_205: # in Loop: Header=BB282_207 Depth=2 move $s1, $zero ld.d $s8, $fp, -304 # 8-byte Folded Reload ld.d $a3, $fp, -168 # 8-byte Folded Reload -.LBB282_207: # %._crit_edge465 - # in Loop: Header=BB282_208 Depth=2 +.LBB282_206: # %._crit_edge465 + # in Loop: Header=BB282_207 Depth=2 slli.d $s4, $a0, 2 move $s2, $s7 ld.d $s7, $fp, -328 # 8-byte Folded Reload @@ -73481,18 +73489,18 @@ mapping0_forward: # @mapping0_forward addi.d $s3, $s3, 1 ld.d $s2, $fp, -312 # 8-byte Folded Reload ld.d $s8, $fp, -160 # 8-byte Folded Reload - bge $s3, $a0, .LBB282_213 -.LBB282_208: # %.lr.ph469 - # Parent Loop BB282_141 Depth=1 + bge $s3, $a0, .LBB282_212 +.LBB282_207: # %.lr.ph469 + # Parent Loop BB282_140 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB282_211 Depth 3 + # Child Loop BB282_210 Depth 3 slli.d $a0, $s3, 2 ld.w $a1, $s8, 4 ld.d $a2, $fp, -320 # 8-byte Folded Reload ldx.w $a0, $a2, $a0 - blez $a1, .LBB282_206 -# %bb.209: # %.lr.ph464.preheader - # in Loop: Header=BB282_208 Depth=2 + blez $a1, .LBB282_205 +# %bb.208: # %.lr.ph464.preheader + # in Loop: Header=BB282_207 Depth=2 move $t2, $zero move $t3, $zero move $s1, $zero @@ -73501,21 +73509,21 @@ mapping0_forward: # @mapping0_forward ld.d $t1, $fp, -336 # 8-byte Folded Reload ld.d $s8, $fp, -304 # 8-byte Folded Reload ld.d $a3, $fp, -168 # 8-byte Folded Reload - b .LBB282_211 + b .LBB282_210 .p2align 4, , 16 -.LBB282_210: # in Loop: Header=BB282_211 Depth=3 +.LBB282_209: # in Loop: Header=BB282_210 Depth=3 addi.d $t3, $t3, 1 addi.d $t2, $t2, 8 addi.d $a5, $a5, 4 addi.d $a4, $a4, 4 - bgeu $t3, $a1, .LBB282_207 -.LBB282_211: # %.lr.ph464 - # Parent Loop BB282_141 Depth=1 - # Parent Loop BB282_208 Depth=2 + bgeu $t3, $a1, .LBB282_206 +.LBB282_210: # %.lr.ph464 + # Parent Loop BB282_140 Depth=1 + # Parent Loop BB282_207 Depth=2 # => This Inner Loop Header: Depth=3 ld.wu $a6, $a4, 0 - bne $s3, $a6, .LBB282_210 -# %bb.212: # in Loop: Header=BB282_211 Depth=3 + bne $s3, $a6, .LBB282_209 +# %bb.211: # in Loop: Header=BB282_210 Depth=3 ld.w $a6, $a5, 0 ld.d $a7, $s7, 0 slli.d $t0, $s1, 2 @@ -73526,14 +73534,14 @@ mapping0_forward: # @mapping0_forward slli.d $a7, $s1, 3 addi.w $s1, $s1, 1 stx.d $a6, $s8, $a7 - b .LBB282_210 + b .LBB282_209 .p2align 4, , 16 -.LBB282_213: # %._crit_edge470 - # in Loop: Header=BB282_141 Depth=1 +.LBB282_212: # %._crit_edge470 + # in Loop: Header=BB282_140 Depth=1 ld.w $a0, $s7, 16 ld.d $s0, $fp, -384 # 8-byte Folded Reload - blez $a0, .LBB282_140 -# %bb.214: # in Loop: Header=BB282_141 Depth=1 + blez $a0, .LBB282_139 +# %bb.213: # in Loop: Header=BB282_140 Depth=1 ori $a1, $zero, 8 sub.w $a2, $a1, $a0 move $a0, $s0 @@ -73541,28 +73549,28 @@ mapping0_forward: # @mapping0_forward pcaddu18i $ra, %call36(oggpack_write) jirl $ra, $ra, 0 ld.w $a0, $s7, 16 - b .LBB282_140 -.LBB282_215: + b .LBB282_139 +.LBB282_214: move $a0, $zero -.LBB282_216: # %.critedge - addi.d $sp, $fp, -496 - fld.d $fs4, $sp, 368 # 8-byte Folded Reload - fld.d $fs3, $sp, 376 # 8-byte Folded Reload - fld.d $fs2, $sp, 384 # 8-byte Folded Reload - fld.d $fs1, $sp, 392 # 8-byte Folded Reload - fld.d $fs0, $sp, 400 # 8-byte Folded Reload - ld.d $s8, $sp, 408 # 8-byte Folded Reload - ld.d $s7, $sp, 416 # 8-byte Folded Reload - ld.d $s6, $sp, 424 # 8-byte Folded Reload - ld.d $s5, $sp, 432 # 8-byte Folded Reload - ld.d $s4, $sp, 440 # 8-byte Folded Reload - ld.d $s3, $sp, 448 # 8-byte Folded Reload - ld.d $s2, $sp, 456 # 8-byte Folded Reload - ld.d $s1, $sp, 464 # 8-byte Folded Reload - ld.d $s0, $sp, 472 # 8-byte Folded Reload - ld.d $fp, $sp, 480 # 8-byte Folded Reload - ld.d $ra, $sp, 488 # 8-byte Folded Reload - addi.d $sp, $sp, 496 +.LBB282_215: # %.critedge + addi.d $sp, $fp, -464 + fld.d $fs4, $sp, 336 # 8-byte Folded Reload + fld.d $fs3, $sp, 344 # 8-byte Folded Reload + fld.d $fs2, $sp, 352 # 8-byte Folded Reload + fld.d $fs1, $sp, 360 # 8-byte Folded Reload + fld.d $fs0, $sp, 368 # 8-byte Folded Reload + ld.d $s8, $sp, 376 # 8-byte Folded Reload + ld.d $s7, $sp, 384 # 8-byte Folded Reload + ld.d $s6, $sp, 392 # 8-byte Folded Reload + ld.d $s5, $sp, 400 # 8-byte Folded Reload + ld.d $s4, $sp, 408 # 8-byte Folded Reload + ld.d $s3, $sp, 416 # 8-byte Folded Reload + ld.d $s2, $sp, 424 # 8-byte Folded Reload + ld.d $s1, $sp, 432 # 8-byte Folded Reload + ld.d $s0, $sp, 440 # 8-byte Folded Reload + ld.d $fp, $sp, 448 # 8-byte Folded Reload + ld.d $ra, $sp, 456 # 8-byte Folded Reload + addi.d $sp, $sp, 464 ret .Lfunc_end282: .size mapping0_forward, .Lfunc_end282-mapping0_forward @@ -76274,12 +76282,6 @@ seed_chase: # @seed_chase .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI331_1: - .word 0xbf3504f3 # float -0.707106769 -.LCPI331_2: - .word 0x3f3504f3 # float 0.707106769 .text .p2align 5 .type dradf4,@function @@ -76477,40 +76479,43 @@ dradf4: # @dradf4 blez $a1, .LBB331_17 # %bb.15: # %.lr.ph261.preheader slli.w $a4, $a0, 1 - slli.w $a6, $a0, 2 - add.d $a5, $a0, $a7 - addi.w $t1, $a5, -1 - add.w $t0, $t1, $t0 + slli.w $a5, $a0, 2 + add.d $a6, $a0, $a7 + addi.w $t1, $a6, -1 + add.w $a6, $t1, $t0 bstrpick.d $a0, $a0, 31, 0 slli.d $a0, $a0, 2 - addi.d $a5, $a2, -4 + addi.d $t0, $a2, -4 addi.d $a2, $a3, -4 alsl.d $a3, $a4, $a2, 2 - slli.d $a4, $a6, 2 - pcalau12i $a6, %pc_hi20(.LCPI331_1) - fld.s $fa0, $a6, %pc_lo12(.LCPI331_1) - pcalau12i $a6, %pc_hi20(.LCPI331_2) - fld.s $fa1, $a6, %pc_lo12(.LCPI331_2) - slli.d $a6, $t1, 2 - slli.d $t0, $t0, 2 + slli.d $a4, $a5, 2 + slli.d $a5, $t1, 2 + slli.d $a6, $a6, 2 slli.d $a7, $a7, 2 alsl.d $a7, $t1, $a7, 2 + lu12i.w $t1, -265392 + ori $t1, $t1, 1267 + lu32i.d $t1, 0 + movgr2fr.w $fa0, $t1 + lu12i.w $t1, 258896 + ori $t1, $t1, 1267 + movgr2fr.w $fa1, $t1 .p2align 4, , 16 .LBB331_16: # %.lr.ph261 # =>This Inner Loop Header: Depth=1 - add.d $t1, $a5, $a6 + add.d $t1, $t0, $a5 fld.s $fa2, $t1, 4 - add.d $t1, $a5, $t0 + add.d $t1, $t0, $a6 fld.s $fa3, $t1, 4 - fldx.s $fa4, $a5, $a0 + fldx.s $fa4, $t0, $a0 fsub.s $fa5, $fa2, $fa3 fmul.s $fa5, $fa5, $fa1 fadd.s $fa4, $fa4, $fa5 fstx.s $fa4, $a2, $a0 - fldx.s $fa4, $a5, $a0 + fldx.s $fa4, $t0, $a0 fsub.s $fa4, $fa4, $fa5 fstx.s $fa4, $a3, $a0 - add.d $t1, $a5, $a7 + add.d $t1, $t0, $a7 fld.s $fa4, $t1, 4 fadd.s $fa2, $fa2, $fa3 fmul.s $fa2, $fa2, $fa0 @@ -76518,7 +76523,7 @@ dradf4: # @dradf4 fsub.s $fa3, $fa2, $fa4 fst.s $fa3, $t2, 4 fld.s $fa3, $t1, 4 - add.d $a5, $a5, $a0 + add.d $t0, $t0, $a0 add.d $t1, $a3, $a0 fadd.s $fa2, $fa2, $fa3 fst.s $fa2, $t1, 4 @@ -76962,12 +76967,7 @@ dradf2: # @dradf2 .Lfunc_end332: .size dradf2, .Lfunc_end332-dradf2 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function dradfg -.LCPI333_0: - .word 0x40c90fdb # float 6.28318548 - .text - .p2align 5 + .p2align 5 # -- Begin function dradfg .type dradfg,@function dradfg: # @dradfg # %bb.0: @@ -76993,12 +76993,13 @@ dradfg: # @dradfg move $fp, $a2 move $s4, $a1 move $s0, $a0 - pcalau12i $a0, %pc_hi20(.LCPI333_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI333_0) ld.d $s7, $sp, 288 - movgr2fr.w $fa1, $a1 - ffint.s.w $fa1, $fa1 - fdiv.s $fa0, $fa0, $fa1 + movgr2fr.w $fa0, $a1 + ffint.s.w $fa0, $fa0 + lu12i.w $a0, 265360 + ori $a0, $a0, 4059 + movgr2fr.w $fa1, $a0 + fdiv.s $fa0, $fa1, $fa0 fcvt.d.s $fs1, $fa0 fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(cos) @@ -78516,12 +78517,6 @@ dradfg: # @dradfg .LCPI334_1: .dword 0 # 0x0 .dword -1 # 0xffffffffffffffff - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI334_2: - .word 0x3fb504f3 # float 1.41421354 -.LCPI334_3: - .word 0xbfb504f3 # float -1.41421354 .text .p2align 5 .type dradb4,@function @@ -79014,13 +79009,16 @@ dradb4: # @dradb4 slli.d $a6, $t2, 3 alsl.d $a6, $t2, $a6, 2 add.d $a6, $a3, $a6 - pcalau12i $a7, %pc_hi20(.LCPI334_2) - fld.s $fa0, $a7, %pc_lo12(.LCPI334_2) - pcalau12i $a7, %pc_hi20(.LCPI334_3) - fld.s $fa1, $a7, %pc_lo12(.LCPI334_3) slli.d $a7, $t1, 2 slli.d $t0, $t0, 2 alsl.d $t1, $t2, $a3, 3 + lu12i.w $t2, 260944 + ori $t2, $t2, 1267 + movgr2fr.w $fa0, $t2 + lu12i.w $t2, -263344 + ori $t2, $t2, 1267 + lu32i.d $t2, 0 + movgr2fr.w $fa1, $t2 .p2align 4, , 16 .LBB334_47: # %.lr.ph257 # =>This Inner Loop Header: Depth=1 @@ -79369,16 +79367,12 @@ dradb2: # @dradb2 .Lfunc_end335: .size dradb2, .Lfunc_end335-dradb2 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function dradb3 -.LCPI336_0: - .word 0x3f5db3d7 # float 0.866025388 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI336_1: + .p2align 4, 0x0 # -- Begin function dradb3 +.LCPI336_0: .dword 6 # 0x6 .dword 8 # 0x8 -.LCPI336_2: +.LCPI336_1: .dword 2 # 0x2 .dword 4 # 0x4 .text @@ -79386,7 +79380,7 @@ dradb2: # @dradb2 .type dradb3,@function dradb3: # @dradb3 # %bb.0: - blez $a1, .LBB336_79 + blez $a1, .LBB336_97 # %bb.1: # %.lr.ph.preheader addi.d $sp, $sp, -576 st.d $ra, $sp, 568 # 8-byte Folded Spill @@ -79400,270 +79394,456 @@ dradb3: # @dradb3 st.d $s6, $sp, 504 # 8-byte Folded Spill st.d $s7, $sp, 496 # 8-byte Folded Spill st.d $s8, $sp, 488 # 8-byte Folded Spill - mul.d $s4, $a1, $a0 - slli.d $s6, $s4, 1 - slli.w $fp, $a0, 1 - alsl.w $s0, $a0, $a0, 1 - addi.w $t0, $s6, 0 - addi.w $a6, $s4, 0 + mul.d $t0, $a1, $a0 + slli.d $s3, $t0, 1 + slli.w $s1, $a0, 1 + alsl.w $s6, $a0, $a0, 1 + addi.w $t1, $s3, 0 + addi.w $a6, $t0, 0 ori $a7, $zero, 28 - slli.d $s7, $s0, 2 - slli.d $s8, $t0, 2 - slli.d $t3, $a6, 2 - pcalau12i $t2, %pc_hi20(.LCPI336_0) - move $t0, $a3 - st.d $a3, $sp, 288 # 8-byte Folded Spill - st.d $a1, $sp, 192 # 8-byte Folded Spill - st.d $s0, $sp, 208 # 8-byte Folded Spill - bgeu $a1, $a7, .LBB336_80 + slli.d $ra, $s6, 2 + slli.d $t1, $t1, 2 + slli.d $t2, $a6, 2 + lu12i.w $s4, 259547 + st.d $s6, $sp, 224 # 8-byte Folded Spill + bgeu $a1, $a7, .LBB336_3 # %bb.2: - move $t1, $zero + move $t3, $zero move $a7, $zero move $t4, $zero - move $a6, $fp -.LBB336_3: # %.lr.ph.preheader645 - slli.d $t1, $t1, 2 + move $a6, $s1 + b .LBB336_23 +.LBB336_3: # %vector.scevcheck + ori $a6, $zero, 1 + move $t3, $zero + bne $a0, $a6, .LBB336_22 +# %bb.4: # %vector.scevcheck + addi.d $a6, $a1, -1 + addi.d $t6, $a2, 4 + bstrpick.d $t8, $a6, 31, 0 + slli.d $a6, $t8, 3 + alsl.d $a6, $t8, $a6, 2 + add.d $a7, $t6, $a6 + bltu $a7, $t6, .LBB336_22 +# %bb.5: # %vector.scevcheck + addi.d $t5, $a2, 8 + add.d $t7, $t5, $a6 + move $a6, $s1 + move $a7, $t3 + move $t4, $t3 + bltu $t7, $t5, .LBB336_23 +# %bb.6: # %vector.memcheck + st.d $s1, $sp, 328 # 8-byte Folded Spill + alsl.d $a6, $t8, $a3, 2 + addi.d $fp, $a6, 4 + alsl.d $t4, $a1, $a3, 2 + alsl.d $a6, $t8, $t4, 2 + addi.d $t7, $a6, 4 + sltu $a6, $a3, $t7 + sltu $a7, $t4, $fp + and $a6, $a6, $a7 + move $t3, $zero + bnez $a6, .LBB336_21 +# %bb.7: # %vector.memcheck + slli.w $a6, $a1, 1 + alsl.d $a6, $a6, $a3, 2 + alsl.d $a7, $t8, $a6, 2 + addi.d $a7, $a7, 4 + sltu $s0, $a3, $a7 + sltu $s1, $a6, $fp + and $s0, $s0, $s1 + bnez $s0, .LBB336_98 +# %bb.8: # %vector.memcheck + alsl.d $t8, $t8, $t8, 1 + slli.d $t8, $t8, 2 + add.d $s1, $a2, $t8 + addi.d $t8, $s1, 8 + sltu $s0, $a3, $t8 + sltu $s2, $t6, $fp + and $s0, $s0, $s2 + bnez $s0, .LBB336_98 +# %bb.9: # %vector.memcheck + move $s7, $s3 + addi.d $s0, $s1, 4 + sltu $s2, $a3, $s0 + sltu $s3, $a2, $fp + and $s2, $s2, $s3 + bnez $s2, .LBB336_99 +# %bb.10: # %vector.memcheck + addi.d $s1, $s1, 12 + sltu $s2, $a3, $s1 + sltu $fp, $t5, $fp + and $fp, $s2, $fp + move $s3, $s7 + bnez $fp, .LBB336_98 +# %bb.11: # %vector.memcheck + sltu $fp, $t4, $a7 + sltu $s2, $a6, $t7 + and $fp, $fp, $s2 + bnez $fp, .LBB336_98 +# %bb.12: # %vector.memcheck + sltu $fp, $t4, $t8 + sltu $s2, $t6, $t7 + and $fp, $fp, $s2 + bnez $fp, .LBB336_98 +# %bb.13: # %vector.memcheck + sltu $fp, $t4, $s0 + sltu $s2, $a2, $t7 + and $fp, $fp, $s2 + bnez $fp, .LBB336_98 +# %bb.14: # %vector.memcheck + sltu $t4, $t4, $s1 + sltu $t7, $t5, $t7 + and $t4, $t4, $t7 + ld.d $s6, $sp, 224 # 8-byte Folded Reload + bnez $t4, .LBB336_21 +# %bb.15: # %vector.memcheck + sltu $t4, $a6, $t8 + sltu $t6, $t6, $a7 + and $t4, $t4, $t6 + bnez $t4, .LBB336_21 +# %bb.16: # %vector.memcheck + sltu $t4, $a6, $s0 + sltu $t6, $a2, $a7 + and $t4, $t4, $t6 + bnez $t4, .LBB336_21 +# %bb.17: # %vector.memcheck + sltu $a6, $a6, $s1 + sltu $a7, $t5, $a7 + and $t5, $a6, $a7 + ld.d $s1, $sp, 328 # 8-byte Folded Reload + move $a6, $s1 + move $a7, $t3 + move $t4, $t3 + bnez $t5, .LBB336_23 +# %bb.18: # %vector.ph + move $t5, $zero + bstrpick.d $t8, $a1, 30, 2 + slli.d $t4, $t8, 2 + mul.d $t3, $t4, $s6 + slli.d $a6, $t8, 3 + alsl.d $a6, $t8, $a6, 2 + addi.d $a6, $a6, 2 + mul.d $a7, $t4, $a0 + slli.d $t6, $s6, 3 + alsl.d $t6, $s6, $t6, 2 + slli.d $t7, $s6, 4 + slli.d $fp, $t8, 5 + alsl.d $t8, $t8, $fp, 4 + slli.d $fp, $a0, 4 + addi.d $s0, $ra, 8 + addi.d $s1, $t6, 8 + ori $s2, $zero, 24 + lu12i.w $s3, -266240 + vreplgr2vr.w $vr0, $s3 + ori $s3, $s4, 983 + vreplgr2vr.w $vr1, $s3 + move $s3, $a3 + .p2align 4, , 16 +.LBB336_19: # %vector.body + # =>This Inner Loop Header: Depth=1 + add.d $s4, $a2, $t5 + add.d $s5, $a2, $s0 + add.d $s6, $a2, $s1 + fld.s $fa2, $s4, 4 + fld.s $fa3, $s5, -4 + fld.s $fa4, $s4, 28 + fld.s $fa5, $s6, -4 + vextrins.w $vr2, $vr3, 16 + vextrins.w $vr2, $vr4, 32 + vextrins.w $vr2, $vr5, 48 + add.d $s5, $a2, $s2 + fld.s $fa3, $s5, -24 + fld.s $fa4, $s5, -12 + fldx.s $fa5, $a2, $s2 + fldx.s $fa6, $a2, $t6 + vfadd.s $vr2, $vr2, $vr2 + vextrins.w $vr3, $vr4, 16 + vextrins.w $vr3, $vr5, 32 + vextrins.w $vr3, $vr6, 48 + vfmul.s $vr4, $vr2, $vr0 + vfadd.s $vr4, $vr3, $vr4 + vfadd.s $vr2, $vr3, $vr2 + fld.s $fa3, $s4, 8 + fldx.s $fa5, $a2, $s0 + fld.s $fa6, $s4, 32 + fldx.s $fa7, $a2, $s1 + vst $vr2, $s3, 0 + vextrins.w $vr3, $vr5, 16 + vextrins.w $vr3, $vr6, 32 + vextrins.w $vr3, $vr7, 48 + vfadd.s $vr2, $vr3, $vr3 + vfmul.s $vr2, $vr2, $vr1 + vfsub.s $vr3, $vr4, $vr2 + vstx $vr3, $s3, $t2 + vfadd.s $vr2, $vr4, $vr2 + vstx $vr2, $s3, $t1 + add.d $t6, $t6, $t7 + addi.d $t5, $t5, 48 + add.d $s3, $s3, $fp + add.d $s2, $s2, $t7 + addi.d $s0, $s0, 48 + addi.d $s1, $s1, 48 + bne $t8, $t5, .LBB336_19 +# %bb.20: # %middle.block + move $s3, $s7 + ld.d $s1, $sp, 328 # 8-byte Folded Reload + ld.d $s6, $sp, 224 # 8-byte Folded Reload + lu12i.w $s4, 259547 + bne $t4, $a1, .LBB336_23 + b .LBB336_25 +.LBB336_21: + ld.d $s1, $sp, 328 # 8-byte Folded Reload +.LBB336_22: + move $a6, $s1 + move $a7, $t3 + move $t4, $t3 +.LBB336_23: # %.lr.ph.preheader645 + slli.d $t3, $t3, 2 sub.d $t4, $a1, $t4 slli.d $a6, $a6, 2 addi.d $a6, $a6, -4 - fld.s $fa0, $t2, %pc_lo12(.LCPI336_0) - alsl.d $a7, $a7, $t0, 2 + alsl.d $a7, $a7, $a3, 2 slli.d $t5, $a0, 2 - vldi $vr1, -1056 + vldi $vr0, -1056 + ori $t6, $s4, 983 + movgr2fr.w $fa1, $t6 move $t6, $a2 .p2align 4, , 16 -.LBB336_4: # %.lr.ph +.LBB336_24: # %.lr.ph # =>This Inner Loop Header: Depth=1 fldx.s $fa2, $t6, $a6 - fldx.s $fa3, $t6, $t1 + fldx.s $fa3, $t6, $t3 add.d $t7, $t6, $a6 fadd.s $fa2, $fa2, $fa2 fadd.s $fa4, $fa3, $fa2 fst.s $fa4, $a7, 0 fld.s $fa4, $t7, 4 - fmul.s $fa2, $fa2, $fa1 + fmul.s $fa2, $fa2, $fa0 fadd.s $fa2, $fa3, $fa2 fadd.s $fa3, $fa4, $fa4 - fmul.s $fa3, $fa3, $fa0 + fmul.s $fa3, $fa3, $fa1 fsub.s $fa4, $fa2, $fa3 - fstx.s $fa4, $a7, $t3 + fstx.s $fa4, $a7, $t2 fadd.s $fa2, $fa2, $fa3 - fstx.s $fa2, $a7, $s8 - add.d $t6, $t6, $s7 + fstx.s $fa2, $a7, $t1 + add.d $t6, $t6, $ra addi.w $t4, $t4, -1 add.d $a7, $a7, $t5 - bnez $t4, .LBB336_4 -.LBB336_5: # %._crit_edge - ori $a3, $zero, 3 - blt $a0, $a3, .LBB336_78 -# %bb.6: # %.lr.ph161.us.preheader + bnez $t4, .LBB336_24 +.LBB336_25: # %._crit_edge + ori $a6, $zero, 3 + blt $a0, $a6, .LBB336_96 +# %bb.26: # %.lr.ph161.us.preheader + move $t5, $t0 move $t8, $zero - move $s3, $zero - move $t5, $zero - move $t0, $zero - ori $a6, $zero, 4 - sltu $a3, $a6, $a0 - masknez $a6, $a6, $a3 - maskeqz $a3, $a0, $a3 - or $a6, $a3, $a6 - addi.d $s2, $a6, -3 - st.d $s2, $sp, 416 # 8-byte Folded Spill - ld.d $t3, $sp, 288 # 8-byte Folded Reload - addi.d $t4, $t3, 4 - ld.d $a1, $sp, 192 # 8-byte Folded Reload - addi.d $a3, $a1, -1 - bstrpick.d $a3, $a3, 31, 0 - mul.d $a3, $a0, $a3 - slli.d $a1, $a6, 2 - addi.d $a1, $a1, -12 - addi.w $a6, $zero, -8 - and $a7, $a1, $a6 - alsl.d $a3, $a3, $a7, 2 - add.d $a3, $t3, $a3 + move $fp, $zero + st.d $zero, $sp, 480 # 8-byte Folded Spill + move $s0, $zero + ori $a7, $zero, 4 + sltu $a6, $a7, $a0 + masknez $a7, $a7, $a6 + maskeqz $a6, $a0, $a6 + or $a7, $a6, $a7 + addi.d $t4, $a7, -3 + addi.d $t0, $a3, 4 + addi.d $a6, $a1, -1 + bstrpick.d $a6, $a6, 31, 0 + mul.d $a6, $a0, $a6 + slli.d $a7, $a7, 2 + addi.d $s5, $a7, -12 + addi.w $a7, $zero, -8 + and $t1, $s5, $a7 + alsl.d $a6, $a6, $t1, 2 + add.d $a6, $a3, $a6 + addi.d $t3, $a6, 8 addi.d $t6, $a3, 8 - addi.d $t7, $t3, 8 - addi.d $s5, $a3, 12 - addi.d $a3, $a7, 8 - add.d $t1, $t3, $a3 - st.d $t1, $sp, 160 # 8-byte Folded Spill - addi.d $t1, $a7, 12 - add.d $s7, $t3, $t1 - st.d $s7, $sp, 152 # 8-byte Folded Spill - add.d $s7, $a2, $a3 - st.d $s7, $sp, 144 # 8-byte Folded Spill - sub.d $a7, $a2, $a7 - addi.d $s7, $a7, -12 - st.d $s7, $sp, 136 # 8-byte Folded Spill - add.d $t1, $a2, $t1 - st.d $t1, $sp, 128 # 8-byte Folded Spill - addi.d $a7, $a7, -8 - st.d $a7, $sp, 120 # 8-byte Folded Spill - ori $a1, $a1, 4 - add.d $a7, $a4, $a1 - addi.d $t1, $a4, 4 - add.d $s7, $a4, $a3 - add.d $s8, $a5, $a1 - addi.d $ra, $a5, 4 - add.d $s1, $a5, $a3 - slli.d $a1, $s2, 2 - ld.d $s2, $sp, 192 # 8-byte Folded Reload + addi.d $t7, $a6, 12 + addi.d $a6, $t1, 8 + add.d $t2, $a3, $a6 + st.d $t2, $sp, 168 # 8-byte Folded Spill + addi.d $t2, $t1, 12 + add.d $s4, $a3, $t2 + st.d $s4, $sp, 160 # 8-byte Folded Spill + add.d $s4, $a2, $a6 + st.d $s4, $sp, 152 # 8-byte Folded Spill + sub.d $t1, $a2, $t1 + ori $s7, $s5, 4 + add.d $s4, $a4, $s7 + addi.d $s5, $a4, 4 + add.d $s8, $a4, $a6 + add.d $s7, $a5, $s7 + st.d $a3, $sp, 216 # 8-byte Folded Spill + addi.d $s2, $a5, 4 + move $a3, $a1 + add.d $ra, $a5, $a6 + slli.d $a1, $t4, 2 + and $a1, $a1, $a7 + st.d $a1, $sp, 288 # 8-byte Folded Spill + sltu $a1, $t0, $t7 + sltu $a6, $t6, $t3 and $a1, $a1, $a6 - st.d $a1, $sp, 280 # 8-byte Folded Spill - sltu $a1, $t4, $s5 - sltu $a3, $t7, $t6 - and $a1, $a1, $a3 - st.d $a1, $sp, 104 # 8-byte Folded Spill - sltu $a1, $t4, $a7 - sltu $a3, $a4, $t6 - and $a1, $a1, $a3 - sltu $a3, $t4, $s7 - sltu $a6, $t1, $t6 - and $a3, $a3, $a6 - or $a1, $a1, $a3 - sltu $a3, $t4, $s8 - sltu $a6, $a5, $t6 - and $a3, $a3, $a6 - or $a1, $a1, $a3 - st.d $t4, $sp, 184 # 8-byte Folded Spill - sltu $a3, $t4, $s1 - st.d $t6, $sp, 176 # 8-byte Folded Spill - sltu $a6, $ra, $t6 - and $a3, $a3, $a6 - or $a1, $a1, $a3 - st.d $a1, $sp, 96 # 8-byte Folded Spill - st.d $a7, $sp, 112 # 8-byte Folded Spill - sltu $a1, $t7, $a7 - sltu $a3, $a4, $s5 - and $a1, $a1, $a3 - st.d $s7, $sp, 272 # 8-byte Folded Spill - sltu $a3, $t7, $s7 - st.d $t1, $sp, 408 # 8-byte Folded Spill - sltu $a6, $t1, $s5 - and $a3, $a3, $a6 - or $a1, $a1, $a3 - st.d $s8, $sp, 264 # 8-byte Folded Spill - sltu $a3, $t7, $s8 - sltu $a6, $a5, $s5 - and $a3, $a3, $a6 - or $a1, $a1, $a3 - st.d $t7, $sp, 432 # 8-byte Folded Spill - st.d $s1, $sp, 352 # 8-byte Folded Spill - sltu $a3, $t7, $s1 - st.d $s5, $sp, 168 # 8-byte Folded Spill - st.d $ra, $sp, 360 # 8-byte Folded Spill - sltu $a6, $ra, $s5 - and $a3, $a3, $a6 - ld.d $a7, $sp, 416 # 8-byte Folded Reload - srli.d $a6, $a7, 1 - addi.d $a6, $a6, 1 - or $a1, $a1, $a3 + st.d $a1, $sp, 128 # 8-byte Folded Spill + sltu $a1, $t0, $s4 + sltu $a6, $a4, $t3 + and $a1, $a1, $a6 + sltu $a6, $t0, $s8 + sltu $a7, $s5, $t3 + and $a6, $a6, $a7 + or $a1, $a1, $a6 + sltu $a6, $t0, $s7 + sltu $a7, $a5, $t3 + and $a6, $a6, $a7 + or $a1, $a1, $a6 + st.d $t0, $sp, 192 # 8-byte Folded Spill + sltu $a6, $t0, $ra + st.d $t3, $sp, 184 # 8-byte Folded Spill + sltu $a7, $s2, $t3 + and $a6, $a6, $a7 + or $a1, $a1, $a6 + st.d $a1, $sp, 120 # 8-byte Folded Spill + st.d $s4, $sp, 144 # 8-byte Folded Spill + sltu $a1, $t6, $s4 + sltu $a6, $a4, $t7 + and $a1, $a1, $a6 + st.d $s8, $sp, 280 # 8-byte Folded Spill + sltu $a6, $t6, $s8 + st.d $s5, $sp, 136 # 8-byte Folded Spill + sltu $a7, $s5, $t7 + and $a6, $a6, $a7 + or $a1, $a1, $a6 + st.d $s7, $sp, 392 # 8-byte Folded Spill + sltu $a6, $t6, $s7 + sltu $a7, $a5, $t7 + and $a6, $a6, $a7 + or $a1, $a1, $a6 + st.d $t6, $sp, 440 # 8-byte Folded Spill + st.d $ra, $sp, 352 # 8-byte Folded Spill + sltu $a6, $t6, $ra + st.d $t7, $sp, 176 # 8-byte Folded Spill + st.d $s2, $sp, 232 # 8-byte Folded Spill + sltu $a7, $s2, $t7 + move $s2, $a3 + ld.d $ra, $sp, 216 # 8-byte Folded Reload + and $a6, $a6, $a7 + addi.d $a3, $t1, -12 + st.d $a3, $sp, 112 # 8-byte Folded Spill + add.d $a3, $a2, $t2 + st.d $a3, $sp, 104 # 8-byte Folded Spill + addi.d $a3, $t1, -8 + st.d $a3, $sp, 96 # 8-byte Folded Spill + or $a1, $a1, $a6 st.d $a1, $sp, 88 # 8-byte Folded Spill lu12i.w $a1, 258048 vreplgr2vr.w $vr0, $a1 - lu12i.w $a1, 259547 - ori $a1, $a1, 983 - vreplgr2vr.w $vr1, $a1 - st.d $a6, $sp, 48 # 8-byte Folded Spill - bstrpick.d $a1, $a6, 62, 2 + srli.d $a1, $t4, 1 + addi.d $a1, $a1, 1 + st.d $a1, $sp, 56 # 8-byte Folded Spill + bstrpick.d $a1, $a1, 62, 2 slli.d $a3, $a1, 2 st.d $a3, $sp, 80 # 8-byte Folded Spill slli.d $a1, $a1, 3 - addi.d $s1, $a2, 4 + addi.d $a3, $a2, 4 + st.d $a3, $sp, 432 # 8-byte Folded Spill addi.d $a3, $a2, -8 - st.d $a3, $sp, 64 # 8-byte Folded Spill + st.d $a3, $sp, 72 # 8-byte Folded Spill addi.d $a3, $a2, 8 st.d $a3, $sp, 424 # 8-byte Folded Spill addi.d $a3, $a2, -4 - st.d $a3, $sp, 56 # 8-byte Folded Spill + st.d $a3, $sp, 64 # 8-byte Folded Spill addi.d $a3, $a2, 16 - st.d $a3, $sp, 32 # 8-byte Folded Spill + st.d $a3, $sp, 40 # 8-byte Folded Spill addi.d $a3, $a2, -16 - st.d $a3, $sp, 24 # 8-byte Folded Spill + st.d $a3, $sp, 32 # 8-byte Folded Spill addi.d $a2, $a2, -12 - st.d $a2, $sp, 384 # 8-byte Folded Spill - vldi $vr2, -1184 + st.d $a2, $sp, 376 # 8-byte Folded Spill + vldi $vr1, -1184 + lu12i.w $a2, 259547 + ori $a2, $a2, 983 + st.d $a2, $sp, 24 # 8-byte Folded Spill + movgr2fr.w $fa2, $a2 vrepli.d $vr3, -2 vrepli.b $vr4, -1 slli.d $a2, $a0, 2 st.d $a2, $sp, 464 # 8-byte Folded Spill - st.d $a1, $sp, 40 # 8-byte Folded Spill + st.d $a1, $sp, 48 # 8-byte Folded Spill addi.d $a1, $a1, 2 st.d $a1, $sp, 16 # 8-byte Folded Spill - addi.d $a1, $t3, 16 + addi.d $a1, $ra, 16 addi.d $a2, $a5, -4 - st.d $a2, $sp, 376 # 8-byte Folded Spill - addi.d $a2, $a4, -4 st.d $a2, $sp, 368 # 8-byte Folded Spill - srli.d $a2, $a7, 62 - st.d $a2, $sp, 200 # 8-byte Folded Spill + addi.d $a2, $a4, -4 + st.d $a2, $sp, 360 # 8-byte Folded Spill + st.d $t4, $sp, 416 # 8-byte Folded Spill + srli.d $a2, $t4, 62 + st.d $a2, $sp, 208 # 8-byte Folded Spill st.d $a1, $sp, 8 # 8-byte Folded Spill - st.d $a1, $sp, 480 # 8-byte Folded Spill - st.d $s1, $sp, 72 # 8-byte Folded Spill + st.d $a1, $sp, 472 # 8-byte Folded Spill + st.d $s2, $sp, 200 # 8-byte Folded Spill .p2align 4, , 16 -.LBB336_7: # %.lr.ph161.us +.LBB336_27: # %.lr.ph161.us # =>This Loop Header: Depth=1 - # Child Loop BB336_62 Depth 2 - # Child Loop BB336_10 Depth 2 - bstrpick.d $a6, $s4, 31, 0 - bstrpick.d $t1, $s6, 31, 0 + # Child Loop BB336_82 Depth 2 + # Child Loop BB336_30 Depth 2 + bstrpick.d $a7, $t5, 31, 0 + bstrpick.d $t3, $s3, 31, 0 ori $s7, $zero, 2 ld.d $a1, $sp, 416 # 8-byte Folded Reload ori $a2, $zero, 46 - st.d $t0, $sp, 472 # 8-byte Folded Spill - bgeu $a1, $a2, .LBB336_12 -.LBB336_8: # in Loop: Header=BB336_7 Depth=1 - move $ra, $s3 - move $s5, $t5 - move $s8, $fp - move $t7, $fp -.LBB336_9: # %scalar.ph621.preheader - # in Loop: Header=BB336_7 Depth=1 + bgeu $a1, $a2, .LBB336_32 +.LBB336_28: # in Loop: Header=BB336_27 Depth=1 + move $s5, $fp + ld.d $s8, $sp, 480 # 8-byte Folded Reload + move $s4, $s1 + move $t6, $s1 +.LBB336_29: # %scalar.ph621.preheader + # in Loop: Header=BB336_27 Depth=1 move $a1, $zero - ld.d $t0, $sp, 432 # 8-byte Folded Reload - alsl.d $a2, $ra, $t0, 2 - ld.d $a3, $sp, 376 # 8-byte Folded Reload + ld.d $t0, $sp, 440 # 8-byte Folded Reload + alsl.d $a2, $s5, $t0, 2 + ld.d $a3, $sp, 368 # 8-byte Folded Reload alsl.d $a3, $s7, $a3, 2 - alsl.d $a6, $a6, $t0, 2 - ld.d $a7, $sp, 368 # 8-byte Folded Reload + alsl.d $a6, $a7, $t0, 2 + ld.d $a7, $sp, 360 # 8-byte Folded Reload alsl.d $a7, $s7, $a7, 2 - alsl.d $t1, $t1, $t0, 2 - alsl.d $t3, $t7, $s1, 2 + alsl.d $t1, $t3, $t0, 2 + ld.d $t0, $sp, 432 # 8-byte Folded Reload + alsl.d $t2, $t6, $t0, 2 ld.d $t0, $sp, 424 # 8-byte Folded Reload - alsl.d $t6, $s5, $t0, 2 - ld.d $t0, $sp, 384 # 8-byte Folded Reload - alsl.d $t7, $s8, $t0, 2 + alsl.d $t6, $s8, $t0, 2 + ld.d $t0, $sp, 376 # 8-byte Folded Reload + alsl.d $t7, $s4, $t0, 2 .p2align 4, , 16 -.LBB336_10: # %scalar.ph621 - # Parent Loop BB336_7 Depth=1 +.LBB336_30: # %scalar.ph621 + # Parent Loop BB336_27 Depth=1 # => This Inner Loop Header: Depth=2 - fldx.s $fa5, $t3, $a1 + fldx.s $fa5, $t2, $a1 fld.s $fa6, $t7, 0 add.d $t0, $t6, $a1 fld.s $fa7, $t0, -4 - add.d $t0, $t3, $a1 + add.d $t0, $t2, $a1 fadd.s $fa5, $fa5, $fa6 fadd.s $fa6, $fa7, $fa5 - add.d $t4, $a2, $a1 - fst.s $fa6, $t4, -4 + add.d $t3, $a2, $a1 + fst.s $fa6, $t3, -4 fld.s $fa6, $t0, 4 fld.s $ft0, $t7, 4 - fmul.s $fa5, $fa5, $fa2 + fmul.s $fa5, $fa5, $fa1 fldx.s $ft1, $t6, $a1 fsub.s $fa5, $fa7, $fa5 fsub.s $fa6, $fa6, $ft0 - fmul.s $fa7, $fa6, $fa2 + fmul.s $fa7, $fa6, $fa1 fadd.s $fa6, $ft1, $fa6 fstx.s $fa6, $a2, $a1 - fldx.s $fa6, $t3, $a1 + fldx.s $fa6, $t2, $a1 fld.s $ft0, $t7, 0 - fld.s $ft2, $t2, %pc_lo12(.LCPI336_0) - fld.s $ft3, $t0, 4 - fld.s $ft4, $t7, 4 + fld.s $ft2, $t0, 4 + fld.s $ft3, $t7, 4 fsub.s $fa7, $ft1, $fa7 fsub.s $fa6, $fa6, $ft0 - fmul.s $fa6, $fa6, $ft2 - fadd.s $ft0, $ft3, $ft4 - fmul.s $ft0, $ft0, $ft2 + fmul.s $fa6, $fa6, $fa2 + fadd.s $ft0, $ft2, $ft3 + fmul.s $ft0, $ft0, $fa2 add.d $t0, $a7, $a1 fld.s $ft1, $t0, -4 fldx.s $ft2, $a7, $a1 @@ -79672,8 +79852,8 @@ dradb3: # @dradb3 fmul.s $ft1, $ft1, $ft3 fmul.s $ft2, $ft4, $ft2 fsub.s $ft1, $ft1, $ft2 - add.d $t4, $a6, $a1 - fst.s $ft1, $t4, -4 + add.d $t3, $a6, $a1 + fst.s $ft1, $t3, -4 fld.s $ft1, $t0, -4 fldx.s $ft2, $a7, $a1 fmul.s $ft1, $ft4, $ft1 @@ -79688,8 +79868,8 @@ dradb3: # @dradb3 fmul.s $fa7, $fa5, $ft1 fmul.s $ft0, $fa6, $ft2 fsub.s $fa7, $fa7, $ft0 - add.d $t4, $t1, $a1 - fst.s $fa7, $t4, -4 + add.d $t3, $t1, $a1 + fst.s $fa7, $t3, -4 fld.s $fa7, $t0, -4 fldx.s $ft0, $a3, $a1 fmul.s $fa6, $fa6, $fa7 @@ -79699,548 +79879,545 @@ dradb3: # @dradb3 addi.d $s7, $s7, 2 addi.d $a1, $a1, 8 addi.d $t7, $t7, -8 - bltu $s7, $a0, .LBB336_10 -.LBB336_11: # %._crit_edge162.us - # in Loop: Header=BB336_7 Depth=1 - ld.d $t0, $sp, 472 # 8-byte Folded Reload - addi.w $t0, $t0, 1 - add.w $fp, $fp, $s0 - add.w $t5, $t5, $s0 - add.w $s6, $s6, $a0 - add.w $s4, $s4, $a0 - add.d $s3, $s3, $a0 + bltu $s7, $a0, .LBB336_30 +.LBB336_31: # %._crit_edge162.us + # in Loop: Header=BB336_27 Depth=1 + addi.w $s0, $s0, 1 + add.w $s1, $s1, $s6 + ld.d $a1, $sp, 480 # 8-byte Folded Reload + add.w $a1, $a1, $s6 + st.d $a1, $sp, 480 # 8-byte Folded Spill + add.w $s3, $s3, $a0 + add.w $t5, $t5, $a0 + add.d $fp, $fp, $a0 addi.d $t8, $t8, 1 ld.d $a1, $sp, 464 # 8-byte Folded Reload - ld.d $a2, $sp, 480 # 8-byte Folded Reload + ld.d $a2, $sp, 472 # 8-byte Folded Reload add.d $a2, $a2, $a1 - st.d $a2, $sp, 480 # 8-byte Folded Spill - bne $t0, $s2, .LBB336_7 - b .LBB336_78 + st.d $a2, $sp, 472 # 8-byte Folded Spill + bne $s0, $s2, .LBB336_27 + b .LBB336_96 .p2align 4, , 16 -.LBB336_12: # %vector.scevcheck264 - # in Loop: Header=BB336_7 Depth=1 +.LBB336_32: # %vector.scevcheck264 + # in Loop: Header=BB336_27 Depth=1 ld.d $a1, $sp, 464 # 8-byte Folded Reload mul.d $a1, $a1, $t8 - ld.d $a2, $sp, 288 # 8-byte Folded Reload - add.d $a1, $a2, $a1 + add.d $a1, $ra, $a1 addi.d $a1, $a1, 4 - ld.d $a2, $sp, 280 # 8-byte Folded Reload + ld.d $a2, $sp, 288 # 8-byte Folded Reload add.d $a2, $a1, $a2 - bltu $a2, $a1, .LBB336_8 -# %bb.13: # %vector.scevcheck264 - # in Loop: Header=BB336_7 Depth=1 - ld.d $a1, $sp, 200 # 8-byte Folded Reload - bnez $a1, .LBB336_8 -# %bb.14: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - st.d $s3, $sp, 296 # 8-byte Folded Spill - st.d $t8, $sp, 304 # 8-byte Folded Spill - st.d $s6, $sp, 328 # 8-byte Folded Spill - st.d $s4, $sp, 320 # 8-byte Folded Spill - ld.d $t7, $sp, 184 # 8-byte Folded Reload - alsl.d $a3, $a6, $t7, 2 - ld.d $a1, $sp, 160 # 8-byte Folded Reload - alsl.d $t6, $a6, $a1, 2 - ld.d $s4, $sp, 432 # 8-byte Folded Reload - alsl.d $s0, $a6, $s4, 2 - ld.d $a2, $sp, 152 # 8-byte Folded Reload - st.d $a6, $sp, 344 # 8-byte Folded Spill - alsl.d $t3, $a6, $a2, 2 - alsl.d $s6, $t1, $t7, 2 - alsl.d $s8, $t1, $a1, 2 - alsl.d $s2, $t1, $s4, 2 - st.d $t1, $sp, 256 # 8-byte Folded Spill - alsl.d $ra, $t1, $a2, 2 - alsl.d $s3, $fp, $s1, 2 - st.d $s3, $sp, 456 # 8-byte Folded Spill - ld.d $t0, $sp, 144 # 8-byte Folded Reload - alsl.d $t8, $fp, $t0, 2 - st.d $t8, $sp, 448 # 8-byte Folded Spill - ld.d $t1, $sp, 136 # 8-byte Folded Reload - alsl.d $a1, $fp, $t1, 2 - st.d $a1, $sp, 400 # 8-byte Folded Spill - ld.d $t1, $sp, 64 # 8-byte Folded Reload - alsl.d $a2, $fp, $t1, 2 - st.d $a2, $sp, 440 # 8-byte Folded Spill - alsl.d $a2, $t5, $s1, 2 - st.d $a2, $sp, 216 # 8-byte Folded Spill - alsl.d $a7, $t5, $t0, 2 - st.d $a7, $sp, 392 # 8-byte Folded Spill - ld.d $s5, $sp, 424 # 8-byte Folded Reload - alsl.d $a6, $fp, $s5, 2 - st.d $a6, $sp, 240 # 8-byte Folded Spill - ld.d $t1, $sp, 128 # 8-byte Folded Reload - alsl.d $a2, $fp, $t1, 2 - st.d $a2, $sp, 232 # 8-byte Folded Spill - ld.d $t4, $sp, 120 # 8-byte Folded Reload - alsl.d $t4, $fp, $t4, 2 - st.d $t4, $sp, 224 # 8-byte Folded Spill - st.d $fp, $sp, 312 # 8-byte Folded Spill - ld.d $t4, $sp, 56 # 8-byte Folded Reload - alsl.d $s1, $fp, $t4, 2 - st.d $s1, $sp, 248 # 8-byte Folded Spill - alsl.d $s5, $t5, $s5, 2 - st.d $t5, $sp, 336 # 8-byte Folded Spill - alsl.d $t5, $t5, $t1, 2 - sltu $t4, $t7, $t6 - ld.d $fp, $sp, 176 # 8-byte Folded Reload - sltu $t0, $a3, $fp - and $t0, $t4, $t0 - ld.d $t1, $sp, 104 # 8-byte Folded Reload - or $t0, $t1, $t0 - sltu $t4, $t7, $t3 - sltu $t1, $s0, $fp - and $t1, $t4, $t1 - or $t0, $t0, $t1 - sltu $t1, $t7, $s8 - sltu $t4, $s6, $fp - and $t1, $t1, $t4 - or $t0, $t0, $t1 - sltu $t1, $t7, $ra - sltu $t4, $s2, $fp - and $t1, $t1, $t4 - or $t0, $t0, $t1 - sltu $t1, $t7, $t8 - sltu $t4, $s3, $fp - and $t1, $t1, $t4 - or $t0, $t0, $t1 - ld.d $s3, $sp, 440 # 8-byte Folded Reload - sltu $t1, $t7, $s3 - sltu $t4, $a1, $fp - and $t1, $t1, $t4 - or $t0, $t0, $t1 - sltu $t1, $t7, $a7 - ld.d $t8, $sp, 216 # 8-byte Folded Reload - sltu $t4, $t8, $fp - and $t1, $t1, $t4 - or $t0, $t0, $t1 - sltu $t1, $t7, $a2 - sltu $t4, $a6, $fp - and $t1, $t1, $t4 - or $t0, $t0, $t1 - sltu $t1, $t7, $s1 - ld.d $a1, $sp, 224 # 8-byte Folded Reload - sltu $t4, $a1, $fp - and $t1, $t1, $t4 - or $t0, $t0, $t1 - sltu $t1, $t7, $t5 - sltu $t4, $s5, $fp - and $t1, $t1, $t4 - or $t0, $t0, $t1 - ld.d $t1, $sp, 96 # 8-byte Folded Reload - or $t0, $t0, $t1 - sltu $t1, $s4, $t6 - ld.d $t7, $sp, 168 # 8-byte Folded Reload - sltu $t4, $a3, $t7 - and $t1, $t1, $t4 - or $t0, $t0, $t1 - sltu $t1, $s4, $t3 - sltu $t4, $s0, $t7 - and $t1, $t1, $t4 - or $t0, $t0, $t1 - sltu $t1, $s4, $s8 - sltu $t4, $s6, $t7 - and $t1, $t1, $t4 - or $t0, $t0, $t1 - sltu $t1, $s4, $ra - sltu $t4, $s2, $t7 - and $t1, $t1, $t4 - or $t0, $t0, $t1 - ld.d $a7, $sp, 448 # 8-byte Folded Reload - sltu $t1, $s4, $a7 - ld.d $a7, $sp, 456 # 8-byte Folded Reload - sltu $t4, $a7, $t7 - and $t1, $t1, $t4 - or $t0, $t0, $t1 - sltu $t1, $s4, $s3 - ld.d $t4, $sp, 400 # 8-byte Folded Reload - sltu $t4, $t4, $t7 - and $t1, $t1, $t4 - or $t0, $t0, $t1 - ld.d $t1, $sp, 392 # 8-byte Folded Reload - sltu $t1, $s4, $t1 - sltu $t4, $t8, $t7 - and $t1, $t1, $t4 - or $t0, $t0, $t1 - ld.d $t1, $sp, 232 # 8-byte Folded Reload - move $a6, $t1 - sltu $t1, $s4, $t1 + bltu $a2, $a1, .LBB336_28 +# %bb.33: # %vector.scevcheck264 + # in Loop: Header=BB336_27 Depth=1 + ld.d $a1, $sp, 208 # 8-byte Folded Reload + bnez $a1, .LBB336_28 +# %bb.34: # %vector.memcheck269 + # in Loop: Header=BB336_27 Depth=1 + st.d $s0, $sp, 296 # 8-byte Folded Spill + st.d $fp, $sp, 304 # 8-byte Folded Spill + st.d $t8, $sp, 312 # 8-byte Folded Spill + st.d $s3, $sp, 320 # 8-byte Folded Spill + st.d $t5, $sp, 272 # 8-byte Folded Spill + ld.d $t1, $sp, 192 # 8-byte Folded Reload + alsl.d $t2, $a7, $t1, 2 + ld.d $a1, $sp, 168 # 8-byte Folded Reload + alsl.d $a3, $a7, $a1, 2 + ld.d $t8, $sp, 440 # 8-byte Folded Reload + alsl.d $a6, $a7, $t8, 2 + ld.d $a2, $sp, 160 # 8-byte Folded Reload + st.d $a7, $sp, 336 # 8-byte Folded Spill + alsl.d $fp, $a7, $a2, 2 + alsl.d $ra, $t3, $t1, 2 + alsl.d $a7, $t3, $a1, 2 + alsl.d $t4, $t3, $t8, 2 + st.d $t3, $sp, 344 # 8-byte Folded Spill + alsl.d $s8, $t3, $a2, 2 + ld.d $t5, $sp, 432 # 8-byte Folded Reload + ld.d $t6, $sp, 480 # 8-byte Folded Reload + alsl.d $s0, $s1, $t5, 2 + st.d $s0, $sp, 400 # 8-byte Folded Spill + ld.d $t0, $sp, 152 # 8-byte Folded Reload + alsl.d $s6, $s1, $t0, 2 + ld.d $t3, $sp, 112 # 8-byte Folded Reload + alsl.d $s4, $s1, $t3, 2 + ld.d $t3, $sp, 72 # 8-byte Folded Reload + alsl.d $s3, $s1, $t3, 2 + st.d $s3, $sp, 408 # 8-byte Folded Spill + alsl.d $t7, $t6, $t5, 2 + st.d $t7, $sp, 264 # 8-byte Folded Spill + alsl.d $a1, $t6, $t0, 2 + st.d $a1, $sp, 256 # 8-byte Folded Spill + ld.d $t5, $sp, 424 # 8-byte Folded Reload + alsl.d $s2, $s1, $t5, 2 + ld.d $t0, $sp, 104 # 8-byte Folded Reload + alsl.d $s5, $s1, $t0, 2 + st.d $s5, $sp, 384 # 8-byte Folded Spill + ld.d $t3, $sp, 96 # 8-byte Folded Reload + alsl.d $a2, $s1, $t3, 2 + st.d $s1, $sp, 328 # 8-byte Folded Spill + ld.d $t3, $sp, 64 # 8-byte Folded Reload + alsl.d $t3, $s1, $t3, 2 + st.d $t3, $sp, 240 # 8-byte Folded Spill + alsl.d $s1, $t6, $t5, 2 + st.d $t6, $sp, 480 # 8-byte Folded Spill + alsl.d $t0, $t6, $t0, 2 + st.d $t0, $sp, 248 # 8-byte Folded Spill + sltu $t5, $t1, $a3 + ld.d $t6, $sp, 184 # 8-byte Folded Reload + sltu $t3, $t2, $t6 + and $t3, $t5, $t3 + ld.d $t0, $sp, 128 # 8-byte Folded Reload + or $t3, $t0, $t3 + sltu $t5, $t1, $fp + sltu $t0, $a6, $t6 + and $t0, $t5, $t0 + or $t0, $t3, $t0 + sltu $t3, $t1, $a7 + sltu $t5, $ra, $t6 + and $t3, $t3, $t5 + or $t0, $t0, $t3 + sltu $t3, $t1, $s8 + sltu $t5, $t4, $t6 + and $t3, $t3, $t5 + or $t0, $t0, $t3 + sltu $t3, $t1, $s6 + sltu $t5, $s0, $t6 + and $t3, $t3, $t5 + or $t0, $t0, $t3 + sltu $t3, $t1, $s3 + sltu $t5, $s4, $t6 + move $s0, $s4 + move $s4, $s8 + move $s8, $t4 + and $t3, $t3, $t5 + or $t0, $t0, $t3 + sltu $t3, $t1, $a1 + sltu $t5, $t7, $t6 + and $t3, $t3, $t5 + or $t0, $t0, $t3 + sltu $t3, $t1, $s5 + sltu $t5, $s2, $t6 + move $t7, $s2 + and $t3, $t3, $t5 + or $t0, $t0, $t3 ld.d $t4, $sp, 240 # 8-byte Folded Reload - move $s1, $t4 - sltu $t4, $t4, $t7 - and $t1, $t1, $t4 - or $t0, $t0, $t1 - ld.d $t1, $sp, 248 # 8-byte Folded Reload - move $s3, $t1 - sltu $t1, $s4, $t1 - sltu $t4, $a1, $t7 - move $a2, $s5 - and $t1, $t1, $t4 - or $t0, $t0, $t1 - sltu $t1, $s4, $t5 - sltu $t4, $s5, $t7 - and $t1, $t1, $t4 + sltu $t3, $t1, $t4 + sltu $t5, $a2, $t6 + move $s3, $a2 + and $t3, $t3, $t5 + or $t0, $t0, $t3 + ld.d $a2, $sp, 248 # 8-byte Folded Reload + sltu $t3, $t1, $a2 + sltu $t5, $s1, $t6 + move $t6, $s1 + and $t3, $t3, $t5 + or $t0, $t0, $t3 + ld.d $t1, $sp, 120 # 8-byte Folded Reload or $t0, $t0, $t1 + sltu $t3, $t8, $a3 + ld.d $t1, $sp, 176 # 8-byte Folded Reload + sltu $t5, $t2, $t1 + and $t3, $t3, $t5 + or $t0, $t0, $t3 + sltu $t3, $t8, $fp + sltu $t5, $a6, $t1 + and $t3, $t3, $t5 + or $t0, $t0, $t3 + sltu $t3, $t8, $a7 + sltu $t5, $ra, $t1 + and $t3, $t3, $t5 + or $t0, $t0, $t3 + sltu $t3, $t8, $s4 + sltu $t5, $s8, $t1 + and $t3, $t3, $t5 + or $t0, $t0, $t3 + st.d $s6, $sp, 456 # 8-byte Folded Spill + sltu $t3, $t8, $s6 + ld.d $a1, $sp, 400 # 8-byte Folded Reload + sltu $t5, $a1, $t1 + and $t3, $t3, $t5 + or $t0, $t0, $t3 + ld.d $a1, $sp, 408 # 8-byte Folded Reload + sltu $t3, $t8, $a1 + sltu $t5, $s0, $t1 + and $t3, $t3, $t5 + or $t0, $t0, $t3 + ld.d $a1, $sp, 256 # 8-byte Folded Reload + move $s2, $a1 + sltu $t3, $t8, $a1 + ld.d $a1, $sp, 264 # 8-byte Folded Reload + move $s5, $a1 + sltu $t5, $a1, $t1 + and $t3, $t3, $t5 + or $t0, $t0, $t3 + ld.d $a1, $sp, 384 # 8-byte Folded Reload + sltu $t3, $t8, $a1 + sltu $t5, $t7, $t1 + and $t3, $t3, $t5 + or $t0, $t0, $t3 + sltu $t3, $t8, $t4 + st.d $s3, $sp, 448 # 8-byte Folded Spill + sltu $t5, $s3, $t1 + and $t3, $t3, $t5 + or $t0, $t0, $t3 + sltu $t3, $t8, $a2 + sltu $t5, $s1, $t1 + and $t3, $t3, $t5 + or $t0, $t0, $t3 ld.d $t1, $sp, 88 # 8-byte Folded Reload or $t0, $t0, $t1 andi $t0, $t0, 1 - bnez $t0, .LBB336_76 -# %bb.15: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - move $fp, $t8 - sltu $t0, $a3, $t3 - sltu $t1, $s0, $t6 - and $t0, $t0, $t1 - ld.d $t4, $sp, 112 # 8-byte Folded Reload - ld.d $t7, $sp, 272 # 8-byte Folded Reload - ld.d $t8, $sp, 264 # 8-byte Folded Reload - bnez $t0, .LBB336_76 -# %bb.16: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - move $a7, $a1 - move $s5, $t5 - sltu $t0, $a3, $s8 - sltu $t1, $s6, $t6 - and $t0, $t0, $t1 - ld.d $t5, $sp, 336 # 8-byte Folded Reload - ld.d $a1, $sp, 392 # 8-byte Folded Reload - bnez $t0, .LBB336_74 -# %bb.17: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $t0, $a3, $ra - sltu $t1, $s2, $t6 - and $t0, $t0, $t1 - bnez $t0, .LBB336_74 -# %bb.18: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - ld.d $t0, $sp, 448 # 8-byte Folded Reload - sltu $t0, $a3, $t0 - ld.d $t1, $sp, 456 # 8-byte Folded Reload - sltu $t1, $t1, $t6 - and $t0, $t0, $t1 - bnez $t0, .LBB336_74 -# %bb.19: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - ld.d $t0, $sp, 440 # 8-byte Folded Reload - sltu $t0, $a3, $t0 - ld.d $t1, $sp, 400 # 8-byte Folded Reload - sltu $t1, $t1, $t6 - and $t0, $t0, $t1 - bnez $t0, .LBB336_74 -# %bb.20: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $t0, $a3, $a1 - sltu $t1, $fp, $t6 - and $t0, $t0, $t1 - bnez $t0, .LBB336_74 -# %bb.21: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $t0, $a3, $a6 - sltu $t1, $s1, $t6 - and $t0, $t0, $t1 - bnez $t0, .LBB336_74 -# %bb.22: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - move $s4, $s3 - sltu $t0, $a3, $s3 - sltu $t1, $a7, $t6 - and $t0, $t0, $t1 - bnez $t0, .LBB336_74 -# %bb.23: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $t0, $a3, $s5 - sltu $t1, $a2, $t6 - and $t0, $t0, $t1 - bnez $t0, .LBB336_74 -# %bb.24: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $t0, $a3, $t4 - sltu $t1, $a4, $t6 - and $t0, $t0, $t1 - bnez $t0, .LBB336_74 -# %bb.25: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $t0, $a3, $t7 - ld.d $t1, $sp, 408 # 8-byte Folded Reload - sltu $t1, $t1, $t6 - and $t0, $t0, $t1 - bnez $t0, .LBB336_74 -# %bb.26: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $t0, $a3, $t8 - sltu $t1, $a5, $t6 - and $t0, $t0, $t1 - bnez $t0, .LBB336_74 -# %bb.27: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - ld.d $t0, $sp, 352 # 8-byte Folded Reload - sltu $a3, $a3, $t0 - ld.d $t0, $sp, 360 # 8-byte Folded Reload - sltu $t0, $t0, $t6 - and $a3, $a3, $t0 - ld.d $t1, $sp, 256 # 8-byte Folded Reload - bnez $a3, .LBB336_77 -# %bb.28: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $a3, $s0, $s8 - sltu $t0, $s6, $t3 - and $a3, $a3, $t0 - bnez $a3, .LBB336_77 -# %bb.29: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $a3, $s0, $ra - sltu $t0, $s2, $t3 - and $a3, $a3, $t0 - bnez $a3, .LBB336_71 -# %bb.30: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - ld.d $a3, $sp, 448 # 8-byte Folded Reload - sltu $a3, $s0, $a3 - ld.d $t0, $sp, 456 # 8-byte Folded Reload - sltu $t0, $t0, $t3 - and $a3, $a3, $t0 - ld.d $s3, $sp, 400 # 8-byte Folded Reload - bnez $a3, .LBB336_71 -# %bb.31: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - ld.d $a3, $sp, 440 # 8-byte Folded Reload - sltu $a3, $s0, $a3 - sltu $t0, $s3, $t3 - and $a3, $a3, $t0 - bnez $a3, .LBB336_71 -# %bb.32: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $a3, $s0, $a1 - sltu $t0, $fp, $t3 - and $a3, $a3, $t0 - bnez $a3, .LBB336_71 -# %bb.33: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - move $t5, $a6 - sltu $a3, $s0, $a6 - sltu $t0, $s1, $t3 - and $a3, $a3, $t0 - bnez $a3, .LBB336_71 -# %bb.34: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - move $t6, $s5 - move $s5, $a7 - sltu $a3, $s0, $s4 - sltu $t0, $a7, $t3 - and $a3, $a3, $t0 - bnez $a3, .LBB336_71 + bnez $t0, .LBB336_95 # %bb.35: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $a3, $s0, $t6 - sltu $t0, $a2, $t3 - and $a3, $a3, $t0 - bnez $a3, .LBB336_71 + # in Loop: Header=BB336_27 Depth=1 + sltu $t0, $t2, $fp + sltu $t3, $a6, $a3 + and $t0, $t0, $t3 + ld.d $t1, $sp, 144 # 8-byte Folded Reload + ld.d $t5, $sp, 136 # 8-byte Folded Reload + ld.d $t8, $sp, 280 # 8-byte Folded Reload + bnez $t0, .LBB336_95 # %bb.36: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $a3, $s0, $t4 - sltu $t0, $a4, $t3 - and $a3, $a3, $t0 - bnez $a3, .LBB336_71 + # in Loop: Header=BB336_27 Depth=1 + sltu $t0, $t2, $a7 + sltu $t3, $ra, $a3 + and $t0, $t0, $t3 + ld.d $s3, $sp, 384 # 8-byte Folded Reload + bnez $t0, .LBB336_93 # %bb.37: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $a3, $s0, $t7 - ld.d $a6, $sp, 408 # 8-byte Folded Reload - sltu $t0, $a6, $t3 - and $a3, $a3, $t0 - bnez $a3, .LBB336_71 + # in Loop: Header=BB336_27 Depth=1 + sltu $t0, $t2, $s4 + sltu $t3, $s8, $a3 + and $t0, $t0, $t3 + bnez $t0, .LBB336_93 # %bb.38: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $a3, $s0, $t8 - sltu $t0, $a5, $t3 - and $a3, $a3, $t0 - bnez $a3, .LBB336_71 + # in Loop: Header=BB336_27 Depth=1 + ld.d $t0, $sp, 456 # 8-byte Folded Reload + sltu $t0, $t2, $t0 + ld.d $t3, $sp, 400 # 8-byte Folded Reload + sltu $t3, $t3, $a3 + and $t0, $t0, $t3 + bnez $t0, .LBB336_93 # %bb.39: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - ld.d $a3, $sp, 352 # 8-byte Folded Reload - sltu $a3, $s0, $a3 - ld.d $t0, $sp, 360 # 8-byte Folded Reload - sltu $t0, $t0, $t3 - and $a3, $a3, $t0 - bnez $a3, .LBB336_71 + # in Loop: Header=BB336_27 Depth=1 + ld.d $t0, $sp, 408 # 8-byte Folded Reload + sltu $t0, $t2, $t0 + sltu $t3, $s0, $a3 + and $t0, $t0, $t3 + bnez $t0, .LBB336_93 # %bb.40: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $a3, $s6, $ra - sltu $t0, $s2, $s8 - and $a3, $a3, $t0 - bnez $a3, .LBB336_71 + # in Loop: Header=BB336_27 Depth=1 + sltu $t0, $t2, $s2 + sltu $t3, $s5, $a3 + and $t0, $t0, $t3 + bnez $t0, .LBB336_93 # %bb.41: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - ld.d $a3, $sp, 448 # 8-byte Folded Reload - sltu $a3, $s6, $a3 - ld.d $a6, $sp, 456 # 8-byte Folded Reload - sltu $t0, $a6, $s8 - and $a3, $a3, $t0 - bnez $a3, .LBB336_71 + # in Loop: Header=BB336_27 Depth=1 + sltu $t0, $t2, $s3 + sltu $t3, $t7, $a3 + and $t0, $t0, $t3 + bnez $t0, .LBB336_93 # %bb.42: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - ld.d $a3, $sp, 440 # 8-byte Folded Reload - sltu $a3, $s6, $a3 - sltu $t0, $s3, $s8 - and $a3, $a3, $t0 - bnez $a3, .LBB336_71 + # in Loop: Header=BB336_27 Depth=1 + move $s6, $t4 + move $s1, $a2 + sltu $t0, $t2, $t4 + ld.d $a2, $sp, 448 # 8-byte Folded Reload + sltu $t3, $a2, $a3 + and $t0, $t0, $t3 + bnez $t0, .LBB336_93 # %bb.43: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $a3, $s6, $a1 - sltu $t0, $fp, $s8 - and $a3, $a3, $t0 - bnez $a3, .LBB336_71 + # in Loop: Header=BB336_27 Depth=1 + move $a1, $t6 + sltu $t0, $t2, $s1 + sltu $t3, $t6, $a3 + and $t0, $t0, $t3 + bnez $t0, .LBB336_93 # %bb.44: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $a3, $s6, $t5 - sltu $t0, $s1, $s8 - and $a3, $a3, $t0 - bnez $a3, .LBB336_71 + # in Loop: Header=BB336_27 Depth=1 + sltu $t0, $t2, $t1 + sltu $t3, $a4, $a3 + and $t0, $t0, $t3 + bnez $t0, .LBB336_93 # %bb.45: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $a3, $s6, $s4 - sltu $t0, $s5, $s8 - and $a3, $a3, $t0 - bnez $a3, .LBB336_71 + # in Loop: Header=BB336_27 Depth=1 + sltu $t0, $t2, $t8 + sltu $t3, $t5, $a3 + and $t0, $t0, $t3 + bnez $t0, .LBB336_93 # %bb.46: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $a3, $s6, $t6 - sltu $t0, $a2, $s8 - and $a3, $a3, $t0 - bnez $a3, .LBB336_71 + # in Loop: Header=BB336_27 Depth=1 + ld.d $t0, $sp, 392 # 8-byte Folded Reload + sltu $t0, $t2, $t0 + sltu $t3, $a5, $a3 + and $t0, $t0, $t3 + bnez $t0, .LBB336_93 # %bb.47: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $a3, $s6, $t4 - sltu $t0, $a4, $s8 - and $a3, $a3, $t0 - bnez $a3, .LBB336_71 + # in Loop: Header=BB336_27 Depth=1 + ld.d $t0, $sp, 352 # 8-byte Folded Reload + sltu $t0, $t2, $t0 + ld.d $t2, $sp, 232 # 8-byte Folded Reload + sltu $a3, $t2, $a3 + and $a3, $t0, $a3 + bnez $a3, .LBB336_93 # %bb.48: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $a3, $s6, $t7 - ld.d $a6, $sp, 408 # 8-byte Folded Reload - sltu $t0, $a6, $s8 + # in Loop: Header=BB336_27 Depth=1 + sltu $a3, $a6, $a7 + sltu $t0, $ra, $fp and $a3, $a3, $t0 - bnez $a3, .LBB336_71 + bnez $a3, .LBB336_93 # %bb.49: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $a3, $s6, $t8 - sltu $t0, $a5, $s8 + # in Loop: Header=BB336_27 Depth=1 + sltu $a3, $a6, $s4 + sltu $t0, $s8, $fp and $a3, $a3, $t0 - bnez $a3, .LBB336_71 + bnez $a3, .LBB336_93 # %bb.50: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - ld.d $a3, $sp, 352 # 8-byte Folded Reload - sltu $a3, $s6, $a3 - ld.d $t0, $sp, 360 # 8-byte Folded Reload - sltu $a7, $t0, $s8 - and $a3, $a3, $a7 - bnez $a3, .LBB336_71 + # in Loop: Header=BB336_27 Depth=1 + ld.d $a3, $sp, 456 # 8-byte Folded Reload + sltu $a3, $a6, $a3 + ld.d $t4, $sp, 400 # 8-byte Folded Reload + sltu $t0, $t4, $fp + and $a3, $a3, $t0 + ld.d $t6, $sp, 408 # 8-byte Folded Reload + bnez $a3, .LBB336_93 # %bb.51: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - ld.d $a3, $sp, 448 # 8-byte Folded Reload - sltu $a3, $s2, $a3 - ld.d $a6, $sp, 456 # 8-byte Folded Reload - sltu $a6, $a6, $ra - and $a3, $a3, $a6 - bnez $a3, .LBB336_73 + # in Loop: Header=BB336_27 Depth=1 + sltu $a3, $a6, $t6 + sltu $t0, $s0, $fp + and $a3, $a3, $t0 + bnez $a3, .LBB336_93 # %bb.52: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - ld.d $a3, $sp, 440 # 8-byte Folded Reload - sltu $a3, $s2, $a3 - sltu $a6, $s3, $ra - and $a3, $a3, $a6 - ld.d $t0, $sp, 192 # 8-byte Folded Reload - bnez $a3, .LBB336_70 + # in Loop: Header=BB336_27 Depth=1 + move $t3, $s5 + move $s5, $s2 + sltu $a3, $a6, $s2 + sltu $t0, $t3, $fp + and $a3, $a3, $t0 + bnez $a3, .LBB336_93 # %bb.53: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - ld.d $a3, $sp, 392 # 8-byte Folded Reload - sltu $a3, $s2, $a3 - sltu $a6, $fp, $ra - and $a3, $a3, $a6 - ld.d $s0, $sp, 208 # 8-byte Folded Reload - ld.d $t8, $sp, 304 # 8-byte Folded Reload - ld.d $s3, $sp, 296 # 8-byte Folded Reload - bnez $a3, .LBB336_69 + # in Loop: Header=BB336_27 Depth=1 + sltu $a3, $a6, $s3 + sltu $t0, $t7, $fp + and $a3, $a3, $t0 + bnez $a3, .LBB336_93 # %bb.54: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $a3, $s2, $t5 - sltu $a6, $s1, $ra - and $a3, $a3, $a6 - bnez $a3, .LBB336_69 + # in Loop: Header=BB336_27 Depth=1 + sltu $a3, $a6, $s6 + ld.d $a2, $sp, 448 # 8-byte Folded Reload + sltu $t0, $a2, $fp + and $a3, $a3, $t0 + bnez $a3, .LBB336_93 # %bb.55: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $a3, $s2, $s4 - sltu $a6, $s5, $ra - and $a3, $a3, $a6 - ld.d $s6, $sp, 328 # 8-byte Folded Reload - ld.d $t5, $sp, 336 # 8-byte Folded Reload - bnez $a3, .LBB336_68 + # in Loop: Header=BB336_27 Depth=1 + sltu $a3, $a6, $s1 + sltu $t0, $a1, $fp + and $a3, $a3, $t0 + bnez $a3, .LBB336_93 # %bb.56: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $a3, $s2, $t6 - sltu $a6, $a2, $ra - and $a3, $a3, $a6 - ld.d $s4, $sp, 320 # 8-byte Folded Reload - ld.d $fp, $sp, 312 # 8-byte Folded Reload - bnez $a3, .LBB336_65 + # in Loop: Header=BB336_27 Depth=1 + sltu $a3, $a6, $t1 + sltu $t0, $a4, $fp + and $a3, $a3, $t0 + bnez $a3, .LBB336_93 # %bb.57: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - sltu $a3, $s2, $t4 - sltu $a6, $a4, $ra - and $a3, $a3, $a6 - bnez $a3, .LBB336_65 + # in Loop: Header=BB336_27 Depth=1 + sltu $a3, $a6, $t8 + sltu $t0, $t5, $fp + and $a3, $a3, $t0 + bnez $a3, .LBB336_93 # %bb.58: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - ld.d $a3, $sp, 272 # 8-byte Folded Reload - sltu $a3, $s2, $a3 - ld.d $a6, $sp, 408 # 8-byte Folded Reload - sltu $a6, $a6, $ra - and $a3, $a3, $a6 - bnez $a3, .LBB336_65 + # in Loop: Header=BB336_27 Depth=1 + ld.d $a3, $sp, 392 # 8-byte Folded Reload + sltu $a3, $a6, $a3 + sltu $t0, $a5, $fp + and $a3, $a3, $t0 + bnez $a3, .LBB336_93 # %bb.59: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 - ld.d $a3, $sp, 264 # 8-byte Folded Reload - sltu $a3, $s2, $a3 - sltu $a6, $a5, $ra + # in Loop: Header=BB336_27 Depth=1 + ld.d $a3, $sp, 352 # 8-byte Folded Reload + sltu $a3, $a6, $a3 + sltu $a6, $t2, $fp and $a3, $a3, $a6 - bnez $a3, .LBB336_65 + bnez $a3, .LBB336_93 # %bb.60: # %vector.memcheck269 - # in Loop: Header=BB336_7 Depth=1 + # in Loop: Header=BB336_27 Depth=1 + sltu $a3, $ra, $s4 + sltu $a6, $s8, $a7 + and $a3, $a3, $a6 + bnez $a3, .LBB336_93 +# %bb.61: # %vector.memcheck269 + # in Loop: Header=BB336_27 Depth=1 + ld.d $a3, $sp, 456 # 8-byte Folded Reload + sltu $a3, $ra, $a3 + sltu $a6, $t4, $a7 + and $a3, $a3, $a6 + bnez $a3, .LBB336_93 +# %bb.62: # %vector.memcheck269 + # in Loop: Header=BB336_27 Depth=1 + sltu $a3, $ra, $t6 + sltu $a6, $s0, $a7 + and $a3, $a3, $a6 + bnez $a3, .LBB336_93 +# %bb.63: # %vector.memcheck269 + # in Loop: Header=BB336_27 Depth=1 + sltu $a3, $ra, $s5 + sltu $a6, $t3, $a7 + and $a3, $a3, $a6 + bnez $a3, .LBB336_93 +# %bb.64: # %vector.memcheck269 + # in Loop: Header=BB336_27 Depth=1 + sltu $a3, $ra, $s3 + sltu $a6, $t7, $a7 + and $a3, $a3, $a6 + bnez $a3, .LBB336_93 +# %bb.65: # %vector.memcheck269 + # in Loop: Header=BB336_27 Depth=1 + sltu $a3, $ra, $s6 + ld.d $a2, $sp, 448 # 8-byte Folded Reload + sltu $a6, $a2, $a7 + and $a3, $a3, $a6 + bnez $a3, .LBB336_93 +# %bb.66: # %vector.memcheck269 + # in Loop: Header=BB336_27 Depth=1 + sltu $a3, $ra, $s1 + sltu $a6, $a1, $a7 + and $a3, $a3, $a6 + bnez $a3, .LBB336_93 +# %bb.67: # %vector.memcheck269 + # in Loop: Header=BB336_27 Depth=1 + sltu $a3, $ra, $t1 + sltu $a6, $a4, $a7 + and $a3, $a3, $a6 + bnez $a3, .LBB336_93 +# %bb.68: # %vector.memcheck269 + # in Loop: Header=BB336_27 Depth=1 + sltu $a3, $ra, $t8 + sltu $a6, $t5, $a7 + and $a3, $a3, $a6 + bnez $a3, .LBB336_93 +# %bb.69: # %vector.memcheck269 + # in Loop: Header=BB336_27 Depth=1 + ld.d $a3, $sp, 392 # 8-byte Folded Reload + sltu $a3, $ra, $a3 + sltu $a6, $a5, $a7 + and $a3, $a3, $a6 + bnez $a3, .LBB336_93 +# %bb.70: # %vector.memcheck269 + # in Loop: Header=BB336_27 Depth=1 + ld.d $a3, $sp, 352 # 8-byte Folded Reload + sltu $a3, $ra, $a3 + sltu $a6, $t2, $a7 + and $a3, $a3, $a6 + bnez $a3, .LBB336_93 +# %bb.71: # %vector.memcheck269 + # in Loop: Header=BB336_27 Depth=1 + ld.d $a3, $sp, 456 # 8-byte Folded Reload + sltu $a3, $s8, $a3 + sltu $a6, $t4, $s4 + and $a3, $a3, $a6 + ld.d $s3, $sp, 320 # 8-byte Folded Reload + bnez $a3, .LBB336_94 +# %bb.72: # %vector.memcheck269 + # in Loop: Header=BB336_27 Depth=1 + sltu $a3, $s8, $t6 + sltu $a6, $s0, $s4 + and $a3, $a3, $a6 + ld.d $a7, $sp, 336 # 8-byte Folded Reload + bnez $a3, .LBB336_92 +# %bb.73: # %vector.memcheck269 + # in Loop: Header=BB336_27 Depth=1 + sltu $a3, $s8, $s5 + sltu $a6, $t3, $s4 + and $a3, $a3, $a6 + ld.d $fp, $sp, 304 # 8-byte Folded Reload + ld.d $s0, $sp, 296 # 8-byte Folded Reload + ld.d $t3, $sp, 344 # 8-byte Folded Reload + bnez $a3, .LBB336_88 +# %bb.74: # %vector.memcheck269 + # in Loop: Header=BB336_27 Depth=1 + ld.d $a3, $sp, 384 # 8-byte Folded Reload + sltu $a3, $s8, $a3 + sltu $a6, $t7, $s4 + and $a3, $a3, $a6 + bnez $a3, .LBB336_88 +# %bb.75: # %vector.memcheck269 + # in Loop: Header=BB336_27 Depth=1 + sltu $a3, $s8, $s6 + ld.d $a2, $sp, 448 # 8-byte Folded Reload + sltu $a6, $a2, $s4 + and $a3, $a3, $a6 + ld.d $s6, $sp, 224 # 8-byte Folded Reload + bnez $a3, .LBB336_87 +# %bb.76: # %vector.memcheck269 + # in Loop: Header=BB336_27 Depth=1 + sltu $a3, $s8, $s1 + sltu $a6, $a1, $s4 + and $a3, $a3, $a6 + ld.d $s1, $sp, 328 # 8-byte Folded Reload + ld.d $t8, $sp, 312 # 8-byte Folded Reload + bnez $a3, .LBB336_86 +# %bb.77: # %vector.memcheck269 + # in Loop: Header=BB336_27 Depth=1 + sltu $a3, $s8, $t1 + sltu $a6, $a4, $s4 + and $a3, $a3, $a6 + ld.d $ra, $sp, 216 # 8-byte Folded Reload + bnez $a3, .LBB336_85 +# %bb.78: # %vector.memcheck269 + # in Loop: Header=BB336_27 Depth=1 + ld.d $a3, $sp, 280 # 8-byte Folded Reload + sltu $a3, $s8, $a3 + sltu $a6, $t5, $s4 + and $a3, $a3, $a6 + bnez $a3, .LBB336_85 +# %bb.79: # %vector.memcheck269 + # in Loop: Header=BB336_27 Depth=1 + ld.d $a3, $sp, 392 # 8-byte Folded Reload + sltu $a3, $s8, $a3 + sltu $a6, $a5, $s4 + and $a3, $a3, $a6 + ld.d $t5, $sp, 272 # 8-byte Folded Reload + bnez $a3, .LBB336_84 +# %bb.80: # %vector.memcheck269 + # in Loop: Header=BB336_27 Depth=1 ld.d $a3, $sp, 352 # 8-byte Folded Reload - sltu $a1, $s2, $a3 - ld.d $a3, $sp, 360 # 8-byte Folded Reload - sltu $a2, $a3, $ra + sltu $a1, $s8, $a3 + ld.d $a3, $sp, 232 # 8-byte Folded Reload + sltu $a2, $a3, $s4 and $a1, $a1, $a2 - ld.d $a6, $sp, 344 # 8-byte Folded Reload - bnez $a1, .LBB336_64 -# %bb.61: # %vector.ph623 - # in Loop: Header=BB336_7 Depth=1 + bnez $a1, .LBB336_84 +# %bb.81: # %vector.ph623 + # in Loop: Header=BB336_27 Depth=1 + ld.d $a2, $sp, 40 # 8-byte Folded Reload + alsl.d $s7, $s1, $a2, 2 + ld.d $a6, $sp, 8 # 8-byte Folded Reload + alsl.d $a1, $a7, $a6, 2 + move $a3, $s1 + alsl.d $s1, $t3, $a6, 2 + ld.d $t0, $sp, 480 # 8-byte Folded Reload + alsl.d $s2, $t0, $a2, 2 ld.d $a2, $sp, 32 # 8-byte Folded Reload - alsl.d $s7, $fp, $a2, 2 - ld.d $a3, $sp, 8 # 8-byte Folded Reload - alsl.d $a1, $a6, $a3, 2 - alsl.d $s4, $t1, $a3, 2 - alsl.d $s6, $t5, $a2, 2 - ld.d $a2, $sp, 24 # 8-byte Folded Reload - move $t4, $t1 - alsl.d $t1, $fp, $a2, 2 - ld.d $a3, $sp, 40 # 8-byte Folded Reload - add.d $ra, $s3, $a3 - add.d $a6, $a3, $a6 - st.d $a6, $sp, 344 # 8-byte Folded Spill - add.d $t4, $a3, $t4 + alsl.d $t1, $a3, $a2, 2 + ld.d $a6, $sp, 48 # 8-byte Folded Reload + add.d $s5, $fp, $a6 + add.d $a7, $a6, $a7 + st.d $a7, $sp, 336 # 8-byte Folded Spill + add.d $t3, $a6, $t3 + st.d $t3, $sp, 344 # 8-byte Folded Spill + pcalau12i $a2, %pc_hi20(.LCPI336_0) + vld $vr5, $a2, %pc_lo12(.LCPI336_0) pcalau12i $a2, %pc_hi20(.LCPI336_1) - vld $vr5, $a2, %pc_lo12(.LCPI336_1) - pcalau12i $a2, %pc_hi20(.LCPI336_2) - vld $vr6, $a2, %pc_lo12(.LCPI336_2) - add.d $s5, $a3, $t5 - sub.d $s8, $fp, $a3 - add.d $t7, $a3, $fp - ld.d $a2, $sp, 480 # 8-byte Folded Reload - ld.d $fp, $sp, 80 # 8-byte Folded Reload - move $s2, $t0 + vld $vr6, $a2, %pc_lo12(.LCPI336_1) + add.d $s8, $a6, $t0 + sub.d $s4, $a3, $a6 + add.d $t6, $a6, $a3 + ld.d $a2, $sp, 472 # 8-byte Folded Reload + ld.d $t8, $sp, 80 # 8-byte Folded Reload + ld.d $t3, $sp, 24 # 8-byte Folded Reload .p2align 4, , 16 -.LBB336_62: # %vector.body626 - # Parent Loop BB336_7 Depth=1 +.LBB336_82: # %vector.body626 + # Parent Loop BB336_27 Depth=1 # => This Inner Loop Header: Depth=2 fld.s $fa7, $s7, -12 fld.s $ft0, $s7, -4 @@ -80256,10 +80433,10 @@ dradb3: # @dradb3 vextrins.w $vr8, $vr9, 16 vextrins.w $vr8, $vr11, 32 vextrins.w $vr8, $vr12, 48 - fld.s $ft1, $s6, -12 - fld.s $ft2, $s6, -4 - fld.s $ft3, $s6, 4 - fld.s $ft4, $s6, 12 + fld.s $ft1, $s2, -12 + fld.s $ft2, $s2, -4 + fld.s $ft3, $s2, 4 + fld.s $ft4, $s2, 12 vfadd.s $vr8, $vr7, $vr8 vextrins.w $vr9, $vr10, 16 vextrins.w $vr9, $vr11, 32 @@ -80285,10 +80462,10 @@ dradb3: # @dradb3 vextrins.w $vr10, $vr11, 16 vextrins.w $vr10, $vr12, 32 vextrins.w $vr10, $vr13, 48 - fld.s $ft3, $s6, -8 - fld.s $ft0, $s6, 0 - fld.s $ft4, $s6, 8 - fld.s $ft5, $s6, 16 + fld.s $ft3, $s2, -8 + fld.s $ft0, $s2, 0 + fld.s $ft4, $s2, 8 + fld.s $ft5, $s2, 16 vfsub.s $vr9, $vr9, $vr10 vextrins.w $vr11, $vr8, 16 vextrins.w $vr11, $vr12, 32 @@ -80315,26 +80492,27 @@ dradb3: # @dradb3 vextrins.w $vr11, $vr13, 32 vextrins.w $vr11, $vr14, 48 vfsub.s $vr9, $vr10, $vr11 - fld.s $ft2, $s7, -8 - fld.s $ft3, $s7, 0 - fld.s $ft4, $s7, 8 - vfmul.s $vr13, $vr9, $vr1 + vreplgr2vr.w $vr10, $t3 + fld.s $ft3, $s7, -8 + fld.s $ft4, $s7, 0 + fld.s $ft5, $s7, 8 + vfmul.s $vr14, $vr9, $vr10 fld.s $ft1, $s7, 16 - vextrins.w $vr10, $vr11, 16 - vextrins.w $vr10, $vr12, 32 - fld.s $ft3, $t1, 8 - fld.s $ft4, $t1, 0 - fld.s $ft6, $t1, -8 - fld.s $ft7, $t1, -16 - vextrins.w $vr10, $vr9, 48 vextrins.w $vr11, $vr12, 16 - vextrins.w $vr11, $vr14, 32 - vextrins.w $vr11, $vr15, 48 - vfadd.s $vr9, $vr10, $vr11 - vfmul.s $vr10, $vr9, $vr1 + vextrins.w $vr11, $vr13, 32 + fld.s $ft4, $t1, 8 + fld.s $ft5, $t1, 0 + fld.s $ft7, $t1, -8 + fld.s $ft8, $t1, -16 + vextrins.w $vr11, $vr9, 48 + vextrins.w $vr12, $vr13, 16 + vextrins.w $vr12, $vr15, 32 + vextrins.w $vr12, $vr16, 48 + vfadd.s $vr9, $vr11, $vr12 + vfmul.s $vr10, $vr9, $vr10 vfsub.s $vr9, $vr7, $vr10 vfadd.s $vr7, $vr7, $vr10 - vfadd.s $vr10, $vr8, $vr13 + vfadd.s $vr10, $vr8, $vr14 vadd.d $vr11, $vr5, $vr3 vadd.d $vr12, $vr6, $vr3 vpickve2gr.d $a3, $vr12, 0 @@ -80344,29 +80522,29 @@ dradb3: # @dradb3 vpickve2gr.d $a7, $vr11, 0 slli.d $a7, $a7, 2 vpickve2gr.d $t0, $vr11, 1 - slli.d $t3, $t0, 2 + slli.d $t2, $t0, 2 fldx.s $ft3, $a4, $a3 fldx.s $ft4, $a4, $a6 - fldx.s $ft6, $a4, $a7 - fldx.s $ft7, $a4, $t3 - vfsub.s $vr8, $vr8, $vr13 + fldx.s $ft5, $a4, $a7 + fldx.s $ft7, $a4, $t2 + vfsub.s $vr8, $vr8, $vr14 vextrins.w $vr11, $vr12, 16 - vextrins.w $vr11, $vr14, 32 + vextrins.w $vr11, $vr13, 32 vextrins.w $vr11, $vr15, 48 vadd.d $vr12, $vr5, $vr4 vadd.d $vr13, $vr6, $vr4 vpickve2gr.d $t0, $vr13, 0 - slli.d $t6, $t0, 2 + slli.d $t7, $t0, 2 vpickve2gr.d $t0, $vr13, 1 - slli.d $t8, $t0, 2 + slli.d $fp, $t0, 2 vpickve2gr.d $t0, $vr12, 0 slli.d $s0, $t0, 2 vpickve2gr.d $t0, $vr12, 1 - slli.d $s1, $t0, 2 - fldx.s $ft4, $a4, $t6 - fldx.s $ft5, $a4, $t8 + slli.d $s3, $t0, 2 + fldx.s $ft4, $a4, $t7 + fldx.s $ft5, $a4, $fp fldx.s $ft6, $a4, $s0 - fldx.s $ft7, $a4, $s1 + fldx.s $ft7, $a4, $s3 vfmul.s $vr11, $vr11, $vr9 vextrins.w $vr12, $vr13, 16 vextrins.w $vr12, $vr14, 32 @@ -80375,15 +80553,15 @@ dradb3: # @dradb3 fldx.s $ft5, $a4, $a3 fldx.s $ft6, $a4, $a6 fldx.s $ft7, $a4, $a7 - fldx.s $ft8, $a4, $t3 + fldx.s $ft8, $a4, $t2 vfsub.s $vr11, $vr11, $vr12 vextrins.w $vr13, $vr14, 16 vextrins.w $vr13, $vr15, 32 vextrins.w $vr13, $vr16, 48 - fldx.s $ft4, $a4, $t6 - fldx.s $ft6, $a4, $t8 + fldx.s $ft4, $a4, $t7 + fldx.s $ft6, $a4, $fp fldx.s $ft7, $a4, $s0 - fldx.s $ft8, $a4, $s1 + fldx.s $ft8, $a4, $s3 vfmul.s $vr10, $vr10, $vr13 vextrins.w $vr12, $vr14, 16 vextrins.w $vr12, $vr15, 32 @@ -80396,15 +80574,15 @@ dradb3: # @dradb3 fldx.s $ft2, $a5, $a3 fldx.s $ft3, $a5, $a6 fldx.s $ft4, $a5, $a7 - fldx.s $ft5, $a5, $t3 + fldx.s $ft5, $a5, $t2 vst $vr9, $a1, 4 vextrins.w $vr10, $vr11, 16 vextrins.w $vr10, $vr12, 32 vextrins.w $vr10, $vr13, 48 - fldx.s $ft1, $a5, $t6 - fldx.s $ft3, $a5, $t8 + fldx.s $ft1, $a5, $t7 + fldx.s $ft3, $a5, $fp fldx.s $ft4, $a5, $s0 - fldx.s $ft5, $a5, $s1 + fldx.s $ft5, $a5, $s3 vfmul.s $vr10, $vr7, $vr10 vextrins.w $vr9, $vr11, 16 vextrins.w $vr9, $vr12, 32 @@ -80413,15 +80591,15 @@ dradb3: # @dradb3 fldx.s $ft3, $a5, $a3 fldx.s $ft4, $a5, $a6 fldx.s $ft5, $a5, $a7 - fldx.s $ft6, $a5, $t3 + fldx.s $ft6, $a5, $t2 vfsub.s $vr9, $vr10, $vr9 vextrins.w $vr11, $vr12, 16 vextrins.w $vr11, $vr13, 32 vextrins.w $vr11, $vr14, 48 - fldx.s $ft2, $a5, $t6 - fldx.s $ft4, $a5, $t8 + fldx.s $ft2, $a5, $t7 + fldx.s $ft4, $a5, $fp fldx.s $ft5, $a5, $s0 - fldx.s $ft6, $a5, $s1 + fldx.s $ft6, $a5, $s3 vfmul.s $vr8, $vr8, $vr11 vextrins.w $vr10, $vr12, 16 vextrins.w $vr10, $vr13, 32 @@ -80429,147 +80607,141 @@ dradb3: # @dradb3 vfmul.s $vr7, $vr7, $vr10 vfadd.s $vr7, $vr8, $vr7 vilvl.w $vr8, $vr7, $vr9 - vst $vr8, $s4, -12 + vst $vr8, $s1, -12 vilvh.w $vr7, $vr7, $vr9 - vst $vr7, $s4, 4 + vst $vr7, $s1, 4 vaddi.du $vr6, $vr6, 8 vaddi.du $vr5, $vr5, 8 - addi.d $fp, $fp, -4 + addi.d $t8, $t8, -4 addi.d $a2, $a2, 32 addi.d $s7, $s7, 32 addi.d $a1, $a1, 32 - addi.d $s4, $s4, 32 - addi.d $s6, $s6, 32 + addi.d $s1, $s1, 32 + addi.d $s2, $s2, 32 addi.d $t1, $t1, -32 - bnez $fp, .LBB336_62 -# %bb.63: # %middle.block635 - # in Loop: Header=BB336_7 Depth=1 + bnez $t8, .LBB336_82 +# %bb.83: # %middle.block635 + # in Loop: Header=BB336_27 Depth=1 ld.d $s7, $sp, 16 # 8-byte Folded Reload - ld.d $s4, $sp, 320 # 8-byte Folded Reload - ld.d $s6, $sp, 328 # 8-byte Folded Reload - ld.d $fp, $sp, 312 # 8-byte Folded Reload - ld.d $s0, $sp, 208 # 8-byte Folded Reload - ld.d $t8, $sp, 304 # 8-byte Folded Reload - ld.d $s3, $sp, 296 # 8-byte Folded Reload - ld.d $a1, $sp, 48 # 8-byte Folded Reload + ld.d $s3, $sp, 320 # 8-byte Folded Reload + ld.d $s1, $sp, 328 # 8-byte Folded Reload + ld.d $s6, $sp, 224 # 8-byte Folded Reload + ld.d $t8, $sp, 312 # 8-byte Folded Reload + ld.d $fp, $sp, 304 # 8-byte Folded Reload + ld.d $s0, $sp, 296 # 8-byte Folded Reload + ld.d $a1, $sp, 56 # 8-byte Folded Reload ld.d $a2, $sp, 80 # 8-byte Folded Reload - move $t1, $t4 - ld.d $a6, $sp, 344 # 8-byte Folded Reload - ld.d $s1, $sp, 72 # 8-byte Folded Reload - bne $a1, $a2, .LBB336_9 - b .LBB336_11 -.LBB336_64: # in Loop: Header=BB336_7 Depth=1 - move $ra, $s3 - move $s5, $t5 - move $s8, $fp - move $t7, $fp - b .LBB336_67 -.LBB336_65: # in Loop: Header=BB336_7 Depth=1 - move $ra, $s3 - move $s5, $t5 - move $s8, $fp - move $t7, $fp -.LBB336_66: # %scalar.ph621.preheader - # in Loop: Header=BB336_7 Depth=1 - ld.d $a6, $sp, 344 # 8-byte Folded Reload -.LBB336_67: # %scalar.ph621.preheader - # in Loop: Header=BB336_7 Depth=1 - move $s2, $t0 - ld.d $s1, $sp, 72 # 8-byte Folded Reload - b .LBB336_9 -.LBB336_68: # in Loop: Header=BB336_7 Depth=1 - move $ra, $s3 - move $s5, $t5 - ld.d $fp, $sp, 312 # 8-byte Folded Reload - move $s8, $fp - move $t7, $fp - ld.d $s4, $sp, 320 # 8-byte Folded Reload - b .LBB336_66 -.LBB336_69: # in Loop: Header=BB336_7 Depth=1 - move $ra, $s3 - ld.d $t5, $sp, 336 # 8-byte Folded Reload - move $s5, $t5 - ld.d $fp, $sp, 312 # 8-byte Folded Reload - move $s8, $fp - move $t7, $fp - ld.d $s4, $sp, 320 # 8-byte Folded Reload - ld.d $s6, $sp, 328 # 8-byte Folded Reload - b .LBB336_66 -.LBB336_70: # in Loop: Header=BB336_7 Depth=1 - ld.d $s3, $sp, 296 # 8-byte Folded Reload - move $ra, $s3 - ld.d $t5, $sp, 336 # 8-byte Folded Reload - move $s5, $t5 - ld.d $fp, $sp, 312 # 8-byte Folded Reload - move $s8, $fp - move $t7, $fp - ld.d $s4, $sp, 320 # 8-byte Folded Reload - ld.d $s6, $sp, 328 # 8-byte Folded Reload - ld.d $s0, $sp, 208 # 8-byte Folded Reload - ld.d $t8, $sp, 304 # 8-byte Folded Reload - b .LBB336_66 -.LBB336_71: # in Loop: Header=BB336_7 Depth=1 - ld.d $s3, $sp, 296 # 8-byte Folded Reload - move $ra, $s3 - ld.d $t5, $sp, 336 # 8-byte Folded Reload -.LBB336_72: # %scalar.ph621.preheader - # in Loop: Header=BB336_7 Depth=1 - move $s5, $t5 - ld.d $fp, $sp, 312 # 8-byte Folded Reload - move $s8, $fp - move $t7, $fp - ld.d $s2, $sp, 192 # 8-byte Folded Reload - ld.d $s4, $sp, 320 # 8-byte Folded Reload - ld.d $s6, $sp, 328 # 8-byte Folded Reload - ld.d $s0, $sp, 208 # 8-byte Folded Reload - ld.d $t8, $sp, 304 # 8-byte Folded Reload - ld.d $s1, $sp, 72 # 8-byte Folded Reload - ld.d $a6, $sp, 344 # 8-byte Folded Reload - b .LBB336_9 -.LBB336_73: # in Loop: Header=BB336_7 Depth=1 - ld.d $s3, $sp, 296 # 8-byte Folded Reload - move $ra, $s3 - ld.d $t5, $sp, 336 # 8-byte Folded Reload - move $s5, $t5 - ld.d $fp, $sp, 312 # 8-byte Folded Reload - move $s8, $fp - move $t7, $fp - ld.d $s2, $sp, 192 # 8-byte Folded Reload - ld.d $s4, $sp, 320 # 8-byte Folded Reload - ld.d $s6, $sp, 328 # 8-byte Folded Reload - ld.d $s0, $sp, 208 # 8-byte Folded Reload - ld.d $t8, $sp, 304 # 8-byte Folded Reload - ld.d $a6, $sp, 344 # 8-byte Folded Reload - ld.d $s1, $sp, 72 # 8-byte Folded Reload - b .LBB336_9 -.LBB336_74: # in Loop: Header=BB336_7 Depth=1 - ld.d $s3, $sp, 296 # 8-byte Folded Reload - move $ra, $s3 -.LBB336_75: # %scalar.ph621.preheader - # in Loop: Header=BB336_7 Depth=1 - move $s5, $t5 - ld.d $fp, $sp, 312 # 8-byte Folded Reload - move $s8, $fp - move $t7, $fp - ld.d $s2, $sp, 192 # 8-byte Folded Reload - ld.d $s4, $sp, 320 # 8-byte Folded Reload - ld.d $s6, $sp, 328 # 8-byte Folded Reload - ld.d $s0, $sp, 208 # 8-byte Folded Reload - ld.d $t8, $sp, 304 # 8-byte Folded Reload - ld.d $s1, $sp, 72 # 8-byte Folded Reload - ld.d $t1, $sp, 256 # 8-byte Folded Reload - ld.d $a6, $sp, 344 # 8-byte Folded Reload - b .LBB336_9 -.LBB336_76: # in Loop: Header=BB336_7 Depth=1 - ld.d $s3, $sp, 296 # 8-byte Folded Reload - move $ra, $s3 - ld.d $t5, $sp, 336 # 8-byte Folded Reload - b .LBB336_75 -.LBB336_77: # in Loop: Header=BB336_7 Depth=1 - ld.d $s3, $sp, 296 # 8-byte Folded Reload - move $ra, $s3 - b .LBB336_72 -.LBB336_78: + ld.d $t3, $sp, 344 # 8-byte Folded Reload + ld.d $a7, $sp, 336 # 8-byte Folded Reload + ld.d $ra, $sp, 216 # 8-byte Folded Reload + ld.d $s2, $sp, 200 # 8-byte Folded Reload + bne $a1, $a2, .LBB336_29 + b .LBB336_31 +.LBB336_84: # in Loop: Header=BB336_27 Depth=1 + move $s5, $fp + ld.d $s8, $sp, 480 # 8-byte Folded Reload + move $s4, $s1 + move $t6, $s1 + ld.d $s2, $sp, 200 # 8-byte Folded Reload + b .LBB336_29 +.LBB336_85: # in Loop: Header=BB336_27 Depth=1 + move $s5, $fp + ld.d $s8, $sp, 480 # 8-byte Folded Reload + move $s4, $s1 + move $t6, $s1 + b .LBB336_91 +.LBB336_86: # in Loop: Header=BB336_27 Depth=1 + move $s5, $fp + ld.d $s8, $sp, 480 # 8-byte Folded Reload + move $s4, $s1 + move $t6, $s1 + b .LBB336_90 +.LBB336_87: # in Loop: Header=BB336_27 Depth=1 + move $s5, $fp + ld.d $s8, $sp, 480 # 8-byte Folded Reload + ld.d $s1, $sp, 328 # 8-byte Folded Reload + move $s4, $s1 + move $t6, $s1 + b .LBB336_89 +.LBB336_88: # in Loop: Header=BB336_27 Depth=1 + move $s5, $fp + ld.d $s8, $sp, 480 # 8-byte Folded Reload + ld.d $s1, $sp, 328 # 8-byte Folded Reload + move $s4, $s1 + move $t6, $s1 + ld.d $s6, $sp, 224 # 8-byte Folded Reload +.LBB336_89: # %scalar.ph621.preheader + # in Loop: Header=BB336_27 Depth=1 + ld.d $t8, $sp, 312 # 8-byte Folded Reload +.LBB336_90: # %scalar.ph621.preheader + # in Loop: Header=BB336_27 Depth=1 + ld.d $ra, $sp, 216 # 8-byte Folded Reload +.LBB336_91: # %scalar.ph621.preheader + # in Loop: Header=BB336_27 Depth=1 + ld.d $t5, $sp, 272 # 8-byte Folded Reload + ld.d $s2, $sp, 200 # 8-byte Folded Reload + b .LBB336_29 +.LBB336_92: # in Loop: Header=BB336_27 Depth=1 + ld.d $fp, $sp, 304 # 8-byte Folded Reload + move $s5, $fp + ld.d $s8, $sp, 480 # 8-byte Folded Reload + ld.d $s1, $sp, 328 # 8-byte Folded Reload + move $s4, $s1 + move $t6, $s1 + ld.d $s6, $sp, 224 # 8-byte Folded Reload + ld.d $t8, $sp, 312 # 8-byte Folded Reload + ld.d $s0, $sp, 296 # 8-byte Folded Reload + ld.d $t3, $sp, 344 # 8-byte Folded Reload + b .LBB336_90 +.LBB336_93: # in Loop: Header=BB336_27 Depth=1 + ld.d $fp, $sp, 304 # 8-byte Folded Reload + move $s5, $fp + ld.d $s8, $sp, 480 # 8-byte Folded Reload + ld.d $s1, $sp, 328 # 8-byte Folded Reload + move $s4, $s1 + move $t6, $s1 + ld.d $ra, $sp, 216 # 8-byte Folded Reload + ld.d $s2, $sp, 200 # 8-byte Folded Reload + ld.d $s3, $sp, 320 # 8-byte Folded Reload + ld.d $s6, $sp, 224 # 8-byte Folded Reload + ld.d $t8, $sp, 312 # 8-byte Folded Reload + ld.d $s0, $sp, 296 # 8-byte Folded Reload + ld.d $t3, $sp, 344 # 8-byte Folded Reload + ld.d $a7, $sp, 336 # 8-byte Folded Reload + ld.d $t5, $sp, 272 # 8-byte Folded Reload + b .LBB336_29 +.LBB336_94: # in Loop: Header=BB336_27 Depth=1 + ld.d $fp, $sp, 304 # 8-byte Folded Reload + move $s5, $fp + ld.d $s8, $sp, 480 # 8-byte Folded Reload + ld.d $s1, $sp, 328 # 8-byte Folded Reload + move $s4, $s1 + move $t6, $s1 + ld.d $ra, $sp, 216 # 8-byte Folded Reload + ld.d $s6, $sp, 224 # 8-byte Folded Reload + ld.d $t8, $sp, 312 # 8-byte Folded Reload + ld.d $s0, $sp, 296 # 8-byte Folded Reload + ld.d $t3, $sp, 344 # 8-byte Folded Reload + ld.d $a7, $sp, 336 # 8-byte Folded Reload + b .LBB336_91 +.LBB336_95: # in Loop: Header=BB336_27 Depth=1 + ld.d $fp, $sp, 304 # 8-byte Folded Reload + move $s5, $fp + ld.d $s8, $sp, 480 # 8-byte Folded Reload + ld.d $s1, $sp, 328 # 8-byte Folded Reload + move $s4, $s1 + move $t6, $s1 + ld.d $ra, $sp, 216 # 8-byte Folded Reload + ld.d $s2, $sp, 200 # 8-byte Folded Reload + ld.d $t5, $sp, 272 # 8-byte Folded Reload + ld.d $s3, $sp, 320 # 8-byte Folded Reload + ld.d $s6, $sp, 224 # 8-byte Folded Reload + ld.d $t8, $sp, 312 # 8-byte Folded Reload + ld.d $s0, $sp, 296 # 8-byte Folded Reload + ld.d $t3, $sp, 344 # 8-byte Folded Reload + ld.d $a7, $sp, 336 # 8-byte Folded Reload + b .LBB336_29 +.LBB336_96: ld.d $s8, $sp, 488 # 8-byte Folded Reload ld.d $s7, $sp, 496 # 8-byte Folded Reload ld.d $s6, $sp, 504 # 8-byte Folded Reload @@ -80582,224 +80754,27 @@ dradb3: # @dradb3 ld.d $fp, $sp, 560 # 8-byte Folded Reload ld.d $ra, $sp, 568 # 8-byte Folded Reload addi.d $sp, $sp, 576 -.LBB336_79: # %.loopexit +.LBB336_97: # %.loopexit ret -.LBB336_80: # %vector.scevcheck - ori $a6, $zero, 1 - move $t1, $zero - bne $a0, $a6, .LBB336_98 -# %bb.81: # %vector.scevcheck - addi.d $a6, $a1, -1 - addi.d $t6, $a2, 4 - bstrpick.d $t8, $a6, 31, 0 - slli.d $a6, $t8, 3 - alsl.d $a6, $t8, $a6, 2 - add.d $a7, $t6, $a6 - bltu $a7, $t6, .LBB336_98 -# %bb.82: # %vector.scevcheck - addi.d $t5, $a2, 8 - add.d $t7, $t5, $a6 - move $a6, $fp - move $a7, $t1 - move $t4, $t1 - bltu $t7, $t5, .LBB336_3 -# %bb.83: # %vector.memcheck - move $ra, $fp - alsl.d $a6, $t8, $t0, 2 - addi.d $fp, $a6, 4 - alsl.d $t4, $a1, $t0, 2 - alsl.d $a6, $t8, $t4, 2 - addi.d $t7, $a6, 4 - sltu $a6, $t0, $t7 - sltu $a7, $t4, $fp - and $a6, $a6, $a7 - move $t1, $zero - bnez $a6, .LBB336_101 -# %bb.84: # %vector.memcheck - slli.w $a6, $a1, 1 - alsl.d $a6, $a6, $t0, 2 - alsl.d $a7, $t8, $a6, 2 - addi.d $a7, $a7, 4 - sltu $s0, $t0, $a7 - sltu $s1, $a6, $fp - and $s0, $s0, $s1 - bnez $s0, .LBB336_102 -# %bb.85: # %vector.memcheck - alsl.d $t8, $t8, $t8, 1 - slli.d $t8, $t8, 2 - add.d $s1, $a2, $t8 - addi.d $t8, $s1, 8 - sltu $s0, $t0, $t8 - sltu $s2, $t6, $fp - and $s0, $s0, $s2 - bnez $s0, .LBB336_102 -# %bb.86: # %vector.memcheck - addi.d $s0, $s1, 4 - sltu $s2, $t0, $s0 - sltu $s3, $a2, $fp - and $s2, $s2, $s3 - bnez $s2, .LBB336_102 -# %bb.87: # %vector.memcheck - addi.d $s1, $s1, 12 - sltu $s2, $t0, $s1 - sltu $fp, $t5, $fp - and $fp, $s2, $fp - bnez $fp, .LBB336_102 -# %bb.88: # %vector.memcheck - sltu $fp, $t4, $a7 - sltu $s2, $a6, $t7 - and $fp, $fp, $s2 - bnez $fp, .LBB336_102 -# %bb.89: # %vector.memcheck - sltu $fp, $t4, $t8 - sltu $s2, $t6, $t7 - and $fp, $fp, $s2 - bnez $fp, .LBB336_102 -# %bb.90: # %vector.memcheck - sltu $fp, $t4, $s0 - sltu $s2, $a2, $t7 - and $fp, $fp, $s2 - bnez $fp, .LBB336_102 -# %bb.91: # %vector.memcheck - sltu $t4, $t4, $s1 - sltu $t7, $t5, $t7 - and $t4, $t4, $t7 - ld.d $a1, $sp, 192 # 8-byte Folded Reload - bnez $t4, .LBB336_101 -# %bb.92: # %vector.memcheck - sltu $t4, $a6, $t8 - sltu $t6, $t6, $a7 - and $t4, $t4, $t6 - move $fp, $ra - bnez $t4, .LBB336_99 -# %bb.93: # %vector.memcheck - sltu $t4, $a6, $s0 - sltu $t6, $a2, $a7 - and $t4, $t4, $t6 - bnez $t4, .LBB336_99 -# %bb.94: # %vector.memcheck - sltu $a6, $a6, $s1 - sltu $a7, $t5, $a7 - and $t5, $a6, $a7 - move $a6, $fp - move $a7, $t1 - move $t4, $t1 - ld.d $s0, $sp, 208 # 8-byte Folded Reload - bnez $t5, .LBB336_3 -# %bb.95: # %vector.ph - st.d $s6, $sp, 328 # 8-byte Folded Spill - move $a3, $s4 - move $t5, $zero - bstrpick.d $t8, $a1, 30, 2 - slli.d $t4, $t8, 2 - mul.d $t1, $t4, $s0 - slli.d $a6, $t8, 3 - alsl.d $a6, $t8, $a6, 2 - addi.d $a6, $a6, 2 - mul.d $a7, $t4, $a0 - slli.d $t6, $s0, 3 - alsl.d $t6, $s0, $t6, 2 - slli.d $t7, $s0, 4 - slli.d $fp, $t8, 5 - alsl.d $t8, $t8, $fp, 4 - slli.d $fp, $a0, 4 - addi.d $s0, $s7, 8 - addi.d $s1, $t6, 8 - ori $s2, $zero, 24 - lu12i.w $s3, -266240 - vreplgr2vr.w $vr0, $s3 - lu12i.w $s3, 259547 - ori $s3, $s3, 983 - vreplgr2vr.w $vr1, $s3 - move $s3, $t0 - .p2align 4, , 16 -.LBB336_96: # %vector.body - # =>This Inner Loop Header: Depth=1 - add.d $s4, $a2, $t5 - add.d $s5, $a2, $s0 - add.d $s6, $a2, $s1 - fld.s $fa2, $s4, 4 - fld.s $fa3, $s5, -4 - fld.s $fa4, $s4, 28 - fld.s $fa5, $s6, -4 - vextrins.w $vr2, $vr3, 16 - vextrins.w $vr2, $vr4, 32 - vextrins.w $vr2, $vr5, 48 - add.d $s5, $a2, $s2 - fld.s $fa3, $s5, -24 - fld.s $fa4, $s5, -12 - fldx.s $fa5, $a2, $s2 - fldx.s $fa6, $a2, $t6 - vfadd.s $vr2, $vr2, $vr2 - vextrins.w $vr3, $vr4, 16 - vextrins.w $vr3, $vr5, 32 - vextrins.w $vr3, $vr6, 48 - vfmul.s $vr4, $vr2, $vr0 - vfadd.s $vr4, $vr3, $vr4 - vfadd.s $vr2, $vr3, $vr2 - fld.s $fa3, $s4, 8 - fldx.s $fa5, $a2, $s0 - fld.s $fa6, $s4, 32 - fldx.s $fa7, $a2, $s1 - vst $vr2, $s3, 0 - vextrins.w $vr3, $vr5, 16 - vextrins.w $vr3, $vr6, 32 - vextrins.w $vr3, $vr7, 48 - vfadd.s $vr2, $vr3, $vr3 - vfmul.s $vr2, $vr2, $vr1 - vfsub.s $vr3, $vr4, $vr2 - vstx $vr3, $s3, $t3 - vfadd.s $vr2, $vr4, $vr2 - vstx $vr2, $s3, $s8 - add.d $t6, $t6, $t7 - addi.d $t5, $t5, 48 - add.d $s3, $s3, $fp - add.d $s2, $s2, $t7 - addi.d $s0, $s0, 48 - addi.d $s1, $s1, 48 - bne $t8, $t5, .LBB336_96 -# %bb.97: # %middle.block - ld.d $t0, $sp, 288 # 8-byte Folded Reload - ld.d $a1, $sp, 192 # 8-byte Folded Reload - move $s4, $a3 - ld.d $s6, $sp, 328 # 8-byte Folded Reload - move $fp, $ra - ld.d $s0, $sp, 208 # 8-byte Folded Reload - bne $t4, $a1, .LBB336_3 - b .LBB336_5 .LBB336_98: - move $a6, $fp - move $a7, $t1 - move $t4, $t1 - b .LBB336_3 + ld.d $s1, $sp, 328 # 8-byte Folded Reload + move $a6, $s1 + move $a7, $t3 + move $t4, $t3 + ld.d $s6, $sp, 224 # 8-byte Folded Reload + b .LBB336_23 .LBB336_99: - move $a6, $fp -.LBB336_100: # %.lr.ph.preheader645 - move $a7, $t1 - move $t4, $t1 - ld.d $s0, $sp, 208 # 8-byte Folded Reload - b .LBB336_3 -.LBB336_101: - move $fp, $ra - move $a6, $ra - b .LBB336_100 -.LBB336_102: - move $fp, $ra - move $a6, $ra - move $a7, $t1 - move $t4, $t1 - ld.d $a1, $sp, 192 # 8-byte Folded Reload - ld.d $s0, $sp, 208 # 8-byte Folded Reload - b .LBB336_3 + ld.d $s1, $sp, 328 # 8-byte Folded Reload + move $a6, $s1 + move $a7, $t3 + move $t4, $t3 + move $s3, $s7 + ld.d $s6, $sp, 224 # 8-byte Folded Reload + b .LBB336_23 .Lfunc_end336: .size dradb3, .Lfunc_end336-dradb3 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function dradbg -.LCPI337_0: - .word 0x40c90fdb # float 6.28318548 - .text - .p2align 5 + .p2align 5 # -- Begin function dradbg .type dradbg,@function dradbg: # @dradbg # %bb.0: @@ -80825,12 +80800,13 @@ dradbg: # @dradbg move $fp, $a2 move $s3, $a1 move $s1, $a0 - pcalau12i $a0, %pc_hi20(.LCPI337_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI337_0) - mul.w $s6, $a1, $s1 - movgr2fr.w $fa1, $a1 - ffint.s.w $fa1, $fa1 - fdiv.s $fa0, $fa0, $fa1 + mul.w $s6, $a1, $a0 + movgr2fr.w $fa0, $a1 + ffint.s.w $fa0, $fa0 + lu12i.w $a0, 265360 + ori $a0, $a0, 4059 + movgr2fr.w $fa1, $a0 + fdiv.s $fa0, $fa1, $fa0 fcvt.d.s $fs1, $fa0 fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(cos) diff --git a/results/MultiSource/Applications/sgefa/CMakeFiles/sgefa.dir/blas.s b/results/MultiSource/Applications/sgefa/CMakeFiles/sgefa.dir/blas.s index 7f25c7d1..3d52d542 100644 --- a/results/MultiSource/Applications/sgefa/CMakeFiles/sgefa.dir/blas.s +++ b/results/MultiSource/Applications/sgefa/CMakeFiles/sgefa.dir/blas.s @@ -507,14 +507,7 @@ sdot: # @sdot .Lfunc_end4: .size sdot, .Lfunc_end4-sdot # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function snrm2 -.LCPI5_0: - .dword 0x43e158e460913d00 # double 1.0E+19 -.LCPI5_1: - .dword 0x3bfb084b92366cc2 # double 9.1589344358391385E-20 - .text - .globl snrm2 + .globl snrm2 # -- Begin function snrm2 .p2align 5 .type snrm2,@function snrm2: # @snrm2 @@ -528,18 +521,21 @@ snrm2: # @snrm2 blez $a2, .LBB5_5 # %bb.2: move $a3, $zero - pcalau12i $a4, %pc_hi20(.LCPI5_0) - fld.d $fa1, $a4, %pc_lo12(.LCPI5_0) bstrpick.d $a4, $a0, 31, 0 + movgr2fr.d $fa1, $a4 + ffint.d.l $fa1, $fa1 + lu12i.w $a4, 395539 + ori $a4, $a4, 3328 + lu32i.d $a4, 88292 + lu52i.d $a4, $a4, 1086 movgr2fr.d $fa2, $a4 - ffint.d.l $fa2, $fa2 - fdiv.d $fa3, $fa1, $fa2 + fdiv.d $fa3, $fa2, $fa1 slli.d $a4, $a2, 2 movgr2fr.w $fa2, $zero .p2align 4, , 16 .LBB5_3: # =>This Inner Loop Header: Depth=1 - fld.s $fa4, $a1, 0 - fcmp.ceq.s $fcc0, $fa4, $fa2 + fld.s $fa1, $a1, 0 + fcmp.ceq.s $fcc0, $fa1, $fa2 bceqz $fcc0, .LBB5_6 # %bb.4: # in Loop: Header=BB5_3 Depth=1 addi.w $a3, $a3, 1 @@ -551,13 +547,16 @@ snrm2: # @snrm2 addi.d $sp, $sp, 16 ret .LBB5_6: # %.critedge.preheader - fneg.s $fa0, $fa4 - pcalau12i $a5, %pc_hi20(.LCPI5_1) - fld.d $fa1, $a5, %pc_lo12(.LCPI5_1) - fcmp.clt.s $fcc0, $fa2, $fa4 - fsel $fa4, $fa0, $fa4, $fcc0 - fcvt.d.s $fs0, $fa4 - fcmp.clt.d $fcc0, $fa1, $fs0 + fneg.s $fa0, $fa1 + fcmp.clt.s $fcc0, $fa2, $fa1 + fsel $fa1, $fa0, $fa1, $fcc0 + fcvt.d.s $fs0, $fa1 + lu12i.w $a5, -449690 + ori $a5, $a5, 3266 + lu32i.d $a5, -325557 + lu52i.d $a5, $a5, 959 + movgr2fr.d $fa4, $a5 + fcmp.clt.d $fcc0, $fa4, $fs0 bceqz $fcc0, .LBB5_8 # %bb.7: movgr2fr.d $fa1, $zero @@ -574,7 +573,7 @@ snrm2: # @snrm2 b .LBB5_11 .p2align 4, , 16 .LBB5_10: # in Loop: Header=BB5_11 Depth=1 - fdiv.s $fa6, $fa7, $fa4 + fdiv.s $fa6, $fa7, $fa1 fmul.s $fa6, $fa6, $fa6 fcvt.d.s $fa6, $fa6 fadd.d $fa0, $fa0, $fa6 @@ -588,22 +587,22 @@ snrm2: # @snrm2 fcmp.clt.s $fcc0, $fa2, $fa7 fsel $fa6, $fa6, $fa7, $fcc0 fcvt.d.s $ft0, $fa6 - fcmp.clt.d $fcc0, $fa1, $ft0 + fcmp.clt.d $fcc0, $fa4, $ft0 bcnez $fcc0, .LBB5_17 # %bb.12: # in Loop: Header=BB5_11 Depth=1 - fcmp.cule.s $fcc0, $fa6, $fa4 + fcmp.cule.s $fcc0, $fa6, $fa1 bcnez $fcc0, .LBB5_10 # %bb.13: # in Loop: Header=BB5_11 Depth=1 - fdiv.s $fa4, $fa4, $fa7 - fcvt.d.s $fa4, $fa4 - fmul.d $fa0, $fa0, $fa4 - fmadd.d $fa0, $fa0, $fa4, $fa5 - fmov.s $fa4, $fa6 + fdiv.s $fa1, $fa1, $fa7 + fcvt.d.s $fa1, $fa1 + fmul.d $fa0, $fa0, $fa1 + fmadd.d $fa0, $fa0, $fa1, $fa5 + fmov.s $fa1, $fa6 addi.w $a3, $a3, 1 add.d $a1, $a1, $a4 bne $a0, $a3, .LBB5_11 .LBB5_14: # %._crit_edge.loopexit - fcvt.d.s $fs0, $fa4 + fcvt.d.s $fs0, $fa1 fsqrt.d $fa1, $fa0 fcmp.cor.d $fcc0, $fa1, $fa1 bcnez $fcc0, .LBB5_30 @@ -617,7 +616,7 @@ snrm2: # @snrm2 # kill: def $f0_64 killed $f0_64 killed $vr0 b .LBB5_31 .LBB5_17: # %.preheader.loopexit - fcvt.d.s $fa1, $fa4 + fcvt.d.s $fa1, $fa1 fmul.d $fa0, $fa0, $fa1 fmul.d $fa1, $fa0, $fa1 bge $a3, $a0, .LBB5_21 @@ -708,18 +707,13 @@ snrm2: # @snrm2 .Lfunc_end5: .size snrm2, .Lfunc_end5-snrm2 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function r1mach -.LCPI6_0: - .dword 0x3e80000000000000 # double 1.1920928955078125E-7 - .text - .globl r1mach + .globl r1mach # -- Begin function r1mach .p2align 5 .type r1mach,@function r1mach: # @r1mach # %bb.0: - pcalau12i $a0, %pc_hi20(.LCPI6_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI6_0) + lu52i.d $a0, $zero, 1000 + movgr2fr.d $fa0, $a0 ret .Lfunc_end6: .size r1mach, .Lfunc_end6-r1mach diff --git a/results/MultiSource/Applications/sgefa/CMakeFiles/sgefa.dir/driver.s b/results/MultiSource/Applications/sgefa/CMakeFiles/sgefa.dir/driver.s index 481ab37c..935780dd 100644 --- a/results/MultiSource/Applications/sgefa/CMakeFiles/sgefa.dir/driver.s +++ b/results/MultiSource/Applications/sgefa/CMakeFiles/sgefa.dir/driver.s @@ -245,14 +245,6 @@ main: # @main .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI1_1: - .dword 0x47d2ced32a16a1b1 # double 9.9999999999999997E+37 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI1_2: - .word 0x42c80000 # float 100 .text .globl matgen .p2align 5 @@ -589,8 +581,8 @@ matgen: # @matgen move $t2, $zero addi.d $a1, $s6, -7 sltui $a1, $a1, 1 - pcalau12i $a2, %pc_hi20(.LCPI1_2) - fld.s $fa1, $a2, %pc_lo12(.LCPI1_2) + lu12i.w $a2, 273536 + movgr2fr.w $fa1, $a2 vldi $vr2, -1168 movgr2cf $fcc0, $a1 addi.d $a1, $s6, -8 @@ -1077,11 +1069,10 @@ matgen: # @matgen slli.d $a3, $a4, 2 slli.d $a4, $a4, 4 vreplvei.d $vr1, $vr0, 0 - pcalau12i $a5, %pc_hi20(.LCPI1_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI1_1) pcalau12i $a5, %pc_hi20(.LCPI1_0) - vld $vr3, $a5, %pc_lo12(.LCPI1_0) + vld $vr2, $a5, %pc_lo12(.LCPI1_0) ori $a5, $zero, 4 + movgr2fr.d $fa3, $s5 vrepli.b $vr4, 0 vreplgr2vr.d $vr5, $s5 b .LBB1_102 @@ -1119,7 +1110,7 @@ matgen: # @matgen vreplgr2vr.w $vr8, $a6 vreplvei.w $vr9, $vr6, 0 move $t1, $a3 - vori.b $vr10, $vr3, 0 + vori.b $vr10, $vr2, 0 .p2align 4, , 16 .LBB1_105: # %vector.body632 # Parent Loop BB1_102 Depth=1 @@ -1184,7 +1175,7 @@ matgen: # @matgen ffint.s.l $fa7, $fa7 fdiv.s $fa7, $fa6, $fa7 fcvt.d.s $fa7, $fa7 - fmul.d $fa7, $fa7, $fa2 + fmul.d $fa7, $fa7, $fa3 fdiv.d $fa7, $fa7, $fa0 fcvt.s.d $fa7, $fa7 fst.s $fa7, $a7, 0 @@ -1417,17 +1408,16 @@ matgen: # @matgen slli.d $a3, $a4, 2 slli.d $a4, $a4, 4 vreplvei.w $vr1, $vr0, 0 - pcalau12i $a5, %pc_hi20(.LCPI1_1) - fld.d $fa2, $a5, %pc_lo12(.LCPI1_1) - pcalau12i $a5, %pc_hi20(.LCPI1_0) - vld $vr3, $a5, %pc_lo12(.LCPI1_0) - lu12i.w $a5, 172394 - ori $a5, $a5, 433 - lu32i.d $a5, 184019 - lu52i.d $a5, $a5, 1149 - vreplgr2vr.d $vr4, $a5 ori $a5, $zero, 4 - vrepli.b $vr5, 0 + pcalau12i $a6, %pc_hi20(.LCPI1_0) + vld $vr2, $a6, %pc_lo12(.LCPI1_0) + lu12i.w $a6, 172394 + ori $a6, $a6, 433 + lu32i.d $a6, 184019 + lu52i.d $a6, $a6, 1149 + movgr2fr.d $fa3, $a6 + vrepli.b $vr4, 0 + vreplgr2vr.d $vr5, $a6 b .LBB1_139 .LBB1_138: # %._crit_edge461 # in Loop: Header=BB1_139 Depth=1 @@ -1463,13 +1453,13 @@ matgen: # @matgen vreplgr2vr.w $vr8, $a6 vreplvei.w $vr9, $vr6, 0 move $t1, $a3 - vori.b $vr10, $vr3, 0 + vori.b $vr10, $vr2, 0 .p2align 4, , 16 .LBB1_142: # %vector.body # Parent Loop BB1_139 Depth=1 # => This Inner Loop Header: Depth=2 - vilvh.w $vr11, $vr5, $vr10 - vilvl.w $vr12, $vr5, $vr10 + vilvh.w $vr11, $vr4, $vr10 + vilvl.w $vr12, $vr4, $vr10 vslt.du $vr12, $vr7, $vr12 vslt.du $vr11, $vr7, $vr11 vpickev.w $vr11, $vr11, $vr12 @@ -1488,8 +1478,8 @@ matgen: # @matgen vreplvei.w $vr11, $vr11, 2 fcvt.d.s $ft3, $ft3 vextrins.d $vr11, $vr12, 16 - vfdiv.d $vr11, $vr11, $vr4 - vfdiv.d $vr12, $vr13, $vr4 + vfdiv.d $vr11, $vr11, $vr5 + vfdiv.d $vr12, $vr13, $vr5 vreplvei.d $vr13, $vr12, 1 fcvt.s.d $ft5, $ft5 vreplvei.d $vr12, $vr12, 0 @@ -1528,7 +1518,7 @@ matgen: # @matgen fdiv.s $fa7, $fa7, $fa6 fmul.s $fa7, $fa7, $fa0 fcvt.d.s $fa7, $fa7 - fdiv.d $fa7, $fa7, $fa2 + fdiv.d $fa7, $fa7, $fa3 fcvt.s.d $fa7, $fa7 fst.s $fa7, $a7, 0 addi.d $a7, $a7, 4 diff --git a/results/MultiSource/Applications/siod/CMakeFiles/siod.dir/slib.s b/results/MultiSource/Applications/siod/CMakeFiles/siod.dir/slib.s index 63b9145b..1d69ae9a 100644 --- a/results/MultiSource/Applications/siod/CMakeFiles/siod.dir/slib.s +++ b/results/MultiSource/Applications/siod/CMakeFiles/siod.dir/slib.s @@ -1466,12 +1466,7 @@ vload: # @vload .Lfunc_end12: .size vload, .Lfunc_end12-vload # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function repl -.LCPI13_0: - .dword 0x404e000000000000 # double 60 - .text - .globl repl + .globl repl # -- Begin function repl .p2align 5 .type repl,@function repl: # @repl @@ -1501,8 +1496,10 @@ repl: # @repl st.d $a0, $sp, 88 # 8-byte Folded Spill pcalau12i $s3, %pc_hi20(siod_verbose_level) ori $s0, $zero, 2 - pcalau12i $a0, %pc_hi20(.LCPI13_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI13_0) + ori $a0, $zero, 0 + lu32i.d $a0, -131072 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fs0, $a0 pcalau12i $a0, %pc_hi20(tkbuffer) st.d $a0, $sp, 64 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(old_heap_used) @@ -2015,12 +2012,7 @@ ignore_print: # @ignore_print .Lfunc_end20: .size ignore_print, .Lfunc_end20-ignore_print # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function myruntime -.LCPI21_0: - .dword 0x404e000000000000 # double 60 - .text - .globl myruntime + .globl myruntime # -- Begin function myruntime .p2align 5 .type myruntime,@function myruntime: # @myruntime @@ -2031,14 +2023,16 @@ myruntime: # @myruntime pcaddu18i $ra, %call36(times) jirl $ra, $ra, 0 ld.d $a0, $sp, 8 + ld.d $a1, $sp, 16 movgr2fr.d $fa0, $a0 - ld.d $a0, $sp, 16 ffint.d.l $fa0, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI21_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI21_0) - movgr2fr.d $fa2, $a0 - ffint.d.l $fa2, $fa2 - fadd.d $fa0, $fa0, $fa2 + movgr2fr.d $fa1, $a1 + ffint.d.l $fa1, $fa1 + fadd.d $fa0, $fa0, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, -131072 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fa1, $a0 fdiv.d $fa0, $fa0, $fa1 ld.d $ra, $sp, 40 # 8-byte Folded Reload addi.d $sp, $sp, 48 @@ -2046,12 +2040,7 @@ myruntime: # @myruntime .Lfunc_end21: .size myruntime, .Lfunc_end21-myruntime # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function myrealtime -.LCPI22_0: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 - .text - .globl myrealtime + .globl myrealtime # -- Begin function myrealtime .p2align 5 .type myrealtime,@function myrealtime: # @myrealtime @@ -2070,14 +2059,17 @@ myrealtime: # @myrealtime ret .LBB22_2: ld.d $a0, $sp, 8 + ld.d $a1, $sp, 16 movgr2fr.d $fa0, $a0 - ld.d $a0, $sp, 16 - pcalau12i $a1, %pc_hi20(.LCPI22_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI22_0) ffint.d.l $fa0, $fa0 + movgr2fr.d $fa1, $a1 + ffint.d.l $fa1, $fa1 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 movgr2fr.d $fa2, $a0 - ffint.d.l $fa2, $fa2 - fmadd.d $fa0, $fa2, $fa1, $fa0 + fmadd.d $fa0, $fa1, $fa2, $fa0 ld.d $ra, $sp, 24 # 8-byte Folded Reload addi.d $sp, $sp, 32 ret @@ -10470,12 +10462,7 @@ gc_kind_check: # @gc_kind_check .Lfunc_end109: .size gc_kind_check, .Lfunc_end109-gc_kind_check # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function gc_mark_and_sweep -.LCPI110_0: - .dword 0x404e000000000000 # double 60 - .text - .globl gc_mark_and_sweep + .globl gc_mark_and_sweep # -- Begin function gc_mark_and_sweep .p2align 5 .type gc_mark_and_sweep,@function gc_mark_and_sweep: # @gc_mark_and_sweep @@ -10492,29 +10479,30 @@ gc_mark_and_sweep: # @gc_mark_and_sweep st.d $s6, $sp, 152 # 8-byte Folded Spill st.d $s7, $sp, 144 # 8-byte Folded Spill st.d $s8, $sp, 136 # 8-byte Folded Spill - fst.d $fs0, $sp, 128 # 8-byte Folded Spill - addi.d $a0, $sp, 96 + addi.d $a0, $sp, 104 pcaddu18i $ra, %call36(times) jirl $ra, $ra, 0 - ld.d $a0, $sp, 96 - movgr2fr.d $fa0, $a0 ld.d $a0, $sp, 104 + ld.d $a1, $sp, 112 + movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 - pcalau12i $a2, %pc_hi20(.LCPI110_0) - fld.d $fs0, $a2, %pc_lo12(.LCPI110_0) - movgr2fr.d $fa1, $a0 + movgr2fr.d $fa1, $a1 ffint.d.l $fa1, $fa1 + ori $a0, $zero, 0 fadd.d $fa0, $fa0, $fa1 - fdiv.d $fa0, $fa0, $fs0 - pcalau12i $a2, %pc_hi20(gc_rt) + lu32i.d $a0, -131072 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 + pcalau12i $a1, %pc_hi20(gc_rt) pcalau12i $a0, %pc_hi20(gc_status_flag) - st.d $a0, $sp, 56 # 8-byte Folded Spill + st.d $a0, $sp, 64 # 8-byte Folded Spill ld.d $a0, $a0, %pc_lo12(gc_status_flag) - st.d $a2, $sp, 72 # 8-byte Folded Spill - fst.d $fa0, $a2, %pc_lo12(gc_rt) - pcalau12i $a2, %pc_hi20(gc_cells_collected) - st.d $a2, $sp, 80 # 8-byte Folded Spill - st.d $zero, $a2, %pc_lo12(gc_cells_collected) + st.d $a1, $sp, 80 # 8-byte Folded Spill + fst.d $fa0, $a1, %pc_lo12(gc_rt) + pcalau12i $a1, %pc_hi20(gc_cells_collected) + st.d $a1, $sp, 88 # 8-byte Folded Spill + st.d $zero, $a1, %pc_lo12(gc_cells_collected) pcalau12i $fp, %pc_hi20(siod_verbose_level) beqz $a0, .LBB110_3 # %bb.1: @@ -10591,18 +10579,18 @@ gc_mark_and_sweep: # @gc_mark_and_sweep .LBB110_11: # %._crit_edge st.d $a3, $a0, %pc_lo12(heap) .LBB110_12: - st.d $fp, $sp, 64 # 8-byte Folded Spill + st.d $fp, $sp, 72 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(save_regs_gc_mark) addi.d $fp, $a0, %pc_lo12(save_regs_gc_mark) move $a0, $fp pcaddu18i $ra, %call36(getcontext) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(nheaps) - st.d $a0, $sp, 48 # 8-byte Folded Spill + st.d $a0, $sp, 56 # 8-byte Folded Spill ld.d $a0, $a0, %pc_lo12(nheaps) pcalau12i $s6, %pc_hi20(heaps) pcalau12i $a7, %pc_hi20(heap_size) - st.d $a7, $sp, 40 # 8-byte Folded Spill + st.d $a7, $sp, 48 # 8-byte Folded Spill blez $a0, .LBB110_24 # %bb.13: # %.split.preheader ori $a1, $s0, 2731 @@ -10621,8 +10609,8 @@ gc_mark_and_sweep: # @gc_mark_and_sweep # in Loop: Header=BB110_16 Depth=1 pcaddu18i $ra, %call36(gc_mark) jirl $ra, $ra, 0 - ld.d $a7, $sp, 40 # 8-byte Folded Reload - ld.d $a0, $sp, 48 # 8-byte Folded Reload + ld.d $a7, $sp, 48 # 8-byte Folded Reload + ld.d $a0, $sp, 56 # 8-byte Folded Reload ld.d $a1, $a0, %pc_lo12(nheaps) .p2align 4, , 16 .LBB110_15: # %looks_pointerp.exit.i.thread @@ -10706,13 +10694,13 @@ gc_mark_and_sweep: # @gc_mark_and_sweep bnez $a2, .LBB110_29 b .LBB110_26 .LBB110_30: # %mark_protected_registers.exit.loopexit - ld.d $a0, $sp, 48 # 8-byte Folded Reload + ld.d $a0, $sp, 56 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(nheaps) - ld.d $a7, $sp, 40 # 8-byte Folded Reload + ld.d $a7, $sp, 48 # 8-byte Folded Reload .LBB110_31: # %mark_protected_registers.exit pcalau12i $a1, %pc_hi20(stack_start_ptr) ld.d $a1, $a1, %pc_lo12(stack_start_ptr) - addi.d $a2, $sp, 88 + addi.d $a2, $sp, 96 sltu $a3, $a2, $a1 sub.d $a4, $a2, $a1 masknez $a4, $a4, $a3 @@ -10742,8 +10730,8 @@ gc_mark_and_sweep: # @gc_mark_and_sweep # in Loop: Header=BB110_36 Depth=1 pcaddu18i $ra, %call36(gc_mark) jirl $ra, $ra, 0 - ld.d $a7, $sp, 40 # 8-byte Folded Reload - ld.d $a0, $sp, 48 # 8-byte Folded Reload + ld.d $a7, $sp, 48 # 8-byte Folded Reload + ld.d $a0, $sp, 56 # 8-byte Folded Reload ld.d $a1, $a0, %pc_lo12(nheaps) .p2align 4, , 16 .LBB110_35: # %looks_pointerp.exit.i7.thread @@ -10794,24 +10782,24 @@ gc_mark_and_sweep: # @gc_mark_and_sweep blez $a0, .LBB110_64 # %bb.45: # %.lr.ph48.preheader move $a2, $zero - st.d $zero, $sp, 32 # 8-byte Folded Spill + st.d $zero, $sp, 40 # 8-byte Folded Spill ori $s4, $zero, 12 ori $t0, $zero, 3 pcalau12i $a1, %pc_hi20(user_types) - st.d $a1, $sp, 24 # 8-byte Folded Spill + st.d $a1, $sp, 32 # 8-byte Folded Spill lu12i.w $a1, 1 ori $t1, $a1, 3904 pcalau12i $a1, %pc_hi20(.L.str.51) addi.d $a1, $a1, %pc_lo12(.L.str.51) - st.d $a1, $sp, 8 # 8-byte Folded Spill + st.d $a1, $sp, 16 # 8-byte Folded Spill move $s0, $zero move $a1, $zero - st.d $s6, $sp, 16 # 8-byte Folded Spill + st.d $s6, $sp, 24 # 8-byte Folded Spill b .LBB110_48 .p2align 4, , 16 .LBB110_46: # %mark_locations_array.exit17.loopexit # in Loop: Header=BB110_48 Depth=1 - ld.d $a0, $sp, 48 # 8-byte Folded Reload + ld.d $a0, $sp, 56 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(nheaps) move $a2, $s5 .LBB110_47: # %mark_locations_array.exit17 @@ -10830,7 +10818,7 @@ gc_mark_and_sweep: # @gc_mark_and_sweep beqz $s2, .LBB110_47 # %bb.49: # in Loop: Header=BB110_48 Depth=1 ld.d $a4, $a7, %pc_lo12(heap_size) - ld.d $a5, $sp, 32 # 8-byte Folded Reload + ld.d $a5, $sp, 40 # 8-byte Folded Reload bge $a5, $a4, .LBB110_47 # %bb.50: # %.lr.ph43.preheader # in Loop: Header=BB110_48 Depth=1 @@ -10870,7 +10858,7 @@ gc_mark_and_sweep: # @gc_mark_and_sweep addi.d $a0, $s7, -19 bltu $a0, $t0, .LBB110_55 # %bb.57: # in Loop: Header=BB110_53 Depth=2 - ld.d $a0, $sp, 24 # 8-byte Folded Reload + ld.d $a0, $sp, 32 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(user_types) move $s3, $a0 bnez $a0, .LBB110_61 @@ -10882,23 +10870,23 @@ gc_mark_and_sweep: # @gc_mark_and_sweep move $s3, $a0 bnez $a0, .LBB110_60 # %bb.59: # in Loop: Header=BB110_53 Depth=2 - ld.d $a0, $sp, 8 # 8-byte Folded Reload - ld.d $a1, $sp, 32 # 8-byte Folded Reload + ld.d $a0, $sp, 16 # 8-byte Folded Reload + ld.d $a1, $sp, 40 # 8-byte Folded Reload pcaddu18i $ra, %call36(err) jirl $ra, $ra, 0 .LBB110_60: # %must_malloc.exit.i # in Loop: Header=BB110_53 Depth=2 - ld.d $a0, $sp, 24 # 8-byte Folded Reload + ld.d $a0, $sp, 32 # 8-byte Folded Reload st.d $s3, $a0, %pc_lo12(user_types) move $a0, $s3 - ld.d $a1, $sp, 32 # 8-byte Folded Reload + ld.d $a1, $sp, 40 # 8-byte Folded Reload move $a2, $s6 pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ld.d $a7, $sp, 40 # 8-byte Folded Reload + ld.d $a7, $sp, 48 # 8-byte Folded Reload ori $t0, $zero, 3 move $t1, $s6 - ld.d $s6, $sp, 16 # 8-byte Folded Reload + ld.d $s6, $sp, 24 # 8-byte Folded Reload .LBB110_61: # in Loop: Header=BB110_53 Depth=2 bstrpick.d $a0, $s7, 15, 0 ori $a1, $zero, 99 @@ -10916,7 +10904,7 @@ gc_mark_and_sweep: # @gc_mark_and_sweep jirl $ra, $a1, 0 move $t1, $fp ori $t0, $zero, 3 - ld.d $a7, $sp, 40 # 8-byte Folded Reload + ld.d $a7, $sp, 48 # 8-byte Folded Reload b .LBB110_55 .LBB110_64: move $a3, $zero @@ -10926,26 +10914,30 @@ gc_mark_and_sweep: # @gc_mark_and_sweep move $a4, $zero move $a3, $zero .LBB110_66: # %gc_sweep.exit - ld.d $fp, $sp, 80 # 8-byte Folded Reload + ld.d $fp, $sp, 88 # 8-byte Folded Reload st.d $a3, $fp, %pc_lo12(gc_cells_collected) pcalau12i $a0, %pc_hi20(freelist) st.d $a4, $a0, %pc_lo12(freelist) - addi.d $a0, $sp, 96 + addi.d $a0, $sp, 104 pcaddu18i $ra, %call36(times) jirl $ra, $ra, 0 - ld.d $a0, $sp, 96 - ld.d $a1, $sp, 104 + ld.d $a0, $sp, 104 + ld.d $a1, $sp, 112 movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 movgr2fr.d $fa1, $a1 ffint.d.l $fa1, $fa1 + ori $a0, $zero, 0 fadd.d $fa0, $fa0, $fa1 - fdiv.d $fa0, $fa0, $fs0 - ld.d $a2, $sp, 72 # 8-byte Folded Reload + lu32i.d $a0, -131072 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 + ld.d $a2, $sp, 80 # 8-byte Folded Reload fld.d $fa1, $a2, %pc_lo12(gc_rt) pcalau12i $a0, %pc_hi20(gc_time_taken) fld.d $fa2, $a0, %pc_lo12(gc_time_taken) - ld.d $a1, $sp, 56 # 8-byte Folded Reload + ld.d $a1, $sp, 64 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(gc_status_flag) fsub.d $fa0, $fa0, $fa1 fst.d $fa0, $a2, %pc_lo12(gc_rt) @@ -10953,7 +10945,7 @@ gc_mark_and_sweep: # @gc_mark_and_sweep fst.d $fa1, $a0, %pc_lo12(gc_time_taken) beqz $a1, .LBB110_69 # %bb.67: # %gc_sweep.exit - ld.d $a0, $sp, 64 # 8-byte Folded Reload + ld.d $a0, $sp, 72 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(siod_verbose_level) ori $a1, $zero, 4 blt $a0, $a1, .LBB110_69 @@ -10965,7 +10957,6 @@ gc_mark_and_sweep: # @gc_mark_and_sweep pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 .LBB110_69: # %gc_ms_stats_end.exit - fld.d $fs0, $sp, 128 # 8-byte Folded Reload ld.d $s8, $sp, 136 # 8-byte Folded Reload ld.d $s7, $sp, 144 # 8-byte Folded Reload ld.d $s6, $sp, 152 # 8-byte Folded Reload @@ -10988,12 +10979,7 @@ gc_mark_and_sweep: # @gc_mark_and_sweep .Lfunc_end110: .size gc_mark_and_sweep, .Lfunc_end110-gc_mark_and_sweep # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function gc_ms_stats_start -.LCPI111_0: - .dword 0x404e000000000000 # double 60 - .text - .globl gc_ms_stats_start + .globl gc_ms_stats_start # -- Begin function gc_ms_stats_start .p2align 5 .type gc_ms_stats_start,@function gc_ms_stats_start: # @gc_ms_stats_start @@ -11004,14 +10990,16 @@ gc_ms_stats_start: # @gc_ms_stats_start pcaddu18i $ra, %call36(times) jirl $ra, $ra, 0 ld.d $a0, $sp, 8 + ld.d $a1, $sp, 16 movgr2fr.d $fa0, $a0 - ld.d $a0, $sp, 16 ffint.d.l $fa0, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI111_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI111_0) - movgr2fr.d $fa2, $a0 - ffint.d.l $fa2, $fa2 - fadd.d $fa0, $fa0, $fa2 + movgr2fr.d $fa1, $a1 + ffint.d.l $fa1, $fa1 + fadd.d $fa0, $fa0, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, -131072 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fa1, $a0 fdiv.d $fa0, $fa0, $fa1 pcalau12i $a0, %pc_hi20(gc_status_flag) ld.d $a0, $a0, %pc_lo12(gc_status_flag) @@ -11352,12 +11340,7 @@ gc_sweep: # @gc_sweep .Lfunc_end114: .size gc_sweep, .Lfunc_end114-gc_sweep # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function gc_ms_stats_end -.LCPI115_0: - .dword 0x404e000000000000 # double 60 - .text - .globl gc_ms_stats_end + .globl gc_ms_stats_end # -- Begin function gc_ms_stats_end .p2align 5 .type gc_ms_stats_end,@function gc_ms_stats_end: # @gc_ms_stats_end @@ -11368,14 +11351,16 @@ gc_ms_stats_end: # @gc_ms_stats_end pcaddu18i $ra, %call36(times) jirl $ra, $ra, 0 ld.d $a0, $sp, 8 + ld.d $a1, $sp, 16 movgr2fr.d $fa0, $a0 - ld.d $a0, $sp, 16 ffint.d.l $fa0, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI115_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI115_0) - movgr2fr.d $fa2, $a0 - ffint.d.l $fa2, $fa2 - fadd.d $fa0, $fa0, $fa2 + movgr2fr.d $fa1, $a1 + ffint.d.l $fa1, $fa1 + fadd.d $fa0, $fa0, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, -131072 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fa1, $a0 fdiv.d $fa0, $fa0, $fa1 pcalau12i $a0, %pc_hi20(gc_rt) fld.d $fa1, $a0, %pc_lo12(gc_rt) @@ -27548,12 +27533,7 @@ siod_verbose_check: # @siod_verbose_check .Lfunc_end196: .size siod_verbose_check, .Lfunc_end196-siod_verbose_check # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function lruntime -.LCPI197_0: - .dword 0x404e000000000000 # double 60 - .text - .globl lruntime + .globl lruntime # -- Begin function lruntime .p2align 5 .type lruntime,@function lruntime: # @lruntime @@ -27576,12 +27556,14 @@ lruntime: # @lruntime ffint.d.l $fa0, $fa0 movgr2fr.d $fa1, $a1 ffint.d.l $fa1, $fa1 - pcalau12i $a0, %pc_hi20(.LCPI197_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI197_0) + fadd.d $fa0, $fa0, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, -131072 + lu52i.d $a1, $a0, 1028 pcalau12i $s2, %pc_hi20(inums_dim) ld.d $a0, $s2, %pc_lo12(inums_dim) - fadd.d $fa0, $fa0, $fa1 - fdiv.d $fs1, $fa0, $fa2 + movgr2fr.d $fa1, $a1 + fdiv.d $fs1, $fa0, $fa1 pcalau12i $s0, %pc_hi20(gc_kind_copying) lu12i.w $s1, 32 blez $a0, .LBB197_5 @@ -27800,12 +27782,7 @@ lruntime: # @lruntime .Lfunc_end197: .size lruntime, .Lfunc_end197-lruntime # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function lrealtime -.LCPI198_0: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 - .text - .globl lrealtime + .globl lrealtime # -- Begin function lrealtime .p2align 5 .type lrealtime,@function lrealtime: # @lrealtime @@ -27828,14 +27805,17 @@ lrealtime: # @lrealtime b .LBB198_7 .LBB198_2: ld.d $a0, $sp, 0 + ld.d $a1, $sp, 8 movgr2fr.d $fa0, $a0 - ld.d $a0, $sp, 8 - pcalau12i $a1, %pc_hi20(.LCPI198_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI198_0) ffint.d.l $fa0, $fa0 + movgr2fr.d $fa1, $a1 + ffint.d.l $fa1, $fa1 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 movgr2fr.d $fa2, $a0 - ffint.d.l $fa2, $fa2 - fmadd.d $fs0, $fa2, $fa1, $fa0 + fmadd.d $fs0, $fa1, $fa2, $fa0 pcalau12i $a0, %pc_hi20(inums_dim) ld.d $a1, $a0, %pc_lo12(inums_dim) blez $a1, .LBB198_7 diff --git a/results/MultiSource/Applications/siod/CMakeFiles/siod.dir/sliba.s b/results/MultiSource/Applications/siod/CMakeFiles/siod.dir/sliba.s index 6d73d6f1..72920e15 100644 --- a/results/MultiSource/Applications/siod/CMakeFiles/siod.dir/sliba.s +++ b/results/MultiSource/Applications/siod/CMakeFiles/siod.dir/sliba.s @@ -1003,12 +1003,8 @@ array_equal: # @array_equal .word .LBB8_5-.LJTI8_0 .word .LBB8_3-.LJTI8_0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function array_sxhash -.LCPI9_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 .text - .globl array_sxhash + .globl array_sxhash # -- Begin function array_sxhash .p2align 5 .type array_sxhash,@function array_sxhash: # @array_sxhash @@ -1071,10 +1067,10 @@ array_sxhash: # @array_sxhash ld.d $a1, $s0, 8 blez $a1, .LBB9_7 # %bb.10: # %.lr.ph61.preheader - ld.d $a2, $s0, 16 - pcalau12i $a0, %pc_hi20(.LCPI9_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI9_0) move $a0, $zero + ld.d $a2, $s0, 16 + lu52i.d $a3, $zero, 1086 + movgr2fr.d $fa0, $a3 lu52i.d $a3, $zero, -2048 .p2align 4, , 16 .LBB9_11: # %.lr.ph61 @@ -3655,12 +3651,8 @@ nlength: # @nlength .word .LBB37_5-.LJTI37_0 .word .LBB37_3-.LJTI37_0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function c_sxhash -.LCPI38_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 .text - .globl c_sxhash + .globl c_sxhash # -- Begin function c_sxhash .p2align 5 .type c_sxhash,@function c_sxhash: # @c_sxhash @@ -3735,8 +3727,8 @@ c_sxhash: # @c_sxhash b .LBB38_21 .LBB38_11: fld.d $fa0, $a1, 8 - pcalau12i $a0, %pc_hi20(.LCPI38_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI38_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 fsub.d $fa1, $fa0, $fa1 ftintrz.l.d $fa1, $fa1 @@ -5515,12 +5507,7 @@ lfflush: # @lfflush .Lfunc_end55: .size lfflush, .Lfunc_end55-lfflush # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function string_length -.LCPI56_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 - .text - .globl string_length + .globl string_length # -- Begin function string_length .p2align 5 .type string_length,@function string_length: # @string_length @@ -5545,12 +5532,13 @@ string_length: # @string_length pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 srli.d $a1, $a0, 32 - pcalau12i $a2, %pc_hi20(.LCPI56_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI56_0) lu52i.d $a2, $zero, 1107 or $a1, $a1, $a2 + movgr2fr.d $fa0, $a1 + lu12i.w $a1, 256 + lu52i.d $a1, $a1, 1107 movgr2fr.d $fa1, $a1 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 lu12i.w $a1, 275200 bstrins.d $a0, $a1, 63, 32 movgr2fr.d $fa1, $a0 @@ -5901,14 +5889,7 @@ number2string: # @number2string .Lfunc_end59: .size number2string, .Lfunc_end59-number2string # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function string2number -.LCPI60_0: - .dword 0xc048000000000000 # double -48 -.LCPI60_1: - .dword 0xc050400000000000 # double -65 - .text - .globl string2number + .globl string2number # -- Begin function string2number .p2align 5 .type string2number,@function string2number: # @string2number @@ -6004,12 +5985,16 @@ string2number: # @string2number ld.d $fp, $a0, 0 movgr2fr.d $fa0, $s0 ffint.d.l $fs0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI60_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI60_1) - pcalau12i $a0, %pc_hi20(.LCPI60_0) - fld.d $fs2, $a0, %pc_lo12(.LCPI60_0) addi.d $s0, $s2, 1 movgr2fr.d $fa0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, 16384 + lu52i.d $a1, $a1, -1019 + movgr2fr.d $fs1, $a1 + lu32i.d $a0, -524288 + lu52i.d $a0, $a0, -1020 + movgr2fr.d $fs2, $a0 b .LBB60_17 .p2align 4, , 16 .LBB60_15: # in Loop: Header=BB60_17 Depth=1 @@ -9142,12 +9127,7 @@ listn: # @listn .Lfunc_end98: .size listn, .Lfunc_end98-listn # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function fast_load -.LCPI99_0: - .dword 0x4059000000000000 # double 100 - .text - .globl fast_load + .globl fast_load # -- Begin function fast_load .p2align 5 .type fast_load,@function fast_load: # @fast_load @@ -9188,8 +9168,10 @@ fast_load: # @fast_load pcaddu18i $ra, %call36(fopen_c) jirl $ra, $ra, 0 move $fp, $a0 - pcalau12i $a0, %pc_hi20(.LCPI99_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI99_0) + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa0, $a0 pcaddu18i $ra, %call36(flocons) jirl $ra, $ra, 0 move $a1, $zero @@ -9290,12 +9272,7 @@ fast_load: # @fast_load .Lfunc_end99: .size fast_load, .Lfunc_end99-fast_load # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function fast_save -.LCPI100_0: - .dword 0x4059000000000000 # double 100 - .text - .globl fast_save + .globl fast_save # -- Begin function fast_save .p2align 5 .type fast_save,@function fast_save: # @fast_save @@ -9361,8 +9338,10 @@ fast_save: # @fast_save move $s0, $a0 bnez $fp, .LBB100_4 .LBB100_6: - pcalau12i $a0, %pc_hi20(.LCPI100_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI100_0) + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa0, $a0 pcaddu18i $ra, %call36(flocons) jirl $ra, $ra, 0 move $a1, $zero @@ -11279,12 +11258,7 @@ leval_cond: # @leval_cond .Lfunc_end133: .size leval_cond, .Lfunc_end133-leval_cond # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function lstrspn -.LCPI134_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 - .text - .globl lstrspn + .globl lstrspn # -- Begin function lstrspn .p2align 5 .type lstrspn,@function lstrspn: # @lstrspn @@ -11305,12 +11279,13 @@ lstrspn: # @lstrspn pcaddu18i $ra, %call36(strspn) jirl $ra, $ra, 0 srli.d $a1, $a0, 32 - pcalau12i $a2, %pc_hi20(.LCPI134_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI134_0) lu52i.d $a2, $zero, 1107 or $a1, $a1, $a2 + movgr2fr.d $fa0, $a1 + lu12i.w $a1, 256 + lu52i.d $a1, $a1, 1107 movgr2fr.d $fa1, $a1 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 lu12i.w $a1, 275200 bstrins.d $a0, $a1, 63, 32 movgr2fr.d $fa1, $a0 @@ -11324,12 +11299,7 @@ lstrspn: # @lstrspn .Lfunc_end134: .size lstrspn, .Lfunc_end134-lstrspn # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function lstrcspn -.LCPI135_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 - .text - .globl lstrcspn + .globl lstrcspn # -- Begin function lstrcspn .p2align 5 .type lstrcspn,@function lstrcspn: # @lstrcspn @@ -11350,12 +11320,13 @@ lstrcspn: # @lstrcspn pcaddu18i $ra, %call36(strcspn) jirl $ra, $ra, 0 srli.d $a1, $a0, 32 - pcalau12i $a2, %pc_hi20(.LCPI135_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI135_0) lu52i.d $a2, $zero, 1107 or $a1, $a1, $a2 + movgr2fr.d $fa0, $a1 + lu12i.w $a1, 256 + lu52i.d $a1, $a1, 1107 movgr2fr.d $fa1, $a1 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 lu12i.w $a1, 275200 bstrins.d $a0, $a1, 63, 32 movgr2fr.d $fa1, $a0 @@ -11676,13 +11647,9 @@ set_eval_history: # @set_eval_history .Lfunc_end138: .size set_eval_history, .Lfunc_end138-set_eval_history # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function init_subrs_a -.LCPI139_0: - .dword 0x400921fb54442d18 # double 3.1415926535897931 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI139_1: + .p2align 4, 0x0 # -- Begin function init_subrs_a +.LCPI139_0: .byte 0 # 0x0 .byte 1 # 0x1 .byte 2 # 0x2 @@ -11699,7 +11666,7 @@ set_eval_history: # @set_eval_history .byte 13 # 0xd .byte 14 # 0xe .byte 15 # 0xf -.LCPI139_2: +.LCPI139_1: .byte 26 # 0x1a .byte 27 # 0x1b .byte 28 # 0x1c @@ -12264,8 +12231,11 @@ init_subrs_a: # @init_subrs_a pcaddu18i $ra, %call36(cintern) jirl $ra, $ra, 0 move $s1, $a0 - pcalau12i $a0, %pc_hi20(.LCPI139_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI139_0) + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fa0, $a0 pcaddu18i $ra, %call36(flocons) jirl $ra, $ra, 0 move $a1, $a0 @@ -12284,16 +12254,16 @@ init_subrs_a: # @init_subrs_a move $a0, $s1 pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI139_1) - vld $vr0, $a0, %pc_lo12(.LCPI139_1) + pcalau12i $a0, %pc_hi20(.LCPI139_0) + vld $vr0, $a0, %pc_lo12(.LCPI139_0) vst $vr0, $s1, 65 lu12i.w $a0, 78113 ori $a0, $a0, 272 lu32i.d $a0, 398612 lu52i.d $a0, $a0, 369 st.d $a0, $s1, 81 - pcalau12i $a0, %pc_hi20(.LCPI139_2) - vld $vr0, $a0, %pc_lo12(.LCPI139_2) + pcalau12i $a0, %pc_hi20(.LCPI139_1) + vld $vr0, $a0, %pc_lo12(.LCPI139_1) lu12i.w $a0, 1 ori $a0, $a0, 2328 st.h $a0, $s1, 89 @@ -12514,20 +12484,17 @@ init_subrs_a: # @init_subrs_a .Lfunc_end139: .size init_subrs_a, .Lfunc_end139-init_subrs_a # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function parser_fasl -.LCPI140_0: - .dword 0x4059000000000000 # double 100 - .text - .p2align 5 + .p2align 5 # -- Begin function parser_fasl .type parser_fasl,@function parser_fasl: # @parser_fasl # %bb.0: addi.d $sp, $sp, -16 st.d $ra, $sp, 8 # 8-byte Folded Spill st.d $fp, $sp, 0 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI140_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI140_0) + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa0, $a0 pcaddu18i $ra, %call36(flocons) jirl $ra, $ra, 0 move $a1, $zero diff --git a/results/MultiSource/Applications/siod/CMakeFiles/siod.dir/slibu.s b/results/MultiSource/Applications/siod/CMakeFiles/siod.dir/slibu.s index 0e4b9ab0..f3332aa9 100644 --- a/results/MultiSource/Applications/siod/CMakeFiles/siod.dir/slibu.s +++ b/results/MultiSource/Applications/siod/CMakeFiles/siod.dir/slibu.s @@ -2860,12 +2860,7 @@ decode_st_mode: # @decode_st_mode .Lfunc_end52: .size decode_st_mode, .Lfunc_end52-decode_st_mode # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function decode_stat -.LCPI53_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 - .text - .globl decode_stat + .globl decode_stat # -- Begin function decode_stat .p2align 5 .type decode_stat,@function decode_stat: # @decode_stat @@ -2886,11 +2881,12 @@ decode_stat: # @decode_stat move $s3, $a0 ld.d $a0, $a0, 0 srli.d $a1, $a0, 32 - pcalau12i $a2, %pc_hi20(.LCPI53_0) - fld.d $fs0, $a2, %pc_lo12(.LCPI53_0) lu52i.d $fp, $zero, 1107 or $a1, $a1, $fp movgr2fr.d $fa0, $a1 + lu12i.w $a1, 256 + lu52i.d $a1, $a1, 1107 + movgr2fr.d $fs0, $a1 fsub.d $fa0, $fa0, $fs0 lu12i.w $s0, 275200 bstrins.d $a0, $s0, 63, 32 @@ -3892,12 +3888,7 @@ http_date: # @http_date .Lfunc_end72: .size http_date, .Lfunc_end72-http_date # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function lsleep -.LCPI73_0: - .dword 0x412e848000000000 # double 1.0E+6 - .text - .globl lsleep + .globl lsleep # -- Begin function lsleep .p2align 5 .type lsleep,@function lsleep: # @lsleep @@ -3906,8 +3897,10 @@ lsleep: # @lsleep st.d $ra, $sp, 8 # 8-byte Folded Spill pcaddu18i $ra, %call36(get_c_double) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI73_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI73_0) + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 ftintrz.l.d $fa0, $fa0 movfr2gr.d $a0, $fa0 @@ -4538,12 +4531,7 @@ html_decode: # @html_decode .Lfunc_end77: .size html_decode, .Lfunc_end77-html_decode # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function lgets -.LCPI78_0: - .dword 0x40a0000000000000 # double 2048 - .text - .globl lgets + .globl lgets # -- Begin function lgets .p2align 5 .type lgets,@function lgets: # @lgets @@ -4572,8 +4560,8 @@ lgets: # @lgets ori $a0, $zero, 2049 bltu $s0, $a0, .LBB78_7 # %bb.3: - pcalau12i $a0, %pc_hi20(.LCPI78_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI78_0) + lu52i.d $a0, $zero, 1034 + movgr2fr.d $fa0, $a0 pcaddu18i $ra, %call36(flocons) jirl $ra, $ra, 0 move $a2, $a0 @@ -5901,12 +5889,7 @@ err_large_index: # @err_large_index .Lfunc_end101: .size err_large_index, .Lfunc_end101-err_large_index # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function datref -.LCPI102_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 - .text - .globl datref + .globl datref # -- Begin function datref .p2align 5 .type datref,@function datref: # @datref @@ -5991,12 +5974,13 @@ datref: # @datref .LBB102_12: ldx.d $a0, $fp, $s1 srli.d $a1, $a0, 32 - pcalau12i $a2, %pc_hi20(.LCPI102_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI102_0) lu52i.d $a2, $zero, 1107 or $a1, $a1, $a2 + movgr2fr.d $fa0, $a1 + lu12i.w $a1, 256 + lu52i.d $a1, $a1, 1107 movgr2fr.d $fa1, $a1 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 lu12i.w $a1, 275200 bstrins.d $a0, $a1, 63, 32 movgr2fr.d $fa1, $a0 @@ -6195,12 +6179,7 @@ mkdatref: # @mkdatref .Lfunc_end104: .size mkdatref, .Lfunc_end104-mkdatref # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function datlength -.LCPI105_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 - .text - .globl datlength + .globl datlength # -- Begin function datlength .p2align 5 .type datlength,@function datlength: # @datlength @@ -6249,12 +6228,13 @@ datlength: # @datlength .LBB105_7: ld.d $a0, $sp, 8 srli.d $a1, $a0, 32 - pcalau12i $a2, %pc_hi20(.LCPI105_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI105_0) lu52i.d $a2, $zero, 1107 or $a1, $a1, $a2 + movgr2fr.d $fa0, $a1 + lu12i.w $a1, 256 + lu52i.d $a1, $a1, 1107 movgr2fr.d $fa1, $a1 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 lu12i.w $a1, 275200 bstrins.d $a0, $a1, 63, 32 movgr2fr.d $fa1, $a0 diff --git a/results/MultiSource/Applications/sqlite3/CMakeFiles/sqlite3.dir/shell.s b/results/MultiSource/Applications/sqlite3/CMakeFiles/sqlite3.dir/shell.s index 4a13566b..6035ae67 100644 --- a/results/MultiSource/Applications/sqlite3/CMakeFiles/sqlite3.dir/shell.s +++ b/results/MultiSource/Applications/sqlite3/CMakeFiles/sqlite3.dir/shell.s @@ -4026,12 +4026,8 @@ callback: # @callback .word .LBB5_141-.LJTI5_0 .word .LBB5_2-.LJTI5_0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function process_input -.LCPI6_0: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 .text - .p2align 5 + .p2align 5 # -- Begin function process_input .type process_input,@function process_input: # @process_input # %bb.0: @@ -4047,6 +4043,7 @@ process_input: # @process_input st.d $s6, $sp, 312 # 8-byte Folded Spill st.d $s7, $sp, 304 # 8-byte Folded Spill st.d $s8, $sp, 296 # 8-byte Folded Spill + fst.d $fs0, $sp, 288 # 8-byte Folded Spill move $s2, $a1 move $s0, $a0 pcalau12i $a0, %pc_hi20(bail_on_error) @@ -4079,6 +4076,11 @@ process_input: # @process_input lu12i.w $a0, 244 ori $a0, $a0, 576 st.d $a0, $sp, 56 # 8-byte Folded Spill + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs0, $a0 .LBB6_1: # %.outer # =>This Loop Header: Depth=1 # Child Loop BB6_2 Depth 2 @@ -4542,22 +4544,20 @@ process_input: # @process_input mul.d $a0, $a0, $a2 vpickve2gr.d $a1, $vr0, 1 add.d $a0, $a1, $a0 - pcalau12i $a1, %pc_hi20(.LCPI6_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI6_0) - vld $vr1, $s0, 16 - vld $vr2, $sp, 160 - movgr2fr.w $fa3, $a0 - ffint.d.w $fa3, $fa3 - fmul.d $fa3, $fa3, $fa0 - vsub.d $vr1, $vr2, $vr1 - vpickve2gr.d $a0, $vr1, 0 + vld $vr0, $s0, 16 + vld $vr1, $sp, 160 + movgr2fr.w $fa2, $a0 + ffint.d.w $fa2, $fa2 + fmul.d $fa2, $fa2, $fs0 + vsub.d $vr0, $vr1, $vr0 + vpickve2gr.d $a0, $vr0, 0 mul.d $a0, $a0, $a2 - vpickve2gr.d $a1, $vr1, 1 + vpickve2gr.d $a1, $vr0, 1 add.d $a0, $a1, $a0 - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 - movfr2gr.d $a1, $fa3 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + fmul.d $fa0, $fa0, $fs0 + movfr2gr.d $a1, $fa2 movfr2gr.d $a2, $fa0 pcalau12i $a0, %pc_hi20(.L.str.167) addi.d $a0, $a0, %pc_lo12(.L.str.167) @@ -4766,6 +4766,7 @@ process_input: # @process_input pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 move $a0, $s3 + fld.d $fs0, $sp, 288 # 8-byte Folded Reload ld.d $s8, $sp, 296 # 8-byte Folded Reload ld.d $s7, $sp, 304 # 8-byte Folded Reload ld.d $s6, $sp, 312 # 8-byte Folded Reload diff --git a/results/MultiSource/Applications/sqlite3/CMakeFiles/sqlite3.dir/sqlite3.s b/results/MultiSource/Applications/sqlite3/CMakeFiles/sqlite3.dir/sqlite3.s index 72317cf8..46df1a4f 100644 --- a/results/MultiSource/Applications/sqlite3/CMakeFiles/sqlite3.dir/sqlite3.s +++ b/results/MultiSource/Applications/sqlite3/CMakeFiles/sqlite3.dir/sqlite3.s @@ -571,104 +571,100 @@ sqlite3_vmprintf: # @sqlite3_vmprintf .Lfunc_end12: .size sqlite3_vmprintf, .Lfunc_end12-sqlite3_vmprintf # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function vxprintf -.LCPI13_0: - .dword 0x3fb999999999999a # double 0.10000000000000001 - .text - .p2align 5 + .p2align 5 # -- Begin function vxprintf .type vxprintf,@function vxprintf: # @vxprintf # %bb.0: - addi.d $sp, $sp, -768 - st.d $ra, $sp, 760 # 8-byte Folded Spill - st.d $fp, $sp, 752 # 8-byte Folded Spill - st.d $s0, $sp, 744 # 8-byte Folded Spill - st.d $s1, $sp, 736 # 8-byte Folded Spill - st.d $s2, $sp, 728 # 8-byte Folded Spill - st.d $s3, $sp, 720 # 8-byte Folded Spill - st.d $s4, $sp, 712 # 8-byte Folded Spill - st.d $s5, $sp, 704 # 8-byte Folded Spill - st.d $s6, $sp, 696 # 8-byte Folded Spill - st.d $s7, $sp, 688 # 8-byte Folded Spill - st.d $s8, $sp, 680 # 8-byte Folded Spill - fst.d $fs0, $sp, 672 # 8-byte Folded Spill - fst.d $fs1, $sp, 664 # 8-byte Folded Spill - st.d $a3, $sp, 304 # 8-byte Folded Spill - st.d $a1, $sp, 216 # 8-byte Folded Spill - move $s4, $a0 + addi.d $sp, $sp, -784 + st.d $ra, $sp, 776 # 8-byte Folded Spill + st.d $fp, $sp, 768 # 8-byte Folded Spill + st.d $s0, $sp, 760 # 8-byte Folded Spill + st.d $s1, $sp, 752 # 8-byte Folded Spill + st.d $s2, $sp, 744 # 8-byte Folded Spill + st.d $s3, $sp, 736 # 8-byte Folded Spill + st.d $s4, $sp, 728 # 8-byte Folded Spill + st.d $s5, $sp, 720 # 8-byte Folded Spill + st.d $s6, $sp, 712 # 8-byte Folded Spill + st.d $s7, $sp, 704 # 8-byte Folded Spill + st.d $s8, $sp, 696 # 8-byte Folded Spill + fst.d $fs0, $sp, 688 # 8-byte Folded Spill + fst.d $fs1, $sp, 680 # 8-byte Folded Spill + st.d $a3, $sp, 320 # 8-byte Folded Spill + st.d $a1, $sp, 232 # 8-byte Folded Spill + move $s3, $a0 move $t4, $zero move $s2, $zero - addi.d $a1, $sp, 314 + addi.d $a1, $sp, 330 sub.d $a0, $zero, $a1 - st.d $a0, $sp, 168 # 8-byte Folded Spill + st.d $a0, $sp, 176 # 8-byte Folded Spill addi.w $a3, $zero, -350 addi.d $a0, $zero, -349 sub.d $a0, $a0, $a1 - st.d $a0, $sp, 240 # 8-byte Folded Spill + st.d $a0, $sp, 256 # 8-byte Folded Spill lu12i.w $a0, 2047 ori $a0, $a0, 3973 - st.d $a0, $sp, 248 # 8-byte Folded Spill + st.d $a0, $sp, 264 # 8-byte Folded Spill lu12i.w $a0, 992 ori $a0, $a0, 4090 - st.d $a0, $sp, 232 # 8-byte Folded Spill + st.d $a0, $sp, 248 # 8-byte Folded Spill lu12i.w $a0, -209716 ori $a0, $a0, 3277 lu32i.d $a0, -209716 lu52i.d $a0, $a0, -820 - st.d $a0, $sp, 200 # 8-byte Folded Spill + st.d $a0, $sp, 208 # 8-byte Folded Spill lu12i.w $a0, 209715 ori $a0, $a0, 819 - st.d $a0, $sp, 192 # 8-byte Folded Spill + st.d $a0, $sp, 200 # 8-byte Folded Spill lu12i.w $a0, -419431 - ori $a0, $a0, 2458 - lu32i.d $a0, -419431 - lu52i.d $a0, $a0, 409 - st.d $a0, $sp, 184 # 8-byte Folded Spill + ori $a4, $a0, 2458 + lu32i.d $a4, -419431 lu12i.w $a0, 1919 ori $a0, $a0, 767 - st.d $a0, $sp, 208 # 8-byte Folded Spill + st.d $a0, $sp, 224 # 8-byte Folded Spill movgr2fr.d $fs1, $zero ori $a0, $zero, 0 lu32i.d $a0, -131072 lu52i.d $a0, $a0, 1023 - st.d $a0, $sp, 176 # 8-byte Folded Spill + st.d $a0, $sp, 184 # 8-byte Folded Spill lu12i.w $a0, 374021 ori $a0, $a0, 1761 lu32i.d $a0, -443509 lu52i.d $a0, $a0, 1030 - st.d $a0, $sp, 272 # 8-byte Folded Spill + st.d $a0, $sp, 288 # 8-byte Folded Spill lu12i.w $a0, 262144 lu32i.d $a0, -426632 lu52i.d $a0, $a0, 1025 - st.d $a0, $sp, 288 # 8-byte Folded Spill + st.d $a0, $sp, 304 # 8-byte Folded Spill lu12i.w $a0, -73168 ori $a0, $a0, 2243 lu32i.d $a0, 284568 lu52i.d $a0, $a0, 1022 - st.d $a0, $sp, 280 # 8-byte Folded Spill + st.d $a0, $sp, 296 # 8-byte Folded Spill lu12i.w $a0, 251304 ori $a0, $a0, 2675 lu32i.d $a0, 302946 lu52i.d $a0, $a0, 1017 - st.d $a0, $sp, 256 # 8-byte Folded Spill + st.d $a0, $sp, 272 # 8-byte Folded Spill lu12i.w $a0, 335544 ori $a0, $a0, 1311 - st.d $a0, $sp, 160 # 8-byte Folded Spill - st.d $a3, $sp, 264 # 8-byte Folded Spill + st.d $a0, $sp, 168 # 8-byte Folded Spill + st.d $a3, $sp, 280 # 8-byte Folded Spill sub.d $a0, $a3, $a1 - st.d $a0, $sp, 224 # 8-byte Folded Spill - ori $s7, $zero, 37 - ori $s3, $zero, 16 - st.d $s4, $sp, 296 # 8-byte Folded Spill + st.d $a0, $sp, 240 # 8-byte Folded Spill + ori $s5, $zero, 37 + ori $s4, $zero, 16 + st.d $a4, $sp, 216 # 8-byte Folded Spill + lu52i.d $a0, $a4, 409 + st.d $a0, $sp, 192 # 8-byte Folded Spill + st.d $s3, $sp, 312 # 8-byte Folded Spill ld.bu $a0, $a2, 0 - beq $a0, $s7, .LBB13_6 + beq $a0, $s5, .LBB13_6 .LBB13_1: - beqz $a0, .LBB13_262 + beqz $a0, .LBB13_261 # %bb.2: # %.preheader657.preheader addi.d $fp, $a2, 1 ori $a3, $zero, 1 - ld.d $a0, $sp, 296 # 8-byte Folded Reload + ld.d $a0, $sp, 312 # 8-byte Folded Reload ori $a1, $zero, 37 ld.bu $s0, $fp, 0 beqz $s0, .LBB13_5 @@ -690,13 +686,13 @@ vxprintf: # @vxprintf jirl $ra, $ra, 0 move $t4, $s1 bnez $s0, .LBB13_7 - b .LBB13_262 + b .LBB13_261 .p2align 4, , 16 .LBB13_6: move $fp, $a2 .LBB13_7: ld.bu $a1, $fp, 1 - beqz $a1, .LBB13_261 + beqz $a1, .LBB13_260 # %bb.8: # %.preheader656.preheader move $t2, $zero move $t5, $zero @@ -715,12 +711,12 @@ vxprintf: # @vxprintf ld.bu $a1, $s1, 0 addi.d $s1, $s1, 1 move $a0, $a2 - beqz $a1, .LBB13_262 + beqz $a1, .LBB13_261 .LBB13_12: # %.preheader656 # =>This Inner Loop Header: Depth=1 andi $a2, $a1, 255 addi.d $a3, $a2, -32 - bltu $s3, $a3, .LBB13_19 + bltu $s4, $a3, .LBB13_19 # %bb.13: # %.preheader656 # in Loop: Header=BB13_12 Depth=1 ori $a2, $zero, 1 @@ -743,7 +739,7 @@ vxprintf: # @vxprintf ori $t3, $zero, 1 b .LBB13_10 .LBB13_18: - ld.d $a1, $sp, 304 # 8-byte Folded Reload + ld.d $a1, $sp, 320 # 8-byte Folded Reload ld.w $a2, $a1, 0 addi.d $a3, $a1, 8 slti $a1, $a2, 0 @@ -754,14 +750,14 @@ vxprintf: # @vxprintf ld.b $a1, $s1, 0 srai.d $a4, $a2, 31 xor $a2, $a2, $a4 - sub.w $s4, $a2, $a4 - st.d $a3, $sp, 304 # 8-byte Folded Spill + sub.w $fp, $a2, $a4 + st.d $a3, $sp, 320 # 8-byte Folded Spill b .LBB13_25 .LBB13_19: # %.preheader654 ext.w.b $a1, $a1 addi.d $a2, $a1, -48 addi.d $s1, $s1, -1 - move $s4, $zero + move $fp, $zero ori $a3, $zero, 9 bltu $a3, $a2, .LBB13_25 # %bb.20: # %.lr.ph.preheader @@ -770,10 +766,10 @@ vxprintf: # @vxprintf .p2align 4, , 16 .LBB13_21: # %.lr.ph # =>This Inner Loop Header: Depth=1 - mul.d $a2, $s4, $a4 + mul.d $a2, $fp, $a4 add.d $a2, $a1, $a2 ld.b $a1, $s1, 1 - addi.w $s4, $a2, -48 + addi.w $fp, $a2, -48 addi.d $a2, $a1, -48 addi.d $s1, $s1, 1 bltu $a2, $a4, .LBB13_21 @@ -785,7 +781,7 @@ vxprintf: # @vxprintf ori $a2, $zero, 42 bne $a1, $a2, .LBB13_31 # %bb.24: - ld.d $a1, $sp, 304 # 8-byte Folded Reload + ld.d $a1, $sp, 320 # 8-byte Folded Reload ld.w $a2, $a1, 0 addi.d $a3, $a1, 8 srai.d $a4, $a2, 31 @@ -793,7 +789,7 @@ vxprintf: # @vxprintf xor $a2, $a2, $a4 addi.d $s1, $s1, 2 sub.w $a2, $a2, $a4 - st.d $a3, $sp, 304 # 8-byte Folded Spill + st.d $a3, $sp, 320 # 8-byte Folded Spill b .LBB13_27 .p2align 4, , 16 .LBB13_25: # %.loopexit655 @@ -816,13 +812,13 @@ vxprintf: # @vxprintf addi.d $a4, $a4, -37 ori $a5, $zero, 85 bgeu $a5, $a4, .LBB13_35 - b .LBB13_262 + b .LBB13_261 .LBB13_30: move $a4, $a1 addi.d $a4, $a4, -37 ori $a5, $zero, 85 bgeu $a5, $a4, .LBB13_35 - b .LBB13_262 + b .LBB13_261 .LBB13_31: # %.preheader652 addi.d $s1, $s1, 1 addi.d $a2, $a1, -48 @@ -850,13 +846,13 @@ vxprintf: # @vxprintf ext.w.b $a4, $a4 addi.d $a4, $a4, -37 ori $a5, $zero, 85 - bltu $a5, $a4, .LBB13_262 + bltu $a5, $a4, .LBB13_261 .LBB13_35: pcalau12i $a5, %pc_hi20(fmtinfo) addi.d $t6, $a5, %pc_lo12(fmtinfo) addi.d $a7, $t6, 120 ori $t0, $zero, 20 - ori $fp, $zero, 39 + ori $s5, $zero, 39 slli.d $a4, $a4, 2 pcalau12i $a5, %pc_hi20(.LJTI13_1) addi.d $a5, $a5, %pc_lo12(.LJTI13_1) @@ -864,7 +860,7 @@ vxprintf: # @vxprintf add.d $t1, $a5, $a4 move $a4, $zero move $t7, $zero - move $ra, $zero + move $t8, $zero move $s8, $zero move $s7, $zero move $a6, $zero @@ -874,7 +870,7 @@ vxprintf: # @vxprintf pcalau12i $a4, %pc_hi20(fmtinfo+108) addi.d $t6, $a4, %pc_lo12(fmtinfo+108) move $a4, $zero - move $ra, $zero + move $t8, $zero move $s8, $zero move $s7, $zero move $a6, $zero @@ -888,7 +884,7 @@ vxprintf: # @vxprintf pcalau12i $a4, %pc_hi20(fmtinfo+6) addi.d $t6, $a4, %pc_lo12(fmtinfo+6) move $a4, $zero - move $ra, $zero + move $t8, $zero move $s8, $zero move $s7, $zero move $a6, $zero @@ -899,7 +895,7 @@ vxprintf: # @vxprintf pcalau12i $a4, %pc_hi20(fmtinfo+60) addi.d $t6, $a4, %pc_lo12(fmtinfo+60) move $a4, $zero - move $ra, $zero + move $t8, $zero move $s8, $zero move $s7, $zero move $a6, $zero @@ -910,19 +906,19 @@ vxprintf: # @vxprintf pcalau12i $a4, %pc_hi20(fmtinfo+36) addi.d $t6, $a4, %pc_lo12(fmtinfo+36) move $a4, $zero - move $ra, $zero + move $t8, $zero move $s8, $zero move $s7, $zero move $a6, $zero move $a5, $zero - ori $fp, $zero, 34 + ori $s5, $zero, 34 ori $t7, $zero, 6 b .LBB13_60 .LBB13_41: # %.thread986.fold.split1711 pcalau12i $a4, %pc_hi20(fmtinfo+90) addi.d $t6, $a4, %pc_lo12(fmtinfo+90) move $a4, $zero - move $ra, $zero + move $t8, $zero move $s7, $zero move $a6, $zero move $a5, $zero @@ -933,7 +929,7 @@ vxprintf: # @vxprintf pcalau12i $a4, %pc_hi20(fmtinfo+84) addi.d $t6, $a4, %pc_lo12(fmtinfo+84) move $a4, $zero - move $ra, $zero + move $t8, $zero move $s8, $zero move $s7, $zero move $a6, $zero @@ -944,7 +940,7 @@ vxprintf: # @vxprintf pcalau12i $a4, %pc_hi20(fmtinfo+30) addi.d $t6, $a4, %pc_lo12(fmtinfo+30) move $a4, $zero - move $ra, $zero + move $t8, $zero move $s8, $zero move $s7, $zero move $a6, $zero @@ -955,7 +951,7 @@ vxprintf: # @vxprintf pcalau12i $a4, %pc_hi20(fmtinfo+42) addi.d $t6, $a4, %pc_lo12(fmtinfo+42) move $a4, $zero - move $ra, $zero + move $t8, $zero move $s8, $zero move $s7, $zero move $a6, $zero @@ -966,7 +962,7 @@ vxprintf: # @vxprintf pcalau12i $a4, %pc_hi20(fmtinfo+12) addi.d $t6, $a4, %pc_lo12(fmtinfo+12) move $a4, $zero - move $ra, $zero + move $t8, $zero move $s8, $zero move $a6, $zero move $a5, $zero @@ -977,7 +973,7 @@ vxprintf: # @vxprintf pcalau12i $a4, %pc_hi20(fmtinfo+102) addi.d $t6, $a4, %pc_lo12(fmtinfo+102) move $a4, $zero - move $ra, $zero + move $t8, $zero move $s8, $zero move $s7, $zero move $a6, $zero @@ -996,10 +992,10 @@ vxprintf: # @vxprintf ori $t0, $zero, 22 ori $a4, $zero, 1 .LBB13_49: - ld.d $a5, $sp, 216 # 8-byte Folded Reload - beqz $a5, .LBB13_262 + ld.d $a5, $sp, 232 # 8-byte Folded Reload + beqz $a5, .LBB13_261 # %bb.50: - move $ra, $zero + move $t8, $zero move $s8, $zero move $s7, $zero move $a6, $zero @@ -1011,7 +1007,7 @@ vxprintf: # @vxprintf pcalau12i $a4, %pc_hi20(fmtinfo+78) addi.d $t6, $a4, %pc_lo12(fmtinfo+78) move $a4, $zero - move $ra, $zero + move $t8, $zero move $s8, $zero move $s7, $zero move $a6, $zero @@ -1022,7 +1018,7 @@ vxprintf: # @vxprintf pcalau12i $a4, %pc_hi20(fmtinfo+96) addi.d $t6, $a4, %pc_lo12(fmtinfo+96) move $a4, $zero - move $ra, $zero + move $t8, $zero move $s8, $zero move $s7, $zero move $a6, $zero @@ -1033,7 +1029,7 @@ vxprintf: # @vxprintf pcalau12i $a4, %pc_hi20(fmtinfo+66) addi.d $t6, $a4, %pc_lo12(fmtinfo+66) move $a4, $zero - move $ra, $zero + move $t8, $zero move $s8, $zero move $s7, $zero move $a6, $zero @@ -1044,7 +1040,7 @@ vxprintf: # @vxprintf pcalau12i $a4, %pc_hi20(fmtinfo+114) addi.d $t6, $a4, %pc_lo12(fmtinfo+114) move $a4, $zero - move $ra, $zero + move $t8, $zero move $s8, $zero move $s7, $zero move $a6, $zero @@ -1055,7 +1051,7 @@ vxprintf: # @vxprintf pcalau12i $a4, %pc_hi20(fmtinfo+18) addi.d $t6, $a4, %pc_lo12(fmtinfo+18) move $a4, $zero - move $ra, $zero + move $t8, $zero move $s8, $zero move $s7, $zero move $a5, $zero @@ -1066,7 +1062,7 @@ vxprintf: # @vxprintf pcalau12i $a4, %pc_hi20(fmtinfo+54) addi.d $t6, $a4, %pc_lo12(fmtinfo+54) move $a4, $zero - move $ra, $zero + move $t8, $zero move $s8, $zero move $s7, $zero move $a6, $zero @@ -1081,14 +1077,14 @@ vxprintf: # @vxprintf move $s7, $zero move $a6, $zero move $a5, $zero - ori $ra, $zero, 1 + ori $t8, $zero, 1 ori $t7, $zero, 12 b .LBB13_60 .LBB13_58: # %.thread986.fold.split1704 pcalau12i $a4, %pc_hi20(fmtinfo+48) addi.d $t6, $a4, %pc_lo12(fmtinfo+48) move $a4, $zero - move $ra, $zero + move $t8, $zero move $s8, $zero move $s7, $zero move $a6, $zero @@ -1099,7 +1095,7 @@ vxprintf: # @vxprintf pcalau12i $a4, %pc_hi20(fmtinfo+24) addi.d $t6, $a4, %pc_lo12(fmtinfo+24) move $a4, $zero - move $ra, $zero + move $t8, $zero move $s8, $zero move $s7, $zero move $a6, $zero @@ -1107,28 +1103,30 @@ vxprintf: # @vxprintf ori $t7, $zero, 4 .p2align 4, , 16 .LBB13_60: # %.thread986 - slti $a7, $s4, 340 - ld.bu $t1, $t6, 3 + slti $a7, $fp, 340 + ld.bu $ra, $t6, 3 ori $t0, $zero, 340 - masknez $t8, $t0, $a7 - maskeqz $s5, $s4, $a7 - or $a7, $s5, $t8 - st.d $a7, $sp, 144 # 8-byte Folded Spill - addi.d $a7, $t1, -1 + masknez $t0, $t0, $a7 + maskeqz $t1, $fp, $a7 + or $a7, $t1, $t0 + st.d $a7, $sp, 152 # 8-byte Folded Spill + addi.d $a7, $ra, -1 andi $a0, $a0, 255 - st.d $a0, $sp, 136 # 8-byte Folded Spill - bltu $s3, $a7, .LBB13_108 + st.d $a0, $sp, 144 # 8-byte Folded Spill + bltu $s4, $a7, .LBB13_107 # %bb.61: # %.thread986 - st.d $s2, $sp, 152 # 8-byte Folded Spill - move $s2, $a4 - move $a4, $a3 - st.d $t1, $sp, 128 # 8-byte Folded Spill + st.d $t1, $sp, 96 # 8-byte Folded Spill + st.d $t0, $sp, 104 # 8-byte Folded Spill + st.d $t6, $sp, 136 # 8-byte Folded Spill + st.d $t5, $sp, 112 # 8-byte Folded Spill + st.d $t3, $sp, 128 # 8-byte Folded Spill + st.d $t2, $sp, 120 # 8-byte Folded Spill ori $t1, $zero, 310 slt $a0, $t1, $a2 - ori $t0, $zero, 1 - sll.d $a3, $t0, $t7 - ld.d $t0, $sp, 248 # 8-byte Folded Reload - and $t0, $a3, $t0 + ori $s3, $zero, 1 + sll.d $t2, $s3, $t7 + ld.d $t0, $sp, 264 # 8-byte Folded Reload + and $t0, $t2, $t0 sltui $t0, $t0, 1 masknez $t1, $t1, $t0 maskeqz $t0, $a2, $t0 @@ -1143,38 +1141,32 @@ vxprintf: # @vxprintf add.d $a0, $a2, $a0 jr $a0 .LBB13_62: - st.d $ra, $sp, 112 # 8-byte Folded Spill - st.d $s5, $sp, 24 # 8-byte Folded Spill - st.d $t8, $sp, 32 # 8-byte Folded Spill - st.d $t7, $sp, 96 # 8-byte Folded Spill - st.d $t6, $sp, 56 # 8-byte Folded Spill - st.d $t5, $sp, 80 # 8-byte Folded Spill - st.d $t4, $sp, 152 # 8-byte Folded Spill - st.d $t3, $sp, 120 # 8-byte Folded Spill - st.d $t2, $sp, 72 # 8-byte Folded Spill - ld.d $a0, $sp, 304 # 8-byte Folded Reload + st.d $ra, $sp, 32 # 8-byte Folded Spill + st.d $t8, $sp, 88 # 8-byte Folded Spill + st.d $t7, $sp, 72 # 8-byte Folded Spill + st.d $t4, $sp, 160 # 8-byte Folded Spill + ld.d $a0, $sp, 320 # 8-byte Folded Reload fld.d $fs0, $a0, 0 fmov.d $fa0, $fs0 pcaddu18i $ra, %call36(__extenddftf2) jirl $ra, $ra, 0 fcmp.cule.d $fcc0, $fs1, $fs0 - st.d $a0, $sp, 88 # 8-byte Folded Spill - move $fp, $a1 + st.d $a0, $sp, 64 # 8-byte Folded Spill + move $s5, $a1 bcnez $fcc0, .LBB13_72 # %bb.63: lu52i.d $a0, $zero, -2048 - xor $fp, $fp, $a0 + xor $s5, $s5, $a0 ori $a0, $zero, 45 st.d $a0, $sp, 48 # 8-byte Folded Spill ori $a0, $zero, 1 - st.d $a0, $sp, 16 # 8-byte Folded Spill + st.d $a0, $sp, 40 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.L.str.18) addi.d $s2, $a0, %pc_lo12(.L.str.18) - st.d $zero, $sp, 64 # 8-byte Folded Spill - ori $s5, $zero, 28 + st.d $zero, $sp, 56 # 8-byte Folded Spill b .LBB13_119 .LBB13_64: - ld.d $a0, $sp, 304 # 8-byte Folded Reload + ld.d $a0, $sp, 320 # 8-byte Folded Reload ld.d $a1, $a0, 0 sltu $a0, $zero, $a1 pcalau12i $a2, %pc_hi20(.L.str.23) @@ -1186,18 +1178,18 @@ vxprintf: # @vxprintf or $a2, $a3, $a2 masknez $a2, $a2, $a0 maskeqz $a1, $a1, $a0 - or $s4, $a1, $a2 - ld.bu $a3, $s4, 0 + or $fp, $a1, $a2 + ld.bu $a3, $fp, 0 beqz $a3, .LBB13_74 # %bb.65: # %.lr.ph697.preheader + ori $a7, $zero, 1 move $a1, $zero move $a2, $zero - addi.d $a4, $s4, 1 - ori $a7, $zero, 1 + addi.d $a4, $fp, 1 .p2align 4, , 16 .LBB13_66: # %.lr.ph697 # =>This Inner Loop Header: Depth=1 - xor $a6, $a3, $fp + xor $a6, $a3, $s5 ld.bu $a3, $a4, 0 sltui $a6, $a6, 1 add.d $a2, $a2, $a6 @@ -1206,24 +1198,22 @@ vxprintf: # @vxprintf bnez $a3, .LBB13_66 b .LBB13_75 .LBB13_67: - st.d $t3, $sp, 120 # 8-byte Folded Spill - ld.d $a0, $sp, 232 # 8-byte Folded Reload - and $a0, $a3, $a0 - move $t0, $a3 - beqz $a0, .LBB13_112 + ld.d $a0, $sp, 248 # 8-byte Folded Reload + and $a0, $t2, $a0 + beqz $a0, .LBB13_111 # %bb.68: ori $a0, $zero, 108 - ori $s5, $zero, 28 - beqz $a4, .LBB13_92 + ld.d $a2, $sp, 120 # 8-byte Folded Reload + beqz $a3, .LBB13_92 # %bb.69: - ld.d $s8, $sp, 144 # 8-byte Folded Reload - bne $a1, $a0, .LBB13_184 + ld.d $s6, $sp, 152 # 8-byte Folded Reload + bne $a1, $a0, .LBB13_183 # %bb.70: - ld.d $a0, $sp, 304 # 8-byte Folded Reload + ld.d $a0, $sp, 320 # 8-byte Folded Reload ld.d $fp, $a0, 0 - b .LBB13_185 + b .LBB13_184 .LBB13_71: - ld.d $a1, $sp, 304 # 8-byte Folded Reload + ld.d $a1, $sp, 320 # 8-byte Folded Reload ld.d $a0, $a1, 0 addi.d $fp, $a1, 8 sltui $a1, $a0, 1 @@ -1235,41 +1225,40 @@ vxprintf: # @vxprintf maskeqz $a1, $a2, $a1 or $s2, $a1, $a0 move $a0, $s2 - move $s4, $t4 + move $s3, $t4 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 addi.w $a1, $a0, 0 - slt $a2, $s4, $a1 + slt $a2, $s3, $a1 masknez $a1, $a1, $a2 - maskeqz $a2, $s4, $a2 + maskeqz $a2, $s3, $a2 or $a1, $a2, $a1 - slti $a2, $s4, 0 + slti $a2, $s3, 0 masknez $a1, $a1, $a2 maskeqz $a0, $a0, $a2 or $t4, $a0, $a1 - st.d $fp, $sp, 304 # 8-byte Folded Spill - b .LBB13_109 + st.d $fp, $sp, 320 # 8-byte Folded Spill + b .LBB13_108 .LBB13_72: - ori $s5, $zero, 28 beqz $s0, .LBB13_118 # %bb.73: ori $a0, $zero, 43 st.d $a0, $sp, 48 # 8-byte Folded Spill ori $a0, $zero, 1 - st.d $a0, $sp, 16 # 8-byte Folded Spill + st.d $a0, $sp, 40 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) - st.d $zero, $sp, 64 # 8-byte Folded Spill + st.d $zero, $sp, 56 # 8-byte Folded Spill b .LBB13_119 .LBB13_74: + ori $a7, $zero, 1 move $a1, $zero move $a2, $zero - ori $a7, $zero, 1 .LBB13_75: # %._crit_edge - and $s5, $a5, $a0 - masknez $a0, $a7, $s5 + and $s3, $a5, $a0 + masknez $a0, $a7, $s3 ori $a3, $zero, 3 - maskeqz $a3, $a3, $s5 + maskeqz $a3, $a3, $s3 or $a0, $a3, $a0 add.d $a0, $a0, $a1 add.w $a0, $a0, $a2 @@ -1280,10 +1269,10 @@ vxprintf: # @vxprintf jirl $ra, $ra, 0 move $s0, $a0 move $s2, $a0 - ld.d $s6, $sp, 144 # 8-byte Folded Reload - beqz $a0, .LBB13_262 + ld.d $s6, $sp, 152 # 8-byte Folded Reload + beqz $a0, .LBB13_261 # %bb.77: - beqz $s5, .LBB13_80 + beqz $s3, .LBB13_80 .LBB13_78: ori $a0, $zero, 39 st.b $a0, $s2, 0 @@ -1291,18 +1280,17 @@ vxprintf: # @vxprintf b .LBB13_81 .LBB13_79: move $s0, $zero - addi.d $s2, $sp, 314 - ld.d $s6, $sp, 144 # 8-byte Folded Reload - bnez $s5, .LBB13_78 + addi.d $s2, $sp, 330 + ld.d $s6, $sp, 152 # 8-byte Folded Reload + bnez $s3, .LBB13_78 .LBB13_80: move $a1, $zero .LBB13_81: - ld.bu $a2, $s4, 0 + ld.bu $a2, $fp, 0 beqz $a2, .LBB13_86 # %bb.82: # %.lr.ph704.preheader - addi.d $a0, $s4, 1 - ld.d $s4, $sp, 296 # 8-byte Folded Reload - ori $s7, $zero, 37 + addi.d $a0, $fp, 1 + ori $s7, $zero, 28 b .LBB13_84 .p2align 4, , 16 .LBB13_83: # in Loop: Header=BB13_84 Depth=1 @@ -1314,203 +1302,205 @@ vxprintf: # @vxprintf # =>This Inner Loop Header: Depth=1 addi.w $t4, $a1, 1 stx.b $a2, $s2, $a1 - bne $a2, $fp, .LBB13_83 + bne $a2, $s5, .LBB13_83 # %bb.85: # in Loop: Header=BB13_84 Depth=1 addi.w $a1, $a1, 2 - stx.b $fp, $s2, $t4 + stx.b $s5, $s2, $t4 move $t4, $a1 b .LBB13_83 .LBB13_86: move $t4, $a1 - ld.d $s4, $sp, 296 # 8-byte Folded Reload - ori $s7, $zero, 37 + ori $s7, $zero, 28 .LBB13_87: # %._crit_edge705 - beqz $s5, .LBB13_89 + beqz $s3, .LBB13_89 # %bb.88: addi.w $a0, $t4, 1 ori $a1, $zero, 39 stx.b $a1, $s2, $t4 move $t4, $a0 .LBB13_89: # %.thread626 - ld.d $a1, $sp, 136 # 8-byte Folded Reload - ld.d $a0, $sp, 304 # 8-byte Folded Reload + ld.d $s3, $sp, 312 # 8-byte Folded Reload + ori $s5, $zero, 37 + ld.d $a1, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 320 # 8-byte Folded Reload addi.d $a0, $a0, 8 - st.d $a0, $sp, 304 # 8-byte Folded Spill + st.d $a0, $sp, 320 # 8-byte Folded Spill stx.b $zero, $s2, $t4 - ori $s5, $zero, 28 - b .LBB13_236 + b .LBB13_235 .LBB13_90: ld.bu $a1, $s1, 1 addi.d $s1, $s1, 1 b .LBB13_100 .LBB13_91: # %.thread605 - st.d $t3, $sp, 120 # 8-byte Folded Spill - ld.d $a0, $sp, 232 # 8-byte Folded Reload - and $a0, $a3, $a0 - ori $s5, $zero, 28 - move $t0, $a3 - beqz $a0, .LBB13_115 + ld.d $a0, $sp, 248 # 8-byte Folded Reload + and $a0, $t2, $a0 + ld.d $a2, $sp, 120 # 8-byte Folded Reload + beqz $a0, .LBB13_114 .LBB13_92: # %.thread617 - ld.d $a0, $sp, 304 # 8-byte Folded Reload + ld.d $a0, $sp, 320 # 8-byte Folded Reload ld.d $fp, $a0, 0 - move $t1, $zero - ld.d $s8, $sp, 144 # 8-byte Folded Reload - b .LBB13_217 + move $s2, $zero + ld.d $s6, $sp, 152 # 8-byte Folded Reload + b .LBB13_216 .LBB13_93: - ld.d $a1, $sp, 304 # 8-byte Folded Reload + ld.d $a1, $sp, 320 # 8-byte Folded Reload ld.d $a0, $a1, 0 addi.d $fp, $a1, 8 - beqz $a0, .LBB13_154 + beqz $a0, .LBB13_152 # %bb.94: ld.d $a1, $a0, 0 - ld.d $s4, $sp, 296 # 8-byte Folded Reload - ori $s5, $zero, 28 - ld.d $s2, $sp, 152 # 8-byte Folded Reload + ld.d $s3, $sp, 312 # 8-byte Folded Reload + ori $s7, $zero, 28 beqz $a1, .LBB13_96 # %bb.95: ld.wu $a0, $a0, 8 srli.d $a2, $a0, 1 - move $a0, $s4 + move $a0, $s3 pcaddu18i $ra, %call36(sqlite3StrAccumAppend) jirl $ra, $ra, 0 .LBB13_96: move $s0, $zero move $s6, $zero move $t4, $zero - st.d $fp, $sp, 304 # 8-byte Folded Spill - b .LBB13_147 + st.d $fp, $sp, 320 # 8-byte Folded Spill + ori $s5, $zero, 37 + b .LBB13_234 .LBB13_97: move $s0, $zero - ori $s7, $zero, 37 - st.b $s7, $sp, 314 - addi.d $s2, $sp, 314 + ori $s5, $zero, 37 + st.b $s5, $sp, 330 + addi.d $s2, $sp, 330 ori $t4, $zero, 1 - ld.d $s4, $sp, 296 # 8-byte Folded Reload - b .LBB13_110 + ld.d $s3, $sp, 312 # 8-byte Folded Reload + b .LBB13_109 .LBB13_98: - ld.d $s4, $sp, 296 # 8-byte Folded Reload - ld.w $a0, $s4, 16 - ld.d $a2, $sp, 304 # 8-byte Folded Reload + ld.d $s3, $sp, 312 # 8-byte Folded Reload + ld.w $a0, $s3, 16 + ld.d $a2, $sp, 320 # 8-byte Folded Reload ld.d $a1, $a2, 0 move $s0, $zero move $s6, $zero move $t4, $zero addi.d $a2, $a2, 8 st.w $a0, $a1, 0 - st.d $a2, $sp, 304 # 8-byte Folded Spill - b .LBB13_155 + st.d $a2, $sp, 320 # 8-byte Folded Spill + b .LBB13_154 .LBB13_99: - ld.d $a0, $sp, 304 # 8-byte Folded Reload + ld.d $a0, $sp, 320 # 8-byte Folded Reload ld.b $a1, $a0, 0 addi.d $a0, $a0, 8 - st.d $a0, $sp, 304 # 8-byte Folded Spill + st.d $a0, $sp, 320 # 8-byte Folded Spill .LBB13_100: - st.b $a1, $sp, 314 - addi.d $s2, $sp, 314 - ori $s5, $zero, 28 - ld.d $s6, $sp, 144 # 8-byte Folded Reload - bltz $t4, .LBB13_111 + st.b $a1, $sp, 330 + addi.d $s2, $sp, 330 + ori $s7, $zero, 28 + ld.d $s6, $sp, 152 # 8-byte Folded Reload + bltz $t4, .LBB13_110 # %bb.101: # %.preheader650 - ld.d $s4, $sp, 296 # 8-byte Folded Reload - ori $s7, $zero, 37 + ld.d $s3, $sp, 312 # 8-byte Folded Reload + ori $s5, $zero, 37 ori $a0, $zero, 2 - bltu $t4, $a0, .LBB13_103 + bltu $t4, $a0, .LBB13_117 # %bb.102: # %.lr.ph693.preheader addi.w $a2, $t4, -1 - addi.d $a0, $sp, 315 + addi.d $a0, $sp, 331 move $fp, $t4 pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 move $t4, $fp -.LBB13_103: move $s0, $zero - b .LBB13_235 -.LBB13_104: - ld.d $a1, $sp, 304 # 8-byte Folded Reload + b .LBB13_234 +.LBB13_103: + ld.d $a1, $sp, 320 # 8-byte Folded Reload ld.w $a0, $a1, 8 ld.d $a1, $a1, 0 slli.d $a2, $a0, 6 alsl.d $a0, $a0, $a2, 3 add.d $fp, $a1, $a0 ld.d $a1, $fp, 8 - ld.d $s4, $sp, 296 # 8-byte Folded Reload - beqz $a1, .LBB13_107 -# %bb.105: + ld.d $s3, $sp, 312 # 8-byte Folded Reload + beqz $a1, .LBB13_106 +# %bb.104: ld.bu $a0, $a1, 0 - beqz $a0, .LBB13_107 -# %bb.106: + beqz $a0, .LBB13_106 +# %bb.105: addi.w $a2, $zero, -1 - move $a0, $s4 + move $a0, $s3 pcaddu18i $ra, %call36(sqlite3StrAccumAppend) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.24) addi.d $a1, $a0, %pc_lo12(.L.str.24) ori $a2, $zero, 1 - move $a0, $s4 + move $a0, $s3 pcaddu18i $ra, %call36(sqlite3StrAccumAppend) jirl $ra, $ra, 0 -.LBB13_107: +.LBB13_106: addi.d $a0, $fp, 8 ld.d $a1, $a0, 8 - ld.d $a0, $sp, 304 # 8-byte Folded Reload + ld.d $a0, $sp, 320 # 8-byte Folded Reload addi.d $a0, $a0, 16 - st.d $a0, $sp, 304 # 8-byte Folded Spill + st.d $a0, $sp, 320 # 8-byte Folded Spill addi.w $a2, $zero, -1 - move $a0, $s4 + move $a0, $s3 pcaddu18i $ra, %call36(sqlite3StrAccumAppend) jirl $ra, $ra, 0 move $s0, $zero move $s6, $zero move $t4, $zero - b .LBB13_155 -.LBB13_108: + b .LBB13_154 +.LBB13_107: move $s0, $zero +.LBB13_108: # %.loopexit639 + ld.d $s3, $sp, 312 # 8-byte Folded Reload + ori $s5, $zero, 37 .LBB13_109: # %.loopexit639 - ld.d $s4, $sp, 296 # 8-byte Folded Reload - ori $s7, $zero, 37 -.LBB13_110: # %.loopexit639 - ori $s5, $zero, 28 - ld.d $s6, $sp, 144 # 8-byte Folded Reload - b .LBB13_235 -.LBB13_111: + ori $s7, $zero, 28 + ld.d $s6, $sp, 152 # 8-byte Folded Reload + b .LBB13_234 +.LBB13_110: move $s0, $zero ori $t4, $zero, 1 - b .LBB13_146 -.LBB13_112: + ld.d $s3, $sp, 312 # 8-byte Folded Reload + ori $s5, $zero, 37 + b .LBB13_234 +.LBB13_111: ori $a0, $zero, 108 - ori $s5, $zero, 28 - beqz $a4, .LBB13_115 + ld.d $a2, $sp, 120 # 8-byte Folded Reload + beqz $a3, .LBB13_114 +# %bb.112: + ld.d $a3, $sp, 152 # 8-byte Folded Reload + bne $a1, $a0, .LBB13_213 # %bb.113: - ld.d $s8, $sp, 144 # 8-byte Folded Reload - bne $a1, $a0, .LBB13_215 -# %bb.114: - ld.d $a0, $sp, 304 # 8-byte Folded Reload + ld.d $a0, $sp, 320 # 8-byte Folded Reload ld.d $fp, $a0, 0 - bgez $fp, .LBB13_116 - b .LBB13_216 -.LBB13_115: # %.thread613 - ld.d $a0, $sp, 304 # 8-byte Folded Reload + bgez $fp, .LBB13_115 + b .LBB13_214 +.LBB13_114: # %.thread613 + ld.d $a0, $sp, 320 # 8-byte Folded Reload ld.d $fp, $a0, 0 - ld.d $s8, $sp, 144 # 8-byte Folded Reload - bltz $fp, .LBB13_216 -.LBB13_116: - ori $t1, $zero, 43 - bnez $s0, .LBB13_217 -# %bb.117: + ld.d $a3, $sp, 152 # 8-byte Folded Reload + bltz $fp, .LBB13_214 +.LBB13_115: + ori $s2, $zero, 43 + bnez $s0, .LBB13_215 +# %bb.116: sltu $a0, $zero, $s6 - slli.d $t1, $a0, 5 - b .LBB13_217 + slli.d $s2, $a0, 5 + b .LBB13_215 +.LBB13_117: + move $s0, $zero + b .LBB13_234 .LBB13_118: sltui $a0, $s6, 1 - st.d $a0, $sp, 64 # 8-byte Folded Spill + st.d $a0, $sp, 56 # 8-byte Folded Spill sltu $a0, $zero, $s6 - st.d $a0, $sp, 16 # 8-byte Folded Spill + st.d $a0, $sp, 40 # 8-byte Folded Spill slli.d $a0, $a0, 5 st.d $a0, $sp, 48 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.L.str.20) addi.d $s2, $a0, %pc_lo12(.L.str.20) .LBB13_119: - ld.d $a3, $sp, 152 # 8-byte Folded Reload + ld.d $a3, $sp, 160 # 8-byte Folded Reload slti $a0, $a3, 0 slti $a1, $a3, 165 ori $a2, $zero, 165 @@ -1525,48 +1515,48 @@ vxprintf: # @vxprintf slt $a2, $zero, $a0 and $a1, $a1, $a2 sub.w $a2, $a0, $a1 - st.d $a2, $sp, 104 # 8-byte Folded Spill + st.d $a2, $sp, 80 # 8-byte Folded Spill blez $a2, .LBB13_123 # %bb.120: # %.lr.ph710.preheader sub.d $a1, $zero, $a1 add.d $a0, $a0, $a1 addi.d $a0, $a0, 1 vldi $vr0, -928 - ori $a2, $zero, 1 - ld.d $s6, $sp, 144 # 8-byte Folded Reload + ld.d $a2, $sp, 216 # 8-byte Folded Reload + ld.d $s6, $sp, 152 # 8-byte Folded Reload .p2align 4, , 16 .LBB13_121: # %.lr.ph710 # =>This Inner Loop Header: Depth=1 - pcalau12i $a1, %pc_hi20(.LCPI13_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI13_0) + lu52i.d $a1, $a2, 1019 + movgr2fr.d $fa1, $a1 addi.w $a0, $a0, -1 fmul.d $fa0, $fa0, $fa1 - bltu $a2, $a0, .LBB13_121 + bltu $s3, $a0, .LBB13_121 # %bb.122: # %._crit_edge711.loopexit # kill: def $f0_64 killed $f0_64 killed $vr0 pcaddu18i $ra, %call36(__extenddftf2) jirl $ra, $ra, 0 - move $s8, $a0 - move $a3, $a1 + move $s3, $a0 + move $s8, $a1 b .LBB13_124 .LBB13_123: - move $s8, $zero - ld.d $a3, $sp, 176 # 8-byte Folded Reload - ld.d $s6, $sp, 144 # 8-byte Folded Reload + move $s3, $zero + ld.d $s8, $sp, 184 # 8-byte Folded Reload + ld.d $s6, $sp, 152 # 8-byte Folded Reload .LBB13_124: # %._crit_edge711 - ld.d $a0, $sp, 304 # 8-byte Folded Reload + ld.d $a0, $sp, 320 # 8-byte Folded Reload addi.d $a0, $a0, 8 - st.d $a0, $sp, 304 # 8-byte Folded Spill - ld.d $s0, $sp, 88 # 8-byte Folded Reload + st.d $a0, $sp, 320 # 8-byte Folded Spill + ld.d $s0, $sp, 64 # 8-byte Folded Reload move $a0, $s0 - move $a1, $fp - move $a2, $s8 - st.d $a3, $sp, 40 # 8-byte Folded Spill + move $a1, $s5 + move $a2, $s3 + move $a3, $s8 pcaddu18i $ra, %call36(__addtf3) jirl $ra, $ra, 0 - ld.d $a3, $sp, 112 # 8-byte Folded Reload + ld.d $a3, $sp, 88 # 8-byte Folded Reload maskeqz $a1, $a1, $a3 - masknez $a2, $fp, $a3 + masknez $a2, $s5, $a3 or $s7, $a1, $a2 maskeqz $a0, $a0, $a3 masknez $a1, $s0, $a3 @@ -1577,7 +1567,7 @@ vxprintf: # @vxprintf move $a3, $s7 pcaddu18i $ra, %call36(__unordtf2) jirl $ra, $ra, 0 - bnez $a0, .LBB13_260 + bnez $a0, .LBB13_259 # %bb.125: move $a0, $s0 move $a1, $s7 @@ -1587,78 +1577,79 @@ vxprintf: # @vxprintf jirl $ra, $ra, 0 ori $a1, $zero, 0 lu32i.d $a1, 147456 - st.d $a1, $sp, 152 # 8-byte Folded Spill + st.d $a1, $sp, 160 # 8-byte Folded Spill lu12i.w $a1, -419431 ori $s6, $a1, 2457 - st.d $s6, $sp, 88 # 8-byte Folded Spill - blez $a0, .LBB13_148 + st.d $s6, $sp, 64 # 8-byte Folded Spill + blez $a0, .LBB13_146 # %bb.126: # %.preheader649 + st.d $s3, $sp, 24 # 8-byte Folded Spill move $a0, $s0 move $a1, $s7 lu52i.d $a2, $zero, 1792 - ld.d $a3, $sp, 272 # 8-byte Folded Reload + ld.d $a3, $sp, 288 # 8-byte Folded Reload pcaddu18i $ra, %call36(__getf2) jirl $ra, $ra, 0 - move $fp, $zero + move $s5, $zero bltz $a0, .LBB13_129 .p2align 4, , 16 .LBB13_127: # %.lr.ph715 # =>This Inner Loop Header: Depth=1 - move $s5, $fp + move $s3, $s5 move $a0, $s0 move $a1, $s7 lu52i.d $a2, $zero, 768 - ld.d $a3, $sp, 256 # 8-byte Folded Reload + ld.d $a3, $sp, 272 # 8-byte Folded Reload pcaddu18i $ra, %call36(__multf3) jirl $ra, $ra, 0 move $s0, $a0 move $s7, $a1 - addi.w $fp, $fp, 32 + addi.w $s5, $s5, 32 lu52i.d $a2, $zero, 1792 - ld.d $a3, $sp, 272 # 8-byte Folded Reload + ld.d $a3, $sp, 288 # 8-byte Folded Reload pcaddu18i $ra, %call36(__getf2) jirl $ra, $ra, 0 bltz $a0, .LBB13_129 # %bb.128: # %.lr.ph715 # in Loop: Header=BB13_127 Depth=1 ori $a0, $zero, 319 - bltu $s5, $a0, .LBB13_127 + bltu $s3, $a0, .LBB13_127 .LBB13_129: # %.preheader648 move $a0, $s0 move $a1, $s7 move $a2, $zero - ld.d $a3, $sp, 288 # 8-byte Folded Reload + ld.d $a3, $sp, 304 # 8-byte Folded Reload pcaddu18i $ra, %call36(__getf2) jirl $ra, $ra, 0 bltz $a0, .LBB13_133 # %bb.130: # %.preheader648 ori $a0, $zero, 350 - bltu $a0, $fp, .LBB13_133 + bltu $a0, $s5, .LBB13_133 .p2align 4, , 16 .LBB13_131: # %.lr.ph720 # =>This Inner Loop Header: Depth=1 - move $s5, $fp + move $s3, $s5 move $a0, $s0 move $a1, $s7 lu52i.d $a2, $zero, -1536 - ld.d $a3, $sp, 280 # 8-byte Folded Reload + ld.d $a3, $sp, 296 # 8-byte Folded Reload pcaddu18i $ra, %call36(__multf3) jirl $ra, $ra, 0 move $s0, $a0 move $s7, $a1 - addi.w $fp, $fp, 8 + addi.w $s5, $s5, 8 move $a2, $zero - ld.d $a3, $sp, 288 # 8-byte Folded Reload + ld.d $a3, $sp, 304 # 8-byte Folded Reload pcaddu18i $ra, %call36(__getf2) jirl $ra, $ra, 0 bltz $a0, .LBB13_133 # %bb.132: # %.lr.ph720 # in Loop: Header=BB13_131 Depth=1 ori $a0, $zero, 343 - bltu $s5, $a0, .LBB13_131 + bltu $s3, $a0, .LBB13_131 .LBB13_133: # %.preheader647 - st.d $s8, $sp, 8 # 8-byte Folded Spill - ld.d $a0, $sp, 152 # 8-byte Folded Reload + st.d $s8, $sp, 16 # 8-byte Folded Spill + ld.d $a0, $sp, 160 # 8-byte Folded Reload lu52i.d $s8, $a0, 1024 move $a0, $s0 move $a1, $s7 @@ -1669,11 +1660,11 @@ vxprintf: # @vxprintf bltz $a0, .LBB13_137 # %bb.134: # %.preheader647 ori $a0, $zero, 350 - bltu $a0, $fp, .LBB13_137 + bltu $a0, $s5, .LBB13_137 .p2align 4, , 16 .LBB13_135: # %.lr.ph725 # =>This Inner Loop Header: Depth=1 - move $s5, $fp + move $s3, $s5 move $a0, $s6 lu32i.d $a0, -288359 lu52i.d $a3, $a0, 1023 @@ -1684,7 +1675,7 @@ vxprintf: # @vxprintf jirl $ra, $ra, 0 move $s0, $a0 move $s7, $a1 - addi.w $fp, $fp, 1 + addi.w $s5, $s5, 1 move $a2, $zero move $a3, $s8 pcaddu18i $ra, %call36(__getf2) @@ -1693,37 +1684,37 @@ vxprintf: # @vxprintf # %bb.136: # %.lr.ph725 # in Loop: Header=BB13_135 Depth=1 ori $a0, $zero, 350 - bltu $s5, $a0, .LBB13_135 + bltu $s3, $a0, .LBB13_135 .LBB13_137: # %.preheader646 move $a0, $s0 move $a1, $s7 lu52i.d $a2, $zero, -1536 - ld.d $a3, $sp, 280 # 8-byte Folded Reload + ld.d $a3, $sp, 296 # 8-byte Folded Reload pcaddu18i $ra, %call36(__lttf2) jirl $ra, $ra, 0 bgez $a0, .LBB13_140 .p2align 4, , 16 .LBB13_138: # %.lr.ph730 # =>This Inner Loop Header: Depth=1 - move $s5, $fp + move $s3, $s5 move $a0, $s0 move $a1, $s7 move $a2, $zero - ld.d $a3, $sp, 288 # 8-byte Folded Reload + ld.d $a3, $sp, 304 # 8-byte Folded Reload pcaddu18i $ra, %call36(__multf3) jirl $ra, $ra, 0 move $s0, $a0 move $s7, $a1 - addi.w $fp, $fp, -8 + addi.w $s5, $s5, -8 lu52i.d $a2, $zero, -1536 - ld.d $a3, $sp, 280 # 8-byte Folded Reload + ld.d $a3, $sp, 296 # 8-byte Folded Reload pcaddu18i $ra, %call36(__lttf2) jirl $ra, $ra, 0 bgez $a0, .LBB13_140 # %bb.139: # %.lr.ph730 # in Loop: Header=BB13_138 Depth=1 addi.w $a0, $zero, -343 - blt $a0, $s5, .LBB13_138 + blt $a0, $s3, .LBB13_138 .LBB13_140: # %.preheader645 ori $a0, $zero, 0 lu32i.d $a0, -65536 @@ -1736,12 +1727,12 @@ vxprintf: # @vxprintf jirl $ra, $ra, 0 bgez $a0, .LBB13_144 # %bb.141: # %.preheader645 - ld.d $a0, $sp, 264 # 8-byte Folded Reload - blt $fp, $a0, .LBB13_144 + ld.d $a0, $sp, 280 # 8-byte Folded Reload + blt $s5, $a0, .LBB13_144 .p2align 4, , 16 .LBB13_142: # %.lr.ph735 # =>This Inner Loop Header: Depth=1 - move $s5, $fp + move $s3, $s5 move $a0, $s0 move $a1, $s7 move $a2, $zero @@ -1750,7 +1741,7 @@ vxprintf: # @vxprintf jirl $ra, $ra, 0 move $s0, $a0 move $s7, $a1 - addi.w $fp, $fp, -1 + addi.w $s5, $s5, -1 move $a2, $zero move $a3, $s6 pcaddu18i $ra, %call36(__lttf2) @@ -1758,53 +1749,49 @@ vxprintf: # @vxprintf bgez $a0, .LBB13_144 # %bb.143: # %.lr.ph735 # in Loop: Header=BB13_142 Depth=1 - ld.d $a0, $sp, 264 # 8-byte Folded Reload - blt $a0, $s5, .LBB13_142 + ld.d $a0, $sp, 280 # 8-byte Folded Reload + blt $a0, $s3, .LBB13_142 .LBB13_144: # %._crit_edge736 - addi.w $a0, $fp, -351 + addi.w $a0, $s5, -351 addi.w $a1, $zero, -702 - ori $s5, $zero, 28 - ld.d $s6, $sp, 144 # 8-byte Folded Reload - ld.d $s8, $sp, 8 # 8-byte Folded Reload - bltu $a1, $a0, .LBB13_149 + ld.d $s6, $sp, 152 # 8-byte Folded Reload + ld.d $s3, $sp, 24 # 8-byte Folded Reload + ld.d $s8, $sp, 16 # 8-byte Folded Reload + bltu $a1, $a0, .LBB13_147 # %bb.145: move $a0, $s2 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 move $t4, $a0 move $s0, $zero -.LBB13_146: # %.loopexit639 - ld.d $s4, $sp, 296 # 8-byte Folded Reload -.LBB13_147: # %.loopexit639 - ori $s7, $zero, 37 - b .LBB13_235 -.LBB13_148: - move $fp, $zero -.LBB13_149: - ld.d $a0, $sp, 112 # 8-byte Folded Reload - beqz $a0, .LBB13_151 -# %bb.150: - st.d $zero, $sp, 112 # 8-byte Folded Spill + b .LBB13_153 +.LBB13_146: + move $s5, $zero +.LBB13_147: + ld.d $a0, $sp, 88 # 8-byte Folded Reload + beqz $a0, .LBB13_149 +# %bb.148: + st.d $zero, $sp, 88 # 8-byte Folded Spill ori $a1, $zero, 2 - b .LBB13_157 -.LBB13_151: + b .LBB13_156 +.LBB13_149: move $a0, $s0 move $a1, $s7 - move $a2, $s8 - ld.d $a3, $sp, 40 # 8-byte Folded Reload + move $a2, $s3 + move $a3, $s8 pcaddu18i $ra, %call36(__addtf3) jirl $ra, $ra, 0 move $s0, $a0 move $s2, $a1 - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a0, $sp, 160 # 8-byte Folded Reload lu52i.d $a3, $a0, 1024 move $a0, $s0 move $a2, $zero pcaddu18i $ra, %call36(__getf2) jirl $ra, $ra, 0 addi.w $a1, $zero, -1 - slt $s5, $a1, $a0 - ld.d $a0, $sp, 88 # 8-byte Folded Reload + slt $s3, $a1, $a0 + ld.d $a0, $sp, 64 # 8-byte Folded Reload lu32i.d $a0, -288359 lu52i.d $a3, $a0, 1023 move $a0, $s0 @@ -1812,84 +1799,83 @@ vxprintf: # @vxprintf lu52i.d $a2, $zero, -1536 pcaddu18i $ra, %call36(__multf3) jirl $ra, $ra, 0 - masknez $a2, $s2, $s5 - maskeqz $a1, $a1, $s5 + masknez $a2, $s2, $s3 + maskeqz $a1, $a1, $s3 or $s7, $a1, $a2 - masknez $a1, $s0, $s5 - maskeqz $a0, $a0, $s5 + masknez $a1, $s0, $s3 + maskeqz $a0, $a0, $s3 or $s0, $a0, $a1 - add.w $fp, $fp, $s5 + add.w $s5, $s5, $s3 ori $a0, $zero, 15 - ld.d $a1, $sp, 96 # 8-byte Folded Reload - beq $a1, $a0, .LBB13_153 -# %bb.152: + ld.d $a1, $sp, 72 # 8-byte Folded Reload + beq $a1, $a0, .LBB13_151 +# %bb.150: ori $a0, $zero, 2 - bne $a1, $a0, .LBB13_156 -.LBB13_153: - slti $a0, $fp, -4 - ld.d $a2, $sp, 104 # 8-byte Folded Reload - slt $a1, $a2, $fp + bne $a1, $a0, .LBB13_155 +.LBB13_151: + slti $a0, $s5, -4 + ld.d $a2, $sp, 80 # 8-byte Folded Reload + slt $a1, $a2, $s5 or $a0, $a0, $a1 addi.d $a1, $a0, 2 - masknez $a0, $fp, $a0 + masknez $a0, $s5, $a0 sub.w $a2, $a2, $a0 - st.d $a2, $sp, 104 # 8-byte Folded Spill - ld.d $a0, $sp, 120 # 8-byte Folded Reload + st.d $a2, $sp, 80 # 8-byte Folded Spill + ld.d $a0, $sp, 128 # 8-byte Folded Reload andi $a0, $a0, 255 addi.d $a0, $a0, -1 sltu $a0, $zero, $a0 - st.d $a0, $sp, 112 # 8-byte Folded Spill - b .LBB13_157 -.LBB13_154: + st.d $a0, $sp, 88 # 8-byte Folded Spill + b .LBB13_156 +.LBB13_152: move $s0, $zero move $s6, $zero move $t4, $zero - st.d $fp, $sp, 304 # 8-byte Folded Spill - ld.d $s4, $sp, 296 # 8-byte Folded Reload -.LBB13_155: # %.loopexit639 - ori $s7, $zero, 37 - ori $s5, $zero, 28 - ld.d $a1, $sp, 136 # 8-byte Folded Reload - ld.d $s2, $sp, 152 # 8-byte Folded Reload - b .LBB13_236 -.LBB13_156: - st.d $zero, $sp, 112 # 8-byte Folded Spill - ld.d $a1, $sp, 128 # 8-byte Folded Reload -.LBB13_157: # %.thread621 - addi.d $s5, $sp, 314 - ld.d $a0, $sp, 64 # 8-byte Folded Reload - bnez $a0, .LBB13_159 -# %bb.158: + st.d $fp, $sp, 320 # 8-byte Folded Spill +.LBB13_153: # %.loopexit639 + ld.d $s3, $sp, 312 # 8-byte Folded Reload +.LBB13_154: # %.loopexit639 + ori $s5, $zero, 37 + ori $s7, $zero, 28 + b .LBB13_234 +.LBB13_155: + st.d $zero, $sp, 88 # 8-byte Folded Spill + ld.d $a1, $sp, 32 # 8-byte Folded Reload +.LBB13_156: # %.thread621 + addi.d $s3, $sp, 330 + ld.d $a0, $sp, 56 # 8-byte Folded Reload + bnez $a0, .LBB13_158 +# %bb.157: ld.d $a0, $sp, 48 # 8-byte Folded Reload - st.b $a0, $sp, 314 - addi.d $s5, $sp, 315 -.LBB13_159: + st.b $a0, $sp, 330 + addi.d $s3, $sp, 331 +.LBB13_158: addi.d $a0, $a1, -3 sltui $a0, $a0, 1 - st.d $a0, $sp, 88 # 8-byte Folded Spill - masknez $a0, $fp, $a0 + st.d $a0, $sp, 64 # 8-byte Folded Spill + masknez $a0, $s5, $a0 addi.w $s6, $a0, 1 move $s8, $zero - bgez $a0, .LBB13_162 -# %bb.160: - addi.d $s2, $s5, 1 + bgez $a0, .LBB13_161 +# %bb.159: + addi.d $s2, $s3, 1 ori $a0, $zero, 48 - st.b $a0, $s5, 0 - b .LBB13_165 + st.b $a0, $s3, 0 + b .LBB13_164 .p2align 4, , 16 -.LBB13_161: # %et_getdigit.exit - # in Loop: Header=BB13_162 Depth=1 +.LBB13_160: # %et_getdigit.exit + # in Loop: Header=BB13_161 Depth=1 addi.w $s8, $s8, 1 - addi.d $s2, $s5, 1 - st.b $a0, $s5, 0 - move $s5, $s2 - beq $s6, $s8, .LBB13_164 -.LBB13_162: # %.preheader643 + addi.d $s2, $s3, 1 + st.b $a0, $s3, 0 + move $s3, $s2 + beq $s6, $s8, .LBB13_163 +.LBB13_161: # %.preheader643 # =>This Inner Loop Header: Depth=1 ori $a0, $zero, 48 ori $a1, $zero, 15 - bltu $a1, $s8, .LBB13_161 -# %bb.163: # in Loop: Header=BB13_162 Depth=1 + bltu $a1, $s8, .LBB13_160 +# %bb.162: # in Loop: Header=BB13_161 Depth=1 move $a0, $s0 move $a1, $s7 pcaddu18i $ra, %call36(__fixtfsi) @@ -1903,7 +1889,7 @@ vxprintf: # @vxprintf move $a1, $s7 pcaddu18i $ra, %call36(__subtf3) jirl $ra, $ra, 0 - ld.d $a2, $sp, 152 # 8-byte Folded Reload + ld.d $a2, $sp, 160 # 8-byte Folded Reload lu52i.d $a3, $a2, 1024 move $a2, $zero pcaddu18i $ra, %call36(__multf3) @@ -1911,69 +1897,69 @@ vxprintf: # @vxprintf move $s0, $a0 move $s7, $a1 addi.d $a0, $s2, 48 - b .LBB13_161 -.LBB13_164: + b .LBB13_160 +.LBB13_163: move $s8, $s6 move $s6, $zero -.LBB13_165: # %.loopexit644 - ld.d $a2, $sp, 104 # 8-byte Folded Reload +.LBB13_164: # %.loopexit644 + ld.d $a2, $sp, 80 # 8-byte Folded Reload slt $a0, $zero, $a2 - ld.d $a1, $sp, 120 # 8-byte Folded Reload + ld.d $a1, $sp, 128 # 8-byte Folded Reload or $a0, $a1, $a0 - ld.d $a1, $sp, 80 # 8-byte Folded Reload + ld.d $a1, $sp, 112 # 8-byte Folded Reload or $a0, $a0, $a1 andi $a3, $a0, 255 - beqz $a3, .LBB13_167 -# %bb.166: + beqz $a3, .LBB13_166 +# %bb.165: addi.d $a0, $s2, 1 ori $a1, $zero, 46 st.b $a1, $s2, 0 move $s2, $a0 -.LBB13_167: +.LBB13_166: st.d $a3, $sp, 128 # 8-byte Folded Spill - bgez $s6, .LBB13_170 -# %bb.168: - blez $a2, .LBB13_170 -# %bb.169: # %.lr.ph747.preheader - addi.w $s5, $a2, -1 + bgez $s6, .LBB13_169 +# %bb.167: + blez $a2, .LBB13_169 +# %bb.168: # %.lr.ph747.preheader + addi.w $s3, $a2, -1 nor $s6, $s6, $zero - sltu $a0, $s5, $s6 + sltu $a0, $s3, $s6 masknez $a1, $s6, $a0 - maskeqz $a0, $s5, $a0 + maskeqz $a0, $s3, $a0 or $a0, $a0, $a1 addi.d $a2, $a0, 1 - st.d $a2, $sp, 120 # 8-byte Folded Spill + st.d $a2, $sp, 80 # 8-byte Folded Spill ori $a1, $zero, 48 move $a0, $s2 pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ld.d $a0, $sp, 120 # 8-byte Folded Reload + ld.d $a0, $sp, 80 # 8-byte Folded Reload add.d $s2, $a0, $s2 - sub.d $a0, $s5, $s6 - sltu $a1, $s5, $a0 + sub.d $a0, $s3, $s6 + sltu $a1, $s3, $a0 masknez $a2, $a0, $a1 -.LBB13_170: # %.preheader642 +.LBB13_169: # %.preheader642 addi.w $a0, $a2, 0 - blez $a0, .LBB13_175 -# %bb.171: # %.lr.ph754.preheader - addi.d $s5, $a2, 1 - b .LBB13_173 + blez $a0, .LBB13_174 +# %bb.170: # %.lr.ph754.preheader + addi.d $s3, $a2, 1 + b .LBB13_172 .p2align 4, , 16 -.LBB13_172: # %et_getdigit.exit548 - # in Loop: Header=BB13_173 Depth=1 +.LBB13_171: # %et_getdigit.exit548 + # in Loop: Header=BB13_172 Depth=1 addi.d $a0, $s2, 1 st.b $a1, $s2, 0 - addi.w $s5, $s5, -1 + addi.w $s3, $s3, -1 addi.w $s8, $s8, 1 move $s2, $a0 ori $a1, $zero, 1 - bgeu $a1, $s5, .LBB13_176 -.LBB13_173: # %.lr.ph754 + bgeu $a1, $s3, .LBB13_175 +.LBB13_172: # %.lr.ph754 # =>This Inner Loop Header: Depth=1 ori $a1, $zero, 48 ori $a0, $zero, 15 - bltu $a0, $s8, .LBB13_172 -# %bb.174: # in Loop: Header=BB13_173 Depth=1 + bltu $a0, $s8, .LBB13_171 +# %bb.173: # in Loop: Header=BB13_172 Depth=1 move $a0, $s0 move $a1, $s7 pcaddu18i $ra, %call36(__fixtfsi) @@ -1987,7 +1973,7 @@ vxprintf: # @vxprintf move $a1, $s7 pcaddu18i $ra, %call36(__subtf3) jirl $ra, $ra, 0 - ld.d $a2, $sp, 152 # 8-byte Folded Reload + ld.d $a2, $sp, 160 # 8-byte Folded Reload lu52i.d $a3, $a2, 1024 move $a2, $zero pcaddu18i $ra, %call36(__multf3) @@ -1995,90 +1981,93 @@ vxprintf: # @vxprintf move $s0, $a0 move $s7, $a1 addi.d $a1, $s6, 48 - b .LBB13_172 -.LBB13_175: + b .LBB13_171 +.LBB13_174: move $a0, $s2 -.LBB13_176: # %._crit_edge755 +.LBB13_175: # %._crit_edge755 ld.d $a1, $sp, 128 # 8-byte Folded Reload sltu $a1, $zero, $a1 - ld.d $a2, $sp, 112 # 8-byte Folded Reload + ld.d $a2, $sp, 88 # 8-byte Folded Reload and $a1, $a2, $a1 - beqz $a1, .LBB13_182 -# %bb.177: # %.preheader640.preheader + beqz $a1, .LBB13_181 +# %bb.176: # %.preheader640.preheader addi.d $a0, $a0, -1 - ori $s7, $zero, 37 - ori $s5, $zero, 28 + ori $s7, $zero, 28 ori $a3, $zero, 48 - ori $a5, $zero, 100 - ld.d $s6, $sp, 144 # 8-byte Folded Reload + ld.d $s6, $sp, 152 # 8-byte Folded Reload + ld.d $a4, $sp, 112 # 8-byte Folded Reload + ld.d $a5, $sp, 136 # 8-byte Folded Reload + ld.d $a6, $sp, 72 # 8-byte Folded Reload ld.bu $a1, $a0, 0 - bne $a1, $a3, .LBB13_179 + bne $a1, $a3, .LBB13_178 .p2align 4, , 16 -.LBB13_178: # =>This Inner Loop Header: Depth=1 +.LBB13_177: # =>This Inner Loop Header: Depth=1 st.b $zero, $a0, 0 addi.d $a0, $a0, -1 ld.bu $a1, $a0, 0 - beq $a1, $a3, .LBB13_178 -.LBB13_179: # %.preheader640 + beq $a1, $a3, .LBB13_177 +.LBB13_178: # %.preheader640 ori $a2, $zero, 46 - bne $a1, $a2, .LBB13_183 + bne $a1, $a2, .LBB13_182 +# %bb.179: + andi $a1, $a4, 255 + beqz $a1, .LBB13_185 # %bb.180: - ld.d $a1, $sp, 80 # 8-byte Folded Reload - andi $a1, $a1, 255 - beqz $a1, .LBB13_186 -# %bb.181: addi.d $a1, $a0, 2 st.b $a3, $a0, 1 move $a0, $a1 - b .LBB13_187 -.LBB13_182: - ori $s7, $zero, 37 - ori $s5, $zero, 28 - ori $a5, $zero, 100 - ld.d $s6, $sp, 144 # 8-byte Folded Reload - b .LBB13_187 -.LBB13_183: # %.loopexit641.loopexit + b .LBB13_186 +.LBB13_181: + ori $s7, $zero, 28 + ld.d $s6, $sp, 152 # 8-byte Folded Reload + ld.d $a5, $sp, 136 # 8-byte Folded Reload + ld.d $a6, $sp, 72 # 8-byte Folded Reload + b .LBB13_186 +.LBB13_182: # %.loopexit641.loopexit addi.d $a0, $a0, 1 - b .LBB13_187 -.LBB13_184: - ld.d $a0, $sp, 304 # 8-byte Folded Reload + b .LBB13_186 +.LBB13_183: + ld.d $a0, $sp, 320 # 8-byte Folded Reload ld.wu $fp, $a0, 0 +.LBB13_184: + move $s2, $zero + b .LBB13_216 .LBB13_185: - move $t1, $zero - b .LBB13_217 -.LBB13_186: st.b $zero, $a0, 0 -.LBB13_187: # %.loopexit641 - ld.d $a1, $sp, 96 # 8-byte Folded Reload - addi.d $a1, $a1, -13 +.LBB13_186: # %.loopexit641 + addi.d $a1, $a6, -13 ori $a2, $zero, 2 bltu $a1, $a2, .LBB13_189 -# %bb.188: # %.loopexit641 - sltu $a1, $zero, $fp - ld.d $a2, $sp, 88 # 8-byte Folded Reload +# %bb.187: # %.loopexit641 + sltu $a1, $zero, $s5 + ld.d $a2, $sp, 64 # 8-byte Folded Reload and $a1, $a2, $a1 - beqz $a1, .LBB13_193 + bnez $a1, .LBB13_189 +# %bb.188: + ld.d $s3, $sp, 312 # 8-byte Folded Reload + ori $s5, $zero, 37 + b .LBB13_193 .LBB13_189: - ld.d $a1, $sp, 56 # 8-byte Folded Reload - ld.bu $a1, $a1, 4 + ld.bu $a1, $a5, 4 pcalau12i $a2, %pc_hi20(aDigits) addi.d $a2, $a2, %pc_lo12(aDigits) ldx.b $a1, $a2, $a1 st.b $a1, $a0, 0 - slti $a1, $fp, 0 + slti $a1, $s5, 0 ori $a2, $zero, 43 masknez $a2, $a2, $a1 ori $a3, $zero, 45 maskeqz $a1, $a3, $a1 or $a2, $a1, $a2 - srai.d $a1, $fp, 31 - xor $a3, $fp, $a1 + srai.d $a1, $s5, 31 + xor $a3, $s5, $a1 sub.w $a1, $a3, $a1 st.b $a2, $a0, 1 + ori $a5, $zero, 100 bltu $a1, $a5, .LBB13_191 # %bb.190: bstrpick.d $a2, $a1, 31, 0 - ld.d $a3, $sp, 160 # 8-byte Folded Reload + ld.d $a3, $sp, 168 # 8-byte Folded Reload mul.d $a2, $a2, $a3 srli.d $a3, $a2, 37 addi.d $a4, $a3, 48 @@ -2090,6 +2079,8 @@ vxprintf: # @vxprintf .LBB13_191: addi.d $a2, $a0, 2 .LBB13_192: + ld.d $s3, $sp, 312 # 8-byte Folded Reload + ori $s5, $zero, 37 andi $a0, $a1, 255 ori $a3, $zero, 205 mul.d $a0, $a0, $a3 @@ -2105,22 +2096,22 @@ vxprintf: # @vxprintf .LBB13_193: move $s0, $zero st.b $zero, $a0, 0 - addi.d $s2, $sp, 314 + addi.d $s2, $sp, 330 sub.w $t4, $a0, $s2 - ld.d $a1, $sp, 72 # 8-byte Folded Reload - beqz $a1, .LBB13_214 + ld.d $a1, $sp, 120 # 8-byte Folded Reload + beqz $a1, .LBB13_234 # %bb.194: - ld.d $a1, $sp, 136 # 8-byte Folded Reload - bnez $a1, .LBB13_203 + ld.d $a1, $sp, 144 # 8-byte Folded Reload + bnez $a1, .LBB13_235 # %bb.195: - bge $t4, $s6, .LBB13_203 + bge $t4, $s6, .LBB13_235 # %bb.196: sub.w $a1, $s6, $t4 - addi.d $t1, $sp, 314 + addi.d $t1, $sp, 330 bltz $t4, .LBB13_201 # %bb.197: # %iter.check - ld.d $a2, $sp, 32 # 8-byte Folded Reload - ld.d $a3, $sp, 24 # 8-byte Folded Reload + ld.d $a2, $sp, 104 # 8-byte Folded Reload + ld.d $a3, $sp, 96 # 8-byte Folded Reload or $a3, $a3, $a2 slt $a2, $a3, $a1 maskeqz $a4, $a3, $a2 @@ -2129,11 +2120,11 @@ vxprintf: # @vxprintf sub.d $a2, $a3, $a2 addi.d $a2, $a2, 1 ori $a4, $zero, 7 - bltu $a4, $a2, .LBB13_206 + bltu $a4, $a2, .LBB13_205 .LBB13_198: move $a4, $a3 .LBB13_199: # %.lr.ph760.preheader - ld.d $a2, $sp, 168 # 8-byte Folded Reload + ld.d $a2, $sp, 176 # 8-byte Folded Reload add.d $a2, $a2, $a4 add.d $a0, $a2, $a0 sub.w $a0, $a0, $s6 @@ -2147,31 +2138,27 @@ vxprintf: # @vxprintf addi.w $a0, $a0, -1 blt $a1, $a3, .LBB13_200 .LBB13_201: # %.preheader638 - bne $s6, $t4, .LBB13_204 + bne $s6, $t4, .LBB13_203 # %bb.202: move $s0, $zero - addi.d $s2, $sp, 314 - b .LBB13_205 -.LBB13_203: - ld.d $s4, $sp, 296 # 8-byte Folded Reload - b .LBB13_236 -.LBB13_204: # %.lr.ph764.preheader - addi.d $s2, $sp, 314 - ld.d $a0, $sp, 16 # 8-byte Folded Reload + addi.d $s2, $sp, 330 + b .LBB13_204 +.LBB13_203: # %.lr.ph764.preheader + addi.d $s2, $sp, 330 + ld.d $a0, $sp, 40 # 8-byte Folded Reload add.d $a0, $s2, $a0 bstrpick.d $a2, $a1, 31, 0 ori $a1, $zero, 48 pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 move $s0, $zero -.LBB13_205: # %.loopexit639.thread +.LBB13_204: # %.loopexit639.thread move $t4, $s6 - ld.d $s4, $sp, 296 # 8-byte Folded Reload - b .LBB13_248 -.LBB13_206: # %vector.scevcheck - slt $a4, $s4, $a1 + b .LBB13_247 +.LBB13_205: # %vector.scevcheck + slt $a4, $fp, $a1 masknez $a5, $a1, $a4 - maskeqz $a4, $s4, $a4 + maskeqz $a4, $fp, $a4 or $a4, $a4, $a5 slti $a5, $a4, 340 maskeqz $a4, $a4, $a5 @@ -2183,18 +2170,18 @@ vxprintf: # @vxprintf addi.w $a6, $a4, 0 addi.w $a7, $a5, 0 bltu $a6, $a7, .LBB13_198 -# %bb.207: # %vector.scevcheck +# %bb.206: # %vector.scevcheck srli.d $a5, $a5, 32 bnez $a5, .LBB13_198 -# %bb.208: # %vector.memcheck +# %bb.207: # %vector.memcheck bstrpick.d $a4, $a4, 31, 0 sub.d $a4, $a4, $a3 - bltu $a4, $s3, .LBB13_198 -# %bb.209: # %vector.main.loop.iter.check - bgeu $a2, $s3, .LBB13_255 -# %bb.210: + bltu $a4, $s4, .LBB13_198 +# %bb.208: # %vector.main.loop.iter.check + bgeu $a2, $s4, .LBB13_254 +# %bb.209: move $a5, $zero -.LBB13_211: # %vec.epilog.ph +.LBB13_210: # %vec.epilog.ph move $a6, $a2 bstrins.d $a6, $zero, 2, 0 sub.d $a4, $a3, $a6 @@ -2202,9 +2189,9 @@ vxprintf: # @vxprintf sub.w $a7, $a0, $a7 sub.d $t0, $a5, $a6 sub.d $a3, $a3, $a5 - addi.d $a5, $sp, 307 + addi.d $a5, $sp, 323 add.d $a3, $a5, $a3 -.LBB13_212: # %vec.epilog.vector.body +.LBB13_211: # %vec.epilog.vector.body # =>This Inner Loop Header: Depth=1 add.d $a5, $t1, $a7 ld.d $a5, $a5, -7 @@ -2212,37 +2199,37 @@ vxprintf: # @vxprintf addi.w $a7, $a7, -8 addi.d $t0, $t0, 8 addi.d $a3, $a3, -8 - bnez $t0, .LBB13_212 -# %bb.213: # %vec.epilog.middle.block + bnez $t0, .LBB13_211 +# %bb.212: # %vec.epilog.middle.block bne $a2, $a6, .LBB13_199 b .LBB13_201 +.LBB13_213: + ld.d $a0, $sp, 320 # 8-byte Folded Reload + ld.w $fp, $a0, 0 + bgez $fp, .LBB13_115 .LBB13_214: - ld.d $s4, $sp, 296 # 8-byte Folded Reload - b .LBB13_235 + sub.d $fp, $zero, $fp + ori $s2, $zero, 45 .LBB13_215: - ld.d $a0, $sp, 304 # 8-byte Folded Reload - ld.w $fp, $a0, 0 - bgez $fp, .LBB13_116 + move $s6, $a3 .LBB13_216: - sub.d $fp, $zero, $fp - ori $t1, $zero, 45 -.LBB13_217: - beqz $t2, .LBB13_219 -# %bb.218: - sltu $a0, $zero, $t1 - sub.w $a0, $s8, $a0 + beqz $a2, .LBB13_218 +# %bb.217: + sltu $a0, $zero, $s2 + sub.w $a0, $s6, $a0 slt $a1, $a0, $t4 masknez $a0, $a0, $a1 maskeqz $a1, $t4, $a1 or $t4, $a1, $a0 -.LBB13_219: - ld.d $s4, $sp, 296 # 8-byte Folded Reload - ori $s7, $zero, 37 - addi.d $a2, $sp, 663 +.LBB13_218: + ld.d $s3, $sp, 312 # 8-byte Folded Reload + ori $s5, $zero, 37 + ori $s7, $zero, 28 + addi.d $a2, $sp, 679 move $a0, $a2 - beqz $s2, .LBB13_221 -# %bb.220: - ld.d $a3, $sp, 200 # 8-byte Folded Reload + beqz $a4, .LBB13_220 +# %bb.219: + ld.d $a3, $sp, 208 # 8-byte Folded Reload mulh.du $a0, $fp, $a3 srli.d $a1, $a0, 3 bstrins.d $a0, $zero, 2, 0 @@ -2251,11 +2238,11 @@ vxprintf: # @vxprintf ori $a2, $zero, 3 sltu $a2, $a2, $a0 mul.d $a1, $a1, $a3 - ld.d $a3, $sp, 192 # 8-byte Folded Reload + ld.d $a3, $sp, 200 # 8-byte Folded Reload bstrins.d $a3, $a3, 61, 32 add.d $a1, $a1, $a3 rotri.d $a1, $a1, 1 - ld.d $a3, $sp, 184 # 8-byte Folded Reload + ld.d $a3, $sp, 192 # 8-byte Folded Reload sltu $a1, $a1, $a3 masknez $a0, $a0, $a1 masknez $a0, $a0, $a2 @@ -2263,29 +2250,29 @@ vxprintf: # @vxprintf pcalau12i $a1, %pc_hi20(vxprintf.zOrd) addi.d $a1, $a1, %pc_lo12(vxprintf.zOrd) ldx.h $a0, $a1, $a0 - st.h $a0, $sp, 661 - addi.d $a2, $sp, 661 + st.h $a0, $sp, 677 + addi.d $a2, $sp, 677 move $a0, $a2 -.LBB13_221: - ld.bu $a1, $t6, 4 - ld.d $a3, $sp, 304 # 8-byte Folded Reload +.LBB13_220: + ld.d $a4, $sp, 136 # 8-byte Folded Reload + ld.bu $a1, $a4, 4 + ld.d $a3, $sp, 320 # 8-byte Folded Reload addi.d $a3, $a3, 8 - st.d $a3, $sp, 304 # 8-byte Folded Spill + st.d $a3, $sp, 320 # 8-byte Folded Spill pcalau12i $a3, %pc_hi20(aDigits) addi.d $a3, $a3, %pc_lo12(aDigits) add.d $a3, $a3, $a1 - move $s6, $t6 - ld.bu $a4, $t6, 1 - ld.d $a1, $sp, 224 # 8-byte Folded Reload + ld.bu $a4, $a4, 1 + ld.d $a1, $sp, 240 # 8-byte Folded Reload add.d $a1, $a1, $t4 add.d $a1, $a1, $a0 addi.d $a0, $a2, -1 - ld.d $a5, $sp, 240 # 8-byte Folded Reload + ld.d $a5, $sp, 256 # 8-byte Folded Reload add.d $a5, $a5, $t4 add.d $a2, $a5, $a2 move $a5, $fp .p2align 4, , 16 -.LBB13_222: # =>This Inner Loop Header: Depth=1 +.LBB13_221: # =>This Inner Loop Header: Depth=1 move $a6, $a5 div.du $a5, $a5, $a4 mul.d $a7, $a5, $a4 @@ -2295,115 +2282,112 @@ vxprintf: # @vxprintf addi.w $a1, $a1, -1 addi.d $a0, $a0, -1 addi.w $a2, $a2, -1 - bgeu $a6, $a4, .LBB13_222 -# %bb.223: - blez $a2, .LBB13_231 -# %bb.224: # %.lr.ph768.preheader + bgeu $a6, $a4, .LBB13_221 +# %bb.222: + blez $a2, .LBB13_230 +# %bb.223: # %.lr.ph768.preheader bstrpick.d $a1, $a1, 31, 0 sub.d $s0, $a0, $a1 ori $a1, $zero, 48 move $a0, $s0 - move $s8, $t0 - move $s2, $t1 + move $s8, $t2 pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - move $t1, $s2 - move $t0, $s8 - beqz $t1, .LBB13_226 -.LBB13_225: + move $t2, $s8 + beqz $s2, .LBB13_225 +.LBB13_224: addi.d $a0, $s0, -1 - st.b $t1, $s0, -1 + st.b $s2, $s0, -1 move $s0, $a0 -.LBB13_226: - beqz $fp, .LBB13_230 -# %bb.227: - ld.d $a0, $sp, 120 # 8-byte Folded Reload +.LBB13_225: + beqz $fp, .LBB13_229 +# %bb.226: + ld.d $a0, $sp, 128 # 8-byte Folded Reload andi $a0, $a0, 255 - beqz $a0, .LBB13_230 + beqz $a0, .LBB13_229 +# %bb.227: + ld.d $a0, $sp, 224 # 8-byte Folded Reload + and $a0, $t2, $a0 + bnez $a0, .LBB13_229 # %bb.228: - ld.d $a0, $sp, 208 # 8-byte Folded Reload - and $a0, $t0, $a0 - bnez $a0, .LBB13_230 -# %bb.229: - ld.bu $a1, $s6, 5 + ld.d $a0, $sp, 136 # 8-byte Folded Reload + ld.bu $a1, $a0, 5 pcalau12i $a0, %pc_hi20(aPrefix) addi.d $a2, $a0, %pc_lo12(aPrefix) ld.bu $a3, $s0, 0 ldx.bu $a0, $a2, $a1 - bne $a3, $a0, .LBB13_232 -.LBB13_230: + bne $a3, $a0, .LBB13_231 +.LBB13_229: move $s2, $s0 - ld.d $s6, $sp, 144 # 8-byte Folded Reload - b .LBB13_234 -.LBB13_231: + b .LBB13_233 +.LBB13_230: addi.d $s0, $a0, 1 - bnez $t1, .LBB13_225 - b .LBB13_226 -.LBB13_232: # %.lr.ph774.preheader + bnez $s2, .LBB13_224 + b .LBB13_225 +.LBB13_231: # %.lr.ph774.preheader add.d $a1, $a2, $a1 addi.d $a1, $a1, 1 - ld.d $s6, $sp, 144 # 8-byte Folded Reload .p2align 4, , 16 -.LBB13_233: # %.lr.ph774 +.LBB13_232: # %.lr.ph774 # =>This Inner Loop Header: Depth=1 st.b $a0, $s0, -1 ld.bu $a0, $a1, 0 addi.d $s2, $s0, -1 addi.d $a1, $a1, 1 move $s0, $s2 - bnez $a0, .LBB13_233 -.LBB13_234: # %.loopexit + bnez $a0, .LBB13_232 +.LBB13_233: # %.loopexit move $s0, $zero - addi.d $a0, $sp, 663 + addi.d $a0, $sp, 679 sub.d $t4, $a0, $s2 +.LBB13_234: # %.loopexit639 + ld.d $a1, $sp, 144 # 8-byte Folded Reload .LBB13_235: # %.loopexit639 - ld.d $a1, $sp, 136 # 8-byte Folded Reload -.LBB13_236: # %.loopexit639 - beqz $a1, .LBB13_248 -# %bb.237: + beqz $a1, .LBB13_247 +# %bb.236: move $fp, $zero addi.w $a2, $t4, 0 move $s8, $t4 - blez $a2, .LBB13_239 -.LBB13_238: - move $a0, $s4 + blez $a2, .LBB13_238 +.LBB13_237: + move $a0, $s3 move $a1, $s2 pcaddu18i $ra, %call36(sqlite3StrAccumAppend) jirl $ra, $ra, 0 move $t4, $s8 -.LBB13_239: - bnez $fp, .LBB13_245 -# %bb.240: +.LBB13_238: + bnez $fp, .LBB13_244 +# %bb.239: sub.w $s6, $s6, $t4 - blez $s6, .LBB13_245 -# %bb.241: + blez $s6, .LBB13_244 +# %bb.240: ori $a0, $zero, 29 - bltu $s6, $a0, .LBB13_244 + bltu $s6, $a0, .LBB13_243 .p2align 4, , 16 -.LBB13_242: # %.lr.ph.i551 +.LBB13_241: # %.lr.ph.i551 # =>This Inner Loop Header: Depth=1 pcalau12i $a0, %pc_hi20(appendSpace.zSpaces) addi.d $a1, $a0, %pc_lo12(appendSpace.zSpaces) ori $a2, $zero, 29 - move $a0, $s4 + move $a0, $s3 pcaddu18i $ra, %call36(sqlite3StrAccumAppend) jirl $ra, $ra, 0 addi.w $s6, $s6, -29 - bltu $s5, $s6, .LBB13_242 -# %bb.243: # %._crit_edge.i553 + bltu $s7, $s6, .LBB13_241 +# %bb.242: # %._crit_edge.i553 move $t4, $s8 - beqz $s6, .LBB13_245 -.LBB13_244: # %._crit_edge.thread.i549 + beqz $s6, .LBB13_244 +.LBB13_243: # %._crit_edge.thread.i549 pcalau12i $a0, %pc_hi20(appendSpace.zSpaces) addi.d $a1, $a0, %pc_lo12(appendSpace.zSpaces) - move $a0, $s4 + move $a0, $s3 move $a2, $s6 pcaddu18i $ra, %call36(sqlite3StrAccumAppend) jirl $ra, $ra, 0 move $t4, $s8 -.LBB13_245: # %appendSpace.exit555 - beqz $s0, .LBB13_247 -# %bb.246: # %sqlite3_free.exit +.LBB13_244: # %appendSpace.exit555 + beqz $s0, .LBB13_246 +# %bb.245: # %sqlite3_free.exit ld.w $a1, $s0, -8 pcalau12i $a2, %pc_hi20(mem.5) ld.d $a3, $a2, %pc_lo12(mem.5) @@ -2413,56 +2397,56 @@ vxprintf: # @vxprintf pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 move $t4, $s8 -.LBB13_247: +.LBB13_246: addi.d $a2, $s1, 1 ld.bu $a0, $a2, 0 - bne $a0, $s7, .LBB13_1 + bne $a0, $s5, .LBB13_1 b .LBB13_6 -.LBB13_248: # %.loopexit639.thread +.LBB13_247: # %.loopexit639.thread move $s8, $s6 sub.w $s6, $s6, $t4 ori $fp, $zero, 1 - blez $s6, .LBB13_254 -# %bb.249: - st.d $t4, $sp, 152 # 8-byte Folded Spill + blez $s6, .LBB13_253 +# %bb.248: + st.d $t4, $sp, 160 # 8-byte Folded Spill ori $a0, $zero, 29 - bltu $s6, $a0, .LBB13_252 + bltu $s6, $a0, .LBB13_251 .p2align 4, , 16 -.LBB13_250: # %.lr.ph.i +.LBB13_249: # %.lr.ph.i # =>This Inner Loop Header: Depth=1 pcalau12i $a0, %pc_hi20(appendSpace.zSpaces) addi.d $a1, $a0, %pc_lo12(appendSpace.zSpaces) ori $a2, $zero, 29 - move $a0, $s4 + move $a0, $s3 pcaddu18i $ra, %call36(sqlite3StrAccumAppend) jirl $ra, $ra, 0 addi.w $s6, $s6, -29 - bltu $s5, $s6, .LBB13_250 -# %bb.251: # %._crit_edge.i - beqz $s6, .LBB13_253 -.LBB13_252: # %._crit_edge.thread.i + bltu $s7, $s6, .LBB13_249 +# %bb.250: # %._crit_edge.i + beqz $s6, .LBB13_252 +.LBB13_251: # %._crit_edge.thread.i pcalau12i $a0, %pc_hi20(appendSpace.zSpaces) addi.d $a1, $a0, %pc_lo12(appendSpace.zSpaces) - move $a0, $s4 + move $a0, $s3 move $a2, $s6 pcaddu18i $ra, %call36(sqlite3StrAccumAppend) jirl $ra, $ra, 0 +.LBB13_252: # %appendSpace.exit + ld.d $t4, $sp, 160 # 8-byte Folded Reload .LBB13_253: # %appendSpace.exit - ld.d $t4, $sp, 152 # 8-byte Folded Reload -.LBB13_254: # %appendSpace.exit move $s6, $s8 addi.w $a2, $t4, 0 move $s8, $t4 - bgtz $a2, .LBB13_238 - b .LBB13_239 -.LBB13_255: # %vector.ph + bgtz $a2, .LBB13_237 + b .LBB13_238 +.LBB13_254: # %vector.ph move $a5, $a2 bstrins.d $a5, $zero, 3, 0 - addi.d $a4, $sp, 299 + addi.d $a4, $sp, 315 add.d $a4, $a4, $a3 move $a6, $a5 move $a7, $t4 -.LBB13_256: # %vector.body +.LBB13_255: # %vector.body # =>This Inner Loop Header: Depth=1 add.d $t0, $t1, $a7 vld $vr0, $t0, -15 @@ -2470,43 +2454,43 @@ vxprintf: # @vxprintf addi.w $a7, $a7, -16 addi.d $a6, $a6, -16 addi.d $a4, $a4, -16 - bnez $a6, .LBB13_256 -# %bb.257: # %middle.block + bnez $a6, .LBB13_255 +# %bb.256: # %middle.block beq $a2, $a5, .LBB13_201 -# %bb.258: # %vec.epilog.iter.check +# %bb.257: # %vec.epilog.iter.check andi $a4, $a2, 8 - bnez $a4, .LBB13_211 -# %bb.259: + bnez $a4, .LBB13_210 +# %bb.258: sub.d $a4, $a3, $a5 b .LBB13_199 -.LBB13_260: +.LBB13_259: pcalau12i $a0, %pc_hi20(.L.str.17) addi.d $s2, $a0, %pc_lo12(.L.str.17) move $s0, $zero ori $t4, $zero, 3 - b .LBB13_146 -.LBB13_261: + b .LBB13_153 +.LBB13_260: pcalau12i $a0, %pc_hi20(.L.str.16) addi.d $a1, $a0, %pc_lo12(.L.str.16) ori $a2, $zero, 1 - ld.d $a0, $sp, 296 # 8-byte Folded Reload + ld.d $a0, $sp, 312 # 8-byte Folded Reload pcaddu18i $ra, %call36(sqlite3StrAccumAppend) jirl $ra, $ra, 0 -.LBB13_262: # %.thread602 - fld.d $fs1, $sp, 664 # 8-byte Folded Reload - fld.d $fs0, $sp, 672 # 8-byte Folded Reload - ld.d $s8, $sp, 680 # 8-byte Folded Reload - ld.d $s7, $sp, 688 # 8-byte Folded Reload - ld.d $s6, $sp, 696 # 8-byte Folded Reload - ld.d $s5, $sp, 704 # 8-byte Folded Reload - ld.d $s4, $sp, 712 # 8-byte Folded Reload - ld.d $s3, $sp, 720 # 8-byte Folded Reload - ld.d $s2, $sp, 728 # 8-byte Folded Reload - ld.d $s1, $sp, 736 # 8-byte Folded Reload - ld.d $s0, $sp, 744 # 8-byte Folded Reload - ld.d $fp, $sp, 752 # 8-byte Folded Reload - ld.d $ra, $sp, 760 # 8-byte Folded Reload - addi.d $sp, $sp, 768 +.LBB13_261: # %.thread602 + fld.d $fs1, $sp, 680 # 8-byte Folded Reload + fld.d $fs0, $sp, 688 # 8-byte Folded Reload + ld.d $s8, $sp, 696 # 8-byte Folded Reload + ld.d $s7, $sp, 704 # 8-byte Folded Reload + ld.d $s6, $sp, 712 # 8-byte Folded Reload + ld.d $s5, $sp, 720 # 8-byte Folded Reload + ld.d $s4, $sp, 728 # 8-byte Folded Reload + ld.d $s3, $sp, 736 # 8-byte Folded Reload + ld.d $s2, $sp, 744 # 8-byte Folded Reload + ld.d $s1, $sp, 752 # 8-byte Folded Reload + ld.d $s0, $sp, 760 # 8-byte Folded Reload + ld.d $fp, $sp, 768 # 8-byte Folded Reload + ld.d $ra, $sp, 776 # 8-byte Folded Reload + addi.d $sp, $sp, 784 ret .Lfunc_end13: .size vxprintf, .Lfunc_end13-vxprintf @@ -2532,90 +2516,90 @@ vxprintf: # @vxprintf .word .LBB13_16-.LJTI13_0 .LJTI13_1: .word .LBB13_36-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 .word .LBB13_42-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 .word .LBB13_41-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 .word .LBB13_43-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 .word .LBB13_47-.LJTI13_1 .word .LBB13_49-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 .word .LBB13_53-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 .word .LBB13_44-.LJTI13_1 .word .LBB13_60-.LJTI13_1 .word .LBB13_51-.LJTI13_1 .word .LBB13_57-.LJTI13_1 .word .LBB13_45-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 .word .LBB13_52-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 .word .LBB13_46-.LJTI13_1 .word .LBB13_58-.LJTI13_1 .word .LBB13_54-.LJTI13_1 .word .LBB13_59-.LJTI13_1 .word .LBB13_48-.LJTI13_1 .word .LBB13_38-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 .word .LBB13_56-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 .word .LBB13_40-.LJTI13_1 .word .LBB13_39-.LJTI13_1 - .word .LBB13_262-.LJTI13_1 + .word .LBB13_261-.LJTI13_1 .word .LBB13_55-.LJTI13_1 .LJTI13_2: .word .LBB13_67-.LJTI13_2 @@ -2631,7 +2615,7 @@ vxprintf: # @vxprintf .word .LBB13_64-.LJTI13_2 .word .LBB13_64-.LJTI13_2 .word .LBB13_93-.LJTI13_2 - .word .LBB13_104-.LJTI13_2 + .word .LBB13_103-.LJTI13_2 .word .LBB13_91-.LJTI13_2 .word .LBB13_64-.LJTI13_2 .word .LBB13_67-.LJTI13_2 @@ -4338,12 +4322,7 @@ sqlite3_value_int: # @sqlite3_value_int .Lfunc_end33: .size sqlite3_value_int, .Lfunc_end33-sqlite3_value_int # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function sqlite3VdbeIntValue -.LCPI34_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 - .text - .p2align 5 + .p2align 5 # -- Begin function sqlite3VdbeIntValue .type sqlite3VdbeIntValue,@function sqlite3VdbeIntValue: # @sqlite3VdbeIntValue # %bb.0: @@ -4378,10 +4357,10 @@ sqlite3VdbeIntValue: # @sqlite3VdbeIntValue ret .LBB34_6: fld.d $fa0, $a0, 8 - pcalau12i $a0, %pc_hi20(.LCPI34_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI34_0) - fabs.d $fa2, $fa0 - fcmp.clt.d $fcc0, $fa1, $fa2 + fabs.d $fa1, $fa0 + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fa2, $a0 + fcmp.clt.d $fcc0, $fa2, $fa1 ftintrz.l.d $fa0, $fa0 movfr2gr.d $a0, $fa0 movcf2gr $a1, $fcc0 @@ -5422,21 +5401,12 @@ sqlite3_result_error_nomem: # @sqlite3_result_error_nomem .Lfunc_end58: .size sqlite3_result_error_nomem, .Lfunc_end58-sqlite3_result_error_nomem # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function sqlite3_step -.LCPI59_0: - .dword 0x40ac200000000000 # double 3600 -.LCPI59_1: - .dword 0x41cdcd6500000000 # double 1.0E+9 -.LCPI59_2: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 - .text - .globl sqlite3_step + .globl sqlite3_step # -- Begin function sqlite3_step .p2align 5 .type sqlite3_step,@function sqlite3_step: # @sqlite3_step # %bb.0: - beqz $a0, .LBB59_155 + beqz $a0, .LBB59_153 # %bb.1: addi.d $sp, $sp, -736 st.d $ra, $sp, 728 # 8-byte Folded Spill @@ -5452,28 +5422,32 @@ sqlite3_step: # @sqlite3_step st.d $s8, $sp, 648 # 8-byte Folded Spill fst.d $fs0, $sp, 640 # 8-byte Folded Spill fst.d $fs1, $sp, 632 # 8-byte Folded Spill + fst.d $fs2, $sp, 624 # 8-byte Folded Spill move $fp, $a0 ld.w $a0, $a0, 116 ld.d $a1, $fp, 0 st.d $a1, $sp, 104 # 8-byte Folded Spill lu12i.w $a1, -270560 ori $s7, $a1, 3491 - ori $s4, $zero, 21 - bne $a0, $s7, .LBB59_166 + ori $s3, $zero, 21 + bne $a0, $s7, .LBB59_164 # %bb.2: # %.lr.ph move $s0, $zero addi.d $s6, $fp, 320 - pcalau12i $a0, %pc_hi20(.LCPI59_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI59_0) - pcalau12i $a0, %pc_hi20(.LCPI59_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI59_1) + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -253952 + lu52i.d $a1, $a1, 1034 + movgr2fr.d $fs0, $a1 + lu32i.d $a0, -144027 + lu52i.d $a0, $a0, 1052 + movgr2fr.d $fs1, $a0 vrepli.b $vr0, 0 vst $vr0, $sp, 48 # 16-byte Folded Spill - ori $s3, $zero, 5 + ori $s4, $zero, 5 + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs2, $a0 ori $s1, $zero, 2 - ori $a0, $zero, 0 - lu32i.d $a0, 1 - st.d $a0, $sp, 128 # 8-byte Folded Spill lu12i.w $a0, 444054 ori $a0, $a0, 3628 st.d $a0, $sp, 40 # 8-byte Folded Spill @@ -5492,13 +5466,14 @@ sqlite3_step: # @sqlite3_step ori $a0, $a0, 1687 st.d $a0, $sp, 88 # 8-byte Folded Spill st.d $s5, $sp, 112 # 8-byte Folded Spill + st.d $s6, $sp, 80 # 8-byte Folded Spill .LBB59_3: # =>This Loop Header: Depth=1 # Child Loop BB59_10 Depth 2 # Child Loop BB59_14 Depth 2 - # Child Loop BB59_118 Depth 2 + # Child Loop BB59_119 Depth 2 # Child Loop BB59_98 Depth 2 ld.bu $a0, $fp, 338 - bnez $a0, .LBB59_165 + bnez $a0, .LBB59_163 # %bb.4: # in Loop: Header=BB59_3 Depth=1 ori $s8, $zero, 1 ld.w $a1, $fp, 184 @@ -5509,27 +5484,26 @@ sqlite3_step: # @sqlite3_step ld.bu $a0, $fp, 336 beqz $a0, .LBB59_23 # %bb.6: # in Loop: Header=BB59_3 Depth=1 - ld.d $s4, $fp, 128 + ld.d $s3, $fp, 128 ld.w $a2, $fp, 116 - ld.d $s3, $fp, 0 - addi.d $s5, $s4, 48 - st.d $s5, $fp, 328 + ld.d $s5, $fp, 0 + addi.d $s4, $s3, 48 + st.d $s4, $fp, 328 ori $a0, $zero, 21 bne $a2, $s7, .LBB59_64 # %bb.7: # in Loop: Header=BB59_3 Depth=1 ld.w $a0, $fp, 120 beqz $a0, .LBB59_13 # %bb.8: # in Loop: Header=BB59_3 Depth=1 - st.d $s0, $sp, 80 # 8-byte Folded Spill + st.d $s0, $sp, 72 # 8-byte Folded Spill st.d $s7, $sp, 64 # 8-byte Folded Spill - st.d $s6, $sp, 72 # 8-byte Folded Spill - ld.d $s0, $s4, 64 + ld.d $s0, $s3, 64 ld.bu $s6, $s0, 42 blez $a0, .LBB59_12 # %bb.9: # %.lr.ph.i.i.preheader # in Loop: Header=BB59_3 Depth=1 addi.d $s8, $a0, 1 - move $s1, $s5 + move $s1, $s4 ori $s7, $zero, 1 .p2align 4, , 16 .LBB59_10: # %.lr.ph.i.i @@ -5551,9 +5525,9 @@ sqlite3_step: # @sqlite3_step .LBB59_12: # %._crit_edge.i.i # in Loop: Header=BB59_3 Depth=1 st.b $s6, $s0, 42 - ld.d $s6, $sp, 72 # 8-byte Folded Reload + ld.d $s6, $sp, 80 # 8-byte Folded Reload ld.d $s7, $sp, 64 # 8-byte Folded Reload - ld.d $s0, $sp, 80 # 8-byte Folded Reload + ld.d $s0, $sp, 72 # 8-byte Folded Reload .LBB59_13: # %releaseMemArray.exit.i # in Loop: Header=BB59_3 Depth=1 ld.w $a0, $fp, 24 @@ -5579,7 +5553,8 @@ sqlite3_step: # @sqlite3_step # %bb.16: # in Loop: Header=BB59_14 Depth=2 ld.d $a6, $fp, 32 ldx.bu $t0, $a6, $a3 - ld.d $a6, $sp, 128 # 8-byte Folded Reload + ori $a6, $zero, 0 + lu32i.d $a6, 1 add.d $a6, $a0, $a6 addi.d $a3, $a3, 24 addi.d $a4, $a4, -1 @@ -5588,7 +5563,7 @@ sqlite3_step: # @sqlite3_step bne $t0, $t1, .LBB59_14 .LBB59_17: # in Loop: Header=BB59_3 Depth=1 st.w $a5, $fp, 184 - ld.w $a2, $s3, 264 + ld.w $a2, $s5, 264 beqz $a2, .LBB59_41 # %bb.18: # in Loop: Header=BB59_3 Depth=1 ori $a0, $zero, 9 @@ -5604,7 +5579,7 @@ sqlite3_step: # @sqlite3_step bnez $a1, .LBB59_65 .p2align 4, , 16 .LBB59_19: # in Loop: Header=BB59_3 Depth=1 - move $s4, $a0 + move $s3, $a0 b .LBB59_24 .p2align 4, , 16 .LBB59_20: # in Loop: Header=BB59_3 Depth=1 @@ -5612,7 +5587,7 @@ sqlite3_step: # @sqlite3_step beqz $a0, .LBB59_36 # %bb.21: # in Loop: Header=BB59_3 Depth=1 ld.w $s1, $fp, 188 - ori $s4, $zero, 1 + ori $s3, $zero, 1 bnez $s1, .LBB59_70 # %bb.22: # in Loop: Header=BB59_3 Depth=1 ori $s1, $zero, 17 @@ -5623,8 +5598,8 @@ sqlite3_step: # @sqlite3_step move $a0, $fp pcaddu18i $ra, %call36(sqlite3VdbeExec) jirl $ra, $ra, 0 - ori $s4, $zero, 100 - bne $a0, $s4, .LBB59_64 + ori $s3, $zero, 100 + bne $a0, $s3, .LBB59_64 .p2align 4, , 16 .LBB59_24: # in Loop: Header=BB59_3 Depth=1 ld.d $s1, $fp, 0 @@ -5637,11 +5612,11 @@ sqlite3_step: # @sqlite3_step # in Loop: Header=BB59_3 Depth=1 ld.h $a1, $a0, 36 bstrins.d $a1, $s8, 4, 0 - ori $s3, $zero, 5 + ori $s4, $zero, 5 .LBB59_27: # in Loop: Header=BB59_3 Depth=1 - st.w $s4, $s1, 32 + st.w $s3, $s1, 32 st.h $a1, $a0, 36 - st.b $s3, $a0, 38 + st.b $s4, $a0, 38 ld.d $s2, $fp, 0 ld.w $a0, $fp, 188 beqz $s2, .LBB59_35 @@ -5659,7 +5634,7 @@ sqlite3_step: # @sqlite3_step ori $a2, $zero, 1 bstrins.d $a2, $a1, 15, 5 st.h $a2, $a0, 36 - st.b $s3, $a0, 38 + st.b $s4, $a0, 38 .LBB59_31: # %sqlite3Error.exit.i36 # in Loop: Header=BB59_3 Depth=1 st.b $zero, $s2, 42 @@ -5669,7 +5644,7 @@ sqlite3_step: # @sqlite3_step b .LBB59_68 .LBB59_33: # in Loop: Header=BB59_3 Depth=1 ld.bu $a0, $s1, 42 - ori $s3, $zero, 5 + ori $s4, $zero, 5 beqz $a0, .LBB59_39 # %bb.34: # %sqlite3ValueNew.exit.thread.i42 # in Loop: Header=BB59_3 Depth=1 @@ -5707,7 +5682,7 @@ sqlite3_step: # @sqlite3_step vst $vr0, $a0, 0 ori $a1, $zero, 1 st.h $a1, $a0, 36 - st.b $s3, $a0, 38 + st.b $s4, $a0, 38 st.d $s1, $a0, 16 st.d $a0, $s1, 240 b .LBB59_27 @@ -5716,47 +5691,48 @@ sqlite3_step: # @sqlite3_step srai.d $a3, $a0, 29 srai.d $a4, $a0, 28 add.d $a3, $a4, $a3 - add.d $s8, $a2, $a3 + add.d $s6, $a2, $a3 ori $s1, $zero, 1 - ori $s3, $zero, 4 + ori $s5, $zero, 4 bne $a1, $s1, .LBB59_43 # %bb.42: # in Loop: Header=BB59_3 Depth=1 srai.d $a0, $a0, 32 - st.h $s3, $s4, 84 - st.b $s1, $s4, 86 - st.d $a0, $s4, 48 + st.h $s5, $s3, 84 + st.b $s1, $s3, 86 + st.d $a0, $s3, 48 ori $a0, $zero, 162 - st.h $a0, $s4, 132 - ld.bu $a0, $s8, 0 + st.h $a0, $s3, 132 + ld.bu $a0, $s6, 0 slli.d $a0, $a0, 3 pcalau12i $a1, %pc_hi20(sqlite3OpcodeName.azName) addi.d $a1, $a1, %pc_lo12(sqlite3OpcodeName.azName) ldx.d $a0, $a1, $a0 - st.d $a0, $s4, 120 + st.d $a0, $s3, 120 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 - st.w $a0, $s4, 128 + st.w $a0, $s3, 128 ori $a0, $zero, 259 - st.h $a0, $s4, 134 - addi.d $s5, $s4, 144 + st.h $a0, $s3, 134 + addi.d $s4, $s3, 144 .LBB59_43: # in Loop: Header=BB59_3 Depth=1 - st.h $s3, $s5, 36 - ld.w $a0, $s8, 4 - st.d $a0, $s5, 0 - st.b $s1, $s5, 38 - st.h $s3, $s5, 84 - ld.w $a0, $s8, 8 - st.d $a0, $s5, 48 - st.b $s1, $s5, 86 + st.h $s5, $s4, 36 + ld.w $a0, $s6, 4 + st.d $a0, $s4, 0 + st.b $s1, $s4, 38 + st.h $s5, $s4, 84 + ld.w $a0, $s6, 8 + st.d $a0, $s4, 48 + st.b $s1, $s4, 86 ld.bu $a0, $fp, 336 ori $a1, $zero, 1 bne $a0, $a1, .LBB59_51 # %bb.44: # in Loop: Header=BB59_3 Depth=1 - st.h $s3, $s5, 132 - ld.w $a0, $s8, 12 - st.d $a0, $s5, 96 - st.b $a1, $s5, 134 - addi.d $s4, $s5, 144 + st.h $s5, $s4, 132 + ld.w $a0, $s6, 12 + st.d $a0, $s4, 96 + st.b $a1, $s4, 134 + addi.d $s4, $s4, 144 + ori $s1, $zero, 5 ld.hu $a0, $s4, 36 andi $a0, $a0, 64 bnez $a0, .LBB59_52 @@ -5775,9 +5751,9 @@ sqlite3_step: # @sqlite3_step # %bb.49: # in Loop: Header=BB59_3 Depth=1 ld.d $a0, $s2, 0 ld.d $a2, $a0, 128 - addi.d $a1, $sp, 144 + addi.d $a1, $sp, 136 jirl $ra, $a2, 0 - fld.d $fa0, $sp, 144 + fld.d $fa0, $sp, 136 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a0, $fa1 movgr2fr.w $fa1, $a0 @@ -5797,7 +5773,8 @@ sqlite3_step: # @sqlite3_step st.w $zero, $fp, 184 b .LBB59_5 .LBB59_51: # in Loop: Header=BB59_3 Depth=1 - addi.d $s4, $s5, 96 + addi.d $s4, $s4, 96 + ori $s1, $zero, 5 ld.hu $a0, $s4, 36 andi $a0, $a0, 64 beqz $a0, .LBB59_53 @@ -5843,14 +5820,14 @@ sqlite3_step: # @sqlite3_step st.d $s5, $s4, 24 st.d $zero, $s4, 40 move $s0, $s1 + ori $s1, $zero, 5 .LBB59_60: # %sqlite3VdbeMemGrow.exit.i # in Loop: Header=BB59_3 Depth=1 ori $a0, $zero, 98 st.h $a0, $s4, 36 - ld.bu $a0, $s8, 1 + ld.bu $a0, $s6, 1 addi.d $a0, $a0, -242 ori $a1, $zero, 10 - st.d $s4, $sp, 64 # 8-byte Folded Spill bltu $a1, $a0, .LBB59_112 # %bb.61: # %sqlite3VdbeMemGrow.exit.i # in Loop: Header=BB59_3 Depth=1 @@ -5861,16 +5838,17 @@ sqlite3_step: # @sqlite3_step add.d $a0, $a1, $a0 jr $a0 .LBB59_62: # in Loop: Header=BB59_3 Depth=1 - ld.w $a3, $s8, 16 + ld.w $a3, $s6, 16 pcalau12i $a0, %pc_hi20(.L.str.189) addi.d $a2, $a0, %pc_lo12(.L.str.189) - b .LBB59_130 + b .LBB59_132 .LBB59_63: # in Loop: Header=BB59_3 Depth=1 ld.d $a0, $fp, 0 ori $s8, $zero, 1 st.b $a1, $a0, 42 ori $a0, $zero, 7 move $s0, $s1 + ld.d $s6, $sp, 80 # 8-byte Folded Reload .p2align 4, , 16 .LBB59_64: # %sqlite3VdbeList.exit.thread # in Loop: Header=BB59_3 Depth=1 @@ -5880,7 +5858,7 @@ sqlite3_step: # @sqlite3_step ld.bu $a1, $s2, 104 beqz $a1, .LBB59_76 .LBB59_66: # in Loop: Header=BB59_3 Depth=1 - move $s4, $a0 + move $s3, $a0 ld.d $s1, $fp, 0 bnez $s1, .LBB59_25 .LBB59_67: # %sqlite3Error.exit44.thread @@ -5888,7 +5866,7 @@ sqlite3_step: # @sqlite3_step ld.w $a0, $fp, 188 move $s2, $zero ori $a1, $zero, 255 - ori $s3, $zero, 5 + ori $s4, $zero, 5 ld.d $s5, $sp, 112 # 8-byte Folded Reload .LBB59_68: # %sqlite3ApiExit.exit39 # in Loop: Header=BB59_3 Depth=1 @@ -5900,7 +5878,7 @@ sqlite3_step: # @sqlite3_step ld.d $a0, $fp, 472 beqz $a0, .LBB59_83 # %bb.71: # in Loop: Header=BB59_3 Depth=1 - andi $a0, $s4, 252 + andi $a0, $s3, 252 ori $a1, $zero, 99 bltu $a1, $a0, .LBB59_83 # %bb.72: # in Loop: Header=BB59_3 Depth=1 @@ -5915,7 +5893,7 @@ sqlite3_step: # @sqlite3_step .LBB59_75: # in Loop: Header=BB59_3 Depth=1 st.w $s1, $s2, 32 st.h $a1, $a0, 36 - st.b $s3, $a0, 38 + st.b $s4, $a0, 38 b .LBB59_82 .LBB59_76: # in Loop: Header=BB59_3 Depth=1 ld.w $a1, $fp, 24 @@ -5931,31 +5909,29 @@ sqlite3_step: # @sqlite3_step # %bb.79: # in Loop: Header=BB59_3 Depth=1 ld.d $a2, $s2, 0 ld.d $a3, $a2, 128 - addi.d $a1, $sp, 144 - move $s4, $a0 + addi.d $a1, $sp, 136 + move $s3, $a0 move $a0, $a2 jirl $ra, $a3, 0 - fld.d $fa0, $sp, 144 + fld.d $fa0, $sp, 136 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a0, $fa1 movgr2fr.w $fa1, $a0 ffint.d.w $fa1, $fa1 + ld.d $a0, $fp, 352 fsub.d $fa0, $fa0, $fa1 fmul.d $fa0, $fa0, $fs0 - ld.d $a0, $fp, 352 vldi $vr1, -968 fmul.d $fa0, $fa0, $fa1 - pcalau12i $a1, %pc_hi20(.LCPI59_2) - fld.d $fa1, $a1, %pc_lo12(.LCPI59_2) - movgr2fr.d $fa2, $a0 - ffint.d.l $fa2, $fa2 - fmsub.d $fa0, $fa0, $fs1, $fa2 - fcmp.clt.d $fcc0, $fa0, $fa1 - ftintrz.l.d $fa2, $fa0 - movfr2gr.d $a0, $fa2 + movgr2fr.d $fa1, $a0 + ffint.d.l $fa1, $fa1 + fmsub.d $fa0, $fa0, $fs1, $fa1 + fcmp.clt.d $fcc0, $fa0, $fs2 + ftintrz.l.d $fa1, $fa0 + movfr2gr.d $a0, $fa1 movcf2gr $a2, $fcc0 maskeqz $a3, $a0, $a2 - fsub.d $fa0, $fa0, $fa1 + fsub.d $fa0, $fa0, $fs2 ftintrz.l.d $fa0, $fa0 ld.d $a1, $fp, 32 movfr2gr.d $a4, $fa0 @@ -5979,16 +5955,16 @@ sqlite3_step: # @sqlite3_step .p2align 4, , 16 .LBB59_82: # %sqlite3Error.exit # in Loop: Header=BB59_3 Depth=1 - ld.w $s4, $fp, 188 + ld.w $s3, $fp, 188 .LBB59_83: # %sqlite3Step.exit # in Loop: Header=BB59_3 Depth=1 ori $a0, $zero, 17 - bne $s4, $a0, .LBB59_166 + bne $s3, $a0, .LBB59_164 # %bb.84: # in Loop: Header=BB59_3 Depth=1 - beq $s0, $s3, .LBB59_158 + beq $s0, $s4, .LBB59_156 # %bb.85: # in Loop: Header=BB59_3 Depth=1 ld.d $s1, $fp, 0 - beqz $s1, .LBB59_158 + beqz $s1, .LBB59_156 # %bb.86: # %sqlite3SafetyCheckOk.exit.i # in Loop: Header=BB59_3 Depth=1 ld.w $a0, $s1, 72 @@ -5998,20 +5974,20 @@ sqlite3_step: # @sqlite3_step # %bb.87: # %sqlite3SafetyCheckOk.exit.i # in Loop: Header=BB59_3 Depth=1 ld.d $a2, $sp, 88 # 8-byte Folded Reload - bne $a0, $a2, .LBB59_158 + bne $a0, $a2, .LBB59_156 .LBB59_88: # %sqlite3LockAndPrepare.exit # in Loop: Header=BB59_3 Depth=1 addi.w $a2, $zero, -1 - addi.d $a4, $sp, 136 + addi.d $a4, $sp, 128 move $a0, $s1 move $a3, $zero move $a5, $zero pcaddu18i $ra, %call36(sqlite3Prepare) jirl $ra, $ra, 0 - bnez $a0, .LBB59_156 + bnez $a0, .LBB59_154 # %bb.89: # in Loop: Header=BB59_3 Depth=1 - ld.d $s1, $sp, 136 - addi.d $a0, $sp, 144 + ld.d $s1, $sp, 128 + addi.d $a0, $sp, 136 ori $a2, $zero, 488 move $a1, $s1 pcaddu18i $ra, %call36(memcpy) @@ -6021,7 +5997,7 @@ sqlite3_step: # @sqlite3_step move $a1, $fp pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 - addi.d $a1, $sp, 144 + addi.d $a1, $sp, 136 ori $a2, $zero, 488 move $a0, $fp pcaddu18i $ra, %call36(memcpy) @@ -6098,7 +6074,7 @@ sqlite3_step: # @sqlite3_step # in Loop: Header=BB59_3 Depth=1 ld.w $a0, $s1, 116 move $s0, $s5 - ori $s3, $zero, 5 + ori $s4, $zero, 5 ld.d $s5, $sp, 112 # 8-byte Folded Reload .p2align 4, , 16 .LBB59_101: # %sqlite3_transfer_bindings.exit @@ -6137,7 +6113,7 @@ sqlite3_step: # @sqlite3_step st.b $zero, $fp, 339 ori $s1, $zero, 2 beq $a0, $s7, .LBB59_3 - b .LBB59_172 + b .LBB59_170 .LBB59_107: # in Loop: Header=BB59_3 Depth=1 ori $a0, $zero, 48 pcaddu18i $ra, %call36(sqlite3_malloc) @@ -6151,7 +6127,7 @@ sqlite3_step: # @sqlite3_step vst $vr0, $a0, 0 ori $a1, $zero, 1 st.h $a1, $a0, 36 - st.b $s3, $a0, 38 + st.b $s4, $a0, 38 st.d $s2, $a0, 16 st.d $a0, $s2, 240 b .LBB59_75 @@ -6168,66 +6144,64 @@ sqlite3_step: # @sqlite3_step bge $a2, $a0, .LBB59_53 b .LBB59_60 .LBB59_112: # in Loop: Header=BB59_3 Depth=1 - ld.d $s3, $s8, 16 - ori $s1, $zero, 2 - bnez $s3, .LBB59_134 + ld.d $s3, $s6, 16 + bnez $s3, .LBB59_136 # %bb.113: # in Loop: Header=BB59_3 Depth=1 st.b $zero, $s5, 0 - b .LBB59_152 + b .LBB59_135 .LBB59_114: # in Loop: Header=BB59_3 Depth=1 - ld.d $s4, $s8, 16 - ld.w $a3, $s4, 12 + ld.d $s8, $s6, 16 + ld.w $a3, $s8, 12 pcalau12i $a0, %pc_hi20(.L.str.184) addi.d $a2, $a0, %pc_lo12(.L.str.184) ori $a0, $zero, 32 move $a1, $s5 pcaddu18i $ra, %call36(sqlite3_snprintf) jirl $ra, $ra, 0 - st.d $s5, $sp, 24 # 8-byte Folded Spill + st.d $s5, $sp, 64 # 8-byte Folded Spill move $a0, $s5 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 - ld.w $a1, $s4, 12 + ld.w $a1, $s8, 12 move $a3, $a0 - ori $s1, $zero, 2 - blez $a1, .LBB59_154 + blez $a1, .LBB59_152 # %bb.115: # %.lr.ph.i100.i # in Loop: Header=BB59_3 Depth=1 - move $s3, $zero - addi.d $s5, $s4, 24 - st.d $s0, $sp, 80 # 8-byte Folded Spill - st.d $s6, $sp, 72 # 8-byte Folded Spill - st.d $s8, $sp, 16 # 8-byte Folded Spill - b .LBB59_118 -.LBB59_116: # in Loop: Header=BB59_118 Depth=2 - ld.d $a2, $sp, 24 # 8-byte Folded Reload + move $s5, $zero + addi.d $s1, $s8, 24 + st.d $s0, $sp, 72 # 8-byte Folded Spill + st.d $s6, $sp, 24 # 8-byte Folded Spill + st.d $s4, $sp, 16 # 8-byte Folded Spill + b .LBB59_119 +.LBB59_116: # in Loop: Header=BB59_119 Depth=2 + ld.d $a2, $sp, 64 # 8-byte Folded Reload .LBB59_117: # %select.unfold.i.i - # in Loop: Header=BB59_118 Depth=2 + # in Loop: Header=BB59_119 Depth=2 add.d $a0, $a2, $s0 - ld.d $a1, $s1, 0 + ld.d $a1, $s3, 0 slli.d $a2, $s4, 32 - ld.d $a3, $sp, 128 # 8-byte Folded Reload + ori $a3, $zero, 0 + lu32i.d $a3, 1 add.d $a2, $a2, $a3 srai.d $a2, $a2, 32 pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 add.d $a3, $s0, $s4 - ld.d $s0, $sp, 80 # 8-byte Folded Reload - ld.d $s6, $sp, 72 # 8-byte Folded Reload - ori $s1, $zero, 2 - move $s4, $s8 - ld.d $s8, $sp, 16 # 8-byte Folded Reload - ld.w $a0, $s4, 12 - addi.d $s3, $s3, 1 - addi.d $s5, $s5, 8 - bge $s3, $a0, .LBB59_154 -.LBB59_118: # Parent Loop BB59_3 Depth=1 + ld.d $s0, $sp, 72 # 8-byte Folded Reload + ld.d $s4, $sp, 16 # 8-byte Folded Reload + ld.d $s6, $sp, 24 # 8-byte Folded Reload +.LBB59_118: # %.thread.i.i + # in Loop: Header=BB59_119 Depth=2 + ld.w $a0, $s8, 12 + addi.d $s5, $s5, 1 + addi.d $s1, $s1, 8 + bge $s5, $a0, .LBB59_152 +.LBB59_119: # Parent Loop BB59_3 Depth=1 # => This Inner Loop Header: Depth=2 - ld.d $s1, $s5, 0 - beqz $s1, .LBB59_123 -# %bb.119: # in Loop: Header=BB59_118 Depth=2 - move $s8, $s4 - ld.d $a0, $s1, 0 + ld.d $s3, $s1, 0 + beqz $s3, .LBB59_124 +# %bb.120: # in Loop: Header=BB59_119 Depth=2 + ld.d $a0, $s3, 0 move $s4, $a3 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 @@ -6236,154 +6210,149 @@ sqlite3_step: # @sqlite3_step add.w $a1, $a3, $a0 addi.w $a0, $a3, 0 ori $a2, $zero, 26 - blt $a2, $a1, .LBB59_153 -# %bb.120: # in Loop: Header=BB59_118 Depth=2 + blt $a2, $a1, .LBB59_151 +# %bb.121: # in Loop: Header=BB59_119 Depth=2 ori $a1, $zero, 44 - ld.d $a2, $sp, 24 # 8-byte Folded Reload + ld.d $a2, $sp, 64 # 8-byte Folded Reload stx.b $a1, $a2, $a0 ld.d $a0, $s8, 16 addi.w $s0, $a3, 1 beqz $a0, .LBB59_116 -# %bb.121: # in Loop: Header=BB59_118 Depth=2 - ldx.bu $a0, $a0, $s3 +# %bb.122: # in Loop: Header=BB59_119 Depth=2 + ldx.bu $a0, $a0, $s5 beqz $a0, .LBB59_116 -# %bb.122: # in Loop: Header=BB59_118 Depth=2 +# %bb.123: # in Loop: Header=BB59_119 Depth=2 addi.w $a0, $a3, 2 ori $a1, $zero, 45 - ld.d $a2, $sp, 24 # 8-byte Folded Reload + ld.d $a2, $sp, 64 # 8-byte Folded Reload stx.b $a1, $a2, $s0 move $s0, $a0 b .LBB59_117 -.LBB59_123: # in Loop: Header=BB59_118 Depth=2 +.LBB59_124: # in Loop: Header=BB59_119 Depth=2 addi.w $a0, $a3, 0 ori $a1, $zero, 21 - blt $a1, $a0, .LBB59_125 -# %bb.124: # in Loop: Header=BB59_118 Depth=2 + blt $a1, $a0, .LBB59_118 +# %bb.125: # in Loop: Header=BB59_119 Depth=2 addi.d $a3, $a3, 4 ld.d $a1, $sp, 40 # 8-byte Folded Reload - ld.d $a2, $sp, 24 # 8-byte Folded Reload + ld.d $a2, $sp, 64 # 8-byte Folded Reload stx.w $a1, $a2, $a0 -.LBB59_125: # %.thread.i.i - # in Loop: Header=BB59_118 Depth=2 - ori $s1, $zero, 2 - ld.w $a0, $s4, 12 - addi.d $s3, $s3, 1 - addi.d $s5, $s5, 8 - blt $s3, $a0, .LBB59_118 - b .LBB59_154 + b .LBB59_118 .LBB59_126: # in Loop: Header=BB59_3 Depth=1 - ld.d $a0, $s8, 16 + ld.d $a0, $s6, 16 +.LBB59_127: # in Loop: Header=BB59_3 Depth=1 ld.d $a3, $a0, 0 pcalau12i $a0, %pc_hi20(.L.str.38) addi.d $a2, $a0, %pc_lo12(.L.str.38) - b .LBB59_130 -.LBB59_127: # in Loop: Header=BB59_3 Depth=1 - ld.d $a0, $s8, 16 + b .LBB59_132 +.LBB59_128: # in Loop: Header=BB59_3 Depth=1 + ld.d $a0, $s6, 16 ld.h $a4, $a0, 0 addi.d $a3, $a0, 48 pcalau12i $a0, %pc_hi20(.L.str.188) addi.d $a2, $a0, %pc_lo12(.L.str.188) - b .LBB59_132 -.LBB59_128: # in Loop: Header=BB59_3 Depth=1 - ld.d $a0, $s8, 16 + b .LBB59_134 +.LBB59_129: # in Loop: Header=BB59_3 Depth=1 + ld.d $a0, $s6, 16 ld.d $a3, $a0, 0 pcalau12i $a0, %pc_hi20(.L.str.187) addi.d $a2, $a0, %pc_lo12(.L.str.187) - b .LBB59_130 -.LBB59_129: # in Loop: Header=BB59_3 Depth=1 - ld.d $a0, $s8, 16 + b .LBB59_132 +.LBB59_130: # in Loop: Header=BB59_3 Depth=1 + ld.d $a0, $s6, 16 ld.d $a3, $a0, 0 +.LBB59_131: # %displayP4.exit.i + # in Loop: Header=BB59_3 Depth=1 pcalau12i $a0, %pc_hi20(.L.str.190) addi.d $a2, $a0, %pc_lo12(.L.str.190) -.LBB59_130: # %displayP4.exit.i +.LBB59_132: # %displayP4.exit.i # in Loop: Header=BB59_3 Depth=1 ori $a0, $zero, 32 move $a1, $s5 pcaddu18i $ra, %call36(sqlite3_snprintf) jirl $ra, $ra, 0 - b .LBB59_133 -.LBB59_131: # in Loop: Header=BB59_3 Depth=1 - ld.d $a3, $s8, 16 + b .LBB59_135 +.LBB59_133: # in Loop: Header=BB59_3 Depth=1 + ld.d $a3, $s6, 16 ld.d $a4, $a3, 0 pcalau12i $a0, %pc_hi20(.L.str.191) addi.d $a2, $a0, %pc_lo12(.L.str.191) -.LBB59_132: # %displayP4.exit.i +.LBB59_134: # %displayP4.exit.i # in Loop: Header=BB59_3 Depth=1 ori $a0, $zero, 32 move $a1, $s5 pcaddu18i $ra, %call36(sqlite3_snprintf) jirl $ra, $ra, 0 -.LBB59_133: # %displayP4.exit.i +.LBB59_135: # %displayP4.exit.i # in Loop: Header=BB59_3 Depth=1 move $s3, $s5 - ori $s1, $zero, 2 -.LBB59_134: # %displayP4.exit.i +.LBB59_136: # %displayP4.exit.i # in Loop: Header=BB59_3 Depth=1 - ld.d $s4, $sp, 64 # 8-byte Folded Reload ld.d $a0, $s4, 24 - beq $s3, $a0, .LBB59_137 -# %bb.135: # in Loop: Header=BB59_3 Depth=1 - move $s5, $s8 - beqz $s3, .LBB59_139 -# %bb.136: # in Loop: Header=BB59_3 Depth=1 + beq $s3, $a0, .LBB59_139 +# %bb.137: # in Loop: Header=BB59_3 Depth=1 + move $s5, $s6 + beqz $s3, .LBB59_140 +# %bb.138: # in Loop: Header=BB59_3 Depth=1 move $a0, $s3 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 - move $s1, $a0 - ld.d $s4, $sp, 64 # 8-byte Folded Reload - move $a0, $s4 + move $s8, $s4 + move $s4, $a0 + move $a0, $s8 pcaddu18i $ra, %call36(sqlite3VdbeMemRelease) jirl $ra, $ra, 0 - st.d $s3, $s4, 24 - st.d $zero, $s4, 40 - st.w $s1, $s4, 32 - ori $s1, $zero, 2 + st.d $s3, $s8, 24 + st.d $zero, $s8, 40 + st.w $s4, $s8, 32 + move $s4, $s8 ori $a0, $zero, 162 - st.h $a0, $s4, 36 - b .LBB59_138 -.LBB59_137: # in Loop: Header=BB59_3 Depth=1 - move $s5, $s8 + st.h $a0, $s8, 36 + ori $a1, $zero, 1 + st.b $a1, $s8, 39 + b .LBB59_141 +.LBB59_139: # in Loop: Header=BB59_3 Depth=1 + move $s5, $s6 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 st.w $a0, $s4, 32 -.LBB59_138: # %sqlite3VdbeMemSetStr.exit.i - # in Loop: Header=BB59_3 Depth=1 ori $a1, $zero, 1 st.b $a1, $s4, 39 - b .LBB59_140 -.LBB59_139: # in Loop: Header=BB59_3 Depth=1 - ld.d $s4, $sp, 64 # 8-byte Folded Reload + b .LBB59_141 +.LBB59_140: # in Loop: Header=BB59_3 Depth=1 ld.hu $a0, $s4, 36 srli.d $a0, $a0, 5 ori $a1, $zero, 1 bstrins.d $a1, $a0, 15, 5 st.h $a1, $s4, 36 ori $a1, $zero, 1 -.LBB59_140: # %sqlite3VdbeMemSetStr.exit.i +.LBB59_141: # %sqlite3VdbeMemSetStr.exit.i # in Loop: Header=BB59_3 Depth=1 ori $a0, $zero, 3 st.b $a0, $s4, 38 ld.bu $a0, $fp, 336 ori $s8, $zero, 1 - bne $a0, $a1, .LBB59_144 -# %bb.141: # in Loop: Header=BB59_3 Depth=1 + ld.d $s6, $sp, 80 # 8-byte Folded Reload + bne $a0, $a1, .LBB59_145 +# %bb.142: # in Loop: Header=BB59_3 Depth=1 addi.d $a0, $s4, 48 ori $a1, $zero, 4 move $a2, $zero pcaddu18i $ra, %call36(sqlite3VdbeMemGrow) jirl $ra, $ra, 0 - beqz $a0, .LBB59_143 -# %bb.142: # in Loop: Header=BB59_3 Depth=1 + beqz $a0, .LBB59_144 +# %bb.143: # in Loop: Header=BB59_3 Depth=1 ld.d $a0, $fp, 0 st.b $s8, $a0, 42 ori $a0, $zero, 7 ld.d $a1, $s2, 152 bnez $a1, .LBB59_65 b .LBB59_19 -.LBB59_143: # in Loop: Header=BB59_3 Depth=1 +.LBB59_144: # in Loop: Header=BB59_3 Depth=1 ori $a0, $zero, 98 st.h $a0, $s4, 84 - st.w $s1, $s4, 80 + ori $a0, $zero, 2 + st.w $a0, $s4, 80 ld.d $a1, $s4, 72 ld.bu $a3, $s5, 3 pcalau12i $a0, %pc_hi20(.L.str.40) @@ -6394,92 +6363,75 @@ sqlite3_step: # @sqlite3_step ori $a0, $zero, 259 st.h $a0, $s4, 86 st.h $s8, $s4, 132 - ori $a0, $zero, 5 - st.b $a0, $s4, 134 + st.b $s1, $s4, 134 ld.bu $a0, $fp, 336 -.LBB59_144: # %sqlite3VdbeList.exit.thread61 +.LBB59_145: # %sqlite3VdbeList.exit.thread61 # in Loop: Header=BB59_3 Depth=1 alsl.d $a0, $a0, $a0, 2 ori $a1, $zero, 13 sub.d $a0, $a1, $a0 st.w $a0, $fp, 308 st.w $zero, $fp, 188 - ori $s4, $zero, 100 + ori $s3, $zero, 100 ld.d $s1, $fp, 0 bnez $s1, .LBB59_25 b .LBB59_67 -.LBB59_145: # in Loop: Header=BB59_3 Depth=1 - ld.d $a0, $s8, 16 +.LBB59_146: # in Loop: Header=BB59_3 Depth=1 + ld.d $a0, $s6, 16 ld.hu $a1, $a0, 36 andi $a2, $a1, 2 - ori $s1, $zero, 2 - bnez $a2, .LBB59_149 -# %bb.146: # in Loop: Header=BB59_3 Depth=1 - andi $a2, $a1, 4 bnez $a2, .LBB59_150 # %bb.147: # in Loop: Header=BB59_3 Depth=1 - andi $a1, $a1, 8 - beqz $a1, .LBB59_152 + andi $a2, $a1, 4 + bnez $a2, .LBB59_127 # %bb.148: # in Loop: Header=BB59_3 Depth=1 + andi $a1, $a1, 8 + beqz $a1, .LBB59_135 +# %bb.149: # in Loop: Header=BB59_3 Depth=1 ld.d $a3, $a0, 8 - pcalau12i $a0, %pc_hi20(.L.str.190) - addi.d $a2, $a0, %pc_lo12(.L.str.190) - b .LBB59_151 -.LBB59_149: # in Loop: Header=BB59_3 Depth=1 - ld.d $s3, $a0, 24 - b .LBB59_134 + b .LBB59_131 .LBB59_150: # in Loop: Header=BB59_3 Depth=1 - ld.d $a3, $a0, 0 - pcalau12i $a0, %pc_hi20(.L.str.38) - addi.d $a2, $a0, %pc_lo12(.L.str.38) -.LBB59_151: # %displayP4.exit.i - # in Loop: Header=BB59_3 Depth=1 - ori $a0, $zero, 32 - move $a1, $s5 - pcaddu18i $ra, %call36(sqlite3_snprintf) - jirl $ra, $ra, 0 -.LBB59_152: # %displayP4.exit.i - # in Loop: Header=BB59_3 Depth=1 - move $s3, $s5 - b .LBB59_134 -.LBB59_153: # in Loop: Header=BB59_3 Depth=1 + ld.d $s3, $a0, 24 + b .LBB59_136 +.LBB59_151: # in Loop: Header=BB59_3 Depth=1 ld.d $a1, $sp, 32 # 8-byte Folded Reload - ld.d $a2, $sp, 24 # 8-byte Folded Reload + ld.d $a2, $sp, 64 # 8-byte Folded Reload stx.w $a1, $a2, $a0 - ori $s1, $zero, 2 - ld.d $s8, $sp, 16 # 8-byte Folded Reload -.LBB59_154: # %.loopexit.i.i + ld.d $s4, $sp, 16 # 8-byte Folded Reload + ld.d $s6, $sp, 24 # 8-byte Folded Reload +.LBB59_152: # %.loopexit.i.i # in Loop: Header=BB59_3 Depth=1 addi.w $a0, $a3, 0 ori $a1, $zero, 41 - ld.d $s3, $sp, 24 # 8-byte Folded Reload + ld.d $s3, $sp, 64 # 8-byte Folded Reload stx.h $a1, $s3, $a0 - b .LBB59_134 -.LBB59_155: + ori $s1, $zero, 5 + b .LBB59_136 +.LBB59_153: ori $a0, $zero, 21 ret -.LBB59_156: # %sqlite3LockAndPrepare.exit +.LBB59_154: # %sqlite3LockAndPrepare.exit ori $a1, $zero, 7 - bne $a0, $a1, .LBB59_158 -# %bb.157: + bne $a0, $a1, .LBB59_156 +# %bb.155: ori $a0, $zero, 1 st.b $a0, $s1, 42 -.LBB59_158: # %.critedge +.LBB59_156: # %.critedge ld.d $a0, $fp, 472 - beqz $a0, .LBB59_164 -# %bb.159: + beqz $a0, .LBB59_162 +# %bb.157: ld.d $s0, $sp, 104 # 8-byte Folded Reload ld.d $a0, $s0, 240 - ori $s4, $zero, 17 - beqz $a0, .LBB59_167 -# %bb.160: + ori $s3, $zero, 17 + beqz $a0, .LBB59_165 +# %bb.158: ori $a1, $zero, 1 pcaddu18i $ra, %call36(sqlite3ValueText) jirl $ra, $ra, 0 ld.d $a1, $s6, 0 move $s1, $a0 - beqz $a1, .LBB59_162 -# %bb.161: + beqz $a1, .LBB59_160 +# %bb.159: ld.w $a2, $a1, -8 pcalau12i $a3, %pc_hi20(mem.5) ld.d $a4, $a3, %pc_lo12(mem.5) @@ -6488,31 +6440,31 @@ sqlite3_step: # @sqlite3_step st.d $a1, $a3, %pc_lo12(mem.5) pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 -.LBB59_162: # %sqlite3_free.exit +.LBB59_160: # %sqlite3_free.exit ld.bu $a0, $s0, 42 - beqz $a0, .LBB59_175 -# %bb.163: + beqz $a0, .LBB59_173 +# %bb.161: st.d $zero, $fp, 320 ori $a0, $zero, 7 st.w $a0, $fp, 188 - b .LBB59_167 -.LBB59_164: - ori $s4, $zero, 17 + b .LBB59_165 +.LBB59_162: + ori $s3, $zero, 17 ld.d $s0, $sp, 104 # 8-byte Folded Reload - bnez $s0, .LBB59_167 - b .LBB59_173 -.LBB59_165: - ori $s4, $zero, 4 -.LBB59_166: # %.critedge28 + bnez $s0, .LBB59_165 + b .LBB59_171 +.LBB59_163: + ori $s3, $zero, 4 +.LBB59_164: # %.critedge28 ld.d $s0, $sp, 104 # 8-byte Folded Reload - beqz $s0, .LBB59_173 -.LBB59_167: # %.critedge28.thread + beqz $s0, .LBB59_171 +.LBB59_165: # %.critedge28.thread ld.bu $a0, $s0, 42 - beqz $a0, .LBB59_171 -# %bb.168: + beqz $a0, .LBB59_169 +# %bb.166: ld.d $a0, $s0, 240 - beqz $a0, .LBB59_170 -# %bb.169: + beqz $a0, .LBB59_168 +# %bb.167: ori $a1, $zero, 7 st.w $a1, $s0, 32 ld.hu $a1, $a0, 36 @@ -6522,20 +6474,21 @@ sqlite3_step: # @sqlite3_step st.h $a2, $a0, 36 ori $a1, $zero, 5 st.b $a1, $a0, 38 -.LBB59_170: # %sqlite3Error.exit.i +.LBB59_168: # %sqlite3Error.exit.i st.b $zero, $s0, 42 - ori $s4, $zero, 7 -.LBB59_171: + ori $s3, $zero, 7 +.LBB59_169: ld.w $a0, $s0, 36 - b .LBB59_174 -.LBB59_172: - ori $s4, $zero, 21 + b .LBB59_172 +.LBB59_170: + ori $s3, $zero, 21 ld.d $s0, $sp, 104 # 8-byte Folded Reload - bnez $s0, .LBB59_167 -.LBB59_173: + bnez $s0, .LBB59_165 +.LBB59_171: ori $a0, $zero, 255 -.LBB59_174: # %sqlite3ApiExit.exit - and $a0, $a0, $s4 +.LBB59_172: # %sqlite3ApiExit.exit + and $a0, $a0, $s3 + fld.d $fs2, $sp, 624 # 8-byte Folded Reload fld.d $fs1, $sp, 632 # 8-byte Folded Reload fld.d $fs0, $sp, 640 # 8-byte Folded Reload ld.d $s8, $sp, 648 # 8-byte Folded Reload @@ -6551,9 +6504,9 @@ sqlite3_step: # @sqlite3_step ld.d $ra, $sp, 728 # 8-byte Folded Reload addi.d $sp, $sp, 736 ret -.LBB59_175: - beqz $s1, .LBB59_178 -# %bb.176: +.LBB59_173: + beqz $s1, .LBB59_176 +# %bb.174: move $a0, $s1 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 @@ -6561,23 +6514,23 @@ sqlite3_step: # @sqlite3_step move $a0, $s2 pcaddu18i $ra, %call36(sqlite3_malloc) jirl $ra, $ra, 0 - beqz $a0, .LBB59_179 -# %bb.177: + beqz $a0, .LBB59_177 +# %bb.175: move $fp, $a0 move $a1, $s1 move $a2, $s2 pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 st.d $fp, $s6, 0 - b .LBB59_167 -.LBB59_178: + b .LBB59_165 +.LBB59_176: st.d $zero, $s6, 0 - b .LBB59_167 -.LBB59_179: # %sqlite3StrDup.exit.i + b .LBB59_165 +.LBB59_177: # %sqlite3StrDup.exit.i ori $a0, $zero, 1 st.b $a0, $s0, 42 st.d $zero, $s6, 0 - b .LBB59_167 + b .LBB59_165 .Lfunc_end59: .size sqlite3_step, .Lfunc_end59-sqlite3_step .section .rodata,"a",@progbits @@ -6585,15 +6538,15 @@ sqlite3_step: # @sqlite3_step .LJTI59_0: .word .LBB59_62-.LJTI59_0 .word .LBB59_126-.LJTI59_0 - .word .LBB59_129-.LJTI59_0 + .word .LBB59_130-.LJTI59_0 .word .LBB59_112-.LJTI59_0 - .word .LBB59_131-.LJTI59_0 + .word .LBB59_133-.LJTI59_0 .word .LBB59_112-.LJTI59_0 - .word .LBB59_145-.LJTI59_0 + .word .LBB59_146-.LJTI59_0 .word .LBB59_112-.LJTI59_0 .word .LBB59_114-.LJTI59_0 - .word .LBB59_127-.LJTI59_0 .word .LBB59_128-.LJTI59_0 + .word .LBB59_129-.LJTI59_0 # -- End function .text .p2align 5 # -- Begin function sqlite3DbStrDup @@ -19436,16 +19389,7 @@ unixSleep: # @unixSleep .Lfunc_end187: .size unixSleep, .Lfunc_end187-unixSleep # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function unixCurrentTime -.LCPI188_0: - .dword 0x40f5180000000000 # double 86400 -.LCPI188_1: - .dword 0x41429ec5c0000000 # double 2440587.5 -.LCPI188_2: - .dword 0x42341dd760000000 # double 8.64E+10 - .text - .p2align 5 + .p2align 5 # -- Begin function unixCurrentTime .type unixCurrentTime,@function unixCurrentTime: # @unixCurrentTime # %bb.0: @@ -19458,19 +19402,25 @@ unixCurrentTime: # @unixCurrentTime pcaddu18i $ra, %call36(gettimeofday) jirl $ra, $ra, 0 ld.d $a0, $sp, 0 - pcalau12i $a1, %pc_hi20(.LCPI188_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI188_0) + movgr2fr.d $fa0, $a0 + ori $a0, $zero, 0 + lu32i.d $a0, 333824 + lu52i.d $a0, $a0, 1039 movgr2fr.d $fa1, $a0 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI188_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI188_1) + lu12i.w $a0, -262144 + lu32i.d $a0, 171717 + lu52i.d $a0, $a0, 1044 + movgr2fr.d $fa2, $a0 ld.d $a0, $sp, 8 - pcalau12i $a1, %pc_hi20(.LCPI188_2) - fld.d $fa2, $a1, %pc_lo12(.LCPI188_2) - fadd.d $fa0, $fa0, $fa1 + ffint.d.l $fa0, $fa0 + fdiv.d $fa0, $fa0, $fa1 + fadd.d $fa0, $fa0, $fa2 movgr2fr.d $fa1, $a0 ffint.d.l $fa1, $fa1 + lu12i.w $a0, 393216 + lu32i.d $a0, 269783 + lu52i.d $a0, $a0, 1059 + movgr2fr.d $fa2, $a0 fdiv.d $fa1, $fa1, $fa2 fadd.d $fa0, $fa0, $fa1 fst.d $fa0, $fp, 0 @@ -35697,13 +35647,9 @@ sqlite3VdbeMemStringify: # @sqlite3VdbeMemStringify .Lfunc_end293: .size sqlite3VdbeMemStringify, .Lfunc_end293-sqlite3VdbeMemStringify # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function sqlite3VdbeExec -.LCPI294_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI294_1: + .p2align 4, 0x0 # -- Begin function sqlite3VdbeExec +.LCPI294_0: .byte 15 # 0xf .byte 14 # 0xe .byte 13 # 0xd @@ -36482,10 +36428,10 @@ sqlite3VdbeExec: # @sqlite3VdbeExec beqz $a1, .LBB294_109 # %bb.107: # in Loop: Header=BB294_11 Depth=1 fld.d $fa0, $s3, 8 - pcalau12i $a1, %pc_hi20(.LCPI294_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI294_0) - fabs.d $fa2, $fa0 - fcmp.clt.d $fcc0, $fa1, $fa2 + fabs.d $fa1, $fa0 + lu52i.d $a1, $zero, 1086 + movgr2fr.d $fa2, $a1 + fcmp.clt.d $fcc0, $fa2, $fa1 ftintrz.l.d $fa1, $fa0 movfr2gr.d $a1, $fa1 movcf2gr $a2, $fcc0 @@ -39050,10 +38996,10 @@ sqlite3VdbeExec: # @sqlite3VdbeExec beqz $a1, .LBB294_476 # %bb.473: # in Loop: Header=BB294_465 Depth=2 fld.d $fa0, $s2, 8 - pcalau12i $a1, %pc_hi20(.LCPI294_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI294_0) - fabs.d $fa2, $fa0 - fcmp.clt.d $fcc0, $fa1, $fa2 + fabs.d $fa1, $fa0 + lu52i.d $a1, $zero, 1086 + movgr2fr.d $fa2, $a1 + fcmp.clt.d $fcc0, $fa2, $fa1 ftintrz.l.d $fa1, $fa0 movfr2gr.d $a1, $fa1 movcf2gr $a2, $fcc0 @@ -39534,8 +39480,8 @@ sqlite3VdbeExec: # @sqlite3VdbeExec # Parent Loop BB294_536 Depth=2 # => This Inner Loop Header: Depth=3 vld $vr0, $t0, 0 - pcalau12i $t3, %pc_hi20(.LCPI294_1) - vld $vr1, $t3, %pc_lo12(.LCPI294_1) + pcalau12i $t3, %pc_hi20(.LCPI294_0) + vld $vr1, $t3, %pc_lo12(.LCPI294_0) vshuf.b $vr0, $vr0, $vr0, $vr1 vst $vr0, $t2, 0 addi.d $t1, $t1, -16 @@ -39983,10 +39929,10 @@ sqlite3VdbeExec: # @sqlite3VdbeExec beqz $a1, .LBB294_116 # %bb.626: # in Loop: Header=BB294_11 Depth=1 fld.d $fa0, $s2, 8 - pcalau12i $a1, %pc_hi20(.LCPI294_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI294_0) - fabs.d $fa2, $fa0 - fcmp.clt.d $fcc0, $fa1, $fa2 + fabs.d $fa1, $fa0 + lu52i.d $a1, $zero, 1086 + movgr2fr.d $fa2, $a1 + fcmp.clt.d $fcc0, $fa2, $fa1 ftintrz.l.d $fa1, $fa0 movfr2gr.d $a1, $fa1 movcf2gr $a2, $fcc0 @@ -40351,10 +40297,10 @@ sqlite3VdbeExec: # @sqlite3VdbeExec st.h $a2, $s5, 36 bnez $a1, .LBB294_9 # %bb.690: # in Loop: Header=BB294_11 Depth=1 - pcalau12i $a1, %pc_hi20(.LCPI294_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI294_0) - fabs.d $fa2, $fa0 - fcmp.clt.d $fcc0, $fa1, $fa2 + fabs.d $fa1, $fa0 + lu52i.d $a1, $zero, 1086 + movgr2fr.d $fa2, $a1 + fcmp.clt.d $fcc0, $fa2, $fa1 ftintrz.l.d $fa1, $fa0 movfr2gr.d $a1, $fa1 movcf2gr $a2, $fcc0 @@ -41422,12 +41368,8 @@ sqlite3VdbeExec: # @sqlite3VdbeExec .word .LBB294_684-.LJTI294_3 .word .LBB294_685-.LJTI294_3 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function applyAffinity -.LCPI295_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 .text - .p2align 5 + .p2align 5 # -- Begin function applyAffinity .type applyAffinity,@function applyAffinity: # @applyAffinity # %bb.0: @@ -41467,10 +41409,10 @@ applyAffinity: # @applyAffinity beqz $a1, .LBB295_10 # %bb.7: fld.d $fa0, $fp, 8 - pcalau12i $a1, %pc_hi20(.LCPI295_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI295_0) - fabs.d $fa2, $fa0 - fcmp.clt.d $fcc0, $fa1, $fa2 + fabs.d $fa1, $fa0 + lu52i.d $a1, $zero, 1086 + movgr2fr.d $fa2, $a1 + fcmp.clt.d $fcc0, $fa2, $fa1 ftintrz.l.d $fa1, $fa0 movfr2gr.d $a1, $fa1 movcf2gr $a2, $fcc0 @@ -41496,14 +41438,7 @@ applyAffinity: # @applyAffinity .Lfunc_end295: .size applyAffinity, .Lfunc_end295-applyAffinity # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function sqlite3VdbeMemNumerify -.LCPI296_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI296_1: - .dword 0xc3e0000000000000 # double -9.2233720368547758E+18 - .text - .p2align 5 + .p2align 5 # -- Begin function sqlite3VdbeMemNumerify .type sqlite3VdbeMemNumerify,@function sqlite3VdbeMemNumerify: # @sqlite3VdbeMemNumerify # %bb.0: @@ -41516,14 +41451,14 @@ sqlite3VdbeMemNumerify: # @sqlite3VdbeMemNumerify fabs.d $fa1, $fa0 ftintrz.l.d $fa2, $fa0 movfr2gr.d $a0, $fa2 - pcalau12i $a1, %pc_hi20(.LCPI296_0) - fld.d $fa2, $a1, %pc_lo12(.LCPI296_0) - pcalau12i $a1, %pc_hi20(.LCPI296_1) - fld.d $fa3, $a1, %pc_lo12(.LCPI296_1) - movgr2fr.d $fa4, $a0 - ffint.d.l $fa4, $fa4 - fcmp.clt.d $fcc0, $fa2, $fa1 - fsel $fa1, $fa4, $fa3, $fcc0 + movgr2fr.d $fa2, $a0 + ffint.d.l $fa2, $fa2 + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fa3, $a0 + fcmp.clt.d $fcc0, $fa3, $fa1 + lu52i.d $a0, $zero, -962 + movgr2fr.d $fa1, $a0 + fsel $fa1, $fa2, $fa1, $fcc0 fcmp.cune.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB296_2 # %bb.1: @@ -101742,12 +101677,7 @@ sqlite3IndexAffinityStr: # @sqlite3IndexAffinityStr .Lfunc_end481: .size sqlite3IndexAffinityStr, .Lfunc_end481-sqlite3IndexAffinityStr # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function sqlite3ValueFromExpr -.LCPI482_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 - .text - .p2align 5 + .p2align 5 # -- Begin function sqlite3ValueFromExpr .type sqlite3ValueFromExpr,@function sqlite3ValueFromExpr: # @sqlite3ValueFromExpr # %bb.0: @@ -102011,10 +101941,10 @@ sqlite3ValueFromExpr: # @sqlite3ValueFromExpr beqz $a0, .LBB482_7 # %bb.49: fld.d $fa0, $s0, 8 - pcalau12i $a0, %pc_hi20(.LCPI482_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI482_0) - fabs.d $fa2, $fa0 - fcmp.clt.d $fcc0, $fa1, $fa2 + fabs.d $fa1, $fa0 + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fa2, $a0 + fcmp.clt.d $fcc0, $fa2, $fa1 ftintrz.l.d $fa1, $fa0 movfr2gr.d $a0, $fa1 movcf2gr $a2, $fcc0 @@ -106347,38 +106277,28 @@ computeLimitRegisters: # @computeLimitRegisters .Lfunc_end492: .size computeLimitRegisters, .Lfunc_end492-computeLimitRegisters # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function sqlite3WhereBegin -.LCPI493_0: - .dword 0x547d42aea2879f2e # double 9.9999999999999997E+98 -.LCPI493_1: - .dword 0x546d42aea2879f2e # double 4.9999999999999998E+98 -.LCPI493_2: - .dword 0x4069000000000000 # double 200 -.LCPI493_3: - .dword 0x412e848000000000 # double 1.0E+6 - .text - .p2align 5 + .p2align 5 # -- Begin function sqlite3WhereBegin .type sqlite3WhereBegin,@function sqlite3WhereBegin: # @sqlite3WhereBegin # %bb.0: - addi.d $sp, $sp, -1376 - st.d $ra, $sp, 1368 # 8-byte Folded Spill - st.d $fp, $sp, 1360 # 8-byte Folded Spill - st.d $s0, $sp, 1352 # 8-byte Folded Spill - st.d $s1, $sp, 1344 # 8-byte Folded Spill - st.d $s2, $sp, 1336 # 8-byte Folded Spill - st.d $s3, $sp, 1328 # 8-byte Folded Spill - st.d $s4, $sp, 1320 # 8-byte Folded Spill - st.d $s5, $sp, 1312 # 8-byte Folded Spill - st.d $s6, $sp, 1304 # 8-byte Folded Spill - st.d $s7, $sp, 1296 # 8-byte Folded Spill - st.d $s8, $sp, 1288 # 8-byte Folded Spill - fst.d $fs0, $sp, 1280 # 8-byte Folded Spill - fst.d $fs1, $sp, 1272 # 8-byte Folded Spill - fst.d $fs2, $sp, 1264 # 8-byte Folded Spill - fst.d $fs3, $sp, 1256 # 8-byte Folded Spill - fst.d $fs4, $sp, 1248 # 8-byte Folded Spill + addi.d $sp, $sp, -1392 + st.d $ra, $sp, 1384 # 8-byte Folded Spill + st.d $fp, $sp, 1376 # 8-byte Folded Spill + st.d $s0, $sp, 1368 # 8-byte Folded Spill + st.d $s1, $sp, 1360 # 8-byte Folded Spill + st.d $s2, $sp, 1352 # 8-byte Folded Spill + st.d $s3, $sp, 1344 # 8-byte Folded Spill + st.d $s4, $sp, 1336 # 8-byte Folded Spill + st.d $s5, $sp, 1328 # 8-byte Folded Spill + st.d $s6, $sp, 1320 # 8-byte Folded Spill + st.d $s7, $sp, 1312 # 8-byte Folded Spill + st.d $s8, $sp, 1304 # 8-byte Folded Spill + fst.d $fs0, $sp, 1296 # 8-byte Folded Spill + fst.d $fs1, $sp, 1288 # 8-byte Folded Spill + fst.d $fs2, $sp, 1280 # 8-byte Folded Spill + fst.d $fs3, $sp, 1272 # 8-byte Folded Spill + fst.d $fs4, $sp, 1264 # 8-byte Folded Spill + fst.d $fs5, $sp, 1256 # 8-byte Folded Spill st.d $a1, $sp, 240 # 8-byte Folded Spill ld.hu $a1, $a1, 0 ori $a5, $zero, 65 @@ -106394,7 +106314,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin b .LBB493_49 .LBB493_2: move $s2, $a2 - st.d $a3, $sp, 144 # 8-byte Folded Spill + st.d $a3, $sp, 136 # 8-byte Folded Spill st.d $a4, $sp, 40 # 8-byte Folded Spill beqz $a3, .LBB493_4 # %bb.3: @@ -106406,20 +106326,20 @@ sqlite3WhereBegin: # @sqlite3WhereBegin .LBB493_5: ld.d $a0, $s8, 24 st.d $a0, $sp, 272 # 8-byte Folded Spill - addi.d $a0, $sp, 984 + addi.d $a0, $sp, 992 ori $a2, $zero, 260 - addi.d $fp, $sp, 984 + addi.d $fp, $sp, 992 move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - st.d $s8, $sp, 472 - st.d $fp, $sp, 480 + st.d $s8, $sp, 480 + st.d $fp, $sp, 488 ori $a0, $zero, 0 lu32i.d $a0, 10 - st.d $a0, $sp, 488 - addi.d $a0, $sp, 504 st.d $a0, $sp, 496 - addi.d $a0, $sp, 472 + addi.d $a0, $sp, 512 + st.d $a0, $sp, 504 + addi.d $a0, $sp, 480 ori $a2, $zero, 61 move $a1, $s2 pcaddu18i $ra, %call36(whereSplit) @@ -106440,12 +106360,12 @@ sqlite3WhereBegin: # @sqlite3WhereBegin jirl $ra, $ra, 0 bnez $a0, .LBB493_10 # %bb.8: - st.d $zero, $sp, 136 # 8-byte Folded Spill + st.d $zero, $sp, 128 # 8-byte Folded Spill ori $a0, $zero, 1 st.b $a0, $s3, 42 ld.bu $a0, $s3, 42 pcalau12i $a1, %pc_hi20(mem.5) - st.d $a1, $sp, 96 # 8-byte Folded Spill + st.d $a1, $sp, 88 # 8-byte Folded Spill bnez $a0, .LBB493_33 b .LBB493_12 .LBB493_9: # %sqlite3DbMallocRaw.exit.i @@ -106455,25 +106375,25 @@ sqlite3WhereBegin: # @sqlite3WhereBegin beqz $a0, .LBB493_11 .LBB493_10: # %sqlite3DbMallocRaw.exit.thread8.i bstrpick.d $a2, $fp, 31, 0 - st.d $a0, $sp, 136 # 8-byte Folded Spill + st.d $a0, $sp, 128 # 8-byte Folded Spill move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ld.bu $a0, $s3, 42 pcalau12i $a1, %pc_hi20(mem.5) - st.d $a1, $sp, 96 # 8-byte Folded Spill + st.d $a1, $sp, 88 # 8-byte Folded Spill bnez $a0, .LBB493_33 b .LBB493_12 .LBB493_11: - st.d $zero, $sp, 136 # 8-byte Folded Spill + st.d $zero, $sp, 128 # 8-byte Folded Spill ld.bu $a0, $s3, 42 pcalau12i $a1, %pc_hi20(mem.5) - st.d $a1, $sp, 96 # 8-byte Folded Spill + st.d $a1, $sp, 88 # 8-byte Folded Spill bnez $a0, .LBB493_33 .LBB493_12: ld.d $a2, $sp, 240 # 8-byte Folded Reload ld.h $a0, $a2, 0 - ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $a1, $sp, 128 # 8-byte Folded Reload st.w $a0, $a1, 28 ld.d $a3, $sp, 272 # 8-byte Folded Reload ld.w $s0, $a3, 40 @@ -106501,7 +106421,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin beqz $fp, .LBB493_17 .LBB493_16: ld.w $a1, $fp, -8 - ld.d $a3, $sp, 96 # 8-byte Folded Reload + ld.d $a3, $sp, 88 # 8-byte Folded Reload ld.d $a2, $a3, %pc_lo12(mem.5) addi.d $a0, $fp, -8 sub.d $a1, $a2, $a1 @@ -106530,7 +106450,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin stx.w $a2, $a0, $a1 .LBB493_21: # %sqlite3VdbeMakeLabel.exit nor $a0, $s0, $zero - ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $a1, $sp, 128 # 8-byte Folded Reload st.w $a0, $a1, 24 beqz $s2, .LBB493_26 # %bb.22: @@ -106539,17 +106459,17 @@ sqlite3WhereBegin: # @sqlite3WhereBegin beqz $a1, .LBB493_25 # %bb.23: ori $a0, $zero, 3 - st.w $a0, $sp, 1244 + st.w $a0, $sp, 1252 pcalau12i $a0, %pc_hi20(exprNodeIsConstant) addi.d $a1, $a0, %pc_lo12(exprNodeIsConstant) - addi.d $a2, $sp, 1244 + addi.d $a2, $sp, 1252 move $a0, $s2 pcaddu18i $ra, %call36(walkExprTree) jirl $ra, $ra, 0 - ld.w $a0, $sp, 1244 + ld.w $a0, $sp, 1252 beqz $a0, .LBB493_26 # %bb.24: # %._crit_edge1883 - ld.d $a0, $sp, 136 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.w $a0, $a0, 24 .LBB493_25: addi.w $a2, $a0, 0 @@ -106563,22 +106483,22 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ld.h $a0, $a0, 0 blez $a0, .LBB493_29 # %bb.27: # %.lr.ph - addi.d $a1, $sp, 988 + addi.d $a1, $sp, 996 ld.d $a2, $sp, 240 # 8-byte Folded Reload addi.d $a2, $a2, 52 .p2align 4, , 16 .LBB493_28: # =>This Inner Loop Header: Depth=1 - ld.w $a3, $sp, 984 + ld.w $a3, $sp, 992 ld.w $a4, $a2, 0 addi.d $a5, $a3, 1 - st.w $a5, $sp, 984 + st.w $a5, $sp, 992 slli.d $a3, $a3, 2 stx.w $a4, $a1, $a3 addi.d $a0, $a0, -1 addi.d $a2, $a2, 72 bnez $a0, .LBB493_28 .LBB493_29: # %._crit_edge - ld.w $a0, $sp, 488 + ld.w $a0, $sp, 496 blez $a0, .LBB493_32 # %bb.30: # %.lr.ph.i.preheader addi.d $fp, $a0, 1 @@ -106587,7 +106507,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin .LBB493_31: # %.lr.ph.i # =>This Inner Loop Header: Depth=1 addi.w $a1, $fp, -2 - addi.d $a0, $sp, 472 + addi.d $a0, $sp, 480 pcaddu18i $ra, %call36(exprAnalyze) jirl $ra, $ra, 0 addi.w $fp, $fp, -1 @@ -106596,8 +106516,8 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ld.bu $a0, $s3, 42 beqz $a0, .LBB493_51 .LBB493_33: - ld.w $a1, $sp, 488 - ld.d $a0, $sp, 496 + ld.w $a1, $sp, 496 + ld.d $a0, $sp, 504 blez $a1, .LBB493_39 # %bb.34: # %.lr.ph.i1396.preheader addi.d $fp, $a1, 1 @@ -106620,15 +106540,15 @@ sqlite3WhereBegin: # @sqlite3WhereBegin jirl $ra, $ra, 0 b .LBB493_35 .LBB493_38: # %._crit_edge.loopexit.i1401 - ld.d $a0, $sp, 496 + ld.d $a0, $sp, 504 .LBB493_39: # %._crit_edge.i1391 - addi.d $a1, $sp, 504 + addi.d $a1, $sp, 512 beq $a0, $a1, .LBB493_42 # %bb.40: # %._crit_edge.i1391 beqz $a0, .LBB493_42 # %bb.41: ld.w $a1, $a0, -8 - ld.d $a3, $sp, 96 # 8-byte Folded Reload + ld.d $a3, $sp, 88 # 8-byte Folded Reload ld.d $a2, $a3, %pc_lo12(mem.5) addi.d $a0, $a0, -8 sub.d $a1, $a2, $a1 @@ -106636,11 +106556,11 @@ sqlite3WhereBegin: # @sqlite3WhereBegin pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 .LBB493_42: # %whereClauseClear.exit1403 - ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $a1, $sp, 128 # 8-byte Folded Reload beqz $a1, .LBB493_49 # %bb.43: # %.preheader.i1405 ld.w $a0, $a1, 28 - ld.d $a2, $sp, 96 # 8-byte Folded Reload + ld.d $a2, $sp, 88 # 8-byte Folded Reload ld.d $fp, $a2, %pc_lo12(mem.5) blez $a0, .LBB493_48 # %bb.44: # %.lr.ph.i1406.preheader @@ -106663,45 +106583,46 @@ sqlite3WhereBegin: # @sqlite3WhereBegin sub.d $fp, $fp, $a2 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 136 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.w $a0, $a0, 28 b .LBB493_45 .LBB493_48: # %sqlite3_free.exit12.i - ld.d $a0, $sp, 136 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.w $a1, $a0, -8 addi.d $a0, $a0, -8 sub.d $a1, $fp, $a1 - ld.d $a2, $sp, 96 # 8-byte Folded Reload + ld.d $a2, $sp, 88 # 8-byte Folded Reload st.d $a1, $a2, %pc_lo12(mem.5) pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 .LBB493_49: # %whereClauseClear.exit move $a0, $zero .LBB493_50: # %whereClauseClear.exit - fld.d $fs4, $sp, 1248 # 8-byte Folded Reload - fld.d $fs3, $sp, 1256 # 8-byte Folded Reload - fld.d $fs2, $sp, 1264 # 8-byte Folded Reload - fld.d $fs1, $sp, 1272 # 8-byte Folded Reload - fld.d $fs0, $sp, 1280 # 8-byte Folded Reload - ld.d $s8, $sp, 1288 # 8-byte Folded Reload - ld.d $s7, $sp, 1296 # 8-byte Folded Reload - ld.d $s6, $sp, 1304 # 8-byte Folded Reload - ld.d $s5, $sp, 1312 # 8-byte Folded Reload - ld.d $s4, $sp, 1320 # 8-byte Folded Reload - ld.d $s3, $sp, 1328 # 8-byte Folded Reload - ld.d $s2, $sp, 1336 # 8-byte Folded Reload - ld.d $s1, $sp, 1344 # 8-byte Folded Reload - ld.d $s0, $sp, 1352 # 8-byte Folded Reload - ld.d $fp, $sp, 1360 # 8-byte Folded Reload - ld.d $ra, $sp, 1368 # 8-byte Folded Reload - addi.d $sp, $sp, 1376 + fld.d $fs5, $sp, 1256 # 8-byte Folded Reload + fld.d $fs4, $sp, 1264 # 8-byte Folded Reload + fld.d $fs3, $sp, 1272 # 8-byte Folded Reload + fld.d $fs2, $sp, 1280 # 8-byte Folded Reload + fld.d $fs1, $sp, 1288 # 8-byte Folded Reload + fld.d $fs0, $sp, 1296 # 8-byte Folded Reload + ld.d $s8, $sp, 1304 # 8-byte Folded Reload + ld.d $s7, $sp, 1312 # 8-byte Folded Reload + ld.d $s6, $sp, 1320 # 8-byte Folded Reload + ld.d $s5, $sp, 1328 # 8-byte Folded Reload + ld.d $s4, $sp, 1336 # 8-byte Folded Reload + ld.d $s3, $sp, 1344 # 8-byte Folded Reload + ld.d $s2, $sp, 1352 # 8-byte Folded Reload + ld.d $s1, $sp, 1360 # 8-byte Folded Reload + ld.d $s0, $sp, 1368 # 8-byte Folded Reload + ld.d $fp, $sp, 1376 # 8-byte Folded Reload + ld.d $ra, $sp, 1384 # 8-byte Folded Reload + addi.d $sp, $sp, 1392 ret .LBB493_51: st.d $s3, $sp, 72 # 8-byte Folded Spill ld.d $a1, $sp, 240 # 8-byte Folded Reload addi.d $a0, $a1, 8 - st.d $a0, $sp, 128 # 8-byte Folded Spill - ld.d $a0, $sp, 136 # 8-byte Folded Reload + st.d $a0, $sp, 120 # 8-byte Folded Spill + ld.d $a0, $sp, 128 # 8-byte Folded Reload addi.d $s3, $a0, 40 ld.h $a0, $a1, 0 lu12i.w $a1, 15 @@ -106714,16 +106635,25 @@ sqlite3WhereBegin: # @sqlite3WhereBegin st.d $zero, $sp, 152 # 8-byte Folded Spill move $a6, $zero addi.w $s7, $zero, -1 - ld.d $a2, $sp, 144 # 8-byte Folded Reload + ld.d $a2, $sp, 136 # 8-byte Folded Reload sltu $a2, $zero, $a2 st.d $a2, $sp, 48 # 8-byte Folded Spill - pcalau12i $a2, %pc_hi20(.LCPI493_0) - st.d $a2, $sp, 56 # 8-byte Folded Spill - fld.d $fs0, $a2, %pc_lo12(.LCPI493_0) + move $a2, $a1 + lu12i.w $a1, -382855 + ori $a1, $a1, 3886 + lu32i.d $a1, -179538 + st.d $a1, $sp, 144 # 8-byte Folded Spill + lu52i.d $a1, $a1, 1351 + st.d $a1, $sp, 56 # 8-byte Folded Spill + movgr2fr.d $fs0, $a1 ori $s5, $zero, 1 movgr2fr.d $fs1, $zero - ori $a1, $a1, 4095 + ori $a1, $a2, 4095 st.d $a1, $sp, 296 # 8-byte Folded Spill + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 lu12i.w $a2, 407238 lu12i.w $a1, 403142 ori $a1, $a1, 3938 @@ -106754,11 +106684,6 @@ sqlite3WhereBegin: # @sqlite3WhereBegin lu12i.w $a1, 16 ori $a1, $a1, 512 st.d $a1, $sp, 24 # 8-byte Folded Spill - lu12i.w $a1, -382855 - ori $a1, $a1, 3886 - lu32i.d $a1, -179538 - lu52i.d $a1, $a1, 1350 - st.d $a1, $sp, 80 # 8-byte Folded Spill ori $a1, $a2, 3937 st.d $a1, $sp, 352 # 8-byte Folded Spill ori $t3, $zero, 48 @@ -106839,21 +106764,21 @@ sqlite3WhereBegin: # @sqlite3WhereBegin # in Loop: Header=BB493_55 Depth=1 st.d $a7, $sp, 64 # 8-byte Folded Spill move $a5, $zero - st.d $zero, $sp, 120 # 8-byte Folded Spill + st.d $zero, $sp, 112 # 8-byte Folded Spill move $t1, $zero st.d $zero, $sp, 160 # 8-byte Folded Spill move $a7, $zero slli.d $a1, $a6, 6 alsl.d $a1, $a6, $a1, 3 - ld.d $a2, $sp, 128 # 8-byte Folded Reload + ld.d $a2, $sp, 120 # 8-byte Folded Reload add.d $t0, $a2, $a1 ld.d $a1, $sp, 152 # 8-byte Folded Reload sltui $a1, $a1, 1 ld.d $a2, $sp, 48 # 8-byte Folded Reload and $a1, $a2, $a1 - st.d $a1, $sp, 88 # 8-byte Folded Spill + st.d $a1, $sp, 80 # 8-byte Folded Spill st.d $a6, $sp, 176 # 8-byte Folded Spill - fmov.d $fs2, $fs0 + fmov.d $fs3, $fs0 st.d $s7, $sp, 304 # 8-byte Folded Spill b .LBB493_58 .p2align 4, , 16 @@ -106923,13 +106848,13 @@ sqlite3WhereBegin: # @sqlite3WhereBegin # %bb.59: # in Loop: Header=BB493_58 Depth=2 bnez $t2, .LBB493_449 .LBB493_60: # in Loop: Header=BB493_58 Depth=2 - ld.w $a1, $sp, 984 + ld.w $a1, $sp, 992 ld.w $s8, $t0, 44 blez $a1, .LBB493_64 # %bb.61: # %.lr.ph.i809 # in Loop: Header=BB493_58 Depth=2 move $a2, $zero - addi.d $a3, $sp, 988 + addi.d $a3, $sp, 996 .p2align 4, , 16 .LBB493_62: # Parent Loop BB493_55 Depth=1 # Parent Loop BB493_58 Depth=2 @@ -106961,21 +106886,21 @@ sqlite3WhereBegin: # @sqlite3WhereBegin st.d $t2, $sp, 200 # 8-byte Folded Spill beqz $a0, .LBB493_69 # %bb.67: # in Loop: Header=BB493_58 Depth=2 - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 136 # 8-byte Folded Reload beqz $a0, .LBB493_71 # %bb.68: # in Loop: Header=BB493_58 Depth=2 ld.d $s4, $a0, 0 b .LBB493_72 .p2align 4, , 16 .LBB493_69: # in Loop: Header=BB493_58 Depth=2 - ld.d $a0, $sp, 88 # 8-byte Folded Reload + ld.d $a0, $sp, 80 # 8-byte Folded Reload beqz $a0, .LBB493_83 # %bb.70: # in Loop: Header=BB493_58 Depth=2 - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 136 # 8-byte Folded Reload ld.d $a0, $a0, 0 - st.d $a0, $sp, 464 # 8-byte Folded Spill + st.d $a0, $sp, 472 # 8-byte Folded Spill ld.d $s2, $s3, 32 - ld.w $s3, $sp, 488 + ld.w $s3, $sp, 496 bnez $s2, .LBB493_84 b .LBB493_141 .LBB493_71: # in Loop: Header=BB493_58 Depth=2 @@ -106996,18 +106921,18 @@ sqlite3WhereBegin: # @sqlite3WhereBegin .LBB493_74: # %.lr.ph192.i # in Loop: Header=BB493_58 Depth=2 ld.d $a3, $s2, 8 - ld.d $a0, $sp, 496 + ld.d $a0, $sp, 504 bne $s0, $s5, .LBB493_121 # %bb.75: # in Loop: Header=BB493_58 Depth=2 move $a1, $zero move $a2, $a3 b .LBB493_124 .LBB493_76: # in Loop: Header=BB493_58 Depth=2 - ld.w $a0, $sp, 488 + ld.w $a0, $sp, 496 blez $a0, .LBB493_158 # %bb.77: # %.lr.ph.i819 # in Loop: Header=BB493_58 Depth=2 - ld.d $a1, $sp, 496 + ld.d $a1, $sp, 504 move $s0, $zero addi.d $a1, $a1, 14 b .LBB493_79 @@ -107031,18 +106956,18 @@ sqlite3WhereBegin: # @sqlite3WhereBegin addi.w $s0, $s0, 1 b .LBB493_78 .LBB493_83: # in Loop: Header=BB493_58 Depth=2 - st.d $zero, $sp, 464 # 8-byte Folded Spill + st.d $zero, $sp, 472 # 8-byte Folded Spill ld.d $s2, $s3, 32 - ld.w $s3, $sp, 488 + ld.w $s3, $sp, 496 beqz $s2, .LBB493_141 .LBB493_84: # %findTerm.exit.i # in Loop: Header=BB493_58 Depth=2 ld.d $a0, $sp, 56 # 8-byte Folded Reload - fld.d $fs3, $a0, %pc_lo12(.LCPI493_0) - beqz $s3, .LBB493_98 + movgr2fr.d $fs4, $a0 + beqz $s3, .LBB493_97 # %bb.85: # %.lr.ph69.i231.i # in Loop: Header=BB493_58 Depth=2 - ld.d $a0, $sp, 496 + ld.d $a0, $sp, 504 move $a1, $s3 b .LBB493_87 .p2align 4, , 16 @@ -107050,7 +106975,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin # in Loop: Header=BB493_87 Depth=3 addi.w $a1, $a1, -1 addi.d $a0, $a0, 48 - beqz $a1, .LBB493_98 + beqz $a1, .LBB493_97 .LBB493_87: # %.lr.ph69.split.us.i234.i # Parent Loop BB493_55 Depth=1 # Parent Loop BB493_58 Depth=2 @@ -107099,25 +107024,24 @@ sqlite3WhereBegin: # @sqlite3WhereBegin bcnez $fcc0, .LBB493_95 .LBB493_96: # %estLog.exit.i # in Loop: Header=BB493_58 Depth=2 - fmul.d $fs3, $fa1, $fa0 - bnez $s2, .LBB493_99 + fmul.d $fs4, $fa1, $fa0 + fmov.d $fs5, $fs2 + bnez $s2, .LBB493_98 + b .LBB493_99 .LBB493_97: # in Loop: Header=BB493_58 Depth=2 - pcalau12i $a0, %pc_hi20(.LCPI493_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI493_3) - bnez $s3, .LBB493_100 - b .LBB493_112 -.LBB493_98: # in Loop: Header=BB493_58 Depth=2 move $s0, $zero - beqz $s2, .LBB493_97 -.LBB493_99: # in Loop: Header=BB493_58 Depth=2 + fmov.d $fs5, $fs2 + beqz $s2, .LBB493_99 +.LBB493_98: # in Loop: Header=BB493_58 Depth=2 ld.d $a0, $s2, 24 ld.wu $a0, $a0, 0 movgr2fr.d $fa0, $a0 - ffint.d.l $fs4, $fa0 + ffint.d.l $fs5, $fa0 +.LBB493_99: # in Loop: Header=BB493_58 Depth=2 beqz $s3, .LBB493_112 -.LBB493_100: # %.lr.ph69.i242.i +# %bb.100: # %.lr.ph69.i242.i # in Loop: Header=BB493_58 Depth=2 - ld.d $a1, $sp, 496 + ld.d $a1, $sp, 504 addi.d $a0, $a1, 32 move $a2, $a0 move $a3, $s3 @@ -107179,7 +107103,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin move $s4, $zero .LBB493_113: # %findTerm.exit252.thread.i # in Loop: Header=BB493_58 Depth=2 - ld.d $t1, $sp, 464 # 8-byte Folded Reload + ld.d $t1, $sp, 472 # 8-byte Folded Reload beqz $t1, .LBB493_157 # %bb.114: # in Loop: Header=BB493_58 Depth=2 ld.d $s3, $t1, 16 @@ -107195,7 +107119,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin addi.w $s5, $zero, -1 bne $a0, $s5, .LBB493_213 # %bb.117: # in Loop: Header=BB493_58 Depth=2 - ld.d $fp, $sp, 480 + ld.d $fp, $sp, 488 ld.w $a0, $fp, 0 blez $a0, .LBB493_210 # %bb.118: # %.lr.ph.i.i.i286.i @@ -107281,7 +107205,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin beqz $a0, .LBB493_129 # %bb.128: # in Loop: Header=BB493_58 Depth=2 ld.w $a1, $a0, -8 - ld.d $a3, $sp, 96 # 8-byte Folded Reload + ld.d $a3, $sp, 88 # 8-byte Folded Reload ld.d $a2, $a3, %pc_lo12(mem.5) addi.d $a0, $a0, -8 sub.d $a1, $a2, $a1 @@ -107290,11 +107214,12 @@ sqlite3WhereBegin: # @sqlite3WhereBegin jirl $ra, $ra, 0 .LBB493_129: # %sqlite3_free.exit.i # in Loop: Header=BB493_58 Depth=2 - ld.w $fp, $s2, 16 st.w $zero, $s2, 40 + ld.w $fp, $s2, 16 vld $vr0, $sp, 448 # 16-byte Folded Reload vst $vr0, $s2, 48 - ld.d $a0, $sp, 80 # 8-byte Folded Reload + ld.d $a0, $sp, 144 # 8-byte Folded Reload + lu52i.d $a0, $a0, 1350 st.d $a0, $s2, 64 ld.d $a0, $sp, 152 # 8-byte Folded Reload beqz $a0, .LBB493_132 @@ -107361,7 +107286,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin beqz $s3, .LBB493_149 # %bb.142: # %.lr.ph69.i.i # in Loop: Header=BB493_58 Depth=2 - ld.d $a0, $sp, 496 + ld.d $a0, $sp, 504 addi.d $a0, $a0, 14 move $a1, $s3 b .LBB493_144 @@ -107404,7 +107329,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin b .LBB493_183 .LBB493_149: # %.loopexit383.i # in Loop: Header=BB493_58 Depth=2 - ld.d $a0, $sp, 464 # 8-byte Folded Reload + ld.d $a0, $sp, 472 # 8-byte Folded Reload beqz $a0, .LBB493_445 # %bb.150: # in Loop: Header=BB493_58 Depth=2 ld.d $a0, $a0, 16 @@ -107420,7 +107345,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin addi.w $s0, $zero, -1 bne $a1, $s0, .LBB493_445 # %bb.153: # in Loop: Header=BB493_58 Depth=2 - ld.d $fp, $sp, 480 + ld.d $fp, $sp, 488 ld.w $a1, $fp, 0 blez $a1, .LBB493_442 # %bb.154: # %.lr.ph.i.i.i.i @@ -107532,7 +107457,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin add.d $a3, $a1, $a0 alsl.d $s6, $s5, $a3, 3 st.w $s0, $s2, 0 - ld.w $a2, $sp, 488 + ld.w $a2, $sp, 496 st.w $s5, $s2, 16 st.d $a1, $s2, 8 st.d $a3, $s2, 24 @@ -107541,7 +107466,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin blez $a2, .LBB493_189 # %bb.176: # %.lr.ph186.i # in Loop: Header=BB493_58 Depth=2 - ld.d $a6, $sp, 496 + ld.d $a6, $sp, 504 ld.d $a3, $sp, 328 # 8-byte Folded Reload ld.w $a3, $a3, 44 move $a4, $zero @@ -107607,22 +107532,23 @@ sqlite3WhereBegin: # @sqlite3WhereBegin maskeqz $a0, $a3, $a0 or $a0, $a0, $a2 .LBB493_185: # in Loop: Header=BB493_58 Depth=2 - pcalau12i $a2, %pc_hi20(.LCPI493_1) - fld.d $fa1, $a2, %pc_lo12(.LCPI493_1) move $a3, $zero move $t2, $zero + ld.d $a2, $sp, 144 # 8-byte Folded Reload + lu52i.d $a2, $a2, 1350 + movgr2fr.d $fa1, $a2 fcmp.clt.d $fcc0, $fa1, $fa0 - fsel $fs3, $fa0, $fa1, $fcc0 - fcmp.cule.d $fcc0, $fs2, $fs3 + fsel $fs4, $fa0, $fa1, $fcc0 + fcmp.cule.d $fcc0, $fs3, $fs4 bcnez $fcc0, .LBB493_187 .LBB493_186: # in Loop: Header=BB493_58 Depth=2 ld.d $a2, $sp, 184 # 8-byte Folded Reload st.d $a1, $a2, 80 ori $a7, $zero, 1 st.d $a6, $sp, 160 # 8-byte Folded Spill - fmov.d $fs2, $fs3 + fmov.d $fs3, $fs4 move $t1, $a3 - st.d $a0, $sp, 120 # 8-byte Folded Spill + st.d $a0, $sp, 112 # 8-byte Folded Spill move $a5, $t2 .LBB493_187: # in Loop: Header=BB493_58 Depth=2 bnez $a4, .LBB493_449 @@ -107716,14 +107642,14 @@ sqlite3WhereBegin: # @sqlite3WhereBegin # in Loop: Header=BB493_58 Depth=2 sltui $a1, $a1, 1 vldi $vr0, -1016 - fdiv.d $fa0, $fs4, $fa0 + fdiv.d $fa0, $fs5, $fa0 ld.d $a2, $sp, 24 # 8-byte Folded Reload masknez $a2, $a2, $a1 ori $a3, $zero, 512 maskeqz $a3, $a3, $a1 or $s4, $a3, $a2 movgr2cf $fcc0, $a1 - fsel $fs4, $fa0, $fs4, $fcc0 + fsel $fs5, $fa0, $fs5, $fcc0 b .LBB493_202 .p2align 4, , 16 .LBB493_201: # %.thread.us.i273.i @@ -107755,7 +107681,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin lu12i.w $a0, 32 or $s4, $s4, $a0 vldi $vr0, -1016 - fdiv.d $fs4, $fs4, $fa0 + fdiv.d $fs5, $fs5, $fa0 b .LBB493_113 .LBB493_207: # in Loop: Header=BB493_58 Depth=2 ori $a4, $zero, 1 @@ -107772,7 +107698,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin nor $s5, $a0, $zero .LBB493_210: # %getMask.exit.i.i278.i # in Loop: Header=BB493_58 Depth=2 - ld.d $a0, $sp, 464 # 8-byte Folded Reload + ld.d $a0, $sp, 472 # 8-byte Folded Reload ld.w $a0, $a0, 0 ori $a2, $zero, 1 slt $a1, $a2, $a0 @@ -107801,7 +107727,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin .LBB493_213: # %.loopexit.i821 # in Loop: Header=BB493_58 Depth=2 vldi $vr0, -912 - fcmp.cule.d $fcc0, $fs4, $ft0 + fcmp.cule.d $fcc0, $fs5, $ft0 bcnez $fcc0, .LBB493_216 # %bb.214: # %.lr.ph.i293.i.preheader # in Loop: Header=BB493_58 Depth=2 @@ -107811,14 +107737,14 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ori $t4, $zero, 110 ld.d $t5, $sp, 368 # 8-byte Folded Reload ld.d $t6, $sp, 352 # 8-byte Folded Reload - ld.d $t1, $sp, 464 # 8-byte Folded Reload + ld.d $t1, $sp, 472 # 8-byte Folded Reload .p2align 4, , 16 .LBB493_215: # %.lr.ph.i293.i # Parent Loop BB493_55 Depth=1 # Parent Loop BB493_58 Depth=2 # => This Inner Loop Header: Depth=3 fmul.d $fa1, $fa1, $ft0 - fcmp.clt.d $fcc0, $fa1, $fs4 + fcmp.clt.d $fcc0, $fa1, $fs5 fadd.d $fa0, $fa0, $ft1 bcnez $fcc0, .LBB493_215 b .LBB493_217 @@ -107828,13 +107754,13 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ori $t4, $zero, 110 ld.d $t5, $sp, 368 # 8-byte Folded Reload ld.d $t6, $sp, 352 # 8-byte Folded Reload - ld.d $t1, $sp, 464 # 8-byte Folded Reload + ld.d $t1, $sp, 472 # 8-byte Folded Reload .LBB493_217: # %estLog.exit296.i # in Loop: Header=BB493_58 Depth=2 - fmadd.d $fs4, $fs4, $fa0, $fs4 + fmadd.d $fs5, $fs5, $fa0, $fs5 .LBB493_218: # in Loop: Header=BB493_58 Depth=2 - fcmp.clt.d $fcc0, $fs4, $fs3 - fsel $fs3, $fs3, $fs4, $fcc0 + fcmp.clt.d $fcc0, $fs5, $fs4 + fsel $fs4, $fs4, $fs5, $fcc0 movcf2gr $a0, $fcc0 masknez $a1, $s0, $a0 maskeqz $a0, $s4, $a0 @@ -107854,7 +107780,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin st.d $zero, $sp, 256 # 8-byte Folded Spill st.d $zero, $sp, 264 # 8-byte Folded Spill st.d $s8, $sp, 312 # 8-byte Folded Spill - st.d $t1, $sp, 464 # 8-byte Folded Spill + st.d $t1, $sp, 472 # 8-byte Folded Spill b .LBB493_221 .p2align 4, , 16 .LBB493_220: # %.thread372.i @@ -107916,7 +107842,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin # Child Loop BB493_330 Depth 6 # Child Loop BB493_336 Depth 6 # Child Loop BB493_226 Depth 5 - ld.w $s3, $sp, 488 + ld.w $s3, $sp, 496 beqz $s3, .LBB493_351 # %bb.224: # %.lr.ph69.i1413 # in Loop: Header=BB493_223 Depth=4 @@ -107924,7 +107850,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ld.d $a1, $s2, 16 slli.d $a2, $t8, 2 ldx.w $s4, $a1, $a2 - ld.d $s1, $sp, 496 + ld.d $s1, $sp, 504 bgez $s8, .LBB493_235 b .LBB493_226 .p2align 4, , 16 @@ -107956,7 +107882,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin # %bb.230: # %findTerm.exit1430.thread1549 # in Loop: Header=BB493_223 Depth=4 andi $a1, $a1, 1 - ld.d $t1, $sp, 464 # 8-byte Folded Reload + ld.d $t1, $sp, 472 # 8-byte Folded Reload bnez $a1, .LBB493_346 .LBB493_231: # in Loop: Header=BB493_223 Depth=4 lu12i.w $a1, 1 @@ -108015,7 +107941,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ld.d $a1, $s2, 32 ld.d $a5, $s1, 0 ld.d $a1, $a1, 16 - ld.d $ra, $sp, 472 + ld.d $ra, $sp, 480 slli.d $a2, $s4, 5 alsl.d $a2, $s4, $a2, 3 add.d $a1, $a1, $a2 @@ -108644,14 +108570,14 @@ sqlite3WhereBegin: # @sqlite3WhereBegin # in Loop: Header=BB493_223 Depth=4 lu12i.w $a1, 1 or $s6, $s6, $a1 - ld.d $t1, $sp, 464 # 8-byte Folded Reload + ld.d $t1, $sp, 472 # 8-byte Folded Reload ld.d $s0, $sp, 384 # 8-byte Folded Reload b .LBB493_350 .LBB493_345: # %sqlite3StrICmp.exit.i.findTerm.exit1430.thread1549.loopexit1789_crit_edge # in Loop: Header=BB493_223 Depth=4 ld.hu $a1, $s1, 14 andi $a1, $a1, 1 - ld.d $t1, $sp, 464 # 8-byte Folded Reload + ld.d $t1, $sp, 472 # 8-byte Folded Reload beqz $a1, .LBB493_231 .LBB493_346: # in Loop: Header=BB493_223 Depth=4 ld.d $a1, $s1, 0 @@ -108683,7 +108609,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin .LBB493_351: # %._crit_edge.loopexit.split.loop.exit.i # in Loop: Header=BB493_221 Depth=3 move $s0, $t8 - ld.d $t1, $sp, 464 # 8-byte Folded Reload + ld.d $t1, $sp, 472 # 8-byte Folded Reload b .LBB493_353 .p2align 4, , 16 .LBB493_352: # in Loop: Header=BB493_221 Depth=3 @@ -108718,7 +108644,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ld.bu $a2, $s2, 44 ffint.d.l $fa1, $fa1 fmul.d $fa1, $fa2, $fa1 - fmul.d $fs4, $fa1, $fa0 + fmul.d $fs5, $fa1, $fa0 sltu $a2, $zero, $a2 lu12i.w $a3, 4 and $a3, $s6, $a3 @@ -108741,7 +108667,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin move $s3, $t1 ld.d $a0, $s2, 16 ldx.w $fp, $a0, $a1 - addi.d $a0, $sp, 472 + addi.d $a0, $sp, 480 ori $a4, $zero, 60 move $a1, $s8 move $a2, $fp @@ -108751,7 +108677,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin jirl $ra, $ra, 0 beqz $a0, .LBB493_363 # %bb.358: # in Loop: Header=BB493_221 Depth=3 - addi.d $a0, $sp, 472 + addi.d $a0, $sp, 480 ori $a4, $zero, 24 move $a1, $s8 move $a2, $fp @@ -108764,13 +108690,13 @@ sqlite3WhereBegin: # @sqlite3WhereBegin lu12i.w $a0, 18 or $s1, $s1, $a0 vldi $vr0, -1016 - fdiv.d $fs4, $fs4, $fa0 + fdiv.d $fs5, $fs5, $fa0 b .LBB493_361 .LBB493_360: # in Loop: Header=BB493_221 Depth=3 lu12i.w $a0, 2 or $s1, $s1, $a0 .LBB493_361: # in Loop: Header=BB493_221 Depth=3 - addi.d $a0, $sp, 472 + addi.d $a0, $sp, 480 ori $a4, $zero, 36 move $a1, $s8 move $a2, $fp @@ -108783,7 +108709,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin lu12i.w $a0, 32 or $s1, $s1, $a0 vldi $vr0, -1016 - fdiv.d $fs4, $fs4, $fa0 + fdiv.d $fs5, $fs5, $fa0 .LBB493_363: # in Loop: Header=BB493_221 Depth=3 vld $vr7, $sp, 448 # 16-byte Folded Reload ori $t4, $zero, 110 @@ -108801,7 +108727,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin bltz $a0, .LBB493_419 # %bb.366: # in Loop: Header=BB493_221 Depth=3 ld.w $a0, $t1, 0 - ld.d $a7, $sp, 480 + ld.d $a7, $sp, 488 st.d $a7, $sp, 432 # 8-byte Folded Spill st.d $a0, $sp, 360 # 8-byte Folded Spill blez $a0, .LBB493_405 @@ -108876,7 +108802,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin jirl $ra, $ra, 0 ld.d $t3, $sp, 280 # 8-byte Folded Reload move $t2, $s1 - ld.d $t1, $sp, 464 # 8-byte Folded Reload + ld.d $t1, $sp, 472 # 8-byte Folded Reload ld.d $a7, $sp, 432 # 8-byte Folded Reload vldi $vr9, -912 vldi $vr8, -988 @@ -108920,7 +108846,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin jirl $ra, $ra, 0 ld.d $t3, $sp, 280 # 8-byte Folded Reload move $t2, $s1 - ld.d $t1, $sp, 464 # 8-byte Folded Reload + ld.d $t1, $sp, 472 # 8-byte Folded Reload ld.d $a7, $sp, 432 # 8-byte Folded Reload vldi $vr9, -912 vldi $vr8, -988 @@ -109005,7 +108931,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin .LBB493_396: # in Loop: Header=BB493_368 Depth=4 ld.w $a0, $a7, 0 addi.w $fp, $zero, -1 - st.d $t2, $sp, 104 # 8-byte Folded Spill + st.d $t2, $sp, 96 # 8-byte Folded Spill blez $a0, .LBB493_401 # %bb.397: # %.lr.ph.i.i.i311.i # in Loop: Header=BB493_368 Depth=4 @@ -109033,7 +108959,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ld.w $a0, $t1, 0 bstrpick.d $a1, $s3, 31, 0 slt $a2, $s3, $a0 - st.d $s3, $sp, 112 # 8-byte Folded Spill + st.d $s3, $sp, 104 # 8-byte Folded Spill masknez $a3, $s3, $a2 maskeqz $a0, $a0, $a2 or $a2, $a0, $a3 @@ -109050,7 +108976,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin addi.d $s6, $s6, -1 beqz $s6, .LBB493_408 # %bb.403: # in Loop: Header=BB493_402 Depth=5 - ld.d $a1, $sp, 464 # 8-byte Folded Reload + ld.d $a1, $sp, 472 # 8-byte Folded Reload ld.d $a1, $a1, 16 ldx.d $a1, $a1, $s3 addi.d $s3, $s3, 24 @@ -109064,9 +108990,9 @@ sqlite3WhereBegin: # @sqlite3WhereBegin and $a1, $a0, $fp beqz $a1, .LBB493_402 # %bb.404: # in Loop: Header=BB493_368 Depth=4 - ld.d $t1, $sp, 464 # 8-byte Folded Reload - ld.d $s3, $sp, 112 # 8-byte Folded Reload - ld.d $t2, $sp, 104 # 8-byte Folded Reload + ld.d $t1, $sp, 472 # 8-byte Folded Reload + ld.d $s3, $sp, 104 # 8-byte Folded Reload + ld.d $t2, $sp, 96 # 8-byte Folded Reload ld.d $t3, $sp, 280 # 8-byte Folded Reload b .LBB493_394 .LBB493_405: # in Loop: Header=BB493_221 Depth=3 @@ -109090,7 +109016,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin # in Loop: Header=BB493_221 Depth=3 addi.d $s0, $s0, 1 ld.d $s3, $sp, 360 # 8-byte Folded Reload - ld.d $t1, $sp, 464 # 8-byte Folded Reload + ld.d $t1, $sp, 472 # 8-byte Folded Reload ld.d $a0, $sp, 288 # 8-byte Folded Reload sltui $fp, $a0, 1 ld.d $a0, $sp, 360 # 8-byte Folded Reload @@ -109158,13 +109084,13 @@ sqlite3WhereBegin: # @sqlite3WhereBegin vld $vr7, $sp, 448 # 16-byte Folded Reload and $a1, $a0, $s0 move $a0, $s3 - ld.d $t1, $sp, 464 # 8-byte Folded Reload + ld.d $t1, $sp, 472 # 8-byte Folded Reload beqz $a1, .LBB493_417 .p2align 4, , 16 .LBB493_419: # %isSortingIndex.exit.thread.i # in Loop: Header=BB493_221 Depth=3 vldi $vr0, -912 - fcmp.cule.d $fcc0, $fs4, $ft0 + fcmp.cule.d $fcc0, $fs5, $ft0 bcnez $fcc0, .LBB493_422 # %bb.420: # %.lr.ph.i317.i.preheader # in Loop: Header=BB493_221 Depth=3 @@ -109180,7 +109106,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin # Parent Loop BB493_221 Depth=3 # => This Inner Loop Header: Depth=4 fmul.d $fa1, $fa1, $ft0 - fcmp.clt.d $fcc0, $fa1, $fs4 + fcmp.clt.d $fcc0, $fa1, $fs5 fadd.d $fa0, $fa0, $ft1 bcnez $fcc0, .LBB493_421 b .LBB493_423 @@ -109192,7 +109118,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ld.d $s0, $sp, 384 # 8-byte Folded Reload .LBB493_423: # %estLog.exit320.i # in Loop: Header=BB493_221 Depth=3 - fmadd.d $fs4, $fs4, $fa0, $fs4 + fmadd.d $fs5, $fs5, $fa0, $fs5 ld.d $s7, $sp, 304 # 8-byte Folded Reload ld.d $s1, $sp, 344 # 8-byte Folded Reload .LBB493_424: # in Loop: Header=BB493_221 Depth=3 @@ -109314,19 +109240,19 @@ sqlite3WhereBegin: # @sqlite3WhereBegin lu12i.w $a1, 128 or $a1, $s1, $a1 vldi $vr0, -928 - fmul.d $fa0, $fs4, $fa0 + fmul.d $fa0, $fs5, $fa0 masknez $a2, $s1, $a0 maskeqz $a1, $a1, $a0 or $s1, $a1, $a2 movgr2cf $fcc0, $a0 - fsel $fs4, $fs4, $fa0, $fcc0 + fsel $fs5, $fs5, $fa0, $fcc0 .LBB493_436: # in Loop: Header=BB493_221 Depth=3 - fcmp.cule.d $fcc0, $fs3, $fs4 + fcmp.cule.d $fcc0, $fs4, $fs5 bcnez $fcc0, .LBB493_220 # %bb.437: # in Loop: Header=BB493_221 Depth=3 st.d $s0, $sp, 264 # 8-byte Folded Spill st.d $s1, $sp, 248 # 8-byte Folded Spill - fmov.d $fs3, $fs4 + fmov.d $fs4, $fs5 st.d $s2, $sp, 256 # 8-byte Folded Spill b .LBB493_220 .LBB493_438: # in Loop: Header=BB493_58 Depth=2 @@ -109348,7 +109274,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ld.d $a4, $sp, 200 # 8-byte Folded Reload ld.d $a3, $sp, 264 # 8-byte Folded Reload ld.d $t2, $sp, 256 # 8-byte Folded Reload - fcmp.cule.d $fcc0, $fs2, $fs3 + fcmp.cule.d $fcc0, $fs3, $fs4 bcnez $fcc0, .LBB493_187 b .LBB493_186 .LBB493_440: # in Loop: Header=BB493_58 Depth=2 @@ -109366,14 +109292,14 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ori $t4, $zero, 110 ld.d $t5, $sp, 368 # 8-byte Folded Reload ld.d $t6, $sp, 352 # 8-byte Folded Reload - ld.d $t1, $sp, 464 # 8-byte Folded Reload + ld.d $t1, $sp, 472 # 8-byte Folded Reload b .LBB493_218 .LBB493_441: # in Loop: Header=BB493_58 Depth=2 sll.d $a1, $s5, $a2 nor $s0, $a1, $zero .LBB493_442: # %getMask.exit.i.i.i # in Loop: Header=BB493_58 Depth=2 - ld.d $a1, $sp, 464 # 8-byte Folded Reload + ld.d $a1, $sp, 472 # 8-byte Folded Reload ld.w $a1, $a1, 0 slt $a2, $s5, $a1 masknez $a3, $s5, $a2 @@ -109406,7 +109332,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin move $a1, $zero .LBB493_446: # %bestIndex.exit # in Loop: Header=BB493_58 Depth=2 - fmov.d $fs3, $fs1 + fmov.d $fs4, $fs1 ld.d $s8, $sp, 320 # 8-byte Folded Reload ld.d $a6, $sp, 232 # 8-byte Folded Reload ld.d $a5, $sp, 224 # 8-byte Folded Reload @@ -109414,14 +109340,17 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ld.d $a7, $sp, 208 # 8-byte Folded Reload ld.d $t0, $sp, 328 # 8-byte Folded Reload ld.d $a4, $sp, 200 # 8-byte Folded Reload - fcmp.cule.d $fcc0, $fs2, $fs3 + fcmp.cule.d $fcc0, $fs3, $fs4 bceqz $fcc0, .LBB493_186 b .LBB493_187 .LBB493_447: # in Loop: Header=BB493_58 Depth=2 - pcalau12i $a0, %pc_hi20(.LCPI493_2) - fld.d $fs3, $a0, %pc_lo12(.LCPI493_2) - bnez $s2, .LBB493_99 - b .LBB493_97 + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fs4, $a0 + fmov.d $fs5, $fs2 + bnez $s2, .LBB493_98 + b .LBB493_99 .p2align 4, , 16 .LBB493_448: # in Loop: Header=BB493_55 Depth=1 move $a5, $zero @@ -109432,11 +109361,11 @@ sqlite3WhereBegin: # @sqlite3WhereBegin .p2align 4, , 16 .LBB493_449: # %.thread # in Loop: Header=BB493_55 Depth=1 - ld.d $a1, $sp, 120 # 8-byte Folded Reload + ld.d $a1, $sp, 112 # 8-byte Folded Reload slli.d $a0, $a1, 43 bgez $a0, .LBB493_451 # %bb.450: # in Loop: Header=BB493_55 Depth=1 - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 136 # 8-byte Folded Reload st.d $zero, $a0, 0 .LBB493_451: # %.thread.thread # in Loop: Header=BB493_55 Depth=1 @@ -109460,7 +109389,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin addi.d $a0, $a2, 1 st.w $a0, $s8, 84 .LBB493_454: # in Loop: Header=BB493_55 Depth=1 - ld.w $a0, $sp, 984 + ld.w $a0, $sp, 992 ld.d $a3, $sp, 184 # 8-byte Folded Reload st.w $a2, $a3, 28 blez $a0, .LBB493_54 @@ -109470,11 +109399,11 @@ sqlite3WhereBegin: # @sqlite3WhereBegin addi.w $a2, $a2, 0 slli.d $a3, $a2, 6 alsl.d $a2, $a2, $a3, 3 - ld.d $a3, $sp, 128 # 8-byte Folded Reload + ld.d $a3, $sp, 120 # 8-byte Folded Reload add.d $a2, $a3, $a2 ld.w $a2, $a2, 44 move $a3, $zero - addi.d $a4, $sp, 988 + addi.d $a4, $sp, 996 .p2align 4, , 16 .LBB493_456: # Parent Loop BB493_55 Depth=1 # => This Inner Loop Header: Depth=2 @@ -109487,18 +109416,18 @@ sqlite3WhereBegin: # @sqlite3WhereBegin b .LBB493_54 .LBB493_458: # %._crit_edge1745.loopexit bstrpick.d $a0, $a7, 22, 22 - ld.d $a1, $sp, 144 # 8-byte Folded Reload + ld.d $a1, $sp, 136 # 8-byte Folded Reload bnez $a1, .LBB493_460 b .LBB493_462 .LBB493_459: ori $a0, $zero, 1 - ld.d $a1, $sp, 144 # 8-byte Folded Reload + ld.d $a1, $sp, 136 # 8-byte Folded Reload beqz $a1, .LBB493_462 .LBB493_460: # %._crit_edge1745 andi $a0, $a0, 1 beqz $a0, .LBB493_462 # %bb.461: - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 136 # 8-byte Folded Reload st.d $zero, $a0, 0 .LBB493_462: addi.w $fp, $zero, -1 @@ -109544,13 +109473,13 @@ sqlite3WhereBegin: # @sqlite3WhereBegin # Child Loop BB493_513 Depth 2 ld.bu $a0, $s8, 208 ld.w $a1, $s3, 28 - st.d $a1, $sp, 464 # 8-byte Folded Spill + st.d $a1, $sp, 472 # 8-byte Folded Spill bne $a0, $s4, .LBB493_510 # %bb.468: # in Loop: Header=BB493_467 Depth=1 ld.w $a0, $s3, 0 slli.d $a1, $a0, 6 alsl.d $a0, $a0, $a1, 3 - ld.d $a1, $sp, 128 # 8-byte Folded Reload + ld.d $a1, $sp, 120 # 8-byte Folded Reload add.d $fp, $a1, $a0 ld.d $a2, $fp, 8 ld.d $s0, $sp, 72 # 8-byte Folded Reload @@ -109733,7 +109662,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin beqz $s6, .LBB493_510 # %bb.496: # in Loop: Header=BB493_467 Depth=1 ld.w $a1, $s6, -8 - ld.d $a3, $sp, 96 # 8-byte Folded Reload + ld.d $a3, $sp, 88 # 8-byte Folded Reload ld.d $a2, $a3, %pc_lo12(mem.5) addi.d $a0, $s6, -8 sub.d $a1, $a2, $a1 @@ -109816,7 +109745,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ld.w $a0, $s3, 0 slli.d $a1, $a0, 6 alsl.d $a0, $a0, $a1, 3 - ld.d $a1, $sp, 128 # 8-byte Folded Reload + ld.d $a1, $sp, 120 # 8-byte Folded Reload add.d $s7, $a1, $a0 ld.d $fp, $s7, 24 ld.d $a0, $fp, 144 @@ -110042,7 +109971,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ori $a3, $zero, 12 stx.h $a3, $a1, $a0 ld.d $a1, $sp, 272 # 8-byte Folded Reload - ld.d $a0, $sp, 464 # 8-byte Folded Reload + ld.d $a0, $sp, 472 # 8-byte Folded Reload st.w $a0, $a2, 4 st.w $s5, $a2, 8 st.w $s6, $a2, 12 @@ -110058,7 +109987,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin beqz $s8, .LBB493_553 # %bb.546: # in Loop: Header=BB493_467 Depth=1 ld.w $a1, $s8, -8 - ld.d $a3, $sp, 96 # 8-byte Folded Reload + ld.d $a3, $sp, 88 # 8-byte Folded Reload ld.d $a2, $a3, %pc_lo12(mem.5) addi.d $a0, $s8, -8 sub.d $a1, $a2, $a1 @@ -110125,7 +110054,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin add.d $a3, $a2, $a0 ori $a5, $zero, 97 stx.h $a5, $a2, $a0 - ld.d $a0, $sp, 464 # 8-byte Folded Reload + ld.d $a0, $sp, 472 # 8-byte Folded Reload st.w $a0, $a3, 4 st.w $a1, $a3, 8 st.w $zero, $a3, 12 @@ -110178,7 +110107,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin .LBB493_563: # %._crit_edge1759 ld.d $a1, $sp, 272 # 8-byte Folded Reload ld.w $a1, $a1, 24 - ld.d $a2, $sp, 136 # 8-byte Folded Reload + ld.d $a2, $sp, 128 # 8-byte Folded Reload st.w $a1, $a2, 16 blez $a0, .LBB493_1117 # %bb.564: # %.lr.ph1786 @@ -110220,10 +110149,10 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ld.w $a0, $s1, 0 slli.d $a1, $a0, 6 alsl.d $a0, $a0, $a1, 3 - ld.d $a1, $sp, 128 # 8-byte Folded Reload + ld.d $a1, $sp, 120 # 8-byte Folded Reload add.d $s3, $a1, $a0 ld.w $a0, $s3, 44 - st.d $a0, $sp, 464 # 8-byte Folded Spill + st.d $a0, $sp, 472 # 8-byte Folded Spill ld.d $s2, $s1, 16 ld.w $a0, $s1, 28 st.d $a0, $sp, 408 # 8-byte Folded Spill @@ -110257,7 +110186,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin beqz $s6, .LBB493_571 .LBB493_570: # in Loop: Header=BB493_566 Depth=1 ld.w $a1, $s6, -8 - ld.d $a3, $sp, 96 # 8-byte Folded Reload + ld.d $a3, $sp, 88 # 8-byte Folded Reload ld.d $a2, $a3, %pc_lo12(mem.5) addi.d $a0, $s6, -8 sub.d $a1, $a2, $a1 @@ -110315,7 +110244,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin beqz $fp, .LBB493_579 .LBB493_578: # in Loop: Header=BB493_566 Depth=1 ld.w $a1, $fp, -8 - ld.d $a3, $sp, 96 # 8-byte Folded Reload + ld.d $a3, $sp, 88 # 8-byte Folded Reload ld.d $a2, $a3, %pc_lo12(mem.5) addi.d $a0, $fp, -8 sub.d $a1, $a2, $a1 @@ -110453,7 +110382,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin .p2align 4, , 16 .LBB493_598: # in Loop: Header=BB493_595 Depth=2 ld.w $a0, $a1, 0 - ld.d $a1, $sp, 496 + ld.d $a1, $sp, 504 slli.d $a2, $a0, 5 alsl.d $a0, $a0, $a2, 4 ldx.d $a0, $a1, $a0 @@ -110578,7 +110507,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ld.d $s1, $sp, 192 # 8-byte Folded Reload ori $a0, $zero, 102 st.w $a0, $s1, 48 - ld.d $a6, $sp, 464 # 8-byte Folded Reload + ld.d $a6, $sp, 472 # 8-byte Folded Reload st.w $a6, $s1, 52 move $a0, $fp blt $fp, $s0, .LBB493_619 @@ -110592,7 +110521,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ld.bu $a1, $a0, 42 ori $a0, $zero, 1 ld.d $s1, $sp, 192 # 8-byte Folded Reload - ld.d $a6, $sp, 464 # 8-byte Folded Reload + ld.d $a6, $sp, 472 # 8-byte Folded Reload bnez $a1, .LBB493_816 # %bb.618: # %resizeOpArray.exit._crit_edge.i.i1359 # in Loop: Header=BB493_566 Depth=1 @@ -110632,14 +110561,14 @@ sqlite3WhereBegin: # @sqlite3WhereBegin move $fp, $zero b .LBB493_778 .LBB493_622: # in Loop: Header=BB493_566 Depth=1 - ld.w $a0, $sp, 488 + ld.w $a0, $sp, 496 beqz $a0, .LBB493_641 # %bb.623: # %.lr.ph69.i # in Loop: Header=BB493_566 Depth=1 - ld.d $a1, $sp, 496 + ld.d $a1, $sp, 504 ld.d $fp, $sp, 192 # 8-byte Folded Reload ld.d $a3, $sp, 368 # 8-byte Folded Reload - ld.d $a4, $sp, 464 # 8-byte Folded Reload + ld.d $a4, $sp, 472 # 8-byte Folded Reload b .LBB493_625 .p2align 4, , 16 .LBB493_624: # %.thread.us.i @@ -110679,15 +110608,15 @@ sqlite3WhereBegin: # @sqlite3WhereBegin st.b $a0, $s8, 37 b .LBB493_643 .LBB493_632: # in Loop: Header=BB493_566 Depth=1 - ld.w $a0, $sp, 488 + ld.w $a0, $sp, 496 beqz $a0, .LBB493_687 # %bb.633: # %.lr.ph69.i987 # in Loop: Header=BB493_566 Depth=1 - ld.d $s4, $sp, 496 + ld.d $s4, $sp, 504 move $fp, $s4 move $a1, $a0 ld.d $a3, $sp, 368 # 8-byte Folded Reload - ld.d $a6, $sp, 464 # 8-byte Folded Reload + ld.d $a6, $sp, 472 # 8-byte Folded Reload b .LBB493_635 .p2align 4, , 16 .LBB493_634: # %.thread.us.i1001 @@ -110802,7 +110731,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin add.d $a2, $a1, $a0 ori $a3, $zero, 44 stx.h $a3, $a1, $a0 - ld.d $a0, $sp, 464 # 8-byte Folded Reload + ld.d $a0, $sp, 472 # 8-byte Folded Reload st.w $a0, $a2, 4 st.w $fp, $a2, 8 st.w $s2, $a2, 12 @@ -110827,7 +110756,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ld.d $s1, $sp, 192 # 8-byte Folded Reload ori $a0, $zero, 22 st.w $a0, $s1, 48 - ld.d $a6, $sp, 464 # 8-byte Folded Reload + ld.d $a6, $sp, 472 # 8-byte Folded Reload b .LBB493_817 .LBB493_656: # in Loop: Header=BB493_566 Depth=1 sltui $a0, $s3, 1 @@ -110893,7 +110822,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin .LBB493_666: # in Loop: Header=BB493_566 Depth=1 ld.d $fp, $sp, 192 # 8-byte Folded Reload ld.w $s4, $fp, 60 - addi.d $a2, $sp, 472 + addi.d $a2, $sp, 480 ori $a4, $zero, 2 move $a0, $s8 move $a1, $fp @@ -110963,8 +110892,8 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ori $a1, $zero, 24 maskeqz $a1, $a1, $s4 or $a4, $a1, $a0 - addi.d $a0, $sp, 472 - ld.d $a1, $sp, 464 # 8-byte Folded Reload + addi.d $a0, $sp, 480 + ld.d $a1, $sp, 472 # 8-byte Folded Reload move $a3, $s6 move $a5, $s2 pcaddu18i $ra, %call36(findTerm) @@ -111071,7 +111000,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin .LBB493_687: # in Loop: Header=BB493_566 Depth=1 move $fp, $zero move $s4, $zero - ld.d $a6, $sp, 464 # 8-byte Folded Reload + ld.d $a6, $sp, 472 # 8-byte Folded Reload .LBB493_688: # %findTerm.exit1023 # in Loop: Header=BB493_566 Depth=1 sltu $s3, $zero, $s3 @@ -111172,7 +111101,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ld.bu $a0, $a0, 42 ld.d $s1, $sp, 192 # 8-byte Folded Reload ld.d $s5, $sp, 448 # 8-byte Folded Reload - ld.d $a6, $sp, 464 # 8-byte Folded Reload + ld.d $a6, $sp, 472 # 8-byte Folded Reload bnez $a0, .LBB493_734 # %bb.700: # %resizeOpArray.exit._crit_edge.i.i1059 # in Loop: Header=BB493_566 Depth=1 @@ -111204,7 +111133,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin .LBB493_702: # in Loop: Header=BB493_566 Depth=1 ld.d $s1, $sp, 192 # 8-byte Folded Reload ld.w $s4, $s1, 60 - addi.d $a2, $sp, 472 + addi.d $a2, $sp, 480 ori $a4, $zero, 1 move $a0, $s8 move $a1, $s1 @@ -111383,7 +111312,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin maskeqz $a4, $a4, $s3 or $a3, $a4, $a3 stx.b $a3, $a1, $a0 - ld.d $a0, $sp, 464 # 8-byte Folded Reload + ld.d $a0, $sp, 472 # 8-byte Folded Reload st.w $a0, $a2, 4 ld.d $a0, $sp, 416 # 8-byte Folded Reload st.w $a0, $a2, 8 @@ -111411,7 +111340,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ld.d $s6, $sp, 432 # 8-byte Folded Reload ori $s7, $zero, 1 ld.d $s5, $sp, 448 # 8-byte Folded Reload - ld.d $a6, $sp, 464 # 8-byte Folded Reload + ld.d $a6, $sp, 472 # 8-byte Folded Reload .p2align 4, , 16 .LBB493_729: # Parent Loop BB493_566 Depth=1 # => This Inner Loop Header: Depth=2 @@ -111572,7 +111501,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin jirl $ra, $ra, 0 ld.d $s8, $sp, 320 # 8-byte Folded Reload .LBB493_750: # in Loop: Header=BB493_566 Depth=1 - ld.d $a6, $sp, 464 # 8-byte Folded Reload + ld.d $a6, $sp, 472 # 8-byte Folded Reload ld.w $fp, $s4, 24 ld.w $s1, $s4, 28 ld.d $a0, $sp, 192 # 8-byte Folded Reload @@ -111588,7 +111517,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin # in Loop: Header=BB493_566 Depth=1 ld.d $a0, $a1, 0 ld.bu $a0, $a0, 42 - ld.d $a6, $sp, 464 # 8-byte Folded Reload + ld.d $a6, $sp, 472 # 8-byte Folded Reload beqz $a0, .LBB493_963 # %bb.753: # in Loop: Header=BB493_566 Depth=1 move $fp, $zero @@ -111950,7 +111879,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin add.d $a4, $a3, $a0 ori $a5, $zero, 99 stx.h $a5, $a3, $a0 - ld.d $a0, $sp, 464 # 8-byte Folded Reload + ld.d $a0, $sp, 472 # 8-byte Folded Reload st.w $a0, $a4, 4 ld.d $a0, $sp, 416 # 8-byte Folded Reload st.w $a0, $a4, 8 @@ -111990,7 +111919,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin blez $a0, .LBB493_814 # %bb.804: # %.lr.ph1769 # in Loop: Header=BB493_566 Depth=1 - ld.d $a1, $sp, 496 + ld.d $a1, $sp, 504 move $a2, $zero b .LBB493_806 .p2align 4, , 16 @@ -112053,7 +111982,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ld.d $a0, $sp, 272 # 8-byte Folded Reload ld.w $a0, $a0, 24 st.w $s7, $s1, 48 - ld.d $a6, $sp, 464 # 8-byte Folded Reload + ld.d $a6, $sp, 472 # 8-byte Folded Reload .LBB493_815: # %sqlite3VdbeAddOp2.exit1363 # in Loop: Header=BB493_566 Depth=1 st.w $a6, $s1, 52 @@ -112062,13 +111991,13 @@ sqlite3WhereBegin: # @sqlite3WhereBegin st.w $a0, $s1, 56 .LBB493_817: # %sqlite3ReleaseTempReg.exit1099 # in Loop: Header=BB493_566 Depth=1 - ld.w $a1, $sp, 984 + ld.w $a1, $sp, 992 addi.w $a0, $zero, -1 blez $a1, .LBB493_821 # %bb.818: # %.lr.ph.i1366 # in Loop: Header=BB493_566 Depth=1 move $a2, $zero - addi.d $a3, $sp, 988 + addi.d $a3, $sp, 996 .p2align 4, , 16 .LBB493_819: # Parent Loop BB493_566 Depth=1 # => This Inner Loop Header: Depth=2 @@ -112080,7 +112009,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin bne $a1, $a2, .LBB493_819 .LBB493_821: # %getMask.exit1371 # in Loop: Header=BB493_566 Depth=1 - ld.w $a1, $sp, 488 + ld.w $a1, $sp, 496 and $s6, $a0, $s6 bgtz $a1, .LBB493_823 b .LBB493_830 @@ -112088,12 +112017,12 @@ sqlite3WhereBegin: # @sqlite3WhereBegin .LBB493_822: # in Loop: Header=BB493_566 Depth=1 sll.d $a0, $s7, $a2 nor $a0, $a0, $zero - ld.w $a1, $sp, 488 + ld.w $a1, $sp, 496 and $s6, $a0, $s6 blez $a1, .LBB493_830 .LBB493_823: # %.lr.ph1773 # in Loop: Header=BB493_566 Depth=1 - ld.d $fp, $sp, 496 + ld.d $fp, $sp, 504 addi.d $s0, $a1, 1 b .LBB493_826 .LBB493_824: # in Loop: Header=BB493_826 Depth=2 @@ -112172,12 +112101,12 @@ sqlite3WhereBegin: # @sqlite3WhereBegin st.b $zero, $a0, 339 .LBB493_836: # %sqlite3VdbeAddOp2.exit1382 # in Loop: Header=BB493_566 Depth=1 - ld.w $a0, $sp, 488 + ld.w $a0, $sp, 496 ld.d $s1, $sp, 192 # 8-byte Folded Reload blez $a0, .LBB493_565 # %bb.837: # %.lr.ph1777.preheader # in Loop: Header=BB493_566 Depth=1 - ld.d $fp, $sp, 496 + ld.d $fp, $sp, 504 move $s0, $zero b .LBB493_839 .p2align 4, , 16 @@ -112203,7 +112132,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin pcaddu18i $ra, %call36(sqlite3ExprIfFalse) jirl $ra, $ra, 0 ld.b $a1, $fp, 16 - ld.w $a0, $sp, 488 + ld.w $a0, $sp, 496 ori $a1, $a1, 4 st.b $a1, $fp, 16 b .LBB493_838 @@ -112340,7 +112269,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin jirl $ra, $ra, 0 ld.d $s8, $sp, 320 # 8-byte Folded Reload .LBB493_856: # in Loop: Header=BB493_566 Depth=1 - ld.d $a6, $sp, 464 # 8-byte Folded Reload + ld.d $a6, $sp, 472 # 8-byte Folded Reload ld.w $fp, $s4, 24 ld.w $s1, $s4, 28 ld.d $a0, $sp, 192 # 8-byte Folded Reload @@ -112355,7 +112284,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin # in Loop: Header=BB493_566 Depth=1 ld.d $a0, $s4, 0 ld.bu $a0, $a0, 42 - ld.d $a6, $sp, 464 # 8-byte Folded Reload + ld.d $a6, $sp, 472 # 8-byte Folded Reload beqz $a0, .LBB493_969 # %bb.859: # in Loop: Header=BB493_566 Depth=1 move $fp, $zero @@ -112684,8 +112613,8 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ori $a1, $zero, 36 maskeqz $a1, $a1, $s4 or $a4, $a1, $a0 - addi.d $a0, $sp, 472 - ld.d $a1, $sp, 464 # 8-byte Folded Reload + addi.d $a0, $sp, 480 + ld.d $a1, $sp, 472 # 8-byte Folded Reload move $a3, $s6 move $a5, $s2 pcaddu18i $ra, %call36(findTerm) @@ -113354,7 +113283,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin # in Loop: Header=BB493_566 Depth=1 ld.d $a0, $a1, 0 ld.bu $a0, $a0, 42 - ld.d $a6, $sp, 464 # 8-byte Folded Reload + ld.d $a6, $sp, 472 # 8-byte Folded Reload bnez $a0, .LBB493_987 # %bb.985: # %resizeOpArray.exit._crit_edge.i1346 # in Loop: Header=BB493_566 Depth=1 @@ -113370,7 +113299,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin add.d $a2, $a1, $a0 ori $a3, $zero, 116 stx.h $a3, $a1, $a0 - ld.d $a6, $sp, 464 # 8-byte Folded Reload + ld.d $a6, $sp, 472 # 8-byte Folded Reload st.w $a6, $a2, 4 st.w $zero, $a2, 8 st.w $s0, $a2, 12 @@ -113750,7 +113679,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin add.d $a2, $a1, $a0 ori $a3, $zero, 116 stx.h $a3, $a1, $a0 - ld.d $a0, $sp, 464 # 8-byte Folded Reload + ld.d $a0, $sp, 472 # 8-byte Folded Reload st.w $a0, $a2, 4 st.w $zero, $a2, 8 st.w $s0, $a2, 12 @@ -113783,7 +113712,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ld.d $a0, $sp, 408 # 8-byte Folded Reload st.w $a0, $s1, 52 st.w $s2, $s1, 56 - ld.d $a6, $sp, 464 # 8-byte Folded Reload + ld.d $a6, $sp, 472 # 8-byte Folded Reload b .LBB493_817 .LBB493_1039: # in Loop: Header=BB493_566 Depth=1 sltui $a0, $s5, 1 @@ -114049,7 +113978,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin maskeqz $a1, $a1, $s3 or $fp, $a1, $a0 ld.d $a3, $sp, 192 # 8-byte Folded Reload - ld.d $a4, $sp, 464 # 8-byte Folded Reload + ld.d $a4, $sp, 472 # 8-byte Folded Reload ori $s7, $zero, 1 .p2align 4, , 16 .LBB493_1074: # Parent Loop BB493_566 Depth=1 @@ -114136,7 +114065,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ori $a3, $zero, 38 stx.h $a3, $a0, $a1 ld.d $a1, $sp, 272 # 8-byte Folded Reload - ld.d $a0, $sp, 464 # 8-byte Folded Reload + ld.d $a0, $sp, 472 # 8-byte Folded Reload st.w $a0, $a2, 4 st.d $zero, $a2, 16 ld.w $s2, $a1, 24 @@ -114195,7 +114124,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin .LBB493_1092: # %sqlite3VdbeChangeP5.exit # in Loop: Header=BB493_566 Depth=1 ld.d $s1, $sp, 192 # 8-byte Folded Reload - ld.d $a6, $sp, 464 # 8-byte Folded Reload + ld.d $a6, $sp, 472 # 8-byte Folded Reload beqz $s0, .LBB493_817 # %bb.1093: # in Loop: Header=BB493_566 Depth=1 ld.bu $a0, $s8, 37 @@ -114371,15 +114300,15 @@ sqlite3WhereBegin: # @sqlite3WhereBegin ld.d $a0, $sp, 272 # 8-byte Folded Reload ld.w $a0, $a0, 24 move $s5, $zero - ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $a1, $sp, 128 # 8-byte Folded Reload st.w $a0, $a1, 16 b .LBB493_1118 .LBB493_1117: move $s5, $zero .LBB493_1118: # %._crit_edge1787 - ld.w $a1, $sp, 488 - ld.d $a0, $sp, 496 - ld.d $a2, $sp, 136 # 8-byte Folded Reload + ld.w $a1, $sp, 496 + ld.d $a0, $sp, 504 + ld.d $a2, $sp, 128 # 8-byte Folded Reload st.w $s5, $a2, 20 blez $a1, .LBB493_1124 # %bb.1119: # %.lr.ph.i1388.preheader @@ -114403,15 +114332,15 @@ sqlite3WhereBegin: # @sqlite3WhereBegin jirl $ra, $ra, 0 b .LBB493_1120 .LBB493_1123: # %._crit_edge.loopexit.i - ld.d $a0, $sp, 496 + ld.d $a0, $sp, 504 .LBB493_1124: # %._crit_edge.i1383 - addi.d $a1, $sp, 504 + addi.d $a1, $sp, 512 beq $a0, $a1, .LBB493_1127 # %bb.1125: # %._crit_edge.i1383 beqz $a0, .LBB493_1127 # %bb.1126: ld.w $a1, $a0, -8 - ld.d $a3, $sp, 96 # 8-byte Folded Reload + ld.d $a3, $sp, 88 # 8-byte Folded Reload ld.d $a2, $a3, %pc_lo12(mem.5) addi.d $a0, $a0, -8 sub.d $a1, $a2, $a1 @@ -114419,7 +114348,7 @@ sqlite3WhereBegin: # @sqlite3WhereBegin pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 .LBB493_1127: - ld.d $a0, $sp, 136 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload b .LBB493_50 .LBB493_1128: ori $a0, $zero, 1 @@ -153747,24 +153676,7 @@ detachFunc: # @detachFunc .Lfunc_end613: .size detachFunc, .Lfunc_end613-detachFunc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function juliandayFunc -.LCPI614_0: - .dword 0x4076d40000000000 # double 365.25 -.LCPI614_1: - .dword 0x403e99a027525461 # double 30.600100000000001 -.LCPI614_2: - .dword 0xc097d20000000000 # double -1524.5 -.LCPI614_3: - .dword 0x404e000000000000 # double 60 -.LCPI614_4: - .dword 0x40ac200000000000 # double 3600 -.LCPI614_5: - .dword 0x40f5180000000000 # double 86400 -.LCPI614_6: - .dword 0xc0f5180000000000 # double -86400 - .text - .p2align 5 + .p2align 5 # -- Begin function juliandayFunc .type juliandayFunc,@function juliandayFunc: # @juliandayFunc # %bb.0: @@ -153811,96 +153723,111 @@ juliandayFunc: # @juliandayFunc ori $a4, $zero, 3 maskeqz $a4, $a4, $a2 masknez $a1, $a1, $a2 - or $a1, $a4, $a1 - masknez $a4, $a0, $a2 + or $a4, $a4, $a1 + masknez $a1, $a0, $a2 ori $a0, $zero, 1 maskeqz $a5, $a0, $a2 - or $a4, $a5, $a4 + or $a1, $a5, $a1 masknez $a3, $a3, $a2 ori $a5, $zero, 2000 maskeqz $a2, $a5, $a2 or $a2, $a2, $a3 - slti $a3, $a4, 3 - addi.d $a5, $a4, 12 - masknez $a4, $a4, $a3 + slti $a3, $a1, 3 + addi.d $a5, $a1, 12 + masknez $a1, $a1, $a3 maskeqz $a5, $a5, $a3 - or $a4, $a5, $a4 - sub.d $a2, $a2, $a3 - addi.w $a3, $a2, 0 - lu12i.w $a5, -335545 - ori $a5, $a5, 2785 - mul.d $a5, $a3, $a5 - srli.d $a6, $a5, 63 - srai.d $a5, $a5, 37 - add.d $a5, $a5, $a6 - lu12i.w $a6, 335544 - ori $a6, $a6, 1311 - mul.d $a3, $a3, $a6 + or $a5, $a5, $a1 + sub.d $a1, $a2, $a3 + addi.w $a2, $a1, 0 + lu12i.w $a3, -335545 + ori $a3, $a3, 2785 + mul.d $a3, $a2, $a3 srli.d $a6, $a3, 63 - srai.d $a3, $a3, 39 + srai.d $a3, $a3, 37 add.d $a3, $a3, $a6 - pcalau12i $a6, %pc_hi20(.LCPI614_0) - fld.d $fa0, $a6, %pc_lo12(.LCPI614_0) + lu12i.w $a6, 335544 + ori $a6, $a6, 1311 + mul.d $a2, $a2, $a6 + srli.d $a6, $a2, 63 + srai.d $a2, $a2, 39 + add.d $a2, $a2, $a6 lu12i.w $a6, 1 ori $a6, $a6, 620 - add.d $a2, $a2, $a6 - movgr2fr.w $fa1, $a2 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + add.d $a1, $a1, $a6 + movgr2fr.w $fa0, $a1 + ffint.d.w $fa0, $fa0 + ori $a1, $zero, 0 + ori $a6, $zero, 0 + lu32i.d $a6, 447488 + lu52i.d $a6, $a6, 1031 + movgr2fr.d $fa1, $a6 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a2, $fa0 - pcalau12i $a6, %pc_hi20(.LCPI614_1) - fld.d $fa0, $a6, %pc_lo12(.LCPI614_1) - addi.d $a4, $a4, 1 - movgr2fr.w $fa1, $a4 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movfr2gr.s $a6, $fa0 + addi.d $a5, $a5, 1 + movgr2fr.w $fa0, $a5 + ffint.d.w $fa0, $fa0 + lu12i.w $a5, 161061 + ori $a5, $a5, 1121 + lu32i.d $a5, -91744 + lu52i.d $a5, $a5, 1027 + movgr2fr.d $fa1, $a5 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a4, $fa0 - add.d $a1, $a5, $a1 - add.d $a1, $a1, $a3 - add.d $a1, $a1, $a4 - add.d $a1, $a1, $a2 - pcalau12i $a2, %pc_hi20(.LCPI614_2) - fld.d $fa0, $a2, %pc_lo12(.LCPI614_2) - movgr2fr.w $fa1, $a1 - ld.bu $a1, $sp, 49 - ffint.d.w $fa1, $fa1 - fadd.d $fs0, $fa1, $fa0 + movfr2gr.s $a5, $fa0 + add.d $a3, $a3, $a4 + add.d $a2, $a3, $a2 + add.d $a2, $a2, $a5 + add.d $a2, $a2, $a6 + movgr2fr.w $fa0, $a2 + ffint.d.w $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, 512512 + lu52i.d $a2, $a2, -1015 + ld.bu $a3, $sp, 49 + movgr2fr.d $fa1, $a2 + fadd.d $fs0, $fa0, $fa1 fst.d $fs0, $sp, 8 st.b $a0, $sp, 50 - beqz $a1, .LBB614_4 + beqz $a3, .LBB614_4 # %bb.8: ld.w $a0, $sp, 28 + ld.w $a2, $sp, 32 movgr2fr.w $fa0, $a0 - ld.w $a0, $sp, 32 - pcalau12i $a1, %pc_hi20(.LCPI614_3) - fld.d $fa1, $a1, %pc_lo12(.LCPI614_3) ffint.d.w $fa0, $fa0 - movgr2fr.w $fa2, $a0 - ffint.d.w $fa2, $fa2 - fmul.d $fa1, $fa2, $fa1 - pcalau12i $a0, %pc_hi20(.LCPI614_4) - fld.d $fa2, $a0, %pc_lo12(.LCPI614_4) - fld.d $fa3, $sp, 40 - pcalau12i $a0, %pc_hi20(.LCPI614_5) - fld.d $fa4, $a0, %pc_lo12(.LCPI614_5) - fmadd.d $fa0, $fa0, $fa2, $fa1 - ld.bu $a0, $sp, 51 - fadd.d $fa0, $fa3, $fa0 - fdiv.d $fa0, $fa0, $fa4 + movgr2fr.w $fa1, $a2 + ffint.d.w $fa1, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, -131072 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fa2, $a0 + fmul.d $fa1, $fa1, $fa2 + ori $a0, $zero, 0 + lu32i.d $a0, -253952 + fld.d $fa2, $sp, 40 + lu52i.d $a0, $a0, 1034 + movgr2fr.d $fa3, $a0 + fmadd.d $fa0, $fa0, $fa3, $fa1 + fadd.d $fa0, $fa2, $fa0 + lu32i.d $a1, 333824 + lu52i.d $a0, $a1, 1039 + ld.bu $a1, $sp, 51 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 fadd.d $fs0, $fs0, $fa0 fst.d $fs0, $sp, 8 - beqz $a0, .LBB614_3 + beqz $a1, .LBB614_3 # %bb.9: ld.w $a0, $sp, 36 ori $a1, $zero, 60 - pcalau12i $a2, %pc_hi20(.LCPI614_6) - fld.d $fa0, $a2, %pc_lo12(.LCPI614_6) mul.d $a0, $a0, $a1 - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, 333824 + lu52i.d $a0, $a0, -1009 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 fadd.d $fs0, $fs0, $fa0 fst.d $fs0, $sp, 8 st.h $zero, $sp, 48 @@ -153978,20 +153905,7 @@ dateFunc: # @dateFunc .Lfunc_end615: .size dateFunc, .Lfunc_end615-dateFunc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function timeFunc -.LCPI616_0: - .dword 0x4076d40000000000 # double 365.25 -.LCPI616_1: - .dword 0x403e99a027525461 # double 30.600100000000001 -.LCPI616_2: - .dword 0xc097d20000000000 # double -1524.5 -.LCPI616_3: - .dword 0x4194997000000000 # double 8.64E+7 -.LCPI616_4: - .dword 0x3f50624dd2f1a9fc # double 0.001 - .text - .p2align 5 + .p2align 5 # -- Begin function timeFunc .type timeFunc,@function timeFunc: # @timeFunc # %bb.0: @@ -154058,33 +153972,40 @@ timeFunc: # @timeFunc srli.d $a6, $a3, 63 srai.d $a3, $a3, 39 add.d $a3, $a3, $a6 - pcalau12i $a6, %pc_hi20(.LCPI616_0) - fld.d $fa0, $a6, %pc_lo12(.LCPI616_0) lu12i.w $a6, 1 ori $a6, $a6, 620 add.d $a2, $a2, $a6 - movgr2fr.w $fa1, $a2 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a2 + ffint.d.w $fa0, $fa0 + ori $a2, $zero, 0 + ori $a6, $zero, 0 + lu32i.d $a6, 447488 + lu52i.d $a6, $a6, 1031 + movgr2fr.d $fa1, $a6 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a2, $fa0 - pcalau12i $a6, %pc_hi20(.LCPI616_1) - fld.d $fa0, $a6, %pc_lo12(.LCPI616_1) + movfr2gr.s $a6, $fa0 addi.d $a4, $a4, 1 - movgr2fr.w $fa1, $a4 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a4 + ffint.d.w $fa0, $fa0 + lu12i.w $a4, 161061 + ori $a4, $a4, 1121 + lu32i.d $a4, -91744 + lu52i.d $a4, $a4, 1027 + movgr2fr.d $fa1, $a4 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a4, $fa0 add.d $a1, $a5, $a1 add.d $a1, $a1, $a3 add.d $a1, $a1, $a4 - pcalau12i $a3, %pc_hi20(.LCPI616_2) - fld.d $fa0, $a3, %pc_lo12(.LCPI616_2) - add.d $a1, $a1, $a2 - movgr2fr.w $fa1, $a1 - ffint.d.w $fa1, $fa1 - fadd.d $fa0, $fa1, $fa0 + add.d $a1, $a1, $a6 + movgr2fr.w $fa0, $a1 + ffint.d.w $fa0, $fa0 + lu32i.d $a2, 512512 + lu52i.d $a1, $a2, -1015 + movgr2fr.d $fa1, $a1 + fadd.d $fa0, $fa0, $fa1 fst.d $fa0, $sp, 120 st.b $a0, $sp, 162 .LBB616_6: # %computeJD.exit @@ -154092,18 +154013,23 @@ timeFunc: # @timeFunc fadd.d $fa0, $fa0, $fa1 ftintrz.w.d $fa2, $fa0 movfr2gr.s $a0, $fa2 - pcalau12i $a1, %pc_hi20(.LCPI616_3) - fld.d $fa2, $a1, %pc_lo12(.LCPI616_3) - movgr2fr.w $fa3, $a0 - ffint.d.w $fa3, $fa3 - fsub.d $fa0, $fa0, $fa3 + movgr2fr.w $fa2, $a0 + ffint.d.w $fa2, $fa2 + fsub.d $fa0, $fa0, $fa2 + ori $a0, $zero, 0 + lu32i.d $a0, 301424 + lu52i.d $a0, $a0, 1049 + movgr2fr.d $fa2, $a0 fmadd.d $fa0, $fa0, $fa2, $fa1 ftintrz.w.d $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI616_4) - fld.d $fa1, $a0, %pc_lo12(.LCPI616_4) movfr2gr.s $a0, $fa0 movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a1, $fa1 @@ -154186,20 +154112,7 @@ timeFunc: # @timeFunc .Lfunc_end616: .size timeFunc, .Lfunc_end616-timeFunc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function datetimeFunc -.LCPI617_0: - .dword 0x4076d40000000000 # double 365.25 -.LCPI617_1: - .dword 0x403e99a027525461 # double 30.600100000000001 -.LCPI617_2: - .dword 0xc097d20000000000 # double -1524.5 -.LCPI617_3: - .dword 0x4194997000000000 # double 8.64E+7 -.LCPI617_4: - .dword 0x3f50624dd2f1a9fc # double 0.001 - .text - .p2align 5 + .p2align 5 # -- Begin function datetimeFunc .type datetimeFunc,@function datetimeFunc: # @datetimeFunc # %bb.0: @@ -154269,33 +154182,40 @@ datetimeFunc: # @datetimeFunc srli.d $a6, $a3, 63 srai.d $a3, $a3, 39 add.d $a3, $a3, $a6 - pcalau12i $a6, %pc_hi20(.LCPI617_0) - fld.d $fa0, $a6, %pc_lo12(.LCPI617_0) lu12i.w $a6, 1 ori $a6, $a6, 620 add.d $a2, $a2, $a6 - movgr2fr.w $fa1, $a2 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a2 + ffint.d.w $fa0, $fa0 + ori $a2, $zero, 0 + ori $a6, $zero, 0 + lu32i.d $a6, 447488 + lu52i.d $a6, $a6, 1031 + movgr2fr.d $fa1, $a6 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a2, $fa0 - pcalau12i $a6, %pc_hi20(.LCPI617_1) - fld.d $fa0, $a6, %pc_lo12(.LCPI617_1) + movfr2gr.s $a6, $fa0 addi.d $a4, $a4, 1 - movgr2fr.w $fa1, $a4 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a4 + ffint.d.w $fa0, $fa0 + lu12i.w $a4, 161061 + ori $a4, $a4, 1121 + lu32i.d $a4, -91744 + lu52i.d $a4, $a4, 1027 + movgr2fr.d $fa1, $a4 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a4, $fa0 add.d $a1, $a5, $a1 add.d $a1, $a1, $a3 add.d $a1, $a1, $a4 - pcalau12i $a3, %pc_hi20(.LCPI617_2) - fld.d $fa0, $a3, %pc_lo12(.LCPI617_2) - add.d $a1, $a1, $a2 - movgr2fr.w $fa1, $a1 - ffint.d.w $fa1, $fa1 - fadd.d $fa0, $fa1, $fa0 + add.d $a1, $a1, $a6 + movgr2fr.w $fa0, $a1 + ffint.d.w $fa0, $fa0 + lu32i.d $a2, 512512 + lu52i.d $a1, $a2, -1015 + movgr2fr.d $fa1, $a1 + fadd.d $fa0, $fa0, $fa1 fst.d $fa0, $sp, 120 st.b $a0, $sp, 162 .LBB617_6: # %computeJD.exit @@ -154303,18 +154223,23 @@ datetimeFunc: # @datetimeFunc fadd.d $fa0, $fa0, $fa1 ftintrz.w.d $fa2, $fa0 movfr2gr.s $a0, $fa2 - pcalau12i $a1, %pc_hi20(.LCPI617_3) - fld.d $fa2, $a1, %pc_lo12(.LCPI617_3) - movgr2fr.w $fa3, $a0 - ffint.d.w $fa3, $fa3 - fsub.d $fa0, $fa0, $fa3 + movgr2fr.w $fa2, $a0 + ffint.d.w $fa2, $fa2 + fsub.d $fa0, $fa0, $fa2 + ori $a0, $zero, 0 + lu32i.d $a0, 301424 + lu52i.d $a0, $a0, 1049 + movgr2fr.d $fa2, $a0 fmadd.d $fa0, $fa0, $fa2, $fa1 ftintrz.w.d $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI617_4) - fld.d $fa1, $a0, %pc_lo12(.LCPI617_4) movfr2gr.s $a0, $fa0 movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a1, $fa1 @@ -154401,53 +154326,29 @@ datetimeFunc: # @datetimeFunc .Lfunc_end617: .size datetimeFunc, .Lfunc_end617-datetimeFunc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function strftimeFunc -.LCPI618_0: - .dword 0x4076d40000000000 # double 365.25 -.LCPI618_1: - .dword 0x403e99a027525461 # double 30.600100000000001 -.LCPI618_2: - .dword 0xc097d20000000000 # double -1524.5 -.LCPI618_3: - .dword 0x404e000000000000 # double 60 -.LCPI618_4: - .dword 0x40ac200000000000 # double 3600 -.LCPI618_5: - .dword 0x40f5180000000000 # double 86400 -.LCPI618_6: - .dword 0xc0f5180000000000 # double -86400 -.LCPI618_7: - .dword 0x4194997000000000 # double 8.64E+7 -.LCPI618_8: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI618_9: - .dword 0xc1429ec5c0000000 # double -2440587.5 -.LCPI618_10: - .dword 0x404dffdf3b645a1d # double 59.999000000000002 - .text - .p2align 5 + .p2align 5 # -- Begin function strftimeFunc .type strftimeFunc,@function strftimeFunc: # @strftimeFunc # %bb.0: - addi.d $sp, $sp, -352 - st.d $ra, $sp, 344 # 8-byte Folded Spill - st.d $fp, $sp, 336 # 8-byte Folded Spill - st.d $s0, $sp, 328 # 8-byte Folded Spill - st.d $s1, $sp, 320 # 8-byte Folded Spill - st.d $s2, $sp, 312 # 8-byte Folded Spill - st.d $s3, $sp, 304 # 8-byte Folded Spill - st.d $s4, $sp, 296 # 8-byte Folded Spill - st.d $s5, $sp, 288 # 8-byte Folded Spill - st.d $s6, $sp, 280 # 8-byte Folded Spill - st.d $s7, $sp, 272 # 8-byte Folded Spill - st.d $s8, $sp, 264 # 8-byte Folded Spill - fst.d $fs0, $sp, 256 # 8-byte Folded Spill - fst.d $fs1, $sp, 248 # 8-byte Folded Spill - fst.d $fs2, $sp, 240 # 8-byte Folded Spill - fst.d $fs3, $sp, 232 # 8-byte Folded Spill - fst.d $fs4, $sp, 224 # 8-byte Folded Spill - fst.d $fs5, $sp, 216 # 8-byte Folded Spill + addi.d $sp, $sp, -368 + st.d $ra, $sp, 360 # 8-byte Folded Spill + st.d $fp, $sp, 352 # 8-byte Folded Spill + st.d $s0, $sp, 344 # 8-byte Folded Spill + st.d $s1, $sp, 336 # 8-byte Folded Spill + st.d $s2, $sp, 328 # 8-byte Folded Spill + st.d $s3, $sp, 320 # 8-byte Folded Spill + st.d $s4, $sp, 312 # 8-byte Folded Spill + st.d $s5, $sp, 304 # 8-byte Folded Spill + st.d $s6, $sp, 296 # 8-byte Folded Spill + st.d $s7, $sp, 288 # 8-byte Folded Spill + st.d $s8, $sp, 280 # 8-byte Folded Spill + fst.d $fs0, $sp, 272 # 8-byte Folded Spill + fst.d $fs1, $sp, 264 # 8-byte Folded Spill + fst.d $fs2, $sp, 256 # 8-byte Folded Spill + fst.d $fs3, $sp, 248 # 8-byte Folded Spill + fst.d $fs4, $sp, 240 # 8-byte Folded Spill + fst.d $fs5, $sp, 232 # 8-byte Folded Spill + fst.d $fs6, $sp, 224 # 8-byte Folded Spill move $s1, $a2 ld.d $a2, $a2, 0 move $s2, $a1 @@ -154456,16 +154357,16 @@ strftimeFunc: # @strftimeFunc move $a0, $a2 pcaddu18i $ra, %call36(sqlite3ValueText) jirl $ra, $ra, 0 - beqz $a0, .LBB618_60 + beqz $a0, .LBB618_61 # %bb.1: move $s0, $a0 addi.w $a1, $s2, -1 addi.d $a2, $s1, 8 - addi.d $a3, $sp, 168 + addi.d $a3, $sp, 176 move $a0, $s3 pcaddu18i $ra, %call36(isDate) jirl $ra, $ra, 0 - bnez $a0, .LBB618_60 + bnez $a0, .LBB618_61 # %bb.2: # %.preheader.preheader ori $a0, $zero, 1 ori $a1, $zero, 37 @@ -154489,7 +154390,7 @@ strftimeFunc: # @strftimeFunc addi.w $a4, $a4, 1 ldx.bu $a5, $s0, $a4 addi.d $a5, $a5, -37 - bltu $a2, $a5, .LBB618_60 + bltu $a2, $a5, .LBB618_61 # %bb.7: # in Loop: Header=BB618_5 Depth=1 slli.d $a5, $a5, 2 ldx.w $a5, $a3, $a5 @@ -154509,7 +154410,7 @@ strftimeFunc: # @strftimeFunc b .LBB618_4 .LBB618_12: ori $a1, $zero, 100 - addi.d $s1, $sp, 68 + addi.d $s1, $sp, 76 bltu $a0, $a1, .LBB618_16 # %bb.13: lu12i.w $a1, 244140 @@ -154524,7 +154425,7 @@ strftimeFunc: # @strftimeFunc lu52i.d $a0, $a0, 16 st.d $a0, $s3, 48 st.d $fp, $s3, 16 - b .LBB618_60 + b .LBB618_61 .LBB618_15: pcaddu18i $ra, %call36(sqlite3_malloc) jirl $ra, $ra, 0 @@ -154532,133 +154433,139 @@ strftimeFunc: # @strftimeFunc beqz $a0, .LBB618_23 .LBB618_16: st.d $s3, $sp, 8 # 8-byte Folded Spill - ld.bu $a0, $sp, 210 - lu12i.w $t1, -335545 - lu12i.w $t2, 335544 - lu12i.w $t3, 1 - pcalau12i $s4, %pc_hi20(.LCPI618_0) - pcalau12i $s5, %pc_hi20(.LCPI618_2) - pcalau12i $s6, %pc_hi20(.LCPI618_3) - pcalau12i $fp, %pc_hi20(.LCPI618_4) - pcalau12i $t0, %pc_hi20(.LCPI618_5) - pcalau12i $a7, %pc_hi20(.LCPI618_6) + ld.bu $a0, $sp, 218 + lu12i.w $fp, -335545 + lu12i.w $s4, 335544 + lu12i.w $s5, 1 beqz $a0, .LBB618_20 .LBB618_17: # %computeJD.exit - addi.d $a0, $sp, 168 - move $s2, $a7 - move $s3, $t0 - move $s7, $t1 - move $s8, $t2 + addi.d $a0, $sp, 176 pcaddu18i $ra, %call36(computeYMD) jirl $ra, $ra, 0 - lu12i.w $t3, 1 - move $t2, $s8 - move $t1, $s7 - move $t0, $s3 - move $a7, $s2 - ld.bu $a0, $sp, 209 + ld.bu $a0, $sp, 217 bnez $a0, .LBB618_26 # %bb.18: - ld.bu $a0, $sp, 210 + ld.bu $a0, $sp, 218 beqz $a0, .LBB618_24 # %bb.19: # %.computeJD.exit131_crit_edge - fld.d $fa0, $sp, 168 + fld.d $fa0, $sp, 176 b .LBB618_25 .LBB618_20: - ld.bu $a0, $sp, 208 - ld.w $a1, $sp, 184 - sltui $a2, $a0, 1 - ld.w $a3, $sp, 176 - ld.w $a0, $sp, 180 + ld.bu $a0, $sp, 216 + ld.w $a1, $sp, 192 + sltui $a0, $a0, 1 + ld.w $a2, $sp, 184 + ld.w $a3, $sp, 188 addi.w $a1, $a1, 2 ori $a4, $zero, 3 - maskeqz $a4, $a4, $a2 - masknez $a1, $a1, $a2 - or $a1, $a4, $a1 - masknez $a4, $a0, $a2 - ori $a0, $zero, 1 - maskeqz $a5, $a0, $a2 - or $a4, $a5, $a4 - masknez $a3, $a3, $a2 + maskeqz $a4, $a4, $a0 + masknez $a1, $a1, $a0 + or $a4, $a4, $a1 + masknez $a3, $a3, $a0 + ori $a1, $zero, 1 + maskeqz $a5, $a1, $a0 + or $a3, $a5, $a3 + masknez $a2, $a2, $a0 ori $a5, $zero, 2000 - maskeqz $a2, $a5, $a2 - or $a2, $a2, $a3 - slti $a3, $a4, 3 - addi.d $a5, $a4, 12 - masknez $a4, $a4, $a3 - maskeqz $a5, $a5, $a3 - or $a4, $a5, $a4 - sub.d $a2, $a2, $a3 - addi.w $a3, $a2, 0 - ori $a5, $t1, 2785 - mul.d $a5, $a3, $a5 + maskeqz $a0, $a5, $a0 + or $a0, $a0, $a2 + slti $a2, $a3, 3 + addi.d $a5, $a3, 12 + masknez $a3, $a3, $a2 + maskeqz $a5, $a5, $a2 + or $a3, $a5, $a3 + sub.d $a0, $a0, $a2 + addi.w $a2, $a0, 0 + ori $a5, $fp, 2785 + mul.d $a5, $a2, $a5 srli.d $a6, $a5, 63 srai.d $a5, $a5, 37 add.d $a5, $a5, $a6 - ori $a6, $t2, 1311 - mul.d $a3, $a3, $a6 - srli.d $a6, $a3, 63 - srai.d $a3, $a3, 39 - add.d $a3, $a3, $a6 - ori $a6, $t3, 620 - fld.d $fa0, $s4, %pc_lo12(.LCPI618_0) + ori $a6, $s4, 1311 + mul.d $a2, $a2, $a6 + srli.d $a6, $a2, 63 + srai.d $a2, $a2, 39 add.d $a2, $a2, $a6 - movgr2fr.w $fa1, $a2 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + ori $a6, $s5, 620 + add.d $a0, $a0, $a6 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + ori $a0, $zero, 0 + ori $a6, $zero, 0 + lu32i.d $a6, 447488 + lu52i.d $a6, $a6, 1031 + movgr2fr.d $fa1, $a6 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a2, $fa0 - pcalau12i $a6, %pc_hi20(.LCPI618_1) - fld.d $fa0, $a6, %pc_lo12(.LCPI618_1) - addi.d $a4, $a4, 1 - movgr2fr.w $fa1, $a4 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movfr2gr.s $a6, $fa0 + addi.d $a3, $a3, 1 + movgr2fr.w $fa0, $a3 + ffint.d.w $fa0, $fa0 + lu12i.w $a3, 161061 + ori $a3, $a3, 1121 + lu32i.d $a3, -91744 + lu52i.d $a3, $a3, 1027 + movgr2fr.d $fa1, $a3 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a4, $fa0 - add.d $a1, $a5, $a1 - add.d $a1, $a1, $a3 - add.d $a1, $a1, $a4 - add.d $a1, $a1, $a2 - fld.d $fa0, $s5, %pc_lo12(.LCPI618_2) - movgr2fr.w $fa1, $a1 - ld.bu $a1, $sp, 209 - ffint.d.w $fa1, $fa1 - fadd.d $fa0, $fa1, $fa0 - fst.d $fa0, $sp, 168 - st.b $a0, $sp, 210 - beqz $a1, .LBB618_17 + movfr2gr.s $a3, $fa0 + add.d $a4, $a5, $a4 + add.d $a2, $a4, $a2 + add.d $a2, $a2, $a3 + add.d $a2, $a2, $a6 + movgr2fr.w $fa0, $a2 + ffint.d.w $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, 512512 + lu52i.d $a2, $a2, -1015 + ld.bu $a3, $sp, 217 + movgr2fr.d $fa1, $a2 + fadd.d $fa0, $fa0, $fa1 + fst.d $fa0, $sp, 176 + st.b $a1, $sp, 218 + beqz $a3, .LBB618_17 # %bb.21: - ld.w $a0, $sp, 188 - ld.w $a1, $sp, 192 - movgr2fr.w $fa1, $a0 + ld.w $a1, $sp, 196 + ld.w $a2, $sp, 200 + movgr2fr.w $fa1, $a1 ffint.d.w $fa1, $fa1 - fld.d $fa2, $s6, %pc_lo12(.LCPI618_3) - movgr2fr.w $fa3, $a1 - ffint.d.w $fa3, $fa3 - fld.d $fa4, $fp, %pc_lo12(.LCPI618_4) - fmul.d $fa2, $fa3, $fa2 - fld.d $fa3, $sp, 200 - fld.d $fa5, $t0, %pc_lo12(.LCPI618_5) + movgr2fr.w $fa2, $a2 + ffint.d.w $fa2, $fa2 + ori $a1, $zero, 0 + lu32i.d $a1, -131072 + lu52i.d $a1, $a1, 1028 + movgr2fr.d $fa3, $a1 + fmul.d $fa2, $fa2, $fa3 + ori $a1, $zero, 0 + lu32i.d $a1, -253952 + fld.d $fa3, $sp, 208 + lu52i.d $a1, $a1, 1034 + movgr2fr.d $fa4, $a1 fmadd.d $fa1, $fa1, $fa4, $fa2 - ld.bu $a0, $sp, 211 fadd.d $fa1, $fa3, $fa1 - fdiv.d $fa1, $fa1, $fa5 + lu32i.d $a0, 333824 + lu52i.d $a0, $a0, 1039 + ld.bu $a1, $sp, 219 + movgr2fr.d $fa2, $a0 + fdiv.d $fa1, $fa1, $fa2 fadd.d $fa0, $fa0, $fa1 - fst.d $fa0, $sp, 168 - beqz $a0, .LBB618_17 + fst.d $fa0, $sp, 176 + beqz $a1, .LBB618_17 # %bb.22: - ld.w $a0, $sp, 196 + ld.w $a0, $sp, 204 ori $a1, $zero, 60 - fld.d $fa1, $a7, %pc_lo12(.LCPI618_6) mul.d $a0, $a0, $a1 - movgr2fr.w $fa2, $a0 - ffint.d.w $fa2, $fa2 - fdiv.d $fa1, $fa2, $fa1 + movgr2fr.w $fa1, $a0 + ffint.d.w $fa1, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, 333824 + lu52i.d $a0, $a0, -1009 + movgr2fr.d $fa2, $a0 + fdiv.d $fa1, $fa1, $fa2 fadd.d $fa0, $fa0, $fa1 - fst.d $fa0, $sp, 168 - st.h $zero, $sp, 208 - st.b $zero, $sp, 211 + fst.d $fa0, $sp, 176 + st.h $zero, $sp, 216 + st.b $zero, $sp, 219 b .LBB618_17 .LBB618_23: ld.hu $a0, $s3, 52 @@ -154673,13 +154580,13 @@ strftimeFunc: # @strftimeFunc ori $a0, $zero, 7 st.w $a0, $s3, 72 st.b $a1, $a2, 42 - b .LBB618_60 + b .LBB618_61 .LBB618_24: - ld.bu $a0, $sp, 208 - ld.w $a1, $sp, 184 + ld.bu $a0, $sp, 216 + ld.w $a1, $sp, 192 sltui $a2, $a0, 1 - ld.w $a3, $sp, 176 - ld.w $a0, $sp, 180 + ld.w $a3, $sp, 184 + ld.w $a0, $sp, 188 addi.w $a1, $a1, 2 ori $a4, $zero, 3 maskeqz $a4, $a4, $a2 @@ -154700,59 +154607,73 @@ strftimeFunc: # @strftimeFunc or $a4, $a5, $a4 sub.d $a2, $a2, $a3 addi.w $a3, $a2, 0 - ori $a5, $t1, 2785 + ori $a5, $fp, 2785 mul.d $a5, $a3, $a5 srli.d $a6, $a5, 63 srai.d $a5, $a5, 37 add.d $a5, $a5, $a6 - ori $a6, $t2, 1311 + ori $a6, $s4, 1311 mul.d $a3, $a3, $a6 srli.d $a6, $a3, 63 srai.d $a3, $a3, 39 add.d $a3, $a3, $a6 - ori $a6, $t3, 620 - fld.d $fa0, $s4, %pc_lo12(.LCPI618_0) + ori $a6, $s5, 620 add.d $a2, $a2, $a6 - movgr2fr.w $fa1, $a2 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a2 + ffint.d.w $fa0, $fa0 + ori $a2, $zero, 0 + ori $a6, $zero, 0 + lu32i.d $a6, 447488 + lu52i.d $a6, $a6, 1031 + movgr2fr.d $fa1, $a6 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a2, $fa0 - pcalau12i $a6, %pc_hi20(.LCPI618_1) - fld.d $fa0, $a6, %pc_lo12(.LCPI618_1) + movfr2gr.s $a6, $fa0 addi.d $a4, $a4, 1 - movgr2fr.w $fa1, $a4 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a4 + ffint.d.w $fa0, $fa0 + lu12i.w $a4, 161061 + ori $a4, $a4, 1121 + lu32i.d $a4, -91744 + lu52i.d $a4, $a4, 1027 + movgr2fr.d $fa1, $a4 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a4, $fa0 add.d $a1, $a5, $a1 add.d $a1, $a1, $a3 add.d $a1, $a1, $a4 - fld.d $fa0, $s5, %pc_lo12(.LCPI618_2) - add.d $a1, $a1, $a2 - movgr2fr.w $fa1, $a1 - ffint.d.w $fa1, $fa1 - fadd.d $fa0, $fa1, $fa0 - fst.d $fa0, $sp, 168 - st.b $a0, $sp, 210 + add.d $a1, $a1, $a6 + movgr2fr.w $fa0, $a1 + ffint.d.w $fa0, $fa0 + lu32i.d $a2, 512512 + lu52i.d $a1, $a2, -1015 + movgr2fr.d $fa1, $a1 + fadd.d $fa0, $fa0, $fa1 + fst.d $fa0, $sp, 176 + st.b $a0, $sp, 218 .LBB618_25: # %computeJD.exit131 vldi $vr1, -928 fadd.d $fa0, $fa0, $fa1 ftintrz.w.d $fa2, $fa0 movfr2gr.s $a0, $fa2 - pcalau12i $a1, %pc_hi20(.LCPI618_7) - fld.d $fa2, $a1, %pc_lo12(.LCPI618_7) - movgr2fr.w $fa3, $a0 - ffint.d.w $fa3, $fa3 - fsub.d $fa0, $fa0, $fa3 + movgr2fr.w $fa2, $a0 + ffint.d.w $fa2, $fa2 + fsub.d $fa0, $fa0, $fa2 + ori $a0, $zero, 0 + lu32i.d $a0, 301424 + lu52i.d $a0, $a0, 1049 + movgr2fr.d $fa2, $a0 fmadd.d $fa0, $fa0, $fa2, $fa1 ftintrz.w.d $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI618_8) - fld.d $fa1, $a0, %pc_lo12(.LCPI618_8) movfr2gr.s $a0, $fa0 movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a0, $fa1 @@ -154767,7 +154688,7 @@ strftimeFunc: # @strftimeFunc bstrpick.d $a2, $a1, 31, 31 srai.d $a1, $a1, 11 add.d $a1, $a1, $a2 - st.w $a1, $sp, 188 + st.w $a1, $sp, 196 lu12i.w $a2, -1 ori $a2, $a2, 496 mul.d $a1, $a1, $a2 @@ -154781,16 +154702,16 @@ strftimeFunc: # @strftimeFunc bstrpick.d $a2, $a1, 31, 31 srli.d $a1, $a1, 5 add.d $a1, $a1, $a2 - st.w $a1, $sp, 192 + st.w $a1, $sp, 200 addi.w $a2, $zero, -60 mul.d $a1, $a1, $a2 add.d $a0, $a1, $a0 movgr2fr.w $fa1, $a0 ffint.d.w $fa1, $fa1 fadd.d $fa0, $fa0, $fa1 - fst.d $fa0, $sp, 200 + fst.d $fa0, $sp, 208 ori $a0, $zero, 1 - st.b $a0, $sp, 209 + st.b $a0, $sp, 217 .LBB618_26: # %computeYMD_HMS.exit ori $s7, $zero, 37 ori $s8, $zero, 47 @@ -154800,37 +154721,61 @@ strftimeFunc: # @strftimeFunc addi.d $s2, $a0, %pc_lo12(.L.str.644) pcalau12i $a0, %pc_hi20(.L.str.190) addi.d $a0, $a0, %pc_lo12(.L.str.190) - st.d $a0, $sp, 32 # 8-byte Folded Spill - ori $a0, $t1, 2785 + st.d $a0, $sp, 40 # 8-byte Folded Spill + ori $a0, $fp, 2785 + st.d $a0, $sp, 64 # 8-byte Folded Spill + ori $a0, $s4, 1311 st.d $a0, $sp, 56 # 8-byte Folded Spill - ori $a0, $t2, 1311 + ori $a0, $s5, 620 st.d $a0, $sp, 48 # 8-byte Folded Spill - ori $a0, $t3, 620 - st.d $a0, $sp, 40 # 8-byte Folded Spill - fld.d $fs0, $s4, %pc_lo12(.LCPI618_0) - fld.d $fs1, $s5, %pc_lo12(.LCPI618_2) + ori $a1, $zero, 0 + ori $a0, $zero, 0 + lu32i.d $a0, 447488 + lu52i.d $a0, $a0, 1031 + movgr2fr.d $fs0, $a0 + ori $a0, $zero, 0 + lu32i.d $a0, 512512 + lu52i.d $a0, $a0, -1015 + movgr2fr.d $fs1, $a0 vldi $vr4, -928 lu12i.w $a0, -449390 - ori $s5, $a0, 1171 + ori $s4, $a0, 1171 pcalau12i $a0, %pc_hi20(.L.str.646) addi.d $a0, $a0, %pc_lo12(.L.str.646) - st.d $a0, $sp, 24 # 8-byte Folded Spill - fld.d $fs2, $s6, %pc_lo12(.LCPI618_3) - fld.d $fs3, $fp, %pc_lo12(.LCPI618_4) - fld.d $fs4, $t0, %pc_lo12(.LCPI618_5) - fld.d $fs5, $a7, %pc_lo12(.LCPI618_6) + st.d $a0, $sp, 32 # 8-byte Folded Spill + ori $a0, $zero, 0 + lu32i.d $a0, -131072 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fs2, $a0 + ori $a0, $zero, 0 + lu32i.d $a0, -253952 + lu52i.d $a0, $a0, 1034 + movgr2fr.d $fs3, $a0 + lu32i.d $a1, 333824 + st.d $a1, $sp, 16 # 8-byte Folded Spill + lu52i.d $a0, $a1, 1039 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.647) addi.d $a0, $a0, %pc_lo12(.L.str.647) - st.d $a0, $sp, 16 # 8-byte Folded Spill - move $s4, $zero + st.d $a0, $sp, 24 # 8-byte Folded Spill + move $s5, $zero move $fp, $zero + lu12i.w $a0, 243269 + ori $a0, $a0, 2589 + lu32i.d $a0, -131105 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fs5, $a0 + lu12i.w $a0, -262144 + lu32i.d $a0, 171717 + lu52i.d $a0, $a0, -1004 + movgr2fr.d $fs6, $a0 b .LBB618_29 .LBB618_27: # in Loop: Header=BB618_29 Depth=1 - ld.d $a3, $sp, 168 - add.d $s6, $s1, $s4 + ld.d $a3, $sp, 176 + add.d $s6, $s1, $s5 ori $a0, $zero, 20 move $a1, $s6 - ld.d $a2, $sp, 32 # 8-byte Folded Reload + ld.d $a2, $sp, 40 # 8-byte Folded Reload .LBB618_28: # in Loop: Header=BB618_29 Depth=1 pcaddu18i $ra, %call36(sqlite3_snprintf) jirl $ra, $ra, 0 @@ -154838,18 +154783,18 @@ strftimeFunc: # @strftimeFunc pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 vldi $vr4, -928 - add.w $s4, $s4, $a0 + add.w $s5, $s5, $a0 addi.w $fp, $fp, 1 .p2align 4, , 16 .LBB618_29: # =>This Inner Loop Header: Depth=1 ldx.bu $a0, $s0, $fp beq $a0, $s7, .LBB618_32 # %bb.30: # in Loop: Header=BB618_29 Depth=1 - beqz $a0, .LBB618_54 + beqz $a0, .LBB618_55 .LBB618_31: # in Loop: Header=BB618_29 Depth=1 - addi.w $a1, $s4, 1 - stx.b $a0, $s1, $s4 - move $s4, $a1 + addi.w $a1, $s5, 1 + stx.b $a0, $s1, $s5 + move $s5, $a1 addi.w $fp, $fp, 1 b .LBB618_29 .p2align 4, , 16 @@ -154857,33 +154802,33 @@ strftimeFunc: # @strftimeFunc addi.w $fp, $fp, 1 ldx.bu $a0, $s0, $fp addi.d $a1, $a0, -72 - bltu $s8, $a1, .LBB618_53 + bltu $s8, $a1, .LBB618_54 # %bb.33: # in Loop: Header=BB618_29 Depth=1 slli.d $a1, $a1, 2 ldx.w $a1, $s3, $a1 add.d $a1, $s3, $a1 jr $a1 .LBB618_34: # in Loop: Header=BB618_29 Depth=1 - ld.bu $a1, $sp, 208 - ld.w $a2, $sp, 176 - ld.bu $a3, $sp, 209 + ld.bu $a1, $sp, 216 + ld.w $a2, $sp, 184 + ld.bu $a3, $sp, 217 sltui $a1, $a1, 1 addi.w $a2, $a2, -1 ori $a4, $zero, 1999 maskeqz $a4, $a4, $a1 masknez $a1, $a2, $a1 or $a1, $a4, $a1 - ld.d $a2, $sp, 56 # 8-byte Folded Reload + ld.d $a2, $sp, 64 # 8-byte Folded Reload mul.d $a2, $a1, $a2 srli.d $a4, $a2, 63 srai.d $a2, $a2, 37 add.d $a2, $a2, $a4 - ld.d $a4, $sp, 48 # 8-byte Folded Reload + ld.d $a4, $sp, 56 # 8-byte Folded Reload mul.d $a4, $a1, $a4 srli.d $a5, $a4, 63 srai.d $a4, $a4, 39 add.d $a4, $a4, $a5 - ld.d $a5, $sp, 40 # 8-byte Folded Reload + ld.d $a5, $sp, 48 # 8-byte Folded Reload add.d $a1, $a1, $a5 movgr2fr.w $fa0, $a1 ffint.d.w $fa0, $fa0 @@ -154898,10 +154843,10 @@ strftimeFunc: # @strftimeFunc fadd.d $fa0, $fa0, $fs1 beqz $a3, .LBB618_37 # %bb.35: # in Loop: Header=BB618_29 Depth=1 - ld.w $a1, $sp, 188 - ld.bu $a2, $sp, 211 - ld.w $a3, $sp, 192 - fld.d $fa1, $sp, 200 + ld.w $a1, $sp, 196 + ld.bu $a2, $sp, 219 + ld.w $a3, $sp, 200 + fld.d $fa1, $sp, 208 movgr2fr.w $fa2, $a1 ffint.d.w $fa2, $fa2 movgr2fr.w $fa3, $a3 @@ -154913,16 +154858,19 @@ strftimeFunc: # @strftimeFunc fadd.d $fa0, $fa0, $fa1 beqz $a2, .LBB618_37 # %bb.36: # in Loop: Header=BB618_29 Depth=1 - ld.w $a1, $sp, 196 + ld.w $a1, $sp, 204 ori $a2, $zero, 60 mul.d $a1, $a1, $a2 movgr2fr.w $fa1, $a1 ffint.d.w $fa1, $fa1 - fdiv.d $fa1, $fa1, $fs5 + ld.d $a1, $sp, 16 # 8-byte Folded Reload + lu52i.d $a1, $a1, -1009 + movgr2fr.d $fa2, $a1 + fdiv.d $fa1, $fa1, $fa2 fadd.d $fa0, $fa0, $fa1 .LBB618_37: # %computeJD.exit119 # in Loop: Header=BB618_29 Depth=1 - fld.d $fa1, $sp, 168 + fld.d $fa1, $sp, 176 fsub.d $fa0, $fa1, $fa0 fadd.d $fa0, $fa0, $fa4 ftintrz.w.d $fa0, $fa0 @@ -154933,7 +154881,7 @@ strftimeFunc: # @strftimeFunc fadd.d $fa0, $fa1, $fa4 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 - mul.d $a1, $a0, $s5 + mul.d $a1, $a0, $s4 srli.d $a1, $a1, 32 add.w $a1, $a1, $a0 bstrpick.d $a3, $a1, 31, 31 @@ -154941,11 +154889,11 @@ strftimeFunc: # @strftimeFunc add.d $a1, $a1, $a3 slli.d $a3, $a1, 3 sub.d $a3, $a3, $a1 - add.d $a1, $s1, $s4 + add.d $a1, $s1, $s5 sub.d $a0, $a3, $a0 add.d $a0, $a2, $a0 addi.w $a0, $a0, 7 - mul.d $a2, $a0, $s5 + mul.d $a2, $a0, $s4 srli.d $a2, $a2, 32 add.w $a0, $a2, $a0 bstrpick.d $a2, $a0, 31, 31 @@ -154953,28 +154901,26 @@ strftimeFunc: # @strftimeFunc add.d $a3, $a0, $a2 b .LBB618_48 .LBB618_39: # in Loop: Header=BB618_29 Depth=1 - fld.d $fa0, $sp, 168 - pcalau12i $a0, %pc_hi20(.LCPI618_9) - fld.d $fa1, $a0, %pc_lo12(.LCPI618_9) - add.d $s6, $s1, $s4 - fadd.d $fa0, $fa0, $fa1 + fld.d $fa0, $sp, 176 + add.d $s6, $s1, $s5 + fadd.d $fa0, $fa0, $fs6 fmadd.d $fa0, $fa0, $fs4, $fa4 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a3, $fa0 pcalau12i $a0, %pc_hi20(.L.str.189) addi.d $a2, $a0, %pc_lo12(.L.str.189) ori $a0, $zero, 30 - b .LBB618_52 + b .LBB618_53 .LBB618_40: # in Loop: Header=BB618_29 Depth=1 - ld.w $a3, $sp, 192 - b .LBB618_45 + ld.w $a3, $sp, 200 + b .LBB618_43 .LBB618_41: # in Loop: Header=BB618_29 Depth=1 - fld.d $fa0, $sp, 168 + fld.d $fa0, $sp, 176 vldi $vr1, -904 fadd.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 - mul.d $a1, $a0, $s5 + mul.d $a1, $a0, $s4 srli.d $a1, $a1, 32 add.w $a1, $a1, $a0 bstrpick.d $a2, $a1, 31, 31 @@ -154986,75 +154932,74 @@ strftimeFunc: # @strftimeFunc addi.d $a0, $a0, 48 b .LBB618_31 .LBB618_42: # in Loop: Header=BB618_29 Depth=1 - ld.w $a3, $sp, 188 - b .LBB618_45 + ld.w $a3, $sp, 196 .LBB618_43: # in Loop: Header=BB618_29 Depth=1 - ld.w $a3, $sp, 176 - add.d $s6, $s1, $s4 + add.d $a1, $s1, $s5 + b .LBB618_48 +.LBB618_44: # in Loop: Header=BB618_29 Depth=1 + ld.w $a3, $sp, 184 + add.d $s6, $s1, $s5 ori $a0, $zero, 5 move $a1, $s6 - ld.d $a2, $sp, 16 # 8-byte Folded Reload + ld.d $a2, $sp, 24 # 8-byte Folded Reload b .LBB618_28 -.LBB618_44: # in Loop: Header=BB618_29 Depth=1 - ld.w $a3, $sp, 184 .LBB618_45: # in Loop: Header=BB618_29 Depth=1 - add.d $a1, $s1, $s4 - b .LBB618_48 + ld.w $a3, $sp, 192 + b .LBB618_50 .LBB618_46: # in Loop: Header=BB618_29 Depth=1 - add.d $a1, $s1, $s4 + add.d $a1, $s1, $s5 addi.w $a3, $a2, 1 ori $a0, $zero, 4 - ld.d $a2, $sp, 24 # 8-byte Folded Reload + ld.d $a2, $sp, 32 # 8-byte Folded Reload pcaddu18i $ra, %call36(sqlite3_snprintf) jirl $ra, $ra, 0 vldi $vr4, -928 - addi.w $s4, $s4, 3 + addi.w $s5, $s5, 3 addi.w $fp, $fp, 1 b .LBB618_29 .LBB618_47: # in Loop: Header=BB618_29 Depth=1 - fld.d $fa0, $sp, 200 - add.d $a1, $s1, $s4 + fld.d $fa0, $sp, 208 + add.d $a1, $s1, $s5 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a3, $fa0 .LBB618_48: # in Loop: Header=BB618_29 Depth=1 ori $a0, $zero, 3 move $a2, $s2 + b .LBB618_51 .LBB618_49: # in Loop: Header=BB618_29 Depth=1 - pcaddu18i $ra, %call36(sqlite3_snprintf) - jirl $ra, $ra, 0 - vldi $vr4, -928 - addi.w $s4, $s4, 2 - addi.w $fp, $fp, 1 - b .LBB618_29 + ld.w $a3, $sp, 188 .LBB618_50: # in Loop: Header=BB618_29 Depth=1 - ld.w $a3, $sp, 180 - add.d $a1, $s1, $s4 + add.d $a1, $s1, $s5 pcalau12i $a0, %pc_hi20(.L.str.644) addi.d $a2, $a0, %pc_lo12(.L.str.644) ori $a0, $zero, 3 - b .LBB618_49 .LBB618_51: # in Loop: Header=BB618_29 Depth=1 - fld.d $fa0, $sp, 200 - pcalau12i $a0, %pc_hi20(.LCPI618_10) - fld.d $fa1, $a0, %pc_lo12(.LCPI618_10) - fcmp.clt.d $fcc0, $fa1, $fa0 - fsel $fa0, $fa0, $fa1, $fcc0 - add.d $s6, $s1, $s4 + pcaddu18i $ra, %call36(sqlite3_snprintf) + jirl $ra, $ra, 0 + vldi $vr4, -928 + addi.w $s5, $s5, 2 + addi.w $fp, $fp, 1 + b .LBB618_29 +.LBB618_52: # in Loop: Header=BB618_29 Depth=1 + fld.d $fa0, $sp, 208 + fcmp.clt.d $fcc0, $fs5, $fa0 + fsel $fa0, $fa0, $fs5, $fcc0 + add.d $s6, $s1, $s5 movfr2gr.d $a3, $fa0 pcalau12i $a0, %pc_hi20(.L.str.645) addi.d $a2, $a0, %pc_lo12(.L.str.645) ori $a0, $zero, 7 -.LBB618_52: # in Loop: Header=BB618_29 Depth=1 +.LBB618_53: # in Loop: Header=BB618_29 Depth=1 move $a1, $s6 b .LBB618_28 -.LBB618_53: # in Loop: Header=BB618_29 Depth=1 - addi.w $a0, $s4, 1 - stx.b $s7, $s1, $s4 - move $s4, $a0 +.LBB618_54: # in Loop: Header=BB618_29 Depth=1 + addi.w $a0, $s5, 1 + stx.b $s7, $s1, $s5 + move $s5, $a0 addi.w $fp, $fp, 1 b .LBB618_29 -.LBB618_54: - stx.b $zero, $s1, $s4 +.LBB618_55: + stx.b $zero, $s1, $s5 ld.d $s4, $sp, 8 # 8-byte Folded Reload addi.d $s2, $s4, 16 move $a0, $s1 @@ -155064,57 +155009,58 @@ strftimeFunc: # @strftimeFunc pcalau12i $a0, %pc_hi20(sqlite3_free) addi.d $fp, $a0, %pc_lo12(sqlite3_free) addi.w $a0, $zero, -1 - beq $fp, $a0, .LBB618_57 -# %bb.55: - addi.d $a0, $sp, 68 + beq $fp, $a0, .LBB618_58 +# %bb.56: + addi.d $a0, $sp, 76 xor $a0, $s1, $a0 sltui $a0, $a0, 1 - bnez $a0, .LBB618_57 -# %bb.56: + bnez $a0, .LBB618_58 +# %bb.57: move $a0, $s2 pcaddu18i $ra, %call36(sqlite3VdbeMemRelease) jirl $ra, $ra, 0 st.d $s1, $s4, 40 st.d $fp, $s4, 56 - b .LBB618_59 -.LBB618_57: + b .LBB618_60 +.LBB618_58: addi.w $s3, $s0, 1 move $a0, $s2 move $a1, $s3 move $a2, $zero pcaddu18i $ra, %call36(sqlite3VdbeMemGrow) jirl $ra, $ra, 0 - bnez $a0, .LBB618_60 -# %bb.58: + bnez $a0, .LBB618_61 +# %bb.59: ld.d $a0, $s4, 40 move $a1, $s1 move $a2, $s3 pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 -.LBB618_59: +.LBB618_60: st.w $s0, $s4, 48 lu12i.w $a0, 4144 ori $a0, $a0, 98 st.w $a0, $s4, 52 -.LBB618_60: # %sqlite3_result_text.exit - fld.d $fs5, $sp, 216 # 8-byte Folded Reload - fld.d $fs4, $sp, 224 # 8-byte Folded Reload - fld.d $fs3, $sp, 232 # 8-byte Folded Reload - fld.d $fs2, $sp, 240 # 8-byte Folded Reload - fld.d $fs1, $sp, 248 # 8-byte Folded Reload - fld.d $fs0, $sp, 256 # 8-byte Folded Reload - ld.d $s8, $sp, 264 # 8-byte Folded Reload - ld.d $s7, $sp, 272 # 8-byte Folded Reload - ld.d $s6, $sp, 280 # 8-byte Folded Reload - ld.d $s5, $sp, 288 # 8-byte Folded Reload - ld.d $s4, $sp, 296 # 8-byte Folded Reload - ld.d $s3, $sp, 304 # 8-byte Folded Reload - ld.d $s2, $sp, 312 # 8-byte Folded Reload - ld.d $s1, $sp, 320 # 8-byte Folded Reload - ld.d $s0, $sp, 328 # 8-byte Folded Reload - ld.d $fp, $sp, 336 # 8-byte Folded Reload - ld.d $ra, $sp, 344 # 8-byte Folded Reload - addi.d $sp, $sp, 352 +.LBB618_61: # %sqlite3_result_text.exit + fld.d $fs6, $sp, 224 # 8-byte Folded Reload + fld.d $fs5, $sp, 232 # 8-byte Folded Reload + fld.d $fs4, $sp, 240 # 8-byte Folded Reload + fld.d $fs3, $sp, 248 # 8-byte Folded Reload + fld.d $fs2, $sp, 256 # 8-byte Folded Reload + fld.d $fs1, $sp, 264 # 8-byte Folded Reload + fld.d $fs0, $sp, 272 # 8-byte Folded Reload + ld.d $s8, $sp, 280 # 8-byte Folded Reload + ld.d $s7, $sp, 288 # 8-byte Folded Reload + ld.d $s6, $sp, 296 # 8-byte Folded Reload + ld.d $s5, $sp, 304 # 8-byte Folded Reload + ld.d $s4, $sp, 312 # 8-byte Folded Reload + ld.d $s3, $sp, 320 # 8-byte Folded Reload + ld.d $s2, $sp, 328 # 8-byte Folded Reload + ld.d $s1, $sp, 336 # 8-byte Folded Reload + ld.d $s0, $sp, 344 # 8-byte Folded Reload + ld.d $fp, $sp, 352 # 8-byte Folded Reload + ld.d $ra, $sp, 360 # 8-byte Folded Reload + addi.d $sp, $sp, 368 ret .Lfunc_end618: .size strftimeFunc, .Lfunc_end618-strftimeFunc @@ -155122,136 +155068,136 @@ strftimeFunc: # @strftimeFunc .p2align 2, 0x0 .LJTI618_0: .word .LBB618_4-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 .word .LBB618_8-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 .word .LBB618_9-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 .word .LBB618_8-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 .word .LBB618_8-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 .word .LBB618_8-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 .word .LBB618_10-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 .word .LBB618_8-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 .word .LBB618_10-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 .word .LBB618_11-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 .word .LBB618_8-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 .word .LBB618_9-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 - .word .LBB618_60-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 + .word .LBB618_61-.LJTI618_0 .word .LBB618_4-.LJTI618_0 .LJTI618_1: .word .LBB618_42-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 .word .LBB618_27-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 .word .LBB618_40-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 .word .LBB618_47-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 .word .LBB618_34-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_43-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 .word .LBB618_44-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_51-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_45-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_52-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 .word .LBB618_34-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_50-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_49-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 .word .LBB618_39-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 - .word .LBB618_53-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 + .word .LBB618_54-.LJTI618_1 .word .LBB618_41-.LJTI618_1 # -- End function .text @@ -155288,48 +155234,26 @@ cdateFunc: # @cdateFunc .Lfunc_end621: .size cdateFunc, .Lfunc_end621-cdateFunc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function isDate -.LCPI622_0: - .dword 0x4194997000000000 # double 8.64E+7 -.LCPI622_1: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI622_2: - .dword 0x40f5180000000000 # double 86400 -.LCPI622_3: - .dword 0x4096800000000000 # double 1440 -.LCPI622_4: - .dword 0x4076d40000000000 # double 365.25 -.LCPI622_5: - .dword 0x403e99a027525461 # double 30.600100000000001 -.LCPI622_6: - .dword 0xc097d20000000000 # double -1524.5 -.LCPI622_7: - .dword 0x404e000000000000 # double 60 -.LCPI622_8: - .dword 0x40ac200000000000 # double 3600 -.LCPI622_9: - .dword 0xc0f5180000000000 # double -86400 -.LCPI622_10: - .dword 0x41429ec5c0000000 # double 2440587.5 - .text - .p2align 5 + .p2align 5 # -- Begin function isDate .type isDate,@function isDate: # @isDate # %bb.0: - addi.d $sp, $sp, -368 - st.d $ra, $sp, 360 # 8-byte Folded Spill - st.d $fp, $sp, 352 # 8-byte Folded Spill - st.d $s0, $sp, 344 # 8-byte Folded Spill - st.d $s1, $sp, 336 # 8-byte Folded Spill - st.d $s2, $sp, 328 # 8-byte Folded Spill - st.d $s3, $sp, 320 # 8-byte Folded Spill - st.d $s4, $sp, 312 # 8-byte Folded Spill - st.d $s5, $sp, 304 # 8-byte Folded Spill - st.d $s6, $sp, 296 # 8-byte Folded Spill - st.d $s7, $sp, 288 # 8-byte Folded Spill - st.d $s8, $sp, 280 # 8-byte Folded Spill - fst.d $fs0, $sp, 272 # 8-byte Folded Spill + addi.d $sp, $sp, -384 + st.d $ra, $sp, 376 # 8-byte Folded Spill + st.d $fp, $sp, 368 # 8-byte Folded Spill + st.d $s0, $sp, 360 # 8-byte Folded Spill + st.d $s1, $sp, 352 # 8-byte Folded Spill + st.d $s2, $sp, 344 # 8-byte Folded Spill + st.d $s3, $sp, 336 # 8-byte Folded Spill + st.d $s4, $sp, 328 # 8-byte Folded Spill + st.d $s5, $sp, 320 # 8-byte Folded Spill + st.d $s6, $sp, 312 # 8-byte Folded Spill + st.d $s7, $sp, 304 # 8-byte Folded Spill + st.d $s8, $sp, 296 # 8-byte Folded Spill + fst.d $fs0, $sp, 288 # 8-byte Folded Spill + fst.d $fs1, $sp, 280 # 8-byte Folded Spill + fst.d $fs2, $sp, 272 # 8-byte Folded Spill + fst.d $fs3, $sp, 264 # 8-byte Folded Spill move $fp, $a3 move $s0, $a2 move $s1, $a1 @@ -155343,7 +155267,7 @@ isDate: # @isDate jirl $ra, $ra, 0 move $s2, $a0 bnez $a0, .LBB622_3 - b .LBB622_124 + b .LBB622_125 .LBB622_2: pcalau12i $a0, %pc_hi20(.L.str.629) addi.d $s2, $a0, %pc_lo12(.L.str.629) @@ -155357,7 +155281,7 @@ isDate: # @isDate addi.d $a0, $a0, -45 sltui $s5, $a0, 1 add.d $a0, $s2, $s5 - addi.d $a1, $sp, 264 + addi.d $a1, $sp, 256 st.d $a1, $sp, 56 st.d $zero, $sp, 48 ori $a1, $zero, 31 @@ -155366,7 +155290,7 @@ isDate: # @isDate st.d $a1, $sp, 32 ori $a1, $zero, 2 st.d $a1, $sp, 24 - addi.d $a1, $sp, 232 + addi.d $a1, $sp, 224 st.d $a1, $sp, 16 ori $a1, $zero, 45 st.d $a1, $sp, 8 @@ -155375,7 +155299,7 @@ isDate: # @isDate ori $a3, $s6, 1807 ori $a1, $zero, 4 ori $a4, $zero, 45 - addi.d $a5, $sp, 184 + addi.d $a5, $sp, 176 ori $a6, $zero, 2 ori $a7, $zero, 1 st.d $a2, $sp, 0 @@ -155454,9 +155378,9 @@ isDate: # @isDate ld.d $a0, $s3, 0 ld.d $a0, $a0, 8 ld.d $a2, $a0, 128 - addi.d $a1, $sp, 184 + addi.d $a1, $sp, 176 jirl $ra, $a2, 0 - fld.d $fa0, $sp, 184 + fld.d $fa0, $sp, 176 fst.d $fa0, $fp, 0 b .LBB622_20 .LBB622_18: @@ -155466,7 +155390,7 @@ isDate: # @isDate move $a1, $zero pcaddu18i $ra, %call36(sqlite3IsNumber) jirl $ra, $ra, 0 - beqz $a0, .LBB622_124 + beqz $a0, .LBB622_125 # %bb.19: move $a0, $s2 move $a1, $fp @@ -155479,15 +155403,15 @@ isDate: # @isDate .LBB622_21: st.b $zero, $fp, 41 .LBB622_22: - ld.w $a0, $sp, 184 + ld.w $a0, $sp, 176 st.b $zero, $fp, 42 ori $a1, $zero, 1 st.b $a1, $fp, 40 sub.d $a1, $zero, $a0 masknez $a0, $a0, $s5 maskeqz $a1, $a1, $s5 - ld.w $a2, $sp, 232 - ld.w $a3, $sp, 264 + ld.w $a2, $sp, 224 + ld.w $a3, $sp, 256 ld.bu $a4, $fp, 43 or $a0, $a1, $a0 st.w $a0, $fp, 8 @@ -155500,9 +155424,9 @@ isDate: # @isDate jirl $ra, $ra, 0 .LBB622_24: # %parseDateOrTime.exit ori $a0, $zero, 2 - blt $s1, $a0, .LBB622_123 + blt $s1, $a0, .LBB622_124 # %bb.25: # %.lr.ph - addi.d $s6, $sp, 241 + addi.d $s6, $sp, 233 lu12i.w $a0, -335545 ori $a0, $a0, 2785 st.d $a0, $sp, 136 # 8-byte Folded Spill @@ -155512,6 +155436,16 @@ isDate: # @isDate lu12i.w $a0, 1 ori $a0, $a0, 620 st.d $a0, $sp, 120 # 8-byte Folded Spill + lu12i.w $a0, 161061 + ori $a0, $a0, 1121 + lu32i.d $a0, -91744 + lu52i.d $a0, $a0, 1027 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs2, $a0 lu12i.w $a0, -452053 ori $a0, $a0, 965 st.d $a0, $sp, 88 # 8-byte Folded Spill @@ -155528,6 +155462,10 @@ isDate: # @isDate lu12i.w $a0, 1942 ori $a0, $a0, 356 st.d $a0, $sp, 96 # 8-byte Folded Spill + lu12i.w $a0, -262144 + lu32i.d $a0, 171717 + lu52i.d $a0, $a0, 1044 + movgr2fr.d $fs3, $a0 lu12i.w $a0, 1591 ori $a0, $a0, 1141 st.d $a0, $sp, 112 # 8-byte Folded Spill @@ -155541,335 +155479,337 @@ isDate: # @isDate st.d $a0, $sp, 144 # 8-byte Folded Spill ori $s4, $zero, 1 ori $s3, $zero, 76 - addi.d $s7, $sp, 232 + addi.d $s7, $sp, 224 ori $s2, $zero, 1 - b .LBB622_31 -.LBB622_26: # in Loop: Header=BB622_31 Depth=1 - fld.d $fa0, $sp, 264 + b .LBB622_32 +.LBB622_26: # in Loop: Header=BB622_32 Depth=1 + fld.d $fa0, $sp, 256 fld.d $fa1, $fp, 0 move $s8, $zero vldi $vr2, -968 fdiv.d $fa0, $fa0, $fa2 - fadd.d $fa0, $fa1, $fa0 .LBB622_27: # %.thread164.i - # in Loop: Header=BB622_31 Depth=1 - fst.d $fa0, $fp, 0 + # in Loop: Header=BB622_32 Depth=1 + fadd.d $fa0, $fa1, $fa0 .LBB622_28: # %.thread164.i - # in Loop: Header=BB622_31 Depth=1 + # in Loop: Header=BB622_32 Depth=1 + fst.d $fa0, $fp, 0 +.LBB622_29: # %.thread164.i + # in Loop: Header=BB622_32 Depth=1 st.h $zero, $fp, 40 st.b $zero, $fp, 43 -.LBB622_29: # %parseModifier.exit - # in Loop: Header=BB622_31 Depth=1 - bnez $s8, .LBB622_124 -.LBB622_30: # in Loop: Header=BB622_31 Depth=1 +.LBB622_30: # %parseModifier.exit + # in Loop: Header=BB622_32 Depth=1 + bnez $s8, .LBB622_125 +.LBB622_31: # in Loop: Header=BB622_32 Depth=1 addi.d $s2, $s2, 1 - beq $s2, $s1, .LBB622_123 -.LBB622_31: # =>This Loop Header: Depth=1 - # Child Loop BB622_71 Depth 2 + beq $s2, $s1, .LBB622_124 +.LBB622_32: # =>This Loop Header: Depth=1 + # Child Loop BB622_72 Depth 2 slli.d $a0, $s2, 3 ldx.d $a0, $s0, $a0 ori $a1, $zero, 1 pcaddu18i $ra, %call36(sqlite3ValueText) jirl $ra, $ra, 0 - beqz $a0, .LBB622_124 -# %bb.32: # in Loop: Header=BB622_31 Depth=1 + beqz $a0, .LBB622_125 +# %bb.33: # in Loop: Header=BB622_32 Depth=1 move $s5, $a0 ld.b $s8, $a0, 0 - beqz $s8, .LBB622_62 -# %bb.33: # in Loop: Header=BB622_31 Depth=1 + beqz $s8, .LBB622_63 +# %bb.34: # in Loop: Header=BB622_32 Depth=1 pcaddu18i $ra, %call36(__ctype_tolower_loc) jirl $ra, $ra, 0 ld.d $a1, $a0, 0 slli.d $a2, $s8, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 1 - st.b $a1, $sp, 232 - addi.d $a1, $sp, 233 - beqz $a2, .LBB622_63 -# %bb.34: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 224 + addi.d $a1, $sp, 225 + beqz $a2, .LBB622_64 +# %bb.35: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 2 - st.b $a1, $sp, 233 - addi.d $a1, $sp, 234 - beqz $a2, .LBB622_63 -# %bb.35: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 225 + addi.d $a1, $sp, 226 + beqz $a2, .LBB622_64 +# %bb.36: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 3 - st.b $a1, $sp, 234 - addi.d $a1, $sp, 235 - beqz $a2, .LBB622_63 -# %bb.36: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 226 + addi.d $a1, $sp, 227 + beqz $a2, .LBB622_64 +# %bb.37: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 4 - st.b $a1, $sp, 235 - addi.d $a1, $sp, 236 - beqz $a2, .LBB622_63 -# %bb.37: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 227 + addi.d $a1, $sp, 228 + beqz $a2, .LBB622_64 +# %bb.38: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 5 - st.b $a1, $sp, 236 - addi.d $a1, $sp, 237 - beqz $a2, .LBB622_63 -# %bb.38: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 228 + addi.d $a1, $sp, 229 + beqz $a2, .LBB622_64 +# %bb.39: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 6 - st.b $a1, $sp, 237 - addi.d $a1, $sp, 238 - beqz $a2, .LBB622_63 -# %bb.39: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 229 + addi.d $a1, $sp, 230 + beqz $a2, .LBB622_64 +# %bb.40: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 7 - st.b $a1, $sp, 238 - addi.d $a1, $sp, 239 - beqz $a2, .LBB622_63 -# %bb.40: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 230 + addi.d $a1, $sp, 231 + beqz $a2, .LBB622_64 +# %bb.41: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 8 - st.b $a1, $sp, 239 - addi.d $a1, $sp, 240 - beqz $a2, .LBB622_63 -# %bb.41: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 231 + addi.d $a1, $sp, 232 + beqz $a2, .LBB622_64 +# %bb.42: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 9 - st.b $a1, $sp, 240 + st.b $a1, $sp, 232 move $a1, $s6 - beqz $a2, .LBB622_63 -# %bb.42: # in Loop: Header=BB622_31 Depth=1 + beqz $a2, .LBB622_64 +# %bb.43: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 10 - st.b $a1, $sp, 241 - addi.d $a1, $sp, 242 - beqz $a2, .LBB622_63 -# %bb.43: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 233 + addi.d $a1, $sp, 234 + beqz $a2, .LBB622_64 +# %bb.44: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 11 - st.b $a1, $sp, 242 - addi.d $a1, $sp, 243 - beqz $a2, .LBB622_63 -# %bb.44: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 234 + addi.d $a1, $sp, 235 + beqz $a2, .LBB622_64 +# %bb.45: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 12 - st.b $a1, $sp, 243 - addi.d $a1, $sp, 244 - beqz $a2, .LBB622_63 -# %bb.45: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 235 + addi.d $a1, $sp, 236 + beqz $a2, .LBB622_64 +# %bb.46: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 13 - st.b $a1, $sp, 244 - addi.d $a1, $sp, 245 - beqz $a2, .LBB622_63 -# %bb.46: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 236 + addi.d $a1, $sp, 237 + beqz $a2, .LBB622_64 +# %bb.47: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 14 - st.b $a1, $sp, 245 - addi.d $a1, $sp, 246 - beqz $a2, .LBB622_63 -# %bb.47: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 237 + addi.d $a1, $sp, 238 + beqz $a2, .LBB622_64 +# %bb.48: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 15 - st.b $a1, $sp, 246 - addi.d $a1, $sp, 247 - beqz $a2, .LBB622_63 -# %bb.48: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 238 + addi.d $a1, $sp, 239 + beqz $a2, .LBB622_64 +# %bb.49: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 16 - st.b $a1, $sp, 247 - addi.d $a1, $sp, 248 - beqz $a2, .LBB622_63 -# %bb.49: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 239 + addi.d $a1, $sp, 240 + beqz $a2, .LBB622_64 +# %bb.50: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 17 - st.b $a1, $sp, 248 - addi.d $a1, $sp, 249 - beqz $a2, .LBB622_63 -# %bb.50: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 240 + addi.d $a1, $sp, 241 + beqz $a2, .LBB622_64 +# %bb.51: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 18 - st.b $a1, $sp, 249 - addi.d $a1, $sp, 250 - beqz $a2, .LBB622_63 -# %bb.51: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 241 + addi.d $a1, $sp, 242 + beqz $a2, .LBB622_64 +# %bb.52: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 19 - st.b $a1, $sp, 250 - addi.d $a1, $sp, 251 - beqz $a2, .LBB622_63 -# %bb.52: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 242 + addi.d $a1, $sp, 243 + beqz $a2, .LBB622_64 +# %bb.53: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 20 - st.b $a1, $sp, 251 - addi.d $a1, $sp, 252 - beqz $a2, .LBB622_63 -# %bb.53: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 243 + addi.d $a1, $sp, 244 + beqz $a2, .LBB622_64 +# %bb.54: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 21 - st.b $a1, $sp, 252 - addi.d $a1, $sp, 253 - beqz $a2, .LBB622_63 -# %bb.54: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 244 + addi.d $a1, $sp, 245 + beqz $a2, .LBB622_64 +# %bb.55: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 22 - st.b $a1, $sp, 253 - addi.d $a1, $sp, 254 - beqz $a2, .LBB622_63 -# %bb.55: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 245 + addi.d $a1, $sp, 246 + beqz $a2, .LBB622_64 +# %bb.56: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 23 - st.b $a1, $sp, 254 - addi.d $a1, $sp, 255 - beqz $a2, .LBB622_63 -# %bb.56: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 246 + addi.d $a1, $sp, 247 + beqz $a2, .LBB622_64 +# %bb.57: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 24 - st.b $a1, $sp, 255 - addi.d $a1, $sp, 256 - beqz $a2, .LBB622_63 -# %bb.57: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 247 + addi.d $a1, $sp, 248 + beqz $a2, .LBB622_64 +# %bb.58: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 25 - st.b $a1, $sp, 256 - addi.d $a1, $sp, 257 - beqz $a2, .LBB622_63 -# %bb.58: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 248 + addi.d $a1, $sp, 249 + beqz $a2, .LBB622_64 +# %bb.59: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 26 - st.b $a1, $sp, 257 - addi.d $a1, $sp, 258 - beqz $a2, .LBB622_63 -# %bb.59: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 249 + addi.d $a1, $sp, 250 + beqz $a2, .LBB622_64 +# %bb.60: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 27 - st.b $a1, $sp, 258 - addi.d $a1, $sp, 259 - beqz $a2, .LBB622_63 -# %bb.60: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 250 + addi.d $a1, $sp, 251 + beqz $a2, .LBB622_64 +# %bb.61: # in Loop: Header=BB622_32 Depth=1 ld.d $a1, $a0, 0 slli.d $a2, $a2, 2 ldx.b $a1, $a1, $a2 ld.b $a2, $s5, 28 - st.b $a1, $sp, 259 - addi.d $a1, $sp, 260 - beqz $a2, .LBB622_63 -# %bb.61: # in Loop: Header=BB622_31 Depth=1 + st.b $a1, $sp, 251 + addi.d $a1, $sp, 252 + beqz $a2, .LBB622_64 +# %bb.62: # in Loop: Header=BB622_32 Depth=1 ld.d $a0, $a0, 0 slli.d $a1, $a2, 2 ldx.b $a0, $a0, $a1 - st.b $a0, $sp, 260 - addi.d $a1, $sp, 261 - b .LBB622_63 + st.b $a0, $sp, 252 + addi.d $a1, $sp, 253 + b .LBB622_64 .p2align 4, , 16 -.LBB622_62: # in Loop: Header=BB622_31 Depth=1 - addi.d $a1, $sp, 232 -.LBB622_63: # %.critedge.i - # in Loop: Header=BB622_31 Depth=1 +.LBB622_63: # in Loop: Header=BB622_32 Depth=1 + addi.d $a1, $sp, 224 +.LBB622_64: # %.critedge.i + # in Loop: Header=BB622_32 Depth=1 st.b $zero, $a1, 0 - ld.bu $a0, $sp, 232 + ld.bu $a0, $sp, 224 addi.d $a0, $a0, -43 - bltu $s3, $a0, .LBB622_124 -# %bb.64: # %.critedge.i - # in Loop: Header=BB622_31 Depth=1 + bltu $s3, $a0, .LBB622_125 +# %bb.65: # %.critedge.i + # in Loop: Header=BB622_32 Depth=1 slli.d $a0, $a0, 2 pcalau12i $a1, %pc_hi20(.LJTI622_0) addi.d $a1, $a1, %pc_lo12(.LJTI622_0) ldx.w $a0, $a1, $a0 add.d $a0, $a1, $a0 jr $a0 -.LBB622_65: # in Loop: Header=BB622_31 Depth=1 - addi.d $a0, $sp, 232 - addi.d $a1, $sp, 264 +.LBB622_66: # in Loop: Header=BB622_32 Depth=1 + addi.d $a0, $sp, 224 + addi.d $a1, $sp, 256 pcaddu18i $ra, %call36(sqlite3AtoF) jirl $ra, $ra, 0 move $s5, $a0 ldx.bu $a0, $a0, $s7 ori $a1, $zero, 58 - bne $a0, $a1, .LBB622_70 -# %bb.66: # in Loop: Header=BB622_31 Depth=1 + bne $a0, $a1, .LBB622_71 +# %bb.67: # in Loop: Header=BB622_32 Depth=1 pcaddu18i $ra, %call36(__ctype_b_loc) jirl $ra, $ra, 0 - ld.bu $a1, $sp, 232 + ld.bu $a1, $sp, 224 ld.d $a0, $a0, 0 slli.d $a1, $a1, 1 ldx.hu $a0, $a0, $a1 andi $a0, $a0, 2048 sltui $a0, $a0, 1 masknez $a1, $s7, $a0 - addi.d $a2, $sp, 233 + addi.d $a2, $sp, 225 maskeqz $a0, $a2, $a0 or $a0, $a0, $a1 vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 184 - vst $vr0, $sp, 200 - vst $vr0, $sp, 216 - addi.d $a1, $sp, 184 + vst $vr0, $sp, 176 + vst $vr0, $sp, 192 + vst $vr0, $sp, 208 + addi.d $a1, $sp, 176 pcaddu18i $ra, %call36(parseHhMmSs) jirl $ra, $ra, 0 ori $s8, $zero, 1 - bnez $a0, .LBB622_29 -# %bb.67: # in Loop: Header=BB622_31 Depth=1 - ld.bu $a0, $sp, 226 - beqz $a0, .LBB622_110 -# %bb.68: # %.computeJD.exit_crit_edge.i - # in Loop: Header=BB622_31 Depth=1 - fld.d $fa0, $sp, 184 -.LBB622_69: # %computeJD.exit.i - # in Loop: Header=BB622_31 Depth=1 + bnez $a0, .LBB622_30 +# %bb.68: # in Loop: Header=BB622_32 Depth=1 + ld.bu $a0, $sp, 218 + beqz $a0, .LBB622_111 +# %bb.69: # %.computeJD.exit_crit_edge.i + # in Loop: Header=BB622_32 Depth=1 + fld.d $fa0, $sp, 176 +.LBB622_70: # %computeJD.exit.i + # in Loop: Header=BB622_32 Depth=1 vldi $vr1, -800 fadd.d $fa0, $fa0, $fa1 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a0, $fa1 - ld.bu $a1, $sp, 232 + ld.bu $a1, $sp, 224 movgr2fr.w $fa1, $a0 ffint.d.w $fa1, $fa1 fsub.d $fa0, $fa0, $fa1 @@ -155887,43 +155827,43 @@ isDate: # @isDate st.b $zero, $fp, 43 fadd.d $fa0, $fs0, $fa0 fst.d $fa0, $fp, 0 - b .LBB622_29 + b .LBB622_30 .p2align 4, , 16 -.LBB622_70: # %.preheader.i - # in Loop: Header=BB622_31 Depth=1 +.LBB622_71: # %.preheader.i + # in Loop: Header=BB622_32 Depth=1 pcaddu18i $ra, %call36(__ctype_b_loc) jirl $ra, $ra, 0 ld.d $a0, $a0, 0 - addi.d $a1, $sp, 231 + addi.d $a1, $sp, 223 add.d $s5, $a1, $s5 .p2align 4, , 16 -.LBB622_71: # Parent Loop BB622_31 Depth=1 +.LBB622_72: # Parent Loop BB622_32 Depth=1 # => This Inner Loop Header: Depth=2 ld.bu $a1, $s5, 1 slli.d $a1, $a1, 1 ldx.hu $a1, $a0, $a1 slli.d $a1, $a1, 50 addi.d $s5, $s5, 1 - bltz $a1, .LBB622_71 -# %bb.72: # in Loop: Header=BB622_31 Depth=1 + bltz $a1, .LBB622_72 +# %bb.73: # in Loop: Header=BB622_32 Depth=1 move $a0, $s5 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 addi.w $a1, $a0, -11 addi.w $a2, $zero, -8 - bltu $a1, $a2, .LBB622_124 -# %bb.73: # in Loop: Header=BB622_31 Depth=1 + bltu $a1, $a2, .LBB622_125 +# %bb.74: # in Loop: Header=BB622_32 Depth=1 addi.d $s8, $a0, -1 bstrpick.d $a1, $s8, 31, 0 ldx.bu $a2, $s5, $a1 ori $a3, $zero, 115 - bne $a2, $a3, .LBB622_75 -# %bb.74: # in Loop: Header=BB622_31 Depth=1 + bne $a2, $a3, .LBB622_76 +# %bb.75: # in Loop: Header=BB622_32 Depth=1 stx.b $zero, $s5, $a1 - b .LBB622_76 -.LBB622_75: # in Loop: Header=BB622_31 Depth=1 + b .LBB622_77 +.LBB622_76: # in Loop: Header=BB622_32 Depth=1 move $s8, $a0 -.LBB622_76: # in Loop: Header=BB622_31 Depth=1 +.LBB622_77: # in Loop: Header=BB622_32 Depth=1 move $a0, $fp pcaddu18i $ra, %call36(computeJD) jirl $ra, $ra, 0 @@ -155931,62 +155871,62 @@ isDate: # @isDate addi.d $a0, $a0, -3 ori $s8, $zero, 1 ori $a1, $zero, 3 - bltu $a1, $a0, .LBB622_28 -# %bb.77: # in Loop: Header=BB622_31 Depth=1 + bltu $a1, $a0, .LBB622_29 +# %bb.78: # in Loop: Header=BB622_32 Depth=1 slli.d $a0, $a0, 2 pcalau12i $a1, %pc_hi20(.LJTI622_1) addi.d $a1, $a1, %pc_lo12(.LJTI622_1) ldx.w $a0, $a1, $a0 add.d $a0, $a1, $a0 jr $a0 -.LBB622_78: # in Loop: Header=BB622_31 Depth=1 +.LBB622_79: # in Loop: Header=BB622_32 Depth=1 pcalau12i $a0, %pc_hi20(.L.str.637) addi.d $a1, $a0, %pc_lo12(.L.str.637) move $a0, $s5 pcaddu18i $ra, %call36(strcmp) jirl $ra, $ra, 0 - bnez $a0, .LBB622_28 -# %bb.79: # in Loop: Header=BB622_31 Depth=1 - fld.d $fa0, $sp, 264 + bnez $a0, .LBB622_29 +# %bb.80: # in Loop: Header=BB622_32 Depth=1 + fld.d $fa0, $sp, 256 fld.d $fa1, $fp, 0 move $s8, $zero fadd.d $fa0, $fa0, $fa1 - b .LBB622_27 -.LBB622_80: # in Loop: Header=BB622_31 Depth=1 + b .LBB622_28 +.LBB622_81: # in Loop: Header=BB622_32 Depth=1 pcalau12i $a0, %pc_hi20(.L.str.635) addi.d $a1, $a0, %pc_lo12(.L.str.635) move $a0, $s5 pcaddu18i $ra, %call36(strcmp) jirl $ra, $ra, 0 - bnez $a0, .LBB622_28 -# %bb.81: # in Loop: Header=BB622_31 Depth=1 + bnez $a0, .LBB622_29 +# %bb.82: # in Loop: Header=BB622_32 Depth=1 move $a0, $fp pcaddu18i $ra, %call36(computeYMD) jirl $ra, $ra, 0 ld.bu $a0, $fp, 41 - bnez $a0, .LBB622_83 -# %bb.82: # in Loop: Header=BB622_31 Depth=1 + bnez $a0, .LBB622_84 +# %bb.83: # in Loop: Header=BB622_32 Depth=1 move $a0, $fp pcaddu18i $ra, %call36(computeJD) jirl $ra, $ra, 0 fld.d $fa0, $fp, 0 - vldi $vr3, -928 - fadd.d $fa0, $fa0, $fa3 + vldi $vr2, -928 + fadd.d $fa0, $fa0, $fa2 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a0, $fa1 - pcalau12i $a1, %pc_hi20(.LCPI622_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI622_0) - movgr2fr.w $fa2, $a0 - ffint.d.w $fa2, $fa2 - fsub.d $fa0, $fa0, $fa2 - fmadd.d $fa0, $fa0, $fa1, $fa3 + movgr2fr.w $fa1, $a0 + ffint.d.w $fa1, $fa1 + fsub.d $fa0, $fa0, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, 301424 + lu52i.d $a0, $a0, 1049 + movgr2fr.d $fa1, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 ftintrz.w.d $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI622_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI622_1) movfr2gr.s $a0, $fa0 movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 - fmul.d $fa0, $fa0, $fa1 + fmul.d $fa0, $fa0, $fs2 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a0, $fa1 movgr2fr.w $fa1, $a0 @@ -156020,84 +155960,87 @@ isDate: # @isDate fadd.d $fa0, $fa0, $fa1 fst.d $fa0, $fp, 32 st.b $s4, $fp, 41 -.LBB622_83: # %computeYMD_HMS.exit27 - # in Loop: Header=BB622_31 Depth=1 +.LBB622_84: # %computeYMD_HMS.exit27 + # in Loop: Header=BB622_32 Depth=1 ld.w $a0, $fp, 12 - fld.d $fs0, $sp, 264 + fld.d $fs0, $sp, 256 movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 fadd.d $fa0, $fs0, $fa0 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 - blez $a0, .LBB622_119 -# %bb.84: # in Loop: Header=BB622_31 Depth=1 + blez $a0, .LBB622_120 +# %bb.85: # in Loop: Header=BB622_32 Depth=1 addi.d $a1, $a0, -1 bstrpick.d $a1, $a1, 31, 0 ld.d $a2, $sp, 104 # 8-byte Folded Reload mul.d $a1, $a1, $a2 srli.d $a1, $a1, 35 - b .LBB622_120 -.LBB622_85: # in Loop: Header=BB622_31 Depth=1 + b .LBB622_121 +.LBB622_86: # in Loop: Header=BB622_32 Depth=1 pcalau12i $a0, %pc_hi20(.L.str.639) addi.d $a1, $a0, %pc_lo12(.L.str.639) move $a0, $s5 pcaddu18i $ra, %call36(strcmp) jirl $ra, $ra, 0 - beqz $a0, .LBB622_113 -# %bb.86: # in Loop: Header=BB622_31 Depth=1 + beqz $a0, .LBB622_114 +# %bb.87: # in Loop: Header=BB622_32 Depth=1 pcalau12i $a0, %pc_hi20(.L.str.640) addi.d $a1, $a0, %pc_lo12(.L.str.640) move $a0, $s5 pcaddu18i $ra, %call36(strcmp) jirl $ra, $ra, 0 - bnez $a0, .LBB622_28 -# %bb.87: # in Loop: Header=BB622_31 Depth=1 - fld.d $fa0, $sp, 264 - pcalau12i $a0, %pc_hi20(.LCPI622_2) - fld.d $fa1, $a0, %pc_lo12(.LCPI622_2) - b .LBB622_114 -.LBB622_88: # in Loop: Header=BB622_31 Depth=1 + bnez $a0, .LBB622_29 +# %bb.88: # in Loop: Header=BB622_32 Depth=1 + move $s8, $zero + fld.d $fa0, $sp, 256 + ori $a0, $zero, 0 + lu32i.d $a0, 333824 + fld.d $fa1, $fp, 0 + lu52i.d $a0, $a0, 1039 + b .LBB622_115 +.LBB622_89: # in Loop: Header=BB622_32 Depth=1 pcalau12i $a0, %pc_hi20(.L.str.638) addi.d $a1, $a0, %pc_lo12(.L.str.638) move $a0, $s5 pcaddu18i $ra, %call36(strcmp) jirl $ra, $ra, 0 beqz $a0, .LBB622_26 -# %bb.89: # in Loop: Header=BB622_31 Depth=1 +# %bb.90: # in Loop: Header=BB622_32 Depth=1 pcalau12i $a0, %pc_hi20(.L.str.636) addi.d $a1, $a0, %pc_lo12(.L.str.636) move $a0, $s5 pcaddu18i $ra, %call36(strcmp) jirl $ra, $ra, 0 - bnez $a0, .LBB622_28 -# %bb.90: # in Loop: Header=BB622_31 Depth=1 + bnez $a0, .LBB622_29 +# %bb.91: # in Loop: Header=BB622_32 Depth=1 move $a0, $fp pcaddu18i $ra, %call36(computeYMD) jirl $ra, $ra, 0 ld.bu $a0, $fp, 41 - bnez $a0, .LBB622_92 -# %bb.91: # in Loop: Header=BB622_31 Depth=1 + bnez $a0, .LBB622_93 +# %bb.92: # in Loop: Header=BB622_32 Depth=1 move $a0, $fp pcaddu18i $ra, %call36(computeJD) jirl $ra, $ra, 0 fld.d $fa0, $fp, 0 - vldi $vr3, -928 - fadd.d $fa0, $fa0, $fa3 + vldi $vr2, -928 + fadd.d $fa0, $fa0, $fa2 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a0, $fa1 - pcalau12i $a1, %pc_hi20(.LCPI622_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI622_0) - movgr2fr.w $fa2, $a0 - ffint.d.w $fa2, $fa2 - fsub.d $fa0, $fa0, $fa2 - fmadd.d $fa0, $fa0, $fa1, $fa3 + movgr2fr.w $fa1, $a0 + ffint.d.w $fa1, $fa1 + fsub.d $fa0, $fa0, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, 301424 + lu52i.d $a0, $a0, 1049 + movgr2fr.d $fa1, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 ftintrz.w.d $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI622_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI622_1) movfr2gr.s $a0, $fa0 movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 - fmul.d $fa0, $fa0, $fa1 + fmul.d $fa0, $fa0, $fs2 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a0, $fa1 movgr2fr.w $fa1, $a0 @@ -156131,10 +156074,10 @@ isDate: # @isDate fadd.d $fa0, $fa0, $fa1 fst.d $fa0, $fp, 32 st.b $s4, $fp, 41 -.LBB622_92: # %computeYMD_HMS.exit - # in Loop: Header=BB622_31 Depth=1 +.LBB622_93: # %computeYMD_HMS.exit + # in Loop: Header=BB622_32 Depth=1 ld.w $a0, $fp, 8 - fld.d $fa0, $sp, 264 + fld.d $fa0, $sp, 256 movgr2fr.w $fa1, $a0 ffint.d.w $fa1, $fa1 fadd.d $fa0, $fa0, $fa1 @@ -156145,16 +156088,16 @@ isDate: # @isDate move $a0, $fp pcaddu18i $ra, %call36(computeJD) jirl $ra, $ra, 0 - b .LBB622_122 -.LBB622_93: # in Loop: Header=BB622_31 Depth=1 + b .LBB622_123 +.LBB622_94: # in Loop: Header=BB622_32 Depth=1 pcalau12i $a0, %pc_hi20(.L.str.634) addi.d $a1, $a0, %pc_lo12(.L.str.634) - addi.d $a0, $sp, 232 + addi.d $a0, $sp, 224 ori $a2, $zero, 9 pcaddu18i $ra, %call36(bcmp) jirl $ra, $ra, 0 - bnez $a0, .LBB622_124 -# %bb.94: # in Loop: Header=BB622_31 Depth=1 + bnez $a0, .LBB622_125 +# %bb.95: # in Loop: Header=BB622_32 Depth=1 move $a0, $fp pcaddu18i $ra, %call36(computeYMD) jirl $ra, $ra, 0 @@ -156168,30 +156111,30 @@ isDate: # @isDate move $a0, $s6 pcaddu18i $ra, %call36(bcmp) jirl $ra, $ra, 0 - beqz $a0, .LBB622_117 -# %bb.95: # in Loop: Header=BB622_31 Depth=1 + beqz $a0, .LBB622_118 +# %bb.96: # in Loop: Header=BB622_32 Depth=1 pcalau12i $a0, %pc_hi20(.L.str.636) addi.d $a1, $a0, %pc_lo12(.L.str.636) ori $a2, $zero, 5 move $a0, $s6 pcaddu18i $ra, %call36(bcmp) jirl $ra, $ra, 0 - beqz $a0, .LBB622_118 -# %bb.96: # in Loop: Header=BB622_31 Depth=1 + beqz $a0, .LBB622_119 +# %bb.97: # in Loop: Header=BB622_32 Depth=1 ld.w $a0, $s6, 0 ld.d $a1, $sp, 96 # 8-byte Folded Reload xor $a0, $a0, $a1 sltu $s8, $zero, $a0 - b .LBB622_29 -.LBB622_97: # in Loop: Header=BB622_31 Depth=1 + b .LBB622_30 +.LBB622_98: # in Loop: Header=BB622_32 Depth=1 pcalau12i $a0, %pc_hi20(.L.str.630) addi.d $a1, $a0, %pc_lo12(.L.str.630) - addi.d $a0, $sp, 232 + addi.d $a0, $sp, 224 ori $a2, $zero, 10 pcaddu18i $ra, %call36(bcmp) jirl $ra, $ra, 0 - bnez $a0, .LBB622_124 -# %bb.98: # in Loop: Header=BB622_31 Depth=1 + bnez $a0, .LBB622_125 +# %bb.99: # in Loop: Header=BB622_32 Depth=1 move $a0, $fp pcaddu18i $ra, %call36(computeJD) jirl $ra, $ra, 0 @@ -156199,32 +156142,32 @@ isDate: # @isDate pcaddu18i $ra, %call36(localtimeOffset) jirl $ra, $ra, 0 fld.d $fa1, $fp, 0 - b .LBB622_105 -.LBB622_99: # in Loop: Header=BB622_31 Depth=1 - ld.d $a0, $sp, 232 + b .LBB622_106 +.LBB622_100: # in Loop: Header=BB622_32 Depth=1 + ld.d $a0, $sp, 224 ld.d $a1, $sp, 152 # 8-byte Folded Reload - bne $a0, $a1, .LBB622_124 -# %bb.100: # in Loop: Header=BB622_31 Depth=1 - addi.d $a1, $sp, 264 - addi.d $a0, $sp, 240 + bne $a0, $a1, .LBB622_125 +# %bb.101: # in Loop: Header=BB622_32 Depth=1 + addi.d $a1, $sp, 256 + addi.d $a0, $sp, 232 pcaddu18i $ra, %call36(sqlite3AtoF) jirl $ra, $ra, 0 - blez $a0, .LBB622_124 -# %bb.101: # in Loop: Header=BB622_31 Depth=1 - fld.d $fa0, $sp, 264 + blez $a0, .LBB622_125 +# %bb.102: # in Loop: Header=BB622_32 Depth=1 + fld.d $fa0, $sp, 256 vldi $vr1, -996 fcmp.cule.d $fcc0, $fa1, $fa0 - bcnez $fcc0, .LBB622_124 -# %bb.102: # in Loop: Header=BB622_31 Depth=1 + bcnez $fcc0, .LBB622_125 +# %bb.103: # in Loop: Header=BB622_32 Depth=1 ftintrz.w.d $fa1, $fa0 movfr2gr.s $s5, $fa1 - bltz $s5, .LBB622_124 -# %bb.103: # in Loop: Header=BB622_31 Depth=1 + bltz $s5, .LBB622_125 +# %bb.104: # in Loop: Header=BB622_32 Depth=1 movgr2fr.w $fa1, $s5 ffint.d.w $fa1, $fa1 fcmp.cune.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB622_124 -# %bb.104: # in Loop: Header=BB622_31 Depth=1 + bcnez $fcc0, .LBB622_125 +# %bb.105: # in Loop: Header=BB622_32 Depth=1 move $a0, $fp pcaddu18i $ra, %call36(computeYMD_HMS) jirl $ra, $ra, 0 @@ -156256,26 +156199,26 @@ isDate: # @isDate sub.d $a0, $s5, $a0 movgr2fr.w $fa1, $a0 ffint.d.w $fa1, $fa1 -.LBB622_105: # in Loop: Header=BB622_31 Depth=1 +.LBB622_106: # in Loop: Header=BB622_32 Depth=1 fadd.d $fa0, $fa0, $fa1 -.LBB622_106: # in Loop: Header=BB622_31 Depth=1 +.LBB622_107: # in Loop: Header=BB622_32 Depth=1 fst.d $fa0, $fp, 0 st.h $zero, $fp, 40 st.b $zero, $fp, 43 - b .LBB622_30 -.LBB622_107: # in Loop: Header=BB622_31 Depth=1 + b .LBB622_31 +.LBB622_108: # in Loop: Header=BB622_32 Depth=1 pcalau12i $a0, %pc_hi20(.L.str.631) addi.d $a1, $a0, %pc_lo12(.L.str.631) - addi.d $a0, $sp, 232 + addi.d $a0, $sp, 224 ori $a2, $zero, 10 pcaddu18i $ra, %call36(bcmp) jirl $ra, $ra, 0 - beqz $a0, .LBB622_115 -.LBB622_108: # in Loop: Header=BB622_31 Depth=1 - ld.w $a0, $sp, 232 + beqz $a0, .LBB622_116 +.LBB622_109: # in Loop: Header=BB622_32 Depth=1 + ld.w $a0, $sp, 224 ld.d $a1, $sp, 112 # 8-byte Folded Reload - bne $a0, $a1, .LBB622_124 -# %bb.109: # in Loop: Header=BB622_31 Depth=1 + bne $a0, $a1, .LBB622_125 +# %bb.110: # in Loop: Header=BB622_32 Depth=1 move $a0, $fp pcaddu18i $ra, %call36(computeJD) jirl $ra, $ra, 0 @@ -156295,13 +156238,13 @@ isDate: # @isDate fsub.d $fa0, $fs0, $fa0 fadd.d $fa0, $fa1, $fa0 fst.d $fa0, $fp, 0 - b .LBB622_30 -.LBB622_110: # in Loop: Header=BB622_31 Depth=1 - ld.bu $a0, $sp, 224 - ld.w $a1, $sp, 200 + b .LBB622_31 +.LBB622_111: # in Loop: Header=BB622_32 Depth=1 + ld.bu $a0, $sp, 216 + ld.w $a1, $sp, 192 sltui $a0, $a0, 1 - ld.w $a2, $sp, 192 - ld.w $a3, $sp, 196 + ld.w $a2, $sp, 184 + ld.w $a3, $sp, 188 addi.w $a1, $a1, 2 ori $a4, $zero, 3 maskeqz $a4, $a4, $a0 @@ -156331,101 +156274,110 @@ isDate: # @isDate srli.d $a5, $a2, 63 srai.d $a2, $a2, 39 add.d $a2, $a2, $a5 - pcalau12i $a5, %pc_hi20(.LCPI622_4) - fld.d $fa0, $a5, %pc_lo12(.LCPI622_4) ld.d $a5, $sp, 120 # 8-byte Folded Reload add.d $a0, $a0, $a5 - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, 447488 + lu52i.d $a0, $a0, 1031 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 - pcalau12i $a5, %pc_hi20(.LCPI622_5) - fld.d $fa0, $a5, %pc_lo12(.LCPI622_5) addi.d $a3, $a3, 1 - movgr2fr.w $fa1, $a3 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a3 + ffint.d.w $fa0, $fa0 + fmul.d $fa0, $fa0, $fs1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a3, $fa0 add.d $a1, $a4, $a1 add.d $a1, $a1, $a2 add.d $a1, $a1, $a3 - pcalau12i $a2, %pc_hi20(.LCPI622_6) - fld.d $fa0, $a2, %pc_lo12(.LCPI622_6) - ld.bu $a2, $sp, 225 add.d $a0, $a1, $a0 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + ori $a0, $zero, 0 + ld.bu $a1, $sp, 217 + lu32i.d $a0, 512512 + lu52i.d $a0, $a0, -1015 + movgr2fr.d $fa1, $a0 + fadd.d $fa0, $fa0, $fa1 + beqz $a1, .LBB622_70 +# %bb.112: # in Loop: Header=BB622_32 Depth=1 + ld.w $a0, $sp, 196 + ld.w $a1, $sp, 200 movgr2fr.w $fa1, $a0 ffint.d.w $fa1, $fa1 - fadd.d $fa0, $fa1, $fa0 - beqz $a2, .LBB622_69 -# %bb.111: # in Loop: Header=BB622_31 Depth=1 - ld.w $a0, $sp, 204 - movgr2fr.w $fa1, $a0 - ld.w $a0, $sp, 208 - pcalau12i $a1, %pc_hi20(.LCPI622_7) - fld.d $fa2, $a1, %pc_lo12(.LCPI622_7) - ffint.d.w $fa1, $fa1 - movgr2fr.w $fa3, $a0 - ffint.d.w $fa3, $fa3 - fmul.d $fa2, $fa3, $fa2 - pcalau12i $a0, %pc_hi20(.LCPI622_8) - fld.d $fa3, $a0, %pc_lo12(.LCPI622_8) - fld.d $fa4, $sp, 216 - pcalau12i $a0, %pc_hi20(.LCPI622_2) - fld.d $fa5, $a0, %pc_lo12(.LCPI622_2) - ld.bu $a0, $sp, 227 + movgr2fr.w $fa2, $a1 + ffint.d.w $fa2, $fa2 + ori $a0, $zero, 0 + lu32i.d $a0, -131072 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fa3, $a0 + fmul.d $fa2, $fa2, $fa3 + ori $a0, $zero, 0 + lu32i.d $a0, -253952 + lu52i.d $a0, $a0, 1034 + movgr2fr.d $fa3, $a0 fmadd.d $fa1, $fa1, $fa3, $fa2 - fadd.d $fa1, $fa4, $fa1 - fdiv.d $fa1, $fa1, $fa5 + fld.d $fa2, $sp, 208 + ori $a0, $zero, 0 + lu32i.d $a0, 333824 + lu52i.d $a1, $a0, 1039 + ld.bu $a2, $sp, 219 + movgr2fr.d $fa3, $a1 + fadd.d $fa1, $fa2, $fa1 + fdiv.d $fa1, $fa1, $fa3 fadd.d $fa0, $fa0, $fa1 - beqz $a0, .LBB622_69 -# %bb.112: # in Loop: Header=BB622_31 Depth=1 - ld.w $a0, $sp, 212 - pcalau12i $a1, %pc_hi20(.LCPI622_9) - fld.d $fa1, $a1, %pc_lo12(.LCPI622_9) - ori $a1, $zero, 60 - mul.d $a0, $a0, $a1 - movgr2fr.w $fa2, $a0 - ffint.d.w $fa2, $fa2 - fdiv.d $fa1, $fa2, $fa1 + beqz $a2, .LBB622_70 +# %bb.113: # in Loop: Header=BB622_32 Depth=1 + ld.w $a1, $sp, 204 + ori $a2, $zero, 60 + mul.d $a1, $a1, $a2 + movgr2fr.w $fa1, $a1 + ffint.d.w $fa1, $fa1 + lu52i.d $a0, $a0, -1009 + movgr2fr.d $fa2, $a0 + fdiv.d $fa1, $fa1, $fa2 fadd.d $fa0, $fa0, $fa1 - b .LBB622_69 -.LBB622_113: # in Loop: Header=BB622_31 Depth=1 - fld.d $fa0, $sp, 264 - pcalau12i $a0, %pc_hi20(.LCPI622_3) - fld.d $fa1, $a0, %pc_lo12(.LCPI622_3) -.LBB622_114: # %.thread164.i - # in Loop: Header=BB622_31 Depth=1 - fld.d $fa2, $fp, 0 + b .LBB622_70 +.LBB622_114: # in Loop: Header=BB622_32 Depth=1 move $s8, $zero - fdiv.d $fa0, $fa0, $fa1 - fadd.d $fa0, $fa2, $fa0 + fld.d $fa0, $sp, 256 + ori $a0, $zero, 0 + lu32i.d $a0, 425984 + fld.d $fa1, $fp, 0 + lu52i.d $a0, $a0, 1033 +.LBB622_115: # %.thread164.i + # in Loop: Header=BB622_32 Depth=1 + movgr2fr.d $fa2, $a0 + fdiv.d $fa0, $fa0, $fa2 b .LBB622_27 -.LBB622_115: # in Loop: Header=BB622_31 Depth=1 +.LBB622_116: # in Loop: Header=BB622_32 Depth=1 ld.bu $a0, $fp, 42 - beqz $a0, .LBB622_108 -# %bb.116: # in Loop: Header=BB622_31 Depth=1 + beqz $a0, .LBB622_109 +# %bb.117: # in Loop: Header=BB622_32 Depth=1 fld.d $fa0, $fp, 0 - pcalau12i $a0, %pc_hi20(.LCPI622_2) - fld.d $fa1, $a0, %pc_lo12(.LCPI622_2) - pcalau12i $a0, %pc_hi20(.LCPI622_10) - fld.d $fa2, $a0, %pc_lo12(.LCPI622_10) + ori $a0, $zero, 0 + lu32i.d $a0, 333824 + lu52i.d $a0, $a0, 1039 + movgr2fr.d $fa1, $a0 fdiv.d $fa0, $fa0, $fa1 - fadd.d $fa0, $fa0, $fa2 - b .LBB622_106 -.LBB622_117: # in Loop: Header=BB622_31 Depth=1 + fadd.d $fa0, $fa0, $fs3 + b .LBB622_107 +.LBB622_118: # in Loop: Header=BB622_32 Depth=1 st.w $s4, $fp, 16 - b .LBB622_30 -.LBB622_118: # in Loop: Header=BB622_31 Depth=1 + b .LBB622_31 +.LBB622_119: # in Loop: Header=BB622_32 Depth=1 move $a0, $fp pcaddu18i $ra, %call36(computeYMD) jirl $ra, $ra, 0 ori $a0, $zero, 1 lu32i.d $a0, 1 st.d $a0, $fp, 12 - b .LBB622_30 -.LBB622_119: # in Loop: Header=BB622_31 Depth=1 + b .LBB622_31 +.LBB622_120: # in Loop: Header=BB622_32 Depth=1 ori $a1, $zero, 12 sub.d $a1, $a1, $a0 bstrpick.d $a1, $a1, 31, 0 @@ -156433,7 +156385,7 @@ isDate: # @isDate mul.d $a1, $a1, $a2 srli.d $a1, $a1, 35 sub.d $a1, $zero, $a1 -.LBB622_120: # in Loop: Header=BB622_31 Depth=1 +.LBB622_121: # in Loop: Header=BB622_32 Depth=1 ld.w $a2, $fp, 8 add.d $a2, $a2, $a1 st.w $a2, $fp, 8 @@ -156450,142 +156402,129 @@ isDate: # @isDate movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 fcmp.ceq.d $fcc0, $fs0, $fa0 - bcnez $fcc0, .LBB622_122 -# %bb.121: # in Loop: Header=BB622_31 Depth=1 + bcnez $fcc0, .LBB622_123 +# %bb.122: # in Loop: Header=BB622_32 Depth=1 fld.d $fa1, $fp, 0 move $s8, $zero fsub.d $fa0, $fs0, $fa0 vldi $vr2, -962 fmadd.d $fa0, $fa0, $fa2, $fa1 - b .LBB622_27 -.LBB622_122: # %.thread164.i - # in Loop: Header=BB622_31 Depth=1 - move $s8, $zero b .LBB622_28 -.LBB622_123: +.LBB622_123: # %.thread164.i + # in Loop: Header=BB622_32 Depth=1 + move $s8, $zero + b .LBB622_29 +.LBB622_124: move $s4, $zero -.LBB622_124: # %parseDateOrTime.exit.thread +.LBB622_125: # %parseDateOrTime.exit.thread move $a0, $s4 - fld.d $fs0, $sp, 272 # 8-byte Folded Reload - ld.d $s8, $sp, 280 # 8-byte Folded Reload - ld.d $s7, $sp, 288 # 8-byte Folded Reload - ld.d $s6, $sp, 296 # 8-byte Folded Reload - ld.d $s5, $sp, 304 # 8-byte Folded Reload - ld.d $s4, $sp, 312 # 8-byte Folded Reload - ld.d $s3, $sp, 320 # 8-byte Folded Reload - ld.d $s2, $sp, 328 # 8-byte Folded Reload - ld.d $s1, $sp, 336 # 8-byte Folded Reload - ld.d $s0, $sp, 344 # 8-byte Folded Reload - ld.d $fp, $sp, 352 # 8-byte Folded Reload - ld.d $ra, $sp, 360 # 8-byte Folded Reload - addi.d $sp, $sp, 368 + fld.d $fs3, $sp, 264 # 8-byte Folded Reload + fld.d $fs2, $sp, 272 # 8-byte Folded Reload + fld.d $fs1, $sp, 280 # 8-byte Folded Reload + fld.d $fs0, $sp, 288 # 8-byte Folded Reload + ld.d $s8, $sp, 296 # 8-byte Folded Reload + ld.d $s7, $sp, 304 # 8-byte Folded Reload + ld.d $s6, $sp, 312 # 8-byte Folded Reload + ld.d $s5, $sp, 320 # 8-byte Folded Reload + ld.d $s4, $sp, 328 # 8-byte Folded Reload + ld.d $s3, $sp, 336 # 8-byte Folded Reload + ld.d $s2, $sp, 344 # 8-byte Folded Reload + ld.d $s1, $sp, 352 # 8-byte Folded Reload + ld.d $s0, $sp, 360 # 8-byte Folded Reload + ld.d $fp, $sp, 368 # 8-byte Folded Reload + ld.d $ra, $sp, 376 # 8-byte Folded Reload + addi.d $sp, $sp, 384 ret .Lfunc_end622: .size isDate, .Lfunc_end622-isDate .section .rodata,"a",@progbits .p2align 2, 0x0 .LJTI622_0: - .word .LBB622_65-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_65-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_65-.LJTI622_0 - .word .LBB622_65-.LJTI622_0 - .word .LBB622_65-.LJTI622_0 - .word .LBB622_65-.LJTI622_0 - .word .LBB622_65-.LJTI622_0 - .word .LBB622_65-.LJTI622_0 - .word .LBB622_65-.LJTI622_0 - .word .LBB622_65-.LJTI622_0 - .word .LBB622_65-.LJTI622_0 - .word .LBB622_65-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_97-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_93-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_107-.LJTI622_0 - .word .LBB622_124-.LJTI622_0 - .word .LBB622_99-.LJTI622_0 + .word .LBB622_66-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_66-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_66-.LJTI622_0 + .word .LBB622_66-.LJTI622_0 + .word .LBB622_66-.LJTI622_0 + .word .LBB622_66-.LJTI622_0 + .word .LBB622_66-.LJTI622_0 + .word .LBB622_66-.LJTI622_0 + .word .LBB622_66-.LJTI622_0 + .word .LBB622_66-.LJTI622_0 + .word .LBB622_66-.LJTI622_0 + .word .LBB622_66-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_98-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_94-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_108-.LJTI622_0 + .word .LBB622_125-.LJTI622_0 + .word .LBB622_100-.LJTI622_0 .LJTI622_1: - .word .LBB622_78-.LJTI622_1 - .word .LBB622_88-.LJTI622_1 - .word .LBB622_80-.LJTI622_1 - .word .LBB622_85-.LJTI622_1 - # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function computeJD -.LCPI623_0: - .dword 0x4076d40000000000 # double 365.25 -.LCPI623_1: - .dword 0x403e99a027525461 # double 30.600100000000001 -.LCPI623_2: - .dword 0xc097d20000000000 # double -1524.5 -.LCPI623_3: - .dword 0x404e000000000000 # double 60 -.LCPI623_4: - .dword 0x40ac200000000000 # double 3600 -.LCPI623_5: - .dword 0x40f5180000000000 # double 86400 -.LCPI623_6: - .dword 0xc0f5180000000000 # double -86400 + .word .LBB622_79-.LJTI622_1 + .word .LBB622_89-.LJTI622_1 + .word .LBB622_81-.LJTI622_1 + .word .LBB622_86-.LJTI622_1 + # -- End function .text - .p2align 5 + .p2align 5 # -- Begin function computeJD .type computeJD,@function computeJD: # @computeJD # %bb.0: @@ -156598,14 +156537,14 @@ computeJD: # @computeJD ori $a1, $zero, 1 beqz $a2, .LBB623_4 # %bb.3: - ld.w $a3, $a0, 8 - ld.w $a2, $a0, 16 + ld.w $a2, $a0, 8 + ld.w $a3, $a0, 16 ld.w $a4, $a0, 12 - addi.w $a2, $a2, 2 + addi.w $a3, $a3, 2 b .LBB623_5 .LBB623_4: - ori $a3, $zero, 2000 - ori $a2, $zero, 3 + ori $a2, $zero, 2000 + ori $a3, $zero, 3 ori $a4, $zero, 1 .LBB623_5: slti $a5, $a4, 3 @@ -156613,8 +156552,8 @@ computeJD: # @computeJD masknez $a4, $a4, $a5 maskeqz $a6, $a6, $a5 or $a4, $a6, $a4 - sub.d $a3, $a3, $a5 - addi.w $a5, $a3, 0 + sub.d $a2, $a2, $a5 + addi.w $a5, $a2, 0 lu12i.w $a6, -335545 ori $a6, $a6, 2785 mul.d $a6, $a5, $a6 @@ -156627,68 +156566,83 @@ computeJD: # @computeJD srli.d $a7, $a5, 63 srai.d $a5, $a5, 39 add.d $a5, $a5, $a7 - pcalau12i $a7, %pc_hi20(.LCPI623_0) - fld.d $fa0, $a7, %pc_lo12(.LCPI623_0) lu12i.w $a7, 1 ori $a7, $a7, 620 - add.d $a3, $a3, $a7 - movgr2fr.w $fa1, $a3 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + add.d $a2, $a2, $a7 + movgr2fr.w $fa0, $a2 + ffint.d.w $fa0, $fa0 + ori $a2, $zero, 0 + ori $a7, $zero, 0 + lu32i.d $a7, 447488 + lu52i.d $a7, $a7, 1031 + movgr2fr.d $fa1, $a7 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a3, $fa0 - pcalau12i $a7, %pc_hi20(.LCPI623_1) - fld.d $fa0, $a7, %pc_lo12(.LCPI623_1) + movfr2gr.s $a7, $fa0 addi.d $a4, $a4, 1 - movgr2fr.w $fa1, $a4 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a4 + ffint.d.w $fa0, $fa0 + lu12i.w $a4, 161061 + ori $a4, $a4, 1121 + lu32i.d $a4, -91744 + lu52i.d $a4, $a4, 1027 + movgr2fr.d $fa1, $a4 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a4, $fa0 - add.d $a2, $a2, $a6 - add.d $a2, $a2, $a5 - add.d $a2, $a2, $a4 - add.d $a2, $a2, $a3 - pcalau12i $a3, %pc_hi20(.LCPI623_2) - fld.d $fa0, $a3, %pc_lo12(.LCPI623_2) - movgr2fr.w $fa1, $a2 - ld.bu $a2, $a0, 41 - ffint.d.w $fa1, $fa1 - fadd.d $fa0, $fa1, $fa0 + add.d $a3, $a3, $a6 + add.d $a3, $a3, $a5 + add.d $a3, $a3, $a4 + add.d $a3, $a3, $a7 + movgr2fr.w $fa0, $a3 + ffint.d.w $fa0, $fa0 + ori $a3, $zero, 0 + lu32i.d $a3, 512512 + lu52i.d $a3, $a3, -1015 + ld.bu $a4, $a0, 41 + movgr2fr.d $fa1, $a3 + fadd.d $fa0, $fa0, $fa1 fst.d $fa0, $a0, 0 st.b $a1, $a0, 42 - beqz $a2, .LBB623_1 + beqz $a4, .LBB623_1 # %bb.6: ld.w $a1, $a0, 20 + ld.w $a3, $a0, 24 movgr2fr.w $fa1, $a1 - ld.w $a1, $a0, 24 - pcalau12i $a2, %pc_hi20(.LCPI623_3) - fld.d $fa2, $a2, %pc_lo12(.LCPI623_3) ffint.d.w $fa1, $fa1 - movgr2fr.w $fa3, $a1 - ffint.d.w $fa3, $fa3 - fmul.d $fa2, $fa3, $fa2 - pcalau12i $a1, %pc_hi20(.LCPI623_4) - fld.d $fa3, $a1, %pc_lo12(.LCPI623_4) - fld.d $fa4, $a0, 32 - pcalau12i $a1, %pc_hi20(.LCPI623_5) - fld.d $fa5, $a1, %pc_lo12(.LCPI623_5) - fmadd.d $fa1, $fa1, $fa3, $fa2 - ld.bu $a1, $a0, 43 - fadd.d $fa1, $fa4, $fa1 - fdiv.d $fa1, $fa1, $fa5 + movgr2fr.w $fa2, $a3 + ffint.d.w $fa2, $fa2 + ori $a1, $zero, 0 + lu32i.d $a1, -131072 + lu52i.d $a1, $a1, 1028 + movgr2fr.d $fa3, $a1 + fmul.d $fa2, $fa2, $fa3 + ori $a1, $zero, 0 + lu32i.d $a1, -253952 + fld.d $fa3, $a0, 32 + lu52i.d $a1, $a1, 1034 + movgr2fr.d $fa4, $a1 + fmadd.d $fa1, $fa1, $fa4, $fa2 + fadd.d $fa1, $fa3, $fa1 + lu32i.d $a2, 333824 + lu52i.d $a1, $a2, 1039 + ld.bu $a2, $a0, 43 + movgr2fr.d $fa2, $a1 + fdiv.d $fa1, $fa1, $fa2 fadd.d $fa0, $fa0, $fa1 fst.d $fa0, $a0, 0 - beqz $a1, .LBB623_1 + beqz $a2, .LBB623_1 # %bb.7: ld.w $a1, $a0, 28 ori $a2, $zero, 60 - pcalau12i $a3, %pc_hi20(.LCPI623_6) - fld.d $fa1, $a3, %pc_lo12(.LCPI623_6) mul.d $a1, $a1, $a2 - movgr2fr.w $fa2, $a1 - ffint.d.w $fa2, $fa2 - fdiv.d $fa1, $fa2, $fa1 + movgr2fr.w $fa1, $a1 + ffint.d.w $fa1, $fa1 + ori $a1, $zero, 0 + lu32i.d $a1, 333824 + lu52i.d $a1, $a1, -1009 + movgr2fr.d $fa2, $a1 + fdiv.d $fa1, $fa1, $fa2 fadd.d $fa0, $fa0, $fa1 fst.d $fa0, $a0, 0 st.h $zero, $a0, 40 @@ -156697,12 +156651,7 @@ computeJD: # @computeJD .Lfunc_end623: .size computeJD, .Lfunc_end623-computeJD # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function parseHhMmSs -.LCPI624_0: - .dword 0xc048000000000000 # double -48 - .text - .p2align 5 + .p2align 5 # -- Begin function parseHhMmSs .type parseHhMmSs,@function parseHhMmSs: # @parseHhMmSs # %bb.0: @@ -156768,11 +156717,13 @@ parseHhMmSs: # @parseHhMmSs bgez $a2, .LBB624_9 # %bb.5: # %.lr.ph.preheader addi.d $s0, $s1, 9 - pcalau12i $a2, %pc_hi20(.LCPI624_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI624_0) - movgr2fr.d $fa2, $zero - vldi $vr1, -912 - vldi $vr3, -988 + movgr2fr.d $fa1, $zero + vldi $vr0, -912 + vldi $vr2, -988 + ori $a2, $zero, 0 + lu32i.d $a2, -524288 + lu52i.d $a2, $a2, -1020 + movgr2fr.d $fa3, $a2 .p2align 4, , 16 .LBB624_6: # %.lr.ph # =>This Inner Loop Header: Depth=1 @@ -156782,14 +156733,14 @@ parseHhMmSs: # @parseHhMmSs ffint.d.w $fa4, $fa4 slli.d $a2, $a1, 1 ldx.hu $a2, $a0, $a2 - fmadd.d $fa2, $fa2, $fa3, $fa4 - fadd.d $fa2, $fa2, $fa0 - fmul.d $fa1, $fa1, $fa3 + fmadd.d $fa1, $fa1, $fa2, $fa4 + fadd.d $fa1, $fa1, $fa3 + fmul.d $fa0, $fa0, $fa2 slli.d $a2, $a2, 52 addi.d $s0, $s0, 1 bltz $a2, .LBB624_6 # %bb.7: # %._crit_edge - fdiv.d $fs0, $fa2, $fa1 + fdiv.d $fs0, $fa1, $fa0 b .LBB624_9 .LBB624_8: addi.d $s0, $s1, 5 @@ -157001,28 +156952,7 @@ getDigits: # @getDigits .Lfunc_end625: .size getDigits, .Lfunc_end625-getDigits # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function localtimeOffset -.LCPI626_0: - .dword 0x4076d40000000000 # double 365.25 -.LCPI626_1: - .dword 0x403e99a027525461 # double 30.600100000000001 -.LCPI626_2: - .dword 0xc097d20000000000 # double -1524.5 -.LCPI626_3: - .dword 0x4194997000000000 # double 8.64E+7 -.LCPI626_4: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI626_5: - .dword 0x404e000000000000 # double 60 -.LCPI626_6: - .dword 0x40ac200000000000 # double 3600 -.LCPI626_7: - .dword 0x40f5180000000000 # double 86400 -.LCPI626_8: - .dword 0xc1429ec5c0000000 # double -2440587.5 - .text - .p2align 5 + .p2align 5 # -- Begin function localtimeOffset .type localtimeOffset,@function localtimeOffset: # @localtimeOffset # %bb.0: @@ -157048,233 +156978,262 @@ localtimeOffset: # @localtimeOffset addi.d $a0, $sp, 16 pcaddu18i $ra, %call36(computeYMD) jirl $ra, $ra, 0 - ld.bu $a3, $sp, 57 + ld.bu $a1, $sp, 57 lu12i.w $s1, -335545 lu12i.w $s0, 335544 lu12i.w $fp, 1 - pcalau12i $a2, %pc_hi20(.LCPI626_0) - pcalau12i $a1, %pc_hi20(.LCPI626_1) - pcalau12i $a0, %pc_hi20(.LCPI626_2) - beqz $a3, .LBB626_3 + lu12i.w $a0, 161061 + beqz $a1, .LBB626_3 # %bb.1: # %computeYMD_HMS.exit - ld.w $a3, $sp, 24 - addi.w $a4, $a3, -2038 - addi.w $a5, $zero, -68 - bltu $a5, $a4, .LBB626_7 + ld.w $a1, $sp, 24 + addi.w $a2, $a1, -2038 + addi.w $a3, $zero, -68 + bltu $a3, $a2, .LBB626_7 .LBB626_2: st.d $zero, $sp, 36 - ori $a3, $zero, 2000 - ori $a4, $zero, 1 - ori $a5, $zero, 3 + ori $a1, $zero, 2000 + ori $a2, $zero, 1 + ori $a3, $zero, 3 movgr2fr.d $fa0, $zero b .LBB626_8 .LBB626_3: - ld.bu $a3, $sp, 58 - beqz $a3, .LBB626_5 + ld.bu $a1, $sp, 58 + beqz $a1, .LBB626_5 # %bb.4: # %.computeJD.exit30_crit_edge fld.d $fa0, $sp, 16 b .LBB626_6 .LBB626_5: - ld.bu $a3, $sp, 56 - ld.w $a4, $sp, 32 - sltui $a3, $a3, 1 - ld.w $a5, $sp, 24 - ld.w $a6, $sp, 28 - addi.w $a4, $a4, 2 - ori $a7, $zero, 3 - maskeqz $a7, $a7, $a3 + ld.bu $a1, $sp, 56 + ld.w $a2, $sp, 32 + sltui $a1, $a1, 1 + ld.w $a3, $sp, 24 + ld.w $a4, $sp, 28 + addi.w $a2, $a2, 2 + ori $a5, $zero, 3 + maskeqz $a5, $a5, $a1 + masknez $a2, $a2, $a1 + or $a2, $a5, $a2 + masknez $a4, $a4, $a1 + ori $a5, $zero, 1 + maskeqz $a5, $a5, $a1 + or $a4, $a5, $a4 + masknez $a3, $a3, $a1 + ori $a5, $zero, 2000 + maskeqz $a1, $a5, $a1 + or $a1, $a1, $a3 + slti $a3, $a4, 3 + addi.d $a5, $a4, 12 masknez $a4, $a4, $a3 - or $a4, $a7, $a4 - masknez $a6, $a6, $a3 - ori $a7, $zero, 1 - maskeqz $a7, $a7, $a3 - or $a6, $a7, $a6 - masknez $a5, $a5, $a3 - ori $a7, $zero, 2000 - maskeqz $a3, $a7, $a3 - or $a3, $a3, $a5 - slti $a5, $a6, 3 - addi.d $a7, $a6, 12 - masknez $a6, $a6, $a5 - maskeqz $a7, $a7, $a5 - or $a6, $a7, $a6 - sub.d $a3, $a3, $a5 - addi.w $a5, $a3, 0 - ori $a7, $s1, 2785 - mul.d $a7, $a5, $a7 - srli.d $t0, $a7, 63 - srai.d $a7, $a7, 37 - add.d $a7, $a7, $t0 - ori $t0, $s0, 1311 - mul.d $a5, $a5, $t0 - srli.d $t0, $a5, 63 - srai.d $a5, $a5, 39 - add.d $a5, $a5, $t0 - ori $t0, $fp, 620 - fld.d $fa0, $a2, %pc_lo12(.LCPI626_0) - add.d $a3, $a3, $t0 - movgr2fr.w $fa1, $a3 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 - ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a3, $fa0 - fld.d $fa0, $a1, %pc_lo12(.LCPI626_1) - addi.d $a6, $a6, 1 - movgr2fr.w $fa1, $a6 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + maskeqz $a5, $a5, $a3 + or $a4, $a5, $a4 + sub.d $a1, $a1, $a3 + addi.w $a3, $a1, 0 + ori $a5, $s1, 2785 + mul.d $a5, $a3, $a5 + srli.d $a6, $a5, 63 + srai.d $a5, $a5, 37 + add.d $a5, $a5, $a6 + ori $a6, $s0, 1311 + mul.d $a3, $a3, $a6 + srli.d $a6, $a3, 63 + srai.d $a3, $a3, 39 + add.d $a3, $a3, $a6 + ori $a6, $fp, 620 + add.d $a1, $a1, $a6 + movgr2fr.w $fa0, $a1 + ffint.d.w $fa0, $fa0 + ori $a1, $zero, 0 + ori $a6, $zero, 0 + lu32i.d $a6, 447488 + lu52i.d $a6, $a6, 1031 + movgr2fr.d $fa1, $a6 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a6, $fa0 - add.d $a4, $a7, $a4 - add.d $a4, $a4, $a5 - add.d $a4, $a4, $a6 - fld.d $fa0, $a0, %pc_lo12(.LCPI626_2) - add.d $a3, $a4, $a3 - movgr2fr.w $fa1, $a3 - ffint.d.w $fa1, $fa1 - fadd.d $fa0, $fa1, $fa0 + addi.d $a4, $a4, 1 + movgr2fr.w $fa0, $a4 + ffint.d.w $fa0, $fa0 + ori $a4, $a0, 1121 + lu32i.d $a4, -91744 + lu52i.d $a4, $a4, 1027 + movgr2fr.d $fa1, $a4 + fmul.d $fa0, $fa0, $fa1 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a4, $fa0 + add.d $a2, $a5, $a2 + add.d $a2, $a2, $a3 + add.d $a2, $a2, $a4 + add.d $a2, $a2, $a6 + movgr2fr.w $fa0, $a2 + ffint.d.w $fa0, $fa0 + lu32i.d $a1, 512512 + lu52i.d $a1, $a1, -1015 + movgr2fr.d $fa1, $a1 + fadd.d $fa0, $fa0, $fa1 .LBB626_6: # %computeJD.exit30 vldi $vr1, -928 fadd.d $fa0, $fa0, $fa1 ftintrz.w.d $fa2, $fa0 - movfr2gr.s $a3, $fa2 - pcalau12i $a4, %pc_hi20(.LCPI626_3) - fld.d $fa2, $a4, %pc_lo12(.LCPI626_3) - movgr2fr.w $fa3, $a3 - ffint.d.w $fa3, $fa3 - fsub.d $fa0, $fa0, $fa3 + movfr2gr.s $a1, $fa2 + movgr2fr.w $fa2, $a1 + ffint.d.w $fa2, $fa2 + fsub.d $fa0, $fa0, $fa2 + ori $a1, $zero, 0 + lu32i.d $a1, 301424 + lu52i.d $a1, $a1, 1049 + movgr2fr.d $fa2, $a1 fmadd.d $fa0, $fa0, $fa2, $fa1 ftintrz.w.d $fa0, $fa0 - pcalau12i $a3, %pc_hi20(.LCPI626_4) - fld.d $fa1, $a3, %pc_lo12(.LCPI626_4) - movfr2gr.s $a3, $fa0 - movgr2fr.w $fa0, $a3 + movfr2gr.s $a1, $fa0 + movgr2fr.w $fa0, $a1 ffint.d.w $fa0, $fa0 + lu12i.w $a1, -184550 + ori $a1, $a1, 2556 + lu32i.d $a1, 25165 + lu52i.d $a1, $a1, 1013 + movgr2fr.d $fa1, $a1 fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa1, $fa0 - movfr2gr.s $a3, $fa1 - movgr2fr.w $fa1, $a3 + movfr2gr.s $a1, $fa1 + movgr2fr.w $fa1, $a1 ffint.d.w $fa1, $fa1 fsub.d $fa0, $fa0, $fa1 - lu12i.w $a4, -452053 - ori $a4, $a4, 965 - mul.d $a4, $a3, $a4 - srli.d $a4, $a4, 32 - add.w $a4, $a4, $a3 - bstrpick.d $a5, $a4, 31, 31 - srai.d $a4, $a4, 11 - add.d $a4, $a4, $a5 - st.w $a4, $sp, 36 - lu12i.w $a5, -1 - ori $a5, $a5, 496 - mul.d $a4, $a4, $a5 - add.d $a3, $a4, $a3 - addi.w $a4, $a3, 0 - lu12i.w $a5, -489336 - ori $a5, $a5, 2185 - mul.d $a4, $a4, $a5 - srli.d $a4, $a4, 32 - add.w $a4, $a4, $a3 - bstrpick.d $a5, $a4, 31, 31 - srli.d $a4, $a4, 5 - add.d $a4, $a4, $a5 - st.w $a4, $sp, 40 - addi.w $a5, $zero, -60 - mul.d $a4, $a4, $a5 - add.d $a3, $a4, $a3 - movgr2fr.w $fa1, $a3 + lu12i.w $a2, -452053 + ori $a2, $a2, 965 + mul.d $a2, $a1, $a2 + srli.d $a2, $a2, 32 + add.w $a2, $a2, $a1 + bstrpick.d $a3, $a2, 31, 31 + srai.d $a2, $a2, 11 + add.d $a2, $a2, $a3 + st.w $a2, $sp, 36 + lu12i.w $a3, -1 + ori $a3, $a3, 496 + mul.d $a2, $a2, $a3 + add.d $a1, $a2, $a1 + addi.w $a2, $a1, 0 + lu12i.w $a3, -489336 + ori $a3, $a3, 2185 + mul.d $a2, $a2, $a3 + srli.d $a2, $a2, 32 + add.w $a2, $a2, $a1 + bstrpick.d $a3, $a2, 31, 31 + srli.d $a2, $a2, 5 + add.d $a2, $a2, $a3 + st.w $a2, $sp, 40 + addi.w $a3, $zero, -60 + mul.d $a2, $a2, $a3 + add.d $a1, $a2, $a1 + movgr2fr.w $fa1, $a1 ffint.d.w $fa1, $fa1 fadd.d $fa0, $fa0, $fa1 fst.d $fa0, $sp, 48 - ld.w $a3, $sp, 24 - addi.w $a4, $a3, -2038 - addi.w $a5, $zero, -68 - bgeu $a5, $a4, .LBB626_2 + ld.w $a1, $sp, 24 + addi.w $a2, $a1, -2038 + addi.w $a3, $zero, -68 + bgeu $a3, $a2, .LBB626_2 .LBB626_7: fld.d $fa0, $sp, 48 vldi $vr1, -928 fadd.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a5, $fa0 - ld.w $a6, $sp, 32 - ld.w $a4, $sp, 28 - movgr2fr.w $fa0, $a5 + movfr2gr.s $a3, $fa0 + ld.w $a4, $sp, 32 + ld.w $a2, $sp, 28 + movgr2fr.w $fa0, $a3 ffint.d.w $fa0, $fa0 - addi.w $a5, $a6, 2 + addi.w $a3, $a4, 2 .LBB626_8: # %computeJD.exit19 - ld.bu $a6, $sp, 56 - sltui $a6, $a6, 1 - masknez $a5, $a5, $a6 - ori $a7, $zero, 3 - maskeqz $a7, $a7, $a6 - or $a5, $a7, $a5 - masknez $a4, $a4, $a6 + ld.bu $a4, $sp, 56 + sltui $a4, $a4, 1 + masknez $a3, $a3, $a4 + ori $a5, $zero, 3 + maskeqz $a5, $a5, $a4 + or $a3, $a5, $a3 + masknez $a2, $a2, $a4 ori $s2, $zero, 1 - maskeqz $a7, $s2, $a6 - or $a4, $a7, $a4 - masknez $a3, $a3, $a6 - ori $a7, $zero, 2000 - maskeqz $a6, $a7, $a6 - or $a3, $a6, $a3 - slti $a6, $a4, 3 - addi.d $a7, $a4, 12 - masknez $a4, $a4, $a6 - maskeqz $a7, $a7, $a6 - or $a4, $a7, $a4 - sub.d $a3, $a3, $a6 - bstrpick.d $a6, $a3, 15, 2 - bstrpick.d $a6, $a6, 61, 0 - ori $a7, $fp, 1147 - mul.d $a6, $a6, $a7 - srli.d $a6, $a6, 17 - bstrpick.d $a7, $a3, 15, 4 - ori $t0, $zero, 2622 - mul.d $a7, $a7, $t0 - srli.d $a7, $a7, 16 - ori $t0, $fp, 620 - add.d $a3, $a3, $t0 - fld.d $fs0, $a2, %pc_lo12(.LCPI626_0) - bstrpick.d $a2, $a3, 31, 0 - movgr2fr.d $fa1, $a2 + maskeqz $a5, $s2, $a4 + or $a2, $a5, $a2 + masknez $a1, $a1, $a4 + ori $a5, $zero, 2000 + maskeqz $a4, $a5, $a4 + or $a1, $a4, $a1 + slti $a4, $a2, 3 + addi.d $a5, $a2, 12 + masknez $a2, $a2, $a4 + maskeqz $a5, $a5, $a4 + or $a2, $a5, $a2 + sub.d $a1, $a1, $a4 + bstrpick.d $a4, $a1, 15, 2 + bstrpick.d $a4, $a4, 61, 0 + ori $a5, $fp, 1147 + mul.d $a4, $a4, $a5 + srli.d $a4, $a4, 17 + bstrpick.d $a5, $a1, 15, 4 + ori $a6, $zero, 2622 + mul.d $a5, $a5, $a6 + srli.d $a5, $a5, 16 + ori $a6, $fp, 620 + add.d $a1, $a1, $a6 + bstrpick.d $a1, $a1, 31, 0 + movgr2fr.d $fa1, $a1 ffint.d.l $fa1, $fa1 + ori $a1, $zero, 0 + ori $a6, $zero, 0 + lu32i.d $a6, 447488 + lu52i.d $a6, $a6, 1031 + movgr2fr.d $fs0, $a6 fmul.d $fa1, $fa1, $fs0 ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a2, $fa1 - fld.d $fs1, $a1, %pc_lo12(.LCPI626_1) - addi.d $a1, $a4, 1 - movgr2fr.w $fa1, $a1 + movfr2gr.s $a6, $fa1 + addi.d $a2, $a2, 1 + movgr2fr.w $fa1, $a2 ffint.d.w $fa1, $fa1 + ori $a0, $a0, 1121 + lu32i.d $a0, -91744 + lu52i.d $a0, $a0, 1027 + movgr2fr.d $fs1, $a0 fmul.d $fa1, $fa1, $fs1 ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a1, $fa1 - sub.d $a3, $a5, $a6 - add.d $a3, $a3, $a7 - add.d $a1, $a3, $a1 - add.d $a1, $a1, $a2 - ld.w $a2, $sp, 36 - movgr2fr.w $fa1, $a1 + movfr2gr.s $a0, $fa1 + sub.d $a2, $a3, $a4 + add.d $a2, $a2, $a5 + add.d $a0, $a2, $a0 + add.d $a0, $a0, $a6 + movgr2fr.w $fa1, $a0 ffint.d.w $fa1, $fa1 - fld.d $fs2, $a0, %pc_lo12(.LCPI626_2) + ori $a0, $zero, 0 + lu32i.d $a0, 512512 + lu52i.d $a0, $a0, -1015 + ld.w $a2, $sp, 36 + movgr2fr.d $fs2, $a0 + ld.w $a0, $sp, 40 + fadd.d $fa1, $fa1, $fs2 movgr2fr.w $fa2, $a2 ffint.d.w $fa2, $fa2 - ld.w $a0, $sp, 40 - pcalau12i $a1, %pc_hi20(.LCPI626_5) - fld.d $fs3, $a1, %pc_lo12(.LCPI626_5) - pcalau12i $a1, %pc_hi20(.LCPI626_6) - fld.d $fs4, $a1, %pc_lo12(.LCPI626_6) movgr2fr.w $fa3, $a0 ffint.d.w $fa3, $fa3 + ori $a0, $zero, 0 + lu32i.d $a0, -131072 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fs3, $a0 fmul.d $fa3, $fa3, $fs3 + ori $a0, $zero, 0 + lu32i.d $a0, -253952 + lu52i.d $a0, $a0, 1034 + movgr2fr.d $fs4, $a0 fmadd.d $fa2, $fa2, $fs4, $fa3 fadd.d $fa0, $fa0, $fa2 - pcalau12i $a0, %pc_hi20(.LCPI626_7) - fld.d $fs5, $a0, %pc_lo12(.LCPI626_7) - pcalau12i $a0, %pc_hi20(.LCPI626_8) - fld.d $fa2, $a0, %pc_lo12(.LCPI626_8) - fadd.d $fa1, $fa1, $fs2 + lu32i.d $a1, 333824 + lu52i.d $a0, $a1, 1039 + movgr2fr.d $fs5, $a0 fdiv.d $fa0, $fa0, $fs5 fadd.d $fs6, $fa1, $fa0 - fadd.d $fa0, $fs6, $fa2 + lu12i.w $a0, -262144 + lu32i.d $a0, 171717 + lu52i.d $a0, $a0, -1004 + movgr2fr.d $fa0, $a0 + fadd.d $fa0, $fs6, $fa0 vldi $vr1, -928 fmadd.d $fa0, $fa0, $fs5, $fa1 ftintrz.l.d $fa0, $fa0 @@ -157356,14 +157315,7 @@ localtimeOffset: # @localtimeOffset .Lfunc_end626: .size localtimeOffset, .Lfunc_end626-localtimeOffset # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function computeYMD_HMS -.LCPI627_0: - .dword 0x4194997000000000 # double 8.64E+7 -.LCPI627_1: - .dword 0x3f50624dd2f1a9fc # double 0.001 - .text - .p2align 5 + .p2align 5 # -- Begin function computeYMD_HMS .type computeYMD_HMS,@function computeYMD_HMS: # @computeYMD_HMS # %bb.0: @@ -157389,18 +157341,23 @@ computeYMD_HMS: # @computeYMD_HMS fadd.d $fa0, $fa0, $fa1 ftintrz.w.d $fa2, $fa0 movfr2gr.s $a0, $fa2 - pcalau12i $a1, %pc_hi20(.LCPI627_0) - fld.d $fa2, $a1, %pc_lo12(.LCPI627_0) - movgr2fr.w $fa3, $a0 - ffint.d.w $fa3, $fa3 - fsub.d $fa0, $fa0, $fa3 + movgr2fr.w $fa2, $a0 + ffint.d.w $fa2, $fa2 + fsub.d $fa0, $fa0, $fa2 + ori $a0, $zero, 0 + lu32i.d $a0, 301424 + lu52i.d $a0, $a0, 1049 + movgr2fr.d $fa2, $a0 fmadd.d $fa0, $fa0, $fa2, $fa1 ftintrz.w.d $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI627_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI627_1) movfr2gr.s $a0, $fa0 movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa1, $fa0 movfr2gr.s $a0, $fa1 @@ -157446,20 +157403,7 @@ computeYMD_HMS: # @computeYMD_HMS .Lfunc_end627: .size computeYMD_HMS, .Lfunc_end627-computeYMD_HMS # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function computeYMD -.LCPI628_0: - .dword 0xc13c7dd040000000 # double -1867216.25 -.LCPI628_1: - .dword 0x40e1d58800000000 # double 36524.25 -.LCPI628_2: - .dword 0xc05e866666666666 # double -122.09999999999999 -.LCPI628_3: - .dword 0x4076d40000000000 # double 365.25 -.LCPI628_4: - .dword 0x403e99a027525461 # double 30.600100000000001 - .text - .p2align 5 + .p2align 5 # -- Begin function computeYMD .type computeYMD,@function computeYMD: # @computeYMD # %bb.0: @@ -157477,29 +157421,38 @@ computeYMD: # @computeYMD fadd.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a2, $fa0 - pcalau12i $a3, %pc_hi20(.LCPI628_0) - fld.d $fa0, $a3, %pc_lo12(.LCPI628_0) - pcalau12i $a3, %pc_hi20(.LCPI628_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI628_1) - movgr2fr.w $fa2, $a2 - ffint.d.w $fa2, $fa2 - fadd.d $fa0, $fa2, $fa0 + movgr2fr.w $fa0, $a2 + ffint.d.w $fa0, $fa0 + lu12i.w $a3, 262144 + lu32i.d $a3, -229936 + lu52i.d $a3, $a3, -1005 + movgr2fr.d $fa1, $a3 + fadd.d $fa0, $fa0, $fa1 + ori $a3, $zero, 0 + ori $a4, $zero, 0 + lu32i.d $a4, 120200 + lu52i.d $a4, $a4, 1038 + movgr2fr.d $fa1, $a4 fdiv.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a3, $fa0 - bstrpick.d $a4, $a3, 62, 61 - add.w $a4, $a3, $a4 - srli.d $a4, $a4, 2 - add.d $a2, $a2, $a3 - sub.w $a2, $a2, $a4 + movfr2gr.s $a4, $fa0 + bstrpick.d $a5, $a4, 62, 61 + add.w $a5, $a4, $a5 + srli.d $a5, $a5, 2 + add.d $a2, $a2, $a4 + sub.w $a2, $a2, $a5 addi.d $a2, $a2, 1525 - pcalau12i $a3, %pc_hi20(.LCPI628_2) - fld.d $fa0, $a3, %pc_lo12(.LCPI628_2) - pcalau12i $a3, %pc_hi20(.LCPI628_3) - fld.d $fa1, $a3, %pc_lo12(.LCPI628_3) - movgr2fr.w $fa2, $a2 - ffint.d.w $fa2, $fa2 - fadd.d $fa0, $fa2, $fa0 + movgr2fr.w $fa0, $a2 + ffint.d.w $fa0, $fa0 + lu12i.w $a4, 419430 + ori $a4, $a4, 1638 + lu32i.d $a4, -96666 + lu52i.d $a4, $a4, -1019 + movgr2fr.d $fa1, $a4 + fadd.d $fa0, $fa0, $fa1 + lu32i.d $a3, 447488 + lu52i.d $a3, $a3, 1031 + movgr2fr.d $fa1, $a3 fdiv.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a4, $fa0 @@ -157508,17 +157461,20 @@ computeYMD: # @computeYMD fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a3, $fa0 - pcalau12i $a5, %pc_hi20(.LCPI628_4) - fld.d $fa0, $a5, %pc_lo12(.LCPI628_4) sub.d $a2, $a2, $a3 - movgr2fr.w $fa1, $a2 - ffint.d.w $fa1, $fa1 - fdiv.d $fa1, $fa1, $fa0 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a3, $fa1 - movgr2fr.w $fa1, $a3 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a2 + ffint.d.w $fa0, $fa0 + lu12i.w $a3, 161061 + ori $a3, $a3, 1121 + lu32i.d $a3, -91744 + lu52i.d $a3, $a3, 1027 + movgr2fr.d $fa1, $a3 + fdiv.d $fa0, $fa0, $fa1 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a3, $fa0 + movgr2fr.w $fa0, $a3 + ffint.d.w $fa0, $fa0 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a5, $fa0 sub.d $a2, $a2, $a5 diff --git a/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/random.s b/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/random.s index 09eba12b..a5d2eb22 100644 --- a/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/random.s +++ b/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/random.s @@ -20,12 +20,7 @@ hypre_SeedRand: # @hypre_SeedRand .Lfunc_end0: .size hypre_SeedRand, .Lfunc_end0-hypre_SeedRand # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function hypre_Rand -.LCPI1_0: - .dword 0x3f50000000000000 # double 9.765625E-4 - .text - .globl hypre_Rand + .globl hypre_Rand # -- Begin function hypre_Rand .p2align 5 .type hypre_Rand,@function hypre_Rand: # @hypre_Rand @@ -35,17 +30,17 @@ hypre_Rand: # @hypre_Rand lu12i.w $a2, 406 ori $a2, $a2, 1549 mul.d $a1, $a1, $a2 - pcalau12i $a2, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI1_0) addi.w $a2, $a1, 0 bstrpick.d $a2, $a2, 62, 53 add.d $a2, $a1, $a2 bstrpick.d $a2, $a2, 31, 10 slli.d $a2, $a2, 10 sub.d $a1, $a1, $a2 - movgr2fr.w $fa1, $a1 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a1 + ffint.d.w $fa0, $fa0 + lu52i.d $a2, $zero, 1013 + movgr2fr.d $fa1, $a2 + fmul.d $fa0, $fa0, $fa1 st.w $a1, $a0, %pc_lo12(Seed) ret .Lfunc_end1: diff --git a/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/smg2000.s b/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/smg2000.s index dfa680fa..84ba9c04 100644 --- a/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/smg2000.s +++ b/results/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/smg2000.s @@ -12,10 +12,6 @@ .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI0_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 .text .globl main .p2align 5 @@ -1396,8 +1392,11 @@ main: # @main pcaddu18i $ra, %call36(HYPRE_PCGSetMaxIter) jirl $ra, $ra, 0 ld.d $a0, $sp, 240 - pcalau12i $a1, %pc_hi20(.LCPI0_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI0_3) + lu12i.w $a1, -390306 + ori $a1, $a1, 3469 + lu32i.d $a1, 50935 + lu52i.d $a1, $a1, 1003 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(HYPRE_PCGSetTol) jirl $ra, $ra, 0 ld.d $a0, $sp, 240 @@ -1477,8 +1476,11 @@ main: # @main pcaddu18i $ra, %call36(HYPRE_StructSMGSetMaxIter) jirl $ra, $ra, 0 ld.d $a0, $sp, 240 - pcalau12i $a1, %pc_hi20(.LCPI0_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI0_3) + lu12i.w $a1, -390306 + ori $a1, $a1, 3469 + lu32i.d $a1, 50935 + lu52i.d $a1, $a1, 1003 + movgr2fr.d $fa0, $a1 pcaddu18i $ra, %call36(HYPRE_StructSMGSetTol) jirl $ra, $ra, 0 ld.d $a0, $sp, 240 diff --git a/results/MultiSource/Benchmarks/ASC_Sequoia/AMGmk/CMakeFiles/AMGmk.dir/csr_matvec.s b/results/MultiSource/Benchmarks/ASC_Sequoia/AMGmk/CMakeFiles/AMGmk.dir/csr_matvec.s index 3dfa476c..8d45285e 100644 --- a/results/MultiSource/Benchmarks/ASC_Sequoia/AMGmk/CMakeFiles/AMGmk.dir/csr_matvec.s +++ b/results/MultiSource/Benchmarks/ASC_Sequoia/AMGmk/CMakeFiles/AMGmk.dir/csr_matvec.s @@ -1,10 +1,6 @@ .file "csr_matvec.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function hypre_CSRMatrixMatvec -.LCPI0_0: - .dword 0x3fe6666666666666 # double 0.69999999999999996 .text - .globl hypre_CSRMatrixMatvec + .globl hypre_CSRMatrixMatvec # -- Begin function hypre_CSRMatrixMatvec .p2align 5 .type hypre_CSRMatrixMatvec,@function hypre_CSRMatrixMatvec: # @hypre_CSRMatrixMatvec @@ -158,12 +154,15 @@ hypre_CSRMatrixMatvec: # @hypre_CSRMatrixMatvec bnez $a0, .LBB0_20 .LBB0_21: # %.loopexit193 movgr2fr.w $fa0, $s7 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) ffint.d.w $fa0, $fa0 - movgr2fr.w $fa2, $s0 - ffint.d.w $fa2, $fa2 - fmul.d $fa1, $fa2, $fa1 + movgr2fr.w $fa1, $s0 + ffint.d.w $fa1, $fa1 + lu12i.w $a0, 419430 + ori $a0, $a0, 1638 + lu32i.d $a0, 419430 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa2, $a0 + fmul.d $fa1, $fa1, $fa2 fcmp.clt.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB0_29 # %bb.22: # %.preheader186 diff --git a/results/MultiSource/Benchmarks/ASC_Sequoia/AMGmk/CMakeFiles/AMGmk.dir/main.s b/results/MultiSource/Benchmarks/ASC_Sequoia/AMGmk/CMakeFiles/AMGmk.dir/main.s index f87698c8..1b60eeb7 100644 --- a/results/MultiSource/Benchmarks/ASC_Sequoia/AMGmk/CMakeFiles/AMGmk.dir/main.s +++ b/results/MultiSource/Benchmarks/ASC_Sequoia/AMGmk/CMakeFiles/AMGmk.dir/main.s @@ -133,10 +133,6 @@ main: # @main .LCPI1_0: .dword 0x4018000000000000 # double 6 .dword 0xbff0000000000000 # double -1 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI1_1: - .dword 0x412e848000000000 # double 1.0E+6 .text .globl test_Matvec .p2align 5 @@ -216,15 +212,17 @@ test_Matvec: # @test_Matvec ffint.d.l $fa0, $fa0 sub.d $a1, $a3, $a4 movgr2fr.d $fa1, $a1 - pcalau12i $a1, %pc_hi20(.LCPI1_1) - fld.d $fa2, $a1, %pc_lo12(.LCPI1_1) - pcalau12i $a1, %pc_hi20(totalWallTime) - fld.d $fa3, $a1, %pc_lo12(totalWallTime) ffint.d.l $fa1, $fa1 - fdiv.d $fa1, $fa1, $fa2 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 + pcalau12i $a2, %pc_hi20(totalWallTime) + fld.d $fa2, $a2, %pc_lo12(totalWallTime) + movgr2fr.d $fa3, $a1 + fdiv.d $fa1, $fa1, $fa3 fadd.d $fa0, $fa1, $fa0 - fadd.d $fa0, $fa3, $fa0 - fst.d $fa0, $a1, %pc_lo12(totalWallTime) + fadd.d $fa0, $fa2, $fa0 + fst.d $fa0, $a2, %pc_lo12(totalWallTime) sub.d $a0, $a0, $s1 movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 @@ -232,7 +230,7 @@ test_Matvec: # @test_Matvec fld.d $fa1, $a2, %pc_lo12(totalCPUTime) ld.d $a0, $sp, 16 ld.d $a1, $sp, 8 - fdiv.d $fa0, $fa0, $fa2 + fdiv.d $fa0, $fa0, $fa3 fadd.d $fa0, $fa0, $fa1 ld.d $a0, $a0, 0 ld.d $a1, $a1, 0 @@ -293,10 +291,6 @@ test_Matvec: # @test_Matvec .LCPI2_0: .dword 0x4018000000000000 # double 6 .dword 0xbff0000000000000 # double -1 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI2_1: - .dword 0x412e848000000000 # double 1.0E+6 .text .globl test_Relax .p2align 5 @@ -368,22 +362,24 @@ test_Relax: # @test_Relax ffint.d.l $fa0, $fa0 sub.d $a1, $a3, $a4 movgr2fr.d $fa1, $a1 - pcalau12i $a1, %pc_hi20(.LCPI2_1) - fld.d $fa2, $a1, %pc_lo12(.LCPI2_1) - pcalau12i $a1, %pc_hi20(totalWallTime) - fld.d $fa3, $a1, %pc_lo12(totalWallTime) ffint.d.l $fa1, $fa1 - fdiv.d $fa1, $fa1, $fa2 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 + pcalau12i $a2, %pc_hi20(totalWallTime) + fld.d $fa2, $a2, %pc_lo12(totalWallTime) + movgr2fr.d $fa3, $a1 + fdiv.d $fa1, $fa1, $fa3 fadd.d $fa0, $fa1, $fa0 - fadd.d $fa0, $fa3, $fa0 - fst.d $fa0, $a1, %pc_lo12(totalWallTime) + fadd.d $fa0, $fa2, $fa0 + fst.d $fa0, $a2, %pc_lo12(totalWallTime) sub.d $a0, $a0, $s1 movgr2fr.d $fa0, $a0 pcalau12i $a0, %pc_hi20(totalCPUTime) fld.d $fa1, $a0, %pc_lo12(totalCPUTime) ffint.d.l $fa0, $fa0 ld.d $a1, $sp, 32 - fdiv.d $fa0, $fa0, $fa2 + fdiv.d $fa0, $fa0, $fa3 fadd.d $fa0, $fa0, $fa1 fst.d $fa0, $a0, %pc_lo12(totalCPUTime) ld.d $a0, $a1, 0 @@ -437,14 +433,7 @@ test_Relax: # @test_Relax .Lfunc_end2: .size test_Relax, .Lfunc_end2-test_Relax # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function test_Axpy -.LCPI3_0: - .dword 0xc08f400000000000 # double -1000 -.LCPI3_1: - .dword 0x412e848000000000 # double 1.0E+6 - .text - .globl test_Axpy + .globl test_Axpy # -- Begin function test_Axpy .p2align 5 .type test_Axpy,@function test_Axpy: # @test_Axpy @@ -504,22 +493,24 @@ test_Axpy: # @test_Axpy jirl $ra, $ra, 0 pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 - ld.d $a1, $s0, 0 - pcalau12i $a2, %pc_hi20(.LCPI3_0) - fld.d $fa1, $a2, %pc_lo12(.LCPI3_0) move $s3, $a0 + ld.d $a0, $s0, 0 movgr2fr.d $fa0, $zero - vldi $vr2, -784 + vldi $vr1, -784 + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, -1016 + movgr2fr.d $fa2, $a1 .p2align 4, , 16 .LBB3_3: # =>This Inner Loop Header: Depth=1 - fld.d $fa3, $a1, 0 - fadd.d $fa3, $fa3, $fa2 + fld.d $fa3, $a0, 0 fadd.d $fa3, $fa3, $fa1 + fadd.d $fa3, $fa3, $fa2 fabs.d $fa3, $fa3 fcmp.clt.d $fcc0, $fa0, $fa3 fsel $fa0, $fa0, $fa3, $fcc0 addi.d $s1, $s1, -1 - addi.d $a1, $a1, 8 + addi.d $a0, $a0, 8 bnez $s1, .LBB3_3 # %bb.4: movgr2fr.d $fa1, $zero @@ -541,21 +532,23 @@ test_Axpy: # @test_Axpy ffint.d.l $fa0, $fa0 sub.d $a0, $a2, $a3 movgr2fr.d $fa1, $a0 - pcalau12i $a0, %pc_hi20(.LCPI3_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI3_1) - pcalau12i $a0, %pc_hi20(totalWallTime) - fld.d $fa3, $a0, %pc_lo12(totalWallTime) ffint.d.l $fa1, $fa1 - fdiv.d $fa1, $fa1, $fa2 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + pcalau12i $a1, %pc_hi20(totalWallTime) + fld.d $fa2, $a1, %pc_lo12(totalWallTime) + movgr2fr.d $fa3, $a0 + fdiv.d $fa1, $fa1, $fa3 fadd.d $fa0, $fa1, $fa0 - fadd.d $fa0, $fa3, $fa0 - fst.d $fa0, $a0, %pc_lo12(totalWallTime) + fadd.d $fa0, $fa2, $fa0 + fst.d $fa0, $a1, %pc_lo12(totalWallTime) sub.d $a0, $s3, $s2 pcalau12i $a1, %pc_hi20(totalCPUTime) fld.d $fa0, $a1, %pc_lo12(totalCPUTime) movgr2fr.d $fa1, $a0 ffint.d.l $fa1, $fa1 - fdiv.d $fa1, $fa1, $fa2 + fdiv.d $fa1, $fa1, $fa3 fadd.d $fa0, $fa1, $fa0 fst.d $fa0, $a1, %pc_lo12(totalCPUTime) move $a0, $fp diff --git a/results/MultiSource/Benchmarks/ASC_Sequoia/CrystalMk/CMakeFiles/CrystalMk.dir/Crystal_div.s b/results/MultiSource/Benchmarks/ASC_Sequoia/CrystalMk/CMakeFiles/CrystalMk.dir/Crystal_div.s index b7fddd6c..a2483a14 100644 --- a/results/MultiSource/Benchmarks/ASC_Sequoia/CrystalMk/CMakeFiles/CrystalMk.dir/Crystal_div.s +++ b/results/MultiSource/Benchmarks/ASC_Sequoia/CrystalMk/CMakeFiles/CrystalMk.dir/Crystal_div.s @@ -1,19 +1,7 @@ .file "Crystal_div.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function Crystal_div -.LCPI0_0: - .dword 0x3fc999999999999a # double 0.20000000000000001 -.LCPI0_1: - .dword 0x3feccccccccccccd # double 0.90000000000000002 -.LCPI0_2: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI0_3: - .dword 0x3ff3333333333333 # double 1.2 -.LCPI0_4: - .dword 0x3f847ae147ae147b # double 0.01 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI0_5: + .p2align 4, 0x0 # -- Begin function Crystal_div +.LCPI0_0: .dword 0 # 0x0 .dword 1 # 0x1 .text @@ -30,56 +18,56 @@ Crystal_div: # @Crystal_div st.d $s1, $sp, 296 # 8-byte Folded Spill ori $t1, $zero, 4 bstrpick.d $t0, $a0, 30, 2 + lu12i.w $t3, -419431 + lu12i.w $t2, -209716 bgeu $a0, $t1, .LBB0_3 # %bb.2: move $t1, $zero b .LBB0_6 .LBB0_3: # %vector.ph - addi.d $t2, $sp, 120 + addi.d $t4, $sp, 120 slli.d $t1, $t0, 2 - addi.d $t3, $sp, 24 - ori $t4, $zero, 0 - ori $t5, $zero, 0 - lu32i.d $t5, 1 - vreplgr2vr.d $vr1, $t5 - lu52i.d $t5, $zero, 1023 - vreplgr2vr.d $vr2, $t5 - lu12i.w $t5, -419431 - ori $t5, $t5, 2458 - lu32i.d $t5, -419431 - lu52i.d $t5, $t5, 1020 - vreplgr2vr.d $vr3, $t5 - lu32i.d $t4, -524288 - lu52i.d $t4, $t4, 1026 - vreplgr2vr.d $vr4, $t4 - lu12i.w $t4, -209716 - ori $t4, $t4, 3277 - lu32i.d $t4, -209716 - lu52i.d $t4, $t4, 1022 - vreplgr2vr.d $vr5, $t4 - move $t4, $t1 + addi.d $t5, $sp, 24 + ori $t6, $zero, 0 + ori $t7, $zero, 0 + lu32i.d $t7, 1 + vreplgr2vr.d $vr1, $t7 + lu52i.d $t7, $zero, 1023 + vreplgr2vr.d $vr2, $t7 + ori $t7, $t3, 2458 + lu32i.d $t7, -419431 + lu52i.d $t7, $t7, 1020 + vreplgr2vr.d $vr3, $t7 + lu32i.d $t6, -524288 + lu52i.d $t6, $t6, 1026 + vreplgr2vr.d $vr4, $t6 + ori $t6, $t2, 3277 + lu32i.d $t6, -209716 + lu52i.d $t6, $t6, 1022 + vreplgr2vr.d $vr5, $t6 + move $t6, $t1 .p2align 4, , 16 .LBB0_4: # %vector.body # =>This Inner Loop Header: Depth=1 vaddi.wu $vr6, $vr1, 2 - vst $vr2, $t2, -16 - vst $vr2, $t2, 0 - vpickve2gr.w $t5, $vr1, 1 - bstrpick.d $t5, $t5, 31, 0 - movgr2fr.d $fa7, $t5 + vst $vr2, $t4, -16 + vst $vr2, $t4, 0 + vpickve2gr.w $t7, $vr1, 1 + bstrpick.d $t7, $t7, 31, 0 + movgr2fr.d $fa7, $t7 ffint.d.l $fa7, $fa7 - vpickve2gr.w $t5, $vr1, 0 - bstrpick.d $t5, $t5, 31, 0 - movgr2fr.d $ft0, $t5 + vpickve2gr.w $t7, $vr1, 0 + bstrpick.d $t7, $t7, 31, 0 + movgr2fr.d $ft0, $t7 ffint.d.l $ft0, $ft0 vextrins.d $vr8, $vr7, 16 - vpickve2gr.w $t5, $vr6, 1 - bstrpick.d $t5, $t5, 31, 0 - movgr2fr.d $fa7, $t5 + vpickve2gr.w $t7, $vr6, 1 + bstrpick.d $t7, $t7, 31, 0 + movgr2fr.d $fa7, $t7 ffint.d.l $fa7, $fa7 - vpickve2gr.w $t5, $vr6, 0 - bstrpick.d $t5, $t5, 31, 0 - movgr2fr.d $fa6, $t5 + vpickve2gr.w $t7, $vr6, 0 + bstrpick.d $t7, $t7, 31, 0 + movgr2fr.d $fa6, $t7 ffint.d.l $fa6, $fa6 vextrins.d $vr6, $vr7, 16 vfmul.d $vr7, $vr8, $vr3 @@ -88,116 +76,128 @@ Crystal_div: # @Crystal_div vfdiv.d $vr6, $vr6, $vr4 vfadd.d $vr7, $vr7, $vr5 vfadd.d $vr6, $vr6, $vr5 - vst $vr7, $t3, -16 - vst $vr6, $t3, 0 + vst $vr7, $t5, -16 + vst $vr6, $t5, 0 vaddi.wu $vr1, $vr1, 4 - addi.d $t4, $t4, -4 - addi.d $t3, $t3, 32 - addi.d $t2, $t2, 32 - bnez $t4, .LBB0_4 + addi.d $t6, $t6, -4 + addi.d $t5, $t5, 32 + addi.d $t4, $t4, 32 + bnez $t6, .LBB0_4 # %bb.5: # %middle.block beq $t1, $a0, .LBB0_8 .LBB0_6: # %.lr.ph.preheader167 - addi.d $t2, $sp, 8 - alsl.d $t2, $t1, $t2, 3 - addi.d $t3, $sp, 104 - alsl.d $t3, $t1, $t3, 3 - pcalau12i $t4, %pc_hi20(.LCPI0_0) - fld.d $fa1, $t4, %pc_lo12(.LCPI0_0) - pcalau12i $t4, %pc_hi20(.LCPI0_1) - fld.d $fa2, $t4, %pc_lo12(.LCPI0_1) - sub.d $t4, $a0, $t1 - lu52i.d $t5, $zero, 1023 - vldi $vr3, -984 + addi.d $t4, $sp, 8 + alsl.d $t4, $t1, $t4, 3 + addi.d $t5, $sp, 104 + alsl.d $t5, $t1, $t5, 3 + sub.d $t6, $a0, $t1 + lu52i.d $t7, $zero, 1023 + ori $t3, $t3, 2458 + lu32i.d $t3, -419431 + lu52i.d $t3, $t3, 1020 + movgr2fr.d $fa1, $t3 + vldi $vr2, -984 + ori $t2, $t2, 3277 + lu32i.d $t2, -209716 + lu52i.d $t2, $t2, 1022 + movgr2fr.d $fa3, $t2 .p2align 4, , 16 .LBB0_7: # %.lr.ph # =>This Inner Loop Header: Depth=1 - st.d $t5, $t3, 0 - bstrpick.d $t6, $t1, 31, 0 - movgr2fr.d $fa4, $t6 + st.d $t7, $t5, 0 + bstrpick.d $t2, $t1, 31, 0 + movgr2fr.d $fa4, $t2 ffint.d.l $fa4, $fa4 fmul.d $fa4, $fa4, $fa1 - fdiv.d $fa4, $fa4, $fa3 - fadd.d $fa4, $fa4, $fa2 - fst.d $fa4, $t2, 0 - addi.d $t2, $t2, 8 - addi.d $t3, $t3, 8 - addi.d $t4, $t4, -1 + fdiv.d $fa4, $fa4, $fa2 + fadd.d $fa4, $fa4, $fa3 + fst.d $fa4, $t4, 0 + addi.d $t4, $t4, 8 + addi.d $t5, $t5, 8 + addi.d $t6, $t6, -1 addi.w $t1, $t1, 1 - bnez $t4, .LBB0_7 + bnez $t6, .LBB0_7 .LBB0_8: # %.lr.ph89.preheader - ori $t1, $zero, 4 - bgeu $a0, $t1, .LBB0_10 + ori $t2, $zero, 4 + lu12i.w $t1, -390306 + bgeu $a0, $t2, .LBB0_10 # %bb.9: - move $t1, $zero + move $t2, $zero b .LBB0_13 .LBB0_10: # %vector.ph128 - addi.d $t2, $sp, 216 - slli.d $t1, $t0, 2 - addi.d $t3, $sp, 120 - addi.d $t4, $a1, 16 - lu12i.w $t5, -390306 - ori $t5, $t5, 3469 - lu32i.d $t5, 50935 - lu52i.d $t5, $t5, 1003 - vreplgr2vr.d $vr1, $t5 - move $t5, $t1 + addi.d $t3, $sp, 216 + slli.d $t2, $t0, 2 + addi.d $t4, $sp, 120 + addi.d $t5, $a1, 16 + ori $t6, $t1, 3469 + lu32i.d $t6, 50935 + lu52i.d $t6, $t6, 1003 + vreplgr2vr.d $vr1, $t6 + move $t6, $t2 .p2align 4, , 16 .LBB0_11: # %vector.body131 # =>This Inner Loop Header: Depth=1 - vld $vr2, $t4, -16 - vld $vr3, $t4, 0 - vld $vr4, $t3, -16 - vld $vr5, $t3, 0 + vld $vr2, $t5, -16 + vld $vr3, $t5, 0 + vld $vr4, $t4, -16 + vld $vr5, $t4, 0 vfmadd.d $vr2, $vr2, $vr4, $vr1 vfmadd.d $vr3, $vr3, $vr5, $vr1 vfrecip.d $vr2, $vr2 vfrecip.d $vr3, $vr3 - vst $vr2, $t2, -16 - vst $vr3, $t2, 0 - addi.d $t5, $t5, -4 - addi.d $t2, $t2, 32 + vst $vr2, $t3, -16 + vst $vr3, $t3, 0 + addi.d $t6, $t6, -4 addi.d $t3, $t3, 32 addi.d $t4, $t4, 32 - bnez $t5, .LBB0_11 + addi.d $t5, $t5, 32 + bnez $t6, .LBB0_11 # %bb.12: # %middle.block137 - beq $t1, $a0, .LBB0_15 + beq $t2, $a0, .LBB0_15 .LBB0_13: # %.lr.ph89.preheader166 - addi.d $t2, $sp, 200 - alsl.d $t2, $t1, $t2, 3 - addi.d $t3, $sp, 104 - pcalau12i $t4, %pc_hi20(.LCPI0_2) - fld.d $fa1, $t4, %pc_lo12(.LCPI0_2) - alsl.d $t3, $t1, $t3, 3 - alsl.d $a1, $t1, $a1, 3 - sub.d $t1, $a0, $t1 + addi.d $t3, $sp, 200 + alsl.d $t3, $t2, $t3, 3 + addi.d $t4, $sp, 104 + alsl.d $t4, $t2, $t4, 3 + alsl.d $a1, $t2, $a1, 3 + sub.d $t2, $a0, $t2 + ori $t1, $t1, 3469 + lu32i.d $t1, 50935 + lu52i.d $t1, $t1, 1003 + movgr2fr.d $fa1, $t1 .p2align 4, , 16 .LBB0_14: # %.lr.ph89 # =>This Inner Loop Header: Depth=1 fld.d $fa2, $a1, 0 - fld.d $fa3, $t3, 0 + fld.d $fa3, $t4, 0 fmadd.d $fa2, $fa2, $fa3, $fa1 frecip.d $fa2, $fa2 - fst.d $fa2, $t2, 0 - addi.d $t2, $t2, 8 + fst.d $fa2, $t3, 0 addi.d $t3, $t3, 8 - addi.d $t1, $t1, -1 + addi.d $t4, $t4, 8 + addi.d $t2, $t2, -1 addi.d $a1, $a1, 8 - bnez $t1, .LBB0_14 + bnez $t2, .LBB0_14 .LBB0_15: # %.lr.ph93 - pcalau12i $a1, %pc_hi20(.LCPI0_3) - fld.d $fa1, $a1, %pc_lo12(.LCPI0_3) move $t1, $zero ld.d $a1, $sp, 320 + lu12i.w $t2, 209715 + ori $t2, $t2, 819 + lu32i.d $t2, 209715 + lu52i.d $t2, $t2, 1023 + movgr2fr.d $fa1, $t2 fmul.d $fa1, $fa0, $fa1 slli.d $t2, $t0, 2 addi.d $t3, $a6, 16 addi.d $t4, $sp, 8 vldi $vr2, -962 - pcalau12i $t5, %pc_hi20(.LCPI0_4) - fld.d $fa3, $t5, %pc_lo12(.LCPI0_4) addi.d $t5, $sp, 104 ori $t6, $zero, 4 + lu12i.w $t7, 293601 + ori $t7, $t7, 1147 + lu32i.d $t7, 293601 + lu52i.d $t7, $t7, 1016 + movgr2fr.d $fa3, $t7 addi.d $t7, $sp, 200 move $t8, $a6 b .LBB0_17 @@ -274,8 +274,8 @@ Crystal_div: # @Crystal_div b .LBB0_29 .LBB0_26: # %vector.ph153 slli.d $t0, $t0, 2 - pcalau12i $t1, %pc_hi20(.LCPI0_5) - vld $vr1, $t1, %pc_lo12(.LCPI0_5) + pcalau12i $t1, %pc_hi20(.LCPI0_0) + vld $vr1, $t1, %pc_lo12(.LCPI0_0) addi.d $t1, $sp, 216 vrepli.d $vr2, 96 vrepli.d $vr3, 192 diff --git a/results/MultiSource/Benchmarks/ASC_Sequoia/CrystalMk/CMakeFiles/CrystalMk.dir/Crystal_pow.s b/results/MultiSource/Benchmarks/ASC_Sequoia/CrystalMk/CMakeFiles/CrystalMk.dir/Crystal_pow.s index fa812e83..8d98cdc2 100644 --- a/results/MultiSource/Benchmarks/ASC_Sequoia/CrystalMk/CMakeFiles/CrystalMk.dir/Crystal_pow.s +++ b/results/MultiSource/Benchmarks/ASC_Sequoia/CrystalMk/CMakeFiles/CrystalMk.dir/Crystal_pow.s @@ -1,18 +1,6 @@ .file "Crystal_pow.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function Crystal_pow -.LCPI0_0: - .dword 0x3fc999999999999a # double 0.20000000000000001 -.LCPI0_1: - .dword 0x3feccccccccccccd # double 0.90000000000000002 -.LCPI0_2: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI0_3: - .dword 0x3f847ae147ae147b # double 0.01 -.LCPI0_4: - .dword 0x3ff3333333333333 # double 1.2 .text - .globl Crystal_pow + .globl Crystal_pow # -- Begin function Crystal_pow .p2align 5 .type Crystal_pow,@function Crystal_pow: # @Crystal_pow @@ -31,6 +19,8 @@ Crystal_pow: # @Crystal_pow move $fp, $a1 move $s0, $a0 ori $a0, $zero, 4 + lu12i.w $a2, -419431 + lu12i.w $a1, -209716 bgeu $s0, $a0, .LBB0_3 # %bb.2: move $a0, $zero @@ -38,50 +28,48 @@ Crystal_pow: # @Crystal_pow .LBB0_3: # %vector.ph bstrpick.d $a0, $s0, 30, 2 slli.d $a0, $a0, 2 - addi.d $a1, $sp, 128 - ori $a3, $zero, 0 - ori $a2, $zero, 0 - lu32i.d $a2, 1 - vreplgr2vr.d $vr0, $a2 - addi.d $a2, $sp, 32 - lu52i.d $a4, $zero, 1023 - vreplgr2vr.d $vr1, $a4 - lu12i.w $a4, -419431 - ori $a4, $a4, 2458 - lu32i.d $a4, -419431 - lu52i.d $a4, $a4, 1020 - vreplgr2vr.d $vr2, $a4 - lu12i.w $a4, -209716 - ori $a4, $a4, 3277 - lu32i.d $a4, -209716 - lu52i.d $a4, $a4, 1022 - vreplgr2vr.d $vr3, $a4 - lu32i.d $a3, -524288 - lu52i.d $a3, $a3, 1026 - vreplgr2vr.d $vr4, $a3 - move $a3, $a0 + addi.d $a3, $sp, 128 + ori $a5, $zero, 0 + ori $a4, $zero, 0 + lu32i.d $a4, 1 + vreplgr2vr.d $vr0, $a4 + addi.d $a4, $sp, 32 + lu52i.d $a6, $zero, 1023 + vreplgr2vr.d $vr1, $a6 + ori $a6, $a2, 2458 + lu32i.d $a6, -419431 + lu52i.d $a6, $a6, 1020 + vreplgr2vr.d $vr2, $a6 + ori $a6, $a1, 3277 + lu32i.d $a6, -209716 + lu52i.d $a6, $a6, 1022 + vreplgr2vr.d $vr3, $a6 + lu32i.d $a5, -524288 + lu52i.d $a5, $a5, 1026 + vreplgr2vr.d $vr4, $a5 + move $a5, $a0 .p2align 4, , 16 .LBB0_4: # %vector.body # =>This Inner Loop Header: Depth=1 vaddi.wu $vr5, $vr0, 2 - vst $vr1, $a2, -16 - vst $vr1, $a2, 0 - vpickve2gr.w $a4, $vr0, 1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa6, $a4 + vst $vr1, $a4, -16 + vst $vr1, $a4, 0 + vpickve2gr.w $a6, $vr0, 1 + bstrpick.d $a6, $a6, 31, 0 + movgr2fr.d $fa6, $a6 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a4, $vr0, 0 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa7, $a4 + vpickve2gr.w $a6, $vr0, 0 + bstrpick.d $a6, $a6, 31, 0 + movgr2fr.d $fa7, $a6 ffint.d.l $fa7, $fa7 vextrins.d $vr7, $vr6, 16 - vpickve2gr.w $a4, $vr5, 1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa6, $a4 + vpickve2gr.w $a6, $vr5, 1 + bstrpick.d $a6, $a6, 31, 0 + movgr2fr.d $fa6, $a6 ffint.d.l $fa6, $fa6 - vpickve2gr.w $a4, $vr5, 0 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa5, $a4 + vpickve2gr.w $a6, $vr5, 0 + bstrpick.d $a6, $a6, 31, 0 + movgr2fr.d $fa5, $a6 ffint.d.l $fa5, $fa5 vextrins.d $vr5, $vr6, 16 vfmul.d $vr6, $vr7, $vr2 @@ -90,52 +78,65 @@ Crystal_pow: # @Crystal_pow vfmul.d $vr5, $vr5, $vr3 vfdiv.d $vr6, $vr6, $vr4 vfdiv.d $vr5, $vr5, $vr4 - vst $vr6, $a1, -16 - vst $vr5, $a1, 0 + vst $vr6, $a3, -16 + vst $vr5, $a3, 0 vaddi.wu $vr0, $vr0, 4 - addi.d $a3, $a3, -4 - addi.d $a1, $a1, 32 - addi.d $a2, $a2, 32 - bnez $a3, .LBB0_4 + addi.d $a5, $a5, -4 + addi.d $a3, $a3, 32 + addi.d $a4, $a4, 32 + bnez $a5, .LBB0_4 # %bb.5: # %middle.block beq $a0, $s0, .LBB0_8 .LBB0_6: # %.lr.ph.preheader26 - addi.d $a1, $sp, 112 - alsl.d $a1, $a0, $a1, 3 - addi.d $a2, $sp, 16 - alsl.d $a2, $a0, $a2, 3 - pcalau12i $a3, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a3, %pc_lo12(.LCPI0_0) - pcalau12i $a3, %pc_hi20(.LCPI0_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI0_1) - sub.d $a3, $s0, $a0 - lu52i.d $a4, $zero, 1023 + addi.d $a3, $sp, 112 + alsl.d $a3, $a0, $a3, 3 + addi.d $a4, $sp, 16 + alsl.d $a4, $a0, $a4, 3 + sub.d $a5, $s0, $a0 + lu52i.d $a6, $zero, 1023 + ori $a2, $a2, 2458 + lu32i.d $a2, -419431 + lu52i.d $a2, $a2, 1020 + movgr2fr.d $fa0, $a2 + ori $a1, $a1, 3277 + lu32i.d $a1, -209716 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fa1, $a1 vldi $vr2, -984 .p2align 4, , 16 .LBB0_7: # %.lr.ph # =>This Inner Loop Header: Depth=1 - st.d $a4, $a2, 0 - bstrpick.d $a5, $a0, 31, 0 - movgr2fr.d $fa3, $a5 + st.d $a6, $a4, 0 + bstrpick.d $a1, $a0, 31, 0 + movgr2fr.d $fa3, $a1 ffint.d.l $fa3, $fa3 fmul.d $fa3, $fa3, $fa0 fmul.d $fa3, $fa3, $fa1 fdiv.d $fa3, $fa3, $fa2 - fst.d $fa3, $a1, 0 - addi.d $a1, $a1, 8 - addi.d $a2, $a2, 8 - addi.d $a3, $a3, -1 + fst.d $fa3, $a3, 0 + addi.d $a3, $a3, 8 + addi.d $a4, $a4, 8 + addi.d $a5, $a5, -1 addi.w $a0, $a0, 1 - bnez $a3, .LBB0_7 + bnez $a5, .LBB0_7 .LBB0_8: # %.lr.ph21.preheader - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fs1, $a0, %pc_lo12(.LCPI0_2) - pcalau12i $a0, %pc_hi20(.LCPI0_3) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_3) - pcalau12i $a0, %pc_hi20(.LCPI0_4) - fld.d $fs2, $a0, %pc_lo12(.LCPI0_4) addi.d $s1, $sp, 112 addi.d $s2, $sp, 16 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, 293601 + ori $a0, $a0, 1147 + lu32i.d $a0, 293601 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fs0, $a0 + lu12i.w $a0, 209715 + ori $a0, $a0, 819 + lu32i.d $a0, 209715 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fs2, $a0 .p2align 4, , 16 .LBB0_9: # %.lr.ph21 # =>This Inner Loop Header: Depth=1 diff --git a/results/MultiSource/Benchmarks/ASC_Sequoia/CrystalMk/CMakeFiles/CrystalMk.dir/SPEdriver.s b/results/MultiSource/Benchmarks/ASC_Sequoia/CrystalMk/CMakeFiles/CrystalMk.dir/SPEdriver.s index 6a00fd00..177d8bdf 100644 --- a/results/MultiSource/Benchmarks/ASC_Sequoia/CrystalMk/CMakeFiles/CrystalMk.dir/SPEdriver.s +++ b/results/MultiSource/Benchmarks/ASC_Sequoia/CrystalMk/CMakeFiles/CrystalMk.dir/SPEdriver.s @@ -1,10 +1,6 @@ .file "SPEdriver.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function SPEdriver -.LCPI0_0: - .dword 0x3f847ae147ae147b # double 0.01 .text - .globl SPEdriver + .globl SPEdriver # -- Begin function SPEdriver .p2align 5 .type SPEdriver,@function SPEdriver: # @SPEdriver @@ -36,10 +32,13 @@ SPEdriver: # @SPEdriver jirl $ra, $ra, 0 pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_0) lu12i.w $s7, 488 ori $s8, $s7, 1152 + lu12i.w $a0, 293601 + ori $a0, $a0, 1147 + lu32i.d $a0, 293601 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB0_1: # =>This Inner Loop Header: Depth=1 ori $a0, $zero, 12 diff --git a/results/MultiSource/Benchmarks/ASC_Sequoia/CrystalMk/CMakeFiles/CrystalMk.dir/init.s b/results/MultiSource/Benchmarks/ASC_Sequoia/CrystalMk/CMakeFiles/CrystalMk.dir/init.s index afd60acb..d98aa038 100644 --- a/results/MultiSource/Benchmarks/ASC_Sequoia/CrystalMk/CMakeFiles/CrystalMk.dir/init.s +++ b/results/MultiSource/Benchmarks/ASC_Sequoia/CrystalMk/CMakeFiles/CrystalMk.dir/init.s @@ -1,38 +1,6 @@ .file "init.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function init -.LCPI0_0: - .dword 0x3fc999999999999a # double 0.20000000000000001 -.LCPI0_1: - .dword 0x3ffe666666666666 # double 1.8999999999999999 -.LCPI0_2: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI0_3: - .dword 0x3f847ae147ae147b # double 0.01 -.LCPI0_4: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 -.LCPI0_5: - .dword 0x3ef4f8b588e368f1 # double 2.0000000000000002E-5 -.LCPI0_6: - .dword 0x3eff75104d551d6a # double 3.0000000000000004E-5 -.LCPI0_7: - .dword 0x3f04f8b588e368f1 # double 4.0000000000000003E-5 -.LCPI0_8: - .dword 0x3f0a36e2eb1c432d # double 5.0000000000000002E-5 -.LCPI0_9: - .dword 0x3f0f75104d551d6a # double 6.0000000000000008E-5 -.LCPI0_10: - .dword 0x3f12599ed7c6fbd3 # double 7.0000000000000007E-5 -.LCPI0_11: - .dword 0x3f14f8b588e368f1 # double 8.0000000000000007E-5 -.LCPI0_12: - .dword 0x3f1797cc39ffd60f # double 9.0000000000000006E-5 -.LCPI0_13: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI0_14: - .dword 0x3f1cd5f99c38b04b # double 1.1E-4 .text - .globl init + .globl init # -- Begin function init .p2align 5 .type init,@function init: # @init @@ -42,45 +10,71 @@ init: # @init addi.d $a6, $a6, 48 addi.d $a7, $a7, 48 ori $t1, $zero, 11 - pcalau12i $t2, %pc_hi20(.LCPI0_0) - fld.d $fa0, $t2, %pc_lo12(.LCPI0_0) + lu12i.w $t2, -419431 + ori $t2, $t2, 2458 + lu32i.d $t2, -419431 + lu52i.d $t2, $t2, 1020 + movgr2fr.d $fa0, $t2 vldi $vr1, -984 - pcalau12i $t2, %pc_hi20(.LCPI0_1) - fld.d $fa2, $t2, %pc_lo12(.LCPI0_1) - pcalau12i $t2, %pc_hi20(.LCPI0_2) - fld.d $fa3, $t2, %pc_lo12(.LCPI0_2) + lu12i.w $t2, 419430 + ori $t2, $t2, 1638 + lu32i.d $t2, -104858 + lu52i.d $t2, $t2, 1023 + movgr2fr.d $fa2, $t2 + lu12i.w $t2, -184550 + ori $t2, $t2, 2556 + lu32i.d $t2, 25165 + lu52i.d $t2, $t2, 1013 + movgr2fr.d $fa3, $t2 ori $t2, $zero, 0 lu32i.d $t2, -131072 lu52i.d $t2, $t2, 1027 vldi $vr4, -1012 + lu12i.w $t3, -487882 + ori $t4, $t3, 2289 + lu32i.d $t4, 325813 + lu52i.d $t3, $t4, 1006 + movgr2fr.d $fa5, $t3 + lu52i.d $t3, $t4, 1007 + movgr2fr.d $fa6, $t3 + lu12i.w $t3, 316753 + ori $t5, $t3, 3434 + lu32i.d $t5, -35568 + lu52i.d $t3, $t5, 1007 + movgr2fr.d $fa7, $t3 + lu52i.d $t3, $t4, 1008 + movgr2fr.d $ft0, $t3 + lu12i.w $t3, -85564 + ori $t6, $t3, 813 + lu32i.d $t6, -379166 + lu52i.d $t3, $t6, 1008 + movgr2fr.d $ft1, $t3 lu12i.w $t3, 293601 ori $t3, $t3, 1147 - pcalau12i $t4, %pc_hi20(.LCPI0_3) - fld.d $fa5, $t4, %pc_lo12(.LCPI0_3) - pcalau12i $t4, %pc_hi20(.LCPI0_4) - fld.d $fa6, $t4, %pc_lo12(.LCPI0_4) - pcalau12i $t4, %pc_hi20(.LCPI0_5) - fld.d $fa7, $t4, %pc_lo12(.LCPI0_5) - pcalau12i $t4, %pc_hi20(.LCPI0_6) - fld.d $ft0, $t4, %pc_lo12(.LCPI0_6) - pcalau12i $t4, %pc_hi20(.LCPI0_7) - fld.d $ft1, $t4, %pc_lo12(.LCPI0_7) - pcalau12i $t4, %pc_hi20(.LCPI0_8) - fld.d $ft2, $t4, %pc_lo12(.LCPI0_8) - pcalau12i $t4, %pc_hi20(.LCPI0_9) - fld.d $ft3, $t4, %pc_lo12(.LCPI0_9) - pcalau12i $t4, %pc_hi20(.LCPI0_10) - fld.d $ft4, $t4, %pc_lo12(.LCPI0_10) - pcalau12i $t4, %pc_hi20(.LCPI0_11) - fld.d $ft5, $t4, %pc_lo12(.LCPI0_11) - pcalau12i $t4, %pc_hi20(.LCPI0_12) - fld.d $ft6, $t4, %pc_lo12(.LCPI0_12) - pcalau12i $t4, %pc_hi20(.LCPI0_13) - fld.d $ft7, $t4, %pc_lo12(.LCPI0_13) - pcalau12i $t4, %pc_hi20(.LCPI0_14) - fld.d $ft8, $t4, %pc_lo12(.LCPI0_14) lu32i.d $t3, 293601 lu52i.d $t3, $t3, 1016 + movgr2fr.d $ft2, $t3 + lu52i.d $t5, $t5, 1008 + movgr2fr.d $ft3, $t5 + lu12i.w $t5, -164753 + ori $t5, $t5, 3027 + lu32i.d $t5, 154014 + lu52i.d $t5, $t5, 1009 + movgr2fr.d $ft4, $t5 + lu52i.d $t4, $t4, 1009 + movgr2fr.d $ft5, $t4 + lu12i.w $t4, 237565 + ori $t4, $t4, 1551 + lu32i.d $t4, 497612 + lu52i.d $t4, $t4, 1009 + movgr2fr.d $ft6, $t4 + lu52i.d $t4, $t6, 1009 + movgr2fr.d $ft7, $t4 + lu12i.w $t4, -408693 + ori $t4, $t4, 75 + lu32i.d $t4, -207367 + lu52i.d $t4, $t4, 1009 + movgr2fr.d $ft8, $t4 ori $t4, $zero, 96 .p2align 4, , 16 .LBB0_1: # =>This Inner Loop Header: Depth=1 @@ -106,7 +100,7 @@ init: # @init st.d $t3, $a5, -48 fst.d $ft9, $a6, -48 st.d $zero, $a7, -48 - fmadd.d $ft11, $ft9, $fa6, $fa5 + fmadd.d $ft11, $ft9, $fa5, $ft2 fst.d $ft11, $a5, -40 addi.d $t6, $t1, -10 bstrpick.d $t6, $t6, 31, 0 @@ -114,11 +108,11 @@ init: # @init ffint.d.l $ft11, $ft11 fst.d $ft11, $a6, -40 st.d $zero, $a7, -40 - fmadd.d $ft11, $ft9, $fa7, $fa5 + fmadd.d $ft11, $ft9, $fa6, $ft2 fst.d $ft11, $a5, -32 fst.d $ft10, $a6, -32 st.d $zero, $a7, -32 - fmadd.d $ft10, $ft9, $ft0, $fa5 + fmadd.d $ft10, $ft9, $fa7, $ft2 fst.d $ft10, $a5, -24 addi.d $t6, $t1, -8 bstrpick.d $t6, $t6, 31, 0 @@ -126,7 +120,7 @@ init: # @init ffint.d.l $ft10, $ft10 fst.d $ft10, $a6, -24 st.d $zero, $a7, -24 - fmadd.d $ft10, $ft9, $ft1, $fa5 + fmadd.d $ft10, $ft9, $ft0, $ft2 fst.d $ft10, $a5, -16 addi.d $t6, $t1, -7 bstrpick.d $t6, $t6, 31, 0 @@ -134,7 +128,7 @@ init: # @init ffint.d.l $ft10, $ft10 fst.d $ft10, $a6, -16 st.d $zero, $a7, -16 - fmadd.d $ft10, $ft9, $ft2, $fa5 + fmadd.d $ft10, $ft9, $ft1, $ft2 fst.d $ft10, $a5, -8 addi.d $t6, $t1, -6 bstrpick.d $t6, $t6, 31, 0 @@ -142,7 +136,7 @@ init: # @init ffint.d.l $ft10, $ft10 fst.d $ft10, $a6, -8 st.d $zero, $a7, -8 - fmadd.d $ft10, $ft9, $ft3, $fa5 + fmadd.d $ft10, $ft9, $ft3, $ft2 fst.d $ft10, $a5, 0 addi.d $t6, $t1, -5 bstrpick.d $t6, $t6, 31, 0 @@ -150,7 +144,7 @@ init: # @init ffint.d.l $ft10, $ft10 fst.d $ft10, $a6, 0 st.d $zero, $a7, 0 - fmadd.d $ft10, $ft9, $ft4, $fa5 + fmadd.d $ft10, $ft9, $ft4, $ft2 fst.d $ft10, $a5, 8 addi.d $t6, $t1, -4 bstrpick.d $t6, $t6, 31, 0 @@ -158,7 +152,7 @@ init: # @init ffint.d.l $ft10, $ft10 fst.d $ft10, $a6, 8 st.d $zero, $a7, 8 - fmadd.d $ft10, $ft9, $ft5, $fa5 + fmadd.d $ft10, $ft9, $ft5, $ft2 fst.d $ft10, $a5, 16 addi.d $t6, $t1, -3 bstrpick.d $t6, $t6, 31, 0 @@ -166,7 +160,7 @@ init: # @init ffint.d.l $ft10, $ft10 fst.d $ft10, $a6, 16 st.d $zero, $a7, 16 - fmadd.d $ft10, $ft9, $ft6, $fa5 + fmadd.d $ft10, $ft9, $ft6, $ft2 fst.d $ft10, $a5, 24 addi.d $t6, $t1, -2 bstrpick.d $t6, $t6, 31, 0 @@ -174,7 +168,7 @@ init: # @init ffint.d.l $ft10, $ft10 fst.d $ft10, $a6, 24 st.d $zero, $a7, 24 - fmadd.d $ft10, $ft9, $ft7, $fa5 + fmadd.d $ft10, $ft9, $ft7, $ft2 fst.d $ft10, $a5, 32 addi.d $t6, $t1, -1 bstrpick.d $t6, $t6, 31, 0 @@ -182,7 +176,7 @@ init: # @init ffint.d.l $ft10, $ft10 fst.d $ft10, $a6, 32 st.d $zero, $a7, 32 - fmadd.d $ft9, $ft9, $ft8, $fa5 + fmadd.d $ft9, $ft9, $ft8, $ft2 fst.d $ft9, $a5, 40 movgr2fr.d $ft9, $t5 ffint.d.l $ft9, $ft9 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/BenchmarkDemo.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/BenchmarkDemo.s index be1c08a4..16a7db29 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/BenchmarkDemo.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/BenchmarkDemo.s @@ -1,10 +1,6 @@ .file "BenchmarkDemo.cpp" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN13BenchmarkDemo20clientMoveAndDisplayEv -.LCPI0_0: - .word 0x3c888889 # float 0.0166666675 .text - .globl _ZN13BenchmarkDemo20clientMoveAndDisplayEv + .globl _ZN13BenchmarkDemo20clientMoveAndDisplayEv # -- Begin function _ZN13BenchmarkDemo20clientMoveAndDisplayEv .p2align 5 .type _ZN13BenchmarkDemo20clientMoveAndDisplayEv,@function _ZN13BenchmarkDemo20clientMoveAndDisplayEv: # @_ZN13BenchmarkDemo20clientMoveAndDisplayEv @@ -22,8 +18,9 @@ _ZN13BenchmarkDemo20clientMoveAndDisplayEv: # @_ZN13BenchmarkDemo20clientMoveAnd # %bb.1: ld.d $a1, $a0, 0 ld.d $a2, $a1, 64 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI0_0) + lu12i.w $a1, 247944 + ori $a1, $a1, 2185 + movgr2fr.w $fa0, $a1 ori $a1, $zero, 1 fmov.s $fa1, $fa0 jirl $ra, $a2, 0 @@ -114,12 +111,6 @@ _ZN13BenchmarkDemo15displayCallbackEv: # @_ZN13BenchmarkDemo15displayCallbackEv .word 0x3c23d70a # float 0.00999999977 .word 0x3c23d70a # float 0.00999999977 .word 0x3c23d70a # float 0.00999999977 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI3_7: - .word 0x451c4000 # float 2500 -.LCPI3_8: - .word 0x42480000 # float 50 .text .globl _ZN13BenchmarkDemo11initPhysicsEv .p2align 5 @@ -486,10 +477,10 @@ _ZN13BenchmarkDemo11initPhysicsEv: # @_ZN13BenchmarkDemo11initPhysicsEv move $a0, $fp pcaddu18i $ra, %call36(_ZN13BenchmarkDemo11createTest6Ev) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI3_7) - fld.s $fa0, $a0, %pc_lo12(.LCPI3_7) - pcalau12i $a0, %pc_hi20(.LCPI3_8) - fld.s $fa2, $a0, %pc_lo12(.LCPI3_8) + lu12i.w $a0, 283076 + movgr2fr.w $fa0, $a0 + lu12i.w $a0, 271488 + movgr2fr.w $fa2, $a0 movgr2fr.w $fa1, $zero addi.d $a0, $sp, 80 pcaddu18i $ra, %call36(_ZN13btRaycastBar2C2Efff) @@ -733,12 +724,8 @@ __clang_call_terminate: # @__clang_call_terminate .Lfunc_end4: .size __clang_call_terminate, .Lfunc_end4-__clang_call_terminate # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN13BenchmarkDemo11createTest1Ev -.LCPI5_0: - .word 0xbeb33333 # float -0.349999994 .text - .globl _ZN13BenchmarkDemo11createTest1Ev + .globl _ZN13BenchmarkDemo11createTest1Ev # -- Begin function _ZN13BenchmarkDemo11createTest1Ev .p2align 5 .type _ZN13BenchmarkDemo11createTest1Ev,@function _ZN13BenchmarkDemo11createTest1Ev: # @_ZN13BenchmarkDemo11createTest1Ev @@ -850,26 +837,31 @@ _ZN13BenchmarkDemo11createTest1Ev: # @_ZN13BenchmarkDemo11createTest1Ev vldi $vr2, -1272 lu12i.w $s3, 260096 ori $s4, $zero, 8 + lu12i.w $a0, -267469 + ori $a0, $a0, 819 + lu32i.d $a0, 0 + movgr2fr.w $fa0, $a0 + fst.s $fa0, $sp, 12 # 4-byte Folded Spill ori $s5, $zero, 47 .p2align 4, , 16 .LBB5_5: # %.preheader # =>This Loop Header: Depth=1 # Child Loop BB5_6 Depth 2 move $s6, $zero - fadd.s $fs0, $fa1, $fa2 + fadd.s $fs1, $fa1, $fa2 vldi $vr0, -1256 - fadd.s $fs1, $fa1, $fa0 - vldi $vr0, -1246 fadd.s $fs2, $fa1, $fa0 - vldi $vr0, -1240 + vldi $vr0, -1246 fadd.s $fs3, $fa1, $fa0 - vldi $vr0, -1234 + vldi $vr0, -1240 fadd.s $fs4, $fa1, $fa0 - vldi $vr0, -1230 + vldi $vr0, -1234 fadd.s $fs5, $fa1, $fa0 + vldi $vr0, -1230 + fadd.s $fs6, $fa1, $fa0 vst $vr1, $sp, 16 # 16-byte Folded Spill vldi $vr0, -1227 - fadd.s $fs6, $fa1, $fa0 + fadd.s $fs7, $fa1, $fa0 .p2align 4, , 16 .LBB5_6: # Parent Loop BB5_5 Depth=1 # => This Inner Loop Header: Depth=2 @@ -903,7 +895,7 @@ _ZN13BenchmarkDemo11createTest1Ev: # @_ZN13BenchmarkDemo11createTest1Ev ffint.s.l $fa0, $fa0 vld $vr1, $sp, 16 # 16-byte Folded Reload vldi $vr2, -1272 - fmadd.s $fs7, $fa0, $fa2, $fa1 + fmadd.s $fs0, $fa0, $fa2, $fa1 st.w $s3, $s1, 8 vld $vr0, $sp, 48 # 16-byte Folded Reload vst $vr0, $s1, 12 @@ -913,7 +905,7 @@ _ZN13BenchmarkDemo11createTest1Ev: # @_ZN13BenchmarkDemo11createTest1Ev fst.s $fa1, $s1, 56 vld $vr0, $sp, 32 # 16-byte Folded Reload fst.s $fa0, $s1, 60 - fst.s $fs7, $s1, 64 + fst.s $fs0, $s1, 64 st.w $zero, $s1, 68 ld.d $a0, $s0, 8 ld.d $a1, $a0, 0 @@ -949,10 +941,10 @@ _ZN13BenchmarkDemo11createTest1Ev: # @_ZN13BenchmarkDemo11createTest1Ev st.w $s3, $s1, 28 vst $vr0, $s1, 32 st.d $s3, $s1, 48 - fst.s $fs0, $s1, 56 + fst.s $fs1, $s1, 56 vld $vr0, $sp, 32 # 16-byte Folded Reload fst.s $fa0, $s1, 60 - fst.s $fs7, $s1, 64 + fst.s $fs0, $s1, 64 st.w $zero, $s1, 68 ld.d $a0, $s0, 8 ld.d $a1, $a0, 0 @@ -988,10 +980,10 @@ _ZN13BenchmarkDemo11createTest1Ev: # @_ZN13BenchmarkDemo11createTest1Ev st.w $s3, $s1, 28 vst $vr0, $s1, 32 st.d $s3, $s1, 48 - fst.s $fs1, $s1, 56 + fst.s $fs2, $s1, 56 vld $vr0, $sp, 32 # 16-byte Folded Reload fst.s $fa0, $s1, 60 - fst.s $fs7, $s1, 64 + fst.s $fs0, $s1, 64 st.w $zero, $s1, 68 ld.d $a0, $s0, 8 ld.d $a1, $a0, 0 @@ -1027,10 +1019,10 @@ _ZN13BenchmarkDemo11createTest1Ev: # @_ZN13BenchmarkDemo11createTest1Ev st.w $s3, $s1, 28 vst $vr0, $s1, 32 st.d $s3, $s1, 48 - fst.s $fs2, $s1, 56 + fst.s $fs3, $s1, 56 vld $vr0, $sp, 32 # 16-byte Folded Reload fst.s $fa0, $s1, 60 - fst.s $fs7, $s1, 64 + fst.s $fs0, $s1, 64 st.w $zero, $s1, 68 ld.d $a0, $s0, 8 ld.d $a1, $a0, 0 @@ -1066,10 +1058,10 @@ _ZN13BenchmarkDemo11createTest1Ev: # @_ZN13BenchmarkDemo11createTest1Ev st.w $s3, $s1, 28 vst $vr0, $s1, 32 st.d $s3, $s1, 48 - fst.s $fs3, $s1, 56 + fst.s $fs4, $s1, 56 vld $vr0, $sp, 32 # 16-byte Folded Reload fst.s $fa0, $s1, 60 - fst.s $fs7, $s1, 64 + fst.s $fs0, $s1, 64 st.w $zero, $s1, 68 ld.d $a0, $s0, 8 ld.d $a1, $a0, 0 @@ -1105,10 +1097,10 @@ _ZN13BenchmarkDemo11createTest1Ev: # @_ZN13BenchmarkDemo11createTest1Ev st.w $s3, $s1, 28 vst $vr0, $s1, 32 st.d $s3, $s1, 48 - fst.s $fs4, $s1, 56 + fst.s $fs5, $s1, 56 vld $vr0, $sp, 32 # 16-byte Folded Reload fst.s $fa0, $s1, 60 - fst.s $fs7, $s1, 64 + fst.s $fs0, $s1, 64 st.w $zero, $s1, 68 ld.d $a0, $s0, 8 ld.d $a1, $a0, 0 @@ -1144,10 +1136,10 @@ _ZN13BenchmarkDemo11createTest1Ev: # @_ZN13BenchmarkDemo11createTest1Ev st.w $s3, $s1, 28 vst $vr0, $s1, 32 st.d $s3, $s1, 48 - fst.s $fs5, $s1, 56 + fst.s $fs6, $s1, 56 vld $vr0, $sp, 32 # 16-byte Folded Reload fst.s $fa0, $s1, 60 - fst.s $fs7, $s1, 64 + fst.s $fs0, $s1, 64 st.w $zero, $s1, 68 ld.d $a0, $s0, 8 ld.d $a1, $a0, 0 @@ -1183,10 +1175,10 @@ _ZN13BenchmarkDemo11createTest1Ev: # @_ZN13BenchmarkDemo11createTest1Ev st.w $s3, $s1, 28 vst $vr0, $s1, 32 st.d $s3, $s1, 48 - fst.s $fs6, $s1, 56 + fst.s $fs7, $s1, 56 vld $vr0, $sp, 32 # 16-byte Folded Reload fst.s $fa0, $s1, 60 - fst.s $fs7, $s1, 64 + fst.s $fs0, $s1, 64 st.w $zero, $s1, 68 ld.d $a0, $s0, 8 ld.d $a1, $a0, 0 @@ -1196,9 +1188,8 @@ _ZN13BenchmarkDemo11createTest1Ev: # @_ZN13BenchmarkDemo11createTest1Ev addi.w $s6, $s6, 1 bne $s6, $s4, .LBB5_6 # %bb.15: # in Loop: Header=BB5_5 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI5_0) vld $vr1, $sp, 16 # 16-byte Folded Reload + fld.s $fa0, $sp, 12 # 4-byte Folded Reload fadd.s $fa1, $fa1, $fa0 addi.w $s2, $s2, 1 vld $vr0, $sp, 32 # 16-byte Folded Reload @@ -1772,12 +1763,6 @@ GCC_except_table7: .word 0x3f800000 # float 1 .word 0x3f800000 # float 1 .word 0x00000000 # float 0 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI8_1: - .word 0xbd4ccccd # float -0.0500000007 -.LCPI8_2: - .word 0x3f8147ae # float 1.00999999 .text .globl _ZN13BenchmarkDemo11createTest4Ev .p2align 5 @@ -1788,25 +1773,25 @@ _ZN13BenchmarkDemo11createTest4Ev: # @_ZN13BenchmarkDemo11createTest4Ev .cfi_personality 155, DW.ref.__gxx_personality_v0 .cfi_lsda 27, .Lexception3 # %bb.0: - addi.d $sp, $sp, -272 - .cfi_def_cfa_offset 272 - st.d $ra, $sp, 264 # 8-byte Folded Spill - st.d $fp, $sp, 256 # 8-byte Folded Spill - st.d $s0, $sp, 248 # 8-byte Folded Spill - st.d $s1, $sp, 240 # 8-byte Folded Spill - st.d $s2, $sp, 232 # 8-byte Folded Spill - st.d $s3, $sp, 224 # 8-byte Folded Spill - st.d $s4, $sp, 216 # 8-byte Folded Spill - st.d $s5, $sp, 208 # 8-byte Folded Spill - st.d $s6, $sp, 200 # 8-byte Folded Spill - fst.d $fs0, $sp, 192 # 8-byte Folded Spill - fst.d $fs1, $sp, 184 # 8-byte Folded Spill - fst.d $fs2, $sp, 176 # 8-byte Folded Spill - fst.d $fs3, $sp, 168 # 8-byte Folded Spill - fst.d $fs4, $sp, 160 # 8-byte Folded Spill - fst.d $fs5, $sp, 152 # 8-byte Folded Spill - fst.d $fs6, $sp, 144 # 8-byte Folded Spill - fst.d $fs7, $sp, 136 # 8-byte Folded Spill + addi.d $sp, $sp, -288 + .cfi_def_cfa_offset 288 + st.d $ra, $sp, 280 # 8-byte Folded Spill + st.d $fp, $sp, 272 # 8-byte Folded Spill + st.d $s0, $sp, 264 # 8-byte Folded Spill + st.d $s1, $sp, 256 # 8-byte Folded Spill + st.d $s2, $sp, 248 # 8-byte Folded Spill + st.d $s3, $sp, 240 # 8-byte Folded Spill + st.d $s4, $sp, 232 # 8-byte Folded Spill + st.d $s5, $sp, 224 # 8-byte Folded Spill + st.d $s6, $sp, 216 # 8-byte Folded Spill + fst.d $fs0, $sp, 208 # 8-byte Folded Spill + fst.d $fs1, $sp, 200 # 8-byte Folded Spill + fst.d $fs2, $sp, 192 # 8-byte Folded Spill + fst.d $fs3, $sp, 184 # 8-byte Folded Spill + fst.d $fs4, $sp, 176 # 8-byte Folded Spill + fst.d $fs5, $sp, 168 # 8-byte Folded Spill + fst.d $fs6, $sp, 160 # 8-byte Folded Spill + fst.d $fs7, $sp, 152 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -1840,10 +1825,10 @@ _ZN13BenchmarkDemo11createTest4Ev: # @_ZN13BenchmarkDemo11createTest4Ev # %bb.1: pcalau12i $a0, %pc_hi20(.LCPI8_0) vld $vr0, $a0, %pc_lo12(.LCPI8_0) - vst $vr0, $sp, 112 + vst $vr0, $sp, 128 ld.d $a0, $s0, 0 ld.d $a2, $a0, 48 - addi.d $a1, $sp, 112 + addi.d $a1, $sp, 128 move $a0, $s0 jirl $ra, $a2, 0 pcalau12i $a0, %pc_hi20(_ZL7TaruVtx) @@ -1855,9 +1840,9 @@ _ZN13BenchmarkDemo11createTest4Ev: # @_ZN13BenchmarkDemo11createTest4Ev add.d $a0, $s1, $s2 ldx.d $a1, $s1, $s2 ld.wu $a0, $a0, 8 - st.d $a1, $sp, 112 - st.d $a0, $sp, 120 - addi.d $a1, $sp, 112 + st.d $a1, $sp, 128 + st.d $a0, $sp, 136 + addi.d $a1, $sp, 128 move $a0, $s0 pcaddu18i $ra, %call36(_ZN17btConvexHullShape8addPointERK9btVector3) jirl $ra, $ra, 0 @@ -1865,12 +1850,12 @@ _ZN13BenchmarkDemo11createTest4Ev: # @_ZN13BenchmarkDemo11createTest4Ev bne $s2, $s3, .LBB8_2 # %bb.3: vrepli.b $vr0, 0 - vst $vr0, $sp, 80 # 16-byte Folded Spill - vst $vr0, $sp, 96 + vst $vr0, $sp, 96 # 16-byte Folded Spill + vst $vr0, $sp, 112 ld.d $a0, $s0, 0 ld.d $a2, $a0, 64 vldi $vr0, -1168 - addi.d $a1, $sp, 96 + addi.d $a1, $sp, 112 move $a0, $s0 jirl $ra, $a2, 0 move $s2, $zero @@ -1878,45 +1863,54 @@ _ZN13BenchmarkDemo11createTest4Ev: # @_ZN13BenchmarkDemo11createTest4Ev vldi $vr2, -1102 vldi $vr3, -1160 movgr2fr.w $fa0, $zero - fst.s $fa0, $sp, 12 # 4-byte Folded Spill + fst.s $fa0, $sp, 28 # 4-byte Folded Spill vldi $vr4, -1252 lu12i.w $s3, 260096 ori $s4, $zero, 8 + lu12i.w $a0, -273204 + ori $a0, $a0, 3277 + lu32i.d $a0, 0 + movgr2fr.w $fa0, $a0 + fst.s $fa0, $sp, 24 # 4-byte Folded Spill + lu12i.w $a0, 260116 + ori $a0, $a0, 1966 + movgr2fr.w $fa0, $a0 + fst.s $fa0, $sp, 20 # 4-byte Folded Spill ori $s5, $zero, 15 vldi $vr0, -1272 .p2align 4, , 16 .LBB8_4: # %.preheader # =>This Loop Header: Depth=1 # Child Loop BB8_5 Depth 2 - vst $vr0, $sp, 64 # 16-byte Folded Spill + vst $vr0, $sp, 80 # 16-byte Folded Spill move $s6, $zero - vst $vr3, $sp, 16 # 16-byte Folded Spill - fadd.s $fs1, $fa3, $fa1 - fld.s $fa0, $sp, 12 # 4-byte Folded Reload - fmadd.s $fa0, $fs1, $fa0, $fa2 - fst.s $fa0, $sp, 44 # 4-byte Folded Spill - fadd.s $fa0, $fs1, $fa2 - fst.s $fa0, $sp, 40 # 4-byte Folded Spill + vst $vr3, $sp, 32 # 16-byte Folded Spill + fadd.s $fs3, $fa3, $fa1 + fld.s $fa0, $sp, 28 # 4-byte Folded Reload + fmadd.s $fa0, $fs3, $fa0, $fa2 + fst.s $fa0, $sp, 60 # 4-byte Folded Spill + fadd.s $fa0, $fs3, $fa2 + fst.s $fa0, $sp, 56 # 4-byte Folded Spill vldi $vr0, -1280 - fmadd.s $fs4, $fs1, $fa0, $fa2 - fmadd.s $fs5, $fs1, $fa1, $fa2 + fmadd.s $fs6, $fs3, $fa0, $fa2 + fmadd.s $fs7, $fs3, $fa1, $fa2 vldi $vr0, -1264 - fmadd.s $fs6, $fs1, $fa0, $fa2 + fmadd.s $fs0, $fs3, $fa0, $fa2 vldi $vr0, -1260 - fmadd.s $fs7, $fs1, $fa0, $fa2 + fmadd.s $fs1, $fs3, $fa0, $fa2 vldi $vr0, -1256 - fmadd.s $fs0, $fs1, $fa0, $fa2 - vst $vr2, $sp, 48 # 16-byte Folded Spill - fmadd.s $fs2, $fs1, $fa4, $fa2 + fmadd.s $fs2, $fs3, $fa0, $fa2 + vst $vr2, $sp, 64 # 16-byte Folded Spill + fmadd.s $fs4, $fs3, $fa4, $fa2 .p2align 4, , 16 .LBB8_5: # Parent Loop BB8_4 Depth=1 # => This Inner Loop Header: Depth=2 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 112 + vld $vr0, $sp, 96 # 16-byte Folded Reload + vst $vr0, $sp, 128 ld.d $a0, $s0, 0 ld.d $a2, $a0, 64 vldi $vr0, -1168 - addi.d $a1, $sp, 112 + addi.d $a1, $sp, 128 move $a0, $s0 jirl $ra, $a2, 0 ori $a0, $zero, 568 @@ -1926,7 +1920,7 @@ _ZN13BenchmarkDemo11createTest4Ev: # @_ZN13BenchmarkDemo11createTest4Ev move $s1, $a0 .Ltmp79: # EH_LABEL vldi $vr0, -1168 - addi.d $a3, $sp, 112 + addi.d $a3, $sp, 128 move $a1, $zero move $a2, $s0 pcaddu18i $ra, %call36(_ZN11btRigidBodyC1EfP13btMotionStateP16btCollisionShapeRK9btVector3) @@ -1937,31 +1931,31 @@ _ZN13BenchmarkDemo11createTest4Ev: # @_ZN13BenchmarkDemo11createTest4Ev bstrpick.d $a0, $s6, 31, 0 movgr2fr.d $fa0, $a0 ffint.s.l $fa0, $fa0 - vld $vr1, $sp, 48 # 16-byte Folded Reload - fmadd.s $fs3, $fa0, $fs1, $fa1 + vld $vr1, $sp, 64 # 16-byte Folded Reload + fmadd.s $fs5, $fa0, $fs3, $fa1 st.w $s3, $s1, 8 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vld $vr0, $sp, 96 # 16-byte Folded Reload vst $vr0, $s1, 12 st.w $s3, $s1, 28 vst $vr0, $s1, 32 st.d $s3, $s1, 48 - fld.s $fa0, $sp, 44 # 4-byte Folded Reload + fld.s $fa0, $sp, 60 # 4-byte Folded Reload fst.s $fa0, $s1, 56 - vld $vr0, $sp, 64 # 16-byte Folded Reload + vld $vr0, $sp, 80 # 16-byte Folded Reload fst.s $fa0, $s1, 60 - fst.s $fs3, $s1, 64 + fst.s $fs5, $s1, 64 st.w $zero, $s1, 68 ld.d $a0, $fp, 8 ld.d $a1, $a0, 0 ld.d $a2, $a1, 136 move $a1, $s1 jirl $ra, $a2, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 112 + vld $vr0, $sp, 96 # 16-byte Folded Reload + vst $vr0, $sp, 128 ld.d $a0, $s0, 0 ld.d $a2, $a0, 64 vldi $vr0, -1168 - addi.d $a1, $sp, 112 + addi.d $a1, $sp, 128 move $a0, $s0 jirl $ra, $a2, 0 ori $a0, $zero, 568 @@ -1971,7 +1965,7 @@ _ZN13BenchmarkDemo11createTest4Ev: # @_ZN13BenchmarkDemo11createTest4Ev move $s1, $a0 .Ltmp81: # EH_LABEL vldi $vr0, -1168 - addi.d $a3, $sp, 112 + addi.d $a3, $sp, 128 move $a1, $zero move $a2, $s0 pcaddu18i $ra, %call36(_ZN11btRigidBodyC1EfP13btMotionStateP16btCollisionShapeRK9btVector3) @@ -1980,28 +1974,28 @@ _ZN13BenchmarkDemo11createTest4Ev: # @_ZN13BenchmarkDemo11createTest4Ev # %bb.7: # %_ZN15DemoApplication20localCreateRigidBodyEfRK11btTransformP16btCollisionShape.exit.1 # in Loop: Header=BB8_5 Depth=2 st.w $s3, $s1, 8 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vld $vr0, $sp, 96 # 16-byte Folded Reload vst $vr0, $s1, 12 st.w $s3, $s1, 28 vst $vr0, $s1, 32 st.d $s3, $s1, 48 - fld.s $fa0, $sp, 40 # 4-byte Folded Reload + fld.s $fa0, $sp, 56 # 4-byte Folded Reload fst.s $fa0, $s1, 56 - vld $vr0, $sp, 64 # 16-byte Folded Reload + vld $vr0, $sp, 80 # 16-byte Folded Reload fst.s $fa0, $s1, 60 - fst.s $fs3, $s1, 64 + fst.s $fs5, $s1, 64 st.w $zero, $s1, 68 ld.d $a0, $fp, 8 ld.d $a1, $a0, 0 ld.d $a2, $a1, 136 move $a1, $s1 jirl $ra, $a2, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 112 + vld $vr0, $sp, 96 # 16-byte Folded Reload + vst $vr0, $sp, 128 ld.d $a0, $s0, 0 ld.d $a2, $a0, 64 vldi $vr0, -1168 - addi.d $a1, $sp, 112 + addi.d $a1, $sp, 128 move $a0, $s0 jirl $ra, $a2, 0 ori $a0, $zero, 568 @@ -2011,7 +2005,7 @@ _ZN13BenchmarkDemo11createTest4Ev: # @_ZN13BenchmarkDemo11createTest4Ev move $s1, $a0 .Ltmp83: # EH_LABEL vldi $vr0, -1168 - addi.d $a3, $sp, 112 + addi.d $a3, $sp, 128 move $a1, $zero move $a2, $s0 pcaddu18i $ra, %call36(_ZN11btRigidBodyC1EfP13btMotionStateP16btCollisionShapeRK9btVector3) @@ -2020,27 +2014,27 @@ _ZN13BenchmarkDemo11createTest4Ev: # @_ZN13BenchmarkDemo11createTest4Ev # %bb.8: # %_ZN15DemoApplication20localCreateRigidBodyEfRK11btTransformP16btCollisionShape.exit.2 # in Loop: Header=BB8_5 Depth=2 st.w $s3, $s1, 8 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vld $vr0, $sp, 96 # 16-byte Folded Reload vst $vr0, $s1, 12 st.w $s3, $s1, 28 vst $vr0, $s1, 32 st.d $s3, $s1, 48 - fst.s $fs4, $s1, 56 - vld $vr0, $sp, 64 # 16-byte Folded Reload + fst.s $fs6, $s1, 56 + vld $vr0, $sp, 80 # 16-byte Folded Reload fst.s $fa0, $s1, 60 - fst.s $fs3, $s1, 64 + fst.s $fs5, $s1, 64 st.w $zero, $s1, 68 ld.d $a0, $fp, 8 ld.d $a1, $a0, 0 ld.d $a2, $a1, 136 move $a1, $s1 jirl $ra, $a2, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 112 + vld $vr0, $sp, 96 # 16-byte Folded Reload + vst $vr0, $sp, 128 ld.d $a0, $s0, 0 ld.d $a2, $a0, 64 vldi $vr0, -1168 - addi.d $a1, $sp, 112 + addi.d $a1, $sp, 128 move $a0, $s0 jirl $ra, $a2, 0 ori $a0, $zero, 568 @@ -2050,7 +2044,7 @@ _ZN13BenchmarkDemo11createTest4Ev: # @_ZN13BenchmarkDemo11createTest4Ev move $s1, $a0 .Ltmp85: # EH_LABEL vldi $vr0, -1168 - addi.d $a3, $sp, 112 + addi.d $a3, $sp, 128 move $a1, $zero move $a2, $s0 pcaddu18i $ra, %call36(_ZN11btRigidBodyC1EfP13btMotionStateP16btCollisionShapeRK9btVector3) @@ -2059,27 +2053,27 @@ _ZN13BenchmarkDemo11createTest4Ev: # @_ZN13BenchmarkDemo11createTest4Ev # %bb.9: # %_ZN15DemoApplication20localCreateRigidBodyEfRK11btTransformP16btCollisionShape.exit.3 # in Loop: Header=BB8_5 Depth=2 st.w $s3, $s1, 8 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vld $vr0, $sp, 96 # 16-byte Folded Reload vst $vr0, $s1, 12 st.w $s3, $s1, 28 vst $vr0, $s1, 32 st.d $s3, $s1, 48 - fst.s $fs5, $s1, 56 - vld $vr0, $sp, 64 # 16-byte Folded Reload + fst.s $fs7, $s1, 56 + vld $vr0, $sp, 80 # 16-byte Folded Reload fst.s $fa0, $s1, 60 - fst.s $fs3, $s1, 64 + fst.s $fs5, $s1, 64 st.w $zero, $s1, 68 ld.d $a0, $fp, 8 ld.d $a1, $a0, 0 ld.d $a2, $a1, 136 move $a1, $s1 jirl $ra, $a2, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 112 + vld $vr0, $sp, 96 # 16-byte Folded Reload + vst $vr0, $sp, 128 ld.d $a0, $s0, 0 ld.d $a2, $a0, 64 vldi $vr0, -1168 - addi.d $a1, $sp, 112 + addi.d $a1, $sp, 128 move $a0, $s0 jirl $ra, $a2, 0 ori $a0, $zero, 568 @@ -2089,7 +2083,7 @@ _ZN13BenchmarkDemo11createTest4Ev: # @_ZN13BenchmarkDemo11createTest4Ev move $s1, $a0 .Ltmp87: # EH_LABEL vldi $vr0, -1168 - addi.d $a3, $sp, 112 + addi.d $a3, $sp, 128 move $a1, $zero move $a2, $s0 pcaddu18i $ra, %call36(_ZN11btRigidBodyC1EfP13btMotionStateP16btCollisionShapeRK9btVector3) @@ -2098,27 +2092,27 @@ _ZN13BenchmarkDemo11createTest4Ev: # @_ZN13BenchmarkDemo11createTest4Ev # %bb.10: # %_ZN15DemoApplication20localCreateRigidBodyEfRK11btTransformP16btCollisionShape.exit.4 # in Loop: Header=BB8_5 Depth=2 st.w $s3, $s1, 8 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vld $vr0, $sp, 96 # 16-byte Folded Reload vst $vr0, $s1, 12 st.w $s3, $s1, 28 vst $vr0, $s1, 32 st.d $s3, $s1, 48 - fst.s $fs6, $s1, 56 - vld $vr0, $sp, 64 # 16-byte Folded Reload + fst.s $fs0, $s1, 56 + vld $vr0, $sp, 80 # 16-byte Folded Reload fst.s $fa0, $s1, 60 - fst.s $fs3, $s1, 64 + fst.s $fs5, $s1, 64 st.w $zero, $s1, 68 ld.d $a0, $fp, 8 ld.d $a1, $a0, 0 ld.d $a2, $a1, 136 move $a1, $s1 jirl $ra, $a2, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 112 + vld $vr0, $sp, 96 # 16-byte Folded Reload + vst $vr0, $sp, 128 ld.d $a0, $s0, 0 ld.d $a2, $a0, 64 vldi $vr0, -1168 - addi.d $a1, $sp, 112 + addi.d $a1, $sp, 128 move $a0, $s0 jirl $ra, $a2, 0 ori $a0, $zero, 568 @@ -2128,7 +2122,7 @@ _ZN13BenchmarkDemo11createTest4Ev: # @_ZN13BenchmarkDemo11createTest4Ev move $s1, $a0 .Ltmp89: # EH_LABEL vldi $vr0, -1168 - addi.d $a3, $sp, 112 + addi.d $a3, $sp, 128 move $a1, $zero move $a2, $s0 pcaddu18i $ra, %call36(_ZN11btRigidBodyC1EfP13btMotionStateP16btCollisionShapeRK9btVector3) @@ -2137,27 +2131,27 @@ _ZN13BenchmarkDemo11createTest4Ev: # @_ZN13BenchmarkDemo11createTest4Ev # %bb.11: # %_ZN15DemoApplication20localCreateRigidBodyEfRK11btTransformP16btCollisionShape.exit.5 # in Loop: Header=BB8_5 Depth=2 st.w $s3, $s1, 8 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vld $vr0, $sp, 96 # 16-byte Folded Reload vst $vr0, $s1, 12 st.w $s3, $s1, 28 vst $vr0, $s1, 32 st.d $s3, $s1, 48 - fst.s $fs7, $s1, 56 - vld $vr0, $sp, 64 # 16-byte Folded Reload + fst.s $fs1, $s1, 56 + vld $vr0, $sp, 80 # 16-byte Folded Reload fst.s $fa0, $s1, 60 - fst.s $fs3, $s1, 64 + fst.s $fs5, $s1, 64 st.w $zero, $s1, 68 ld.d $a0, $fp, 8 ld.d $a1, $a0, 0 ld.d $a2, $a1, 136 move $a1, $s1 jirl $ra, $a2, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 112 + vld $vr0, $sp, 96 # 16-byte Folded Reload + vst $vr0, $sp, 128 ld.d $a0, $s0, 0 ld.d $a2, $a0, 64 vldi $vr0, -1168 - addi.d $a1, $sp, 112 + addi.d $a1, $sp, 128 move $a0, $s0 jirl $ra, $a2, 0 ori $a0, $zero, 568 @@ -2167,7 +2161,7 @@ _ZN13BenchmarkDemo11createTest4Ev: # @_ZN13BenchmarkDemo11createTest4Ev move $s1, $a0 .Ltmp91: # EH_LABEL vldi $vr0, -1168 - addi.d $a3, $sp, 112 + addi.d $a3, $sp, 128 move $a1, $zero move $a2, $s0 pcaddu18i $ra, %call36(_ZN11btRigidBodyC1EfP13btMotionStateP16btCollisionShapeRK9btVector3) @@ -2176,27 +2170,27 @@ _ZN13BenchmarkDemo11createTest4Ev: # @_ZN13BenchmarkDemo11createTest4Ev # %bb.12: # %_ZN15DemoApplication20localCreateRigidBodyEfRK11btTransformP16btCollisionShape.exit.6 # in Loop: Header=BB8_5 Depth=2 st.w $s3, $s1, 8 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vld $vr0, $sp, 96 # 16-byte Folded Reload vst $vr0, $s1, 12 st.w $s3, $s1, 28 vst $vr0, $s1, 32 st.d $s3, $s1, 48 - fst.s $fs0, $s1, 56 - vld $vr0, $sp, 64 # 16-byte Folded Reload + fst.s $fs2, $s1, 56 + vld $vr0, $sp, 80 # 16-byte Folded Reload fst.s $fa0, $s1, 60 - fst.s $fs3, $s1, 64 + fst.s $fs5, $s1, 64 st.w $zero, $s1, 68 ld.d $a0, $fp, 8 ld.d $a1, $a0, 0 ld.d $a2, $a1, 136 move $a1, $s1 jirl $ra, $a2, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload - vst $vr0, $sp, 112 + vld $vr0, $sp, 96 # 16-byte Folded Reload + vst $vr0, $sp, 128 ld.d $a0, $s0, 0 ld.d $a2, $a0, 64 vldi $vr0, -1168 - addi.d $a1, $sp, 112 + addi.d $a1, $sp, 128 move $a0, $s0 jirl $ra, $a2, 0 ori $a0, $zero, 568 @@ -2206,7 +2200,7 @@ _ZN13BenchmarkDemo11createTest4Ev: # @_ZN13BenchmarkDemo11createTest4Ev move $s1, $a0 .Ltmp93: # EH_LABEL vldi $vr0, -1168 - addi.d $a3, $sp, 112 + addi.d $a3, $sp, 128 move $a1, $zero move $a2, $s0 pcaddu18i $ra, %call36(_ZN11btRigidBodyC1EfP13btMotionStateP16btCollisionShapeRK9btVector3) @@ -2215,15 +2209,15 @@ _ZN13BenchmarkDemo11createTest4Ev: # @_ZN13BenchmarkDemo11createTest4Ev # %bb.13: # %_ZN15DemoApplication20localCreateRigidBodyEfRK11btTransformP16btCollisionShape.exit.7 # in Loop: Header=BB8_5 Depth=2 st.w $s3, $s1, 8 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vld $vr0, $sp, 96 # 16-byte Folded Reload vst $vr0, $s1, 12 st.w $s3, $s1, 28 vst $vr0, $s1, 32 st.d $s3, $s1, 48 - fst.s $fs2, $s1, 56 - vld $vr0, $sp, 64 # 16-byte Folded Reload + fst.s $fs4, $s1, 56 + vld $vr0, $sp, 80 # 16-byte Folded Reload fst.s $fa0, $s1, 60 - fst.s $fs3, $s1, 64 + fst.s $fs5, $s1, 64 st.w $zero, $s1, 68 ld.d $a0, $fp, 8 ld.d $a1, $a0, 0 @@ -2233,42 +2227,40 @@ _ZN13BenchmarkDemo11createTest4Ev: # @_ZN13BenchmarkDemo11createTest4Ev addi.w $s6, $s6, 1 bne $s6, $s4, .LBB8_5 # %bb.14: # in Loop: Header=BB8_4 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI8_1) - fld.s $fa0, $a0, %pc_lo12(.LCPI8_1) - pcalau12i $a0, %pc_hi20(.LCPI8_2) - fld.s $fa1, $a0, %pc_lo12(.LCPI8_2) - vld $vr3, $sp, 16 # 16-byte Folded Reload + vld $vr3, $sp, 32 # 16-byte Folded Reload + fld.s $fa0, $sp, 24 # 4-byte Folded Reload fmul.s $fa0, $fa3, $fa0 - vld $vr2, $sp, 48 # 16-byte Folded Reload + vld $vr2, $sp, 64 # 16-byte Folded Reload vldi $vr4, -1252 fmadd.s $fa2, $fa0, $fa4, $fa2 - fmul.s $fa3, $fa3, $fa1 + fld.s $fa0, $sp, 20 # 4-byte Folded Reload + fmul.s $fa3, $fa3, $fa0 vldi $vr1, -1272 fadd.s $fa0, $fa3, $fa1 addi.w $s2, $s2, 1 - vld $vr5, $sp, 64 # 16-byte Folded Reload + vld $vr5, $sp, 80 # 16-byte Folded Reload fadd.s $fa5, $fa5, $fa0 vori.b $vr0, $vr5, 0 bne $s2, $s5, .LBB8_4 # %bb.15: - fld.d $fs7, $sp, 136 # 8-byte Folded Reload - fld.d $fs6, $sp, 144 # 8-byte Folded Reload - fld.d $fs5, $sp, 152 # 8-byte Folded Reload - fld.d $fs4, $sp, 160 # 8-byte Folded Reload - fld.d $fs3, $sp, 168 # 8-byte Folded Reload - fld.d $fs2, $sp, 176 # 8-byte Folded Reload - fld.d $fs1, $sp, 184 # 8-byte Folded Reload - fld.d $fs0, $sp, 192 # 8-byte Folded Reload - ld.d $s6, $sp, 200 # 8-byte Folded Reload - ld.d $s5, $sp, 208 # 8-byte Folded Reload - ld.d $s4, $sp, 216 # 8-byte Folded Reload - ld.d $s3, $sp, 224 # 8-byte Folded Reload - ld.d $s2, $sp, 232 # 8-byte Folded Reload - ld.d $s1, $sp, 240 # 8-byte Folded Reload - ld.d $s0, $sp, 248 # 8-byte Folded Reload - ld.d $fp, $sp, 256 # 8-byte Folded Reload - ld.d $ra, $sp, 264 # 8-byte Folded Reload - addi.d $sp, $sp, 272 + fld.d $fs7, $sp, 152 # 8-byte Folded Reload + fld.d $fs6, $sp, 160 # 8-byte Folded Reload + fld.d $fs5, $sp, 168 # 8-byte Folded Reload + fld.d $fs4, $sp, 176 # 8-byte Folded Reload + fld.d $fs3, $sp, 184 # 8-byte Folded Reload + fld.d $fs2, $sp, 192 # 8-byte Folded Reload + fld.d $fs1, $sp, 200 # 8-byte Folded Reload + fld.d $fs0, $sp, 208 # 8-byte Folded Reload + ld.d $s6, $sp, 216 # 8-byte Folded Reload + ld.d $s5, $sp, 224 # 8-byte Folded Reload + ld.d $s4, $sp, 232 # 8-byte Folded Reload + ld.d $s3, $sp, 240 # 8-byte Folded Reload + ld.d $s2, $sp, 248 # 8-byte Folded Reload + ld.d $s1, $sp, 256 # 8-byte Folded Reload + ld.d $s0, $sp, 264 # 8-byte Folded Reload + ld.d $fp, $sp, 272 # 8-byte Folded Reload + ld.d $ra, $sp, 280 # 8-byte Folded Reload + addi.d $sp, $sp, 288 ret .LBB8_16: .Ltmp75: # EH_LABEL @@ -2415,14 +2407,8 @@ GCC_except_table8: .Lttbase2: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN13BenchmarkDemo11createTest5Ev -.LCPI9_0: - .word 0xbd4ccccd # float -0.0500000007 -.LCPI9_1: - .word 0x3f8ccccd # float 1.10000002 .text - .globl _ZN13BenchmarkDemo11createTest5Ev + .globl _ZN13BenchmarkDemo11createTest5Ev # -- Begin function _ZN13BenchmarkDemo11createTest5Ev .p2align 5 .type _ZN13BenchmarkDemo11createTest5Ev,@function _ZN13BenchmarkDemo11createTest5Ev: # @_ZN13BenchmarkDemo11createTest5Ev @@ -2483,10 +2469,13 @@ _ZN13BenchmarkDemo11createTest5Ev: # @_ZN13BenchmarkDemo11createTest5Ev lu12i.w $a1, 233016 ori $s3, $a1, 3641 ori $s4, $zero, 8 - pcalau12i $a1, %pc_hi20(.LCPI9_0) - fld.s $fs4, $a1, %pc_lo12(.LCPI9_0) - pcalau12i $a1, %pc_hi20(.LCPI9_1) - fld.s $fs5, $a1, %pc_lo12(.LCPI9_1) + lu12i.w $a1, -273204 + ori $a1, $a1, 3277 + lu32i.d $a1, 0 + movgr2fr.w $fs4, $a1 + lu12i.w $a1, 260300 + ori $a1, $a1, 3277 + movgr2fr.w $fs5, $a1 addi.d $a0, $a0, 16 st.d $a0, $sp, 64 # 8-byte Folded Spill vrepli.b $vr1, 0 @@ -3037,14 +3026,8 @@ GCC_except_table9: .Lttbase3: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN13BenchmarkDemo11createTest6Ev -.LCPI10_0: - .word 0xbd4ccccd # float -0.0500000007 -.LCPI10_1: - .word 0x3f8ccccd # float 1.10000002 .text - .globl _ZN13BenchmarkDemo11createTest6Ev + .globl _ZN13BenchmarkDemo11createTest6Ev # -- Begin function _ZN13BenchmarkDemo11createTest6Ev .p2align 5 .type _ZN13BenchmarkDemo11createTest6Ev,@function _ZN13BenchmarkDemo11createTest6Ev: # @_ZN13BenchmarkDemo11createTest6Ev @@ -3135,11 +3118,14 @@ _ZN13BenchmarkDemo11createTest6Ev: # @_ZN13BenchmarkDemo11createTest6Ev vst $vr2, $sp, 48 # 16-byte Folded Spill vldi $vr4, -1272 movgr2fr.w $fs0, $zero - pcalau12i $a0, %pc_hi20(.LCPI10_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI10_0) - pcalau12i $a0, %pc_hi20(.LCPI10_1) - fld.s $fs2, $a0, %pc_lo12(.LCPI10_1) lu12i.w $s3, 260096 + lu12i.w $a0, -273204 + ori $a0, $a0, 3277 + lu32i.d $a0, 0 + movgr2fr.w $fs1, $a0 + lu12i.w $a0, 260300 + ori $a0, $a0, 3277 + movgr2fr.w $fs2, $a0 ori $s4, $zero, 10 .p2align 4, , 16 .LBB10_4: # %.preheader @@ -3350,14 +3336,8 @@ GCC_except_table10: .Lttbase4: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN13BenchmarkDemo11createTest7Ev -.LCPI11_0: - .word 0x451c4000 # float 2500 -.LCPI11_1: - .word 0x42480000 # float 50 .text - .globl _ZN13BenchmarkDemo11createTest7Ev + .globl _ZN13BenchmarkDemo11createTest7Ev # -- Begin function _ZN13BenchmarkDemo11createTest7Ev .p2align 5 .type _ZN13BenchmarkDemo11createTest7Ev,@function _ZN13BenchmarkDemo11createTest7Ev: # @_ZN13BenchmarkDemo11createTest7Ev @@ -3373,10 +3353,10 @@ _ZN13BenchmarkDemo11createTest7Ev: # @_ZN13BenchmarkDemo11createTest7Ev .cfi_def_cfa_offset 40080 pcaddu18i $ra, %call36(_ZN13BenchmarkDemo11createTest6Ev) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI11_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI11_0) - pcalau12i $a0, %pc_hi20(.LCPI11_1) - fld.s $fa2, $a0, %pc_lo12(.LCPI11_1) + lu12i.w $a0, 283076 + movgr2fr.w $fa0, $a0 + lu12i.w $a0, 271488 + movgr2fr.w $fa2, $a0 movgr2fr.w $fa1, $zero addi.d $a0, $sp, 8 pcaddu18i $ra, %call36(_ZN13btRaycastBar2C2Efff) @@ -4315,14 +4295,7 @@ _Z6rotateRK12btQuaternionRK9btVector3: # @_Z6rotateRK12btQuaternionRK9btVector3 .Lfunc_end15: .size _Z6rotateRK12btQuaternionRK9btVector3, .Lfunc_end15-_Z6rotateRK12btQuaternionRK9btVector3 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN13BenchmarkDemo17createTowerCircleERK9btVector3iiS2_ -.LCPI16_0: - .word 0x3fa66666 # float 1.29999995 -.LCPI16_1: - .word 0x40490fdb # float 3.14159274 - .text - .globl _ZN13BenchmarkDemo17createTowerCircleERK9btVector3iiS2_ + .globl _ZN13BenchmarkDemo17createTowerCircleERK9btVector3iiS2_ # -- Begin function _ZN13BenchmarkDemo17createTowerCircleERK9btVector3iiS2_ .p2align 5 .type _ZN13BenchmarkDemo17createTowerCircleERK9btVector3iiS2_,@function _ZN13BenchmarkDemo17createTowerCircleERK9btVector3iiS2_: # @_ZN13BenchmarkDemo17createTowerCircleERK9btVector3iiS2_ @@ -4441,26 +4414,28 @@ _ZN13BenchmarkDemo17createTowerCircleERK9btVector3iiS2_: # @_ZN13BenchmarkDemo17 jirl $ra, $a2, 0 blez $s0, .LBB16_12 # %bb.5: # %.preheader.lr.ph - pcalau12i $a0, %pc_hi20(.LCPI16_1) - fld.s $fa0, $a0, %pc_lo12(.LCPI16_1) - movgr2fr.w $fa1, $s2 - ffint.s.w $fa2, $fa1 - vldi $vr1, -1184 - fdiv.s $fa3, $fa0, $fa2 - fmul.s $fs1, $fa3, $fa1 + movgr2fr.w $fa0, $s2 + ffint.s.w $fa0, $fa0 + lu12i.w $a0, 263312 + ori $a0, $a0, 4059 + movgr2fr.w $fa1, $a0 + vldi $vr2, -1184 + fdiv.s $fa3, $fa1, $fa0 + fmul.s $fs1, $fa3, $fa2 blez $s2, .LBB16_11 # %bb.6: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI16_0) - fld.s $fa3, $a0, %pc_lo12(.LCPI16_0) - fld.s $fa4, $s1, 0 move $s6, $zero - fmul.s $fa3, $fa2, $fa3 - fmul.s $fa3, $fa3, $fa4 - fdiv.s $fa3, $fa3, $fa0 + lu12i.w $a0, 260710 + fld.s $fa3, $s1, 0 + ori $a0, $a0, 1638 + movgr2fr.w $fa4, $a0 + fmul.s $fa4, $fa0, $fa4 + fmul.s $fa3, $fa4, $fa3 + fdiv.s $fa3, $fa3, $fa1 fst.s $fa3, $sp, 24 # 4-byte Folded Spill - fmul.s $fa2, $fa2, $fa1 - fdiv.s $fa0, $fa0, $fa2 - fmul.s $fa0, $fa0, $fa1 + fmul.s $fa0, $fa0, $fa2 + fdiv.s $fa0, $fa1, $fa0 + fmul.s $fa0, $fa0, $fa2 fst.s $fa0, $sp, 20 # 4-byte Folded Spill fld.s $fa0, $s1, 4 fst.s $fa0, $sp, 56 # 4-byte Folded Spill @@ -4834,154 +4809,94 @@ GCC_except_table16: .Lttbase8: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f -.LCPI17_0: - .word 0x3e19999a # float 0.150000006 -.LCPI17_1: - .word 0x3e4ccccd # float 0.200000003 -.LCPI17_2: - .word 0x3e8f5c29 # float 0.280000001 -.LCPI17_3: - .word 0x3dcccccd # float 0.100000001 -.LCPI17_4: - .word 0x3d4ccccd # float 0.0500000007 -.LCPI17_5: - .word 0x3d8f5c29 # float 0.0700000003 -.LCPI17_6: - .word 0x3ee66666 # float 0.449999988 -.LCPI17_7: - .word 0x3ebd70a4 # float 0.370000005 -.LCPI17_8: - .word 0x3ea8f5c3 # float 0.330000013 -.LCPI17_9: - .word 0x3d23d70a # float 0.0399999991 -.LCPI17_10: - .word 0x3f99999a # float 1.20000005 -.LCPI17_11: - .word 0x3fcccccd # float 1.60000002 -.LCPI17_12: - .word 0xbe3851ec # float -0.180000007 -.LCPI17_13: - .word 0x3f266666 # float 0.649999976 -.LCPI17_14: - .word 0x3e3851ec # float 0.180000007 -.LCPI17_15: - .word 0xbeb33333 # float -0.349999994 -.LCPI17_16: - .word 0x3fb9999a # float 1.45000005 -.LCPI17_18: - .word 0xbf333333 # float -0.699999988 -.LCPI17_19: - .word 0x3eb33333 # float 0.349999994 -.LCPI17_23: - .word 0x3f333333 # float 0.699999988 -.LCPI17_24: - .word 0x3f59999a # float 0.850000023 -.LCPI17_28: - .word 0xbe19999a # float -0.150000006 -.LCPI17_31: - .word 0x3e99999a # float 0.300000012 -.LCPI17_32: - .word 0xbe0f5c29 # float -0.140000001 -.LCPI17_35: - .word 0xbdcccccd # float -0.100000001 -.LCPI17_36: - .word 0x3e666666 # float 0.224999994 -.LCPI17_37: - .word 0xbe666666 # float -0.224999994 -.LCPI17_38: - .word 0x3e3d70a4 # float 0.185000002 -.LCPI17_44: - .word 0xbe4ccccd # float -0.200000003 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI17_17: + .p2align 4, 0x0 # -- Begin function _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f +.LCPI17_0: .word 0xb33bbd2e # float -4.37113883E-8 .word 0xbf800000 # float -1 .word 0x00000000 # float 0 .word 0x00000000 # float 0 -.LCPI17_20: +.LCPI17_1: .word 0xb33bbd2e # float -4.37113883E-8 .word 0x3f800000 # float 1 .word 0x00000000 # float 0 .word 0x00000000 # float 0 -.LCPI17_21: +.LCPI17_2: .word 0xbf800000 # float -1 .word 0xb33bbd2e # float -4.37113883E-8 .word 0x00000000 # float 0 .word 0x00000000 # float 0 -.LCPI17_22: +.LCPI17_3: .word 0x80000000 # float -0 .word 0x00000000 # float 0 .word 0x3f800000 # float 1 .word 0x00000000 # float 0 -.LCPI17_25: +.LCPI17_4: .word 0xb33bbd2e # float -4.37113883E-8 .word 0x00000000 # float 0 .word 0x3f800000 # float 1 .word 0x00000000 # float 0 -.LCPI17_26: +.LCPI17_5: .word 0x80000000 # float -0 .word 0x3f800000 # float 1 .word 0x00000000 # float 0 .word 0x00000000 # float 0 -.LCPI17_27: +.LCPI17_6: .word 0xbf800000 # float -1 .word 0x80000000 # float -0 .word 0xb33bbd2e # float -4.37113883E-8 .word 0x00000000 # float 0 -.LCPI17_29: +.LCPI17_7: .word 0x3f666666 # float 0.899999976 .word 0x3e99999a # float 0.300000012 .word 0x3f800000 # float 1 .word 0xbf490fdb # float -0.785398185 -.LCPI17_30: +.LCPI17_8: .word 0x3f800000 # float 1 .word 0xb33bbd2e # float -4.37113883E-8 .word 0x00000000 # float 0 .word 0x00000000 # float 0 -.LCPI17_33: +.LCPI17_9: .word 0xbf3504f4 # float -0.707106829 .word 0xbf3504f2 # float -0.707106709 .word 0x00000000 # float 0 .word 0x00000000 # float 0 -.LCPI17_34: +.LCPI17_10: .word 0x3f3504f2 # float 0.707106709 .word 0xbf3504f4 # float -0.707106829 .word 0x00000000 # float 0 .word 0x00000000 # float 0 -.LCPI17_39: +.LCPI17_11: .word 0x3f666666 # float 0.899999976 .word 0x3e99999a # float 0.300000012 .word 0x3f800000 # float 1 .word 0x00000000 # float 0 -.LCPI17_40: +.LCPI17_12: .word 0x3f3504f3 # float 0.707106769 .word 0xbf3504f3 # float -0.707106769 .word 0x00000000 # float 0 .word 0x00000000 # float 0 -.LCPI17_41: +.LCPI17_13: .word 0x3f3504f3 # float 0.707106769 .word 0x3f3504f3 # float 0.707106769 .word 0x00000000 # float 0 .word 0x00000000 # float 0 -.LCPI17_42: +.LCPI17_14: .word 0xbf800000 # float -1 .word 0x33bbbd2e # float 8.74227765E-8 .word 0x80000000 # float -0 .word 0x00000000 # float 0 -.LCPI17_43: +.LCPI17_15: .word 0xb3bbbd2e # float -8.74227765E-8 .word 0xbf800000 # float -1 .word 0x00000000 # float 0 .word 0x00000000 # float 0 -.LCPI17_45: +.LCPI17_16: .word 0x3f666666 # float 0.899999976 .word 0x3e99999a # float 0.300000012 .word 0x3f800000 # float 1 .word 0xbfc90fdb # float -1.57079637 -.LCPI17_46: +.LCPI17_17: .word 0x3f800000 # float 1 .word 0x00000000 # float 0 .word 0x00000000 # float 0 @@ -5047,14 +4962,17 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor ori $a1, $zero, 16 pcaddu18i $ra, %call36(_Z22btAlignedAllocInternalmi) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI17_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI17_0) - pcalau12i $a1, %pc_hi20(.LCPI17_1) - fld.s $fa1, $a1, %pc_lo12(.LCPI17_1) move $s1, $a0 + lu12i.w $a0, 254361 + ori $a0, $a0, 2458 + movgr2fr.w $fa0, $a0 fmul.s $fs2, $fs1, $fa0 - fmul.s $fs0, $fs1, $fa1 + lu12i.w $a0, 255180 + ori $a0, $a0, 3277 + movgr2fr.w $fa0, $a0 + fmul.s $fs0, $fs1, $fa0 .Ltmp235: # EH_LABEL + move $a0, $s1 fmov.s $fa0, $fs2 fmov.s $fa1, $fs0 pcaddu18i $ra, %call36(_ZN14btCapsuleShapeC1Eff) @@ -5066,11 +4984,13 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor ori $a1, $zero, 16 pcaddu18i $ra, %call36(_Z22btAlignedAllocInternalmi) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI17_2) - fld.s $fa0, $a1, %pc_lo12(.LCPI17_2) move $s1, $a0 + lu12i.w $a0, 256245 + ori $a0, $a0, 3113 + movgr2fr.w $fa0, $a0 fmul.s $fa1, $fs1, $fa0 .Ltmp240: # EH_LABEL + move $a0, $s1 fmov.s $fa0, $fs2 pcaddu18i $ra, %call36(_ZN14btCapsuleShapeC1Eff) jirl $ra, $ra, 0 @@ -5081,14 +5001,17 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor ori $a1, $zero, 16 pcaddu18i $ra, %call36(_Z22btAlignedAllocInternalmi) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI17_3) - fld.s $fa0, $a1, %pc_lo12(.LCPI17_3) - pcalau12i $a1, %pc_hi20(.LCPI17_4) - fld.s $fs3, $a1, %pc_lo12(.LCPI17_4) move $s1, $a0 + lu12i.w $a0, 253132 + ori $a0, $a0, 3277 + movgr2fr.w $fa0, $a0 fmul.s $fa0, $fs1, $fa0 + lu12i.w $a0, 251084 + ori $a0, $a0, 3277 + movgr2fr.w $fs3, $a0 fmul.s $fs4, $fs1, $fs3 .Ltmp245: # EH_LABEL + move $a0, $s1 fmov.s $fa1, $fs4 pcaddu18i $ra, %call36(_ZN14btCapsuleShapeC1Eff) jirl $ra, $ra, 0 @@ -5099,14 +5022,17 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor ori $a1, $zero, 16 pcaddu18i $ra, %call36(_Z22btAlignedAllocInternalmi) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI17_5) - fld.s $fa0, $a1, %pc_lo12(.LCPI17_5) - pcalau12i $a1, %pc_hi20(.LCPI17_6) - fld.s $fa1, $a1, %pc_lo12(.LCPI17_6) move $s1, $a0 + lu12i.w $a0, 252149 + ori $a0, $a0, 3113 + movgr2fr.w $fa0, $a0 fmul.s $fs5, $fs1, $fa0 - fmul.s $fs6, $fs1, $fa1 + lu12i.w $a0, 257638 + ori $a0, $a0, 1638 + movgr2fr.w $fa0, $a0 + fmul.s $fs6, $fs1, $fa0 .Ltmp250: # EH_LABEL + move $a0, $s1 fmov.s $fa0, $fs5 fmov.s $fa1, $fs6 pcaddu18i $ra, %call36(_ZN14btCapsuleShapeC1Eff) @@ -5118,11 +5044,13 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor ori $a1, $zero, 16 pcaddu18i $ra, %call36(_Z22btAlignedAllocInternalmi) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI17_7) - fld.s $fa0, $a1, %pc_lo12(.LCPI17_7) move $s1, $a0 + lu12i.w $a0, 256983 + ori $a0, $a0, 164 + movgr2fr.w $fa0, $a0 fmul.s $fs7, $fs1, $fa0 .Ltmp255: # EH_LABEL + move $a0, $s1 fmov.s $fa0, $fs4 fmov.s $fa1, $fs7 pcaddu18i $ra, %call36(_ZN14btCapsuleShapeC1Eff) @@ -5160,11 +5088,13 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor ori $a1, $zero, 16 pcaddu18i $ra, %call36(_Z22btAlignedAllocInternalmi) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI17_8) - fld.s $fa0, $a1, %pc_lo12(.LCPI17_8) move $s1, $a0 + lu12i.w $a0, 256655 + ori $a0, $a0, 1475 + movgr2fr.w $fa0, $a0 fmul.s $fs5, $fs1, $fa0 .Ltmp270: # EH_LABEL + move $a0, $s1 fmov.s $fa0, $fs4 fmov.s $fa1, $fs5 pcaddu18i $ra, %call36(_ZN14btCapsuleShapeC1Eff) @@ -5176,13 +5106,15 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor ori $a1, $zero, 16 pcaddu18i $ra, %call36(_Z22btAlignedAllocInternalmi) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI17_9) - fld.s $fa0, $a1, %pc_lo12(.LCPI17_9) move $s1, $a0 + lu12i.w $a0, 250429 + ori $a0, $a0, 1802 + movgr2fr.w $fa0, $a0 fmul.s $fs6, $fs1, $fa0 vldi $vr0, -1200 fmul.s $fs7, $fs1, $fa0 .Ltmp275: # EH_LABEL + move $a0, $s1 fmov.s $fa0, $fs6 fmov.s $fa1, $fs7 pcaddu18i $ra, %call36(_ZN14btCapsuleShapeC1Eff) @@ -5254,9 +5186,10 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor move $a0, $fp pcaddu18i $ra, %call36(_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI17_10) - fld.s $fa0, $a1, %pc_lo12(.LCPI17_10) st.d $a0, $fp, 104 + lu12i.w $a0, 260505 + ori $a0, $a0, 2458 + movgr2fr.w $fa0, $a0 fmul.s $fa0, $fs1, $fa0 fmul.s $fa1, $fa0, $fs2 fadd.s $fa2, $fs7, $fa1 @@ -5289,9 +5222,10 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor move $a0, $fp pcaddu18i $ra, %call36(_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI17_11) - fld.s $fa0, $a1, %pc_lo12(.LCPI17_11) st.d $a0, $fp, 112 + lu12i.w $a0, 261324 + ori $s0, $a0, 3277 + movgr2fr.w $fa0, $s0 fmul.s $fa0, $fs1, $fa0 fmul.s $fa1, $fa0, $fs2 fadd.s $fa2, $fs7, $fa1 @@ -5323,13 +5257,16 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor move $a0, $fp pcaddu18i $ra, %call36(_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI17_12) - fld.s $fa0, $a1, %pc_lo12(.LCPI17_12) - pcalau12i $a1, %pc_hi20(.LCPI17_13) - fld.s $fa1, $a1, %pc_lo12(.LCPI17_13) st.d $a0, $fp, 120 + lu12i.w $a0, -269435 + ori $a0, $a0, 492 + lu32i.d $a0, 0 + movgr2fr.w $fa0, $a0 fmul.s $fs0, $fs1, $fa0 - fmul.s $fa1, $fs1, $fa1 + lu12i.w $a0, 258662 + ori $a0, $a0, 1638 + movgr2fr.w $fa0, $a0 + fmul.s $fa1, $fs1, $fa0 fst.s $fa1, $sp, 176 # 4-byte Folded Spill fst.s $fs1, $sp, 192 # 4-byte Folded Spill fmul.s $fs3, $fa1, $fs2 @@ -5396,9 +5333,10 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor move $a0, $fp pcaddu18i $ra, %call36(_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI17_14) - fld.s $fa0, $a1, %pc_lo12(.LCPI17_14) st.d $a0, $fp, 136 + lu12i.w $a0, 254853 + ori $a0, $a0, 492 + movgr2fr.w $fa0, $a0 fld.s $fa1, $sp, 192 # 4-byte Folded Reload fmul.s $fs0, $fa1, $fa0 fadd.s $fa0, $fs0, $fs3 @@ -5464,13 +5402,16 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor move $a0, $fp pcaddu18i $ra, %call36(_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI17_15) - fld.s $fa0, $a1, %pc_lo12(.LCPI17_15) - pcalau12i $a1, %pc_hi20(.LCPI17_16) - fld.s $fa1, $a1, %pc_lo12(.LCPI17_16) st.d $a0, $fp, 152 - fld.s $fa2, $sp, 192 # 4-byte Folded Reload - fmul.s $fa0, $fa2, $fa0 + lu12i.w $a0, -267469 + ori $a0, $a0, 819 + lu32i.d $a0, 0 + movgr2fr.w $fa0, $a0 + fld.s $fa1, $sp, 192 # 4-byte Folded Reload + fmul.s $fa0, $fa1, $fa0 + lu12i.w $a0, 261017 + ori $a0, $a0, 2458 + movgr2fr.w $fa1, $a0 fld.s $fa2, $sp, 192 # 4-byte Folded Reload fmul.s $fs3, $fa2, $fa1 fmul.s $fs6, $fs3, $fs2 @@ -5485,8 +5426,8 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor fadd.s $fa0, $fa0, $fs0 movfr2gr.s $a0, $fa1 movfr2gr.s $a1, $fa2 - pcalau12i $a2, %pc_hi20(.LCPI17_17) - vld $vr1, $a2, %pc_lo12(.LCPI17_17) + pcalau12i $a2, %pc_hi20(.LCPI17_0) + vld $vr1, $a2, %pc_lo12(.LCPI17_0) vst $vr1, $sp, 160 # 16-byte Folded Spill bstrins.d $a0, $a1, 63, 32 movfr2gr.s $a1, $fa0 @@ -5494,8 +5435,8 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor vst $vr1, $sp, 288 move $a2, $s1 lu32i.d $a2, -279250 - lu52i.d $s0, $a2, -1229 - st.d $s0, $sp, 304 + lu52i.d $s2, $a2, -1229 + st.d $s2, $sp, 304 st.d $a0, $sp, 336 ld.d $a2, $fp, 72 vld $vr0, $sp, 208 # 16-byte Folded Reload @@ -5507,9 +5448,11 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor move $a0, $fp pcaddu18i $ra, %call36(_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI17_18) - fld.s $fa0, $a1, %pc_lo12(.LCPI17_18) st.d $a0, $fp, 160 + lu12i.w $a0, -265421 + ori $a0, $a0, 819 + lu32i.d $a0, 0 + movgr2fr.w $fa0, $a0 fld.s $fa1, $sp, 192 # 4-byte Folded Reload fmul.s $fa0, $fa1, $fa0 fadd.s $fa1, $fa0, $fs6 @@ -5528,7 +5471,7 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor bstrpick.d $a1, $a1, 31, 0 vld $vr0, $sp, 160 # 16-byte Folded Reload vst $vr0, $sp, 288 - st.d $s0, $sp, 304 + st.d $s2, $sp, 304 vld $vr0, $sp, 208 # 16-byte Folded Reload vst $vr0, $sp, 312 ld.d $a2, $fp, 80 @@ -5540,9 +5483,10 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor move $a0, $fp pcaddu18i $ra, %call36(_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI17_19) - fld.s $fa0, $a1, %pc_lo12(.LCPI17_19) st.d $a0, $fp, 168 + lu12i.w $a0, 256819 + ori $a0, $a0, 819 + movgr2fr.w $fa0, $a0 fld.s $fa1, $sp, 192 # 4-byte Folded Reload fmul.s $fa0, $fa1, $fa0 fadd.s $fa1, $fa0, $fs6 @@ -5559,14 +5503,14 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor bstrins.d $a0, $a1, 63, 32 movfr2gr.s $a1, $fa0 bstrpick.d $a1, $a1, 31, 0 - pcalau12i $a2, %pc_hi20(.LCPI17_20) - vld $vr2, $a2, %pc_lo12(.LCPI17_20) + pcalau12i $a2, %pc_hi20(.LCPI17_1) + vld $vr2, $a2, %pc_lo12(.LCPI17_1) vst $vr2, $sp, 80 # 16-byte Folded Spill - pcalau12i $a2, %pc_hi20(.LCPI17_21) - vld $vr0, $a2, %pc_lo12(.LCPI17_21) + pcalau12i $a2, %pc_hi20(.LCPI17_2) + vld $vr0, $a2, %pc_lo12(.LCPI17_2) vst $vr0, $sp, 96 # 16-byte Folded Spill - pcalau12i $a2, %pc_hi20(.LCPI17_22) - vld $vr1, $a2, %pc_lo12(.LCPI17_22) + pcalau12i $a2, %pc_hi20(.LCPI17_3) + vld $vr1, $a2, %pc_lo12(.LCPI17_3) vst $vr1, $sp, 176 # 16-byte Folded Spill vst $vr2, $sp, 288 vst $vr0, $sp, 304 @@ -5579,9 +5523,10 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor move $a0, $fp pcaddu18i $ra, %call36(_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI17_23) - fld.s $fa0, $a1, %pc_lo12(.LCPI17_23) st.d $a0, $fp, 176 + lu12i.w $a0, 258867 + ori $a0, $a0, 819 + movgr2fr.w $fa0, $a0 fld.s $fa1, $sp, 192 # 4-byte Folded Reload fmul.s $fa0, $fa1, $fa0 fadd.s $fa1, $fa0, $fs6 @@ -5615,8 +5560,9 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor jirl $ra, $ra, 0 ld.d $a1, $fp, 104 st.d $a0, $fp, 184 - pcalau12i $a0, %pc_hi20(.LCPI17_24) - fld.s $fs4, $a0, %pc_lo12(.LCPI17_24) + lu12i.w $a0, 259481 + ori $a0, $a0, 2458 + movgr2fr.w $fs4, $a0 move $a0, $a1 fld.s $fs0, $sp, 112 # 4-byte Folded Reload fmov.s $fa0, $fs0 @@ -5625,99 +5571,97 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor jirl $ra, $ra, 0 ld.d $a0, $fp, 104 lu12i.w $a1, 259276 - ori $s0, $a1, 3277 - st.w $s0, $a0, 232 - lu12i.w $a1, 261324 - ori $a1, $a1, 3277 - lu52i.d $s2, $a1, 1026 - st.d $s2, $a0, 504 + ori $s2, $a1, 3277 + st.w $s2, $a0, 232 + lu52i.d $s0, $s0, 1026 + st.d $s0, $a0, 504 ld.d $a0, $fp, 112 fmov.s $fa0, $fs0 fmov.s $fa1, $fs4 pcaddu18i $ra, %call36(_ZN11btRigidBody10setDampingEff) jirl $ra, $ra, 0 ld.d $a0, $fp, 112 - st.w $s0, $a0, 232 - st.d $s2, $a0, 504 + st.w $s2, $a0, 232 + st.d $s0, $a0, 504 ld.d $a0, $fp, 120 fmov.s $fa0, $fs0 fmov.s $fa1, $fs4 pcaddu18i $ra, %call36(_ZN11btRigidBody10setDampingEff) jirl $ra, $ra, 0 ld.d $a0, $fp, 120 - st.w $s0, $a0, 232 - st.d $s2, $a0, 504 + st.w $s2, $a0, 232 + st.d $s0, $a0, 504 ld.d $a0, $fp, 128 fmov.s $fa0, $fs0 fmov.s $fa1, $fs4 pcaddu18i $ra, %call36(_ZN11btRigidBody10setDampingEff) jirl $ra, $ra, 0 ld.d $a0, $fp, 128 - st.w $s0, $a0, 232 - st.d $s2, $a0, 504 + st.w $s2, $a0, 232 + st.d $s0, $a0, 504 ld.d $a0, $fp, 136 fmov.s $fa0, $fs0 fmov.s $fa1, $fs4 pcaddu18i $ra, %call36(_ZN11btRigidBody10setDampingEff) jirl $ra, $ra, 0 ld.d $a0, $fp, 136 - st.w $s0, $a0, 232 - st.d $s2, $a0, 504 + st.w $s2, $a0, 232 + st.d $s0, $a0, 504 ld.d $a0, $fp, 144 fmov.s $fa0, $fs0 fmov.s $fa1, $fs4 pcaddu18i $ra, %call36(_ZN11btRigidBody10setDampingEff) jirl $ra, $ra, 0 ld.d $a0, $fp, 144 - st.w $s0, $a0, 232 - st.d $s2, $a0, 504 + st.w $s2, $a0, 232 + st.d $s0, $a0, 504 ld.d $a0, $fp, 152 fmov.s $fa0, $fs0 fmov.s $fa1, $fs4 pcaddu18i $ra, %call36(_ZN11btRigidBody10setDampingEff) jirl $ra, $ra, 0 ld.d $a0, $fp, 152 - st.w $s0, $a0, 232 - st.d $s2, $a0, 504 + st.w $s2, $a0, 232 + st.d $s0, $a0, 504 ld.d $a0, $fp, 160 fmov.s $fa0, $fs0 fmov.s $fa1, $fs4 pcaddu18i $ra, %call36(_ZN11btRigidBody10setDampingEff) jirl $ra, $ra, 0 ld.d $a0, $fp, 160 - st.w $s0, $a0, 232 - st.d $s2, $a0, 504 + st.w $s2, $a0, 232 + st.d $s0, $a0, 504 ld.d $a0, $fp, 168 fmov.s $fa0, $fs0 fmov.s $fa1, $fs4 pcaddu18i $ra, %call36(_ZN11btRigidBody10setDampingEff) jirl $ra, $ra, 0 ld.d $a0, $fp, 168 - st.w $s0, $a0, 232 - st.d $s2, $a0, 504 + st.w $s2, $a0, 232 + st.d $s0, $a0, 504 ld.d $a0, $fp, 176 fmov.s $fa0, $fs0 fmov.s $fa1, $fs4 pcaddu18i $ra, %call36(_ZN11btRigidBody10setDampingEff) jirl $ra, $ra, 0 ld.d $a0, $fp, 176 - st.w $s0, $a0, 232 - st.d $s2, $a0, 504 + st.w $s2, $a0, 232 + st.d $s0, $a0, 504 ld.d $a0, $fp, 184 fmov.s $fa0, $fs0 fmov.s $fa1, $fs4 pcaddu18i $ra, %call36(_ZN11btRigidBody10setDampingEff) jirl $ra, $ra, 0 ld.d $a0, $fp, 184 - st.w $s0, $a0, 232 - st.d $s2, $a0, 504 + st.w $s2, $a0, 232 + st.d $s0, $a0, 504 movfr2gr.s $a0, $fs7 - pcalau12i $a1, %pc_hi20(.LCPI17_25) - vld $vr1, $a1, %pc_lo12(.LCPI17_25) - pcalau12i $a1, %pc_hi20(.LCPI17_26) - vld $vr2, $a1, %pc_lo12(.LCPI17_26) - pcalau12i $a1, %pc_hi20(.LCPI17_27) - vld $vr3, $a1, %pc_lo12(.LCPI17_27) + pcalau12i $a1, %pc_hi20(.LCPI17_4) + vld $vr1, $a1, %pc_lo12(.LCPI17_4) + pcalau12i $a1, %pc_hi20(.LCPI17_5) + vld $vr2, $a1, %pc_lo12(.LCPI17_5) + pcalau12i $a1, %pc_hi20(.LCPI17_6) + vld $vr3, $a1, %pc_lo12(.LCPI17_6) bstrpick.d $s2, $a0, 31, 0 vst $vr1, $sp, 288 vst $vr2, $sp, 304 @@ -5728,14 +5672,16 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor bstrins.d $a1, $s4, 63, 32 st.d $a1, $sp, 336 st.d $s2, $sp, 344 - pcalau12i $a1, %pc_hi20(.LCPI17_28) - fld.s $fa0, $a1, %pc_lo12(.LCPI17_28) vst $vr1, $sp, 144 # 16-byte Folded Spill vst $vr1, $sp, 224 vst $vr2, $sp, 128 # 16-byte Folded Spill vst $vr2, $sp, 240 vst $vr3, $sp, 112 # 16-byte Folded Spill vst $vr3, $sp, 256 + lu12i.w $a1, -269927 + ori $a1, $a1, 2458 + lu32i.d $a1, 0 + movgr2fr.w $fa0, $a1 fmul.s $fa0, $fs3, $fa0 movfr2gr.s $a1, $fa0 bstrins.d $a0, $a1, 63, 32 @@ -5756,8 +5702,8 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor .Ltmp292: # EH_LABEL # %bb.12: # %_ZN17btHingeConstraint8setLimitEfffff.exit lu12i.w $a0, 261264 - pcalau12i $a1, %pc_hi20(.LCPI17_29) - vld $vr0, $a1, %pc_lo12(.LCPI17_29) + pcalau12i $a1, %pc_hi20(.LCPI17_7) + vld $vr0, $a1, %pc_lo12(.LCPI17_7) ori $s3, $a0, 4059 ld.d $a0, $fp, 8 st.w $s3, $s0, 752 @@ -5768,27 +5714,30 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor ori $a2, $zero, 1 move $a1, $s0 jirl $ra, $a3, 0 - pcalau12i $a0, %pc_hi20(.LCPI17_30) - vld $vr1, $a0, %pc_lo12(.LCPI17_30) - pcalau12i $a0, %pc_hi20(.LCPI17_31) - fld.s $fa0, $a0, %pc_lo12(.LCPI17_31) + pcalau12i $a0, %pc_hi20(.LCPI17_8) + vld $vr1, $a0, %pc_lo12(.LCPI17_8) vld $vr2, $sp, 160 # 16-byte Folded Reload vst $vr2, $sp, 288 vst $vr1, $sp, 304 vld $vr3, $sp, 176 # 16-byte Folded Reload vst $vr3, $sp, 320 + lu12i.w $a0, 256409 + ori $a0, $a0, 2458 + movgr2fr.w $fa0, $a0 fmul.s $fa0, $fs3, $fa0 movfr2gr.s $a0, $fa0 slli.d $a0, $a0, 32 or $a0, $a0, $s2 st.d $a0, $sp, 336 st.d $s2, $sp, 344 - pcalau12i $a0, %pc_hi20(.LCPI17_32) - fld.s $fa0, $a0, %pc_lo12(.LCPI17_32) vst $vr2, $sp, 224 vst $vr1, $sp, 80 # 16-byte Folded Spill vst $vr1, $sp, 240 vst $vr3, $sp, 256 + lu12i.w $a0, -270091 + ori $a0, $a0, 3113 + lu32i.d $a0, 0 + movgr2fr.w $fa0, $a0 fmul.s $fa0, $fs3, $fa0 movfr2gr.s $a0, $fa0 slli.d $a0, $a0, 32 @@ -5830,16 +5779,18 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor ori $a2, $zero, 1 move $a1, $s0 jirl $ra, $a3, 0 - pcalau12i $a0, %pc_hi20(.LCPI17_33) - vld $vr0, $a0, %pc_lo12(.LCPI17_33) - pcalau12i $a0, %pc_hi20(.LCPI17_34) - vld $vr1, $a0, %pc_lo12(.LCPI17_34) - pcalau12i $a0, %pc_hi20(.LCPI17_35) - fld.s $fa2, $a0, %pc_lo12(.LCPI17_35) + pcalau12i $a0, %pc_hi20(.LCPI17_9) + vld $vr0, $a0, %pc_lo12(.LCPI17_9) + pcalau12i $a0, %pc_hi20(.LCPI17_10) + vld $vr1, $a0, %pc_lo12(.LCPI17_10) vst $vr0, $sp, 288 vst $vr1, $sp, 304 vld $vr3, $sp, 176 # 16-byte Folded Reload vst $vr3, $sp, 320 + lu12i.w $a0, -271156 + ori $a0, $a0, 3277 + lu32i.d $a0, 0 + movgr2fr.w $fa2, $a0 fmul.s $fa2, $fs3, $fa2 movfr2gr.s $a0, $fa2 slli.d $s4, $a0, 32 @@ -5847,12 +5798,13 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor or $a0, $s4, $s6 st.d $a0, $sp, 336 st.d $s2, $sp, 344 - pcalau12i $a0, %pc_hi20(.LCPI17_36) - fld.s $fa2, $a0, %pc_lo12(.LCPI17_36) vst $vr0, $sp, 224 vst $vr1, $sp, 240 vst $vr3, $sp, 256 - fmul.s $fa0, $fs3, $fa2 + lu12i.w $a0, 255590 + ori $a0, $a0, 1638 + movgr2fr.w $fa0, $a0 + fmul.s $fa0, $fs3, $fa0 movfr2gr.s $a0, $fa0 slli.d $a0, $a0, 32 or $s7, $a0, $s2 @@ -5883,25 +5835,28 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor ori $a2, $zero, 1 move $a1, $s0 jirl $ra, $a3, 0 - pcalau12i $a0, %pc_hi20(.LCPI17_37) - fld.s $fa0, $a0, %pc_lo12(.LCPI17_37) vld $vr1, $sp, 144 # 16-byte Folded Reload vst $vr1, $sp, 288 vld $vr2, $sp, 128 # 16-byte Folded Reload vst $vr2, $sp, 304 vld $vr3, $sp, 112 # 16-byte Folded Reload vst $vr3, $sp, 320 + lu12i.w $a0, -268698 + ori $a0, $a0, 1638 + lu32i.d $a0, 0 + movgr2fr.w $fa0, $a0 fmul.s $fa0, $fs3, $fa0 movfr2gr.s $a0, $fa0 slli.d $a0, $a0, 32 or $s5, $a0, $s2 st.d $s5, $sp, 336 st.d $s2, $sp, 344 - pcalau12i $a0, %pc_hi20(.LCPI17_38) - fld.s $fa0, $a0, %pc_lo12(.LCPI17_38) vst $vr1, $sp, 224 vst $vr2, $sp, 240 vst $vr3, $sp, 256 + lu12i.w $a0, 254935 + ori $a0, $a0, 164 + movgr2fr.w $fa0, $a0 fmul.s $fa0, $fs3, $fa0 movfr2gr.s $a0, $fa0 slli.d $a0, $a0, 32 @@ -5922,8 +5877,8 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor jirl $ra, $ra, 0 .Ltmp301: # EH_LABEL # %bb.15: # %_ZN17btHingeConstraint8setLimitEfffff.exit448 - pcalau12i $a0, %pc_hi20(.LCPI17_39) - vld $vr0, $a0, %pc_lo12(.LCPI17_39) + pcalau12i $a0, %pc_hi20(.LCPI17_11) + vld $vr0, $a0, %pc_lo12(.LCPI17_11) ld.d $a0, $fp, 8 st.w $s3, $s0, 752 vst $vr0, $sp, 16 # 16-byte Folded Spill @@ -5937,10 +5892,10 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor ori $a2, $zero, 1 move $a1, $s0 jirl $ra, $a3, 0 - pcalau12i $a0, %pc_hi20(.LCPI17_40) - vld $vr0, $a0, %pc_lo12(.LCPI17_40) - pcalau12i $a0, %pc_hi20(.LCPI17_41) - vld $vr1, $a0, %pc_lo12(.LCPI17_41) + pcalau12i $a0, %pc_hi20(.LCPI17_12) + vld $vr0, $a0, %pc_lo12(.LCPI17_12) + pcalau12i $a0, %pc_hi20(.LCPI17_13) + vld $vr1, $a0, %pc_lo12(.LCPI17_13) vst $vr0, $sp, 288 vst $vr1, $sp, 304 vld $vr2, $sp, 176 # 16-byte Folded Reload @@ -6019,17 +5974,19 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor ori $a2, $zero, 1 move $a1, $s0 jirl $ra, $a3, 0 - pcalau12i $a0, %pc_hi20(.LCPI17_42) - vld $vr0, $a0, %pc_lo12(.LCPI17_42) - pcalau12i $a0, %pc_hi20(.LCPI17_43) - vld $vr1, $a0, %pc_lo12(.LCPI17_43) - pcalau12i $a0, %pc_hi20(.LCPI17_44) - fld.s $fa2, $a0, %pc_lo12(.LCPI17_44) + pcalau12i $a0, %pc_hi20(.LCPI17_14) + vld $vr0, $a0, %pc_lo12(.LCPI17_14) + pcalau12i $a0, %pc_hi20(.LCPI17_15) + vld $vr1, $a0, %pc_lo12(.LCPI17_15) vst $vr0, $sp, 288 vst $vr1, $sp, 304 vld $vr1, $sp, 176 # 16-byte Folded Reload vst $vr1, $sp, 320 - fmul.s $fa0, $fs3, $fa2 + lu12i.w $a0, -269108 + ori $a0, $a0, 3277 + lu32i.d $a0, 0 + movgr2fr.w $fa0, $a0 + fmul.s $fa0, $fs3, $fa0 movfr2gr.s $a0, $fa0 bstrpick.d $a0, $a0, 31, 0 or $a0, $s8, $a0 @@ -6102,8 +6059,8 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor jirl $ra, $ra, 0 .Ltmp313: # EH_LABEL # %bb.19: # %_ZN17btHingeConstraint8setLimitEfffff.exit512 - pcalau12i $a0, %pc_hi20(.LCPI17_45) - vld $vr0, $a0, %pc_lo12(.LCPI17_45) + pcalau12i $a0, %pc_hi20(.LCPI17_16) + vld $vr0, $a0, %pc_lo12(.LCPI17_16) ld.d $a0, $fp, 8 st.w $zero, $s0, 752 vst $vr0, $sp, 192 # 16-byte Folded Spill @@ -6117,8 +6074,8 @@ _ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f: # @_ZN7RagDollC2EP15btDynamicsWor ori $a2, $zero, 1 move $a1, $s0 jirl $ra, $a3, 0 - pcalau12i $a0, %pc_hi20(.LCPI17_46) - vld $vr0, $a0, %pc_lo12(.LCPI17_46) + pcalau12i $a0, %pc_hi20(.LCPI17_17) + vld $vr0, $a0, %pc_lo12(.LCPI17_17) vld $vr1, $sp, 208 # 16-byte Folded Reload vst $vr1, $s7, 0 st.w $s1, $sp, 288 @@ -6895,14 +6852,8 @@ GCC_except_table18: .Lttbase10: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN13BenchmarkDemo8initRaysEv -.LCPI19_0: - .word 0x451c4000 # float 2500 -.LCPI19_1: - .word 0x42480000 # float 50 .text - .globl _ZN13BenchmarkDemo8initRaysEv + .globl _ZN13BenchmarkDemo8initRaysEv # -- Begin function _ZN13BenchmarkDemo8initRaysEv .p2align 5 .type _ZN13BenchmarkDemo8initRaysEv,@function _ZN13BenchmarkDemo8initRaysEv: # @_ZN13BenchmarkDemo8initRaysEv @@ -6916,10 +6867,10 @@ _ZN13BenchmarkDemo8initRaysEv: # @_ZN13BenchmarkDemo8initRaysEv ori $a0, $a0, 1184 sub.d $sp, $sp, $a0 .cfi_def_cfa_offset 40080 - pcalau12i $a0, %pc_hi20(.LCPI19_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI19_0) - pcalau12i $a0, %pc_hi20(.LCPI19_1) - fld.s $fa2, $a0, %pc_lo12(.LCPI19_1) + lu12i.w $a0, 283076 + movgr2fr.w $fa0, $a0 + lu12i.w $a0, 271488 + movgr2fr.w $fa2, $a0 movgr2fr.w $fa1, $zero addi.d $a0, $sp, 8 pcaddu18i $ra, %call36(_ZN13btRaycastBar2C2Efff) @@ -6941,13 +6892,9 @@ _ZN13BenchmarkDemo8initRaysEv: # @_ZN13BenchmarkDemo8initRaysEv .size _ZN13BenchmarkDemo8initRaysEv, .Lfunc_end19-_ZN13BenchmarkDemo8initRaysEv .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN13btRaycastBar2C2Efff -.LCPI20_0: - .word 0x3ccde32e # float 0.0251327418 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI20_1: + .p2align 4, 0x0 # -- Begin function _ZN13btRaycastBar2C2Efff +.LCPI20_0: .word 0x3f800000 # float 1 .word 0x00000000 # float 0 .word 0x00000000 # float 0 @@ -7016,14 +6963,15 @@ _ZN13btRaycastBar2C2Efff: # @_ZN13btRaycastBar2C2Efff lu12i.w $a0, 260096 st.w $a0, $s0, 56 ori $s2, $zero, 500 - pcalau12i $a0, %pc_hi20(.LCPI20_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI20_0) + lu12i.w $a0, 249054 + ori $a0, $a0, 814 + movgr2fr.w $fa0, $a0 fst.s $fa0, $sp, 36 # 4-byte Folded Spill movgr2fr.w $fs6, $zero fneg.s $fs7, $fs6 lu12i.w $s3, -243808 - pcalau12i $a0, %pc_hi20(.LCPI20_1) - vld $vr0, $a0, %pc_lo12(.LCPI20_1) + pcalau12i $a0, %pc_hi20(.LCPI20_0) + vld $vr0, $a0, %pc_lo12(.LCPI20_0) vst $vr0, $sp, 16 # 16-byte Folded Spill lu32i.d $s3, 0 lu12i.w $a0, 7 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btBox2dBox2dCollisionAlgorithm.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btBox2dBox2dCollisionAlgorithm.s index 45d40951..4406705d 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btBox2dBox2dCollisionAlgorithm.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btBox2dBox2dCollisionAlgorithm.s @@ -420,16 +420,7 @@ _ZN30btBox2dBox2dCollisionAlgorithm16processCollisionEP17btCollisionObjectS1_RK1 .size _ZN30btBox2dBox2dCollisionAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDispatcherInfoP16btManifoldResult, .Lfunc_end4-_ZN30btBox2dBox2dCollisionAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDispatcherInfoP16btManifoldResult .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z17b2CollidePolygonsP16btManifoldResultPK12btBox2dShapeRK11btTransformS3_S6_ -.LCPI5_0: - .word 0x3a83126f # float 0.00100000005 -.LCPI5_1: - .word 0x3f7ae148 # float 0.980000019 -.LCPI5_2: - .word 0x5d5e0b6b # float 9.99999984E+17 - .text - .globl _Z17b2CollidePolygonsP16btManifoldResultPK12btBox2dShapeRK11btTransformS3_S6_ + .globl _Z17b2CollidePolygonsP16btManifoldResultPK12btBox2dShapeRK11btTransformS3_S6_ # -- Begin function _Z17b2CollidePolygonsP16btManifoldResultPK12btBox2dShapeRK11btTransformS3_S6_ .p2align 5 .type _Z17b2CollidePolygonsP16btManifoldResultPK12btBox2dShapeRK11btTransformS3_S6_,@function _Z17b2CollidePolygonsP16btManifoldResultPK12btBox2dShapeRK11btTransformS3_S6_: # @_Z17b2CollidePolygonsP16btManifoldResultPK12btBox2dShapeRK11btTransformS3_S6_ @@ -493,10 +484,12 @@ _Z17b2CollidePolygonsP16btManifoldResultPK12btBox2dShapeRK11btTransformS3_S6_: # fcmp.clt.s $fcc0, $fs0, $fa0 bcnez $fcc0, .LBB5_21 # %bb.2: - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI5_0) - pcalau12i $a0, %pc_hi20(.LCPI5_1) - fld.s $fa2, $a0, %pc_lo12(.LCPI5_1) + lu12i.w $a0, 239665 + ori $a0, $a0, 623 + movgr2fr.w $fa1, $a0 + lu12i.w $a0, 260014 + ori $a0, $a0, 328 + movgr2fr.w $fa2, $a0 fmadd.s $fa1, $fs1, $fa2, $fa1 fcmp.cule.s $fcc0, $fa0, $fa1 movcf2gr $a0, $fcc0 @@ -575,8 +568,9 @@ _Z17b2CollidePolygonsP16btManifoldResultPK12btBox2dShapeRK11btTransformS3_S6_: # fmul.s $fs4, $fs5, $fs3 fmadd.s $fs4, $fs2, $fs6, $fs4 fmadd.s $fs4, $fs1, $fs7, $fs4 - pcalau12i $a5, %pc_hi20(.LCPI5_2) - fld.s $fs5, $a5, %pc_lo12(.LCPI5_2) + lu12i.w $a5, 382432 + ori $a5, $a5, 2923 + movgr2fr.w $fs5, $a5 fld.s $fs6, $a4, 164 fld.s $fs7, $a4, 160 fld.s $fa0, $a4, 168 @@ -972,12 +966,7 @@ _ZN30btBox2dBox2dCollisionAlgorithm21calculateTimeOfImpactEP17btCollisionObjectS .Lfunc_end6: .size _ZN30btBox2dBox2dCollisionAlgorithm21calculateTimeOfImpactEP17btCollisionObjectS1_RK16btDispatcherInfoP16btManifoldResult, .Lfunc_end6-_ZN30btBox2dBox2dCollisionAlgorithm21calculateTimeOfImpactEP17btCollisionObjectS1_RK16btDispatcherInfoP16btManifoldResult # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZL17FindMaxSeparationPiPK12btBox2dShapeRK11btTransformS2_S5_ -.LCPI7_0: - .word 0xdd5e0b6b # float -9.99999984E+17 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZL17FindMaxSeparationPiPK12btBox2dShapeRK11btTransformS2_S5_ .type _ZL17FindMaxSeparationPiPK12btBox2dShapeRK11btTransformS2_S5_,@function _ZL17FindMaxSeparationPiPK12btBox2dShapeRK11btTransformS2_S5_: # @_ZL17FindMaxSeparationPiPK12btBox2dShapeRK11btTransformS2_S5_ # %bb.0: @@ -1071,8 +1060,10 @@ _ZL17FindMaxSeparationPiPK12btBox2dShapeRK11btTransformS2_S5_: # @_ZL17FindMaxSe fmul.s $fa1, $fa4, $fa2 fmadd.s $fa1, $fa5, $fa3, $fa1 fmadd.s $fa1, $fa6, $fa0, $fa1 - pcalau12i $a0, %pc_hi20(.LCPI7_0) - fld.s $fa2, $a0, %pc_lo12(.LCPI7_0) + lu12i.w $a0, -141856 + ori $a0, $a0, 2923 + lu32i.d $a0, 0 + movgr2fr.w $fa2, $a0 fld.s $fa5, $a1, 164 fld.s $fa6, $a1, 160 fld.s $fa7, $a1, 168 @@ -1372,12 +1363,8 @@ _ZN30btBox2dBox2dCollisionAlgorithm22getAllContactManifoldsER20btAlignedObjectAr .size _ZN30btBox2dBox2dCollisionAlgorithm22getAllContactManifoldsER20btAlignedObjectArrayIP20btPersistentManifoldE, .Lfunc_end8-_ZN30btBox2dBox2dCollisionAlgorithm22getAllContactManifoldsER20btAlignedObjectArrayIP20btPersistentManifoldE .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZL14EdgeSeparationPK12btBox2dShapeRK11btTransformiS1_S4_ -.LCPI9_0: - .word 0x5d5e0b6b # float 9.99999984E+17 .text - .p2align 5 + .p2align 5 # -- Begin function _ZL14EdgeSeparationPK12btBox2dShapeRK11btTransformiS1_S4_ .type _ZL14EdgeSeparationPK12btBox2dShapeRK11btTransformiS1_S4_,@function _ZL14EdgeSeparationPK12btBox2dShapeRK11btTransformiS1_S4_: # @_ZL14EdgeSeparationPK12btBox2dShapeRK11btTransformiS1_S4_ # %bb.0: @@ -1433,8 +1420,9 @@ _ZL14EdgeSeparationPK12btBox2dShapeRK11btTransformiS1_S4_: # @_ZL14EdgeSeparatio fmul.s $fs0, $ft14, $fs0 fmadd.s $fs0, $fs1, $ft13, $fs0 fmadd.s $fs0, $fs2, $ft15, $fs0 - pcalau12i $a2, %pc_hi20(.LCPI9_0) - fld.s $fs1, $a2, %pc_lo12(.LCPI9_0) + lu12i.w $a2, 382432 + ori $a2, $a2, 2923 + movgr2fr.w $fs1, $a2 fld.s $fs2, $a3, 100 fld.s $fs3, $a3, 96 fld.s $fs4, $a3, 104 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btBoxBoxDetector.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btBoxBoxDetector.s index 7b91fa0a..9d4d3fb3 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btBoxBoxDetector.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btBoxBoxDetector.s @@ -14,12 +14,7 @@ _ZN16btBoxBoxDetectorC2EP10btBoxShapeS1_: # @_ZN16btBoxBoxDetectorC2EP10btBoxSha .Lfunc_end0: .size _ZN16btBoxBoxDetectorC2EP10btBoxShapeS1_, .Lfunc_end0-_ZN16btBoxBoxDetectorC2EP10btBoxShapeS1_ # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z20dLineClosestApproachRK9btVector3S1_S1_S1_PfS2_ -.LCPI1_0: - .word 0x38d1b717 # float 9.99999974E-5 - .text - .globl _Z20dLineClosestApproachRK9btVector3S1_S1_S1_PfS2_ + .globl _Z20dLineClosestApproachRK9btVector3S1_S1_S1_PfS2_ # -- Begin function _Z20dLineClosestApproachRK9btVector3S1_S1_S1_PfS2_ .p2align 5 .type _Z20dLineClosestApproachRK9btVector3S1_S1_S1_PfS2_,@function _Z20dLineClosestApproachRK9btVector3S1_S1_S1_PfS2_: # @_Z20dLineClosestApproachRK9btVector3S1_S1_S1_PfS2_ @@ -33,11 +28,12 @@ _Z20dLineClosestApproachRK9btVector3S1_S1_S1_PfS2_: # @_Z20dLineClosestApproachR fmul.s $fa3, $fa1, $fa6 fmadd.s $fa7, $fa0, $fa2, $fa3 fmadd.s $fa3, $fa4, $fa5, $fa7 - pcalau12i $a1, %pc_hi20(.LCPI1_0) - fld.s $ft0, $a1, %pc_lo12(.LCPI1_0) fnmadd.s $fa7, $fa4, $fa5, $fa7 - vldi $vr9, -1168 - fmadd.s $fa7, $fa7, $fa3, $ft1 + vldi $vr8, -1168 + fmadd.s $fa7, $fa7, $fa3, $ft0 + lu12i.w $a1, 232731 + ori $a1, $a1, 1815 + movgr2fr.w $ft0, $a1 fcmp.cle.s $fcc0, $fa7, $ft0 bcnez $fcc0, .LBB1_2 # %bb.1: @@ -74,22 +70,7 @@ _Z20dLineClosestApproachRK9btVector3S1_S1_S1_PfS2_: # @_Z20dLineClosestApproachR .Lfunc_end1: .size _Z20dLineClosestApproachRK9btVector3S1_S1_S1_PfS2_, .Lfunc_end1-_Z20dLineClosestApproachRK9btVector3S1_S1_S1_PfS2_ # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z11cullPoints2iPfiiPi -.LCPI2_0: - .word 0x5d5e0b6b # float 9.99999984E+17 -.LCPI2_1: - .word 0x34000000 # float 1.1920929E-7 -.LCPI2_2: - .word 0x40c90fdb # float 6.28318548 -.LCPI2_3: - .word 0xc0c90fdb # float -6.28318548 -.LCPI2_4: - .word 0x40490fdb # float 3.14159274 -.LCPI2_5: - .word 0x4e6e6b28 # float 1.0E+9 - .text - .globl _Z11cullPoints2iPfiiPi + .globl _Z11cullPoints2iPfiiPi # -- Begin function _Z11cullPoints2iPfiiPi .p2align 5 .type _Z11cullPoints2iPfiiPi,@function _Z11cullPoints2iPfiiPi: # @_Z11cullPoints2iPfiiPi @@ -163,18 +144,18 @@ _Z11cullPoints2iPfiiPi: # @_Z11cullPoints2iPfiiPi .LBB2_8: # %._crit_edge slli.w $a0, $s2, 1 alsl.d $a0, $a0, $a1, 2 - fld.s $fa3, $a0, -4 - fld.s $fa5, $a1, 0 - fld.s $fa6, $a0, -8 - fld.s $fa4, $a1, 4 - fneg.s $fa7, $fa3 - fmul.s $fa7, $fa5, $fa7 - pcalau12i $a0, %pc_hi20(.LCPI2_1) - fld.s $ft0, $a0, %pc_lo12(.LCPI2_1) - fmadd.s $fa7, $fa6, $fa4, $fa7 + fld.s $fa3, $a0, -8 + fld.s $fa4, $a0, -4 + fld.s $fa6, $a1, 0 + fld.s $fa5, $a1, 4 + fneg.s $fa7, $fa4 + fmul.s $fa7, $fa6, $fa7 + fmadd.s $fa7, $fa3, $fa5, $fa7 fadd.s $fa2, $fa2, $fa7 - fabs.s $ft1, $fa2 - fcmp.cule.s $fcc0, $ft1, $ft0 + fabs.s $ft0, $fa2 + lu12i.w $a0, 212992 + movgr2fr.w $ft1, $a0 + fcmp.cule.s $fcc0, $ft0, $ft1 bcnez $fcc0, .LBB2_11 # %bb.9: vldi $vr8, -1272 @@ -185,14 +166,15 @@ _Z11cullPoints2iPfiiPi: # @_Z11cullPoints2iPfiiPi move $a1, $zero b .LBB2_22 .LBB2_11: - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.s $fa2, $a0, %pc_lo12(.LCPI2_0) + lu12i.w $a0, 382432 + ori $a0, $a0, 2923 + movgr2fr.w $fa2, $a0 blez $s2, .LBB2_10 .LBB2_12: - fadd.s $fa5, $fa6, $fa5 - fmadd.s $fa1, $fa7, $fa5, $fa1 + fadd.s $fa3, $fa3, $fa6 + fmadd.s $fa1, $fa7, $fa3, $fa1 fmul.s $fs0, $fa1, $fa2 - fadd.s $fa1, $fa4, $fa3 + fadd.s $fa1, $fa5, $fa4 fmadd.s $fa0, $fa7, $fa1, $fa0 fmul.s $fs1, $fa0, $fa2 .LBB2_13: # %.lr.ph124.preheader @@ -259,23 +241,28 @@ _Z11cullPoints2iPfiiPi: # @_Z11cullPoints2iPfiiPi addi.d $a0, $s1, 4 beqz $a1, .LBB2_31 # %bb.24: # %.lr.ph133.us.preheader - pcalau12i $a1, %pc_hi20(.LCPI2_2) - fld.s $fa0, $a1, %pc_lo12(.LCPI2_2) bstrpick.d $a1, $s0, 31, 0 - movgr2fr.d $fa1, $a1 - ffint.s.l $fa1, $fa1 + movgr2fr.d $fa0, $a1 + ffint.s.l $fa1, $fa0 + lu12i.w $a1, 265360 + ori $a1, $a1, 4059 + movgr2fr.w $fa0, $a1 fdiv.s $fa1, $fa0, $fa1 slli.d $a1, $fp, 2 addi.d $a2, $sp, 48 fldx.s $fa2, $a1, $a2 - pcalau12i $a1, %pc_hi20(.LCPI2_3) - fld.s $fa3, $a1, %pc_lo12(.LCPI2_3) - pcalau12i $a1, %pc_hi20(.LCPI2_4) - fld.s $fa4, $a1, %pc_lo12(.LCPI2_4) - pcalau12i $a1, %pc_hi20(.LCPI2_5) - fld.s $fa5, $a1, %pc_lo12(.LCPI2_5) bstrpick.d $a1, $s2, 31, 0 ori $a2, $zero, 1 + lu12i.w $a3, -258928 + ori $a3, $a3, 4059 + lu32i.d $a3, 0 + movgr2fr.w $fa3, $a3 + lu12i.w $a3, 263312 + ori $a3, $a3, 4059 + movgr2fr.w $fa4, $a3 + lu12i.w $a3, 321254 + ori $a3, $a3, 2856 + movgr2fr.w $fa5, $a3 addi.d $a3, $sp, 16 b .LBB2_26 .p2align 4, , 16 @@ -381,14 +368,7 @@ _Z11cullPoints2iPfiiPi: # @_Z11cullPoints2iPfiiPi .Lfunc_end2: .size _Z11cullPoints2iPfiiPi, .Lfunc_end2-_Z11cullPoints2iPfiiPi # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteCollisionDetectorInterface6ResultE -.LCPI3_0: - .word 0xff7fffff # float -3.40282347E+38 -.LCPI3_1: - .word 0x3f866666 # float 1.04999995 - .text - .globl _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteCollisionDetectorInterface6ResultE + .globl _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteCollisionDetectorInterface6ResultE # -- Begin function _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteCollisionDetectorInterface6ResultE .p2align 5 .type _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteCollisionDetectorInterface6ResultE,@function _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteCollisionDetectorInterface6ResultE: # @_Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteCollisionDetectorInterface6ResultE @@ -450,10 +430,10 @@ _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteColl fmadd.s $fa0, $fa5, $ft10, $fa0 fmadd.s $ft0, $ft2, $ft9, $fa0 fld.s $fa6, $a1, 4 - fld.s $ft1, $a1, 20 - fld.s $fs3, $a1, 36 + fld.s $fs3, $a1, 20 + fld.s $ft1, $a1, 36 fld.s $fa0, $a1, 8 - fst.s $fa0, $sp, 96 # 4-byte Folded Spill + fst.s $fa0, $sp, 88 # 4-byte Folded Spill fld.s $fa0, $a2, 0 fld.s $fa1, $a2, 4 vldi $vr11, -1184 @@ -468,36 +448,37 @@ _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteColl fld.s $ft12, $a4, 16 fmul.s $fa0, $ft4, $ft3 fmul.s $ft7, $ft5, $ft3 - fld.s $ft4, $a4, 0 - fmul.s $ft3, $fa7, $ft12 + fld.s $ft5, $a4, 0 + fmul.s $ft4, $fa7, $ft12 fld.s $ft13, $a4, 32 - fld.s $ft5, $a4, 20 + fld.s $ft3, $a4, 20 fld.s $fs6, $a4, 4 - fmadd.s $ft3, $fa5, $ft4, $ft3 - fmadd.s $ft11, $ft2, $ft13, $ft3 - fmul.s $ft3, $fa7, $ft5 - fmadd.s $ft3, $fa5, $fs6, $ft3 + fmadd.s $ft4, $fa5, $ft5, $ft4 + fmadd.s $ft11, $ft2, $ft13, $ft4 + fmul.s $ft4, $fa7, $ft3 + fmadd.s $ft4, $fa5, $fs6, $ft4 fld.s $ft6, $a4, 36 fld.s $fs7, $a4, 24 fld.s $fs4, $a4, 8 fld.s $fs5, $a4, 40 - fmadd.s $ft3, $ft2, $ft6, $ft3 + fmadd.s $ft4, $ft2, $ft6, $ft4 fmul.s $fa7, $fa7, $fs7 fmadd.s $fa7, $fa5, $fs4, $fa7 fmadd.s $ft2, $ft2, $fs5, $fa7 fabs.s $ft14, $ft11 - fst.s $ft3, $sp, 56 # 4-byte Folded Spill - fabs.s $ft8, $ft3 - fabs.s $ft3, $ft2 + fst.s $ft4, $sp, 56 # 4-byte Folded Spill + fabs.s $ft8, $ft4 + fabs.s $ft4, $ft2 fabs.s $fa7, $ft0 fmadd.s $fs0, $fa1, $ft14, $fa4 fmadd.s $fs0, $fa0, $ft8, $fs0 - fmadd.s $fs0, $ft7, $ft3, $fs0 + fmadd.s $fs0, $ft7, $ft4, $fs0 fsub.s $fs0, $fa7, $fs0 fld.s $fa7, $a1, 24 fst.s $fa4, $sp, 428 + fst.s $fa3, $sp, 100 # 4-byte Folded Spill fst.s $fa3, $sp, 432 - fst.s $fa2, $sp, 92 # 4-byte Folded Spill + fst.s $fa2, $sp, 80 # 4-byte Folded Spill fst.s $fa2, $sp, 436 fst.s $fa1, $sp, 416 fst.s $fa0, $sp, 420 @@ -505,32 +486,33 @@ _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteColl movgr2fr.w $fs1, $zero fcmp.clt.s $fcc0, $fs1, $fs0 fst.s $ft7, $sp, 424 - bcnez $fcc0, .LBB3_87 + bcnez $fcc0, .LBB3_89 # %bb.1: - fst.s $fa3, $sp, 80 # 4-byte Folded Spill fst.s $ft0, $sp, 52 # 4-byte Folded Spill - fst.s $ft3, $sp, 44 # 4-byte Folded Spill - fmul.s $fa2, $ft15, $ft1 + fst.s $ft4, $sp, 40 # 4-byte Folded Spill + fmul.s $fa2, $ft15, $fs3 fmadd.s $fa2, $fa6, $ft10, $fa2 - fmadd.s $fa2, $fs3, $ft9, $fa2 + fmadd.s $fa2, $ft1, $ft9, $fa2 fst.s $fa2, $sp, 72 # 4-byte Folded Spill - fmul.s $fa2, $ft1, $ft12 - fmadd.s $fa2, $fa6, $ft4, $fa2 - fmadd.s $fa3, $fs3, $ft13, $fa2 - fmul.s $fa2, $ft1, $ft5 + fmul.s $fa2, $fs3, $ft12 + fmadd.s $fa2, $fa6, $ft5, $fa2 + fmadd.s $ft0, $ft1, $ft13, $fa2 + fmul.s $fa2, $fs3, $ft3 fmadd.s $fa2, $fa6, $fs6, $fa2 - fmadd.s $ft0, $fs3, $ft6, $fa2 - fmul.s $fa2, $ft1, $fs7 + fmadd.s $ft4, $ft1, $ft6, $fa2 + fmul.s $fa2, $fs3, $fs7 fmadd.s $fa2, $fa6, $fs4, $fa2 - pcalau12i $a2, %pc_hi20(.LCPI3_0) - fld.s $ft3, $a2, %pc_lo12(.LCPI3_0) - fmadd.s $fs3, $fs3, $fs5, $fa2 - fst.s $fa3, $sp, 36 # 4-byte Folded Spill - fabs.s $ft1, $fa3 - fst.s $ft0, $sp, 40 # 4-byte Folded Spill - fabs.s $fa2, $ft0 + fmadd.s $fs3, $ft1, $fs5, $fa2 + fst.s $ft0, $sp, 32 # 4-byte Folded Spill + fabs.s $ft1, $ft0 + fst.s $ft4, $sp, 36 # 4-byte Folded Spill + fabs.s $fa2, $ft4 fst.s $fa2, $sp, 64 # 4-byte Folded Spill - fcmp.cule.s $fcc0, $fs0, $ft3 + lu12i.w $a2, -2049 + ori $a2, $a2, 4095 + lu32i.d $a2, 0 + movgr2fr.w $ft4, $a2 + fcmp.cule.s $fcc0, $fs0, $ft4 fabs.s $fa2, $fs3 fst.s $fa2, $sp, 60 # 4-byte Folded Spill bcnez $fcc0, .LBB3_3 @@ -540,7 +522,7 @@ _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteColl fcmp.clt.s $fcc0, $fa3, $fa2 ori $s2, $zero, 1 movcf2gr $a2, $fcc0 - fmov.s $ft3, $fs0 + fmov.s $ft4, $fs0 move $a5, $a1 b .LBB3_4 .LBB3_3: @@ -549,10 +531,10 @@ _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteColl move $a5, $zero fld.s $fa3, $sp, 52 # 4-byte Folded Reload .LBB3_4: - fld.s $ft0, $sp, 80 # 4-byte Folded Reload fld.s $fa2, $sp, 72 # 4-byte Folded Reload fabs.s $fa2, $fa2 fst.s $ft1, $sp, 48 # 4-byte Folded Spill + fld.s $ft0, $sp, 100 # 4-byte Folded Reload fmadd.s $ft1, $fa1, $ft1, $ft0 fld.s $fs0, $sp, 64 # 4-byte Folded Reload fmadd.s $ft1, $fa0, $fs0, $ft1 @@ -560,30 +542,31 @@ _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteColl fmadd.s $ft1, $ft7, $fs0, $ft1 fsub.s $ft1, $fa2, $ft1 fcmp.clt.s $fcc0, $fs1, $ft1 - bcnez $fcc0, .LBB3_87 + bcnez $fcc0, .LBB3_89 # %bb.5: - fst.s $fs3, $sp, 28 # 4-byte Folded Spill - fst.s $ft2, $sp, 32 # 4-byte Folded Spill + fst.s $fs3, $sp, 24 # 4-byte Folded Spill + fst.s $ft2, $sp, 28 # 4-byte Folded Spill fmul.s $fa2, $fa7, $ft12 - fmul.s $fs0, $fa7, $ft5 + fmul.s $fs0, $fa7, $ft3 fmul.s $fs3, $fa7, $fs7 fmul.s $fa7, $ft15, $fa7 - fmov.s $ft2, $ft3 - fld.s $ft3, $sp, 96 # 4-byte Folded Reload - fmadd.s $fa2, $ft3, $ft4, $fa2 - fmadd.s $fs0, $ft3, $fs6, $fs0 - fmadd.s $fs3, $ft3, $fs4, $fs3 - fmadd.s $fa7, $ft3, $ft10, $fa7 - fmov.s $ft3, $ft2 + fmov.s $ft2, $ft4 + fld.s $ft4, $sp, 88 # 4-byte Folded Reload + fmadd.s $fa2, $ft4, $ft5, $fa2 + fmadd.s $fs0, $ft4, $fs6, $fs0 + fmadd.s $fs3, $ft4, $fs4, $fs3 + fmadd.s $fa7, $ft4, $ft10, $fa7 + fmov.s $ft4, $ft2 fmadd.s $ft0, $fs2, $ft13, $fa2 fmadd.s $ft2, $fs2, $ft6, $fs0 fmadd.s $fa2, $fs2, $fs5, $fs3 fmadd.s $fs0, $fs2, $ft9, $fa7 - fst.s $ft0, $sp, 20 # 4-byte Folded Spill + fst.s $ft0, $sp, 16 # 4-byte Folded Spill fabs.s $fs2, $ft0 - fst.s $ft2, $sp, 16 # 4-byte Folded Spill - fabs.s $fs3, $ft2 - fcmp.cule.s $fcc0, $ft1, $ft3 + fmov.s $fs3, $ft2 + fabs.s $fa7, $ft2 + fst.s $fa7, $sp, 44 # 4-byte Folded Spill + fcmp.cule.s $fcc0, $ft1, $ft4 fst.s $fa2, $sp, 12 # 4-byte Folded Spill fabs.s $ft2, $fa2 bcnez $fcc0, .LBB3_7 @@ -594,71 +577,74 @@ _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteColl fcmp.clt.s $fcc0, $fa7, $fa2 ori $s2, $zero, 2 movcf2gr $a2, $fcc0 - fmov.s $ft3, $ft1 + fmov.s $ft4, $ft1 .LBB3_7: - fst.s $fs0, $sp, 24 # 4-byte Folded Spill + fst.s $fs0, $sp, 20 # 4-byte Folded Spill fabs.s $fa2, $fs0 - fld.s $fs0, $sp, 92 # 4-byte Folded Reload + fld.s $fs0, $sp, 80 # 4-byte Folded Reload fmadd.s $fa7, $fa1, $fs2, $fs0 - fmadd.s $fa7, $fa0, $fs3, $fa7 + fld.s $ft1, $sp, 44 # 4-byte Folded Reload + fmadd.s $fa7, $fa0, $ft1, $fa7 fmadd.s $fa7, $ft7, $ft2, $fa7 fsub.s $fa7, $fa2, $fa7 fcmp.clt.s $fcc0, $fs1, $fa7 - bcnez $fcc0, .LBB3_87 + bcnez $fcc0, .LBB3_89 # %bb.8: - fcmp.cule.s $fcc0, $fa7, $ft3 + fst.s $ft2, $sp, 8 # 4-byte Folded Spill + fcmp.cule.s $fcc0, $fa7, $ft4 bcnez $fcc0, .LBB3_10 # %bb.9: addi.d $a5, $a1, 8 movgr2fr.w $fa2, $zero - fld.s $ft1, $sp, 24 # 4-byte Folded Reload + fld.s $ft1, $sp, 20 # 4-byte Folded Reload fcmp.clt.s $fcc0, $ft1, $fa2 ori $s2, $zero, 3 movcf2gr $a2, $fcc0 - fmov.s $ft3, $fa7 + fmov.s $ft4, $fa7 .LBB3_10: fmul.s $fa2, $ft15, $ft12 - fmadd.s $fa2, $ft4, $ft10, $fa2 + fmadd.s $fa2, $ft5, $ft10, $fa2 fmadd.s $fa7, $ft13, $ft9, $fa2 fabs.s $fa2, $fa7 fld.s $ft1, $sp, 48 # 4-byte Folded Reload - fld.s $ft0, $sp, 80 # 4-byte Folded Reload + fld.s $ft0, $sp, 100 # 4-byte Folded Reload fmul.s $ft1, $ft0, $ft1 fmadd.s $ft1, $fa4, $ft14, $ft1 fmadd.s $ft1, $fs0, $fs2, $ft1 fadd.s $ft1, $fa1, $ft1 fsub.s $ft1, $fa2, $ft1 fcmp.clt.s $fcc0, $fs1, $ft1 - fld.s $ft13, $sp, 32 # 4-byte Folded Reload - fld.s $ft12, $sp, 28 # 4-byte Folded Reload - bcnez $fcc0, .LBB3_87 + fld.s $ft2, $sp, 28 # 4-byte Folded Reload + fld.s $ft12, $sp, 24 # 4-byte Folded Reload + bcnez $fcc0, .LBB3_89 # %bb.11: fmov.s $ft0, $fa3 - fcmp.cule.s $fcc0, $ft1, $ft3 + fcmp.cule.s $fcc0, $ft1, $ft4 bcnez $fcc0, .LBB3_13 # %bb.12: movgr2fr.w $fa2, $zero fcmp.clt.s $fcc0, $fa7, $fa2 ori $s2, $zero, 4 movcf2gr $a2, $fcc0 - fmov.s $ft3, $ft1 + fmov.s $ft4, $ft1 move $a5, $a4 .LBB3_13: - fmul.s $fa2, $ft15, $ft5 + fmul.s $fa2, $ft15, $ft3 fmadd.s $fa2, $fs6, $ft10, $fa2 fmadd.s $fa7, $ft6, $ft9, $fa2 fabs.s $fa2, $fa7 fld.s $ft1, $sp, 64 # 4-byte Folded Reload - fld.s $fa3, $sp, 80 # 4-byte Folded Reload + fld.s $fa3, $sp, 100 # 4-byte Folded Reload fmul.s $ft1, $fa3, $ft1 fmadd.s $ft1, $fa4, $ft8, $ft1 - fmadd.s $ft1, $fs0, $fs3, $ft1 + fld.s $ft3, $sp, 44 # 4-byte Folded Reload + fmadd.s $ft1, $fs0, $ft3, $ft1 fadd.s $ft1, $fa0, $ft1 fsub.s $ft1, $fa2, $ft1 fcmp.clt.s $fcc0, $fs1, $ft1 - bcnez $fcc0, .LBB3_87 + bcnez $fcc0, .LBB3_89 # %bb.14: - fcmp.cule.s $fcc0, $ft1, $ft3 + fcmp.cule.s $fcc0, $ft1, $ft4 bcnez $fcc0, .LBB3_16 # %bb.15: addi.d $a5, $a4, 4 @@ -666,24 +652,25 @@ _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteColl fcmp.clt.s $fcc0, $fa7, $fa2 ori $s2, $zero, 5 movcf2gr $a2, $fcc0 - fmov.s $ft3, $ft1 + fmov.s $ft4, $ft1 .LBB3_16: fmul.s $fa2, $ft15, $fs7 fmadd.s $fa2, $fs4, $ft10, $fa2 fmadd.s $fa7, $fs5, $ft9, $fa2 fabs.s $fa2, $fa7 fld.s $ft1, $sp, 60 # 4-byte Folded Reload - fld.s $fa3, $sp, 80 # 4-byte Folded Reload + fld.s $fa3, $sp, 100 # 4-byte Folded Reload fmul.s $ft1, $fa3, $ft1 - fld.s $ft4, $sp, 44 # 4-byte Folded Reload - fmadd.s $ft1, $fa4, $ft4, $ft1 - fmadd.s $ft1, $fs0, $ft2, $ft1 + fld.s $ft3, $sp, 40 # 4-byte Folded Reload + fmadd.s $ft1, $fa4, $ft3, $ft1 + fld.s $ft3, $sp, 8 # 4-byte Folded Reload + fmadd.s $ft1, $fs0, $ft3, $ft1 fadd.s $ft1, $ft7, $ft1 fsub.s $ft1, $fa2, $ft1 fcmp.clt.s $fcc0, $fs1, $ft1 - bcnez $fcc0, .LBB3_87 + bcnez $fcc0, .LBB3_89 # %bb.17: - fcmp.cule.s $fcc0, $ft1, $ft3 + fcmp.cule.s $fcc0, $ft1, $ft4 bcnez $fcc0, .LBB3_19 # %bb.18: addi.d $a5, $a4, 8 @@ -691,147 +678,150 @@ _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteColl fcmp.clt.s $fcc0, $fa7, $fa2 ori $s2, $zero, 6 movcf2gr $a2, $fcc0 - fmov.s $ft3, $ft1 + fmov.s $ft4, $ft1 .LBB3_19: - fld.s $ft9, $sp, 20 # 4-byte Folded Reload - fneg.s $fa7, $ft9 + fld.s $ft6, $sp, 16 # 4-byte Folded Reload + fneg.s $fa7, $ft6 fld.s $fa2, $sp, 72 # 4-byte Folded Reload fmul.s $fa2, $fa2, $fa7 - fld.s $fs5, $sp, 36 # 4-byte Folded Reload - fld.s $ft1, $sp, 24 # 4-byte Folded Reload - fmadd.s $ft4, $ft1, $fs5, $fa2 - fabs.s $fa2, $ft4 + fld.s $fs4, $sp, 32 # 4-byte Folded Reload + fld.s $ft1, $sp, 20 # 4-byte Folded Reload + fmadd.s $ft3, $ft1, $fs4, $fa2 + fabs.s $fa2, $ft3 fld.s $ft1, $sp, 48 # 4-byte Folded Reload fmul.s $ft1, $fs0, $ft1 - fld.s $fa3, $sp, 80 # 4-byte Folded Reload + fld.s $fa3, $sp, 100 # 4-byte Folded Reload fmadd.s $ft1, $fa3, $fs2, $ft1 - fld.s $ft5, $sp, 44 # 4-byte Folded Reload + fld.s $ft5, $sp, 40 # 4-byte Folded Reload fmadd.s $ft1, $fa0, $ft5, $ft1 fmadd.s $ft1, $ft7, $ft8, $ft1 fsub.s $ft1, $fa2, $ft1 fcmp.clt.s $fcc0, $fs1, $ft1 - fld.s $fs4, $sp, 40 # 4-byte Folded Reload - fld.s $ft6, $sp, 16 # 4-byte Folded Reload - bcnez $fcc0, .LBB3_87 + fld.s $ft13, $sp, 36 # 4-byte Folded Reload + bcnez $fcc0, .LBB3_89 # %bb.20: - fmadd.s $ft9, $ft9, $ft9, $fs1 - fmadd.s $ft5, $fs5, $fs5, $ft9 + fmov.s $fs6, $fs3 + fmadd.s $ft9, $ft6, $ft6, $fs1 + fmadd.s $ft5, $fs4, $fs4, $ft9 fcmp.cule.s $fcc0, $ft5, $fs1 bcnez $fcc0, .LBB3_23 # %bb.21: - pcalau12i $t0, %pc_hi20(.LCPI3_1) - fld.s $fa2, $t0, %pc_lo12(.LCPI3_1) fsqrt.s $ft5, $ft5 fdiv.s $ft1, $ft1, $ft5 + lu12i.w $t0, 260198 + ori $t0, $t0, 1638 + movgr2fr.w $fa2, $t0 fmul.s $fa2, $ft1, $fa2 - fcmp.cule.s $fcc0, $fa2, $ft3 + fcmp.cule.s $fcc0, $fa2, $ft4 + fmov.s $fs5, $fs1 + fmov.s $fs3, $fs1 fmov.s $fs7, $fs1 - fmov.s $fs6, $fs1 - fmov.s $ft15, $fs1 bcnez $fcc0, .LBB3_24 # %bb.22: move $a5, $zero movgr2fr.w $fa2, $zero - fdiv.s $fs7, $fa2, $ft5 - fdiv.s $fs6, $fa7, $ft5 - fdiv.s $ft15, $fs5, $ft5 - fcmp.clt.s $fcc0, $ft4, $fa2 + fdiv.s $fs5, $fa2, $ft5 + fdiv.s $fs3, $fa7, $ft5 + fdiv.s $fs7, $fs4, $ft5 + fcmp.clt.s $fcc0, $ft3, $fa2 ori $s2, $zero, 7 movcf2gr $a2, $fcc0 - fmov.s $ft3, $ft1 + fmov.s $ft4, $ft1 b .LBB3_24 .LBB3_23: + fmov.s $fs5, $fs1 + fmov.s $fs3, $fs1 fmov.s $fs7, $fs1 - fmov.s $fs6, $fs1 - fmov.s $ft15, $fs1 .LBB3_24: - fneg.s $fa7, $ft6 + fneg.s $fa7, $fs6 fld.s $fa2, $sp, 72 # 4-byte Folded Reload fmul.s $fa2, $fa2, $fa7 - fld.s $ft1, $sp, 24 # 4-byte Folded Reload - fmadd.s $ft4, $ft1, $fs4, $fa2 - fabs.s $fa2, $ft4 + fld.s $ft1, $sp, 20 # 4-byte Folded Reload + fmadd.s $ft3, $ft1, $ft13, $fa2 + fabs.s $fa2, $ft3 fld.s $ft1, $sp, 64 # 4-byte Folded Reload fmul.s $ft1, $fs0, $ft1 - fld.s $fa3, $sp, 80 # 4-byte Folded Reload - fmadd.s $ft1, $fa3, $fs3, $ft1 fld.s $ft5, $sp, 44 # 4-byte Folded Reload + fld.s $fa3, $sp, 100 # 4-byte Folded Reload + fmadd.s $ft1, $fa3, $ft5, $ft1 + fld.s $ft5, $sp, 40 # 4-byte Folded Reload fmadd.s $ft1, $fa1, $ft5, $ft1 fmadd.s $ft1, $ft7, $ft14, $ft1 fsub.s $ft1, $fa2, $ft1 fcmp.clt.s $fcc0, $fs1, $ft1 - bcnez $fcc0, .LBB3_87 + bcnez $fcc0, .LBB3_89 # %bb.25: movgr2fr.w $fs1, $zero - fmadd.s $ft10, $ft6, $ft6, $fs1 - fmadd.s $ft5, $fs4, $fs4, $ft10 + fmadd.s $ft10, $fs6, $fs6, $fs1 + fmadd.s $ft5, $ft13, $ft13, $ft10 fcmp.cule.s $fcc0, $ft5, $fs1 bcnez $fcc0, .LBB3_28 # %bb.26: - pcalau12i $t0, %pc_hi20(.LCPI3_1) - fld.s $fa2, $t0, %pc_lo12(.LCPI3_1) fsqrt.s $ft5, $ft5 fdiv.s $ft1, $ft1, $ft5 + lu12i.w $t0, 260198 + ori $t0, $t0, 1638 + movgr2fr.w $fa2, $t0 fmul.s $fa2, $ft1, $fa2 - fcmp.cule.s $fcc0, $fa2, $ft3 + fcmp.cule.s $fcc0, $fa2, $ft4 bcnez $fcc0, .LBB3_28 # %bb.27: move $a5, $zero - fdiv.s $fs7, $fs1, $ft5 - fdiv.s $fs6, $fa7, $ft5 - fdiv.s $ft15, $fs4, $ft5 - fcmp.clt.s $fcc0, $ft4, $fs1 + fdiv.s $fs5, $fs1, $ft5 + fdiv.s $fs3, $fa7, $ft5 + fdiv.s $fs7, $ft13, $ft5 + fcmp.clt.s $fcc0, $ft3, $fs1 ori $s2, $zero, 8 movcf2gr $a2, $fcc0 - fmov.s $ft3, $ft1 + fmov.s $ft4, $ft1 .LBB3_28: - fld.s $ft5, $sp, 12 # 4-byte Folded Reload - fneg.s $fa7, $ft5 + fld.s $ft6, $sp, 12 # 4-byte Folded Reload + fneg.s $fa7, $ft6 fld.s $fa2, $sp, 72 # 4-byte Folded Reload fmul.s $fa2, $fa2, $fa7 - fld.s $ft1, $sp, 24 # 4-byte Folded Reload - fmadd.s $ft4, $ft1, $ft12, $fa2 - fabs.s $fa2, $ft4 + fld.s $ft1, $sp, 20 # 4-byte Folded Reload + fmadd.s $ft3, $ft1, $ft12, $fa2 + fabs.s $fa2, $ft3 fld.s $ft1, $sp, 60 # 4-byte Folded Reload fmul.s $ft1, $fs0, $ft1 - fld.s $fa3, $sp, 80 # 4-byte Folded Reload - fmadd.s $ft1, $fa3, $ft2, $ft1 + fld.s $ft5, $sp, 8 # 4-byte Folded Reload + fld.s $fa3, $sp, 100 # 4-byte Folded Reload + fmadd.s $ft1, $fa3, $ft5, $ft1 fmadd.s $ft1, $fa1, $ft8, $ft1 fmadd.s $ft1, $fa0, $ft14, $ft1 fsub.s $ft1, $fa2, $ft1 fcmp.clt.s $fcc0, $fs1, $ft1 - bcnez $fcc0, .LBB3_87 + bcnez $fcc0, .LBB3_89 # %bb.29: - fst.s $ft15, $sp, 8 # 4-byte Folded Spill - fmadd.s $ft15, $ft5, $ft5, $fs1 + fmadd.s $ft15, $ft6, $ft6, $fs1 fmadd.s $ft5, $ft12, $ft12, $ft15 fcmp.cule.s $fcc0, $ft5, $fs1 bcnez $fcc0, .LBB3_32 # %bb.30: - pcalau12i $t0, %pc_hi20(.LCPI3_1) - fld.s $fa2, $t0, %pc_lo12(.LCPI3_1) fsqrt.s $ft5, $ft5 fdiv.s $ft1, $ft1, $ft5 + lu12i.w $t0, 260198 + ori $t0, $t0, 1638 + movgr2fr.w $fa2, $t0 fmul.s $fa2, $ft1, $fa2 - fcmp.cule.s $fcc0, $fa2, $ft3 + fcmp.cule.s $fcc0, $fa2, $ft4 bcnez $fcc0, .LBB3_32 # %bb.31: move $a5, $zero - fdiv.s $fs7, $fs1, $ft5 - fdiv.s $fs6, $fa7, $ft5 - fdiv.s $fa2, $ft12, $ft5 - fst.s $fa2, $sp, 8 # 4-byte Folded Spill - fcmp.clt.s $fcc0, $ft4, $fs1 + fdiv.s $fs5, $fs1, $ft5 + fdiv.s $fs3, $fa7, $ft5 + fdiv.s $fs7, $ft12, $ft5 + fcmp.clt.s $fcc0, $ft3, $fs1 ori $s2, $zero, 9 movcf2gr $a2, $fcc0 - fmov.s $ft3, $ft1 + fmov.s $ft4, $ft1 .LBB3_32: fneg.s $fa7, $ft11 - fld.s $fa2, $sp, 24 # 4-byte Folded Reload + fld.s $fa2, $sp, 20 # 4-byte Folded Reload fmul.s $fa2, $fa2, $fa7 - fld.s $ft1, $sp, 20 # 4-byte Folded Reload - fmadd.s $ft4, $ft0, $ft1, $fa2 - fabs.s $fa2, $ft4 + fld.s $ft1, $sp, 16 # 4-byte Folded Reload + fmadd.s $ft3, $ft0, $ft1, $fa2 + fabs.s $fa2, $ft3 fmul.s $ft1, $fs0, $ft14 fmadd.s $ft1, $fa4, $fs2, $ft1 fld.s $ft5, $sp, 60 # 4-byte Folded Reload @@ -840,289 +830,296 @@ _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteColl fmadd.s $ft1, $ft7, $ft5, $ft1 fsub.s $ft1, $fa2, $ft1 fcmp.clt.s $fcc0, $fs1, $ft1 - bcnez $fcc0, .LBB3_87 + bcnez $fcc0, .LBB3_89 # %bb.33: fmadd.s $ft5, $ft11, $ft11, $ft9 fcmp.cule.s $fcc0, $ft5, $fs1 bcnez $fcc0, .LBB3_36 # %bb.34: - pcalau12i $t0, %pc_hi20(.LCPI3_1) - fld.s $fa2, $t0, %pc_lo12(.LCPI3_1) fsqrt.s $ft5, $ft5 fdiv.s $ft1, $ft1, $ft5 + lu12i.w $t0, 260198 + ori $t0, $t0, 1638 + movgr2fr.w $fa2, $t0 fmul.s $fa2, $ft1, $fa2 - fcmp.cule.s $fcc0, $fa2, $ft3 + fcmp.cule.s $fcc0, $fa2, $ft4 fld.s $ft6, $sp, 56 # 4-byte Folded Reload bcnez $fcc0, .LBB3_37 # %bb.35: move $a5, $zero - fld.s $fa2, $sp, 20 # 4-byte Folded Reload - fdiv.s $fs7, $fa2, $ft5 - fdiv.s $fs6, $fs1, $ft5 - fdiv.s $fa2, $fa7, $ft5 - fst.s $fa2, $sp, 8 # 4-byte Folded Spill - fcmp.clt.s $fcc0, $ft4, $fs1 + fld.s $fa2, $sp, 16 # 4-byte Folded Reload + fdiv.s $fs5, $fa2, $ft5 + fdiv.s $fs3, $fs1, $ft5 + fdiv.s $fs7, $fa7, $ft5 + fcmp.clt.s $fcc0, $ft3, $fs1 ori $s2, $zero, 10 movcf2gr $a2, $fcc0 - fmov.s $ft3, $ft1 + fmov.s $ft4, $ft1 b .LBB3_37 .LBB3_36: fld.s $ft6, $sp, 56 # 4-byte Folded Reload .LBB3_37: fneg.s $fa7, $ft6 - fld.s $fa2, $sp, 24 # 4-byte Folded Reload + fld.s $fa2, $sp, 20 # 4-byte Folded Reload fmul.s $fa2, $fa2, $fa7 - fld.s $ft1, $sp, 16 # 4-byte Folded Reload - fmadd.s $ft4, $ft0, $ft1, $fa2 - fabs.s $fa2, $ft4 + fmadd.s $ft3, $ft0, $fs6, $fa2 + fabs.s $fa2, $ft3 fmul.s $ft1, $fs0, $ft8 - fmadd.s $ft1, $fa4, $fs3, $ft1 + fld.s $ft5, $sp, 44 # 4-byte Folded Reload + fmadd.s $ft1, $fa4, $ft5, $ft1 fld.s $ft5, $sp, 60 # 4-byte Folded Reload fmadd.s $ft1, $fa1, $ft5, $ft1 fld.s $ft5, $sp, 48 # 4-byte Folded Reload fmadd.s $ft1, $ft7, $ft5, $ft1 fsub.s $ft1, $fa2, $ft1 fcmp.clt.s $fcc0, $fs1, $ft1 - bcnez $fcc0, .LBB3_87 + bcnez $fcc0, .LBB3_89 # %bb.38: fmadd.s $ft5, $ft6, $ft6, $ft10 fcmp.cule.s $fcc0, $ft5, $fs1 bcnez $fcc0, .LBB3_41 # %bb.39: - pcalau12i $t0, %pc_hi20(.LCPI3_1) - fld.s $fa2, $t0, %pc_lo12(.LCPI3_1) fsqrt.s $ft5, $ft5 fdiv.s $ft1, $ft1, $ft5 + lu12i.w $t0, 260198 + ori $t0, $t0, 1638 + movgr2fr.w $fa2, $t0 fmul.s $fa2, $ft1, $fa2 - fcmp.cule.s $fcc0, $fa2, $ft3 - bcnez $fcc0, .LBB3_41 + fcmp.cule.s $fcc0, $fa2, $ft4 + fld.s $ft9, $sp, 12 # 4-byte Folded Reload + bcnez $fcc0, .LBB3_42 # %bb.40: move $a5, $zero - fld.s $fa2, $sp, 16 # 4-byte Folded Reload - fdiv.s $fs7, $fa2, $ft5 - fdiv.s $fs6, $fs1, $ft5 - fdiv.s $fa2, $fa7, $ft5 - fst.s $fa2, $sp, 8 # 4-byte Folded Spill - fcmp.clt.s $fcc0, $ft4, $fs1 + fdiv.s $fs5, $fs6, $ft5 + fdiv.s $fs3, $fs1, $ft5 + fdiv.s $fs7, $fa7, $ft5 + fcmp.clt.s $fcc0, $ft3, $fs1 ori $s2, $zero, 11 movcf2gr $a2, $fcc0 - fmov.s $ft3, $ft1 + fmov.s $ft4, $ft1 + b .LBB3_42 .LBB3_41: - fneg.s $fa7, $ft13 - fld.s $fa2, $sp, 24 # 4-byte Folded Reload + fld.s $ft9, $sp, 12 # 4-byte Folded Reload +.LBB3_42: + fneg.s $fa7, $ft2 + fld.s $fa2, $sp, 20 # 4-byte Folded Reload fmul.s $fa2, $fa2, $fa7 - fld.s $ft1, $sp, 12 # 4-byte Folded Reload - fmadd.s $ft4, $ft0, $ft1, $fa2 - fabs.s $fa2, $ft4 - fld.s $ft1, $sp, 44 # 4-byte Folded Reload + fmadd.s $ft3, $ft0, $ft9, $fa2 + fabs.s $fa2, $ft3 + fld.s $ft1, $sp, 40 # 4-byte Folded Reload fmul.s $ft1, $fs0, $ft1 - fmadd.s $ft1, $fa4, $ft2, $ft1 + fld.s $ft5, $sp, 8 # 4-byte Folded Reload + fmadd.s $ft1, $fa4, $ft5, $ft1 fld.s $ft5, $sp, 64 # 4-byte Folded Reload fmadd.s $ft1, $fa1, $ft5, $ft1 fld.s $ft5, $sp, 48 # 4-byte Folded Reload fmadd.s $ft1, $fa0, $ft5, $ft1 fsub.s $ft1, $fa2, $ft1 fcmp.clt.s $fcc0, $fs1, $ft1 - bcnez $fcc0, .LBB3_87 -# %bb.42: - fmadd.s $ft5, $ft13, $ft13, $ft15 - fcmp.cule.s $fcc0, $ft5, $fs1 - bcnez $fcc0, .LBB3_45 + bcnez $fcc0, .LBB3_89 # %bb.43: - pcalau12i $t0, %pc_hi20(.LCPI3_1) - fld.s $fa2, $t0, %pc_lo12(.LCPI3_1) + fmadd.s $ft5, $ft2, $ft2, $ft15 + fcmp.cule.s $fcc0, $ft5, $fs1 + bcnez $fcc0, .LBB3_46 +# %bb.44: fsqrt.s $ft5, $ft5 fdiv.s $ft1, $ft1, $ft5 + lu12i.w $t0, 260198 + ori $t0, $t0, 1638 + movgr2fr.w $fa2, $t0 fmul.s $fa2, $ft1, $fa2 - fcmp.cule.s $fcc0, $fa2, $ft3 - bcnez $fcc0, .LBB3_45 -# %bb.44: + fcmp.cule.s $fcc0, $fa2, $ft4 + fld.s $ft2, $sp, 8 # 4-byte Folded Reload + bcnez $fcc0, .LBB3_47 +# %bb.45: move $a5, $zero - fld.s $fa2, $sp, 12 # 4-byte Folded Reload - fdiv.s $fs7, $fa2, $ft5 - fdiv.s $fs6, $fs1, $ft5 - fdiv.s $fa2, $fa7, $ft5 - fst.s $fa2, $sp, 8 # 4-byte Folded Spill - fcmp.clt.s $fcc0, $ft4, $fs1 + fdiv.s $fs5, $ft9, $ft5 + fdiv.s $fs3, $fs1, $ft5 + fdiv.s $fs7, $fa7, $ft5 + fcmp.clt.s $fcc0, $ft3, $fs1 ori $s2, $zero, 12 movcf2gr $a2, $fcc0 - fmov.s $ft3, $ft1 -.LBB3_45: - fneg.s $ft4, $fs5 - fmul.s $fa2, $ft0, $ft4 + fmov.s $ft4, $ft1 + b .LBB3_47 +.LBB3_46: + fld.s $ft2, $sp, 8 # 4-byte Folded Reload +.LBB3_47: + fneg.s $ft3, $fs4 + fmul.s $fa2, $ft0, $ft3 fld.s $fa7, $sp, 72 # 4-byte Folded Reload fmadd.s $fa7, $fa7, $ft11, $fa2 fabs.s $fa2, $fa7 - fld.s $fa3, $sp, 80 # 4-byte Folded Reload + fld.s $fa3, $sp, 100 # 4-byte Folded Reload fmul.s $ft1, $fa3, $ft14 fld.s $ft5, $sp, 48 # 4-byte Folded Reload fmadd.s $ft1, $fa4, $ft5, $ft1 fmadd.s $ft1, $fa0, $ft2, $ft1 - fmadd.s $ft1, $ft7, $fs3, $ft1 + fld.s $ft2, $sp, 44 # 4-byte Folded Reload + fmadd.s $ft1, $ft7, $ft2, $ft1 fsub.s $ft1, $fa2, $ft1 fcmp.clt.s $fcc0, $fs1, $ft1 - bcnez $fcc0, .LBB3_87 -# %bb.46: + bcnez $fcc0, .LBB3_89 +# %bb.48: fmul.s $fa2, $ft11, $ft11 - fmadd.s $ft5, $fs5, $fs5, $fa2 + fmadd.s $ft5, $fs4, $fs4, $fa2 fcmp.cule.s $fcc0, $ft5, $fs1 - bcnez $fcc0, .LBB3_49 -# %bb.47: - pcalau12i $t0, %pc_hi20(.LCPI3_1) - fld.s $fa2, $t0, %pc_lo12(.LCPI3_1) + bcnez $fcc0, .LBB3_51 +# %bb.49: fsqrt.s $ft5, $ft5 fdiv.s $ft1, $ft1, $ft5 + lu12i.w $t0, 260198 + ori $t0, $t0, 1638 + movgr2fr.w $fa2, $t0 fmul.s $fa2, $ft1, $fa2 - fcmp.cule.s $fcc0, $fa2, $ft3 - bcnez $fcc0, .LBB3_49 -# %bb.48: + fcmp.cule.s $fcc0, $fa2, $ft4 + bcnez $fcc0, .LBB3_51 +# %bb.50: move $a5, $zero - fdiv.s $fs7, $ft4, $ft5 - fdiv.s $fs6, $ft11, $ft5 - fdiv.s $fa2, $fs1, $ft5 - fst.s $fa2, $sp, 8 # 4-byte Folded Spill + fdiv.s $fs5, $ft3, $ft5 + fdiv.s $fs3, $ft11, $ft5 + fdiv.s $fs7, $fs1, $ft5 fcmp.clt.s $fcc0, $fa7, $fs1 ori $s2, $zero, 13 movcf2gr $a2, $fcc0 - fmov.s $ft3, $ft1 -.LBB3_49: - fneg.s $ft4, $fs4 - fmul.s $fa2, $ft0, $ft4 + fmov.s $ft4, $ft1 +.LBB3_51: + fneg.s $ft3, $ft13 + fmul.s $fa2, $ft0, $ft3 fld.s $fa7, $sp, 72 # 4-byte Folded Reload fmadd.s $fa7, $fa7, $ft6, $fa2 fabs.s $fa2, $fa7 - fld.s $fa3, $sp, 80 # 4-byte Folded Reload + fld.s $fa3, $sp, 100 # 4-byte Folded Reload fmul.s $ft1, $fa3, $ft8 - fld.s $ft0, $sp, 64 # 4-byte Folded Reload - fmadd.s $ft1, $fa4, $ft0, $ft1 + fld.s $ft2, $sp, 64 # 4-byte Folded Reload + fmadd.s $ft1, $fa4, $ft2, $ft1 + fld.s $ft2, $sp, 8 # 4-byte Folded Reload fmadd.s $ft1, $fa1, $ft2, $ft1 fmadd.s $ft1, $ft7, $fs2, $ft1 fsub.s $ft1, $fa2, $ft1 fcmp.clt.s $fcc0, $fs1, $ft1 - bcnez $fcc0, .LBB3_87 -# %bb.50: - fmov.s $ft9, $fs6 - fmov.s $fs6, $ft3 + bcnez $fcc0, .LBB3_89 +# %bb.52: + fmov.s $fs6, $ft4 move $s5, $a0 move $s4, $a6 fmul.s $fa2, $ft6, $ft6 - fmadd.s $ft5, $fs4, $fs4, $fa2 + fmadd.s $ft5, $ft13, $ft13, $fa2 fcmp.cule.s $fcc0, $ft5, $fs1 - bcnez $fcc0, .LBB3_53 -# %bb.51: - pcalau12i $a0, %pc_hi20(.LCPI3_1) - fld.s $fa2, $a0, %pc_lo12(.LCPI3_1) + bcnez $fcc0, .LBB3_55 +# %bb.53: fsqrt.s $ft5, $ft5 fdiv.s $ft1, $ft1, $ft5 + lu12i.w $a0, 260198 + ori $a0, $a0, 1638 + movgr2fr.w $fa2, $a0 fmul.s $fa2, $ft1, $fa2 fcmp.cule.s $fcc0, $fa2, $fs6 - bcnez $fcc0, .LBB3_53 -# %bb.52: + bcnez $fcc0, .LBB3_55 +# %bb.54: move $a5, $zero - fdiv.s $fs7, $ft4, $ft5 + fdiv.s $fs5, $ft3, $ft5 fld.s $fa2, $sp, 56 # 4-byte Folded Reload - fdiv.s $ft9, $fa2, $ft5 - fdiv.s $fa2, $fs1, $ft5 - fst.s $fa2, $sp, 8 # 4-byte Folded Spill + fdiv.s $fs3, $fa2, $ft5 + fdiv.s $fs7, $fs1, $ft5 fcmp.clt.s $fcc0, $fa7, $fs1 ori $s2, $zero, 14 movcf2gr $a2, $fcc0 fmov.s $fs6, $ft1 -.LBB3_53: - fld.s $fa2, $sp, 28 # 4-byte Folded Reload +.LBB3_55: + fld.s $fa2, $sp, 24 # 4-byte Folded Reload fneg.s $ft1, $fa2 fld.s $fa2, $sp, 52 # 4-byte Folded Reload fmul.s $fa2, $fa2, $ft1 - fld.s $fa7, $sp, 32 # 4-byte Folded Reload + fld.s $fa7, $sp, 28 # 4-byte Folded Reload fld.s $ft0, $sp, 72 # 4-byte Folded Reload fmadd.s $fa7, $ft0, $fa7, $fa2 fabs.s $fa2, $fa7 - fld.s $ft0, $sp, 44 # 4-byte Folded Reload - fld.s $fa3, $sp, 80 # 4-byte Folded Reload + fld.s $ft0, $sp, 40 # 4-byte Folded Reload + fld.s $fa3, $sp, 100 # 4-byte Folded Reload fmul.s $ft0, $fa3, $ft0 fld.s $ft2, $sp, 60 # 4-byte Folded Reload fmadd.s $ft0, $fa4, $ft2, $ft0 - fmadd.s $ft0, $fa1, $fs3, $ft0 + fld.s $ft2, $sp, 44 # 4-byte Folded Reload + fmadd.s $ft0, $fa1, $ft2, $ft0 fmadd.s $ft0, $fa0, $fs2, $ft0 fsub.s $ft0, $fa2, $ft0 fcmp.clt.s $fcc0, $fs1, $ft0 - bcnez $fcc0, .LBB3_87 -# %bb.54: - fld.s $fa2, $sp, 32 # 4-byte Folded Reload + bcnez $fcc0, .LBB3_89 +# %bb.56: + fld.s $fa2, $sp, 28 # 4-byte Folded Reload fmul.s $fa2, $fa2, $fa2 - fld.s $ft2, $sp, 28 # 4-byte Folded Reload + fld.s $ft2, $sp, 24 # 4-byte Folded Reload fmadd.s $ft2, $ft2, $ft2, $fa2 fcmp.cule.s $fcc0, $ft2, $fs1 - bcnez $fcc0, .LBB3_57 -# %bb.55: - pcalau12i $a0, %pc_hi20(.LCPI3_1) - fld.s $fa2, $a0, %pc_lo12(.LCPI3_1) + bcnez $fcc0, .LBB3_59 +# %bb.57: fsqrt.s $ft2, $ft2 fdiv.s $ft0, $ft0, $ft2 + lu12i.w $a0, 260198 + ori $a0, $a0, 1638 + movgr2fr.w $fa2, $a0 fmul.s $fa2, $ft0, $fa2 fcmp.cule.s $fcc0, $fa2, $fs6 - bcnez $fcc0, .LBB3_57 -# %bb.56: # %.thread866 - fdiv.s $fs7, $ft1, $ft2 - fld.s $fa2, $sp, 32 # 4-byte Folded Reload - fdiv.s $ft9, $fa2, $ft2 - fdiv.s $fa2, $fs1, $ft2 - fst.s $fa2, $sp, 8 # 4-byte Folded Spill + bcnez $fcc0, .LBB3_59 +# %bb.58: # %.thread866 + fdiv.s $fs5, $ft1, $ft2 + fld.s $fa2, $sp, 28 # 4-byte Folded Reload + fdiv.s $fs3, $fa2, $ft2 + fdiv.s $fs7, $fs1, $ft2 fcmp.clt.s $fcc0, $fa7, $fs1 ori $s2, $zero, 15 movcf2gr $a2, $fcc0 fmov.s $fs6, $ft0 - b .LBB3_60 -.LBB3_57: - beqz $s2, .LBB3_87 -# %bb.58: - beqz $a5, .LBB3_60 -# %bb.59: + b .LBB3_62 +.LBB3_59: + beqz $s2, .LBB3_89 +# %bb.60: + beqz $a5, .LBB3_62 +# %bb.61: fld.s $fa5, $a5, 0 fst.s $fa5, $s4, 0 fld.s $fa6, $a5, 16 fst.s $fa6, $s4, 4 fld.s $fa7, $a5, 32 - b .LBB3_61 -.LBB3_60: - fmul.s $fa2, $fa6, $ft9 - fmadd.s $fa2, $fa5, $fs7, $fa2 - fld.s $fa5, $sp, 96 # 4-byte Folded Reload - fld.s $fa3, $sp, 8 # 4-byte Folded Reload - fmadd.s $fa5, $fa5, $fa3, $fa2 + b .LBB3_63 +.LBB3_62: + fmul.s $fa2, $fa6, $fs3 + fmadd.s $fa2, $fa5, $fs5, $fa2 + fld.s $fa5, $sp, 88 # 4-byte Folded Reload + fmadd.s $fa5, $fa5, $fs7, $fa2 fst.s $fa5, $s4, 0 fld.s $fa2, $a1, 20 fld.s $fa6, $a1, 16 fld.s $fa7, $a1, 24 - fmul.s $fa2, $ft9, $fa2 - fmadd.s $fa2, $fa6, $fs7, $fa2 - fmadd.s $fa6, $fa7, $fa3, $fa2 + fmul.s $fa2, $fs3, $fa2 + fmadd.s $fa2, $fa6, $fs5, $fa2 + fmadd.s $fa6, $fa7, $fs7, $fa2 fst.s $fa6, $s4, 4 fld.s $fa2, $a1, 36 fld.s $fa7, $a1, 32 fld.s $ft0, $a1, 40 - fmul.s $fa2, $ft9, $fa2 - fmadd.s $fa2, $fa7, $fs7, $fa2 - fmadd.s $fa7, $ft0, $fa3, $fa2 -.LBB3_61: + fmul.s $fa2, $fs3, $fa2 + fmadd.s $fa2, $fa7, $fs5, $fa2 + fmadd.s $fa7, $ft0, $fs7, $fa2 +.LBB3_63: fst.s $fa7, $s4, 8 - beqz $a2, .LBB3_63 -# %bb.62: + beqz $a2, .LBB3_65 +# %bb.64: fneg.s $fa2, $fa5 fst.s $fa2, $s4, 0 fneg.s $fa2, $fa6 fst.s $fa2, $s4, 4 fneg.s $fa2, $fa7 fst.s $fa2, $s4, 8 -.LBB3_63: +.LBB3_65: ld.d $fp, $sp, 624 ld.d $a0, $sp, 592 - st.d $a0, $sp, 96 # 8-byte Folded Spill + st.d $a0, $sp, 88 # 8-byte Folded Spill fneg.s $fa2, $fs6 ori $a0, $zero, 7 fst.s $fa2, $a7, 0 - bltu $s2, $a0, .LBB3_65 -# %bb.64: # %.preheader913 + bltu $s2, $a0, .LBB3_67 +# %bb.66: # %.preheader913 ld.d $a0, $s5, 0 st.d $a0, $sp, 200 ld.w $a0, $s5, 8 @@ -1151,7 +1148,7 @@ _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteColl fmul.s $ft1, $fa7, $ft3 fmadd.s $ft1, $fa5, $ft4, $ft1 fmadd.s $ft1, $fa6, $ft5, $ft1 - fld.s $fa3, $sp, 80 # 4-byte Folded Reload + fld.s $fa3, $sp, 100 # 4-byte Folded Reload fneg.s $ft2, $fa3 fcmp.clt.s $fcc0, $fs1, $ft1 fsel $fa3, $ft2, $fa3, $fcc0 @@ -1164,7 +1161,7 @@ _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteColl fmul.s $fa4, $fa7, $ft1 fmadd.s $fa4, $fa5, $ft2, $fa4 fmadd.s $fa4, $fa6, $ft3, $fa4 - fld.s $ft5, $sp, 92 # 4-byte Folded Reload + fld.s $ft5, $sp, 80 # 4-byte Folded Reload fneg.s $ft4, $ft5 fcmp.clt.s $fcc0, $fs1, $fa4 fsel $fa4, $ft4, $ft5, $fcc0 @@ -1279,30 +1276,30 @@ _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteColl move $a0, $fp fmov.s $fa0, $fs6 jirl $ra, $a4, 0 - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 88 # 8-byte Folded Reload st.w $s2, $a0, 0 ori $s1, $zero, 1 - b .LBB3_88 -.LBB3_65: + b .LBB3_90 +.LBB3_67: fld.s $fs0, $s4, 0 ori $a0, $zero, 3 - bltu $a0, $s2, .LBB3_67 -# %bb.66: + bltu $a0, $s2, .LBB3_69 +# %bb.68: fld.s $fa0, $s4, 4 - fst.s $fa0, $sp, 92 # 4-byte Folded Spill + fst.s $fa0, $sp, 100 # 4-byte Folded Spill fld.s $fs3, $s4, 8 addi.d $a0, $sp, 428 st.d $a0, $sp, 80 # 8-byte Folded Spill addi.d $a0, $sp, 416 move $a2, $a3 move $s6, $a4 - b .LBB3_68 -.LBB3_67: + b .LBB3_70 +.LBB3_69: fld.s $fa0, $s4, 4 fld.s $fa1, $s4, 8 fneg.s $fs0, $fs0 fneg.s $fa0, $fa0 - fst.s $fa0, $sp, 92 # 4-byte Folded Spill + fst.s $fa0, $sp, 100 # 4-byte Folded Spill fneg.s $fs3, $fa1 addi.d $a0, $sp, 416 st.d $a0, $sp, 80 # 8-byte Folded Spill @@ -1311,11 +1308,11 @@ _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteColl move $s5, $a3 move $s6, $a1 move $a1, $a4 -.LBB3_68: +.LBB3_70: fld.s $fa0, $s6, 16 fld.s $fa1, $s6, 0 fld.s $fa2, $s6, 32 - fld.s $fa5, $sp, 92 # 4-byte Folded Reload + fld.s $fa5, $sp, 100 # 4-byte Folded Reload fmul.s $fa0, $fa5, $fa0 fld.s $fa3, $s6, 20 fmadd.s $fa0, $fa1, $fs0, $fa0 @@ -1338,23 +1335,23 @@ _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteColl fabs.s $fa1, $fa1 fcmp.cule.s $fcc0, $fa1, $fa2 fabs.s $fa0, $fa3 - bcnez $fcc0, .LBB3_70 -# %bb.69: + bcnez $fcc0, .LBB3_72 +# %bb.71: move $a4, $zero fcmp.clt.s $fcc0, $fa0, $fa1 ori $a3, $zero, 2 movcf2gr $a6, $fcc0 sub.d $a5, $a3, $a6 addi.d $a3, $a6, 1 - b .LBB3_71 -.LBB3_70: + b .LBB3_73 +.LBB3_72: fcmp.cule.s $fcc0, $fa2, $fa0 fcmp.clt.s $fcc1, $fa0, $fa2 movcf2gr $a3, $fcc0 slli.d $a5, $a3, 1 movcf2gr $a4, $fcc1 addi.d $a3, $a4, 1 -.LBB3_71: # %.loopexit916 +.LBB3_73: # %.loopexit916 slli.d $a6, $a5, 2 fldx.s $fa0, $a0, $a6 addi.d $a7, $sp, 400 @@ -1385,21 +1382,21 @@ _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteColl add.d $a2, $a2, $s2 ori $a5, $zero, 2 st.d $a2, $sp, 72 # 8-byte Folded Spill - beqz $a2, .LBB3_74 -# %bb.72: # %.loopexit916 + beqz $a2, .LBB3_76 +# %bb.74: # %.loopexit916 ori $a2, $zero, 1 ld.d $a6, $sp, 72 # 8-byte Folded Reload - bne $a6, $a2, .LBB3_75 -# %bb.73: + bne $a6, $a2, .LBB3_77 +# %bb.75: move $a2, $zero - b .LBB3_76 -.LBB3_74: + b .LBB3_78 +.LBB3_76: ori $a2, $zero, 1 - b .LBB3_76 -.LBB3_75: + b .LBB3_78 +.LBB3_77: move $a2, $zero ori $a5, $zero, 1 -.LBB3_76: +.LBB3_78: alsl.d $a6, $a2, $a1, 2 fld.s $fa0, $a6, 16 slli.d $a2, $a2, 2 @@ -1480,8 +1477,8 @@ _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteColl addi.d $s3, $sp, 296 pcaddu18i $ra, %call36(_ZL18intersectRectQuad2PfS_S_) jirl $ra, $ra, 0 - blez $a0, .LBB3_87 -# %bb.77: + blez $a0, .LBB3_89 +# %bb.79: move $a2, $zero move $s0, $zero ld.d $a1, $sp, 72 # 8-byte Folded Reload @@ -1512,12 +1509,12 @@ _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteColl slli.d $a0, $a0, 3 addi.d $a3, $sp, 200 addi.d $a4, $sp, 168 - fld.s $ft9, $sp, 92 # 4-byte Folded Reload - b .LBB3_79 -.LBB3_78: # in Loop: Header=BB3_79 Depth=1 + fld.s $ft9, $sp, 100 # 4-byte Folded Reload + b .LBB3_81 +.LBB3_80: # in Loop: Header=BB3_81 Depth=1 addi.d $a2, $a2, 8 - beq $a0, $a2, .LBB3_81 -.LBB3_79: # =>This Inner Loop Header: Depth=1 + beq $a0, $a2, .LBB3_83 +.LBB3_81: # =>This Inner Loop Header: Depth=1 add.d $a5, $s3, $a2 fldx.s $ft3, $a2, $s3 fld.s $ft4, $a5, 4 @@ -1547,18 +1544,18 @@ _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteColl slli.d $a5, $s0, 2 fcmp.cult.s $fcc0, $ft5, $fs1 fstx.s $ft5, $a5, $a4 - bcnez $fcc0, .LBB3_78 -# %bb.80: # in Loop: Header=BB3_79 Depth=1 + bcnez $fcc0, .LBB3_80 +# %bb.82: # in Loop: Header=BB3_81 Depth=1 slli.w $a5, $s0, 1 alsl.d $a6, $a5, $s3, 2 slli.d $a5, $a5, 2 fstx.s $ft3, $a5, $s3 fst.s $ft4, $a6, 4 addi.w $s0, $s0, 1 - b .LBB3_78 -.LBB3_81: - blez $s0, .LBB3_87 -# %bb.82: + b .LBB3_80 +.LBB3_83: + blez $s0, .LBB3_89 +# %bb.84: slt $a0, $a1, $s0 masknez $a2, $s0, $a0 maskeqz $a0, $a1, $a0 @@ -1568,13 +1565,13 @@ _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteColl maskeqz $a1, $a1, $a2 masknez $a2, $a0, $a2 or $s1, $a1, $a2 - bgeu $s1, $s0, .LBB3_89 -# %bb.83: + bgeu $s1, $s0, .LBB3_91 +# %bb.85: fld.s $fa0, $sp, 168 move $a3, $zero addi.d $a1, $sp, 172 addi.d $a2, $s0, -1 -.LBB3_84: # =>This Inner Loop Header: Depth=1 +.LBB3_86: # =>This Inner Loop Header: Depth=1 fld.s $fa1, $a1, 0 fcmp.clt.s $fcc0, $fa0, $fa1 fsel $fa0, $fa0, $fa1, $fcc0 @@ -1585,8 +1582,8 @@ _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteColl addi.d $a1, $a1, 4 addi.d $a2, $a2, -1 addi.w $a0, $a0, 1 - bnez $a2, .LBB3_84 -# %bb.85: + bnez $a2, .LBB3_86 +# %bb.87: addi.d $a1, $sp, 296 addi.d $a4, $sp, 120 addi.d $s3, $sp, 120 @@ -1599,7 +1596,7 @@ _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteColl lu12i.w $s7, -524288 lu32i.d $s7, 0 addi.d $s8, $sp, 168 -.LBB3_86: # =>This Inner Loop Header: Depth=1 +.LBB3_88: # =>This Inner Loop Header: Depth=1 ld.w $a0, $s3, 0 alsl.w $a1, $a0, $a0, 1 slli.d $a2, $a1, 2 @@ -1637,11 +1634,11 @@ _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteColl jirl $ra, $a3, 0 addi.d $s0, $s0, -1 addi.d $s3, $s3, 4 - bnez $s0, .LBB3_86 - b .LBB3_92 -.LBB3_87: + bnez $s0, .LBB3_88 + b .LBB3_94 +.LBB3_89: move $s1, $zero -.LBB3_88: +.LBB3_90: addi.w $a0, $s1, 0 fld.d $fs7, $sp, 440 # 8-byte Folded Reload fld.d $fs6, $sp, 448 # 8-byte Folded Reload @@ -1664,13 +1661,13 @@ _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteColl ld.d $ra, $sp, 584 # 8-byte Folded Reload addi.d $sp, $sp, 592 ret -.LBB3_89: # %.lr.ph +.LBB3_91: # %.lr.ph addi.d $s1, $sp, 168 addi.d $s3, $sp, 208 lu12i.w $s6, -524288 lu32i.d $s6, 0 move $s7, $s0 -.LBB3_90: # =>This Inner Loop Header: Depth=1 +.LBB3_92: # =>This Inner Loop Header: Depth=1 fld.s $fa0, $s3, -8 fld.s $fa1, $s5, 0 fld.s $fa2, $s3, -4 @@ -1704,13 +1701,13 @@ _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteColl addi.d $s3, $s3, 12 addi.d $s7, $s7, -1 addi.d $s1, $s1, 4 - bnez $s7, .LBB3_90 -# %bb.91: + bnez $s7, .LBB3_92 +# %bb.93: move $s1, $s0 -.LBB3_92: # %.loopexit - ld.d $a0, $sp, 96 # 8-byte Folded Reload +.LBB3_94: # %.loopexit + ld.d $a0, $sp, 88 # 8-byte Folded Reload st.w $s2, $a0, 0 - b .LBB3_88 + b .LBB3_90 .Lfunc_end3: .size _Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteCollisionDetectorInterface6ResultE, .Lfunc_end3-_Z8dBoxBox2RK9btVector3PKfS1_S1_S3_S1_RS_PfPiiP12dContactGeomiRN36btDiscreteCollisionDetectorInterface6ResultE .cfi_endproc diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btBvhTriangleMeshShape.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btBvhTriangleMeshShape.s index 2adf60f2..604011e1 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btBvhTriangleMeshShape.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btBvhTriangleMeshShape.s @@ -811,12 +811,8 @@ _ZN21btNodeOverlapCallbackD2Ev: # @_ZN21btNodeOverlapCallbackD2Ev .Lfunc_end10: .size _ZN21btNodeOverlapCallbackD2Ev, .Lfunc_end10-_ZN21btNodeOverlapCallbackD2Ev # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN22btBvhTriangleMeshShape15setLocalScalingERK9btVector3 -.LCPI11_0: - .word 0x34000000 # float 1.1920929E-7 .text - .globl _ZN22btBvhTriangleMeshShape15setLocalScalingERK9btVector3 + .globl _ZN22btBvhTriangleMeshShape15setLocalScalingERK9btVector3 # -- Begin function _ZN22btBvhTriangleMeshShape15setLocalScalingERK9btVector3 .p2align 5 .type _ZN22btBvhTriangleMeshShape15setLocalScalingERK9btVector3,@function _ZN22btBvhTriangleMeshShape15setLocalScalingERK9btVector3: # @_ZN22btBvhTriangleMeshShape15setLocalScalingERK9btVector3 @@ -845,12 +841,12 @@ _ZN22btBvhTriangleMeshShape15setLocalScalingERK9btVector3: # @_ZN22btBvhTriangle fsub.s $fa0, $fa0, $fa1 fsub.s $fa1, $fa2, $fa3 fsub.s $fa2, $fa4, $fa5 - pcalau12i $a0, %pc_hi20(.LCPI11_0) - fld.s $fa3, $a0, %pc_lo12(.LCPI11_0) fmul.s $fa1, $fa1, $fa1 fmadd.s $fa0, $fa0, $fa0, $fa1 fmadd.s $fa0, $fa2, $fa2, $fa0 - fcmp.cule.s $fcc0, $fa0, $fa3 + lu12i.w $a0, 212992 + movgr2fr.w $fa1, $a0 + fcmp.cule.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB11_4 # %bb.1: move $a0, $fp @@ -895,12 +891,7 @@ _ZN22btBvhTriangleMeshShape15setLocalScalingERK9btVector3: # @_ZN22btBvhTriangle .size _ZN22btBvhTriangleMeshShape15setLocalScalingERK9btVector3, .Lfunc_end11-_ZN22btBvhTriangleMeshShape15setLocalScalingERK9btVector3 .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN22btBvhTriangleMeshShape15setOptimizedBvhEP14btOptimizedBvhRK9btVector3 -.LCPI12_0: - .word 0x34000000 # float 1.1920929E-7 - .text - .globl _ZN22btBvhTriangleMeshShape15setOptimizedBvhEP14btOptimizedBvhRK9btVector3 + .globl _ZN22btBvhTriangleMeshShape15setOptimizedBvhEP14btOptimizedBvhRK9btVector3 # -- Begin function _ZN22btBvhTriangleMeshShape15setOptimizedBvhEP14btOptimizedBvhRK9btVector3 .p2align 5 .type _ZN22btBvhTriangleMeshShape15setOptimizedBvhEP14btOptimizedBvhRK9btVector3,@function _ZN22btBvhTriangleMeshShape15setOptimizedBvhEP14btOptimizedBvhRK9btVector3: # @_ZN22btBvhTriangleMeshShape15setOptimizedBvhEP14btOptimizedBvhRK9btVector3 @@ -931,12 +922,12 @@ _ZN22btBvhTriangleMeshShape15setOptimizedBvhEP14btOptimizedBvhRK9btVector3: # @_ fsub.s $fa0, $fa0, $fa1 fsub.s $fa1, $fa2, $fa3 fsub.s $fa2, $fa4, $fa5 - pcalau12i $a0, %pc_hi20(.LCPI12_0) - fld.s $fa3, $a0, %pc_lo12(.LCPI12_0) fmul.s $fa1, $fa1, $fa1 fmadd.s $fa0, $fa0, $fa0, $fa1 fmadd.s $fa0, $fa2, $fa2, $fa0 - fcmp.cule.s $fcc0, $fa0, $fa3 + lu12i.w $a0, 212992 + movgr2fr.w $fa1, $a0 + fcmp.cule.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB12_2 # %bb.1: move $a0, $fp diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btCapsuleShape.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btCapsuleShape.s index 87b5a889..a31e9a9f 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btCapsuleShape.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btCapsuleShape.s @@ -60,14 +60,8 @@ __clang_call_terminate: # @__clang_call_terminate .Lfunc_end1: .size __clang_call_terminate, .Lfunc_end1-__clang_call_terminate # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK14btCapsuleShape37localGetSupportingVertexWithoutMarginERK9btVector3 -.LCPI2_0: - .word 0x38d1b717 # float 9.99999974E-5 -.LCPI2_1: - .word 0xdd5e0b6b # float -9.99999984E+17 .text - .globl _ZNK14btCapsuleShape37localGetSupportingVertexWithoutMarginERK9btVector3 + .globl _ZNK14btCapsuleShape37localGetSupportingVertexWithoutMarginERK9btVector3 # -- Begin function _ZNK14btCapsuleShape37localGetSupportingVertexWithoutMarginERK9btVector3 .p2align 5 .type _ZNK14btCapsuleShape37localGetSupportingVertexWithoutMarginERK9btVector3,@function _ZNK14btCapsuleShape37localGetSupportingVertexWithoutMarginERK9btVector3: # @_ZNK14btCapsuleShape37localGetSupportingVertexWithoutMarginERK9btVector3 @@ -99,15 +93,16 @@ _ZNK14btCapsuleShape37localGetSupportingVertexWithoutMarginERK9btVector3: # @_ZN .cfi_offset 61, -80 .cfi_offset 62, -88 .cfi_offset 63, -96 + fld.s $fa0, $a1, 4 + fld.s $fa2, $a1, 0 + fld.s $fa1, $a1, 8 move $fp, $a0 - fld.s $fa0, $a1, 0 - fld.s $fa1, $a1, 4 - fld.s $fa2, $a1, 8 - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.s $fa4, $a0, %pc_lo12(.LCPI2_0) - fmul.s $fa3, $fa1, $fa1 - fmadd.s $fa3, $fa0, $fa0, $fa3 + fmul.s $fa3, $fa0, $fa0 fmadd.s $fa3, $fa2, $fa2, $fa3 + fmadd.s $fa3, $fa1, $fa1, $fa3 + lu12i.w $a0, 232731 + ori $a0, $a0, 1815 + movgr2fr.w $fa4, $a0 fcmp.clt.s $fcc0, $fa3, $fa4 movgr2fr.w $fs0, $zero bceqz $fcc0, .LBB2_2 @@ -118,9 +113,9 @@ _ZNK14btCapsuleShape37localGetSupportingVertexWithoutMarginERK9btVector3: # @_ZN b .LBB2_3 .LBB2_2: frsqrt.s $fa3, $fa3 - fmul.s $fa4, $fa0, $fa3 - fmul.s $fs1, $fa1, $fa3 - fmul.s $fs2, $fa2, $fa3 + fmul.s $fa4, $fa2, $fa3 + fmul.s $fs1, $fa0, $fa3 + fmul.s $fs2, $fa1, $fa3 .LBB2_3: vst $vr4, $sp, 32 # 16-byte Folded Spill ld.w $a0, $fp, 64 @@ -169,20 +164,22 @@ _ZNK14btCapsuleShape37localGetSupportingVertexWithoutMarginERK9btVector3: # @_ZN fsub.s $fa0, $fs4, $fa1 fsub.s $fa1, $fs5, $fa2 fsub.s $fa2, $fs6, $fa3 - pcalau12i $a0, %pc_hi20(.LCPI2_1) - fld.s $fs6, $a0, %pc_lo12(.LCPI2_1) fmul.s $fa3, $fs1, $fa1 fmadd.s $fa3, $fa4, $fa0, $fa3 fmadd.s $fa3, $fs2, $fa2, $fa3 - fcmp.cule.s $fcc0, $fa3, $fs6 + lu12i.w $a0, -141856 + ori $a0, $a0, 2923 + lu32i.d $a0, 0 + movgr2fr.w $fs5, $a0 + fcmp.cule.s $fcc0, $fa3, $fs5 fmov.s $fa5, $fs0 - fmov.s $fs4, $fs0 + fmov.s $fs6, $fs0 bcnez $fcc0, .LBB2_5 # %bb.4: fmov.s $fs0, $fa0 fmov.s $fa5, $fa1 - fmov.s $fs4, $fa2 - fmov.s $fs6, $fa3 + fmov.s $fs6, $fa2 + fmov.s $fs5, $fa3 .LBB2_5: fst.s $fa5, $sp, 12 # 4-byte Folded Spill ld.w $a0, $fp, 64 @@ -208,7 +205,7 @@ _ZNK14btCapsuleShape37localGetSupportingVertexWithoutMarginERK9btVector3: # @_ZN ld.d $a1, $a0, 88 fadd.s $fs3, $fa0, $fa3 fadd.s $fs7, $fa1, $fa4 - fadd.s $fs5, $fa2, $fa5 + fadd.s $fs4, $fa2, $fa5 move $a0, $fp jirl $ra, $a1, 0 vld $vr4, $sp, 32 # 16-byte Folded Reload @@ -217,15 +214,15 @@ _ZNK14btCapsuleShape37localGetSupportingVertexWithoutMarginERK9btVector3: # @_ZN fmul.s $fa0, $fs2, $fa0 fsub.s $fa1, $fs3, $fa1 fsub.s $fa2, $fs7, $fa2 - fsub.s $fa0, $fs5, $fa0 + fsub.s $fa0, $fs4, $fa0 fmul.s $fa3, $fs1, $fa2 fmadd.s $fa3, $fa4, $fa1, $fa3 fmadd.s $fa3, $fs2, $fa0, $fa3 - fcmp.clt.s $fcc0, $fs6, $fa3 + fcmp.clt.s $fcc0, $fs5, $fa3 fsel $fa1, $fs0, $fa1, $fcc0 fld.s $fa3, $sp, 12 # 4-byte Folded Reload fsel $fa2, $fa3, $fa2, $fcc0 - fsel $fa0, $fs4, $fa0, $fcc0 + fsel $fa0, $fs6, $fa0, $fcc0 movfr2gr.s $a0, $fa1 movfr2gr.s $a1, $fa2 bstrins.d $a0, $a1, 63, 32 @@ -249,12 +246,7 @@ _ZNK14btCapsuleShape37localGetSupportingVertexWithoutMarginERK9btVector3: # @_ZN .size _ZNK14btCapsuleShape37localGetSupportingVertexWithoutMarginERK9btVector3, .Lfunc_end2-_ZNK14btCapsuleShape37localGetSupportingVertexWithoutMarginERK9btVector3 .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK14btCapsuleShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i -.LCPI3_0: - .word 0xdd5e0b6b # float -9.99999984E+17 - .text - .globl _ZNK14btCapsuleShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i + .globl _ZNK14btCapsuleShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i # -- Begin function _ZNK14btCapsuleShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i .p2align 5 .type _ZNK14btCapsuleShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i,@function _ZNK14btCapsuleShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i: # @_ZNK14btCapsuleShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i @@ -306,12 +298,14 @@ _ZNK14btCapsuleShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVec slli.d $a0, $a0, 2 fldx.s $fs0, $s1, $a0 addi.d $s2, $a2, 8 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI3_0) addi.d $s3, $a1, 8 vrepli.b $vr0, 0 vst $vr0, $sp, 16 # 16-byte Folded Spill addi.d $s4, $sp, 40 + lu12i.w $a0, -141856 + ori $a0, $a0, 2923 + lu32i.d $a0, 0 + movgr2fr.w $fs1, $a0 b .LBB3_3 .p2align 4, , 16 .LBB3_2: # in Loop: Header=BB3_3 Depth=1 @@ -440,14 +434,7 @@ _ZNK14btCapsuleShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVec .size _ZNK14btCapsuleShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i, .Lfunc_end3-_ZNK14btCapsuleShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK14btCapsuleShape21calculateLocalInertiaEfR9btVector3 -.LCPI4_0: - .word 0x3d23d70a # float 0.0399999991 -.LCPI4_1: - .word 0x3daaaaaa # float 0.0833333284 - .text - .globl _ZNK14btCapsuleShape21calculateLocalInertiaEfR9btVector3 + .globl _ZNK14btCapsuleShape21calculateLocalInertiaEfR9btVector3 # -- Begin function _ZNK14btCapsuleShape21calculateLocalInertiaEfR9btVector3 .p2align 5 .type _ZNK14btCapsuleShape21calculateLocalInertiaEfR9btVector3,@function _ZNK14btCapsuleShape21calculateLocalInertiaEfR9btVector3: # @_ZNK14btCapsuleShape21calculateLocalInertiaEfR9btVector3 @@ -477,8 +464,9 @@ _ZNK14btCapsuleShape21calculateLocalInertiaEfR9btVector3: # @_ZNK14btCapsuleShap fadd.s $fa1, $fa1, $fa2 fstx.s $fa1, $a2, $a0 fld.s $fa1, $sp, 0 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.s $fa2, $a0, %pc_lo12(.LCPI4_0) + lu12i.w $a0, 250429 + ori $a0, $a0, 1802 + movgr2fr.w $fa2, $a0 fld.s $fa3, $sp, 4 fadd.s $fa1, $fa1, $fa2 fld.s $fa4, $sp, 8 @@ -487,11 +475,12 @@ _ZNK14btCapsuleShape21calculateLocalInertiaEfR9btVector3: # @_ZNK14btCapsuleShap fadd.s $fa3, $fa3, $fa3 fadd.s $fa2, $fa4, $fa2 fadd.s $fa2, $fa2, $fa2 - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.s $fa4, $a0, %pc_lo12(.LCPI4_1) fmul.s $fa1, $fa1, $fa1 fmul.s $fa3, $fa3, $fa3 fmul.s $fa2, $fa2, $fa2 + lu12i.w $a0, 252586 + ori $a0, $a0, 2730 + movgr2fr.w $fa4, $a0 fmul.s $fa0, $fa0, $fa4 fadd.s $fa4, $fa3, $fa2 fmul.s $fa4, $fa0, $fa4 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btCollisionWorld.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btCollisionWorld.s index db5e4d1f..6796043a 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btCollisionWorld.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btCollisionWorld.s @@ -535,12 +535,7 @@ _ZN16btCollisionWorld18addCollisionObjectEP17btCollisionObjectss: # @_ZN16btColl .size _ZN16btCollisionWorld18addCollisionObjectEP17btCollisionObjectss, .Lfunc_end5-_ZN16btCollisionWorld18addCollisionObjectEP17btCollisionObjectss .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN16btCollisionWorld16updateSingleAabbEP17btCollisionObject -.LCPI6_0: - .word 0x5368d4a5 # float 9.99999995E+11 - .text - .globl _ZN16btCollisionWorld16updateSingleAabbEP17btCollisionObject + .globl _ZN16btCollisionWorld16updateSingleAabbEP17btCollisionObject # -- Begin function _ZN16btCollisionWorld16updateSingleAabbEP17btCollisionObject .p2align 5 .type _ZN16btCollisionWorld16updateSingleAabbEP17btCollisionObject,@function _ZN16btCollisionWorld16updateSingleAabbEP17btCollisionObject: # @_ZN16btCollisionWorld16updateSingleAabbEP17btCollisionObject @@ -593,12 +588,13 @@ _ZN16btCollisionWorld16updateSingleAabbEP17btCollisionObject: # @_ZN16btCollisio fsub.s $fa0, $fa3, $fa0 fsub.s $fa1, $fa4, $fa1 fsub.s $fa2, $fa5, $fa2 - pcalau12i $a1, %pc_hi20(.LCPI6_0) - fld.s $fa3, $a1, %pc_lo12(.LCPI6_0) fmul.s $fa1, $fa1, $fa1 fmadd.s $fa0, $fa0, $fa0, $fa1 fmadd.s $fa0, $fa2, $fa2, $fa0 - fcmp.cule.s $fcc0, $fa3, $fa0 + lu12i.w $a1, 341645 + ori $a1, $a1, 1189 + movgr2fr.w $fa1, $a1 + fcmp.cule.s $fcc0, $fa1, $fa0 bcnez $fcc0, .LBB6_4 .LBB6_2: # %.critedge ld.d $a2, $a0, 0 @@ -1058,12 +1054,7 @@ _ZN16btCollisionWorld21removeCollisionObjectEP17btCollisionObject: # @_ZN16btCol .size _ZN16btCollisionWorld21removeCollisionObjectEP17btCollisionObject, .Lfunc_end9-_ZN16btCollisionWorld21removeCollisionObjectEP17btCollisionObject .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN16btCollisionWorld13rayTestSingleERK11btTransformS2_P17btCollisionObjectPK16btCollisionShapeS2_RNS_17RayResultCallbackE -.LCPI10_0: - .word 0x38d1b717 # float 9.99999974E-5 - .text - .globl _ZN16btCollisionWorld13rayTestSingleERK11btTransformS2_P17btCollisionObjectPK16btCollisionShapeS2_RNS_17RayResultCallbackE + .globl _ZN16btCollisionWorld13rayTestSingleERK11btTransformS2_P17btCollisionObjectPK16btCollisionShapeS2_RNS_17RayResultCallbackE # -- Begin function _ZN16btCollisionWorld13rayTestSingleERK11btTransformS2_P17btCollisionObjectPK16btCollisionShapeS2_RNS_17RayResultCallbackE .p2align 5 .type _ZN16btCollisionWorld13rayTestSingleERK11btTransformS2_P17btCollisionObjectPK16btCollisionShapeS2_RNS_17RayResultCallbackE,@function _ZN16btCollisionWorld13rayTestSingleERK11btTransformS2_P17btCollisionObjectPK16btCollisionShapeS2_RNS_17RayResultCallbackE: # @_ZN16btCollisionWorld13rayTestSingleERK11btTransformS2_P17btCollisionObjectPK16btCollisionShapeS2_RNS_17RayResultCallbackE @@ -1149,15 +1140,16 @@ _ZN16btCollisionWorld13rayTestSingleERK11btTransformS2_P17btCollisionObjectPK16b # %bb.3: beqz $a0, .LBB10_7 # %bb.4: - fld.s $fa1, $sp, 600 fld.s $fa2, $sp, 604 + fld.s $fa1, $sp, 600 fld.s $fa3, $sp, 608 - pcalau12i $a0, %pc_hi20(.LCPI10_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI10_0) - fmul.s $fa4, $fa2, $fa2 - fmadd.s $fa4, $fa1, $fa1, $fa4 - fmadd.s $fa4, $fa3, $fa3, $fa4 - fcmp.cule.s $fcc0, $fa4, $fa0 + fmul.s $fa0, $fa2, $fa2 + fmadd.s $fa0, $fa1, $fa1, $fa0 + fmadd.s $fa0, $fa3, $fa3, $fa0 + lu12i.w $a0, 232731 + ori $a0, $a0, 1815 + movgr2fr.w $fa4, $a0 + fcmp.cule.s $fcc0, $fa0, $fa4 bcnez $fcc0, .LBB10_7 # %bb.5: fld.s $fa0, $sp, 632 @@ -1783,12 +1775,8 @@ _ZN12btConvexCast10CastResultD2Ev: # @_ZN12btConvexCast10CastResultD2Ev .Lfunc_end11: .size _ZN12btConvexCast10CastResultD2Ev, .Lfunc_end11-_ZN12btConvexCast10CastResultD2Ev # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN16btCollisionWorld17objectQuerySingleEPK13btConvexShapeRK11btTransformS5_P17btCollisionObjectPK16btCollisionShapeS5_RNS_20ConvexResultCallbackEf -.LCPI12_0: - .word 0x38d1b717 # float 9.99999974E-5 .text - .globl _ZN16btCollisionWorld17objectQuerySingleEPK13btConvexShapeRK11btTransformS5_P17btCollisionObjectPK16btCollisionShapeS5_RNS_20ConvexResultCallbackEf + .globl _ZN16btCollisionWorld17objectQuerySingleEPK13btConvexShapeRK11btTransformS5_P17btCollisionObjectPK16btCollisionShapeS5_RNS_20ConvexResultCallbackEf # -- Begin function _ZN16btCollisionWorld17objectQuerySingleEPK13btConvexShapeRK11btTransformS5_P17btCollisionObjectPK16btCollisionShapeS5_RNS_20ConvexResultCallbackEf .p2align 5 .type _ZN16btCollisionWorld17objectQuerySingleEPK13btConvexShapeRK11btTransformS5_P17btCollisionObjectPK16btCollisionShapeS5_RNS_20ConvexResultCallbackEf,@function _ZN16btCollisionWorld17objectQuerySingleEPK13btConvexShapeRK11btTransformS5_P17btCollisionObjectPK16btCollisionShapeS5_RNS_20ConvexResultCallbackEf: # @_ZN16btCollisionWorld17objectQuerySingleEPK13btConvexShapeRK11btTransformS5_P17btCollisionObjectPK16btCollisionShapeS5_RNS_20ConvexResultCallbackEf @@ -1885,36 +1873,37 @@ _ZN16btCollisionWorld17objectQuerySingleEPK13btConvexShapeRK11btTransformS5_P17b # %bb.3: beqz $a0, .LBB12_7 # %bb.4: - fld.s $fa2, $sp, 672 - fld.s $fa1, $sp, 676 - fld.s $fa3, $sp, 680 - pcalau12i $a0, %pc_hi20(.LCPI12_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI12_0) - fmul.s $fa4, $fa1, $fa1 - fmadd.s $fa4, $fa2, $fa2, $fa4 - fmadd.s $fa4, $fa3, $fa3, $fa4 - fcmp.cule.s $fcc0, $fa4, $fa0 + fld.s $fa0, $sp, 676 + fld.s $fa3, $sp, 672 + fld.s $fa2, $sp, 680 + fmul.s $fa1, $fa0, $fa0 + fmadd.s $fa1, $fa3, $fa3, $fa1 + fmadd.s $fa4, $fa2, $fa2, $fa1 + lu12i.w $a0, 232731 + ori $a0, $a0, 1815 + movgr2fr.w $fa1, $a0 + fcmp.cule.s $fcc0, $fa4, $fa1 bcnez $fcc0, .LBB12_7 # %bb.5: - fld.s $fa0, $sp, 704 + fld.s $fa1, $sp, 704 fld.s $fa5, $s0, 8 - fcmp.cule.s $fcc0, $fa5, $fa0 + fcmp.cule.s $fcc0, $fa5, $fa1 bcnez $fcc0, .LBB12_7 # %bb.6: addi.d $a0, $sp, 672 frsqrt.s $fa4, $fa4 - fmul.s $fa2, $fa2, $fa4 - fst.s $fa2, $sp, 672 - fmul.s $fa1, $fa1, $fa4 - fst.s $fa1, $sp, 676 - fmul.s $fa1, $fa3, $fa4 - fst.s $fa1, $sp, 680 - vld $vr1, $a0, 0 + fmul.s $fa3, $fa3, $fa4 + fst.s $fa3, $sp, 672 + fmul.s $fa0, $fa0, $fa4 + fst.s $fa0, $sp, 676 + fmul.s $fa0, $fa2, $fa4 + fst.s $fa0, $sp, 680 + vld $vr0, $a0, 0 vld $vr2, $sp, 688 ld.d $a0, $s0, 0 - vst $vr1, $sp, 96 + vst $vr0, $sp, 96 vst $vr2, $sp, 112 - fst.s $fa0, $sp, 128 + fst.s $fa1, $sp, 128 ld.d $a3, $a0, 24 st.d $s1, $sp, 80 st.d $zero, $sp, 88 @@ -2575,12 +2564,8 @@ GCC_except_table12: .Lttbase5: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK16btCollisionWorld7rayTestERK9btVector3S2_RNS_17RayResultCallbackE -.LCPI13_0: - .word 0x5d5e0b6b # float 9.99999984E+17 .text - .globl _ZNK16btCollisionWorld7rayTestERK9btVector3S2_RNS_17RayResultCallbackE + .globl _ZNK16btCollisionWorld7rayTestERK9btVector3S2_RNS_17RayResultCallbackE # -- Begin function _ZNK16btCollisionWorld7rayTestERK9btVector3S2_RNS_17RayResultCallbackE .p2align 5 .type _ZNK16btCollisionWorld7rayTestERK9btVector3S2_RNS_17RayResultCallbackE,@function _ZNK16btCollisionWorld7rayTestERK9btVector3S2_RNS_17RayResultCallbackE: # @_ZNK16btCollisionWorld7rayTestERK9btVector3S2_RNS_17RayResultCallbackE @@ -2630,33 +2615,34 @@ _ZNK16btCollisionWorld7rayTestERK9btVector3S2_RNS_17RayResultCallbackE: # @_ZNK1 fmul.s $fa1, $fa1, $fa4 fmul.s $fa2, $fa2, $fa4 fmul.s $fa3, $fa3, $fa4 - pcalau12i $a3, %pc_hi20(.LCPI13_0) - fld.s $fa4, $a3, %pc_lo12(.LCPI13_0) - frecip.s $fa5, $fa1 - movgr2fr.w $fa6, $zero - fcmp.ceq.s $fcc0, $fa1, $fa6 - fsel $fa5, $fa5, $fa4, $fcc0 - fst.s $fa5, $sp, 56 + frecip.s $fa4, $fa1 + movgr2fr.w $fa5, $zero + fcmp.ceq.s $fcc0, $fa1, $fa5 + lu12i.w $a3, 382432 + ori $a3, $a3, 2923 + movgr2fr.w $fa6, $a3 + fsel $fa4, $fa4, $fa6, $fcc0 + fst.s $fa4, $sp, 56 frecip.s $fa7, $fa2 - fcmp.ceq.s $fcc0, $fa2, $fa6 - fsel $fa7, $fa7, $fa4, $fcc0 + fcmp.ceq.s $fcc0, $fa2, $fa5 + fsel $fa7, $fa7, $fa6, $fcc0 fst.s $fa7, $sp, 60 frecip.s $ft0, $fa3 - fcmp.ceq.s $fcc0, $fa3, $fa6 - fsel $fa4, $ft0, $fa4, $fcc0 - fcmp.clt.s $fcc0, $fa5, $fa6 - fst.s $fa4, $sp, 64 + fcmp.ceq.s $fcc0, $fa3, $fa5 + fsel $fa6, $ft0, $fa6, $fcc0 + fcmp.clt.s $fcc0, $fa4, $fa5 + fst.s $fa6, $sp, 64 movcf2gr $a3, $fcc0 - fcmp.clt.s $fcc0, $fa7, $fa6 + fcmp.clt.s $fcc0, $fa7, $fa5 st.w $a3, $sp, 72 movcf2gr $a3, $fcc0 - fld.s $fa5, $sp, 104 + fld.s $fa4, $sp, 104 fld.s $fa7, $sp, 88 - fcmp.clt.s $fcc0, $fa4, $fa6 + fcmp.clt.s $fcc0, $fa6, $fa5 st.w $a3, $sp, 76 movcf2gr $a3, $fcc0 st.w $a3, $sp, 80 - fsub.s $fa4, $fa5, $fa7 + fsub.s $fa4, $fa4, $fa7 fld.s $fa5, $sp, 108 fld.s $fa6, $sp, 92 fld.s $fa7, $sp, 112 @@ -2683,12 +2669,7 @@ _ZNK16btCollisionWorld7rayTestERK9btVector3S2_RNS_17RayResultCallbackE: # @_ZNK1 .size _ZNK16btCollisionWorld7rayTestERK9btVector3S2_RNS_17RayResultCallbackE, .Lfunc_end13-_ZNK16btCollisionWorld7rayTestERK9btVector3S2_RNS_17RayResultCallbackE .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK16btCollisionWorld15convexSweepTestEPK13btConvexShapeRK11btTransformS5_RNS_20ConvexResultCallbackEf -.LCPI14_0: - .word 0x5d5e0b6b # float 9.99999984E+17 - .text - .globl _ZNK16btCollisionWorld15convexSweepTestEPK13btConvexShapeRK11btTransformS5_RNS_20ConvexResultCallbackEf + .globl _ZNK16btCollisionWorld15convexSweepTestEPK13btConvexShapeRK11btTransformS5_RNS_20ConvexResultCallbackEf # -- Begin function _ZNK16btCollisionWorld15convexSweepTestEPK13btConvexShapeRK11btTransformS5_RNS_20ConvexResultCallbackEf .p2align 5 .type _ZNK16btCollisionWorld15convexSweepTestEPK13btConvexShapeRK11btTransformS5_RNS_20ConvexResultCallbackEf,@function _ZNK16btCollisionWorld15convexSweepTestEPK13btConvexShapeRK11btTransformS5_RNS_20ConvexResultCallbackEf: # @_ZNK16btCollisionWorld15convexSweepTestEPK13btConvexShapeRK11btTransformS5_RNS_20ConvexResultCallbackEf @@ -2882,27 +2863,28 @@ _ZNK16btCollisionWorld15convexSweepTestEPK13btConvexShapeRK11btTransformS5_RNS_2 fmul.s $fa4, $fa0, $fa3 fmul.s $fa5, $fa1, $fa3 fmul.s $fa3, $fa2, $fa3 - pcalau12i $a0, %pc_hi20(.LCPI14_0) - fld.s $fa6, $a0, %pc_lo12(.LCPI14_0) - frecip.s $fa7, $fa4 - movgr2fr.w $ft0, $zero - fcmp.ceq.s $fcc0, $fa4, $ft0 - fsel $fa7, $fa7, $fa6, $fcc0 - fst.s $fa7, $sp, 24 + frecip.s $fa6, $fa4 + movgr2fr.w $fa7, $zero + fcmp.ceq.s $fcc0, $fa4, $fa7 + lu12i.w $a0, 382432 + ori $a0, $a0, 2923 + movgr2fr.w $ft0, $a0 + fsel $fa6, $fa6, $ft0, $fcc0 + fst.s $fa6, $sp, 24 frecip.s $ft1, $fa5 - fcmp.ceq.s $fcc0, $fa5, $ft0 - fsel $ft1, $ft1, $fa6, $fcc0 + fcmp.ceq.s $fcc0, $fa5, $fa7 + fsel $ft1, $ft1, $ft0, $fcc0 fst.s $ft1, $sp, 28 frecip.s $ft2, $fa3 - fcmp.ceq.s $fcc0, $fa3, $ft0 - fsel $fa6, $ft2, $fa6, $fcc0 - fcmp.clt.s $fcc0, $fa7, $ft0 - fst.s $fa6, $sp, 32 + fcmp.ceq.s $fcc0, $fa3, $fa7 + fsel $ft0, $ft2, $ft0, $fcc0 + fcmp.clt.s $fcc0, $fa6, $fa7 + fst.s $ft0, $sp, 32 movcf2gr $a0, $fcc0 - fcmp.clt.s $fcc0, $ft1, $ft0 + fcmp.clt.s $fcc0, $ft1, $fa7 st.w $a0, $sp, 40 movcf2gr $a0, $fcc0 - fcmp.clt.s $fcc0, $fa6, $ft0 + fcmp.clt.s $fcc0, $ft0, $fa7 st.w $a0, $sp, 44 movcf2gr $a0, $fcc0 st.w $a0, $sp, 48 @@ -3588,12 +3570,8 @@ _ZN19btSingleRayCallback7processEPK17btBroadphaseProxy: # @_ZN19btSingleRayCallb .size _ZN19btSingleRayCallback7processEPK17btBroadphaseProxy, .Lfunc_end30-_ZN19btSingleRayCallback7processEPK17btBroadphaseProxy .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf -.LCPI31_0: - .word 0x28800000 # float 1.42108547E-14 .section .text._ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf,"axG",@progbits,_ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf,comdat - .weak _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf + .weak _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf # -- Begin function _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf .p2align 5 .type _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf,@function _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf: # @_ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf @@ -3730,11 +3708,11 @@ _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf: # jirl $ra, $ra, 0 fadd.s $fa0, $fa0, $fa0 fst.s $fa0, $s0, 0 - pcalau12i $a0, %pc_hi20(.LCPI31_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI31_0) fmul.s $fa0, $fs1, $fs1 fmadd.s $fa0, $fs0, $fs0, $fa0 fmadd.s $fa0, $fs2, $fs2, $fa0 + lu12i.w $a0, 165888 + movgr2fr.w $fa1, $a0 fcmp.clt.s $fcc0, $fa0, $fa1 st.w $zero, $fp, 12 bceqz $fcc0, .LBB31_2 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btCompoundShape.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btCompoundShape.s index 55022064..6d080d25 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btCompoundShape.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btCompoundShape.s @@ -1337,12 +1337,7 @@ _ZNK15btCompoundShape21calculateLocalInertiaEfR9btVector3: # @_ZNK15btCompoundSh .size _ZNK15btCompoundShape21calculateLocalInertiaEfR9btVector3, .Lfunc_end11-_ZNK15btCompoundShape21calculateLocalInertiaEfR9btVector3 .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK15btCompoundShape31calculatePrincipalAxisTransformEPfR11btTransformR9btVector3 -.LCPI12_0: - .word 0x3727c5ac # float 9.99999974E-6 - .text - .globl _ZNK15btCompoundShape31calculatePrincipalAxisTransformEPfR11btTransformR9btVector3 + .globl _ZNK15btCompoundShape31calculatePrincipalAxisTransformEPfR11btTransformR9btVector3 # -- Begin function _ZNK15btCompoundShape31calculatePrincipalAxisTransformEPfR11btTransformR9btVector3 .p2align 5 .type _ZNK15btCompoundShape31calculatePrincipalAxisTransformEPfR11btTransformR9btVector3,@function _ZNK15btCompoundShape31calculatePrincipalAxisTransformEPfR11btTransformR9btVector3: # @_ZNK15btCompoundShape31calculatePrincipalAxisTransformEPfR11btTransformR9btVector3 @@ -1572,8 +1567,9 @@ _ZNK15btCompoundShape31calculatePrincipalAxisTransformEPfR11btTransformR9btVecto addi.d $s2, $s2, 4 bne $s4, $s3, .LBB12_6 .LBB12_7: # %._crit_edge221 - pcalau12i $a0, %pc_hi20(.LCPI12_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI12_0) + lu12i.w $a0, 225916 + ori $a0, $a0, 1452 + movgr2fr.w $fa0, $a0 addi.d $a0, $sp, 40 ori $a2, $zero, 20 move $a1, $s0 @@ -1607,14 +1603,8 @@ _ZNK15btCompoundShape31calculatePrincipalAxisTransformEPfR11btTransformR9btVecto .size _ZNK15btCompoundShape31calculatePrincipalAxisTransformEPfR11btTransformR9btVector3, .Lfunc_end12-_ZNK15btCompoundShape31calculatePrincipalAxisTransformEPfR11btTransformR9btVector3 .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN11btMatrix3x311diagonalizeERS_fi -.LCPI13_0: - .word 0x34000000 # float 1.1920929E-7 -.LCPI13_1: - .word 0x4ca00000 # float 83886080 .section .text._ZN11btMatrix3x311diagonalizeERS_fi,"axG",@progbits,_ZN11btMatrix3x311diagonalizeERS_fi,comdat - .weak _ZN11btMatrix3x311diagonalizeERS_fi + .weak _ZN11btMatrix3x311diagonalizeERS_fi # -- Begin function _ZN11btMatrix3x311diagonalizeERS_fi .p2align 5 .type _ZN11btMatrix3x311diagonalizeERS_fi,@function _ZN11btMatrix3x311diagonalizeERS_fi: # @_ZN11btMatrix3x311diagonalizeERS_fi @@ -1632,15 +1622,15 @@ _ZN11btMatrix3x311diagonalizeERS_fi: # @_ZN11btMatrix3x311diagonalizeERS_fi addi.d $a3, $a1, 16 addi.d $a4, $a1, 32 ori $a5, $zero, 2 - pcalau12i $a6, %pc_hi20(.LCPI13_1) - fld.s $fa1, $a6, %pc_lo12(.LCPI13_1) + lu12i.w $a6, 313856 + movgr2fr.w $fa1, $a6 vldi $vr2, -1168 movgr2fr.w $fa3, $zero ori $a6, $zero, 1 vldi $vr4, -1184 vldi $vr5, -1280 vldi $vr6, -1056 - pcalau12i $a7, %pc_hi20(.LCPI13_0) + lu12i.w $a7, 212992 b .LBB13_4 .p2align 4, , 16 .LBB13_2: # in Loop: Header=BB13_4 Depth=1 @@ -1742,7 +1732,7 @@ _ZN11btMatrix3x311diagonalizeERS_fi: # @_ZN11btMatrix3x311diagonalizeERS_fi fcmp.cult.s $fcc0, $ft0, $fa7 bcnez $fcc0, .LBB13_9 # %bb.8: # in Loop: Header=BB13_4 Depth=1 - fld.s $ft1, $a7, %pc_lo12(.LCPI13_0) + movgr2fr.w $ft1, $a7 fmul.s $ft0, $ft0, $ft1 fcmp.cle.s $fcc0, $fa7, $ft0 ori $a2, $zero, 1 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConeShape.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConeShape.s index efc3f3e7..d6240379 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConeShape.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConeShape.s @@ -183,12 +183,7 @@ _ZN12btConeShapeXC2Eff: # @_ZN12btConeShapeXC2Eff .size _ZN12btConeShapeXC2Eff, .Lfunc_end4-_ZN12btConeShapeXC2Eff .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK11btConeShape16coneLocalSupportERK9btVector3 -.LCPI5_0: - .word 0x34000000 # float 1.1920929E-7 - .text - .globl _ZNK11btConeShape16coneLocalSupportERK9btVector3 + .globl _ZNK11btConeShape16coneLocalSupportERK9btVector3 # -- Begin function _ZNK11btConeShape16coneLocalSupportERK9btVector3 .p2align 5 .type _ZNK11btConeShape16coneLocalSupportERK9btVector3,@function _ZNK11btConeShape16coneLocalSupportERK9btVector3: # @_ZNK11btConeShape16coneLocalSupportERK9btVector3 @@ -225,11 +220,11 @@ _ZNK11btConeShape16coneLocalSupportERK9btVector3: # @_ZNK11btConeShape16coneLoca slli.d $a5, $a3, 2 fldx.s $fa1, $a1, $a5 fldx.s $fa2, $a1, $a4 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - fld.s $fa4, $a1, %pc_lo12(.LCPI5_0) fmul.s $fa3, $fa1, $fa1 fmadd.s $fa3, $fa2, $fa2, $fa3 fsqrt.s $fa3, $fa3 + lu12i.w $a1, 212992 + movgr2fr.w $fa4, $a1 fcmp.cule.s $fcc0, $fa3, $fa4 bcnez $fcc0, .LBB5_4 # %bb.3: @@ -260,12 +255,7 @@ _ZNK11btConeShape16coneLocalSupportERK9btVector3: # @_ZNK11btConeShape16coneLoca .Lfunc_end5: .size _ZNK11btConeShape16coneLocalSupportERK9btVector3, .Lfunc_end5-_ZNK11btConeShape16coneLocalSupportERK9btVector3 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK11btConeShape37localGetSupportingVertexWithoutMarginERK9btVector3 -.LCPI6_0: - .word 0x34000000 # float 1.1920929E-7 - .text - .globl _ZNK11btConeShape37localGetSupportingVertexWithoutMarginERK9btVector3 + .globl _ZNK11btConeShape37localGetSupportingVertexWithoutMarginERK9btVector3 # -- Begin function _ZNK11btConeShape37localGetSupportingVertexWithoutMarginERK9btVector3 .p2align 5 .type _ZNK11btConeShape37localGetSupportingVertexWithoutMarginERK9btVector3,@function _ZNK11btConeShape37localGetSupportingVertexWithoutMarginERK9btVector3: # @_ZNK11btConeShape37localGetSupportingVertexWithoutMarginERK9btVector3 @@ -302,11 +292,11 @@ _ZNK11btConeShape37localGetSupportingVertexWithoutMarginERK9btVector3: # @_ZNK11 slli.d $a5, $a3, 2 fldx.s $fa1, $a1, $a5 fldx.s $fa2, $a1, $a4 - pcalau12i $a1, %pc_hi20(.LCPI6_0) - fld.s $fa4, $a1, %pc_lo12(.LCPI6_0) fmul.s $fa3, $fa1, $fa1 fmadd.s $fa3, $fa2, $fa2, $fa3 fsqrt.s $fa3, $fa3 + lu12i.w $a1, 212992 + movgr2fr.w $fa4, $a1 fcmp.cule.s $fcc0, $fa3, $fa4 bcnez $fcc0, .LBB6_4 # %bb.3: @@ -337,12 +327,7 @@ _ZNK11btConeShape37localGetSupportingVertexWithoutMarginERK9btVector3: # @_ZNK11 .Lfunc_end6: .size _ZNK11btConeShape37localGetSupportingVertexWithoutMarginERK9btVector3, .Lfunc_end6-_ZNK11btConeShape37localGetSupportingVertexWithoutMarginERK9btVector3 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK11btConeShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i -.LCPI7_0: - .word 0x34000000 # float 1.1920929E-7 - .text - .globl _ZNK11btConeShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i + .globl _ZNK11btConeShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i # -- Begin function _ZNK11btConeShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i .p2align 5 .type _ZNK11btConeShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i,@function _ZNK11btConeShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i: # @_ZNK11btConeShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i @@ -350,11 +335,11 @@ _ZNK11btConeShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector blez $a3, .LBB7_9 # %bb.1: # %.lr.ph addi.d $sp, $sp, -16 - pcalau12i $a4, %pc_hi20(.LCPI7_0) - fld.s $fa0, $a4, %pc_lo12(.LCPI7_0) - vldi $vr1, -1184 + vldi $vr0, -1184 addi.d $a4, $sp, 0 - movgr2fr.w $fa2, $zero + movgr2fr.w $fa1, $zero + lu12i.w $a5, 212992 + movgr2fr.w $fa2, $a5 b .LBB7_5 .p2align 4, , 16 .LBB7_2: # in Loop: Header=BB7_5 Depth=1 @@ -362,7 +347,7 @@ _ZNK11btConeShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector .LBB7_3: # in Loop: Header=BB7_5 Depth=1 stx.w $zero, $a7, $a4 fstx.s $fa3, $a5, $a4 - fmov.s $fa3, $fa2 + fmov.s $fa3, $fa1 .LBB7_4: # %_ZNK11btConeShape16coneLocalSupportERK9btVector3.exit # in Loop: Header=BB7_5 Depth=1 slli.d $a5, $a6, 2 @@ -375,7 +360,7 @@ _ZNK11btConeShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector beqz $a3, .LBB7_8 .LBB7_5: # =>This Inner Loop Header: Depth=1 fld.s $fa3, $a0, 72 - fmul.s $fa3, $fa3, $fa1 + fmul.s $fa3, $fa3, $fa0 ld.w $a5, $a0, 80 fld.s $fa4, $a1, 4 fld.s $fa5, $a1, 0 @@ -400,7 +385,7 @@ _ZNK11btConeShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector fmul.s $fa6, $fa4, $fa4 fmadd.s $fa6, $fa5, $fa5, $fa6 fsqrt.s $fa6, $fa6 - fcmp.cule.s $fcc0, $fa6, $fa0 + fcmp.cule.s $fcc0, $fa6, $fa2 fneg.s $fa3, $fa3 bcnez $fcc0, .LBB7_3 # %bb.7: # in Loop: Header=BB7_5 Depth=1 @@ -418,14 +403,7 @@ _ZNK11btConeShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector .Lfunc_end7: .size _ZNK11btConeShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i, .Lfunc_end7-_ZNK11btConeShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK11btConeShape24localGetSupportingVertexERK9btVector3 -.LCPI8_0: - .word 0x34000000 # float 1.1920929E-7 -.LCPI8_1: - .word 0x28800000 # float 1.42108547E-14 - .text - .globl _ZNK11btConeShape24localGetSupportingVertexERK9btVector3 + .globl _ZNK11btConeShape24localGetSupportingVertexERK9btVector3 # -- Begin function _ZNK11btConeShape24localGetSupportingVertexERK9btVector3 .p2align 5 .type _ZNK11btConeShape24localGetSupportingVertexERK9btVector3,@function _ZNK11btConeShape24localGetSupportingVertexERK9btVector3: # @_ZNK11btConeShape24localGetSupportingVertexERK9btVector3 @@ -489,11 +467,11 @@ _ZNK11btConeShape24localGetSupportingVertexERK9btVector3: # @_ZNK11btConeShape24 slli.d $a3, $a1, 2 fldx.s $fa1, $s0, $a3 fldx.s $fa2, $s0, $a2 - pcalau12i $a3, %pc_hi20(.LCPI8_0) - fld.s $fa4, $a3, %pc_lo12(.LCPI8_0) fmul.s $fa3, $fa1, $fa1 fmadd.s $fa3, $fa2, $fa2, $fa3 fsqrt.s $fa3, $fa3 + lu12i.w $a3, 212992 + movgr2fr.w $fa4, $a3 fcmp.cule.s $fcc0, $fa3, $fa4 bcnez $fcc0, .LBB8_4 # %bb.3: @@ -530,28 +508,28 @@ _ZNK11btConeShape24localGetSupportingVertexERK9btVector3: # @_ZNK11btConeShape24 # %bb.7: movgr2fr.w $fs0, $s1 movgr2fr.w $fs1, $s3 - movgr2fr.w $fs2, $s2 - fld.s $fa0, $s0, 0 - fld.s $fa1, $s0, 4 + fld.s $fa0, $s0, 4 + fld.s $fa1, $s0, 0 fld.s $fa2, $s0, 8 - pcalau12i $a0, %pc_hi20(.LCPI8_1) - fld.s $fa3, $a0, %pc_lo12(.LCPI8_1) - fmul.s $fa4, $fa1, $fa1 - fmadd.s $fa4, $fa0, $fa0, $fa4 - fmadd.s $fa4, $fa2, $fa2, $fa4 - fcmp.clt.s $fcc0, $fa4, $fa3 + movgr2fr.w $fs2, $s2 + fmul.s $fa3, $fa0, $fa0 + fmadd.s $fa3, $fa1, $fa1, $fa3 + fmadd.s $fa3, $fa2, $fa2, $fa3 + lu12i.w $a0, 165888 + movgr2fr.w $fa4, $a0 + fcmp.clt.s $fcc0, $fa3, $fa4 vldi $vr3, -1040 - fsel $fa0, $fa0, $fa3, $fcc0 fsel $fa1, $fa1, $fa3, $fcc0 + fsel $fa0, $fa0, $fa3, $fcc0 fsel $fa2, $fa2, $fa3, $fcc0 - fmul.s $fa3, $fa1, $fa1 + fmul.s $fa3, $fa0, $fa0 ld.d $a0, $fp, 0 - fmadd.s $fa3, $fa0, $fa0, $fa3 + fmadd.s $fa3, $fa1, $fa1, $fa3 fmadd.s $fa3, $fa2, $fa2, $fa3 frsqrt.s $fa3, $fa3 ld.d $a1, $a0, 88 - fmul.s $fs3, $fa0, $fa3 - fmul.s $fs4, $fa1, $fa3 + fmul.s $fs3, $fa1, $fa3 + fmul.s $fs4, $fa0, $fa3 fmul.s $fs5, $fa2, $fa3 move $a0, $fp jirl $ra, $a1, 0 @@ -703,12 +681,8 @@ _ZNK21btConvexInternalShape15getLocalScalingEv: # @_ZNK21btConvexInternalShape15 .Lfunc_end11: .size _ZNK21btConvexInternalShape15getLocalScalingEv, .Lfunc_end11-_ZNK21btConvexInternalShape15getLocalScalingEv # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK11btConeShape21calculateLocalInertiaEfR9btVector3 -.LCPI12_0: - .word 0x3daaaaaa # float 0.0833333284 .section .text._ZNK11btConeShape21calculateLocalInertiaEfR9btVector3,"axG",@progbits,_ZNK11btConeShape21calculateLocalInertiaEfR9btVector3,comdat - .weak _ZNK11btConeShape21calculateLocalInertiaEfR9btVector3 + .weak _ZNK11btConeShape21calculateLocalInertiaEfR9btVector3 # -- Begin function _ZNK11btConeShape21calculateLocalInertiaEfR9btVector3 .p2align 5 .type _ZNK11btConeShape21calculateLocalInertiaEfR9btVector3,@function _ZNK11btConeShape21calculateLocalInertiaEfR9btVector3: # @_ZNK11btConeShape21calculateLocalInertiaEfR9btVector3 @@ -772,11 +746,12 @@ _ZNK11btConeShape21calculateLocalInertiaEfR9btVector3: # @_ZNK11btConeShape21cal fadd.s $fa2, $fa2, $fa2 fadd.s $fa0, $fs3, $fa0 fadd.s $fa0, $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI12_0) - fld.s $fa3, $a0, %pc_lo12(.LCPI12_0) fmul.s $fa1, $fa1, $fa1 fmul.s $fa2, $fa2, $fa2 fmul.s $fa0, $fa0, $fa0 + lu12i.w $a0, 252586 + ori $a0, $a0, 2730 + movgr2fr.w $fa3, $a0 fmul.s $fa3, $fs0, $fa3 fadd.s $fa4, $fa2, $fa0 fadd.s $fa0, $fa1, $fa0 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConeTwistConstraint.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConeTwistConstraint.s index 203dd349..adb475e8 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConeTwistConstraint.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConeTwistConstraint.s @@ -288,18 +288,7 @@ _ZN21btConeTwistConstraint8getInfo1EPN17btTypedConstraint17btConstraintInfo1E: # .size _ZN21btConeTwistConstraint8getInfo1EPN17btTypedConstraint17btConstraintInfo1E, .Lfunc_end4-_ZN21btConeTwistConstraint8getInfo1EPN17btTypedConstraint17btConstraintInfo1E .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN21btConeTwistConstraint14calcAngleInfo2ERK11btTransformS2_RK11btMatrix3x3S5_ -.LCPI5_0: - .word 0x34000000 # float 1.1920929E-7 -.LCPI5_1: - .word 0xbf7ffffe # float -0.99999988 -.LCPI5_2: - .word 0x3f7ffffe # float 0.99999988 -.LCPI5_3: - .word 0x40490fdb # float 3.14159274 - .text - .globl _ZN21btConeTwistConstraint14calcAngleInfo2ERK11btTransformS2_RK11btMatrix3x3S5_ + .globl _ZN21btConeTwistConstraint14calcAngleInfo2ERK11btTransformS2_RK11btMatrix3x3S5_ # -- Begin function _ZN21btConeTwistConstraint14calcAngleInfo2ERK11btTransformS2_RK11btMatrix3x3S5_ .p2align 5 .type _ZN21btConeTwistConstraint14calcAngleInfo2ERK11btTransformS2_RK11btMatrix3x3S5_,@function _ZN21btConeTwistConstraint14calcAngleInfo2ERK11btTransformS2_RK11btMatrix3x3S5_: # @_ZN21btConeTwistConstraint14calcAngleInfo2ERK11btTransformS2_RK11btMatrix3x3S5_ @@ -665,11 +654,11 @@ _ZN21btConeTwistConstraint14calcAngleInfo2ERK11btTransformS2_RK11btMatrix3x3S5_: fst.s $fa1, $fp, 516 pcaddu18i $ra, %call36(acosf) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI5_0) fadd.s $fa0, $fa0, $fa0 - fabs.s $fa2, $fa0 - fcmp.clt.s $fcc0, $fa2, $fa1 + fabs.s $fa1, $fa0 + lu12i.w $a0, 212992 + movgr2fr.w $fa2, $a0 + fcmp.clt.s $fcc0, $fa1, $fa2 fst.s $fa0, $fp, 552 bcnez $fcc0, .LBB5_59 # %bb.3: @@ -845,13 +834,15 @@ _ZN21btConeTwistConstraint14calcAngleInfo2ERK11btTransformS2_RK11btMatrix3x3S5_: frsqrt.s $fa5, $fa0 fmul.s $fa0, $ft1, $fa5 fmul.s $fa3, $ft2, $fa5 - fmul.s $fa4, $fa4, $fa5 - pcalau12i $a0, %pc_hi20(.LCPI5_1) - fld.s $fa6, $a0, %pc_lo12(.LCPI5_1) - fmul.s $fa5, $fa3, $fa1 - fadd.s $fa5, $fa0, $fa5 - fmadd.s $fa5, $fa4, $fa1, $fa5 - fcmp.clt.s $fcc0, $fa5, $fa6 + fmul.s $fa5, $fa4, $fa5 + fmul.s $fa4, $fa3, $fa1 + fadd.s $fa4, $fa0, $fa4 + fmadd.s $fa4, $fa5, $fa1, $fa4 + lu12i.w $a0, -264193 + ori $a0, $a0, 4094 + lu32i.d $a0, 0 + movgr2fr.w $fa6, $a0 + fcmp.clt.s $fcc0, $fa4, $fa6 bceqz $fcc0, .LBB5_6 # %bb.5: vldi $vr3, -1168 @@ -861,11 +852,11 @@ _ZN21btConeTwistConstraint14calcAngleInfo2ERK11btTransformS2_RK11btMatrix3x3S5_: movgr2fr.w $fa1, $zero fneg.s $fa2, $fa1 fmul.s $fa6, $fa3, $fa2 - fmadd.s $fs0, $fa4, $fa1, $fa6 - fmsub.s $fs1, $fa0, $fa1, $fa4 + fmadd.s $fs0, $fa5, $fa1, $fa6 + fmsub.s $fs1, $fa0, $fa1, $fa5 fmul.s $fa2, $fa0, $fa2 vldi $vr0, -1168 - fadd.s $fa0, $fa5, $fa0 + fadd.s $fa0, $fa4, $fa0 fadd.s $fa1, $fa0, $fa0 fsqrt.s $fa0, $fa1 fcmp.cor.s $fcc0, $fa0, $fa0 @@ -911,9 +902,9 @@ _ZN21btConeTwistConstraint14calcAngleInfo2ERK11btTransformS2_RK11btMatrix3x3S5_: fmov.s $fa0, $ft12 pcaddu18i $ra, %call36(acosf) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.s $fa3, $a0, %pc_lo12(.LCPI5_0) fadd.s $fa4, $fa0, $fa0 + lu12i.w $a0, 212992 + movgr2fr.w $fa3, $a0 fcmp.cule.s $fcc0, $fa4, $fa3 bcnez $fcc0, .LBB5_23 # %bb.11: @@ -1029,9 +1020,9 @@ _ZN21btConeTwistConstraint14calcAngleInfo2ERK11btTransformS2_RK11btMatrix3x3S5_: fcmp.cule.s $fcc2, $fa0, $ft15 bcnez $fcc2, .LBB5_18 # %bb.15: - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI5_0) fabs.s $fa1, $fs5 + lu12i.w $a0, 212992 + movgr2fr.w $fa0, $a0 fcmp.cule.s $fcc0, $fa0, $fa1 fld.s $fs5, $sp, 192 # 4-byte Folded Reload bcnez $fcc0, .LBB5_17 @@ -1074,12 +1065,12 @@ _ZN21btConeTwistConstraint14calcAngleInfo2ERK11btTransformS2_RK11btMatrix3x3S5_: fmadd.s $fa1, $fs3, $ft13, $fa1 fld.s $fa3, $sp, 76 # 4-byte Folded Reload fmadd.s $fa1, $fs6, $fa3, $fa1 - pcalau12i $a0, %pc_hi20(.LCPI5_0) bcnez $fcc1, .LBB5_24 # %bb.19: - fld.s $fa7, $a0, %pc_lo12(.LCPI5_0) - fabs.s $fa2, $fs5 - fcmp.clt.s $fcc0, $fa2, $fa7 + fabs.s $fa7, $fs5 + lu12i.w $a0, 212992 + movgr2fr.w $fa2, $a0 + fcmp.clt.s $fcc0, $fa7, $fa2 bcnez $fcc0, .LBB5_47 # %bb.20: ori $a0, $zero, 1 @@ -1147,9 +1138,10 @@ _ZN21btConeTwistConstraint14calcAngleInfo2ERK11btTransformS2_RK11btMatrix3x3S5_: fmov.s $ft2, $fs2 b .LBB5_30 .LBB5_24: - fld.s $fa0, $a0, %pc_lo12(.LCPI5_0) - fabs.s $fa2, $fs4 - fcmp.clt.s $fcc1, $fa2, $fa0 + fabs.s $fa0, $fs4 + lu12i.w $a0, 212992 + movgr2fr.w $fa2, $a0 + fcmp.clt.s $fcc1, $fa0, $fa2 bcnez $fcc1, .LBB5_47 # %bb.25: ori $a0, $zero, 1 @@ -1211,8 +1203,9 @@ _ZN21btConeTwistConstraint14calcAngleInfo2ERK11btTransformS2_RK11btMatrix3x3S5_: vldi $vr4, -1168 bcnez $fcc0, .LBB5_34 # %bb.32: - pcalau12i $a0, %pc_hi20(.LCPI5_2) - fld.s $ft1, $a0, %pc_lo12(.LCPI5_2) + lu12i.w $a0, 260095 + ori $a0, $a0, 4094 + movgr2fr.w $ft1, $a0 fcmp.cule.s $fcc0, $ft1, $fa7 fld.s $ft1, $sp, 72 # 4-byte Folded Reload bcnez $fcc0, .LBB5_35 @@ -1493,10 +1486,11 @@ _ZN21btConeTwistConstraint14calcAngleInfo2ERK11btTransformS2_RK11btMatrix3x3S5_: fmov.s $fa0, $fs0 pcaddu18i $ra, %call36(acosf) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_3) - fld.s $fa1, $a0, %pc_lo12(.LCPI5_3) fadd.s $fa3, $fa0, $fa0 - fcmp.cule.s $fcc0, $fa3, $fa1 + lu12i.w $a0, 263312 + ori $a0, $a0, 4059 + movgr2fr.w $fa0, $a0 + fcmp.cule.s $fcc0, $fa3, $fa0 fst.s $fa3, $fp, 560 bcnez $fcc0, .LBB5_50 # %bb.49: @@ -1509,8 +1503,8 @@ _ZN21btConeTwistConstraint14calcAngleInfo2ERK11btTransformS2_RK11btMatrix3x3S5_: fadd.s $fa3, $fa0, $fa0 fst.s $fa3, $fp, 560 .LBB5_50: - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI5_0) + lu12i.w $a0, 212992 + movgr2fr.w $fa0, $a0 fcmp.cule.s $fcc0, $fa3, $fa0 bcnez $fcc0, .LBB5_52 # %bb.51: @@ -1538,8 +1532,9 @@ _ZN21btConeTwistConstraint14calcAngleInfo2ERK11btTransformS2_RK11btMatrix3x3S5_: vldi $vr3, -1168 bcnez $fcc0, .LBB5_56 # %bb.54: - pcalau12i $a0, %pc_hi20(.LCPI5_2) - fld.s $fa7, $a0, %pc_lo12(.LCPI5_2) + lu12i.w $a0, 260095 + ori $a0, $a0, 4094 + movgr2fr.w $fa7, $a0 fcmp.cule.s $fcc0, $fa7, $fa4 bcnez $fcc0, .LBB5_56 # %bb.55: @@ -2094,15 +2089,9 @@ _ZN21btConeTwistConstraint18getInfo2NonVirtualEPN17btTypedConstraint17btConstrai .size _ZN21btConeTwistConstraint18getInfo2NonVirtualEPN17btTypedConstraint17btConstraintInfo2ERK11btTransformS5_RK11btMatrix3x3S8_, .Lfunc_end8-_ZN21btConeTwistConstraint18getInfo2NonVirtualEPN17btTypedConstraint17btConstraintInfo2ERK11btTransformS5_RK11btMatrix3x3S8_ .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN21btConeTwistConstraint13buildJacobianEv -.LCPI9_0: - .word 0x34000000 # float 1.1920929E-7 -.LCPI9_2: - .word 0x3f3504f3 # float 0.707106769 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI9_1: + .p2align 4, 0x0 # -- Begin function _ZN21btConeTwistConstraint13buildJacobianEv +.LCPI9_0: .word 0x3f800000 # float 1 .word 0x00000000 # float 0 .word 0x00000000 # float 0 @@ -2201,11 +2190,11 @@ _ZN21btConeTwistConstraint13buildJacobianEv: # @_ZN21btConeTwistConstraint13buil fsub.s $fa6, $ft13, $fa0 fsub.s $ft0, $ft14, $fa1 fsub.s $ft1, $ft15, $fa2 - pcalau12i $a3, %pc_hi20(.LCPI9_0) - fld.s $ft2, $a3, %pc_lo12(.LCPI9_0) fmul.s $fa7, $ft0, $ft0 fmadd.s $fa7, $fa6, $fa6, $fa7 fmadd.s $fa7, $ft1, $ft1, $fa7 + lu12i.w $a3, 212992 + movgr2fr.w $ft2, $a3 fcmp.cule.s $fcc0, $fa7, $ft2 bcnez $fcc0, .LBB9_5 # %bb.3: @@ -2224,17 +2213,18 @@ _ZN21btConeTwistConstraint13buildJacobianEv: # @_ZN21btConeTwistConstraint13buil .LBB9_4: ret .LBB9_5: - pcalau12i $a3, %pc_hi20(.LCPI9_1) - vld $vr6, $a3, %pc_lo12(.LCPI9_1) + pcalau12i $a3, %pc_hi20(.LCPI9_0) + vld $vr6, $a3, %pc_lo12(.LCPI9_0) vst $vr6, $sp, 32 movgr2fr.w $ft0, $zero vldi $vr7, -1168 fmov.s $ft1, $ft0 .LBB9_6: - pcalau12i $a3, %pc_hi20(.LCPI9_2) - fld.s $fa6, $a3, %pc_lo12(.LCPI9_2) - fabs.s $ft2, $ft1 - fcmp.cule.s $fcc0, $ft2, $fa6 + fabs.s $fa6, $ft1 + lu12i.w $a3, 258896 + ori $a3, $a3, 1267 + movgr2fr.w $ft2, $a3 + fcmp.cule.s $fcc0, $fa6, $ft2 bcnez $fcc0, .LBB9_8 # %bb.7: fmul.s $fa6, $ft1, $ft1 @@ -2444,14 +2434,7 @@ _ZN21btConeTwistConstraint13buildJacobianEv: # @_ZN21btConeTwistConstraint13buil .size _ZN21btConeTwistConstraint13buildJacobianEv, .Lfunc_end9-_ZN21btConeTwistConstraint13buildJacobianEv .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN21btConeTwistConstraint23solveConstraintObsoleteER12btSolverBodyS1_f -.LCPI10_0: - .word 0xbe99999a # float -0.300000012 -.LCPI10_1: - .word 0x34000000 # float 1.1920929E-7 - .text - .globl _ZN21btConeTwistConstraint23solveConstraintObsoleteER12btSolverBodyS1_f + .globl _ZN21btConeTwistConstraint23solveConstraintObsoleteER12btSolverBodyS1_f # -- Begin function _ZN21btConeTwistConstraint23solveConstraintObsoleteER12btSolverBodyS1_f .p2align 5 .type _ZN21btConeTwistConstraint23solveConstraintObsoleteER12btSolverBodyS1_f,@function _ZN21btConeTwistConstraint23solveConstraintObsoleteER12btSolverBodyS1_f: # @_ZN21btConeTwistConstraint23solveConstraintObsoleteER12btSolverBodyS1_f @@ -2513,7 +2496,7 @@ _ZN21btConeTwistConstraint23solveConstraintObsoleteER12btSolverBodyS1_f: # @_ZN2 fmul.s $fa1, $fa4, $fa1 fmadd.s $fa1, $fa3, $fa2, $fa1 fmadd.s $fa1, $fa5, $fa6, $fa1 - fadd.s $ft3, $fa7, $fa1 + fadd.s $ft2, $fa7, $fa1 fld.s $fa1, $a3, 44 fld.s $fa3, $a3, 40 fld.s $fa5, $a3, 48 @@ -2521,7 +2504,7 @@ _ZN21btConeTwistConstraint23solveConstraintObsoleteER12btSolverBodyS1_f: # @_ZN2 fmul.s $fa1, $fa4, $fa1 fmadd.s $fa1, $fa3, $fa2, $fa1 fmadd.s $fa1, $fa5, $fa6, $fa1 - fadd.s $ft4, $ft7, $fa1 + fadd.s $ft3, $ft7, $fa1 fld.s $fa1, $a0, 8 fld.s $fa2, $a0, 12 fld.s $fa5, $fp, 464 @@ -2534,7 +2517,7 @@ _ZN21btConeTwistConstraint23solveConstraintObsoleteER12btSolverBodyS1_f: # @_ZN2 fmadd.s $fa1, $fa4, $ft0, $fa1 fld.s $fa2, $a0, 28 fld.s $fa4, $a0, 24 - fadd.s $ft2, $fa1, $fa3 + fadd.s $ft4, $fa1, $fa3 fld.s $fa1, $a0, 32 fmul.s $fa2, $fa5, $fa2 fmadd.s $fa2, $fa4, $fa6, $fa2 @@ -2551,9 +2534,9 @@ _ZN21btConeTwistConstraint23solveConstraintObsoleteER12btSolverBodyS1_f: # @_ZN2 fadd.s $ft6, $fa5, $fa1 ld.d $a4, $a1, 72 fsub.s $fa0, $ft1, $fa0 - fsub.s $fa1, $ft3, $fa7 + fsub.s $fa1, $ft2, $fa7 movgr2fr.w $ft0, $zero - fsub.s $fa2, $ft4, $ft7 + fsub.s $fa2, $ft3, $ft7 fmov.s $ft7, $ft0 fmov.s $fa7, $ft0 fmov.s $fa6, $ft0 @@ -2591,7 +2574,7 @@ _ZN21btConeTwistConstraint23solveConstraintObsoleteER12btSolverBodyS1_f: # @_ZN2 fadd.s $ft7, $ft7, $ft8 .LBB10_4: # %_ZNK12btSolverBody31getVelocityInLocalPointObsoleteERK9btVector3RS0_.exit ld.d $a4, $a2, 72 - fsub.s $fa3, $ft2, $fa3 + fsub.s $fa3, $ft4, $fa3 fsub.s $fa4, $ft5, $fa4 fsub.s $fa5, $ft6, $fa5 fmov.s $ft8, $ft0 @@ -2633,11 +2616,13 @@ _ZN21btConeTwistConstraint23solveConstraintObsoleteER12btSolverBodyS1_f: # @_ZN2 fsub.s $fa6, $fa6, $ft9 fsub.s $fa7, $fa7, $ft8 fsub.s $ft0, $ft7, $ft0 - fsub.s $ft1, $ft1, $ft2 - pcalau12i $a5, %pc_hi20(.LCPI10_0) - fld.s $ft2, $a5, %pc_lo12(.LCPI10_0) - fsub.s $ft3, $ft3, $ft5 - fsub.s $ft4, $ft4, $ft6 + fsub.s $ft1, $ft1, $ft4 + fsub.s $ft2, $ft2, $ft5 + fsub.s $ft3, $ft3, $ft6 + lu12i.w $a5, -267879 + ori $a5, $a5, 2458 + lu32i.d $a5, 0 + movgr2fr.w $ft4, $a5 ori $a5, $zero, 252 .p2align 4, , 16 .LBB10_7: # =>This Inner Loop Header: Depth=1 @@ -2650,10 +2635,10 @@ _ZN21btConeTwistConstraint23solveConstraintObsoleteER12btSolverBodyS1_f: # @_ZN2 fmul.s $ft7, $fa7, $ft6 fmadd.s $ft7, $ft11, $fa6, $ft7 fmadd.s $ft7, $ft10, $ft0, $ft7 - fmul.s $ft8, $ft3, $ft6 + fmul.s $ft8, $ft2, $ft6 fmadd.s $ft8, $ft1, $ft11, $ft8 - fmadd.s $ft8, $ft4, $ft10, $ft8 - fmul.s $ft8, $ft8, $ft2 + fmadd.s $ft8, $ft3, $ft10, $ft8 + fmul.s $ft8, $ft8, $ft4 fdiv.s $ft8, $ft8, $fs0 fld.s $ft9, $fp, 40 fneg.s $ft12, $ft5 @@ -2857,8 +2842,8 @@ _ZN21btConeTwistConstraint23solveConstraintObsoleteER12btSolverBodyS1_f: # @_ZN2 b .LBB10_21 .LBB10_12: fld.s $fa0, $fp, 488 - pcalau12i $a4, %pc_hi20(.LCPI10_1) - fld.s $fa4, $a4, %pc_lo12(.LCPI10_1) + lu12i.w $a4, 212992 + movgr2fr.w $fa4, $a4 fcmp.cule.s $fcc0, $fa0, $fa4 bcnez $fcc0, .LBB10_32 # %bb.13: @@ -3470,11 +3455,11 @@ _ZN21btConeTwistConstraint23solveConstraintObsoleteER12btSolverBodyS1_f: # @_ZN2 fsub.s $fa3, $fa4, $fa3 fsub.s $fa4, $fa5, $fa6 fld.s $fa5, $sp, 312 - pcalau12i $a0, %pc_hi20(.LCPI10_1) - fld.s $fa6, $a0, %pc_lo12(.LCPI10_1) - fmul.s $ft0, $fa1, $fa1 - fmadd.s $ft0, $fa0, $fa0, $ft0 - fmadd.s $ft3, $fa2, $fa2, $ft0 + fmul.s $fa6, $fa1, $fa1 + fmadd.s $fa6, $fa0, $fa0, $fa6 + fmadd.s $ft3, $fa2, $fa2, $fa6 + lu12i.w $a0, 212992 + movgr2fr.w $fa6, $a0 fcmp.cule.s $fcc0, $ft3, $fa6 fsub.s $fa5, $fa7, $fa5 fmov.s $ft2, $fs1 @@ -4098,16 +4083,8 @@ _ZN21btConeTwistConstraint23solveConstraintObsoleteER12btSolverBodyS1_f: # @_ZN2 .size _ZN21btConeTwistConstraint23solveConstraintObsoleteER12btSolverBodyS1_f, .Lfunc_end10-_ZN21btConeTwistConstraint23solveConstraintObsoleteER12btSolverBodyS1_f .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_ -.LCPI11_0: - .word 0x3f490fdb # float 0.785398185 -.LCPI11_1: - .word 0x3a83126f # float 0.00100000005 -.LCPI11_2: - .word 0xbcaaaaab # float -0.020833334 .section .text._ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_,"axG",@progbits,_ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_,comdat - .weak _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_ + .weak _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_ # -- Begin function _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_ .p2align 5 .type _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_,@function _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_: # @_ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_ @@ -4160,22 +4137,26 @@ _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_: # fmadd.s $fa0, $fs1, $fs1, $fa0 fmadd.s $fa0, $fs3, $fs3, $fa0 fsqrt.s $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI11_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI11_0) - fmul.s $fa2, $fs0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI11_1) - fld.s $fa3, $a0, %pc_lo12(.LCPI11_1) - fcmp.clt.s $fcc0, $fa1, $fa2 - fdiv.s $fa1, $fa1, $fs0 - fsel $fs4, $fa0, $fa1, $fcc0 - fcmp.cule.s $fcc0, $fa3, $fs4 + fmul.s $fa1, $fs0, $fa0 + lu12i.w $a0, 259216 + ori $a0, $a0, 4059 + movgr2fr.w $fa2, $a0 + fdiv.s $fa3, $fa2, $fs0 + fcmp.clt.s $fcc0, $fa2, $fa1 + fsel $fs4, $fa0, $fa3, $fcc0 + lu12i.w $a0, 239665 + ori $a0, $a0, 623 + movgr2fr.w $fa0, $a0 + fcmp.cule.s $fcc0, $fa0, $fs4 bcnez $fcc0, .LBB11_2 # %bb.1: - pcalau12i $a0, %pc_hi20(.LCPI11_2) - fld.s $fa0, $a0, %pc_lo12(.LCPI11_2) - fmul.s $fa1, $fs0, $fs0 - fmul.s $fa1, $fs0, $fa1 - fmul.s $fa0, $fa1, $fa0 + fmul.s $fa0, $fs0, $fs0 + fmul.s $fa0, $fs0, $fa0 + lu12i.w $a0, -275798 + ori $a0, $a0, 2731 + lu32i.d $a0, 0 + movgr2fr.w $fa1, $a0 + fmul.s $fa0, $fa0, $fa1 fmul.s $fa0, $fa0, $fs4 fmul.s $fa0, $fa0, $fs4 vldi $vr1, -1184 @@ -4306,22 +4287,7 @@ _ZN21btConeTwistConstraint9updateRHSEf: # @_ZN21btConeTwistConstraint9updateRHSE .Lfunc_end12: .size _ZN21btConeTwistConstraint9updateRHSEf, .Lfunc_end12-_ZN21btConeTwistConstraint9updateRHSEf # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN21btConeTwistConstraint13calcAngleInfoEv -.LCPI13_0: - .word 0x3d4ccccd # float 0.0500000007 -.LCPI13_1: - .word 0x3f490fdb # float 0.785398185 -.LCPI13_2: - .word 0xbf490fdb # float -0.785398185 -.LCPI13_3: - .word 0x4016cbe4 # float 2.3561945 -.LCPI13_4: - .word 0xbf7ffffe # float -0.99999988 -.LCPI13_5: - .word 0x3f3504f3 # float 0.707106769 - .text - .globl _ZN21btConeTwistConstraint13calcAngleInfoEv + .globl _ZN21btConeTwistConstraint13calcAngleInfoEv # -- Begin function _ZN21btConeTwistConstraint13calcAngleInfoEv .p2align 5 .type _ZN21btConeTwistConstraint13calcAngleInfoEv,@function _ZN21btConeTwistConstraint13calcAngleInfoEv: # @_ZN21btConeTwistConstraint13calcAngleInfoEv @@ -4343,55 +4309,55 @@ _ZN21btConeTwistConstraint13calcAngleInfoEv: # @_ZN21btConeTwistConstraint13calc fld.s $fa0, $a0, 364 fld.s $fa2, $a1, 12 fld.s $fa1, $a0, 348 - fld.s $ft3, $a0, 380 fld.s $fa3, $a1, 8 - fmul.s $fa6, $fa0, $fa2 + fld.s $ft3, $a0, 380 + fmul.s $fa5, $fa0, $fa2 fld.s $fa4, $a1, 16 - fld.s $fa5, $a1, 28 - fmadd.s $fa7, $fa3, $fa1, $fa6 - fld.s $fa6, $a1, 24 - fmadd.s $ft11, $fa4, $ft3, $fa7 - fmul.s $ft0, $fa0, $fa5 + fmadd.s $fa7, $fa3, $fa1, $fa5 + fld.s $fa6, $a1, 28 + fld.s $fa5, $a1, 24 + fmadd.s $ft10, $fa4, $ft3, $fa7 fld.s $fa7, $a1, 32 - fmadd.s $ft2, $fa6, $fa1, $ft0 - fld.s $ft1, $a1, 44 - fld.s $ft0, $a1, 40 - fmadd.s $ft12, $fa7, $ft3, $ft2 + fmul.s $ft0, $fa0, $fa6 + fmadd.s $ft1, $fa5, $fa1, $ft0 + fld.s $ft0, $a1, 44 + fmadd.s $ft11, $fa7, $ft3, $ft1 + fld.s $ft1, $a1, 40 fld.s $ft2, $a1, 48 - fmul.s $fa0, $fa0, $ft1 - fmadd.s $fa0, $ft0, $fa1, $fa0 + fmul.s $fa0, $fa0, $ft0 ld.d $a1, $a0, 32 - fmadd.s $ft13, $ft2, $ft3, $fa0 - fld.s $fa0, $a0, 412 - fld.s $fa1, $a0, 428 - fld.s $ft3, $a1, 12 - fld.s $ft4, $a1, 8 - fld.s $ft5, $a0, 444 + fmadd.s $fa0, $ft1, $fa1, $fa0 + fmadd.s $ft12, $ft2, $ft3, $fa0 + fld.s $fa0, $a0, 428 + fld.s $fa1, $a1, 12 + fld.s $ft3, $a0, 412 + fld.s $ft4, $a0, 444 + fld.s $ft5, $a1, 8 + fmul.s $fa1, $fa0, $fa1 fld.s $ft6, $a1, 16 - fmul.s $ft3, $fa1, $ft3 - fmadd.s $ft3, $ft4, $fa0, $ft3 - fld.s $ft4, $a1, 28 - fmadd.s $fs3, $ft6, $ft5, $ft3 - fld.s $ft3, $a1, 24 - fld.s $ft6, $a1, 32 - fmul.s $ft4, $fa1, $ft4 - fld.s $ft7, $a1, 44 - fmadd.s $ft3, $ft3, $fa0, $ft4 - fmadd.s $fs4, $ft6, $ft5, $ft3 - fld.s $ft3, $a1, 40 - fmul.s $ft4, $fa1, $ft7 - fld.s $ft6, $a1, 48 + fld.s $ft7, $a1, 28 + fld.s $ft8, $a1, 24 + fmadd.s $fa1, $ft5, $ft3, $fa1 + fmadd.s $fs3, $ft6, $ft4, $fa1 + fmul.s $fa1, $fa0, $ft7 + fmadd.s $fa1, $ft8, $ft3, $fa1 + fld.s $ft5, $a1, 32 + fld.s $ft6, $a1, 44 + fld.s $ft7, $a1, 40 + fld.s $ft8, $a1, 48 + fmadd.s $fs4, $ft5, $ft4, $fa1 + fmul.s $fa0, $fa0, $ft6 + fmadd.s $fa0, $ft7, $ft3, $fa0 + fmadd.s $fs5, $ft8, $ft4, $fa0 fld.s $fa1, $a0, 492 - pcalau12i $a2, %pc_hi20(.LCPI13_0) - fld.s $ft14, $a2, %pc_lo12(.LCPI13_0) - fmadd.s $fa0, $ft3, $fa0, $ft4 - fmadd.s $fs5, $ft6, $ft5, $fa0 movgr2fr.w $ft3, $zero - fcmp.cult.s $fcc0, $fa1, $ft14 - fmul.s $fa0, $ft12, $fs4 - # implicit-def: $f12 - # kill: killed $f12 + lu12i.w $a2, 251084 + ori $a2, $a2, 3277 + movgr2fr.w $ft13, $a2 + fcmp.cult.s $fcc0, $fa1, $ft13 + fmul.s $fa0, $ft11, $fs4 # implicit-def: $f23 + # implicit-def: $f22 # implicit-def: $f12 # kill: killed $f12 fmov.s $ft4, $ft3 @@ -4402,41 +4368,42 @@ _ZN21btConeTwistConstraint13calcAngleInfoEv: # @_ZN21btConeTwistConstraint13calc fld.s $ft6, $a0, 384 fmul.s $ft7, $fa2, $ft4 fmadd.s $ft7, $fa3, $ft5, $ft7 - fmadd.s $ft15, $fa4, $ft6, $ft7 - fmul.s $ft7, $fa5, $ft4 - fmadd.s $ft7, $fa6, $ft5, $ft7 + fmadd.s $ft14, $fa4, $ft6, $ft7 + fmul.s $ft7, $fa6, $ft4 + fmadd.s $ft7, $fa5, $ft5, $ft7 fmadd.s $ft7, $fa7, $ft6, $ft7 - fmul.s $ft4, $ft1, $ft4 - fmadd.s $ft4, $ft0, $ft5, $ft4 - fmadd.s $ft6, $ft2, $ft6, $ft4 - fmadd.s $ft4, $fs3, $ft11, $fa0 - fmadd.s $ft4, $fs5, $ft13, $ft4 - fst.s $ft7, $sp, 56 # 4-byte Folded Spill + fmul.s $ft4, $ft0, $ft4 + fmadd.s $ft4, $ft1, $ft5, $ft4 + fmadd.s $ft15, $ft2, $ft6, $ft4 + fmadd.s $ft4, $fs3, $ft10, $fa0 + fmadd.s $ft4, $fs5, $ft12, $ft4 + fst.s $ft7, $sp, 60 # 4-byte Folded Spill fmul.s $ft5, $fs4, $ft7 - fmadd.s $ft5, $fs3, $ft15, $ft5 - fst.s $ft6, $sp, 60 # 4-byte Folded Spill - fmadd.s $ft5, $fs5, $ft6, $ft5 + fmadd.s $ft5, $fs3, $ft14, $ft5 + fmadd.s $ft5, $fs5, $ft15, $ft5 movgr2fr.w $ft6, $zero fcmp.cult.s $fcc0, $ft4, $ft6 fabs.s $ft7, $ft5 bceqz $fcc0, .LBB13_3 # %bb.2: - pcalau12i $a2, %pc_hi20(.LCPI13_3) - fld.s $ft8, $a2, %pc_lo12(.LCPI13_3) - pcalau12i $a2, %pc_hi20(.LCPI13_2) - fld.s $ft9, $a2, %pc_lo12(.LCPI13_2) - fadd.s $ft10, $ft4, $ft7 + fadd.s $ft8, $ft4, $ft7 fsub.s $ft7, $ft7, $ft4 + fdiv.s $ft7, $ft8, $ft7 + lu12i.w $a2, 262508 + ori $a2, $a2, 3044 b .LBB13_4 .LBB13_3: - pcalau12i $a2, %pc_hi20(.LCPI13_1) - fld.s $ft8, $a2, %pc_lo12(.LCPI13_1) - pcalau12i $a2, %pc_hi20(.LCPI13_2) - fld.s $ft9, $a2, %pc_lo12(.LCPI13_2) - fsub.s $ft10, $ft4, $ft7 + fsub.s $ft8, $ft4, $ft7 fadd.s $ft7, $ft4, $ft7 + fdiv.s $ft7, $ft8, $ft7 + lu12i.w $a2, 259216 + ori $a2, $a2, 4059 .LBB13_4: # %_Z11btAtan2Fastff.exit - fdiv.s $ft7, $ft10, $ft7 + movgr2fr.w $ft8, $a2 + lu12i.w $a2, -265072 + ori $a2, $a2, 4059 + lu32i.d $a2, 0 + movgr2fr.w $ft9, $a2 fmadd.s $ft7, $ft7, $ft9, $ft8 fneg.s $ft8, $ft7 fcmp.clt.s $fcc0, $ft5, $ft6 @@ -4452,7 +4419,7 @@ _ZN21btConeTwistConstraint13calcAngleInfoEv: # @_ZN21btConeTwistConstraint13calc fmul.s $ft4, $ft4, $ft6 .LBB13_5: fld.s $ft5, $a0, 496 - fcmp.cult.s $fcc0, $ft5, $ft14 + fcmp.cult.s $fcc0, $ft5, $ft13 # implicit-def: $f14 # implicit-def: $f16 # implicit-def: $f17 @@ -4464,14 +4431,14 @@ _ZN21btConeTwistConstraint13calcAngleInfoEv: # @_ZN21btConeTwistConstraint13calc fmul.s $fa2, $fa2, $ft3 fmadd.s $fa2, $fa3, $ft6, $fa2 fmadd.s $ft8, $fa4, $ft7, $fa2 - fmul.s $fa2, $fa5, $ft3 - fmadd.s $fa2, $fa6, $ft6, $fa2 + fmul.s $fa2, $fa6, $ft3 + fmadd.s $fa2, $fa5, $ft6, $fa2 fmadd.s $ft9, $fa7, $ft7, $fa2 - fmul.s $fa2, $ft1, $ft3 - fmadd.s $fa2, $ft0, $ft6, $fa2 + fmul.s $fa2, $ft0, $ft3 + fmadd.s $fa2, $ft1, $ft6, $fa2 fmadd.s $ft6, $ft2, $ft7, $fa2 - fmadd.s $fa2, $fs3, $ft11, $fa0 - fmadd.s $fa2, $fs5, $ft13, $fa2 + fmadd.s $fa2, $fs3, $ft10, $fa0 + fmadd.s $fa2, $fs5, $ft12, $fa2 fmul.s $fa3, $fs4, $ft9 fmadd.s $fa3, $fs3, $ft8, $fa3 fmadd.s $fa3, $fs5, $ft6, $fa3 @@ -4480,22 +4447,24 @@ _ZN21btConeTwistConstraint13calcAngleInfoEv: # @_ZN21btConeTwistConstraint13calc fabs.s $fa5, $fa3 bceqz $fcc0, .LBB13_8 # %bb.7: - pcalau12i $a2, %pc_hi20(.LCPI13_3) - fld.s $fa6, $a2, %pc_lo12(.LCPI13_3) - pcalau12i $a2, %pc_hi20(.LCPI13_2) - fld.s $fa7, $a2, %pc_lo12(.LCPI13_2) - fadd.s $ft0, $fa2, $fa5 + fadd.s $fa6, $fa2, $fa5 fsub.s $fa5, $fa5, $fa2 + fdiv.s $fa5, $fa6, $fa5 + lu12i.w $a2, 262508 + ori $a2, $a2, 3044 b .LBB13_9 .LBB13_8: - pcalau12i $a2, %pc_hi20(.LCPI13_1) - fld.s $fa6, $a2, %pc_lo12(.LCPI13_1) - pcalau12i $a2, %pc_hi20(.LCPI13_2) - fld.s $fa7, $a2, %pc_lo12(.LCPI13_2) - fsub.s $ft0, $fa2, $fa5 + fsub.s $fa6, $fa2, $fa5 fadd.s $fa5, $fa2, $fa5 + fdiv.s $fa5, $fa6, $fa5 + lu12i.w $a2, 259216 + ori $a2, $a2, 4059 .LBB13_9: # %_Z11btAtan2Fastff.exit181 - fdiv.s $fa5, $ft0, $fa5 + movgr2fr.w $fa6, $a2 + lu12i.w $a2, -265072 + ori $a2, $a2, 4059 + lu32i.d $a2, 0 + movgr2fr.w $fa7, $a2 fmadd.s $fa5, $fa5, $fa7, $fa6 fneg.s $fa6, $fa5 fcmp.clt.s $fcc0, $fa3, $fa4 @@ -4523,7 +4492,6 @@ _ZN21btConeTwistConstraint13calcAngleInfoEv: # @_ZN21btConeTwistConstraint13calc vldi $vr2, -1168 fcmp.cule.s $fcc0, $fa1, $fa2 fld.s $ft4, $sp, 60 # 4-byte Folded Reload - fld.s $ft5, $sp, 56 # 4-byte Folded Reload bcnez $fcc0, .LBB13_12 # %bb.11: vldi $vr2, -1040 @@ -4531,12 +4499,12 @@ _ZN21btConeTwistConstraint13calcAngleInfoEv: # @_ZN21btConeTwistConstraint13calc fst.s $fa1, $a0, 552 ori $a2, $zero, 1 st.b $a2, $a0, 574 - fmul.s $fa1, $fs4, $ft5 - fmadd.s $fa1, $fs3, $ft15, $fa1 - fmadd.s $fa1, $fs5, $ft4, $fa1 - fmul.s $fa3, $ft15, $fa1 - fmul.s $fa4, $ft5, $fa1 - fmul.s $fa1, $ft4, $fa1 + fmul.s $fa1, $fs4, $ft4 + fmadd.s $fa1, $fs3, $ft14, $fa1 + fmadd.s $fa1, $fs5, $ft15, $fa1 + fmul.s $fa3, $ft14, $fa1 + fmul.s $fa4, $ft4, $fa1 + fmul.s $fa1, $ft15, $fa1 fmul.s $fa5, $fs4, $ft9 fmadd.s $fa5, $fs3, $ft8, $fa5 fmadd.s $fa5, $fs5, $ft6, $fa5 @@ -4565,8 +4533,8 @@ _ZN21btConeTwistConstraint13calcAngleInfoEv: # @_ZN21btConeTwistConstraint13calc fmul.s $fa5, $fa5, $fa4 fmul.s $fa1, $fa1, $fa4 fmul.s $fa3, $fa3, $fa4 - fmadd.s $fa4, $fs3, $ft11, $fa0 - fmadd.s $fa4, $fs5, $ft13, $fa4 + fmadd.s $fa4, $fs3, $ft10, $fa0 + fmadd.s $fa4, $fs5, $ft12, $fa4 movgr2fr.w $fa6, $zero fcmp.cle.s $fcc0, $fa6, $fa4 vldi $vr4, -1168 @@ -4586,35 +4554,38 @@ _ZN21btConeTwistConstraint13calcAngleInfoEv: # @_ZN21btConeTwistConstraint13calc fld.s $fa1, $a0, 416 fld.s $fa2, $a0, 432 fld.s $fa3, $a1, 12 - fld.s $fa4, $a1, 8 - fld.s $fa5, $a0, 448 + fld.s $fa4, $a0, 448 + fld.s $fa5, $a1, 8 fld.s $fa6, $a1, 16 fmul.s $fa3, $fa2, $fa3 - fmadd.s $fa3, $fa4, $fa1, $fa3 - fld.s $fa4, $a1, 28 - fmadd.s $fs1, $fa6, $fa5, $fa3 + fld.s $fa7, $a1, 28 + fmadd.s $fa3, $fa5, $fa1, $fa3 + fmadd.s $fs1, $fa6, $fa4, $fa3 fld.s $fa3, $a1, 24 + fmul.s $fa5, $fa2, $fa7 fld.s $fa6, $a1, 32 - fmul.s $fa4, $fa2, $fa4 fld.s $fa7, $a1, 44 - fmadd.s $fa3, $fa3, $fa1, $fa4 - fmadd.s $fs7, $fa6, $fa5, $fa3 - fld.s $fa3, $a1, 40 + fld.s $ft0, $a1, 40 + fmadd.s $fa3, $fa3, $fa1, $fa5 + fmadd.s $fs7, $fa6, $fa4, $fa3 fmul.s $fa2, $fa2, $fa7 - pcalau12i $a2, %pc_hi20(.LCPI13_4) - fld.s $fa4, $a2, %pc_lo12(.LCPI13_4) - fmadd.s $fa1, $fa3, $fa1, $fa2 + fmadd.s $fa1, $ft0, $fa1, $fa2 fld.s $fa2, $a1, 48 - fmadd.s $fa0, $fs3, $ft11, $fa0 - fmadd.s $fa0, $fs5, $ft13, $fa0 - fcmp.cule.s $fcc0, $fa4, $fa0 - fmadd.s $fs0, $fa2, $fa5, $fa1 + fmadd.s $fa0, $fs3, $ft10, $fa0 + fmadd.s $fa0, $fs5, $ft12, $fa0 + lu12i.w $a1, -264193 + ori $a1, $a1, 4094 + lu32i.d $a1, 0 + movgr2fr.w $fa3, $a1 + fcmp.cule.s $fcc0, $fa3, $fa0 + fmadd.s $fs0, $fa2, $fa4, $fa1 bcnez $fcc0, .LBB13_16 # %bb.14: - pcalau12i $a1, %pc_hi20(.LCPI13_5) - fld.s $fa0, $a1, %pc_lo12(.LCPI13_5) - fabs.s $fa1, $fs5 - fcmp.cule.s $fcc0, $fa1, $fa0 + fabs.s $fa0, $fs5 + lu12i.w $a1, 258896 + ori $a1, $a1, 1267 + movgr2fr.w $fa1, $a1 + fcmp.cule.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB13_18 # %bb.15: fmul.s $fa0, $fs5, $fs5 @@ -4627,20 +4598,20 @@ _ZN21btConeTwistConstraint13calcAngleInfoEv: # @_ZN21btConeTwistConstraint13calc fmov.s $fa0, $fa1 b .LBB13_19 .LBB13_16: - fneg.s $fa1, $ft12 + fneg.s $fa1, $ft11 fmul.s $fa1, $fs5, $fa1 - fmadd.s $fa4, $fs4, $ft13, $fa1 - fneg.s $fa1, $ft13 + fmadd.s $fa4, $fs4, $ft12, $fa1 + fneg.s $fa1, $ft12 fmul.s $fa1, $fs3, $fa1 - fmadd.s $fa5, $fs5, $ft11, $fa1 - fneg.s $fa1, $ft11 + fmadd.s $fa5, $fs5, $ft10, $fa1 + fneg.s $fa1, $ft10 fmul.s $fa2, $fs4, $fa1 vldi $vr1, -1168 fadd.s $fa0, $fa0, $fa1 fadd.s $fa1, $fa0, $fa0 fsqrt.s $fa0, $fa1 fcmp.cor.s $fcc0, $fa0, $fa0 - fmadd.s $fa6, $fs3, $ft12, $fa2 + fmadd.s $fa6, $fs3, $ft11, $fa2 bceqz $fcc0, .LBB13_28 .LBB13_17: # %.split frecip.s $fa3, $fa0 @@ -4691,34 +4662,36 @@ _ZN21btConeTwistConstraint13calcAngleInfoEv: # @_ZN21btConeTwistConstraint13calc fmul.s $fa0, $ft9, $fa3 fmadd.s $fa0, $fa2, $ft8, $fa0 fmadd.s $fa0, $fa1, $ft6, $fa0 - fmul.s $fa3, $ft5, $fa3 - fmadd.s $fa2, $fa2, $ft15, $fa3 - fmadd.s $fa1, $fa1, $ft4, $fa2 + fmul.s $fa3, $ft4, $fa3 + fmadd.s $fa2, $fa2, $ft14, $fa3 + fmadd.s $fa1, $fa1, $ft15, $fa2 fcmp.cult.s $fcc0, $fa1, $fs6 fabs.s $fa2, $fa0 bceqz $fcc0, .LBB13_21 # %bb.20: - pcalau12i $a1, %pc_hi20(.LCPI13_3) - fld.s $fa3, $a1, %pc_lo12(.LCPI13_3) - pcalau12i $a1, %pc_hi20(.LCPI13_2) - fld.s $fa4, $a1, %pc_lo12(.LCPI13_2) - fadd.s $fa5, $fa1, $fa2 + fadd.s $fa3, $fa1, $fa2 fsub.s $fa1, $fa2, $fa1 + fdiv.s $fa1, $fa3, $fa1 + lu12i.w $a1, 262508 + ori $a1, $a1, 3044 b .LBB13_22 .LBB13_21: - pcalau12i $a1, %pc_hi20(.LCPI13_1) - fld.s $fa3, $a1, %pc_lo12(.LCPI13_1) - pcalau12i $a1, %pc_hi20(.LCPI13_2) - fld.s $fa4, $a1, %pc_lo12(.LCPI13_2) - fsub.s $fa5, $fa1, $fa2 + fsub.s $fa3, $fa1, $fa2 fadd.s $fa1, $fa1, $fa2 + fdiv.s $fa1, $fa3, $fa1 + lu12i.w $a1, 259216 + ori $a1, $a1, 4059 .LBB13_22: # %_Z11btAtan2Fastff.exit235 - fdiv.s $fa1, $fa5, $fa1 - fmadd.s $fa1, $fa1, $fa4, $fa3 + movgr2fr.w $fa2, $a1 + lu12i.w $a1, -265072 + ori $a1, $a1, 4059 + lu32i.d $a1, 0 + movgr2fr.w $fa3, $a1 + fmadd.s $fa1, $fa1, $fa3, $fa2 fneg.s $fa2, $fa1 fcmp.clt.s $fcc0, $fa0, $fs6 fsel $fa0, $fa1, $fa2, $fcc0 - fcmp.clt.s $fcc0, $ft14, $fs2 + fcmp.clt.s $fcc0, $ft13, $fs2 vldi $vr1, -1168 fsel $fa1, $fs6, $fa1, $fcc0 fneg.s $fa2, $fs2 @@ -4735,9 +4708,9 @@ _ZN21btConeTwistConstraint13calcAngleInfoEv: # @_ZN21btConeTwistConstraint13calc fst.s $fa0, $a0, 556 ori $a1, $zero, 1 st.b $a1, $a0, 573 - fadd.s $fa0, $ft11, $fs3 - fadd.s $fa1, $ft12, $fs4 - fadd.s $fa2, $ft13, $fs5 + fadd.s $fa0, $ft10, $fs3 + fadd.s $fa1, $ft11, $fs4 + fadd.s $fa2, $ft12, $fs5 vldi $vr3, -1184 fmul.s $fa0, $fa0, $fa3 fmul.s $fa1, $fa1, $fa3 @@ -4756,9 +4729,9 @@ _ZN21btConeTwistConstraint13calcAngleInfoEv: # @_ZN21btConeTwistConstraint13calc fst.s $fa0, $a0, 556 ori $a1, $zero, 1 st.b $a1, $a0, 573 - fadd.s $fa0, $ft11, $fs3 - fadd.s $fa1, $ft12, $fs4 - fadd.s $fa2, $ft13, $fs5 + fadd.s $fa0, $ft10, $fs3 + fadd.s $fa1, $ft11, $fs4 + fadd.s $fa2, $ft12, $fs5 vldi $vr3, -1184 fmul.s $fa0, $fa0, $fa3 fmul.s $fa1, $fa1, $fa3 @@ -4795,6 +4768,7 @@ _ZN21btConeTwistConstraint13calcAngleInfoEv: # @_ZN21btConeTwistConstraint13calc .LBB13_28: # %call.sqrt fmov.s $fa0, $fa1 move $fp, $a0 + fst.s $ft10, $sp, 56 # 4-byte Folded Spill fst.s $ft11, $sp, 52 # 4-byte Folded Spill fst.s $ft12, $sp, 48 # 4-byte Folded Spill fst.s $ft13, $sp, 44 # 4-byte Folded Spill @@ -4814,24 +4788,19 @@ _ZN21btConeTwistConstraint13calcAngleInfoEv: # @_ZN21btConeTwistConstraint13calc fld.s $ft9, $sp, 24 # 4-byte Folded Reload fld.s $ft6, $sp, 28 # 4-byte Folded Reload fld.s $ft8, $sp, 32 # 4-byte Folded Reload - fld.s $ft5, $sp, 56 # 4-byte Folded Reload fld.s $ft4, $sp, 60 # 4-byte Folded Reload fld.s $ft15, $sp, 36 # 4-byte Folded Reload fld.s $ft14, $sp, 40 # 4-byte Folded Reload fld.s $ft13, $sp, 44 # 4-byte Folded Reload fld.s $ft12, $sp, 48 # 4-byte Folded Reload fld.s $ft11, $sp, 52 # 4-byte Folded Reload + fld.s $ft10, $sp, 56 # 4-byte Folded Reload move $a0, $fp b .LBB13_17 .Lfunc_end13: .size _ZN21btConeTwistConstraint13calcAngleInfoEv, .Lfunc_end13-_ZN21btConeTwistConstraint13calcAngleInfoEv # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN21btConeTwistConstraint20computeConeLimitInfoERK12btQuaternionRfR9btVector3S3_ -.LCPI14_0: - .word 0x34000000 # float 1.1920929E-7 - .text - .globl _ZN21btConeTwistConstraint20computeConeLimitInfoERK12btQuaternionRfR9btVector3S3_ + .globl _ZN21btConeTwistConstraint20computeConeLimitInfoERK12btQuaternionRfR9btVector3S3_ # -- Begin function _ZN21btConeTwistConstraint20computeConeLimitInfoERK12btQuaternionRfR9btVector3S3_ .p2align 5 .type _ZN21btConeTwistConstraint20computeConeLimitInfoERK12btQuaternionRfR9btVector3S3_,@function _ZN21btConeTwistConstraint20computeConeLimitInfoERK12btQuaternionRfR9btVector3S3_: # @_ZN21btConeTwistConstraint20computeConeLimitInfoERK12btQuaternionRfR9btVector3S3_ @@ -4851,45 +4820,45 @@ _ZN21btConeTwistConstraint20computeConeLimitInfoERK12btQuaternionRfR9btVector3S3 move $s0, $a0 pcaddu18i $ra, %call36(acosf) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI14_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI14_0) - fadd.s $fa0, $fa0, $fa0 - fcmp.cule.s $fcc0, $fa0, $fa1 - fst.s $fa0, $s3, 0 + fadd.s $fa1, $fa0, $fa0 + lu12i.w $a0, 212992 + movgr2fr.w $fa0, $a0 + fcmp.cule.s $fcc0, $fa1, $fa0 + fst.s $fa1, $s3, 0 bcnez $fcc0, .LBB14_3 # %bb.1: - fld.s $fa0, $s2, 4 + fld.s $fa1, $s2, 4 fld.s $fa2, $s2, 0 fld.s $fa3, $s2, 8 st.w $zero, $s1, 12 - fmul.s $fa4, $fa0, $fa0 + fmul.s $fa4, $fa1, $fa1 fmadd.s $fa4, $fa2, $fa2, $fa4 fmadd.s $fa4, $fa3, $fa3, $fa4 frsqrt.s $fa4, $fa4 fmul.s $fa2, $fa2, $fa4 fst.s $fa2, $s1, 0 - fmul.s $fa0, $fa0, $fa4 - fst.s $fa0, $s1, 4 + fmul.s $fa1, $fa1, $fa4 + fst.s $fa1, $s1, 4 fmul.s $fa3, $fa3, $fa4 fst.s $fa3, $s1, 8 fld.s $fa2, $s0, 492 - fabs.s $fa4, $fa0 - fcmp.cule.s $fcc0, $fa4, $fa1 + fabs.s $fa4, $fa1 + fcmp.cule.s $fcc0, $fa4, $fa0 fst.s $fa2, $fp, 0 bcnez $fcc0, .LBB14_3 # %bb.2: - fld.s $fa1, $s0, 496 + fld.s $fa0, $s0, 496 fmul.s $fa3, $fa3, $fa3 - fmul.s $fa0, $fa0, $fa0 - fdiv.s $fa0, $fa3, $fa0 fmul.s $fa1, $fa1, $fa1 - frecip.s $fa1, $fa1 + fdiv.s $fa1, $fa3, $fa1 + fmul.s $fa0, $fa0, $fa0 + frecip.s $fa0, $fa0 fmul.s $fa2, $fa2, $fa2 - fdiv.s $fa2, $fa0, $fa2 - fadd.s $fa1, $fa1, $fa2 - vldi $vr2, -1168 + fdiv.s $fa2, $fa1, $fa2 fadd.s $fa0, $fa0, $fa2 - fdiv.s $fa0, $fa0, $fa1 + vldi $vr2, -1168 + fadd.s $fa1, $fa1, $fa2 + fdiv.s $fa0, $fa1, $fa0 fsqrt.s $fa0, $fa0 fst.s $fa0, $fp, 0 .LBB14_3: @@ -4904,21 +4873,16 @@ _ZN21btConeTwistConstraint20computeConeLimitInfoERK12btQuaternionRfR9btVector3S3 .Lfunc_end14: .size _ZN21btConeTwistConstraint20computeConeLimitInfoERK12btQuaternionRfR9btVector3S3_, .Lfunc_end14-_ZN21btConeTwistConstraint20computeConeLimitInfoERK12btQuaternionRfR9btVector3S3_ # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK21btConeTwistConstraint33adjustSwingAxisToUseEllipseNormalER9btVector3 -.LCPI15_0: - .word 0x34000000 # float 1.1920929E-7 - .text - .globl _ZNK21btConeTwistConstraint33adjustSwingAxisToUseEllipseNormalER9btVector3 + .globl _ZNK21btConeTwistConstraint33adjustSwingAxisToUseEllipseNormalER9btVector3 # -- Begin function _ZNK21btConeTwistConstraint33adjustSwingAxisToUseEllipseNormalER9btVector3 .p2align 5 .type _ZNK21btConeTwistConstraint33adjustSwingAxisToUseEllipseNormalER9btVector3,@function _ZNK21btConeTwistConstraint33adjustSwingAxisToUseEllipseNormalER9btVector3: # @_ZNK21btConeTwistConstraint33adjustSwingAxisToUseEllipseNormalER9btVector3 # %bb.0: fld.s $fa0, $a1, 4 - pcalau12i $a2, %pc_hi20(.LCPI15_0) - fld.s $fa1, $a2, %pc_lo12(.LCPI15_0) - fabs.s $fa2, $fa0 - fcmp.cule.s $fcc0, $fa2, $fa1 + fabs.s $fa1, $fa0 + lu12i.w $a2, 212992 + movgr2fr.w $fa2, $a2 + fcmp.cule.s $fcc0, $fa1, $fa2 bcnez $fcc0, .LBB15_2 # %bb.1: fld.s $fa1, $a1, 8 @@ -4951,14 +4915,7 @@ _ZNK21btConeTwistConstraint33adjustSwingAxisToUseEllipseNormalER9btVector3: # @_ .Lfunc_end15: .size _ZNK21btConeTwistConstraint33adjustSwingAxisToUseEllipseNormalER9btVector3, .Lfunc_end15-_ZNK21btConeTwistConstraint33adjustSwingAxisToUseEllipseNormalER9btVector3 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN21btConeTwistConstraint21computeTwistLimitInfoERK12btQuaternionRfR9btVector3 -.LCPI16_0: - .word 0x40490fdb # float 3.14159274 -.LCPI16_1: - .word 0x34000000 # float 1.1920929E-7 - .text - .globl _ZN21btConeTwistConstraint21computeTwistLimitInfoERK12btQuaternionRfR9btVector3 + .globl _ZN21btConeTwistConstraint21computeTwistLimitInfoERK12btQuaternionRfR9btVector3 # -- Begin function _ZN21btConeTwistConstraint21computeTwistLimitInfoERK12btQuaternionRfR9btVector3 .p2align 5 .type _ZN21btConeTwistConstraint21computeTwistLimitInfoERK12btQuaternionRfR9btVector3,@function _ZN21btConeTwistConstraint21computeTwistLimitInfoERK12btQuaternionRfR9btVector3: # @_ZN21btConeTwistConstraint21computeTwistLimitInfoERK12btQuaternionRfR9btVector3 @@ -4980,9 +4937,10 @@ _ZN21btConeTwistConstraint21computeTwistLimitInfoERK12btQuaternionRfR9btVector3: move $s0, $a2 pcaddu18i $ra, %call36(acosf) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI16_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI16_0) fadd.s $fa0, $fa0, $fa0 + lu12i.w $a0, 263312 + ori $a0, $a0, 4059 + movgr2fr.w $fa1, $a0 fcmp.cule.s $fcc0, $fa0, $fa1 fst.s $fa0, $s0, 0 bcnez $fcc0, .LBB16_2 @@ -5005,8 +4963,8 @@ _ZN21btConeTwistConstraint21computeTwistLimitInfoERK12btQuaternionRfR9btVector3: fst.s $fs0, $fp, 8 st.w $zero, $fp, 12 fld.s $fa0, $s0, 0 - pcalau12i $a0, %pc_hi20(.LCPI16_1) - fld.s $fa1, $a0, %pc_lo12(.LCPI16_1) + lu12i.w $a0, 212992 + movgr2fr.w $fa1, $a0 fcmp.cule.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB16_4 # %bb.3: @@ -5033,12 +4991,7 @@ _ZN21btConeTwistConstraint21computeTwistLimitInfoERK12btQuaternionRfR9btVector3: .Lfunc_end16: .size _ZN21btConeTwistConstraint21computeTwistLimitInfoERK12btQuaternionRfR9btVector3, .Lfunc_end16-_ZN21btConeTwistConstraint21computeTwistLimitInfoERK12btQuaternionRfR9btVector3 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK21btConeTwistConstraint16GetPointForAngleEff -.LCPI17_0: - .word 0x34000000 # float 1.1920929E-7 - .text - .globl _ZNK21btConeTwistConstraint16GetPointForAngleEff + .globl _ZNK21btConeTwistConstraint16GetPointForAngleEff # -- Begin function _ZNK21btConeTwistConstraint16GetPointForAngleEff .p2align 5 .type _ZNK21btConeTwistConstraint16GetPointForAngleEff,@function _ZNK21btConeTwistConstraint16GetPointForAngleEff: # @_ZNK21btConeTwistConstraint16GetPointForAngleEff @@ -5061,11 +5014,11 @@ _ZNK21btConeTwistConstraint16GetPointForAngleEff: # @_ZNK21btConeTwistConstraint fmov.s $fa0, $fs2 pcaddu18i $ra, %call36(sinf) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI17_0) - fld.s $fa2, $a0, %pc_lo12(.LCPI17_0) fld.s $fa1, $fp, 492 - fabs.s $fa3, $fs1 - fcmp.clt.s $fcc0, $fa2, $fa3 + fabs.s $fa2, $fs1 + lu12i.w $a0, 212992 + movgr2fr.w $fa3, $a0 + fcmp.clt.s $fcc0, $fa3, $fa2 bceqz $fcc0, .LBB17_2 # %bb.1: fld.s $fa3, $fp, 496 @@ -5613,18 +5566,7 @@ _ZN21btConeTwistConstraint14setMotorTargetERK12btQuaternion: # @_ZN21btConeTwist .size _ZN21btConeTwistConstraint14setMotorTargetERK12btQuaternion, .Lfunc_end18-_ZN21btConeTwistConstraint14setMotorTargetERK12btQuaternion .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN21btConeTwistConstraint31setMotorTargetInConstraintSpaceERK12btQuaternion -.LCPI19_0: - .word 0xbf7ffffe # float -0.99999988 -.LCPI19_1: - .word 0x3d4ccccd # float 0.0500000007 -.LCPI19_2: - .word 0x34000000 # float 1.1920929E-7 -.LCPI19_3: - .word 0x40490fdb # float 3.14159274 - .text - .globl _ZN21btConeTwistConstraint31setMotorTargetInConstraintSpaceERK12btQuaternion + .globl _ZN21btConeTwistConstraint31setMotorTargetInConstraintSpaceERK12btQuaternion # -- Begin function _ZN21btConeTwistConstraint31setMotorTargetInConstraintSpaceERK12btQuaternion .p2align 5 .type _ZN21btConeTwistConstraint31setMotorTargetInConstraintSpaceERK12btQuaternion,@function _ZN21btConeTwistConstraint31setMotorTargetInConstraintSpaceERK12btQuaternion: # @_ZN21btConeTwistConstraint31setMotorTargetInConstraintSpaceERK12btQuaternion @@ -5674,13 +5616,15 @@ _ZN21btConeTwistConstraint31setMotorTargetInConstraintSpaceERK12btQuaternion: # fmul.s $ft0, $fs5, $ft0 fmadd.s $fa4, $ft1, $fa4, $ft0 fmadd.s $fa4, $fa5, $fa7, $fa4 - fmadd.s $fa4, $fa6, $fs3, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI19_0) - fld.s $fa6, $a0, %pc_lo12(.LCPI19_0) - fmul.s $fa5, $fa3, $fa1 - fadd.s $fa5, $fa0, $fa5 - fmadd.s $fa5, $fa4, $fa1, $fa5 - fcmp.clt.s $fcc0, $fa5, $fa6 + fmadd.s $fa5, $fa6, $fs3, $fa4 + fmul.s $fa4, $fa3, $fa1 + fadd.s $fa4, $fa0, $fa4 + fmadd.s $fa4, $fa5, $fa1, $fa4 + lu12i.w $a0, -264193 + ori $a0, $a0, 4094 + lu32i.d $a0, 0 + movgr2fr.w $fa6, $a0 + fcmp.clt.s $fcc0, $fa4, $fa6 bceqz $fcc0, .LBB19_2 # %bb.1: vldi $vr3, -1168 @@ -5690,11 +5634,11 @@ _ZN21btConeTwistConstraint31setMotorTargetInConstraintSpaceERK12btQuaternion: # movgr2fr.w $fa1, $zero fneg.s $fa2, $fa1 fmul.s $fa6, $fa3, $fa2 - fmadd.s $fs0, $fa4, $fa1, $fa6 - fmsub.s $fs2, $fa0, $fa1, $fa4 + fmadd.s $fs0, $fa5, $fa1, $fa6 + fmsub.s $fs2, $fa0, $fa1, $fa5 fmul.s $fa2, $fa0, $fa2 vldi $vr0, -1168 - fadd.s $fa0, $fa5, $fa0 + fadd.s $fa0, $fa4, $fa0 fadd.s $fa1, $fa0, $fa0 fsqrt.s $fa0, $fa1 fcmp.cor.s $fcc0, $fa0, $fa0 @@ -5738,13 +5682,14 @@ _ZN21btConeTwistConstraint31setMotorTargetInConstraintSpaceERK12btQuaternion: # fmadd.s $fa0, $fa5, $fs1, $fa0 fmadd.s $fa2, $fs4, $fs7, $fa0 fmul.s $fa0, $fa6, $fa6 - fld.s $fs3, $fp, 492 - pcalau12i $a0, %pc_hi20(.LCPI19_1) - fld.s $fs1, $a0, %pc_lo12(.LCPI19_1) fmadd.s $fa0, $fa3, $fa3, $fa0 fmadd.s $fa0, $fa1, $fa1, $fa0 fmadd.s $fa0, $fa2, $fa2, $fa0 - fcmp.cult.s $fcc0, $fs3, $fs1 + fld.s $fs6, $fp, 492 + lu12i.w $a0, 251084 + ori $a0, $a0, 3277 + movgr2fr.w $fs1, $a0 + fcmp.cult.s $fcc0, $fs6, $fs1 frsqrt.s $fs5, $fa0 bcnez $fcc0, .LBB19_16 # %bb.5: # %_Z15shortestArcQuatRK9btVector3S1_.exit @@ -5760,12 +5705,12 @@ _ZN21btConeTwistConstraint31setMotorTargetInConstraintSpaceERK12btQuaternion: # pcaddu18i $ra, %call36(acosf) jirl $ra, $ra, 0 fld.s $fa6, $sp, 20 # 4-byte Folded Reload - pcalau12i $a0, %pc_hi20(.LCPI19_2) - fld.s $fa1, $a0, %pc_lo12(.LCPI19_2) fadd.s $fa0, $fa0, $fa0 + lu12i.w $a0, 212992 + movgr2fr.w $fa1, $a0 fcmp.cule.s $fcc0, $fa0, $fa1 # implicit-def: $f7 - # implicit-def: $f30 + # implicit-def: $f27 # implicit-def: $f5 bcnez $fcc0, .LBB19_10 # %bb.7: @@ -5775,25 +5720,25 @@ _ZN21btConeTwistConstraint31setMotorTargetInConstraintSpaceERK12btQuaternion: # fmadd.s $fa2, $fs4, $fs4, $fa2 frsqrt.s $fa2, $fa2 fmul.s $fa5, $fs2, $fa2 - fmul.s $fs6, $fa3, $fa2 - fabs.s $fa3, $fs6 + fmul.s $fs3, $fa3, $fa2 + fabs.s $fa3, $fs3 fcmp.cule.s $fcc0, $fa3, $fa1 fmul.s $fa2, $fs4, $fa2 bcnez $fcc0, .LBB19_9 # %bb.8: fmov.s $fa7, $fa2 fmul.s $fa2, $fa2, $fa2 - fmul.s $fa3, $fs6, $fs6 + fmul.s $fa3, $fs3, $fs3 fdiv.s $fa2, $fa2, $fa3 fmul.s $fa3, $fs7, $fs7 frecip.s $fa3, $fa3 - fmul.s $fa4, $fs3, $fs3 + fmul.s $fa4, $fs6, $fs6 fdiv.s $fa4, $fa2, $fa4 fadd.s $fa3, $fa3, $fa4 vldi $vr4, -1168 fadd.s $fa2, $fa2, $fa4 fdiv.s $fa2, $fa2, $fa3 - fsqrt.s $fs3, $fa2 + fsqrt.s $fs6, $fa2 b .LBB19_10 .LBB19_9: fmov.s $fa7, $fa2 @@ -5805,34 +5750,34 @@ _ZN21btConeTwistConstraint31setMotorTargetInConstraintSpaceERK12btQuaternion: # fld.s $fa2, $sp, 12 # 4-byte Folded Reload bcnez $fcc0, .LBB19_16 # %bb.11: - fcmp.clt.s $fcc0, $fs3, $fa0 + fcmp.clt.s $fcc0, $fs6, $fa0 bceqz $fcc0, .LBB19_13 # %bb.12: - fmov.s $fa0, $fs3 + fmov.s $fa0, $fs6 b .LBB19_15 .LBB19_13: - fneg.s $fa1, $fs3 + fneg.s $fa1, $fs6 fcmp.cule.s $fcc0, $fa1, $fa0 bcnez $fcc0, .LBB19_15 # %bb.14: fmov.s $fa0, $fa1 .LBB19_15: - fmul.s $fa1, $fs6, $fs6 + fmul.s $fa1, $fs3, $fs3 fmadd.s $fa1, $fa5, $fa5, $fa1 fmadd.s $fa1, $fa7, $fa7, $fa1 fsqrt.s $fs2, $fa1 vldi $vr1, -1184 fmul.s $fs0, $fa0, $fa1 fmov.s $fa0, $fs0 - fmov.s $fs3, $fa5 - fmov.s $fs4, $fa7 + fmov.s $fs4, $fa5 + fmov.s $fs6, $fa7 pcaddu18i $ra, %call36(sinf) jirl $ra, $ra, 0 fdiv.s $fa0, $fa0, $fs2 - fmul.s $fs2, $fs3, $fa0 - fmul.s $fa1, $fs6, $fa0 + fmul.s $fs2, $fs4, $fa0 + fmul.s $fa1, $fs3, $fa0 fst.s $fa1, $sp, 28 # 4-byte Folded Spill - fmul.s $fs4, $fs4, $fa0 + fmul.s $fs4, $fs6, $fa0 fmov.s $fa0, $fs0 pcaddu18i $ra, %call36(cosf) jirl $ra, $ra, 0 @@ -5854,17 +5799,18 @@ _ZN21btConeTwistConstraint31setMotorTargetInConstraintSpaceERK12btQuaternion: # fmov.s $fa0, $fs1 pcaddu18i $ra, %call36(acosf) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI19_3) - fld.s $fa1, $a0, %pc_lo12(.LCPI19_3) fadd.s $fa0, $fa0, $fa0 + lu12i.w $a0, 263312 + ori $a0, $a0, 4059 + movgr2fr.w $fa1, $a0 fcmp.cule.s $fcc0, $fa0, $fa1 fmov.s $fa3, $fs7 fmov.s $fa4, $fs3 fmov.s $fs5, $fs6 bceqz $fcc0, .LBB19_20 # %bb.18: - pcalau12i $a0, %pc_hi20(.LCPI19_2) - fld.s $fa1, $a0, %pc_lo12(.LCPI19_2) + lu12i.w $a0, 212992 + movgr2fr.w $fa1, $a0 fcmp.cule.s $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB19_21 .LBB19_19: # %_ZN21btConeTwistConstraint21computeTwistLimitInfoERK12btQuaternionRfR9btVector3.exit @@ -5885,8 +5831,8 @@ _ZN21btConeTwistConstraint31setMotorTargetInConstraintSpaceERK12btQuaternion: # fld.s $fa4, $sp, 20 # 4-byte Folded Reload fld.s $fa3, $sp, 16 # 4-byte Folded Reload fadd.s $fa0, $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI19_2) - fld.s $fa1, $a0, %pc_lo12(.LCPI19_2) + lu12i.w $a0, 212992 + movgr2fr.w $fa1, $a0 fcmp.cule.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB19_19 .LBB19_21: @@ -6016,12 +5962,8 @@ _ZN17btTypedConstraint21setupSolverConstraintER20btAlignedObjectArrayI18btSolver .Lfunc_end22: .size _ZN17btTypedConstraint21setupSolverConstraintER20btAlignedObjectArrayI18btSolverConstraintEiif, .Lfunc_end22-_ZN17btTypedConstraint21setupSolverConstraintER20btAlignedObjectArrayI18btSolverConstraintEiif # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf -.LCPI23_0: - .word 0x28800000 # float 1.42108547E-14 .section .text._ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf,"axG",@progbits,_ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf,comdat - .weak _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf + .weak _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf # -- Begin function _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf .p2align 5 .type _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf,@function _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf: # @_ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf @@ -6158,11 +6100,11 @@ _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf: # jirl $ra, $ra, 0 fadd.s $fa0, $fa0, $fa0 fst.s $fa0, $s0, 0 - pcalau12i $a0, %pc_hi20(.LCPI23_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI23_0) fmul.s $fa0, $fs1, $fs1 fmadd.s $fa0, $fs0, $fs0, $fa0 fmadd.s $fa0, $fs2, $fs2, $fa0 + lu12i.w $a0, 165888 + movgr2fr.w $fa1, $a0 fcmp.clt.s $fcc0, $fa0, $fa1 st.w $zero, $fp, 12 bceqz $fcc0, .LBB23_2 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btContactConstraint.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btContactConstraint.s index 54140d3f..f2db1a57 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btContactConstraint.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btContactConstraint.s @@ -133,27 +133,21 @@ _ZN19btContactConstraint23solveConstraintObsoleteER12btSolverBodyS1_f: # @_ZN19b .Lfunc_end8: .size _ZN19btContactConstraint23solveConstraintObsoleteER12btSolverBodyS1_f, .Lfunc_end8-_ZN19btContactConstraint23solveConstraintObsoleteER12btSolverBodyS1_f # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z22resolveSingleBilateralR11btRigidBodyRK9btVector3S0_S3_fS3_Rff -.LCPI9_0: - .word 0x3f8ccccd # float 1.10000002 -.LCPI9_1: - .word 0xbe4ccccd # float -0.200000003 - .text - .globl _Z22resolveSingleBilateralR11btRigidBodyRK9btVector3S0_S3_fS3_Rff + .globl _Z22resolveSingleBilateralR11btRigidBodyRK9btVector3S0_S3_fS3_Rff # -- Begin function _Z22resolveSingleBilateralR11btRigidBodyRK9btVector3S0_S3_fS3_Rff .p2align 5 .type _Z22resolveSingleBilateralR11btRigidBodyRK9btVector3S0_S3_fS3_Rff,@function _Z22resolveSingleBilateralR11btRigidBodyRK9btVector3S0_S3_fS3_Rff: # @_Z22resolveSingleBilateralR11btRigidBodyRK9btVector3S0_S3_fS3_Rff # %bb.0: - fld.s $fa1, $a4, 0 fld.s $fa2, $a4, 4 + fld.s $fa1, $a4, 0 fld.s $fa0, $a4, 8 - pcalau12i $a4, %pc_hi20(.LCPI9_0) - fld.s $fa3, $a4, %pc_lo12(.LCPI9_0) - fmul.s $fa4, $fa2, $fa2 - fmadd.s $fa4, $fa1, $fa1, $fa4 - fmadd.s $fa4, $fa0, $fa0, $fa4 - fcmp.clt.s $fcc0, $fa3, $fa4 + fmul.s $fa3, $fa2, $fa2 + fmadd.s $fa3, $fa1, $fa1, $fa3 + fmadd.s $fa3, $fa0, $fa0, $fa3 + lu12i.w $a4, 260300 + ori $a4, $a4, 3277 + movgr2fr.w $fa4, $a4 + fcmp.clt.s $fcc0, $fa4, $fa3 bceqz $fcc0, .LBB9_2 # %bb.1: movgr2fr.w $fa0, $zero @@ -190,18 +184,18 @@ _Z22resolveSingleBilateralR11btRigidBodyRK9btVector3S0_S3_fS3_Rff: # @_Z22resolv fneg.s $fa3, $ft1 fmul.s $fa3, $fa6, $fa3 fmadd.s $fa3, $fa7, $ft3, $fa3 - fld.s $fa6, $a2, 352 - fld.s $ft7, $a2, 348 - fneg.s $fa7, $ft4 - fld.s $ft8, $a2, 344 - fmul.s $fa7, $fa6, $fa7 - fmadd.s $fa7, $ft7, $ft5, $fa7 - fneg.s $ft0, $ft5 - fmul.s $ft0, $ft8, $ft0 - fmadd.s $ft0, $fa6, $ft2, $ft0 - fneg.s $fa6, $ft2 - fmul.s $fa6, $ft7, $fa6 - fmadd.s $fa6, $ft8, $ft4, $fa6 + fld.s $fa7, $a2, 352 + fld.s $ft0, $a2, 348 + fneg.s $fa6, $ft4 + fld.s $ft7, $a2, 344 + fmul.s $fa6, $fa7, $fa6 + fmadd.s $fa6, $ft0, $ft5, $fa6 + fneg.s $ft8, $ft5 + fmul.s $ft8, $ft7, $ft8 + fmadd.s $fa7, $fa7, $ft2, $ft8 + fneg.s $ft8, $ft2 + fmul.s $ft0, $ft0, $ft8 + fmadd.s $ft0, $ft7, $ft4, $ft0 fld.s $ft7, $a0, 8 fld.s $ft8, $a0, 24 fld.s $ft9, $a0, 12 @@ -260,42 +254,44 @@ _Z22resolveSingleBilateralR11btRigidBodyRK9btVector3S0_S3_fS3_Rff: # @_Z22resolv fmul.s $ft7, $ft7, $ft8 fld.s $ft8, $a0, 436 fmadd.s $ft3, $ft5, $ft3, $ft7 - fld.s $ft5, $a2, 428 - fld.s $ft7, $a2, 432 + fld.s $ft5, $a0, 328 + fld.s $ft7, $a0, 332 fmul.s $ft8, $ft1, $ft8 fmadd.s $ft1, $ft8, $ft1, $ft3 - fmul.s $ft3, $ft6, $ft5 - fmul.s $ft5, $ft4, $ft7 + fld.s $ft3, $a2, 428 + fld.s $ft8, $a2, 432 + fadd.s $fa4, $fa4, $ft5 + fadd.s $fa5, $ft7, $fa5 + fmul.s $ft3, $ft6, $ft3 + fmul.s $ft5, $ft4, $ft8 fmul.s $ft4, $ft4, $ft5 fld.s $ft5, $a2, 436 fmadd.s $ft3, $ft3, $ft6, $ft4 - fld.s $ft4, $a0, 328 - fld.s $ft6, $a0, 332 + fld.s $ft4, $a0, 336 + fld.s $ft6, $a2, 328 fmul.s $ft5, $ft2, $ft5 fmadd.s $ft2, $ft5, $ft2, $ft3 - fld.s $ft3, $a2, 328 - fld.s $ft5, $a2, 332 - fadd.s $fa4, $fa4, $ft4 - fadd.s $fa5, $ft6, $fa5 - fadd.s $fa7, $fa7, $ft3 - fadd.s $ft0, $ft5, $ft0 - fsub.s $fa4, $fa4, $fa7 - fld.s $fa7, $a0, 336 - fsub.s $fa5, $fa5, $ft0 - fld.s $ft0, $a2, 336 + fld.s $ft3, $a2, 332 + fld.s $ft5, $a2, 336 + fadd.s $fa3, $fa3, $ft4 + fadd.s $fa6, $fa6, $ft6 + fadd.s $fa7, $ft3, $fa7 + fadd.s $ft0, $ft0, $ft5 + fsub.s $fa4, $fa4, $fa6 + fsub.s $fa5, $fa5, $fa7 fmul.s $fa2, $fa2, $fa5 fmadd.s $fa1, $fa1, $fa4, $fa2 - fadd.s $fa2, $fa3, $fa7 - fadd.s $fa3, $fa6, $ft0 - fsub.s $fa2, $fa2, $fa3 - fld.s $fa3, $a0, 360 - fmadd.s $fa0, $fa0, $fa2, $fa1 - fld.s $fa1, $a2, 360 - pcalau12i $a0, %pc_hi20(.LCPI9_1) - fld.s $fa2, $a0, %pc_lo12(.LCPI9_1) - fadd.s $fa3, $fa3, $ft1 - fadd.s $fa1, $fa1, $fa3 + fld.s $fa2, $a0, 360 + fld.s $fa4, $a2, 360 + fsub.s $fa3, $fa3, $ft0 + fmadd.s $fa0, $fa0, $fa3, $fa1 + fadd.s $fa1, $fa2, $ft1 + fadd.s $fa1, $fa4, $fa1 fadd.s $fa1, $fa1, $ft2 + lu12i.w $a0, -269108 + ori $a0, $a0, 3277 + lu32i.d $a0, 0 + movgr2fr.w $fa2, $a0 fmul.s $fa0, $fa0, $fa2 frecip.s $fa1, $fa1 fmul.s $fa0, $fa0, $fa1 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btContactProcessing.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btContactProcessing.s index 77b8677b..4ac50491 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btContactProcessing.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btContactProcessing.s @@ -1,18 +1,6 @@ .file "btContactProcessing.cpp" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN14btContactArray14merge_contactsERKS_b -.LCPI0_0: - .word 0x447a0000 # float 1000 -.LCPI0_1: - .word 0x44a6a000 # float 1333 -.LCPI0_2: - .word 0x45055000 # float 2133 -.LCPI0_3: - .word 0x3727c5ac # float 9.99999974E-6 -.LCPI0_4: - .word 0xb727c5ac # float -9.99999974E-6 .text - .globl _ZN14btContactArray14merge_contactsERKS_b + .globl _ZN14btContactArray14merge_contactsERKS_b # -- Begin function _ZN14btContactArray14merge_contactsERKS_b .p2align 5 .type _ZN14btContactArray14merge_contactsERKS_b,@function _ZN14btContactArray14merge_contactsERKS_b: # @_ZN14btContactArray14merge_contactsERKS_b @@ -194,13 +182,13 @@ _ZN14btContactArray14merge_contactsERKS_b: # @_ZN14btContactArray14merge_contact st.w $s3, $sp, 144 blez $a0, .LBB0_36 # %bb.22: # %.lr.ph - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.s $fs0, $a2, %pc_lo12(.LCPI0_0) - pcalau12i $a2, %pc_hi20(.LCPI0_1) - fld.s $fs1, $a2, %pc_lo12(.LCPI0_1) - pcalau12i $a2, %pc_hi20(.LCPI0_2) - fld.s $fs2, $a2, %pc_lo12(.LCPI0_2) move $s5, $zero + lu12i.w $a2, 280480 + movgr2fr.w $fs0, $a2 + lu12i.w $a2, 281194 + movgr2fr.w $fs1, $a2 + lu12i.w $a2, 282709 + movgr2fr.w $fs2, $a2 b .LBB0_27 .LBB0_23: # in Loop: Header=BB0_27 Depth=1 .Ltmp6: # EH_LABEL @@ -413,10 +401,13 @@ _ZN14btContactArray14merge_contactsERKS_b: # @_ZN14btContactArray14merge_contact move $s6, $zero ld.d $a0, $fp, 16 addi.d $a6, $sp, 16 - pcalau12i $a2, %pc_hi20(.LCPI0_4) - fld.s $fs0, $a2, %pc_lo12(.LCPI0_4) - pcalau12i $a2, %pc_hi20(.LCPI0_3) - fld.s $fs1, $a2, %pc_lo12(.LCPI0_3) + lu12i.w $a2, -298372 + ori $a2, $a2, 1452 + lu32i.d $a2, 0 + movgr2fr.w $fs0, $a2 + lu12i.w $a2, 225916 + ori $a2, $a2, 1452 + movgr2fr.w $fs1, $a2 ori $a7, $zero, 48 ori $s7, $zero, 1 b .LBB0_56 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btContinuousConvexCollision.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btContinuousConvexCollision.s index 93c50a5a..7ebf30bb 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btContinuousConvexCollision.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btContinuousConvexCollision.s @@ -16,22 +16,14 @@ _ZN27btContinuousConvexCollisionC2EPK13btConvexShapeS2_P22btVoronoiSimplexSolver .Lfunc_end0: .size _ZN27btContinuousConvexCollisionC2EPK13btConvexShapeS2_P22btVoronoiSimplexSolverP30btConvexPenetrationDepthSolver, .Lfunc_end0-_ZN27btContinuousConvexCollisionC2EPK13btConvexShapeS2_P22btVoronoiSimplexSolverP30btConvexPenetrationDepthSolver # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN27btContinuousConvexCollision16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConvexCast10CastResultE -.LCPI1_0: - .word 0x3a83126f # float 0.00100000005 -.LCPI1_2: - .word 0x3e4ccccd # float 0.200000003 -.LCPI1_3: - .word 0x34000000 # float 1.1920929E-7 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI1_1: + .p2align 4, 0x0 # -- Begin function _ZN27btContinuousConvexCollision16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConvexCast10CastResultE +.LCPI1_0: .word 0x3f800000 # float 1 .word 0x3f800000 # float 1 .word 0x3f800000 # float 1 .word 0x00000000 # float 0 -.LCPI1_4: +.LCPI1_1: .word 0x3f800000 # float 1 .word 0x00000000 # float 0 .word 0x00000000 # float 0 @@ -46,27 +38,27 @@ _ZN27btContinuousConvexCollision16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12b .cfi_personality 155, DW.ref.__gxx_personality_v0 .cfi_lsda 27, .Lexception0 # %bb.0: - addi.d $sp, $sp, -800 - .cfi_def_cfa_offset 800 - st.d $ra, $sp, 792 # 8-byte Folded Spill - st.d $fp, $sp, 784 # 8-byte Folded Spill - st.d $s0, $sp, 776 # 8-byte Folded Spill - st.d $s1, $sp, 768 # 8-byte Folded Spill - st.d $s2, $sp, 760 # 8-byte Folded Spill - st.d $s3, $sp, 752 # 8-byte Folded Spill - st.d $s4, $sp, 744 # 8-byte Folded Spill - st.d $s5, $sp, 736 # 8-byte Folded Spill - st.d $s6, $sp, 728 # 8-byte Folded Spill - st.d $s7, $sp, 720 # 8-byte Folded Spill - st.d $s8, $sp, 712 # 8-byte Folded Spill - fst.d $fs0, $sp, 704 # 8-byte Folded Spill - fst.d $fs1, $sp, 696 # 8-byte Folded Spill - fst.d $fs2, $sp, 688 # 8-byte Folded Spill - fst.d $fs3, $sp, 680 # 8-byte Folded Spill - fst.d $fs4, $sp, 672 # 8-byte Folded Spill - fst.d $fs5, $sp, 664 # 8-byte Folded Spill - fst.d $fs6, $sp, 656 # 8-byte Folded Spill - fst.d $fs7, $sp, 648 # 8-byte Folded Spill + addi.d $sp, $sp, -816 + .cfi_def_cfa_offset 816 + st.d $ra, $sp, 808 # 8-byte Folded Spill + st.d $fp, $sp, 800 # 8-byte Folded Spill + st.d $s0, $sp, 792 # 8-byte Folded Spill + st.d $s1, $sp, 784 # 8-byte Folded Spill + st.d $s2, $sp, 776 # 8-byte Folded Spill + st.d $s3, $sp, 768 # 8-byte Folded Spill + st.d $s4, $sp, 760 # 8-byte Folded Spill + st.d $s5, $sp, 752 # 8-byte Folded Spill + st.d $s6, $sp, 744 # 8-byte Folded Spill + st.d $s7, $sp, 736 # 8-byte Folded Spill + st.d $s8, $sp, 728 # 8-byte Folded Spill + fst.d $fs0, $sp, 720 # 8-byte Folded Spill + fst.d $fs1, $sp, 712 # 8-byte Folded Spill + fst.d $fs2, $sp, 704 # 8-byte Folded Spill + fst.d $fs3, $sp, 696 # 8-byte Folded Spill + fst.d $fs4, $sp, 688 # 8-byte Folded Spill + fst.d $fs5, $sp, 680 # 8-byte Folded Spill + fst.d $fs6, $sp, 672 # 8-byte Folded Spill + fst.d $fs7, $sp, 664 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -109,23 +101,23 @@ _ZN27btContinuousConvexCollision16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12b bstrins.d $a0, $a1, 63, 32 movfr2gr.s $a1, $fa2 bstrpick.d $a1, $a1, 31, 0 - st.d $a0, $sp, 632 - st.d $a1, $sp, 640 - addi.d $a2, $sp, 208 - addi.d $a3, $sp, 360 + st.d $a0, $sp, 648 + st.d $a1, $sp, 656 + addi.d $a2, $sp, 224 + addi.d $a3, $sp, 376 move $a0, $s2 move $a1, $s4 pcaddu18i $ra, %call36(_ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf) jirl $ra, $ra, 0 - fld.s $fa0, $sp, 208 - fld.s $fa1, $sp, 360 - fld.s $fa2, $sp, 212 - fld.s $fa3, $sp, 216 + fld.s $fa0, $sp, 224 + fld.s $fa1, $sp, 376 + fld.s $fa2, $sp, 228 + fld.s $fa3, $sp, 232 fmul.s $fa0, $fa0, $fa1 - fst.s $fa0, $sp, 28 # 4-byte Folded Spill + fst.s $fa0, $sp, 44 # 4-byte Folded Spill fmul.s $fs3, $fa1, $fa2 fmul.s $fa4, $fa1, $fa3 - fst.s $fa4, $sp, 24 # 4-byte Folded Spill + fst.s $fa4, $sp, 40 # 4-byte Folded Spill movfr2gr.s $a0, $fa0 movfr2gr.s $a1, $fs3 fld.s $fa0, $s3, 48 @@ -139,37 +131,37 @@ _ZN27btContinuousConvexCollision16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12b fld.s $fa0, $s3, 56 fld.s $fa1, $s1, 56 bstrpick.d $a1, $a1, 31, 0 - st.d $a0, $sp, 616 - st.d $a1, $sp, 624 + st.d $a0, $sp, 632 + st.d $a1, $sp, 640 fsub.s $fs6, $fa0, $fa1 movfr2gr.s $a0, $fs4 movfr2gr.s $a1, $fs5 bstrins.d $a0, $a1, 63, 32 movfr2gr.s $a1, $fs6 bstrpick.d $a1, $a1, 31, 0 - st.d $a0, $sp, 600 - st.d $a1, $sp, 608 - addi.d $a2, $sp, 208 - addi.d $a3, $sp, 360 + st.d $a0, $sp, 616 + st.d $a1, $sp, 624 + addi.d $a2, $sp, 224 + addi.d $a3, $sp, 376 move $a0, $s1 move $a1, $s3 pcaddu18i $ra, %call36(_ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf) jirl $ra, $ra, 0 - fld.s $fa0, $sp, 208 - fld.s $fa1, $sp, 360 - fld.s $fa2, $sp, 212 - fld.s $fa3, $sp, 216 + fld.s $fa0, $sp, 224 + fld.s $fa1, $sp, 376 + fld.s $fa2, $sp, 228 + fld.s $fa3, $sp, 232 fmul.s $fs7, $fa0, $fa1 fmul.s $fs1, $fa1, $fa2 fmul.s $fs2, $fa1, $fa3 movfr2gr.s $a0, $fs7 movfr2gr.s $a1, $fs1 bstrins.d $a0, $a1, 63, 32 - st.d $a0, $sp, 584 + st.d $a0, $sp, 600 ld.d $a0, $s0, 24 movfr2gr.s $a1, $fs2 bstrpick.d $a1, $a1, 31, 0 - st.d $a1, $sp, 592 + st.d $a1, $sp, 608 ld.d $a1, $a0, 0 ld.d $a1, $a1, 32 jirl $ra, $a1, 0 @@ -179,9 +171,9 @@ _ZN27btContinuousConvexCollision16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12b fmov.s $fs0, $fa0 jirl $ra, $a1, 0 fmul.s $fa1, $fs3, $fs3 - fld.s $fa2, $sp, 28 # 4-byte Folded Reload + fld.s $fa2, $sp, 44 # 4-byte Folded Reload fmadd.s $fa1, $fa2, $fa2, $fa1 - fld.s $fa2, $sp, 24 # 4-byte Folded Reload + fld.s $fa2, $sp, 40 # 4-byte Folded Reload fmadd.s $fa1, $fa2, $fa2, $fa1 fsqrt.s $fa1, $fa1 fmul.s $fa2, $fs1, $fs1 @@ -189,16 +181,16 @@ _ZN27btContinuousConvexCollision16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12b fmadd.s $fa2, $fs2, $fs2, $fa2 fsqrt.s $fa2, $fa2 fmul.s $fa0, $fa0, $fa2 - fld.s $fa2, $sp, 632 - fld.s $fa3, $sp, 636 - fld.s $fa4, $sp, 640 + fld.s $fa2, $sp, 648 + fld.s $fa3, $sp, 652 + fld.s $fa4, $sp, 656 fmadd.s $fs0, $fa1, $fs0, $fa0 - fsub.s $fs7, $fs4, $fa2 + fsub.s $fs2, $fs4, $fa2 fsub.s $fs5, $fs5, $fa3 - fsub.s $fs2, $fs6, $fa4 + fsub.s $fs6, $fs6, $fa4 fmul.s $fa0, $fs5, $fs5 - fmadd.s $fa0, $fs7, $fs7, $fa0 fmadd.s $fa0, $fs2, $fs2, $fa0 + fmadd.s $fa0, $fs6, $fs6, $fa0 fsqrt.s $fa0, $fa0 fadd.s $fa0, $fs0, $fa0 movgr2fr.w $fs4, $zero @@ -208,25 +200,25 @@ _ZN27btContinuousConvexCollision16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12b move $a0, $zero b .LBB1_32 .LBB1_2: - addi.d $a0, $sp, 504 + addi.d $a0, $sp, 520 pcaddu18i $ra, %call36(_ZN21btConvexInternalShapeC2Ev) jirl $ra, $ra, 0 pcalau12i $a0, %got_pc_hi20(_ZTV13btSphereShape) ld.d $a0, $a0, %got_pc_lo12(_ZTV13btSphereShape) addi.d $a0, $a0, 16 - st.d $a0, $sp, 504 + st.d $a0, $sp, 520 ori $a0, $zero, 8 - st.w $a0, $sp, 512 - st.w $zero, $sp, 544 + st.w $a0, $sp, 528 st.w $zero, $sp, 560 + st.w $zero, $sp, 576 pcalau12i $a0, %pc_hi20(_ZTV16btPointCollector+16) addi.d $s7, $a0, %pc_lo12(_ZTV16btPointCollector+16) - st.d $s7, $sp, 456 + st.d $s7, $sp, 472 lu12i.w $a0, 382432 ori $s8, $a0, 2923 ld.d $s3, $s0, 24 - st.w $s8, $sp, 496 - st.b $zero, $sp, 500 + st.w $s8, $sp, 512 + st.b $zero, $sp, 516 ld.d $s4, $s0, 32 ld.d $a0, $s3, 0 ld.w $s5, $s3, 8 @@ -249,7 +241,7 @@ _ZN27btContinuousConvexCollision16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12b ld.d $a5, $s0, 8 ld.d $a6, $s0, 16 .Ltmp4: # EH_LABEL - addi.d $a0, $sp, 360 + addi.d $a0, $sp, 376 move $a1, $s3 move $a2, $s4 move $a3, $s5 @@ -262,92 +254,97 @@ _ZN27btContinuousConvexCollision16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12b vld $vr0, $s2, 0 addi.d $a0, $s2, 48 vld $vr1, $s2, 16 - vst $vr0, $sp, 208 + vst $vr0, $sp, 224 vld $vr0, $s2, 32 vld $vr2, $a0, 0 - vst $vr1, $sp, 224 + vst $vr1, $sp, 240 vld $vr1, $s1, 0 - vst $vr0, $sp, 240 - vst $vr2, $sp, 256 + vst $vr0, $sp, 256 + vst $vr2, $sp, 272 vld $vr0, $s1, 16 - vst $vr1, $sp, 272 + vst $vr1, $sp, 288 vld $vr1, $s1, 32 addi.d $a0, $s1, 48 - vst $vr0, $sp, 288 + vst $vr0, $sp, 304 vld $vr0, $a0, 0 - vst $vr1, $sp, 304 - st.w $s8, $sp, 336 - st.d $zero, $sp, 344 - vst $vr0, $sp, 320 + vst $vr1, $sp, 320 + st.w $s8, $sp, 352 + st.d $zero, $sp, 360 + vst $vr0, $sp, 336 .Ltmp7: # EH_LABEL - addi.d $a0, $sp, 360 - addi.d $a1, $sp, 208 - addi.d $a2, $sp, 456 + addi.d $a0, $sp, 376 + addi.d $a1, $sp, 224 + addi.d $a2, $sp, 472 move $a3, $zero move $a4, $zero pcaddu18i $ra, %call36(_ZN17btGjkPairDetector16getClosestPointsERKN36btDiscreteCollisionDetectorInterface17ClosestPointInputERNS0_6ResultEP12btIDebugDrawb) jirl $ra, $ra, 0 .Ltmp8: # EH_LABEL # %bb.6: - ld.bu $a0, $sp, 500 - vld $vr0, $sp, 480 - vst $vr0, $sp, 568 + ld.bu $a0, $sp, 516 + vld $vr0, $sp, 496 + vst $vr0, $sp, 584 beqz $a0, .LBB1_30 # %bb.7: - fst.s $fs2, $sp, 28 # 4-byte Folded Spill - fld.s $fs3, $sp, 496 - fld.s $fs1, $sp, 464 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI1_0) - fld.s $fs2, $sp, 468 - fld.s $fs6, $sp, 472 - fst.s $fa0, $sp, 24 # 4-byte Folded Spill + fst.s $fs2, $sp, 44 # 4-byte Folded Spill + fld.s $fs3, $sp, 512 + fld.s $fs2, $sp, 480 + fld.s $fs7, $sp, 484 + fld.s $fs1, $sp, 488 + lu12i.w $a0, 239665 + ori $a0, $a0, 623 + movgr2fr.w $fa0, $a0 + fst.s $fa0, $sp, 36 # 4-byte Folded Spill fcmp.clt.s $fcc0, $fa0, $fs3 bceqz $fcc0, .LBB1_27 # %bb.8: # %.lr.ph - fst.s $fs7, $sp, 20 # 4-byte Folded Spill - fst.s $fs0, $sp, 16 # 4-byte Folded Spill - addi.d $s3, $sp, 192 - addi.d $s6, $sp, 320 + fst.s $fs6, $sp, 32 # 4-byte Folded Spill + fst.s $fs0, $sp, 28 # 4-byte Folded Spill + addi.d $s3, $sp, 208 + addi.d $s5, $sp, 320 + addi.d $s6, $sp, 336 movgr2fr.w $fs0, $zero - pcalau12i $a0, %pc_hi20(.LCPI1_3) - fld.s $fa0, $a0, %pc_lo12(.LCPI1_3) - fst.s $fa0, $sp, 12 # 4-byte Folded Spill ori $s4, $zero, 65 - fst.s $fs0, $sp, 8 # 4-byte Folded Spill - pcalau12i $s5, %pc_hi20(.LCPI1_2) + lu12i.w $a0, 212992 + movgr2fr.w $fa0, $a0 + fst.s $fa0, $sp, 24 # 4-byte Folded Spill + lu12i.w $a0, 255180 + ori $a0, $a0, 3277 + movgr2fr.w $fa0, $a0 + fst.s $fa0, $sp, 40 # 4-byte Folded Spill + fst.s $fs0, $sp, 20 # 4-byte Folded Spill .p2align 4, , 16 .LBB1_9: # =>This Inner Loop Header: Depth=1 ld.d $a0, $fp, 176 - fld.s $fs4, $sp, 20 # 4-byte Folded Reload + fld.s $fs4, $sp, 32 # 4-byte Folded Reload beqz $a0, .LBB1_11 # %bb.10: # in Loop: Header=BB1_9 Depth=1 - pcalau12i $a1, %pc_hi20(.LCPI1_1) - vld $vr0, $a1, %pc_lo12(.LCPI1_1) - vst $vr0, $sp, 208 + pcalau12i $a1, %pc_hi20(.LCPI1_0) + vld $vr0, $a1, %pc_lo12(.LCPI1_0) + vst $vr0, $sp, 224 ld.d $a1, $a0, 0 ld.d $a3, $a1, 32 .Ltmp10: # EH_LABEL - fld.s $fa0, $s5, %pc_lo12(.LCPI1_2) - addi.d $a1, $sp, 568 - addi.d $a2, $sp, 208 + addi.d $a1, $sp, 584 + addi.d $a2, $sp, 224 + fld.s $fa0, $sp, 40 # 4-byte Folded Reload jirl $ra, $a3, 0 .Ltmp11: # EH_LABEL .LBB1_11: # in Loop: Header=BB1_9 Depth=1 addi.w $s4, $s4, -1 beqz $s4, .LBB1_30 # %bb.12: # in Loop: Header=BB1_9 Depth=1 - fmul.s $fa0, $fs5, $fs2 + fmul.s $fa0, $fs5, $fs7 + fld.s $fa1, $sp, 44 # 4-byte Folded Reload + fmadd.s $fa0, $fa1, $fs2, $fa0 fmadd.s $fa0, $fs4, $fs1, $fa0 fld.s $fa1, $sp, 28 # 4-byte Folded Reload - fmadd.s $fa0, $fa1, $fs6, $fa0 - fld.s $fa1, $sp, 16 # 4-byte Folded Reload - fadd.s $fs7, $fa1, $fa0 - fld.s $fa0, $sp, 12 # 4-byte Folded Reload - fcmp.cle.s $fcc0, $fs7, $fa0 + fadd.s $fs6, $fa1, $fa0 + fld.s $fa0, $sp, 24 # 4-byte Folded Reload + fcmp.cle.s $fcc0, $fs6, $fa0 bcnez $fcc0, .LBB1_30 # %bb.13: # in Loop: Header=BB1_9 Depth=1 - fdiv.s $fa0, $fs3, $fs7 + fdiv.s $fa0, $fs3, $fs6 fadd.s $fs4, $fs0, $fa0 fcmp.cle.s $fcc0, $fs4, $fs0 move $a0, $zero @@ -357,14 +354,14 @@ _ZN27btContinuousConvexCollision16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12b fcmp.clt.s $fcc0, $fa0, $fs4 bcnez $fcc0, .LBB1_31 # %bb.15: # in Loop: Header=BB1_9 Depth=1 - fld.s $fs0, $sp, 8 # 4-byte Folded Reload + fld.s $fs0, $sp, 20 # 4-byte Folded Reload fcmp.clt.s $fcc0, $fs4, $fs0 bcnez $fcc0, .LBB1_31 # %bb.16: # in Loop: Header=BB1_9 Depth=1 .Ltmp13: # EH_LABEL - addi.d $a1, $sp, 632 - addi.d $a2, $sp, 616 - addi.d $a3, $sp, 144 + addi.d $a1, $sp, 648 + addi.d $a2, $sp, 632 + addi.d $a3, $sp, 160 move $a0, $s2 fmov.s $fa0, $fs4 pcaddu18i $ra, %call36(_ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_) @@ -372,9 +369,9 @@ _ZN27btContinuousConvexCollision16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12b .Ltmp14: # EH_LABEL # %bb.17: # in Loop: Header=BB1_9 Depth=1 .Ltmp15: # EH_LABEL - addi.d $a1, $sp, 600 - addi.d $a2, $sp, 584 - addi.d $a3, $sp, 80 + addi.d $a1, $sp, 616 + addi.d $a2, $sp, 600 + addi.d $a3, $sp, 96 move $a0, $s1 fmov.s $fa0, $fs4 pcaddu18i $ra, %call36(_ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_) @@ -384,15 +381,15 @@ _ZN27btContinuousConvexCollision16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12b ld.d $a0, $fp, 176 beqz $a0, .LBB1_20 # %bb.19: # in Loop: Header=BB1_9 Depth=1 - pcalau12i $a1, %pc_hi20(.LCPI1_4) - vld $vr0, $a1, %pc_lo12(.LCPI1_4) - vst $vr0, $sp, 208 + pcalau12i $a1, %pc_hi20(.LCPI1_1) + vld $vr0, $a1, %pc_lo12(.LCPI1_1) + vst $vr0, $sp, 224 ld.d $a1, $a0, 0 ld.d $a3, $a1, 32 .Ltmp17: # EH_LABEL - fld.s $fa0, $s5, %pc_lo12(.LCPI1_2) - addi.d $a2, $sp, 208 + addi.d $a2, $sp, 224 move $a1, $s3 + fld.s $fa0, $sp, 40 # 4-byte Folded Reload jirl $ra, $a3, 0 .Ltmp18: # EH_LABEL .LBB1_20: # in Loop: Header=BB1_9 Depth=1 @@ -408,91 +405,90 @@ _ZN27btContinuousConvexCollision16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12b ld.d $a2, $s0, 32 ld.d $a3, $s0, 8 ld.d $a4, $s0, 16 - st.d $s7, $sp, 32 - st.w $s8, $sp, 72 - st.b $zero, $sp, 76 + st.d $s7, $sp, 48 + st.w $s8, $sp, 88 + st.b $zero, $sp, 92 .Ltmp23: # EH_LABEL - addi.d $a0, $sp, 360 + addi.d $a0, $sp, 376 pcaddu18i $ra, %call36(_ZN17btGjkPairDetectorC1EPK13btConvexShapeS2_P22btVoronoiSimplexSolverP30btConvexPenetrationDepthSolver) jirl $ra, $ra, 0 .Ltmp24: # EH_LABEL # %bb.22: # in Loop: Header=BB1_9 Depth=1 - vld $vr0, $sp, 144 - addi.d $a0, $sp, 160 - vld $vr1, $a0, 0 + vld $vr0, $sp, 160 addi.d $a0, $sp, 176 + vld $vr1, $a0, 0 + addi.d $a0, $sp, 192 vld $vr2, $a0, 0 - vst $vr0, $sp, 208 - addi.d $a0, $sp, 224 + vst $vr0, $sp, 224 + addi.d $a0, $sp, 240 vst $vr1, $a0, 0 vld $vr0, $s3, 0 - addi.d $a0, $sp, 240 + addi.d $a0, $sp, 256 vst $vr2, $a0, 0 - vld $vr1, $sp, 80 - addi.d $a0, $sp, 96 + vld $vr1, $sp, 96 + addi.d $a0, $sp, 112 vld $vr2, $a0, 0 - addi.d $a0, $sp, 256 + addi.d $a0, $sp, 272 vst $vr0, $a0, 0 - addi.d $a0, $sp, 112 + addi.d $a0, $sp, 128 vld $vr0, $a0, 0 - addi.d $a0, $sp, 272 - vst $vr1, $a0, 0 addi.d $a0, $sp, 288 + vst $vr1, $a0, 0 + addi.d $a0, $sp, 304 vst $vr2, $a0, 0 - addi.d $a0, $sp, 128 + addi.d $a0, $sp, 144 vld $vr1, $a0, 0 - addi.d $a0, $sp, 304 - vst $vr0, $a0, 0 - st.w $s8, $sp, 336 - st.d $zero, $sp, 344 + vst $vr0, $s5, 0 + st.w $s8, $sp, 352 + st.d $zero, $sp, 360 vst $vr1, $s6, 0 .Ltmp26: # EH_LABEL - addi.d $a0, $sp, 360 - addi.d $a1, $sp, 208 - addi.d $a2, $sp, 32 + addi.d $a0, $sp, 376 + addi.d $a1, $sp, 224 + addi.d $a2, $sp, 48 move $a3, $zero move $a4, $zero pcaddu18i $ra, %call36(_ZN17btGjkPairDetector16getClosestPointsERKN36btDiscreteCollisionDetectorInterface17ClosestPointInputERNS0_6ResultEP12btIDebugDrawb) jirl $ra, $ra, 0 .Ltmp27: # EH_LABEL # %bb.23: # in Loop: Header=BB1_9 Depth=1 - ld.bu $a0, $sp, 76 + ld.bu $a0, $sp, 92 beqz $a0, .LBB1_31 # %bb.24: # in Loop: Header=BB1_9 Depth=1 - fld.s $fs3, $sp, 72 + fld.s $fs3, $sp, 88 fcmp.cule.s $fcc0, $fs0, $fs3 bceqz $fcc0, .LBB1_33 # %bb.25: # in Loop: Header=BB1_9 Depth=1 - addi.d $a0, $sp, 56 + addi.d $a0, $sp, 72 vld $vr0, $a0, 0 - fld.s $fs1, $sp, 40 - fld.s $fs2, $sp, 44 - fld.s $fs6, $sp, 48 - fld.s $fa1, $sp, 24 # 4-byte Folded Reload + fld.s $fs2, $sp, 56 + fld.s $fs7, $sp, 60 + fld.s $fs1, $sp, 64 + fld.s $fa1, $sp, 36 # 4-byte Folded Reload fcmp.clt.s $fcc0, $fa1, $fs3 - vst $vr0, $sp, 568 + vst $vr0, $sp, 584 fmov.s $fs0, $fs4 bcnez $fcc0, .LBB1_9 # %bb.26: - fld.s $fa0, $sp, 52 + fld.s $fa0, $sp, 68 b .LBB1_28 .LBB1_27: # %.._crit_edge_crit_edge - fmul.s $fa1, $fs5, $fs2 - fld.s $fa0, $sp, 476 - fmadd.s $fa1, $fs7, $fs1, $fa1 - fld.s $fa2, $sp, 28 # 4-byte Folded Reload - fmadd.s $fa1, $fa2, $fs6, $fa1 - fadd.s $fs7, $fs0, $fa1 + fmul.s $fa1, $fs5, $fs7 + fld.s $fa0, $sp, 492 + fld.s $fa2, $sp, 44 # 4-byte Folded Reload + fmadd.s $fa1, $fa2, $fs2, $fa1 + fmadd.s $fa1, $fs6, $fs1, $fa1 + fadd.s $fs6, $fs0, $fa1 .LBB1_28: # %._crit_edge fld.s $fa1, $fp, 184 - fcmp.cle.s $fcc0, $fs7, $fa1 + fcmp.cle.s $fcc0, $fs6, $fa1 bcnez $fcc0, .LBB1_30 # %bb.29: fst.s $fs4, $fp, 168 - fst.s $fs1, $fp, 136 - vld $vr1, $sp, 568 - fst.s $fs2, $fp, 140 - fst.s $fs6, $fp, 144 + fst.s $fs2, $fp, 136 + vld $vr1, $sp, 584 + fst.s $fs7, $fp, 140 + fst.s $fs1, $fp, 144 fst.s $fa0, $fp, 148 vst $vr1, $fp, 152 ori $a0, $zero, 1 @@ -501,35 +497,35 @@ _ZN27btContinuousConvexCollision16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12b move $a0, $zero .LBB1_31: # %.thread move $fp, $a0 - addi.d $a0, $sp, 504 + addi.d $a0, $sp, 520 pcaddu18i $ra, %call36(_ZN13btConvexShapeD2Ev) jirl $ra, $ra, 0 move $a0, $fp .LBB1_32: - fld.d $fs7, $sp, 648 # 8-byte Folded Reload - fld.d $fs6, $sp, 656 # 8-byte Folded Reload - fld.d $fs5, $sp, 664 # 8-byte Folded Reload - fld.d $fs4, $sp, 672 # 8-byte Folded Reload - fld.d $fs3, $sp, 680 # 8-byte Folded Reload - fld.d $fs2, $sp, 688 # 8-byte Folded Reload - fld.d $fs1, $sp, 696 # 8-byte Folded Reload - fld.d $fs0, $sp, 704 # 8-byte Folded Reload - ld.d $s8, $sp, 712 # 8-byte Folded Reload - ld.d $s7, $sp, 720 # 8-byte Folded Reload - ld.d $s6, $sp, 728 # 8-byte Folded Reload - ld.d $s5, $sp, 736 # 8-byte Folded Reload - ld.d $s4, $sp, 744 # 8-byte Folded Reload - ld.d $s3, $sp, 752 # 8-byte Folded Reload - ld.d $s2, $sp, 760 # 8-byte Folded Reload - ld.d $s1, $sp, 768 # 8-byte Folded Reload - ld.d $s0, $sp, 776 # 8-byte Folded Reload - ld.d $fp, $sp, 784 # 8-byte Folded Reload - ld.d $ra, $sp, 792 # 8-byte Folded Reload - addi.d $sp, $sp, 800 + fld.d $fs7, $sp, 664 # 8-byte Folded Reload + fld.d $fs6, $sp, 672 # 8-byte Folded Reload + fld.d $fs5, $sp, 680 # 8-byte Folded Reload + fld.d $fs4, $sp, 688 # 8-byte Folded Reload + fld.d $fs3, $sp, 696 # 8-byte Folded Reload + fld.d $fs2, $sp, 704 # 8-byte Folded Reload + fld.d $fs1, $sp, 712 # 8-byte Folded Reload + fld.d $fs0, $sp, 720 # 8-byte Folded Reload + ld.d $s8, $sp, 728 # 8-byte Folded Reload + ld.d $s7, $sp, 736 # 8-byte Folded Reload + ld.d $s6, $sp, 744 # 8-byte Folded Reload + ld.d $s5, $sp, 752 # 8-byte Folded Reload + ld.d $s4, $sp, 760 # 8-byte Folded Reload + ld.d $s3, $sp, 768 # 8-byte Folded Reload + ld.d $s2, $sp, 776 # 8-byte Folded Reload + ld.d $s1, $sp, 784 # 8-byte Folded Reload + ld.d $s0, $sp, 792 # 8-byte Folded Reload + ld.d $fp, $sp, 800 # 8-byte Folded Reload + ld.d $ra, $sp, 808 # 8-byte Folded Reload + addi.d $sp, $sp, 816 ret .LBB1_33: - vld $vr0, $sp, 40 - addi.d $a1, $sp, 56 + vld $vr0, $sp, 56 + addi.d $a1, $sp, 72 vld $vr1, $a1, 0 fst.s $fs4, $fp, 168 vst $vr0, $fp, 136 @@ -558,7 +554,7 @@ _ZN27btContinuousConvexCollision16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12b .LBB1_41: move $fp, $a0 .Ltmp29: # EH_LABEL - addi.d $a0, $sp, 504 + addi.d $a0, $sp, 520 pcaddu18i $ra, %call36(_ZN13btConvexShapeD2Ev) jirl $ra, $ra, 0 .Ltmp30: # EH_LABEL @@ -658,16 +654,8 @@ __clang_call_terminate: # @__clang_call_terminate .Lfunc_end2: .size __clang_call_terminate, .Lfunc_end2-__clang_call_terminate # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_ -.LCPI3_0: - .word 0x3f490fdb # float 0.785398185 -.LCPI3_1: - .word 0x3a83126f # float 0.00100000005 -.LCPI3_2: - .word 0xbcaaaaab # float -0.020833334 .section .text._ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_,"axG",@progbits,_ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_,comdat - .weak _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_ + .weak _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_ # -- Begin function _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_ .p2align 5 .type _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_,@function _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_: # @_ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_ @@ -720,22 +708,26 @@ _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_: # fmadd.s $fa0, $fs1, $fs1, $fa0 fmadd.s $fa0, $fs3, $fs3, $fa0 fsqrt.s $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI3_0) - fmul.s $fa2, $fs0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI3_1) - fld.s $fa3, $a0, %pc_lo12(.LCPI3_1) - fcmp.clt.s $fcc0, $fa1, $fa2 - fdiv.s $fa1, $fa1, $fs0 - fsel $fs4, $fa0, $fa1, $fcc0 - fcmp.cule.s $fcc0, $fa3, $fs4 + fmul.s $fa1, $fs0, $fa0 + lu12i.w $a0, 259216 + ori $a0, $a0, 4059 + movgr2fr.w $fa2, $a0 + fdiv.s $fa3, $fa2, $fs0 + fcmp.clt.s $fcc0, $fa2, $fa1 + fsel $fs4, $fa0, $fa3, $fcc0 + lu12i.w $a0, 239665 + ori $a0, $a0, 623 + movgr2fr.w $fa0, $a0 + fcmp.cule.s $fcc0, $fa0, $fs4 bcnez $fcc0, .LBB3_2 # %bb.1: - pcalau12i $a0, %pc_hi20(.LCPI3_2) - fld.s $fa0, $a0, %pc_lo12(.LCPI3_2) - fmul.s $fa1, $fs0, $fs0 - fmul.s $fa1, $fs0, $fa1 - fmul.s $fa0, $fa1, $fa0 + fmul.s $fa0, $fs0, $fs0 + fmul.s $fa0, $fs0, $fa0 + lu12i.w $a0, -275798 + ori $a0, $a0, 2731 + lu32i.d $a0, 0 + movgr2fr.w $fa1, $a0 + fmul.s $fa0, $fa0, $fa1 fmul.s $fa0, $fa0, $fs4 fmul.s $fa0, $fa0, $fs4 vldi $vr1, -1184 @@ -929,12 +921,8 @@ GCC_except_table5: .Lcst_end1: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf -.LCPI6_0: - .word 0x28800000 # float 1.42108547E-14 .section .text._ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf,"axG",@progbits,_ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf,comdat - .weak _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf + .weak _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf # -- Begin function _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf .p2align 5 .type _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf,@function _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf: # @_ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf @@ -1071,11 +1059,11 @@ _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf: # jirl $ra, $ra, 0 fadd.s $fa0, $fa0, $fa0 fst.s $fa0, $s0, 0 - pcalau12i $a0, %pc_hi20(.LCPI6_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI6_0) fmul.s $fa0, $fs1, $fs1 fmadd.s $fa0, $fs0, $fs0, $fa0 fmadd.s $fa0, $fs2, $fs2, $fa0 + lu12i.w $a0, 165888 + movgr2fr.w $fa1, $a0 fcmp.clt.s $fcc0, $fa0, $fa1 st.w $zero, $fp, 12 bceqz $fcc0, .LBB6_2 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexConvexAlgorithm.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexConvexAlgorithm.s index 34c5dbf5..a3cfb108 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexConvexAlgorithm.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexConvexAlgorithm.s @@ -416,22 +416,7 @@ _ZN23btConvexConvexAlgorithm19setLowLevelOfDetailEb: # @_ZN23btConvexConvexAlgor .Lfunc_end7: .size _ZN23btConvexConvexAlgorithm19setLowLevelOfDetailEb, .Lfunc_end7-_ZN23btConvexConvexAlgorithm19setLowLevelOfDetailEb # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN23btConvexConvexAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDispatcherInfoP16btManifoldResult -.LCPI8_0: - .word 0x5d5e0b6b # float 9.99999984E+17 -.LCPI8_1: - .word 0x34000000 # float 1.1920929E-7 -.LCPI8_2: - .word 0x3f3504f3 # float 0.707106769 -.LCPI8_3: - .word 0x3ec90fdb # float 0.392699093 -.LCPI8_4: - .word 0x40c90fdb # float 6.28318548 -.LCPI8_5: - .word 0x28800000 # float 1.42108547E-14 - .text - .globl _ZN23btConvexConvexAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDispatcherInfoP16btManifoldResult + .globl _ZN23btConvexConvexAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDispatcherInfoP16btManifoldResult # -- Begin function _ZN23btConvexConvexAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDispatcherInfoP16btManifoldResult .p2align 5 .type _ZN23btConvexConvexAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDispatcherInfoP16btManifoldResult,@function _ZN23btConvexConvexAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDispatcherInfoP16btManifoldResult: # @_ZN23btConvexConvexAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDispatcherInfoP16btManifoldResult @@ -633,8 +618,9 @@ _ZN23btConvexConvexAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDisp st.d $s6, $sp, 664 beqz $a0, .LBB8_16 # %bb.15: - pcalau12i $a0, %pc_hi20(.LCPI8_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI8_0) + lu12i.w $a0, 382432 + ori $a0, $a0, 2923 + movgr2fr.w $fa0, $a0 b .LBB8_17 .LBB8_16: ld.d $a0, $s7, 0 @@ -683,11 +669,11 @@ _ZN23btConvexConvexAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDisp pcaddu18i $ra, %call36(_ZN17btGjkPairDetector16getClosestPointsERKN36btDiscreteCollisionDetectorInterface17ClosestPointInputERNS0_6ResultEP12btIDebugDrawb) jirl $ra, $ra, 0 ld.bu $a0, $s3, 40 - pcalau12i $a3, %pc_hi20(.LCPI8_1) + lu12i.w $a3, 212992 beqz $a0, .LBB8_21 # %bb.18: fld.s $fa6, $sp, 692 - fld.s $fa0, $a3, %pc_lo12(.LCPI8_1) + movgr2fr.w $fa0, $a3 fcmp.cule.s $fcc0, $fa6, $fa0 bcnez $fcc0, .LBB8_22 # %bb.19: @@ -698,24 +684,25 @@ _ZN23btConvexConvexAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDisp fmul.s $fa4, $fa0, $fa0 fmadd.s $fa4, $fa1, $fa1, $fa4 fmadd.s $fa4, $fa2, $fa2, $fa4 - pcalau12i $a1, %pc_hi20(.LCPI8_2) - fld.s $fa5, $a1, %pc_lo12(.LCPI8_2) frsqrt.s $fa4, $fa4 - fmul.s $fs7, $fa2, $fa4 - fabs.s $fa2, $fs7 + fmul.s $fs4, $fa2, $fa4 + fabs.s $fa2, $fs4 + lu12i.w $a1, 258896 + ori $a1, $a1, 1267 + movgr2fr.w $fa5, $a1 fcmp.cule.s $fcc0, $fa2, $fa5 fmul.s $fs0, $fa1, $fa4 - fmul.s $fs4, $fa0, $fa4 + fmul.s $fs7, $fa0, $fa4 fadd.s $fa6, $fa6, $fa3 bcnez $fcc0, .LBB8_39 # %bb.20: - fmul.s $fa0, $fs7, $fs7 - fmadd.s $fa0, $fs4, $fs4, $fa0 + fmul.s $fa0, $fs4, $fs4 + fmadd.s $fa0, $fs7, $fs7, $fa0 frsqrt.s $fa0, $fa0 - fneg.s $fa1, $fs7 + fneg.s $fa1, $fs4 fmul.s $fa1, $fa0, $fa1 fst.s $fa1, $sp, 140 # 4-byte Folded Spill - fmul.s $fa0, $fs4, $fa0 + fmul.s $fa0, $fs7, $fa0 fst.s $fa0, $sp, 136 # 4-byte Folded Spill movgr2fr.w $fa0, $zero fst.s $fa0, $sp, 144 # 4-byte Folded Spill @@ -723,9 +710,9 @@ _ZN23btConvexConvexAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDisp .LBB8_21: movgr2fr.w $fa6, $zero .LBB8_22: - # implicit-def: $f31 - # implicit-def: $f24 # implicit-def: $f28 + # implicit-def: $f24 + # implicit-def: $f31 # implicit-def: $f0 # kill: killed $f0 # implicit-def: $f0 @@ -742,7 +729,6 @@ _ZN23btConvexConvexAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDisp st.d $a4, $sp, 72 # 8-byte Folded Spill bge $a1, $a2, .LBB8_31 # %bb.24: - st.d $a3, $sp, 16 # 8-byte Folded Spill fst.s $fa6, $sp, 28 # 4-byte Folded Spill ld.d $a0, $s7, 0 addi.d $s4, $sp, 776 @@ -795,12 +781,13 @@ _ZN23btConvexConvexAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDisp st.d $a0, $sp, 48 # 8-byte Folded Spill addi.d $a0, $s0, 24 st.d $a0, $sp, 40 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI8_3) - fld.s $fa2, $a0, %pc_lo12(.LCPI8_3) addi.d $a0, $s0, 40 st.d $a0, $sp, 32 # 8-byte Folded Spill - fsel $fa3, $fa0, $fs1, $fcc0 - fdiv.s $fa1, $fa1, $fa3 + fsel $fa2, $fa0, $fs1, $fcc0 + fdiv.s $fa1, $fa1, $fa2 + lu12i.w $a0, 257168 + ori $a0, $a0, 4059 + movgr2fr.w $fa2, $a0 fcmp.clt.s $fcc1, $fa2, $fa1 fsel $fa1, $fa1, $fa2, $fcc1 fld.s $fa2, $sp, 140 # 4-byte Folded Reload @@ -814,14 +801,15 @@ _ZN23btConvexConvexAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDisp vldi $vr2, -1184 fmul.s $fa1, $fa1, $fa2 fst.s $fa1, $sp, 116 # 4-byte Folded Spill - fmul.s $fa1, $fs4, $fs4 + fmul.s $fa1, $fs7, $fs7 fmadd.s $fa1, $fs0, $fs0, $fa1 - fmadd.s $fa1, $fs7, $fs7, $fa1 + fmadd.s $fa1, $fs4, $fs4, $fa1 fsqrt.s $fa1, $fa1 fst.s $fa1, $sp, 112 # 4-byte Folded Spill movcf2gr $s7, $fcc0 - pcalau12i $a0, %pc_hi20(.LCPI8_4) - fld.s $fa1, $a0, %pc_lo12(.LCPI8_4) + lu12i.w $a0, 265360 + ori $a0, $a0, 4059 + movgr2fr.w $fa1, $a0 fst.s $fa1, $sp, 108 # 4-byte Folded Spill fcmp.cule.s $fcc0, $fa0, $fs1 movcf2gr $a0, $fcc0 @@ -829,9 +817,9 @@ _ZN23btConvexConvexAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDisp pcalau12i $a0, %pc_hi20(_ZTV24btPerturbedContactResult+16) addi.d $s6, $a0, %pc_lo12(_ZTV24btPerturbedContactResult+16) move $s4, $zero - fst.s $fs7, $sp, 132 # 4-byte Folded Spill + fst.s $fs4, $sp, 132 # 4-byte Folded Spill fst.s $fs0, $sp, 128 # 4-byte Folded Spill - fst.s $fs4, $sp, 124 # 4-byte Folded Spill + fst.s $fs7, $sp, 124 # 4-byte Folded Spill b .LBB8_28 .p2align 4, , 16 .LBB8_26: # in Loop: Header=BB8_28 Depth=1 @@ -853,7 +841,7 @@ _ZN23btConvexConvexAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDisp fmul.s $fa5, $fs1, $fa1 fmadd.s $fa5, $fa0, $fs3, $fa5 fmadd.s $fa5, $fa4, $fs5, $fa5 - fmadd.s $fa6, $fs7, $fs6, $fa5 + fmadd.s $fa6, $fs4, $fs6, $fa5 fmul.s $fa4, $fs1, $fa4 fmadd.s $fa4, $fa0, $fs6, $fa4 fmadd.s $fa4, $fa3, $fs3, $fa4 @@ -861,29 +849,29 @@ _ZN23btConvexConvexAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDisp fmul.s $fa3, $fs1, $fa3 fmadd.s $fa3, $fa0, $fs5, $fa3 fmadd.s $fa3, $fa1, $fs6, $fa3 - fmadd.s $ft0, $fs4, $fs3, $fa3 + fmadd.s $ft0, $fs7, $fs3, $fa3 fmadd.s $fa2, $fa0, $fs1, $fa2 - fmadd.s $fa2, $fs4, $fs6, $fa2 - fmadd.s $fa2, $fs7, $fs5, $fa2 + fmadd.s $fa2, $fs7, $fs6, $fa2 + fmadd.s $fa2, $fs4, $fs5, $fa2 fmul.s $ft1, $fa0, $fa6 fmadd.s $ft1, $fa2, $fs0, $ft1 - fmadd.s $ft1, $fa7, $fs7, $ft1 - fnmadd.s $fa3, $fs4, $fs3, $fa3 - fmadd.s $ft1, $fa3, $fs4, $ft1 + fmadd.s $ft1, $fa7, $fs4, $ft1 + fnmadd.s $fa3, $fs7, $fs3, $fa3 + fmadd.s $ft1, $fa3, $fs7, $ft1 fmul.s $fa7, $fa0, $fa7 - fmadd.s $fa7, $fa2, $fs4, $fa7 + fmadd.s $fa7, $fa2, $fs7, $fa7 fmadd.s $fa7, $ft0, $fs0, $fa7 - fnmadd.s $fa5, $fs7, $fs6, $fa5 - fmadd.s $fa5, $fa5, $fs7, $fa7 + fnmadd.s $fa5, $fs4, $fs6, $fa5 + fmadd.s $fa5, $fa5, $fs4, $fa7 fmul.s $fa7, $fa0, $ft0 - fmadd.s $fa7, $fa2, $fs7, $fa7 - fmadd.s $fa7, $fa6, $fs4, $fa7 + fmadd.s $fa7, $fa2, $fs4, $fa7 + fmadd.s $fa7, $fa6, $fs7, $fa7 fnmadd.s $fa4, $fs0, $fs5, $fa4 fmadd.s $fa7, $fa4, $fs0, $fa7 fmul.s $fa1, $fa6, $fa1 fmadd.s $fa0, $fa2, $fa0, $fa1 - fmadd.s $fa0, $fa4, $fs4, $fa0 - fmadd.s $fa0, $fa3, $fs7, $fa0 + fmadd.s $fa0, $fa4, $fs7, $fa0 + fmadd.s $fa0, $fa3, $fs4, $fa0 fmul.s $fa1, $fa5, $fa5 fmadd.s $fa1, $ft1, $ft1, $fa1 fmadd.s $fa1, $fa7, $fa7, $fa1 @@ -1018,9 +1006,9 @@ _ZN23btConvexConvexAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDisp jirl $ra, $ra, 0 ld.w $a0, $s1, 148 addi.w $s4, $s4, 1 - fld.s $fs7, $sp, 132 # 4-byte Folded Reload + fld.s $fs4, $sp, 132 # 4-byte Folded Reload fld.s $fs0, $sp, 128 # 4-byte Folded Reload - fld.s $fs4, $sp, 124 # 4-byte Folded Reload + fld.s $fs7, $sp, 124 # 4-byte Folded Reload bge $s4, $a0, .LBB8_30 .LBB8_28: # =>This Inner Loop Header: Depth=1 fld.s $fs1, $sp, 116 # 4-byte Folded Reload @@ -1056,14 +1044,14 @@ _ZN23btConvexConvexAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDisp fld.s $fa1, $sp, 112 # 4-byte Folded Reload fdiv.s $fa0, $fa0, $fa1 fmul.s $fs0, $fs0, $fa0 - fmul.s $fs4, $fs4, $fa0 fmul.s $fs7, $fs7, $fa0 + fmul.s $fs4, $fs4, $fa0 fmov.s $fa0, $fs2 pcaddu18i $ra, %call36(cosf) jirl $ra, $ra, 0 fneg.s $fa1, $fs0 - fneg.s $fa4, $fs4 - fneg.s $fa3, $fs7 + fneg.s $fa4, $fs7 + fneg.s $fa3, $fs4 fmul.s $fa2, $fs3, $fs0 ld.d $a0, $sp, 96 movgr2cf $fcc0, $a0 @@ -1072,7 +1060,7 @@ _ZN23btConvexConvexAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDisp fmul.s $fa5, $fs1, $fa1 fmadd.s $fa5, $fa0, $fs3, $fa5 fmadd.s $fa5, $fa4, $fs5, $fa5 - fmadd.s $fa6, $fs7, $fs6, $fa5 + fmadd.s $fa6, $fs4, $fs6, $fa5 fmul.s $fa4, $fs1, $fa4 fmadd.s $fa4, $fa0, $fs6, $fa4 fmadd.s $fa4, $fa3, $fs3, $fa4 @@ -1080,29 +1068,29 @@ _ZN23btConvexConvexAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDisp fmul.s $fa3, $fs1, $fa3 fmadd.s $fa3, $fa0, $fs5, $fa3 fmadd.s $fa3, $fa1, $fs6, $fa3 - fmadd.s $ft0, $fs4, $fs3, $fa3 + fmadd.s $ft0, $fs7, $fs3, $fa3 fmadd.s $fa2, $fa0, $fs1, $fa2 - fmadd.s $fa2, $fs4, $fs6, $fa2 - fmadd.s $fa2, $fs7, $fs5, $fa2 + fmadd.s $fa2, $fs7, $fs6, $fa2 + fmadd.s $fa2, $fs4, $fs5, $fa2 fmul.s $ft1, $fa0, $fa6 fmadd.s $ft1, $fa2, $fs0, $ft1 - fmadd.s $ft1, $fa7, $fs7, $ft1 - fnmadd.s $fa3, $fs4, $fs3, $fa3 - fmadd.s $ft1, $fa3, $fs4, $ft1 + fmadd.s $ft1, $fa7, $fs4, $ft1 + fnmadd.s $fa3, $fs7, $fs3, $fa3 + fmadd.s $ft1, $fa3, $fs7, $ft1 fmul.s $fa7, $fa0, $fa7 - fmadd.s $fa7, $fa2, $fs4, $fa7 + fmadd.s $fa7, $fa2, $fs7, $fa7 fmadd.s $fa7, $ft0, $fs0, $fa7 - fnmadd.s $fa5, $fs7, $fs6, $fa5 - fmadd.s $fa5, $fa5, $fs7, $fa7 + fnmadd.s $fa5, $fs4, $fs6, $fa5 + fmadd.s $fa5, $fa5, $fs4, $fa7 fmul.s $fa7, $fa0, $ft0 - fmadd.s $fa7, $fa2, $fs7, $fa7 - fmadd.s $fa7, $fa6, $fs4, $fa7 + fmadd.s $fa7, $fa2, $fs4, $fa7 + fmadd.s $fa7, $fa6, $fs7, $fa7 fnmadd.s $fa4, $fs0, $fs5, $fa4 fmadd.s $fa7, $fa4, $fs0, $fa7 fmul.s $fa1, $fa6, $fa1 fmadd.s $fa0, $fa2, $fa0, $fa1 - fmadd.s $fa0, $fa4, $fs4, $fa0 - fmadd.s $fa0, $fa3, $fs7, $fa0 + fmadd.s $fa0, $fa4, $fs7, $fa0 + fmadd.s $fa0, $fa3, $fs4, $fa0 fmul.s $fa1, $fa5, $fa5 fmadd.s $fa1, $ft1, $ft1, $fa1 fmadd.s $fa1, $fa7, $fa7, $fa1 @@ -1203,12 +1191,12 @@ _ZN23btConvexConvexAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDisp .LBB8_30: # %._crit_edge ld.b $a0, $s3, 40 fld.s $fa6, $sp, 28 # 4-byte Folded Reload - ld.d $a3, $sp, 16 # 8-byte Folded Reload + lu12i.w $a3, 212992 .LBB8_31: andi $a0, $a0, 1 beqz $a0, .LBB8_34 # %bb.32: - fld.s $fa0, $a3, %pc_lo12(.LCPI8_1) + movgr2fr.w $fa0, $a3 fcmp.cule.s $fcc0, $fa6, $fa0 bcnez $fcc0, .LBB8_34 # %bb.33: # %.noexc @@ -1255,10 +1243,10 @@ _ZN23btConvexConvexAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDisp fmov.s $ft9, $fa1 b .LBB8_41 .LBB8_39: - fmul.s $fa0, $fs4, $fs4 + fmul.s $fa0, $fs7, $fs7 fmadd.s $fa0, $fs0, $fs0, $fa0 frsqrt.s $fa0, $fa0 - fneg.s $fa1, $fs4 + fneg.s $fa1, $fs7 fmul.s $fa1, $fa0, $fa1 fst.s $fa1, $sp, 144 # 4-byte Folded Spill fmul.s $fa0, $fs0, $fa0 @@ -1290,8 +1278,8 @@ _ZN23btConvexConvexAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDisp fcmp.clt.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB8_48 # %bb.42: - pcalau12i $a0, %pc_hi20(.LCPI8_5) - fld.s $ft8, $a0, %pc_lo12(.LCPI8_5) + lu12i.w $a0, 165888 + movgr2fr.w $ft8, $a0 fcmp.cult.s $fcc0, $ft8, $ft9 bceqz $fcc0, .LBB8_44 # %bb.43: @@ -1309,10 +1297,11 @@ _ZN23btConvexConvexAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDisp st.d $a1, $sp, 168 b .LBB8_47 .LBB8_44: - pcalau12i $a0, %pc_hi20(.LCPI8_2) - fld.s $ft4, $a0, %pc_lo12(.LCPI8_2) - fabs.s $ft5, $ft0 - fcmp.cule.s $fcc0, $ft5, $ft4 + fabs.s $ft4, $ft0 + lu12i.w $a0, 258896 + ori $a0, $a0, 1267 + movgr2fr.w $ft5, $a0 + fcmp.cule.s $fcc0, $ft4, $ft5 bcnez $fcc0, .LBB8_46 # %bb.45: fmul.s $fa7, $ft0, $ft0 @@ -2492,12 +2481,8 @@ _ZNK11btMatrix3x311getRotationER12btQuaternion: # @_ZNK11btMatrix3x311getRotatio .size _ZNK11btMatrix3x311getRotationER12btQuaternion, .Lfunc_end15-_ZNK11btMatrix3x311getRotationER12btQuaternion .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN15btTransformUtil32calculateDiffAxisAngleQuaternionERK12btQuaternionS2_R9btVector3Rf -.LCPI16_0: - .word 0x28800000 # float 1.42108547E-14 .section .text._ZN15btTransformUtil32calculateDiffAxisAngleQuaternionERK12btQuaternionS2_R9btVector3Rf,"axG",@progbits,_ZN15btTransformUtil32calculateDiffAxisAngleQuaternionERK12btQuaternionS2_R9btVector3Rf,comdat - .weak _ZN15btTransformUtil32calculateDiffAxisAngleQuaternionERK12btQuaternionS2_R9btVector3Rf + .weak _ZN15btTransformUtil32calculateDiffAxisAngleQuaternionERK12btQuaternionS2_R9btVector3Rf # -- Begin function _ZN15btTransformUtil32calculateDiffAxisAngleQuaternionERK12btQuaternionS2_R9btVector3Rf .p2align 5 .type _ZN15btTransformUtil32calculateDiffAxisAngleQuaternionERK12btQuaternionS2_R9btVector3Rf,@function _ZN15btTransformUtil32calculateDiffAxisAngleQuaternionERK12btQuaternionS2_R9btVector3Rf: # @_ZN15btTransformUtil32calculateDiffAxisAngleQuaternionERK12btQuaternionS2_R9btVector3Rf @@ -2595,11 +2580,11 @@ _ZN15btTransformUtil32calculateDiffAxisAngleQuaternionERK12btQuaternionS2_R9btVe jirl $ra, $ra, 0 fadd.s $fa0, $fa0, $fa0 fst.s $fa0, $s0, 0 - pcalau12i $a0, %pc_hi20(.LCPI16_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI16_0) fmul.s $fa0, $fs1, $fs1 fmadd.s $fa0, $fs0, $fs0, $fa0 fmadd.s $fa0, $fs2, $fs2, $fa0 + lu12i.w $a0, 165888 + movgr2fr.w $fa1, $a0 fcmp.clt.s $fcc0, $fa0, $fa1 st.w $zero, $fp, 12 bceqz $fcc0, .LBB16_5 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexHull.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexHull.s index f0f4193f..a2688c02 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexHull.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexHull.s @@ -2483,12 +2483,6 @@ _ZN11HullLibrary10extrudableEf: # @_ZN11HullLibrary10extrudableEf .word 0x3ca3d70a # float 0.0199999996 .word 0x3f800000 # float 1 .word 0x00000000 # float 0 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI26_1: - .word 0x3ca3d70a # float 0.0199999996 -.LCPI26_2: - .word 0xbca3d70a # float -0.0199999996 .text .globl _ZN11HullLibrary11FindSimplexEP9btVector3iR20btAlignedObjectArrayIiE .p2align 5 @@ -2588,24 +2582,27 @@ _ZN11HullLibrary11FindSimplexEP9btVector3iR20btAlignedObjectArrayIiE: # @_ZN11Hu .LBB26_4: # %_ZNK9btVector3eqERKS_.exit.thread st.d $a2, $sp, 8 # 8-byte Folded Spill fneg.s $fa0, $fs0 - pcalau12i $a0, %pc_hi20(.LCPI26_1) - fld.s $fa6, $a0, %pc_lo12(.LCPI26_1) - pcalau12i $a0, %pc_hi20(.LCPI26_2) - fld.s $fa7, $a0, %pc_lo12(.LCPI26_2) - fmul.s $ft0, $fa4, $fa0 - fmadd.s $fa1, $fa5, $fa6, $ft0 + fmul.s $fa6, $fa4, $fa0 + lu12i.w $a0, 248381 + ori $a0, $a0, 1802 + movgr2fr.w $fa7, $a0 + fmadd.s $fa1, $fa5, $fa7, $fa6 fmsub.s $fa0, $fa3, $fs0, $fa5 - fmul.s $fa2, $fa3, $fa7 + lu12i.w $a0, -275907 + ori $a0, $a0, 1802 + lu32i.d $a0, 0 + movgr2fr.w $ft0, $a0 + fmul.s $fa2, $fa3, $ft0 fadd.s $fa2, $fa4, $fa2 movfr2gr.s $a0, $fa2 bstrpick.d $a0, $a0, 31, 0 addi.d $a2, $sp, 48 st.d $a0, $sp, 56 - fadd.s $ft0, $fa5, $ft0 - fmul.s $fa5, $fa5, $fa6 + fadd.s $fa6, $fa5, $fa6 + fmul.s $fa5, $fa5, $fa7 fmadd.s $fa5, $fa3, $fs0, $fa5 - fmsub.s $fa4, $fa4, $fa7, $fa3 - movfr2gr.s $a0, $ft0 + fmsub.s $fa4, $fa4, $ft0, $fa3 + movfr2gr.s $a0, $fa6 movfr2gr.s $a1, $fa5 bstrins.d $a0, $a1, 63, 32 movfr2gr.s $a1, $fa4 @@ -2617,7 +2614,7 @@ _ZN11HullLibrary11FindSimplexEP9btVector3iR20btAlignedObjectArrayIiE: # @_ZN11Hu fmadd.s $fa3, $fa2, $fa2, $fa3 fsqrt.s $fa3, $fa3 fmul.s $fa5, $fa5, $fa5 - fmadd.s $fa5, $ft0, $ft0, $fa5 + fmadd.s $fa5, $fa6, $fa6, $fa5 fmadd.s $fa4, $fa4, $fa4, $fa5 fsqrt.s $fa4, $fa4 fcmp.clt.s $fcc0, $fa4, $fa3 @@ -2831,14 +2828,8 @@ _ZN11HullLibrary11FindSimplexEP9btVector3iR20btAlignedObjectArrayIiE: # @_ZN11Hu .size _ZN11HullLibrary11FindSimplexEP9btVector3iR20btAlignedObjectArrayIiE, .Lfunc_end26-_ZN11HullLibrary11FindSimplexEP9btVector3iR20btAlignedObjectArrayIiE .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z12maxdirsteridI9btVector3EiPKT_iRS2_R20btAlignedObjectArrayIiE -.LCPI27_0: - .word 0x3c8efa35 # float 0.0174532924 -.LCPI27_1: - .word 0x3ccccccd # float 0.0250000004 .section .text._Z12maxdirsteridI9btVector3EiPKT_iRS2_R20btAlignedObjectArrayIiE,"axG",@progbits,_Z12maxdirsteridI9btVector3EiPKT_iRS2_R20btAlignedObjectArrayIiE,comdat - .weak _Z12maxdirsteridI9btVector3EiPKT_iRS2_R20btAlignedObjectArrayIiE + .weak _Z12maxdirsteridI9btVector3EiPKT_iRS2_R20btAlignedObjectArrayIiE # -- Begin function _Z12maxdirsteridI9btVector3EiPKT_iRS2_R20btAlignedObjectArrayIiE .p2align 5 .type _Z12maxdirsteridI9btVector3EiPKT_iRS2_R20btAlignedObjectArrayIiE,@function _Z12maxdirsteridI9btVector3EiPKT_iRS2_R20btAlignedObjectArrayIiE: # @_Z12maxdirsteridI9btVector3EiPKT_iRS2_R20btAlignedObjectArrayIiE @@ -2939,11 +2930,13 @@ _Z12maxdirsteridI9btVector3EiPKT_iRS2_R20btAlignedObjectArrayIiE: # @_Z12maxdirs addi.d $a0, $s1, 8 st.d $a0, $sp, 56 # 8-byte Folded Spill addi.w $fp, $zero, -1 - pcalau12i $a0, %pc_hi20(.LCPI27_0) - fld.s $fs5, $a0, %pc_lo12(.LCPI27_0) - pcalau12i $a0, %pc_hi20(.LCPI27_1) - fld.s $fs6, $a0, %pc_lo12(.LCPI27_1) ori $t0, $zero, 3 + lu12i.w $a0, 248047 + ori $a0, $a0, 2613 + movgr2fr.w $fs5, $a0 + lu12i.w $a0, 249036 + ori $a0, $a0, 3277 + movgr2fr.w $fs6, $a0 st.d $a2, $sp, 8 # 8-byte Folded Spill addi.w $s0, $zero, -1 move $s2, $fp @@ -3287,16 +3280,8 @@ _Z12maxdirsteridI9btVector3EiPKT_iRS2_R20btAlignedObjectArrayIiE: # @_Z12maxdirs .size _Z12maxdirsteridI9btVector3EiPKT_iRS2_R20btAlignedObjectArrayIiE, .Lfunc_end27-_Z12maxdirsteridI9btVector3EiPKT_iRS2_R20btAlignedObjectArrayIiE .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN11HullLibrary11calchullgenEP9btVector3ii -.LCPI28_0: - .word 0x3a83126f # float 0.00100000005 -.LCPI28_1: - .word 0x3c23d70a # float 0.00999999977 -.LCPI28_2: - .word 0x3dcccccd # float 0.100000001 .text - .globl _ZN11HullLibrary11calchullgenEP9btVector3ii + .globl _ZN11HullLibrary11calchullgenEP9btVector3ii # -- Begin function _ZN11HullLibrary11calchullgenEP9btVector3ii .p2align 5 .type _ZN11HullLibrary11calchullgenEP9btVector3ii,@function _ZN11HullLibrary11calchullgenEP9btVector3ii: # @_ZN11HullLibrary11calchullgenEP9btVector3ii @@ -3904,12 +3889,13 @@ _ZN11HullLibrary11calchullgenEP9btVector3ii: # @_ZN11HullLibrary11calchullgenEP9 fsub.s $fa1, $fs4, $fs1 fsub.s $fa2, $fs3, $fs2 fmul.s $fa1, $fa1, $fa1 - pcalau12i $a1, %pc_hi20(.LCPI28_0) - fld.s $fa3, $a1, %pc_lo12(.LCPI28_0) fmadd.s $fa0, $fa0, $fa0, $fa1 fmadd.s $fa0, $fa2, $fa2, $fa0 fsqrt.s $fa0, $fa0 - fmul.s $fs0, $fa0, $fa3 + lu12i.w $a1, 239665 + ori $a1, $a1, 623 + movgr2fr.w $fa1, $a1 + fmul.s $fs0, $fa0, $fa1 fld.s $fa0, $sp, 20 # 4-byte Folded Reload fadd.s $fa0, $fs7, $fa0 fld.s $fa1, $sp, 60 # 4-byte Folded Reload @@ -3935,13 +3921,15 @@ _ZN11HullLibrary11calchullgenEP9btVector3ii: # @_ZN11HullLibrary11calchullgenEP9 fmul.s $fs2, $fa1, $fa3 fmul.s $fs3, $fa2, $fa3 lu12i.w $s5, 260096 - pcalau12i $a1, %pc_hi20(.LCPI28_1) - fld.s $fa0, $a1, %pc_lo12(.LCPI28_1) - pcalau12i $a1, %pc_hi20(.LCPI28_2) - fld.s $fa1, $a1, %pc_lo12(.LCPI28_2) movgr2fr.w $fs4, $zero + lu12i.w $a1, 246333 + ori $a1, $a1, 1802 + movgr2fr.w $fa0, $a1 fmul.s $fs5, $fs0, $fa0 fmul.s $fa0, $fs0, $fs0 + lu12i.w $a1, 253132 + ori $a1, $a1, 3277 + movgr2fr.w $fa1, $a1 fmul.s $fs6, $fa0, $fa1 b .LBB28_66 .p2align 4, , 16 @@ -6054,27 +6042,13 @@ GCC_except_table32: .Lttbase3: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_ -.LCPI33_0: - .word 0xff7fffff # float -3.40282347E+38 -.LCPI33_1: - .word 0x7f7fffff # float 3.40282347E+38 -.LCPI33_2: - .word 0x358637bd # float 9.99999997E-7 -.LCPI33_3: - .word 0x3c23d70a # float 0.00999999977 -.LCPI33_4: - .word 0x3d4ccccd # float 0.0500000007 .text - .globl _ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_ + .globl _ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_ # -- Begin function _ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_ .p2align 5 .type _ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_,@function _ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_: # @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_ .cfi_startproc # %bb.0: - beqz $a1, .LBB33_60 -# %bb.1: addi.d $sp, $sp, -192 .cfi_def_cfa_offset 192 st.d $ra, $sp, 184 # 8-byte Folded Spill @@ -6115,13 +6089,16 @@ _ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_: # @_ZN11HullLibrary .cfi_offset 61, -136 .cfi_offset 62, -144 .cfi_offset 63, -152 + move $fp, $a4 + move $a4, $a1 + beqz $a1, .LBB33_59 +# %bb.1: fmov.s $fs0, $fa0 move $s0, $a5 - move $fp, $a4 move $s2, $a2 move $s3, $a0 ld.w $s5, $a0, 36 - st.d $a1, $sp, 32 # 8-byte Folded Spill + st.d $a4, $sp, 32 # 8-byte Folded Spill bgez $s5, .LBB33_8 # %bb.2: ld.w $a1, $s3, 40 @@ -6153,21 +6130,24 @@ _ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_: # @_ZN11HullLibrary jirl $ra, $ra, 0 move $a6, $s4 move $a3, $s1 - ld.d $a1, $sp, 32 # 8-byte Folded Reload + ld.d $a4, $sp, 32 # 8-byte Folded Reload .LBB33_8: # %.loopexit366 st.w $zero, $s3, 36 st.w $zero, $fp, 0 lu12i.w $a0, 260096 - lu52i.d $a2, $a0, 1016 - pcalau12i $a5, %pc_hi20(.LCPI33_0) - fld.s $fa1, $a5, %pc_lo12(.LCPI33_0) - pcalau12i $a4, %pc_hi20(.LCPI33_1) - fld.s $fa0, $a4, %pc_lo12(.LCPI33_1) - st.d $a2, $a6, 0 + lu52i.d $a1, $a0, 1016 + st.d $a1, $a6, 0 st.w $a0, $a6, 8 bstrpick.d $s7, $a3, 31, 0 + lu12i.w $a0, 522239 + ori $a2, $a0, 4095 + movgr2fr.w $fa0, $a2 + lu12i.w $a0, -2049 + ori $a3, $a0, 4095 + lu32i.d $a3, 0 + movgr2fr.w $fa1, $a3 addi.d $a0, $s2, 8 - move $a2, $a1 + move $a1, $a4 fmov.s $fa6, $fa0 fmov.s $fa2, $fa0 fmov.s $fa3, $fa1 @@ -6189,9 +6169,9 @@ _ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_: # @_ZN11HullLibrary fsel $fa0, $fa0, $fa5, $fcc0 fcmp.clt.s $fcc0, $fa1, $fa5 fsel $fa1, $fa1, $fa5, $fcc0 - addi.w $a2, $a2, -1 + addi.w $a1, $a1, -1 add.d $a0, $a0, $s7 - bnez $a2, .LBB33_9 + bnez $a1, .LBB33_9 # %bb.10: fsub.s $fa5, $fa4, $fa2 fsub.s $fa4, $fa3, $fa6 @@ -6200,11 +6180,12 @@ _ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_: # @_ZN11HullLibrary fmadd.s $fa2, $fa5, $fa7, $fa2 fmadd.s $fa1, $fa4, $fa7, $fa6 fmadd.s $fa0, $fa3, $fa7, $fa0 - ori $a2, $zero, 3 - pcalau12i $a0, %pc_hi20(.LCPI33_2) - bltu $a1, $a2, .LBB33_47 + ori $a1, $zero, 3 + lu12i.w $a0, 219235 + bltu $a4, $a1, .LBB33_47 # %bb.11: - fld.s $fs1, $a0, %pc_lo12(.LCPI33_2) + ori $a1, $a0, 1981 + movgr2fr.w $fs1, $a1 fcmp.clt.s $fcc0, $fa5, $fs1 bcnez $fcc0, .LBB33_47 # %bb.12: @@ -6214,8 +6195,8 @@ _ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_: # @_ZN11HullLibrary fcmp.clt.s $fcc0, $fa3, $fs1 bcnez $fcc0, .LBB33_47 # %bb.14: - st.d $a5, $sp, 16 # 8-byte Folded Spill - st.d $a4, $sp, 24 # 8-byte Folded Spill + st.d $a3, $sp, 16 # 8-byte Folded Spill + st.d $a2, $sp, 24 # 8-byte Folded Spill move $s8, $zero fst.s $fa5, $a6, 0 fst.s $fa4, $a6, 4 @@ -6226,29 +6207,29 @@ _ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_: # @_ZN11HullLibrary fmul.s $fs5, $fa2, $fs2 fmul.s $fs6, $fa1, $fs3 fmul.s $fs7, $fa0, $fs4 - addi.d $s6, $s0, 8 - ori $a7, $zero, 1 + addi.d $s5, $s0, 8 + ori $a6, $zero, 1 b .LBB33_18 .p2align 4, , 16 .LBB33_15: # in Loop: Header=BB33_18 Depth=1 - ld.w $a6, $s3, 36 + ld.w $a1, $s3, 36 .LBB33_16: # %_ZN20btAlignedObjectArrayIiE10deallocateEv.exit.i.i343 # in Loop: Header=BB33_18 Depth=1 - st.b $a7, $s3, 56 + st.b $a6, $s3, 56 st.d $s4, $s3, 48 st.w $s1, $s3, 40 - ld.d $a1, $sp, 32 # 8-byte Folded Reload + ld.d $a4, $sp, 32 # 8-byte Folded Reload .LBB33_17: # %_ZN20btAlignedObjectArrayIiE9push_backERKi.exit # in Loop: Header=BB33_18 Depth=1 ld.d $a0, $s3, 48 - slli.d $a2, $a6, 2 - stx.w $s5, $a0, $a2 + slli.d $a1, $a1, 2 + stx.w $s6, $a0, $a1 ld.w $a0, $s3, 36 add.d $s2, $s2, $s7 addi.d $a0, $a0, 1 addi.w $s8, $s8, 1 st.w $a0, $s3, 36 - beq $s8, $a1, .LBB33_44 + beq $s8, $a4, .LBB33_44 .LBB33_18: # =>This Loop Header: Depth=1 # Child Loop BB33_21 Depth 2 # Child Loop BB33_36 Depth 2 @@ -6260,17 +6241,17 @@ _ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_: # @_ZN11HullLibrary fmul.s $fa0, $fs2, $fa0 fmul.s $fa1, $fs3, $fa1 fmul.s $fa2, $fs4, $fa2 - bstrpick.d $a4, $a0, 31, 0 - move $s5, $zero + bstrpick.d $a1, $a0, 31, 0 + move $s6, $zero beqz $a0, .LBB33_26 # %bb.19: # %.lr.ph.preheader # in Loop: Header=BB33_18 Depth=1 - move $a3, $a4 - move $a2, $s6 + move $a3, $a1 + move $a2, $s5 b .LBB33_21 .p2align 4, , 16 .LBB33_20: # in Loop: Header=BB33_21 Depth=2 - addi.w $s5, $s5, 1 + addi.w $s6, $s6, 1 addi.d $a3, $a3, -1 addi.d $a2, $a2, 16 beqz $a3, .LBB33_28 @@ -6316,34 +6297,34 @@ _ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_: # @_ZN11HullLibrary .p2align 4, , 16 .LBB33_26: # %.loopexit # in Loop: Header=BB33_18 Depth=1 - beq $s5, $a0, .LBB33_29 + beq $s6, $a0, .LBB33_29 # %bb.27: # in Loop: Header=BB33_18 Depth=1 - ld.w $a6, $s3, 36 + ld.w $a1, $s3, 36 ld.w $a0, $s3, 40 - bne $a6, $a0, .LBB33_17 + bne $a1, $a0, .LBB33_17 b .LBB33_30 .p2align 4, , 16 .LBB33_28: # in Loop: Header=BB33_18 Depth=1 - move $s5, $a0 + move $s6, $a0 .LBB33_29: # %.loopexit.thread # in Loop: Header=BB33_18 Depth=1 - alsl.d $a2, $a4, $s0, 4 - slli.d $a3, $a4, 4 - fstx.s $fa0, $s0, $a3 + alsl.d $a2, $a1, $s0, 4 + slli.d $a1, $a1, 4 + fstx.s $fa0, $s0, $a1 fst.s $fa1, $a2, 4 fst.s $fa2, $a2, 8 addi.d $a0, $a0, 1 st.w $a0, $fp, 0 - ld.w $a6, $s3, 36 + ld.w $a1, $s3, 36 ld.w $a0, $s3, 40 - bne $a6, $a0, .LBB33_17 + bne $a1, $a0, .LBB33_17 .LBB33_30: # in Loop: Header=BB33_18 Depth=1 - sltui $a0, $a6, 1 - slli.w $a2, $a6, 1 + sltui $a0, $a1, 1 + slli.w $a2, $a1, 1 masknez $a2, $a2, $a0 - maskeqz $a0, $a7, $a0 + maskeqz $a0, $a6, $a0 or $s1, $a0, $a2 - bge $a6, $s1, .LBB33_17 + bge $a1, $s1, .LBB33_17 # %bb.31: # in Loop: Header=BB33_18 Depth=1 beqz $s1, .LBB33_40 # %bb.32: # in Loop: Header=BB33_18 Depth=1 @@ -6351,24 +6332,24 @@ _ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_: # @_ZN11HullLibrary ori $a1, $zero, 16 pcaddu18i $ra, %call36(_Z22btAlignedAllocInternalmi) jirl $ra, $ra, 0 - ori $a7, $zero, 1 - ld.w $a6, $s3, 36 + ori $a6, $zero, 1 + ld.w $a1, $s3, 36 move $s4, $a0 ld.d $a0, $s3, 48 - blez $a6, .LBB33_41 + blez $a1, .LBB33_41 .LBB33_33: # %.lr.ph.i.i.i344 # in Loop: Header=BB33_18 Depth=1 move $a2, $zero - ori $a1, $zero, 8 - bltu $a6, $a1, .LBB33_38 + ori $a3, $zero, 8 + bltu $a1, $a3, .LBB33_38 # %bb.34: # %.lr.ph.i.i.i344 # in Loop: Header=BB33_18 Depth=1 sub.d $a3, $s4, $a0 - ori $a1, $zero, 32 - bltu $a3, $a1, .LBB33_38 + ori $a4, $zero, 32 + bltu $a3, $a4, .LBB33_38 # %bb.35: # %vector.ph # in Loop: Header=BB33_18 Depth=1 - bstrpick.d $a2, $a6, 30, 3 + bstrpick.d $a2, $a1, 30, 3 slli.d $a2, $a2, 3 addi.d $a3, $a0, 16 addi.d $a4, $s4, 16 @@ -6387,10 +6368,10 @@ _ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_: # @_ZN11HullLibrary bnez $a5, .LBB33_36 # %bb.37: # %middle.block # in Loop: Header=BB33_18 Depth=1 - beq $a2, $a6, .LBB33_42 + beq $a2, $a1, .LBB33_42 .LBB33_38: # %scalar.ph.preheader # in Loop: Header=BB33_18 Depth=1 - sub.d $a1, $a6, $a2 + sub.d $a1, $a1, $a2 alsl.d $a3, $a2, $a0, 2 alsl.d $a2, $a2, $s4, 2 .p2align 4, , 16 @@ -6407,7 +6388,7 @@ _ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_: # @_ZN11HullLibrary .LBB33_40: # in Loop: Header=BB33_18 Depth=1 move $s4, $zero ld.d $a0, $s3, 48 - bgtz $a6, .LBB33_33 + bgtz $a1, .LBB33_33 .LBB33_41: # %_ZNK20btAlignedObjectArrayIiE4copyEiiPi.exit.i.i340 # in Loop: Header=BB33_18 Depth=1 beqz $a0, .LBB33_16 @@ -6418,17 +6399,17 @@ _ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_: # @_ZN11HullLibrary # %bb.43: # in Loop: Header=BB33_18 Depth=1 pcaddu18i $ra, %call36(_Z21btAlignedFreeInternalPv) jirl $ra, $ra, 0 - ori $a7, $zero, 1 + ori $a6, $zero, 1 b .LBB33_15 .LBB33_44: ld.w $a0, $fp, 0 beqz $a0, .LBB33_49 # %bb.45: # %.lr.ph388.preheader + bstrpick.d $a1, $a0, 31, 0 + ld.d $a3, $sp, 24 # 8-byte Folded Reload + movgr2fr.w $fa0, $a3 ld.d $a2, $sp, 16 # 8-byte Folded Reload - fld.s $fa3, $a2, %pc_lo12(.LCPI33_0) - ld.d $a4, $sp, 24 # 8-byte Folded Reload - fld.s $fa0, $a4, %pc_lo12(.LCPI33_1) - bstrpick.d $a3, $a0, 31, 0 + movgr2fr.w $fa3, $a2 addi.d $a2, $s0, 8 fmov.s $fa1, $fa0 fmov.s $fa2, $fa0 @@ -6452,15 +6433,16 @@ _ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_: # @_ZN11HullLibrary fsel $fa0, $fa0, $fa6, $fcc0 fcmp.clt.s $fcc0, $fa3, $fa6 fsel $fa3, $fa3, $fa6, $fcc0 - addi.d $a3, $a3, -1 + addi.d $a1, $a1, -1 addi.d $a2, $a2, 16 - bnez $a3, .LBB33_46 + bnez $a1, .LBB33_46 b .LBB33_50 .LBB33_47: - fld.s $ft0, $a4, %pc_lo12(.LCPI33_1) - fld.s $fa7, $a0, %pc_lo12(.LCPI33_2) + movgr2fr.w $ft0, $a2 fcmp.clt.s $fcc0, $fa5, $ft0 fsel $fa6, $ft0, $fa5, $fcc0 + ori $a0, $a0, 1981 + movgr2fr.w $fa7, $a0 fcmp.clt.s $fcc0, $fa7, $fa5 fsel $fa6, $ft0, $fa6, $fcc0 fcmp.clt.s $fcc0, $fa4, $fa6 @@ -6474,16 +6456,17 @@ _ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_: # @_ZN11HullLibrary fcmp.ceq.s $fcc0, $fa6, $ft0 bceqz $fcc0, .LBB33_56 # %bb.48: - pcalau12i $a0, %pc_hi20(.LCPI33_3) - fld.s $fa6, $a0, %pc_lo12(.LCPI33_3) + lu12i.w $a0, 246333 + ori $a0, $a0, 1802 + movgr2fr.w $fa6, $a0 fmov.s $fa7, $fa6 fmov.s $fa5, $fa6 b .LBB33_58 .LBB33_49: - ld.d $a2, $sp, 16 # 8-byte Folded Reload - fld.s $fa3, $a2, %pc_lo12(.LCPI33_0) - ld.d $a4, $sp, 24 # 8-byte Folded Reload - fld.s $fa0, $a4, %pc_lo12(.LCPI33_1) + ld.d $a3, $sp, 24 # 8-byte Folded Reload + movgr2fr.w $fa0, $a3 + ld.d $a1, $sp, 16 # 8-byte Folded Reload + movgr2fr.w $fa3, $a1 fmov.s $fa1, $fa0 fmov.s $fa2, $fa0 fmov.s $fa4, $fa3 @@ -6491,9 +6474,9 @@ _ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_: # @_ZN11HullLibrary .LBB33_50: # %._crit_edge fsub.s $fa5, $fa5, $fa2 fsub.s $fa4, $fa4, $fa1 - ori $a2, $zero, 3 + ori $a1, $zero, 3 fsub.s $fa3, $fa3, $fa0 - bltu $a0, $a2, .LBB33_54 + bltu $a0, $a1, .LBB33_54 # %bb.51: # %._crit_edge fcmp.clt.s $fcc0, $fa5, $fs1 bcnez $fcc0, .LBB33_54 @@ -6504,7 +6487,7 @@ _ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_: # @_ZN11HullLibrary fcmp.clt.s $fcc0, $fa3, $fs1 bceqz $fcc0, .LBB33_59 .LBB33_54: - fld.s $fa7, $a4, %pc_lo12(.LCPI33_1) + movgr2fr.w $fa7, $a3 fcmp.clt.s $fcc0, $fa5, $fa7 fsel $fa6, $fa7, $fa5, $fcc0 fcmp.cle.s $fcc0, $fs1, $fa5 @@ -6522,16 +6505,18 @@ _ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_: # @_ZN11HullLibrary fmadd.s $fa1, $fa4, $ft0, $fa1 fcmp.ceq.s $fcc0, $fa6, $fa7 fmadd.s $fa0, $fa3, $ft0, $fa0 - bceqz $fcc0, .LBB33_61 + bceqz $fcc0, .LBB33_60 # %bb.55: - pcalau12i $a0, %pc_hi20(.LCPI33_3) - fld.s $fa5, $a0, %pc_lo12(.LCPI33_3) + lu12i.w $a0, 246333 + ori $a0, $a0, 1802 + movgr2fr.w $fa5, $a0 fmov.s $fa7, $fa5 fmov.s $fa6, $fa5 b .LBB33_58 .LBB33_56: - pcalau12i $a0, %pc_hi20(.LCPI33_4) - fld.s $ft0, $a0, %pc_lo12(.LCPI33_4) + lu12i.w $a0, 251084 + ori $a0, $a0, 3277 + movgr2fr.w $ft0, $a0 fmul.s $fa6, $fa6, $ft0 fcmp.clt.s $fcc0, $fa5, $fa7 fsel $fa5, $fa5, $fa6, $fcc0 @@ -6575,6 +6560,7 @@ _ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_: # @_ZN11HullLibrary ori $a0, $zero, 8 st.w $a0, $fp, 0 .LBB33_59: + sltu $a0, $zero, $a4 fld.d $fs7, $sp, 40 # 8-byte Folded Reload fld.d $fs6, $sp, 48 # 8-byte Folded Reload fld.d $fs5, $sp, 56 # 8-byte Folded Reload @@ -6595,12 +6581,11 @@ _ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_: # @_ZN11HullLibrary ld.d $fp, $sp, 176 # 8-byte Folded Reload ld.d $ra, $sp, 184 # 8-byte Folded Reload addi.d $sp, $sp, 192 -.LBB33_60: - sltu $a0, $zero, $a1 ret -.LBB33_61: - pcalau12i $a0, %pc_hi20(.LCPI33_4) - fld.s $fa7, $a0, %pc_lo12(.LCPI33_4) +.LBB33_60: + lu12i.w $a0, 251084 + ori $a0, $a0, 3277 + movgr2fr.w $fa7, $a0 fmul.s $fa6, $fa6, $fa7 fcmp.clt.s $fcc0, $fa5, $fs1 fsel $fa5, $fa5, $fa6, $fcc0 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexHullShape.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexHullShape.s index b7baa2e1..f40ca517 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexHullShape.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexHullShape.s @@ -347,84 +347,81 @@ _ZN17btConvexHullShape8addPointERK9btVector3: # @_ZN17btConvexHullShape8addPoint .size _ZN17btConvexHullShape8addPointERK9btVector3, .Lfunc_end4-_ZN17btConvexHullShape8addPointERK9btVector3 .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK17btConvexHullShape37localGetSupportingVertexWithoutMarginERK9btVector3 -.LCPI5_0: - .word 0x38d1b717 # float 9.99999974E-5 -.LCPI5_1: - .word 0xdd5e0b6b # float -9.99999984E+17 - .text - .globl _ZNK17btConvexHullShape37localGetSupportingVertexWithoutMarginERK9btVector3 + .globl _ZNK17btConvexHullShape37localGetSupportingVertexWithoutMarginERK9btVector3 # -- Begin function _ZNK17btConvexHullShape37localGetSupportingVertexWithoutMarginERK9btVector3 .p2align 5 .type _ZNK17btConvexHullShape37localGetSupportingVertexWithoutMarginERK9btVector3,@function _ZNK17btConvexHullShape37localGetSupportingVertexWithoutMarginERK9btVector3: # @_ZNK17btConvexHullShape37localGetSupportingVertexWithoutMarginERK9btVector3 # %bb.0: - fld.s $fa0, $a1, 0 - fld.s $fa2, $a1, 4 - fld.s $fa3, $a1, 8 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - fld.s $fa4, $a1, %pc_lo12(.LCPI5_0) - fmul.s $fa1, $fa2, $fa2 - fmadd.s $fa1, $fa0, $fa0, $fa1 - fmadd.s $fa1, $fa3, $fa3, $fa1 - fcmp.clt.s $fcc0, $fa1, $fa4 + fld.s $fa0, $a1, 4 + fld.s $fa1, $a1, 0 + fld.s $fa2, $a1, 8 + fmul.s $fa3, $fa0, $fa0 + fmadd.s $fa3, $fa1, $fa1, $fa3 + fmadd.s $fa3, $fa2, $fa2, $fa3 + lu12i.w $a1, 232731 + ori $a1, $a1, 1815 + movgr2fr.w $fa4, $a1 + fcmp.clt.s $fcc0, $fa3, $fa4 + move $a1, $a0 bceqz $fcc0, .LBB5_3 # %bb.1: movgr2fr.w $fa0, $zero vldi $vr1, -1168 fmov.s $fa2, $fa0 - ld.w $a1, $a0, 108 - bgtz $a1, .LBB5_4 + ld.w $a2, $a1, 108 + bgtz $a2, .LBB5_4 .LBB5_2: move $a1, $zero move $a0, $zero ret .LBB5_3: - frsqrt.s $fa4, $fa1 - fmul.s $fa1, $fa0, $fa4 - fmul.s $fa0, $fa2, $fa4 - fmul.s $fa2, $fa3, $fa4 - ld.w $a1, $a0, 108 - blez $a1, .LBB5_2 + frsqrt.s $fa3, $fa3 + fmul.s $fa1, $fa1, $fa3 + fmul.s $fa0, $fa0, $fa3 + fmul.s $fa2, $fa2, $fa3 + ld.w $a2, $a1, 108 + blez $a2, .LBB5_2 .LBB5_4: # %.lr.ph - ld.d $a2, $a0, 120 - fld.s $fa3, $a0, 24 - fld.s $fa4, $a0, 28 - fld.s $fa5, $a0, 32 - pcalau12i $a0, %pc_hi20(.LCPI5_1) - fld.s $fa6, $a0, %pc_lo12(.LCPI5_1) move $a0, $zero - movgr2fr.w $fa7, $zero - addi.d $a2, $a2, 8 - fmov.s $ft0, $fa7 + ld.d $a3, $a1, 120 + fld.s $fa3, $a1, 24 + fld.s $fa4, $a1, 28 + fld.s $fa5, $a1, 32 + addi.d $a1, $a3, 8 + movgr2fr.w $fa6, $zero + lu12i.w $a3, -141856 + ori $a3, $a3, 2923 + lu32i.d $a3, 0 + movgr2fr.w $ft0, $a3 + fmov.s $fa7, $fa6 b .LBB5_6 .p2align 4, , 16 .LBB5_5: # in Loop: Header=BB5_6 Depth=1 - addi.d $a1, $a1, -1 - addi.d $a2, $a2, 16 - beqz $a1, .LBB5_8 + addi.d $a2, $a2, -1 + addi.d $a1, $a1, 16 + beqz $a2, .LBB5_8 .LBB5_6: # =>This Inner Loop Header: Depth=1 - fld.s $ft1, $a2, -8 - fld.s $ft2, $a2, -4 - fld.s $ft4, $a2, 0 + fld.s $ft1, $a1, -8 + fld.s $ft2, $a1, -4 + fld.s $ft4, $a1, 0 fmul.s $ft3, $ft1, $fa3 fmul.s $ft1, $ft2, $fa4 fmul.s $ft2, $ft4, $fa5 fmul.s $ft4, $fa0, $ft1 fmadd.s $ft4, $fa1, $ft3, $ft4 fmadd.s $ft4, $fa2, $ft2, $ft4 - fcmp.cule.s $fcc0, $ft4, $fa6 + fcmp.cule.s $fcc0, $ft4, $ft0 bcnez $fcc0, .LBB5_5 # %bb.7: # in Loop: Header=BB5_6 Depth=1 movfr2gr.s $a0, $ft3 - fmov.s $ft0, $ft1 - fmov.s $fa7, $ft2 - fmov.s $fa6, $ft4 + fmov.s $fa7, $ft1 + fmov.s $fa6, $ft2 + fmov.s $ft0, $ft4 b .LBB5_5 .LBB5_8: # %._crit_edge.loopexit - movfr2gr.s $a1, $ft0 - bstrins.d $a0, $a1, 63, 32 movfr2gr.s $a1, $fa7 + bstrins.d $a0, $a1, 63, 32 + movfr2gr.s $a1, $fa6 bstrpick.d $a1, $a1, 31, 0 ret .Lfunc_end5: @@ -535,12 +532,7 @@ _ZNK17btConvexHullShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9bt .Lfunc_end6: .size _ZNK17btConvexHullShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i, .Lfunc_end6-_ZNK17btConvexHullShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK17btConvexHullShape24localGetSupportingVertexERK9btVector3 -.LCPI7_0: - .word 0x28800000 # float 1.42108547E-14 - .text - .globl _ZNK17btConvexHullShape24localGetSupportingVertexERK9btVector3 + .globl _ZNK17btConvexHullShape24localGetSupportingVertexERK9btVector3 # -- Begin function _ZNK17btConvexHullShape24localGetSupportingVertexERK9btVector3 .p2align 5 .type _ZNK17btConvexHullShape24localGetSupportingVertexERK9btVector3,@function _ZNK17btConvexHullShape24localGetSupportingVertexERK9btVector3: # @_ZNK17btConvexHullShape24localGetSupportingVertexERK9btVector3 @@ -591,28 +583,28 @@ _ZNK17btConvexHullShape24localGetSupportingVertexERK9btVector3: # @_ZNK17btConve # %bb.1: movgr2fr.w $fs0, $fp movgr2fr.w $fs1, $s3 - movgr2fr.w $fs2, $s1 - fld.s $fa0, $s2, 0 - fld.s $fa1, $s2, 4 + fld.s $fa0, $s2, 4 + fld.s $fa1, $s2, 0 fld.s $fa2, $s2, 8 - pcalau12i $a0, %pc_hi20(.LCPI7_0) - fld.s $fa3, $a0, %pc_lo12(.LCPI7_0) - fmul.s $fa4, $fa1, $fa1 - fmadd.s $fa4, $fa0, $fa0, $fa4 - fmadd.s $fa4, $fa2, $fa2, $fa4 - fcmp.clt.s $fcc0, $fa4, $fa3 + movgr2fr.w $fs2, $s1 + fmul.s $fa3, $fa0, $fa0 + fmadd.s $fa3, $fa1, $fa1, $fa3 + fmadd.s $fa3, $fa2, $fa2, $fa3 + lu12i.w $a0, 165888 + movgr2fr.w $fa4, $a0 + fcmp.clt.s $fcc0, $fa3, $fa4 vldi $vr3, -1040 - fsel $fa0, $fa0, $fa3, $fcc0 fsel $fa1, $fa1, $fa3, $fcc0 + fsel $fa0, $fa0, $fa3, $fcc0 fsel $fa2, $fa2, $fa3, $fcc0 - fmul.s $fa3, $fa1, $fa1 + fmul.s $fa3, $fa0, $fa0 ld.d $a0, $s0, 0 - fmadd.s $fa3, $fa0, $fa0, $fa3 + fmadd.s $fa3, $fa1, $fa1, $fa3 fmadd.s $fa3, $fa2, $fa2, $fa3 frsqrt.s $fa3, $fa3 ld.d $a1, $a0, 88 - fmul.s $fs3, $fa0, $fa3 - fmul.s $fs4, $fa1, $fa3 + fmul.s $fs3, $fa1, $fa3 + fmul.s $fs4, $fa0, $fa3 fmul.s $fs5, $fa2, $fa3 move $a0, $s0 jirl $ra, $a1, 0 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexInternalShape.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexInternalShape.s index f8ae5675..8f296896 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexInternalShape.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexInternalShape.s @@ -275,12 +275,7 @@ _ZNK21btConvexInternalShape11getAabbSlowERK11btTransformR9btVector3S4_: # @_ZNK2 .size _ZNK21btConvexInternalShape11getAabbSlowERK11btTransformR9btVector3S4_, .Lfunc_end2-_ZNK21btConvexInternalShape11getAabbSlowERK11btTransformR9btVector3S4_ .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK21btConvexInternalShape24localGetSupportingVertexERK9btVector3 -.LCPI3_0: - .word 0x28800000 # float 1.42108547E-14 - .text - .globl _ZNK21btConvexInternalShape24localGetSupportingVertexERK9btVector3 + .globl _ZNK21btConvexInternalShape24localGetSupportingVertexERK9btVector3 # -- Begin function _ZNK21btConvexInternalShape24localGetSupportingVertexERK9btVector3 .p2align 5 .type _ZNK21btConvexInternalShape24localGetSupportingVertexERK9btVector3,@function _ZNK21btConvexInternalShape24localGetSupportingVertexERK9btVector3: # @_ZNK21btConvexInternalShape24localGetSupportingVertexERK9btVector3 @@ -331,28 +326,28 @@ _ZNK21btConvexInternalShape24localGetSupportingVertexERK9btVector3: # @_ZNK21btC # %bb.1: movgr2fr.w $fs0, $fp movgr2fr.w $fs1, $s3 - movgr2fr.w $fs2, $s1 - fld.s $fa0, $s2, 0 - fld.s $fa1, $s2, 4 + fld.s $fa0, $s2, 4 + fld.s $fa1, $s2, 0 fld.s $fa2, $s2, 8 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.s $fa3, $a0, %pc_lo12(.LCPI3_0) - fmul.s $fa4, $fa1, $fa1 - fmadd.s $fa4, $fa0, $fa0, $fa4 - fmadd.s $fa4, $fa2, $fa2, $fa4 - fcmp.clt.s $fcc0, $fa4, $fa3 + movgr2fr.w $fs2, $s1 + fmul.s $fa3, $fa0, $fa0 + fmadd.s $fa3, $fa1, $fa1, $fa3 + fmadd.s $fa3, $fa2, $fa2, $fa3 + lu12i.w $a0, 165888 + movgr2fr.w $fa4, $a0 + fcmp.clt.s $fcc0, $fa3, $fa4 vldi $vr3, -1040 - fsel $fa0, $fa0, $fa3, $fcc0 fsel $fa1, $fa1, $fa3, $fcc0 + fsel $fa0, $fa0, $fa3, $fcc0 fsel $fa2, $fa2, $fa3, $fcc0 - fmul.s $fa3, $fa1, $fa1 + fmul.s $fa3, $fa0, $fa0 ld.d $a0, $s0, 0 - fmadd.s $fa3, $fa0, $fa0, $fa3 + fmadd.s $fa3, $fa1, $fa1, $fa3 fmadd.s $fa3, $fa2, $fa2, $fa3 frsqrt.s $fa3, $fa3 ld.d $a1, $a0, 88 - fmul.s $fs3, $fa0, $fa3 - fmul.s $fs4, $fa1, $fa3 + fmul.s $fs3, $fa1, $fa3 + fmul.s $fs4, $fa0, $fa3 fmul.s $fs5, $fa2, $fa3 move $a0, $s0 jirl $ra, $a1, 0 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexPlaneCollisionAlgorithm.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexPlaneCollisionAlgorithm.s index 7c2b188a..e3d75393 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexPlaneCollisionAlgorithm.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexPlaneCollisionAlgorithm.s @@ -591,14 +591,6 @@ _ZN31btConvexPlaneCollisionAlgorithm20collideSingleContactERK12btQuaternionP17bt .word 0x00000000 # float 0 .word 0x00000000 # float 0 .word 0x3f800000 # float 1 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI4_1: - .word 0x3f3504f3 # float 0.707106769 -.LCPI4_2: - .word 0x3ec90fdb # float 0.392699093 -.LCPI4_3: - .word 0x40c90fdb # float 6.28318548 .text .globl _ZN31btConvexPlaneCollisionAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDispatcherInfoP16btManifoldResult .p2align 5 @@ -670,10 +662,11 @@ _ZN31btConvexPlaneCollisionAlgorithm16processCollisionEP17btCollisionObjectS1_RK bge $a0, $a1, .LBB4_8 # %bb.2: fld.s $fa0, $s4, 68 - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.s $fa1, $a0, %pc_lo12(.LCPI4_1) - fabs.s $fa2, $fa0 - fcmp.cule.s $fcc0, $fa2, $fa1 + fabs.s $fa1, $fa0 + lu12i.w $a0, 258896 + ori $a0, $a0, 1267 + movgr2fr.w $fa2, $a0 + fcmp.cule.s $fcc0, $fa1, $fa2 bcnez $fcc0, .LBB4_4 # %bb.3: fld.s $fa1, $s4, 64 @@ -681,8 +674,8 @@ _ZN31btConvexPlaneCollisionAlgorithm16processCollisionEP17btCollisionObjectS1_RK fmadd.s $fa2, $fa1, $fa1, $fa2 frsqrt.s $fa2, $fa2 fneg.s $fa0, $fa0 - fmul.s $fs4, $fa2, $fa0 - fmul.s $fs5, $fa1, $fa2 + fmul.s $fs3, $fa2, $fa0 + fmul.s $fs4, $fa1, $fa2 movgr2fr.w $fs2, $zero b .LBB4_5 .LBB4_4: @@ -693,8 +686,8 @@ _ZN31btConvexPlaneCollisionAlgorithm16processCollisionEP17btCollisionObjectS1_RK frsqrt.s $fa2, $fa2 fneg.s $fa0, $fa0 fmul.s $fs2, $fa2, $fa0 - fmul.s $fs4, $fa1, $fa2 - movgr2fr.w $fs5, $zero + fmul.s $fs3, $fa1, $fa2 + movgr2fr.w $fs4, $zero .LBB4_5: # %_Z13btPlaneSpace1RK9btVector3RS_S2_.exit ld.d $a0, $s3, 0 ld.d $a1, $a0, 32 @@ -703,11 +696,12 @@ _ZN31btConvexPlaneCollisionAlgorithm16processCollisionEP17btCollisionObjectS1_RK pcalau12i $a0, %got_pc_hi20(gContactBreakingThreshold) ld.d $a0, $a0, %got_pc_lo12(gContactBreakingThreshold) fld.s $fa1, $a0, 0 - pcalau12i $a0, %pc_hi20(.LCPI4_2) - fld.s $fa2, $a0, %pc_lo12(.LCPI4_2) fdiv.s $fa0, $fa1, $fa0 - fcmp.clt.s $fcc0, $fa2, $fa0 - fsel $fa0, $fa0, $fa2, $fcc0 + lu12i.w $a0, 257168 + ori $a0, $a0, 4059 + movgr2fr.w $fa1, $a0 + fcmp.clt.s $fcc0, $fa1, $fa0 + fsel $fa0, $fa0, $fa1, $fcc0 vldi $vr1, -1184 fmul.s $fs0, $fa0, $fa1 fmov.s $fa0, $fs0 @@ -722,18 +716,19 @@ _ZN31btConvexPlaneCollisionAlgorithm16processCollisionEP17btCollisionObjectS1_RK # %bb.6: # %.lr.ph fmov.s $fs7, $fa0 move $s3, $zero - fmul.s $fa0, $fs4, $fs4 + fmul.s $fa0, $fs3, $fs3 fmadd.s $fa0, $fs2, $fs2, $fa0 - fmadd.s $fa0, $fs5, $fs5, $fa0 + fmadd.s $fa0, $fs4, $fs4, $fa0 fsqrt.s $fa0, $fa0 fdiv.s $fa0, $fs1, $fa0 fmul.s $fa1, $fs2, $fa0 fst.s $fa1, $sp, 12 # 4-byte Folded Spill - pcalau12i $a1, %pc_hi20(.LCPI4_3) - fld.s $fa1, $a1, %pc_lo12(.LCPI4_3) - fst.s $fa1, $sp, 8 # 4-byte Folded Spill + fmul.s $fs3, $fs3, $fa0 fmul.s $fs4, $fs4, $fa0 - fmul.s $fs5, $fs5, $fa0 + lu12i.w $a1, 265360 + ori $a1, $a1, 4059 + movgr2fr.w $fa0, $a1 + fst.s $fa0, $sp, 8 # 4-byte Folded Spill .p2align 4, , 16 .LBB4_7: # =>This Inner Loop Header: Depth=1 bstrpick.d $a1, $s3, 31, 0 @@ -746,11 +741,11 @@ _ZN31btConvexPlaneCollisionAlgorithm16processCollisionEP17btCollisionObjectS1_RK fld.s $fs6, $s4, 64 fmov.s $fs2, $fs7 fld.s $fs7, $s4, 60 - fld.s $fs3, $s4, 68 + fld.s $fs5, $s4, 68 fmul.s $fa0, $fa1, $fa0 fmul.s $fa1, $fs6, $fs6 fmadd.s $fa1, $fs7, $fs7, $fa1 - fmadd.s $fa1, $fs3, $fs3, $fa1 + fmadd.s $fa1, $fs5, $fs5, $fa1 fsqrt.s $fs0, $fa1 vldi $vr1, -1184 fmul.s $fs1, $fa0, $fa1 @@ -761,49 +756,49 @@ _ZN31btConvexPlaneCollisionAlgorithm16processCollisionEP17btCollisionObjectS1_RK fmul.s $fs0, $fs7, $fa0 fmov.s $fs7, $fs2 fmul.s $fs6, $fs6, $fa0 - fmul.s $fs3, $fs3, $fa0 + fmul.s $fs5, $fs5, $fa0 fmov.s $fa0, $fs1 pcaddu18i $ra, %call36(cosf) jirl $ra, $ra, 0 fneg.s $fa1, $fs0 fneg.s $fa2, $fs6 - fneg.s $fa3, $fs3 + fneg.s $fa3, $fs5 fmul.s $fa4, $fs2, $fa1 fld.s $ft2, $sp, 12 # 4-byte Folded Reload fmadd.s $fa4, $fa0, $ft2, $fa4 - fmadd.s $fa4, $fa2, $fs5, $fa4 - fmadd.s $fa5, $fs3, $fs4, $fa4 + fmadd.s $fa4, $fa2, $fs4, $fa4 + fmadd.s $fa5, $fs5, $fs3, $fa4 fmul.s $fa2, $fs2, $fa2 - fmadd.s $fa2, $fa0, $fs4, $fa2 + fmadd.s $fa2, $fa0, $fs3, $fa2 fmadd.s $fa2, $fa3, $ft2, $fa2 - fmadd.s $fa6, $fs0, $fs5, $fa2 + fmadd.s $fa6, $fs0, $fs4, $fa2 fmul.s $fa3, $fs2, $fa3 - fmadd.s $fa3, $fa0, $fs5, $fa3 - fmadd.s $fa3, $fa1, $fs4, $fa3 + fmadd.s $fa3, $fa0, $fs4, $fa3 + fmadd.s $fa3, $fa1, $fs3, $fa3 fmadd.s $fa7, $fs6, $ft2, $fa3 fmul.s $ft0, $ft2, $fs0 fmadd.s $ft0, $fa0, $fs2, $ft0 - fmadd.s $ft0, $fs6, $fs4, $ft0 - fmadd.s $ft0, $fs3, $fs5, $ft0 + fmadd.s $ft0, $fs6, $fs3, $ft0 + fmadd.s $ft0, $fs5, $fs4, $ft0 fmul.s $ft1, $fa0, $fa5 fmadd.s $ft1, $ft0, $fs0, $ft1 - fmadd.s $ft1, $fa6, $fs3, $ft1 + fmadd.s $ft1, $fa6, $fs5, $ft1 fnmadd.s $fa3, $fs6, $ft2, $fa3 fmadd.s $ft1, $fa3, $fs6, $ft1 fmul.s $fa6, $fa0, $fa6 fmadd.s $fa6, $ft0, $fs6, $fa6 fmadd.s $fa6, $fa7, $fs0, $fa6 - fnmadd.s $fa4, $fs3, $fs4, $fa4 - fmadd.s $fa4, $fa4, $fs3, $fa6 + fnmadd.s $fa4, $fs5, $fs3, $fa4 + fmadd.s $fa4, $fa4, $fs5, $fa6 fmul.s $fa6, $fa0, $fa7 - fmadd.s $fa6, $ft0, $fs3, $fa6 + fmadd.s $fa6, $ft0, $fs5, $fa6 fmadd.s $fa6, $fa5, $fs6, $fa6 - fnmadd.s $fa2, $fs0, $fs5, $fa2 + fnmadd.s $fa2, $fs0, $fs4, $fa2 fmadd.s $fa6, $fa2, $fs0, $fa6 fmul.s $fa1, $fa5, $fa1 fmadd.s $fa0, $ft0, $fa0, $fa1 fmadd.s $fa0, $fa2, $fs6, $fa0 - fmadd.s $fa0, $fa3, $fs3, $fa0 + fmadd.s $fa0, $fa3, $fs5, $fa0 movfr2gr.s $a0, $ft1 movfr2gr.s $a1, $fa4 bstrins.d $a0, $a1, 63, 32 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexPointCloudShape.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexPointCloudShape.s index 92bb815d..5f38ddcd 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexPointCloudShape.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexPointCloudShape.s @@ -14,84 +14,81 @@ _ZN23btConvexPointCloudShape15setLocalScalingERK9btVector3: # @_ZN23btConvexPoin .size _ZN23btConvexPointCloudShape15setLocalScalingERK9btVector3, .Lfunc_end0-_ZN23btConvexPointCloudShape15setLocalScalingERK9btVector3 .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK23btConvexPointCloudShape37localGetSupportingVertexWithoutMarginERK9btVector3 -.LCPI1_0: - .word 0x38d1b717 # float 9.99999974E-5 -.LCPI1_1: - .word 0xdd5e0b6b # float -9.99999984E+17 - .text - .globl _ZNK23btConvexPointCloudShape37localGetSupportingVertexWithoutMarginERK9btVector3 + .globl _ZNK23btConvexPointCloudShape37localGetSupportingVertexWithoutMarginERK9btVector3 # -- Begin function _ZNK23btConvexPointCloudShape37localGetSupportingVertexWithoutMarginERK9btVector3 .p2align 5 .type _ZNK23btConvexPointCloudShape37localGetSupportingVertexWithoutMarginERK9btVector3,@function _ZNK23btConvexPointCloudShape37localGetSupportingVertexWithoutMarginERK9btVector3: # @_ZNK23btConvexPointCloudShape37localGetSupportingVertexWithoutMarginERK9btVector3 # %bb.0: - fld.s $fa0, $a1, 0 - fld.s $fa2, $a1, 4 - fld.s $fa3, $a1, 8 - pcalau12i $a1, %pc_hi20(.LCPI1_0) - fld.s $fa4, $a1, %pc_lo12(.LCPI1_0) - fmul.s $fa1, $fa2, $fa2 - fmadd.s $fa1, $fa0, $fa0, $fa1 - fmadd.s $fa1, $fa3, $fa3, $fa1 - fcmp.clt.s $fcc0, $fa1, $fa4 + fld.s $fa0, $a1, 4 + fld.s $fa1, $a1, 0 + fld.s $fa2, $a1, 8 + fmul.s $fa3, $fa0, $fa0 + fmadd.s $fa3, $fa1, $fa1, $fa3 + fmadd.s $fa3, $fa2, $fa2, $fa3 + lu12i.w $a1, 232731 + ori $a1, $a1, 1815 + movgr2fr.w $fa4, $a1 + fcmp.clt.s $fcc0, $fa3, $fa4 + move $a1, $a0 bceqz $fcc0, .LBB1_3 # %bb.1: movgr2fr.w $fa0, $zero vldi $vr1, -1168 fmov.s $fa2, $fa0 - ld.w $a1, $a0, 112 - bgtz $a1, .LBB1_4 + ld.w $a2, $a1, 112 + bgtz $a2, .LBB1_4 .LBB1_2: move $a1, $zero move $a0, $zero ret .LBB1_3: - frsqrt.s $fa4, $fa1 - fmul.s $fa1, $fa0, $fa4 - fmul.s $fa0, $fa2, $fa4 - fmul.s $fa2, $fa3, $fa4 - ld.w $a1, $a0, 112 - blez $a1, .LBB1_2 + frsqrt.s $fa3, $fa3 + fmul.s $fa1, $fa1, $fa3 + fmul.s $fa0, $fa0, $fa3 + fmul.s $fa2, $fa2, $fa3 + ld.w $a2, $a1, 112 + blez $a2, .LBB1_2 .LBB1_4: # %.lr.ph - ld.d $a2, $a0, 104 - fld.s $fa3, $a0, 24 - fld.s $fa4, $a0, 28 - fld.s $fa5, $a0, 32 - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.s $fa6, $a0, %pc_lo12(.LCPI1_1) move $a0, $zero - movgr2fr.w $fa7, $zero - addi.d $a2, $a2, 8 - fmov.s $ft0, $fa7 + ld.d $a3, $a1, 104 + fld.s $fa3, $a1, 24 + fld.s $fa4, $a1, 28 + fld.s $fa5, $a1, 32 + addi.d $a1, $a3, 8 + movgr2fr.w $fa6, $zero + lu12i.w $a3, -141856 + ori $a3, $a3, 2923 + lu32i.d $a3, 0 + movgr2fr.w $ft0, $a3 + fmov.s $fa7, $fa6 b .LBB1_6 .p2align 4, , 16 .LBB1_5: # in Loop: Header=BB1_6 Depth=1 - addi.d $a1, $a1, -1 - addi.d $a2, $a2, 16 - beqz $a1, .LBB1_8 + addi.d $a2, $a2, -1 + addi.d $a1, $a1, 16 + beqz $a2, .LBB1_8 .LBB1_6: # =>This Inner Loop Header: Depth=1 - fld.s $ft1, $a2, -8 - fld.s $ft2, $a2, -4 - fld.s $ft4, $a2, 0 + fld.s $ft1, $a1, -8 + fld.s $ft2, $a1, -4 + fld.s $ft4, $a1, 0 fmul.s $ft3, $ft1, $fa3 fmul.s $ft1, $ft2, $fa4 fmul.s $ft2, $ft4, $fa5 fmul.s $ft4, $fa0, $ft1 fmadd.s $ft4, $fa1, $ft3, $ft4 fmadd.s $ft4, $fa2, $ft2, $ft4 - fcmp.cule.s $fcc0, $ft4, $fa6 + fcmp.cule.s $fcc0, $ft4, $ft0 bcnez $fcc0, .LBB1_5 # %bb.7: # in Loop: Header=BB1_6 Depth=1 movfr2gr.s $a0, $ft3 - fmov.s $ft0, $ft1 - fmov.s $fa7, $ft2 - fmov.s $fa6, $ft4 + fmov.s $fa7, $ft1 + fmov.s $fa6, $ft2 + fmov.s $ft0, $ft4 b .LBB1_5 .LBB1_8: # %._crit_edge.loopexit - movfr2gr.s $a1, $ft0 - bstrins.d $a0, $a1, 63, 32 movfr2gr.s $a1, $fa7 + bstrins.d $a0, $a1, 63, 32 + movfr2gr.s $a1, $fa6 bstrpick.d $a1, $a1, 31, 0 ret .Lfunc_end1: @@ -202,12 +199,7 @@ _ZNK23btConvexPointCloudShape49batchedUnitVectorGetSupportingVertexWithoutMargin .Lfunc_end2: .size _ZNK23btConvexPointCloudShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i, .Lfunc_end2-_ZNK23btConvexPointCloudShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK23btConvexPointCloudShape24localGetSupportingVertexERK9btVector3 -.LCPI3_0: - .word 0x28800000 # float 1.42108547E-14 - .text - .globl _ZNK23btConvexPointCloudShape24localGetSupportingVertexERK9btVector3 + .globl _ZNK23btConvexPointCloudShape24localGetSupportingVertexERK9btVector3 # -- Begin function _ZNK23btConvexPointCloudShape24localGetSupportingVertexERK9btVector3 .p2align 5 .type _ZNK23btConvexPointCloudShape24localGetSupportingVertexERK9btVector3,@function _ZNK23btConvexPointCloudShape24localGetSupportingVertexERK9btVector3: # @_ZNK23btConvexPointCloudShape24localGetSupportingVertexERK9btVector3 @@ -258,28 +250,28 @@ _ZNK23btConvexPointCloudShape24localGetSupportingVertexERK9btVector3: # @_ZNK23b # %bb.1: movgr2fr.w $fs0, $fp movgr2fr.w $fs1, $s3 - movgr2fr.w $fs2, $s1 - fld.s $fa0, $s2, 0 - fld.s $fa1, $s2, 4 + fld.s $fa0, $s2, 4 + fld.s $fa1, $s2, 0 fld.s $fa2, $s2, 8 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.s $fa3, $a0, %pc_lo12(.LCPI3_0) - fmul.s $fa4, $fa1, $fa1 - fmadd.s $fa4, $fa0, $fa0, $fa4 - fmadd.s $fa4, $fa2, $fa2, $fa4 - fcmp.clt.s $fcc0, $fa4, $fa3 + movgr2fr.w $fs2, $s1 + fmul.s $fa3, $fa0, $fa0 + fmadd.s $fa3, $fa1, $fa1, $fa3 + fmadd.s $fa3, $fa2, $fa2, $fa3 + lu12i.w $a0, 165888 + movgr2fr.w $fa4, $a0 + fcmp.clt.s $fcc0, $fa3, $fa4 vldi $vr3, -1040 - fsel $fa0, $fa0, $fa3, $fcc0 fsel $fa1, $fa1, $fa3, $fcc0 + fsel $fa0, $fa0, $fa3, $fcc0 fsel $fa2, $fa2, $fa3, $fcc0 - fmul.s $fa3, $fa1, $fa1 + fmul.s $fa3, $fa0, $fa0 ld.d $a0, $s0, 0 - fmadd.s $fa3, $fa0, $fa0, $fa3 + fmadd.s $fa3, $fa1, $fa1, $fa3 fmadd.s $fa3, $fa2, $fa2, $fa3 frsqrt.s $fa3, $fa3 ld.d $a1, $a0, 88 - fmul.s $fs3, $fa0, $fa3 - fmul.s $fs4, $fa1, $fa3 + fmul.s $fs3, $fa1, $fa3 + fmul.s $fs4, $fa0, $fa3 fmul.s $fs5, $fa2, $fa3 move $a0, $s0 jirl $ra, $a1, 0 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexShape.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexShape.s index cacbfb34..27f680f9 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexShape.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexShape.s @@ -34,14 +34,8 @@ _ZN13btConvexShapeD0Ev: # @_ZN13btConvexShapeD0Ev .Lfunc_end2: .size _ZN13btConvexShapeD0Ev, .Lfunc_end2-_ZN13btConvexShapeD0Ev # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK13btConvexShape44localGetSupportVertexWithoutMarginNonVirtualERK9btVector3 -.LCPI3_0: - .word 0xdd5e0b6b # float -9.99999984E+17 -.LCPI3_1: - .word 0x38d1b717 # float 9.99999974E-5 .text - .globl _ZNK13btConvexShape44localGetSupportVertexWithoutMarginNonVirtualERK9btVector3 + .globl _ZNK13btConvexShape44localGetSupportVertexWithoutMarginNonVirtualERK9btVector3 # -- Begin function _ZNK13btConvexShape44localGetSupportVertexWithoutMarginNonVirtualERK9btVector3 .p2align 5 .type _ZNK13btConvexShape44localGetSupportVertexWithoutMarginNonVirtualERK9btVector3,@function _ZNK13btConvexShape44localGetSupportVertexWithoutMarginNonVirtualERK9btVector3: # @_ZNK13btConvexShape44localGetSupportVertexWithoutMarginNonVirtualERK9btVector3 @@ -98,11 +92,11 @@ _ZNK13btConvexShape44localGetSupportVertexWithoutMarginNonVirtualERK9btVector3: .LBB3_4: fld.s $fa1, $a1, 0 ld.w $a2, $a0, 64 - fld.s $fa3, $a1, 4 - fld.s $fa4, $a1, 8 + fld.s $fa2, $a1, 4 + fld.s $fa3, $a1, 8 addi.d $a3, $a0, 40 slli.d $a1, $a2, 2 - fldx.s $fa2, $a3, $a1 + fldx.s $fa4, $a3, $a1 addi.w $a2, $a2, 2 lu12i.w $a4, 349525 ori $a4, $a4, 1366 @@ -114,18 +108,19 @@ _ZNK13btConvexShape44localGetSupportVertexWithoutMarginNonVirtualERK9btVector3: sub.w $a2, $a2, $a4 slli.d $a2, $a2, 2 fldx.s $fa7, $a3, $a2 - pcalau12i $a2, %pc_hi20(.LCPI3_1) - fld.s $fa0, $a2, %pc_lo12(.LCPI3_1) - fmul.s $fa5, $fa3, $fa3 - fmadd.s $fa5, $fa1, $fa1, $fa5 - fmadd.s $fa5, $fa4, $fa4, $fa5 + fmul.s $fa0, $fa2, $fa2 + fmadd.s $fa0, $fa1, $fa1, $fa0 + fmadd.s $fa5, $fa3, $fa3, $fa0 + lu12i.w $a2, 232731 + ori $a2, $a2, 1815 + movgr2fr.w $fa0, $a2 fcmp.clt.s $fcc0, $fa5, $fa0 movgr2fr.w $fa0, $zero bceqz $fcc0, .LBB3_18 # %bb.5: vldi $vr1, -1168 + fmov.s $fa2, $fa0 fmov.s $fa3, $fa0 - fmov.s $fa4, $fa0 b .LBB3_19 .LBB3_6: vld $vr0, $a0, 40 @@ -198,15 +193,17 @@ _ZNK13btConvexShape44localGetSupportVertexWithoutMarginNonVirtualERK9btVector3: # %bb.11: # %.lr.ph.preheader.i214 fld.s $fa3, $a1, 0 fld.s $fa4, $a1, 4 - move $a0, $zero + fld.s $fa5, $a1, 8 + move $a1, $zero fmul.s $fa3, $fa3, $fa0 - fld.s $fa6, $a1, 8 fmul.s $fa4, $fa4, $fa1 - pcalau12i $a1, %pc_hi20(.LCPI3_0) - fld.s $fa5, $a1, %pc_lo12(.LCPI3_0) - fmul.s $fa6, $fa6, $fa2 + fmul.s $fa5, $fa5, $fa2 addi.d $a4, $a2, 8 - addi.w $a1, $zero, -1 + addi.w $a0, $zero, -1 + lu12i.w $a5, -141856 + ori $a5, $a5, 2923 + lu32i.d $a5, 0 + movgr2fr.w $fa6, $a5 .p2align 4, , 16 .LBB3_12: # %.lr.ph.i216 # =>This Inner Loop Header: Depth=1 @@ -215,15 +212,15 @@ _ZNK13btConvexShape44localGetSupportVertexWithoutMarginNonVirtualERK9btVector3: fld.s $ft1, $a4, 0 fmul.s $fa7, $fa4, $fa7 fmadd.s $fa7, $fa3, $ft0, $fa7 - fmadd.s $fa7, $fa6, $ft1, $fa7 - fcmp.clt.s $fcc0, $fa5, $fa7 - fsel $fa5, $fa5, $fa7, $fcc0 + fmadd.s $fa7, $fa5, $ft1, $fa7 + fcmp.clt.s $fcc0, $fa6, $fa7 + fsel $fa6, $fa6, $fa7, $fcc0 movcf2gr $a5, $fcc0 - masknez $a1, $a1, $a5 - maskeqz $a5, $a0, $a5 - or $a1, $a5, $a1 + masknez $a0, $a0, $a5 + maskeqz $a5, $a1, $a5 + or $a0, $a5, $a0 addi.d $a3, $a3, -1 - addi.w $a0, $a0, 1 + addi.w $a1, $a1, 1 addi.d $a4, $a4, 16 bnez $a3, .LBB3_12 b .LBB3_17 @@ -237,15 +234,17 @@ _ZNK13btConvexShape44localGetSupportVertexWithoutMarginNonVirtualERK9btVector3: # %bb.14: # %.lr.ph.preheader.i fld.s $fa3, $a1, 0 fld.s $fa4, $a1, 4 - move $a0, $zero + fld.s $fa5, $a1, 8 + move $a1, $zero fmul.s $fa3, $fa3, $fa0 - fld.s $fa6, $a1, 8 fmul.s $fa4, $fa4, $fa1 - pcalau12i $a1, %pc_hi20(.LCPI3_0) - fld.s $fa5, $a1, %pc_lo12(.LCPI3_0) - fmul.s $fa6, $fa6, $fa2 + fmul.s $fa5, $fa5, $fa2 addi.d $a4, $a2, 8 - addi.w $a1, $zero, -1 + addi.w $a0, $zero, -1 + lu12i.w $a5, -141856 + ori $a5, $a5, 2923 + lu32i.d $a5, 0 + movgr2fr.w $fa6, $a5 .p2align 4, , 16 .LBB3_15: # %.lr.ph.i # =>This Inner Loop Header: Depth=1 @@ -254,26 +253,26 @@ _ZNK13btConvexShape44localGetSupportVertexWithoutMarginNonVirtualERK9btVector3: fld.s $ft1, $a4, 0 fmul.s $fa7, $fa4, $fa7 fmadd.s $fa7, $fa3, $ft0, $fa7 - fmadd.s $fa7, $fa6, $ft1, $fa7 - fcmp.clt.s $fcc0, $fa5, $fa7 - fsel $fa5, $fa5, $fa7, $fcc0 + fmadd.s $fa7, $fa5, $ft1, $fa7 + fcmp.clt.s $fcc0, $fa6, $fa7 + fsel $fa6, $fa6, $fa7, $fcc0 movcf2gr $a5, $fcc0 - masknez $a1, $a1, $a5 - maskeqz $a5, $a0, $a5 - or $a1, $a5, $a1 + masknez $a0, $a0, $a5 + maskeqz $a5, $a1, $a5 + or $a0, $a5, $a0 addi.d $a3, $a3, -1 - addi.w $a0, $a0, 1 + addi.w $a1, $a1, 1 addi.d $a4, $a4, 16 bnez $a3, .LBB3_15 b .LBB3_17 .LBB3_16: - addi.w $a1, $zero, -1 + addi.w $a0, $zero, -1 .LBB3_17: # %_ZL17convexHullSupportRK9btVector3PS0_iS1_.exit - alsl.d $a0, $a1, $a2, 4 - slli.d $a1, $a1, 4 - fldx.s $fa3, $a2, $a1 - fld.s $fa4, $a0, 4 - fld.s $fa5, $a0, 8 + alsl.d $a1, $a0, $a2, 4 + slli.d $a0, $a0, 4 + fldx.s $fa3, $a2, $a0 + fld.s $fa4, $a1, 4 + fld.s $fa5, $a1, 8 move $a2, $zero fmul.s $fa0, $fa0, $fa3 fmul.s $fa1, $fa1, $fa4 @@ -282,8 +281,8 @@ _ZNK13btConvexShape44localGetSupportVertexWithoutMarginNonVirtualERK9btVector3: .LBB3_18: frsqrt.s $fa5, $fa5 fmul.s $fa1, $fa1, $fa5 + fmul.s $fa2, $fa2, $fa5 fmul.s $fa3, $fa3, $fa5 - fmul.s $fa4, $fa4, $fa5 .LBB3_19: vrepli.b $vr11, 0 vst $vr11, $sp, 24 @@ -291,10 +290,10 @@ _ZNK13btConvexShape44localGetSupportVertexWithoutMarginNonVirtualERK9btVector3: fld.s $fa5, $a0, 24 fld.s $fa6, $a0, 28 fld.s $ft0, $a0, 32 - fstx.s $fa2, $a1, $a2 + fstx.s $fa4, $a1, $a2 fmul.s $fa5, $fa1, $fa5 - fmul.s $fa6, $fa3, $fa6 - fmul.s $ft0, $fa4, $ft0 + fmul.s $fa6, $fa2, $fa6 + fmul.s $ft0, $fa3, $ft0 fmul.s $fa5, $fa7, $fa5 fmul.s $fa6, $fa7, $fa6 fmul.s $fa7, $fa7, $ft0 @@ -306,46 +305,48 @@ _ZNK13btConvexShape44localGetSupportVertexWithoutMarginNonVirtualERK9btVector3: fadd.s $ft6, $fa6, $ft1 fadd.s $ft9, $fa7, $ft2 fmul.s $ft0, $fa1, $ft4 - fmul.s $ft1, $fa3, $ft4 - fmul.s $ft2, $fa4, $ft4 + fmul.s $ft1, $fa2, $ft4 + fmul.s $ft2, $fa3, $ft4 fsub.s $ft7, $ft5, $ft0 fsub.s $ft8, $ft6, $ft1 fsub.s $ft9, $ft9, $ft2 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.s $ft4, $a0, %pc_lo12(.LCPI3_0) - fmul.s $ft5, $fa3, $ft8 - fmadd.s $ft5, $fa1, $ft7, $ft5 - fmadd.s $ft10, $fa4, $ft9, $ft5 - fcmp.cule.s $fcc0, $ft10, $ft4 + fmul.s $ft4, $fa2, $ft8 + fmadd.s $ft4, $fa1, $ft7, $ft4 + fmadd.s $ft10, $fa3, $ft9, $ft4 + lu12i.w $a0, -141856 + ori $a0, $a0, 2923 + lu32i.d $a0, 0 + movgr2fr.w $ft6, $a0 + fcmp.cule.s $fcc0, $ft10, $ft6 + fmov.s $ft4, $fa0 fmov.s $ft5, $fa0 - fmov.s $ft6, $fa0 bcnez $fcc0, .LBB3_21 # %bb.20: fmov.s $fa0, $ft7 - fmov.s $ft5, $ft8 - fmov.s $ft6, $ft9 - fmov.s $ft4, $ft10 + fmov.s $ft4, $ft8 + fmov.s $ft5, $ft9 + fmov.s $ft6, $ft10 .LBB3_21: - fneg.s $fa2, $fa2 + fneg.s $fa4, $fa4 vst $vr11, $sp, 24 - fstx.s $fa2, $a1, $a2 - fld.s $fa2, $sp, 24 + fstx.s $fa4, $a1, $a2 + fld.s $fa4, $sp, 24 fld.s $ft3, $sp, 28 fld.s $ft7, $sp, 32 move $a2, $zero - fadd.s $fa2, $fa5, $fa2 + fadd.s $fa4, $fa5, $fa4 fadd.s $fa5, $fa6, $ft3 fadd.s $fa6, $fa7, $ft7 - fsub.s $fa2, $fa2, $ft0 + fsub.s $fa4, $fa4, $ft0 fsub.s $fa5, $fa5, $ft1 fsub.s $fa6, $fa6, $ft2 - fmul.s $fa3, $fa3, $fa5 - fmadd.s $fa1, $fa1, $fa2, $fa3 - fmadd.s $fa1, $fa4, $fa6, $fa1 - fcmp.clt.s $fcc0, $ft4, $fa1 - fsel $fa0, $fa0, $fa2, $fcc0 - fsel $fa1, $ft5, $fa5, $fcc0 - fsel $fa2, $ft6, $fa6, $fcc0 + fmul.s $fa2, $fa2, $fa5 + fmadd.s $fa1, $fa1, $fa4, $fa2 + fmadd.s $fa1, $fa3, $fa6, $fa1 + fcmp.clt.s $fcc0, $ft6, $fa1 + fsel $fa0, $fa0, $fa4, $fcc0 + fsel $fa1, $ft4, $fa5, $fcc0 + fsel $fa2, $ft5, $fa6, $fcc0 b .LBB3_29 .LBB3_22: addi.d $a6, $sp, 24 @@ -429,12 +430,8 @@ _ZNK13btConvexShape44localGetSupportVertexWithoutMarginNonVirtualERK9btVector3: .word .LBB3_3-.LJTI3_0 .word .LBB3_6-.LJTI3_0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK13btConvexShape31localGetSupportVertexNonVirtualERK9btVector3 -.LCPI4_0: - .word 0x28800000 # float 1.42108547E-14 .text - .globl _ZNK13btConvexShape31localGetSupportVertexNonVirtualERK9btVector3 + .globl _ZNK13btConvexShape31localGetSupportVertexNonVirtualERK9btVector3 # -- Begin function _ZNK13btConvexShape31localGetSupportVertexNonVirtualERK9btVector3 .p2align 5 .type _ZNK13btConvexShape31localGetSupportVertexNonVirtualERK9btVector3,@function _ZNK13btConvexShape31localGetSupportVertexNonVirtualERK9btVector3: # @_ZNK13btConvexShape31localGetSupportVertexNonVirtualERK9btVector3 @@ -450,15 +447,15 @@ _ZNK13btConvexShape31localGetSupportVertexNonVirtualERK9btVector3: # @_ZNK13btCo .cfi_offset 23, -24 vld $vr0, $a1, 0 vst $vr0, $sp, 8 - fld.s $fa2, $sp, 8 fld.s $fa1, $sp, 12 + fld.s $fa2, $sp, 8 fld.s $fa0, $sp, 16 - pcalau12i $a1, %pc_hi20(.LCPI4_0) - fld.s $fa3, $a1, %pc_lo12(.LCPI4_0) - fmul.s $fa4, $fa1, $fa1 - fmadd.s $fa4, $fa2, $fa2, $fa4 - fmadd.s $fa4, $fa0, $fa0, $fa4 - fcmp.cule.s $fcc0, $fa3, $fa4 + fmul.s $fa3, $fa1, $fa1 + fmadd.s $fa3, $fa2, $fa2, $fa3 + fmadd.s $fa3, $fa0, $fa0, $fa3 + lu12i.w $a1, 165888 + movgr2fr.w $fa4, $a1 + fcmp.cule.s $fcc0, $fa4, $fa3 move $fp, $a0 bcnez $fcc0, .LBB4_2 # %bb.1: diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexTriangleMeshShape.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexTriangleMeshShape.s index 053e862f..f7f6bea7 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexTriangleMeshShape.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btConvexTriangleMeshShape.s @@ -116,18 +116,14 @@ __clang_call_terminate: # @__clang_call_terminate .Lfunc_end1: .size __clang_call_terminate, .Lfunc_end1-__clang_call_terminate # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK25btConvexTriangleMeshShape37localGetSupportingVertexWithoutMarginERK9btVector3 -.LCPI2_0: - .word 0x38d1b717 # float 9.99999974E-5 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI2_1: + .p2align 4, 0x0 # -- Begin function _ZNK25btConvexTriangleMeshShape37localGetSupportingVertexWithoutMarginERK9btVector3 +.LCPI2_0: .word 0x5d5e0b6b # float 9.99999984E+17 .word 0x5d5e0b6b # float 9.99999984E+17 .word 0x5d5e0b6b # float 9.99999984E+17 .word 0x00000000 # float 0 -.LCPI2_2: +.LCPI2_1: .dword -2495544585613341845 # 0xdd5e0b6bdd5e0b6b .dword 3713928043 # 0xdd5e0b6b .text @@ -148,15 +144,16 @@ _ZNK25btConvexTriangleMeshShape37localGetSupportingVertexWithoutMarginERK9btVect .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 - fld.s $fa1, $a1, 0 - fld.s $fa2, $a1, 4 - fld.s $fa4, $a1, 8 - pcalau12i $a2, %pc_hi20(.LCPI2_0) - fld.s $fa0, $a2, %pc_lo12(.LCPI2_0) - fmul.s $fa3, $fa2, $fa2 - fmadd.s $fa3, $fa1, $fa1, $fa3 - fmadd.s $fa3, $fa4, $fa4, $fa3 - fcmp.clt.s $fcc0, $fa3, $fa0 + fld.s $fa1, $a1, 4 + fld.s $fa3, $a1, 0 + fld.s $fa2, $a1, 8 + fmul.s $fa0, $fa1, $fa1 + fmadd.s $fa0, $fa3, $fa3, $fa0 + fmadd.s $fa4, $fa2, $fa2, $fa0 + lu12i.w $a2, 232731 + ori $a2, $a2, 1815 + movgr2fr.w $fa0, $a2 + fcmp.clt.s $fcc0, $fa4, $fa0 bceqz $fcc0, .LBB2_2 # %bb.1: movgr2fr.w $fa1, $zero @@ -166,10 +163,10 @@ _ZNK25btConvexTriangleMeshShape37localGetSupportingVertexWithoutMarginERK9btVect b .LBB2_3 .LBB2_2: fld.s $fa0, $a1, 12 - frsqrt.s $fa5, $fa3 - fmul.s $fa3, $fa1, $fa5 - fmul.s $fa1, $fa2, $fa5 - fmul.s $fa2, $fa4, $fa5 + frsqrt.s $fa4, $fa4 + fmul.s $fa3, $fa3, $fa4 + fmul.s $fa1, $fa1, $fa4 + fmul.s $fa2, $fa2, $fa4 .LBB2_3: pcalau12i $a1, %pc_hi20(_ZTV26LocalSupportVertexCallback+16) addi.d $a1, $a1, %pc_lo12(_ZTV26LocalSupportVertexCallback+16) @@ -181,15 +178,15 @@ _ZNK25btConvexTriangleMeshShape37localGetSupportingVertexWithoutMarginERK9btVect lu32i.d $a1, 0 st.w $a1, $sp, 80 fst.s $fa3, $sp, 84 - pcalau12i $a1, %pc_hi20(.LCPI2_2) - vld $vr3, $a1, %pc_lo12(.LCPI2_2) + pcalau12i $a1, %pc_hi20(.LCPI2_1) + vld $vr3, $a1, %pc_lo12(.LCPI2_1) ld.d $a0, $a0, 104 fst.s $fa1, $sp, 88 fst.s $fa2, $sp, 92 vst $vr3, $sp, 16 ld.d $a1, $a0, 0 - pcalau12i $a2, %pc_hi20(.LCPI2_1) - vld $vr1, $a2, %pc_lo12(.LCPI2_1) + pcalau12i $a2, %pc_hi20(.LCPI2_0) + vld $vr1, $a2, %pc_lo12(.LCPI2_0) fst.s $fa0, $sp, 96 ld.d $a4, $a1, 16 vst $vr1, $sp, 32 @@ -464,12 +461,8 @@ GCC_except_table3: .Lttbase2: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK25btConvexTriangleMeshShape24localGetSupportingVertexERK9btVector3 -.LCPI4_0: - .word 0x28800000 # float 1.42108547E-14 .text - .globl _ZNK25btConvexTriangleMeshShape24localGetSupportingVertexERK9btVector3 + .globl _ZNK25btConvexTriangleMeshShape24localGetSupportingVertexERK9btVector3 # -- Begin function _ZNK25btConvexTriangleMeshShape24localGetSupportingVertexERK9btVector3 .p2align 5 .type _ZNK25btConvexTriangleMeshShape24localGetSupportingVertexERK9btVector3,@function _ZNK25btConvexTriangleMeshShape24localGetSupportingVertexERK9btVector3: # @_ZNK25btConvexTriangleMeshShape24localGetSupportingVertexERK9btVector3 @@ -520,28 +513,28 @@ _ZNK25btConvexTriangleMeshShape24localGetSupportingVertexERK9btVector3: # @_ZNK2 # %bb.1: movgr2fr.w $fs0, $fp movgr2fr.w $fs1, $s3 - movgr2fr.w $fs2, $s1 - fld.s $fa0, $s2, 0 - fld.s $fa1, $s2, 4 + fld.s $fa0, $s2, 4 + fld.s $fa1, $s2, 0 fld.s $fa2, $s2, 8 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.s $fa3, $a0, %pc_lo12(.LCPI4_0) - fmul.s $fa4, $fa1, $fa1 - fmadd.s $fa4, $fa0, $fa0, $fa4 - fmadd.s $fa4, $fa2, $fa2, $fa4 - fcmp.clt.s $fcc0, $fa4, $fa3 + movgr2fr.w $fs2, $s1 + fmul.s $fa3, $fa0, $fa0 + fmadd.s $fa3, $fa1, $fa1, $fa3 + fmadd.s $fa3, $fa2, $fa2, $fa3 + lu12i.w $a0, 165888 + movgr2fr.w $fa4, $a0 + fcmp.clt.s $fcc0, $fa3, $fa4 vldi $vr3, -1040 - fsel $fa0, $fa0, $fa3, $fcc0 fsel $fa1, $fa1, $fa3, $fcc0 + fsel $fa0, $fa0, $fa3, $fcc0 fsel $fa2, $fa2, $fa3, $fcc0 - fmul.s $fa3, $fa1, $fa1 + fmul.s $fa3, $fa0, $fa0 ld.d $a0, $s0, 0 - fmadd.s $fa3, $fa0, $fa0, $fa3 + fmadd.s $fa3, $fa1, $fa1, $fa3 fmadd.s $fa3, $fa2, $fa2, $fa3 frsqrt.s $fa3, $fa3 ld.d $a1, $a0, 88 - fmul.s $fs3, $fa0, $fa3 - fmul.s $fs4, $fa1, $fa3 + fmul.s $fs3, $fa1, $fa3 + fmul.s $fs4, $fa0, $fa3 fmul.s $fs5, $fa2, $fa3 move $a0, $s0 jirl $ra, $a1, 0 @@ -684,12 +677,6 @@ _ZNK25btConvexTriangleMeshShape15getLocalScalingEv: # @_ZNK25btConvexTriangleMes .LCPI14_1: .dword -2495544585613341845 # 0xdd5e0b6bdd5e0b6b .dword 3713928043 # 0xdd5e0b6b - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI14_2: - .word 0x3e2aaaab # float 0.166666672 -.LCPI14_3: - .word 0x3727c5ac # float 9.99999974E-6 .text .globl _ZNK25btConvexTriangleMeshShape31calculatePrincipalAxisTransformER11btTransformR9btVector3Rf .p2align 5 @@ -764,10 +751,11 @@ _ZNK25btConvexTriangleMeshShape31calculatePrincipalAxisTransformER11btTransformR ld.d $a0, $sp, 164 ld.d $a1, $sp, 172 .LBB14_4: - pcalau12i $a2, %pc_hi20(.LCPI14_2) - fld.s $fa1, $a2, %pc_lo12(.LCPI14_2) st.d $a0, $s1, 48 st.d $a1, $s1, 56 + lu12i.w $a2, 254634 + ori $a2, $a2, 2731 + movgr2fr.w $fa1, $a2 fmul.s $fa0, $fa0, $fa1 fst.s $fa0, $s0, 0 pcalau12i $a2, %pc_hi20(_ZTVZNK25btConvexTriangleMeshShape31calculatePrincipalAxisTransformER11btTransformR9btVector3RfE15InertiaCallback+16) @@ -803,8 +791,9 @@ _ZNK25btConvexTriangleMeshShape31calculatePrincipalAxisTransformER11btTransformR # %bb.5: .Ltmp24: # EH_LABEL addi.d $a0, $sp, 56 - pcalau12i $a1, %pc_hi20(.LCPI14_3) - fld.s $fa0, $a1, %pc_lo12(.LCPI14_3) + lu12i.w $a1, 225916 + ori $a1, $a1, 1452 + movgr2fr.w $fa0, $a1 ori $a2, $zero, 20 move $a1, $s1 pcaddu18i $ra, %call36(_ZN11btMatrix3x311diagonalizeERS_fi) @@ -928,14 +917,8 @@ GCC_except_table14: .Lttbase3: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN11btMatrix3x311diagonalizeERS_fi -.LCPI15_0: - .word 0x34000000 # float 1.1920929E-7 -.LCPI15_1: - .word 0x4ca00000 # float 83886080 .section .text._ZN11btMatrix3x311diagonalizeERS_fi,"axG",@progbits,_ZN11btMatrix3x311diagonalizeERS_fi,comdat - .weak _ZN11btMatrix3x311diagonalizeERS_fi + .weak _ZN11btMatrix3x311diagonalizeERS_fi # -- Begin function _ZN11btMatrix3x311diagonalizeERS_fi .p2align 5 .type _ZN11btMatrix3x311diagonalizeERS_fi,@function _ZN11btMatrix3x311diagonalizeERS_fi: # @_ZN11btMatrix3x311diagonalizeERS_fi @@ -953,15 +936,15 @@ _ZN11btMatrix3x311diagonalizeERS_fi: # @_ZN11btMatrix3x311diagonalizeERS_fi addi.d $a3, $a1, 16 addi.d $a4, $a1, 32 ori $a5, $zero, 2 - pcalau12i $a6, %pc_hi20(.LCPI15_1) - fld.s $fa1, $a6, %pc_lo12(.LCPI15_1) + lu12i.w $a6, 313856 + movgr2fr.w $fa1, $a6 vldi $vr2, -1168 movgr2fr.w $fa3, $zero ori $a6, $zero, 1 vldi $vr4, -1184 vldi $vr5, -1280 vldi $vr6, -1056 - pcalau12i $a7, %pc_hi20(.LCPI15_0) + lu12i.w $a7, 212992 b .LBB15_4 .p2align 4, , 16 .LBB15_2: # in Loop: Header=BB15_4 Depth=1 @@ -1063,7 +1046,7 @@ _ZN11btMatrix3x311diagonalizeERS_fi: # @_ZN11btMatrix3x311diagonalizeERS_fi fcmp.cult.s $fcc0, $ft0, $fa7 bcnez $fcc0, .LBB15_9 # %bb.8: # in Loop: Header=BB15_4 Depth=1 - fld.s $ft1, $a7, %pc_lo12(.LCPI15_0) + movgr2fr.w $ft1, $a7 fmul.s $ft0, $ft0, $ft1 fcmp.cle.s $fcc0, $fa7, $ft0 ori $a2, $zero, 1 @@ -1565,16 +1548,8 @@ GCC_except_table26: .Lcst_end7: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZZNK25btConvexTriangleMeshShape31calculatePrincipalAxisTransformER11btTransformR9btVector3RfEN15InertiaCallback28internalProcessTriangleIndexEPS2_ii -.LCPI27_0: - .word 0xbe2aaaab # float -0.166666672 -.LCPI27_1: - .word 0x3d4ccccd # float 0.0500000007 -.LCPI27_2: - .word 0x3dcccccd # float 0.100000001 .text - .p2align 5 + .p2align 5 # -- Begin function _ZZNK25btConvexTriangleMeshShape31calculatePrincipalAxisTransformER11btTransformR9btVector3RfEN15InertiaCallback28internalProcessTriangleIndexEPS2_ii .type _ZZNK25btConvexTriangleMeshShape31calculatePrincipalAxisTransformER11btTransformR9btVector3RfEN15InertiaCallback28internalProcessTriangleIndexEPS2_ii,@function _ZZNK25btConvexTriangleMeshShape31calculatePrincipalAxisTransformER11btTransformR9btVector3RfEN15InertiaCallback28internalProcessTriangleIndexEPS2_ii: # @_ZZNK25btConvexTriangleMeshShape31calculatePrincipalAxisTransformER11btTransformR9btVector3RfEN15InertiaCallback28internalProcessTriangleIndexEPS2_ii # %bb.0: # %.preheader @@ -1609,12 +1584,14 @@ _ZZNK25btConvexTriangleMeshShape31calculatePrincipalAxisTransformER11btTransform fmadd.s $ft0, $fa5, $ft0, $ft1 fneg.s $ft1, $ft2 fmul.s $ft1, $fa4, $ft1 - pcalau12i $a1, %pc_hi20(.LCPI27_0) - fld.s $ft3, $a1, %pc_lo12(.LCPI27_0) fmadd.s $ft1, $fa6, $fa7, $ft1 fmadd.s $ft0, $fa0, $ft1, $ft0 fabs.s $ft0, $ft0 - fmul.s $ft0, $ft0, $ft3 + lu12i.w $a1, -269654 + ori $a1, $a1, 2731 + lu32i.d $a1, 0 + movgr2fr.w $ft1, $a1 + fmul.s $ft0, $ft0, $ft1 fmul.s $ft1, $fa6, $fa6 fmadd.s $ft1, $fa5, $fa5, $ft1 fmadd.s $ft1, $ft2, $ft2, $ft1 @@ -1622,13 +1599,15 @@ _ZZNK25btConvexTriangleMeshShape31calculatePrincipalAxisTransformER11btTransform fmadd.s $ft3, $fa5, $fa6, $ft3 fmadd.s $ft3, $fa5, $ft2, $ft3 fmadd.s $ft3, $fa5, $ft2, $ft3 - pcalau12i $a1, %pc_hi20(.LCPI27_1) - fld.s $ft4, $a1, %pc_lo12(.LCPI27_1) - pcalau12i $a1, %pc_hi20(.LCPI27_2) - fld.s $ft5, $a1, %pc_lo12(.LCPI27_2) fmadd.s $ft3, $fa6, $ft2, $ft3 fmadd.s $ft3, $fa6, $ft2, $ft3 + lu12i.w $a1, 251084 + ori $a1, $a1, 3277 + movgr2fr.w $ft4, $a1 fmul.s $ft3, $ft3, $ft4 + lu12i.w $a1, 253132 + ori $a1, $a1, 3277 + movgr2fr.w $ft5, $a1 fmadd.s $ft1, $ft1, $ft5, $ft3 fmul.s $ft3, $ft0, $ft1 fmul.s $ft1, $fa4, $fa6 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btCylinderShape.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btCylinderShape.s index ce0dd52a..fb8e75f4 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btCylinderShape.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btCylinderShape.s @@ -845,12 +845,8 @@ _ZNK21btConvexInternalShape9getMarginEv: # @_ZNK21btConvexInternalShape9getMargi .Lfunc_end16: .size _ZNK21btConvexInternalShape9getMarginEv, .Lfunc_end16-_ZNK21btConvexInternalShape9getMarginEv # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK15btCylinderShape24localGetSupportingVertexERK9btVector3 -.LCPI17_0: - .word 0x28800000 # float 1.42108547E-14 .section .text._ZNK15btCylinderShape24localGetSupportingVertexERK9btVector3,"axG",@progbits,_ZNK15btCylinderShape24localGetSupportingVertexERK9btVector3,comdat - .weak _ZNK15btCylinderShape24localGetSupportingVertexERK9btVector3 + .weak _ZNK15btCylinderShape24localGetSupportingVertexERK9btVector3 # -- Begin function _ZNK15btCylinderShape24localGetSupportingVertexERK9btVector3 .p2align 5 .type _ZNK15btCylinderShape24localGetSupportingVertexERK9btVector3,@function _ZNK15btCylinderShape24localGetSupportingVertexERK9btVector3: # @_ZNK15btCylinderShape24localGetSupportingVertexERK9btVector3 @@ -901,28 +897,28 @@ _ZNK15btCylinderShape24localGetSupportingVertexERK9btVector3: # @_ZNK15btCylinde # %bb.1: movgr2fr.w $fs0, $fp movgr2fr.w $fs1, $s3 - movgr2fr.w $fs2, $s1 - fld.s $fa0, $s2, 0 - fld.s $fa1, $s2, 4 + fld.s $fa0, $s2, 4 + fld.s $fa1, $s2, 0 fld.s $fa2, $s2, 8 - pcalau12i $a0, %pc_hi20(.LCPI17_0) - fld.s $fa3, $a0, %pc_lo12(.LCPI17_0) - fmul.s $fa4, $fa1, $fa1 - fmadd.s $fa4, $fa0, $fa0, $fa4 - fmadd.s $fa4, $fa2, $fa2, $fa4 - fcmp.clt.s $fcc0, $fa4, $fa3 + movgr2fr.w $fs2, $s1 + fmul.s $fa3, $fa0, $fa0 + fmadd.s $fa3, $fa1, $fa1, $fa3 + fmadd.s $fa3, $fa2, $fa2, $fa3 + lu12i.w $a0, 165888 + movgr2fr.w $fa4, $a0 + fcmp.clt.s $fcc0, $fa3, $fa4 vldi $vr3, -1040 - fsel $fa0, $fa0, $fa3, $fcc0 fsel $fa1, $fa1, $fa3, $fcc0 + fsel $fa0, $fa0, $fa3, $fcc0 fsel $fa2, $fa2, $fa3, $fcc0 - fmul.s $fa3, $fa1, $fa1 + fmul.s $fa3, $fa0, $fa0 ld.d $a0, $s0, 0 - fmadd.s $fa3, $fa0, $fa0, $fa3 + fmadd.s $fa3, $fa1, $fa1, $fa3 fmadd.s $fa3, $fa2, $fa2, $fa3 frsqrt.s $fa3, $fa3 ld.d $a1, $a0, 88 - fmul.s $fs3, $fa0, $fa3 - fmul.s $fs4, $fa1, $fa3 + fmul.s $fs3, $fa1, $fa3 + fmul.s $fs4, $fa0, $fa3 fmul.s $fs5, $fa2, $fa3 move $a0, $s0 jirl $ra, $a1, 0 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btDbvt.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btDbvt.s index d63b00f0..6434a1eb 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btDbvt.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btDbvt.s @@ -562,12 +562,7 @@ _ZL11fetchleavesP6btDbvtP10btDbvtNodeR20btAlignedObjectArrayIS2_Ei: # @_ZL11fetc .size _ZL11fetchleavesP6btDbvtP10btDbvtNodeR20btAlignedObjectArrayIS2_Ei, .Lfunc_end7-_ZL11fetchleavesP6btDbvtP10btDbvtNodeR20btAlignedObjectArrayIS2_Ei .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZL8bottomupP6btDbvtR20btAlignedObjectArrayIP10btDbvtNodeE -.LCPI8_0: - .word 0x7f7fffff # float 3.40282347E+38 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZL8bottomupP6btDbvtR20btAlignedObjectArrayIP10btDbvtNodeE .type _ZL8bottomupP6btDbvtR20btAlignedObjectArrayIP10btDbvtNodeE,@function _ZL8bottomupP6btDbvtR20btAlignedObjectArrayIP10btDbvtNodeE: # @_ZL8bottomupP6btDbvtR20btAlignedObjectArrayIP10btDbvtNodeE .cfi_startproc @@ -600,9 +595,10 @@ _ZL8bottomupP6btDbvtR20btAlignedObjectArrayIP10btDbvtNodeE: # @_ZL8bottomupP6btD blt $a1, $s1, .LBB8_11 # %bb.1: # %.preheader.lr.ph move $s0, $a0 - pcalau12i $a0, %pc_hi20(.LCPI8_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI8_0) addi.w $s2, $zero, -1 + lu12i.w $a0, 522239 + ori $a0, $a0, 4095 + movgr2fr.w $fs0, $a0 vrepli.b $vr13, 0 vst $vr13, $sp, 16 # 16-byte Folded Spill b .LBB8_4 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btDbvtBroadphase.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btDbvtBroadphase.s index 054d9d5c..7018fba1 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btDbvtBroadphase.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btDbvtBroadphase.s @@ -1474,12 +1474,8 @@ _ZN6btDbvt8ICollideD2Ev: # @_ZN6btDbvt8ICollideD2Ev .Lfunc_end10: .size _ZN6btDbvt8ICollideD2Ev, .Lfunc_end10-_ZN6btDbvt8ICollideD2Ev # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN16btDbvtBroadphase7setAabbEP17btBroadphaseProxyRK9btVector3S4_P12btDispatcher -.LCPI11_0: - .word 0x3d4ccccd # float 0.0500000007 .text - .globl _ZN16btDbvtBroadphase7setAabbEP17btBroadphaseProxyRK9btVector3S4_P12btDispatcher + .globl _ZN16btDbvtBroadphase7setAabbEP17btBroadphaseProxyRK9btVector3S4_P12btDispatcher # -- Begin function _ZN16btDbvtBroadphase7setAabbEP17btBroadphaseProxyRK9btVector3S4_P12btDispatcher .p2align 5 .type _ZN16btDbvtBroadphase7setAabbEP17btBroadphaseProxyRK9btVector3S4_P12btDispatcher,@function _ZN16btDbvtBroadphase7setAabbEP17btBroadphaseProxyRK9btVector3S4_P12btDispatcher: # @_ZN16btDbvtBroadphase7setAabbEP17btBroadphaseProxyRK9btVector3S4_P12btDispatcher @@ -1674,8 +1670,9 @@ _ZN16btDbvtBroadphase7setAabbEP17btBroadphaseProxyRK9btVector3S4_P12btDispatcher fst.s $fa0, $sp, 16 .LBB11_24: addi.d $a0, $s0, 8 - pcalau12i $a2, %pc_hi20(.LCPI11_0) - fld.s $fa0, $a2, %pc_lo12(.LCPI11_0) + lu12i.w $a2, 251084 + ori $a2, $a2, 3277 + movgr2fr.w $fa0, $a2 addi.d $a2, $sp, 32 addi.d $a3, $sp, 8 pcaddu18i $ra, %call36(_ZN6btDbvt6updateEP10btDbvtNodeR12btDbvtAabbMmRK9btVector3f) diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btDiscreteDynamicsWorld.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btDiscreteDynamicsWorld.s index 79e8e388..eea68f4f 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btDiscreteDynamicsWorld.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btDiscreteDynamicsWorld.s @@ -669,13 +669,9 @@ _ZN23btDiscreteDynamicsWorld18saveKinematicStateEf: # @_ZN23btDiscreteDynamicsWo .size _ZN23btDiscreteDynamicsWorld18saveKinematicStateEf, .Lfunc_end7-_ZN23btDiscreteDynamicsWorld18saveKinematicStateEf .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN23btDiscreteDynamicsWorld14debugDrawWorldEv -.LCPI8_0: - .word 0x437f0000 # float 255 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI8_1: + .p2align 4, 0x0 # -- Begin function _ZN23btDiscreteDynamicsWorld14debugDrawWorldEv +.LCPI8_0: .word 0x3f800000 # float 1 .word 0x00000000 # float 0 .word 0x00000000 # float 0 @@ -890,12 +886,12 @@ _ZN23btDiscreteDynamicsWorld14debugDrawWorldEv: # @_ZN23btDiscreteDynamicsWorld1 ld.w $a0, $fp, 12 blez $a0, .LBB8_46 # %bb.31: # %.lr.ph124 - pcalau12i $a0, %pc_hi20(.LCPI8_1) - vld $vr0, $a0, %pc_lo12(.LCPI8_1) - vst $vr0, $sp, 16 # 16-byte Folded Spill pcalau12i $a0, %pc_hi20(.LCPI8_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI8_0) - movgr2fr.w $fs1, $zero + vld $vr0, $a0, %pc_lo12(.LCPI8_0) + vst $vr0, $sp, 16 # 16-byte Folded Spill + movgr2fr.w $fs0, $zero + lu12i.w $a0, 276464 + movgr2fr.w $fs1, $a0 ori $s0, $zero, 5 pcalau12i $a0, %pc_hi20(.Lswitch.table._ZN23btDiscreteDynamicsWorld14debugDrawWorldEv) addi.d $s1, $a0, %pc_lo12(.Lswitch.table._ZN23btDiscreteDynamicsWorld14debugDrawWorldEv) @@ -942,9 +938,9 @@ _ZN23btDiscreteDynamicsWorld14debugDrawWorldEv: # @_ZN23btDiscreteDynamicsWorld1 # %bb.38: # in Loop: Header=BB8_33 Depth=1 ld.w $a0, $s6, 228 addi.w $a0, $a0, -1 - fmov.s $fa1, $fs0 - fmov.s $fa2, $fs1 - fmov.s $fa0, $fs1 + fmov.s $fa1, $fs1 + fmov.s $fa2, $fs0 + fmov.s $fa0, $fs0 bgeu $a0, $s0, .LBB8_40 # %bb.39: # %switch.lookup # in Loop: Header=BB8_33 Depth=1 @@ -1212,24 +1208,8 @@ GCC_except_table8: .Lttbase2: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN23btDiscreteDynamicsWorld19debugDrawConstraintEP17btTypedConstraint -.LCPI9_0: - .word 0x3f666666 # float 0.899999976 -.LCPI9_1: - .word 0xc0490fdb # float -3.14159274 -.LCPI9_2: - .word 0x40490fdb # float 3.14159274 -.LCPI9_3: - .word 0x40c2c75b # float 6.08683538 -.LCPI9_4: - .word 0x40c90fda # float 6.283185 -.LCPI9_5: - .word 0x3d000000 # float 0.03125 -.LCPI9_6: - .word 0x40c90fdb # float 6.28318548 .text - .globl _ZN23btDiscreteDynamicsWorld19debugDrawConstraintEP17btTypedConstraint + .globl _ZN23btDiscreteDynamicsWorld19debugDrawConstraintEP17btTypedConstraint # -- Begin function _ZN23btDiscreteDynamicsWorld19debugDrawConstraintEP17btTypedConstraint .p2align 5 .type _ZN23btDiscreteDynamicsWorld19debugDrawConstraintEP17btTypedConstraint,@function _ZN23btDiscreteDynamicsWorld19debugDrawConstraintEP17btTypedConstraint: # @_ZN23btDiscreteDynamicsWorld19debugDrawConstraintEP17btTypedConstraint @@ -1673,8 +1653,9 @@ _ZN23btDiscreteDynamicsWorld19debugDrawConstraintEP17btTypedConstraint: # @_ZN23 st.d $a2, $sp, 128 move $a0, $fp jirl $ra, $a1, 0 - pcalau12i $a1, %pc_hi20(.LCPI9_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI9_0) + lu12i.w $a1, 259686 + ori $a1, $a1, 1638 + movgr2fr.w $fa0, $a1 fmul.s $fa0, $fs0, $fa0 vrepli.b $vr1, 0 vst $vr1, $sp, 16 # 16-byte Folded Spill @@ -1773,10 +1754,13 @@ _ZN23btDiscreteDynamicsWorld19debugDrawConstraintEP17btTypedConstraint: # @_ZN23 jirl $ra, $a1, 0 vld $vr0, $sp, 16 # 16-byte Folded Reload vst $vr0, $sp, 72 - pcalau12i $a1, %pc_hi20(.LCPI9_1) - fld.s $fa2, $a1, %pc_lo12(.LCPI9_1) - pcalau12i $a1, %pc_hi20(.LCPI9_2) - fld.s $fa3, $a1, %pc_lo12(.LCPI9_2) + lu12i.w $a1, 263312 + ori $a1, $a1, 4059 + movgr2fr.w $fa3, $a1 + lu12i.w $a1, -260976 + ori $a1, $a1, 4059 + lu32i.d $a1, 0 + movgr2fr.w $fa2, $a1 addi.d $a2, $sp, 88 addi.d $a3, $sp, 104 addi.d $a4, $sp, 72 @@ -2182,8 +2166,9 @@ _ZN23btDiscreteDynamicsWorld19debugDrawConstraintEP17btTypedConstraint: # @_ZN23 beqz $s1, .LBB9_36 .LBB9_19: addi.d $s1, $sp, 200 - pcalau12i $a0, %pc_hi20(.LCPI9_3) - fld.s $fa0, $a0, %pc_lo12(.LCPI9_3) + lu12i.w $a0, 265260 + ori $a0, $a0, 1883 + movgr2fr.w $fa0, $a0 move $a0, $s0 fmov.s $fa1, $fs0 pcaddu18i $ra, %call36(_ZNK21btConeTwistConstraint16GetPointForAngleEff) @@ -2221,13 +2206,14 @@ _ZN23btDiscreteDynamicsWorld19debugDrawConstraintEP17btTypedConstraint: # @_ZN23 fmadd.s $fa0, $fa5, $fa1, $fa0 fadd.s $fa0, $fa6, $fa0 st.d $a0, $sp, 136 - pcalau12i $a0, %pc_hi20(.LCPI9_4) - fld.s $fs2, $a0, %pc_lo12(.LCPI9_4) - pcalau12i $a0, %pc_hi20(.LCPI9_5) - fld.s $fs3, $a0, %pc_lo12(.LCPI9_5) movfr2gr.s $a0, $fa0 bstrpick.d $a0, $a0, 31, 0 st.d $a0, $sp, 144 + lu12i.w $a0, 265360 + ori $a0, $a0, 4058 + movgr2fr.w $fs2, $a0 + lu12i.w $a0, 249856 + movgr2fr.w $fs3, $a0 vrepli.b $vr0, 0 vst $vr0, $sp, 16 # 16-byte Folded Spill ori $s3, $zero, 32 @@ -2481,10 +2467,11 @@ _ZN23btDiscreteDynamicsWorld19debugDrawConstraintEP17btTypedConstraint: # @_ZN23 fsel $fs1, $fs1, $fa1, $fcc0 movcf2gr $a0, $fcc0 st.d $a0, $sp, 16 - pcalau12i $a0, %pc_hi20(.LCPI9_6) + lu12i.w $a0, 265360 + ori $a0, $a0, 4059 ld.w $a1, $sp, 176 ld.wu $a2, $sp, 160 - fld.s $fa1, $a0, %pc_lo12(.LCPI9_6) + movgr2fr.w $fa1, $a0 ld.wu $a0, $sp, 192 slli.d $a1, $a1, 32 or $a1, $a1, $a2 @@ -2738,22 +2725,16 @@ _ZN23btDiscreteDynamicsWorld19debugDrawConstraintEP17btTypedConstraint: # @_ZN23 .word 0x00000000 # float 0 .word 0x3f800000 # float 1 .word 0x00000000 # float 0 -.LCPI10_5: +.LCPI10_3: .word 0x5d5e0b6b # float 9.99999984E+17 .word 0x5d5e0b6b # float 9.99999984E+17 .word 0x5d5e0b6b # float 9.99999984E+17 .word 0x00000000 # float 0 -.LCPI10_6: +.LCPI10_4: .word 0xdd5e0b6b # float -9.99999984E+17 .word 0xdd5e0b6b # float -9.99999984E+17 .word 0xdd5e0b6b # float -9.99999984E+17 .word 0x00000000 # float 0 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI10_3: - .word 0x3f3504f3 # float 0.707106769 -.LCPI10_4: - .word 0x42c80000 # float 100 .text .globl _ZN23btDiscreteDynamicsWorld15debugDrawObjectERK11btTransformPK16btCollisionShapeRK9btVector3 .p2align 5 @@ -2970,11 +2951,11 @@ _ZN23btDiscreteDynamicsWorld15debugDrawObjectERK11btTransformPK16btCollisionShap addi.d $s4, $s0, 48 bltu $a2, $a1, .LBB10_7 # %bb.4: - pcalau12i $a0, %pc_hi20(.LCPI10_5) + pcalau12i $a0, %pc_hi20(.LCPI10_3) ld.d $a1, $s1, 0 - vld $vr0, $a0, %pc_lo12(.LCPI10_5) - pcalau12i $a0, %pc_hi20(.LCPI10_6) - vld $vr1, $a0, %pc_lo12(.LCPI10_6) + vld $vr0, $a0, %pc_lo12(.LCPI10_3) + pcalau12i $a0, %pc_hi20(.LCPI10_4) + vld $vr1, $a0, %pc_lo12(.LCPI10_4) ld.d $a1, $a1, 32 vst $vr0, $sp, 240 vst $vr1, $sp, 224 @@ -3021,11 +3002,11 @@ _ZN23btDiscreteDynamicsWorld15debugDrawObjectERK11btTransformPK16btCollisionShap ori $a1, $zero, 3 bne $a0, $a1, .LBB10_11 # %bb.8: - pcalau12i $a0, %pc_hi20(.LCPI10_5) + pcalau12i $a0, %pc_hi20(.LCPI10_3) ld.d $a1, $s1, 0 - vld $vr0, $a0, %pc_lo12(.LCPI10_5) - pcalau12i $a0, %pc_hi20(.LCPI10_6) - vld $vr1, $a0, %pc_lo12(.LCPI10_6) + vld $vr0, $a0, %pc_lo12(.LCPI10_3) + pcalau12i $a0, %pc_hi20(.LCPI10_4) + vld $vr1, $a0, %pc_lo12(.LCPI10_4) ld.d $a1, $a1, 32 vst $vr0, $sp, 240 vst $vr1, $sp, 224 @@ -3165,30 +3146,31 @@ _ZN23btDiscreteDynamicsWorld15debugDrawObjectERK11btTransformPK16btCollisionShap b .LBB10_30 .LBB10_15: fld.s $fa2, $s2, 76 - fld.s $fa5, $s2, 60 - fmul.s $fa0, $fa2, $fa5 - fld.s $fa4, $s2, 64 - fld.s $fa6, $s2, 68 - pcalau12i $a0, %pc_hi20(.LCPI10_3) - fld.s $fa3, $a0, %pc_lo12(.LCPI10_3) - fmul.s $fa1, $fa2, $fa4 - fmul.s $fa2, $fa2, $fa6 - fabs.s $fa7, $fa6 - fcmp.cule.s $fcc0, $fa7, $fa3 - pcalau12i $a0, %pc_hi20(.LCPI10_4) + fld.s $fa6, $s2, 60 + fld.s $fa3, $s2, 64 + fld.s $fa4, $s2, 68 + fmul.s $fa0, $fa2, $fa6 + fmul.s $fa1, $fa2, $fa3 + fmul.s $fa2, $fa2, $fa4 + fabs.s $fa5, $fa4 + lu12i.w $a0, 258896 + ori $a0, $a0, 1267 + movgr2fr.w $fa7, $a0 + fcmp.cule.s $fcc0, $fa5, $fa7 + lu12i.w $a0, 273536 bcnez $fcc0, .LBB10_27 # %bb.16: - fmul.s $fa3, $fa6, $fa6 - fmadd.s $fa7, $fa4, $fa4, $fa3 + fmul.s $fa5, $fa4, $fa4 + fmadd.s $fa7, $fa3, $fa3, $fa5 frsqrt.s $ft0, $fa7 - fneg.s $fa3, $fa6 - fmul.s $fa3, $ft0, $fa3 - fmul.s $ft1, $fa4, $ft0 - fmul.s $fa4, $fa7, $ft0 - fld.s $fa7, $a0, %pc_lo12(.LCPI10_4) - fneg.s $fa6, $fa5 - fmul.s $fa6, $ft1, $fa6 - fmul.s $fa5, $fa5, $fa3 + fneg.s $fa4, $fa4 + fmul.s $fa5, $ft0, $fa4 + fmul.s $ft1, $fa3, $ft0 + fmul.s $fa3, $fa7, $ft0 + fneg.s $fa4, $fa6 + fmul.s $fa4, $ft1, $fa4 + fmul.s $fa6, $fa6, $fa5 + movgr2fr.w $fa7, $a0 fmul.s $fs0, $ft1, $fa7 movgr2fr.w $fa7, $zero b .LBB10_28 @@ -4293,30 +4275,30 @@ _ZN23btDiscreteDynamicsWorld15debugDrawObjectERK11btTransformPK16btCollisionShap addi.d $a2, $sp, 208 b .LBB10_29 .LBB10_27: - fmul.s $fa3, $fa4, $fa4 - fmadd.s $fa7, $fa5, $fa5, $fa3 + fmul.s $fa5, $fa3, $fa3 + fmadd.s $fa7, $fa6, $fa6, $fa5 frsqrt.s $ft0, $fa7 - fneg.s $fa3, $fa4 + fneg.s $fa3, $fa3 fmul.s $ft1, $ft0, $fa3 - fmul.s $fa3, $fa5, $ft0 - fneg.s $fa4, $fa6 - fld.s $ft2, $a0, %pc_lo12(.LCPI10_4) - fmul.s $fa4, $fa3, $fa4 - fmul.s $fa6, $fa6, $ft1 - fmul.s $fa5, $fa7, $ft0 - fmul.s $fa7, $ft1, $ft2 + fmul.s $fa5, $fa6, $ft0 + fneg.s $fa3, $fa4 + fmul.s $fa3, $fa5, $fa3 + fmul.s $fa4, $fa4, $ft1 + fmul.s $fa6, $fa7, $ft0 + movgr2fr.w $fa7, $a0 + fmul.s $fa7, $ft1, $fa7 .LBB10_28: # %_Z13btPlaneSpace1RK9btVector3RS_S2_.exit - fld.s $ft0, $a0, %pc_lo12(.LCPI10_4) - fmul.s $fa3, $fa3, $ft0 + movgr2fr.w $ft0, $a0 + fmul.s $fa5, $fa5, $ft0 fadd.s $fs2, $fa0, $fa7 - fadd.s $fs3, $fa1, $fa3 + fadd.s $fs3, $fa1, $fa5 fadd.s $fs4, $fa2, $fs0 fsub.s $fs6, $fa0, $fa7 - fsub.s $fs1, $fa1, $fa3 + fsub.s $fs1, $fa1, $fa5 fsub.s $fs7, $fa2, $fs0 - fmul.s $fa3, $fa4, $ft0 - fmul.s $fa4, $fa6, $ft0 - fmul.s $fa5, $fa5, $ft0 + fmul.s $fa3, $fa3, $ft0 + fmul.s $fa4, $fa4, $ft0 + fmul.s $fa5, $fa6, $ft0 ld.d $a0, $s1, 0 fadd.s $fs0, $fa0, $fa3 fadd.s $fs5, $fa1, $fa4 @@ -5018,16 +5000,8 @@ _ZN23btDiscreteDynamicsWorld28synchronizeSingleMotionStateEP11btRigidBody: # @_Z .size _ZN23btDiscreteDynamicsWorld28synchronizeSingleMotionStateEP11btRigidBody, .Lfunc_end14-_ZN23btDiscreteDynamicsWorld28synchronizeSingleMotionStateEP11btRigidBody .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_ -.LCPI15_0: - .word 0x3f490fdb # float 0.785398185 -.LCPI15_1: - .word 0x3a83126f # float 0.00100000005 -.LCPI15_2: - .word 0xbcaaaaab # float -0.020833334 .section .text._ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_,"axG",@progbits,_ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_,comdat - .weak _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_ + .weak _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_ # -- Begin function _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_ .p2align 5 .type _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_,@function _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_: # @_ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_ @@ -5080,22 +5054,26 @@ _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_: # fmadd.s $fa0, $fs1, $fs1, $fa0 fmadd.s $fa0, $fs3, $fs3, $fa0 fsqrt.s $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI15_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI15_0) - fmul.s $fa2, $fs0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI15_1) - fld.s $fa3, $a0, %pc_lo12(.LCPI15_1) - fcmp.clt.s $fcc0, $fa1, $fa2 - fdiv.s $fa1, $fa1, $fs0 - fsel $fs4, $fa0, $fa1, $fcc0 - fcmp.cule.s $fcc0, $fa3, $fs4 + fmul.s $fa1, $fs0, $fa0 + lu12i.w $a0, 259216 + ori $a0, $a0, 4059 + movgr2fr.w $fa2, $a0 + fdiv.s $fa3, $fa2, $fs0 + fcmp.clt.s $fcc0, $fa2, $fa1 + fsel $fs4, $fa0, $fa3, $fcc0 + lu12i.w $a0, 239665 + ori $a0, $a0, 623 + movgr2fr.w $fa0, $a0 + fcmp.cule.s $fcc0, $fa0, $fs4 bcnez $fcc0, .LBB15_2 # %bb.1: - pcalau12i $a0, %pc_hi20(.LCPI15_2) - fld.s $fa0, $a0, %pc_lo12(.LCPI15_2) - fmul.s $fa1, $fs0, $fs0 - fmul.s $fa1, $fs0, $fa1 - fmul.s $fa0, $fa1, $fa0 + fmul.s $fa0, $fs0, $fs0 + fmul.s $fa0, $fs0, $fa0 + lu12i.w $a0, -275798 + ori $a0, $a0, 2731 + lu32i.d $a0, 0 + movgr2fr.w $fa1, $a0 + fmul.s $fa0, $fa0, $fa1 fmul.s $fa0, $fa0, $fs4 fmul.s $fa0, $fa0, $fs4 vldi $vr1, -1184 @@ -5435,12 +5413,8 @@ GCC_except_table16: .Lttbase4: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN23btDiscreteDynamicsWorld14stepSimulationEfif -.LCPI17_0: - .word 0x34000000 # float 1.1920929E-7 .text - .globl _ZN23btDiscreteDynamicsWorld14stepSimulationEfif + .globl _ZN23btDiscreteDynamicsWorld14stepSimulationEfif # -- Begin function _ZN23btDiscreteDynamicsWorld14stepSimulationEfif .p2align 5 .type _ZN23btDiscreteDynamicsWorld14stepSimulationEfif,@function _ZN23btDiscreteDynamicsWorld14stepSimulationEfif: # @_ZN23btDiscreteDynamicsWorld14stepSimulationEfif @@ -5484,10 +5458,10 @@ _ZN23btDiscreteDynamicsWorld14stepSimulationEfif: # @_ZN23btDiscreteDynamicsWorl move $s1, $zero b .LBB17_5 .LBB17_3: - pcalau12i $a0, %pc_hi20(.LCPI17_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI17_0) - fabs.s $fa1, $fs1 - fcmp.cule.s $fcc0, $fa0, $fa1 + fabs.s $fa0, $fs1 + lu12i.w $a0, 212992 + movgr2fr.w $fa1, $a0 + fcmp.cule.s $fcc0, $fa1, $fa0 fst.s $fs1, $fp, 328 movcf2gr $s1, $fcc0 fmov.s $fs0, $fs1 @@ -8805,12 +8779,8 @@ _ZN12btIDebugDraw13drawTransformERK11btTransformf: # @_ZN12btIDebugDraw13drawTra .size _ZN12btIDebugDraw13drawTransformERK11btTransformf, .Lfunc_end45-_ZN12btIDebugDraw13drawTransformERK11btTransformf .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN12btIDebugDraw7drawArcERK9btVector3S2_S2_ffffS2_bf -.LCPI46_0: - .word 0x3c8efa35 # float 0.0174532924 .section .text._ZN12btIDebugDraw7drawArcERK9btVector3S2_S2_ffffS2_bf,"axG",@progbits,_ZN12btIDebugDraw7drawArcERK9btVector3S2_S2_ffffS2_bf,comdat - .weak _ZN12btIDebugDraw7drawArcERK9btVector3S2_S2_ffffS2_bf + .weak _ZN12btIDebugDraw7drawArcERK9btVector3S2_S2_ffffS2_bf # -- Begin function _ZN12btIDebugDraw7drawArcERK9btVector3S2_S2_ffffS2_bf .p2align 5 .type _ZN12btIDebugDraw7drawArcERK9btVector3S2_S2_ffffS2_bf,@function _ZN12btIDebugDraw7drawArcERK9btVector3S2_S2_ffffS2_bf: # @_ZN12btIDebugDraw7drawArcERK9btVector3S2_S2_ffffS2_bf @@ -8871,12 +8841,13 @@ _ZN12btIDebugDraw7drawArcERK9btVector3S2_S2_ffffS2_bf: # @_ZN12btIDebugDraw7draw fmul.s $fa6, $fa7, $fa6 fmadd.s $fa1, $fa1, $ft0, $fa6 fst.s $fa1, $sp, 16 # 4-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI46_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI46_0) - fneg.s $fa6, $ft0 - fmul.s $fa2, $fa2, $fa6 - fmadd.s $fa2, $fa7, $ft1, $fa2 - fst.s $fa2, $sp, 12 # 4-byte Folded Spill + fneg.s $fa1, $ft0 + fmul.s $fa1, $fa2, $fa1 + fmadd.s $fa1, $fa7, $ft1, $fa1 + fst.s $fa1, $sp, 12 # 4-byte Folded Spill + lu12i.w $a0, 248047 + ori $a0, $a0, 2613 + movgr2fr.w $fa1, $a0 fmul.s $fa1, $fa4, $fa1 fsub.s $fs3, $fa3, $fs0 fdiv.s $fa1, $fs3, $fa1 @@ -9048,22 +9019,8 @@ _ZN12btIDebugDraw7drawArcERK9btVector3S2_S2_ffffS2_bf: # @_ZN12btIDebugDraw7draw .size _ZN12btIDebugDraw7drawArcERK9btVector3S2_S2_ffffS2_bf, .Lfunc_end46-_ZN12btIDebugDraw7drawArcERK9btVector3S2_S2_ffffS2_bf .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN12btIDebugDraw15drawSpherePatchERK9btVector3S2_S2_fffffS2_f -.LCPI47_0: - .word 0x3c8efa35 # float 0.0174532924 -.LCPI47_1: - .word 0xbfc90fdb # float -1.57079637 -.LCPI47_2: - .word 0x3fc90fdb # float 1.57079637 -.LCPI47_3: - .word 0xc0490fdb # float -3.14159274 -.LCPI47_4: - .word 0x40c90fdb # float 6.28318548 -.LCPI47_5: - .word 0x40490fdb # float 3.14159274 .section .text._ZN12btIDebugDraw15drawSpherePatchERK9btVector3S2_S2_fffffS2_f,"axG",@progbits,_ZN12btIDebugDraw15drawSpherePatchERK9btVector3S2_S2_fffffS2_f,comdat - .weak _ZN12btIDebugDraw15drawSpherePatchERK9btVector3S2_S2_fffffS2_f + .weak _ZN12btIDebugDraw15drawSpherePatchERK9btVector3S2_S2_fffffS2_f # -- Begin function _ZN12btIDebugDraw15drawSpherePatchERK9btVector3S2_S2_fffffS2_f .p2align 5 .type _ZN12btIDebugDraw15drawSpherePatchERK9btVector3S2_S2_fffffS2_f,@function _ZN12btIDebugDraw15drawSpherePatchERK9btVector3S2_S2_fffffS2_f: # @_ZN12btIDebugDraw15drawSpherePatchERK9btVector3S2_S2_fffffS2_f @@ -9142,30 +9099,34 @@ _ZN12btIDebugDraw15drawSpherePatchERK9btVector3S2_S2_fffffS2_f: # @_ZN12btIDebug movfr2gr.s $a0, $ft0 movfr2gr.s $a1, $ft1 bstrins.d $a0, $a1, 63, 32 - pcalau12i $a1, %pc_hi20(.LCPI47_0) - fld.s $ft1, $a1, %pc_lo12(.LCPI47_0) movfr2gr.s $a1, $ft2 bstrpick.d $a1, $a1, 31, 0 st.d $a0, $sp, 136 - fld.s $ft0, $a3, 4 st.d $a1, $sp, 144 + lu12i.w $a0, 248047 + ori $a0, $a0, 2613 + fld.s $ft0, $a3, 4 + movgr2fr.w $ft1, $a0 fmul.s $fa5, $fa5, $ft1 fld.s $ft1, $a3, 8 - fneg.s $ft2, $ft0 - fmul.s $ft2, $ft9, $ft2 - fld.s $ft3, $a3, 0 + fneg.s $ft3, $ft0 + fld.s $ft2, $a3, 0 + fmul.s $ft3, $ft9, $ft3 fneg.s $ft4, $ft1 fmul.s $ft4, $fa6, $ft4 - pcalau12i $a0, %pc_hi20(.LCPI47_1) - fld.s $ft5, $a0, %pc_lo12(.LCPI47_1) - fneg.s $ft7, $ft3 - pcalau12i $a0, %pc_hi20(.LCPI47_2) - fld.s $ft6, $a0, %pc_lo12(.LCPI47_2) + fneg.s $ft7, $ft2 + lu12i.w $a0, -263024 + ori $a0, $a0, 4059 + lu32i.d $a0, 0 + movgr2fr.w $ft5, $a0 fcmp.cle.s $fcc0, $fa1, $ft5 fadd.s $ft5, $fa5, $ft5 fsel $ft8, $fa1, $ft5, $fcc0 - fcmp.cle.s $fcc1, $ft6, $fa2 - fsub.s $ft6, $ft6, $fa5 + lu12i.w $a0, 261264 + ori $a0, $a0, 4059 + movgr2fr.w $fa1, $a0 + fcmp.cle.s $fcc1, $fa1, $fa2 + fsub.s $ft6, $fa1, $fa5 fsel $fa2, $fa2, $ft6, $fcc1 fcmp.cule.s $fcc2, $ft8, $fa2 fmul.s $fa1, $fa7, $ft7 @@ -9185,9 +9146,9 @@ _ZN12btIDebugDraw15drawSpherePatchERK9btVector3S2_S2_fffffS2_f: # @_ZN12btIDebug st.d $a0, $sp, 88 # 8-byte Folded Spill .LBB47_3: move $a0, $zero - fmadd.s $fa0, $fa7, $ft1, $ft2 + fmadd.s $fa0, $fa7, $ft1, $ft3 fst.s $fa0, $sp, 108 # 4-byte Folded Spill - fmadd.s $fs6, $ft9, $ft3, $ft4 + fmadd.s $fs6, $ft9, $ft2, $ft4 fmadd.s $fs7, $fa6, $ft0, $fa1 fsub.s $fa0, $fa2, $ft8 fdiv.s $fa1, $fa0, $fa5 @@ -9203,16 +9164,20 @@ _ZN12btIDebugDraw15drawSpherePatchERK9btVector3S2_S2_fffffS2_f: # @_ZN12btIDebug fdiv.s $fa0, $fa0, $fa1 fst.s $fa0, $sp, 28 # 4-byte Folded Spill fcmp.clt.s $fcc0, $fa4, $fa3 - pcalau12i $a1, %pc_hi20(.LCPI47_3) - fld.s $fa0, $a1, %pc_lo12(.LCPI47_3) - pcalau12i $a1, %pc_hi20(.LCPI47_4) - fld.s $fa1, $a1, %pc_lo12(.LCPI47_4) - pcalau12i $a1, %pc_hi20(.LCPI47_5) - fld.s $fa2, $a1, %pc_lo12(.LCPI47_5) + lu12i.w $a1, -260976 + ori $a1, $a1, 4059 + lu32i.d $a1, 0 + movgr2fr.w $fa0, $a1 fadd.s $fa0, $fa5, $fa0 - fsub.s $fa6, $fa4, $fa3 - fcmp.cle.s $fcc1, $fa1, $fa6 - fsel $fa1, $fa4, $fa2, $fcc0 + fsub.s $fa1, $fa4, $fa3 + lu12i.w $a1, 265360 + ori $a1, $a1, 4059 + movgr2fr.w $fa2, $a1 + fcmp.cle.s $fcc1, $fa2, $fa1 + lu12i.w $a1, 263312 + ori $a1, $a1, 4059 + movgr2fr.w $fa1, $a1 + fsel $fa1, $fa4, $fa1, $fcc0 fsel $fs4, $fa3, $fa0, $fcc0 movcf2gr $a1, $fcc0 movcf2gr $a3, $fcc1 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGImpactBvh.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGImpactBvh.s index ffafcdeb..2750c448 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGImpactBvh.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGImpactBvh.s @@ -248,14 +248,7 @@ _ZN9btBvhTree30_sort_and_calc_splitting_indexER18GIM_BVH_DATA_ARRAYiii: # @_ZN9b .Lfunc_end1: .size _ZN9btBvhTree30_sort_and_calc_splitting_indexER18GIM_BVH_DATA_ARRAYiii, .Lfunc_end1-_ZN9btBvhTree30_sort_and_calc_splitting_indexER18GIM_BVH_DATA_ARRAYiii # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN9btBvhTree15_build_sub_treeER18GIM_BVH_DATA_ARRAYii -.LCPI2_0: - .word 0x7f7fffff # float 3.40282347E+38 -.LCPI2_1: - .word 0xff7fffff # float -3.40282347E+38 - .text - .globl _ZN9btBvhTree15_build_sub_treeER18GIM_BVH_DATA_ARRAYii + .globl _ZN9btBvhTree15_build_sub_treeER18GIM_BVH_DATA_ARRAYii # -- Begin function _ZN9btBvhTree15_build_sub_treeER18GIM_BVH_DATA_ARRAYii .p2align 5 .type _ZN9btBvhTree15_build_sub_treeER18GIM_BVH_DATA_ARRAYii,@function _ZN9btBvhTree15_build_sub_treeER18GIM_BVH_DATA_ARRAYii: # @_ZN9btBvhTree15_build_sub_treeER18GIM_BVH_DATA_ARRAYii @@ -310,19 +303,22 @@ _ZN9btBvhTree15_build_sub_treeER18GIM_BVH_DATA_ARRAYii: # @_ZN9btBvhTree15_build jirl $ra, $ra, 0 move $a2, $s2 move $s2, $a0 - pcalau12i $a0, %pc_hi20(.LCPI2_0) - pcalau12i $a1, %pc_hi20(.LCPI2_1) + lu12i.w $a0, 522239 + ori $a0, $a0, 4095 + lu12i.w $a1, -2049 + ori $a1, $a1, 4095 + lu32i.d $a1, 0 bge $a2, $s1, .LBB2_5 # %bb.3: # %.lr.ph ld.d $a3, $s0, 16 - fld.s $fa0, $a1, %pc_lo12(.LCPI2_1) - fld.s $fa1, $a0, %pc_lo12(.LCPI2_0) - slli.d $a0, $a2, 5 - alsl.d $a0, $a2, $a0, 2 - add.d $a0, $a0, $a3 - addi.d $a0, $a0, 16 - fmov.s $fa5, $fa1 - fmov.s $fa2, $fa1 + slli.d $a4, $a2, 5 + alsl.d $a4, $a2, $a4, 2 + add.d $a3, $a4, $a3 + movgr2fr.w $fa2, $a0 + movgr2fr.w $fa0, $a1 + addi.d $a0, $a3, 16 + fmov.s $fa5, $fa2 + fmov.s $fa1, $fa2 fmov.s $fa3, $fa0 fmov.s $fa4, $fa0 .p2align 4, , 16 @@ -330,14 +326,14 @@ _ZN9btBvhTree15_build_sub_treeER18GIM_BVH_DATA_ARRAYii: # @_ZN9btBvhTree15_build # =>This Inner Loop Header: Depth=1 fld.s $fa6, $a0, -16 fld.s $fa7, $a0, -12 - fcmp.clt.s $fcc0, $fa6, $fa1 - fsel $fa1, $fa1, $fa6, $fcc0 + fcmp.clt.s $fcc0, $fa6, $fa2 + fsel $fa2, $fa2, $fa6, $fcc0 fld.s $fa6, $a0, -8 fcmp.clt.s $fcc0, $fa7, $fa5 fsel $fa5, $fa5, $fa7, $fcc0 fld.s $fa7, $a0, 0 - fcmp.clt.s $fcc0, $fa6, $fa2 - fsel $fa2, $fa2, $fa6, $fcc0 + fcmp.clt.s $fcc0, $fa6, $fa1 + fsel $fa1, $fa1, $fa6, $fcc0 fld.s $fa6, $a0, 4 fcmp.clt.s $fcc0, $fa0, $fa7 fld.s $ft0, $a0, 8 @@ -351,20 +347,20 @@ _ZN9btBvhTree15_build_sub_treeER18GIM_BVH_DATA_ARRAYii: # @_ZN9btBvhTree15_build bnez $s4, .LBB2_4 b .LBB2_6 .LBB2_5: - fld.s $fa2, $a0, %pc_lo12(.LCPI2_0) - fld.s $fa4, $a1, %pc_lo12(.LCPI2_1) + movgr2fr.w $fa1, $a0 + movgr2fr.w $fa4, $a1 fmov.s $fa3, $fa4 fmov.s $fa0, $fa4 - fmov.s $fa5, $fa2 - fmov.s $fa1, $fa2 + fmov.s $fa5, $fa1 + fmov.s $fa2, $fa1 .LBB2_6: # %._crit_edge ld.d $a0, $fp, 24 slli.d $a1, $s3, 5 alsl.d $s4, $s3, $a1, 2 add.d $a1, $a0, $s4 - fstx.s $fa1, $a0, $s4 + fstx.s $fa2, $a0, $s4 fst.s $fa5, $a1, 4 - fst.s $fa2, $a1, 8 + fst.s $fa1, $a1, 8 fst.s $fa0, $a1, 16 fst.s $fa4, $a1, 20 fst.s $fa3, $a1, 24 @@ -542,14 +538,7 @@ _ZN9btBvhTree10build_treeER18GIM_BVH_DATA_ARRAY: # @_ZN9btBvhTree10build_treeER1 .size _ZN9btBvhTree10build_treeER18GIM_BVH_DATA_ARRAY, .Lfunc_end3-_ZN9btBvhTree10build_treeER18GIM_BVH_DATA_ARRAY .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN12btGImpactBvh5refitEv -.LCPI4_0: - .word 0x7f7fffff # float 3.40282347E+38 -.LCPI4_1: - .word 0xff7fffff # float -3.40282347E+38 - .text - .globl _ZN12btGImpactBvh5refitEv + .globl _ZN12btGImpactBvh5refitEv # -- Begin function _ZN12btGImpactBvh5refitEv .p2align 5 .type _ZN12btGImpactBvh5refitEv,@function _ZN12btGImpactBvh5refitEv: # @_ZN12btGImpactBvh5refitEv @@ -577,12 +566,15 @@ _ZN12btGImpactBvh5refitEv: # @_ZN12btGImpactBvh5refitEv beqz $s0, .LBB4_7 # %bb.1: # %.lr.ph move $fp, $a0 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI4_0) - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.s $fs1, $a0, %pc_lo12(.LCPI4_1) slli.d $a0, $s0, 5 alsl.d $s1, $s0, $a0, 2 + lu12i.w $a0, 522239 + ori $a0, $a0, 4095 + movgr2fr.w $fs0, $a0 + lu12i.w $a0, -2049 + ori $a0, $a0, 4095 + lu32i.d $a0, 0 + movgr2fr.w $fs1, $a0 ori $s2, $zero, 1 move $s3, $s0 b .LBB4_3 @@ -1405,12 +1397,7 @@ _ZNK12btGImpactBvh8rayQueryERK9btVector3S2_R20btAlignedObjectArrayIiE: # @_ZNK12 .size _ZNK12btGImpactBvh8rayQueryERK9btVector3S2_R20btAlignedObjectArrayIiE, .Lfunc_end8-_ZNK12btGImpactBvh8rayQueryERK9btVector3S2_R20btAlignedObjectArrayIiE .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN12btGImpactBvh14find_collisionEPS_RK11btTransformS0_S3_R9btPairSet -.LCPI9_0: - .word 0x358637bd # float 9.99999997E-7 - .text - .globl _ZN12btGImpactBvh14find_collisionEPS_RK11btTransformS0_S3_R9btPairSet + .globl _ZN12btGImpactBvh14find_collisionEPS_RK11btTransformS0_S3_R9btPairSet # -- Begin function _ZN12btGImpactBvh14find_collisionEPS_RK11btTransformS0_S3_R9btPairSet .p2align 5 .type _ZN12btGImpactBvh14find_collisionEPS_RK11btTransformS0_S3_R9btPairSet,@function _ZN12btGImpactBvh14find_collisionEPS_RK11btTransformS0_S3_R9btPairSet: # @_ZN12btGImpactBvh14find_collisionEPS_RK11btTransformS0_S3_R9btPairSet @@ -1449,108 +1436,109 @@ _ZN12btGImpactBvh14find_collisionEPS_RK11btTransformS0_S3_R9btPairSet: # @_ZN12b fneg.s $ft3, $ft3 fmul.s $ft4, $fa7, $ft2 fmadd.s $ft4, $fa2, $ft1, $ft4 - fmadd.s $ft9, $fa5, $ft3, $ft4 - fmul.s $ft4, $fa6, $ft2 - fmadd.s $ft4, $fa1, $ft1, $ft4 - fmadd.s $ft10, $fa3, $ft3, $ft4 + fmadd.s $ft4, $fa5, $ft3, $ft4 + fmul.s $ft5, $fa6, $ft2 + fmadd.s $ft5, $fa1, $ft1, $ft5 + fmadd.s $ft5, $fa3, $ft3, $ft5 fmul.s $ft2, $fa4, $ft2 - fld.s $ft7, $a3, 16 + fld.s $ft6, $a3, 16 fmadd.s $ft1, $fa0, $ft1, $ft2 - fmadd.s $ft11, $ft0, $ft3, $ft1 - fld.s $ft8, $a3, 0 - fmul.s $ft1, $fa7, $ft7 - fld.s $ft12, $a3, 32 - fld.s $ft13, $a3, 20 - fld.s $ft14, $a3, 4 - fmadd.s $ft1, $ft8, $fa2, $ft1 - fmadd.s $ft1, $ft12, $fa5, $ft1 - fmul.s $ft2, $fa7, $ft13 - fmadd.s $ft2, $ft14, $fa2, $ft2 - fld.s $ft15, $a3, 36 - fld.s $fs0, $a3, 24 - fld.s $fs1, $a3, 8 - fld.s $fs2, $a3, 40 - fmadd.s $ft2, $ft15, $fa5, $ft2 - fmul.s $ft3, $fa7, $fs0 - fmadd.s $ft3, $fs1, $fa2, $ft3 - fmadd.s $ft3, $fs2, $fa5, $ft3 - fmul.s $ft4, $fa6, $ft7 - fmadd.s $ft4, $ft8, $fa1, $ft4 - fmadd.s $ft4, $ft12, $fa3, $ft4 - fmul.s $ft5, $fa6, $ft13 - fmadd.s $ft5, $ft14, $fa1, $ft5 - fmadd.s $ft5, $ft15, $fa3, $ft5 - fmul.s $ft6, $fa6, $fs0 - fmadd.s $ft6, $fs1, $fa1, $ft6 - fmadd.s $ft6, $fs2, $fa3, $ft6 - fmul.s $ft7, $fa4, $ft7 - fmadd.s $ft7, $ft8, $fa0, $ft7 - fmadd.s $ft7, $ft12, $ft0, $ft7 - fmul.s $ft8, $fa4, $ft13 - fmadd.s $ft8, $ft14, $fa0, $ft8 - fmadd.s $ft8, $ft15, $ft0, $ft8 - fmul.s $ft12, $fa4, $fs0 - fmadd.s $ft12, $fs1, $fa0, $ft12 - fld.s $ft13, $a3, 52 - fld.s $ft14, $a3, 48 - fld.s $ft15, $a3, 56 - fmadd.s $ft12, $fs2, $ft0, $ft12 - fmul.s $fa7, $fa7, $ft13 - fmadd.s $fa2, $fa2, $ft14, $fa7 - fmadd.s $fa2, $fa5, $ft15, $fa2 - fadd.s $fa2, $ft9, $fa2 - fmul.s $fa5, $fa6, $ft13 - fmadd.s $fa1, $fa1, $ft14, $fa5 - fmadd.s $fa1, $fa3, $ft15, $fa1 - fadd.s $fa1, $ft10, $fa1 - fmul.s $fa3, $fa4, $ft13 - fmadd.s $fa0, $fa0, $ft14, $fa3 - fmadd.s $fa0, $ft0, $ft15, $fa0 - fadd.s $fa0, $ft11, $fa0 + fmadd.s $ft2, $ft0, $ft3, $ft1 + fld.s $ft3, $a3, 0 + fmul.s $ft1, $fa7, $ft6 + fld.s $ft7, $a3, 32 + fld.s $ft8, $a3, 20 + fld.s $ft9, $a3, 4 + fmadd.s $ft1, $ft3, $fa2, $ft1 + fmadd.s $ft1, $ft7, $fa5, $ft1 + fmul.s $ft10, $fa7, $ft8 + fmadd.s $ft10, $ft9, $fa2, $ft10 + fld.s $ft11, $a3, 36 + fld.s $ft12, $a3, 24 + fld.s $ft13, $a3, 8 + fld.s $ft14, $a3, 40 + fmadd.s $ft10, $ft11, $fa5, $ft10 + fmul.s $ft15, $fa7, $ft12 + fmadd.s $ft15, $ft13, $fa2, $ft15 + fmadd.s $ft15, $ft14, $fa5, $ft15 + fmul.s $fs0, $fa6, $ft6 + fmadd.s $fs0, $ft3, $fa1, $fs0 + fmadd.s $fs0, $ft7, $fa3, $fs0 + fmul.s $fs1, $fa6, $ft8 + fmadd.s $fs1, $ft9, $fa1, $fs1 + fmadd.s $fs1, $ft11, $fa3, $fs1 + fmul.s $fs2, $fa6, $ft12 + fmadd.s $fs2, $ft13, $fa1, $fs2 + fmadd.s $fs2, $ft14, $fa3, $fs2 + fmul.s $ft6, $fa4, $ft6 + fmadd.s $ft3, $ft3, $fa0, $ft6 + fmadd.s $ft3, $ft7, $ft0, $ft3 + fmul.s $ft6, $fa4, $ft8 + fmadd.s $ft6, $ft9, $fa0, $ft6 + fmadd.s $ft6, $ft11, $ft0, $ft6 + fmul.s $ft7, $fa4, $ft12 + fmadd.s $ft7, $ft13, $fa0, $ft7 + fld.s $ft8, $a3, 52 + fld.s $ft9, $a3, 48 + fld.s $ft11, $a3, 56 + fmadd.s $ft7, $ft14, $ft0, $ft7 + fmul.s $fa7, $fa7, $ft8 + fmadd.s $fa2, $fa2, $ft9, $fa7 + fmadd.s $fa2, $fa5, $ft11, $fa2 + fadd.s $fa2, $ft4, $fa2 + fmul.s $fa5, $fa6, $ft8 + fmadd.s $fa1, $fa1, $ft9, $fa5 + fmadd.s $fa1, $fa3, $ft11, $fa1 + fadd.s $fa1, $ft5, $fa1 + fmul.s $fa3, $fa4, $ft8 + fmadd.s $fa0, $fa0, $ft9, $fa3 + fmadd.s $fa0, $ft0, $ft11, $fa0 + fadd.s $fa0, $ft2, $fa0 fst.s $fa2, $sp, 16 fst.s $fa1, $sp, 20 fst.s $fa0, $sp, 24 st.w $zero, $sp, 28 fst.s $ft1, $sp, 32 - fst.s $ft2, $sp, 36 - fst.s $ft3, $sp, 40 + fst.s $ft10, $sp, 36 + fst.s $ft15, $sp, 40 st.w $zero, $sp, 44 - fst.s $ft4, $sp, 48 - fst.s $ft5, $sp, 52 - fst.s $ft6, $sp, 56 + fst.s $fs0, $sp, 48 + fst.s $fs1, $sp, 52 + fst.s $fs2, $sp, 56 st.w $zero, $sp, 60 - fst.s $ft7, $sp, 64 - fst.s $ft8, $sp, 68 - pcalau12i $a1, %pc_hi20(.LCPI9_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI9_0) - fst.s $ft12, $sp, 72 + fst.s $ft3, $sp, 64 + fst.s $ft6, $sp, 68 + fst.s $ft7, $sp, 72 st.w $zero, $sp, 76 - fabs.s $fa1, $ft1 - fadd.s $fa1, $fa1, $fa0 - fst.s $fa1, $sp, 80 - fabs.s $fa1, $ft2 - fadd.s $fa1, $fa1, $fa0 - fst.s $fa1, $sp, 84 - fabs.s $fa1, $ft3 - fadd.s $fa1, $fa1, $fa0 - fst.s $fa1, $sp, 88 - fabs.s $fa1, $ft4 - fadd.s $fa1, $fa1, $fa0 - fst.s $fa1, $sp, 96 - fabs.s $fa1, $ft5 - fadd.s $fa1, $fa1, $fa0 - fst.s $fa1, $sp, 100 - fabs.s $fa1, $ft6 - fadd.s $fa1, $fa1, $fa0 - fst.s $fa1, $sp, 104 - fabs.s $fa1, $ft7 - fadd.s $fa1, $fa1, $fa0 - fst.s $fa1, $sp, 112 - fabs.s $fa1, $ft8 - fadd.s $fa1, $fa1, $fa0 - fst.s $fa1, $sp, 116 - fabs.s $fa1, $ft12 - fadd.s $fa0, $fa1, $fa0 + fabs.s $fa0, $ft1 + lu12i.w $a1, 219235 + ori $a1, $a1, 1981 + movgr2fr.w $fa1, $a1 + fadd.s $fa0, $fa0, $fa1 + fst.s $fa0, $sp, 80 + fabs.s $fa0, $ft10 + fadd.s $fa0, $fa0, $fa1 + fst.s $fa0, $sp, 84 + fabs.s $fa0, $ft15 + fadd.s $fa0, $fa0, $fa1 + fst.s $fa0, $sp, 88 + fabs.s $fa0, $fs0 + fadd.s $fa0, $fa0, $fa1 + fst.s $fa0, $sp, 96 + fabs.s $fa0, $fs1 + fadd.s $fa0, $fa0, $fa1 + fst.s $fa0, $sp, 100 + fabs.s $fa0, $fs2 + fadd.s $fa0, $fa0, $fa1 + fst.s $fa0, $sp, 104 + fabs.s $fa0, $ft3 + fadd.s $fa0, $fa0, $fa1 + fst.s $fa0, $sp, 112 + fabs.s $fa0, $ft6 + fadd.s $fa0, $fa0, $fa1 + fst.s $fa0, $sp, 116 + fabs.s $fa0, $ft7 + fadd.s $fa0, $fa0, $fa1 fst.s $fa0, $sp, 120 addi.d $a3, $sp, 16 ori $a6, $zero, 1 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGImpactCollisionAlgorithm.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGImpactCollisionAlgorithm.s index 1cd239db..c5a37357 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGImpactCollisionAlgorithm.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGImpactCollisionAlgorithm.s @@ -3716,12 +3716,8 @@ GCC_except_table16: .Lttbase7: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN27btGImpactCollisionAlgorithm37gimpacttrimeshpart_vs_plane_collisionEP17btCollisionObjectS1_P22btGImpactMeshShapePartP18btStaticPlaneShapeb -.LCPI17_0: - .word 0x358637bd # float 9.99999997E-7 .text - .globl _ZN27btGImpactCollisionAlgorithm37gimpacttrimeshpart_vs_plane_collisionEP17btCollisionObjectS1_P22btGImpactMeshShapePartP18btStaticPlaneShapeb + .globl _ZN27btGImpactCollisionAlgorithm37gimpacttrimeshpart_vs_plane_collisionEP17btCollisionObjectS1_P22btGImpactMeshShapePartP18btStaticPlaneShapeb # -- Begin function _ZN27btGImpactCollisionAlgorithm37gimpacttrimeshpart_vs_plane_collisionEP17btCollisionObjectS1_P22btGImpactMeshShapePartP18btStaticPlaneShapeb .p2align 5 .type _ZN27btGImpactCollisionAlgorithm37gimpacttrimeshpart_vs_plane_collisionEP17btCollisionObjectS1_P22btGImpactMeshShapePartP18btStaticPlaneShapeb,@function _ZN27btGImpactCollisionAlgorithm37gimpacttrimeshpart_vs_plane_collisionEP17btCollisionObjectS1_P22btGImpactMeshShapePartP18btStaticPlaneShapeb: # @_ZN27btGImpactCollisionAlgorithm37gimpacttrimeshpart_vs_plane_collisionEP17btCollisionObjectS1_P22btGImpactMeshShapePartP18btStaticPlaneShapeb @@ -3792,8 +3788,8 @@ _ZN27btGImpactCollisionAlgorithm37gimpacttrimeshpart_vs_plane_collisionEP17btCol fst.s $fs0, $sp, 80 fmul.s $fa0, $fa4, $ft2 fmadd.s $fa0, $fa3, $ft3, $fa0 - fmadd.s $fs2, $fa5, $ft5, $fa0 - fst.s $fs2, $sp, 84 + fmadd.s $fs1, $fa5, $ft5, $fa0 + fst.s $fs1, $sp, 84 fmul.s $fa0, $fa7, $ft2 fmadd.s $fa0, $fa6, $ft3, $fa0 fmadd.s $fs3, $ft0, $ft5, $fa0 @@ -3804,8 +3800,8 @@ _ZN27btGImpactCollisionAlgorithm37gimpacttrimeshpart_vs_plane_collisionEP17btCol fmadd.s $fa0, $ft1, $ft3, $fa0 fmadd.s $fa0, $fa1, $ft5, $fa0 ld.d $a4, $a0, 16 - fadd.s $fs1, $fa0, $fa2 - fst.s $fs1, $sp, 92 + fadd.s $fs2, $fa0, $fa2 + fst.s $fs2, $sp, 92 addi.d $a3, $sp, 64 addi.d $a1, $sp, 96 addi.d $a2, $sp, 48 @@ -3842,26 +3838,27 @@ _ZN27btGImpactCollisionAlgorithm37gimpacttrimeshpart_vs_plane_collisionEP17btCol fsub.s $fa3, $fa4, $fa0 fsub.s $fa4, $fa5, $fa1 fsub.s $fa5, $fa6, $fa2 - fmul.s $fa1, $fa1, $fs2 + fmul.s $fa1, $fa1, $fs1 fmadd.s $fa0, $fs0, $fa0, $fa1 fmadd.s $fa0, $fs3, $fa2, $fa0 - fabs.s $fa2, $fs0 - fabs.s $fa1, $fs2 + fabs.s $fa1, $fs0 + fabs.s $fa2, $fs1 fabs.s $fa7, $fs3 - fmul.s $fa4, $fa4, $fa1 - pcalau12i $a0, %pc_hi20(.LCPI17_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI17_0) - fmadd.s $fa2, $fa3, $fa2, $fa4 - fmadd.s $fa2, $fa5, $fa7, $fa2 - fsub.s $fa3, $fa0, $fa2 - fadd.s $fa4, $fs1, $fa1 + fmul.s $fa2, $fa4, $fa2 + fmadd.s $fa1, $fa3, $fa1, $fa2 + fmadd.s $fa1, $fa5, $fa7, $fa1 + fsub.s $fa3, $fa0, $fa1 + lu12i.w $a0, 219235 + ori $a0, $a0, 1981 + movgr2fr.w $fa2, $a0 + fadd.s $fa4, $fs2, $fa2 fcmp.cult.s $fcc0, $fa4, $fa3 fst.s $fa6, $sp, 72 bcnez $fcc0, .LBB17_18 # %bb.1: - fadd.s $fa0, $fa0, $fa2 fadd.s $fa0, $fa0, $fa1 - fcmp.clt.s $fcc0, $fa0, $fs1 + fadd.s $fa0, $fa0, $fa2 + fcmp.clt.s $fcc0, $fa0, $fs2 bcnez $fcc0, .LBB17_18 # %bb.2: ld.d $a0, $fp, 0 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGImpactQuantizedBvh.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGImpactQuantizedBvh.s index e750c75c..969fa7eb 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGImpactQuantizedBvh.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGImpactQuantizedBvh.s @@ -1,26 +1,21 @@ .file "btGImpactQuantizedBvh.cpp" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN18btQuantizedBvhTree17calc_quantizationER18GIM_BVH_DATA_ARRAYf -.LCPI0_0: - .word 0x7f7fffff # float 3.40282347E+38 -.LCPI0_1: - .word 0xff7fffff # float -3.40282347E+38 -.LCPI0_2: - .word 0x477fff00 # float 65535 .text - .globl _ZN18btQuantizedBvhTree17calc_quantizationER18GIM_BVH_DATA_ARRAYf + .globl _ZN18btQuantizedBvhTree17calc_quantizationER18GIM_BVH_DATA_ARRAYf # -- Begin function _ZN18btQuantizedBvhTree17calc_quantizationER18GIM_BVH_DATA_ARRAYf .p2align 5 .type _ZN18btQuantizedBvhTree17calc_quantizationER18GIM_BVH_DATA_ARRAYf,@function _ZN18btQuantizedBvhTree17calc_quantizationER18GIM_BVH_DATA_ARRAYf: # @_ZN18btQuantizedBvhTree17calc_quantizationER18GIM_BVH_DATA_ARRAYf # %bb.0: ld.w $a2, $a1, 4 - pcalau12i $a3, %pc_hi20(.LCPI0_0) - pcalau12i $a4, %pc_hi20(.LCPI0_1) + lu12i.w $a3, 522239 + ori $a3, $a3, 4095 + lu12i.w $a4, -2049 + ori $a4, $a4, 4095 + lu32i.d $a4, 0 blez $a2, .LBB0_3 # %bb.1: # %.lr.ph ld.d $a1, $a1, 16 - fld.s $fa1, $a4, %pc_lo12(.LCPI0_1) - fld.s $fa5, $a3, %pc_lo12(.LCPI0_0) + movgr2fr.w $fa5, $a3 + movgr2fr.w $fa1, $a4 addi.d $a1, $a1, 16 fmov.s $fa6, $fa5 fmov.s $fa4, $fa5 @@ -52,8 +47,8 @@ _ZN18btQuantizedBvhTree17calc_quantizationER18GIM_BVH_DATA_ARRAYf: # @_ZN18btQua bnez $a2, .LBB0_2 b .LBB0_4 .LBB0_3: - fld.s $fa4, $a3, %pc_lo12(.LCPI0_0) - fld.s $fa3, $a4, %pc_lo12(.LCPI0_1) + movgr2fr.w $fa4, $a3 + movgr2fr.w $fa3, $a4 fmov.s $fa2, $fa3 fmov.s $fa1, $fa3 fmov.s $fa6, $fa4 @@ -79,16 +74,17 @@ _ZN18btQuantizedBvhTree17calc_quantizationER18GIM_BVH_DATA_ARRAYf: # @_ZN18btQua bstrpick.d $a2, $a2, 31, 0 st.d $a1, $a0, 56 st.d $a2, $a0, 64 - pcalau12i $a1, %pc_hi20(.LCPI0_2) - fld.s $fa2, $a1, %pc_lo12(.LCPI0_2) fsub.s $fa1, $fa1, $fa5 - fsub.s $fa3, $fa3, $fa6 + fsub.s $fa2, $fa3, $fa6 fsub.s $fa0, $fa0, $fa4 - fdiv.s $fa1, $fa2, $fa1 - fdiv.s $fa3, $fa2, $fa3 - fdiv.s $fa0, $fa2, $fa0 + lu12i.w $a1, 292863 + ori $a1, $a1, 3840 + movgr2fr.w $fa3, $a1 + fdiv.s $fa1, $fa3, $fa1 + fdiv.s $fa2, $fa3, $fa2 + fdiv.s $fa0, $fa3, $fa0 movfr2gr.s $a1, $fa1 - movfr2gr.s $a2, $fa3 + movfr2gr.s $a2, $fa2 bstrins.d $a1, $a2, 63, 32 movfr2gr.s $a2, $fa0 bstrpick.d $a2, $a2, 31, 0 @@ -346,14 +342,7 @@ _ZN18btQuantizedBvhTree30_sort_and_calc_splitting_indexER18GIM_BVH_DATA_ARRAYiii .Lfunc_end2: .size _ZN18btQuantizedBvhTree30_sort_and_calc_splitting_indexER18GIM_BVH_DATA_ARRAYiii, .Lfunc_end2-_ZN18btQuantizedBvhTree30_sort_and_calc_splitting_indexER18GIM_BVH_DATA_ARRAYiii # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN18btQuantizedBvhTree15_build_sub_treeER18GIM_BVH_DATA_ARRAYii -.LCPI3_0: - .word 0x7f7fffff # float 3.40282347E+38 -.LCPI3_1: - .word 0xff7fffff # float -3.40282347E+38 - .text - .globl _ZN18btQuantizedBvhTree15_build_sub_treeER18GIM_BVH_DATA_ARRAYii + .globl _ZN18btQuantizedBvhTree15_build_sub_treeER18GIM_BVH_DATA_ARRAYii # -- Begin function _ZN18btQuantizedBvhTree15_build_sub_treeER18GIM_BVH_DATA_ARRAYii .p2align 5 .type _ZN18btQuantizedBvhTree15_build_sub_treeER18GIM_BVH_DATA_ARRAYii,@function _ZN18btQuantizedBvhTree15_build_sub_treeER18GIM_BVH_DATA_ARRAYii: # @_ZN18btQuantizedBvhTree15_build_sub_treeER18GIM_BVH_DATA_ARRAYii @@ -477,17 +466,20 @@ _ZN18btQuantizedBvhTree15_build_sub_treeER18GIM_BVH_DATA_ARRAYii: # @_ZN18btQuan jirl $ra, $ra, 0 move $a2, $s2 move $s2, $a0 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - pcalau12i $a1, %pc_hi20(.LCPI3_1) + lu12i.w $a0, 522239 + ori $a0, $a0, 4095 + lu12i.w $a1, -2049 + ori $a1, $a1, 4095 + lu32i.d $a1, 0 bge $a2, $s0, .LBB3_5 # %bb.3: # %.lr.ph ld.d $a3, $s1, 16 - fld.s $fa0, $a1, %pc_lo12(.LCPI3_1) - fld.s $fa4, $a0, %pc_lo12(.LCPI3_0) - slli.d $a0, $a2, 5 - alsl.d $a0, $a2, $a0, 2 - add.d $a0, $a0, $a3 - addi.d $a0, $a0, 16 + slli.d $a4, $a2, 5 + alsl.d $a4, $a2, $a4, 2 + add.d $a3, $a4, $a3 + movgr2fr.w $fa4, $a0 + movgr2fr.w $fa0, $a1 + addi.d $a0, $a3, 16 fmov.s $fa5, $fa4 fmov.s $fa3, $fa4 fmov.s $fa1, $fa0 @@ -518,8 +510,8 @@ _ZN18btQuantizedBvhTree15_build_sub_treeER18GIM_BVH_DATA_ARRAYii: # @_ZN18btQuan bnez $s4, .LBB3_4 b .LBB3_6 .LBB3_5: - fld.s $fa3, $a0, %pc_lo12(.LCPI3_0) - fld.s $fa2, $a1, %pc_lo12(.LCPI3_1) + movgr2fr.w $fa3, $a0 + movgr2fr.w $fa2, $a1 fmov.s $fa1, $fa2 fmov.s $fa0, $fa2 fmov.s $fa5, $fa3 @@ -759,14 +751,7 @@ _ZN18btQuantizedBvhTree10build_treeER18GIM_BVH_DATA_ARRAY: # @_ZN18btQuantizedBv .size _ZN18btQuantizedBvhTree10build_treeER18GIM_BVH_DATA_ARRAY, .Lfunc_end4-_ZN18btQuantizedBvhTree10build_treeER18GIM_BVH_DATA_ARRAY .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN21btGImpactQuantizedBvh5refitEv -.LCPI5_0: - .word 0x7f7fffff # float 3.40282347E+38 -.LCPI5_1: - .word 0xff7fffff # float -3.40282347E+38 - .text - .globl _ZN21btGImpactQuantizedBvh5refitEv + .globl _ZN21btGImpactQuantizedBvh5refitEv # -- Begin function _ZN21btGImpactQuantizedBvh5refitEv .p2align 5 .type _ZN21btGImpactQuantizedBvh5refitEv,@function _ZN21btGImpactQuantizedBvh5refitEv: # @_ZN21btGImpactQuantizedBvh5refitEv @@ -795,11 +780,14 @@ _ZN21btGImpactQuantizedBvh5refitEv: # @_ZN21btGImpactQuantizedBvh5refitEv # %bb.1: # %.lr.ph move $fp, $a0 ld.d $a0, $a0, 24 - pcalau12i $a1, %pc_hi20(.LCPI5_0) - fld.s $fs0, $a1, %pc_lo12(.LCPI5_0) - pcalau12i $a1, %pc_hi20(.LCPI5_1) - fld.s $fs1, $a1, %pc_lo12(.LCPI5_1) slli.d $s1, $s0, 4 + lu12i.w $a1, 522239 + ori $a1, $a1, 4095 + movgr2fr.w $fs0, $a1 + lu12i.w $a1, -2049 + ori $a1, $a1, 4095 + lu32i.d $a1, 0 + movgr2fr.w $fs1, $a1 ori $s2, $zero, 1 move $s3, $s0 b .LBB5_3 @@ -1918,12 +1906,7 @@ _ZNK21btGImpactQuantizedBvh8rayQueryERK9btVector3S2_R20btAlignedObjectArrayIiE: .size _ZNK21btGImpactQuantizedBvh8rayQueryERK9btVector3S2_R20btAlignedObjectArrayIiE, .Lfunc_end9-_ZNK21btGImpactQuantizedBvh8rayQueryERK9btVector3S2_R20btAlignedObjectArrayIiE .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN21btGImpactQuantizedBvh14find_collisionEPS_RK11btTransformS0_S3_R9btPairSet -.LCPI10_0: - .word 0x358637bd # float 9.99999997E-7 - .text - .globl _ZN21btGImpactQuantizedBvh14find_collisionEPS_RK11btTransformS0_S3_R9btPairSet + .globl _ZN21btGImpactQuantizedBvh14find_collisionEPS_RK11btTransformS0_S3_R9btPairSet # -- Begin function _ZN21btGImpactQuantizedBvh14find_collisionEPS_RK11btTransformS0_S3_R9btPairSet .p2align 5 .type _ZN21btGImpactQuantizedBvh14find_collisionEPS_RK11btTransformS0_S3_R9btPairSet,@function _ZN21btGImpactQuantizedBvh14find_collisionEPS_RK11btTransformS0_S3_R9btPairSet: # @_ZN21btGImpactQuantizedBvh14find_collisionEPS_RK11btTransformS0_S3_R9btPairSet @@ -1962,108 +1945,109 @@ _ZN21btGImpactQuantizedBvh14find_collisionEPS_RK11btTransformS0_S3_R9btPairSet: fneg.s $ft3, $ft3 fmul.s $ft4, $fa7, $ft2 fmadd.s $ft4, $fa2, $ft1, $ft4 - fmadd.s $ft9, $fa5, $ft3, $ft4 - fmul.s $ft4, $fa6, $ft2 - fmadd.s $ft4, $fa1, $ft1, $ft4 - fmadd.s $ft10, $fa3, $ft3, $ft4 + fmadd.s $ft4, $fa5, $ft3, $ft4 + fmul.s $ft5, $fa6, $ft2 + fmadd.s $ft5, $fa1, $ft1, $ft5 + fmadd.s $ft5, $fa3, $ft3, $ft5 fmul.s $ft2, $fa4, $ft2 - fld.s $ft7, $a3, 16 + fld.s $ft6, $a3, 16 fmadd.s $ft1, $fa0, $ft1, $ft2 - fmadd.s $ft11, $ft0, $ft3, $ft1 - fld.s $ft8, $a3, 0 - fmul.s $ft1, $fa7, $ft7 - fld.s $ft12, $a3, 32 - fld.s $ft13, $a3, 20 - fld.s $ft14, $a3, 4 - fmadd.s $ft1, $ft8, $fa2, $ft1 - fmadd.s $ft1, $ft12, $fa5, $ft1 - fmul.s $ft2, $fa7, $ft13 - fmadd.s $ft2, $ft14, $fa2, $ft2 - fld.s $ft15, $a3, 36 - fld.s $fs0, $a3, 24 - fld.s $fs1, $a3, 8 - fld.s $fs2, $a3, 40 - fmadd.s $ft2, $ft15, $fa5, $ft2 - fmul.s $ft3, $fa7, $fs0 - fmadd.s $ft3, $fs1, $fa2, $ft3 - fmadd.s $ft3, $fs2, $fa5, $ft3 - fmul.s $ft4, $fa6, $ft7 - fmadd.s $ft4, $ft8, $fa1, $ft4 - fmadd.s $ft4, $ft12, $fa3, $ft4 - fmul.s $ft5, $fa6, $ft13 - fmadd.s $ft5, $ft14, $fa1, $ft5 - fmadd.s $ft5, $ft15, $fa3, $ft5 - fmul.s $ft6, $fa6, $fs0 - fmadd.s $ft6, $fs1, $fa1, $ft6 - fmadd.s $ft6, $fs2, $fa3, $ft6 - fmul.s $ft7, $fa4, $ft7 - fmadd.s $ft7, $ft8, $fa0, $ft7 - fmadd.s $ft7, $ft12, $ft0, $ft7 - fmul.s $ft8, $fa4, $ft13 - fmadd.s $ft8, $ft14, $fa0, $ft8 - fmadd.s $ft8, $ft15, $ft0, $ft8 - fmul.s $ft12, $fa4, $fs0 - fmadd.s $ft12, $fs1, $fa0, $ft12 - fld.s $ft13, $a3, 52 - fld.s $ft14, $a3, 48 - fld.s $ft15, $a3, 56 - fmadd.s $ft12, $fs2, $ft0, $ft12 - fmul.s $fa7, $fa7, $ft13 - fmadd.s $fa2, $fa2, $ft14, $fa7 - fmadd.s $fa2, $fa5, $ft15, $fa2 - fadd.s $fa2, $ft9, $fa2 - fmul.s $fa5, $fa6, $ft13 - fmadd.s $fa1, $fa1, $ft14, $fa5 - fmadd.s $fa1, $fa3, $ft15, $fa1 - fadd.s $fa1, $ft10, $fa1 - fmul.s $fa3, $fa4, $ft13 - fmadd.s $fa0, $fa0, $ft14, $fa3 - fmadd.s $fa0, $ft0, $ft15, $fa0 - fadd.s $fa0, $ft11, $fa0 + fmadd.s $ft2, $ft0, $ft3, $ft1 + fld.s $ft3, $a3, 0 + fmul.s $ft1, $fa7, $ft6 + fld.s $ft7, $a3, 32 + fld.s $ft8, $a3, 20 + fld.s $ft9, $a3, 4 + fmadd.s $ft1, $ft3, $fa2, $ft1 + fmadd.s $ft1, $ft7, $fa5, $ft1 + fmul.s $ft10, $fa7, $ft8 + fmadd.s $ft10, $ft9, $fa2, $ft10 + fld.s $ft11, $a3, 36 + fld.s $ft12, $a3, 24 + fld.s $ft13, $a3, 8 + fld.s $ft14, $a3, 40 + fmadd.s $ft10, $ft11, $fa5, $ft10 + fmul.s $ft15, $fa7, $ft12 + fmadd.s $ft15, $ft13, $fa2, $ft15 + fmadd.s $ft15, $ft14, $fa5, $ft15 + fmul.s $fs0, $fa6, $ft6 + fmadd.s $fs0, $ft3, $fa1, $fs0 + fmadd.s $fs0, $ft7, $fa3, $fs0 + fmul.s $fs1, $fa6, $ft8 + fmadd.s $fs1, $ft9, $fa1, $fs1 + fmadd.s $fs1, $ft11, $fa3, $fs1 + fmul.s $fs2, $fa6, $ft12 + fmadd.s $fs2, $ft13, $fa1, $fs2 + fmadd.s $fs2, $ft14, $fa3, $fs2 + fmul.s $ft6, $fa4, $ft6 + fmadd.s $ft3, $ft3, $fa0, $ft6 + fmadd.s $ft3, $ft7, $ft0, $ft3 + fmul.s $ft6, $fa4, $ft8 + fmadd.s $ft6, $ft9, $fa0, $ft6 + fmadd.s $ft6, $ft11, $ft0, $ft6 + fmul.s $ft7, $fa4, $ft12 + fmadd.s $ft7, $ft13, $fa0, $ft7 + fld.s $ft8, $a3, 52 + fld.s $ft9, $a3, 48 + fld.s $ft11, $a3, 56 + fmadd.s $ft7, $ft14, $ft0, $ft7 + fmul.s $fa7, $fa7, $ft8 + fmadd.s $fa2, $fa2, $ft9, $fa7 + fmadd.s $fa2, $fa5, $ft11, $fa2 + fadd.s $fa2, $ft4, $fa2 + fmul.s $fa5, $fa6, $ft8 + fmadd.s $fa1, $fa1, $ft9, $fa5 + fmadd.s $fa1, $fa3, $ft11, $fa1 + fadd.s $fa1, $ft5, $fa1 + fmul.s $fa3, $fa4, $ft8 + fmadd.s $fa0, $fa0, $ft9, $fa3 + fmadd.s $fa0, $ft0, $ft11, $fa0 + fadd.s $fa0, $ft2, $fa0 fst.s $fa2, $sp, 16 fst.s $fa1, $sp, 20 fst.s $fa0, $sp, 24 st.w $zero, $sp, 28 fst.s $ft1, $sp, 32 - fst.s $ft2, $sp, 36 - fst.s $ft3, $sp, 40 + fst.s $ft10, $sp, 36 + fst.s $ft15, $sp, 40 st.w $zero, $sp, 44 - fst.s $ft4, $sp, 48 - fst.s $ft5, $sp, 52 - fst.s $ft6, $sp, 56 + fst.s $fs0, $sp, 48 + fst.s $fs1, $sp, 52 + fst.s $fs2, $sp, 56 st.w $zero, $sp, 60 - fst.s $ft7, $sp, 64 - fst.s $ft8, $sp, 68 - pcalau12i $a1, %pc_hi20(.LCPI10_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI10_0) - fst.s $ft12, $sp, 72 + fst.s $ft3, $sp, 64 + fst.s $ft6, $sp, 68 + fst.s $ft7, $sp, 72 st.w $zero, $sp, 76 - fabs.s $fa1, $ft1 - fadd.s $fa1, $fa1, $fa0 - fst.s $fa1, $sp, 80 - fabs.s $fa1, $ft2 - fadd.s $fa1, $fa1, $fa0 - fst.s $fa1, $sp, 84 - fabs.s $fa1, $ft3 - fadd.s $fa1, $fa1, $fa0 - fst.s $fa1, $sp, 88 - fabs.s $fa1, $ft4 - fadd.s $fa1, $fa1, $fa0 - fst.s $fa1, $sp, 96 - fabs.s $fa1, $ft5 - fadd.s $fa1, $fa1, $fa0 - fst.s $fa1, $sp, 100 - fabs.s $fa1, $ft6 - fadd.s $fa1, $fa1, $fa0 - fst.s $fa1, $sp, 104 - fabs.s $fa1, $ft7 - fadd.s $fa1, $fa1, $fa0 - fst.s $fa1, $sp, 112 - fabs.s $fa1, $ft8 - fadd.s $fa1, $fa1, $fa0 - fst.s $fa1, $sp, 116 - fabs.s $fa1, $ft12 - fadd.s $fa0, $fa1, $fa0 + fabs.s $fa0, $ft1 + lu12i.w $a1, 219235 + ori $a1, $a1, 1981 + movgr2fr.w $fa1, $a1 + fadd.s $fa0, $fa0, $fa1 + fst.s $fa0, $sp, 80 + fabs.s $fa0, $ft10 + fadd.s $fa0, $fa0, $fa1 + fst.s $fa0, $sp, 84 + fabs.s $fa0, $ft15 + fadd.s $fa0, $fa0, $fa1 + fst.s $fa0, $sp, 88 + fabs.s $fa0, $fs0 + fadd.s $fa0, $fa0, $fa1 + fst.s $fa0, $sp, 96 + fabs.s $fa0, $fs1 + fadd.s $fa0, $fa0, $fa1 + fst.s $fa0, $sp, 100 + fabs.s $fa0, $fs2 + fadd.s $fa0, $fa0, $fa1 + fst.s $fa0, $sp, 104 + fabs.s $fa0, $ft3 + fadd.s $fa0, $fa0, $fa1 + fst.s $fa0, $sp, 112 + fabs.s $fa0, $ft6 + fadd.s $fa0, $fa0, $fa1 + fst.s $fa0, $sp, 116 + fabs.s $fa0, $ft7 + fadd.s $fa0, $fa0, $fa1 fst.s $fa0, $sp, 120 addi.d $a3, $sp, 16 ori $a6, $zero, 1 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGImpactShape.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGImpactShape.s index f711bba6..bb435163 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGImpactShape.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGImpactShape.s @@ -2649,14 +2649,8 @@ _ZN18btGImpactMeshShape9setMarginEf: # @_ZN18btGImpactMeshShape9setMarginEf .size _ZN18btGImpactMeshShape9setMarginEf, .Lfunc_end61-_ZN18btGImpactMeshShape9setMarginEf .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN18btGImpactMeshShape13calcLocalAABBEv -.LCPI62_0: - .word 0x7f7fffff # float 3.40282347E+38 -.LCPI62_1: - .word 0xff7fffff # float -3.40282347E+38 .section .text._ZN18btGImpactMeshShape13calcLocalAABBEv,"axG",@progbits,_ZN18btGImpactMeshShape13calcLocalAABBEv,comdat - .weak _ZN18btGImpactMeshShape13calcLocalAABBEv + .weak _ZN18btGImpactMeshShape13calcLocalAABBEv # -- Begin function _ZN18btGImpactMeshShape13calcLocalAABBEv .p2align 5 .type _ZN18btGImpactMeshShape13calcLocalAABBEv,@function _ZN18btGImpactMeshShape13calcLocalAABBEv: # @_ZN18btGImpactMeshShape13calcLocalAABBEv @@ -2678,42 +2672,41 @@ _ZN18btGImpactMeshShape13calcLocalAABBEv: # @_ZN18btGImpactMeshShape13calcLocalA .cfi_offset 25, -40 .cfi_offset 26, -48 .cfi_offset 27, -56 + lu12i.w $a1, 522239 + ori $a1, $a1, 4095 move $fp, $a0 - lu12i.w $a0, 522239 - ori $a0, $a0, 4095 - st.w $a0, $fp, 36 - bstrins.d $a0, $a0, 62, 32 + move $a0, $a1 + bstrins.d $a0, $a1, 62, 32 st.d $a0, $fp, 28 + st.w $a1, $fp, 36 lu12i.w $a0, -2049 - ori $a0, $a0, 4095 + ori $a2, $a0, 4095 ld.w $s1, $fp, 188 - lu52i.d $a1, $a0, -9 - st.d $a1, $fp, 44 - lu32i.d $a0, 0 - st.w $a0, $fp, 52 + lu52i.d $a0, $a2, -9 + st.d $a0, $fp, 44 + lu32i.d $a2, 0 + st.w $a2, $fp, 52 beqz $s1, .LBB62_5 # %bb.1: # %.lr.ph addi.d $s2, $fp, 28 - pcalau12i $a0, %pc_hi20(.LCPI62_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI62_0) - pcalau12i $a0, %pc_hi20(.LCPI62_1) - fld.s $fa1, $a0, %pc_lo12(.LCPI62_1) - ld.d $a0, $fp, 200 addi.d $s3, $fp, 44 - slli.d $a1, $s1, 3 - addi.d $s4, $a1, -8 - fmov.s $fa4, $fa1 - fmov.s $fa2, $fa1 + ld.d $a0, $fp, 200 + slli.d $a3, $s1, 3 + movgr2fr.w $fa0, $a1 + movgr2fr.w $fa3, $a2 + addi.d $s4, $a3, -8 + fmov.s $fa4, $fa3 + fmov.s $fa1, $fa3 fmov.s $fa5, $fa0 - fmov.s $fa3, $fa0 + fmov.s $fa2, $fa0 b .LBB62_3 .p2align 4, , 16 .LBB62_2: # %_ZN23btGImpactShapeInterface11updateBoundEv.exit # in Loop: Header=BB62_3 Depth=1 fld.s $fa6, $s0, 28 - fcmp.clt.s $fcc0, $fa6, $fa3 - fsel $fa3, $fa3, $fa6, $fcc0 - fst.s $fa3, $fp, 28 + fcmp.clt.s $fcc0, $fa6, $fa2 + fsel $fa2, $fa2, $fa6, $fcc0 + fst.s $fa2, $fp, 28 fld.s $fa6, $s0, 32 fcmp.clt.s $fcc0, $fa6, $fa5 addi.d $a1, $s0, 28 @@ -2732,9 +2725,9 @@ _ZN18btGImpactMeshShape13calcLocalAABBEv: # @_ZN18btGImpactMeshShape13calcLocalA fld.s $fa0, $a1, 8 fst.s $fa0, $fp, 36 fld.s $fa6, $s0, 44 - fcmp.clt.s $fcc0, $fa2, $fa6 - fsel $fa2, $fa2, $fa6, $fcc0 - fst.s $fa2, $fp, 44 + fcmp.clt.s $fcc0, $fa1, $fa6 + fsel $fa1, $fa1, $fa6, $fcc0 + fst.s $fa1, $fp, 44 fld.s $fa6, $s0, 48 fcmp.clt.s $fcc0, $fa4, $fa6 addi.d $a1, $s0, 44 @@ -2745,14 +2738,14 @@ _ZN18btGImpactMeshShape13calcLocalAABBEv: # @_ZN18btGImpactMeshShape13calcLocalA fld.s $fa4, $a2, 4 fst.s $fa4, $fp, 48 fld.s $fa6, $s0, 52 - fcmp.clt.s $fcc0, $fa1, $fa6 + fcmp.clt.s $fcc0, $fa3, $fa6 movcf2gr $a2, $fcc0 masknez $a3, $s3, $a2 maskeqz $a1, $a1, $a2 or $a1, $a1, $a3 - fld.s $fa1, $a1, 8 + fld.s $fa3, $a1, 8 addi.d $s1, $s1, -1 - fst.s $fa1, $fp, 52 + fst.s $fa3, $fp, 52 addi.d $s4, $s4, -8 beqz $s1, .LBB62_5 .LBB62_3: # =>This Inner Loop Header: Depth=1 @@ -2766,12 +2759,12 @@ _ZN18btGImpactMeshShape13calcLocalAABBEv: # @_ZN18btGImpactMeshShape13calcLocalA jirl $ra, $a1, 0 st.b $zero, $s0, 60 ld.d $a0, $fp, 200 - fld.s $fa3, $fp, 28 + fld.s $fa2, $fp, 28 fld.s $fa5, $fp, 32 fld.s $fa0, $fp, 36 - fld.s $fa2, $fp, 44 + fld.s $fa1, $fp, 44 fld.s $fa4, $fp, 48 - fld.s $fa1, $fp, 52 + fld.s $fa3, $fp, 52 ldx.d $s0, $a0, $s4 b .LBB62_2 .LBB62_5: # %._crit_edge diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGeneric6DofConstraint.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGeneric6DofConstraint.s index 0fe67fca..e003c582 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGeneric6DofConstraint.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGeneric6DofConstraint.s @@ -345,14 +345,7 @@ _ZN22btRotationalLimitMotor14testLimitValueEf: # @_ZN22btRotationalLimitMotor14t .Lfunc_end4: .size _ZN22btRotationalLimitMotor14testLimitValueEf, .Lfunc_end4-_ZN22btRotationalLimitMotor14testLimitValueEf # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN22btRotationalLimitMotor18solveAngularLimitsEfR9btVector3fP11btRigidBodyR12btSolverBodyS3_S5_ -.LCPI5_0: - .word 0x34000000 # float 1.1920929E-7 -.LCPI5_1: - .word 0x5d5e0b6b # float 9.99999984E+17 - .text - .globl _ZN22btRotationalLimitMotor18solveAngularLimitsEfR9btVector3fP11btRigidBodyR12btSolverBodyS3_S5_ + .globl _ZN22btRotationalLimitMotor18solveAngularLimitsEfR9btVector3fP11btRigidBodyR12btSolverBodyS3_S5_ # -- Begin function _ZN22btRotationalLimitMotor18solveAngularLimitsEfR9btVector3fP11btRigidBodyR12btSolverBodyS3_S5_ .p2align 5 .type _ZN22btRotationalLimitMotor18solveAngularLimitsEfR9btVector3fP11btRigidBodyR12btSolverBodyS3_S5_,@function _ZN22btRotationalLimitMotor18solveAngularLimitsEfR9btVector3fP11btRigidBodyR12btSolverBodyS3_S5_: # @_ZN22btRotationalLimitMotor18solveAngularLimitsEfR9btVector3fP11btRigidBodyR12btSolverBodyS3_S5_ @@ -414,23 +407,23 @@ _ZN22btRotationalLimitMotor18solveAngularLimitsEfR9btVector3fP11btRigidBodyR12bt fadd.s $fa7, $ft4, $ft5 .LBB5_10: # %_ZNK12btSolverBody18getAngularVelocityER9btVector3.exit63 fsub.s $fa6, $fa6, $ft1 + fld.s $ft1, $a1, 4 fsub.s $fa5, $fa5, $ft0 - fld.s $ft0, $a1, 4 fsub.s $fa4, $fa4, $fa7 fld.s $fa7, $a1, 0 - fld.s $ft1, $a1, 8 - fmul.s $fa5, $fa5, $ft0 - fld.s $ft0, $a0, 20 + fmul.s $fa5, $fa5, $ft1 + fld.s $ft0, $a1, 8 + fld.s $ft1, $a0, 20 fmadd.s $fa5, $fa7, $fa6, $fa5 - fmadd.s $fa4, $ft1, $fa4, $fa5 - fld.s $fa5, $a0, 24 - fneg.s $fa6, $ft0 - pcalau12i $a7, %pc_hi20(.LCPI5_0) - fld.s $fa7, $a7, %pc_lo12(.LCPI5_0) - fmadd.s $fa3, $fa6, $fa4, $fa3 - fmul.s $fa3, $fa5, $fa3 + fld.s $fa6, $a0, 24 + fmadd.s $fa4, $ft0, $fa4, $fa5 + fneg.s $fa5, $ft1 + fmadd.s $fa3, $fa5, $fa4, $fa3 + fmul.s $fa3, $fa6, $fa3 fabs.s $fa4, $fa3 - fcmp.clt.s $fcc0, $fa4, $fa7 + lu12i.w $a7, 212992 + movgr2fr.w $fa5, $a7 + fcmp.clt.s $fcc0, $fa4, $fa5 bceqz $fcc0, .LBB5_12 # %bb.11: ret @@ -454,11 +447,12 @@ _ZN22btRotationalLimitMotor18solveAngularLimitsEfR9btVector3fP11btRigidBodyR12bt .LBB5_15: fsel $fa0, $fa0, $fa2, $fcc0 fld.s $fa2, $a0, 52 - pcalau12i $a6, %pc_hi20(.LCPI5_1) - fld.s $fa3, $a6, %pc_lo12(.LCPI5_1) fadd.s $fa0, $fa0, $fa2 - fabs.s $fa4, $fa0 - fcmp.clt.s $fcc0, $fa3, $fa4 + fabs.s $fa3, $fa0 + lu12i.w $a6, 382432 + ori $a6, $a6, 2923 + movgr2fr.w $fa4, $a6 + fcmp.clt.s $fcc0, $fa4, $fa3 fsel $fa0, $fa0, $fa1, $fcc0 fst.s $fa0, $a0, 52 fsub.s $fa0, $fa0, $fa2 @@ -610,14 +604,7 @@ _ZN25btTranslationalLimitMotor14testLimitValueEif: # @_ZN25btTranslationalLimitM .Lfunc_end6: .size _ZN25btTranslationalLimitMotor14testLimitValueEif, .Lfunc_end6-_ZN25btTranslationalLimitMotor14testLimitValueEif # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN25btTranslationalLimitMotor15solveLinearAxisEffR11btRigidBodyR12btSolverBodyRK9btVector3S1_S3_S6_iS6_S6_ -.LCPI7_0: - .word 0xdd5e0b6b # float -9.99999984E+17 -.LCPI7_1: - .word 0x5d5e0b6b # float 9.99999984E+17 - .text - .globl _ZN25btTranslationalLimitMotor15solveLinearAxisEffR11btRigidBodyR12btSolverBodyRK9btVector3S1_S3_S6_iS6_S6_ + .globl _ZN25btTranslationalLimitMotor15solveLinearAxisEffR11btRigidBodyR12btSolverBodyRK9btVector3S1_S3_S6_iS6_S6_ # -- Begin function _ZN25btTranslationalLimitMotor15solveLinearAxisEffR11btRigidBodyR12btSolverBodyRK9btVector3S1_S3_S6_iS6_S6_ .p2align 5 .type _ZN25btTranslationalLimitMotor15solveLinearAxisEffR11btRigidBodyR12btSolverBodyRK9btVector3S1_S3_S6_iS6_S6_,@function _ZN25btTranslationalLimitMotor15solveLinearAxisEffR11btRigidBodyR12btSolverBodyRK9btVector3S1_S3_S6_iS6_S6_: # @_ZN25btTranslationalLimitMotor15solveLinearAxisEffR11btRigidBodyR12btSolverBodyRK9btVector3S1_S3_S6_iS6_S6_ @@ -717,40 +704,43 @@ _ZN25btTranslationalLimitMotor15solveLinearAxisEffR11btRigidBodyR12btSolverBodyR fld.s $ft7, $t0, 4 fld.s $ft8, $a3, 0 fld.s $ft9, $a6, 0 - fld.s $ft11, $a3, 4 - fld.s $ft12, $a6, 4 - fld.s $ft13, $a3, 8 - fld.s $ft14, $a6, 8 - fld.s $ft10, $t0, 8 - fsub.s $ft15, $ft8, $ft9 - fsub.s $ft8, $ft11, $ft12 - fsub.s $ft11, $ft13, $ft14 - fmul.s $ft14, $ft7, $ft8 + fld.s $ft10, $a3, 4 + fld.s $ft11, $a6, 4 + fld.s $ft12, $a3, 8 + fld.s $ft13, $a6, 8 + fsub.s $ft8, $ft8, $ft9 + fld.s $ft9, $t0, 8 + fsub.s $ft10, $ft10, $ft11 + fsub.s $ft11, $ft12, $ft13 + fmul.s $ft10, $ft7, $ft10 + fmadd.s $ft8, $ft8, $ft6, $ft10 + fnmadd.s $ft10, $ft11, $ft9, $ft8 alsl.d $a3, $a7, $a0, 2 slli.d $a6, $a7, 2 fldx.s $ft12, $a0, $a6 fld.s $ft13, $a3, 16 - pcalau12i $a3, %pc_hi20(.LCPI7_0) - fld.s $ft8, $a3, %pc_lo12(.LCPI7_0) - pcalau12i $a3, %pc_hi20(.LCPI7_1) - fld.s $ft9, $a3, %pc_lo12(.LCPI7_1) - fmadd.s $ft14, $ft15, $ft6, $ft14 + lu12i.w $a3, 382432 + ori $a3, $a3, 2923 + movgr2fr.w $ft8, $a3 + lu12i.w $a3, -141856 + ori $a3, $a3, 2923 + lu32i.d $a3, 0 fcmp.cule.s $fcc0, $ft13, $ft12 - fnmadd.s $ft11, $ft11, $ft10, $ft14 + movgr2fr.w $ft11, $a3 bcnez $fcc0, .LBB7_9 # %bb.5: - fcmp.cule.s $fcc0, $ft11, $ft13 + fcmp.cule.s $fcc0, $ft10, $ft13 bcnez $fcc0, .LBB7_7 # %bb.6: - fsub.s $ft11, $ft11, $ft13 - movgr2fr.w $ft8, $zero + fsub.s $ft10, $ft10, $ft13 + movgr2fr.w $ft11, $zero b .LBB7_9 .LBB7_7: - fcmp.cule.s $fcc0, $ft12, $ft11 + fcmp.cule.s $fcc0, $ft12, $ft10 bcnez $fcc0, .LBB7_10 # %bb.8: - fsub.s $ft11, $ft11, $ft12 - movgr2fr.w $ft9, $zero + fsub.s $ft10, $ft10, $ft12 + movgr2fr.w $ft8, $zero .LBB7_9: fsub.s $ft3, $ft3, $ft5 fsub.s $ft2, $ft2, $ft4 @@ -758,10 +748,10 @@ _ZN25btTranslationalLimitMotor15solveLinearAxisEffR11btRigidBodyR12btSolverBodyR fmul.s $ft1, $ft2, $ft7 fmadd.s $ft1, $ft6, $ft3, $ft1 fld.s $ft2, $a0, 56 - fmadd.s $ft0, $ft10, $ft0, $ft1 + fmadd.s $ft0, $ft9, $ft0, $ft1 fld.s $ft1, $a0, 52 fld.s $ft3, $a0, 48 - fmul.s $ft2, $ft11, $ft2 + fmul.s $ft2, $ft10, $ft2 fdiv.s $fa0, $ft2, $fa0 fneg.s $ft1, $ft1 alsl.d $a0, $a7, $a0, 2 @@ -770,10 +760,10 @@ _ZN25btTranslationalLimitMotor15solveLinearAxisEffR11btRigidBodyR12btSolverBodyR fmul.s $fa0, $ft3, $fa0 fmul.s $fa0, $fa1, $fa0 fadd.s $fa0, $ft2, $fa0 - fcmp.clt.s $fcc0, $fa0, $ft8 + fcmp.clt.s $fcc0, $fa0, $ft11 movgr2fr.w $fa1, $zero fsel $ft0, $fa0, $fa1, $fcc0 - fcmp.clt.s $fcc0, $ft9, $fa0 + fcmp.clt.s $fcc0, $ft8, $fa0 fsel $fa0, $ft0, $fa1, $fcc0 fst.s $fa0, $a0, 32 fld.s $ft0, $t0, 4 @@ -1817,18 +1807,8 @@ _ZN23btGeneric6DofConstraint21testAngularLimitMotorEi: # @_ZN23btGeneric6DofCons .size _ZN23btGeneric6DofConstraint21testAngularLimitMotorEi, .Lfunc_end14-_ZN23btGeneric6DofConstraint21testAngularLimitMotorEi .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z21btAdjustAngleToLimitsfff -.LCPI15_0: - .word 0x40c90fdb # float 6.28318548 -.LCPI15_1: - .word 0xc0490fdb # float -3.14159274 -.LCPI15_2: - .word 0x40490fdb # float 3.14159274 -.LCPI15_3: - .word 0xc0c90fdb # float -6.28318548 .section .text._Z21btAdjustAngleToLimitsfff,"axG",@progbits,_Z21btAdjustAngleToLimitsfff,comdat - .weak _Z21btAdjustAngleToLimitsfff + .weak _Z21btAdjustAngleToLimitsfff # -- Begin function _Z21btAdjustAngleToLimitsfff .p2align 5 .type _Z21btAdjustAngleToLimitsfff,@function _Z21btAdjustAngleToLimitsfff: # @_Z21btAdjustAngleToLimitsfff @@ -1861,14 +1841,17 @@ _Z21btAdjustAngleToLimitsfff: # @_Z21btAdjustAngleToLimitsfff # %bb.2: fmov.s $fs4, $fa2 fsub.s $fa0, $fa1, $fs0 - pcalau12i $a0, %pc_hi20(.LCPI15_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI15_0) + lu12i.w $a0, 265360 + ori $a0, $a0, 4059 + movgr2fr.w $fs1, $a0 fmov.s $fa1, $fs1 pcaddu18i $ra, %call36(fmodf) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI15_1) - fld.s $fs3, $a0, %pc_lo12(.LCPI15_1) fmov.s $fs2, $fa0 + lu12i.w $a0, -260976 + ori $a0, $a0, 4059 + lu32i.d $a0, 0 + movgr2fr.w $fs3, $a0 fcmp.cule.s $fcc0, $fs3, $fa0 bcnez $fcc0, .LBB15_7 # %bb.3: @@ -1880,29 +1863,35 @@ _Z21btAdjustAngleToLimitsfff: # @_Z21btAdjustAngleToLimitsfff # %bb.5: fmov.s $fs4, $fa1 fsub.s $fa0, $fs0, $fa2 - pcalau12i $a0, %pc_hi20(.LCPI15_0) - fld.s $fs2, $a0, %pc_lo12(.LCPI15_0) + lu12i.w $a0, 265360 + ori $a0, $a0, 4059 + movgr2fr.w $fs2, $a0 fmov.s $fa1, $fs2 pcaddu18i $ra, %call36(fmodf) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI15_1) - fld.s $fs3, $a0, %pc_lo12(.LCPI15_1) fmov.s $fs1, $fa0 + lu12i.w $a0, -260976 + ori $a0, $a0, 4059 + lu32i.d $a0, 0 + movgr2fr.w $fs3, $a0 fcmp.cule.s $fcc0, $fs3, $fa0 - pcalau12i $s0, %pc_hi20(.LCPI15_2) - pcalau12i $fp, %pc_hi20(.LCPI15_3) + lu12i.w $s0, 263312 + lu12i.w $fp, -258928 bcnez $fcc0, .LBB15_14 # %bb.6: fadd.s $fs1, $fs1, $fs2 b .LBB15_16 .LBB15_7: - pcalau12i $a0, %pc_hi20(.LCPI15_2) - fld.s $fa0, $a0, %pc_lo12(.LCPI15_2) + lu12i.w $a0, 263312 + ori $a0, $a0, 4059 + movgr2fr.w $fa0, $a0 fcmp.cule.s $fcc0, $fs2, $fa0 bcnez $fcc0, .LBB15_9 # %bb.8: - pcalau12i $a0, %pc_hi20(.LCPI15_3) - fld.s $fa0, $a0, %pc_lo12(.LCPI15_3) + lu12i.w $a0, -258928 + ori $a0, $a0, 4059 + lu32i.d $a0, 0 + movgr2fr.w $fa0, $a0 fadd.s $fs2, $fs2, $fa0 .LBB15_9: # %_Z16btNormalizeAnglef.exit fsub.s $fa0, $fs4, $fs0 @@ -1915,13 +1904,16 @@ _Z21btAdjustAngleToLimitsfff: # @_Z21btAdjustAngleToLimitsfff fadd.s $fa0, $fa0, $fs1 b .LBB15_13 .LBB15_11: - pcalau12i $a0, %pc_hi20(.LCPI15_2) - fld.s $fa1, $a0, %pc_lo12(.LCPI15_2) + lu12i.w $a0, 263312 + ori $a0, $a0, 4059 + movgr2fr.w $fa1, $a0 fcmp.cule.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB15_13 # %bb.12: - pcalau12i $a0, %pc_hi20(.LCPI15_3) - fld.s $fa1, $a0, %pc_lo12(.LCPI15_3) + lu12i.w $a0, -258928 + ori $a0, $a0, 4059 + lu32i.d $a0, 0 + movgr2fr.w $fa1, $a0 fadd.s $fa0, $fa0, $fa1 .LBB15_13: # %_Z16btNormalizeAnglef.exit29 fabs.s $fa0, $fa0 @@ -1930,11 +1922,14 @@ _Z21btAdjustAngleToLimitsfff: # @_Z21btAdjustAngleToLimitsfff fsel $fs0, $fa1, $fs0, $fcc0 b .LBB15_21 .LBB15_14: - fld.s $fa0, $s0, %pc_lo12(.LCPI15_2) + ori $a0, $s0, 4059 + movgr2fr.w $fa0, $a0 fcmp.cule.s $fcc0, $fs1, $fa0 bcnez $fcc0, .LBB15_16 # %bb.15: - fld.s $fa0, $fp, %pc_lo12(.LCPI15_3) + ori $a0, $fp, 4059 + lu32i.d $a0, 0 + movgr2fr.w $fa0, $a0 fadd.s $fs1, $fs1, $fa0 .LBB15_16: # %_Z16btNormalizeAnglef.exit31 fsub.s $fa0, $fs0, $fs4 @@ -1947,15 +1942,20 @@ _Z21btAdjustAngleToLimitsfff: # @_Z21btAdjustAngleToLimitsfff fadd.s $fa0, $fa0, $fs2 b .LBB15_20 .LBB15_18: - fld.s $fa1, $s0, %pc_lo12(.LCPI15_2) + ori $a0, $s0, 4059 + movgr2fr.w $fa1, $a0 fcmp.cule.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB15_20 # %bb.19: - fld.s $fa1, $fp, %pc_lo12(.LCPI15_3) + ori $a0, $fp, 4059 + lu32i.d $a0, 0 + movgr2fr.w $fa1, $a0 fadd.s $fa0, $fa0, $fa1 .LBB15_20: # %_Z16btNormalizeAnglef.exit33 - fld.s $fa1, $fp, %pc_lo12(.LCPI15_3) fabs.s $fa0, $fa0 + ori $a0, $fp, 4059 + lu32i.d $a0, 0 + movgr2fr.w $fa1, $a0 fadd.s $fa1, $fs0, $fa1 fcmp.clt.s $fcc0, $fa0, $fs1 fsel $fs0, $fs0, $fa1, $fcc0 @@ -3030,14 +3030,7 @@ _ZN23btGeneric6DofConstraint16setAngularLimitsEPN17btTypedConstraint17btConstrai .size _ZN23btGeneric6DofConstraint16setAngularLimitsEPN17btTypedConstraint17btConstraintInfo2EiRK11btTransformS5_RK9btVector3S8_S8_S8_, .Lfunc_end23-_ZN23btGeneric6DofConstraint16setAngularLimitsEPN17btTypedConstraint17btConstraintInfo2EiRK11btTransformS5_RK9btVector3S8_S8_S8_ .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN23btGeneric6DofConstraint21get_limit_motor_info2EP22btRotationalLimitMotorRK11btTransformS4_RK9btVector3S7_S7_S7_PN17btTypedConstraint17btConstraintInfo2EiRS5_i -.LCPI24_0: - .word 0xff7fffff # float -3.40282347E+38 -.LCPI24_1: - .word 0x7f7fffff # float 3.40282347E+38 - .text - .globl _ZN23btGeneric6DofConstraint21get_limit_motor_info2EP22btRotationalLimitMotorRK11btTransformS4_RK9btVector3S7_S7_S7_PN17btTypedConstraint17btConstraintInfo2EiRS5_i + .globl _ZN23btGeneric6DofConstraint21get_limit_motor_info2EP22btRotationalLimitMotorRK11btTransformS4_RK9btVector3S7_S7_S7_PN17btTypedConstraint17btConstraintInfo2EiRS5_i # -- Begin function _ZN23btGeneric6DofConstraint21get_limit_motor_info2EP22btRotationalLimitMotorRK11btTransformS4_RK9btVector3S7_S7_S7_PN17btTypedConstraint17btConstraintInfo2EiRS5_i .p2align 5 .type _ZN23btGeneric6DofConstraint21get_limit_motor_info2EP22btRotationalLimitMotorRK11btTransformS4_RK9btVector3S7_S7_S7_PN17btTypedConstraint17btConstraintInfo2EiRS5_i,@function _ZN23btGeneric6DofConstraint21get_limit_motor_info2EP22btRotationalLimitMotorRK11btTransformS4_RK9btVector3S7_S7_S7_PN17btTypedConstraint17btConstraintInfo2EiRS5_i: # @_ZN23btGeneric6DofConstraint21get_limit_motor_info2EP22btRotationalLimitMotorRK11btTransformS4_RK9btVector3S7_S7_S7_PN17btTypedConstraint17btConstraintInfo2EiRS5_i @@ -3241,19 +3234,22 @@ _ZN23btGeneric6DofConstraint21get_limit_motor_info2EP22btRotationalLimitMotorRK1 addi.d $a0, $t1, -1 sltui $a0, $a0, 1 ld.d $a3, $s0, 64 - pcalau12i $t4, %pc_hi20(.LCPI24_0) - fld.s $fa0, $t4, %pc_lo12(.LCPI24_0) - pcalau12i $t4, %pc_hi20(.LCPI24_1) - fld.s $fa2, $t4, %pc_lo12(.LCPI24_1) - movgr2fr.w $fa1, $zero + lu12i.w $t4, -2049 + ori $t4, $t4, 4095 + lu32i.d $t4, 0 + movgr2fr.w $fa1, $t4 + movgr2fr.w $fa0, $zero movgr2cf $fcc0, $a0 - ld.d $a0, $s0, 72 - fsel $fa0, $fa0, $fa1, $fcc0 - fsel $fa2, $fa1, $fa2, $fcc0 - fstx.s $fa0, $a3, $fp - fstx.s $fa2, $a0, $fp - fld.s $fa0, $a1, 32 - fcmp.cule.s $fcc0, $fa0, $fa1 + fsel $fa1, $fa1, $fa0, $fcc0 + lu12i.w $a0, 522239 + ori $a0, $a0, 4095 + ld.d $t4, $s0, 72 + movgr2fr.w $fa2, $a0 + fsel $fa2, $fa0, $fa2, $fcc0 + fstx.s $fa1, $a3, $fp + fstx.s $fa2, $t4, $fp + fld.s $fa1, $a1, 32 + fcmp.cule.s $fcc0, $fa1, $fa0 ori $a0, $zero, 1 bcnez $fcc0, .LBB24_17 # %bb.19: @@ -3282,23 +3278,23 @@ _ZN23btGeneric6DofConstraint21get_limit_motor_info2EP22btRotationalLimitMotorRK1 fsub.s $fa2, $fa5, $fa2 bne $t1, $a0, .LBB24_22 # %bb.20: - fcmp.cule.s $fcc0, $fa1, $fa2 + fcmp.cule.s $fcc0, $fa0, $fa2 bcnez $fcc0, .LBB24_17 # %bb.21: - fldx.s $fa1, $a2, $fp - fneg.s $fa0, $fa0 + fldx.s $fa3, $a2, $fp + fneg.s $fa0, $fa1 fmul.s $fa0, $fa2, $fa0 - fcmp.cule.s $fcc0, $fa0, $fa1 + fcmp.cule.s $fcc0, $fa0, $fa3 bceqz $fcc0, .LBB24_24 b .LBB24_17 .LBB24_22: - fcmp.cule.s $fcc0, $fa2, $fa1 + fcmp.cule.s $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB24_17 # %bb.23: - fldx.s $fa1, $a2, $fp - fneg.s $fa0, $fa0 + fldx.s $fa3, $a2, $fp + fneg.s $fa0, $fa1 fmul.s $fa0, $fa2, $fa0 - fcmp.cule.s $fcc0, $fa1, $fa0 + fcmp.cule.s $fcc0, $fa3, $fa0 bcnez $fcc0, .LBB24_17 .LBB24_24: alsl.d $a1, $t0, $a2, 2 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGeometryUtil.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGeometryUtil.s index a850491b..1fe703c3 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGeometryUtil.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGeometryUtil.s @@ -91,12 +91,7 @@ _ZN14btGeometryUtil22areVerticesBehindPlaneERK9btVector3RK20btAlignedObjectArray .Lfunc_end2: .size _ZN14btGeometryUtil22areVerticesBehindPlaneERK9btVector3RK20btAlignedObjectArrayIS0_Ef, .Lfunc_end2-_ZN14btGeometryUtil22areVerticesBehindPlaneERK9btVector3RK20btAlignedObjectArrayIS0_Ef # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z8notExistRK9btVector3RK20btAlignedObjectArrayIS_E -.LCPI3_0: - .word 0x3f7fbe77 # float 0.999000012 - .text - .globl _Z8notExistRK9btVector3RK20btAlignedObjectArrayIS_E + .globl _Z8notExistRK9btVector3RK20btAlignedObjectArrayIS_E # -- Begin function _Z8notExistRK9btVector3RK20btAlignedObjectArrayIS_E .p2align 5 .type _Z8notExistRK9btVector3RK20btAlignedObjectArrayIS_E,@function _Z8notExistRK9btVector3RK20btAlignedObjectArrayIS_E: # @_Z8notExistRK9btVector3RK20btAlignedObjectArrayIS_E @@ -108,10 +103,11 @@ _Z8notExistRK9btVector3RK20btAlignedObjectArrayIS_E: # @_Z8notExistRK9btVector3R fld.s $fa0, $a0, 0 fld.s $fa1, $a0, 4 fld.s $fa2, $a0, 8 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.s $fa3, $a0, %pc_lo12(.LCPI3_0) addi.d $a2, $a2, -1 addi.d $a1, $a1, 8 + lu12i.w $a0, 260091 + ori $a0, $a0, 3703 + movgr2fr.w $fa3, $a0 .p2align 4, , 16 .LBB3_2: # =>This Inner Loop Header: Depth=1 fld.s $fa4, $a1, -4 @@ -137,42 +133,33 @@ _Z8notExistRK9btVector3RK20btAlignedObjectArrayIS_E: # @_Z8notExistRK9btVector3R .Lfunc_end3: .size _Z8notExistRK9btVector3RK20btAlignedObjectArrayIS_E, .Lfunc_end3-_Z8notExistRK9btVector3RK20btAlignedObjectArrayIS_E # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN14btGeometryUtil29getPlaneEquationsFromVerticesER20btAlignedObjectArrayI9btVector3ES3_ -.LCPI4_0: - .word 0x38d1b717 # float 9.99999974E-5 -.LCPI4_1: - .word 0x3f7fbe77 # float 0.999000012 -.LCPI4_2: - .word 0x3c23d70a # float 0.00999999977 - .text - .globl _ZN14btGeometryUtil29getPlaneEquationsFromVerticesER20btAlignedObjectArrayI9btVector3ES3_ + .globl _ZN14btGeometryUtil29getPlaneEquationsFromVerticesER20btAlignedObjectArrayI9btVector3ES3_ # -- Begin function _ZN14btGeometryUtil29getPlaneEquationsFromVerticesER20btAlignedObjectArrayI9btVector3ES3_ .p2align 5 .type _ZN14btGeometryUtil29getPlaneEquationsFromVerticesER20btAlignedObjectArrayI9btVector3ES3_,@function _ZN14btGeometryUtil29getPlaneEquationsFromVerticesER20btAlignedObjectArrayI9btVector3ES3_: # @_ZN14btGeometryUtil29getPlaneEquationsFromVerticesER20btAlignedObjectArrayI9btVector3ES3_ .cfi_startproc # %bb.0: - addi.d $sp, $sp, -240 - .cfi_def_cfa_offset 240 - st.d $ra, $sp, 232 # 8-byte Folded Spill - st.d $fp, $sp, 224 # 8-byte Folded Spill - st.d $s0, $sp, 216 # 8-byte Folded Spill - st.d $s1, $sp, 208 # 8-byte Folded Spill - st.d $s2, $sp, 200 # 8-byte Folded Spill - st.d $s3, $sp, 192 # 8-byte Folded Spill - st.d $s4, $sp, 184 # 8-byte Folded Spill - st.d $s5, $sp, 176 # 8-byte Folded Spill - st.d $s6, $sp, 168 # 8-byte Folded Spill - st.d $s7, $sp, 160 # 8-byte Folded Spill - st.d $s8, $sp, 152 # 8-byte Folded Spill - fst.d $fs0, $sp, 144 # 8-byte Folded Spill - fst.d $fs1, $sp, 136 # 8-byte Folded Spill - fst.d $fs2, $sp, 128 # 8-byte Folded Spill - fst.d $fs3, $sp, 120 # 8-byte Folded Spill - fst.d $fs4, $sp, 112 # 8-byte Folded Spill - fst.d $fs5, $sp, 104 # 8-byte Folded Spill - fst.d $fs6, $sp, 96 # 8-byte Folded Spill - fst.d $fs7, $sp, 88 # 8-byte Folded Spill + addi.d $sp, $sp, -224 + .cfi_def_cfa_offset 224 + st.d $ra, $sp, 216 # 8-byte Folded Spill + st.d $fp, $sp, 208 # 8-byte Folded Spill + st.d $s0, $sp, 200 # 8-byte Folded Spill + st.d $s1, $sp, 192 # 8-byte Folded Spill + st.d $s2, $sp, 184 # 8-byte Folded Spill + st.d $s3, $sp, 176 # 8-byte Folded Spill + st.d $s4, $sp, 168 # 8-byte Folded Spill + st.d $s5, $sp, 160 # 8-byte Folded Spill + st.d $s6, $sp, 152 # 8-byte Folded Spill + st.d $s7, $sp, 144 # 8-byte Folded Spill + st.d $s8, $sp, 136 # 8-byte Folded Spill + fst.d $fs0, $sp, 128 # 8-byte Folded Spill + fst.d $fs1, $sp, 120 # 8-byte Folded Spill + fst.d $fs2, $sp, 112 # 8-byte Folded Spill + fst.d $fs3, $sp, 104 # 8-byte Folded Spill + fst.d $fs4, $sp, 96 # 8-byte Folded Spill + fst.d $fs5, $sp, 88 # 8-byte Folded Spill + fst.d $fs6, $sp, 80 # 8-byte Folded Spill + fst.d $fs7, $sp, 72 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -198,26 +185,32 @@ _ZN14btGeometryUtil29getPlaneEquationsFromVerticesER20btAlignedObjectArrayI9btVe move $fp, $a0 move $s0, $a1 move $a1, $zero - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.s $ft1, $a0, %pc_lo12(.LCPI4_0) ori $a0, $zero, 2 - st.d $a0, $sp, 40 # 8-byte Folded Spill + st.d $a0, $sp, 32 # 8-byte Folded Spill ori $a4, $zero, 1 + lu12i.w $a0, 232731 + ori $a0, $a0, 1815 + movgr2fr.w $ft1, $a0 + lu12i.w $a0, 246333 + ori $a0, $a0, 1802 + movgr2fr.w $fs1, $a0 + lu12i.w $a0, 260091 + ori $a0, $a0, 3703 + movgr2fr.w $fs2, $a0 ori $a0, $zero, 1 - st.d $a0, $sp, 32 # 8-byte Folded Spill + st.d $a0, $sp, 24 # 8-byte Folded Spill fst.s $ft1, $sp, 68 # 4-byte Folded Spill - pcalau12i $s8, %pc_hi20(.LCPI4_1) b .LBB4_3 .p2align 4, , 16 .LBB4_2: # %.loopexit104 # in Loop: Header=BB4_3 Depth=1 - ld.d $a0, $sp, 40 # 8-byte Folded Reload - addi.d $a0, $a0, 1 - st.d $a0, $sp, 40 # 8-byte Folded Spill ld.d $a0, $sp, 32 # 8-byte Folded Reload addi.d $a0, $a0, 1 st.d $a0, $sp, 32 # 8-byte Folded Spill - ld.d $a1, $sp, 24 # 8-byte Folded Reload + ld.d $a0, $sp, 24 # 8-byte Folded Reload + addi.d $a0, $a0, 1 + st.d $a0, $sp, 24 # 8-byte Folded Spill + ld.d $a1, $sp, 16 # 8-byte Folded Reload beq $a1, $s2, .LBB4_51 .LBB4_3: # =>This Loop Header: Depth=1 # Child Loop BB4_6 Depth 2 @@ -230,23 +223,19 @@ _ZN14btGeometryUtil29getPlaneEquationsFromVerticesER20btAlignedObjectArrayI9btVe # Child Loop BB4_47 Depth 4 move $a0, $a1 addi.d $a1, $a1, 1 - st.d $a1, $sp, 24 # 8-byte Folded Spill + st.d $a1, $sp, 16 # 8-byte Folded Spill bgeu $a1, $s2, .LBB4_2 # %bb.4: # in Loop: Header=BB4_3 Depth=1 ld.d $a1, $fp, 16 alsl.d $s7, $a0, $a1, 4 - ld.d $a0, $sp, 32 # 8-byte Folded Reload - ld.d $a1, $sp, 40 # 8-byte Folded Reload - st.d $a1, $sp, 80 # 8-byte Folded Spill + ld.d $s8, $sp, 24 # 8-byte Folded Reload + ld.d $s6, $sp, 32 # 8-byte Folded Reload b .LBB4_6 .p2align 4, , 16 .LBB4_5: # %.loopexit103 # in Loop: Header=BB4_6 Depth=2 - ld.d $a0, $sp, 80 # 8-byte Folded Reload - addi.d $a0, $a0, 1 - st.d $a0, $sp, 80 # 8-byte Folded Spill - ld.d $a0, $sp, 72 # 8-byte Folded Reload - beq $a0, $s2, .LBB4_2 + addi.d $s6, $s6, 1 + beq $s8, $s2, .LBB4_2 .LBB4_6: # Parent Loop BB4_3 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB4_11 Depth 3 @@ -256,15 +245,15 @@ _ZN14btGeometryUtil29getPlaneEquationsFromVerticesER20btAlignedObjectArrayI9btVe # Child Loop BB4_35 Depth 4 # Child Loop BB4_39 Depth 4 # Child Loop BB4_47 Depth 4 - addi.d $a1, $a0, 1 - st.d $a1, $sp, 72 # 8-byte Folded Spill - addi.w $a1, $a1, 0 + move $a0, $s8 + addi.d $s8, $s8, 1 + addi.w $a1, $s8, 0 bge $a1, $s2, .LBB4_5 # %bb.7: # %.lr.ph # in Loop: Header=BB4_6 Depth=2 ld.d $a1, $fp, 16 alsl.d $s3, $a0, $a1, 4 - ld.d $s4, $sp, 80 # 8-byte Folded Reload + move $s4, $s6 b .LBB4_11 .LBB4_8: # %_ZN20btAlignedObjectArrayI9btVector3E10deallocateEv.exit.i.i.1 # in Loop: Header=BB4_11 Depth=3 @@ -276,12 +265,12 @@ _ZN14btGeometryUtil29getPlaneEquationsFromVerticesER20btAlignedObjectArrayI9btVe .LBB4_9: # %_ZN20btAlignedObjectArrayI9btVector3E9push_backERKS0_.exit.1 # in Loop: Header=BB4_11 Depth=3 ld.d $a0, $s0, 16 - fnmadd.s $fa0, $fs3, $fs0, $fs4 + fnmadd.s $fa0, $fs5, $fs0, $fs6 alsl.d $a2, $a1, $a0, 4 slli.d $a1, $a1, 4 - fstx.s $fs1, $a0, $a1 - fst.s $fs2, $a2, 4 - fst.s $fs3, $a2, 8 + fstx.s $fs3, $a0, $a1 + fst.s $fs4, $a2, 4 + fst.s $fs5, $a2, 8 fst.s $fa0, $a2, 12 ld.w $a0, $s0, 4 addi.d $a0, $a0, 1 @@ -310,35 +299,34 @@ _ZN14btGeometryUtil29getPlaneEquationsFromVerticesER20btAlignedObjectArrayI9btVe fsub.s $fa2, $fa2, $fa0 fsub.s $fa3, $fa3, $fa1 fld.s $fa4, $s3, 8 - fld.s $fs2, $s7, 8 + fld.s $fs4, $s7, 8 fldx.s $fa5, $a0, $a2 fld.s $fa6, $a1, 4 fld.s $fa7, $a1, 8 - fsub.s $fa4, $fa4, $fs2 + fsub.s $fa4, $fa4, $fs4 fsub.s $fa5, $fa5, $fa0 fsub.s $fa6, $fa6, $fa1 - fsub.s $fa7, $fa7, $fs2 + fsub.s $fa7, $fa7, $fs4 fneg.s $ft0, $fa6 fmul.s $ft0, $fa4, $ft0 - fmadd.s $fs1, $fa3, $fa7, $ft0 + fmadd.s $ft0, $fa3, $fa7, $ft0 fneg.s $fa7, $fa7 fmul.s $fa7, $fa2, $fa7 - fmadd.s $fs3, $fa4, $fa5, $fa7 + fmadd.s $fa7, $fa4, $fa5, $fa7 fneg.s $fa4, $fa5 fmul.s $fa3, $fa3, $fa4 - fmadd.s $fs5, $fa2, $fa6, $fa3 - fmul.s $fa2, $fs3, $fs3 - fmadd.s $fa2, $fs1, $fs1, $fa2 - fmadd.s $fs4, $fs5, $fs5, $fa2 - fcmp.cule.s $fcc1, $fs4, $ft1 - pcalau12i $s5, %pc_hi20(.LCPI4_2) + fmadd.s $fs7, $fa2, $fa6, $fa3 + fmul.s $fa2, $fa7, $fa7 + fmadd.s $fa2, $ft0, $ft0, $fa2 + fmadd.s $fs6, $fs7, $fs7, $fa2 + fcmp.cule.s $fcc1, $fs6, $ft1 bcnez $fcc1, .LBB4_32 # %bb.12: # in Loop: Header=BB4_11 Depth=3 ld.w $a1, $s0, 4 - frsqrt.s $fa2, $fs4 - fmul.s $fs6, $fs1, $fa2 - fmul.s $fs7, $fs3, $fa2 - fmul.s $fs0, $fs5, $fa2 + frsqrt.s $fa2, $fs6 + fmul.s $fs0, $ft0, $fa2 + fmul.s $fs3, $fa7, $fa2 + fmul.s $fs5, $fs7, $fa2 blez $a1, .LBB4_16 # %bb.13: # %.lr.ph.i # in Loop: Header=BB4_11 Depth=3 @@ -353,11 +341,10 @@ _ZN14btGeometryUtil29getPlaneEquationsFromVerticesER20btAlignedObjectArrayI9btVe fld.s $fa2, $a2, -4 fld.s $fa3, $a2, -8 fld.s $fa4, $a2, 0 - fld.s $fa5, $s8, %pc_lo12(.LCPI4_1) - fmul.s $fa2, $fs7, $fa2 - fmadd.s $fa2, $fs6, $fa3, $fa2 - fmadd.s $fa2, $fs0, $fa4, $fa2 - fcmp.cule.s $fcc0, $fa2, $fa5 + fmul.s $fa2, $fs3, $fa2 + fmadd.s $fa2, $fs0, $fa3, $fa2 + fmadd.s $fa2, $fs5, $fa4, $fa2 + fcmp.cule.s $fcc0, $fa2, $fs2 bceqz $fcc0, .LBB4_32 # %bb.15: # in Loop: Header=BB4_14 Depth=4 addi.d $a3, $a3, -1 @@ -366,12 +353,12 @@ _ZN14btGeometryUtil29getPlaneEquationsFromVerticesER20btAlignedObjectArrayI9btVe .LBB4_16: # %.loopexit102 # in Loop: Header=BB4_11 Depth=3 ld.w $a2, $fp, 4 - fmul.s $fa1, $fs7, $fa1 - fmadd.s $fa5, $fs6, $fa0, $fa1 + fmul.s $fa1, $fs3, $fa1 + fmadd.s $fa4, $fs0, $fa0, $fa1 blez $a2, .LBB4_20 # %bb.17: # %.lr.ph.i61 # in Loop: Header=BB4_11 Depth=3 - fmadd.s $fa0, $fs0, $fs2, $fa5 + fmadd.s $fa0, $fs5, $fs4, $fa4 addi.d $a0, $a0, 8 .p2align 4, , 16 .LBB4_18: # Parent Loop BB4_3 Depth=1 @@ -381,12 +368,11 @@ _ZN14btGeometryUtil29getPlaneEquationsFromVerticesER20btAlignedObjectArrayI9btVe fld.s $fa1, $a0, -4 fld.s $fa2, $a0, -8 fld.s $fa3, $a0, 0 - fmul.s $fa1, $fs7, $fa1 - fld.s $fa4, $s5, %pc_lo12(.LCPI4_2) - fmadd.s $fa1, $fs6, $fa2, $fa1 - fmadd.s $fa1, $fs0, $fa3, $fa1 + fmul.s $fa1, $fs3, $fa1 + fmadd.s $fa1, $fs0, $fa2, $fa1 + fmadd.s $fa1, $fs5, $fa3, $fa1 fsub.s $fa1, $fa1, $fa0 - fcmp.cule.s $fcc0, $fa1, $fa4 + fcmp.cule.s $fcc0, $fa1, $fs1 bceqz $fcc0, .LBB4_32 # %bb.19: # in Loop: Header=BB4_18 Depth=4 addi.d $a2, $a2, -1 @@ -401,23 +387,27 @@ _ZN14btGeometryUtil29getPlaneEquationsFromVerticesER20btAlignedObjectArrayI9btVe slli.w $a2, $a1, 1 masknez $a2, $a2, $a0 maskeqz $a0, $a4, $a0 - or $s6, $a0, $a2 - bge $a1, $s6, .LBB4_31 + or $s5, $a0, $a2 + bge $a1, $s5, .LBB4_31 # %bb.22: # in Loop: Header=BB4_11 Depth=3 + fst.s $ft0, $sp, 64 # 4-byte Folded Spill + fst.s $fa7, $sp, 60 # 4-byte Folded Spill movcf2gr $a0, $fcc1 - st.d $a0, $sp, 56 - fst.s $fa5, $sp, 52 # 4-byte Folded Spill - beqz $s6, .LBB4_24 + st.d $a0, $sp, 48 + fst.s $fa4, $sp, 44 # 4-byte Folded Spill + beqz $s5, .LBB4_24 # %bb.23: # in Loop: Header=BB4_11 Depth=3 - slli.d $a0, $s6, 4 + slli.d $a0, $s5, 4 ori $a1, $zero, 16 pcaddu18i $ra, %call36(_Z22btAlignedAllocInternalmi) jirl $ra, $ra, 0 - fld.s $fa5, $sp, 52 # 4-byte Folded Reload - ld.d $a1, $sp, 56 + fld.s $fa4, $sp, 44 # 4-byte Folded Reload + ld.d $a1, $sp, 48 movgr2cf $fcc1, $a1 - ori $a4, $zero, 1 + fld.s $fa7, $sp, 60 # 4-byte Folded Reload + fld.s $ft0, $sp, 64 # 4-byte Folded Reload fld.s $ft1, $sp, 68 # 4-byte Folded Reload + ori $a4, $zero, 1 ld.w $a1, $s0, 4 move $s1, $a0 bgtz $a1, .LBB4_25 @@ -451,26 +441,28 @@ _ZN14btGeometryUtil29getPlaneEquationsFromVerticesER20btAlignedObjectArrayI9btVe # %bb.29: # in Loop: Header=BB4_11 Depth=3 pcaddu18i $ra, %call36(_Z21btAlignedFreeInternalPv) jirl $ra, $ra, 0 - fld.s $fa5, $sp, 52 # 4-byte Folded Reload - ld.d $a0, $sp, 56 + fld.s $fa4, $sp, 44 # 4-byte Folded Reload + ld.d $a0, $sp, 48 movgr2cf $fcc1, $a0 - ori $a4, $zero, 1 + fld.s $fa7, $sp, 60 # 4-byte Folded Reload + fld.s $ft0, $sp, 64 # 4-byte Folded Reload fld.s $ft1, $sp, 68 # 4-byte Folded Reload + ori $a4, $zero, 1 .LBB4_30: # %_ZN20btAlignedObjectArrayI9btVector3E10deallocateEv.exit.i.i # in Loop: Header=BB4_11 Depth=3 ld.w $a1, $s0, 4 st.b $a4, $s0, 24 st.d $s1, $s0, 16 - st.w $s6, $s0, 8 + st.w $s5, $s0, 8 .LBB4_31: # %_ZN20btAlignedObjectArrayI9btVector3E9push_backERKS0_.exit # in Loop: Header=BB4_11 Depth=3 ld.d $a0, $s0, 16 - fnmadd.s $fa0, $fs0, $fs2, $fa5 + fnmadd.s $fa0, $fs5, $fs4, $fa4 alsl.d $a2, $a1, $a0, 4 slli.d $a1, $a1, 4 - fstx.s $fs6, $a0, $a1 - fst.s $fs7, $a2, 4 - fst.s $fs0, $a2, 8 + fstx.s $fs0, $a0, $a1 + fst.s $fs3, $a2, 4 + fst.s $fs5, $a2, 8 fst.s $fa0, $a2, 12 ld.w $a0, $s0, 4 addi.d $a0, $a0, 1 @@ -479,14 +471,14 @@ _ZN14btGeometryUtil29getPlaneEquationsFromVerticesER20btAlignedObjectArrayI9btVe # in Loop: Header=BB4_11 Depth=3 bcnez $fcc1, .LBB4_10 # %bb.33: # in Loop: Header=BB4_11 Depth=3 - fneg.s $fa0, $fs5 - fneg.s $fa1, $fs1 - fneg.s $fa2, $fs3 + fneg.s $fa0, $fs7 + fneg.s $fa1, $ft0 + fneg.s $fa2, $fa7 ld.w $a1, $s0, 4 - frsqrt.s $fa3, $fs4 - fmul.s $fs1, $fa3, $fa1 - fmul.s $fs2, $fa3, $fa2 - fmul.s $fs3, $fa3, $fa0 + frsqrt.s $fa3, $fs6 + fmul.s $fs3, $fa3, $fa1 + fmul.s $fs4, $fa3, $fa2 + fmul.s $fs5, $fa3, $fa0 blez $a1, .LBB4_37 # %bb.34: # %.lr.ph.i.1 # in Loop: Header=BB4_11 Depth=3 @@ -501,11 +493,10 @@ _ZN14btGeometryUtil29getPlaneEquationsFromVerticesER20btAlignedObjectArrayI9btVe fld.s $fa0, $a0, -4 fld.s $fa1, $a0, -8 fld.s $fa2, $a0, 0 - fld.s $fa3, $s8, %pc_lo12(.LCPI4_1) - fmul.s $fa0, $fs2, $fa0 - fmadd.s $fa0, $fs1, $fa1, $fa0 - fmadd.s $fa0, $fs3, $fa2, $fa0 - fcmp.clt.s $fcc0, $fa3, $fa0 + fmul.s $fa0, $fs4, $fa0 + fmadd.s $fa0, $fs3, $fa1, $fa0 + fmadd.s $fa0, $fs5, $fa2, $fa0 + fcmp.clt.s $fcc0, $fs2, $fa0 bcnez $fcc0, .LBB4_10 # %bb.36: # in Loop: Header=BB4_35 Depth=4 addi.d $a2, $a2, -1 @@ -517,13 +508,13 @@ _ZN14btGeometryUtil29getPlaneEquationsFromVerticesER20btAlignedObjectArrayI9btVe fld.s $fa1, $s7, 4 ld.w $a0, $fp, 4 fld.s $fs0, $s7, 8 - fmul.s $fa1, $fs2, $fa1 - fmadd.s $fs4, $fs1, $fa0, $fa1 + fmul.s $fa1, $fs4, $fa1 + fmadd.s $fs6, $fs3, $fa0, $fa1 blez $a0, .LBB4_41 # %bb.38: # %.lr.ph.i61.1 # in Loop: Header=BB4_11 Depth=3 ld.d $a2, $fp, 16 - fmadd.s $fa0, $fs3, $fs0, $fs4 + fmadd.s $fa0, $fs5, $fs0, $fs6 addi.d $a2, $a2, 8 .p2align 4, , 16 .LBB4_39: # Parent Loop BB4_3 Depth=1 @@ -533,12 +524,11 @@ _ZN14btGeometryUtil29getPlaneEquationsFromVerticesER20btAlignedObjectArrayI9btVe fld.s $fa1, $a2, -4 fld.s $fa2, $a2, -8 fld.s $fa3, $a2, 0 - fmul.s $fa1, $fs2, $fa1 - fld.s $fa4, $s5, %pc_lo12(.LCPI4_2) - fmadd.s $fa1, $fs1, $fa2, $fa1 - fmadd.s $fa1, $fs3, $fa3, $fa1 + fmul.s $fa1, $fs4, $fa1 + fmadd.s $fa1, $fs3, $fa2, $fa1 + fmadd.s $fa1, $fs5, $fa3, $fa1 fsub.s $fa1, $fa1, $fa0 - fcmp.clt.s $fcc0, $fa4, $fa1 + fcmp.clt.s $fcc0, $fs1, $fa1 bcnez $fcc0, .LBB4_10 # %bb.40: # in Loop: Header=BB4_39 Depth=4 addi.d $a0, $a0, -1 @@ -599,64 +589,56 @@ _ZN14btGeometryUtil29getPlaneEquationsFromVerticesER20btAlignedObjectArrayI9btVe ori $a4, $zero, 1 b .LBB4_8 .LBB4_51: # %._crit_edge - fld.d $fs7, $sp, 88 # 8-byte Folded Reload - fld.d $fs6, $sp, 96 # 8-byte Folded Reload - fld.d $fs5, $sp, 104 # 8-byte Folded Reload - fld.d $fs4, $sp, 112 # 8-byte Folded Reload - fld.d $fs3, $sp, 120 # 8-byte Folded Reload - fld.d $fs2, $sp, 128 # 8-byte Folded Reload - fld.d $fs1, $sp, 136 # 8-byte Folded Reload - fld.d $fs0, $sp, 144 # 8-byte Folded Reload - ld.d $s8, $sp, 152 # 8-byte Folded Reload - ld.d $s7, $sp, 160 # 8-byte Folded Reload - ld.d $s6, $sp, 168 # 8-byte Folded Reload - ld.d $s5, $sp, 176 # 8-byte Folded Reload - ld.d $s4, $sp, 184 # 8-byte Folded Reload - ld.d $s3, $sp, 192 # 8-byte Folded Reload - ld.d $s2, $sp, 200 # 8-byte Folded Reload - ld.d $s1, $sp, 208 # 8-byte Folded Reload - ld.d $s0, $sp, 216 # 8-byte Folded Reload - ld.d $fp, $sp, 224 # 8-byte Folded Reload - ld.d $ra, $sp, 232 # 8-byte Folded Reload - addi.d $sp, $sp, 240 + fld.d $fs7, $sp, 72 # 8-byte Folded Reload + fld.d $fs6, $sp, 80 # 8-byte Folded Reload + fld.d $fs5, $sp, 88 # 8-byte Folded Reload + fld.d $fs4, $sp, 96 # 8-byte Folded Reload + fld.d $fs3, $sp, 104 # 8-byte Folded Reload + fld.d $fs2, $sp, 112 # 8-byte Folded Reload + fld.d $fs1, $sp, 120 # 8-byte Folded Reload + fld.d $fs0, $sp, 128 # 8-byte Folded Reload + ld.d $s8, $sp, 136 # 8-byte Folded Reload + ld.d $s7, $sp, 144 # 8-byte Folded Reload + ld.d $s6, $sp, 152 # 8-byte Folded Reload + ld.d $s5, $sp, 160 # 8-byte Folded Reload + ld.d $s4, $sp, 168 # 8-byte Folded Reload + ld.d $s3, $sp, 176 # 8-byte Folded Reload + ld.d $s2, $sp, 184 # 8-byte Folded Reload + ld.d $s1, $sp, 192 # 8-byte Folded Reload + ld.d $s0, $sp, 200 # 8-byte Folded Reload + ld.d $fp, $sp, 208 # 8-byte Folded Reload + ld.d $ra, $sp, 216 # 8-byte Folded Reload + addi.d $sp, $sp, 224 ret .Lfunc_end4: .size _ZN14btGeometryUtil29getPlaneEquationsFromVerticesER20btAlignedObjectArrayI9btVector3ES3_, .Lfunc_end4-_ZN14btGeometryUtil29getPlaneEquationsFromVerticesER20btAlignedObjectArrayI9btVector3ES3_ .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN14btGeometryUtil29getVerticesFromPlaneEquationsERK20btAlignedObjectArrayI9btVector3ERS2_ -.LCPI5_0: - .word 0x38d1b717 # float 9.99999974E-5 -.LCPI5_1: - .word 0x358637bd # float 9.99999997E-7 -.LCPI5_2: - .word 0x3c23d70a # float 0.00999999977 - .text - .globl _ZN14btGeometryUtil29getVerticesFromPlaneEquationsERK20btAlignedObjectArrayI9btVector3ERS2_ + .globl _ZN14btGeometryUtil29getVerticesFromPlaneEquationsERK20btAlignedObjectArrayI9btVector3ERS2_ # -- Begin function _ZN14btGeometryUtil29getVerticesFromPlaneEquationsERK20btAlignedObjectArrayI9btVector3ERS2_ .p2align 5 .type _ZN14btGeometryUtil29getVerticesFromPlaneEquationsERK20btAlignedObjectArrayI9btVector3ERS2_,@function _ZN14btGeometryUtil29getVerticesFromPlaneEquationsERK20btAlignedObjectArrayI9btVector3ERS2_: # @_ZN14btGeometryUtil29getVerticesFromPlaneEquationsERK20btAlignedObjectArrayI9btVector3ERS2_ .cfi_startproc # %bb.0: - addi.d $sp, $sp, -160 - .cfi_def_cfa_offset 160 - st.d $ra, $sp, 152 # 8-byte Folded Spill - st.d $fp, $sp, 144 # 8-byte Folded Spill - st.d $s0, $sp, 136 # 8-byte Folded Spill - st.d $s1, $sp, 128 # 8-byte Folded Spill - st.d $s2, $sp, 120 # 8-byte Folded Spill - st.d $s3, $sp, 112 # 8-byte Folded Spill - st.d $s4, $sp, 104 # 8-byte Folded Spill - st.d $s5, $sp, 96 # 8-byte Folded Spill - st.d $s6, $sp, 88 # 8-byte Folded Spill - st.d $s7, $sp, 80 # 8-byte Folded Spill - st.d $s8, $sp, 72 # 8-byte Folded Spill - fst.d $fs0, $sp, 64 # 8-byte Folded Spill - fst.d $fs1, $sp, 56 # 8-byte Folded Spill - fst.d $fs2, $sp, 48 # 8-byte Folded Spill - fst.d $fs3, $sp, 40 # 8-byte Folded Spill - fst.d $fs4, $sp, 32 # 8-byte Folded Spill + addi.d $sp, $sp, -176 + .cfi_def_cfa_offset 176 + st.d $ra, $sp, 168 # 8-byte Folded Spill + st.d $fp, $sp, 160 # 8-byte Folded Spill + st.d $s0, $sp, 152 # 8-byte Folded Spill + st.d $s1, $sp, 144 # 8-byte Folded Spill + st.d $s2, $sp, 136 # 8-byte Folded Spill + st.d $s3, $sp, 128 # 8-byte Folded Spill + st.d $s4, $sp, 120 # 8-byte Folded Spill + st.d $s5, $sp, 112 # 8-byte Folded Spill + st.d $s6, $sp, 104 # 8-byte Folded Spill + st.d $s7, $sp, 96 # 8-byte Folded Spill + st.d $s8, $sp, 88 # 8-byte Folded Spill + fst.d $fs0, $sp, 80 # 8-byte Folded Spill + fst.d $fs1, $sp, 72 # 8-byte Folded Spill + fst.d $fs2, $sp, 64 # 8-byte Folded Spill + fst.d $fs3, $sp, 56 # 8-byte Folded Spill + fst.d $fs4, $sp, 48 # 8-byte Folded Spill + fst.d $fs5, $sp, 40 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -673,6 +655,7 @@ _ZN14btGeometryUtil29getVerticesFromPlaneEquationsERK20btAlignedObjectArrayI9btV .cfi_offset 58, -112 .cfi_offset 59, -120 .cfi_offset 60, -128 + .cfi_offset 61, -136 ld.w $s2, $a0, 4 blez $s2, .LBB5_29 # %bb.1: # %.lr.ph113 @@ -680,26 +663,31 @@ _ZN14btGeometryUtil29getVerticesFromPlaneEquationsERK20btAlignedObjectArrayI9btV move $s0, $a1 move $a1, $zero ori $a0, $zero, 2 - st.d $a0, $sp, 24 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI5_0) - pcalau12i $a0, %pc_hi20(.LCPI5_1) - fld.s $fs1, $a0, %pc_lo12(.LCPI5_1) + st.d $a0, $sp, 32 # 8-byte Folded Spill ori $a3, $zero, 1 + lu12i.w $a0, 232731 + ori $a0, $a0, 1815 + movgr2fr.w $fs0, $a0 + lu12i.w $a0, 219235 + ori $a0, $a0, 1981 + movgr2fr.w $fs1, $a0 vldi $vr14, -1040 + lu12i.w $a0, 246333 + ori $a0, $a0, 1802 + movgr2fr.w $fs2, $a0 ori $a0, $zero, 1 - st.d $a0, $sp, 16 # 8-byte Folded Spill + st.d $a0, $sp, 24 # 8-byte Folded Spill b .LBB5_3 .p2align 4, , 16 .LBB5_2: # %.loopexit108 # in Loop: Header=BB5_3 Depth=1 + ld.d $a0, $sp, 32 # 8-byte Folded Reload + addi.d $a0, $a0, 1 + st.d $a0, $sp, 32 # 8-byte Folded Spill ld.d $a0, $sp, 24 # 8-byte Folded Reload addi.d $a0, $a0, 1 st.d $a0, $sp, 24 # 8-byte Folded Spill - ld.d $a0, $sp, 16 # 8-byte Folded Reload - addi.d $a0, $a0, 1 - st.d $a0, $sp, 16 # 8-byte Folded Spill - ld.d $a1, $sp, 8 # 8-byte Folded Reload + ld.d $a1, $sp, 16 # 8-byte Folded Reload beq $a1, $s2, .LBB5_29 .LBB5_3: # =>This Loop Header: Depth=1 # Child Loop BB5_6 Depth 2 @@ -708,13 +696,13 @@ _ZN14btGeometryUtil29getVerticesFromPlaneEquationsERK20btAlignedObjectArrayI9btV # Child Loop BB5_25 Depth 4 move $a0, $a1 addi.d $a1, $a1, 1 - st.d $a1, $sp, 8 # 8-byte Folded Spill + st.d $a1, $sp, 16 # 8-byte Folded Spill bgeu $a1, $s2, .LBB5_2 # %bb.4: # in Loop: Header=BB5_3 Depth=1 ld.d $a1, $fp, 16 alsl.d $s7, $a0, $a1, 4 - ld.d $s8, $sp, 16 # 8-byte Folded Reload - ld.d $s3, $sp, 24 # 8-byte Folded Reload + ld.d $s8, $sp, 24 # 8-byte Folded Reload + ld.d $s3, $sp, 32 # 8-byte Folded Reload b .LBB5_6 .p2align 4, , 16 .LBB5_5: # %.loopexit107 @@ -747,9 +735,9 @@ _ZN14btGeometryUtil29getVerticesFromPlaneEquationsERK20btAlignedObjectArrayI9btV ld.d $a0, $s0, 16 alsl.d $a2, $a1, $a0, 4 slli.d $a1, $a1, 4 - fstx.s $fs2, $a0, $a1 - fst.s $fs3, $a2, 4 - fst.s $fs4, $a2, 8 + fstx.s $fs3, $a0, $a1 + fst.s $fs4, $a2, 4 + fst.s $fs5, $a2, 8 st.w $zero, $a2, 12 ld.w $a0, $s0, 4 addi.d $a0, $a0, 1 @@ -849,9 +837,9 @@ _ZN14btGeometryUtil29getVerticesFromPlaneEquationsERK20btAlignedObjectArrayI9btV fadd.s $fa1, $fa6, $fa1 ld.w $a1, $fp, 4 fadd.s $fa2, $fa3, $fa2 - fmul.s $fs2, $ft0, $fa0 - fmul.s $fs3, $ft0, $fa1 - fmul.s $fs4, $ft0, $fa2 + fmul.s $fs3, $ft0, $fa0 + fmul.s $fs4, $ft0, $fa1 + fmul.s $fs5, $ft0, $fa2 blez $a1, .LBB5_19 # %bb.16: # %.lr.ph.i # in Loop: Header=BB5_11 Depth=3 @@ -863,15 +851,13 @@ _ZN14btGeometryUtil29getVerticesFromPlaneEquationsERK20btAlignedObjectArrayI9btV # => This Inner Loop Header: Depth=4 fld.s $fa0, $a0, -4 fld.s $fa1, $a0, -8 - fmul.s $fa0, $fs3, $fa0 fld.s $fa2, $a0, 0 fld.s $fa3, $a0, 4 - pcalau12i $a2, %pc_hi20(.LCPI5_2) - fld.s $fa4, $a2, %pc_lo12(.LCPI5_2) - fmadd.s $fa0, $fa1, $fs2, $fa0 - fmadd.s $fa0, $fa2, $fs4, $fa0 + fmul.s $fa0, $fs4, $fa0 + fmadd.s $fa0, $fa1, $fs3, $fa0 + fmadd.s $fa0, $fa2, $fs5, $fa0 fadd.s $fa0, $fa3, $fa0 - fcmp.cule.s $fcc0, $fa0, $fa4 + fcmp.cule.s $fcc0, $fa0, $fs2 bceqz $fcc0, .LBB5_10 # %bb.18: # in Loop: Header=BB5_17 Depth=4 addi.d $a1, $a1, -1 @@ -935,23 +921,24 @@ _ZN14btGeometryUtil29getVerticesFromPlaneEquationsERK20btAlignedObjectArrayI9btV ori $a3, $zero, 1 b .LBB5_8 .LBB5_29: # %._crit_edge - fld.d $fs4, $sp, 32 # 8-byte Folded Reload - fld.d $fs3, $sp, 40 # 8-byte Folded Reload - fld.d $fs2, $sp, 48 # 8-byte Folded Reload - fld.d $fs1, $sp, 56 # 8-byte Folded Reload - fld.d $fs0, $sp, 64 # 8-byte Folded Reload - ld.d $s8, $sp, 72 # 8-byte Folded Reload - ld.d $s7, $sp, 80 # 8-byte Folded Reload - ld.d $s6, $sp, 88 # 8-byte Folded Reload - ld.d $s5, $sp, 96 # 8-byte Folded Reload - ld.d $s4, $sp, 104 # 8-byte Folded Reload - ld.d $s3, $sp, 112 # 8-byte Folded Reload - ld.d $s2, $sp, 120 # 8-byte Folded Reload - ld.d $s1, $sp, 128 # 8-byte Folded Reload - ld.d $s0, $sp, 136 # 8-byte Folded Reload - ld.d $fp, $sp, 144 # 8-byte Folded Reload - ld.d $ra, $sp, 152 # 8-byte Folded Reload - addi.d $sp, $sp, 160 + fld.d $fs5, $sp, 40 # 8-byte Folded Reload + fld.d $fs4, $sp, 48 # 8-byte Folded Reload + fld.d $fs3, $sp, 56 # 8-byte Folded Reload + fld.d $fs2, $sp, 64 # 8-byte Folded Reload + fld.d $fs1, $sp, 72 # 8-byte Folded Reload + fld.d $fs0, $sp, 80 # 8-byte Folded Reload + ld.d $s8, $sp, 88 # 8-byte Folded Reload + ld.d $s7, $sp, 96 # 8-byte Folded Reload + ld.d $s6, $sp, 104 # 8-byte Folded Reload + ld.d $s5, $sp, 112 # 8-byte Folded Reload + ld.d $s4, $sp, 120 # 8-byte Folded Reload + ld.d $s3, $sp, 128 # 8-byte Folded Reload + ld.d $s2, $sp, 136 # 8-byte Folded Reload + ld.d $s1, $sp, 144 # 8-byte Folded Reload + ld.d $s0, $sp, 152 # 8-byte Folded Reload + ld.d $fp, $sp, 160 # 8-byte Folded Reload + ld.d $ra, $sp, 168 # 8-byte Folded Reload + addi.d $sp, $sp, 176 ret .Lfunc_end5: .size _ZN14btGeometryUtil29getVerticesFromPlaneEquationsERK20btAlignedObjectArrayI9btVector3ERS2_, .Lfunc_end5-_ZN14btGeometryUtil29getVerticesFromPlaneEquationsERK20btAlignedObjectArrayI9btVector3ERS2_ diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGhostObject.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGhostObject.s index 378d7dfb..d282ee19 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGhostObject.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGhostObject.s @@ -1714,12 +1714,8 @@ _ZN17btCollisionObject17setCollisionShapeEP16btCollisionShape: # @_ZN17btCollisi .Lfunc_end15: .size _ZN17btCollisionObject17setCollisionShapeEP16btCollisionShape, .Lfunc_end15-_ZN17btCollisionObject17setCollisionShapeEP16btCollisionShape # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf -.LCPI16_0: - .word 0x28800000 # float 1.42108547E-14 .section .text._ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf,"axG",@progbits,_ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf,comdat - .weak _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf + .weak _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf # -- Begin function _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf .p2align 5 .type _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf,@function _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf: # @_ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf @@ -1856,11 +1852,11 @@ _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf: # jirl $ra, $ra, 0 fadd.s $fa0, $fa0, $fa0 fst.s $fa0, $s0, 0 - pcalau12i $a0, %pc_hi20(.LCPI16_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI16_0) fmul.s $fa0, $fs1, $fs1 fmadd.s $fa0, $fs0, $fs0, $fa0 fmadd.s $fa0, $fs2, $fs2, $fa0 + lu12i.w $a0, 165888 + movgr2fr.w $fa1, $a0 fcmp.clt.s $fcc0, $fa0, $fa1 st.w $zero, $fp, 12 bceqz $fcc0, .LBB16_2 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGjkConvexCast.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGjkConvexCast.s index 6f397cd0..cff6a579 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGjkConvexCast.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGjkConvexCast.s @@ -15,12 +15,7 @@ _ZN15btGjkConvexCastC2EPK13btConvexShapeS2_P22btVoronoiSimplexSolver: # @_ZN15bt .Lfunc_end0: .size _ZN15btGjkConvexCastC2EPK13btConvexShapeS2_P22btVoronoiSimplexSolver, .Lfunc_end0-_ZN15btGjkConvexCastC2EPK13btConvexShapeS2_P22btVoronoiSimplexSolver # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN15btGjkConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConvexCast10CastResultE -.LCPI1_0: - .word 0x3a83126f # float 0.00100000005 - .text - .globl _ZN15btGjkConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConvexCast10CastResultE + .globl _ZN15btGjkConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConvexCast10CastResultE # -- Begin function _ZN15btGjkConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConvexCast10CastResultE .p2align 5 .type _ZN15btGjkConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConvexCast10CastResultE,@function _ZN15btGjkConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConvexCast10CastResultE: # @_ZN15btGjkConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConvexCast10CastResultE @@ -134,21 +129,22 @@ _ZN15btGjkConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConvexCast1 fsub.s $fa0, $fa0, $fa1 fld.s $fa1, $sp, 20 # 4-byte Folded Reload fld.s $fa2, $sp, 16 # 4-byte Folded Reload - fsub.s $fa4, $fa1, $fa2 - fsub.s $fa5, $fs4, $fs5 - fsub.s $fa1, $fs6, $fs7 - fsub.s $fa6, $fs0, $fs1 - fsub.s $fa7, $fs2, $fs3 - fsub.s $fs1, $fa0, $fa1 + fsub.s $fa1, $fa1, $fa2 + fsub.s $fa4, $fs4, $fs5 + fsub.s $fa2, $fs6, $fs7 + fsub.s $fa3, $fs0, $fs1 + fsub.s $fa5, $fs2, $fs3 + fsub.s $fs1, $fa0, $fa2 + fsub.s $fs2, $fa1, $fa3 fld.s $fa3, $sp, 312 - fld.s $fa0, $sp, 280 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.s $fs4, $a0, %pc_lo12(.LCPI1_0) - fld.s $fa2, $sp, 284 - fld.s $fa1, $sp, 288 - fsub.s $fs2, $fa4, $fa6 + fld.s $fa2, $sp, 280 + fld.s $fa1, $sp, 284 + fld.s $fa0, $sp, 288 + lu12i.w $a0, 239665 + ori $a0, $a0, 623 + movgr2fr.w $fs4, $a0 fcmp.cule.s $fcc0, $fa3, $fs4 - fsub.s $fs3, $fa5, $fa7 + fsub.s $fs3, $fa4, $fa5 bcnez $fcc0, .LBB1_11 # %bb.2: # %.lr.ph addi.d $s4, $sp, 296 @@ -160,9 +156,9 @@ _ZN15btGjkConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConvexCast1 addi.w $s5, $s5, -1 beqz $s5, .LBB1_14 # %bb.4: # in Loop: Header=BB1_3 Depth=1 - fmul.s $fa2, $fs2, $fa2 - fmadd.s $fa0, $fs1, $fa0, $fa2 - fmadd.s $fa0, $fs3, $fa1, $fa0 + fmul.s $fa1, $fs2, $fa1 + fmadd.s $fa1, $fs1, $fa2, $fa1 + fmadd.s $fa0, $fs3, $fa0, $fa1 fdiv.s $fa0, $fa3, $fa0 fsub.s $fs0, $fa4, $fa0 fcmp.cle.s $fcc0, $fs0, $fa4 @@ -228,9 +224,9 @@ _ZN15btGjkConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConvexCast1 bceqz $fcc0, .LBB1_15 # %bb.9: # in Loop: Header=BB1_3 Depth=1 vld $vr4, $s4, 0 - fld.s $fa0, $sp, 280 - fld.s $fa2, $sp, 284 - fld.s $fa1, $sp, 288 + fld.s $fa2, $sp, 280 + fld.s $fa1, $sp, 284 + fld.s $fa0, $sp, 288 fcmp.clt.s $fcc0, $fs4, $fa3 vst $vr4, $sp, 320 fmov.s $fa4, $fs0 @@ -243,9 +239,9 @@ _ZN15btGjkConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConvexCast1 movgr2fr.w $fs0, $zero .LBB1_12: # %._crit_edge fld.s $fa4, $fp, 184 - fmul.s $fa5, $fs2, $fa2 - fmadd.s $fa5, $fa0, $fs1, $fa5 - fmadd.s $fa5, $fa1, $fs3, $fa5 + fmul.s $fa5, $fs2, $fa1 + fmadd.s $fa5, $fa2, $fs1, $fa5 + fmadd.s $fa5, $fa0, $fs3, $fa5 fneg.s $fa4, $fa4 fcmp.cle.s $fcc0, $fa4, $fa5 bcnez $fcc0, .LBB1_14 @@ -257,18 +253,18 @@ _ZN15btGjkConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConvexCast1 move $a0, $zero b .LBB1_17 .LBB1_15: - fld.s $fa0, $sp, 280 - fld.s $fa2, $sp, 284 - fld.s $fa1, $sp, 288 + fld.s $fa2, $sp, 280 + fld.s $fa1, $sp, 284 + fld.s $fa0, $sp, 288 fld.s $fa3, $sp, 292 fst.s $fs0, $fp, 168 .LBB1_16: # %.thread.sink.split - fst.s $fa0, $fp, 136 - vld $vr0, $s4, 0 - fst.s $fa2, $fp, 140 - fst.s $fa1, $fp, 144 + fst.s $fa2, $fp, 136 + vld $vr2, $s4, 0 + fst.s $fa1, $fp, 140 + fst.s $fa0, $fp, 144 fst.s $fa3, $fp, 148 - vst $vr0, $fp, 152 + vst $vr2, $fp, 152 ori $a0, $zero, 1 .LBB1_17: # %.thread fld.d $fs7, $sp, 336 # 8-byte Folded Reload diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGjkEpa2.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGjkEpa2.s index 262483f1..65703a62 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGjkEpa2.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGjkEpa2.s @@ -11,12 +11,7 @@ _ZN15btGjkEpaSolver220StackSizeRequirementEv: # @_ZN15btGjkEpaSolver220StackSize .Lfunc_end0: .size _ZN15btGjkEpaSolver220StackSizeRequirementEv, .Lfunc_end0-_ZN15btGjkEpaSolver220StackSizeRequirementEv # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN15btGjkEpaSolver28DistanceEPK13btConvexShapeRK11btTransformS2_S5_RK9btVector3RNS_8sResultsE -.LCPI1_0: - .word 0x38d1b717 # float 9.99999974E-5 - .text - .globl _ZN15btGjkEpaSolver28DistanceEPK13btConvexShapeRK11btTransformS2_S5_RK9btVector3RNS_8sResultsE + .globl _ZN15btGjkEpaSolver28DistanceEPK13btConvexShapeRK11btTransformS2_S5_RK9btVector3RNS_8sResultsE # -- Begin function _ZN15btGjkEpaSolver28DistanceEPK13btConvexShapeRK11btTransformS2_S5_RK9btVector3RNS_8sResultsE .p2align 5 .type _ZN15btGjkEpaSolver28DistanceEPK13btConvexShapeRK11btTransformS2_S5_RK9btVector3RNS_8sResultsE,@function _ZN15btGjkEpaSolver28DistanceEPK13btConvexShapeRK11btTransformS2_S5_RK9btVector3RNS_8sResultsE: # @_ZN15btGjkEpaSolver28DistanceEPK13btConvexShapeRK11btTransformS2_S5_RK9btVector3RNS_8sResultsE @@ -289,12 +284,13 @@ _ZN15btGjkEpaSolver28DistanceEPK13btConvexShapeRK11btTransformS2_S5_RK9btVector3 fmul.s $fa3, $fa1, $fa1 fmadd.s $fa3, $fa0, $fa0, $fa3 fmadd.s $fa3, $fa2, $fa2, $fa3 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.s $fa4, $a0, %pc_lo12(.LCPI1_0) - fsqrt.s $fa5, $fa3 - fst.s $fa5, $fp, 52 + fsqrt.s $fa4, $fa3 + fst.s $fa4, $fp, 52 frsqrt.s $fa3, $fa3 - fcmp.clt.s $fcc0, $fa4, $fa5 + lu12i.w $a0, 232731 + ori $a0, $a0, 1815 + movgr2fr.w $fa5, $a0 + fcmp.clt.s $fcc0, $fa5, $fa4 vldi $vr4, -1168 fsel $fa3, $fa4, $fa3, $fcc0 fmul.s $fa0, $fa0, $fa3 @@ -493,14 +489,8 @@ _ZN12gjkepa2_implL10InitializeEPK13btConvexShapeRK11btTransformS2_S5_RN15btGjkEp .Lfunc_end2: .size _ZN12gjkepa2_implL10InitializeEPK13btConvexShapeRK11btTransformS2_S5_RN15btGjkEpaSolver28sResultsERNS_13MinkowskiDiffEb, .Lfunc_end2-_ZN12gjkepa2_implL10InitializeEPK13btConvexShapeRK11btTransformS2_S5_RN15btGjkEpaSolver28sResultsERNS_13MinkowskiDiffEb # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN12gjkepa2_impl3GJK8EvaluateERKNS_13MinkowskiDiffERK9btVector3 -.LCPI3_0: - .word 0x38d1b717 # float 9.99999974E-5 -.LCPI3_1: - .word 0xb8d1b717 # float -9.99999974E-5 .section .text._ZN12gjkepa2_impl3GJK8EvaluateERKNS_13MinkowskiDiffERK9btVector3,"axG",@progbits,_ZN12gjkepa2_impl3GJK8EvaluateERKNS_13MinkowskiDiffERK9btVector3,comdat - .weak _ZN12gjkepa2_impl3GJK8EvaluateERKNS_13MinkowskiDiffERK9btVector3 + .weak _ZN12gjkepa2_impl3GJK8EvaluateERKNS_13MinkowskiDiffERK9btVector3 # -- Begin function _ZN12gjkepa2_impl3GJK8EvaluateERKNS_13MinkowskiDiffERK9btVector3 .p2align 5 .type _ZN12gjkepa2_impl3GJK8EvaluateERKNS_13MinkowskiDiffERK9btVector3,@function _ZN12gjkepa2_impl3GJK8EvaluateERKNS_13MinkowskiDiffERK9btVector3: # @_ZN12gjkepa2_impl3GJK8EvaluateERKNS_13MinkowskiDiffERK9btVector3 @@ -636,9 +626,12 @@ _ZN12gjkepa2_impl3GJK8EvaluateERKNS_13MinkowskiDiffERK9btVector3: # @_ZN12gjkepa fld.s $fa0, $fp, 148 fld.s $fs3, $fp, 152 ori $s3, $zero, 56 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.s $fs2, $a0, %pc_lo12(.LCPI3_0) - pcalau12i $s4, %pc_hi20(.LCPI3_1) + lu12i.w $a0, 232731 + ori $a0, $a0, 1815 + movgr2fr.w $fs2, $a0 + lu12i.w $a0, -291557 + ori $s4, $a0, 1815 + lu32i.d $s4, 0 b .LBB3_2 .p2align 4, , 16 .LBB3_1: # %._crit_edge @@ -771,10 +764,10 @@ _ZN12gjkepa2_impl3GJK8EvaluateERKNS_13MinkowskiDiffERK9btVector3: # @_ZN12gjkepa vld $vr2, $a1, 0 fmadd.s $fa0, $fa3, $fa1, $fa0 fdiv.s $fa0, $fa0, $fs4 - fld.s $fa1, $s4, %pc_lo12(.LCPI3_1) fcmp.clt.s $fcc0, $fs1, $fa0 fsel $fs1, $fs1, $fa0, $fcc0 fsub.s $fa0, $fs4, $fs1 + movgr2fr.w $fa1, $s4 fmadd.s $fa0, $fs4, $fa1, $fa0 movgr2fr.w $fs3, $zero fcmp.cult.s $fcc0, $fs3, $fa0 @@ -1382,10 +1375,6 @@ _ZN15btGjkEpaSolver211PenetrationEPK13btConvexShapeRK11btTransformS2_S5_RK9btVec .word 0x00000000 # float 0 .word 0x00000000 # float 0 .word 0x00000000 # float 0 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI5_1: - .word 0x38d1b717 # float 9.99999974E-5 .section .text._ZN12gjkepa2_impl3EPA8EvaluateERNS_3GJKERK9btVector3,"axG",@progbits,_ZN12gjkepa2_impl3EPA8EvaluateERNS_3GJKERK9btVector3,comdat .weak _ZN12gjkepa2_impl3EPA8EvaluateERNS_3GJKERK9btVector3 .p2align 5 @@ -1642,10 +1631,11 @@ _ZN12gjkepa2_impl3EPA8EvaluateERNS_3GJKERK9btVector3: # @_ZN12gjkepa2_impl3EPA8E st.d $s4, $a0, 56 st.w $zero, $fp, 0 addi.d $s3, $fp, 84 - pcalau12i $a0, %pc_hi20(.LCPI5_1) - fld.s $fs2, $a0, %pc_lo12(.LCPI5_1) vrepli.b $vr0, 0 vst $vr0, $sp, 80 # 16-byte Folded Spill + lu12i.w $a0, 232731 + ori $a0, $a0, 1815 + movgr2fr.w $fs2, $a0 lu12i.w $a0, 3 ori $a0, $a0, 2152 st.d $a0, $sp, 72 # 8-byte Folded Spill @@ -2003,12 +1993,6 @@ _ZN12gjkepa2_impl3EPA8EvaluateERNS_3GJKERK9btVector3: # @_ZN12gjkepa2_impl3EPA8E .word 0x3f800000 # float 1 .word 0x3f800000 # float 1 .word 0x00000000 # float 0 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI6_1: - .word 0x7f7fffff # float 3.40282347E+38 -.LCPI6_2: - .word 0x34000000 # float 1.1920929E-7 .text .globl _ZN15btGjkEpaSolver214SignedDistanceERK9btVector3fPK13btConvexShapeRK11btTransformRNS_8sResultsE .p2align 5 @@ -2107,10 +2091,11 @@ _ZN15btGjkEpaSolver214SignedDistanceERK9btVector3fPK13btConvexShapeRK11btTransfo jirl $ra, $ra, 0 .Ltmp1: # EH_LABEL # %bb.1: - pcalau12i $a1, %pc_hi20(.LCPI6_1) - fld.s $fs0, $a1, %pc_lo12(.LCPI6_1) - ori $a1, $zero, 1 - beq $a0, $a1, .LBB6_12 + lu12i.w $a1, 522239 + ori $a1, $a1, 4095 + ori $a2, $zero, 1 + movgr2fr.w $fs0, $a1 + beq $a0, $a2, .LBB6_12 # %bb.2: bnez $a0, .LBB6_21 # %bb.3: # %.preheader @@ -2275,11 +2260,11 @@ _ZN15btGjkEpaSolver214SignedDistanceERK9btVector3fPK13btConvexShapeRK11btTransfo fsub.s $fa2, $fa2, $fa3 fsub.s $fa3, $fa4, $fa5 fmul.s $fa0, $fa2, $fa2 - pcalau12i $a0, %pc_hi20(.LCPI6_2) - fld.s $fa4, $a0, %pc_lo12(.LCPI6_2) fmadd.s $fa0, $fa1, $fa1, $fa0 fmadd.s $fa0, $fa3, $fa3, $fa0 fsqrt.s $fa0, $fa0 + lu12i.w $a0, 212992 + movgr2fr.w $fa4, $a0 fcmp.cult.s $fcc0, $fa0, $fa4 bcnez $fcc0, .LBB6_16 # %bb.15: @@ -4041,16 +4026,8 @@ _ZN12gjkepa2_impl3GJK13EncloseOriginEv: # @_ZN12gjkepa2_impl3GJK13EncloseOriginE .word .LBB12_8-.LJTI12_0 .word .LBB12_13-.LJTI12_0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN12gjkepa2_impl3EPA7newfaceEPNS_3GJK3sSVES3_S3_b -.LCPI13_0: - .word 0x38d1b717 # float 9.99999974E-5 -.LCPI13_1: - .word 0xbc23d70a # float -0.00999999977 -.LCPI13_2: - .word 0xb727c5ac # float -9.99999974E-6 .section .text._ZN12gjkepa2_impl3EPA7newfaceEPNS_3GJK3sSVES3_S3_b,"axG",@progbits,_ZN12gjkepa2_impl3EPA7newfaceEPNS_3GJK3sSVES3_S3_b,comdat - .weak _ZN12gjkepa2_impl3EPA7newfaceEPNS_3GJK3sSVES3_S3_b + .weak _ZN12gjkepa2_impl3EPA7newfaceEPNS_3GJK3sSVES3_S3_b # -- Begin function _ZN12gjkepa2_impl3EPA7newfaceEPNS_3GJK3sSVES3_S3_b .p2align 5 .type _ZN12gjkepa2_impl3EPA7newfaceEPNS_3GJK3sSVES3_S3_b,@function _ZN12gjkepa2_impl3EPA7newfaceEPNS_3GJK3sSVES3_S3_b: # @_ZN12gjkepa2_impl3EPA7newfaceEPNS_3GJK3sSVES3_S3_b @@ -4188,22 +4165,25 @@ _ZN12gjkepa2_impl3EPA7newfaceEPNS_3GJK3sSVES3_S3_b: # @_ZN12gjkepa2_impl3EPA7new fmadd.s $fa6, $ft4, $ft0, $fa6 fmadd.s $fa4, $ft6, $fa4, $fa6 fcmp.clt.s $fcc0, $ft2, $fa5 - pcalau12i $a2, %pc_hi20(.LCPI13_0) - fld.s $fa6, $a2, %pc_lo12(.LCPI13_0) fsel $fa5, $fa5, $ft2, $fcc0 fcmp.clt.s $fcc0, $fa5, $fa4 fsel $fa4, $fa4, $fa5, $fcc0 - fcmp.clt.s $fcc0, $fa6, $fa3 - pcalau12i $a2, %pc_hi20(.LCPI13_1) - fld.s $fa5, $a2, %pc_lo12(.LCPI13_1) - vldi $vr7, -1168 - fsel $fa7, $fa7, $fa3, $fcc0 - fdiv.s $fa4, $fa4, $fa7 - fcmp.cult.s $fcc0, $fa4, $fa5 - movgr2fr.w $fa5, $zero - fsel $fa4, $fa5, $fa4, $fcc0 + lu12i.w $a2, 232731 + ori $a2, $a2, 1815 + movgr2fr.w $fa5, $a2 + fcmp.clt.s $fcc0, $fa5, $fa3 + vldi $vr6, -1168 + fsel $fa6, $fa6, $fa3, $fcc0 + fdiv.s $fa4, $fa4, $fa6 + lu12i.w $a2, -277955 + ori $a2, $a2, 1802 + lu32i.d $a2, 0 + movgr2fr.w $fa6, $a2 + fcmp.cult.s $fcc0, $fa4, $fa6 + movgr2fr.w $fa6, $zero + fsel $fa4, $fa6, $fa4, $fcc0 fst.s $fa4, $a0, 20 - fcmp.cule.s $fcc0, $fa3, $fa6 + fcmp.cule.s $fcc0, $fa3, $fa5 ori $a2, $zero, 2 bcnez $fcc0, .LBB13_12 # %bb.10: @@ -4224,8 +4204,10 @@ _ZN12gjkepa2_impl3EPA7newfaceEPNS_3GJK3sSVES3_S3_b: # @_ZN12gjkepa2_impl3EPA7new fst.s $fa0, $a0, 8 bnez $a4, .LBB13_22 # %bb.11: - pcalau12i $a1, %pc_hi20(.LCPI13_2) - fld.s $fa0, $a1, %pc_lo12(.LCPI13_2) + lu12i.w $a1, -298372 + ori $a1, $a1, 1452 + lu32i.d $a1, 0 + movgr2fr.w $fa0, $a1 fcmp.cle.s $fcc0, $fa0, $fa4 ori $a2, $zero, 3 bcnez $fcc0, .LBB13_22 @@ -4274,12 +4256,8 @@ _ZN12gjkepa2_impl3EPA7newfaceEPNS_3GJK3sSVES3_S3_b: # @_ZN12gjkepa2_impl3EPA7new .size _ZN12gjkepa2_impl3EPA7newfaceEPNS_3GJK3sSVES3_S3_b, .Lfunc_end13-_ZN12gjkepa2_impl3EPA7newfaceEPNS_3GJK3sSVES3_S3_b .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN12gjkepa2_impl3EPA6expandEjPNS_3GJK3sSVEPNS0_5sFaceEjRNS0_8sHorizonE -.LCPI14_0: - .word 0xb727c5ac # float -9.99999974E-6 .section .text._ZN12gjkepa2_impl3EPA6expandEjPNS_3GJK3sSVEPNS0_5sFaceEjRNS0_8sHorizonE,"axG",@progbits,_ZN12gjkepa2_impl3EPA6expandEjPNS_3GJK3sSVEPNS0_5sFaceEjRNS0_8sHorizonE,comdat - .weak _ZN12gjkepa2_impl3EPA6expandEjPNS_3GJK3sSVEPNS0_5sFaceEjRNS0_8sHorizonE + .weak _ZN12gjkepa2_impl3EPA6expandEjPNS_3GJK3sSVEPNS0_5sFaceEjRNS0_8sHorizonE # -- Begin function _ZN12gjkepa2_impl3EPA6expandEjPNS_3GJK3sSVEPNS0_5sFaceEjRNS0_8sHorizonE .p2align 5 .type _ZN12gjkepa2_impl3EPA6expandEjPNS_3GJK3sSVEPNS0_5sFaceEjRNS0_8sHorizonE,@function _ZN12gjkepa2_impl3EPA6expandEjPNS_3GJK3sSVEPNS0_5sFaceEjRNS0_8sHorizonE: # @_ZN12gjkepa2_impl3EPA6expandEjPNS_3GJK3sSVEPNS0_5sFaceEjRNS0_8sHorizonE @@ -4316,21 +4294,23 @@ _ZN12gjkepa2_impl3EPA6expandEjPNS_3GJK3sSVEPNS0_5sFaceEjRNS0_8sHorizonE: # @_ZN1 slli.d $a2, $s0, 2 pcalau12i $a6, %pc_hi20(_ZZN12gjkepa2_impl3EPA6expandEjPNS_3GJK3sSVEPNS0_5sFaceEjRNS0_8sHorizonEE4i1m3) addi.d $a6, $a6, %pc_lo12(_ZZN12gjkepa2_impl3EPA6expandEjPNS_3GJK3sSVEPNS0_5sFaceEjRNS0_8sHorizonEE4i1m3) - fld.s $fa0, $a3, 0 - fld.s $fa1, $a3, 4 - fld.s $fa2, $fp, 20 + fld.s $fa0, $a3, 4 + fld.s $fa1, $fp, 20 + fld.s $fa2, $a3, 0 fld.s $fa3, $fp, 16 - fmul.s $fa1, $fa1, $fa2 - fmadd.s $fa0, $fa0, $fa3, $fa1 + fmul.s $fa0, $fa0, $fa1 fld.s $fa1, $a3, 8 - fld.s $fa2, $fp, 24 - fld.s $fa3, $a3, 16 - pcalau12i $a7, %pc_hi20(.LCPI14_0) - fld.s $fa4, $a7, %pc_lo12(.LCPI14_0) - fmadd.s $fa0, $fa1, $fa2, $fa0 + fld.s $fa4, $fp, 24 + fld.s $fa5, $a3, 16 + fmadd.s $fa0, $fa2, $fa3, $fa0 ldx.wu $a7, $a6, $a2 - fsub.s $fa0, $fa0, $fa3 - fcmp.cule.s $fcc0, $fa4, $fa0 + fmadd.s $fa0, $fa1, $fa4, $fa0 + fsub.s $fa0, $fa0, $fa5 + lu12i.w $a6, -298372 + ori $a6, $a6, 1452 + lu32i.d $a6, 0 + movgr2fr.w $fa1, $a6 + fcmp.cule.s $fcc0, $fa1, $fa0 bcnez $fcc0, .LBB14_6 # %bb.3: move $s3, $a4 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGjkPairDetector.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGjkPairDetector.s index 97c7f18a..86eef7df 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGjkPairDetector.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btGjkPairDetector.s @@ -120,22 +120,6 @@ _ZN17btGjkPairDetector16getClosestPointsERKN36btDiscreteCollisionDetectorInterfa .word 0x3f800000 # float 1 .word 0x00000000 # float 0 .word 0x00000000 # float 0 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI3_1: - .word 0x5d5e0b6b # float 9.99999984E+17 -.LCPI3_2: - .word 0x358637bd # float 9.99999997E-7 -.LCPI3_3: - .word 0x34000000 # float 1.1920929E-7 -.LCPI3_5: - .word 0x28800000 # float 1.42108547E-14 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI3_4: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI3_6: - .dword 0x3f847ae147ae147b # double 0.01 .text .globl _ZN17btGjkPairDetector26getClosestPointsNonVirtualERKN36btDiscreteCollisionDetectorInterface17ClosestPointInputERNS0_6ResultEP12btIDebugDraw .p2align 5 @@ -289,12 +273,14 @@ _ZN17btGjkPairDetector26getClosestPointsNonVirtualERKN36btDiscreteCollisionDetec fld.s $fa0, $sp, 288 fst.s $fa0, $sp, 88 # 4-byte Folded Spill fld.s $fs2, $sp, 292 - pcalau12i $a0, %pc_hi20(.LCPI3_1) - fld.s $fs4, $a0, %pc_lo12(.LCPI3_1) + lu12i.w $a0, 382432 + ori $a0, $a0, 2923 + movgr2fr.w $fs4, $a0 fld.s $fs3, $sp, 296 - pcalau12i $a0, %pc_hi20(.LCPI3_2) - fld.s $fs7, $a0, %pc_lo12(.LCPI3_2) - pcalau12i $s5, %pc_hi20(.LCPI3_3) + lu12i.w $a0, 219235 + ori $a0, $a0, 1981 + movgr2fr.w $fs7, $a0 + lu12i.w $s5, 212992 ori $s6, $zero, 1000 ori $s7, $zero, 4 .LBB3_3: # =>This Inner Loop Header: Depth=1 @@ -497,10 +483,10 @@ _ZN17btGjkPairDetector26getClosestPointsNonVirtualERKN36btDiscreteCollisionDetec vst $vr3, $s2, 0 bcnez $fcc0, .LBB3_18 # %bb.11: # in Loop: Header=BB3_3 Depth=1 - fld.s $fa0, $s5, %pc_lo12(.LCPI3_3) - fsub.s $fa1, $fs4, $fs0 - fmul.s $fa0, $fs4, $fa0 - fcmp.cult.s $fcc0, $fa0, $fa1 + fsub.s $fa0, $fs4, $fs0 + movgr2fr.w $fa1, $s5 + fmul.s $fa1, $fs4, $fa1 + fcmp.cult.s $fcc0, $fa1, $fa0 bceqz $fcc0, .LBB3_19 # %bb.12: # in Loop: Header=BB3_3 Depth=1 ld.w $a0, $fp, 84 @@ -574,26 +560,29 @@ _ZN17btGjkPairDetector26getClosestPointsNonVirtualERKN36btDiscreteCollisionDetec movfr2gr.s $a1, $fa1 bstrins.d $a0, $a1, 63, 32 movfr2gr.s $a1, $fa0 - fld.s $fs7, $fp, 12 bstrpick.d $a1, $a1, 31, 0 - st.d $a0, $sp, 352 + fld.s $fs7, $fp, 12 fld.s $fs1, $fp, 8 - fmul.s $fa3, $fs7, $fs7 fld.s $fs2, $fp, 16 - pcalau12i $a0, %pc_hi20(.LCPI3_4) - fld.d $fa4, $a0, %pc_lo12(.LCPI3_4) + st.d $a0, $sp, 352 + fmul.s $fa3, $fs7, $fs7 fmadd.s $fa3, $fs1, $fs1, $fa3 fmadd.s $fa3, $fs2, $fs2, $fa3 - fcvt.d.s $fa5, $fa3 - fcmp.cule.d $fcc0, $fa4, $fa5 + fcvt.d.s $fa4, $fa3 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa5, $a0 + fcmp.cule.d $fcc0, $fa5, $fa4 st.d $a1, $sp, 360 bcnez $fcc0, .LBB3_25 # %bb.24: ori $a0, $zero, 5 st.w $a0, $fp, 88 .LBB3_25: - pcalau12i $a0, %pc_hi20(.LCPI3_5) - fld.s $fa4, $a0, %pc_lo12(.LCPI3_5) + lu12i.w $a0, 165888 + movgr2fr.w $fa4, $a0 fcmp.cule.s $fcc0, $fa3, $fa4 bcnez $fcc0, .LBB3_28 # %bb.26: @@ -657,11 +646,14 @@ _ZN17btGjkPairDetector26getClosestPointsNonVirtualERKN36btDiscreteCollisionDetec ld.w $a2, $fp, 88 beqz $a2, .LBB3_33 # %bb.32: - pcalau12i $a1, %pc_hi20(.LCPI3_6) - fld.d $fa0, $a1, %pc_lo12(.LCPI3_6) - fadd.s $fa1, $fs1, $fs0 - fcvt.d.s $fa1, $fa1 - fcmp.cule.d $fcc0, $fa0, $fa1 + fadd.s $fa0, $fs1, $fs0 + fcvt.d.s $fa0, $fa0 + lu12i.w $a1, 293601 + ori $a1, $a1, 1147 + lu32i.d $a1, 293601 + lu52i.d $a1, $a1, 1016 + movgr2fr.d $fa1, $a1 + fcmp.cule.d $fcc0, $fa1, $fa0 movcf2gr $a1, $fcc0 .LBB3_33: and $a1, $s4, $a1 @@ -701,11 +693,11 @@ _ZN17btGjkPairDetector26getClosestPointsNonVirtualERKN36btDiscreteCollisionDetec fsub.s $fs1, $fa2, $fa3 fsub.s $fs2, $fs6, $fs7 fsub.s $fs3, $fs4, $fs5 - pcalau12i $a0, %pc_hi20(.LCPI3_5) - fld.s $fa1, $a0, %pc_lo12(.LCPI3_5) fmul.s $fa0, $fs2, $fs2 fmadd.s $fa0, $fs1, $fs1, $fa0 fmadd.s $fa0, $fs3, $fs3, $fa0 + lu12i.w $a0, 165888 + movgr2fr.w $fa1, $a0 fcmp.cult.s $fcc0, $fa1, $fa0 bceqz $fcc0, .LBB3_44 # %bb.37: diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btHeightfieldTerrainShape.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btHeightfieldTerrainShape.s index 7cd454f5..0b7bd529 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btHeightfieldTerrainShape.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btHeightfieldTerrainShape.s @@ -184,12 +184,7 @@ _ZN25btHeightfieldTerrainShape10initializeEiiPvfffi14PHY_ScalarTypeb: # @_ZN25bt .Lfunc_end1: .size _ZN25btHeightfieldTerrainShape10initializeEiiPvfffi14PHY_ScalarTypeb, .Lfunc_end1-_ZN25btHeightfieldTerrainShape10initializeEiiPvfffi14PHY_ScalarTypeb # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN25btHeightfieldTerrainShapeC2EiiPvfibb -.LCPI2_0: - .word 0x477fff00 # float 65535 - .text - .globl _ZN25btHeightfieldTerrainShapeC2EiiPvfibb + .globl _ZN25btHeightfieldTerrainShapeC2EiiPvfibb # -- Begin function _ZN25btHeightfieldTerrainShapeC2EiiPvfibb .p2align 5 .type _ZN25btHeightfieldTerrainShapeC2EiiPvfibb,@function _ZN25btHeightfieldTerrainShapeC2EiiPvfibb: # @_ZN25btHeightfieldTerrainShapeC2EiiPvfibb @@ -227,8 +222,9 @@ _ZN25btHeightfieldTerrainShapeC2EiiPvfibb: # @_ZN25btHeightfieldTerrainShapeC2Ei jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(_ZTV25btHeightfieldTerrainShape+16) addi.d $a0, $a0, %pc_lo12(_ZTV25btHeightfieldTerrainShape+16) - pcalau12i $a1, %pc_hi20(.LCPI2_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI2_0) + lu12i.w $a1, 292863 + ori $a1, $a1, 3840 + movgr2fr.w $fa0, $a1 ori $a1, $zero, 5 masknez $a5, $a1, $s0 fdiv.s $fa0, $fs0, $fa0 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btHinge2Constraint.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btHinge2Constraint.s index 0482fc6b..54c0f440 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btHinge2Constraint.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btHinge2Constraint.s @@ -11,12 +11,6 @@ .word 0x00000000 # float 0 .word 0x3f800000 # float 1 .word 0x00000000 # float 0 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI0_2: - .word 0x421de9e7 # float 39.4784203 -.LCPI0_3: - .word 0x3c23d70a # float 0.00999999977 .text .globl _ZN18btHinge2ConstraintC2ER11btRigidBodyS1_R9btVector3S3_S3_ .p2align 5 @@ -315,14 +309,16 @@ _ZN18btHinge2ConstraintC2ER11btRigidBodyS1_R9btVector3S3_S3_: # @_ZN18btHinge2Co move $a0, $fp pcaddu18i $ra, %call36(_ZN29btGeneric6DofSpringConstraint12enableSpringEib) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.s $fa0, $a0, %pc_lo12(.LCPI0_2) + lu12i.w $a0, 270814 + ori $a0, $a0, 2535 + movgr2fr.w $fa0, $a0 ori $a1, $zero, 2 move $a0, $fp pcaddu18i $ra, %call36(_ZN29btGeneric6DofSpringConstraint12setStiffnessEif) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_3) - fld.s $fa0, $a0, %pc_lo12(.LCPI0_3) + lu12i.w $a0, 246333 + ori $a0, $a0, 1802 + movgr2fr.w $fa0, $a0 ori $a1, $zero, 2 move $a0, $fp pcaddu18i $ra, %call36(_ZN29btGeneric6DofSpringConstraint10setDampingEif) diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btHingeConstraint.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btHingeConstraint.s index 84ea91c2..f13ba4c5 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btHingeConstraint.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btHingeConstraint.s @@ -31,17 +31,9 @@ _ZN17btHingeConstraintC2Ev: # @_ZN17btHingeConstraintC2Ev .size _ZN17btHingeConstraintC2Ev, .Lfunc_end0-_ZN17btHingeConstraintC2Ev .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN17btHingeConstraintC2ER11btRigidBodyS1_RK9btVector3S4_RS2_S5_b -.LCPI1_0: - .word 0x3f7ffffe # float 0.99999988 -.LCPI1_1: - .word 0xbf7ffffe # float -0.99999988 -.LCPI1_2: - .word 0x3f3504f3 # float 0.707106769 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI1_3: + .p2align 4, 0x0 # -- Begin function _ZN17btHingeConstraintC2ER11btRigidBodyS1_RK9btVector3S4_RS2_S5_b +.LCPI1_0: .word 0x3f666666 # float 0.899999976 .word 0x3e99999a # float 0.300000012 .word 0x3f800000 # float 1 @@ -110,19 +102,22 @@ _ZN17btHingeConstraintC2ER11btRigidBodyS1_RK9btVector3S4_RS2_S5_b: # @_ZN17btHin fld.s $fa5, $s4, 8 fld.s $fa0, $s4, 24 fld.s $fa1, $s4, 40 - fld.s $fa4, $s3, 0 fld.s $fa3, $s3, 4 + fld.s $fa4, $s3, 0 fld.s $fa6, $s3, 8 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.s $fa7, $a0, %pc_lo12(.LCPI1_0) fmul.s $fa2, $fa0, $fa3 fmadd.s $fa2, $fa4, $fa5, $fa2 fmadd.s $fa2, $fa6, $fa1, $fa2 + lu12i.w $a0, 260095 + ori $a0, $a0, 4094 + movgr2fr.w $fa7, $a0 fcmp.cult.s $fcc0, $fa2, $fa7 - pcalau12i $a0, %pc_hi20(.LCPI1_1) + lu12i.w $a0, -264193 bceqz $fcc0, .LBB1_3 # %bb.1: - fld.s $fa7, $a0, %pc_lo12(.LCPI1_1) + ori $a1, $a0, 4094 + lu32i.d $a1, 0 + movgr2fr.w $fa7, $a1 fcmp.cult.s $fcc0, $fa7, $fa2 bceqz $fcc0, .LBB1_4 # %bb.2: @@ -185,17 +180,20 @@ _ZN17btHingeConstraintC2ER11btRigidBodyS1_RK9btVector3S4_RS2_S5_b: # @_ZN17btHin fld.s $fs4, $s1, 0 fld.s $fa4, $s3, 0 fld.s $fa1, $s3, 8 - fld.s $fa3, $a0, %pc_lo12(.LCPI1_1) fmul.s $fa2, $fa0, $fs3 fmadd.s $fa2, $fa4, $fs4, $fa2 fmadd.s $fa2, $fa1, $fs5, $fa2 + ori $a0, $a0, 4094 + lu32i.d $a0, 0 + movgr2fr.w $fa3, $a0 fcmp.cule.s $fcc0, $fa3, $fa2 bcnez $fcc0, .LBB1_8 # %bb.6: - pcalau12i $a0, %pc_hi20(.LCPI1_2) - fld.s $fa2, $a0, %pc_lo12(.LCPI1_2) - fabs.s $fa3, $fa1 - fcmp.cule.s $fcc0, $fa3, $fa2 + fabs.s $fa2, $fa1 + lu12i.w $a0, 258896 + ori $a0, $a0, 1267 + movgr2fr.w $fa3, $a0 + fcmp.cule.s $fcc0, $fa2, $fa3 bcnez $fcc0, .LBB1_10 # %bb.7: fmul.s $fa2, $fa1, $fa1 @@ -295,8 +293,8 @@ _ZN17btHingeConstraintC2ER11btRigidBodyS1_RK9btVector3S4_RS2_S5_b: # @_ZN17btHin fld.s $fa0, $s1, 8 fst.s $fa0, $s0, 704 st.w $zero, $s0, 708 - pcalau12i $a0, %pc_hi20(.LCPI1_3) - vld $vr0, $a0, %pc_lo12(.LCPI1_3) + pcalau12i $a0, %pc_hi20(.LCPI1_0) + vld $vr0, $a0, %pc_lo12(.LCPI1_0) lu12i.w $a0, -264192 lu32i.d $a0, 0 st.w $a0, $s0, 752 @@ -336,15 +334,9 @@ _ZN17btHingeConstraintC2ER11btRigidBodyS1_RK9btVector3S4_RS2_S5_b: # @_ZN17btHin .size _ZN17btHingeConstraintC2ER11btRigidBodyS1_RK9btVector3S4_RS2_S5_b, .Lfunc_end1-_ZN17btHingeConstraintC2ER11btRigidBodyS1_RK9btVector3S4_RS2_S5_b .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN17btHingeConstraintC2ER11btRigidBodyRK9btVector3RS2_b -.LCPI2_0: - .word 0x3f3504f3 # float 0.707106769 -.LCPI2_1: - .word 0xbf7ffffe # float -0.99999988 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI2_2: + .p2align 4, 0x0 # -- Begin function _ZN17btHingeConstraintC2ER11btRigidBodyRK9btVector3RS2_b +.LCPI2_0: .word 0x3f666666 # float 0.899999976 .word 0x3e99999a # float 0.300000012 .word 0x3f800000 # float 1 @@ -399,11 +391,12 @@ _ZN17btHingeConstraintC2ER11btRigidBodyRK9btVector3RS2_b: # @_ZN17btHingeConstra addi.d $a0, $a0, %pc_lo12(_ZTV17btHingeConstraint+16) st.h $zero, $s0, 780 fld.s $fa1, $s3, 8 - pcalau12i $a1, %pc_hi20(.LCPI2_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI2_0) st.d $a0, $s0, 0 st.b $zero, $s0, 783 fabs.s $fa2, $fa1 + lu12i.w $a0, 258896 + ori $a0, $a0, 1267 + movgr2fr.w $fa0, $a0 fcmp.cule.s $fcc0, $fa2, $fa0 st.b $fp, $s0, 784 bcnez $fcc0, .LBB2_2 @@ -475,11 +468,13 @@ _ZN17btHingeConstraintC2ER11btRigidBodyRK9btVector3RS2_b: # @_ZN17btHingeConstra fmul.s $fa4, $fa1, $ft11 fmadd.s $fa4, $ft10, $fa2, $fa4 fmadd.s $fs4, $ft13, $fa3, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI2_1) - fld.s $fa5, $a0, %pc_lo12(.LCPI2_1) fmul.s $fa4, $fa1, $fs5 fmadd.s $fa4, $fa2, $fs3, $fa4 fmadd.s $fa4, $fa3, $fs4, $fa4 + lu12i.w $a0, -264193 + ori $a0, $a0, 4094 + lu32i.d $a0, 0 + movgr2fr.w $fa5, $a0 fcmp.cule.s $fcc0, $fa5, $fa4 bcnez $fcc0, .LBB2_6 # %bb.4: @@ -604,8 +599,8 @@ _ZN17btHingeConstraintC2ER11btRigidBodyRK9btVector3RS2_b: # @_ZN17btHingeConstra fst.s $fa2, $s0, 700 fst.s $fs4, $s0, 704 st.w $zero, $s0, 708 - pcalau12i $a0, %pc_hi20(.LCPI2_2) - vld $vr0, $a0, %pc_lo12(.LCPI2_2) + pcalau12i $a0, %pc_hi20(.LCPI2_0) + vld $vr0, $a0, %pc_lo12(.LCPI2_0) lu12i.w $a0, -264192 lu32i.d $a0, 0 st.w $a0, $s0, 752 @@ -849,15 +844,9 @@ _ZN17btHingeConstraintC2ER11btRigidBodyRK11btTransformb: # @_ZN17btHingeConstrai .size _ZN17btHingeConstraintC2ER11btRigidBodyRK11btTransformb, .Lfunc_end4-_ZN17btHingeConstraintC2ER11btRigidBodyRK11btTransformb .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN17btHingeConstraint13buildJacobianEv -.LCPI5_0: - .word 0x34000000 # float 1.1920929E-7 -.LCPI5_2: - .word 0x3f3504f3 # float 0.707106769 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI5_1: + .p2align 4, 0x0 # -- Begin function _ZN17btHingeConstraint13buildJacobianEv +.LCPI5_0: .word 0x3f800000 # float 1 .word 0x00000000 # float 0 .word 0x00000000 # float 0 @@ -893,13 +882,13 @@ _ZN17btHingeConstraint13buildJacobianEv: # @_ZN17btHingeConstraint13buildJacobia .cfi_offset 63, -80 move $fp, $a0 ld.bu $a0, $a0, 783 - beqz $a0, .LBB5_19 + beqz $a0, .LBB5_20 # %bb.1: ld.bu $a1, $fp, 780 st.w $zero, $fp, 40 st.w $zero, $fp, 788 - pcalau12i $a0, %pc_hi20(.LCPI5_2) - bnez $a1, .LBB5_9 + lu12i.w $a0, 258896 + bnez $a1, .LBB5_10 # %bb.2: ld.d $a1, $fp, 24 fld.s $fa0, $a1, 8 @@ -960,13 +949,13 @@ _ZN17btHingeConstraint13buildJacobianEv: # @_ZN17btHingeConstraint13buildJacobia fsub.s $fa6, $ft13, $fa0 fsub.s $fa7, $ft14, $fa1 fsub.s $ft1, $ft15, $fa2 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - fld.s $ft2, $a3, %pc_lo12(.LCPI5_0) fmul.s $ft0, $fa7, $fa7 fmadd.s $ft0, $fa6, $fa6, $ft0 fmadd.s $ft0, $ft1, $ft1, $ft0 + lu12i.w $a3, 212992 + movgr2fr.w $ft2, $a3 fcmp.cule.s $fcc0, $ft0, $ft2 - bcnez $fcc0, .LBB5_5 + bcnez $fcc0, .LBB5_4 # %bb.3: frsqrt.s $ft2, $ft0 fmul.s $ft0, $fa6, $ft2 @@ -979,35 +968,21 @@ _ZN17btHingeConstraint13buildJacobianEv: # @_ZN17btHingeConstraint13buildJacobia bstrpick.d $a4, $a4, 31, 0 st.d $a3, $sp, 32 st.d $a4, $sp, 40 - fld.s $fa6, $a0, %pc_lo12(.LCPI5_2) - fabs.s $ft2, $ft1 - fcmp.cule.s $fcc0, $ft2, $fa6 - bceqz $fcc0, .LBB5_6 + b .LBB5_5 .LBB5_4: - fmul.s $fa6, $fa7, $fa7 - fmadd.s $ft2, $ft0, $ft0, $fa6 - frsqrt.s $ft4, $ft2 - fneg.s $fa6, $fa7 - fmul.s $ft3, $ft4, $fa6 - fmul.s $fa6, $ft0, $ft4 - fneg.s $fa7, $ft1 - fmul.s $fa7, $fa6, $fa7 - fmul.s $ft1, $ft1, $ft3 - fmul.s $ft0, $ft2, $ft4 - movgr2fr.w $ft2, $zero - b .LBB5_7 -.LBB5_5: - pcalau12i $a3, %pc_hi20(.LCPI5_1) - vld $vr6, $a3, %pc_lo12(.LCPI5_1) + pcalau12i $a3, %pc_hi20(.LCPI5_0) + vld $vr6, $a3, %pc_lo12(.LCPI5_0) vst $vr6, $sp, 32 movgr2fr.w $fa7, $zero vldi $vr8, -1168 fmov.s $ft1, $fa7 - fld.s $fa6, $a0, %pc_lo12(.LCPI5_2) - fabs.s $ft2, $ft1 - fcmp.cule.s $fcc0, $ft2, $fa6 - bcnez $fcc0, .LBB5_4 -.LBB5_6: +.LBB5_5: + fabs.s $fa6, $ft1 + ori $a3, $a0, 1267 + movgr2fr.w $ft2, $a3 + fcmp.cule.s $fcc0, $fa6, $ft2 + bcnez $fcc0, .LBB5_7 +# %bb.6: fmul.s $fa6, $ft1, $ft1 fmadd.s $ft3, $fa7, $fa7, $fa6 frsqrt.s $ft4, $ft3 @@ -1019,7 +994,20 @@ _ZN17btHingeConstraint13buildJacobianEv: # @_ZN17btHingeConstraint13buildJacobia fmul.s $ft1, $ft2, $ft1 fmul.s $ft0, $fa6, $ft0 movgr2fr.w $ft3, $zero -.LBB5_7: # %_Z13btPlaneSpace1RK9btVector3RS_S2_.exit + b .LBB5_8 +.LBB5_7: + fmul.s $fa6, $fa7, $fa7 + fmadd.s $ft2, $ft0, $ft0, $fa6 + frsqrt.s $ft4, $ft2 + fneg.s $fa6, $fa7 + fmul.s $ft3, $ft4, $fa6 + fmul.s $fa6, $ft0, $ft4 + fneg.s $fa7, $ft1 + fmul.s $fa7, $fa6, $fa7 + fmul.s $ft1, $ft1, $ft3 + fmul.s $ft0, $ft2, $ft4 + movgr2fr.w $ft2, $zero +.LBB5_8: # %_Z13btPlaneSpace1RK9btVector3RS_S2_.exit move $a3, $zero fst.s $ft3, $sp, 48 fst.s $fa6, $sp, 52 @@ -1033,7 +1021,7 @@ _ZN17btHingeConstraint13buildJacobianEv: # @_ZN17btHingeConstraint13buildJacobia ori $a5, $zero, 252 fst.s $fa0, $sp, 8 # 4-byte Folded Spill .p2align 4, , 16 -.LBB5_8: # =>This Inner Loop Header: Depth=1 +.LBB5_9: # =>This Inner Loop Header: Depth=1 add.d $a6, $fp, $a3 fld.s $ft9, $a1, 8 fld.s $ft11, $a1, 24 @@ -1178,16 +1166,17 @@ _ZN17btHingeConstraint13buildJacobianEv: # @_ZN17btHingeConstraint13buildJacobia st.d $t0, $a6, 168 addi.d $a3, $a3, 84 addi.d $a4, $a4, 16 - bne $a3, $a5, .LBB5_8 -.LBB5_9: + bne $a3, $a5, .LBB5_9 +.LBB5_10: fld.s $fa0, $fp, 640 - fld.s $fa1, $a0, %pc_lo12(.LCPI5_2) fld.s $fa4, $fp, 608 fld.s $fa5, $fp, 624 - fabs.s $fa2, $fa0 - fcmp.cule.s $fcc0, $fa2, $fa1 - bcnez $fcc0, .LBB5_11 -# %bb.10: + fabs.s $fa1, $fa0 + ori $a0, $a0, 1267 + movgr2fr.w $fa2, $a0 + fcmp.cule.s $fcc0, $fa1, $fa2 + bcnez $fcc0, .LBB5_12 +# %bb.11: fmul.s $fa1, $fa0, $fa0 fmadd.s $fa1, $fa5, $fa5, $fa1 frsqrt.s $fa2, $fa1 @@ -1199,8 +1188,8 @@ _ZN17btHingeConstraint13buildJacobianEv: # @_ZN17btHingeConstraint13buildJacobia fmul.s $ft7, $fa6, $fa1 fmul.s $fa7, $fa4, $ft8 movgr2fr.w $ft10, $zero - b .LBB5_12 -.LBB5_11: + b .LBB5_13 +.LBB5_12: fmul.s $fa1, $fa5, $fa5 fmadd.s $fa1, $fa4, $fa4, $fa1 frsqrt.s $fa2, $fa1 @@ -1212,7 +1201,7 @@ _ZN17btHingeConstraint13buildJacobianEv: # @_ZN17btHingeConstraint13buildJacobia fmul.s $ft7, $fa0, $ft10 fmul.s $fa7, $fa1, $fa2 movgr2fr.w $fa6, $zero -.LBB5_12: # %_Z13btPlaneSpace1RK9btVector3RS_S2_.exit95 +.LBB5_13: # %_Z13btPlaneSpace1RK9btVector3RS_S2_.exit95 ld.d $a0, $fp, 24 fld.s $ft2, $a0, 8 fld.s $ft3, $a0, 12 @@ -1519,30 +1508,30 @@ _ZN17btHingeConstraint13buildJacobianEv: # @_ZN17btHingeConstraint13buildJacobia st.d $zero, $fp, 760 fcmp.cult.s $fcc0, $fa2, $fa1 st.b $zero, $fp, 782 - bcnez $fcc0, .LBB5_18 -# %bb.13: + bcnez $fcc0, .LBB5_19 +# %bb.14: pcaddu18i $ra, %call36(_Z21btAdjustAngleToLimitsfff) jirl $ra, $ra, 0 fld.s $fa1, $fp, 748 fcmp.cle.s $fcc0, $fa0, $fa1 fst.s $fa0, $fp, 772 - bcnez $fcc0, .LBB5_16 -# %bb.14: + bcnez $fcc0, .LBB5_17 +# %bb.15: fld.s $fa1, $fp, 752 fcmp.cult.s $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB5_18 -# %bb.15: + bcnez $fcc0, .LBB5_19 +# %bb.16: vldi $vr2, -1040 - b .LBB5_17 -.LBB5_16: + b .LBB5_18 +.LBB5_17: vldi $vr2, -1168 -.LBB5_17: # %.sink.split.i +.LBB5_18: # %.sink.split.i fsub.s $fa0, $fa1, $fa0 fst.s $fa0, $fp, 764 fst.s $fa2, $fp, 760 ori $a0, $zero, 1 st.b $a0, $fp, 782 -.LBB5_18: # %_ZN17btHingeConstraint9testLimitERK11btTransformS2_.exit +.LBB5_19: # %_ZN17btHingeConstraint9testLimitERK11btTransformS2_.exit ld.d $a0, $fp, 24 fld.s $fa2, $fp, 608 fld.s $fa3, $fp, 624 @@ -1611,7 +1600,7 @@ _ZN17btHingeConstraint13buildJacobianEv: # @_ZN17btHingeConstraint13buildJacobia fadd.s $fa0, $fa3, $fa0 frecip.s $fa0, $fa0 fst.s $fa0, $fp, 756 -.LBB5_19: +.LBB5_20: fld.d $fs7, $sp, 80 # 8-byte Folded Reload fld.d $fs6, $sp, 88 # 8-byte Folded Reload fld.d $fs5, $sp, 96 # 8-byte Folded Reload @@ -1681,14 +1670,7 @@ _ZN17btHingeConstraint9testLimitERK11btTransformS2_: # @_ZN17btHingeConstraint9t .size _ZN17btHingeConstraint9testLimitERK11btTransformS2_, .Lfunc_end6-_ZN17btHingeConstraint9testLimitERK11btTransformS2_ .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN17btHingeConstraint23solveConstraintObsoleteER12btSolverBodyS1_f -.LCPI7_0: - .word 0xbe99999a # float -0.300000012 -.LCPI7_1: - .word 0x3727c5ac # float 9.99999974E-6 - .text - .globl _ZN17btHingeConstraint23solveConstraintObsoleteER12btSolverBodyS1_f + .globl _ZN17btHingeConstraint23solveConstraintObsoleteER12btSolverBodyS1_f # -- Begin function _ZN17btHingeConstraint23solveConstraintObsoleteER12btSolverBodyS1_f .p2align 5 .type _ZN17btHingeConstraint23solveConstraintObsoleteER12btSolverBodyS1_f,@function _ZN17btHingeConstraint23solveConstraintObsoleteER12btSolverBodyS1_f: # @_ZN17btHingeConstraint23solveConstraintObsoleteER12btSolverBodyS1_f @@ -1706,17 +1688,17 @@ _ZN17btHingeConstraint23solveConstraintObsoleteER12btSolverBodyS1_f: # @_ZN17btH fst.d $fs6, $sp, 56 # 8-byte Folded Spill fst.d $fs7, $sp, 48 # 8-byte Folded Spill ld.d $a4, $a0, 24 - fld.s $ft4, $a4, 8 + fld.s $ft3, $a4, 8 fld.s $ft10, $a4, 12 fld.s $fa1, $a4, 16 fld.s $ft2, $a4, 24 fld.s $ft9, $a4, 28 fld.s $fa2, $a4, 32 ld.d $a3, $a0, 32 - fld.s $ft5, $a4, 40 + fld.s $ft4, $a4, 40 fld.s $ft8, $a4, 44 fld.s $fa3, $a4, 48 - fld.s $ft3, $a3, 8 + fld.s $ft5, $a3, 8 fld.s $ft7, $a3, 12 fld.s $fa6, $a3, 16 fld.s $ft1, $a3, 24 @@ -1732,28 +1714,28 @@ _ZN17btHingeConstraint23solveConstraintObsoleteER12btSolverBodyS1_f: # @_ZN17btH fld.s $ft12, $a0, 648 fld.s $ft13, $a0, 656 fmul.s $ft10, $ft10, $ft11 - fmadd.s $ft4, $ft4, $ft12, $ft10 - fmadd.s $fa1, $fa1, $ft13, $ft4 + fmadd.s $ft3, $ft3, $ft12, $ft10 + fmadd.s $fa1, $fa1, $ft13, $ft3 fld.s $ft14, $a4, 56 - fmul.s $ft4, $ft11, $ft9 + fmul.s $ft3, $ft11, $ft9 fld.s $ft15, $a4, 60 - fmadd.s $ft4, $ft2, $ft12, $ft4 + fmadd.s $ft3, $ft2, $ft12, $ft3 fadd.s $ft2, $fa1, $ft14 - fmadd.s $fa1, $fa2, $ft13, $ft4 - fadd.s $ft4, $ft15, $fa1 + fmadd.s $fa1, $fa2, $ft13, $ft3 + fadd.s $ft3, $ft15, $fa1 fld.s $fs0, $a4, 64 fmul.s $fa1, $ft11, $ft8 - fmadd.s $fa1, $ft5, $ft12, $fa1 + fmadd.s $fa1, $ft4, $ft12, $fa1 fmadd.s $fa1, $fa3, $ft13, $fa1 - fadd.s $ft5, $fs0, $fa1 + fadd.s $ft4, $fs0, $fa1 fld.s $fa1, $a0, 716 fld.s $fa2, $a0, 712 fld.s $fa3, $a0, 720 fld.s $ft9, $a3, 56 fmul.s $ft7, $ft7, $fa1 - fmadd.s $ft3, $ft3, $fa2, $ft7 - fmadd.s $fa6, $fa6, $fa3, $ft3 - fadd.s $ft3, $fa6, $ft9 + fmadd.s $ft5, $ft5, $fa2, $ft7 + fmadd.s $fa6, $fa6, $fa3, $ft5 + fadd.s $ft5, $fa6, $ft9 fld.s $fa6, $a3, 60 fmul.s $ft6, $fa1, $ft6 fmadd.s $ft1, $ft1, $fa2, $ft6 @@ -1766,9 +1748,9 @@ _ZN17btHingeConstraint23solveConstraintObsoleteER12btSolverBodyS1_f: # @_ZN17btH fadd.s $ft7, $ft10, $fa1 ld.d $a5, $a1, 72 fsub.s $fa1, $ft2, $ft14 - fsub.s $fa2, $ft4, $ft15 + fsub.s $fa2, $ft3, $ft15 movgr2fr.w $ft1, $zero - fsub.s $fa3, $ft5, $fs0 + fsub.s $fa3, $ft4, $fs0 fmov.s $ft8, $ft1 fmov.s $ft0, $ft1 fmov.s $fa7, $ft1 @@ -1806,7 +1788,7 @@ _ZN17btHingeConstraint23solveConstraintObsoleteER12btSolverBodyS1_f: # @_ZN17btH fadd.s $ft8, $fs0, $ft12 .LBB7_4: # %_ZNK12btSolverBody31getVelocityInLocalPointObsoleteERK9btVector3RS0_.exit ld.d $a5, $a2, 72 - fsub.s $fa4, $ft3, $ft9 + fsub.s $fa4, $ft5, $ft9 fsub.s $fa5, $ft6, $fa6 fsub.s $fa6, $ft7, $ft10 fmov.s $ft9, $ft1 @@ -1848,11 +1830,13 @@ _ZN17btHingeConstraint23solveConstraintObsoleteER12btSolverBodyS1_f: # @_ZN17btH fsub.s $fa7, $fa7, $ft10 fsub.s $ft0, $ft0, $ft9 fsub.s $ft1, $ft8, $ft1 - fsub.s $ft2, $ft2, $ft3 - pcalau12i $a6, %pc_hi20(.LCPI7_0) - fld.s $ft3, $a6, %pc_lo12(.LCPI7_0) - fsub.s $ft4, $ft4, $ft6 - fsub.s $ft5, $ft5, $ft7 + fsub.s $ft2, $ft2, $ft5 + fsub.s $ft3, $ft3, $ft6 + fsub.s $ft4, $ft4, $ft7 + lu12i.w $a6, -267879 + ori $a6, $a6, 2458 + lu32i.d $a6, 0 + movgr2fr.w $ft5, $a6 ori $a6, $zero, 252 .p2align 4, , 16 .LBB7_7: # =>This Inner Loop Header: Depth=1 @@ -1865,10 +1849,10 @@ _ZN17btHingeConstraint23solveConstraintObsoleteER12btSolverBodyS1_f: # @_ZN17btH fmul.s $ft8, $ft0, $ft7 fmadd.s $ft8, $ft12, $fa7, $ft8 fmadd.s $ft8, $ft11, $ft1, $ft8 - fmul.s $ft9, $ft4, $ft7 + fmul.s $ft9, $ft3, $ft7 fmadd.s $ft9, $ft2, $ft12, $ft9 - fmadd.s $ft9, $ft5, $ft11, $ft9 - fmul.s $ft9, $ft9, $ft3 + fmadd.s $ft9, $ft4, $ft11, $ft9 + fmul.s $ft9, $ft9, $ft5 fdiv.s $ft9, $ft9, $fa0 fld.s $ft10, $a0, 40 fneg.s $ft13, $ft6 @@ -2001,16 +1985,16 @@ _ZN17btHingeConstraint23solveConstraintObsoleteER12btSolverBodyS1_f: # @_ZN17btH fst.s $ft6, $a2, 24 bne $a5, $a6, .LBB7_7 # %bb.8: # %.loopexit.loopexit - fld.s $ft4, $a4, 8 + fld.s $ft3, $a4, 8 fld.s $ft10, $a4, 12 fld.s $fa1, $a4, 16 fld.s $ft2, $a4, 24 fld.s $ft9, $a4, 28 fld.s $fa2, $a4, 32 - fld.s $ft5, $a4, 40 + fld.s $ft4, $a4, 40 fld.s $ft8, $a4, 44 fld.s $fa3, $a4, 48 - fld.s $ft3, $a3, 8 + fld.s $ft5, $a3, 8 fld.s $ft7, $a3, 12 fld.s $fa6, $a3, 16 fld.s $ft1, $a3, 24 @@ -2024,20 +2008,20 @@ _ZN17btHingeConstraint23solveConstraintObsoleteER12btSolverBodyS1_f: # @_ZN17btH fld.s $ft14, $a0, 608 fld.s $ft11, $a0, 640 fmul.s $ft10, $ft13, $ft10 - fmadd.s $ft12, $ft4, $ft14, $ft10 - fmul.s $ft4, $ft13, $ft9 - fmadd.s $ft9, $ft2, $ft14, $ft4 + fmadd.s $ft12, $ft3, $ft14, $ft10 + fmul.s $ft3, $ft13, $ft9 + fmadd.s $ft9, $ft2, $ft14, $ft3 fmul.s $ft2, $ft13, $ft8 - fld.s $ft4, $a0, 688 + fld.s $ft3, $a0, 688 fld.s $ft13, $a0, 672 - fmadd.s $ft8, $ft5, $ft14, $ft2 + fmadd.s $ft8, $ft4, $ft14, $ft2 fld.s $ft10, $a0, 704 - fmul.s $ft2, $ft4, $ft7 - fmadd.s $ft7, $ft3, $ft13, $ft2 - fmul.s $ft2, $ft4, $ft6 + fmul.s $ft2, $ft3, $ft7 + fmadd.s $ft7, $ft5, $ft13, $ft2 + fmul.s $ft2, $ft3, $ft6 ld.d $a5, $a1, 72 fmadd.s $ft1, $ft1, $ft13, $ft2 - fmul.s $ft0, $ft4, $ft0 + fmul.s $ft0, $ft3, $ft0 movgr2fr.w $ft2, $zero fmadd.s $fa7, $fa7, $ft13, $ft0 fmov.s $ft3, $ft2 @@ -2103,12 +2087,13 @@ _ZN17btHingeConstraint23solveConstraintObsoleteER12btSolverBodyS1_f: # @_ZN17btH fsub.s $ft13, $ft11, $ft13 fsub.s $ft14, $ft12, $ft14 fsub.s $ft12, $ft15, $fs0 - fmul.s $ft15, $ft14, $ft14 - pcalau12i $a5, %pc_hi20(.LCPI7_1) - fld.s $fa7, $a5, %pc_lo12(.LCPI7_1) - fmadd.s $ft15, $ft13, $ft13, $ft15 - fmadd.s $ft15, $ft12, $ft12, $ft15 - fsqrt.s $ft15, $ft15 + fmul.s $ft11, $ft14, $ft14 + fmadd.s $ft11, $ft13, $ft13, $ft11 + fmadd.s $ft11, $ft12, $ft12, $ft11 + fsqrt.s $ft15, $ft11 + lu12i.w $a5, 225916 + ori $a5, $a5, 1452 + movgr2fr.w $fa7, $a5 fcmp.cule.s $fcc0, $ft15, $fa7 bcnez $fcc0, .LBB7_15 # %bb.14: @@ -2805,14 +2790,7 @@ _ZN17btHingeConstraint8getInfo2EPN17btTypedConstraint17btConstraintInfo2E: # @_Z .size _ZN17btHingeConstraint8getInfo2EPN17btTypedConstraint17btConstraintInfo2E, .Lfunc_end10-_ZN17btHingeConstraint8getInfo2EPN17btTypedConstraint17btConstraintInfo2E .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN17btHingeConstraint16getInfo2InternalEPN17btTypedConstraint17btConstraintInfo2ERK11btTransformS5_RK9btVector3S8_ -.LCPI11_0: - .word 0xff7fffff # float -3.40282347E+38 -.LCPI11_1: - .word 0x7f7fffff # float 3.40282347E+38 - .text - .globl _ZN17btHingeConstraint16getInfo2InternalEPN17btTypedConstraint17btConstraintInfo2ERK11btTransformS5_RK9btVector3S8_ + .globl _ZN17btHingeConstraint16getInfo2InternalEPN17btTypedConstraint17btConstraintInfo2ERK11btTransformS5_RK9btVector3S8_ # -- Begin function _ZN17btHingeConstraint16getInfo2InternalEPN17btTypedConstraint17btConstraintInfo2ERK11btTransformS5_RK9btVector3S8_ .p2align 5 .type _ZN17btHingeConstraint16getInfo2InternalEPN17btTypedConstraint17btConstraintInfo2ERK11btTransformS5_RK9btVector3S8_,@function _ZN17btHingeConstraint16getInfo2InternalEPN17btTypedConstraint17btConstraintInfo2ERK11btTransformS5_RK9btVector3S8_: # @_ZN17btHingeConstraint16getInfo2InternalEPN17btTypedConstraint17btConstraintInfo2ERK11btTransformS5_RK9btVector3S8_ @@ -3183,15 +3161,18 @@ _ZN17btHingeConstraint16getInfo2InternalEPN17btTypedConstraint17btConstraintInfo fstx.s $fa0, $a3, $s6 stx.w $zero, $a0, $s6 ld.d $a0, $s2, 64 - pcalau12i $a1, %pc_hi20(.LCPI11_0) - fld.s $fa1, $a1, %pc_lo12(.LCPI11_0) + lu12i.w $a1, -2049 + ori $a1, $a1, 4095 + lu32i.d $a1, 0 + movgr2fr.w $fa1, $a1 andi $a1, $s3, 1 movgr2fr.w $fa0, $zero movgr2cf $fcc0, $a1 - pcalau12i $a1, %pc_hi20(.LCPI11_1) - fld.s $fa2, $a1, %pc_lo12(.LCPI11_1) - fsel $fa3, $fa1, $fa0, $fcc0 - fsel $fa1, $fa3, $fa1, $fcc1 + fsel $fa2, $fa1, $fa0, $fcc0 + fsel $fa1, $fa2, $fa1, $fcc1 + lu12i.w $a1, 522239 + ori $a1, $a1, 4095 + movgr2fr.w $fa2, $a1 ld.d $a1, $s2, 72 fsel $fa3, $fa0, $fa2, $fcc0 fsel $fa2, $fa3, $fa2, $fcc1 @@ -3373,16 +3354,7 @@ _ZN17btHingeConstraint13getHingeAngleEv: # @_ZN17btHingeConstraint13getHingeAngl .Lfunc_end14: .size _ZN17btHingeConstraint13getHingeAngleEv, .Lfunc_end14-_ZN17btHingeConstraint13getHingeAngleEv # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN17btHingeConstraint13getHingeAngleERK11btTransformS2_ -.LCPI15_0: - .word 0x3f490fdb # float 0.785398185 -.LCPI15_1: - .word 0xbf490fdb # float -0.785398185 -.LCPI15_2: - .word 0x4016cbe4 # float 2.3561945 - .text - .globl _ZN17btHingeConstraint13getHingeAngleERK11btTransformS2_ + .globl _ZN17btHingeConstraint13getHingeAngleERK11btTransformS2_ # -- Begin function _ZN17btHingeConstraint13getHingeAngleERK11btTransformS2_ .p2align 5 .type _ZN17btHingeConstraint13getHingeAngleERK11btTransformS2_,@function _ZN17btHingeConstraint13getHingeAngleERK11btTransformS2_: # @_ZN17btHingeConstraint13getHingeAngleERK11btTransformS2_ @@ -3452,23 +3424,25 @@ _ZN17btHingeConstraint13getHingeAngleERK11btTransformS2_: # @_ZN17btHingeConstra fabs.s $fa3, $fa0 bceqz $fcc0, .LBB15_2 # %bb.1: - pcalau12i $a1, %pc_hi20(.LCPI15_2) - fld.s $fa4, $a1, %pc_lo12(.LCPI15_2) - pcalau12i $a1, %pc_hi20(.LCPI15_1) - fld.s $fa5, $a1, %pc_lo12(.LCPI15_1) - fadd.s $fa6, $fa2, $fa3 + fadd.s $fa4, $fa2, $fa3 fsub.s $fa2, $fa3, $fa2 + fdiv.s $fa2, $fa4, $fa2 + lu12i.w $a1, 262508 + ori $a1, $a1, 3044 b .LBB15_3 .LBB15_2: - pcalau12i $a1, %pc_hi20(.LCPI15_0) - fld.s $fa4, $a1, %pc_lo12(.LCPI15_0) - pcalau12i $a1, %pc_hi20(.LCPI15_1) - fld.s $fa5, $a1, %pc_lo12(.LCPI15_1) - fsub.s $fa6, $fa2, $fa3 + fsub.s $fa4, $fa2, $fa3 fadd.s $fa2, $fa2, $fa3 + fdiv.s $fa2, $fa4, $fa2 + lu12i.w $a1, 259216 + ori $a1, $a1, 4059 .LBB15_3: # %_Z11btAtan2Fastff.exit - fdiv.s $fa2, $fa6, $fa2 - fmadd.s $fa2, $fa2, $fa5, $fa4 + movgr2fr.w $fa3, $a1 + lu12i.w $a1, -265072 + ori $a1, $a1, 4059 + lu32i.d $a1, 0 + movgr2fr.w $fa4, $a1 + fmadd.s $fa2, $fa2, $fa4, $fa3 fld.s $fa3, $a0, 776 fneg.s $fa4, $fa2 fcmp.clt.s $fcc0, $fa0, $fa1 @@ -3478,18 +3452,8 @@ _ZN17btHingeConstraint13getHingeAngleERK11btTransformS2_: # @_ZN17btHingeConstra .Lfunc_end15: .size _ZN17btHingeConstraint13getHingeAngleERK11btTransformS2_, .Lfunc_end15-_ZN17btHingeConstraint13getHingeAngleERK11btTransformS2_ # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z21btAdjustAngleToLimitsfff -.LCPI16_0: - .word 0x40c90fdb # float 6.28318548 -.LCPI16_1: - .word 0xc0490fdb # float -3.14159274 -.LCPI16_2: - .word 0x40490fdb # float 3.14159274 -.LCPI16_3: - .word 0xc0c90fdb # float -6.28318548 .section .text._Z21btAdjustAngleToLimitsfff,"axG",@progbits,_Z21btAdjustAngleToLimitsfff,comdat - .weak _Z21btAdjustAngleToLimitsfff + .weak _Z21btAdjustAngleToLimitsfff # -- Begin function _Z21btAdjustAngleToLimitsfff .p2align 5 .type _Z21btAdjustAngleToLimitsfff,@function _Z21btAdjustAngleToLimitsfff: # @_Z21btAdjustAngleToLimitsfff @@ -3522,14 +3486,17 @@ _Z21btAdjustAngleToLimitsfff: # @_Z21btAdjustAngleToLimitsfff # %bb.2: fmov.s $fs4, $fa2 fsub.s $fa0, $fa1, $fs0 - pcalau12i $a0, %pc_hi20(.LCPI16_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI16_0) + lu12i.w $a0, 265360 + ori $a0, $a0, 4059 + movgr2fr.w $fs1, $a0 fmov.s $fa1, $fs1 pcaddu18i $ra, %call36(fmodf) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI16_1) - fld.s $fs3, $a0, %pc_lo12(.LCPI16_1) fmov.s $fs2, $fa0 + lu12i.w $a0, -260976 + ori $a0, $a0, 4059 + lu32i.d $a0, 0 + movgr2fr.w $fs3, $a0 fcmp.cule.s $fcc0, $fs3, $fa0 bcnez $fcc0, .LBB16_7 # %bb.3: @@ -3541,29 +3508,35 @@ _Z21btAdjustAngleToLimitsfff: # @_Z21btAdjustAngleToLimitsfff # %bb.5: fmov.s $fs4, $fa1 fsub.s $fa0, $fs0, $fa2 - pcalau12i $a0, %pc_hi20(.LCPI16_0) - fld.s $fs2, $a0, %pc_lo12(.LCPI16_0) + lu12i.w $a0, 265360 + ori $a0, $a0, 4059 + movgr2fr.w $fs2, $a0 fmov.s $fa1, $fs2 pcaddu18i $ra, %call36(fmodf) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI16_1) - fld.s $fs3, $a0, %pc_lo12(.LCPI16_1) fmov.s $fs1, $fa0 + lu12i.w $a0, -260976 + ori $a0, $a0, 4059 + lu32i.d $a0, 0 + movgr2fr.w $fs3, $a0 fcmp.cule.s $fcc0, $fs3, $fa0 - pcalau12i $s0, %pc_hi20(.LCPI16_2) - pcalau12i $fp, %pc_hi20(.LCPI16_3) + lu12i.w $s0, 263312 + lu12i.w $fp, -258928 bcnez $fcc0, .LBB16_14 # %bb.6: fadd.s $fs1, $fs1, $fs2 b .LBB16_16 .LBB16_7: - pcalau12i $a0, %pc_hi20(.LCPI16_2) - fld.s $fa0, $a0, %pc_lo12(.LCPI16_2) + lu12i.w $a0, 263312 + ori $a0, $a0, 4059 + movgr2fr.w $fa0, $a0 fcmp.cule.s $fcc0, $fs2, $fa0 bcnez $fcc0, .LBB16_9 # %bb.8: - pcalau12i $a0, %pc_hi20(.LCPI16_3) - fld.s $fa0, $a0, %pc_lo12(.LCPI16_3) + lu12i.w $a0, -258928 + ori $a0, $a0, 4059 + lu32i.d $a0, 0 + movgr2fr.w $fa0, $a0 fadd.s $fs2, $fs2, $fa0 .LBB16_9: # %_Z16btNormalizeAnglef.exit fsub.s $fa0, $fs4, $fs0 @@ -3576,13 +3549,16 @@ _Z21btAdjustAngleToLimitsfff: # @_Z21btAdjustAngleToLimitsfff fadd.s $fa0, $fa0, $fs1 b .LBB16_13 .LBB16_11: - pcalau12i $a0, %pc_hi20(.LCPI16_2) - fld.s $fa1, $a0, %pc_lo12(.LCPI16_2) + lu12i.w $a0, 263312 + ori $a0, $a0, 4059 + movgr2fr.w $fa1, $a0 fcmp.cule.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB16_13 # %bb.12: - pcalau12i $a0, %pc_hi20(.LCPI16_3) - fld.s $fa1, $a0, %pc_lo12(.LCPI16_3) + lu12i.w $a0, -258928 + ori $a0, $a0, 4059 + lu32i.d $a0, 0 + movgr2fr.w $fa1, $a0 fadd.s $fa0, $fa0, $fa1 .LBB16_13: # %_Z16btNormalizeAnglef.exit29 fabs.s $fa0, $fa0 @@ -3591,11 +3567,14 @@ _Z21btAdjustAngleToLimitsfff: # @_Z21btAdjustAngleToLimitsfff fsel $fs0, $fa1, $fs0, $fcc0 b .LBB16_21 .LBB16_14: - fld.s $fa0, $s0, %pc_lo12(.LCPI16_2) + ori $a0, $s0, 4059 + movgr2fr.w $fa0, $a0 fcmp.cule.s $fcc0, $fs1, $fa0 bcnez $fcc0, .LBB16_16 # %bb.15: - fld.s $fa0, $fp, %pc_lo12(.LCPI16_3) + ori $a0, $fp, 4059 + lu32i.d $a0, 0 + movgr2fr.w $fa0, $a0 fadd.s $fs1, $fs1, $fa0 .LBB16_16: # %_Z16btNormalizeAnglef.exit31 fsub.s $fa0, $fs0, $fs4 @@ -3608,15 +3587,20 @@ _Z21btAdjustAngleToLimitsfff: # @_Z21btAdjustAngleToLimitsfff fadd.s $fa0, $fa0, $fs2 b .LBB16_20 .LBB16_18: - fld.s $fa1, $s0, %pc_lo12(.LCPI16_2) + ori $a0, $s0, 4059 + movgr2fr.w $fa1, $a0 fcmp.cule.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB16_20 # %bb.19: - fld.s $fa1, $fp, %pc_lo12(.LCPI16_3) + ori $a0, $fp, 4059 + lu32i.d $a0, 0 + movgr2fr.w $fa1, $a0 fadd.s $fa0, $fa0, $fa1 .LBB16_20: # %_Z16btNormalizeAnglef.exit33 - fld.s $fa1, $fp, %pc_lo12(.LCPI16_3) fabs.s $fa0, $fa0 + ori $a0, $fp, 4059 + lu32i.d $a0, 0 + movgr2fr.w $fa1, $a0 fadd.s $fa1, $fs0, $fa1 fcmp.clt.s $fcc0, $fa0, $fs1 fsel $fs0, $fs0, $fa1, $fcc0 @@ -3636,14 +3620,8 @@ _Z21btAdjustAngleToLimitsfff: # @_Z21btAdjustAngleToLimitsfff .size _Z21btAdjustAngleToLimitsfff, .Lfunc_end16-_Z21btAdjustAngleToLimitsfff .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN17btHingeConstraint14setMotorTargetERK12btQuaternionf -.LCPI17_0: - .word 0xbf7ffffe # float -0.99999988 -.LCPI17_1: - .word 0x40490fdb # float 3.14159274 .text - .globl _ZN17btHingeConstraint14setMotorTargetERK12btQuaternionf + .globl _ZN17btHingeConstraint14setMotorTargetERK12btQuaternionf # -- Begin function _ZN17btHingeConstraint14setMotorTargetERK12btQuaternionf .p2align 5 .type _ZN17btHingeConstraint14setMotorTargetERK12btQuaternionf,@function _ZN17btHingeConstraint14setMotorTargetERK12btQuaternionf: # @_ZN17btHingeConstraint14setMotorTargetERK12btQuaternionf @@ -3784,13 +3762,15 @@ _ZN17btHingeConstraint14setMotorTargetERK12btQuaternionf: # @_ZN17btHingeConstra frsqrt.s $fa4, $fa0 fmul.s $fa0, $ft0, $fa4 fmul.s $fa2, $fa5, $fa4 - fmul.s $fa3, $fa3, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI17_0) - fld.s $fa5, $a0, %pc_lo12(.LCPI17_0) - fmul.s $fa4, $fa2, $fa1 - fmadd.s $fa4, $fa0, $fa1, $fa4 - fadd.s $fa4, $fa3, $fa4 - fcmp.clt.s $fcc0, $fa4, $fa5 + fmul.s $fa4, $fa3, $fa4 + fmul.s $fa3, $fa2, $fa1 + fmadd.s $fa3, $fa0, $fa1, $fa3 + fadd.s $fa3, $fa4, $fa3 + lu12i.w $a0, -264193 + ori $a0, $a0, 4094 + lu32i.d $a0, 0 + movgr2fr.w $fa5, $a0 + fcmp.clt.s $fcc0, $fa3, $fa5 bceqz $fcc0, .LBB17_2 # %bb.1: vldi $vr2, -1040 @@ -3799,17 +3779,17 @@ _ZN17btHingeConstraint14setMotorTargetERK12btQuaternionf: # @_ZN17btHingeConstra b .LBB17_4 .LBB17_2: movgr2fr.w $fa5, $zero - fmsub.s $fs5, $fa3, $fa5, $fa2 + fmsub.s $fs5, $fa4, $fa5, $fa2 fneg.s $fa1, $fa5 - fmul.s $fa3, $fa3, $fa1 - fadd.s $fs6, $fa0, $fa3 - fmul.s $fa3, $fa0, $fa1 + fmul.s $fa4, $fa4, $fa1 + fadd.s $fs6, $fa0, $fa4 + fmul.s $fa4, $fa0, $fa1 vldi $vr0, -1168 - fadd.s $fa0, $fa4, $fa0 + fadd.s $fa0, $fa3, $fa0 fadd.s $fa1, $fa0, $fa0 fsqrt.s $fa0, $fa1 fcmp.cor.s $fcc0, $fa0, $fa0 - fmadd.s $fs7, $fa2, $fa5, $fa3 + fmadd.s $fs7, $fa2, $fa5, $fa4 bceqz $fcc0, .LBB17_12 .LBB17_3: # %.split frecip.s $fa3, $fa0 @@ -3848,9 +3828,10 @@ _ZN17btHingeConstraint14setMotorTargetERK12btQuaternionf: # @_ZN17btHingeConstra fmov.s $fa0, $fs1 pcaddu18i $ra, %call36(acosf) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI17_1) - fld.s $fa1, $a0, %pc_lo12(.LCPI17_1) fadd.s $fa0, $fa0, $fa0 + lu12i.w $a0, 263312 + ori $a0, $a0, 4059 + movgr2fr.w $fa1, $a0 fcmp.cule.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB17_6 # %bb.5: diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btKinematicCharacterController.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btKinematicCharacterController.s index 68a03c27..c196dd04 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btKinematicCharacterController.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btKinematicCharacterController.s @@ -237,12 +237,7 @@ _ZN30btKinematicCharacterController14getGhostObjectEv: # @_ZN30btKinematicCharac .Lfunc_end6: .size _ZN30btKinematicCharacterController14getGhostObjectEv, .Lfunc_end6-_ZN30btKinematicCharacterController14getGhostObjectEv # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN30btKinematicCharacterController22recoverFromPenetrationEP16btCollisionWorld -.LCPI7_0: - .word 0x3e4ccccd # float 0.200000003 - .text - .globl _ZN30btKinematicCharacterController22recoverFromPenetrationEP16btCollisionWorld + .globl _ZN30btKinematicCharacterController22recoverFromPenetrationEP16btCollisionWorld # -- Begin function _ZN30btKinematicCharacterController22recoverFromPenetrationEP16btCollisionWorld .p2align 5 .type _ZN30btKinematicCharacterController22recoverFromPenetrationEP16btCollisionWorld,@function _ZN30btKinematicCharacterController22recoverFromPenetrationEP16btCollisionWorld: # @_ZN30btKinematicCharacterController22recoverFromPenetrationEP16btCollisionWorld @@ -260,6 +255,7 @@ _ZN30btKinematicCharacterController22recoverFromPenetrationEP16btCollisionWorld: st.d $s5, $sp, 32 # 8-byte Folded Spill fst.d $fs0, $sp, 24 # 8-byte Folded Spill fst.d $fs1, $sp, 16 # 8-byte Folded Spill + fst.d $fs2, $sp, 8 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -270,6 +266,7 @@ _ZN30btKinematicCharacterController22recoverFromPenetrationEP16btCollisionWorld: .cfi_offset 28, -64 .cfi_offset 56, -72 .cfi_offset 57, -80 + .cfi_offset 58, -88 ld.d $a3, $a1, 40 move $fp, $a0 ld.d $a0, $a0, 16 @@ -295,7 +292,10 @@ _ZN30btKinematicCharacterController22recoverFromPenetrationEP16btCollisionWorld: addi.d $s0, $fp, 128 movgr2fr.w $fs0, $zero ori $s3, $zero, 1 - fmov.s $fs1, $fs0 + lu12i.w $a0, 255180 + ori $a0, $a0, 3277 + movgr2fr.w $fs1, $a0 + fmov.s $fs2, $fs0 b .LBB7_3 .p2align 4, , 16 .LBB7_2: # %._crit_edge99 @@ -369,8 +369,8 @@ _ZN30btKinematicCharacterController22recoverFromPenetrationEP16btCollisionWorld: fld.s $fa1, $fp, 92 fld.s $fa2, $fp, 96 move $a3, $zero - vldi $vr9, -1040 - vldi $vr10, -1168 + vldi $vr8, -1040 + vldi $vr9, -1168 b .LBB7_15 .p2align 4, , 16 .LBB7_14: # %._crit_edge @@ -391,7 +391,7 @@ _ZN30btKinematicCharacterController22recoverFromPenetrationEP16btCollisionWorld: sltui $a6, $a5, 1 move $a5, $zero movgr2cf $fcc0, $a6 - fsel $fa3, $ft2, $ft1, $fcc0 + fsel $fa3, $ft1, $ft0, $fcc0 addi.d $a6, $a4, 88 b .LBB7_20 .p2align 4, , 16 @@ -411,7 +411,7 @@ _ZN30btKinematicCharacterController22recoverFromPenetrationEP16btCollisionWorld: st.d $t0, $fp, 172 fld.s $fa5, $a6, 0 ld.w $a7, $a4, 728 - fmov.s $fs1, $fa4 + fmov.s $fs2, $fa4 .LBB7_18: # in Loop: Header=BB7_20 Depth=3 fld.s $fa4, $a6, -16 fld.s $fa6, $a6, -12 @@ -419,14 +419,12 @@ _ZN30btKinematicCharacterController22recoverFromPenetrationEP16btCollisionWorld: fmul.s $fa4, $fa3, $fa4 fmul.s $fa6, $fa3, $fa6 fmul.s $fa7, $fa3, $fa7 - pcalau12i $t0, %pc_hi20(.LCPI7_0) - fld.s $ft0, $t0, %pc_lo12(.LCPI7_0) fmul.s $fa4, $fa4, $fa5 fmul.s $fa6, $fa6, $fa5 fmul.s $fa5, $fa5, $fa7 - fmul.s $fa4, $fa4, $ft0 - fmul.s $fa6, $fa6, $ft0 - fmul.s $fa5, $fa5, $ft0 + fmul.s $fa4, $fa4, $fs1 + fmul.s $fa6, $fa6, $fs1 + fmul.s $fa5, $fa5, $fs1 fadd.s $fa0, $fa0, $fa4 fst.s $fa0, $fp, 88 fadd.s $fa1, $fa6, $fa1 @@ -445,7 +443,7 @@ _ZN30btKinematicCharacterController22recoverFromPenetrationEP16btCollisionWorld: fcmp.cule.s $fcc0, $fs0, $fa4 bcnez $fcc0, .LBB7_19 # %bb.21: # in Loop: Header=BB7_20 Depth=3 - fcmp.cule.s $fcc0, $fs1, $fa4 + fcmp.cule.s $fcc0, $fs2, $fa4 bceqz $fcc0, .LBB7_17 # %bb.22: # in Loop: Header=BB7_20 Depth=3 fmov.s $fa5, $fa4 @@ -457,6 +455,7 @@ _ZN30btKinematicCharacterController22recoverFromPenetrationEP16btCollisionWorld: vld $vr0, $s1, 0 andi $a0, $s4, 1 vst $vr0, $a1, 56 + fld.d $fs2, $sp, 8 # 8-byte Folded Reload fld.d $fs1, $sp, 16 # 8-byte Folded Reload fld.d $fs0, $sp, 24 # 8-byte Folded Reload ld.d $s5, $sp, 32 # 8-byte Folded Reload @@ -473,12 +472,7 @@ _ZN30btKinematicCharacterController22recoverFromPenetrationEP16btCollisionWorld: .size _ZN30btKinematicCharacterController22recoverFromPenetrationEP16btCollisionWorld, .Lfunc_end7-_ZN30btKinematicCharacterController22recoverFromPenetrationEP16btCollisionWorld .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN30btKinematicCharacterController6stepUpEP16btCollisionWorld -.LCPI8_0: - .word 0x3dcccccd # float 0.100000001 - .text - .globl _ZN30btKinematicCharacterController6stepUpEP16btCollisionWorld + .globl _ZN30btKinematicCharacterController6stepUpEP16btCollisionWorld # -- Begin function _ZN30btKinematicCharacterController6stepUpEP16btCollisionWorld .p2align 5 .type _ZN30btKinematicCharacterController6stepUpEP16btCollisionWorld,@function _ZN30btKinematicCharacterController6stepUpEP16btCollisionWorld: # @_ZN30btKinematicCharacterController6stepUpEP16btCollisionWorld @@ -525,11 +519,12 @@ _ZN30btKinematicCharacterController6stepUpEP16btCollisionWorld: # @_ZN30btKinema st.d $a2, $sp, 216 st.w $a2, $sp, 112 vst $vr1, $sp, 116 - pcalau12i $a0, %pc_hi20(.LCPI8_0) - fld.s $fa3, $a0, %pc_lo12(.LCPI8_0) st.w $a2, $sp, 132 vst $vr1, $sp, 136 st.d $a2, $sp, 152 + lu12i.w $a0, 253132 + ori $a0, $a0, 3277 + movgr2fr.w $fa3, $a0 fmul.s $fa0, $fa0, $fa3 fmul.s $fa2, $fa2, $fa3 fmul.s $fa3, $fa5, $fa3 @@ -629,12 +624,8 @@ _ZN16btCollisionWorld20ConvexResultCallbackD2Ev: # @_ZN16btCollisionWorld20Conve .Lfunc_end9: .size _ZN16btCollisionWorld20ConvexResultCallbackD2Ev, .Lfunc_end9-_ZN16btCollisionWorld20ConvexResultCallbackD2Ev # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN30btKinematicCharacterController36updateTargetPositionBasedOnCollisionERK9btVector3ff -.LCPI10_0: - .word 0x34000000 # float 1.1920929E-7 .text - .globl _ZN30btKinematicCharacterController36updateTargetPositionBasedOnCollisionERK9btVector3ff + .globl _ZN30btKinematicCharacterController36updateTargetPositionBasedOnCollisionERK9btVector3ff # -- Begin function _ZN30btKinematicCharacterController36updateTargetPositionBasedOnCollisionERK9btVector3ff .p2align 5 .type _ZN30btKinematicCharacterController36updateTargetPositionBasedOnCollisionERK9btVector3ff,@function _ZN30btKinematicCharacterController36updateTargetPositionBasedOnCollisionERK9btVector3ff: # @_ZN30btKinematicCharacterController36updateTargetPositionBasedOnCollisionERK9btVector3ff @@ -649,11 +640,11 @@ _ZN30btKinematicCharacterController36updateTargetPositionBasedOnCollisionERK9btV fsub.s $fa5, $fa3, $fa4 fsub.s $fa7, $fa7, $ft0 fmul.s $fa0, $fa5, $fa5 - pcalau12i $a2, %pc_hi20(.LCPI10_0) - fld.s $fa2, $a2, %pc_lo12(.LCPI10_0) fmadd.s $fa0, $fa6, $fa6, $fa0 fmadd.s $fa0, $fa7, $fa7, $fa0 fsqrt.s $fa0, $fa0 + lu12i.w $a2, 212992 + movgr2fr.w $fa2, $a2 fcmp.cule.s $fcc0, $fa0, $fa2 bcnez $fcc0, .LBB10_3 # %bb.1: @@ -716,14 +707,7 @@ _ZN30btKinematicCharacterController36updateTargetPositionBasedOnCollisionERK9btV .Lfunc_end10: .size _ZN30btKinematicCharacterController36updateTargetPositionBasedOnCollisionERK9btVector3ff, .Lfunc_end10-_ZN30btKinematicCharacterController36updateTargetPositionBasedOnCollisionERK9btVector3ff # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN30btKinematicCharacterController20stepForwardAndStrafeEP16btCollisionWorldRK9btVector3 -.LCPI11_0: - .word 0x34000000 # float 1.1920929E-7 -.LCPI11_1: - .word 0x3c23d70a # float 0.00999999977 - .text - .globl _ZN30btKinematicCharacterController20stepForwardAndStrafeEP16btCollisionWorldRK9btVector3 + .globl _ZN30btKinematicCharacterController20stepForwardAndStrafeEP16btCollisionWorldRK9btVector3 # -- Begin function _ZN30btKinematicCharacterController20stepForwardAndStrafeEP16btCollisionWorldRK9btVector3 .p2align 5 .type _ZN30btKinematicCharacterController20stepForwardAndStrafeEP16btCollisionWorldRK9btVector3,@function _ZN30btKinematicCharacterController20stepForwardAndStrafeEP16btCollisionWorldRK9btVector3: # @_ZN30btKinematicCharacterController20stepForwardAndStrafeEP16btCollisionWorldRK9btVector3 @@ -817,9 +801,10 @@ _ZN30btKinematicCharacterController20stepForwardAndStrafeEP16btCollisionWorldRK9 addi.d $s6, $sp, 72 pcalau12i $a0, %pc_hi20(_ZTV43btKinematicClosestNotMeConvexResultCallback+16) addi.d $s8, $a0, %pc_lo12(_ZTV43btKinematicClosestNotMeConvexResultCallback+16) - pcalau12i $a0, %pc_hi20(.LCPI11_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI11_0) - pcalau12i $s4, %pc_hi20(.LCPI11_1) + lu12i.w $a0, 212992 + movgr2fr.w $fs1, $a0 + lu12i.w $a0, 246333 + ori $s4, $a0, 1802 vldi $vr9, -1168 b .LBB11_6 .p2align 4, , 16 @@ -829,9 +814,9 @@ _ZN30btKinematicCharacterController20stepForwardAndStrafeEP16btCollisionWorldRK9 vld $vr9, $sp, 32 # 16-byte Folded Reload .LBB11_5: # %.critedge76 # in Loop: Header=BB11_6 Depth=1 - fld.s $fa1, $s4, %pc_lo12(.LCPI11_1) fsub.s $ft1, $ft1, $fa0 - fcmp.clt.s $fcc0, $fa1, $ft1 + movgr2fr.w $fa0, $s4 + fcmp.clt.s $fcc0, $fa0, $ft1 bceqz $fcc0, .LBB11_17 .LBB11_6: # =>This Inner Loop Header: Depth=1 bstrpick.d $a0, $s7, 31, 0 @@ -1162,12 +1147,7 @@ _ZN30btKinematicCharacterController8stepDownEP16btCollisionWorldf: # @_ZN30btKin .size _ZN30btKinematicCharacterController8stepDownEP16btCollisionWorldf, .Lfunc_end12-_ZN30btKinematicCharacterController8stepDownEP16btCollisionWorldf .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN30btKinematicCharacterController16setWalkDirectionERK9btVector3 -.LCPI13_0: - .word 0x34000000 # float 1.1920929E-7 - .text - .globl _ZN30btKinematicCharacterController16setWalkDirectionERK9btVector3 + .globl _ZN30btKinematicCharacterController16setWalkDirectionERK9btVector3 # -- Begin function _ZN30btKinematicCharacterController16setWalkDirectionERK9btVector3 .p2align 5 .type _ZN30btKinematicCharacterController16setWalkDirectionERK9btVector3,@function _ZN30btKinematicCharacterController16setWalkDirectionERK9btVector3: # @_ZN30btKinematicCharacterController16setWalkDirectionERK9btVector3 @@ -1187,11 +1167,11 @@ _ZN30btKinematicCharacterController16setWalkDirectionERK9btVector3: # @_ZN30btKi fmul.s $fa0, $fa0, $fa3 fmul.s $fa2, $fa2, $fa3 fmul.s $fa3, $fa0, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI13_0) - fld.s $fa4, $a1, %pc_lo12(.LCPI13_0) fmadd.s $fa3, $fa1, $fa1, $fa3 fmadd.s $fa3, $fa2, $fa2, $fa3 fsqrt.s $fa3, $fa3 + lu12i.w $a1, 212992 + movgr2fr.w $fa4, $a1 fcmp.clt.s $fcc0, $fa3, $fa4 movgr2fr.w $fa3, $zero fsel $fa1, $fa1, $fa3, $fcc0 @@ -1208,12 +1188,7 @@ _ZN30btKinematicCharacterController16setWalkDirectionERK9btVector3: # @_ZN30btKi .Lfunc_end13: .size _ZN30btKinematicCharacterController16setWalkDirectionERK9btVector3, .Lfunc_end13-_ZN30btKinematicCharacterController16setWalkDirectionERK9btVector3 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN30btKinematicCharacterController26setVelocityForTimeIntervalERK9btVector3f -.LCPI14_0: - .word 0x34000000 # float 1.1920929E-7 - .text - .globl _ZN30btKinematicCharacterController26setVelocityForTimeIntervalERK9btVector3f + .globl _ZN30btKinematicCharacterController26setVelocityForTimeIntervalERK9btVector3f # -- Begin function _ZN30btKinematicCharacterController26setVelocityForTimeIntervalERK9btVector3f .p2align 5 .type _ZN30btKinematicCharacterController26setVelocityForTimeIntervalERK9btVector3f,@function _ZN30btKinematicCharacterController26setVelocityForTimeIntervalERK9btVector3f: # @_ZN30btKinematicCharacterController26setVelocityForTimeIntervalERK9btVector3f @@ -1232,11 +1207,11 @@ _ZN30btKinematicCharacterController26setVelocityForTimeIntervalERK9btVector3f: # fmul.s $fa1, $fa1, $fa4 fmul.s $fa3, $fa3, $fa4 fmul.s $fa4, $fa1, $fa1 - pcalau12i $a1, %pc_hi20(.LCPI14_0) - fld.s $fa5, $a1, %pc_lo12(.LCPI14_0) fmadd.s $fa4, $fa2, $fa2, $fa4 fmadd.s $fa4, $fa3, $fa3, $fa4 fsqrt.s $fa4, $fa4 + lu12i.w $a1, 212992 + movgr2fr.w $fa5, $a1 fcmp.clt.s $fcc0, $fa4, $fa5 movgr2fr.w $fa4, $zero fsel $fa2, $fa2, $fa4, $fcc0 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btMinkowskiPenetrationDepthSolver.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btMinkowskiPenetrationDepthSolver.s index 20b57e41..eb2e9eb0 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btMinkowskiPenetrationDepthSolver.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btMinkowskiPenetrationDepthSolver.s @@ -1,14 +1,6 @@ .file "btMinkowskiPenetrationDepthSolver.cpp" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN33btMinkowskiPenetrationDepthSolver12calcPenDepthER22btVoronoiSimplexSolverPK13btConvexShapeS4_RK11btTransformS7_R9btVector3S9_S9_P12btIDebugDrawP12btStackAlloc -.LCPI0_0: - .word 0x5d5e0b6b # float 9.99999984E+17 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI0_1: - .dword 0x3f847ae147ae147b # double 0.01 .text - .globl _ZN33btMinkowskiPenetrationDepthSolver12calcPenDepthER22btVoronoiSimplexSolverPK13btConvexShapeS4_RK11btTransformS7_R9btVector3S9_S9_P12btIDebugDrawP12btStackAlloc + .globl _ZN33btMinkowskiPenetrationDepthSolver12calcPenDepthER22btVoronoiSimplexSolverPK13btConvexShapeS4_RK11btTransformS7_R9btVector3S9_S9_P12btIDebugDrawP12btStackAlloc # -- Begin function _ZN33btMinkowskiPenetrationDepthSolver12calcPenDepthER22btVoronoiSimplexSolverPK13btConvexShapeS4_RK11btTransformS7_R9btVector3S9_S9_P12btIDebugDrawP12btStackAlloc .p2align 5 .type _ZN33btMinkowskiPenetrationDepthSolver12calcPenDepthER22btVoronoiSimplexSolverPK13btConvexShapeS4_RK11btTransformS7_R9btVector3S9_S9_P12btIDebugDrawP12btStackAlloc,@function _ZN33btMinkowskiPenetrationDepthSolver12calcPenDepthER22btVoronoiSimplexSolverPK13btConvexShapeS4_RK11btTransformS7_R9btVector3S9_S9_P12btIDebugDrawP12btStackAlloc: # @_ZN33btMinkowskiPenetrationDepthSolver12calcPenDepthER22btVoronoiSimplexSolverPK13btConvexShapeS4_RK11btTransformS7_R9btVector3S9_S9_P12btIDebugDrawP12btStackAlloc @@ -80,7 +72,7 @@ _ZN33btMinkowskiPenetrationDepthSolver12calcPenDepthER22btVoronoiSimplexSolverPK lu12i.w $a0, 1 ori $a0, $a0, 472 add.d $a0, $sp, $a0 - ld.d $s1, $a0, 0 + ld.d $s6, $a0, 0 lu12i.w $a0, 1 ori $a0, $a0, 464 add.d $a0, $sp, $a0 @@ -203,7 +195,7 @@ _ZN33btMinkowskiPenetrationDepthSolver12calcPenDepthER22btVoronoiSimplexSolverPK st.d $t0, $sp, 88 # 8-byte Folded Spill move $a0, $t0 jirl $ra, $a1, 0 - st.d $s1, $sp, 80 # 8-byte Folded Spill + st.d $s6, $sp, 80 # 8-byte Folded Spill blez $a0, .LBB0_9 # %bb.6: # %.lr.ph move $s7, $a0 @@ -322,7 +314,7 @@ _ZN33btMinkowskiPenetrationDepthSolver12calcPenDepthER22btVoronoiSimplexSolverPK bne $s7, $s0, .LBB0_7 # %bb.8: # %.loopexit462.loopexit addi.w $s7, $s0, 42 - ld.d $s1, $sp, 80 # 8-byte Folded Reload + ld.d $s6, $sp, 80 # 8-byte Folded Reload b .LBB0_10 .LBB0_9: ori $s7, $zero, 42 @@ -452,15 +444,15 @@ _ZN33btMinkowskiPenetrationDepthSolver12calcPenDepthER22btVoronoiSimplexSolverPK bne $s8, $s0, .LBB0_12 # %bb.13: # %.loopexit.loopexit add.w $s7, $s7, $s0 - ld.d $s1, $sp, 80 # 8-byte Folded Reload + ld.d $s6, $sp, 80 # 8-byte Folded Reload .LBB0_14: # %.loopexit - ld.d $s0, $sp, 88 # 8-byte Folded Reload - ld.d $a0, $s0, 0 + ld.d $s1, $sp, 88 # 8-byte Folded Reload + ld.d $a0, $s1, 0 ld.d $a4, $a0, 112 addi.d $a1, $sp, 1432 ori $a0, $zero, 3416 add.d $a2, $sp, $a0 - move $a0, $s0 + move $a0, $s1 move $a3, $s7 jirl $ra, $a4, 0 ld.d $a0, $s4, 0 @@ -472,7 +464,8 @@ _ZN33btMinkowskiPenetrationDepthSolver12calcPenDepthER22btVoronoiSimplexSolverPK move $a3, $s7 jirl $ra, $a4, 0 movgr2fr.w $fa0, $zero - pcalau12i $a1, %pc_hi20(.LCPI0_0) + lu12i.w $a0, 382432 + ori $s0, $a0, 2923 fst.s $fa0, $sp, 76 # 4-byte Folded Spill blez $s7, .LBB0_20 # %bb.15: # %.lr.ph477 @@ -509,19 +502,22 @@ _ZN33btMinkowskiPenetrationDepthSolver12calcPenDepthER22btVoronoiSimplexSolverPK fld.s $ft15, $s2, 56 ori $a0, $zero, 2432 add.d $a0, $sp, $a0 - fld.s $fs1, $a1, %pc_lo12(.LCPI0_0) - pcalau12i $a1, %pc_hi20(.LCPI0_1) - fld.d $fs6, $a1, %pc_lo12(.LCPI0_1) addi.d $a1, $s5, 8 - movgr2fr.w $fs7, $zero + movgr2fr.w $fs6, $zero ori $a2, $zero, 3424 add.d $a2, $sp, $a2 + movgr2fr.w $fs1, $s0 ld.d $a3, $sp, 112 # 8-byte Folded Reload movgr2cf $fcc0, $a3 - fst.s $fs7, $sp, 100 # 4-byte Folded Spill - fst.s $fs7, $sp, 104 # 4-byte Folded Spill - fst.s $fs7, $sp, 108 # 4-byte Folded Spill - fst.s $fs7, $sp, 112 # 4-byte Folded Spill + lu12i.w $a3, 293601 + ori $a3, $a3, 1147 + lu32i.d $a3, 293601 + lu52i.d $a3, $a3, 1016 + movgr2fr.d $fs7, $a3 + fst.s $fs6, $sp, 100 # 4-byte Folded Spill + fst.s $fs6, $sp, 104 # 4-byte Folded Spill + fst.s $fs6, $sp, 108 # 4-byte Folded Spill + fst.s $fs6, $sp, 112 # 4-byte Folded Spill b .LBB0_17 .p2align 4, , 16 .LBB0_16: # in Loop: Header=BB0_17 Depth=1 @@ -534,12 +530,12 @@ _ZN33btMinkowskiPenetrationDepthSolver12calcPenDepthER22btVoronoiSimplexSolverPK fld.s $fs4, $a1, 0 fld.s $fs3, $a1, -4 fld.s $fs2, $a1, -8 - fsel $fs4, $fs4, $fs7, $fcc0 + fsel $fs4, $fs4, $fs6, $fcc0 fmul.s $fs5, $fs3, $fs3 fmadd.s $fs5, $fs2, $fs2, $fs5 fmadd.s $fs5, $fs4, $fs4, $fs5 fcvt.d.s $fs5, $fs5 - fcmp.cule.d $fcc1, $fs5, $fs6 + fcmp.cule.d $fcc1, $fs5, $fs7 bcnez $fcc1, .LBB0_16 # %bb.18: # in Loop: Header=BB0_17 Depth=1 fld.s $fs5, $a2, -8 @@ -598,13 +594,13 @@ _ZN33btMinkowskiPenetrationDepthSolver12calcPenDepthER22btVoronoiSimplexSolverPK fmov.s $fs1, $fs5 b .LBB0_16 .LBB0_20: - fld.s $fs1, $a1, %pc_lo12(.LCPI0_0) + movgr2fr.w $fs1, $s0 fst.s $fa0, $sp, 112 # 4-byte Folded Spill fst.s $fa0, $sp, 108 # 4-byte Folded Spill fst.s $fa0, $sp, 104 # 4-byte Folded Spill fst.s $fa0, $sp, 100 # 4-byte Folded Spill .LBB0_21: # %._crit_edge - move $a0, $s0 + move $a0, $s1 pcaddu18i $ra, %call36(_ZNK13btConvexShape19getMarginNonVirtualEv) jirl $ra, $ra, 0 move $a0, $s4 @@ -617,7 +613,7 @@ _ZN33btMinkowskiPenetrationDepthSolver12calcPenDepthER22btVoronoiSimplexSolverPK move $a0, $zero b .LBB0_25 .LBB0_23: - move $a0, $s0 + move $a0, $s1 pcaddu18i $ra, %call36(_ZNK13btConvexShape19getMarginNonVirtualEv) jirl $ra, $ra, 0 fmov.s $fs0, $fa0 @@ -629,7 +625,7 @@ _ZN33btMinkowskiPenetrationDepthSolver12calcPenDepthER22btVoronoiSimplexSolverPK fadd.s $fa0, $fa0, $fa1 fadd.s $fs0, $fs1, $fa0 addi.d $a0, $sp, 344 - move $a1, $s0 + move $a1, $s1 move $a2, $s4 ld.d $a3, $sp, 32 # 8-byte Folded Reload move $a4, $zero @@ -676,9 +672,7 @@ _ZN33btMinkowskiPenetrationDepthSolver12calcPenDepthER22btVoronoiSimplexSolverPK vst $vr1, $sp, 280 vst $vr2, $sp, 296 vst $vr3, $sp, 312 - lu12i.w $a3, 382432 - ori $a3, $a3, 2923 - st.w $a3, $sp, 328 + st.w $s0, $sp, 328 pcalau12i $a3, %pc_hi20(_ZTVZN33btMinkowskiPenetrationDepthSolver12calcPenDepthER22btVoronoiSimplexSolverPK13btConvexShapeS4_RK11btTransformS7_R9btVector3S9_S9_P12btIDebugDrawP12btStackAllocE20btIntermediateResult+16) addi.d $a3, $a3, %pc_lo12(_ZTVZN33btMinkowskiPenetrationDepthSolver12calcPenDepthER22btVoronoiSimplexSolverPK13btConvexShapeS4_RK11btTransformS7_R9btVector3S9_S9_P12btIDebugDrawP12btStackAllocE20btIntermediateResult+16) st.d $a3, $sp, 152 @@ -695,7 +689,7 @@ _ZN33btMinkowskiPenetrationDepthSolver12calcPenDepthER22btVoronoiSimplexSolverPK addi.d $a0, $sp, 344 addi.d $a1, $sp, 200 addi.d $a2, $sp, 152 - move $a3, $s1 + move $a3, $s6 move $a4, $zero pcaddu18i $ra, %call36(_ZN17btGjkPairDetector16getClosestPointsERKN36btDiscreteCollisionDetectorInterface17ClosestPointInputERNS0_6ResultEP12btIDebugDrawb) jirl $ra, $ra, 0 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btMultiSphereShape.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btMultiSphereShape.s index 61622272..7f86e247 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btMultiSphereShape.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btMultiSphereShape.s @@ -382,14 +382,8 @@ _ZN20btAlignedObjectArrayI9btVector3ED2Ev: # @_ZN20btAlignedObjectArrayI9btVecto .size _ZN20btAlignedObjectArrayI9btVector3ED2Ev, .Lfunc_end3-_ZN20btAlignedObjectArrayI9btVector3ED2Ev .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK18btMultiSphereShape37localGetSupportingVertexWithoutMarginERK9btVector3 -.LCPI4_0: - .word 0x28800000 # float 1.42108547E-14 -.LCPI4_1: - .word 0xdd5e0b6b # float -9.99999984E+17 .text - .globl _ZNK18btMultiSphereShape37localGetSupportingVertexWithoutMarginERK9btVector3 + .globl _ZNK18btMultiSphereShape37localGetSupportingVertexWithoutMarginERK9btVector3 # -- Begin function _ZNK18btMultiSphereShape37localGetSupportingVertexWithoutMarginERK9btVector3 .p2align 5 .type _ZNK18btMultiSphereShape37localGetSupportingVertexWithoutMarginERK9btVector3,@function _ZNK18btMultiSphereShape37localGetSupportingVertexWithoutMarginERK9btVector3: # @_ZNK18btMultiSphereShape37localGetSupportingVertexWithoutMarginERK9btVector3 @@ -425,14 +419,14 @@ _ZNK18btMultiSphereShape37localGetSupportingVertexWithoutMarginERK9btVector3: # .cfi_offset 61, -96 .cfi_offset 62, -104 .cfi_offset 63, -112 - fld.s $fa0, $a1, 0 - fld.s $fa1, $a1, 4 - fld.s $fa2, $a1, 8 - pcalau12i $a1, %pc_hi20(.LCPI4_0) - fld.s $fa4, $a1, %pc_lo12(.LCPI4_0) - fmul.s $fa3, $fa1, $fa1 - fmadd.s $fa3, $fa0, $fa0, $fa3 + fld.s $fa0, $a1, 4 + fld.s $fa2, $a1, 0 + fld.s $fa1, $a1, 8 + fmul.s $fa3, $fa0, $fa0 fmadd.s $fa3, $fa2, $fa2, $fa3 + fmadd.s $fa3, $fa1, $fa1, $fa3 + lu12i.w $a1, 165888 + movgr2fr.w $fa4, $a1 fcmp.clt.s $fcc0, $fa3, $fa4 move $fp, $a0 bceqz $fcc0, .LBB4_3 @@ -449,20 +443,22 @@ _ZNK18btMultiSphereShape37localGetSupportingVertexWithoutMarginERK9btVector3: # b .LBB4_9 .LBB4_3: frsqrt.s $fa3, $fa3 - fmul.s $fa0, $fa0, $fa3 - vst $vr0, $sp, 16 # 16-byte Folded Spill - fmul.s $fs0, $fa1, $fa3 - fmul.s $fs1, $fa2, $fa3 + fmul.s $fa2, $fa2, $fa3 + vst $vr2, $sp, 16 # 16-byte Folded Spill + fmul.s $fs0, $fa0, $fa3 + fmul.s $fs1, $fa1, $fa3 ld.w $s1, $fp, 108 blez $s1, .LBB4_2 .LBB4_4: # %.lr.ph + move $s0, $zero ld.d $s2, $fp, 152 ld.d $s3, $fp, 120 - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.s $fs3, $a0, %pc_lo12(.LCPI4_1) movgr2fr.w $fs2, $zero - move $s0, $zero - fmov.s $fs4, $fs2 + lu12i.w $a0, -141856 + ori $a0, $a0, 2923 + lu32i.d $a0, 0 + movgr2fr.w $fs4, $a0 + fmov.s $fs3, $fs2 b .LBB4_6 .p2align 4, , 16 .LBB4_5: # in Loop: Header=BB4_6 Depth=1 @@ -503,16 +499,16 @@ _ZNK18btMultiSphereShape37localGetSupportingVertexWithoutMarginERK9btVector3: # fmul.s $fa3, $fs0, $fa0 fmadd.s $fa3, $fa5, $fa2, $fa3 fmadd.s $fa3, $fs1, $fa1, $fa3 - fcmp.cule.s $fcc0, $fa3, $fs3 + fcmp.cule.s $fcc0, $fa3, $fs4 bcnez $fcc0, .LBB4_5 # %bb.7: # in Loop: Header=BB4_6 Depth=1 movfr2gr.s $s0, $fa2 - fmov.s $fs4, $fa0 + fmov.s $fs3, $fa0 fmov.s $fs2, $fa1 - fmov.s $fs3, $fa3 + fmov.s $fs4, $fa3 b .LBB4_5 .LBB4_8: # %._crit_edge.loopexit - movfr2gr.s $a0, $fs4 + movfr2gr.s $a0, $fs3 bstrins.d $s0, $a0, 63, 32 movfr2gr.s $a0, $fs2 bstrpick.d $a1, $a0, 31, 0 @@ -538,12 +534,7 @@ _ZNK18btMultiSphereShape37localGetSupportingVertexWithoutMarginERK9btVector3: # .size _ZNK18btMultiSphereShape37localGetSupportingVertexWithoutMarginERK9btVector3, .Lfunc_end4-_ZNK18btMultiSphereShape37localGetSupportingVertexWithoutMarginERK9btVector3 .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK18btMultiSphereShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i -.LCPI5_0: - .word 0xdd5e0b6b # float -9.99999984E+17 - .text - .globl _ZNK18btMultiSphereShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i + .globl _ZNK18btMultiSphereShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i # -- Begin function _ZNK18btMultiSphereShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i .p2align 5 .type _ZNK18btMultiSphereShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i,@function _ZNK18btMultiSphereShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i: # @_ZNK18btMultiSphereShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i @@ -592,9 +583,11 @@ _ZNK18btMultiSphereShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9b move $fp, $a3 move $s0, $a2 move $s1, $a1 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI5_0) move $s4, $zero + lu12i.w $a0, -141856 + ori $a0, $a0, 2923 + lu32i.d $a0, 0 + movgr2fr.w $fs0, $a0 blez $s3, .LBB5_7 .p2align 4, , 16 .LBB5_3: # %.lr.ph diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btOptimizedBvh.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btOptimizedBvh.s index c598c5b0..c2ce70d1 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btOptimizedBvh.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btOptimizedBvh.s @@ -656,32 +656,27 @@ _ZN14btOptimizedBvh5refitEP23btStridingMeshInterfaceRK9btVector3S4_: # @_ZN14btO .size _ZN14btOptimizedBvh5refitEP23btStridingMeshInterfaceRK9btVector3S4_, .Lfunc_end5-_ZN14btOptimizedBvh5refitEP23btStridingMeshInterfaceRK9btVector3S4_ .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN14btOptimizedBvh14updateBvhNodesEP23btStridingMeshInterfaceiii -.LCPI6_0: - .word 0x5d5e0b6b # float 9.99999984E+17 -.LCPI6_1: - .word 0xdd5e0b6b # float -9.99999984E+17 - .text - .globl _ZN14btOptimizedBvh14updateBvhNodesEP23btStridingMeshInterfaceiii + .globl _ZN14btOptimizedBvh14updateBvhNodesEP23btStridingMeshInterfaceiii # -- Begin function _ZN14btOptimizedBvh14updateBvhNodesEP23btStridingMeshInterfaceiii .p2align 5 .type _ZN14btOptimizedBvh14updateBvhNodesEP23btStridingMeshInterfaceiii,@function _ZN14btOptimizedBvh14updateBvhNodesEP23btStridingMeshInterfaceiii: # @_ZN14btOptimizedBvh14updateBvhNodesEP23btStridingMeshInterfaceiii .cfi_startproc # %bb.0: - addi.d $sp, $sp, -160 - .cfi_def_cfa_offset 160 - st.d $ra, $sp, 152 # 8-byte Folded Spill - st.d $fp, $sp, 144 # 8-byte Folded Spill - st.d $s0, $sp, 136 # 8-byte Folded Spill - st.d $s1, $sp, 128 # 8-byte Folded Spill - st.d $s2, $sp, 120 # 8-byte Folded Spill - st.d $s3, $sp, 112 # 8-byte Folded Spill - st.d $s4, $sp, 104 # 8-byte Folded Spill - st.d $s5, $sp, 96 # 8-byte Folded Spill - st.d $s6, $sp, 88 # 8-byte Folded Spill - st.d $s7, $sp, 80 # 8-byte Folded Spill - st.d $s8, $sp, 72 # 8-byte Folded Spill + addi.d $sp, $sp, -176 + .cfi_def_cfa_offset 176 + st.d $ra, $sp, 168 # 8-byte Folded Spill + st.d $fp, $sp, 160 # 8-byte Folded Spill + st.d $s0, $sp, 152 # 8-byte Folded Spill + st.d $s1, $sp, 144 # 8-byte Folded Spill + st.d $s2, $sp, 136 # 8-byte Folded Spill + st.d $s3, $sp, 128 # 8-byte Folded Spill + st.d $s4, $sp, 120 # 8-byte Folded Spill + st.d $s5, $sp, 112 # 8-byte Folded Spill + st.d $s6, $sp, 104 # 8-byte Folded Spill + st.d $s7, $sp, 96 # 8-byte Folded Spill + st.d $s8, $sp, 88 # 8-byte Folded Spill + fst.d $fs0, $sp, 80 # 8-byte Folded Spill + fst.d $fs1, $sp, 72 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -693,6 +688,8 @@ _ZN14btOptimizedBvh14updateBvhNodesEP23btStridingMeshInterfaceiii: # @_ZN14btOpt .cfi_offset 29, -72 .cfi_offset 30, -80 .cfi_offset 31, -88 + .cfi_offset 56, -96 + .cfi_offset 57, -104 move $s2, $a0 st.d $zero, $sp, 64 st.w $zero, $sp, 60 @@ -712,9 +709,16 @@ _ZN14btOptimizedBvh14updateBvhNodesEP23btStridingMeshInterfaceiii: # @_ZN14btOpt addi.d $s3, $a0, 16 addi.d $a0, $zero, -1 ori $s4, $zero, 3 + lu12i.w $a1, 382432 + ori $a1, $a1, 2923 + movgr2fr.w $fs0, $a1 + lu12i.w $a1, -141856 + ori $a1, $a1, 2923 + lu32i.d $a1, 0 + movgr2fr.w $fs1, $a1 lu12i.w $a1, 15 ori $s5, $a1, 4094 - vldi $vr13, -1168 + vldi $vr12, -1168 b .LBB6_3 .p2align 4, , 16 .LBB6_2: # %.loopexit.loopexit @@ -812,7 +816,7 @@ _ZN14btOptimizedBvh14updateBvhNodesEP23btStridingMeshInterfaceiii: # @_ZN14btOpt move $a0, $fp jirl $ra, $t0, 0 move $a0, $s6 - vldi $vr13, -1168 + vldi $vr12, -1168 .LBB6_8: # in Loop: Header=BB6_3 Depth=1 ld.d $a3, $sp, 40 ld.w $a5, $sp, 36 @@ -937,34 +941,30 @@ _ZN14btOptimizedBvh14updateBvhNodesEP23btStridingMeshInterfaceiii: # @_ZN14btOpt fmul.s $ft0, $ft3, $ft0 .LBB6_25: # %_Z8btSetMinIfEvRT_RKS0_.exit.i # in Loop: Header=BB6_3 Depth=1 - pcalau12i $a1, %pc_hi20(.LCPI6_0) - fld.s $ft1, $a1, %pc_lo12(.LCPI6_0) - pcalau12i $a1, %pc_hi20(.LCPI6_1) - fld.s $ft2, $a1, %pc_lo12(.LCPI6_1) - fmin.s $ft3, $fa6, $ft1 - fmin.s $ft4, $fa7, $ft1 - fmin.s $ft1, $ft0, $ft1 - fmax.s $fa6, $fa6, $ft2 - fmax.s $fa7, $fa7, $ft2 - fmax.s $ft0, $ft0, $ft2 - fcmp.clt.s $fcc0, $fa3, $ft3 - fsel $ft2, $ft3, $fa3, $fcc0 - fcmp.clt.s $fcc0, $fa4, $ft4 - fsel $ft3, $ft4, $fa4, $fcc0 - fcmp.clt.s $fcc0, $fa5, $ft1 - fsel $ft1, $ft1, $fa5, $fcc0 + fmin.s $ft1, $fa6, $fs0 + fmin.s $ft2, $fa7, $fs0 + fmin.s $ft3, $ft0, $fs0 + fmax.s $fa6, $fa6, $fs1 + fmax.s $fa7, $fa7, $fs1 + fmax.s $ft0, $ft0, $fs1 + fcmp.clt.s $fcc0, $fa3, $ft1 + fsel $ft1, $ft1, $fa3, $fcc0 + fcmp.clt.s $fcc0, $fa4, $ft2 + fsel $ft2, $ft2, $fa4, $fcc0 + fcmp.clt.s $fcc0, $fa5, $ft3 + fsel $ft3, $ft3, $fa5, $fcc0 fcmp.clt.s $fcc0, $fa6, $fa3 fsel $fa3, $fa6, $fa3, $fcc0 fcmp.clt.s $fcc0, $fa7, $fa4 fsel $fa4, $fa7, $fa4, $fcc0 fcmp.clt.s $fcc0, $ft0, $fa5 fsel $fa5, $ft0, $fa5, $fcc0 - fcmp.clt.s $fcc0, $fa0, $ft2 - fsel $fa6, $ft2, $fa0, $fcc0 - fcmp.clt.s $fcc0, $fa1, $ft3 - fsel $fa7, $ft3, $fa1, $fcc0 - fcmp.clt.s $fcc0, $fa2, $ft1 - fsel $ft0, $ft1, $fa2, $fcc0 + fcmp.clt.s $fcc0, $fa0, $ft1 + fsel $fa6, $ft1, $fa0, $fcc0 + fcmp.clt.s $fcc0, $fa1, $ft2 + fsel $fa7, $ft2, $fa1, $fcc0 + fcmp.clt.s $fcc0, $fa2, $ft3 + fsel $ft0, $ft3, $fa2, $fcc0 fcmp.clt.s $fcc0, $fa3, $fa0 fsel $fa0, $fa3, $fa0, $fcc0 fcmp.clt.s $fcc0, $fa4, $fa1 @@ -1001,15 +1001,15 @@ _ZN14btOptimizedBvh14updateBvhNodesEP23btStridingMeshInterfaceiii: # @_ZN14btOpt fmul.s $fa0, $fa0, $ft1 fmul.s $fa1, $fa1, $ft2 fmul.s $fa2, $fa2, $ft3 - fadd.s $fa0, $fa0, $ft5 + fadd.s $fa0, $fa0, $ft4 ftintrz.l.s $fa0, $fa0 movfr2gr.d $a1, $fa0 ori $a1, $a1, 1 - fadd.s $fa0, $fa1, $ft5 + fadd.s $fa0, $fa1, $ft4 ftintrz.l.s $fa0, $fa0 movfr2gr.d $a2, $fa0 ori $a2, $a2, 1 - fadd.s $fa0, $fa2, $ft5 + fadd.s $fa0, $fa2, $ft4 ftintrz.l.s $fa0, $fa0 movfr2gr.d $a3, $fa0 ori $a3, $a3, 1 @@ -1028,18 +1028,20 @@ _ZN14btOptimizedBvh14updateBvhNodesEP23btStridingMeshInterfaceiii: # @_ZN14btOpt move $a0, $fp jirl $ra, $a2, 0 .LBB6_28: # %._crit_edge.thread - ld.d $s8, $sp, 72 # 8-byte Folded Reload - ld.d $s7, $sp, 80 # 8-byte Folded Reload - ld.d $s6, $sp, 88 # 8-byte Folded Reload - ld.d $s5, $sp, 96 # 8-byte Folded Reload - ld.d $s4, $sp, 104 # 8-byte Folded Reload - ld.d $s3, $sp, 112 # 8-byte Folded Reload - ld.d $s2, $sp, 120 # 8-byte Folded Reload - ld.d $s1, $sp, 128 # 8-byte Folded Reload - ld.d $s0, $sp, 136 # 8-byte Folded Reload - ld.d $fp, $sp, 144 # 8-byte Folded Reload - ld.d $ra, $sp, 152 # 8-byte Folded Reload - addi.d $sp, $sp, 160 + fld.d $fs1, $sp, 72 # 8-byte Folded Reload + fld.d $fs0, $sp, 80 # 8-byte Folded Reload + ld.d $s8, $sp, 88 # 8-byte Folded Reload + ld.d $s7, $sp, 96 # 8-byte Folded Reload + ld.d $s6, $sp, 104 # 8-byte Folded Reload + ld.d $s5, $sp, 112 # 8-byte Folded Reload + ld.d $s4, $sp, 120 # 8-byte Folded Reload + ld.d $s3, $sp, 128 # 8-byte Folded Reload + ld.d $s2, $sp, 136 # 8-byte Folded Reload + ld.d $s1, $sp, 144 # 8-byte Folded Reload + ld.d $s0, $sp, 152 # 8-byte Folded Reload + ld.d $fp, $sp, 160 # 8-byte Folded Reload + ld.d $ra, $sp, 168 # 8-byte Folded Reload + addi.d $sp, $sp, 176 ret .Lfunc_end6: .size _ZN14btOptimizedBvh14updateBvhNodesEP23btStridingMeshInterfaceiii, .Lfunc_end6-_ZN14btOptimizedBvh14updateBvhNodesEP23btStridingMeshInterfaceiii @@ -1289,20 +1291,8 @@ GCC_except_table10: .Lcst_end2: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZZN14btOptimizedBvh5buildEP23btStridingMeshInterfacebRK9btVector3S4_EN29QuantizedNodeTriangleCallback28internalProcessTriangleIndexEPS2_ii -.LCPI11_0: - .word 0xdd5e0b6b # float -9.99999984E+17 -.LCPI11_1: - .word 0x5d5e0b6b # float 9.99999984E+17 -.LCPI11_2: - .word 0x3a83126f # float 0.00100000005 -.LCPI11_3: - .word 0x3b03126f # float 0.00200000009 -.LCPI11_4: - .word 0xba83126f # float -0.00100000005 .text - .p2align 5 + .p2align 5 # -- Begin function _ZZN14btOptimizedBvh5buildEP23btStridingMeshInterfacebRK9btVector3S4_EN29QuantizedNodeTriangleCallback28internalProcessTriangleIndexEPS2_ii .type _ZZN14btOptimizedBvh5buildEP23btStridingMeshInterfacebRK9btVector3S4_EN29QuantizedNodeTriangleCallback28internalProcessTriangleIndexEPS2_ii,@function _ZZN14btOptimizedBvh5buildEP23btStridingMeshInterfacebRK9btVector3S4_EN29QuantizedNodeTriangleCallback28internalProcessTriangleIndexEPS2_ii: # @_ZZN14btOptimizedBvh5buildEP23btStridingMeshInterfacebRK9btVector3S4_EN29QuantizedNodeTriangleCallback28internalProcessTriangleIndexEPS2_ii .cfi_startproc @@ -1339,14 +1329,14 @@ _ZZN14btOptimizedBvh5buildEP23btStridingMeshInterfacebRK9btVector3S4_EN29Quantiz .cfi_offset 63, -112 fld.s $ft7, $a1, 0 fld.s $fs1, $a1, 4 - fld.s $fs5, $a1, 8 + fld.s $fs4, $a1, 8 fld.s $fs6, $a1, 16 fld.s $fs2, $a1, 20 - fld.s $fs7, $a1, 24 + fld.s $fs5, $a1, 24 ld.d $a4, $a0, 16 fld.s $fs0, $a1, 32 fld.s $fs3, $a1, 36 - fld.s $fs4, $a1, 40 + fld.s $fs7, $a1, 40 fld.s $ft2, $a4, 8 fld.s $ft4, $a4, 12 ld.d $s0, $a0, 8 @@ -1435,81 +1425,88 @@ _ZZN14btOptimizedBvh5buildEP23btStridingMeshInterfacebRK9btVector3S4_EN29Quantiz st.d $fp, $s0, 16 st.w $s1, $s0, 8 .LBB11_11: # %_ZN20btAlignedObjectArrayI18btQuantizedBvhNodeE9push_backERKS0_.exit - pcalau12i $a0, %pc_hi20(.LCPI11_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI11_0) slli.d $a0, $a2, 21 or $a0, $a0, $a3 - fmax.s $fa1, $fs5, $fa0 - fcmp.clt.s $fcc0, $fa1, $fs7 - pcalau12i $a2, %pc_hi20(.LCPI11_1) - fld.s $fa2, $a2, %pc_lo12(.LCPI11_1) - fsel $fa1, $fa1, $fs7, $fcc0 - fcmp.clt.s $fcc0, $fa1, $fs4 - fsel $fa1, $fa1, $fs4, $fcc0 - fmin.s $fa3, $fs5, $fa2 - fcmp.clt.s $fcc0, $fs7, $fa3 - fsel $fa3, $fa3, $fs7, $fcc0 - fcmp.clt.s $fcc0, $fs4, $fa3 - pcalau12i $a2, %pc_hi20(.LCPI11_2) - fld.s $fa4, $a2, %pc_lo12(.LCPI11_2) - pcalau12i $a2, %pc_hi20(.LCPI11_3) - fld.s $fa5, $a2, %pc_lo12(.LCPI11_3) - fsel $fa3, $fa3, $fs4, $fcc0 - fsub.s $fa6, $fa1, $fa3 - fadd.s $fa7, $fa1, $fa4 - fcmp.clt.s $fcc0, $fa6, $fa5 - fsel $fa1, $fa1, $fa7, $fcc0 - fsub.s $fa1, $fa1, $ft6 - fmul.s $fa1, $fa1, $ft8 - vldi $vr6, -1168 - fadd.s $fa1, $fa1, $fa6 - ftintrz.l.s $fa1, $fa1 - movfr2gr.d $a2, $fa1 + lu12i.w $a2, -141856 + ori $a2, $a2, 2923 + lu32i.d $a2, 0 + movgr2fr.w $fa1, $a2 + fmax.s $fa0, $fs4, $fa1 + fcmp.clt.s $fcc0, $fa0, $fs5 + fsel $fa0, $fa0, $fs5, $fcc0 + fcmp.clt.s $fcc0, $fa0, $fs7 + fsel $fa2, $fa0, $fs7, $fcc0 + lu12i.w $a2, 382432 + ori $a2, $a2, 2923 + movgr2fr.w $fa3, $a2 + fmin.s $fa0, $fs4, $fa3 + fcmp.clt.s $fcc0, $fs5, $fa0 + fsel $fa0, $fa0, $fs5, $fcc0 + fcmp.clt.s $fcc0, $fs7, $fa0 + fsel $fa0, $fa0, $fs7, $fcc0 + fsub.s $fa4, $fa2, $fa0 + lu12i.w $a2, 239665 + ori $a2, $a2, 623 + movgr2fr.w $fa5, $a2 + fadd.s $fa6, $fa2, $fa5 + lu12i.w $a2, 241713 + ori $a2, $a2, 623 + movgr2fr.w $fa7, $a2 + fcmp.clt.s $fcc0, $fa4, $fa7 + fsel $fa2, $fa2, $fa6, $fcc0 + fsub.s $fa2, $fa2, $ft6 + fmul.s $fa2, $fa2, $ft8 + vldi $vr4, -1168 + fadd.s $fa2, $fa2, $fa4 + ftintrz.l.s $fa2, $fa2 + movfr2gr.d $a2, $fa2 ori $a2, $a2, 1 - fmax.s $fa1, $fs1, $fa0 - fcmp.clt.s $fcc1, $fa1, $fs2 - fsel $fa1, $fa1, $fs2, $fcc1 - fcmp.clt.s $fcc1, $fa1, $fs3 - fsel $fa1, $fa1, $fs3, $fcc1 - fmin.s $fa7, $fs1, $fa2 - fcmp.clt.s $fcc1, $fs2, $fa7 - fsel $fa7, $fa7, $fs2, $fcc1 - fcmp.clt.s $fcc1, $fs3, $fa7 - fsel $fa7, $fa7, $fs3, $fcc1 - fsub.s $ft0, $fa1, $fa7 - fadd.s $ft1, $fa1, $fa4 - fcmp.clt.s $fcc1, $ft0, $fa5 - fsel $fa1, $fa1, $ft1, $fcc1 - fsub.s $fa1, $fa1, $ft4 - fmul.s $fa1, $fa1, $ft5 - fadd.s $fa1, $fa1, $fa6 - ftintrz.l.s $fa1, $fa1 - movfr2gr.d $a3, $fa1 + fmax.s $fa2, $fs1, $fa1 + fcmp.clt.s $fcc1, $fa2, $fs2 + fsel $fa2, $fa2, $fs2, $fcc1 + fcmp.clt.s $fcc1, $fa2, $fs3 + fsel $fa2, $fa2, $fs3, $fcc1 + fmin.s $fa6, $fs1, $fa3 + fcmp.clt.s $fcc1, $fs2, $fa6 + fsel $fa6, $fa6, $fs2, $fcc1 + fcmp.clt.s $fcc1, $fs3, $fa6 + fsel $fa6, $fa6, $fs3, $fcc1 + fsub.s $ft0, $fa2, $fa6 + fadd.s $ft1, $fa2, $fa5 + fcmp.clt.s $fcc1, $ft0, $fa7 + fsel $fa2, $fa2, $ft1, $fcc1 + fsub.s $fa2, $fa2, $ft4 + fmul.s $fa2, $fa2, $ft5 + fadd.s $fa2, $fa2, $fa4 + ftintrz.l.s $fa2, $fa2 + movfr2gr.d $a3, $fa2 ori $a3, $a3, 1 - fmax.s $fa0, $ft7, $fa0 - fcmp.clt.s $fcc2, $fa0, $fs6 - fsel $fa0, $fa0, $fs6, $fcc2 - fcmp.clt.s $fcc2, $fa0, $fs0 - fsel $fa0, $fa0, $fs0, $fcc2 - fmin.s $fa1, $ft7, $fa2 - fcmp.clt.s $fcc2, $fs6, $fa1 + fmax.s $fa1, $ft7, $fa1 + fcmp.clt.s $fcc2, $fa1, $fs6 fsel $fa1, $fa1, $fs6, $fcc2 - fcmp.clt.s $fcc2, $fs0, $fa1 + fcmp.clt.s $fcc2, $fa1, $fs0 fsel $fa1, $fa1, $fs0, $fcc2 - fsub.s $fa2, $fa0, $fa1 - fadd.s $fa4, $fa0, $fa4 - fcmp.clt.s $fcc2, $fa2, $fa5 - fsel $fa0, $fa0, $fa4, $fcc2 - fsub.s $fa0, $fa0, $ft2 - fmul.s $fa0, $fa0, $ft3 - fadd.s $fa0, $fa0, $fa6 - pcalau12i $a4, %pc_hi20(.LCPI11_4) - fld.s $fa2, $a4, %pc_lo12(.LCPI11_4) - ftintrz.l.s $fa0, $fa0 - movfr2gr.d $a4, $fa0 + fmin.s $fa2, $ft7, $fa3 + fcmp.clt.s $fcc2, $fs6, $fa2 + fsel $fa2, $fa2, $fs6, $fcc2 + fcmp.clt.s $fcc2, $fs0, $fa2 + fsel $fa2, $fa2, $fs0, $fcc2 + fsub.s $fa3, $fa1, $fa2 + fadd.s $fa5, $fa1, $fa5 + fcmp.clt.s $fcc2, $fa3, $fa7 + fsel $fa1, $fa1, $fa5, $fcc2 + fsub.s $fa1, $fa1, $ft2 + fmul.s $fa1, $fa1, $ft3 + fadd.s $fa1, $fa1, $fa4 + ftintrz.l.s $fa1, $fa1 + movfr2gr.d $a4, $fa1 ori $a4, $a4, 1 - fadd.s $fa0, $fa3, $fa2 - fsel $fa0, $fa3, $fa0, $fcc0 + lu12i.w $a5, -284623 + ori $a5, $a5, 623 + lu32i.d $a5, 0 + movgr2fr.w $fa1, $a5 + fadd.s $fa3, $fa0, $fa1 + fsel $fa0, $fa0, $fa3, $fcc0 fsub.s $fa0, $fa0, $ft6 fmul.s $fa0, $fa0, $ft8 ftintrz.l.s $fa0, $fa0 @@ -1517,15 +1514,15 @@ _ZZN14btOptimizedBvh5buildEP23btStridingMeshInterfacebRK9btVector3S4_EN29Quantiz lu12i.w $a6, 15 ori $a6, $a6, 4094 and $a5, $a5, $a6 - fadd.s $fa0, $fa7, $fa2 - fsel $fa0, $fa7, $fa0, $fcc1 + fadd.s $fa0, $fa6, $fa1 + fsel $fa0, $fa6, $fa0, $fcc1 fsub.s $fa0, $fa0, $ft4 fmul.s $fa0, $fa0, $ft5 ftintrz.l.s $fa0, $fa0 movfr2gr.d $a7, $fa0 and $a7, $a7, $a6 - fadd.s $fa0, $fa1, $fa2 - fsel $fa0, $fa1, $fa0, $fcc2 + fadd.s $fa0, $fa2, $fa1 + fsel $fa0, $fa2, $fa0, $fcc2 fsub.s $fa0, $fa0, $ft2 fmul.s $fa0, $fa0, $ft3 ld.d $t0, $s0, 16 @@ -1625,14 +1622,8 @@ GCC_except_table12: .Lcst_end3: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZZN14btOptimizedBvh5buildEP23btStridingMeshInterfacebRK9btVector3S4_EN20NodeTriangleCallback28internalProcessTriangleIndexEPS2_ii -.LCPI13_0: - .word 0xdd5e0b6b # float -9.99999984E+17 -.LCPI13_1: - .word 0x5d5e0b6b # float 9.99999984E+17 .text - .p2align 5 + .p2align 5 # -- Begin function _ZZN14btOptimizedBvh5buildEP23btStridingMeshInterfacebRK9btVector3S4_EN20NodeTriangleCallback28internalProcessTriangleIndexEPS2_ii .type _ZZN14btOptimizedBvh5buildEP23btStridingMeshInterfacebRK9btVector3S4_EN20NodeTriangleCallback28internalProcessTriangleIndexEPS2_ii,@function _ZZN14btOptimizedBvh5buildEP23btStridingMeshInterfacebRK9btVector3S4_EN20NodeTriangleCallback28internalProcessTriangleIndexEPS2_ii: # @_ZZN14btOptimizedBvh5buildEP23btStridingMeshInterfacebRK9btVector3S4_EN20NodeTriangleCallback28internalProcessTriangleIndexEPS2_ii .cfi_startproc @@ -1674,14 +1665,14 @@ _ZZN14btOptimizedBvh5buildEP23btStridingMeshInterfacebRK9btVector3S4_EN20NodeTri fld.s $fs1, $a1, 16 fld.s $fs4, $a1, 20 fld.s $fs6, $a1, 24 - fld.s $fs0, $a1, 28 + fld.s $fs5, $a1, 28 ld.d $s0, $a0, 8 fld.s $fs3, $a1, 32 fld.s $fs7, $a1, 36 fld.s $fs2, $a1, 40 ld.w $a4, $s0, 4 ld.w $a0, $s0, 8 - fld.s $fs5, $a1, 44 + fld.s $fs0, $a1, 44 bne $a4, $a0, .LBB13_11 # %bb.1: sltui $a0, $a4, 1 @@ -1762,12 +1753,14 @@ _ZZN14btOptimizedBvh5buildEP23btStridingMeshInterfacebRK9btVector3S4_EN20NodeTri .LBB13_11: # %_ZN20btAlignedObjectArrayI18btOptimizedBvhNodeE9push_backERKS0_.exit movgr2fr.w $fa0, $zero fmax.s $fa1, $fa5, $fa0 - fcmp.clt.s $fcc0, $fa1, $fs0 - pcalau12i $a0, %pc_hi20(.LCPI13_0) - fld.s $fa2, $a0, %pc_lo12(.LCPI13_0) - fsel $fa1, $fa1, $fs0, $fcc0 fcmp.clt.s $fcc0, $fa1, $fs5 fsel $fa1, $fa1, $fs5, $fcc0 + fcmp.clt.s $fcc0, $fa1, $fs0 + fsel $fa1, $fa1, $fs0, $fcc0 + lu12i.w $a0, -141856 + ori $a0, $a0, 2923 + lu32i.d $a0, 0 + movgr2fr.w $fa2, $a0 fmax.s $fa3, $fa6, $fa2 fcmp.clt.s $fcc0, $fa3, $fs6 fsel $fa3, $fa3, $fs6, $fcc0 @@ -1784,12 +1777,13 @@ _ZZN14btOptimizedBvh5buildEP23btStridingMeshInterfacebRK9btVector3S4_EN20NodeTri fcmp.clt.s $fcc0, $fa2, $fs3 fsel $fa2, $fa2, $fs3, $fcc0 fmin.s $fa0, $fa5, $fa0 - fcmp.clt.s $fcc0, $fs0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI13_1) - fld.s $fa5, $a0, %pc_lo12(.LCPI13_1) - fsel $fa0, $fa0, $fs0, $fcc0 fcmp.clt.s $fcc0, $fs5, $fa0 fsel $fa0, $fa0, $fs5, $fcc0 + fcmp.clt.s $fcc0, $fs0, $fa0 + fsel $fa0, $fa0, $fs0, $fcc0 + lu12i.w $a0, 382432 + ori $a0, $a0, 2923 + movgr2fr.w $fa5, $a0 fmin.s $fa6, $fa6, $fa5 fcmp.clt.s $fcc0, $fs6, $fa6 fsel $fa6, $fa6, $fs6, $fcc0 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btPersistentManifold.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btPersistentManifold.s index bf31c95a..2441f316 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btPersistentManifold.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btPersistentManifold.s @@ -67,12 +67,7 @@ _ZN20btPersistentManifold14clearUserCacheER15btManifoldPoint: # @_ZN20btPersiste .size _ZN20btPersistentManifold14clearUserCacheER15btManifoldPoint, .Lfunc_end1-_ZN20btPersistentManifold14clearUserCacheER15btManifoldPoint .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN20btPersistentManifold16sortCachedPointsERK15btManifoldPoint -.LCPI2_0: - .word 0xdd5e0b6b # float -9.99999984E+17 - .text - .globl _ZN20btPersistentManifold16sortCachedPointsERK15btManifoldPoint + .globl _ZN20btPersistentManifold16sortCachedPointsERK15btManifoldPoint # -- Begin function _ZN20btPersistentManifold16sortCachedPointsERK15btManifoldPoint .p2align 5 .type _ZN20btPersistentManifold16sortCachedPointsERK15btManifoldPoint,@function _ZN20btPersistentManifold16sortCachedPointsERK15btManifoldPoint: # @_ZN20btPersistentManifold16sortCachedPointsERK15btManifoldPoint @@ -113,32 +108,32 @@ _ZN20btPersistentManifold16sortCachedPointsERK15btManifoldPoint: # @_ZN20btPersi fld.s $fa7, $a0, 184 fld.s $ft4, $a1, 4 fld.s $ft0, $a0, 188 - fsub.s $fa0, $ft2, $fa7 + fsub.s $fa1, $ft2, $fa7 fsub.s $ft3, $ft4, $ft0 fld.s $ft5, $a1, 8 fld.s $ft1, $a0, 192 fld.s $fa4, $a0, 536 - fld.s $fa1, $a0, 360 + fld.s $fa0, $a0, 360 fld.s $fa5, $a0, 540 fld.s $fa2, $a0, 364 fld.s $fa6, $a0, 544 fld.s $fa3, $a0, 368 fsub.s $ft11, $ft5, $ft1 - fsub.s $ft7, $fa4, $fa1 + fsub.s $ft7, $fa4, $fa0 fsub.s $ft6, $fa5, $fa2 fsub.s $ft8, $fa6, $fa3 fneg.s $ft9, $ft6 fmul.s $ft10, $ft11, $ft9 fmadd.s $ft12, $ft3, $ft8, $ft10 fneg.s $ft10, $ft8 - fmul.s $ft13, $fa0, $ft10 + fmul.s $ft13, $fa1, $ft10 fmadd.s $ft13, $ft11, $ft7, $ft13 fneg.s $ft11, $ft7 fmul.s $ft3, $ft3, $ft11 - fmadd.s $fa0, $fa0, $ft6, $ft3 + fmadd.s $fa1, $fa1, $ft6, $ft3 fmul.s $ft3, $ft13, $ft13 fmadd.s $ft3, $ft12, $ft12, $ft3 - fmadd.s $fa0, $fa0, $fa0, $ft3 + fmadd.s $fa1, $fa1, $fa1, $ft3 bne $a3, $a2, .LBB2_4 # %bb.2: # %._crit_edge208 fld.s $ft3, $a0, 8 @@ -152,19 +147,19 @@ _ZN20btPersistentManifold16sortCachedPointsERK15btManifoldPoint: # @_ZN20btPersi .LBB2_3: # %._crit_edge fld.s $ft4, $a1, 4 fld.s $fa4, $a0, 536 - fld.s $fa1, $a0, 360 + fld.s $fa0, $a0, 360 fld.s $fa5, $a0, 540 fld.s $fa2, $a0, 364 fld.s $fa6, $a0, 544 fld.s $fa3, $a0, 368 fld.s $ft5, $a1, 8 - fsub.s $ft7, $fa4, $fa1 + fsub.s $ft7, $fa4, $fa0 fsub.s $ft6, $fa5, $fa2 fsub.s $ft8, $fa6, $fa3 fneg.s $ft9, $ft6 fneg.s $ft10, $ft8 fneg.s $ft11, $ft7 - movgr2fr.w $fa0, $zero + movgr2fr.w $fa1, $zero .LBB2_4: fld.s $fa7, $a0, 8 fld.s $ft0, $a0, 12 @@ -208,7 +203,7 @@ _ZN20btPersistentManifold16sortCachedPointsERK15btManifoldPoint: # @_ZN20btPersi fmadd.s $fa4, $fa4, $fa4, $fa5 bcnez $fcc0, .LBB2_9 .LBB2_8: # %._crit_edge222 - fsub.s $fa1, $fa1, $fa7 + fsub.s $fa0, $fa0, $fa7 fsub.s $fa2, $fa2, $ft0 fsub.s $fa3, $fa3, $ft1 fneg.s $fa5, $fa2 @@ -216,41 +211,43 @@ _ZN20btPersistentManifold16sortCachedPointsERK15btManifoldPoint: # @_ZN20btPersi fmadd.s $fa5, $ft4, $fa3, $fa5 fneg.s $fa3, $fa3 fmul.s $fa3, $ft3, $fa3 - fmadd.s $fa3, $ft5, $fa1, $fa3 - fneg.s $fa1, $fa1 - fmul.s $fa1, $ft4, $fa1 - fmadd.s $fa1, $ft3, $fa2, $fa1 + fmadd.s $fa3, $ft5, $fa0, $fa3 + fneg.s $fa0, $fa0 + fmul.s $fa0, $ft4, $fa0 + fmadd.s $fa0, $ft3, $fa2, $fa0 fmul.s $fa2, $fa3, $fa3 fmadd.s $fa2, $fa5, $fa5, $fa2 - fmadd.s $fa1, $fa1, $fa1, $fa2 - fabs.s $fa1, $fa1 + fmadd.s $fa0, $fa0, $fa0, $fa2 + fabs.s $fa0, $fa0 b .LBB2_10 .LBB2_9: - movgr2fr.w $fa1, $zero + movgr2fr.w $fa0, $zero .LBB2_10: - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.s $fa2, $a0, %pc_lo12(.LCPI2_0) - fabs.s $fa0, $fa0 - fabs.s $fa3, $ft2 - fabs.s $fa4, $fa4 - fcmp.cule.s $fcc0, $fa0, $fa2 - fsel $fa0, $fa0, $fa2, $fcc0 + fabs.s $fa1, $fa1 + fabs.s $fa2, $ft2 + fabs.s $fa3, $fa4 + lu12i.w $a0, -141856 + ori $a0, $a0, 2923 + lu32i.d $a0, 0 + movgr2fr.w $fa4, $a0 + fcmp.cule.s $fcc0, $fa1, $fa4 + fsel $fa1, $fa1, $fa4, $fcc0 movcf2gr $a0, $fcc0 sub.d $a0, $zero, $a0 - fcmp.clt.s $fcc0, $fa0, $fa3 - fsel $fa0, $fa0, $fa3, $fcc0 + fcmp.clt.s $fcc0, $fa1, $fa2 + fsel $fa1, $fa1, $fa2, $fcc0 movcf2gr $a1, $fcc0 masknez $a0, $a0, $a1 ori $a2, $zero, 1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 - fcmp.clt.s $fcc0, $fa0, $fa4 - fsel $fa0, $fa0, $fa4, $fcc0 + fcmp.clt.s $fcc0, $fa1, $fa3 + fsel $fa1, $fa1, $fa3, $fcc0 movcf2gr $a1, $fcc0 masknez $a0, $a0, $a1 ori $a2, $zero, 2 maskeqz $a1, $a2, $a1 - fcmp.clt.s $fcc0, $fa0, $fa1 + fcmp.clt.s $fcc0, $fa1, $fa0 or $a0, $a1, $a0 movcf2gr $a1, $fcc0 masknez $a0, $a0, $a1 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btPolyhedralConvexShape.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btPolyhedralConvexShape.s index 477bab6b..5128ed27 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btPolyhedralConvexShape.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btPolyhedralConvexShape.s @@ -26,14 +26,7 @@ _ZN23btPolyhedralConvexShapeC2Ev: # @_ZN23btPolyhedralConvexShapeC2Ev .size _ZN23btPolyhedralConvexShapeC2Ev, .Lfunc_end0-_ZN23btPolyhedralConvexShapeC2Ev .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK23btPolyhedralConvexShape37localGetSupportingVertexWithoutMarginERK9btVector3 -.LCPI1_0: - .word 0x38d1b717 # float 9.99999974E-5 -.LCPI1_1: - .word 0xdd5e0b6b # float -9.99999984E+17 - .text - .globl _ZNK23btPolyhedralConvexShape37localGetSupportingVertexWithoutMarginERK9btVector3 + .globl _ZNK23btPolyhedralConvexShape37localGetSupportingVertexWithoutMarginERK9btVector3 # -- Begin function _ZNK23btPolyhedralConvexShape37localGetSupportingVertexWithoutMarginERK9btVector3 .p2align 5 .type _ZNK23btPolyhedralConvexShape37localGetSupportingVertexWithoutMarginERK9btVector3,@function _ZNK23btPolyhedralConvexShape37localGetSupportingVertexWithoutMarginERK9btVector3: # @_ZNK23btPolyhedralConvexShape37localGetSupportingVertexWithoutMarginERK9btVector3 @@ -61,14 +54,15 @@ _ZNK23btPolyhedralConvexShape37localGetSupportingVertexWithoutMarginERK9btVector .cfi_offset 56, -64 .cfi_offset 57, -72 .cfi_offset 58, -80 - fld.s $fa0, $a1, 0 - fld.s $fa1, $a1, 4 - fld.s $fa2, $a1, 8 - pcalau12i $a1, %pc_hi20(.LCPI1_0) - fld.s $fa4, $a1, %pc_lo12(.LCPI1_0) - fmul.s $fa3, $fa1, $fa1 - fmadd.s $fa3, $fa0, $fa0, $fa3 + fld.s $fa0, $a1, 4 + fld.s $fa2, $a1, 0 + fld.s $fa1, $a1, 8 + fmul.s $fa3, $fa0, $fa0 fmadd.s $fa3, $fa2, $fa2, $fa3 + fmadd.s $fa3, $fa1, $fa1, $fa3 + lu12i.w $a1, 232731 + ori $a1, $a1, 1815 + movgr2fr.w $fa4, $a1 fcmp.clt.s $fcc0, $fa3, $fa4 move $fp, $a0 bceqz $fcc0, .LBB1_2 @@ -80,10 +74,10 @@ _ZNK23btPolyhedralConvexShape37localGetSupportingVertexWithoutMarginERK9btVector b .LBB1_3 .LBB1_2: frsqrt.s $fa3, $fa3 - fmul.s $fa0, $fa0, $fa3 - vst $vr0, $sp, 16 # 16-byte Folded Spill - fmul.s $fs1, $fa1, $fa3 - fmul.s $fs0, $fa2, $fa3 + fmul.s $fa2, $fa2, $fa3 + vst $vr2, $sp, 16 # 16-byte Folded Spill + fmul.s $fs1, $fa0, $fa3 + fmul.s $fs0, $fa1, $fa3 .LBB1_3: ld.d $a0, $fp, 0 ld.d $a1, $a0, 144 @@ -91,13 +85,15 @@ _ZNK23btPolyhedralConvexShape37localGetSupportingVertexWithoutMarginERK9btVector jirl $ra, $a1, 0 blez $a0, .LBB1_9 # %bb.4: # %.lr.ph - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.s $fs2, $a0, %pc_lo12(.LCPI1_1) move $s2, $zero move $s4, $zero move $s0, $zero move $s3, $zero move $s1, $zero + lu12i.w $a0, -141856 + ori $a0, $a0, 2923 + lu32i.d $a0, 0 + movgr2fr.w $fs2, $a0 b .LBB1_6 .p2align 4, , 16 .LBB1_5: # in Loop: Header=BB1_6 Depth=1 @@ -297,12 +293,7 @@ _ZNK23btPolyhedralConvexShape49batchedUnitVectorGetSupportingVertexWithoutMargin .size _ZNK23btPolyhedralConvexShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i, .Lfunc_end2-_ZNK23btPolyhedralConvexShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK23btPolyhedralConvexShape21calculateLocalInertiaEfR9btVector3 -.LCPI3_0: - .word 0x3daaaaaa # float 0.0833333284 - .text - .globl _ZNK23btPolyhedralConvexShape21calculateLocalInertiaEfR9btVector3 + .globl _ZNK23btPolyhedralConvexShape21calculateLocalInertiaEfR9btVector3 # -- Begin function _ZNK23btPolyhedralConvexShape21calculateLocalInertiaEfR9btVector3 .p2align 5 .type _ZNK23btPolyhedralConvexShape21calculateLocalInertiaEfR9btVector3,@function _ZNK23btPolyhedralConvexShape21calculateLocalInertiaEfR9btVector3: # @_ZNK23btPolyhedralConvexShape21calculateLocalInertiaEfR9btVector3 @@ -363,11 +354,12 @@ _ZNK23btPolyhedralConvexShape21calculateLocalInertiaEfR9btVector3: # @_ZNK23btPo fadd.s $fa1, $fa1, $fa1 fadd.s $fa2, $fs1, $fa2 fadd.s $fa2, $fa2, $fa2 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.s $fa3, $a0, %pc_lo12(.LCPI3_0) fmul.s $fa0, $fa0, $fa0 fmul.s $fa1, $fa1, $fa1 fmul.s $fa2, $fa2, $fa2 + lu12i.w $a0, 252586 + ori $a0, $a0, 2730 + movgr2fr.w $fa3, $a0 fmul.s $fa3, $fs0, $fa3 fadd.s $fa4, $fa1, $fa2 fadd.s $fa2, $fa0, $fa2 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btQuantizedBvh.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btQuantizedBvh.s index 83d0da72..260010e5 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btQuantizedBvh.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btQuantizedBvh.s @@ -657,12 +657,7 @@ _ZN14btQuantizedBvh9buildTreeEii: # @_ZN14btQuantizedBvh9buildTreeEii .size _ZN14btQuantizedBvh9buildTreeEii, .Lfunc_end5-_ZN14btQuantizedBvh9buildTreeEii .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN14btQuantizedBvh21setQuantizationValuesERK9btVector3S2_f -.LCPI6_0: - .word 0x477ffd00 # float 65533 - .text - .globl _ZN14btQuantizedBvh21setQuantizationValuesERK9btVector3S2_f + .globl _ZN14btQuantizedBvh21setQuantizationValuesERK9btVector3S2_f # -- Begin function _ZN14btQuantizedBvh21setQuantizationValuesERK9btVector3S2_f .p2align 5 .type _ZN14btQuantizedBvh21setQuantizationValuesERK9btVector3S2_f,@function _ZN14btQuantizedBvh21setQuantizationValuesERK9btVector3S2_f: # @_ZN14btQuantizedBvh21setQuantizationValuesERK9btVector3S2_f @@ -693,14 +688,15 @@ _ZN14btQuantizedBvh21setQuantizationValuesERK9btVector3S2_f: # @_ZN14btQuantized bstrpick.d $a2, $a2, 31, 0 st.d $a1, $a0, 24 st.d $a2, $a0, 32 - pcalau12i $a1, %pc_hi20(.LCPI6_0) - fld.s $fa6, $a1, %pc_lo12(.LCPI6_0) fsub.s $fa1, $fa4, $fa1 fsub.s $fa2, $fa5, $fa2 fsub.s $fa0, $fa0, $fa3 - fdiv.s $fa1, $fa6, $fa1 - fdiv.s $fa2, $fa6, $fa2 - fdiv.s $fa0, $fa6, $fa0 + lu12i.w $a1, 292863 + ori $a1, $a1, 3328 + movgr2fr.w $fa3, $a1 + fdiv.s $fa1, $fa3, $fa1 + fdiv.s $fa2, $fa3, $fa2 + fdiv.s $fa0, $fa3, $fa0 movfr2gr.s $a1, $fa1 movfr2gr.s $a2, $fa2 bstrins.d $a1, $a2, 63, 32 @@ -2903,12 +2899,7 @@ _ZNK14btQuantizedBvh17walkStacklessTreeEP21btNodeOverlapCallbackRK9btVector3S4_: .size _ZNK14btQuantizedBvh17walkStacklessTreeEP21btNodeOverlapCallbackRK9btVector3S4_, .Lfunc_end19-_ZNK14btQuantizedBvh17walkStacklessTreeEP21btNodeOverlapCallbackRK9btVector3S4_ .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK14btQuantizedBvh27walkStacklessTreeAgainstRayEP21btNodeOverlapCallbackRK9btVector3S4_S4_S4_ii -.LCPI20_0: - .word 0x5d5e0b6b # float 9.99999984E+17 - .text - .globl _ZNK14btQuantizedBvh27walkStacklessTreeAgainstRayEP21btNodeOverlapCallbackRK9btVector3S4_S4_S4_ii + .globl _ZNK14btQuantizedBvh27walkStacklessTreeAgainstRayEP21btNodeOverlapCallbackRK9btVector3S4_S4_S4_ii # -- Begin function _ZNK14btQuantizedBvh27walkStacklessTreeAgainstRayEP21btNodeOverlapCallbackRK9btVector3S4_S4_S4_ii .p2align 5 .type _ZNK14btQuantizedBvh27walkStacklessTreeAgainstRayEP21btNodeOverlapCallbackRK9btVector3S4_S4_S4_ii,@function _ZNK14btQuantizedBvh27walkStacklessTreeAgainstRayEP21btNodeOverlapCallbackRK9btVector3S4_S4_S4_ii: # @_ZNK14btQuantizedBvh27walkStacklessTreeAgainstRayEP21btNodeOverlapCallbackRK9btVector3S4_S4_S4_ii @@ -3005,19 +2996,20 @@ _ZNK14btQuantizedBvh27walkStacklessTreeAgainstRayEP21btNodeOverlapCallbackRK9btV fmul.s $fa1, $fa1, $fa5 fmadd.s $fa0, $fa4, $fa0, $fa1 fmadd.s $ft6, $fa3, $fa2, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI20_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI20_0) - frecip.s $fa1, $fa4 + frecip.s $fa0, $fa4 movgr2fr.w $fs7, $zero fcmp.ceq.s $fcc0, $fa4, $fs7 - fsel $fs6, $fa1, $fa0, $fcc0 - frecip.s $fa1, $fa5 + lu12i.w $a0, 382432 + ori $a0, $a0, 2923 + movgr2fr.w $fa1, $a0 + fsel $fs6, $fa0, $fa1, $fcc0 + frecip.s $fa0, $fa5 fcmp.ceq.s $fcc0, $fa5, $fs7 - fsel $fs4, $fa1, $fa0, $fcc0 - frecip.s $fa1, $fa3 + fsel $fs4, $fa0, $fa1, $fcc0 + frecip.s $fa0, $fa3 ld.d $s6, $fp, 120 fcmp.ceq.s $fcc0, $fa3, $fs7 - fsel $fs1, $fa1, $fa0, $fcc0 + fsel $fs1, $fa0, $fa1, $fcc0 addi.w $s7, $zero, -1 fst.s $ft4, $sp, 28 # 4-byte Folded Spill fst.s $ft6, $sp, 24 # 4-byte Folded Spill @@ -3192,12 +3184,7 @@ _ZNK14btQuantizedBvh27walkStacklessTreeAgainstRayEP21btNodeOverlapCallbackRK9btV .size _ZNK14btQuantizedBvh27walkStacklessTreeAgainstRayEP21btNodeOverlapCallbackRK9btVector3S4_S4_S4_ii, .Lfunc_end20-_ZNK14btQuantizedBvh27walkStacklessTreeAgainstRayEP21btNodeOverlapCallbackRK9btVector3S4_S4_S4_ii .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK14btQuantizedBvh36walkStacklessQuantizedTreeAgainstRayEP21btNodeOverlapCallbackRK9btVector3S4_S4_S4_ii -.LCPI21_0: - .word 0x5d5e0b6b # float 9.99999984E+17 - .text - .globl _ZNK14btQuantizedBvh36walkStacklessQuantizedTreeAgainstRayEP21btNodeOverlapCallbackRK9btVector3S4_S4_S4_ii + .globl _ZNK14btQuantizedBvh36walkStacklessQuantizedTreeAgainstRayEP21btNodeOverlapCallbackRK9btVector3S4_S4_S4_ii # -- Begin function _ZNK14btQuantizedBvh36walkStacklessQuantizedTreeAgainstRayEP21btNodeOverlapCallbackRK9btVector3S4_S4_S4_ii .p2align 5 .type _ZNK14btQuantizedBvh36walkStacklessQuantizedTreeAgainstRayEP21btNodeOverlapCallbackRK9btVector3S4_S4_S4_ii,@function _ZNK14btQuantizedBvh36walkStacklessQuantizedTreeAgainstRayEP21btNodeOverlapCallbackRK9btVector3S4_S4_S4_ii: # @_ZNK14btQuantizedBvh36walkStacklessQuantizedTreeAgainstRayEP21btNodeOverlapCallbackRK9btVector3S4_S4_S4_ii @@ -3265,18 +3252,19 @@ _ZNK14btQuantizedBvh36walkStacklessQuantizedTreeAgainstRayEP21btNodeOverlapCallb fmul.s $fa7, $fa7, $ft3 fmadd.s $fa6, $ft2, $fa6, $fa7 fmadd.s $fs0, $ft1, $ft0, $fa6 - pcalau12i $a0, %pc_hi20(.LCPI21_0) - fld.s $fa6, $a0, %pc_lo12(.LCPI21_0) - frecip.s $fa7, $ft2 + frecip.s $fa6, $ft2 movgr2fr.w $fs1, $zero fcmp.ceq.s $fcc0, $ft2, $fs1 - fsel $fs2, $fa7, $fa6, $fcc0 - frecip.s $fa7, $ft3 + lu12i.w $a0, 382432 + ori $a0, $a0, 2923 + movgr2fr.w $fa7, $a0 + fsel $fs2, $fa6, $fa7, $fcc0 + frecip.s $fa6, $ft3 fcmp.ceq.s $fcc0, $ft3, $fs1 - fsel $fs3, $fa7, $fa6, $fcc0 - frecip.s $fa7, $ft1 + fsel $fs3, $fa6, $fa7, $fcc0 + frecip.s $fa6, $ft1 fcmp.ceq.s $fcc0, $ft1, $fs1 - fsel $fs4, $fa7, $fa6, $fcc0 + fsel $fs4, $fa6, $fa7, $fcc0 fcmp.clt.s $fcc0, $fa0, $fa2 fsel $fa6, $fa2, $fa0, $fcc0 fcmp.clt.s $fcc0, $fa3, $fa5 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btQuickprof.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btQuickprof.s index 7b9de166..85375ec3 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btQuickprof.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btQuickprof.s @@ -341,12 +341,7 @@ _ZN12CProfileNode4CallEv: # @_ZN12CProfileNode4CallEv .Lfunc_end5: .size _ZN12CProfileNode4CallEv, .Lfunc_end5-_ZN12CProfileNode4CallEv # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN12CProfileNode6ReturnEv -.LCPI6_0: - .word 0x447a0000 # float 1000 - .text - .globl _ZN12CProfileNode6ReturnEv + .globl _ZN12CProfileNode6ReturnEv # -- Begin function _ZN12CProfileNode6ReturnEv .p2align 5 .type _ZN12CProfileNode6ReturnEv,@function _ZN12CProfileNode6ReturnEv: # @_ZN12CProfileNode6ReturnEv @@ -391,13 +386,13 @@ _ZN12CProfileNode6ReturnEv: # @_ZN12CProfileNode6ReturnEv movgr2fr.d $fa1, $a0 ffint.s.l $fa1, $fa1 movgr2cf $fcc0, $a1 - pcalau12i $a0, %pc_hi20(.LCPI6_0) - fld.s $fa2, $a0, %pc_lo12(.LCPI6_0) - fld.s $fa3, $fp, 12 fsel $fa0, $fa1, $fa0, $fcc0 + lu12i.w $a0, 280480 + fld.s $fa1, $fp, 12 + movgr2fr.w $fa2, $a0 ld.w $a0, $fp, 24 fdiv.s $fa0, $fa0, $fa2 - fadd.s $fa0, $fa3, $fa0 + fadd.s $fa0, $fa1, $fa0 fst.s $fa0, $fp, 12 .LBB6_3: sltui $a0, $a0, 1 @@ -603,12 +598,7 @@ _ZN15CProfileManager13Start_ProfileEPKc: # @_ZN15CProfileManager13Start_ProfileE .size _ZN15CProfileManager13Start_ProfileEPKc, .Lfunc_end13-_ZN15CProfileManager13Start_ProfileEPKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN15CProfileManager12Stop_ProfileEv -.LCPI14_0: - .word 0x447a0000 # float 1000 - .text - .globl _ZN15CProfileManager12Stop_ProfileEv + .globl _ZN15CProfileManager12Stop_ProfileEv # -- Begin function _ZN15CProfileManager12Stop_ProfileEv .p2align 5 .type _ZN15CProfileManager12Stop_ProfileEv,@function _ZN15CProfileManager12Stop_ProfileEv: # @_ZN15CProfileManager12Stop_ProfileEv @@ -655,13 +645,13 @@ _ZN15CProfileManager12Stop_ProfileEv: # @_ZN15CProfileManager12Stop_ProfileEv movgr2fr.d $fa1, $a0 ffint.s.l $fa1, $fa1 movgr2cf $fcc0, $a1 - pcalau12i $a0, %pc_hi20(.LCPI14_0) - fld.s $fa2, $a0, %pc_lo12(.LCPI14_0) - fld.s $fa3, $s0, 12 fsel $fa0, $fa1, $fa0, $fcc0 + lu12i.w $a0, 280480 + fld.s $fa1, $s0, 12 + movgr2fr.w $fa2, $a0 ld.w $a0, $s0, 24 fdiv.s $fa0, $fa0, $fa2 - fadd.s $fa0, $fa3, $fa0 + fadd.s $fa0, $fa1, $fa0 fst.s $fa0, $s0, 12 .LBB14_3: # %_ZN12CProfileNode6ReturnEv.exit bnez $a0, .LBB14_5 @@ -760,12 +750,7 @@ _ZN15CProfileManager23Increment_Frame_CounterEv: # @_ZN15CProfileManager23Increm .Lfunc_end16: .size _ZN15CProfileManager23Increment_Frame_CounterEv, .Lfunc_end16-_ZN15CProfileManager23Increment_Frame_CounterEv # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN15CProfileManager20Get_Time_Since_ResetEv -.LCPI17_0: - .word 0x447a0000 # float 1000 - .text - .globl _ZN15CProfileManager20Get_Time_Since_ResetEv + .globl _ZN15CProfileManager20Get_Time_Since_ResetEv # -- Begin function _ZN15CProfileManager20Get_Time_Since_ResetEv .p2align 5 .type _ZN15CProfileManager20Get_Time_Since_ResetEv,@function _ZN15CProfileManager20Get_Time_Since_ResetEv: # @_ZN15CProfileManager20Get_Time_Since_ResetEv @@ -798,12 +783,12 @@ _ZN15CProfileManager20Get_Time_Since_ResetEv: # @_ZN15CProfileManager20Get_Time_ ffint.s.l $fa0, $fa0 fadd.s $fa0, $fa0, $fa0 slti $a1, $a0, 0 - pcalau12i $a2, %pc_hi20(.LCPI17_0) - fld.s $fa1, $a2, %pc_lo12(.LCPI17_0) - movgr2fr.d $fa2, $a0 - ffint.s.l $fa2, $fa2 + movgr2fr.d $fa1, $a0 + ffint.s.l $fa1, $fa1 movgr2cf $fcc0, $a1 - fsel $fa0, $fa2, $fa0, $fcc0 + fsel $fa0, $fa1, $fa0, $fcc0 + lu12i.w $a0, 280480 + movgr2fr.w $fa1, $a0 fdiv.s $fa0, $fa0, $fa1 ld.d $ra, $sp, 24 # 8-byte Folded Reload addi.d $sp, $sp, 32 @@ -811,16 +796,7 @@ _ZN15CProfileManager20Get_Time_Since_ResetEv: # @_ZN15CProfileManager20Get_Time_ .Lfunc_end17: .size _ZN15CProfileManager20Get_Time_Since_ResetEv, .Lfunc_end17-_ZN15CProfileManager20Get_Time_Since_ResetEv # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN15CProfileManager13dumpRecursiveEP16CProfileIteratori -.LCPI18_0: - .word 0x447a0000 # float 1000 -.LCPI18_1: - .word 0x34000000 # float 1.1920929E-7 -.LCPI18_2: - .word 0x42c80000 # float 100 - .text - .globl _ZN15CProfileManager13dumpRecursiveEP16CProfileIteratori + .globl _ZN15CProfileManager13dumpRecursiveEP16CProfileIteratori # -- Begin function _ZN15CProfileManager13dumpRecursiveEP16CProfileIteratori .p2align 5 .type _ZN15CProfileManager13dumpRecursiveEP16CProfileIteratori,@function _ZN15CProfileManager13dumpRecursiveEP16CProfileIteratori: # @_ZN15CProfileManager13dumpRecursiveEP16CProfileIteratori @@ -907,12 +883,12 @@ _ZN15CProfileManager13dumpRecursiveEP16CProfileIteratori: # @_ZN15CProfileManage ffint.s.l $fa0, $fa0 fadd.s $fa0, $fa0, $fa0 slti $a1, $a0, 0 - pcalau12i $a2, %pc_hi20(.LCPI18_0) - fld.s $fa1, $a2, %pc_lo12(.LCPI18_0) - movgr2fr.d $fa2, $a0 - ffint.s.l $fa2, $fa2 + movgr2fr.d $fa1, $a0 + ffint.s.l $fa1, $fa1 movgr2cf $fcc0, $a1 - fsel $fa0, $fa2, $fa0, $fcc0 + fsel $fa0, $fa1, $fa0, $fcc0 + lu12i.w $a0, 280480 + movgr2fr.w $fa1, $a0 fdiv.s $fs0, $fa0, $fa1 pcalau12i $a0, %pc_hi20(_ZN15CProfileManager12FrameCounterE) ld.w $s1, $a0, %pc_lo12(_ZN15CProfileManager12FrameCounterE) @@ -932,19 +908,19 @@ _ZN15CProfileManager13dumpRecursiveEP16CProfileIteratori: # @_ZN15CProfileManage pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 ld.d $a0, $fp, 8 - pcalau12i $s5, %pc_hi20(.LCPI18_1) - pcalau12i $s4, %pc_hi20(.LCPI18_2) + lu12i.w $s5, 212992 + lu12i.w $s4, 273536 beqz $a0, .LBB18_18 # %bb.10: # %.lr.ph77 movgr2fr.w $fa0, $s1 ffint.d.w $fs2, $fa0 - fld.s $fa0, $s5, %pc_lo12(.LCPI18_1) - fld.s $fs3, $s4, %pc_lo12(.LCPI18_2) movgr2fr.w $fs1, $zero - movgr2fr.d $fs4, $zero + movgr2fr.d $fs3, $zero + movgr2fr.w $fa0, $s5 fcmp.cule.s $fcc0, $fs0, $fa0 movcf2gr $a1, $fcc0 st.d $a1, $sp, 16 + movgr2fr.w $fs4, $s4 pcalau12i $a1, %pc_hi20(.L.str.5) addi.d $s1, $a1, %pc_lo12(.L.str.5) move $s2, $zero @@ -972,13 +948,13 @@ _ZN15CProfileManager13dumpRecursiveEP16CProfileIteratori: # @_ZN15CProfileManage .LBB18_12: # =>This Loop Header: Depth=1 # Child Loop BB18_16 Depth 2 fld.s $fs5, $a0, 12 - fmov.d $fs6, $fs4 + fmov.d $fs6, $fs3 ld.d $a1, $sp, 16 movgr2cf $fcc0, $a1 bcnez $fcc0, .LBB18_14 # %bb.13: # in Loop: Header=BB18_12 Depth=1 fdiv.s $fa0, $fs5, $fs0 - fmul.s $fa0, $fa0, $fs3 + fmul.s $fa0, $fa0, $fs4 fcvt.d.s $fs6, $fa0 .LBB18_14: # in Loop: Header=BB18_12 Depth=1 blez $s0, .LBB18_11 @@ -1022,14 +998,14 @@ _ZN15CProfileManager13dumpRecursiveEP16CProfileIteratori: # @_ZN15CProfileManage addi.w $s1, $s1, -1 bnez $s1, .LBB18_23 .LBB18_24: # %._crit_edge83 - fld.s $fa0, $s5, %pc_lo12(.LCPI18_1) + movgr2fr.w $fa0, $s5 fcmp.cule.s $fcc0, $fs0, $fa0 fsub.s $fa0, $fs0, $fs1 bcnez $fcc0, .LBB18_26 # %bb.25: - fld.s $fa1, $s4, %pc_lo12(.LCPI18_2) - fdiv.s $fa2, $fa0, $fs0 - fmul.s $fa1, $fa2, $fa1 + fdiv.s $fa1, $fa0, $fs0 + movgr2fr.w $fa2, $s4 + fmul.s $fa1, $fa1, $fa2 fcvt.d.s $fa1, $fa1 b .LBB18_27 .LBB18_26: diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btRaycastCallback.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btRaycastCallback.s index 84d5857e..16032852 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btRaycastCallback.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btRaycastCallback.s @@ -19,12 +19,7 @@ _ZN25btTriangleRaycastCallbackC2ERK9btVector3S2_j: # @_ZN25btTriangleRaycastCall .Lfunc_end0: .size _ZN25btTriangleRaycastCallbackC2ERK9btVector3S2_j, .Lfunc_end0-_ZN25btTriangleRaycastCallbackC2ERK9btVector3S2_j # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN25btTriangleRaycastCallback15processTriangleEP9btVector3ii -.LCPI1_0: - .word 0xb8d1b717 # float -9.99999974E-5 - .text - .globl _ZN25btTriangleRaycastCallback15processTriangleEP9btVector3ii + .globl _ZN25btTriangleRaycastCallback15processTriangleEP9btVector3ii # -- Begin function _ZN25btTriangleRaycastCallback15processTriangleEP9btVector3ii .p2align 5 .type _ZN25btTriangleRaycastCallback15processTriangleEP9btVector3ii,@function _ZN25btTriangleRaycastCallback15processTriangleEP9btVector3ii: # @_ZN25btTriangleRaycastCallback15processTriangleEP9btVector3ii @@ -103,11 +98,13 @@ _ZN25btTriangleRaycastCallback15processTriangleEP9btVector3ii: # @_ZN25btTriangl fcmp.cule.s $fcc0, $ft7, $fa0 bcnez $fcc0, .LBB1_3 # %bb.5: - pcalau12i $a4, %pc_hi20(.LCPI1_0) - fld.s $ft8, $a4, %pc_lo12(.LCPI1_0) fmul.s $ft7, $fa2, $fa2 fmadd.s $ft7, $fa1, $fa1, $ft7 fmadd.s $ft7, $fa3, $fa3, $ft7 + lu12i.w $a4, -291557 + ori $a4, $a4, 1815 + lu32i.d $a4, 0 + movgr2fr.w $ft8, $a4 fmul.s $ft8, $ft7, $ft8 vldi $vr23, -1168 fsub.s $ft15, $ft15, $fa0 @@ -276,12 +273,8 @@ __clang_call_terminate: # @__clang_call_terminate .Lfunc_end3: .size __clang_call_terminate, .Lfunc_end3-__clang_call_terminate # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN28btTriangleConvexcastCallback15processTriangleEP9btVector3ii -.LCPI4_0: - .word 0x38d1b717 # float 9.99999974E-5 .text - .globl _ZN28btTriangleConvexcastCallback15processTriangleEP9btVector3ii + .globl _ZN28btTriangleConvexcastCallback15processTriangleEP9btVector3ii # -- Begin function _ZN28btTriangleConvexcastCallback15processTriangleEP9btVector3ii .p2align 5 .type _ZN28btTriangleConvexcastCallback15processTriangleEP9btVector3ii,@function _ZN28btTriangleConvexcastCallback15processTriangleEP9btVector3ii: # @_ZN28btTriangleConvexcastCallback15processTriangleEP9btVector3ii @@ -357,14 +350,15 @@ _ZN28btTriangleConvexcastCallback15processTriangleEP9btVector3ii: # @_ZN28btTria # %bb.2: beqz $a0, .LBB4_6 # %bb.3: - fld.s $fa3, $sp, 152 fld.s $fa2, $sp, 156 + fld.s $fa3, $sp, 152 fld.s $fa1, $sp, 160 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI4_0) - fmul.s $fa4, $fa2, $fa2 - fmadd.s $fa4, $fa3, $fa3, $fa4 - fmadd.s $fa4, $fa1, $fa1, $fa4 + fmul.s $fa0, $fa2, $fa2 + fmadd.s $fa0, $fa3, $fa3, $fa0 + fmadd.s $fa4, $fa1, $fa1, $fa0 + lu12i.w $a0, 232731 + ori $a0, $a0, 1815 + movgr2fr.w $fa0, $a0 fcmp.cule.s $fcc0, $fa4, $fa0 bcnez $fcc0, .LBB4_6 # %bb.4: diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btRaycastVehicle.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btRaycastVehicle.s index d116db3f..cf6a2c2e 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btRaycastVehicle.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btRaycastVehicle.s @@ -1277,14 +1277,7 @@ _ZNK16btRaycastVehicle24getChassisWorldTransformEv: # @_ZNK16btRaycastVehicle24g .Lfunc_end13: .size _ZNK16btRaycastVehicle24getChassisWorldTransformEv, .Lfunc_end13-_ZNK16btRaycastVehicle24getChassisWorldTransformEv # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN16btRaycastVehicle7rayCastER11btWheelInfo -.LCPI14_0: - .word 0x3c23d70a # float 0.00999999977 -.LCPI14_1: - .word 0xbdcccccd # float -0.100000001 - .text - .globl _ZN16btRaycastVehicle7rayCastER11btWheelInfo + .globl _ZN16btRaycastVehicle7rayCastER11btWheelInfo # -- Begin function _ZN16btRaycastVehicle7rayCastER11btWheelInfo .p2align 5 .type _ZN16btRaycastVehicle7rayCastER11btWheelInfo,@function _ZN16btRaycastVehicle7rayCastER11btWheelInfo: # @_ZN16btRaycastVehicle7rayCastER11btWheelInfo @@ -1438,9 +1431,10 @@ _ZN16btRaycastVehicle7rayCastER11btWheelInfo: # @_ZN16btRaycastVehicle7rayCastER pcaddu18i $ra, %call36(_ZNK11btWheelInfo23getSuspensionRestLengthEv) jirl $ra, $ra, 0 fld.s $fa1, $fp, 212 - pcalau12i $a0, %pc_hi20(.LCPI14_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI14_0) fneg.s $fa1, $fa1 + lu12i.w $a0, 246333 + ori $a0, $a0, 1802 + movgr2fr.w $fs0, $a0 fmadd.s $fs1, $fa1, $fs0, $fa0 move $a0, $fp pcaddu18i $ra, %call36(_ZNK11btWheelInfo23getSuspensionRestLengthEv) @@ -1459,19 +1453,21 @@ _ZN16btRaycastVehicle7rayCastER11btWheelInfo: # @_ZN16btRaycastVehicle7rayCastER fsel $fa0, $fa1, $fa0, $fcc0 fst.s $fa0, $fp, 32 .LBB14_4: - vld $vr4, $sp, 32 fld.s $fa0, $fp, 0 - fld.s $fa3, $fp, 52 fld.s $fa1, $fp, 4 - fld.s $fa5, $fp, 56 + fld.s $fa3, $fp, 56 + fld.s $fa4, $fp, 52 fld.s $fa2, $fp, 8 - fld.s $fa6, $fp, 60 - pcalau12i $a0, %pc_hi20(.LCPI14_1) - fld.s $fa7, $a0, %pc_lo12(.LCPI14_1) - fmul.s $fa5, $fa1, $fa5 - fmadd.s $fa3, $fa0, $fa3, $fa5 - fmadd.s $fa3, $fa2, $fa6, $fa3 - fcmp.cult.s $fcc0, $fa3, $fa7 + fld.s $fa5, $fp, 60 + fmul.s $fa3, $fa1, $fa3 + fmadd.s $fa3, $fa0, $fa4, $fa3 + vld $vr4, $sp, 32 + fmadd.s $fa3, $fa2, $fa5, $fa3 + lu12i.w $a0, -271156 + ori $a0, $a0, 3277 + lu32i.d $a0, 0 + movgr2fr.w $fa5, $a0 + fcmp.cult.s $fcc0, $fa3, $fa5 vst $vr4, $s2, 0 bceqz $fcc0, .LBB14_7 # %bb.5: @@ -1553,16 +1549,7 @@ _ZN16btRaycastVehicle7rayCastER11btWheelInfo: # @_ZN16btRaycastVehicle7rayCastER .size _ZN16btRaycastVehicle7rayCastER11btWheelInfo, .Lfunc_end14-_ZN16btRaycastVehicle7rayCastER11btWheelInfo .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN16btRaycastVehicle13updateVehicleEf -.LCPI15_0: - .word 0x40666666 # float 3.5999999 -.LCPI15_1: - .word 0x45bb8000 # float 6000 -.LCPI15_2: - .word 0x3f7d70a4 # float 0.990000009 - .text - .globl _ZN16btRaycastVehicle13updateVehicleEf + .globl _ZN16btRaycastVehicle13updateVehicleEf # -- Begin function _ZN16btRaycastVehicle13updateVehicleEf .p2align 5 .type _ZN16btRaycastVehicle13updateVehicleEf,@function _ZN16btRaycastVehicle13updateVehicleEf: # @_ZN16btRaycastVehicle13updateVehicleEf @@ -1616,12 +1603,13 @@ _ZN16btRaycastVehicle13updateVehicleEf: # @_ZN16btRaycastVehicle13updateVehicleE ld.d $a1, $fp, 168 fld.s $fa0, $a1, 332 fld.s $fa1, $a1, 328 - fmul.s $fa0, $fa0, $fa0 fld.s $fa2, $a1, 336 + fmul.s $fa0, $fa0, $fa0 fmadd.s $fa0, $fa1, $fa1, $fa0 - pcalau12i $a2, %pc_hi20(.LCPI15_0) - fld.s $fa1, $a2, %pc_lo12(.LCPI15_0) fmadd.s $fa0, $fa2, $fa2, $fa0 + lu12i.w $a2, 263782 + ori $a2, $a2, 1638 + movgr2fr.w $fa1, $a2 ld.w $a2, $fp, 184 fsqrt.s $fa0, $fa0 fmul.s $fa0, $fa0, $fa1 @@ -1708,10 +1696,10 @@ _ZN16btRaycastVehicle13updateVehicleEf: # @_ZN16btRaycastVehicle13updateVehicleE .LBB15_15: # %_ZN16btRaycastVehicle16updateSuspensionEf.exit blez $a0, .LBB15_18 # %bb.16: # %.lr.ph112 - pcalau12i $a0, %pc_hi20(.LCPI15_1) - fld.s $fs1, $a0, %pc_lo12(.LCPI15_1) move $s0, $zero move $s1, $zero + lu12i.w $a0, 285624 + movgr2fr.w $fs1, $a0 .p2align 4, , 16 .LBB15_17: # =>This Inner Loop Header: Depth=1 ld.d $a0, $fp, 208 @@ -1774,11 +1762,12 @@ _ZN16btRaycastVehicle13updateVehicleEf: # @_ZN16btRaycastVehicle13updateVehicleE ld.d $a5, $fp, 208 alsl.d $a4, $a2, $a1, 2 addi.d $a2, $a4, 8 - pcalau12i $a3, %pc_hi20(.LCPI15_2) - fld.s $fa0, $a3, %pc_lo12(.LCPI15_2) addi.d $a3, $a4, 24 addi.d $a4, $a4, 40 addi.d $a5, $a5, 244 + lu12i.w $a6, 260055 + ori $a6, $a6, 164 + movgr2fr.w $fa0, $a6 b .LBB15_22 .p2align 4, , 16 .LBB15_20: # in Loop: Header=BB15_22 Depth=1 @@ -3124,12 +3113,8 @@ _ZN19btWheelContactPointC2EP11btRigidBodyS1_RK9btVector3S4_f: # @_ZN19btWheelCon .size _ZN19btWheelContactPointC2EP11btRigidBodyS1_RK9btVector3S4_f, .Lfunc_end26-_ZN19btWheelContactPointC2EP11btRigidBodyS1_RK9btVector3S4_f .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN16btRaycastVehicle9debugDrawEP12btIDebugDraw -.LCPI27_0: - .word 0x437f0000 # float 255 .text - .globl _ZN16btRaycastVehicle9debugDrawEP12btIDebugDraw + .globl _ZN16btRaycastVehicle9debugDrawEP12btIDebugDraw # -- Begin function _ZN16btRaycastVehicle9debugDrawEP12btIDebugDraw .p2align 5 .type _ZN16btRaycastVehicle9debugDrawEP12btIDebugDraw,@function _ZN16btRaycastVehicle9debugDrawEP12btIDebugDraw: # @_ZN16btRaycastVehicle9debugDrawEP12btIDebugDraw @@ -3160,8 +3145,8 @@ _ZN16btRaycastVehicle9debugDrawEP12btIDebugDraw: # @_ZN16btRaycastVehicle9debugD move $s0, $a1 move $s1, $zero move $s2, $zero - pcalau12i $a0, %pc_hi20(.LCPI27_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI27_0) + lu12i.w $a0, 276464 + movgr2fr.w $fs0, $a0 movgr2fr.w $fs1, $zero ori $a0, $zero, 0 lu32i.d $a0, -65536 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btRigidBody.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btRigidBody.s index 02c1cf7c..2cb8c931 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btRigidBody.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btRigidBody.s @@ -660,16 +660,8 @@ _ZN11btRigidBody26predictIntegratedTransformEfR11btTransform: # @_ZN11btRigidBod .size _ZN11btRigidBody26predictIntegratedTransformEfR11btTransform, .Lfunc_end8-_ZN11btRigidBody26predictIntegratedTransformEfR11btTransform .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_ -.LCPI9_0: - .word 0x3f490fdb # float 0.785398185 -.LCPI9_1: - .word 0x3a83126f # float 0.00100000005 -.LCPI9_2: - .word 0xbcaaaaab # float -0.020833334 .section .text._ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_,"axG",@progbits,_ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_,comdat - .weak _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_ + .weak _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_ # -- Begin function _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_ .p2align 5 .type _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_,@function _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_: # @_ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_ @@ -722,22 +714,26 @@ _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_: # fmadd.s $fa0, $fs1, $fs1, $fa0 fmadd.s $fa0, $fs3, $fs3, $fa0 fsqrt.s $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI9_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI9_0) - fmul.s $fa2, $fs0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI9_1) - fld.s $fa3, $a0, %pc_lo12(.LCPI9_1) - fcmp.clt.s $fcc0, $fa1, $fa2 - fdiv.s $fa1, $fa1, $fs0 - fsel $fs4, $fa0, $fa1, $fcc0 - fcmp.cule.s $fcc0, $fa3, $fs4 + fmul.s $fa1, $fs0, $fa0 + lu12i.w $a0, 259216 + ori $a0, $a0, 4059 + movgr2fr.w $fa2, $a0 + fdiv.s $fa3, $fa2, $fs0 + fcmp.clt.s $fcc0, $fa2, $fa1 + fsel $fs4, $fa0, $fa3, $fcc0 + lu12i.w $a0, 239665 + ori $a0, $a0, 623 + movgr2fr.w $fa0, $a0 + fcmp.cule.s $fcc0, $fa0, $fs4 bcnez $fcc0, .LBB9_2 # %bb.1: - pcalau12i $a0, %pc_hi20(.LCPI9_2) - fld.s $fa0, $a0, %pc_lo12(.LCPI9_2) - fmul.s $fa1, $fs0, $fs0 - fmul.s $fa1, $fs0, $fa1 - fmul.s $fa0, $fa1, $fa0 + fmul.s $fa0, $fs0, $fs0 + fmul.s $fa0, $fs0, $fa0 + lu12i.w $a0, -275798 + ori $a0, $a0, 2731 + lu32i.d $a0, 0 + movgr2fr.w $fa1, $a0 + fmul.s $fa0, $fa0, $fa1 fmul.s $fa0, $fa0, $fs4 fmul.s $fa0, $fa0, $fs4 vldi $vr1, -1184 @@ -1007,12 +1003,7 @@ _ZN11btRigidBody10setGravityERK9btVector3: # @_ZN11btRigidBody10setGravityERK9bt .Lfunc_end12: .size _ZN11btRigidBody10setGravityERK9btVector3, .Lfunc_end12-_ZN11btRigidBody10setGravityERK9btVector3 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN11btRigidBody12applyDampingEf -.LCPI13_0: - .word 0x3ba3d70a # float 0.00499999989 - .text - .globl _ZN11btRigidBody12applyDampingEf + .globl _ZN11btRigidBody12applyDampingEf # -- Begin function _ZN11btRigidBody12applyDampingEf .p2align 5 .type _ZN11btRigidBody12applyDampingEf,@function _ZN11btRigidBody12applyDampingEf: # @_ZN11btRigidBody12applyDampingEf @@ -1094,10 +1085,11 @@ _ZN11btRigidBody12applyDampingEf: # @_ZN11btRigidBody12applyDampingEf fmadd.s $fa3, $fs5, $fs5, $fa3 fsqrt.s $fa3, $fa3 fcmp.cule.s $fcc0, $fs3, $fa3 - pcalau12i $a0, %pc_hi20(.LCPI13_0) + lu12i.w $a0, 244285 bcnez $fcc0, .LBB13_8 # %bb.5: - fld.s $fa4, $a0, %pc_lo12(.LCPI13_0) + ori $a1, $a0, 1802 + movgr2fr.w $fa4, $a1 fcmp.cule.s $fcc0, $fa3, $fa4 bcnez $fcc0, .LBB13_7 # %bb.6: @@ -1123,21 +1115,22 @@ _ZN11btRigidBody12applyDampingEf: # @_ZN11btRigidBody12applyDampingEf fmul.s $fa3, $fa2, $fa2 fmadd.s $fa3, $fa1, $fa1, $fa3 fmadd.s $fa3, $fa0, $fa0, $fa3 - fsqrt.s $fa4, $fa3 - fcmp.cule.s $fcc0, $fs1, $fa4 + fsqrt.s $fa3, $fa3 + fcmp.cule.s $fcc0, $fs1, $fa3 bcnez $fcc0, .LBB13_12 # %bb.9: - fld.s $fa3, $a0, %pc_lo12(.LCPI13_0) - fcmp.cule.s $fcc0, $fa4, $fa3 + ori $a0, $a0, 1802 + movgr2fr.w $fa4, $a0 + fcmp.cule.s $fcc0, $fa3, $fa4 bcnez $fcc0, .LBB13_11 # %bb.10: - frecip.s $fa4, $fa4 - fmul.s $fa5, $fa1, $fa4 - fmul.s $fa6, $fa2, $fa4 - fmul.s $fa4, $fa0, $fa4 - fmul.s $fa5, $fa5, $fa3 - fmul.s $fa6, $fa6, $fa3 - fmul.s $fa3, $fa4, $fa3 + frecip.s $fa3, $fa3 + fmul.s $fa5, $fa1, $fa3 + fmul.s $fa6, $fa2, $fa3 + fmul.s $fa3, $fa0, $fa3 + fmul.s $fa5, $fa5, $fa4 + fmul.s $fa6, $fa6, $fa4 + fmul.s $fa3, $fa3, $fa4 fsub.s $fa1, $fa1, $fa5 fst.s $fa1, $fp, 344 fsub.s $fa1, $fa2, $fa6 @@ -1310,12 +1303,7 @@ _ZN11btRigidBody24setCenterOfMassTransformERK11btTransform: # @_ZN11btRigidBody2 .Lfunc_end16: .size _ZN11btRigidBody24setCenterOfMassTransformERK11btTransform, .Lfunc_end16-_ZN11btRigidBody24setCenterOfMassTransformERK11btTransform # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN11btRigidBody19integrateVelocitiesEf -.LCPI17_0: - .word 0x3fc90fdb # float 1.57079637 - .text - .globl _ZN11btRigidBody19integrateVelocitiesEf + .globl _ZN11btRigidBody19integrateVelocitiesEf # -- Begin function _ZN11btRigidBody19integrateVelocitiesEf .p2align 5 .type _ZN11btRigidBody19integrateVelocitiesEf,@function _ZN11btRigidBody19integrateVelocitiesEf: # @_ZN11btRigidBody19integrateVelocitiesEf @@ -1376,18 +1364,19 @@ _ZN11btRigidBody19integrateVelocitiesEf: # @_ZN11btRigidBody19integrateVelocitie fst.s $fa2, $a0, 348 fadd.s $fa3, $fa3, $fa4 fmul.s $fa4, $fa2, $fa2 - fmadd.s $fa5, $fa1, $fa1, $fa4 - pcalau12i $a1, %pc_hi20(.LCPI17_0) - fld.s $fa4, $a1, %pc_lo12(.LCPI17_0) - fmadd.s $fa5, $fa3, $fa3, $fa5 - fsqrt.s $fa5, $fa5 - fmul.s $fa6, $fa0, $fa5 - fcmp.cule.s $fcc0, $fa6, $fa4 + fmadd.s $fa4, $fa1, $fa1, $fa4 + fmadd.s $fa4, $fa3, $fa3, $fa4 + fsqrt.s $fa4, $fa4 + fmul.s $fa6, $fa0, $fa4 + lu12i.w $a1, 261264 + ori $a1, $a1, 4059 + movgr2fr.w $fa5, $a1 + fcmp.cule.s $fcc0, $fa6, $fa5 fst.s $fa3, $a0, 352 bcnez $fcc0, .LBB17_1 # %bb.3: - fdiv.s $fa0, $fa4, $fa0 - fdiv.s $fa0, $fa0, $fa5 + fdiv.s $fa0, $fa5, $fa0 + fdiv.s $fa0, $fa0, $fa4 fmul.s $fa1, $fa1, $fa0 fst.s $fa1, $a0, 344 fmul.s $fa1, $fa2, $fa0 @@ -2059,12 +2048,8 @@ _ZN17btCollisionObject17setCollisionShapeEP16btCollisionShape: # @_ZN17btCollisi .Lfunc_end25: .size _ZN17btCollisionObject17setCollisionShapeEP16btCollisionShape, .Lfunc_end25-_ZN17btCollisionObject17setCollisionShapeEP16btCollisionShape # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf -.LCPI26_0: - .word 0x28800000 # float 1.42108547E-14 .section .text._ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf,"axG",@progbits,_ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf,comdat - .weak _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf + .weak _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf # -- Begin function _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf .p2align 5 .type _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf,@function _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf: # @_ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf @@ -2201,11 +2186,11 @@ _ZN15btTransformUtil22calculateDiffAxisAngleERK11btTransformS2_R9btVector3Rf: # jirl $ra, $ra, 0 fadd.s $fa0, $fa0, $fa0 fst.s $fa0, $s0, 0 - pcalau12i $a0, %pc_hi20(.LCPI26_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI26_0) fmul.s $fa0, $fs1, $fs1 fmadd.s $fa0, $fs0, $fs0, $fa0 fmadd.s $fa0, $fs2, $fs2, $fa0 + lu12i.w $a0, 165888 + movgr2fr.w $fa1, $a0 fcmp.clt.s $fcc0, $fa0, $fa1 st.w $zero, $fp, 12 bceqz $fcc0, .LBB26_2 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSequentialImpulseConstraintSolver.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSequentialImpulseConstraintSolver.s index 4b19a4c0..b9b5454e 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSequentialImpulseConstraintSolver.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSequentialImpulseConstraintSolver.s @@ -1703,14 +1703,7 @@ _ZN35btSequentialImpulseConstraintSolver19getOrInitSolverBodyER17btCollisionObje .size _ZN35btSequentialImpulseConstraintSolver19getOrInitSolverBodyER17btCollisionObject, .Lfunc_end19-_ZN35btSequentialImpulseConstraintSolver19getOrInitSolverBodyER17btCollisionObject .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN35btSequentialImpulseConstraintSolver14convertContactEP20btPersistentManifoldRK19btContactSolverInfo -.LCPI20_0: - .word 0x34000000 # float 1.1920929E-7 -.LCPI20_1: - .word 0x3f3504f3 # float 0.707106769 - .text - .globl _ZN35btSequentialImpulseConstraintSolver14convertContactEP20btPersistentManifoldRK19btContactSolverInfo + .globl _ZN35btSequentialImpulseConstraintSolver14convertContactEP20btPersistentManifoldRK19btContactSolverInfo # -- Begin function _ZN35btSequentialImpulseConstraintSolver14convertContactEP20btPersistentManifoldRK19btContactSolverInfo .p2align 5 .type _ZN35btSequentialImpulseConstraintSolver14convertContactEP20btPersistentManifoldRK19btContactSolverInfo,@function _ZN35btSequentialImpulseConstraintSolver14convertContactEP20btPersistentManifoldRK19btContactSolverInfo: # @_ZN35btSequentialImpulseConstraintSolver14convertContactEP20btPersistentManifoldRK19btContactSolverInfo @@ -1797,7 +1790,8 @@ _ZN35btSequentialImpulseConstraintSolver14convertContactEP20btPersistentManifold lu12i.w $a0, 328016 ori $a0, $a0, 761 st.d $a0, $sp, 64 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI20_0) + lu12i.w $a0, 258896 + ori $a0, $a0, 1267 st.d $a0, $sp, 48 # 8-byte Folded Spill ori $a0, $zero, 112 mul.d $a0, $a4, $a0 @@ -2474,11 +2468,11 @@ _ZN35btSequentialImpulseConstraintSolver14convertContactEP20btPersistentManifold andi $a0, $a0, 16 bnez $a2, .LBB20_55 # %bb.52: # in Loop: Header=BB20_7 Depth=1 - ld.d $a2, $sp, 48 # 8-byte Folded Reload - fld.s $fa7, $a2, %pc_lo12(.LCPI20_0) fmul.s $fa2, $fa1, $fa1 fmadd.s $fa2, $fa3, $fa3, $fa2 fmadd.s $fa2, $fa0, $fa0, $fa2 + lu12i.w $a2, 212992 + movgr2fr.w $fa7, $a2 fcmp.cule.s $fcc0, $fa2, $fa7 bcnez $fcc0, .LBB20_55 # %bb.53: # in Loop: Header=BB20_7 Depth=1 @@ -2517,10 +2511,10 @@ _ZN35btSequentialImpulseConstraintSolver14convertContactEP20btPersistentManifold bnez $a0, .LBB20_60 b .LBB20_61 .LBB20_55: # in Loop: Header=BB20_7 Depth=1 - pcalau12i $a2, %pc_hi20(.LCPI20_1) - fld.s $fa0, $a2, %pc_lo12(.LCPI20_1) - fabs.s $fa1, $fa6 - fcmp.cule.s $fcc0, $fa1, $fa0 + fabs.s $fa0, $fa6 + ld.d $a2, $sp, 48 # 8-byte Folded Reload + movgr2fr.w $fa1, $a2 + fcmp.cule.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB20_57 # %bb.56: # in Loop: Header=BB20_7 Depth=1 fmul.s $fa0, $fa6, $fa6 @@ -6480,16 +6474,8 @@ _ZN18btConstraintSolver9allSolvedERK19btContactSolverInfoP12btIDebugDrawP12btSta .Lfunc_end26: .size _ZN18btConstraintSolver9allSolvedERK19btContactSolverInfoP12btIDebugDrawP12btStackAlloc, .Lfunc_end26-_ZN18btConstraintSolver9allSolvedERK19btContactSolverInfoP12btIDebugDrawP12btStackAlloc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_ -.LCPI27_0: - .word 0x3f490fdb # float 0.785398185 -.LCPI27_1: - .word 0x3a83126f # float 0.00100000005 -.LCPI27_2: - .word 0xbcaaaaab # float -0.020833334 .section .text._ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_,"axG",@progbits,_ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_,comdat - .weak _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_ + .weak _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_ # -- Begin function _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_ .p2align 5 .type _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_,@function _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_: # @_ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_ @@ -6542,22 +6528,26 @@ _ZN15btTransformUtil18integrateTransformERK11btTransformRK9btVector3S5_fRS0_: # fmadd.s $fa0, $fs1, $fs1, $fa0 fmadd.s $fa0, $fs3, $fs3, $fa0 fsqrt.s $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI27_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI27_0) - fmul.s $fa2, $fs0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI27_1) - fld.s $fa3, $a0, %pc_lo12(.LCPI27_1) - fcmp.clt.s $fcc0, $fa1, $fa2 - fdiv.s $fa1, $fa1, $fs0 - fsel $fs4, $fa0, $fa1, $fcc0 - fcmp.cule.s $fcc0, $fa3, $fs4 + fmul.s $fa1, $fs0, $fa0 + lu12i.w $a0, 259216 + ori $a0, $a0, 4059 + movgr2fr.w $fa2, $a0 + fdiv.s $fa3, $fa2, $fs0 + fcmp.clt.s $fcc0, $fa2, $fa1 + fsel $fs4, $fa0, $fa3, $fcc0 + lu12i.w $a0, 239665 + ori $a0, $a0, 623 + movgr2fr.w $fa0, $a0 + fcmp.cule.s $fcc0, $fa0, $fs4 bcnez $fcc0, .LBB27_2 # %bb.1: - pcalau12i $a0, %pc_hi20(.LCPI27_2) - fld.s $fa0, $a0, %pc_lo12(.LCPI27_2) - fmul.s $fa1, $fs0, $fs0 - fmul.s $fa1, $fs0, $fa1 - fmul.s $fa0, $fa1, $fa0 + fmul.s $fa0, $fs0, $fs0 + fmul.s $fa0, $fs0, $fa0 + lu12i.w $a0, -275798 + ori $a0, $a0, 2731 + lu32i.d $a0, 0 + movgr2fr.w $fa1, $a0 + fmul.s $fa0, $fa0, $fa1 fmul.s $fa0, $fa0, $fs4 fmul.s $fa0, $fa0, $fs4 vldi $vr1, -1184 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSliderConstraint.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSliderConstraint.s index c4c7bdb5..fa5a9f73 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSliderConstraint.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSliderConstraint.s @@ -510,16 +510,7 @@ _ZN18btSliderConstraint13buildJacobianEv: # @_ZN18btSliderConstraint13buildJacob .size _ZN18btSliderConstraint13buildJacobianEv, .Lfunc_end6-_ZN18btSliderConstraint13buildJacobianEv .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN18btSliderConstraint16buildJacobianIntER11btRigidBodyS1_RK11btTransformS4_ -.LCPI7_0: - .word 0x3f490fdb # float 0.785398185 -.LCPI7_1: - .word 0xbf490fdb # float -0.785398185 -.LCPI7_2: - .word 0x4016cbe4 # float 2.3561945 - .text - .globl _ZN18btSliderConstraint16buildJacobianIntER11btRigidBodyS1_RK11btTransformS4_ + .globl _ZN18btSliderConstraint16buildJacobianIntER11btRigidBodyS1_RK11btTransformS4_ # -- Begin function _ZN18btSliderConstraint16buildJacobianIntER11btRigidBodyS1_RK11btTransformS4_ .p2align 5 .type _ZN18btSliderConstraint16buildJacobianIntER11btRigidBodyS1_RK11btTransformS4_,@function _ZN18btSliderConstraint16buildJacobianIntER11btRigidBodyS1_RK11btTransformS4_: # @_ZN18btSliderConstraint16buildJacobianIntER11btRigidBodyS1_RK11btTransformS4_ @@ -1171,23 +1162,25 @@ _ZN18btSliderConstraint16buildJacobianIntER11btRigidBodyS1_RK11btTransformS4_: # fabs.s $fa5, $fa0 bceqz $fcc0, .LBB7_12 # %bb.11: - pcalau12i $a0, %pc_hi20(.LCPI7_2) - fld.s $fa6, $a0, %pc_lo12(.LCPI7_2) - pcalau12i $a0, %pc_hi20(.LCPI7_1) - fld.s $fa7, $a0, %pc_lo12(.LCPI7_1) - fadd.s $ft0, $fa4, $fa5 + fadd.s $fa6, $fa4, $fa5 fsub.s $fa4, $fa5, $fa4 + fdiv.s $fa4, $fa6, $fa4 + lu12i.w $a0, 262508 + ori $a0, $a0, 3044 b .LBB7_13 .LBB7_12: - pcalau12i $a0, %pc_hi20(.LCPI7_0) - fld.s $fa6, $a0, %pc_lo12(.LCPI7_0) - pcalau12i $a0, %pc_hi20(.LCPI7_1) - fld.s $fa7, $a0, %pc_lo12(.LCPI7_1) - fsub.s $ft0, $fa4, $fa5 + fsub.s $fa6, $fa4, $fa5 fadd.s $fa4, $fa4, $fa5 + fdiv.s $fa4, $fa6, $fa4 + lu12i.w $a0, 259216 + ori $a0, $a0, 4059 .LBB7_13: # %_Z11btAtan2Fastff.exit.i - fdiv.s $fa4, $ft0, $fa4 - fmadd.s $fa4, $fa4, $fa7, $fa6 + movgr2fr.w $fa5, $a0 + lu12i.w $a0, -265072 + ori $a0, $a0, 4059 + lu32i.d $a0, 0 + movgr2fr.w $fa6, $a0 + fmadd.s $fa4, $fa4, $fa6, $fa5 fneg.s $fa5, $fa4 fcmp.clt.s $fcc0, $fa0, $fa3 fsel $fa0, $fa4, $fa5, $fcc0 @@ -1432,16 +1425,7 @@ _ZN18btSliderConstraint13testLinLimitsEv: # @_ZN18btSliderConstraint13testLinLim .Lfunc_end9: .size _ZN18btSliderConstraint13testLinLimitsEv, .Lfunc_end9-_ZN18btSliderConstraint13testLinLimitsEv # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN18btSliderConstraint13testAngLimitsEv -.LCPI10_0: - .word 0x3f490fdb # float 0.785398185 -.LCPI10_1: - .word 0xbf490fdb # float -0.785398185 -.LCPI10_2: - .word 0x4016cbe4 # float 2.3561945 - .text - .globl _ZN18btSliderConstraint13testAngLimitsEv + .globl _ZN18btSliderConstraint13testAngLimitsEv # -- Begin function _ZN18btSliderConstraint13testAngLimitsEv .p2align 5 .type _ZN18btSliderConstraint13testAngLimitsEv,@function _ZN18btSliderConstraint13testAngLimitsEv: # @_ZN18btSliderConstraint13testAngLimitsEv @@ -1481,23 +1465,25 @@ _ZN18btSliderConstraint13testAngLimitsEv: # @_ZN18btSliderConstraint13testAngLim fabs.s $fa5, $fa0 bceqz $fcc0, .LBB10_3 # %bb.2: - pcalau12i $a0, %pc_hi20(.LCPI10_2) - fld.s $fa6, $a0, %pc_lo12(.LCPI10_2) - pcalau12i $a0, %pc_hi20(.LCPI10_1) - fld.s $fa7, $a0, %pc_lo12(.LCPI10_1) - fadd.s $ft0, $fa4, $fa5 + fadd.s $fa6, $fa4, $fa5 fsub.s $fa4, $fa5, $fa4 + fdiv.s $fa4, $fa6, $fa4 + lu12i.w $a0, 262508 + ori $a0, $a0, 3044 b .LBB10_4 .LBB10_3: - pcalau12i $a0, %pc_hi20(.LCPI10_0) - fld.s $fa6, $a0, %pc_lo12(.LCPI10_0) - pcalau12i $a0, %pc_hi20(.LCPI10_1) - fld.s $fa7, $a0, %pc_lo12(.LCPI10_1) - fsub.s $ft0, $fa4, $fa5 + fsub.s $fa6, $fa4, $fa5 fadd.s $fa4, $fa4, $fa5 + fdiv.s $fa4, $fa6, $fa4 + lu12i.w $a0, 259216 + ori $a0, $a0, 4059 .LBB10_4: # %_Z11btAtan2Fastff.exit - fdiv.s $fa4, $ft0, $fa4 - fmadd.s $fa4, $fa4, $fa7, $fa6 + movgr2fr.w $fa5, $a0 + lu12i.w $a0, -265072 + ori $a0, $a0, 4059 + lu32i.d $a0, 0 + movgr2fr.w $fa6, $a0 + fmadd.s $fa4, $fa4, $fa6, $fa5 fneg.s $fa5, $fa4 fcmp.clt.s $fcc0, $fa0, $fa3 fsel $fa0, $fa4, $fa5, $fcc0 @@ -1526,16 +1512,7 @@ _ZN18btSliderConstraint13testAngLimitsEv: # @_ZN18btSliderConstraint13testAngLim .size _ZN18btSliderConstraint13testAngLimitsEv, .Lfunc_end10-_ZN18btSliderConstraint13testAngLimitsEv .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN18btSliderConstraint8getInfo1EPN17btTypedConstraint17btConstraintInfo1E -.LCPI11_0: - .word 0x3f490fdb # float 0.785398185 -.LCPI11_1: - .word 0xbf490fdb # float -0.785398185 -.LCPI11_2: - .word 0x4016cbe4 # float 2.3561945 - .text - .globl _ZN18btSliderConstraint8getInfo1EPN17btTypedConstraint17btConstraintInfo1E + .globl _ZN18btSliderConstraint8getInfo1EPN17btTypedConstraint17btConstraintInfo1E # -- Begin function _ZN18btSliderConstraint8getInfo1EPN17btTypedConstraint17btConstraintInfo1E .p2align 5 .type _ZN18btSliderConstraint8getInfo1EPN17btTypedConstraint17btConstraintInfo1E,@function _ZN18btSliderConstraint8getInfo1EPN17btTypedConstraint17btConstraintInfo1E: # @_ZN18btSliderConstraint8getInfo1EPN17btTypedConstraint17btConstraintInfo1E @@ -1636,23 +1613,25 @@ _ZN18btSliderConstraint8getInfo1EPN17btTypedConstraint17btConstraintInfo1E: # @_ fabs.s $fa5, $fa0 bceqz $fcc0, .LBB11_13 # %bb.12: - pcalau12i $a0, %pc_hi20(.LCPI11_2) - fld.s $fa6, $a0, %pc_lo12(.LCPI11_2) - pcalau12i $a0, %pc_hi20(.LCPI11_1) - fld.s $fa7, $a0, %pc_lo12(.LCPI11_1) - fadd.s $ft0, $fa4, $fa5 + fadd.s $fa6, $fa4, $fa5 fsub.s $fa4, $fa5, $fa4 + fdiv.s $fa4, $fa6, $fa4 + lu12i.w $a0, 262508 + ori $a0, $a0, 3044 b .LBB11_14 .LBB11_13: - pcalau12i $a0, %pc_hi20(.LCPI11_0) - fld.s $fa6, $a0, %pc_lo12(.LCPI11_0) - pcalau12i $a0, %pc_hi20(.LCPI11_1) - fld.s $fa7, $a0, %pc_lo12(.LCPI11_1) - fsub.s $ft0, $fa4, $fa5 + fsub.s $fa6, $fa4, $fa5 fadd.s $fa4, $fa4, $fa5 + fdiv.s $fa4, $fa6, $fa4 + lu12i.w $a0, 259216 + ori $a0, $a0, 4059 .LBB11_14: # %_Z11btAtan2Fastff.exit.i - fdiv.s $fa4, $ft0, $fa4 - fmadd.s $fa4, $fa4, $fa7, $fa6 + movgr2fr.w $fa5, $a0 + lu12i.w $a0, -265072 + ori $a0, $a0, 4059 + lu32i.d $a0, 0 + movgr2fr.w $fa6, $a0 + fmadd.s $fa4, $fa4, $fa6, $fa5 fneg.s $fa5, $fa4 fcmp.clt.s $fcc0, $fa0, $fa3 fsel $fa0, $fa4, $fa5, $fcc0 @@ -2152,24 +2131,7 @@ _ZN18btSliderConstraint8getInfo2EPN17btTypedConstraint17btConstraintInfo2E: # @_ .size _ZN18btSliderConstraint8getInfo2EPN17btTypedConstraint17btConstraintInfo2E, .Lfunc_end14-_ZN18btSliderConstraint8getInfo2EPN17btTypedConstraint17btConstraintInfo2E .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN18btSliderConstraint18getInfo2NonVirtualEPN17btTypedConstraint17btConstraintInfo2ERK11btTransformS5_RK9btVector3S8_ff -.LCPI15_0: - .word 0x3f490fdb # float 0.785398185 -.LCPI15_1: - .word 0xbf490fdb # float -0.785398185 -.LCPI15_2: - .word 0x4016cbe4 # float 2.3561945 -.LCPI15_3: - .word 0x3f7d70a4 # float 0.990000009 -.LCPI15_4: - .word 0x3c23d70a # float 0.00999999977 -.LCPI15_5: - .word 0xff7fffff # float -3.40282347E+38 -.LCPI15_6: - .word 0x7f7fffff # float 3.40282347E+38 - .text - .globl _ZN18btSliderConstraint18getInfo2NonVirtualEPN17btTypedConstraint17btConstraintInfo2ERK11btTransformS5_RK9btVector3S8_ff + .globl _ZN18btSliderConstraint18getInfo2NonVirtualEPN17btTypedConstraint17btConstraintInfo2ERK11btTransformS5_RK9btVector3S8_ff # -- Begin function _ZN18btSliderConstraint18getInfo2NonVirtualEPN17btTypedConstraint17btConstraintInfo2ERK11btTransformS5_RK9btVector3S8_ff .p2align 5 .type _ZN18btSliderConstraint18getInfo2NonVirtualEPN17btTypedConstraint17btConstraintInfo2ERK11btTransformS5_RK9btVector3S8_ff,@function _ZN18btSliderConstraint18getInfo2NonVirtualEPN17btTypedConstraint17btConstraintInfo2ERK11btTransformS5_RK9btVector3S8_ff: # @_ZN18btSliderConstraint18getInfo2NonVirtualEPN17btTypedConstraint17btConstraintInfo2ERK11btTransformS5_RK9btVector3S8_ff @@ -2275,23 +2237,25 @@ _ZN18btSliderConstraint18getInfo2NonVirtualEPN17btTypedConstraint17btConstraintI movgr2fr.w $fa3, $zero fcmp.cult.s $fcc0, $fa4, $fa3 fadd.s $fa6, $fa4, $fa5 - pcalau12i $a0, %pc_hi20(.LCPI15_1) + lu12i.w $a0, -265072 bceqz $fcc0, .LBB15_10 # %bb.9: - pcalau12i $a1, %pc_hi20(.LCPI15_2) - fld.s $fa7, $a1, %pc_lo12(.LCPI15_2) - fld.s $ft0, $a0, %pc_lo12(.LCPI15_1) fsub.s $fa4, $fa5, $fa4 fdiv.s $fa4, $fa6, $fa4 + lu12i.w $a1, 262508 + ori $a1, $a1, 3044 b .LBB15_11 .LBB15_10: - pcalau12i $a1, %pc_hi20(.LCPI15_0) - fld.s $fa7, $a1, %pc_lo12(.LCPI15_0) - fld.s $ft0, $a0, %pc_lo12(.LCPI15_1) fsub.s $fa4, $fa4, $fa5 fdiv.s $fa4, $fa4, $fa6 + lu12i.w $a1, 259216 + ori $a1, $a1, 4059 .LBB15_11: # %_Z11btAtan2Fastff.exit.i - fmadd.s $fa4, $fa4, $ft0, $fa7 + movgr2fr.w $fa5, $a1 + ori $a0, $a0, 4059 + lu32i.d $a0, 0 + movgr2fr.w $fa6, $a0 + fmadd.s $fa4, $fa4, $fa6, $fa5 fneg.s $fa5, $fa4 fcmp.clt.s $fcc0, $fa0, $fa3 fsel $fa0, $fa4, $fa5, $fcc0 @@ -2382,25 +2346,27 @@ _ZN18btSliderConstraint18getInfo2NonVirtualEPN17btTypedConstraint17btConstraintI fld.s $ft1, $s4, 56 fld.s $ft8, $s3, 48 fld.s $ft9, $s3, 52 + fld.s $ft10, $s3, 56 + slli.w $t3, $t0, 1 fadd.s $fa6, $fs1, $fs0 - fdiv.s $ft10, $fs0, $fa6 + fdiv.s $ft11, $fs0, $fa6 movgr2fr.w $fs0, $zero - pcalau12i $t2, %pc_hi20(.LCPI15_3) - fld.s $ft11, $t2, %pc_lo12(.LCPI15_3) fcmp.clt.s $fcc0, $fs0, $fa6 vldi $vr6, -1184 - fsel $fa6, $fa6, $ft10, $fcc0 + fsel $fa6, $fa6, $ft11, $fcc0 + lu12i.w $t2, 260055 + ori $t2, $t2, 164 + movgr2fr.w $ft11, $t2 fcmp.clt.s $fcc0, $ft11, $fa6 - pcalau12i $t2, %pc_hi20(.LCPI15_4) - fld.s $ft10, $t2, %pc_lo12(.LCPI15_4) fsel $fa6, $fa6, $ft11, $fcc0 - fld.s $ft11, $s3, 56 - slli.w $t4, $t0, 1 - fcmp.clt.s $fcc0, $fa6, $ft10 - fsel $fa6, $fa6, $ft10, $fcc0 + lu12i.w $t2, 246333 + ori $t2, $t2, 1802 + movgr2fr.w $ft11, $t2 + fcmp.clt.s $fcc0, $fa6, $ft11 + fsel $fa6, $fa6, $ft11, $fcc0 fsub.s $fa7, $ft8, $fa7 fsub.s $ft0, $ft9, $ft0 - fsub.s $ft1, $ft11, $ft1 + fsub.s $ft1, $ft10, $ft1 fmul.s $ft6, $ft1, $ft6 fmadd.s $ft6, $ft0, $fa0, $ft6 fmul.s $ft7, $fa7, $ft7 @@ -2408,11 +2374,11 @@ _ZN18btSliderConstraint18getInfo2NonVirtualEPN17btTypedConstraint17btConstraintI fmul.s $ft2, $ft0, $ft2 fmadd.s $ft8, $fa7, $fa4, $ft2 fmul.s $ft2, $fa6, $ft6 - slli.d $t2, $t4, 2 + slli.d $t2, $t3, 2 fstx.s $ft2, $a0, $t2 fmul.s $ft2, $fa6, $ft7 - addi.d $t3, $t2, 4 - fstx.s $ft2, $a0, $t3 + addi.d $t4, $t2, 4 + fstx.s $ft2, $a0, $t4 fmul.s $ft2, $fa6, $ft8 addi.d $t5, $t2, 8 fstx.s $ft2, $a0, $t5 @@ -2421,7 +2387,7 @@ _ZN18btSliderConstraint18getInfo2NonVirtualEPN17btTypedConstraint17btConstraintI fmul.s $ft6, $ft2, $ft6 fstx.s $ft6, $a1, $t2 fmul.s $ft6, $ft2, $ft7 - fstx.s $ft6, $a1, $t3 + fstx.s $ft6, $a1, $t4 fmul.s $ft6, $ft2, $ft8 fstx.s $ft6, $a1, $t5 alsl.w $t5, $t0, $t0, 1 @@ -2432,27 +2398,27 @@ _ZN18btSliderConstraint18getInfo2NonVirtualEPN17btTypedConstraint17btConstraintI fmul.s $ft3, $ft0, $ft3 fmadd.s $ft3, $fa7, $fa5, $ft3 fmul.s $ft6, $fa6, $ft4 - slli.d $t3, $t5, 2 - fstx.s $ft6, $a0, $t3 + slli.d $t4, $t5, 2 + fstx.s $ft6, $a0, $t4 fmul.s $ft6, $fa6, $ft5 - addi.d $t0, $t3, 4 + addi.d $t0, $t4, 4 fstx.s $ft6, $a0, $t0 fmul.s $ft6, $fa6, $ft3 - addi.d $t6, $t3, 8 + addi.d $t6, $t4, 8 fstx.s $ft6, $a0, $t6 fmul.s $ft4, $ft2, $ft4 fmul.s $ft5, $ft2, $ft5 fstx.s $ft5, $a1, $t0 ld.d $t0, $s0, 8 - fstx.s $ft4, $a1, $t3 + fstx.s $ft4, $a1, $t4 fmul.s $ft3, $ft2, $ft3 fstx.s $ft3, $a1, $t6 - alsl.d $t4, $t4, $t0, 2 + alsl.d $t3, $t3, $t0, 2 stx.w $a5, $t0, $t2 - st.w $a6, $t4, 4 - fst.s $fa0, $t4, 8 + st.w $a6, $t3, 4 + fst.s $fa0, $t3, 8 alsl.d $a5, $t5, $t0, 2 - stx.w $a4, $t0, $t3 + stx.w $a4, $t0, $t4 st.w $a7, $a5, 4 fst.s $fa1, $a5, 8 fld.s $ft3, $fp, 956 @@ -2482,7 +2448,7 @@ _ZN18btSliderConstraint18getInfo2NonVirtualEPN17btTypedConstraint17btConstraintI fmadd.s $fa0, $fa1, $ft5, $fa0 fmul.s $fa0, $fa0, $ft6 ld.bu $s5, $fp, 320 - fstx.s $fa0, $a3, $t3 + fstx.s $fa0, $a3, $t4 fneg.s $fs5, $fs4 fneg.s $fs7, $fs2 fneg.s $ft5, $fs3 @@ -2767,15 +2733,18 @@ _ZN18btSliderConstraint18getInfo2NonVirtualEPN17btTypedConstraint17btConstraintI fstx.s $fa0, $a0, $s3 stx.w $zero, $a1, $s3 ld.d $a1, $s0, 64 - pcalau12i $a2, %pc_hi20(.LCPI15_5) - fld.s $fa1, $a2, %pc_lo12(.LCPI15_5) + lu12i.w $a2, -2049 + ori $a2, $a2, 4095 + lu32i.d $a2, 0 + movgr2fr.w $fa1, $a2 andi $a2, $s1, 1 movgr2fr.w $fa0, $zero movgr2cf $fcc0, $a2 - pcalau12i $a2, %pc_hi20(.LCPI15_6) - fld.s $fa2, $a2, %pc_lo12(.LCPI15_6) - fsel $fa3, $fa1, $fa0, $fcc0 - fsel $fa1, $fa3, $fa1, $fcc1 + fsel $fa2, $fa1, $fa0, $fcc0 + fsel $fa1, $fa2, $fa1, $fcc1 + lu12i.w $a2, 522239 + ori $a2, $a2, 4095 + movgr2fr.w $fa2, $a2 ld.d $a2, $s0, 72 fsel $fa3, $fa0, $fa2, $fcc0 fsel $fa2, $fa3, $fa2, $fcc1 @@ -2888,12 +2857,7 @@ _ZN18btSliderConstraint23solveConstraintObsoleteER12btSolverBodyS1_f: # @_ZN18bt .Lfunc_end16: .size _ZN18btSliderConstraint23solveConstraintObsoleteER12btSolverBodyS1_f, .Lfunc_end16-_ZN18btSliderConstraint23solveConstraintObsoleteER12btSolverBodyS1_f # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN18btSliderConstraint18solveConstraintIntER11btRigidBodyR12btSolverBodyS1_S3_ -.LCPI17_0: - .word 0x3727c5ac # float 9.99999974E-6 - .text - .globl _ZN18btSliderConstraint18solveConstraintIntER11btRigidBodyR12btSolverBodyS1_S3_ + .globl _ZN18btSliderConstraint18solveConstraintIntER11btRigidBodyR12btSolverBodyS1_S3_ # -- Begin function _ZN18btSliderConstraint18solveConstraintIntER11btRigidBodyR12btSolverBodyS1_S3_ .p2align 5 .type _ZN18btSliderConstraint18solveConstraintIntER11btRigidBodyR12btSolverBodyS1_S3_,@function _ZN18btSliderConstraint18solveConstraintIntER11btRigidBodyR12btSolverBodyS1_S3_: # @_ZN18btSliderConstraint18solveConstraintIntER11btRigidBodyR12btSolverBodyS1_S3_ @@ -3386,40 +3350,40 @@ _ZN18btSliderConstraint18solveConstraintIntER11btRigidBodyR12btSolverBodyS1_S3_: fmul.s $fa7, $ft10, $fa3 fmul.s $ft0, $ft9, $fa3 fmul.s $ft2, $ft11, $fa3 - fst.s $fa4, $sp, 36 # 4-byte Folded Spill + fst.s $fa4, $sp, 40 # 4-byte Folded Spill fsub.s $fa3, $ft5, $fa4 - fst.s $fa5, $sp, 32 # 4-byte Folded Spill + fst.s $fa5, $sp, 36 # 4-byte Folded Spill fsub.s $fa4, $ft4, $fa5 - fst.s $fa6, $sp, 28 # 4-byte Folded Spill + fst.s $fa6, $sp, 32 # 4-byte Folded Spill fsub.s $ft8, $ft3, $fa6 - fst.s $fa7, $sp, 24 # 4-byte Folded Spill + fst.s $fa7, $sp, 28 # 4-byte Folded Spill fst.s $ft12, $sp, 44 # 4-byte Folded Spill fsub.s $ft12, $ft12, $fa7 - fst.s $ft0, $sp, 20 # 4-byte Folded Spill + fst.s $ft0, $sp, 24 # 4-byte Folded Spill fsub.s $ft13, $fs2, $ft0 - fst.s $ft2, $sp, 16 # 4-byte Folded Spill + fst.s $ft2, $sp, 20 # 4-byte Folded Spill fsub.s $fs0, $ft6, $ft2 fsub.s $ft14, $fa3, $ft12 fsub.s $ft15, $fa4, $ft13 fsub.s $ft13, $ft8, $fs0 fmul.s $fa3, $ft15, $ft15 fmadd.s $fa3, $ft14, $ft14, $fa3 - fmadd.s $fa4, $ft13, $ft13, $fa3 + fmadd.s $fa3, $ft13, $ft13, $fa3 + fsqrt.s $fa4, $fa3 fld.s $ft0, $a1, 280 fld.s $fs7, $a1, 284 fld.s $ft8, $a1, 288 fld.s $fs3, $a1, 296 - fld.s $fa3, $a1, 300 - fld.s $fs6, $a1, 304 + fld.s $ft2, $a1, 300 + fld.s $fa3, $a1, 304 fld.s $fs4, $a1, 312 - pcalau12i $a5, %pc_hi20(.LCPI17_0) - fld.s $ft12, $a5, %pc_lo12(.LCPI17_0) - fld.s $ft2, $a1, 316 - fld.s $fs1, $a1, 320 - fsqrt.s $fa4, $fa4 + fld.s $fs1, $a1, 316 + fld.s $fs6, $a1, 320 + lu12i.w $a5, 225916 + ori $a5, $a5, 1452 + movgr2fr.w $ft12, $a5 fcmp.cule.s $fcc0, $fa4, $ft12 fmov.s $fs0, $ft1 - fst.s $ft2, $sp, 40 # 4-byte Folded Spill bcnez $fcc0, .LBB17_16 # %bb.15: frecip.s $fa4, $fa4 @@ -3429,16 +3393,17 @@ _ZN18btSliderConstraint18solveConstraintIntER11btRigidBodyR12btSolverBodyS1_S3_: fmul.s $fa6, $fa5, $fs3 fmadd.s $fa6, $ft0, $fs0, $fa6 fmadd.s $fa6, $fs4, $fa4, $fa6 - fmul.s $fa7, $fa5, $fa3 + fmul.s $fa7, $fa5, $ft2 fmadd.s $fa7, $fs7, $fs0, $fa7 - fmadd.s $fa7, $ft2, $fa4, $fa7 - fst.s $fs4, $sp, 12 # 4-byte Folded Spill + fmadd.s $fa7, $fs1, $fa4, $fa7 + fst.s $fs4, $sp, 16 # 4-byte Folded Spill fst.s $fs3, $sp, 8 # 4-byte Folded Spill fmov.s $fs4, $ft0 - fmul.s $ft0, $fa5, $fs6 + fmul.s $ft0, $fa5, $fa3 fmadd.s $ft0, $ft8, $fs0, $ft0 - fmadd.s $ft0, $fs1, $fa4, $ft0 + fmadd.s $ft0, $fs6, $fa4, $ft0 fmul.s $fa7, $fa5, $fa7 + fst.s $ft2, $sp, 12 # 4-byte Folded Spill fld.s $ft2, $a3, 296 fmadd.s $fa6, $fs0, $fa6, $fa7 fmadd.s $fs5, $fa4, $ft0, $fa6 @@ -3462,10 +3427,10 @@ _ZN18btSliderConstraint18solveConstraintIntER11btRigidBodyR12btSolverBodyS1_S3_: fmov.s $fa6, $fa2 fmov.s $fa2, $fa1 fmov.s $fa1, $fa0 - fmov.s $fa0, $fa3 - fmov.s $fa3, $fs7 + fmov.s $fa0, $fs7 fmov.s $fs7, $fs6 - fmov.s $fs6, $ft8 + fmov.s $fs6, $fa3 + fmov.s $fa3, $ft8 fmov.s $ft8, $ft5 fld.s $ft5, $a3, 320 fmadd.s $ft0, $ft2, $fa4, $ft0 @@ -3478,20 +3443,20 @@ _ZN18btSliderConstraint18solveConstraintIntER11btRigidBodyR12btSolverBodyS1_S3_: fmov.s $ft4, $ft7 fmadd.s $ft2, $ft5, $fa4, $ft2 fmov.s $ft5, $ft8 - fmov.s $ft8, $fs6 + fmov.s $ft8, $fa3 + fmov.s $fa3, $fs6 fmov.s $fs6, $fs7 - fmov.s $fs7, $fa3 - fmov.s $fa3, $fa0 + fmov.s $fs7, $fa0 fmov.s $fa0, $fa1 fmov.s $fa1, $fa2 fmov.s $fa2, $fa6 fmul.s $fa5, $fa5, $ft0 fmov.s $ft0, $fs4 fld.s $fs3, $sp, 8 # 4-byte Folded Reload - fld.s $fs4, $sp, 12 # 4-byte Folded Reload + fld.s $fs4, $sp, 16 # 4-byte Folded Reload fmadd.s $fa5, $fs0, $fa7, $fa5 fmadd.s $fa4, $fa4, $ft2, $fa5 - fld.s $ft2, $sp, 40 # 4-byte Folded Reload + fld.s $ft2, $sp, 12 # 4-byte Folded Reload fld.s $fa5, $a0, 316 fld.s $fa7, $a0, 308 fadd.s $fa4, $fs5, $fa4 @@ -3528,16 +3493,17 @@ _ZN18btSliderConstraint18solveConstraintIntER11btRigidBodyR12btSolverBodyS1_S3_: fmul.s $fa7, $fa6, $fs3 fmadd.s $fa7, $ft0, $fa5, $fa7 fmadd.s $fa7, $fs4, $fa4, $fa7 - fst.s $ft0, $sp, 12 # 4-byte Folded Spill - fmul.s $ft0, $fa6, $fa3 + fst.s $ft0, $sp, 16 # 4-byte Folded Spill + fmul.s $ft0, $fa6, $ft2 fmadd.s $ft0, $fs7, $fa5, $ft0 - fmadd.s $ft0, $ft2, $fa4, $ft0 - fmul.s $ft2, $fa6, $fs6 + fmadd.s $ft0, $fs1, $fa4, $ft0 + fst.s $fs1, $sp, 8 # 4-byte Folded Spill + fmov.s $fs1, $ft2 + fmul.s $ft2, $fa6, $fa3 fmadd.s $ft2, $ft8, $fa5, $ft2 - fmadd.s $ft2, $fs1, $fa4, $ft2 + fmadd.s $ft2, $fs6, $fa4, $ft2 fmul.s $ft0, $fa6, $ft0 - fst.s $fs1, $sp, 8 # 4-byte Folded Spill - fmov.s $fs1, $ft6 + fst.s $ft6, $sp, 12 # 4-byte Folded Spill fmov.s $ft6, $ft3 fld.s $ft3, $a3, 296 fmadd.s $fa7, $fa5, $fa7, $ft0 @@ -3566,13 +3532,13 @@ _ZN18btSliderConstraint18solveConstraintIntER11btRigidBodyR12btSolverBodyS1_S3_: fmov.s $ft8, $fs5 fmadd.s $ft3, $ft12, $fa4, $ft3 fmul.s $fa6, $fa6, $ft2 - fld.s $ft2, $sp, 40 # 4-byte Folded Reload + fmov.s $ft2, $fs1 + fld.s $fs1, $sp, 8 # 4-byte Folded Reload fmadd.s $fa5, $fa5, $ft0, $fa6 - fld.s $ft0, $sp, 12 # 4-byte Folded Reload + fld.s $ft0, $sp, 16 # 4-byte Folded Reload fmadd.s $fa4, $fa4, $ft3, $fa5 fmov.s $ft3, $ft6 - fmov.s $ft6, $fs1 - fld.s $fs1, $sp, 8 # 4-byte Folded Reload + fld.s $ft6, $sp, 12 # 4-byte Folded Reload fld.s $fa5, $a0, 312 fld.s $fa6, $a0, 308 fadd.s $fa4, $fa7, $fa4 @@ -3583,12 +3549,12 @@ _ZN18btSliderConstraint18solveConstraintIntER11btRigidBodyR12btSolverBodyS1_S3_: fmul.s $fa4, $ft15, $fs7 fmadd.s $fa4, $ft0, $ft14, $fa4 fmadd.s $fa4, $ft8, $ft13, $fa4 - fmul.s $fa5, $ft15, $fa3 + fmul.s $fa5, $ft15, $ft2 fmadd.s $fa5, $fs3, $ft14, $fa5 - fmadd.s $fa5, $fs6, $ft13, $fa5 - fmul.s $fa3, $ft15, $ft2 + fmadd.s $fa5, $fa3, $ft13, $fa5 + fmul.s $fa3, $ft15, $fs1 fmadd.s $fa3, $fs4, $ft14, $fa3 - fmadd.s $fa3, $fs1, $ft13, $fa3 + fmadd.s $fa3, $fs6, $ft13, $fa3 fld.s $fa6, $a2, 0 fneg.s $ft8, $ft1 fmul.s $fa7, $fs0, $ft8 @@ -3890,14 +3856,14 @@ _ZN18btSliderConstraint18solveConstraintIntER11btRigidBodyR12btSolverBodyS1_S3_: fcmp.cule.s $fcc0, $ft1, $fa3 bcnez $fcc0, .LBB17_21 # %bb.20: - fld.s $fa4, $sp, 36 # 4-byte Folded Reload - fld.s $fa5, $sp, 24 # 4-byte Folded Reload + fld.s $fa4, $sp, 40 # 4-byte Folded Reload + fld.s $fa5, $sp, 28 # 4-byte Folded Reload fsub.s $fa4, $fa4, $fa5 - fld.s $fa5, $sp, 32 # 4-byte Folded Reload - fld.s $fa6, $sp, 20 # 4-byte Folded Reload + fld.s $fa5, $sp, 36 # 4-byte Folded Reload + fld.s $fa6, $sp, 24 # 4-byte Folded Reload fsub.s $fa5, $fa5, $fa6 - fld.s $fa6, $sp, 28 # 4-byte Folded Reload - fld.s $fa7, $sp, 16 # 4-byte Folded Reload + fld.s $fa6, $sp, 32 # 4-byte Folded Reload + fld.s $fa7, $sp, 20 # 4-byte Folded Reload fsub.s $fa6, $fa6, $fa7 fmul.s $fa5, $fa2, $fa5 fld.s $fa7, $a0, 1136 @@ -4029,18 +3995,8 @@ _ZN18btSliderConstraint18solveConstraintIntER11btRigidBodyR12btSolverBodyS1_S3_: .Lfunc_end17: .size _ZN18btSliderConstraint18solveConstraintIntER11btRigidBodyR12btSolverBodyS1_S3_, .Lfunc_end17-_ZN18btSliderConstraint18solveConstraintIntER11btRigidBodyR12btSolverBodyS1_S3_ # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z21btAdjustAngleToLimitsfff -.LCPI18_0: - .word 0x40c90fdb # float 6.28318548 -.LCPI18_1: - .word 0xc0490fdb # float -3.14159274 -.LCPI18_2: - .word 0x40490fdb # float 3.14159274 -.LCPI18_3: - .word 0xc0c90fdb # float -6.28318548 .section .text._Z21btAdjustAngleToLimitsfff,"axG",@progbits,_Z21btAdjustAngleToLimitsfff,comdat - .weak _Z21btAdjustAngleToLimitsfff + .weak _Z21btAdjustAngleToLimitsfff # -- Begin function _Z21btAdjustAngleToLimitsfff .p2align 5 .type _Z21btAdjustAngleToLimitsfff,@function _Z21btAdjustAngleToLimitsfff: # @_Z21btAdjustAngleToLimitsfff @@ -4073,14 +4029,17 @@ _Z21btAdjustAngleToLimitsfff: # @_Z21btAdjustAngleToLimitsfff # %bb.2: fmov.s $fs4, $fa2 fsub.s $fa0, $fa1, $fs0 - pcalau12i $a0, %pc_hi20(.LCPI18_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI18_0) + lu12i.w $a0, 265360 + ori $a0, $a0, 4059 + movgr2fr.w $fs1, $a0 fmov.s $fa1, $fs1 pcaddu18i $ra, %call36(fmodf) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI18_1) - fld.s $fs3, $a0, %pc_lo12(.LCPI18_1) fmov.s $fs2, $fa0 + lu12i.w $a0, -260976 + ori $a0, $a0, 4059 + lu32i.d $a0, 0 + movgr2fr.w $fs3, $a0 fcmp.cule.s $fcc0, $fs3, $fa0 bcnez $fcc0, .LBB18_7 # %bb.3: @@ -4092,29 +4051,35 @@ _Z21btAdjustAngleToLimitsfff: # @_Z21btAdjustAngleToLimitsfff # %bb.5: fmov.s $fs4, $fa1 fsub.s $fa0, $fs0, $fa2 - pcalau12i $a0, %pc_hi20(.LCPI18_0) - fld.s $fs2, $a0, %pc_lo12(.LCPI18_0) + lu12i.w $a0, 265360 + ori $a0, $a0, 4059 + movgr2fr.w $fs2, $a0 fmov.s $fa1, $fs2 pcaddu18i $ra, %call36(fmodf) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI18_1) - fld.s $fs3, $a0, %pc_lo12(.LCPI18_1) fmov.s $fs1, $fa0 + lu12i.w $a0, -260976 + ori $a0, $a0, 4059 + lu32i.d $a0, 0 + movgr2fr.w $fs3, $a0 fcmp.cule.s $fcc0, $fs3, $fa0 - pcalau12i $s0, %pc_hi20(.LCPI18_2) - pcalau12i $fp, %pc_hi20(.LCPI18_3) + lu12i.w $s0, 263312 + lu12i.w $fp, -258928 bcnez $fcc0, .LBB18_14 # %bb.6: fadd.s $fs1, $fs1, $fs2 b .LBB18_16 .LBB18_7: - pcalau12i $a0, %pc_hi20(.LCPI18_2) - fld.s $fa0, $a0, %pc_lo12(.LCPI18_2) + lu12i.w $a0, 263312 + ori $a0, $a0, 4059 + movgr2fr.w $fa0, $a0 fcmp.cule.s $fcc0, $fs2, $fa0 bcnez $fcc0, .LBB18_9 # %bb.8: - pcalau12i $a0, %pc_hi20(.LCPI18_3) - fld.s $fa0, $a0, %pc_lo12(.LCPI18_3) + lu12i.w $a0, -258928 + ori $a0, $a0, 4059 + lu32i.d $a0, 0 + movgr2fr.w $fa0, $a0 fadd.s $fs2, $fs2, $fa0 .LBB18_9: # %_Z16btNormalizeAnglef.exit fsub.s $fa0, $fs4, $fs0 @@ -4127,13 +4092,16 @@ _Z21btAdjustAngleToLimitsfff: # @_Z21btAdjustAngleToLimitsfff fadd.s $fa0, $fa0, $fs1 b .LBB18_13 .LBB18_11: - pcalau12i $a0, %pc_hi20(.LCPI18_2) - fld.s $fa1, $a0, %pc_lo12(.LCPI18_2) + lu12i.w $a0, 263312 + ori $a0, $a0, 4059 + movgr2fr.w $fa1, $a0 fcmp.cule.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB18_13 # %bb.12: - pcalau12i $a0, %pc_hi20(.LCPI18_3) - fld.s $fa1, $a0, %pc_lo12(.LCPI18_3) + lu12i.w $a0, -258928 + ori $a0, $a0, 4059 + lu32i.d $a0, 0 + movgr2fr.w $fa1, $a0 fadd.s $fa0, $fa0, $fa1 .LBB18_13: # %_Z16btNormalizeAnglef.exit29 fabs.s $fa0, $fa0 @@ -4142,11 +4110,14 @@ _Z21btAdjustAngleToLimitsfff: # @_Z21btAdjustAngleToLimitsfff fsel $fs0, $fa1, $fs0, $fcc0 b .LBB18_21 .LBB18_14: - fld.s $fa0, $s0, %pc_lo12(.LCPI18_2) + ori $a0, $s0, 4059 + movgr2fr.w $fa0, $a0 fcmp.cule.s $fcc0, $fs1, $fa0 bcnez $fcc0, .LBB18_16 # %bb.15: - fld.s $fa0, $fp, %pc_lo12(.LCPI18_3) + ori $a0, $fp, 4059 + lu32i.d $a0, 0 + movgr2fr.w $fa0, $a0 fadd.s $fs1, $fs1, $fa0 .LBB18_16: # %_Z16btNormalizeAnglef.exit31 fsub.s $fa0, $fs0, $fs4 @@ -4159,15 +4130,20 @@ _Z21btAdjustAngleToLimitsfff: # @_Z21btAdjustAngleToLimitsfff fadd.s $fa0, $fa0, $fs2 b .LBB18_20 .LBB18_18: - fld.s $fa1, $s0, %pc_lo12(.LCPI18_2) + ori $a0, $s0, 4059 + movgr2fr.w $fa1, $a0 fcmp.cule.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB18_20 # %bb.19: - fld.s $fa1, $fp, %pc_lo12(.LCPI18_3) + ori $a0, $fp, 4059 + lu32i.d $a0, 0 + movgr2fr.w $fa1, $a0 fadd.s $fa0, $fa0, $fa1 .LBB18_20: # %_Z16btNormalizeAnglef.exit33 - fld.s $fa1, $fp, %pc_lo12(.LCPI18_3) fabs.s $fa0, $fa0 + ori $a0, $fp, 4059 + lu32i.d $a0, 0 + movgr2fr.w $fa1, $a0 fadd.s $fa1, $fs0, $fa1 fcmp.clt.s $fcc0, $fa0, $fs1 fsel $fs0, $fs0, $fa1, $fcc0 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSoftBody.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSoftBody.s index 083e899a..a81f0578 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSoftBody.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSoftBody.s @@ -8328,12 +8328,7 @@ _ZN10btSoftBody9transformERK11btTransform: # @_ZN10btSoftBody9transformERK11btTr .size _ZN10btSoftBody9transformERK11btTransform, .Lfunc_end62-_ZN10btSoftBody9transformERK11btTransform .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN10btSoftBody13updateNormalsEv -.LCPI63_0: - .word 0x34000000 # float 1.1920929E-7 - .text - .globl _ZN10btSoftBody13updateNormalsEv + .globl _ZN10btSoftBody13updateNormalsEv # -- Begin function _ZN10btSoftBody13updateNormalsEv .p2align 5 .type _ZN10btSoftBody13updateNormalsEv,@function _ZN10btSoftBody13updateNormalsEv: # @_ZN10btSoftBody13updateNormalsEv @@ -8438,9 +8433,9 @@ _ZN10btSoftBody13updateNormalsEv: # @_ZN10btSoftBody13updateNormalsEv blez $a1, .LBB63_11 # %bb.7: # %.lr.ph66 ld.d $a0, $a0, 832 - pcalau12i $a2, %pc_hi20(.LCPI63_0) - fld.s $fa0, $a2, %pc_lo12(.LCPI63_0) addi.d $a0, $a0, 88 + lu12i.w $a2, 212992 + movgr2fr.w $fa0, $a2 b .LBB63_9 .p2align 4, , 16 .LBB63_8: # in Loop: Header=BB63_9 Depth=1 @@ -8982,12 +8977,7 @@ _ZN10btSoftBody5scaleERK9btVector3: # @_ZN10btSoftBody5scaleERK9btVector3 .size _ZN10btSoftBody5scaleERK9btVector3, .Lfunc_end67-_ZN10btSoftBody5scaleERK9btVector3 .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN10btSoftBody7setPoseEbb -.LCPI68_0: - .word 0x447a0000 # float 1000 - .text - .globl _ZN10btSoftBody7setPoseEbb + .globl _ZN10btSoftBody7setPoseEbb # -- Begin function _ZN10btSoftBody7setPoseEbb .p2align 5 .type _ZN10btSoftBody7setPoseEbb,@function _ZN10btSoftBody7setPoseEbb: # @_ZN10btSoftBody7setPoseEbb @@ -9111,13 +9101,13 @@ _ZN10btSoftBody7setPoseEbb: # @_ZN10btSoftBody7setPoseEbb blez $s2, .LBB68_41 .LBB68_17: # %.lr.ph movgr2fr.w $fa0, $s3 - pcalau12i $a0, %pc_hi20(.LCPI68_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI68_0) - ld.d $a0, $fp, 832 ffint.s.w $fa0, $fa0 + lu12i.w $a0, 280480 + ld.d $a1, $fp, 832 fmul.s $fa0, $fa7, $fa0 + movgr2fr.w $fa1, $a0 fmul.s $fa0, $fa0, $fa1 - addi.d $a0, $a0, 96 + addi.d $a0, $a1, 96 movgr2fr.w $fs0, $zero move $a1, $s2 .p2align 4, , 16 @@ -11936,14 +11926,8 @@ GCC_except_table86: .Lttbase6: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN10btSoftBody16generateClustersEii -.LCPI87_0: - .word 0x3d800000 # float 0.0625 -.LCPI87_1: - .word 0x34000000 # float 1.1920929E-7 .text - .globl _ZN10btSoftBody16generateClustersEii + .globl _ZN10btSoftBody16generateClustersEii # -- Begin function _ZN10btSoftBody16generateClustersEii .p2align 5 .type _ZN10btSoftBody16generateClustersEii,@function _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii @@ -11952,27 +11936,27 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii .cfi_personality 155, DW.ref.__gxx_personality_v0 .cfi_lsda 27, .Lexception9 # %bb.0: - addi.d $sp, $sp, -256 - .cfi_def_cfa_offset 256 - st.d $ra, $sp, 248 # 8-byte Folded Spill - st.d $fp, $sp, 240 # 8-byte Folded Spill - st.d $s0, $sp, 232 # 8-byte Folded Spill - st.d $s1, $sp, 224 # 8-byte Folded Spill - st.d $s2, $sp, 216 # 8-byte Folded Spill - st.d $s3, $sp, 208 # 8-byte Folded Spill - st.d $s4, $sp, 200 # 8-byte Folded Spill - st.d $s5, $sp, 192 # 8-byte Folded Spill - st.d $s6, $sp, 184 # 8-byte Folded Spill - st.d $s7, $sp, 176 # 8-byte Folded Spill - st.d $s8, $sp, 168 # 8-byte Folded Spill - fst.d $fs0, $sp, 160 # 8-byte Folded Spill - fst.d $fs1, $sp, 152 # 8-byte Folded Spill - fst.d $fs2, $sp, 144 # 8-byte Folded Spill - fst.d $fs3, $sp, 136 # 8-byte Folded Spill - fst.d $fs4, $sp, 128 # 8-byte Folded Spill - fst.d $fs5, $sp, 120 # 8-byte Folded Spill - fst.d $fs6, $sp, 112 # 8-byte Folded Spill - fst.d $fs7, $sp, 104 # 8-byte Folded Spill + addi.d $sp, $sp, -272 + .cfi_def_cfa_offset 272 + st.d $ra, $sp, 264 # 8-byte Folded Spill + st.d $fp, $sp, 256 # 8-byte Folded Spill + st.d $s0, $sp, 248 # 8-byte Folded Spill + st.d $s1, $sp, 240 # 8-byte Folded Spill + st.d $s2, $sp, 232 # 8-byte Folded Spill + st.d $s3, $sp, 224 # 8-byte Folded Spill + st.d $s4, $sp, 216 # 8-byte Folded Spill + st.d $s5, $sp, 208 # 8-byte Folded Spill + st.d $s6, $sp, 200 # 8-byte Folded Spill + st.d $s7, $sp, 192 # 8-byte Folded Spill + st.d $s8, $sp, 184 # 8-byte Folded Spill + fst.d $fs0, $sp, 176 # 8-byte Folded Spill + fst.d $fs1, $sp, 168 # 8-byte Folded Spill + fst.d $fs2, $sp, 160 # 8-byte Folded Spill + fst.d $fs3, $sp, 152 # 8-byte Folded Spill + fst.d $fs4, $sp, 144 # 8-byte Folded Spill + fst.d $fs5, $sp, 136 # 8-byte Folded Spill + fst.d $fs6, $sp, 128 # 8-byte Folded Spill + fst.d $fs7, $sp, 120 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -12058,7 +12042,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii masknez $a0, $a0, $a1 maskeqz $a1, $s0, $a1 or $s5, $a1, $a0 - st.d $s4, $sp, 80 # 8-byte Folded Spill + st.d $s4, $sp, 96 # 8-byte Folded Spill bge $s1, $s5, .LBB87_23 # %bb.12: ld.w $a0, $s4, 1344 @@ -12190,7 +12174,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii # %bb.33: # %._crit_edge blez $s5, .LBB87_36 # %bb.34: # %.preheader640 - st.d $s5, $sp, 64 # 8-byte Folded Spill + st.d $s5, $sp, 72 # 8-byte Folded Spill st.d $s3, $sp, 8 # 8-byte Folded Spill ld.w $a0, $s4, 820 blez $a0, .LBB87_82 @@ -12475,7 +12459,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii fmul.s $fs1, $fs1, $fa0 fmul.s $fs2, $fs2, $fa0 fmul.s $fs0, $fs0, $fa0 - ld.d $fp, $sp, 64 # 8-byte Folded Reload + ld.d $fp, $sp, 72 # 8-byte Folded Reload slli.d $a0, $fp, 4 ori $a1, $zero, 16 pcaddu18i $ra, %call36(_Z22btAlignedAllocInternalmi) @@ -12534,13 +12518,16 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii maskeqz $a1, $fp, $a1 or $a0, $a1, $a0 addi.d $a1, $s0, 24 - st.d $a1, $sp, 56 # 8-byte Folded Spill + st.d $a1, $sp, 64 # 8-byte Folded Spill addi.d $a0, $a0, -1 - st.d $a0, $sp, 48 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI87_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI87_0) + st.d $a0, $sp, 56 # 8-byte Folded Spill + lu12i.w $a0, 251904 + movgr2fr.w $fa0, $a0 fst.s $fa0, $sp, 20 # 4-byte Folded Spill movgr2fr.w $fs1, $zero + lu12i.w $a0, 212992 + movgr2fr.w $fa0, $a0 + fst.s $fa0, $sp, 80 # 4-byte Folded Spill b .LBB87_92 .p2align 4, , 16 .LBB87_91: # %._crit_edge698 @@ -12569,7 +12556,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii fcmp.clt.s $fcc0, $fa1, $fa0 fsel $fa0, $fa0, $fa1, $fcc0 vldi $vr1, -1280 - fsub.s $fs2, $fa1, $fa0 + fsub.s $fs3, $fa1, $fa0 addi.w $s3, $s3, 1 b .LBB87_97 .LBB87_93: # %_ZN20btAlignedObjectArrayIPN10btSoftBody4NodeEE10deallocateEv.exit.i.i255 @@ -12588,15 +12575,14 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii jirl $ra, $ra, 0 .LBB87_95: # %.loopexit637 # in Loop: Header=BB87_97 Depth=2 - fsub.s $fa0, $fs6, $fs3 - fsub.s $fa1, $fs7, $fs4 - fsub.s $fa2, $fs0, $fs5 - pcalau12i $a0, %pc_hi20(.LCPI87_1) - fld.s $fa3, $a0, %pc_lo12(.LCPI87_1) + fsub.s $fa0, $fs7, $fs4 + fsub.s $fa1, $fs0, $fs5 + fsub.s $fa2, $fs2, $fs6 fmul.s $fa1, $fa1, $fa1 fmadd.s $fa0, $fa0, $fa0, $fa1 fmadd.s $fa0, $fa2, $fa2, $fa0 - fcmp.clt.s $fcc0, $fa3, $fa0 + fld.s $fa1, $sp, 80 # 4-byte Folded Reload + fcmp.clt.s $fcc0, $fa1, $fa0 st.w $zero, $fp, 4 movcf2gr $a0, $fcc0 or $s5, $s5, $a0 @@ -12650,22 +12636,22 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii fmul.s $fa1, $fa1, $fa3 alsl.d $a1, $s1, $s0, 4 slli.d $a2, $s1, 4 - fldx.s $fs3, $s0, $a2 - fld.s $fs4, $a1, 4 - fld.s $fs5, $a1, 8 + fldx.s $fs4, $s0, $a2 + fld.s $fs5, $a1, 4 + fld.s $fs6, $a1, 8 fmul.s $fa0, $fa0, $fa3 - fsub.s $fa2, $fa2, $fs3 - fsub.s $fa1, $fa1, $fs4 - fsub.s $fa0, $fa0, $fs5 - fmul.s $fa2, $fs2, $fa2 - fmul.s $fa1, $fs2, $fa1 - fmul.s $fa0, $fs2, $fa0 - fadd.s $fs6, $fs3, $fa2 - fadd.s $fs7, $fs4, $fa1 - fadd.s $fs0, $fs5, $fa0 - fstx.s $fs6, $s0, $a2 - fst.s $fs7, $a1, 4 - fst.s $fs0, $a1, 8 + fsub.s $fa2, $fa2, $fs4 + fsub.s $fa1, $fa1, $fs5 + fsub.s $fa0, $fa0, $fs6 + fmul.s $fa2, $fs3, $fa2 + fmul.s $fa1, $fs3, $fa1 + fmul.s $fa0, $fs3, $fa0 + fadd.s $fs7, $fs4, $fa2 + fadd.s $fs0, $fs5, $fa1 + fadd.s $fs2, $fs6, $fa0 + fstx.s $fs7, $s0, $a2 + fst.s $fs0, $a1, 4 + fst.s $fs2, $a1, 8 st.w $zero, $a1, 12 ld.d $a1, $s4, 1352 ldx.d $fp, $a1, $a0 @@ -12701,7 +12687,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii b .LBB87_110 .p2align 4, , 16 .LBB87_108: # in Loop: Header=BB87_110 Depth=2 - ld.d $s4, $sp, 80 # 8-byte Folded Reload + ld.d $s4, $sp, 96 # 8-byte Folded Reload .LBB87_109: # in Loop: Header=BB87_110 Depth=2 ld.d $a2, $s3, 16 slli.d $a3, $a1, 3 @@ -12720,7 +12706,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii ori $a2, $zero, 120 mul.d $a2, $s8, $a2 add.d $s1, $a1, $a2 - ld.d $a1, $sp, 64 # 8-byte Folded Reload + ld.d $a1, $sp, 72 # 8-byte Folded Reload bne $a1, $s6, .LBB87_112 # %bb.111: # in Loop: Header=BB87_110 Depth=2 move $a1, $zero @@ -12744,8 +12730,8 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii fabs.s $fa4, $fa4 fadd.s $fa3, $fa3, $fa4 ori $a2, $zero, 1 - ld.d $a3, $sp, 48 # 8-byte Folded Reload - ld.d $a4, $sp, 56 # 8-byte Folded Reload + ld.d $a3, $sp, 56 # 8-byte Folded Reload + ld.d $a4, $sp, 64 # 8-byte Folded Reload .p2align 4, , 16 .LBB87_113: # %.lr.ph693 # Parent Loop BB87_92 Depth=1 @@ -12878,7 +12864,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii ld.w $a1, $s3, 4 .LBB87_132: # %_ZN20btAlignedObjectArrayIPN10btSoftBody4NodeEE10deallocateEv.exit.i.i274 # in Loop: Header=BB87_110 Depth=2 - ld.d $a2, $sp, 80 # 8-byte Folded Reload + ld.d $a2, $sp, 96 # 8-byte Folded Reload ld.w $a0, $a2, 820 st.b $s6, $s3, 24 st.d $s2, $s3, 16 @@ -12960,20 +12946,20 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii ori $a0, $a0, 3823 lu32i.d $a0, -69906 lu52i.d $a0, $a0, -274 - st.d $a0, $sp, 48 # 8-byte Folded Spill - addi.d $s5, $sp, 92 + st.d $a0, $sp, 64 # 8-byte Folded Spill + addi.d $s3, $sp, 108 lu12i.w $a0, -349526 ori $a5, $a0, 2731 lu32i.d $a5, 0 ori $a6, $zero, 3 ori $a7, $zero, 120 - st.d $a5, $sp, 64 # 8-byte Folded Spill + st.d $a5, $sp, 80 # 8-byte Folded Spill b .LBB87_146 .p2align 4, , 16 .LBB87_145: # in Loop: Header=BB87_146 Depth=1 - ld.d $s4, $sp, 80 # 8-byte Folded Reload + ld.d $s4, $sp, 96 # 8-byte Folded Reload ld.w $a0, $s4, 884 - ld.d $a2, $sp, 56 # 8-byte Folded Reload + ld.d $a2, $sp, 72 # 8-byte Folded Reload addi.d $a2, $a2, 1 bge $a2, $a0, .LBB87_200 .LBB87_146: # =>This Loop Header: Depth=1 @@ -12986,7 +12972,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii # Child Loop BB87_195 Depth 3 ld.d $a0, $s4, 896 slli.d $a1, $a2, 6 - st.d $a2, $sp, 56 # 8-byte Folded Spill + st.d $a2, $sp, 72 # 8-byte Folded Spill alsl.d $a1, $a2, $a1, 3 add.d $a0, $a0, $a1 ld.d $a1, $a0, 16 @@ -12995,18 +12981,18 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii sub.d $a1, $a1, $a2 ld.d $a3, $a0, 24 srli.d $a1, $a1, 3 - ld.d $a4, $sp, 48 # 8-byte Folded Reload + ld.d $a4, $sp, 64 # 8-byte Folded Reload mul.d $a1, $a1, $a4 - st.w $a1, $sp, 92 + st.w $a1, $sp, 108 sub.d $a1, $a3, $a2 ld.d $a0, $a0, 32 srli.d $a1, $a1, 3 mul.d $a1, $a1, $a4 - st.w $a1, $sp, 96 + st.w $a1, $sp, 112 sub.d $a0, $a0, $a2 srli.d $a0, $a0, 3 mul.d $a0, $a0, $a4 - st.w $a0, $sp, 100 + st.w $a0, $sp, 116 b .LBB87_152 .LBB87_147: # in Loop: Header=BB87_152 Depth=2 .Ltmp277: # EH_LABEL @@ -13015,22 +13001,22 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii .Ltmp278: # EH_LABEL # %bb.148: # %.noexc336.1 # in Loop: Header=BB87_152 Depth=2 - ld.w $a1, $s3, 4 + ld.w $a1, $s4, 4 .LBB87_149: # %_ZN20btAlignedObjectArrayIPN10btSoftBody4NodeEE10deallocateEv.exit.i.i323.1 # in Loop: Header=BB87_152 Depth=2 ori $a0, $zero, 1 - st.b $a0, $s3, 24 - st.d $s2, $s3, 16 - st.w $fp, $s3, 8 - ld.d $a5, $sp, 64 # 8-byte Folded Reload + st.b $a0, $s4, 24 + st.d $s2, $s4, 16 + st.w $fp, $s4, 8 + ld.d $a5, $sp, 80 # 8-byte Folded Reload ori $a6, $zero, 3 ori $a7, $zero, 120 .LBB87_150: # in Loop: Header=BB87_152 Depth=2 - ld.d $a0, $s3, 16 + ld.d $a0, $s4, 16 slli.d $a2, $a1, 3 - stx.d $s4, $a0, $a2 + stx.d $s5, $a0, $a2 addi.d $a0, $a1, 1 - st.w $a0, $s3, 4 + st.w $a0, $s4, 4 .LBB87_151: # in Loop: Header=BB87_152 Depth=2 beq $s7, $a6, .LBB87_145 .LBB87_152: # Parent Loop BB87_146 Depth=1 @@ -13043,23 +13029,23 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii # Child Loop BB87_195 Depth 3 move $s4, $s7 slli.d $a0, $s7, 2 - ldx.w $a1, $a0, $s5 + ldx.w $a1, $a0, $s3 addi.d $s7, $s7, 1 addi.d $a0, $s7, -3 sltui $a0, $a0, 1 bstrpick.d $a2, $s7, 31, 0 masknez $a0, $a2, $a0 slli.d $a0, $a0, 2 - ldx.w $a0, $a0, $s5 + ldx.w $a0, $a0, $s3 slli.d $a1, $a1, 2 - ldx.w $s3, $s1, $a1 + ldx.w $s5, $s1, $a1 slli.d $a1, $a0, 2 ldx.w $a1, $s1, $a1 - beq $a1, $s3, .LBB87_178 + beq $a1, $s5, .LBB87_178 # %bb.153: # in Loop: Header=BB87_152 Depth=2 - ld.d $a3, $sp, 80 # 8-byte Folded Reload + ld.d $a3, $sp, 96 # 8-byte Folded Reload ld.d $a1, $a3, 1352 - slli.d $a2, $s3, 3 + slli.d $a2, $s5, 3 ldx.d $s8, $a1, $a2 ld.d $a2, $a3, 832 ld.w $a1, $s8, 4 @@ -13194,7 +13180,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii st.b $a0, $s8, 24 st.d $s2, $s8, 16 st.w $fp, $s8, 8 - ld.d $a5, $sp, 64 # 8-byte Folded Reload + ld.d $a5, $sp, 80 # 8-byte Folded Reload ori $a6, $zero, 3 ori $a7, $zero, 120 .LBB87_177: # in Loop: Header=BB87_152 Depth=2 @@ -13211,23 +13197,23 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii alsl.d $a1, $a1, $a1, 1 sub.w $a0, $a0, $a1 slli.d $a0, $a0, 2 - ldx.w $a0, $a0, $s5 + ldx.w $a0, $a0, $s3 slli.d $a1, $a0, 2 ldx.w $a1, $s1, $a1 - beq $a1, $s3, .LBB87_151 + beq $a1, $s5, .LBB87_151 # %bb.179: # in Loop: Header=BB87_152 Depth=2 - ld.d $a3, $sp, 80 # 8-byte Folded Reload + ld.d $a3, $sp, 96 # 8-byte Folded Reload ld.d $a1, $a3, 1352 - slli.d $a2, $s3, 3 - ldx.d $s3, $a1, $a2 + slli.d $a2, $s5, 3 + ldx.d $s4, $a1, $a2 ld.d $a2, $a3, 832 - ld.w $a1, $s3, 4 + ld.w $a1, $s4, 4 mul.d $a0, $a0, $a7 - add.d $s4, $a2, $a0 + add.d $s5, $a2, $a0 blez $a1, .LBB87_184 # %bb.180: # %.lr.ph.i310.1 # in Loop: Header=BB87_152 Depth=2 - ld.d $a0, $s3, 16 + ld.d $a0, $s4, 16 move $a3, $a1 move $a2, $a1 .p2align 4, , 16 @@ -13235,7 +13221,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii # Parent Loop BB87_152 Depth=2 # => This Inner Loop Header: Depth=3 ld.d $a4, $a0, 0 - beq $a4, $s4, .LBB87_183 + beq $a4, $s5, .LBB87_183 # %bb.182: # in Loop: Header=BB87_181 Depth=3 addi.w $a2, $a2, -1 addi.d $a3, $a3, -1 @@ -13248,7 +13234,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii .p2align 4, , 16 .LBB87_184: # %_ZNK20btAlignedObjectArrayIPN10btSoftBody4NodeEE16findLinearSearchERKS2_.exit.thread.1 # in Loop: Header=BB87_152 Depth=2 - ld.w $a0, $s3, 8 + ld.w $a0, $s4, 8 bne $a1, $a0, .LBB87_150 # %bb.185: # in Loop: Header=BB87_152 Depth=2 sltui $a0, $a1, 1 @@ -13270,8 +13256,8 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii # %bb.188: # %.noexc335.1 # in Loop: Header=BB87_152 Depth=2 move $s2, $a0 - ld.w $a1, $s3, 4 - ld.d $a0, $s3, 16 + ld.w $a1, $s4, 4 + ld.d $a0, $s4, 16 blez $a1, .LBB87_198 .LBB87_189: # %.lr.ph.i.i.i326.1 # in Loop: Header=BB87_152 Depth=2 @@ -13324,19 +13310,19 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii bnez $a3, .LBB87_195 .LBB87_196: # %_ZNK20btAlignedObjectArrayIPN10btSoftBody4NodeEE4copyEiiPS2_.exit.thread.i.i331.1 # in Loop: Header=BB87_152 Depth=2 - ld.bu $a2, $s3, 24 + ld.bu $a2, $s4, 24 bnez $a2, .LBB87_147 b .LBB87_149 .LBB87_197: # in Loop: Header=BB87_152 Depth=2 move $s2, $zero - ld.d $a0, $s3, 16 + ld.d $a0, $s4, 16 bgtz $a1, .LBB87_189 .LBB87_198: # %_ZNK20btAlignedObjectArrayIPN10btSoftBody4NodeEE4copyEiiPS2_.exit.i.i320.1 # in Loop: Header=BB87_152 Depth=2 beqz $a0, .LBB87_149 # %bb.199: # %_ZNK20btAlignedObjectArrayIPN10btSoftBody4NodeEE4copyEiiPS2_.exit.i.i320.1 # in Loop: Header=BB87_152 Depth=2 - ld.b $a2, $s3, 24 + ld.b $a2, $s4, 24 andi $a2, $a2, 1 bnez $a2, .LBB87_147 b .LBB87_149 @@ -13366,7 +13352,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii vst $vr0, $s2, 392 st.d $fp, $s2, 408 st.h $zero, $s2, 416 - ld.d $a1, $sp, 80 # 8-byte Folded Reload + ld.d $a1, $sp, 96 # 8-byte Folded Reload ld.w $s5, $a1, 820 st.b $a0, $s2, 56 st.d $zero, $s2, 48 @@ -13455,7 +13441,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii move $s0, $zero ori $s1, $zero, 1 vrepli.b $vr0, 0 - vst $vr0, $sp, 64 # 16-byte Folded Spill + vst $vr0, $sp, 80 # 16-byte Folded Spill lu12i.w $a0, 273536 lu32i.d $a0, 251658 lu52i.d $s2, $a0, 962 @@ -13471,7 +13457,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii st.d $zero, $a0, 36 st.d $zero, $a0, 68 st.d $zero, $a0, 384 - vld $vr0, $sp, 64 # 16-byte Folded Reload + vld $vr0, $sp, 80 # 16-byte Folded Reload vst $vr0, $a0, 392 st.d $s2, $a0, 408 st.h $s3, $a0, 416 @@ -13631,7 +13617,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii ld.w $a2, $s4, 4 .LBB87_244: # %_ZN20btAlignedObjectArrayIPN10btSoftBody4NodeEE10deallocateEv.exit.i.i440 # in Loop: Header=BB87_226 Depth=1 - ld.d $a1, $sp, 80 # 8-byte Folded Reload + ld.d $a1, $sp, 96 # 8-byte Folded Reload ld.d $a0, $a1, 1352 ldx.d $s7, $a0, $s6 st.w $fp, $s4, 8 @@ -13669,7 +13655,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii jirl $ra, $ra, 0 ld.w $a1, $s7, 4 move $s0, $a0 - ld.d $s4, $sp, 80 # 8-byte Folded Reload + ld.d $s4, $sp, 96 # 8-byte Folded Reload ld.d $a0, $s7, 16 blez $a1, .LBB87_260 .LBB87_249: # %.lr.ph.i.i.i443.1 @@ -13737,7 +13723,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii b .LBB87_264 .LBB87_259: # in Loop: Header=BB87_226 Depth=1 move $s0, $zero - ld.d $s4, $sp, 80 # 8-byte Folded Reload + ld.d $s4, $sp, 96 # 8-byte Folded Reload ld.d $a0, $s7, 16 bgtz $a1, .LBB87_249 .LBB87_260: # %_ZNK20btAlignedObjectArrayIPN10btSoftBody4NodeEE4copyEiiPS2_.exit.i.i437.1 @@ -13873,7 +13859,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii ld.w $a1, $s8, 4 .LBB87_282: # %_ZN20btAlignedObjectArrayIPN10btSoftBody4NodeEE10deallocateEv.exit.i.i440.2 # in Loop: Header=BB87_226 Depth=1 - ld.d $a3, $sp, 80 # 8-byte Folded Reload + ld.d $a3, $sp, 96 # 8-byte Folded Reload ld.d $a0, $a3, 1352 ldx.d $s3, $a0, $s6 move $a2, $a1 @@ -13893,7 +13879,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii ld.w $a0, $s3, 4 slli.d $a2, $a2, 3 stx.d $a4, $a3, $a2 - ld.d $s4, $sp, 80 # 8-byte Folded Reload + ld.d $s4, $sp, 96 # 8-byte Folded Reload bne $a0, $a1, .LBB87_225 # %bb.284: # in Loop: Header=BB87_226 Depth=1 sltui $a0, $a1, 1 @@ -14011,7 +13997,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii move $s0, $zero ori $s1, $zero, 1 vrepli.b $vr0, 0 - vst $vr0, $sp, 64 # 16-byte Folded Spill + vst $vr0, $sp, 80 # 16-byte Folded Spill lu12i.w $a0, 273536 lu32i.d $a0, 251658 lu52i.d $s2, $a0, 962 @@ -14027,7 +14013,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii st.d $zero, $a0, 36 st.d $zero, $a0, 68 st.d $zero, $a0, 384 - vld $vr0, $sp, 64 # 16-byte Folded Reload + vld $vr0, $sp, 80 # 16-byte Folded Reload vst $vr0, $a0, 392 st.d $s2, $a0, 408 st.h $s3, $a0, 416 @@ -14304,7 +14290,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii ld.w $a1, $s6, 4 .LBB87_349: # %_ZN20btAlignedObjectArrayIPN10btSoftBody4NodeEE10deallocateEv.exit.i.i485.1 # in Loop: Header=BB87_312 Depth=1 - ld.d $a2, $sp, 80 # 8-byte Folded Reload + ld.d $a2, $sp, 96 # 8-byte Folded Reload ld.d $a0, $a2, 1352 ldx.d $s7, $a0, $s3 move $a0, $a1 @@ -14324,7 +14310,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii ld.w $a2, $s7, 4 slli.d $a0, $a0, 3 stx.d $a4, $a3, $a0 - ld.d $s4, $sp, 80 # 8-byte Folded Reload + ld.d $s4, $sp, 96 # 8-byte Folded Reload bne $a2, $a1, .LBB87_311 # %bb.351: # in Loop: Header=BB87_312 Depth=1 sltui $a0, $a1, 1 @@ -14410,7 +14396,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii bnez $a2, .LBB87_308 b .LBB87_309 .LBB87_365: # %_ZNK20btAlignedObjectArrayIPN10btSoftBody4NodeEE4copyEiiPS2_.exit.i - ld.d $a1, $sp, 80 # 8-byte Folded Reload + ld.d $a1, $sp, 96 # 8-byte Folded Reload beqz $a0, .LBB87_369 .LBB87_366: # %_ZNK20btAlignedObjectArrayIPN10btSoftBody4NodeEE4copyEiiPS2_.exit.thread.i ld.bu $a1, $s2, 24 @@ -14422,7 +14408,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii .Ltmp285: # EH_LABEL .LBB87_368: # %.noexc340 st.d $zero, $s2, 16 - ld.d $a1, $sp, 80 # 8-byte Folded Reload + ld.d $a1, $sp, 96 # 8-byte Folded Reload .LBB87_369: # %_ZN20btAlignedObjectArrayIPN10btSoftBody4NodeEE7reserveEi.exit ld.w $a0, $a1, 820 ori $s4, $zero, 1 @@ -14438,7 +14424,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii .p2align 4, , 16 .LBB87_371: # %_ZN20btAlignedObjectArrayIPN10btSoftBody4NodeEE10deallocateEv.exit.i.i371 # in Loop: Header=BB87_373 Depth=1 - ld.d $a0, $sp, 80 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.w $a0, $a0, 820 st.b $s4, $s2, 24 st.d $s3, $s2, 16 @@ -14456,7 +14442,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii .LBB87_373: # =>This Loop Header: Depth=1 # Child Loop BB87_380 Depth 2 # Child Loop BB87_383 Depth 2 - ld.d $a2, $sp, 80 # 8-byte Folded Reload + ld.d $a2, $sp, 96 # 8-byte Folded Reload ld.d $s8, $a2, 832 bne $a1, $s5, .LBB87_372 # %bb.374: # in Loop: Header=BB87_373 Depth=1 @@ -14550,7 +14536,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii ld.w $a1, $s2, 4 b .LBB87_371 .LBB87_389: # %._crit_edge711 - ld.d $s4, $sp, 80 # 8-byte Folded Reload + ld.d $s4, $sp, 96 # 8-byte Folded Reload ld.w $a1, $s4, 1340 ld.w $a0, $s4, 1344 bne $a1, $a0, .LBB87_410 @@ -14573,15 +14559,15 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii .Ltmp292: # EH_LABEL # %bb.393: # %.noexc361 move $s3, $a0 - ld.d $a0, $sp, 80 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.w $a1, $a0, 1340 b .LBB87_396 .LBB87_394: - ld.d $s4, $sp, 80 # 8-byte Folded Reload + ld.d $s4, $sp, 96 # 8-byte Folded Reload b .LBB87_410 .LBB87_395: move $s3, $zero - ld.d $a0, $sp, 80 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload .LBB87_396: # %_ZN20btAlignedObjectArrayIPN10btSoftBody7ClusterEE8allocateEi.exit.i.i344 ld.d $a0, $a0, 1352 blez $a1, .LBB87_405 @@ -14626,14 +14612,14 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii addi.d $a2, $a2, 8 bnez $a3, .LBB87_403 .LBB87_404: # %_ZNK20btAlignedObjectArrayIPN10btSoftBody7ClusterEE4copyEiiPS2_.exit.thread.i.i357 - ld.d $a3, $sp, 80 # 8-byte Folded Reload + ld.d $a3, $sp, 96 # 8-byte Folded Reload ld.bu $a2, $a3, 1360 bnez $a2, .LBB87_407 b .LBB87_409 .LBB87_405: # %_ZNK20btAlignedObjectArrayIPN10btSoftBody7ClusterEE4copyEiiPS2_.exit.i.i346 beqz $a0, .LBB87_469 # %bb.406: # %_ZNK20btAlignedObjectArrayIPN10btSoftBody7ClusterEE4copyEiiPS2_.exit.i.i346 - ld.d $a3, $sp, 80 # 8-byte Folded Reload + ld.d $a3, $sp, 96 # 8-byte Folded Reload ld.b $a2, $a3, 1360 andi $a2, $a2, 1 beqz $a2, .LBB87_409 @@ -14643,7 +14629,7 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii jirl $ra, $ra, 0 .Ltmp294: # EH_LABEL # %bb.408: # %.noexc362 - ld.d $a3, $sp, 80 # 8-byte Folded Reload + ld.d $a3, $sp, 96 # 8-byte Folded Reload ld.w $a1, $a3, 1340 .LBB87_409: # %_ZN20btAlignedObjectArrayIPN10btSoftBody7ClusterEE10deallocateEv.exit.i.i349 ori $a0, $zero, 1 @@ -14959,29 +14945,29 @@ _ZN10btSoftBody16generateClustersEii: # @_ZN10btSoftBody16generateClustersEii bnez $t7, .LBB87_466 b .LBB87_463 .LBB87_468: # %.loopexit - fld.d $fs7, $sp, 104 # 8-byte Folded Reload - fld.d $fs6, $sp, 112 # 8-byte Folded Reload - fld.d $fs5, $sp, 120 # 8-byte Folded Reload - fld.d $fs4, $sp, 128 # 8-byte Folded Reload - fld.d $fs3, $sp, 136 # 8-byte Folded Reload - fld.d $fs2, $sp, 144 # 8-byte Folded Reload - fld.d $fs1, $sp, 152 # 8-byte Folded Reload - fld.d $fs0, $sp, 160 # 8-byte Folded Reload - ld.d $s8, $sp, 168 # 8-byte Folded Reload - ld.d $s7, $sp, 176 # 8-byte Folded Reload - ld.d $s6, $sp, 184 # 8-byte Folded Reload - ld.d $s5, $sp, 192 # 8-byte Folded Reload - ld.d $s4, $sp, 200 # 8-byte Folded Reload - ld.d $s3, $sp, 208 # 8-byte Folded Reload - ld.d $s2, $sp, 216 # 8-byte Folded Reload - ld.d $s1, $sp, 224 # 8-byte Folded Reload - ld.d $s0, $sp, 232 # 8-byte Folded Reload - ld.d $fp, $sp, 240 # 8-byte Folded Reload - ld.d $ra, $sp, 248 # 8-byte Folded Reload - addi.d $sp, $sp, 256 + fld.d $fs7, $sp, 120 # 8-byte Folded Reload + fld.d $fs6, $sp, 128 # 8-byte Folded Reload + fld.d $fs5, $sp, 136 # 8-byte Folded Reload + fld.d $fs4, $sp, 144 # 8-byte Folded Reload + fld.d $fs3, $sp, 152 # 8-byte Folded Reload + fld.d $fs2, $sp, 160 # 8-byte Folded Reload + fld.d $fs1, $sp, 168 # 8-byte Folded Reload + fld.d $fs0, $sp, 176 # 8-byte Folded Reload + ld.d $s8, $sp, 184 # 8-byte Folded Reload + ld.d $s7, $sp, 192 # 8-byte Folded Reload + ld.d $s6, $sp, 200 # 8-byte Folded Reload + ld.d $s5, $sp, 208 # 8-byte Folded Reload + ld.d $s4, $sp, 216 # 8-byte Folded Reload + ld.d $s3, $sp, 224 # 8-byte Folded Reload + ld.d $s2, $sp, 232 # 8-byte Folded Reload + ld.d $s1, $sp, 240 # 8-byte Folded Reload + ld.d $s0, $sp, 248 # 8-byte Folded Reload + ld.d $fp, $sp, 256 # 8-byte Folded Reload + ld.d $ra, $sp, 264 # 8-byte Folded Reload + addi.d $sp, $sp, 272 ret .LBB87_469: - ld.d $a3, $sp, 80 # 8-byte Folded Reload + ld.d $a3, $sp, 96 # 8-byte Folded Reload b .LBB87_409 .LBB87_470: .Ltmp307: # EH_LABEL @@ -15159,31 +15145,28 @@ _ZN20btAlignedObjectArrayI9btVector3ED2Ev: # @_ZN20btAlignedObjectArrayI9btVecto .size _ZN20btAlignedObjectArrayI9btVector3ED2Ev, .Lfunc_end88-_ZN20btAlignedObjectArrayI9btVector3ED2Ev .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN10btSoftBody18initializeClustersEv -.LCPI89_0: - .word 0x5d5e0b6b # float 9.99999984E+17 .text - .globl _ZN10btSoftBody18initializeClustersEv + .globl _ZN10btSoftBody18initializeClustersEv # -- Begin function _ZN10btSoftBody18initializeClustersEv .p2align 5 .type _ZN10btSoftBody18initializeClustersEv,@function _ZN10btSoftBody18initializeClustersEv: # @_ZN10btSoftBody18initializeClustersEv .cfi_startproc # %bb.0: - addi.d $sp, $sp, -128 - .cfi_def_cfa_offset 128 - st.d $ra, $sp, 120 # 8-byte Folded Spill - st.d $fp, $sp, 112 # 8-byte Folded Spill - st.d $s0, $sp, 104 # 8-byte Folded Spill - st.d $s1, $sp, 96 # 8-byte Folded Spill - st.d $s2, $sp, 88 # 8-byte Folded Spill - st.d $s3, $sp, 80 # 8-byte Folded Spill - st.d $s4, $sp, 72 # 8-byte Folded Spill - st.d $s5, $sp, 64 # 8-byte Folded Spill - st.d $s6, $sp, 56 # 8-byte Folded Spill - st.d $s7, $sp, 48 # 8-byte Folded Spill - st.d $s8, $sp, 40 # 8-byte Folded Spill - fst.d $fs0, $sp, 32 # 8-byte Folded Spill + addi.d $sp, $sp, -144 + .cfi_def_cfa_offset 144 + st.d $ra, $sp, 136 # 8-byte Folded Spill + st.d $fp, $sp, 128 # 8-byte Folded Spill + st.d $s0, $sp, 120 # 8-byte Folded Spill + st.d $s1, $sp, 112 # 8-byte Folded Spill + st.d $s2, $sp, 104 # 8-byte Folded Spill + st.d $s3, $sp, 96 # 8-byte Folded Spill + st.d $s4, $sp, 88 # 8-byte Folded Spill + st.d $s5, $sp, 80 # 8-byte Folded Spill + st.d $s6, $sp, 72 # 8-byte Folded Spill + st.d $s7, $sp, 64 # 8-byte Folded Spill + st.d $s8, $sp, 56 # 8-byte Folded Spill + fst.d $fs0, $sp, 48 # 8-byte Folded Spill + fst.d $fs1, $sp, 40 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -15196,6 +15179,7 @@ _ZN10btSoftBody18initializeClustersEv: # @_ZN10btSoftBody18initializeClustersEv .cfi_offset 30, -80 .cfi_offset 31, -88 .cfi_offset 56, -96 + .cfi_offset 57, -104 move $fp, $a0 ld.w $a0, $a0, 1340 blez $a0, .LBB89_49 @@ -15204,6 +15188,9 @@ _ZN10btSoftBody18initializeClustersEv: # @_ZN10btSoftBody18initializeClustersEv ori $s2, $zero, 8 ori $s4, $zero, 1 movgr2fr.w $fs0, $zero + lu12i.w $a0, 382432 + ori $a0, $a0, 2923 + movgr2fr.w $fs1, $a0 vrepli.b $vr17, 0 lu12i.w $s5, 260096 vst $vr17, $sp, 16 # 16-byte Folded Spill @@ -15344,9 +15331,8 @@ _ZN10btSoftBody18initializeClustersEv: # @_ZN10btSoftBody18initializeClustersEv b .LBB89_26 .p2align 4, , 16 .LBB89_24: # in Loop: Header=BB89_26 Depth=2 - pcalau12i $a3, %pc_hi20(.LCPI89_0) - fld.s $fa0, $a3, %pc_lo12(.LCPI89_0) st.b $s4, $s6, 416 + fmov.s $fa0, $fs1 .LBB89_25: # in Loop: Header=BB89_26 Depth=2 fst.s $fa0, $a1, 0 fld.s $fa1, $s6, 164 @@ -15624,34 +15610,26 @@ _ZN10btSoftBody18initializeClustersEv: # @_ZN10btSoftBody18initializeClustersEv blt $a2, $a3, .LBB89_48 b .LBB89_2 .LBB89_49: # %._crit_edge181 - fld.d $fs0, $sp, 32 # 8-byte Folded Reload - ld.d $s8, $sp, 40 # 8-byte Folded Reload - ld.d $s7, $sp, 48 # 8-byte Folded Reload - ld.d $s6, $sp, 56 # 8-byte Folded Reload - ld.d $s5, $sp, 64 # 8-byte Folded Reload - ld.d $s4, $sp, 72 # 8-byte Folded Reload - ld.d $s3, $sp, 80 # 8-byte Folded Reload - ld.d $s2, $sp, 88 # 8-byte Folded Reload - ld.d $s1, $sp, 96 # 8-byte Folded Reload - ld.d $s0, $sp, 104 # 8-byte Folded Reload - ld.d $fp, $sp, 112 # 8-byte Folded Reload - ld.d $ra, $sp, 120 # 8-byte Folded Reload - addi.d $sp, $sp, 128 + fld.d $fs1, $sp, 40 # 8-byte Folded Reload + fld.d $fs0, $sp, 48 # 8-byte Folded Reload + ld.d $s8, $sp, 56 # 8-byte Folded Reload + ld.d $s7, $sp, 64 # 8-byte Folded Reload + ld.d $s6, $sp, 72 # 8-byte Folded Reload + ld.d $s5, $sp, 80 # 8-byte Folded Reload + ld.d $s4, $sp, 88 # 8-byte Folded Reload + ld.d $s3, $sp, 96 # 8-byte Folded Reload + ld.d $s2, $sp, 104 # 8-byte Folded Reload + ld.d $s1, $sp, 112 # 8-byte Folded Reload + ld.d $s0, $sp, 120 # 8-byte Folded Reload + ld.d $fp, $sp, 128 # 8-byte Folded Reload + ld.d $ra, $sp, 136 # 8-byte Folded Reload + addi.d $sp, $sp, 144 ret .Lfunc_end89: .size _ZN10btSoftBody18initializeClustersEv, .Lfunc_end89-_ZN10btSoftBody18initializeClustersEv .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN10btSoftBody14updateClustersEv -.LCPI90_0: - .word 0x38d1b717 # float 9.99999974E-5 -.LCPI90_1: - .word 0x3951b717 # float 1.99999995E-4 -.LCPI90_2: - .word 0x399d4951 # float 2.99999985E-4 - .text - .globl _ZN10btSoftBody14updateClustersEv + .globl _ZN10btSoftBody14updateClustersEv # -- Begin function _ZN10btSoftBody14updateClustersEv .p2align 5 .type _ZN10btSoftBody14updateClustersEv,@function _ZN10btSoftBody14updateClustersEv: # @_ZN10btSoftBody14updateClustersEv @@ -15707,12 +15685,15 @@ _ZN10btSoftBody14updateClustersEv: # @_ZN10btSoftBody14updateClustersEv vrepli.b $vr0, 0 vst $vr0, $sp, 16 # 16-byte Folded Spill movgr2fr.w $fs0, $zero - pcalau12i $a1, %pc_hi20(.LCPI90_0) - fld.s $fs1, $a1, %pc_lo12(.LCPI90_0) - pcalau12i $a1, %pc_hi20(.LCPI90_1) - fld.s $fs2, $a1, %pc_lo12(.LCPI90_1) - pcalau12i $a1, %pc_hi20(.LCPI90_2) - fld.s $fs3, $a1, %pc_lo12(.LCPI90_2) + lu12i.w $a1, 232731 + ori $a1, $a1, 1815 + movgr2fr.w $fs1, $a1 + lu12i.w $a1, 234779 + ori $a1, $a1, 1815 + movgr2fr.w $fs2, $a1 + lu12i.w $a1, 235988 + ori $a1, $a1, 2385 + movgr2fr.w $fs3, $a1 ori $s5, $zero, 2 b .LBB90_5 .LBB90_2: # in Loop: Header=BB90_5 Depth=1 @@ -19429,12 +19410,7 @@ _ZNK10btSoftBody7rayTestERK9btVector3S2_RfRNS_8eFeature1_ERib: # @_ZNK10btSoftBo .size _ZNK10btSoftBody7rayTestERK9btVector3S2_RfRNS_8eFeature1_ERib, .Lfunc_end96-_ZNK10btSoftBody7rayTestERK9btVector3S2_RfRNS_8eFeature1_ERib .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN10btSoftBody13predictMotionEf -.LCPI97_0: - .word 0x3eaaaaab # float 0.333333343 - .text - .globl _ZN10btSoftBody13predictMotionEf + .globl _ZN10btSoftBody13predictMotionEf # -- Begin function _ZN10btSoftBody13predictMotionEf .p2align 5 .type _ZN10btSoftBody13predictMotionEf,@function _ZN10btSoftBody13predictMotionEf: # @_ZN10btSoftBody13predictMotionEf @@ -19707,10 +19683,11 @@ _ZN10btSoftBody13predictMotionEf: # @_ZN10btSoftBody13predictMotionEf ld.w $a0, $fp, 884 blez $a0, .LBB97_22 # %bb.20: # %.lr.ph208 - pcalau12i $a0, %pc_hi20(.LCPI97_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI97_0) move $s2, $zero move $s3, $zero + lu12i.w $a0, 256682 + ori $a0, $a0, 2731 + movgr2fr.w $fs1, $a0 .p2align 4, , 16 .LBB97_21: # =>This Inner Loop Header: Depth=1 ld.d $a0, $fp, 896 @@ -19998,14 +19975,7 @@ _ZN10btSoftBody13predictMotionEf: # @_ZN10btSoftBody13predictMotionEf .size _ZN10btSoftBody13predictMotionEf, .Lfunc_end97-_ZN10btSoftBody13predictMotionEf .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN10btSoftBody11applyForcesEv -.LCPI98_0: - .word 0x34000000 # float 1.1920929E-7 -.LCPI98_1: - .word 0x3eaaaaab # float 0.333333343 - .text - .globl _ZN10btSoftBody11applyForcesEv + .globl _ZN10btSoftBody11applyForcesEv # -- Begin function _ZN10btSoftBody11applyForcesEv .p2align 5 .type _ZN10btSoftBody11applyForcesEv,@function _ZN10btSoftBody11applyForcesEv: # @_ZN10btSoftBody11applyForcesEv @@ -20133,8 +20103,8 @@ _ZN10btSoftBody11applyForcesEv: # @_ZN10btSoftBody11applyForcesEv ld.w $a4, $fp, 312 addi.d $a5, $a5, 100 movgr2fr.w $fa7, $zero - pcalau12i $a6, %pc_hi20(.LCPI98_0) - fld.s $ft0, $a6, %pc_lo12(.LCPI98_0) + lu12i.w $a6, 212992 + movgr2fr.w $ft0, $a6 vldi $vr9, -1056 ori $a6, $zero, 1 vldi $vr10, -1040 @@ -20418,11 +20388,12 @@ _ZN10btSoftBody11applyForcesEv: # @_ZN10btSoftBody11applyForcesEv ld.d $a1, $fp, 776 ld.w $a2, $fp, 312 addi.d $a3, $a3, 32 - pcalau12i $a4, %pc_hi20(.LCPI98_1) - fld.s $fa3, $a4, %pc_lo12(.LCPI98_1) + lu12i.w $a4, 256682 + ori $a4, $a4, 2731 + movgr2fr.w $fa3, $a4 movgr2fr.w $fa4, $zero - pcalau12i $a4, %pc_hi20(.LCPI98_0) - fld.s $fa5, $a4, %pc_lo12(.LCPI98_0) + lu12i.w $a4, 212992 + movgr2fr.w $fa5, $a4 ori $a4, $zero, 3 vldi $vr6, -1040 vldi $vr7, -1168 @@ -20701,16 +20672,7 @@ _ZN10btSoftBody11applyForcesEv: # @_ZN10btSoftBody11applyForcesEv .size _ZN10btSoftBody11applyForcesEv, .Lfunc_end98-_ZN10btSoftBody11applyForcesEv .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN10btSoftBody10updatePoseEv -.LCPI99_0: - .word 0x34000000 # float 1.1920929E-7 -.LCPI99_1: - .word 0x34800000 # float 2.38418579E-7 -.LCPI99_2: - .word 0x34c00000 # float 3.57627869E-7 - .text - .globl _ZN10btSoftBody10updatePoseEv + .globl _ZN10btSoftBody10updatePoseEv # -- Begin function _ZN10btSoftBody10updatePoseEv .p2align 5 .type _ZN10btSoftBody10updatePoseEv,@function _ZN10btSoftBody10updatePoseEv: # @_ZN10btSoftBody10updatePoseEv @@ -20783,17 +20745,17 @@ _ZN10btSoftBody10updatePoseEv: # @_ZN10btSoftBody10updatePoseEv fld.s $fs1, $sp, 116 fld.s $ft3, $sp, 120 fld.s $fs2, $sp, 136 - pcalau12i $a2, %pc_hi20(.LCPI99_2) - fld.s $fs3, $a2, %pc_lo12(.LCPI99_2) - pcalau12i $a2, %pc_hi20(.LCPI99_1) - fld.s $fs4, $a2, %pc_lo12(.LCPI99_1) - pcalau12i $a2, %pc_hi20(.LCPI99_0) - fld.s $fs5, $a2, %pc_lo12(.LCPI99_0) addi.d $a2, $a4, 24 - movgr2fr.w $fs7, $zero addi.d $a3, $a3, 8 - fmov.s $fs6, $fs7 - fmov.s $fs0, $fs7 + movgr2fr.w $fs6, $zero + lu12i.w $a4, 216064 + movgr2fr.w $fs3, $a4 + lu12i.w $a4, 215040 + movgr2fr.w $fs4, $a4 + lu12i.w $a4, 212992 + movgr2fr.w $fs5, $a4 + fmov.s $fs7, $fs6 + fmov.s $fs0, $fs6 .p2align 4, , 16 .LBB99_8: # =>This Inner Loop Header: Depth=1 fld.s $fa3, $a2, -8 @@ -20818,13 +20780,13 @@ _ZN10btSoftBody10updatePoseEv: # @_ZN10btSoftBody10updatePoseEv fmul.s $fa3, $fa4, $fa7 fmul.s $fa6, $fa4, $ft0 fmul.s $fa4, $fa4, $ft1 - fadd.s $fs7, $fa3, $fs7 + fadd.s $fs6, $fa3, $fs6 fadd.s $fs4, $fa6, $fs4 fadd.s $fs2, $fa4, $fs2 fmul.s $fa3, $fa5, $fa7 fmul.s $fa4, $fa5, $ft0 fmul.s $fa5, $fa5, $ft1 - fadd.s $fs6, $fa3, $fs6 + fadd.s $fs7, $fa3, $fs7 fadd.s $fs0, $fa4, $fs0 fadd.s $fs3, $fa5, $fs3 addi.d $a0, $a0, -1 @@ -20842,21 +20804,21 @@ _ZN10btSoftBody10updatePoseEv: # @_ZN10btSoftBody10updatePoseEv fld.s $fs1, $sp, 116 fld.s $fa0, $sp, 120 fst.s $fa0, $sp, 12 # 4-byte Folded Spill - fld.s $fs2, $sp, 136 - pcalau12i $a0, %pc_hi20(.LCPI99_0) - fld.s $fs5, $a0, %pc_lo12(.LCPI99_0) - pcalau12i $a0, %pc_hi20(.LCPI99_1) - fld.s $fs4, $a0, %pc_lo12(.LCPI99_1) - pcalau12i $a0, %pc_hi20(.LCPI99_2) - fld.s $fs3, $a0, %pc_lo12(.LCPI99_2) movgr2fr.w $fs0, $zero - fmov.s $fs6, $fs0 + fld.s $fs2, $sp, 136 + lu12i.w $a0, 212992 + movgr2fr.w $fs5, $a0 + lu12i.w $a0, 215040 + movgr2fr.w $fs4, $a0 + lu12i.w $a0, 216064 + movgr2fr.w $fs3, $a0 fmov.s $fs7, $fs0 + fmov.s $fs6, $fs0 .LBB99_11: fst.s $fs5, $sp, 112 - fst.s $fs7, $sp, 128 + fst.s $fs6, $sp, 128 fst.s $fs4, $sp, 132 - fst.s $fs6, $sp, 144 + fst.s $fs7, $sp, 144 fst.s $fs0, $sp, 148 fst.s $fs3, $sp, 152 addi.d $a0, $sp, 112 @@ -20915,9 +20877,9 @@ _ZN10btSoftBody10updatePoseEv: # @_ZN10btSoftBody10updatePoseEv fmul.s $fa0, $fa5, $ft6 fmadd.s $fa0, $fa2, $ft9, $fa0 fmadd.s $ft3, $ft3, $ft10, $fa0 - fmul.s $fa0, $ft5, $fs7 + fmul.s $fa0, $ft5, $fs6 fmadd.s $fa0, $fs5, $ft4, $fa0 - fmadd.s $fa0, $fs6, $ft0, $fa0 + fmadd.s $fa0, $fs7, $ft0, $fa0 fmul.s $fa1, $ft5, $fs4 fmadd.s $fa1, $fs1, $ft4, $fa1 fmadd.s $fa3, $fs0, $ft0, $fa1 @@ -20925,18 +20887,18 @@ _ZN10btSoftBody10updatePoseEv: # @_ZN10btSoftBody10updatePoseEv fld.s $ft5, $sp, 12 # 4-byte Folded Reload fmadd.s $fa1, $ft5, $ft4, $fa1 fmadd.s $fa6, $fs3, $ft0, $fa1 - fmul.s $fa1, $ft8, $fs7 + fmul.s $fa1, $ft8, $fs6 fmadd.s $fa1, $fs5, $ft2, $fa1 - fmadd.s $fa1, $fs6, $ft1, $fa1 + fmadd.s $fa1, $fs7, $ft1, $fa1 fmul.s $fa2, $ft8, $fs4 fmadd.s $fa2, $fs1, $ft2, $fa2 fmadd.s $fa4, $fs0, $ft1, $fa2 fmul.s $fa2, $ft8, $fs2 fmadd.s $fa2, $ft5, $ft2, $fa2 fmadd.s $fa7, $fs3, $ft1, $fa2 - fmul.s $fa2, $fs7, $ft11 + fmul.s $fa2, $fs6, $ft11 fmadd.s $fa2, $fs5, $ft7, $fa2 - fmadd.s $fa2, $fs6, $ft3, $fa2 + fmadd.s $fa2, $fs7, $ft3, $fa2 fmul.s $fa5, $ft11, $fs4 fmadd.s $fa5, $fs1, $ft7, $fa5 fmadd.s $fa5, $fs0, $ft3, $fa5 @@ -22773,16 +22735,8 @@ _ZN10btSoftBody15RayFromToCaster7ProcessEPK10btDbvtNode: # @_ZN10btSoftBody15Ray .size _ZN10btSoftBody15RayFromToCaster7ProcessEPK10btDbvtNode, .Lfunc_end114-_ZN10btSoftBody15RayFromToCaster7ProcessEPK10btDbvtNode .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN10btSoftBody15RayFromToCaster17rayFromToTriangleERK9btVector3S3_S3_S3_S3_S3_f -.LCPI115_0: - .word 0x34000000 # float 1.1920929E-7 -.LCPI115_1: - .word 0x35a00000 # float 1.1920929E-6 -.LCPI115_2: - .word 0xb5a00000 # float -1.1920929E-6 .section .text._ZN10btSoftBody15RayFromToCaster17rayFromToTriangleERK9btVector3S3_S3_S3_S3_S3_f,"axG",@progbits,_ZN10btSoftBody15RayFromToCaster17rayFromToTriangleERK9btVector3S3_S3_S3_S3_S3_f,comdat - .weak _ZN10btSoftBody15RayFromToCaster17rayFromToTriangleERK9btVector3S3_S3_S3_S3_S3_f + .weak _ZN10btSoftBody15RayFromToCaster17rayFromToTriangleERK9btVector3S3_S3_S3_S3_S3_f # -- Begin function _ZN10btSoftBody15RayFromToCaster17rayFromToTriangleERK9btVector3S3_S3_S3_S3_S3_f .p2align 5 .type _ZN10btSoftBody15RayFromToCaster17rayFromToTriangleERK9btVector3S3_S3_S3_S3_S3_f,@function _ZN10btSoftBody15RayFromToCaster17rayFromToTriangleERK9btVector3S3_S3_S3_S3_S3_f: # @_ZN10btSoftBody15RayFromToCaster17rayFromToTriangleERK9btVector3S3_S3_S3_S3_S3_f @@ -22801,26 +22755,26 @@ _ZN10btSoftBody15RayFromToCaster17rayFromToTriangleERK9btVector3S3_S3_S3_S3_S3_f fld.s $fa7, $a5, 8 fsub.s $fa3, $ft4, $ft5 fsub.s $ft6, $fa5, $ft2 - fsub.s $ft7, $fa6, $ft3 - fsub.s $ft8, $fa7, $ft5 - fneg.s $fa2, $ft7 + fsub.s $ft9, $fa6, $ft3 + fsub.s $ft7, $fa7, $ft5 + fneg.s $fa2, $ft9 fmul.s $fa2, $fa3, $fa2 - fmadd.s $fa2, $fa4, $ft8, $fa2 - fneg.s $ft8, $ft8 - fmul.s $ft8, $fa1, $ft8 - fmadd.s $fa3, $fa3, $ft6, $ft8 - fneg.s $ft8, $ft6 + fmadd.s $fa2, $fa4, $ft7, $fa2 + fneg.s $ft7, $ft7 + fmul.s $ft7, $fa1, $ft7 + fmadd.s $fa3, $fa3, $ft6, $ft7 + fneg.s $ft6, $ft6 + fmul.s $fa4, $fa4, $ft6 fld.s $ft6, $a2, 4 - fmul.s $fa4, $fa4, $ft8 - fmadd.s $fa4, $fa1, $ft7, $fa4 fld.s $ft7, $a2, 0 - fmul.s $fa1, $ft6, $fa3 fld.s $ft8, $a2, 8 - pcalau12i $a1, %pc_hi20(.LCPI115_0) - fld.s $ft9, $a1, %pc_lo12(.LCPI115_0) + fmadd.s $fa4, $fa1, $ft9, $fa4 + fmul.s $fa1, $ft6, $fa3 fmadd.s $fa1, $ft7, $fa2, $fa1 fmadd.s $ft10, $ft8, $fa4, $fa1 fabs.s $fa1, $ft10 + lu12i.w $a1, 212992 + movgr2fr.w $ft9, $a1 fcmp.clt.s $fcc0, $fa1, $ft9 bceqz $fcc0, .LBB115_3 .LBB115_1: # %.critedge.thread @@ -22839,12 +22793,12 @@ _ZN10btSoftBody15RayFromToCaster17rayFromToTriangleERK9btVector3S3_S3_S3_S3_S3_f fmul.s $ft13, $fa3, $ft9 fmadd.s $ft13, $ft11, $fa2, $ft13 fmadd.s $ft13, $ft12, $fa4, $ft13 - pcalau12i $a0, %pc_hi20(.LCPI115_1) - fld.s $ft14, $a0, %pc_lo12(.LCPI115_1) fsub.s $fa0, $ft13, $fa0 fneg.s $fa0, $fa0 fdiv.s $fa0, $fa0, $ft10 - fcmp.cule.s $fcc0, $fa0, $ft14 + lu12i.w $a0, 219648 + movgr2fr.w $ft10, $a0 + fcmp.cule.s $fcc0, $fa0, $ft10 bcnez $fcc0, .LBB115_1 # %bb.4: fcmp.cule.s $fcc0, $fa1, $fa0 @@ -22869,13 +22823,14 @@ _ZN10btSoftBody15RayFromToCaster17rayFromToTriangleERK9btVector3S3_S3_S3_S3_S3_f fmul.s $ft9, $ft2, $ft9 fmadd.s $ft9, $ft3, $ft5, $ft9 fneg.s $ft10, $ft5 - fmul.s $ft9, $fa3, $ft9 - fmadd.s $ft9, $fa2, $ft0, $ft9 - pcalau12i $a0, %pc_hi20(.LCPI115_2) - fld.s $ft0, $a0, %pc_lo12(.LCPI115_2) fmul.s $ft10, $fa1, $ft10 fmadd.s $ft10, $ft2, $ft1, $ft10 - fmadd.s $ft9, $fa4, $ft10, $ft9 + fmul.s $ft9, $fa3, $ft9 + fmadd.s $ft0, $fa2, $ft0, $ft9 + fmadd.s $ft9, $fa4, $ft10, $ft0 + lu12i.w $a0, -304640 + lu32i.d $a0, 0 + movgr2fr.w $ft0, $a0 fcmp.cule.s $fcc0, $ft9, $ft0 bcnez $fcc0, .LBB115_1 # %bb.6: @@ -22916,12 +22871,8 @@ _ZN10btSoftBody15RayFromToCaster17rayFromToTriangleERK9btVector3S3_S3_S3_S3_S3_f .size _ZN10btSoftBody15RayFromToCaster17rayFromToTriangleERK9btVector3S3_S3_S3_S3_S3_f, .Lfunc_end115-_ZN10btSoftBody15RayFromToCaster17rayFromToTriangleERK9btVector3S3_S3_S3_S3_S3_f .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN6btDbvt7rayTestEPK10btDbvtNodeRK9btVector3S5_RNS_8ICollideE -.LCPI116_0: - .word 0x5d5e0b6b # float 9.99999984E+17 .section .text._ZN6btDbvt7rayTestEPK10btDbvtNodeRK9btVector3S5_RNS_8ICollideE,"axG",@progbits,_ZN6btDbvt7rayTestEPK10btDbvtNodeRK9btVector3S5_RNS_8ICollideE,comdat - .weak _ZN6btDbvt7rayTestEPK10btDbvtNodeRK9btVector3S5_RNS_8ICollideE + .weak _ZN6btDbvt7rayTestEPK10btDbvtNodeRK9btVector3S5_RNS_8ICollideE # -- Begin function _ZN6btDbvt7rayTestEPK10btDbvtNodeRK9btVector3S5_RNS_8ICollideE .p2align 5 .type _ZN6btDbvt7rayTestEPK10btDbvtNodeRK9btVector3S5_RNS_8ICollideE,@function _ZN6btDbvt7rayTestEPK10btDbvtNodeRK9btVector3S5_RNS_8ICollideE: # @_ZN6btDbvt7rayTestEPK10btDbvtNodeRK9btVector3S5_RNS_8ICollideE @@ -22998,18 +22949,19 @@ _ZN6btDbvt7rayTestEPK10btDbvtNodeRK9btVector3S5_RNS_8ICollideE: # @_ZN6btDbvt7ra fmul.s $fa4, $fa0, $fa3 fmul.s $fa5, $fa1, $fa3 fmul.s $fa3, $fa2, $fa3 - pcalau12i $a0, %pc_hi20(.LCPI116_0) - fld.s $fa6, $a0, %pc_lo12(.LCPI116_0) - frecip.s $fa7, $fa4 + frecip.s $fa6, $fa4 movgr2fr.w $fs0, $zero fcmp.ceq.s $fcc0, $fa4, $fs0 - fsel $fs1, $fa7, $fa6, $fcc0 - frecip.s $fa7, $fa5 + lu12i.w $a0, 382432 + ori $a0, $a0, 2923 + movgr2fr.w $fa7, $a0 + fsel $fs1, $fa6, $fa7, $fcc0 + frecip.s $fa6, $fa5 fcmp.ceq.s $fcc0, $fa5, $fs0 - fsel $fs2, $fa7, $fa6, $fcc0 - frecip.s $fa7, $fa3 + fsel $fs2, $fa6, $fa7, $fcc0 + frecip.s $fa6, $fa3 fcmp.ceq.s $fcc0, $fa3, $fs0 - fsel $fs3, $fa7, $fa6, $fcc0 + fsel $fs3, $fa6, $fa7, $fcc0 fmul.s $fa1, $fa1, $fa5 fmadd.s $fa0, $fa4, $fa0, $fa1 fmadd.s $fs4, $fa3, $fa2, $fa0 @@ -23820,14 +23772,8 @@ _ZN11btSparseSdfILi3EE8EvaluateERK9btVector3P16btCollisionShapeRS1_f: # @_ZN11bt .size _ZN11btSparseSdfILi3EE8EvaluateERK9btVector3P16btCollisionShapeRS1_f, .Lfunc_end118-_ZN11btSparseSdfILi3EE8EvaluateERK9btVector3P16btCollisionShapeRS1_f .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZL14PolarDecomposeRK11btMatrix3x3RS_S2_ -.LCPI119_0: - .word 0x34000000 # float 1.1920929E-7 -.LCPI119_1: - .word 0x38d1b717 # float 9.99999974E-5 .text - .p2align 5 + .p2align 5 # -- Begin function _ZL14PolarDecomposeRK11btMatrix3x3RS_S2_ .type _ZL14PolarDecomposeRK11btMatrix3x3RS_S2_,@function _ZL14PolarDecomposeRK11btMatrix3x3RS_S2_: # @_ZL14PolarDecomposeRK11btMatrix3x3RS_S2_ # %bb.0: @@ -23863,12 +23809,12 @@ _ZL14PolarDecomposeRK11btMatrix3x3RS_S2_: # @_ZL14PolarDecomposeRK11btMatrix3x3R fmadd.s $ft1, $fa1, $ft1, $ft2 fneg.s $ft2, $fa6 fmul.s $ft2, $fa4, $ft2 - pcalau12i $a3, %pc_hi20(.LCPI119_0) - fld.s $ft3, $a3, %pc_lo12(.LCPI119_0) fmadd.s $ft2, $fa3, $fa7, $ft2 fmadd.s $ft4, $fa2, $ft2, $ft1 fabs.s $ft1, $ft4 - fcmp.clt.s $fcc0, $ft1, $ft3 + lu12i.w $a3, 212992 + movgr2fr.w $ft2, $a3 + fcmp.clt.s $fcc0, $ft1, $ft2 bceqz $fcc0, .LBB119_2 # %bb.1: addi.d $a0, $a1, 24 @@ -23887,10 +23833,11 @@ _ZL14PolarDecomposeRK11btMatrix3x3RS_S2_: # @_ZL14PolarDecomposeRK11btMatrix3x3R st.w $zero, $a2, 44 ret .LBB119_2: # %.preheader.preheader - pcalau12i $a3, %pc_hi20(.LCPI119_1) - fld.s $ft1, $a3, %pc_lo12(.LCPI119_1) ori $a3, $zero, 15 - vldi $vr10, -1184 + vldi $vr9, -1184 + lu12i.w $a4, 232731 + ori $a4, $a4, 1815 + movgr2fr.w $ft2, $a4 .p2align 4, , 16 .LBB119_3: # %.preheader # =>This Inner Loop Header: Depth=1 @@ -23938,15 +23885,15 @@ _ZL14PolarDecomposeRK11btMatrix3x3RS_S2_: # @_ZL14PolarDecomposeRK11btMatrix3x3R fadd.s $fa6, $fa6, $ft8 fadd.s $fa7, $fa7, $ft11 fadd.s $ft0, $ft0, $ft12 - fmul.s $fa1, $fa1, $ft2 - fmul.s $fa0, $fa0, $ft2 - fmul.s $fa2, $fa2, $ft2 - fmul.s $fa3, $fa3, $ft2 - fmul.s $fa4, $fa4, $ft2 - fmul.s $fa5, $fa5, $ft2 - fmul.s $fa6, $fa6, $ft2 - fmul.s $fa7, $fa7, $ft2 - fmul.s $ft0, $ft0, $ft2 + fmul.s $fa1, $fa1, $ft1 + fmul.s $fa0, $fa0, $ft1 + fmul.s $fa2, $fa2, $ft1 + fmul.s $fa3, $fa3, $ft1 + fmul.s $fa4, $fa4, $ft1 + fmul.s $fa5, $fa5, $ft1 + fmul.s $fa6, $fa6, $ft1 + fmul.s $fa7, $fa7, $ft1 + fmul.s $ft0, $ft0, $ft1 fneg.s $ft4, $fa7 fmul.s $ft4, $fa5, $ft4 fmadd.s $ft4, $fa4, $ft0, $ft4 @@ -23961,7 +23908,7 @@ _ZL14PolarDecomposeRK11btMatrix3x3RS_S2_: # @_ZL14PolarDecomposeRK11btMatrix3x3R fmadd.s $ft4, $fa2, $ft5, $ft4 fsub.s $ft3, $ft4, $ft3 fmul.s $ft3, $ft3, $ft3 - fcmp.cule.s $fcc0, $ft3, $ft1 + fcmp.cule.s $fcc0, $ft3, $ft2 bcnez $fcc0, .LBB119_5 # %bb.4: # %.preheader # in Loop: Header=BB119_3 Depth=1 @@ -25386,14 +25333,8 @@ _ZNK10btSoftBody4Body13applyDImpulseERK9btVector3S3_: # @_ZNK10btSoftBody4Body13 .size _ZNK10btSoftBody4Body13applyDImpulseERK9btVector3S3_, .Lfunc_end125-_ZNK10btSoftBody4Body13applyDImpulseERK9btVector3S3_ .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN10btSoftBody6AJoint7PrepareEfi -.LCPI126_0: - .word 0x34000000 # float 1.1920929E-7 -.LCPI126_1: - .word 0x3e490fdb # float 0.196349546 .text - .globl _ZN10btSoftBody6AJoint7PrepareEfi + .globl _ZN10btSoftBody6AJoint7PrepareEfi # -- Begin function _ZN10btSoftBody6AJoint7PrepareEfi .p2align 5 .type _ZN10btSoftBody6AJoint7PrepareEfi,@function _ZN10btSoftBody6AJoint7PrepareEfi: # @_ZN10btSoftBody6AJoint7PrepareEfi @@ -25541,11 +25482,11 @@ _ZN10btSoftBody6AJoint7PrepareEfi: # @_ZN10btSoftBody6AJoint7PrepareEfi fmul.s $ft0, $fa1, $ft0 fmadd.s $ft0, $fa0, $fa3, $ft0 fmul.s $ft1, $fa7, $fa7 - pcalau12i $a0, %pc_hi20(.LCPI126_0) - fld.s $ft2, $a0, %pc_lo12(.LCPI126_0) fmadd.s $ft1, $fa6, $fa6, $ft1 fmadd.s $ft1, $ft0, $ft0, $ft1 fsqrt.s $ft1, $ft1 + lu12i.w $a0, 212992 + movgr2fr.w $ft2, $a0 fcmp.cule.s $fcc0, $ft1, $ft2 st.d $a1, $fp, 208 bcnez $fcc0, .LBB126_8 @@ -25576,8 +25517,9 @@ _ZN10btSoftBody6AJoint7PrepareEfi: # @_ZN10btSoftBody6AJoint7PrepareEfi fsel $fa0, $fa1, $fa2, $fcc0 pcaddu18i $ra, %call36(acosf) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI126_1) - fld.s $fa1, $a0, %pc_lo12(.LCPI126_1) + lu12i.w $a0, 255120 + ori $a0, $a0, 4059 + movgr2fr.w $fa1, $a0 fcmp.clt.s $fcc0, $fa1, $fa0 fsel $fa0, $fa0, $fa1, $fcc0 movgr2fr.w $fa1, $s2 @@ -27055,12 +26997,8 @@ _ZN11btRigidBody12applyImpulseERK9btVector3S2_: # @_ZN11btRigidBody12applyImpuls .size _ZN11btRigidBody12applyImpulseERK9btVector3S2_, .Lfunc_end134-_ZN11btRigidBody12applyImpulseERK9btVector3S2_ .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN10btSoftBody16PSolve_RContactsEPS_ff -.LCPI135_0: - .word 0x34000000 # float 1.1920929E-7 .text - .globl _ZN10btSoftBody16PSolve_RContactsEPS_ff + .globl _ZN10btSoftBody16PSolve_RContactsEPS_ff # -- Begin function _ZN10btSoftBody16PSolve_RContactsEPS_ff .p2align 5 .type _ZN10btSoftBody16PSolve_RContactsEPS_ff,@function _ZN10btSoftBody16PSolve_RContactsEPS_ff: # @_ZN10btSoftBody16PSolve_RContactsEPS_ff @@ -27098,11 +27036,11 @@ _ZN10btSoftBody16PSolve_RContactsEPS_ff: # @_ZN10btSoftBody16PSolve_RContactsEPS # %bb.1: # %.lr.ph fmov.s $fs1, $fa0 move $s0, $zero - pcalau12i $a1, %pc_hi20(.LCPI135_0) - fld.s $fs3, $a1, %pc_lo12(.LCPI135_0) ori $a1, $zero, 120 mul.d $s1, $a0, $a1 - movgr2fr.w $fs4, $zero + movgr2fr.w $fs3, $zero + lu12i.w $a0, 212992 + movgr2fr.w $fs4, $a0 b .LBB135_3 .p2align 4, , 16 .LBB135_2: # in Loop: Header=BB135_3 Depth=1 @@ -27117,9 +27055,9 @@ _ZN10btSoftBody16PSolve_RContactsEPS_ff: # @_ZN10btSoftBody16PSolve_RContactsEPS sltui $a3, $a0, 1 or $a2, $a3, $a2 add.d $a1, $a1, $s0 - fmov.s $fa5, $fs4 - fmov.s $fa0, $fs4 - fmov.s $fa2, $fs4 + fmov.s $fa5, $fs3 + fmov.s $fa0, $fs3 + fmov.s $fa2, $fs3 bnez $a2, .LBB135_5 # %bb.4: # in Loop: Header=BB135_3 Depth=1 fld.s $fa0, $a1, 92 @@ -27166,7 +27104,7 @@ _ZN10btSoftBody16PSolve_RContactsEPS_ff: # @_ZN10btSoftBody16PSolve_RContactsEPS fmul.s $ft1, $fa2, $fa0 fmadd.s $ft1, $fa1, $fa3, $ft1 fmadd.s $ft1, $fa5, $fa4, $ft1 - fcmp.cult.s $fcc0, $fs3, $ft1 + fcmp.cult.s $fcc0, $fs4, $ft1 bcnez $fcc0, .LBB135_2 # %bb.6: # in Loop: Header=BB135_3 Depth=1 fld.s $ft2, $a1, 24 @@ -32351,14 +32289,8 @@ _ZN15btSoftColliders12CollideVF_SSD0Ev: # @_ZN15btSoftColliders12CollideVF_SSD0E .size _ZN15btSoftColliders12CollideVF_SSD0Ev, .Lfunc_end198-_ZN15btSoftColliders12CollideVF_SSD0Ev .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN15btSoftColliders12CollideVF_SS7ProcessEPK10btDbvtNodeS3_ -.LCPI199_0: - .word 0x7f7fffff # float 3.40282347E+38 -.LCPI199_1: - .word 0x34000000 # float 1.1920929E-7 .section .text._ZN15btSoftColliders12CollideVF_SS7ProcessEPK10btDbvtNodeS3_,"axG",@progbits,_ZN15btSoftColliders12CollideVF_SS7ProcessEPK10btDbvtNodeS3_,comdat - .weak _ZN15btSoftColliders12CollideVF_SS7ProcessEPK10btDbvtNodeS3_ + .weak _ZN15btSoftColliders12CollideVF_SS7ProcessEPK10btDbvtNodeS3_ # -- Begin function _ZN15btSoftColliders12CollideVF_SS7ProcessEPK10btDbvtNodeS3_ .p2align 5 .type _ZN15btSoftColliders12CollideVF_SS7ProcessEPK10btDbvtNodeS3_,@function _ZN15btSoftColliders12CollideVF_SS7ProcessEPK10btDbvtNodeS3_: # @_ZN15btSoftColliders12CollideVF_SS7ProcessEPK10btDbvtNodeS3_ @@ -32415,17 +32347,16 @@ _ZN15btSoftColliders12CollideVF_SS7ProcessEPK10btDbvtNodeS3_: # @_ZN15btSoftColl fst.s $fa0, $sp, 40 # 4-byte Folded Spill fsub.s $ft10, $fa0, $fa4 ld.d $a3, $s0, 32 - fst.s $fa1, $sp, 36 # 4-byte Folded Spill + fst.s $fa1, $sp, 32 # 4-byte Folded Spill fsub.s $ft12, $fa1, $fa2 - fst.s $fa5, $sp, 32 # 4-byte Folded Spill + fst.s $fa5, $sp, 28 # 4-byte Folded Spill fsub.s $ft13, $fa5, $fa3 - fld.s $fa6, $a2, 24 + fld.s $fa7, $a2, 24 fld.s $fa5, $a3, 16 fld.s $fa0, $a3, 20 fld.s $fa1, $a3, 24 - fst.s $fa6, $sp, 24 # 4-byte Folded Spill - fsub.s $ft14, $fa6, $fa4 - fst.s $fa5, $sp, 28 # 4-byte Folded Spill + fsub.s $ft14, $fa7, $fa4 + fst.s $fa5, $sp, 36 # 4-byte Folded Spill fsub.s $ft5, $fa5, $fa2 fst.s $fa0, $sp, 48 # 4-byte Folded Spill fsub.s $ft6, $fa0, $fa3 @@ -32446,24 +32377,26 @@ _ZN15btSoftColliders12CollideVF_SS7ProcessEPK10btDbvtNodeS3_: # @_ZN15btSoftColl fneg.s $fa0, $fa0 fmul.s $fa0, $fs4, $fa0 fmadd.s $fs0, $fs3, $fa1, $fa0 - pcalau12i $a4, %pc_hi20(.LCPI199_1) - fld.s $ft11, $a4, %pc_lo12(.LCPI199_1) fmul.s $fa0, $fs7, $fs7 fmadd.s $fa0, $fs6, $fs6, $fa0 fmadd.s $fa0, $fs0, $fs0, $fa0 + lu12i.w $a4, 522239 + lu12i.w $a5, 212992 + movgr2fr.w $ft11, $a5 fcmp.cule.s $fcc0, $fa0, $ft11 - pcalau12i $a4, %pc_hi20(.LCPI199_0) + ori $a4, $a4, 4095 + fst.s $fa7, $sp, 24 # 4-byte Folded Spill bcnez $fcc0, .LBB199_8 # %bb.1: frsqrt.s $fa0, $fa0 fmul.s $fa1, $fs6, $fa0 fmul.s $fa5, $fs7, $fa0 fmul.s $fa6, $fs0, $fa0 - fmul.s $fa7, $ft9, $fa5 - fld.s $fa0, $a4, %pc_lo12(.LCPI199_0) - fmadd.s $fa7, $ft8, $fa1, $fa7 - fmadd.s $fa7, $ft10, $fa6, $fa7 + fmul.s $fa0, $ft9, $fa5 + fmadd.s $fa0, $ft8, $fa1, $fa0 + fmadd.s $fa7, $ft10, $fa6, $fa0 fmul.s $fs1, $fa7, $fa7 + movgr2fr.w $fa0, $a4 fcmp.cule.s $fcc0, $fa0, $fs1 # implicit-def: $f8 # kill: killed $f8 @@ -32473,25 +32406,25 @@ _ZN15btSoftColliders12CollideVF_SS7ProcessEPK10btDbvtNodeS3_: # @_ZN15btSoftColl # kill: killed $f8 bcnez $fcc0, .LBB199_9 # %bb.2: - fmul.s $ft0, $fa1, $fa7 + fmul.s $fa1, $fa1, $fa7 fmul.s $fa5, $fa5, $fa7 fmul.s $fa6, $fa6, $fa7 - fsub.s $fs2, $ft8, $ft0 + fsub.s $fs2, $ft8, $fa1 fsub.s $fa0, $ft9, $fa5 - fsub.s $fa1, $ft10, $fa6 - fst.s $ft0, $sp, 52 # 4-byte Folded Spill - fsub.s $ft15, $ft12, $ft0 + fsub.s $ft15, $ft10, $fa6 + fst.s $fa1, $sp, 52 # 4-byte Folded Spill + fsub.s $fa1, $ft12, $fa1 fst.s $fa5, $sp, 60 # 4-byte Folded Spill fsub.s $fa5, $ft13, $fa5 fst.s $fa6, $sp, 56 # 4-byte Folded Spill fsub.s $fa7, $ft14, $fa6 fneg.s $fa6, $fa5 - fmul.s $fa6, $fa1, $fa6 + fmul.s $fa6, $ft15, $fa6 fmadd.s $fa6, $fa0, $fa7, $fa6 fneg.s $ft0, $fa7 fmul.s $ft0, $fs2, $ft0 - fmadd.s $ft0, $fa1, $ft15, $ft0 - fneg.s $ft1, $ft15 + fmadd.s $ft0, $ft15, $fa1, $ft0 + fneg.s $ft1, $fa1 fmul.s $ft1, $fa0, $ft1 fmadd.s $ft1, $fs2, $fa5, $ft1 fmul.s $ft0, $fs7, $ft0 @@ -32514,31 +32447,31 @@ _ZN15btSoftColliders12CollideVF_SS7ProcessEPK10btDbvtNodeS3_: # @_ZN15btSoftColl fst.s $ft2, $sp, 16 # 4-byte Folded Spill fmov.s $ft2, $ft3 fneg.s $ft3, $ft1 - fmul.s $ft3, $ft15, $ft3 + fmul.s $ft3, $fa1, $ft3 fmadd.s $fa7, $fa7, $fa6, $ft3 fneg.s $ft3, $fa6 fmul.s $fa5, $fa5, $ft3 fmov.s $ft3, $ft2 - fmadd.s $fa5, $ft15, $ft0, $fa5 - fmul.s $fa7, $fs7, $fa7 - fld.s $ft2, $sp, 16 # 4-byte Folded Reload - fmadd.s $fa7, $ft2, $fs6, $fa7 + fmadd.s $fa1, $fa1, $ft0, $fa5 + fmul.s $fa5, $fs7, $fa7 + fld.s $fa7, $sp, 16 # 4-byte Folded Reload + fmadd.s $fa5, $fa7, $fs6, $fa5 fld.s $ft2, $sp, 20 # 4-byte Folded Reload - fmadd.s $fa5, $fa5, $fs0, $fa7 - fcmp.cule.s $fcc0, $fa5, $ft4 + fmadd.s $fa1, $fa1, $fs0, $fa5 + fcmp.cule.s $fcc0, $fa1, $ft4 bcnez $fcc0, .LBB199_5 # %bb.4: - fneg.s $fa5, $fa0 - fmul.s $fa5, $ft1, $fa5 - fmadd.s $fa5, $ft0, $fa1, $fa5 - fneg.s $fa1, $fa1 - fmul.s $fa1, $fa6, $fa1 - fmadd.s $fa1, $ft1, $fs2, $fa1 + fneg.s $fa1, $fa0 + fmul.s $fa1, $ft1, $fa1 + fmadd.s $fa1, $ft0, $ft15, $fa1 + fneg.s $fa5, $ft15 + fmul.s $fa5, $fa6, $fa5 + fmadd.s $fa5, $ft1, $fs2, $fa5 fneg.s $fa7, $fs2 fmul.s $fa7, $ft0, $fa7 fmadd.s $fa0, $fa6, $fa0, $fa7 - fmul.s $fa1, $fs7, $fa1 - fmadd.s $fa1, $fa5, $fs6, $fa1 + fmul.s $fa5, $fs7, $fa5 + fmadd.s $fa1, $fa1, $fs6, $fa5 fmadd.s $fa0, $fa0, $fs0, $fa1 fcmp.clt.s $fcc0, $ft4, $fa0 fmov.s $fa0, $fs1 @@ -32566,17 +32499,17 @@ _ZN15btSoftColliders12CollideVF_SS7ProcessEPK10btDbvtNodeS3_: # @_ZN15btSoftColl fadd.s $ft4, $ft8, $fa1 fadd.s $ft1, $ft9, $fa5 fadd.s $ft15, $ft10, $fa0 - fld.s $fa0, $a4, %pc_lo12(.LCPI199_0) - fmul.s $fa1, $ft1, $ft1 - fmadd.s $fa1, $ft4, $ft4, $fa1 - fmadd.s $fa1, $ft15, $ft15, $fa1 + fmul.s $fa0, $ft1, $ft1 + fmadd.s $fa0, $ft4, $ft4, $fa0 + fmadd.s $fa1, $ft15, $ft15, $fa0 + movgr2fr.w $fa0, $a4 fcmp.cule.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB199_17 # %bb.7: fmov.s $fa0, $fa1 b .LBB199_17 .LBB199_8: - fld.s $fa0, $a4, %pc_lo12(.LCPI199_0) + movgr2fr.w $fa0, $a4 # implicit-def: $f1 # kill: killed $f1 # implicit-def: $f1 @@ -32611,9 +32544,9 @@ _ZN15btSoftColliders12CollideVF_SS7ProcessEPK10btDbvtNodeS3_: # @_ZN15btSoftColl fsub.s $fa5, $ft3, $fa2 fld.s $fa6, $sp, 40 # 4-byte Folded Reload fsub.s $fa6, $fa6, $fa3 - fld.s $fa7, $sp, 36 # 4-byte Folded Reload + fld.s $fa7, $sp, 32 # 4-byte Folded Reload fsub.s $fa7, $fa7, $fa1 - fld.s $ft0, $sp, 32 # 4-byte Folded Reload + fld.s $ft0, $sp, 28 # 4-byte Folded Reload fsub.s $ft0, $ft0, $fa2 fld.s $ft1, $sp, 24 # 4-byte Folded Reload fsub.s $ft1, $ft1, $fa3 @@ -32630,7 +32563,7 @@ _ZN15btSoftColliders12CollideVF_SS7ProcessEPK10btDbvtNodeS3_: # @_ZN15btSoftColl fmadd.s $ft2, $ft2, $ft2, $ft3 fmadd.s $ft2, $ft4, $ft4, $ft2 fsqrt.s $ft2, $ft2 - fld.s $ft3, $sp, 28 # 4-byte Folded Reload + fld.s $ft3, $sp, 36 # 4-byte Folded Reload fsub.s $fa1, $ft3, $fa1 fld.s $ft3, $sp, 48 # 4-byte Folded Reload fsub.s $fa2, $ft3, $fa2 @@ -32736,7 +32669,7 @@ _ZN15btSoftColliders12CollideVF_SS7ProcessEPK10btDbvtNodeS3_: # @_ZN15btSoftColl bgtz $a1, .LBB199_24 b .LBB199_26 .LBB199_16: - fld.s $fa0, $a4, %pc_lo12(.LCPI199_0) + movgr2fr.w $fa0, $a4 # implicit-def: $f12 # implicit-def: $f9 # implicit-def: $f23 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSoftBodyConcaveCollisionAlgorithm.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSoftBodyConcaveCollisionAlgorithm.s index 80b38367..54dd59c0 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSoftBodyConcaveCollisionAlgorithm.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSoftBodyConcaveCollisionAlgorithm.s @@ -1050,10 +1050,6 @@ _ZN9btHashMapI9btHashKeyI10btTriIndexES1_E5clearEv: # @_ZN9btHashMapI9btHashKeyI .word 0x437f0000 # float 255 .word 0x00000000 # float 0 .word 0x00000000 # float 0 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI10_1: - .word 0x3d75c28f # float 0.0599999987 .text .globl _ZN26btSoftBodyTriangleCallback15processTriangleEP9btVector3ii .p2align 5 @@ -1404,45 +1400,46 @@ _ZN26btSoftBodyTriangleCallback15processTriangleEP9btVector3ii: # @_ZN26btSoftBo fmadd.s $ft2, $ft7, $ft7, $ft2 fmadd.s $ft2, $ft1, $ft1, $ft2 frsqrt.s $ft2, $ft2 - pcalau12i $a0, %pc_hi20(.LCPI10_1) - fld.s $ft4, $a0, %pc_lo12(.LCPI10_1) - fmul.s $ft5, $ft7, $ft2 + fmul.s $ft4, $ft7, $ft2 + fmul.s $ft3, $ft3, $ft2 + fmul.s $ft1, $ft1, $ft2 + lu12i.w $a0, 251740 + ori $a0, $a0, 655 + movgr2fr.w $ft2, $a0 + fmul.s $ft4, $ft4, $ft2 fmul.s $ft3, $ft3, $ft2 fmul.s $ft1, $ft1, $ft2 - fmul.s $ft2, $ft5, $ft4 - fmul.s $ft3, $ft3, $ft4 - fmul.s $ft1, $ft1, $ft4 - fadd.s $ft4, $fa6, $ft2 + fadd.s $ft2, $fa6, $ft4 fadd.s $ft5, $fa7, $ft3 fadd.s $ft6, $ft0, $ft1 - movfr2gr.s $a0, $ft4 + movfr2gr.s $a0, $ft2 movfr2gr.s $a1, $ft5 bstrins.d $a0, $a1, 63, 32 movfr2gr.s $a1, $ft6 bstrpick.d $a1, $a1, 31, 0 st.d $a0, $sp, 16 st.d $a1, $sp, 24 - fadd.s $ft4, $fa1, $ft2 + fadd.s $ft2, $fa1, $ft4 fadd.s $ft5, $fa4, $ft3 fadd.s $ft6, $fa5, $ft1 - movfr2gr.s $a0, $ft4 + movfr2gr.s $a0, $ft2 movfr2gr.s $a1, $ft5 bstrins.d $a0, $a1, 63, 32 movfr2gr.s $a1, $ft6 bstrpick.d $a1, $a1, 31, 0 st.d $a0, $sp, 32 st.d $a1, $sp, 40 - fadd.s $ft4, $fa0, $ft2 + fadd.s $ft2, $fa0, $ft4 fadd.s $ft5, $fa2, $ft3 fadd.s $ft6, $fa3, $ft1 - movfr2gr.s $a0, $ft4 + movfr2gr.s $a0, $ft2 movfr2gr.s $a1, $ft5 bstrins.d $a0, $a1, 63, 32 movfr2gr.s $a1, $ft6 bstrpick.d $a1, $a1, 31, 0 st.d $a0, $sp, 48 st.d $a1, $sp, 56 - fsub.s $fa6, $fa6, $ft2 + fsub.s $fa6, $fa6, $ft4 fsub.s $fa7, $fa7, $ft3 fsub.s $ft0, $ft0, $ft1 movfr2gr.s $a0, $fa6 @@ -1452,7 +1449,7 @@ _ZN26btSoftBodyTriangleCallback15processTriangleEP9btVector3ii: # @_ZN26btSoftBo bstrpick.d $a1, $a1, 31, 0 st.d $a0, $sp, 64 st.d $a1, $sp, 72 - fsub.s $fa1, $fa1, $ft2 + fsub.s $fa1, $fa1, $ft4 fsub.s $fa4, $fa4, $ft3 fsub.s $fa5, $fa5, $ft1 movfr2gr.s $a0, $fa1 @@ -1462,7 +1459,7 @@ _ZN26btSoftBodyTriangleCallback15processTriangleEP9btVector3ii: # @_ZN26btSoftBo bstrpick.d $a1, $a1, 31, 0 st.d $a0, $sp, 80 st.d $a1, $sp, 88 - fsub.s $fa0, $fa0, $ft2 + fsub.s $fa0, $fa0, $ft4 fsub.s $fa1, $fa2, $ft3 fsub.s $fa2, $fa3, $ft1 movfr2gr.s $a0, $fa0 @@ -1869,12 +1866,8 @@ _ZN9btHashMapI9btHashKeyI10btTriIndexES1_E6insertERKS2_RKS1_: # @_ZN9btHashMapI9 .size _ZN9btHashMapI9btHashKeyI10btTriIndexES1_E6insertERKS2_RKS1_, .Lfunc_end11-_ZN9btHashMapI9btHashKeyI10btTriIndexES1_E6insertERKS2_RKS1_ .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN26btSoftBodyTriangleCallback22setTimeStepAndCountersEfRK16btDispatcherInfoP16btManifoldResult -.LCPI12_0: - .word 0x3d75c28f # float 0.0599999987 .text - .globl _ZN26btSoftBodyTriangleCallback22setTimeStepAndCountersEfRK16btDispatcherInfoP16btManifoldResult + .globl _ZN26btSoftBodyTriangleCallback22setTimeStepAndCountersEfRK16btDispatcherInfoP16btManifoldResult # -- Begin function _ZN26btSoftBodyTriangleCallback22setTimeStepAndCountersEfRK16btDispatcherInfoP16btManifoldResult .p2align 5 .type _ZN26btSoftBodyTriangleCallback22setTimeStepAndCountersEfRK16btDispatcherInfoP16btManifoldResult,@function _ZN26btSoftBodyTriangleCallback22setTimeStepAndCountersEfRK16btDispatcherInfoP16btManifoldResult: # @_ZN26btSoftBodyTriangleCallback22setTimeStepAndCountersEfRK16btDispatcherInfoP16btManifoldResult @@ -1895,10 +1888,11 @@ _ZN26btSoftBodyTriangleCallback22setTimeStepAndCountersEfRK16btDispatcherInfoP16 .cfi_offset 58, -40 .cfi_offset 59, -48 move $fp, $a0 - pcalau12i $a3, %pc_hi20(.LCPI12_0) + st.d $a1, $a0, 72 ld.d $a0, $a0, 8 - fld.s $fa1, $a3, %pc_lo12(.LCPI12_0) - st.d $a1, $fp, 72 + lu12i.w $a1, 251740 + ori $a1, $a1, 655 + movgr2fr.w $fa1, $a1 ld.d $a1, $a0, 0 fadd.s $fa0, $fa0, $fa1 fst.s $fa0, $fp, 80 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSoftBodyHelpers.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSoftBodyHelpers.s index e8ef42c3..df41c18e 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSoftBodyHelpers.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSoftBodyHelpers.s @@ -11,48 +11,36 @@ .word 0x00000000 # float 0 .word 0x00000000 # float 0 .word 0x00000000 # float 0 -.LCPI0_4: +.LCPI0_2: .word 0x00000000 # float 0 .word 0x3f800000 # float 1 .word 0x00000000 # float 0 .word 0x00000000 # float 0 -.LCPI0_5: +.LCPI0_3: .word 0x00000000 # float 0 .word 0x00000000 # float 0 .word 0x3f800000 # float 1 .word 0x00000000 # float 0 -.LCPI0_6: +.LCPI0_4: .word 0x3f800000 # float 1 .word 0x3f800000 # float 1 .word 0x00000000 # float 0 .word 0x00000000 # float 0 -.LCPI0_7: +.LCPI0_5: .word 0x00000000 # float 0 .word 0x3f333333 # float 0.699999988 .word 0x00000000 # float 0 .word 0x00000000 # float 0 -.LCPI0_11: +.LCPI0_6: .word 0x3f333333 # float 0.699999988 .word 0x3f333333 # float 0.699999988 .word 0x3f333333 # float 0.699999988 .word 0x00000000 # float 0 -.LCPI0_12: +.LCPI0_7: .word 0x00000000 # float 0 .word 0x3f800000 # float 1 .word 0x3f800000 # float 1 .word 0x00000000 # float 0 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI0_2: - .word 0xbdcccccd # float -0.100000001 -.LCPI0_3: - .word 0x3dcccccd # float 0.100000001 -.LCPI0_8: - .word 0x3eaaaaab # float 0.333333343 -.LCPI0_9: - .word 0x3f4ccccd # float 0.800000011 -.LCPI0_10: - .word 0x30000000 # float 4.65661287E-10 .text .globl _ZN17btSoftBodyHelpers4DrawEP10btSoftBodyP12btIDebugDrawi .p2align 5 @@ -118,25 +106,28 @@ _ZN17btSoftBodyHelpers4DrawEP10btSoftBodyP12btIDebugDrawi: # @_ZN17btSoftBodyHel vst $vr0, $sp, 352 st.d $a2, $sp, 56 # 8-byte Folded Spill andi $a0, $a2, 1 - pcalau12i $a1, %pc_hi20(.LCPI0_4) + pcalau12i $a1, %pc_hi20(.LCPI0_2) st.d $a1, $sp, 96 # 8-byte Folded Spill - pcalau12i $s5, %pc_hi20(.LCPI0_5) + pcalau12i $s5, %pc_hi20(.LCPI0_3) beqz $a0, .LBB0_6 # %bb.1: # %.preheader2058 ld.w $a0, $s0, 820 blez $a0, .LBB0_6 # %bb.2: # %.lr.ph - pcalau12i $a1, %pc_hi20(.LCPI0_2) - fld.s $fs0, $a1, %pc_lo12(.LCPI0_2) - pcalau12i $a1, %pc_hi20(.LCPI0_3) - fld.s $fs1, $a1, %pc_lo12(.LCPI0_3) - ld.d $a1, $sp, 96 # 8-byte Folded Reload - vld $vr0, $a1, %pc_lo12(.LCPI0_4) - vst $vr0, $sp, 128 # 16-byte Folded Spill - vld $vr0, $s5, %pc_lo12(.LCPI0_5) - vst $vr0, $sp, 64 # 16-byte Folded Spill move $s2, $zero move $s3, $zero + lu12i.w $a1, -271156 + ori $a1, $a1, 3277 + lu32i.d $a1, 0 + movgr2fr.w $fs0, $a1 + lu12i.w $a1, 253132 + ld.d $a2, $sp, 96 # 8-byte Folded Reload + vld $vr0, $a2, %pc_lo12(.LCPI0_2) + vst $vr0, $sp, 128 # 16-byte Folded Spill + vld $vr0, $s5, %pc_lo12(.LCPI0_3) + vst $vr0, $sp, 64 # 16-byte Folded Spill + ori $a1, $a1, 3277 + movgr2fr.w $fs1, $a1 movgr2fr.w $fs2, $zero b .LBB0_4 .p2align 4, , 16 @@ -377,7 +368,7 @@ _ZN17btSoftBodyHelpers4DrawEP10btSoftBodyP12btIDebugDrawi: # @_ZN17btSoftBodyHel .LBB0_18: # %.loopexit2055 ld.d $a0, $sp, 56 # 8-byte Folded Reload andi $a0, $a0, 32 - pcalau12i $s1, %pc_hi20(.LCPI0_6) + pcalau12i $s1, %pc_hi20(.LCPI0_4) beqz $a0, .LBB0_23 # %bb.19: pcalau12i $a0, %pc_hi20(_ZGVZN17btSoftBodyHelpers4DrawEP10btSoftBodyP12btIDebugDrawiE4axis) @@ -391,7 +382,7 @@ _ZN17btSoftBodyHelpers4DrawEP10btSoftBodyP12btIDebugDrawi: # @_ZN17btSoftBodyHel ori $s2, $zero, 2 pcalau12i $a0, %pc_hi20(_ZZN17btSoftBodyHelpers4DrawEP10btSoftBodyP12btIDebugDrawiE4axis) addi.d $s4, $a0, %pc_lo12(_ZZN17btSoftBodyHelpers4DrawEP10btSoftBodyP12btIDebugDrawiE4axis) - vld $vr0, $s1, %pc_lo12(.LCPI0_6) + vld $vr0, $s1, %pc_lo12(.LCPI0_4) vst $vr0, $sp, 64 # 16-byte Folded Spill move $s6, $zero move $s7, $zero @@ -589,7 +580,7 @@ _ZN17btSoftBodyHelpers4DrawEP10btSoftBodyP12btIDebugDrawi: # @_ZN17btSoftBodyHel move $s2, $zero move $s4, $zero ld.d $a0, $sp, 96 # 8-byte Folded Reload - vld $vr0, $a0, %pc_lo12(.LCPI0_4) + vld $vr0, $a0, %pc_lo12(.LCPI0_2) vst $vr0, $sp, 64 # 16-byte Folded Spill movgr2fr.w $fs0, $zero .p2align 4, , 16 @@ -789,18 +780,20 @@ _ZN17btSoftBodyHelpers4DrawEP10btSoftBodyP12btIDebugDrawi: # @_ZN17btSoftBodyHel ld.d $s0, $sp, 128 # 8-byte Folded Reload beqz $a0, .LBB0_39 # %bb.34: - pcalau12i $a0, %pc_hi20(.LCPI0_7) - vld $vr0, $a0, %pc_lo12(.LCPI0_7) + pcalau12i $a0, %pc_hi20(.LCPI0_5) + vld $vr0, $a0, %pc_lo12(.LCPI0_5) ld.w $a0, $s0, 884 vst $vr0, $sp, 224 blez $a0, .LBB0_39 # %bb.35: # %.lr.ph2123 - pcalau12i $a1, %pc_hi20(.LCPI0_8) - fld.s $fs0, $a1, %pc_lo12(.LCPI0_8) - pcalau12i $a1, %pc_hi20(.LCPI0_9) - fld.s $fs1, $a1, %pc_lo12(.LCPI0_9) move $s2, $zero move $s3, $zero + lu12i.w $a1, 256682 + ori $a1, $a1, 2731 + movgr2fr.w $fs0, $a1 + lu12i.w $a1, 259276 + ori $a1, $a1, 3277 + movgr2fr.w $fs1, $a1 b .LBB0_37 .p2align 4, , 16 .LBB0_36: # in Loop: Header=BB0_37 Depth=1 @@ -906,8 +899,8 @@ _ZN17btSoftBodyHelpers4DrawEP10btSoftBodyP12btIDebugDrawi: # @_ZN17btSoftBodyHel blez $a0, .LBB0_82 # %bb.41: # %.lr.ph2132 move $s6, $zero - pcalau12i $a0, %pc_hi20(.LCPI0_10) - fld.s $fs0, $a0, %pc_lo12(.LCPI0_10) + lu12i.w $a0, 196608 + movgr2fr.w $fs0, $a0 ori $a0, $zero, 16 ori $s7, $zero, 1 lu32i.d $a0, 201327 @@ -996,7 +989,7 @@ _ZN17btSoftBodyHelpers4DrawEP10btSoftBodyP12btIDebugDrawi: # @_ZN17btSoftBodyHel st.d $a0, $sp, 224 movfr2gr.s $a0, $fa2 ld.d $a2, $sp, 96 # 8-byte Folded Reload - vld $vr0, $a2, %pc_lo12(.LCPI0_4) + vld $vr0, $a2, %pc_lo12(.LCPI0_2) ld.d $a4, $a1, 40 bstrpick.d $a0, $a0, 31, 0 st.d $a0, $sp, 232 @@ -1038,7 +1031,7 @@ _ZN17btSoftBodyHelpers4DrawEP10btSoftBodyP12btIDebugDrawi: # @_ZN17btSoftBodyHel ld.d $a1, $fp, 0 st.d $a0, $sp, 224 movfr2gr.s $a0, $fa2 - vld $vr0, $s5, %pc_lo12(.LCPI0_5) + vld $vr0, $s5, %pc_lo12(.LCPI0_3) ld.d $a4, $a1, 40 bstrpick.d $a0, $a0, 31, 0 st.d $a0, $sp, 232 @@ -1356,16 +1349,17 @@ _ZN17btSoftBodyHelpers4DrawEP10btSoftBodyP12btIDebugDrawi: # @_ZN17btSoftBodyHel andi $a0, $a1, 8 beqz $a0, .LBB0_89 # %bb.83: - pcalau12i $a0, %pc_hi20(.LCPI0_11) - vld $vr0, $a0, %pc_lo12(.LCPI0_11) + pcalau12i $a0, %pc_hi20(.LCPI0_6) + vld $vr0, $a0, %pc_lo12(.LCPI0_6) ld.w $a0, $s0, 916 vst $vr0, $sp, 224 blez $a0, .LBB0_88 # %bb.84: # %.lr.ph2135 - pcalau12i $a1, %pc_hi20(.LCPI0_9) - fld.s $fs0, $a1, %pc_lo12(.LCPI0_9) move $s2, $zero move $s4, $zero + lu12i.w $a1, 259276 + ori $a1, $a1, 3277 + movgr2fr.w $fs0, $a1 b .LBB0_86 .p2align 4, , 16 .LBB0_85: # in Loop: Header=BB0_86 Depth=1 @@ -1605,7 +1599,7 @@ _ZN17btSoftBodyHelpers4DrawEP10btSoftBodyP12btIDebugDrawi: # @_ZN17btSoftBodyHel bnez $a0, .LBB0_101 .LBB0_98: slli.d $a0, $s1, 52 - pcalau12i $s0, %pc_hi20(.LCPI0_12) + pcalau12i $s0, %pc_hi20(.LCPI0_7) vld $vr1, $sp, 112 # 16-byte Folded Reload bltz $a0, .LBB0_102 .LBB0_99: @@ -1632,7 +1626,7 @@ _ZN17btSoftBodyHelpers4DrawEP10btSoftBodyP12btIDebugDrawi: # @_ZN17btSoftBodyHel beqz $a0, .LBB0_98 .LBB0_101: ld.d $a0, $sp, 96 # 8-byte Folded Reload - vld $vr0, $a0, %pc_lo12(.LCPI0_4) + vld $vr0, $a0, %pc_lo12(.LCPI0_2) ld.d $a1, $s0, 1208 vst $vr0, $sp, 224 vld $vr0, $sp, 112 # 16-byte Folded Reload @@ -1646,11 +1640,11 @@ _ZN17btSoftBodyHelpers4DrawEP10btSoftBodyP12btIDebugDrawi: # @_ZN17btSoftBodyHel pcaddu18i $ra, %call36(_ZL8drawTreeP12btIDebugDrawPK10btDbvtNodeiRK9btVector3S6_ii) jirl $ra, $ra, 0 slli.d $a0, $s1, 52 - pcalau12i $s0, %pc_hi20(.LCPI0_12) + pcalau12i $s0, %pc_hi20(.LCPI0_7) vld $vr1, $sp, 112 # 16-byte Folded Reload bgez $a0, .LBB0_99 .LBB0_102: - vld $vr0, $s0, %pc_lo12(.LCPI0_12) + vld $vr0, $s0, %pc_lo12(.LCPI0_7) ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $a1, $a0, 1272 vst $vr0, $sp, 224 @@ -1678,9 +1672,9 @@ _ZN17btSoftBodyHelpers4DrawEP10btSoftBodyP12btIDebugDrawi: # @_ZN17btSoftBodyHel move $s6, $zero move $s7, $zero ld.d $a0, $sp, 32 # 8-byte Folded Reload - vld $vr0, $a0, %pc_lo12(.LCPI0_6) + vld $vr0, $a0, %pc_lo12(.LCPI0_4) vst $vr0, $sp, 112 # 16-byte Folded Spill - vld $vr0, $s0, %pc_lo12(.LCPI0_12) + vld $vr0, $s0, %pc_lo12(.LCPI0_7) vst $vr0, $sp, 96 # 16-byte Folded Spill movgr2fr.w $fa0, $zero fst.s $fa0, $sp, 28 # 4-byte Folded Spill @@ -3772,12 +3766,6 @@ _ZL8drawTreeP12btIDebugDrawPK10btDbvtNodeiRK9btVector3S6_ii: # @_ZL8drawTreeP12b .word 0x00000000 # float 0 .word 0x3f800000 # float 1 .word 0x00000000 # float 0 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI11_3: - .word 0xbdcccccd # float -0.100000001 -.LCPI11_4: - .word 0x3dcccccd # float 0.100000001 .text .globl _ZN17btSoftBodyHelpers9DrawFrameEP10btSoftBodyP12btIDebugDraw .p2align 5 @@ -3841,36 +3829,36 @@ _ZN17btSoftBodyHelpers9DrawFrameEP10btSoftBodyP12btIDebugDraw: # @_ZN17btSoftBod fmul.s $fa6, $fa1, $fa2 fld.s $fa7, $fp, 692 fmadd.s $fa6, $fa0, $fa3, $fa6 - fmadd.s $fs5, $fa4, $fa5, $fa6 + fmadd.s $fs3, $fa4, $fa5, $fa6 fld.s $fa6, $fp, 676 fmul.s $ft0, $fa2, $fa7 fld.s $ft1, $fp, 708 fld.s $ft2, $fp, 696 fld.s $ft3, $fp, 680 fmadd.s $ft0, $fa6, $fa3, $ft0 - fmadd.s $fs6, $ft1, $fa5, $ft0 + fmadd.s $fs5, $ft1, $fa5, $ft0 fmul.s $fa2, $fa2, $ft2 fmadd.s $fa2, $ft3, $fa3, $fa2 fld.s $fa3, $fp, 712 fld.s $ft0, $fp, 644 fld.s $ft4, $fp, 640 fld.s $ft5, $fp, 648 - fmadd.s $fs4, $fa3, $fa5, $fa2 + fmadd.s $fs1, $fa3, $fa5, $fa2 fmul.s $fa2, $fa1, $ft0 fmadd.s $fa2, $fa0, $ft4, $fa2 - fmadd.s $fs2, $fa4, $ft5, $fa2 + fmadd.s $fs0, $fa4, $ft5, $fa2 fmul.s $fa2, $fa7, $ft0 fmadd.s $fa2, $fa6, $ft4, $fa2 - fmadd.s $fs7, $ft1, $ft5, $fa2 + fmadd.s $fs6, $ft1, $ft5, $fa2 fmul.s $fa2, $ft2, $ft0 fmadd.s $fa2, $ft3, $ft4, $fa2 fld.s $fa5, $fp, 660 fld.s $ft0, $fp, 656 fld.s $ft4, $fp, 664 - fmadd.s $ft5, $fa3, $ft5, $fa2 + fmadd.s $fs7, $fa3, $ft5, $fa2 fmul.s $fa1, $fa1, $fa5 fmadd.s $fa0, $fa0, $ft0, $fa1 - fmadd.s $ft6, $fa4, $ft4, $fa0 + fmadd.s $ft5, $fa4, $ft4, $fa0 fmul.s $fa0, $fa7, $fa5 fmadd.s $fa0, $fa6, $ft0, $fa0 fmadd.s $ft1, $ft1, $ft4, $fa0 @@ -3878,14 +3866,14 @@ _ZN17btSoftBodyHelpers9DrawFrameEP10btSoftBodyP12btIDebugDraw: # @_ZN17btSoftBod fmadd.s $fa0, $ft3, $ft0, $fa0 fmadd.s $ft2, $fa3, $ft4, $fa0 movgr2fr.w $ft3, $zero - fmul.s $fa0, $fs6, $ft3 - fadd.s $fa1, $fs5, $fa0 - fmadd.s $fa1, $fs4, $ft3, $fa1 - fmul.s $fa2, $fs7, $ft3 - fadd.s $fa3, $fs2, $fa2 - fmadd.s $fa3, $ft5, $ft3, $fa3 + fmul.s $fa0, $fs5, $ft3 + fadd.s $fa1, $fs3, $fa0 + fmadd.s $fa1, $fs1, $ft3, $fa1 + fmul.s $fa2, $fs6, $ft3 + fadd.s $fa3, $fs0, $fa2 + fmadd.s $fa3, $fs7, $ft3, $fa3 fmul.s $fa4, $ft1, $ft3 - fadd.s $fa5, $ft6, $fa4 + fadd.s $fa5, $ft5, $fa4 fmadd.s $fa5, $ft2, $ft3, $fa5 fmul.s $fa6, $fa3, $fa3 fmadd.s $fa6, $fa1, $fa1, $fa6 @@ -3894,40 +3882,40 @@ _ZN17btSoftBodyHelpers9DrawFrameEP10btSoftBodyP12btIDebugDraw: # @_ZN17btSoftBod fmul.s $fa1, $fa1, $fa6 fmul.s $fa3, $fa3, $fa6 fmul.s $fa5, $fa5, $fa6 - fmadd.s $fa6, $fs5, $ft3, $fs6 - fmadd.s $fa6, $fs4, $ft3, $fa6 - fmadd.s $fa7, $fs2, $ft3, $fs7 - fmadd.s $fa7, $ft5, $ft3, $fa7 - fst.s $ft1, $sp, 52 # 4-byte Folded Spill - fmadd.s $ft0, $ft6, $ft3, $ft1 + fmadd.s $fa6, $fs3, $ft3, $fs5 + fmadd.s $fa6, $fs1, $ft3, $fa6 + fmadd.s $fa7, $fs0, $ft3, $fs6 + fmadd.s $fa7, $fs7, $ft3, $fa7 + fst.s $ft1, $sp, 56 # 4-byte Folded Spill + fmadd.s $ft0, $ft5, $ft3, $ft1 fmadd.s $ft0, $ft2, $ft3, $ft0 fmul.s $ft1, $fa7, $fa7 fmadd.s $ft1, $fa6, $fa6, $ft1 fmadd.s $ft1, $ft0, $ft0, $ft1 frsqrt.s $ft1, $ft1 - fmul.s $fs1, $fa6, $ft1 - fmul.s $fs0, $fa7, $ft1 - fmul.s $fs3, $ft0, $ft1 - fmadd.s $fa0, $fs5, $ft3, $fa0 - fadd.s $fa0, $fa0, $fs4 - fmadd.s $fa2, $fs2, $ft3, $fa2 + fmul.s $fs2, $fa6, $ft1 + fmul.s $fs4, $fa7, $ft1 + fmul.s $fa6, $ft0, $ft1 + fst.s $fa6, $sp, 12 # 4-byte Folded Spill + fmadd.s $fa0, $fs3, $ft3, $fa0 + fadd.s $fa0, $fa0, $fs1 + fmadd.s $fa2, $fs0, $ft3, $fa2 + fadd.s $fa2, $fs7, $fa2 fst.s $ft5, $sp, 60 # 4-byte Folded Spill - fadd.s $fa2, $ft5, $fa2 - fst.s $ft6, $sp, 56 # 4-byte Folded Spill - fst.s $ft3, $sp, 44 # 4-byte Folded Spill - fmadd.s $fa4, $ft6, $ft3, $fa4 - fst.s $ft2, $sp, 48 # 4-byte Folded Spill + fst.s $ft3, $sp, 48 # 4-byte Folded Spill + fmadd.s $fa4, $ft5, $ft3, $fa4 + fst.s $ft2, $sp, 52 # 4-byte Folded Spill fadd.s $fa4, $ft2, $fa4 fmul.s $fa6, $fa2, $fa2 fmadd.s $fa6, $fa0, $fa0, $fa6 fmadd.s $fa6, $fa4, $fa4, $fa6 frsqrt.s $fa6, $fa6 fmul.s $fa0, $fa0, $fa6 - fst.s $fa0, $sp, 28 # 4-byte Folded Spill - fmul.s $fa0, $fa2, $fa6 fst.s $fa0, $sp, 20 # 4-byte Folded Spill + fmul.s $fa0, $fa2, $fa6 + fst.s $fa0, $sp, 16 # 4-byte Folded Spill fmul.s $fa0, $fa4, $fa6 - fst.s $fa0, $sp, 36 # 4-byte Folded Spill + fst.s $fa0, $sp, 24 # 4-byte Folded Spill fld.s $fa0, $sp, 80 vldi $vr4, -1244 fmul.s $fa1, $fa1, $fa4 @@ -3957,14 +3945,15 @@ _ZN17btSoftBodyHelpers9DrawFrameEP10btSoftBodyP12btIDebugDraw: # @_ZN17btSoftBod jirl $ra, $a4, 0 fld.s $fa0, $sp, 80 vldi $vr3, -1244 - fmul.s $fa1, $fs1, $fa3 + fmul.s $fa1, $fs2, $fa3 fld.s $fa2, $sp, 84 fadd.s $fa0, $fa1, $fa0 fld.s $fa1, $sp, 88 - fmul.s $fa3, $fs0, $fa3 + fmul.s $fa3, $fs4, $fa3 vldi $vr4, -1244 fadd.s $fa2, $fa3, $fa2 - fmul.s $fa3, $fs3, $fa4 + fld.s $fa3, $sp, 12 # 4-byte Folded Reload + fmul.s $fa3, $fa3, $fa4 fadd.s $fa1, $fa3, $fa1 movfr2gr.s $a0, $fa0 movfr2gr.s $a1, $fa2 @@ -3983,16 +3972,16 @@ _ZN17btSoftBodyHelpers9DrawFrameEP10btSoftBodyP12btIDebugDraw: # @_ZN17btSoftBod addi.d $a3, $sp, 96 move $a0, $s0 jirl $ra, $a4, 0 - fld.s $fa0, $sp, 28 # 4-byte Folded Reload + fld.s $fa0, $sp, 20 # 4-byte Folded Reload vldi $vr2, -1244 fmul.s $fa0, $fa0, $fa2 - fld.s $fa1, $sp, 20 # 4-byte Folded Reload + fld.s $fa1, $sp, 16 # 4-byte Folded Reload fmul.s $fa1, $fa1, $fa2 vldi $vr6, -1244 fld.s $fa2, $sp, 80 fld.s $fa3, $sp, 84 fld.s $fa4, $sp, 88 - fld.s $fa5, $sp, 36 # 4-byte Folded Reload + fld.s $fa5, $sp, 24 # 4-byte Folded Reload fmul.s $fa5, $fa5, $fa6 fadd.s $fa0, $fa0, $fa2 fadd.s $fa1, $fa1, $fa3 @@ -4017,18 +4006,18 @@ _ZN17btSoftBodyHelpers9DrawFrameEP10btSoftBodyP12btIDebugDraw: # @_ZN17btSoftBod ld.w $a0, $fp, 548 blez $a0, .LBB11_4 # %bb.2: # %.lr.ph - pcalau12i $a0, %pc_hi20(.LCPI11_3) - fld.s $fa0, $a0, %pc_lo12(.LCPI11_3) - fst.s $fa0, $sp, 36 # 4-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI11_4) - fld.s $fa0, $a0, %pc_lo12(.LCPI11_4) - fst.s $fa0, $sp, 28 # 4-byte Folded Spill move $s1, $zero move $s2, $zero - lu12i.w $s3, 260096 - fst.s $fs2, $sp, 40 # 4-byte Folded Spill - fst.s $fs4, $sp, 32 # 4-byte Folded Spill - fst.s $fs6, $sp, 24 # 4-byte Folded Spill + lu12i.w $a0, -271156 + ori $s4, $a0, 3277 + lu32i.d $s4, 0 + lu12i.w $a0, 253132 + ori $a0, $a0, 3277 + st.d $a0, $sp, 24 # 8-byte Folded Spill + fst.s $fs3, $sp, 44 # 4-byte Folded Spill + fst.s $fs6, $sp, 40 # 4-byte Folded Spill + fst.s $fs0, $sp, 36 # 4-byte Folded Spill + fst.s $fs1, $sp, 32 # 4-byte Folded Spill .p2align 4, , 16 .LBB11_3: # =>This Inner Loop Header: Depth=1 ld.d $a0, $fp, 560 @@ -4036,97 +4025,99 @@ _ZN17btSoftBodyHelpers9DrawFrameEP10btSoftBodyP12btIDebugDraw: # @_ZN17btSoftBod fld.s $fa0, $a1, 4 fldx.s $fa1, $a0, $s1 fld.s $fa2, $a1, 8 - fmul.s $fa3, $fs6, $fa0 - fmadd.s $fa3, $fs5, $fa1, $fa3 - fmadd.s $fa3, $fs4, $fa2, $fa3 - fmul.s $fa4, $fs7, $fa0 - fmadd.s $fa4, $fs2, $fa1, $fa4 - fld.s $fa5, $sp, 60 # 4-byte Folded Reload - fmadd.s $fa4, $fa5, $fa2, $fa4 - fld.s $fa5, $sp, 52 # 4-byte Folded Reload - fmul.s $fa0, $fa5, $fa0 + fmul.s $fa3, $fs5, $fa0 + fmadd.s $fa3, $fs3, $fa1, $fa3 + fmadd.s $fa3, $fs1, $fa2, $fa3 + fmul.s $fa4, $fs6, $fa0 + fmadd.s $fa4, $fs0, $fa1, $fa4 + fmadd.s $fa4, $fs7, $fa2, $fa4 fld.s $fa5, $sp, 56 # 4-byte Folded Reload + fmul.s $fa0, $fa5, $fa0 + fld.s $fa5, $sp, 60 # 4-byte Folded Reload fmadd.s $fa0, $fa5, $fa1, $fa0 fld.s $fa1, $sp, 80 fld.s $fa5, $sp, 84 fld.s $fa6, $sp, 88 - fld.s $fa7, $sp, 48 # 4-byte Folded Reload + fld.s $fa7, $sp, 52 # 4-byte Folded Reload fmadd.s $fa0, $fa7, $fa2, $fa0 - fmov.s $fs2, $fs7 + fmov.s $fs6, $fs7 fadd.s $fs7, $fa1, $fa3 - fmov.s $fs6, $fs5 + fmov.s $fs1, $fs5 fadd.s $fs5, $fa5, $fa4 - fadd.s $fs3, $fa0, $fa6 - movfr2gr.s $s4, $fs7 - movfr2gr.s $s5, $fs5 - movfr2gr.s $a0, $fs3 - bstrpick.d $s7, $a0, 31, 0 - vreplgr2vr.d $vr0, $s3 + fadd.s $fs4, $fa0, $fa6 + movfr2gr.s $s6, $fs7 + movfr2gr.s $s7, $fs5 + movfr2gr.s $a0, $fs4 + bstrpick.d $s3, $a0, 31, 0 + lu12i.w $a0, 260096 + vreplgr2vr.d $vr0, $a0 vst $vr0, $sp, 64 - fld.s $fs0, $sp, 36 # 4-byte Folded Reload - fadd.s $fa0, $fs7, $fs0 + movgr2fr.w $fs3, $s4 + fadd.s $fa0, $fs7, $fs3 movfr2gr.s $a0, $fa0 - bstrins.d $a0, $s5, 63, 32 + bstrins.d $a0, $s7, 63, 32 st.d $a0, $sp, 120 - st.d $s7, $sp, 128 - fld.s $fs1, $sp, 28 # 4-byte Folded Reload - fadd.s $fa0, $fs7, $fs1 - fld.s $fs4, $sp, 44 # 4-byte Folded Reload - fadd.s $fa1, $fs5, $fs4 - fadd.s $fa2, $fs3, $fs4 + st.d $s3, $sp, 128 + ld.d $a0, $sp, 24 # 8-byte Folded Reload + movgr2fr.w $fs2, $a0 + fadd.s $fa0, $fs7, $fs2 + fld.s $fs0, $sp, 48 # 4-byte Folded Reload + fadd.s $fa1, $fs5, $fs0 + fadd.s $fa2, $fs4, $fs0 movfr2gr.s $a0, $fa0 ld.d $a1, $s0, 0 - movfr2gr.s $s6, $fa1 - bstrins.d $a0, $s6, 63, 32 + movfr2gr.s $s8, $fa1 + bstrins.d $a0, $s8, 63, 32 st.d $a0, $sp, 96 ld.d $a4, $a1, 40 movfr2gr.s $a0, $fa2 - bstrpick.d $s8, $a0, 31, 0 - st.d $s8, $sp, 104 + bstrpick.d $s5, $a0, 31, 0 + st.d $s5, $sp, 104 addi.d $a1, $sp, 120 addi.d $a2, $sp, 96 addi.d $a3, $sp, 64 move $a0, $s0 jirl $ra, $a4, 0 - fadd.s $fa0, $fs5, $fs0 + fadd.s $fa0, $fs5, $fs3 movfr2gr.s $a0, $fa0 - move $a1, $s4 + move $a1, $s6 bstrins.d $a1, $a0, 63, 32 st.d $a1, $sp, 120 - st.d $s7, $sp, 128 - fadd.s $fa0, $fs7, $fs4 - fmov.s $fs7, $fs2 + st.d $s3, $sp, 128 + fadd.s $fa0, $fs7, $fs0 + fmov.s $fs7, $fs6 ld.d $a0, $s0, 0 - fadd.s $fa1, $fs5, $fs1 - fmov.s $fs5, $fs6 - fld.s $fs6, $sp, 24 # 4-byte Folded Reload - fld.s $fs4, $sp, 32 # 4-byte Folded Reload - fld.s $fs2, $sp, 40 # 4-byte Folded Reload - movfr2gr.s $s7, $fa0 + fadd.s $fa1, $fs5, $fs2 + fmov.s $fs5, $fs1 + fld.s $fs1, $sp, 32 # 4-byte Folded Reload + fld.s $fs0, $sp, 36 # 4-byte Folded Reload + fld.s $fs6, $sp, 40 # 4-byte Folded Reload + movfr2gr.s $s3, $fa0 movfr2gr.s $a1, $fa1 - move $a2, $s7 + move $a2, $s3 ld.d $a4, $a0, 40 bstrins.d $a2, $a1, 63, 32 st.d $a2, $sp, 96 - st.d $s8, $sp, 104 + st.d $s5, $sp, 104 addi.d $a1, $sp, 120 addi.d $a2, $sp, 96 addi.d $a3, $sp, 64 move $a0, $s0 jirl $ra, $a4, 0 - fadd.s $fa0, $fs3, $fs0 - bstrins.d $s4, $s5, 63, 32 + fadd.s $fa0, $fs4, $fs3 + fld.s $fs3, $sp, 44 # 4-byte Folded Reload + bstrins.d $s6, $s7, 63, 32 movfr2gr.s $a0, $fa0 bstrpick.d $a0, $a0, 31, 0 - st.d $s4, $sp, 120 + st.d $s6, $sp, 120 st.d $a0, $sp, 128 ld.d $a0, $s0, 0 - fadd.s $fa0, $fs3, $fs1 - bstrins.d $s7, $s6, 63, 32 + fadd.s $fa0, $fs4, $fs2 + bstrins.d $s3, $s8, 63, 32 movfr2gr.s $a1, $fa0 ld.d $a4, $a0, 40 bstrpick.d $a0, $a1, 31, 0 - st.d $s7, $sp, 96 + st.d $s3, $sp, 96 st.d $a0, $sp, 104 addi.d $a1, $sp, 120 addi.d $a2, $sp, 96 @@ -5701,12 +5692,8 @@ _ZN17btSoftBodyHelpers11CalculateUVEiiiii: # @_ZN17btSoftBodyHelpers11CalculateU .word .LBB15_3-.LJTI15_0 .word .LBB15_4-.LJTI15_0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN17btSoftBodyHelpers15CreateEllipsoidER19btSoftBodyWorldInfoRK9btVector3S4_i -.LCPI16_0: - .word 0x40490fdb # float 3.14159274 .text - .globl _ZN17btSoftBodyHelpers15CreateEllipsoidER19btSoftBodyWorldInfoRK9btVector3S4_i + .globl _ZN17btSoftBodyHelpers15CreateEllipsoidER19btSoftBodyWorldInfoRK9btVector3S4_i # -- Begin function _ZN17btSoftBodyHelpers15CreateEllipsoidER19btSoftBodyWorldInfoRK9btVector3S4_i .p2align 5 .type _ZN17btSoftBodyHelpers15CreateEllipsoidER19btSoftBodyWorldInfoRK9btVector3S4_i,@function _ZN17btSoftBodyHelpers15CreateEllipsoidER19btSoftBodyWorldInfoRK9btVector3S4_i: # @_ZN17btSoftBodyHelpers15CreateEllipsoidER19btSoftBodyWorldInfoRK9btVector3S4_i @@ -5763,8 +5750,9 @@ _ZN17btSoftBodyHelpers15CreateEllipsoidER19btSoftBodyWorldInfoRK9btVector3S4_i: move $s4, $zero movgr2fr.d $fa0, $s5 ffint.s.l $fs2, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI16_0) - fld.s $fs3, $a0, %pc_lo12(.LCPI16_0) + lu12i.w $a0, 263312 + ori $a0, $a0, 4059 + movgr2fr.w $fs3, $a0 movgr2fr.w $fs4, $zero vldi $vr3, -1184 ori $s5, $zero, 2 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSolve2LinearConstraint.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSolve2LinearConstraint.s index 0a8ffe9e..1648ea7f 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSolve2LinearConstraint.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSolve2LinearConstraint.s @@ -1,10 +1,6 @@ .file "btSolve2LinearConstraint.cpp" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN24btSolve2LinearConstraint31resolveUnilateralPairConstraintEP11btRigidBodyS1_RK11btMatrix3x3S4_RK9btVector3fS7_S7_S7_S7_fS7_S7_S7_fS7_S7_S7_fS7_RfS8_ -.LCPI0_0: - .word 0x34000000 # float 1.1920929E-7 .text - .globl _ZN24btSolve2LinearConstraint31resolveUnilateralPairConstraintEP11btRigidBodyS1_RK11btMatrix3x3S4_RK9btVector3fS7_S7_S7_S7_fS7_S7_S7_fS7_S7_S7_fS7_RfS8_ + .globl _ZN24btSolve2LinearConstraint31resolveUnilateralPairConstraintEP11btRigidBodyS1_RK11btMatrix3x3S4_RK9btVector3fS7_S7_S7_S7_fS7_S7_S7_fS7_S7_S7_fS7_RfS8_ # -- Begin function _ZN24btSolve2LinearConstraint31resolveUnilateralPairConstraintEP11btRigidBodyS1_RK11btMatrix3x3S4_RK9btVector3fS7_S7_S7_S7_fS7_S7_S7_fS7_S7_S7_fS7_RfS8_ .p2align 5 .type _ZN24btSolve2LinearConstraint31resolveUnilateralPairConstraintEP11btRigidBodyS1_RK11btMatrix3x3S4_RK9btVector3fS7_S7_S7_S7_fS7_S7_S7_fS7_S7_S7_fS7_RfS8_,@function _ZN24btSolve2LinearConstraint31resolveUnilateralPairConstraintEP11btRigidBodyS1_RK11btMatrix3x3S4_RK9btVector3fS7_S7_S7_S7_fS7_S7_S7_fS7_S7_S7_fS7_RfS8_: # @_ZN24btSolve2LinearConstraint31resolveUnilateralPairConstraintEP11btRigidBodyS1_RK11btMatrix3x3S4_RK9btVector3fS7_S7_S7_S7_fS7_S7_S7_fS7_S7_S7_fS7_RfS8_ @@ -55,11 +51,11 @@ _ZN24btSolve2LinearConstraint31resolveUnilateralPairConstraintEP11btRigidBodyS1_ fmadd.s $fa4, $fa6, $fa6, $fa4 fsqrt.s $fa4, $fa4 fabs.s $fa4, $fa4 - pcalau12i $a6, %pc_hi20(.LCPI0_0) - fld.s $fa5, $a6, %pc_lo12(.LCPI0_0) - vldi $vr6, -1040 - fadd.s $fa4, $fa4, $fa6 + vldi $vr5, -1040 + fadd.s $fa4, $fa4, $fa5 fabs.s $fa4, $fa4 + lu12i.w $a6, 212992 + movgr2fr.w $fa5, $a6 fcmp.cle.s $fcc0, $fa5, $fa4 bcnez $fcc0, .LBB0_2 # %bb.1: @@ -402,12 +398,8 @@ _ZN15btJacobianEntryC2ERK11btMatrix3x3S2_RK9btVector3S5_S5_S5_fS5_f: # @_ZN15btJ .size _ZN15btJacobianEntryC2ERK11btMatrix3x3S2_RK9btVector3S5_S5_S5_fS5_f, .Lfunc_end1-_ZN15btJacobianEntryC2ERK11btMatrix3x3S2_RK9btVector3S5_S5_S5_fS5_f .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN24btSolve2LinearConstraint30resolveBilateralPairConstraintEP11btRigidBodyS1_RK11btMatrix3x3S4_RK9btVector3fS7_S7_S7_S7_fS7_S7_S7_fS7_S7_S7_fS7_RfS8_ -.LCPI2_0: - .word 0x34000000 # float 1.1920929E-7 .text - .globl _ZN24btSolve2LinearConstraint30resolveBilateralPairConstraintEP11btRigidBodyS1_RK11btMatrix3x3S4_RK9btVector3fS7_S7_S7_S7_fS7_S7_S7_fS7_S7_S7_fS7_RfS8_ + .globl _ZN24btSolve2LinearConstraint30resolveBilateralPairConstraintEP11btRigidBodyS1_RK11btMatrix3x3S4_RK9btVector3fS7_S7_S7_S7_fS7_S7_S7_fS7_S7_S7_fS7_RfS8_ # -- Begin function _ZN24btSolve2LinearConstraint30resolveBilateralPairConstraintEP11btRigidBodyS1_RK11btMatrix3x3S4_RK9btVector3fS7_S7_S7_S7_fS7_S7_S7_fS7_S7_S7_fS7_RfS8_ .p2align 5 .type _ZN24btSolve2LinearConstraint30resolveBilateralPairConstraintEP11btRigidBodyS1_RK11btMatrix3x3S4_RK9btVector3fS7_S7_S7_S7_fS7_S7_S7_fS7_S7_S7_fS7_RfS8_,@function _ZN24btSolve2LinearConstraint30resolveBilateralPairConstraintEP11btRigidBodyS1_RK11btMatrix3x3S4_RK9btVector3fS7_S7_S7_S7_fS7_S7_S7_fS7_S7_S7_fS7_RfS8_: # @_ZN24btSolve2LinearConstraint30resolveBilateralPairConstraintEP11btRigidBodyS1_RK11btMatrix3x3S4_RK9btVector3fS7_S7_S7_S7_fS7_S7_S7_fS7_S7_S7_fS7_RfS8_ @@ -458,11 +450,11 @@ _ZN24btSolve2LinearConstraint30resolveBilateralPairConstraintEP11btRigidBodyS1_R fmadd.s $fa4, $fa6, $fa6, $fa4 fsqrt.s $fa4, $fa4 fabs.s $fa4, $fa4 - pcalau12i $a6, %pc_hi20(.LCPI2_0) - fld.s $fa5, $a6, %pc_lo12(.LCPI2_0) - vldi $vr6, -1040 - fadd.s $fa4, $fa4, $fa6 + vldi $vr5, -1040 + fadd.s $fa4, $fa4, $fa5 fabs.s $fa4, $fa4 + lu12i.w $a6, 212992 + movgr2fr.w $fa5, $a6 fcmp.cle.s $fcc0, $fa5, $fa4 bcnez $fcc0, .LBB2_6 # %bb.1: diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSphereBoxCollisionAlgorithm.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSphereBoxCollisionAlgorithm.s index 4b4a9064..78384f82 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSphereBoxCollisionAlgorithm.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSphereBoxCollisionAlgorithm.s @@ -364,12 +364,8 @@ GCC_except_table3: .Lttbase2: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN29btSphereBoxCollisionAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDispatcherInfoP16btManifoldResult -.LCPI4_0: - .word 0x34000000 # float 1.1920929E-7 .text - .globl _ZN29btSphereBoxCollisionAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDispatcherInfoP16btManifoldResult + .globl _ZN29btSphereBoxCollisionAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDispatcherInfoP16btManifoldResult # -- Begin function _ZN29btSphereBoxCollisionAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDispatcherInfoP16btManifoldResult .p2align 5 .type _ZN29btSphereBoxCollisionAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDispatcherInfoP16btManifoldResult,@function _ZN29btSphereBoxCollisionAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDispatcherInfoP16btManifoldResult: # @_ZN29btSphereBoxCollisionAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDispatcherInfoP16btManifoldResult @@ -407,9 +403,9 @@ _ZN29btSphereBoxCollisionAlgorithm16processCollisionEP17btCollisionObjectS1_RK16 move $a0, $s0 pcaddu18i $ra, %call36(_ZN29btSphereBoxCollisionAlgorithm17getSphereDistanceEP17btCollisionObjectR9btVector3S3_RKS2_f) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI4_0) ld.d $a0, $s0, 24 + lu12i.w $a1, 212992 + movgr2fr.w $fa1, $a1 fcmp.cule.s $fcc0, $fa1, $fa0 st.d $a0, $fp, 8 bcnez $fcc0, .LBB4_3 @@ -478,14 +474,7 @@ _ZN29btSphereBoxCollisionAlgorithm16processCollisionEP17btCollisionObjectS1_RK16 .size _ZN29btSphereBoxCollisionAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDispatcherInfoP16btManifoldResult, .Lfunc_end4-_ZN29btSphereBoxCollisionAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDispatcherInfoP16btManifoldResult .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN29btSphereBoxCollisionAlgorithm17getSphereDistanceEP17btCollisionObjectR9btVector3S3_RKS2_f -.LCPI5_0: - .word 0x4b189680 # float 1.0E+7 -.LCPI5_1: - .word 0x34000000 # float 1.1920929E-7 - .text - .globl _ZN29btSphereBoxCollisionAlgorithm17getSphereDistanceEP17btCollisionObjectR9btVector3S3_RKS2_f + .globl _ZN29btSphereBoxCollisionAlgorithm17getSphereDistanceEP17btCollisionObjectR9btVector3S3_RKS2_f # -- Begin function _ZN29btSphereBoxCollisionAlgorithm17getSphereDistanceEP17btCollisionObjectR9btVector3S3_RKS2_f .p2align 5 .type _ZN29btSphereBoxCollisionAlgorithm17getSphereDistanceEP17btCollisionObjectR9btVector3S3_RKS2_f,@function _ZN29btSphereBoxCollisionAlgorithm17getSphereDistanceEP17btCollisionObjectR9btVector3S3_RKS2_f: # @_ZN29btSphereBoxCollisionAlgorithm17getSphereDistanceEP17btCollisionObjectR9btVector3S3_RKS2_f @@ -795,12 +784,12 @@ _ZN29btSphereBoxCollisionAlgorithm17getSphereDistanceEP17btCollisionObjectR9btVe fsub.s $fa0, $fa3, $fa0 fsub.s $fa2, $fa4, $fa2 fsub.s $fa1, $fa5, $fa1 - pcalau12i $a0, %pc_hi20(.LCPI5_1) - fld.s $fa3, $a0, %pc_lo12(.LCPI5_1) fmul.s $fa2, $fa2, $fa2 fmadd.s $fa0, $fa0, $fa0, $fa2 fmadd.s $fa0, $fa1, $fa1, $fa0 - fcmp.cule.s $fcc0, $fa0, $fa3 + lu12i.w $a0, 212992 + movgr2fr.w $fa1, $a0 + fcmp.cule.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB5_19 # %bb.16: fsqrt.s $fa0, $fa0 @@ -823,8 +812,9 @@ _ZN29btSphereBoxCollisionAlgorithm17getSphereDistanceEP17btCollisionObjectR9btVe vldi $vr0, -1168 b .LBB5_21 .LBB5_19: - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI5_0) + lu12i.w $a0, 307593 + ori $a0, $a0, 1664 + movgr2fr.w $fa0, $a0 b .LBB5_21 .LBB5_20: fsub.s $fa0, $fa0, $fs2 @@ -855,18 +845,13 @@ _ZN29btSphereBoxCollisionAlgorithm21calculateTimeOfImpactEP17btCollisionObjectS1 .Lfunc_end6: .size _ZN29btSphereBoxCollisionAlgorithm21calculateTimeOfImpactEP17btCollisionObjectS1_RK16btDispatcherInfoP16btManifoldResult, .Lfunc_end6-_ZN29btSphereBoxCollisionAlgorithm21calculateTimeOfImpactEP17btCollisionObjectS1_RK16btDispatcherInfoP16btManifoldResult # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN29btSphereBoxCollisionAlgorithm20getSpherePenetrationEP17btCollisionObjectR9btVector3S3_RKS2_fS5_S5_ -.LCPI7_0: - .word 0xcb189680 # float -1.0E+7 - .text - .globl _ZN29btSphereBoxCollisionAlgorithm20getSpherePenetrationEP17btCollisionObjectR9btVector3S3_RKS2_fS5_S5_ + .globl _ZN29btSphereBoxCollisionAlgorithm20getSpherePenetrationEP17btCollisionObjectR9btVector3S3_RKS2_fS5_S5_ # -- Begin function _ZN29btSphereBoxCollisionAlgorithm20getSpherePenetrationEP17btCollisionObjectR9btVector3S3_RKS2_fS5_S5_ .p2align 5 .type _ZN29btSphereBoxCollisionAlgorithm20getSpherePenetrationEP17btCollisionObjectR9btVector3S3_RKS2_fS5_S5_,@function _ZN29btSphereBoxCollisionAlgorithm20getSpherePenetrationEP17btCollisionObjectR9btVector3S3_RKS2_fS5_S5_: # @_ZN29btSphereBoxCollisionAlgorithm20getSpherePenetrationEP17btCollisionObjectR9btVector3S3_RKS2_fS5_S5_ # %bb.0: - fld.s $ft2, $a5, 0 - fld.s $ft3, $a5, 4 + fld.s $ft0, $a5, 0 + fld.s $ft1, $a5, 4 fld.s $fa1, $a4, 0 fld.s $fa2, $a1, 56 fld.s $fa3, $a4, 4 @@ -881,47 +866,49 @@ _ZN29btSphereBoxCollisionAlgorithm20getSpherePenetrationEP17btCollisionObjectR9b fld.s $fa2, $a1, 24 fld.s $fa5, $a1, 40 fld.s $fa6, $a1, 12 - fld.s $ft0, $a1, 28 - fld.s $ft1, $a1, 44 + fld.s $ft2, $a1, 28 + fld.s $ft3, $a1, 44 fld.s $ft4, $a1, 16 fld.s $ft6, $a1, 32 fld.s $ft7, $a1, 48 fmul.s $fa2, $fa3, $fa2 fmadd.s $fa1, $fa1, $fa7, $fa2 fmadd.s $fa1, $fa5, $fa4, $fa1 - fmul.s $fa2, $fa3, $ft0 + fmul.s $fa2, $fa3, $ft2 fmadd.s $fa2, $fa6, $fa7, $fa2 - fmadd.s $fa2, $ft1, $fa4, $fa2 + fmadd.s $fa2, $ft3, $fa4, $fa2 fmul.s $fa3, $fa3, $ft6 fmadd.s $fa3, $ft4, $fa7, $fa3 fmadd.s $fa3, $ft7, $fa4, $fa3 - fsub.s $ft0, $fa1, $ft2 - fsub.s $ft8, $fa2, $ft3 + fsub.s $ft2, $fa1, $ft0 + fsub.s $ft8, $fa2, $ft1 fsub.s $ft6, $fa3, $ft5 movgr2fr.w $fa5, $zero fmul.s $ft7, $ft8, $fa5 - fsub.s $fa4, $ft7, $ft0 + fsub.s $fa4, $ft7, $ft2 fmadd.s $fa4, $ft6, $fa5, $fa4 fsub.s $ft9, $fa4, $fa0 fcmp.clt.s $fcc0, $fa5, $ft9 bcnez $fcc0, .LBB7_18 # %bb.1: - pcalau12i $a0, %pc_hi20(.LCPI7_0) - fld.s $fa6, $a0, %pc_lo12(.LCPI7_0) + lu12i.w $a0, -216695 + ori $a0, $a0, 1664 + lu32i.d $a0, 0 + movgr2fr.w $fa6, $a0 fcmp.cule.s $fcc0, $ft9, $fa6 fmov.s $fa4, $fa5 fmov.s $fa7, $fa5 - fmov.s $ft1, $fa5 + fmov.s $ft3, $fa5 fmov.s $ft4, $fa5 bcnez $fcc0, .LBB7_3 # %bb.2: vldi $vr4, -1040 fmov.s $fa6, $ft9 - fmov.s $fa7, $ft2 - fmov.s $ft1, $ft3 + fmov.s $fa7, $ft0 + fmov.s $ft3, $ft1 fmov.s $ft4, $ft5 .LBB7_3: - fmsub.s $ft8, $ft0, $fa5, $ft8 + fmsub.s $ft8, $ft2, $fa5, $ft8 fmadd.s $ft8, $ft6, $fa5, $ft8 fsub.s $ft9, $ft8, $fa0 fcmp.clt.s $fcc0, $fa5, $ft9 @@ -935,13 +922,13 @@ _ZN29btSphereBoxCollisionAlgorithm20getSpherePenetrationEP17btCollisionObjectR9b movgr2fr.w $fa4, $zero vldi $vr5, -1040 fmov.s $fa6, $ft9 - fmov.s $fa7, $ft2 - fmov.s $ft1, $ft3 + fmov.s $fa7, $ft0 + fmov.s $ft3, $ft1 fmov.s $ft4, $ft5 .LBB7_6: - fmadd.s $ft0, $ft0, $ft8, $ft7 - fsub.s $ft0, $ft0, $ft6 - fsub.s $ft9, $ft0, $fa0 + fmadd.s $ft2, $ft2, $ft8, $ft7 + fsub.s $ft2, $ft2, $ft6 + fsub.s $ft9, $ft2, $fa0 fcmp.clt.s $fcc0, $ft8, $ft9 bcnez $fcc0, .LBB7_18 # %bb.7: @@ -950,43 +937,43 @@ _ZN29btSphereBoxCollisionAlgorithm20getSpherePenetrationEP17btCollisionObjectR9b fld.s $ft8, $a6, 8 movgr2fr.w $ft11, $zero fcmp.cule.s $fcc0, $ft9, $fa6 - fmov.s $ft0, $ft11 + fmov.s $ft2, $ft11 bcnez $fcc0, .LBB7_9 # %bb.8: movgr2fr.w $fa4, $zero - vldi $vr8, -1040 + vldi $vr10, -1040 fmov.s $fa6, $ft9 fmov.s $fa5, $fa4 - fmov.s $fa7, $ft2 - fmov.s $ft1, $ft3 + fmov.s $fa7, $ft0 + fmov.s $ft3, $ft1 fmov.s $ft4, $ft5 .LBB7_9: - fsub.s $ft3, $fa1, $ft6 + fsub.s $ft1, $fa1, $ft6 fsub.s $ft10, $fa2, $ft7 fsub.s $ft5, $fa3, $ft8 fmul.s $ft9, $ft10, $ft11 - fadd.s $ft2, $ft3, $ft9 - fmadd.s $ft2, $ft5, $ft11, $ft2 - fsub.s $ft2, $ft2, $fa0 - fcmp.clt.s $fcc0, $ft11, $ft2 + fadd.s $ft0, $ft1, $ft9 + fmadd.s $ft0, $ft5, $ft11, $ft0 + fsub.s $ft0, $ft0, $fa0 + fcmp.clt.s $fcc0, $ft11, $ft0 bcnez $fcc0, .LBB7_18 # %bb.10: - fcmp.cule.s $fcc0, $ft2, $fa6 + fcmp.cule.s $fcc0, $ft0, $fa6 bcnez $fcc0, .LBB7_12 # %bb.11: movgr2fr.w $fa5, $zero vldi $vr4, -1168 - fmov.s $fa6, $ft2 - fmov.s $ft0, $fa5 + fmov.s $fa6, $ft0 + fmov.s $ft2, $fa5 fmov.s $fa7, $ft6 - fmov.s $ft1, $ft7 + fmov.s $ft3, $ft7 fmov.s $ft4, $ft8 .LBB7_12: - movgr2fr.w $ft2, $zero - fmadd.s $ft10, $ft3, $ft2, $ft10 - fmadd.s $ft10, $ft5, $ft2, $ft10 + movgr2fr.w $ft0, $zero + fmadd.s $ft10, $ft1, $ft0, $ft10 + fmadd.s $ft10, $ft5, $ft0, $ft10 fsub.s $ft10, $ft10, $fa0 - fcmp.clt.s $fcc0, $ft2, $ft10 + fcmp.clt.s $fcc0, $ft0, $ft10 bcnez $fcc0, .LBB7_18 # %bb.13: fcmp.cule.s $fcc0, $ft10, $fa6 @@ -995,22 +982,22 @@ _ZN29btSphereBoxCollisionAlgorithm20getSpherePenetrationEP17btCollisionObjectR9b movgr2fr.w $fa4, $zero vldi $vr5, -1168 fmov.s $fa6, $ft10 - fmov.s $ft0, $fa4 + fmov.s $ft2, $fa4 fmov.s $fa7, $ft6 - fmov.s $ft1, $ft7 + fmov.s $ft3, $ft7 fmov.s $ft4, $ft8 .LBB7_15: - fmadd.s $ft3, $ft3, $ft2, $ft9 - fadd.s $ft3, $ft5, $ft3 - fsub.s $fa0, $ft3, $fa0 - fcmp.clt.s $fcc0, $ft2, $fa0 + fmadd.s $ft1, $ft1, $ft0, $ft9 + fadd.s $ft1, $ft5, $ft1 + fsub.s $fa0, $ft1, $fa0 + fcmp.clt.s $fcc0, $ft0, $fa0 bcnez $fcc0, .LBB7_18 # %bb.16: fcmp.clt.s $fcc0, $fa6, $fa0 bceqz $fcc0, .LBB7_19 # %bb.17: - vldi $vr8, -1168 - fmov.s $fa5, $ft2 + vldi $vr10, -1168 + fmov.s $fa5, $ft0 b .LBB7_20 .LBB7_18: vldi $vr0, -1168 @@ -1018,17 +1005,17 @@ _ZN29btSphereBoxCollisionAlgorithm20getSpherePenetrationEP17btCollisionObjectR9b ret .LBB7_19: # %..critedge83_crit_edge fsub.s $fa0, $fa1, $fa7 - fsub.s $fa7, $fa2, $ft1 - fsub.s $ft1, $fa3, $ft4 + fsub.s $fa7, $fa2, $ft3 + fsub.s $ft0, $fa3, $ft4 fmul.s $fa7, $fa5, $fa7 fmadd.s $fa0, $fa4, $fa0, $fa7 - fmadd.s $ft3, $ft0, $ft1, $fa0 + fmadd.s $ft1, $ft2, $ft0, $fa0 fmov.s $fa0, $fa6 - fmov.s $ft2, $fa4 + fmov.s $ft0, $fa4 .LBB7_20: # %.critedge83 - fmul.s $fa4, $ft2, $ft3 - fmul.s $fa6, $fa5, $ft3 - fmul.s $fa7, $ft0, $ft3 + fmul.s $fa4, $ft0, $ft1 + fmul.s $fa6, $fa5, $ft1 + fmul.s $fa7, $ft2, $ft1 fsub.s $fa1, $fa1, $fa4 fsub.s $fa2, $fa2, $fa6 fsub.s $fa3, $fa3, $fa7 @@ -1039,9 +1026,9 @@ _ZN29btSphereBoxCollisionAlgorithm20getSpherePenetrationEP17btCollisionObjectR9b bstrpick.d $a4, $a4, 31, 0 st.d $a0, $a2, 0 st.d $a4, $a2, 8 - fmul.s $fa4, $fa0, $ft2 + fmul.s $fa4, $fa0, $ft0 fmul.s $fa5, $fa0, $fa5 - fmul.s $fa6, $fa0, $ft0 + fmul.s $fa6, $fa0, $ft2 fadd.s $fa1, $fa4, $fa1 fadd.s $fa2, $fa5, $fa2 fadd.s $fa3, $fa6, $fa3 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSphereShape.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSphereShape.s index 266df6a2..40bd46e5 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSphereShape.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSphereShape.s @@ -29,12 +29,7 @@ _ZNK13btSphereShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVect .Lfunc_end1: .size _ZNK13btSphereShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i, .Lfunc_end1-_ZNK13btSphereShape49batchedUnitVectorGetSupportingVertexWithoutMarginEPK9btVector3PS0_i # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK13btSphereShape24localGetSupportingVertexERK9btVector3 -.LCPI2_0: - .word 0x28800000 # float 1.42108547E-14 - .text - .globl _ZNK13btSphereShape24localGetSupportingVertexERK9btVector3 + .globl _ZNK13btSphereShape24localGetSupportingVertexERK9btVector3 # -- Begin function _ZNK13btSphereShape24localGetSupportingVertexERK9btVector3 .p2align 5 .type _ZNK13btSphereShape24localGetSupportingVertexERK9btVector3,@function _ZNK13btSphereShape24localGetSupportingVertexERK9btVector3: # @_ZNK13btSphereShape24localGetSupportingVertexERK9btVector3 @@ -62,40 +57,40 @@ _ZNK13btSphereShape24localGetSupportingVertexERK9btVector3: # @_ZNK13btSphereSha .cfi_offset 59, -64 .cfi_offset 60, -72 .cfi_offset 61, -80 - move $s0, $a0 + move $fp, $a0 ld.d $a0, $a0, 0 ld.d $a2, $a0, 104 - move $s1, $a1 - move $a0, $s0 + move $s0, $a1 + move $a0, $fp jirl $ra, $a2, 0 - move $fp, $a1 + move $s1, $a1 movgr2fr.w $fs0, $a0 srli.d $a0, $a0, 32 movgr2fr.w $fs1, $a0 + fld.s $fa0, $s0, 4 + fld.s $fa1, $s0, 0 + fld.s $fa2, $s0, 8 movgr2fr.w $fs2, $a1 - fld.s $fa0, $s1, 0 - fld.s $fa1, $s1, 4 - fld.s $fa2, $s1, 8 - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.s $fa3, $a0, %pc_lo12(.LCPI2_0) - fmul.s $fa4, $fa1, $fa1 - fmadd.s $fa4, $fa0, $fa0, $fa4 - fmadd.s $fa4, $fa2, $fa2, $fa4 - fcmp.clt.s $fcc0, $fa4, $fa3 + fmul.s $fa3, $fa0, $fa0 + fmadd.s $fa3, $fa1, $fa1, $fa3 + fmadd.s $fa3, $fa2, $fa2, $fa3 + lu12i.w $a0, 165888 + movgr2fr.w $fa4, $a0 + fcmp.clt.s $fcc0, $fa3, $fa4 vldi $vr3, -1040 - fsel $fa0, $fa0, $fa3, $fcc0 fsel $fa1, $fa1, $fa3, $fcc0 + fsel $fa0, $fa0, $fa3, $fcc0 fsel $fa2, $fa2, $fa3, $fcc0 - fmul.s $fa3, $fa1, $fa1 - ld.d $a0, $s0, 0 - fmadd.s $fa3, $fa0, $fa0, $fa3 + fmul.s $fa3, $fa0, $fa0 + ld.d $a0, $fp, 0 + fmadd.s $fa3, $fa1, $fa1, $fa3 fmadd.s $fa3, $fa2, $fa2, $fa3 frsqrt.s $fa3, $fa3 ld.d $a1, $a0, 88 - fmul.s $fs3, $fa0, $fa3 - fmul.s $fs4, $fa1, $fa3 + fmul.s $fs3, $fa1, $fa3 + fmul.s $fs4, $fa0, $fa3 fmul.s $fs5, $fa2, $fa3 - move $a0, $s0 + move $a0, $fp jirl $ra, $a1, 0 fmul.s $fa1, $fa0, $fs3 fmul.s $fa2, $fa0, $fs4 @@ -108,8 +103,8 @@ _ZNK13btSphereShape24localGetSupportingVertexERK9btVector3: # @_ZNK13btSphereSha bstrins.d $a0, $a1, 63, 32 movfr2gr.s $a1, $fa0 bstrpick.d $a1, $a1, 31, 0 - bstrins.d $fp, $a1, 31, 0 - move $a1, $fp + bstrins.d $s1, $a1, 31, 0 + move $a1, $s1 fld.d $fs5, $sp, 16 # 8-byte Folded Reload fld.d $fs4, $sp, 24 # 8-byte Folded Reload fld.d $fs3, $sp, 32 # 8-byte Folded Reload @@ -205,12 +200,7 @@ _ZNK13btSphereShape7getAabbERK11btTransformR9btVector3S4_: # @_ZNK13btSphereShap .size _ZNK13btSphereShape7getAabbERK11btTransformR9btVector3S4_, .Lfunc_end3-_ZNK13btSphereShape7getAabbERK11btTransformR9btVector3S4_ .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK13btSphereShape21calculateLocalInertiaEfR9btVector3 -.LCPI4_0: - .word 0x3ecccccd # float 0.400000006 - .text - .globl _ZNK13btSphereShape21calculateLocalInertiaEfR9btVector3 + .globl _ZNK13btSphereShape21calculateLocalInertiaEfR9btVector3 # -- Begin function _ZNK13btSphereShape21calculateLocalInertiaEfR9btVector3 .p2align 5 .type _ZNK13btSphereShape21calculateLocalInertiaEfR9btVector3,@function _ZNK13btSphereShape21calculateLocalInertiaEfR9btVector3: # @_ZNK13btSphereShape21calculateLocalInertiaEfR9btVector3 @@ -228,10 +218,11 @@ _ZNK13btSphereShape21calculateLocalInertiaEfR9btVector3: # @_ZNK13btSphereShape2 .cfi_offset 56, -32 move $fp, $a0 ld.d $a0, $a0, 0 - pcalau12i $a2, %pc_hi20(.LCPI4_0) - fld.s $fa1, $a2, %pc_lo12(.LCPI4_0) - ld.d $a2, $a0, 88 move $s0, $a1 + lu12i.w $a1, 257228 + ld.d $a2, $a0, 88 + ori $a0, $a1, 3277 + movgr2fr.w $fa1, $a0 fmul.s $fs0, $fa0, $fa1 move $a0, $fp jirl $ra, $a2, 0 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSphereSphereCollisionAlgorithm.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSphereSphereCollisionAlgorithm.s index cd945612..98977adb 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSphereSphereCollisionAlgorithm.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSphereSphereCollisionAlgorithm.s @@ -349,10 +349,6 @@ GCC_except_table3: .word 0x00000000 # float 0 .word 0x00000000 # float 0 .word 0x00000000 # float 0 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI4_1: - .word 0x34000000 # float 1.1920929E-7 .text .globl _ZN32btSphereSphereCollisionAlgorithm16processCollisionEP17btCollisionObjectS1_RK16btDispatcherInfoP16btManifoldResult .p2align 5 @@ -415,13 +411,13 @@ _ZN32btSphereSphereCollisionAlgorithm16processCollisionEP17btCollisionObjectS1_R st.d $fp, $sp, 32 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - pcalau12i $a1, %pc_hi20(.LCPI4_1) - fld.s $ft1, $a1, %pc_lo12(.LCPI4_1) - vld $vr10, $a0, %pc_lo12(.LCPI4_0) fsub.s $fa0, $ft0, $fa0 - fcmp.cule.s $fcc0, $ft0, $ft1 - vst $vr10, $sp, 16 + pcalau12i $a0, %pc_hi20(.LCPI4_0) + vld $vr9, $a0, %pc_lo12(.LCPI4_0) + lu12i.w $a0, 212992 + movgr2fr.w $ft2, $a0 + fcmp.cule.s $fcc0, $ft0, $ft2 + vst $vr9, $sp, 16 bcnez $fcc0, .LBB4_6 # %bb.5: frecip.s $ft1, $ft0 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btStaticPlaneShape.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btStaticPlaneShape.s index 7683b1c7..2be4e855 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btStaticPlaneShape.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btStaticPlaneShape.s @@ -158,12 +158,7 @@ _ZNK18btStaticPlaneShape7getAabbERK11btTransformR9btVector3S4_: # @_ZNK18btStati .Lfunc_end3: .size _ZNK18btStaticPlaneShape7getAabbERK11btTransformR9btVector3S4_, .Lfunc_end3-_ZNK18btStaticPlaneShape7getAabbERK11btTransformR9btVector3S4_ # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNK18btStaticPlaneShape19processAllTrianglesEP18btTriangleCallbackRK9btVector3S4_ -.LCPI4_0: - .word 0x3f3504f3 # float 0.707106769 - .text - .globl _ZNK18btStaticPlaneShape19processAllTrianglesEP18btTriangleCallbackRK9btVector3S4_ + .globl _ZNK18btStaticPlaneShape19processAllTrianglesEP18btTriangleCallbackRK9btVector3S4_ # -- Begin function _ZNK18btStaticPlaneShape19processAllTrianglesEP18btTriangleCallbackRK9btVector3S4_ .p2align 5 .type _ZNK18btStaticPlaneShape19processAllTrianglesEP18btTriangleCallbackRK9btVector3S4_,@function _ZNK18btStaticPlaneShape19processAllTrianglesEP18btTriangleCallbackRK9btVector3S4_: # @_ZNK18btStaticPlaneShape19processAllTrianglesEP18btTriangleCallbackRK9btVector3S4_ @@ -195,8 +190,6 @@ _ZNK18btStaticPlaneShape19processAllTrianglesEP18btTriangleCallbackRK9btVector3S .cfi_offset 59, -80 .cfi_offset 60, -88 .cfi_offset 61, -96 - pcalau12i $a4, %pc_hi20(.LCPI4_0) - fld.s $fa7, $a4, %pc_lo12(.LCPI4_0) fld.s $fa0, $a3, 0 fld.s $fa2, $a2, 0 fld.s $fa1, $a3, 4 @@ -204,8 +197,11 @@ _ZNK18btStaticPlaneShape19processAllTrianglesEP18btTriangleCallbackRK9btVector3S fld.s $fa3, $a2, 4 fld.s $fa4, $a3, 8 fld.s $fa6, $a2, 8 - fabs.s $ft0, $fa5 - fcmp.cule.s $fcc0, $ft0, $fa7 + fabs.s $fa7, $fa5 + lu12i.w $a2, 258896 + ori $a2, $a2, 1267 + movgr2fr.w $ft0, $a2 + fcmp.cule.s $fcc0, $fa7, $ft0 move $fp, $a1 bcnez $fcc0, .LBB4_2 # %bb.1: diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSubSimplexConvexCast.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSubSimplexConvexCast.s index bf3cbb5b..2f389bbb 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSubSimplexConvexCast.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btSubSimplexConvexCast.s @@ -15,16 +15,7 @@ _ZN22btSubsimplexConvexCastC2EPK13btConvexShapeS2_P22btVoronoiSimplexSolver: # @ .Lfunc_end0: .size _ZN22btSubsimplexConvexCastC2EPK13btConvexShapeS2_P22btVoronoiSimplexSolver, .Lfunc_end0-_ZN22btSubsimplexConvexCastC2EPK13btConvexShapeS2_P22btVoronoiSimplexSolver # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN22btSubsimplexConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConvexCast10CastResultE -.LCPI1_0: - .word 0x38d1b717 # float 9.99999974E-5 -.LCPI1_1: - .word 0xa8800000 # float -1.42108547E-14 -.LCPI1_2: - .word 0x28800000 # float 1.42108547E-14 - .text - .globl _ZN22btSubsimplexConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConvexCast10CastResultE + .globl _ZN22btSubsimplexConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConvexCast10CastResultE # -- Begin function _ZN22btSubsimplexConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConvexCast10CastResultE .p2align 5 .type _ZN22btSubsimplexConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConvexCast10CastResultE,@function _ZN22btSubsimplexConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConvexCast10CastResultE: # @_ZN22btSubsimplexConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConvexCast10CastResultE @@ -121,11 +112,11 @@ _ZN22btSubsimplexConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConv fld.s $ft7, $s2, 24 fst.s $ft7, $sp, 32 # 4-byte Folded Spill fsub.s $fs0, $fa0, $fa3 - fsub.s $fs2, $fa1, $fa4 + fsub.s $fs4, $fa1, $fa4 fsub.s $fs1, $fa2, $fa5 ld.d $a0, $s0, 16 fneg.s $fa0, $fs0 - fneg.s $fa1, $fs2 + fneg.s $fa1, $fs4 fneg.s $fa2, $fs1 fst.s $ft1, $sp, 72 # 4-byte Folded Spill fmul.s $fa3, $ft1, $fa1 @@ -175,7 +166,7 @@ _ZN22btSubsimplexConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConv fld.s $fa6, $s4, 20 fmadd.s $fa1, $fa5, $fa4, $fa1 fld.s $fa5, $s4, 16 - fadd.s $fs4, $fa3, $fa1 + fadd.s $fs5, $fa3, $fa1 fmul.s $fa1, $fa6, $fa2 fld.s $fa3, $s4, 36 fmadd.s $fa1, $fa5, $fa0, $fa1 @@ -187,30 +178,30 @@ _ZN22btSubsimplexConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConv fld.s $fa2, $s4, 40 fld.s $fa5, $s4, 56 fmadd.s $fa1, $fa6, $fa4, $fa1 - fadd.s $fs5, $fa3, $fa1 + fadd.s $fs6, $fa3, $fa1 fmadd.s $fa0, $fa2, $fa4, $fa0 - fadd.s $fs6, $fa5, $fa0 - movfr2gr.s $a0, $fs4 + fadd.s $fs7, $fa5, $fa0 + movfr2gr.s $a0, $fs5 fld.s $fa0, $s2, 16 fld.s $fa1, $s2, 0 fld.s $fa2, $s2, 32 - movfr2gr.s $a1, $fs5 - fmul.s $fa0, $fs2, $fa0 + movfr2gr.s $a1, $fs6 + fmul.s $fa0, $fs4, $fa0 fmadd.s $fa0, $fa1, $fs0, $fa0 fmadd.s $fa0, $fa2, $fs1, $fa0 fld.s $fa1, $s2, 20 fld.s $fa2, $s2, 4 fld.s $fa3, $s2, 36 bstrins.d $a0, $a1, 63, 32 - fmul.s $fa1, $fs2, $fa1 + fmul.s $fa1, $fs4, $fa1 fmadd.s $fa1, $fa2, $fs0, $fa1 fmadd.s $fa1, $fa3, $fs1, $fa1 fld.s $fa2, $s2, 24 fld.s $fa3, $s2, 8 - movfr2gr.s $a1, $fs6 + movfr2gr.s $a1, $fs7 bstrpick.d $a1, $a1, 31, 0 - fst.s $fs2, $sp, 12 # 4-byte Folded Spill - fmul.s $fa2, $fs2, $fa2 + fst.s $fs4, $sp, 12 # 4-byte Folded Spill + fmul.s $fa2, $fs4, $fa2 fst.s $fs0, $sp, 16 # 4-byte Folded Spill fmadd.s $fa2, $fa3, $fs0, $fa2 fld.s $fa3, $s2, 40 @@ -265,28 +256,30 @@ _ZN22btSubsimplexConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConv bstrpick.d $a1, $a1, 31, 0 st.d $a0, $sp, 184 st.d $a1, $sp, 192 - fsub.s $fa1, $fs4, $fa1 - fsub.s $fa2, $fs5, $fa3 - fsub.s $fa0, $fs6, $fa0 + fsub.s $fa1, $fs5, $fa1 + fsub.s $fa2, $fs6, $fa3 + fsub.s $fa0, $fs7, $fa0 movfr2gr.s $a0, $fa1 movfr2gr.s $a1, $fa2 bstrins.d $a0, $a1, 63, 32 movfr2gr.s $a1, $fa0 bstrpick.d $a1, $a1, 31, 0 st.d $a0, $sp, 216 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.s $fa3, $a0, %pc_lo12(.LCPI1_0) fmul.s $fa2, $fa2, $fa2 fmadd.s $fa1, $fa1, $fa1, $fa2 fmadd.s $fa0, $fa0, $fa0, $fa1 - fst.s $fa3, $sp, 20 # 4-byte Folded Spill - fcmp.cule.s $fcc0, $fa0, $fa3 + lu12i.w $a0, 232731 + ori $a0, $a0, 1815 + movgr2fr.w $fa1, $a0 + fst.s $fa1, $sp, 20 # 4-byte Folded Spill + fcmp.cule.s $fcc0, $fa0, $fa1 st.d $a1, $sp, 224 bcnez $fcc0, .LBB1_9 # %bb.1: # %.lr.ph movgr2fr.w $fs0, $zero addi.w $s5, $zero, -33 - pcalau12i $s6, %pc_hi20(.LCPI1_1) + lu12i.w $s6, -358400 + lu32i.d $s6, 0 fmov.s $fs2, $fs0 fmov.s $fs3, $fs0 fmov.s $fs1, $fs0 @@ -309,23 +302,23 @@ _ZN22btSubsimplexConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConv fneg.s $fa2, $fa2 fld.s $fs1, $sp, 72 # 4-byte Folded Reload fmul.s $fa3, $fs1, $fa1 - fld.s $fs5, $sp, 84 # 4-byte Folded Reload - fmadd.s $fa3, $fs5, $fa0, $fa3 + fld.s $fs6, $sp, 84 # 4-byte Folded Reload + fmadd.s $fa3, $fs6, $fa0, $fa3 fld.s $fa4, $sp, 132 # 4-byte Folded Reload fmadd.s $fa3, $fa4, $fa2, $fa3 - fld.s $fs7, $sp, 68 # 4-byte Folded Reload - fmul.s $fa4, $fs7, $fa1 + fld.s $fs2, $sp, 68 # 4-byte Folded Reload + fmul.s $fa4, $fs2, $fa1 fst.s $fs0, $sp, 124 # 4-byte Folded Spill fld.s $fs0, $sp, 80 # 4-byte Folded Reload fmadd.s $fa4, $fs0, $fa0, $fa4 - fld.s $fs2, $sp, 60 # 4-byte Folded Reload - fmadd.s $fa4, $fs2, $fa2, $fa4 + fld.s $fs4, $sp, 60 # 4-byte Folded Reload + fmadd.s $fa4, $fs4, $fa2, $fa4 fld.s $fs3, $sp, 64 # 4-byte Folded Reload fmul.s $fa1, $fs3, $fa1 - fld.s $fs6, $sp, 76 # 4-byte Folded Reload - fmadd.s $fa0, $fs6, $fa0, $fa1 - fld.s $fs4, $sp, 56 # 4-byte Folded Reload - fmadd.s $fa0, $fs4, $fa2, $fa0 + fld.s $fs7, $sp, 76 # 4-byte Folded Reload + fmadd.s $fa0, $fs7, $fa0, $fa1 + fld.s $fs5, $sp, 56 # 4-byte Folded Reload + fmadd.s $fa0, $fs5, $fa2, $fa0 movfr2gr.s $a1, $fa3 movfr2gr.s $a2, $fa4 bstrins.d $a1, $a2, 63, 32 @@ -342,19 +335,19 @@ _ZN22btSubsimplexConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConv movgr2fr.w $fa1, $a0 movgr2fr.w $fa2, $a1 fmul.s $fa3, $fs0, $fa1 - fmadd.s $fa3, $fs5, $fa0, $fa3 - fmadd.s $fa3, $fs6, $fa2, $fa3 + fmadd.s $fa3, $fs6, $fa0, $fa3 + fmadd.s $fa3, $fs7, $fa2, $fa3 fld.s $fa4, $sp, 108 # 4-byte Folded Reload fadd.s $fa3, $fa4, $fa3 - fmul.s $fa4, $fs7, $fa1 + fmul.s $fa4, $fs2, $fa1 fmadd.s $fa4, $fs1, $fa0, $fa4 fmadd.s $fa4, $fs3, $fa2, $fa4 fld.s $fa5, $sp, 104 # 4-byte Folded Reload fadd.s $fa4, $fa5, $fa4 - fmul.s $fa1, $fs2, $fa1 + fmul.s $fa1, $fs4, $fa1 fld.s $fa5, $sp, 132 # 4-byte Folded Reload fmadd.s $fa0, $fa5, $fa0, $fa1 - fmadd.s $fa0, $fs4, $fa2, $fa0 + fmadd.s $fa0, $fs5, $fa2, $fa0 fld.s $fa1, $sp, 100 # 4-byte Folded Reload fadd.s $fa0, $fa1, $fa0 movfr2gr.s $a0, $fa3 @@ -455,13 +448,13 @@ _ZN22btSubsimplexConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConv fld.s $fs3, $sp, 112 # 4-byte Folded Reload bcnez $fcc0, .LBB1_7 # %bb.5: # in Loop: Header=BB1_2 Depth=1 - fld.s $fa5, $s6, %pc_lo12(.LCPI1_1) fld.s $fa4, $sp, 12 # 4-byte Folded Reload fmul.s $fa4, $fa4, $fa0 - fld.s $fa6, $sp, 16 # 4-byte Folded Reload - fmadd.s $fa4, $fa1, $fa6, $fa4 - fld.s $fa6, $sp, 8 # 4-byte Folded Reload - fmadd.s $fa4, $fa2, $fa6, $fa4 + fld.s $fa5, $sp, 16 # 4-byte Folded Reload + fmadd.s $fa4, $fa1, $fa5, $fa4 + fld.s $fa5, $sp, 8 # 4-byte Folded Reload + fmadd.s $fa4, $fa2, $fa5, $fa4 + movgr2fr.w $fa5, $s6 fcmp.cle.s $fcc0, $fa5, $fa4 bcnez $fcc0, .LBB1_15 # %bb.6: # in Loop: Header=BB1_2 Depth=1 @@ -531,11 +524,11 @@ _ZN22btSubsimplexConvexCast16calcTimeOfImpactERK11btTransformS2_S2_S2_RN12btConv fmov.s $fs2, $fs1 fmov.s $fs0, $fs1 .LBB1_10: # %.critedge - pcalau12i $a0, %pc_hi20(.LCPI1_2) - fld.s $fa1, $a0, %pc_lo12(.LCPI1_2) fmul.s $fa0, $fs3, $fs3 fmadd.s $fa0, $fs1, $fs1, $fa0 fmadd.s $fa0, $fs2, $fs2, $fa0 + lu12i.w $a0, 165888 + movgr2fr.w $fa1, $a0 fcmp.cult.s $fcc0, $fa0, $fa1 fst.s $fs0, $fp, 168 bceqz $fcc0, .LBB1_12 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btTriangleShapeEx.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btTriangleShapeEx.s index b4e57f43..4b81031e 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btTriangleShapeEx.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btTriangleShapeEx.s @@ -1,30 +1,23 @@ .file "btTriangleShapeEx.cpp" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN20GIM_TRIANGLE_CONTACT12merge_pointsERK9btVector4fPK9btVector3i -.LCPI0_0: - .word 0xc47a0000 # float -1000 -.LCPI0_1: - .word 0x34000000 # float 1.1920929E-7 .text - .globl _ZN20GIM_TRIANGLE_CONTACT12merge_pointsERK9btVector4fPK9btVector3i + .globl _ZN20GIM_TRIANGLE_CONTACT12merge_pointsERK9btVector4fPK9btVector3i # -- Begin function _ZN20GIM_TRIANGLE_CONTACT12merge_pointsERK9btVector4fPK9btVector3i .p2align 5 .type _ZN20GIM_TRIANGLE_CONTACT12merge_pointsERK9btVector4fPK9btVector3i,@function _ZN20GIM_TRIANGLE_CONTACT12merge_pointsERK9btVector4fPK9btVector3i: # @_ZN20GIM_TRIANGLE_CONTACT12merge_pointsERK9btVector4fPK9btVector3i # %bb.0: - lu12i.w $a4, -243808 - lu32i.d $a4, 0 - st.d $a4, $a0, 0 + lu12i.w $a7, -243808 + lu32i.d $a7, 0 + st.d $a7, $a0, 0 blez $a3, .LBB0_13 # %bb.1: # %.lr.ph addi.d $sp, $sp, -80 move $a5, $zero move $a4, $zero - pcalau12i $a6, %pc_hi20(.LCPI0_0) - fld.s $fa3, $a6, %pc_lo12(.LCPI0_0) - pcalau12i $a6, %pc_hi20(.LCPI0_1) - fld.s $fa1, $a6, %pc_lo12(.LCPI0_1) addi.d $a6, $a2, 8 - movgr2fr.w $fa2, $zero + movgr2fr.w $fa3, $a7 + movgr2fr.w $fa1, $zero + lu12i.w $a7, 212992 + movgr2fr.w $fa2, $a7 addi.d $a7, $sp, 16 b .LBB0_5 .p2align 4, , 16 @@ -54,13 +47,13 @@ _ZN20GIM_TRIANGLE_CONTACT12merge_pointsERK9btVector4fPK9btVector3i: # @_ZN20GIM_ fmadd.s $fa4, $ft0, $ft1, $fa4 fsub.s $fa4, $fa4, $ft2 fsub.s $fa4, $fa0, $fa4 - fcmp.cult.s $fcc0, $fa4, $fa2 + fcmp.cult.s $fcc0, $fa4, $fa1 bcnez $fcc0, .LBB0_4 # %bb.6: # in Loop: Header=BB0_5 Depth=1 fcmp.cule.s $fcc0, $fa4, $fa3 bceqz $fcc0, .LBB0_2 # %bb.7: # in Loop: Header=BB0_5 Depth=1 - fadd.s $fa4, $fa4, $fa1 + fadd.s $fa4, $fa4, $fa2 fcmp.cult.s $fcc0, $fa4, $fa3 bcnez $fcc0, .LBB0_4 # %bb.8: # in Loop: Header=BB0_5 Depth=1 @@ -328,31 +321,27 @@ _ZN19btPrimitiveTriangle13clip_triangleERS_P9btVector3: # @_ZN19btPrimitiveTrian .size _ZN19btPrimitiveTriangle13clip_triangleERS_P9btVector3, .Lfunc_end2-_ZN19btPrimitiveTriangle13clip_triangleERS_P9btVector3 .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z22bt_plane_clip_triangleRK9btVector4RK9btVector3S4_S4_PS2_ -.LCPI3_0: - .word 0x34000000 # float 1.1920929E-7 .section .text._Z22bt_plane_clip_triangleRK9btVector4RK9btVector3S4_S4_PS2_,"axG",@progbits,_Z22bt_plane_clip_triangleRK9btVector4RK9btVector3S4_S4_PS2_,comdat - .weak _Z22bt_plane_clip_triangleRK9btVector4RK9btVector3S4_S4_PS2_ + .weak _Z22bt_plane_clip_triangleRK9btVector4RK9btVector3S4_S4_PS2_ # -- Begin function _Z22bt_plane_clip_triangleRK9btVector4RK9btVector3S4_S4_PS2_ .p2align 5 .type _Z22bt_plane_clip_triangleRK9btVector4RK9btVector3S4_S4_PS2_,@function _Z22bt_plane_clip_triangleRK9btVector4RK9btVector3S4_S4_PS2_: # @_Z22bt_plane_clip_triangleRK9btVector4RK9btVector3S4_S4_PS2_ .cfi_startproc # %bb.0: move $a5, $a0 - fld.s $fa0, $a1, 4 - fld.s $fa2, $a0, 4 - fld.s $fa6, $a1, 0 - fld.s $fa3, $a0, 0 - fmul.s $fa0, $fa0, $fa2 - fld.s $fa7, $a1, 8 + fld.s $fa0, $a1, 0 + fld.s $fa2, $a0, 0 + fld.s $fa1, $a1, 4 + fld.s $fa3, $a0, 4 + fld.s $fa6, $a1, 8 fld.s $fa4, $a0, 8 fld.s $fa5, $a0, 12 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI3_0) - fmadd.s $fa0, $fa6, $fa3, $fa0 - fmadd.s $fa0, $fa7, $fa4, $fa0 + fmul.s $fa1, $fa1, $fa3 + fmadd.s $fa0, $fa0, $fa2, $fa1 + fmadd.s $fa0, $fa6, $fa4, $fa0 fsub.s $fa0, $fa0, $fa5 + lu12i.w $a0, 212992 + movgr2fr.w $fa1, $a0 fcmp.clt.s $fcc0, $fa1, $fa0 bceqz $fcc0, .LBB3_2 # %bb.1: @@ -361,8 +350,8 @@ _Z22bt_plane_clip_triangleRK9btVector4RK9btVector3S4_S4_PS2_: # @_Z22bt_plane_cl .LBB3_2: vld $vr2, $a1, 0 vst $vr2, $a4, 0 - fld.s $fa3, $a5, 0 - fld.s $fa2, $a5, 4 + fld.s $fa2, $a5, 0 + fld.s $fa3, $a5, 4 fld.s $fa4, $a5, 8 fld.s $fa5, $a5, 12 ori $a0, $zero, 1 @@ -370,8 +359,8 @@ _Z22bt_plane_clip_triangleRK9btVector4RK9btVector3S4_S4_PS2_: # @_Z22bt_plane_cl fld.s $fa6, $a2, 4 fld.s $fa7, $a2, 0 fld.s $ft0, $a2, 8 - fmul.s $fa2, $fa6, $fa2 - fmadd.s $fa2, $fa7, $fa3, $fa2 + fmul.s $fa3, $fa6, $fa3 + fmadd.s $fa2, $fa7, $fa2, $fa3 fmadd.s $fa2, $ft0, $fa4, $fa2 fsub.s $fa2, $fa2, $fa5 fcmp.clt.s $fcc1, $fa1, $fa2 @@ -520,31 +509,27 @@ _Z22bt_plane_clip_triangleRK9btVector4RK9btVector3S4_S4_PS2_: # @_Z22bt_plane_cl .size _Z22bt_plane_clip_triangleRK9btVector4RK9btVector3S4_S4_PS2_, .Lfunc_end3-_Z22bt_plane_clip_triangleRK9btVector4RK9btVector3S4_S4_PS2_ .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z21bt_plane_clip_polygonRK9btVector4PK9btVector3iPS2_ -.LCPI4_0: - .word 0x34000000 # float 1.1920929E-7 .section .text._Z21bt_plane_clip_polygonRK9btVector4PK9btVector3iPS2_,"axG",@progbits,_Z21bt_plane_clip_polygonRK9btVector4PK9btVector3iPS2_,comdat - .weak _Z21bt_plane_clip_polygonRK9btVector4PK9btVector3iPS2_ + .weak _Z21bt_plane_clip_polygonRK9btVector4PK9btVector3iPS2_ # -- Begin function _Z21bt_plane_clip_polygonRK9btVector4PK9btVector3iPS2_ .p2align 5 .type _Z21bt_plane_clip_polygonRK9btVector4PK9btVector3iPS2_,@function _Z21bt_plane_clip_polygonRK9btVector4PK9btVector3iPS2_: # @_Z21bt_plane_clip_polygonRK9btVector4PK9btVector3iPS2_ .cfi_startproc # %bb.0: move $a4, $a0 - fld.s $fa0, $a1, 4 - fld.s $fa1, $a0, 4 - fld.s $fa2, $a1, 0 - fld.s $fa3, $a0, 0 - fmul.s $fa0, $fa0, $fa1 + fld.s $fa0, $a1, 0 + fld.s $fa1, $a0, 0 + fld.s $fa2, $a1, 4 + fld.s $fa3, $a0, 4 fld.s $fa4, $a1, 8 fld.s $fa5, $a0, 8 fld.s $fa6, $a0, 12 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI4_0) - fmadd.s $fa0, $fa2, $fa3, $fa0 + fmul.s $fa2, $fa2, $fa3 + fmadd.s $fa0, $fa0, $fa1, $fa2 fmadd.s $fa0, $fa4, $fa5, $fa0 fsub.s $fa0, $fa0, $fa6 + lu12i.w $a0, 212992 + movgr2fr.w $fa1, $a0 fcmp.clt.s $fcc0, $fa1, $fa0 bceqz $fcc0, .LBB4_6 # %bb.1: @@ -679,14 +664,8 @@ _Z21bt_plane_clip_polygonRK9btVector4PK9btVector3iPS2_: # @_Z21bt_plane_clip_pol .size _Z21bt_plane_clip_polygonRK9btVector4PK9btVector3iPS2_, .Lfunc_end4-_Z21bt_plane_clip_polygonRK9btVector4PK9btVector3iPS2_ .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN19btPrimitiveTriangle35find_triangle_collision_clip_methodERS_R20GIM_TRIANGLE_CONTACT -.LCPI5_0: - .word 0xc47a0000 # float -1000 -.LCPI5_1: - .word 0x34000000 # float 1.1920929E-7 .text - .globl _ZN19btPrimitiveTriangle35find_triangle_collision_clip_methodERS_R20GIM_TRIANGLE_CONTACT + .globl _ZN19btPrimitiveTriangle35find_triangle_collision_clip_methodERS_R20GIM_TRIANGLE_CONTACT # -- Begin function _ZN19btPrimitiveTriangle35find_triangle_collision_clip_methodERS_R20GIM_TRIANGLE_CONTACT .p2align 5 .type _ZN19btPrimitiveTriangle35find_triangle_collision_clip_methodERS_R20GIM_TRIANGLE_CONTACT,@function _ZN19btPrimitiveTriangle35find_triangle_collision_clip_methodERS_R20GIM_TRIANGLE_CONTACT: # @_ZN19btPrimitiveTriangle35find_triangle_collision_clip_methodERS_R20GIM_TRIANGLE_CONTACT @@ -741,12 +720,13 @@ _ZN19btPrimitiveTriangle35find_triangle_collision_clip_methodERS_R20GIM_TRIANGLE fld.s $fa0, $sp, 312 fld.s $fa3, $sp, 316 addi.d $a2, $a0, -1 - pcalau12i $s5, %pc_hi20(.LCPI5_0) - fld.s $fs0, $s5, %pc_lo12(.LCPI5_0) - pcalau12i $a3, %pc_hi20(.LCPI5_1) - fld.s $fs2, $a3, %pc_lo12(.LCPI5_1) addi.d $s4, $sp, 580 - movgr2fr.w $fs3, $zero + lu12i.w $s5, -243808 + lu32i.d $s5, 0 + movgr2fr.w $fs0, $s5 + movgr2fr.w $fs2, $zero + lu12i.w $a3, 212992 + movgr2fr.w $fs3, $a3 addi.d $a3, $sp, 16 .LBB5_3: # %.outer84 # =>This Loop Header: Depth=1 @@ -771,13 +751,13 @@ _ZN19btPrimitiveTriangle35find_triangle_collision_clip_methodERS_R20GIM_TRIANGLE fmadd.s $fa4, $fa6, $fa0, $fa4 fsub.s $fa4, $fa4, $fa3 fsub.s $fa4, $fs1, $fa4 - fcmp.cult.s $fcc0, $fa4, $fs3 + fcmp.cult.s $fcc0, $fa4, $fs2 bcnez $fcc0, .LBB5_4 # %bb.6: # in Loop: Header=BB5_5 Depth=2 fcmp.clt.s $fcc0, $fs0, $fa4 bcnez $fcc0, .LBB5_9 # %bb.7: # in Loop: Header=BB5_5 Depth=2 - fadd.s $fa4, $fa4, $fs2 + fadd.s $fa4, $fa4, $fs3 fcmp.cult.s $fcc0, $fa4, $fs0 bcnez $fcc0, .LBB5_4 # %bb.8: # in Loop: Header=BB5_5 Depth=2 @@ -835,13 +815,13 @@ _ZN19btPrimitiveTriangle35find_triangle_collision_clip_methodERS_R20GIM_TRIANGLE # %bb.17: # %.lr.ph.i16 move $a2, $zero move $a3, $zero + addi.d $a1, $sp, 24 fld.s $fa1, $sp, 24 fld.s $fa2, $sp, 28 fld.s $fa3, $sp, 32 fld.s $fa4, $sp, 36 - fld.s $fa0, $s5, %pc_lo12(.LCPI5_0) - addi.d $a1, $sp, 24 addi.d $a4, $a0, -1 + movgr2fr.w $fa0, $s5 addi.d $a5, $sp, 832 .LBB5_18: # %.outer # =>This Loop Header: Depth=1 @@ -866,13 +846,13 @@ _ZN19btPrimitiveTriangle35find_triangle_collision_clip_methodERS_R20GIM_TRIANGLE fmadd.s $fa5, $fa7, $fa3, $fa5 fsub.s $fa5, $fa5, $fa4 fsub.s $fa5, $fs1, $fa5 - fcmp.cult.s $fcc0, $fa5, $fs3 + fcmp.cult.s $fcc0, $fa5, $fs2 bcnez $fcc0, .LBB5_19 # %bb.21: # in Loop: Header=BB5_20 Depth=2 fcmp.clt.s $fcc0, $fa0, $fa5 bcnez $fcc0, .LBB5_24 # %bb.22: # in Loop: Header=BB5_20 Depth=2 - fadd.s $fa5, $fa5, $fs2 + fadd.s $fa5, $fa5, $fs3 fcmp.cult.s $fcc0, $fa5, $fa0 bcnez $fcc0, .LBB5_19 # %bb.23: # in Loop: Header=BB5_20 Depth=2 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btVoronoiSimplexSolver.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btVoronoiSimplexSolver.s index 4cbda542..bceaaaa5 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btVoronoiSimplexSolver.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btVoronoiSimplexSolver.s @@ -843,14 +843,7 @@ _ZN22btVoronoiSimplexSolver22closestPtPointTriangleERK9btVector3S2_S2_S2_R25btSu .Lfunc_end5: .size _ZN22btVoronoiSimplexSolver22closestPtPointTriangleERK9btVector3S2_S2_S2_R25btSubSimplexClosestResult, .Lfunc_end5-_ZN22btVoronoiSimplexSolver22closestPtPointTriangleERK9btVector3S2_S2_S2_R25btSubSimplexClosestResult # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN22btVoronoiSimplexSolver25closestPtPointTetrahedronERK9btVector3S2_S2_S2_S2_R25btSubSimplexClosestResult -.LCPI6_0: - .word 0x322bcc76 # float 9.99999905E-9 -.LCPI6_1: - .word 0x7f7fffff # float 3.40282347E+38 - .text - .globl _ZN22btVoronoiSimplexSolver25closestPtPointTetrahedronERK9btVector3S2_S2_S2_S2_R25btSubSimplexClosestResult + .globl _ZN22btVoronoiSimplexSolver25closestPtPointTetrahedronERK9btVector3S2_S2_S2_S2_R25btSubSimplexClosestResult # -- Begin function _ZN22btVoronoiSimplexSolver25closestPtPointTetrahedronERK9btVector3S2_S2_S2_S2_R25btSubSimplexClosestResult .p2align 5 .type _ZN22btVoronoiSimplexSolver25closestPtPointTetrahedronERK9btVector3S2_S2_S2_S2_R25btSubSimplexClosestResult,@function _ZN22btVoronoiSimplexSolver25closestPtPointTetrahedronERK9btVector3S2_S2_S2_S2_R25btSubSimplexClosestResult: # @_ZN22btVoronoiSimplexSolver25closestPtPointTetrahedronERK9btVector3S2_S2_S2_S2_R25btSubSimplexClosestResult @@ -885,22 +878,22 @@ _ZN22btVoronoiSimplexSolver25closestPtPointTetrahedronERK9btVector3S2_S2_S2_S2_R ori $a0, $a7, 15 st.b $a0, $a6, 16 fld.s $fa0, $a3, 0 - fld.s $fa2, $a2, 0 - fld.s $fa4, $a3, 4 - fld.s $fa5, $a2, 4 + fld.s $fa1, $a2, 0 + fld.s $fa2, $a3, 4 + fld.s $fa4, $a2, 4 move $s3, $a5 move $s2, $a4 - fsub.s $ft1, $fa0, $fa2 - fsub.s $ft4, $fa4, $fa5 - fld.s $fa1, $a3, 8 - fld.s $fa3, $a2, 8 + fsub.s $ft1, $fa0, $fa1 + fsub.s $ft4, $fa2, $fa4 + fld.s $fa3, $a3, 8 + fld.s $fa5, $a2, 8 fld.s $fa6, $a4, 0 fld.s $fa7, $a4, 4 fld.s $ft0, $a4, 8 - fsub.s $ft2, $fa1, $fa3 - fsub.s $ft5, $fa6, $fa2 - fsub.s $ft6, $fa7, $fa5 - fsub.s $ft3, $ft0, $fa3 + fsub.s $ft2, $fa3, $fa5 + fsub.s $ft5, $fa6, $fa1 + fsub.s $ft6, $fa7, $fa4 + fsub.s $ft3, $ft0, $fa5 fneg.s $ft7, $ft6 fmul.s $ft7, $ft2, $ft7 fmadd.s $ft13, $ft4, $ft3, $ft7 @@ -909,16 +902,16 @@ _ZN22btVoronoiSimplexSolver25closestPtPointTetrahedronERK9btVector3S2_S2_S2_S2_R fld.s $fs1, $a1, 4 fmul.s $ft7, $ft1, $ft7 fmadd.s $ft14, $ft2, $ft5, $ft7 - fsub.s $ft10, $fs0, $fa2 - fsub.s $ft11, $fs1, $fa5 + fsub.s $ft10, $fs0, $fa1 + fsub.s $ft11, $fs1, $fa4 fmul.s $ft12, $ft11, $ft14 fld.s $ft7, $a5, 0 fld.s $ft8, $a5, 4 fld.s $ft9, $a5, 8 fmadd.s $ft12, $ft10, $ft13, $ft12 - fsub.s $ft15, $ft7, $fa2 - fsub.s $fs2, $ft8, $fa5 - fsub.s $fs3, $ft9, $fa3 + fsub.s $ft15, $ft7, $fa1 + fsub.s $fs2, $ft8, $fa4 + fsub.s $fs3, $ft9, $fa5 fmul.s $ft14, $ft14, $fs2 fmadd.s $ft13, $ft15, $ft13, $ft14 fneg.s $ft14, $fs2 @@ -952,18 +945,18 @@ _ZN22btVoronoiSimplexSolver25closestPtPointTetrahedronERK9btVector3S2_S2_S2_S2_R fmul.s $ft1, $ft6, $ft1 fmadd.s $ft1, $ft5, $fs4, $ft1 fmadd.s $ft5, $fs3, $fs7, $ft13 - fsub.s $ft6, $fs2, $fa3 + fsub.s $ft6, $fs2, $fa5 fmadd.s $ft11, $ft6, $fs7, $ft12 fmadd.s $ft2, $ft2, $fs5, $ft14 fmadd.s $ft12, $ft6, $fs5, $fs6 fmadd.s $ft6, $ft6, $ft4, $ft10 fmadd.s $ft1, $ft3, $ft4, $ft1 fsub.s $ft3, $ft7, $fa0 - fsub.s $ft4, $ft8, $fa4 - fsub.s $ft7, $ft9, $fa1 + fsub.s $ft4, $ft8, $fa2 + fsub.s $ft7, $ft9, $fa3 fsub.s $fa6, $fa6, $fa0 - fsub.s $fa7, $fa7, $fa4 - fsub.s $ft0, $ft0, $fa1 + fsub.s $fa7, $fa7, $fa2 + fsub.s $ft0, $ft0, $fa3 fneg.s $ft8, $fa7 fmul.s $ft8, $ft7, $ft8 fmadd.s $ft8, $ft4, $ft0, $ft8 @@ -974,35 +967,36 @@ _ZN22btVoronoiSimplexSolver25closestPtPointTetrahedronERK9btVector3S2_S2_S2_S2_R fmul.s $fa6, $ft4, $fa6 fmadd.s $fa6, $ft3, $fa7, $fa6 fsub.s $fa7, $fs0, $fa0 - fsub.s $ft3, $fs1, $fa4 + fsub.s $ft3, $fs1, $fa2 fmul.s $ft3, $ft3, $ft0 fmadd.s $fa7, $fa7, $ft8, $ft3 - fsub.s $fa0, $fa2, $fa0 - fsub.s $fa2, $fa5, $fa4 - fmul.s $fa2, $fa2, $ft0 - fmadd.s $fa0, $fa0, $ft8, $fa2 - pcalau12i $a0, %pc_hi20(.LCPI6_0) - fsub.s $fa2, $fs2, $fa1 - fmadd.s $fa2, $fa2, $fa6, $fa7 - fld.s $fa4, $a0, %pc_lo12(.LCPI6_0) - fsub.s $fa1, $fa3, $fa1 - fmadd.s $fa0, $fa1, $fa6, $fa0 - fmul.s $fa1, $ft5, $ft5 - fcmp.clt.s $fcc1, $fa1, $fa4 - fmul.s $fa1, $ft2, $ft2 - fcmp.clt.s $fcc2, $fa1, $fa4 - fmul.s $fa1, $ft1, $ft1 - fcmp.clt.s $fcc3, $fa1, $fa4 - fmul.s $fa1, $fa0, $fa0 - fcmp.clt.s $fcc0, $fa1, $fa4 - fmul.s $fa1, $ft11, $ft5 + fsub.s $fa0, $fa1, $fa0 + fsub.s $fa1, $fa4, $fa2 + fmul.s $fa1, $fa1, $ft0 + fmadd.s $fa0, $fa0, $ft8, $fa1 + fsub.s $fa1, $fs2, $fa3 + fmadd.s $fa1, $fa1, $fa6, $fa7 + fsub.s $fa2, $fa5, $fa3 + fmadd.s $fa0, $fa2, $fa6, $fa0 + fmul.s $fa2, $ft5, $ft5 + lu12i.w $a0, 205500 + ori $a0, $a0, 3190 + movgr2fr.w $fa3, $a0 + fcmp.clt.s $fcc1, $fa2, $fa3 + fmul.s $fa2, $ft2, $ft2 + fcmp.clt.s $fcc2, $fa2, $fa3 + fmul.s $fa2, $ft1, $ft1 + fcmp.clt.s $fcc3, $fa2, $fa3 + fmul.s $fa2, $fa0, $fa0 + fcmp.clt.s $fcc0, $fa2, $fa3 + fmul.s $fa2, $ft11, $ft5 movgr2fr.w $fa3, $zero - fcmp.clt.s $fcc5, $fa1, $fa3 - fmul.s $fa1, $ft12, $ft2 - fcmp.clt.s $fcc6, $fa1, $fa3 - fmul.s $fa1, $ft6, $ft1 - fcmp.clt.s $fcc7, $fa1, $fa3 - fmul.s $fa0, $fa2, $fa0 + fcmp.clt.s $fcc5, $fa2, $fa3 + fmul.s $fa2, $ft12, $ft2 + fcmp.clt.s $fcc6, $fa2, $fa3 + fmul.s $fa2, $ft6, $ft1 + fcmp.clt.s $fcc7, $fa2, $fa3 + fmul.s $fa0, $fa1, $fa0 fcmp.clt.s $fcc4, $fa0, $fa3 movcf2gr $a0, $fcc5 movcf2gr $a1, $fcc1 @@ -1033,7 +1027,8 @@ _ZN22btVoronoiSimplexSolver25closestPtPointTetrahedronERK9btVector3S2_S2_S2_S2_R # %bb.1: beqz $a1, .LBB6_7 # %bb.2: - pcalau12i $s5, %pc_hi20(.LCPI6_1) + lu12i.w $a1, 522239 + ori $s5, $a1, 4095 beqz $a0, .LBB6_8 # %bb.3: st.d $a7, $sp, 8 # 8-byte Folded Spill @@ -1050,10 +1045,10 @@ _ZN22btVoronoiSimplexSolver25closestPtPointTetrahedronERK9btVector3S2_S2_S2_S2_R fsub.s $fa0, $fa1, $fs0 fsub.s $fa4, $fa2, $fs1 fsub.s $fa5, $fa3, $fs2 - fld.s $fs0, $s5, %pc_lo12(.LCPI6_1) fmul.s $fa4, $fa4, $fa4 fmadd.s $fa0, $fa0, $fa0, $fa4 fmadd.s $fa0, $fa5, $fa5, $fa0 + movgr2fr.w $fs0, $s5 fcmp.cule.s $fcc0, $fs0, $fa0 bcnez $fcc0, .LBB6_5 # %bb.4: @@ -1086,7 +1081,7 @@ _ZN22btVoronoiSimplexSolver25closestPtPointTetrahedronERK9btVector3S2_S2_S2_S2_R move $a0, $zero b .LBB6_18 .LBB6_8: - fld.s $fs0, $s5, %pc_lo12(.LCPI6_1) + movgr2fr.w $fs0, $s5 beqz $s8, .LBB6_11 .LBB6_9: addi.d $a5, $sp, 16 @@ -1451,12 +1446,7 @@ _ZN22btVoronoiSimplexSolver14compute_pointsER9btVector3S1_: # @_ZN22btVoronoiSim .Lfunc_end13: .size _ZN22btVoronoiSimplexSolver14compute_pointsER9btVector3S1_, .Lfunc_end13-_ZN22btVoronoiSimplexSolver14compute_pointsER9btVector3S1_ # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN22btVoronoiSimplexSolver19pointOutsideOfPlaneERK9btVector3S2_S2_S2_S2_ -.LCPI14_0: - .word 0x322bcc76 # float 9.99999905E-9 - .text - .globl _ZN22btVoronoiSimplexSolver19pointOutsideOfPlaneERK9btVector3S2_S2_S2_S2_ + .globl _ZN22btVoronoiSimplexSolver19pointOutsideOfPlaneERK9btVector3S2_S2_S2_S2_ # -- Begin function _ZN22btVoronoiSimplexSolver19pointOutsideOfPlaneERK9btVector3S2_S2_S2_S2_ .p2align 5 .type _ZN22btVoronoiSimplexSolver19pointOutsideOfPlaneERK9btVector3S2_S2_S2_S2_,@function _ZN22btVoronoiSimplexSolver19pointOutsideOfPlaneERK9btVector3S2_S2_S2_S2_: # @_ZN22btVoronoiSimplexSolver19pointOutsideOfPlaneERK9btVector3S2_S2_S2_S2_ @@ -1501,12 +1491,13 @@ _ZN22btVoronoiSimplexSolver19pointOutsideOfPlaneERK9btVector3S2_S2_S2_S2_: # @_Z fsub.s $fa3, $ft0, $fa3 fsub.s $fa5, $ft2, $fa5 fmul.s $fa3, $fa4, $fa3 - pcalau12i $a0, %pc_hi20(.LCPI14_0) - fld.s $fa4, $a0, %pc_lo12(.LCPI14_0) fmadd.s $fa1, $fa1, $ft1, $fa3 fmadd.s $fa0, $fa5, $fa0, $fa1 fmul.s $fa1, $fa0, $fa0 - fcmp.clt.s $fcc0, $fa1, $fa4 + lu12i.w $a0, 205500 + ori $a0, $a0, 3190 + movgr2fr.w $fa3, $a0 + fcmp.clt.s $fcc0, $fa1, $fa3 fmul.s $fa0, $fa2, $fa0 movgr2fr.w $fa1, $zero fcmp.clt.s $fcc1, $fa0, $fa1 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btWheelInfo.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btWheelInfo.s index 070fb4be..516a367a 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btWheelInfo.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btWheelInfo.s @@ -10,12 +10,7 @@ _ZNK11btWheelInfo23getSuspensionRestLengthEv: # @_ZNK11btWheelInfo23getSuspensio .Lfunc_end0: .size _ZNK11btWheelInfo23getSuspensionRestLengthEv, .Lfunc_end0-_ZNK11btWheelInfo23getSuspensionRestLengthEv # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN11btWheelInfo11updateWheelERK11btRigidBodyRNS_11RaycastInfoE -.LCPI1_0: - .word 0xbdcccccd # float -0.100000001 - .text - .globl _ZN11btWheelInfo11updateWheelERK11btRigidBodyRNS_11RaycastInfoE + .globl _ZN11btWheelInfo11updateWheelERK11btRigidBodyRNS_11RaycastInfoE # -- Begin function _ZN11btWheelInfo11updateWheelERK11btRigidBodyRNS_11RaycastInfoE .p2align 5 .type _ZN11btWheelInfo11updateWheelERK11btRigidBodyRNS_11RaycastInfoE,@function _ZN11btWheelInfo11updateWheelERK11btRigidBodyRNS_11RaycastInfoE: # @_ZN11btWheelInfo11updateWheelERK11btRigidBodyRNS_11RaycastInfoE @@ -29,12 +24,14 @@ _ZN11btWheelInfo11updateWheelERK11btRigidBodyRNS_11RaycastInfoE: # @_ZN11btWheel fld.s $fa4, $a0, 56 fld.s $fa2, $a0, 8 fld.s $fa5, $a0, 60 - pcalau12i $a2, %pc_hi20(.LCPI1_0) - fld.s $fa6, $a2, %pc_lo12(.LCPI1_0) fmul.s $fa4, $fa1, $fa4 fmadd.s $fa3, $fa0, $fa3, $fa4 fmadd.s $fa3, $fa2, $fa5, $fa3 - fcmp.cult.s $fcc0, $fa3, $fa6 + lu12i.w $a2, -271156 + ori $a2, $a2, 3277 + lu32i.d $a2, 0 + movgr2fr.w $fa4, $a2 + fcmp.cult.s $fcc0, $fa3, $fa4 bceqz $fcc0, .LBB1_4 # %bb.2: fld.s $fa4, $a0, 20 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/gim_box_set.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/gim_box_set.s index 9198d6c8..1da72d8c 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/gim_box_set.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/gim_box_set.s @@ -217,14 +217,7 @@ _ZN12GIM_BOX_TREE30_sort_and_calc_splitting_indexER9gim_arrayI13GIM_AABB_DATAEjj .Lfunc_end1: .size _ZN12GIM_BOX_TREE30_sort_and_calc_splitting_indexER9gim_arrayI13GIM_AABB_DATAEjjj, .Lfunc_end1-_ZN12GIM_BOX_TREE30_sort_and_calc_splitting_indexER9gim_arrayI13GIM_AABB_DATAEjjj # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN12GIM_BOX_TREE15_build_sub_treeER9gim_arrayI13GIM_AABB_DATAEjj -.LCPI2_0: - .word 0x7f7fffff # float 3.40282347E+38 -.LCPI2_1: - .word 0xff7fffff # float -3.40282347E+38 - .text - .globl _ZN12GIM_BOX_TREE15_build_sub_treeER9gim_arrayI13GIM_AABB_DATAEjj + .globl _ZN12GIM_BOX_TREE15_build_sub_treeER9gim_arrayI13GIM_AABB_DATAEjj # -- Begin function _ZN12GIM_BOX_TREE15_build_sub_treeER9gim_arrayI13GIM_AABB_DATAEjj .p2align 5 .type _ZN12GIM_BOX_TREE15_build_sub_treeER9gim_arrayI13GIM_AABB_DATAEjj,@function _ZN12GIM_BOX_TREE15_build_sub_treeER9gim_arrayI13GIM_AABB_DATAEjj: # @_ZN12GIM_BOX_TREE15_build_sub_treeER9gim_arrayI13GIM_AABB_DATAEjj @@ -275,32 +268,31 @@ _ZN12GIM_BOX_TREE15_build_sub_treeER9gim_arrayI13GIM_AABB_DATAEjj: # @_ZN12GIM_B .LBB2_2: move $s1, $a3 lu12i.w $a1, 522239 - ori $a1, $a1, 4095 - st.w $a1, $a0, 8 - bstrins.d $a1, $a1, 62, 32 + ori $a4, $a1, 4095 + move $a1, $a4 + bstrins.d $a1, $a4, 62, 32 st.d $a1, $a0, 0 + st.w $a4, $a0, 8 lu12i.w $a1, -2049 - ori $a1, $a1, 4095 - lu52i.d $a3, $a1, -9 - st.d $a3, $a0, 16 - lu32i.d $a1, 0 - st.w $a1, $a0, 24 - bstrpick.d $a4, $s5, 31, 0 - bgeu $a2, $s1, .LBB2_11 + ori $a5, $a1, 4095 + lu52i.d $a1, $a5, -9 + st.d $a1, $a0, 16 + lu32i.d $a5, 0 + st.w $a5, $a0, 24 + bstrpick.d $a6, $s5, 31, 0 + bgeu $a2, $a3, .LBB2_11 # %bb.3: # %.lr.ph - st.d $a4, $sp, 16 # 8-byte Folded Spill + st.d $a6, $sp, 16 # 8-byte Folded Spill addi.d $a1, $a0, 16 ld.d $a3, $s0, 0 bstrpick.d $s7, $a2, 31, 0 bstrpick.d $s8, $s1, 31, 0 - slli.d $a4, $s7, 5 - pcalau12i $a5, %pc_hi20(.LCPI2_0) - fld.s $fa0, $a5, %pc_lo12(.LCPI2_0) - pcalau12i $a5, %pc_hi20(.LCPI2_1) - fld.s $fa1, $a5, %pc_lo12(.LCPI2_1) - alsl.d $s2, $s7, $a4, 2 + slli.d $a6, $s7, 5 + alsl.d $s2, $s7, $a6, 2 add.d $a3, $s2, $a3 addi.d $a3, $a3, 16 + movgr2fr.w $fa0, $a4 + movgr2fr.w $fa1, $a5 sub.d $a4, $s8, $s7 fmov.s $fa4, $fa1 fmov.s $fa2, $fa1 @@ -431,7 +423,7 @@ _ZN12GIM_BOX_TREE15_build_sub_treeER9gim_arrayI13GIM_AABB_DATAEjj: # @_ZN12GIM_B move $a1, $s0 move $s2, $a2 move $a3, $s1 - move $s6, $a4 + move $s6, $a6 pcaddu18i $ra, %call36(_ZN12GIM_BOX_TREE20_calc_splitting_axisER9gim_arrayI13GIM_AABB_DATAEjj) jirl $ra, $ra, 0 move $t4, $s6 diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/gim_contact.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/gim_contact.s index 325dcc20..8ad98df2 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/gim_contact.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/gim_contact.s @@ -1,22 +1,6 @@ .file "gim_contact.cpp" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN17gim_contact_array14merge_contactsERKS_b -.LCPI0_0: - .word 0x447a0000 # float 1000 -.LCPI0_1: - .word 0x44a6a000 # float 1333 -.LCPI0_2: - .word 0x45055000 # float 2133 -.LCPI0_3: - .word 0x3727c5ac # float 9.99999974E-6 -.LCPI0_4: - .word 0x7f7fffff # float 3.40282347E+38 -.LCPI0_5: - .word 0x33d6bf95 # float 1.00000001E-7 -.LCPI0_6: - .word 0xb727c5ac # float -9.99999974E-6 .text - .globl _ZN17gim_contact_array14merge_contactsERKS_b + .globl _ZN17gim_contact_array14merge_contactsERKS_b # -- Begin function _ZN17gim_contact_array14merge_contactsERKS_b .p2align 5 .type _ZN17gim_contact_array14merge_contactsERKS_b,@function _ZN17gim_contact_array14merge_contactsERKS_b: # @_ZN17gim_contact_array14merge_contactsERKS_b @@ -148,58 +132,58 @@ _ZN17gim_contact_array14merge_contactsERKS_b: # @_ZN17gim_contact_array14merge_c move $a0, $s2 move $a1, $s3 .LBB0_15: # %.lr.ph - ld.d $a2, $s0, 0 - addi.w $a4, $a1, 0 - ori $a5, $zero, 4 - bstrpick.d $a3, $a1, 31, 0 - bgeu $a4, $a5, .LBB0_17 + ld.d $a5, $s0, 0 + bstrpick.d $a6, $a1, 31, 0 + addi.w $a1, $a1, 0 + ori $a7, $zero, 4 + lu12i.w $a3, 280480 + lu12i.w $a4, 281194 + lu12i.w $a2, 282709 + bgeu $a1, $a7, .LBB0_17 # %bb.16: move $a1, $zero b .LBB0_20 .LBB0_17: # %vector.ph - move $a4, $zero - bstrpick.d $a1, $a3, 31, 2 + move $a7, $zero + bstrpick.d $a1, $a6, 31, 2 slli.d $a1, $a1, 2 - addi.d $a5, $a0, 16 - addi.d $a6, $a2, 96 - lu12i.w $a7, 260096 - vreplgr2vr.w $vr0, $a7 - lu12i.w $a7, 280480 - vreplgr2vr.w $vr1, $a7 - lu12i.w $a7, 281194 - vreplgr2vr.w $vr2, $a7 - lu12i.w $a7, 263168 - vreplgr2vr.w $vr3, $a7 - lu12i.w $a7, 282709 - vreplgr2vr.w $vr4, $a7 - move $a7, $a1 + addi.d $t0, $a0, 16 + addi.d $t1, $a5, 96 + lu12i.w $t2, 260096 + vreplgr2vr.w $vr0, $t2 + vreplgr2vr.w $vr1, $a3 + vreplgr2vr.w $vr2, $a4 + lu12i.w $t2, 263168 + vreplgr2vr.w $vr3, $t2 + vreplgr2vr.w $vr4, $a2 + move $t2, $a1 .p2align 4, , 16 .LBB0_18: # %vector.body # =>This Inner Loop Header: Depth=1 - addi.d $t0, $a4, 1 - addi.d $t1, $a4, 2 - fld.s $fa5, $a6, -96 - fld.s $fa6, $a6, -48 - fld.s $fa7, $a6, 0 - fld.s $ft0, $a6, 48 - addi.d $t2, $a4, 3 + addi.d $t3, $a7, 1 + addi.d $t4, $a7, 2 + fld.s $fa5, $t1, -96 + fld.s $fa6, $t1, -48 + fld.s $fa7, $t1, 0 + fld.s $ft0, $t1, 48 + addi.d $t5, $a7, 3 vextrins.w $vr5, $vr6, 16 vextrins.w $vr5, $vr7, 32 vextrins.w $vr5, $vr8, 48 vfmadd.s $vr5, $vr5, $vr1, $vr0 - fld.s $fa6, $a6, -92 - fld.s $fa7, $a6, -44 - fld.s $ft0, $a6, 4 - fld.s $ft1, $a6, 52 + fld.s $fa6, $t1, -92 + fld.s $fa7, $t1, -44 + fld.s $ft0, $t1, 4 + fld.s $ft1, $t1, 52 vftintrz.w.s $vr5, $vr5 vextrins.w $vr6, $vr7, 16 vextrins.w $vr6, $vr8, 32 vextrins.w $vr6, $vr9, 48 vfmul.s $vr6, $vr6, $vr2 - fld.s $fa7, $a6, -88 - fld.s $ft0, $a6, -40 - fld.s $ft1, $a6, 8 - fld.s $ft2, $a6, 56 + fld.s $fa7, $t1, -88 + fld.s $ft0, $t1, -40 + fld.s $ft1, $t1, 8 + fld.s $ft2, $t1, 56 vftintrz.w.s $vr6, $vr6 vextrins.w $vr7, $vr8, 16 vextrins.w $vr7, $vr9, 32 @@ -210,62 +194,59 @@ _ZN17gim_contact_array14merge_contactsERKS_b: # @_ZN17gim_contact_array14merge_c vadd.w $vr5, $vr6, $vr5 vslli.w $vr6, $vr7, 8 vadd.w $vr5, $vr5, $vr6 - vstelm.w $vr5, $a5, -16, 0 - vstelm.w $vr5, $a5, -8, 1 - vstelm.w $vr5, $a5, 0, 2 - vstelm.w $vr5, $a5, 8, 3 - st.w $a4, $a5, -12 - st.w $t0, $a5, -4 - st.w $t1, $a5, 4 - st.w $t2, $a5, 12 - addi.d $a4, $a4, 4 - addi.d $a7, $a7, -4 - addi.d $a5, $a5, 32 - addi.d $a6, $a6, 192 - bnez $a7, .LBB0_18 + vstelm.w $vr5, $t0, -16, 0 + vstelm.w $vr5, $t0, -8, 1 + vstelm.w $vr5, $t0, 0, 2 + vstelm.w $vr5, $t0, 8, 3 + st.w $a7, $t0, -12 + st.w $t3, $t0, -4 + st.w $t4, $t0, 4 + st.w $t5, $t0, 12 + addi.d $a7, $a7, 4 + addi.d $t2, $t2, -4 + addi.d $t0, $t0, 32 + addi.d $t1, $t1, 192 + bnez $t2, .LBB0_18 # %bb.19: # %middle.block - beq $a1, $a3, .LBB0_22 + beq $a1, $a6, .LBB0_22 .LBB0_20: # %scalar.ph.preheader - sub.d $a3, $a3, $a1 - slli.d $a4, $a1, 5 - alsl.d $a4, $a1, $a4, 4 - add.d $a2, $a4, $a2 - addi.d $a2, $a2, 8 - alsl.d $a4, $a1, $a0, 3 - pcalau12i $a5, %pc_hi20(.LCPI0_0) - fld.s $fa0, $a5, %pc_lo12(.LCPI0_0) - pcalau12i $a5, %pc_hi20(.LCPI0_1) - fld.s $fa1, $a5, %pc_lo12(.LCPI0_1) - pcalau12i $a5, %pc_hi20(.LCPI0_2) - fld.s $fa2, $a5, %pc_lo12(.LCPI0_2) - addi.d $a4, $a4, 4 - vldi $vr3, -1168 - vldi $vr4, -1272 + sub.d $a6, $a6, $a1 + slli.d $a7, $a1, 5 + alsl.d $a7, $a1, $a7, 4 + add.d $a5, $a7, $a5 + addi.d $a5, $a5, 8 + alsl.d $a7, $a1, $a0, 3 + addi.d $a7, $a7, 4 + vldi $vr0, -1168 + movgr2fr.w $fa1, $a3 + movgr2fr.w $fa2, $a4 + vldi $vr3, -1272 + movgr2fr.w $fa4, $a2 .p2align 4, , 16 .LBB0_21: # %scalar.ph # =>This Inner Loop Header: Depth=1 - fld.s $fa5, $a2, -8 - fmadd.s $fa5, $fa5, $fa0, $fa3 - fld.s $fa6, $a2, -4 + fld.s $fa5, $a5, -8 + fmadd.s $fa5, $fa5, $fa1, $fa0 + fld.s $fa6, $a5, -4 ftintrz.w.s $fa5, $fa5 - movfr2gr.s $a5, $fa5 - fld.s $fa5, $a2, 0 - fmul.s $fa6, $fa6, $fa1 + movfr2gr.s $a2, $fa5 + fld.s $fa5, $a5, 0 + fmul.s $fa6, $fa6, $fa2 ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a6, $fa6 - fmadd.s $fa5, $fa5, $fa2, $fa4 + movfr2gr.s $a3, $fa6 + fmadd.s $fa5, $fa5, $fa4, $fa3 ftintrz.w.s $fa5, $fa5 - movfr2gr.s $a7, $fa5 - alsl.d $a5, $a6, $a5, 4 - slli.d $a6, $a7, 8 - add.d $a5, $a5, $a6 - st.w $a5, $a4, -4 - st.w $a1, $a4, 0 + movfr2gr.s $a4, $fa5 + alsl.d $a2, $a3, $a2, 4 + slli.d $a3, $a4, 8 + add.d $a2, $a2, $a3 + st.w $a2, $a7, -4 + st.w $a1, $a7, 0 addi.d $a1, $a1, 1 - addi.d $a3, $a3, -1 - addi.d $a2, $a2, 48 - addi.d $a4, $a4, 8 - bnez $a3, .LBB0_21 + addi.d $a6, $a6, -1 + addi.d $a5, $a5, 48 + addi.d $a7, $a7, 8 + bnez $a6, .LBB0_21 .LBB0_22: move $s5, $s3 move $s2, $a0 @@ -345,14 +326,19 @@ _ZN17gim_contact_array14merge_contactsERKS_b: # @_ZN17gim_contact_array14merge_c add.d $a0, $a1, $a0 bstrpick.d $s3, $s5, 31, 0 ori $s5, $zero, 1 - pcalau12i $a1, %pc_hi20(.LCPI0_6) - fld.s $fs0, $a1, %pc_lo12(.LCPI0_6) - pcalau12i $a1, %pc_hi20(.LCPI0_3) - fld.s $fs1, $a1, %pc_lo12(.LCPI0_3) - pcalau12i $a1, %pc_hi20(.LCPI0_4) - fld.s $fs2, $a1, %pc_lo12(.LCPI0_4) - pcalau12i $a1, %pc_hi20(.LCPI0_5) - fld.s $fs3, $a1, %pc_lo12(.LCPI0_5) + lu12i.w $a1, -298372 + ori $a1, $a1, 1452 + lu32i.d $a1, 0 + movgr2fr.w $fs0, $a1 + lu12i.w $a1, 225916 + ori $a1, $a1, 1452 + movgr2fr.w $fs1, $a1 + lu12i.w $a1, 522239 + ori $a1, $a1, 4095 + movgr2fr.w $fs2, $a1 + lu12i.w $a1, 212331 + ori $a1, $a1, 3989 + movgr2fr.w $fs3, $a1 lu12i.w $a1, 390005 ori $a1, $a1, 2527 st.d $a1, $sp, 16 # 8-byte Folded Spill diff --git a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/gim_tri_collision.s b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/gim_tri_collision.s index 8957c44a..1c3ed6ce 100644 --- a/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/gim_tri_collision.s +++ b/results/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/gim_tri_collision.s @@ -30,18 +30,8 @@ _ZNK12GIM_TRIANGLE26collide_triangle_hard_testERKS_R25GIM_TRIANGLE_CONTACT_DATA: .size _ZNK12GIM_TRIANGLE26collide_triangle_hard_testERKS_R25GIM_TRIANGLE_CONTACT_DATA, .Lfunc_end0-_ZNK12GIM_TRIANGLE26collide_triangle_hard_testERKS_R25GIM_TRIANGLE_CONTACT_DATA .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2_S2_fR25GIM_TRIANGLE_CONTACT_DATA -.LCPI1_0: - .word 0x33d6bf95 # float 1.00000001E-7 -.LCPI1_1: - .word 0x7f7fffff # float 3.40282347E+38 -.LCPI1_2: - .word 0xc47a0000 # float -1000 -.LCPI1_3: - .word 0x34000000 # float 1.1920929E-7 .section .text._ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2_S2_fR25GIM_TRIANGLE_CONTACT_DATA,"axG",@progbits,_ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2_S2_fR25GIM_TRIANGLE_CONTACT_DATA,comdat - .weak _ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2_S2_fR25GIM_TRIANGLE_CONTACT_DATA + .weak _ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2_S2_fR25GIM_TRIANGLE_CONTACT_DATA # -- Begin function _ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2_S2_fR25GIM_TRIANGLE_CONTACT_DATA .p2align 5 .type _ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2_S2_fR25GIM_TRIANGLE_CONTACT_DATA,@function _ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2_S2_fR25GIM_TRIANGLE_CONTACT_DATA: # @_ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2_S2_fR25GIM_TRIANGLE_CONTACT_DATA @@ -113,11 +103,12 @@ _ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2 fneg.s $ft4, $ft5 fmul.s $ft3, $ft3, $ft4 fmadd.s $ft11, $ft2, $ft6, $ft3 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.s $ft9, $a0, %pc_lo12(.LCPI1_0) fmul.s $ft2, $ft12, $ft12 fmadd.s $ft2, $ft10, $ft10, $ft2 fmadd.s $ft2, $ft11, $ft11, $ft2 + lu12i.w $a0, 212331 + ori $a0, $a0, 3989 + movgr2fr.w $ft9, $a0 fcmp.cle.s $fcc0, $ft2, $ft9 fst.s $ft11, $s0, 124 bcnez $fcc0, .LBB1_3 @@ -131,12 +122,13 @@ _ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2 sub.w $a0, $a1, $a0 movgr2fr.w $ft2, $a0 fmul.s $ft3, $ft3, $ft2 - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.s $ft4, $a0, %pc_lo12(.LCPI1_1) - vldi $vr13, -1160 - fmadd.s $ft3, $ft3, $ft2, $ft5 + vldi $vr12, -1160 + fmadd.s $ft3, $ft3, $ft2, $ft4 fmul.s $ft2, $ft3, $ft2 - fcmp.cule.s $fcc0, $ft4, $ft2 + lu12i.w $a0, 522239 + ori $a0, $a0, 4095 + movgr2fr.w $ft3, $a0 + fcmp.cule.s $fcc0, $ft3, $ft2 bcnez $fcc0, .LBB1_3 # %bb.2: fmul.s $ft10, $ft10, $ft2 @@ -293,12 +285,13 @@ _ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2 sub.w $a0, $a1, $a0 movgr2fr.w $ft13, $a0 fmul.s $ft9, $ft9, $ft13 - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.s $ft14, $a0, %pc_lo12(.LCPI1_1) - vldi $vr23, -1160 - fmadd.s $ft9, $ft9, $ft13, $ft15 + vldi $vr22, -1160 + fmadd.s $ft9, $ft9, $ft13, $ft14 fmul.s $ft9, $ft9, $ft13 - fcmp.cule.s $fcc0, $ft14, $ft9 + lu12i.w $a0, 522239 + ori $a0, $a0, 4095 + movgr2fr.w $ft13, $a0 + fcmp.cule.s $fcc0, $ft13, $ft9 bcnez $fcc0, .LBB1_15 # %bb.14: fmul.s $ft10, $ft10, $ft9 @@ -441,15 +434,14 @@ _ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2 move $a1, $zero fld.s $fa0, $s0, 0 vst $vr1, $fp, 8 - lu12i.w $a3, -243808 - lu32i.d $a3, 0 - st.d $a3, $fp, 0 - pcalau12i $a3, %pc_hi20(.LCPI1_2) - fld.s $fa2, $a3, %pc_lo12(.LCPI1_2) - pcalau12i $a3, %pc_hi20(.LCPI1_3) - fld.s $fa1, $a3, %pc_lo12(.LCPI1_3) + lu12i.w $a4, -243808 + lu32i.d $a4, 0 + st.d $a4, $fp, 0 bstrpick.d $a0, $a0, 31, 0 addi.d $a3, $s0, 764 + movgr2fr.w $fa2, $a4 + lu12i.w $a4, 212992 + movgr2fr.w $fa1, $a4 addi.d $a4, $sp, 8 b .LBB1_31 .p2align 4, , 16 @@ -510,15 +502,14 @@ _ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2 move $a1, $zero fld.s $fa0, $s0, 0 vst $vr1, $fp, 8 - lu12i.w $a3, -243808 - lu32i.d $a3, 0 - st.d $a3, $fp, 0 - pcalau12i $a3, %pc_hi20(.LCPI1_2) - fld.s $fa2, $a3, %pc_lo12(.LCPI1_2) - pcalau12i $a3, %pc_hi20(.LCPI1_3) - fld.s $fa1, $a3, %pc_lo12(.LCPI1_3) + lu12i.w $a4, -243808 + lu32i.d $a4, 0 + st.d $a4, $fp, 0 bstrpick.d $a0, $a0, 31, 0 addi.d $a3, $s0, 764 + movgr2fr.w $fa2, $a4 + lu12i.w $a4, 212992 + movgr2fr.w $fa1, $a4 addi.d $a4, $sp, 8 b .LBB1_40 .p2align 4, , 16 @@ -637,14 +628,8 @@ _ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2 .size _ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2_S2_fR25GIM_TRIANGLE_CONTACT_DATA, .Lfunc_end1-_ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2_S2_fR25GIM_TRIANGLE_CONTACT_DATA .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN30GIM_TRIANGLE_CALCULATION_CACHE13clip_triangleERK9btVector4PK9btVector3S5_PS3_ -.LCPI2_0: - .word 0x33d6bf95 # float 1.00000001E-7 -.LCPI2_1: - .word 0x7f7fffff # float 3.40282347E+38 .section .text._ZN30GIM_TRIANGLE_CALCULATION_CACHE13clip_triangleERK9btVector4PK9btVector3S5_PS3_,"axG",@progbits,_ZN30GIM_TRIANGLE_CALCULATION_CACHE13clip_triangleERK9btVector4PK9btVector3S5_PS3_,comdat - .weak _ZN30GIM_TRIANGLE_CALCULATION_CACHE13clip_triangleERK9btVector4PK9btVector3S5_PS3_ + .weak _ZN30GIM_TRIANGLE_CALCULATION_CACHE13clip_triangleERK9btVector4PK9btVector3S5_PS3_ # -- Begin function _ZN30GIM_TRIANGLE_CALCULATION_CACHE13clip_triangleERK9btVector4PK9btVector3S5_PS3_ .p2align 5 .type _ZN30GIM_TRIANGLE_CALCULATION_CACHE13clip_triangleERK9btVector4PK9btVector3S5_PS3_,@function _ZN30GIM_TRIANGLE_CALCULATION_CACHE13clip_triangleERK9btVector4PK9btVector3S5_PS3_: # @_ZN30GIM_TRIANGLE_CALCULATION_CACHE13clip_triangleERK9btVector4PK9btVector3S5_PS3_ @@ -694,11 +679,12 @@ _ZN30GIM_TRIANGLE_CALCULATION_CACHE13clip_triangleERK9btVector4PK9btVector3S5_PS fneg.s $ft0, $ft1 fmul.s $fa7, $fa7, $ft0 fmadd.s $fa5, $fa5, $fa6, $fa7 - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI2_0) fmul.s $fa6, $fa4, $fa4 fmadd.s $fa6, $fa3, $fa3, $fa6 fmadd.s $fa6, $fa5, $fa5, $fa6 + lu12i.w $a0, 212331 + ori $a0, $a0, 3989 + movgr2fr.w $fs0, $a0 fcmp.cle.s $fcc0, $fa6, $fs0 fst.s $fa5, $sp, 16 bcnez $fcc0, .LBB2_3 @@ -712,12 +698,13 @@ _ZN30GIM_TRIANGLE_CALCULATION_CACHE13clip_triangleERK9btVector4PK9btVector3S5_PS sub.w $a0, $a1, $a0 movgr2fr.w $fa6, $a0 fmul.s $fa7, $fa7, $fa6 - pcalau12i $a0, %pc_hi20(.LCPI2_1) - fld.s $ft0, $a0, %pc_lo12(.LCPI2_1) - vldi $vr9, -1160 - fmadd.s $fa7, $fa7, $fa6, $ft1 + vldi $vr8, -1160 + fmadd.s $fa7, $fa7, $fa6, $ft0 fmul.s $fa6, $fa7, $fa6 - fcmp.cule.s $fcc0, $ft0, $fa6 + lu12i.w $a0, 522239 + ori $a0, $a0, 4095 + movgr2fr.w $fa7, $a0 + fcmp.cule.s $fcc0, $fa7, $fa6 bcnez $fcc0, .LBB2_3 # %bb.2: fmul.s $fa3, $fa3, $fa6 @@ -782,12 +769,13 @@ _ZN30GIM_TRIANGLE_CALCULATION_CACHE13clip_triangleERK9btVector4PK9btVector3S5_PS sub.w $a0, $a1, $a0 movgr2fr.w $fa6, $a0 fmul.s $fa7, $fa7, $fa6 - pcalau12i $a0, %pc_hi20(.LCPI2_1) - fld.s $ft0, $a0, %pc_lo12(.LCPI2_1) - vldi $vr9, -1160 - fmadd.s $fa7, $fa7, $fa6, $ft1 + vldi $vr8, -1160 + fmadd.s $fa7, $fa7, $fa6, $ft0 fmul.s $fa6, $fa7, $fa6 - fcmp.cule.s $fcc0, $ft0, $fa6 + lu12i.w $a0, 522239 + ori $a0, $a0, 4095 + movgr2fr.w $fa7, $a0 + fcmp.cule.s $fcc0, $fa7, $fa6 bcnez $fcc0, .LBB2_7 # %bb.6: fmul.s $fa3, $fa3, $fa6 @@ -850,12 +838,13 @@ _ZN30GIM_TRIANGLE_CALCULATION_CACHE13clip_triangleERK9btVector4PK9btVector3S5_PS sub.w $a0, $a1, $a0 movgr2fr.w $fa6, $a0 fmul.s $fa7, $fa7, $fa6 - pcalau12i $a0, %pc_hi20(.LCPI2_1) - fld.s $ft0, $a0, %pc_lo12(.LCPI2_1) - vldi $vr9, -1160 - fmadd.s $fa7, $fa7, $fa6, $ft1 + vldi $vr8, -1160 + fmadd.s $fa7, $fa7, $fa6, $ft0 fmul.s $fa6, $fa7, $fa6 - fcmp.cule.s $fcc0, $ft0, $fa6 + lu12i.w $a0, 522239 + ori $a0, $a0, 4095 + movgr2fr.w $fa7, $a0 + fcmp.cule.s $fcc0, $fa7, $fa6 bcnez $fcc0, .LBB2_11 # %bb.10: fmul.s $fa3, $fa3, $fa6 @@ -892,31 +881,27 @@ _ZN30GIM_TRIANGLE_CALCULATION_CACHE13clip_triangleERK9btVector4PK9btVector3S5_PS .size _ZN30GIM_TRIANGLE_CALCULATION_CACHE13clip_triangleERK9btVector4PK9btVector3S5_PS3_, .Lfunc_end2-_ZN30GIM_TRIANGLE_CALCULATION_CACHE13clip_triangleERK9btVector4PK9btVector3S5_PS3_ .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z27PLANE_CLIP_TRIANGLE_GENERICI9btVector39btVector422DISTANCE_PLANE_3D_FUNCEjRKT0_RKT_S8_S8_PS6_T1_ -.LCPI3_0: - .word 0x34000000 # float 1.1920929E-7 .section .text._Z27PLANE_CLIP_TRIANGLE_GENERICI9btVector39btVector422DISTANCE_PLANE_3D_FUNCEjRKT0_RKT_S8_S8_PS6_T1_,"axG",@progbits,_Z27PLANE_CLIP_TRIANGLE_GENERICI9btVector39btVector422DISTANCE_PLANE_3D_FUNCEjRKT0_RKT_S8_S8_PS6_T1_,comdat - .weak _Z27PLANE_CLIP_TRIANGLE_GENERICI9btVector39btVector422DISTANCE_PLANE_3D_FUNCEjRKT0_RKT_S8_S8_PS6_T1_ + .weak _Z27PLANE_CLIP_TRIANGLE_GENERICI9btVector39btVector422DISTANCE_PLANE_3D_FUNCEjRKT0_RKT_S8_S8_PS6_T1_ # -- Begin function _Z27PLANE_CLIP_TRIANGLE_GENERICI9btVector39btVector422DISTANCE_PLANE_3D_FUNCEjRKT0_RKT_S8_S8_PS6_T1_ .p2align 5 .type _Z27PLANE_CLIP_TRIANGLE_GENERICI9btVector39btVector422DISTANCE_PLANE_3D_FUNCEjRKT0_RKT_S8_S8_PS6_T1_,@function _Z27PLANE_CLIP_TRIANGLE_GENERICI9btVector39btVector422DISTANCE_PLANE_3D_FUNCEjRKT0_RKT_S8_S8_PS6_T1_: # @_Z27PLANE_CLIP_TRIANGLE_GENERICI9btVector39btVector422DISTANCE_PLANE_3D_FUNCEjRKT0_RKT_S8_S8_PS6_T1_ .cfi_startproc # %bb.0: move $a5, $a0 - fld.s $fa2, $a0, 4 - fld.s $fa0, $a1, 4 - fld.s $fa3, $a0, 0 + fld.s $fa2, $a0, 0 fld.s $fa6, $a1, 0 - fmul.s $fa0, $fa2, $fa0 + fld.s $fa3, $a0, 4 + fld.s $fa0, $a1, 4 fld.s $fa4, $a0, 8 - fld.s $fa7, $a1, 8 + fld.s $fa1, $a1, 8 fld.s $fa5, $a0, 12 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI3_0) - fmadd.s $fa0, $fa3, $fa6, $fa0 - fmadd.s $fa0, $fa4, $fa7, $fa0 + fmul.s $fa0, $fa3, $fa0 + fmadd.s $fa0, $fa2, $fa6, $fa0 + fmadd.s $fa0, $fa4, $fa1, $fa0 fsub.s $fa0, $fa0, $fa5 + lu12i.w $a0, 212992 + movgr2fr.w $fa1, $a0 fcmp.clt.s $fcc0, $fa1, $fa0 bceqz $fcc0, .LBB3_2 # %bb.1: @@ -928,8 +913,8 @@ _Z27PLANE_CLIP_TRIANGLE_GENERICI9btVector39btVector422DISTANCE_PLANE_3D_FUNCEjRK fst.s $fa2, $a4, 4 fld.s $fa2, $a1, 8 fst.s $fa2, $a4, 8 - fld.s $fa3, $a5, 0 - fld.s $fa2, $a5, 4 + fld.s $fa2, $a5, 0 + fld.s $fa3, $a5, 4 fld.s $fa4, $a5, 8 fld.s $fa5, $a5, 12 ori $a0, $zero, 1 @@ -937,8 +922,8 @@ _Z27PLANE_CLIP_TRIANGLE_GENERICI9btVector39btVector422DISTANCE_PLANE_3D_FUNCEjRK fld.s $fa7, $a2, 4 fld.s $fa6, $a2, 0 fld.s $ft0, $a2, 8 - fmul.s $fa2, $fa2, $fa7 - fmadd.s $fa2, $fa3, $fa6, $fa2 + fmul.s $fa3, $fa3, $fa7 + fmadd.s $fa2, $fa2, $fa6, $fa3 fmadd.s $fa2, $fa4, $ft0, $fa2 fsub.s $fa2, $fa2, $fa5 fcmp.clt.s $fcc1, $fa1, $fa2 @@ -1085,30 +1070,26 @@ _Z27PLANE_CLIP_TRIANGLE_GENERICI9btVector39btVector422DISTANCE_PLANE_3D_FUNCEjRK .size _Z27PLANE_CLIP_TRIANGLE_GENERICI9btVector39btVector422DISTANCE_PLANE_3D_FUNCEjRKT0_RKT_S8_S8_PS6_T1_, .Lfunc_end3-_Z27PLANE_CLIP_TRIANGLE_GENERICI9btVector39btVector422DISTANCE_PLANE_3D_FUNCEjRKT0_RKT_S8_S8_PS6_T1_ .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z26PLANE_CLIP_POLYGON_GENERICI9btVector39btVector422DISTANCE_PLANE_3D_FUNCEjRKT0_PKT_jPS6_T1_ -.LCPI4_0: - .word 0x34000000 # float 1.1920929E-7 .section .text._Z26PLANE_CLIP_POLYGON_GENERICI9btVector39btVector422DISTANCE_PLANE_3D_FUNCEjRKT0_PKT_jPS6_T1_,"axG",@progbits,_Z26PLANE_CLIP_POLYGON_GENERICI9btVector39btVector422DISTANCE_PLANE_3D_FUNCEjRKT0_PKT_jPS6_T1_,comdat - .weak _Z26PLANE_CLIP_POLYGON_GENERICI9btVector39btVector422DISTANCE_PLANE_3D_FUNCEjRKT0_PKT_jPS6_T1_ + .weak _Z26PLANE_CLIP_POLYGON_GENERICI9btVector39btVector422DISTANCE_PLANE_3D_FUNCEjRKT0_PKT_jPS6_T1_ # -- Begin function _Z26PLANE_CLIP_POLYGON_GENERICI9btVector39btVector422DISTANCE_PLANE_3D_FUNCEjRKT0_PKT_jPS6_T1_ .p2align 5 .type _Z26PLANE_CLIP_POLYGON_GENERICI9btVector39btVector422DISTANCE_PLANE_3D_FUNCEjRKT0_PKT_jPS6_T1_,@function _Z26PLANE_CLIP_POLYGON_GENERICI9btVector39btVector422DISTANCE_PLANE_3D_FUNCEjRKT0_PKT_jPS6_T1_: # @_Z26PLANE_CLIP_POLYGON_GENERICI9btVector39btVector422DISTANCE_PLANE_3D_FUNCEjRKT0_PKT_jPS6_T1_ # %bb.0: move $a4, $a0 - fld.s $fa0, $a0, 4 - fld.s $fa1, $a1, 4 - fld.s $fa3, $a0, 0 + fld.s $fa0, $a0, 0 fld.s $fa2, $a1, 0 - fmul.s $fa0, $fa0, $fa1 + fld.s $fa1, $a0, 4 + fld.s $fa3, $a1, 4 fld.s $fa4, $a0, 8 fld.s $fa5, $a1, 8 fld.s $fa6, $a0, 12 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI4_0) - fmadd.s $fa0, $fa3, $fa2, $fa0 + fmul.s $fa1, $fa1, $fa3 + fmadd.s $fa0, $fa0, $fa2, $fa1 fmadd.s $fa0, $fa4, $fa5, $fa0 fsub.s $fa0, $fa0, $fa6 + lu12i.w $a0, 212992 + movgr2fr.w $fa1, $a0 fcmp.clt.s $fcc0, $fa1, $fa0 bceqz $fcc0, .LBB4_6 # %bb.1: diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/KDTree.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/KDTree.s index e180b64a..a0939126 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/KDTree.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/KDTree.s @@ -154,10 +154,6 @@ KDTree_AddElement: # @KDTree_AddElement .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI4_1: - .dword 0xcd384f03e93ff9f5 # double -1.0E+64 .text .globl KDTree_CreateTree .p2align 5 @@ -253,9 +249,12 @@ KDTree_CreateTree: # @KDTree_CreateTree addi.d $s2, $s3, 4 ori $s6, $zero, 3 ori $s1, $zero, 0 - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI4_1) lu32i.d $s1, -3 + lu12i.w $a0, -93185 + ori $a0, $a0, 2549 + lu32i.d $a0, -504061 + lu52i.d $a0, $a0, -813 + movgr2fr.d $fs1, $a0 ori $a0, $zero, 1 st.d $a0, $sp, 88 # 8-byte Folded Spill st.d $a1, $sp, 72 # 8-byte Folded Spill diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/Parser_math.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/Parser_math.s index 09fe1641..499db0fd 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/Parser_math.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/Parser_math.s @@ -13,12 +13,7 @@ _ZN2PP11Parser_mathC2Ev: # @_ZN2PP11Parser_mathC2Ev .Lfunc_end0: .size _ZN2PP11Parser_mathC2Ev, .Lfunc_end0-_ZN2PP11Parser_mathC2Ev # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN2PP11Parser_math5do_opEiiiRSt5dequeINS_4WordESaIS2_EERS2_RNSt7__cxx1118basic_stringstreamIcSt11char_traitsIcESaIcEEERi -.LCPI1_0: - .dword 0x46293e5939a08cea # double 1.0E+30 - .text - .globl _ZN2PP11Parser_math5do_opEiiiRSt5dequeINS_4WordESaIS2_EERS2_RNSt7__cxx1118basic_stringstreamIcSt11char_traitsIcESaIcEEERi + .globl _ZN2PP11Parser_math5do_opEiiiRSt5dequeINS_4WordESaIS2_EERS2_RNSt7__cxx1118basic_stringstreamIcSt11char_traitsIcESaIcEEERi # -- Begin function _ZN2PP11Parser_math5do_opEiiiRSt5dequeINS_4WordESaIS2_EERS2_RNSt7__cxx1118basic_stringstreamIcSt11char_traitsIcESaIcEEERi .p2align 5 .type _ZN2PP11Parser_math5do_opEiiiRSt5dequeINS_4WordESaIS2_EERS2_RNSt7__cxx1118basic_stringstreamIcSt11char_traitsIcESaIcEEERi,@function _ZN2PP11Parser_math5do_opEiiiRSt5dequeINS_4WordESaIS2_EERS2_RNSt7__cxx1118basic_stringstreamIcSt11char_traitsIcESaIcEEERi: # @_ZN2PP11Parser_math5do_opEiiiRSt5dequeINS_4WordESaIS2_EERS2_RNSt7__cxx1118basic_stringstreamIcSt11char_traitsIcESaIcEEERi @@ -980,9 +975,12 @@ _ZN2PP11Parser_math5do_opEiiiRSt5dequeINS_4WordESaIS2_EERS2_RNSt7__cxx1118basic_ jirl $ra, $ra, 0 .Ltmp114: # EH_LABEL # %bb.168: # %_ZNSolsEPFRSoS_E.exit223 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_0) + lu12i.w $a0, 236040 + ori $a0, $a0, 3306 + lu32i.d $a0, -442791 + lu52i.d $a0, $a0, 1122 fcmp.ceq.d $fcc0, $fs0, $fs2 + movgr2fr.d $fa0, $a0 fsel $fs2, $fa0, $fs2, $fcc0 b .LBB1_196 .LBB1_169: diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/PowerParser.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/PowerParser.s index 4d2482e0..7d3c6aa0 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/PowerParser.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/PowerParser.s @@ -2708,19 +2708,9 @@ GCC_except_table6: .Lcst_end5: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN2PP11PowerParser4initEv -.LCPI7_0: - .dword 0x7fdfffffffffffff # double 8.9884656743115785E+307 -.LCPI7_1: - .dword 0x47efffffe0000000 # double 3.4028234663852886E+38 -.LCPI7_2: - .dword 0x0010000000000000 # double 2.2250738585072014E-308 -.LCPI7_3: - .dword 0x3810000000000000 # double 1.1754943508222875E-38 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI7_4: + .p2align 4, 0x0 # -- Begin function _ZN2PP11PowerParser4initEv +.LCPI7_0: .dword 8 # 0x8 .dword 7310293764108612723 # 0x6573617265727473 .text @@ -2784,8 +2774,9 @@ _ZN2PP11PowerParser4initEv: # @_ZN2PP11PowerParser4initEv add.d $a0, $sp, $a0 st.b $zero, $a0, 0 .Ltmp162: # EH_LABEL - pcalau12i $a0, %pc_hi20(.LCPI7_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI7_0) + addi.w $a0, $zero, -1 + lu52i.d $a0, $a0, 2045 + movgr2fr.d $fa0, $a0 lu12i.w $a0, 2 ori $a0, $a0, 240 add.d $a0, $sp, $a0 @@ -3215,8 +3206,9 @@ _ZN2PP11PowerParser4initEv: # @_ZN2PP11PowerParser4initEv add.d $a0, $sp, $a0 st.b $zero, $a0, 0 .Ltmp183: # EH_LABEL - pcalau12i $a0, %pc_hi20(.LCPI7_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI7_1) + lu12i.w $a0, -131072 + lu52i.d $a0, $a0, 1150 + movgr2fr.d $fa0, $a0 lu12i.w $a0, 1 ori $a0, $a0, 3776 add.d $a0, $sp, $a0 @@ -4023,8 +4015,8 @@ _ZN2PP11PowerParser4initEv: # @_ZN2PP11PowerParser4initEv add.d $a0, $sp, $a0 st.b $zero, $a0, 0 .Ltmp219: # EH_LABEL - pcalau12i $a0, %pc_hi20(.LCPI7_2) - fld.d $fa0, $a0, %pc_lo12(.LCPI7_2) + lu52i.d $a0, $zero, 1 + movgr2fr.d $fa0, $a0 lu12i.w $a0, 1 ori $a0, $a0, 2992 add.d $a0, $sp, $a0 @@ -4427,8 +4419,8 @@ _ZN2PP11PowerParser4initEv: # @_ZN2PP11PowerParser4initEv add.d $a0, $sp, $a0 st.b $zero, $a0, 0 .Ltmp237: # EH_LABEL - pcalau12i $a0, %pc_hi20(.LCPI7_3) - fld.d $fa0, $a0, %pc_lo12(.LCPI7_3) + lu52i.d $a0, $zero, 897 + movgr2fr.d $fa0, $a0 lu12i.w $a0, 1 ori $a0, $a0, 2600 add.d $a0, $sp, $a0 @@ -12982,8 +12974,8 @@ _ZN2PP11PowerParser4initEv: # @_ZN2PP11PowerParser4initEv pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB7_907: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit2134 - pcalau12i $a0, %pc_hi20(.LCPI7_4) - vld $vr0, $a0, %pc_lo12(.LCPI7_4) + pcalau12i $a0, %pc_hi20(.LCPI7_0) + vld $vr0, $a0, %pc_lo12(.LCPI7_0) addi.d $s1, $sp, 928 st.d $s1, $sp, 912 vst $vr0, $sp, 920 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/clamr_cpuonly.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/clamr_cpuonly.s index 8a699411..e85c1fc8 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/clamr_cpuonly.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/clamr_cpuonly.s @@ -1,14 +1,6 @@ .file "clamr_cpuonly.cpp" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x3f80000000000000 # double 0.0078125 -.LCPI0_1: - .dword 0x4059000000000000 # double 100 -.LCPI0_2: - .dword 0x3cacd2b297d889bc # double 2.0E-16 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -76,15 +68,15 @@ main: # @main # %bb.2: pcalau12i $s5, %pc_hi20(nx) ld.w $fp, $s5, %pc_lo12(nx) + pcalau12i $s1, %pc_hi20(_ZL4crux) + st.d $s0, $s1, %pc_lo12(_ZL4crux) movgr2fr.w $fa0, $fp ffint.d.w $fa0, $fa0 vldi $vr1, -1000 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_0) fmul.d $fa0, $fa0, $fa1 - pcalau12i $s1, %pc_hi20(_ZL4crux) - st.d $s0, $s1, %pc_lo12(_ZL4crux) - fmul.d $fa0, $fa0, $fa2 + lu52i.d $a0, $zero, 1016 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 ld.bu $a0, $s2, %pc_lo12(restart) pcalau12i $s4, %pc_hi20(_ZL11circ_radius) fst.d $fa0, $s4, %pc_lo12(_ZL11circ_radius) @@ -248,8 +240,10 @@ main: # @main jirl $ra, $ra, 0 ld.d $a0, $s3, %pc_lo12(_ZL5state) fld.d $fa0, $s4, %pc_lo12(_ZL11circ_radius) - pcalau12i $a1, %pc_hi20(.LCPI0_1) - fld.d $fa1, $a1, %pc_lo12(.LCPI0_1) + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 vldi $vr2, -996 pcaddu18i $ra, %call36(_ZN5State11fill_circleEddd) jirl $ra, $ra, 0 @@ -291,8 +285,11 @@ main: # @main fst.d $fs0, $a1, %pc_lo12(_ZL13H_sum_initial) bcnez $fcc0, .LBB0_25 # %bb.24: - pcalau12i $a1, %pc_hi20(.LCPI0_2) - fld.d $fa0, $a1, %pc_lo12(.LCPI0_2) + lu12i.w $a1, -426616 + ori $a1, $a1, 2492 + lu32i.d $a1, -208206 + lu52i.d $a1, $a1, 970 + movgr2fr.d $fa0, $a1 fmul.d $fa0, $fs0, $fa0 fst.d $fa0, $a0, %pc_lo12(upper_mass_diff_percentage) .LBB0_25: @@ -1200,16 +1197,8 @@ GCC_except_table3: .Lttbase1: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function do_calc -.LCPI4_0: - .dword 0x402399999999999a # double 9.8000000000000007 -.LCPI4_1: - .dword 0x3fee666666666666 # double 0.94999999999999996 -.LCPI4_2: - .dword 0x4059000000000000 # double 100 .text - .globl do_calc + .globl do_calc # -- Begin function do_calc .p2align 5 .type do_calc,@function do_calc: # @do_calc @@ -1233,7 +1222,6 @@ do_calc: # @do_calc st.d $s8, $sp, 280 # 8-byte Folded Spill fst.d $fs0, $sp, 272 # 8-byte Folded Spill fst.d $fs1, $sp, 264 # 8-byte Folded Spill - fst.d $fs2, $sp, 256 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -1247,9 +1235,8 @@ do_calc: # @do_calc .cfi_offset 31, -88 .cfi_offset 56, -96 .cfi_offset 57, -104 - .cfi_offset 58, -112 pcalau12i $a0, %pc_hi20(_ZL4mesh) - st.d $a0, $sp, 144 # 8-byte Folded Spill + st.d $a0, $sp, 152 # 8-byte Folded Spill ld.d $fp, $a0, %pc_lo12(_ZL4mesh) pcalau12i $a0, %pc_hi20(_ZL13next_cp_cycle) st.d $a0, $sp, 24 # 8-byte Folded Spill @@ -1260,10 +1247,10 @@ do_calc: # @do_calc pcalau12i $a0, %pc_hi20(_ZL19next_graphics_cycle) st.d $a0, $sp, 40 # 8-byte Folded Spill ld.w $s0, $a0, %pc_lo12(_ZL19next_graphics_cycle) - st.d $zero, $sp, 240 + st.d $zero, $sp, 248 vrepli.b $vr0, 0 vst $vr0, $sp, 128 # 16-byte Folded Spill - vst $vr0, $sp, 224 + vst $vr0, $sp, 232 .Ltmp42: # EH_LABEL pcalau12i $a0, %pc_hi20(_ZL10tstart_cpu) addi.d $a0, $a0, %pc_lo12(_ZL10tstart_cpu) @@ -1295,11 +1282,17 @@ do_calc: # @do_calc bge $a1, $a2, .LBB4_76 # %bb.3: # %.lr.ph mod.w $a0, $a1, $a0 - pcalau12i $a1, %pc_hi20(.LCPI4_0) - fld.d $fs0, $a1, %pc_lo12(.LCPI4_0) - pcalau12i $a1, %pc_hi20(.LCPI4_1) - fld.d $fs1, $a1, %pc_lo12(.LCPI4_1) addi.w $s8, $a0, 1 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, 235929 + lu52i.d $a0, $a0, 1026 + movgr2fr.d $fs0, $a0 + lu12i.w $a0, 419430 + ori $a0, $a0, 1638 + lu32i.d $a0, -104858 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fs1, $a0 pcalau12i $a0, %pc_hi20(_ZL6deltaT) st.d $a0, $sp, 112 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(_ZL7simTime) @@ -1326,7 +1319,7 @@ do_calc: # @do_calc # %bb.5: # in Loop: Header=BB4_4 Depth=1 ld.d $a3, $sp, 80 # 8-byte Folded Reload fld.d $fa1, $a3, %pc_lo12(_ZL7simTime) - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(_ZL4mesh) ld.w $a1, $fp, 1160 ld.d $a2, $sp, 112 # 8-byte Folded Reload @@ -1344,7 +1337,7 @@ do_calc: # @do_calc jirl $ra, $ra, 0 .Ltmp49: # EH_LABEL # %bb.7: # in Loop: Header=BB4_4 Depth=1 - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(_ZL4mesh) .Ltmp50: # EH_LABEL pcaddu18i $ra, %call36(_ZN4Mesh17partition_measureEv) @@ -1361,7 +1354,7 @@ do_calc: # @do_calc # %bb.9: # in Loop: Header=BB4_4 Depth=1 ld.d $a2, $sp, 72 # 8-byte Folded Reload fld.d $fa1, $a2, %pc_lo12(_ZL17cpu_time_partmeas) - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(_ZL4mesh) ld.w $a1, $fp, 1160 fadd.d $fa0, $fa0, $fa1 @@ -1396,14 +1389,14 @@ do_calc: # @do_calc jirl $ra, $ra, 0 .Ltmp61: # EH_LABEL # %bb.14: # in Loop: Header=BB4_4 Depth=1 - ld.d $a0, $sp, 232 - ld.d $s1, $sp, 224 + ld.d $a0, $sp, 240 + ld.d $s1, $sp, 232 ld.d $a2, $fp, 1160 sub.d $s2, $a0, $s1 srai.d $a1, $s2, 2 bgeu $a1, $a2, .LBB4_25 # %bb.15: # in Loop: Header=BB4_4 Depth=1 - ld.d $s5, $sp, 240 + ld.d $s5, $sp, 248 sub.d $s4, $a2, $a1 sub.d $a2, $s5, $a0 srai.d $a2, $a2, 2 @@ -1460,11 +1453,11 @@ do_calc: # @do_calc jirl $ra, $ra, 0 .LBB4_24: # %_ZZNSt6vectorIiSaIiEE17_M_default_appendEmEN6_GuardD2Ev.exit.i # in Loop: Header=BB4_4 Depth=1 - st.d $s3, $sp, 224 + st.d $s3, $sp, 232 alsl.d $a0, $s4, $s7, 2 - st.d $a0, $sp, 232 - alsl.d $a0, $s6, $s3, 2 st.d $a0, $sp, 240 + alsl.d $a0, $s6, $s3, 2 + st.d $a0, $sp, 248 ld.d $s7, $sp, 104 # 8-byte Folded Reload b .LBB4_31 .p2align 4, , 16 @@ -1475,7 +1468,7 @@ do_calc: # @do_calc beq $a0, $a1, .LBB4_31 # %bb.27: # %_ZSt8_DestroyIPiiEvT_S1_RSaIT0_E.exit.i.i # in Loop: Header=BB4_4 Depth=1 - st.d $a1, $sp, 232 + st.d $a1, $sp, 240 b .LBB4_31 .p2align 4, , 16 .LBB4_28: # in Loop: Header=BB4_4 Depth=1 @@ -1493,28 +1486,28 @@ do_calc: # @do_calc alsl.d $s1, $s2, $s1, 2 .LBB4_30: # %_ZSt27__uninitialized_default_n_aIPimiET_S1_T0_RSaIT1_E.exit.i # in Loop: Header=BB4_4 Depth=1 - st.d $s1, $sp, 232 + st.d $s1, $sp, 240 .LBB4_31: # %_ZNSt6vectorIiSaIiEE6resizeEm.exit # in Loop: Header=BB4_4 Depth=1 ld.d $a0, $s7, %pc_lo12(_ZL5state) .Ltmp64: # EH_LABEL - addi.d $a1, $sp, 224 - addi.d $a2, $sp, 252 - addi.d $a3, $sp, 248 + addi.d $a1, $sp, 232 + addi.d $a2, $sp, 260 + addi.d $a3, $sp, 256 pcaddu18i $ra, %call36(_ZN5State21calc_refine_potentialERSt6vectorIiSaIiEERiS4_) jirl $ra, $ra, 0 .Ltmp65: # EH_LABEL # %bb.32: # in Loop: Header=BB4_4 Depth=1 move $s1, $a0 - ld.d $a0, $sp, 232 - ld.d $s6, $sp, 224 + ld.d $a0, $sp, 240 + ld.d $s6, $sp, 232 ld.d $s2, $s7, %pc_lo12(_ZL5state) - ld.w $s3, $sp, 252 - ld.w $s4, $sp, 248 + ld.w $s3, $sp, 260 + ld.w $s4, $sp, 256 sub.d $s5, $a0, $s6 - st.d $zero, $sp, 200 + st.d $zero, $sp, 208 vld $vr0, $sp, 128 # 16-byte Folded Reload - vst $vr0, $sp, 208 + vst $vr0, $sp, 216 beq $a0, $s6, .LBB4_37 # %bb.33: # in Loop: Header=BB4_4 Depth=1 addi.w $a0, $zero, -3 @@ -1529,10 +1522,10 @@ do_calc: # @do_calc .Ltmp67: # EH_LABEL # %bb.35: # %.noexc100 # in Loop: Header=BB4_4 Depth=1 - st.d $a0, $sp, 200 st.d $a0, $sp, 208 + st.d $a0, $sp, 216 add.d $s7, $a0, $s5 - st.d $s7, $sp, 216 + st.d $s7, $sp, 224 ori $a1, $zero, 5 bltu $s5, $a1, .LBB4_71 # %bb.36: # in Loop: Header=BB4_4 Depth=1 @@ -1544,13 +1537,13 @@ do_calc: # @do_calc .p2align 4, , 16 .LBB4_37: # %.thread # in Loop: Header=BB4_4 Depth=1 - st.d $zero, $sp, 200 - st.d $s5, $sp, 216 + st.d $zero, $sp, 208 + st.d $s5, $sp, 224 move $s7, $s5 .LBB4_38: # in Loop: Header=BB4_4 Depth=1 - st.d $s7, $sp, 208 + st.d $s7, $sp, 216 .Ltmp70: # EH_LABEL - addi.d $a3, $sp, 200 + addi.d $a3, $sp, 208 move $a0, $s2 move $a1, $s3 move $a2, $s4 @@ -1558,21 +1551,21 @@ do_calc: # @do_calc jirl $ra, $ra, 0 .Ltmp71: # EH_LABEL # %bb.39: # in Loop: Header=BB4_4 Depth=1 - ld.d $a0, $sp, 200 + ld.d $a0, $sp, 208 ld.d $s7, $sp, 104 # 8-byte Folded Reload beqz $a0, .LBB4_41 # %bb.40: # in Loop: Header=BB4_4 Depth=1 - ld.d $a1, $sp, 216 + ld.d $a1, $sp, 224 sub.d $a1, $a1, $a0 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB4_41: # %_ZNSt6vectorIiSaIiEED2Ev.exit # in Loop: Header=BB4_4 Depth=1 - ld.d $a0, $sp, 224 - ld.d $a1, $sp, 240 - st.d $zero, $sp, 224 + ld.d $a0, $sp, 232 + ld.d $a1, $sp, 248 + st.d $zero, $sp, 232 vld $vr0, $sp, 128 # 16-byte Folded Reload - vst $vr0, $sp, 232 + vst $vr0, $sp, 240 beqz $a0, .LBB4_43 # %bb.42: # in Loop: Header=BB4_4 Depth=1 sub.d $a1, $a1, $a0 @@ -1580,7 +1573,7 @@ do_calc: # @do_calc jirl $ra, $ra, 0 .LBB4_43: # %_ZNSt6vectorIiSaIiEED2Ev.exit102 # in Loop: Header=BB4_4 Depth=1 - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(_ZL4mesh) st.d $s1, $a0, 1160 st.d $s1, $fp, 1160 @@ -1590,7 +1583,7 @@ do_calc: # @do_calc jirl $ra, $ra, 0 .Ltmp74: # EH_LABEL # %bb.44: # in Loop: Header=BB4_4 Depth=1 - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(_ZL4mesh) ld.d $a1, $a0, 864 ld.d $a3, $a0, 856 @@ -1618,7 +1611,7 @@ do_calc: # @do_calc .p2align 4, , 16 .LBB4_49: # %_ZNSt6vectorIiSaIiEE6resizeEm.exit106 # in Loop: Header=BB4_4 Depth=1 - ld.w $a0, $sp, 252 + ld.w $a0, $sp, 260 move $s5, $s0 beqz $a0, .LBB4_69 # %bb.50: # in Loop: Header=BB4_4 Depth=1 @@ -1636,9 +1629,9 @@ do_calc: # @do_calc .Ltmp79: # EH_LABEL # %bb.53: # %.noexc109 # in Loop: Header=BB4_4 Depth=1 - st.d $a0, $sp, 176 + st.d $a0, $sp, 184 alsl.d $a1, $s1, $a0, 2 - st.d $a1, $sp, 192 + st.d $a1, $sp, 200 st.w $zero, $a0, 0 addi.d $s2, $s1, -1 addi.d $s1, $a0, 4 @@ -1656,30 +1649,30 @@ do_calc: # @do_calc .LBB4_55: # %_ZNSt12_Vector_baseIiSaIiEEC2EmRKS0_.exit.thread.i # in Loop: Header=BB4_4 Depth=1 move $s1, $zero - st.d $zero, $sp, 192 + st.d $zero, $sp, 200 vld $vr0, $sp, 128 # 16-byte Folded Reload - vst $vr0, $sp, 176 + vst $vr0, $sp, 184 .LBB4_56: # in Loop: Header=BB4_4 Depth=1 - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(_ZL4mesh) pcalau12i $a1, %pc_hi20(numpe) ld.w $a1, $a1, %pc_lo12(numpe) pcalau12i $a2, %pc_hi20(cycle_reorder) ld.w $a3, $a2, %pc_lo12(cycle_reorder) - st.d $s1, $sp, 184 + st.d $s1, $sp, 192 .Ltmp81: # EH_LABEL - addi.d $a2, $sp, 176 + addi.d $a2, $sp, 184 pcaddu18i $ra, %call36(_ZN4Mesh15partition_cellsEiRSt6vectorIiSaIiEE16partition_method) jirl $ra, $ra, 0 .Ltmp82: # EH_LABEL # %bb.57: # in Loop: Header=BB4_4 Depth=1 - ld.d $a0, $sp, 184 - ld.d $s3, $sp, 176 + ld.d $a0, $sp, 192 + ld.d $s3, $sp, 184 ld.d $s1, $s7, %pc_lo12(_ZL5state) sub.d $s2, $a0, $s3 - st.d $zero, $sp, 152 + st.d $zero, $sp, 160 vld $vr0, $sp, 128 # 16-byte Folded Reload - vst $vr0, $sp, 160 + vst $vr0, $sp, 168 beq $a0, $s3, .LBB4_62 # %bb.58: # in Loop: Header=BB4_4 Depth=1 addi.w $a0, $zero, -3 @@ -1694,10 +1687,10 @@ do_calc: # @do_calc .Ltmp84: # EH_LABEL # %bb.60: # %.noexc114 # in Loop: Header=BB4_4 Depth=1 - st.d $a0, $sp, 152 st.d $a0, $sp, 160 + st.d $a0, $sp, 168 add.d $s4, $a0, $s2 - st.d $s4, $sp, 168 + st.d $s4, $sp, 176 ori $a1, $zero, 5 bltu $s2, $a1, .LBB4_73 # %bb.61: # in Loop: Header=BB4_4 Depth=1 @@ -1709,22 +1702,22 @@ do_calc: # @do_calc .p2align 4, , 16 .LBB4_62: # %.thread140 # in Loop: Header=BB4_4 Depth=1 - st.d $zero, $sp, 152 - st.d $s2, $sp, 168 + st.d $zero, $sp, 160 + st.d $s2, $sp, 176 move $s4, $s2 .LBB4_63: # in Loop: Header=BB4_4 Depth=1 - st.d $s4, $sp, 160 + st.d $s4, $sp, 168 .Ltmp88: # EH_LABEL - addi.d $a1, $sp, 152 + addi.d $a1, $sp, 160 move $a0, $s1 pcaddu18i $ra, %call36(_ZN5State13state_reorderESt6vectorIiSaIiEE) jirl $ra, $ra, 0 .Ltmp89: # EH_LABEL # %bb.64: # in Loop: Header=BB4_4 Depth=1 - ld.d $a0, $sp, 152 + ld.d $a0, $sp, 160 beqz $a0, .LBB4_66 # %bb.65: # in Loop: Header=BB4_4 Depth=1 - ld.d $a1, $sp, 168 + ld.d $a1, $sp, 176 sub.d $a1, $a1, $a0 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -1736,10 +1729,10 @@ do_calc: # @do_calc jirl $ra, $ra, 0 .Ltmp92: # EH_LABEL # %bb.67: # in Loop: Header=BB4_4 Depth=1 - ld.d $a0, $sp, 176 + ld.d $a0, $sp, 184 beqz $a0, .LBB4_69 # %bb.68: # in Loop: Header=BB4_4 Depth=1 - ld.d $a1, $sp, 192 + ld.d $a1, $sp, 200 sub.d $a1, $a1, $a0 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -1802,15 +1795,17 @@ do_calc: # @do_calc bceqz $fcc1, .LBB4_134 .LBB4_79: pcalau12i $s1, %pc_hi20(_ZL13H_sum_initial) - fld.d $fa1, $s1, %pc_lo12(_ZL13H_sum_initial) - fsub.d $fa2, $fs0, $fa1 - pcalau12i $a0, %pc_hi20(.LCPI4_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI4_2) - pcalau12i $a0, %pc_hi20(upper_mass_diff_percentage) - fld.d $fa0, $a0, %pc_lo12(upper_mass_diff_percentage) - fabs.d $fa2, $fa2 - fdiv.d $fa1, $fa2, $fa1 - fmul.d $fs1, $fa1, $fs2 + fld.d $fa0, $s1, %pc_lo12(_ZL13H_sum_initial) + fsub.d $fa1, $fs0, $fa0 + fabs.d $fa1, $fa1 + fdiv.d $fa1, $fa1, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + pcalau12i $a1, %pc_hi20(upper_mass_diff_percentage) + fld.d $fa0, $a1, %pc_lo12(upper_mass_diff_percentage) + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa2, $a0 + fmul.d $fs1, $fa1, $fa2 fcmp.cult.d $fcc0, $fs1, $fa0 bceqz $fcc0, .LBB4_82 # %bb.80: @@ -1845,7 +1840,7 @@ do_calc: # @do_calc ld.w $a2, $s8, %pc_lo12(niter) blt $a2, $a1, .LBB4_94 # %bb.86: - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(_ZL4mesh) .Ltmp107: # EH_LABEL move $a1, $zero @@ -1866,7 +1861,7 @@ do_calc: # @do_calc jirl $ra, $ra, 0 .Ltmp112: # EH_LABEL # %bb.89: - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a3, $a0, %pc_lo12(_ZL4mesh) pcalau12i $a0, %pc_hi20(set_graphics_cell_coordinates) ld.d $a4, $a0, %pc_lo12(set_graphics_cell_coordinates) @@ -1886,7 +1881,7 @@ do_calc: # @do_calc jirl $ra, $a1, 0 .Ltmp116: # EH_LABEL # %bb.91: - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(_ZL4mesh) ld.d $a0, $a0, 856 .Ltmp117: # EH_LABEL @@ -1956,7 +1951,7 @@ do_calc: # @do_calc jirl $ra, $ra, 0 .Ltmp126: # EH_LABEL # %bb.100: - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(_ZL4mesh) .Ltmp127: # EH_LABEL pcaddu18i $ra, %call36(_ZN4Mesh9terminateEv) @@ -2038,7 +2033,7 @@ do_calc: # @do_calc ld.w $a0, $a0, %pc_lo12(graphic_outputInterval) bge $a0, $a1, .LBB4_121 .LBB4_112: - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(_ZL4mesh) .Ltmp142: # EH_LABEL move $a1, $zero @@ -2063,7 +2058,7 @@ do_calc: # @do_calc jirl $ra, $ra, 0 .Ltmp147: # EH_LABEL # %bb.116: - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a3, $a0, %pc_lo12(_ZL4mesh) pcalau12i $a0, %pc_hi20(set_graphics_cell_coordinates) ld.d $a4, $a0, %pc_lo12(set_graphics_cell_coordinates) @@ -2083,7 +2078,7 @@ do_calc: # @do_calc jirl $ra, $a1, 0 .Ltmp151: # EH_LABEL # %bb.118: - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(_ZL4mesh) ld.d $a0, $a0, 856 .Ltmp152: # EH_LABEL @@ -2124,15 +2119,14 @@ do_calc: # @do_calc fst.d $fa0, $s0, %pc_lo12(_ZL17cpu_time_graphics) bge $a1, $a0, .LBB4_135 # %bb.123: - ld.d $a0, $sp, 224 + ld.d $a0, $sp, 232 beqz $a0, .LBB4_125 # %bb.124: - ld.d $a1, $sp, 240 + ld.d $a1, $sp, 248 sub.d $a1, $a1, $a0 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB4_125: # %_ZNSt6vectorIiSaIiEED2Ev.exit129 - fld.d $fs2, $sp, 256 # 8-byte Folded Reload fld.d $fs1, $sp, 264 # 8-byte Folded Reload fld.d $fs0, $sp, 272 # 8-byte Folded Reload ld.d $s8, $sp, 280 # 8-byte Folded Reload @@ -2247,28 +2241,28 @@ do_calc: # @do_calc jirl $ra, $ra, 0 .Ltmp168: # EH_LABEL # %bb.142: - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(_ZL4mesh) .Ltmp169: # EH_LABEL pcaddu18i $ra, %call36(_ZN4Mesh23print_partition_measureEv) jirl $ra, $ra, 0 .Ltmp170: # EH_LABEL # %bb.143: - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(_ZL4mesh) .Ltmp171: # EH_LABEL pcaddu18i $ra, %call36(_ZN4Mesh24print_calc_neighbor_typeEv) jirl $ra, $ra, 0 .Ltmp172: # EH_LABEL # %bb.144: - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(_ZL4mesh) .Ltmp173: # EH_LABEL pcaddu18i $ra, %call36(_ZN4Mesh20print_partition_typeEv) jirl $ra, $ra, 0 .Ltmp174: # EH_LABEL # %bb.145: - ld.d $fp, $sp, 144 # 8-byte Folded Reload + ld.d $fp, $sp, 152 # 8-byte Folded Reload ld.d $a0, $fp, %pc_lo12(_ZL4mesh) ld.w $a0, $a0, 584 ld.d $s0, $sp, 96 # 8-byte Folded Reload @@ -2278,7 +2272,11 @@ do_calc: # @do_calc movgr2fr.w $fa1, $a1 ffint.d.w $fa1, $fa1 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fs2 + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fs0, $a0 + fmul.d $fa0, $fa0, $fs0 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str.12) addi.d $a0, $a0, %pc_lo12(.L.str.12) @@ -2292,7 +2290,7 @@ do_calc: # @do_calc movgr2fr.w $fa1, $a1 ffint.d.w $fa1, $fa1 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fs2 + fmul.d $fa0, $fa0, $fs0 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str.13) addi.d $a0, $a0, %pc_lo12(.L.str.13) @@ -2329,7 +2327,7 @@ do_calc: # @do_calc jirl $ra, $ra, 0 .Ltmp180: # EH_LABEL # %bb.148: - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $fp, $a0, %pc_lo12(_ZL4mesh) bnez $fp, .LBB4_159 # %bb.149: @@ -2485,41 +2483,41 @@ do_calc: # @do_calc b .LBB4_153 .LBB4_162: .Ltmp106: # EH_LABEL - ld.d $a2, $sp, 224 + ld.d $a2, $sp, 232 beqz $a2, .LBB4_180 b .LBB4_184 .LBB4_163: .Ltmp183: # EH_LABEL - ld.d $a2, $sp, 224 + ld.d $a2, $sp, 232 beqz $a2, .LBB4_180 b .LBB4_184 .LBB4_164: .Ltmp135: # EH_LABEL - ld.d $a2, $sp, 224 + ld.d $a2, $sp, 232 beqz $a2, .LBB4_180 b .LBB4_184 .LBB4_165: .Ltmp101: # EH_LABEL - ld.d $a2, $sp, 224 + ld.d $a2, $sp, 232 beqz $a2, .LBB4_180 b .LBB4_184 .LBB4_166: # %.loopexit145 .Ltmp80: # EH_LABEL - ld.d $a2, $sp, 224 + ld.d $a2, $sp, 232 beqz $a2, .LBB4_180 b .LBB4_184 .LBB4_167: .Ltmp164: # EH_LABEL - ld.d $a2, $sp, 224 + ld.d $a2, $sp, 232 beqz $a2, .LBB4_180 b .LBB4_184 .LBB4_168: .Ltmp90: # EH_LABEL - ld.d $a2, $sp, 152 + ld.d $a2, $sp, 160 move $fp, $a0 beqz $a2, .LBB4_170 # %bb.169: - ld.d $a0, $sp, 168 + ld.d $a0, $sp, 176 sub.d $a1, $a0, $a2 move $a0, $a2 pcaddu18i $ra, %call36(_ZdlPvm) @@ -2529,21 +2527,21 @@ do_calc: # @do_calc b .LBB4_178 .LBB4_171: # %.loopexit.split-lp146 .Ltmp96: # EH_LABEL - ld.d $a2, $sp, 224 + ld.d $a2, $sp, 232 beqz $a2, .LBB4_180 b .LBB4_184 .LBB4_172: .Ltmp72: # EH_LABEL - ld.d $a2, $sp, 200 + ld.d $a2, $sp, 208 move $fp, $a0 bnez $a2, .LBB4_174 # %bb.173: move $a0, $fp - ld.d $a2, $sp, 224 + ld.d $a2, $sp, 232 beqz $a2, .LBB4_180 b .LBB4_184 .LBB4_174: - ld.d $a0, $sp, 216 + ld.d $a0, $sp, 224 sub.d $a1, $a0, $a2 b .LBB4_182 .LBB4_175: # %.loopexit150 @@ -2551,22 +2549,22 @@ do_calc: # @do_calc b .LBB4_178 .LBB4_176: # %.loopexit .Ltmp77: # EH_LABEL - ld.d $a2, $sp, 224 + ld.d $a2, $sp, 232 beqz $a2, .LBB4_180 b .LBB4_184 .LBB4_177: # %.loopexit.split-lp151 .Ltmp87: # EH_LABEL .LBB4_178: # %_ZNSt6vectorIiSaIiEED2Ev.exit123 - ld.d $a2, $sp, 176 + ld.d $a2, $sp, 184 bnez $a2, .LBB4_181 # %bb.179: # %_ZNSt6vectorIiSaIiEED2Ev.exit121 - ld.d $a2, $sp, 224 + ld.d $a2, $sp, 232 bnez $a2, .LBB4_184 .LBB4_180: # %_ZNSt6vectorIiSaIiEED2Ev.exit131 pcaddu18i $ra, %call36(_Unwind_Resume) jirl $ra, $ra, 0 .LBB4_181: - ld.d $a1, $sp, 192 + ld.d $a1, $sp, 200 sub.d $a1, $a1, $a2 move $fp, $a0 .LBB4_182: # %_ZNSt6vectorIiSaIiEED2Ev.exit121 @@ -2574,15 +2572,15 @@ do_calc: # @do_calc pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 move $a0, $fp - ld.d $a2, $sp, 224 + ld.d $a2, $sp, 232 beqz $a2, .LBB4_180 b .LBB4_184 .LBB4_183: # %.loopexit.split-lp .Ltmp186: # EH_LABEL - ld.d $a2, $sp, 224 + ld.d $a2, $sp, 232 beqz $a2, .LBB4_180 .LBB4_184: - ld.d $a1, $sp, 240 + ld.d $a1, $sp, 248 sub.d $a1, $a1, $a2 move $fp, $a0 move $a0, $a2 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/crux.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/crux.s index 37e7827a..09c6ef84 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/crux.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/crux.s @@ -87,14 +87,7 @@ _ZN4CruxC2Eiib: # @_ZN4CruxC2Eiib .Lfunc_end0: .size _ZN4CruxC2Eiib, .Lfunc_end0-_ZN4CruxC2Eiib # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN4CruxD2Ev -.LCPI1_0: - .dword 0x408f400000000000 # double 1000 -.LCPI1_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 - .text - .globl _ZN4CruxD2Ev + .globl _ZN4CruxD2Ev # -- Begin function _ZN4CruxD2Ev .p2align 5 .type _ZN4CruxD2Ev,@function _ZN4CruxD2Ev: # @_ZN4CruxD2Ev @@ -155,17 +148,22 @@ _ZN4CruxD2Ev: # @_ZN4CruxD2Ev bstrpick.d $a0, $a0, 31, 0 movgr2fr.d $fa1, $a0 ffint.s.l $fa1, $fa1 - pcalau12i $s2, %pc_hi20(checkpoint_timing_size) - fld.s $fa2, $s2, %pc_lo12(checkpoint_timing_size) fdiv.s $fa1, $fa0, $fa1 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI1_0) - fdiv.s $fa0, $fa2, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI1_1) fcvt.d.s $fa1, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + pcalau12i $s2, %pc_hi20(checkpoint_timing_size) + fld.s $fa2, $s2, %pc_lo12(checkpoint_timing_size) + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs0, $a0 fmul.d $fa1, $fa1, $fs0 + fdiv.s $fa0, $fa2, $fa0 fcvt.d.s $fa0, $fa0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs1, $a0 fmul.d $fa0, $fa0, $fs1 movfr2gr.d $a2, $fa0 movfr2gr.d $a1, $fa1 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/graphics.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/graphics.s index e21a3c24..e01f4a39 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/graphics.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/graphics.s @@ -1,10 +1,6 @@ .file "graphics.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function init_graphics_output -.LCPI0_0: - .dword 0x4089000000000000 # double 800 .text - .globl init_graphics_output + .globl init_graphics_output # -- Begin function init_graphics_output .p2align 5 .type init_graphics_output,@function init_graphics_output: # @init_graphics_output @@ -18,8 +14,10 @@ init_graphics_output: # @init_graphics_output fld.s $fa1, $a0, %pc_lo12(graphics_xmin) fsub.s $fa0, $fa0, $fa1 fcvt.d.s $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa1, $a0 pcalau12i $a0, %pc_hi20(graphics_ymax) fld.s $fa2, $a0, %pc_lo12(graphics_ymax) pcalau12i $a0, %pc_hi20(graphics_ymin) @@ -652,12 +650,7 @@ DrawSquaresToFile: # @DrawSquaresToFile .Lfunc_end11: .size DrawSquaresToFile, .Lfunc_end11-DrawSquaresToFile # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function DisplayStateToFile -.LCPI12_0: - .dword 0x40247ae147ae147b # double 10.24 - .text - .globl DisplayStateToFile + .globl DisplayStateToFile # -- Begin function DisplayStateToFile .p2align 5 .type DisplayStateToFile,@function DisplayStateToFile: # @DisplayStateToFile @@ -754,8 +747,11 @@ DisplayStateToFile: # @DisplayStateToFile st.d $a0, $sp, 136 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(data_double) st.d $a0, $sp, 64 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI12_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI12_0) + lu12i.w $a0, 293601 + ori $a0, $a0, 1147 + lu32i.d $a0, 293601 + lu52i.d $a0, $a0, 1026 + movgr2fr.d $fs0, $a0 pcalau12i $s3, %pc_hi20(graphics_xmin) pcalau12i $s4, %pc_hi20(xconversion) pcalau12i $a0, %pc_hi20(x_double) diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/hash.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/hash.s index 4ca65d50..8a8e8255 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/hash.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/hash.s @@ -22,20 +22,7 @@ get_hashtablesize: # @get_hashtablesize .Lfunc_end1: .size get_hashtablesize, .Lfunc_end1-get_hashtablesize # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function compact_hash_init -.LCPI2_0: - .dword 0x3fc999999999999a # double 0.20000000000000001 -.LCPI2_1: - .dword 0x41dfffffffc00000 # double 2147483647 -.LCPI2_2: - .dword 0x41efffffff400000 # double 4294967290 -.LCPI2_3: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI2_4: - .dword 0x4059000000000000 # double 100 - .text - .globl compact_hash_init + .globl compact_hash_init # -- Begin function compact_hash_init .p2align 5 .type compact_hash_init,@function compact_hash_init: # @compact_hash_init @@ -99,17 +86,19 @@ compact_hash_init: # @compact_hash_init pcaddu18i $ra, %call36(glibc_compat_rand) jirl $ra, $ra, 0 movgr2fr.w $fa0, $a0 - pcalau12i $a0, %pc_hi20(.LCPI2_1) - fld.d $fs0, $a0, %pc_lo12(.LCPI2_1) ffint.d.w $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI2_2) - fld.d $fs1, $a0, %pc_lo12(.LCPI2_2) + lu12i.w $a0, -1024 + lu52i.d $a0, $a0, 1053 + movgr2fr.d $fs0, $a0 fdiv.d $fa0, $fa0, $fs0 - pcalau12i $a0, %pc_hi20(.LCPI2_3) - fld.d $fs2, $a0, %pc_lo12(.LCPI2_3) + lu12i.w $a0, -3072 + lu52i.d $a0, $a0, 1054 + movgr2fr.d $fs1, $a0 fmul.d $fa0, $fa0, $fs1 vldi $vr1, -912 fadd.d $fa0, $fa0, $fa1 + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs2, $a0 fcmp.clt.d $fcc0, $fa0, $fs2 ftintrz.l.d $fa1, $fa0 movfr2gr.d $a0, $fa1 @@ -266,8 +255,11 @@ compact_hash_init: # @compact_hash_init fcvt.d.s $fa0, $fa2 bcnez $fcc0, .LBB2_32 # %bb.25: - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI2_0) + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1020 + movgr2fr.d $fa2, $a0 fmul.d $fa2, $fa0, $fa2 vldi $vr3, -972 fdiv.d $fa2, $fa3, $fa2 @@ -435,11 +427,13 @@ compact_hash_init: # @compact_hash_init bstrpick.d $a1, $s1, 31, 0 movgr2fr.d $fa0, $a1 ffint.d.l $fa0, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI2_4) - fld.d $fa1, $a1, %pc_lo12(.LCPI2_4) - movgr2fr.d $fa2, $a0 - ffint.d.l $fa2, $fa2 - fdiv.d $fa0, $fa0, $fa2 + movgr2fr.d $fa1, $a0 + ffint.d.l $fa1, $fa1 + fdiv.d $fa0, $fa0, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 movfr2gr.d $a4, $fa0 addi.w $a1, $s1, 0 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/hsfc.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/hsfc.s index 1997623a..42786d8e 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/hsfc.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/hsfc.s @@ -303,12 +303,8 @@ hsfc3d: # @hsfc3d .word .LBB1_13-.LJTI1_0 .word .LBB1_11-.LJTI1_0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function fhsfc2d -.LCPI2_0: - .dword 0x41efffffffe00000 # double 4294967295 .text - .globl fhsfc2d + .globl fhsfc2d # -- Begin function fhsfc2d .p2align 5 .type fhsfc2d,@function fhsfc2d: # @fhsfc2d @@ -335,12 +331,13 @@ fhsfc2d: # @fhsfc2d .LBB2_2: beqz $a1, .LBB2_10 # %bb.3: # %.lr.ph62.i - pcalau12i $a2, %pc_hi20(.LCPI2_0) + lu12i.w $a2, -512 fld.d $fa0, $a0, 0 - fld.d $fa1, $a2, %pc_lo12(.LCPI2_0) - fld.d $fa2, $a0, 8 - fmul.d $fa0, $fa0, $fa1 - fmul.d $fa1, $fa2, $fa1 + fld.d $fa1, $a0, 8 + lu52i.d $a0, $a2, 1054 + movgr2fr.d $fa2, $a0 + fmul.d $fa0, $fa0, $fa2 + fmul.d $fa1, $fa1, $fa2 sltui $a0, $a1, 2 ori $a2, $zero, 2 ftintrz.l.d $fa0, $fa0 @@ -423,12 +420,7 @@ fhsfc2d: # @fhsfc2d .Lfunc_end2: .size fhsfc2d, .Lfunc_end2-fhsfc2d # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function fhsfc3d -.LCPI3_0: - .dword 0x41efffffffe00000 # double 4294967295 - .text - .globl fhsfc3d + .globl fhsfc3d # -- Begin function fhsfc3d .p2align 5 .type fhsfc3d,@function fhsfc3d: # @fhsfc3d @@ -436,8 +428,9 @@ fhsfc3d: # @fhsfc3d addi.d $sp, $sp, -32 st.d $ra, $sp, 24 # 8-byte Folded Spill fld.d $fa0, $a0, 0 - pcalau12i $a3, %pc_hi20(.LCPI3_0) - fld.d $fa1, $a3, %pc_lo12(.LCPI3_0) + lu12i.w $a3, -512 + lu52i.d $a3, $a3, 1054 + movgr2fr.d $fa1, $a3 fld.d $fa2, $a0, 8 fmul.d $fa0, $fa0, $fa1 ftintrz.l.d $fa0, $fa0 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/hsfcsort.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/hsfcsort.s index 3f7938c4..5b1f06a8 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/hsfcsort.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/hsfcsort.s @@ -1,21 +1,17 @@ .file "hsfcsort.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function hsfc2sort -.LCPI0_0: - .dword 0x41efffffffe00000 # double 4294967295 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI0_1: + .p2align 4, 0x0 # -- Begin function hsfc2sort +.LCPI0_0: .word 1 # 0x1 .word 3 # 0x3 .word 5 # 0x5 .word 7 # 0x7 -.LCPI0_2: +.LCPI0_1: .word 0 # 0x0 .word 3 # 0x3 .word 6 # 0x6 .word 9 # 0x9 -.LCPI0_3: +.LCPI0_2: .word 2 # 0x2 .word 5 # 0x5 .word 8 # 0x8 @@ -52,11 +48,12 @@ hsfc2sort: # @hsfc2sort move $s2, $a0 beqz $s3, .LBB0_6 # %bb.1: # %.lr.ph - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_0) move $s6, $zero move $s7, $zero bstrpick.d $s8, $s3, 31, 0 + lu12i.w $a0, -512 + lu52i.d $a0, $a0, 1054 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB0_2: # =>This Inner Loop Header: Depth=1 fld.d $fa0, $s5, 0 @@ -201,8 +198,8 @@ hsfc2sort: # @hsfc2sort bstrpick.d $a0, $s3, 29, 2 slli.w $a1, $a0, 2 slli.d $a2, $a0, 3 - pcalau12i $a3, %pc_hi20(.LCPI0_3) - vld $vr0, $a3, %pc_lo12(.LCPI0_3) + pcalau12i $a3, %pc_hi20(.LCPI0_2) + vld $vr0, $a3, %pc_lo12(.LCPI0_2) alsl.w $a0, $a0, $a2, 2 addi.w $a0, $a0, 2 vreplgr2vr.w $vr1, $s1 @@ -257,10 +254,10 @@ hsfc2sort: # @hsfc2sort # %bb.27: # %vector.ph move $a2, $s3 bstrins.d $a2, $zero, 1, 0 + pcalau12i $a0, %pc_hi20(.LCPI0_0) + vld $vr0, $a0, %pc_lo12(.LCPI0_0) pcalau12i $a0, %pc_hi20(.LCPI0_1) - vld $vr0, $a0, %pc_lo12(.LCPI0_1) - pcalau12i $a0, %pc_hi20(.LCPI0_2) - vld $vr1, $a0, %pc_lo12(.LCPI0_2) + vld $vr1, $a0, %pc_lo12(.LCPI0_1) alsl.w $a1, $a2, $a2, 1 slli.d $a0, $a2, 1 addi.w $a0, $a0, 1 @@ -344,23 +341,19 @@ ui2comp: # @ui2comp .Lfunc_end1: .size ui2comp, .Lfunc_end1-ui2comp # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function hsfc3sort -.LCPI2_0: - .dword 0x41efffffffe00000 # double 4294967295 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI2_1: + .p2align 4, 0x0 # -- Begin function hsfc3sort +.LCPI2_0: .word 1 # 0x1 .word 3 # 0x3 .word 5 # 0x5 .word 7 # 0x7 -.LCPI2_2: +.LCPI2_1: .word 0 # 0x0 .word 4 # 0x4 .word 8 # 0x8 .word 12 # 0xc -.LCPI2_3: +.LCPI2_2: .word 3 # 0x3 .word 7 # 0x7 .word 11 # 0xb @@ -397,12 +390,13 @@ hsfc3sort: # @hsfc3sort move $s2, $a0 beqz $s3, .LBB2_6 # %bb.1: # %.lr.ph - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI2_0) move $s7, $zero move $s8, $zero st.d $s3, $sp, 8 # 8-byte Folded Spill bstrpick.d $s3, $s3, 31, 0 + lu12i.w $a0, -512 + lu52i.d $a0, $a0, 1054 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB2_2: # =>This Inner Loop Header: Depth=1 fld.d $fa0, $s6, 0 @@ -497,10 +491,10 @@ hsfc3sort: # @hsfc3sort # %bb.13: # %vector.ph bstrpick.d $a1, $s3, 29, 2 slli.w $a2, $a1, 2 + pcalau12i $a0, %pc_hi20(.LCPI2_0) + vld $vr0, $a0, %pc_lo12(.LCPI2_0) pcalau12i $a0, %pc_hi20(.LCPI2_1) - vld $vr0, $a0, %pc_lo12(.LCPI2_1) - pcalau12i $a0, %pc_hi20(.LCPI2_2) - vld $vr1, $a0, %pc_lo12(.LCPI2_2) + vld $vr1, $a0, %pc_lo12(.LCPI2_1) slli.w $a0, $a1, 4 slli.d $a1, $a1, 3 addi.w $a1, $a1, 1 @@ -639,8 +633,8 @@ hsfc3sort: # @hsfc3sort # %bb.28: # %vector.ph147 bstrpick.d $a0, $s3, 29, 2 slli.w $a1, $a0, 2 - pcalau12i $a2, %pc_hi20(.LCPI2_3) - vld $vr0, $a2, %pc_lo12(.LCPI2_3) + pcalau12i $a2, %pc_hi20(.LCPI2_2) + vld $vr0, $a2, %pc_lo12(.LCPI2_2) slli.d $a0, $a0, 4 addi.w $a0, $a0, 3 vreplgr2vr.w $vr1, $s1 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/mesh.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/mesh.s index 17f7b77d..f8b25054 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/mesh.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/mesh.s @@ -2057,12 +2057,8 @@ GCC_except_table9: .Lcst_end2: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN4Mesh50compare_coordinates_cpu_local_to_cpu_global_doubleEjPiS0_PdS1_S1_S1_S1_S1_S1_S1_S1_S1_i -.LCPI10_0: - .dword 0x3f9999999999999a # double 0.025000000000000001 .text - .globl _ZN4Mesh50compare_coordinates_cpu_local_to_cpu_global_doubleEjPiS0_PdS1_S1_S1_S1_S1_S1_S1_S1_S1_i + .globl _ZN4Mesh50compare_coordinates_cpu_local_to_cpu_global_doubleEjPiS0_PdS1_S1_S1_S1_S1_S1_S1_S1_S1_i # -- Begin function _ZN4Mesh50compare_coordinates_cpu_local_to_cpu_global_doubleEjPiS0_PdS1_S1_S1_S1_S1_S1_S1_S1_S1_i .p2align 5 .type _ZN4Mesh50compare_coordinates_cpu_local_to_cpu_global_doubleEjPiS0_PdS1_S1_S1_S1_S1_S1_S1_S1_S1_i,@function _ZN4Mesh50compare_coordinates_cpu_local_to_cpu_global_doubleEjPiS0_PdS1_S1_S1_S1_S1_S1_S1_S1_S1_i: # @_ZN4Mesh50compare_coordinates_cpu_local_to_cpu_global_doubleEjPiS0_PdS1_S1_S1_S1_S1_S1_S1_S1_S1_i @@ -2111,31 +2107,31 @@ _ZN4Mesh50compare_coordinates_cpu_local_to_cpu_global_doubleEjPiS0_PdS1_S1_S1_S1 ld.d $a0, $sp, 200 st.d $a0, $sp, 56 # 8-byte Folded Spill bstrpick.d $s7, $a1, 31, 0 - slli.d $s6, $s7, 3 - move $a0, $s6 + slli.d $s1, $s7, 3 + move $a0, $s1 pcaddu18i $ra, %call36(_Znwm) jirl $ra, $ra, 0 - move $s1, $a0 + move $s8, $a0 st.d $zero, $a0, 0 - addi.d $s8, $s7, -1 - slli.d $fp, $s8, 3 - beqz $s8, .LBB10_3 + addi.d $s6, $s7, -1 + slli.d $fp, $s6, 3 + beqz $s6, .LBB10_3 # %bb.2: # %_ZSt6fill_nIPdmdET_S1_T0_RKT1_.exit.loopexit.i.i.i.i.i - addi.d $a0, $s1, 8 + addi.d $a0, $s8, 8 move $a1, $zero move $a2, $fp pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 .LBB10_3: .Ltmp29: # EH_LABEL - move $a0, $s6 + move $a0, $s1 pcaddu18i $ra, %call36(_Znwm) jirl $ra, $ra, 0 .Ltmp30: # EH_LABEL # %bb.4: # %.noexc64 move $s0, $a0 st.d $zero, $a0, 0 - beqz $s8, .LBB10_6 + beqz $s6, .LBB10_6 # %bb.5: # %_ZSt6fill_nIPdmdET_S1_T0_RKT1_.exit.loopexit.i.i.i.i.i60 addi.d $a0, $s0, 8 move $a1, $zero @@ -2144,14 +2140,14 @@ _ZN4Mesh50compare_coordinates_cpu_local_to_cpu_global_doubleEjPiS0_PdS1_S1_S1_S1 jirl $ra, $ra, 0 .LBB10_6: .Ltmp32: # EH_LABEL - move $a0, $s6 + move $a0, $s1 pcaddu18i $ra, %call36(_Znwm) jirl $ra, $ra, 0 .Ltmp33: # EH_LABEL # %bb.7: # %.noexc71 move $s3, $a0 st.d $zero, $a0, 0 - beqz $s8, .LBB10_9 + beqz $s6, .LBB10_9 # %bb.8: # %_ZSt6fill_nIPdmdET_S1_T0_RKT1_.exit.loopexit.i.i.i.i.i67 addi.d $a0, $s3, 8 move $a1, $zero @@ -2160,14 +2156,14 @@ _ZN4Mesh50compare_coordinates_cpu_local_to_cpu_global_doubleEjPiS0_PdS1_S1_S1_S1 jirl $ra, $ra, 0 .LBB10_9: .Ltmp35: # EH_LABEL - move $a0, $s6 + move $a0, $s1 pcaddu18i $ra, %call36(_Znwm) jirl $ra, $ra, 0 .Ltmp36: # EH_LABEL # %bb.10: # %.noexc78 move $s4, $a0 st.d $zero, $a0, 0 - beqz $s8, .LBB10_12 + beqz $s6, .LBB10_12 # %bb.11: # %_ZSt6fill_nIPdmdET_S1_T0_RKT1_.exit.loopexit.i.i.i.i.i74 addi.d $a0, $s4, 8 move $a1, $zero @@ -2176,15 +2172,15 @@ _ZN4Mesh50compare_coordinates_cpu_local_to_cpu_global_doubleEjPiS0_PdS1_S1_S1_S1 jirl $ra, $ra, 0 .LBB10_12: .Ltmp38: # EH_LABEL - move $a0, $s6 + move $a0, $s1 pcaddu18i $ra, %call36(_Znwm) jirl $ra, $ra, 0 .Ltmp39: # EH_LABEL # %bb.13: # %.noexc85 move $s5, $a0 - st.d $s6, $sp, 16 # 8-byte Folded Spill + st.d $s1, $sp, 16 # 8-byte Folded Spill st.d $zero, $a0, 0 - beqz $s8, .LBB10_15 + beqz $s6, .LBB10_15 # %bb.14: # %_ZSt6fill_nIPdmdET_S1_T0_RKT1_.exit.loopexit.i.i.i.i.i81 addi.d $a0, $s5, 8 move $a1, $zero @@ -2192,8 +2188,11 @@ _ZN4Mesh50compare_coordinates_cpu_local_to_cpu_global_doubleEjPiS0_PdS1_S1_S1_S1 pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 .LBB10_15: # %.lr.ph.preheader - pcalau12i $a0, %pc_hi20(.LCPI10_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI10_0) + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1017 + movgr2fr.d $fs0, $a0 pcalau12i $a0, %pc_hi20(.L.str.27) addi.d $a0, $a0, %pc_lo12(.L.str.27) st.d $a0, $sp, 48 # 8-byte Folded Spill @@ -2208,20 +2207,20 @@ _ZN4Mesh50compare_coordinates_cpu_local_to_cpu_global_doubleEjPiS0_PdS1_S1_S1_S1 st.d $a0, $sp, 24 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.L.str.31) addi.d $fp, $a0, %pc_lo12(.L.str.31) - move $s8, $zero + move $s1, $zero move $s6, $zero b .LBB10_17 .p2align 4, , 16 .LBB10_16: # in Loop: Header=BB10_17 Depth=1 addi.w $s6, $s6, 1 addi.d $s7, $s7, -1 - addi.d $s8, $s8, 8 + addi.d $s1, $s1, 8 beqz $s7, .LBB10_27 .LBB10_17: # %.lr.ph # =>This Inner Loop Header: Depth=1 ld.d $a0, $sp, 56 # 8-byte Folded Reload - fldx.d $fa0, $a0, $s8 - fldx.d $fa1, $s1, $s8 + fldx.d $fa0, $a0, $s1 + fldx.d $fa1, $s8, $s1 fsub.d $fa2, $fa0, $fa1 fabs.d $fa2, $fa2 fcmp.cule.d $fcc0, $fa2, $fs0 @@ -2236,8 +2235,8 @@ _ZN4Mesh50compare_coordinates_cpu_local_to_cpu_global_doubleEjPiS0_PdS1_S1_S1_S1 jirl $ra, $ra, 0 .LBB10_19: # in Loop: Header=BB10_17 Depth=1 ld.d $a0, $sp, 64 # 8-byte Folded Reload - fldx.d $fa0, $a0, $s8 - fldx.d $fa1, $s0, $s8 + fldx.d $fa0, $a0, $s1 + fldx.d $fa1, $s0, $s1 fsub.d $fa2, $fa0, $fa1 fabs.d $fa2, $fa2 fcmp.cule.d $fcc0, $fa2, $fs0 @@ -2252,8 +2251,8 @@ _ZN4Mesh50compare_coordinates_cpu_local_to_cpu_global_doubleEjPiS0_PdS1_S1_S1_S1 jirl $ra, $ra, 0 .LBB10_21: # in Loop: Header=BB10_17 Depth=1 ld.d $a0, $sp, 72 # 8-byte Folded Reload - fldx.d $fa0, $a0, $s8 - fldx.d $fa1, $s3, $s8 + fldx.d $fa0, $a0, $s1 + fldx.d $fa1, $s3, $s1 fsub.d $fa2, $fa0, $fa1 fabs.d $fa2, $fa2 fcmp.cule.d $fcc0, $fa2, $fs0 @@ -2268,8 +2267,8 @@ _ZN4Mesh50compare_coordinates_cpu_local_to_cpu_global_doubleEjPiS0_PdS1_S1_S1_S1 jirl $ra, $ra, 0 .LBB10_23: # in Loop: Header=BB10_17 Depth=1 ld.d $a0, $sp, 80 # 8-byte Folded Reload - fldx.d $fa0, $a0, $s8 - fldx.d $fa1, $s4, $s8 + fldx.d $fa0, $a0, $s1 + fldx.d $fa1, $s4, $s1 fsub.d $fa2, $fa0, $fa1 fabs.d $fa2, $fa2 fcmp.cule.d $fcc0, $fa2, $fs0 @@ -2284,8 +2283,8 @@ _ZN4Mesh50compare_coordinates_cpu_local_to_cpu_global_doubleEjPiS0_PdS1_S1_S1_S1 jirl $ra, $ra, 0 .LBB10_25: # in Loop: Header=BB10_17 Depth=1 ld.d $a0, $sp, 88 # 8-byte Folded Reload - fldx.d $fa0, $a0, $s8 - fldx.d $fa1, $s5, $s8 + fldx.d $fa0, $a0, $s1 + fldx.d $fa1, $s5, $s1 fsub.d $fa2, $fa0, $fa1 fabs.d $fa2, $fa2 fcmp.cule.d $fcc0, $fa2, $fs0 @@ -2317,7 +2316,7 @@ _ZN4Mesh50compare_coordinates_cpu_local_to_cpu_global_doubleEjPiS0_PdS1_S1_S1_S1 move $a1, $fp pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - move $a0, $s1 + move $a0, $s8 move $a1, $fp fld.d $fs0, $sp, 96 # 8-byte Folded Reload ld.d $s8, $sp, 104 # 8-byte Folded Reload @@ -2354,15 +2353,15 @@ _ZN4Mesh50compare_coordinates_cpu_local_to_cpu_global_doubleEjPiS0_PdS1_S1_S1_S1 move $s2, $s0 move $fp, $a0 move $a0, $s4 - move $s0, $s6 - move $a1, $s6 + move $s0, $s1 + move $a1, $s1 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 b .LBB10_31 .LBB10_30: .Ltmp37: # EH_LABEL move $s2, $s0 - move $s0, $s6 + move $s0, $s1 move $fp, $a0 .LBB10_31: # %_ZNSt6vectorIdSaIdEED2Ev.exit98 move $a0, $s3 @@ -2373,7 +2372,7 @@ _ZN4Mesh50compare_coordinates_cpu_local_to_cpu_global_doubleEjPiS0_PdS1_S1_S1_S1 .LBB10_32: .Ltmp34: # EH_LABEL move $s2, $s0 - move $s0, $s6 + move $s0, $s1 move $fp, $a0 .LBB10_33: # %_ZNSt6vectorIdSaIdEED2Ev.exit100 move $a0, $s2 @@ -2383,10 +2382,10 @@ _ZN4Mesh50compare_coordinates_cpu_local_to_cpu_global_doubleEjPiS0_PdS1_S1_S1_S1 b .LBB10_35 .LBB10_34: .Ltmp31: # EH_LABEL - move $s0, $s6 + move $s0, $s1 move $fp, $a0 .LBB10_35: # %_ZNSt6vectorIdSaIdEED2Ev.exit102 - move $a0, $s1 + move $a0, $s8 move $a1, $s0 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -2444,12 +2443,8 @@ GCC_except_table10: .Lcst_end3: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN4Mesh49compare_coordinates_cpu_local_to_cpu_global_floatEjPiS0_PdS1_S1_S1_PfS1_S1_S1_S1_S2_i -.LCPI11_0: - .dword 0x3f9999999999999a # double 0.025000000000000001 .text - .globl _ZN4Mesh49compare_coordinates_cpu_local_to_cpu_global_floatEjPiS0_PdS1_S1_S1_PfS1_S1_S1_S1_S2_i + .globl _ZN4Mesh49compare_coordinates_cpu_local_to_cpu_global_floatEjPiS0_PdS1_S1_S1_PfS1_S1_S1_S1_S2_i # -- Begin function _ZN4Mesh49compare_coordinates_cpu_local_to_cpu_global_floatEjPiS0_PdS1_S1_S1_PfS1_S1_S1_S1_S2_i .p2align 5 .type _ZN4Mesh49compare_coordinates_cpu_local_to_cpu_global_floatEjPiS0_PdS1_S1_S1_PfS1_S1_S1_S1_S2_i,@function _ZN4Mesh49compare_coordinates_cpu_local_to_cpu_global_floatEjPiS0_PdS1_S1_S1_PfS1_S1_S1_S1_S2_i: # @_ZN4Mesh49compare_coordinates_cpu_local_to_cpu_global_floatEjPiS0_PdS1_S1_S1_PfS1_S1_S1_S1_S2_i @@ -2502,13 +2497,13 @@ _ZN4Mesh49compare_coordinates_cpu_local_to_cpu_global_floatEjPiS0_PdS1_S1_S1_PfS move $a0, $s7 pcaddu18i $ra, %call36(_Znwm) jirl $ra, $ra, 0 - move $s0, $a0 + move $s1, $a0 st.d $zero, $a0, 0 addi.d $s8, $s5, -1 slli.d $fp, $s8, 3 beqz $s8, .LBB11_3 # %bb.2: # %_ZSt6fill_nIPdmdET_S1_T0_RKT1_.exit.loopexit.i.i.i.i.i - addi.d $a0, $s0, 8 + addi.d $a0, $s1, 8 move $a1, $zero move $a2, $fp pcaddu18i $ra, %call36(memset) @@ -2520,11 +2515,11 @@ _ZN4Mesh49compare_coordinates_cpu_local_to_cpu_global_floatEjPiS0_PdS1_S1_S1_PfS jirl $ra, $ra, 0 .Ltmp42: # EH_LABEL # %bb.4: # %.noexc64 - move $s1, $a0 + move $s0, $a0 st.d $zero, $a0, 0 beqz $s8, .LBB11_6 # %bb.5: # %_ZSt6fill_nIPdmdET_S1_T0_RKT1_.exit.loopexit.i.i.i.i.i60 - addi.d $a0, $s1, 8 + addi.d $a0, $s0, 8 move $a1, $zero move $a2, $fp pcaddu18i $ra, %call36(memset) @@ -2581,8 +2576,11 @@ _ZN4Mesh49compare_coordinates_cpu_local_to_cpu_global_floatEjPiS0_PdS1_S1_S1_PfS pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 .LBB11_15: # %.lr.ph.preheader - pcalau12i $a0, %pc_hi20(.LCPI11_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI11_0) + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1017 + movgr2fr.d $fs0, $a0 pcalau12i $a0, %pc_hi20(.L.str.27) addi.d $a0, $a0, %pc_lo12(.L.str.27) st.d $a0, $sp, 64 # 8-byte Folded Spill @@ -2598,22 +2596,22 @@ _ZN4Mesh49compare_coordinates_cpu_local_to_cpu_global_floatEjPiS0_PdS1_S1_S1_PfS pcalau12i $a0, %pc_hi20(.L.str.31) addi.d $a0, $a0, %pc_lo12(.L.str.31) st.d $a0, $sp, 32 # 8-byte Folded Spill - move $fp, $zero move $s8, $zero + move $fp, $zero move $s7, $zero b .LBB11_17 .p2align 4, , 16 .LBB11_16: # in Loop: Header=BB11_17 Depth=1 addi.w $s7, $s7, 1 - addi.d $s8, $s8, 8 + addi.d $fp, $fp, 8 addi.d $s5, $s5, -1 - addi.d $fp, $fp, 4 + addi.d $s8, $s8, 4 beqz $s5, .LBB11_27 .LBB11_17: # %.lr.ph # =>This Inner Loop Header: Depth=1 ld.d $a0, $sp, 72 # 8-byte Folded Reload - fldx.d $fa0, $a0, $s8 - fldx.d $fa1, $s0, $s8 + fldx.d $fa0, $a0, $fp + fldx.d $fa1, $s1, $fp fsub.d $fa2, $fa0, $fa1 fabs.d $fa2, $fa2 fcmp.cule.d $fcc0, $fa2, $fs0 @@ -2628,8 +2626,8 @@ _ZN4Mesh49compare_coordinates_cpu_local_to_cpu_global_floatEjPiS0_PdS1_S1_S1_PfS jirl $ra, $ra, 0 .LBB11_19: # in Loop: Header=BB11_17 Depth=1 ld.d $a0, $sp, 80 # 8-byte Folded Reload - fldx.d $fa0, $a0, $s8 - fldx.d $fa1, $s1, $s8 + fldx.d $fa0, $a0, $fp + fldx.d $fa1, $s0, $fp fsub.d $fa2, $fa0, $fa1 fabs.d $fa2, $fa2 fcmp.cule.d $fcc0, $fa2, $fs0 @@ -2644,8 +2642,8 @@ _ZN4Mesh49compare_coordinates_cpu_local_to_cpu_global_floatEjPiS0_PdS1_S1_S1_PfS jirl $ra, $ra, 0 .LBB11_21: # in Loop: Header=BB11_17 Depth=1 ld.d $a0, $sp, 88 # 8-byte Folded Reload - fldx.d $fa0, $a0, $s8 - fldx.d $fa1, $s3, $s8 + fldx.d $fa0, $a0, $fp + fldx.d $fa1, $s3, $fp fsub.d $fa2, $fa0, $fa1 fabs.d $fa2, $fa2 fcmp.cule.d $fcc0, $fa2, $fs0 @@ -2660,8 +2658,8 @@ _ZN4Mesh49compare_coordinates_cpu_local_to_cpu_global_floatEjPiS0_PdS1_S1_S1_PfS jirl $ra, $ra, 0 .LBB11_23: # in Loop: Header=BB11_17 Depth=1 ld.d $a0, $sp, 96 # 8-byte Folded Reload - fldx.d $fa0, $a0, $s8 - fldx.d $fa1, $s4, $s8 + fldx.d $fa0, $a0, $fp + fldx.d $fa1, $s4, $fp fsub.d $fa2, $fa0, $fa1 fabs.d $fa2, $fa2 fcmp.cule.d $fcc0, $fa2, $fs0 @@ -2676,8 +2674,8 @@ _ZN4Mesh49compare_coordinates_cpu_local_to_cpu_global_floatEjPiS0_PdS1_S1_S1_PfS jirl $ra, $ra, 0 .LBB11_25: # in Loop: Header=BB11_17 Depth=1 ld.d $a0, $sp, 104 # 8-byte Folded Reload - fldx.s $fa0, $a0, $fp - fldx.s $fa1, $s6, $fp + fldx.s $fa0, $a0, $s8 + fldx.s $fa1, $s6, $s8 fsub.s $fa2, $fa0, $fa1 fabs.s $fa2, $fa2 fcvt.d.s $fa2, $fa2 @@ -2708,11 +2706,11 @@ _ZN4Mesh49compare_coordinates_cpu_local_to_cpu_global_floatEjPiS0_PdS1_S1_S1_PfS move $a1, $fp pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - move $a0, $s1 + move $a0, $s0 move $a1, $fp pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - move $a0, $s0 + move $a0, $s1 move $a1, $fp fld.d $fs0, $sp, 112 # 8-byte Folded Reload ld.d $s8, $sp, 120 # 8-byte Folded Reload @@ -2768,7 +2766,7 @@ _ZN4Mesh49compare_coordinates_cpu_local_to_cpu_global_floatEjPiS0_PdS1_S1_S1_PfS move $s2, $s7 move $fp, $a0 .LBB11_33: # %_ZNSt6vectorIdSaIdEED2Ev.exit96 - move $a0, $s1 + move $a0, $s0 move $a1, $s2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -2778,7 +2776,7 @@ _ZN4Mesh49compare_coordinates_cpu_local_to_cpu_global_floatEjPiS0_PdS1_S1_S1_PfS move $s2, $s7 move $fp, $a0 .LBB11_35: # %_ZNSt6vectorIdSaIdEED2Ev.exit98 - move $a0, $s0 + move $a0, $s1 move $a1, $s2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -10124,14 +10122,7 @@ _ZN4Mesh10set_boundsEi: # @_ZN4Mesh10set_boundsEi .Lfunc_end23: .size _ZN4Mesh10set_boundsEi, .Lfunc_end23-_ZN4Mesh10set_boundsEi # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN4Mesh11calc_minmaxEv -.LCPI24_0: - .dword 0x46293e5939a08cea # double 1.0E+30 -.LCPI24_1: - .dword 0xc6293e5939a08cea # double -1.0E+30 - .text - .globl _ZN4Mesh11calc_minmaxEv + .globl _ZN4Mesh11calc_minmaxEv # -- Begin function _ZN4Mesh11calc_minmaxEv .p2align 5 .type _ZN4Mesh11calc_minmaxEv,@function _ZN4Mesh11calc_minmaxEv: # @_ZN4Mesh11calc_minmaxEv @@ -10140,16 +10131,15 @@ _ZN4Mesh11calc_minmaxEv: # @_ZN4Mesh11calc_minmaxEv ori $a3, $a1, 3306 lu32i.d $a3, -442791 ld.d $a1, $a0, 1160 - lu52i.d $a2, $a3, 1122 - st.d $a2, $a0, 1184 - st.d $a2, $a0, 1200 - st.d $a2, $a0, 1216 + lu52i.d $a4, $a3, 1122 + st.d $a4, $a0, 1184 + st.d $a4, $a0, 1200 + st.d $a4, $a0, 1216 beqz $a1, .LBB24_27 # %bb.1: # %.lr.ph ld.d $a2, $a0, 1416 - pcalau12i $a4, %pc_hi20(.LCPI24_0) - fld.d $fa0, $a4, %pc_lo12(.LCPI24_0) ori $a5, $zero, 1 + movgr2fr.d $fa0, $a4 b .LBB24_3 .p2align 4, , 16 .LBB24_2: # in Loop: Header=BB24_3 Depth=1 @@ -10167,8 +10157,8 @@ _ZN4Mesh11calc_minmaxEv: # @_ZN4Mesh11calc_minmaxEv b .LBB24_2 .LBB24_5: # %.lr.ph51 ld.d $a2, $a0, 1464 - fld.d $fa0, $a4, %pc_lo12(.LCPI24_0) ori $a5, $zero, 1 + movgr2fr.d $fa0, $a4 b .LBB24_7 .p2align 4, , 16 .LBB24_6: # in Loop: Header=BB24_7 Depth=1 @@ -10190,15 +10180,15 @@ _ZN4Mesh11calc_minmaxEv: # @_ZN4Mesh11calc_minmaxEv blt $a2, $a5, .LBB24_14 # %bb.10: # %.lr.ph54 ld.d $a5, $a0, 1512 - fld.d $fa0, $a4, %pc_lo12(.LCPI24_0) - ori $a4, $zero, 1 + ori $a6, $zero, 1 + movgr2fr.d $fa0, $a4 b .LBB24_12 .p2align 4, , 16 .LBB24_11: # in Loop: Header=BB24_12 Depth=1 - bstrpick.d $a6, $a4, 31, 0 + bstrpick.d $a4, $a6, 31, 0 addi.d $a5, $a5, 8 - addi.w $a4, $a4, 1 - bgeu $a6, $a1, .LBB24_14 + addi.w $a6, $a6, 1 + bgeu $a4, $a1, .LBB24_14 .LBB24_12: # =>This Inner Loop Header: Depth=1 fld.d $fa1, $a5, 0 fcmp.cule.d $fcc0, $fa0, $fa1 @@ -10208,15 +10198,14 @@ _ZN4Mesh11calc_minmaxEv: # @_ZN4Mesh11calc_minmaxEv fmov.d $fa0, $fa1 b .LBB24_11 .LBB24_14: # %.lr.ph58 - lu52i.d $a6, $a3, -926 - st.d $a6, $a0, 1192 + lu52i.d $a3, $a3, -926 + st.d $a3, $a0, 1192 + st.d $a3, $a0, 1208 ld.d $a4, $a0, 1416 ld.d $a5, $a0, 1440 - pcalau12i $a3, %pc_hi20(.LCPI24_1) - fld.d $fa0, $a3, %pc_lo12(.LCPI24_1) - st.d $a6, $a0, 1208 - st.d $a6, $a0, 1224 + st.d $a3, $a0, 1224 ori $a6, $zero, 1 + movgr2fr.d $fa0, $a3 b .LBB24_16 .p2align 4, , 16 .LBB24_15: # in Loop: Header=BB24_16 Depth=1 @@ -10238,8 +10227,8 @@ _ZN4Mesh11calc_minmaxEv: # @_ZN4Mesh11calc_minmaxEv .LBB24_18: # %.lr.ph61 ld.d $a4, $a0, 1464 ld.d $a5, $a0, 1488 - fld.d $fa0, $a3, %pc_lo12(.LCPI24_1) ori $a6, $zero, 1 + movgr2fr.d $fa0, $a3 b .LBB24_20 .p2align 4, , 16 .LBB24_19: # in Loop: Header=BB24_20 Depth=1 @@ -10264,16 +10253,16 @@ _ZN4Mesh11calc_minmaxEv: # @_ZN4Mesh11calc_minmaxEv # %bb.23: # %.lr.ph65 ld.d $a2, $a0, 1512 ld.d $a4, $a0, 1536 - fld.d $fa0, $a3, %pc_lo12(.LCPI24_1) - ori $a3, $zero, 1 + ori $a5, $zero, 1 + movgr2fr.d $fa0, $a3 b .LBB24_25 .p2align 4, , 16 .LBB24_24: # in Loop: Header=BB24_25 Depth=1 - bstrpick.d $a5, $a3, 31, 0 + bstrpick.d $a3, $a5, 31, 0 addi.d $a2, $a2, 8 addi.d $a4, $a4, 8 - addi.w $a3, $a3, 1 - bgeu $a5, $a1, .LBB24_28 + addi.w $a5, $a5, 1 + bgeu $a3, $a1, .LBB24_28 .LBB24_25: # =>This Inner Loop Header: Depth=1 fld.d $fa1, $a2, 0 fld.d $fa2, $a4, 0 @@ -10299,12 +10288,6 @@ _ZN4Mesh11calc_minmaxEv: # @_ZN4Mesh11calc_minmaxEv .LCPI25_0: .dword 0x46293e5939a08cea # double 1.0E+30 .dword 0xc6293e5939a08cea # double -1.0E+30 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI25_1: - .dword 0xc6293e5939a08cea # double -1.0E+30 -.LCPI25_2: - .dword 0x46293e5939a08cea # double 1.0E+30 .text .globl _ZN4Mesh17calc_centerminmaxEv .p2align 5 @@ -10321,11 +10304,14 @@ _ZN4Mesh17calc_centerminmaxEv: # @_ZN4Mesh17calc_centerminmaxEv # %bb.1: # %.lr.ph ld.d $a4, $a0, 1416 ld.d $a5, $a0, 1440 - pcalau12i $a2, %pc_hi20(.LCPI25_1) - fld.d $fa0, $a2, %pc_lo12(.LCPI25_1) - pcalau12i $a3, %pc_hi20(.LCPI25_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI25_2) ori $a6, $zero, 1 + lu12i.w $a2, 236040 + ori $a3, $a2, 3306 + lu32i.d $a3, -442791 + lu52i.d $a2, $a3, -926 + movgr2fr.d $fa0, $a2 + lu52i.d $a3, $a3, 1122 + movgr2fr.d $fa2, $a3 vldi $vr1, -928 b .LBB25_3 .p2align 4, , 16 @@ -10355,10 +10341,10 @@ _ZN4Mesh17calc_centerminmaxEv: # @_ZN4Mesh17calc_centerminmaxEv .LBB25_7: # %.lr.ph40 ld.d $a4, $a0, 1464 ld.d $a5, $a0, 1488 - fld.d $fa0, $a2, %pc_lo12(.LCPI25_1) - fld.d $fa2, $a3, %pc_lo12(.LCPI25_2) ori $a6, $zero, 1 - vldi $vr1, -928 + movgr2fr.d $fa1, $a2 + movgr2fr.d $fa2, $a3 + vldi $vr0, -928 b .LBB25_9 .p2align 4, , 16 .LBB25_8: # in Loop: Header=BB25_9 Depth=1 @@ -10370,7 +10356,7 @@ _ZN4Mesh17calc_centerminmaxEv: # @_ZN4Mesh17calc_centerminmaxEv .LBB25_9: # =>This Inner Loop Header: Depth=1 fld.d $fa3, $a5, 0 fld.d $fa4, $a4, 0 - fmul.d $fa3, $fa3, $fa1 + fmul.d $fa3, $fa3, $fa0 fadd.d $fa3, $fa4, $fa3 fcmp.cule.d $fcc0, $fa2, $fa3 bcnez $fcc0, .LBB25_11 @@ -10378,11 +10364,11 @@ _ZN4Mesh17calc_centerminmaxEv: # @_ZN4Mesh17calc_centerminmaxEv fst.d $fa3, $a0, 1248 fmov.d $fa2, $fa3 .LBB25_11: # in Loop: Header=BB25_9 Depth=1 - fcmp.cule.d $fcc0, $fa3, $fa0 + fcmp.cule.d $fcc0, $fa3, $fa1 bcnez $fcc0, .LBB25_8 # %bb.12: # in Loop: Header=BB25_9 Depth=1 fst.d $fa3, $a0, 1256 - fmov.d $fa0, $fa3 + fmov.d $fa1, $fa3 b .LBB25_8 .LBB25_13: # %._crit_edge ld.w $a4, $a0, 0 @@ -10393,22 +10379,22 @@ _ZN4Mesh17calc_centerminmaxEv: # @_ZN4Mesh17calc_centerminmaxEv .LBB25_15: # %.lr.ph44 ld.d $a4, $a0, 1512 ld.d $a5, $a0, 1536 - fld.d $fa0, $a2, %pc_lo12(.LCPI25_1) - fld.d $fa2, $a3, %pc_lo12(.LCPI25_2) - ori $a2, $zero, 1 - vldi $vr1, -928 + ori $a6, $zero, 1 + movgr2fr.d $fa1, $a2 + movgr2fr.d $fa2, $a3 + vldi $vr0, -928 b .LBB25_17 .p2align 4, , 16 .LBB25_16: # in Loop: Header=BB25_17 Depth=1 - bstrpick.d $a3, $a2, 31, 0 + bstrpick.d $a2, $a6, 31, 0 addi.d $a4, $a4, 8 addi.d $a5, $a5, 8 - addi.w $a2, $a2, 1 - bgeu $a3, $a1, .LBB25_14 + addi.w $a6, $a6, 1 + bgeu $a2, $a1, .LBB25_14 .LBB25_17: # =>This Inner Loop Header: Depth=1 fld.d $fa3, $a5, 0 fld.d $fa4, $a4, 0 - fmul.d $fa3, $fa3, $fa1 + fmul.d $fa3, $fa3, $fa0 fadd.d $fa3, $fa4, $fa3 fcmp.cule.d $fcc0, $fa2, $fa3 bcnez $fcc0, .LBB25_19 @@ -10416,11 +10402,11 @@ _ZN4Mesh17calc_centerminmaxEv: # @_ZN4Mesh17calc_centerminmaxEv fst.d $fa3, $a0, 1264 fmov.d $fa2, $fa3 .LBB25_19: # in Loop: Header=BB25_17 Depth=1 - fcmp.cule.d $fcc0, $fa3, $fa0 + fcmp.cule.d $fcc0, $fa3, $fa1 bcnez $fcc0, .LBB25_16 # %bb.20: # in Loop: Header=BB25_17 Depth=1 fst.d $fa3, $a0, 1272 - fmov.d $fa0, $fa3 + fmov.d $fa1, $fa3 b .LBB25_16 .Lfunc_end25: .size _ZN4Mesh17calc_centerminmaxEv, .Lfunc_end25-_ZN4Mesh17calc_centerminmaxEv @@ -12593,12 +12579,8 @@ GCC_except_table27: .Lcst_end10: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN4Mesh24print_calc_neighbor_typeEv -.LCPI28_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 .text - .globl _ZN4Mesh24print_calc_neighbor_typeEv + .globl _ZN4Mesh24print_calc_neighbor_typeEv # -- Begin function _ZN4Mesh24print_calc_neighbor_typeEv .p2align 5 .type _ZN4Mesh24print_calc_neighbor_typeEv,@function _ZN4Mesh24print_calc_neighbor_typeEv: # @_ZN4Mesh24print_calc_neighbor_typeEv @@ -12619,12 +12601,13 @@ _ZN4Mesh24print_calc_neighbor_typeEv: # @_ZN4Mesh24print_calc_neighbor_typeEv # %bb.1: ld.d $s0, $fp, 1160 srli.d $a0, $s0, 32 - pcalau12i $a1, %pc_hi20(.LCPI28_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI28_0) lu52i.d $a1, $zero, 1107 or $a0, $a0, $a1 + movgr2fr.d $fa0, $a0 + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 movgr2fr.d $fa1, $a0 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 lu12i.w $a0, 275200 move $a1, $s0 bstrins.d $a1, $a0, 63, 32 @@ -20873,12 +20856,7 @@ _ZN4Mesh24calc_face_list_clearmapsEv: # @_ZN4Mesh24calc_face_list_clearmapsEv .size _ZN4Mesh24calc_face_list_clearmapsEv, .Lfunc_end42-_ZN4Mesh24calc_face_list_clearmapsEv .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN4Mesh12timer_outputE11mesh_timers17mesh_device_typesi -.LCPI43_0: - .dword 0x3e112e0be826d695 # double 1.0000000000000001E-9 - .text - .globl _ZN4Mesh12timer_outputE11mesh_timers17mesh_device_typesi + .globl _ZN4Mesh12timer_outputE11mesh_timers17mesh_device_typesi # -- Begin function _ZN4Mesh12timer_outputE11mesh_timers17mesh_device_typesi .p2align 5 .type _ZN4Mesh12timer_outputE11mesh_timers17mesh_device_typesi,@function _ZN4Mesh12timer_outputE11mesh_timers17mesh_device_typesi: # @_ZN4Mesh12timer_outputE11mesh_timers17mesh_device_typesi @@ -20903,11 +20881,14 @@ _ZN4Mesh12timer_outputE11mesh_timers17mesh_device_typesi: # @_ZN4Mesh12timer_out beqz $a2, .LBB43_3 # %bb.1: # %.thread ld.d $a0, $a0, 392 - pcalau12i $a1, %pc_hi20(.LCPI43_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI43_0) + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + lu12i.w $a0, -97683 + ori $a0, $a0, 1685 + lu32i.d $a0, 77323 + lu52i.d $a0, $a0, 993 movgr2fr.d $fa1, $a0 - ffint.d.l $fa1, $fa1 - fmul.d $fs0, $fa1, $fa0 + fmul.d $fs0, $fa0, $fa1 addi.d $a0, $sp, 10 ori $a2, $zero, 78 move $a1, $zero diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/partition.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/partition.s index 1ab757e0..e036b804 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/partition.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/partition.s @@ -1,16 +1,6 @@ .file "partition.cpp" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN4Mesh17partition_measureEv -.LCPI0_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI0_1: - .dword 0x40b087c3b666fb67 # double 4231.7645019878173 -.LCPI0_2: - .dword 0x4046a09e667f3bcd # double 45.254833995939045 -.LCPI0_3: - .dword 0x3f80000000000000 # double 0.0078125 .text - .globl _ZN4Mesh17partition_measureEv + .globl _ZN4Mesh17partition_measureEv # -- Begin function _ZN4Mesh17partition_measureEv .p2align 5 .type _ZN4Mesh17partition_measureEv,@function _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv @@ -32,6 +22,8 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv st.d $s6, $sp, 152 # 8-byte Folded Spill st.d $s7, $sp, 144 # 8-byte Folded Spill st.d $s8, $sp, 136 # 8-byte Folded Spill + fst.d $fs0, $sp, 128 # 8-byte Folded Spill + fst.d $fs1, $sp, 120 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -43,6 +35,8 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv .cfi_offset 29, -72 .cfi_offset 30, -80 .cfi_offset 31, -88 + .cfi_offset 56, -96 + .cfi_offset 57, -104 pcalau12i $a1, %pc_hi20(measure_type) ld.w $a1, $a1, %pc_lo12(measure_type) beqz $a1, .LBB0_209 @@ -51,11 +45,11 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv ld.d $a0, $a0, 1160 addi.d $a2, $a0, 127 srli.d $a2, $a2, 7 - st.d $a2, $sp, 16 # 8-byte Folded Spill + st.d $a2, $sp, 8 # 8-byte Folded Spill addi.d $a1, $a1, -1 ori $a2, $zero, 3 pcalau12i $a3, %pc_hi20(_ZZN4Mesh17partition_measureEvE13offtile_ratio) - st.d $a3, $sp, 56 # 8-byte Folded Spill + st.d $a3, $sp, 40 # 8-byte Folded Spill bltu $a2, $a1, .LBB0_208 # %bb.2: slli.d $a1, $a1, 2 @@ -63,50 +57,48 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv addi.d $a2, $a2, %pc_lo12(.LJTI0_0) ldx.w $a1, $a2, $a1 add.d $a1, $a2, $a1 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - st.d $a2, $sp, 40 # 8-byte Folded Spill - ld.d $a2, $sp, 16 # 8-byte Folded Reload + ld.d $a2, $sp, 8 # 8-byte Folded Reload addi.w $s6, $a2, 0 - pcalau12i $s7, %pc_hi20(.LCPI0_3) - st.d $s6, $sp, 48 # 8-byte Folded Spill + st.d $s6, $sp, 32 # 8-byte Folded Spill jr $a1 .LBB0_3: # %.preheader beqz $s6, .LBB0_208 # %bb.4: # %.lr.ph565 - ld.d $a1, $sp, 56 # 8-byte Folded Reload - fld.d $fa0, $a1, %pc_lo12(_ZZN4Mesh17partition_measureEvE13offtile_ratio) - ld.d $a1, $fp, 1368 - ld.d $a2, $fp, 1352 - ld.d $a3, $fp, 1392 - ld.d $a4, $fp, 1376 - ld.d $a5, $fp, 1384 - move $a6, $zero - move $a7, $zero + move $a1, $zero + move $a2, $zero + ld.d $a3, $sp, 40 # 8-byte Folded Reload + fld.d $fa0, $a3, %pc_lo12(_ZZN4Mesh17partition_measureEvE13offtile_ratio) + ld.d $a3, $fp, 1368 + ld.d $a4, $fp, 1352 + ld.d $a5, $fp, 1392 + ld.d $a6, $fp, 1376 + ld.d $a7, $fp, 1384 ori $t0, $zero, 128 + lu52i.d $t1, $zero, 1016 + movgr2fr.d $fa1, $t1 b .LBB0_6 .p2align 4, , 16 .LBB0_5: # in Loop: Header=BB0_6 Depth=1 - fld.d $fa1, $s7, %pc_lo12(.LCPI0_3) - addi.w $a6, $a6, 128 + addi.w $a1, $a1, 128 movgr2fr.w $fa2, $s1 ffint.d.w $fa2, $fa2 - fmul.d $fa1, $fa2, $fa1 - fadd.d $fa0, $fa1, $fa0 + fmul.d $fa2, $fa2, $fa1 + fadd.d $fa0, $fa2, $fa0 addi.w $t0, $t0, 128 - beq $a7, $s6, .LBB0_207 + beq $a2, $s6, .LBB0_207 .LBB0_6: # =>This Loop Header: Depth=1 # Child Loop BB0_8 Depth 2 move $s1, $zero - slli.w $t1, $a7, 7 - addi.w $a7, $a7, 1 - slli.w $t2, $a7, 7 - alsl.d $t3, $a6, $a1, 2 - alsl.d $t4, $a6, $a2, 2 - alsl.d $t5, $a6, $a4, 2 - alsl.d $t6, $a6, $a5, 2 - alsl.d $t7, $a6, $a3, 2 - move $t8, $a6 - move $fp, $a6 + slli.w $t1, $a2, 7 + addi.w $a2, $a2, 1 + slli.w $t2, $a2, 7 + alsl.d $t3, $a1, $a3, 2 + alsl.d $t4, $a1, $a4, 2 + alsl.d $t5, $a1, $a6, 2 + alsl.d $t6, $a1, $a7, 2 + alsl.d $t7, $a1, $a5, 2 + move $t8, $a1 + move $fp, $a1 b .LBB0_8 .p2align 4, , 16 .LBB0_7: # in Loop: Header=BB0_8 Depth=2 @@ -125,7 +117,7 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv ld.w $s3, $t3, 0 slt $s4, $s3, $t1 slli.d $s2, $s3, 2 - ldx.w $s5, $a2, $s2 + ldx.w $s5, $a4, $s2 ld.w $s0, $t4, 0 slt $s3, $s3, $t2 xori $s3, $s3, 1 @@ -133,7 +125,7 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv add.d $s1, $s1, $s3 bge $s0, $s5, .LBB0_11 # %bb.10: # in Loop: Header=BB0_8 Depth=2 - ldx.w $s2, $a3, $s2 + ldx.w $s2, $a5, $s2 slt $s3, $s2, $t1 slt $s2, $s2, $t2 xori $s2, $s2, 1 @@ -143,14 +135,14 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv ld.w $s3, $t5, 0 slt $s4, $s3, $t1 slli.d $s2, $s3, 2 - ldx.w $s5, $a2, $s2 + ldx.w $s5, $a4, $s2 slt $s3, $s3, $t2 xori $s3, $s3, 1 or $s3, $s4, $s3 add.d $s1, $s1, $s3 bge $s0, $s5, .LBB0_13 # %bb.12: # in Loop: Header=BB0_8 Depth=2 - ldx.w $s2, $a3, $s2 + ldx.w $s2, $a5, $s2 slt $s3, $s2, $t1 slt $s2, $s2, $t2 xori $s2, $s2, 1 @@ -160,14 +152,14 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv ld.w $s3, $t6, 0 slt $s4, $s3, $t1 slli.d $s2, $s3, 2 - ldx.w $s5, $a2, $s2 + ldx.w $s5, $a4, $s2 slt $s3, $s3, $t2 xori $s3, $s3, 1 or $s3, $s4, $s3 add.d $s1, $s1, $s3 bge $s0, $s5, .LBB0_15 # %bb.14: # in Loop: Header=BB0_8 Depth=2 - ldx.w $s2, $a4, $s2 + ldx.w $s2, $a6, $s2 slt $s3, $s2, $t1 slt $s2, $s2, $t2 xori $s2, $s2, 1 @@ -177,14 +169,14 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv ld.w $s3, $t7, 0 slt $s4, $s3, $t1 slli.d $s2, $s3, 2 - ldx.w $s5, $a2, $s2 + ldx.w $s5, $a4, $s2 slt $s3, $s3, $t2 xori $s3, $s3, 1 or $s3, $s4, $s3 add.w $s1, $s1, $s3 bge $s0, $s5, .LBB0_7 # %bb.16: # in Loop: Header=BB0_8 Depth=2 - ldx.w $s0, $a4, $s2 + ldx.w $s0, $a6, $s2 slt $s2, $s0, $t1 slt $s0, $s0, $t2 xori $s0, $s0, 1 @@ -196,7 +188,15 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv # %bb.18: # %.lr.ph554 move $s0, $zero move $s6, $zero - addi.d $s1, $sp, 88 + addi.d $s1, $sp, 72 + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 + movgr2fr.d $fs0, $a0 + lu12i.w $a0, 419827 + ori $a0, $a0, 3021 + lu32i.d $a0, 434334 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fs1, $a0 b .LBB0_20 .p2align 4, , 16 .LBB0_19: # %_ZNSt7__cxx1110_List_baseIiSaIiEED2Ev.exit443 @@ -208,14 +208,14 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv # Child Loop BB0_69 Depth 2 # Child Loop BB0_72 Depth 2 slli.d $s7, $s6, 2 - st.d $s1, $sp, 96 - st.d $s1, $sp, 88 - st.d $zero, $sp, 104 + st.d $s1, $sp, 80 + st.d $s1, $sp, 72 + st.d $zero, $sp, 88 slli.w $s8, $s0, 7 addi.w $s0, $s0, 1 slli.w $s2, $s0, 7 addi.w $a0, $s6, 128 - st.d $a0, $sp, 32 # 8-byte Folded Spill + st.d $a0, $sp, 24 # 8-byte Folded Spill ori $s3, $zero, 128 b .LBB0_22 .p2align 4, , 16 @@ -229,11 +229,11 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv ld.d $a0, $fp, 1160 bgeu $s6, $a0, .LBB0_21 # %bb.23: # in Loop: Header=BB0_22 Depth=2 - ld.d $s4, $fp, 1368 - ldx.w $s5, $s4, $s7 - blt $s5, $s8, .LBB0_25 + ld.d $s5, $fp, 1368 + ldx.w $s4, $s5, $s7 + blt $s4, $s8, .LBB0_25 # %bb.24: # in Loop: Header=BB0_22 Depth=2 - blt $s5, $s2, .LBB0_27 + blt $s4, $s2, .LBB0_27 .LBB0_25: # in Loop: Header=BB0_22 Depth=2 .Ltmp46: # EH_LABEL ori $a0, $zero, 24 @@ -242,25 +242,25 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv .Ltmp47: # EH_LABEL # %bb.26: # %_ZNSt7__cxx114listIiSaIiEE9push_backERKi.exit416 # in Loop: Header=BB0_22 Depth=2 - ldx.w $a1, $s4, $s7 + ldx.w $a1, $s5, $s7 st.w $a1, $a0, 16 - addi.d $a1, $sp, 88 + addi.d $a1, $sp, 72 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 88 ld.d $a1, $fp, 1368 addi.d $a0, $a0, 1 - st.d $a0, $sp, 104 - ldx.w $s5, $a1, $s7 + st.d $a0, $sp, 88 + ldx.w $s4, $a1, $s7 .LBB0_27: # in Loop: Header=BB0_22 Depth=2 ld.d $a1, $fp, 1352 - slli.d $a0, $s5, 2 + slli.d $a0, $s4, 2 ldx.w $a2, $a1, $a0 ldx.w $a1, $a1, $s7 bge $a1, $a2, .LBB0_32 # %bb.28: # in Loop: Header=BB0_22 Depth=2 - ld.d $s4, $fp, 1392 - ldx.w $a0, $s4, $a0 + ld.d $s5, $fp, 1392 + ldx.w $a0, $s5, $a0 blt $a0, $s8, .LBB0_30 # %bb.29: # in Loop: Header=BB0_22 Depth=2 blt $a0, $s2, .LBB0_32 @@ -272,21 +272,21 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv .Ltmp49: # EH_LABEL # %bb.31: # %_ZNSt7__cxx114listIiSaIiEE9push_backERKi.exit417 # in Loop: Header=BB0_22 Depth=2 - alsl.d $a1, $s5, $s4, 2 + alsl.d $a1, $s4, $s5, 2 ld.w $a1, $a1, 0 st.w $a1, $a0, 16 - addi.d $a1, $sp, 88 + addi.d $a1, $sp, 72 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 88 addi.d $a0, $a0, 1 - st.d $a0, $sp, 104 + st.d $a0, $sp, 88 .LBB0_32: # in Loop: Header=BB0_22 Depth=2 - ld.d $s4, $fp, 1376 - ldx.w $s5, $s4, $s7 - blt $s5, $s8, .LBB0_34 + ld.d $s5, $fp, 1376 + ldx.w $s4, $s5, $s7 + blt $s4, $s8, .LBB0_34 # %bb.33: # in Loop: Header=BB0_22 Depth=2 - blt $s5, $s2, .LBB0_36 + blt $s4, $s2, .LBB0_36 .LBB0_34: # in Loop: Header=BB0_22 Depth=2 .Ltmp50: # EH_LABEL ori $a0, $zero, 24 @@ -295,25 +295,25 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv .Ltmp51: # EH_LABEL # %bb.35: # %_ZNSt7__cxx114listIiSaIiEE9push_backERKi.exit418 # in Loop: Header=BB0_22 Depth=2 - ldx.w $a1, $s4, $s7 + ldx.w $a1, $s5, $s7 st.w $a1, $a0, 16 - addi.d $a1, $sp, 88 + addi.d $a1, $sp, 72 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 88 ld.d $a1, $fp, 1376 addi.d $a0, $a0, 1 - st.d $a0, $sp, 104 - ldx.w $s5, $a1, $s7 + st.d $a0, $sp, 88 + ldx.w $s4, $a1, $s7 .LBB0_36: # in Loop: Header=BB0_22 Depth=2 ld.d $a1, $fp, 1352 - slli.d $a0, $s5, 2 + slli.d $a0, $s4, 2 ldx.w $a2, $a1, $a0 ldx.w $a1, $a1, $s7 bge $a1, $a2, .LBB0_41 # %bb.37: # in Loop: Header=BB0_22 Depth=2 - ld.d $s4, $fp, 1392 - ldx.w $a0, $s4, $a0 + ld.d $s5, $fp, 1392 + ldx.w $a0, $s5, $a0 blt $a0, $s8, .LBB0_39 # %bb.38: # in Loop: Header=BB0_22 Depth=2 blt $a0, $s2, .LBB0_41 @@ -325,21 +325,21 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv .Ltmp53: # EH_LABEL # %bb.40: # %_ZNSt7__cxx114listIiSaIiEE9push_backERKi.exit419 # in Loop: Header=BB0_22 Depth=2 - alsl.d $a1, $s5, $s4, 2 + alsl.d $a1, $s4, $s5, 2 ld.w $a1, $a1, 0 st.w $a1, $a0, 16 - addi.d $a1, $sp, 88 + addi.d $a1, $sp, 72 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 88 addi.d $a0, $a0, 1 - st.d $a0, $sp, 104 + st.d $a0, $sp, 88 .LBB0_41: # in Loop: Header=BB0_22 Depth=2 - ld.d $s4, $fp, 1384 - ldx.w $s5, $s4, $s7 - blt $s5, $s8, .LBB0_43 + ld.d $s5, $fp, 1384 + ldx.w $s4, $s5, $s7 + blt $s4, $s8, .LBB0_43 # %bb.42: # in Loop: Header=BB0_22 Depth=2 - blt $s5, $s2, .LBB0_45 + blt $s4, $s2, .LBB0_45 .LBB0_43: # in Loop: Header=BB0_22 Depth=2 .Ltmp54: # EH_LABEL ori $a0, $zero, 24 @@ -348,25 +348,25 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv .Ltmp55: # EH_LABEL # %bb.44: # %_ZNSt7__cxx114listIiSaIiEE9push_backERKi.exit420 # in Loop: Header=BB0_22 Depth=2 - ldx.w $a1, $s4, $s7 + ldx.w $a1, $s5, $s7 st.w $a1, $a0, 16 - addi.d $a1, $sp, 88 + addi.d $a1, $sp, 72 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 88 ld.d $a1, $fp, 1384 addi.d $a0, $a0, 1 - st.d $a0, $sp, 104 - ldx.w $s5, $a1, $s7 + st.d $a0, $sp, 88 + ldx.w $s4, $a1, $s7 .LBB0_45: # in Loop: Header=BB0_22 Depth=2 ld.d $a1, $fp, 1352 - slli.d $a0, $s5, 2 + slli.d $a0, $s4, 2 ldx.w $a2, $a1, $a0 ldx.w $a1, $a1, $s7 bge $a1, $a2, .LBB0_50 # %bb.46: # in Loop: Header=BB0_22 Depth=2 - ld.d $s4, $fp, 1376 - ldx.w $a0, $s4, $a0 + ld.d $s5, $fp, 1376 + ldx.w $a0, $s5, $a0 blt $a0, $s8, .LBB0_48 # %bb.47: # in Loop: Header=BB0_22 Depth=2 blt $a0, $s2, .LBB0_50 @@ -378,21 +378,21 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv .Ltmp57: # EH_LABEL # %bb.49: # %_ZNSt7__cxx114listIiSaIiEE9push_backERKi.exit421 # in Loop: Header=BB0_22 Depth=2 - alsl.d $a1, $s5, $s4, 2 + alsl.d $a1, $s4, $s5, 2 ld.w $a1, $a1, 0 st.w $a1, $a0, 16 - addi.d $a1, $sp, 88 + addi.d $a1, $sp, 72 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 88 addi.d $a0, $a0, 1 - st.d $a0, $sp, 104 + st.d $a0, $sp, 88 .LBB0_50: # in Loop: Header=BB0_22 Depth=2 - ld.d $s4, $fp, 1392 - ldx.w $s5, $s4, $s7 - blt $s5, $s8, .LBB0_52 + ld.d $s5, $fp, 1392 + ldx.w $s4, $s5, $s7 + blt $s4, $s8, .LBB0_52 # %bb.51: # in Loop: Header=BB0_22 Depth=2 - blt $s5, $s2, .LBB0_54 + blt $s4, $s2, .LBB0_54 .LBB0_52: # in Loop: Header=BB0_22 Depth=2 .Ltmp58: # EH_LABEL ori $a0, $zero, 24 @@ -401,25 +401,25 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv .Ltmp59: # EH_LABEL # %bb.53: # %_ZNSt7__cxx114listIiSaIiEE9push_backERKi.exit422 # in Loop: Header=BB0_22 Depth=2 - ldx.w $a1, $s4, $s7 + ldx.w $a1, $s5, $s7 st.w $a1, $a0, 16 - addi.d $a1, $sp, 88 + addi.d $a1, $sp, 72 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 88 ld.d $a1, $fp, 1392 addi.d $a0, $a0, 1 - st.d $a0, $sp, 104 - ldx.w $s5, $a1, $s7 + st.d $a0, $sp, 88 + ldx.w $s4, $a1, $s7 .LBB0_54: # in Loop: Header=BB0_22 Depth=2 ld.d $a1, $fp, 1352 - slli.d $a0, $s5, 2 + slli.d $a0, $s4, 2 ldx.w $a2, $a1, $a0 ldx.w $a1, $a1, $s7 bge $a1, $a2, .LBB0_21 # %bb.55: # in Loop: Header=BB0_22 Depth=2 - ld.d $s4, $fp, 1376 - ldx.w $a0, $s4, $a0 + ld.d $s5, $fp, 1376 + ldx.w $a0, $s5, $a0 blt $a0, $s8, .LBB0_57 # %bb.56: # in Loop: Header=BB0_22 Depth=2 blt $a0, $s2, .LBB0_21 @@ -431,33 +431,33 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv .Ltmp61: # EH_LABEL # %bb.58: # %_ZNSt7__cxx114listIiSaIiEE9push_backERKi.exit423 # in Loop: Header=BB0_22 Depth=2 - alsl.d $a1, $s5, $s4, 2 + alsl.d $a1, $s4, $s5, 2 ld.w $a1, $a1, 0 st.w $a1, $a0, 16 - addi.d $a1, $sp, 88 + addi.d $a1, $sp, 72 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 88 addi.d $a0, $a0, 1 - st.d $a0, $sp, 104 + st.d $a0, $sp, 88 b .LBB0_21 .p2align 4, , 16 .LBB0_59: # in Loop: Header=BB0_20 Depth=1 .Ltmp63: # EH_LABEL - addi.d $a0, $sp, 88 + addi.d $a0, $sp, 72 pcaddu18i $ra, %call36(_ZNSt7__cxx114listIiSaIiEE4sortEv) jirl $ra, $ra, 0 .Ltmp64: # EH_LABEL # %bb.60: # in Loop: Header=BB0_20 Depth=1 - ld.d $a0, $sp, 88 - ld.d $s3, $sp, 48 # 8-byte Folded Reload - addi.d $s4, $sp, 112 - ld.d $s6, $sp, 32 # 8-byte Folded Reload + ld.d $a0, $sp, 72 + ld.d $s3, $sp, 32 # 8-byte Folded Reload + ld.d $s6, $sp, 24 # 8-byte Folded Reload + addi.d $s4, $sp, 96 beq $a0, $s1, .LBB0_71 # %bb.61: # in Loop: Header=BB0_20 Depth=1 - st.d $s4, $sp, 120 - st.d $s4, $sp, 112 - st.d $zero, $sp, 128 + st.d $s4, $sp, 104 + st.d $s4, $sp, 96 + st.d $zero, $sp, 112 ld.d $a1, $a0, 0 bne $a1, $s1, .LBB0_64 b .LBB0_70 @@ -475,7 +475,7 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv ld.w $a3, $a1, 16 bne $a2, $a3, .LBB0_62 # %bb.65: # in Loop: Header=BB0_64 Depth=2 - ld.d $a3, $sp, 112 + ld.d $a3, $sp, 96 beq $a3, $a1, .LBB0_63 # %bb.66: # in Loop: Header=BB0_64 Depth=2 ld.d $a2, $a1, 0 @@ -486,17 +486,17 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base11_M_transferEPS0_S1_) jirl $ra, $ra, 0 move $a0, $s2 - ld.d $a1, $sp, 128 - ld.d $a2, $sp, 104 + ld.d $a1, $sp, 112 + ld.d $a2, $sp, 88 addi.d $a1, $a1, 1 - st.d $a1, $sp, 128 + st.d $a1, $sp, 112 addi.d $a1, $a2, -1 - st.d $a1, $sp, 104 + st.d $a1, $sp, 88 b .LBB0_63 .p2align 4, , 16 .LBB0_68: # %._crit_edge.i430 # in Loop: Header=BB0_20 Depth=1 - ld.d $a0, $sp, 112 + ld.d $a0, $sp, 96 beq $a0, $s4, .LBB0_70 .p2align 4, , 16 .LBB0_69: # %.lr.ph.i.i.i433 @@ -510,27 +510,23 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv bne $s2, $s4, .LBB0_69 .LBB0_70: # %_ZNSt7__cxx1110_List_baseIiSaIiEED2Ev.exit.i436 # in Loop: Header=BB0_20 Depth=1 - ld.d $a0, $sp, 88 + ld.d $a0, $sp, 72 .LBB0_71: # %_ZNSt7__cxx114listIiSaIiEE6uniqueEv.exit438 # in Loop: Header=BB0_20 Depth=1 - ld.d $a1, $sp, 104 - ld.d $a2, $sp, 40 # 8-byte Folded Reload - fld.d $fa0, $a2, %pc_lo12(.LCPI0_0) + ld.d $a1, $sp, 88 srli.d $a2, $a1, 32 lu52i.d $a3, $zero, 1107 or $a2, $a2, $a3 - movgr2fr.d $fa1, $a2 - fsub.d $fa0, $fa1, $fa0 + movgr2fr.d $fa0, $a2 + fsub.d $fa0, $fa0, $fs0 lu12i.w $a2, 275200 bstrins.d $a1, $a2, 63, 32 - pcalau12i $a2, %pc_hi20(.LCPI0_2) - fld.d $fa1, $a2, %pc_lo12(.LCPI0_2) - ld.d $a2, $sp, 56 # 8-byte Folded Reload - fld.d $fa2, $a2, %pc_lo12(_ZZN4Mesh17partition_measureEvE13offtile_ratio) - movgr2fr.d $fa3, $a1 - fadd.d $fa0, $fa3, $fa0 - fdiv.d $fa0, $fa0, $fa1 + ld.d $a2, $sp, 40 # 8-byte Folded Reload + fld.d $fa1, $a2, %pc_lo12(_ZZN4Mesh17partition_measureEvE13offtile_ratio) + movgr2fr.d $fa2, $a1 fadd.d $fa0, $fa2, $fa0 + fdiv.d $fa0, $fa0, $fs1 + fadd.d $fa0, $fa1, $fa0 fst.d $fa0, $a2, %pc_lo12(_ZZN4Mesh17partition_measureEvE13offtile_ratio) beq $a0, $s1, .LBB0_19 .p2align 4, , 16 @@ -548,9 +544,17 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv beqz $s6, .LBB0_208 # %bb.74: # %.lr.ph move $s1, $zero - move $s7, $zero - addi.d $s5, $sp, 88 - addi.d $s6, $sp, 64 + move $s8, $zero + addi.d $s4, $sp, 72 + addi.d $s5, $sp, 48 + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 + movgr2fr.d $fs0, $a0 + lu12i.w $a0, -301457 + ori $a0, $a0, 2919 + lu32i.d $a0, 34755 + lu52i.d $a0, $a0, 1035 + movgr2fr.d $fs1, $a0 b .LBB0_76 .p2align 4, , 16 .LBB0_75: # %_ZNSt7__cxx1110_List_baseIiSaIiEED2Ev.exit503 @@ -564,37 +568,37 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv # Child Loop BB0_144 Depth 2 # Child Loop BB0_148 Depth 2 # Child Loop BB0_150 Depth 2 - slli.d $s3, $s7, 2 - st.d $s5, $sp, 96 - st.d $s5, $sp, 88 - st.d $zero, $sp, 104 - st.d $s6, $sp, 72 - st.d $s6, $sp, 64 - st.d $zero, $sp, 80 + slli.d $s7, $s8, 2 + st.d $s4, $sp, 80 + st.d $s4, $sp, 72 + st.d $zero, $sp, 88 + st.d $s5, $sp, 56 + st.d $s5, $sp, 48 + st.d $zero, $sp, 64 slli.w $s2, $s1, 7 addi.w $s1, $s1, 1 - st.d $s1, $sp, 32 # 8-byte Folded Spill - slli.w $s8, $s1, 7 - addi.w $a0, $s7, 128 + st.d $s1, $sp, 16 # 8-byte Folded Spill + slli.w $s6, $s1, 7 + addi.w $a0, $s8, 128 st.d $a0, $sp, 24 # 8-byte Folded Spill ori $s1, $zero, 128 b .LBB0_78 .p2align 4, , 16 .LBB0_77: # in Loop: Header=BB0_78 Depth=2 - addi.d $s7, $s7, 1 + addi.d $s8, $s8, 1 addi.w $s1, $s1, -1 - addi.d $s3, $s3, 4 + addi.d $s7, $s7, 4 beqz $s1, .LBB0_123 .LBB0_78: # Parent Loop BB0_76 Depth=1 # => This Inner Loop Header: Depth=2 ld.d $a0, $fp, 1160 - bgeu $s7, $a0, .LBB0_77 + bgeu $s8, $a0, .LBB0_77 # %bb.79: # in Loop: Header=BB0_78 Depth=2 ld.d $s0, $fp, 1368 - ldx.w $s4, $s0, $s3 - blt $s4, $s2, .LBB0_81 + ldx.w $s3, $s0, $s7 + blt $s3, $s2, .LBB0_81 # %bb.80: # in Loop: Header=BB0_78 Depth=2 - blt $s4, $s8, .LBB0_84 + blt $s3, $s6, .LBB0_84 .LBB0_81: # in Loop: Header=BB0_78 Depth=2 .Ltmp0: # EH_LABEL ori $a0, $zero, 24 @@ -602,16 +606,16 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv jirl $ra, $ra, 0 .Ltmp1: # EH_LABEL # %bb.82: # in Loop: Header=BB0_78 Depth=2 - ldx.w $a1, $s0, $s3 + ldx.w $a1, $s0, $s7 st.w $a1, $a0, 16 - addi.d $a1, $sp, 88 + addi.d $a1, $sp, 72 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 88 ld.d $a1, $fp, 1368 addi.d $a0, $a0, 1 - st.d $a0, $sp, 104 - ldx.w $s0, $a1, $s3 + st.d $a0, $sp, 88 + ldx.w $s0, $a1, $s7 .Ltmp2: # EH_LABEL ori $a0, $zero, 24 pcaddu18i $ra, %call36(_Znwm) @@ -622,26 +626,26 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv add.w $a1, $s0, $a1 srli.d $a1, $a1, 2 st.w $a1, $a0, 16 - addi.d $a1, $sp, 64 + addi.d $a1, $sp, 48 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 80 + ld.d $a0, $sp, 64 ld.d $a1, $fp, 1368 addi.d $a0, $a0, 1 - st.d $a0, $sp, 80 - ldx.w $s4, $a1, $s3 + st.d $a0, $sp, 64 + ldx.w $s3, $a1, $s7 .LBB0_84: # in Loop: Header=BB0_78 Depth=2 ld.d $a1, $fp, 1352 - slli.d $a0, $s4, 2 + slli.d $a0, $s3, 2 ldx.w $a2, $a1, $a0 - ldx.w $a1, $a1, $s3 + ldx.w $a1, $a1, $s7 bge $a1, $a2, .LBB0_90 # %bb.85: # in Loop: Header=BB0_78 Depth=2 ld.d $s0, $fp, 1392 ldx.w $a0, $s0, $a0 blt $a0, $s2, .LBB0_87 # %bb.86: # in Loop: Header=BB0_78 Depth=2 - blt $a0, $s8, .LBB0_90 + blt $a0, $s6, .LBB0_90 .LBB0_87: # in Loop: Header=BB0_78 Depth=2 .Ltmp5: # EH_LABEL ori $a0, $zero, 24 @@ -649,17 +653,17 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv jirl $ra, $ra, 0 .Ltmp6: # EH_LABEL # %bb.88: # in Loop: Header=BB0_78 Depth=2 - alsl.d $a1, $s4, $s0, 2 + alsl.d $a1, $s3, $s0, 2 ld.w $a1, $a1, 0 st.w $a1, $a0, 16 - addi.d $a1, $sp, 88 + addi.d $a1, $sp, 72 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 88 ld.d $a1, $fp, 1368 addi.d $a0, $a0, 1 - st.d $a0, $sp, 104 - ldx.w $a0, $a1, $s3 + st.d $a0, $sp, 88 + ldx.w $a0, $a1, $s7 ld.d $a1, $fp, 1392 slli.d $a0, $a0, 2 ldx.w $s0, $a1, $a0 @@ -673,18 +677,18 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv add.w $a1, $s0, $a1 srli.d $a1, $a1, 2 st.w $a1, $a0, 16 - addi.d $a1, $sp, 64 + addi.d $a1, $sp, 48 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 80 + ld.d $a0, $sp, 64 addi.d $a0, $a0, 1 - st.d $a0, $sp, 80 + st.d $a0, $sp, 64 .LBB0_90: # in Loop: Header=BB0_78 Depth=2 ld.d $s0, $fp, 1376 - ldx.w $s4, $s0, $s3 - blt $s4, $s2, .LBB0_92 + ldx.w $s3, $s0, $s7 + blt $s3, $s2, .LBB0_92 # %bb.91: # in Loop: Header=BB0_78 Depth=2 - blt $s4, $s8, .LBB0_95 + blt $s3, $s6, .LBB0_95 .LBB0_92: # in Loop: Header=BB0_78 Depth=2 .Ltmp10: # EH_LABEL ori $a0, $zero, 24 @@ -692,16 +696,16 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv jirl $ra, $ra, 0 .Ltmp11: # EH_LABEL # %bb.93: # in Loop: Header=BB0_78 Depth=2 - ldx.w $a1, $s0, $s3 + ldx.w $a1, $s0, $s7 st.w $a1, $a0, 16 - addi.d $a1, $sp, 88 + addi.d $a1, $sp, 72 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 88 ld.d $a1, $fp, 1376 addi.d $a0, $a0, 1 - st.d $a0, $sp, 104 - ldx.w $s0, $a1, $s3 + st.d $a0, $sp, 88 + ldx.w $s0, $a1, $s7 .Ltmp12: # EH_LABEL ori $a0, $zero, 24 pcaddu18i $ra, %call36(_Znwm) @@ -712,26 +716,26 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv add.w $a1, $s0, $a1 srli.d $a1, $a1, 2 st.w $a1, $a0, 16 - addi.d $a1, $sp, 64 + addi.d $a1, $sp, 48 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 80 + ld.d $a0, $sp, 64 ld.d $a1, $fp, 1376 addi.d $a0, $a0, 1 - st.d $a0, $sp, 80 - ldx.w $s4, $a1, $s3 + st.d $a0, $sp, 64 + ldx.w $s3, $a1, $s7 .LBB0_95: # in Loop: Header=BB0_78 Depth=2 ld.d $a1, $fp, 1352 - slli.d $a0, $s4, 2 + slli.d $a0, $s3, 2 ldx.w $a2, $a1, $a0 - ldx.w $a1, $a1, $s3 + ldx.w $a1, $a1, $s7 bge $a1, $a2, .LBB0_101 # %bb.96: # in Loop: Header=BB0_78 Depth=2 ld.d $s0, $fp, 1392 ldx.w $a0, $s0, $a0 blt $a0, $s2, .LBB0_98 # %bb.97: # in Loop: Header=BB0_78 Depth=2 - blt $a0, $s8, .LBB0_101 + blt $a0, $s6, .LBB0_101 .LBB0_98: # in Loop: Header=BB0_78 Depth=2 .Ltmp15: # EH_LABEL ori $a0, $zero, 24 @@ -739,17 +743,17 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv jirl $ra, $ra, 0 .Ltmp16: # EH_LABEL # %bb.99: # in Loop: Header=BB0_78 Depth=2 - alsl.d $a1, $s4, $s0, 2 + alsl.d $a1, $s3, $s0, 2 ld.w $a1, $a1, 0 st.w $a1, $a0, 16 - addi.d $a1, $sp, 88 + addi.d $a1, $sp, 72 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 88 ld.d $a1, $fp, 1376 addi.d $a0, $a0, 1 - st.d $a0, $sp, 104 - ldx.w $a0, $a1, $s3 + st.d $a0, $sp, 88 + ldx.w $a0, $a1, $s7 ld.d $a1, $fp, 1392 slli.d $a0, $a0, 2 ldx.w $s0, $a1, $a0 @@ -763,18 +767,18 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv add.w $a1, $s0, $a1 srli.d $a1, $a1, 2 st.w $a1, $a0, 16 - addi.d $a1, $sp, 64 + addi.d $a1, $sp, 48 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 80 + ld.d $a0, $sp, 64 addi.d $a0, $a0, 1 - st.d $a0, $sp, 80 + st.d $a0, $sp, 64 .LBB0_101: # in Loop: Header=BB0_78 Depth=2 ld.d $s0, $fp, 1384 - ldx.w $s4, $s0, $s3 - blt $s4, $s2, .LBB0_103 + ldx.w $s3, $s0, $s7 + blt $s3, $s2, .LBB0_103 # %bb.102: # in Loop: Header=BB0_78 Depth=2 - blt $s4, $s8, .LBB0_106 + blt $s3, $s6, .LBB0_106 .LBB0_103: # in Loop: Header=BB0_78 Depth=2 .Ltmp20: # EH_LABEL ori $a0, $zero, 24 @@ -782,16 +786,16 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv jirl $ra, $ra, 0 .Ltmp21: # EH_LABEL # %bb.104: # in Loop: Header=BB0_78 Depth=2 - ldx.w $a1, $s0, $s3 + ldx.w $a1, $s0, $s7 st.w $a1, $a0, 16 - addi.d $a1, $sp, 88 + addi.d $a1, $sp, 72 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 88 ld.d $a1, $fp, 1384 addi.d $a0, $a0, 1 - st.d $a0, $sp, 104 - ldx.w $s0, $a1, $s3 + st.d $a0, $sp, 88 + ldx.w $s0, $a1, $s7 .Ltmp22: # EH_LABEL ori $a0, $zero, 24 pcaddu18i $ra, %call36(_Znwm) @@ -802,26 +806,26 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv add.w $a1, $s0, $a1 srli.d $a1, $a1, 2 st.w $a1, $a0, 16 - addi.d $a1, $sp, 64 + addi.d $a1, $sp, 48 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 80 + ld.d $a0, $sp, 64 ld.d $a1, $fp, 1384 addi.d $a0, $a0, 1 - st.d $a0, $sp, 80 - ldx.w $s4, $a1, $s3 + st.d $a0, $sp, 64 + ldx.w $s3, $a1, $s7 .LBB0_106: # in Loop: Header=BB0_78 Depth=2 ld.d $a1, $fp, 1352 - slli.d $a0, $s4, 2 + slli.d $a0, $s3, 2 ldx.w $a2, $a1, $a0 - ldx.w $a1, $a1, $s3 + ldx.w $a1, $a1, $s7 bge $a1, $a2, .LBB0_112 # %bb.107: # in Loop: Header=BB0_78 Depth=2 ld.d $s0, $fp, 1376 ldx.w $a0, $s0, $a0 blt $a0, $s2, .LBB0_109 # %bb.108: # in Loop: Header=BB0_78 Depth=2 - blt $a0, $s8, .LBB0_112 + blt $a0, $s6, .LBB0_112 .LBB0_109: # in Loop: Header=BB0_78 Depth=2 .Ltmp25: # EH_LABEL ori $a0, $zero, 24 @@ -829,17 +833,17 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv jirl $ra, $ra, 0 .Ltmp26: # EH_LABEL # %bb.110: # in Loop: Header=BB0_78 Depth=2 - alsl.d $a1, $s4, $s0, 2 + alsl.d $a1, $s3, $s0, 2 ld.w $a1, $a1, 0 st.w $a1, $a0, 16 - addi.d $a1, $sp, 88 + addi.d $a1, $sp, 72 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 88 ld.d $a1, $fp, 1384 addi.d $a0, $a0, 1 - st.d $a0, $sp, 104 - ldx.w $a0, $a1, $s3 + st.d $a0, $sp, 88 + ldx.w $a0, $a1, $s7 ld.d $a1, $fp, 1376 slli.d $a0, $a0, 2 ldx.w $s0, $a1, $a0 @@ -853,18 +857,18 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv add.w $a1, $s0, $a1 srli.d $a1, $a1, 2 st.w $a1, $a0, 16 - addi.d $a1, $sp, 64 + addi.d $a1, $sp, 48 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 80 + ld.d $a0, $sp, 64 addi.d $a0, $a0, 1 - st.d $a0, $sp, 80 + st.d $a0, $sp, 64 .LBB0_112: # in Loop: Header=BB0_78 Depth=2 ld.d $s0, $fp, 1392 - ldx.w $s4, $s0, $s3 - blt $s4, $s2, .LBB0_114 + ldx.w $s3, $s0, $s7 + blt $s3, $s2, .LBB0_114 # %bb.113: # in Loop: Header=BB0_78 Depth=2 - blt $s4, $s8, .LBB0_117 + blt $s3, $s6, .LBB0_117 .LBB0_114: # in Loop: Header=BB0_78 Depth=2 .Ltmp30: # EH_LABEL ori $a0, $zero, 24 @@ -872,16 +876,16 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv jirl $ra, $ra, 0 .Ltmp31: # EH_LABEL # %bb.115: # in Loop: Header=BB0_78 Depth=2 - ldx.w $a1, $s0, $s3 + ldx.w $a1, $s0, $s7 st.w $a1, $a0, 16 - addi.d $a1, $sp, 88 + addi.d $a1, $sp, 72 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 88 ld.d $a1, $fp, 1392 addi.d $a0, $a0, 1 - st.d $a0, $sp, 104 - ldx.w $s0, $a1, $s3 + st.d $a0, $sp, 88 + ldx.w $s0, $a1, $s7 .Ltmp32: # EH_LABEL ori $a0, $zero, 24 pcaddu18i $ra, %call36(_Znwm) @@ -892,26 +896,26 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv add.w $a1, $s0, $a1 srli.d $a1, $a1, 2 st.w $a1, $a0, 16 - addi.d $a1, $sp, 64 + addi.d $a1, $sp, 48 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 80 + ld.d $a0, $sp, 64 ld.d $a1, $fp, 1392 addi.d $a0, $a0, 1 - st.d $a0, $sp, 80 - ldx.w $s4, $a1, $s3 + st.d $a0, $sp, 64 + ldx.w $s3, $a1, $s7 .LBB0_117: # in Loop: Header=BB0_78 Depth=2 ld.d $a1, $fp, 1352 - slli.d $a0, $s4, 2 + slli.d $a0, $s3, 2 ldx.w $a2, $a1, $a0 - ldx.w $a1, $a1, $s3 + ldx.w $a1, $a1, $s7 bge $a1, $a2, .LBB0_77 # %bb.118: # in Loop: Header=BB0_78 Depth=2 ld.d $s0, $fp, 1376 ldx.w $a0, $s0, $a0 blt $a0, $s2, .LBB0_120 # %bb.119: # in Loop: Header=BB0_78 Depth=2 - blt $a0, $s8, .LBB0_77 + blt $a0, $s6, .LBB0_77 .LBB0_120: # in Loop: Header=BB0_78 Depth=2 .Ltmp35: # EH_LABEL ori $a0, $zero, 24 @@ -919,17 +923,17 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv jirl $ra, $ra, 0 .Ltmp36: # EH_LABEL # %bb.121: # in Loop: Header=BB0_78 Depth=2 - alsl.d $a1, $s4, $s0, 2 + alsl.d $a1, $s3, $s0, 2 ld.w $a1, $a1, 0 st.w $a1, $a0, 16 - addi.d $a1, $sp, 88 + addi.d $a1, $sp, 72 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 88 ld.d $a1, $fp, 1392 addi.d $a0, $a0, 1 - st.d $a0, $sp, 104 - ldx.w $a0, $a1, $s3 + st.d $a0, $sp, 88 + ldx.w $a0, $a1, $s7 ld.d $a1, $fp, 1376 slli.d $a0, $a0, 2 ldx.w $s0, $a1, $a0 @@ -943,31 +947,32 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv add.w $a1, $s0, $a1 srli.d $a1, $a1, 2 st.w $a1, $a0, 16 - addi.d $a1, $sp, 64 + addi.d $a1, $sp, 48 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 80 + ld.d $a0, $sp, 64 addi.d $a0, $a0, 1 - st.d $a0, $sp, 80 + st.d $a0, $sp, 64 b .LBB0_77 .p2align 4, , 16 .LBB0_123: # in Loop: Header=BB0_76 Depth=1 .Ltmp41: # EH_LABEL - addi.d $a0, $sp, 88 + addi.d $a0, $sp, 72 pcaddu18i $ra, %call36(_ZNSt7__cxx114listIiSaIiEE4sortEv) jirl $ra, $ra, 0 .Ltmp42: # EH_LABEL - ld.d $s2, $sp, 48 # 8-byte Folded Reload - addi.d $s3, $sp, 112 + ld.d $s2, $sp, 32 # 8-byte Folded Reload + ld.d $s8, $sp, 24 # 8-byte Folded Reload + addi.d $s3, $sp, 96 # %bb.124: # in Loop: Header=BB0_76 Depth=1 - ld.d $s1, $sp, 88 - beq $s1, $s5, .LBB0_134 + ld.d $s1, $sp, 72 + beq $s1, $s4, .LBB0_134 # %bb.125: # in Loop: Header=BB0_76 Depth=1 - st.d $s3, $sp, 120 - st.d $s3, $sp, 112 - st.d $zero, $sp, 128 + st.d $s3, $sp, 104 + st.d $s3, $sp, 96 + st.d $zero, $sp, 112 ld.d $a1, $s1, 0 - bne $a1, $s5, .LBB0_128 + bne $a1, $s4, .LBB0_128 b .LBB0_134 .p2align 4, , 16 .LBB0_126: # in Loop: Header=BB0_128 Depth=2 @@ -975,7 +980,7 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv .LBB0_127: # %_ZNSt7__cxx114listIiSaIiEE6spliceESt20_List_const_iteratorIiERS2_S4_.exit.i467 # in Loop: Header=BB0_128 Depth=2 ld.d $a1, $s1, 0 - beq $a1, $s5, .LBB0_132 + beq $a1, $s4, .LBB0_132 .LBB0_128: # %.lr.ph.i465 # Parent Loop BB0_76 Depth=1 # => This Inner Loop Header: Depth=2 @@ -983,7 +988,7 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv ld.w $a2, $a1, 16 bne $a0, $a2, .LBB0_126 # %bb.129: # in Loop: Header=BB0_128 Depth=2 - ld.d $a0, $sp, 112 + ld.d $a0, $sp, 96 beq $a0, $a1, .LBB0_127 # %bb.130: # in Loop: Header=BB0_128 Depth=2 ld.d $a2, $a1, 0 @@ -991,17 +996,17 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv # %bb.131: # in Loop: Header=BB0_128 Depth=2 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base11_M_transferEPS0_S1_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 128 - ld.d $a1, $sp, 104 + ld.d $a0, $sp, 112 + ld.d $a1, $sp, 88 addi.d $a0, $a0, 1 - st.d $a0, $sp, 128 + st.d $a0, $sp, 112 addi.d $a0, $a1, -1 - st.d $a0, $sp, 104 + st.d $a0, $sp, 88 b .LBB0_127 .p2align 4, , 16 .LBB0_132: # %._crit_edge.i470 # in Loop: Header=BB0_76 Depth=1 - ld.d $a0, $sp, 112 + ld.d $a0, $sp, 96 beq $a0, $s3, .LBB0_134 .p2align 4, , 16 .LBB0_133: # %.lr.ph.i.i.i473 @@ -1016,20 +1021,20 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv .LBB0_134: # %_ZNSt7__cxx114listIiSaIiEE6uniqueEv.exit478 # in Loop: Header=BB0_76 Depth=1 .Ltmp43: # EH_LABEL - addi.d $a0, $sp, 64 + addi.d $a0, $sp, 48 pcaddu18i $ra, %call36(_ZNSt7__cxx114listIiSaIiEE4sortEv) jirl $ra, $ra, 0 .Ltmp44: # EH_LABEL # %bb.135: # in Loop: Header=BB0_76 Depth=1 - ld.d $a0, $sp, 64 - beq $a0, $s6, .LBB0_146 + ld.d $a0, $sp, 48 + beq $a0, $s5, .LBB0_146 # %bb.136: # in Loop: Header=BB0_76 Depth=1 - st.d $s3, $sp, 120 - st.d $s3, $sp, 112 - st.d $zero, $sp, 128 + st.d $s3, $sp, 104 + st.d $s3, $sp, 96 + st.d $zero, $sp, 112 ld.d $a1, $a0, 0 - ld.d $s1, $sp, 32 # 8-byte Folded Reload - bne $a1, $s6, .LBB0_139 + ld.d $s1, $sp, 16 # 8-byte Folded Reload + bne $a1, $s5, .LBB0_139 b .LBB0_145 .p2align 4, , 16 .LBB0_137: # in Loop: Header=BB0_139 Depth=2 @@ -1037,7 +1042,7 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv .LBB0_138: # %_ZNSt7__cxx114listIiSaIiEE6spliceESt20_List_const_iteratorIiERS2_S4_.exit.i482 # in Loop: Header=BB0_139 Depth=2 ld.d $a1, $a0, 0 - beq $a1, $s6, .LBB0_143 + beq $a1, $s5, .LBB0_143 .LBB0_139: # %.lr.ph.i480 # Parent Loop BB0_76 Depth=1 # => This Inner Loop Header: Depth=2 @@ -1045,7 +1050,7 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv ld.w $a3, $a1, 16 bne $a2, $a3, .LBB0_137 # %bb.140: # in Loop: Header=BB0_139 Depth=2 - ld.d $a3, $sp, 112 + ld.d $a3, $sp, 96 beq $a3, $a1, .LBB0_138 # %bb.141: # in Loop: Header=BB0_139 Depth=2 ld.d $a2, $a1, 0 @@ -1056,17 +1061,17 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base11_M_transferEPS0_S1_) jirl $ra, $ra, 0 move $a0, $s0 - ld.d $a1, $sp, 128 - ld.d $a2, $sp, 80 + ld.d $a1, $sp, 112 + ld.d $a2, $sp, 64 addi.d $a1, $a1, 1 - st.d $a1, $sp, 128 + st.d $a1, $sp, 112 addi.d $a1, $a2, -1 - st.d $a1, $sp, 80 + st.d $a1, $sp, 64 b .LBB0_138 .p2align 4, , 16 .LBB0_143: # %._crit_edge.i485 # in Loop: Header=BB0_76 Depth=1 - ld.d $a0, $sp, 112 + ld.d $a0, $sp, 96 beq $a0, $s3, .LBB0_145 .p2align 4, , 16 .LBB0_144: # %.lr.ph.i.i.i488 @@ -1080,45 +1085,40 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv bne $s0, $s3, .LBB0_144 .LBB0_145: # %_ZNSt7__cxx1110_List_baseIiSaIiEED2Ev.exit.i491 # in Loop: Header=BB0_76 Depth=1 - ld.d $a0, $sp, 64 + ld.d $a0, $sp, 48 b .LBB0_147 .p2align 4, , 16 .LBB0_146: # in Loop: Header=BB0_76 Depth=1 - ld.d $s1, $sp, 32 # 8-byte Folded Reload + ld.d $s1, $sp, 16 # 8-byte Folded Reload .LBB0_147: # %_ZNSt7__cxx114listIiSaIiEE6uniqueEv.exit493 # in Loop: Header=BB0_76 Depth=1 - ld.d $a1, $sp, 104 - ld.d $a2, $sp, 40 # 8-byte Folded Reload - fld.d $fa0, $a2, %pc_lo12(.LCPI0_0) + ld.d $a1, $sp, 88 srli.d $a2, $a1, 32 lu52i.d $a3, $zero, 1107 or $a2, $a2, $a3 - movgr2fr.d $fa1, $a2 - fsub.d $fa1, $fa1, $fa0 - ld.d $a2, $sp, 80 + movgr2fr.d $fa0, $a2 + fsub.d $fa0, $fa0, $fs0 + ld.d $a2, $sp, 64 lu12i.w $a4, 275200 bstrins.d $a1, $a4, 63, 32 - movgr2fr.d $fa2, $a1 - fadd.d $fa1, $fa2, $fa1 + movgr2fr.d $fa1, $a1 + fadd.d $fa0, $fa1, $fa0 srli.d $a1, $a2, 32 or $a1, $a1, $a3 - movgr2fr.d $fa2, $a1 - fsub.d $fa0, $fa2, $fa0 + movgr2fr.d $fa1, $a1 + fsub.d $fa1, $fa1, $fs0 bstrins.d $a2, $a4, 63, 32 movgr2fr.d $fa2, $a2 - fadd.d $fa0, $fa2, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI0_1) - fld.d $fa2, $a1, %pc_lo12(.LCPI0_1) - ld.d $a1, $sp, 56 # 8-byte Folded Reload - fld.d $fa3, $a1, %pc_lo12(_ZZN4Mesh17partition_measureEvE13offtile_ratio) - fmul.d $fa0, $fa1, $fa0 + fadd.d $fa1, $fa2, $fa1 + ld.d $a1, $sp, 40 # 8-byte Folded Reload + fld.d $fa2, $a1, %pc_lo12(_ZZN4Mesh17partition_measureEvE13offtile_ratio) + fmul.d $fa0, $fa0, $fa1 vldi $vr1, -1008 fmul.d $fa0, $fa0, $fa1 - fdiv.d $fa0, $fa0, $fa2 - fadd.d $fa0, $fa3, $fa0 + fdiv.d $fa0, $fa0, $fs1 + fadd.d $fa0, $fa2, $fa0 fst.d $fa0, $a1, %pc_lo12(_ZZN4Mesh17partition_measureEvE13offtile_ratio) - ld.d $s7, $sp, 24 # 8-byte Folded Reload - beq $a0, $s6, .LBB0_149 + beq $a0, $s5, .LBB0_149 .p2align 4, , 16 .LBB0_148: # %.lr.ph.i.i495 # Parent Loop BB0_76 Depth=1 @@ -1128,11 +1128,11 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 move $a0, $s0 - bne $s0, $s6, .LBB0_148 + bne $s0, $s5, .LBB0_148 .LBB0_149: # %_ZNSt7__cxx1110_List_baseIiSaIiEED2Ev.exit498 # in Loop: Header=BB0_76 Depth=1 - ld.d $a0, $sp, 88 - beq $a0, $s5, .LBB0_75 + ld.d $a0, $sp, 72 + beq $a0, $s4, .LBB0_75 .p2align 4, , 16 .LBB0_150: # %.lr.ph.i.i500 # Parent Loop BB0_76 Depth=1 @@ -1142,37 +1142,39 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 move $a0, $s0 - bne $s0, $s5, .LBB0_150 + bne $s0, $s4, .LBB0_150 b .LBB0_75 .LBB0_151: # %.preheader529 beqz $s6, .LBB0_208 # %bb.152: # %.lr.ph559 - move $a0, $zero - move $s1, $zero - addi.d $s2, $sp, 88 - st.d $s7, $sp, 24 # 8-byte Folded Spill + move $s6, $zero + move $s0, $zero + addi.d $s1, $sp, 72 + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 + movgr2fr.d $fs0, $a0 + lu52i.d $a0, $zero, 1016 + movgr2fr.d $fs1, $a0 b .LBB0_154 .p2align 4, , 16 .LBB0_153: # %_ZNSt7__cxx1110_List_baseIiSaIiEED2Ev.exit # in Loop: Header=BB0_154 Depth=1 - ld.d $a0, $sp, 32 # 8-byte Folded Reload - beq $s1, $s3, .LBB0_208 + beq $s0, $s3, .LBB0_208 .LBB0_154: # =>This Loop Header: Depth=1 # Child Loop BB0_158 Depth 2 # Child Loop BB0_198 Depth 2 # Child Loop BB0_203 Depth 2 # Child Loop BB0_206 Depth 2 - move $s7, $a0 - slli.d $s8, $a0, 2 - st.d $s2, $sp, 96 - st.d $s2, $sp, 88 - st.d $zero, $sp, 104 - slli.w $s3, $s1, 7 - addi.w $s1, $s1, 1 - slli.w $s4, $s1, 7 - addi.w $a0, $a0, 128 - st.d $a0, $sp, 32 # 8-byte Folded Spill - ori $s0, $zero, 128 + slli.d $s7, $s6, 2 + st.d $s1, $sp, 80 + st.d $s1, $sp, 72 + st.d $zero, $sp, 88 + slli.w $s8, $s0, 7 + addi.w $s0, $s0, 1 + slli.w $s2, $s0, 7 + addi.w $a0, $s6, 128 + st.d $a0, $sp, 24 # 8-byte Folded Spill + ori $s3, $zero, 128 b .LBB0_158 .LBB0_155: # in Loop: Header=BB0_158 Depth=2 .Ltmp80: # EH_LABEL @@ -1182,31 +1184,31 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv .Ltmp81: # EH_LABEL # %bb.156: # %_ZNSt7__cxx114listIiSaIiEE9push_backERKi.exit410 # in Loop: Header=BB0_158 Depth=2 - alsl.d $a1, $s6, $s5, 2 + alsl.d $a1, $s4, $s5, 2 ld.w $a1, $a1, 0 st.w $a1, $a0, 16 - addi.d $a1, $sp, 88 + addi.d $a1, $sp, 72 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 88 addi.d $a0, $a0, 1 - st.d $a0, $sp, 104 + st.d $a0, $sp, 88 .p2align 4, , 16 .LBB0_157: # in Loop: Header=BB0_158 Depth=2 - addi.d $s7, $s7, 1 - addi.w $s0, $s0, -1 - addi.d $s8, $s8, 4 - beqz $s0, .LBB0_193 + addi.d $s6, $s6, 1 + addi.w $s3, $s3, -1 + addi.d $s7, $s7, 4 + beqz $s3, .LBB0_193 .LBB0_158: # Parent Loop BB0_154 Depth=1 # => This Inner Loop Header: Depth=2 ld.d $a0, $fp, 1160 - bgeu $s7, $a0, .LBB0_157 + bgeu $s6, $a0, .LBB0_157 # %bb.159: # in Loop: Header=BB0_158 Depth=2 ld.d $s5, $fp, 1368 - ldx.w $s6, $s5, $s8 - blt $s6, $s3, .LBB0_161 + ldx.w $s4, $s5, $s7 + blt $s4, $s8, .LBB0_161 # %bb.160: # in Loop: Header=BB0_158 Depth=2 - blt $s6, $s4, .LBB0_163 + blt $s4, $s2, .LBB0_163 .LBB0_161: # in Loop: Header=BB0_158 Depth=2 .Ltmp66: # EH_LABEL ori $a0, $zero, 24 @@ -1215,28 +1217,28 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv .Ltmp67: # EH_LABEL # %bb.162: # %_ZNSt7__cxx114listIiSaIiEE9push_backERKi.exit # in Loop: Header=BB0_158 Depth=2 - ldx.w $a1, $s5, $s8 + ldx.w $a1, $s5, $s7 st.w $a1, $a0, 16 - addi.d $a1, $sp, 88 + addi.d $a1, $sp, 72 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 88 ld.d $a1, $fp, 1368 addi.d $a0, $a0, 1 - st.d $a0, $sp, 104 - ldx.w $s6, $a1, $s8 + st.d $a0, $sp, 88 + ldx.w $s4, $a1, $s7 .LBB0_163: # in Loop: Header=BB0_158 Depth=2 ld.d $a1, $fp, 1352 - slli.d $a0, $s6, 2 + slli.d $a0, $s4, 2 ldx.w $a2, $a1, $a0 - ldx.w $a1, $a1, $s8 + ldx.w $a1, $a1, $s7 bge $a1, $a2, .LBB0_168 # %bb.164: # in Loop: Header=BB0_158 Depth=2 ld.d $s5, $fp, 1392 ldx.w $a0, $s5, $a0 - blt $a0, $s3, .LBB0_166 + blt $a0, $s8, .LBB0_166 # %bb.165: # in Loop: Header=BB0_158 Depth=2 - blt $a0, $s4, .LBB0_168 + blt $a0, $s2, .LBB0_168 .LBB0_166: # in Loop: Header=BB0_158 Depth=2 .Ltmp68: # EH_LABEL ori $a0, $zero, 24 @@ -1245,21 +1247,21 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv .Ltmp69: # EH_LABEL # %bb.167: # %_ZNSt7__cxx114listIiSaIiEE9push_backERKi.exit404 # in Loop: Header=BB0_158 Depth=2 - alsl.d $a1, $s6, $s5, 2 + alsl.d $a1, $s4, $s5, 2 ld.w $a1, $a1, 0 st.w $a1, $a0, 16 - addi.d $a1, $sp, 88 + addi.d $a1, $sp, 72 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 88 addi.d $a0, $a0, 1 - st.d $a0, $sp, 104 + st.d $a0, $sp, 88 .LBB0_168: # in Loop: Header=BB0_158 Depth=2 ld.d $s5, $fp, 1376 - ldx.w $s6, $s5, $s8 - blt $s6, $s3, .LBB0_170 + ldx.w $s4, $s5, $s7 + blt $s4, $s8, .LBB0_170 # %bb.169: # in Loop: Header=BB0_158 Depth=2 - blt $s6, $s4, .LBB0_172 + blt $s4, $s2, .LBB0_172 .LBB0_170: # in Loop: Header=BB0_158 Depth=2 .Ltmp70: # EH_LABEL ori $a0, $zero, 24 @@ -1268,28 +1270,28 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv .Ltmp71: # EH_LABEL # %bb.171: # %_ZNSt7__cxx114listIiSaIiEE9push_backERKi.exit405 # in Loop: Header=BB0_158 Depth=2 - ldx.w $a1, $s5, $s8 + ldx.w $a1, $s5, $s7 st.w $a1, $a0, 16 - addi.d $a1, $sp, 88 + addi.d $a1, $sp, 72 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 88 ld.d $a1, $fp, 1376 addi.d $a0, $a0, 1 - st.d $a0, $sp, 104 - ldx.w $s6, $a1, $s8 + st.d $a0, $sp, 88 + ldx.w $s4, $a1, $s7 .LBB0_172: # in Loop: Header=BB0_158 Depth=2 ld.d $a1, $fp, 1352 - slli.d $a0, $s6, 2 + slli.d $a0, $s4, 2 ldx.w $a2, $a1, $a0 - ldx.w $a1, $a1, $s8 + ldx.w $a1, $a1, $s7 bge $a1, $a2, .LBB0_177 # %bb.173: # in Loop: Header=BB0_158 Depth=2 ld.d $s5, $fp, 1392 ldx.w $a0, $s5, $a0 - blt $a0, $s3, .LBB0_175 + blt $a0, $s8, .LBB0_175 # %bb.174: # in Loop: Header=BB0_158 Depth=2 - blt $a0, $s4, .LBB0_177 + blt $a0, $s2, .LBB0_177 .LBB0_175: # in Loop: Header=BB0_158 Depth=2 .Ltmp72: # EH_LABEL ori $a0, $zero, 24 @@ -1298,21 +1300,21 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv .Ltmp73: # EH_LABEL # %bb.176: # %_ZNSt7__cxx114listIiSaIiEE9push_backERKi.exit406 # in Loop: Header=BB0_158 Depth=2 - alsl.d $a1, $s6, $s5, 2 + alsl.d $a1, $s4, $s5, 2 ld.w $a1, $a1, 0 st.w $a1, $a0, 16 - addi.d $a1, $sp, 88 + addi.d $a1, $sp, 72 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 88 addi.d $a0, $a0, 1 - st.d $a0, $sp, 104 + st.d $a0, $sp, 88 .LBB0_177: # in Loop: Header=BB0_158 Depth=2 ld.d $s5, $fp, 1384 - ldx.w $s6, $s5, $s8 - blt $s6, $s3, .LBB0_179 + ldx.w $s4, $s5, $s7 + blt $s4, $s8, .LBB0_179 # %bb.178: # in Loop: Header=BB0_158 Depth=2 - blt $s6, $s4, .LBB0_181 + blt $s4, $s2, .LBB0_181 .LBB0_179: # in Loop: Header=BB0_158 Depth=2 .Ltmp74: # EH_LABEL ori $a0, $zero, 24 @@ -1321,28 +1323,28 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv .Ltmp75: # EH_LABEL # %bb.180: # %_ZNSt7__cxx114listIiSaIiEE9push_backERKi.exit407 # in Loop: Header=BB0_158 Depth=2 - ldx.w $a1, $s5, $s8 + ldx.w $a1, $s5, $s7 st.w $a1, $a0, 16 - addi.d $a1, $sp, 88 + addi.d $a1, $sp, 72 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 88 ld.d $a1, $fp, 1384 addi.d $a0, $a0, 1 - st.d $a0, $sp, 104 - ldx.w $s6, $a1, $s8 + st.d $a0, $sp, 88 + ldx.w $s4, $a1, $s7 .LBB0_181: # in Loop: Header=BB0_158 Depth=2 ld.d $a1, $fp, 1352 - slli.d $a0, $s6, 2 + slli.d $a0, $s4, 2 ldx.w $a2, $a1, $a0 - ldx.w $a1, $a1, $s8 + ldx.w $a1, $a1, $s7 bge $a1, $a2, .LBB0_186 # %bb.182: # in Loop: Header=BB0_158 Depth=2 ld.d $s5, $fp, 1376 ldx.w $a0, $s5, $a0 - blt $a0, $s3, .LBB0_184 + blt $a0, $s8, .LBB0_184 # %bb.183: # in Loop: Header=BB0_158 Depth=2 - blt $a0, $s4, .LBB0_186 + blt $a0, $s2, .LBB0_186 .LBB0_184: # in Loop: Header=BB0_158 Depth=2 .Ltmp76: # EH_LABEL ori $a0, $zero, 24 @@ -1351,21 +1353,21 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv .Ltmp77: # EH_LABEL # %bb.185: # %_ZNSt7__cxx114listIiSaIiEE9push_backERKi.exit408 # in Loop: Header=BB0_158 Depth=2 - alsl.d $a1, $s6, $s5, 2 + alsl.d $a1, $s4, $s5, 2 ld.w $a1, $a1, 0 st.w $a1, $a0, 16 - addi.d $a1, $sp, 88 + addi.d $a1, $sp, 72 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 88 addi.d $a0, $a0, 1 - st.d $a0, $sp, 104 + st.d $a0, $sp, 88 .LBB0_186: # in Loop: Header=BB0_158 Depth=2 ld.d $s5, $fp, 1392 - ldx.w $s6, $s5, $s8 - blt $s6, $s3, .LBB0_188 + ldx.w $s4, $s5, $s7 + blt $s4, $s8, .LBB0_188 # %bb.187: # in Loop: Header=BB0_158 Depth=2 - blt $s6, $s4, .LBB0_190 + blt $s4, $s2, .LBB0_190 .LBB0_188: # in Loop: Header=BB0_158 Depth=2 .Ltmp78: # EH_LABEL ori $a0, $zero, 24 @@ -1374,48 +1376,48 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv .Ltmp79: # EH_LABEL # %bb.189: # %_ZNSt7__cxx114listIiSaIiEE9push_backERKi.exit409 # in Loop: Header=BB0_158 Depth=2 - ldx.w $a1, $s5, $s8 + ldx.w $a1, $s5, $s7 st.w $a1, $a0, 16 - addi.d $a1, $sp, 88 + addi.d $a1, $sp, 72 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 + ld.d $a0, $sp, 88 ld.d $a1, $fp, 1392 addi.d $a0, $a0, 1 - st.d $a0, $sp, 104 - ldx.w $s6, $a1, $s8 + st.d $a0, $sp, 88 + ldx.w $s4, $a1, $s7 .LBB0_190: # in Loop: Header=BB0_158 Depth=2 ld.d $a1, $fp, 1352 - slli.d $a0, $s6, 2 + slli.d $a0, $s4, 2 ldx.w $a2, $a1, $a0 - ldx.w $a1, $a1, $s8 + ldx.w $a1, $a1, $s7 bge $a1, $a2, .LBB0_157 # %bb.191: # in Loop: Header=BB0_158 Depth=2 ld.d $s5, $fp, 1376 ldx.w $a0, $s5, $a0 - blt $a0, $s3, .LBB0_155 + blt $a0, $s8, .LBB0_155 # %bb.192: # in Loop: Header=BB0_158 Depth=2 - blt $a0, $s4, .LBB0_157 + blt $a0, $s2, .LBB0_157 b .LBB0_155 .p2align 4, , 16 .LBB0_193: # in Loop: Header=BB0_154 Depth=1 .Ltmp83: # EH_LABEL - addi.d $a0, $sp, 88 + addi.d $a0, $sp, 72 pcaddu18i $ra, %call36(_ZNSt7__cxx114listIiSaIiEE4sortEv) jirl $ra, $ra, 0 .Ltmp84: # EH_LABEL # %bb.194: # in Loop: Header=BB0_154 Depth=1 - ld.d $a0, $sp, 88 - ld.d $s3, $sp, 48 # 8-byte Folded Reload - ld.d $s7, $sp, 24 # 8-byte Folded Reload - addi.d $s4, $sp, 112 - beq $a0, $s2, .LBB0_205 + ld.d $a0, $sp, 72 + ld.d $s3, $sp, 32 # 8-byte Folded Reload + ld.d $s6, $sp, 24 # 8-byte Folded Reload + addi.d $s4, $sp, 96 + beq $a0, $s1, .LBB0_205 # %bb.195: # in Loop: Header=BB0_154 Depth=1 - st.d $s4, $sp, 120 - st.d $s4, $sp, 112 - st.d $zero, $sp, 128 + st.d $s4, $sp, 104 + st.d $s4, $sp, 96 + st.d $zero, $sp, 112 ld.d $a1, $a0, 0 - bne $a1, $s2, .LBB0_198 + bne $a1, $s1, .LBB0_198 b .LBB0_204 .p2align 4, , 16 .LBB0_196: # in Loop: Header=BB0_198 Depth=2 @@ -1423,7 +1425,7 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv .LBB0_197: # %_ZNSt7__cxx114listIiSaIiEE6spliceESt20_List_const_iteratorIiERS2_S4_.exit.i # in Loop: Header=BB0_198 Depth=2 ld.d $a1, $a0, 0 - beq $a1, $s2, .LBB0_202 + beq $a1, $s1, .LBB0_202 .LBB0_198: # %.lr.ph.i # Parent Loop BB0_154 Depth=1 # => This Inner Loop Header: Depth=2 @@ -1431,85 +1433,82 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv ld.w $a3, $a1, 16 bne $a2, $a3, .LBB0_196 # %bb.199: # in Loop: Header=BB0_198 Depth=2 - ld.d $a3, $sp, 112 + ld.d $a3, $sp, 96 beq $a3, $a1, .LBB0_197 # %bb.200: # in Loop: Header=BB0_198 Depth=2 ld.d $a2, $a1, 0 beq $a2, $a3, .LBB0_197 # %bb.201: # in Loop: Header=BB0_198 Depth=2 - move $s0, $a0 + move $s2, $a0 move $a0, $a3 pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base11_M_transferEPS0_S1_) jirl $ra, $ra, 0 - move $a0, $s0 - ld.d $a1, $sp, 128 - ld.d $a2, $sp, 104 + move $a0, $s2 + ld.d $a1, $sp, 112 + ld.d $a2, $sp, 88 addi.d $a1, $a1, 1 - st.d $a1, $sp, 128 + st.d $a1, $sp, 112 addi.d $a1, $a2, -1 - st.d $a1, $sp, 104 + st.d $a1, $sp, 88 b .LBB0_197 .p2align 4, , 16 .LBB0_202: # %._crit_edge.i # in Loop: Header=BB0_154 Depth=1 - ld.d $a0, $sp, 112 + ld.d $a0, $sp, 96 beq $a0, $s4, .LBB0_204 .p2align 4, , 16 .LBB0_203: # %.lr.ph.i.i.i # Parent Loop BB0_154 Depth=1 # => This Inner Loop Header: Depth=2 - ld.d $s0, $a0, 0 + ld.d $s2, $a0, 0 ori $a1, $zero, 24 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - move $a0, $s0 - bne $s0, $s4, .LBB0_203 + move $a0, $s2 + bne $s2, $s4, .LBB0_203 .LBB0_204: # %_ZNSt7__cxx1110_List_baseIiSaIiEED2Ev.exit.i # in Loop: Header=BB0_154 Depth=1 - ld.d $a0, $sp, 88 + ld.d $a0, $sp, 72 .LBB0_205: # %_ZNSt7__cxx114listIiSaIiEE6uniqueEv.exit # in Loop: Header=BB0_154 Depth=1 - ld.d $a1, $sp, 104 - ld.d $a2, $sp, 40 # 8-byte Folded Reload - fld.d $fa0, $a2, %pc_lo12(.LCPI0_0) + ld.d $a1, $sp, 88 srli.d $a2, $a1, 32 lu52i.d $a3, $zero, 1107 or $a2, $a2, $a3 - movgr2fr.d $fa1, $a2 - fsub.d $fa0, $fa1, $fa0 + movgr2fr.d $fa0, $a2 + fsub.d $fa0, $fa0, $fs0 lu12i.w $a2, 275200 bstrins.d $a1, $a2, 63, 32 - fld.d $fa1, $s7, %pc_lo12(.LCPI0_3) - ld.d $a2, $sp, 56 # 8-byte Folded Reload - fld.d $fa2, $a2, %pc_lo12(_ZZN4Mesh17partition_measureEvE13offtile_ratio) - movgr2fr.d $fa3, $a1 - fadd.d $fa0, $fa3, $fa0 - fmul.d $fa0, $fa0, $fa1 + ld.d $a2, $sp, 40 # 8-byte Folded Reload + fld.d $fa1, $a2, %pc_lo12(_ZZN4Mesh17partition_measureEvE13offtile_ratio) + movgr2fr.d $fa2, $a1 fadd.d $fa0, $fa2, $fa0 + fmul.d $fa0, $fa0, $fs1 + fadd.d $fa0, $fa1, $fa0 fst.d $fa0, $a2, %pc_lo12(_ZZN4Mesh17partition_measureEvE13offtile_ratio) - beq $a0, $s2, .LBB0_153 + beq $a0, $s1, .LBB0_153 .p2align 4, , 16 .LBB0_206: # %.lr.ph.i.i # Parent Loop BB0_154 Depth=1 # => This Inner Loop Header: Depth=2 - ld.d $s0, $a0, 0 + ld.d $s2, $a0, 0 ori $a1, $zero, 24 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - move $a0, $s0 - bne $s0, $s2, .LBB0_206 + move $a0, $s2 + bne $s2, $s1, .LBB0_206 b .LBB0_153 .LBB0_207: # %..loopexit_crit_edge - ld.d $a0, $sp, 56 # 8-byte Folded Reload + ld.d $a0, $sp, 40 # 8-byte Folded Reload fst.d $fa0, $a0, %pc_lo12(_ZZN4Mesh17partition_measureEvE13offtile_ratio) .LBB0_208: # %.loopexit pcalau12i $a0, %pc_hi20(meas_count) ld.w $a1, $a0, %pc_lo12(meas_count) - ld.d $a2, $sp, 16 # 8-byte Folded Reload + ld.d $a2, $sp, 8 # 8-byte Folded Reload bstrpick.d $a2, $a2, 31, 0 addi.d $a1, $a1, 1 st.w $a1, $a0, %pc_lo12(meas_count) - ld.d $a0, $sp, 56 # 8-byte Folded Reload + ld.d $a0, $sp, 40 # 8-byte Folded Reload fld.d $fa0, $a0, %pc_lo12(_ZZN4Mesh17partition_measureEvE13offtile_ratio) pcalau12i $a0, %pc_hi20(meas_sum_average) fld.d $fa1, $a0, %pc_lo12(meas_sum_average) @@ -1519,6 +1518,8 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv fadd.d $fa0, $fa1, $fa0 fst.d $fa0, $a0, %pc_lo12(meas_sum_average) .LBB0_209: + fld.d $fs1, $sp, 120 # 8-byte Folded Reload + fld.d $fs0, $sp, 128 # 8-byte Folded Reload ld.d $s8, $sp, 136 # 8-byte Folded Reload ld.d $s7, $sp, 144 # 8-byte Folded Reload ld.d $s6, $sp, 152 # 8-byte Folded Reload @@ -1569,8 +1570,8 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv .Ltmp37: # EH_LABEL .LBB0_222: move $fp, $a0 - ld.d $a0, $sp, 64 - addi.d $s0, $sp, 64 + ld.d $a0, $sp, 48 + addi.d $s0, $sp, 48 beq $a0, $s0, .LBB0_224 .p2align 4, , 16 .LBB0_223: # %.lr.ph.i.i505 @@ -1582,8 +1583,8 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv move $a0, $s1 bne $s1, $s0, .LBB0_223 .LBB0_224: # %_ZNSt7__cxx1110_List_baseIiSaIiEED2Ev.exit508 - ld.d $a0, $sp, 88 - addi.d $s0, $sp, 88 + ld.d $a0, $sp, 72 + addi.d $s0, $sp, 72 beq $a0, $s0, .LBB0_232 .p2align 4, , 16 .LBB0_225: # %.lr.ph.i.i510 @@ -1599,8 +1600,8 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv .Ltmp82: # EH_LABEL .LBB0_227: move $fp, $a0 - ld.d $a0, $sp, 88 - addi.d $s0, $sp, 88 + ld.d $a0, $sp, 72 + addi.d $s0, $sp, 72 beq $a0, $s0, .LBB0_232 .p2align 4, , 16 .LBB0_228: # %.lr.ph.i.i412 @@ -1616,8 +1617,8 @@ _ZN4Mesh17partition_measureEv: # @_ZN4Mesh17partition_measureEv .Ltmp62: # EH_LABEL .LBB0_230: move $fp, $a0 - ld.d $a0, $sp, 88 - addi.d $s0, $sp, 88 + ld.d $a0, $sp, 72 + addi.d $s0, $sp, 72 beq $a0, $s0, .LBB0_232 .p2align 4, , 16 .LBB0_231: # %.lr.ph.i.i445 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/state.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/state.s index bbba6f34..28950721 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/state.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/state.s @@ -2100,33 +2100,28 @@ _ZN5State21remove_boundary_cellsEv: # @_ZN5State21remove_boundary_cellsEv .size _ZN5State21remove_boundary_cellsEv, .Lfunc_end10-_ZN5State21remove_boundary_cellsEv .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN5State12set_timestepEdd -.LCPI11_0: - .dword 0x408f400000000000 # double 1000 - .text - .globl _ZN5State12set_timestepEdd + .globl _ZN5State12set_timestepEdd # -- Begin function _ZN5State12set_timestepEdd .p2align 5 .type _ZN5State12set_timestepEdd,@function _ZN5State12set_timestepEdd: # @_ZN5State12set_timestepEdd .cfi_startproc # %bb.0: - addi.d $sp, $sp, -160 - .cfi_def_cfa_offset 160 - st.d $ra, $sp, 152 # 8-byte Folded Spill - st.d $fp, $sp, 144 # 8-byte Folded Spill - st.d $s0, $sp, 136 # 8-byte Folded Spill - st.d $s1, $sp, 128 # 8-byte Folded Spill - st.d $s2, $sp, 120 # 8-byte Folded Spill - st.d $s3, $sp, 112 # 8-byte Folded Spill - st.d $s4, $sp, 104 # 8-byte Folded Spill - st.d $s5, $sp, 96 # 8-byte Folded Spill - st.d $s6, $sp, 88 # 8-byte Folded Spill - st.d $s7, $sp, 80 # 8-byte Folded Spill - st.d $s8, $sp, 72 # 8-byte Folded Spill - fst.d $fs0, $sp, 64 # 8-byte Folded Spill - fst.d $fs1, $sp, 56 # 8-byte Folded Spill - fst.d $fs2, $sp, 48 # 8-byte Folded Spill + addi.d $sp, $sp, -144 + .cfi_def_cfa_offset 144 + st.d $ra, $sp, 136 # 8-byte Folded Spill + st.d $fp, $sp, 128 # 8-byte Folded Spill + st.d $s0, $sp, 120 # 8-byte Folded Spill + st.d $s1, $sp, 112 # 8-byte Folded Spill + st.d $s2, $sp, 104 # 8-byte Folded Spill + st.d $s3, $sp, 96 # 8-byte Folded Spill + st.d $s4, $sp, 88 # 8-byte Folded Spill + st.d $s5, $sp, 80 # 8-byte Folded Spill + st.d $s6, $sp, 72 # 8-byte Folded Spill + st.d $s7, $sp, 64 # 8-byte Folded Spill + st.d $s8, $sp, 56 # 8-byte Folded Spill + fst.d $fs0, $sp, 48 # 8-byte Folded Spill + fst.d $fs1, $sp, 40 # 8-byte Folded Spill + fst.d $fs2, $sp, 32 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -2144,7 +2139,7 @@ _ZN5State12set_timestepEdd: # @_ZN5State12set_timestepEdd fmov.d $fs0, $fa1 fmov.d $fs1, $fa0 move $fp, $a0 - addi.d $a0, $sp, 32 + addi.d $a0, $sp, 16 pcaddu18i $ra, %call36(cpu_timer_start) jirl $ra, $ra, 0 ld.d $a0, $fp, 192 @@ -2152,37 +2147,39 @@ _ZN5State12set_timestepEdd: # @_ZN5State12set_timestepEdd pcaddu18i $ra, %call36(_ZN4Mesh10set_boundsEi) jirl $ra, $ra, 0 ld.d $a0, $fp, 192 - addi.d $a1, $sp, 28 - addi.d $a2, $sp, 24 + addi.d $a1, $sp, 12 + addi.d $a2, $sp, 8 pcaddu18i $ra, %call36(_ZN4Mesh10get_boundsERiS0_) jirl $ra, $ra, 0 - ld.w $s1, $sp, 28 - ld.w $a0, $sp, 24 - pcalau12i $a4, %pc_hi20(.LCPI11_0) - bge $s1, $a0, .LBB11_7 + ld.w $s0, $sp, 12 + ld.w $a0, $sp, 8 + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1032 + bge $s0, $a0, .LBB11_7 # %bb.1: # %.lr.ph - ld.d $s2, $fp, 192 - ld.d $s3, $s2, 1360 - ld.d $a1, $fp, 200 - ld.d $a2, $fp, 208 - ld.d $a3, $fp, 216 - slli.d $s4, $s1, 2 - alsl.d $s5, $s1, $a1, 3 - fld.d $fs2, $a4, %pc_lo12(.LCPI11_0) - alsl.d $s6, $s1, $a2, 3 - alsl.d $s7, $s1, $a3, 3 - ori $s8, $zero, 1 + ld.d $s1, $fp, 192 + ld.d $s2, $s1, 1360 + ld.d $a2, $fp, 200 + ld.d $a3, $fp, 208 + ld.d $a4, $fp, 216 + slli.d $s3, $s0, 2 + alsl.d $s4, $s0, $a2, 3 + alsl.d $s5, $s0, $a3, 3 + alsl.d $s6, $s0, $a4, 3 + movgr2fr.d $fs2, $a1 + ori $s7, $zero, 1 b .LBB11_4 .p2align 4, , 16 .LBB11_2: # %.split # in Loop: Header=BB11_4 Depth=1 - ld.d $a0, $s2, 1048 - fld.d $fa1, $s6, 0 - slli.d $a1, $s0, 3 + ld.d $a0, $s1, 1048 + fld.d $fa1, $s5, 0 + slli.d $a1, $s8, 3 fldx.d $fa2, $a0, $a1 - ld.d $a0, $s2, 1072 + ld.d $a0, $s1, 1072 fabs.d $fa1, $fa1 - fld.d $fa3, $s7, 0 + fld.d $fa3, $s6, 0 fadd.d $fa1, $fa0, $fa1 fldx.d $fa4, $a0, $a1 fdiv.d $fa1, $fa1, $fa2 @@ -2190,24 +2187,24 @@ _ZN5State12set_timestepEdd: # @_ZN5State12set_timestepEdd fadd.d $fa0, $fa0, $fa2 fdiv.d $fa0, $fa0, $fa4 fadd.d $fa0, $fa1, $fa0 - ld.w $a0, $sp, 24 + ld.w $a0, $sp, 8 fdiv.d $fa0, $fs0, $fa0 fcmp.clt.d $fcc0, $fa0, $fs2 fsel $fs2, $fs2, $fa0, $fcc0 .LBB11_3: # in Loop: Header=BB11_4 Depth=1 - addi.d $s1, $s1, 1 - addi.d $s4, $s4, 4 + addi.d $s0, $s0, 1 + addi.d $s3, $s3, 4 + addi.d $s4, $s4, 8 addi.d $s5, $s5, 8 addi.d $s6, $s6, 8 - addi.d $s7, $s7, 8 - bge $s1, $a0, .LBB11_8 + bge $s0, $a0, .LBB11_8 .LBB11_4: # =>This Inner Loop Header: Depth=1 - ldx.w $a1, $s3, $s4 - bne $a1, $s8, .LBB11_3 + ldx.w $a1, $s2, $s3 + bne $a1, $s7, .LBB11_3 # %bb.5: # in Loop: Header=BB11_4 Depth=1 - ld.d $a0, $s2, 1352 - fld.d $fa0, $s5, 0 - ldx.w $s0, $a0, $s4 + ld.d $a0, $s1, 1352 + fld.d $fa0, $s4, 0 + ldx.w $s8, $a0, $s3 fmul.d $fa1, $fs1, $fa0 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 @@ -2215,44 +2212,45 @@ _ZN5State12set_timestepEdd: # @_ZN5State12set_timestepEdd # %bb.6: # %call.sqrt # in Loop: Header=BB11_4 Depth=1 fmov.d $fa0, $fa1 - st.d $a4, $sp, 16 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ld.d $a4, $sp, 16 # 8-byte Folded Reload b .LBB11_2 .LBB11_7: - fld.d $fs0, $a4, %pc_lo12(.LCPI11_0) + movgr2fr.d $fs0, $a1 b .LBB11_10 .LBB11_8: # %._crit_edge - fld.d $fs0, $a4, %pc_lo12(.LCPI11_0) + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs0, $a0 fcmp.clt.d $fcc0, $fs2, $fs0 bceqz $fcc0, .LBB11_10 # %bb.9: fmov.d $fs0, $fs2 .LBB11_10: # %._crit_edge.thread - ld.d $a0, $sp, 32 - ld.d $a1, $sp, 40 + ld.d $a0, $sp, 16 + ld.d $a1, $sp, 24 pcaddu18i $ra, %call36(cpu_timer_stop) jirl $ra, $ra, 0 fld.d $fa1, $fp, 232 fadd.d $fa0, $fa0, $fa1 fst.d $fa0, $fp, 232 fmov.d $fa0, $fs0 - fld.d $fs2, $sp, 48 # 8-byte Folded Reload - fld.d $fs1, $sp, 56 # 8-byte Folded Reload - fld.d $fs0, $sp, 64 # 8-byte Folded Reload - ld.d $s8, $sp, 72 # 8-byte Folded Reload - ld.d $s7, $sp, 80 # 8-byte Folded Reload - ld.d $s6, $sp, 88 # 8-byte Folded Reload - ld.d $s5, $sp, 96 # 8-byte Folded Reload - ld.d $s4, $sp, 104 # 8-byte Folded Reload - ld.d $s3, $sp, 112 # 8-byte Folded Reload - ld.d $s2, $sp, 120 # 8-byte Folded Reload - ld.d $s1, $sp, 128 # 8-byte Folded Reload - ld.d $s0, $sp, 136 # 8-byte Folded Reload - ld.d $fp, $sp, 144 # 8-byte Folded Reload - ld.d $ra, $sp, 152 # 8-byte Folded Reload - addi.d $sp, $sp, 160 + fld.d $fs2, $sp, 32 # 8-byte Folded Reload + fld.d $fs1, $sp, 40 # 8-byte Folded Reload + fld.d $fs0, $sp, 48 # 8-byte Folded Reload + ld.d $s8, $sp, 56 # 8-byte Folded Reload + ld.d $s7, $sp, 64 # 8-byte Folded Reload + ld.d $s6, $sp, 72 # 8-byte Folded Reload + ld.d $s5, $sp, 80 # 8-byte Folded Reload + ld.d $s4, $sp, 88 # 8-byte Folded Reload + ld.d $s3, $sp, 96 # 8-byte Folded Reload + ld.d $s2, $sp, 104 # 8-byte Folded Reload + ld.d $s1, $sp, 112 # 8-byte Folded Reload + ld.d $s0, $sp, 120 # 8-byte Folded Reload + ld.d $fp, $sp, 128 # 8-byte Folded Reload + ld.d $ra, $sp, 136 # 8-byte Folded Reload + addi.d $sp, $sp, 144 ret .Lfunc_end11: .size _ZN5State12set_timestepEdd, .Lfunc_end11-_ZN5State12set_timestepEdd @@ -2863,42 +2861,34 @@ GCC_except_table15: .Lcst_end1: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN5State22calc_finite_differenceEd -.LCPI16_0: - .dword 0x401399999999999a # double 4.9000000000000004 -.LCPI16_1: - .dword 0x402399999999999a # double 9.8000000000000007 -.LCPI16_2: - .dword 0x39b4484bfeebc2a0 # double 1.0000000000000001E-30 .text - .globl _ZN5State22calc_finite_differenceEd + .globl _ZN5State22calc_finite_differenceEd # -- Begin function _ZN5State22calc_finite_differenceEd .p2align 5 .type _ZN5State22calc_finite_differenceEd,@function _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd .cfi_startproc # %bb.0: - addi.d $sp, $sp, -1008 - .cfi_def_cfa_offset 1008 - st.d $ra, $sp, 1000 # 8-byte Folded Spill - st.d $fp, $sp, 992 # 8-byte Folded Spill - st.d $s0, $sp, 984 # 8-byte Folded Spill - st.d $s1, $sp, 976 # 8-byte Folded Spill - st.d $s2, $sp, 968 # 8-byte Folded Spill - st.d $s3, $sp, 960 # 8-byte Folded Spill - st.d $s4, $sp, 952 # 8-byte Folded Spill - st.d $s5, $sp, 944 # 8-byte Folded Spill - st.d $s6, $sp, 936 # 8-byte Folded Spill - st.d $s7, $sp, 928 # 8-byte Folded Spill - st.d $s8, $sp, 920 # 8-byte Folded Spill - fst.d $fs0, $sp, 912 # 8-byte Folded Spill - fst.d $fs1, $sp, 904 # 8-byte Folded Spill - fst.d $fs2, $sp, 896 # 8-byte Folded Spill - fst.d $fs3, $sp, 888 # 8-byte Folded Spill - fst.d $fs4, $sp, 880 # 8-byte Folded Spill - fst.d $fs5, $sp, 872 # 8-byte Folded Spill - fst.d $fs6, $sp, 864 # 8-byte Folded Spill - fst.d $fs7, $sp, 856 # 8-byte Folded Spill + addi.d $sp, $sp, -1040 + .cfi_def_cfa_offset 1040 + st.d $ra, $sp, 1032 # 8-byte Folded Spill + st.d $fp, $sp, 1024 # 8-byte Folded Spill + st.d $s0, $sp, 1016 # 8-byte Folded Spill + st.d $s1, $sp, 1008 # 8-byte Folded Spill + st.d $s2, $sp, 1000 # 8-byte Folded Spill + st.d $s3, $sp, 992 # 8-byte Folded Spill + st.d $s4, $sp, 984 # 8-byte Folded Spill + st.d $s5, $sp, 976 # 8-byte Folded Spill + st.d $s6, $sp, 968 # 8-byte Folded Spill + st.d $s7, $sp, 960 # 8-byte Folded Spill + st.d $s8, $sp, 952 # 8-byte Folded Spill + fst.d $fs0, $sp, 944 # 8-byte Folded Spill + fst.d $fs1, $sp, 936 # 8-byte Folded Spill + fst.d $fs2, $sp, 928 # 8-byte Folded Spill + fst.d $fs3, $sp, 920 # 8-byte Folded Spill + fst.d $fs4, $sp, 912 # 8-byte Folded Spill + fst.d $fs5, $sp, 904 # 8-byte Folded Spill + fst.d $fs6, $sp, 896 # 8-byte Folded Spill + fst.d $fs7, $sp, 888 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -2918,27 +2908,29 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd .cfi_offset 61, -136 .cfi_offset 62, -144 .cfi_offset 63, -152 - fst.d $fa0, $sp, 200 # 8-byte Folded Spill - move $s1, $a0 - addi.d $a0, $sp, 840 + fst.d $fa0, $sp, 224 # 8-byte Folded Spill + move $fp, $a0 + addi.d $a0, $sp, 872 pcaddu18i $ra, %call36(cpu_timer_start) jirl $ra, $ra, 0 - ld.d $s0, $s1, 192 + st.d $fp, $sp, 168 # 8-byte Folded Spill + ld.d $s0, $fp, 192 ld.d $a0, $s0, 1160 ld.d $a1, $s0, 1176 bgeu $a1, $a0, .LBB16_2 # %bb.1: st.d $a0, $s0, 1176 .LBB16_2: + ld.d $s1, $sp, 168 # 8-byte Folded Reload move $a0, $s1 pcaddu18i $ra, %call36(_ZN5State25apply_boundary_conditionsEv) jirl $ra, $ra, 0 ld.d $fp, $s1, 192 ld.d $a0, $fp, 1368 - st.d $a0, $sp, 280 # 8-byte Folded Spill + st.d $a0, $sp, 304 # 8-byte Folded Spill ld.d $s3, $fp, 1376 ld.d $a0, $fp, 1384 - st.d $a0, $sp, 272 # 8-byte Folded Spill + st.d $a0, $sp, 296 # 8-byte Folded Spill ld.d $s6, $fp, 1392 ld.d $s4, $fp, 1352 ld.d $a1, $s0, 1176 @@ -2971,31 +2963,29 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd move $a0, $s1 pcaddu18i $ra, %call36(_ZN10MallocPlus13memory_mallocEmmPKci) jirl $ra, $ra, 0 - st.d $s1, $sp, 168 # 8-byte Folded Spill ld.d $a3, $s1, 192 pcalau12i $a1, %pc_hi20(_ZZN5State22calc_finite_differenceEdE5V_new) st.d $a1, $sp, 144 # 8-byte Folded Spill st.d $a0, $a1, %pc_lo12(_ZZN5State22calc_finite_differenceEdE5V_new) - addi.d $a1, $sp, 836 - addi.d $a2, $sp, 832 + addi.d $a1, $sp, 868 + addi.d $a2, $sp, 864 move $a0, $a3 pcaddu18i $ra, %call36(_ZN4Mesh10get_boundsERiS0_) jirl $ra, $ra, 0 - ld.w $t6, $sp, 836 - ld.w $a0, $sp, 832 - fld.d $fa5, $sp, 200 # 8-byte Folded Reload - bge $t6, $a0, .LBB16_113 + ld.w $t6, $sp, 868 + ld.w $a0, $sp, 864 + fld.d $fa5, $sp, 224 # 8-byte Folded Reload + bge $t6, $a0, .LBB16_114 # %bb.3: # %.lr.ph move $t4, $s6 move $t2, $s3 ld.d $a0, $sp, 168 # 8-byte Folded Reload ld.d $s5, $a0, 200 ld.d $t7, $a0, 208 - ld.d $s7, $a0, 216 + ld.d $t8, $a0, 216 ld.d $a0, $fp, 1048 - st.d $a0, $sp, 712 # 8-byte Folded Spill - ld.d $a0, $fp, 1072 - st.d $a0, $sp, 704 # 8-byte Folded Spill + st.d $a0, $sp, 744 # 8-byte Folded Spill + ld.d $s6, $fp, 1072 vldi $vr22, -928 fmul.d $fs2, $fa5, $ft14 ld.d $a0, $sp, 160 # 8-byte Folded Reload @@ -3005,64 +2995,77 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd ld.d $a0, $sp, 144 # 8-byte Folded Reload ld.d $a3, $a0, %pc_lo12(_ZZN5State22calc_finite_differenceEdE5V_new) alsl.d $a4, $t6, $s4, 2 - ld.d $a0, $sp, 280 # 8-byte Folded Reload + ld.d $a0, $sp, 304 # 8-byte Folded Reload alsl.d $a5, $t6, $a0, 2 alsl.d $a6, $t6, $s3, 2 - alsl.d $s6, $t6, $s6, 2 - ld.d $a0, $sp, 272 # 8-byte Folded Reload + alsl.d $s8, $t6, $t4, 2 + ld.d $a0, $sp, 296 # 8-byte Folded Reload alsl.d $s2, $t6, $a0, 2 - slli.d $s8, $t6, 3 - movgr2fr.d $fs3, $zero + slli.d $s7, $t6, 3 + movgr2fr.d $fa1, $zero vldi $vr16, -912 - move $a7, $s7 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, 235929 + st.d $a0, $sp, 192 # 8-byte Folded Spill + lu52i.d $a0, $a0, 1025 + st.d $a0, $sp, 184 # 8-byte Folded Spill + lu12i.w $a0, -4420 + ori $a0, $a0, 672 + lu32i.d $a0, 280651 + lu52i.d $a0, $a0, 923 + st.d $a0, $sp, 176 # 8-byte Folded Spill + move $a7, $t8 move $t5, $t7 move $t1, $s5 - fst.d $fs2, $sp, 184 # 8-byte Folded Spill - st.d $s8, $sp, 192 # 8-byte Folded Spill - fst.d $fs3, $sp, 176 # 8-byte Folded Spill + st.d $s6, $sp, 208 # 8-byte Folded Spill + fst.d $fs2, $sp, 200 # 8-byte Folded Spill + st.d $s7, $sp, 216 # 8-byte Folded Spill + fst.d $fa1, $sp, 736 # 8-byte Folded Spill b .LBB16_6 .p2align 4, , 16 .LBB16_4: # in Loop: Header=BB16_6 Depth=1 - ld.d $a1, $sp, 528 # 8-byte Folded Reload - ld.d $a2, $sp, 520 # 8-byte Folded Reload - ld.d $a3, $sp, 512 # 8-byte Folded Reload - ld.d $a4, $sp, 504 # 8-byte Folded Reload - ld.d $a5, $sp, 496 # 8-byte Folded Reload - ld.d $a6, $sp, 488 # 8-byte Folded Reload - ld.d $s6, $sp, 480 # 8-byte Folded Reload - ld.d $s8, $sp, 192 # 8-byte Folded Reload - fld.d $fa2, $sp, 456 # 8-byte Folded Reload - fld.d $fa4, $sp, 424 # 8-byte Folded Reload - fld.d $fa7, $sp, 768 # 8-byte Folded Reload + ld.d $s6, $sp, 208 # 8-byte Folded Reload + ld.d $a1, $sp, 552 # 8-byte Folded Reload + ld.d $a2, $sp, 544 # 8-byte Folded Reload + ld.d $a3, $sp, 536 # 8-byte Folded Reload + ld.d $a4, $sp, 528 # 8-byte Folded Reload + ld.d $a5, $sp, 520 # 8-byte Folded Reload + ld.d $a6, $sp, 512 # 8-byte Folded Reload + ld.d $s8, $sp, 504 # 8-byte Folded Reload + fld.d $fa2, $sp, 480 # 8-byte Folded Reload + fld.d $fa4, $sp, 448 # 8-byte Folded Reload + fld.d $fa7, $sp, 808 # 8-byte Folded Reload .LBB16_5: # in Loop: Header=BB16_6 Depth=1 fdiv.d $fa0, $fa5, $fs0 - fld.d $fa1, $sp, 464 # 8-byte Folded Reload - fld.d $fa3, $sp, 440 # 8-byte Folded Reload + fld.d $fa1, $sp, 488 # 8-byte Folded Reload + fld.d $fa3, $sp, 464 # 8-byte Folded Reload fsub.d $fa1, $fa3, $fa1 fadd.d $fa1, $fa1, $ft1 - fld.d $fa3, $sp, 408 # 8-byte Folded Reload + fld.d $fa3, $sp, 432 # 8-byte Folded Reload fsub.d $fa1, $fa1, $fa3 fmul.d $fa1, $fa0, $fa1 fsub.d $fa1, $fs7, $fa1 fsub.d $fa1, $fa1, $ft4 - fld.d $fa3, $sp, 752 # 8-byte Folded Reload - fadd.d $fa1, $fa1, $fa3 - fsub.d $fa1, $fa1, $ft15 fld.d $fa3, $sp, 792 # 8-byte Folded Reload fadd.d $fa1, $fa1, $fa3 - fstx.d $fa1, $a1, $s8 - fld.d $fa1, $sp, 472 # 8-byte Folded Reload - fld.d $fa3, $sp, 448 # 8-byte Folded Reload + fld.d $fa3, $sp, 784 # 8-byte Folded Reload + fsub.d $fa1, $fa1, $fa3 + fld.d $fa3, $sp, 824 # 8-byte Folded Reload + fadd.d $fa1, $fa1, $fa3 + fstx.d $fa1, $a1, $s7 + fld.d $fa1, $sp, 496 # 8-byte Folded Reload + fld.d $fa3, $sp, 472 # 8-byte Folded Reload fsub.d $fa1, $fa3, $fa1 fadd.d $fa1, $fa1, $ft11 fsub.d $fa1, $fa1, $fa4 fmul.d $fa1, $fa0, $fa1 fsub.d $fa1, $ft7, $fa1 - fld.d $fa3, $sp, 760 # 8-byte Folded Reload + fld.d $fa3, $sp, 800 # 8-byte Folded Reload fsub.d $fa1, $fa1, $fa3 fadd.d $fa1, $fa1, $fa7 - fstx.d $fa1, $a2, $s8 - fld.d $fa1, $sp, 808 # 8-byte Folded Reload + fstx.d $fa1, $a2, $s7 + fld.d $fa1, $sp, 840 # 8-byte Folded Reload fsub.d $fa1, $fa2, $fa1 fadd.d $fa1, $fa1, $ft10 fsub.d $fa1, $fa1, $fa6 @@ -3070,13 +3073,13 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fsub.d $fa0, $ft13, $fa0 fsub.d $fa0, $fa0, $fs3 fadd.d $fa0, $fa0, $fs1 - fstx.d $fa0, $a3, $s8 + fstx.d $fa0, $a3, $s7 addi.d $t6, $t6, 1 - ld.w $a0, $sp, 832 + ld.w $a0, $sp, 864 addi.d $a4, $a4, 4 addi.d $a5, $a5, 4 addi.d $a6, $a6, 4 - addi.d $s6, $s6, 4 + addi.d $s8, $s8, 4 addi.d $s2, $s2, 4 addi.d $t1, $t1, 8 addi.d $t5, $t5, 8 @@ -3084,117 +3087,118 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd addi.d $a1, $a1, 8 addi.d $a2, $a2, 8 addi.d $a3, $a3, 8 - fld.d $fs2, $sp, 184 # 8-byte Folded Reload - fld.d $fs3, $sp, 176 # 8-byte Folded Reload - bge $t6, $a0, .LBB16_113 + fld.d $fs2, $sp, 200 # 8-byte Folded Reload + fld.d $fa1, $sp, 736 # 8-byte Folded Reload + bge $t6, $a0, .LBB16_114 .LBB16_6: # =>This Inner Loop Header: Depth=1 - st.d $a3, $sp, 512 # 8-byte Folded Spill - st.d $a2, $sp, 520 # 8-byte Folded Spill - st.d $a1, $sp, 528 # 8-byte Folded Spill - st.d $a5, $sp, 496 # 8-byte Folded Spill + st.d $a3, $sp, 536 # 8-byte Folded Spill + st.d $a2, $sp, 544 # 8-byte Folded Spill + st.d $a1, $sp, 552 # 8-byte Folded Spill + st.d $a5, $sp, 520 # 8-byte Folded Spill ld.w $s1, $a5, 0 - st.d $a4, $sp, 504 # 8-byte Folded Spill + st.d $a4, $sp, 528 # 8-byte Folded Spill ld.w $fp, $a4, 0 slli.d $a1, $s1, 2 ldx.w $a0, $s4, $a1 ldx.w $a3, $t4, $a1 - st.d $a3, $sp, 648 # 8-byte Folded Spill + st.d $a3, $sp, 656 # 8-byte Folded Spill bge $fp, $a0, .LBB16_8 # %bb.7: # in Loop: Header=BB16_6 Depth=1 slli.d $a2, $a3, 3 slli.d $a3, $a3, 2 - ld.d $a4, $sp, 280 # 8-byte Folded Reload + ld.d $a4, $sp, 304 # 8-byte Folded Reload ldx.w $a3, $a4, $a3 fldx.d $fa0, $s5, $a2 - fst.d $fa0, $sp, 752 # 8-byte Folded Spill + fst.d $fa0, $sp, 792 # 8-byte Folded Spill fldx.d $fa0, $t7, $a2 - fst.d $fa0, $sp, 768 # 8-byte Folded Spill - fldx.d $ft0, $s7, $a2 - st.d $a3, $sp, 536 # 8-byte Folded Spill + fst.d $fa0, $sp, 808 # 8-byte Folded Spill + fldx.d $ft0, $t8, $a2 + st.d $a3, $sp, 560 # 8-byte Folded Spill slli.d $a2, $a3, 3 - fldx.d $fa0, $s5, $a2 - fst.d $fa0, $sp, 216 # 8-byte Folded Spill - fldx.d $fa3, $t7, $a2 + fldx.d $fa2, $s5, $a2 + fldx.d $fa0, $t7, $a2 b .LBB16_9 .p2align 4, , 16 .LBB16_8: # in Loop: Header=BB16_6 Depth=1 - st.d $zero, $sp, 536 # 8-byte Folded Spill - fmov.d $fa3, $fs3 - fst.d $fs3, $sp, 216 # 8-byte Folded Spill - fmov.d $ft0, $fs3 - fst.d $fs3, $sp, 768 # 8-byte Folded Spill - fst.d $fs3, $sp, 752 # 8-byte Folded Spill + st.d $zero, $sp, 560 # 8-byte Folded Spill + fmov.d $fa0, $fa1 + fmov.d $fa2, $fa1 + fmov.d $ft0, $fa1 + fst.d $fa1, $sp, 808 # 8-byte Folded Spill + fst.d $fa1, $sp, 792 # 8-byte Folded Spill .LBB16_9: # in Loop: Header=BB16_6 Depth=1 - st.d $a6, $sp, 488 # 8-byte Folded Spill + fst.d $fa2, $sp, 240 # 8-byte Folded Spill + fst.d $fa0, $sp, 256 # 8-byte Folded Spill + st.d $a6, $sp, 512 # 8-byte Folded Spill ld.w $s0, $a6, 0 slli.d $a2, $s0, 2 ldx.w $s3, $s4, $a2 ldx.w $a4, $t4, $a2 - st.d $a4, $sp, 656 # 8-byte Folded Spill + st.d $a4, $sp, 664 # 8-byte Folded Spill bge $fp, $s3, .LBB16_11 # %bb.10: # in Loop: Header=BB16_6 Depth=1 slli.d $a3, $a4, 3 slli.d $a4, $a4, 2 ldx.w $a4, $t2, $a4 fldx.d $fa0, $s5, $a3 - fst.d $fa0, $sp, 760 # 8-byte Folded Spill + fst.d $fa0, $sp, 800 # 8-byte Folded Spill fldx.d $fa0, $t7, $a3 - fst.d $fa0, $sp, 776 # 8-byte Folded Spill - fldx.d $fa5, $s7, $a3 - st.d $a4, $sp, 400 # 8-byte Folded Spill + fst.d $fa0, $sp, 816 # 8-byte Folded Spill + fldx.d $fa5, $t8, $a3 + st.d $a4, $sp, 424 # 8-byte Folded Spill slli.d $a3, $a4, 3 fldx.d $fa0, $s5, $a3 - fst.d $fa0, $sp, 224 # 8-byte Folded Spill - fldx.d $fa2, $t7, $a3 + fst.d $fa0, $sp, 248 # 8-byte Folded Spill + fldx.d $fa3, $t7, $a3 b .LBB16_12 .p2align 4, , 16 .LBB16_11: # in Loop: Header=BB16_6 Depth=1 - st.d $zero, $sp, 400 # 8-byte Folded Spill - fst.d $fs3, $sp, 760 # 8-byte Folded Spill - fst.d $fs3, $sp, 776 # 8-byte Folded Spill - fmov.d $fa5, $fs3 - fst.d $fs3, $sp, 224 # 8-byte Folded Spill - fmov.d $fa2, $fs3 + st.d $zero, $sp, 424 # 8-byte Folded Spill + fst.d $fa1, $sp, 800 # 8-byte Folded Spill + fst.d $fa1, $sp, 816 # 8-byte Folded Spill + fmov.d $fa5, $fa1 + fst.d $fa1, $sp, 248 # 8-byte Folded Spill + fmov.d $fa3, $fa1 .LBB16_12: # in Loop: Header=BB16_6 Depth=1 - ld.w $a3, $s2, 0 - st.d $a3, $sp, 784 # 8-byte Folded Spill - slli.d $a3, $a3, 2 + ld.w $t3, $s2, 0 + slli.d $a3, $t3, 2 ldx.w $a6, $s4, $a3 ldx.w $a5, $t2, $a3 - st.d $a5, $sp, 672 # 8-byte Folded Spill + st.d $a5, $sp, 704 # 8-byte Folded Spill bge $fp, $a6, .LBB16_14 # %bb.13: # in Loop: Header=BB16_6 Depth=1 slli.d $a4, $a5, 3 slli.d $a5, $a5, 2 - ld.d $t0, $sp, 272 # 8-byte Folded Reload + ld.d $t0, $sp, 296 # 8-byte Folded Reload ldx.w $a5, $t0, $a5 fldx.d $fa0, $s5, $a4 - fst.d $fa0, $sp, 792 # 8-byte Folded Spill + fst.d $fa0, $sp, 824 # 8-byte Folded Spill fldx.d $fa4, $t7, $a4 - fldx.d $fa0, $s7, $a4 - fst.d $fa0, $sp, 800 # 8-byte Folded Spill - st.d $a5, $sp, 416 # 8-byte Folded Spill + fldx.d $fa0, $t8, $a4 + fst.d $fa0, $sp, 832 # 8-byte Folded Spill + st.d $a5, $sp, 440 # 8-byte Folded Spill slli.d $a4, $a5, 3 - fldx.d $fa1, $s5, $a4 - fldx.d $fa0, $s7, $a4 + fldx.d $fa2, $s5, $a4 + fldx.d $fa0, $t8, $a4 b .LBB16_15 .p2align 4, , 16 .LBB16_14: # in Loop: Header=BB16_6 Depth=1 - st.d $zero, $sp, 416 # 8-byte Folded Spill - fst.d $fs3, $sp, 792 # 8-byte Folded Spill - fmov.d $fa4, $fs3 - fst.d $fs3, $sp, 800 # 8-byte Folded Spill - fmov.d $fa1, $fs3 - fmov.d $fa0, $fs3 + st.d $zero, $sp, 440 # 8-byte Folded Spill + fst.d $fa1, $sp, 824 # 8-byte Folded Spill + fmov.d $fa4, $fa1 + fst.d $fa1, $sp, 832 # 8-byte Folded Spill + fmov.d $fa2, $fa1 + fmov.d $fa0, $fa1 .LBB16_15: # in Loop: Header=BB16_6 Depth=1 - ld.w $t8, $s6, 0 - slli.d $a4, $t8, 2 + ld.w $a4, $s8, 0 + st.d $a4, $sp, 680 # 8-byte Folded Spill + slli.d $a4, $a4, 2 ldx.w $a5, $s4, $a4 ldx.w $t0, $t2, $a4 - fst.d $fa0, $sp, 256 # 8-byte Folded Spill - fst.d $fa1, $sp, 208 # 8-byte Folded Spill - fst.d $fa2, $sp, 240 # 8-byte Folded Spill - fst.d $fa3, $sp, 232 # 8-byte Folded Spill + st.d $t3, $sp, 688 # 8-byte Folded Spill + fst.d $fa0, $sp, 280 # 8-byte Folded Spill + fst.d $fa2, $sp, 232 # 8-byte Folded Spill + fst.d $fa3, $sp, 264 # 8-byte Folded Spill bge $fp, $a5, .LBB16_17 # %bb.16: # in Loop: Header=BB16_6 Depth=1 move $t3, $t1 @@ -3205,17 +3209,17 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd slli.d $t0, $t0, 2 ldx.w $t0, $t4, $t0 fldx.d $fa0, $s5, $a7 - fst.d $fa0, $sp, 664 # 8-byte Folded Spill + fst.d $fa0, $sp, 696 # 8-byte Folded Spill fldx.d $fa0, $t7, $a7 - fst.d $fa0, $sp, 632 # 8-byte Folded Spill - fldx.d $fa0, $s7, $a7 - fst.d $fa0, $sp, 688 # 8-byte Folded Spill - st.d $t0, $sp, 432 # 8-byte Folded Spill + fst.d $fa0, $sp, 640 # 8-byte Folded Spill + fldx.d $fa0, $t8, $a7 + fst.d $fa0, $sp, 720 # 8-byte Folded Spill + st.d $t0, $sp, 456 # 8-byte Folded Spill slli.d $a7, $t0, 3 fldx.d $fa0, $s5, $a7 - fst.d $fa0, $sp, 248 # 8-byte Folded Spill - fldx.d $fa0, $s7, $a7 - fst.d $fa0, $sp, 264 # 8-byte Folded Spill + fst.d $fa0, $sp, 272 # 8-byte Folded Spill + fldx.d $fa0, $t8, $a7 + fst.d $fa0, $sp, 288 # 8-byte Folded Spill move $a7, $t5 move $t5, $t1 move $t1, $t3 @@ -3223,33 +3227,33 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd .p2align 4, , 16 .LBB16_17: # in Loop: Header=BB16_6 Depth=1 move $ra, $t0 - st.d $zero, $sp, 432 # 8-byte Folded Spill - fst.d $fs3, $sp, 664 # 8-byte Folded Spill - fst.d $fs3, $sp, 632 # 8-byte Folded Spill - fst.d $fs3, $sp, 688 # 8-byte Folded Spill - fst.d $fs3, $sp, 248 # 8-byte Folded Spill - fst.d $fs3, $sp, 264 # 8-byte Folded Spill + st.d $zero, $sp, 456 # 8-byte Folded Spill + fst.d $fa1, $sp, 696 # 8-byte Folded Spill + fst.d $fa1, $sp, 640 # 8-byte Folded Spill + fst.d $fa1, $sp, 720 # 8-byte Folded Spill + fst.d $fa1, $sp, 272 # 8-byte Folded Spill + fst.d $fa1, $sp, 288 # 8-byte Folded Spill .LBB16_18: # in Loop: Header=BB16_6 Depth=1 - fldx.d $fs7, $t1, $s8 - fldx.d $ft13, $t5, $s8 - st.d $a7, $sp, 696 # 8-byte Folded Spill - fldx.d $fa0, $a7, $s8 + fldx.d $fs7, $t1, $s7 + fldx.d $ft13, $t5, $s7 + st.d $a7, $sp, 728 # 8-byte Folded Spill + fldx.d $fa0, $a7, $s7 slli.d $t0, $s1, 3 fldx.d $fs4, $s5, $t0 slli.d $a7, $a0, 3 - ld.d $t3, $sp, 712 # 8-byte Folded Reload + ld.d $t3, $sp, 744 # 8-byte Folded Reload fldx.d $ft3, $t3, $a7 slli.d $a7, $fp, 3 fldx.d $fs0, $t3, $a7 fldx.d $ft12, $t7, $t0 - fldx.d $fa3, $s7, $t0 + fldx.d $fa3, $t8, $t0 fmul.d $fa1, $ft3, $ft3 fmul.d $fa2, $fs0, $fs0 fmul.d $ft6, $fs7, $ft3 fmul.d $fa6, $fs4, $fs0 fadd.d $fa6, $fa6, $ft6 - fadd.d $fs5, $fs0, $ft3 - fdiv.d $fa6, $fa6, $fs5 + fadd.d $fs3, $fs0, $ft3 + fdiv.d $fa6, $fa6, $fs3 fmul.d $ft1, $ft13, $fs0 fdiv.d $fa7, $ft3, $fs0 fmin.d $ft11, $fa7, $ft8 @@ -3269,11 +3273,11 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fdiv.d $fa1, $fa7, $ft5 fmul.d $fa1, $fs2, $fa1 fsub.d $ft15, $fa6, $fa1 - pcalau12i $t0, %pc_hi20(.LCPI16_0) - fld.d $fa1, $t0, %pc_lo12(.LCPI16_0) - fmul.d $fa6, $ft12, $ft12 - fdiv.d $fa6, $fa6, $fs4 + fmul.d $fa1, $ft12, $ft12 + fdiv.d $fa6, $fa1, $fs4 fmul.d $fa7, $fs4, $fs4 + ld.d $t0, $sp, 184 # 8-byte Folded Reload + movgr2fr.d $fa1, $t0 fmul.d $fa7, $fa7, $fa1 fadd.d $fa7, $fa7, $fa6 fmul.d $fa6, $ft13, $ft13 @@ -3284,7 +3288,7 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fmul.d $ft8, $ft13, $ft3 fmul.d $ft9, $ft12, $fs0 fadd.d $ft9, $ft9, $ft8 - fdiv.d $ft10, $ft9, $fs5 + fdiv.d $ft10, $ft9, $fs3 fmul.d $ft2, $ft2, $fs0 fmul.d $ft9, $ft2, $ft11 fmul.d $fa7, $fa7, $ft3 @@ -3292,20 +3296,20 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fsub.d $fa7, $ft9, $fa7 fdiv.d $fa7, $fa7, $ft5 fmul.d $fa7, $fs2, $fa7 - fsub.d $fs6, $ft10, $fa7 - fst.d $ft12, $sp, 608 # 8-byte Folded Spill + fsub.d $fs5, $ft10, $fa7 + fst.d $ft12, $sp, 648 # 8-byte Folded Spill fmul.d $fa7, $ft12, $fa3 fdiv.d $ft12, $fa7, $fs4 - fst.d $ft13, $sp, 824 # 8-byte Folded Spill + fst.d $ft13, $sp, 856 # 8-byte Folded Spill fmul.d $fa7, $ft13, $fa0 fdiv.d $fa7, $fa7, $fs7 - fst.d $fa0, $sp, 816 # 8-byte Folded Spill + fst.d $fa0, $sp, 848 # 8-byte Folded Spill fmul.d $ft10, $fa0, $ft3 fmul.d $fa3, $fa3, $fs0 fadd.d $fa3, $fa3, $ft10 - fdiv.d $ft13, $fa3, $fs5 + fdiv.d $ft13, $fa3, $fs3 fmul.d $fa0, $fa7, $fs0 - fst.d $fa0, $sp, 728 # 8-byte Folded Spill + fst.d $fa0, $sp, 760 # 8-byte Folded Spill fmul.d $ft11, $fa0, $ft11 fmul.d $ft12, $ft12, $ft3 fmul.d $ft12, $ft12, $ft4 @@ -3313,24 +3317,25 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fdiv.d $ft12, $ft12, $ft5 fmul.d $ft12, $fs2, $ft12 fsub.d $ft12, $ft13, $ft12 - fmul.d $ft13, $fs6, $fs6 + fmul.d $ft13, $fs5, $fs5 fdiv.d $ft13, $ft13, $ft15 vldi $vr25, -928 fmul.d $ft14, $ft15, $ft15 fmul.d $ft14, $ft14, $fa1 fadd.d $fa0, $ft13, $ft14 - fmul.d $ft12, $fs6, $ft12 - fst.d $ft15, $sp, 320 # 8-byte Folded Spill + fmul.d $ft12, $fs5, $ft12 + fst.d $ft15, $sp, 352 # 8-byte Folded Spill fdiv.d $fa3, $ft12, $ft15 - fst.d $fa3, $sp, 808 # 8-byte Folded Spill - fst.d $fs6, $sp, 720 # 8-byte Folded Spill + fst.d $fa3, $sp, 840 # 8-byte Folded Spill + fst.d $fs3, $sp, 752 # 8-byte Folded Spill + fst.d $fs5, $sp, 336 # 8-byte Folded Spill bge $fp, $a0, .LBB16_20 # %bb.19: # in Loop: Header=BB16_6 Depth=1 - fld.d $ft13, $sp, 752 # 8-byte Folded Reload + fld.d $ft13, $sp, 792 # 8-byte Folded Reload fmul.d $ft12, $fs0, $ft13 fadd.d $ft6, $ft6, $ft12 - fdiv.d $ft6, $ft6, $fs5 - fld.d $fa3, $sp, 768 # 8-byte Folded Reload + fdiv.d $ft6, $ft6, $fs3 + fld.d $fa3, $sp, 808 # 8-byte Folded Reload fmul.d $ft12, $ft3, $fa3 fmul.d $ft12, $ft4, $ft12 fsub.d $ft7, $ft7, $ft12 @@ -3344,7 +3349,7 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fadd.d $ft6, $ft6, $ft7 fmul.d $ft7, $fs0, $fa3 fadd.d $ft7, $ft8, $ft7 - fdiv.d $ft7, $ft7, $fs5 + fdiv.d $ft7, $ft7, $fs3 fmul.d $ft6, $ft3, $ft6 fmul.d $ft6, $ft4, $ft6 fsub.d $ft6, $ft9, $ft6 @@ -3355,16 +3360,16 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fdiv.d $ft6, $ft6, $ft13 fmul.d $ft0, $fs0, $ft0 fadd.d $ft0, $ft10, $ft0 - fdiv.d $ft0, $ft0, $fs5 + fdiv.d $ft0, $ft0, $fs3 fmul.d $ft3, $ft3, $ft6 fmul.d $ft3, $ft4, $ft3 fsub.d $ft3, $ft11, $ft3 fdiv.d $ft3, $ft3, $ft5 fmul.d $ft3, $fs2, $ft3 fsub.d $ft0, $ft0, $ft3 - fadd.d $ft3, $fs6, $ft7 + fadd.d $ft3, $fs5, $ft7 fmul.d $fa3, $ft3, $fs1 - fst.d $fa3, $sp, 464 # 8-byte Folded Spill + fst.d $fa3, $sp, 488 # 8-byte Folded Spill fmul.d $ft3, $ft7, $ft7 fdiv.d $ft3, $ft3, $ft12 fmul.d $ft4, $ft12, $ft12 @@ -3372,37 +3377,40 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fadd.d $ft3, $ft4, $ft3 fadd.d $ft3, $fa0, $ft3 fmul.d $fa0, $ft3, $fs1 - fst.d $fa0, $sp, 472 # 8-byte Folded Spill - fst.d $ft7, $sp, 304 # 8-byte Folded Spill + fst.d $fa0, $sp, 496 # 8-byte Folded Spill + fst.d $ft7, $sp, 320 # 8-byte Folded Spill fmul.d $ft0, $ft0, $ft7 - fst.d $ft12, $sp, 744 # 8-byte Folded Spill + fst.d $ft12, $sp, 776 # 8-byte Folded Spill fdiv.d $ft0, $ft0, $ft12 - fld.d $fa0, $sp, 808 # 8-byte Folded Reload + fld.d $fa0, $sp, 840 # 8-byte Folded Reload fadd.d $ft0, $fa0, $ft0 fmul.d $fa0, $ft0, $fs1 - fst.d $fa0, $sp, 808 # 8-byte Folded Spill + fst.d $fa0, $sp, 840 # 8-byte Folded Spill + vldi $vr22, -928 + fld.d $fs1, $sp, 736 # 8-byte Folded Reload b .LBB16_21 .p2align 4, , 16 .LBB16_20: # in Loop: Header=BB16_6 Depth=1 - fst.d $fa0, $sp, 472 # 8-byte Folded Spill - fst.d $fs6, $sp, 464 # 8-byte Folded Spill - fst.d $fs3, $sp, 744 # 8-byte Folded Spill - fst.d $fs3, $sp, 304 # 8-byte Folded Spill -.LBB16_21: # in Loop: Header=BB16_6 Depth=1 + fst.d $fa0, $sp, 496 # 8-byte Folded Spill + fst.d $fs5, $sp, 488 # 8-byte Folded Spill + fld.d $fs1, $sp, 736 # 8-byte Folded Reload + fst.d $fs1, $sp, 776 # 8-byte Folded Spill + fst.d $fs1, $sp, 320 # 8-byte Folded Spill vldi $vr22, -928 +.LBB16_21: # in Loop: Header=BB16_6 Depth=1 slli.d $t0, $s3, 3 - ld.d $t3, $sp, 712 # 8-byte Folded Reload + ld.d $t3, $sp, 744 # 8-byte Folded Reload fldx.d $ft0, $t3, $t0 slli.d $t0, $s0, 3 - fldx.d $fs1, $s5, $t0 + fldx.d $fs3, $s5, $t0 fldx.d $fa3, $t7, $t0 - fldx.d $ft7, $s7, $t0 + fldx.d $ft7, $t8, $t0 fmul.d $ft6, $ft0, $ft0 - fmul.d $ft3, $fs1, $fs0 + fmul.d $ft3, $fs3, $fs0 fmul.d $ft4, $fs7, $ft0 fadd.d $ft3, $ft3, $ft4 - fadd.d $fs6, $fs0, $ft0 - fdiv.d $ft8, $ft3, $fs6 + fadd.d $fs5, $fs0, $ft0 + fdiv.d $ft8, $ft3, $fs5 fmul.d $ft5, $fa3, $ft0 fdiv.d $ft3, $fs0, $ft0 vldi $vr20, -912 @@ -3423,56 +3431,55 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fmul.d $ft6, $fs2, $ft6 fsub.d $ft13, $ft8, $ft6 fmul.d $ft6, $fa3, $fa3 - fdiv.d $ft6, $ft6, $fs1 - fmul.d $ft8, $fs1, $fs1 + fdiv.d $ft6, $ft6, $fs3 + fmul.d $ft8, $fs3, $fs3 fmul.d $ft8, $ft8, $fa1 fadd.d $ft8, $ft8, $ft6 fmul.d $ft9, $fa3, $fs0 - fld.d $fa0, $sp, 824 # 8-byte Folded Reload + fld.d $fa0, $sp, 856 # 8-byte Folded Reload fmul.d $ft6, $fa0, $ft0 fadd.d $ft9, $ft9, $ft6 - fdiv.d $ft9, $ft9, $fs6 + fdiv.d $ft9, $ft9, $fs5 fmul.d $ft8, $ft8, $ft0 fmul.d $ft8, $ft8, $ft3 fmul.d $ft2, $ft2, $ft10 fsub.d $ft8, $ft8, $ft2 fdiv.d $ft8, $ft8, $ft1 fmul.d $ft8, $fs2, $ft8 - fsub.d $ft15, $ft9, $ft8 - fst.d $fa3, $sp, 592 # 8-byte Folded Spill + fsub.d $fs6, $ft9, $ft8 + fst.d $fa3, $sp, 784 # 8-byte Folded Spill fmul.d $ft8, $fa3, $ft7 - fdiv.d $ft8, $ft8, $fs1 + fdiv.d $ft8, $ft8, $fs3 fmul.d $ft9, $ft7, $fs0 - fld.d $fa0, $sp, 816 # 8-byte Folded Reload + fld.d $fa0, $sp, 848 # 8-byte Folded Reload fmul.d $ft7, $fa0, $ft0 fadd.d $ft9, $ft9, $ft7 - fdiv.d $ft9, $ft9, $fs6 + fdiv.d $ft9, $ft9, $fs5 fmul.d $ft8, $ft8, $ft0 fmul.d $ft11, $ft8, $ft3 - fld.d $fa3, $sp, 728 # 8-byte Folded Reload + fld.d $fa3, $sp, 760 # 8-byte Folded Reload fmul.d $ft8, $fa3, $ft10 fsub.d $ft10, $ft11, $ft8 fdiv.d $ft10, $ft10, $ft1 fmul.d $ft10, $fs2, $ft10 fsub.d $ft9, $ft9, $ft10 - fmul.d $ft10, $ft15, $ft15 + fmul.d $ft10, $fs6, $fs6 fdiv.d $ft10, $ft10, $ft13 fmul.d $ft11, $ft13, $ft13 fmul.d $ft11, $ft11, $fa1 fadd.d $ft10, $ft10, $ft11 - fmul.d $ft9, $ft15, $ft9 - fst.d $ft13, $sp, 552 # 8-byte Folded Spill + fmul.d $ft9, $fs6, $ft9 + fst.d $ft13, $sp, 576 # 8-byte Folded Spill fdiv.d $fa3, $ft9, $ft13 - fst.d $fs5, $sp, 336 # 8-byte Folded Spill - fst.d $fs6, $sp, 328 # 8-byte Folded Spill - fst.d $ft15, $sp, 312 # 8-byte Folded Spill + fst.d $fs5, $sp, 360 # 8-byte Folded Spill + fst.d $fs6, $sp, 344 # 8-byte Folded Spill bge $fp, $s3, .LBB16_23 # %bb.22: # in Loop: Header=BB16_6 Depth=1 - fld.d $ft13, $sp, 760 # 8-byte Folded Reload + fld.d $ft13, $sp, 800 # 8-byte Folded Reload fmul.d $ft9, $fs0, $ft13 fadd.d $ft4, $ft4, $ft9 - fdiv.d $ft4, $ft4, $fs6 - fld.d $ft11, $sp, 776 # 8-byte Folded Reload + fdiv.d $ft4, $ft4, $fs5 + fld.d $ft11, $sp, 816 # 8-byte Folded Reload fmul.d $ft9, $ft0, $ft11 fmul.d $ft9, $ft3, $ft9 fsub.d $ft5, $ft9, $ft5 @@ -3486,7 +3493,7 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fadd.d $ft4, $ft5, $ft4 fmul.d $ft5, $fs0, $ft11 fadd.d $ft5, $ft6, $ft5 - fdiv.d $ft5, $ft5, $fs6 + fdiv.d $ft5, $ft5, $fs5 fmul.d $ft4, $ft0, $ft4 fmul.d $ft4, $ft3, $ft4 fsub.d $ft2, $ft4, $ft2 @@ -3497,16 +3504,16 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fdiv.d $ft2, $ft2, $ft13 fmul.d $fa5, $fs0, $fa5 fadd.d $fa5, $ft7, $fa5 - fdiv.d $fa5, $fa5, $fs6 + fdiv.d $fa5, $fa5, $fs5 fmul.d $ft0, $ft0, $ft2 fmul.d $ft0, $ft3, $ft0 fsub.d $ft0, $ft0, $ft8 fdiv.d $ft0, $ft0, $ft1 fmul.d $ft0, $fs2, $ft0 fsub.d $fa5, $fa5, $ft0 - fadd.d $ft0, $ft15, $ft4 + fadd.d $ft0, $fs6, $ft4 fmul.d $ft0, $ft0, $ft14 - fst.d $ft0, $sp, 440 # 8-byte Folded Spill + fst.d $ft0, $sp, 464 # 8-byte Folded Spill fmul.d $ft0, $ft4, $ft4 fdiv.d $ft0, $ft0, $ft9 fmul.d $ft1, $ft9, $ft9 @@ -3514,30 +3521,29 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fadd.d $ft0, $ft1, $ft0 fadd.d $ft0, $ft10, $ft0 fmul.d $ft0, $ft0, $ft14 - fst.d $ft0, $sp, 448 # 8-byte Folded Spill - fst.d $ft4, $sp, 640 # 8-byte Folded Spill + fst.d $ft0, $sp, 472 # 8-byte Folded Spill + fst.d $ft4, $sp, 328 # 8-byte Folded Spill fmul.d $fa5, $ft4, $fa5 - fst.d $ft9, $sp, 736 # 8-byte Folded Spill + fst.d $ft9, $sp, 768 # 8-byte Folded Spill fdiv.d $fa5, $fa5, $ft9 fadd.d $fa5, $fa3, $fa5 fmul.d $fa3, $fa5, $ft14 b .LBB16_24 .p2align 4, , 16 .LBB16_23: # in Loop: Header=BB16_6 Depth=1 - fst.d $ft10, $sp, 448 # 8-byte Folded Spill - fst.d $ft15, $sp, 440 # 8-byte Folded Spill - fst.d $fs3, $sp, 736 # 8-byte Folded Spill - fst.d $fs3, $sp, 640 # 8-byte Folded Spill + fst.d $ft10, $sp, 472 # 8-byte Folded Spill + fst.d $fs6, $sp, 464 # 8-byte Folded Spill + fst.d $fs1, $sp, 768 # 8-byte Folded Spill + fst.d $fs1, $sp, 328 # 8-byte Folded Spill .LBB16_24: # in Loop: Header=BB16_6 Depth=1 + ld.d $t3, $sp, 688 # 8-byte Folded Reload slli.d $t0, $a6, 3 - ld.d $t3, $sp, 704 # 8-byte Folded Reload - fldx.d $ft4, $t3, $t0 - ld.d $t0, $sp, 784 # 8-byte Folded Reload - slli.d $t0, $t0, 3 + fldx.d $ft4, $s6, $t0 + slli.d $t0, $t3, 3 fldx.d $fs5, $s5, $t0 - fldx.d $fa5, $t3, $a7 + fldx.d $fa5, $s6, $a7 fldx.d $ft2, $t7, $t0 - fldx.d $ft13, $s7, $t0 + fldx.d $ft13, $t8, $t0 fmul.d $ft5, $ft4, $ft4 fmul.d $ft0, $fa5, $fa5 fmul.d $ft6, $fs7, $ft4 @@ -3566,7 +3572,7 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fsub.d $fs6, $ft7, $ft8 fmul.d $ft7, $ft2, $ft13 fdiv.d $ft8, $ft7, $fs5 - fld.d $ft7, $sp, 824 # 8-byte Folded Reload + fld.d $ft7, $sp, 856 # 8-byte Folded Reload fmul.d $ft7, $ft7, $ft4 fmul.d $ft2, $ft2, $fa5 fadd.d $ft2, $ft2, $ft7 @@ -3581,7 +3587,7 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fsub.d $fa7, $ft12, $fa7 fmul.d $ft8, $ft13, $ft13 fdiv.d $ft8, $ft8, $fs5 - fst.d $fs5, $sp, 616 # 8-byte Folded Spill + fst.d $fs5, $sp, 624 # 8-byte Folded Spill fmul.d $ft12, $fs5, $fs5 fmul.d $ft12, $ft12, $fa1 fadd.d $ft12, $ft12, $ft8 @@ -3590,7 +3596,7 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fadd.d $fa6, $fa6, $ft8 fmul.d $ft8, $fa0, $ft4 vldi $vr0, -912 - fst.d $ft13, $sp, 624 # 8-byte Folded Spill + fst.d $ft13, $sp, 632 # 8-byte Folded Spill fmul.d $ft13, $ft13, $fa5 fadd.d $ft13, $ft13, $ft8 fdiv.d $ft13, $ft13, $ft3 @@ -3602,26 +3608,26 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fdiv.d $ft9, $ft9, $ft10 fmul.d $ft9, $fs2, $ft9 fsub.d $ft10, $ft13, $ft9 - fld.d $ft13, $sp, 816 # 8-byte Folded Reload + fld.d $ft13, $sp, 848 # 8-byte Folded Reload fmul.d $fa7, $ft10, $fa7 fdiv.d $fs5, $fa7, $fs6 fmul.d $fa7, $ft10, $ft10 fdiv.d $fa7, $fa7, $fs6 - fst.d $fs6, $sp, 568 # 8-byte Folded Spill + fst.d $fs6, $sp, 592 # 8-byte Folded Spill fmul.d $ft9, $fs6, $fs6 fmul.d $ft9, $ft9, $fa1 fadd.d $fs6, $fa7, $ft9 fadd.d $fa7, $fs0, $ft4 fmul.d $ft11, $ft13, $fs0 fmul.d $ft15, $fa6, $fs0 - fst.d $fa3, $sp, 456 # 8-byte Folded Spill - fst.d $ft10, $sp, 392 # 8-byte Folded Spill - fst.d $fa7, $sp, 376 # 8-byte Folded Spill - fst.d $fs4, $sp, 360 # 8-byte Folded Spill - fst.d $ft11, $sp, 584 # 8-byte Folded Spill + fst.d $fa3, $sp, 480 # 8-byte Folded Spill + fst.d $ft10, $sp, 416 # 8-byte Folded Spill + fst.d $fa7, $sp, 400 # 8-byte Folded Spill + fst.d $fs4, $sp, 384 # 8-byte Folded Spill + fst.d $ft11, $sp, 616 # 8-byte Folded Spill bge $fp, $a6, .LBB16_26 # %bb.25: # in Loop: Header=BB16_6 Depth=1 - fld.d $fa6, $sp, 792 # 8-byte Folded Reload + fld.d $fa6, $sp, 824 # 8-byte Folded Reload fmov.d $fa3, $ft15 fmul.d $ft9, $fs0, $fa6 fadd.d $ft6, $ft6, $ft9 @@ -3630,7 +3636,7 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fmin.d $ft9, $ft9, $fa0 fmov.d $fs4, $ft10 fmul.d $ft10, $ft11, $ft9 - fld.d $ft15, $sp, 800 # 8-byte Folded Reload + fld.d $ft15, $sp, 832 # 8-byte Folded Reload fmul.d $ft11, $ft4, $ft15 fdiv.d $ft12, $fs0, $ft4 fmin.d $ft12, $ft12, $fa0 @@ -3642,7 +3648,7 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fdiv.d $ft13, $fa2, $ft5 fmin.d $ft13, $ft13, $ft14 fmul.d $ft5, $ft5, $ft13 - fld.d $ft13, $sp, 816 # 8-byte Folded Reload + fld.d $ft13, $sp, 848 # 8-byte Folded Reload fadd.d $ft5, $ft11, $ft5 fdiv.d $ft10, $ft10, $ft5 fmul.d $ft10, $fs2, $ft10 @@ -3652,7 +3658,7 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fmul.d $fa4, $fs0, $fa4 fadd.d $fa4, $ft7, $fa4 fdiv.d $fa4, $fa4, $fa7 - fld.d $ft7, $sp, 728 # 8-byte Folded Reload + fld.d $ft7, $sp, 760 # 8-byte Folded Reload fmul.d $ft7, $ft7, $ft9 fmul.d $ft6, $ft4, $ft6 fmul.d $ft6, $ft12, $ft6 @@ -3678,40 +3684,41 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fsub.d $fa6, $ft7, $ft4 fadd.d $ft4, $fs4, $fa6 fmul.d $fa3, $ft4, $ft14 - fst.d $fa3, $sp, 408 # 8-byte Folded Spill + fst.d $fa3, $sp, 432 # 8-byte Folded Spill fmul.d $fa4, $fa4, $fa6 fdiv.d $fa4, $fa4, $ft10 fadd.d $fa4, $fs5, $fa4 fmul.d $fa3, $fa4, $ft14 - fst.d $fa3, $sp, 424 # 8-byte Folded Spill - fst.d $fa6, $sp, 344 # 8-byte Folded Spill + fst.d $fa3, $sp, 448 # 8-byte Folded Spill + fst.d $fa6, $sp, 368 # 8-byte Folded Spill fmul.d $fa4, $fa6, $fa6 fdiv.d $fa4, $fa4, $ft10 - fst.d $ft10, $sp, 680 # 8-byte Folded Spill + fst.d $ft10, $sp, 712 # 8-byte Folded Spill fmul.d $ft4, $ft10, $ft10 fmul.d $ft4, $ft4, $fa1 fadd.d $fa4, $ft4, $fa4 fadd.d $fa4, $fs6, $fa4 fmul.d $fa6, $fa4, $ft14 - ld.d $a7, $sp, 696 # 8-byte Folded Reload + ld.d $a7, $sp, 728 # 8-byte Folded Reload + move $t0, $ra b .LBB16_27 .p2align 4, , 16 .LBB16_26: # in Loop: Header=BB16_6 Depth=1 - fst.d $fs5, $sp, 424 # 8-byte Folded Spill - fst.d $ft10, $sp, 408 # 8-byte Folded Spill - fst.d $fs3, $sp, 680 # 8-byte Folded Spill - fst.d $fs3, $sp, 344 # 8-byte Folded Spill - ld.d $a7, $sp, 696 # 8-byte Folded Reload + fst.d $fs5, $sp, 448 # 8-byte Folded Spill + fst.d $ft10, $sp, 432 # 8-byte Folded Spill + fst.d $fs1, $sp, 712 # 8-byte Folded Spill + fst.d $fs1, $sp, 368 # 8-byte Folded Spill + ld.d $a7, $sp, 728 # 8-byte Folded Reload + move $t0, $ra fmov.d $fa6, $fs6 .LBB16_27: # in Loop: Header=BB16_6 Depth=1 slli.d $a6, $a5, 3 - ld.d $t3, $sp, 704 # 8-byte Folded Reload - fldx.d $fa4, $t3, $a6 - move $t3, $t8 - slli.d $a6, $t8, 3 + fldx.d $fa4, $s6, $a6 + ld.d $ra, $sp, 680 # 8-byte Folded Reload + slli.d $a6, $ra, 3 fldx.d $fs5, $s5, $a6 fldx.d $ft6, $t7, $a6 - fldx.d $fa3, $s7, $a6 + fldx.d $fa3, $t8, $a6 fmul.d $ft4, $fa4, $fa4 fmul.d $ft7, $fs5, $fa5 fmul.d $ft5, $fs7, $fa4 @@ -3740,7 +3747,7 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fmul.d $ft0, $ft6, $fa3 fdiv.d $ft1, $ft0, $fs5 fmul.d $ft6, $ft6, $fa5 - fld.d $ft7, $sp, 824 # 8-byte Folded Reload + fld.d $ft7, $sp, 856 # 8-byte Folded Reload fmul.d $ft0, $ft7, $fa4 fadd.d $ft6, $ft6, $ft0 fdiv.d $ft6, $ft6, $ft8 @@ -3753,7 +3760,7 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fsub.d $ft1, $ft6, $ft1 fmul.d $ft2, $fa3, $fa3 fdiv.d $ft2, $ft2, $fs5 - fst.d $fs5, $sp, 600 # 8-byte Folded Spill + fst.d $fs5, $sp, 672 # 8-byte Folded Spill fmul.d $ft6, $fs5, $fs5 fmul.d $ft6, $ft6, $fa1 fadd.d $ft2, $ft6, $ft2 @@ -3772,27 +3779,26 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fdiv.d $ft11, $ft1, $fa0 fmul.d $ft1, $fs5, $fs5 fdiv.d $ft1, $ft1, $fa0 - fst.d $fa0, $sp, 560 # 8-byte Folded Spill + fst.d $fa0, $sp, 584 # 8-byte Folded Spill fmul.d $ft2, $fa0, $fa0 fmul.d $ft2, $ft2, $fa1 fadd.d $ft10, $ft1, $ft2 fadd.d $fs4, $fs0, $fa4 vldi $vr16, -912 - fld.d $fs6, $sp, 720 # 8-byte Folded Reload bge $fp, $a5, .LBB16_29 # %bb.28: # in Loop: Header=BB16_6 Depth=1 - fld.d $ft9, $sp, 664 # 8-byte Folded Reload + fld.d $ft9, $sp, 696 # 8-byte Folded Reload fmul.d $ft1, $fs0, $ft9 fadd.d $ft1, $ft5, $ft1 fdiv.d $ft1, $ft1, $fs4 - fld.d $ft6, $sp, 688 # 8-byte Folded Reload + fld.d $ft6, $sp, 720 # 8-byte Folded Reload fmul.d $ft2, $fa4, $ft6 fdiv.d $ft3, $fs0, $fa4 fmin.d $ft3, $ft3, $ft8 fmul.d $ft2, $ft3, $ft2 fdiv.d $ft5, $fa4, $fs0 fmin.d $ft5, $ft5, $ft8 - fld.d $fa0, $sp, 584 # 8-byte Folded Reload + fld.d $fa0, $sp, 616 # 8-byte Folded Reload fmul.d $fa7, $fa0, $ft5 fsub.d $fa7, $ft2, $fa7 fdiv.d $ft2, $fa2, $ft4 @@ -3805,7 +3811,7 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fdiv.d $fa7, $fa7, $fa2 fmul.d $fa7, $fs2, $fa7 fsub.d $ft2, $ft1, $fa7 - fld.d $fa0, $sp, 632 # 8-byte Folded Reload + fld.d $fa0, $sp, 640 # 8-byte Folded Reload fmul.d $fa7, $fa0, $ft6 fdiv.d $fa7, $fa7, $ft9 fmul.d $fa0, $fs0, $fa0 @@ -3813,7 +3819,7 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fdiv.d $fa0, $fa0, $fs4 fmul.d $fa7, $fa4, $fa7 fmul.d $fa7, $ft3, $fa7 - fld.d $fa3, $sp, 728 # 8-byte Folded Reload + fld.d $fa3, $sp, 760 # 8-byte Folded Reload fmul.d $fa3, $fa3, $ft5 fsub.d $fa3, $fa7, $fa3 fdiv.d $fa3, $fa3, $fa2 @@ -3840,7 +3846,7 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fdiv.d $fa0, $fa0, $ft2 fadd.d $fa0, $ft11, $fa0 fmul.d $ft11, $fa0, $ft14 - fst.d $fa3, $sp, 352 # 8-byte Folded Spill + fst.d $fa3, $sp, 376 # 8-byte Folded Spill fmul.d $fa0, $fa3, $fa3 fdiv.d $fa0, $fa0, $ft2 fmul.d $fa2, $ft2, $ft2 @@ -3848,98 +3854,100 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fadd.d $fa0, $fa1, $fa0 fadd.d $fa0, $ft10, $fa0 fmul.d $ft10, $fa0, $ft14 - fld.d $fa5, $sp, 200 # 8-byte Folded Reload + fld.d $fa5, $sp, 224 # 8-byte Folded Reload b .LBB16_30 .p2align 4, , 16 .LBB16_29: # in Loop: Header=BB16_6 Depth=1 fmov.d $ft1, $fs5 - fmov.d $ft2, $fs3 - fst.d $fs3, $sp, 352 # 8-byte Folded Spill - fld.d $ft6, $sp, 688 # 8-byte Folded Reload - fld.d $fa5, $sp, 200 # 8-byte Folded Reload - fld.d $ft9, $sp, 664 # 8-byte Folded Reload + fmov.d $ft2, $fs1 + fst.d $fs1, $sp, 376 # 8-byte Folded Spill + fld.d $ft6, $sp, 720 # 8-byte Folded Reload + fld.d $fa5, $sp, 224 # 8-byte Folded Reload + fld.d $ft9, $sp, 696 # 8-byte Folded Reload .LBB16_30: # in Loop: Header=BB16_6 Depth=1 - fld.d $ft0, $sp, 680 # 8-byte Folded Reload - ld.d $a5, $sp, 280 # 8-byte Folded Reload + fld.d $ft0, $sp, 712 # 8-byte Folded Reload + ld.d $a5, $sp, 304 # 8-byte Folded Reload ldx.w $a1, $a5, $a1 slli.d $a5, $a1, 3 - fldx.d $fs3, $s5, $a5 + fldx.d $fs1, $s5, $a5 slli.d $a1, $a1, 2 ldx.w $a6, $s4, $a1 fldx.d $fa2, $t7, $a5 - st.d $s6, $sp, 480 # 8-byte Folded Spill + st.d $s8, $sp, 504 # 8-byte Folded Spill bge $a0, $a6, .LBB16_32 # %bb.31: # in Loop: Header=BB16_6 Depth=1 ldx.w $a0, $t4, $a1 slli.d $a0, $a0, 3 fldx.d $fa0, $s5, $a0 fldx.d $fa1, $t7, $a0 - fadd.d $fa0, $fs3, $fa0 - fmul.d $fs3, $fa0, $ft14 + fadd.d $fa0, $fs1, $fa0 + fmul.d $fs1, $fa0, $ft14 fadd.d $fa0, $fa2, $fa1 fmul.d $fa2, $fa0, $ft14 .LBB16_32: # in Loop: Header=BB16_6 Depth=1 - ld.d $a5, $sp, 672 # 8-byte Folded Reload - fst.d $fa2, $sp, 288 # 8-byte Folded Spill - fst.d $fs4, $sp, 368 # 8-byte Folded Spill - fst.d $fs5, $sp, 384 # 8-byte Folded Spill - st.d $s2, $sp, 584 # 8-byte Folded Spill + ld.d $a5, $sp, 704 # 8-byte Folded Reload + fst.d $fa2, $sp, 312 # 8-byte Folded Spill + fst.d $fs4, $sp, 392 # 8-byte Folded Spill + fst.d $fs5, $sp, 408 # 8-byte Folded Spill + st.d $s2, $sp, 616 # 8-byte Folded Spill ldx.w $s6, $t2, $a2 ldx.w $a6, $t4, $a4 - ld.d $a0, $sp, 272 # 8-byte Folded Reload - ldx.w $t8, $a0, $a3 + ld.d $a0, $sp, 296 # 8-byte Folded Reload + ldx.w $s8, $a0, $a3 slli.d $a0, $s6, 3 fldx.d $fs2, $s5, $a0 fldx.d $fa7, $t7, $a0 slli.d $a0, $a6, 3 fldx.d $fa0, $s5, $a0 - fst.d $fa0, $sp, 576 # 8-byte Folded Spill - fldx.d $ft5, $s7, $a0 - pcalau12i $a0, %pc_hi20(.LCPI16_1) - fld.d $ft3, $a0, %pc_lo12(.LCPI16_1) - slli.d $a0, $t8, 3 + fst.d $fa0, $sp, 608 # 8-byte Folded Spill + fldx.d $ft5, $t8, $a0 + slli.d $a0, $s8, 3 fldx.d $fa0, $s5, $a0 - fst.d $fa0, $sp, 296 # 8-byte Folded Spill - fldx.d $fa0, $s7, $a0 - fst.d $fa0, $sp, 728 # 8-byte Folded Spill - fld.d $fs4, $sp, 320 # 8-byte Folded Reload + fst.d $fa0, $sp, 600 # 8-byte Folded Spill + fldx.d $fa0, $t8, $a0 + fst.d $fa0, $sp, 760 # 8-byte Folded Spill + ld.d $a0, $sp, 192 # 8-byte Folded Reload + lu52i.d $a0, $a0, 1026 + movgr2fr.d $ft3, $a0 + fld.d $fs4, $sp, 352 # 8-byte Folded Reload fmul.d $fa0, $fs4, $ft3 - fst.d $fa0, $sp, 544 # 8-byte Folded Spill + fst.d $fa0, $sp, 568 # 8-byte Folded Spill fsqrt.d $fa0, $fa0 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB16_97 + bceqz $fcc0, .LBB16_98 .LBB16_33: # %.split # in Loop: Header=BB16_6 Depth=1 alsl.d $s1, $s1, $s4, 2 - slt $s3, $fp, $s3 - fld.d $fa1, $sp, 760 # 8-byte Folded Reload - fadd.d $fa1, $fs1, $fa1 + slt $s2, $fp, $s3 + fld.d $fa1, $sp, 800 # 8-byte Folded Reload + fadd.d $fa1, $fs3, $fa1 fmul.d $fa1, $fa1, $ft14 - movgr2cf $fcc0, $s3 - fsel $fa1, $fs1, $fa1, $fcc0 - fld.d $fa2, $sp, 336 # 8-byte Folded Reload + movgr2cf $fcc0, $s2 + fsel $fa1, $fs3, $fa1, $fcc0 + fld.d $fa2, $sp, 752 # 8-byte Folded Reload fmul.d $fa4, $fa2, $ft14 - fdiv.d $fa2, $fs6, $fs4 + fld.d $fa2, $sp, 336 # 8-byte Folded Reload + fdiv.d $fa2, $fa2, $fs4 fabs.d $fa2, $fa2 - fst.d $fa2, $sp, 336 # 8-byte Folded Spill + fst.d $fa2, $sp, 352 # 8-byte Folded Spill fadd.d $fa0, $fa2, $fa0 - fld.d $fs4, $sp, 360 # 8-byte Folded Reload + fld.d $fs4, $sp, 384 # 8-byte Folded Reload fsub.d $fa2, $fs7, $fs4 - fsub.d $fa3, $fs4, $fs3 - fsub.d $fs3, $fa1, $fs7 + fsub.d $fa3, $fs4, $fs1 + fsub.d $fs1, $fa1, $fs7 fmul.d $fa0, $fa0, $ft14 fmul.d $fa0, $fa5, $fa0 - fst.d $fa4, $sp, 720 # 8-byte Folded Spill + fst.d $fa4, $sp, 752 # 8-byte Folded Spill fdiv.d $fa0, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI16_2) - fld.d $fs6, $a0, %pc_lo12(.LCPI16_2) fsub.d $fa1, $ft8, $fa0 fmul.d $fa0, $fa0, $fa1 fmul.d $fa1, $fa2, $fa2 + ld.d $a0, $sp, 176 # 8-byte Folded Reload + movgr2fr.d $fs6, $a0 fcmp.clt.d $fcc0, $fa1, $fs6 fsel $fa1, $fa1, $fs6, $fcc0 frecip.d $fa1, $fa1 - fmul.d $fa4, $fa2, $fs3 + fmul.d $fa4, $fa2, $fs1 fmul.d $fa4, $fa1, $fa4 fmul.d $fa3, $fa2, $fa3 fmul.d $fa1, $fa1, $fa3 @@ -3954,47 +3962,47 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd ld.w $a0, $s1, 0 fmul.d $fa0, $fa0, $fa1 fmul.d $ft4, $fa2, $fa0 - ld.d $a1, $sp, 536 # 8-byte Folded Reload - slli.d $s2, $a1, 2 - fld.d $fa0, $sp, 744 # 8-byte Folded Reload - fld.d $fa1, $sp, 304 # 8-byte Folded Reload + ld.d $a1, $sp, 560 # 8-byte Folded Reload + slli.d $s3, $a1, 2 + fld.d $fa0, $sp, 776 # 8-byte Folded Reload + fld.d $fa1, $sp, 320 # 8-byte Folded Reload fdiv.d $fa2, $fa1, $fa0 - ld.d $a3, $sp, 656 # 8-byte Folded Reload - ld.d $a4, $sp, 648 # 8-byte Folded Reload - fst.d $fa2, $sp, 320 # 8-byte Folded Spill + ld.d $a3, $sp, 664 # 8-byte Folded Reload + ld.d $a4, $sp, 656 # 8-byte Folded Reload + fst.d $fa2, $sp, 336 # 8-byte Folded Spill bge $fp, $a0, .LBB16_39 # %bb.34: # in Loop: Header=BB16_6 Depth=1 slli.d $a0, $a4, 2 ldx.w $a0, $s4, $a0 - ldx.w $a1, $s4, $s2 + ldx.w $a1, $s4, $s3 bge $a0, $a1, .LBB16_36 # %bb.35: # in Loop: Header=BB16_6 Depth=1 - ldx.w $a0, $t4, $s2 + ldx.w $a0, $t4, $s3 slli.d $a0, $a0, 3 fldx.d $fa0, $s5, $a0 - fld.d $fa1, $sp, 216 # 8-byte Folded Reload + fld.d $fa1, $sp, 240 # 8-byte Folded Reload fadd.d $fa0, $fa1, $fa0 fmul.d $fa3, $fa0, $ft14 b .LBB16_37 .p2align 4, , 16 .LBB16_36: # in Loop: Header=BB16_6 Depth=1 - fld.d $fa3, $sp, 216 # 8-byte Folded Reload + fld.d $fa3, $sp, 240 # 8-byte Folded Reload .LBB16_37: # in Loop: Header=BB16_6 Depth=1 - fld.d $fa0, $sp, 744 # 8-byte Folded Reload + fld.d $fa0, $sp, 776 # 8-byte Folded Reload fmul.d $fa1, $fa0, $ft3 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB16_105 + bceqz $fcc0, .LBB16_106 .LBB16_38: # %.split1369 # in Loop: Header=BB16_6 Depth=1 fabs.d $fa1, $fa2 fadd.d $fa0, $fa1, $fa0 - fld.d $fa2, $sp, 752 # 8-byte Folded Reload + fld.d $fa2, $sp, 792 # 8-byte Folded Reload fsub.d $fa1, $fs7, $fa2 fsub.d $fa2, $fa2, $fa3 fmul.d $fa0, $fa0, $ft14 fmul.d $fa0, $fa5, $fa0 - fld.d $fa3, $sp, 720 # 8-byte Folded Reload + fld.d $fa3, $sp, 752 # 8-byte Folded Reload fdiv.d $fa0, $fa0, $fa3 fsub.d $fa3, $ft8, $fa0 fmul.d $fa0, $fa0, $fa3 @@ -4002,7 +4010,7 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fcmp.clt.d $fcc0, $fa3, $fs6 fsel $fa3, $fa3, $fs6, $fcc0 frecip.d $fa3, $fa3 - fmul.d $fa4, $fa1, $fs3 + fmul.d $fa4, $fa1, $fs1 fmul.d $fa4, $fa3, $fa4 fmul.d $fa2, $fa1, $fa2 fmul.d $fa2, $fa3, $fa2 @@ -4019,7 +4027,7 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fmul.d $fa0, $fa0, $ft14 fmul.d $ft4, $fa0, $ft14 .LBB16_39: # in Loop: Header=BB16_6 Depth=1 - fld.d $fa3, $sp, 552 # 8-byte Folded Reload + fld.d $fa3, $sp, 576 # 8-byte Folded Reload alsl.d $s0, $s0, $s4, 2 ld.w $a1, $s0, 0 slli.d $a0, $s6, 2 @@ -4035,31 +4043,31 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fadd.d $fa0, $fa7, $fa1 fmul.d $fa7, $fa0, $ft14 .LBB16_41: # in Loop: Header=BB16_6 Depth=1 - fst.d $fa7, $sp, 632 # 8-byte Folded Spill + fst.d $fa7, $sp, 640 # 8-byte Folded Spill ld.w $s6, $s1, 0 fmul.d $fa0, $fa3, $ft3 - fst.d $fa0, $sp, 536 # 8-byte Folded Spill + fst.d $fa0, $sp, 560 # 8-byte Folded Spill fsqrt.d $fa0, $fa0 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB16_98 + bceqz $fcc0, .LBB16_99 .LBB16_42: # %.split1371 # in Loop: Header=BB16_6 Depth=1 - slt $s6, $fp, $s6 - fld.d $fa1, $sp, 752 # 8-byte Folded Reload + slt $s7, $fp, $s6 + fld.d $fa1, $sp, 792 # 8-byte Folded Reload fadd.d $fa1, $fs4, $fa1 fmul.d $fa1, $fa1, $ft14 - movgr2cf $fcc0, $s6 + movgr2cf $fcc0, $s7 fsel $fa1, $fs4, $fa1, $fcc0 - fld.d $fa2, $sp, 328 # 8-byte Folded Reload + fld.d $fa2, $sp, 360 # 8-byte Folded Reload fmul.d $fs4, $fa2, $ft14 - fld.d $fa2, $sp, 312 # 8-byte Folded Reload + fld.d $fa2, $sp, 344 # 8-byte Folded Reload fdiv.d $fa2, $fa2, $fa3 fabs.d $fa2, $fa2 - fst.d $fa2, $sp, 552 # 8-byte Folded Spill + fst.d $fa2, $sp, 576 # 8-byte Folded Spill fadd.d $fa0, $fa2, $fa0 - fsub.d $fa2, $fs1, $fs7 - fsub.d $fs3, $fs7, $fa1 - fsub.d $fa1, $fs2, $fs1 + fsub.d $fa2, $fs3, $fs7 + fsub.d $fs1, $fs7, $fa1 + fsub.d $fa1, $fs2, $fs3 fmul.d $fa0, $fa0, $ft14 fmul.d $fa0, $fa5, $fa0 fdiv.d $fa0, $fa0, $fs4 @@ -4071,7 +4079,7 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd frecip.d $fa3, $fa3 fmul.d $fa1, $fa2, $fa1 fmul.d $fa1, $fa3, $fa1 - fmul.d $fa4, $fa2, $fs3 + fmul.d $fa4, $fa2, $fs1 fmul.d $fa3, $fa3, $fa4 fmul.d $fa0, $fa0, $ft14 fmin.d $fa1, $fa1, $ft8 @@ -4082,40 +4090,48 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fsub.d $fa1, $ft8, $fa1 ld.w $a0, $s0, 0 fmul.d $fa0, $fa0, $fa1 - fmul.d $fa7, $fa2, $fa0 - ld.d $a1, $sp, 400 # 8-byte Folded Reload - slli.d $s8, $a1, 2 - fld.d $fa0, $sp, 736 # 8-byte Folded Reload - fld.d $fa1, $sp, 640 # 8-byte Folded Reload - fdiv.d $fs1, $fa1, $fa0 - bge $fp, $a0, .LBB16_48 + fmul.d $fa0, $fa2, $fa0 + fst.d $fa0, $sp, 792 # 8-byte Folded Spill + ld.d $a1, $sp, 424 # 8-byte Folded Reload + slli.d $s6, $a1, 2 + fld.d $fa0, $sp, 768 # 8-byte Folded Reload + fld.d $fa1, $sp, 328 # 8-byte Folded Reload + fdiv.d $fs3, $fa1, $fa0 + bge $fp, $a0, .LBB16_45 # %bb.43: # in Loop: Header=BB16_6 Depth=1 slli.d $a0, $a3, 2 ldx.w $a0, $s4, $a0 - ldx.w $a1, $s4, $s8 - bge $a0, $a1, .LBB16_45 + ldx.w $a1, $s4, $s6 + fld.d $fa7, $sp, 648 # 8-byte Folded Reload + fld.d $fs2, $sp, 672 # 8-byte Folded Reload + bge $a0, $a1, .LBB16_46 # %bb.44: # in Loop: Header=BB16_6 Depth=1 - ldx.w $a0, $t4, $s8 + ldx.w $a0, $t4, $s6 slli.d $a0, $a0, 3 fldx.d $fa0, $s5, $a0 - fld.d $fa1, $sp, 224 # 8-byte Folded Reload + fld.d $fa1, $sp, 248 # 8-byte Folded Reload fadd.d $fa0, $fa1, $fa0 fmul.d $fa3, $fa0, $ft14 - b .LBB16_46 + b .LBB16_47 .p2align 4, , 16 .LBB16_45: # in Loop: Header=BB16_6 Depth=1 - fld.d $fa3, $sp, 224 # 8-byte Folded Reload + fld.d $fa7, $sp, 648 # 8-byte Folded Reload + fld.d $fs2, $sp, 672 # 8-byte Folded Reload + b .LBB16_49 + .p2align 4, , 16 .LBB16_46: # in Loop: Header=BB16_6 Depth=1 - fld.d $fa0, $sp, 736 # 8-byte Folded Reload + fld.d $fa3, $sp, 248 # 8-byte Folded Reload +.LBB16_47: # in Loop: Header=BB16_6 Depth=1 + fld.d $fa0, $sp, 768 # 8-byte Folded Reload fmul.d $fa1, $fa0, $ft3 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB16_106 -.LBB16_47: # %.split1373 + bceqz $fcc0, .LBB16_107 +.LBB16_48: # %.split1373 # in Loop: Header=BB16_6 Depth=1 - fabs.d $fa1, $fs1 + fabs.d $fa1, $fs3 fadd.d $fa0, $fa1, $fa0 - fld.d $fa2, $sp, 760 # 8-byte Folded Reload + fld.d $fa2, $sp, 800 # 8-byte Folded Reload fsub.d $fa1, $fa2, $fs7 fsub.d $fa2, $fa3, $fa2 fmul.d $fa0, $fa0, $ft14 @@ -4129,7 +4145,7 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd frecip.d $fa3, $fa3 fmul.d $fa2, $fa1, $fa2 fmul.d $fa2, $fa3, $fa2 - fmul.d $fa4, $fa1, $fs3 + fmul.d $fa4, $fa1, $fs1 fmul.d $fa3, $fa3, $fa4 fmul.d $fa0, $fa0, $ft14 fmin.d $fa2, $fa2, $ft8 @@ -4140,33 +4156,33 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fsub.d $fa2, $ft8, $fa2 fmul.d $fa0, $fa0, $fa2 fmul.d $fa0, $fa1, $fa0 - fadd.d $fa0, $fa7, $fa0 + fld.d $fa1, $sp, 792 # 8-byte Folded Reload + fadd.d $fa0, $fa1, $fa0 fmul.d $fa0, $fa0, $ft14 - fmul.d $fa7, $fa0, $ft14 -.LBB16_48: # in Loop: Header=BB16_6 Depth=1 - fld.d $fa1, $sp, 544 # 8-byte Folded Reload - fst.d $fa7, $sp, 752 # 8-byte Folded Spill + fmul.d $fa0, $fa0, $ft14 + fst.d $fa0, $sp, 792 # 8-byte Folded Spill +.LBB16_49: # in Loop: Header=BB16_6 Depth=1 + fld.d $fa1, $sp, 568 # 8-byte Folded Reload fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - fld.d $fa7, $sp, 608 # 8-byte Folded Reload - fld.d $fs3, $sp, 592 # 8-byte Folded Reload - bceqz $fcc0, .LBB16_99 -.LBB16_49: # %.split1375 + bceqz $fcc0, .LBB16_100 +.LBB16_50: # %.split1375 # in Loop: Header=BB16_6 Depth=1 - fld.d $fa1, $sp, 776 # 8-byte Folded Reload - fadd.d $fa1, $fs3, $fa1 + fld.d $fa1, $sp, 816 # 8-byte Folded Reload + fld.d $fa2, $sp, 784 # 8-byte Folded Reload + fadd.d $fa1, $fa2, $fa1 fmul.d $fa1, $fa1, $ft14 - movgr2cf $fcc0, $s3 - fsel $fa1, $fs3, $fa1, $fcc0 - fld.d $fa2, $sp, 336 # 8-byte Folded Reload + movgr2cf $fcc0, $s2 + fsel $fa1, $fa2, $fa1, $fcc0 + fld.d $fa2, $sp, 352 # 8-byte Folded Reload fadd.d $fa0, $fa2, $fa0 fsub.d $fa2, $ft7, $fa7 - fld.d $fa3, $sp, 288 # 8-byte Folded Reload + fld.d $fa3, $sp, 312 # 8-byte Folded Reload fsub.d $fa3, $fa7, $fa3 - fsub.d $fs2, $fa1, $ft7 + fsub.d $fs1, $fa1, $ft7 fmul.d $fa0, $fa0, $ft14 fmul.d $fa0, $fa5, $fa0 - fld.d $fa1, $sp, 720 # 8-byte Folded Reload + fld.d $fa1, $sp, 752 # 8-byte Folded Reload fdiv.d $fa0, $fa0, $fa1 fsub.d $fa1, $ft8, $fa0 fmul.d $fa0, $fa0, $fa1 @@ -4174,7 +4190,7 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fcmp.clt.d $fcc0, $fa1, $fs6 fsel $fa1, $fa1, $fs6, $fcc0 frecip.d $fa1, $fa1 - fmul.d $fa4, $fa2, $fs2 + fmul.d $fa4, $fa2, $fs1 fmul.d $fa4, $fa1, $fa4 fmul.d $fa3, $fa2, $fa3 fmul.d $fa1, $fa1, $fa3 @@ -4188,41 +4204,41 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fsub.d $fa1, $ft8, $fa1 fmul.d $fa0, $fa1, $fa0 fmul.d $fa0, $fa2, $fa0 - fst.d $fa0, $sp, 760 # 8-byte Folded Spill - bge $fp, $a0, .LBB16_55 -# %bb.50: # in Loop: Header=BB16_6 Depth=1 + fst.d $fa0, $sp, 800 # 8-byte Folded Spill + bge $fp, $a0, .LBB16_56 +# %bb.51: # in Loop: Header=BB16_6 Depth=1 slli.d $a0, $a4, 2 ldx.w $a0, $s4, $a0 - ldx.w $a1, $s4, $s2 - bge $a0, $a1, .LBB16_52 -# %bb.51: # in Loop: Header=BB16_6 Depth=1 - ldx.w $a0, $t4, $s2 + ldx.w $a1, $s4, $s3 + bge $a0, $a1, .LBB16_53 +# %bb.52: # in Loop: Header=BB16_6 Depth=1 + ldx.w $a0, $t4, $s3 slli.d $a0, $a0, 3 fldx.d $fa0, $t7, $a0 - fld.d $fa1, $sp, 232 # 8-byte Folded Reload + fld.d $fa1, $sp, 256 # 8-byte Folded Reload fadd.d $fa0, $fa1, $fa0 fmul.d $fa3, $fa0, $ft14 - b .LBB16_53 + b .LBB16_54 .p2align 4, , 16 -.LBB16_52: # in Loop: Header=BB16_6 Depth=1 - fld.d $fa3, $sp, 232 # 8-byte Folded Reload .LBB16_53: # in Loop: Header=BB16_6 Depth=1 - fld.d $fa2, $sp, 320 # 8-byte Folded Reload - fld.d $fa0, $sp, 744 # 8-byte Folded Reload + fld.d $fa3, $sp, 256 # 8-byte Folded Reload +.LBB16_54: # in Loop: Header=BB16_6 Depth=1 + fld.d $fa2, $sp, 336 # 8-byte Folded Reload + fld.d $fa0, $sp, 776 # 8-byte Folded Reload fmul.d $fa1, $fa0, $ft3 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB16_107 -.LBB16_54: # %.split1377 + bceqz $fcc0, .LBB16_108 +.LBB16_55: # %.split1377 # in Loop: Header=BB16_6 Depth=1 fabs.d $fa1, $fa2 fadd.d $fa0, $fa1, $fa0 - fld.d $fa2, $sp, 768 # 8-byte Folded Reload + fld.d $fa2, $sp, 808 # 8-byte Folded Reload fsub.d $fa1, $ft7, $fa2 fsub.d $fa2, $fa2, $fa3 fmul.d $fa0, $fa0, $ft14 fmul.d $fa0, $fa5, $fa0 - fld.d $fa3, $sp, 720 # 8-byte Folded Reload + fld.d $fa3, $sp, 752 # 8-byte Folded Reload fdiv.d $fa0, $fa0, $fa3 fsub.d $fa3, $ft8, $fa0 fmul.d $fa0, $fa0, $fa3 @@ -4230,7 +4246,7 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fcmp.clt.d $fcc0, $fa3, $fs6 fsel $fa3, $fa3, $fs6, $fcc0 frecip.d $fa3, $fa3 - fmul.d $fa4, $fa1, $fs2 + fmul.d $fa4, $fa1, $fs1 fmul.d $fa4, $fa3, $fa4 fmul.d $fa2, $fa1, $fa2 fmul.d $fa2, $fa3, $fa2 @@ -4243,29 +4259,30 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fsub.d $fa2, $ft8, $fa2 fmul.d $fa0, $fa0, $fa2 fmul.d $fa0, $fa1, $fa0 - fld.d $fa1, $sp, 760 # 8-byte Folded Reload + fld.d $fa1, $sp, 800 # 8-byte Folded Reload fadd.d $fa0, $fa1, $fa0 fmul.d $fa0, $fa0, $ft14 fmul.d $fa0, $fa0, $ft14 - fst.d $fa0, $sp, 760 # 8-byte Folded Spill -.LBB16_55: # in Loop: Header=BB16_6 Depth=1 - fld.d $fa1, $sp, 536 # 8-byte Folded Reload + fst.d $fa0, $sp, 800 # 8-byte Folded Spill +.LBB16_56: # in Loop: Header=BB16_6 Depth=1 + fld.d $fa1, $sp, 560 # 8-byte Folded Reload fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB16_100 -.LBB16_56: # %.split1379 + bceqz $fcc0, .LBB16_101 +.LBB16_57: # %.split1379 # in Loop: Header=BB16_6 Depth=1 - fld.d $fa1, $sp, 768 # 8-byte Folded Reload + fld.d $fa1, $sp, 808 # 8-byte Folded Reload fadd.d $fa1, $fa7, $fa1 fmul.d $fa1, $fa1, $ft14 - movgr2cf $fcc0, $s6 + movgr2cf $fcc0, $s7 fsel $fa1, $fa7, $fa1, $fcc0 - fld.d $fa2, $sp, 552 # 8-byte Folded Reload + fld.d $fa2, $sp, 576 # 8-byte Folded Reload fadd.d $fa0, $fa2, $fa0 - fsub.d $fa2, $fs3, $ft7 - fsub.d $fs2, $ft7, $fa1 - fld.d $fa1, $sp, 632 # 8-byte Folded Reload - fsub.d $fa1, $fa1, $fs3 + fld.d $fa3, $sp, 784 # 8-byte Folded Reload + fsub.d $fa2, $fa3, $ft7 + fsub.d $fs1, $ft7, $fa1 + fld.d $fa1, $sp, 640 # 8-byte Folded Reload + fsub.d $fa1, $fa1, $fa3 fmul.d $fa0, $fa0, $ft14 fmul.d $fa0, $fa5, $fa0 fdiv.d $fa0, $fa0, $fs4 @@ -4277,7 +4294,7 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd frecip.d $fa3, $fa3 fmul.d $fa1, $fa2, $fa1 fmul.d $fa1, $fa3, $fa1 - fmul.d $fa4, $fa2, $fs2 + fmul.d $fa4, $fa2, $fs1 fmul.d $fa3, $fa3, $fa4 fmul.d $fa0, $fa0, $ft14 fmin.d $fa1, $fa1, $ft8 @@ -4289,38 +4306,40 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fsub.d $fa1, $ft8, $fa1 fmul.d $fa0, $fa1, $fa0 fmul.d $fa7, $fa2, $fa0 - bge $fp, $a0, .LBB16_59 -# %bb.57: # in Loop: Header=BB16_6 Depth=1 + bge $fp, $a0, .LBB16_60 +# %bb.58: # in Loop: Header=BB16_6 Depth=1 slli.d $a0, $a3, 2 ldx.w $a0, $s4, $a0 - ldx.w $a1, $s4, $s8 - bge $a0, $a1, .LBB16_60 -# %bb.58: # in Loop: Header=BB16_6 Depth=1 - ldx.w $a0, $t4, $s8 + ldx.w $a1, $s4, $s6 + ld.d $s7, $sp, 216 # 8-byte Folded Reload + bge $a0, $a1, .LBB16_61 +# %bb.59: # in Loop: Header=BB16_6 Depth=1 + ldx.w $a0, $t4, $s6 slli.d $a0, $a0, 3 fldx.d $fa0, $t7, $a0 - fld.d $fa1, $sp, 240 # 8-byte Folded Reload + fld.d $fa1, $sp, 264 # 8-byte Folded Reload fadd.d $fa0, $fa1, $fa0 fmul.d $fa3, $fa0, $ft14 - b .LBB16_61 - .p2align 4, , 16 -.LBB16_59: # in Loop: Header=BB16_6 Depth=1 - fst.d $fa7, $sp, 768 # 8-byte Folded Spill - b .LBB16_63 + b .LBB16_62 .p2align 4, , 16 .LBB16_60: # in Loop: Header=BB16_6 Depth=1 - fld.d $fa3, $sp, 240 # 8-byte Folded Reload + fst.d $fa7, $sp, 808 # 8-byte Folded Spill + ld.d $s7, $sp, 216 # 8-byte Folded Reload + b .LBB16_64 + .p2align 4, , 16 .LBB16_61: # in Loop: Header=BB16_6 Depth=1 - fld.d $fa0, $sp, 736 # 8-byte Folded Reload + fld.d $fa3, $sp, 264 # 8-byte Folded Reload +.LBB16_62: # in Loop: Header=BB16_6 Depth=1 + fld.d $fa0, $sp, 768 # 8-byte Folded Reload fmul.d $fa1, $fa0, $ft3 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB16_108 -.LBB16_62: # %.split1381 + bceqz $fcc0, .LBB16_109 +.LBB16_63: # %.split1381 # in Loop: Header=BB16_6 Depth=1 - fabs.d $fa1, $fs1 + fabs.d $fa1, $fs3 fadd.d $fa0, $fa1, $fa0 - fld.d $fa2, $sp, 776 # 8-byte Folded Reload + fld.d $fa2, $sp, 816 # 8-byte Folded Reload fsub.d $fa1, $fa2, $ft7 fsub.d $fa2, $fa3, $fa2 fmul.d $fa0, $fa0, $ft14 @@ -4334,7 +4353,7 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd frecip.d $fa3, $fa3 fmul.d $fa2, $fa1, $fa2 fmul.d $fa2, $fa3, $fa2 - fmul.d $fa4, $fa1, $fs2 + fmul.d $fa4, $fa1, $fs1 fmul.d $fa3, $fa3, $fa4 fmul.d $fa0, $fa0, $ft14 fmin.d $fa2, $fa2, $ft8 @@ -4348,58 +4367,56 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fadd.d $fa0, $fa7, $fa0 fmul.d $fa0, $fa0, $ft14 fmul.d $fa0, $fa0, $ft14 - fst.d $fa0, $sp, 768 # 8-byte Folded Spill -.LBB16_63: # in Loop: Header=BB16_6 Depth=1 - fld.d $fa2, $sp, 568 # 8-byte Folded Reload - ld.d $a0, $sp, 784 # 8-byte Folded Reload - alsl.d $s1, $a0, $s4, 2 + fst.d $fa0, $sp, 808 # 8-byte Folded Spill +.LBB16_64: # in Loop: Header=BB16_6 Depth=1 + fld.d $fa2, $sp, 592 # 8-byte Folded Reload + fld.d $fa4, $sp, 600 # 8-byte Folded Reload + alsl.d $s1, $t3, $s4, 2 ld.w $a1, $s1, 0 - slli.d $a0, $t8, 2 + slli.d $a0, $s8, 2 ldx.w $a2, $s4, $a0 - fld.d $fa7, $sp, 616 # 8-byte Folded Reload - fld.d $fs2, $sp, 600 # 8-byte Folded Reload - fld.d $fs1, $sp, 296 # 8-byte Folded Reload - bge $a1, $a2, .LBB16_65 -# %bb.64: # in Loop: Header=BB16_6 Depth=1 + fld.d $fa7, $sp, 624 # 8-byte Folded Reload + bge $a1, $a2, .LBB16_66 +# %bb.65: # in Loop: Header=BB16_6 Depth=1 ldx.w $a0, $t2, $a0 slli.d $a0, $a0, 3 fldx.d $fa0, $s5, $a0 - fldx.d $fa1, $s7, $a0 - fadd.d $fa0, $fs1, $fa0 - fmul.d $fs1, $fa0, $ft14 - fld.d $fa0, $sp, 728 # 8-byte Folded Reload + fldx.d $fa1, $t8, $a0 + fadd.d $fa0, $fa4, $fa0 + fmul.d $fa4, $fa0, $ft14 + fld.d $fa0, $sp, 760 # 8-byte Folded Reload fadd.d $fa0, $fa0, $fa1 fmul.d $fa0, $fa0, $ft14 - fst.d $fa0, $sp, 728 # 8-byte Folded Spill -.LBB16_65: # in Loop: Header=BB16_6 Depth=1 - alsl.d $s0, $t3, $s4, 2 + fst.d $fa0, $sp, 760 # 8-byte Folded Spill +.LBB16_66: # in Loop: Header=BB16_6 Depth=1 + alsl.d $s0, $ra, $s4, 2 ld.w $s2, $s0, 0 fmul.d $fa0, $fa2, $ft3 fst.d $fa0, $sp, 776 # 8-byte Folded Spill fsqrt.d $fa0, $fa0 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB16_101 -.LBB16_66: # %.split1383 + bceqz $fcc0, .LBB16_102 +.LBB16_67: # %.split1383 # in Loop: Header=BB16_6 Depth=1 slt $s3, $fp, $s2 fadd.d $fa1, $fs2, $ft9 fmul.d $fa1, $fa1, $ft14 movgr2cf $fcc0, $s3 fsel $fa1, $fs2, $fa1, $fcc0 - fld.d $fa3, $sp, 376 # 8-byte Folded Reload - fmul.d $fa4, $fa3, $ft14 - fld.d $fa3, $sp, 392 # 8-byte Folded Reload + fld.d $fa3, $sp, 400 # 8-byte Folded Reload + fmul.d $fs3, $fa3, $ft14 + fld.d $fa3, $sp, 416 # 8-byte Folded Reload fdiv.d $fa2, $fa3, $fa2 fabs.d $fa2, $fa2 - fst.d $fa2, $sp, 736 # 8-byte Folded Spill + fst.d $fa2, $sp, 752 # 8-byte Folded Spill fadd.d $fa0, $fa2, $fa0 fsub.d $fa2, $fs7, $fa7 - fsub.d $fa3, $fa7, $fs1 + fsub.d $fa3, $fa7, $fa4 fsub.d $fs1, $fa1, $fs7 fmul.d $fa0, $fa0, $ft14 fmul.d $fa0, $fa5, $fa0 - fst.d $fa4, $sp, 784 # 8-byte Folded Spill - fdiv.d $fa0, $fa0, $fa4 + fst.d $fs3, $sp, 816 # 8-byte Folded Spill + fdiv.d $fa0, $fa0, $fs3 fsub.d $fa1, $ft8, $fa0 fmul.d $fa0, $fa0, $fa1 fmul.d $fa1, $fa2, $fa2 @@ -4419,44 +4436,44 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fsub.d $fa1, $ft8, $fa1 ld.w $a0, $s1, 0 fmul.d $fa0, $fa0, $fa1 - fmul.d $ft15, $fa2, $fa0 - ld.d $a1, $sp, 416 # 8-byte Folded Reload + fmul.d $fs3, $fa2, $fa0 + ld.d $a1, $sp, 440 # 8-byte Folded Reload slli.d $s2, $a1, 2 - fld.d $fa0, $sp, 344 # 8-byte Folded Reload + fld.d $fa0, $sp, 368 # 8-byte Folded Reload fdiv.d $fa2, $fa0, $ft0 - fst.d $fa2, $sp, 720 # 8-byte Folded Spill - bge $fp, $a0, .LBB16_72 -# %bb.67: # in Loop: Header=BB16_6 Depth=1 + fst.d $fa2, $sp, 688 # 8-byte Folded Spill + bge $fp, $a0, .LBB16_73 +# %bb.68: # in Loop: Header=BB16_6 Depth=1 slli.d $a0, $a5, 2 ldx.w $a0, $s4, $a0 ldx.w $a1, $s4, $s2 - bge $a0, $a1, .LBB16_69 -# %bb.68: # in Loop: Header=BB16_6 Depth=1 + bge $a0, $a1, .LBB16_70 +# %bb.69: # in Loop: Header=BB16_6 Depth=1 ldx.w $a0, $t2, $s2 slli.d $a0, $a0, 3 fldx.d $fa0, $s5, $a0 - fld.d $fa1, $sp, 208 # 8-byte Folded Reload + fld.d $fa1, $sp, 232 # 8-byte Folded Reload fadd.d $fa0, $fa1, $fa0 fmul.d $fa3, $fa0, $ft14 - b .LBB16_70 + b .LBB16_71 .p2align 4, , 16 -.LBB16_69: # in Loop: Header=BB16_6 Depth=1 - fld.d $fa3, $sp, 208 # 8-byte Folded Reload .LBB16_70: # in Loop: Header=BB16_6 Depth=1 + fld.d $fa3, $sp, 232 # 8-byte Folded Reload +.LBB16_71: # in Loop: Header=BB16_6 Depth=1 fmul.d $fa1, $ft0, $ft3 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB16_109 -.LBB16_71: # %.split1385 + bceqz $fcc0, .LBB16_110 +.LBB16_72: # %.split1385 # in Loop: Header=BB16_6 Depth=1 fabs.d $fa1, $fa2 fadd.d $fa0, $fa1, $fa0 - fld.d $fa2, $sp, 792 # 8-byte Folded Reload + fld.d $fa2, $sp, 824 # 8-byte Folded Reload fsub.d $fa1, $fs7, $fa2 fsub.d $fa2, $fa2, $fa3 fmul.d $fa0, $fa0, $ft14 fmul.d $fa0, $fa5, $fa0 - fld.d $fa3, $sp, 784 # 8-byte Folded Reload + fld.d $fa3, $sp, 816 # 8-byte Folded Reload fdiv.d $fa0, $fa0, $fa3 fsub.d $fa3, $ft8, $fa0 fmul.d $fa0, $fa0, $fa3 @@ -4477,43 +4494,44 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fsub.d $fa2, $ft8, $fa2 fmul.d $fa0, $fa0, $fa2 fmul.d $fa0, $fa1, $fa0 - fadd.d $fa0, $ft15, $fa0 + fadd.d $fa0, $fs3, $fa0 fmul.d $fa0, $fa0, $ft14 - fmul.d $ft15, $fa0, $ft14 -.LBB16_72: # in Loop: Header=BB16_6 Depth=1 - fld.d $fa2, $sp, 560 # 8-byte Folded Reload - fld.d $fa4, $sp, 576 # 8-byte Folded Reload + fmul.d $fs3, $fa0, $ft14 +.LBB16_73: # in Loop: Header=BB16_6 Depth=1 + fld.d $fa2, $sp, 584 # 8-byte Folded Reload + fld.d $fa4, $sp, 608 # 8-byte Folded Reload ld.w $a1, $s0, 0 slli.d $a0, $a6, 2 ldx.w $a2, $s4, $a0 - bge $a1, $a2, .LBB16_74 -# %bb.73: # in Loop: Header=BB16_6 Depth=1 + bge $a1, $a2, .LBB16_75 +# %bb.74: # in Loop: Header=BB16_6 Depth=1 ldx.w $a0, $t2, $a0 slli.d $a0, $a0, 3 fldx.d $fa0, $s5, $a0 - fldx.d $fa1, $s7, $a0 + fldx.d $fa1, $t8, $a0 fadd.d $fa0, $fa4, $fa0 fmul.d $fa4, $fa0, $ft14 fadd.d $fa0, $ft5, $fa1 fmul.d $ft5, $fa0, $ft14 -.LBB16_74: # in Loop: Header=BB16_6 Depth=1 +.LBB16_75: # in Loop: Header=BB16_6 Depth=1 + fst.d $fs3, $sp, 784 # 8-byte Folded Spill ld.w $s6, $s1, 0 fmul.d $fa0, $fa2, $ft3 - fst.d $fa0, $sp, 744 # 8-byte Folded Spill + fst.d $fa0, $sp, 768 # 8-byte Folded Spill fsqrt.d $fa0, $fa0 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB16_102 -.LBB16_75: # %.split1387 + bceqz $fcc0, .LBB16_103 +.LBB16_76: # %.split1387 # in Loop: Header=BB16_6 Depth=1 slt $s6, $fp, $s6 - fld.d $fa1, $sp, 792 # 8-byte Folded Reload + fld.d $fa1, $sp, 824 # 8-byte Folded Reload fadd.d $fa1, $fa7, $fa1 fmul.d $fa1, $fa1, $ft14 movgr2cf $fcc0, $s6 fsel $fa1, $fa7, $fa1, $fcc0 - fld.d $fa3, $sp, 368 # 8-byte Folded Reload + fld.d $fa3, $sp, 392 # 8-byte Folded Reload fmul.d $fs4, $fa3, $ft14 - fld.d $fa3, $sp, 384 # 8-byte Folded Reload + fld.d $fa3, $sp, 408 # 8-byte Folded Reload fdiv.d $fa2, $fa3, $fa2 fabs.d $fs1, $fa2 fadd.d $fa0, $fs1, $fa0 @@ -4543,38 +4561,38 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd ld.w $a0, $s0, 0 fmul.d $fa0, $fa0, $fa1 fmul.d $fa7, $fa2, $fa0 - ld.d $a1, $sp, 432 # 8-byte Folded Reload + ld.d $a1, $sp, 456 # 8-byte Folded Reload slli.d $s8, $a1, 2 - fld.d $fa0, $sp, 352 # 8-byte Folded Reload + fld.d $fa0, $sp, 376 # 8-byte Folded Reload fdiv.d $fa3, $fa0, $ft2 - fst.d $fa3, $sp, 656 # 8-byte Folded Spill - bge $fp, $a0, .LBB16_78 -# %bb.76: # in Loop: Header=BB16_6 Depth=1 - slli.d $a0, $ra, 2 + fst.d $fa3, $sp, 680 # 8-byte Folded Spill + bge $fp, $a0, .LBB16_79 +# %bb.77: # in Loop: Header=BB16_6 Depth=1 + slli.d $a0, $t0, 2 ldx.w $a0, $s4, $a0 ldx.w $a1, $s4, $s8 - bge $a0, $a1, .LBB16_79 -# %bb.77: # in Loop: Header=BB16_6 Depth=1 + bge $a0, $a1, .LBB16_80 +# %bb.78: # in Loop: Header=BB16_6 Depth=1 ldx.w $a0, $t2, $s8 slli.d $a0, $a0, 3 fldx.d $fa0, $s5, $a0 - fld.d $fa1, $sp, 248 # 8-byte Folded Reload + fld.d $fa1, $sp, 272 # 8-byte Folded Reload fadd.d $fa0, $fa1, $fa0 fmul.d $fa2, $fa0, $ft14 - b .LBB16_80 - .p2align 4, , 16 -.LBB16_78: # in Loop: Header=BB16_6 Depth=1 - fst.d $fa7, $sp, 792 # 8-byte Folded Spill - b .LBB16_82 + b .LBB16_81 .p2align 4, , 16 .LBB16_79: # in Loop: Header=BB16_6 Depth=1 - fld.d $fa2, $sp, 248 # 8-byte Folded Reload + fst.d $fa7, $sp, 824 # 8-byte Folded Spill + b .LBB16_83 + .p2align 4, , 16 .LBB16_80: # in Loop: Header=BB16_6 Depth=1 + fld.d $fa2, $sp, 272 # 8-byte Folded Reload +.LBB16_81: # in Loop: Header=BB16_6 Depth=1 fmul.d $fa1, $ft2, $ft3 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB16_110 -.LBB16_81: # %.split1389 + bceqz $fcc0, .LBB16_111 +.LBB16_82: # %.split1389 # in Loop: Header=BB16_6 Depth=1 fabs.d $fa1, $fa3 fadd.d $fa0, $fa1, $fa0 @@ -4605,28 +4623,28 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fadd.d $fa0, $fa7, $fa0 fmul.d $fa0, $fa0, $ft14 fmul.d $fa0, $fa0, $ft14 - fst.d $fa0, $sp, 792 # 8-byte Folded Spill -.LBB16_82: # in Loop: Header=BB16_6 Depth=1 + fst.d $fa0, $sp, 824 # 8-byte Folded Spill +.LBB16_83: # in Loop: Header=BB16_6 Depth=1 fld.d $fa1, $sp, 776 # 8-byte Folded Reload fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - fld.d $fa7, $sp, 624 # 8-byte Folded Reload - bceqz $fcc0, .LBB16_103 -.LBB16_83: # %.split1391 + fld.d $fa7, $sp, 632 # 8-byte Folded Reload + bceqz $fcc0, .LBB16_104 +.LBB16_84: # %.split1391 # in Loop: Header=BB16_6 Depth=1 fadd.d $fa1, $ft12, $ft6 fmul.d $fa1, $fa1, $ft14 movgr2cf $fcc0, $s3 fsel $fa1, $ft12, $fa1, $fcc0 - fld.d $fa2, $sp, 736 # 8-byte Folded Reload + fld.d $fa2, $sp, 752 # 8-byte Folded Reload fadd.d $fa0, $fa2, $fa0 fsub.d $fa2, $ft13, $fa7 - fld.d $fa3, $sp, 728 # 8-byte Folded Reload + fld.d $fa3, $sp, 760 # 8-byte Folded Reload fsub.d $fa3, $fa7, $fa3 fsub.d $fs2, $fa1, $ft13 fmul.d $fa0, $fa0, $ft14 fmul.d $fa0, $fa5, $fa0 - fld.d $fa1, $sp, 784 # 8-byte Folded Reload + fld.d $fa1, $sp, 816 # 8-byte Folded Reload fdiv.d $fa0, $fa0, $fa1 fsub.d $fa1, $ft8, $fa0 fmul.d $fa0, $fa0, $fa1 @@ -4648,44 +4666,44 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fsub.d $fa1, $ft8, $fa1 fmul.d $fa0, $fa1, $fa0 fmul.d $fs3, $fa2, $fa0 - bge $fp, $a0, .LBB16_86 -# %bb.84: # in Loop: Header=BB16_6 Depth=1 + bge $fp, $a0, .LBB16_87 +# %bb.85: # in Loop: Header=BB16_6 Depth=1 slli.d $a0, $a5, 2 ldx.w $a0, $s4, $a0 ldx.w $a1, $s4, $s2 - bge $a0, $a1, .LBB16_87 -# %bb.85: # in Loop: Header=BB16_6 Depth=1 + bge $a0, $a1, .LBB16_88 +# %bb.86: # in Loop: Header=BB16_6 Depth=1 ldx.w $a0, $t2, $s2 slli.d $a0, $a0, 3 - fldx.d $fa0, $s7, $a0 - fld.d $fa1, $sp, 256 # 8-byte Folded Reload + fldx.d $fa0, $t8, $a0 + fld.d $fa1, $sp, 280 # 8-byte Folded Reload fadd.d $fa0, $fa1, $fa0 fmul.d $fa3, $fa0, $ft14 - b .LBB16_88 - .p2align 4, , 16 -.LBB16_86: # in Loop: Header=BB16_6 Depth=1 - ld.d $s2, $sp, 584 # 8-byte Folded Reload - b .LBB16_90 + b .LBB16_89 .p2align 4, , 16 .LBB16_87: # in Loop: Header=BB16_6 Depth=1 - fld.d $fa3, $sp, 256 # 8-byte Folded Reload + ld.d $s2, $sp, 616 # 8-byte Folded Reload + b .LBB16_91 + .p2align 4, , 16 .LBB16_88: # in Loop: Header=BB16_6 Depth=1 + fld.d $fa3, $sp, 280 # 8-byte Folded Reload +.LBB16_89: # in Loop: Header=BB16_6 Depth=1 fmul.d $fa1, $ft0, $ft3 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - ld.d $s2, $sp, 584 # 8-byte Folded Reload - bceqz $fcc0, .LBB16_111 -.LBB16_89: # %.split1393 + ld.d $s2, $sp, 616 # 8-byte Folded Reload + bceqz $fcc0, .LBB16_112 +.LBB16_90: # %.split1393 # in Loop: Header=BB16_6 Depth=1 - fld.d $fa1, $sp, 720 # 8-byte Folded Reload + fld.d $fa1, $sp, 688 # 8-byte Folded Reload fabs.d $fa1, $fa1 fadd.d $fa0, $fa1, $fa0 - fld.d $fa2, $sp, 800 # 8-byte Folded Reload + fld.d $fa2, $sp, 832 # 8-byte Folded Reload fsub.d $fa1, $ft13, $fa2 fsub.d $fa2, $fa2, $fa3 fmul.d $fa0, $fa0, $ft14 fmul.d $fa0, $fa5, $fa0 - fld.d $fa3, $sp, 784 # 8-byte Folded Reload + fld.d $fa3, $sp, 816 # 8-byte Folded Reload fdiv.d $fa0, $fa0, $fa3 fsub.d $fa3, $ft8, $fa0 fmul.d $fa0, $fa0, $fa3 @@ -4709,14 +4727,14 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fadd.d $fa0, $fs3, $fa0 fmul.d $fa0, $fa0, $ft14 fmul.d $fs3, $fa0, $ft14 -.LBB16_90: # in Loop: Header=BB16_6 Depth=1 - fld.d $fa1, $sp, 744 # 8-byte Folded Reload +.LBB16_91: # in Loop: Header=BB16_6 Depth=1 + fld.d $fa1, $sp, 768 # 8-byte Folded Reload fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB16_104 -.LBB16_91: # %.split1395 + bceqz $fcc0, .LBB16_105 +.LBB16_92: # %.split1395 # in Loop: Header=BB16_6 Depth=1 - fld.d $fa1, $sp, 800 # 8-byte Folded Reload + fld.d $fa1, $sp, 832 # 8-byte Folded Reload fadd.d $fa1, $fa7, $fa1 fmul.d $fa1, $fa1, $ft14 movgr2cf $fcc0, $s6 @@ -4749,39 +4767,39 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fmul.d $fa0, $fa1, $fa0 fmul.d $fs1, $fa2, $fa0 bge $fp, $a0, .LBB16_4 -# %bb.92: # in Loop: Header=BB16_6 Depth=1 - slli.d $a0, $ra, 2 +# %bb.93: # in Loop: Header=BB16_6 Depth=1 + slli.d $a0, $t0, 2 ldx.w $a0, $s4, $a0 ldx.w $a1, $s4, $s8 - ld.d $a6, $sp, 488 # 8-byte Folded Reload - ld.d $s6, $sp, 480 # 8-byte Folded Reload - fld.d $fa7, $sp, 768 # 8-byte Folded Reload - bge $a0, $a1, .LBB16_94 -# %bb.93: # in Loop: Header=BB16_6 Depth=1 + ld.d $s6, $sp, 208 # 8-byte Folded Reload + ld.d $a6, $sp, 512 # 8-byte Folded Reload + fld.d $fa7, $sp, 808 # 8-byte Folded Reload + bge $a0, $a1, .LBB16_95 +# %bb.94: # in Loop: Header=BB16_6 Depth=1 ldx.w $a0, $t2, $s8 slli.d $a0, $a0, 3 - fldx.d $fa0, $s7, $a0 - fld.d $fa1, $sp, 264 # 8-byte Folded Reload + fldx.d $fa0, $t8, $a0 + fld.d $fa1, $sp, 288 # 8-byte Folded Reload fadd.d $fa0, $fa1, $fa0 fmul.d $fa2, $fa0, $ft14 - b .LBB16_95 + b .LBB16_96 .p2align 4, , 16 -.LBB16_94: # in Loop: Header=BB16_6 Depth=1 - fld.d $fa2, $sp, 264 # 8-byte Folded Reload .LBB16_95: # in Loop: Header=BB16_6 Depth=1 + fld.d $fa2, $sp, 288 # 8-byte Folded Reload +.LBB16_96: # in Loop: Header=BB16_6 Depth=1 fmul.d $fa1, $ft2, $ft3 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - ld.d $a1, $sp, 528 # 8-byte Folded Reload - ld.d $a2, $sp, 520 # 8-byte Folded Reload - ld.d $a3, $sp, 512 # 8-byte Folded Reload - ld.d $a4, $sp, 504 # 8-byte Folded Reload - ld.d $a5, $sp, 496 # 8-byte Folded Reload - ld.d $s8, $sp, 192 # 8-byte Folded Reload - bceqz $fcc0, .LBB16_112 -.LBB16_96: # %.split1397 + ld.d $a1, $sp, 552 # 8-byte Folded Reload + ld.d $a2, $sp, 544 # 8-byte Folded Reload + ld.d $a3, $sp, 536 # 8-byte Folded Reload + ld.d $a4, $sp, 528 # 8-byte Folded Reload + ld.d $a5, $sp, 520 # 8-byte Folded Reload + ld.d $s8, $sp, 504 # 8-byte Folded Reload + bceqz $fcc0, .LBB16_113 +.LBB16_97: # %.split1397 # in Loop: Header=BB16_6 Depth=1 - fld.d $fa1, $sp, 656 # 8-byte Folded Reload + fld.d $fa1, $sp, 680 # 8-byte Folded Reload fabs.d $fa1, $fa1 fadd.d $fa0, $fa1, $fa0 fsub.d $fa1, $ft6, $ft13 @@ -4811,909 +4829,919 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fadd.d $fa0, $fs1, $fa0 fmul.d $fa0, $fa0, $ft14 fmul.d $fs1, $fa0, $ft14 - fld.d $fa2, $sp, 456 # 8-byte Folded Reload - fld.d $fa4, $sp, 424 # 8-byte Folded Reload + fld.d $fa2, $sp, 480 # 8-byte Folded Reload + fld.d $fa4, $sp, 448 # 8-byte Folded Reload b .LBB16_5 -.LBB16_97: # %call.sqrt +.LBB16_98: # %call.sqrt # in Loop: Header=BB16_6 Depth=1 - fld.d $fa0, $sp, 544 # 8-byte Folded Reload + fld.d $fa0, $sp, 568 # 8-byte Folded Reload st.d $t2, $sp, 136 # 8-byte Folded Spill st.d $t4, $sp, 120 # 8-byte Folded Spill - st.d $t6, $sp, 88 # 8-byte Folded Spill - st.d $t7, $sp, 80 # 8-byte Folded Spill + st.d $t6, $sp, 80 # 8-byte Folded Spill + st.d $t7, $sp, 48 # 8-byte Folded Spill + st.d $t8, $sp, 112 # 8-byte Folded Spill move $s2, $t5 - move $s8, $t1 - st.d $ra, $sp, 32 # 8-byte Folded Spill - fld.d $fa1, $sp, 808 # 8-byte Folded Reload - fst.d $fa1, $sp, 808 # 8-byte Folded Spill + move $s7, $t1 + st.d $t0, $sp, 24 # 8-byte Folded Spill + fld.d $fa1, $sp, 840 # 8-byte Folded Reload + fst.d $fa1, $sp, 840 # 8-byte Folded Spill fmov.d $fs6, $fa6 - st.d $t3, $sp, 24 # 8-byte Folded Spill - fst.d $ft10, $sp, 56 # 8-byte Folded Spill - fmov.d $fs5, $ft11 - fst.d $ft12, $sp, 72 # 8-byte Folded Spill + fmov.d $fs5, $ft10 + fst.d $ft11, $sp, 72 # 8-byte Folded Spill + fst.d $ft12, $sp, 64 # 8-byte Folded Spill fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 112 # 8-byte Folded Spill - fst.d $ft3, $sp, 104 # 8-byte Folded Spill - fst.d $ft5, $sp, 96 # 8-byte Folded Spill - st.d $a6, $sp, 40 # 8-byte Folded Spill - fld.d $fa1, $sp, 640 # 8-byte Folded Reload - fst.d $fa1, $sp, 640 # 8-byte Folded Spill - st.d $t8, $sp, 16 # 8-byte Folded Spill - fst.d $fa7, $sp, 632 # 8-byte Folded Spill + fst.d $ft2, $sp, 104 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft5, $sp, 88 # 8-byte Folded Spill + st.d $a6, $sp, 32 # 8-byte Folded Spill + fst.d $fa7, $sp, 640 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - fld.d $fa7, $sp, 632 # 8-byte Folded Reload - ld.d $t8, $sp, 16 # 8-byte Folded Reload - ld.d $a6, $sp, 40 # 8-byte Folded Reload - fld.d $ft5, $sp, 96 # 8-byte Folded Reload - fld.d $ft3, $sp, 104 # 8-byte Folded Reload - fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $fa7, $sp, 640 # 8-byte Folded Reload + ld.d $a6, $sp, 32 # 8-byte Folded Reload + fld.d $ft5, $sp, 88 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload + fld.d $ft2, $sp, 104 # 8-byte Folded Reload fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 680 # 8-byte Folded Reload - fld.d $ft12, $sp, 72 # 8-byte Folded Reload - fmov.d $ft11, $fs5 - fld.d $ft10, $sp, 56 # 8-byte Folded Reload - ld.d $t3, $sp, 24 # 8-byte Folded Reload + fld.d $ft0, $sp, 712 # 8-byte Folded Reload + fld.d $ft12, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 72 # 8-byte Folded Reload + fmov.d $ft10, $fs5 + ld.d $ra, $sp, 680 # 8-byte Folded Reload + ld.d $t3, $sp, 688 # 8-byte Folded Reload fmov.d $fa6, $fs6 - fld.d $fs6, $sp, 720 # 8-byte Folded Reload - fld.d $ft9, $sp, 664 # 8-byte Folded Reload - ld.d $a5, $sp, 672 # 8-byte Folded Reload - ld.d $ra, $sp, 32 # 8-byte Folded Reload - fld.d $ft6, $sp, 688 # 8-byte Folded Reload - fld.d $ft7, $sp, 824 # 8-byte Folded Reload - fld.d $ft13, $sp, 816 # 8-byte Folded Reload - move $t1, $s8 + fld.d $ft9, $sp, 696 # 8-byte Folded Reload + ld.d $a5, $sp, 704 # 8-byte Folded Reload + ld.d $t0, $sp, 24 # 8-byte Folded Reload + fld.d $ft6, $sp, 720 # 8-byte Folded Reload + fld.d $ft7, $sp, 856 # 8-byte Folded Reload + fld.d $ft13, $sp, 848 # 8-byte Folded Reload + move $t1, $s7 move $t5, $s2 - ld.d $a7, $sp, 696 # 8-byte Folded Reload + ld.d $a7, $sp, 728 # 8-byte Folded Reload vldi $vr16, -912 vldi $vr22, -928 - ld.d $t7, $sp, 80 # 8-byte Folded Reload - ld.d $t6, $sp, 88 # 8-byte Folded Reload + ld.d $t8, $sp, 112 # 8-byte Folded Reload + ld.d $t7, $sp, 48 # 8-byte Folded Reload + ld.d $t6, $sp, 80 # 8-byte Folded Reload ld.d $t4, $sp, 120 # 8-byte Folded Reload ld.d $t2, $sp, 136 # 8-byte Folded Reload - fld.d $fa5, $sp, 200 # 8-byte Folded Reload + fld.d $fa5, $sp, 224 # 8-byte Folded Reload b .LBB16_33 -.LBB16_98: # %call.sqrt1372 +.LBB16_99: # %call.sqrt1372 # in Loop: Header=BB16_6 Depth=1 - fld.d $fa0, $sp, 536 # 8-byte Folded Reload + fld.d $fa0, $sp, 560 # 8-byte Folded Reload st.d $t2, $sp, 136 # 8-byte Folded Spill st.d $t4, $sp, 120 # 8-byte Folded Spill - st.d $t6, $sp, 88 # 8-byte Folded Spill - st.d $t7, $sp, 80 # 8-byte Folded Spill - move $s8, $t5 - st.d $t1, $sp, 8 # 8-byte Folded Spill - st.d $ra, $sp, 32 # 8-byte Folded Spill - fst.d $fa6, $sp, 64 # 8-byte Folded Spill - st.d $t3, $sp, 24 # 8-byte Folded Spill + st.d $t6, $sp, 80 # 8-byte Folded Spill + st.d $t7, $sp, 48 # 8-byte Folded Spill + st.d $t8, $sp, 112 # 8-byte Folded Spill + move $s7, $t5 + st.d $t1, $sp, 16 # 8-byte Folded Spill + st.d $t0, $sp, 24 # 8-byte Folded Spill + fst.d $fa6, $sp, 40 # 8-byte Folded Spill fst.d $ft10, $sp, 56 # 8-byte Folded Spill - fst.d $ft11, $sp, 48 # 8-byte Folded Spill - fst.d $ft12, $sp, 72 # 8-byte Folded Spill + fst.d $ft11, $sp, 72 # 8-byte Folded Spill + fst.d $ft12, $sp, 64 # 8-byte Folded Spill fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 112 # 8-byte Folded Spill - fst.d $ft3, $sp, 104 # 8-byte Folded Spill - fst.d $ft4, $sp, 304 # 8-byte Folded Spill - fst.d $ft5, $sp, 96 # 8-byte Folded Spill - st.d $a6, $sp, 40 # 8-byte Folded Spill - st.d $t8, $sp, 16 # 8-byte Folded Spill + fst.d $ft2, $sp, 104 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 320 # 8-byte Folded Spill + fst.d $ft5, $sp, 88 # 8-byte Folded Spill + st.d $a6, $sp, 32 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ld.d $t8, $sp, 16 # 8-byte Folded Reload - fld.d $fa3, $sp, 552 # 8-byte Folded Reload - ld.d $a6, $sp, 40 # 8-byte Folded Reload - fld.d $ft5, $sp, 96 # 8-byte Folded Reload - fld.d $ft4, $sp, 304 # 8-byte Folded Reload - fld.d $ft3, $sp, 104 # 8-byte Folded Reload - fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $fa3, $sp, 576 # 8-byte Folded Reload + ld.d $a6, $sp, 32 # 8-byte Folded Reload + fld.d $ft5, $sp, 88 # 8-byte Folded Reload + fld.d $ft4, $sp, 320 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload + fld.d $ft2, $sp, 104 # 8-byte Folded Reload + ld.d $a4, $sp, 656 # 8-byte Folded Reload fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 680 # 8-byte Folded Reload - ld.d $a4, $sp, 648 # 8-byte Folded Reload - fld.d $ft12, $sp, 72 # 8-byte Folded Reload - fld.d $ft11, $sp, 48 # 8-byte Folded Reload + fld.d $ft0, $sp, 712 # 8-byte Folded Reload + fld.d $ft12, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 72 # 8-byte Folded Reload fld.d $ft10, $sp, 56 # 8-byte Folded Reload - ld.d $a3, $sp, 656 # 8-byte Folded Reload - ld.d $t3, $sp, 24 # 8-byte Folded Reload - fld.d $fa6, $sp, 64 # 8-byte Folded Reload - fld.d $ft9, $sp, 664 # 8-byte Folded Reload - ld.d $a5, $sp, 672 # 8-byte Folded Reload - ld.d $ra, $sp, 32 # 8-byte Folded Reload - fld.d $ft6, $sp, 688 # 8-byte Folded Reload - fld.d $ft7, $sp, 824 # 8-byte Folded Reload - fld.d $ft13, $sp, 816 # 8-byte Folded Reload - ld.d $t1, $sp, 8 # 8-byte Folded Reload - move $t5, $s8 - ld.d $a7, $sp, 696 # 8-byte Folded Reload + ld.d $a3, $sp, 664 # 8-byte Folded Reload + ld.d $ra, $sp, 680 # 8-byte Folded Reload + ld.d $t3, $sp, 688 # 8-byte Folded Reload + fld.d $fa6, $sp, 40 # 8-byte Folded Reload + fld.d $ft9, $sp, 696 # 8-byte Folded Reload + ld.d $a5, $sp, 704 # 8-byte Folded Reload + ld.d $t0, $sp, 24 # 8-byte Folded Reload + fld.d $ft6, $sp, 720 # 8-byte Folded Reload + fld.d $ft7, $sp, 856 # 8-byte Folded Reload + fld.d $ft13, $sp, 848 # 8-byte Folded Reload + ld.d $t1, $sp, 16 # 8-byte Folded Reload + move $t5, $s7 + ld.d $a7, $sp, 728 # 8-byte Folded Reload vldi $vr16, -912 vldi $vr22, -928 - ld.d $t7, $sp, 80 # 8-byte Folded Reload - ld.d $t6, $sp, 88 # 8-byte Folded Reload + ld.d $t8, $sp, 112 # 8-byte Folded Reload + ld.d $t7, $sp, 48 # 8-byte Folded Reload + ld.d $t6, $sp, 80 # 8-byte Folded Reload ld.d $t4, $sp, 120 # 8-byte Folded Reload ld.d $t2, $sp, 136 # 8-byte Folded Reload - fld.d $fa5, $sp, 200 # 8-byte Folded Reload + fld.d $fa5, $sp, 224 # 8-byte Folded Reload b .LBB16_42 -.LBB16_99: # %call.sqrt1376 +.LBB16_100: # %call.sqrt1376 # in Loop: Header=BB16_6 Depth=1 fmov.d $fa0, $fa1 st.d $t2, $sp, 136 # 8-byte Folded Spill st.d $t4, $sp, 120 # 8-byte Folded Spill - st.d $t6, $sp, 88 # 8-byte Folded Spill - st.d $t7, $sp, 80 # 8-byte Folded Spill - st.d $t5, $sp, 640 # 8-byte Folded Spill - st.d $t1, $sp, 8 # 8-byte Folded Spill - st.d $ra, $sp, 32 # 8-byte Folded Spill - fmov.d $fs3, $fa6 - st.d $t3, $sp, 24 # 8-byte Folded Spill + st.d $t6, $sp, 80 # 8-byte Folded Spill + st.d $t7, $sp, 48 # 8-byte Folded Spill + st.d $t8, $sp, 112 # 8-byte Folded Spill + st.d $t5, $sp, 424 # 8-byte Folded Spill + st.d $t1, $sp, 16 # 8-byte Folded Spill + st.d $t0, $sp, 24 # 8-byte Folded Spill + fmov.d $fs2, $fa6 fst.d $ft10, $sp, 56 # 8-byte Folded Spill - fst.d $ft11, $sp, 48 # 8-byte Folded Spill - fst.d $ft12, $sp, 72 # 8-byte Folded Spill + fst.d $ft11, $sp, 72 # 8-byte Folded Spill + fst.d $ft12, $sp, 64 # 8-byte Folded Spill fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 112 # 8-byte Folded Spill - fst.d $ft3, $sp, 104 # 8-byte Folded Spill - fst.d $ft4, $sp, 304 # 8-byte Folded Spill - fst.d $ft5, $sp, 96 # 8-byte Folded Spill - st.d $a6, $sp, 40 # 8-byte Folded Spill - st.d $t8, $sp, 16 # 8-byte Folded Spill + fst.d $ft2, $sp, 104 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 320 # 8-byte Folded Spill + fst.d $ft5, $sp, 88 # 8-byte Folded Spill + st.d $a6, $sp, 32 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ld.d $t8, $sp, 16 # 8-byte Folded Reload - ld.d $a6, $sp, 40 # 8-byte Folded Reload - fld.d $ft5, $sp, 96 # 8-byte Folded Reload - fld.d $ft4, $sp, 304 # 8-byte Folded Reload - fld.d $ft3, $sp, 104 # 8-byte Folded Reload - fld.d $ft2, $sp, 112 # 8-byte Folded Reload + ld.d $a6, $sp, 32 # 8-byte Folded Reload + fld.d $ft5, $sp, 88 # 8-byte Folded Reload + fld.d $ft4, $sp, 320 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload + fld.d $ft2, $sp, 104 # 8-byte Folded Reload + ld.d $a4, $sp, 656 # 8-byte Folded Reload fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 680 # 8-byte Folded Reload - ld.d $a4, $sp, 648 # 8-byte Folded Reload - fld.d $ft12, $sp, 72 # 8-byte Folded Reload - fld.d $ft11, $sp, 48 # 8-byte Folded Reload + fld.d $ft0, $sp, 712 # 8-byte Folded Reload + fld.d $ft12, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 72 # 8-byte Folded Reload fld.d $ft10, $sp, 56 # 8-byte Folded Reload - ld.d $a3, $sp, 656 # 8-byte Folded Reload - ld.d $t3, $sp, 24 # 8-byte Folded Reload - fmov.d $fa6, $fs3 - fld.d $fs3, $sp, 592 # 8-byte Folded Reload - fld.d $fa7, $sp, 608 # 8-byte Folded Reload - fld.d $ft9, $sp, 664 # 8-byte Folded Reload - ld.d $a5, $sp, 672 # 8-byte Folded Reload - ld.d $ra, $sp, 32 # 8-byte Folded Reload - fld.d $ft6, $sp, 688 # 8-byte Folded Reload - fld.d $ft7, $sp, 824 # 8-byte Folded Reload - fld.d $ft13, $sp, 816 # 8-byte Folded Reload - ld.d $t1, $sp, 8 # 8-byte Folded Reload - ld.d $t5, $sp, 640 # 8-byte Folded Reload - ld.d $a7, $sp, 696 # 8-byte Folded Reload + ld.d $a3, $sp, 664 # 8-byte Folded Reload + ld.d $ra, $sp, 680 # 8-byte Folded Reload + ld.d $t3, $sp, 688 # 8-byte Folded Reload + fmov.d $fa6, $fs2 + fld.d $fs2, $sp, 672 # 8-byte Folded Reload + fld.d $fa7, $sp, 648 # 8-byte Folded Reload + fld.d $ft9, $sp, 696 # 8-byte Folded Reload + ld.d $a5, $sp, 704 # 8-byte Folded Reload + ld.d $t0, $sp, 24 # 8-byte Folded Reload + fld.d $ft6, $sp, 720 # 8-byte Folded Reload + fld.d $ft7, $sp, 856 # 8-byte Folded Reload + fld.d $ft13, $sp, 848 # 8-byte Folded Reload + ld.d $t1, $sp, 16 # 8-byte Folded Reload + ld.d $t5, $sp, 424 # 8-byte Folded Reload + ld.d $a7, $sp, 728 # 8-byte Folded Reload vldi $vr16, -912 vldi $vr22, -928 - ld.d $t7, $sp, 80 # 8-byte Folded Reload - ld.d $t6, $sp, 88 # 8-byte Folded Reload + ld.d $t8, $sp, 112 # 8-byte Folded Reload + ld.d $t7, $sp, 48 # 8-byte Folded Reload + ld.d $t6, $sp, 80 # 8-byte Folded Reload ld.d $t4, $sp, 120 # 8-byte Folded Reload ld.d $t2, $sp, 136 # 8-byte Folded Reload - fld.d $fa5, $sp, 200 # 8-byte Folded Reload - b .LBB16_49 -.LBB16_100: # %call.sqrt1380 + fld.d $fa5, $sp, 224 # 8-byte Folded Reload + b .LBB16_50 +.LBB16_101: # %call.sqrt1380 # in Loop: Header=BB16_6 Depth=1 fmov.d $fa0, $fa1 st.d $t2, $sp, 136 # 8-byte Folded Spill st.d $t4, $sp, 120 # 8-byte Folded Spill - st.d $t6, $sp, 88 # 8-byte Folded Spill - st.d $t7, $sp, 80 # 8-byte Folded Spill + st.d $t6, $sp, 80 # 8-byte Folded Spill + st.d $t7, $sp, 48 # 8-byte Folded Spill + st.d $t8, $sp, 112 # 8-byte Folded Spill move $s1, $t5 move $s2, $t1 - move $s3, $ra - fst.d $fa6, $sp, 64 # 8-byte Folded Spill - st.d $t3, $sp, 24 # 8-byte Folded Spill - fmov.d $fs3, $ft10 - fst.d $ft11, $sp, 48 # 8-byte Folded Spill - fst.d $ft12, $sp, 72 # 8-byte Folded Spill + move $s3, $t0 + fmov.d $fs2, $fa6 + fst.d $ft10, $sp, 56 # 8-byte Folded Spill + fst.d $ft11, $sp, 72 # 8-byte Folded Spill + fst.d $ft12, $sp, 64 # 8-byte Folded Spill fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 112 # 8-byte Folded Spill - fst.d $ft3, $sp, 104 # 8-byte Folded Spill - fst.d $ft4, $sp, 304 # 8-byte Folded Spill - fst.d $ft5, $sp, 96 # 8-byte Folded Spill - st.d $a6, $sp, 40 # 8-byte Folded Spill - st.d $t8, $sp, 16 # 8-byte Folded Spill + fst.d $ft2, $sp, 104 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 320 # 8-byte Folded Spill + fst.d $ft5, $sp, 88 # 8-byte Folded Spill + st.d $a6, $sp, 32 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ld.d $t8, $sp, 16 # 8-byte Folded Reload - ld.d $a6, $sp, 40 # 8-byte Folded Reload - fld.d $ft5, $sp, 96 # 8-byte Folded Reload - fld.d $ft4, $sp, 304 # 8-byte Folded Reload - fld.d $ft3, $sp, 104 # 8-byte Folded Reload - fld.d $ft2, $sp, 112 # 8-byte Folded Reload + ld.d $a6, $sp, 32 # 8-byte Folded Reload + fld.d $ft5, $sp, 88 # 8-byte Folded Reload + fld.d $ft4, $sp, 320 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload + fld.d $ft2, $sp, 104 # 8-byte Folded Reload fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 680 # 8-byte Folded Reload - fld.d $ft12, $sp, 72 # 8-byte Folded Reload - fld.d $ft11, $sp, 48 # 8-byte Folded Reload - fmov.d $ft10, $fs3 - fld.d $fs3, $sp, 592 # 8-byte Folded Reload - fld.d $fa7, $sp, 608 # 8-byte Folded Reload - ld.d $a3, $sp, 656 # 8-byte Folded Reload - ld.d $t3, $sp, 24 # 8-byte Folded Reload - fld.d $fa6, $sp, 64 # 8-byte Folded Reload - fld.d $ft9, $sp, 664 # 8-byte Folded Reload - ld.d $a5, $sp, 672 # 8-byte Folded Reload - move $ra, $s3 - fld.d $ft6, $sp, 688 # 8-byte Folded Reload - fld.d $ft7, $sp, 824 # 8-byte Folded Reload - fld.d $ft13, $sp, 816 # 8-byte Folded Reload + fld.d $ft0, $sp, 712 # 8-byte Folded Reload + fld.d $ft12, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 72 # 8-byte Folded Reload + fld.d $ft10, $sp, 56 # 8-byte Folded Reload + ld.d $a3, $sp, 664 # 8-byte Folded Reload + ld.d $ra, $sp, 680 # 8-byte Folded Reload + ld.d $t3, $sp, 688 # 8-byte Folded Reload + fmov.d $fa6, $fs2 + fld.d $fs2, $sp, 672 # 8-byte Folded Reload + fld.d $fa7, $sp, 648 # 8-byte Folded Reload + fld.d $ft9, $sp, 696 # 8-byte Folded Reload + ld.d $a5, $sp, 704 # 8-byte Folded Reload + move $t0, $s3 + fld.d $ft6, $sp, 720 # 8-byte Folded Reload + fld.d $ft7, $sp, 856 # 8-byte Folded Reload + fld.d $ft13, $sp, 848 # 8-byte Folded Reload move $t1, $s2 move $t5, $s1 - ld.d $a7, $sp, 696 # 8-byte Folded Reload + ld.d $a7, $sp, 728 # 8-byte Folded Reload vldi $vr16, -912 vldi $vr22, -928 - ld.d $t7, $sp, 80 # 8-byte Folded Reload - ld.d $t6, $sp, 88 # 8-byte Folded Reload + ld.d $t8, $sp, 112 # 8-byte Folded Reload + ld.d $t7, $sp, 48 # 8-byte Folded Reload + ld.d $t6, $sp, 80 # 8-byte Folded Reload ld.d $t4, $sp, 120 # 8-byte Folded Reload ld.d $t2, $sp, 136 # 8-byte Folded Reload - fld.d $fa5, $sp, 200 # 8-byte Folded Reload - b .LBB16_56 -.LBB16_101: # %call.sqrt1384 + fld.d $fa5, $sp, 224 # 8-byte Folded Reload + b .LBB16_57 +.LBB16_102: # %call.sqrt1384 # in Loop: Header=BB16_6 Depth=1 fld.d $fa0, $sp, 776 # 8-byte Folded Reload st.d $t2, $sp, 136 # 8-byte Folded Spill st.d $t4, $sp, 120 # 8-byte Folded Spill - st.d $t6, $sp, 88 # 8-byte Folded Spill - st.d $t7, $sp, 80 # 8-byte Folded Spill + st.d $t6, $sp, 80 # 8-byte Folded Spill + move $s8, $t7 + st.d $t8, $sp, 112 # 8-byte Folded Spill move $s3, $t5 move $s6, $t1 - move $s8, $ra - fst.d $fa6, $sp, 64 # 8-byte Folded Spill + move $s7, $t0 + fst.d $fa6, $sp, 40 # 8-byte Folded Spill fmov.d $fs3, $ft10 fmov.d $fs4, $ft11 fmov.d $fs2, $ft12 fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 112 # 8-byte Folded Spill - fst.d $ft3, $sp, 104 # 8-byte Folded Spill - fst.d $ft4, $sp, 304 # 8-byte Folded Spill - fst.d $ft5, $sp, 96 # 8-byte Folded Spill - st.d $a6, $sp, 40 # 8-byte Folded Spill + fst.d $ft2, $sp, 104 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 320 # 8-byte Folded Spill + fst.d $ft5, $sp, 88 # 8-byte Folded Spill + st.d $a6, $sp, 32 # 8-byte Folded Spill + fst.d $fa4, $sp, 600 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ld.d $a6, $sp, 40 # 8-byte Folded Reload - fld.d $fa2, $sp, 568 # 8-byte Folded Reload - fld.d $ft5, $sp, 96 # 8-byte Folded Reload - fld.d $ft4, $sp, 304 # 8-byte Folded Reload - fld.d $ft3, $sp, 104 # 8-byte Folded Reload - fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $fa4, $sp, 600 # 8-byte Folded Reload + ld.d $a6, $sp, 32 # 8-byte Folded Reload + fld.d $fa2, $sp, 592 # 8-byte Folded Reload + fld.d $ft5, $sp, 88 # 8-byte Folded Reload + fld.d $ft4, $sp, 320 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload + fld.d $ft2, $sp, 104 # 8-byte Folded Reload fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 680 # 8-byte Folded Reload + fld.d $ft0, $sp, 712 # 8-byte Folded Reload fmov.d $ft12, $fs2 - fld.d $fs2, $sp, 600 # 8-byte Folded Reload - fld.d $fa7, $sp, 616 # 8-byte Folded Reload + fld.d $fs2, $sp, 672 # 8-byte Folded Reload fmov.d $ft11, $fs4 fmov.d $ft10, $fs3 - fld.d $fa6, $sp, 64 # 8-byte Folded Reload - fld.d $ft9, $sp, 664 # 8-byte Folded Reload - ld.d $a5, $sp, 672 # 8-byte Folded Reload - move $ra, $s8 - fld.d $ft6, $sp, 688 # 8-byte Folded Reload - fld.d $ft7, $sp, 824 # 8-byte Folded Reload - fld.d $ft13, $sp, 816 # 8-byte Folded Reload + fld.d $fa7, $sp, 624 # 8-byte Folded Reload + fld.d $fa6, $sp, 40 # 8-byte Folded Reload + fld.d $ft9, $sp, 696 # 8-byte Folded Reload + ld.d $a5, $sp, 704 # 8-byte Folded Reload + move $t0, $s7 + fld.d $ft6, $sp, 720 # 8-byte Folded Reload + fld.d $ft7, $sp, 856 # 8-byte Folded Reload + fld.d $ft13, $sp, 848 # 8-byte Folded Reload move $t1, $s6 move $t5, $s3 - ld.d $a7, $sp, 696 # 8-byte Folded Reload + ld.d $a7, $sp, 728 # 8-byte Folded Reload vldi $vr16, -912 vldi $vr22, -928 - ld.d $t7, $sp, 80 # 8-byte Folded Reload - ld.d $t6, $sp, 88 # 8-byte Folded Reload + ld.d $t8, $sp, 112 # 8-byte Folded Reload + move $t7, $s8 + ld.d $t6, $sp, 80 # 8-byte Folded Reload ld.d $t4, $sp, 120 # 8-byte Folded Reload + ld.d $s7, $sp, 216 # 8-byte Folded Reload ld.d $t2, $sp, 136 # 8-byte Folded Reload - fld.d $fa5, $sp, 200 # 8-byte Folded Reload - b .LBB16_66 -.LBB16_102: # %call.sqrt1388 + fld.d $fa5, $sp, 224 # 8-byte Folded Reload + b .LBB16_67 +.LBB16_103: # %call.sqrt1388 # in Loop: Header=BB16_6 Depth=1 - fld.d $fa0, $sp, 744 # 8-byte Folded Reload + fld.d $fa0, $sp, 768 # 8-byte Folded Reload st.d $t2, $sp, 136 # 8-byte Folded Spill st.d $t4, $sp, 120 # 8-byte Folded Spill - st.d $t6, $sp, 88 # 8-byte Folded Spill - st.d $t7, $sp, 80 # 8-byte Folded Spill - move $s8, $t5 - st.d $t1, $sp, 8 # 8-byte Folded Spill - st.d $ra, $sp, 32 # 8-byte Folded Spill - fst.d $fa6, $sp, 64 # 8-byte Folded Spill + st.d $t6, $sp, 80 # 8-byte Folded Spill + st.d $t7, $sp, 48 # 8-byte Folded Spill + st.d $t8, $sp, 112 # 8-byte Folded Spill + move $s7, $t5 + move $s8, $t1 + st.d $t0, $sp, 24 # 8-byte Folded Spill + fst.d $fa6, $sp, 40 # 8-byte Folded Spill fmov.d $fs3, $ft10 fmov.d $fs4, $ft11 fmov.d $fs2, $ft12 fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 112 # 8-byte Folded Spill - fst.d $ft3, $sp, 104 # 8-byte Folded Spill - fst.d $ft4, $sp, 304 # 8-byte Folded Spill - fst.d $ft5, $sp, 96 # 8-byte Folded Spill - fst.d $fa4, $sp, 576 # 8-byte Folded Spill - fmov.d $fs1, $ft15 + fst.d $ft2, $sp, 104 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 320 # 8-byte Folded Spill + fst.d $ft5, $sp, 88 # 8-byte Folded Spill + fst.d $fa4, $sp, 608 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - fmov.d $ft15, $fs1 - fld.d $fa4, $sp, 576 # 8-byte Folded Reload - fld.d $fa2, $sp, 560 # 8-byte Folded Reload - fld.d $ft5, $sp, 96 # 8-byte Folded Reload - fld.d $ft4, $sp, 304 # 8-byte Folded Reload - fld.d $ft3, $sp, 104 # 8-byte Folded Reload - fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $fa4, $sp, 608 # 8-byte Folded Reload + fld.d $fa2, $sp, 584 # 8-byte Folded Reload + fld.d $ft5, $sp, 88 # 8-byte Folded Reload + fld.d $ft4, $sp, 320 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload + fld.d $ft2, $sp, 104 # 8-byte Folded Reload fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 680 # 8-byte Folded Reload + fld.d $ft0, $sp, 712 # 8-byte Folded Reload fmov.d $ft12, $fs2 - fld.d $fs2, $sp, 600 # 8-byte Folded Reload - fld.d $fa7, $sp, 616 # 8-byte Folded Reload + fld.d $fs2, $sp, 672 # 8-byte Folded Reload fmov.d $ft11, $fs4 fmov.d $ft10, $fs3 - fld.d $fa6, $sp, 64 # 8-byte Folded Reload - fld.d $ft9, $sp, 664 # 8-byte Folded Reload - ld.d $a5, $sp, 672 # 8-byte Folded Reload - ld.d $ra, $sp, 32 # 8-byte Folded Reload - fld.d $ft6, $sp, 688 # 8-byte Folded Reload - fld.d $ft7, $sp, 824 # 8-byte Folded Reload - fld.d $ft13, $sp, 816 # 8-byte Folded Reload - ld.d $t1, $sp, 8 # 8-byte Folded Reload - move $t5, $s8 - ld.d $a7, $sp, 696 # 8-byte Folded Reload + fld.d $fa7, $sp, 624 # 8-byte Folded Reload + fld.d $fa6, $sp, 40 # 8-byte Folded Reload + fld.d $ft9, $sp, 696 # 8-byte Folded Reload + ld.d $a5, $sp, 704 # 8-byte Folded Reload + ld.d $t0, $sp, 24 # 8-byte Folded Reload + fld.d $ft6, $sp, 720 # 8-byte Folded Reload + fld.d $ft7, $sp, 856 # 8-byte Folded Reload + fld.d $ft13, $sp, 848 # 8-byte Folded Reload + move $t1, $s8 + move $t5, $s7 + ld.d $a7, $sp, 728 # 8-byte Folded Reload vldi $vr16, -912 vldi $vr22, -928 - ld.d $t7, $sp, 80 # 8-byte Folded Reload - ld.d $t6, $sp, 88 # 8-byte Folded Reload + ld.d $t8, $sp, 112 # 8-byte Folded Reload + ld.d $t7, $sp, 48 # 8-byte Folded Reload + ld.d $t6, $sp, 80 # 8-byte Folded Reload ld.d $t4, $sp, 120 # 8-byte Folded Reload ld.d $t2, $sp, 136 # 8-byte Folded Reload - fld.d $fa5, $sp, 200 # 8-byte Folded Reload - b .LBB16_75 -.LBB16_103: # %call.sqrt1392 + ld.d $s7, $sp, 216 # 8-byte Folded Reload + fld.d $fa5, $sp, 224 # 8-byte Folded Reload + b .LBB16_76 +.LBB16_104: # %call.sqrt1392 # in Loop: Header=BB16_6 Depth=1 fmov.d $fa0, $fa1 st.d $t2, $sp, 136 # 8-byte Folded Spill st.d $t4, $sp, 120 # 8-byte Folded Spill - st.d $t6, $sp, 88 # 8-byte Folded Spill - st.d $t7, $sp, 80 # 8-byte Folded Spill - st.d $t5, $sp, 640 # 8-byte Folded Spill - st.d $t1, $sp, 8 # 8-byte Folded Spill - st.d $ra, $sp, 32 # 8-byte Folded Spill + st.d $t6, $sp, 80 # 8-byte Folded Spill + st.d $t7, $sp, 48 # 8-byte Folded Spill + st.d $t8, $sp, 112 # 8-byte Folded Spill + move $s7, $t5 + st.d $t1, $sp, 16 # 8-byte Folded Spill + st.d $t0, $sp, 24 # 8-byte Folded Spill fmov.d $fs3, $fa6 fst.d $ft10, $sp, 56 # 8-byte Folded Spill - fst.d $ft11, $sp, 48 # 8-byte Folded Spill - fst.d $ft12, $sp, 72 # 8-byte Folded Spill + fst.d $ft11, $sp, 72 # 8-byte Folded Spill + fst.d $ft12, $sp, 64 # 8-byte Folded Spill fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 112 # 8-byte Folded Spill - fst.d $ft3, $sp, 104 # 8-byte Folded Spill - fst.d $ft4, $sp, 304 # 8-byte Folded Spill - fst.d $ft5, $sp, 96 # 8-byte Folded Spill - fmov.d $fs2, $ft15 + fst.d $ft2, $sp, 104 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 320 # 8-byte Folded Spill + fst.d $ft5, $sp, 88 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - fmov.d $ft15, $fs2 - fld.d $ft5, $sp, 96 # 8-byte Folded Reload - fld.d $ft4, $sp, 304 # 8-byte Folded Reload - fld.d $ft3, $sp, 104 # 8-byte Folded Reload - fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $ft5, $sp, 88 # 8-byte Folded Reload + fld.d $ft4, $sp, 320 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload + fld.d $ft2, $sp, 104 # 8-byte Folded Reload fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 680 # 8-byte Folded Reload - fld.d $ft12, $sp, 72 # 8-byte Folded Reload - fld.d $ft11, $sp, 48 # 8-byte Folded Reload + fld.d $ft0, $sp, 712 # 8-byte Folded Reload + fld.d $ft12, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 72 # 8-byte Folded Reload fld.d $ft10, $sp, 56 # 8-byte Folded Reload - fld.d $fa7, $sp, 624 # 8-byte Folded Reload + fld.d $fa7, $sp, 632 # 8-byte Folded Reload fmov.d $fa6, $fs3 - ld.d $a5, $sp, 672 # 8-byte Folded Reload - ld.d $ra, $sp, 32 # 8-byte Folded Reload - fld.d $ft6, $sp, 688 # 8-byte Folded Reload - fld.d $ft7, $sp, 824 # 8-byte Folded Reload - fld.d $ft13, $sp, 816 # 8-byte Folded Reload - ld.d $t1, $sp, 8 # 8-byte Folded Reload - ld.d $t5, $sp, 640 # 8-byte Folded Reload - ld.d $a7, $sp, 696 # 8-byte Folded Reload + ld.d $a5, $sp, 704 # 8-byte Folded Reload + ld.d $t0, $sp, 24 # 8-byte Folded Reload + fld.d $ft6, $sp, 720 # 8-byte Folded Reload + fld.d $ft7, $sp, 856 # 8-byte Folded Reload + fld.d $ft13, $sp, 848 # 8-byte Folded Reload + ld.d $t1, $sp, 16 # 8-byte Folded Reload + move $t5, $s7 + ld.d $a7, $sp, 728 # 8-byte Folded Reload vldi $vr16, -912 vldi $vr22, -928 - ld.d $t7, $sp, 80 # 8-byte Folded Reload - ld.d $t6, $sp, 88 # 8-byte Folded Reload + ld.d $t8, $sp, 112 # 8-byte Folded Reload + ld.d $t7, $sp, 48 # 8-byte Folded Reload + ld.d $t6, $sp, 80 # 8-byte Folded Reload ld.d $t4, $sp, 120 # 8-byte Folded Reload ld.d $t2, $sp, 136 # 8-byte Folded Reload - fld.d $fa5, $sp, 200 # 8-byte Folded Reload - b .LBB16_83 -.LBB16_104: # %call.sqrt1396 + ld.d $s7, $sp, 216 # 8-byte Folded Reload + fld.d $fa5, $sp, 224 # 8-byte Folded Reload + b .LBB16_84 +.LBB16_105: # %call.sqrt1396 # in Loop: Header=BB16_6 Depth=1 fmov.d $fa0, $fa1 st.d $t2, $sp, 136 # 8-byte Folded Spill st.d $t4, $sp, 120 # 8-byte Folded Spill - st.d $t6, $sp, 88 # 8-byte Folded Spill - st.d $t7, $sp, 80 # 8-byte Folded Spill + st.d $t6, $sp, 80 # 8-byte Folded Spill + move $s7, $t7 + st.d $t8, $sp, 112 # 8-byte Folded Spill move $s1, $t5 move $s2, $t1 - move $s3, $ra - fst.d $fa6, $sp, 64 # 8-byte Folded Spill + move $s3, $t0 + fst.d $fa6, $sp, 40 # 8-byte Folded Spill fst.d $ft10, $sp, 56 # 8-byte Folded Spill - fst.d $ft11, $sp, 48 # 8-byte Folded Spill - fst.d $ft12, $sp, 72 # 8-byte Folded Spill + fst.d $ft11, $sp, 72 # 8-byte Folded Spill + fst.d $ft12, $sp, 64 # 8-byte Folded Spill fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 112 # 8-byte Folded Spill - fst.d $ft3, $sp, 104 # 8-byte Folded Spill - fst.d $ft4, $sp, 304 # 8-byte Folded Spill - fst.d $ft5, $sp, 96 # 8-byte Folded Spill - fmov.d $fs2, $ft15 + fst.d $ft2, $sp, 104 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 320 # 8-byte Folded Spill + fst.d $ft5, $sp, 88 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - fmov.d $ft15, $fs2 - fld.d $ft5, $sp, 96 # 8-byte Folded Reload - fld.d $ft4, $sp, 304 # 8-byte Folded Reload - fld.d $ft3, $sp, 104 # 8-byte Folded Reload - fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $ft5, $sp, 88 # 8-byte Folded Reload + fld.d $ft4, $sp, 320 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload + fld.d $ft2, $sp, 104 # 8-byte Folded Reload fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft12, $sp, 72 # 8-byte Folded Reload - fld.d $ft11, $sp, 48 # 8-byte Folded Reload + fld.d $ft12, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 72 # 8-byte Folded Reload fld.d $ft10, $sp, 56 # 8-byte Folded Reload - fld.d $fa7, $sp, 624 # 8-byte Folded Reload - fld.d $fa6, $sp, 64 # 8-byte Folded Reload - move $ra, $s3 - fld.d $ft6, $sp, 688 # 8-byte Folded Reload - fld.d $ft7, $sp, 824 # 8-byte Folded Reload - fld.d $ft13, $sp, 816 # 8-byte Folded Reload + fld.d $fa7, $sp, 632 # 8-byte Folded Reload + fld.d $fa6, $sp, 40 # 8-byte Folded Reload + move $t0, $s3 + fld.d $ft6, $sp, 720 # 8-byte Folded Reload + fld.d $ft7, $sp, 856 # 8-byte Folded Reload + fld.d $ft13, $sp, 848 # 8-byte Folded Reload move $t1, $s2 move $t5, $s1 - ld.d $a7, $sp, 696 # 8-byte Folded Reload + ld.d $a7, $sp, 728 # 8-byte Folded Reload vldi $vr16, -912 vldi $vr22, -928 - ld.d $t7, $sp, 80 # 8-byte Folded Reload - ld.d $t6, $sp, 88 # 8-byte Folded Reload - ld.d $s2, $sp, 584 # 8-byte Folded Reload + ld.d $t8, $sp, 112 # 8-byte Folded Reload + move $t7, $s7 + ld.d $s7, $sp, 216 # 8-byte Folded Reload + ld.d $t6, $sp, 80 # 8-byte Folded Reload + ld.d $s2, $sp, 616 # 8-byte Folded Reload ld.d $t4, $sp, 120 # 8-byte Folded Reload ld.d $t2, $sp, 136 # 8-byte Folded Reload - fld.d $fa5, $sp, 200 # 8-byte Folded Reload - b .LBB16_91 -.LBB16_105: # %call.sqrt1370 + fld.d $fa5, $sp, 224 # 8-byte Folded Reload + b .LBB16_92 +.LBB16_106: # %call.sqrt1370 # in Loop: Header=BB16_6 Depth=1 fmov.d $fa0, $fa1 st.d $t2, $sp, 136 # 8-byte Folded Spill st.d $t4, $sp, 120 # 8-byte Folded Spill - st.d $t6, $sp, 88 # 8-byte Folded Spill - st.d $t7, $sp, 80 # 8-byte Folded Spill - move $s8, $t5 - st.d $t1, $sp, 8 # 8-byte Folded Spill - st.d $ra, $sp, 32 # 8-byte Folded Spill - fld.d $fa1, $sp, 808 # 8-byte Folded Reload - fst.d $fa1, $sp, 808 # 8-byte Folded Spill - fst.d $fa6, $sp, 64 # 8-byte Folded Spill - st.d $t3, $sp, 24 # 8-byte Folded Spill + st.d $t6, $sp, 80 # 8-byte Folded Spill + st.d $t7, $sp, 48 # 8-byte Folded Spill + st.d $t8, $sp, 112 # 8-byte Folded Spill + move $s7, $t5 + st.d $t1, $sp, 16 # 8-byte Folded Spill + st.d $t0, $sp, 24 # 8-byte Folded Spill + fld.d $fa1, $sp, 840 # 8-byte Folded Reload + fst.d $fa1, $sp, 840 # 8-byte Folded Spill + fst.d $fa6, $sp, 40 # 8-byte Folded Spill fst.d $ft10, $sp, 56 # 8-byte Folded Spill - fst.d $ft11, $sp, 48 # 8-byte Folded Spill - fst.d $ft12, $sp, 72 # 8-byte Folded Spill + fst.d $ft11, $sp, 72 # 8-byte Folded Spill + fst.d $ft12, $sp, 64 # 8-byte Folded Spill fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 112 # 8-byte Folded Spill - fst.d $ft3, $sp, 104 # 8-byte Folded Spill - fst.d $ft4, $sp, 304 # 8-byte Folded Spill - fst.d $ft5, $sp, 96 # 8-byte Folded Spill - fst.d $fa3, $sp, 216 # 8-byte Folded Spill - st.d $a6, $sp, 40 # 8-byte Folded Spill - fld.d $fa1, $sp, 640 # 8-byte Folded Reload - fst.d $fa1, $sp, 640 # 8-byte Folded Spill - st.d $t8, $sp, 16 # 8-byte Folded Spill - fst.d $fa7, $sp, 632 # 8-byte Folded Spill + fst.d $ft2, $sp, 104 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 320 # 8-byte Folded Spill + fst.d $ft5, $sp, 88 # 8-byte Folded Spill + fst.d $fa3, $sp, 240 # 8-byte Folded Spill + st.d $a6, $sp, 32 # 8-byte Folded Spill + fst.d $fa7, $sp, 640 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - fld.d $fa2, $sp, 320 # 8-byte Folded Reload - fld.d $fa7, $sp, 632 # 8-byte Folded Reload - ld.d $t8, $sp, 16 # 8-byte Folded Reload - ld.d $a6, $sp, 40 # 8-byte Folded Reload - fld.d $fa3, $sp, 216 # 8-byte Folded Reload - fld.d $ft5, $sp, 96 # 8-byte Folded Reload - fld.d $ft4, $sp, 304 # 8-byte Folded Reload - fld.d $ft3, $sp, 104 # 8-byte Folded Reload - fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $fa2, $sp, 336 # 8-byte Folded Reload + fld.d $fa7, $sp, 640 # 8-byte Folded Reload + ld.d $a6, $sp, 32 # 8-byte Folded Reload + fld.d $fa3, $sp, 240 # 8-byte Folded Reload + fld.d $ft5, $sp, 88 # 8-byte Folded Reload + fld.d $ft4, $sp, 320 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload + fld.d $ft2, $sp, 104 # 8-byte Folded Reload + ld.d $a4, $sp, 656 # 8-byte Folded Reload fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 680 # 8-byte Folded Reload - ld.d $a4, $sp, 648 # 8-byte Folded Reload - fld.d $ft12, $sp, 72 # 8-byte Folded Reload - fld.d $ft11, $sp, 48 # 8-byte Folded Reload + fld.d $ft0, $sp, 712 # 8-byte Folded Reload + fld.d $ft12, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 72 # 8-byte Folded Reload fld.d $ft10, $sp, 56 # 8-byte Folded Reload - ld.d $a3, $sp, 656 # 8-byte Folded Reload - ld.d $t3, $sp, 24 # 8-byte Folded Reload - fld.d $fa6, $sp, 64 # 8-byte Folded Reload - fld.d $ft9, $sp, 664 # 8-byte Folded Reload - ld.d $a5, $sp, 672 # 8-byte Folded Reload - ld.d $ra, $sp, 32 # 8-byte Folded Reload - fld.d $ft6, $sp, 688 # 8-byte Folded Reload - fld.d $ft7, $sp, 824 # 8-byte Folded Reload - fld.d $ft13, $sp, 816 # 8-byte Folded Reload - ld.d $t1, $sp, 8 # 8-byte Folded Reload - move $t5, $s8 - ld.d $a7, $sp, 696 # 8-byte Folded Reload + ld.d $a3, $sp, 664 # 8-byte Folded Reload + ld.d $ra, $sp, 680 # 8-byte Folded Reload + ld.d $t3, $sp, 688 # 8-byte Folded Reload + fld.d $fa6, $sp, 40 # 8-byte Folded Reload + fld.d $ft9, $sp, 696 # 8-byte Folded Reload + ld.d $a5, $sp, 704 # 8-byte Folded Reload + ld.d $t0, $sp, 24 # 8-byte Folded Reload + fld.d $ft6, $sp, 720 # 8-byte Folded Reload + fld.d $ft7, $sp, 856 # 8-byte Folded Reload + fld.d $ft13, $sp, 848 # 8-byte Folded Reload + ld.d $t1, $sp, 16 # 8-byte Folded Reload + move $t5, $s7 + ld.d $a7, $sp, 728 # 8-byte Folded Reload vldi $vr16, -912 vldi $vr22, -928 - ld.d $t7, $sp, 80 # 8-byte Folded Reload - ld.d $t6, $sp, 88 # 8-byte Folded Reload + ld.d $t8, $sp, 112 # 8-byte Folded Reload + ld.d $t7, $sp, 48 # 8-byte Folded Reload + ld.d $t6, $sp, 80 # 8-byte Folded Reload ld.d $t4, $sp, 120 # 8-byte Folded Reload ld.d $t2, $sp, 136 # 8-byte Folded Reload - fld.d $fa5, $sp, 200 # 8-byte Folded Reload + fld.d $fa5, $sp, 224 # 8-byte Folded Reload b .LBB16_38 -.LBB16_106: # %call.sqrt1374 +.LBB16_107: # %call.sqrt1374 # in Loop: Header=BB16_6 Depth=1 fmov.d $fa0, $fa1 st.d $t2, $sp, 136 # 8-byte Folded Spill st.d $t4, $sp, 120 # 8-byte Folded Spill - st.d $t6, $sp, 88 # 8-byte Folded Spill - st.d $t7, $sp, 80 # 8-byte Folded Spill - st.d $t5, $sp, 640 # 8-byte Folded Spill - st.d $t1, $sp, 8 # 8-byte Folded Spill - st.d $ra, $sp, 32 # 8-byte Folded Spill - fst.d $fa7, $sp, 752 # 8-byte Folded Spill - fst.d $fa6, $sp, 64 # 8-byte Folded Spill - st.d $t3, $sp, 24 # 8-byte Folded Spill + st.d $t6, $sp, 80 # 8-byte Folded Spill + st.d $t7, $sp, 48 # 8-byte Folded Spill + st.d $t8, $sp, 112 # 8-byte Folded Spill + st.d $t5, $sp, 424 # 8-byte Folded Spill + st.d $t1, $sp, 16 # 8-byte Folded Spill + st.d $t0, $sp, 24 # 8-byte Folded Spill + fst.d $fa6, $sp, 40 # 8-byte Folded Spill fst.d $ft10, $sp, 56 # 8-byte Folded Spill - fst.d $ft11, $sp, 48 # 8-byte Folded Spill - fst.d $ft12, $sp, 72 # 8-byte Folded Spill + fst.d $ft11, $sp, 72 # 8-byte Folded Spill + fst.d $ft12, $sp, 64 # 8-byte Folded Spill fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 112 # 8-byte Folded Spill - fst.d $ft3, $sp, 104 # 8-byte Folded Spill - fst.d $ft4, $sp, 304 # 8-byte Folded Spill - fst.d $fa3, $sp, 224 # 8-byte Folded Spill - fst.d $ft5, $sp, 96 # 8-byte Folded Spill - st.d $a6, $sp, 40 # 8-byte Folded Spill - st.d $t8, $sp, 16 # 8-byte Folded Spill + fst.d $ft2, $sp, 104 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 320 # 8-byte Folded Spill + fst.d $fa3, $sp, 248 # 8-byte Folded Spill + fst.d $ft5, $sp, 88 # 8-byte Folded Spill + st.d $a6, $sp, 32 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ld.d $t8, $sp, 16 # 8-byte Folded Reload - ld.d $a6, $sp, 40 # 8-byte Folded Reload - fld.d $ft5, $sp, 96 # 8-byte Folded Reload - fld.d $fa3, $sp, 224 # 8-byte Folded Reload - fld.d $ft4, $sp, 304 # 8-byte Folded Reload - fld.d $ft3, $sp, 104 # 8-byte Folded Reload - fld.d $ft2, $sp, 112 # 8-byte Folded Reload + ld.d $a6, $sp, 32 # 8-byte Folded Reload + fld.d $ft5, $sp, 88 # 8-byte Folded Reload + fld.d $fa3, $sp, 248 # 8-byte Folded Reload + fld.d $ft4, $sp, 320 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload + fld.d $ft2, $sp, 104 # 8-byte Folded Reload + ld.d $a4, $sp, 656 # 8-byte Folded Reload fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 680 # 8-byte Folded Reload - ld.d $a4, $sp, 648 # 8-byte Folded Reload - fld.d $ft12, $sp, 72 # 8-byte Folded Reload - fld.d $ft11, $sp, 48 # 8-byte Folded Reload + fld.d $ft0, $sp, 712 # 8-byte Folded Reload + fld.d $ft12, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 72 # 8-byte Folded Reload fld.d $ft10, $sp, 56 # 8-byte Folded Reload - ld.d $a3, $sp, 656 # 8-byte Folded Reload - ld.d $t3, $sp, 24 # 8-byte Folded Reload - fld.d $fa6, $sp, 64 # 8-byte Folded Reload - fld.d $ft9, $sp, 664 # 8-byte Folded Reload - ld.d $a5, $sp, 672 # 8-byte Folded Reload - fld.d $fa7, $sp, 752 # 8-byte Folded Reload - ld.d $ra, $sp, 32 # 8-byte Folded Reload - fld.d $ft6, $sp, 688 # 8-byte Folded Reload - fld.d $ft7, $sp, 824 # 8-byte Folded Reload - fld.d $ft13, $sp, 816 # 8-byte Folded Reload - ld.d $t1, $sp, 8 # 8-byte Folded Reload - ld.d $t5, $sp, 640 # 8-byte Folded Reload - ld.d $a7, $sp, 696 # 8-byte Folded Reload + ld.d $a3, $sp, 664 # 8-byte Folded Reload + ld.d $ra, $sp, 680 # 8-byte Folded Reload + ld.d $t3, $sp, 688 # 8-byte Folded Reload + fld.d $fa6, $sp, 40 # 8-byte Folded Reload + fld.d $ft9, $sp, 696 # 8-byte Folded Reload + ld.d $a5, $sp, 704 # 8-byte Folded Reload + fld.d $fs2, $sp, 672 # 8-byte Folded Reload + fld.d $fa7, $sp, 648 # 8-byte Folded Reload + ld.d $t0, $sp, 24 # 8-byte Folded Reload + fld.d $ft6, $sp, 720 # 8-byte Folded Reload + fld.d $ft7, $sp, 856 # 8-byte Folded Reload + fld.d $ft13, $sp, 848 # 8-byte Folded Reload + ld.d $t1, $sp, 16 # 8-byte Folded Reload + ld.d $t5, $sp, 424 # 8-byte Folded Reload + ld.d $a7, $sp, 728 # 8-byte Folded Reload vldi $vr16, -912 vldi $vr22, -928 - ld.d $t7, $sp, 80 # 8-byte Folded Reload - ld.d $t6, $sp, 88 # 8-byte Folded Reload + ld.d $t8, $sp, 112 # 8-byte Folded Reload + ld.d $t7, $sp, 48 # 8-byte Folded Reload + ld.d $t6, $sp, 80 # 8-byte Folded Reload ld.d $t4, $sp, 120 # 8-byte Folded Reload ld.d $t2, $sp, 136 # 8-byte Folded Reload - fld.d $fa5, $sp, 200 # 8-byte Folded Reload - b .LBB16_47 -.LBB16_107: # %call.sqrt1378 + fld.d $fa5, $sp, 224 # 8-byte Folded Reload + b .LBB16_48 +.LBB16_108: # %call.sqrt1378 # in Loop: Header=BB16_6 Depth=1 fmov.d $fa0, $fa1 st.d $t2, $sp, 136 # 8-byte Folded Spill st.d $t4, $sp, 120 # 8-byte Folded Spill - st.d $t6, $sp, 88 # 8-byte Folded Spill - st.d $t7, $sp, 80 # 8-byte Folded Spill + st.d $t6, $sp, 80 # 8-byte Folded Spill + st.d $t7, $sp, 48 # 8-byte Folded Spill + st.d $t8, $sp, 112 # 8-byte Folded Spill move $s1, $t5 move $s2, $t1 - move $s3, $ra - fst.d $fa6, $sp, 64 # 8-byte Folded Spill - st.d $t3, $sp, 24 # 8-byte Folded Spill + move $s3, $t0 + fst.d $fa6, $sp, 40 # 8-byte Folded Spill fst.d $ft10, $sp, 56 # 8-byte Folded Spill - fst.d $ft11, $sp, 48 # 8-byte Folded Spill - fst.d $ft12, $sp, 72 # 8-byte Folded Spill - fst.d $fa3, $sp, 232 # 8-byte Folded Spill + fst.d $ft11, $sp, 72 # 8-byte Folded Spill + fst.d $ft12, $sp, 64 # 8-byte Folded Spill + fst.d $fa3, $sp, 256 # 8-byte Folded Spill fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 112 # 8-byte Folded Spill - fst.d $ft3, $sp, 104 # 8-byte Folded Spill - fst.d $ft4, $sp, 304 # 8-byte Folded Spill - fst.d $ft5, $sp, 96 # 8-byte Folded Spill - st.d $a6, $sp, 40 # 8-byte Folded Spill - st.d $t8, $sp, 16 # 8-byte Folded Spill - fmov.d $fs3, $fa2 + fst.d $ft2, $sp, 104 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 320 # 8-byte Folded Spill + fst.d $ft5, $sp, 88 # 8-byte Folded Spill + st.d $a6, $sp, 32 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - fmov.d $fa2, $fs3 - ld.d $t8, $sp, 16 # 8-byte Folded Reload - ld.d $a6, $sp, 40 # 8-byte Folded Reload - fld.d $ft5, $sp, 96 # 8-byte Folded Reload - fld.d $ft4, $sp, 304 # 8-byte Folded Reload - fld.d $ft3, $sp, 104 # 8-byte Folded Reload - fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $fa2, $sp, 336 # 8-byte Folded Reload + ld.d $a6, $sp, 32 # 8-byte Folded Reload + fld.d $ft5, $sp, 88 # 8-byte Folded Reload + fld.d $ft4, $sp, 320 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload + fld.d $ft2, $sp, 104 # 8-byte Folded Reload fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 680 # 8-byte Folded Reload - fld.d $fa3, $sp, 232 # 8-byte Folded Reload - fld.d $ft12, $sp, 72 # 8-byte Folded Reload - fld.d $ft11, $sp, 48 # 8-byte Folded Reload + fld.d $ft0, $sp, 712 # 8-byte Folded Reload + fld.d $fa3, $sp, 256 # 8-byte Folded Reload + fld.d $ft12, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 72 # 8-byte Folded Reload fld.d $ft10, $sp, 56 # 8-byte Folded Reload - ld.d $a3, $sp, 656 # 8-byte Folded Reload - ld.d $t3, $sp, 24 # 8-byte Folded Reload - fld.d $fa6, $sp, 64 # 8-byte Folded Reload - fld.d $ft9, $sp, 664 # 8-byte Folded Reload - ld.d $a5, $sp, 672 # 8-byte Folded Reload - fld.d $fs3, $sp, 592 # 8-byte Folded Reload - fld.d $fa7, $sp, 608 # 8-byte Folded Reload - move $ra, $s3 - fld.d $ft6, $sp, 688 # 8-byte Folded Reload - fld.d $ft7, $sp, 824 # 8-byte Folded Reload - fld.d $ft13, $sp, 816 # 8-byte Folded Reload + ld.d $a3, $sp, 664 # 8-byte Folded Reload + ld.d $ra, $sp, 680 # 8-byte Folded Reload + ld.d $t3, $sp, 688 # 8-byte Folded Reload + fld.d $fa6, $sp, 40 # 8-byte Folded Reload + fld.d $ft9, $sp, 696 # 8-byte Folded Reload + ld.d $a5, $sp, 704 # 8-byte Folded Reload + fld.d $fs2, $sp, 672 # 8-byte Folded Reload + fld.d $fa7, $sp, 648 # 8-byte Folded Reload + move $t0, $s3 + fld.d $ft6, $sp, 720 # 8-byte Folded Reload + fld.d $ft7, $sp, 856 # 8-byte Folded Reload + fld.d $ft13, $sp, 848 # 8-byte Folded Reload move $t1, $s2 move $t5, $s1 - ld.d $a7, $sp, 696 # 8-byte Folded Reload + ld.d $a7, $sp, 728 # 8-byte Folded Reload vldi $vr16, -912 vldi $vr22, -928 - ld.d $t7, $sp, 80 # 8-byte Folded Reload - ld.d $t6, $sp, 88 # 8-byte Folded Reload + ld.d $t8, $sp, 112 # 8-byte Folded Reload + ld.d $t7, $sp, 48 # 8-byte Folded Reload + ld.d $t6, $sp, 80 # 8-byte Folded Reload ld.d $t4, $sp, 120 # 8-byte Folded Reload ld.d $t2, $sp, 136 # 8-byte Folded Reload - fld.d $fa5, $sp, 200 # 8-byte Folded Reload - b .LBB16_54 -.LBB16_108: # %call.sqrt1382 + fld.d $fa5, $sp, 224 # 8-byte Folded Reload + b .LBB16_55 +.LBB16_109: # %call.sqrt1382 # in Loop: Header=BB16_6 Depth=1 fmov.d $fa0, $fa1 st.d $t2, $sp, 136 # 8-byte Folded Spill st.d $t4, $sp, 120 # 8-byte Folded Spill - move $s8, $t6 + move $s7, $t6 move $s1, $t7 + st.d $t8, $sp, 112 # 8-byte Folded Spill move $s0, $t5 move $s2, $t1 - move $s3, $ra + move $s3, $t0 move $s6, $a5 - fst.d $fa6, $sp, 64 # 8-byte Folded Spill - st.d $t3, $sp, 24 # 8-byte Folded Spill - fst.d $fa3, $sp, 240 # 8-byte Folded Spill + fst.d $fa6, $sp, 40 # 8-byte Folded Spill + fst.d $fa3, $sp, 264 # 8-byte Folded Spill fst.d $ft10, $sp, 56 # 8-byte Folded Spill - fst.d $ft11, $sp, 48 # 8-byte Folded Spill - fst.d $ft12, $sp, 72 # 8-byte Folded Spill + fst.d $ft11, $sp, 72 # 8-byte Folded Spill + fst.d $ft12, $sp, 64 # 8-byte Folded Spill fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 112 # 8-byte Folded Spill - fst.d $ft3, $sp, 104 # 8-byte Folded Spill - fst.d $ft4, $sp, 304 # 8-byte Folded Spill - fst.d $ft5, $sp, 96 # 8-byte Folded Spill - st.d $a6, $sp, 40 # 8-byte Folded Spill - fst.d $fa7, $sp, 768 # 8-byte Folded Spill - st.d $t8, $sp, 16 # 8-byte Folded Spill + fst.d $ft2, $sp, 104 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 320 # 8-byte Folded Spill + fst.d $ft5, $sp, 88 # 8-byte Folded Spill + st.d $a6, $sp, 32 # 8-byte Folded Spill + fst.d $fa7, $sp, 808 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ld.d $t8, $sp, 16 # 8-byte Folded Reload - fld.d $fa7, $sp, 768 # 8-byte Folded Reload - ld.d $a6, $sp, 40 # 8-byte Folded Reload - fld.d $ft5, $sp, 96 # 8-byte Folded Reload - fld.d $ft4, $sp, 304 # 8-byte Folded Reload - fld.d $ft3, $sp, 104 # 8-byte Folded Reload - fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $fa7, $sp, 808 # 8-byte Folded Reload + ld.d $a6, $sp, 32 # 8-byte Folded Reload + fld.d $ft5, $sp, 88 # 8-byte Folded Reload + fld.d $ft4, $sp, 320 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload + fld.d $ft2, $sp, 104 # 8-byte Folded Reload fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 680 # 8-byte Folded Reload - fld.d $ft12, $sp, 72 # 8-byte Folded Reload - fld.d $ft11, $sp, 48 # 8-byte Folded Reload + fld.d $ft0, $sp, 712 # 8-byte Folded Reload + fld.d $ft12, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 72 # 8-byte Folded Reload fld.d $ft10, $sp, 56 # 8-byte Folded Reload - fld.d $fa3, $sp, 240 # 8-byte Folded Reload - ld.d $t3, $sp, 24 # 8-byte Folded Reload - fld.d $fa6, $sp, 64 # 8-byte Folded Reload - fld.d $ft9, $sp, 664 # 8-byte Folded Reload + fld.d $fa3, $sp, 264 # 8-byte Folded Reload + ld.d $ra, $sp, 680 # 8-byte Folded Reload + ld.d $t3, $sp, 688 # 8-byte Folded Reload + fld.d $fa6, $sp, 40 # 8-byte Folded Reload + fld.d $ft9, $sp, 696 # 8-byte Folded Reload move $a5, $s6 - move $ra, $s3 - fld.d $ft6, $sp, 688 # 8-byte Folded Reload - fld.d $ft7, $sp, 824 # 8-byte Folded Reload - fld.d $ft13, $sp, 816 # 8-byte Folded Reload + fld.d $fs2, $sp, 672 # 8-byte Folded Reload + move $t0, $s3 + fld.d $ft6, $sp, 720 # 8-byte Folded Reload + fld.d $ft7, $sp, 856 # 8-byte Folded Reload + fld.d $ft13, $sp, 848 # 8-byte Folded Reload move $t1, $s2 move $t5, $s0 - ld.d $a7, $sp, 696 # 8-byte Folded Reload + ld.d $a7, $sp, 728 # 8-byte Folded Reload vldi $vr16, -912 vldi $vr22, -928 + ld.d $t8, $sp, 112 # 8-byte Folded Reload move $t7, $s1 - move $t6, $s8 + move $t6, $s7 + ld.d $s7, $sp, 216 # 8-byte Folded Reload ld.d $t4, $sp, 120 # 8-byte Folded Reload ld.d $t2, $sp, 136 # 8-byte Folded Reload - fld.d $fa5, $sp, 200 # 8-byte Folded Reload - b .LBB16_62 -.LBB16_109: # %call.sqrt1386 + fld.d $fa5, $sp, 224 # 8-byte Folded Reload + b .LBB16_63 +.LBB16_110: # %call.sqrt1386 # in Loop: Header=BB16_6 Depth=1 fmov.d $fa0, $fa1 st.d $t2, $sp, 136 # 8-byte Folded Spill st.d $t4, $sp, 120 # 8-byte Folded Spill - st.d $t6, $sp, 88 # 8-byte Folded Spill - st.d $t7, $sp, 80 # 8-byte Folded Spill + st.d $t6, $sp, 80 # 8-byte Folded Spill + st.d $t7, $sp, 48 # 8-byte Folded Spill + st.d $t8, $sp, 112 # 8-byte Folded Spill move $s6, $t5 - move $s8, $t1 - st.d $ra, $sp, 32 # 8-byte Folded Spill - fst.d $fa6, $sp, 64 # 8-byte Folded Spill + move $s7, $t1 + move $s8, $t0 + fst.d $fa6, $sp, 40 # 8-byte Folded Spill fmov.d $fs4, $fa3 fst.d $ft10, $sp, 56 # 8-byte Folded Spill + fst.d $fs3, $sp, 784 # 8-byte Folded Spill fmov.d $fs3, $ft11 - fst.d $ft12, $sp, 72 # 8-byte Folded Spill + fst.d $ft12, $sp, 64 # 8-byte Folded Spill fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 112 # 8-byte Folded Spill - fst.d $ft3, $sp, 104 # 8-byte Folded Spill - fst.d $ft4, $sp, 304 # 8-byte Folded Spill - fst.d $ft5, $sp, 96 # 8-byte Folded Spill - st.d $a6, $sp, 40 # 8-byte Folded Spill - fmov.d $fs2, $ft15 + fst.d $ft2, $sp, 104 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 320 # 8-byte Folded Spill + fst.d $ft5, $sp, 88 # 8-byte Folded Spill + st.d $a6, $sp, 32 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - fmov.d $ft15, $fs2 - ld.d $a6, $sp, 40 # 8-byte Folded Reload - fld.d $ft5, $sp, 96 # 8-byte Folded Reload - fld.d $ft4, $sp, 304 # 8-byte Folded Reload - fld.d $ft3, $sp, 104 # 8-byte Folded Reload - fld.d $ft2, $sp, 112 # 8-byte Folded Reload + ld.d $a6, $sp, 32 # 8-byte Folded Reload + fld.d $ft5, $sp, 88 # 8-byte Folded Reload + fld.d $ft4, $sp, 320 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload + fld.d $ft2, $sp, 104 # 8-byte Folded Reload fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 680 # 8-byte Folded Reload - fld.d $ft12, $sp, 72 # 8-byte Folded Reload + fld.d $ft0, $sp, 712 # 8-byte Folded Reload + fld.d $ft12, $sp, 64 # 8-byte Folded Reload fmov.d $ft11, $fs3 - fld.d $fa2, $sp, 720 # 8-byte Folded Reload + fld.d $fs3, $sp, 784 # 8-byte Folded Reload + fld.d $fa7, $sp, 624 # 8-byte Folded Reload fld.d $ft10, $sp, 56 # 8-byte Folded Reload fmov.d $fa3, $fs4 - fld.d $fa6, $sp, 64 # 8-byte Folded Reload - fld.d $ft9, $sp, 664 # 8-byte Folded Reload - ld.d $a5, $sp, 672 # 8-byte Folded Reload - fld.d $fs2, $sp, 600 # 8-byte Folded Reload - fld.d $fa7, $sp, 616 # 8-byte Folded Reload - ld.d $ra, $sp, 32 # 8-byte Folded Reload - fld.d $ft6, $sp, 688 # 8-byte Folded Reload - fld.d $ft7, $sp, 824 # 8-byte Folded Reload - fld.d $ft13, $sp, 816 # 8-byte Folded Reload - move $t1, $s8 + fld.d $fa2, $sp, 688 # 8-byte Folded Reload + fld.d $fa6, $sp, 40 # 8-byte Folded Reload + fld.d $ft9, $sp, 696 # 8-byte Folded Reload + ld.d $a5, $sp, 704 # 8-byte Folded Reload + fld.d $fs2, $sp, 672 # 8-byte Folded Reload + move $t0, $s8 + fld.d $ft6, $sp, 720 # 8-byte Folded Reload + fld.d $ft7, $sp, 856 # 8-byte Folded Reload + fld.d $ft13, $sp, 848 # 8-byte Folded Reload + move $t1, $s7 move $t5, $s6 - ld.d $a7, $sp, 696 # 8-byte Folded Reload + ld.d $a7, $sp, 728 # 8-byte Folded Reload vldi $vr16, -912 vldi $vr22, -928 - ld.d $t7, $sp, 80 # 8-byte Folded Reload - ld.d $t6, $sp, 88 # 8-byte Folded Reload + ld.d $t8, $sp, 112 # 8-byte Folded Reload + ld.d $t7, $sp, 48 # 8-byte Folded Reload + ld.d $t6, $sp, 80 # 8-byte Folded Reload + ld.d $s7, $sp, 216 # 8-byte Folded Reload ld.d $t4, $sp, 120 # 8-byte Folded Reload ld.d $t2, $sp, 136 # 8-byte Folded Reload - fld.d $fa5, $sp, 200 # 8-byte Folded Reload - b .LBB16_71 -.LBB16_110: # %call.sqrt1390 + fld.d $fa5, $sp, 224 # 8-byte Folded Reload + b .LBB16_72 +.LBB16_111: # %call.sqrt1390 # in Loop: Header=BB16_6 Depth=1 fmov.d $fa0, $fa1 st.d $t2, $sp, 136 # 8-byte Folded Spill st.d $t4, $sp, 120 # 8-byte Folded Spill - st.d $t6, $sp, 88 # 8-byte Folded Spill - st.d $t7, $sp, 80 # 8-byte Folded Spill - st.d $t5, $sp, 640 # 8-byte Folded Spill - st.d $t1, $sp, 8 # 8-byte Folded Spill - st.d $ra, $sp, 32 # 8-byte Folded Spill - fst.d $fa2, $sp, 248 # 8-byte Folded Spill - fst.d $fa6, $sp, 64 # 8-byte Folded Spill + st.d $t6, $sp, 80 # 8-byte Folded Spill + st.d $t7, $sp, 48 # 8-byte Folded Spill + st.d $t8, $sp, 112 # 8-byte Folded Spill + move $s7, $t5 + st.d $t1, $sp, 16 # 8-byte Folded Spill + st.d $t0, $sp, 24 # 8-byte Folded Spill + fst.d $fa2, $sp, 272 # 8-byte Folded Spill + fst.d $fa6, $sp, 40 # 8-byte Folded Spill fst.d $ft10, $sp, 56 # 8-byte Folded Spill - fst.d $ft11, $sp, 48 # 8-byte Folded Spill - fst.d $ft12, $sp, 72 # 8-byte Folded Spill + fst.d $ft11, $sp, 72 # 8-byte Folded Spill + fst.d $ft12, $sp, 64 # 8-byte Folded Spill fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 112 # 8-byte Folded Spill - fst.d $ft3, $sp, 104 # 8-byte Folded Spill - fst.d $ft4, $sp, 304 # 8-byte Folded Spill - fst.d $ft5, $sp, 96 # 8-byte Folded Spill - fmov.d $fs2, $ft15 - fst.d $fa7, $sp, 792 # 8-byte Folded Spill + fst.d $ft2, $sp, 104 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 320 # 8-byte Folded Spill + fst.d $ft5, $sp, 88 # 8-byte Folded Spill + fst.d $fa7, $sp, 824 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - fld.d $fa7, $sp, 792 # 8-byte Folded Reload - fmov.d $ft15, $fs2 - fld.d $fa3, $sp, 656 # 8-byte Folded Reload - fld.d $ft5, $sp, 96 # 8-byte Folded Reload - fld.d $ft4, $sp, 304 # 8-byte Folded Reload - fld.d $ft3, $sp, 104 # 8-byte Folded Reload - fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $fa7, $sp, 824 # 8-byte Folded Reload + fld.d $ft5, $sp, 88 # 8-byte Folded Reload + fld.d $ft4, $sp, 320 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload + fld.d $ft2, $sp, 104 # 8-byte Folded Reload fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 680 # 8-byte Folded Reload - fld.d $ft12, $sp, 72 # 8-byte Folded Reload - fld.d $ft11, $sp, 48 # 8-byte Folded Reload + fld.d $ft0, $sp, 712 # 8-byte Folded Reload + fld.d $ft12, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 72 # 8-byte Folded Reload fld.d $ft10, $sp, 56 # 8-byte Folded Reload - fld.d $fa6, $sp, 64 # 8-byte Folded Reload - fld.d $fa2, $sp, 248 # 8-byte Folded Reload - fld.d $ft9, $sp, 664 # 8-byte Folded Reload - ld.d $a5, $sp, 672 # 8-byte Folded Reload - ld.d $ra, $sp, 32 # 8-byte Folded Reload - fld.d $ft6, $sp, 688 # 8-byte Folded Reload - fld.d $ft7, $sp, 824 # 8-byte Folded Reload - fld.d $ft13, $sp, 816 # 8-byte Folded Reload - ld.d $t1, $sp, 8 # 8-byte Folded Reload - ld.d $t5, $sp, 640 # 8-byte Folded Reload - ld.d $a7, $sp, 696 # 8-byte Folded Reload + fld.d $fa6, $sp, 40 # 8-byte Folded Reload + fld.d $fa2, $sp, 272 # 8-byte Folded Reload + fld.d $ft9, $sp, 696 # 8-byte Folded Reload + ld.d $a5, $sp, 704 # 8-byte Folded Reload + fld.d $fa3, $sp, 680 # 8-byte Folded Reload + ld.d $t0, $sp, 24 # 8-byte Folded Reload + fld.d $ft6, $sp, 720 # 8-byte Folded Reload + fld.d $ft7, $sp, 856 # 8-byte Folded Reload + fld.d $ft13, $sp, 848 # 8-byte Folded Reload + ld.d $t1, $sp, 16 # 8-byte Folded Reload + move $t5, $s7 + ld.d $a7, $sp, 728 # 8-byte Folded Reload vldi $vr16, -912 vldi $vr22, -928 - ld.d $t7, $sp, 80 # 8-byte Folded Reload - ld.d $t6, $sp, 88 # 8-byte Folded Reload + ld.d $t8, $sp, 112 # 8-byte Folded Reload + ld.d $t7, $sp, 48 # 8-byte Folded Reload + ld.d $t6, $sp, 80 # 8-byte Folded Reload ld.d $t4, $sp, 120 # 8-byte Folded Reload ld.d $t2, $sp, 136 # 8-byte Folded Reload - fld.d $fa5, $sp, 200 # 8-byte Folded Reload - b .LBB16_81 -.LBB16_111: # %call.sqrt1394 + ld.d $s7, $sp, 216 # 8-byte Folded Reload + fld.d $fa5, $sp, 224 # 8-byte Folded Reload + b .LBB16_82 +.LBB16_112: # %call.sqrt1394 # in Loop: Header=BB16_6 Depth=1 fmov.d $fa0, $fa1 st.d $t2, $sp, 136 # 8-byte Folded Spill st.d $t4, $sp, 120 # 8-byte Folded Spill - st.d $t6, $sp, 88 # 8-byte Folded Spill - st.d $t7, $sp, 80 # 8-byte Folded Spill + st.d $t6, $sp, 80 # 8-byte Folded Spill + move $s7, $t7 + st.d $t8, $sp, 112 # 8-byte Folded Spill move $s1, $t5 move $s2, $t1 - move $s3, $ra - fld.d $fa1, $sp, 808 # 8-byte Folded Reload - fst.d $fa1, $sp, 808 # 8-byte Folded Spill - fst.d $fa3, $sp, 256 # 8-byte Folded Spill - fst.d $fa6, $sp, 64 # 8-byte Folded Spill + move $s3, $t0 + fld.d $fa1, $sp, 840 # 8-byte Folded Reload + fst.d $fa1, $sp, 840 # 8-byte Folded Spill + fst.d $fa3, $sp, 280 # 8-byte Folded Spill + fst.d $fa6, $sp, 40 # 8-byte Folded Spill fst.d $ft10, $sp, 56 # 8-byte Folded Spill - fst.d $ft11, $sp, 48 # 8-byte Folded Spill - fst.d $ft12, $sp, 72 # 8-byte Folded Spill + fst.d $ft11, $sp, 72 # 8-byte Folded Spill + fst.d $ft12, $sp, 64 # 8-byte Folded Spill fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 112 # 8-byte Folded Spill - fst.d $ft3, $sp, 104 # 8-byte Folded Spill - fst.d $ft4, $sp, 304 # 8-byte Folded Spill - fst.d $ft5, $sp, 96 # 8-byte Folded Spill - fst.d $ft15, $sp, 776 # 8-byte Folded Spill + fst.d $ft2, $sp, 104 # 8-byte Folded Spill + fst.d $ft3, $sp, 96 # 8-byte Folded Spill + fst.d $ft4, $sp, 320 # 8-byte Folded Spill + fst.d $ft5, $sp, 88 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - fld.d $ft15, $sp, 776 # 8-byte Folded Reload - fld.d $ft5, $sp, 96 # 8-byte Folded Reload - fld.d $ft4, $sp, 304 # 8-byte Folded Reload - fld.d $ft3, $sp, 104 # 8-byte Folded Reload - fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $ft5, $sp, 88 # 8-byte Folded Reload + fld.d $ft4, $sp, 320 # 8-byte Folded Reload + fld.d $ft3, $sp, 96 # 8-byte Folded Reload + fld.d $ft2, $sp, 104 # 8-byte Folded Reload fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft12, $sp, 72 # 8-byte Folded Reload - fld.d $ft11, $sp, 48 # 8-byte Folded Reload + fld.d $ft12, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 72 # 8-byte Folded Reload fld.d $ft10, $sp, 56 # 8-byte Folded Reload - fld.d $fa7, $sp, 624 # 8-byte Folded Reload - fld.d $fa6, $sp, 64 # 8-byte Folded Reload - fld.d $fa3, $sp, 256 # 8-byte Folded Reload - move $ra, $s3 - fld.d $ft6, $sp, 688 # 8-byte Folded Reload - fld.d $ft7, $sp, 824 # 8-byte Folded Reload - fld.d $ft13, $sp, 816 # 8-byte Folded Reload + fld.d $fa7, $sp, 632 # 8-byte Folded Reload + fld.d $fa6, $sp, 40 # 8-byte Folded Reload + fld.d $fa3, $sp, 280 # 8-byte Folded Reload + move $t0, $s3 + fld.d $ft6, $sp, 720 # 8-byte Folded Reload + fld.d $ft7, $sp, 856 # 8-byte Folded Reload + fld.d $ft13, $sp, 848 # 8-byte Folded Reload move $t1, $s2 move $t5, $s1 - ld.d $a7, $sp, 696 # 8-byte Folded Reload + ld.d $a7, $sp, 728 # 8-byte Folded Reload vldi $vr16, -912 vldi $vr22, -928 - ld.d $t7, $sp, 80 # 8-byte Folded Reload - ld.d $t6, $sp, 88 # 8-byte Folded Reload - ld.d $s2, $sp, 584 # 8-byte Folded Reload + ld.d $t8, $sp, 112 # 8-byte Folded Reload + move $t7, $s7 + ld.d $s7, $sp, 216 # 8-byte Folded Reload + ld.d $t6, $sp, 80 # 8-byte Folded Reload + ld.d $s2, $sp, 616 # 8-byte Folded Reload ld.d $t4, $sp, 120 # 8-byte Folded Reload ld.d $t2, $sp, 136 # 8-byte Folded Reload - fld.d $fa5, $sp, 200 # 8-byte Folded Reload - b .LBB16_89 -.LBB16_112: # %call.sqrt1398 + fld.d $fa5, $sp, 224 # 8-byte Folded Reload + b .LBB16_90 +.LBB16_113: # %call.sqrt1398 # in Loop: Header=BB16_6 Depth=1 fmov.d $fa0, $fa1 st.d $t2, $sp, 136 # 8-byte Folded Spill move $s2, $t4 move $s6, $t6 - move $s8, $t7 + move $s7, $t7 + move $s8, $t8 move $fp, $t5 move $s1, $t1 - fst.d $fa2, $sp, 264 # 8-byte Folded Spill - fld.d $fa1, $sp, 808 # 8-byte Folded Reload - fst.d $fa1, $sp, 808 # 8-byte Folded Spill - fst.d $fa6, $sp, 64 # 8-byte Folded Spill + fst.d $fa2, $sp, 288 # 8-byte Folded Spill + fld.d $fa1, $sp, 840 # 8-byte Folded Reload + fst.d $fa1, $sp, 840 # 8-byte Folded Spill + fst.d $fa6, $sp, 40 # 8-byte Folded Spill fst.d $ft10, $sp, 56 # 8-byte Folded Spill - fst.d $ft11, $sp, 48 # 8-byte Folded Spill + fst.d $ft11, $sp, 72 # 8-byte Folded Spill fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft4, $sp, 304 # 8-byte Folded Spill - fst.d $ft15, $sp, 776 # 8-byte Folded Spill + fst.d $ft4, $sp, 320 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - fld.d $ft15, $sp, 776 # 8-byte Folded Reload - fld.d $fa7, $sp, 768 # 8-byte Folded Reload - fld.d $ft4, $sp, 304 # 8-byte Folded Reload + fld.d $fa7, $sp, 808 # 8-byte Folded Reload + fld.d $ft4, $sp, 320 # 8-byte Folded Reload fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft11, $sp, 48 # 8-byte Folded Reload + fld.d $ft11, $sp, 72 # 8-byte Folded Reload fld.d $ft10, $sp, 56 # 8-byte Folded Reload - fld.d $fa6, $sp, 64 # 8-byte Folded Reload - fld.d $fa2, $sp, 264 # 8-byte Folded Reload - fld.d $ft6, $sp, 688 # 8-byte Folded Reload - fld.d $ft7, $sp, 824 # 8-byte Folded Reload - fld.d $ft13, $sp, 816 # 8-byte Folded Reload + fld.d $fa6, $sp, 40 # 8-byte Folded Reload + fld.d $fa2, $sp, 288 # 8-byte Folded Reload + fld.d $ft6, $sp, 720 # 8-byte Folded Reload + fld.d $ft7, $sp, 856 # 8-byte Folded Reload + fld.d $ft13, $sp, 848 # 8-byte Folded Reload move $t1, $s1 move $t5, $fp - ld.d $a7, $sp, 696 # 8-byte Folded Reload + ld.d $a7, $sp, 728 # 8-byte Folded Reload vldi $vr16, -912 vldi $vr22, -928 - ld.d $a6, $sp, 488 # 8-byte Folded Reload - move $t7, $s8 - ld.d $s8, $sp, 192 # 8-byte Folded Reload + ld.d $a6, $sp, 512 # 8-byte Folded Reload + move $t8, $s8 + ld.d $s8, $sp, 504 # 8-byte Folded Reload + move $t7, $s7 + ld.d $s7, $sp, 216 # 8-byte Folded Reload move $t6, $s6 - ld.d $s6, $sp, 480 # 8-byte Folded Reload + ld.d $s6, $sp, 208 # 8-byte Folded Reload move $t4, $s2 - ld.d $s2, $sp, 584 # 8-byte Folded Reload + ld.d $s2, $sp, 616 # 8-byte Folded Reload ld.d $t2, $sp, 136 # 8-byte Folded Reload - ld.d $a5, $sp, 496 # 8-byte Folded Reload - ld.d $a4, $sp, 504 # 8-byte Folded Reload - ld.d $a3, $sp, 512 # 8-byte Folded Reload - ld.d $a2, $sp, 520 # 8-byte Folded Reload - ld.d $a1, $sp, 528 # 8-byte Folded Reload - fld.d $fa5, $sp, 200 # 8-byte Folded Reload - b .LBB16_96 -.LBB16_113: # %._crit_edge + ld.d $a5, $sp, 520 # 8-byte Folded Reload + ld.d $a4, $sp, 528 # 8-byte Folded Reload + ld.d $a3, $sp, 536 # 8-byte Folded Reload + ld.d $a2, $sp, 544 # 8-byte Folded Reload + ld.d $a1, $sp, 552 # 8-byte Folded Reload + fld.d $fa5, $sp, 224 # 8-byte Folded Reload + b .LBB16_97 +.LBB16_114: # %._crit_edge ld.d $fp, $sp, 168 # 8-byte Folded Reload ld.d $a1, $fp, 200 ld.d $a0, $sp, 160 # 8-byte Folded Reload @@ -5735,8 +5763,8 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd move $a0, $fp pcaddu18i $ra, %call36(_ZN10MallocPlus14memory_replaceEPvS0_) jirl $ra, $ra, 0 - ld.d $a2, $sp, 840 - ld.d $a1, $sp, 848 + ld.d $a2, $sp, 872 + ld.d $a1, $sp, 880 st.d $a0, $fp, 216 move $a0, $a2 pcaddu18i $ra, %call36(cpu_timer_stop) @@ -5744,67 +5772,58 @@ _ZN5State22calc_finite_differenceEd: # @_ZN5State22calc_finite_differenceEd fld.d $fa1, $fp, 240 fadd.d $fa0, $fa0, $fa1 fst.d $fa0, $fp, 240 - fld.d $fs7, $sp, 856 # 8-byte Folded Reload - fld.d $fs6, $sp, 864 # 8-byte Folded Reload - fld.d $fs5, $sp, 872 # 8-byte Folded Reload - fld.d $fs4, $sp, 880 # 8-byte Folded Reload - fld.d $fs3, $sp, 888 # 8-byte Folded Reload - fld.d $fs2, $sp, 896 # 8-byte Folded Reload - fld.d $fs1, $sp, 904 # 8-byte Folded Reload - fld.d $fs0, $sp, 912 # 8-byte Folded Reload - ld.d $s8, $sp, 920 # 8-byte Folded Reload - ld.d $s7, $sp, 928 # 8-byte Folded Reload - ld.d $s6, $sp, 936 # 8-byte Folded Reload - ld.d $s5, $sp, 944 # 8-byte Folded Reload - ld.d $s4, $sp, 952 # 8-byte Folded Reload - ld.d $s3, $sp, 960 # 8-byte Folded Reload - ld.d $s2, $sp, 968 # 8-byte Folded Reload - ld.d $s1, $sp, 976 # 8-byte Folded Reload - ld.d $s0, $sp, 984 # 8-byte Folded Reload - ld.d $fp, $sp, 992 # 8-byte Folded Reload - ld.d $ra, $sp, 1000 # 8-byte Folded Reload - addi.d $sp, $sp, 1008 + fld.d $fs7, $sp, 888 # 8-byte Folded Reload + fld.d $fs6, $sp, 896 # 8-byte Folded Reload + fld.d $fs5, $sp, 904 # 8-byte Folded Reload + fld.d $fs4, $sp, 912 # 8-byte Folded Reload + fld.d $fs3, $sp, 920 # 8-byte Folded Reload + fld.d $fs2, $sp, 928 # 8-byte Folded Reload + fld.d $fs1, $sp, 936 # 8-byte Folded Reload + fld.d $fs0, $sp, 944 # 8-byte Folded Reload + ld.d $s8, $sp, 952 # 8-byte Folded Reload + ld.d $s7, $sp, 960 # 8-byte Folded Reload + ld.d $s6, $sp, 968 # 8-byte Folded Reload + ld.d $s5, $sp, 976 # 8-byte Folded Reload + ld.d $s4, $sp, 984 # 8-byte Folded Reload + ld.d $s3, $sp, 992 # 8-byte Folded Reload + ld.d $s2, $sp, 1000 # 8-byte Folded Reload + ld.d $s1, $sp, 1008 # 8-byte Folded Reload + ld.d $s0, $sp, 1016 # 8-byte Folded Reload + ld.d $fp, $sp, 1024 # 8-byte Folded Reload + ld.d $ra, $sp, 1032 # 8-byte Folded Reload + addi.d $sp, $sp, 1040 ret .Lfunc_end16: .size _ZN5State22calc_finite_differenceEd, .Lfunc_end16-_ZN5State22calc_finite_differenceEd .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN5State32calc_finite_difference_via_facesEd -.LCPI17_0: - .dword 0x401399999999999a # double 4.9000000000000004 -.LCPI17_1: - .dword 0x402399999999999a # double 9.8000000000000007 -.LCPI17_2: - .dword 0x39b4484bfeebc2a0 # double 1.0000000000000001E-30 - .text - .globl _ZN5State32calc_finite_difference_via_facesEd + .globl _ZN5State32calc_finite_difference_via_facesEd # -- Begin function _ZN5State32calc_finite_difference_via_facesEd .p2align 5 .type _ZN5State32calc_finite_difference_via_facesEd,@function _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_difference_via_facesEd .cfi_startproc # %bb.0: - addi.d $sp, $sp, -976 - .cfi_def_cfa_offset 976 - st.d $ra, $sp, 968 # 8-byte Folded Spill - st.d $fp, $sp, 960 # 8-byte Folded Spill - st.d $s0, $sp, 952 # 8-byte Folded Spill - st.d $s1, $sp, 944 # 8-byte Folded Spill - st.d $s2, $sp, 936 # 8-byte Folded Spill - st.d $s3, $sp, 928 # 8-byte Folded Spill - st.d $s4, $sp, 920 # 8-byte Folded Spill - st.d $s5, $sp, 912 # 8-byte Folded Spill - st.d $s6, $sp, 904 # 8-byte Folded Spill - st.d $s7, $sp, 896 # 8-byte Folded Spill - st.d $s8, $sp, 888 # 8-byte Folded Spill - fst.d $fs0, $sp, 880 # 8-byte Folded Spill - fst.d $fs1, $sp, 872 # 8-byte Folded Spill - fst.d $fs2, $sp, 864 # 8-byte Folded Spill - fst.d $fs3, $sp, 856 # 8-byte Folded Spill - fst.d $fs4, $sp, 848 # 8-byte Folded Spill - fst.d $fs5, $sp, 840 # 8-byte Folded Spill - fst.d $fs6, $sp, 832 # 8-byte Folded Spill - fst.d $fs7, $sp, 824 # 8-byte Folded Spill + addi.d $sp, $sp, -992 + .cfi_def_cfa_offset 992 + st.d $ra, $sp, 984 # 8-byte Folded Spill + st.d $fp, $sp, 976 # 8-byte Folded Spill + st.d $s0, $sp, 968 # 8-byte Folded Spill + st.d $s1, $sp, 960 # 8-byte Folded Spill + st.d $s2, $sp, 952 # 8-byte Folded Spill + st.d $s3, $sp, 944 # 8-byte Folded Spill + st.d $s4, $sp, 936 # 8-byte Folded Spill + st.d $s5, $sp, 928 # 8-byte Folded Spill + st.d $s6, $sp, 920 # 8-byte Folded Spill + st.d $s7, $sp, 912 # 8-byte Folded Spill + st.d $s8, $sp, 904 # 8-byte Folded Spill + fst.d $fs0, $sp, 896 # 8-byte Folded Spill + fst.d $fs1, $sp, 888 # 8-byte Folded Spill + fst.d $fs2, $sp, 880 # 8-byte Folded Spill + fst.d $fs3, $sp, 872 # 8-byte Folded Spill + fst.d $fs4, $sp, 864 # 8-byte Folded Spill + fst.d $fs5, $sp, 856 # 8-byte Folded Spill + fst.d $fs6, $sp, 848 # 8-byte Folded Spill + fst.d $fs7, $sp, 840 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -5826,7 +5845,7 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe .cfi_offset 63, -152 fst.d $fa0, $sp, 208 # 8-byte Folded Spill move $s0, $a0 - addi.d $a0, $sp, 808 + addi.d $a0, $sp, 824 pcaddu18i $ra, %call36(cpu_timer_start) jirl $ra, $ra, 0 ld.d $a0, $s0, 192 @@ -5839,24 +5858,24 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe move $a0, $s0 pcaddu18i $ra, %call36(_ZN5State25apply_boundary_conditionsEv) jirl $ra, $ra, 0 - ld.d $s4, $s0, 192 - ld.d $a0, $s4, 1368 + ld.d $s5, $s0, 192 + ld.d $a0, $s5, 1368 + st.d $a0, $sp, 392 # 8-byte Folded Spill + ld.d $a0, $s5, 1376 st.d $a0, $sp, 200 # 8-byte Folded Spill - ld.d $a0, $s4, 1376 + ld.d $a0, $s5, 1384 + st.d $a0, $sp, 384 # 8-byte Folded Spill + ld.d $a0, $s5, 1392 st.d $a0, $sp, 192 # 8-byte Folded Spill - ld.d $a0, $s4, 1384 - st.d $a0, $sp, 376 # 8-byte Folded Spill - ld.d $a0, $s4, 1392 - st.d $a0, $sp, 184 # 8-byte Folded Spill - ld.d $s3, $s4, 1352 - move $a0, $s4 + ld.d $s3, $s5, 1352 + move $a0, $s5 pcaddu18i $ra, %call36(_ZN4Mesh24calc_face_list_wbidirmapEv) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(_ZGVZN5State32calc_finite_difference_via_facesEdE2Hx) ld.b $a0, $a0, %pc_lo12(_ZGVZN5State32calc_finite_difference_via_facesEdE2Hx) dbar 20 pcalau12i $a1, %pc_hi20(_ZZN5State32calc_finite_difference_via_facesEdE2Hx) - addi.d $s5, $a1, %pc_lo12(_ZZN5State32calc_finite_difference_via_facesEdE2Hx) + addi.d $s2, $a1, %pc_lo12(_ZZN5State32calc_finite_difference_via_facesEdE2Hx) beqz $a0, .LBB17_182 .LBB17_3: pcalau12i $a0, %pc_hi20(_ZGVZN5State32calc_finite_difference_via_facesEdE2Ux) @@ -5870,12 +5889,12 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe ld.b $a0, $a0, %pc_lo12(_ZGVZN5State32calc_finite_difference_via_facesEdE2Vx) dbar 20 pcalau12i $a1, %pc_hi20(_ZZN5State32calc_finite_difference_via_facesEdE2Vx) - addi.d $s1, $a1, %pc_lo12(_ZZN5State32calc_finite_difference_via_facesEdE2Vx) + addi.d $s4, $a1, %pc_lo12(_ZZN5State32calc_finite_difference_via_facesEdE2Vx) beqz $a0, .LBB17_186 .LBB17_5: ld.d $t1, $s0, 192 - ld.d $a1, $s5, 8 - ld.d $a2, $s5, 0 + ld.d $a1, $s2, 8 + ld.d $a2, $s2, 0 ld.w $a0, $t1, 1560 sub.d $a3, $a1, $a2 srai.d $a3, $a3, 3 @@ -5895,7 +5914,7 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe alsl.d $a2, $a0, $a2, 3 beq $a1, $a2, .LBB17_10 # %bb.9: # %_ZSt8_DestroyIPddEvT_S1_RSaIT0_E.exit.i.i - st.d $a2, $s5, 8 + st.d $a2, $s2, 8 .LBB17_10: # %_ZNSt6vectorIdSaIdEE6resizeEm.exit ld.d $a1, $fp, 8 ld.d $a2, $fp, 0 @@ -5919,8 +5938,8 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe # %bb.14: # %_ZSt8_DestroyIPddEvT_S1_RSaIT0_E.exit.i.i1064 st.d $a2, $fp, 8 .LBB17_15: # %_ZNSt6vectorIdSaIdEE6resizeEm.exit1065 - ld.d $a1, $s1, 8 - ld.d $a2, $s1, 0 + ld.d $a1, $s4, 8 + ld.d $a2, $s4, 0 sub.d $a3, $a1, $a2 srai.d $a3, $a3, 3 bgeu $a3, $a0, .LBB17_17 @@ -5937,29 +5956,32 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe bgeu $a0, $a3, .LBB17_20 # %bb.18: alsl.d $a2, $a0, $a2, 3 - fld.d $fa2, $sp, 208 # 8-byte Folded Reload + fld.d $fa1, $sp, 208 # 8-byte Folded Reload beq $a1, $a2, .LBB17_21 # %bb.19: # %_ZSt8_DestroyIPddEvT_S1_RSaIT0_E.exit.i.i1067 - st.d $a2, $s1, 8 + st.d $a2, $s4, 8 b .LBB17_21 .LBB17_20: - fld.d $fa2, $sp, 208 # 8-byte Folded Reload + fld.d $fa1, $sp, 208 # 8-byte Folded Reload .LBB17_21: # %_ZNSt6vectorIdSaIdEE6resizeEm.exit1068 - pcalau12i $t7, %pc_hi20(.LCPI17_0) + lu12i.w $s7, -419431 blez $a0, .LBB17_27 # %bb.22: # %.lr.ph ld.d $a1, $t1, 1640 ld.d $a2, $t1, 1664 ld.d $a3, $s0, 200 + vldi $vr0, -928 + fmul.d $fa1, $fa1, $fa0 ld.d $a4, $s0, 208 - ld.d $a5, $s5, 0 + ld.d $a5, $s2, 0 ld.d $a6, $fp, 0 ld.d $a7, $s0, 216 - ld.d $t0, $s1, 0 + ld.d $t0, $s4, 0 ld.d $t1, $t1, 1048 - fld.d $fa0, $t7, %pc_lo12(.LCPI17_0) - vldi $vr1, -928 - fmul.d $fa2, $fa2, $fa1 + ori $t2, $s7, 2458 + lu32i.d $t2, 235929 + lu52i.d $t2, $t2, 1025 + movgr2fr.d $fa2, $t2 vldi $vr3, -912 b .LBB17_25 .p2align 4, , 16 @@ -5968,9 +5990,9 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fldx.d $fa6, $a3, $t2 fldx.d $fa7, $a4, $t3 fldx.d $ft0, $a4, $t2 - fdiv.d $fa5, $fa2, $fa5 + fdiv.d $fa5, $fa1, $fa5 fadd.d $fa4, $fa4, $fa6 - fmul.d $fa4, $fa4, $fa1 + fmul.d $fa4, $fa4, $fa0 fsub.d $fa6, $fa7, $ft0 fmul.d $fa6, $fa5, $fa6 fsub.d $fa4, $fa4, $fa6 @@ -5979,17 +6001,17 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fldx.d $fa6, $a4, $t2 fldx.d $fa7, $a3, $t3 fadd.d $ft0, $fa4, $fa6 - fmul.d $ft0, $ft0, $fa1 + fmul.d $ft0, $ft0, $fa0 fmul.d $fa4, $fa4, $fa4 fdiv.d $fa4, $fa4, $fa7 fmul.d $fa7, $fa7, $fa7 fldx.d $ft1, $a3, $t2 - fmul.d $fa7, $fa7, $fa0 + fmul.d $fa7, $fa7, $fa2 fadd.d $fa4, $fa4, $fa7 fmul.d $fa6, $fa6, $fa6 fdiv.d $fa6, $fa6, $ft1 fmul.d $fa7, $ft1, $ft1 - fmul.d $fa7, $fa7, $fa0 + fmul.d $fa7, $fa7, $fa2 fadd.d $fa6, $fa6, $fa7 fsub.d $fa4, $fa4, $fa6 fmul.d $fa4, $fa5, $fa4 @@ -6005,7 +6027,7 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fldx.d $fa7, $a3, $t2 fadd.d $fa4, $fa4, $ft0 fmul.d $ft0, $ft0, $ft1 - fmul.d $fa4, $fa4, $fa1 + fmul.d $fa4, $fa4, $fa0 fdiv.d $fa7, $ft0, $fa7 fsub.d $fa6, $fa6, $fa7 fmul.d $fa5, $fa5, $fa6 @@ -6043,12 +6065,12 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fmul.d $ft0, $fa5, $fa5 fmul.d $ft1, $fa7, $fa7 fdiv.d $ft2, $ft1, $ft0 - fmin.d $ft2, $ft2, $fa1 + fmin.d $ft2, $ft2, $fa0 fmul.d $ft2, $ft0, $ft2 fdiv.d $ft0, $ft0, $ft1 fldx.d $ft3, $a3, $t3 fldx.d $ft4, $a3, $t2 - fmin.d $ft0, $ft0, $fa1 + fmin.d $ft0, $ft0, $fa0 fmul.d $ft0, $ft1, $ft0 fmul.d $ft1, $fa5, $ft3 fmul.d $ft3, $fa7, $ft4 @@ -6060,7 +6082,7 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fmul.d $ft3, $fa6, $ft3 fmul.d $ft4, $fa4, $ft4 fsub.d $ft3, $ft3, $ft4 - fmul.d $ft3, $fa2, $ft3 + fmul.d $ft3, $fa1, $ft3 fadd.d $ft0, $ft0, $ft2 fdiv.d $ft2, $ft3, $ft0 fsub.d $ft1, $ft1, $ft2 @@ -6074,7 +6096,7 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fmul.d $ft1, $ft1, $ft1 fdiv.d $ft1, $ft1, $ft4 fmul.d $ft4, $ft4, $ft4 - fmul.d $ft4, $ft4, $fa0 + fmul.d $ft4, $ft4, $fa2 fadd.d $ft1, $ft1, $ft4 fldx.d $ft4, $a3, $t2 fdiv.d $ft3, $ft3, $ft5 @@ -6082,11 +6104,11 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fmul.d $ft2, $ft2, $ft2 fdiv.d $ft2, $ft2, $ft4 fmul.d $ft4, $ft4, $ft4 - fmul.d $ft4, $ft4, $fa0 + fmul.d $ft4, $ft4, $fa2 fadd.d $ft2, $ft2, $ft4 fmul.d $ft2, $fa4, $ft2 fsub.d $ft1, $ft1, $ft2 - fmul.d $ft1, $fa2, $ft1 + fmul.d $ft1, $fa1, $ft1 fdiv.d $ft1, $ft1, $ft0 fsub.d $ft1, $ft3, $ft1 fst.d $ft1, $a6, 0 @@ -6107,21 +6129,21 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fdiv.d $fa7, $fa7, $ft4 fmul.d $fa4, $fa4, $fa7 fsub.d $fa4, $fa6, $fa4 - fmul.d $fa4, $fa2, $fa4 + fmul.d $fa4, $fa1, $fa4 fdiv.d $fa4, $fa4, $ft0 fsub.d $fa4, $fa5, $fa4 b .LBB17_24 .LBB17_27: # %._crit_edge - st.d $fp, $sp, 792 # 8-byte Folded Spill + st.d $s2, $sp, 800 # 8-byte Folded Spill + st.d $fp, $sp, 808 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(_ZGVZN5State32calc_finite_difference_via_facesEdE2Hy) ld.b $a0, $a0, %pc_lo12(_ZGVZN5State32calc_finite_difference_via_facesEdE2Hy) dbar 20 pcalau12i $a1, %pc_hi20(_ZZN5State32calc_finite_difference_via_facesEdE2Hy) - addi.d $s8, $a1, %pc_lo12(_ZZN5State32calc_finite_difference_via_facesEdE2Hy) - st.d $t7, $sp, 296 # 8-byte Folded Spill + addi.d $fp, $a1, %pc_lo12(_ZZN5State32calc_finite_difference_via_facesEdE2Hy) beqz $a0, .LBB17_188 .LBB17_28: - st.d $s1, $sp, 784 # 8-byte Folded Spill + st.d $s4, $sp, 792 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(_ZGVZN5State32calc_finite_difference_via_facesEdE2Uy) ld.b $a0, $a0, %pc_lo12(_ZGVZN5State32calc_finite_difference_via_facesEdE2Uy) dbar 20 @@ -6133,12 +6155,12 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe ld.b $a0, $a0, %pc_lo12(_ZGVZN5State32calc_finite_difference_via_facesEdE2Vy) dbar 20 pcalau12i $a1, %pc_hi20(_ZZN5State32calc_finite_difference_via_facesEdE2Vy) - addi.d $s7, $a1, %pc_lo12(_ZZN5State32calc_finite_difference_via_facesEdE2Vy) + addi.d $s8, $a1, %pc_lo12(_ZZN5State32calc_finite_difference_via_facesEdE2Vy) beqz $a0, .LBB17_192 .LBB17_30: ld.d $a0, $s0, 192 - ld.d $a2, $s8, 8 - ld.d $a3, $s8, 0 + ld.d $a2, $fp, 8 + ld.d $a3, $fp, 0 ld.w $a1, $a0, 1564 sub.d $a4, $a2, $a3 srai.d $a4, $a4, 3 @@ -6158,7 +6180,7 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe alsl.d $a3, $a1, $a3, 3 beq $a2, $a3, .LBB17_35 # %bb.34: # %_ZSt8_DestroyIPddEvT_S1_RSaIT0_E.exit.i.i1073 - st.d $a3, $s8, 8 + st.d $a3, $fp, 8 .LBB17_35: # %_ZNSt6vectorIdSaIdEE6resizeEm.exit1074 ld.d $a2, $s6, 8 ld.d $a3, $s6, 0 @@ -6182,8 +6204,8 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe # %bb.39: # %_ZSt8_DestroyIPddEvT_S1_RSaIT0_E.exit.i.i1076 st.d $a3, $s6, 8 .LBB17_40: # %_ZNSt6vectorIdSaIdEE6resizeEm.exit1077 - ld.d $a2, $s7, 8 - ld.d $a3, $s7, 0 + ld.d $a2, $s8, 8 + ld.d $a3, $s8, 0 sub.d $a4, $a2, $a3 srai.d $a4, $a4, 3 bgeu $a4, $a1, .LBB17_42 @@ -6200,30 +6222,31 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe bgeu $a1, $a4, .LBB17_45 # %bb.43: alsl.d $a3, $a1, $a3, 3 - fld.d $fa2, $sp, 208 # 8-byte Folded Reload - ld.d $t3, $sp, 296 # 8-byte Folded Reload + fld.d $fa1, $sp, 208 # 8-byte Folded Reload beq $a2, $a3, .LBB17_46 # %bb.44: # %_ZSt8_DestroyIPddEvT_S1_RSaIT0_E.exit.i.i1079 - st.d $a3, $s7, 8 + st.d $a3, $s8, 8 b .LBB17_46 .LBB17_45: - fld.d $fa2, $sp, 208 # 8-byte Folded Reload - ld.d $t3, $sp, 296 # 8-byte Folded Reload + fld.d $fa1, $sp, 208 # 8-byte Folded Reload .LBB17_46: # %_ZNSt6vectorIdSaIdEE6resizeEm.exit1080 blez $a1, .LBB17_52 # %bb.47: # %.lr.ph1185 ld.d $a2, $a0, 2000 ld.d $a3, $a0, 2024 ld.d $a4, $s0, 200 + vldi $vr0, -928 + fmul.d $fa1, $fa1, $fa0 ld.d $a5, $s0, 216 - ld.d $a6, $s8, 0 + ld.d $a6, $fp, 0 ld.d $a7, $s0, 208 ld.d $t0, $s6, 0 - ld.d $t1, $s7, 0 + ld.d $t1, $s8, 0 ld.d $t2, $a0, 1072 - fld.d $fa0, $t3, %pc_lo12(.LCPI17_0) - vldi $vr1, -928 - fmul.d $fa2, $fa2, $fa1 + ori $t3, $s7, 2458 + lu32i.d $t3, 235929 + lu52i.d $t3, $t3, 1025 + movgr2fr.d $fa2, $t3 vldi $vr3, -912 b .LBB17_50 .p2align 4, , 16 @@ -6232,9 +6255,9 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fldx.d $fa6, $a4, $t3 fldx.d $fa7, $a5, $t4 fldx.d $ft0, $a5, $t3 - fdiv.d $fa5, $fa2, $fa5 + fdiv.d $fa5, $fa1, $fa5 fadd.d $fa4, $fa4, $fa6 - fmul.d $fa4, $fa4, $fa1 + fmul.d $fa4, $fa4, $fa0 fsub.d $fa6, $fa7, $ft0 fmul.d $fa6, $fa5, $fa6 fsub.d $fa4, $fa4, $fa6 @@ -6249,7 +6272,7 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fldx.d $fa7, $a4, $t3 fadd.d $fa4, $fa4, $ft0 fmul.d $ft0, $ft0, $ft1 - fmul.d $fa4, $fa4, $fa1 + fmul.d $fa4, $fa4, $fa0 fdiv.d $fa7, $ft0, $fa7 fsub.d $fa6, $fa6, $fa7 fmul.d $fa6, $fa5, $fa6 @@ -6261,15 +6284,15 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fmul.d $ft0, $fa4, $fa4 fdiv.d $ft0, $ft0, $fa6 fmul.d $fa6, $fa6, $fa6 - fmul.d $fa6, $fa6, $fa0 + fmul.d $fa6, $fa6, $fa2 fadd.d $fa6, $ft0, $fa6 fldx.d $ft0, $a4, $t3 fadd.d $fa4, $fa4, $fa7 - fmul.d $fa4, $fa4, $fa1 + fmul.d $fa4, $fa4, $fa0 fmul.d $fa7, $fa7, $fa7 fdiv.d $fa7, $fa7, $ft0 fmul.d $ft0, $ft0, $ft0 - fmul.d $ft0, $ft0, $fa0 + fmul.d $ft0, $ft0, $fa2 fadd.d $fa7, $fa7, $ft0 fsub.d $fa6, $fa6, $fa7 fmul.d $fa5, $fa5, $fa6 @@ -6307,12 +6330,12 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fmul.d $ft0, $fa5, $fa5 fmul.d $ft1, $fa7, $fa7 fdiv.d $ft2, $ft1, $ft0 - fmin.d $ft2, $ft2, $fa1 + fmin.d $ft2, $ft2, $fa0 fmul.d $ft2, $ft0, $ft2 fdiv.d $ft0, $ft0, $ft1 fldx.d $ft3, $a4, $t4 fldx.d $ft4, $a4, $t3 - fmin.d $ft0, $ft0, $fa1 + fmin.d $ft0, $ft0, $fa0 fmul.d $ft0, $ft1, $ft0 fmul.d $ft1, $fa5, $ft3 fmul.d $ft3, $fa7, $ft4 @@ -6324,7 +6347,7 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fmul.d $ft3, $fa6, $ft3 fmul.d $ft4, $fa4, $ft4 fsub.d $ft3, $ft3, $ft4 - fmul.d $ft3, $fa2, $ft3 + fmul.d $ft3, $fa1, $ft3 fadd.d $ft0, $ft0, $ft2 fdiv.d $ft2, $ft3, $ft0 fsub.d $ft1, $ft1, $ft2 @@ -6346,7 +6369,7 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fdiv.d $ft2, $ft2, $ft6 fmul.d $ft2, $fa4, $ft2 fsub.d $ft1, $ft1, $ft2 - fmul.d $ft1, $fa2, $ft1 + fmul.d $ft1, $fa1, $ft1 fdiv.d $ft1, $ft1, $ft0 fsub.d $ft1, $ft3, $ft1 fst.d $ft1, $t0, 0 @@ -6360,22 +6383,23 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fmul.d $fa7, $ft1, $ft1 fdiv.d $fa7, $fa7, $ft3 fmul.d $ft1, $ft3, $ft3 - fmul.d $ft1, $ft1, $fa0 + fmul.d $ft1, $ft1, $fa2 fldx.d $ft3, $a4, $t3 fadd.d $fa7, $fa7, $ft1 fmul.d $fa6, $fa6, $fa7 fmul.d $fa7, $ft2, $ft2 fdiv.d $fa7, $fa7, $ft3 fmul.d $ft1, $ft3, $ft3 - fmul.d $ft1, $ft1, $fa0 + fmul.d $ft1, $ft1, $fa2 fadd.d $fa7, $fa7, $ft1 fmul.d $fa4, $fa4, $fa7 fsub.d $fa4, $fa6, $fa4 - fmul.d $fa4, $fa2, $fa4 + fmul.d $fa4, $fa1, $fa4 fdiv.d $fa4, $fa4, $ft0 fsub.d $fa4, $fa5, $fa4 b .LBB17_49 .LBB17_52: # %._crit_edge1186 + move $s1, $fp ld.d $a1, $a0, 1176 pcalau12i $a0, %pc_hi20(.L.str.3) addi.d $a3, $a0, %pc_lo12(.L.str.3) @@ -6397,8 +6421,8 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe jirl $ra, $ra, 0 ld.d $a1, $s0, 192 ld.d $a1, $a1, 1176 - pcalau12i $s1, %pc_hi20(_ZZN5State32calc_finite_difference_via_facesEdE5U_new) - st.d $a0, $s1, %pc_lo12(_ZZN5State32calc_finite_difference_via_facesEdE5U_new) + pcalau12i $s2, %pc_hi20(_ZZN5State32calc_finite_difference_via_facesEdE5U_new) + st.d $a0, $s2, %pc_lo12(_ZZN5State32calc_finite_difference_via_facesEdE5U_new) pcalau12i $a0, %pc_hi20(.L.str.5) addi.d $a3, $a0, %pc_lo12(.L.str.5) ori $a2, $zero, 8 @@ -6407,97 +6431,105 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe pcaddu18i $ra, %call36(_ZN10MallocPlus13memory_mallocEmmPKci) jirl $ra, $ra, 0 ld.d $a3, $s0, 192 - pcalau12i $s2, %pc_hi20(_ZZN5State32calc_finite_difference_via_facesEdE5V_new) - st.d $a0, $s2, %pc_lo12(_ZZN5State32calc_finite_difference_via_facesEdE5V_new) - addi.d $a1, $sp, 804 - addi.d $a2, $sp, 800 + pcalau12i $s4, %pc_hi20(_ZZN5State32calc_finite_difference_via_facesEdE5V_new) + st.d $a0, $s4, %pc_lo12(_ZZN5State32calc_finite_difference_via_facesEdE5V_new) + addi.d $a1, $sp, 820 + addi.d $a2, $sp, 816 move $a0, $a3 pcaddu18i $ra, %call36(_ZN4Mesh10get_boundsERiS0_) jirl $ra, $ra, 0 - ld.w $a2, $sp, 804 - ld.w $a0, $sp, 800 + ld.w $t8, $sp, 820 + ld.w $a0, $sp, 816 ld.d $a1, $s0, 200 - st.d $a2, $sp, 616 # 8-byte Folded Spill - bge $a2, $a0, .LBB17_179 + bge $t8, $a0, .LBB17_179 # %bb.53: # %.lr.ph1189 - ld.d $t2, $s0, 208 + ld.d $ra, $s0, 208 ld.d $a0, $s0, 192 - st.d $s0, $sp, 176 # 8-byte Folded Spill + st.d $s0, $sp, 184 # 8-byte Folded Spill ld.d $t3, $s0, 216 - ld.d $a2, $s4, 1048 - st.d $a2, $sp, 248 # 8-byte Folded Spill - ld.d $a2, $s4, 1072 - st.d $a2, $sp, 240 # 8-byte Folded Spill + ld.d $a2, $s5, 1048 + st.d $a2, $sp, 272 # 8-byte Folded Spill + ld.d $a2, $s5, 1072 + st.d $a2, $sp, 264 # 8-byte Folded Spill ld.d $a3, $a0, 1688 - ld.d $a2, $s5, 0 - st.d $a2, $sp, 232 # 8-byte Folded Spill - ld.d $a2, $sp, 792 # 8-byte Folded Reload + ld.d $a2, $sp, 800 # 8-byte Folded Reload + ld.d $a2, $a2, 0 + st.d $a2, $sp, 256 # 8-byte Folded Spill + ld.d $a2, $sp, 808 # 8-byte Folded Reload ld.d $a2, $a2, 0 - st.d $a2, $sp, 680 # 8-byte Folded Spill - ld.d $a2, $sp, 784 # 8-byte Folded Reload + st.d $a2, $sp, 696 # 8-byte Folded Spill + ld.d $a2, $sp, 792 # 8-byte Folded Reload ld.d $a2, $a2, 0 - st.d $a2, $sp, 672 # 8-byte Folded Spill + st.d $a2, $sp, 688 # 8-byte Folded Spill ld.d $a2, $a0, 1712 - st.d $a2, $sp, 776 # 8-byte Folded Spill + st.d $a2, $sp, 792 # 8-byte Folded Spill ld.d $a2, $a0, 1736 - st.d $a2, $sp, 768 # 8-byte Folded Spill + st.d $a2, $sp, 784 # 8-byte Folded Spill ld.d $a2, $a0, 1760 - st.d $a2, $sp, 760 # 8-byte Folded Spill - ldptr.d $s5, $a0, 2048 - ld.d $a2, $s8, 0 - st.d $a2, $sp, 272 # 8-byte Folded Spill + st.d $a2, $sp, 776 # 8-byte Folded Spill + ldptr.d $a5, $a0, 2048 + ld.d $a2, $s1, 0 + st.d $a2, $sp, 296 # 8-byte Folded Spill ld.d $a2, $s6, 0 - st.d $a2, $sp, 264 # 8-byte Folded Spill - ld.d $a2, $s7, 0 - st.d $a2, $sp, 256 # 8-byte Folded Spill - ldptr.d $t0, $a0, 2072 - ldptr.d $t1, $a0, 2096 - ldptr.d $a0, $a0, 2120 - ld.d $a2, $fp, %pc_lo12(_ZZN5State32calc_finite_difference_via_facesEdE5H_new) - st.d $a2, $sp, 304 # 8-byte Folded Spill - st.d $s1, $sp, 168 # 8-byte Folded Spill - ld.d $a4, $s1, %pc_lo12(_ZZN5State32calc_finite_difference_via_facesEdE5U_new) - st.d $a4, $sp, 224 # 8-byte Folded Spill - move $s1, $a3 - st.d $s2, $sp, 160 # 8-byte Folded Spill - ld.d $a3, $s2, %pc_lo12(_ZZN5State32calc_finite_difference_via_facesEdE5V_new) - st.d $a3, $sp, 216 # 8-byte Folded Spill - move $s2, $a0 - ld.d $a0, $sp, 616 # 8-byte Folded Reload - slli.d $fp, $a0, 2 - slli.d $s6, $a0, 3 + st.d $a2, $sp, 288 # 8-byte Folded Spill + ld.d $a2, $s8, 0 + st.d $a2, $sp, 280 # 8-byte Folded Spill + ldptr.d $t1, $a0, 2072 + ldptr.d $t2, $a0, 2096 + ldptr.d $s5, $a0, 2120 + ld.d $a0, $fp, %pc_lo12(_ZZN5State32calc_finite_difference_via_facesEdE5H_new) + st.d $a0, $sp, 312 # 8-byte Folded Spill + st.d $s2, $sp, 176 # 8-byte Folded Spill + ld.d $a0, $s2, %pc_lo12(_ZZN5State32calc_finite_difference_via_facesEdE5U_new) + st.d $a0, $sp, 248 # 8-byte Folded Spill + st.d $s4, $sp, 168 # 8-byte Folded Spill + ld.d $a0, $s4, %pc_lo12(_ZZN5State32calc_finite_difference_via_facesEdE5V_new) + st.d $a0, $sp, 240 # 8-byte Folded Spill + slli.d $fp, $t8, 2 + slli.d $s6, $t8, 3 movgr2fr.d $ft4, $zero vldi $vr13, -928 + ori $a0, $s7, 2458 + move $s1, $a3 + lu32i.d $a0, 235929 + st.d $a0, $sp, 232 # 8-byte Folded Spill + lu52i.d $a0, $a0, 1026 + st.d $a0, $sp, 224 # 8-byte Folded Spill vldi $vr14, -912 - ld.d $s4, $sp, 376 # 8-byte Folded Reload - ld.d $t7, $sp, 184 # 8-byte Folded Reload - st.d $t7, $sp, 752 # 8-byte Folded Spill - ld.d $t5, $sp, 192 # 8-byte Folded Reload - move $a6, $t5 - ld.d $t4, $sp, 200 # 8-byte Folded Reload - move $a7, $t4 - move $s7, $s3 + lu12i.w $a0, -4420 + ori $a0, $a0, 672 + lu32i.d $a0, 280651 + lu52i.d $a0, $a0, 923 + st.d $a0, $sp, 216 # 8-byte Folded Spill + ld.d $t0, $sp, 384 # 8-byte Folded Reload + ld.d $t7, $sp, 192 # 8-byte Folded Reload + move $s2, $t7 + ld.d $t5, $sp, 200 # 8-byte Folded Reload + move $s4, $t5 + ld.d $a4, $sp, 392 # 8-byte Folded Reload + move $a6, $s3 fld.d $ft3, $sp, 208 # 8-byte Folded Reload - st.d $t2, $sp, 288 # 8-byte Folded Spill - st.d $t3, $sp, 280 # 8-byte Folded Spill + st.d $t3, $sp, 304 # 8-byte Folded Spill b .LBB17_55 .p2align 4, , 16 .LBB17_54: # in Loop: Header=BB17_55 Depth=1 - ld.d $s4, $sp, 488 # 8-byte Folded Reload + ld.d $t0, $sp, 512 # 8-byte Folded Reload + ld.d $s2, $sp, 504 # 8-byte Folded Reload + ld.d $s4, $sp, 496 # 8-byte Folded Reload fdiv.d $fa2, $ft3, $ft11 fsub.d $ft1, $ft8, $ft7 fadd.d $ft1, $ft1, $ft12 fsub.d $ft1, $ft1, $fs1 fmul.d $ft1, $fa2, $ft1 fsub.d $ft1, $fs4, $ft1 - fld.d $ft2, $sp, 448 # 8-byte Folded Reload + fld.d $ft2, $sp, 472 # 8-byte Folded Reload fsub.d $ft1, $ft1, $ft2 fadd.d $ft1, $fs0, $ft1 - fld.d $ft2, $sp, 544 # 8-byte Folded Reload + fld.d $ft2, $sp, 568 # 8-byte Folded Reload fsub.d $ft1, $ft1, $ft2 - fld.d $ft2, $sp, 704 # 8-byte Folded Reload + fld.d $ft2, $sp, 728 # 8-byte Folded Reload fadd.d $ft1, $ft2, $ft1 - ld.d $a0, $sp, 304 # 8-byte Folded Reload + ld.d $a0, $sp, 312 # 8-byte Folded Reload fstx.d $ft1, $a0, $s6 fsub.d $fa1, $fa4, $fa1 fadd.d $fa1, $fa1, $fa7 @@ -6505,170 +6537,171 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fmul.d $fa1, $fa2, $fa1 fsub.d $fa1, $ft10, $fa1 fsub.d $fa1, $fa1, $fs3 - fld.d $fa4, $sp, 528 # 8-byte Folded Reload + fld.d $fa4, $sp, 552 # 8-byte Folded Reload fadd.d $fa1, $fa4, $fa1 - ld.d $a0, $sp, 224 # 8-byte Folded Reload + ld.d $a0, $sp, 248 # 8-byte Folded Reload fstx.d $fa1, $a0, $s6 fsub.d $fa0, $fa3, $fa0 fadd.d $fa0, $fa0, $ft0 fsub.d $fa0, $fa0, $fa6 fmul.d $fa0, $fa2, $fa0 fsub.d $fa0, $ft9, $fa0 - fld.d $fa1, $sp, 712 # 8-byte Folded Reload + fld.d $fa1, $sp, 736 # 8-byte Folded Reload fsub.d $fa0, $fa0, $fa1 fadd.d $fa0, $fs2, $fa0 - ld.d $a0, $sp, 216 # 8-byte Folded Reload + ld.d $a0, $sp, 240 # 8-byte Folded Reload fstx.d $fa0, $a0, $s6 - ld.d $a2, $sp, 616 # 8-byte Folded Reload - addi.d $a2, $a2, 1 - ld.w $a0, $sp, 800 - addi.d $s7, $s7, 4 - addi.d $a7, $a7, 4 + addi.d $t8, $t8, 1 + ld.w $a0, $sp, 816 addi.d $a6, $a6, 4 - ld.d $a3, $sp, 752 # 8-byte Folded Reload - addi.d $a3, $a3, 4 - st.d $a3, $sp, 752 # 8-byte Folded Spill + addi.d $a4, $a4, 4 addi.d $s4, $s4, 4 + addi.d $s2, $s2, 4 + addi.d $t0, $t0, 4 addi.d $s6, $s6, 8 addi.d $s1, $s1, 4 - ld.d $a3, $sp, 776 # 8-byte Folded Reload + ld.d $a3, $sp, 792 # 8-byte Folded Reload addi.d $a3, $a3, 4 - st.d $a3, $sp, 776 # 8-byte Folded Spill - ld.d $a3, $sp, 768 # 8-byte Folded Reload + st.d $a3, $sp, 792 # 8-byte Folded Spill + ld.d $a3, $sp, 784 # 8-byte Folded Reload addi.d $a3, $a3, 4 - st.d $a3, $sp, 768 # 8-byte Folded Spill - ld.d $a3, $sp, 760 # 8-byte Folded Reload + st.d $a3, $sp, 784 # 8-byte Folded Spill + ld.d $a3, $sp, 776 # 8-byte Folded Reload addi.d $a3, $a3, 4 - st.d $a3, $sp, 760 # 8-byte Folded Spill - addi.d $s5, $s5, 4 - addi.d $t0, $t0, 4 + st.d $a3, $sp, 776 # 8-byte Folded Spill + addi.d $a5, $a5, 4 addi.d $t1, $t1, 4 - addi.d $s2, $s2, 4 - st.d $a2, $sp, 616 # 8-byte Folded Spill - bge $a2, $a0, .LBB17_180 + addi.d $t2, $t2, 4 + addi.d $s5, $s5, 4 + bge $t8, $a0, .LBB17_180 .LBB17_55: # =>This Inner Loop Header: Depth=1 - st.d $s5, $sp, 592 # 8-byte Folded Spill - ldx.w $s5, $a7, $fp - ldx.w $s8, $s7, $fp + st.d $s5, $sp, 616 # 8-byte Folded Spill + st.d $t2, $sp, 624 # 8-byte Folded Spill + st.d $a4, $sp, 488 # 8-byte Folded Spill + ldx.w $s5, $a4, $fp + st.d $a6, $sp, 480 # 8-byte Folded Spill + ldx.w $s8, $a6, $fp slli.d $a4, $s5, 2 ldx.w $a0, $s3, $a4 - ldx.w $a2, $t7, $a4 - st.d $a2, $sp, 504 # 8-byte Folded Spill + ldx.w $a6, $t7, $a4 + move $s7, $a5 bge $s8, $a0, .LBB17_57 # %bb.56: # in Loop: Header=BB17_55 Depth=1 - slli.d $a3, $a2, 2 - ldx.w $a5, $t4, $a3 - slli.d $a3, $a2, 3 + slli.d $a3, $a6, 2 + ld.d $a2, $sp, 392 # 8-byte Folded Reload + ldx.w $a2, $a2, $a3 + slli.d $a3, $a6, 3 fldx.d $fa0, $a1, $a3 - fst.d $fa0, $sp, 624 # 8-byte Folded Spill - fldx.d $fa0, $t2, $a3 - fst.d $fa0, $sp, 632 # 8-byte Folded Spill - st.d $a5, $sp, 416 # 8-byte Folded Spill - slli.d $a3, $a5, 3 + fst.d $fa0, $sp, 640 # 8-byte Folded Spill + fldx.d $fa0, $ra, $a3 + fst.d $fa0, $sp, 648 # 8-byte Folded Spill + st.d $a2, $sp, 432 # 8-byte Folded Spill + slli.d $a3, $a2, 3 fldx.d $fa2, $a1, $a3 - fldx.d $fa1, $t2, $a3 + fldx.d $fa1, $ra, $a3 b .LBB17_58 .p2align 4, , 16 .LBB17_57: # in Loop: Header=BB17_55 Depth=1 - st.d $zero, $sp, 416 # 8-byte Folded Spill - fst.d $ft4, $sp, 624 # 8-byte Folded Spill - fst.d $ft4, $sp, 632 # 8-byte Folded Spill + st.d $zero, $sp, 432 # 8-byte Folded Spill + fst.d $ft4, $sp, 640 # 8-byte Folded Spill + fst.d $ft4, $sp, 648 # 8-byte Folded Spill fmov.d $fa2, $ft4 fmov.d $fa1, $ft4 .LBB17_58: # in Loop: Header=BB17_55 Depth=1 - ldx.w $s0, $a6, $fp + ldx.w $s0, $s4, $fp slli.d $a5, $s0, 2 ldx.w $a3, $s3, $a5 ldx.w $a2, $t7, $a5 - st.d $a6, $sp, 480 # 8-byte Folded Spill - st.d $a7, $sp, 472 # 8-byte Folded Spill - st.d $s7, $sp, 464 # 8-byte Folded Spill - st.d $a2, $sp, 520 # 8-byte Folded Spill + st.d $a2, $sp, 536 # 8-byte Folded Spill + st.d $a6, $sp, 440 # 8-byte Folded Spill bge $s8, $a3, .LBB17_60 # %bb.59: # in Loop: Header=BB17_55 Depth=1 slli.d $a6, $a2, 2 ldx.w $a7, $t5, $a6 slli.d $a6, $a2, 3 fldx.d $fs1, $a1, $a6 - fldx.d $fa0, $t2, $a6 - fst.d $fa0, $sp, 664 # 8-byte Folded Spill - st.d $a7, $sp, 512 # 8-byte Folded Spill + fldx.d $fa0, $ra, $a6 + fst.d $fa0, $sp, 656 # 8-byte Folded Spill + st.d $a7, $sp, 528 # 8-byte Folded Spill slli.d $a6, $a7, 3 fldx.d $fa4, $a1, $a6 - fldx.d $fa0, $t2, $a6 + fldx.d $fa0, $ra, $a6 b .LBB17_61 .p2align 4, , 16 .LBB17_60: # in Loop: Header=BB17_55 Depth=1 - st.d $zero, $sp, 512 # 8-byte Folded Spill + st.d $zero, $sp, 528 # 8-byte Folded Spill fmov.d $fs1, $ft4 - fst.d $ft4, $sp, 664 # 8-byte Folded Spill + fst.d $ft4, $sp, 656 # 8-byte Folded Spill fmov.d $fa4, $ft4 fmov.d $fa0, $ft4 .LBB17_61: # in Loop: Header=BB17_55 Depth=1 - fst.d $fa2, $sp, 528 # 8-byte Folded Spill - fst.d $fa1, $sp, 328 # 8-byte Folded Spill - fst.d $fa0, $sp, 336 # 8-byte Folded Spill - ldx.w $s7, $s4, $fp - slli.d $a7, $s7, 2 + fst.d $fa2, $sp, 592 # 8-byte Folded Spill + fst.d $fa1, $sp, 336 # 8-byte Folded Spill + fst.d $fa0, $sp, 344 # 8-byte Folded Spill + ldx.w $a2, $t0, $fp + st.d $a2, $sp, 800 # 8-byte Folded Spill + slli.d $a7, $a2, 2 ldx.w $a6, $s3, $a7 - ldx.w $a2, $t5, $a7 - st.d $t0, $sp, 608 # 8-byte Folded Spill - st.d $t1, $sp, 600 # 8-byte Folded Spill - st.d $s4, $sp, 488 # 8-byte Folded Spill + ldx.w $t4, $t5, $a7 + st.d $t1, $sp, 632 # 8-byte Folded Spill + st.d $t0, $sp, 512 # 8-byte Folded Spill + st.d $s4, $sp, 496 # 8-byte Folded Spill bge $s8, $a6, .LBB17_63 # %bb.62: # in Loop: Header=BB17_55 Depth=1 - slli.d $t0, $a2, 2 - ld.d $t1, $sp, 376 # 8-byte Folded Reload - ldx.w $t6, $t1, $t0 - slli.d $t0, $a2, 3 + slli.d $t0, $t4, 2 + ld.d $a2, $sp, 384 # 8-byte Folded Reload + ldx.w $a2, $a2, $t0 + slli.d $t0, $t4, 3 fldx.d $fa1, $a1, $t0 fldx.d $fa0, $t3, $t0 - fst.d $fa0, $sp, 648 # 8-byte Folded Spill - slli.d $t0, $t6, 3 + fst.d $fa0, $sp, 672 # 8-byte Folded Spill + slli.d $t0, $a2, 3 fldx.d $fa0, $a1, $t0 - fst.d $fa0, $sp, 344 # 8-byte Folded Spill + fst.d $fa0, $sp, 352 # 8-byte Folded Spill fldx.d $fa3, $t3, $t0 b .LBB17_64 .p2align 4, , 16 .LBB17_63: # in Loop: Header=BB17_55 Depth=1 - move $t6, $zero + move $a2, $zero fmov.d $fa1, $ft4 - fst.d $ft4, $sp, 648 # 8-byte Folded Spill - fst.d $ft4, $sp, 344 # 8-byte Folded Spill + fst.d $ft4, $sp, 672 # 8-byte Folded Spill + fst.d $ft4, $sp, 352 # 8-byte Folded Spill fmov.d $fa3, $ft4 .LBB17_64: # in Loop: Header=BB17_55 Depth=1 - ld.d $t0, $sp, 752 # 8-byte Folded Reload - ldx.w $s4, $t0, $fp + st.d $s2, $sp, 504 # 8-byte Folded Spill + ldx.w $s4, $s2, $fp slli.d $t1, $s4, 2 ldx.w $t0, $s3, $t1 - ldx.w $t2, $t5, $t1 - st.d $s2, $sp, 584 # 8-byte Folded Spill - st.d $t2, $sp, 456 # 8-byte Folded Spill + ldx.w $s2, $t5, $t1 + st.d $s2, $sp, 608 # 8-byte Folded Spill bge $s8, $t0, .LBB17_66 # %bb.65: # in Loop: Header=BB17_55 Depth=1 - move $s2, $t2 - slli.d $t2, $t2, 2 - ldx.w $t8, $t7, $t2 + slli.d $t2, $s2, 2 + move $t6, $a2 + move $a2, $t4 + ldx.w $t4, $t7, $t2 slli.d $t2, $s2, 3 fldx.d $fa0, $a1, $t2 - fst.d $fa0, $sp, 640 # 8-byte Folded Spill + fst.d $fa0, $sp, 664 # 8-byte Folded Spill fldx.d $fa0, $t3, $t2 - fst.d $fa0, $sp, 656 # 8-byte Folded Spill - st.d $t8, $sp, 440 # 8-byte Folded Spill - slli.d $t2, $t8, 3 + fst.d $fa0, $sp, 680 # 8-byte Folded Spill + st.d $t4, $sp, 464 # 8-byte Folded Spill + slli.d $t2, $t4, 3 + move $t4, $a2 + move $a2, $t6 fldx.d $fa0, $a1, $t2 - fst.d $fa0, $sp, 352 # 8-byte Folded Spill + fst.d $fa0, $sp, 360 # 8-byte Folded Spill fldx.d $fa2, $t3, $t2 b .LBB17_67 .p2align 4, , 16 .LBB17_66: # in Loop: Header=BB17_55 Depth=1 - st.d $zero, $sp, 440 # 8-byte Folded Spill - fst.d $ft4, $sp, 640 # 8-byte Folded Spill - fst.d $ft4, $sp, 656 # 8-byte Folded Spill - fst.d $ft4, $sp, 352 # 8-byte Folded Spill + st.d $zero, $sp, 464 # 8-byte Folded Spill + fst.d $ft4, $sp, 664 # 8-byte Folded Spill + fst.d $ft4, $sp, 680 # 8-byte Folded Spill + fst.d $ft4, $sp, 360 # 8-byte Folded Spill fmov.d $fa2, $ft4 .LBB17_67: # in Loop: Header=BB17_55 Depth=1 - ld.d $t3, $sp, 232 # 8-byte Folded Reload + ld.d $t3, $sp, 256 # 8-byte Folded Reload fldx.d $fs4, $a1, $s6 ldx.w $t2, $s1, $fp fmov.d $fa5, $fs4 @@ -6678,20 +6711,20 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe # %bb.68: # in Loop: Header=BB17_55 Depth=1 slli.d $t2, $t2, 3 fldx.d $fa5, $t3, $t2 - ld.d $s2, $sp, 680 # 8-byte Folded Reload + ld.d $s2, $sp, 696 # 8-byte Folded Reload fldx.d $ft7, $s2, $t2 - ld.d $s2, $sp, 672 # 8-byte Folded Reload + ld.d $s2, $sp, 688 # 8-byte Folded Reload fldx.d $fa7, $s2, $t2 .LBB17_69: # in Loop: Header=BB17_55 Depth=1 - ld.d $t2, $sp, 776 # 8-byte Folded Reload + ld.d $t2, $sp, 792 # 8-byte Folded Reload ldx.w $t2, $t2, $fp bltz $t2, .LBB17_71 # %bb.70: # in Loop: Header=BB17_55 Depth=1 slli.d $t2, $t2, 3 fldx.d $ft1, $t3, $t2 - ld.d $s2, $sp, 680 # 8-byte Folded Reload + ld.d $s2, $sp, 696 # 8-byte Folded Reload fldx.d $ft12, $s2, $t2 - ld.d $s2, $sp, 672 # 8-byte Folded Reload + ld.d $s2, $sp, 688 # 8-byte Folded Reload fldx.d $ft14, $s2, $t2 b .LBB17_72 .p2align 4, , 16 @@ -6702,7 +6735,7 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fmov.d $ft12, $ft4 fmov.d $ft14, $ft4 .LBB17_72: # in Loop: Header=BB17_55 Depth=1 - ld.d $t2, $sp, 768 # 8-byte Folded Reload + ld.d $t2, $sp, 784 # 8-byte Folded Reload ldx.w $t2, $t2, $fp fmov.d $fa6, $fs4 fmov.d $ft8, $ft4 @@ -6711,22 +6744,21 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe # %bb.73: # in Loop: Header=BB17_55 Depth=1 slli.d $t2, $t2, 3 fldx.d $fa6, $t3, $t2 - ld.d $s2, $sp, 680 # 8-byte Folded Reload + ld.d $s2, $sp, 696 # 8-byte Folded Reload fldx.d $ft8, $s2, $t2 - ld.d $s2, $sp, 672 # 8-byte Folded Reload + ld.d $s2, $sp, 688 # 8-byte Folded Reload fldx.d $ft0, $s2, $t2 .LBB17_74: # in Loop: Header=BB17_55 Depth=1 - ld.d $t2, $sp, 760 # 8-byte Folded Reload + ld.d $t2, $sp, 776 # 8-byte Folded Reload ldx.w $t2, $t2, $fp slt $s2, $s8, $a3 - st.d $s1, $sp, 496 # 8-byte Folded Spill bltz $t2, .LBB17_76 # %bb.75: # in Loop: Header=BB17_55 Depth=1 slli.d $t2, $t2, 3 fldx.d $ft2, $t3, $t2 - ld.d $t3, $sp, 680 # 8-byte Folded Reload + ld.d $t3, $sp, 696 # 8-byte Folded Reload fldx.d $ft13, $t3, $t2 - ld.d $t3, $sp, 672 # 8-byte Folded Reload + ld.d $t3, $sp, 688 # 8-byte Folded Reload fldx.d $ft15, $t3, $t2 b .LBB17_77 .p2align 4, , 16 @@ -6736,103 +6768,104 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fmov.d $ft13, $ft4 fmov.d $ft15, $ft4 .LBB17_77: # in Loop: Header=BB17_55 Depth=1 - ldx.w $a4, $t4, $a4 + ld.d $t2, $sp, 392 # 8-byte Folded Reload + ldx.w $a4, $t2, $a4 slli.d $t2, $a4, 3 fldx.d $fs6, $a1, $t2 slli.d $a4, $a4, 2 ldx.w $t3, $s3, $a4 - ld.d $s1, $sp, 288 # 8-byte Folded Reload - fldx.d $ft9, $s1, $t2 - move $t2, $s1 - fst.d $fa1, $sp, 576 # 8-byte Folded Spill - fst.d $fa4, $sp, 320 # 8-byte Folded Spill + fldx.d $ft9, $ra, $t2 + fst.d $fa1, $sp, 600 # 8-byte Folded Spill + st.d $s1, $sp, 520 # 8-byte Folded Spill + fst.d $fa4, $sp, 328 # 8-byte Folded Spill bge $a0, $t3, .LBB17_79 # %bb.78: # in Loop: Header=BB17_55 Depth=1 ldx.w $a4, $t7, $a4 slli.d $a4, $a4, 3 fldx.d $fa0, $a1, $a4 - fldx.d $fa1, $t2, $a4 + fldx.d $fa1, $ra, $a4 fadd.d $fa0, $fs6, $fa0 fmul.d $fs6, $fa0, $ft5 fadd.d $fa0, $ft9, $fa1 fmul.d $fa0, $fa0, $ft5 - fst.d $fa0, $sp, 392 # 8-byte Folded Spill + fst.d $fa0, $sp, 408 # 8-byte Folded Spill b .LBB17_80 .p2align 4, , 16 .LBB17_79: # in Loop: Header=BB17_55 Depth=1 - fst.d $ft9, $sp, 392 # 8-byte Folded Spill + fst.d $ft9, $sp, 408 # 8-byte Folded Spill .LBB17_80: # in Loop: Header=BB17_55 Depth=1 - fst.d $fa3, $sp, 360 # 8-byte Folded Spill - fst.d $fa2, $sp, 368 # 8-byte Folded Spill - fldx.d $ft10, $t2, $s6 - ld.d $t3, $sp, 280 # 8-byte Folded Reload + fst.d $fa3, $sp, 368 # 8-byte Folded Spill + fst.d $fa2, $sp, 376 # 8-byte Folded Spill + fldx.d $ft10, $ra, $s6 + ld.d $t3, $sp, 304 # 8-byte Folded Reload fldx.d $ft9, $t3, $s6 slli.d $a4, $s5, 3 fldx.d $fs0, $a1, $a4 - fldx.d $fa0, $t2, $a4 - fst.d $fa0, $sp, 784 # 8-byte Folded Spill - move $t8, $t5 + fldx.d $fa0, $ra, $a4 + fst.d $fa0, $sp, 768 # 8-byte Folded Spill + move $t6, $t5 ldx.w $s1, $t5, $a5 slli.d $a4, $s0, 3 fldx.d $fs5, $a1, $a4 - fldx.d $fa0, $t2, $a4 - fst.d $fa0, $sp, 744 # 8-byte Folded Spill + fldx.d $fa0, $ra, $a4 + fst.d $fa0, $sp, 720 # 8-byte Folded Spill ldx.w $a5, $t7, $t1 slli.d $a4, $s4, 3 fldx.d $fa0, $a1, $a4 - fst.d $fa0, $sp, 704 # 8-byte Folded Spill + fst.d $fa0, $sp, 728 # 8-byte Folded Spill fldx.d $fa0, $t3, $a4 - fst.d $fa0, $sp, 736 # 8-byte Folded Spill - ld.d $a4, $sp, 376 # 8-byte Folded Reload + fst.d $fa0, $sp, 760 # 8-byte Folded Spill + ld.d $a4, $sp, 384 # 8-byte Folded Reload ldx.w $a7, $a4, $a7 - slli.d $a4, $s7, 3 + ld.d $a4, $sp, 800 # 8-byte Folded Reload + slli.d $a4, $a4, 3 fldx.d $fa0, $a1, $a4 - fst.d $fa0, $sp, 568 # 8-byte Folded Spill + fst.d $fa0, $sp, 584 # 8-byte Folded Spill fldx.d $fa0, $t3, $a4 - fst.d $fa0, $sp, 728 # 8-byte Folded Spill + fst.d $fa0, $sp, 752 # 8-byte Folded Spill slli.d $a4, $s1, 3 fldx.d $fs3, $a1, $a4 - fldx.d $fa0, $t2, $a4 - fst.d $fa0, $sp, 696 # 8-byte Folded Spill - st.d $a5, $sp, 424 # 8-byte Folded Spill + fldx.d $fa0, $ra, $a4 + fst.d $fa0, $sp, 712 # 8-byte Folded Spill + st.d $a5, $sp, 448 # 8-byte Folded Spill slli.d $a4, $a5, 3 fldx.d $fa0, $a1, $a4 - fst.d $fa0, $sp, 552 # 8-byte Folded Spill + fst.d $fa0, $sp, 576 # 8-byte Folded Spill fldx.d $fa0, $t3, $a4 - fst.d $fa0, $sp, 720 # 8-byte Folded Spill - st.d $a7, $sp, 408 # 8-byte Folded Spill + fst.d $fa0, $sp, 744 # 8-byte Folded Spill + st.d $a7, $sp, 416 # 8-byte Folded Spill slli.d $a4, $a7, 3 fldx.d $fa0, $a1, $a4 - fst.d $fa0, $sp, 544 # 8-byte Folded Spill + fst.d $fa0, $sp, 568 # 8-byte Folded Spill fldx.d $fa0, $t3, $a4 - fst.d $fa0, $sp, 712 # 8-byte Folded Spill + fst.d $fa0, $sp, 736 # 8-byte Folded Spill slli.d $a4, $s8, 3 - ld.d $a5, $sp, 248 # 8-byte Folded Reload + ld.d $a5, $sp, 272 # 8-byte Folded Reload fldx.d $ft11, $a5, $a4 slli.d $a0, $a0, 3 fldx.d $fs2, $a5, $a0 slli.d $a0, $a3, 3 fldx.d $fa0, $a5, $a0 - fst.d $fa0, $sp, 536 # 8-byte Folded Spill + fst.d $fa0, $sp, 560 # 8-byte Folded Spill slli.d $a0, $t0, 3 - pcalau12i $a3, %pc_hi20(.LCPI17_1) - fld.d $fa0, $a3, %pc_lo12(.LCPI17_1) - ld.d $a3, $sp, 240 # 8-byte Folded Reload - fldx.d $fa1, $a3, $a0 - fst.d $fa1, $sp, 432 # 8-byte Folded Spill + ld.d $a3, $sp, 264 # 8-byte Folded Reload + fldx.d $fa0, $a3, $a0 + fst.d $fa0, $sp, 456 # 8-byte Folded Spill slli.d $a0, $a6, 3 - fldx.d $fa1, $a3, $a0 - fst.d $fa1, $sp, 400 # 8-byte Folded Spill - fst.d $fa0, $sp, 792 # 8-byte Folded Spill + fldx.d $fa0, $a3, $a0 + fst.d $fa0, $sp, 424 # 8-byte Folded Spill + ld.d $a0, $sp, 224 # 8-byte Folded Reload + movgr2fr.d $fa0, $a0 + fst.d $fa0, $sp, 808 # 8-byte Folded Spill fmul.d $fa0, $fa5, $fa0 - fst.d $fa0, $sp, 560 # 8-byte Folded Spill + fst.d $fa0, $sp, 552 # 8-byte Folded Spill fsqrt.d $fa0, $fa0 fcmp.cor.d $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB17_163 .LBB17_81: # %.split # in Loop: Header=BB17_55 Depth=1 alsl.d $t5, $s5, $s3, 2 - fst.d $fs1, $sp, 312 # 8-byte Folded Spill + fst.d $fs1, $sp, 320 # 8-byte Folded Spill fadd.d $fa1, $fs5, $fs1 fmul.d $fa1, $fa1, $ft5 movgr2cf $fcc0, $s2 @@ -6841,20 +6874,20 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fmul.d $fa4, $fa2, $ft5 fdiv.d $fa2, $ft7, $fa5 fabs.d $fa2, $fa2 - fst.d $fa2, $sp, 384 # 8-byte Folded Spill + fst.d $fa2, $sp, 400 # 8-byte Folded Spill fadd.d $fa0, $fa2, $fa0 fsub.d $fa2, $fs4, $fs0 fsub.d $fa3, $fs0, $fs6 fsub.d $fs2, $fa1, $fs4 fmul.d $fa0, $fa0, $ft5 fmul.d $fa0, $ft3, $fa0 - fst.d $fa4, $sp, 688 # 8-byte Folded Spill + fst.d $fa4, $sp, 704 # 8-byte Folded Spill fdiv.d $fa0, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI17_2) - fld.d $fs6, $a0, %pc_lo12(.LCPI17_2) fsub.d $fa1, $ft6, $fa0 fmul.d $fa0, $fa0, $fa1 fmul.d $fa1, $fa2, $fa2 + ld.d $a0, $sp, 216 # 8-byte Folded Reload + movgr2fr.d $fs6, $a0 fcmp.clt.d $fcc0, $fa1, $fs6 fsel $fa1, $fa1, $fs6, $fcc0 frecip.d $fa1, $fa1 @@ -6873,38 +6906,40 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fsub.d $fa1, $ft6, $fa1 fmul.d $fa0, $fa0, $fa1 fmul.d $fs1, $fa2, $fa0 - ld.d $a5, $sp, 272 # 8-byte Folded Reload - ld.d $a6, $sp, 264 # 8-byte Folded Reload - ld.d $a7, $sp, 256 # 8-byte Folded Reload - ld.d $t0, $sp, 608 # 8-byte Folded Reload - ld.d $t1, $sp, 600 # 8-byte Folded Reload + move $a5, $s7 + ld.d $a6, $sp, 296 # 8-byte Folded Reload + ld.d $a7, $sp, 288 # 8-byte Folded Reload + ld.d $t0, $sp, 280 # 8-byte Folded Reload + ld.d $t1, $sp, 632 # 8-byte Folded Reload bge $s8, $a0, .LBB17_84 # %bb.82: # in Loop: Header=BB17_55 Depth=1 - ld.d $a0, $sp, 504 # 8-byte Folded Reload + ld.d $a0, $sp, 440 # 8-byte Folded Reload slli.d $a0, $a0, 2 ldx.w $a3, $s3, $a0 - ld.d $a0, $sp, 416 # 8-byte Folded Reload + ld.d $a0, $sp, 432 # 8-byte Folded Reload slli.d $a0, $a0, 2 ldx.w $a4, $s3, $a0 - ld.d $s5, $sp, 592 # 8-byte Folded Reload + ld.d $t2, $sp, 624 # 8-byte Folded Reload + ld.d $s5, $sp, 616 # 8-byte Folded Reload bge $a3, $a4, .LBB17_85 # %bb.83: # in Loop: Header=BB17_55 Depth=1 ldx.w $a0, $t7, $a0 slli.d $a0, $a0, 3 fldx.d $fa0, $a1, $a0 - fld.d $fa1, $sp, 528 # 8-byte Folded Reload + fld.d $fa1, $sp, 592 # 8-byte Folded Reload fadd.d $fa0, $fa1, $fa0 fmul.d $fa3, $fa0, $ft5 b .LBB17_86 .p2align 4, , 16 .LBB17_84: # in Loop: Header=BB17_55 Depth=1 - ld.d $s5, $sp, 592 # 8-byte Folded Reload + ld.d $t2, $sp, 624 # 8-byte Folded Reload + ld.d $s5, $sp, 616 # 8-byte Folded Reload b .LBB17_88 .p2align 4, , 16 .LBB17_85: # in Loop: Header=BB17_55 Depth=1 - fld.d $fa3, $sp, 528 # 8-byte Folded Reload + fld.d $fa3, $sp, 592 # 8-byte Folded Reload .LBB17_86: # in Loop: Header=BB17_55 Depth=1 - fld.d $fa0, $sp, 792 # 8-byte Folded Reload + fld.d $fa0, $sp, 808 # 8-byte Folded Reload fmul.d $fa1, $ft1, $fa0 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 @@ -6914,12 +6949,12 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fdiv.d $fa1, $ft12, $ft1 fabs.d $fa1, $fa1 fadd.d $fa0, $fa1, $fa0 - fld.d $fa2, $sp, 624 # 8-byte Folded Reload + fld.d $fa2, $sp, 640 # 8-byte Folded Reload fsub.d $fa1, $fs4, $fa2 fsub.d $fa2, $fa2, $fa3 fmul.d $fa0, $fa0, $ft5 fmul.d $fa0, $ft3, $fa0 - fld.d $fa3, $sp, 688 # 8-byte Folded Reload + fld.d $fa3, $sp, 704 # 8-byte Folded Reload fdiv.d $fa0, $fa0, $fa3 fsub.d $fa3, $ft6, $fa0 fmul.d $fa0, $fa0, $fa3 @@ -6945,40 +6980,41 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fmul.d $fa0, $fa0, $ft5 fmul.d $fs1, $fa0, $ft5 .LBB17_88: # in Loop: Header=BB17_55 Depth=1 - alsl.d $ra, $s0, $s3, 2 - ld.w $a3, $ra, 0 + alsl.d $s7, $s0, $s3, 2 + ld.w $a3, $s7, 0 slli.d $a0, $s1, 2 ldx.w $a4, $s3, $a0 - fst.d $fs1, $sp, 448 # 8-byte Folded Spill + fst.d $fs1, $sp, 472 # 8-byte Folded Spill bge $a3, $a4, .LBB17_90 # %bb.89: # in Loop: Header=BB17_55 Depth=1 ldx.w $a0, $t7, $a0 slli.d $a0, $a0, 3 fldx.d $fa0, $a1, $a0 - fldx.d $fa1, $t2, $a0 + fldx.d $fa1, $ra, $a0 fadd.d $fa0, $fs3, $fa0 fmul.d $fs3, $fa0, $ft5 - fld.d $fa0, $sp, 696 # 8-byte Folded Reload + fld.d $fa0, $sp, 712 # 8-byte Folded Reload fadd.d $fa0, $fa0, $fa1 fmul.d $fa0, $fa0, $ft5 - fst.d $fa0, $sp, 696 # 8-byte Folded Spill + fst.d $fa0, $sp, 712 # 8-byte Folded Spill .LBB17_90: # in Loop: Header=BB17_55 Depth=1 ld.w $s0, $t5, 0 - fld.d $fa0, $sp, 792 # 8-byte Folded Reload + fld.d $fa0, $sp, 808 # 8-byte Folded Reload fmul.d $fa0, $fa6, $fa0 - fst.d $fa0, $sp, 528 # 8-byte Folded Spill + fst.d $fa0, $sp, 544 # 8-byte Folded Spill fsqrt.d $fa0, $fa0 fcmp.cor.d $fcc0, $fa0, $fa0 + st.d $s7, $sp, 592 # 8-byte Folded Spill bceqz $fcc0, .LBB17_164 .LBB17_91: # %.split1335 # in Loop: Header=BB17_55 Depth=1 slt $s0, $s8, $s0 - fld.d $fa1, $sp, 624 # 8-byte Folded Reload + fld.d $fa1, $sp, 640 # 8-byte Folded Reload fadd.d $fa1, $fs0, $fa1 fmul.d $fa1, $fa1, $ft5 movgr2cf $fcc0, $s0 fsel $fa1, $fs0, $fa1, $fcc0 - fld.d $fa2, $sp, 536 # 8-byte Folded Reload + fld.d $fa2, $sp, 560 # 8-byte Folded Reload fadd.d $fa2, $ft11, $fa2 fmul.d $fs1, $fa2, $ft5 fdiv.d $fa2, $ft8, $fa6 @@ -7005,17 +7041,17 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fcmp.clt.d $fcc0, $fa3, $fa1 fsel $fa1, $fa1, $fa3, $fcc0 fcmp.clt.d $fcc0, $fa1, $fs7 - ld.w $a0, $ra, 0 + ld.w $a0, $s7, 0 fsel $fa1, $fa1, $fs7, $fcc0 fsub.d $fa1, $ft6, $fa1 fmul.d $fa0, $fa0, $fa1 fmul.d $fs5, $fa2, $fa0 bge $s8, $a0, .LBB17_97 # %bb.92: # in Loop: Header=BB17_55 Depth=1 - ld.d $a0, $sp, 520 # 8-byte Folded Reload + ld.d $a0, $sp, 536 # 8-byte Folded Reload slli.d $a0, $a0, 2 ldx.w $a3, $s3, $a0 - ld.d $a0, $sp, 512 # 8-byte Folded Reload + ld.d $a0, $sp, 528 # 8-byte Folded Reload slli.d $a0, $a0, 2 ldx.w $a4, $s3, $a0 bge $a3, $a4, .LBB17_94 @@ -7023,15 +7059,15 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe ldx.w $a0, $t7, $a0 slli.d $a0, $a0, 3 fldx.d $fa0, $a1, $a0 - fld.d $fa1, $sp, 320 # 8-byte Folded Reload + fld.d $fa1, $sp, 328 # 8-byte Folded Reload fadd.d $fa0, $fa1, $fa0 fmul.d $fa3, $fa0, $ft5 b .LBB17_95 .p2align 4, , 16 .LBB17_94: # in Loop: Header=BB17_55 Depth=1 - fld.d $fa3, $sp, 320 # 8-byte Folded Reload + fld.d $fa3, $sp, 328 # 8-byte Folded Reload .LBB17_95: # in Loop: Header=BB17_55 Depth=1 - fld.d $fa0, $sp, 792 # 8-byte Folded Reload + fld.d $fa0, $sp, 808 # 8-byte Folded Reload fmul.d $fa1, $ft2, $fa0 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 @@ -7041,7 +7077,7 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fdiv.d $fa1, $ft13, $ft2 fabs.d $fa1, $fa1 fadd.d $fa0, $fa1, $fa0 - fld.d $fa2, $sp, 312 # 8-byte Folded Reload + fld.d $fa2, $sp, 320 # 8-byte Folded Reload fsub.d $fa1, $fa2, $fs4 fsub.d $fa2, $fa3, $fa2 fmul.d $fa0, $fa0, $ft5 @@ -7071,29 +7107,29 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fmul.d $fa0, $fa0, $ft5 fmul.d $fs5, $fa0, $ft5 .LBB17_97: # in Loop: Header=BB17_55 Depth=1 - fld.d $fa1, $sp, 560 # 8-byte Folded Reload - fst.d $fs5, $sp, 536 # 8-byte Folded Spill + fld.d $fa1, $sp, 552 # 8-byte Folded Reload + fst.d $fs5, $sp, 560 # 8-byte Folded Spill fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB17_165 .LBB17_98: # %.split1339 # in Loop: Header=BB17_55 Depth=1 - fld.d $fa1, $sp, 664 # 8-byte Folded Reload - fld.d $fa2, $sp, 744 # 8-byte Folded Reload + fld.d $fa1, $sp, 656 # 8-byte Folded Reload + fld.d $fa2, $sp, 720 # 8-byte Folded Reload fadd.d $fa1, $fa2, $fa1 fmul.d $fa1, $fa1, $ft5 movgr2cf $fcc0, $s2 fsel $fa1, $fa2, $fa1, $fcc0 - fld.d $fa2, $sp, 384 # 8-byte Folded Reload + fld.d $fa2, $sp, 400 # 8-byte Folded Reload fadd.d $fa0, $fa2, $fa0 - fld.d $fa3, $sp, 784 # 8-byte Folded Reload + fld.d $fa3, $sp, 768 # 8-byte Folded Reload fsub.d $fa2, $ft10, $fa3 - fld.d $fa4, $sp, 392 # 8-byte Folded Reload + fld.d $fa4, $sp, 408 # 8-byte Folded Reload fsub.d $fa3, $fa3, $fa4 fsub.d $fs2, $fa1, $ft10 fmul.d $fa0, $fa0, $ft5 fmul.d $fa0, $ft3, $fa0 - fld.d $fa1, $sp, 688 # 8-byte Folded Reload + fld.d $fa1, $sp, 704 # 8-byte Folded Reload fdiv.d $fa0, $fa0, $fa1 fsub.d $fa1, $ft6, $fa0 fmul.d $fa0, $fa0, $fa1 @@ -7115,34 +7151,34 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fsub.d $fa1, $ft6, $fa1 fmul.d $fa0, $fa1, $fa0 fmul.d $fs3, $fa2, $fa0 - fld.d $fs5, $sp, 576 # 8-byte Folded Reload + fld.d $fs5, $sp, 600 # 8-byte Folded Reload bge $s8, $a0, .LBB17_101 # %bb.99: # in Loop: Header=BB17_55 Depth=1 - ld.d $a0, $sp, 504 # 8-byte Folded Reload + ld.d $a0, $sp, 440 # 8-byte Folded Reload slli.d $a0, $a0, 2 ldx.w $a3, $s3, $a0 - ld.d $a0, $sp, 416 # 8-byte Folded Reload + ld.d $a0, $sp, 432 # 8-byte Folded Reload slli.d $a0, $a0, 2 ldx.w $a4, $s3, $a0 - ld.d $s2, $sp, 584 # 8-byte Folded Reload + ld.d $s2, $sp, 608 # 8-byte Folded Reload bge $a3, $a4, .LBB17_102 # %bb.100: # in Loop: Header=BB17_55 Depth=1 ldx.w $a0, $t7, $a0 slli.d $a0, $a0, 3 - fldx.d $fa0, $t2, $a0 - fld.d $fa1, $sp, 328 # 8-byte Folded Reload + fldx.d $fa0, $ra, $a0 + fld.d $fa1, $sp, 336 # 8-byte Folded Reload fadd.d $fa0, $fa1, $fa0 fmul.d $fa3, $fa0, $ft5 b .LBB17_103 .p2align 4, , 16 .LBB17_101: # in Loop: Header=BB17_55 Depth=1 - ld.d $s2, $sp, 584 # 8-byte Folded Reload + ld.d $s2, $sp, 608 # 8-byte Folded Reload b .LBB17_105 .p2align 4, , 16 .LBB17_102: # in Loop: Header=BB17_55 Depth=1 - fld.d $fa3, $sp, 328 # 8-byte Folded Reload + fld.d $fa3, $sp, 336 # 8-byte Folded Reload .LBB17_103: # in Loop: Header=BB17_55 Depth=1 - fld.d $fa0, $sp, 792 # 8-byte Folded Reload + fld.d $fa0, $sp, 808 # 8-byte Folded Reload fmul.d $fa1, $ft1, $fa0 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 @@ -7152,12 +7188,12 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fdiv.d $fa1, $ft12, $ft1 fabs.d $fa1, $fa1 fadd.d $fa0, $fa1, $fa0 - fld.d $fa2, $sp, 632 # 8-byte Folded Reload + fld.d $fa2, $sp, 648 # 8-byte Folded Reload fsub.d $fa1, $ft10, $fa2 fsub.d $fa2, $fa2, $fa3 fmul.d $fa0, $fa0, $ft5 fmul.d $fa0, $ft3, $fa0 - fld.d $fa3, $sp, 688 # 8-byte Folded Reload + fld.d $fa3, $sp, 704 # 8-byte Folded Reload fdiv.d $fa0, $fa0, $fa3 fsub.d $fa3, $ft6, $fa0 fmul.d $fa0, $fa0, $fa3 @@ -7183,24 +7219,24 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fmul.d $fa0, $fa0, $ft5 fmul.d $fs3, $fa0, $ft5 .LBB17_105: # in Loop: Header=BB17_55 Depth=1 - fld.d $fa1, $sp, 528 # 8-byte Folded Reload - fst.d $fs3, $sp, 560 # 8-byte Folded Spill + fld.d $fa1, $sp, 544 # 8-byte Folded Reload + fst.d $fs3, $sp, 640 # 8-byte Folded Spill fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB17_166 .LBB17_106: # %.split1343 # in Loop: Header=BB17_55 Depth=1 - fld.d $fa1, $sp, 632 # 8-byte Folded Reload - fld.d $fa2, $sp, 784 # 8-byte Folded Reload + fld.d $fa1, $sp, 648 # 8-byte Folded Reload + fld.d $fa2, $sp, 768 # 8-byte Folded Reload fadd.d $fa1, $fa2, $fa1 fmul.d $fa1, $fa1, $ft5 movgr2cf $fcc0, $s0 fsel $fa1, $fa2, $fa1, $fcc0 fadd.d $fa0, $fs0, $fa0 - fld.d $fa3, $sp, 744 # 8-byte Folded Reload + fld.d $fa3, $sp, 720 # 8-byte Folded Reload fsub.d $fa2, $fa3, $ft10 fsub.d $fs0, $ft10, $fa1 - fld.d $fa1, $sp, 696 # 8-byte Folded Reload + fld.d $fa1, $sp, 712 # 8-byte Folded Reload fsub.d $fa1, $fa1, $fa3 fmul.d $fa0, $fa0, $ft5 fmul.d $fa0, $ft3, $fa0 @@ -7220,34 +7256,34 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fcmp.clt.d $fcc0, $fa3, $fa1 fsel $fa1, $fa1, $fa3, $fcc0 fcmp.clt.d $fcc0, $fa1, $fs7 - ld.w $a0, $ra, 0 + ld.w $a0, $s7, 0 fsel $fa1, $fa1, $fs7, $fcc0 fsub.d $fa1, $ft6, $fa1 fmul.d $fa0, $fa1, $fa0 fmul.d $fs2, $fa2, $fa0 - fld.d $fs3, $sp, 568 # 8-byte Folded Reload + fld.d $fs3, $sp, 584 # 8-byte Folded Reload bge $s8, $a0, .LBB17_112 # %bb.107: # in Loop: Header=BB17_55 Depth=1 - ld.d $a0, $sp, 520 # 8-byte Folded Reload + ld.d $a0, $sp, 536 # 8-byte Folded Reload slli.d $a0, $a0, 2 ldx.w $a3, $s3, $a0 - ld.d $a0, $sp, 512 # 8-byte Folded Reload + ld.d $a0, $sp, 528 # 8-byte Folded Reload slli.d $a0, $a0, 2 ldx.w $a4, $s3, $a0 bge $a3, $a4, .LBB17_109 # %bb.108: # in Loop: Header=BB17_55 Depth=1 ldx.w $a0, $t7, $a0 slli.d $a0, $a0, 3 - fldx.d $fa0, $t2, $a0 - fld.d $fa1, $sp, 336 # 8-byte Folded Reload + fldx.d $fa0, $ra, $a0 + fld.d $fa1, $sp, 344 # 8-byte Folded Reload fadd.d $fa0, $fa1, $fa0 fmul.d $fa3, $fa0, $ft5 b .LBB17_110 .p2align 4, , 16 .LBB17_109: # in Loop: Header=BB17_55 Depth=1 - fld.d $fa3, $sp, 336 # 8-byte Folded Reload + fld.d $fa3, $sp, 344 # 8-byte Folded Reload .LBB17_110: # in Loop: Header=BB17_55 Depth=1 - fld.d $fa0, $sp, 792 # 8-byte Folded Reload + fld.d $fa0, $sp, 808 # 8-byte Folded Reload fmul.d $fa1, $ft2, $fa0 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 @@ -7257,7 +7293,7 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fdiv.d $fa1, $ft13, $ft2 fabs.d $fa1, $fa1 fadd.d $fa0, $fa1, $fa0 - fld.d $fa2, $sp, 664 # 8-byte Folded Reload + fld.d $fa2, $sp, 656 # 8-byte Folded Reload fsub.d $fa1, $fa2, $ft10 fsub.d $fa2, $fa3, $fa2 fmul.d $fa0, $fa0, $ft5 @@ -7287,27 +7323,28 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fmul.d $fa0, $fa0, $ft5 fmul.d $fs2, $fa0, $ft5 .LBB17_112: # in Loop: Header=BB17_55 Depth=1 - alsl.d $s7, $s7, $s3, 2 + ld.d $a0, $sp, 800 # 8-byte Folded Reload + alsl.d $s7, $a0, $s3, 2 ld.w $a3, $s7, 0 - ld.d $a0, $sp, 408 # 8-byte Folded Reload + ld.d $a0, $sp, 416 # 8-byte Folded Reload slli.d $a0, $a0, 2 ldx.w $a4, $s3, $a0 - fld.d $fs0, $sp, 544 # 8-byte Folded Reload + fld.d $fs0, $sp, 568 # 8-byte Folded Reload bge $a3, $a4, .LBB17_114 # %bb.113: # in Loop: Header=BB17_55 Depth=1 - ldx.w $a0, $t8, $a0 + ldx.w $a0, $t6, $a0 slli.d $a0, $a0, 3 fldx.d $fa0, $a1, $a0 fldx.d $fa1, $t3, $a0 fadd.d $fa0, $fs0, $fa0 fmul.d $fs0, $fa0, $ft5 - fld.d $fa0, $sp, 712 # 8-byte Folded Reload + fld.d $fa0, $sp, 736 # 8-byte Folded Reload fadd.d $fa0, $fa0, $fa1 fmul.d $fa0, $fa0, $ft5 - fst.d $fa0, $sp, 712 # 8-byte Folded Spill + fst.d $fa0, $sp, 736 # 8-byte Folded Spill .LBB17_114: # in Loop: Header=BB17_55 Depth=1 alsl.d $s4, $s4, $s3, 2 - ldx.w $a4, $s5, $fp + ldx.w $a4, $a5, $fp ld.w $a0, $s4, 0 fmov.d $fa4, $fs4 fmov.d $fa0, $fs7 @@ -7315,92 +7352,92 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe bltz $a4, .LBB17_116 # %bb.115: # in Loop: Header=BB17_55 Depth=1 slli.d $a4, $a4, 3 - fldx.d $fa4, $a5, $a4 - fldx.d $fa0, $a6, $a4 - fldx.d $fa3, $a7, $a4 + fldx.d $fa4, $a6, $a4 + fldx.d $fa0, $a7, $a4 + fldx.d $fa3, $t0, $a4 .LBB17_116: # in Loop: Header=BB17_55 Depth=1 - fst.d $fa0, $sp, 416 # 8-byte Folded Spill - ldx.w $a4, $t0, $fp + fst.d $fa0, $sp, 440 # 8-byte Folded Spill + ldx.w $a4, $t1, $fp bltz $a4, .LBB17_118 # %bb.117: # in Loop: Header=BB17_55 Depth=1 slli.d $a3, $a4, 3 - fldx.d $fa0, $a5, $a3 - fst.d $fa0, $sp, 688 # 8-byte Folded Spill fldx.d $fa0, $a6, $a3 - fst.d $fa0, $sp, 392 # 8-byte Folded Spill + fst.d $fa0, $sp, 712 # 8-byte Folded Spill fldx.d $fa0, $a7, $a3 - fst.d $fa0, $sp, 632 # 8-byte Folded Spill + fst.d $fa0, $sp, 416 # 8-byte Folded Spill + fldx.d $fa0, $t0, $a3 + fst.d $fa0, $sp, 656 # 8-byte Folded Spill b .LBB17_119 .p2align 4, , 16 .LBB17_118: # in Loop: Header=BB17_55 Depth=1 slt $a3, $s8, $a3 movgr2cf $fcc0, $a3 fsel $fa0, $fs7, $fs4, $fcc0 - fst.d $fa0, $sp, 688 # 8-byte Folded Spill - fst.d $fs7, $sp, 392 # 8-byte Folded Spill - fst.d $fs7, $sp, 632 # 8-byte Folded Spill + fst.d $fa0, $sp, 712 # 8-byte Folded Spill + fst.d $fs7, $sp, 416 # 8-byte Folded Spill + fst.d $fs7, $sp, 656 # 8-byte Folded Spill .LBB17_119: # in Loop: Header=BB17_55 Depth=1 - ldx.w $a3, $t1, $fp + ldx.w $a3, $t2, $fp fmov.d $fa1, $fs4 fmov.d $fa2, $fs7 - fst.d $fs7, $sp, 784 # 8-byte Folded Spill + fst.d $fs7, $sp, 800 # 8-byte Folded Spill bltz $a3, .LBB17_121 # %bb.120: # in Loop: Header=BB17_55 Depth=1 slli.d $a3, $a3, 3 - fldx.d $fa1, $a5, $a3 - fldx.d $fa2, $a6, $a3 - fldx.d $fa0, $a7, $a3 - fst.d $fa0, $sp, 784 # 8-byte Folded Spill + fldx.d $fa1, $a6, $a3 + fldx.d $fa2, $a7, $a3 + fldx.d $fa0, $t0, $a3 + fst.d $fa0, $sp, 800 # 8-byte Folded Spill .LBB17_121: # in Loop: Header=BB17_55 Depth=1 - ldx.w $a3, $s2, $fp + ldx.w $a3, $s5, $fp slt $s0, $s8, $a0 bltz $a3, .LBB17_123 # %bb.122: # in Loop: Header=BB17_55 Depth=1 slli.d $a0, $a3, 3 - fldx.d $fa0, $a5, $a0 - fldx.d $fs7, $a6, $a0 - fldx.d $fs1, $a7, $a0 - fst.d $fs1, $sp, 624 # 8-byte Folded Spill + fldx.d $fa0, $a6, $a0 + fldx.d $fs7, $a7, $a0 + fldx.d $fs1, $t0, $a0 + fst.d $fs1, $sp, 648 # 8-byte Folded Spill b .LBB17_124 .p2align 4, , 16 .LBB17_123: # in Loop: Header=BB17_55 Depth=1 movgr2cf $fcc0, $s0 fsel $fa0, $fs7, $fs4, $fcc0 - fst.d $fs7, $sp, 624 # 8-byte Folded Spill + fst.d $fs7, $sp, 648 # 8-byte Folded Spill .LBB17_124: # in Loop: Header=BB17_55 Depth=1 - fst.d $fa2, $sp, 408 # 8-byte Folded Spill - fst.d $fa0, $sp, 664 # 8-byte Folded Spill - fst.d $fa1, $sp, 744 # 8-byte Folded Spill - fld.d $fa0, $sp, 792 # 8-byte Folded Reload + fst.d $fa2, $sp, 432 # 8-byte Folded Spill + fst.d $fa0, $sp, 704 # 8-byte Folded Spill + fst.d $fa1, $sp, 768 # 8-byte Folded Spill + fld.d $fa0, $sp, 808 # 8-byte Folded Reload fmul.d $fa0, $fa4, $fa0 - fst.d $fa0, $sp, 504 # 8-byte Folded Spill + fst.d $fa0, $sp, 528 # 8-byte Folded Spill fsqrt.d $fa0, $fa0 fcmp.cor.d $fcc0, $fa0, $fa0 - fst.d $fs2, $sp, 528 # 8-byte Folded Spill - fst.d $fa3, $sp, 520 # 8-byte Folded Spill - fst.d $fa4, $sp, 512 # 8-byte Folded Spill + fst.d $fs2, $sp, 552 # 8-byte Folded Spill + fst.d $fa3, $sp, 544 # 8-byte Folded Spill + fst.d $fa4, $sp, 536 # 8-byte Folded Spill bceqz $fcc0, .LBB17_167 .LBB17_125: # %.split1347 # in Loop: Header=BB17_55 Depth=1 - fld.d $fa1, $sp, 640 # 8-byte Folded Reload - fld.d $fa2, $sp, 704 # 8-byte Folded Reload + fld.d $fa1, $sp, 664 # 8-byte Folded Reload + fld.d $fa2, $sp, 728 # 8-byte Folded Reload fadd.d $fa1, $fa2, $fa1 fmul.d $fa1, $fa1, $ft5 movgr2cf $fcc0, $s0 fsel $fa1, $fa2, $fa1, $fcc0 - fld.d $fa2, $sp, 400 # 8-byte Folded Reload + fld.d $fa2, $sp, 424 # 8-byte Folded Reload fadd.d $fa2, $ft11, $fa2 fmul.d $fs2, $fa2, $ft5 fdiv.d $fa2, $fa3, $fa4 fabs.d $fa2, $fa2 - fst.d $fa2, $sp, 400 # 8-byte Folded Spill + fst.d $fa2, $sp, 424 # 8-byte Folded Spill fadd.d $fa0, $fa2, $fa0 fsub.d $fa2, $fs4, $fs3 fsub.d $fa3, $fs3, $fs0 fsub.d $fs1, $fa1, $fs4 fmul.d $fa0, $fa0, $ft5 fmul.d $fa0, $ft3, $fa0 - fst.d $fs2, $sp, 696 # 8-byte Folded Spill + fst.d $fs2, $sp, 720 # 8-byte Folded Spill fdiv.d $fa0, $fa0, $fs2 fsub.d $fa1, $ft6, $fa0 fmul.d $fa0, $fa0, $fa1 @@ -7425,32 +7462,32 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fmul.d $fs2, $fa2, $fa0 bge $s8, $a0, .LBB17_131 # %bb.126: # in Loop: Header=BB17_55 Depth=1 - slli.d $a0, $a2, 2 + slli.d $a0, $t4, 2 ldx.w $a3, $s3, $a0 - slli.d $a0, $t6, 2 + slli.d $a0, $a2, 2 ldx.w $a4, $s3, $a0 bge $a3, $a4, .LBB17_128 # %bb.127: # in Loop: Header=BB17_55 Depth=1 - ldx.w $a0, $t8, $a0 + ldx.w $a0, $t6, $a0 slli.d $a0, $a0, 3 fldx.d $fa0, $a1, $a0 - fld.d $fa1, $sp, 344 # 8-byte Folded Reload + fld.d $fa1, $sp, 352 # 8-byte Folded Reload fadd.d $fa0, $fa1, $fa0 fmul.d $fa2, $fa0, $ft5 b .LBB17_129 .p2align 4, , 16 .LBB17_128: # in Loop: Header=BB17_55 Depth=1 - fld.d $fa2, $sp, 344 # 8-byte Folded Reload + fld.d $fa2, $sp, 352 # 8-byte Folded Reload .LBB17_129: # in Loop: Header=BB17_55 Depth=1 - fld.d $fa3, $sp, 688 # 8-byte Folded Reload - fld.d $fa0, $sp, 792 # 8-byte Folded Reload + fld.d $fa3, $sp, 712 # 8-byte Folded Reload + fld.d $fa0, $sp, 808 # 8-byte Folded Reload fmul.d $fa1, $fa3, $fa0 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB17_175 .LBB17_130: # %.split1349 # in Loop: Header=BB17_55 Depth=1 - fld.d $fa1, $sp, 632 # 8-byte Folded Reload + fld.d $fa1, $sp, 656 # 8-byte Folded Reload fdiv.d $fa1, $fa1, $fa3 fabs.d $fa1, $fa1 fadd.d $fa0, $fa1, $fa0 @@ -7458,7 +7495,7 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fsub.d $fa2, $fs5, $fa2 fmul.d $fa0, $fa0, $ft5 fmul.d $fa0, $ft3, $fa0 - fld.d $fa3, $sp, 696 # 8-byte Folded Reload + fld.d $fa3, $sp, 720 # 8-byte Folded Reload fdiv.d $fa0, $fa0, $fa3 fsub.d $fa3, $ft6, $fa0 fmul.d $fa0, $fa0, $fa3 @@ -7483,28 +7520,28 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fmul.d $fa0, $fa0, $ft5 fmul.d $fs2, $fa0, $ft5 .LBB17_131: # in Loop: Header=BB17_55 Depth=1 - fld.d $fa4, $sp, 552 # 8-byte Folded Reload + fld.d $fa4, $sp, 576 # 8-byte Folded Reload ld.w $a3, $s4, 0 - ld.d $a0, $sp, 424 # 8-byte Folded Reload + ld.d $a0, $sp, 448 # 8-byte Folded Reload slli.d $a0, $a0, 2 ldx.w $a4, $s3, $a0 - fld.d $fa3, $sp, 744 # 8-byte Folded Reload - fst.d $fs2, $sp, 544 # 8-byte Folded Spill + fld.d $fa3, $sp, 768 # 8-byte Folded Reload + fst.d $fs2, $sp, 568 # 8-byte Folded Spill bge $a3, $a4, .LBB17_133 # %bb.132: # in Loop: Header=BB17_55 Depth=1 - ldx.w $a0, $t8, $a0 + ldx.w $a0, $t6, $a0 slli.d $a0, $a0, 3 fldx.d $fa0, $a1, $a0 fldx.d $fa1, $t3, $a0 fadd.d $fa0, $fa4, $fa0 fmul.d $fa4, $fa0, $ft5 - fld.d $fa0, $sp, 720 # 8-byte Folded Reload + fld.d $fa0, $sp, 744 # 8-byte Folded Reload fadd.d $fa0, $fa0, $fa1 fmul.d $fa0, $fa0, $ft5 - fst.d $fa0, $sp, 720 # 8-byte Folded Spill + fst.d $fa0, $sp, 744 # 8-byte Folded Spill .LBB17_133: # in Loop: Header=BB17_55 Depth=1 ld.w $s1, $s7, 0 - fld.d $fa0, $sp, 792 # 8-byte Folded Reload + fld.d $fa0, $sp, 808 # 8-byte Folded Reload fmul.d $fs2, $fa3, $fa0 fsqrt.d $fa0, $fs2 fcmp.cor.d $fcc0, $fa0, $fa0 @@ -7516,14 +7553,14 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fmul.d $fa1, $fa1, $ft5 movgr2cf $fcc0, $s1 fsel $fa1, $fs3, $fa1, $fcc0 - fld.d $fa2, $sp, 432 # 8-byte Folded Reload + fld.d $fa2, $sp, 456 # 8-byte Folded Reload fadd.d $fa2, $ft11, $fa2 fmul.d $fs5, $fa2, $ft5 - fld.d $fa2, $sp, 784 # 8-byte Folded Reload + fld.d $fa2, $sp, 800 # 8-byte Folded Reload fdiv.d $fa2, $fa2, $fa3 fabs.d $fs1, $fa2 fadd.d $fa0, $fs1, $fa0 - fld.d $fa3, $sp, 704 # 8-byte Folded Reload + fld.d $fa3, $sp, 728 # 8-byte Folded Reload fsub.d $fa2, $fa3, $fs4 fsub.d $fs3, $fs4, $fa1 fsub.d $fa1, $fa4, $fa3 @@ -7550,41 +7587,40 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fsub.d $fa1, $ft6, $fa1 fmul.d $fa0, $fa0, $fa1 fmul.d $fa0, $fa2, $fa0 - fst.d $fa0, $sp, 704 # 8-byte Folded Spill + fst.d $fa0, $sp, 728 # 8-byte Folded Spill bge $s8, $a0, .LBB17_140 # %bb.135: # in Loop: Header=BB17_55 Depth=1 - ld.d $a0, $sp, 456 # 8-byte Folded Reload - slli.d $a0, $a0, 2 + slli.d $a0, $s2, 2 ldx.w $a3, $s3, $a0 - ld.d $a0, $sp, 440 # 8-byte Folded Reload + ld.d $a0, $sp, 464 # 8-byte Folded Reload slli.d $a0, $a0, 2 ldx.w $a4, $s3, $a0 bge $a3, $a4, .LBB17_137 # %bb.136: # in Loop: Header=BB17_55 Depth=1 - ldx.w $a0, $t8, $a0 + ldx.w $a0, $t6, $a0 slli.d $a0, $a0, 3 fldx.d $fa0, $a1, $a0 - fld.d $fa1, $sp, 352 # 8-byte Folded Reload + fld.d $fa1, $sp, 360 # 8-byte Folded Reload fadd.d $fa0, $fa1, $fa0 fmul.d $fa3, $fa0, $ft5 b .LBB17_138 .p2align 4, , 16 .LBB17_137: # in Loop: Header=BB17_55 Depth=1 - fld.d $fa3, $sp, 352 # 8-byte Folded Reload + fld.d $fa3, $sp, 360 # 8-byte Folded Reload .LBB17_138: # in Loop: Header=BB17_55 Depth=1 - fld.d $fa2, $sp, 664 # 8-byte Folded Reload - fld.d $fa0, $sp, 792 # 8-byte Folded Reload + fld.d $fa2, $sp, 704 # 8-byte Folded Reload + fld.d $fa0, $sp, 808 # 8-byte Folded Reload fmul.d $fa1, $fa2, $fa0 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB17_176 .LBB17_139: # %.split1353 # in Loop: Header=BB17_55 Depth=1 - fld.d $fa1, $sp, 624 # 8-byte Folded Reload + fld.d $fa1, $sp, 648 # 8-byte Folded Reload fdiv.d $fa1, $fa1, $fa2 fabs.d $fa1, $fa1 fadd.d $fa0, $fa1, $fa0 - fld.d $fa2, $sp, 640 # 8-byte Folded Reload + fld.d $fa2, $sp, 664 # 8-byte Folded Reload fsub.d $fa1, $fa2, $fs4 fsub.d $fa2, $fa3, $fa2 fmul.d $fa0, $fa0, $ft5 @@ -7609,34 +7645,34 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fsub.d $fa2, $ft6, $fa2 fmul.d $fa0, $fa0, $fa2 fmul.d $fa0, $fa1, $fa0 - fld.d $fa1, $sp, 704 # 8-byte Folded Reload + fld.d $fa1, $sp, 728 # 8-byte Folded Reload fadd.d $fa0, $fa1, $fa0 fmul.d $fa0, $fa0, $ft5 fmul.d $fa0, $fa0, $ft5 - fst.d $fa0, $sp, 704 # 8-byte Folded Spill + fst.d $fa0, $sp, 728 # 8-byte Folded Spill .LBB17_140: # in Loop: Header=BB17_55 Depth=1 - fld.d $fa1, $sp, 504 # 8-byte Folded Reload + fld.d $fa1, $sp, 528 # 8-byte Folded Reload fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB17_169 .LBB17_141: # %.split1355 # in Loop: Header=BB17_55 Depth=1 - fld.d $fa1, $sp, 656 # 8-byte Folded Reload - fld.d $fa2, $sp, 736 # 8-byte Folded Reload + fld.d $fa1, $sp, 680 # 8-byte Folded Reload + fld.d $fa2, $sp, 760 # 8-byte Folded Reload fadd.d $fa1, $fa2, $fa1 fmul.d $fa1, $fa1, $ft5 movgr2cf $fcc0, $s0 fsel $fa1, $fa2, $fa1, $fcc0 - fld.d $fa2, $sp, 400 # 8-byte Folded Reload + fld.d $fa2, $sp, 424 # 8-byte Folded Reload fadd.d $fa0, $fa2, $fa0 - fld.d $fa3, $sp, 728 # 8-byte Folded Reload + fld.d $fa3, $sp, 752 # 8-byte Folded Reload fsub.d $fa2, $ft9, $fa3 - fld.d $fa4, $sp, 712 # 8-byte Folded Reload + fld.d $fa4, $sp, 736 # 8-byte Folded Reload fsub.d $fa3, $fa3, $fa4 fsub.d $fs3, $fa1, $ft9 fmul.d $fa0, $fa0, $ft5 fmul.d $fa0, $ft3, $fa0 - fld.d $fa1, $sp, 696 # 8-byte Folded Reload + fld.d $fa1, $sp, 720 # 8-byte Folded Reload fdiv.d $fa0, $fa0, $fa1 fsub.d $fa1, $ft6, $fa0 fmul.d $fa0, $fa0, $fa1 @@ -7658,44 +7694,44 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fsub.d $fa1, $ft6, $fa1 fmul.d $fa0, $fa1, $fa0 fmul.d $fa0, $fa2, $fa0 - fst.d $fa0, $sp, 712 # 8-byte Folded Spill + fst.d $fa0, $sp, 736 # 8-byte Folded Spill bge $s8, $a0, .LBB17_147 # %bb.142: # in Loop: Header=BB17_55 Depth=1 - slli.d $a0, $a2, 2 + slli.d $a0, $t4, 2 ldx.w $a3, $s3, $a0 - slli.d $a0, $t6, 2 + slli.d $a0, $a2, 2 ldx.w $a4, $s3, $a0 bge $a3, $a4, .LBB17_144 # %bb.143: # in Loop: Header=BB17_55 Depth=1 - ldx.w $a0, $t8, $a0 + ldx.w $a0, $t6, $a0 slli.d $a0, $a0, 3 fldx.d $fa0, $t3, $a0 - fld.d $fa1, $sp, 360 # 8-byte Folded Reload + fld.d $fa1, $sp, 368 # 8-byte Folded Reload fadd.d $fa0, $fa1, $fa0 fmul.d $fa3, $fa0, $ft5 b .LBB17_145 .p2align 4, , 16 .LBB17_144: # in Loop: Header=BB17_55 Depth=1 - fld.d $fa3, $sp, 360 # 8-byte Folded Reload + fld.d $fa3, $sp, 368 # 8-byte Folded Reload .LBB17_145: # in Loop: Header=BB17_55 Depth=1 - fld.d $fa2, $sp, 688 # 8-byte Folded Reload - fld.d $fa0, $sp, 792 # 8-byte Folded Reload + fld.d $fa2, $sp, 712 # 8-byte Folded Reload + fld.d $fa0, $sp, 808 # 8-byte Folded Reload fmul.d $fa1, $fa2, $fa0 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB17_177 .LBB17_146: # %.split1357 # in Loop: Header=BB17_55 Depth=1 - fld.d $fa1, $sp, 632 # 8-byte Folded Reload + fld.d $fa1, $sp, 656 # 8-byte Folded Reload fdiv.d $fa1, $fa1, $fa2 fabs.d $fa1, $fa1 fadd.d $fa0, $fa1, $fa0 - fld.d $fa2, $sp, 648 # 8-byte Folded Reload + fld.d $fa2, $sp, 672 # 8-byte Folded Reload fsub.d $fa1, $ft9, $fa2 fsub.d $fa2, $fa2, $fa3 fmul.d $fa0, $fa0, $ft5 fmul.d $fa0, $ft3, $fa0 - fld.d $fa3, $sp, 696 # 8-byte Folded Reload + fld.d $fa3, $sp, 720 # 8-byte Folded Reload fdiv.d $fa0, $fa0, $fa3 fsub.d $fa3, $ft6, $fa0 fmul.d $fa0, $fa0, $fa3 @@ -7716,29 +7752,29 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fsub.d $fa2, $ft6, $fa2 fmul.d $fa0, $fa0, $fa2 fmul.d $fa0, $fa1, $fa0 - fld.d $fa1, $sp, 712 # 8-byte Folded Reload + fld.d $fa1, $sp, 736 # 8-byte Folded Reload fadd.d $fa0, $fa1, $fa0 fmul.d $fa0, $fa0, $ft5 fmul.d $fa0, $fa0, $ft5 - fst.d $fa0, $sp, 712 # 8-byte Folded Spill + fst.d $fa0, $sp, 736 # 8-byte Folded Spill .LBB17_147: # in Loop: Header=BB17_55 Depth=1 fsqrt.d $fa0, $fs2 fcmp.cor.d $fcc0, $fa0, $fa0 - fld.d $fs3, $sp, 560 # 8-byte Folded Reload + fld.d $fs3, $sp, 640 # 8-byte Folded Reload bceqz $fcc0, .LBB17_170 .LBB17_148: # %.split1359 # in Loop: Header=BB17_55 Depth=1 - fld.d $fa1, $sp, 648 # 8-byte Folded Reload - fld.d $fa2, $sp, 728 # 8-byte Folded Reload + fld.d $fa1, $sp, 672 # 8-byte Folded Reload + fld.d $fa2, $sp, 752 # 8-byte Folded Reload fadd.d $fa1, $fa2, $fa1 fmul.d $fa1, $fa1, $ft5 movgr2cf $fcc0, $s1 fsel $fa1, $fa2, $fa1, $fcc0 fadd.d $fa0, $fs1, $fa0 - fld.d $fa3, $sp, 736 # 8-byte Folded Reload + fld.d $fa3, $sp, 760 # 8-byte Folded Reload fsub.d $fa2, $fa3, $ft9 fsub.d $fs1, $ft9, $fa1 - fld.d $fa1, $sp, 720 # 8-byte Folded Reload + fld.d $fa1, $sp, 744 # 8-byte Folded Reload fsub.d $fa1, $fa1, $fa3 fmul.d $fa0, $fa0, $ft5 fmul.d $fa0, $ft3, $fa0 @@ -7765,43 +7801,42 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fmul.d $fs2, $fa2, $fa0 bge $s8, $a0, .LBB17_151 # %bb.149: # in Loop: Header=BB17_55 Depth=1 - ld.d $a0, $sp, 456 # 8-byte Folded Reload - slli.d $a0, $a0, 2 + slli.d $a0, $s2, 2 ldx.w $a3, $s3, $a0 - ld.d $a0, $sp, 440 # 8-byte Folded Reload + ld.d $a0, $sp, 464 # 8-byte Folded Reload slli.d $a0, $a0, 2 ldx.w $a4, $s3, $a0 - ld.d $s1, $sp, 496 # 8-byte Folded Reload + ld.d $s1, $sp, 520 # 8-byte Folded Reload bge $a3, $a4, .LBB17_152 # %bb.150: # in Loop: Header=BB17_55 Depth=1 - ldx.w $a0, $t8, $a0 + ldx.w $a0, $t6, $a0 slli.d $a0, $a0, 3 fldx.d $fa0, $t3, $a0 - fld.d $fa1, $sp, 368 # 8-byte Folded Reload + fld.d $fa1, $sp, 376 # 8-byte Folded Reload fadd.d $fa0, $fa1, $fa0 fmul.d $fa3, $fa0, $ft5 b .LBB17_153 .p2align 4, , 16 .LBB17_151: # in Loop: Header=BB17_55 Depth=1 - ld.d $s1, $sp, 496 # 8-byte Folded Reload + ld.d $s1, $sp, 520 # 8-byte Folded Reload b .LBB17_155 .p2align 4, , 16 .LBB17_152: # in Loop: Header=BB17_55 Depth=1 - fld.d $fa3, $sp, 368 # 8-byte Folded Reload + fld.d $fa3, $sp, 376 # 8-byte Folded Reload .LBB17_153: # in Loop: Header=BB17_55 Depth=1 - fld.d $fa2, $sp, 664 # 8-byte Folded Reload - fld.d $fa0, $sp, 792 # 8-byte Folded Reload + fld.d $fa2, $sp, 704 # 8-byte Folded Reload + fld.d $fa0, $sp, 808 # 8-byte Folded Reload fmul.d $fa1, $fa2, $fa0 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB17_178 .LBB17_154: # %.split1361 # in Loop: Header=BB17_55 Depth=1 - fld.d $fa1, $sp, 624 # 8-byte Folded Reload + fld.d $fa1, $sp, 648 # 8-byte Folded Reload fdiv.d $fa1, $fa1, $fa2 fabs.d $fa1, $fa1 fadd.d $fa0, $fa1, $fa0 - fld.d $fa2, $sp, 656 # 8-byte Folded Reload + fld.d $fa2, $sp, 680 # 8-byte Folded Reload fsub.d $fa1, $fa2, $ft9 fsub.d $fa2, $fa3, $fa2 fmul.d $fa0, $fa0, $ft5 @@ -7831,17 +7866,18 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fmul.d $fs2, $fa0, $ft5 .LBB17_155: # in Loop: Header=BB17_55 Depth=1 fmul.d $fa0, $ft7, $ft7 - ld.d $a0, $sp, 296 # 8-byte Folded Reload - fld.d $fa2, $a0, %pc_lo12(.LCPI17_0) fdiv.d $fa0, $fa0, $fa5 fmul.d $fa1, $fa5, $fa5 + ld.d $a0, $sp, 232 # 8-byte Folded Reload + lu52i.d $a0, $a0, 1025 + movgr2fr.d $fa2, $a0 ld.w $a0, $t5, 0 fmul.d $fa1, $fa1, $fa2 fadd.d $fa1, $fa1, $fa0 fmul.d $fa0, $ft7, $fa7 fdiv.d $fa0, $fa0, $fa5 - fld.d $fs0, $sp, 536 # 8-byte Folded Reload - fld.d $fs1, $sp, 520 # 8-byte Folded Reload + fld.d $fs0, $sp, 560 # 8-byte Folded Reload + fld.d $fs1, $sp, 544 # 8-byte Folded Reload bge $s8, $a0, .LBB17_157 # %bb.156: # in Loop: Header=BB17_55 Depth=1 fadd.d $fa3, $ft7, $ft12 @@ -7858,12 +7894,13 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fadd.d $fa0, $fa0, $fa3 fmul.d $fa0, $fa0, $ft5 .LBB17_157: # in Loop: Header=BB17_55 Depth=1 - move $t5, $t8 - fld.d $ft12, $sp, 784 # 8-byte Folded Reload + move $t5, $t6 + fld.d $ft12, $sp, 800 # 8-byte Folded Reload fmul.d $fa3, $ft8, $ft8 fdiv.d $fa3, $fa3, $fa6 fmul.d $fa4, $fa6, $fa6 - ld.w $a0, $ra, 0 + ld.d $a0, $sp, 592 # 8-byte Folded Reload + ld.w $a0, $a0, 0 fmul.d $fa4, $fa4, $fa2 fadd.d $fa4, $fa4, $fa3 fmul.d $fa3, $ft8, $ft0 @@ -7884,9 +7921,9 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fadd.d $fa3, $fa3, $fa5 fmul.d $fa3, $fa3, $ft5 .LBB17_159: # in Loop: Header=BB17_55 Depth=1 - fld.d $fa5, $sp, 416 # 8-byte Folded Reload + fld.d $fa5, $sp, 440 # 8-byte Folded Reload fmul.d $fa5, $fa5, $fs1 - fld.d $fa7, $sp, 512 # 8-byte Folded Reload + fld.d $fa7, $sp, 536 # 8-byte Folded Reload fdiv.d $fa5, $fa5, $fa7 fmul.d $fa6, $fs1, $fs1 ld.w $a0, $s7, 0 @@ -7896,12 +7933,12 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fadd.d $fa6, $fa7, $fa6 bge $s8, $a0, .LBB17_161 # %bb.160: # in Loop: Header=BB17_55 Depth=1 - fld.d $ft1, $sp, 632 # 8-byte Folded Reload + fld.d $ft1, $sp, 656 # 8-byte Folded Reload fadd.d $fa7, $fs1, $ft1 fmul.d $fs1, $fa7, $ft5 - fld.d $fa7, $sp, 392 # 8-byte Folded Reload + fld.d $fa7, $sp, 416 # 8-byte Folded Reload fmul.d $fa7, $fa7, $ft1 - fld.d $ft0, $sp, 688 # 8-byte Folded Reload + fld.d $ft0, $sp, 712 # 8-byte Folded Reload fdiv.d $fa7, $fa7, $ft0 fadd.d $fa5, $fa5, $fa7 fmul.d $fa5, $fa5, $ft5 @@ -7913,12 +7950,11 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fadd.d $fa6, $fa6, $fa7 fmul.d $fa6, $fa6, $ft5 .LBB17_161: # in Loop: Header=BB17_55 Depth=1 + ld.d $a4, $sp, 488 # 8-byte Folded Reload ld.d $a6, $sp, 480 # 8-byte Folded Reload - ld.d $a7, $sp, 472 # 8-byte Folded Reload - ld.d $s7, $sp, 464 # 8-byte Folded Reload - fld.d $fa7, $sp, 408 # 8-byte Folded Reload + fld.d $fa7, $sp, 432 # 8-byte Folded Reload fmul.d $fa7, $fa7, $ft12 - fld.d $ft1, $sp, 744 # 8-byte Folded Reload + fld.d $ft1, $sp, 768 # 8-byte Folded Reload fdiv.d $fa7, $fa7, $ft1 fmul.d $ft0, $ft12, $ft12 ld.w $a0, $s4, 0 @@ -7928,11 +7964,11 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fadd.d $ft0, $ft1, $ft0 bge $s8, $a0, .LBB17_54 # %bb.162: # in Loop: Header=BB17_55 Depth=1 - fld.d $ft13, $sp, 624 # 8-byte Folded Reload + fld.d $ft13, $sp, 648 # 8-byte Folded Reload fadd.d $ft1, $ft12, $ft13 fmul.d $ft12, $ft1, $ft5 fmul.d $ft1, $fs7, $ft13 - fld.d $ft2, $sp, 664 # 8-byte Folded Reload + fld.d $ft2, $sp, 704 # 8-byte Folded Reload fdiv.d $ft1, $ft1, $ft2 fadd.d $fa7, $fa7, $ft1 fmul.d $fa7, $fa7, $ft5 @@ -7946,767 +7982,785 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe b .LBB17_54 .LBB17_163: # %call.sqrt # in Loop: Header=BB17_55 Depth=1 - fld.d $fa0, $sp, 560 # 8-byte Folded Reload - st.d $a1, $sp, 448 # 8-byte Folded Spill - ld.d $a0, $sp, 616 # 8-byte Folded Reload - st.d $a0, $sp, 616 # 8-byte Folded Spill + fld.d $fa0, $sp, 552 # 8-byte Folded Reload + st.d $a1, $sp, 544 # 8-byte Folded Spill + st.d $t8, $sp, 24 # 8-byte Folded Spill + st.d $ra, $sp, 72 # 8-byte Folded Spill fmov.d $fs7, $ft4 - fst.d $ft7, $sp, 48 # 8-byte Folded Spill - fst.d $ft8, $sp, 24 # 8-byte Folded Spill - fst.d $ft9, $sp, 64 # 8-byte Folded Spill - fst.d $ft10, $sp, 56 # 8-byte Folded Spill - fst.d $ft11, $sp, 152 # 8-byte Folded Spill - fst.d $fa5, $sp, 80 # 8-byte Folded Spill - fst.d $fa6, $sp, 144 # 8-byte Folded Spill - fst.d $fa7, $sp, 72 # 8-byte Folded Spill - fst.d $ft0, $sp, 136 # 8-byte Folded Spill - fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 120 # 8-byte Folded Spill - fst.d $ft12, $sp, 112 # 8-byte Folded Spill - fst.d $ft13, $sp, 104 # 8-byte Folded Spill - fst.d $ft14, $sp, 96 # 8-byte Folded Spill - fst.d $ft15, $sp, 88 # 8-byte Folded Spill - st.d $a2, $sp, 32 # 8-byte Folded Spill - st.d $t6, $sp, 384 # 8-byte Folded Spill + fst.d $ft7, $sp, 160 # 8-byte Folded Spill + fst.d $ft8, $sp, 16 # 8-byte Folded Spill + fst.d $ft9, $sp, 152 # 8-byte Folded Spill + fst.d $ft10, $sp, 40 # 8-byte Folded Spill + fst.d $ft11, $sp, 144 # 8-byte Folded Spill + fst.d $fa5, $sp, 64 # 8-byte Folded Spill + fst.d $fa6, $sp, 136 # 8-byte Folded Spill + fst.d $fa7, $sp, 56 # 8-byte Folded Spill + fst.d $ft0, $sp, 128 # 8-byte Folded Spill + fst.d $ft1, $sp, 120 # 8-byte Folded Spill + fst.d $ft2, $sp, 112 # 8-byte Folded Spill + fst.d $ft12, $sp, 104 # 8-byte Folded Spill + fst.d $ft13, $sp, 96 # 8-byte Folded Spill + fst.d $ft14, $sp, 88 # 8-byte Folded Spill + fst.d $ft15, $sp, 80 # 8-byte Folded Spill + st.d $t4, $sp, 400 # 8-byte Folded Spill + st.d $a2, $sp, 48 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ld.d $t6, $sp, 384 # 8-byte Folded Reload - ld.d $a2, $sp, 32 # 8-byte Folded Reload - fld.d $ft15, $sp, 88 # 8-byte Folded Reload - fld.d $ft14, $sp, 96 # 8-byte Folded Reload - fld.d $ft13, $sp, 104 # 8-byte Folded Reload - fld.d $ft12, $sp, 112 # 8-byte Folded Reload - fld.d $ft2, $sp, 120 # 8-byte Folded Reload - fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 136 # 8-byte Folded Reload - fld.d $fa7, $sp, 72 # 8-byte Folded Reload - fld.d $fa6, $sp, 144 # 8-byte Folded Reload - fld.d $fa5, $sp, 80 # 8-byte Folded Reload - fld.d $ft11, $sp, 152 # 8-byte Folded Reload - fld.d $ft10, $sp, 56 # 8-byte Folded Reload - fld.d $ft9, $sp, 64 # 8-byte Folded Reload - fld.d $ft8, $sp, 24 # 8-byte Folded Reload - fld.d $ft7, $sp, 48 # 8-byte Folded Reload + ld.d $a2, $sp, 48 # 8-byte Folded Reload + ld.d $t4, $sp, 400 # 8-byte Folded Reload + fld.d $ft15, $sp, 80 # 8-byte Folded Reload + fld.d $ft14, $sp, 88 # 8-byte Folded Reload + fld.d $ft13, $sp, 96 # 8-byte Folded Reload + fld.d $ft12, $sp, 104 # 8-byte Folded Reload + fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $ft1, $sp, 120 # 8-byte Folded Reload + fld.d $ft0, $sp, 128 # 8-byte Folded Reload + fld.d $fa7, $sp, 56 # 8-byte Folded Reload + fld.d $fa6, $sp, 136 # 8-byte Folded Reload + fld.d $fa5, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 144 # 8-byte Folded Reload + fld.d $ft10, $sp, 40 # 8-byte Folded Reload + fld.d $ft9, $sp, 152 # 8-byte Folded Reload + fld.d $ft8, $sp, 16 # 8-byte Folded Reload + fld.d $ft7, $sp, 160 # 8-byte Folded Reload vldi $vr14, -912 vldi $vr13, -928 fmov.d $ft4, $fs7 - ld.d $t3, $sp, 280 # 8-byte Folded Reload - ld.d $t2, $sp, 288 # 8-byte Folded Reload - ld.d $t7, $sp, 184 # 8-byte Folded Reload - ld.d $t8, $sp, 192 # 8-byte Folded Reload - ld.d $t4, $sp, 200 # 8-byte Folded Reload + ld.d $t3, $sp, 304 # 8-byte Folded Reload + ld.d $ra, $sp, 72 # 8-byte Folded Reload + ld.d $t8, $sp, 24 # 8-byte Folded Reload + ld.d $t7, $sp, 192 # 8-byte Folded Reload + ld.d $t6, $sp, 200 # 8-byte Folded Reload fld.d $ft3, $sp, 208 # 8-byte Folded Reload - ld.d $a1, $sp, 448 # 8-byte Folded Reload + ld.d $a1, $sp, 544 # 8-byte Folded Reload b .LBB17_81 .LBB17_164: # %call.sqrt1336 # in Loop: Header=BB17_55 Depth=1 - fld.d $fa0, $sp, 528 # 8-byte Folded Reload + fld.d $fa0, $sp, 544 # 8-byte Folded Reload move $s1, $a1 - ld.d $a0, $sp, 616 # 8-byte Folded Reload - st.d $a0, $sp, 616 # 8-byte Folded Spill + st.d $t8, $sp, 24 # 8-byte Folded Spill + st.d $ra, $sp, 72 # 8-byte Folded Spill + move $s7, $a5 fmov.d $fs2, $ft4 - fst.d $ft7, $sp, 48 # 8-byte Folded Spill - fst.d $ft8, $sp, 24 # 8-byte Folded Spill - fst.d $ft9, $sp, 64 # 8-byte Folded Spill - fst.d $ft10, $sp, 56 # 8-byte Folded Spill - fst.d $ft11, $sp, 152 # 8-byte Folded Spill - fst.d $fa5, $sp, 80 # 8-byte Folded Spill - fst.d $fa6, $sp, 144 # 8-byte Folded Spill - fst.d $fa7, $sp, 72 # 8-byte Folded Spill - fst.d $ft0, $sp, 136 # 8-byte Folded Spill - fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 120 # 8-byte Folded Spill - fst.d $ft12, $sp, 112 # 8-byte Folded Spill - fst.d $ft13, $sp, 104 # 8-byte Folded Spill - fst.d $ft14, $sp, 96 # 8-byte Folded Spill - fst.d $ft15, $sp, 88 # 8-byte Folded Spill - st.d $a2, $sp, 32 # 8-byte Folded Spill - move $s5, $t6 - st.d $t5, $sp, 40 # 8-byte Folded Spill - st.d $ra, $sp, 16 # 8-byte Folded Spill + fst.d $ft7, $sp, 160 # 8-byte Folded Spill + fst.d $ft8, $sp, 16 # 8-byte Folded Spill + fst.d $ft9, $sp, 152 # 8-byte Folded Spill + fst.d $ft10, $sp, 40 # 8-byte Folded Spill + fst.d $ft11, $sp, 144 # 8-byte Folded Spill + fst.d $fa5, $sp, 64 # 8-byte Folded Spill + fst.d $fa6, $sp, 136 # 8-byte Folded Spill + fst.d $fa7, $sp, 56 # 8-byte Folded Spill + fst.d $ft0, $sp, 128 # 8-byte Folded Spill + fst.d $ft1, $sp, 120 # 8-byte Folded Spill + fst.d $ft2, $sp, 112 # 8-byte Folded Spill + fst.d $ft12, $sp, 104 # 8-byte Folded Spill + fst.d $ft13, $sp, 96 # 8-byte Folded Spill + fst.d $ft14, $sp, 88 # 8-byte Folded Spill + fst.d $ft15, $sp, 80 # 8-byte Folded Spill + move $s5, $t4 + st.d $a2, $sp, 48 # 8-byte Folded Spill + st.d $t5, $sp, 32 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ld.d $ra, $sp, 16 # 8-byte Folded Reload - ld.d $t5, $sp, 40 # 8-byte Folded Reload - move $t6, $s5 - ld.d $a2, $sp, 32 # 8-byte Folded Reload - fld.d $ft15, $sp, 88 # 8-byte Folded Reload - fld.d $ft14, $sp, 96 # 8-byte Folded Reload - fld.d $ft13, $sp, 104 # 8-byte Folded Reload - fld.d $ft12, $sp, 112 # 8-byte Folded Reload - fld.d $ft2, $sp, 120 # 8-byte Folded Reload - fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 136 # 8-byte Folded Reload - fld.d $fa7, $sp, 72 # 8-byte Folded Reload - fld.d $fa6, $sp, 144 # 8-byte Folded Reload - fld.d $fa5, $sp, 80 # 8-byte Folded Reload - fld.d $ft11, $sp, 152 # 8-byte Folded Reload - fld.d $ft10, $sp, 56 # 8-byte Folded Reload - fld.d $ft9, $sp, 64 # 8-byte Folded Reload - fld.d $ft8, $sp, 24 # 8-byte Folded Reload - fld.d $ft7, $sp, 48 # 8-byte Folded Reload + ld.d $t5, $sp, 32 # 8-byte Folded Reload + ld.d $a2, $sp, 48 # 8-byte Folded Reload + move $t4, $s5 + fld.d $ft15, $sp, 80 # 8-byte Folded Reload + fld.d $ft14, $sp, 88 # 8-byte Folded Reload + fld.d $ft13, $sp, 96 # 8-byte Folded Reload + fld.d $ft12, $sp, 104 # 8-byte Folded Reload + fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $ft1, $sp, 120 # 8-byte Folded Reload + fld.d $ft0, $sp, 128 # 8-byte Folded Reload + fld.d $fa7, $sp, 56 # 8-byte Folded Reload + fld.d $fa6, $sp, 136 # 8-byte Folded Reload + fld.d $fa5, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 144 # 8-byte Folded Reload + fld.d $ft10, $sp, 40 # 8-byte Folded Reload + fld.d $ft9, $sp, 152 # 8-byte Folded Reload + fld.d $ft8, $sp, 16 # 8-byte Folded Reload + fld.d $ft7, $sp, 160 # 8-byte Folded Reload vldi $vr14, -912 vldi $vr13, -928 fmov.d $ft4, $fs2 - ld.d $t1, $sp, 600 # 8-byte Folded Reload - ld.d $t0, $sp, 608 # 8-byte Folded Reload - ld.d $a7, $sp, 256 # 8-byte Folded Reload - ld.d $a6, $sp, 264 # 8-byte Folded Reload - ld.d $a5, $sp, 272 # 8-byte Folded Reload - ld.d $t3, $sp, 280 # 8-byte Folded Reload - ld.d $t2, $sp, 288 # 8-byte Folded Reload - ld.d $t7, $sp, 184 # 8-byte Folded Reload - ld.d $t8, $sp, 192 # 8-byte Folded Reload - ld.d $t4, $sp, 200 # 8-byte Folded Reload + ld.d $t1, $sp, 632 # 8-byte Folded Reload + ld.d $t0, $sp, 280 # 8-byte Folded Reload + ld.d $a7, $sp, 288 # 8-byte Folded Reload + ld.d $a6, $sp, 296 # 8-byte Folded Reload + move $a5, $s7 + ld.d $s7, $sp, 592 # 8-byte Folded Reload + ld.d $t3, $sp, 304 # 8-byte Folded Reload + ld.d $ra, $sp, 72 # 8-byte Folded Reload + ld.d $t8, $sp, 24 # 8-byte Folded Reload + ld.d $t7, $sp, 192 # 8-byte Folded Reload + ld.d $t6, $sp, 200 # 8-byte Folded Reload fld.d $ft3, $sp, 208 # 8-byte Folded Reload - ld.d $s5, $sp, 592 # 8-byte Folded Reload + ld.d $s5, $sp, 616 # 8-byte Folded Reload + ld.d $t2, $sp, 624 # 8-byte Folded Reload move $a1, $s1 b .LBB17_91 .LBB17_165: # %call.sqrt1340 # in Loop: Header=BB17_55 Depth=1 fmov.d $fa0, $fa1 move $s1, $a1 - ld.d $a0, $sp, 616 # 8-byte Folded Reload - st.d $a0, $sp, 616 # 8-byte Folded Spill + st.d $t8, $sp, 24 # 8-byte Folded Spill + st.d $ra, $sp, 72 # 8-byte Folded Spill + move $s7, $a5 fmov.d $fs2, $ft4 - fmov.d $fs3, $ft7 - fst.d $ft8, $sp, 24 # 8-byte Folded Spill - fmov.d $fs5, $ft9 - fst.d $ft10, $sp, 56 # 8-byte Folded Spill - fst.d $ft11, $sp, 152 # 8-byte Folded Spill - fst.d $fa5, $sp, 80 # 8-byte Folded Spill - fst.d $fa6, $sp, 144 # 8-byte Folded Spill - fst.d $fa7, $sp, 72 # 8-byte Folded Spill - fst.d $ft0, $sp, 136 # 8-byte Folded Spill - fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 120 # 8-byte Folded Spill - fst.d $ft12, $sp, 112 # 8-byte Folded Spill - fst.d $ft13, $sp, 104 # 8-byte Folded Spill - fst.d $ft14, $sp, 96 # 8-byte Folded Spill - fst.d $ft15, $sp, 88 # 8-byte Folded Spill - st.d $a2, $sp, 32 # 8-byte Folded Spill - move $s5, $t6 - st.d $t5, $sp, 40 # 8-byte Folded Spill - st.d $ra, $sp, 16 # 8-byte Folded Spill + fst.d $ft7, $sp, 160 # 8-byte Folded Spill + fmov.d $fs5, $ft8 + fst.d $ft9, $sp, 152 # 8-byte Folded Spill + fmov.d $fs3, $ft10 + fst.d $ft11, $sp, 144 # 8-byte Folded Spill + fst.d $fa5, $sp, 64 # 8-byte Folded Spill + fst.d $fa6, $sp, 136 # 8-byte Folded Spill + fst.d $fa7, $sp, 56 # 8-byte Folded Spill + fst.d $ft0, $sp, 128 # 8-byte Folded Spill + fst.d $ft1, $sp, 120 # 8-byte Folded Spill + fst.d $ft2, $sp, 112 # 8-byte Folded Spill + fst.d $ft12, $sp, 104 # 8-byte Folded Spill + fst.d $ft13, $sp, 96 # 8-byte Folded Spill + fst.d $ft14, $sp, 88 # 8-byte Folded Spill + fst.d $ft15, $sp, 80 # 8-byte Folded Spill + move $s5, $t4 + st.d $a2, $sp, 48 # 8-byte Folded Spill + st.d $t5, $sp, 32 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ld.d $ra, $sp, 16 # 8-byte Folded Reload - ld.d $t5, $sp, 40 # 8-byte Folded Reload - move $t6, $s5 - ld.d $a2, $sp, 32 # 8-byte Folded Reload - fld.d $ft15, $sp, 88 # 8-byte Folded Reload - fld.d $ft14, $sp, 96 # 8-byte Folded Reload - fld.d $ft13, $sp, 104 # 8-byte Folded Reload - fld.d $ft12, $sp, 112 # 8-byte Folded Reload - fld.d $ft2, $sp, 120 # 8-byte Folded Reload - fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 136 # 8-byte Folded Reload - fld.d $fa7, $sp, 72 # 8-byte Folded Reload - fld.d $fa6, $sp, 144 # 8-byte Folded Reload - fld.d $fa5, $sp, 80 # 8-byte Folded Reload - fld.d $ft11, $sp, 152 # 8-byte Folded Reload - fld.d $ft10, $sp, 56 # 8-byte Folded Reload - fmov.d $ft9, $fs5 - fld.d $ft8, $sp, 24 # 8-byte Folded Reload - fmov.d $ft7, $fs3 + ld.d $t5, $sp, 32 # 8-byte Folded Reload + ld.d $a2, $sp, 48 # 8-byte Folded Reload + move $t4, $s5 + fld.d $ft15, $sp, 80 # 8-byte Folded Reload + fld.d $ft14, $sp, 88 # 8-byte Folded Reload + fld.d $ft13, $sp, 96 # 8-byte Folded Reload + fld.d $ft12, $sp, 104 # 8-byte Folded Reload + fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $ft1, $sp, 120 # 8-byte Folded Reload + fld.d $ft0, $sp, 128 # 8-byte Folded Reload + fld.d $fa7, $sp, 56 # 8-byte Folded Reload + fld.d $fa6, $sp, 136 # 8-byte Folded Reload + fld.d $fa5, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 144 # 8-byte Folded Reload + fmov.d $ft10, $fs3 + fld.d $ft9, $sp, 152 # 8-byte Folded Reload + fmov.d $ft8, $fs5 + fld.d $ft7, $sp, 160 # 8-byte Folded Reload vldi $vr14, -912 vldi $vr13, -928 fmov.d $ft4, $fs2 - ld.d $t1, $sp, 600 # 8-byte Folded Reload - ld.d $t0, $sp, 608 # 8-byte Folded Reload - ld.d $a7, $sp, 256 # 8-byte Folded Reload - ld.d $a6, $sp, 264 # 8-byte Folded Reload - ld.d $a5, $sp, 272 # 8-byte Folded Reload - ld.d $t3, $sp, 280 # 8-byte Folded Reload - ld.d $t2, $sp, 288 # 8-byte Folded Reload - ld.d $t7, $sp, 184 # 8-byte Folded Reload - ld.d $t8, $sp, 192 # 8-byte Folded Reload - ld.d $t4, $sp, 200 # 8-byte Folded Reload + ld.d $t1, $sp, 632 # 8-byte Folded Reload + ld.d $t0, $sp, 280 # 8-byte Folded Reload + ld.d $a7, $sp, 288 # 8-byte Folded Reload + ld.d $a6, $sp, 296 # 8-byte Folded Reload + move $a5, $s7 + ld.d $s7, $sp, 592 # 8-byte Folded Reload + ld.d $t3, $sp, 304 # 8-byte Folded Reload + ld.d $ra, $sp, 72 # 8-byte Folded Reload + ld.d $t8, $sp, 24 # 8-byte Folded Reload + ld.d $t7, $sp, 192 # 8-byte Folded Reload + ld.d $t6, $sp, 200 # 8-byte Folded Reload fld.d $ft3, $sp, 208 # 8-byte Folded Reload - ld.d $s5, $sp, 592 # 8-byte Folded Reload + ld.d $s5, $sp, 616 # 8-byte Folded Reload + ld.d $t2, $sp, 624 # 8-byte Folded Reload move $a1, $s1 b .LBB17_98 .LBB17_166: # %call.sqrt1344 # in Loop: Header=BB17_55 Depth=1 fmov.d $fa0, $fa1 move $s1, $a1 + move $s5, $t8 + st.d $ra, $sp, 72 # 8-byte Folded Spill + move $s7, $a5 fmov.d $fs2, $ft4 - fst.d $ft7, $sp, 48 # 8-byte Folded Spill + fst.d $ft7, $sp, 160 # 8-byte Folded Spill fmov.d $fs5, $ft8 - fst.d $ft9, $sp, 64 # 8-byte Folded Spill + fst.d $ft9, $sp, 152 # 8-byte Folded Spill fmov.d $fs3, $ft10 - fst.d $ft11, $sp, 152 # 8-byte Folded Spill - fst.d $fa5, $sp, 80 # 8-byte Folded Spill - fst.d $fa6, $sp, 144 # 8-byte Folded Spill - fst.d $fa7, $sp, 72 # 8-byte Folded Spill - fst.d $ft0, $sp, 136 # 8-byte Folded Spill - fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 120 # 8-byte Folded Spill - fst.d $ft12, $sp, 112 # 8-byte Folded Spill - fst.d $ft13, $sp, 104 # 8-byte Folded Spill - fst.d $ft14, $sp, 96 # 8-byte Folded Spill - fst.d $ft15, $sp, 88 # 8-byte Folded Spill - st.d $a2, $sp, 32 # 8-byte Folded Spill - move $s2, $t6 - st.d $t5, $sp, 40 # 8-byte Folded Spill - move $s5, $ra + fst.d $ft11, $sp, 144 # 8-byte Folded Spill + fst.d $fa5, $sp, 64 # 8-byte Folded Spill + fst.d $fa6, $sp, 136 # 8-byte Folded Spill + fst.d $fa7, $sp, 56 # 8-byte Folded Spill + fst.d $ft0, $sp, 128 # 8-byte Folded Spill + fst.d $ft1, $sp, 120 # 8-byte Folded Spill + fst.d $ft2, $sp, 112 # 8-byte Folded Spill + fst.d $ft12, $sp, 104 # 8-byte Folded Spill + fst.d $ft13, $sp, 96 # 8-byte Folded Spill + fst.d $ft14, $sp, 88 # 8-byte Folded Spill + fst.d $ft15, $sp, 80 # 8-byte Folded Spill + st.d $t4, $sp, 400 # 8-byte Folded Spill + st.d $a2, $sp, 48 # 8-byte Folded Spill + move $s2, $t5 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - move $ra, $s5 - ld.d $t5, $sp, 40 # 8-byte Folded Reload - move $t6, $s2 - ld.d $a2, $sp, 32 # 8-byte Folded Reload - fld.d $ft15, $sp, 88 # 8-byte Folded Reload - fld.d $ft14, $sp, 96 # 8-byte Folded Reload - fld.d $ft13, $sp, 104 # 8-byte Folded Reload - fld.d $ft12, $sp, 112 # 8-byte Folded Reload - fld.d $ft2, $sp, 120 # 8-byte Folded Reload - fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 136 # 8-byte Folded Reload - fld.d $fa7, $sp, 72 # 8-byte Folded Reload - fld.d $fa6, $sp, 144 # 8-byte Folded Reload - fld.d $fa5, $sp, 80 # 8-byte Folded Reload - fld.d $ft11, $sp, 152 # 8-byte Folded Reload + move $t5, $s2 + ld.d $a2, $sp, 48 # 8-byte Folded Reload + ld.d $t4, $sp, 400 # 8-byte Folded Reload + fld.d $ft15, $sp, 80 # 8-byte Folded Reload + fld.d $ft14, $sp, 88 # 8-byte Folded Reload + fld.d $ft13, $sp, 96 # 8-byte Folded Reload + fld.d $ft12, $sp, 104 # 8-byte Folded Reload + fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $ft1, $sp, 120 # 8-byte Folded Reload + fld.d $ft0, $sp, 128 # 8-byte Folded Reload + fld.d $fa7, $sp, 56 # 8-byte Folded Reload + fld.d $fa6, $sp, 136 # 8-byte Folded Reload + fld.d $fa5, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 144 # 8-byte Folded Reload fmov.d $ft10, $fs3 - fld.d $ft9, $sp, 64 # 8-byte Folded Reload + fld.d $ft9, $sp, 152 # 8-byte Folded Reload fmov.d $ft8, $fs5 - fld.d $fs5, $sp, 576 # 8-byte Folded Reload - fld.d $ft7, $sp, 48 # 8-byte Folded Reload + fld.d $fs5, $sp, 600 # 8-byte Folded Reload + fld.d $ft7, $sp, 160 # 8-byte Folded Reload vldi $vr14, -912 vldi $vr13, -928 fmov.d $ft4, $fs2 - ld.d $t1, $sp, 600 # 8-byte Folded Reload - ld.d $t0, $sp, 608 # 8-byte Folded Reload - ld.d $a7, $sp, 256 # 8-byte Folded Reload - ld.d $a6, $sp, 264 # 8-byte Folded Reload - ld.d $a5, $sp, 272 # 8-byte Folded Reload - ld.d $t3, $sp, 280 # 8-byte Folded Reload - ld.d $t2, $sp, 288 # 8-byte Folded Reload - ld.d $s5, $sp, 592 # 8-byte Folded Reload - ld.d $t7, $sp, 184 # 8-byte Folded Reload - ld.d $t8, $sp, 192 # 8-byte Folded Reload - ld.d $t4, $sp, 200 # 8-byte Folded Reload + ld.d $t1, $sp, 632 # 8-byte Folded Reload + ld.d $t0, $sp, 280 # 8-byte Folded Reload + ld.d $a7, $sp, 288 # 8-byte Folded Reload + ld.d $a6, $sp, 296 # 8-byte Folded Reload + move $a5, $s7 + ld.d $s7, $sp, 592 # 8-byte Folded Reload + ld.d $t3, $sp, 304 # 8-byte Folded Reload + ld.d $ra, $sp, 72 # 8-byte Folded Reload + move $t8, $s5 + ld.d $s5, $sp, 616 # 8-byte Folded Reload + ld.d $t2, $sp, 624 # 8-byte Folded Reload + ld.d $t7, $sp, 192 # 8-byte Folded Reload + ld.d $t6, $sp, 200 # 8-byte Folded Reload fld.d $ft3, $sp, 208 # 8-byte Folded Reload - ld.d $s2, $sp, 584 # 8-byte Folded Reload + ld.d $s2, $sp, 608 # 8-byte Folded Reload move $a1, $s1 b .LBB17_106 .LBB17_167: # %call.sqrt1348 # in Loop: Header=BB17_55 Depth=1 - fld.d $fa0, $sp, 504 # 8-byte Folded Reload + fld.d $fa0, $sp, 528 # 8-byte Folded Reload move $s1, $a1 - fst.d $fs0, $sp, 544 # 8-byte Folded Spill + move $s5, $t8 + st.d $ra, $sp, 72 # 8-byte Folded Spill + st.d $a5, $sp, 408 # 8-byte Folded Spill + fst.d $fs0, $sp, 568 # 8-byte Folded Spill fmov.d $fs0, $ft4 - fst.d $ft7, $sp, 48 # 8-byte Folded Spill + fst.d $ft7, $sp, 160 # 8-byte Folded Spill fmov.d $fs2, $ft8 - fst.d $ft9, $sp, 64 # 8-byte Folded Spill + fst.d $ft9, $sp, 152 # 8-byte Folded Spill fmov.d $fs5, $ft10 - fst.d $ft11, $sp, 152 # 8-byte Folded Spill + fst.d $ft11, $sp, 144 # 8-byte Folded Spill fmov.d $fs1, $fa5 - fst.d $fa6, $sp, 144 # 8-byte Folded Spill + fst.d $fa6, $sp, 136 # 8-byte Folded Spill fmov.d $fs3, $fa7 - fst.d $ft0, $sp, 136 # 8-byte Folded Spill - fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 120 # 8-byte Folded Spill - fst.d $ft12, $sp, 112 # 8-byte Folded Spill - fst.d $ft13, $sp, 104 # 8-byte Folded Spill - fst.d $ft14, $sp, 96 # 8-byte Folded Spill - fst.d $ft15, $sp, 88 # 8-byte Folded Spill - move $s2, $a2 - st.d $t6, $sp, 384 # 8-byte Folded Spill - st.d $t5, $sp, 40 # 8-byte Folded Spill - move $s5, $ra + fst.d $ft0, $sp, 128 # 8-byte Folded Spill + fst.d $ft1, $sp, 120 # 8-byte Folded Spill + fst.d $ft2, $sp, 112 # 8-byte Folded Spill + fst.d $ft12, $sp, 104 # 8-byte Folded Spill + fst.d $ft13, $sp, 96 # 8-byte Folded Spill + fst.d $ft14, $sp, 88 # 8-byte Folded Spill + fst.d $ft15, $sp, 80 # 8-byte Folded Spill + move $s2, $t4 + st.d $a2, $sp, 48 # 8-byte Folded Spill + st.d $t5, $sp, 32 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - fld.d $fa4, $sp, 512 # 8-byte Folded Reload - move $ra, $s5 - ld.d $t5, $sp, 40 # 8-byte Folded Reload - ld.d $t6, $sp, 384 # 8-byte Folded Reload - move $a2, $s2 - fld.d $ft15, $sp, 88 # 8-byte Folded Reload - fld.d $ft14, $sp, 96 # 8-byte Folded Reload - fld.d $ft13, $sp, 104 # 8-byte Folded Reload - fld.d $ft12, $sp, 112 # 8-byte Folded Reload - fld.d $ft2, $sp, 120 # 8-byte Folded Reload - fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 136 # 8-byte Folded Reload + fld.d $fa4, $sp, 536 # 8-byte Folded Reload + ld.d $t5, $sp, 32 # 8-byte Folded Reload + ld.d $a2, $sp, 48 # 8-byte Folded Reload + move $t4, $s2 + fld.d $ft15, $sp, 80 # 8-byte Folded Reload + fld.d $ft14, $sp, 88 # 8-byte Folded Reload + fld.d $ft13, $sp, 96 # 8-byte Folded Reload + fld.d $ft12, $sp, 104 # 8-byte Folded Reload + fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $ft1, $sp, 120 # 8-byte Folded Reload + fld.d $ft0, $sp, 128 # 8-byte Folded Reload fmov.d $fa7, $fs3 - fld.d $fs3, $sp, 568 # 8-byte Folded Reload - fld.d $fa6, $sp, 144 # 8-byte Folded Reload + fld.d $fs3, $sp, 584 # 8-byte Folded Reload + fld.d $fa6, $sp, 136 # 8-byte Folded Reload fmov.d $fa5, $fs1 - fld.d $fa3, $sp, 520 # 8-byte Folded Reload - fld.d $ft11, $sp, 152 # 8-byte Folded Reload + fld.d $fa3, $sp, 544 # 8-byte Folded Reload + fld.d $ft11, $sp, 144 # 8-byte Folded Reload fmov.d $ft10, $fs5 - fld.d $fs5, $sp, 576 # 8-byte Folded Reload - fld.d $ft9, $sp, 64 # 8-byte Folded Reload + fld.d $fs5, $sp, 600 # 8-byte Folded Reload + fld.d $ft9, $sp, 152 # 8-byte Folded Reload fmov.d $ft8, $fs2 - fld.d $ft7, $sp, 48 # 8-byte Folded Reload + fld.d $ft7, $sp, 160 # 8-byte Folded Reload vldi $vr14, -912 vldi $vr13, -928 fmov.d $ft4, $fs0 - fld.d $fs0, $sp, 544 # 8-byte Folded Reload - ld.d $t1, $sp, 600 # 8-byte Folded Reload - ld.d $t0, $sp, 608 # 8-byte Folded Reload - ld.d $t3, $sp, 280 # 8-byte Folded Reload - ld.d $t2, $sp, 288 # 8-byte Folded Reload - ld.d $s5, $sp, 592 # 8-byte Folded Reload - ld.d $t7, $sp, 184 # 8-byte Folded Reload - ld.d $t8, $sp, 192 # 8-byte Folded Reload - ld.d $t4, $sp, 200 # 8-byte Folded Reload + fld.d $fs0, $sp, 568 # 8-byte Folded Reload + ld.d $t1, $sp, 632 # 8-byte Folded Reload + ld.d $a5, $sp, 408 # 8-byte Folded Reload + ld.d $t3, $sp, 304 # 8-byte Folded Reload + ld.d $ra, $sp, 72 # 8-byte Folded Reload + move $t8, $s5 + ld.d $s5, $sp, 616 # 8-byte Folded Reload + ld.d $t2, $sp, 624 # 8-byte Folded Reload + ld.d $t7, $sp, 192 # 8-byte Folded Reload + ld.d $t6, $sp, 200 # 8-byte Folded Reload fld.d $ft3, $sp, 208 # 8-byte Folded Reload - ld.d $s2, $sp, 584 # 8-byte Folded Reload + ld.d $s2, $sp, 608 # 8-byte Folded Reload move $a1, $s1 b .LBB17_125 .LBB17_168: # %call.sqrt1352 # in Loop: Header=BB17_55 Depth=1 fmov.d $fa0, $fs2 move $s2, $a1 - ld.d $a0, $sp, 616 # 8-byte Folded Reload - st.d $a0, $sp, 616 # 8-byte Folded Spill + st.d $t8, $sp, 24 # 8-byte Folded Spill + st.d $ra, $sp, 72 # 8-byte Folded Spill + st.d $a5, $sp, 408 # 8-byte Folded Spill fmov.d $fs1, $ft4 - fmov.d $fs3, $ft7 - fst.d $ft8, $sp, 24 # 8-byte Folded Spill - fmov.d $fs5, $ft9 - fst.d $ft10, $sp, 56 # 8-byte Folded Spill - fst.d $ft11, $sp, 152 # 8-byte Folded Spill - fst.d $fa5, $sp, 80 # 8-byte Folded Spill - fst.d $fa6, $sp, 144 # 8-byte Folded Spill - fst.d $fa7, $sp, 72 # 8-byte Folded Spill - fst.d $ft0, $sp, 136 # 8-byte Folded Spill - fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 120 # 8-byte Folded Spill - fst.d $ft12, $sp, 112 # 8-byte Folded Spill - fst.d $ft13, $sp, 104 # 8-byte Folded Spill - fst.d $ft14, $sp, 96 # 8-byte Folded Spill - fst.d $ft15, $sp, 88 # 8-byte Folded Spill - st.d $a2, $sp, 32 # 8-byte Folded Spill - move $s5, $t6 - st.d $t5, $sp, 40 # 8-byte Folded Spill - st.d $ra, $sp, 16 # 8-byte Folded Spill - fst.d $fa4, $sp, 552 # 8-byte Folded Spill + fst.d $ft7, $sp, 160 # 8-byte Folded Spill + fmov.d $fs5, $ft8 + fst.d $ft9, $sp, 152 # 8-byte Folded Spill + fmov.d $fs3, $ft10 + fst.d $ft11, $sp, 144 # 8-byte Folded Spill + fst.d $fa5, $sp, 64 # 8-byte Folded Spill + fst.d $fa6, $sp, 136 # 8-byte Folded Spill + fst.d $fa7, $sp, 56 # 8-byte Folded Spill + fst.d $ft0, $sp, 128 # 8-byte Folded Spill + fst.d $ft1, $sp, 120 # 8-byte Folded Spill + fst.d $ft2, $sp, 112 # 8-byte Folded Spill + fst.d $ft12, $sp, 104 # 8-byte Folded Spill + fst.d $ft13, $sp, 96 # 8-byte Folded Spill + fst.d $ft14, $sp, 88 # 8-byte Folded Spill + fst.d $ft15, $sp, 80 # 8-byte Folded Spill + move $s5, $t4 + st.d $a2, $sp, 48 # 8-byte Folded Spill + st.d $t5, $sp, 32 # 8-byte Folded Spill + fst.d $fa4, $sp, 576 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - fld.d $fa4, $sp, 552 # 8-byte Folded Reload - ld.d $ra, $sp, 16 # 8-byte Folded Reload - ld.d $t5, $sp, 40 # 8-byte Folded Reload - move $t6, $s5 - ld.d $a2, $sp, 32 # 8-byte Folded Reload - fld.d $ft15, $sp, 88 # 8-byte Folded Reload - fld.d $ft14, $sp, 96 # 8-byte Folded Reload - fld.d $ft13, $sp, 104 # 8-byte Folded Reload - fld.d $ft12, $sp, 112 # 8-byte Folded Reload - fld.d $ft2, $sp, 120 # 8-byte Folded Reload - fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 136 # 8-byte Folded Reload - fld.d $fa7, $sp, 72 # 8-byte Folded Reload - fld.d $fa6, $sp, 144 # 8-byte Folded Reload - fld.d $fa5, $sp, 80 # 8-byte Folded Reload - fld.d $ft11, $sp, 152 # 8-byte Folded Reload - fld.d $ft10, $sp, 56 # 8-byte Folded Reload - fmov.d $ft9, $fs5 - fld.d $fs5, $sp, 576 # 8-byte Folded Reload - fld.d $ft8, $sp, 24 # 8-byte Folded Reload - fmov.d $ft7, $fs3 - fld.d $fs3, $sp, 568 # 8-byte Folded Reload + fld.d $fa4, $sp, 576 # 8-byte Folded Reload + ld.d $t5, $sp, 32 # 8-byte Folded Reload + ld.d $a2, $sp, 48 # 8-byte Folded Reload + move $t4, $s5 + fld.d $ft15, $sp, 80 # 8-byte Folded Reload + fld.d $ft14, $sp, 88 # 8-byte Folded Reload + fld.d $ft13, $sp, 96 # 8-byte Folded Reload + fld.d $ft12, $sp, 104 # 8-byte Folded Reload + fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $ft1, $sp, 120 # 8-byte Folded Reload + fld.d $ft0, $sp, 128 # 8-byte Folded Reload + fld.d $fa7, $sp, 56 # 8-byte Folded Reload + fld.d $fa6, $sp, 136 # 8-byte Folded Reload + fld.d $fa5, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 144 # 8-byte Folded Reload + fmov.d $ft10, $fs3 + fld.d $fs3, $sp, 584 # 8-byte Folded Reload + fld.d $ft9, $sp, 152 # 8-byte Folded Reload + fmov.d $ft8, $fs5 + fld.d $fs5, $sp, 600 # 8-byte Folded Reload + fld.d $ft7, $sp, 160 # 8-byte Folded Reload vldi $vr14, -912 vldi $vr13, -928 fmov.d $ft4, $fs1 - fld.d $fa3, $sp, 744 # 8-byte Folded Reload - ld.d $t1, $sp, 600 # 8-byte Folded Reload - ld.d $t0, $sp, 608 # 8-byte Folded Reload - ld.d $t3, $sp, 280 # 8-byte Folded Reload - ld.d $t2, $sp, 288 # 8-byte Folded Reload - ld.d $t7, $sp, 184 # 8-byte Folded Reload - ld.d $t8, $sp, 192 # 8-byte Folded Reload - ld.d $t4, $sp, 200 # 8-byte Folded Reload + fld.d $fa3, $sp, 768 # 8-byte Folded Reload + ld.d $t1, $sp, 632 # 8-byte Folded Reload + ld.d $a5, $sp, 408 # 8-byte Folded Reload + ld.d $t3, $sp, 304 # 8-byte Folded Reload + ld.d $ra, $sp, 72 # 8-byte Folded Reload + ld.d $t8, $sp, 24 # 8-byte Folded Reload + ld.d $t7, $sp, 192 # 8-byte Folded Reload + ld.d $t6, $sp, 200 # 8-byte Folded Reload fld.d $ft3, $sp, 208 # 8-byte Folded Reload - ld.d $s5, $sp, 592 # 8-byte Folded Reload + ld.d $s5, $sp, 616 # 8-byte Folded Reload + ld.d $t2, $sp, 624 # 8-byte Folded Reload move $a1, $s2 - ld.d $s2, $sp, 584 # 8-byte Folded Reload + ld.d $s2, $sp, 608 # 8-byte Folded Reload b .LBB17_134 .LBB17_169: # %call.sqrt1356 # in Loop: Header=BB17_55 Depth=1 fmov.d $fa0, $fa1 move $s2, $a1 - ld.d $a0, $sp, 616 # 8-byte Folded Reload - st.d $a0, $sp, 616 # 8-byte Folded Spill + st.d $t8, $sp, 24 # 8-byte Folded Spill + st.d $ra, $sp, 72 # 8-byte Folded Spill + st.d $a5, $sp, 408 # 8-byte Folded Spill fmov.d $fs3, $ft4 - fst.d $ft7, $sp, 48 # 8-byte Folded Spill - fst.d $ft8, $sp, 24 # 8-byte Folded Spill - fst.d $ft9, $sp, 64 # 8-byte Folded Spill - fst.d $ft10, $sp, 56 # 8-byte Folded Spill - fst.d $ft11, $sp, 152 # 8-byte Folded Spill - fst.d $fa5, $sp, 80 # 8-byte Folded Spill - fst.d $fa6, $sp, 144 # 8-byte Folded Spill - fst.d $fa7, $sp, 72 # 8-byte Folded Spill - fst.d $ft0, $sp, 136 # 8-byte Folded Spill - fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 120 # 8-byte Folded Spill - fst.d $ft12, $sp, 112 # 8-byte Folded Spill - fst.d $ft13, $sp, 104 # 8-byte Folded Spill - fst.d $ft14, $sp, 96 # 8-byte Folded Spill - fst.d $ft15, $sp, 88 # 8-byte Folded Spill - st.d $a2, $sp, 32 # 8-byte Folded Spill - move $s5, $t6 - st.d $t5, $sp, 40 # 8-byte Folded Spill - st.d $ra, $sp, 16 # 8-byte Folded Spill + fst.d $ft7, $sp, 160 # 8-byte Folded Spill + fst.d $ft8, $sp, 16 # 8-byte Folded Spill + fst.d $ft9, $sp, 152 # 8-byte Folded Spill + fst.d $ft10, $sp, 40 # 8-byte Folded Spill + fst.d $ft11, $sp, 144 # 8-byte Folded Spill + fst.d $fa5, $sp, 64 # 8-byte Folded Spill + fst.d $fa6, $sp, 136 # 8-byte Folded Spill + fst.d $fa7, $sp, 56 # 8-byte Folded Spill + fst.d $ft0, $sp, 128 # 8-byte Folded Spill + fst.d $ft1, $sp, 120 # 8-byte Folded Spill + fst.d $ft2, $sp, 112 # 8-byte Folded Spill + fst.d $ft12, $sp, 104 # 8-byte Folded Spill + fst.d $ft13, $sp, 96 # 8-byte Folded Spill + fst.d $ft14, $sp, 88 # 8-byte Folded Spill + fst.d $ft15, $sp, 80 # 8-byte Folded Spill + move $s5, $t4 + st.d $a2, $sp, 48 # 8-byte Folded Spill + st.d $t5, $sp, 32 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ld.d $ra, $sp, 16 # 8-byte Folded Reload - ld.d $t5, $sp, 40 # 8-byte Folded Reload - move $t6, $s5 - ld.d $a2, $sp, 32 # 8-byte Folded Reload - fld.d $ft15, $sp, 88 # 8-byte Folded Reload - fld.d $ft14, $sp, 96 # 8-byte Folded Reload - fld.d $ft13, $sp, 104 # 8-byte Folded Reload - fld.d $ft12, $sp, 112 # 8-byte Folded Reload - fld.d $ft2, $sp, 120 # 8-byte Folded Reload - fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 136 # 8-byte Folded Reload - fld.d $fa7, $sp, 72 # 8-byte Folded Reload - fld.d $fa6, $sp, 144 # 8-byte Folded Reload - fld.d $fa5, $sp, 80 # 8-byte Folded Reload - fld.d $ft11, $sp, 152 # 8-byte Folded Reload - fld.d $ft10, $sp, 56 # 8-byte Folded Reload - fld.d $ft9, $sp, 64 # 8-byte Folded Reload - fld.d $ft8, $sp, 24 # 8-byte Folded Reload - fld.d $ft7, $sp, 48 # 8-byte Folded Reload + ld.d $t5, $sp, 32 # 8-byte Folded Reload + ld.d $a2, $sp, 48 # 8-byte Folded Reload + move $t4, $s5 + fld.d $ft15, $sp, 80 # 8-byte Folded Reload + fld.d $ft14, $sp, 88 # 8-byte Folded Reload + fld.d $ft13, $sp, 96 # 8-byte Folded Reload + fld.d $ft12, $sp, 104 # 8-byte Folded Reload + fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $ft1, $sp, 120 # 8-byte Folded Reload + fld.d $ft0, $sp, 128 # 8-byte Folded Reload + fld.d $fa7, $sp, 56 # 8-byte Folded Reload + fld.d $fa6, $sp, 136 # 8-byte Folded Reload + fld.d $fa5, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 144 # 8-byte Folded Reload + fld.d $ft10, $sp, 40 # 8-byte Folded Reload + fld.d $ft9, $sp, 152 # 8-byte Folded Reload + fld.d $ft8, $sp, 16 # 8-byte Folded Reload + fld.d $ft7, $sp, 160 # 8-byte Folded Reload vldi $vr14, -912 vldi $vr13, -928 fmov.d $ft4, $fs3 - ld.d $t1, $sp, 600 # 8-byte Folded Reload - ld.d $t0, $sp, 608 # 8-byte Folded Reload - ld.d $t3, $sp, 280 # 8-byte Folded Reload - ld.d $t2, $sp, 288 # 8-byte Folded Reload - ld.d $t7, $sp, 184 # 8-byte Folded Reload - ld.d $t8, $sp, 192 # 8-byte Folded Reload - ld.d $t4, $sp, 200 # 8-byte Folded Reload + ld.d $t1, $sp, 632 # 8-byte Folded Reload + ld.d $a5, $sp, 408 # 8-byte Folded Reload + ld.d $t3, $sp, 304 # 8-byte Folded Reload + ld.d $ra, $sp, 72 # 8-byte Folded Reload + ld.d $t8, $sp, 24 # 8-byte Folded Reload + ld.d $t7, $sp, 192 # 8-byte Folded Reload + ld.d $t6, $sp, 200 # 8-byte Folded Reload fld.d $ft3, $sp, 208 # 8-byte Folded Reload - ld.d $s5, $sp, 592 # 8-byte Folded Reload + ld.d $s5, $sp, 616 # 8-byte Folded Reload + ld.d $t2, $sp, 624 # 8-byte Folded Reload move $a1, $s2 - ld.d $s2, $sp, 584 # 8-byte Folded Reload + ld.d $s2, $sp, 608 # 8-byte Folded Reload b .LBB17_141 .LBB17_170: # %call.sqrt1360 # in Loop: Header=BB17_55 Depth=1 fmov.d $fa0, $fs2 move $s0, $a1 + move $s5, $t8 + st.d $ra, $sp, 72 # 8-byte Folded Spill + st.d $a5, $sp, 408 # 8-byte Folded Spill fmov.d $fs2, $ft4 - fst.d $ft7, $sp, 48 # 8-byte Folded Spill + fst.d $ft7, $sp, 160 # 8-byte Folded Spill fmov.d $fs3, $ft8 - fst.d $ft9, $sp, 64 # 8-byte Folded Spill - fst.d $ft10, $sp, 56 # 8-byte Folded Spill - fst.d $ft11, $sp, 152 # 8-byte Folded Spill - fst.d $fa5, $sp, 80 # 8-byte Folded Spill - fst.d $fa6, $sp, 144 # 8-byte Folded Spill - fst.d $fa7, $sp, 72 # 8-byte Folded Spill - fst.d $ft0, $sp, 136 # 8-byte Folded Spill - fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 120 # 8-byte Folded Spill - fst.d $ft12, $sp, 112 # 8-byte Folded Spill - fst.d $ft13, $sp, 104 # 8-byte Folded Spill - fst.d $ft14, $sp, 96 # 8-byte Folded Spill - fst.d $ft15, $sp, 88 # 8-byte Folded Spill + fst.d $ft9, $sp, 152 # 8-byte Folded Spill + fst.d $ft10, $sp, 40 # 8-byte Folded Spill + fst.d $ft11, $sp, 144 # 8-byte Folded Spill + fst.d $fa5, $sp, 64 # 8-byte Folded Spill + fst.d $fa6, $sp, 136 # 8-byte Folded Spill + fst.d $fa7, $sp, 56 # 8-byte Folded Spill + fst.d $ft0, $sp, 128 # 8-byte Folded Spill + fst.d $ft1, $sp, 120 # 8-byte Folded Spill + fst.d $ft2, $sp, 112 # 8-byte Folded Spill + fst.d $ft12, $sp, 104 # 8-byte Folded Spill + fst.d $ft13, $sp, 96 # 8-byte Folded Spill + fst.d $ft14, $sp, 88 # 8-byte Folded Spill + fst.d $ft15, $sp, 80 # 8-byte Folded Spill move $s2, $t5 - move $s5, $ra pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - move $ra, $s5 move $t5, $s2 - fld.d $ft15, $sp, 88 # 8-byte Folded Reload - fld.d $ft14, $sp, 96 # 8-byte Folded Reload - fld.d $ft13, $sp, 104 # 8-byte Folded Reload - fld.d $ft12, $sp, 112 # 8-byte Folded Reload - fld.d $ft2, $sp, 120 # 8-byte Folded Reload - fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 136 # 8-byte Folded Reload - fld.d $fa7, $sp, 72 # 8-byte Folded Reload - fld.d $fa6, $sp, 144 # 8-byte Folded Reload - fld.d $fa5, $sp, 80 # 8-byte Folded Reload - fld.d $ft11, $sp, 152 # 8-byte Folded Reload - fld.d $ft10, $sp, 56 # 8-byte Folded Reload - fld.d $ft9, $sp, 64 # 8-byte Folded Reload + fld.d $ft15, $sp, 80 # 8-byte Folded Reload + fld.d $ft14, $sp, 88 # 8-byte Folded Reload + fld.d $ft13, $sp, 96 # 8-byte Folded Reload + fld.d $ft12, $sp, 104 # 8-byte Folded Reload + fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $ft1, $sp, 120 # 8-byte Folded Reload + fld.d $ft0, $sp, 128 # 8-byte Folded Reload + fld.d $fa7, $sp, 56 # 8-byte Folded Reload + fld.d $fa6, $sp, 136 # 8-byte Folded Reload + fld.d $fa5, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 144 # 8-byte Folded Reload + fld.d $ft10, $sp, 40 # 8-byte Folded Reload + fld.d $ft9, $sp, 152 # 8-byte Folded Reload fmov.d $ft8, $fs3 - fld.d $fs3, $sp, 560 # 8-byte Folded Reload - fld.d $ft7, $sp, 48 # 8-byte Folded Reload + fld.d $fs3, $sp, 640 # 8-byte Folded Reload + fld.d $ft7, $sp, 160 # 8-byte Folded Reload vldi $vr14, -912 vldi $vr13, -928 fmov.d $ft4, $fs2 - ld.d $t1, $sp, 600 # 8-byte Folded Reload - ld.d $t0, $sp, 608 # 8-byte Folded Reload - ld.d $t3, $sp, 280 # 8-byte Folded Reload - ld.d $t2, $sp, 288 # 8-byte Folded Reload - ld.d $s5, $sp, 592 # 8-byte Folded Reload - ld.d $t7, $sp, 184 # 8-byte Folded Reload - ld.d $t8, $sp, 192 # 8-byte Folded Reload - ld.d $t4, $sp, 200 # 8-byte Folded Reload + ld.d $t1, $sp, 632 # 8-byte Folded Reload + ld.d $a5, $sp, 408 # 8-byte Folded Reload + ld.d $t3, $sp, 304 # 8-byte Folded Reload + ld.d $ra, $sp, 72 # 8-byte Folded Reload + move $t8, $s5 + ld.d $s5, $sp, 616 # 8-byte Folded Reload + ld.d $t2, $sp, 624 # 8-byte Folded Reload + ld.d $t7, $sp, 192 # 8-byte Folded Reload + ld.d $t6, $sp, 200 # 8-byte Folded Reload fld.d $ft3, $sp, 208 # 8-byte Folded Reload - ld.d $s2, $sp, 584 # 8-byte Folded Reload + ld.d $s2, $sp, 608 # 8-byte Folded Reload move $a1, $s0 b .LBB17_148 .LBB17_171: # %call.sqrt1334 # in Loop: Header=BB17_55 Depth=1 fmov.d $fa0, $fa1 - st.d $a1, $sp, 448 # 8-byte Folded Spill - ld.d $a0, $sp, 616 # 8-byte Folded Reload - st.d $a0, $sp, 616 # 8-byte Folded Spill + st.d $a1, $sp, 544 # 8-byte Folded Spill + st.d $t8, $sp, 24 # 8-byte Folded Spill + st.d $ra, $sp, 72 # 8-byte Folded Spill + move $s7, $a5 fst.d $ft4, $sp, 8 # 8-byte Folded Spill - fst.d $ft7, $sp, 48 # 8-byte Folded Spill - fst.d $ft8, $sp, 24 # 8-byte Folded Spill - fst.d $ft9, $sp, 64 # 8-byte Folded Spill - fst.d $ft10, $sp, 56 # 8-byte Folded Spill - fst.d $ft11, $sp, 152 # 8-byte Folded Spill - fst.d $fa5, $sp, 80 # 8-byte Folded Spill - fst.d $fa6, $sp, 144 # 8-byte Folded Spill - fst.d $fa7, $sp, 72 # 8-byte Folded Spill - fst.d $ft0, $sp, 136 # 8-byte Folded Spill - fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 120 # 8-byte Folded Spill - fst.d $ft12, $sp, 112 # 8-byte Folded Spill - fst.d $ft13, $sp, 104 # 8-byte Folded Spill - fst.d $ft14, $sp, 96 # 8-byte Folded Spill - fst.d $ft15, $sp, 88 # 8-byte Folded Spill - st.d $a2, $sp, 32 # 8-byte Folded Spill - move $s5, $t6 - st.d $t5, $sp, 40 # 8-byte Folded Spill - fst.d $fa3, $sp, 528 # 8-byte Folded Spill + fst.d $ft7, $sp, 160 # 8-byte Folded Spill + fst.d $ft8, $sp, 16 # 8-byte Folded Spill + fst.d $ft9, $sp, 152 # 8-byte Folded Spill + fst.d $ft10, $sp, 40 # 8-byte Folded Spill + fst.d $ft11, $sp, 144 # 8-byte Folded Spill + fst.d $fa5, $sp, 64 # 8-byte Folded Spill + fst.d $fa6, $sp, 136 # 8-byte Folded Spill + fst.d $fa7, $sp, 56 # 8-byte Folded Spill + fst.d $ft0, $sp, 128 # 8-byte Folded Spill + fst.d $ft1, $sp, 120 # 8-byte Folded Spill + fst.d $ft2, $sp, 112 # 8-byte Folded Spill + fst.d $ft12, $sp, 104 # 8-byte Folded Spill + fst.d $ft13, $sp, 96 # 8-byte Folded Spill + fst.d $ft14, $sp, 88 # 8-byte Folded Spill + fst.d $ft15, $sp, 80 # 8-byte Folded Spill + move $s5, $t4 + st.d $a2, $sp, 48 # 8-byte Folded Spill + st.d $t5, $sp, 32 # 8-byte Folded Spill + fst.d $fa3, $sp, 592 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - fld.d $fa3, $sp, 528 # 8-byte Folded Reload - ld.d $t5, $sp, 40 # 8-byte Folded Reload - move $t6, $s5 - ld.d $a2, $sp, 32 # 8-byte Folded Reload - fld.d $ft15, $sp, 88 # 8-byte Folded Reload - fld.d $ft14, $sp, 96 # 8-byte Folded Reload - fld.d $ft13, $sp, 104 # 8-byte Folded Reload - fld.d $ft12, $sp, 112 # 8-byte Folded Reload - fld.d $ft2, $sp, 120 # 8-byte Folded Reload - fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 136 # 8-byte Folded Reload - fld.d $fa7, $sp, 72 # 8-byte Folded Reload - fld.d $fa6, $sp, 144 # 8-byte Folded Reload - fld.d $fa5, $sp, 80 # 8-byte Folded Reload - fld.d $ft11, $sp, 152 # 8-byte Folded Reload - fld.d $ft10, $sp, 56 # 8-byte Folded Reload - fld.d $ft9, $sp, 64 # 8-byte Folded Reload - fld.d $ft8, $sp, 24 # 8-byte Folded Reload - fld.d $ft7, $sp, 48 # 8-byte Folded Reload + fld.d $fa3, $sp, 592 # 8-byte Folded Reload + ld.d $t5, $sp, 32 # 8-byte Folded Reload + ld.d $a2, $sp, 48 # 8-byte Folded Reload + move $t4, $s5 + fld.d $ft15, $sp, 80 # 8-byte Folded Reload + fld.d $ft14, $sp, 88 # 8-byte Folded Reload + fld.d $ft13, $sp, 96 # 8-byte Folded Reload + fld.d $ft12, $sp, 104 # 8-byte Folded Reload + fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $ft1, $sp, 120 # 8-byte Folded Reload + fld.d $ft0, $sp, 128 # 8-byte Folded Reload + fld.d $fa7, $sp, 56 # 8-byte Folded Reload + fld.d $fa6, $sp, 136 # 8-byte Folded Reload + fld.d $fa5, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 144 # 8-byte Folded Reload + fld.d $ft10, $sp, 40 # 8-byte Folded Reload + fld.d $ft9, $sp, 152 # 8-byte Folded Reload + fld.d $ft8, $sp, 16 # 8-byte Folded Reload + fld.d $ft7, $sp, 160 # 8-byte Folded Reload vldi $vr14, -912 vldi $vr13, -928 fld.d $ft4, $sp, 8 # 8-byte Folded Reload - ld.d $t1, $sp, 600 # 8-byte Folded Reload - ld.d $t0, $sp, 608 # 8-byte Folded Reload - ld.d $a7, $sp, 256 # 8-byte Folded Reload - ld.d $a6, $sp, 264 # 8-byte Folded Reload - ld.d $a5, $sp, 272 # 8-byte Folded Reload - ld.d $t3, $sp, 280 # 8-byte Folded Reload - ld.d $t2, $sp, 288 # 8-byte Folded Reload - ld.d $t7, $sp, 184 # 8-byte Folded Reload - ld.d $t8, $sp, 192 # 8-byte Folded Reload - ld.d $t4, $sp, 200 # 8-byte Folded Reload + ld.d $t1, $sp, 632 # 8-byte Folded Reload + ld.d $t0, $sp, 280 # 8-byte Folded Reload + ld.d $a7, $sp, 288 # 8-byte Folded Reload + ld.d $a6, $sp, 296 # 8-byte Folded Reload + move $a5, $s7 + ld.d $t3, $sp, 304 # 8-byte Folded Reload + ld.d $ra, $sp, 72 # 8-byte Folded Reload + ld.d $t8, $sp, 24 # 8-byte Folded Reload + ld.d $t7, $sp, 192 # 8-byte Folded Reload + ld.d $t6, $sp, 200 # 8-byte Folded Reload fld.d $ft3, $sp, 208 # 8-byte Folded Reload - ld.d $s5, $sp, 592 # 8-byte Folded Reload - ld.d $a1, $sp, 448 # 8-byte Folded Reload + ld.d $s5, $sp, 616 # 8-byte Folded Reload + ld.d $t2, $sp, 624 # 8-byte Folded Reload + ld.d $a1, $sp, 544 # 8-byte Folded Reload b .LBB17_87 .LBB17_172: # %call.sqrt1338 # in Loop: Header=BB17_55 Depth=1 fmov.d $fa0, $fa1 move $s1, $a1 - ld.d $a0, $sp, 616 # 8-byte Folded Reload - st.d $a0, $sp, 616 # 8-byte Folded Spill - fst.d $fa3, $sp, 320 # 8-byte Folded Spill + st.d $t8, $sp, 24 # 8-byte Folded Spill + st.d $ra, $sp, 72 # 8-byte Folded Spill + move $s7, $a5 + fst.d $fa3, $sp, 328 # 8-byte Folded Spill fmov.d $fs3, $ft4 - fst.d $fs5, $sp, 536 # 8-byte Folded Spill - fmov.d $fs5, $ft7 - fst.d $ft8, $sp, 24 # 8-byte Folded Spill - fst.d $ft9, $sp, 64 # 8-byte Folded Spill - fst.d $ft10, $sp, 56 # 8-byte Folded Spill - fst.d $ft11, $sp, 152 # 8-byte Folded Spill - fst.d $fa5, $sp, 80 # 8-byte Folded Spill - fst.d $fa6, $sp, 144 # 8-byte Folded Spill - fst.d $fa7, $sp, 72 # 8-byte Folded Spill - fst.d $ft0, $sp, 136 # 8-byte Folded Spill - fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 120 # 8-byte Folded Spill - fst.d $ft12, $sp, 112 # 8-byte Folded Spill - fst.d $ft13, $sp, 104 # 8-byte Folded Spill - fst.d $ft14, $sp, 96 # 8-byte Folded Spill - fst.d $ft15, $sp, 88 # 8-byte Folded Spill - st.d $a2, $sp, 32 # 8-byte Folded Spill - move $s5, $t6 - st.d $t5, $sp, 40 # 8-byte Folded Spill - st.d $ra, $sp, 16 # 8-byte Folded Spill + fst.d $ft7, $sp, 160 # 8-byte Folded Spill + fst.d $fs5, $sp, 560 # 8-byte Folded Spill + fmov.d $fs5, $ft8 + fst.d $ft9, $sp, 152 # 8-byte Folded Spill + fst.d $ft10, $sp, 40 # 8-byte Folded Spill + fst.d $ft11, $sp, 144 # 8-byte Folded Spill + fst.d $fa5, $sp, 64 # 8-byte Folded Spill + fst.d $fa6, $sp, 136 # 8-byte Folded Spill + fst.d $fa7, $sp, 56 # 8-byte Folded Spill + fst.d $ft0, $sp, 128 # 8-byte Folded Spill + fst.d $ft1, $sp, 120 # 8-byte Folded Spill + fst.d $ft2, $sp, 112 # 8-byte Folded Spill + fst.d $ft12, $sp, 104 # 8-byte Folded Spill + fst.d $ft13, $sp, 96 # 8-byte Folded Spill + fst.d $ft14, $sp, 88 # 8-byte Folded Spill + fst.d $ft15, $sp, 80 # 8-byte Folded Spill + move $s5, $t4 + st.d $a2, $sp, 48 # 8-byte Folded Spill + st.d $t5, $sp, 32 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ld.d $ra, $sp, 16 # 8-byte Folded Reload - ld.d $t5, $sp, 40 # 8-byte Folded Reload - move $t6, $s5 - ld.d $a2, $sp, 32 # 8-byte Folded Reload - fld.d $ft15, $sp, 88 # 8-byte Folded Reload - fld.d $ft14, $sp, 96 # 8-byte Folded Reload - fld.d $ft13, $sp, 104 # 8-byte Folded Reload - fld.d $ft12, $sp, 112 # 8-byte Folded Reload - fld.d $ft2, $sp, 120 # 8-byte Folded Reload - fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 136 # 8-byte Folded Reload - fld.d $fa7, $sp, 72 # 8-byte Folded Reload - fld.d $fa6, $sp, 144 # 8-byte Folded Reload - fld.d $fa5, $sp, 80 # 8-byte Folded Reload - fld.d $ft11, $sp, 152 # 8-byte Folded Reload - fld.d $ft10, $sp, 56 # 8-byte Folded Reload - fld.d $ft9, $sp, 64 # 8-byte Folded Reload - fld.d $ft8, $sp, 24 # 8-byte Folded Reload - fmov.d $ft7, $fs5 - fld.d $fs5, $sp, 536 # 8-byte Folded Reload + ld.d $t5, $sp, 32 # 8-byte Folded Reload + ld.d $a2, $sp, 48 # 8-byte Folded Reload + move $t4, $s5 + fld.d $ft15, $sp, 80 # 8-byte Folded Reload + fld.d $ft14, $sp, 88 # 8-byte Folded Reload + fld.d $ft13, $sp, 96 # 8-byte Folded Reload + fld.d $ft12, $sp, 104 # 8-byte Folded Reload + fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $ft1, $sp, 120 # 8-byte Folded Reload + fld.d $ft0, $sp, 128 # 8-byte Folded Reload + fld.d $fa7, $sp, 56 # 8-byte Folded Reload + fld.d $fa6, $sp, 136 # 8-byte Folded Reload + fld.d $fa5, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 144 # 8-byte Folded Reload + fld.d $ft10, $sp, 40 # 8-byte Folded Reload + fld.d $ft9, $sp, 152 # 8-byte Folded Reload + fmov.d $ft8, $fs5 + fld.d $fs5, $sp, 560 # 8-byte Folded Reload + fld.d $ft7, $sp, 160 # 8-byte Folded Reload vldi $vr14, -912 vldi $vr13, -928 fmov.d $ft4, $fs3 - fld.d $fa3, $sp, 320 # 8-byte Folded Reload - ld.d $t1, $sp, 600 # 8-byte Folded Reload - ld.d $t0, $sp, 608 # 8-byte Folded Reload - ld.d $a7, $sp, 256 # 8-byte Folded Reload - ld.d $a6, $sp, 264 # 8-byte Folded Reload - ld.d $a5, $sp, 272 # 8-byte Folded Reload - ld.d $t3, $sp, 280 # 8-byte Folded Reload - ld.d $t2, $sp, 288 # 8-byte Folded Reload - ld.d $t7, $sp, 184 # 8-byte Folded Reload - ld.d $t8, $sp, 192 # 8-byte Folded Reload - ld.d $t4, $sp, 200 # 8-byte Folded Reload + fld.d $fa3, $sp, 328 # 8-byte Folded Reload + ld.d $t1, $sp, 632 # 8-byte Folded Reload + ld.d $t0, $sp, 280 # 8-byte Folded Reload + ld.d $a7, $sp, 288 # 8-byte Folded Reload + ld.d $a6, $sp, 296 # 8-byte Folded Reload + move $a5, $s7 + ld.d $s7, $sp, 592 # 8-byte Folded Reload + ld.d $t3, $sp, 304 # 8-byte Folded Reload + ld.d $ra, $sp, 72 # 8-byte Folded Reload + ld.d $t8, $sp, 24 # 8-byte Folded Reload + ld.d $t7, $sp, 192 # 8-byte Folded Reload + ld.d $t6, $sp, 200 # 8-byte Folded Reload fld.d $ft3, $sp, 208 # 8-byte Folded Reload - ld.d $s5, $sp, 592 # 8-byte Folded Reload + ld.d $s5, $sp, 616 # 8-byte Folded Reload + ld.d $t2, $sp, 624 # 8-byte Folded Reload move $a1, $s1 b .LBB17_96 .LBB17_173: # %call.sqrt1342 # in Loop: Header=BB17_55 Depth=1 fmov.d $fa0, $fa1 move $s1, $a1 - fst.d $fs3, $sp, 560 # 8-byte Folded Spill + move $s5, $t8 + st.d $ra, $sp, 72 # 8-byte Folded Spill + move $s7, $a5 + fst.d $fs3, $sp, 640 # 8-byte Folded Spill fmov.d $fs3, $ft4 - fst.d $ft7, $sp, 48 # 8-byte Folded Spill + fst.d $ft7, $sp, 160 # 8-byte Folded Spill fmov.d $fs5, $ft8 - fst.d $ft9, $sp, 64 # 8-byte Folded Spill - fst.d $ft10, $sp, 56 # 8-byte Folded Spill - fst.d $ft11, $sp, 152 # 8-byte Folded Spill - fst.d $fa5, $sp, 80 # 8-byte Folded Spill - fst.d $fa6, $sp, 144 # 8-byte Folded Spill - fst.d $fa7, $sp, 72 # 8-byte Folded Spill - fst.d $ft0, $sp, 136 # 8-byte Folded Spill - fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 120 # 8-byte Folded Spill - fst.d $ft12, $sp, 112 # 8-byte Folded Spill - fst.d $ft13, $sp, 104 # 8-byte Folded Spill - fst.d $ft14, $sp, 96 # 8-byte Folded Spill - fst.d $ft15, $sp, 88 # 8-byte Folded Spill - st.d $a2, $sp, 32 # 8-byte Folded Spill - move $s2, $t6 - st.d $t5, $sp, 40 # 8-byte Folded Spill - move $s5, $ra - fst.d $fa3, $sp, 328 # 8-byte Folded Spill + fst.d $ft9, $sp, 152 # 8-byte Folded Spill + fst.d $ft10, $sp, 40 # 8-byte Folded Spill + fst.d $ft11, $sp, 144 # 8-byte Folded Spill + fst.d $fa5, $sp, 64 # 8-byte Folded Spill + fst.d $fa6, $sp, 136 # 8-byte Folded Spill + fst.d $fa7, $sp, 56 # 8-byte Folded Spill + fst.d $ft0, $sp, 128 # 8-byte Folded Spill + fst.d $ft1, $sp, 120 # 8-byte Folded Spill + fst.d $ft2, $sp, 112 # 8-byte Folded Spill + fst.d $ft12, $sp, 104 # 8-byte Folded Spill + fst.d $ft13, $sp, 96 # 8-byte Folded Spill + fst.d $ft14, $sp, 88 # 8-byte Folded Spill + fst.d $ft15, $sp, 80 # 8-byte Folded Spill + st.d $t4, $sp, 400 # 8-byte Folded Spill + st.d $a2, $sp, 48 # 8-byte Folded Spill + move $s2, $t5 + fst.d $fa3, $sp, 336 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - fld.d $fa3, $sp, 328 # 8-byte Folded Reload - move $ra, $s5 - ld.d $t5, $sp, 40 # 8-byte Folded Reload - move $t6, $s2 - ld.d $a2, $sp, 32 # 8-byte Folded Reload - fld.d $ft15, $sp, 88 # 8-byte Folded Reload - fld.d $ft14, $sp, 96 # 8-byte Folded Reload - fld.d $ft13, $sp, 104 # 8-byte Folded Reload - fld.d $ft12, $sp, 112 # 8-byte Folded Reload - fld.d $ft2, $sp, 120 # 8-byte Folded Reload - fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 136 # 8-byte Folded Reload - fld.d $fa7, $sp, 72 # 8-byte Folded Reload - fld.d $fa6, $sp, 144 # 8-byte Folded Reload - fld.d $fa5, $sp, 80 # 8-byte Folded Reload - fld.d $ft11, $sp, 152 # 8-byte Folded Reload - fld.d $ft10, $sp, 56 # 8-byte Folded Reload - fld.d $ft9, $sp, 64 # 8-byte Folded Reload + fld.d $fa3, $sp, 336 # 8-byte Folded Reload + move $t5, $s2 + ld.d $a2, $sp, 48 # 8-byte Folded Reload + ld.d $t4, $sp, 400 # 8-byte Folded Reload + fld.d $ft15, $sp, 80 # 8-byte Folded Reload + fld.d $ft14, $sp, 88 # 8-byte Folded Reload + fld.d $ft13, $sp, 96 # 8-byte Folded Reload + fld.d $ft12, $sp, 104 # 8-byte Folded Reload + fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $ft1, $sp, 120 # 8-byte Folded Reload + fld.d $ft0, $sp, 128 # 8-byte Folded Reload + fld.d $fa7, $sp, 56 # 8-byte Folded Reload + fld.d $fa6, $sp, 136 # 8-byte Folded Reload + fld.d $fa5, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 144 # 8-byte Folded Reload + fld.d $ft10, $sp, 40 # 8-byte Folded Reload + fld.d $ft9, $sp, 152 # 8-byte Folded Reload fmov.d $ft8, $fs5 - fld.d $fs5, $sp, 576 # 8-byte Folded Reload - fld.d $ft7, $sp, 48 # 8-byte Folded Reload + fld.d $fs5, $sp, 600 # 8-byte Folded Reload + fld.d $ft7, $sp, 160 # 8-byte Folded Reload vldi $vr14, -912 vldi $vr13, -928 fmov.d $ft4, $fs3 - fld.d $fs3, $sp, 560 # 8-byte Folded Reload - ld.d $t1, $sp, 600 # 8-byte Folded Reload - ld.d $t0, $sp, 608 # 8-byte Folded Reload - ld.d $a7, $sp, 256 # 8-byte Folded Reload - ld.d $a6, $sp, 264 # 8-byte Folded Reload - ld.d $a5, $sp, 272 # 8-byte Folded Reload - ld.d $t3, $sp, 280 # 8-byte Folded Reload - ld.d $t2, $sp, 288 # 8-byte Folded Reload - ld.d $s5, $sp, 592 # 8-byte Folded Reload - ld.d $t7, $sp, 184 # 8-byte Folded Reload - ld.d $t8, $sp, 192 # 8-byte Folded Reload - ld.d $t4, $sp, 200 # 8-byte Folded Reload + fld.d $fs3, $sp, 640 # 8-byte Folded Reload + ld.d $t1, $sp, 632 # 8-byte Folded Reload + ld.d $t0, $sp, 280 # 8-byte Folded Reload + ld.d $a7, $sp, 288 # 8-byte Folded Reload + ld.d $a6, $sp, 296 # 8-byte Folded Reload + move $a5, $s7 + ld.d $s7, $sp, 592 # 8-byte Folded Reload + ld.d $t3, $sp, 304 # 8-byte Folded Reload + ld.d $ra, $sp, 72 # 8-byte Folded Reload + move $t8, $s5 + ld.d $s5, $sp, 616 # 8-byte Folded Reload + ld.d $t2, $sp, 624 # 8-byte Folded Reload + ld.d $t7, $sp, 192 # 8-byte Folded Reload + ld.d $t6, $sp, 200 # 8-byte Folded Reload fld.d $ft3, $sp, 208 # 8-byte Folded Reload - ld.d $s2, $sp, 584 # 8-byte Folded Reload + ld.d $s2, $sp, 608 # 8-byte Folded Reload move $a1, $s1 b .LBB17_104 .LBB17_174: # %call.sqrt1346 # in Loop: Header=BB17_55 Depth=1 fmov.d $fa0, $fa1 move $s0, $a1 - fst.d $fs2, $sp, 528 # 8-byte Folded Spill + move $s2, $t8 + move $s5, $ra + move $s7, $a5 + fst.d $fs2, $sp, 552 # 8-byte Folded Spill fmov.d $fs2, $ft4 - fmov.d $fs3, $ft7 - fst.d $ft8, $sp, 24 # 8-byte Folded Spill - fmov.d $fs5, $ft9 - fst.d $ft10, $sp, 56 # 8-byte Folded Spill - fst.d $ft11, $sp, 152 # 8-byte Folded Spill - fst.d $fa5, $sp, 80 # 8-byte Folded Spill - fst.d $fa6, $sp, 144 # 8-byte Folded Spill - fst.d $fa7, $sp, 72 # 8-byte Folded Spill - fst.d $ft0, $sp, 136 # 8-byte Folded Spill - fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 120 # 8-byte Folded Spill - fst.d $ft12, $sp, 112 # 8-byte Folded Spill - fst.d $ft13, $sp, 104 # 8-byte Folded Spill - fst.d $ft14, $sp, 96 # 8-byte Folded Spill - fst.d $ft15, $sp, 88 # 8-byte Folded Spill - st.d $a2, $sp, 32 # 8-byte Folded Spill - move $s1, $t6 - st.d $t5, $sp, 40 # 8-byte Folded Spill - move $s2, $ra - fst.d $fa3, $sp, 336 # 8-byte Folded Spill + fst.d $ft7, $sp, 160 # 8-byte Folded Spill + fmov.d $fs5, $ft8 + fst.d $ft9, $sp, 152 # 8-byte Folded Spill + fmov.d $fs3, $ft10 + fst.d $ft11, $sp, 144 # 8-byte Folded Spill + fst.d $fa5, $sp, 64 # 8-byte Folded Spill + fst.d $fa6, $sp, 136 # 8-byte Folded Spill + fst.d $fa7, $sp, 56 # 8-byte Folded Spill + fst.d $ft0, $sp, 128 # 8-byte Folded Spill + fst.d $ft1, $sp, 120 # 8-byte Folded Spill + fst.d $ft2, $sp, 112 # 8-byte Folded Spill + fst.d $ft12, $sp, 104 # 8-byte Folded Spill + fst.d $ft13, $sp, 96 # 8-byte Folded Spill + fst.d $ft14, $sp, 88 # 8-byte Folded Spill + fst.d $ft15, $sp, 80 # 8-byte Folded Spill + st.d $t4, $sp, 400 # 8-byte Folded Spill + st.d $a2, $sp, 48 # 8-byte Folded Spill + move $s1, $t5 + fst.d $fa3, $sp, 344 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - fld.d $fa3, $sp, 336 # 8-byte Folded Reload - move $ra, $s2 - ld.d $t5, $sp, 40 # 8-byte Folded Reload - move $t6, $s1 - ld.d $a2, $sp, 32 # 8-byte Folded Reload - fld.d $ft15, $sp, 88 # 8-byte Folded Reload - fld.d $ft14, $sp, 96 # 8-byte Folded Reload - fld.d $ft13, $sp, 104 # 8-byte Folded Reload - fld.d $ft12, $sp, 112 # 8-byte Folded Reload - fld.d $ft2, $sp, 120 # 8-byte Folded Reload - fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 136 # 8-byte Folded Reload - fld.d $fa7, $sp, 72 # 8-byte Folded Reload - fld.d $fa6, $sp, 144 # 8-byte Folded Reload - fld.d $fa5, $sp, 80 # 8-byte Folded Reload - fld.d $ft11, $sp, 152 # 8-byte Folded Reload - fld.d $ft10, $sp, 56 # 8-byte Folded Reload - fmov.d $ft9, $fs5 - fld.d $fs5, $sp, 576 # 8-byte Folded Reload - fld.d $ft8, $sp, 24 # 8-byte Folded Reload - fmov.d $ft7, $fs3 - fld.d $fs3, $sp, 568 # 8-byte Folded Reload + fld.d $fa3, $sp, 344 # 8-byte Folded Reload + move $t5, $s1 + ld.d $a2, $sp, 48 # 8-byte Folded Reload + ld.d $t4, $sp, 400 # 8-byte Folded Reload + fld.d $ft15, $sp, 80 # 8-byte Folded Reload + fld.d $ft14, $sp, 88 # 8-byte Folded Reload + fld.d $ft13, $sp, 96 # 8-byte Folded Reload + fld.d $ft12, $sp, 104 # 8-byte Folded Reload + fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $ft1, $sp, 120 # 8-byte Folded Reload + fld.d $ft0, $sp, 128 # 8-byte Folded Reload + fld.d $fa7, $sp, 56 # 8-byte Folded Reload + fld.d $fa6, $sp, 136 # 8-byte Folded Reload + fld.d $fa5, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 144 # 8-byte Folded Reload + fmov.d $ft10, $fs3 + fld.d $fs3, $sp, 584 # 8-byte Folded Reload + fld.d $ft9, $sp, 152 # 8-byte Folded Reload + fmov.d $ft8, $fs5 + fld.d $fs5, $sp, 600 # 8-byte Folded Reload + fld.d $ft7, $sp, 160 # 8-byte Folded Reload vldi $vr14, -912 vldi $vr13, -928 fmov.d $ft4, $fs2 - fld.d $fs2, $sp, 528 # 8-byte Folded Reload - ld.d $t1, $sp, 600 # 8-byte Folded Reload - ld.d $t0, $sp, 608 # 8-byte Folded Reload - ld.d $a7, $sp, 256 # 8-byte Folded Reload - ld.d $a6, $sp, 264 # 8-byte Folded Reload - ld.d $a5, $sp, 272 # 8-byte Folded Reload - ld.d $t3, $sp, 280 # 8-byte Folded Reload - ld.d $t2, $sp, 288 # 8-byte Folded Reload - ld.d $s2, $sp, 584 # 8-byte Folded Reload - ld.d $t7, $sp, 184 # 8-byte Folded Reload - ld.d $t8, $sp, 192 # 8-byte Folded Reload - ld.d $t4, $sp, 200 # 8-byte Folded Reload + fld.d $fs2, $sp, 552 # 8-byte Folded Reload + ld.d $t1, $sp, 632 # 8-byte Folded Reload + ld.d $t0, $sp, 280 # 8-byte Folded Reload + ld.d $a7, $sp, 288 # 8-byte Folded Reload + ld.d $a6, $sp, 296 # 8-byte Folded Reload + move $a5, $s7 + ld.d $t3, $sp, 304 # 8-byte Folded Reload + move $ra, $s5 + ld.d $s5, $sp, 616 # 8-byte Folded Reload + ld.d $t2, $sp, 624 # 8-byte Folded Reload + move $t8, $s2 + ld.d $s2, $sp, 608 # 8-byte Folded Reload + ld.d $t7, $sp, 192 # 8-byte Folded Reload + ld.d $t6, $sp, 200 # 8-byte Folded Reload fld.d $ft3, $sp, 208 # 8-byte Folded Reload move $a1, $s0 b .LBB17_111 @@ -8714,282 +8768,286 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe # in Loop: Header=BB17_55 Depth=1 fmov.d $fa0, $fa1 move $s1, $a1 - fst.d $fs2, $sp, 544 # 8-byte Folded Spill + move $s5, $t8 + st.d $ra, $sp, 72 # 8-byte Folded Spill + st.d $a5, $sp, 408 # 8-byte Folded Spill + fst.d $fs2, $sp, 568 # 8-byte Folded Spill fmov.d $fs2, $ft4 - fst.d $ft7, $sp, 48 # 8-byte Folded Spill + fst.d $ft7, $sp, 160 # 8-byte Folded Spill fmov.d $fs5, $ft8 - fst.d $ft9, $sp, 64 # 8-byte Folded Spill + fst.d $ft9, $sp, 152 # 8-byte Folded Spill fmov.d $fs3, $ft10 - fst.d $ft11, $sp, 152 # 8-byte Folded Spill - fst.d $fa5, $sp, 80 # 8-byte Folded Spill - fst.d $fa6, $sp, 144 # 8-byte Folded Spill - fst.d $fa7, $sp, 72 # 8-byte Folded Spill - fst.d $ft0, $sp, 136 # 8-byte Folded Spill - fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 120 # 8-byte Folded Spill - fst.d $ft12, $sp, 112 # 8-byte Folded Spill - fst.d $ft13, $sp, 104 # 8-byte Folded Spill - fst.d $ft14, $sp, 96 # 8-byte Folded Spill - fst.d $ft15, $sp, 88 # 8-byte Folded Spill - move $s2, $a2 - st.d $t6, $sp, 384 # 8-byte Folded Spill - st.d $t5, $sp, 40 # 8-byte Folded Spill - move $s5, $ra - fst.d $fa2, $sp, 344 # 8-byte Folded Spill + fst.d $ft11, $sp, 144 # 8-byte Folded Spill + fst.d $fa5, $sp, 64 # 8-byte Folded Spill + fst.d $fa6, $sp, 136 # 8-byte Folded Spill + fst.d $fa7, $sp, 56 # 8-byte Folded Spill + fst.d $ft0, $sp, 128 # 8-byte Folded Spill + fst.d $ft1, $sp, 120 # 8-byte Folded Spill + fst.d $ft2, $sp, 112 # 8-byte Folded Spill + fst.d $ft12, $sp, 104 # 8-byte Folded Spill + fst.d $ft13, $sp, 96 # 8-byte Folded Spill + fst.d $ft14, $sp, 88 # 8-byte Folded Spill + fst.d $ft15, $sp, 80 # 8-byte Folded Spill + move $s2, $t4 + st.d $a2, $sp, 48 # 8-byte Folded Spill + st.d $t5, $sp, 32 # 8-byte Folded Spill + fst.d $fa2, $sp, 352 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - fld.d $fa3, $sp, 688 # 8-byte Folded Reload - fld.d $fa2, $sp, 344 # 8-byte Folded Reload - move $ra, $s5 - ld.d $t5, $sp, 40 # 8-byte Folded Reload - ld.d $t6, $sp, 384 # 8-byte Folded Reload - move $a2, $s2 - fld.d $ft15, $sp, 88 # 8-byte Folded Reload - fld.d $ft14, $sp, 96 # 8-byte Folded Reload - fld.d $ft13, $sp, 104 # 8-byte Folded Reload - fld.d $ft12, $sp, 112 # 8-byte Folded Reload - fld.d $ft2, $sp, 120 # 8-byte Folded Reload - fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 136 # 8-byte Folded Reload - fld.d $fa7, $sp, 72 # 8-byte Folded Reload - fld.d $fa6, $sp, 144 # 8-byte Folded Reload - fld.d $fa5, $sp, 80 # 8-byte Folded Reload - fld.d $ft11, $sp, 152 # 8-byte Folded Reload + fld.d $fa3, $sp, 712 # 8-byte Folded Reload + fld.d $fa2, $sp, 352 # 8-byte Folded Reload + ld.d $t5, $sp, 32 # 8-byte Folded Reload + ld.d $a2, $sp, 48 # 8-byte Folded Reload + move $t4, $s2 + fld.d $ft15, $sp, 80 # 8-byte Folded Reload + fld.d $ft14, $sp, 88 # 8-byte Folded Reload + fld.d $ft13, $sp, 96 # 8-byte Folded Reload + fld.d $ft12, $sp, 104 # 8-byte Folded Reload + fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $ft1, $sp, 120 # 8-byte Folded Reload + fld.d $ft0, $sp, 128 # 8-byte Folded Reload + fld.d $fa7, $sp, 56 # 8-byte Folded Reload + fld.d $fa6, $sp, 136 # 8-byte Folded Reload + fld.d $fa5, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 144 # 8-byte Folded Reload fmov.d $ft10, $fs3 - fld.d $fs3, $sp, 568 # 8-byte Folded Reload - fld.d $ft9, $sp, 64 # 8-byte Folded Reload + fld.d $fs3, $sp, 584 # 8-byte Folded Reload + fld.d $ft9, $sp, 152 # 8-byte Folded Reload fmov.d $ft8, $fs5 - fld.d $fs5, $sp, 576 # 8-byte Folded Reload - fld.d $ft7, $sp, 48 # 8-byte Folded Reload + fld.d $fs5, $sp, 600 # 8-byte Folded Reload + fld.d $ft7, $sp, 160 # 8-byte Folded Reload vldi $vr14, -912 vldi $vr13, -928 fmov.d $ft4, $fs2 - fld.d $fs2, $sp, 544 # 8-byte Folded Reload - ld.d $t1, $sp, 600 # 8-byte Folded Reload - ld.d $t0, $sp, 608 # 8-byte Folded Reload - ld.d $t3, $sp, 280 # 8-byte Folded Reload - ld.d $t2, $sp, 288 # 8-byte Folded Reload - ld.d $s5, $sp, 592 # 8-byte Folded Reload - ld.d $t7, $sp, 184 # 8-byte Folded Reload - ld.d $t8, $sp, 192 # 8-byte Folded Reload - ld.d $t4, $sp, 200 # 8-byte Folded Reload + fld.d $fs2, $sp, 568 # 8-byte Folded Reload + ld.d $t1, $sp, 632 # 8-byte Folded Reload + ld.d $a5, $sp, 408 # 8-byte Folded Reload + ld.d $t3, $sp, 304 # 8-byte Folded Reload + ld.d $ra, $sp, 72 # 8-byte Folded Reload + move $t8, $s5 + ld.d $s5, $sp, 616 # 8-byte Folded Reload + ld.d $t2, $sp, 624 # 8-byte Folded Reload + ld.d $t7, $sp, 192 # 8-byte Folded Reload + ld.d $t6, $sp, 200 # 8-byte Folded Reload fld.d $ft3, $sp, 208 # 8-byte Folded Reload - ld.d $s2, $sp, 584 # 8-byte Folded Reload + ld.d $s2, $sp, 608 # 8-byte Folded Reload move $a1, $s1 b .LBB17_130 .LBB17_176: # %call.sqrt1354 # in Loop: Header=BB17_55 Depth=1 fmov.d $fa0, $fa1 move $s2, $a1 - ld.d $a0, $sp, 616 # 8-byte Folded Reload - st.d $a0, $sp, 616 # 8-byte Folded Spill + st.d $t8, $sp, 24 # 8-byte Folded Spill + st.d $ra, $sp, 72 # 8-byte Folded Spill + st.d $a5, $sp, 408 # 8-byte Folded Spill fst.d $ft4, $sp, 8 # 8-byte Folded Spill - fst.d $ft7, $sp, 48 # 8-byte Folded Spill - fst.d $ft8, $sp, 24 # 8-byte Folded Spill - fst.d $ft9, $sp, 64 # 8-byte Folded Spill - fst.d $ft10, $sp, 56 # 8-byte Folded Spill - fst.d $ft11, $sp, 152 # 8-byte Folded Spill - fst.d $fa5, $sp, 80 # 8-byte Folded Spill - fst.d $fa6, $sp, 144 # 8-byte Folded Spill - fst.d $fa7, $sp, 72 # 8-byte Folded Spill - fst.d $ft0, $sp, 136 # 8-byte Folded Spill - fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 120 # 8-byte Folded Spill - fst.d $ft12, $sp, 112 # 8-byte Folded Spill - fst.d $ft13, $sp, 104 # 8-byte Folded Spill - fst.d $ft14, $sp, 96 # 8-byte Folded Spill - fst.d $ft15, $sp, 88 # 8-byte Folded Spill - st.d $a2, $sp, 32 # 8-byte Folded Spill - move $s5, $t6 - st.d $t5, $sp, 40 # 8-byte Folded Spill - st.d $ra, $sp, 16 # 8-byte Folded Spill - fst.d $fa3, $sp, 352 # 8-byte Folded Spill + fst.d $ft7, $sp, 160 # 8-byte Folded Spill + fst.d $ft8, $sp, 16 # 8-byte Folded Spill + fst.d $ft9, $sp, 152 # 8-byte Folded Spill + fst.d $ft10, $sp, 40 # 8-byte Folded Spill + fst.d $ft11, $sp, 144 # 8-byte Folded Spill + fst.d $fa5, $sp, 64 # 8-byte Folded Spill + fst.d $fa6, $sp, 136 # 8-byte Folded Spill + fst.d $fa7, $sp, 56 # 8-byte Folded Spill + fst.d $ft0, $sp, 128 # 8-byte Folded Spill + fst.d $ft1, $sp, 120 # 8-byte Folded Spill + fst.d $ft2, $sp, 112 # 8-byte Folded Spill + fst.d $ft12, $sp, 104 # 8-byte Folded Spill + fst.d $ft13, $sp, 96 # 8-byte Folded Spill + fst.d $ft14, $sp, 88 # 8-byte Folded Spill + fst.d $ft15, $sp, 80 # 8-byte Folded Spill + move $s5, $t4 + st.d $a2, $sp, 48 # 8-byte Folded Spill + st.d $t5, $sp, 32 # 8-byte Folded Spill + fst.d $fa3, $sp, 360 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - fld.d $fa2, $sp, 664 # 8-byte Folded Reload - fld.d $fa3, $sp, 352 # 8-byte Folded Reload - ld.d $ra, $sp, 16 # 8-byte Folded Reload - ld.d $t5, $sp, 40 # 8-byte Folded Reload - move $t6, $s5 - ld.d $a2, $sp, 32 # 8-byte Folded Reload - fld.d $ft15, $sp, 88 # 8-byte Folded Reload - fld.d $ft14, $sp, 96 # 8-byte Folded Reload - fld.d $ft13, $sp, 104 # 8-byte Folded Reload - fld.d $ft12, $sp, 112 # 8-byte Folded Reload - fld.d $ft2, $sp, 120 # 8-byte Folded Reload - fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 136 # 8-byte Folded Reload - fld.d $fa7, $sp, 72 # 8-byte Folded Reload - fld.d $fa6, $sp, 144 # 8-byte Folded Reload - fld.d $fa5, $sp, 80 # 8-byte Folded Reload - fld.d $ft11, $sp, 152 # 8-byte Folded Reload - fld.d $ft10, $sp, 56 # 8-byte Folded Reload - fld.d $ft9, $sp, 64 # 8-byte Folded Reload - fld.d $ft8, $sp, 24 # 8-byte Folded Reload - fld.d $ft7, $sp, 48 # 8-byte Folded Reload + fld.d $fa2, $sp, 704 # 8-byte Folded Reload + fld.d $fa3, $sp, 360 # 8-byte Folded Reload + ld.d $t5, $sp, 32 # 8-byte Folded Reload + ld.d $a2, $sp, 48 # 8-byte Folded Reload + move $t4, $s5 + fld.d $ft15, $sp, 80 # 8-byte Folded Reload + fld.d $ft14, $sp, 88 # 8-byte Folded Reload + fld.d $ft13, $sp, 96 # 8-byte Folded Reload + fld.d $ft12, $sp, 104 # 8-byte Folded Reload + fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $ft1, $sp, 120 # 8-byte Folded Reload + fld.d $ft0, $sp, 128 # 8-byte Folded Reload + fld.d $fa7, $sp, 56 # 8-byte Folded Reload + fld.d $fa6, $sp, 136 # 8-byte Folded Reload + fld.d $fa5, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 144 # 8-byte Folded Reload + fld.d $ft10, $sp, 40 # 8-byte Folded Reload + fld.d $ft9, $sp, 152 # 8-byte Folded Reload + fld.d $ft8, $sp, 16 # 8-byte Folded Reload + fld.d $ft7, $sp, 160 # 8-byte Folded Reload vldi $vr14, -912 vldi $vr13, -928 fld.d $ft4, $sp, 8 # 8-byte Folded Reload - ld.d $t1, $sp, 600 # 8-byte Folded Reload - ld.d $t0, $sp, 608 # 8-byte Folded Reload - ld.d $t3, $sp, 280 # 8-byte Folded Reload - ld.d $t2, $sp, 288 # 8-byte Folded Reload - ld.d $t7, $sp, 184 # 8-byte Folded Reload - ld.d $t8, $sp, 192 # 8-byte Folded Reload - ld.d $t4, $sp, 200 # 8-byte Folded Reload + ld.d $t1, $sp, 632 # 8-byte Folded Reload + ld.d $a5, $sp, 408 # 8-byte Folded Reload + ld.d $t3, $sp, 304 # 8-byte Folded Reload + ld.d $ra, $sp, 72 # 8-byte Folded Reload + ld.d $t8, $sp, 24 # 8-byte Folded Reload + ld.d $t7, $sp, 192 # 8-byte Folded Reload + ld.d $t6, $sp, 200 # 8-byte Folded Reload fld.d $ft3, $sp, 208 # 8-byte Folded Reload - ld.d $s5, $sp, 592 # 8-byte Folded Reload + ld.d $s5, $sp, 616 # 8-byte Folded Reload + ld.d $t2, $sp, 624 # 8-byte Folded Reload move $a1, $s2 - ld.d $s2, $sp, 584 # 8-byte Folded Reload + ld.d $s2, $sp, 608 # 8-byte Folded Reload b .LBB17_139 .LBB17_177: # %call.sqrt1358 # in Loop: Header=BB17_55 Depth=1 fmov.d $fa0, $fa1 move $s0, $a1 + move $s5, $t8 + st.d $ra, $sp, 72 # 8-byte Folded Spill + st.d $a5, $sp, 408 # 8-byte Folded Spill fst.d $ft4, $sp, 8 # 8-byte Folded Spill - fst.d $ft7, $sp, 48 # 8-byte Folded Spill - fst.d $ft8, $sp, 24 # 8-byte Folded Spill - fst.d $ft9, $sp, 64 # 8-byte Folded Spill - fst.d $ft10, $sp, 56 # 8-byte Folded Spill - fst.d $ft11, $sp, 152 # 8-byte Folded Spill - fst.d $fa5, $sp, 80 # 8-byte Folded Spill - fst.d $fa6, $sp, 144 # 8-byte Folded Spill - fst.d $fa7, $sp, 72 # 8-byte Folded Spill - fst.d $ft0, $sp, 136 # 8-byte Folded Spill - fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 120 # 8-byte Folded Spill - fst.d $ft12, $sp, 112 # 8-byte Folded Spill - fst.d $ft13, $sp, 104 # 8-byte Folded Spill - fst.d $ft14, $sp, 96 # 8-byte Folded Spill - fst.d $ft15, $sp, 88 # 8-byte Folded Spill - fst.d $fa3, $sp, 360 # 8-byte Folded Spill + fst.d $ft7, $sp, 160 # 8-byte Folded Spill + fst.d $ft8, $sp, 16 # 8-byte Folded Spill + fst.d $ft9, $sp, 152 # 8-byte Folded Spill + fst.d $ft10, $sp, 40 # 8-byte Folded Spill + fst.d $ft11, $sp, 144 # 8-byte Folded Spill + fst.d $fa5, $sp, 64 # 8-byte Folded Spill + fst.d $fa6, $sp, 136 # 8-byte Folded Spill + fst.d $fa7, $sp, 56 # 8-byte Folded Spill + fst.d $ft0, $sp, 128 # 8-byte Folded Spill + fst.d $ft1, $sp, 120 # 8-byte Folded Spill + fst.d $ft2, $sp, 112 # 8-byte Folded Spill + fst.d $ft12, $sp, 104 # 8-byte Folded Spill + fst.d $ft13, $sp, 96 # 8-byte Folded Spill + fst.d $ft14, $sp, 88 # 8-byte Folded Spill + fst.d $ft15, $sp, 80 # 8-byte Folded Spill + fst.d $fa3, $sp, 368 # 8-byte Folded Spill move $s2, $t5 - move $s5, $ra pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - fld.d $fa2, $sp, 688 # 8-byte Folded Reload - move $ra, $s5 + fld.d $fa2, $sp, 712 # 8-byte Folded Reload move $t5, $s2 - fld.d $fa3, $sp, 360 # 8-byte Folded Reload - fld.d $ft15, $sp, 88 # 8-byte Folded Reload - fld.d $ft14, $sp, 96 # 8-byte Folded Reload - fld.d $ft13, $sp, 104 # 8-byte Folded Reload - fld.d $ft12, $sp, 112 # 8-byte Folded Reload - fld.d $ft2, $sp, 120 # 8-byte Folded Reload - fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 136 # 8-byte Folded Reload - fld.d $fa7, $sp, 72 # 8-byte Folded Reload - fld.d $fa6, $sp, 144 # 8-byte Folded Reload - fld.d $fa5, $sp, 80 # 8-byte Folded Reload - fld.d $ft11, $sp, 152 # 8-byte Folded Reload - fld.d $ft10, $sp, 56 # 8-byte Folded Reload - fld.d $ft9, $sp, 64 # 8-byte Folded Reload - fld.d $ft8, $sp, 24 # 8-byte Folded Reload - fld.d $ft7, $sp, 48 # 8-byte Folded Reload + fld.d $fa3, $sp, 368 # 8-byte Folded Reload + fld.d $ft15, $sp, 80 # 8-byte Folded Reload + fld.d $ft14, $sp, 88 # 8-byte Folded Reload + fld.d $ft13, $sp, 96 # 8-byte Folded Reload + fld.d $ft12, $sp, 104 # 8-byte Folded Reload + fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $ft1, $sp, 120 # 8-byte Folded Reload + fld.d $ft0, $sp, 128 # 8-byte Folded Reload + fld.d $fa7, $sp, 56 # 8-byte Folded Reload + fld.d $fa6, $sp, 136 # 8-byte Folded Reload + fld.d $fa5, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 144 # 8-byte Folded Reload + fld.d $ft10, $sp, 40 # 8-byte Folded Reload + fld.d $ft9, $sp, 152 # 8-byte Folded Reload + fld.d $ft8, $sp, 16 # 8-byte Folded Reload + fld.d $ft7, $sp, 160 # 8-byte Folded Reload vldi $vr14, -912 vldi $vr13, -928 fld.d $ft4, $sp, 8 # 8-byte Folded Reload - ld.d $t1, $sp, 600 # 8-byte Folded Reload - ld.d $t0, $sp, 608 # 8-byte Folded Reload - ld.d $t3, $sp, 280 # 8-byte Folded Reload - ld.d $t2, $sp, 288 # 8-byte Folded Reload - ld.d $s5, $sp, 592 # 8-byte Folded Reload - ld.d $t7, $sp, 184 # 8-byte Folded Reload - ld.d $t8, $sp, 192 # 8-byte Folded Reload - ld.d $t4, $sp, 200 # 8-byte Folded Reload + ld.d $t1, $sp, 632 # 8-byte Folded Reload + ld.d $a5, $sp, 408 # 8-byte Folded Reload + ld.d $t3, $sp, 304 # 8-byte Folded Reload + ld.d $ra, $sp, 72 # 8-byte Folded Reload + move $t8, $s5 + ld.d $s5, $sp, 616 # 8-byte Folded Reload + ld.d $t2, $sp, 624 # 8-byte Folded Reload + ld.d $t7, $sp, 192 # 8-byte Folded Reload + ld.d $t6, $sp, 200 # 8-byte Folded Reload fld.d $ft3, $sp, 208 # 8-byte Folded Reload - ld.d $s2, $sp, 584 # 8-byte Folded Reload + ld.d $s2, $sp, 608 # 8-byte Folded Reload move $a1, $s0 b .LBB17_146 .LBB17_178: # %call.sqrt1362 # in Loop: Header=BB17_55 Depth=1 fmov.d $fa0, $fa1 move $s0, $a1 - move $s5, $s1 + move $s2, $t8 + move $s5, $ra + st.d $a5, $sp, 408 # 8-byte Folded Spill fmov.d $fs3, $ft4 - fst.d $ft7, $sp, 48 # 8-byte Folded Spill - fst.d $ft8, $sp, 24 # 8-byte Folded Spill - fst.d $ft9, $sp, 64 # 8-byte Folded Spill - fst.d $ft10, $sp, 56 # 8-byte Folded Spill - fst.d $ft11, $sp, 152 # 8-byte Folded Spill - fst.d $fa5, $sp, 80 # 8-byte Folded Spill - fst.d $fa6, $sp, 144 # 8-byte Folded Spill - fst.d $fa7, $sp, 72 # 8-byte Folded Spill - fst.d $ft0, $sp, 136 # 8-byte Folded Spill - fst.d $ft1, $sp, 128 # 8-byte Folded Spill - fst.d $ft2, $sp, 120 # 8-byte Folded Spill - fst.d $ft12, $sp, 112 # 8-byte Folded Spill - fst.d $ft13, $sp, 104 # 8-byte Folded Spill - fst.d $ft14, $sp, 96 # 8-byte Folded Spill - fst.d $ft15, $sp, 88 # 8-byte Folded Spill - fst.d $fa3, $sp, 368 # 8-byte Folded Spill + fst.d $ft7, $sp, 160 # 8-byte Folded Spill + fst.d $ft8, $sp, 16 # 8-byte Folded Spill + fst.d $ft9, $sp, 152 # 8-byte Folded Spill + fst.d $ft10, $sp, 40 # 8-byte Folded Spill + fst.d $ft11, $sp, 144 # 8-byte Folded Spill + fst.d $fa5, $sp, 64 # 8-byte Folded Spill + fst.d $fa6, $sp, 136 # 8-byte Folded Spill + fst.d $fa7, $sp, 56 # 8-byte Folded Spill + fst.d $ft0, $sp, 128 # 8-byte Folded Spill + fst.d $ft1, $sp, 120 # 8-byte Folded Spill + fst.d $ft2, $sp, 112 # 8-byte Folded Spill + fst.d $ft12, $sp, 104 # 8-byte Folded Spill + fst.d $ft13, $sp, 96 # 8-byte Folded Spill + fst.d $ft14, $sp, 88 # 8-byte Folded Spill + fst.d $ft15, $sp, 80 # 8-byte Folded Spill + fst.d $fa3, $sp, 376 # 8-byte Folded Spill move $s1, $t5 - move $s2, $ra pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - fld.d $fa2, $sp, 664 # 8-byte Folded Reload - move $ra, $s2 + fld.d $fa2, $sp, 704 # 8-byte Folded Reload move $t5, $s1 - fld.d $fa3, $sp, 368 # 8-byte Folded Reload - fld.d $ft15, $sp, 88 # 8-byte Folded Reload - fld.d $ft14, $sp, 96 # 8-byte Folded Reload - fld.d $ft13, $sp, 104 # 8-byte Folded Reload - fld.d $ft12, $sp, 112 # 8-byte Folded Reload - fld.d $ft2, $sp, 120 # 8-byte Folded Reload - fld.d $ft1, $sp, 128 # 8-byte Folded Reload - fld.d $ft0, $sp, 136 # 8-byte Folded Reload - fld.d $fa7, $sp, 72 # 8-byte Folded Reload - fld.d $fa6, $sp, 144 # 8-byte Folded Reload - fld.d $fa5, $sp, 80 # 8-byte Folded Reload - fld.d $ft11, $sp, 152 # 8-byte Folded Reload - fld.d $ft10, $sp, 56 # 8-byte Folded Reload - fld.d $ft9, $sp, 64 # 8-byte Folded Reload - fld.d $ft8, $sp, 24 # 8-byte Folded Reload - fld.d $ft7, $sp, 48 # 8-byte Folded Reload + fld.d $fa3, $sp, 376 # 8-byte Folded Reload + fld.d $ft15, $sp, 80 # 8-byte Folded Reload + fld.d $ft14, $sp, 88 # 8-byte Folded Reload + fld.d $ft13, $sp, 96 # 8-byte Folded Reload + fld.d $ft12, $sp, 104 # 8-byte Folded Reload + fld.d $ft2, $sp, 112 # 8-byte Folded Reload + fld.d $ft1, $sp, 120 # 8-byte Folded Reload + fld.d $ft0, $sp, 128 # 8-byte Folded Reload + fld.d $fa7, $sp, 56 # 8-byte Folded Reload + fld.d $fa6, $sp, 136 # 8-byte Folded Reload + fld.d $fa5, $sp, 64 # 8-byte Folded Reload + fld.d $ft11, $sp, 144 # 8-byte Folded Reload + fld.d $ft10, $sp, 40 # 8-byte Folded Reload + fld.d $ft9, $sp, 152 # 8-byte Folded Reload + fld.d $ft8, $sp, 16 # 8-byte Folded Reload + fld.d $ft7, $sp, 160 # 8-byte Folded Reload vldi $vr14, -912 vldi $vr13, -928 fmov.d $ft4, $fs3 - fld.d $fs3, $sp, 560 # 8-byte Folded Reload - ld.d $t1, $sp, 600 # 8-byte Folded Reload - ld.d $t0, $sp, 608 # 8-byte Folded Reload - ld.d $t3, $sp, 280 # 8-byte Folded Reload - ld.d $t2, $sp, 288 # 8-byte Folded Reload - ld.d $s2, $sp, 584 # 8-byte Folded Reload - ld.d $t7, $sp, 184 # 8-byte Folded Reload - ld.d $t8, $sp, 192 # 8-byte Folded Reload - ld.d $t4, $sp, 200 # 8-byte Folded Reload + fld.d $fs3, $sp, 640 # 8-byte Folded Reload + ld.d $t1, $sp, 632 # 8-byte Folded Reload + ld.d $a5, $sp, 408 # 8-byte Folded Reload + ld.d $t3, $sp, 304 # 8-byte Folded Reload + move $ra, $s5 + ld.d $s5, $sp, 616 # 8-byte Folded Reload + ld.d $t2, $sp, 624 # 8-byte Folded Reload + move $t8, $s2 + ld.d $t7, $sp, 192 # 8-byte Folded Reload + ld.d $t6, $sp, 200 # 8-byte Folded Reload fld.d $ft3, $sp, 208 # 8-byte Folded Reload - move $s1, $s5 - ld.d $s5, $sp, 592 # 8-byte Folded Reload + ld.d $s1, $sp, 520 # 8-byte Folded Reload move $a1, $s0 b .LBB17_154 .LBB17_179: # %._crit_edge1186.._crit_edge1190_crit_edge ld.d $a0, $fp, %pc_lo12(_ZZN5State32calc_finite_difference_via_facesEdE5H_new) - st.d $a0, $sp, 304 # 8-byte Folded Spill + st.d $a0, $sp, 312 # 8-byte Folded Spill b .LBB17_181 .LBB17_180: - ld.d $s0, $sp, 176 # 8-byte Folded Reload - ld.d $s1, $sp, 168 # 8-byte Folded Reload - ld.d $s2, $sp, 160 # 8-byte Folded Reload + ld.d $s0, $sp, 184 # 8-byte Folded Reload + ld.d $s2, $sp, 176 # 8-byte Folded Reload + ld.d $s4, $sp, 168 # 8-byte Folded Reload .LBB17_181: # %._crit_edge1190 move $a0, $s0 - ld.d $a2, $sp, 304 # 8-byte Folded Reload + ld.d $a2, $sp, 312 # 8-byte Folded Reload pcaddu18i $ra, %call36(_ZN10MallocPlus14memory_replaceEPvS0_) jirl $ra, $ra, 0 ld.d $a1, $s0, 208 - ld.d $a2, $s1, %pc_lo12(_ZZN5State32calc_finite_difference_via_facesEdE5U_new) + ld.d $a2, $s2, %pc_lo12(_ZZN5State32calc_finite_difference_via_facesEdE5U_new) st.d $a0, $s0, 200 move $a0, $s0 pcaddu18i $ra, %call36(_ZN10MallocPlus14memory_replaceEPvS0_) jirl $ra, $ra, 0 ld.d $a1, $s0, 216 - ld.d $a2, $s2, %pc_lo12(_ZZN5State32calc_finite_difference_via_facesEdE5V_new) + ld.d $a2, $s4, %pc_lo12(_ZZN5State32calc_finite_difference_via_facesEdE5V_new) st.d $a0, $s0, 208 move $a0, $s0 pcaddu18i $ra, %call36(_ZN10MallocPlus14memory_replaceEPvS0_) jirl $ra, $ra, 0 - ld.d $a2, $sp, 808 - ld.d $a1, $sp, 816 + ld.d $a2, $sp, 824 + ld.d $a1, $sp, 832 st.d $a0, $s0, 216 move $a0, $a2 pcaddu18i $ra, %call36(cpu_timer_stop) @@ -8997,26 +9055,26 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe fld.d $fa1, $s0, 240 fadd.d $fa0, $fa0, $fa1 fst.d $fa0, $s0, 240 - fld.d $fs7, $sp, 824 # 8-byte Folded Reload - fld.d $fs6, $sp, 832 # 8-byte Folded Reload - fld.d $fs5, $sp, 840 # 8-byte Folded Reload - fld.d $fs4, $sp, 848 # 8-byte Folded Reload - fld.d $fs3, $sp, 856 # 8-byte Folded Reload - fld.d $fs2, $sp, 864 # 8-byte Folded Reload - fld.d $fs1, $sp, 872 # 8-byte Folded Reload - fld.d $fs0, $sp, 880 # 8-byte Folded Reload - ld.d $s8, $sp, 888 # 8-byte Folded Reload - ld.d $s7, $sp, 896 # 8-byte Folded Reload - ld.d $s6, $sp, 904 # 8-byte Folded Reload - ld.d $s5, $sp, 912 # 8-byte Folded Reload - ld.d $s4, $sp, 920 # 8-byte Folded Reload - ld.d $s3, $sp, 928 # 8-byte Folded Reload - ld.d $s2, $sp, 936 # 8-byte Folded Reload - ld.d $s1, $sp, 944 # 8-byte Folded Reload - ld.d $s0, $sp, 952 # 8-byte Folded Reload - ld.d $fp, $sp, 960 # 8-byte Folded Reload - ld.d $ra, $sp, 968 # 8-byte Folded Reload - addi.d $sp, $sp, 976 + fld.d $fs7, $sp, 840 # 8-byte Folded Reload + fld.d $fs6, $sp, 848 # 8-byte Folded Reload + fld.d $fs5, $sp, 856 # 8-byte Folded Reload + fld.d $fs4, $sp, 864 # 8-byte Folded Reload + fld.d $fs3, $sp, 872 # 8-byte Folded Reload + fld.d $fs2, $sp, 880 # 8-byte Folded Reload + fld.d $fs1, $sp, 888 # 8-byte Folded Reload + fld.d $fs0, $sp, 896 # 8-byte Folded Reload + ld.d $s8, $sp, 904 # 8-byte Folded Reload + ld.d $s7, $sp, 912 # 8-byte Folded Reload + ld.d $s6, $sp, 920 # 8-byte Folded Reload + ld.d $s5, $sp, 928 # 8-byte Folded Reload + ld.d $s4, $sp, 936 # 8-byte Folded Reload + ld.d $s3, $sp, 944 # 8-byte Folded Reload + ld.d $s2, $sp, 952 # 8-byte Folded Reload + ld.d $s1, $sp, 960 # 8-byte Folded Reload + ld.d $s0, $sp, 968 # 8-byte Folded Reload + ld.d $fp, $sp, 976 # 8-byte Folded Reload + ld.d $ra, $sp, 984 # 8-byte Folded Reload + addi.d $sp, $sp, 992 ret .LBB17_182: pcalau12i $a0, %pc_hi20(_ZGVZN5State32calc_finite_difference_via_facesEdE2Hx) @@ -9026,14 +9084,14 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe addi.w $a0, $a0, 0 beqz $a0, .LBB17_3 # %bb.183: - st.d $zero, $s5, 16 + st.d $zero, $s2, 16 vrepli.b $vr0, 0 - vst $vr0, $s5, 0 + vst $vr0, $s2, 0 pcalau12i $a0, %pc_hi20(_ZNSt6vectorIdSaIdEED2Ev) addi.d $a0, $a0, %pc_lo12(_ZNSt6vectorIdSaIdEED2Ev) pcalau12i $a1, %pc_hi20(__dso_handle) addi.d $a2, $a1, %pc_lo12(__dso_handle) - move $a1, $s5 + move $a1, $s2 pcaddu18i $ra, %call36(__cxa_atexit) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(_ZGVZN5State32calc_finite_difference_via_facesEdE2Hx) @@ -9072,14 +9130,14 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe addi.w $a0, $a0, 0 beqz $a0, .LBB17_5 # %bb.187: - st.d $zero, $s1, 16 + st.d $zero, $s4, 16 vrepli.b $vr0, 0 - vst $vr0, $s1, 0 + vst $vr0, $s4, 0 pcalau12i $a0, %pc_hi20(_ZNSt6vectorIdSaIdEED2Ev) addi.d $a0, $a0, %pc_lo12(_ZNSt6vectorIdSaIdEED2Ev) pcalau12i $a1, %pc_hi20(__dso_handle) addi.d $a2, $a1, %pc_lo12(__dso_handle) - move $a1, $s1 + move $a1, $s4 pcaddu18i $ra, %call36(__cxa_atexit) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(_ZGVZN5State32calc_finite_difference_via_facesEdE2Vx) @@ -9095,14 +9153,14 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe addi.w $a0, $a0, 0 beqz $a0, .LBB17_28 # %bb.189: - st.d $zero, $s8, 16 + st.d $zero, $fp, 16 vrepli.b $vr0, 0 - vst $vr0, $s8, 0 + vst $vr0, $fp, 0 pcalau12i $a0, %pc_hi20(_ZNSt6vectorIdSaIdEED2Ev) addi.d $a0, $a0, %pc_lo12(_ZNSt6vectorIdSaIdEED2Ev) pcalau12i $a1, %pc_hi20(__dso_handle) addi.d $a2, $a1, %pc_lo12(__dso_handle) - move $a1, $s8 + move $a1, $fp pcaddu18i $ra, %call36(__cxa_atexit) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(_ZGVZN5State32calc_finite_difference_via_facesEdE2Hy) @@ -9141,14 +9199,14 @@ _ZN5State32calc_finite_difference_via_facesEd: # @_ZN5State32calc_finite_differe addi.w $a0, $a0, 0 beqz $a0, .LBB17_30 # %bb.193: - st.d $zero, $s7, 16 + st.d $zero, $s8, 16 vrepli.b $vr0, 0 - vst $vr0, $s7, 0 + vst $vr0, $s8, 0 pcalau12i $a0, %pc_hi20(_ZNSt6vectorIdSaIdEED2Ev) addi.d $a0, $a0, %pc_lo12(_ZNSt6vectorIdSaIdEED2Ev) pcalau12i $a1, %pc_hi20(__dso_handle) addi.d $a2, $a1, %pc_lo12(__dso_handle) - move $a1, $s7 + move $a1, $s8 pcaddu18i $ra, %call36(__cxa_atexit) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(_ZGVZN5State32calc_finite_difference_via_facesEdE2Vy) @@ -9310,16 +9368,7 @@ _ZN5State14symmetry_checkEPKcSt6vectorIiSaIiEEd9SIGN_RULERi: # @_ZN5State14symme .Lfunc_end18: .size _ZN5State14symmetry_checkEPKcSt6vectorIiSaIiEEd9SIGN_RULERi, .Lfunc_end18-_ZN5State14symmetry_checkEPKcSt6vectorIiSaIiEEd9SIGN_RULERi # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN5State21calc_refine_potentialERSt6vectorIiSaIiEERiS4_ -.LCPI19_0: - .dword 0xc08f400000000000 # double -1000 -.LCPI19_1: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI19_2: - .dword 0x3fa999999999999a # double 0.050000000000000003 - .text - .globl _ZN5State21calc_refine_potentialERSt6vectorIiSaIiEERiS4_ + .globl _ZN5State21calc_refine_potentialERSt6vectorIiSaIiEERiS4_ # -- Begin function _ZN5State21calc_refine_potentialERSt6vectorIiSaIiEERiS4_ .p2align 5 .type _ZN5State21calc_refine_potentialERSt6vectorIiSaIiEERiS4_,@function _ZN5State21calc_refine_potentialERSt6vectorIiSaIiEERiS4_: # @_ZN5State21calc_refine_potentialERSt6vectorIiSaIiEERiS4_ @@ -9391,14 +9440,23 @@ _ZN5State21calc_refine_potentialERSt6vectorIiSaIiEERiS4_: # @_ZN5State21calc_ref alsl.d $t2, $a0, $t2, 2 ori $t3, $zero, 1 vldi $vr0, -928 + ori $t4, $zero, 0 + lu32i.d $t4, -49152 + lu52i.d $t4, $t4, -1016 + movgr2fr.d $fa1, $t4 + lu12i.w $t4, -419431 + ori $t4, $t4, 2458 + lu32i.d $t4, -419431 + lu52i.d $t5, $t4, 1019 + movgr2fr.d $fa2, $t5 b .LBB19_4 .p2align 4, , 16 .LBB19_2: # %.sink.split # in Loop: Header=BB19_4 Depth=1 - st.w $t4, $t2, 0 + st.w $t5, $t2, 0 .LBB19_3: # in Loop: Header=BB19_4 Depth=1 addi.d $a0, $a0, 1 - ld.w $t4, $sp, 8 + ld.w $t5, $sp, 8 addi.d $a3, $a3, 4 addi.d $a4, $a4, 8 addi.d $a5, $a5, 4 @@ -9407,114 +9465,110 @@ _ZN5State21calc_refine_potentialERSt6vectorIiSaIiEERiS4_: # @_ZN5State21calc_ref addi.d $t0, $t0, 4 addi.d $t1, $t1, 4 addi.d $t2, $t2, 4 - bge $a0, $t4, .LBB19_18 + bge $a0, $t5, .LBB19_18 .LBB19_4: # =>This Inner Loop Header: Depth=1 - ld.w $t4, $a3, 0 - bne $t4, $t3, .LBB19_3 + ld.w $t5, $a3, 0 + bne $t5, $t3, .LBB19_3 # %bb.5: # in Loop: Header=BB19_4 Depth=1 - ld.w $t4, $a5, 0 - slli.d $t6, $t4, 3 - slli.d $t5, $t4, 2 - ldx.w $t7, $s5, $t5 - ld.w $t4, $a6, 0 - fldx.d $fa1, $a2, $t6 - bge $t4, $t7, .LBB19_7 + ld.w $t5, $a5, 0 + slli.d $t7, $t5, 3 + slli.d $t6, $t5, 2 + ldx.w $t8, $s5, $t6 + ld.w $t5, $a6, 0 + fldx.d $fa3, $a2, $t7 + bge $t5, $t8, .LBB19_7 # %bb.6: # in Loop: Header=BB19_4 Depth=1 - ldx.w $t5, $s4, $t5 - slli.d $t5, $t5, 3 - fldx.d $fa2, $a2, $t5 - fadd.d $fa1, $fa1, $fa2 - fmul.d $fa1, $fa1, $fa0 + ldx.w $t6, $s4, $t6 + slli.d $t6, $t6, 3 + fldx.d $fa4, $a2, $t6 + fadd.d $fa3, $fa3, $fa4 + fmul.d $fa3, $fa3, $fa0 .LBB19_7: # in Loop: Header=BB19_4 Depth=1 - ld.w $t5, $a7, 0 - slli.d $t6, $t5, 3 - slli.d $t5, $t5, 2 - ldx.w $t7, $s5, $t5 - fldx.d $fa2, $a2, $t6 - bge $t4, $t7, .LBB19_9 + ld.w $t6, $a7, 0 + slli.d $t7, $t6, 3 + slli.d $t6, $t6, 2 + ldx.w $t8, $s5, $t6 + fldx.d $fa4, $a2, $t7 + bge $t5, $t8, .LBB19_9 # %bb.8: # in Loop: Header=BB19_4 Depth=1 - ldx.w $t5, $s4, $t5 - slli.d $t5, $t5, 3 - fldx.d $fa3, $a2, $t5 - fadd.d $fa2, $fa2, $fa3 - fmul.d $fa2, $fa2, $fa0 + ldx.w $t6, $s4, $t6 + slli.d $t6, $t6, 3 + fldx.d $fa5, $a2, $t6 + fadd.d $fa4, $fa4, $fa5 + fmul.d $fa4, $fa4, $fa0 .LBB19_9: # in Loop: Header=BB19_4 Depth=1 - ld.w $t5, $t0, 0 - slli.d $t6, $t5, 3 - slli.d $t5, $t5, 2 - ldx.w $t7, $s5, $t5 - fldx.d $fa3, $a2, $t6 - bge $t4, $t7, .LBB19_11 + ld.w $t6, $t0, 0 + slli.d $t7, $t6, 3 + slli.d $t6, $t6, 2 + ldx.w $t8, $s5, $t6 + fldx.d $fa5, $a2, $t7 + bge $t5, $t8, .LBB19_11 # %bb.10: # in Loop: Header=BB19_4 Depth=1 - ldx.w $t5, $s3, $t5 - slli.d $t5, $t5, 3 - fldx.d $fa4, $a2, $t5 - fadd.d $fa3, $fa3, $fa4 - fmul.d $fa3, $fa3, $fa0 + ldx.w $t6, $s3, $t6 + slli.d $t6, $t6, 3 + fldx.d $fa6, $a2, $t6 + fadd.d $fa5, $fa5, $fa6 + fmul.d $fa5, $fa5, $fa0 .LBB19_11: # in Loop: Header=BB19_4 Depth=1 - ld.w $t5, $t1, 0 - slli.d $t6, $t5, 3 - slli.d $t5, $t5, 2 - ldx.w $t7, $s5, $t5 - fldx.d $fa4, $a2, $t6 - bge $t4, $t7, .LBB19_13 + ld.w $t6, $t1, 0 + slli.d $t7, $t6, 3 + slli.d $t6, $t6, 2 + ldx.w $t8, $s5, $t6 + fldx.d $fa6, $a2, $t7 + bge $t5, $t8, .LBB19_13 # %bb.12: # in Loop: Header=BB19_4 Depth=1 - ldx.w $t4, $s3, $t5 - slli.d $t4, $t4, 3 - fldx.d $fa5, $a2, $t4 - fadd.d $fa4, $fa4, $fa5 - fmul.d $fa4, $fa4, $fa0 + ldx.w $t5, $s3, $t6 + slli.d $t5, $t5, 3 + fldx.d $fa7, $a2, $t5 + fadd.d $fa6, $fa6, $fa7 + fmul.d $fa6, $fa6, $fa0 .LBB19_13: # in Loop: Header=BB19_4 Depth=1 - fld.d $fa5, $a4, 0 - fsub.d $fa2, $fa2, $fa5 - fsub.d $fa1, $fa5, $fa1 - fdiv.d $fa2, $fa2, $fa5 - fabs.d $fa2, $fa2 - fdiv.d $fa1, $fa1, $fa5 - pcalau12i $t4, %pc_hi20(.LCPI19_0) - fld.d $fa6, $t4, %pc_lo12(.LCPI19_0) - fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa2, $fa1 - fsel $fa7, $fa2, $fa1, $fcc0 - fmax.d $fa6, $fa7, $fa6 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fa2, $fcc0 - fcmp.clt.d $fcc0, $fa6, $fa1 - fsel $fa1, $fa6, $fa1, $fcc0 - fsub.d $fa2, $fa4, $fa5 - fsub.d $fa3, $fa5, $fa3 - fdiv.d $fa2, $fa2, $fa5 - fabs.d $fa2, $fa2 - fdiv.d $fa3, $fa3, $fa5 + fld.d $fa7, $a4, 0 + fsub.d $fa4, $fa4, $fa7 + fsub.d $fa3, $fa7, $fa3 + fdiv.d $fa4, $fa4, $fa7 + fabs.d $fa4, $fa4 + fdiv.d $fa3, $fa3, $fa7 fabs.d $fa3, $fa3 - fcmp.clt.d $fcc0, $fa2, $fa3 - fsel $fa4, $fa2, $fa3, $fcc0 - fcmp.clt.d $fcc0, $fa1, $fa4 - fsel $fa1, $fa1, $fa4, $fcc0 - fcmp.clt.d $fcc0, $fa3, $fa2 - pcalau12i $t4, %pc_hi20(.LCPI19_1) - fld.d $fa4, $t4, %pc_lo12(.LCPI19_1) - fsel $fa2, $fa3, $fa2, $fcc0 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fa2, $fcc0 - fcmp.cule.d $fcc0, $fa1, $fa4 + fcmp.clt.d $fcc0, $fa4, $fa3 + fsel $ft0, $fa4, $fa3, $fcc0 + fmax.d $ft0, $ft0, $fa1 + fcmp.clt.d $fcc0, $fa3, $fa4 + fsel $fa3, $fa3, $fa4, $fcc0 + fcmp.clt.d $fcc0, $ft0, $fa3 + fsel $fa3, $ft0, $fa3, $fcc0 + fsub.d $fa4, $fa6, $fa7 + fsub.d $fa5, $fa7, $fa5 + fdiv.d $fa4, $fa4, $fa7 + fabs.d $fa4, $fa4 + fdiv.d $fa5, $fa5, $fa7 + fabs.d $fa5, $fa5 + fcmp.clt.d $fcc0, $fa4, $fa5 + fsel $fa6, $fa4, $fa5, $fcc0 + fcmp.clt.d $fcc0, $fa3, $fa6 + fsel $fa3, $fa3, $fa6, $fcc0 + fcmp.clt.d $fcc0, $fa5, $fa4 + fsel $fa4, $fa5, $fa4, $fcc0 + fcmp.clt.d $fcc0, $fa3, $fa4 + fsel $fa3, $fa3, $fa4, $fcc0 + fcmp.cule.d $fcc0, $fa3, $fa2 st.w $zero, $t2, 0 bcnez $fcc0, .LBB19_15 # %bb.14: # in Loop: Header=BB19_4 Depth=1 - ld.w $t5, $a6, 0 - ld.w $t6, $a1, 1120 - ori $t4, $zero, 1 - blt $t5, $t6, .LBB19_2 + ld.w $t6, $a6, 0 + ld.w $t7, $a1, 1120 + ori $t5, $zero, 1 + blt $t6, $t7, .LBB19_2 .LBB19_15: # in Loop: Header=BB19_4 Depth=1 - pcalau12i $t4, %pc_hi20(.LCPI19_2) - fld.d $fa2, $t4, %pc_lo12(.LCPI19_2) - fcmp.cule.d $fcc0, $fa2, $fa1 + lu52i.d $t5, $t4, 1018 + movgr2fr.d $fa4, $t5 + fcmp.cule.d $fcc0, $fa4, $fa3 bcnez $fcc0, .LBB19_3 # %bb.16: # in Loop: Header=BB19_4 Depth=1 - ld.w $t4, $a6, 0 - blez $t4, .LBB19_3 + ld.w $t5, $a6, 0 + blez $t5, .LBB19_3 # %bb.17: # in Loop: Header=BB19_4 Depth=1 - addi.d $t4, $zero, -1 + addi.d $t5, $zero, -1 b .LBB19_2 .LBB19_18: # %._crit_edge ld.d $a0, $sp, 16 @@ -9713,12 +9767,7 @@ _ZN5State18output_timer_blockE17mesh_device_typesddddd: # @_ZN5State18output_tim .Lfunc_end23: .size _ZN5State18output_timer_blockE17mesh_device_typesddddd, .Lfunc_end23-_ZN5State18output_timer_blockE17mesh_device_typesddddd # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN5State12timer_outputE12state_timers17mesh_device_typesi -.LCPI24_0: - .dword 0x3e112e0be826d695 # double 1.0000000000000001E-9 - .text - .globl _ZN5State12timer_outputE12state_timers17mesh_device_typesi + .globl _ZN5State12timer_outputE12state_timers17mesh_device_typesi # -- Begin function _ZN5State12timer_outputE12state_timers17mesh_device_typesi .p2align 5 .type _ZN5State12timer_outputE12state_timers17mesh_device_typesi,@function _ZN5State12timer_outputE12state_timers17mesh_device_typesi: # @_ZN5State12timer_outputE12state_timers17mesh_device_typesi @@ -9752,11 +9801,14 @@ _ZN5State12timer_outputE12state_timers17mesh_device_typesi: # @_ZN5State12timer_ beqz $a2, .LBB24_2 # %bb.1: ld.d $a0, $a0, 296 - pcalau12i $a1, %pc_hi20(.LCPI24_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI24_0) + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + lu12i.w $a0, -97683 + ori $a0, $a0, 1685 + lu32i.d $a0, 77323 + lu52i.d $a0, $a0, 993 movgr2fr.d $fa1, $a0 - ffint.d.l $fa1, $fa1 - fmul.d $fs0, $fa1, $fa0 + fmul.d $fs0, $fa0, $fa1 b .LBB24_3 .LBB24_2: fld.d $fs0, $a0, 224 @@ -9812,12 +9864,7 @@ _ZN5State12timer_outputE12state_timers17mesh_device_typesi: # @_ZN5State12timer_ .size _ZN5State12timer_outputE12state_timers17mesh_device_typesi, .Lfunc_end24-_ZN5State12timer_outputE12state_timers17mesh_device_typesi .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN5State37compare_state_cpu_local_to_cpu_globalEPS_PKcijjPiS3_ -.LCPI25_0: - .dword 0x3f947ae147ae147b # double 0.02 - .text - .globl _ZN5State37compare_state_cpu_local_to_cpu_globalEPS_PKcijjPiS3_ + .globl _ZN5State37compare_state_cpu_local_to_cpu_globalEPS_PKcijjPiS3_ # -- Begin function _ZN5State37compare_state_cpu_local_to_cpu_globalEPS_PKcijjPiS3_ .p2align 5 .type _ZN5State37compare_state_cpu_local_to_cpu_globalEPS_PKcijjPiS3_,@function _ZN5State37compare_state_cpu_local_to_cpu_globalEPS_PKcijjPiS3_: # @_ZN5State37compare_state_cpu_local_to_cpu_globalEPS_PKcijjPiS3_ @@ -9910,8 +9957,11 @@ _ZN5State37compare_state_cpu_local_to_cpu_globalEPS_PKcijjPiS3_: # @_ZN5State37c pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 .LBB25_9: # %.lr.ph.preheader - pcalau12i $a0, %pc_hi20(.LCPI25_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI25_0) + lu12i.w $a0, 293601 + ori $a0, $a0, 1147 + lu32i.d $a0, 293601 + lu52i.d $a0, $a0, 1017 + movgr2fr.d $fs0, $a0 pcalau12i $a0, %pc_hi20(.L.str.14) addi.d $a0, $a0, %pc_lo12(.L.str.14) st.d $a0, $sp, 32 # 8-byte Folded Spill diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/timer.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/timer.s index 3e5a8b70..e098d129 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/timer.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/timer.s @@ -11,12 +11,7 @@ cpu_timer_start: # @cpu_timer_start .Lfunc_end0: .size cpu_timer_start, .Lfunc_end0-cpu_timer_start # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function cpu_timer_stop -.LCPI1_0: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 - .text - .globl cpu_timer_stop + .globl cpu_timer_stop # -- Begin function cpu_timer_stop .p2align 5 .type cpu_timer_stop,@function cpu_timer_stop: # @cpu_timer_stop @@ -36,12 +31,15 @@ cpu_timer_stop: # @cpu_timer_stop sub.d $a0, $a0, $s0 sub.d $a1, $a1, $fp movgr2fr.d $fa0, $a0 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_0) ffint.d.l $fa0, $fa0 - movgr2fr.d $fa2, $a1 - ffint.d.l $fa2, $fa2 - fmul.d $fa1, $fa2, $fa1 + movgr2fr.d $fa1, $a1 + ffint.d.l $fa1, $fa1 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fa2, $a0 + fmul.d $fa1, $fa1, $fa2 fadd.d $fa0, $fa1, $fa0 ld.d $s0, $sp, 24 # 8-byte Folded Reload ld.d $fp, $sp, 32 # 8-byte Folded Reload diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/zorder.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/zorder.s index 513c9bd7..efab46b9 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/zorder.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/zorder.s @@ -1,12 +1,6 @@ .file "zorder.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function calc_zorder -.LCPI0_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI0_1: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 .text - .globl calc_zorder + .globl calc_zorder # -- Begin function calc_zorder .p2align 5 .type calc_zorder,@function calc_zorder: # @calc_zorder @@ -36,10 +30,11 @@ calc_zorder: # @calc_zorder move $s5, $a2 move $s6, $a1 move $s7, $zero - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_0) - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI0_1) + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 + movgr2fr.d $fs0, $a0 + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs1, $a0 lu12i.w $a0, 4080 ori $a0, $a0, 255 st.d $a0, $sp, 64 # 8-byte Folded Spill @@ -199,14 +194,7 @@ calc_zorder: # @calc_zorder .Lfunc_end0: .size calc_zorder, .Lfunc_end0-calc_zorder # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function index_to_bit -.LCPI1_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI1_1: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 - .text - .globl index_to_bit + .globl index_to_bit # -- Begin function index_to_bit .p2align 5 .type index_to_bit,@function index_to_bit: # @index_to_bit @@ -218,12 +206,13 @@ index_to_bit: # @index_to_bit st.d $ra, $sp, 8 # 8-byte Folded Spill fst.d $fs0, $sp, 0 # 8-byte Folded Spill srli.d $a3, $a0, 32 - pcalau12i $a4, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a4, %pc_lo12(.LCPI1_0) lu52i.d $a4, $zero, 1107 or $a3, $a3, $a4 + movgr2fr.d $fa0, $a3 + lu12i.w $a3, 256 + lu52i.d $a3, $a3, 1107 movgr2fr.d $fa1, $a3 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 lu12i.w $a3, 275200 bstrins.d $a0, $a3, 63, 32 movgr2fr.d $fa1, $a0 @@ -232,9 +221,9 @@ index_to_bit: # @index_to_bit vldi $vr0, -912 pcaddu18i $ra, %call36(ldexp) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_1) fmul.d $fa0, $fa0, $fs0 + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 ftintrz.l.d $fa2, $fa0 movfr2gr.d $a0, $fa2 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/HACCKernels/CMakeFiles/HACCKernels.dir/GravityForceKernel.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/HACCKernels/CMakeFiles/HACCKernels.dir/GravityForceKernel.s index 9bfe2212..ae2629e2 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/HACCKernels/CMakeFiles/HACCKernels.dir/GravityForceKernel.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/HACCKernels/CMakeFiles/HACCKernels.dir/GravityForceKernel.s @@ -1,38 +1,34 @@ .file "GravityForceKernel.cpp" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z19GravityForceKernel4iPfS_S_S_fffffRfS0_S0_ -.LCPI0_0: - .word 0x3a1b5121 # float 5.92486991E-4 -.LCPI0_1: - .word 0xb78a184e # float -1.64621997E-5 -.LCPI0_2: - .word 0xbc108c2a # float -0.00882248021 -.LCPI0_3: - .word 0x3d8c8d19 # float 0.0686284974 -.LCPI0_4: - .word 0xbe87077d # float -0.263729006 .text - .globl _Z19GravityForceKernel4iPfS_S_S_fffffRfS0_S0_ + .globl _Z19GravityForceKernel4iPfS_S_S_fffffRfS0_S0_ # -- Begin function _Z19GravityForceKernel4iPfS_S_S_fffffRfS0_S0_ .p2align 5 .type _Z19GravityForceKernel4iPfS_S_S_fffffRfS0_S0_,@function _Z19GravityForceKernel4iPfS_S_S_fffffRfS0_S0_: # @_Z19GravityForceKernel4iPfS_S_S_fffffRfS0_S0_ # %bb.0: blez $a0, .LBB0_6 # %bb.1: # %.lr.ph.preheader.i - pcalau12i $t0, %pc_hi20(.LCPI0_0) - fld.s $fa7, $t0, %pc_lo12(.LCPI0_0) - pcalau12i $t0, %pc_hi20(.LCPI0_1) - fld.s $ft0, $t0, %pc_lo12(.LCPI0_1) - pcalau12i $t0, %pc_hi20(.LCPI0_2) - fld.s $ft1, $t0, %pc_lo12(.LCPI0_2) - pcalau12i $t0, %pc_hi20(.LCPI0_3) - fld.s $ft3, $t0, %pc_lo12(.LCPI0_3) - pcalau12i $t0, %pc_hi20(.LCPI0_4) - fld.s $ft4, $t0, %pc_lo12(.LCPI0_4) - movgr2fr.w $ft5, $zero - fmov.s $ft2, $ft5 - fmov.s $fa6, $ft5 - fmov.s $fa5, $ft5 + movgr2fr.w $fa5, $zero + lu12i.w $t0, 238005 + ori $t0, $t0, 289 + movgr2fr.w $fa6, $t0 + lu12i.w $t0, -296799 + ori $t0, $t0, 2126 + lu32i.d $t0, 0 + movgr2fr.w $fa7, $t0 + lu12i.w $t0, -278264 + ori $t0, $t0, 3114 + lu32i.d $t0, 0 + movgr2fr.w $ft2, $t0 + lu12i.w $t0, 252104 + ori $t0, $t0, 3353 + movgr2fr.w $ft3, $t0 + lu12i.w $t0, -268176 + ori $t0, $t0, 1917 + lu32i.d $t0, 0 + movgr2fr.w $ft5, $t0 + fmov.s $ft4, $fa5 + fmov.s $ft1, $fa5 + fmov.s $ft0, $fa5 b .LBB0_3 .p2align 4, , 16 .LBB0_2: # in Loop: Header=BB0_3 Depth=1 @@ -57,80 +53,74 @@ _Z19GravityForceKernel4iPfS_S_S_fffffRfS0_S0_: # @_Z19GravityForceKernel4iPfS_S_ bcnez $fcc0, .LBB0_2 # %bb.4: # %.lr.ph.i # in Loop: Header=BB0_3 Depth=1 - fcmp.ceq.s $fcc0, $ft9, $ft5 + fcmp.ceq.s $fcc0, $ft9, $fa5 bcnez $fcc0, .LBB0_2 # %bb.5: # in Loop: Header=BB0_3 Depth=1 - fmadd.s $ft10, $ft9, $ft0, $fa7 - fmadd.s $ft10, $ft10, $ft9, $ft1 + fmadd.s $ft10, $ft9, $fa7, $fa6 + fmadd.s $ft10, $ft10, $ft9, $ft2 fmadd.s $ft10, $ft10, $ft9, $ft3 fadd.s $ft11, $ft9, $fa4 fsqrt.s $ft12, $ft11 fmul.s $ft11, $ft12, $ft11 fld.s $ft12, $a4, 0 frecip.s $ft11, $ft11 - fadd.s $ft11, $ft11, $ft4 + fadd.s $ft11, $ft11, $ft5 fmadd.s $ft9, $ft9, $ft10, $ft11 fmul.s $ft9, $ft12, $ft9 - fmadd.s $ft2, $ft9, $ft6, $ft2 - fmadd.s $fa6, $ft9, $ft7, $fa6 - fmadd.s $fa5, $ft9, $ft8, $fa5 + fmadd.s $ft4, $ft9, $ft6, $ft4 + fmadd.s $ft1, $ft9, $ft7, $ft1 + fmadd.s $ft0, $ft9, $ft8, $ft0 b .LBB0_2 .LBB0_6: - movgr2fr.w $fa5, $zero - fmov.s $fa6, $fa5 - fmov.s $ft2, $fa5 + movgr2fr.w $ft0, $zero + fmov.s $ft1, $ft0 + fmov.s $ft4, $ft0 .LBB0_7: # %_ZL18GravityForceKernelILi4ETnRAplT_Li1E_KfL_Z17PolyCoefficients4EEviPfS3_S3_S3_fffffRfS4_S4_.exit fld.s $fa0, $a5, 0 fld.s $fa1, $a6, 0 - fadd.s $fa0, $fa0, $ft2 + fadd.s $fa0, $fa0, $ft4 fld.s $fa2, $a7, 0 fst.s $fa0, $a5, 0 - fadd.s $fa0, $fa1, $fa6 + fadd.s $fa0, $fa1, $ft1 fst.s $fa0, $a6, 0 - fadd.s $fa0, $fa2, $fa5 + fadd.s $fa0, $fa2, $ft0 fst.s $fa0, $a7, 0 ret .Lfunc_end0: .size _Z19GravityForceKernel4iPfS_S_S_fffffRfS0_S0_, .Lfunc_end0-_Z19GravityForceKernel4iPfS_S_S_fffffRfS0_S0_ # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z19GravityForceKernel5iPfS_S_S_fffffRfS0_S0_ -.LCPI1_0: - .word 0xb87df61a # float -6.05491005E-5 -.LCPI1_1: - .word 0x35c589a2 # float 1.47177002E-6 -.LCPI1_2: - .word 0x3a8f475b # float 0.00109312998 -.LCPI1_3: - .word 0xbc3c19f7 # float -0.0114807999 -.LCPI1_4: - .word 0x3d99cce0 # float 0.0750977993 -.LCPI1_5: - .word 0xbe89e53b # float -0.269327015 - .text - .globl _Z19GravityForceKernel5iPfS_S_S_fffffRfS0_S0_ + .globl _Z19GravityForceKernel5iPfS_S_S_fffffRfS0_S0_ # -- Begin function _Z19GravityForceKernel5iPfS_S_S_fffffRfS0_S0_ .p2align 5 .type _Z19GravityForceKernel5iPfS_S_S_fffffRfS0_S0_,@function _Z19GravityForceKernel5iPfS_S_S_fffffRfS0_S0_: # @_Z19GravityForceKernel5iPfS_S_S_fffffRfS0_S0_ # %bb.0: blez $a0, .LBB1_6 # %bb.1: # %.lr.ph.preheader.i - pcalau12i $t0, %pc_hi20(.LCPI1_0) - fld.s $fa6, $t0, %pc_lo12(.LCPI1_0) - pcalau12i $t0, %pc_hi20(.LCPI1_1) - fld.s $ft0, $t0, %pc_lo12(.LCPI1_1) - pcalau12i $t0, %pc_hi20(.LCPI1_2) - fld.s $ft1, $t0, %pc_lo12(.LCPI1_2) - pcalau12i $t0, %pc_hi20(.LCPI1_3) - fld.s $ft2, $t0, %pc_lo12(.LCPI1_3) - pcalau12i $t0, %pc_hi20(.LCPI1_4) - fld.s $ft4, $t0, %pc_lo12(.LCPI1_4) - pcalau12i $t0, %pc_hi20(.LCPI1_5) - fld.s $ft5, $t0, %pc_lo12(.LCPI1_5) - movgr2fr.w $ft6, $zero - fmov.s $ft3, $ft6 - fmov.s $fa7, $ft6 - fmov.s $fa5, $ft6 + movgr2fr.w $fa5, $zero + lu12i.w $t0, 220248 + ori $t0, $t0, 2466 + movgr2fr.w $fa6, $t0 + lu12i.w $t0, -292897 + ori $t0, $t0, 1562 + lu32i.d $t0, 0 + movgr2fr.w $fa7, $t0 + lu12i.w $t0, 239860 + ori $t0, $t0, 1883 + movgr2fr.w $ft0, $t0 + lu12i.w $t0, -277567 + ori $t0, $t0, 2551 + lu32i.d $t0, 0 + movgr2fr.w $ft3, $t0 + lu12i.w $t0, 252316 + ori $t0, $t0, 3296 + movgr2fr.w $ft4, $t0 + lu12i.w $t0, -268130 + ori $t0, $t0, 1339 + lu32i.d $t0, 0 + movgr2fr.w $ft6, $t0 + fmov.s $ft5, $fa5 + fmov.s $ft2, $fa5 + fmov.s $ft1, $fa5 b .LBB1_3 .p2align 4, , 16 .LBB1_2: # in Loop: Header=BB1_3 Depth=1 @@ -155,85 +145,79 @@ _Z19GravityForceKernel5iPfS_S_S_fffffRfS0_S0_: # @_Z19GravityForceKernel5iPfS_S_ bcnez $fcc0, .LBB1_2 # %bb.4: # %.lr.ph.i # in Loop: Header=BB1_3 Depth=1 - fcmp.ceq.s $fcc0, $ft10, $ft6 + fcmp.ceq.s $fcc0, $ft10, $fa5 bcnez $fcc0, .LBB1_2 # %bb.5: # in Loop: Header=BB1_3 Depth=1 - fmadd.s $ft11, $ft10, $ft0, $fa6 - fmadd.s $ft11, $ft11, $ft10, $ft1 - fmadd.s $ft11, $ft11, $ft10, $ft2 + fmadd.s $ft11, $ft10, $fa6, $fa7 + fmadd.s $ft11, $ft11, $ft10, $ft0 + fmadd.s $ft11, $ft11, $ft10, $ft3 fmadd.s $ft11, $ft11, $ft10, $ft4 fadd.s $ft12, $ft10, $fa4 fsqrt.s $ft13, $ft12 fmul.s $ft12, $ft13, $ft12 fld.s $ft13, $a4, 0 frecip.s $ft12, $ft12 - fadd.s $ft12, $ft12, $ft5 + fadd.s $ft12, $ft12, $ft6 fmadd.s $ft10, $ft10, $ft11, $ft12 fmul.s $ft10, $ft13, $ft10 - fmadd.s $ft3, $ft10, $ft7, $ft3 - fmadd.s $fa7, $ft10, $ft8, $fa7 - fmadd.s $fa5, $ft10, $ft9, $fa5 + fmadd.s $ft5, $ft10, $ft7, $ft5 + fmadd.s $ft2, $ft10, $ft8, $ft2 + fmadd.s $ft1, $ft10, $ft9, $ft1 b .LBB1_2 .LBB1_6: - movgr2fr.w $fa5, $zero - fmov.s $fa7, $fa5 - fmov.s $ft3, $fa5 + movgr2fr.w $ft1, $zero + fmov.s $ft2, $ft1 + fmov.s $ft5, $ft1 .LBB1_7: # %_ZL18GravityForceKernelILi5ETnRAplT_Li1E_KfL_Z17PolyCoefficients5EEviPfS3_S3_S3_fffffRfS4_S4_.exit fld.s $fa0, $a5, 0 fld.s $fa1, $a6, 0 - fadd.s $fa0, $fa0, $ft3 + fadd.s $fa0, $fa0, $ft5 fld.s $fa2, $a7, 0 fst.s $fa0, $a5, 0 - fadd.s $fa0, $fa1, $fa7 + fadd.s $fa0, $fa1, $ft2 fst.s $fa0, $a6, 0 - fadd.s $fa0, $fa2, $fa5 + fadd.s $fa0, $fa2, $ft1 fst.s $fa0, $a7, 0 ret .Lfunc_end1: .size _Z19GravityForceKernel5iPfS_S_S_fffffRfS0_S0_, .Lfunc_end1-_Z19GravityForceKernel5iPfS_S_S_fffffRfS0_S0_ # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z19GravityForceKernel6iPfS_S_S_fffffRfS0_S0_ -.LCPI2_0: - .word 0x36de991c # float 6.63393985E-6 -.LCPI2_1: - .word 0xb41e2ae4 # float -1.47305002E-7 -.LCPI2_2: - .word 0xb90ac3ad # float -1.32336005E-4 -.LCPI2_3: - .word 0x3ad10a49 # float 0.00159484998 -.LCPI2_4: - .word 0xbc5a1b6a # float -0.0133122001 -.LCPI2_5: - .word 0x3da07068 # float 0.0783393979 -.LCPI2_6: - .word 0xbe8af901 # float -0.271430999 - .text - .globl _Z19GravityForceKernel6iPfS_S_S_fffffRfS0_S0_ + .globl _Z19GravityForceKernel6iPfS_S_S_fffffRfS0_S0_ # -- Begin function _Z19GravityForceKernel6iPfS_S_S_fffffRfS0_S0_ .p2align 5 .type _Z19GravityForceKernel6iPfS_S_S_fffffRfS0_S0_,@function _Z19GravityForceKernel6iPfS_S_S_fffffRfS0_S0_: # @_Z19GravityForceKernel6iPfS_S_S_fffffRfS0_S0_ # %bb.0: blez $a0, .LBB2_6 # %bb.1: # %.lr.ph.preheader.i - pcalau12i $t0, %pc_hi20(.LCPI2_0) - fld.s $fa5, $t0, %pc_lo12(.LCPI2_0) - pcalau12i $t0, %pc_hi20(.LCPI2_1) - fld.s $fa7, $t0, %pc_lo12(.LCPI2_1) - pcalau12i $t0, %pc_hi20(.LCPI2_2) - fld.s $ft1, $t0, %pc_lo12(.LCPI2_2) - pcalau12i $t0, %pc_hi20(.LCPI2_3) - fld.s $ft2, $t0, %pc_lo12(.LCPI2_3) - pcalau12i $t0, %pc_hi20(.LCPI2_4) - fld.s $ft3, $t0, %pc_lo12(.LCPI2_4) - pcalau12i $t0, %pc_hi20(.LCPI2_5) - fld.s $ft5, $t0, %pc_lo12(.LCPI2_5) - pcalau12i $t0, %pc_hi20(.LCPI2_6) - fld.s $ft6, $t0, %pc_lo12(.LCPI2_6) - movgr2fr.w $ft7, $zero - fmov.s $ft4, $ft7 - fmov.s $ft0, $ft7 - fmov.s $fa6, $ft7 + movgr2fr.w $fa5, $zero + lu12i.w $t0, 224745 + ori $t0, $t0, 2332 + movgr2fr.w $fa6, $t0 + lu12i.w $t0, -310814 + ori $t0, $t0, 2788 + lu32i.d $t0, 0 + movgr2fr.w $fa7, $t0 + lu12i.w $t0, -290644 + ori $t0, $t0, 941 + lu32i.d $t0, 0 + movgr2fr.w $ft0, $t0 + lu12i.w $t0, 240912 + ori $t0, $t0, 2633 + movgr2fr.w $ft1, $t0 + lu12i.w $t0, -277087 + ori $t0, $t0, 2922 + lu32i.d $t0, 0 + movgr2fr.w $ft4, $t0 + lu12i.w $t0, 252423 + ori $t0, $t0, 104 + movgr2fr.w $ft5, $t0 + lu12i.w $t0, -268113 + ori $t0, $t0, 2305 + lu32i.d $t0, 0 + movgr2fr.w $ft7, $t0 + fmov.s $ft6, $fa5 + fmov.s $ft3, $fa5 + fmov.s $ft2, $fa5 b .LBB2_3 .p2align 4, , 16 .LBB2_2: # in Loop: Header=BB2_3 Depth=1 @@ -258,39 +242,39 @@ _Z19GravityForceKernel6iPfS_S_S_fffffRfS0_S0_: # @_Z19GravityForceKernel6iPfS_S_ bcnez $fcc0, .LBB2_2 # %bb.4: # %.lr.ph.i # in Loop: Header=BB2_3 Depth=1 - fcmp.ceq.s $fcc0, $ft11, $ft7 + fcmp.ceq.s $fcc0, $ft11, $fa5 bcnez $fcc0, .LBB2_2 # %bb.5: # in Loop: Header=BB2_3 Depth=1 - fmadd.s $ft12, $ft11, $fa7, $fa5 + fmadd.s $ft12, $ft11, $fa7, $fa6 + fmadd.s $ft12, $ft12, $ft11, $ft0 fmadd.s $ft12, $ft12, $ft11, $ft1 - fmadd.s $ft12, $ft12, $ft11, $ft2 - fmadd.s $ft12, $ft12, $ft11, $ft3 + fmadd.s $ft12, $ft12, $ft11, $ft4 fmadd.s $ft12, $ft12, $ft11, $ft5 fadd.s $ft13, $ft11, $fa4 fsqrt.s $ft14, $ft13 fmul.s $ft13, $ft14, $ft13 fld.s $ft14, $a4, 0 frecip.s $ft13, $ft13 - fadd.s $ft13, $ft13, $ft6 + fadd.s $ft13, $ft13, $ft7 fmadd.s $ft11, $ft11, $ft12, $ft13 fmul.s $ft11, $ft14, $ft11 - fmadd.s $ft4, $ft11, $ft8, $ft4 - fmadd.s $ft0, $ft11, $ft9, $ft0 - fmadd.s $fa6, $ft11, $ft10, $fa6 + fmadd.s $ft6, $ft11, $ft8, $ft6 + fmadd.s $ft3, $ft11, $ft9, $ft3 + fmadd.s $ft2, $ft11, $ft10, $ft2 b .LBB2_2 .LBB2_6: - movgr2fr.w $fa6, $zero - fmov.s $ft0, $fa6 - fmov.s $ft4, $fa6 + movgr2fr.w $ft2, $zero + fmov.s $ft3, $ft2 + fmov.s $ft6, $ft2 .LBB2_7: # %_ZL18GravityForceKernelILi6ETnRAplT_Li1E_KfL_Z17PolyCoefficients6EEviPfS3_S3_S3_fffffRfS4_S4_.exit fld.s $fa0, $a5, 0 fld.s $fa1, $a6, 0 - fadd.s $fa0, $fa0, $ft4 + fadd.s $fa0, $fa0, $ft6 fld.s $fa2, $a7, 0 fst.s $fa0, $a5, 0 - fadd.s $fa0, $fa1, $ft0 + fadd.s $fa0, $fa1, $ft3 fst.s $fa0, $a6, 0 - fadd.s $fa0, $fa2, $fa6 + fadd.s $fa0, $fa2, $ft2 fst.s $fa0, $a7, 0 ret .Lfunc_end2: diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/HACCKernels/CMakeFiles/HACCKernels.dir/main.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/HACCKernels/CMakeFiles/HACCKernels.dir/main.s index d80093f2..d76f9f66 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/HACCKernels/CMakeFiles/HACCKernels.dir/main.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/HACCKernels/CMakeFiles/HACCKernels.dir/main.s @@ -3,12 +3,8 @@ .globl _ZSt21ios_base_library_initv # End of file scope inline assembly - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc -.LCPI0_0: - .word 0x2f800000 # float 2.32830644E-10 .text - .globl _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc + .globl _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc # -- Begin function _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc .p2align 5 .type _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc,@function _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc @@ -30,6 +26,7 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc st.d $s6, $sp, 184 # 8-byte Folded Spill st.d $s7, $sp, 176 # 8-byte Folded Spill st.d $s8, $sp, 168 # 8-byte Folded Spill + fst.d $fs0, $sp, 160 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -41,8 +38,9 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc .cfi_offset 29, -72 .cfi_offset 30, -80 .cfi_offset 31, -88 + .cfi_offset 56, -96 move $fp, $a1 - st.d $a0, $sp, 136 # 8-byte Folded Spill + st.d $a0, $sp, 128 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(_ZSt4cout) ld.d $s1, $a0, %got_pc_lo12(_ZSt4cout) pcalau12i $a0, %pc_hi20(.L.str) @@ -81,23 +79,25 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc jirl $ra, $ra, 0 move $s2, $zero pcalau12i $a0, %pc_hi20(IListMin) - st.d $a0, $sp, 24 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(IListMax) st.d $a0, $sp, 16 # 8-byte Folded Spill + pcalau12i $a0, %pc_hi20(IListMax) + st.d $a0, $sp, 8 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(PMin) - st.d $a0, $sp, 40 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(PMax) st.d $a0, $sp, 32 # 8-byte Folded Spill - vldi $vr5, -928 - vldi $vr6, -1168 + pcalau12i $a0, %pc_hi20(PMax) + st.d $a0, $sp, 24 # 8-byte Folded Spill + vldi $vr4, -928 + lu12i.w $a0, 194560 + movgr2fr.w $fs0, $a0 + vldi $vr5, -1168 b .LBB0_5 .p2align 4, , 16 .LBB0_4: # %_ZNSt6vectorIfSaIfEED2Ev.exit119 # in Loop: Header=BB0_5 Depth=1 pcalau12i $a0, %pc_hi20(NumIters) ld.w $a0, $a0, %pc_lo12(NumIters) - vldi $vr5, -928 - vldi $vr6, -1168 + vldi $vr4, -928 + vldi $vr5, -1168 bge $s2, $a0, .LBB0_56 .LBB0_5: # %.lr.ph424 # =>This Loop Header: Depth=1 @@ -105,20 +105,20 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc # Child Loop BB0_37 Depth 2 # Child Loop BB0_40 Depth 2 addi.w $s2, $s2, 1 - st.w $zero, $sp, 156 - st.w $zero, $sp, 160 + st.w $zero, $sp, 148 + st.w $zero, $sp, 152 slli.d $a0, $s2, 13 xor $a0, $a0, $s2 bstrpick.d $a1, $a0, 31, 17 xor $a0, $a1, $a0 slli.d $a1, $a0, 5 xor $s4, $a1, $a0 - ld.d $a0, $sp, 40 # 8-byte Folded Reload + ld.d $a0, $sp, 32 # 8-byte Folded Reload ld.w $a0, $a0, %pc_lo12(PMin) slli.d $a1, $s4, 13 xor $a1, $a1, $s4 bstrpick.d $a2, $a1, 31, 17 - ld.d $a3, $sp, 32 # 8-byte Folded Reload + ld.d $a3, $sp, 24 # 8-byte Folded Reload ld.w $a3, $a3, %pc_lo12(PMax) xor $a1, $a2, $a1 slli.d $a2, $a1, 5 @@ -127,14 +127,14 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc addi.w $a2, $fp, 0 mod.wu $a1, $a2, $a1 add.w $s1, $a1, $a0 - st.w $zero, $sp, 164 + st.w $zero, $sp, 156 bltz $s1, .LBB0_57 # %bb.6: # %_ZNSt6vectorIfSaIfEE17_S_check_init_lenEmRKS0_.exit.i # in Loop: Header=BB0_5 Depth=1 - st.d $s2, $sp, 104 # 8-byte Folded Spill - ld.d $a0, $sp, 24 # 8-byte Folded Reload - ld.w $s5, $a0, %pc_lo12(IListMin) + st.d $s2, $sp, 96 # 8-byte Folded Spill ld.d $a0, $sp, 16 # 8-byte Folded Reload + ld.w $s5, $a0, %pc_lo12(IListMin) + ld.d $a0, $sp, 8 # 8-byte Folded Reload ld.w $s6, $a0, %pc_lo12(IListMax) beqz $s1, .LBB0_16 # %bb.7: # %.noexc53 @@ -158,7 +158,7 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc .LBB0_9: # in Loop: Header=BB0_5 Depth=1 .Ltmp0: # EH_LABEL alsl.d $a0, $s1, $s7, 2 - st.d $a0, $sp, 64 # 8-byte Folded Spill + st.d $a0, $sp, 56 # 8-byte Folded Spill move $a0, $s2 pcaddu18i $ra, %call36(_Znwm) jirl $ra, $ra, 0 @@ -178,7 +178,7 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc .LBB0_12: # in Loop: Header=BB0_5 Depth=1 .Ltmp3: # EH_LABEL alsl.d $a0, $s1, $s8, 2 - st.d $a0, $sp, 56 # 8-byte Folded Spill + st.d $a0, $sp, 48 # 8-byte Folded Spill move $a0, $s2 pcaddu18i $ra, %call36(_Znwm) jirl $ra, $ra, 0 @@ -187,7 +187,7 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc # in Loop: Header=BB0_5 Depth=1 move $s2, $a0 alsl.d $a0, $s1, $a0, 2 - st.d $a0, $sp, 48 # 8-byte Folded Spill + st.d $a0, $sp, 40 # 8-byte Folded Spill st.w $zero, $s2, 0 beqz $s0, .LBB0_15 # %bb.14: # %_ZSt6fill_nIPfmfET_S1_T0_RKT1_.exit.loopexit.i.i.i.i.i65 @@ -199,17 +199,17 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc jirl $ra, $ra, 0 .LBB0_15: # %_ZNSt6vectorIfSaIfEEC2EmRKS0_.exit71 # in Loop: Header=BB0_5 Depth=1 - vldi $vr5, -928 - vldi $vr6, -1168 + vldi $vr4, -928 + vldi $vr5, -1168 b .LBB0_17 .p2align 4, , 16 .LBB0_16: # in Loop: Header=BB0_5 Depth=1 move $s8, $zero + st.d $zero, $sp, 48 # 8-byte Folded Spill st.d $zero, $sp, 56 # 8-byte Folded Spill - st.d $zero, $sp, 64 # 8-byte Folded Spill move $s7, $zero move $s2, $zero - st.d $zero, $sp, 48 # 8-byte Folded Spill + st.d $zero, $sp, 40 # 8-byte Folded Spill .LBB0_17: # %_ZNSt6vectorIfSaIfEEC2EmRKS0_.exit71 # in Loop: Header=BB0_5 Depth=1 sub.w $a0, $s6, $s5 @@ -219,9 +219,9 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc bltz $s5, .LBB0_58 # %bb.18: # %_ZNSt6vectorIfSaIfEE17_S_check_init_lenEmRKS0_.exit.i72 # in Loop: Header=BB0_5 Depth=1 - st.d $s7, $sp, 128 # 8-byte Folded Spill - st.d $s8, $sp, 120 # 8-byte Folded Spill - st.d $s2, $sp, 112 # 8-byte Folded Spill + st.d $s7, $sp, 120 # 8-byte Folded Spill + st.d $s8, $sp, 112 # 8-byte Folded Spill + st.d $s2, $sp, 104 # 8-byte Folded Spill beqz $s5, .LBB0_32 # %bb.19: # in Loop: Header=BB0_5 Depth=1 slli.d $s7, $s5, 2 @@ -247,7 +247,7 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc .LBB0_22: # in Loop: Header=BB0_5 Depth=1 .Ltmp9: # EH_LABEL alsl.d $a0, $s5, $s4, 2 - st.d $a0, $sp, 88 # 8-byte Folded Spill + st.d $a0, $sp, 80 # 8-byte Folded Spill move $a0, $s7 pcaddu18i $ra, %call36(_Znwm) jirl $ra, $ra, 0 @@ -266,9 +266,9 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc jirl $ra, $ra, 0 .LBB0_25: # in Loop: Header=BB0_5 Depth=1 .Ltmp12: # EH_LABEL - st.d $s0, $sp, 144 # 8-byte Folded Spill + st.d $s0, $sp, 136 # 8-byte Folded Spill alsl.d $a0, $s5, $s0, 2 - st.d $a0, $sp, 80 # 8-byte Folded Spill + st.d $a0, $sp, 72 # 8-byte Folded Spill move $a0, $s7 pcaddu18i $ra, %call36(_Znwm) jirl $ra, $ra, 0 @@ -288,7 +288,7 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc .LBB0_28: # in Loop: Header=BB0_5 Depth=1 .Ltmp15: # EH_LABEL alsl.d $a0, $s5, $s6, 2 - st.d $a0, $sp, 72 # 8-byte Folded Spill + st.d $a0, $sp, 64 # 8-byte Folded Spill move $a0, $s7 pcaddu18i $ra, %call36(_Znwm) jirl $ra, $ra, 0 @@ -297,7 +297,7 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc # in Loop: Header=BB0_5 Depth=1 move $s7, $a0 alsl.d $a0, $s5, $a0, 2 - st.d $a0, $sp, 96 # 8-byte Folded Spill + st.d $a0, $sp, 88 # 8-byte Folded Spill st.w $zero, $s7, 0 beqz $s2, .LBB0_31 # %bb.30: # %_ZSt6fill_nIPfmfET_S1_T0_RKT1_.exit.loopexit.i.i.i.i.i101 @@ -309,53 +309,118 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc jirl $ra, $ra, 0 .LBB0_31: # %_ZNSt6vectorIfSaIfEEC2EmRKS0_.exit107 # in Loop: Header=BB0_5 Depth=1 - vldi $vr5, -928 - vldi $vr6, -1168 - ld.d $s2, $sp, 112 # 8-byte Folded Reload + vldi $vr4, -928 + vldi $vr5, -1168 + ld.d $s2, $sp, 104 # 8-byte Folded Reload pcalau12i $s8, %pc_hi20(MaxSep) - pcalau12i $a0, %pc_hi20(.LCPI0_0) bnez $s1, .LBB0_33 b .LBB0_35 .p2align 4, , 16 .LBB0_32: # in Loop: Header=BB0_5 Depth=1 - st.d $zero, $sp, 72 # 8-byte Folded Spill + st.d $zero, $sp, 64 # 8-byte Folded Spill move $s6, $zero - st.d $zero, $sp, 88 # 8-byte Folded Spill - move $s4, $zero - st.d $zero, $sp, 144 # 8-byte Folded Spill st.d $zero, $sp, 80 # 8-byte Folded Spill + move $s4, $zero + st.d $zero, $sp, 136 # 8-byte Folded Spill + st.d $zero, $sp, 72 # 8-byte Folded Spill move $s7, $zero - st.d $zero, $sp, 96 # 8-byte Folded Spill + st.d $zero, $sp, 88 # 8-byte Folded Spill pcalau12i $s8, %pc_hi20(MaxSep) - pcalau12i $a0, %pc_hi20(.LCPI0_0) beqz $s1, .LBB0_35 .LBB0_33: # %.lr.ph # in Loop: Header=BB0_5 Depth=1 fld.s $fa0, $s8, %pc_lo12(MaxSep) - move $a1, $zero + move $a0, $zero fcvt.d.s $fa0, $fa0 - fmul.d $fa0, $fa0, $fa5 - move $a2, $s2 + fmul.d $fa0, $fa0, $fa4 + move $a1, $s2 + ld.d $a2, $sp, 112 # 8-byte Folded Reload ld.d $a3, $sp, 120 # 8-byte Folded Reload - ld.d $a4, $sp, 128 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_34: # Parent Loop BB0_5 Depth=1 # => This Inner Loop Header: Depth=2 + slli.d $a4, $fp, 13 + xor $a4, $a4, $fp + bstrpick.d $a5, $a4, 31, 17 + xor $a4, $a5, $a4 + slli.d $a5, $a4, 5 + xor $a4, $a5, $a4 + bstrpick.d $a5, $a4, 31, 0 + movgr2fr.d $fa1, $a5 + ffint.s.l $fa1, $fa1 + fmul.s $fa1, $fa1, $fs0 + fcvt.d.s $fa1, $fa1 + fmul.d $fa1, $fa0, $fa1 + fcvt.s.d $fa1, $fa1 + fst.s $fa1, $a3, 0 + slli.d $a5, $a4, 13 + xor $a4, $a5, $a4 + bstrpick.d $a5, $a4, 31, 17 + xor $a4, $a5, $a4 + slli.d $a5, $a4, 5 + xor $a4, $a5, $a4 + bstrpick.d $a5, $a4, 31, 0 + movgr2fr.d $fa1, $a5 + ffint.s.l $fa1, $fa1 + fmul.s $fa1, $fa1, $fs0 + fcvt.d.s $fa1, $fa1 + fmul.d $fa1, $fa0, $fa1 + fcvt.s.d $fa1, $fa1 + fst.s $fa1, $a2, 0 + slli.d $a5, $a4, 13 + xor $a4, $a5, $a4 + bstrpick.d $a5, $a4, 31, 17 + xor $a4, $a5, $a4 + slli.d $a5, $a4, 5 + xor $fp, $a5, $a4 + bstrpick.d $a4, $fp, 31, 0 + movgr2fr.d $fa1, $a4 + ffint.s.l $fa1, $fa1 + fmul.s $fa1, $fa1, $fs0 + fcvt.d.s $fa1, $fa1 + fmul.d $fa1, $fa0, $fa1 + fcvt.s.d $fa1, $fa1 + fst.s $fa1, $a1, 0 + addi.d $a0, $a0, 1 + addi.d $a3, $a3, 4 + addi.d $a2, $a2, 4 + addi.d $a1, $a1, 4 + blt $a0, $s1, .LBB0_34 +.LBB0_35: # %.preheader338 + # in Loop: Header=BB0_5 Depth=1 + beqz $s5, .LBB0_38 +# %bb.36: # %.lr.ph420 + # in Loop: Header=BB0_5 Depth=1 + fld.s $fa0, $s8, %pc_lo12(MaxSep) + pcalau12i $a0, %pc_hi20(OffsetAdjFrac) + fld.s $fa1, $a0, %pc_lo12(OffsetAdjFrac) + move $a0, $zero + fcvt.d.s $fa0, $fa0 + fcvt.d.s $fa1, $fa1 + fadd.d $fa1, $fa1, $fa4 + fmul.d $fa2, $fa0, $fa4 + move $a1, $s7 + move $a2, $s6 + ld.d $a3, $sp, 136 # 8-byte Folded Reload + move $a4, $s4 + .p2align 4, , 16 +.LBB0_37: # Parent Loop BB0_5 Depth=1 + # => This Inner Loop Header: Depth=2 slli.d $a5, $fp, 13 xor $a5, $a5, $fp bstrpick.d $a6, $a5, 31, 17 xor $a5, $a6, $a5 slli.d $a6, $a5, 5 xor $a5, $a6, $a5 - fld.s $fa1, $a0, %pc_lo12(.LCPI0_0) bstrpick.d $a6, $a5, 31, 0 - movgr2fr.d $fa2, $a6 - ffint.s.l $fa2, $fa2 - fmul.s $fa2, $fa2, $fa1 - fcvt.d.s $fa2, $fa2 - fmul.d $fa2, $fa0, $fa2 - fcvt.s.d $fa2, $fa2 - fst.s $fa2, $a4, 0 + movgr2fr.d $fa3, $a6 + ffint.s.l $fa3, $fa3 + fmul.s $fa3, $fa3, $fs0 + fcvt.d.s $fa3, $fa3 + fmadd.d $fa3, $fa3, $fa4, $fa1 + fmul.d $fa3, $fa3, $fa0 + fcvt.s.d $fa3, $fa3 + fst.s $fa3, $a4, 0 slli.d $a6, $a5, 13 xor $a5, $a6, $a5 bstrpick.d $a6, $a5, 31, 17 @@ -363,13 +428,27 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc slli.d $a6, $a5, 5 xor $a5, $a6, $a5 bstrpick.d $a6, $a5, 31, 0 - movgr2fr.d $fa2, $a6 - ffint.s.l $fa2, $fa2 - fmul.s $fa2, $fa2, $fa1 - fcvt.d.s $fa2, $fa2 - fmul.d $fa2, $fa0, $fa2 - fcvt.s.d $fa2, $fa2 - fst.s $fa2, $a3, 0 + movgr2fr.d $fa3, $a6 + ffint.s.l $fa3, $fa3 + fmul.s $fa3, $fa3, $fs0 + fcvt.d.s $fa3, $fa3 + fmul.d $fa3, $fa2, $fa3 + fcvt.s.d $fa3, $fa3 + fst.s $fa3, $a3, 0 + slli.d $a6, $a5, 13 + xor $a5, $a6, $a5 + bstrpick.d $a6, $a5, 31, 17 + xor $a5, $a6, $a5 + slli.d $a6, $a5, 5 + xor $a5, $a6, $a5 + bstrpick.d $a6, $a5, 31, 0 + movgr2fr.d $fa3, $a6 + ffint.s.l $fa3, $fa3 + fmul.s $fa3, $fa3, $fs0 + fcvt.d.s $fa3, $fa3 + fmul.d $fa3, $fa2, $fa3 + fcvt.s.d $fa3, $fa3 + fst.s $fa3, $a2, 0 slli.d $a6, $a5, 13 xor $a5, $a6, $a5 bstrpick.d $a6, $a5, 31, 17 @@ -377,107 +456,24 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc slli.d $a6, $a5, 5 xor $fp, $a6, $a5 bstrpick.d $a5, $fp, 31, 0 - movgr2fr.d $fa2, $a5 - ffint.s.l $fa2, $fa2 - fmul.s $fa1, $fa2, $fa1 - fcvt.d.s $fa1, $fa1 - fmul.d $fa1, $fa0, $fa1 - fcvt.s.d $fa1, $fa1 - fst.s $fa1, $a2, 0 - addi.d $a1, $a1, 1 + movgr2fr.d $fa3, $a5 + ffint.s.l $fa3, $fa3 + fmadd.s $fa3, $fa3, $fs0, $fa5 + fst.s $fa3, $a1, 0 + addi.d $a0, $a0, 1 addi.d $a4, $a4, 4 addi.d $a3, $a3, 4 addi.d $a2, $a2, 4 - blt $a1, $s1, .LBB0_34 -.LBB0_35: # %.preheader338 - # in Loop: Header=BB0_5 Depth=1 - beqz $s5, .LBB0_38 -# %bb.36: # %.lr.ph420 - # in Loop: Header=BB0_5 Depth=1 - fld.s $fa0, $s8, %pc_lo12(MaxSep) - pcalau12i $a1, %pc_hi20(OffsetAdjFrac) - fld.s $fa1, $a1, %pc_lo12(OffsetAdjFrac) - move $a1, $zero - fcvt.d.s $fa0, $fa0 - fcvt.d.s $fa1, $fa1 - fadd.d $fa1, $fa1, $fa5 - fmul.d $fa2, $fa0, $fa5 - move $a2, $s7 - move $a3, $s6 - ld.d $a4, $sp, 144 # 8-byte Folded Reload - move $a5, $s4 - .p2align 4, , 16 -.LBB0_37: # Parent Loop BB0_5 Depth=1 - # => This Inner Loop Header: Depth=2 - slli.d $a6, $fp, 13 - xor $a6, $a6, $fp - bstrpick.d $a7, $a6, 31, 17 - xor $a6, $a7, $a6 - slli.d $a7, $a6, 5 - xor $a6, $a7, $a6 - fld.s $fa3, $a0, %pc_lo12(.LCPI0_0) - bstrpick.d $a7, $a6, 31, 0 - movgr2fr.d $fa4, $a7 - ffint.s.l $fa4, $fa4 - fmul.s $fa4, $fa4, $fa3 - fcvt.d.s $fa4, $fa4 - fmadd.d $fa4, $fa4, $fa5, $fa1 - fmul.d $fa4, $fa4, $fa0 - fcvt.s.d $fa4, $fa4 - fst.s $fa4, $a5, 0 - slli.d $a7, $a6, 13 - xor $a6, $a7, $a6 - bstrpick.d $a7, $a6, 31, 17 - xor $a6, $a7, $a6 - slli.d $a7, $a6, 5 - xor $a6, $a7, $a6 - bstrpick.d $a7, $a6, 31, 0 - movgr2fr.d $fa4, $a7 - ffint.s.l $fa4, $fa4 - fmul.s $fa4, $fa4, $fa3 - fcvt.d.s $fa4, $fa4 - fmul.d $fa4, $fa2, $fa4 - fcvt.s.d $fa4, $fa4 - fst.s $fa4, $a4, 0 - slli.d $a7, $a6, 13 - xor $a6, $a7, $a6 - bstrpick.d $a7, $a6, 31, 17 - xor $a6, $a7, $a6 - slli.d $a7, $a6, 5 - xor $a6, $a7, $a6 - bstrpick.d $a7, $a6, 31, 0 - movgr2fr.d $fa4, $a7 - ffint.s.l $fa4, $fa4 - fmul.s $fa4, $fa4, $fa3 - fcvt.d.s $fa4, $fa4 - fmul.d $fa4, $fa2, $fa4 - fcvt.s.d $fa4, $fa4 - fst.s $fa4, $a3, 0 - slli.d $a7, $a6, 13 - xor $a6, $a7, $a6 - bstrpick.d $a7, $a6, 31, 17 - xor $a6, $a7, $a6 - slli.d $a7, $a6, 5 - xor $fp, $a7, $a6 - bstrpick.d $a6, $fp, 31, 0 - movgr2fr.d $fa4, $a6 - ffint.s.l $fa4, $fa4 - fmadd.s $fa3, $fa4, $fa3, $fa6 - fst.s $fa3, $a2, 0 - addi.d $a1, $a1, 1 - addi.d $a5, $a5, 4 - addi.d $a4, $a4, 4 - addi.d $a3, $a3, 4 - addi.d $a2, $a2, 4 - blt $a1, $s5, .LBB0_37 + addi.d $a1, $a1, 4 + blt $a0, $s5, .LBB0_37 .LBB0_38: # %.preheader # in Loop: Header=BB0_5 Depth=1 beqz $s1, .LBB0_42 # %bb.39: # %.lr.ph422.preheader # in Loop: Header=BB0_5 Depth=1 move $fp, $zero - ld.d $s0, $sp, 120 # 8-byte Folded Reload - ld.d $s3, $sp, 128 # 8-byte Folded Reload + ld.d $s0, $sp, 112 # 8-byte Folded Reload + ld.d $s3, $sp, 120 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_40: # %.lr.ph422 # Parent Loop BB0_5 Depth=1 @@ -491,15 +487,15 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc fmul.s $fa3, $fa3, $fa3 fmul.s $fa4, $fa4, $fa4 .Ltmp18: # EH_LABEL - addi.d $a5, $sp, 164 - addi.d $a6, $sp, 160 - addi.d $a7, $sp, 156 + addi.d $a5, $sp, 156 + addi.d $a6, $sp, 152 + addi.d $a7, $sp, 148 move $a0, $s5 move $a1, $s4 - ld.d $a2, $sp, 144 # 8-byte Folded Reload + ld.d $a2, $sp, 136 # 8-byte Folded Reload move $a3, $s6 move $a4, $s7 - ld.d $t0, $sp, 136 # 8-byte Folded Reload + ld.d $t0, $sp, 128 # 8-byte Folded Reload jirl $ra, $t0, 0 .Ltmp19: # EH_LABEL # %bb.41: # in Loop: Header=BB0_40 Depth=2 @@ -515,7 +511,7 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc beqz $s7, .LBB0_44 .LBB0_43: # %._crit_edge.thread # in Loop: Header=BB0_5 Depth=1 - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 88 # 8-byte Folded Reload sub.d $a1, $a0, $s7 move $a0, $s7 pcaddu18i $ra, %call36(_ZdlPvm) @@ -524,20 +520,20 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc # in Loop: Header=BB0_5 Depth=1 beqz $s6, .LBB0_46 # %bb.45: # in Loop: Header=BB0_5 Depth=1 - ld.d $a0, $sp, 72 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload sub.d $a1, $a0, $s6 move $a0, $s6 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB0_46: # %_ZNSt6vectorIfSaIfEED2Ev.exit109 # in Loop: Header=BB0_5 Depth=1 - ld.d $fp, $sp, 128 # 8-byte Folded Reload - ld.d $s0, $sp, 120 # 8-byte Folded Reload - ld.d $s2, $sp, 104 # 8-byte Folded Reload - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $fp, $sp, 120 # 8-byte Folded Reload + ld.d $s0, $sp, 112 # 8-byte Folded Reload + ld.d $s2, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 136 # 8-byte Folded Reload beqz $a0, .LBB0_48 # %bb.47: # in Loop: Header=BB0_5 Depth=1 - ld.d $a1, $sp, 80 # 8-byte Folded Reload + ld.d $a1, $sp, 72 # 8-byte Folded Reload sub.d $a1, $a1, $a0 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -545,17 +541,17 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc # in Loop: Header=BB0_5 Depth=1 beqz $s4, .LBB0_50 # %bb.49: # in Loop: Header=BB0_5 Depth=1 - ld.d $a0, $sp, 88 # 8-byte Folded Reload + ld.d $a0, $sp, 80 # 8-byte Folded Reload sub.d $a1, $a0, $s4 move $a0, $s4 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB0_50: # %_ZNSt6vectorIfSaIfEED2Ev.exit113 # in Loop: Header=BB0_5 Depth=1 - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload beqz $a0, .LBB0_52 # %bb.51: # in Loop: Header=BB0_5 Depth=1 - ld.d $a1, $sp, 48 # 8-byte Folded Reload + ld.d $a1, $sp, 40 # 8-byte Folded Reload sub.d $a1, $a1, $a0 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -563,7 +559,7 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc # in Loop: Header=BB0_5 Depth=1 beqz $s0, .LBB0_54 # %bb.53: # in Loop: Header=BB0_5 Depth=1 - ld.d $a0, $sp, 56 # 8-byte Folded Reload + ld.d $a0, $sp, 48 # 8-byte Folded Reload sub.d $a1, $a0, $s0 move $a0, $s0 pcaddu18i $ra, %call36(_ZdlPvm) @@ -572,14 +568,14 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc # in Loop: Header=BB0_5 Depth=1 beqz $fp, .LBB0_4 # %bb.55: # in Loop: Header=BB0_5 Depth=1 - ld.d $a0, $sp, 64 # 8-byte Folded Reload + ld.d $a0, $sp, 56 # 8-byte Folded Reload sub.d $a1, $a0, $fp move $a0, $fp pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 b .LBB0_4 .LBB0_56: # %._crit_edge425.loopexit - fld.s $fa0, $sp, 164 + fld.s $fa0, $sp, 156 fcvt.d.s $fa0, $fa0 pcalau12i $a0, %got_pc_hi20(_ZSt4cout) ld.d $fp, $a0, %got_pc_lo12(_ZSt4cout) @@ -594,7 +590,7 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc move $a1, $s1 pcaddu18i $ra, %call36(_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l) jirl $ra, $ra, 0 - fld.s $fa0, $sp, 160 + fld.s $fa0, $sp, 152 fcvt.d.s $fa0, $fa0 move $a0, $s0 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) @@ -604,7 +600,7 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc move $a1, $s1 pcaddu18i $ra, %call36(_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l) jirl $ra, $ra, 0 - fld.s $fa0, $sp, 156 + fld.s $fa0, $sp, 148 fcvt.d.s $fa0, $fa0 move $a0, $s0 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) @@ -615,6 +611,7 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc move $a0, $fp pcaddu18i $ra, %call36(_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l) jirl $ra, $ra, 0 + fld.d $fs0, $sp, 160 # 8-byte Folded Reload ld.d $s8, $sp, 168 # 8-byte Folded Reload ld.d $s7, $sp, 176 # 8-byte Folded Reload ld.d $s6, $sp, 184 # 8-byte Folded Reload @@ -644,17 +641,17 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc .LBB0_60: # %.loopexit .Ltmp8: # EH_LABEL move $s0, $a0 - ld.d $s7, $sp, 128 # 8-byte Folded Reload - ld.d $s8, $sp, 120 # 8-byte Folded Reload - ld.d $s2, $sp, 112 # 8-byte Folded Reload + ld.d $s7, $sp, 120 # 8-byte Folded Reload + ld.d $s8, $sp, 112 # 8-byte Folded Reload + ld.d $s2, $sp, 104 # 8-byte Folded Reload b .LBB0_71 .LBB0_61: .Ltmp11: # EH_LABEL move $s0, $a0 - ld.d $s7, $sp, 128 # 8-byte Folded Reload - ld.d $s8, $sp, 120 # 8-byte Folded Reload - ld.d $s2, $sp, 112 # 8-byte Folded Reload - ld.d $s1, $sp, 88 # 8-byte Folded Reload + ld.d $s7, $sp, 120 # 8-byte Folded Reload + ld.d $s8, $sp, 112 # 8-byte Folded Reload + ld.d $s2, $sp, 104 # 8-byte Folded Reload + ld.d $s1, $sp, 80 # 8-byte Folded Reload b .LBB0_70 .LBB0_62: .Ltmp14: # EH_LABEL @@ -679,24 +676,24 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc .LBB0_67: # %_ZNSt6vectorIfSaIfEED2Ev.exit121 .Ltmp20: # EH_LABEL move $s0, $a0 - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 88 # 8-byte Folded Reload sub.d $a1, $a0, $s7 move $a0, $s7 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB0_68: # %_ZNSt6vectorIfSaIfEED2Ev.exit123 - ld.d $a0, $sp, 72 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload sub.d $a1, $a0, $s6 move $a0, $s6 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB0_69: # %_ZNSt6vectorIfSaIfEED2Ev.exit125 - ld.d $s7, $sp, 128 # 8-byte Folded Reload - ld.d $s8, $sp, 120 # 8-byte Folded Reload - ld.d $s2, $sp, 112 # 8-byte Folded Reload - ld.d $s1, $sp, 88 # 8-byte Folded Reload - ld.d $a0, $sp, 144 # 8-byte Folded Reload - ld.d $a1, $sp, 80 # 8-byte Folded Reload + ld.d $s7, $sp, 120 # 8-byte Folded Reload + ld.d $s8, $sp, 112 # 8-byte Folded Reload + ld.d $s2, $sp, 104 # 8-byte Folded Reload + ld.d $s1, $sp, 80 # 8-byte Folded Reload + ld.d $a0, $sp, 136 # 8-byte Folded Reload + ld.d $a1, $sp, 72 # 8-byte Folded Reload sub.d $a1, $a1, $a0 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -716,21 +713,21 @@ _Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc: # @_Z3runPFviPfS_S_S_fffffRfS0_S0_EPKc pcaddu18i $ra, %call36(_Unwind_Resume) jirl $ra, $ra, 0 .LBB0_75: - ld.d $a0, $sp, 48 # 8-byte Folded Reload + ld.d $a0, $sp, 40 # 8-byte Folded Reload sub.d $a1, $a0, $s2 move $a0, $s2 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 beqz $s8, .LBB0_73 .LBB0_76: - ld.d $a0, $sp, 56 # 8-byte Folded Reload + ld.d $a0, $sp, 48 # 8-byte Folded Reload sub.d $a1, $a0, $s8 move $a0, $s8 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 beqz $s7, .LBB0_74 .LBB0_77: - ld.d $a0, $sp, 64 # 8-byte Folded Reload + ld.d $a0, $sp, 56 # 8-byte Folded Reload sub.d $a1, $a0, $s7 move $a0, $s7 pcaddu18i $ra, %call36(_ZdlPvm) diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/HPCCG/CMakeFiles/HPCCG.dir/mytimer.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/HPCCG/CMakeFiles/HPCCG.dir/mytimer.s index 98c5e570..34f02a83 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/HPCCG/CMakeFiles/HPCCG.dir/mytimer.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/HPCCG/CMakeFiles/HPCCG.dir/mytimer.s @@ -1,10 +1,6 @@ .file "mytimer.cpp" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z7mytimerv -.LCPI0_0: - .dword 0x412e848000000000 # double 1.0E+6 .text - .globl _Z7mytimerv + .globl _Z7mytimerv # -- Begin function _Z7mytimerv .p2align 5 .type _Z7mytimerv,@function _Z7mytimerv: # @_Z7mytimerv @@ -16,14 +12,16 @@ _Z7mytimerv: # @_Z7mytimerv pcaddu18i $ra, %call36(getrusage) jirl $ra, $ra, 0 ld.d $a0, $sp, 8 + ld.d $a1, $sp, 16 movgr2fr.d $fa0, $a0 - ld.d $a0, $sp, 16 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI0_0) ffint.d.l $fa0, $fa0 + movgr2fr.d $fa1, $a1 + ffint.d.l $fa1, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 movgr2fr.d $fa2, $a0 - ffint.d.l $fa2, $fa2 - fdiv.d $fa1, $fa2, $fa1 + fdiv.d $fa1, $fa1, $fa2 fadd.d $fa0, $fa1, $fa0 ld.d $ra, $sp, 152 # 8-byte Folded Reload addi.d $sp, $sp, 160 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/Driver.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/Driver.s index d9d61674..fc3a78b5 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/Driver.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/Driver.s @@ -3,15 +3,9 @@ .globl _ZSt21ios_base_library_initv # End of file scope inline assembly - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN6DriverC2EPK9InputFileRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE -.LCPI0_0: - .dword 0x547d42aea2879f2e # double 9.9999999999999997E+98 -.LCPI0_1: - .dword 0x3ff3333333333333 # double 1.2 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI0_2: + .p2align 4, 0x0 # -- Begin function _ZN6DriverC2EPK9InputFileRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE +.LCPI0_0: .dword 8 # 0x8 .dword 8390891584273675364 # 0x74726f7065727464 .text @@ -332,8 +326,11 @@ _ZN6DriverC2EPK9InputFileRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE: st.d $a0, $sp, 16 st.b $zero, $sp, 29 .Ltmp41: # EH_LABEL - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_0) + lu12i.w $a0, -382855 + ori $a0, $a0, 3886 + lu32i.d $a0, -179538 + lu52i.d $a0, $a0, 1351 + movgr2fr.d $fs0, $a0 addi.d $a1, $sp, 8 move $a0, $s1 fmov.d $fa0, $fs0 @@ -422,8 +419,11 @@ _ZN6DriverC2EPK9InputFileRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE: st.d $a0, $sp, 16 st.b $zero, $sp, 29 .Ltmp50: # EH_LABEL - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_1) + lu12i.w $a0, 209715 + ori $a0, $a0, 819 + lu32i.d $a0, 209715 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa0, $a0 addi.d $a1, $sp, 8 move $a0, $s1 pcaddu18i $ra, %call36(_ZNK9InputFile9getDoubleERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEd) @@ -439,8 +439,8 @@ _ZN6DriverC2EPK9InputFileRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE: pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB0_55: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit76 - pcalau12i $a0, %pc_hi20(.LCPI0_2) - vld $vr0, $a0, %pc_lo12(.LCPI0_2) + pcalau12i $a0, %pc_hi20(.LCPI0_0) + vld $vr0, $a0, %pc_lo12(.LCPI0_0) st.d $s7, $sp, 8 vst $vr0, $sp, 16 st.b $zero, $sp, 32 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/GenMesh.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/GenMesh.s index a21e5aef..25627511 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/GenMesh.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/GenMesh.s @@ -8,16 +8,6 @@ .LCPI0_0: .dword 8 # 0x8 .dword 7309475736097875309 # 0x657079746873656d - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI0_1: - .dword 0x4071abe4b73fefb5 # double 282.74333882308139 -.LCPI0_2: - .dword 0x400921fb54442d18 # double 3.1415926535897931 -.LCPI0_3: - .dword 0x4066800000000000 # double 180 -.LCPI0_4: - .dword 0x401921fb54442d18 # double 6.2831853071795862 .text .globl _ZN7GenMeshC2EPK9InputFile .p2align 5 @@ -302,8 +292,11 @@ _ZN7GenMeshC2EPK9InputFile: # @_ZN7GenMeshC2EPK9InputFile vst $vr2, $sp, 16 # 16-byte Folded Spill pcaddu18i $ra, %call36(bcmp) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI0_4) - fld.d $fa0, $a1, %pc_lo12(.LCPI0_4) + lu12i.w $a1, 345154 + ori $a1, $a1, 3352 + lu32i.d $a1, -450053 + lu52i.d $a1, $a1, 1025 + movgr2fr.d $fa0, $a1 vld $vr1, $sp, 16 # 16-byte Folded Reload fcmp.cult.d $fcc0, $fa1, $fa0 bcnez $fcc0, .LBB0_49 @@ -332,16 +325,24 @@ _ZN7GenMeshC2EPK9InputFile: # @_ZN7GenMeshC2EPK9InputFile bltu $s6, $s8, .LBB0_52 # %bb.51: fld.d $fa0, $s0, 16 - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_2) + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 b .LBB0_53 .LBB0_52: - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_1) + lu12i.w $a0, -297986 + ori $a0, $a0, 4021 + lu32i.d $a0, 109540 + lu52i.d $a0, $a0, 1031 + movgr2fr.d $fa0, $a0 .LBB0_53: - pcalau12i $a0, %pc_hi20(.LCPI0_3) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_3) + ori $a0, $zero, 0 + lu32i.d $a0, 425984 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa1, $a0 fdiv.d $fa2, $fa0, $fa1 ori $a0, $zero, 32 fst.d $fa2, $fp, 40 @@ -800,14 +801,7 @@ _ZN7GenMesh8generateERSt6vectorI7double2SaIS1_EERS0_IiSaIiEES7_S7_S7_S7_S7_S7_S7 .size _ZN7GenMesh8generateERSt6vectorI7double2SaIS1_EERS0_IiSaIiEES7_S7_S7_S7_S7_S7_S7_S7_, .Lfunc_end2-_ZN7GenMesh8generateERSt6vectorI7double2SaIS1_EERS0_IiSaIiEES7_S7_S7_S7_S7_S7_S7_S7_ .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN7GenMesh9calcNumPEEv -.LCPI3_0: - .dword 0x3d719799812dea11 # double 9.9999999999999998E-13 -.LCPI3_1: - .dword 0xbd719799812dea11 # double -9.9999999999999998E-13 - .text - .globl _ZN7GenMesh9calcNumPEEv + .globl _ZN7GenMesh9calcNumPEEv # -- Begin function _ZN7GenMesh9calcNumPEEv .p2align 5 .type _ZN7GenMesh9calcNumPEEv,@function _ZN7GenMesh9calcNumPEEv: # @_ZN7GenMesh9calcNumPEEv @@ -840,41 +834,44 @@ _ZN7GenMesh9calcNumPEEv: # @_ZN7GenMesh9calcNumPEEv fcmp.cor.d $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB3_9 .LBB3_1: # %.split - pcalau12i $a1, %pc_hi20(.LCPI3_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI3_0) + lu12i.w $a1, -519458 + ori $a2, $a1, 2577 + lu32i.d $a2, 104345 + lu52i.d $a1, $a2, 983 + movgr2fr.d $fa1, $a1 fadd.d $fa1, $fa0, $fa1 vreplvei.d $vr1, $vr1, 0 vfrintrm.d $vr1, $vr1 ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a2, $fa1 - ori $a3, $zero, 1 - slt $a4, $a3, $a2 + movfr2gr.s $a3, $fa1 + ori $a4, $zero, 1 + slt $a5, $a4, $a3 ld.w $a1, $fp, 0 - maskeqz $a2, $a2, $a4 - masknez $a3, $a3, $a4 - or $a2, $a2, $a3 + maskeqz $a3, $a3, $a5 + masknez $a4, $a4, $a5 + or $a4, $a3, $a4 .p2align 4, , 16 .LBB3_2: # =>This Inner Loop Header: Depth=1 - div.w $a3, $a1, $a2 - mul.d $a4, $a3, $a2 - sub.w $a4, $a1, $a4 - addi.w $a2, $a2, -1 - bnez $a4, .LBB3_2 + div.w $a3, $a1, $a4 + mul.d $a5, $a3, $a4 + sub.w $a5, $a1, $a5 + addi.w $a4, $a4, -1 + bnez $a5, .LBB3_2 # %bb.3: - pcalau12i $a4, %pc_hi20(.LCPI3_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI3_1) + lu52i.d $a2, $a2, -1065 + movgr2fr.d $fa1, $a2 fadd.d $fa0, $fa0, $fa1 vreplvei.d $vr0, $vr0, 0 vfrintrp.d $vr0, $vr0 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a4, $fa0 - addi.w $a2, $a2, 1 + movfr2gr.s $a5, $fa0 + addi.w $a2, $a4, 1 .p2align 4, , 16 .LBB3_4: # =>This Inner Loop Header: Depth=1 - div.w $a5, $a1, $a4 - mul.d $a6, $a5, $a4 + div.w $a4, $a1, $a5 + mul.d $a6, $a4, $a5 sub.w $a6, $a1, $a6 - addi.w $a4, $a4, 1 + addi.w $a5, $a5, 1 bnez $a6, .LBB3_4 # %bb.5: movgr2fr.w $fa0, $a2 @@ -885,11 +882,11 @@ _ZN7GenMesh9calcNumPEEv: # @_ZN7GenMesh9calcNumPEEv fdiv.d $fa1, $fs2, $fa1 fcmp.clt.d $fcc0, $fa0, $fa1 fsel $fa0, $fa0, $fa1, $fcc0 - addi.d $a3, $a4, -1 + addi.d $a3, $a5, -1 movgr2fr.w $fa1, $a3 ffint.d.w $fa1, $fa1 fdiv.d $fa1, $fs3, $fa1 - movgr2fr.w $fa2, $a5 + movgr2fr.w $fa2, $a4 ffint.d.w $fa2, $fa2 fdiv.d $fa2, $fs2, $fa2 fcmp.clt.d $fcc0, $fa1, $fa2 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/Hydro.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/Hydro.s index 69741359..067aeeb6 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/Hydro.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/Hydro.s @@ -3,24 +3,18 @@ .globl _ZSt21ios_base_library_initv # End of file scope inline assembly - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN5HydroC2EPK9InputFileP4Mesh -.LCPI0_0: - .dword 0x3fe3333333333333 # double 0.59999999999999998 -.LCPI0_1: - .dword 0x3fb999999999999a # double 0.10000000000000001 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI0_2: + .p2align 4, 0x0 # -- Begin function _ZN5HydroC2EPK9InputFileP4Mesh +.LCPI0_0: .dword 8 # 0x8 .dword 7094703731814328690 # 0x62757374696e6972 -.LCPI0_3: +.LCPI0_1: .dword 8 # 0x8 .dword 7094703731814328677 # 0x62757374696e6965 -.LCPI0_4: +.LCPI0_2: .dword 0x3ff0000000000000 # double 1 .dword 0x0000000000000000 # double 0 -.LCPI0_5: +.LCPI0_3: .dword 0x0000000000000000 # double 0 .dword 0x3ff0000000000000 # double 1 .text @@ -81,8 +75,11 @@ _ZN5HydroC2EPK9InputFileP4Mesh: # @_ZN5HydroC2EPK9InputFileP4Mesh st.d $a0, $sp, 144 st.b $zero, $sp, 155 .Ltmp0: # EH_LABEL - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_0) + lu12i.w $a0, 209715 + ori $a0, $a0, 819 + lu32i.d $a0, 209715 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa0, $a0 addi.d $a1, $sp, 136 move $a0, $s0 pcaddu18i $ra, %call36(_ZNK9InputFile9getDoubleERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEd) @@ -106,8 +103,11 @@ _ZN5HydroC2EPK9InputFileP4Mesh: # @_ZN5HydroC2EPK9InputFileP4Mesh st.d $a0, $sp, 144 st.b $zero, $sp, 156 .Ltmp3: # EH_LABEL - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_1) + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1019 + movgr2fr.d $fa0, $a0 addi.d $a1, $sp, 136 move $a0, $s0 pcaddu18i $ra, %call36(_ZNK9InputFile9getDoubleERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEd) @@ -177,8 +177,8 @@ _ZN5HydroC2EPK9InputFileP4Mesh: # @_ZN5HydroC2EPK9InputFileP4Mesh pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB0_12: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit91 - pcalau12i $a0, %pc_hi20(.LCPI0_2) - vld $vr0, $a0, %pc_lo12(.LCPI0_2) + pcalau12i $a0, %pc_hi20(.LCPI0_0) + vld $vr0, $a0, %pc_lo12(.LCPI0_0) st.d $s1, $sp, 136 vst $vr0, $sp, 144 st.b $zero, $sp, 160 @@ -199,8 +199,8 @@ _ZN5HydroC2EPK9InputFileP4Mesh: # @_ZN5HydroC2EPK9InputFileP4Mesh pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB0_15: # %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit98 - pcalau12i $a0, %pc_hi20(.LCPI0_3) - vld $vr0, $a0, %pc_lo12(.LCPI0_3) + pcalau12i $a0, %pc_hi20(.LCPI0_1) + vld $vr0, $a0, %pc_lo12(.LCPI0_1) st.d $s1, $sp, 136 vst $vr0, $sp, 144 st.b $zero, $sp, 160 @@ -419,8 +419,8 @@ _ZN5HydroC2EPK9InputFileP4Mesh: # @_ZN5HydroC2EPK9InputFileP4Mesh # %bb.44: # %.lr.ph move $s5, $zero move $s6, $zero - pcalau12i $a0, %pc_hi20(.LCPI0_4) - vld $vr0, $a0, %pc_lo12(.LCPI0_4) + pcalau12i $a0, %pc_hi20(.LCPI0_2) + vld $vr0, $a0, %pc_lo12(.LCPI0_2) vst $vr0, $sp, 32 # 16-byte Folded Spill addi.w $a0, $zero, -8 lu52i.d $a0, $a0, 2047 @@ -549,8 +549,8 @@ _ZN5HydroC2EPK9InputFileP4Mesh: # @_ZN5HydroC2EPK9InputFileP4Mesh # %bb.60: # %.lr.ph226 move $s5, $zero move $s6, $zero - pcalau12i $a0, %pc_hi20(.LCPI0_5) - vld $vr0, $a0, %pc_lo12(.LCPI0_5) + pcalau12i $a0, %pc_hi20(.LCPI0_3) + vld $vr0, $a0, %pc_lo12(.LCPI0_3) vst $vr0, $sp, 32 # 16-byte Folded Spill addi.w $a0, $zero, -8 lu52i.d $a0, $a0, 2047 @@ -1027,14 +1027,8 @@ GCC_except_table0: .Lcst_end0: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN5Hydro4initEv -.LCPI1_0: - .dword 0xbd719799812dea11 # double -9.9999999999999998E-13 -.LCPI1_1: - .dword 0x3d719799812dea11 # double 9.9999999999999998E-13 .text - .globl _ZN5Hydro4initEv + .globl _ZN5Hydro4initEv # -- Begin function _ZN5Hydro4initEv .p2align 5 .type _ZN5Hydro4initEv,@function _ZN5Hydro4initEv: # @_ZN5Hydro4initEv @@ -1055,6 +1049,7 @@ _ZN5Hydro4initEv: # @_ZN5Hydro4initEv st.d $s8, $sp, 232 # 8-byte Folded Spill fst.d $fs0, $sp, 224 # 8-byte Folded Spill fst.d $fs1, $sp, 216 # 8-byte Folded Spill + fst.d $fs2, $sp, 208 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -1068,14 +1063,15 @@ _ZN5Hydro4initEv: # @_ZN5Hydro4initEv .cfi_offset 31, -88 .cfi_offset 56, -96 .cfi_offset 57, -104 + .cfi_offset 58, -112 move $fp, $a0 ld.d $a0, $a0, 0 ld.wu $s8, $a0, 504 - ld.w $s4, $a0, 560 + ld.w $s5, $a0, 560 ld.w $s2, $a0, 60 ld.w $s3, $a0, 68 - ld.w $s0, $a0, 72 - ld.d $s7, $a0, 256 + ld.w $s4, $a0, 72 + ld.d $s0, $a0, 256 st.d $a0, $sp, 8 # 8-byte Folded Spill ld.d $a0, $a0, 320 st.d $a0, $sp, 184 # 8-byte Folded Spill @@ -1103,7 +1099,7 @@ _ZN5Hydro4initEv: # @_ZN5Hydro4initEv pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 st.d $a0, $fp, 280 - slli.d $a0, $s0, 3 + slli.d $a0, $s4, 3 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 st.d $a0, $fp, 288 @@ -1153,7 +1149,7 @@ _ZN5Hydro4initEv: # @_ZN5Hydro4initEv pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 st.d $a0, $fp, 368 - slli.d $s6, $s0, 4 + slli.d $s6, $s4, 4 move $a0, $s6 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 @@ -1170,17 +1166,16 @@ _ZN5Hydro4initEv: # @_ZN5Hydro4initEv pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 st.d $a0, $fp, 400 - pcalau12i $s6, %pc_hi20(.LCPI1_1) - st.d $s4, $sp, 192 # 8-byte Folded Spill - blez $s4, .LBB1_34 + st.d $s5, $sp, 192 # 8-byte Folded Spill + blez $s5, .LBB1_34 # %bb.1: # %.lr.ph97 - move $s5, $zero + move $s6, $zero ld.d $a0, $sp, 8 # 8-byte Folded Reload ld.d $t0, $a0, 568 ld.d $t1, $a0, 592 - fld.d $fa4, $fp, 72 - fld.d $fa5, $fp, 80 - ld.d $s4, $a0, 32 + fld.d $fa3, $fp, 72 + fld.d $fa4, $fp, 80 + ld.d $s1, $a0, 32 ld.d $t2, $a0, 40 fld.d $fs0, $fp, 88 fld.d $fs1, $fp, 96 @@ -1197,8 +1192,8 @@ _ZN5Hydro4initEv: # @_ZN5Hydro4initEv sltui $a1, $a1, 16 or $a0, $a0, $a1 sub.d $a1, $a6, $s3 - vreplvei.d $vr6, $vr4, 0 - vreplvei.d $vr0, $vr5, 0 + vreplvei.d $vr5, $vr3, 0 + vreplvei.d $vr0, $vr4, 0 vst $vr0, $sp, 48 # 16-byte Folded Spill sltui $a2, $a2, 16 or $a0, $a0, $a2 @@ -1215,22 +1210,27 @@ _ZN5Hydro4initEv: # @_ZN5Hydro4initEv st.d $a0, $sp, 40 # 8-byte Folded Spill addi.d $a0, $s3, 16 st.d $a0, $sp, 32 # 8-byte Folded Spill - addi.d $a0, $s7, 8 + addi.d $a0, $s0, 8 st.d $a0, $sp, 64 # 8-byte Folded Spill + lu12i.w $a0, -519458 + ori $s0, $a0, 2577 + lu32i.d $s0, 104345 + lu52i.d $a0, $s0, -1065 + movgr2fr.d $fs2, $a0 ori $t3, $zero, 24 st.d $t0, $sp, 152 # 8-byte Folded Spill st.d $t1, $sp, 144 # 8-byte Folded Spill - vst $vr4, $sp, 128 # 16-byte Folded Spill + vst $vr3, $sp, 128 # 16-byte Folded Spill st.d $t2, $sp, 104 # 8-byte Folded Spill - vst $vr6, $sp, 80 # 16-byte Folded Spill - vst $vr5, $sp, 112 # 16-byte Folded Spill + vst $vr5, $sp, 80 # 16-byte Folded Spill + vst $vr4, $sp, 112 # 16-byte Folded Spill b .LBB1_3 .p2align 4, , 16 .LBB1_2: # %._crit_edge # in Loop: Header=BB1_3 Depth=1 - addi.d $s5, $s5, 1 + addi.d $s6, $s6, 1 ld.d $a0, $sp, 192 # 8-byte Folded Reload - beq $s5, $a0, .LBB1_34 + beq $s6, $a0, .LBB1_34 .LBB1_3: # =>This Loop Header: Depth=1 # Child Loop BB1_6 Depth 2 # Child Loop BB1_9 Depth 2 @@ -1239,16 +1239,16 @@ _ZN5Hydro4initEv: # @_ZN5Hydro4initEv # Child Loop BB1_21 Depth 2 # Child Loop BB1_30 Depth 2 # Child Loop BB1_33 Depth 2 - slli.d $a0, $s5, 2 - ldx.w $s0, $t0, $a0 - ldx.w $s7, $t1, $a0 - sub.d $s1, $s7, $s0 - beq $s0, $s7, .LBB1_17 + slli.d $a0, $s6, 2 + ldx.w $s4, $t0, $a0 + ldx.w $s5, $t1, $a0 + sub.d $s7, $s5, $s4 + beq $s4, $s5, .LBB1_17 # %bb.4: # %.lr.ph.i.i.i.preheader # in Loop: Header=BB1_3 Depth=1 - alsl.d $a2, $s0, $s2, 3 - slli.d $a1, $s0, 3 - slli.d $a0, $s7, 3 + alsl.d $a2, $s4, $s2, 3 + slli.d $a1, $s4, 3 + slli.d $a0, $s5, 3 sub.d $a0, $a0, $a1 addi.d $a3, $a0, -8 srli.d $a0, $a3, 3 @@ -1267,8 +1267,8 @@ _ZN5Hydro4initEv: # @_ZN5Hydro4initEv .LBB1_6: # %vector.body160 # Parent Loop BB1_3 Depth=1 # => This Inner Loop Header: Depth=2 - vst $vr6, $a5, -16 - vst $vr6, $a5, 0 + vst $vr5, $a5, -16 + vst $vr5, $a5, 0 addi.d $a6, $a6, -4 addi.d $a5, $a5, 32 bnez $a6, .LBB1_6 @@ -1277,17 +1277,17 @@ _ZN5Hydro4initEv: # @_ZN5Hydro4initEv beq $a0, $a4, .LBB1_10 .LBB1_8: # %.lr.ph.i.i.i.preheader171 # in Loop: Header=BB1_3 Depth=1 - alsl.d $a4, $s7, $s2, 3 + alsl.d $a4, $s5, $s2, 3 .p2align 4, , 16 .LBB1_9: # %.lr.ph.i.i.i # Parent Loop BB1_3 Depth=1 # => This Inner Loop Header: Depth=2 - fst.d $fa4, $a2, 0 + fst.d $fa3, $a2, 0 addi.d $a2, $a2, 8 bne $a2, $a4, .LBB1_9 .LBB1_10: # %.lr.ph.i.i.i77.preheader # in Loop: Header=BB1_3 Depth=1 - alsl.d $a2, $s0, $s3, 3 + alsl.d $a2, $s4, $s3, 3 bltu $a3, $t3, .LBB1_14 # %bb.11: # %vector.ph143 # in Loop: Header=BB1_3 Depth=1 @@ -1313,42 +1313,42 @@ _ZN5Hydro4initEv: # @_ZN5Hydro4initEv beq $a0, $a3, .LBB1_16 .LBB1_14: # %.lr.ph.i.i.i77.preheader170 # in Loop: Header=BB1_3 Depth=1 - alsl.d $a0, $s7, $s3, 3 + alsl.d $a0, $s5, $s3, 3 .p2align 4, , 16 .LBB1_15: # %.lr.ph.i.i.i77 # Parent Loop BB1_3 Depth=1 # => This Inner Loop Header: Depth=2 - fst.d $fa5, $a2, 0 + fst.d $fa4, $a2, 0 addi.d $a2, $a2, 8 bne $a2, $a0, .LBB1_15 .LBB1_16: # %.lr.ph.i.i.i82.preheader # in Loop: Header=BB1_3 Depth=1 ld.d $a0, $sp, 160 # 8-byte Folded Reload - alsl.d $a0, $s0, $a0, 3 - slli.d $a2, $s1, 3 + alsl.d $a0, $s4, $a0, 3 + slli.d $a2, $s7, 3 move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - vld $vr5, $sp, 112 # 16-byte Folded Reload + vld $vr4, $sp, 112 # 16-byte Folded Reload ld.d $t0, $sp, 152 # 8-byte Folded Reload ld.d $t1, $sp, 144 # 8-byte Folded Reload - vld $vr4, $sp, 128 # 16-byte Folded Reload + vld $vr3, $sp, 128 # 16-byte Folded Reload ld.d $t2, $sp, 104 # 8-byte Folded Reload - vld $vr6, $sp, 80 # 16-byte Folded Reload + vld $vr5, $sp, 80 # 16-byte Folded Reload ori $t3, $zero, 24 .LBB1_17: # %_ZSt4fillIPddEvT_S1_RKT0_.exit85 # in Loop: Header=BB1_3 Depth=1 - beq $s4, $t2, .LBB1_26 + beq $s1, $t2, .LBB1_26 # %bb.18: # %_ZSt4fillIPddEvT_S1_RKT0_.exit85 # in Loop: Header=BB1_3 Depth=1 - bge $s0, $s7, .LBB1_26 + bge $s4, $s5, .LBB1_26 # %bb.19: # %.lr.ph.preheader # in Loop: Header=BB1_3 Depth=1 ld.d $a0, $sp, 64 # 8-byte Folded Reload - alsl.d $a0, $s0, $a0, 4 - alsl.d $a1, $s0, $s2, 3 - alsl.d $a2, $s0, $s3, 3 - move $a3, $s1 + alsl.d $a0, $s4, $a0, 4 + alsl.d $a1, $s4, $s2, 3 + alsl.d $a2, $s4, $s3, 3 + move $a3, $s7 b .LBB1_21 .p2align 4, , 16 .LBB1_20: # in Loop: Header=BB1_21 Depth=2 @@ -1360,29 +1360,28 @@ _ZN5Hydro4initEv: # @_ZN5Hydro4initEv .LBB1_21: # %.lr.ph # Parent Loop BB1_3 Depth=1 # => This Inner Loop Header: Depth=2 - fld.d $fa1, $s4, 0 - pcalau12i $a4, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a4, %pc_lo12(.LCPI1_0) - fld.d $fa2, $a0, -8 - fadd.d $fa1, $fa1, $fa0 - fcmp.cule.d $fcc0, $fa2, $fa1 + fld.d $fa0, $s1, 0 + fld.d $fa1, $a0, -8 + fadd.d $fa0, $fa0, $fs2 + fcmp.cule.d $fcc0, $fa1, $fa0 bcnez $fcc0, .LBB1_20 # %bb.22: # in Loop: Header=BB1_21 Depth=2 - fld.d $fa3, $s4, 8 - fld.d $fa1, $s6, %pc_lo12(.LCPI1_1) - fadd.d $fa3, $fa3, $fa1 - fcmp.cule.d $fcc0, $fa3, $fa2 + fld.d $fa2, $s1, 8 + lu52i.d $a4, $s0, 983 + movgr2fr.d $fa0, $a4 + fadd.d $fa2, $fa2, $fa0 + fcmp.cule.d $fcc0, $fa2, $fa1 bcnez $fcc0, .LBB1_20 # %bb.23: # in Loop: Header=BB1_21 Depth=2 - fld.d $fa3, $s4, 16 - fld.d $fa2, $a0, 0 - fadd.d $fa0, $fa3, $fa0 - fcmp.cule.d $fcc0, $fa2, $fa0 + fld.d $fa2, $s1, 16 + fld.d $fa1, $a0, 0 + fadd.d $fa2, $fa2, $fs2 + fcmp.cule.d $fcc0, $fa1, $fa2 bcnez $fcc0, .LBB1_20 # %bb.24: # in Loop: Header=BB1_21 Depth=2 - fld.d $fa0, $s4, 24 - fadd.d $fa0, $fa0, $fa1 - fcmp.cule.d $fcc0, $fa0, $fa2 + fld.d $fa2, $s1, 24 + fadd.d $fa0, $fa2, $fa0 + fcmp.cule.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB1_20 # %bb.25: # in Loop: Header=BB1_21 Depth=2 fst.d $fs0, $a1, 0 @@ -1391,31 +1390,31 @@ _ZN5Hydro4initEv: # @_ZN5Hydro4initEv .p2align 4, , 16 .LBB1_26: # %.loopexit # in Loop: Header=BB1_3 Depth=1 - bge $s0, $s7, .LBB1_2 + bge $s4, $s5, .LBB1_2 # %bb.27: # %.lr.ph94.preheader # in Loop: Header=BB1_3 Depth=1 - sltui $a0, $s1, 6 + sltui $a0, $s7, 6 ld.d $a1, $sp, 72 # 8-byte Folded Reload or $a0, $a0, $a1 andi $a0, $a0, 1 beqz $a0, .LBB1_29 # %bb.28: # in Loop: Header=BB1_3 Depth=1 - move $a0, $s0 + move $a0, $s4 b .LBB1_32 .p2align 4, , 16 .LBB1_29: # %vector.ph # in Loop: Header=BB1_3 Depth=1 - move $a1, $s1 + move $a1, $s7 bstrins.d $a1, $zero, 0, 0 - add.d $a0, $a1, $s0 + add.d $a0, $a1, $s4 ld.d $a2, $sp, 168 # 8-byte Folded Reload - alsl.d $a2, $s0, $a2, 3 - alsl.d $a3, $s0, $s3, 3 + alsl.d $a2, $s4, $a2, 3 + alsl.d $a3, $s4, $s3, 3 ld.d $a4, $sp, 176 # 8-byte Folded Reload - alsl.d $a4, $s0, $a4, 3 + alsl.d $a4, $s4, $a4, 3 ld.d $a5, $sp, 184 # 8-byte Folded Reload - alsl.d $a5, $s0, $a5, 3 - alsl.d $a6, $s0, $s2, 3 + alsl.d $a5, $s4, $a5, 3 + alsl.d $a6, $s4, $s2, 3 move $a7, $a1 .p2align 4, , 16 .LBB1_30: # %vector.body @@ -1437,7 +1436,7 @@ _ZN5Hydro4initEv: # @_ZN5Hydro4initEv bnez $a7, .LBB1_30 # %bb.31: # %middle.block # in Loop: Header=BB1_3 Depth=1 - beq $s1, $a1, .LBB1_2 + beq $s7, $a1, .LBB1_2 .LBB1_32: # %.lr.ph94.preheader169 # in Loop: Header=BB1_3 Depth=1 alsl.d $a1, $a0, $s2, 3 @@ -1448,7 +1447,7 @@ _ZN5Hydro4initEv: # @_ZN5Hydro4initEv alsl.d $a4, $a0, $s3, 3 ld.d $a5, $sp, 168 # 8-byte Folded Reload alsl.d $a5, $a0, $a5, 3 - sub.d $a0, $s7, $a0 + sub.d $a0, $s5, $a0 .p2align 4, , 16 .LBB1_33: # %.lr.ph94 # Parent Loop BB1_3 Depth=1 @@ -1478,8 +1477,12 @@ _ZN5Hydro4initEv: # @_ZN5Hydro4initEv fcmp.ceq.d $fcc0, $fa0, $fs0 bcnez $fcc0, .LBB1_47 # %bb.36: # %.lr.ph99.split.preheader - fld.d $fs1, $s6, %pc_lo12(.LCPI1_1) move $s0, $zero + lu12i.w $a0, -519458 + ori $a0, $a0, 2577 + lu32i.d $a0, 104345 + lu52i.d $a0, $a0, 983 + movgr2fr.d $fs1, $a0 vrepli.b $vr4, 0 vst $vr4, $sp, 192 # 16-byte Folded Spill b .LBB1_38 @@ -1588,6 +1591,7 @@ _ZN5Hydro4initEv: # @_ZN5Hydro4initEv ld.d $a0, $a0, 6 st.d $a1, $fp, 168 st.d $a0, $fp, 174 + fld.d $fs2, $sp, 208 # 8-byte Folded Reload fld.d $fs1, $sp, 216 # 8-byte Folded Reload fld.d $fs0, $sp, 224 # 8-byte Folded Reload ld.d $s8, $sp, 232 # 8-byte Folded Reload @@ -1719,12 +1723,7 @@ _ZN5HydroD2Ev: # @_ZN5HydroD2Ev .size _ZN5HydroD2Ev, .Lfunc_end2-_ZN5HydroD2Ev .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN5Hydro13initRadialVelEdii -.LCPI3_0: - .dword 0x3d719799812dea11 # double 9.9999999999999998E-13 - .text - .globl _ZN5Hydro13initRadialVelEdii + .globl _ZN5Hydro13initRadialVelEdii # -- Begin function _ZN5Hydro13initRadialVelEdii .p2align 5 .type _ZN5Hydro13initRadialVelEdii,@function _ZN5Hydro13initRadialVelEdii: # @_ZN5Hydro13initRadialVelEdii @@ -1737,10 +1736,13 @@ _ZN5Hydro13initRadialVelEdii: # @_ZN5Hydro13initRadialVelEdii ld.d $a4, $a0, 248 alsl.d $a0, $a1, $a3, 4 addi.d $a0, $a0, 8 - pcalau12i $a3, %pc_hi20(.LCPI3_0) - fld.d $fa1, $a3, %pc_lo12(.LCPI3_0) alsl.d $a3, $a1, $a4, 4 sub.d $a1, $a2, $a1 + lu12i.w $a2, -519458 + ori $a2, $a2, 2577 + lu32i.d $a2, 104345 + lu52i.d $a2, $a2, 983 + movgr2fr.d $fa1, $a2 vrepli.b $vr2, 0 b .LBB3_3 .p2align 4, , 16 @@ -1796,31 +1798,26 @@ _ZN5Hydro12resetDtHydroEv: # @_ZN5Hydro12resetDtHydroEv .Lfunc_end4: .size _ZN5Hydro12resetDtHydroEv, .Lfunc_end4-_ZN5Hydro12resetDtHydroEv # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN5Hydro7doCycleEd -.LCPI5_0: - .dword 0x2b617f7d4ed8c33e # double 1.0E-99 - .text - .globl _ZN5Hydro7doCycleEd + .globl _ZN5Hydro7doCycleEd # -- Begin function _ZN5Hydro7doCycleEd .p2align 5 .type _ZN5Hydro7doCycleEd,@function _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd .cfi_startproc # %bb.0: - addi.d $sp, $sp, -384 - .cfi_def_cfa_offset 384 - st.d $ra, $sp, 376 # 8-byte Folded Spill - st.d $fp, $sp, 368 # 8-byte Folded Spill - st.d $s0, $sp, 360 # 8-byte Folded Spill - st.d $s1, $sp, 352 # 8-byte Folded Spill - st.d $s2, $sp, 344 # 8-byte Folded Spill - st.d $s3, $sp, 336 # 8-byte Folded Spill - st.d $s4, $sp, 328 # 8-byte Folded Spill - st.d $s5, $sp, 320 # 8-byte Folded Spill - st.d $s6, $sp, 312 # 8-byte Folded Spill - st.d $s7, $sp, 304 # 8-byte Folded Spill - st.d $s8, $sp, 296 # 8-byte Folded Spill - fst.d $fs0, $sp, 288 # 8-byte Folded Spill + addi.d $sp, $sp, -368 + .cfi_def_cfa_offset 368 + st.d $ra, $sp, 360 # 8-byte Folded Spill + st.d $fp, $sp, 352 # 8-byte Folded Spill + st.d $s0, $sp, 344 # 8-byte Folded Spill + st.d $s1, $sp, 336 # 8-byte Folded Spill + st.d $s2, $sp, 328 # 8-byte Folded Spill + st.d $s3, $sp, 320 # 8-byte Folded Spill + st.d $s4, $sp, 312 # 8-byte Folded Spill + st.d $s5, $sp, 304 # 8-byte Folded Spill + st.d $s6, $sp, 296 # 8-byte Folded Spill + st.d $s7, $sp, 288 # 8-byte Folded Spill + st.d $s8, $sp, 280 # 8-byte Folded Spill + fst.d $fs0, $sp, 272 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -1835,48 +1832,48 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd .cfi_offset 56, -96 move $fp, $a0 ld.d $a0, $a0, 0 - ld.w $s1, $a0, 504 + ld.w $s2, $a0, 504 ld.w $a1, $a0, 400 - st.d $a1, $sp, 208 # 8-byte Folded Spill + st.d $a1, $sp, 200 # 8-byte Folded Spill ld.d $a1, $a0, 240 - st.d $a1, $sp, 224 # 8-byte Folded Spill + st.d $a1, $sp, 208 # 8-byte Folded Spill ld.d $a1, $a0, 248 - st.d $a1, $sp, 120 # 8-byte Folded Spill + st.d $a1, $sp, 128 # 8-byte Folded Spill ld.d $a1, $a0, 256 - st.d $a1, $sp, 112 # 8-byte Folded Spill + st.d $a1, $sp, 120 # 8-byte Folded Spill ld.d $a1, $a0, 296 - st.d $a1, $sp, 104 # 8-byte Folded Spill + st.d $a1, $sp, 112 # 8-byte Folded Spill ld.d $a1, $a0, 304 - st.d $a1, $sp, 96 # 8-byte Folded Spill + st.d $a1, $sp, 104 # 8-byte Folded Spill ld.d $a1, $a0, 312 - st.d $a1, $sp, 88 # 8-byte Folded Spill + st.d $a1, $sp, 96 # 8-byte Folded Spill ld.d $a1, $a0, 320 st.d $a1, $sp, 136 # 8-byte Folded Spill ld.d $a1, $a0, 328 st.d $a1, $sp, 176 # 8-byte Folded Spill ld.d $a1, $a0, 336 - st.d $a1, $sp, 80 # 8-byte Folded Spill + st.d $a1, $sp, 88 # 8-byte Folded Spill ld.d $s8, $a0, 344 ld.d $a1, $a0, 352 - st.d $a1, $sp, 256 # 8-byte Folded Spill + st.d $a1, $sp, 240 # 8-byte Folded Spill ld.d $a1, $a0, 360 - st.d $a1, $sp, 280 # 8-byte Folded Spill + st.d $a1, $sp, 264 # 8-byte Folded Spill ld.d $a1, $a0, 368 st.d $a1, $sp, 168 # 8-byte Folded Spill ld.d $a1, $a0, 376 - st.d $a1, $sp, 72 # 8-byte Folded Spill + st.d $a1, $sp, 80 # 8-byte Folded Spill ld.d $t7, $a0, 288 ld.d $s4, $a0, 264 ld.d $a1, $a0, 272 - st.d $a1, $sp, 64 # 8-byte Folded Spill + st.d $a1, $sp, 72 # 8-byte Folded Spill ld.d $a1, $a0, 280 - st.d $a1, $sp, 56 # 8-byte Folded Spill + st.d $a1, $sp, 64 # 8-byte Folded Spill ld.d $s0, $a0, 384 ld.d $a1, $a0, 392 - st.d $a1, $sp, 200 # 8-byte Folded Spill + st.d $a1, $sp, 192 # 8-byte Folded Spill # kill: def $f0_64 killed $f0_64 def $vr0 - vst $vr0, $sp, 240 # 16-byte Folded Spill - blez $s1, .LBB5_11 + vst $vr0, $sp, 224 # 16-byte Folded Spill + blez $s2, .LBB5_11 # %bb.1: # %.lr.ph move $a1, $zero ld.d $a2, $a0, 512 @@ -1884,7 +1881,7 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd ld.d $a4, $fp, 248 ld.d $a5, $fp, 256 vldi $vr0, -928 - vld $vr1, $sp, 240 # 16-byte Folded Reload + vld $vr1, $sp, 224 # 16-byte Folded Reload fmul.d $fa0, $fa1, $fa0 vreplvei.d $vr0, $vr0, 0 ori $a6, $zero, 1 @@ -1893,7 +1890,7 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd .LBB5_2: # %_ZN5Hydro10advPosHalfEPK7double2S2_dPS0_ii.exit # in Loop: Header=BB5_3 Depth=1 addi.d $a1, $a1, 1 - beq $a1, $s1, .LBB5_11 + beq $a1, $s2, .LBB5_11 .LBB5_3: # =>This Loop Header: Depth=1 # Child Loop BB5_5 Depth 2 # Child Loop BB5_7 Depth 2 @@ -1909,7 +1906,7 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd alsl.d $t4, $t1, $t7, 4 sub.d $t3, $t2, $t1 addi.d $t3, $t3, 1 - ld.d $t5, $sp, 224 # 8-byte Folded Reload + ld.d $t5, $sp, 208 # 8-byte Folded Reload add.d $t5, $t5, $t0 move $t6, $t3 .p2align 4, , 16 @@ -1960,22 +1957,22 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd bnez $a7, .LBB5_10 b .LBB5_2 .LBB5_11: # %.preheader - st.d $t7, $sp, 48 # 8-byte Folded Spill - ld.d $a1, $sp, 208 # 8-byte Folded Reload - st.d $s1, $sp, 128 # 8-byte Folded Spill + st.d $t7, $sp, 56 # 8-byte Folded Spill + ld.d $a1, $sp, 200 # 8-byte Folded Reload blez $a1, .LBB5_35 # %bb.12: # %.lr.ph221 + st.d $s2, $sp, 40 # 8-byte Folded Spill move $a4, $zero - ld.d $a0, $sp, 256 # 8-byte Folded Reload + ld.d $a0, $sp, 240 # 8-byte Folded Reload addi.d $a0, $a0, 16 - st.d $a0, $sp, 40 # 8-byte Folded Spill + st.d $a0, $sp, 48 # 8-byte Folded Spill b .LBB5_14 .p2align 4, , 16 .LBB5_13: # %_ZN5Hydro12sumCrnrForceEPK7double2S2_S2_PS0_ii.exit # in Loop: Header=BB5_14 Depth=1 ld.d $a4, $sp, 160 # 8-byte Folded Reload addi.d $a4, $a4, 1 - ld.d $a0, $sp, 208 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload beq $a4, $a0, .LBB5_34 .LBB5_14: # =>This Loop Header: Depth=1 # Child Loop BB5_30 Depth 2 @@ -1996,7 +1993,7 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd ld.d $a1, $sp, 136 # 8-byte Folded Reload alsl.d $a1, $s6, $a1, 3 sub.d $a4, $a6, $s6 - ld.d $a2, $sp, 280 # 8-byte Folded Reload + ld.d $a2, $sp, 264 # 8-byte Folded Reload alsl.d $a3, $s6, $a2, 3 ori $a2, $zero, 2 st.d $a6, $sp, 152 # 8-byte Folded Spill @@ -2011,9 +2008,9 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd .LBB5_16: # %_ZSt4copyIPdS0_ET0_T_S2_S1_.exit # in Loop: Header=BB5_14 Depth=1 move $a1, $s4 - ld.d $s2, $sp, 64 # 8-byte Folded Reload + ld.d $s2, $sp, 72 # 8-byte Folded Reload move $a2, $s2 - ld.d $s1, $sp, 56 # 8-byte Folded Reload + ld.d $s1, $sp, 64 # 8-byte Folded Reload move $a3, $s1 move $a4, $s3 move $a5, $s5 @@ -2025,9 +2022,9 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd move $a2, $s1 ld.d $s7, $sp, 176 # 8-byte Folded Reload move $a3, $s7 - ld.d $a4, $sp, 80 # 8-byte Folded Reload + ld.d $a4, $sp, 88 # 8-byte Folded Reload move $a5, $s8 - ld.d $a6, $sp, 256 # 8-byte Folded Reload + ld.d $a6, $sp, 240 # 8-byte Folded Reload move $a7, $s3 pcaddu18i $ra, %call36(_ZN4Mesh8calcVolsEPK7double2S2_PdS3_S3_S3_ii) jirl $ra, $ra, 0 @@ -2041,14 +2038,14 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd jirl $ra, $ra, 0 ld.d $a0, $fp, 0 move $a1, $s4 - ld.d $a2, $sp, 72 # 8-byte Folded Reload + ld.d $a2, $sp, 80 # 8-byte Folded Reload move $a3, $s3 move $a4, $s5 pcaddu18i $ra, %call36(_ZN4Mesh11calcEdgeLenEPK7double2Pdii) jirl $ra, $ra, 0 ld.d $a0, $fp, 0 move $a1, $s7 - ld.d $a2, $sp, 200 # 8-byte Folded Reload + ld.d $a2, $sp, 192 # 8-byte Folded Reload move $a3, $s3 move $a4, $s5 pcaddu18i $ra, %call36(_ZN4Mesh11calcCharLenEPKdPdii) @@ -2069,7 +2066,7 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd # in Loop: Header=BB5_14 Depth=1 sub.d $a2, $t1, $a1 alsl.d $a3, $a1, $a0, 3 - ld.d $a4, $sp, 256 # 8-byte Folded Reload + ld.d $a4, $sp, 240 # 8-byte Folded Reload alsl.d $a4, $a1, $a4, 3 alsl.d $a1, $a1, $a6, 3 .p2align 4, , 16 @@ -2132,9 +2129,9 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd st.d $t1, $sp, 16 st.d $s6, $sp, 8 st.d $a2, $sp, 0 - ld.d $a2, $sp, 256 # 8-byte Folded Reload - ld.d $a3, $sp, 280 # 8-byte Folded Reload - vld $vr0, $sp, 240 # 16-byte Folded Reload + ld.d $a2, $sp, 240 # 8-byte Folded Reload + ld.d $a3, $sp, 264 # 8-byte Folded Reload + vld $vr0, $sp, 224 # 16-byte Folded Reload # kill: def $f0_64 killed $f0_64 killed $vr0 pcaddu18i $ra, %call36(_ZN7PolyGas15calcStateAtHalfEPKdS1_S1_S1_S1_S1_dPdS2_ii) jirl $ra, $ra, 0 @@ -2213,7 +2210,7 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd bltu $a1, $a3, .LBB5_18 # %bb.28: # %vector.memcheck # in Loop: Header=BB5_14 Depth=1 - ld.d $a1, $sp, 256 # 8-byte Folded Reload + ld.d $a1, $sp, 240 # 8-byte Folded Reload sub.d $a2, $a0, $a1 move $a1, $s6 bltu $a2, $a3, .LBB5_19 @@ -2226,7 +2223,7 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd addi.d $a3, $a3, 16 alsl.d $a4, $s6, $a0, 3 addi.d $a4, $a4, 16 - ld.d $a5, $sp, 40 # 8-byte Folded Reload + ld.d $a5, $sp, 48 # 8-byte Folded Reload alsl.d $a5, $s6, $a5, 3 move $a7, $a2 .p2align 4, , 16 @@ -2259,7 +2256,7 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd b .LBB5_16 .LBB5_34: # %._crit_edge.loopexit ld.d $a0, $fp, 0 - ld.d $s1, $sp, 128 # 8-byte Folded Reload + ld.d $s2, $sp, 40 # 8-byte Folded Reload .LBB5_35: # %._crit_edge pcaddu18i $ra, %call36(_ZN4Mesh13checkBadSidesEv) jirl $ra, $ra, 0 @@ -2273,27 +2270,29 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd ld.d $a2, $fp, 272 pcaddu18i $ra, %call36(_ZN4Mesh11sumToPointsI7double2EEvPKT_PS2_) jirl $ra, $ra, 0 - pcalau12i $s2, %pc_hi20(.LCPI5_0) ld.d $s8, $sp, 136 # 8-byte Folded Reload - blez $s1, .LBB5_47 + blez $s2, .LBB5_47 # %bb.36: # %.lr.ph228 move $s0, $zero ld.d $a1, $fp, 40 ld.d $a0, $fp, 32 - fld.d $fs0, $s2, %pc_lo12(.LCPI5_0) - vld $vr0, $sp, 240 # 16-byte Folded Reload + vld $vr0, $sp, 224 # 16-byte Folded Reload vreplvei.d $vr3, $vr0, 0 + lu12i.w $a2, 322956 + ori $a2, $a2, 830 + lu32i.d $a2, 98173 + lu52i.d $a2, $a2, 694 + movgr2fr.d $fs0, $a2 lu52i.d $a2, $zero, 1022 vreplgr2vr.d $vr4, $a2 - vst $vr3, $sp, 256 # 16-byte Folded Spill + vst $vr3, $sp, 240 # 16-byte Folded Spill vst $vr4, $sp, 176 # 16-byte Folded Spill b .LBB5_38 .p2align 4, , 16 .LBB5_37: # %_ZN5Hydro10advPosFullEPK7double2S2_S2_dPS0_S3_ii.exit # in Loop: Header=BB5_38 Depth=1 addi.d $s0, $s0, 1 - ld.d $a2, $sp, 128 # 8-byte Folded Reload - beq $s0, $a2, .LBB5_47 + beq $s0, $s2, .LBB5_47 .LBB5_38: # =>This Loop Header: Depth=1 # Child Loop BB5_40 Depth 2 # Child Loop BB5_44 Depth 2 @@ -2331,14 +2330,14 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd bltu $s7, $a2, .LBB5_40 # %bb.41: # %._crit_edge225 # in Loop: Header=BB5_38 Depth=1 - vld $vr3, $sp, 256 # 16-byte Folded Reload + vld $vr3, $sp, 240 # 16-byte Folded Reload vld $vr4, $sp, 176 # 16-byte Folded Reload bge $s1, $s3, .LBB5_37 b .LBB5_43 .p2align 4, , 16 .LBB5_42: # in Loop: Header=BB5_38 Depth=1 move $a1, $a0 - vld $vr3, $sp, 256 # 16-byte Folded Reload + vld $vr3, $sp, 240 # 16-byte Folded Reload vld $vr4, $sp, 176 # 16-byte Folded Reload bge $s1, $s3, .LBB5_37 .LBB5_43: # %.cont.preheader.i @@ -2377,9 +2376,9 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd # in Loop: Header=BB5_38 Depth=1 ld.d $a6, $fp, 248 ld.d $t0, $fp, 256 - ld.d $a4, $sp, 224 # 8-byte Folded Reload + ld.d $a4, $sp, 208 # 8-byte Folded Reload add.d $a4, $a4, $a7 - ld.d $a5, $sp, 48 # 8-byte Folded Reload + ld.d $a5, $sp, 56 # 8-byte Folded Reload add.d $a5, $a5, $a7 add.d $a6, $a6, $a7 add.d $a3, $a3, $a7 @@ -2409,7 +2408,6 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd bnez $a2, .LBB5_46 b .LBB5_37 .LBB5_47: # %._crit_edge229 - st.d $s2, $sp, 176 # 8-byte Folded Spill lu12i.w $a0, -382855 ori $a0, $a0, 3886 lu32i.d $a0, -179538 @@ -2421,19 +2419,19 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd ld.d $a0, $a0, 6 st.d $a1, $fp, 168 st.d $a0, $fp, 174 - ld.d $a0, $sp, 208 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload blez $a0, .LBB5_55 # %bb.48: # %.lr.ph232 move $s3, $zero vldi $vr0, -800 - vld $vr1, $sp, 240 # 16-byte Folded Reload + vld $vr1, $sp, 224 # 16-byte Folded Reload fmul.d $fs0, $fa1, $fa0 b .LBB5_50 .p2align 4, , 16 .LBB5_49: # %_ZN5Hydro8calcWorkEPK7double2S2_S2_S2_S2_dPdS3_ii.exit # in Loop: Header=BB5_50 Depth=1 addi.d $s3, $s3, 1 - ld.d $a0, $sp, 208 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload beq $s3, $a0, .LBB5_55 .LBB5_50: # =>This Loop Header: Depth=1 # Child Loop BB5_54 Depth 2 @@ -2447,12 +2445,11 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd ldx.w $s1, $a3, $a2 ldx.w $s6, $a4, $a2 ldx.w $s7, $a5, $a2 - ld.d $s5, $sp, 224 # 8-byte Folded Reload + ld.d $s5, $sp, 208 # 8-byte Folded Reload move $a1, $s5 - ld.d $a2, $sp, 120 # 8-byte Folded Reload - move $s2, $s8 - ld.d $s8, $sp, 112 # 8-byte Folded Reload - move $a3, $s8 + ld.d $a2, $sp, 128 # 8-byte Folded Reload + ld.d $s2, $sp, 120 # 8-byte Folded Reload + move $a3, $s2 move $a4, $s0 move $a5, $s1 pcaddu18i $ra, %call36(_ZN4Mesh8calcCtrsEPK7double2PS0_S3_ii) @@ -2460,12 +2457,11 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd ld.d $a0, $fp, 0 st.d $s1, $sp, 0 move $a1, $s5 - move $a2, $s8 - move $s8, $s2 - ld.d $a3, $sp, 104 # 8-byte Folded Reload - ld.d $a4, $sp, 96 # 8-byte Folded Reload - ld.d $a5, $sp, 88 # 8-byte Folded Reload - move $a6, $s2 + move $a2, $s2 + ld.d $a3, $sp, 112 # 8-byte Folded Reload + ld.d $a4, $sp, 104 # 8-byte Folded Reload + ld.d $a5, $sp, 96 # 8-byte Folded Reload + move $a6, $s8 move $a7, $s0 pcaddu18i $ra, %call36(_ZN4Mesh8calcVolsEPK7double2S2_PdS3_S3_S3_ii) jirl $ra, $ra, 0 @@ -2563,39 +2559,37 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd jirl $ra, $ra, 0 ld.d $a0, $fp, 0 ld.w $a1, $a0, 560 - ld.d $s2, $sp, 176 # 8-byte Folded Reload blez $a1, .LBB5_85 # %bb.56: # %.lr.ph236 move $s0, $zero - vld $vr0, $sp, 240 # 16-byte Folded Reload + vld $vr0, $sp, 224 # 16-byte Folded Reload frecip.d $ft0, $fa0 vreplvei.d $vr0, $vr8, 0 - vst $vr0, $sp, 224 # 16-byte Folded Spill + vst $vr0, $sp, 208 # 16-byte Folded Spill addi.d $s1, $s8, 16 - ld.d $a1, $sp, 280 # 8-byte Folded Reload + ld.d $a1, $sp, 264 # 8-byte Folded Reload addi.d $s3, $a1, 16 ori $s4, $zero, 4 - ori $s5, $zero, 32 lu12i.w $a1, 322956 ori $a1, $a1, 830 lu32i.d $a1, 98173 - lu52i.d $a1, $a1, 694 - vreplgr2vr.d $vr0, $a1 - vst $vr0, $sp, 208 # 16-byte Folded Spill - vst $vr8, $sp, 256 # 16-byte Folded Spill + lu52i.d $s2, $a1, 694 + movgr2fr.d $fs0, $s2 + ori $s5, $zero, 32 + vst $vr8, $sp, 240 # 16-byte Folded Spill b .LBB5_58 .p2align 4, , 16 .LBB5_57: # %_ZN5Hydro7calcRhoEPKdS1_Pdii.exit213 # in Loop: Header=BB5_58 Depth=1 move $a0, $fp - ld.d $a1, $sp, 200 # 8-byte Folded Reload + ld.d $a1, $sp, 192 # 8-byte Folded Reload move $a2, $s8 - ld.d $a3, $sp, 280 # 8-byte Folded Reload - vld $vr0, $sp, 240 # 16-byte Folded Reload + ld.d $a3, $sp, 264 # 8-byte Folded Reload + vld $vr0, $sp, 224 # 16-byte Folded Reload # kill: def $f0_64 killed $f0_64 killed $vr0 pcaddu18i $ra, %call36(_ZN5Hydro11calcDtHydroEPKdS1_S1_dii) jirl $ra, $ra, 0 - vld $vr8, $sp, 256 # 16-byte Folded Reload + vld $vr8, $sp, 240 # 16-byte Folded Reload ld.d $a0, $fp, 0 ld.w $a1, $a0, 560 addi.d $s0, $s0, 1 @@ -2620,7 +2614,7 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd ld.d $a3, $fp, 344 sub.d $a0, $a5, $a4 move $a6, $a4 - vld $vr9, $sp, 224 # 16-byte Folded Reload + vld $vr9, $sp, 208 # 16-byte Folded Reload bgeu $a0, $s4, .LBB5_68 .LBB5_60: # %.lr.ph.i197.preheader # in Loop: Header=BB5_58 Depth=1 @@ -2628,7 +2622,7 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd alsl.d $a3, $a6, $a3, 3 alsl.d $a2, $a6, $a2, 3 alsl.d $a1, $a6, $a1, 3 - ld.d $t0, $sp, 280 # 8-byte Folded Reload + ld.d $t0, $sp, 264 # 8-byte Folded Reload alsl.d $t0, $a6, $t0, 3 alsl.d $a6, $a6, $s8, 3 .p2align 4, , 16 @@ -2668,10 +2662,9 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd # Parent Loop BB5_58 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa0, $t0, 0 - fld.d $fa1, $s2, %pc_lo12(.LCPI5_0) - fld.d $fa2, $a2, 0 - fadd.d $fa0, $fa0, $fa1 - fdiv.d $fa0, $fa2, $fa0 + fld.d $fa1, $a2, 0 + fadd.d $fa0, $fa0, $fs0 + fdiv.d $fa0, $fa1, $fa0 fst.d $fa0, $a3, 0 addi.d $a7, $a7, -1 addi.d $a3, $a3, 8 @@ -2711,7 +2704,7 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd bltu $a7, $s5, .LBB5_60 # %bb.69: # %vector.memcheck321 # in Loop: Header=BB5_58 Depth=1 - ld.d $a6, $sp, 280 # 8-byte Folded Reload + ld.d $a6, $sp, 264 # 8-byte Folded Reload sub.d $a7, $a3, $a6 move $a6, $a4 bltu $a7, $s5, .LBB5_60 @@ -2793,19 +2786,19 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd alsl.d $t2, $a4, $a1, 3 addi.d $t2, $t2, 16 move $t3, $a7 - vld $vr4, $sp, 208 # 16-byte Folded Reload .p2align 4, , 16 .LBB5_78: # %vector.body310 # Parent Loop BB5_58 Depth=1 # => This Inner Loop Header: Depth=2 - vld $vr0, $t2, -16 - vld $vr1, $t2, 0 - vld $vr2, $t0, -16 + vld $vr0, $t0, -16 + vld $vr1, $t2, -16 + vld $vr2, $t2, 0 vld $vr3, $t0, 0 - vfadd.d $vr0, $vr0, $vr4 + vreplgr2vr.d $vr4, $s2 vfadd.d $vr1, $vr1, $vr4 - vfdiv.d $vr0, $vr2, $vr0 - vfdiv.d $vr1, $vr3, $vr1 + vfadd.d $vr2, $vr2, $vr4 + vfdiv.d $vr0, $vr0, $vr1 + vfdiv.d $vr1, $vr3, $vr2 vst $vr0, $t1, -16 vst $vr1, $t1, 0 addi.d $t3, $t3, -4 @@ -2861,19 +2854,19 @@ _ZN5Hydro7doCycleEd: # @_ZN5Hydro7doCycleEd beq $a0, $a6, .LBB5_57 b .LBB5_66 .LBB5_85: # %._crit_edge237 - fld.d $fs0, $sp, 288 # 8-byte Folded Reload - ld.d $s8, $sp, 296 # 8-byte Folded Reload - ld.d $s7, $sp, 304 # 8-byte Folded Reload - ld.d $s6, $sp, 312 # 8-byte Folded Reload - ld.d $s5, $sp, 320 # 8-byte Folded Reload - ld.d $s4, $sp, 328 # 8-byte Folded Reload - ld.d $s3, $sp, 336 # 8-byte Folded Reload - ld.d $s2, $sp, 344 # 8-byte Folded Reload - ld.d $s1, $sp, 352 # 8-byte Folded Reload - ld.d $s0, $sp, 360 # 8-byte Folded Reload - ld.d $fp, $sp, 368 # 8-byte Folded Reload - ld.d $ra, $sp, 376 # 8-byte Folded Reload - addi.d $sp, $sp, 384 + fld.d $fs0, $sp, 272 # 8-byte Folded Reload + ld.d $s8, $sp, 280 # 8-byte Folded Reload + ld.d $s7, $sp, 288 # 8-byte Folded Reload + ld.d $s6, $sp, 296 # 8-byte Folded Reload + ld.d $s5, $sp, 304 # 8-byte Folded Reload + ld.d $s4, $sp, 312 # 8-byte Folded Reload + ld.d $s3, $sp, 320 # 8-byte Folded Reload + ld.d $s2, $sp, 328 # 8-byte Folded Reload + ld.d $s1, $sp, 336 # 8-byte Folded Reload + ld.d $s0, $sp, 344 # 8-byte Folded Reload + ld.d $fp, $sp, 352 # 8-byte Folded Reload + ld.d $ra, $sp, 360 # 8-byte Folded Reload + addi.d $sp, $sp, 368 ret .Lfunc_end5: .size _ZN5Hydro7doCycleEd, .Lfunc_end5-_ZN5Hydro7doCycleEd @@ -3069,12 +3062,7 @@ _ZN5Hydro12sumCrnrForceEPK7double2S2_S2_PS0_ii: # @_ZN5Hydro12sumCrnrForceEPK7do .size _ZN5Hydro12sumCrnrForceEPK7double2S2_S2_PS0_ii, .Lfunc_end9-_ZN5Hydro12sumCrnrForceEPK7double2S2_S2_PS0_ii .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN5Hydro9calcAccelEPK7double2PKdPS0_ii -.LCPI10_0: - .dword 0x2b617f7d4ed8c33e # double 1.0E-99 - .text - .globl _ZN5Hydro9calcAccelEPK7double2PKdPS0_ii + .globl _ZN5Hydro9calcAccelEPK7double2PKdPS0_ii # -- Begin function _ZN5Hydro9calcAccelEPK7double2PKdPS0_ii .p2align 5 .type _ZN5Hydro9calcAccelEPK7double2PKdPS0_ii,@function _ZN5Hydro9calcAccelEPK7double2PKdPS0_ii: # @_ZN5Hydro9calcAccelEPK7double2PKdPS0_ii @@ -3085,11 +3073,14 @@ _ZN5Hydro9calcAccelEPK7double2PKdPS0_ii: # @_ZN5Hydro9calcAccelEPK7double2PKdPS0 slli.d $a0, $a4, 4 addi.d $a6, $a0, 8 add.d $a0, $a1, $a6 - pcalau12i $a1, %pc_hi20(.LCPI10_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI10_0) alsl.d $a1, $a4, $a2, 3 add.d $a2, $a3, $a6 sub.d $a3, $a5, $a4 + lu12i.w $a4, 322956 + ori $a4, $a4, 830 + lu32i.d $a4, 98173 + lu52i.d $a4, $a4, 694 + movgr2fr.d $fa0, $a4 .p2align 4, , 16 .LBB10_2: # %.cont # =>This Inner Loop Header: Depth=1 @@ -3342,98 +3333,88 @@ _ZN5Hydro12calcWorkRateEPKdS1_S1_S1_dPdii: # @_ZN5Hydro12calcWorkRateEPKdS1_S1_S .Lfunc_end13: .size _ZN5Hydro12calcWorkRateEPKdS1_S1_S1_dPdii, .Lfunc_end13-_ZN5Hydro12calcWorkRateEPKdS1_S1_S1_dPdii # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN5Hydro10calcEnergyEPKdS1_Pdii -.LCPI14_0: - .dword 0x2b617f7d4ed8c33e # double 1.0E-99 - .text - .globl _ZN5Hydro10calcEnergyEPKdS1_Pdii + .globl _ZN5Hydro10calcEnergyEPKdS1_Pdii # -- Begin function _ZN5Hydro10calcEnergyEPKdS1_Pdii .p2align 5 .type _ZN5Hydro10calcEnergyEPKdS1_Pdii,@function _ZN5Hydro10calcEnergyEPKdS1_Pdii: # @_ZN5Hydro10calcEnergyEPKdS1_Pdii # %bb.0: bge $a4, $a5, .LBB14_4 # %bb.1: # %.lr.ph.preheader - sub.d $a0, $a5, $a4 - ori $a6, $zero, 6 - bgeu $a0, $a6, .LBB14_5 + sub.d $a6, $a5, $a4 + ori $a7, $zero, 6 + lu12i.w $a0, 322956 + bgeu $a6, $a7, .LBB14_5 .LBB14_2: # %.lr.ph.preheader16 - alsl.d $a0, $a4, $a1, 3 - pcalau12i $a1, %pc_hi20(.LCPI14_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI14_0) - alsl.d $a1, $a4, $a2, 3 - alsl.d $a2, $a4, $a3, 3 - sub.d $a3, $a5, $a4 + alsl.d $a1, $a4, $a1, 3 + alsl.d $a2, $a4, $a2, 3 + alsl.d $a3, $a4, $a3, 3 + sub.d $a4, $a5, $a4 + ori $a0, $a0, 830 + lu32i.d $a0, 98173 + lu52i.d $a0, $a0, 694 + movgr2fr.d $fa0, $a0 .p2align 4, , 16 .LBB14_3: # %.lr.ph # =>This Inner Loop Header: Depth=1 - fld.d $fa1, $a1, 0 - fld.d $fa2, $a0, 0 + fld.d $fa1, $a2, 0 + fld.d $fa2, $a1, 0 fadd.d $fa1, $fa1, $fa0 fdiv.d $fa1, $fa2, $fa1 - fst.d $fa1, $a2, 0 - addi.d $a0, $a0, 8 + fst.d $fa1, $a3, 0 addi.d $a1, $a1, 8 - addi.d $a3, $a3, -1 addi.d $a2, $a2, 8 - bnez $a3, .LBB14_3 + addi.d $a4, $a4, -1 + addi.d $a3, $a3, 8 + bnez $a4, .LBB14_3 .LBB14_4: # %._crit_edge ret .LBB14_5: # %vector.memcheck - sub.d $a7, $a3, $a1 - ori $a6, $zero, 32 - bltu $a7, $a6, .LBB14_2 + sub.d $t0, $a3, $a1 + ori $a7, $zero, 32 + bltu $t0, $a7, .LBB14_2 # %bb.6: # %vector.memcheck - sub.d $a7, $a3, $a2 - bltu $a7, $a6, .LBB14_2 + sub.d $t0, $a3, $a2 + bltu $t0, $a7, .LBB14_2 # %bb.7: # %vector.ph - move $a6, $a0 - bstrins.d $a6, $zero, 1, 0 - add.d $a7, $a6, $a4 + move $a7, $a6 + bstrins.d $a7, $zero, 1, 0 + add.d $t0, $a7, $a4 slli.d $a4, $a4, 3 - addi.d $t1, $a4, 16 - add.d $a4, $a1, $t1 - add.d $t0, $a3, $t1 - add.d $t1, $a2, $t1 - lu12i.w $t2, 322956 - ori $t2, $t2, 830 - lu32i.d $t2, 98173 - lu52i.d $t2, $t2, 694 - vreplgr2vr.d $vr0, $t2 - move $t2, $a6 + addi.d $t2, $a4, 16 + add.d $a4, $a1, $t2 + add.d $t1, $a3, $t2 + add.d $t2, $a2, $t2 + ori $t3, $a0, 830 + lu32i.d $t3, 98173 + lu52i.d $t3, $t3, 694 + vreplgr2vr.d $vr0, $t3 + move $t3, $a7 .p2align 4, , 16 .LBB14_8: # %vector.body # =>This Inner Loop Header: Depth=1 - vld $vr1, $t1, -16 - vld $vr2, $t1, 0 + vld $vr1, $t2, -16 + vld $vr2, $t2, 0 vld $vr3, $a4, -16 vld $vr4, $a4, 0 vfadd.d $vr1, $vr1, $vr0 vfadd.d $vr2, $vr2, $vr0 vfdiv.d $vr1, $vr3, $vr1 vfdiv.d $vr2, $vr4, $vr2 - vst $vr1, $t0, -16 - vst $vr2, $t0, 0 - addi.d $t2, $t2, -4 + vst $vr1, $t1, -16 + vst $vr2, $t1, 0 + addi.d $t3, $t3, -4 addi.d $a4, $a4, 32 - addi.d $t0, $t0, 32 addi.d $t1, $t1, 32 - bnez $t2, .LBB14_8 + addi.d $t2, $t2, 32 + bnez $t3, .LBB14_8 # %bb.9: # %middle.block - move $a4, $a7 - bne $a0, $a6, .LBB14_2 + move $a4, $t0 + bne $a6, $a7, .LBB14_2 b .LBB14_4 .Lfunc_end14: .size _ZN5Hydro10calcEnergyEPKdS1_Pdii, .Lfunc_end14-_ZN5Hydro10calcEnergyEPKdS1_Pdii # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN5Hydro11calcDtHydroEPKdS1_S1_dii -.LCPI15_0: - .dword 0x2b617f7d4ed8c33e # double 1.0E-99 -.LCPI15_1: - .dword 0x547d42aea2879f2e # double 9.9999999999999997E+98 - .text - .globl _ZN5Hydro11calcDtHydroEPKdS1_S1_dii + .globl _ZN5Hydro11calcDtHydroEPKdS1_S1_dii # -- Begin function _ZN5Hydro11calcDtHydroEPKdS1_S1_dii .p2align 5 .type _ZN5Hydro11calcDtHydroEPKdS1_S1_dii,@function _ZN5Hydro11calcDtHydroEPKdS1_S1_dii: # @_ZN5Hydro11calcDtHydroEPKdS1_S1_dii @@ -3449,20 +3430,26 @@ _ZN5Hydro11calcDtHydroEPKdS1_S1_dii: # @_ZN5Hydro11calcDtHydroEPKdS1_S1_dii fst.d $fs1, $sp, 96 # 8-byte Folded Spill fst.d $fs2, $sp, 88 # 8-byte Folded Spill move $fp, $a0 - pcalau12i $a6, %pc_hi20(.LCPI15_0) - pcalau12i $a0, %pc_hi20(.LCPI15_1) + lu12i.w $a0, 322956 + ori $a0, $a0, 830 + lu32i.d $a0, 98173 + lu52i.d $t2, $a0, 694 + lu12i.w $a0, -382855 + ori $a0, $a0, 3886 + lu32i.d $a0, -179538 + lu52i.d $a0, $a0, 1351 bge $a4, $a5, .LBB15_7 # %bb.1: # %.lr.ph.i - ld.d $t1, $fp, 368 + ld.d $a6, $fp, 368 ld.d $t0, $fp, 360 fld.d $fa1, $fp, 56 sub.d $a7, $a5, $a4 alsl.d $a1, $a4, $a1, 3 - fld.d $fs1, $a0, %pc_lo12(.LCPI15_1) - fld.d $fs0, $a6, %pc_lo12(.LCPI15_0) alsl.d $t0, $a4, $t0, 3 - alsl.d $t1, $a4, $t1, 3 + alsl.d $t1, $a4, $a6, 3 addi.w $a6, $zero, -1 + movgr2fr.d $fs1, $a0 + movgr2fr.d $fs0, $t2 move $t2, $a4 .p2align 4, , 16 .LBB15_2: # =>This Inner Loop Header: Depth=1 @@ -3488,7 +3475,7 @@ _ZN5Hydro11calcDtHydroEPKdS1_S1_dii: # @_ZN5Hydro11calcDtHydroEPKdS1_S1_dii addi.d $t1, $t1, 8 bnez $a7, .LBB15_2 # %bb.3: # %._crit_edge.i - fld.d $fa1, $a0, %pc_lo12(.LCPI15_1) + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fs1, $fa1 bceqz $fcc0, .LBB15_5 # %bb.4: @@ -3537,9 +3524,9 @@ _ZN5Hydro11calcDtHydroEPKdS1_S1_dii: # @_ZN5Hydro11calcDtHydroEPKdS1_S1_dii bnez $a0, .LBB15_6 b .LBB15_8 .LBB15_7: - fld.d $fs0, $a6, %pc_lo12(.LCPI15_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI15_1) addi.w $a3, $zero, -1 + movgr2fr.d $fs0, $t2 + movgr2fr.d $fa1, $a0 .LBB15_8: # %._crit_edge.i8 fld.d $fa2, $fp, 64 fmul.d $fa0, $fa0, $fa2 @@ -3580,12 +3567,7 @@ _ZN5Hydro11calcDtHydroEPKdS1_S1_dii: # @_ZN5Hydro11calcDtHydroEPKdS1_S1_dii .Lfunc_end15: .size _ZN5Hydro11calcDtHydroEPKdS1_S1_dii, .Lfunc_end15-_ZN5Hydro11calcDtHydroEPKdS1_S1_dii # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN5Hydro9sumEnergyEPKdS1_S1_S1_S1_PK7double2S4_RdS5_iiii -.LCPI16_0: - .dword 0x400921fb54442d18 # double 3.1415926535897931 - .text - .globl _ZN5Hydro9sumEnergyEPKdS1_S1_S1_S1_PK7double2S4_RdS5_iiii + .globl _ZN5Hydro9sumEnergyEPKdS1_S1_S1_S1_PK7double2S4_RdS5_iiii # -- Begin function _ZN5Hydro9sumEnergyEPKdS1_S1_S1_S1_PK7double2S4_RdS5_iiii .p2align 5 .type _ZN5Hydro9sumEnergyEPKdS1_S1_S1_S1_PK7double2S4_RdS5_iiii,@function _ZN5Hydro9sumEnergyEPKdS1_S1_S1_S1_PK7double2S4_RdS5_iiii: # @_ZN5Hydro9sumEnergyEPKdS1_S1_S1_S1_PK7double2S4_RdS5_iiii @@ -3613,10 +3595,13 @@ _ZN5Hydro9sumEnergyEPKdS1_S1_S1_S1_PK7double2S4_RdS5_iiii: # @_ZN5Hydro9sumEnerg # %bb.3: # %._crit_edge.loopexit fadd.d $fa2, $fa0, $fa0 .LBB16_4: # %._crit_edge - fld.d $fa3, $t0, 0 - pcalau12i $a1, %pc_hi20(.LCPI16_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI16_0) ld.d $a1, $sp, 8 + fld.d $fa3, $t0, 0 + lu12i.w $t1, 345154 + ori $t1, $t1, 3352 + lu32i.d $t1, -450053 + lu52i.d $t1, $t1, 1024 + movgr2fr.d $fa0, $t1 fmadd.d $fa2, $fa2, $fa0, $fa3 fst.d $fa2, $t0, 0 bge $t4, $t3, .LBB16_8 @@ -3676,32 +3661,31 @@ _ZN5Hydro9sumEnergyEPKdS1_S1_S1_S1_PK7double2S4_RdS5_iiii: # @_ZN5Hydro9sumEnerg .Lfunc_end16: .size _ZN5Hydro9sumEnergyEPKdS1_S1_S1_S1_PK7double2S4_RdS5_iiii, .Lfunc_end16-_ZN5Hydro9sumEnergyEPKdS1_S1_S1_S1_PK7double2S4_RdS5_iiii # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN5Hydro13calcDtCourantEPKdRdPcii -.LCPI17_0: - .dword 0x547d42aea2879f2e # double 9.9999999999999997E+98 -.LCPI17_1: - .dword 0x2b617f7d4ed8c33e # double 1.0E-99 - .text - .globl _ZN5Hydro13calcDtCourantEPKdRdPcii + .globl _ZN5Hydro13calcDtCourantEPKdRdPcii # -- Begin function _ZN5Hydro13calcDtCourantEPKdRdPcii .p2align 5 .type _ZN5Hydro13calcDtCourantEPKdRdPcii,@function _ZN5Hydro13calcDtCourantEPKdRdPcii: # @_ZN5Hydro13calcDtCourantEPKdRdPcii # %bb.0: - pcalau12i $a7, %pc_hi20(.LCPI17_0) + lu12i.w $a6, -382855 + ori $a6, $a6, 3886 + lu32i.d $a6, -179538 + lu52i.d $t0, $a6, 1351 bge $a4, $a5, .LBB17_5 # %bb.1: # %.lr.ph ld.d $a6, $a0, 368 - ld.d $t0, $a0, 360 + ld.d $a7, $a0, 360 fld.d $fa1, $a0, 56 alsl.d $a0, $a4, $a6, 3 - alsl.d $a6, $a4, $t0, 3 - fld.d $fa0, $a7, %pc_lo12(.LCPI17_0) - pcalau12i $a7, %pc_hi20(.LCPI17_1) - fld.d $fa2, $a7, %pc_lo12(.LCPI17_1) + alsl.d $a6, $a4, $a7, 3 alsl.d $a1, $a4, $a1, 3 sub.d $a7, $a5, $a4 addi.w $a5, $zero, -1 + movgr2fr.d $fa0, $t0 + lu12i.w $t0, 322956 + ori $t0, $t0, 830 + lu32i.d $t0, 98173 + lu52i.d $t0, $t0, 694 + movgr2fr.d $fa2, $t0 .p2align 4, , 16 .LBB17_2: # =>This Inner Loop Header: Depth=1 fld.d $fa3, $a6, 0 @@ -3739,8 +3723,8 @@ _ZN5Hydro13calcDtCourantEPKdRdPcii: # @_ZN5Hydro13calcDtCourantEPKdRdPcii pcaddu18i $t8, %call36(snprintf) jr $t8 .LBB17_5: - fld.d $fa0, $a7, %pc_lo12(.LCPI17_0) addi.w $a5, $zero, -1 + movgr2fr.d $fa0, $t0 fld.d $fa1, $a2, 0 fcmp.clt.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB17_4 @@ -3749,24 +3733,22 @@ _ZN5Hydro13calcDtCourantEPKdRdPcii: # @_ZN5Hydro13calcDtCourantEPKdRdPcii .Lfunc_end17: .size _ZN5Hydro13calcDtCourantEPKdRdPcii, .Lfunc_end17-_ZN5Hydro13calcDtCourantEPKdRdPcii # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN5Hydro12calcDtVolumeEPKdS1_dRdPcii -.LCPI18_0: - .dword 0x2b617f7d4ed8c33e # double 1.0E-99 - .text - .globl _ZN5Hydro12calcDtVolumeEPKdS1_dRdPcii + .globl _ZN5Hydro12calcDtVolumeEPKdS1_dRdPcii # -- Begin function _ZN5Hydro12calcDtVolumeEPKdS1_dRdPcii .p2align 5 .type _ZN5Hydro12calcDtVolumeEPKdS1_dRdPcii,@function _ZN5Hydro12calcDtVolumeEPKdS1_dRdPcii: # @_ZN5Hydro12calcDtVolumeEPKdS1_dRdPcii # %bb.0: - pcalau12i $a7, %pc_hi20(.LCPI18_0) + lu12i.w $a7, 322956 + ori $a7, $a7, 830 + lu32i.d $a7, 98173 + lu52i.d $t0, $a7, 694 bge $a5, $a6, .LBB18_3 # %bb.1: # %.lr.ph.preheader alsl.d $a1, $a5, $a1, 3 - fld.d $fa1, $a7, %pc_lo12(.LCPI18_0) alsl.d $a2, $a5, $a2, 3 sub.d $a7, $a6, $a5 addi.w $a6, $zero, -1 + movgr2fr.d $fa1, $t0 .p2align 4, , 16 .LBB18_2: # %.lr.ph # =>This Inner Loop Header: Depth=1 @@ -3788,8 +3770,8 @@ _ZN5Hydro12calcDtVolumeEPKdS1_dRdPcii: # @_ZN5Hydro12calcDtVolumeEPKdS1_dRdPcii bnez $a7, .LBB18_2 b .LBB18_4 .LBB18_3: - fld.d $fa1, $a7, %pc_lo12(.LCPI18_0) addi.w $a6, $zero, -1 + movgr2fr.d $fa1, $t0 .LBB18_4: # %._crit_edge fld.d $fa2, $a0, 64 fld.d $fa3, $a3, 0 @@ -3961,12 +3943,7 @@ _ZN5Hydro10getDtHydroERdRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE: # .size _ZN5Hydro10getDtHydroERdRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE, .Lfunc_end19-_ZN5Hydro10getDtHydroERdRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN5Hydro16writeEnergyCheckEv -.LCPI20_0: - .dword 0x400921fb54442d18 # double 3.1415926535897931 - .text - .globl _ZN5Hydro16writeEnergyCheckEv + .globl _ZN5Hydro16writeEnergyCheckEv # -- Begin function _ZN5Hydro16writeEnergyCheckEv .p2align 5 .type _ZN5Hydro16writeEnergyCheckEv,@function _ZN5Hydro16writeEnergyCheckEv: # @_ZN5Hydro16writeEnergyCheckEv @@ -3994,20 +3971,23 @@ _ZN5Hydro16writeEnergyCheckEv: # @_ZN5Hydro16writeEnergyCheckEv st.d $zero, $sp, 8 blez $a2, .LBB20_11 # %bb.1: # %.lr.ph - ld.d $a3, $a1, 408 - ld.d $a4, $a1, 432 - ld.d $a5, $a1, 456 - ld.d $a6, $a1, 480 - ld.d $a7, $a0, 328 - ld.d $t0, $a1, 312 - ld.d $t1, $a1, 320 - ld.d $t2, $a0, 296 - ld.d $t3, $a1, 384 - ld.d $t4, $a1, 240 + move $a3, $zero + ld.d $a4, $a1, 408 + ld.d $a5, $a1, 432 + ld.d $a6, $a1, 456 + ld.d $a7, $a1, 480 + ld.d $t0, $a0, 328 + ld.d $t1, $a1, 312 + ld.d $t2, $a1, 320 + ld.d $t3, $a0, 296 + ld.d $t4, $a1, 384 + ld.d $t5, $a1, 240 ld.d $a0, $a0, 248 - pcalau12i $t5, %pc_hi20(.LCPI20_0) - fld.d $fa0, $t5, %pc_lo12(.LCPI20_0) - move $t5, $zero + lu12i.w $t6, 345154 + ori $t6, $t6, 3352 + lu32i.d $t6, -450053 + lu52i.d $t6, $t6, 1024 + movgr2fr.d $fa0, $t6 movgr2fr.d $fa1, $zero vldi $vr2, -928 fmov.d $fa3, $fa1 @@ -4019,21 +3999,21 @@ _ZN5Hydro16writeEnergyCheckEv: # @_ZN5Hydro16writeEnergyCheckEv fadd.d $fa3, $fa5, $fa3 fst.d $fa3, $sp, 16 fadd.d $fa4, $fa6, $fa4 - addi.d $t5, $t5, 1 + addi.d $a3, $a3, 1 fst.d $fa4, $sp, 8 - beq $t5, $a2, .LBB20_11 + beq $a3, $a2, .LBB20_11 .LBB20_3: # =>This Loop Header: Depth=1 # Child Loop BB20_5 Depth 2 # Child Loop BB20_9 Depth 2 - slli.d $t6, $t5, 2 - ldx.w $t8, $a5, $t6 - ldx.w $t7, $a6, $t6 + slli.d $t6, $a3, 2 + ldx.w $t8, $a6, $t6 + ldx.w $t7, $a7, $t6 fmov.d $fa5, $fa1 bge $t8, $t7, .LBB20_7 # %bb.4: # %.lr.ph.preheader.i # in Loop: Header=BB20_3 Depth=1 sub.d $t7, $t7, $t8 - alsl.d $t8, $t8, $a7, 3 + alsl.d $t8, $t8, $t0, 3 fmov.d $fa5, $fa1 .p2align 4, , 16 .LBB20_5: # %.lr.ph.i @@ -4050,8 +4030,8 @@ _ZN5Hydro16writeEnergyCheckEv: # @_ZN5Hydro16writeEnergyCheckEv fmadd.d $fa5, $fa5, $fa0, $fa1 .LBB20_7: # %._crit_edge.i # in Loop: Header=BB20_3 Depth=1 - ldx.w $s0, $a3, $t6 - ldx.w $t6, $a4, $t6 + ldx.w $s0, $a4, $t6 + ldx.w $t6, $a5, $t6 fmov.d $fa6, $fa1 bge $s0, $t6, .LBB20_2 # %bb.8: # %.lr.ph43.i @@ -4060,7 +4040,7 @@ _ZN5Hydro16writeEnergyCheckEv: # @_ZN5Hydro16writeEnergyCheckEv ld.d $t8, $a1, 104 ld.d $fp, $a1, 88 sub.d $t6, $t6, $s0 - alsl.d $t7, $s0, $t3, 3 + alsl.d $t7, $s0, $t4, 3 alsl.d $t8, $s0, $t8, 2 alsl.d $fp, $s0, $fp, 2 alsl.d $s0, $s0, $s1, 2 @@ -4072,19 +4052,19 @@ _ZN5Hydro16writeEnergyCheckEv: # @_ZN5Hydro16writeEnergyCheckEv ld.w $s2, $fp, 0 ld.w $s3, $s0, 0 slli.d $s1, $s1, 3 - fldx.d $fa7, $t0, $s1 + fldx.d $fa7, $t1, $s1 slli.d $s4, $s2, 4 - fldx.d $ft0, $t4, $s4 + fldx.d $ft0, $t5, $s4 fld.d $ft1, $t7, 0 slli.d $s3, $s3, 3 - fldx.d $ft2, $t3, $s3 + fldx.d $ft2, $t4, $s3 fmul.d $fa7, $fa7, $ft0 - fldx.d $ft0, $t2, $s1 + fldx.d $ft0, $t3, $s1 fmul.d $fa7, $fa7, $fa2 fadd.d $ft1, $ft1, $ft2 fmul.d $fa7, $fa7, $ft1 fmul.d $fa7, $ft0, $fa7 - fldx.d $ft0, $t1, $s1 + fldx.d $ft0, $t2, $s1 alsl.d $s1, $s2, $a0, 4 fld.d $ft1, $s1, 8 fldx.d $ft2, $a0, $s4 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/Mesh.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/Mesh.s index 77e5e608..712ada07 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/Mesh.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/Mesh.s @@ -3774,12 +3774,7 @@ _ZN4Mesh8calcCtrsEPK7double2PS0_S3_ii: # @_ZN4Mesh8calcCtrsEPK7double2PS0_S3_ii .size _ZN4Mesh8calcCtrsEPK7double2PS0_S3_ii, .Lfunc_end9-_ZN4Mesh8calcCtrsEPK7double2PS0_S3_ii .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN4Mesh8calcVolsEPK7double2S2_PdS3_S3_S3_ii -.LCPI10_0: - .dword 0x3fd5555555555555 # double 0.33333333333333331 - .text - .globl _ZN4Mesh8calcVolsEPK7double2S2_PdS3_S3_S3_ii + .globl _ZN4Mesh8calcVolsEPK7double2S2_PdS3_S3_S3_ii # -- Begin function _ZN4Mesh8calcVolsEPK7double2S2_PdS3_S3_S3_ii .p2align 5 .type _ZN4Mesh8calcVolsEPK7double2S2_PdS3_S3_S3_ii,@function _ZN4Mesh8calcVolsEPK7double2S2_PdS3_S3_S3_ii: # @_ZN4Mesh8calcVolsEPK7double2S2_PdS3_S3_S3_ii @@ -3809,7 +3804,7 @@ _ZN4Mesh8calcVolsEPK7double2S2_PdS3_S3_S3_ii: # @_ZN4Mesh8calcVolsEPK7double2S2_ .cfi_offset 29, -72 .cfi_offset 30, -80 .cfi_offset 31, -88 - move $fp, $a0 + move $t5, $a0 ld.d $s8, $sp, 112 ld.d $s5, $a0, 104 ld.w $a0, $a0, 72 @@ -3818,13 +3813,14 @@ _ZN4Mesh8calcVolsEPK7double2S2_PdS3_S3_S3_ii: # @_ZN4Mesh8calcVolsEPK7double2S2_ ldx.w $s6, $s5, $a7 slt $a0, $s8, $a0 alsl.d $a7, $s8, $s5, 2 - addi.d $t0, $fp, 68 + addi.d $t0, $t5, 68 maskeqz $a7, $a7, $a0 masknez $a0, $t0, $a0 or $a0, $a7, $a0 ld.w $a7, $a0, 0 move $s0, $a6 move $s1, $a5 + move $fp, $a3 move $s2, $a2 move $s3, $a1 beq $s6, $a7, .LBB10_2 @@ -3835,7 +3831,7 @@ _ZN4Mesh8calcVolsEPK7double2S2_PdS3_S3_S3_ii: # @_ZN4Mesh8calcVolsEPK7double2S2_ move $a1, $zero move $a2, $s7 st.d $a4, $sp, 16 # 8-byte Folded Spill - st.d $a3, $sp, 8 # 8-byte Folded Spill + st.d $t5, $sp, 8 # 8-byte Folded Spill pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 alsl.d $a0, $s6, $s1, 3 @@ -3843,29 +3839,32 @@ _ZN4Mesh8calcVolsEPK7double2S2_PdS3_S3_S3_ii: # @_ZN4Mesh8calcVolsEPK7double2S2_ move $a2, $s7 pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ld.d $a3, $sp, 8 # 8-byte Folded Reload + ld.d $t5, $sp, 8 # 8-byte Folded Reload ld.d $a4, $sp, 16 # 8-byte Folded Reload .LBB10_2: # %_ZSt4fillIPddEvT_S1_RKT0_.exit57 bge $s4, $s8, .LBB10_7 # %bb.3: # %.lr.ph - ld.d $a1, $fp, 88 - ld.d $a2, $fp, 96 + ld.d $a1, $t5, 88 + ld.d $a2, $t5, 96 move $a0, $zero alsl.d $a1, $s4, $a1, 2 alsl.d $a2, $s4, $a2, 2 - alsl.d $t5, $s4, $s5, 2 - alsl.d $t4, $s4, $a3, 3 + alsl.d $t6, $s4, $s5, 2 + alsl.d $t4, $s4, $fp, 3 alsl.d $a5, $s4, $a4, 3 - pcalau12i $a6, %pc_hi20(.LCPI10_0) - fld.d $fa0, $a6, %pc_lo12(.LCPI10_0) sub.d $a6, $s8, $s4 + lu12i.w $a7, 349525 + ori $a7, $a7, 1365 + lu32i.d $a7, 349525 + lu52i.d $a7, $a7, 1021 + movgr2fr.d $fa0, $a7 vldi $vr1, -928 movgr2fr.d $fa2, $zero .p2align 4, , 16 .LBB10_4: # =>This Inner Loop Header: Depth=1 ld.w $a7, $a2, 0 ld.w $t0, $a1, 0 - ld.w $t1, $t5, 0 + ld.w $t1, $t6, 0 alsl.d $t2, $a7, $s3, 4 slli.d $a7, $a7, 4 alsl.d $t3, $t0, $s3, 4 @@ -3904,7 +3903,7 @@ _ZN4Mesh8calcVolsEPK7double2S2_PdS3_S3_S3_ii: # @_ZN4Mesh8calcVolsEPK7double2S2_ add.w $a0, $a0, $a7 addi.d $a1, $a1, 4 addi.d $a2, $a2, 4 - addi.d $t5, $t5, 4 + addi.d $t6, $t6, 4 addi.d $t4, $t4, 8 addi.d $a6, $a6, -1 addi.d $a5, $a5, 8 @@ -3912,9 +3911,9 @@ _ZN4Mesh8calcVolsEPK7double2S2_PdS3_S3_S3_ii: # @_ZN4Mesh8calcVolsEPK7double2S2_ # %bb.5: # %._crit_edge beqz $a0, .LBB10_7 # %bb.6: - ld.w $a1, $fp, 80 + ld.w $a1, $t5, 80 add.d $a0, $a1, $a0 - st.w $a0, $fp, 80 + st.w $a0, $t5, 80 .LBB10_7: # %._crit_edge.thread ld.d $s8, $sp, 24 # 8-byte Folded Reload ld.d $s7, $sp, 32 # 8-byte Folded Reload @@ -4246,12 +4245,7 @@ _ZN4Mesh5writeERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEidPKdS9_S9_ .size _ZN4Mesh5writeERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEidPKdS9_S9_, .Lfunc_end14-_ZN4Mesh5writeERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEidPKdS9_S9_ .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN4Mesh9getXPlaneEd -.LCPI15_0: - .dword 0x3d719799812dea11 # double 9.9999999999999998E-13 - .text - .globl _ZN4Mesh9getXPlaneEd + .globl _ZN4Mesh9getXPlaneEd # -- Begin function _ZN4Mesh9getXPlaneEd .p2align 5 .type _ZN4Mesh9getXPlaneEd,@function _ZN4Mesh9getXPlaneEd: # @_ZN4Mesh9getXPlaneEd @@ -4303,8 +4297,11 @@ _ZN4Mesh9getXPlaneEd: # @_ZN4Mesh9getXPlaneEd move $a1, $zero move $s6, $zero move $s1, $zero - pcalau12i $a2, %pc_hi20(.LCPI15_0) - fld.d $fs1, $a2, %pc_lo12(.LCPI15_0) + lu12i.w $a2, -519458 + ori $a2, $a2, 2577 + lu32i.d $a2, 104345 + lu52i.d $a2, $a2, 983 + movgr2fr.d $fs1, $a2 addi.w $a2, $zero, -4 lu52i.d $a2, $a2, 2047 st.d $a2, $sp, 16 # 8-byte Folded Spill @@ -4467,12 +4464,8 @@ GCC_except_table15: .Lcst_end4: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN4Mesh9getYPlaneEd -.LCPI16_0: - .dword 0x3d719799812dea11 # double 9.9999999999999998E-13 .text - .globl _ZN4Mesh9getYPlaneEd + .globl _ZN4Mesh9getYPlaneEd # -- Begin function _ZN4Mesh9getYPlaneEd .p2align 5 .type _ZN4Mesh9getYPlaneEd,@function _ZN4Mesh9getYPlaneEd: # @_ZN4Mesh9getYPlaneEd @@ -4524,8 +4517,11 @@ _ZN4Mesh9getYPlaneEd: # @_ZN4Mesh9getYPlaneEd move $s5, $zero move $s1, $zero ori $s6, $zero, 8 - pcalau12i $a2, %pc_hi20(.LCPI16_0) - fld.d $fs1, $a2, %pc_lo12(.LCPI16_0) + lu12i.w $a2, -519458 + ori $a2, $a2, 2577 + lu32i.d $a2, 104345 + lu52i.d $a2, $a2, 983 + movgr2fr.d $fs1, $a2 addi.w $a2, $zero, -4 lu52i.d $a2, $a2, 2047 st.d $a2, $sp, 16 # 8-byte Folded Spill diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/PolyGas.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/PolyGas.s index 1a16396e..4f601c1e 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/PolyGas.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/PolyGas.s @@ -1,10 +1,6 @@ .file "PolyGas.cc" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN7PolyGasC2EPK9InputFileP5Hydro -.LCPI0_0: - .dword 0x3ffaaaaaaaaaaaab # double 1.6666666666666667 .text - .globl _ZN7PolyGasC2EPK9InputFileP5Hydro + .globl _ZN7PolyGasC2EPK9InputFileP5Hydro # -- Begin function _ZN7PolyGasC2EPK9InputFileP5Hydro .p2align 5 .type _ZN7PolyGasC2EPK9InputFileP5Hydro,@function _ZN7PolyGasC2EPK9InputFileP5Hydro: # @_ZN7PolyGasC2EPK9InputFileP5Hydro @@ -38,8 +34,11 @@ _ZN7PolyGasC2EPK9InputFileP5Hydro: # @_ZN7PolyGasC2EPK9InputFileP5Hydro st.d $a0, $sp, 24 st.b $zero, $sp, 37 .Ltmp0: # EH_LABEL - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_0) + lu12i.w $a0, -349526 + ori $a0, $a0, 2731 + lu32i.d $a0, -349526 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa0, $a0 addi.d $a1, $sp, 16 move $a0, $s0 pcaddu18i $ra, %call36(_ZNK9InputFile9getDoubleERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEd) @@ -138,12 +137,8 @@ GCC_except_table0: .Lcst_end0: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN7PolyGas15calcStateAtHalfEPKdS1_S1_S1_S1_S1_dPdS2_ii -.LCPI1_0: - .dword 0x2b617f7d4ed8c33e # double 1.0E-99 .text - .globl _ZN7PolyGas15calcStateAtHalfEPKdS1_S1_S1_S1_S1_dPdS2_ii + .globl _ZN7PolyGas15calcStateAtHalfEPKdS1_S1_S1_S1_S1_dPdS2_ii # -- Begin function _ZN7PolyGas15calcStateAtHalfEPKdS1_S1_S1_S1_S1_dPdS2_ii .p2align 5 .type _ZN7PolyGas15calcStateAtHalfEPKdS1_S1_S1_S1_S1_dPdS2_ii,@function _ZN7PolyGas15calcStateAtHalfEPKdS1_S1_S1_S1_S1_dPdS2_ii: # @_ZN7PolyGas15calcStateAtHalfEPKdS1_S1_S1_S1_S1_dPdS2_ii @@ -187,16 +182,19 @@ _ZN7PolyGas15calcStateAtHalfEPKdS1_S1_S1_S1_S1_dPdS2_ii: # @_ZN7PolyGas15calcSta st.d $s2, $sp, 48 # 8-byte Folded Spill st.d $s1, $sp, 56 # 8-byte Folded Spill vldi $vr0, -928 + fld.d $fa1, $s6, 8 + fld.d $fa2, $s6, 16 fmul.d $ft2, $fs0, $fa0 - fld.d $fa0, $s6, 8 - fld.d $fa1, $s6, 16 - pcalau12i $a1, %pc_hi20(.LCPI1_0) - fld.d $fa2, $a1, %pc_lo12(.LCPI1_0) - vldi $vr3, -784 - fadd.d $fs0, $fa0, $fa3 - fmul.d $fa0, $fa1, $fa1 - fcmp.clt.d $fcc0, $fa0, $fa2 - fsel $fs1, $fa0, $fa2, $fcc0 + vldi $vr0, -784 + fadd.d $fs0, $fa1, $fa0 + fmul.d $fa0, $fa2, $fa2 + lu12i.w $a1, 322956 + ori $a1, $a1, 830 + lu32i.d $a1, 98173 + lu52i.d $a1, $a1, 694 + movgr2fr.d $fa1, $a1 + fcmp.clt.d $fcc0, $fa0, $fa1 + fsel $fs1, $fa0, $fa1, $fcc0 alsl.d $s6, $s0, $s7, 3 alsl.d $s8, $s0, $fp, 3 ld.d $a1, $sp, 64 # 8-byte Folded Reload @@ -425,12 +423,7 @@ _ZN7PolyGas15calcStateAtHalfEPKdS1_S1_S1_S1_S1_dPdS2_ii: # @_ZN7PolyGas15calcSta .Lfunc_end1: .size _ZN7PolyGas15calcStateAtHalfEPKdS1_S1_S1_S1_S1_dPdS2_ii, .Lfunc_end1-_ZN7PolyGas15calcStateAtHalfEPKdS1_S1_S1_S1_S1_dPdS2_ii # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN7PolyGas7calcEOSEPKdS1_PdS2_S2_ii -.LCPI2_0: - .dword 0x2b617f7d4ed8c33e # double 1.0E-99 - .text - .globl _ZN7PolyGas7calcEOSEPKdS1_PdS2_S2_ii + .globl _ZN7PolyGas7calcEOSEPKdS1_PdS2_S2_ii # -- Begin function _ZN7PolyGas7calcEOSEPKdS1_PdS2_S2_ii .p2align 5 .type _ZN7PolyGas7calcEOSEPKdS1_PdS2_S2_ii,@function _ZN7PolyGas7calcEOSEPKdS1_PdS2_S2_ii: # @_ZN7PolyGas7calcEOSEPKdS1_PdS2_S2_ii @@ -450,13 +443,16 @@ _ZN7PolyGas7calcEOSEPKdS1_PdS2_S2_ii: # @_ZN7PolyGas7calcEOSEPKdS1_PdS2_S2_ii fst.d $fs2, $sp, 16 # 8-byte Folded Spill fld.d $fa0, $a0, 8 fld.d $fa1, $a0, 16 - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI2_0) - vldi $vr3, -784 - fadd.d $fs0, $fa0, $fa3 + vldi $vr2, -784 + fadd.d $fs0, $fa0, $fa2 fmul.d $fa0, $fa1, $fa1 - fcmp.clt.d $fcc0, $fa0, $fa2 - fsel $fs1, $fa0, $fa2, $fcc0 + lu12i.w $a0, 322956 + ori $a0, $a0, 830 + lu32i.d $a0, 98173 + lu52i.d $a0, $a0, 694 + movgr2fr.d $fa1, $a0 + fcmp.clt.d $fcc0, $fa0, $fa1 + fsel $fs1, $fa0, $fa1, $fcc0 alsl.d $fp, $a6, $a1, 3 alsl.d $s0, $a6, $a2, 3 alsl.d $s1, $a6, $a3, 3 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/QCS.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/QCS.s index c15e0ba7..a1c3390e 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/QCS.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/PENNANT/CMakeFiles/PENNANT.dir/QCS.s @@ -1,10 +1,6 @@ .file "QCS.cc" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN3QCSC2EPK9InputFileP5Hydro -.LCPI0_0: - .dword 0x3ffaaaaaaaaaaaab # double 1.6666666666666667 .text - .globl _ZN3QCSC2EPK9InputFileP5Hydro + .globl _ZN3QCSC2EPK9InputFileP5Hydro # -- Begin function _ZN3QCSC2EPK9InputFileP5Hydro .p2align 5 .type _ZN3QCSC2EPK9InputFileP5Hydro,@function _ZN3QCSC2EPK9InputFileP5Hydro: # @_ZN3QCSC2EPK9InputFileP5Hydro @@ -40,8 +36,11 @@ _ZN3QCSC2EPK9InputFileP5Hydro: # @_ZN3QCSC2EPK9InputFileP5Hydro st.d $a0, $sp, 16 st.b $zero, $sp, 30 .Ltmp0: # EH_LABEL - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_0) + lu12i.w $a0, -349526 + ori $a0, $a0, 2731 + lu32i.d $a0, -349526 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa0, $a0 addi.d $a1, $sp, 8 move $a0, $s0 pcaddu18i $ra, %call36(_ZNK9InputFile9getDoubleERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEd) @@ -296,12 +295,7 @@ _ZN3QCS9calcForceEP7double2ii: # @_ZN3QCS9calcForceEP7double2ii .Lfunc_end2: .size _ZN3QCS9calcForceEP7double2ii, .Lfunc_end2-_ZN3QCS9calcForceEP7double2ii # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN3QCS12setCornerDivEPdS0_S0_S0_S0_ii -.LCPI3_0: - .dword 0x3d719799812dea11 # double 9.9999999999999998E-13 - .text - .globl _ZN3QCS12setCornerDivEPdS0_S0_S0_S0_ii + .globl _ZN3QCS12setCornerDivEPdS0_S0_S0_S0_ii # -- Begin function _ZN3QCS12setCornerDivEPdS0_S0_S0_S0_ii .p2align 5 .type _ZN3QCS12setCornerDivEPdS0_S0_S0_S0_ii,@function _ZN3QCS12setCornerDivEPdS0_S0_S0_S0_ii: # @_ZN3QCS12setCornerDivEPdS0_S0_S0_S0_ii @@ -461,9 +455,12 @@ _ZN3QCS12setCornerDivEPdS0_S0_S0_S0_ii: # @_ZN3QCS12setCornerDivEPdS0_S0_S0_S0_i alsl.d $s7, $a6, $t8, 2 alsl.d $s5, $a6, $ra, 2 vldi $vr23, -928 - pcalau12i $a1, %pc_hi20(.LCPI3_0) - fld.d $ft12, $a1, %pc_lo12(.LCPI3_0) - movgr2fr.d $ft13, $zero + movgr2fr.d $ft12, $zero + lu12i.w $a1, -519458 + ori $a1, $a1, 2577 + lu32i.d $a1, 104345 + lu52i.d $a1, $a1, 983 + movgr2fr.d $ft13, $a1 vldi $vr22, -944 .p2align 4, , 16 .LBB3_10: # =>This Inner Loop Header: Depth=1 @@ -524,8 +521,8 @@ _ZN3QCS12setCornerDivEPdS0_S0_S0_S0_ii: # @_ZN3QCS12setCornerDivEPdS0_S0_S0_S0_i fcmp.clt.d $fcc0, $ft10, $ft9 fsel $ft11, $ft9, $ft10, $fcc0 fst.d $ft11, $sp, 112 # 8-byte Folded Spill - fcmp.clt.d $fcc0, $ft11, $ft12 - fmov.d $ft11, $ft13 + fcmp.clt.d $fcc0, $ft11, $ft13 + fmov.d $ft11, $ft12 bcnez $fcc0, .LBB3_12 # %bb.11: # in Loop: Header=BB3_10 Depth=1 fsub.d $ft11, $fa4, $fa1 @@ -940,12 +937,7 @@ _ZN3QCS11setQCnForceEPKdS1_S1_P7double2ii: # @_ZN3QCS11setQCnForceEPKdS1_S1_P7do .size _ZN3QCS11setQCnForceEPKdS1_S1_P7double2ii, .Lfunc_end4-_ZN3QCS11setQCnForceEPKdS1_S1_P7double2ii .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN3QCS8setForceEPKdPK7double2PdPS2_ii -.LCPI5_0: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 - .text - .globl _ZN3QCS8setForceEPKdPK7double2PdPS2_ii + .globl _ZN3QCS8setForceEPKdPK7double2PdPS2_ii # -- Begin function _ZN3QCS8setForceEPKdPK7double2PdPS2_ii .p2align 5 .type _ZN3QCS8setForceEPKdPK7double2PdPS2_ii,@function _ZN3QCS8setForceEPKdPK7double2PdPS2_ii: # @_ZN3QCS8setForceEPKdPK7double2PdPS2_ii @@ -989,10 +981,13 @@ _ZN3QCS8setForceEPKdPK7double2PdPS2_ii: # @_ZN3QCS8setForceEPKdPK7double2PdPS2_i jirl $ra, $ra, 0 bge $fp, $s2, .LBB5_7 # %bb.1: # %.lr.ph.preheader - pcalau12i $a1, %pc_hi20(.LCPI5_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI5_0) - vldi $vr1, -912 - movgr2fr.d $fa2, $zero + vldi $vr0, -912 + movgr2fr.d $fa1, $zero + lu12i.w $a1, -85564 + ori $a1, $a1, 813 + lu32i.d $a1, -379166 + lu52i.d $a1, $a1, 1009 + movgr2fr.d $fa2, $a1 move $a1, $s0 move $a2, $a0 b .LBB5_3 @@ -1010,10 +1005,10 @@ _ZN3QCS8setForceEPKdPK7double2PdPS2_ii: # @_ZN3QCS8setForceEPKdPK7double2PdPS2_i # =>This Inner Loop Header: Depth=1 fld.d $fa3, $a1, 0 fneg.d $fa4, $fa3 - fmadd.d $fa5, $fa4, $fa3, $fa1 - fcmp.clt.d $fcc0, $fa5, $fa0 - fmov.d $fa4, $fa2 - fmov.d $fa6, $fa2 + fmadd.d $fa5, $fa4, $fa3, $fa0 + fcmp.clt.d $fcc0, $fa5, $fa2 + fmov.d $fa4, $fa1 + fmov.d $fa6, $fa1 bcnez $fcc0, .LBB5_2 # %bb.4: # in Loop: Header=BB5_3 Depth=1 fld.d $fa4, $s4, 0 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CMakeFiles/miniFE.dir/main.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CMakeFiles/miniFE.dir/main.s index 2155dba4..ed0f6f44 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CMakeFiles/miniFE.dir/main.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CMakeFiles/miniFE.dir/main.s @@ -3286,22 +3286,12 @@ GCC_except_table6: .Lcst_end3: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN6miniFE6driverIdiiEEiRK3BoxRS1_RNS_10ParametersER8YAML_Doc -.LCPI7_0: - .dword 0x3faeb851eb851eb8 # double 0.059999999999999998 -.LCPI7_2: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI7_3: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI7_4: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI7_1: + .p2align 4, 0x0 # -- Begin function _ZN6miniFE6driverIdiiEEiRK3BoxRS1_RNS_10ParametersER8YAML_Doc +.LCPI7_0: .dword 8 # 0x8 .dword 7311150271011243843 # 0x65766c6f73204743 -.LCPI7_5: +.LCPI7_1: .dword 8 # 0x8 .dword 7308613580332748612 # 0x656d695420544f44 .section .text._ZN6miniFE6driverIdiiEEiRK3BoxRS1_RNS_10ParametersER8YAML_Doc,"axG",@progbits,_ZN6miniFE6driverIdiiEEiRK3BoxRS1_RNS_10ParametersER8YAML_Doc,comdat @@ -4089,8 +4079,11 @@ _ZN6miniFE6driverIdiiEEiRK3BoxRS1_RNS_10ParametersER8YAML_Doc: # @_ZN6miniFE6dri .Ltmp384: # EH_LABEL # %bb.98: # %_ZNSolsEPFRSoS_E.exit396 .Ltmp385: # EH_LABEL - pcalau12i $a0, %pc_hi20(.LCPI7_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI7_0) + lu12i.w $a0, -83887 + ori $a0, $a0, 3768 + lu32i.d $a0, -83887 + lu52i.d $a0, $a0, 1018 + movgr2fr.d $fa0, $a0 addi.d $a0, $sp, 488 addi.d $a1, $sp, 288 move $a2, $zero @@ -4100,8 +4093,8 @@ _ZN6miniFE6driverIdiiEEiRK3BoxRS1_RNS_10ParametersER8YAML_Doc: # @_ZN6miniFE6dri # %bb.99: st.d $a0, $sp, 24 # 8-byte Folded Spill .LBB7_100: # %._crit_edge.i.i397 - pcalau12i $a0, %pc_hi20(.LCPI7_1) - vld $vr0, $a0, %pc_lo12(.LCPI7_1) + pcalau12i $a0, %pc_hi20(.LCPI7_0) + vld $vr0, $a0, %pc_lo12(.LCPI7_0) st.d $s5, $sp, 256 vst $vr0, $sp, 264 st.b $zero, $sp, 280 @@ -4470,12 +4463,13 @@ _ZN6miniFE6driverIdiiEEiRK3BoxRS1_RNS_10ParametersER8YAML_Doc: # @_ZN6miniFE6dri ld.d $a1, $sp, 48 # 8-byte Folded Reload mul.d $a0, $a0, $a1 srli.d $a1, $s2, 32 - pcalau12i $a2, %pc_hi20(.LCPI7_2) - fld.d $fa0, $a2, %pc_lo12(.LCPI7_2) lu52i.d $a2, $zero, 1107 or $a1, $a1, $a2 + movgr2fr.d $fa0, $a1 + lu12i.w $a1, 256 + lu52i.d $a1, $a1, 1107 movgr2fr.d $fa1, $a1 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 lu12i.w $a1, 275200 bstrins.d $s2, $a1, 63, 32 movgr2fr.d $fa1, $s2 @@ -4493,18 +4487,24 @@ _ZN6miniFE6driverIdiiEEiRK3BoxRS1_RNS_10ParametersER8YAML_Doc: # @_ZN6miniFE6dri slli.d $a1, $a0, 1 movgr2fr.w $fa0, $a1 ffint.d.w $fa4, $fa0 - fld.d $fa6, $sp, 208 - pcalau12i $a1, %pc_hi20(.LCPI7_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI7_3) alsl.d $a0, $a0, $a0, 1 addi.d $a0, $a0, 2 movgr2fr.w $fa5, $a0 + fld.d $fa6, $sp, 208 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa0, $a0 fcmp.cule.d $fcc0, $fa6, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI7_4) + lu12i.w $a0, -390306 bcnez $fcc0, .LBB7_143 # %bb.142: - fld.d $fa7, $a0, %pc_lo12(.LCPI7_4) fdiv.d $fa6, $fs2, $fa6 + ori $a1, $a0, 3469 + lu32i.d $a1, 50935 + lu52i.d $a1, $a1, 1003 + movgr2fr.d $fa7, $a1 fmul.d $fa6, $fa6, $fa7 b .LBB7_144 .LBB7_143: @@ -4518,9 +4518,12 @@ _ZN6miniFE6driverIdiiEEiRK3BoxRS1_RNS_10ParametersER8YAML_Doc: # @_ZN6miniFE6dri ffint.d.w $fa2, $fa5 bcnez $fcc0, .LBB7_146 # %bb.145: - fld.d $fa3, $a0, %pc_lo12(.LCPI7_4) - fdiv.d $fa4, $fs3, $fa6 - fmul.d $fa3, $fa4, $fa3 + fdiv.d $fa3, $fs3, $fa6 + ori $a1, $a0, 3469 + lu32i.d $a1, 50935 + lu52i.d $a1, $a1, 1003 + movgr2fr.d $fa4, $a1 + fmul.d $fa3, $fa3, $fa4 b .LBB7_147 .LBB7_146: vldi $vr3, -784 @@ -4532,9 +4535,12 @@ _ZN6miniFE6driverIdiiEEiRK3BoxRS1_RNS_10ParametersER8YAML_Doc: # @_ZN6miniFE6dri fadd.d $fa1, $fs2, $fs3 bcnez $fcc0, .LBB7_149 # %bb.148: - fld.d $fa2, $a0, %pc_lo12(.LCPI7_4) - fdiv.d $fa3, $fs4, $fa3 - fmul.d $fa2, $fa3, $fa2 + fdiv.d $fa2, $fs4, $fa3 + ori $a1, $a0, 3469 + lu32i.d $a1, 50935 + lu52i.d $a1, $a1, 1003 + movgr2fr.d $fa3, $a1 + fmul.d $fa2, $fa2, $fa3 b .LBB7_150 .LBB7_149: vldi $vr2, -784 @@ -4545,9 +4551,12 @@ _ZN6miniFE6driverIdiiEEiRK3BoxRS1_RNS_10ParametersER8YAML_Doc: # @_ZN6miniFE6dri fadd.d $fs1, $fa1, $fs4 bcnez $fcc0, .LBB7_152 # %bb.151: - fld.d $fa0, $a0, %pc_lo12(.LCPI7_4) - fdiv.d $fa1, $fs1, $fa2 - fmul.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fs1, $fa2 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 b .LBB7_153 .LBB7_152: vldi $vr0, -784 @@ -4704,8 +4713,8 @@ _ZN6miniFE6driverIdiiEEiRK3BoxRS1_RNS_10ParametersER8YAML_Doc: # @_ZN6miniFE6dri jirl $ra, $ra, 0 .Ltmp465: # EH_LABEL # %bb.171: # %._crit_edge.i.i571 - pcalau12i $a1, %pc_hi20(.LCPI7_5) - vld $vr1, $a1, %pc_lo12(.LCPI7_5) + pcalau12i $a1, %pc_hi20(.LCPI7_1) + vld $vr1, $a1, %pc_lo12(.LCPI7_1) fld.d $fa0, $sp, 200 st.d $s4, $sp, 160 vst $vr1, $sp, 168 @@ -6282,16 +6291,8 @@ __clang_call_terminate: # @__clang_call_terminate .Lfunc_end8: .size __clang_call_terminate, .Lfunc_end8-__clang_call_terminate # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN6miniFE17compute_imbalanceIiEEvRK3BoxS3_RfS4_R8YAML_Docb -.LCPI9_0: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI9_1: - .word 0x42c80000 # float 100 .section .text._ZN6miniFE17compute_imbalanceIiEEvRK3BoxS3_RfS4_R8YAML_Docb,"axG",@progbits,_ZN6miniFE17compute_imbalanceIiEEvRK3BoxS3_RfS4_R8YAML_Docb,comdat - .weak _ZN6miniFE17compute_imbalanceIiEEvRK3BoxS3_RfS4_R8YAML_Docb + .weak _ZN6miniFE17compute_imbalanceIiEEvRK3BoxS3_RfS4_R8YAML_Docb # -- Begin function _ZN6miniFE17compute_imbalanceIiEEvRK3BoxS3_RfS4_R8YAML_Docb .p2align 5 .type _ZN6miniFE17compute_imbalanceIiEEvRK3BoxS3_RfS4_R8YAML_Docb,@function _ZN6miniFE17compute_imbalanceIiEEvRK3BoxS3_RfS4_R8YAML_Docb: # @_ZN6miniFE17compute_imbalanceIiEEvRK3BoxS3_RfS4_R8YAML_Docb @@ -6334,12 +6335,15 @@ _ZN6miniFE17compute_imbalanceIiEEvRK3BoxS3_RfS4_R8YAML_Docb: # @_ZN6miniFE17comp mulw.d.w $a0, $a6, $a0 mul.d $a0, $a0, $a1 movgr2fr.w $fa0, $a0 - pcalau12i $a1, %pc_hi20(.LCPI9_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI9_0) ffint.s.w $fa0, $fa0 - fabs.s $fa2, $fa0 - fcvt.d.s $fa2, $fa2 - fcmp.cule.d $fcc0, $fa2, $fa1 + fabs.s $fa1, $fa0 + fcvt.d.s $fa1, $fa1 + lu12i.w $a1, -487882 + ori $a1, $a1, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa2, $a1 + fcmp.cule.d $fcc0, $fa1, $fa2 move $s0, $a4 move $fp, $a3 move $s1, $a2 @@ -6355,11 +6359,11 @@ _ZN6miniFE17compute_imbalanceIiEEvRK3BoxS3_RfS4_R8YAML_Docb: # @_ZN6miniFE17comp ffint.s.w $fa1, $fa1 srai.d $a1, $a0, 63 andn $a0, $a0, $a1 - pcalau12i $a1, %pc_hi20(.LCPI9_1) - fld.s $fa2, $a1, %pc_lo12(.LCPI9_1) fsub.s $fa1, $fa1, $fa0 fabs.s $fa1, $fa1 fdiv.s $fa1, $fa1, $fa0 + lu12i.w $a1, 273536 + movgr2fr.w $fa2, $a1 fmul.s $fa1, $fa1, $fa2 movgr2fr.w $fa3, $a0 ffint.s.w $fa3, $fa3 @@ -10000,16 +10004,8 @@ _ZN6miniFE16impose_dirichletINS_9CSRMatrixIdiiEENS_6VectorIdiiEEEEvNT_10ScalarTy .size _ZN6miniFE16impose_dirichletINS_9CSRMatrixIdiiEENS_6VectorIdiiEEEEvNT_10ScalarTypeERS5_RT0_iiiRKSt3setINS5_17GlobalOrdinalTypeESt4lessISB_ESaISB_EE, .Lfunc_end12-_ZN6miniFE16impose_dirichletINS_9CSRMatrixIdiiEENS_6VectorIdiiEEEEvNT_10ScalarTypeERS5_RT0_iiiRKSt3setINS5_17GlobalOrdinalTypeESt4lessISB_ESaISB_EE .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN6miniFE20compute_matrix_statsINS_9CSRMatrixIdiiEEEEmRKT_iiR8YAML_Doc -.LCPI13_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI13_1: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI13_2: - .dword 0x3e10000000000000 # double 9.3132257461547852E-10 .section .text._ZN6miniFE20compute_matrix_statsINS_9CSRMatrixIdiiEEEEmRKT_iiR8YAML_Doc,"axG",@progbits,_ZN6miniFE20compute_matrix_statsINS_9CSRMatrixIdiiEEEEmRKT_iiR8YAML_Doc,comdat - .weak _ZN6miniFE20compute_matrix_statsINS_9CSRMatrixIdiiEEEEmRKT_iiR8YAML_Doc + .weak _ZN6miniFE20compute_matrix_statsINS_9CSRMatrixIdiiEEEEmRKT_iiR8YAML_Doc # -- Begin function _ZN6miniFE20compute_matrix_statsINS_9CSRMatrixIdiiEEEEmRKT_iiR8YAML_Doc .p2align 5 .type _ZN6miniFE20compute_matrix_statsINS_9CSRMatrixIdiiEEEEmRKT_iiR8YAML_Doc,@function _ZN6miniFE20compute_matrix_statsINS_9CSRMatrixIdiiEEEEmRKT_iiR8YAML_Doc: # @_ZN6miniFE20compute_matrix_statsINS_9CSRMatrixIdiiEEEEmRKT_iiR8YAML_Doc @@ -10054,18 +10050,19 @@ _ZN6miniFE20compute_matrix_statsINS_9CSRMatrixIdiiEEEEmRKT_iiR8YAML_Doc: # @_ZN6 ld.w $a4, $a4, -4 move $s0, $a3 srli.d $a3, $a4, 32 - pcalau12i $a5, %pc_hi20(.LCPI13_0) - fld.d $fs3, $a5, %pc_lo12(.LCPI13_0) lu52i.d $s7, $zero, 1107 or $a3, $a3, $s7 movgr2fr.d $fa0, $a3 + lu12i.w $a3, 256 + lu52i.d $a3, $a3, 1107 + movgr2fr.d $fs3, $a3 fsub.d $fa0, $fa0, $fs3 lu12i.w $s6, 275200 - pcalau12i $a3, %pc_hi20(.LCPI13_1) - fld.d $fs1, $a3, %pc_lo12(.LCPI13_1) bstrins.d $a4, $s6, 63, 32 movgr2fr.d $fa1, $a4 fadd.d $fs0, $fa1, $fa0 + lu52i.d $a3, $zero, 1086 + movgr2fr.d $fs1, $a3 fcmp.clt.d $fcc0, $fs0, $fs1 ftintrz.l.d $fa0, $fs0 movfr2gr.d $a3, $fa0 @@ -10302,11 +10299,11 @@ _ZN6miniFE20compute_matrix_statsINS_9CSRMatrixIdiiEEEEmRKT_iiR8YAML_Doc: # @_ZN6 jirl $ra, $ra, 0 .Ltmp777: # EH_LABEL # %bb.24: # %.noexc163 - pcalau12i $a1, %pc_hi20(.LCPI13_2) - fld.d $fa0, $a1, %pc_lo12(.LCPI13_2) srli.d $s2, $s8, 2 - movgr2fr.w $fa1, $s2 - ffint.d.w $fs2, $fa1 + movgr2fr.w $fa0, $s2 + ffint.d.w $fs2, $fa0 + lu52i.d $a1, $zero, 993 + movgr2fr.d $fa0, $a1 fmul.d $fa1, $fs2, $fa0 vldi $vr2, -1008 fmul.d $fa1, $fa1, $fa2 @@ -12220,16 +12217,8 @@ GCC_except_table14: .Lcst_end9: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4_EEEEvRT_RKT0_RS9_T1_NS7_16LocalOrdinalTypeERNS_10TypeTraitsINS7_10ScalarTypeEE14magnitude_typeERSE_SJ_Pd -.LCPI15_0: - .dword 0x3cb0000000000000 # double 2.2204460492503131E-16 -.LCPI15_1: - .dword 0x4059000000000000 # double 100 -.LCPI15_2: - .dword 0x3fb999999999999a # double 0.10000000000000001 .section .text._ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4_EEEEvRT_RKT0_RS9_T1_NS7_16LocalOrdinalTypeERNS_10TypeTraitsINS7_10ScalarTypeEE14magnitude_typeERSE_SJ_Pd,"axG",@progbits,_ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4_EEEEvRT_RKT0_RS9_T1_NS7_16LocalOrdinalTypeERNS_10TypeTraitsINS7_10ScalarTypeEE14magnitude_typeERSE_SJ_Pd,comdat - .weak _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4_EEEEvRT_RKT0_RS9_T1_NS7_16LocalOrdinalTypeERNS_10TypeTraitsINS7_10ScalarTypeEE14magnitude_typeERSE_SJ_Pd + .weak _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4_EEEEvRT_RKT0_RS9_T1_NS7_16LocalOrdinalTypeERNS_10TypeTraitsINS7_10ScalarTypeEE14magnitude_typeERSE_SJ_Pd # -- Begin function _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4_EEEEvRT_RKT0_RS9_T1_NS7_16LocalOrdinalTypeERNS_10TypeTraitsINS7_10ScalarTypeEE14magnitude_typeERSE_SJ_Pd .p2align 5 .type _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4_EEEEvRT_RKT0_RS9_T1_NS7_16LocalOrdinalTypeERNS_10TypeTraitsINS7_10ScalarTypeEE14magnitude_typeERSE_SJ_Pd,@function _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4_EEEEvRT_RKT0_RS9_T1_NS7_16LocalOrdinalTypeERNS_10TypeTraitsINS7_10ScalarTypeEE14magnitude_typeERSE_SJ_Pd: # @_ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4_EEEEvRT_RKT0_RS9_T1_NS7_16LocalOrdinalTypeERNS_10TypeTraitsINS7_10ScalarTypeEE14magnitude_typeERSE_SJ_Pd @@ -12238,27 +12227,27 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 .cfi_personality 155, DW.ref.__gxx_personality_v0 .cfi_lsda 27, .Lexception10 # %bb.0: - addi.d $sp, $sp, -368 - .cfi_def_cfa_offset 368 - st.d $ra, $sp, 360 # 8-byte Folded Spill - st.d $fp, $sp, 352 # 8-byte Folded Spill - st.d $s0, $sp, 344 # 8-byte Folded Spill - st.d $s1, $sp, 336 # 8-byte Folded Spill - st.d $s2, $sp, 328 # 8-byte Folded Spill - st.d $s3, $sp, 320 # 8-byte Folded Spill - st.d $s4, $sp, 312 # 8-byte Folded Spill - st.d $s5, $sp, 304 # 8-byte Folded Spill - st.d $s6, $sp, 296 # 8-byte Folded Spill - st.d $s7, $sp, 288 # 8-byte Folded Spill - st.d $s8, $sp, 280 # 8-byte Folded Spill - fst.d $fs0, $sp, 272 # 8-byte Folded Spill - fst.d $fs1, $sp, 264 # 8-byte Folded Spill - fst.d $fs2, $sp, 256 # 8-byte Folded Spill - fst.d $fs3, $sp, 248 # 8-byte Folded Spill - fst.d $fs4, $sp, 240 # 8-byte Folded Spill - fst.d $fs5, $sp, 232 # 8-byte Folded Spill - fst.d $fs6, $sp, 224 # 8-byte Folded Spill - fst.d $fs7, $sp, 216 # 8-byte Folded Spill + addi.d $sp, $sp, -384 + .cfi_def_cfa_offset 384 + st.d $ra, $sp, 376 # 8-byte Folded Spill + st.d $fp, $sp, 368 # 8-byte Folded Spill + st.d $s0, $sp, 360 # 8-byte Folded Spill + st.d $s1, $sp, 352 # 8-byte Folded Spill + st.d $s2, $sp, 344 # 8-byte Folded Spill + st.d $s3, $sp, 336 # 8-byte Folded Spill + st.d $s4, $sp, 328 # 8-byte Folded Spill + st.d $s5, $sp, 320 # 8-byte Folded Spill + st.d $s6, $sp, 312 # 8-byte Folded Spill + st.d $s7, $sp, 304 # 8-byte Folded Spill + st.d $s8, $sp, 296 # 8-byte Folded Spill + fst.d $fs0, $sp, 288 # 8-byte Folded Spill + fst.d $fs1, $sp, 280 # 8-byte Folded Spill + fst.d $fs2, $sp, 272 # 8-byte Folded Spill + fst.d $fs3, $sp, 264 # 8-byte Folded Spill + fst.d $fs4, $sp, 256 # 8-byte Folded Spill + fst.d $fs5, $sp, 248 # 8-byte Folded Spill + fst.d $fs6, $sp, 240 # 8-byte Folded Spill + fst.d $fs7, $sp, 232 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -12278,12 +12267,12 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 .cfi_offset 61, -136 .cfi_offset 62, -144 .cfi_offset 63, -152 - ld.d $fp, $sp, 368 + ld.d $fp, $sp, 384 move $s1, $a7 - st.d $a6, $sp, 152 # 8-byte Folded Spill - st.d $a5, $sp, 160 # 8-byte Folded Spill + st.d $a6, $sp, 168 # 8-byte Folded Spill + st.d $a5, $sp, 176 # 8-byte Folded Spill move $s0, $a4 - st.d $a2, $sp, 168 # 8-byte Folded Spill + st.d $a2, $sp, 184 # 8-byte Folded Spill move $s2, $a1 move $s7, $a0 pcaddu18i $ra, %call36(_ZN6miniFE7mytimerEv) @@ -12305,7 +12294,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 srai.d $s5, $a0, 32 slli.d $s4, $s5, 3 addi.d $fp, $s5, -1 - st.d $s2, $sp, 208 # 8-byte Folded Spill + st.d $s2, $sp, 224 # 8-byte Folded Spill beqz $s6, .LBB15_18 # %bb.3: # %.noexc7.i move $a0, $s4 @@ -12314,7 +12303,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 alsl.d $a1, $s5, $a0, 3 st.d $a1, $sp, 24 # 8-byte Folded Spill st.d $zero, $a0, 0 - st.d $a0, $sp, 184 # 8-byte Folded Spill + st.d $a0, $sp, 200 # 8-byte Folded Spill addi.d $s3, $a0, 8 beqz $fp, .LBB15_5 # %bb.4: # %_ZSt6fill_nIPdmdET_S1_T0_RKT1_.exit.loopexit.i.i.i.i.i.i @@ -12325,7 +12314,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 jirl $ra, $ra, 0 alsl.d $s3, $fp, $s3, 3 .LBB15_5: # %.lr.ph.i - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload move $a1, $zero move $a2, $s4 pcaddu18i $ra, %call36(memset) @@ -12333,10 +12322,10 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 bltz $s8, .LBB15_19 .LBB15_6: # %_ZNSt6vectorIdSaIdEE17_S_check_init_lenEmRKS0_.exit.i.i138 move $s2, $s1 - st.d $s0, $sp, 192 # 8-byte Folded Spill + st.d $s0, $sp, 208 # 8-byte Folded Spill beqz $s8, .LBB15_23 # %bb.7: # %.noexc7.i140 - st.d $s3, $sp, 200 # 8-byte Folded Spill + st.d $s3, $sp, 216 # 8-byte Folded Spill slli.d $s1, $s8, 3 .Ltmp888: # EH_LABEL move $a0, $s1 @@ -12359,14 +12348,14 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 jirl $ra, $ra, 0 alsl.d $s3, $s8, $s3, 3 .LBB15_10: # %.lr.ph.i143 - st.d $s3, $sp, 128 # 8-byte Folded Spill - ld.d $s8, $sp, 184 # 8-byte Folded Reload + st.d $s3, $sp, 144 # 8-byte Folded Spill + ld.d $s8, $sp, 200 # 8-byte Folded Reload move $a0, $s0 move $a1, $zero move $a2, $s1 pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ld.d $s3, $sp, 200 # 8-byte Folded Reload + ld.d $s3, $sp, 216 # 8-byte Folded Reload beqz $s6, .LBB15_24 .LBB15_11: # %.noexc7.i150 .Ltmp890: # EH_LABEL @@ -12424,7 +12413,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 .LBB15_18: st.d $zero, $sp, 24 # 8-byte Folded Spill move $s3, $zero - st.d $zero, $sp, 184 # 8-byte Folded Spill + st.d $zero, $sp, 200 # 8-byte Folded Spill bgez $s8, .LBB15_6 .LBB15_19: # %.noexc.i145 .Ltmp988: # EH_LABEL @@ -12449,33 +12438,33 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 ld.d $a0, $a0, %got_pc_lo12(_ZSt4cerr) pcaddu18i $ra, %call36(_ZNSo3putEc) jirl $ra, $ra, 0 - fld.d $fs7, $sp, 216 # 8-byte Folded Reload - fld.d $fs6, $sp, 224 # 8-byte Folded Reload - fld.d $fs5, $sp, 232 # 8-byte Folded Reload - fld.d $fs4, $sp, 240 # 8-byte Folded Reload - fld.d $fs3, $sp, 248 # 8-byte Folded Reload - fld.d $fs2, $sp, 256 # 8-byte Folded Reload - fld.d $fs1, $sp, 264 # 8-byte Folded Reload - fld.d $fs0, $sp, 272 # 8-byte Folded Reload - ld.d $s8, $sp, 280 # 8-byte Folded Reload - ld.d $s7, $sp, 288 # 8-byte Folded Reload - ld.d $s6, $sp, 296 # 8-byte Folded Reload - ld.d $s5, $sp, 304 # 8-byte Folded Reload - ld.d $s4, $sp, 312 # 8-byte Folded Reload - ld.d $s3, $sp, 320 # 8-byte Folded Reload - ld.d $s2, $sp, 328 # 8-byte Folded Reload - ld.d $s1, $sp, 336 # 8-byte Folded Reload - ld.d $s0, $sp, 344 # 8-byte Folded Reload - ld.d $fp, $sp, 352 # 8-byte Folded Reload - ld.d $ra, $sp, 360 # 8-byte Folded Reload - addi.d $sp, $sp, 368 + fld.d $fs7, $sp, 232 # 8-byte Folded Reload + fld.d $fs6, $sp, 240 # 8-byte Folded Reload + fld.d $fs5, $sp, 248 # 8-byte Folded Reload + fld.d $fs4, $sp, 256 # 8-byte Folded Reload + fld.d $fs3, $sp, 264 # 8-byte Folded Reload + fld.d $fs2, $sp, 272 # 8-byte Folded Reload + fld.d $fs1, $sp, 280 # 8-byte Folded Reload + fld.d $fs0, $sp, 288 # 8-byte Folded Reload + ld.d $s8, $sp, 296 # 8-byte Folded Reload + ld.d $s7, $sp, 304 # 8-byte Folded Reload + ld.d $s6, $sp, 312 # 8-byte Folded Reload + ld.d $s5, $sp, 320 # 8-byte Folded Reload + ld.d $s4, $sp, 328 # 8-byte Folded Reload + ld.d $s3, $sp, 336 # 8-byte Folded Reload + ld.d $s2, $sp, 344 # 8-byte Folded Reload + ld.d $s1, $sp, 352 # 8-byte Folded Reload + ld.d $s0, $sp, 360 # 8-byte Folded Reload + ld.d $fp, $sp, 368 # 8-byte Folded Reload + ld.d $ra, $sp, 376 # 8-byte Folded Reload + addi.d $sp, $sp, 384 pcaddu18i $t8, %call36(_ZNSo5flushEv) jr $t8 .LBB15_23: - st.d $zero, $sp, 128 # 8-byte Folded Spill + st.d $zero, $sp, 144 # 8-byte Folded Spill st.d $zero, $sp, 16 # 8-byte Folded Spill move $s0, $zero - ld.d $s8, $sp, 184 # 8-byte Folded Reload + ld.d $s8, $sp, 200 # 8-byte Folded Reload bnez $s6, .LBB15_11 .LBB15_24: move $s1, $zero @@ -12486,10 +12475,10 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 .Ltmp893: # EH_LABEL pcaddu18i $ra, %call36(_ZN6miniFE7mytimerEv) jirl $ra, $ra, 0 - fst.d $fa0, $sp, 200 # 8-byte Folded Spill + fst.d $fa0, $sp, 216 # 8-byte Folded Spill .Ltmp894: # EH_LABEL # %bb.26: - ld.d $a0, $sp, 168 # 8-byte Folded Reload + ld.d $a0, $sp, 184 # 8-byte Folded Reload ld.d $a1, $a0, 16 ld.d $a0, $a0, 8 sub.d $a1, $a1, $a0 @@ -12549,7 +12538,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 .Ltmp895: # EH_LABEL pcaddu18i $ra, %call36(_ZN6miniFE7mytimerEv) jirl $ra, $ra, 0 - fst.d $fa0, $sp, 176 # 8-byte Folded Spill + fst.d $fa0, $sp, 192 # 8-byte Folded Spill .Ltmp896: # EH_LABEL # %bb.35: .Ltmp897: # EH_LABEL @@ -12624,7 +12613,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 .Ltmp902: # EH_LABEL # %bb.44: fmov.d $fs6, $fa0 - ld.d $a0, $sp, 208 # 8-byte Folded Reload + ld.d $a0, $sp, 224 # 8-byte Folded Reload ld.d $a1, $a0, 16 ld.d $a0, $a0, 8 sub.d $a1, $a1, $a0 @@ -12755,8 +12744,8 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 jirl $ra, $ra, 0 .Ltmp920: # EH_LABEL # %bb.65: # %_ZNSolsEPFRSoS_E.exit.preheader - fld.d $fa0, $sp, 200 # 8-byte Folded Reload - fld.d $fa1, $sp, 176 # 8-byte Folded Reload + fld.d $fa0, $sp, 216 # 8-byte Folded Reload + fld.d $fa1, $sp, 192 # 8-byte Folded Reload fsub.d $fa0, $fa1, $fa0 fsub.d $fa1, $fs5, $fs4 fadd.d $fa0, $fa0, $fs3 @@ -12764,11 +12753,11 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 fsub.d $fa3, $fs2, $fs0 movgr2fr.d $fs5, $zero fadd.d $fa1, $fa1, $fs3 - fst.d $fa1, $sp, 200 # 8-byte Folded Spill + fst.d $fa1, $sp, 216 # 8-byte Folded Spill fadd.d $fs7, $fa0, $fa2 fadd.d $fa0, $fa3, $fs5 - fst.d $fa0, $sp, 208 # 8-byte Folded Spill - ld.d $a1, $sp, 192 # 8-byte Folded Reload + fst.d $fa0, $sp, 224 # 8-byte Folded Spill + ld.d $a1, $sp, 208 # 8-byte Folded Reload blez $a1, .LBB15_159 # %bb.66: # %.lr.ph lu12i.w $a0, 419430 @@ -12787,15 +12776,15 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 maskeqz $a0, $a0, $a1 masknez $a1, $a2, $a1 or $a0, $a0, $a1 - st.d $a0, $sp, 120 # 8-byte Folded Spill + st.d $a0, $sp, 136 # 8-byte Folded Spill move $a0, $s5 bstrpick.d $s5, $s6, 30, 0 sub.d $a0, $a0, $s1 srli.d $a1, $a0, 3 - st.d $a1, $sp, 144 # 8-byte Folded Spill + st.d $a1, $sp, 160 # 8-byte Folded Spill bstrpick.d $a0, $a0, 33, 3 - st.d $a0, $sp, 136 # 8-byte Folded Spill - ld.d $a0, $sp, 128 # 8-byte Folded Reload + st.d $a0, $sp, 152 # 8-byte Folded Spill + ld.d $a0, $sp, 144 # 8-byte Folded Reload sub.d $a0, $a0, $s0 srli.d $a1, $a0, 3 bstrpick.d $a0, $a0, 33, 3 @@ -12805,20 +12794,31 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 addi.d $a0, $s0, 16 st.d $a0, $sp, 88 # 8-byte Folded Spill addi.d $a0, $s8, 16 - st.d $a0, $sp, 112 # 8-byte Folded Spill - sub.d $a0, $zero, $s5 st.d $a0, $sp, 128 # 8-byte Folded Spill + sub.d $a0, $zero, $s5 + st.d $a0, $sp, 144 # 8-byte Folded Spill addi.d $a0, $s1, 16 st.d $a0, $sp, 72 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI15_0) - st.d $a0, $sp, 104 # 8-byte Folded Spill - fld.d $fa0, $a0, %pc_lo12(.LCPI15_0) - fst.d $fa0, $sp, 176 # 8-byte Folded Spill - vrepli.b $vr0, 0 - vst $vr0, $sp, 32 # 16-byte Folded Spill + lu52i.d $a0, $zero, 971 + movgr2fr.d $fa0, $a0 + vrepli.b $vr1, 0 + vst $vr1, $sp, 32 # 16-byte Folded Spill addi.w $a0, $a1, 0 - st.d $a0, $sp, 96 # 8-byte Folded Spill + st.d $a0, $sp, 112 # 8-byte Folded Spill + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa1, $a0 + fst.d $fa1, $sp, 104 # 8-byte Folded Spill + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1019 + movgr2fr.d $fa1, $a0 + fst.d $fa1, $sp, 96 # 8-byte Folded Spill ori $a0, $zero, 1 + fst.d $fa0, $sp, 120 # 8-byte Folded Spill + fst.d $fa0, $sp, 192 # 8-byte Folded Spill .p2align 4, , 16 .LBB15_67: # =>This Loop Header: Depth=1 # Child Loop BB15_76 Depth 2 @@ -12836,7 +12836,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 # Child Loop BB15_147 Depth 2 # Child Loop BB15_150 Depth 2 fld.d $fa0, $s2, 0 - ld.d $a1, $sp, 160 # 8-byte Folded Reload + ld.d $a1, $sp, 176 # 8-byte Folded Reload fld.d $fa1, $a1, 0 fcmp.cule.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB15_159 @@ -12871,7 +12871,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 blez $fp, .LBB15_77 # %bb.75: # %.lr.ph.i196.preheader # in Loop: Header=BB15_67 Depth=1 - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload move $a1, $s5 .p2align 4, , 16 .LBB15_76: # %.lr.ph.i196 @@ -12908,7 +12908,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 b .LBB15_92 .LBB15_82: # %vector.body510.preheader # in Loop: Header=BB15_67 Depth=1 - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $a1, $sp, 88 # 8-byte Folded Reload move $a2, $s6 vld $vr4, $sp, 32 # 16-byte Folded Reload @@ -12934,10 +12934,10 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 beq $s5, $s6, .LBB15_87 .LBB15_85: # %.lr.ph.i187.preheader537 # in Loop: Header=BB15_67 Depth=1 - ld.d $a0, $sp, 128 # 8-byte Folded Reload + ld.d $a0, $sp, 144 # 8-byte Folded Reload add.d $a0, $a0, $a2 alsl.d $a1, $a2, $s0, 3 - ld.d $a3, $sp, 184 # 8-byte Folded Reload + ld.d $a3, $sp, 200 # 8-byte Folded Reload alsl.d $a2, $a2, $a3, 3 .p2align 4, , 16 .LBB15_86: # %.lr.ph.i187 @@ -12963,7 +12963,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 .LBB15_89: # %vector.ph520 # in Loop: Header=BB15_67 Depth=1 vreplvei.d $vr1, $vr0, 0 - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $a1, $sp, 88 # 8-byte Folded Reload move $a2, $s6 .p2align 4, , 16 @@ -12990,10 +12990,10 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 beq $s5, $s6, .LBB15_94 .LBB15_92: # %.lr.ph.i205.preheader538 # in Loop: Header=BB15_67 Depth=1 - ld.d $a0, $sp, 128 # 8-byte Folded Reload + ld.d $a0, $sp, 144 # 8-byte Folded Reload add.d $a0, $a0, $a2 alsl.d $a1, $a2, $s0, 3 - ld.d $a3, $sp, 184 # 8-byte Folded Reload + ld.d $a3, $sp, 200 # 8-byte Folded Reload alsl.d $a2, $a2, $a3, 3 .p2align 4, , 16 .LBB15_93: # %.lr.ph.i205 @@ -13016,9 +13016,9 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 .Ltmp928: # EH_LABEL # %bb.95: # in Loop: Header=BB15_67 Depth=1 fsub.d $fa1, $fs2, $fs0 - fld.d $fa2, $sp, 208 # 8-byte Folded Reload + fld.d $fa2, $sp, 224 # 8-byte Folded Reload fadd.d $fa2, $fa2, $fa1 - fst.d $fa2, $sp, 208 # 8-byte Folded Spill + fst.d $fa2, $sp, 224 # 8-byte Folded Spill fsub.d $fs4, $fa0, $fs3 fmov.d $fs1, $fs6 .LBB15_96: # in Loop: Header=BB15_67 Depth=1 @@ -13028,11 +13028,11 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 # %bb.97: # %.split706 # in Loop: Header=BB15_67 Depth=1 fst.d $fa0, $s2, 0 - ld.d $a0, $sp, 192 # 8-byte Folded Reload + ld.d $a0, $sp, 208 # 8-byte Folded Reload beq $s8, $a0, .LBB15_99 .LBB15_98: # %.split706 # in Loop: Header=BB15_67 Depth=1 - ld.d $a0, $sp, 120 # 8-byte Folded Reload + ld.d $a0, $sp, 136 # 8-byte Folded Reload mod.wu $a0, $s8, $a0 bnez $a0, .LBB15_110 .LBB15_99: # in Loop: Header=BB15_67 Depth=1 @@ -13194,7 +13194,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 .Ltmp956: # EH_LABEL # %bb.119: # in Loop: Header=BB15_67 Depth=1 fmov.d $fs3, $fa0 - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 160 # 8-byte Folded Reload addi.w $s3, $a0, 0 fmov.d $fs6, $fs5 blez $s3, .LBB15_122 @@ -13202,7 +13202,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 # in Loop: Header=BB15_67 Depth=1 move $a0, $s1 move $a1, $s0 - ld.d $a2, $sp, 136 # 8-byte Folded Reload + ld.d $a2, $sp, 152 # 8-byte Folded Reload fmov.d $fs6, $fs5 .p2align 4, , 16 .LBB15_121: # %.lr.ph.i239 @@ -13225,15 +13225,15 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 # %bb.123: # in Loop: Header=BB15_67 Depth=1 fadd.d $fs7, $fs7, $fs4 fsub.d $fa1, $fs2, $fs0 - fld.d $fa2, $sp, 200 # 8-byte Folded Reload + fld.d $fa2, $sp, 216 # 8-byte Folded Reload fadd.d $fa2, $fa2, $fa1 - fst.d $fa2, $sp, 200 # 8-byte Folded Spill + fst.d $fa2, $sp, 216 # 8-byte Folded Spill fsub.d $fa0, $fa0, $fs3 - fld.d $fa1, $sp, 176 # 8-byte Folded Reload + fld.d $fa1, $sp, 192 # 8-byte Folded Reload fcmp.cule.d $fcc0, $fa1, $fs6 - fld.d $fa1, $sp, 208 # 8-byte Folded Reload + fld.d $fa1, $sp, 224 # 8-byte Folded Reload fadd.d $fa1, $fa1, $fa0 - fst.d $fa1, $sp, 208 # 8-byte Folded Spill + fst.d $fa1, $sp, 224 # 8-byte Folded Spill bcnez $fcc0, .LBB15_135 # %bb.124: # in Loop: Header=BB15_67 Depth=1 fcmp.clt.d $fcc0, $fs6, $fs5 @@ -13244,7 +13244,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 # %bb.126: # %.lr.ph.i.i.preheader # in Loop: Header=BB15_67 Depth=1 move $a0, $s1 - ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $a1, $sp, 152 # 8-byte Folded Reload fmov.d $fa0, $fs5 .p2align 4, , 16 .LBB15_127: # %.lr.ph.i.i @@ -13264,7 +13264,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 # %bb.129: # %_ZN6miniFE3dotINS_6VectorIdiiEEEENS_10TypeTraitsINT_10ScalarTypeEE14magnitude_typeERKS4_S9_.exit.i.split # in Loop: Header=BB15_67 Depth=1 fmov.d $fa0, $fs5 - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 112 # 8-byte Folded Reload blez $a0, .LBB15_132 .LBB15_130: # %.lr.ph.i10.i.preheader # in Loop: Header=BB15_67 Depth=1 @@ -13288,21 +13288,18 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 bceqz $fcc0, .LBB15_158 .LBB15_133: # %.loopexit.split # in Loop: Header=BB15_67 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI15_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI15_1) - ld.d $a0, $sp, 104 # 8-byte Folded Reload - fld.d $fa2, $a0, %pc_lo12(.LCPI15_0) - fabs.d $fa3, $fs6 - fmul.d $fa0, $fs0, $fa0 - fmul.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa2 - fcmp.cult.d $fcc0, $fa0, $fa3 + fabs.d $fa0, $fs6 + fld.d $fa2, $sp, 104 # 8-byte Folded Reload + fmul.d $fa2, $fs0, $fa2 + fmul.d $fa1, $fa2, $fa1 + fld.d $fa2, $sp, 120 # 8-byte Folded Reload + fmul.d $fa1, $fa1, $fa2 + fcmp.cult.d $fcc0, $fa1, $fa0 bceqz $fcc0, .LBB15_166 # %bb.134: # in Loop: Header=BB15_67 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI15_2) - fld.d $fa0, $a0, %pc_lo12(.LCPI15_2) + fld.d $fa0, $sp, 96 # 8-byte Folded Reload fmul.d $fa0, $fs6, $fa0 - fst.d $fa0, $sp, 176 # 8-byte Folded Spill + fst.d $fa0, $sp, 192 # 8-byte Folded Spill .LBB15_135: # in Loop: Header=BB15_67 Depth=1 .Ltmp959: # EH_LABEL pcaddu18i $ra, %call36(_ZN6miniFE7mytimerEv) @@ -13310,7 +13307,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 .Ltmp960: # EH_LABEL # %bb.136: # in Loop: Header=BB15_67 Depth=1 fmov.d $fs2, $fa0 - ld.d $a0, $sp, 168 # 8-byte Folded Reload + ld.d $a0, $sp, 184 # 8-byte Folded Reload ld.d $a1, $a0, 16 ld.d $a0, $a0, 8 sub.d $a1, $a1, $a0 @@ -13365,7 +13362,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 .LBB15_146: # %vector.ph473 # in Loop: Header=BB15_67 Depth=1 vreplvei.d $vr1, $vr0, 0 - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $a1, $sp, 72 # 8-byte Folded Reload move $a2, $s6 .p2align 4, , 16 @@ -13392,10 +13389,10 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 beq $s5, $s6, .LBB15_151 .LBB15_149: # %.lr.ph.i260.preheader536 # in Loop: Header=BB15_67 Depth=1 - ld.d $a0, $sp, 128 # 8-byte Folded Reload + ld.d $a0, $sp, 144 # 8-byte Folded Reload add.d $a0, $a0, $a2 alsl.d $a1, $a2, $s1, 3 - ld.d $a3, $sp, 184 # 8-byte Folded Reload + ld.d $a3, $sp, 200 # 8-byte Folded Reload alsl.d $a2, $a2, $a3, 3 .p2align 4, , 16 .LBB15_150: # %.lr.ph.i260 @@ -13420,10 +13417,10 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 # in Loop: Header=BB15_67 Depth=1 fsub.d $fa0, $fa0, $fs2 fadd.d $fs7, $fs7, $fa0 - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a0, $sp, 168 # 8-byte Folded Reload st.w $s8, $a0, 0 addi.w $a0, $s8, 1 - ld.d $a1, $sp, 192 # 8-byte Folded Reload + ld.d $a1, $sp, 208 # 8-byte Folded Reload bne $s8, $a1, .LBB15_67 b .LBB15_159 .LBB15_153: # %vector.ph490 @@ -13462,7 +13459,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 fst.d $fa0, $s2, 0 - ld.d $a0, $sp, 192 # 8-byte Folded Reload + ld.d $a0, $sp, 208 # 8-byte Folded Reload bne $s8, $a0, .LBB15_98 b .LBB15_99 .LBB15_157: # %call.sqrt708 @@ -13471,7 +13468,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 jirl $ra, $ra, 0 fmov.d $fs0, $fa0 fmov.d $fa0, $fs5 - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 112 # 8-byte Folded Reload bgtz $a0, .LBB15_130 b .LBB15_132 .LBB15_158: # %call.sqrt709 @@ -13483,9 +13480,9 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 .LBB15_159: # %.critedge ld.d $s3, $sp, 56 # 8-byte Folded Reload fst.d $fs7, $s3, 0 - fld.d $fa0, $sp, 208 # 8-byte Folded Reload + fld.d $fa0, $sp, 224 # 8-byte Folded Reload fst.d $fa0, $s3, 8 - fld.d $fa0, $sp, 200 # 8-byte Folded Reload + fld.d $fa0, $sp, 216 # 8-byte Folded Reload fst.d $fa0, $s3, 16 st.d $zero, $s3, 24 .Ltmp982: # EH_LABEL @@ -13493,7 +13490,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 jirl $ra, $ra, 0 .Ltmp983: # EH_LABEL # %bb.160: - ld.d $fp, $sp, 184 # 8-byte Folded Reload + ld.d $fp, $sp, 200 # 8-byte Folded Reload b .LBB15_175 .LBB15_161: # %vector.memcheck451 sub.d $a5, $s8, $a0 @@ -13537,7 +13534,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 addi.d $a1, $a0, %pc_lo12(.L.str.109) ori $a2, $zero, 44 move $a0, $s2 - ld.d $fp, $sp, 184 # 8-byte Folded Reload + ld.d $fp, $sp, 200 # 8-byte Folded Reload pcaddu18i $ra, %call36(_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l) jirl $ra, $ra, 0 .Ltmp965: # EH_LABEL @@ -13583,9 +13580,9 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 .Ltmp973: # EH_LABEL # %bb.174: # %_ZNSolsEPFRSoS_E.exit248 fst.d $fs7, $s3, 0 - fld.d $fa0, $sp, 208 # 8-byte Folded Reload + fld.d $fa0, $sp, 224 # 8-byte Folded Reload fst.d $fa0, $s3, 8 - fld.d $fa0, $sp, 200 # 8-byte Folded Reload + fld.d $fa0, $sp, 216 # 8-byte Folded Reload fst.d $fa0, $s3, 16 .Ltmp974: # EH_LABEL pcaddu18i $ra, %call36(_ZN6miniFE7mytimerEv) @@ -13616,49 +13613,49 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 ld.d $a0, $sp, 24 # 8-byte Folded Reload sub.d $a1, $a0, $fp move $a0, $fp - fld.d $fs7, $sp, 216 # 8-byte Folded Reload - fld.d $fs6, $sp, 224 # 8-byte Folded Reload - fld.d $fs5, $sp, 232 # 8-byte Folded Reload - fld.d $fs4, $sp, 240 # 8-byte Folded Reload - fld.d $fs3, $sp, 248 # 8-byte Folded Reload - fld.d $fs2, $sp, 256 # 8-byte Folded Reload - fld.d $fs1, $sp, 264 # 8-byte Folded Reload - fld.d $fs0, $sp, 272 # 8-byte Folded Reload - ld.d $s8, $sp, 280 # 8-byte Folded Reload - ld.d $s7, $sp, 288 # 8-byte Folded Reload - ld.d $s6, $sp, 296 # 8-byte Folded Reload - ld.d $s5, $sp, 304 # 8-byte Folded Reload - ld.d $s4, $sp, 312 # 8-byte Folded Reload - ld.d $s3, $sp, 320 # 8-byte Folded Reload - ld.d $s2, $sp, 328 # 8-byte Folded Reload - ld.d $s1, $sp, 336 # 8-byte Folded Reload - ld.d $s0, $sp, 344 # 8-byte Folded Reload - ld.d $fp, $sp, 352 # 8-byte Folded Reload - ld.d $ra, $sp, 360 # 8-byte Folded Reload - addi.d $sp, $sp, 368 + fld.d $fs7, $sp, 232 # 8-byte Folded Reload + fld.d $fs6, $sp, 240 # 8-byte Folded Reload + fld.d $fs5, $sp, 248 # 8-byte Folded Reload + fld.d $fs4, $sp, 256 # 8-byte Folded Reload + fld.d $fs3, $sp, 264 # 8-byte Folded Reload + fld.d $fs2, $sp, 272 # 8-byte Folded Reload + fld.d $fs1, $sp, 280 # 8-byte Folded Reload + fld.d $fs0, $sp, 288 # 8-byte Folded Reload + ld.d $s8, $sp, 296 # 8-byte Folded Reload + ld.d $s7, $sp, 304 # 8-byte Folded Reload + ld.d $s6, $sp, 312 # 8-byte Folded Reload + ld.d $s5, $sp, 320 # 8-byte Folded Reload + ld.d $s4, $sp, 328 # 8-byte Folded Reload + ld.d $s3, $sp, 336 # 8-byte Folded Reload + ld.d $s2, $sp, 344 # 8-byte Folded Reload + ld.d $s1, $sp, 352 # 8-byte Folded Reload + ld.d $s0, $sp, 360 # 8-byte Folded Reload + ld.d $fp, $sp, 368 # 8-byte Folded Reload + ld.d $ra, $sp, 376 # 8-byte Folded Reload + addi.d $sp, $sp, 384 pcaddu18i $t8, %call36(_ZdlPvm) jr $t8 .LBB15_181: # %_ZN6miniFE6VectorIdiiED2Ev.exit268 - fld.d $fs7, $sp, 216 # 8-byte Folded Reload - fld.d $fs6, $sp, 224 # 8-byte Folded Reload - fld.d $fs5, $sp, 232 # 8-byte Folded Reload - fld.d $fs4, $sp, 240 # 8-byte Folded Reload - fld.d $fs3, $sp, 248 # 8-byte Folded Reload - fld.d $fs2, $sp, 256 # 8-byte Folded Reload - fld.d $fs1, $sp, 264 # 8-byte Folded Reload - fld.d $fs0, $sp, 272 # 8-byte Folded Reload - ld.d $s8, $sp, 280 # 8-byte Folded Reload - ld.d $s7, $sp, 288 # 8-byte Folded Reload - ld.d $s6, $sp, 296 # 8-byte Folded Reload - ld.d $s5, $sp, 304 # 8-byte Folded Reload - ld.d $s4, $sp, 312 # 8-byte Folded Reload - ld.d $s3, $sp, 320 # 8-byte Folded Reload - ld.d $s2, $sp, 328 # 8-byte Folded Reload - ld.d $s1, $sp, 336 # 8-byte Folded Reload - ld.d $s0, $sp, 344 # 8-byte Folded Reload - ld.d $fp, $sp, 352 # 8-byte Folded Reload - ld.d $ra, $sp, 360 # 8-byte Folded Reload - addi.d $sp, $sp, 368 + fld.d $fs7, $sp, 232 # 8-byte Folded Reload + fld.d $fs6, $sp, 240 # 8-byte Folded Reload + fld.d $fs5, $sp, 248 # 8-byte Folded Reload + fld.d $fs4, $sp, 256 # 8-byte Folded Reload + fld.d $fs3, $sp, 264 # 8-byte Folded Reload + fld.d $fs2, $sp, 272 # 8-byte Folded Reload + fld.d $fs1, $sp, 280 # 8-byte Folded Reload + fld.d $fs0, $sp, 288 # 8-byte Folded Reload + ld.d $s8, $sp, 296 # 8-byte Folded Reload + ld.d $s7, $sp, 304 # 8-byte Folded Reload + ld.d $s6, $sp, 312 # 8-byte Folded Reload + ld.d $s5, $sp, 320 # 8-byte Folded Reload + ld.d $s4, $sp, 328 # 8-byte Folded Reload + ld.d $s3, $sp, 336 # 8-byte Folded Reload + ld.d $s2, $sp, 344 # 8-byte Folded Reload + ld.d $s1, $sp, 352 # 8-byte Folded Reload + ld.d $s0, $sp, 360 # 8-byte Folded Reload + ld.d $fp, $sp, 368 # 8-byte Folded Reload + ld.d $ra, $sp, 376 # 8-byte Folded Reload + addi.d $sp, $sp, 384 ret .LBB15_182: .Ltmp979: # EH_LABEL @@ -13705,7 +13702,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 .LBB15_194: .Ltmp990: # EH_LABEL move $s2, $a0 - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload beqz $a0, .LBB15_203 b .LBB15_206 .LBB15_195: # %.loopexit.split-lp @@ -13728,7 +13725,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 # %bb.201: # %_ZN6miniFE6VectorIdiiED2Ev.exit270 bnez $s0, .LBB15_205 .LBB15_202: # %_ZN6miniFE6VectorIdiiED2Ev.exit272 - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload bnez $a0, .LBB15_206 .LBB15_203: # %_ZN6miniFE6VectorIdiiED2Ev.exit274 move $a0, $s2 @@ -13747,10 +13744,10 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_14matvec_overlapIS2_S4 move $a0, $s0 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload beqz $a0, .LBB15_203 .LBB15_206: - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload ld.d $a1, $sp, 24 # 8-byte Folded Reload sub.d $a1, $a1, $a0 pcaddu18i $ra, %call36(_ZdlPvm) @@ -13865,16 +13862,8 @@ GCC_except_table15: .Lcst_end10: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEEEvRT_RKT0_RS9_T1_NS7_16LocalOrdinalTypeERNS_10TypeTraitsINS7_10ScalarTypeEE14magnitude_typeERSE_SJ_Pd -.LCPI16_0: - .dword 0x3cb0000000000000 # double 2.2204460492503131E-16 -.LCPI16_1: - .dword 0x4059000000000000 # double 100 -.LCPI16_2: - .dword 0x3fb999999999999a # double 0.10000000000000001 .section .text._ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEEEvRT_RKT0_RS9_T1_NS7_16LocalOrdinalTypeERNS_10TypeTraitsINS7_10ScalarTypeEE14magnitude_typeERSE_SJ_Pd,"axG",@progbits,_ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEEEvRT_RKT0_RS9_T1_NS7_16LocalOrdinalTypeERNS_10TypeTraitsINS7_10ScalarTypeEE14magnitude_typeERSE_SJ_Pd,comdat - .weak _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEEEvRT_RKT0_RS9_T1_NS7_16LocalOrdinalTypeERNS_10TypeTraitsINS7_10ScalarTypeEE14magnitude_typeERSE_SJ_Pd + .weak _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEEEvRT_RKT0_RS9_T1_NS7_16LocalOrdinalTypeERNS_10TypeTraitsINS7_10ScalarTypeEE14magnitude_typeERSE_SJ_Pd # -- Begin function _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEEEvRT_RKT0_RS9_T1_NS7_16LocalOrdinalTypeERNS_10TypeTraitsINS7_10ScalarTypeEE14magnitude_typeERSE_SJ_Pd .p2align 5 .type _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEEEvRT_RKT0_RS9_T1_NS7_16LocalOrdinalTypeERNS_10TypeTraitsINS7_10ScalarTypeEE14magnitude_typeERSE_SJ_Pd,@function _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEEEvRT_RKT0_RS9_T1_NS7_16LocalOrdinalTypeERNS_10TypeTraitsINS7_10ScalarTypeEE14magnitude_typeERSE_SJ_Pd: # @_ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEEEvRT_RKT0_RS9_T1_NS7_16LocalOrdinalTypeERNS_10TypeTraitsINS7_10ScalarTypeEE14magnitude_typeERSE_SJ_Pd @@ -13883,27 +13872,27 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE .cfi_personality 155, DW.ref.__gxx_personality_v0 .cfi_lsda 27, .Lexception11 # %bb.0: - addi.d $sp, $sp, -368 - .cfi_def_cfa_offset 368 - st.d $ra, $sp, 360 # 8-byte Folded Spill - st.d $fp, $sp, 352 # 8-byte Folded Spill - st.d $s0, $sp, 344 # 8-byte Folded Spill - st.d $s1, $sp, 336 # 8-byte Folded Spill - st.d $s2, $sp, 328 # 8-byte Folded Spill - st.d $s3, $sp, 320 # 8-byte Folded Spill - st.d $s4, $sp, 312 # 8-byte Folded Spill - st.d $s5, $sp, 304 # 8-byte Folded Spill - st.d $s6, $sp, 296 # 8-byte Folded Spill - st.d $s7, $sp, 288 # 8-byte Folded Spill - st.d $s8, $sp, 280 # 8-byte Folded Spill - fst.d $fs0, $sp, 272 # 8-byte Folded Spill - fst.d $fs1, $sp, 264 # 8-byte Folded Spill - fst.d $fs2, $sp, 256 # 8-byte Folded Spill - fst.d $fs3, $sp, 248 # 8-byte Folded Spill - fst.d $fs4, $sp, 240 # 8-byte Folded Spill - fst.d $fs5, $sp, 232 # 8-byte Folded Spill - fst.d $fs6, $sp, 224 # 8-byte Folded Spill - fst.d $fs7, $sp, 216 # 8-byte Folded Spill + addi.d $sp, $sp, -384 + .cfi_def_cfa_offset 384 + st.d $ra, $sp, 376 # 8-byte Folded Spill + st.d $fp, $sp, 368 # 8-byte Folded Spill + st.d $s0, $sp, 360 # 8-byte Folded Spill + st.d $s1, $sp, 352 # 8-byte Folded Spill + st.d $s2, $sp, 344 # 8-byte Folded Spill + st.d $s3, $sp, 336 # 8-byte Folded Spill + st.d $s4, $sp, 328 # 8-byte Folded Spill + st.d $s5, $sp, 320 # 8-byte Folded Spill + st.d $s6, $sp, 312 # 8-byte Folded Spill + st.d $s7, $sp, 304 # 8-byte Folded Spill + st.d $s8, $sp, 296 # 8-byte Folded Spill + fst.d $fs0, $sp, 288 # 8-byte Folded Spill + fst.d $fs1, $sp, 280 # 8-byte Folded Spill + fst.d $fs2, $sp, 272 # 8-byte Folded Spill + fst.d $fs3, $sp, 264 # 8-byte Folded Spill + fst.d $fs4, $sp, 256 # 8-byte Folded Spill + fst.d $fs5, $sp, 248 # 8-byte Folded Spill + fst.d $fs6, $sp, 240 # 8-byte Folded Spill + fst.d $fs7, $sp, 232 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -13923,12 +13912,12 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE .cfi_offset 61, -136 .cfi_offset 62, -144 .cfi_offset 63, -152 - ld.d $fp, $sp, 368 + ld.d $fp, $sp, 384 move $s1, $a7 - st.d $a6, $sp, 152 # 8-byte Folded Spill - st.d $a5, $sp, 160 # 8-byte Folded Spill + st.d $a6, $sp, 168 # 8-byte Folded Spill + st.d $a5, $sp, 176 # 8-byte Folded Spill move $s0, $a4 - st.d $a2, $sp, 168 # 8-byte Folded Spill + st.d $a2, $sp, 184 # 8-byte Folded Spill move $s2, $a1 move $s7, $a0 pcaddu18i $ra, %call36(_ZN6miniFE7mytimerEv) @@ -13950,7 +13939,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE srai.d $s5, $a0, 32 slli.d $s4, $s5, 3 addi.d $fp, $s5, -1 - st.d $s2, $sp, 208 # 8-byte Folded Spill + st.d $s2, $sp, 224 # 8-byte Folded Spill beqz $s6, .LBB16_18 # %bb.3: # %.noexc7.i move $a0, $s4 @@ -13959,7 +13948,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE alsl.d $a1, $s5, $a0, 3 st.d $a1, $sp, 24 # 8-byte Folded Spill st.d $zero, $a0, 0 - st.d $a0, $sp, 184 # 8-byte Folded Spill + st.d $a0, $sp, 200 # 8-byte Folded Spill addi.d $s3, $a0, 8 beqz $fp, .LBB16_5 # %bb.4: # %_ZSt6fill_nIPdmdET_S1_T0_RKT1_.exit.loopexit.i.i.i.i.i.i @@ -13970,7 +13959,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE jirl $ra, $ra, 0 alsl.d $s3, $fp, $s3, 3 .LBB16_5: # %.lr.ph.i - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload move $a1, $zero move $a2, $s4 pcaddu18i $ra, %call36(memset) @@ -13978,10 +13967,10 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE bltz $s8, .LBB16_19 .LBB16_6: # %_ZNSt6vectorIdSaIdEE17_S_check_init_lenEmRKS0_.exit.i.i138 move $s2, $s1 - st.d $s0, $sp, 192 # 8-byte Folded Spill + st.d $s0, $sp, 208 # 8-byte Folded Spill beqz $s8, .LBB16_23 # %bb.7: # %.noexc7.i140 - st.d $s3, $sp, 200 # 8-byte Folded Spill + st.d $s3, $sp, 216 # 8-byte Folded Spill slli.d $s1, $s8, 3 .Ltmp991: # EH_LABEL move $a0, $s1 @@ -14004,14 +13993,14 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE jirl $ra, $ra, 0 alsl.d $s3, $s8, $s3, 3 .LBB16_10: # %.lr.ph.i143 - st.d $s3, $sp, 128 # 8-byte Folded Spill - ld.d $s8, $sp, 184 # 8-byte Folded Reload + st.d $s3, $sp, 144 # 8-byte Folded Spill + ld.d $s8, $sp, 200 # 8-byte Folded Reload move $a0, $s0 move $a1, $zero move $a2, $s1 pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ld.d $s3, $sp, 200 # 8-byte Folded Reload + ld.d $s3, $sp, 216 # 8-byte Folded Reload beqz $s6, .LBB16_24 .LBB16_11: # %.noexc7.i150 .Ltmp993: # EH_LABEL @@ -14069,7 +14058,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE .LBB16_18: st.d $zero, $sp, 24 # 8-byte Folded Spill move $s3, $zero - st.d $zero, $sp, 184 # 8-byte Folded Spill + st.d $zero, $sp, 200 # 8-byte Folded Spill bgez $s8, .LBB16_6 .LBB16_19: # %.noexc.i145 .Ltmp1091: # EH_LABEL @@ -14094,33 +14083,33 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE ld.d $a0, $a0, %got_pc_lo12(_ZSt4cerr) pcaddu18i $ra, %call36(_ZNSo3putEc) jirl $ra, $ra, 0 - fld.d $fs7, $sp, 216 # 8-byte Folded Reload - fld.d $fs6, $sp, 224 # 8-byte Folded Reload - fld.d $fs5, $sp, 232 # 8-byte Folded Reload - fld.d $fs4, $sp, 240 # 8-byte Folded Reload - fld.d $fs3, $sp, 248 # 8-byte Folded Reload - fld.d $fs2, $sp, 256 # 8-byte Folded Reload - fld.d $fs1, $sp, 264 # 8-byte Folded Reload - fld.d $fs0, $sp, 272 # 8-byte Folded Reload - ld.d $s8, $sp, 280 # 8-byte Folded Reload - ld.d $s7, $sp, 288 # 8-byte Folded Reload - ld.d $s6, $sp, 296 # 8-byte Folded Reload - ld.d $s5, $sp, 304 # 8-byte Folded Reload - ld.d $s4, $sp, 312 # 8-byte Folded Reload - ld.d $s3, $sp, 320 # 8-byte Folded Reload - ld.d $s2, $sp, 328 # 8-byte Folded Reload - ld.d $s1, $sp, 336 # 8-byte Folded Reload - ld.d $s0, $sp, 344 # 8-byte Folded Reload - ld.d $fp, $sp, 352 # 8-byte Folded Reload - ld.d $ra, $sp, 360 # 8-byte Folded Reload - addi.d $sp, $sp, 368 + fld.d $fs7, $sp, 232 # 8-byte Folded Reload + fld.d $fs6, $sp, 240 # 8-byte Folded Reload + fld.d $fs5, $sp, 248 # 8-byte Folded Reload + fld.d $fs4, $sp, 256 # 8-byte Folded Reload + fld.d $fs3, $sp, 264 # 8-byte Folded Reload + fld.d $fs2, $sp, 272 # 8-byte Folded Reload + fld.d $fs1, $sp, 280 # 8-byte Folded Reload + fld.d $fs0, $sp, 288 # 8-byte Folded Reload + ld.d $s8, $sp, 296 # 8-byte Folded Reload + ld.d $s7, $sp, 304 # 8-byte Folded Reload + ld.d $s6, $sp, 312 # 8-byte Folded Reload + ld.d $s5, $sp, 320 # 8-byte Folded Reload + ld.d $s4, $sp, 328 # 8-byte Folded Reload + ld.d $s3, $sp, 336 # 8-byte Folded Reload + ld.d $s2, $sp, 344 # 8-byte Folded Reload + ld.d $s1, $sp, 352 # 8-byte Folded Reload + ld.d $s0, $sp, 360 # 8-byte Folded Reload + ld.d $fp, $sp, 368 # 8-byte Folded Reload + ld.d $ra, $sp, 376 # 8-byte Folded Reload + addi.d $sp, $sp, 384 pcaddu18i $t8, %call36(_ZNSo5flushEv) jr $t8 .LBB16_23: - st.d $zero, $sp, 128 # 8-byte Folded Spill + st.d $zero, $sp, 144 # 8-byte Folded Spill st.d $zero, $sp, 16 # 8-byte Folded Spill move $s0, $zero - ld.d $s8, $sp, 184 # 8-byte Folded Reload + ld.d $s8, $sp, 200 # 8-byte Folded Reload bnez $s6, .LBB16_11 .LBB16_24: move $s1, $zero @@ -14131,10 +14120,10 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE .Ltmp996: # EH_LABEL pcaddu18i $ra, %call36(_ZN6miniFE7mytimerEv) jirl $ra, $ra, 0 - fst.d $fa0, $sp, 200 # 8-byte Folded Spill + fst.d $fa0, $sp, 216 # 8-byte Folded Spill .Ltmp997: # EH_LABEL # %bb.26: - ld.d $a0, $sp, 168 # 8-byte Folded Reload + ld.d $a0, $sp, 184 # 8-byte Folded Reload ld.d $a1, $a0, 16 ld.d $a0, $a0, 8 sub.d $a1, $a1, $a0 @@ -14194,7 +14183,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE .Ltmp998: # EH_LABEL pcaddu18i $ra, %call36(_ZN6miniFE7mytimerEv) jirl $ra, $ra, 0 - fst.d $fa0, $sp, 176 # 8-byte Folded Spill + fst.d $fa0, $sp, 192 # 8-byte Folded Spill .Ltmp999: # EH_LABEL # %bb.35: .Ltmp1000: # EH_LABEL @@ -14269,7 +14258,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE .Ltmp1005: # EH_LABEL # %bb.44: fmov.d $fs6, $fa0 - ld.d $a0, $sp, 208 # 8-byte Folded Reload + ld.d $a0, $sp, 224 # 8-byte Folded Reload ld.d $a1, $a0, 16 ld.d $a0, $a0, 8 sub.d $a1, $a1, $a0 @@ -14400,8 +14389,8 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE jirl $ra, $ra, 0 .Ltmp1023: # EH_LABEL # %bb.65: # %_ZNSolsEPFRSoS_E.exit.preheader - fld.d $fa0, $sp, 200 # 8-byte Folded Reload - fld.d $fa1, $sp, 176 # 8-byte Folded Reload + fld.d $fa0, $sp, 216 # 8-byte Folded Reload + fld.d $fa1, $sp, 192 # 8-byte Folded Reload fsub.d $fa0, $fa1, $fa0 fsub.d $fa1, $fs5, $fs4 fadd.d $fa0, $fa0, $fs3 @@ -14409,11 +14398,11 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE fsub.d $fa3, $fs2, $fs0 movgr2fr.d $fs5, $zero fadd.d $fa1, $fa1, $fs3 - fst.d $fa1, $sp, 200 # 8-byte Folded Spill + fst.d $fa1, $sp, 216 # 8-byte Folded Spill fadd.d $fs7, $fa0, $fa2 fadd.d $fa0, $fa3, $fs5 - fst.d $fa0, $sp, 208 # 8-byte Folded Spill - ld.d $a1, $sp, 192 # 8-byte Folded Reload + fst.d $fa0, $sp, 224 # 8-byte Folded Spill + ld.d $a1, $sp, 208 # 8-byte Folded Reload blez $a1, .LBB16_159 # %bb.66: # %.lr.ph lu12i.w $a0, 419430 @@ -14432,15 +14421,15 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE maskeqz $a0, $a0, $a1 masknez $a1, $a2, $a1 or $a0, $a0, $a1 - st.d $a0, $sp, 120 # 8-byte Folded Spill + st.d $a0, $sp, 136 # 8-byte Folded Spill move $a0, $s5 bstrpick.d $s5, $s6, 30, 0 sub.d $a0, $a0, $s1 srli.d $a1, $a0, 3 - st.d $a1, $sp, 144 # 8-byte Folded Spill + st.d $a1, $sp, 160 # 8-byte Folded Spill bstrpick.d $a0, $a0, 33, 3 - st.d $a0, $sp, 136 # 8-byte Folded Spill - ld.d $a0, $sp, 128 # 8-byte Folded Reload + st.d $a0, $sp, 152 # 8-byte Folded Spill + ld.d $a0, $sp, 144 # 8-byte Folded Reload sub.d $a0, $a0, $s0 srli.d $a1, $a0, 3 bstrpick.d $a0, $a0, 33, 3 @@ -14450,20 +14439,31 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE addi.d $a0, $s0, 16 st.d $a0, $sp, 88 # 8-byte Folded Spill addi.d $a0, $s8, 16 - st.d $a0, $sp, 112 # 8-byte Folded Spill - sub.d $a0, $zero, $s5 st.d $a0, $sp, 128 # 8-byte Folded Spill + sub.d $a0, $zero, $s5 + st.d $a0, $sp, 144 # 8-byte Folded Spill addi.d $a0, $s1, 16 st.d $a0, $sp, 72 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI16_0) - st.d $a0, $sp, 104 # 8-byte Folded Spill - fld.d $fa0, $a0, %pc_lo12(.LCPI16_0) - fst.d $fa0, $sp, 176 # 8-byte Folded Spill - vrepli.b $vr0, 0 - vst $vr0, $sp, 32 # 16-byte Folded Spill + lu52i.d $a0, $zero, 971 + movgr2fr.d $fa0, $a0 + vrepli.b $vr1, 0 + vst $vr1, $sp, 32 # 16-byte Folded Spill addi.w $a0, $a1, 0 - st.d $a0, $sp, 96 # 8-byte Folded Spill + st.d $a0, $sp, 112 # 8-byte Folded Spill + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa1, $a0 + fst.d $fa1, $sp, 104 # 8-byte Folded Spill + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1019 + movgr2fr.d $fa1, $a0 + fst.d $fa1, $sp, 96 # 8-byte Folded Spill ori $a0, $zero, 1 + fst.d $fa0, $sp, 120 # 8-byte Folded Spill + fst.d $fa0, $sp, 192 # 8-byte Folded Spill .p2align 4, , 16 .LBB16_67: # =>This Loop Header: Depth=1 # Child Loop BB16_76 Depth 2 @@ -14481,7 +14481,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE # Child Loop BB16_147 Depth 2 # Child Loop BB16_150 Depth 2 fld.d $fa0, $s2, 0 - ld.d $a1, $sp, 160 # 8-byte Folded Reload + ld.d $a1, $sp, 176 # 8-byte Folded Reload fld.d $fa1, $a1, 0 fcmp.cule.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB16_159 @@ -14516,7 +14516,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE blez $fp, .LBB16_77 # %bb.75: # %.lr.ph.i196.preheader # in Loop: Header=BB16_67 Depth=1 - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload move $a1, $s5 .p2align 4, , 16 .LBB16_76: # %.lr.ph.i196 @@ -14553,7 +14553,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE b .LBB16_92 .LBB16_82: # %vector.body510.preheader # in Loop: Header=BB16_67 Depth=1 - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $a1, $sp, 88 # 8-byte Folded Reload move $a2, $s6 vld $vr4, $sp, 32 # 16-byte Folded Reload @@ -14579,10 +14579,10 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE beq $s5, $s6, .LBB16_87 .LBB16_85: # %.lr.ph.i187.preheader537 # in Loop: Header=BB16_67 Depth=1 - ld.d $a0, $sp, 128 # 8-byte Folded Reload + ld.d $a0, $sp, 144 # 8-byte Folded Reload add.d $a0, $a0, $a2 alsl.d $a1, $a2, $s0, 3 - ld.d $a3, $sp, 184 # 8-byte Folded Reload + ld.d $a3, $sp, 200 # 8-byte Folded Reload alsl.d $a2, $a2, $a3, 3 .p2align 4, , 16 .LBB16_86: # %.lr.ph.i187 @@ -14608,7 +14608,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE .LBB16_89: # %vector.ph520 # in Loop: Header=BB16_67 Depth=1 vreplvei.d $vr1, $vr0, 0 - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $a1, $sp, 88 # 8-byte Folded Reload move $a2, $s6 .p2align 4, , 16 @@ -14635,10 +14635,10 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE beq $s5, $s6, .LBB16_94 .LBB16_92: # %.lr.ph.i205.preheader538 # in Loop: Header=BB16_67 Depth=1 - ld.d $a0, $sp, 128 # 8-byte Folded Reload + ld.d $a0, $sp, 144 # 8-byte Folded Reload add.d $a0, $a0, $a2 alsl.d $a1, $a2, $s0, 3 - ld.d $a3, $sp, 184 # 8-byte Folded Reload + ld.d $a3, $sp, 200 # 8-byte Folded Reload alsl.d $a2, $a2, $a3, 3 .p2align 4, , 16 .LBB16_93: # %.lr.ph.i205 @@ -14661,9 +14661,9 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE .Ltmp1031: # EH_LABEL # %bb.95: # in Loop: Header=BB16_67 Depth=1 fsub.d $fa1, $fs2, $fs0 - fld.d $fa2, $sp, 208 # 8-byte Folded Reload + fld.d $fa2, $sp, 224 # 8-byte Folded Reload fadd.d $fa2, $fa2, $fa1 - fst.d $fa2, $sp, 208 # 8-byte Folded Spill + fst.d $fa2, $sp, 224 # 8-byte Folded Spill fsub.d $fs4, $fa0, $fs3 fmov.d $fs1, $fs6 .LBB16_96: # in Loop: Header=BB16_67 Depth=1 @@ -14673,11 +14673,11 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE # %bb.97: # %.split706 # in Loop: Header=BB16_67 Depth=1 fst.d $fa0, $s2, 0 - ld.d $a0, $sp, 192 # 8-byte Folded Reload + ld.d $a0, $sp, 208 # 8-byte Folded Reload beq $s8, $a0, .LBB16_99 .LBB16_98: # %.split706 # in Loop: Header=BB16_67 Depth=1 - ld.d $a0, $sp, 120 # 8-byte Folded Reload + ld.d $a0, $sp, 136 # 8-byte Folded Reload mod.wu $a0, $s8, $a0 bnez $a0, .LBB16_110 .LBB16_99: # in Loop: Header=BB16_67 Depth=1 @@ -14839,7 +14839,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE .Ltmp1059: # EH_LABEL # %bb.119: # in Loop: Header=BB16_67 Depth=1 fmov.d $fs3, $fa0 - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 160 # 8-byte Folded Reload addi.w $s3, $a0, 0 fmov.d $fs6, $fs5 blez $s3, .LBB16_122 @@ -14847,7 +14847,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE # in Loop: Header=BB16_67 Depth=1 move $a0, $s1 move $a1, $s0 - ld.d $a2, $sp, 136 # 8-byte Folded Reload + ld.d $a2, $sp, 152 # 8-byte Folded Reload fmov.d $fs6, $fs5 .p2align 4, , 16 .LBB16_121: # %.lr.ph.i239 @@ -14870,15 +14870,15 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE # %bb.123: # in Loop: Header=BB16_67 Depth=1 fadd.d $fs7, $fs7, $fs4 fsub.d $fa1, $fs2, $fs0 - fld.d $fa2, $sp, 200 # 8-byte Folded Reload + fld.d $fa2, $sp, 216 # 8-byte Folded Reload fadd.d $fa2, $fa2, $fa1 - fst.d $fa2, $sp, 200 # 8-byte Folded Spill + fst.d $fa2, $sp, 216 # 8-byte Folded Spill fsub.d $fa0, $fa0, $fs3 - fld.d $fa1, $sp, 176 # 8-byte Folded Reload + fld.d $fa1, $sp, 192 # 8-byte Folded Reload fcmp.cule.d $fcc0, $fa1, $fs6 - fld.d $fa1, $sp, 208 # 8-byte Folded Reload + fld.d $fa1, $sp, 224 # 8-byte Folded Reload fadd.d $fa1, $fa1, $fa0 - fst.d $fa1, $sp, 208 # 8-byte Folded Spill + fst.d $fa1, $sp, 224 # 8-byte Folded Spill bcnez $fcc0, .LBB16_135 # %bb.124: # in Loop: Header=BB16_67 Depth=1 fcmp.clt.d $fcc0, $fs6, $fs5 @@ -14889,7 +14889,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE # %bb.126: # %.lr.ph.i.i.preheader # in Loop: Header=BB16_67 Depth=1 move $a0, $s1 - ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $a1, $sp, 152 # 8-byte Folded Reload fmov.d $fa0, $fs5 .p2align 4, , 16 .LBB16_127: # %.lr.ph.i.i @@ -14909,7 +14909,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE # %bb.129: # %_ZN6miniFE3dotINS_6VectorIdiiEEEENS_10TypeTraitsINT_10ScalarTypeEE14magnitude_typeERKS4_S9_.exit.i.split # in Loop: Header=BB16_67 Depth=1 fmov.d $fa0, $fs5 - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 112 # 8-byte Folded Reload blez $a0, .LBB16_132 .LBB16_130: # %.lr.ph.i10.i.preheader # in Loop: Header=BB16_67 Depth=1 @@ -14933,21 +14933,18 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE bceqz $fcc0, .LBB16_158 .LBB16_133: # %.loopexit.split # in Loop: Header=BB16_67 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI16_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI16_1) - ld.d $a0, $sp, 104 # 8-byte Folded Reload - fld.d $fa2, $a0, %pc_lo12(.LCPI16_0) - fabs.d $fa3, $fs6 - fmul.d $fa0, $fs0, $fa0 - fmul.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa2 - fcmp.cult.d $fcc0, $fa0, $fa3 + fabs.d $fa0, $fs6 + fld.d $fa2, $sp, 104 # 8-byte Folded Reload + fmul.d $fa2, $fs0, $fa2 + fmul.d $fa1, $fa2, $fa1 + fld.d $fa2, $sp, 120 # 8-byte Folded Reload + fmul.d $fa1, $fa1, $fa2 + fcmp.cult.d $fcc0, $fa1, $fa0 bceqz $fcc0, .LBB16_166 # %bb.134: # in Loop: Header=BB16_67 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI16_2) - fld.d $fa0, $a0, %pc_lo12(.LCPI16_2) + fld.d $fa0, $sp, 96 # 8-byte Folded Reload fmul.d $fa0, $fs6, $fa0 - fst.d $fa0, $sp, 176 # 8-byte Folded Spill + fst.d $fa0, $sp, 192 # 8-byte Folded Spill .LBB16_135: # in Loop: Header=BB16_67 Depth=1 .Ltmp1062: # EH_LABEL pcaddu18i $ra, %call36(_ZN6miniFE7mytimerEv) @@ -14955,7 +14952,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE .Ltmp1063: # EH_LABEL # %bb.136: # in Loop: Header=BB16_67 Depth=1 fmov.d $fs2, $fa0 - ld.d $a0, $sp, 168 # 8-byte Folded Reload + ld.d $a0, $sp, 184 # 8-byte Folded Reload ld.d $a1, $a0, 16 ld.d $a0, $a0, 8 sub.d $a1, $a1, $a0 @@ -15010,7 +15007,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE .LBB16_146: # %vector.ph473 # in Loop: Header=BB16_67 Depth=1 vreplvei.d $vr1, $vr0, 0 - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $a1, $sp, 72 # 8-byte Folded Reload move $a2, $s6 .p2align 4, , 16 @@ -15037,10 +15034,10 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE beq $s5, $s6, .LBB16_151 .LBB16_149: # %.lr.ph.i260.preheader536 # in Loop: Header=BB16_67 Depth=1 - ld.d $a0, $sp, 128 # 8-byte Folded Reload + ld.d $a0, $sp, 144 # 8-byte Folded Reload add.d $a0, $a0, $a2 alsl.d $a1, $a2, $s1, 3 - ld.d $a3, $sp, 184 # 8-byte Folded Reload + ld.d $a3, $sp, 200 # 8-byte Folded Reload alsl.d $a2, $a2, $a3, 3 .p2align 4, , 16 .LBB16_150: # %.lr.ph.i260 @@ -15065,10 +15062,10 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE # in Loop: Header=BB16_67 Depth=1 fsub.d $fa0, $fa0, $fs2 fadd.d $fs7, $fs7, $fa0 - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a0, $sp, 168 # 8-byte Folded Reload st.w $s8, $a0, 0 addi.w $a0, $s8, 1 - ld.d $a1, $sp, 192 # 8-byte Folded Reload + ld.d $a1, $sp, 208 # 8-byte Folded Reload bne $s8, $a1, .LBB16_67 b .LBB16_159 .LBB16_153: # %vector.ph490 @@ -15107,7 +15104,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 fst.d $fa0, $s2, 0 - ld.d $a0, $sp, 192 # 8-byte Folded Reload + ld.d $a0, $sp, 208 # 8-byte Folded Reload bne $s8, $a0, .LBB16_98 b .LBB16_99 .LBB16_157: # %call.sqrt708 @@ -15116,7 +15113,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE jirl $ra, $ra, 0 fmov.d $fs0, $fa0 fmov.d $fa0, $fs5 - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 112 # 8-byte Folded Reload bgtz $a0, .LBB16_130 b .LBB16_132 .LBB16_158: # %call.sqrt709 @@ -15128,9 +15125,9 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE .LBB16_159: # %.critedge ld.d $s3, $sp, 56 # 8-byte Folded Reload fst.d $fs7, $s3, 0 - fld.d $fa0, $sp, 208 # 8-byte Folded Reload + fld.d $fa0, $sp, 224 # 8-byte Folded Reload fst.d $fa0, $s3, 8 - fld.d $fa0, $sp, 200 # 8-byte Folded Reload + fld.d $fa0, $sp, 216 # 8-byte Folded Reload fst.d $fa0, $s3, 16 st.d $zero, $s3, 24 .Ltmp1085: # EH_LABEL @@ -15138,7 +15135,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE jirl $ra, $ra, 0 .Ltmp1086: # EH_LABEL # %bb.160: - ld.d $fp, $sp, 184 # 8-byte Folded Reload + ld.d $fp, $sp, 200 # 8-byte Folded Reload b .LBB16_175 .LBB16_161: # %vector.memcheck451 sub.d $a5, $s8, $a0 @@ -15182,7 +15179,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE addi.d $a1, $a0, %pc_lo12(.L.str.109) ori $a2, $zero, 44 move $a0, $s2 - ld.d $fp, $sp, 184 # 8-byte Folded Reload + ld.d $fp, $sp, 200 # 8-byte Folded Reload pcaddu18i $ra, %call36(_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l) jirl $ra, $ra, 0 .Ltmp1068: # EH_LABEL @@ -15228,9 +15225,9 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE .Ltmp1076: # EH_LABEL # %bb.174: # %_ZNSolsEPFRSoS_E.exit248 fst.d $fs7, $s3, 0 - fld.d $fa0, $sp, 208 # 8-byte Folded Reload + fld.d $fa0, $sp, 224 # 8-byte Folded Reload fst.d $fa0, $s3, 8 - fld.d $fa0, $sp, 200 # 8-byte Folded Reload + fld.d $fa0, $sp, 216 # 8-byte Folded Reload fst.d $fa0, $s3, 16 .Ltmp1077: # EH_LABEL pcaddu18i $ra, %call36(_ZN6miniFE7mytimerEv) @@ -15261,49 +15258,49 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE ld.d $a0, $sp, 24 # 8-byte Folded Reload sub.d $a1, $a0, $fp move $a0, $fp - fld.d $fs7, $sp, 216 # 8-byte Folded Reload - fld.d $fs6, $sp, 224 # 8-byte Folded Reload - fld.d $fs5, $sp, 232 # 8-byte Folded Reload - fld.d $fs4, $sp, 240 # 8-byte Folded Reload - fld.d $fs3, $sp, 248 # 8-byte Folded Reload - fld.d $fs2, $sp, 256 # 8-byte Folded Reload - fld.d $fs1, $sp, 264 # 8-byte Folded Reload - fld.d $fs0, $sp, 272 # 8-byte Folded Reload - ld.d $s8, $sp, 280 # 8-byte Folded Reload - ld.d $s7, $sp, 288 # 8-byte Folded Reload - ld.d $s6, $sp, 296 # 8-byte Folded Reload - ld.d $s5, $sp, 304 # 8-byte Folded Reload - ld.d $s4, $sp, 312 # 8-byte Folded Reload - ld.d $s3, $sp, 320 # 8-byte Folded Reload - ld.d $s2, $sp, 328 # 8-byte Folded Reload - ld.d $s1, $sp, 336 # 8-byte Folded Reload - ld.d $s0, $sp, 344 # 8-byte Folded Reload - ld.d $fp, $sp, 352 # 8-byte Folded Reload - ld.d $ra, $sp, 360 # 8-byte Folded Reload - addi.d $sp, $sp, 368 + fld.d $fs7, $sp, 232 # 8-byte Folded Reload + fld.d $fs6, $sp, 240 # 8-byte Folded Reload + fld.d $fs5, $sp, 248 # 8-byte Folded Reload + fld.d $fs4, $sp, 256 # 8-byte Folded Reload + fld.d $fs3, $sp, 264 # 8-byte Folded Reload + fld.d $fs2, $sp, 272 # 8-byte Folded Reload + fld.d $fs1, $sp, 280 # 8-byte Folded Reload + fld.d $fs0, $sp, 288 # 8-byte Folded Reload + ld.d $s8, $sp, 296 # 8-byte Folded Reload + ld.d $s7, $sp, 304 # 8-byte Folded Reload + ld.d $s6, $sp, 312 # 8-byte Folded Reload + ld.d $s5, $sp, 320 # 8-byte Folded Reload + ld.d $s4, $sp, 328 # 8-byte Folded Reload + ld.d $s3, $sp, 336 # 8-byte Folded Reload + ld.d $s2, $sp, 344 # 8-byte Folded Reload + ld.d $s1, $sp, 352 # 8-byte Folded Reload + ld.d $s0, $sp, 360 # 8-byte Folded Reload + ld.d $fp, $sp, 368 # 8-byte Folded Reload + ld.d $ra, $sp, 376 # 8-byte Folded Reload + addi.d $sp, $sp, 384 pcaddu18i $t8, %call36(_ZdlPvm) jr $t8 .LBB16_181: # %_ZN6miniFE6VectorIdiiED2Ev.exit268 - fld.d $fs7, $sp, 216 # 8-byte Folded Reload - fld.d $fs6, $sp, 224 # 8-byte Folded Reload - fld.d $fs5, $sp, 232 # 8-byte Folded Reload - fld.d $fs4, $sp, 240 # 8-byte Folded Reload - fld.d $fs3, $sp, 248 # 8-byte Folded Reload - fld.d $fs2, $sp, 256 # 8-byte Folded Reload - fld.d $fs1, $sp, 264 # 8-byte Folded Reload - fld.d $fs0, $sp, 272 # 8-byte Folded Reload - ld.d $s8, $sp, 280 # 8-byte Folded Reload - ld.d $s7, $sp, 288 # 8-byte Folded Reload - ld.d $s6, $sp, 296 # 8-byte Folded Reload - ld.d $s5, $sp, 304 # 8-byte Folded Reload - ld.d $s4, $sp, 312 # 8-byte Folded Reload - ld.d $s3, $sp, 320 # 8-byte Folded Reload - ld.d $s2, $sp, 328 # 8-byte Folded Reload - ld.d $s1, $sp, 336 # 8-byte Folded Reload - ld.d $s0, $sp, 344 # 8-byte Folded Reload - ld.d $fp, $sp, 352 # 8-byte Folded Reload - ld.d $ra, $sp, 360 # 8-byte Folded Reload - addi.d $sp, $sp, 368 + fld.d $fs7, $sp, 232 # 8-byte Folded Reload + fld.d $fs6, $sp, 240 # 8-byte Folded Reload + fld.d $fs5, $sp, 248 # 8-byte Folded Reload + fld.d $fs4, $sp, 256 # 8-byte Folded Reload + fld.d $fs3, $sp, 264 # 8-byte Folded Reload + fld.d $fs2, $sp, 272 # 8-byte Folded Reload + fld.d $fs1, $sp, 280 # 8-byte Folded Reload + fld.d $fs0, $sp, 288 # 8-byte Folded Reload + ld.d $s8, $sp, 296 # 8-byte Folded Reload + ld.d $s7, $sp, 304 # 8-byte Folded Reload + ld.d $s6, $sp, 312 # 8-byte Folded Reload + ld.d $s5, $sp, 320 # 8-byte Folded Reload + ld.d $s4, $sp, 328 # 8-byte Folded Reload + ld.d $s3, $sp, 336 # 8-byte Folded Reload + ld.d $s2, $sp, 344 # 8-byte Folded Reload + ld.d $s1, $sp, 352 # 8-byte Folded Reload + ld.d $s0, $sp, 360 # 8-byte Folded Reload + ld.d $fp, $sp, 368 # 8-byte Folded Reload + ld.d $ra, $sp, 376 # 8-byte Folded Reload + addi.d $sp, $sp, 384 ret .LBB16_182: .Ltmp1082: # EH_LABEL @@ -15350,7 +15347,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE .LBB16_194: .Ltmp1093: # EH_LABEL move $s2, $a0 - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload beqz $a0, .LBB16_203 b .LBB16_206 .LBB16_195: # %.loopexit.split-lp @@ -15373,7 +15370,7 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE # %bb.201: # %_ZN6miniFE6VectorIdiiED2Ev.exit270 bnez $s0, .LBB16_205 .LBB16_202: # %_ZN6miniFE6VectorIdiiED2Ev.exit272 - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload bnez $a0, .LBB16_206 .LBB16_203: # %_ZN6miniFE6VectorIdiiED2Ev.exit274 move $a0, $s2 @@ -15392,10 +15389,10 @@ _ZN6miniFE8cg_solveINS_9CSRMatrixIdiiEENS_6VectorIdiiEENS_10matvec_stdIS2_S4_EEE move $a0, $s0 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload beqz $a0, .LBB16_203 .LBB16_206: - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload ld.d $a1, $sp, 24 # 8-byte Folded Reload sub.d $a1, $a1, $a0 pcaddu18i $ra, %call36(_ZdlPvm) @@ -15510,18 +15507,8 @@ GCC_except_table16: .Lcst_end11: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT_17GlobalOrdinalTypeEEERKS4_db -.LCPI17_0: - .dword 0x3fa999999999999a # double 0.050000000000000003 -.LCPI17_1: - .dword 0x400921fb54442d18 # double 3.1415926535897931 -.LCPI17_2: - .dword 0x4023bd3cc9be45de # double 9.869604401089358 -.LCPI17_3: - .dword 0x3ff9f02f6222c720 # double 1.6211389382774044 .section .text._ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT_17GlobalOrdinalTypeEEERKS4_db,"axG",@progbits,_ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT_17GlobalOrdinalTypeEEERKS4_db,comdat - .weak _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT_17GlobalOrdinalTypeEEERKS4_db + .weak _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT_17GlobalOrdinalTypeEEERKS4_db # -- Begin function _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT_17GlobalOrdinalTypeEEERKS4_db .p2align 5 .type _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT_17GlobalOrdinalTypeEEERKS4_db,@function _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT_17GlobalOrdinalTypeEEERKS4_db: # @_ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT_17GlobalOrdinalTypeEEERKS4_db @@ -15581,7 +15568,7 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT sltui $a2, $a2, 1 and $a2, $a4, $a2 add.w $a2, $a3, $a2 - movgr2fr.d $fs6, $zero + movgr2fr.d $fs7, $zero st.d $a2, $sp, 40 # 8-byte Folded Spill bge $a5, $a2, .LBB17_46 # %bb.1: # %.preheader266.lr.ph @@ -15616,7 +15603,7 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT move $s2, $zero move $s4, $zero move $t2, $zero - move $fp, $zero + move $s0, $zero move $s6, $zero move $t1, $zero move $s3, $zero @@ -15655,7 +15642,12 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT add.d $a0, $a1, $a0 st.d $t0, $sp, 168 # 8-byte Folded Spill mul.d $a1, $a0, $t0 - vldi $vr2, -800 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1018 + movgr2fr.d $fs4, $a0 + vldi $vr1, -800 st.d $a7, $sp, 120 # 8-byte Folded Spill b .LBB17_5 .p2align 4, , 16 @@ -15709,9 +15701,9 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT b .LBB17_10 .p2align 4, , 16 .LBB17_8: # in Loop: Header=BB17_10 Depth=3 - fst.d $fs4, $s7, 0 + fst.d $fs5, $s7, 0 addi.d $t2, $s7, 8 - move $fp, $s4 + move $s0, $s4 move $s4, $a0 .LBB17_9: # %_ZNSt6vectorIdSaIdEE9push_backERKd.exit108.us.us.us # in Loop: Header=BB17_10 Depth=3 @@ -15744,18 +15736,18 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT div.w $a1, $a0, $a2 movgr2fr.w $fa0, $a1 ffint.d.w $fa0, $fa0 - fdiv.d $fs4, $fa0, $fs0 + fdiv.d $fs5, $fa0, $fs0 mul.d $a1, $a1, $a2 sub.w $a1, $a0, $a1 ld.d $a2, $sp, 168 # 8-byte Folded Reload div.w $a1, $a1, $a2 movgr2fr.w $fa0, $a1 ffint.d.w $fa0, $fa0 - fdiv.d $fs5, $fa0, $fs2 + fdiv.d $fs1, $fa0, $fs2 mod.w $a0, $a0, $a2 movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 - fdiv.d $fs1, $fa0, $fs3 + fdiv.d $fs6, $fa0, $fs3 ld.d $a0, $sp, 136 # 8-byte Folded Reload beqz $a0, .LBB17_15 .LBB17_11: # %.critedge.us.us.us @@ -15767,12 +15759,12 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT st.d $s6, $sp, 192 # 8-byte Folded Spill beq $t2, $s4, .LBB17_25 .LBB17_13: # in Loop: Header=BB17_10 Depth=3 - fst.d $fs1, $t2, 0 - move $s5, $fp + fst.d $fs6, $t2, 0 + move $s5, $s0 addi.d $a0, $t2, 8 beq $a0, $s4, .LBB17_32 .LBB17_14: # in Loop: Header=BB17_10 Depth=3 - fst.d $fs5, $t2, 8 + fst.d $fs1, $t2, 8 addi.d $s7, $t2, 16 move $a0, $s4 move $s4, $s5 @@ -15781,21 +15773,19 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT b .LBB17_39 .p2align 4, , 16 .LBB17_15: # in Loop: Header=BB17_10 Depth=3 - pcalau12i $a0, %pc_hi20(.LCPI17_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI17_0) - fadd.d $fa1, $fs1, $fa2 - fabs.d $fa1, $fa1 - fcmp.cule.d $fcc0, $fa0, $fa1 + fadd.d $fa0, $fs6, $fa1 + fabs.d $fa0, $fa0 + fcmp.cule.d $fcc0, $fs4, $fa0 bcnez $fcc0, .LBB17_9 # %bb.16: # in Loop: Header=BB17_10 Depth=3 - fadd.d $fa1, $fs5, $fa2 - fabs.d $fa1, $fa1 - fcmp.cule.d $fcc0, $fa0, $fa1 + fadd.d $fa0, $fs1, $fa1 + fabs.d $fa0, $fa0 + fcmp.cule.d $fcc0, $fs4, $fa0 bcnez $fcc0, .LBB17_9 # %bb.17: # in Loop: Header=BB17_10 Depth=3 - fadd.d $fa1, $fs4, $fa2 - fabs.d $fa1, $fa1 - fcmp.cule.d $fcc0, $fa0, $fa1 + fadd.d $fa0, $fs5, $fa1 + fabs.d $fa0, $fa0 + fcmp.cule.d $fcc0, $fs4, $fa0 bcnez $fcc0, .LBB17_9 b .LBB17_11 .p2align 4, , 16 @@ -15808,7 +15798,7 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT beq $s4, $a0, .LBB17_121 # %bb.19: # %_ZNSt12_Vector_baseIiSaIiEE11_M_allocateEm.exit.i.i.us.us.us # in Loop: Header=BB17_10 Depth=3 - move $s0, $t2 + move $fp, $t2 srai.d $a0, $s4, 2 ori $a2, $zero, 1 sltu $a1, $a2, $a0 @@ -15857,13 +15847,13 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT alsl.d $s6, $s5, $s6, 2 move $s4, $s7 ld.d $a7, $sp, 120 # 8-byte Folded Reload - vldi $vr2, -800 - move $t2, $s0 + vldi $vr1, -800 + move $t2, $fp st.d $s6, $sp, 192 # 8-byte Folded Spill bne $t2, $s4, .LBB17_13 .LBB17_25: # in Loop: Header=BB17_10 Depth=3 move $s7, $s4 - sub.d $s4, $s4, $fp + sub.d $s4, $s4, $s0 addi.w $a0, $zero, -8 lu52i.d $a0, $a0, 2047 beq $s4, $a0, .LBB17_123 @@ -15888,27 +15878,27 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT or $s6, $a1, $a0 slli.d $a0, $s6, 3 .Ltmp1096: # EH_LABEL - move $s0, $s3 + move $fp, $s3 pcaddu18i $ra, %call36(_Znwm) jirl $ra, $ra, 0 .Ltmp1097: # EH_LABEL # %bb.27: # %.noexc90.us.us.us # in Loop: Header=BB17_10 Depth=3 move $s5, $a0 - fstx.d $fs1, $a0, $s4 + fstx.d $fs6, $a0, $s4 blez $s4, .LBB17_29 # %bb.28: # in Loop: Header=BB17_10 Depth=3 move $a0, $s5 - move $a1, $fp + move $a1, $s0 move $a2, $s4 pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 .LBB17_29: # %_ZNSt6vectorIdSaIdEE11_S_relocateEPdS2_S2_RS0_.exit.i.i.us.us.us # in Loop: Header=BB17_10 Depth=3 - move $s3, $s0 - beqz $fp, .LBB17_31 + move $s3, $fp + beqz $s0, .LBB17_31 # %bb.30: # in Loop: Header=BB17_10 Depth=3 - move $a0, $fp + move $a0, $s0 move $a1, $s4 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -15918,8 +15908,8 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT alsl.d $s4, $s6, $s5, 3 ld.d $s6, $sp, 192 # 8-byte Folded Reload ld.d $a7, $sp, 120 # 8-byte Folded Reload - vldi $vr2, -800 ld.d $t1, $sp, 112 # 8-byte Folded Reload + vldi $vr1, -800 ld.d $s2, $sp, 184 # 8-byte Folded Reload addi.d $a0, $t2, 8 bne $a0, $s4, .LBB17_14 @@ -15947,18 +15937,18 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT or $a0, $a0, $a3 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 - or $s0, $a1, $a0 - slli.d $a0, $s0, 3 + or $fp, $a1, $a0 + slli.d $a0, $fp, 3 .Ltmp1098: # EH_LABEL move $s2, $s3 - move $fp, $s5 + move $s0, $s5 pcaddu18i $ra, %call36(_Znwm) jirl $ra, $ra, 0 .Ltmp1099: # EH_LABEL # %bb.34: # %.noexc98.us.us.us # in Loop: Header=BB17_10 Depth=3 move $s4, $a0 - fstx.d $fs5, $a0, $s6 + fstx.d $fs1, $a0, $s6 blez $s6, .LBB17_36 # %bb.35: # in Loop: Header=BB17_10 Depth=3 move $a0, $s4 @@ -15979,11 +15969,11 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT # in Loop: Header=BB17_10 Depth=3 add.d $a0, $s4, $s6 addi.d $s7, $a0, 8 - alsl.d $a0, $s0, $s4, 3 + alsl.d $a0, $fp, $s4, 3 ld.d $s6, $sp, 192 # 8-byte Folded Reload ld.d $a7, $sp, 120 # 8-byte Folded Reload - vldi $vr2, -800 ld.d $t1, $sp, 112 # 8-byte Folded Reload + vldi $vr1, -800 ld.d $s2, $sp, 184 # 8-byte Folded Reload addi.d $t1, $t1, 4 bne $s7, $a0, .LBB17_8 @@ -15994,7 +15984,7 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT beq $s5, $a0, .LBB17_119 # %bb.40: # %_ZNSt12_Vector_baseIdSaIdEE11_M_allocateEm.exit.i.i101.us.us.us # in Loop: Header=BB17_10 Depth=3 - move $s0, $t1 + move $fp, $t1 srai.d $a0, $s5, 3 ori $a3, $zero, 1 sltu $a2, $a3, $a0 @@ -16014,14 +16004,14 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT slli.d $a0, $s1, 3 .Ltmp1100: # EH_LABEL move $s2, $s3 - move $fp, $s4 + move $s0, $s4 pcaddu18i $ra, %call36(_Znwm) jirl $ra, $ra, 0 .Ltmp1101: # EH_LABEL # %bb.41: # %.noexc107.us.us.us # in Loop: Header=BB17_10 Depth=3 move $s6, $a0 - fstx.d $fs4, $a0, $s5 + fstx.d $fs5, $a0, $s5 blez $s5, .LBB17_43 # %bb.42: # in Loop: Header=BB17_10 Depth=3 move $a0, $s6 @@ -16043,24 +16033,24 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT add.d $a0, $s6, $s5 addi.d $t2, $a0, 8 alsl.d $s4, $s1, $s6, 3 - move $fp, $s6 + move $s0, $s6 ld.d $s6, $sp, 192 # 8-byte Folded Reload ld.d $a7, $sp, 120 # 8-byte Folded Reload - vldi $vr2, -800 - move $t1, $s0 + move $t1, $fp + vldi $vr1, -800 ld.d $s2, $sp, 184 # 8-byte Folded Reload b .LBB17_9 .LBB17_46: move $s3, $zero move $s6, $zero - move $fp, $zero + move $s0, $zero move $s4, $zero # implicit-def: $f28_64 # implicit-def: $f27_64 # implicit-def: $f25_64 - # implicit-def: $f31_64 + # implicit-def: $f30_64 # implicit-def: $f29_64 - fcmp.cule.d $fcc0, $fs6, $fs0 + fcmp.cule.d $fcc0, $fs7, $fs0 bceqz $fcc0, .LBB17_76 .LBB17_47: .Ltmp1103: # EH_LABEL @@ -16118,63 +16108,72 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT # implicit-def: $f28_64 # implicit-def: $f27_64 # implicit-def: $f25_64 - # implicit-def: $f31_64 + # implicit-def: $f30_64 # implicit-def: $f29_64 move $s6, $s3 - move $fp, $s3 + move $s0, $s3 move $s4, $s3 - fcmp.cule.d $fcc0, $fs6, $fs0 + fcmp.cule.d $fcc0, $fs7, $fs0 bceqz $fcc0, .LBB17_76 b .LBB17_47 .LBB17_55: # %.preheader beq $t1, $s3, .LBB17_116 # %bb.56: # %.lr.ph - st.d $fp, $sp, 160 # 8-byte Folded Spill + st.d $s0, $sp, 160 # 8-byte Folded Spill st.d $s4, $sp, 80 # 8-byte Folded Spill st.d $s6, $sp, 192 # 8-byte Folded Spill - move $s0, $zero + move $fp, $zero st.d $s3, $sp, 152 # 8-byte Folded Spill sub.d $a0, $t1, $s3 - srai.d $fp, $a0, 2 + srai.d $s0, $a0, 2 ld.d $a0, $sp, 16 # 8-byte Folded Reload ld.d $s1, $a0, 8 movgr2fr.d $fa3, $zero vldi $vr4, -912 - pcalau12i $a0, %pc_hi20(.LCPI17_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI17_1) + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fa0, $a0 fst.d $fa0, $sp, 184 # 8-byte Folded Spill addi.w $s2, $zero, -7 lu32i.d $s2, 0 - pcalau12i $a0, %pc_hi20(.LCPI17_2) - fld.d $fa0, $a0, %pc_lo12(.LCPI17_2) + lu12i.w $a0, -222236 + ori $a0, $a0, 1502 + lu32i.d $a0, 245052 + lu52i.d $a0, $a0, 1026 + movgr2fr.d $fa0, $a0 fst.d $fa0, $sp, 176 # 8-byte Folded Spill addi.w $s3, $zero, -3 lu32i.d $s3, 0 ori $s4, $zero, 603 ori $s5, $zero, 300 - pcalau12i $s6, %pc_hi20(.LCPI17_3) - fmov.d $fs6, $fa3 + lu12i.w $a0, 401964 + ori $a0, $a0, 1824 + lu32i.d $a0, -397265 + lu52i.d $s6, $a0, 1023 + fmov.d $fs7, $fa3 # implicit-def: $f28_64 # implicit-def: $f27_64 # implicit-def: $f2_64 - # implicit-def: $f31_64 + # implicit-def: $f30_64 # implicit-def: $f29_64 fst.d $fa3, $sp, 88 # 8-byte Folded Spill b .LBB17_58 .p2align 4, , 16 .LBB17_57: # in Loop: Header=BB17_58 Depth=1 - addi.d $s0, $s0, 1 - beq $s0, $fp, .LBB17_74 + addi.d $fp, $fp, 1 + beq $fp, $s0, .LBB17_74 .LBB17_58: # =>This Loop Header: Depth=1 # Child Loop BB17_68 Depth 2 # Child Loop BB17_69 Depth 3 - slli.d $a0, $s0, 2 + slli.d $a0, $fp, 2 ld.d $a1, $sp, 152 # 8-byte Folded Reload ldx.w $a0, $a1, $a0 slli.d $a0, $a0, 3 fldx.d $fa5, $s1, $a0 - slli.d $a0, $s0, 4 - alsl.d $a0, $s0, $a0, 3 + slli.d $a0, $fp, 4 + alsl.d $a0, $fp, $a0, 3 ld.d $a2, $sp, 160 # 8-byte Folded Reload add.d $a1, $a2, $a0 fldx.d $fs2, $a2, $a0 @@ -16187,15 +16186,15 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT .LBB17_59: # in Loop: Header=BB17_58 Depth=1 fsub.d $fa1, $fa0, $fa5 fabs.d $fa1, $fa1 - fcmp.cule.d $fcc0, $fa1, $fs6 + fcmp.cule.d $fcc0, $fa1, $fs7 bcnez $fcc0, .LBB17_57 # %bb.60: # in Loop: Header=BB17_58 Depth=1 fmov.d $fs5, $fs1 - fld.d $fs7, $sp, 168 # 8-byte Folded Reload + fld.d $fs6, $sp, 168 # 8-byte Folded Reload fmov.d $fa2, $fs2 fmov.d $fs3, $fa0 fmov.d $fs4, $fa5 - fmov.d $fs6, $fa1 + fmov.d $fs7, $fa1 b .LBB17_57 .p2align 4, , 16 .LBB17_61: # in Loop: Header=BB17_58 Depth=1 @@ -16224,11 +16223,11 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT # in Loop: Header=BB17_58 Depth=1 fst.d $fa5, $sp, 96 # 8-byte Folded Spill fst.d $fa2, $sp, 104 # 8-byte Folded Spill - fst.d $fs7, $sp, 112 # 8-byte Folded Spill + fst.d $fs6, $sp, 112 # 8-byte Folded Spill fst.d $fs5, $sp, 120 # 8-byte Folded Spill fst.d $fs4, $sp, 128 # 8-byte Folded Spill fst.d $fs3, $sp, 136 # 8-byte Folded Spill - fst.d $fs6, $sp, 144 # 8-byte Folded Spill + fst.d $fs7, $sp, 144 # 8-byte Folded Spill move $a0, $zero fmov.d $fs5, $fa3 b .LBB17_68 @@ -16313,13 +16312,13 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT b .LBB17_70 .LBB17_73: # %_ZN6miniFE4solnEdddii.exit # in Loop: Header=BB17_58 Depth=1 - fld.d $fa0, $s6, %pc_lo12(.LCPI17_3) + movgr2fr.d $fa0, $s6 fmul.d $fa0, $fs5, $fa0 - fld.d $fs6, $sp, 144 # 8-byte Folded Reload + fld.d $fs7, $sp, 144 # 8-byte Folded Reload fld.d $fs3, $sp, 136 # 8-byte Folded Reload fld.d $fs4, $sp, 128 # 8-byte Folded Reload fld.d $fs5, $sp, 120 # 8-byte Folded Reload - fld.d $fs7, $sp, 112 # 8-byte Folded Reload + fld.d $fs6, $sp, 112 # 8-byte Folded Reload fld.d $fa2, $sp, 104 # 8-byte Folded Reload fld.d $fa3, $sp, 88 # 8-byte Folded Reload vldi $vr4, -912 @@ -16327,15 +16326,15 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT b .LBB17_59 .LBB17_74: # %._crit_edge fmov.d $fs1, $fa2 - fcmp.cor.d $fcc0, $fs6, $fs6 + fcmp.cor.d $fcc0, $fs7, $fs7 fld.d $fs0, $sp, 24 # 8-byte Folded Reload ld.d $s6, $sp, 192 # 8-byte Folded Reload ld.d $s4, $sp, 80 # 8-byte Folded Reload - ld.d $fp, $sp, 160 # 8-byte Folded Reload + ld.d $s0, $sp, 160 # 8-byte Folded Reload ld.d $s3, $sp, 152 # 8-byte Folded Reload bceqz $fcc0, .LBB17_111 # %bb.75: # %._crit_edge.thread - fcmp.cule.d $fcc0, $fs6, $fs0 + fcmp.cule.d $fcc0, $fs7, $fs0 bcnez $fcc0, .LBB17_47 .LBB17_76: .Ltmp1113: # EH_LABEL @@ -16351,7 +16350,7 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT .Ltmp1115: # EH_LABEL pcalau12i $a0, %got_pc_hi20(_ZSt4cout) ld.d $a0, $a0, %got_pc_lo12(_ZSt4cout) - fmov.d $fa0, $fs6 + fmov.d $fa0, $fs7 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp1116: # EH_LABEL @@ -16434,7 +16433,7 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT # %bb.89: # %_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc.exit119 .Ltmp1133: # EH_LABEL move $a0, $s1 - fmov.d $fa0, $fs7 + fmov.d $fa0, $fs6 pcaddu18i $ra, %call36(_ZNSo9_M_insertIdEERSoT_) jirl $ra, $ra, 0 .Ltmp1134: # EH_LABEL @@ -16578,10 +16577,10 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT jirl $ra, $ra, 0 .Ltmp1164: # EH_LABEL .LBB17_111: # %_ZNSolsEPFRSoS_E.exit139 - beqz $fp, .LBB17_113 + beqz $s0, .LBB17_113 # %bb.112: - sub.d $a1, $s4, $fp - move $a0, $fp + sub.d $a1, $s4, $s0 + move $a0, $s0 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB17_113: # %_ZNSt6vectorIdSaIdEED2Ev.exit @@ -16592,7 +16591,7 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 .LBB17_115: # %_ZNSt6vectorIiSaIiEED2Ev.exit - fcmp.clt.d $fcc0, $fs0, $fs6 + fcmp.clt.d $fcc0, $fs0, $fs7 movcf2gr $a0, $fcc0 fld.d $fs7, $sp, 200 # 8-byte Folded Reload fld.d $fs6, $sp, 208 # 8-byte Folded Reload @@ -16619,17 +16618,17 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT # implicit-def: $f28_64 # implicit-def: $f27_64 # implicit-def: $f25_64 - # implicit-def: $f31_64 + # implicit-def: $f30_64 # implicit-def: $f29_64 fld.d $fs0, $sp, 24 # 8-byte Folded Reload - fcmp.cule.d $fcc0, $fs6, $fs0 + fcmp.cule.d $fcc0, $fs7, $fs0 bceqz $fcc0, .LBB17_76 b .LBB17_47 .LBB17_117: # %.split421.us .Ltmp1170: # EH_LABEL pcalau12i $a0, %pc_hi20(.L.str.71) addi.d $a0, $a0, %pc_lo12(.L.str.71) - move $fp, $s5 + move $s0, $s5 pcaddu18i $ra, %call36(_ZSt20__throw_length_errorPKc) jirl $ra, $ra, 0 .Ltmp1171: # EH_LABEL @@ -16638,7 +16637,7 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT .Ltmp1168: # EH_LABEL pcalau12i $a0, %pc_hi20(.L.str.71) addi.d $a0, $a0, %pc_lo12(.L.str.71) - move $fp, $s4 + move $s0, $s4 move $s4, $s7 pcaddu18i $ra, %call36(_ZSt20__throw_length_errorPKc) jirl $ra, $ra, 0 @@ -16682,8 +16681,8 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT .LBB17_130: move $s1, $a0 .LBB17_131: - ld.d $s7, $sp, 192 # 8-byte Folded Reload - bnez $fp, .LBB17_134 + ld.d $fp, $sp, 192 # 8-byte Folded Reload + bnez $s0, .LBB17_134 # %bb.132: # %_ZNSt6vectorIdSaIdEED2Ev.exit150 bnez $s3, .LBB17_135 .LBB17_133: # %_ZNSt6vectorIiSaIiEED2Ev.exit152 @@ -16691,13 +16690,13 @@ _ZN6miniFE15verify_solutionINS_6VectorIdiiEEEEiRKNS_23simple_mesh_descriptionINT pcaddu18i $ra, %call36(_Unwind_Resume) jirl $ra, $ra, 0 .LBB17_134: # %.thread - sub.d $a1, $s4, $fp - move $a0, $fp + sub.d $a1, $s4, $s0 + move $a0, $s0 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 beqz $s3, .LBB17_133 .LBB17_135: - sub.d $a1, $s7, $s3 + sub.d $a1, $fp, $s3 move $a0, $s3 pcaddu18i $ra, %call36(_ZdlPvm) jirl $ra, $ra, 0 @@ -21027,12 +21026,6 @@ _ZN6miniFE4Hex820diffusionMatrix_symmIdEEvPKT_S4_PS2_: # @_ZN6miniFE4Hex820diffu .LCPI40_0: .dword 0xbfe279a74576233f # double -0.57735026899999997 .dword 0x3fe279a74576233f # double 0.57735026899999997 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI40_1: - .dword 0x3ff93cd3a2bb11a0 # double 1.5773502690000001 -.LCPI40_2: - .dword 0x3fdb0cb17513b982 # double 0.42264973100000003 .section .text._ZN6miniFE4Hex812sourceVectorIdEEvPKT_S4_PS2_,"axG",@progbits,_ZN6miniFE4Hex812sourceVectorIdEEvPKT_S4_PS2_,comdat .weak _ZN6miniFE4Hex812sourceVectorIdEEvPKT_S4_PS2_ .p2align 5 @@ -21103,18 +21096,24 @@ _ZN6miniFE4Hex812sourceVectorIdEEvPKT_S4_PS2_: # @_ZN6miniFE4Hex812sourceVectorI fst.d $fa0, $sp, 32 # 8-byte Folded Spill fadd.d $fa0, $fa1, $fa2 fst.d $fa0, $sp, 24 # 8-byte Folded Spill - addi.d $a0, $sp, 160 - pcalau12i $a1, %pc_hi20(.LCPI40_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI40_1) + addi.d $a1, $sp, 160 + addi.d $a0, $sp, 144 + ori $s4, $zero, 1 + lu12i.w $a2, -382031 + ori $a2, $a2, 416 + lu32i.d $a2, -443181 + lu52i.d $a2, $a2, 1023 + movgr2fr.d $fa0, $a2 fst.d $fa0, $sp, 16 # 8-byte Folded Spill - pcalau12i $a1, %pc_hi20(.LCPI40_2) - fld.d $fa0, $a1, %pc_lo12(.LCPI40_2) + lu12i.w $a2, 479547 + ori $a2, $a2, 2434 + lu32i.d $a2, -324431 + lu52i.d $a2, $a2, 1021 + movgr2fr.d $fa0, $a2 fst.d $fa0, $sp, 8 # 8-byte Folded Spill - addi.d $a1, $sp, 144 - ori $s4, $zero, 1 .p2align 4, , 16 .LBB40_1: # =>This Inner Loop Header: Depth=1 - fld.d $fa0, $a0, 0 + fld.d $fa0, $a1, 0 vldi $vr2, -912 fadd.d $fa1, $fa0, $fa2 vldi $vr3, -960 @@ -21123,7 +21122,7 @@ _ZN6miniFE4Hex812sourceVectorIdEEvPKT_S4_PS2_: # @_ZN6miniFE4Hex812sourceVectorI fsub.d $fa0, $fa2, $fa0 fmul.d $fa5, $fa0, $fa3 fst.d $fa5, $sp, 112 # 8-byte Folded Spill - fld.d $fs6, $a1, 0 + fld.d $fs6, $a0, 0 fld.d $fa1, $sp, 56 # 8-byte Folded Reload fmul.d $fa0, $fa5, $fa1 fmul.d $fa1, $fa4, $fa1 @@ -21345,8 +21344,8 @@ _ZN6miniFE4Hex812sourceVectorIdEEvPKT_S4_PS2_: # @_ZN6miniFE4Hex812sourceVectorI fst.d $fa0, $fp, 56 andi $a2, $s4, 1 addi.d $s0, $s0, 768 - move $a1, $s2 - move $a0, $s3 + move $a0, $s2 + move $a1, $s3 move $s4, $zero bnez $a2, .LBB40_1 # %bb.2: diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CMakeFiles/miniFE.dir/mytimer.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CMakeFiles/miniFE.dir/mytimer.s index 5fe77d63..f79d4971 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CMakeFiles/miniFE.dir/mytimer.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CMakeFiles/miniFE.dir/mytimer.s @@ -1,10 +1,6 @@ .file "mytimer.cpp" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN6miniFE7mytimerEv -.LCPI0_0: - .dword 0x412e848000000000 # double 1.0E+6 .text - .globl _ZN6miniFE7mytimerEv + .globl _ZN6miniFE7mytimerEv # -- Begin function _ZN6miniFE7mytimerEv .p2align 5 .type _ZN6miniFE7mytimerEv,@function _ZN6miniFE7mytimerEv: # @_ZN6miniFE7mytimerEv @@ -16,14 +12,16 @@ _ZN6miniFE7mytimerEv: # @_ZN6miniFE7mytimerEv pcaddu18i $ra, %call36(gettimeofday) jirl $ra, $ra, 0 ld.d $a0, $sp, 8 + ld.d $a1, $sp, 16 movgr2fr.d $fa0, $a0 - ld.d $a0, $sp, 16 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI0_0) ffint.d.l $fa0, $fa0 + movgr2fr.d $fa1, $a1 + ffint.d.l $fa1, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 movgr2fr.d $fa2, $a0 - ffint.d.l $fa2, $fa2 - fdiv.d $fa1, $fa2, $fa1 + fdiv.d $fa1, $fa1, $fa2 fadd.d $fa0, $fa1, $fa0 ld.d $ra, $sp, 24 # 8-byte Folded Reload addi.d $sp, $sp, 32 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CMakeFiles/CoMD.dir/CoMD.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CMakeFiles/CoMD.dir/CoMD.s index 92d4bed0..01ba2b95 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CMakeFiles/CoMD.dir/CoMD.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CMakeFiles/CoMD.dir/CoMD.s @@ -1,12 +1,6 @@ .file "CoMD.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function main -.LCPI0_0: - .word 0x3a800000 # float 9.765625E-4 -.LCPI0_1: - .word 0x45b00000 # float 5632 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -470,41 +464,41 @@ main: # @main ld.w $a1, $a0, 0 ori $a2, $zero, 88 mul.d $a1, $a1, $a2 - pcalau12i $a3, %pc_hi20(.LCPI0_0) - fld.s $fa0, $a3, %pc_lo12(.LCPI0_0) + movgr2fr.w $fa0, $a1 + ffint.s.w $fa0, $fa0 + lu12i.w $a1, 239616 movgr2fr.w $fa1, $a1 - ffint.s.w $fa1, $fa1 ld.w $a0, $a0, 4 - fmul.s $fa1, $fa1, $fa0 - fmul.s $fs0, $fa1, $fa0 + fmul.s $fa0, $fa0, $fa1 + fmul.s $fs0, $fa0, $fa1 ld.d $a1, $fp, 24 mul.d $a0, $a0, $a2 - movgr2fr.w $fa1, $a0 - ffint.s.w $fa1, $fa1 + movgr2fr.w $fa0, $a0 + ffint.s.w $fa0, $fa0 ld.w $a0, $a1, 0 ld.w $a2, $a1, 4 ld.w $a1, $a1, 8 - fmul.s $fa1, $fa1, $fa0 - fmul.s $fs1, $fa1, $fa0 + fmul.s $fa0, $fa0, $fa1 + fmul.s $fs1, $fa0, $fa1 mul.d $a3, $a2, $a0 mul.d $a3, $a3, $a1 addi.d $a0, $a0, 2 addi.d $a2, $a2, 2 mul.d $a0, $a2, $a0 addi.d $a1, $a1, 2 - pcalau12i $a2, %pc_hi20(.LCPI0_1) - fld.s $fa1, $a2, %pc_lo12(.LCPI0_1) mul.d $a0, $a0, $a1 - movgr2fr.w $fa2, $a3 - ffint.s.w $fa2, $fa2 - fmul.s $fa2, $fa2, $fa1 - fmul.s $fa2, $fa2, $fa0 - fmul.s $fs2, $fa2, $fa0 - movgr2fr.w $fa2, $a0 - ffint.s.w $fa2, $fa2 - fmul.s $fa1, $fa2, $fa1 - fmul.s $fa1, $fa1, $fa0 - fmul.s $fs3, $fa1, $fa0 + movgr2fr.w $fa0, $a3 + ffint.s.w $fa0, $fa0 + lu12i.w $a1, 285440 + movgr2fr.w $fa2, $a1 + fmul.s $fa0, $fa0, $fa2 + fmul.s $fa0, $fa0, $fa1 + fmul.s $fs2, $fa0, $fa1 + movgr2fr.w $fa0, $a0 + ffint.s.w $fa0, $fa0 + fmul.s $fa0, $fa0, $fa2 + fmul.s $fa0, $fa0, $fa1 + fmul.s $fs3, $fa0, $fa1 move $a0, $s0 pcaddu18i $ra, %call36(printSeparator) jirl $ra, $ra, 0 @@ -1025,12 +1019,7 @@ main: # @main .Lfunc_end0: .size main, .Lfunc_end0-main # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function printThings -.LCPI1_0: - .dword 0x3f20f13ed339f07f # double 1.2925998599999999E-4 - .text - .p2align 5 + .p2align 5 # -- Begin function printThings .type printThings,@function printThings: # @printThings # %bb.0: @@ -1075,12 +1064,15 @@ printThings: # @printThings fadd.d $fa1, $fa2, $fa3 movgr2fr.w $fa4, $a1 ffint.d.w $fa4, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa5, $a0, %pc_lo12(.LCPI1_0) fdiv.d $fa1, $fa1, $fa4 fdiv.d $fa3, $fa3, $fa4 fdiv.d $fa2, $fa2, $fa4 - fdiv.d $fa4, $fa3, $fa5 + lu12i.w $a0, -183393 + ori $a0, $a0, 127 + lu32i.d $a0, 61758 + lu52i.d $a0, $a0, 1010 + movgr2fr.d $fa4, $a0 + fdiv.d $fa4, $fa3, $fa4 pcalau12i $a0, %got_pc_hi20(stdout) ld.d $a0, $a0, %got_pc_lo12(stdout) ld.d $a0, $a0, 0 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CMakeFiles/CoMD.dir/eam.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CMakeFiles/CoMD.dir/eam.s index f0e4ab65..d2e45071 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CMakeFiles/CoMD.dir/eam.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CMakeFiles/CoMD.dir/eam.s @@ -1,12 +1,6 @@ .file "eam.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function initEamPot -.LCPI0_0: - .dword 0x4059e921dd37dc65 # double 103.64269190268676 -.LCPI0_1: - .dword 0x402ccc9e3fcf6bae # double 14.399644846029187 .text - .globl initEamPot + .globl initEamPot # -- Begin function initEamPot .p2align 5 .type initEamPot,@function initEamPot: # @initEamPot @@ -121,12 +115,15 @@ initEamPot: # @initEamPot jirl $ra, $ra, 0 ld.w $a0, $sp, 156 fld.d $fa0, $sp, 136 - fld.d $fa1, $sp, 144 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fa2, $a1, %pc_lo12(.LCPI0_0) st.w $a0, $fp, 36 fst.d $fa0, $fp, 16 - fmul.d $fa0, $fa1, $fa2 + fld.d $fa0, $sp, 144 + lu12i.w $a0, -142467 + ori $a0, $a0, 3173 + lu32i.d $a0, -399071 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 fst.d $fa0, $fp, 8 addi.d $a1, $sp, 128 move $a0, $s4 @@ -285,12 +282,15 @@ initEamPot: # @initEamPot jirl $ra, $ra, 0 ld.w $a0, $sp, 92 fld.d $fa0, $sp, 104 - fld.d $fa1, $sp, 112 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fa2, $a1, %pc_lo12(.LCPI0_0) st.w $a0, $fp, 36 fst.d $fa0, $fp, 16 - fmul.d $fa0, $fa1, $fa2 + fld.d $fa0, $sp, 112 + lu12i.w $a0, -142467 + ori $a0, $a0, 3173 + lu32i.d $a0, -399071 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 fst.d $fa0, $fp, 8 addi.d $a1, $sp, 96 move $a0, $s4 @@ -386,51 +386,51 @@ initEamPot: # @initEamPot vst $vr8, $sp, 16 # 16-byte Folded Spill blt $s3, $a0, .LBB0_28 # %bb.21: # %.lr.ph70.i - ori $a1, $zero, 5 ori $a0, $zero, 1 - bltu $s3, $a1, .LBB0_25 + ori $a2, $zero, 5 + lu12i.w $a1, 261366 + bltu $s3, $a2, .LBB0_25 # %bb.22: # %vector.ph - addi.d $a1, $s3, -1 - move $a2, $a1 - bstrins.d $a2, $zero, 1, 0 - ori $a4, $zero, 1 - move $a0, $a1 - bstrins.d $a0, $a4, 1, 0 + addi.d $a2, $s3, -1 + move $a3, $a2 + bstrins.d $a3, $zero, 1, 0 + ori $a5, $zero, 1 + move $a0, $a2 + bstrins.d $a0, $a5, 1, 0 vreplvei.d $vr0, $vr8, 0 - addi.d $a3, $s6, 24 - lu32i.d $a4, 2 - vreplgr2vr.d $vr1, $a4 - lu12i.w $a4, 261366 - ori $a4, $a4, 2990 - lu32i.d $a4, -209762 - lu52i.d $a4, $a4, 1026 - vreplgr2vr.d $vr2, $a4 - move $a4, $a2 + addi.d $a4, $s6, 24 + lu32i.d $a5, 2 + vreplgr2vr.d $vr1, $a5 + ori $a5, $a1, 2990 + lu32i.d $a5, -209762 + lu52i.d $a5, $a5, 1026 + vreplgr2vr.d $vr2, $a5 + move $a5, $a3 vld $vr7, $sp, 64 # 16-byte Folded Reload .p2align 4, , 16 .LBB0_23: # %vector.body # =>This Inner Loop Header: Depth=1 vaddi.wu $vr3, $vr1, 2 - vpickve2gr.w $a5, $vr1, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa4, $a5 + vpickve2gr.w $a6, $vr1, 1 + bstrpick.d $a6, $a6, 31, 0 + movgr2fr.d $fa4, $a6 ffint.d.l $fa4, $fa4 - vpickve2gr.w $a5, $vr1, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa5, $a5 + vpickve2gr.w $a6, $vr1, 0 + bstrpick.d $a6, $a6, 31, 0 + movgr2fr.d $fa5, $a6 ffint.d.l $fa5, $fa5 vextrins.d $vr5, $vr4, 16 - vpickve2gr.w $a5, $vr3, 1 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa4, $a5 + vpickve2gr.w $a6, $vr3, 1 + bstrpick.d $a6, $a6, 31, 0 + movgr2fr.d $fa4, $a6 ffint.d.l $fa4, $fa4 - vpickve2gr.w $a5, $vr3, 0 - bstrpick.d $a5, $a5, 31, 0 - movgr2fr.d $fa3, $a5 + vpickve2gr.w $a6, $vr3, 0 + bstrpick.d $a6, $a6, 31, 0 + movgr2fr.d $fa3, $a6 ffint.d.l $fa3, $fa3 vextrins.d $vr3, $vr4, 16 - vld $vr4, $a3, -16 - vld $vr6, $a3, 0 + vld $vr4, $a4, -16 + vld $vr6, $a4, 0 vfmadd.d $vr5, $vr5, $vr0, $vr7 vfmadd.d $vr3, $vr3, $vr0, $vr7 vfdiv.d $vr5, $vr4, $vr5 @@ -439,36 +439,38 @@ initEamPot: # @initEamPot vfmul.d $vr3, $vr6, $vr3 vfmul.d $vr4, $vr4, $vr2 vfmul.d $vr3, $vr3, $vr2 - vst $vr4, $a3, -16 - vst $vr3, $a3, 0 + vst $vr4, $a4, -16 + vst $vr3, $a4, 0 vaddi.wu $vr1, $vr1, 4 - addi.d $a4, $a4, -4 - addi.d $a3, $a3, 32 - bnez $a4, .LBB0_23 + addi.d $a5, $a5, -4 + addi.d $a4, $a4, 32 + bnez $a5, .LBB0_23 # %bb.24: # %middle.block - beq $a1, $a2, .LBB0_28 + beq $a2, $a3, .LBB0_28 .LBB0_25: # %scalar.ph.preheader - pcalau12i $a1, %pc_hi20(.LCPI0_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI0_1) - sub.d $a1, $s3, $a0 - alsl.d $a2, $a0, $s6, 3 - movgr2fr.d $fa1, $zero + sub.d $a2, $s3, $a0 + alsl.d $a3, $a0, $s6, 3 + movgr2fr.d $fa0, $zero + ori $a1, $a1, 2990 + lu32i.d $a1, -209762 + lu52i.d $a1, $a1, 1026 + movgr2fr.d $fa1, $a1 .p2align 4, , 16 .LBB0_26: # %scalar.ph # =>This Inner Loop Header: Depth=1 - bstrpick.d $a3, $a0, 31, 0 - fld.d $fa2, $a2, 0 - movgr2fr.d $fa3, $a3 + bstrpick.d $a1, $a0, 31, 0 + fld.d $fa2, $a3, 0 + movgr2fr.d $fa3, $a1 ffint.d.l $fa3, $fa3 - fmadd.d $fa3, $fa3, $ft0, $fa1 + fmadd.d $fa3, $fa3, $ft0, $fa0 fdiv.d $fa3, $fa2, $fa3 fmul.d $fa2, $fa2, $fa3 - fmul.d $fa2, $fa2, $fa0 - fst.d $fa2, $a2, 0 - addi.d $a1, $a1, -1 - addi.d $a2, $a2, 8 + fmul.d $fa2, $fa2, $fa1 + fst.d $fa2, $a3, 0 + addi.d $a2, $a2, -1 + addi.d $a3, $a3, 8 addi.w $a0, $a0, 1 - bnez $a1, .LBB0_26 + bnez $a2, .LBB0_26 b .LBB0_28 .LBB0_27: # %.preheader.thread.i fld.d $fa0, $sp, 104 @@ -1659,12 +1661,7 @@ eamForce: # @eamForce .Lfunc_end1: .size eamForce, .Lfunc_end1-eamForce # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function eamPrint -.LCPI2_0: - .dword 0x4059e921dd37dc65 # double 103.64269190268676 - .text - .p2align 5 + .p2align 5 # -- Begin function eamPrint .type eamPrint,@function eamPrint: # @eamPrint # %bb.0: @@ -1694,8 +1691,11 @@ eamPrint: # @eamPrint pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 fld.d $fa0, $s0, 8 - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI2_0) + lu12i.w $a0, -142467 + ori $a0, $a0, 3173 + lu32i.d $a0, -399071 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa1, $a0 fdiv.d $fa0, $fa0, $fa1 movfr2gr.d $a2, $fa0 pcalau12i $a0, %pc_hi20(.L.str.6) diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CMakeFiles/CoMD.dir/initAtoms.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CMakeFiles/CoMD.dir/initAtoms.s index f9642414..70f2468c 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CMakeFiles/CoMD.dir/initAtoms.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CMakeFiles/CoMD.dir/initAtoms.s @@ -667,12 +667,7 @@ setVcm: # @setVcm .Lfunc_end3: .size setVcm, .Lfunc_end3-setVcm # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function setTemperature -.LCPI4_0: - .dword 0x3f1696fe6ef7eb54 # double 8.6173323999999996E-5 - .text - .globl setTemperature + .globl setTemperature # -- Begin function setTemperature .p2align 5 .type setTemperature,@function setTemperature: # @setTemperature @@ -696,12 +691,15 @@ setTemperature: # @setTemperature ld.d $a0, $a0, 24 ld.w $a1, $a0, 12 fmov.d $fs0, $fa0 - pcalau12i $s0, %pc_hi20(.LCPI4_0) + lu12i.w $s0, 454526 blez $a1, .LBB4_8 # %bb.1: # %.lr.ph66 - fld.d $fa0, $s0, %pc_lo12(.LCPI4_0) move $s1, $zero move $s2, $zero + ori $a1, $s0, 2900 + lu32i.d $a1, 431870 + lu52i.d $a1, $a1, 1009 + movgr2fr.d $fa0, $a1 fmul.d $fs2, $fs0, $fa0 b .LBB4_3 .p2align 4, , 16 @@ -802,10 +800,13 @@ setTemperature: # @setTemperature ld.d $s1, $fp, 32 ld.w $a0, $s1, 4 fld.d $fa0, $fp, 56 - fld.d $fa1, $s0, %pc_lo12(.LCPI4_0) - movgr2fr.w $fa2, $a0 - ffint.d.w $fa2, $fa2 - fdiv.d $fa0, $fa0, $fa2 + movgr2fr.w $fa1, $a0 + ffint.d.w $fa1, $fa1 + fdiv.d $fa0, $fa0, $fa1 + ori $a0, $s0, 2900 + lu32i.d $a0, 431870 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa1, $a0 fdiv.d $fa0, $fa0, $fa1 vldi $vr1, -904 fdiv.d $fa0, $fa0, $fa1 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CMakeFiles/CoMD.dir/ljForce.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CMakeFiles/CoMD.dir/ljForce.s index d8461715..25b141ff 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CMakeFiles/CoMD.dir/ljForce.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CMakeFiles/CoMD.dir/ljForce.s @@ -362,12 +362,7 @@ ljForce: # @ljForce .Lfunc_end2: .size ljForce, .Lfunc_end2-ljForce # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function ljPrint -.LCPI3_0: - .dword 0x4059e921dd37dc65 # double 103.64269190268676 - .text - .p2align 5 + .p2align 5 # -- Begin function ljPrint .type ljPrint,@function ljPrint: # @ljPrint # %bb.0: @@ -397,8 +392,11 @@ ljPrint: # @ljPrint pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 fld.d $fa0, $s0, 8 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI3_0) + lu12i.w $a0, -142467 + ori $a0, $a0, 3173 + lu32i.d $a0, -399071 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa1, $a0 fdiv.d $fa0, $fa0, $fa1 movfr2gr.d $a2, $fa0 pcalau12i $a0, %pc_hi20(.L.str.5) diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CMakeFiles/CoMD.dir/performanceTimers.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CMakeFiles/CoMD.dir/performanceTimers.s index fcc35ca9..9a50adb6 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CMakeFiles/CoMD.dir/performanceTimers.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CMakeFiles/CoMD.dir/performanceTimers.s @@ -78,14 +78,7 @@ profileStop: # @profileStop .Lfunc_end1: .size profileStop, .Lfunc_end1-profileStop # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function getElapsedTime -.LCPI2_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI2_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 - .text - .globl getElapsedTime + .globl getElapsedTime # -- Begin function getElapsedTime .p2align 5 .type getElapsedTime,@function getElapsedTime: # @getElapsedTime @@ -98,36 +91,29 @@ getElapsedTime: # @getElapsedTime add.d $a0, $a1, $a0 ld.d $a1, $a0, 24 srli.d $a2, $a1, 32 - pcalau12i $a3, %pc_hi20(.LCPI2_0) - fld.d $fa0, $a3, %pc_lo12(.LCPI2_0) lu52i.d $a3, $zero, 1107 or $a2, $a2, $a3 + movgr2fr.d $fa0, $a2 + lu12i.w $a2, 256 + lu52i.d $a2, $a2, 1107 movgr2fr.d $fa1, $a2 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 lu12i.w $a2, 275200 - pcalau12i $a3, %pc_hi20(.LCPI2_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI2_1) bstrins.d $a1, $a2, 63, 32 - movgr2fr.d $fa2, $a1 - fadd.d $fa0, $fa2, $fa0 + movgr2fr.d $fa1, $a1 + fadd.d $fa0, $fa1, $fa0 + lu12i.w $a1, -390306 + ori $a1, $a1, 3469 + lu32i.d $a1, 50935 + lu52i.d $a1, $a1, 1003 + movgr2fr.d $fa1, $a1 fmul.d $fa0, $fa0, $fa1 st.d $zero, $a0, 24 ret .Lfunc_end2: .size getElapsedTime, .Lfunc_end2-getElapsedTime # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function printPerformanceResults -.LCPI3_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI3_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI3_2: - .dword 0x4059000000000000 # double 100 -.LCPI3_3: - .dword 0x412e848000000000 # double 1.0E+6 - .text - .globl printPerformanceResults + .globl printPerformanceResults # -- Begin function printPerformanceResults .p2align 5 .type printPerformanceResults,@function printPerformanceResults: # @printPerformanceResults @@ -151,11 +137,12 @@ printPerformanceResults: # @printPerformanceResults addi.d $s4, $a0, %pc_lo12(perfTimer) ld.d $a0, $s4, 8 srli.d $a1, $a0, 32 - pcalau12i $a2, %pc_hi20(.LCPI3_0) - fld.d $fs0, $a2, %pc_lo12(.LCPI3_0) lu52i.d $s2, $zero, 1107 or $a1, $a1, $s2 movgr2fr.d $fa0, $a1 + lu12i.w $a1, 256 + lu52i.d $a1, $a1, 1107 + movgr2fr.d $fs0, $a1 fsub.d $fa0, $fa0, $fs0 lu12i.w $s3, 275200 bstrins.d $a0, $s3, 63, 32 @@ -831,11 +818,14 @@ printPerformanceResults: # @printPerformanceResults or $a1, $a1, $s2 movgr2fr.d $fa0, $a1 fsub.d $fa0, $fa0, $fs0 - pcalau12i $a1, %pc_hi20(.LCPI3_1) - fld.d $fs1, $a1, %pc_lo12(.LCPI3_1) bstrins.d $a0, $s3, 63, 32 movgr2fr.d $fa1, $a0 fadd.d $fa0, $fa1, $fa0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs1, $a0 fmul.d $fs2, $fa0, $fs1 pcalau12i $a0, %got_pc_hi20(stdout) ld.d $s5, $a0, %got_pc_lo12(stdout) @@ -863,7 +853,6 @@ printPerformanceResults: # @printPerformanceResults ld.d $a3, $s4, 16 pcalau12i $a0, %pc_hi20(timerName) addi.d $s6, $a0, %pc_lo12(timerName) - pcalau12i $s1, %pc_hi20(.LCPI3_2) beqz $a3, .LBB3_14 # %bb.13: ld.d $a0, $s4, 8 @@ -884,10 +873,13 @@ printPerformanceResults: # @printPerformanceResults move $a1, $a3 bstrins.d $a1, $s3, 63, 32 movgr2fr.d $fa2, $a1 - fld.d $fa3, $s1, %pc_lo12(.LCPI3_2) fadd.d $fa1, $fa2, $fa1 fdiv.d $fa1, $fa0, $fa1 fdiv.d $fa2, $fa0, $fs2 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa3, $a1 fmul.d $fa2, $fa2, $fa3 movfr2gr.d $a4, $fa1 movfr2gr.d $a5, $fa0 @@ -918,10 +910,13 @@ printPerformanceResults: # @printPerformanceResults move $a1, $a3 bstrins.d $a1, $s3, 63, 32 movgr2fr.d $fa2, $a1 - fld.d $fa3, $s1, %pc_lo12(.LCPI3_2) fadd.d $fa1, $fa2, $fa1 fdiv.d $fa1, $fa0, $fa1 fdiv.d $fa2, $fa0, $fs2 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa3, $a1 fmul.d $fa2, $fa2, $fa3 movfr2gr.d $a4, $fa1 movfr2gr.d $a5, $fa0 @@ -952,10 +947,13 @@ printPerformanceResults: # @printPerformanceResults move $a1, $a3 bstrins.d $a1, $s3, 63, 32 movgr2fr.d $fa2, $a1 - fld.d $fa3, $s1, %pc_lo12(.LCPI3_2) fadd.d $fa1, $fa2, $fa1 fdiv.d $fa1, $fa0, $fa1 fdiv.d $fa2, $fa0, $fs2 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa3, $a1 fmul.d $fa2, $fa2, $fa3 movfr2gr.d $a4, $fa1 movfr2gr.d $a5, $fa0 @@ -986,10 +984,13 @@ printPerformanceResults: # @printPerformanceResults move $a1, $a3 bstrins.d $a1, $s3, 63, 32 movgr2fr.d $fa2, $a1 - fld.d $fa3, $s1, %pc_lo12(.LCPI3_2) fadd.d $fa1, $fa2, $fa1 fdiv.d $fa1, $fa0, $fa1 fdiv.d $fa2, $fa0, $fs2 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa3, $a1 fmul.d $fa2, $fa2, $fa3 movfr2gr.d $a4, $fa1 movfr2gr.d $a5, $fa0 @@ -1020,10 +1021,13 @@ printPerformanceResults: # @printPerformanceResults move $a1, $a3 bstrins.d $a1, $s3, 63, 32 movgr2fr.d $fa2, $a1 - fld.d $fa3, $s1, %pc_lo12(.LCPI3_2) fadd.d $fa1, $fa2, $fa1 fdiv.d $fa1, $fa0, $fa1 fdiv.d $fa2, $fa0, $fs2 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa3, $a1 fmul.d $fa2, $fa2, $fa3 movfr2gr.d $a4, $fa1 movfr2gr.d $a5, $fa0 @@ -1054,10 +1058,13 @@ printPerformanceResults: # @printPerformanceResults move $a1, $a3 bstrins.d $a1, $s3, 63, 32 movgr2fr.d $fa2, $a1 - fld.d $fa3, $s1, %pc_lo12(.LCPI3_2) fadd.d $fa1, $fa2, $fa1 fdiv.d $fa1, $fa0, $fa1 fdiv.d $fa2, $fa0, $fs2 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa3, $a1 fmul.d $fa2, $fa2, $fa3 movfr2gr.d $a4, $fa1 movfr2gr.d $a5, $fa0 @@ -1088,10 +1095,13 @@ printPerformanceResults: # @printPerformanceResults move $a1, $a3 bstrins.d $a1, $s3, 63, 32 movgr2fr.d $fa2, $a1 - fld.d $fa3, $s1, %pc_lo12(.LCPI3_2) fadd.d $fa1, $fa2, $fa1 fdiv.d $fa1, $fa0, $fa1 fdiv.d $fa2, $fa0, $fs2 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa3, $a1 fmul.d $fa2, $fa2, $fa3 movfr2gr.d $a4, $fa1 movfr2gr.d $a5, $fa0 @@ -1122,10 +1132,13 @@ printPerformanceResults: # @printPerformanceResults move $a1, $a3 bstrins.d $a1, $s3, 63, 32 movgr2fr.d $fa2, $a1 - fld.d $fa3, $s1, %pc_lo12(.LCPI3_2) fadd.d $fa1, $fa2, $fa1 fdiv.d $fa1, $fa0, $fa1 fdiv.d $fa2, $fa0, $fs2 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa3, $a1 fmul.d $fa2, $fa2, $fa3 movfr2gr.d $a4, $fa1 movfr2gr.d $a5, $fa0 @@ -1156,10 +1169,13 @@ printPerformanceResults: # @printPerformanceResults move $a1, $a3 bstrins.d $a1, $s3, 63, 32 movgr2fr.d $fa2, $a1 - fld.d $fa3, $s1, %pc_lo12(.LCPI3_2) fadd.d $fa1, $fa2, $fa1 fdiv.d $fa1, $fa0, $fa1 fdiv.d $fa2, $fa0, $fs2 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa3, $a1 fmul.d $fa2, $fa2, $fa3 movfr2gr.d $a4, $fa1 movfr2gr.d $a5, $fa0 @@ -1190,10 +1206,13 @@ printPerformanceResults: # @printPerformanceResults move $a1, $a3 bstrins.d $a1, $s3, 63, 32 movgr2fr.d $fa2, $a1 - fld.d $fa3, $s1, %pc_lo12(.LCPI3_2) fadd.d $fa1, $fa2, $fa1 fdiv.d $fa1, $fa0, $fa1 fdiv.d $fa2, $fa0, $fs2 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa3, $a1 fmul.d $fa2, $fa2, $fa3 movfr2gr.d $a4, $fa1 movfr2gr.d $a5, $fa0 @@ -1216,6 +1235,7 @@ printPerformanceResults: # @printPerformanceResults fadd.d $fa0, $fa1, $fa0 fmul.d $fa0, $fa0, $fs1 ld.d $a0, $s5, 0 + ld.d $a2, $s6, 80 srli.d $a1, $a3, 32 or $a1, $a1, $s2 movgr2fr.d $fa1, $a1 @@ -1224,11 +1244,13 @@ printPerformanceResults: # @printPerformanceResults bstrins.d $a1, $s3, 63, 32 movgr2fr.d $fa2, $a1 fadd.d $fa1, $fa2, $fa1 - fld.d $fa2, $s1, %pc_lo12(.LCPI3_2) - ld.d $a2, $s6, 80 fdiv.d $fa1, $fa0, $fa1 - fdiv.d $fa3, $fa0, $fs2 - fmul.d $fa2, $fa3, $fa2 + fdiv.d $fa2, $fa0, $fs2 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa3, $a1 + fmul.d $fa2, $fa2, $fa3 movfr2gr.d $a4, $fa1 movfr2gr.d $a5, $fa0 movfr2gr.d $a6, $fa2 @@ -1528,39 +1550,41 @@ printPerformanceResults: # @printPerformanceResults ffint.d.w $fs2, $fa0 pcaddu18i $ra, %call36(getNRanks) jirl $ra, $ra, 0 - movgr2fr.w $fa0, $a0 - ffint.d.w $fa0, $fa0 - fld.d $fa1, $s4, 200 - pcalau12i $a0, %pc_hi20(.LCPI3_3) - fld.d $fa2, $a0, %pc_lo12(.LCPI3_3) - ld.d $a0, $s4, 160 - fdiv.d $fa0, $fs2, $fa0 - fmul.d $fa1, $fa1, $fs1 - fmul.d $fa1, $fa1, $fa2 - srli.d $a1, $a0, 32 - or $a1, $a1, $s2 - movgr2fr.d $fa2, $a1 + fld.d $fa0, $s4, 200 + movgr2fr.w $fa1, $a0 + ffint.d.w $fa1, $fa1 + fdiv.d $fa1, $fs2, $fa1 + fmul.d $fa0, $fa0, $fs1 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + ld.d $a1, $s4, 160 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa2, $a0 + fmul.d $fa0, $fa0, $fa2 + srli.d $a0, $a1, 32 + or $a0, $a0, $s2 + movgr2fr.d $fa2, $a0 fsub.d $fa2, $fa2, $fs0 - mul.d $a1, $fp, $s0 - mul.d $a1, $a1, $a0 - bstrins.d $a0, $s3, 63, 32 - movgr2fr.d $fa3, $a0 + mul.d $a0, $fp, $s0 + mul.d $a0, $a0, $a1 + bstrins.d $a1, $s3, 63, 32 + movgr2fr.d $fa3, $a1 fadd.d $fa2, $fa3, $fa2 - fmul.d $fa0, $fa0, $fa2 + fmul.d $fa1, $fa1, $fa2 movgr2fr.w $fa2, $fp ffint.d.w $fa2, $fa2 - fmul.d $fa0, $fa0, $fa2 - fdiv.d $fa0, $fa1, $fa0 + fmul.d $fa1, $fa1, $fa2 + fdiv.d $fa1, $fa0, $fa1 pcalau12i $s0, %pc_hi20(perfGlobal.0) - fst.d $fa0, $s0, %pc_lo12(perfGlobal.0) - srli.d $a0, $a1, 32 - or $a0, $a0, $s2 - movgr2fr.d $fa0, $a0 - fsub.d $fa0, $fa0, $fs0 - bstrins.d $a1, $s3, 63, 32 - movgr2fr.d $fa2, $a1 - fadd.d $fa0, $fa2, $fa0 - fdiv.d $fa0, $fa1, $fa0 + fst.d $fa1, $s0, %pc_lo12(perfGlobal.0) + srli.d $a1, $a0, 32 + or $a1, $a1, $s2 + movgr2fr.d $fa1, $a1 + fsub.d $fa1, $fa1, $fs0 + bstrins.d $a0, $s3, 63, 32 + movgr2fr.d $fa2, $a0 + fadd.d $fa1, $fa2, $fa1 + fdiv.d $fa0, $fa0, $fa1 pcalau12i $s2, %pc_hi20(perfGlobal.1) fst.d $fa0, $s2, %pc_lo12(perfGlobal.1) ld.d $a3, $s5, 0 @@ -1701,16 +1725,7 @@ printPerformanceResults: # @printPerformanceResults .Lfunc_end3: .size printPerformanceResults, .Lfunc_end3-printPerformanceResults # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function printPerformanceResultsYaml -.LCPI4_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI4_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI4_2: - .dword 0x4059000000000000 # double 100 - .text - .globl printPerformanceResultsYaml + .globl printPerformanceResultsYaml # -- Begin function printPerformanceResultsYaml .p2align 5 .type printPerformanceResultsYaml,@function printPerformanceResultsYaml: # @printPerformanceResultsYaml @@ -1741,18 +1756,22 @@ printPerformanceResultsYaml: # @printPerformanceResultsYaml addi.d $s0, $a0, %pc_lo12(perfTimer) ld.d $a0, $s0, 80 srli.d $a1, $a0, 32 - pcalau12i $a2, %pc_hi20(.LCPI4_0) - fld.d $fs1, $a2, %pc_lo12(.LCPI4_0) lu52i.d $a2, $zero, 1107 or $a1, $a1, $a2 movgr2fr.d $fa0, $a1 + lu12i.w $a1, 256 + lu52i.d $a1, $a1, 1107 + movgr2fr.d $fs1, $a1 fsub.d $fa0, $fa0, $fs1 - pcalau12i $a1, %pc_hi20(.LCPI4_1) - fld.d $fs0, $a1, %pc_lo12(.LCPI4_1) lu12i.w $a1, 275200 bstrins.d $a0, $a1, 63, 32 movgr2fr.d $fa1, $a0 fadd.d $fa0, $fa1, $fa0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs0, $a0 fmul.d $fs2, $fa0, $fs0 pcalau12i $a0, %pc_hi20(.L.str.24) addi.d $a0, $a0, %pc_lo12(.L.str.24) @@ -1797,8 +1816,10 @@ printPerformanceResultsYaml: # @printPerformanceResultsYaml addi.d $s2, $a0, %pc_lo12(.L.str.30) pcalau12i $a0, %pc_hi20(.L.str.31) addi.d $s3, $a0, %pc_lo12(.L.str.31) - pcalau12i $a0, %pc_hi20(.LCPI4_2) - fld.d $fs3, $a0, %pc_lo12(.LCPI4_2) + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.32) addi.d $s4, $a0, %pc_lo12(.L.str.32) move $s8, $zero diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CMakeFiles/CoMD.dir/random.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CMakeFiles/CoMD.dir/random.s index 43a5559d..c6dc8b55 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CMakeFiles/CoMD.dir/random.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CMakeFiles/CoMD.dir/random.s @@ -1,10 +1,6 @@ .file "random.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function gasdev -.LCPI0_0: - .dword 0x3c20000000000000 # double 4.3368086899420177E-19 .text - .globl gasdev + .globl gasdev # -- Begin function gasdev .p2align 5 .type gasdev,@function gasdev: # @gasdev @@ -19,8 +15,8 @@ gasdev: # @gasdev lu32i.d $a1, 221293 lu52i.d $a1, $a1, 97 ori $a2, $zero, 9 - pcalau12i $a4, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a4, %pc_lo12(.LCPI0_0) + lu52i.d $a4, $zero, 962 + movgr2fr.d $fa0, $a4 vldi $vr1, -784 vldi $vr2, -1024 vldi $vr3, -912 @@ -86,12 +82,7 @@ gasdev: # @gasdev .Lfunc_end0: .size gasdev, .Lfunc_end0-gasdev # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function lcg61 -.LCPI1_0: - .dword 0x3c20000000000000 # double 4.3368086899420177E-19 - .text - .globl lcg61 + .globl lcg61 # -- Begin function lcg61 .p2align 5 .type lcg61,@function lcg61: # @lcg61 @@ -110,12 +101,12 @@ lcg61: # @lcg61 srli.d $a2, $a2, 60 slli.d $a3, $a2, 61 sub.d $a2, $a2, $a3 - pcalau12i $a3, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a3, %pc_lo12(.LCPI1_0) add.d $a1, $a1, $a2 - movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + lu52i.d $a2, $zero, 962 + movgr2fr.d $fa1, $a2 + fmul.d $fa0, $fa0, $fa1 st.d $a1, $a0, 0 ret .Lfunc_end1: diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/Pathfinder/CMakeFiles/PathFinder.dir/main.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/Pathfinder/CMakeFiles/PathFinder.dir/main.s index b183fc55..18994079 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/Pathfinder/CMakeFiles/PathFinder.dir/main.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/Pathfinder/CMakeFiles/PathFinder.dir/main.s @@ -475,25 +475,19 @@ exhaustiveLegSearch: # @exhaustiveLegSearch .Lfunc_end1: .size exhaustiveLegSearch, .Lfunc_end1-exhaustiveLegSearch # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function runBatch -.LCPI2_0: - .dword 0x40ac200000000000 # double 3600 -.LCPI2_1: - .dword 0x404e000000000000 # double 60 - .text - .globl runBatch + .globl runBatch # -- Begin function runBatch .p2align 5 .type runBatch,@function runBatch: # @runBatch # %bb.0: - addi.d $sp, $sp, -48 - st.d $ra, $sp, 40 # 8-byte Folded Spill - st.d $fp, $sp, 32 # 8-byte Folded Spill - st.d $s0, $sp, 24 # 8-byte Folded Spill - st.d $s1, $sp, 16 # 8-byte Folded Spill - st.d $s2, $sp, 8 # 8-byte Folded Spill - fst.d $fs0, $sp, 0 # 8-byte Folded Spill + addi.d $sp, $sp, -64 + st.d $ra, $sp, 56 # 8-byte Folded Spill + st.d $fp, $sp, 48 # 8-byte Folded Spill + st.d $s0, $sp, 40 # 8-byte Folded Spill + st.d $s1, $sp, 32 # 8-byte Folded Spill + st.d $s2, $sp, 24 # 8-byte Folded Spill + st.d $s3, $sp, 16 # 8-byte Folded Spill + fst.d $fs0, $sp, 8 # 8-byte Folded Spill beqz $a0, .LBB2_4 # %bb.1: move $fp, $a1 @@ -519,8 +513,11 @@ runBatch: # @runBatch bstrpick.d $a1, $a0, 31, 31 srai.d $a0, $a0, 11 add.d $s1, $a0, $a1 - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI2_0) + ori $a0, $zero, 0 + lu32i.d $a0, -253952 + lu52i.d $a0, $a0, 1034 + movgr2fr.d $fa1, $a0 + ori $s3, $zero, 0 pcaddu18i $ra, %call36(fmod) jirl $ra, $ra, 0 ftintrz.w.d $fa1, $fa0 @@ -533,8 +530,9 @@ runBatch: # @runBatch bstrpick.d $a1, $a0, 31, 31 srai.d $a0, $a0, 5 add.d $s2, $a0, $a1 - pcalau12i $a0, %pc_hi20(.LCPI2_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI2_1) + lu32i.d $s3, -131072 + lu52i.d $a0, $s3, 1028 + movgr2fr.d $fa1, $a0 pcaddu18i $ra, %call36(fmod) jirl $ra, $ra, 0 movfr2gr.d $a3, $fa0 @@ -552,13 +550,14 @@ runBatch: # @runBatch .LBB2_3: pcalau12i $a0, %pc_hi20(.Lstr.2) addi.d $a0, $a0, %pc_lo12(.Lstr.2) - fld.d $fs0, $sp, 0 # 8-byte Folded Reload - ld.d $s2, $sp, 8 # 8-byte Folded Reload - ld.d $s1, $sp, 16 # 8-byte Folded Reload - ld.d $s0, $sp, 24 # 8-byte Folded Reload - ld.d $fp, $sp, 32 # 8-byte Folded Reload - ld.d $ra, $sp, 40 # 8-byte Folded Reload - addi.d $sp, $sp, 48 + fld.d $fs0, $sp, 8 # 8-byte Folded Reload + ld.d $s3, $sp, 16 # 8-byte Folded Reload + ld.d $s2, $sp, 24 # 8-byte Folded Reload + ld.d $s1, $sp, 32 # 8-byte Folded Reload + ld.d $s0, $sp, 40 # 8-byte Folded Reload + ld.d $fp, $sp, 48 # 8-byte Folded Reload + ld.d $ra, $sp, 56 # 8-byte Folded Reload + addi.d $sp, $sp, 64 pcaddu18i $t8, %call36(puts) jr $t8 .LBB2_4: diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/Pathfinder/CMakeFiles/PathFinder.dir/searchAlgorithms.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/Pathfinder/CMakeFiles/PathFinder.dir/searchAlgorithms.s index befdb504..7dc8b26b 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/Pathfinder/CMakeFiles/PathFinder.dir/searchAlgorithms.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/Pathfinder/CMakeFiles/PathFinder.dir/searchAlgorithms.s @@ -1,12 +1,6 @@ .file "searchAlgorithms.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function doMultiSearches -.LCPI0_0: - .dword 0x40ac200000000000 # double 3600 -.LCPI0_1: - .dword 0x404e000000000000 # double 60 .text - .globl doMultiSearches + .globl doMultiSearches # -- Begin function doMultiSearches .p2align 5 .type doMultiSearches,@function doMultiSearches: # @doMultiSearches @@ -210,8 +204,11 @@ doMultiSearches: # @doMultiSearches bstrpick.d $a1, $a0, 31, 31 srai.d $a0, $a0, 11 add.d $fp, $a0, $a1 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) + ori $a0, $zero, 0 + lu32i.d $a0, -253952 + lu52i.d $a0, $a0, 1034 + movgr2fr.d $fa1, $a0 + ori $s1, $zero, 0 pcaddu18i $ra, %call36(fmod) jirl $ra, $ra, 0 ftintrz.w.d $fa1, $fa0 @@ -224,8 +221,9 @@ doMultiSearches: # @doMultiSearches bstrpick.d $a1, $a0, 31, 31 srai.d $a0, $a0, 5 add.d $s0, $a0, $a1 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_1) + lu32i.d $s1, -131072 + lu52i.d $a0, $s1, 1028 + movgr2fr.d $fa1, $a0 pcaddu18i $ra, %call36(fmod) jirl $ra, $ra, 0 movfr2gr.d $a3, $fa0 @@ -812,14 +810,7 @@ findAndRecordAllPaths: # @findAndRecordAllPaths .Lfunc_end3: .size findAndRecordAllPaths, .Lfunc_end3-findAndRecordAllPaths # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function findAllPossibleLegs -.LCPI4_0: - .dword 0x40ac200000000000 # double 3600 -.LCPI4_1: - .dword 0x404e000000000000 # double 60 - .text - .globl findAllPossibleLegs + .globl findAllPossibleLegs # -- Begin function findAllPossibleLegs .p2align 5 .type findAllPossibleLegs,@function findAllPossibleLegs: # @findAllPossibleLegs @@ -1057,8 +1048,11 @@ findAllPossibleLegs: # @findAllPossibleLegs bstrpick.d $a1, $a0, 31, 31 srai.d $a0, $a0, 11 add.d $s1, $a0, $a1 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_0) + ori $a0, $zero, 0 + lu32i.d $a0, -253952 + lu52i.d $a0, $a0, 1034 + movgr2fr.d $fa1, $a0 + ori $fp, $zero, 0 pcaddu18i $ra, %call36(fmod) jirl $ra, $ra, 0 ftintrz.w.d $fa1, $fa0 @@ -1071,8 +1065,9 @@ findAllPossibleLegs: # @findAllPossibleLegs bstrpick.d $a1, $a0, 31, 31 srai.d $a0, $a0, 5 add.d $s2, $a0, $a1 - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_1) + lu32i.d $fp, -131072 + lu52i.d $a0, $fp, 1028 + movgr2fr.d $fa1, $a0 pcaddu18i $ra, %call36(fmod) jirl $ra, $ra, 0 movfr2gr.d $s3, $fa0 @@ -1123,14 +1118,7 @@ findAllPossibleLegs: # @findAllPossibleLegs .Lfunc_end4: .size findAllPossibleLegs, .Lfunc_end4-findAllPossibleLegs # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function findAndLogAllPossibleLegs -.LCPI5_0: - .dword 0x40ac200000000000 # double 3600 -.LCPI5_1: - .dword 0x404e000000000000 # double 60 - .text - .globl findAndLogAllPossibleLegs + .globl findAndLogAllPossibleLegs # -- Begin function findAndLogAllPossibleLegs .p2align 5 .type findAndLogAllPossibleLegs,@function findAndLogAllPossibleLegs: # @findAndLogAllPossibleLegs @@ -1164,25 +1152,25 @@ findAndLogAllPossibleLegs: # @findAndLogAllPossibleLegs ori $a2, $zero, 1 pcaddu18i $ra, %call36(fwrite) jirl $ra, $ra, 0 - st.d $fp, $sp, 64 # 8-byte Folded Spill st.b $zero, $fp, 4 ori $a0, $zero, 16 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 - move $fp, $a0 + move $s0, $a0 st.d $zero, $a0, 8 ori $a0, $zero, 64 pcaddu18i $ra, %call36(NodeVecVec_new) jirl $ra, $ra, 0 - st.d $fp, $sp, 16 # 8-byte Folded Spill + st.d $s0, $sp, 16 # 8-byte Folded Spill st.d $a0, $sp, 56 # 8-byte Folded Spill - st.d $a0, $fp, 0 + st.d $a0, $s0, 0 pcalau12i $a0, %pc_hi20(.Lstr.1) addi.d $a0, $a0, %pc_lo12(.Lstr.1) pcaddu18i $ra, %call36(puts) jirl $ra, $ra, 0 ld.d $a0, $s1, 32 ld.w $a1, $a0, 0 + st.d $fp, $sp, 64 # 8-byte Folded Spill blez $a1, .LBB5_16 # %bb.2: # %.preheader.preheader vrepli.b $vr0, 0 @@ -1190,12 +1178,12 @@ findAndLogAllPossibleLegs: # @findAndLogAllPossibleLegs pcalau12i $a2, %pc_hi20(.L__const.findAndLogAllPossibleLegs.fullIntSignature) addi.d $s8, $a2, %pc_lo12(.L__const.findAndLogAllPossibleLegs.fullIntSignature) move $s0, $zero - move $fp, $zero + move $s6, $zero b .LBB5_5 .p2align 4, , 16 .LBB5_3: # %.lr.ph74.split.us # in Loop: Header=BB5_5 Depth=1 - add.w $fp, $fp, $a1 + add.w $s6, $s6, $a1 .LBB5_4: # %._crit_edge75 # in Loop: Header=BB5_5 Depth=1 addi.d $s0, $s0, 1 @@ -1221,11 +1209,11 @@ findAndLogAllPossibleLegs: # @findAndLogAllPossibleLegs .LBB5_8: # %._crit_edge.loopexit # in Loop: Header=BB5_10 Depth=2 ld.w $a1, $a0, 0 - ld.d $fp, $sp, 24 # 8-byte Folded Reload + ld.d $s6, $sp, 24 # 8-byte Folded Reload .LBB5_9: # %._crit_edge # in Loop: Header=BB5_10 Depth=2 addi.d $s4, $s4, 1 - addi.w $fp, $fp, 1 + addi.w $s6, $s6, 1 bge $s4, $a1, .LBB5_4 .LBB5_10: # %.lr.ph74.split # Parent Loop BB5_5 Depth=1 @@ -1238,7 +1226,7 @@ findAndLogAllPossibleLegs: # @findAndLogAllPossibleLegs blez $a2, .LBB5_9 # %bb.11: # %.lr.ph # in Loop: Header=BB5_10 Depth=2 - st.d $fp, $sp, 24 # 8-byte Folded Spill + st.d $s6, $sp, 24 # 8-byte Folded Spill move $s5, $zero move $fp, $zero addi.d $a1, $a0, 8 @@ -1311,7 +1299,7 @@ findAndLogAllPossibleLegs: # @findAndLogAllPossibleLegs move $s1, $zero b .LBB5_24 .LBB5_16: - move $fp, $zero + move $s6, $zero .LBB5_17: # %._crit_edge79 ld.d $a0, $sp, 56 # 8-byte Folded Reload ld.w $s1, $a0, 0 @@ -1328,8 +1316,11 @@ findAndLogAllPossibleLegs: # @findAndLogAllPossibleLegs bstrpick.d $a1, $a0, 31, 31 srai.d $a0, $a0, 11 add.d $s3, $a0, $a1 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $zero, 0 + lu32i.d $a0, -253952 + lu52i.d $a0, $a0, 1034 + movgr2fr.d $fa1, $a0 + ori $fp, $zero, 0 pcaddu18i $ra, %call36(fmod) jirl $ra, $ra, 0 ftintrz.w.d $fa1, $fa0 @@ -1342,15 +1333,16 @@ findAndLogAllPossibleLegs: # @findAndLogAllPossibleLegs bstrpick.d $a1, $a0, 31, 31 srai.d $a0, $a0, 5 add.d $s4, $a0, $a1 - pcalau12i $a0, %pc_hi20(.LCPI5_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI5_1) + lu32i.d $fp, -131072 + lu52i.d $a0, $fp, 1028 + movgr2fr.d $fa1, $a0 pcaddu18i $ra, %call36(fmod) jirl $ra, $ra, 0 movfr2gr.d $s5, $fa0 pcalau12i $a0, %pc_hi20(.L.str.7) addi.d $a0, $a0, %pc_lo12(.L.str.7) move $a1, $s1 - move $a2, $fp + move $a2, $s6 move $a3, $s3 move $a4, $s4 move $a5, $s5 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/CMakeFiles/rsbench.dir/init.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/CMakeFiles/rsbench.dir/init.s index 387e8323..71da5495 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/CMakeFiles/rsbench.dir/init.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/CMakeFiles/rsbench.dir/init.s @@ -178,14 +178,7 @@ generate_n_windows: # @generate_n_windows .Lfunc_end1: .size generate_n_windows, .Lfunc_end1-generate_n_windows # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function generate_poles -.LCPI2_0: - .dword 0x41dfffffffc00000 # double 2147483647 -.LCPI2_1: - .dword 0x4063100000000000 # double 152.5 - .text - .globl generate_poles + .globl generate_poles # -- Begin function generate_poles .p2align 5 .type generate_poles,@function generate_poles: # @generate_poles @@ -239,15 +232,15 @@ generate_poles: # @generate_poles bnez $a2, .LBB2_2 # %bb.3: # %.preheader.lr.ph move $s2, $zero - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI2_0) + lu12i.w $a0, -1024 + lu52i.d $a0, $a0, 1053 + movgr2fr.d $fs0, $a0 movgr2fr.d $fs1, $zero ori $a0, $zero, 0 - pcalau12i $a1, %pc_hi20(.LCPI2_1) - fld.d $fs2, $a1, %pc_lo12(.LCPI2_1) lu32i.d $a0, 200704 lu52i.d $a0, $a0, 1030 vreplgr2vr.d $vr0, $a0 + movgr2fr.d $fs2, $a0 vst $vr0, $sp, 16 # 16-byte Folded Spill b .LBB2_5 .p2align 4, , 16 @@ -367,12 +360,7 @@ generate_poles: # @generate_poles .Lfunc_end2: .size generate_poles, .Lfunc_end2-generate_poles # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function generate_window_params -.LCPI3_0: - .dword 0x41dfffffffc00000 # double 2147483647 - .text - .globl generate_window_params + .globl generate_window_params # -- Begin function generate_window_params .p2align 5 .type generate_window_params,@function generate_window_params: # @generate_window_params @@ -423,9 +411,10 @@ generate_window_params: # @generate_window_params addi.d $a3, $a3, 8 bnez $a2, .LBB3_2 # %bb.3: # %.lr.ph61.preheader - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI3_0) move $s3, $zero + lu12i.w $a0, -1024 + lu52i.d $a0, $a0, 1053 + movgr2fr.d $fs0, $a0 b .LBB3_5 .p2align 4, , 16 .LBB3_4: # %._crit_edge @@ -514,10 +503,6 @@ generate_window_params: # @generate_window_params .LCPI4_0: .dword 0 # 0x0 .dword 1 # 0x1 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI4_1: - .dword 0x41dfffffffc00000 # double 2147483647 .text .globl generate_pseudo_K0RS .p2align 5 @@ -596,9 +581,10 @@ generate_pseudo_K0RS: # @generate_pseudo_K0RS .LBB4_8: # %.preheader.lr.ph blez $s1, .LBB4_15 # %bb.9: # %.preheader.preheader - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fs0, $a0, %pc_lo12(.LCPI4_1) move $s3, $zero + lu12i.w $a0, -1024 + lu52i.d $a0, $a0, 1053 + movgr2fr.d $fs0, $a0 b .LBB4_11 .p2align 4, , 16 .LBB4_10: # %._crit_edge diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/CMakeFiles/rsbench.dir/io.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/CMakeFiles/rsbench.dir/io.s index ec69ecb6..f2445ac8 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/CMakeFiles/rsbench.dir/io.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/CMakeFiles/rsbench.dir/io.s @@ -669,14 +669,8 @@ print_CLI_error: # @print_CLI_error .Lfunc_end5: .size print_CLI_error, .Lfunc_end5-print_CLI_error # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function print_input_summary -.LCPI6_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI6_1: - .dword 0x3f50000000000000 # double 9.765625E-4 .text - .globl print_input_summary + .globl print_input_summary # -- Begin function print_input_summary .p2align 5 .type print_input_summary,@function print_input_summary: # @print_input_summary @@ -1038,18 +1032,19 @@ print_input_summary: # @print_input_summary pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 srli.d $a0, $fp, 32 - pcalau12i $a1, %pc_hi20(.LCPI6_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI6_0) lu52i.d $a1, $zero, 1107 or $a0, $a0, $a1 + movgr2fr.d $fa0, $a0 + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 movgr2fr.d $fa1, $a0 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 lu12i.w $a0, 275200 - pcalau12i $a1, %pc_hi20(.LCPI6_1) - fld.d $fa1, $a1, %pc_lo12(.LCPI6_1) bstrins.d $fp, $a0, 63, 32 - movgr2fr.d $fa2, $fp - fadd.d $fa0, $fa2, $fa0 + movgr2fr.d $fa1, $fp + fadd.d $fa0, $fa1, $fa0 + lu52i.d $a0, $zero, 1013 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 fmul.d $fa0, $fa0, $fa1 movfr2gr.d $a1, $fa0 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/CMakeFiles/rsbench.dir/material.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/CMakeFiles/rsbench.dir/material.s index b0871d15..a26e7475 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/CMakeFiles/rsbench.dir/material.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/CMakeFiles/rsbench.dir/material.s @@ -895,12 +895,7 @@ load_mats: # @load_mats .Lfunc_end2: .size load_mats, .Lfunc_end2-load_mats # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function load_concs -.LCPI3_0: - .dword 0x41dfffffffc00000 # double 2147483647 - .text - .globl load_concs + .globl load_concs # -- Begin function load_concs .p2align 5 .type load_concs,@function load_concs: # @load_concs @@ -923,13 +918,13 @@ load_concs: # @load_concs pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 ld.w $a1, $fp, 0 - st.d $a1, $sp, 16 # 8-byte Folded Spill + st.d $a1, $sp, 24 # 8-byte Folded Spill move $s4, $a0 slli.d $a0, $a1, 3 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 ld.w $a1, $fp, 4 - st.d $a1, $sp, 8 # 8-byte Folded Spill + st.d $a1, $sp, 16 # 8-byte Folded Spill move $s3, $a0 st.d $a0, $s4, 0 slli.d $a0, $a1, 3 @@ -978,35 +973,34 @@ load_concs: # @load_concs pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 ld.w $a1, $fp, 36 - st.d $a0, $sp, 24 # 8-byte Folded Spill + st.d $a0, $sp, 32 # 8-byte Folded Spill st.d $a0, $s4, 64 slli.d $a0, $a1, 3 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 ld.w $a1, $fp, 40 - st.d $a0, $sp, 32 # 8-byte Folded Spill + st.d $a0, $sp, 40 # 8-byte Folded Spill st.d $a0, $s4, 72 slli.d $a0, $a1, 3 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 ld.w $a1, $fp, 44 - st.d $a0, $sp, 40 # 8-byte Folded Spill + st.d $a0, $sp, 48 # 8-byte Folded Spill st.d $a0, $s4, 80 slli.d $a0, $a1, 3 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 - st.d $a0, $sp, 48 # 8-byte Folded Spill - st.d $a0, $s4, 88 - pcalau12i $a0, %pc_hi20(.LCPI3_0) st.d $a0, $sp, 56 # 8-byte Folded Spill - ld.d $a0, $sp, 8 # 8-byte Folded Reload - ld.d $a1, $sp, 16 # 8-byte Folded Reload + st.d $a0, $s4, 88 + ld.d $a0, $sp, 16 # 8-byte Folded Reload + ld.d $a1, $sp, 24 # 8-byte Folded Reload blez $a1, .LBB3_4 # %bb.1: # %.lr.ph.preheader - st.d $s4, $sp, 16 # 8-byte Folded Spill - ld.d $a0, $sp, 56 # 8-byte Folded Reload - fld.d $fs0, $a0, %pc_lo12(.LCPI3_0) + st.d $s4, $sp, 24 # 8-byte Folded Spill move $s4, $zero + lu12i.w $a0, -1024 + lu52i.d $a0, $a0, 1053 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB3_2: # %.lr.ph # =>This Inner Loop Header: Depth=1 @@ -1022,13 +1016,14 @@ load_concs: # @load_concs blt $s4, $a0, .LBB3_2 # %bb.3: # %._crit_edge.loopexit ld.w $a0, $fp, 4 - ld.d $s4, $sp, 16 # 8-byte Folded Reload + ld.d $s4, $sp, 24 # 8-byte Folded Reload .LBB3_4: # %._crit_edge blez $a0, .LBB3_7 # %bb.5: # %.lr.ph.1.preheader - ld.d $a0, $sp, 56 # 8-byte Folded Reload - fld.d $fs0, $a0, %pc_lo12(.LCPI3_0) move $s3, $zero + lu12i.w $a0, -1024 + lu52i.d $a0, $a0, 1053 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB3_6: # %.lr.ph.1 # =>This Inner Loop Header: Depth=1 @@ -1044,11 +1039,12 @@ load_concs: # @load_concs blt $s3, $a0, .LBB3_6 .LBB3_7: # %._crit_edge.1 ld.w $a0, $fp, 8 - ld.d $s3, $sp, 56 # 8-byte Folded Reload + lu12i.w $s3, -1024 blez $a0, .LBB3_10 # %bb.8: # %.lr.ph.2.preheader - fld.d $fs0, $s3, %pc_lo12(.LCPI3_0) move $s2, $zero + lu52i.d $a0, $s3, 1053 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB3_9: # %.lr.ph.2 # =>This Inner Loop Header: Depth=1 @@ -1066,8 +1062,9 @@ load_concs: # @load_concs ld.w $a0, $fp, 12 blez $a0, .LBB3_13 # %bb.11: # %.lr.ph.3.preheader - fld.d $fs0, $s3, %pc_lo12(.LCPI3_0) move $s1, $zero + lu52i.d $a0, $s3, 1053 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB3_12: # %.lr.ph.3 # =>This Inner Loop Header: Depth=1 @@ -1083,11 +1080,12 @@ load_concs: # @load_concs blt $s1, $a0, .LBB3_12 .LBB3_13: # %._crit_edge.3 ld.w $a0, $fp, 16 - ld.d $s2, $sp, 40 # 8-byte Folded Reload + ld.d $s2, $sp, 48 # 8-byte Folded Reload blez $a0, .LBB3_16 # %bb.14: # %.lr.ph.4.preheader - fld.d $fs0, $s3, %pc_lo12(.LCPI3_0) move $s0, $zero + lu52i.d $a0, $s3, 1053 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB3_15: # %.lr.ph.4 # =>This Inner Loop Header: Depth=1 @@ -1103,11 +1101,12 @@ load_concs: # @load_concs blt $s0, $a0, .LBB3_15 .LBB3_16: # %._crit_edge.4 ld.w $a0, $fp, 20 - ld.d $s1, $sp, 48 # 8-byte Folded Reload + ld.d $s1, $sp, 56 # 8-byte Folded Reload blez $a0, .LBB3_19 # %bb.17: # %.lr.ph.5.preheader - fld.d $fs0, $s3, %pc_lo12(.LCPI3_0) move $s0, $zero + lu52i.d $a0, $s3, 1053 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB3_18: # %.lr.ph.5 # =>This Inner Loop Header: Depth=1 @@ -1125,8 +1124,9 @@ load_concs: # @load_concs ld.w $a0, $fp, 24 blez $a0, .LBB3_22 # %bb.20: # %.lr.ph.6.preheader - fld.d $fs0, $s3, %pc_lo12(.LCPI3_0) move $s0, $zero + lu52i.d $a0, $s3, 1053 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB3_21: # %.lr.ph.6 # =>This Inner Loop Header: Depth=1 @@ -1144,8 +1144,9 @@ load_concs: # @load_concs ld.w $a0, $fp, 28 blez $a0, .LBB3_25 # %bb.23: # %.lr.ph.7.preheader - fld.d $fs0, $s3, %pc_lo12(.LCPI3_0) move $s0, $zero + lu52i.d $a0, $s3, 1053 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB3_24: # %.lr.ph.7 # =>This Inner Loop Header: Depth=1 @@ -1161,11 +1162,12 @@ load_concs: # @load_concs blt $s0, $a0, .LBB3_24 .LBB3_25: # %._crit_edge.7 ld.w $a0, $fp, 32 - ld.d $s5, $sp, 24 # 8-byte Folded Reload + ld.d $s5, $sp, 32 # 8-byte Folded Reload blez $a0, .LBB3_28 # %bb.26: # %.lr.ph.8.preheader - fld.d $fs0, $s3, %pc_lo12(.LCPI3_0) move $s0, $zero + lu52i.d $a0, $s3, 1053 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB3_27: # %.lr.ph.8 # =>This Inner Loop Header: Depth=1 @@ -1181,11 +1183,12 @@ load_concs: # @load_concs blt $s0, $a0, .LBB3_27 .LBB3_28: # %._crit_edge.8 ld.w $a0, $fp, 36 - ld.d $s5, $sp, 32 # 8-byte Folded Reload + ld.d $s5, $sp, 40 # 8-byte Folded Reload blez $a0, .LBB3_31 # %bb.29: # %.lr.ph.9.preheader - fld.d $fs0, $s3, %pc_lo12(.LCPI3_0) move $s0, $zero + lu52i.d $a0, $s3, 1053 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB3_30: # %.lr.ph.9 # =>This Inner Loop Header: Depth=1 @@ -1203,8 +1206,9 @@ load_concs: # @load_concs ld.w $a0, $fp, 40 blez $a0, .LBB3_34 # %bb.32: # %.lr.ph.10.preheader - fld.d $fs0, $s3, %pc_lo12(.LCPI3_0) move $s0, $zero + lu52i.d $a0, $s3, 1053 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB3_33: # %.lr.ph.10 # =>This Inner Loop Header: Depth=1 @@ -1222,8 +1226,9 @@ load_concs: # @load_concs ld.w $a0, $fp, 44 blez $a0, .LBB3_37 # %bb.35: # %.lr.ph.11.preheader - fld.d $fs0, $s3, %pc_lo12(.LCPI3_0) move $s0, $zero + lu52i.d $a0, $s3, 1053 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB3_36: # %.lr.ph.11 # =>This Inner Loop Header: Depth=1 @@ -1256,32 +1261,7 @@ load_concs: # @load_concs .Lfunc_end3: .size load_concs, .Lfunc_end3-load_concs # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function pick_mat -.LCPI4_0: - .dword 0x3faa9fbe76c8b439 # double 0.051999999999999998 -.LCPI4_1: - .dword 0x3fd4ed916872b021 # double 0.32700000000000001 -.LCPI4_2: - .dword 0x3fdd810624dd2f1b # double 0.46100000000000002 -.LCPI4_3: - .dword 0x3fe3ae147ae147af # double 0.6150000000000001 -.LCPI4_4: - .dword 0x3fe5ba5e353f7cee # double 0.67900000000000005 -.LCPI4_5: - .dword 0x3fe7d70a3d70a3d8 # double 0.74500000000000011 -.LCPI4_6: - .dword 0x3fe999999999999a # double 0.80000000000000004 -.LCPI4_7: - .dword 0x3fe9db22d0e56042 # double 0.80800000000000005 -.LCPI4_8: - .dword 0x3fea5604189374bd # double 0.82300000000000006 -.LCPI4_9: - .dword 0x3feb22d0e560418a # double 0.84800000000000009 -.LCPI4_10: - .dword 0x3feb8d4fdf3b645b # double 0.8610000000000001 - .text - .globl pick_mat + .globl pick_mat # -- Begin function pick_mat .p2align 5 .type pick_mat,@function pick_mat: # @pick_mat @@ -1294,68 +1274,101 @@ pick_mat: # @pick_mat fcmp.clt.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB4_12 # %bb.1: # %._crit_edge.1 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_0) + lu12i.w $a0, 486539 + ori $a0, $a0, 1081 + lu32i.d $a0, -352322 + lu52i.d $a0, $a0, 1018 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 ori $a0, $zero, 1 bcnez $fcc0, .LBB4_13 # %bb.2: # %._crit_edge.2 - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_1) + lu12i.w $a0, 427819 + ori $a0, $a0, 33 + lu32i.d $a0, 322961 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 ori $a0, $zero, 2 bcnez $fcc0, .LBB4_13 # %bb.3: # %._crit_edge.3 - pcalau12i $a0, %pc_hi20(.LCPI4_2) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_2) + lu12i.w $a0, 150994 + ori $a0, $a0, 3867 + lu32i.d $a0, -163578 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 ori $a0, $zero, 3 bcnez $fcc0, .LBB4_13 # %bb.4: # %._crit_edge.4 - pcalau12i $a0, %pc_hi20(.LCPI4_3) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_3) + lu12i.w $a0, 503316 + ori $a0, $a0, 1967 + lu32i.d $a0, 241172 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 ori $a0, $zero, 4 bcnez $fcc0, .LBB4_13 # %bb.5: # %._crit_edge.5 - pcalau12i $a0, %pc_hi20(.LCPI4_4) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_4) + lu12i.w $a0, 218103 + ori $a0, $a0, 3310 + lu32i.d $a0, 375390 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 ori $a0, $zero, 5 bcnez $fcc0, .LBB4_13 # %bb.6: # %._crit_edge.6 - pcalau12i $a0, %pc_hi20(.LCPI4_5) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_5) + lu12i.w $a0, 251658 + ori $a0, $a0, 984 + lu32i.d $a0, 513802 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 ori $a0, $zero, 6 bcnez $fcc0, .LBB4_13 # %bb.7: # %._crit_edge.7 - pcalau12i $a0, %pc_hi20(.LCPI4_6) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_6) + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 ori $a0, $zero, 7 bcnez $fcc0, .LBB4_13 # %bb.8: # %._crit_edge.8 - pcalau12i $a0, %pc_hi20(.LCPI4_7) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_7) + lu12i.w $a0, -192938 + ori $a0, $a0, 66 + lu32i.d $a0, -402654 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 ori $a0, $zero, 8 bcnez $fcc0, .LBB4_13 # %bb.9: # %._crit_edge.9 - pcalau12i $a0, %pc_hi20(.LCPI4_8) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_8) + lu12i.w $a0, 100663 + ori $a0, $a0, 1213 + lu32i.d $a0, -371196 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 ori $a0, $zero, 9 bcnez $fcc0, .LBB4_13 # %bb.10: # %._crit_edge.10 - pcalau12i $a0, %pc_hi20(.LCPI4_9) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_9) + lu12i.w $a0, -109052 + ori $a0, $a0, 394 + lu32i.d $a0, -318768 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 ori $a0, $zero, 10 bcnez $fcc0, .LBB4_13 # %bb.11: # %._crit_edge.11 - pcalau12i $a0, %pc_hi20(.LCPI4_10) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_10) + lu12i.w $a0, -134218 + ori $a0, $a0, 1115 + lu32i.d $a0, -291505 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 ori $a0, $zero, 11 bcnez $fcc0, .LBB4_13 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/CMakeFiles/rsbench.dir/utils.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/CMakeFiles/rsbench.dir/utils.s index f0ee2e7b..68814804 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/CMakeFiles/rsbench.dir/utils.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/CMakeFiles/rsbench.dir/utils.s @@ -1,10 +1,6 @@ .file "utils.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function rn -.LCPI0_0: - .dword 0x41dfffffffc00000 # double 2147483647 .text - .globl rn + .globl rn # -- Begin function rn .p2align 5 .type rn,@function rn: # @rn @@ -22,12 +18,13 @@ rn: # @rn srli.d $a2, $a2, 30 slli.d $a3, $a2, 31 sub.d $a2, $a2, $a3 - pcalau12i $a3, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a3, %pc_lo12(.LCPI0_0) add.d $a1, $a1, $a2 - movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + lu12i.w $a2, -1024 + lu52i.d $a2, $a2, 1053 + movgr2fr.d $fa1, $a2 + fdiv.d $fa0, $fa0, $fa1 st.d $a1, $a0, 0 ret .Lfunc_end0: diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/CMakeFiles/rsbench.dir/xs_kernel.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/CMakeFiles/rsbench.dir/xs_kernel.s index 7ef09133..e10d830e 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/CMakeFiles/rsbench.dir/xs_kernel.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/RSBench/CMakeFiles/rsbench.dir/xs_kernel.s @@ -1,40 +1,6 @@ .file "xs_kernel.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function fast_nuclear_W -.LCPI0_0: - .dword 0xbfd19dc7afdb7b46 # double -0.27525512860841095 -.LCPI0_1: - .dword 0x3fe065c77cdfff0d # double 0.51242422475476845 -.LCPI0_2: - .dword 0xc005cc470a049097 # double -2.7247448713915889 -.LCPI0_3: - .dword 0x3faa80fd3629c600 # double 0.051765358792987826 -.LCPI0_4: - .dword 0x4062000000000000 # double 144 -.LCPI0_5: - .dword 0x4023bd3cbc48f10b # double 9.8696040000000007 -.LCPI0_6: - .dword 0x4043bd3cddd6e04c # double 39.47842 -.LCPI0_7: - .dword 0x405634e4649906cd # double 88.826440000000005 -.LCPI0_8: - .dword 0x4063bd3d07c84b5e # double 157.91370000000001 -.LCPI0_9: - .dword 0x406ed7aee631f8a1 # double 246.74010000000001 -.LCPI0_10: - .dword 0x407634e48e8a71de # double 355.30579999999998 -.LCPI0_11: - .dword 0x407e39c504816f00 # double 483.61059999999998 -.LCPI0_12: - .dword 0x4083bd3cd35a8588 # double 631.65470000000005 -.LCPI0_13: - .dword 0x4088fb810624dd2f # double 799.43799999999999 -.LCPI0_14: - .dword 0x408ed7aee631f8a1 # double 986.96040000000005 -.LCPI0_15: - .dword 0x40544f923a29c77a # double 81.243300000000005 .text - .globl fast_nuclear_W + .globl fast_nuclear_W # -- Begin function fast_nuclear_W .p2align 5 .type fast_nuclear_W,@function fast_nuclear_W: # @fast_nuclear_W @@ -96,12 +62,14 @@ fast_nuclear_W: # @fast_nuclear_W fmov.d $fa3, $fs2 pcaddu18i $ra, %call36(__divdc3) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_4) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_4) fst.d $fa0, $sp, 248 # 8-byte Folded Spill fst.d $fa1, $sp, 256 # 8-byte Folded Spill - fmul.d $fs6, $fs1, $fa2 - fmul.d $fs7, $fs0, $fa2 + ori $a0, $zero, 0 + lu32i.d $a0, 131072 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa0, $a0 + fmul.d $fs6, $fs1, $fa0 + fmul.d $fs7, $fs0, $fa0 fmul.d $fa0, $fs1, $fs6 fmul.d $fa1, $fs0, $fs7 fmul.d $fa2, $fs6, $fs0 @@ -120,11 +88,14 @@ fast_nuclear_W: # @fast_nuclear_W fmov.d $fa1, $fs4 pcaddu18i $ra, %call36(cexp) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_5) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_5) fneg.d $fa1, $fa1 - vldi $vr3, -784 - fsub.d $fa0, $fa3, $fa0 + vldi $vr2, -784 + fsub.d $fa0, $fa2, $fa0 + lu12i.w $a0, -277361 + ori $a0, $a0, 267 + lu32i.d $a0, 245052 + lu52i.d $a0, $a0, 1026 + movgr2fr.d $fa2, $a0 fsub.d $fa2, $fa2, $fs2 fmov.d $fa3, $fs6 pcaddu18i $ra, %call36(__divdc3) @@ -137,10 +108,13 @@ fast_nuclear_W: # @fast_nuclear_W fmov.d $fa1, $fs4 pcaddu18i $ra, %call36(cexp) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_6) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_6) - vldi $vr3, -784 - fadd.d $fa0, $fa0, $fa3 + vldi $vr2, -784 + fadd.d $fa0, $fa0, $fa2 + lu12i.w $a0, -139922 + ori $a0, $a0, 76 + lu32i.d $a0, 245052 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fa2, $a0 fsub.d $fa2, $fa2, $fs2 fmov.d $fa3, $fs6 pcaddu18i $ra, %call36(__divdc3) @@ -171,11 +145,14 @@ fast_nuclear_W: # @fast_nuclear_W fmov.d $fa1, $fs4 pcaddu18i $ra, %call36(cexp) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_7) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_7) fneg.d $fa1, $fa1 - vldi $vr3, -784 - fsub.d $fa0, $fa3, $fa0 + vldi $vr2, -784 + fsub.d $fa0, $fa2, $fa0 + lu12i.w $a0, 412048 + ori $a0, $a0, 1741 + lu32i.d $a0, 406756 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa2, $a0 fsub.d $fa2, $fa2, $fs2 fmov.d $fa3, $fs6 pcaddu18i $ra, %call36(__divdc3) @@ -194,10 +171,13 @@ fast_nuclear_W: # @fast_nuclear_W fmov.d $fa1, $fs4 pcaddu18i $ra, %call36(cexp) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_8) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_8) - vldi $vr3, -784 - fadd.d $fa0, $fa0, $fa3 + vldi $vr2, -784 + fadd.d $fa0, $fa0, $fa2 + lu12i.w $a0, 31876 + ori $a0, $a0, 2910 + lu32i.d $a0, 245053 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa2, $a0 fsub.d $fa2, $fa2, $fs2 fmov.d $fa3, $fs6 pcaddu18i $ra, %call36(__divdc3) @@ -210,11 +190,14 @@ fast_nuclear_W: # @fast_nuclear_W fmov.d $fa1, $fs4 pcaddu18i $ra, %call36(cexp) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_9) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_9) fneg.d $fa1, $fa1 - vldi $vr3, -784 - fsub.d $fa0, $fa3, $fa0 + vldi $vr2, -784 + fsub.d $fa0, $fa2, $fa0 + lu12i.w $a0, -105697 + ori $fp, $a0, 2209 + lu32i.d $fp, -75858 + lu52i.d $a0, $fp, 1030 + movgr2fr.d $fa2, $a0 fsub.d $fa2, $fa2, $fs2 fmov.d $fa3, $fs6 pcaddu18i $ra, %call36(__divdc3) @@ -227,10 +210,13 @@ fast_nuclear_W: # @fast_nuclear_W fmov.d $fa1, $fs4 pcaddu18i $ra, %call36(cexp) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_10) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_10) - vldi $vr3, -784 - fadd.d $fa0, $fa0, $fa3 + vldi $vr2, -784 + fadd.d $fa0, $fa0, $fa2 + lu12i.w $a0, -464729 + ori $a0, $a0, 478 + lu32i.d $a0, 406756 + lu52i.d $a0, $a0, 1031 + movgr2fr.d $fa2, $a0 fsub.d $fa2, $fa2, $fs2 fmov.d $fa3, $fs6 pcaddu18i $ra, %call36(__divdc3) @@ -243,11 +229,14 @@ fast_nuclear_W: # @fast_nuclear_W fmov.d $fa1, $fs4 pcaddu18i $ra, %call36(cexp) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_11) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_11) fneg.d $fa1, $fa1 - vldi $vr3, -784 - fsub.d $fa0, $fa3, $fa0 + vldi $vr2, -784 + fsub.d $fa0, $fa2, $fa0 + lu12i.w $a0, 18454 + ori $a0, $a0, 3840 + lu32i.d $a0, -116283 + lu52i.d $a0, $a0, 1031 + movgr2fr.d $fa2, $a0 fsub.d $fa2, $fa2, $fs2 fmov.d $fa3, $fs6 pcaddu18i $ra, %call36(__divdc3) @@ -260,10 +249,13 @@ fast_nuclear_W: # @fast_nuclear_W fmov.d $fa1, $fs4 pcaddu18i $ra, %call36(cexp) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_12) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_12) - vldi $vr3, -784 - fadd.d $fa0, $fa0, $fa3 + vldi $vr2, -784 + fadd.d $fa0, $fa0, $fa2 + lu12i.w $a0, -182872 + ori $a0, $a0, 1416 + lu32i.d $a0, 245052 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa2, $a0 fsub.d $fa2, $fa2, $fs2 fmov.d $fa3, $fs6 pcaddu18i $ra, %call36(__divdc3) @@ -276,11 +268,14 @@ fast_nuclear_W: # @fast_nuclear_W fmov.d $fa1, $fs4 pcaddu18i $ra, %call36(cexp) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_13) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_13) fneg.d $fa1, $fa1 - vldi $vr3, -784 - fsub.d $fa0, $fa3, $fa0 + vldi $vr2, -784 + fsub.d $fa0, $fa2, $fa0 + lu12i.w $a0, 25165 + ori $a0, $a0, 3375 + lu32i.d $a0, -459903 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa2, $a0 fsub.d $fa2, $fa2, $fs2 fmov.d $fa3, $fs6 pcaddu18i $ra, %call36(__divdc3) @@ -293,10 +288,10 @@ fast_nuclear_W: # @fast_nuclear_W fmov.d $fa1, $fs4 pcaddu18i $ra, %call36(cexp) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_14) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_14) - vldi $vr3, -784 - fadd.d $fa0, $fa0, $fa3 + vldi $vr2, -784 + fadd.d $fa0, $fa0, $fa2 + lu52i.d $a0, $fp, 1032 + movgr2fr.d $fa2, $a0 fsub.d $fa2, $fa2, $fs2 fmov.d $fa3, $fs6 .LBB0_6: # %.split56.us @@ -376,8 +371,11 @@ fast_nuclear_W: # @fast_nuclear_W vfmul.d $vr0, $vr1, $vr0 vfadd.d $vr6, $vr2, $vr0 .LBB0_7: # %.split56.us - pcalau12i $a0, %pc_hi20(.LCPI0_15) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_15) + lu12i.w $a0, 238236 + ori $a0, $a0, 1914 + lu32i.d $a0, 282514 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa2, $a0 fmul.d $fa0, $fs0, $fa2 fmul.d $fa1, $fs1, $fa2 fld.d $fa3, $sp, 272 # 8-byte Folded Reload @@ -423,11 +421,17 @@ fast_nuclear_W: # @fast_nuclear_W fadd.d $fs4, $fa2, $fa2 bceqz $fcc0, .LBB0_32 .LBB0_12: - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_0) + lu12i.w $a0, -328265 + ori $a0, $a0, 2886 + lu32i.d $a0, 105927 + lu52i.d $a0, $a0, -1027 + movgr2fr.d $fa0, $a0 fadd.d $fa2, $fs5, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_1) + lu12i.w $a0, 511487 + ori $a0, $a0, 3853 + lu32i.d $a0, 26055 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa0, $a0 movgr2fr.d $fa1, $zero fmov.d $fa3, $fs4 pcaddu18i $ra, %call36(__divdc3) @@ -435,11 +439,17 @@ fast_nuclear_W: # @fast_nuclear_W fmov.d $fs6, $fa0 fmov.d $fs7, $fa1 .LBB0_13: - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_2) + lu12i.w $a0, 41033 + ori $a0, $a0, 151 + lu32i.d $a0, 379975 + lu52i.d $a0, $a0, -1024 + movgr2fr.d $fa0, $a0 fadd.d $fa2, $fs5, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI0_3) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_3) + lu12i.w $a0, 221852 + ori $a0, $a0, 1536 + lu32i.d $a0, -360195 + lu52i.d $a0, $a0, 1018 + movgr2fr.d $fa0, $a0 movgr2fr.d $fa1, $zero fmov.d $fa3, $fs4 pcaddu18i $ra, %call36(__divdc3) @@ -641,12 +651,18 @@ fast_nuclear_W: # @fast_nuclear_W fmov.d $fa3, $fs0 pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_0) fmov.d $fa3, $fa1 - fadd.d $fa2, $fa0, $fa2 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_1) + lu12i.w $a0, -328265 + ori $a0, $a0, 2886 + lu32i.d $a0, 105927 + lu52i.d $a0, $a0, -1027 + movgr2fr.d $fa1, $a0 + fadd.d $fa2, $fa0, $fa1 + lu12i.w $a0, 511487 + ori $a0, $a0, 3853 + lu32i.d $a0, 26055 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa0, $a0 movgr2fr.d $fa1, $zero pcaddu18i $ra, %call36(__divdc3) jirl $ra, $ra, 0 @@ -687,8 +703,11 @@ fast_nuclear_W: # @fast_nuclear_W fmov.d $fa3, $fs0 pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_5) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_5) + lu12i.w $a0, -277361 + ori $a0, $a0, 267 + lu32i.d $a0, 245052 + lu52i.d $a0, $a0, 1026 + movgr2fr.d $fa2, $a0 fsub.d $fa2, $fa2, $fa0 fneg.d $fa3, $fa1 fmov.d $fa0, $fs3 @@ -712,8 +731,11 @@ fast_nuclear_W: # @fast_nuclear_W fmov.d $fa3, $fs0 pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_6) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_6) + lu12i.w $a0, -139922 + ori $a0, $a0, 76 + lu32i.d $a0, 245052 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fa2, $a0 fsub.d $fa2, $fa2, $fa0 fneg.d $fa3, $fa1 fmov.d $fa0, $fs3 @@ -755,8 +777,11 @@ fast_nuclear_W: # @fast_nuclear_W fmov.d $fa3, $fs0 pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_7) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_7) + lu12i.w $a0, 412048 + ori $a0, $a0, 1741 + lu32i.d $a0, 406756 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa2, $a0 fsub.d $fa2, $fa2, $fa0 fneg.d $fa3, $fa1 fmov.d $fa0, $fs3 @@ -786,8 +811,11 @@ fast_nuclear_W: # @fast_nuclear_W fmov.d $fa3, $fs0 pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_8) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_8) + lu12i.w $a0, 31876 + ori $a0, $a0, 2910 + lu32i.d $a0, 245053 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa2, $a0 fsub.d $fa2, $fa2, $fa0 fneg.d $fa3, $fa1 fmov.d $fa0, $fs3 @@ -811,8 +839,11 @@ fast_nuclear_W: # @fast_nuclear_W fmov.d $fa3, $fs0 pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_9) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_9) + lu12i.w $a0, -105697 + ori $fp, $a0, 2209 + lu32i.d $fp, -75858 + lu52i.d $a0, $fp, 1030 + movgr2fr.d $fa2, $a0 fsub.d $fa2, $fa2, $fa0 fneg.d $fa3, $fa1 fmov.d $fa0, $fs3 @@ -836,8 +867,11 @@ fast_nuclear_W: # @fast_nuclear_W fmov.d $fa3, $fs0 pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_10) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_10) + lu12i.w $a0, -464729 + ori $a0, $a0, 478 + lu32i.d $a0, 406756 + lu52i.d $a0, $a0, 1031 + movgr2fr.d $fa2, $a0 fsub.d $fa2, $fa2, $fa0 fneg.d $fa3, $fa1 fmov.d $fa0, $fs3 @@ -861,8 +895,11 @@ fast_nuclear_W: # @fast_nuclear_W fmov.d $fa3, $fs0 pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_11) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_11) + lu12i.w $a0, 18454 + ori $a0, $a0, 3840 + lu32i.d $a0, -116283 + lu52i.d $a0, $a0, 1031 + movgr2fr.d $fa2, $a0 fsub.d $fa2, $fa2, $fa0 fneg.d $fa3, $fa1 fmov.d $fa0, $fs3 @@ -886,8 +923,11 @@ fast_nuclear_W: # @fast_nuclear_W fmov.d $fa3, $fs0 pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_12) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_12) + lu12i.w $a0, -182872 + ori $a0, $a0, 1416 + lu32i.d $a0, 245052 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa2, $a0 fsub.d $fa2, $fa2, $fa0 fneg.d $fa3, $fa1 fmov.d $fa0, $fs3 @@ -911,8 +951,11 @@ fast_nuclear_W: # @fast_nuclear_W fmov.d $fa3, $fs0 pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_13) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_13) + lu12i.w $a0, 25165 + ori $a0, $a0, 3375 + lu32i.d $a0, -459903 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa2, $a0 fsub.d $fa2, $fa2, $fa0 fneg.d $fa3, $fa1 fmov.d $fa0, $fs3 @@ -936,8 +979,8 @@ fast_nuclear_W: # @fast_nuclear_W fmov.d $fa3, $fs0 pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_14) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_14) + lu52i.d $a0, $fp, 1032 + movgr2fr.d $fa2, $a0 fsub.d $fa2, $fa2, $fa0 fneg.d $fa3, $fa1 fmov.d $fa0, $fs5 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/init.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/init.s index 965baec5..320462b8 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/init.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/init.s @@ -1,23 +1,22 @@ .file "init.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function calculate_derived_inputs -.LCPI0_0: - .dword 0x3ff6a09e667f3bcd # double 1.4142135623730951 .text - .globl calculate_derived_inputs + .globl calculate_derived_inputs # -- Begin function calculate_derived_inputs .p2align 5 .type calculate_derived_inputs,@function calculate_derived_inputs: # @calculate_derived_inputs # %bb.0: ld.wu $a1, $a0, 28 srli.d $a2, $a1, 31 + fld.s $fa0, $a0, 56 add.w $a1, $a1, $a2 srai.d $a1, $a1, 1 - fld.s $fa0, $a0, 56 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a2, %pc_lo12(.LCPI0_0) st.w $a1, $a0, 28 fcvt.d.s $fa0, $fa0 + lu12i.w $a2, 419827 + ori $a2, $a2, 3021 + lu32i.d $a2, 434334 + lu52i.d $a2, $a2, 1023 + movgr2fr.d $fa1, $a2 fld.s $fa2, $a0, 20 fmul.d $fa0, $fa0, $fa1 movgr2fr.w $fa1, $a1 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/solver.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/solver.s index 3340d0ab..83798199 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/solver.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/solver.s @@ -1392,12 +1392,7 @@ interpolateTable: # @interpolateTable .Lfunc_end1: .size interpolateTable, .Lfunc_end1-interpolateTable # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function transport_sweep -.LCPI2_0: - .dword 0x3690000000000000 # double 7.0064923216240854E-46 - .text - .globl transport_sweep + .globl transport_sweep # -- Begin function transport_sweep .p2align 5 .type transport_sweep,@function transport_sweep: # @transport_sweep @@ -1735,6 +1730,7 @@ transport_sweep: # @transport_sweep .LBB2_24: # in Loop: Header=BB2_22 Depth=5 ld.d $a0, $sp, 152 # 8-byte Folded Reload add.d $a0, $s6, $a0 + addi.w $s6, $a0, 0 movgr2fr.w $fa1, $a0 ffint.s.w $fa1, $fa1 fcvt.d.s $fa1, $fa1 @@ -1742,12 +1738,11 @@ transport_sweep: # @transport_sweep fcvt.s.d $fs6, $fa1 fsub.s $fa2, $fs6, $fs2 fcvt.d.s $fa2, $fa2 - pcalau12i $a1, %pc_hi20(.LCPI2_0) - fld.d $fa3, $a1, %pc_lo12(.LCPI2_0) fdiv.d $fa0, $fa2, $fa0 - addi.w $s6, $a0, 0 fcvt.s.d $fa2, $fa0 - fcmp.cle.d $fcc0, $fa1, $fa3 + lu52i.d $a0, $zero, 873 + movgr2fr.d $fa0, $a0 + fcmp.cle.d $fcc0, $fa1, $fa0 fsub.s $fs5, $fs5, $fa2 vst $vr2, $sp, 208 # 16-byte Folded Spill bcnez $fcc0, .LBB2_27 @@ -4399,12 +4394,7 @@ alt_attenuate_fluxes: # @alt_attenuate_fluxes .Lfunc_end8: .size alt_attenuate_fluxes, .Lfunc_end8-alt_attenuate_fluxes # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function renormalize_flux -.LCPI9_0: - .dword 0x400921fb54442d18 # double 3.1415926535897931 - .text - .globl renormalize_flux + .globl renormalize_flux # -- Begin function renormalize_flux .p2align 5 .type renormalize_flux,@function renormalize_flux: # @renormalize_flux @@ -4544,12 +4534,15 @@ renormalize_flux: # @renormalize_flux # %bb.14: # %.preheader74.lr.ph.us.us.preheader move $a3, $zero ld.d $a4, $s0, 16 - pcalau12i $a5, %pc_hi20(.LCPI9_0) - fld.d $fa1, $a5, %pc_lo12(.LCPI9_0) - vldi $vr2, -1264 - fmul.s $fa2, $fa0, $fa2 - fcvt.d.s $fa2, $fa2 - fmul.d $fa1, $fa2, $fa1 + vldi $vr1, -1264 + fmul.s $fa1, $fa0, $fa1 + fcvt.d.s $fa1, $fa1 + lu12i.w $a5, 345154 + ori $a5, $a5, 3352 + lu32i.d $a5, -450053 + lu52i.d $a5, $a5, 1024 + movgr2fr.d $fa2, $a5 + fmul.d $fa1, $fa1, $fa2 movgr2fr.w $fa2, $a1 ffint.d.w $fa2, $fa2 fmul.d $fa1, $fa1, $fa2 @@ -4797,34 +4790,30 @@ renormalize_flux: # @renormalize_flux .Lfunc_end9: .size renormalize_flux, .Lfunc_end9-renormalize_flux # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function update_sources -.LCPI10_0: - .dword 0x402921fb54442d18 # double 12.566370614359172 - .text - .globl update_sources + .globl update_sources # -- Begin function update_sources .p2align 5 .type update_sources,@function update_sources: # @update_sources # %bb.0: - addi.d $sp, $sp, -176 - st.d $ra, $sp, 168 # 8-byte Folded Spill - st.d $fp, $sp, 160 # 8-byte Folded Spill - st.d $s0, $sp, 152 # 8-byte Folded Spill - st.d $s1, $sp, 144 # 8-byte Folded Spill - st.d $s2, $sp, 136 # 8-byte Folded Spill - st.d $s3, $sp, 128 # 8-byte Folded Spill - st.d $s4, $sp, 120 # 8-byte Folded Spill - st.d $s5, $sp, 112 # 8-byte Folded Spill - st.d $s6, $sp, 104 # 8-byte Folded Spill - st.d $s7, $sp, 96 # 8-byte Folded Spill - st.d $s8, $sp, 88 # 8-byte Folded Spill - fst.d $fs0, $sp, 80 # 8-byte Folded Spill - fst.d $fs1, $sp, 72 # 8-byte Folded Spill + addi.d $sp, $sp, -192 + st.d $ra, $sp, 184 # 8-byte Folded Spill + st.d $fp, $sp, 176 # 8-byte Folded Spill + st.d $s0, $sp, 168 # 8-byte Folded Spill + st.d $s1, $sp, 160 # 8-byte Folded Spill + st.d $s2, $sp, 152 # 8-byte Folded Spill + st.d $s3, $sp, 144 # 8-byte Folded Spill + st.d $s4, $sp, 136 # 8-byte Folded Spill + st.d $s5, $sp, 128 # 8-byte Folded Spill + st.d $s6, $sp, 120 # 8-byte Folded Spill + st.d $s7, $sp, 112 # 8-byte Folded Spill + st.d $s8, $sp, 104 # 8-byte Folded Spill + fst.d $fs0, $sp, 96 # 8-byte Folded Spill + fst.d $fs1, $sp, 88 # 8-byte Folded Spill + fst.d $fs2, $sp, 80 # 8-byte Folded Spill move $s0, $a1 ld.w $a1, $a1, 36 fmov.s $fs0, $fa0 - st.d $a0, $sp, 8 # 8-byte Folded Spill + st.d $a0, $sp, 16 # 8-byte Folded Spill slli.d $s5, $a1, 2 move $a0, $s5 pcaddu18i $ra, %call36(malloc) @@ -4835,11 +4824,11 @@ update_sources: # @update_sources pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 ld.d $s6, $s0, 120 - st.d $a0, $sp, 56 # 8-byte Folded Spill + st.d $a0, $sp, 64 # 8-byte Folded Spill slli.d $a0, $s6, 2 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 - st.d $a0, $sp, 16 # 8-byte Folded Spill + st.d $a0, $sp, 24 # 8-byte Folded Spill move $a0, $s5 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 @@ -4850,25 +4839,30 @@ update_sources: # @update_sources move $s5, $a0 blez $s6, .LBB10_23 # %bb.1: # %.lr.ph84 - st.d $zero, $sp, 24 # 8-byte Folded Spill + st.d $zero, $sp, 32 # 8-byte Folded Spill frecip.s $fs0, $fs0 addi.d $a0, $s5, 16 - st.d $a0, $sp, 64 # 8-byte Folded Spill - st.d $s2, $sp, 32 # 8-byte Folded Spill + st.d $a0, $sp, 72 # 8-byte Folded Spill + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1026 + movgr2fr.d $fs1, $a0 + st.d $s2, $sp, 40 # 8-byte Folded Spill b .LBB10_3 .p2align 4, , 16 .LBB10_2: # %._crit_edge81 # in Loop: Header=BB10_3 Depth=1 - ld.d $a0, $sp, 56 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload pcaddu18i $ra, %call36(pairwise_sum) jirl $ra, $ra, 0 ld.d $s6, $s0, 120 - ld.d $a2, $sp, 24 # 8-byte Folded Reload + ld.d $a2, $sp, 32 # 8-byte Folded Reload slli.d $a0, $a2, 2 addi.d $a2, $a2, 1 - ld.d $a1, $sp, 16 # 8-byte Folded Reload + ld.d $a1, $sp, 24 # 8-byte Folded Reload fstx.s $fa0, $a1, $a0 - st.d $a2, $sp, 24 # 8-byte Folded Spill + st.d $a2, $sp, 32 # 8-byte Folded Spill bge $a2, $s6, .LBB10_23 .LBB10_3: # =>This Loop Header: Depth=1 # Child Loop BB10_7 Depth 2 @@ -4880,35 +4874,35 @@ update_sources: # @update_sources blez $a1, .LBB10_2 # %bb.4: # %.preheader69.preheader # in Loop: Header=BB10_3 Depth=1 - ld.d $a0, $sp, 8 # 8-byte Folded Reload + ld.d $a0, $sp, 16 # 8-byte Folded Reload ld.d $a0, $a0, 16 - ld.d $a2, $sp, 24 # 8-byte Folded Reload + ld.d $a2, $sp, 32 # 8-byte Folded Reload slli.d $a1, $a2, 5 alsl.d $a1, $a2, $a1, 4 add.d $a2, $a0, $a1 ldx.d $s3, $a0, $a1 ld.d $a0, $a2, 8 - st.d $a0, $sp, 40 # 8-byte Folded Spill + st.d $a0, $sp, 48 # 8-byte Folded Spill ld.d $s7, $a2, 32 ld.d $s1, $a2, 40 - move $s6, $zero - st.d $s3, $sp, 48 # 8-byte Folded Spill + move $s4, $zero + st.d $s3, $sp, 56 # 8-byte Folded Spill b .LBB10_7 .p2align 4, , 16 .LBB10_5: # in Loop: Header=BB10_7 Depth=2 - ld.d $s2, $sp, 32 # 8-byte Folded Reload - ld.d $s3, $sp, 48 # 8-byte Folded Reload + ld.d $s2, $sp, 40 # 8-byte Folded Reload + ld.d $s3, $sp, 56 # 8-byte Folded Reload .LBB10_6: # %._crit_edge78 # in Loop: Header=BB10_7 Depth=2 move $a0, $fp pcaddu18i $ra, %call36(pairwise_sum) jirl $ra, $ra, 0 ld.w $a1, $s0, 12 - slli.d $a0, $s6, 2 - addi.d $s6, $s6, 1 - ld.d $a2, $sp, 56 # 8-byte Folded Reload + slli.d $a0, $s4, 2 + addi.d $s4, $s4, 1 + ld.d $a2, $sp, 64 # 8-byte Folded Reload fstx.s $fa0, $a2, $a0 - bge $s6, $a1, .LBB10_2 + bge $s4, $a1, .LBB10_2 .LBB10_7: # %.preheader69 # Parent Loop BB10_3 Depth=1 # => This Loop Header: Depth=2 @@ -4920,7 +4914,7 @@ update_sources: # @update_sources blez $a1, .LBB10_10 # %bb.8: # %.lr.ph # in Loop: Header=BB10_7 Depth=2 - slli.d $a0, $s6, 3 + slli.d $a0, $s4, 3 ldx.d $a0, $s3, $a0 move $a2, $a1 move $a3, $s7 @@ -4948,11 +4942,11 @@ update_sources: # @update_sources blez $a1, .LBB10_6 # %bb.11: # %.preheader.lr.ph # in Loop: Header=BB10_7 Depth=2 - move $s4, $zero - fmul.s $fs1, $fs0, $fa0 - alsl.d $s8, $s6, $s3, 3 - ld.d $a0, $sp, 40 # 8-byte Folded Reload - alsl.d $s2, $s6, $a0, 3 + move $s6, $zero + fmul.s $fs2, $fs0, $fa0 + alsl.d $s8, $s4, $s3, 3 + ld.d $a0, $sp, 48 # 8-byte Folded Reload + alsl.d $s2, $s4, $a0, 3 b .LBB10_13 .p2align 4, , 16 .LBB10_12: # %._crit_edge76 @@ -4962,31 +4956,29 @@ update_sources: # @update_sources jirl $ra, $ra, 0 ldx.d $a0, $s7, $s3 fld.s $fa1, $a0, 8 - fmadd.s $fa0, $fs1, $fa1, $fa0 ld.d $a0, $s2, 0 - pcalau12i $a1, %pc_hi20(.LCPI10_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI10_0) - slli.d $a2, $s4, 2 - fldx.s $fa2, $a0, $a2 + fmadd.s $fa0, $fs2, $fa1, $fa0 + slli.d $a2, $s6, 2 + fldx.s $fa1, $a0, $a2 fcvt.d.s $fa0, $fa0 - fdiv.d $fa0, $fa0, $fa1 + fdiv.d $fa0, $fa0, $fs1 fcvt.s.d $fa0, $fa0 - fsub.s $fa1, $fa0, $fa2 - fmul.s $fa1, $fa1, $fa1 + fsub.s $fa2, $fa0, $fa1 fmul.s $fa2, $fa2, $fa2 + fmul.s $fa1, $fa1, $fa1 ld.w $a1, $s0, 36 - fdiv.s $fa1, $fa1, $fa2 + fdiv.s $fa1, $fa2, $fa1 fstx.s $fa1, $fp, $a2 - addi.d $s4, $s4, 1 + addi.d $s6, $s6, 1 fstx.s $fa0, $a0, $a2 - bge $s4, $a1, .LBB10_5 + bge $s6, $a1, .LBB10_5 .LBB10_13: # %.preheader # Parent Loop BB10_3 Depth=1 # Parent Loop BB10_7 Depth=2 # => This Loop Header: Depth=3 # Child Loop BB10_21 Depth 4 # Child Loop BB10_17 Depth 4 - slli.d $s3, $s4, 3 + slli.d $s3, $s6, 3 blez $a1, .LBB10_12 # %bb.14: # %.lr.ph75 # in Loop: Header=BB10_13 Depth=3 @@ -5035,7 +5027,7 @@ update_sources: # @update_sources slli.d $a3, $a3, 3 addi.d $a4, $a2, 16 addi.d $a5, $a0, 16 - ld.d $a6, $sp, 64 # 8-byte Folded Reload + ld.d $a6, $sp, 72 # 8-byte Folded Reload move $a7, $a3 .p2align 4, , 16 .LBB10_21: # %vector.body @@ -5061,7 +5053,7 @@ update_sources: # @update_sources beq $a3, $a1, .LBB10_12 b .LBB10_16 .LBB10_23: # %._crit_edge85 - ld.d $s0, $sp, 16 # 8-byte Folded Reload + ld.d $s0, $sp, 24 # 8-byte Folded Reload move $a0, $s0 move $a1, $s6 pcaddu18i $ra, %call36(pairwise_sum) @@ -5076,27 +5068,28 @@ update_sources: # @update_sources move $a0, $fp pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 56 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 move $a0, $s0 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 fmov.s $fa0, $fs0 - fld.d $fs1, $sp, 72 # 8-byte Folded Reload - fld.d $fs0, $sp, 80 # 8-byte Folded Reload - ld.d $s8, $sp, 88 # 8-byte Folded Reload - ld.d $s7, $sp, 96 # 8-byte Folded Reload - ld.d $s6, $sp, 104 # 8-byte Folded Reload - ld.d $s5, $sp, 112 # 8-byte Folded Reload - ld.d $s4, $sp, 120 # 8-byte Folded Reload - ld.d $s3, $sp, 128 # 8-byte Folded Reload - ld.d $s2, $sp, 136 # 8-byte Folded Reload - ld.d $s1, $sp, 144 # 8-byte Folded Reload - ld.d $s0, $sp, 152 # 8-byte Folded Reload - ld.d $fp, $sp, 160 # 8-byte Folded Reload - ld.d $ra, $sp, 168 # 8-byte Folded Reload - addi.d $sp, $sp, 176 + fld.d $fs2, $sp, 80 # 8-byte Folded Reload + fld.d $fs1, $sp, 88 # 8-byte Folded Reload + fld.d $fs0, $sp, 96 # 8-byte Folded Reload + ld.d $s8, $sp, 104 # 8-byte Folded Reload + ld.d $s7, $sp, 112 # 8-byte Folded Reload + ld.d $s6, $sp, 120 # 8-byte Folded Reload + ld.d $s5, $sp, 128 # 8-byte Folded Reload + ld.d $s4, $sp, 136 # 8-byte Folded Reload + ld.d $s3, $sp, 144 # 8-byte Folded Reload + ld.d $s2, $sp, 152 # 8-byte Folded Reload + ld.d $s1, $sp, 160 # 8-byte Folded Reload + ld.d $s0, $sp, 168 # 8-byte Folded Reload + ld.d $fp, $sp, 176 # 8-byte Folded Reload + ld.d $ra, $sp, 184 # 8-byte Folded Reload + addi.d $sp, $sp, 192 ret .Lfunc_end10: .size update_sources, .Lfunc_end10-update_sources diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/tracks.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/tracks.s index a26d25ef..1339211c 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/tracks.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/tracks.s @@ -633,10 +633,6 @@ free_tracks: # @free_tracks .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI6_1: - .dword 0x400921fb54442d18 # double 3.1415926535897931 .text .globl generate_polar_angles .p2align 5 @@ -654,8 +650,9 @@ generate_polar_angles: # @generate_polar_angles # %bb.1: # %.lr.ph bstrpick.d $a1, $fp, 31, 0 movgr2fr.d $fa0, $a1 - ori $a1, $zero, 4 ffint.d.l $fa0, $fa0 + ori $a1, $zero, 4 + lu12i.w $a2, 345154 bgeu $fp, $a1, .LBB6_3 # %bb.2: move $a1, $zero @@ -663,37 +660,36 @@ generate_polar_angles: # @generate_polar_angles .LBB6_3: # %vector.ph bstrpick.d $a1, $fp, 30, 2 slli.d $a1, $a1, 2 - pcalau12i $a2, %pc_hi20(.LCPI6_0) - vld $vr1, $a2, %pc_lo12(.LCPI6_0) + pcalau12i $a3, %pc_hi20(.LCPI6_0) + vld $vr1, $a3, %pc_lo12(.LCPI6_0) vreplvei.d $vr2, $vr0, 0 - lu52i.d $a2, $zero, 1022 - vreplgr2vr.d $vr3, $a2 - lu12i.w $a2, 345154 - ori $a2, $a2, 3352 - lu32i.d $a2, -450053 - lu52i.d $a2, $a2, 1024 - vreplgr2vr.d $vr4, $a2 - move $a2, $a0 - move $a3, $a1 + lu52i.d $a3, $zero, 1022 + vreplgr2vr.d $vr3, $a3 + ori $a3, $a2, 3352 + lu32i.d $a3, -450053 + lu52i.d $a3, $a3, 1024 + vreplgr2vr.d $vr4, $a3 + move $a3, $a0 + move $a4, $a1 .p2align 4, , 16 .LBB6_4: # %vector.body # =>This Inner Loop Header: Depth=1 - vpickve2gr.w $a4, $vr1, 1 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa5, $a4 + vpickve2gr.w $a5, $vr1, 1 + bstrpick.d $a5, $a5, 31, 0 + movgr2fr.d $fa5, $a5 ffint.d.l $fa5, $fa5 - vpickve2gr.w $a4, $vr1, 0 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa6, $a4 + vpickve2gr.w $a5, $vr1, 0 + bstrpick.d $a5, $a5, 31, 0 + movgr2fr.d $fa6, $a5 ffint.d.l $fa6, $fa6 vextrins.d $vr6, $vr5, 16 - vpickve2gr.w $a4, $vr1, 3 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa5, $a4 + vpickve2gr.w $a5, $vr1, 3 + bstrpick.d $a5, $a5, 31, 0 + movgr2fr.d $fa5, $a5 ffint.d.l $fa5, $fa5 - vpickve2gr.w $a4, $vr1, 2 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa7, $a4 + vpickve2gr.w $a5, $vr1, 2 + bstrpick.d $a5, $a5, 31, 0 + movgr2fr.d $fa7, $a5 ffint.d.l $fa7, $fa7 vextrins.d $vr7, $vr5, 16 vfadd.d $vr5, $vr7, $vr3 @@ -713,34 +709,36 @@ generate_polar_angles: # @generate_polar_angles vreplvei.d $vr5, $vr5, 1 fcvt.s.d $fa5, $fa5 vextrins.w $vr6, $vr5, 48 - vst $vr6, $a2, 0 + vst $vr6, $a3, 0 vaddi.wu $vr1, $vr1, 4 - addi.d $a3, $a3, -4 - addi.d $a2, $a2, 16 - bnez $a3, .LBB6_4 + addi.d $a4, $a4, -4 + addi.d $a3, $a3, 16 + bnez $a4, .LBB6_4 # %bb.5: # %middle.block beq $a1, $fp, .LBB6_8 .LBB6_6: # %scalar.ph.preheader - pcalau12i $a2, %pc_hi20(.LCPI6_1) - fld.d $fa1, $a2, %pc_lo12(.LCPI6_1) - alsl.d $a2, $a1, $a0, 2 - sub.d $a3, $fp, $a1 - vldi $vr2, -928 + alsl.d $a3, $a1, $a0, 2 + sub.d $a4, $fp, $a1 + vldi $vr1, -928 + ori $a2, $a2, 3352 + lu32i.d $a2, -450053 + lu52i.d $a2, $a2, 1024 + movgr2fr.d $fa2, $a2 .p2align 4, , 16 .LBB6_7: # %scalar.ph # =>This Inner Loop Header: Depth=1 - bstrpick.d $a4, $a1, 31, 0 - movgr2fr.d $fa3, $a4 + bstrpick.d $a2, $a1, 31, 0 + movgr2fr.d $fa3, $a2 ffint.d.l $fa3, $fa3 - fadd.d $fa3, $fa3, $fa2 - fmul.d $fa3, $fa3, $fa1 + fadd.d $fa3, $fa3, $fa1 + fmul.d $fa3, $fa3, $fa2 fdiv.d $fa3, $fa3, $fa0 fcvt.s.d $fa3, $fa3 - fst.s $fa3, $a2, 0 + fst.s $fa3, $a3, 0 addi.w $a1, $a1, 1 - addi.d $a3, $a3, -1 - addi.d $a2, $a2, 4 - bnez $a3, .LBB6_7 + addi.d $a4, $a4, -1 + addi.d $a3, $a3, 4 + bnez $a4, .LBB6_7 .LBB6_8: # %._crit_edge ld.d $fp, $sp, 0 # 8-byte Folded Reload ld.d $ra, $sp, 8 # 8-byte Folded Reload diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/utils.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/utils.s index 177a2632..4bb50561 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/utils.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/SimpleMOC/CMakeFiles/SimpleMOC.dir/utils.s @@ -1,10 +1,6 @@ .file "utils.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function urand -.LCPI0_0: - .word 0x30000000 # float 4.65661287E-10 .text - .globl urand + .globl urand # -- Begin function urand .p2align 5 .type urand,@function urand: # @urand @@ -13,27 +9,18 @@ urand: # @urand st.d $ra, $sp, 8 # 8-byte Folded Spill pcaddu18i $ra, %call36(glibc_compat_rand) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI0_0) + movgr2fr.w $fa0, $a0 + ffint.s.w $fa0, $fa0 + lu12i.w $a0, 196608 movgr2fr.w $fa1, $a0 - ffint.s.w $fa1, $fa1 - fmul.s $fa0, $fa1, $fa0 + fmul.s $fa0, $fa0, $fa1 ld.d $ra, $sp, 8 # 8-byte Folded Reload addi.d $sp, $sp, 16 ret .Lfunc_end0: .size urand, .Lfunc_end0-urand # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function nrand -.LCPI1_0: - .word 0x30000000 # float 4.65661287E-10 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI1_1: - .dword 0x401921fb54442d18 # double 6.2831853071795862 - .text - .globl nrand + .globl nrand # -- Begin function nrand .p2align 5 .type nrand,@function nrand: # @nrand @@ -49,10 +36,10 @@ nrand: # @nrand fmov.s $fs1, $fa0 pcaddu18i $ra, %call36(glibc_compat_rand) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI1_0) - fld.s $fs2, $a1, %pc_lo12(.LCPI1_0) movgr2fr.w $fa0, $a0 ffint.s.w $fa0, $fa0 + lu12i.w $a0, 196608 + movgr2fr.w $fs2, $a0 fmul.s $fs4, $fa0, $fs2 pcaddu18i $ra, %call36(glibc_compat_rand) jirl $ra, $ra, 0 @@ -68,10 +55,13 @@ nrand: # @nrand fcmp.cor.d $fcc0, $fs2, $fs2 bceqz $fcc0, .LBB1_2 .LBB1_1: # %.split - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_1) - fcvt.d.s $fa1, $fs3 - fmul.d $fa0, $fa1, $fa0 + fcvt.d.s $fa0, $fs3 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1025 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 fmul.d $fa0, $fs2, $fa0 @@ -142,12 +132,7 @@ pairwise_sum: # @pairwise_sum .Lfunc_end2: .size pairwise_sum, .Lfunc_end2-pairwise_sum # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function buildExponentialTable -.LCPI3_0: - .dword 0x3f847ae147ae147b # double 0.01 - .text - .globl buildExponentialTable + .globl buildExponentialTable # -- Begin function buildExponentialTable .p2align 5 .type buildExponentialTable,@function buildExponentialTable: # @buildExponentialTable @@ -165,11 +150,14 @@ buildExponentialTable: # @buildExponentialTable fst.d $fs1, $sp, 16 # 8-byte Folded Spill fmov.s $fs0, $fa1 move $fp, $a0 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI3_0) fcvt.d.s $fa0, $fa0 - vldi $vr2, -992 - fmul.d $fa0, $fa0, $fa2 + vldi $vr1, -992 + fmul.d $fa0, $fa0, $fa1 + lu12i.w $a0, 293601 + ori $a0, $a0, 1147 + lu32i.d $a0, 293601 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 frecip.d $fa1, $fa0 fsqrt.d $fa0, $fa1 @@ -245,12 +233,7 @@ buildExponentialTable: # @buildExponentialTable .Lfunc_end3: .size buildExponentialTable, .Lfunc_end3-buildExponentialTable # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function get_time -.LCPI4_0: - .dword 0x412e848000000000 # double 1.0E+6 - .text - .globl get_time + .globl get_time # -- Begin function get_time .p2align 5 .type get_time,@function get_time: # @get_time @@ -259,11 +242,13 @@ get_time: # @get_time st.d $ra, $sp, 8 # 8-byte Folded Spill pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI4_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI4_0) + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 movgr2fr.d $fa1, $a0 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 ld.d $ra, $sp, 8 # 8-byte Folded Reload addi.d $sp, $sp, 16 ret @@ -334,24 +319,21 @@ est_mem_usage: # @est_mem_usage .Lfunc_end5: .size est_mem_usage, .Lfunc_end5-est_mem_usage # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function time_per_intersection -.LCPI6_0: - .dword 0x41cdcd6500000000 # double 1.0E+9 - .text - .globl time_per_intersection + .globl time_per_intersection # -- Begin function time_per_intersection .p2align 5 .type time_per_intersection,@function time_per_intersection: # @time_per_intersection # %bb.0: ld.d $a1, $a0, 144 movgr2fr.d $fa1, $a1 - pcalau12i $a1, %pc_hi20(.LCPI6_0) - fld.d $fa2, $a1, %pc_lo12(.LCPI6_0) - ld.w $a0, $a0, 36 ffint.d.l $fa1, $fa1 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa2 + ori $a1, $zero, 0 + lu32i.d $a1, -144027 + ld.w $a0, $a0, 36 + lu52i.d $a1, $a1, 1052 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 movgr2fr.w $fa1, $a0 ffint.d.w $fa1, $fa1 fdiv.d $fa0, $fa0, $fa1 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/XSBench/CMakeFiles/XSBench.dir/GridInit.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/XSBench/CMakeFiles/XSBench.dir/GridInit.s index 30ffc8a6..45fb530b 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/XSBench/CMakeFiles/XSBench.dir/GridInit.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/XSBench/CMakeFiles/XSBench.dir/GridInit.s @@ -1,10 +1,6 @@ .file "GridInit.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function generate_grids -.LCPI0_0: - .dword 0x41dfffffffc00000 # double 2147483647 .text - .globl generate_grids + .globl generate_grids # -- Begin function generate_grids .p2align 5 .type generate_grids,@function generate_grids: # @generate_grids @@ -26,9 +22,10 @@ generate_grids: # @generate_grids # %bb.2: # %.preheader.us.preheader move $s0, $a1 move $s1, $a0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_0) move $s2, $zero + lu12i.w $a0, -1024 + lu52i.d $a0, $a0, 1053 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB0_3: # %.preheader.us # =>This Loop Header: Depth=1 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/XSBench/CMakeFiles/XSBench.dir/Materials.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/XSBench/CMakeFiles/XSBench.dir/Materials.s index e33060be..0be212bf 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/XSBench/CMakeFiles/XSBench.dir/Materials.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/XSBench/CMakeFiles/XSBench.dir/Materials.s @@ -825,12 +825,7 @@ load_mats: # @load_mats .Lfunc_end1: .size load_mats, .Lfunc_end1-load_mats # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function load_concs -.LCPI2_0: - .dword 0x41dfffffffc00000 # double 2147483647 - .text - .globl load_concs + .globl load_concs # -- Begin function load_concs .p2align 5 .type load_concs,@function load_concs: # @load_concs @@ -853,13 +848,13 @@ load_concs: # @load_concs pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 ld.w $a1, $fp, 0 - st.d $a1, $sp, 16 # 8-byte Folded Spill + st.d $a1, $sp, 24 # 8-byte Folded Spill move $s4, $a0 slli.d $a0, $a1, 3 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 ld.w $a1, $fp, 4 - st.d $a1, $sp, 8 # 8-byte Folded Spill + st.d $a1, $sp, 16 # 8-byte Folded Spill move $s3, $a0 st.d $a0, $s4, 0 slli.d $a0, $a1, 3 @@ -908,35 +903,34 @@ load_concs: # @load_concs pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 ld.w $a1, $fp, 36 - st.d $a0, $sp, 24 # 8-byte Folded Spill + st.d $a0, $sp, 32 # 8-byte Folded Spill st.d $a0, $s4, 64 slli.d $a0, $a1, 3 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 ld.w $a1, $fp, 40 - st.d $a0, $sp, 32 # 8-byte Folded Spill + st.d $a0, $sp, 40 # 8-byte Folded Spill st.d $a0, $s4, 72 slli.d $a0, $a1, 3 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 ld.w $a1, $fp, 44 - st.d $a0, $sp, 40 # 8-byte Folded Spill + st.d $a0, $sp, 48 # 8-byte Folded Spill st.d $a0, $s4, 80 slli.d $a0, $a1, 3 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 - st.d $a0, $sp, 48 # 8-byte Folded Spill - st.d $a0, $s4, 88 - pcalau12i $a0, %pc_hi20(.LCPI2_0) st.d $a0, $sp, 56 # 8-byte Folded Spill - ld.d $a0, $sp, 8 # 8-byte Folded Reload - ld.d $a1, $sp, 16 # 8-byte Folded Reload + st.d $a0, $s4, 88 + ld.d $a0, $sp, 16 # 8-byte Folded Reload + ld.d $a1, $sp, 24 # 8-byte Folded Reload blez $a1, .LBB2_4 # %bb.1: # %.lr.ph.preheader - st.d $s4, $sp, 16 # 8-byte Folded Spill - ld.d $a0, $sp, 56 # 8-byte Folded Reload - fld.d $fs0, $a0, %pc_lo12(.LCPI2_0) + st.d $s4, $sp, 24 # 8-byte Folded Spill move $s4, $zero + lu12i.w $a0, -1024 + lu52i.d $a0, $a0, 1053 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB2_2: # %.lr.ph # =>This Inner Loop Header: Depth=1 @@ -952,13 +946,14 @@ load_concs: # @load_concs blt $s4, $a0, .LBB2_2 # %bb.3: # %._crit_edge.loopexit ld.w $a0, $fp, 4 - ld.d $s4, $sp, 16 # 8-byte Folded Reload + ld.d $s4, $sp, 24 # 8-byte Folded Reload .LBB2_4: # %._crit_edge blez $a0, .LBB2_7 # %bb.5: # %.lr.ph.1.preheader - ld.d $a0, $sp, 56 # 8-byte Folded Reload - fld.d $fs0, $a0, %pc_lo12(.LCPI2_0) move $s3, $zero + lu12i.w $a0, -1024 + lu52i.d $a0, $a0, 1053 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB2_6: # %.lr.ph.1 # =>This Inner Loop Header: Depth=1 @@ -974,11 +969,12 @@ load_concs: # @load_concs blt $s3, $a0, .LBB2_6 .LBB2_7: # %._crit_edge.1 ld.w $a0, $fp, 8 - ld.d $s3, $sp, 56 # 8-byte Folded Reload + lu12i.w $s3, -1024 blez $a0, .LBB2_10 # %bb.8: # %.lr.ph.2.preheader - fld.d $fs0, $s3, %pc_lo12(.LCPI2_0) move $s2, $zero + lu52i.d $a0, $s3, 1053 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB2_9: # %.lr.ph.2 # =>This Inner Loop Header: Depth=1 @@ -996,8 +992,9 @@ load_concs: # @load_concs ld.w $a0, $fp, 12 blez $a0, .LBB2_13 # %bb.11: # %.lr.ph.3.preheader - fld.d $fs0, $s3, %pc_lo12(.LCPI2_0) move $s1, $zero + lu52i.d $a0, $s3, 1053 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB2_12: # %.lr.ph.3 # =>This Inner Loop Header: Depth=1 @@ -1013,11 +1010,12 @@ load_concs: # @load_concs blt $s1, $a0, .LBB2_12 .LBB2_13: # %._crit_edge.3 ld.w $a0, $fp, 16 - ld.d $s2, $sp, 40 # 8-byte Folded Reload + ld.d $s2, $sp, 48 # 8-byte Folded Reload blez $a0, .LBB2_16 # %bb.14: # %.lr.ph.4.preheader - fld.d $fs0, $s3, %pc_lo12(.LCPI2_0) move $s0, $zero + lu52i.d $a0, $s3, 1053 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB2_15: # %.lr.ph.4 # =>This Inner Loop Header: Depth=1 @@ -1033,11 +1031,12 @@ load_concs: # @load_concs blt $s0, $a0, .LBB2_15 .LBB2_16: # %._crit_edge.4 ld.w $a0, $fp, 20 - ld.d $s1, $sp, 48 # 8-byte Folded Reload + ld.d $s1, $sp, 56 # 8-byte Folded Reload blez $a0, .LBB2_19 # %bb.17: # %.lr.ph.5.preheader - fld.d $fs0, $s3, %pc_lo12(.LCPI2_0) move $s0, $zero + lu52i.d $a0, $s3, 1053 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB2_18: # %.lr.ph.5 # =>This Inner Loop Header: Depth=1 @@ -1055,8 +1054,9 @@ load_concs: # @load_concs ld.w $a0, $fp, 24 blez $a0, .LBB2_22 # %bb.20: # %.lr.ph.6.preheader - fld.d $fs0, $s3, %pc_lo12(.LCPI2_0) move $s0, $zero + lu52i.d $a0, $s3, 1053 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB2_21: # %.lr.ph.6 # =>This Inner Loop Header: Depth=1 @@ -1074,8 +1074,9 @@ load_concs: # @load_concs ld.w $a0, $fp, 28 blez $a0, .LBB2_25 # %bb.23: # %.lr.ph.7.preheader - fld.d $fs0, $s3, %pc_lo12(.LCPI2_0) move $s0, $zero + lu52i.d $a0, $s3, 1053 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB2_24: # %.lr.ph.7 # =>This Inner Loop Header: Depth=1 @@ -1091,11 +1092,12 @@ load_concs: # @load_concs blt $s0, $a0, .LBB2_24 .LBB2_25: # %._crit_edge.7 ld.w $a0, $fp, 32 - ld.d $s5, $sp, 24 # 8-byte Folded Reload + ld.d $s5, $sp, 32 # 8-byte Folded Reload blez $a0, .LBB2_28 # %bb.26: # %.lr.ph.8.preheader - fld.d $fs0, $s3, %pc_lo12(.LCPI2_0) move $s0, $zero + lu52i.d $a0, $s3, 1053 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB2_27: # %.lr.ph.8 # =>This Inner Loop Header: Depth=1 @@ -1111,11 +1113,12 @@ load_concs: # @load_concs blt $s0, $a0, .LBB2_27 .LBB2_28: # %._crit_edge.8 ld.w $a0, $fp, 36 - ld.d $s5, $sp, 32 # 8-byte Folded Reload + ld.d $s5, $sp, 40 # 8-byte Folded Reload blez $a0, .LBB2_31 # %bb.29: # %.lr.ph.9.preheader - fld.d $fs0, $s3, %pc_lo12(.LCPI2_0) move $s0, $zero + lu52i.d $a0, $s3, 1053 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB2_30: # %.lr.ph.9 # =>This Inner Loop Header: Depth=1 @@ -1133,8 +1136,9 @@ load_concs: # @load_concs ld.w $a0, $fp, 40 blez $a0, .LBB2_34 # %bb.32: # %.lr.ph.10.preheader - fld.d $fs0, $s3, %pc_lo12(.LCPI2_0) move $s0, $zero + lu52i.d $a0, $s3, 1053 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB2_33: # %.lr.ph.10 # =>This Inner Loop Header: Depth=1 @@ -1152,8 +1156,9 @@ load_concs: # @load_concs ld.w $a0, $fp, 44 blez $a0, .LBB2_37 # %bb.35: # %.lr.ph.11.preheader - fld.d $fs0, $s3, %pc_lo12(.LCPI2_0) move $s0, $zero + lu52i.d $a0, $s3, 1053 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB2_36: # %.lr.ph.11 # =>This Inner Loop Header: Depth=1 @@ -1487,32 +1492,7 @@ load_concs_v: # @load_concs_v .Lfunc_end3: .size load_concs_v, .Lfunc_end3-load_concs_v # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function pick_mat -.LCPI4_0: - .dword 0x3faa9fbe76c8b439 # double 0.051999999999999998 -.LCPI4_1: - .dword 0x3fd4ed916872b021 # double 0.32700000000000001 -.LCPI4_2: - .dword 0x3fdd810624dd2f1b # double 0.46100000000000002 -.LCPI4_3: - .dword 0x3fe3ae147ae147af # double 0.6150000000000001 -.LCPI4_4: - .dword 0x3fe5ba5e353f7cee # double 0.67900000000000005 -.LCPI4_5: - .dword 0x3fe7d70a3d70a3d8 # double 0.74500000000000011 -.LCPI4_6: - .dword 0x3fe999999999999a # double 0.80000000000000004 -.LCPI4_7: - .dword 0x3fe9db22d0e56042 # double 0.80800000000000005 -.LCPI4_8: - .dword 0x3fea5604189374bd # double 0.82300000000000006 -.LCPI4_9: - .dword 0x3feb22d0e560418a # double 0.84800000000000009 -.LCPI4_10: - .dword 0x3feb8d4fdf3b645b # double 0.8610000000000001 - .text - .globl pick_mat + .globl pick_mat # -- Begin function pick_mat .p2align 5 .type pick_mat,@function pick_mat: # @pick_mat @@ -1525,68 +1505,101 @@ pick_mat: # @pick_mat fcmp.clt.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB4_12 # %bb.1: # %._crit_edge.1 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_0) + lu12i.w $a0, 486539 + ori $a0, $a0, 1081 + lu32i.d $a0, -352322 + lu52i.d $a0, $a0, 1018 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 ori $a0, $zero, 1 bcnez $fcc0, .LBB4_13 # %bb.2: # %._crit_edge.2 - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_1) + lu12i.w $a0, 427819 + ori $a0, $a0, 33 + lu32i.d $a0, 322961 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 ori $a0, $zero, 2 bcnez $fcc0, .LBB4_13 # %bb.3: # %._crit_edge.3 - pcalau12i $a0, %pc_hi20(.LCPI4_2) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_2) + lu12i.w $a0, 150994 + ori $a0, $a0, 3867 + lu32i.d $a0, -163578 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 ori $a0, $zero, 3 bcnez $fcc0, .LBB4_13 # %bb.4: # %._crit_edge.4 - pcalau12i $a0, %pc_hi20(.LCPI4_3) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_3) + lu12i.w $a0, 503316 + ori $a0, $a0, 1967 + lu32i.d $a0, 241172 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 ori $a0, $zero, 4 bcnez $fcc0, .LBB4_13 # %bb.5: # %._crit_edge.5 - pcalau12i $a0, %pc_hi20(.LCPI4_4) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_4) + lu12i.w $a0, 218103 + ori $a0, $a0, 3310 + lu32i.d $a0, 375390 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 ori $a0, $zero, 5 bcnez $fcc0, .LBB4_13 # %bb.6: # %._crit_edge.6 - pcalau12i $a0, %pc_hi20(.LCPI4_5) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_5) + lu12i.w $a0, 251658 + ori $a0, $a0, 984 + lu32i.d $a0, 513802 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 ori $a0, $zero, 6 bcnez $fcc0, .LBB4_13 # %bb.7: # %._crit_edge.7 - pcalau12i $a0, %pc_hi20(.LCPI4_6) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_6) + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 ori $a0, $zero, 7 bcnez $fcc0, .LBB4_13 # %bb.8: # %._crit_edge.8 - pcalau12i $a0, %pc_hi20(.LCPI4_7) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_7) + lu12i.w $a0, -192938 + ori $a0, $a0, 66 + lu32i.d $a0, -402654 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 ori $a0, $zero, 8 bcnez $fcc0, .LBB4_13 # %bb.9: # %._crit_edge.9 - pcalau12i $a0, %pc_hi20(.LCPI4_8) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_8) + lu12i.w $a0, 100663 + ori $a0, $a0, 1213 + lu32i.d $a0, -371196 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 ori $a0, $zero, 9 bcnez $fcc0, .LBB4_13 # %bb.10: # %._crit_edge.10 - pcalau12i $a0, %pc_hi20(.LCPI4_9) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_9) + lu12i.w $a0, -109052 + ori $a0, $a0, 394 + lu32i.d $a0, -318768 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 ori $a0, $zero, 10 bcnez $fcc0, .LBB4_13 # %bb.11: # %._crit_edge.11 - pcalau12i $a0, %pc_hi20(.LCPI4_10) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_10) + lu12i.w $a0, -134218 + ori $a0, $a0, 1115 + lu32i.d $a0, -291505 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 ori $a0, $zero, 11 bcnez $fcc0, .LBB4_13 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/XSBench/CMakeFiles/XSBench.dir/XSutils.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/XSBench/CMakeFiles/XSBench.dir/XSutils.s index 3f442b1a..00fd9c05 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/XSBench/CMakeFiles/XSBench.dir/XSutils.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/XSBench/CMakeFiles/XSBench.dir/XSutils.s @@ -161,12 +161,7 @@ binary_search: # @binary_search .Lfunc_end3: .size binary_search, .Lfunc_end3-binary_search # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function rn -.LCPI4_0: - .dword 0x41dfffffffc00000 # double 2147483647 - .text - .globl rn + .globl rn # -- Begin function rn .p2align 5 .type rn,@function rn: # @rn @@ -184,23 +179,19 @@ rn: # @rn srli.d $a2, $a2, 30 slli.d $a3, $a2, 31 sub.d $a2, $a2, $a3 - pcalau12i $a3, %pc_hi20(.LCPI4_0) - fld.d $fa0, $a3, %pc_lo12(.LCPI4_0) add.d $a1, $a1, $a2 - movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + lu12i.w $a2, -1024 + lu52i.d $a2, $a2, 1053 + movgr2fr.d $fa1, $a2 + fdiv.d $fa0, $fa0, $fa1 st.d $a1, $a0, 0 ret .Lfunc_end4: .size rn, .Lfunc_end4-rn # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function rn_v -.LCPI5_0: - .dword 0x41dfffffffc00000 # double 2147483647 - .text - .globl rn_v + .globl rn_v # -- Begin function rn_v .p2align 5 .type rn_v,@function rn_v: # @rn_v @@ -219,12 +210,13 @@ rn_v: # @rn_v srli.d $a2, $a2, 30 slli.d $a3, $a2, 31 sub.d $a2, $a2, $a3 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - fld.d $fa0, $a3, %pc_lo12(.LCPI5_0) add.d $a1, $a1, $a2 - movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + lu12i.w $a2, -1024 + lu52i.d $a2, $a2, 1053 + movgr2fr.d $fa1, $a2 + fdiv.d $fa0, $fa0, $fa1 st.d $a1, $a0, %pc_lo12(rn_v.seed) ret .Lfunc_end5: diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/init.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/init.s index 8107c31e..8014be12 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/init.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/init.s @@ -1,26 +1,23 @@ .file "init.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function init -.LCPI0_0: - .dword 0x41dfffffffc00000 # double 2147483647 .text - .globl init + .globl init # -- Begin function init .p2align 5 .type init,@function init: # @init # %bb.0: - addi.d $sp, $sp, -208 - st.d $ra, $sp, 200 # 8-byte Folded Spill - st.d $fp, $sp, 192 # 8-byte Folded Spill - st.d $s0, $sp, 184 # 8-byte Folded Spill - st.d $s1, $sp, 176 # 8-byte Folded Spill - st.d $s2, $sp, 168 # 8-byte Folded Spill - st.d $s3, $sp, 160 # 8-byte Folded Spill - st.d $s4, $sp, 152 # 8-byte Folded Spill - st.d $s5, $sp, 144 # 8-byte Folded Spill - st.d $s6, $sp, 136 # 8-byte Folded Spill - st.d $s7, $sp, 128 # 8-byte Folded Spill - st.d $s8, $sp, 120 # 8-byte Folded Spill + addi.d $sp, $sp, -224 + st.d $ra, $sp, 216 # 8-byte Folded Spill + st.d $fp, $sp, 208 # 8-byte Folded Spill + st.d $s0, $sp, 200 # 8-byte Folded Spill + st.d $s1, $sp, 192 # 8-byte Folded Spill + st.d $s2, $sp, 184 # 8-byte Folded Spill + st.d $s3, $sp, 176 # 8-byte Folded Spill + st.d $s4, $sp, 168 # 8-byte Folded Spill + st.d $s5, $sp, 160 # 8-byte Folded Spill + st.d $s6, $sp, 152 # 8-byte Folded Spill + st.d $s7, $sp, 144 # 8-byte Folded Spill + st.d $s8, $sp, 136 # 8-byte Folded Spill + fst.d $fs0, $sp, 128 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(error_tol) ld.w $a0, $a0, %pc_lo12(error_tol) sub.d $a0, $zero, $a0 @@ -75,7 +72,7 @@ init: # @init pcalau12i $t3, %pc_hi20(init_block_y) ld.w $a2, $t3, %pc_lo12(init_block_y) pcalau12i $a3, %pc_hi20(init_block_z) - st.d $a3, $sp, 56 # 8-byte Folded Spill + st.d $a3, $sp, 64 # 8-byte Folded Spill ld.w $a3, $a3, %pc_lo12(init_block_z) mul.d $a5, $a2, $a0 mul.d $a7, $a5, $a3 @@ -132,7 +129,7 @@ init: # @init .LBB0_10: # %._crit_edge135.loopexit ld.w $a0, $t2, %pc_lo12(init_block_x) ld.w $a2, $t3, %pc_lo12(init_block_y) - ld.d $a3, $sp, 56 # 8-byte Folded Reload + ld.d $a3, $sp, 64 # 8-byte Folded Reload ld.w $a3, $a3, %pc_lo12(init_block_z) mul.d $a4, $a2, $a0 mul.d $a7, $a4, $a3 @@ -186,7 +183,7 @@ init: # @init ld.w $a6, $a6, %pc_lo12(npz) mul.d $a1, $a5, $a1 st.w $a1, $a4, 4 - st.d $a7, $sp, 88 # 8-byte Folded Spill + st.d $a7, $sp, 96 # 8-byte Folded Spill mul.d $a1, $a7, $a3 mul.d $a1, $a1, $a6 st.w $a1, $a4, 8 @@ -198,23 +195,26 @@ init: # @init move $a1, $zero move $a7, $zero move $a6, $zero - ld.d $a5, $sp, 88 # 8-byte Folded Reload + ld.d $a5, $sp, 96 # 8-byte Folded Reload bstrpick.d $a4, $a5, 31, 31 add.w $a4, $a5, $a4 srai.d $a4, $a4, 1 - st.d $a4, $sp, 80 # 8-byte Folded Spill + st.d $a4, $sp, 88 # 8-byte Folded Spill pcalau12i $a4, %pc_hi20(blocks) - st.d $a4, $sp, 16 # 8-byte Folded Spill + st.d $a4, $sp, 24 # 8-byte Folded Spill addi.w $a4, $zero, -1 lu32i.d $a4, 0 - st.d $a4, $sp, 8 # 8-byte Folded Spill - st.d $t2, $sp, 32 # 8-byte Folded Spill - st.d $t3, $sp, 24 # 8-byte Folded Spill + st.d $a4, $sp, 16 # 8-byte Folded Spill + lu12i.w $a4, -1024 + lu52i.d $a4, $a4, 1053 + movgr2fr.d $fs0, $a4 + st.d $t2, $sp, 40 # 8-byte Folded Spill + st.d $t3, $sp, 32 # 8-byte Folded Spill b .LBB0_16 .p2align 4, , 16 .LBB0_14: # %._crit_edge159.loopexit # in Loop: Header=BB0_16 Depth=1 - ld.d $a3, $sp, 56 # 8-byte Folded Reload + ld.d $a3, $sp, 64 # 8-byte Folded Reload ld.w $a3, $a3, %pc_lo12(init_block_z) .LBB0_15: # %._crit_edge159 # in Loop: Header=BB0_16 Depth=1 @@ -235,12 +235,12 @@ init: # @init # %bb.18: # %.preheader125.preheader # in Loop: Header=BB0_16 Depth=1 move $a5, $zero - ld.d $a3, $sp, 88 # 8-byte Folded Reload + ld.d $a3, $sp, 96 # 8-byte Folded Reload mul.d $a3, $a7, $a3 - ld.d $a4, $sp, 80 # 8-byte Folded Reload + ld.d $a4, $sp, 88 # 8-byte Folded Reload add.d $a3, $a3, $a4 - st.d $a3, $sp, 40 # 8-byte Folded Spill - st.d $a7, $sp, 48 # 8-byte Folded Spill + st.d $a3, $sp, 48 # 8-byte Folded Spill + st.d $a7, $sp, 56 # 8-byte Folded Spill b .LBB0_21 .p2align 4, , 16 .LBB0_19: # %._crit_edge153.loopexit @@ -262,13 +262,13 @@ init: # @init # %bb.22: # %.lr.ph152 # in Loop: Header=BB0_21 Depth=2 move $a4, $zero - ld.d $a0, $sp, 88 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload mul.d $a0, $a5, $a0 - ld.d $a2, $sp, 80 # 8-byte Folded Reload + ld.d $a2, $sp, 88 # 8-byte Folded Reload add.d $a0, $a0, $a2 - st.d $a0, $sp, 64 # 8-byte Folded Spill + st.d $a0, $sp, 72 # 8-byte Folded Spill addi.w $a3, $a6, 0 - st.d $a5, $sp, 72 # 8-byte Folded Spill + st.d $a5, $sp, 80 # 8-byte Folded Spill b .LBB0_25 .p2align 4, , 16 .LBB0_23: # in Loop: Header=BB0_25 Depth=3 @@ -292,28 +292,28 @@ init: # @init # Child Loop BB0_35 Depth 5 # Child Loop BB0_39 Depth 6 # Child Loop BB0_41 Depth 7 - ld.d $a0, $sp, 16 # 8-byte Folded Reload + ld.d $a0, $sp, 24 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(blocks) alsl.d $a2, $a3, $a3, 1 slli.d $a2, $a2, 6 add.d $s3, $a0, $a2 st.w $zero, $s3, 4 stx.w $a1, $a0, $a2 - ld.d $a0, $sp, 8 # 8-byte Folded Reload + ld.d $a0, $sp, 16 # 8-byte Folded Reload st.w $a0, $s3, 16 - ld.d $a0, $sp, 88 # 8-byte Folded Reload - st.d $a4, $sp, 104 # 8-byte Folded Spill + ld.d $a0, $sp, 96 # 8-byte Folded Reload + st.d $a4, $sp, 112 # 8-byte Folded Spill mul.d $a0, $a4, $a0 - ld.d $a2, $sp, 80 # 8-byte Folded Reload + ld.d $a2, $sp, 88 # 8-byte Folded Reload add.d $a0, $a0, $a2 st.w $a0, $s3, 172 - ld.d $a0, $sp, 64 # 8-byte Folded Reload + ld.d $a0, $sp, 72 # 8-byte Folded Reload st.w $a0, $s3, 176 - ld.d $a0, $sp, 40 # 8-byte Folded Reload + ld.d $a0, $sp, 48 # 8-byte Folded Reload st.w $a0, $s3, 180 - st.d $a3, $sp, 96 # 8-byte Folded Spill + st.d $a3, $sp, 104 # 8-byte Folded Spill move $a0, $a3 - st.d $a1, $sp, 112 # 8-byte Folded Spill + st.d $a1, $sp, 120 # 8-byte Folded Spill move $a2, $zero pcaddu18i $ra, %call36(add_sorted_list) jirl $ra, $ra, 0 @@ -411,16 +411,14 @@ init: # @init ld.d $a1, $s3, 184 slli.d $a2, $s8, 3 ldx.d $a1, $a1, $a2 + slli.d $a2, $s1, 3 + ldx.d $a1, $a1, $a2 movgr2fr.w $fa0, $a0 - slli.d $a0, $s1, 3 + slli.d $a0, $s7, 3 ldx.d $a0, $a1, $a0 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI0_0) - slli.d $a1, $s7, 3 - ldx.d $a0, $a0, $a1 ffint.d.w $fa0, $fa0 ld.w $a2, $s6, %pc_lo12(z_block_size) - fdiv.d $fa0, $fa0, $fa1 + fdiv.d $fa0, $fa0, $fs0 fstx.d $fa0, $a0, $s2 addi.d $fp, $fp, 1 addi.d $s2, $s2, 8 @@ -432,9 +430,9 @@ init: # @init .p2align 4, , 16 .LBB0_43: # %._crit_edge148 # in Loop: Header=BB0_25 Depth=3 - ld.d $a4, $sp, 104 # 8-byte Folded Reload + ld.d $a4, $sp, 112 # 8-byte Folded Reload sltui $a1, $a4, 1 - ld.d $t0, $sp, 96 # 8-byte Folded Reload + ld.d $t0, $sp, 104 # 8-byte Folded Reload addi.d $a2, $t0, -1 addi.w $a0, $zero, -2 maskeqz $a3, $a0, $a1 @@ -442,7 +440,7 @@ init: # @init st.w $a3, $s3, 52 st.w $a1, $s3, 76 st.w $zero, $s3, 28 - ld.d $t2, $sp, 32 # 8-byte Folded Reload + ld.d $t2, $sp, 40 # 8-byte Folded Reload ld.w $a1, $t2, %pc_lo12(init_block_x) addi.w $a1, $a1, -1 xor $a1, $a4, $a1 @@ -453,7 +451,7 @@ init: # @init st.w $a3, $s3, 56 st.w $a2, $s3, 92 st.w $zero, $s3, 32 - ld.d $a5, $sp, 72 # 8-byte Folded Reload + ld.d $a5, $sp, 80 # 8-byte Folded Reload beqz $a5, .LBB0_45 # %bb.44: # in Loop: Header=BB0_25 Depth=3 st.w $zero, $s3, 60 @@ -467,8 +465,8 @@ init: # @init lu32i.d $a3, 0 st.w $a3, $s3, 60 .LBB0_46: # in Loop: Header=BB0_25 Depth=3 - ld.d $t3, $sp, 24 # 8-byte Folded Reload - ld.d $a7, $sp, 48 # 8-byte Folded Reload + ld.d $t3, $sp, 32 # 8-byte Folded Reload + ld.d $a7, $sp, 56 # 8-byte Folded Reload st.w $a2, $s3, 108 st.w $zero, $s3, 36 ld.w $a2, $t3, %pc_lo12(init_block_y) @@ -479,7 +477,7 @@ init: # @init move $a3, $a0 lu32i.d $a3, 0 st.w $a3, $s3, 64 - ld.d $a1, $sp, 112 # 8-byte Folded Reload + ld.d $a1, $sp, 120 # 8-byte Folded Reload st.w $a2, $s3, 124 st.w $zero, $s3, 40 beqz $a7, .LBB0_50 @@ -495,7 +493,7 @@ init: # @init st.w $zero, $s3, 64 ld.w $a2, $t2, %pc_lo12(init_block_x) add.d $a2, $a2, $t0 - ld.d $a1, $sp, 112 # 8-byte Folded Reload + ld.d $a1, $sp, 120 # 8-byte Folded Reload st.w $a2, $s3, 124 st.w $zero, $s3, 40 bnez $a7, .LBB0_48 @@ -507,7 +505,7 @@ init: # @init .LBB0_51: # in Loop: Header=BB0_25 Depth=3 st.w $a2, $s3, 140 st.w $zero, $s3, 44 - ld.d $a2, $sp, 56 # 8-byte Folded Reload + ld.d $a2, $sp, 64 # 8-byte Folded Reload ld.w $a2, $a2, %pc_lo12(init_block_z) addi.w $a2, $a2, -1 bne $a7, $a2, .LBB0_23 @@ -539,18 +537,19 @@ init: # @init addi.d $s0, $s0, 8 blt $s1, $a1, .LBB0_55 .LBB0_56: # %._crit_edge169 - ld.d $s8, $sp, 120 # 8-byte Folded Reload - ld.d $s7, $sp, 128 # 8-byte Folded Reload - ld.d $s6, $sp, 136 # 8-byte Folded Reload - ld.d $s5, $sp, 144 # 8-byte Folded Reload - ld.d $s4, $sp, 152 # 8-byte Folded Reload - ld.d $s3, $sp, 160 # 8-byte Folded Reload - ld.d $s2, $sp, 168 # 8-byte Folded Reload - ld.d $s1, $sp, 176 # 8-byte Folded Reload - ld.d $s0, $sp, 184 # 8-byte Folded Reload - ld.d $fp, $sp, 192 # 8-byte Folded Reload - ld.d $ra, $sp, 200 # 8-byte Folded Reload - addi.d $sp, $sp, 208 + fld.d $fs0, $sp, 128 # 8-byte Folded Reload + ld.d $s8, $sp, 136 # 8-byte Folded Reload + ld.d $s7, $sp, 144 # 8-byte Folded Reload + ld.d $s6, $sp, 152 # 8-byte Folded Reload + ld.d $s5, $sp, 160 # 8-byte Folded Reload + ld.d $s4, $sp, 168 # 8-byte Folded Reload + ld.d $s3, $sp, 176 # 8-byte Folded Reload + ld.d $s2, $sp, 184 # 8-byte Folded Reload + ld.d $s1, $sp, 192 # 8-byte Folded Reload + ld.d $s0, $sp, 200 # 8-byte Folded Reload + ld.d $fp, $sp, 208 # 8-byte Folded Reload + ld.d $ra, $sp, 216 # 8-byte Folded Reload + addi.d $sp, $sp, 224 ret .Lfunc_end0: .size init, .Lfunc_end0-init diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/profile.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/profile.s index 5cac2618..fee602a7 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/profile.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/profile.s @@ -1,10 +1,6 @@ .file "profile.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function profile -.LCPI0_0: - .dword 0x4090000000000000 # double 1024 .text - .globl profile + .globl profile # -- Begin function profile .p2align 5 .type profile,@function profile: # @profile @@ -68,8 +64,8 @@ profile: # @profile addi.d $a0, $a0, %pc_lo12(average) st.d $a0, $sp, 376 # 8-byte Folded Spill fld.d $fa0, $a0, 304 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) + lu52i.d $a0, $zero, 1033 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 fmul.d $fa0, $fa0, $fa1 fmul.d $fa0, $fa0, $fa1 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/util.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/util.s index 37cf5e62..88231539 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/util.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/util.s @@ -1,10 +1,6 @@ .file "util.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function timer -.LCPI0_0: - .dword 0x412e848000000000 # double 1.0E+6 .text - .globl timer + .globl timer # -- Begin function timer .p2align 5 .type timer,@function timer: # @timer @@ -13,23 +9,20 @@ timer: # @timer st.d $ra, $sp, 8 # 8-byte Folded Spill pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI0_0) + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 movgr2fr.d $fa1, $a0 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 ld.d $ra, $sp, 8 # 8-byte Folded Reload addi.d $sp, $sp, 16 ret .Lfunc_end0: .size timer, .Lfunc_end0-timer # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function ma_malloc -.LCPI1_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 - .text - .globl ma_malloc + .globl ma_malloc # -- Begin function ma_malloc .p2align 5 .type ma_malloc,@function ma_malloc: # @ma_malloc @@ -51,8 +44,9 @@ ma_malloc: # @ma_malloc or $a1, $a1, $a2 pcalau12i $a2, %pc_hi20(counter_malloc) movgr2fr.d $fa0, $a1 - pcalau12i $a1, %pc_hi20(.LCPI1_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_0) + lu12i.w $a1, 256 + lu52i.d $a1, $a1, 1107 + movgr2fr.d $fa1, $a1 lu12i.w $a1, 275200 bstrins.d $fp, $a1, 63, 32 ld.w $a1, $a2, %pc_lo12(counter_malloc) diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeFiles/miniGMG.dir/mg.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeFiles/miniGMG.dir/mg.s index f444619c..c21fc537 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeFiles/miniGMG.dir/mg.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeFiles/miniGMG.dir/mg.s @@ -2748,12 +2748,7 @@ MGBuild: # @MGBuild .Lfunc_end8: .size MGBuild, .Lfunc_end8-MGBuild # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function MGSolve -.LCPI9_0: - .dword 0x3f50624dd2f1a9fc # double 0.001 - .text - .globl MGSolve + .globl MGSolve # -- Begin function MGSolve .p2align 5 .type MGSolve,@function MGSolve: # @MGSolve @@ -2814,9 +2809,12 @@ MGSolve: # @MGSolve st.d $a0, $sp, 8 # 8-byte Folded Spill move $s1, $zero addi.d $s5, $fp, 1200 - pcalau12i $a0, %pc_hi20(.LCPI9_0) - fld.d $fs2, $a0, %pc_lo12(.LCPI9_0) ori $s6, $zero, 2 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs2, $a0 addi.w $s8, $zero, -8 b .LBB9_5 .p2align 4, , 16 diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeFiles/miniGMG.dir/miniGMG.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeFiles/miniGMG.dir/miniGMG.s index b9cea5dd..cf6edcc8 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeFiles/miniGMG.dir/miniGMG.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeFiles/miniGMG.dir/miniGMG.s @@ -1,12 +1,6 @@ .file "miniGMG.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x3feccccccccccccd # double 0.90000000000000002 -.LCPI0_1: - .dword 0x3cd203af9ee75616 # double 1.0000000000000001E-15 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -199,8 +193,11 @@ main: # @main ld.d $a0, $s7, 0 pcaddu18i $ra, %call36(fflush) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI0_0) + lu12i.w $a0, -209716 + ori $a0, $a0, 3277 + lu32i.d $a0, -209716 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fs1, $a0 addi.d $a0, $sp, 88 move $a1, $zero fmov.d $fa0, $fs0 @@ -229,8 +226,11 @@ main: # @main move $a2, $zero pcaddu18i $ra, %call36(zero_grid) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI0_1) + lu12i.w $a0, -397707 + ori $a0, $a0, 1558 + lu32i.d $a0, 132015 + lu52i.d $a0, $a0, 973 + movgr2fr.d $fs2, $a0 addi.d $a0, $sp, 88 ori $a2, $zero, 1 move $a1, $zero diff --git a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeFiles/miniGMG.dir/operators.ompif.s b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeFiles/miniGMG.dir/operators.ompif.s index 992d8f4e..211b83ee 100644 --- a/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeFiles/miniGMG.dir/operators.ompif.s +++ b/results/MultiSource/Benchmarks/DOE-ProxyApps-C/miniGMG/CMakeFiles/miniGMG.dir/operators.ompif.s @@ -706,39 +706,35 @@ rebuild_lambda: # @rebuild_lambda .Lfunc_end2: .size rebuild_lambda, .Lfunc_end2-rebuild_lambda # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function smooth -.LCPI3_0: - .dword 0xbfe5555555555555 # double -0.66666666666666663 - .text - .globl smooth + .globl smooth # -- Begin function smooth .p2align 5 .type smooth,@function smooth: # @smooth # %bb.0: - addi.d $sp, $sp, -1008 - st.d $ra, $sp, 1000 # 8-byte Folded Spill - st.d $fp, $sp, 992 # 8-byte Folded Spill - st.d $s0, $sp, 984 # 8-byte Folded Spill - st.d $s1, $sp, 976 # 8-byte Folded Spill - st.d $s2, $sp, 968 # 8-byte Folded Spill - st.d $s3, $sp, 960 # 8-byte Folded Spill - st.d $s4, $sp, 952 # 8-byte Folded Spill - st.d $s5, $sp, 944 # 8-byte Folded Spill - st.d $s6, $sp, 936 # 8-byte Folded Spill - st.d $s7, $sp, 928 # 8-byte Folded Spill - st.d $s8, $sp, 920 # 8-byte Folded Spill - fst.d $fs0, $sp, 912 # 8-byte Folded Spill + addi.d $sp, $sp, -1024 + st.d $ra, $sp, 1016 # 8-byte Folded Spill + st.d $fp, $sp, 1008 # 8-byte Folded Spill + st.d $s0, $sp, 1000 # 8-byte Folded Spill + st.d $s1, $sp, 992 # 8-byte Folded Spill + st.d $s2, $sp, 984 # 8-byte Folded Spill + st.d $s3, $sp, 976 # 8-byte Folded Spill + st.d $s4, $sp, 968 # 8-byte Folded Spill + st.d $s5, $sp, 960 # 8-byte Folded Spill + st.d $s6, $sp, 952 # 8-byte Folded Spill + st.d $s7, $sp, 944 # 8-byte Folded Spill + st.d $s8, $sp, 936 # 8-byte Folded Spill + fst.d $fs0, $sp, 928 # 8-byte Folded Spill + fst.d $fs1, $sp, 920 # 8-byte Folded Spill move $fp, $a0 ld.w $a4, $a0, 1612 ori $a0, $zero, 2 fmov.d $fs0, $fa1 # kill: def $f0_64 killed $f0_64 def $vr0 move $s3, $a3 - st.d $a2, $sp, 136 # 8-byte Folded Spill + st.d $a2, $sp, 128 # 8-byte Folded Spill move $s0, $a1 - st.d $a4, $sp, 96 # 8-byte Folded Spill - vst $vr0, $sp, 80 # 16-byte Folded Spill + st.d $a4, $sp, 88 # 8-byte Folded Spill + vst $vr0, $sp, 64 # 16-byte Folded Spill blt $a4, $a0, .LBB3_2 # %bb.1: ori $a3, $zero, 1 @@ -749,49 +745,49 @@ smooth: # @smooth move $a2, $s3 pcaddu18i $ra, %call36(exchange_boundary) jirl $ra, $ra, 0 - vld $vr0, $sp, 80 # 16-byte Folded Reload + vld $vr0, $sp, 64 # 16-byte Folded Reload .LBB3_2: move $a1, $zero ori $a2, $zero, 1 - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 88 # 8-byte Folded Reload slt $a0, $a2, $a0 - st.d $a0, $sp, 64 # 8-byte Folded Spill + st.d $a0, $sp, 48 # 8-byte Folded Spill alsl.d $a0, $s0, $fp, 3 - st.d $a0, $sp, 56 # 8-byte Folded Spill + st.d $a0, $sp, 40 # 8-byte Folded Spill addi.d $a0, $a0, 1616 - st.d $a0, $sp, 128 # 8-byte Folded Spill + st.d $a0, $sp, 120 # 8-byte Folded Spill vreplvei.d $vr0, $vr0, 0 - vst $vr0, $sp, 32 # 16-byte Folded Spill + vst $vr0, $sp, 16 # 16-byte Folded Spill ori $a0, $zero, 216 mul.d $a0, $s0, $a0 - st.d $a0, $sp, 152 # 8-byte Folded Spill + st.d $a0, $sp, 144 # 8-byte Folded Spill slli.d $a0, $s3, 3 - st.d $a0, $sp, 120 # 8-byte Folded Spill + st.d $a0, $sp, 112 # 8-byte Folded Spill lu12i.w $a0, 349525 ori $a0, $a0, 1365 lu32i.d $a0, 349525 lu52i.d $a0, $a0, -1026 - vreplgr2vr.d $vr0, $a0 - vst $vr0, $sp, 16 # 16-byte Folded Spill - st.d $fp, $sp, 104 # 8-byte Folded Spill - st.d $s0, $sp, 72 # 8-byte Folded Spill + st.d $a0, $sp, 632 # 8-byte Folded Spill + movgr2fr.d $fs1, $a0 + st.d $fp, $sp, 96 # 8-byte Folded Spill + st.d $s0, $sp, 56 # 8-byte Folded Spill b .LBB3_4 .p2align 4, , 16 .LBB3_3: # %._crit_edge228 # in Loop: Header=BB3_4 Depth=1 pcaddu18i $ra, %call36(CycleTime) jirl $ra, $ra, 0 - ld.d $a3, $sp, 56 # 8-byte Folded Reload + ld.d $a3, $sp, 40 # 8-byte Folded Reload ld.d $a1, $a3, 0 - ld.d $a2, $sp, 112 # 8-byte Folded Reload + ld.d $a2, $sp, 104 # 8-byte Folded Reload sub.d $a0, $a0, $a2 add.d $a0, $a0, $a1 - ld.d $a1, $sp, 96 # 8-byte Folded Reload - ld.d $a2, $sp, 144 # 8-byte Folded Reload + ld.d $a1, $sp, 88 # 8-byte Folded Reload + ld.d $a2, $sp, 136 # 8-byte Folded Reload add.w $a1, $a2, $a1 st.d $a0, $a3, 0 - ld.d $fp, $sp, 104 # 8-byte Folded Reload - ld.d $s0, $sp, 72 # 8-byte Folded Reload + ld.d $fp, $sp, 96 # 8-byte Folded Reload + ld.d $s0, $sp, 56 # 8-byte Folded Reload ori $a0, $zero, 4 bge $a1, $a0, .LBB3_43 .LBB3_4: # =>This Loop Header: Depth=1 @@ -801,43 +797,42 @@ smooth: # @smooth # Child Loop BB3_16 Depth 5 # Child Loop BB3_41 Depth 6 # Child Loop BB3_18 Depth 6 - st.d $a1, $sp, 144 # 8-byte Folded Spill + st.d $a1, $sp, 136 # 8-byte Folded Spill andi $a0, $a1, 1 sltui $a0, $a0, 1 ori $a1, $zero, 10 masknez $a1, $a1, $a0 - ld.d $a2, $sp, 136 # 8-byte Folded Reload + ld.d $a2, $sp, 128 # 8-byte Folded Reload maskeqz $a0, $a2, $a0 or $a2, $a0, $a1 ori $a3, $zero, 1 move $a0, $fp move $a1, $s0 - ld.d $a4, $sp, 64 # 8-byte Folded Reload + ld.d $a4, $sp, 48 # 8-byte Folded Reload move $a5, $a4 pcaddu18i $ra, %call36(exchange_boundary) jirl $ra, $ra, 0 pcaddu18i $ra, %call36(CycleTime) jirl $ra, $ra, 0 ld.w $a1, $fp, 1600 - st.d $a0, $sp, 112 # 8-byte Folded Spill - vld $vr9, $sp, 80 # 16-byte Folded Reload - vld $vr10, $sp, 32 # 16-byte Folded Reload - vld $vr11, $sp, 16 # 16-byte Folded Reload - st.d $a1, $sp, 168 # 8-byte Folded Spill + st.d $a0, $sp, 104 # 8-byte Folded Spill + vld $vr9, $sp, 64 # 16-byte Folded Reload + vld $vr10, $sp, 16 # 16-byte Folded Reload + st.d $a1, $sp, 160 # 8-byte Folded Spill blez $a1, .LBB3_3 # %bb.5: # %.lr.ph227 # in Loop: Header=BB3_4 Depth=1 - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.d $a0, $a0, 1776 - st.d $a0, $sp, 160 # 8-byte Folded Spill + st.d $a0, $sp, 152 # 8-byte Folded Spill move $a1, $zero b .LBB3_7 .p2align 4, , 16 .LBB3_6: # %._crit_edge224 # in Loop: Header=BB3_7 Depth=2 - ld.d $a1, $sp, 176 # 8-byte Folded Reload + ld.d $a1, $sp, 168 # 8-byte Folded Reload addi.d $a1, $a1, 1 - ld.d $a0, $sp, 168 # 8-byte Folded Reload + ld.d $a0, $sp, 160 # 8-byte Folded Reload beq $a1, $a0, .LBB3_3 .LBB3_7: # Parent Loop BB3_4 Depth=1 # => This Loop Header: Depth=2 @@ -846,167 +841,167 @@ smooth: # @smooth # Child Loop BB3_16 Depth 5 # Child Loop BB3_41 Depth 6 # Child Loop BB3_18 Depth 6 - st.d $a1, $sp, 176 # 8-byte Folded Spill + st.d $a1, $sp, 168 # 8-byte Folded Spill slli.d $a0, $a1, 8 - ld.d $a1, $sp, 160 # 8-byte Folded Reload + ld.d $a1, $sp, 152 # 8-byte Folded Reload add.d $a0, $a1, $a0 ld.d $a0, $a0, 248 - ld.d $a1, $sp, 152 # 8-byte Folded Reload + ld.d $a1, $sp, 144 # 8-byte Folded Reload add.d $a0, $a0, $a1 ld.w $a1, $a0, 44 - st.d $a1, $sp, 480 # 8-byte Folded Spill + st.d $a1, $sp, 472 # 8-byte Folded Spill blez $a1, .LBB3_6 # %bb.8: # %.lr.ph # in Loop: Header=BB3_7 Depth=2 - ld.w $fp, $a0, 48 + ld.w $a4, $a0, 48 ld.d $a1, $a0, 176 - ld.w $t1, $a0, 52 - st.d $zero, $sp, 472 # 8-byte Folded Spill - ld.d $a2, $sp, 120 # 8-byte Folded Reload - ldx.d $a5, $a1, $a2 - add.d $a6, $fp, $t1 - addi.w $a2, $a6, 1 - ld.d $a4, $sp, 480 # 8-byte Folded Reload - mul.w $t0, $a4, $a2 - ld.d $t2, $a1, 16 - ld.d $t3, $a1, 40 - ld.d $t4, $a1, 48 - ld.d $t5, $a1, 56 - ld.d $t6, $a1, 32 - ld.d $a3, $sp, 128 # 8-byte Folded Reload + ld.w $t0, $a0, 52 + st.d $zero, $sp, 464 # 8-byte Folded Spill + ld.d $a2, $sp, 112 # 8-byte Folded Reload + ldx.d $a6, $a1, $a2 + add.d $a7, $a4, $t0 + addi.w $a2, $a7, 1 + ld.d $a5, $sp, 472 # 8-byte Folded Reload + mul.w $t1, $a5, $a2 + ld.d $t3, $a1, 16 + ld.d $t4, $a1, 40 + ld.d $t5, $a1, 48 + ld.d $t6, $a1, 56 + ld.d $t7, $a1, 32 + ld.d $a3, $sp, 120 # 8-byte Folded Reload fld.d $fa0, $a3, 0 ld.w $a3, $a0, 20 - ld.w $a7, $a0, 24 - st.d $a7, $sp, 336 # 8-byte Folded Spill + ld.w $t2, $a0, 24 + st.d $t2, $sp, 328 # 8-byte Folded Spill ld.w $a0, $a0, 28 - st.d $a0, $sp, 360 # 8-byte Folded Spill + st.d $a0, $sp, 352 # 8-byte Folded Spill fmul.d $fa0, $fa0, $fa0 frecip.d $fa0, $fa0 fmul.d $fa0, $fs0, $fa0 ori $a0, $zero, 1 - sub.w $a7, $a0, $a4 + sub.w $t2, $a0, $a5 addi.d $a0, $a1, 80 - st.d $a0, $sp, 264 # 8-byte Folded Spill - ld.d $a0, $sp, 136 # 8-byte Folded Reload - alsl.d $a0, $a0, $a1, 3 st.d $a0, $sp, 256 # 8-byte Folded Spill - st.d $a3, $sp, 328 # 8-byte Folded Spill - add.d $a0, $a3, $a4 + ld.d $a0, $sp, 128 # 8-byte Folded Reload + alsl.d $a0, $a0, $a1, 3 + st.d $a0, $sp, 248 # 8-byte Folded Spill + st.d $a3, $sp, 320 # 8-byte Folded Spill + add.d $a0, $a3, $a5 addi.d $a3, $a0, -1 - st.d $a6, $sp, 368 # 8-byte Folded Spill - st.d $a7, $sp, 904 # 8-byte Folded Spill - mulw.d.w $a6, $a6, $a7 - sub.d $a0, $a6, $a4 - addi.w $t7, $a0, 1 + st.d $a7, $sp, 360 # 8-byte Folded Spill + st.d $t2, $sp, 912 # 8-byte Folded Spill + mulw.d.w $a7, $a7, $t2 + sub.d $a0, $a7, $a5 + addi.w $t8, $a0, 1 bstrpick.d $a1, $a2, 31, 0 - st.d $a1, $sp, 232 # 8-byte Folded Spill + st.d $a1, $sp, 224 # 8-byte Folded Spill addi.d $a0, $a0, 2 - st.d $a0, $sp, 224 # 8-byte Folded Spill - add.d $a0, $fp, $a6 - st.d $a0, $sp, 456 # 8-byte Folded Spill - sub.d $a0, $a0, $a4 + st.d $a0, $sp, 216 # 8-byte Folded Spill + add.d $a0, $a4, $a7 + st.d $a0, $sp, 448 # 8-byte Folded Spill + sub.d $a0, $a0, $a5 addi.w $a2, $a0, 1 - addi.d $a0, $t1, 1 - add.d $a1, $a0, $a6 - sub.w $t8, $a1, $a4 - sub.d $a1, $t0, $t1 - slli.d $a1, $a1, 3 - st.d $a1, $sp, 200 # 8-byte Folded Spill - sub.d $a1, $t0, $fp + addi.d $a0, $t0, 1 + add.d $a1, $a0, $a7 + sub.w $fp, $a1, $a5 + sub.d $a1, $t1, $t0 slli.d $a1, $a1, 3 st.d $a1, $sp, 192 # 8-byte Folded Spill + sub.d $a1, $t1, $a4 + slli.d $a1, $a1, 3 + st.d $a1, $sp, 184 # 8-byte Folded Spill vreplvei.d $vr1, $vr0, 0 - add.d $a0, $a0, $fp - st.d $a0, $sp, 352 # 8-byte Folded Spill - st.d $a5, $sp, 320 # 8-byte Folded Spill - alsl.d $a7, $t0, $a5, 3 - st.d $t2, $sp, 304 # 8-byte Folded Spill - alsl.d $t2, $t0, $t2, 3 + add.d $a0, $a0, $a4 + st.d $a0, $sp, 344 # 8-byte Folded Spill + st.d $a6, $sp, 312 # 8-byte Folded Spill + alsl.d $t2, $t1, $a6, 3 st.d $t3, $sp, 296 # 8-byte Folded Spill - alsl.d $t3, $t0, $t3, 3 + alsl.d $t3, $t1, $t3, 3 st.d $t4, $sp, 288 # 8-byte Folded Spill - alsl.d $t4, $t0, $t4, 3 + alsl.d $t4, $t1, $t4, 3 st.d $t5, $sp, 280 # 8-byte Folded Spill - alsl.d $t5, $t0, $t5, 3 + alsl.d $t5, $t1, $t5, 3 st.d $t6, $sp, 272 # 8-byte Folded Spill - alsl.d $t6, $t0, $t6, 3 - ld.d $a0, $sp, 144 # 8-byte Folded Reload - add.w $a1, $a4, $a0 - st.d $a1, $sp, 344 # 8-byte Folded Spill - st.d $t0, $sp, 312 # 8-byte Folded Spill - slli.d $a1, $t0, 3 - st.d $a1, $sp, 184 # 8-byte Folded Spill - sub.w $a1, $t7, $fp - st.d $a1, $sp, 448 # 8-byte Folded Spill - sub.w $a1, $t7, $t1 + alsl.d $t6, $t1, $t6, 3 + st.d $t7, $sp, 264 # 8-byte Folded Spill + alsl.d $s1, $t1, $t7, 3 + ld.d $a0, $sp, 136 # 8-byte Folded Reload + add.w $a1, $a5, $a0 + st.d $a1, $sp, 336 # 8-byte Folded Spill + st.d $t1, $sp, 304 # 8-byte Folded Spill + slli.d $a1, $t1, 3 + st.d $a1, $sp, 176 # 8-byte Folded Spill + sub.w $a1, $t8, $a4 st.d $a1, $sp, 440 # 8-byte Folded Spill - sub.d $a1, $a6, $t1 + sub.w $a1, $t8, $t0 st.d $a1, $sp, 432 # 8-byte Folded Spill - add.d $a1, $t1, $a6 + sub.d $a1, $a7, $t0 st.d $a1, $sp, 424 # 8-byte Folded Spill - st.d $a6, $sp, 464 # 8-byte Folded Spill - sub.d $a1, $a6, $fp + add.d $a1, $t0, $a7 st.d $a1, $sp, 416 # 8-byte Folded Spill - st.d $a3, $sp, 248 # 8-byte Folded Spill - st.d $a3, $sp, 408 # 8-byte Folded Spill - st.d $t8, $sp, 208 # 8-byte Folded Spill - st.d $t8, $sp, 400 # 8-byte Folded Spill - st.d $a2, $sp, 216 # 8-byte Folded Spill - st.d $a2, $sp, 392 # 8-byte Folded Spill - st.d $t7, $sp, 240 # 8-byte Folded Spill - st.d $t7, $sp, 384 # 8-byte Folded Spill - st.d $a0, $sp, 376 # 8-byte Folded Spill - st.d $fp, $sp, 640 # 8-byte Folded Spill - st.d $t1, $sp, 488 # 8-byte Folded Spill + st.d $a7, $sp, 456 # 8-byte Folded Spill + sub.d $a1, $a7, $a4 + st.d $a1, $sp, 408 # 8-byte Folded Spill + st.d $a3, $sp, 240 # 8-byte Folded Spill + st.d $a3, $sp, 400 # 8-byte Folded Spill + st.d $fp, $sp, 200 # 8-byte Folded Spill + st.d $fp, $sp, 392 # 8-byte Folded Spill + st.d $a2, $sp, 208 # 8-byte Folded Spill + st.d $a2, $sp, 384 # 8-byte Folded Spill + st.d $t8, $sp, 232 # 8-byte Folded Spill + st.d $t8, $sp, 376 # 8-byte Folded Spill + st.d $a0, $sp, 368 # 8-byte Folded Spill + st.d $a4, $sp, 664 # 8-byte Folded Spill + st.d $t0, $sp, 480 # 8-byte Folded Spill b .LBB3_10 .p2align 4, , 16 .LBB3_9: # %._crit_edge # in Loop: Header=BB3_10 Depth=3 - ld.d $a2, $sp, 376 # 8-byte Folded Reload + ld.d $a2, $sp, 368 # 8-byte Folded Reload addi.w $a2, $a2, 1 - ld.d $a0, $sp, 904 # 8-byte Folded Reload + ld.d $a0, $sp, 912 # 8-byte Folded Reload addi.w $a0, $a0, 1 - st.d $a0, $sp, 904 # 8-byte Folded Spill - ld.d $a0, $sp, 472 # 8-byte Folded Reload + st.d $a0, $sp, 912 # 8-byte Folded Spill + ld.d $a0, $sp, 464 # 8-byte Folded Reload addi.d $a0, $a0, 1 - st.d $a0, $sp, 472 # 8-byte Folded Spill - ld.d $a0, $sp, 352 # 8-byte Folded Reload + st.d $a0, $sp, 464 # 8-byte Folded Spill + ld.d $a0, $sp, 344 # 8-byte Folded Reload + ld.d $a1, $sp, 376 # 8-byte Folded Reload + add.w $a1, $a1, $a0 + st.d $a1, $sp, 376 # 8-byte Folded Spill ld.d $a1, $sp, 384 # 8-byte Folded Reload add.w $a1, $a1, $a0 st.d $a1, $sp, 384 # 8-byte Folded Spill - ld.d $a1, $sp, 392 # 8-byte Folded Reload - add.w $a1, $a1, $a0 - st.d $a1, $sp, 392 # 8-byte Folded Spill - ld.d $a1, $sp, 448 # 8-byte Folded Reload - add.w $a1, $a1, $a0 - st.d $a1, $sp, 448 # 8-byte Folded Spill - ld.d $a1, $sp, 400 # 8-byte Folded Reload - add.w $a1, $a1, $a0 - st.d $a1, $sp, 400 # 8-byte Folded Spill ld.d $a1, $sp, 440 # 8-byte Folded Reload add.w $a1, $a1, $a0 st.d $a1, $sp, 440 # 8-byte Folded Spill - ld.d $a0, $sp, 408 # 8-byte Folded Reload - addi.d $a0, $a0, -1 - st.d $a0, $sp, 408 # 8-byte Folded Spill - ld.d $a0, $sp, 368 # 8-byte Folded Reload + ld.d $a1, $sp, 392 # 8-byte Folded Reload + add.w $a1, $a1, $a0 + st.d $a1, $sp, 392 # 8-byte Folded Spill ld.d $a1, $sp, 432 # 8-byte Folded Reload - add.d $a1, $a1, $a0 + add.w $a1, $a1, $a0 st.d $a1, $sp, 432 # 8-byte Folded Spill + ld.d $a0, $sp, 400 # 8-byte Folded Reload + addi.d $a0, $a0, -1 + st.d $a0, $sp, 400 # 8-byte Folded Spill + ld.d $a0, $sp, 360 # 8-byte Folded Reload ld.d $a1, $sp, 424 # 8-byte Folded Reload add.d $a1, $a1, $a0 st.d $a1, $sp, 424 # 8-byte Folded Spill ld.d $a1, $sp, 416 # 8-byte Folded Reload add.d $a1, $a1, $a0 st.d $a1, $sp, 416 # 8-byte Folded Spill + ld.d $a1, $sp, 408 # 8-byte Folded Reload + add.d $a1, $a1, $a0 + st.d $a1, $sp, 408 # 8-byte Folded Spill + ld.d $a1, $sp, 448 # 8-byte Folded Reload + add.d $a1, $a1, $a0 + st.d $a1, $sp, 448 # 8-byte Folded Spill ld.d $a1, $sp, 456 # 8-byte Folded Reload add.d $a1, $a1, $a0 st.d $a1, $sp, 456 # 8-byte Folded Spill - ld.d $a1, $sp, 464 # 8-byte Folded Reload - add.d $a1, $a1, $a0 - st.d $a1, $sp, 464 # 8-byte Folded Spill - ld.d $a0, $sp, 344 # 8-byte Folded Reload - st.d $a2, $sp, 376 # 8-byte Folded Spill + ld.d $a0, $sp, 336 # 8-byte Folded Reload + st.d $a2, $sp, 368 # 8-byte Folded Spill bge $a2, $a0, .LBB3_6 .LBB3_10: # Parent Loop BB3_4 Depth=1 # Parent Loop BB3_7 Depth=2 @@ -1015,181 +1010,183 @@ smooth: # @smooth # Child Loop BB3_16 Depth 5 # Child Loop BB3_41 Depth 6 # Child Loop BB3_18 Depth 6 - ld.d $a0, $sp, 480 # 8-byte Folded Reload + ld.d $a0, $sp, 472 # 8-byte Folded Reload ori $a1, $zero, 1 sub.d $a2, $a1, $a0 addi.d $a0, $a0, -1 - st.d $a0, $sp, 480 # 8-byte Folded Spill - ld.d $a1, $sp, 360 # 8-byte Folded Reload + st.d $a0, $sp, 472 # 8-byte Folded Spill + ld.d $a1, $sp, 352 # 8-byte Folded Reload add.d $a0, $a0, $a1 - st.d $a2, $sp, 536 # 8-byte Folded Spill - st.d $a0, $sp, 528 # 8-byte Folded Spill + st.d $a2, $sp, 528 # 8-byte Folded Spill + st.d $a0, $sp, 520 # 8-byte Folded Spill bge $a2, $a0, .LBB3_9 # %bb.11: # %.preheader210.lr.ph # in Loop: Header=BB3_10 Depth=3 - ld.d $a0, $sp, 480 # 8-byte Folded Reload - ld.d $a1, $sp, 336 # 8-byte Folded Reload + ld.d $a0, $sp, 472 # 8-byte Folded Reload + ld.d $a1, $sp, 328 # 8-byte Folded Reload add.d $a1, $a0, $a1 - ld.d $a0, $sp, 536 # 8-byte Folded Reload + ld.d $a0, $sp, 528 # 8-byte Folded Reload st.d $a1, $sp, 864 # 8-byte Folded Spill bge $a0, $a1, .LBB3_9 # %bb.12: # %.preheader210.lr.ph # in Loop: Header=BB3_10 Depth=3 - ld.d $a0, $sp, 480 # 8-byte Folded Reload - ld.d $a1, $sp, 328 # 8-byte Folded Reload - add.d $t0, $a0, $a1 - ld.d $a0, $sp, 536 # 8-byte Folded Reload - bge $a0, $t0, .LBB3_9 + ld.d $a0, $sp, 472 # 8-byte Folded Reload + ld.d $a1, $sp, 320 # 8-byte Folded Reload + add.d $a7, $a0, $a1 + ld.d $a0, $sp, 528 # 8-byte Folded Reload + bge $a0, $a7, .LBB3_9 # %bb.13: # %.preheader210.us.us.preheader # in Loop: Header=BB3_10 Depth=3 - move $s1, $zero - ld.d $t8, $sp, 904 # 8-byte Folded Reload - addi.d $a0, $t8, 1 - ld.d $a3, $sp, 408 # 8-byte Folded Reload + move $fp, $zero + ld.d $t7, $sp, 912 # 8-byte Folded Reload + addi.d $a0, $t7, 1 + ld.d $a3, $sp, 400 # 8-byte Folded Reload slt $a1, $a0, $a3 masknez $a2, $a0, $a1 maskeqz $a1, $a3, $a1 or $a1, $a1, $a2 - sub.d $a1, $a1, $t8 + sub.d $a1, $a1, $t7 bstrins.d $a1, $zero, 0, 0 - st.d $a1, $sp, 664 # 8-byte Folded Spill - ld.d $a3, $sp, 472 # 8-byte Folded Reload - ld.d $a1, $sp, 248 # 8-byte Folded Reload + st.d $a1, $sp, 656 # 8-byte Folded Spill + ld.d $a3, $sp, 464 # 8-byte Folded Reload + ld.d $a1, $sp, 240 # 8-byte Folded Reload sub.d $a1, $a1, $a3 slt $a2, $a0, $a1 maskeqz $a1, $a1, $a2 masknez $a0, $a0, $a2 or $a0, $a1, $a0 - sub.d $s2, $a0, $t8 - ld.d $a1, $sp, 232 # 8-byte Folded Reload + sub.d $s0, $a0, $t7 + ld.d $a1, $sp, 224 # 8-byte Folded Reload mul.d $a1, $a3, $a1 - ld.d $a2, $sp, 240 # 8-byte Folded Reload + ld.d $a2, $sp, 232 # 8-byte Folded Reload add.d $a2, $a2, $a1 - st.d $a2, $sp, 520 # 8-byte Folded Spill - slli.d $a2, $t8, 3 + st.d $a2, $sp, 512 # 8-byte Folded Spill + slli.d $a2, $t7, 3 sub.d $a2, $zero, $a2 - add.d $a3, $t1, $t8 + add.d $a3, $t0, $t7 slli.d $a3, $a3, 3 sub.d $a3, $zero, $a3 - ld.d $a4, $sp, 208 # 8-byte Folded Reload - add.d $a4, $a4, $a1 - st.d $a4, $sp, 512 # 8-byte Folded Spill - add.d $a4, $fp, $t8 + ld.d $a5, $sp, 200 # 8-byte Folded Reload + add.d $a5, $a5, $a1 + st.d $a5, $sp, 504 # 8-byte Folded Spill + ld.d $a4, $sp, 664 # 8-byte Folded Reload + add.d $a4, $a4, $t7 slli.d $a4, $a4, 3 sub.d $a4, $zero, $a4 - ld.d $a5, $sp, 216 # 8-byte Folded Reload + ld.d $a5, $sp, 208 # 8-byte Folded Reload add.d $a5, $a5, $a1 - st.d $a5, $sp, 504 # 8-byte Folded Spill - ld.d $a5, $sp, 224 # 8-byte Folded Reload + st.d $a5, $sp, 496 # 8-byte Folded Spill + ld.d $a5, $sp, 216 # 8-byte Folded Reload add.d $a1, $a5, $a1 - st.d $a1, $sp, 496 # 8-byte Folded Spill - ld.d $a1, $sp, 376 # 8-byte Folded Reload + st.d $a1, $sp, 488 # 8-byte Folded Spill + ld.d $a1, $sp, 368 # 8-byte Folded Reload andi $a1, $a1, 1 sltui $a1, $a1, 1 - ld.d $t7, $sp, 264 # 8-byte Folded Reload - masknez $a5, $t7, $a1 - ld.d $s0, $sp, 256 # 8-byte Folded Reload - maskeqz $a6, $s0, $a1 + ld.d $t1, $sp, 256 # 8-byte Folded Reload + masknez $a5, $t1, $a1 + ld.d $t8, $sp, 248 # 8-byte Folded Reload + maskeqz $a6, $t8, $a1 or $a5, $a6, $a5 - masknez $a6, $s0, $a1 - maskeqz $a1, $t7, $a1 + masknez $a6, $t8, $a1 + maskeqz $a1, $t1, $a1 or $a1, $a1, $a6 - ld.d $a1, $a1, 0 + ld.d $a6, $a1, 0 ld.d $a5, $a5, 0 - nor $a6, $t8, $zero - add.d $a6, $a0, $a6 - st.d $a6, $sp, 880 # 8-byte Folded Spill - ld.d $a6, $sp, 312 # 8-byte Folded Reload - alsl.d $t7, $a6, $a1, 3 - alsl.d $s3, $a6, $a5, 3 - ld.d $s0, $sp, 184 # 8-byte Folded Reload - add.d $s5, $a1, $s0 - st.d $s5, $sp, 744 # 8-byte Folded Spill - add.d $a1, $a1, $a2 - st.d $a1, $sp, 736 # 8-byte Folded Spill - add.d $a0, $a0, $a6 + nor $a1, $t7, $zero + add.d $a1, $a0, $a1 + st.d $a1, $sp, 880 # 8-byte Folded Spill + ld.d $t1, $sp, 304 # 8-byte Folded Reload + alsl.d $s8, $t1, $a6, 3 + alsl.d $a1, $t1, $a5, 3 + ld.d $t8, $sp, 176 # 8-byte Folded Reload + add.d $s2, $a6, $t8 + st.d $s2, $sp, 744 # 8-byte Folded Spill + add.d $a6, $a6, $a2 + st.d $a6, $sp, 736 # 8-byte Folded Spill + add.d $a0, $a0, $t1 st.d $a0, $sp, 856 # 8-byte Folded Spill - ld.d $a0, $sp, 304 # 8-byte Folded Reload + ld.d $a0, $sp, 296 # 8-byte Folded Reload add.d $a0, $a0, $a2 st.d $a0, $sp, 728 # 8-byte Folded Spill - ld.d $a0, $sp, 200 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload add.d $a0, $a5, $a0 st.d $a0, $sp, 720 # 8-byte Folded Spill add.d $a0, $a5, $a3 st.d $a0, $sp, 712 # 8-byte Folded Spill - add.d $a1, $a5, $s0 + add.d $a3, $a5, $t8 add.d $a0, $a5, $a2 st.d $a0, $sp, 848 # 8-byte Folded Spill - ld.d $a0, $sp, 192 # 8-byte Folded Reload + ld.d $a0, $sp, 184 # 8-byte Folded Reload add.d $a0, $a5, $a0 st.d $a0, $sp, 704 # 8-byte Folded Spill add.d $a0, $a5, $a4 st.d $a0, $sp, 696 # 8-byte Folded Spill - st.d $a1, $sp, 824 # 8-byte Folded Spill - addi.d $a0, $a1, -8 + ld.d $a4, $sp, 664 # 8-byte Folded Reload + st.d $a3, $sp, 824 # 8-byte Folded Spill + addi.d $a0, $a3, -8 st.d $a0, $sp, 688 # 8-byte Folded Spill - ld.d $a0, $sp, 296 # 8-byte Folded Reload + ld.d $a0, $sp, 288 # 8-byte Folded Reload add.d $a0, $a0, $a2 st.d $a0, $sp, 808 # 8-byte Folded Spill - ld.d $a0, $sp, 288 # 8-byte Folded Reload + ld.d $a0, $sp, 280 # 8-byte Folded Reload add.d $a0, $a0, $a2 st.d $a0, $sp, 792 # 8-byte Folded Spill - ld.d $a0, $sp, 280 # 8-byte Folded Reload + ld.d $a0, $sp, 272 # 8-byte Folded Reload add.d $a0, $a0, $a2 st.d $a0, $sp, 776 # 8-byte Folded Spill - ld.d $a0, $sp, 272 # 8-byte Folded Reload + ld.d $a0, $sp, 264 # 8-byte Folded Reload add.d $a0, $a0, $a2 st.d $a0, $sp, 680 # 8-byte Folded Spill - ld.d $a0, $sp, 320 # 8-byte Folded Reload + ld.d $a0, $sp, 312 # 8-byte Folded Reload add.d $a0, $a0, $a2 st.d $a0, $sp, 672 # 8-byte Folded Spill - st.d $s2, $sp, 872 # 8-byte Folded Spill - bstrins.d $s2, $zero, 0, 0 - st.d $s2, $sp, 656 # 8-byte Folded Spill - add.d $a0, $s2, $t8 - st.d $a0, $sp, 648 # 8-byte Folded Spill - ld.d $a2, $sp, 464 # 8-byte Folded Reload + st.d $s0, $sp, 872 # 8-byte Folded Spill + bstrins.d $s0, $zero, 0, 0 + st.d $s0, $sp, 648 # 8-byte Folded Spill + add.d $a0, $s0, $t7 + st.d $a0, $sp, 640 # 8-byte Folded Spill ld.d $a6, $sp, 456 # 8-byte Folded Reload - ld.d $s2, $sp, 416 # 8-byte Folded Reload - ld.d $s8, $sp, 424 # 8-byte Folded Reload - ld.d $ra, $sp, 432 # 8-byte Folded Reload - ld.d $a3, $sp, 440 # 8-byte Folded Reload - ld.d $a4, $sp, 400 # 8-byte Folded Reload - ld.d $a5, $sp, 448 # 8-byte Folded Reload - ld.d $t8, $sp, 392 # 8-byte Folded Reload - ld.d $s0, $sp, 384 # 8-byte Folded Reload - ld.d $a1, $sp, 536 # 8-byte Folded Reload + ld.d $s2, $sp, 448 # 8-byte Folded Reload + ld.d $s5, $sp, 408 # 8-byte Folded Reload + ld.d $s4, $sp, 416 # 8-byte Folded Reload + ld.d $a5, $sp, 424 # 8-byte Folded Reload + ld.d $t1, $sp, 432 # 8-byte Folded Reload + ld.d $t7, $sp, 392 # 8-byte Folded Reload + ld.d $t8, $sp, 440 # 8-byte Folded Reload + ld.d $s6, $sp, 384 # 8-byte Folded Reload + ld.d $s0, $sp, 376 # 8-byte Folded Reload + ld.d $a2, $sp, 528 # 8-byte Folded Reload b .LBB3_15 .p2align 4, , 16 .LBB3_14: # %._crit_edge213.split.us.us.us # in Loop: Header=BB3_15 Depth=4 - ld.d $a1, $sp, 544 # 8-byte Folded Reload - addi.w $a1, $a1, 1 - ld.d $s1, $sp, 632 # 8-byte Folded Reload - addi.d $s1, $s1, 1 - ld.d $t1, $sp, 488 # 8-byte Folded Reload - ld.d $s0, $sp, 552 # 8-byte Folded Reload - add.w $s0, $s0, $t1 + ld.d $a2, $sp, 536 # 8-byte Folded Reload + addi.w $a2, $a2, 1 + ld.d $fp, $sp, 624 # 8-byte Folded Reload + addi.d $fp, $fp, 1 + ld.d $t0, $sp, 480 # 8-byte Folded Reload + ld.d $s0, $sp, 544 # 8-byte Folded Reload + add.w $s0, $s0, $t0 + ld.d $s6, $sp, 552 # 8-byte Folded Reload + add.w $s6, $s6, $t0 ld.d $t8, $sp, 560 # 8-byte Folded Reload - add.w $t8, $t8, $t1 - ld.d $a5, $sp, 568 # 8-byte Folded Reload - add.w $a5, $a5, $t1 - ld.d $a4, $sp, 576 # 8-byte Folded Reload - add.w $a4, $a4, $t1 - ld.d $a3, $sp, 584 # 8-byte Folded Reload - add.w $a3, $a3, $t1 - ld.d $ra, $sp, 592 # 8-byte Folded Reload - add.d $ra, $ra, $t1 - ld.d $s8, $sp, 600 # 8-byte Folded Reload - add.d $s8, $s8, $t1 + add.w $t8, $t8, $t0 + ld.d $t7, $sp, 568 # 8-byte Folded Reload + add.w $t7, $t7, $t0 + ld.d $t1, $sp, 576 # 8-byte Folded Reload + add.w $t1, $t1, $t0 + ld.d $a5, $sp, 584 # 8-byte Folded Reload + add.d $a5, $a5, $t0 + ld.d $s4, $sp, 592 # 8-byte Folded Reload + add.d $s4, $s4, $t0 + ld.d $s5, $sp, 600 # 8-byte Folded Reload + add.d $s5, $s5, $t0 ld.d $s2, $sp, 608 # 8-byte Folded Reload - add.d $s2, $s2, $t1 + add.d $s2, $s2, $t0 ld.d $a6, $sp, 616 # 8-byte Folded Reload - add.d $a6, $a6, $t1 - ld.d $a2, $sp, 624 # 8-byte Folded Reload - add.d $a2, $a2, $t1 - ld.d $a0, $sp, 528 # 8-byte Folded Reload - bge $a1, $a0, .LBB3_9 + add.d $a6, $a6, $t0 + ld.d $a0, $sp, 520 # 8-byte Folded Reload + bge $a2, $a0, .LBB3_9 .LBB3_15: # %.preheader210.us.us # Parent Loop BB3_4 Depth=1 # Parent Loop BB3_7 Depth=2 @@ -1198,39 +1195,38 @@ smooth: # @smooth # Child Loop BB3_16 Depth 5 # Child Loop BB3_41 Depth 6 # Child Loop BB3_18 Depth 6 - st.d $a1, $sp, 544 # 8-byte Folded Spill - move $s5, $zero - st.d $s1, $sp, 632 # 8-byte Folded Spill - mul.d $a0, $t1, $s1 - ld.d $a1, $sp, 520 # 8-byte Folded Reload - add.d $a1, $a1, $a0 - st.d $a1, $sp, 760 # 8-byte Folded Spill - ld.d $a1, $sp, 512 # 8-byte Folded Reload - add.d $a1, $a1, $a0 - st.d $a1, $sp, 816 # 8-byte Folded Spill - ld.d $a1, $sp, 504 # 8-byte Folded Reload - add.d $a1, $a1, $a0 - st.d $a1, $sp, 768 # 8-byte Folded Spill - ld.d $a1, $sp, 496 # 8-byte Folded Reload - add.d $a0, $a1, $a0 + st.d $a2, $sp, 536 # 8-byte Folded Spill + move $a2, $zero + st.d $fp, $sp, 624 # 8-byte Folded Spill + mul.d $a0, $t0, $fp + ld.d $a3, $sp, 512 # 8-byte Folded Reload + add.d $a3, $a3, $a0 + st.d $a3, $sp, 760 # 8-byte Folded Spill + ld.d $a3, $sp, 504 # 8-byte Folded Reload + add.d $a3, $a3, $a0 + st.d $a3, $sp, 816 # 8-byte Folded Spill + ld.d $a3, $sp, 496 # 8-byte Folded Reload + add.d $a3, $a3, $a0 + st.d $a3, $sp, 768 # 8-byte Folded Spill + ld.d $a3, $sp, 488 # 8-byte Folded Reload + add.d $a0, $a3, $a0 st.d $a0, $sp, 752 # 8-byte Folded Spill - st.d $a2, $sp, 624 # 8-byte Folded Spill - move $t1, $a2 st.d $a6, $sp, 616 # 8-byte Folded Spill st.d $s2, $sp, 608 # 8-byte Folded Spill - st.d $s8, $sp, 600 # 8-byte Folded Spill - st.d $ra, $sp, 592 # 8-byte Folded Spill - st.d $a3, $sp, 584 # 8-byte Folded Spill - st.d $a3, $sp, 896 # 8-byte Folded Spill - st.d $a4, $sp, 576 # 8-byte Folded Spill - st.d $a4, $sp, 888 # 8-byte Folded Spill - st.d $a5, $sp, 568 # 8-byte Folded Spill - move $a4, $a5 + st.d $s5, $sp, 600 # 8-byte Folded Spill + st.d $s4, $sp, 592 # 8-byte Folded Spill + st.d $a5, $sp, 584 # 8-byte Folded Spill + move $fp, $a5 + st.d $t1, $sp, 576 # 8-byte Folded Spill + st.d $t1, $sp, 904 # 8-byte Folded Spill + st.d $t7, $sp, 568 # 8-byte Folded Spill + st.d $t7, $sp, 896 # 8-byte Folded Spill st.d $t8, $sp, 560 # 8-byte Folded Spill - move $a0, $t8 - st.d $s0, $sp, 552 # 8-byte Folded Spill - move $s1, $s0 - ld.d $a2, $sp, 536 # 8-byte Folded Reload + st.d $t8, $sp, 888 # 8-byte Folded Spill + st.d $s6, $sp, 552 # 8-byte Folded Spill + st.d $s0, $sp, 544 # 8-byte Folded Spill + move $t7, $s0 + ld.d $t0, $sp, 528 # 8-byte Folded Reload .p2align 4, , 16 .LBB3_16: # %.preheader.us.us.us # Parent Loop BB3_4 Depth=1 @@ -1240,13 +1236,13 @@ smooth: # @smooth # => This Loop Header: Depth=5 # Child Loop BB3_41 Depth 6 # Child Loop BB3_18 Depth 6 - ld.d $s7, $sp, 904 # 8-byte Folded Reload - ld.d $a1, $sp, 872 # 8-byte Folded Reload + ld.d $s3, $sp, 912 # 8-byte Folded Reload + ld.d $a0, $sp, 872 # 8-byte Folded Reload ori $a3, $zero, 8 - bgeu $a1, $a3, .LBB3_20 + bgeu $a0, $a3, .LBB3_20 .LBB3_17: # %scalar.ph.preheader # in Loop: Header=BB3_16 Depth=5 - move $a1, $s7 + move $a3, $s3 .p2align 4, , 16 .LBB3_18: # %scalar.ph # Parent Loop BB3_4 Depth=1 @@ -1255,310 +1251,311 @@ smooth: # @smooth # Parent Loop BB3_15 Depth=4 # Parent Loop BB3_16 Depth=5 # => This Inner Loop Header: Depth=6 - add.w $a5, $t1, $s7 - slli.d $a3, $a5, 3 - fldx.d $fa2, $t2, $a3 - alsl.d $t8, $a5, $s3, 3 - fldx.d $fa3, $s3, $a3 - addi.w $a5, $a5, 1 - slli.d $a5, $a5, 3 - fldx.d $fa4, $s3, $a5 - fld.d $fa5, $t8, -8 - fldx.d $fa6, $t3, $a5 - fldx.d $fa7, $t3, $a3 + add.w $a0, $a6, $s3 + slli.d $a5, $a0, 3 + fldx.d $fa2, $t3, $a5 + alsl.d $t1, $a0, $a1, 3 + fldx.d $fa3, $a1, $a5 + addi.w $a0, $a0, 1 + slli.d $a0, $a0, 3 + fldx.d $fa4, $a1, $a0 + fld.d $fa5, $t1, -8 + fldx.d $fa6, $t4, $a0 + fldx.d $fa7, $t4, $a5 fsub.d $fa4, $fa4, $fa3 fsub.d $fa5, $fa3, $fa5 fneg.d $fa5, $fa5 fmul.d $fa5, $fa7, $fa5 fmadd.d $fa4, $fa6, $fa4, $fa5 - add.w $a5, $a6, $s7 - slli.d $a5, $a5, 3 - fldx.d $fa5, $t4, $a5 - fldx.d $fa6, $s3, $a5 - add.w $a5, $s2, $s7 - slli.d $a5, $a5, 3 - fldx.d $fa7, $s3, $a5 + add.w $a0, $s2, $s3 + slli.d $a0, $a0, 3 + fldx.d $fa5, $t5, $a0 + fldx.d $fa6, $a1, $a0 + add.w $a0, $s5, $s3 + slli.d $a0, $a0, 3 + fldx.d $fa7, $a1, $a0 fsub.d $fa6, $fa6, $fa3 fmadd.d $fa4, $fa5, $fa6, $fa4 - fldx.d $fa5, $t4, $a3 + fldx.d $fa5, $t5, $a5 fsub.d $fa6, $fa3, $fa7 - add.w $a5, $s8, $s7 - slli.d $a5, $a5, 3 - fldx.d $fa7, $s3, $a5 + add.w $a0, $s4, $s3 + slli.d $a0, $a0, 3 + fldx.d $fa7, $a1, $a0 fneg.d $fa5, $fa5 fmadd.d $fa4, $fa5, $fa6, $fa4 - fldx.d $fa5, $t5, $a5 + fldx.d $fa5, $t6, $a0 fsub.d $fa6, $fa7, $fa3 - add.w $a5, $ra, $s7 - slli.d $a5, $a5, 3 - fldx.d $fa7, $s3, $a5 - fldx.d $ft0, $t5, $a3 + add.w $a0, $fp, $s3 + slli.d $a0, $a0, 3 + fldx.d $fa7, $a1, $a0 + fldx.d $ft0, $t6, $a5 fmadd.d $fa4, $fa5, $fa6, $fa4 fmul.d $fa2, $ft1, $fa2 fsub.d $fa5, $fa3, $fa7 fneg.d $fa6, $ft0 fnmadd.d $fa4, $fa6, $fa5, $fa4 - fldx.d $fa5, $t6, $a3 - fldx.d $fa6, $a7, $a3 - pcalau12i $a5, %pc_hi20(.LCPI3_0) - fld.d $fa7, $a5, %pc_lo12(.LCPI3_0) + fldx.d $fa5, $t2, $a5 + fldx.d $fa6, $s1, $a5 fmul.d $fa4, $fa0, $fa4 fmadd.d $fa2, $fa2, $fa3, $fa4 - fsub.d $fa2, $fa2, $fa6 - fmul.d $fa4, $fa5, $fa7 + fsub.d $fa2, $fa2, $fa5 + fmul.d $fa4, $fa6, $fs1 fmadd.d $fa2, $fa4, $fa2, $fa3 - fstx.d $fa2, $t7, $a3 - addi.d $a1, $a1, 1 - addi.d $s7, $s7, 1 - blt $a1, $t0, .LBB3_18 + fstx.d $fa2, $s8, $a5 + addi.d $a3, $a3, 1 + addi.d $s3, $s3, 1 + blt $a3, $a7, .LBB3_18 .LBB3_19: # %._crit_edge.us.us.us # in Loop: Header=BB3_16 Depth=5 - addi.w $a2, $a2, 1 - addi.d $s5, $s5, 1 - add.w $s1, $s1, $fp - add.w $a0, $a0, $fp - add.w $a4, $a4, $fp - ld.d $a1, $sp, 888 # 8-byte Folded Reload - add.w $a1, $a1, $fp - st.d $a1, $sp, 888 # 8-byte Folded Spill - ld.d $a1, $sp, 896 # 8-byte Folded Reload - add.w $a1, $a1, $fp - st.d $a1, $sp, 896 # 8-byte Folded Spill - add.d $ra, $ra, $fp - add.d $s8, $s8, $fp - add.d $s2, $s2, $fp - add.d $a6, $a6, $fp - add.d $t1, $t1, $fp - ld.d $a1, $sp, 864 # 8-byte Folded Reload - blt $a2, $a1, .LBB3_16 + addi.w $t0, $t0, 1 + addi.d $a2, $a2, 1 + add.w $t7, $t7, $a4 + add.w $s6, $s6, $a4 + ld.d $a0, $sp, 888 # 8-byte Folded Reload + add.w $a0, $a0, $a4 + st.d $a0, $sp, 888 # 8-byte Folded Spill + ld.d $a0, $sp, 896 # 8-byte Folded Reload + add.w $a0, $a0, $a4 + st.d $a0, $sp, 896 # 8-byte Folded Spill + ld.d $a0, $sp, 904 # 8-byte Folded Reload + add.w $a0, $a0, $a4 + st.d $a0, $sp, 904 # 8-byte Folded Spill + add.d $fp, $fp, $a4 + add.d $s4, $s4, $a4 + add.d $s5, $s5, $a4 + add.d $s2, $s2, $a4 + add.d $a6, $a6, $a4 + ld.d $a0, $sp, 864 # 8-byte Folded Reload + blt $t0, $a0, .LBB3_16 b .LBB3_14 .p2align 4, , 16 .LBB3_20: # %vector.scevcheck # in Loop: Header=BB3_16 Depth=5 - mul.d $a1, $fp, $s5 - ld.d $a3, $sp, 816 # 8-byte Folded Reload - add.w $a5, $a3, $a1 - ld.d $a3, $sp, 880 # 8-byte Folded Reload - add.w $a3, $a5, $a3 - ld.d $s7, $sp, 904 # 8-byte Folded Reload - st.d $a5, $sp, 840 # 8-byte Folded Spill - blt $a3, $a5, .LBB3_17 + mul.d $a3, $a4, $a2 + ld.d $a0, $sp, 816 # 8-byte Folded Reload + add.w $t1, $a0, $a3 + ld.d $a0, $sp, 880 # 8-byte Folded Reload + add.w $a5, $t1, $a0 + ld.d $s3, $sp, 912 # 8-byte Folded Reload + st.d $t1, $sp, 840 # 8-byte Folded Spill + blt $a5, $t1, .LBB3_17 # %bb.21: # %vector.scevcheck # in Loop: Header=BB3_16 Depth=5 - ld.d $a3, $sp, 768 # 8-byte Folded Reload - add.w $a5, $a3, $a1 - ld.d $a3, $sp, 880 # 8-byte Folded Reload - add.w $a3, $a5, $a3 - ld.d $s7, $sp, 904 # 8-byte Folded Reload - st.d $a5, $sp, 832 # 8-byte Folded Spill - blt $a3, $a5, .LBB3_17 + ld.d $a0, $sp, 768 # 8-byte Folded Reload + add.w $t1, $a0, $a3 + ld.d $a0, $sp, 880 # 8-byte Folded Reload + add.w $a5, $t1, $a0 + ld.d $s3, $sp, 912 # 8-byte Folded Reload + st.d $t1, $sp, 832 # 8-byte Folded Spill + blt $a5, $t1, .LBB3_17 # %bb.22: # %vector.scevcheck # in Loop: Header=BB3_16 Depth=5 - ld.d $a3, $sp, 760 # 8-byte Folded Reload - add.w $a5, $a3, $a1 - ld.d $a3, $sp, 880 # 8-byte Folded Reload - add.w $a3, $a5, $a3 - ld.d $s7, $sp, 904 # 8-byte Folded Reload - blt $a3, $a5, .LBB3_17 + ld.d $a0, $sp, 760 # 8-byte Folded Reload + add.w $t1, $a0, $a3 + ld.d $a0, $sp, 880 # 8-byte Folded Reload + add.w $a5, $t1, $a0 + ld.d $s3, $sp, 912 # 8-byte Folded Reload + blt $a5, $t1, .LBB3_17 # %bb.23: # %vector.scevcheck # in Loop: Header=BB3_16 Depth=5 - ld.d $a3, $sp, 752 # 8-byte Folded Reload - add.w $a3, $a3, $a1 - ld.d $a1, $sp, 880 # 8-byte Folded Reload - add.w $a1, $a3, $a1 - ld.d $s7, $sp, 904 # 8-byte Folded Reload - blt $a1, $a3, .LBB3_17 + ld.d $a0, $sp, 752 # 8-byte Folded Reload + add.w $a3, $a0, $a3 + ld.d $a0, $sp, 880 # 8-byte Folded Reload + add.w $a5, $a3, $a0 + ld.d $s3, $sp, 912 # 8-byte Folded Reload + blt $a5, $a3, .LBB3_17 # %bb.24: # %vector.scevcheck # in Loop: Header=BB3_16 Depth=5 - ld.d $a1, $sp, 880 # 8-byte Folded Reload - srli.d $a1, $a1, 32 - ld.d $s7, $sp, 904 # 8-byte Folded Reload - bnez $a1, .LBB3_17 + ld.d $a0, $sp, 880 # 8-byte Folded Reload + srli.d $a5, $a0, 32 + ld.d $s3, $sp, 912 # 8-byte Folded Reload + bnez $a5, .LBB3_17 # %bb.25: # %vector.memcheck # in Loop: Header=BB3_16 Depth=5 - move $s4, $a3 - ld.d $a1, $sp, 744 # 8-byte Folded Reload - alsl.d $a1, $a5, $a1, 3 - ld.d $a3, $sp, 856 # 8-byte Folded Reload - add.d $t8, $a3, $a5 - ld.d $a3, $sp, 736 # 8-byte Folded Reload - alsl.d $s6, $t8, $a3, 3 - alsl.d $s0, $a5, $t2, 3 - ld.d $a3, $sp, 728 # 8-byte Folded Reload - alsl.d $s7, $t8, $a3, 3 - sltu $s7, $a1, $s7 - sltu $s0, $s0, $s6 - and $s0, $s7, $s0 - ld.d $s7, $sp, 904 # 8-byte Folded Reload + ld.d $a0, $sp, 744 # 8-byte Folded Reload + alsl.d $t8, $t1, $a0, 3 + ld.d $a0, $sp, 856 # 8-byte Folded Reload + add.d $a5, $a0, $t1 + ld.d $a0, $sp, 736 # 8-byte Folded Reload + alsl.d $s7, $a5, $a0, 3 + alsl.d $s0, $t1, $t3, 3 + ld.d $a0, $sp, 728 # 8-byte Folded Reload + alsl.d $s3, $a5, $a0, 3 + sltu $s3, $t8, $s3 + sltu $s0, $s0, $s7 + and $s0, $s3, $s0 + ld.d $s3, $sp, 912 # 8-byte Folded Reload bnez $s0, .LBB3_17 # %bb.26: # %vector.memcheck # in Loop: Header=BB3_16 Depth=5 - ld.d $a3, $sp, 720 # 8-byte Folded Reload - alsl.d $s0, $a5, $a3, 3 - ld.d $a3, $sp, 712 # 8-byte Folded Reload - alsl.d $s7, $t8, $a3, 3 - sltu $s7, $a1, $s7 - sltu $s0, $s0, $s6 - and $s0, $s7, $s0 - ld.d $s7, $sp, 904 # 8-byte Folded Reload + ld.d $a0, $sp, 720 # 8-byte Folded Reload + alsl.d $s0, $t1, $a0, 3 + ld.d $a0, $sp, 712 # 8-byte Folded Reload + alsl.d $s3, $a5, $a0, 3 + sltu $s3, $t8, $s3 + sltu $s0, $s0, $s7 + and $s0, $s3, $s0 + ld.d $s3, $sp, 912 # 8-byte Folded Reload bnez $s0, .LBB3_17 # %bb.27: # %vector.memcheck # in Loop: Header=BB3_16 Depth=5 - ld.d $a3, $sp, 824 # 8-byte Folded Reload - ld.d $s7, $sp, 840 # 8-byte Folded Reload - alsl.d $s0, $s7, $a3, 3 - ld.d $a3, $sp, 856 # 8-byte Folded Reload - add.d $s7, $a3, $s7 - ld.d $a3, $sp, 848 # 8-byte Folded Reload - st.d $s7, $sp, 784 # 8-byte Folded Spill - alsl.d $s7, $s7, $a3, 3 - sltu $s7, $a1, $s7 - sltu $s0, $s0, $s6 - and $s0, $s7, $s0 - ld.d $s7, $sp, 904 # 8-byte Folded Reload + move $ra, $a3 + ld.d $a0, $sp, 824 # 8-byte Folded Reload + ld.d $a3, $sp, 840 # 8-byte Folded Reload + alsl.d $s0, $a3, $a0, 3 + ld.d $a0, $sp, 856 # 8-byte Folded Reload + add.d $a3, $a0, $a3 + ld.d $a0, $sp, 848 # 8-byte Folded Reload + st.d $a3, $sp, 784 # 8-byte Folded Spill + alsl.d $s3, $a3, $a0, 3 + sltu $s3, $t8, $s3 + sltu $s0, $s0, $s7 + and $s0, $s3, $s0 + ld.d $s3, $sp, 912 # 8-byte Folded Reload bnez $s0, .LBB3_17 # %bb.28: # %vector.memcheck # in Loop: Header=BB3_16 Depth=5 - ld.d $a3, $sp, 704 # 8-byte Folded Reload - alsl.d $s0, $a5, $a3, 3 - ld.d $a3, $sp, 696 # 8-byte Folded Reload - alsl.d $s7, $t8, $a3, 3 - sltu $s7, $a1, $s7 - sltu $s0, $s0, $s6 - and $s0, $s7, $s0 - ld.d $s7, $sp, 904 # 8-byte Folded Reload + ld.d $a0, $sp, 704 # 8-byte Folded Reload + alsl.d $s0, $t1, $a0, 3 + ld.d $a0, $sp, 696 # 8-byte Folded Reload + alsl.d $s3, $a5, $a0, 3 + sltu $s3, $t8, $s3 + sltu $s0, $s0, $s7 + and $s0, $s3, $s0 + ld.d $s3, $sp, 912 # 8-byte Folded Reload bnez $s0, .LBB3_17 # %bb.29: # %vector.memcheck # in Loop: Header=BB3_16 Depth=5 - ld.d $a3, $sp, 824 # 8-byte Folded Reload - ld.d $s7, $sp, 832 # 8-byte Folded Reload - alsl.d $s0, $s7, $a3, 3 - ld.d $a3, $sp, 856 # 8-byte Folded Reload - add.d $s7, $a3, $s7 - ld.d $a3, $sp, 848 # 8-byte Folded Reload - st.d $s7, $sp, 800 # 8-byte Folded Spill - alsl.d $s7, $s7, $a3, 3 - sltu $s7, $a1, $s7 - sltu $s0, $s0, $s6 - and $s0, $s7, $s0 - ld.d $s7, $sp, 904 # 8-byte Folded Reload + ld.d $a0, $sp, 824 # 8-byte Folded Reload + ld.d $a3, $sp, 832 # 8-byte Folded Reload + alsl.d $s0, $a3, $a0, 3 + ld.d $a0, $sp, 856 # 8-byte Folded Reload + add.d $a3, $a0, $a3 + ld.d $a0, $sp, 848 # 8-byte Folded Reload + st.d $a3, $sp, 800 # 8-byte Folded Spill + alsl.d $s3, $a3, $a0, 3 + sltu $s3, $t8, $s3 + sltu $s0, $s0, $s7 + and $s0, $s3, $s0 + ld.d $s3, $sp, 912 # 8-byte Folded Reload bnez $s0, .LBB3_17 # %bb.30: # %vector.memcheck # in Loop: Header=BB3_16 Depth=5 - ld.d $a3, $sp, 688 # 8-byte Folded Reload - alsl.d $s0, $a5, $a3, 3 - ld.d $a3, $sp, 848 # 8-byte Folded Reload - alsl.d $s7, $t8, $a3, 3 - sltu $s7, $a1, $s7 - sltu $s0, $s0, $s6 - and $s0, $s7, $s0 - ld.d $s7, $sp, 904 # 8-byte Folded Reload + ld.d $a0, $sp, 688 # 8-byte Folded Reload + alsl.d $s0, $t1, $a0, 3 + ld.d $a0, $sp, 848 # 8-byte Folded Reload + alsl.d $s3, $a5, $a0, 3 + sltu $s3, $t8, $s3 + sltu $s0, $s0, $s7 + and $s0, $s3, $s0 + ld.d $s3, $sp, 912 # 8-byte Folded Reload bnez $s0, .LBB3_17 # %bb.31: # %vector.memcheck # in Loop: Header=BB3_16 Depth=5 - ld.d $a3, $sp, 824 # 8-byte Folded Reload - alsl.d $s7, $s4, $a3, 3 - ld.d $a3, $sp, 856 # 8-byte Folded Reload - add.d $s0, $a3, $s4 - ld.d $a3, $sp, 848 # 8-byte Folded Reload - alsl.d $a3, $s0, $a3, 3 - sltu $a3, $a1, $a3 - sltu $s7, $s7, $s6 - and $a3, $a3, $s7 - ld.d $s7, $sp, 904 # 8-byte Folded Reload + ld.d $a0, $sp, 824 # 8-byte Folded Reload + alsl.d $s3, $ra, $a0, 3 + ld.d $a0, $sp, 856 # 8-byte Folded Reload + add.d $s0, $a0, $ra + ld.d $a0, $sp, 848 # 8-byte Folded Reload + alsl.d $a3, $s0, $a0, 3 + sltu $a3, $t8, $a3 + sltu $s3, $s3, $s7 + and $a3, $a3, $s3 + ld.d $s3, $sp, 912 # 8-byte Folded Reload bnez $a3, .LBB3_17 # %bb.32: # %vector.memcheck # in Loop: Header=BB3_16 Depth=5 - alsl.d $a3, $a5, $t3, 3 - ld.d $s7, $sp, 808 # 8-byte Folded Reload - alsl.d $s7, $t8, $s7, 3 - sltu $s7, $a1, $s7 - sltu $a3, $a3, $s6 - and $a3, $s7, $a3 - ld.d $s7, $sp, 904 # 8-byte Folded Reload + alsl.d $a3, $t1, $t4, 3 + ld.d $a0, $sp, 808 # 8-byte Folded Reload + alsl.d $s3, $a5, $a0, 3 + sltu $s3, $t8, $s3 + sltu $a3, $a3, $s7 + and $a3, $s3, $a3 + ld.d $s3, $sp, 912 # 8-byte Folded Reload bnez $a3, .LBB3_17 # %bb.33: # %vector.memcheck # in Loop: Header=BB3_16 Depth=5 - alsl.d $a3, $s4, $t3, 3 - ld.d $s7, $sp, 808 # 8-byte Folded Reload - alsl.d $s0, $s0, $s7, 3 - sltu $s0, $a1, $s0 - sltu $a3, $a3, $s6 + alsl.d $a3, $ra, $t4, 3 + ld.d $a0, $sp, 808 # 8-byte Folded Reload + alsl.d $s0, $s0, $a0, 3 + sltu $s0, $t8, $s0 + sltu $a3, $a3, $s7 and $a3, $s0, $a3 - ld.d $s7, $sp, 904 # 8-byte Folded Reload + ld.d $s3, $sp, 912 # 8-byte Folded Reload bnez $a3, .LBB3_17 # %bb.34: # %vector.memcheck # in Loop: Header=BB3_16 Depth=5 - alsl.d $a3, $a5, $t4, 3 - ld.d $s0, $sp, 792 # 8-byte Folded Reload - alsl.d $s0, $t8, $s0, 3 - sltu $s0, $a1, $s0 - sltu $a3, $a3, $s6 + alsl.d $a3, $t1, $t5, 3 + ld.d $a0, $sp, 792 # 8-byte Folded Reload + alsl.d $s0, $a5, $a0, 3 + sltu $s0, $t8, $s0 + sltu $a3, $a3, $s7 and $a3, $s0, $a3 - ld.d $s7, $sp, 904 # 8-byte Folded Reload + ld.d $s3, $sp, 912 # 8-byte Folded Reload bnez $a3, .LBB3_17 # %bb.35: # %vector.memcheck # in Loop: Header=BB3_16 Depth=5 - ld.d $a3, $sp, 832 # 8-byte Folded Reload - alsl.d $a3, $a3, $t4, 3 - ld.d $s0, $sp, 792 # 8-byte Folded Reload - ld.d $s4, $sp, 800 # 8-byte Folded Reload - alsl.d $s0, $s4, $s0, 3 - sltu $s0, $a1, $s0 - sltu $a3, $a3, $s6 + ld.d $a0, $sp, 832 # 8-byte Folded Reload + alsl.d $a3, $a0, $t5, 3 + ld.d $a0, $sp, 792 # 8-byte Folded Reload + ld.d $s0, $sp, 800 # 8-byte Folded Reload + alsl.d $s0, $s0, $a0, 3 + sltu $s0, $t8, $s0 + sltu $a3, $a3, $s7 and $a3, $s0, $a3 - ld.d $s7, $sp, 904 # 8-byte Folded Reload + ld.d $s3, $sp, 912 # 8-byte Folded Reload bnez $a3, .LBB3_17 # %bb.36: # %vector.memcheck # in Loop: Header=BB3_16 Depth=5 - alsl.d $a3, $a5, $t5, 3 - ld.d $s0, $sp, 776 # 8-byte Folded Reload - alsl.d $s0, $t8, $s0, 3 - sltu $s0, $a1, $s0 - sltu $a3, $a3, $s6 + alsl.d $a3, $t1, $t6, 3 + ld.d $a0, $sp, 776 # 8-byte Folded Reload + alsl.d $s0, $a5, $a0, 3 + sltu $s0, $t8, $s0 + sltu $a3, $a3, $s7 and $a3, $s0, $a3 - ld.d $s7, $sp, 904 # 8-byte Folded Reload + ld.d $s3, $sp, 912 # 8-byte Folded Reload bnez $a3, .LBB3_17 # %bb.37: # %vector.memcheck # in Loop: Header=BB3_16 Depth=5 - ld.d $a3, $sp, 840 # 8-byte Folded Reload - alsl.d $a3, $a3, $t5, 3 - ld.d $s0, $sp, 776 # 8-byte Folded Reload - ld.d $s4, $sp, 784 # 8-byte Folded Reload - alsl.d $s0, $s4, $s0, 3 - sltu $s0, $a1, $s0 - sltu $a3, $a3, $s6 + ld.d $a0, $sp, 840 # 8-byte Folded Reload + alsl.d $a3, $a0, $t6, 3 + ld.d $a0, $sp, 776 # 8-byte Folded Reload + ld.d $s0, $sp, 784 # 8-byte Folded Reload + alsl.d $s0, $s0, $a0, 3 + sltu $s0, $t8, $s0 + sltu $a3, $a3, $s7 and $a3, $s0, $a3 - ld.d $s7, $sp, 904 # 8-byte Folded Reload + ld.d $s3, $sp, 912 # 8-byte Folded Reload bnez $a3, .LBB3_17 # %bb.38: # %vector.memcheck # in Loop: Header=BB3_16 Depth=5 - alsl.d $a3, $a5, $t6, 3 - ld.d $s0, $sp, 680 # 8-byte Folded Reload - alsl.d $s0, $t8, $s0, 3 - sltu $s0, $a1, $s0 - sltu $a3, $a3, $s6 + alsl.d $a3, $t1, $s1, 3 + ld.d $a0, $sp, 680 # 8-byte Folded Reload + alsl.d $s0, $a5, $a0, 3 + sltu $s0, $t8, $s0 + sltu $a3, $a3, $s7 and $a3, $s0, $a3 - ld.d $s7, $sp, 904 # 8-byte Folded Reload + ld.d $s3, $sp, 912 # 8-byte Folded Reload bnez $a3, .LBB3_17 # %bb.39: # %vector.memcheck # in Loop: Header=BB3_16 Depth=5 - alsl.d $a3, $a5, $a7, 3 - ld.d $a5, $sp, 672 # 8-byte Folded Reload - alsl.d $a5, $t8, $a5, 3 - sltu $a1, $a1, $a5 - sltu $a3, $a3, $s6 - and $a1, $a1, $a3 - ld.d $s7, $sp, 904 # 8-byte Folded Reload - bnez $a1, .LBB3_17 + alsl.d $a3, $t1, $t2, 3 + ld.d $a0, $sp, 672 # 8-byte Folded Reload + alsl.d $a5, $a5, $a0, 3 + sltu $a5, $t8, $a5 + sltu $a3, $a3, $s7 + and $a3, $a5, $a3 + ld.d $s3, $sp, 912 # 8-byte Folded Reload + bnez $a3, .LBB3_17 # %bb.40: # %vector.ph # in Loop: Header=BB3_16 Depth=5 - ld.d $a1, $sp, 664 # 8-byte Folded Reload - ld.d $a3, $sp, 896 # 8-byte Folded Reload - ld.d $a5, $sp, 888 # 8-byte Folded Reload - move $t8, $a4 - move $s0, $a0 - move $s6, $s1 + ld.d $a3, $sp, 656 # 8-byte Folded Reload + ld.d $a5, $sp, 904 # 8-byte Folded Reload + ld.d $t1, $sp, 896 # 8-byte Folded Reload + ld.d $t8, $sp, 888 # 8-byte Folded Reload + move $s0, $s6 + move $s3, $t7 + ld.d $a4, $sp, 632 # 8-byte Folded Reload .p2align 4, , 16 .LBB3_41: # %vector.body # Parent Loop BB3_4 Depth=1 @@ -1567,81 +1564,83 @@ smooth: # @smooth # Parent Loop BB3_15 Depth=4 # Parent Loop BB3_16 Depth=5 # => This Inner Loop Header: Depth=6 - slli.d $s7, $s6, 3 - vldx $vr2, $t2, $s7 - alsl.d $fp, $s6, $s3, 3 - vldx $vr3, $s3, $s7 - addi.w $s4, $s6, 1 - slli.d $s4, $s4, 3 - vldx $vr4, $s3, $s4 - vld $vr5, $fp, -8 - vldx $vr6, $t3, $s4 - vldx $vr7, $t3, $s7 + slli.d $s7, $s3, 3 + vldx $vr2, $t3, $s7 + alsl.d $a0, $s3, $a1, 3 + vldx $vr3, $a1, $s7 + addi.w $ra, $s3, 1 + slli.d $ra, $ra, 3 + vldx $vr4, $a1, $ra + vld $vr5, $a0, -8 + vldx $vr6, $t4, $ra + vldx $vr7, $t4, $s7 vfsub.d $vr4, $vr4, $vr3 vfsub.d $vr5, $vr3, $vr5 vbitrevi.d $vr5, $vr5, 63 vfmul.d $vr5, $vr7, $vr5 vfmadd.d $vr4, $vr6, $vr4, $vr5 - slli.d $fp, $s0, 3 - vldx $vr5, $s3, $fp - vldx $vr6, $t4, $fp - slli.d $fp, $t8, 3 - vldx $vr7, $s3, $fp + slli.d $a0, $s0, 3 + vldx $vr5, $a1, $a0 + vldx $vr6, $t5, $a0 + slli.d $a0, $t8, 3 + vldx $vr7, $a1, $a0 vfsub.d $vr5, $vr5, $vr3 vfmadd.d $vr4, $vr6, $vr5, $vr4 - vldx $vr5, $t4, $s7 + vldx $vr5, $t5, $s7 vfsub.d $vr6, $vr3, $vr7 - slli.d $fp, $a5, 3 - vldx $vr7, $s3, $fp + slli.d $a0, $t1, 3 + vldx $vr7, $a1, $a0 vbitrevi.d $vr5, $vr5, 63 vfmadd.d $vr4, $vr5, $vr6, $vr4 - vldx $vr5, $t5, $fp + vldx $vr5, $t6, $a0 vfsub.d $vr6, $vr7, $vr3 - slli.d $fp, $a3, 3 - vldx $vr7, $s3, $fp - vldx $vr8, $t5, $s7 + slli.d $a0, $a5, 3 + vldx $vr7, $a1, $a0 + vldx $vr8, $t6, $s7 vfmadd.d $vr4, $vr5, $vr6, $vr4 vfmul.d $vr2, $vr10, $vr2 vfsub.d $vr5, $vr3, $vr7 vbitrevi.d $vr6, $vr8, 63 vfnmadd.d $vr4, $vr6, $vr5, $vr4 - vldx $vr5, $a7, $s7 - vldx $vr6, $t6, $s7 + vldx $vr5, $t2, $s7 vfmul.d $vr4, $vr1, $vr4 + vldx $vr6, $s1, $s7 vfmadd.d $vr2, $vr2, $vr3, $vr4 vfsub.d $vr2, $vr2, $vr5 - vfmul.d $vr4, $vr6, $vr11 + vreplgr2vr.d $vr4, $a4 + vfmul.d $vr4, $vr6, $vr4 vfmadd.d $vr2, $vr4, $vr2, $vr3 - vstx $vr2, $t7, $s7 - addi.w $s6, $s6, 2 + vstx $vr2, $s8, $s7 + addi.w $s3, $s3, 2 addi.w $s0, $s0, 2 addi.w $t8, $t8, 2 + addi.w $t1, $t1, 2 + addi.d $a3, $a3, -2 addi.w $a5, $a5, 2 - addi.d $a1, $a1, -2 - addi.w $a3, $a3, 2 - bnez $a1, .LBB3_41 + bnez $a3, .LBB3_41 # %bb.42: # %middle.block # in Loop: Header=BB3_16 Depth=5 - ld.d $s7, $sp, 648 # 8-byte Folded Reload - ld.d $fp, $sp, 640 # 8-byte Folded Reload - ld.d $a1, $sp, 872 # 8-byte Folded Reload - ld.d $a3, $sp, 656 # 8-byte Folded Reload - bne $a1, $a3, .LBB3_17 + ld.d $s3, $sp, 640 # 8-byte Folded Reload + ld.d $a4, $sp, 664 # 8-byte Folded Reload + ld.d $a0, $sp, 872 # 8-byte Folded Reload + ld.d $a3, $sp, 648 # 8-byte Folded Reload + bne $a0, $a3, .LBB3_17 b .LBB3_19 .LBB3_43: - fld.d $fs0, $sp, 912 # 8-byte Folded Reload - ld.d $s8, $sp, 920 # 8-byte Folded Reload - ld.d $s7, $sp, 928 # 8-byte Folded Reload - ld.d $s6, $sp, 936 # 8-byte Folded Reload - ld.d $s5, $sp, 944 # 8-byte Folded Reload - ld.d $s4, $sp, 952 # 8-byte Folded Reload - ld.d $s3, $sp, 960 # 8-byte Folded Reload - ld.d $s2, $sp, 968 # 8-byte Folded Reload - ld.d $s1, $sp, 976 # 8-byte Folded Reload - ld.d $s0, $sp, 984 # 8-byte Folded Reload - ld.d $fp, $sp, 992 # 8-byte Folded Reload - ld.d $ra, $sp, 1000 # 8-byte Folded Reload - addi.d $sp, $sp, 1008 + fld.d $fs1, $sp, 920 # 8-byte Folded Reload + fld.d $fs0, $sp, 928 # 8-byte Folded Reload + ld.d $s8, $sp, 936 # 8-byte Folded Reload + ld.d $s7, $sp, 944 # 8-byte Folded Reload + ld.d $s6, $sp, 952 # 8-byte Folded Reload + ld.d $s5, $sp, 960 # 8-byte Folded Reload + ld.d $s4, $sp, 968 # 8-byte Folded Reload + ld.d $s3, $sp, 976 # 8-byte Folded Reload + ld.d $s2, $sp, 984 # 8-byte Folded Reload + ld.d $s1, $sp, 992 # 8-byte Folded Reload + ld.d $s0, $sp, 1000 # 8-byte Folded Reload + ld.d $fp, $sp, 1008 # 8-byte Folded Reload + ld.d $ra, $sp, 1016 # 8-byte Folded Reload + addi.d $sp, $sp, 1024 ret .Lfunc_end3: .size smooth, .Lfunc_end3-smooth @@ -4751,39 +4750,28 @@ interpolation_constant: # @interpolation_constant .Lfunc_end9: .size interpolation_constant, .Lfunc_end9-interpolation_constant # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function interpolation_linear -.LCPI10_0: - .dword 0xbfb8000000000000 # double -0.09375 -.LCPI10_1: - .dword 0x3fec200000000000 # double 0.87890625 -.LCPI10_2: - .dword 0x3fea5e0000000000 # double 0.823974609375 - .text - .globl interpolation_linear + .globl interpolation_linear # -- Begin function interpolation_linear .p2align 5 .type interpolation_linear,@function interpolation_linear: # @interpolation_linear # %bb.0: - addi.d $sp, $sp, -240 - st.d $ra, $sp, 232 # 8-byte Folded Spill - st.d $fp, $sp, 224 # 8-byte Folded Spill - st.d $s0, $sp, 216 # 8-byte Folded Spill - st.d $s1, $sp, 208 # 8-byte Folded Spill - st.d $s2, $sp, 200 # 8-byte Folded Spill - st.d $s3, $sp, 192 # 8-byte Folded Spill - st.d $s4, $sp, 184 # 8-byte Folded Spill - st.d $s5, $sp, 176 # 8-byte Folded Spill - st.d $s6, $sp, 168 # 8-byte Folded Spill - st.d $s7, $sp, 160 # 8-byte Folded Spill - st.d $s8, $sp, 152 # 8-byte Folded Spill - fst.d $fs0, $sp, 144 # 8-byte Folded Spill - fst.d $fs1, $sp, 136 # 8-byte Folded Spill - fst.d $fs2, $sp, 128 # 8-byte Folded Spill - fst.d $fs3, $sp, 120 # 8-byte Folded Spill - fst.d $fs4, $sp, 112 # 8-byte Folded Spill - fst.d $fs5, $sp, 104 # 8-byte Folded Spill - fst.d $fs6, $sp, 96 # 8-byte Folded Spill + addi.d $sp, $sp, -224 + st.d $ra, $sp, 216 # 8-byte Folded Spill + st.d $fp, $sp, 208 # 8-byte Folded Spill + st.d $s0, $sp, 200 # 8-byte Folded Spill + st.d $s1, $sp, 192 # 8-byte Folded Spill + st.d $s2, $sp, 184 # 8-byte Folded Spill + st.d $s3, $sp, 176 # 8-byte Folded Spill + st.d $s4, $sp, 168 # 8-byte Folded Spill + st.d $s5, $sp, 160 # 8-byte Folded Spill + st.d $s6, $sp, 152 # 8-byte Folded Spill + st.d $s7, $sp, 144 # 8-byte Folded Spill + st.d $s8, $sp, 136 # 8-byte Folded Spill + fst.d $fs0, $sp, 128 # 8-byte Folded Spill + fst.d $fs1, $sp, 120 # 8-byte Folded Spill + fst.d $fs2, $sp, 112 # 8-byte Folded Spill + fst.d $fs3, $sp, 104 # 8-byte Folded Spill + fst.d $fs4, $sp, 96 # 8-byte Folded Spill move $s2, $a3 move $s3, $a2 fmov.d $fs0, $fa0 @@ -4817,11 +4805,13 @@ interpolation_linear: # @interpolation_linear st.d $a0, $sp, 64 # 8-byte Folded Spill slli.d $a0, $s3, 3 st.d $a0, $sp, 48 # 8-byte Folded Spill - pcalau12i $a1, %pc_hi20(.LCPI10_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI10_0) slli.d $a0, $s2, 3 st.d $a0, $sp, 40 # 8-byte Folded Spill - vldi $vr1, -956 + vldi $vr0, -956 + ori $a1, $zero, 0 + lu32i.d $a1, -524288 + lu52i.d $a1, $a1, -1029 + movgr2fr.d $fa1, $a1 vldi $vr2, -914 b .LBB10_3 .p2align 4, , 16 @@ -4888,8 +4878,8 @@ interpolation_linear: # @interpolation_linear sltui $a2, $a2, 1 mul.d $s4, $a1, $t7 movgr2cf $fcc0, $a2 - fsel $fa3, $fa0, $fa1, $fcc0 - fsel $fa4, $fa1, $fa0, $fcc0 + fsel $fa3, $fa1, $fa0, $fcc0 + fsel $fa4, $fa0, $fa1, $fcc0 fmul.d $fa5, $fa3, $fa2 fmul.d $fa6, $fa5, $fa2 fmul.d $fa7, $fa4, $fa2 @@ -4908,8 +4898,8 @@ interpolation_linear: # @interpolation_linear sltui $a2, $a2, 1 add.d $s7, $a1, $s4 movgr2cf $fcc0, $a2 - fsel $ft9, $fa0, $fa1, $fcc0 - fsel $ft11, $fa1, $fa0, $fcc0 + fsel $ft9, $fa1, $fa0, $fcc0 + fsel $ft11, $fa0, $fa1, $fcc0 fmul.d $ft1, $fa3, $ft9 fmul.d $ft2, $ft1, $fa2 fmul.d $ft3, $fa3, $ft11 @@ -4934,89 +4924,93 @@ interpolation_linear: # @interpolation_linear sltui $a3, $a3, 1 add.w $s1, $s7, $a2 movgr2cf $fcc0, $a3 - fsel $ft14, $fa0, $fa1, $fcc0 - fsel $ft13, $fa1, $fa0, $fcc0 - slli.d $ra, $a1, 3 - fldx.d $ft15, $t8, $ra + fsel $ft14, $fa1, $fa0, $fcc0 + fsel $ft13, $fa0, $fa1, $fcc0 addi.w $fp, $s1, -1 sub.w $a3, $fp, $t6 - sub.w $a1, $a3, $t7 - slli.d $a1, $a1, 3 - fldx.d $fs1, $s2, $a1 + sub.w $a2, $a3, $t7 + slli.d $a2, $a2, 3 + fldx.d $ft15, $s2, $a2 + slli.d $ra, $a1, 3 + fldx.d $fs1, $t8, $ra + fmul.d $fs2, $ft1, $ft14 + fmul.d $ft15, $fs2, $ft15 sub.w $a5, $s1, $t6 sub.w $a1, $a5, $t7 slli.d $a1, $a1, 3 fldx.d $fs2, $s2, $a1 - fmul.d $fs3, $ft1, $ft14 - fmul.d $fs1, $fs3, $fs1 - fmadd.d $ft15, $fs0, $ft15, $fs1 + addi.w $s0, $s1, 1 + sub.w $a6, $s0, $t6 + sub.w $a1, $a6, $t7 + slli.d $a1, $a1, 3 + fldx.d $fs3, $s2, $a1 + fmadd.d $ft15, $fs0, $fs1, $ft15 fmadd.d $ft15, $ft2, $fs2, $ft15 fmul.d $fs1, $ft1, $ft13 - addi.w $s0, $s1, 1 - sub.w $a1, $s0, $t6 - sub.w $a2, $a1, $t7 - slli.d $a2, $a2, 3 - fldx.d $fs2, $s2, $a2 - sub.w $a2, $fp, $t7 - slli.d $a2, $a2, 3 - fldx.d $fs3, $s2, $a2 - sub.w $a2, $s1, $t7 - slli.d $a2, $a2, 3 - fldx.d $fs4, $s2, $a2 - fmadd.d $ft15, $fs1, $fs2, $ft15 - fmul.d $fs1, $fa5, $ft14 fmadd.d $ft15, $fs1, $fs3, $ft15 - fmadd.d $ft15, $fa6, $fs4, $ft15 + fmul.d $fs1, $fa5, $ft14 + sub.w $a1, $fp, $t7 + slli.d $a1, $a1, 3 + fldx.d $fs2, $s2, $a1 + sub.w $a1, $s1, $t7 + slli.d $a1, $a1, 3 + fldx.d $fs3, $s2, $a1 + sub.w $a1, $s0, $t7 + slli.d $a1, $a1, 3 + fldx.d $fs4, $s2, $a1 + fmadd.d $ft15, $fs1, $fs2, $ft15 + fmadd.d $ft15, $fa6, $fs3, $ft15 fmul.d $fs1, $fa5, $ft13 - sub.w $a2, $s0, $t7 + fmadd.d $ft15, $fs1, $fs4, $ft15 + fmul.d $fs1, $ft3, $ft14 + add.w $a1, $fp, $t6 + sub.w $a2, $a1, $t7 slli.d $a2, $a2, 3 fldx.d $fs2, $s2, $a2 - add.w $a6, $fp, $t6 - sub.w $a2, $a6, $t7 - slli.d $a2, $a2, 3 - fldx.d $fs3, $s2, $a2 add.w $a2, $s1, $t6 sub.w $a4, $a2, $t7 slli.d $a4, $a4, 3 - fldx.d $fs4, $s2, $a4 - fmadd.d $ft15, $fs1, $fs2, $ft15 - fmul.d $fs1, $ft3, $ft14 - fmadd.d $ft15, $fs1, $fs3, $ft15 - fmadd.d $ft15, $ft4, $fs4, $ft15 + fldx.d $fs3, $s2, $a4 add.w $a4, $s0, $t6 sub.w $a0, $a4, $t7 slli.d $a0, $a0, 3 - fldx.d $fs1, $s2, $a0 + fldx.d $fs4, $s2, $a0 + fmadd.d $ft15, $fs1, $fs2, $ft15 + fmadd.d $ft15, $ft4, $fs3, $ft15 + fmul.d $fs1, $ft3, $ft13 + fmadd.d $ft15, $fs1, $fs4, $ft15 + fmul.d $fs1, $ft5, $ft14 slli.d $a0, $a3, 3 fldx.d $fs2, $s2, $a0 - fmul.d $fs3, $ft3, $ft13 - fmadd.d $ft15, $fs3, $fs1, $ft15 - fmul.d $fs1, $ft5, $ft14 - fmadd.d $ft15, $fs1, $fs2, $ft15 slli.d $a0, $a5, 3 - fldx.d $fs1, $s2, $a0 - slli.d $a0, $a1, 3 - fldx.d $fs2, $s2, $a0 - pcalau12i $a0, %pc_hi20(.LCPI10_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI10_1) - fmadd.d $ft15, $ft6, $fs1, $ft15 - fmul.d $fs1, $ft5, $ft13 + fldx.d $fs3, $s2, $a0 + slli.d $a0, $a6, 3 + fldx.d $fs4, $s2, $a0 fmadd.d $ft15, $fs1, $fs2, $ft15 - fmul.d $fs1, $ft14, $fs3 + fmadd.d $ft15, $ft6, $fs3, $ft15 + fmul.d $fs1, $ft5, $ft13 + fmadd.d $ft15, $fs1, $fs4, $ft15 slli.d $a0, $fp, 3 - fldx.d $fs2, $s2, $a0 + fldx.d $fs1, $s2, $a0 + ori $a0, $zero, 0 + lu32i.d $a0, -253952 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fs2, $a0 + fmul.d $fs3, $ft14, $fs2 + fmadd.d $ft15, $fs3, $fs1, $ft15 slli.d $a0, $s1, 3 - fldx.d $fs4, $s2, $a0 - pcalau12i $a0, %pc_hi20(.LCPI10_2) - fld.d $fs5, $a0, %pc_lo12(.LCPI10_2) + fldx.d $fs1, $s2, $a0 slli.d $a0, $s0, 3 - fldx.d $fs6, $s2, $a0 - fmadd.d $ft15, $fs1, $fs2, $ft15 - fmadd.d $ft15, $fs4, $fs5, $ft15 - fmul.d $fs1, $ft13, $fs3 - fmadd.d $ft15, $fs1, $fs6, $ft15 + fldx.d $fs3, $s2, $a0 + ori $a0, $zero, 0 + lu32i.d $a0, -369152 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fs4, $a0 + fmadd.d $ft15, $fs1, $fs4, $ft15 + fmul.d $fs1, $ft13, $fs2 + fmadd.d $ft15, $fs1, $fs3, $ft15 fmul.d $fs1, $ft7, $ft14 - slli.d $a0, $a6, 3 + slli.d $a0, $a1, 3 fldx.d $fs2, $s2, $a0 slli.d $a0, $a2, 3 fldx.d $fs3, $s2, $a0 @@ -5033,7 +5027,7 @@ interpolation_linear: # @interpolation_linear add.w $a0, $a5, $t7 slli.d $a0, $a0, 3 fldx.d $fs3, $s2, $a0 - add.w $a0, $a1, $t7 + add.w $a0, $a6, $t7 slli.d $a0, $a0, 3 fldx.d $fs4, $s2, $a0 fmadd.d $ft15, $fs1, $fs2, $ft15 @@ -5055,7 +5049,7 @@ interpolation_linear: # @interpolation_linear fmul.d $fs1, $fa7, $ft13 fmadd.d $ft15, $fs1, $fs4, $ft15 fmul.d $ft14, $ft11, $ft14 - add.w $a0, $a6, $t7 + add.w $a0, $a1, $t7 slli.d $a0, $a0, 3 fldx.d $fs1, $s2, $a0 add.w $a0, $a2, $t7 @@ -5094,25 +5088,23 @@ interpolation_linear: # @interpolation_linear sub.d $a0, $a0, $a3 add.d $a0, $a0, $a2 st.d $a0, $a1, 320 - fld.d $fs6, $sp, 96 # 8-byte Folded Reload - fld.d $fs5, $sp, 104 # 8-byte Folded Reload - fld.d $fs4, $sp, 112 # 8-byte Folded Reload - fld.d $fs3, $sp, 120 # 8-byte Folded Reload - fld.d $fs2, $sp, 128 # 8-byte Folded Reload - fld.d $fs1, $sp, 136 # 8-byte Folded Reload - fld.d $fs0, $sp, 144 # 8-byte Folded Reload - ld.d $s8, $sp, 152 # 8-byte Folded Reload - ld.d $s7, $sp, 160 # 8-byte Folded Reload - ld.d $s6, $sp, 168 # 8-byte Folded Reload - ld.d $s5, $sp, 176 # 8-byte Folded Reload - ld.d $s4, $sp, 184 # 8-byte Folded Reload - ld.d $s3, $sp, 192 # 8-byte Folded Reload - ld.d $s2, $sp, 200 # 8-byte Folded Reload - ld.d $s1, $sp, 208 # 8-byte Folded Reload - ld.d $s0, $sp, 216 # 8-byte Folded Reload - ld.d $fp, $sp, 224 # 8-byte Folded Reload - ld.d $ra, $sp, 232 # 8-byte Folded Reload - addi.d $sp, $sp, 240 + fld.d $fs4, $sp, 96 # 8-byte Folded Reload + fld.d $fs3, $sp, 104 # 8-byte Folded Reload + fld.d $fs2, $sp, 112 # 8-byte Folded Reload + fld.d $fs1, $sp, 120 # 8-byte Folded Reload + fld.d $fs0, $sp, 128 # 8-byte Folded Reload + ld.d $s8, $sp, 136 # 8-byte Folded Reload + ld.d $s7, $sp, 144 # 8-byte Folded Reload + ld.d $s6, $sp, 152 # 8-byte Folded Reload + ld.d $s5, $sp, 160 # 8-byte Folded Reload + ld.d $s4, $sp, 168 # 8-byte Folded Reload + ld.d $s3, $sp, 176 # 8-byte Folded Reload + ld.d $s2, $sp, 184 # 8-byte Folded Reload + ld.d $s1, $sp, 192 # 8-byte Folded Reload + ld.d $s0, $sp, 200 # 8-byte Folded Reload + ld.d $fp, $sp, 208 # 8-byte Folded Reload + ld.d $ra, $sp, 216 # 8-byte Folded Reload + addi.d $sp, $sp, 224 ret .Lfunc_end10: .size interpolation_linear, .Lfunc_end10-interpolation_linear @@ -7457,18 +7449,7 @@ matmul_grids: # @matmul_grids .Lfunc_end21: .size matmul_grids, .Lfunc_end21-matmul_grids # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function initialize_problem -.LCPI22_0: - .dword 0x4046800000000000 # double 45 -.LCPI22_1: - .dword 0x401921fb54442d18 # double 6.2831853071795862 -.LCPI22_2: - .dword 0xc044000000000000 # double -40 -.LCPI22_3: - .dword 0x4043bd3cc9be45de # double 39.478417604357432 - .text - .globl initialize_problem + .globl initialize_problem # -- Begin function initialize_problem .p2align 5 .type initialize_problem,@function initialize_problem: # @initialize_problem @@ -7504,22 +7485,32 @@ initialize_problem: # @initialize_problem ori $a0, $zero, 216 ld.d $a1, $sp, 8 # 8-byte Folded Reload mul.d $s2, $a1, $a0 - pcalau12i $a0, %pc_hi20(.LCPI22_0) - fld.d $fs3, $a0, %pc_lo12(.LCPI22_0) - pcalau12i $a0, %pc_hi20(.LCPI22_1) - fld.d $fs4, $a0, %pc_lo12(.LCPI22_1) - pcalau12i $a0, %pc_hi20(.LCPI22_2) - fld.d $fs5, $a0, %pc_lo12(.LCPI22_2) - pcalau12i $a0, %pc_hi20(.LCPI22_3) - fld.d $fs6, $a0, %pc_lo12(.LCPI22_3) + ori $a0, $zero, 0 + lu32i.d $a0, 425984 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1025 + movgr2fr.d $fs4, $a0 + ori $a0, $zero, 0 + lu32i.d $a0, 262144 + lu52i.d $a0, $a0, -1020 + movgr2fr.d $fs5, $a0 + lu12i.w $a0, -222236 + ori $a0, $a0, 1502 + lu32i.d $a0, 245052 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fs6, $a0 lu52i.d $s3, $zero, 1023 fst.d $fs0, $sp, 160 # 8-byte Folded Spill fst.d $fa2, $sp, 64 # 8-byte Folded Spill fst.d $fa0, $sp, 56 # 8-byte Folded Spill - fst.d $fs4, $sp, 48 # 8-byte Folded Spill fst.d $fs6, $sp, 208 # 8-byte Folded Spill - fst.d $fs3, $sp, 40 # 8-byte Folded Spill fst.d $fs5, $sp, 200 # 8-byte Folded Spill + fst.d $fs4, $sp, 48 # 8-byte Folded Spill + fst.d $fs3, $sp, 40 # 8-byte Folded Spill b .LBB22_3 .p2align 4, , 16 .LBB22_2: # %._crit_edge322 diff --git a/results/MultiSource/Benchmarks/FreeBench/analyzer/CMakeFiles/analyzer.dir/functs.s b/results/MultiSource/Benchmarks/FreeBench/analyzer/CMakeFiles/analyzer.dir/functs.s index 1f952eb7..fa3b7618 100644 --- a/results/MultiSource/Benchmarks/FreeBench/analyzer/CMakeFiles/analyzer.dir/functs.s +++ b/results/MultiSource/Benchmarks/FreeBench/analyzer/CMakeFiles/analyzer.dir/functs.s @@ -1,12 +1,6 @@ .file "functs.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function speedup_test -.LCPI0_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI0_1: - .dword 0x4059000000000000 # double 100 .text - .globl speedup_test + .globl speedup_test # -- Begin function speedup_test .p2align 5 .type speedup_test,@function speedup_test: # @speedup_test @@ -76,28 +70,31 @@ speedup_test: # @speedup_test ld.d $a1, $sp, 24 sub.d $a0, $a0, $a1 srli.d $a1, $a0, 32 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI0_0) lu52i.d $a2, $zero, 1107 or $a1, $a1, $a2 + movgr2fr.d $fa0, $a1 + lu12i.w $a1, 256 + lu52i.d $a1, $a1, 1107 movgr2fr.d $fa1, $a1 - fsub.d $fa1, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 lu12i.w $a1, 275200 ld.d $a3, $sp, 8 bstrins.d $a0, $a1, 63, 32 movgr2fr.d $fa2, $a0 - fadd.d $fa1, $fa2, $fa1 + fadd.d $fa0, $fa2, $fa0 srli.d $a0, $a3, 32 or $a0, $a0, $a2 movgr2fr.d $fa2, $a0 - fsub.d $fa0, $fa2, $fa0 + fsub.d $fa1, $fa2, $fa1 bstrins.d $a3, $a1, 63, 32 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_1) - movgr2fr.d $fa3, $a3 - fadd.d $fa0, $fa3, $fa0 - fdiv.d $fa0, $fa1, $fa0 - fmul.d $fa0, $fa0, $fa2 + movgr2fr.d $fa2, $a3 + fadd.d $fa1, $fa2, $fa1 + fdiv.d $fa0, $fa0, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str.5) addi.d $a0, $a0, %pc_lo12(.L.str.5) @@ -212,12 +209,7 @@ find_hard_raws: # @find_hard_raws .Lfunc_end2: .size find_hard_raws, .Lfunc_end2-find_hard_raws # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function specul_time_o -.LCPI3_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 - .text - .globl specul_time_o + .globl specul_time_o # -- Begin function specul_time_o .p2align 5 .type specul_time_o,@function specul_time_o: # @specul_time_o @@ -438,7 +430,7 @@ specul_time_o: # @specul_time_o ori $a0, $zero, 1 pcalau12i $s4, %pc_hi20(loop_time) lu52i.d $s3, $zero, 1107 - pcalau12i $s5, %pc_hi20(.LCPI3_0) + lu12i.w $s5, 256 lu12i.w $s2, 275200 beq $s6, $a0, .LBB3_33 # %bb.31: @@ -450,23 +442,24 @@ specul_time_o: # @specul_time_o b .LBB3_39 .LBB3_33: ld.d $a0, $s4, %pc_lo12(loop_time) - fld.d $fa0, $s5, %pc_lo12(.LCPI3_0) srli.d $a1, $a0, 32 or $a1, $a1, $s3 + movgr2fr.d $fa0, $a1 + lu52i.d $a1, $s5, 1107 movgr2fr.d $fa1, $a1 - fsub.d $fa1, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 bstrins.d $a0, $s2, 63, 32 movgr2fr.d $fa2, $a0 - fadd.d $fa1, $fa2, $fa1 + fadd.d $fa0, $fa2, $fa0 srli.d $a0, $s1, 32 or $a0, $a0, $s3 movgr2fr.d $fa2, $a0 - fsub.d $fa0, $fa2, $fa0 + fsub.d $fa1, $fa2, $fa1 move $a0, $s1 bstrins.d $a0, $s2, 63, 32 movgr2fr.d $fa2, $a0 - fadd.d $fa0, $fa2, $fa0 - fdiv.d $fa0, $fa1, $fa0 + fadd.d $fa1, $fa2, $fa1 + fdiv.d $fa0, $fa0, $fa1 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str.8) addi.d $a0, $a0, %pc_lo12(.L.str.8) @@ -481,7 +474,8 @@ specul_time_o: # @specul_time_o srli.d $a1, $a0, 32 or $a1, $a1, $s3 movgr2fr.d $fa0, $a1 - fld.d $fa1, $s5, %pc_lo12(.LCPI3_0) + lu52i.d $a1, $s5, 1107 + movgr2fr.d $fa1, $a1 add.d $a1, $a0, $s1 bstrins.d $a0, $s2, 63, 32 ld.d $a2, $s4, %pc_lo12(loop_time) @@ -508,23 +502,24 @@ specul_time_o: # @specul_time_o bne $s6, $a0, .LBB3_37 # %bb.36: ld.d $a0, $s4, %pc_lo12(loop_time) - fld.d $fa0, $s5, %pc_lo12(.LCPI3_0) srli.d $a1, $a0, 32 or $a1, $a1, $s3 + movgr2fr.d $fa0, $a1 + lu52i.d $a1, $s5, 1107 movgr2fr.d $fa1, $a1 - fsub.d $fa1, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 bstrins.d $a0, $s2, 63, 32 movgr2fr.d $fa2, $a0 - fadd.d $fa1, $fa2, $fa1 + fadd.d $fa0, $fa2, $fa0 srli.d $a0, $s1, 32 or $a0, $a0, $s3 movgr2fr.d $fa2, $a0 - fsub.d $fa0, $fa2, $fa0 + fsub.d $fa1, $fa2, $fa1 move $a0, $s1 bstrins.d $a0, $s2, 63, 32 movgr2fr.d $fa2, $a0 - fadd.d $fa0, $fa2, $fa0 - fdiv.d $fa0, $fa1, $fa0 + fadd.d $fa1, $fa2, $fa1 + fdiv.d $fa0, $fa0, $fa1 movfr2gr.d $a2, $fa0 pcalau12i $a0, %pc_hi20(.L.str.10) addi.d $a1, $a0, %pc_lo12(.L.str.10) @@ -537,25 +532,26 @@ specul_time_o: # @specul_time_o # %bb.38: pcalau12i $a0, %pc_hi20(prog_time) ld.d $a0, $a0, %pc_lo12(prog_time) - fld.d $fa0, $s5, %pc_lo12(.LCPI3_0) srli.d $a1, $a0, 32 or $a1, $a1, $s3 + movgr2fr.d $fa0, $a1 + lu52i.d $a1, $s5, 1107 movgr2fr.d $fa1, $a1 - fsub.d $fa1, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 add.d $a1, $a0, $s1 ld.d $a2, $s4, %pc_lo12(loop_time) bstrins.d $a0, $s2, 63, 32 movgr2fr.d $fa2, $a0 - fadd.d $fa1, $fa2, $fa1 + fadd.d $fa0, $fa2, $fa0 sub.d $a0, $a1, $a2 srli.d $a1, $a0, 32 or $a1, $a1, $s3 movgr2fr.d $fa2, $a1 - fsub.d $fa0, $fa2, $fa0 + fsub.d $fa1, $fa2, $fa1 bstrins.d $a0, $s2, 63, 32 movgr2fr.d $fa2, $a0 - fadd.d $fa0, $fa2, $fa0 - fdiv.d $fa0, $fa1, $fa0 + fadd.d $fa1, $fa2, $fa1 + fdiv.d $fa0, $fa0, $fa1 movfr2gr.d $a2, $fa0 pcalau12i $a0, %pc_hi20(.L.str.11) addi.d $a1, $a0, %pc_lo12(.L.str.11) @@ -587,12 +583,7 @@ specul_time_o: # @specul_time_o .Lfunc_end3: .size specul_time_o, .Lfunc_end3-specul_time_o # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function specul_time_r -.LCPI4_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 - .text - .globl specul_time_r + .globl specul_time_r # -- Begin function specul_time_r .p2align 5 .type specul_time_r,@function specul_time_r: # @specul_time_r @@ -1184,7 +1175,7 @@ specul_time_r: # @specul_time_r ori $a0, $zero, 1 pcalau12i $s5, %pc_hi20(loop_time) lu52i.d $s4, $zero, 1107 - pcalau12i $s6, %pc_hi20(.LCPI4_0) + lu12i.w $s6, 256 lu12i.w $s3, 275200 beq $s7, $a0, .LBB4_82 # %bb.80: @@ -1197,23 +1188,24 @@ specul_time_r: # @specul_time_r b .LBB4_88 .LBB4_82: ld.d $a0, $s5, %pc_lo12(loop_time) - fld.d $fa0, $s6, %pc_lo12(.LCPI4_0) srli.d $a1, $a0, 32 or $a1, $a1, $s4 + movgr2fr.d $fa0, $a1 + lu52i.d $a1, $s6, 1107 movgr2fr.d $fa1, $a1 - fsub.d $fa1, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 bstrins.d $a0, $s3, 63, 32 movgr2fr.d $fa2, $a0 - fadd.d $fa1, $fa2, $fa1 + fadd.d $fa0, $fa2, $fa0 srli.d $a0, $s2, 32 or $a0, $a0, $s4 movgr2fr.d $fa2, $a0 - fsub.d $fa0, $fa2, $fa0 + fsub.d $fa1, $fa2, $fa1 move $a0, $s2 bstrins.d $a0, $s3, 63, 32 movgr2fr.d $fa2, $a0 - fadd.d $fa0, $fa2, $fa0 - fdiv.d $fa0, $fa1, $fa0 + fadd.d $fa1, $fa2, $fa1 + fdiv.d $fa0, $fa0, $fa1 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str.8) addi.d $a0, $a0, %pc_lo12(.L.str.8) @@ -1228,7 +1220,8 @@ specul_time_r: # @specul_time_r srli.d $a1, $a0, 32 or $a1, $a1, $s4 movgr2fr.d $fa0, $a1 - fld.d $fa1, $s6, %pc_lo12(.LCPI4_0) + lu52i.d $a1, $s6, 1107 + movgr2fr.d $fa1, $a1 add.d $a1, $a0, $s2 bstrins.d $a0, $s3, 63, 32 ld.d $a2, $s5, %pc_lo12(loop_time) @@ -1256,23 +1249,24 @@ specul_time_r: # @specul_time_r bne $s7, $a0, .LBB4_86 # %bb.85: ld.d $a0, $s5, %pc_lo12(loop_time) - fld.d $fa0, $s6, %pc_lo12(.LCPI4_0) srli.d $a1, $a0, 32 or $a1, $a1, $s4 + movgr2fr.d $fa0, $a1 + lu52i.d $a1, $s6, 1107 movgr2fr.d $fa1, $a1 - fsub.d $fa1, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 bstrins.d $a0, $s3, 63, 32 movgr2fr.d $fa2, $a0 - fadd.d $fa1, $fa2, $fa1 + fadd.d $fa0, $fa2, $fa0 srli.d $a0, $s2, 32 or $a0, $a0, $s4 movgr2fr.d $fa2, $a0 - fsub.d $fa0, $fa2, $fa0 + fsub.d $fa1, $fa2, $fa1 move $a0, $s2 bstrins.d $a0, $s3, 63, 32 movgr2fr.d $fa2, $a0 - fadd.d $fa0, $fa2, $fa0 - fdiv.d $fa0, $fa1, $fa0 + fadd.d $fa1, $fa2, $fa1 + fdiv.d $fa0, $fa0, $fa1 movfr2gr.d $a3, $fa0 pcalau12i $a0, %pc_hi20(.L.str.14) addi.d $a1, $a0, %pc_lo12(.L.str.14) @@ -1286,25 +1280,26 @@ specul_time_r: # @specul_time_r # %bb.87: pcalau12i $a0, %pc_hi20(prog_time) ld.d $a0, $a0, %pc_lo12(prog_time) - fld.d $fa0, $s6, %pc_lo12(.LCPI4_0) srli.d $a1, $a0, 32 or $a1, $a1, $s4 + movgr2fr.d $fa0, $a1 + lu52i.d $a1, $s6, 1107 movgr2fr.d $fa1, $a1 - fsub.d $fa1, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 add.d $a1, $a0, $s2 ld.d $a2, $s5, %pc_lo12(loop_time) bstrins.d $a0, $s3, 63, 32 movgr2fr.d $fa2, $a0 - fadd.d $fa1, $fa2, $fa1 + fadd.d $fa0, $fa2, $fa0 sub.d $a0, $a1, $a2 srli.d $a1, $a0, 32 or $a1, $a1, $s4 movgr2fr.d $fa2, $a1 - fsub.d $fa0, $fa2, $fa0 + fsub.d $fa1, $fa2, $fa1 bstrins.d $a0, $s3, 63, 32 movgr2fr.d $fa2, $a0 - fadd.d $fa0, $fa2, $fa0 - fdiv.d $fa0, $fa1, $fa0 + fadd.d $fa1, $fa2, $fa1 + fdiv.d $fa0, $fa0, $fa1 movfr2gr.d $a3, $fa0 pcalau12i $a0, %pc_hi20(.L.str.15) addi.d $a1, $a0, %pc_lo12(.L.str.15) diff --git a/results/MultiSource/Benchmarks/FreeBench/distray/CMakeFiles/distray.dir/distray.s b/results/MultiSource/Benchmarks/FreeBench/distray/CMakeFiles/distray.dir/distray.s index a089004d..fe5bfcca 100644 --- a/results/MultiSource/Benchmarks/FreeBench/distray/CMakeFiles/distray.dir/distray.s +++ b/results/MultiSource/Benchmarks/FreeBench/distray/CMakeFiles/distray.dir/distray.s @@ -1,22 +1,6 @@ .file "distray.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x406fe00000000000 # double 255 -.LCPI0_1: - .dword 0x407e000000000000 # double 480 -.LCPI0_2: - .dword 0x4084000000000000 # double 640 -.LCPI0_3: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 -.LCPI0_4: - .dword 0x3f4999999999999a # double 7.8125000000000004E-4 -.LCPI0_5: - .dword 0x3f51111111111111 # double 0.0010416666666666667 -.LCPI0_6: - .dword 0x41cfffffff800000 # double 1073741823 .section .text.unlikely.,"ax",@progbits - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -117,46 +101,62 @@ main: # @main addi.d $s7, $a0, %pc_lo12(memory) blez $s2, .LBB0_11 # %bb.2: # %.split45.i.preheader - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_1) + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -131072 + lu52i.d $a1, $a1, 1031 + movgr2fr.d $fa0, $a1 fst.d $fa0, $sp, 24 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_2) + ori $a1, $zero, 0 + lu32i.d $a1, 262144 + lu52i.d $a1, $a1, 1032 + movgr2fr.d $fa0, $a1 fst.d $fa0, $sp, 80 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(Cameraright) - addi.d $a0, $a0, %pc_lo12(Cameraright) - st.d $a0, $sp, 72 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(Cameradir) - addi.d $a0, $a0, %pc_lo12(Cameradir) - st.d $a0, $sp, 64 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(Cameraup) - addi.d $s8, $a0, %pc_lo12(Cameraup) + pcalau12i $a1, %pc_hi20(Cameraright) + addi.d $a1, $a1, %pc_lo12(Cameraright) + st.d $a1, $sp, 72 # 8-byte Folded Spill + pcalau12i $a1, %pc_hi20(Cameradir) + addi.d $a1, $a1, %pc_lo12(Cameradir) + st.d $a1, $sp, 64 # 8-byte Folded Spill + pcalau12i $a1, %pc_hi20(Cameraup) + addi.d $s8, $a1, %pc_lo12(Cameraup) vrepli.b $vr0, 0 vst $vr0, $sp, 112 # 16-byte Folded Spill movgr2fr.d $ft0, $zero - pcalau12i $a0, %pc_hi20(.LCPI0_3) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_3) + lu12i.w $a1, -487882 + ori $a1, $a1, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa0, $a1 fst.d $fa0, $sp, 56 # 8-byte Folded Spill vldi $vr10, -912 - pcalau12i $a0, %pc_hi20(.LCPI0_4) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_4) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, 1012 + movgr2fr.d $fa0, $a1 fst.d $fa0, $sp, 48 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI0_5) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_5) + lu12i.w $a1, 69905 + ori $a1, $a1, 273 + lu32i.d $a1, 69905 + lu52i.d $a1, $a1, 1013 + movgr2fr.d $fa0, $a1 fst.d $fa0, $sp, 40 # 8-byte Folded Spill pcalau12i $s5, %pc_hi20(rnd) - lu12i.w $a0, 269412 - pcalau12i $a1, %pc_hi20(.LCPI0_6) - fld.d $ft3, $a1, %pc_lo12(.LCPI0_6) - ori $s4, $a0, 3693 - lu12i.w $a0, 3 - ori $s1, $a0, 57 - pcalau12i $a0, %pc_hi20(Camerapos) - addi.d $s0, $a0, %pc_lo12(Camerapos) - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_0) - fst.d $fa0, $sp, 96 # 8-byte Folded Spill + lu12i.w $a1, 269412 + ori $s4, $a1, 3693 + lu12i.w $a1, 3 + ori $s1, $a1, 57 + lu12i.w $a1, -2048 + lu52i.d $a1, $a1, 1052 + movgr2fr.d $ft3, $a1 + pcalau12i $a1, %pc_hi20(Camerapos) + addi.d $s0, $a1, %pc_lo12(Camerapos) move $a1, $zero + lu32i.d $a0, -8192 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa0, $a0 + fst.d $fa0, $sp, 96 # 8-byte Folded Spill fst.d $ft0, $sp, 144 # 8-byte Folded Spill fst.d $ft3, $sp, 136 # 8-byte Folded Spill b .LBB0_5 @@ -372,11 +372,13 @@ main: # @main .LBB0_11: # %.split45.us.i movgr2fr.w $fa0, $s2 ffint.d.w $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) frecip.d $fa0, $fa0 - movgr2fr.d $fa2, $zero - fmul.d $fa0, $fa0, $fa2 + movgr2fr.d $fa1, $zero + fmul.d $fa0, $fa0, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, -8192 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 ftintrz.l.d $fa0, $fa0 movfr2gr.d $a1, $fa0 @@ -445,16 +447,8 @@ main: # @main .Lfunc_end0: .size main, .Lfunc_end0-main # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function TraceLine -.LCPI1_0: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 -.LCPI1_1: - .dword 0x3fe45f306c8462a6 # double 0.63661977000000003 -.LCPI1_2: - .dword 0x41cfffffff800000 # double 1073741823 .text - .p2align 5 + .p2align 5 # -- Begin function TraceLine .type TraceLine,@function TraceLine: # @TraceLine # %bb.0: @@ -491,8 +485,11 @@ TraceLine: # @TraceLine addi.d $a4, $sp, 160 pcaddu18i $ra, %call36(IntersectObjs) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fs4, $a0, %pc_lo12(.LCPI1_0) + lu12i.w $a0, -487882 + ori $a0, $a0, 2289 + lu32i.d $a0, 325813 + lu52i.d $a0, $a0, 1006 + movgr2fr.d $fs4, $a0 fcmp.cule.d $fcc0, $fa0, $fs4 bcnez $fcc0, .LBB1_7 # %bb.2: @@ -576,9 +573,10 @@ TraceLine: # @TraceLine fmul.d $fs0, $fa0, $fa1 pcalau12i $s5, %pc_hi20(rnd) lu12i.w $a0, 269412 - pcalau12i $a1, %pc_hi20(.LCPI1_2) - fld.d $fs7, $a1, %pc_lo12(.LCPI1_2) ori $s6, $a0, 3693 + lu12i.w $a0, -2048 + lu52i.d $a0, $a0, 1052 + movgr2fr.d $fs7, $a0 lu12i.w $a0, 3 ori $s7, $a0, 57 .p2align 4, , 16 @@ -661,8 +659,11 @@ TraceLine: # @TraceLine fdiv.d $fa0, $fa1, $fa0 pcaddu18i $ra, %call36(atan) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_1) + lu12i.w $a0, 444486 + ori $a0, $a0, 678 + lu32i.d $a0, 286512 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fmul.d $fa2, $fa0, $fa1 b .LBB1_10 .LBB1_9: @@ -875,9 +876,10 @@ TraceLine: # @TraceLine vrepli.b $vr3, 0 pcalau12i $s4, %pc_hi20(rnd) lu12i.w $a0, 269412 - pcalau12i $a1, %pc_hi20(.LCPI1_2) - fld.d $fs1, $a1, %pc_lo12(.LCPI1_2) ori $s5, $a0, 3693 + lu12i.w $a0, -2048 + lu52i.d $a0, $a0, 1052 + movgr2fr.d $fs1, $a0 lu12i.w $a0, 3 ori $s6, $a0, 57 fmov.d $fs7, $fs3 @@ -1004,14 +1006,7 @@ TraceLine: # @TraceLine .Lfunc_end1: .size TraceLine, .Lfunc_end1-TraceLine # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function IntersectObjs -.LCPI2_0: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 -.LCPI2_1: - .dword 0x40f86a0000000000 # double 1.0E+5 - .text - .p2align 5 + .p2align 5 # -- Begin function IntersectObjs .type IntersectObjs,@function IntersectObjs: # @IntersectObjs # %bb.0: @@ -1032,9 +1027,12 @@ IntersectObjs: # @IntersectObjs fst.d $fs6, $sp, 80 # 8-byte Folded Spill fst.d $fs7, $sp, 72 # 8-byte Folded Spill fld.d $fs1, $a1, 16 - pcalau12i $a5, %pc_hi20(.LCPI2_0) - fld.d $fa5, $a5, %pc_lo12(.LCPI2_0) fabs.d $fa0, $fs1 + lu12i.w $a5, -487882 + ori $a5, $a5, 2289 + lu32i.d $a5, 325813 + lu52i.d $a5, $a5, 1006 + movgr2fr.d $fa5, $a5 fcmp.cule.d $fcc0, $fa0, $fa5 bcnez $fcc0, .LBB2_4 # %bb.1: @@ -1046,8 +1044,10 @@ IntersectObjs: # @IntersectObjs fcmp.cule.d $fcc0, $ft2, $fa5 bcnez $fcc0, .LBB2_4 # %bb.2: - pcalau12i $a5, %pc_hi20(.LCPI2_1) - fld.d $fa1, $a5, %pc_lo12(.LCPI2_1) + ori $a5, $zero, 0 + lu32i.d $a5, -497152 + lu52i.d $a6, $a5, 1039 + movgr2fr.d $fa1, $a6 fcmp.cule.d $fcc0, $fa1, $ft2 bcnez $fcc0, .LBB2_4 # %bb.3: @@ -1063,11 +1063,9 @@ IntersectObjs: # @IntersectObjs fst.d $fa0, $a2, 16 vrepli.b $vr0, 0 vst $vr0, $a3, 0 - lu52i.d $a5, $zero, 1023 - st.d $a5, $a3, 16 + lu52i.d $a6, $zero, 1023 + st.d $a6, $a3, 16 vld $vr0, $a2, 0 - ori $a5, $zero, 0 - lu32i.d $a5, -497152 lu52i.d $a5, $a5, 1038 vreplgr2vr.d $vr1, $a5 vfadd.d $vr0, $vr0, $vr1 diff --git a/results/MultiSource/Benchmarks/FreeBench/fourinarow/CMakeFiles/fourinarow.dir/fourinarow.s b/results/MultiSource/Benchmarks/FreeBench/fourinarow/CMakeFiles/fourinarow.dir/fourinarow.s index 39a41297..06c9f073 100644 --- a/results/MultiSource/Benchmarks/FreeBench/fourinarow/CMakeFiles/fourinarow.dir/fourinarow.s +++ b/results/MultiSource/Benchmarks/FreeBench/fourinarow/CMakeFiles/fourinarow.dir/fourinarow.s @@ -1547,12 +1547,7 @@ find_winner_c: # @find_winner_c .Lfunc_end6: .size find_winner_c, .Lfunc_end6-find_winner_c # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function value -.LCPI7_0: - .word 0x447a0000 # float 1000 - .text - .globl value + .globl value # -- Begin function value .p2align 5 .type value,@function value: # @value @@ -1597,452 +1592,454 @@ value: # @value ld.w $a1, $a1, %pc_lo12(off) ld.d $t5, $a0, %pc_lo12(C2UP_R) pcalau12i $a0, %pc_hi20(C2UP_L) - ld.d $a0, $a0, %pc_lo12(C2UP_L) + ld.d $t6, $a0, %pc_lo12(C2UP_L) movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 vldi $vr1, -1244 fdiv.s $fa0, $fa0, $fa1 - slli.d $a1, $a0, 12 - st.d $a1, $sp, 1728 # 8-byte Folded Spill - slli.d $a1, $a0, 15 - st.d $a1, $sp, 1720 # 8-byte Folded Spill - slli.d $a1, $a3, 1 - st.d $a1, $sp, 1712 # 8-byte Folded Spill - slli.d $a1, $a3, 2 - st.d $a1, $sp, 1704 # 8-byte Folded Spill - slli.d $a1, $a3, 3 - st.d $a1, $sp, 1696 # 8-byte Folded Spill - slli.d $a1, $a3, 4 - st.d $a1, $sp, 1688 # 8-byte Folded Spill - slli.d $a1, $a3, 5 - st.d $a1, $sp, 1680 # 8-byte Folded Spill - slli.d $a1, $a3, 6 - st.d $a1, $sp, 1672 # 8-byte Folded Spill - slli.d $a1, $a3, 7 - st.d $a1, $sp, 1664 # 8-byte Folded Spill - slli.d $a1, $a3, 8 - st.d $a1, $sp, 1656 # 8-byte Folded Spill - slli.d $a1, $a3, 9 - st.d $a1, $sp, 1648 # 8-byte Folded Spill - slli.d $a1, $a3, 10 - st.d $a1, $sp, 1640 # 8-byte Folded Spill - slli.d $a1, $a3, 11 - st.d $a1, $sp, 1632 # 8-byte Folded Spill - vldi $vr1, -1040 - vldi $vr2, -1168 - vldi $vr3, -1228 - vldi $vr4, -1260 - slli.d $a1, $a3, 12 - st.d $a1, $sp, 1624 # 8-byte Folded Spill - slli.d $a1, $a3, 13 - st.d $a1, $sp, 1616 # 8-byte Folded Spill - slli.d $a1, $a3, 14 - st.d $a1, $sp, 1608 # 8-byte Folded Spill - slli.d $a1, $a3, 15 - st.d $a1, $sp, 1600 # 8-byte Folded Spill - slli.d $a1, $a3, 16 - st.d $a1, $sp, 1584 # 8-byte Folded Spill - slli.d $a1, $a3, 17 - st.d $a1, $sp, 1568 # 8-byte Folded Spill - slli.d $a1, $a3, 18 - st.d $a1, $sp, 1560 # 8-byte Folded Spill - slli.d $a1, $a3, 19 - st.d $a1, $sp, 1552 # 8-byte Folded Spill + slli.d $a0, $t6, 12 + st.d $a0, $sp, 1728 # 8-byte Folded Spill + slli.d $a0, $t6, 15 + st.d $a0, $sp, 1720 # 8-byte Folded Spill + slli.d $a0, $a3, 1 + st.d $a0, $sp, 1712 # 8-byte Folded Spill + slli.d $a0, $a3, 2 + st.d $a0, $sp, 1704 # 8-byte Folded Spill + slli.d $a0, $a3, 3 + st.d $a0, $sp, 1696 # 8-byte Folded Spill + slli.d $a0, $a3, 4 + st.d $a0, $sp, 1688 # 8-byte Folded Spill + slli.d $a0, $a3, 5 + st.d $a0, $sp, 1680 # 8-byte Folded Spill + slli.d $a0, $a3, 6 + st.d $a0, $sp, 1672 # 8-byte Folded Spill + slli.d $a0, $a3, 7 + st.d $a0, $sp, 1664 # 8-byte Folded Spill + slli.d $a0, $a3, 8 + st.d $a0, $sp, 1656 # 8-byte Folded Spill + slli.d $a0, $a3, 9 + st.d $a0, $sp, 1648 # 8-byte Folded Spill + slli.d $a0, $a3, 10 + st.d $a0, $sp, 1640 # 8-byte Folded Spill + lu12i.w $a0, 280480 + movgr2fr.w $fa1, $a0 + slli.d $a0, $a3, 11 + st.d $a0, $sp, 1632 # 8-byte Folded Spill + vldi $vr2, -1040 + vldi $vr3, -1168 + vldi $vr4, -1228 + vldi $vr5, -1260 + slli.d $a0, $a3, 12 + st.d $a0, $sp, 1624 # 8-byte Folded Spill + slli.d $a0, $a3, 13 + st.d $a0, $sp, 1616 # 8-byte Folded Spill + slli.d $a0, $a3, 14 + st.d $a0, $sp, 1608 # 8-byte Folded Spill + slli.d $a0, $a3, 15 + st.d $a0, $sp, 1600 # 8-byte Folded Spill + slli.d $a0, $a3, 16 + st.d $a0, $sp, 1584 # 8-byte Folded Spill + slli.d $a0, $a3, 17 + st.d $a0, $sp, 1568 # 8-byte Folded Spill + slli.d $a0, $a3, 18 + st.d $a0, $sp, 1560 # 8-byte Folded Spill + slli.d $a0, $a3, 19 + st.d $a0, $sp, 1552 # 8-byte Folded Spill st.d $a3, $sp, 1576 # 8-byte Folded Spill - slli.d $a1, $a3, 20 - st.d $a1, $sp, 1544 # 8-byte Folded Spill - slli.d $a1, $a4, 1 - st.d $a1, $sp, 1536 # 8-byte Folded Spill - slli.d $a1, $a4, 2 - st.d $a1, $sp, 1528 # 8-byte Folded Spill - slli.d $a1, $a4, 3 - st.d $a1, $sp, 1520 # 8-byte Folded Spill - slli.d $a1, $a4, 6 - st.d $a1, $sp, 1512 # 8-byte Folded Spill - slli.d $a1, $a4, 7 - st.d $a1, $sp, 1504 # 8-byte Folded Spill - slli.d $a1, $a4, 8 - st.d $a1, $sp, 1496 # 8-byte Folded Spill - slli.d $a1, $a4, 9 - st.d $a1, $sp, 1488 # 8-byte Folded Spill - slli.d $a1, $a4, 12 - st.d $a1, $sp, 1480 # 8-byte Folded Spill - slli.d $a1, $a4, 13 - st.d $a1, $sp, 1472 # 8-byte Folded Spill - slli.d $a1, $a4, 14 - st.d $a1, $sp, 1464 # 8-byte Folded Spill - slli.d $a1, $a4, 15 - st.d $a1, $sp, 1456 # 8-byte Folded Spill - slli.d $a1, $a4, 18 - st.d $a1, $sp, 1448 # 8-byte Folded Spill - slli.d $a1, $a4, 19 - st.d $a1, $sp, 1440 # 8-byte Folded Spill - slli.d $a1, $a4, 20 - st.d $a1, $sp, 1432 # 8-byte Folded Spill - slli.d $a1, $a4, 21 - st.d $a1, $sp, 1424 # 8-byte Folded Spill - slli.d $a1, $a4, 24 - st.d $a1, $sp, 1416 # 8-byte Folded Spill - slli.d $a1, $a4, 25 - st.d $a1, $sp, 1408 # 8-byte Folded Spill - slli.d $a1, $a4, 26 - st.d $a1, $sp, 1400 # 8-byte Folded Spill - slli.d $a1, $a4, 27 - st.d $a1, $sp, 1392 # 8-byte Folded Spill - slli.d $a1, $a4, 30 - st.d $a1, $sp, 1384 # 8-byte Folded Spill - slli.d $a1, $a4, 31 - st.d $a1, $sp, 1376 # 8-byte Folded Spill - slli.d $a1, $a4, 32 - st.d $a1, $sp, 1368 # 8-byte Folded Spill + slli.d $a0, $a3, 20 + st.d $a0, $sp, 1544 # 8-byte Folded Spill + slli.d $a0, $a4, 1 + st.d $a0, $sp, 1536 # 8-byte Folded Spill + slli.d $a0, $a4, 2 + st.d $a0, $sp, 1528 # 8-byte Folded Spill + slli.d $a0, $a4, 3 + st.d $a0, $sp, 1520 # 8-byte Folded Spill + slli.d $a0, $a4, 6 + st.d $a0, $sp, 1512 # 8-byte Folded Spill + slli.d $a0, $a4, 7 + st.d $a0, $sp, 1504 # 8-byte Folded Spill + slli.d $a0, $a4, 8 + st.d $a0, $sp, 1496 # 8-byte Folded Spill + slli.d $a0, $a4, 9 + st.d $a0, $sp, 1488 # 8-byte Folded Spill + slli.d $a0, $a4, 12 + st.d $a0, $sp, 1480 # 8-byte Folded Spill + slli.d $a0, $a4, 13 + st.d $a0, $sp, 1472 # 8-byte Folded Spill + slli.d $a0, $a4, 14 + st.d $a0, $sp, 1464 # 8-byte Folded Spill + slli.d $a0, $a4, 15 + st.d $a0, $sp, 1456 # 8-byte Folded Spill + slli.d $a0, $a4, 18 + st.d $a0, $sp, 1448 # 8-byte Folded Spill + slli.d $a0, $a4, 19 + st.d $a0, $sp, 1440 # 8-byte Folded Spill + slli.d $a0, $a4, 20 + st.d $a0, $sp, 1432 # 8-byte Folded Spill + slli.d $a0, $a4, 21 + st.d $a0, $sp, 1424 # 8-byte Folded Spill + slli.d $a0, $a4, 24 + st.d $a0, $sp, 1416 # 8-byte Folded Spill + slli.d $a0, $a4, 25 + st.d $a0, $sp, 1408 # 8-byte Folded Spill + slli.d $a0, $a4, 26 + st.d $a0, $sp, 1400 # 8-byte Folded Spill + slli.d $a0, $a4, 27 + st.d $a0, $sp, 1392 # 8-byte Folded Spill + slli.d $a0, $a4, 30 + st.d $a0, $sp, 1384 # 8-byte Folded Spill + slli.d $a0, $a4, 31 + st.d $a0, $sp, 1376 # 8-byte Folded Spill + slli.d $a0, $a4, 32 + st.d $a0, $sp, 1368 # 8-byte Folded Spill st.d $a4, $sp, 1808 # 8-byte Folded Spill - slli.d $a1, $a4, 33 - st.d $a1, $sp, 1360 # 8-byte Folded Spill - slli.d $a1, $a5, 1 - st.d $a1, $sp, 1352 # 8-byte Folded Spill - slli.d $a1, $a5, 2 - st.d $a1, $sp, 1344 # 8-byte Folded Spill - slli.d $a1, $a5, 3 - st.d $a1, $sp, 1336 # 8-byte Folded Spill - slli.d $a1, $a5, 6 - st.d $a1, $sp, 1328 # 8-byte Folded Spill - slli.d $a1, $a5, 7 - st.d $a1, $sp, 1320 # 8-byte Folded Spill - slli.d $a1, $a5, 8 - st.d $a1, $sp, 1312 # 8-byte Folded Spill - slli.d $a1, $a5, 9 - st.d $a1, $sp, 1304 # 8-byte Folded Spill - slli.d $a1, $a5, 12 - st.d $a1, $sp, 1296 # 8-byte Folded Spill - slli.d $a1, $a5, 13 - st.d $a1, $sp, 1288 # 8-byte Folded Spill - slli.d $a1, $a5, 14 - st.d $a1, $sp, 1280 # 8-byte Folded Spill + slli.d $a0, $a4, 33 + st.d $a0, $sp, 1360 # 8-byte Folded Spill + slli.d $a0, $a5, 1 + st.d $a0, $sp, 1352 # 8-byte Folded Spill + slli.d $a0, $a5, 2 + st.d $a0, $sp, 1344 # 8-byte Folded Spill + slli.d $a0, $a5, 3 + st.d $a0, $sp, 1336 # 8-byte Folded Spill + slli.d $a0, $a5, 6 + st.d $a0, $sp, 1328 # 8-byte Folded Spill + slli.d $a0, $a5, 7 + st.d $a0, $sp, 1320 # 8-byte Folded Spill + slli.d $a0, $a5, 8 + st.d $a0, $sp, 1312 # 8-byte Folded Spill + slli.d $a0, $a5, 9 + st.d $a0, $sp, 1304 # 8-byte Folded Spill + slli.d $a0, $a5, 12 + st.d $a0, $sp, 1296 # 8-byte Folded Spill + slli.d $a0, $a5, 13 + st.d $a0, $sp, 1288 # 8-byte Folded Spill + slli.d $a0, $a5, 14 + st.d $a0, $sp, 1280 # 8-byte Folded Spill st.d $a5, $sp, 1800 # 8-byte Folded Spill - slli.d $a1, $a5, 15 - st.d $a1, $sp, 1272 # 8-byte Folded Spill - slli.d $a1, $a6, 1 - st.d $a1, $sp, 1264 # 8-byte Folded Spill - slli.d $a1, $a6, 2 - st.d $a1, $sp, 1256 # 8-byte Folded Spill - slli.d $a1, $a6, 3 - st.d $a1, $sp, 1248 # 8-byte Folded Spill - slli.d $a1, $a6, 6 - st.d $a1, $sp, 1240 # 8-byte Folded Spill - slli.d $a1, $a6, 7 - st.d $a1, $sp, 1232 # 8-byte Folded Spill - slli.d $a1, $a6, 8 - st.d $a1, $sp, 1224 # 8-byte Folded Spill - slli.d $a1, $a6, 9 - st.d $a1, $sp, 1216 # 8-byte Folded Spill - slli.d $a1, $a6, 12 - st.d $a1, $sp, 1208 # 8-byte Folded Spill - slli.d $a1, $a6, 13 - st.d $a1, $sp, 1200 # 8-byte Folded Spill - slli.d $a1, $a6, 14 - st.d $a1, $sp, 1192 # 8-byte Folded Spill + slli.d $a0, $a5, 15 + st.d $a0, $sp, 1272 # 8-byte Folded Spill + slli.d $a0, $a6, 1 + st.d $a0, $sp, 1264 # 8-byte Folded Spill + slli.d $a0, $a6, 2 + st.d $a0, $sp, 1256 # 8-byte Folded Spill + slli.d $a0, $a6, 3 + st.d $a0, $sp, 1248 # 8-byte Folded Spill + slli.d $a0, $a6, 6 + st.d $a0, $sp, 1240 # 8-byte Folded Spill + slli.d $a0, $a6, 7 + st.d $a0, $sp, 1232 # 8-byte Folded Spill + slli.d $a0, $a6, 8 + st.d $a0, $sp, 1224 # 8-byte Folded Spill + slli.d $a0, $a6, 9 + st.d $a0, $sp, 1216 # 8-byte Folded Spill + slli.d $a0, $a6, 12 + st.d $a0, $sp, 1208 # 8-byte Folded Spill + slli.d $a0, $a6, 13 + st.d $a0, $sp, 1200 # 8-byte Folded Spill + slli.d $a0, $a6, 14 + st.d $a0, $sp, 1192 # 8-byte Folded Spill st.d $a6, $sp, 1792 # 8-byte Folded Spill - slli.d $a1, $a6, 15 - st.d $a1, $sp, 1184 # 8-byte Folded Spill - slli.d $a1, $a7, 1 - st.d $a1, $sp, 1176 # 8-byte Folded Spill - slli.d $a1, $a7, 2 - st.d $a1, $sp, 1168 # 8-byte Folded Spill - slli.d $a1, $a7, 3 - st.d $a1, $sp, 1160 # 8-byte Folded Spill - slli.d $a1, $a7, 4 - st.d $a1, $sp, 1152 # 8-byte Folded Spill - slli.d $a1, $a7, 5 - st.d $a1, $sp, 1144 # 8-byte Folded Spill - slli.d $a1, $a7, 6 - st.d $a1, $sp, 1136 # 8-byte Folded Spill - slli.d $a1, $a7, 7 - st.d $a1, $sp, 1128 # 8-byte Folded Spill - slli.d $a1, $a7, 8 - st.d $a1, $sp, 1120 # 8-byte Folded Spill - slli.d $a1, $a7, 9 - st.d $a1, $sp, 1112 # 8-byte Folded Spill - slli.d $a1, $a7, 10 - st.d $a1, $sp, 1104 # 8-byte Folded Spill - slli.d $a1, $a7, 11 - st.d $a1, $sp, 1096 # 8-byte Folded Spill - slli.d $a1, $a7, 12 - st.d $a1, $sp, 1088 # 8-byte Folded Spill - slli.d $a1, $a7, 13 - st.d $a1, $sp, 1080 # 8-byte Folded Spill - slli.d $a1, $a7, 14 - st.d $a1, $sp, 1072 # 8-byte Folded Spill - slli.d $a1, $a7, 15 - st.d $a1, $sp, 1064 # 8-byte Folded Spill - slli.d $a1, $a7, 16 - st.d $a1, $sp, 1056 # 8-byte Folded Spill - slli.d $a1, $a7, 17 - st.d $a1, $sp, 1048 # 8-byte Folded Spill - slli.d $a1, $a7, 18 - st.d $a1, $sp, 1040 # 8-byte Folded Spill - slli.d $a1, $a7, 19 - st.d $a1, $sp, 1032 # 8-byte Folded Spill + slli.d $a0, $a6, 15 + st.d $a0, $sp, 1184 # 8-byte Folded Spill + slli.d $a0, $a7, 1 + st.d $a0, $sp, 1176 # 8-byte Folded Spill + slli.d $a0, $a7, 2 + st.d $a0, $sp, 1168 # 8-byte Folded Spill + slli.d $a0, $a7, 3 + st.d $a0, $sp, 1160 # 8-byte Folded Spill + slli.d $a0, $a7, 4 + st.d $a0, $sp, 1152 # 8-byte Folded Spill + slli.d $a0, $a7, 5 + st.d $a0, $sp, 1144 # 8-byte Folded Spill + slli.d $a0, $a7, 6 + st.d $a0, $sp, 1136 # 8-byte Folded Spill + slli.d $a0, $a7, 7 + st.d $a0, $sp, 1128 # 8-byte Folded Spill + slli.d $a0, $a7, 8 + st.d $a0, $sp, 1120 # 8-byte Folded Spill + slli.d $a0, $a7, 9 + st.d $a0, $sp, 1112 # 8-byte Folded Spill + slli.d $a0, $a7, 10 + st.d $a0, $sp, 1104 # 8-byte Folded Spill + slli.d $a0, $a7, 11 + st.d $a0, $sp, 1096 # 8-byte Folded Spill + slli.d $a0, $a7, 12 + st.d $a0, $sp, 1088 # 8-byte Folded Spill + slli.d $a0, $a7, 13 + st.d $a0, $sp, 1080 # 8-byte Folded Spill + slli.d $a0, $a7, 14 + st.d $a0, $sp, 1072 # 8-byte Folded Spill + slli.d $a0, $a7, 15 + st.d $a0, $sp, 1064 # 8-byte Folded Spill + slli.d $a0, $a7, 16 + st.d $a0, $sp, 1056 # 8-byte Folded Spill + slli.d $a0, $a7, 17 + st.d $a0, $sp, 1048 # 8-byte Folded Spill + slli.d $a0, $a7, 18 + st.d $a0, $sp, 1040 # 8-byte Folded Spill + slli.d $a0, $a7, 19 + st.d $a0, $sp, 1032 # 8-byte Folded Spill st.d $a7, $sp, 1784 # 8-byte Folded Spill - slli.d $a1, $a7, 20 - st.d $a1, $sp, 1024 # 8-byte Folded Spill - slli.d $a1, $t0, 1 - st.d $a1, $sp, 1016 # 8-byte Folded Spill - slli.d $a1, $t0, 2 - st.d $a1, $sp, 1008 # 8-byte Folded Spill - slli.d $a1, $t0, 3 - st.d $a1, $sp, 1000 # 8-byte Folded Spill - slli.d $a1, $t0, 4 - st.d $a1, $sp, 992 # 8-byte Folded Spill - slli.d $a1, $t0, 6 - st.d $a1, $sp, 984 # 8-byte Folded Spill - slli.d $a1, $t0, 7 - st.d $a1, $sp, 976 # 8-byte Folded Spill - slli.d $a1, $t0, 8 - st.d $a1, $sp, 968 # 8-byte Folded Spill - slli.d $a1, $t0, 9 - st.d $a1, $sp, 960 # 8-byte Folded Spill - slli.d $a1, $t0, 10 - st.d $a1, $sp, 952 # 8-byte Folded Spill - slli.d $a1, $t0, 12 - st.d $a1, $sp, 944 # 8-byte Folded Spill - slli.d $a1, $t0, 13 - st.d $a1, $sp, 936 # 8-byte Folded Spill - slli.d $a1, $t0, 14 - st.d $a1, $sp, 928 # 8-byte Folded Spill - slli.d $a1, $t0, 15 - st.d $a1, $sp, 920 # 8-byte Folded Spill - slli.d $a1, $t0, 16 - st.d $a1, $sp, 912 # 8-byte Folded Spill - slli.d $a1, $t0, 18 - st.d $a1, $sp, 904 # 8-byte Folded Spill - slli.d $a1, $t0, 19 - st.d $a1, $sp, 896 # 8-byte Folded Spill - slli.d $a1, $t0, 20 - st.d $a1, $sp, 888 # 8-byte Folded Spill - slli.d $a1, $t0, 21 - st.d $a1, $sp, 880 # 8-byte Folded Spill - slli.d $a1, $t0, 22 - st.d $a1, $sp, 872 # 8-byte Folded Spill - slli.d $a1, $t0, 24 - st.d $a1, $sp, 864 # 8-byte Folded Spill - slli.d $a1, $t0, 25 - st.d $a1, $sp, 856 # 8-byte Folded Spill - slli.d $a1, $t0, 26 - st.d $a1, $sp, 848 # 8-byte Folded Spill - slli.d $a1, $t0, 27 - st.d $a1, $sp, 840 # 8-byte Folded Spill - slli.d $a1, $t0, 28 - st.d $a1, $sp, 832 # 8-byte Folded Spill - slli.d $a1, $t0, 30 - st.d $a1, $sp, 824 # 8-byte Folded Spill - slli.d $a1, $t0, 31 - st.d $a1, $sp, 816 # 8-byte Folded Spill - slli.d $a1, $t0, 32 - st.d $a1, $sp, 808 # 8-byte Folded Spill - slli.d $a1, $t0, 33 - st.d $a1, $sp, 800 # 8-byte Folded Spill + slli.d $a0, $a7, 20 + st.d $a0, $sp, 1024 # 8-byte Folded Spill + slli.d $a0, $t0, 1 + st.d $a0, $sp, 1016 # 8-byte Folded Spill + slli.d $a0, $t0, 2 + st.d $a0, $sp, 1008 # 8-byte Folded Spill + slli.d $a0, $t0, 3 + st.d $a0, $sp, 1000 # 8-byte Folded Spill + slli.d $a0, $t0, 4 + st.d $a0, $sp, 992 # 8-byte Folded Spill + slli.d $a0, $t0, 6 + st.d $a0, $sp, 984 # 8-byte Folded Spill + slli.d $a0, $t0, 7 + st.d $a0, $sp, 976 # 8-byte Folded Spill + slli.d $a0, $t0, 8 + st.d $a0, $sp, 968 # 8-byte Folded Spill + slli.d $a0, $t0, 9 + st.d $a0, $sp, 960 # 8-byte Folded Spill + slli.d $a0, $t0, 10 + st.d $a0, $sp, 952 # 8-byte Folded Spill + slli.d $a0, $t0, 12 + st.d $a0, $sp, 944 # 8-byte Folded Spill + slli.d $a0, $t0, 13 + st.d $a0, $sp, 936 # 8-byte Folded Spill + slli.d $a0, $t0, 14 + st.d $a0, $sp, 928 # 8-byte Folded Spill + slli.d $a0, $t0, 15 + st.d $a0, $sp, 920 # 8-byte Folded Spill + slli.d $a0, $t0, 16 + st.d $a0, $sp, 912 # 8-byte Folded Spill + slli.d $a0, $t0, 18 + st.d $a0, $sp, 904 # 8-byte Folded Spill + slli.d $a0, $t0, 19 + st.d $a0, $sp, 896 # 8-byte Folded Spill + slli.d $a0, $t0, 20 + st.d $a0, $sp, 888 # 8-byte Folded Spill + slli.d $a0, $t0, 21 + st.d $a0, $sp, 880 # 8-byte Folded Spill + slli.d $a0, $t0, 22 + st.d $a0, $sp, 872 # 8-byte Folded Spill + slli.d $a0, $t0, 24 + st.d $a0, $sp, 864 # 8-byte Folded Spill + slli.d $a0, $t0, 25 + st.d $a0, $sp, 856 # 8-byte Folded Spill + slli.d $a0, $t0, 26 + st.d $a0, $sp, 848 # 8-byte Folded Spill + slli.d $a0, $t0, 27 + st.d $a0, $sp, 840 # 8-byte Folded Spill + slli.d $a0, $t0, 28 + st.d $a0, $sp, 832 # 8-byte Folded Spill + slli.d $a0, $t0, 30 + st.d $a0, $sp, 824 # 8-byte Folded Spill + slli.d $a0, $t0, 31 + st.d $a0, $sp, 816 # 8-byte Folded Spill + slli.d $a0, $t0, 32 + st.d $a0, $sp, 808 # 8-byte Folded Spill + slli.d $a0, $t0, 33 + st.d $a0, $sp, 800 # 8-byte Folded Spill st.d $t0, $sp, 1776 # 8-byte Folded Spill - slli.d $a1, $t0, 34 - st.d $a1, $sp, 792 # 8-byte Folded Spill - slli.d $a1, $t1, 1 - st.d $a1, $sp, 784 # 8-byte Folded Spill - slli.d $a1, $t1, 2 - st.d $a1, $sp, 776 # 8-byte Folded Spill - slli.d $a1, $t1, 3 - st.d $a1, $sp, 768 # 8-byte Folded Spill - slli.d $a1, $t1, 6 - st.d $a1, $sp, 760 # 8-byte Folded Spill - slli.d $a1, $t1, 7 - st.d $a1, $sp, 752 # 8-byte Folded Spill - slli.d $a1, $t1, 8 - st.d $a1, $sp, 744 # 8-byte Folded Spill - slli.d $a1, $t1, 9 - st.d $a1, $sp, 736 # 8-byte Folded Spill - slli.d $a1, $t1, 12 - st.d $a1, $sp, 728 # 8-byte Folded Spill - slli.d $a1, $t1, 13 - st.d $a1, $sp, 720 # 8-byte Folded Spill - slli.d $a1, $t1, 14 - st.d $a1, $sp, 712 # 8-byte Folded Spill + slli.d $a0, $t0, 34 + st.d $a0, $sp, 792 # 8-byte Folded Spill + slli.d $a0, $t1, 1 + st.d $a0, $sp, 784 # 8-byte Folded Spill + slli.d $a0, $t1, 2 + st.d $a0, $sp, 776 # 8-byte Folded Spill + slli.d $a0, $t1, 3 + st.d $a0, $sp, 768 # 8-byte Folded Spill + slli.d $a0, $t1, 6 + st.d $a0, $sp, 760 # 8-byte Folded Spill + slli.d $a0, $t1, 7 + st.d $a0, $sp, 752 # 8-byte Folded Spill + slli.d $a0, $t1, 8 + st.d $a0, $sp, 744 # 8-byte Folded Spill + slli.d $a0, $t1, 9 + st.d $a0, $sp, 736 # 8-byte Folded Spill + slli.d $a0, $t1, 12 + st.d $a0, $sp, 728 # 8-byte Folded Spill + slli.d $a0, $t1, 13 + st.d $a0, $sp, 720 # 8-byte Folded Spill + slli.d $a0, $t1, 14 + st.d $a0, $sp, 712 # 8-byte Folded Spill st.d $t1, $sp, 1768 # 8-byte Folded Spill - slli.d $a1, $t1, 15 - st.d $a1, $sp, 704 # 8-byte Folded Spill - slli.d $a1, $t2, 1 - st.d $a1, $sp, 696 # 8-byte Folded Spill - slli.d $a1, $t2, 2 - st.d $a1, $sp, 688 # 8-byte Folded Spill - slli.d $a1, $t2, 3 - st.d $a1, $sp, 680 # 8-byte Folded Spill - slli.d $a1, $t2, 6 - st.d $a1, $sp, 672 # 8-byte Folded Spill - slli.d $a1, $t2, 7 - st.d $a1, $sp, 664 # 8-byte Folded Spill - slli.d $a1, $t2, 8 - st.d $a1, $sp, 656 # 8-byte Folded Spill - slli.d $a1, $t2, 9 - st.d $a1, $sp, 648 # 8-byte Folded Spill - slli.d $a1, $t2, 12 - st.d $a1, $sp, 640 # 8-byte Folded Spill - slli.d $a1, $t2, 13 - st.d $a1, $sp, 632 # 8-byte Folded Spill - slli.d $a1, $t2, 14 - st.d $a1, $sp, 624 # 8-byte Folded Spill + slli.d $a0, $t1, 15 + st.d $a0, $sp, 704 # 8-byte Folded Spill + slli.d $a0, $t2, 1 + st.d $a0, $sp, 696 # 8-byte Folded Spill + slli.d $a0, $t2, 2 + st.d $a0, $sp, 688 # 8-byte Folded Spill + slli.d $a0, $t2, 3 + st.d $a0, $sp, 680 # 8-byte Folded Spill + slli.d $a0, $t2, 6 + st.d $a0, $sp, 672 # 8-byte Folded Spill + slli.d $a0, $t2, 7 + st.d $a0, $sp, 664 # 8-byte Folded Spill + slli.d $a0, $t2, 8 + st.d $a0, $sp, 656 # 8-byte Folded Spill + slli.d $a0, $t2, 9 + st.d $a0, $sp, 648 # 8-byte Folded Spill + slli.d $a0, $t2, 12 + st.d $a0, $sp, 640 # 8-byte Folded Spill + slli.d $a0, $t2, 13 + st.d $a0, $sp, 632 # 8-byte Folded Spill + slli.d $a0, $t2, 14 + st.d $a0, $sp, 624 # 8-byte Folded Spill st.d $t2, $sp, 1760 # 8-byte Folded Spill - slli.d $a1, $t2, 15 - st.d $a1, $sp, 616 # 8-byte Folded Spill - slli.d $a1, $t3, 1 - st.d $a1, $sp, 608 # 8-byte Folded Spill - slli.d $a1, $t3, 2 - st.d $a1, $sp, 600 # 8-byte Folded Spill - slli.d $a1, $t3, 3 - st.d $a1, $sp, 592 # 8-byte Folded Spill - slli.d $a1, $t3, 4 - st.d $a1, $sp, 584 # 8-byte Folded Spill - slli.d $a1, $t3, 5 - st.d $a1, $sp, 576 # 8-byte Folded Spill - slli.d $a1, $t3, 6 - st.d $a1, $sp, 568 # 8-byte Folded Spill - slli.d $a1, $t3, 7 - st.d $a1, $sp, 560 # 8-byte Folded Spill - slli.d $a1, $t3, 8 - st.d $a1, $sp, 552 # 8-byte Folded Spill - slli.d $a1, $t3, 9 - st.d $a1, $sp, 544 # 8-byte Folded Spill - slli.d $a1, $t3, 10 - st.d $a1, $sp, 536 # 8-byte Folded Spill - slli.d $a1, $t3, 11 - st.d $a1, $sp, 528 # 8-byte Folded Spill - slli.d $a1, $t3, 12 - st.d $a1, $sp, 520 # 8-byte Folded Spill - slli.d $a1, $t3, 13 - st.d $a1, $sp, 512 # 8-byte Folded Spill - slli.d $a1, $t3, 14 - st.d $a1, $sp, 504 # 8-byte Folded Spill - slli.d $a1, $t3, 15 - st.d $a1, $sp, 496 # 8-byte Folded Spill - slli.d $a1, $t3, 16 - st.d $a1, $sp, 488 # 8-byte Folded Spill - slli.d $a1, $t3, 17 - st.d $a1, $sp, 480 # 8-byte Folded Spill - slli.d $a1, $t3, 18 - st.d $a1, $sp, 472 # 8-byte Folded Spill - slli.d $a1, $t3, 19 - st.d $a1, $sp, 464 # 8-byte Folded Spill + slli.d $a0, $t2, 15 + st.d $a0, $sp, 616 # 8-byte Folded Spill + slli.d $a0, $t3, 1 + st.d $a0, $sp, 608 # 8-byte Folded Spill + slli.d $a0, $t3, 2 + st.d $a0, $sp, 600 # 8-byte Folded Spill + slli.d $a0, $t3, 3 + st.d $a0, $sp, 592 # 8-byte Folded Spill + slli.d $a0, $t3, 4 + st.d $a0, $sp, 584 # 8-byte Folded Spill + slli.d $a0, $t3, 5 + st.d $a0, $sp, 576 # 8-byte Folded Spill + slli.d $a0, $t3, 6 + st.d $a0, $sp, 568 # 8-byte Folded Spill + slli.d $a0, $t3, 7 + st.d $a0, $sp, 560 # 8-byte Folded Spill + slli.d $a0, $t3, 8 + st.d $a0, $sp, 552 # 8-byte Folded Spill + slli.d $a0, $t3, 9 + st.d $a0, $sp, 544 # 8-byte Folded Spill + slli.d $a0, $t3, 10 + st.d $a0, $sp, 536 # 8-byte Folded Spill + slli.d $a0, $t3, 11 + st.d $a0, $sp, 528 # 8-byte Folded Spill + slli.d $a0, $t3, 12 + st.d $a0, $sp, 520 # 8-byte Folded Spill + slli.d $a0, $t3, 13 + st.d $a0, $sp, 512 # 8-byte Folded Spill + slli.d $a0, $t3, 14 + st.d $a0, $sp, 504 # 8-byte Folded Spill + slli.d $a0, $t3, 15 + st.d $a0, $sp, 496 # 8-byte Folded Spill + slli.d $a0, $t3, 16 + st.d $a0, $sp, 488 # 8-byte Folded Spill + slli.d $a0, $t3, 17 + st.d $a0, $sp, 480 # 8-byte Folded Spill + slli.d $a0, $t3, 18 + st.d $a0, $sp, 472 # 8-byte Folded Spill + slli.d $a0, $t3, 19 + st.d $a0, $sp, 464 # 8-byte Folded Spill st.d $t3, $sp, 1752 # 8-byte Folded Spill - slli.d $a1, $t3, 20 - st.d $a1, $sp, 456 # 8-byte Folded Spill - slli.d $a1, $t4, 1 - st.d $a1, $sp, 448 # 8-byte Folded Spill - slli.d $a1, $t4, 2 - st.d $a1, $sp, 440 # 8-byte Folded Spill - slli.d $a1, $t4, 3 - st.d $a1, $sp, 432 # 8-byte Folded Spill - slli.d $a1, $t4, 4 - st.d $a1, $sp, 424 # 8-byte Folded Spill - slli.d $a1, $t4, 5 - st.d $a1, $sp, 416 # 8-byte Folded Spill - slli.d $a1, $t4, 6 - st.d $a1, $sp, 408 # 8-byte Folded Spill - slli.d $a1, $t4, 7 - st.d $a1, $sp, 400 # 8-byte Folded Spill - slli.d $a1, $t4, 8 - st.d $a1, $sp, 392 # 8-byte Folded Spill - slli.d $a1, $t4, 9 - st.d $a1, $sp, 384 # 8-byte Folded Spill - slli.d $a1, $t4, 10 - st.d $a1, $sp, 376 # 8-byte Folded Spill - slli.d $a1, $t4, 11 - st.d $a1, $sp, 368 # 8-byte Folded Spill - slli.d $a1, $t4, 12 - st.d $a1, $sp, 360 # 8-byte Folded Spill - slli.d $a1, $t4, 13 - st.d $a1, $sp, 352 # 8-byte Folded Spill - slli.d $a1, $t4, 14 - st.d $a1, $sp, 344 # 8-byte Folded Spill - slli.d $a1, $t4, 15 - st.d $a1, $sp, 336 # 8-byte Folded Spill - slli.d $a1, $t4, 16 - st.d $a1, $sp, 328 # 8-byte Folded Spill - slli.d $a1, $t4, 17 - st.d $a1, $sp, 320 # 8-byte Folded Spill - slli.d $a1, $t4, 18 - st.d $a1, $sp, 312 # 8-byte Folded Spill - slli.d $a1, $t4, 19 - st.d $a1, $sp, 304 # 8-byte Folded Spill - slli.d $a1, $t4, 20 - st.d $a1, $sp, 296 # 8-byte Folded Spill - slli.d $a1, $t4, 21 - st.d $a1, $sp, 288 # 8-byte Folded Spill - slli.d $a1, $t4, 22 - st.d $a1, $sp, 280 # 8-byte Folded Spill - slli.d $a1, $t4, 23 - st.d $a1, $sp, 272 # 8-byte Folded Spill - slli.d $a1, $t4, 24 - st.d $a1, $sp, 264 # 8-byte Folded Spill - slli.d $a1, $t4, 25 - st.d $a1, $sp, 256 # 8-byte Folded Spill - slli.d $a1, $t4, 26 - st.d $a1, $sp, 248 # 8-byte Folded Spill - slli.d $a1, $t4, 27 - st.d $a1, $sp, 240 # 8-byte Folded Spill - slli.d $a1, $t4, 28 - st.d $a1, $sp, 232 # 8-byte Folded Spill - slli.d $a1, $t4, 29 - st.d $a1, $sp, 224 # 8-byte Folded Spill - slli.d $a1, $t4, 30 - st.d $a1, $sp, 216 # 8-byte Folded Spill - slli.d $a1, $t4, 31 - st.d $a1, $sp, 208 # 8-byte Folded Spill - slli.d $a1, $t4, 32 - st.d $a1, $sp, 200 # 8-byte Folded Spill - slli.d $a1, $t4, 33 - st.d $a1, $sp, 192 # 8-byte Folded Spill - slli.d $a1, $t4, 34 - st.d $a1, $sp, 184 # 8-byte Folded Spill + slli.d $a0, $t3, 20 + st.d $a0, $sp, 456 # 8-byte Folded Spill + slli.d $a0, $t4, 1 + st.d $a0, $sp, 448 # 8-byte Folded Spill + slli.d $a0, $t4, 2 + st.d $a0, $sp, 440 # 8-byte Folded Spill + slli.d $a0, $t4, 3 + st.d $a0, $sp, 432 # 8-byte Folded Spill + slli.d $a0, $t4, 4 + st.d $a0, $sp, 424 # 8-byte Folded Spill + slli.d $a0, $t4, 5 + st.d $a0, $sp, 416 # 8-byte Folded Spill + slli.d $a0, $t4, 6 + st.d $a0, $sp, 408 # 8-byte Folded Spill + slli.d $a0, $t4, 7 + st.d $a0, $sp, 400 # 8-byte Folded Spill + slli.d $a0, $t4, 8 + st.d $a0, $sp, 392 # 8-byte Folded Spill + slli.d $a0, $t4, 9 + st.d $a0, $sp, 384 # 8-byte Folded Spill + slli.d $a0, $t4, 10 + st.d $a0, $sp, 376 # 8-byte Folded Spill + slli.d $a0, $t4, 11 + st.d $a0, $sp, 368 # 8-byte Folded Spill + slli.d $a0, $t4, 12 + st.d $a0, $sp, 360 # 8-byte Folded Spill + slli.d $a0, $t4, 13 + st.d $a0, $sp, 352 # 8-byte Folded Spill + slli.d $a0, $t4, 14 + st.d $a0, $sp, 344 # 8-byte Folded Spill + slli.d $a0, $t4, 15 + st.d $a0, $sp, 336 # 8-byte Folded Spill + slli.d $a0, $t4, 16 + st.d $a0, $sp, 328 # 8-byte Folded Spill + slli.d $a0, $t4, 17 + st.d $a0, $sp, 320 # 8-byte Folded Spill + slli.d $a0, $t4, 18 + st.d $a0, $sp, 312 # 8-byte Folded Spill + slli.d $a0, $t4, 19 + st.d $a0, $sp, 304 # 8-byte Folded Spill + slli.d $a0, $t4, 20 + st.d $a0, $sp, 296 # 8-byte Folded Spill + slli.d $a0, $t4, 21 + st.d $a0, $sp, 288 # 8-byte Folded Spill + slli.d $a0, $t4, 22 + st.d $a0, $sp, 280 # 8-byte Folded Spill + slli.d $a0, $t4, 23 + st.d $a0, $sp, 272 # 8-byte Folded Spill + slli.d $a0, $t4, 24 + st.d $a0, $sp, 264 # 8-byte Folded Spill + slli.d $a0, $t4, 25 + st.d $a0, $sp, 256 # 8-byte Folded Spill + slli.d $a0, $t4, 26 + st.d $a0, $sp, 248 # 8-byte Folded Spill + slli.d $a0, $t4, 27 + st.d $a0, $sp, 240 # 8-byte Folded Spill + slli.d $a0, $t4, 28 + st.d $a0, $sp, 232 # 8-byte Folded Spill + slli.d $a0, $t4, 29 + st.d $a0, $sp, 224 # 8-byte Folded Spill + slli.d $a0, $t4, 30 + st.d $a0, $sp, 216 # 8-byte Folded Spill + slli.d $a0, $t4, 31 + st.d $a0, $sp, 208 # 8-byte Folded Spill + slli.d $a0, $t4, 32 + st.d $a0, $sp, 200 # 8-byte Folded Spill + slli.d $a0, $t4, 33 + st.d $a0, $sp, 192 # 8-byte Folded Spill + slli.d $a0, $t4, 34 + st.d $a0, $sp, 184 # 8-byte Folded Spill st.d $t4, $sp, 1592 # 8-byte Folded Spill - slli.d $a1, $t4, 35 - st.d $a1, $sp, 176 # 8-byte Folded Spill - slli.d $a1, $t5, 1 - st.d $a1, $sp, 168 # 8-byte Folded Spill - slli.d $a1, $t5, 2 - st.d $a1, $sp, 160 # 8-byte Folded Spill - slli.d $a1, $t5, 3 - st.d $a1, $sp, 152 # 8-byte Folded Spill - slli.d $a1, $t5, 6 - st.d $a1, $sp, 144 # 8-byte Folded Spill - slli.d $a1, $t5, 7 - st.d $a1, $sp, 136 # 8-byte Folded Spill - slli.d $a1, $t5, 8 - st.d $a1, $sp, 128 # 8-byte Folded Spill - slli.d $a1, $t5, 9 - st.d $a1, $sp, 120 # 8-byte Folded Spill - slli.d $a1, $t5, 12 - st.d $a1, $sp, 112 # 8-byte Folded Spill - slli.d $a1, $t5, 13 - st.d $a1, $sp, 104 # 8-byte Folded Spill - slli.d $a1, $t5, 14 - st.d $a1, $sp, 96 # 8-byte Folded Spill + slli.d $a0, $t4, 35 + st.d $a0, $sp, 176 # 8-byte Folded Spill + slli.d $a0, $t5, 1 + st.d $a0, $sp, 168 # 8-byte Folded Spill + slli.d $a0, $t5, 2 + st.d $a0, $sp, 160 # 8-byte Folded Spill + slli.d $a0, $t5, 3 + st.d $a0, $sp, 152 # 8-byte Folded Spill + slli.d $a0, $t5, 6 + st.d $a0, $sp, 144 # 8-byte Folded Spill + slli.d $a0, $t5, 7 + st.d $a0, $sp, 136 # 8-byte Folded Spill + slli.d $a0, $t5, 8 + st.d $a0, $sp, 128 # 8-byte Folded Spill + slli.d $a0, $t5, 9 + st.d $a0, $sp, 120 # 8-byte Folded Spill + slli.d $a0, $t5, 12 + st.d $a0, $sp, 112 # 8-byte Folded Spill + slli.d $a0, $t5, 13 + st.d $a0, $sp, 104 # 8-byte Folded Spill + slli.d $a0, $t5, 14 + st.d $a0, $sp, 96 # 8-byte Folded Spill st.d $t5, $sp, 1744 # 8-byte Folded Spill - slli.d $a1, $t5, 15 - st.d $a1, $sp, 88 # 8-byte Folded Spill - slli.d $a1, $a0, 1 - st.d $a1, $sp, 80 # 8-byte Folded Spill - slli.d $a1, $a0, 2 - st.d $a1, $sp, 72 # 8-byte Folded Spill - slli.d $a1, $a0, 3 - st.d $a1, $sp, 64 # 8-byte Folded Spill - slli.d $a1, $a0, 6 - st.d $a1, $sp, 56 # 8-byte Folded Spill - slli.d $a1, $a0, 7 - st.d $a1, $sp, 48 # 8-byte Folded Spill - slli.d $a1, $a0, 8 - st.d $a1, $sp, 40 # 8-byte Folded Spill - slli.d $a1, $a0, 9 - st.d $a1, $sp, 32 # 8-byte Folded Spill - slli.d $a1, $a0, 13 - st.d $a1, $sp, 24 # 8-byte Folded Spill - st.d $a0, $sp, 1736 # 8-byte Folded Spill - slli.d $a0, $a0, 14 + slli.d $a0, $t5, 15 + st.d $a0, $sp, 88 # 8-byte Folded Spill + slli.d $a0, $t6, 1 + st.d $a0, $sp, 80 # 8-byte Folded Spill + slli.d $a0, $t6, 2 + st.d $a0, $sp, 72 # 8-byte Folded Spill + slli.d $a0, $t6, 3 + st.d $a0, $sp, 64 # 8-byte Folded Spill + slli.d $a0, $t6, 6 + st.d $a0, $sp, 56 # 8-byte Folded Spill + slli.d $a0, $t6, 7 + st.d $a0, $sp, 48 # 8-byte Folded Spill + slli.d $a0, $t6, 8 + st.d $a0, $sp, 40 # 8-byte Folded Spill + slli.d $a0, $t6, 9 + st.d $a0, $sp, 32 # 8-byte Folded Spill + slli.d $a0, $t6, 13 + st.d $a0, $sp, 24 # 8-byte Folded Spill + st.d $t6, $sp, 1736 # 8-byte Folded Spill + slli.d $a0, $t6, 14 st.d $a0, $sp, 16 # 8-byte Folded Spill ori $a0, $zero, 1 .p2align 4, , 16 @@ -2056,16 +2053,14 @@ value: # @value or $a4, $a1, $a0 movgr2cf $fcc0, $a3 move $t2, $a3 - fsel $fa5, $fa2, $fa1, $fcc0 - fadd.s $fa5, $fa0, $fa5 + fsel $fa6, $fa3, $fa2, $fcc0 + fadd.s $fa6, $fa0, $fa6 ld.d $t0, $sp, 1576 # 8-byte Folded Reload andn $a0, $t0, $a4 - pcalau12i $a1, %pc_hi20(.LCPI7_0) - fld.s $fa6, $a1, %pc_lo12(.LCPI7_0) sltui $a0, $a0, 1 movgr2fr.w $fa7, $a2 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a1, $fa7 maskeqz $a1, $a1, $a0 @@ -2076,7 +2071,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2087,7 +2082,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2098,7 +2093,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2109,7 +2104,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2120,7 +2115,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2131,7 +2126,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2142,7 +2137,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2153,7 +2148,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2164,7 +2159,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2175,7 +2170,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2186,7 +2181,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2197,7 +2192,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2208,7 +2203,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2219,7 +2214,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2230,7 +2225,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2241,7 +2236,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2252,7 +2247,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2263,7 +2258,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2274,7 +2269,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2285,7 +2280,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2296,7 +2291,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2307,7 +2302,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2318,7 +2313,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2329,7 +2324,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2340,7 +2335,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2351,7 +2346,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2362,7 +2357,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2373,7 +2368,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2384,7 +2379,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2395,7 +2390,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2406,7 +2401,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2417,7 +2412,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2428,7 +2423,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2439,7 +2434,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2450,7 +2445,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2461,7 +2456,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2472,7 +2467,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2483,7 +2478,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2494,7 +2489,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2505,7 +2500,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2516,7 +2511,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2527,7 +2522,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2538,7 +2533,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2549,7 +2544,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2560,7 +2555,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2571,7 +2566,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2582,7 +2577,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2593,7 +2588,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2604,7 +2599,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2615,7 +2610,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2626,7 +2621,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2637,7 +2632,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2648,7 +2643,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2659,7 +2654,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2670,7 +2665,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2681,7 +2676,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2692,7 +2687,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2703,7 +2698,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2714,7 +2709,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2725,7 +2720,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2736,7 +2731,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2747,7 +2742,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2758,7 +2753,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2769,7 +2764,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2780,7 +2775,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2791,7 +2786,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2802,7 +2797,7 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa7, $fa5, $fa6, $fa7 + fmadd.s $fa7, $fa6, $fa1, $fa7 ftintrz.w.s $fa7, $fa7 movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 @@ -2813,9 +2808,9 @@ value: # @value sltui $a1, $a1, 1 movgr2fr.w $fa7, $a0 ffint.s.w $fa7, $fa7 - fmadd.s $fa6, $fa5, $fa6, $fa7 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + fmadd.s $fa7, $fa6, $fa1, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 @@ -2827,11 +2822,11 @@ value: # @value andn $a1, $a1, $a4 and $a2, $t0, $t2 or $a1, $a1, $a2 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a5, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a5, $fa7 sltui $a1, $a1, 1 masknez $a0, $a0, $a1 maskeqz $a1, $a5, $a1 @@ -2840,11 +2835,11 @@ value: # @value andn $a1, $a1, $a4 and $t0, $a7, $t2 or $a1, $a1, $t0 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a5, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a5, $fa7 sltui $a1, $a1, 1 masknez $a0, $a0, $a1 maskeqz $a1, $a5, $a1 @@ -2853,11 +2848,11 @@ value: # @value andn $a5, $a1, $a4 and $a1, $t1, $t2 or $a5, $a5, $a1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a6, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a6, $fa7 sltui $a5, $a5, 1 masknez $a0, $a0, $a5 maskeqz $a5, $a6, $a5 @@ -2866,11 +2861,11 @@ value: # @value andn $a6, $a0, $a4 and $a0, $t5, $t2 or $a6, $a6, $a0 - movgr2fr.w $fa6, $a5 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t1, $fa6 + movgr2fr.w $fa7, $a5 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t1, $fa7 sltui $a6, $a6, 1 masknez $a5, $a5, $a6 maskeqz $a6, $t1, $a6 @@ -2880,11 +2875,11 @@ value: # @value and $a7, $t6, $t2 st.d $a7, $sp, 1848 # 8-byte Folded Spill or $a6, $a6, $a7 - movgr2fr.w $fa6, $a5 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t1, $fa6 + movgr2fr.w $fa7, $a5 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t1, $fa7 sltui $a6, $a6, 1 masknez $a5, $a5, $a6 maskeqz $a6, $t1, $a6 @@ -2894,11 +2889,11 @@ value: # @value and $a7, $t7, $t2 st.d $a7, $sp, 1840 # 8-byte Folded Spill or $a6, $a6, $a7 - movgr2fr.w $fa6, $a5 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t1, $fa6 + movgr2fr.w $fa7, $a5 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t1, $fa7 sltui $a6, $a6, 1 masknez $a5, $a5, $a6 maskeqz $a6, $t1, $a6 @@ -2908,11 +2903,11 @@ value: # @value and $a7, $t8, $t2 st.d $a7, $sp, 1832 # 8-byte Folded Spill or $a6, $a6, $a7 - movgr2fr.w $fa6, $a5 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t1, $fa6 + movgr2fr.w $fa7, $a5 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t1, $fa7 sltui $a6, $a6, 1 masknez $a5, $a5, $a6 maskeqz $a6, $t1, $a6 @@ -2921,11 +2916,11 @@ value: # @value andn $a6, $a6, $a4 and $s2, $s2, $t2 or $a6, $a6, $s2 - movgr2fr.w $fa6, $a5 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t1, $fa6 + movgr2fr.w $fa7, $a5 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t1, $fa7 sltui $a6, $a6, 1 masknez $a5, $a5, $a6 maskeqz $a6, $t1, $a6 @@ -2934,11 +2929,11 @@ value: # @value andn $a6, $a6, $a4 and $s3, $s3, $t2 or $a6, $a6, $s3 - movgr2fr.w $fa6, $a5 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t1, $fa6 + movgr2fr.w $fa7, $a5 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t1, $fa7 sltui $a6, $a6, 1 masknez $a5, $a5, $a6 maskeqz $a6, $t1, $a6 @@ -2947,11 +2942,11 @@ value: # @value andn $a6, $a6, $a4 and $s4, $s4, $t2 or $a6, $a6, $s4 - movgr2fr.w $fa6, $a5 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t1, $fa6 + movgr2fr.w $fa7, $a5 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t1, $fa7 sltui $a6, $a6, 1 masknez $a5, $a5, $a6 maskeqz $a6, $t1, $a6 @@ -2960,11 +2955,11 @@ value: # @value andn $a6, $a6, $a4 and $s5, $s5, $t2 or $a6, $a6, $s5 - movgr2fr.w $fa6, $a5 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t1, $fa6 + movgr2fr.w $fa7, $a5 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t1, $fa7 sltui $a6, $a6, 1 masknez $a5, $a5, $a6 maskeqz $a6, $t1, $a6 @@ -2973,11 +2968,11 @@ value: # @value andn $a6, $a6, $a4 and $s6, $s6, $t2 or $a6, $a6, $s6 - movgr2fr.w $fa6, $a5 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t1, $fa6 + movgr2fr.w $fa7, $a5 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t1, $fa7 sltui $a6, $a6, 1 masknez $a5, $a5, $a6 maskeqz $a6, $t1, $a6 @@ -2986,11 +2981,11 @@ value: # @value andn $a6, $a6, $a4 and $s7, $s7, $t2 or $a6, $a6, $s7 - movgr2fr.w $fa6, $a5 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t1, $fa6 + movgr2fr.w $fa7, $a5 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t1, $fa7 sltui $a6, $a6, 1 masknez $a5, $a5, $a6 maskeqz $a6, $t1, $a6 @@ -2999,11 +2994,11 @@ value: # @value andn $a6, $a6, $a4 and $s8, $s8, $t2 or $a6, $a6, $s8 - movgr2fr.w $fa6, $a5 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t1, $fa6 + movgr2fr.w $fa7, $a5 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t1, $fa7 sltui $a6, $a6, 1 masknez $a5, $a5, $a6 maskeqz $a6, $t1, $a6 @@ -3012,11 +3007,11 @@ value: # @value andn $a6, $a6, $a4 and $ra, $ra, $t2 or $a6, $a6, $ra - movgr2fr.w $fa6, $a5 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t1, $fa6 + movgr2fr.w $fa7, $a5 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t1, $fa7 sltui $a6, $a6, 1 masknez $a5, $a5, $a6 maskeqz $a6, $t1, $a6 @@ -3025,11 +3020,11 @@ value: # @value andn $a6, $a6, $a4 and $t7, $t4, $t2 or $a6, $a6, $t7 - movgr2fr.w $fa6, $a5 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t1, $fa6 + movgr2fr.w $fa7, $a5 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t1, $fa7 sltui $a6, $a6, 1 masknez $a5, $a5, $a6 maskeqz $a6, $t1, $a6 @@ -3038,11 +3033,11 @@ value: # @value andn $a6, $a6, $a4 and $t8, $a3, $t2 or $a6, $a6, $t8 - movgr2fr.w $fa6, $a5 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t1, $fa6 + movgr2fr.w $fa7, $a5 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t1, $fa7 sltui $a6, $a6, 1 masknez $a5, $a5, $a6 maskeqz $a6, $t1, $a6 @@ -3051,11 +3046,11 @@ value: # @value andn $t1, $a3, $a4 and $a5, $t3, $t2 or $t1, $t1, $a5 - movgr2fr.w $fa6, $a6 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t5, $fa6 + movgr2fr.w $fa7, $a6 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t5, $fa7 sltui $t1, $t1, 1 masknez $a6, $a6, $t1 maskeqz $t1, $t5, $t1 @@ -3064,11 +3059,11 @@ value: # @value andn $t5, $a3, $a4 and $a6, $fp, $t2 or $t5, $t5, $a6 - movgr2fr.w $fa6, $t1 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $t1 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 sltui $t5, $t5, 1 masknez $t1, $t1, $t5 maskeqz $t5, $t6, $t5 @@ -3077,11 +3072,11 @@ value: # @value andn $t6, $a3, $a4 and $t1, $s0, $t2 or $t6, $t6, $t1 - movgr2fr.w $fa6, $t5 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a7, $fa6 + movgr2fr.w $fa7, $t5 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a7, $fa7 sltui $t6, $t6, 1 masknez $t5, $t5, $t6 maskeqz $a7, $a7, $t6 @@ -3090,11 +3085,11 @@ value: # @value andn $t5, $a3, $a4 and $t2, $s1, $t2 or $t5, $t5, $t2 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 sltui $t5, $t5, 1 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 @@ -3102,605 +3097,605 @@ value: # @value ld.d $t5, $sp, 1776 # 8-byte Folded Reload andn $t5, $t5, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 1016 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 1008 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 1000 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 992 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 984 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 976 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 968 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 960 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 952 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 944 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 936 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 928 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 920 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 912 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 904 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 896 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 888 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 880 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 872 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 864 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 856 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 848 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 840 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 832 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 824 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 816 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 808 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 800 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 792 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $t5, $sp, 1768 # 8-byte Folded Reload andn $t5, $t5, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 784 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 776 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 768 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 760 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 752 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 744 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 736 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 728 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 720 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 712 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 704 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $t5, $sp, 1760 # 8-byte Folded Reload andn $t5, $t5, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 696 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 688 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 680 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 672 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 664 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 656 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 648 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 640 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 632 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 624 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $a3, $sp, 616 # 8-byte Folded Reload andn $t5, $a3, $a4 sltui $t5, $t5, 1 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa3, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t6, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa4, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t6, $fa7 masknez $a7, $a7, $t5 maskeqz $t5, $t6, $t5 or $a7, $t5, $a7 ld.d $t5, $sp, 1752 # 8-byte Folded Reload andn $t5, $t5, $a4 or $a2, $t5, $a2 - movgr2fr.w $fa6, $a7 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t5, $fa6 + movgr2fr.w $fa7, $a7 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t5, $fa7 sltui $a2, $a2, 1 masknez $a7, $a7, $a2 maskeqz $a2, $t5, $a2 @@ -3708,11 +3703,11 @@ value: # @value ld.d $a3, $sp, 608 # 8-byte Folded Reload andn $a7, $a3, $a4 or $a7, $a7, $t0 - movgr2fr.w $fa6, $a2 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $t0, $fa6 + movgr2fr.w $fa7, $a2 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $t0, $fa7 sltui $a7, $a7, 1 masknez $a2, $a2, $a7 maskeqz $a7, $t0, $a7 @@ -3720,11 +3715,11 @@ value: # @value ld.d $a3, $sp, 600 # 8-byte Folded Reload andn $a7, $a3, $a4 or $a1, $a7, $a1 - movgr2fr.w $fa6, $a2 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a7, $fa6 + movgr2fr.w $fa7, $a2 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a7, $fa7 sltui $a1, $a1, 1 masknez $a2, $a2, $a1 maskeqz $a1, $a7, $a1 @@ -3732,12 +3727,12 @@ value: # @value ld.d $a2, $sp, 592 # 8-byte Folded Reload andn $a2, $a2, $a4 or $a0, $a2, $a0 - movgr2fr.w $fa6, $a1 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 - sltui $a0, $a0, 1 + movgr2fr.w $fa7, $a1 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 + sltui $a0, $a0, 1 masknez $a1, $a1, $a0 maskeqz $a0, $a2, $a0 or $a0, $a0, $a1 @@ -3745,11 +3740,11 @@ value: # @value andn $a1, $a1, $a4 ld.d $a2, $sp, 1848 # 8-byte Folded Reload or $a1, $a1, $a2 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 sltui $a1, $a1, 1 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 @@ -3758,11 +3753,11 @@ value: # @value andn $a1, $a1, $a4 ld.d $a2, $sp, 1840 # 8-byte Folded Reload or $a1, $a1, $a2 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 sltui $a1, $a1, 1 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 @@ -3771,11 +3766,11 @@ value: # @value andn $a1, $a1, $a4 ld.d $a2, $sp, 1832 # 8-byte Folded Reload or $a1, $a1, $a2 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 sltui $a1, $a1, 1 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 @@ -3783,11 +3778,11 @@ value: # @value ld.d $a1, $sp, 560 # 8-byte Folded Reload andn $a1, $a1, $a4 or $a1, $a1, $s2 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 sltui $a1, $a1, 1 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 @@ -3795,11 +3790,11 @@ value: # @value ld.d $a1, $sp, 552 # 8-byte Folded Reload andn $a1, $a1, $a4 or $a1, $a1, $s3 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 sltui $a1, $a1, 1 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 @@ -3807,11 +3802,11 @@ value: # @value ld.d $a1, $sp, 544 # 8-byte Folded Reload andn $a1, $a1, $a4 or $a1, $a1, $s4 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 sltui $a1, $a1, 1 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 @@ -3819,11 +3814,11 @@ value: # @value ld.d $a1, $sp, 536 # 8-byte Folded Reload andn $a1, $a1, $a4 or $a1, $a1, $s5 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 sltui $a1, $a1, 1 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 @@ -3831,11 +3826,11 @@ value: # @value ld.d $a1, $sp, 528 # 8-byte Folded Reload andn $a1, $a1, $a4 or $a1, $a1, $s6 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 sltui $a1, $a1, 1 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 @@ -3843,11 +3838,11 @@ value: # @value ld.d $a1, $sp, 520 # 8-byte Folded Reload andn $a1, $a1, $a4 or $a1, $a1, $s7 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 sltui $a1, $a1, 1 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 @@ -3855,11 +3850,11 @@ value: # @value ld.d $a1, $sp, 512 # 8-byte Folded Reload andn $a1, $a1, $a4 or $a1, $a1, $s8 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 sltui $a1, $a1, 1 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 @@ -3867,11 +3862,11 @@ value: # @value ld.d $a1, $sp, 504 # 8-byte Folded Reload andn $a1, $a1, $a4 or $a1, $a1, $ra - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 sltui $a1, $a1, 1 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 @@ -3879,11 +3874,11 @@ value: # @value ld.d $a1, $sp, 496 # 8-byte Folded Reload andn $a1, $a1, $a4 or $a1, $a1, $t7 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 sltui $a1, $a1, 1 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 @@ -3891,11 +3886,11 @@ value: # @value ld.d $a1, $sp, 488 # 8-byte Folded Reload andn $a1, $a1, $a4 or $a1, $a1, $t8 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 sltui $a1, $a1, 1 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 @@ -3903,11 +3898,11 @@ value: # @value ld.d $a1, $sp, 480 # 8-byte Folded Reload andn $a1, $a1, $a4 or $a1, $a1, $a5 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 sltui $a1, $a1, 1 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 @@ -3915,11 +3910,11 @@ value: # @value ld.d $a1, $sp, 472 # 8-byte Folded Reload andn $a1, $a1, $a4 or $a1, $a1, $a6 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 sltui $a1, $a1, 1 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 @@ -3927,11 +3922,11 @@ value: # @value ld.d $a1, $sp, 464 # 8-byte Folded Reload andn $a1, $a1, $a4 or $a1, $a1, $t1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 sltui $a1, $a1, 1 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 @@ -3939,11 +3934,11 @@ value: # @value ld.d $a1, $sp, 456 # 8-byte Folded Reload andn $a1, $a1, $a4 or $a1, $a1, $t2 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 sltui $a1, $a1, 1 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 @@ -3951,660 +3946,660 @@ value: # @value ld.d $a1, $sp, 1592 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 448 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 440 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 432 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 424 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 416 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 408 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 400 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 392 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 384 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 376 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 368 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 360 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 352 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 344 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 336 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 328 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 320 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 312 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 304 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 296 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 288 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 280 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 272 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 264 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 256 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 248 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 240 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 232 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 224 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 216 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 208 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 200 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 192 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 184 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 176 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 1744 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 168 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 160 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 152 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 144 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 136 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 128 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 120 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 112 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 104 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 96 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 88 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 1736 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 80 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 72 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 64 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 56 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 48 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 40 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 32 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 1728 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 24 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 16 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa6, $fa5, $fa4, $fa6 - ftintrz.w.s $fa6, $fa6 - movfr2gr.s $a2, $fa6 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa7, $fa6, $fa5, $fa7 + ftintrz.w.s $fa7, $fa7 + movfr2gr.s $a2, $fa7 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 ld.d $a1, $sp, 1720 # 8-byte Folded Reload andn $a1, $a1, $a4 sltui $a1, $a1, 1 - movgr2fr.w $fa6, $a0 - ffint.s.w $fa6, $fa6 - fmadd.s $fa5, $fa5, $fa4, $fa6 - ftintrz.w.s $fa5, $fa5 - movfr2gr.s $a2, $fa5 + movgr2fr.w $fa7, $a0 + ffint.s.w $fa7, $fa7 + fmadd.s $fa6, $fa6, $fa5, $fa7 + ftintrz.w.s $fa6, $fa6 + movfr2gr.s $a2, $fa6 masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a2, $a1, $a0 diff --git a/results/MultiSource/Benchmarks/FreeBench/neural/CMakeFiles/neural.dir/neural.s b/results/MultiSource/Benchmarks/FreeBench/neural/CMakeFiles/neural.dir/neural.s index 5a40c155..6b4172b6 100644 --- a/results/MultiSource/Benchmarks/FreeBench/neural/CMakeFiles/neural.dir/neural.s +++ b/results/MultiSource/Benchmarks/FreeBench/neural/CMakeFiles/neural.dir/neural.s @@ -1325,16 +1325,7 @@ run: # @run .Lfunc_end1: .size run, .Lfunc_end1-run # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function runcont -.LCPI2_0: - .dword 0xc069000000000000 # double -200 -.LCPI2_1: - .dword 0x4069000000000000 # double 200 -.LCPI2_2: - .dword 0x3fe6666666666666 # double 0.69999999999999996 - .text - .p2align 5 + .p2align 5 # -- Begin function runcont .type runcont,@function runcont: # @runcont # %bb.0: @@ -1356,10 +1347,10 @@ runcont: # @runcont fst.d $fs3, $sp, 88 # 8-byte Folded Spill fst.d $fs4, $sp, 80 # 8-byte Folded Spill pcalau12i $s3, %pc_hi20(NNTOT) - ld.w $s8, $s3, %pc_lo12(NNTOT) + ld.w $s4, $s3, %pc_lo12(NNTOT) move $fp, $a1 move $s0, $a0 - slli.d $s2, $s8, 2 + slli.d $s2, $s4, 2 move $a0, $s2 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 @@ -1367,13 +1358,14 @@ runcont: # @runcont move $a0, $s2 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 - beqz $s1, .LBB2_63 + beqz $s1, .LBB2_61 # %bb.1: - beqz $a0, .LBB2_63 + move $a6, $a0 + beqz $a0, .LBB2_61 # %bb.2: # %.preheader99 - blez $s8, .LBB2_5 + blez $s4, .LBB2_5 # %bb.3: # %.lr.ph.preheader - move $a5, $zero + move $a0, $zero move $a1, $s0 move $a2, $fp move $a3, $s1 @@ -1385,60 +1377,67 @@ runcont: # @runcont movgr2fr.w $fa0, $a4 ffint.s.w $fa0, $fa0 fst.s $fa0, $a3, 0 - ld.w $s8, $s3, %pc_lo12(NNTOT) - addi.d $a5, $a5, 1 + ld.w $s4, $s3, %pc_lo12(NNTOT) + addi.d $a0, $a0, 1 addi.d $a3, $a3, 4 addi.d $a2, $a2, 4 addi.d $a1, $a1, 4 - blt $a5, $s8, .LBB2_4 + blt $a0, $s4, .LBB2_4 .LBB2_5: # %.preheader97.preheader - move $s6, $zero - st.d $zero, $sp, 48 # 8-byte Folded Spill - addi.d $a1, $a0, 16 - st.d $a1, $sp, 32 # 8-byte Folded Spill - addi.d $a1, $s1, 16 - st.d $a1, $sp, 24 # 8-byte Folded Spill - addi.d $a1, $fp, 16 - st.d $a1, $sp, 16 # 8-byte Folded Spill - pcalau12i $s7, %pc_hi20(Tmatrix) + move $s7, $zero + st.d $zero, $sp, 64 # 8-byte Folded Spill + addi.d $a0, $a6, 16 + st.d $a0, $sp, 48 # 8-byte Folded Spill + addi.d $a0, $s1, 16 + st.d $a0, $sp, 24 # 8-byte Folded Spill + addi.d $a0, $fp, 16 + st.d $a0, $sp, 16 # 8-byte Folded Spill + pcalau12i $s8, %pc_hi20(Tmatrix) movgr2fr.d $fs0, $zero movgr2fr.w $fs1, $zero - pcalau12i $a1, %pc_hi20(.LCPI2_0) - fld.d $fs2, $a1, %pc_lo12(.LCPI2_0) - pcalau12i $a1, %pc_hi20(.LCPI2_1) - fld.d $fs3, $a1, %pc_lo12(.LCPI2_1) - vrepli.b $vr6, 0 - st.d $a0, $sp, 40 # 8-byte Folded Spill - vst $vr6, $sp, 64 # 16-byte Folded Spill + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a1, $a0, -1018 + movgr2fr.d $fs2, $a1 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, 419430 + ori $a0, $a0, 1638 + lu32i.d $a0, 419430 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fs4, $a0 + vrepli.b $vr0, 0 + vst $vr0, $sp, 32 # 16-byte Folded Spill + st.d $a6, $sp, 56 # 8-byte Folded Spill b .LBB2_7 .p2align 4, , 16 .LBB2_6: # %hamming.exit.thread # in Loop: Header=BB2_7 Depth=1 - ori $a1, $zero, 499 - ld.d $a2, $sp, 56 # 8-byte Folded Reload - bgeu $a2, $a1, .LBB2_60 + ori $a0, $zero, 499 + ld.d $a1, $sp, 72 # 8-byte Folded Reload + bgeu $a1, $a0, .LBB2_58 .LBB2_7: # %.preheader97 # =>This Loop Header: Depth=1 # Child Loop BB2_10 Depth 2 # Child Loop BB2_12 Depth 3 - # Child Loop BB2_19 Depth 2 - # Child Loop BB2_22 Depth 3 - # Child Loop BB2_25 Depth 4 - # Child Loop BB2_36 Depth 2 - # Child Loop BB2_39 Depth 2 - # Child Loop BB2_43 Depth 2 - # Child Loop BB2_46 Depth 2 - # Child Loop BB2_49 Depth 2 - # Child Loop BB2_56 Depth 2 - # Child Loop BB2_59 Depth 2 + # Child Loop BB2_41 Depth 2 + # Child Loop BB2_44 Depth 3 + # Child Loop BB2_47 Depth 4 + # Child Loop BB2_20 Depth 2 + # Child Loop BB2_23 Depth 2 + # Child Loop BB2_27 Depth 2 + # Child Loop BB2_30 Depth 2 + # Child Loop BB2_33 Depth 2 + # Child Loop BB2_54 Depth 2 + # Child Loop BB2_57 Depth 2 move $s2, $fp - ld.d $a0, $sp, 48 # 8-byte Folded Reload - st.d $a0, $sp, 56 # 8-byte Folded Spill - blez $s8, .LBB2_14 + ld.d $a0, $sp, 64 # 8-byte Folded Reload + st.d $a0, $sp, 72 # 8-byte Folded Spill + blez $s4, .LBB2_14 # %bb.8: # %.preheader93.lr.ph # in Loop: Header=BB2_7 Depth=1 - ld.d $fp, $s7, %pc_lo12(Tmatrix) - move $s4, $zero + ld.d $fp, $s8, %pc_lo12(Tmatrix) + move $s5, $zero b .LBB2_10 .p2align 4, , 16 .LBB2_9: # %._crit_edge @@ -1454,21 +1453,21 @@ runcont: # @runcont fsub.d $fa1, $fa2, $fa0 fadd.d $fa0, $fa0, $fa2 fdiv.d $fa0, $fa1, $fa0 - ld.w $s8, $s3, %pc_lo12(NNTOT) + ld.w $s4, $s3, %pc_lo12(NNTOT) fcvt.s.d $fa0, $fa0 - slli.d $a0, $s4, 2 - addi.d $s4, $s4, 1 + slli.d $a0, $s5, 2 + addi.d $s5, $s5, 1 fstx.s $fa0, $s1, $a0 - bge $s4, $s8, .LBB2_14 + bge $s5, $s4, .LBB2_14 .LBB2_10: # %.preheader93 # Parent Loop BB2_7 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB2_12 Depth 3 fmov.d $fa0, $fs0 - blez $s8, .LBB2_9 + blez $s4, .LBB2_9 # %bb.11: # %.lr.ph103 # in Loop: Header=BB2_10 Depth=2 - slli.d $a0, $s4, 3 + slli.d $a0, $s5, 3 ldx.d $a0, $fp, $a0 move $a1, $s0 fmov.s $fa0, $fs1 @@ -1482,9 +1481,9 @@ runcont: # @runcont ffint.s.w $fa2, $fa2 fmadd.s $fa0, $fa1, $fa2, $fa0 addi.d $a1, $a1, 4 - addi.d $s8, $s8, -1 + addi.d $s4, $s4, -1 addi.d $a0, $a0, 4 - bnez $s8, .LBB2_12 + bnez $s4, .LBB2_12 # %bb.13: # %._crit_edge.loopexit # in Loop: Header=BB2_10 Depth=2 fcvt.d.s $fa0, $fa0 @@ -1492,163 +1491,60 @@ runcont: # @runcont .p2align 4, , 16 .LBB2_14: # %.preheader96 # in Loop: Header=BB2_7 Depth=1 - ld.d $a0, $sp, 56 # 8-byte Folded Reload + ld.d $a0, $sp, 72 # 8-byte Folded Reload addi.w $a0, $a0, 1 - st.d $a0, $sp, 48 # 8-byte Folded Spill - beqz $s6, .LBB2_17 -# %bb.15: # in Loop: Header=BB2_7 Depth=1 - ori $s6, $zero, 1 + st.d $a0, $sp, 64 # 8-byte Folded Spill + beqz $s7, .LBB2_39 +.LBB2_15: # in Loop: Header=BB2_7 Depth=1 + ori $s7, $zero, 1 .LBB2_16: # %.preheader95 # in Loop: Header=BB2_7 Depth=1 - vld $vr6, $sp, 64 # 16-byte Folded Reload - b .LBB2_32 - .p2align 4, , 16 -.LBB2_17: # %.preheader92.lr.ph - # in Loop: Header=BB2_7 Depth=1 - vld $vr6, $sp, 64 # 16-byte Folded Reload - blez $s8, .LBB2_60 -# %bb.18: # %.preheader92.preheader - # in Loop: Header=BB2_7 Depth=1 - move $a0, $zero - .p2align 4, , 16 -.LBB2_19: # %.preheader92 - # Parent Loop BB2_7 Depth=1 - # => This Loop Header: Depth=2 - # Child Loop BB2_22 Depth 3 - # Child Loop BB2_25 Depth 4 - blez $s8, .LBB2_31 -# %bb.20: # %.lr.ph112 - # in Loop: Header=BB2_19 Depth=2 - move $fp, $a0 - ld.d $s5, $s7, %pc_lo12(Tmatrix) - move $s4, $zero - ori $s6, $zero, 1 - b .LBB2_22 - .p2align 4, , 16 -.LBB2_21: # in Loop: Header=BB2_22 Depth=3 - ld.w $s8, $s3, %pc_lo12(NNTOT) - addi.d $s4, $s4, 1 - bge $s4, $s8, .LBB2_29 -.LBB2_22: # Parent Loop BB2_7 Depth=1 - # Parent Loop BB2_19 Depth=2 - # => This Loop Header: Depth=3 - # Child Loop BB2_25 Depth 4 - slli.d $a0, $s4, 2 - fldx.s $fa0, $s1, $a0 - pcalau12i $a0, %pc_hi20(.LCPI2_2) - fld.d $fs4, $a0, %pc_lo12(.LCPI2_2) - fabs.s $fa0, $fa0 - fcvt.d.s $fa0, $fa0 - fcmp.cule.d $fcc0, $fs4, $fa0 - bcnez $fcc0, .LBB2_21 -# %bb.23: # %.preheader - # in Loop: Header=BB2_22 Depth=3 - fmov.d $fa0, $fs0 - blez $s8, .LBB2_27 -# %bb.24: # %.lr.ph107 - # in Loop: Header=BB2_22 Depth=3 - slli.d $a0, $s4, 3 - ldx.d $a0, $s5, $a0 - move $a1, $s1 - fmov.s $fa0, $fs1 - .p2align 4, , 16 -.LBB2_25: # Parent Loop BB2_7 Depth=1 - # Parent Loop BB2_19 Depth=2 - # Parent Loop BB2_22 Depth=3 - # => This Inner Loop Header: Depth=4 - fld.s $fa1, $a0, 0 - fld.s $fa2, $a1, 0 - fmadd.s $fa0, $fa1, $fa2, $fa0 - addi.d $a1, $a1, 4 - addi.d $s8, $s8, -1 - addi.d $a0, $a0, 4 - bnez $s8, .LBB2_25 -# %bb.26: # %._crit_edge108.loopexit - # in Loop: Header=BB2_22 Depth=3 - fcvt.d.s $fa0, $fa0 -.LBB2_27: # %._crit_edge108 - # in Loop: Header=BB2_22 Depth=3 - alsl.d $s8, $s4, $s1, 2 - fneg.d $fa1, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs2 - fsel $fa0, $fa1, $fs3, $fcc0 - fcmp.clt.d $fcc0, $fa0, $fs2 - fsel $fa0, $fa0, $fs2, $fcc0 - pcaddu18i $ra, %call36(exp) - jirl $ra, $ra, 0 - vldi $vr2, -912 - fsub.d $fa1, $fa2, $fa0 - fadd.d $fa0, $fa0, $fa2 - fdiv.d $fa0, $fa1, $fa0 - fcvt.s.d $fa0, $fa0 - fcvt.d.s $fa1, $fa0 - fcmp.cule.d $fcc0, $fs4, $fa1 - fst.s $fa0, $s8, 0 - bcnez $fcc0, .LBB2_21 -# %bb.28: # in Loop: Header=BB2_22 Depth=3 - move $s6, $zero - b .LBB2_21 - .p2align 4, , 16 -.LBB2_29: # %._crit_edge113 - # in Loop: Header=BB2_19 Depth=2 - bnez $s6, .LBB2_16 -# %bb.30: # %._crit_edge113 - # in Loop: Header=BB2_19 Depth=2 - addi.w $a0, $fp, 1 - ori $a1, $zero, 49 - vld $vr6, $sp, 64 # 16-byte Folded Reload - bltu $fp, $a1, .LBB2_19 - b .LBB2_32 -.LBB2_31: # in Loop: Header=BB2_7 Depth=1 - ori $s6, $zero, 1 - .p2align 4, , 16 -.LBB2_32: # %.preheader95 - # in Loop: Header=BB2_7 Depth=1 - blez $s8, .LBB2_60 -# %bb.33: # %.lr.ph119.preheader + blez $s4, .LBB2_58 +# %bb.17: # %.lr.ph119.preheader # in Loop: Header=BB2_7 Depth=1 move $fp, $s2 - bstrpick.d $a0, $s8, 30, 3 + bstrpick.d $a0, $s4, 30, 3 ori $a1, $zero, 8 - bgeu $s8, $a1, .LBB2_35 -# %bb.34: # in Loop: Header=BB2_7 Depth=1 + bgeu $s4, $a1, .LBB2_19 +# %bb.18: # in Loop: Header=BB2_7 Depth=1 move $a1, $zero - ld.d $a5, $sp, 40 # 8-byte Folded Reload - b .LBB2_38 + ld.d $a5, $sp, 56 # 8-byte Folded Reload + b .LBB2_22 .p2align 4, , 16 -.LBB2_35: # %vector.ph27 +.LBB2_19: # %vector.ph27 # in Loop: Header=BB2_7 Depth=1 slli.d $a1, $a0, 3 ld.d $a2, $sp, 24 # 8-byte Folded Reload - ld.d $a3, $sp, 32 # 8-byte Folded Reload + ld.d $a3, $sp, 48 # 8-byte Folded Reload move $a4, $a1 + vld $vr2, $sp, 32 # 16-byte Folded Reload .p2align 4, , 16 -.LBB2_36: # %vector.body30 +.LBB2_20: # %vector.body30 # Parent Loop BB2_7 Depth=1 # => This Inner Loop Header: Depth=2 vld $vr0, $a2, -16 vld $vr1, $a2, 0 - vfcmp.cule.s $vr0, $vr0, $vr6 + vfcmp.cule.s $vr0, $vr0, $vr2 vbitseti.w $vr0, $vr0, 0 - vfcmp.cule.s $vr1, $vr1, $vr6 + vfcmp.cule.s $vr1, $vr1, $vr2 vbitseti.w $vr1, $vr1, 0 vst $vr0, $a3, -16 vst $vr1, $a3, 0 addi.d $a4, $a4, -8 addi.d $a3, $a3, 32 addi.d $a2, $a2, 32 - bnez $a4, .LBB2_36 -# %bb.37: # %middle.block35 + bnez $a4, .LBB2_20 +# %bb.21: # %middle.block35 # in Loop: Header=BB2_7 Depth=1 - ld.d $a5, $sp, 40 # 8-byte Folded Reload - beq $a1, $s8, .LBB2_40 -.LBB2_38: # %.lr.ph119.preheader39 + ld.d $a5, $sp, 56 # 8-byte Folded Reload + beq $a1, $s4, .LBB2_24 +.LBB2_22: # %.lr.ph119.preheader39 # in Loop: Header=BB2_7 Depth=1 alsl.d $a2, $a1, $a5, 2 alsl.d $a3, $a1, $s1, 2 - sub.d $a1, $s8, $a1 + sub.d $a1, $s4, $a1 .p2align 4, , 16 -.LBB2_39: # %.lr.ph119 +.LBB2_23: # %.lr.ph119 # Parent Loop BB2_7 Depth=1 # => This Inner Loop Header: Depth=2 fld.s $fa0, $a3, 0 @@ -1660,26 +1556,26 @@ runcont: # @runcont addi.d $a2, $a2, 4 addi.d $a1, $a1, -1 addi.d $a3, $a3, 4 - bnez $a1, .LBB2_39 -.LBB2_40: # %.lr.ph.preheader.i + bnez $a1, .LBB2_23 +.LBB2_24: # %.lr.ph.preheader.i # in Loop: Header=BB2_7 Depth=1 ori $a1, $zero, 8 - bgeu $s8, $a1, .LBB2_42 -# %bb.41: # in Loop: Header=BB2_7 Depth=1 + bgeu $s4, $a1, .LBB2_26 +# %bb.25: # in Loop: Header=BB2_7 Depth=1 move $a0, $zero move $a1, $zero - b .LBB2_45 + b .LBB2_29 .p2align 4, , 16 -.LBB2_42: # %vector.ph11 +.LBB2_26: # %vector.ph11 # in Loop: Header=BB2_7 Depth=1 slli.d $a0, $a0, 3 ld.d $a1, $sp, 16 # 8-byte Folded Reload - ld.d $a2, $sp, 32 # 8-byte Folded Reload + ld.d $a2, $sp, 48 # 8-byte Folded Reload move $a3, $a0 - vori.b $vr0, $vr6, 0 - vori.b $vr1, $vr6, 0 + vld $vr1, $sp, 32 # 16-byte Folded Reload + vori.b $vr0, $vr1, 0 .p2align 4, , 16 -.LBB2_43: # %vector.body14 +.LBB2_27: # %vector.body14 # Parent Loop BB2_7 Depth=1 # => This Inner Loop Header: Depth=2 vld $vr2, $a1, -16 @@ -1695,21 +1591,21 @@ runcont: # @runcont addi.d $a3, $a3, -8 addi.d $a2, $a2, 32 addi.d $a1, $a1, 32 - bnez $a3, .LBB2_43 -# %bb.44: # %middle.block22 + bnez $a3, .LBB2_27 +# %bb.28: # %middle.block22 # in Loop: Header=BB2_7 Depth=1 vadd.w $vr0, $vr1, $vr0 vhaddw.d.w $vr0, $vr0, $vr0 vhaddw.q.d $vr0, $vr0, $vr0 vpickve2gr.d $a1, $vr0, 0 - beq $a0, $s8, .LBB2_47 -.LBB2_45: # %.lr.ph.i.preheader + beq $a0, $s4, .LBB2_31 +.LBB2_29: # %.lr.ph.i.preheader # in Loop: Header=BB2_7 Depth=1 - sub.d $a2, $s8, $a0 + sub.d $a2, $s4, $a0 alsl.d $a3, $a0, $a5, 2 alsl.d $a0, $a0, $fp, 2 .p2align 4, , 16 -.LBB2_46: # %.lr.ph.i +.LBB2_30: # %.lr.ph.i # Parent Loop BB2_7 Depth=1 # => This Inner Loop Header: Depth=2 ld.w $a4, $a0, 0 @@ -1720,57 +1616,150 @@ runcont: # @runcont addi.d $a2, $a2, -1 addi.d $a3, $a3, 4 addi.d $a0, $a0, 4 - bnez $a2, .LBB2_46 -.LBB2_47: # %hamming.exit + bnez $a2, .LBB2_30 +.LBB2_31: # %hamming.exit # in Loop: Header=BB2_7 Depth=1 addi.w $a0, $a1, 0 - beqz $a0, .LBB2_60 -# %bb.48: # %.lr.ph123.preheader + beqz $a0, .LBB2_58 +# %bb.32: # %.lr.ph123.preheader # in Loop: Header=BB2_7 Depth=1 - move $a6, $zero - move $a1, $s8 + move $a0, $zero + move $a1, $s4 move $a2, $fp - ld.d $a0, $sp, 40 # 8-byte Folded Reload - move $a3, $a0 + ld.d $a6, $sp, 56 # 8-byte Folded Reload + move $a3, $a6 .p2align 4, , 16 -.LBB2_49: # %.lr.ph123 +.LBB2_33: # %.lr.ph123 # Parent Loop BB2_7 Depth=1 # => This Inner Loop Header: Depth=2 ld.w $a4, $a2, 0 ld.w $a5, $a3, 0 - bne $a4, $a5, .LBB2_52 -# %bb.50: # in Loop: Header=BB2_49 Depth=2 - addi.w $a6, $a6, 1 + bne $a4, $a5, .LBB2_36 +# %bb.34: # in Loop: Header=BB2_33 Depth=2 + addi.w $a0, $a0, 1 addi.d $a3, $a3, 4 addi.d $a1, $a1, -1 addi.d $a2, $a2, 4 - bnez $a1, .LBB2_49 -# %bb.51: # in Loop: Header=BB2_7 Depth=1 - move $a6, $s8 -.LBB2_52: # %.critedge + bnez $a1, .LBB2_33 +# %bb.35: # in Loop: Header=BB2_7 Depth=1 + move $a0, $s4 +.LBB2_36: # %.critedge # in Loop: Header=BB2_7 Depth=1 - slli.d $a2, $a6, 2 - ldx.w $a1, $a0, $a2 - stx.w $a1, $fp, $a2 - ld.w $s8, $s3, %pc_lo12(NNTOT) - blez $s8, .LBB2_6 -# %bb.53: # %.lr.ph128.preheader + slli.d $a0, $a0, 2 + ldx.w $a1, $a6, $a0 + stx.w $a1, $fp, $a0 + ld.w $s4, $s3, %pc_lo12(NNTOT) + blez $s4, .LBB2_6 +# %bb.37: # %.lr.ph128.preheader # in Loop: Header=BB2_7 Depth=1 - ori $a1, $zero, 8 - bgeu $s8, $a1, .LBB2_55 -# %bb.54: # in Loop: Header=BB2_7 Depth=1 - move $a4, $zero - b .LBB2_58 + ori $a0, $zero, 8 + bgeu $s4, $a0, .LBB2_53 +# %bb.38: # in Loop: Header=BB2_7 Depth=1 + move $a0, $zero + b .LBB2_56 + .p2align 4, , 16 +.LBB2_39: # %.preheader92.lr.ph + # in Loop: Header=BB2_7 Depth=1 + blez $s4, .LBB2_58 +# %bb.40: # %.preheader92.preheader + # in Loop: Header=BB2_7 Depth=1 + move $a0, $zero + .p2align 4, , 16 +.LBB2_41: # %.preheader92 + # Parent Loop BB2_7 Depth=1 + # => This Loop Header: Depth=2 + # Child Loop BB2_44 Depth 3 + # Child Loop BB2_47 Depth 4 + blez $s4, .LBB2_15 +# %bb.42: # %.lr.ph112 + # in Loop: Header=BB2_41 Depth=2 + move $fp, $a0 + ld.d $s6, $s8, %pc_lo12(Tmatrix) + move $s5, $zero + ori $s7, $zero, 1 + b .LBB2_44 + .p2align 4, , 16 +.LBB2_43: # in Loop: Header=BB2_44 Depth=3 + ld.w $s4, $s3, %pc_lo12(NNTOT) + addi.d $s5, $s5, 1 + bge $s5, $s4, .LBB2_51 +.LBB2_44: # Parent Loop BB2_7 Depth=1 + # Parent Loop BB2_41 Depth=2 + # => This Loop Header: Depth=3 + # Child Loop BB2_47 Depth 4 + slli.d $a0, $s5, 2 + fldx.s $fa0, $s1, $a0 + fabs.s $fa0, $fa0 + fcvt.d.s $fa0, $fa0 + fcmp.cule.d $fcc0, $fs4, $fa0 + bcnez $fcc0, .LBB2_43 +# %bb.45: # %.preheader + # in Loop: Header=BB2_44 Depth=3 + fmov.d $fa0, $fs0 + blez $s4, .LBB2_49 +# %bb.46: # %.lr.ph107 + # in Loop: Header=BB2_44 Depth=3 + slli.d $a0, $s5, 3 + ldx.d $a0, $s6, $a0 + move $a1, $s1 + fmov.s $fa0, $fs1 + .p2align 4, , 16 +.LBB2_47: # Parent Loop BB2_7 Depth=1 + # Parent Loop BB2_41 Depth=2 + # Parent Loop BB2_44 Depth=3 + # => This Inner Loop Header: Depth=4 + fld.s $fa1, $a0, 0 + fld.s $fa2, $a1, 0 + fmadd.s $fa0, $fa1, $fa2, $fa0 + addi.d $a1, $a1, 4 + addi.d $s4, $s4, -1 + addi.d $a0, $a0, 4 + bnez $s4, .LBB2_47 +# %bb.48: # %._crit_edge108.loopexit + # in Loop: Header=BB2_44 Depth=3 + fcvt.d.s $fa0, $fa0 +.LBB2_49: # %._crit_edge108 + # in Loop: Header=BB2_44 Depth=3 + alsl.d $s4, $s5, $s1, 2 + fneg.d $fa1, $fa0 + fcmp.clt.d $fcc0, $fa0, $fs2 + fsel $fa0, $fa1, $fs3, $fcc0 + fcmp.clt.d $fcc0, $fa0, $fs2 + fsel $fa0, $fa0, $fs2, $fcc0 + pcaddu18i $ra, %call36(exp) + jirl $ra, $ra, 0 + vldi $vr2, -912 + fsub.d $fa1, $fa2, $fa0 + fadd.d $fa0, $fa0, $fa2 + fdiv.d $fa0, $fa1, $fa0 + fcvt.s.d $fa0, $fa0 + fcvt.d.s $fa1, $fa0 + fcmp.cule.d $fcc0, $fs4, $fa1 + fst.s $fa0, $s4, 0 + bcnez $fcc0, .LBB2_43 +# %bb.50: # in Loop: Header=BB2_44 Depth=3 + move $s7, $zero + b .LBB2_43 .p2align 4, , 16 -.LBB2_55: # %vector.ph +.LBB2_51: # %._crit_edge113 + # in Loop: Header=BB2_41 Depth=2 + bnez $s7, .LBB2_16 +# %bb.52: # %._crit_edge113 + # in Loop: Header=BB2_41 Depth=2 + addi.w $a0, $fp, 1 + ori $a1, $zero, 49 + bltu $fp, $a1, .LBB2_41 + b .LBB2_16 + .p2align 4, , 16 +.LBB2_53: # %vector.ph # in Loop: Header=BB2_7 Depth=1 - bstrpick.d $a1, $s8, 30, 3 - slli.d $a4, $a1, 3 + bstrpick.d $a0, $s4, 30, 3 + slli.d $a0, $a0, 3 ld.d $a1, $sp, 16 # 8-byte Folded Reload ld.d $a2, $sp, 24 # 8-byte Folded Reload - move $a3, $a4 + move $a3, $a0 .p2align 4, , 16 -.LBB2_56: # %vector.body +.LBB2_54: # %vector.body # Parent Loop BB2_7 Depth=1 # => This Inner Loop Header: Depth=2 vld $vr0, $a1, -16 @@ -1782,17 +1771,17 @@ runcont: # @runcont addi.d $a3, $a3, -8 addi.d $a2, $a2, 32 addi.d $a1, $a1, 32 - bnez $a3, .LBB2_56 -# %bb.57: # %middle.block + bnez $a3, .LBB2_54 +# %bb.55: # %middle.block # in Loop: Header=BB2_7 Depth=1 - beq $a4, $s8, .LBB2_6 -.LBB2_58: # %.lr.ph128.preheader38 + beq $a0, $s4, .LBB2_6 +.LBB2_56: # %.lr.ph128.preheader38 # in Loop: Header=BB2_7 Depth=1 - alsl.d $a1, $a4, $s1, 2 - alsl.d $a2, $a4, $fp, 2 - sub.d $a4, $s8, $a4 + alsl.d $a1, $a0, $s1, 2 + alsl.d $a2, $a0, $fp, 2 + sub.d $a0, $s4, $a0 .p2align 4, , 16 -.LBB2_59: # %.lr.ph128 +.LBB2_57: # %.lr.ph128 # Parent Loop BB2_7 Depth=1 # => This Inner Loop Header: Depth=2 ld.w $a3, $a2, 0 @@ -1800,15 +1789,15 @@ runcont: # @runcont ffint.s.w $fa0, $fa0 fst.s $fa0, $a1, 0 addi.d $a1, $a1, 4 - addi.d $a4, $a4, -1 + addi.d $a0, $a0, -1 addi.d $a2, $a2, 4 - bnez $a4, .LBB2_59 + bnez $a0, .LBB2_57 b .LBB2_6 -.LBB2_60: # %.loopexit +.LBB2_58: # %.loopexit ori $a0, $zero, 500 - ld.d $a1, $sp, 48 # 8-byte Folded Reload - bne $a1, $a0, .LBB2_62 -# %bb.61: + ld.d $a1, $sp, 64 # 8-byte Folded Reload + bne $a1, $a0, .LBB2_60 +# %bb.59: pcalau12i $a0, %pc_hi20(.L.str.17) addi.d $a0, $a0, %pc_lo12(.L.str.17) fld.d $fs4, $sp, 80 # 8-byte Folded Reload @@ -1830,7 +1819,7 @@ runcont: # @runcont addi.d $sp, $sp, 208 pcaddu18i $t8, %call36(printf) jr $t8 -.LBB2_62: +.LBB2_60: fld.d $fs4, $sp, 80 # 8-byte Folded Reload fld.d $fs3, $sp, 88 # 8-byte Folded Reload fld.d $fs2, $sp, 96 # 8-byte Folded Reload @@ -1849,7 +1838,7 @@ runcont: # @runcont ld.d $ra, $sp, 200 # 8-byte Folded Reload addi.d $sp, $sp, 208 ret -.LBB2_63: +.LBB2_61: pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) ld.d $a3, $a0, 0 diff --git a/results/MultiSource/Benchmarks/FreeBench/pifft/CMakeFiles/pifft.dir/fftsg.s b/results/MultiSource/Benchmarks/FreeBench/pifft/CMakeFiles/pifft.dir/fftsg.s index 0a03d32f..56d0272b 100644 --- a/results/MultiSource/Benchmarks/FreeBench/pifft/CMakeFiles/pifft.dir/fftsg.s +++ b/results/MultiSource/Benchmarks/FreeBench/pifft/CMakeFiles/pifft.dir/fftsg.s @@ -60,12 +60,7 @@ cdft: # @cdft .Lfunc_end0: .size cdft, .Lfunc_end0-cdft # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function makewt -.LCPI1_0: - .dword 0x3fe921fb54442d18 # double 0.78539816339744828 - .text - .globl makewt + .globl makewt # -- Begin function makewt .p2align 5 .type makewt,@function makewt: # @makewt @@ -92,13 +87,16 @@ makewt: # @makewt st.w $a0, $a1, 4 blt $s0, $a2, .LBB1_12 # %bb.1: - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_0) bstrpick.d $s1, $s0, 31, 1 - movgr2fr.d $fa1, $s1 - ffint.d.l $fa1, $fa1 - fdiv.d $fs2, $fa0, $fa1 - fmul.d $fa0, $fs2, $fa1 + movgr2fr.d $fa0, $s1 + ffint.d.l $fa0, $fa0 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 + fdiv.d $fs2, $fa1, $fa0 + fmul.d $fa0, $fs2, $fa0 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 fmov.d $fs0, $fa0 @@ -658,12 +656,7 @@ cftbsub: # @cftbsub .Lfunc_end3: .size cftbsub, .Lfunc_end3-cftbsub # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function rdft -.LCPI4_0: - .dword 0x3fe921fb54442d18 # double 0.78539816339744828 - .text - .globl rdft + .globl rdft # -- Begin function rdft .p2align 5 .type rdft,@function rdft: # @rdft @@ -709,13 +702,16 @@ rdft: # @rdft # %bb.4: addi.w $a0, $s3, 0 alsl.d $s1, $a0, $s7, 3 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI4_0) bstrpick.d $s6, $s5, 31, 1 - movgr2fr.d $fa1, $s6 - ffint.d.l $fa1, $fa1 - fdiv.d $fs1, $fa0, $fa1 - fmul.d $fa0, $fs1, $fa1 + movgr2fr.d $fa0, $s6 + ffint.d.l $fa0, $fa0 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 + fdiv.d $fs1, $fa1, $fa0 + fmul.d $fa0, $fs1, $fa0 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 fst.d $fa0, $s1, 0 @@ -962,12 +958,7 @@ rdft: # @rdft .Lfunc_end4: .size rdft, .Lfunc_end4-rdft # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function makect -.LCPI5_0: - .dword 0x3fe921fb54442d18 # double 0.78539816339744828 - .text - .globl makect + .globl makect # -- Begin function makect .p2align 5 .type makect,@function makect: # @makect @@ -986,13 +977,16 @@ makect: # @makect blt $s0, $a0, .LBB5_4 # %bb.1: move $fp, $a2 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI5_0) bstrpick.d $s1, $s0, 31, 1 - movgr2fr.d $fa1, $s1 - ffint.d.l $fa1, $fa1 - fdiv.d $fs1, $fa0, $fa1 - fmul.d $fa0, $fs1, $fa1 + movgr2fr.d $fa0, $s1 + ffint.d.l $fa0, $fa0 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 + fdiv.d $fs1, $fa1, $fa0 + fmul.d $fa0, $fs1, $fa0 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 fst.d $fa0, $fp, 0 @@ -1165,12 +1159,7 @@ rftbsub: # @rftbsub .Lfunc_end7: .size rftbsub, .Lfunc_end7-rftbsub # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function ddct -.LCPI8_0: - .dword 0x3fe921fb54442d18 # double 0.78539816339744828 - .text - .globl ddct + .globl ddct # -- Begin function ddct .p2align 5 .type ddct,@function ddct: # @ddct @@ -1215,13 +1204,16 @@ ddct: # @ddct blt $fp, $a0, .LBB8_8 # %bb.4: alsl.d $s7, $s4, $s2, 3 - pcalau12i $a0, %pc_hi20(.LCPI8_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI8_0) bstrpick.d $s5, $fp, 31, 1 - movgr2fr.d $fa1, $s5 - ffint.d.l $fa1, $fa1 - fdiv.d $fs1, $fa0, $fa1 - fmul.d $fa0, $fs1, $fa1 + movgr2fr.d $fa0, $s5 + ffint.d.l $fa0, $fa0 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 + fdiv.d $fs1, $fa1, $fa0 + fmul.d $fa0, $fs1, $fa0 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 fst.d $fa0, $s7, 0 @@ -1603,12 +1595,7 @@ dctsub: # @dctsub .Lfunc_end9: .size dctsub, .Lfunc_end9-dctsub # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function ddst -.LCPI10_0: - .dword 0x3fe921fb54442d18 # double 0.78539816339744828 - .text - .globl ddst + .globl ddst # -- Begin function ddst .p2align 5 .type ddst,@function ddst: # @ddst @@ -1653,13 +1640,16 @@ ddst: # @ddst blt $fp, $a0, .LBB10_8 # %bb.4: alsl.d $s7, $s4, $s2, 3 - pcalau12i $a0, %pc_hi20(.LCPI10_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI10_0) bstrpick.d $s5, $fp, 31, 1 - movgr2fr.d $fa1, $s5 - ffint.d.l $fa1, $fa1 - fdiv.d $fs1, $fa0, $fa1 - fmul.d $fa0, $fs1, $fa1 + movgr2fr.d $fa0, $s5 + ffint.d.l $fa0, $fa0 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 + fdiv.d $fs1, $fa1, $fa0 + fmul.d $fa0, $fs1, $fa0 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 fst.d $fa0, $s7, 0 @@ -2044,12 +2034,7 @@ dstsub: # @dstsub .Lfunc_end11: .size dstsub, .Lfunc_end11-dstsub # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function dfct -.LCPI12_0: - .dword 0x3fe921fb54442d18 # double 0.78539816339744828 - .text - .globl dfct + .globl dfct # -- Begin function dfct .p2align 5 .type dfct,@function dfct: # @dfct @@ -2098,13 +2083,16 @@ dfct: # @dfct blt $s5, $a0, .LBB12_8 # %bb.4: alsl.d $s6, $s7, $s1, 3 - pcalau12i $a0, %pc_hi20(.LCPI12_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI12_0) bstrpick.d $s2, $s5, 31, 1 - movgr2fr.d $fa1, $s2 - ffint.d.l $fa1, $fa1 - fdiv.d $fs1, $fa0, $fa1 - fmul.d $fa0, $fs1, $fa1 + movgr2fr.d $fa0, $s2 + ffint.d.l $fa0, $fa0 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 + fdiv.d $fs1, $fa1, $fa0 + fmul.d $fa0, $fs1, $fa0 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 ld.d $ra, $sp, 96 # 8-byte Folded Reload @@ -2897,12 +2885,7 @@ dfct: # @dfct .Lfunc_end12: .size dfct, .Lfunc_end12-dfct # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function dfst -.LCPI13_0: - .dword 0x3fe921fb54442d18 # double 0.78539816339744828 - .text - .globl dfst + .globl dfst # -- Begin function dfst .p2align 5 .type dfst,@function dfst: # @dfst @@ -2950,13 +2933,16 @@ dfst: # @dfst blt $s7, $a0, .LBB13_8 # %bb.4: alsl.d $s4, $s6, $s1, 3 - pcalau12i $a0, %pc_hi20(.LCPI13_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI13_0) bstrpick.d $s2, $s7, 31, 1 - movgr2fr.d $fa1, $s2 - ffint.d.l $fa1, $fa1 - fdiv.d $fs1, $fa0, $fa1 - fmul.d $fa0, $fs1, $fa1 + movgr2fr.d $fa0, $s2 + ffint.d.l $fa0, $fa0 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 + fdiv.d $fs1, $fa1, $fa0 + fmul.d $fa0, $fs1, $fa0 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 fst.d $fa0, $s4, 0 diff --git a/results/MultiSource/Benchmarks/FreeBench/pifft/CMakeFiles/pifft.dir/pifft.s b/results/MultiSource/Benchmarks/FreeBench/pifft/CMakeFiles/pifft.dir/pifft.s index d45cfc38..05152360 100644 --- a/results/MultiSource/Benchmarks/FreeBench/pifft/CMakeFiles/pifft.dir/pifft.s +++ b/results/MultiSource/Benchmarks/FreeBench/pifft/CMakeFiles/pifft.dir/pifft.s @@ -1,16 +1,6 @@ .file "pifft.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x3cb0000000000000 # double 2.2204460492503131E-16 -.LCPI0_1: - .dword 0x4059000000000000 # double 100 -.LCPI0_2: - .dword 0x3fd3333333333333 # double 0.29999999999999999 -.LCPI0_3: - .dword 0x4049000000000000 # double 50 .section .text.unlikely.,"ax",@progbits - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -163,17 +153,22 @@ main: # @main jirl $ra, $ra, 0 alsl.d $a0, $s6, $s6, 1 alsl.d $a0, $a0, $s6, 3 - movgr2fr.w $fa3, $a0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_0) - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_1) - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_2) - ffint.d.w $fa3, $fa3 - fmadd.d $fa0, $fa3, $fa4, $fa0 - fmul.d $fa0, $fa0, $fa1 - fcmp.cule.d $fcc0, $fa2, $fa0 + movgr2fr.w $fa1, $a0 + ffint.d.w $fa1, $fa1 + lu52i.d $a0, $zero, 971 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa2, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa0, $a0 + fmul.d $fa1, $fa1, $fa0 + lu12i.w $a0, 209715 + ori $a0, $a0, 819 + lu32i.d $a0, 209715 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa2, $a0 + fcmp.cule.d $fcc0, $fa2, $fa1 ori $s3, $zero, 1 move $s5, $s7 move $fp, $s2 @@ -190,8 +185,8 @@ main: # @main move $a1, $s0 addi.w $s3, $s3, 1 slli.d $a2, $s0, 3 - fmul.d $fa0, $fa0, $fa1 - fcmp.cule.d $fcc0, $fa2, $fa0 + fmul.d $fa1, $fa1, $fa0 + fcmp.cule.d $fcc0, $fa2, $fa1 alsl.w $s0, $s0, $a2, 1 bcnez $fcc0, .LBB0_10 # %bb.9: # %.lr.ph @@ -904,11 +899,13 @@ main: # @main pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 ld.w $a0, $sp, 236 - pcalau12i $a1, %pc_hi20(.LCPI0_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI0_3) - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 ld.d $a0, $sp, 64 # 8-byte Folded Reload bstrpick.d $a0, $a0, 31, 0 movgr2fr.d $fa1, $a0 @@ -1419,12 +1416,7 @@ mp_sscanf: # @mp_sscanf .Lfunc_end2: .size mp_sscanf, .Lfunc_end2-mp_sscanf # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function mp_sqrt -.LCPI3_0: - .dword 0x3cb0000000000000 # double 2.2204460492503131E-16 - .text - .globl mp_sqrt + .globl mp_sqrt # -- Begin function mp_sqrt .p2align 5 .type mp_sqrt,@function mp_sqrt: # @mp_sqrt @@ -1455,16 +1447,16 @@ mp_sqrt: # @mp_sqrt st.d $a0, $sp, 40 # 8-byte Folded Spill ld.d $s1, $sp, 184 ld.d $s0, $sp, 176 - movgr2fr.w $fa1, $a1 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI3_0) - ffint.d.w $fa1, $fa1 + movgr2fr.w $fa0, $a1 + ffint.d.w $fa0, $fa0 ori $s7, $zero, 1 + lu52i.d $a0, $zero, 971 + movgr2fr.d $fa1, $a0 vldi $vr2, -912 .p2align 4, , 16 .LBB3_3: # =>This Inner Loop Header: Depth=1 - fmul.d $fa1, $fa1, $fa1 - fmul.d $fa3, $fa1, $fa0 + fmul.d $fa0, $fa0, $fa0 + fmul.d $fa3, $fa0, $fa1 fcmp.cule.d $fcc0, $fa2, $fa3 slli.w $s7, $s7, 1 bcnez $fcc0, .LBB3_5 @@ -2727,12 +2719,7 @@ mp_squh: # @mp_squh .Lfunc_end9: .size mp_squh, .Lfunc_end9-mp_squh # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function mp_inv -.LCPI10_0: - .dword 0x3cb0000000000000 # double 2.2204460492503131E-16 - .text - .globl mp_inv + .globl mp_inv # -- Begin function mp_inv .p2align 5 .type mp_inv,@function mp_inv: # @mp_inv @@ -2762,10 +2749,10 @@ mp_inv: # @mp_inv ld.d $s7, $sp, 168 ld.d $t8, $sp, 160 movgr2fr.w $fa0, $a1 - pcalau12i $a0, %pc_hi20(.LCPI10_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI10_0) ffint.d.w $fa0, $fa0 ori $a6, $zero, 1 + lu52i.d $a0, $zero, 971 + movgr2fr.d $fa1, $a0 vldi $vr2, -912 fmov.d $fa3, $fa0 .p2align 4, , 16 @@ -4658,12 +4645,7 @@ mp_mul_cmul: # @mp_mul_cmul .Lfunc_end26: .size mp_mul_cmul, .Lfunc_end26-mp_mul_cmul # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function mp_mul_d2i -.LCPI27_0: - .dword 0x3cb0000000000000 # double 2.2204460492503131E-16 - .text - .globl mp_mul_d2i + .globl mp_mul_d2i # -- Begin function mp_mul_d2i .p2align 5 .type mp_mul_d2i,@function mp_mul_d2i: # @mp_mul_d2i @@ -4743,18 +4725,18 @@ mp_mul_d2i: # @mp_mul_d2i fmul.d $fa1, $fs2, $fs2 blt $s3, $a1, .LBB27_6 # %bb.3: # %.lr.ph.preheader - pcalau12i $a2, %pc_hi20(.LCPI27_0) - fld.d $fa2, $a2, %pc_lo12(.LCPI27_0) alsl.d $a2, $a1, $s0, 3 movgr2fr.d $fs4, $zero - vldi $vr3, -912 + vldi $vr2, -912 + lu52i.d $a3, $zero, 971 + movgr2fr.d $fa3, $a3 .p2align 4, , 16 .LBB27_4: # %.lr.ph # =>This Inner Loop Header: Depth=1 fld.d $fa4, $a2, 0 - fmadd.d $fs4, $fa3, $fa4, $fs4 - fmul.d $fa3, $fs2, $fa3 - fcmp.clt.d $fcc0, $fa3, $fa2 + fmadd.d $fs4, $fa2, $fa4, $fs4 + fmul.d $fa2, $fs2, $fa2 + fcmp.clt.d $fcc0, $fa2, $fa3 bcnez $fcc0, .LBB27_6 # %bb.5: # %.lr.ph # in Loop: Header=BB27_4 Depth=1 @@ -5570,26 +5552,21 @@ mp_squh_use_in1fft: # @mp_squh_use_in1fft .Lfunc_end31: .size mp_squh_use_in1fft, .Lfunc_end31-mp_squh_use_in1fft # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function mp_get_nfft_init -.LCPI32_0: - .dword 0x3cb0000000000000 # double 2.2204460492503131E-16 - .text - .globl mp_get_nfft_init + .globl mp_get_nfft_init # -- Begin function mp_get_nfft_init .p2align 5 .type mp_get_nfft_init,@function mp_get_nfft_init: # @mp_get_nfft_init # %bb.0: - movgr2fr.w $fa1, $a0 - pcalau12i $a0, %pc_hi20(.LCPI32_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI32_0) - ffint.d.w $fa1, $fa1 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 ori $a0, $zero, 1 + lu52i.d $a2, $zero, 971 + movgr2fr.d $fa1, $a2 vldi $vr2, -912 .p2align 4, , 16 .LBB32_1: # =>This Inner Loop Header: Depth=1 - fmul.d $fa1, $fa1, $fa1 - fmul.d $fa3, $fa1, $fa0 + fmul.d $fa0, $fa0, $fa0 + fmul.d $fa3, $fa0, $fa1 fcmp.cule.d $fcc0, $fa2, $fa3 slli.w $a0, $a0, 1 bcnez $fcc0, .LBB32_3 diff --git a/results/MultiSource/Benchmarks/MallocBench/cfrac/CMakeFiles/cfrac.dir/cfrac.s b/results/MultiSource/Benchmarks/MallocBench/cfrac/CMakeFiles/cfrac.dir/cfrac.s index ba9132d3..58993a8e 100644 --- a/results/MultiSource/Benchmarks/MallocBench/cfrac/CMakeFiles/cfrac.dir/cfrac.s +++ b/results/MultiSource/Benchmarks/MallocBench/cfrac/CMakeFiles/cfrac.dir/cfrac.s @@ -1,15 +1,9 @@ .file "cfrac.c" - .section .rodata.cst4,"aM",@progbits,4 + .section .rodata.cst8,"aM",@progbits,8 .p2align 2, 0x0 # -- Begin function pfKnuthEx28 .LCPI0_0: - .word 0x3eaaaaab # float 0.333333343 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI0_1: .word 0x3f2aaaab # float 0.666666686 .word 0x3faaaaab # float 1.33333337 -.LCPI0_2: - .dword 0x3fe5555555555555 # double 0.66666666666666663 .text .globl pfKnuthEx28 .p2align 5 @@ -59,8 +53,8 @@ pfKnuthEx28: # @pfKnuthEx28 jirl $ra, $ra, 0 sltui $a0, $a0, 1 slli.d $a0, $a0, 2 - pcalau12i $a1, %pc_hi20(.LCPI0_1) - addi.d $a1, $a1, %pc_lo12(.LCPI0_1) + pcalau12i $a1, %pc_hi20(.LCPI0_0) + addi.d $a1, $a1, %pc_lo12(.LCPI0_0) fldx.s $fs0, $a1, $a0 ld.d $a0, $sp, 0 bnez $a0, .LBB0_12 @@ -98,8 +92,9 @@ pfKnuthEx28: # @pfKnuthEx28 .LBB0_8: beqz $a0, .LBB0_11 .LBB0_9: - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI0_0) + lu12i.w $a0, 256682 + ori $a0, $a0, 2731 + movgr2fr.w $fs0, $a0 ld.d $a0, $sp, 0 bnez $a0, .LBB0_12 b .LBB0_14 @@ -127,11 +122,14 @@ pfKnuthEx28: # @pfKnuthEx28 ori $a0, $zero, 2 pcaddu18i $ra, %call36(pfKnuthEx28) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_2) fcvt.d.s $fa0, $fa0 - vldi $vr2, -928 - fmul.d $fa0, $fa0, $fa2 + vldi $vr1, -928 + fmul.d $fa0, $fa0, $fa1 + lu12i.w $a0, 349525 + ori $a0, $a0, 1365 + lu32i.d $a0, 349525 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fadd.d $fa0, $fa0, $fa1 fcvt.s.d $fs0, $fa0 ld.d $a0, $sp, 0 @@ -257,12 +255,7 @@ logf_: # @logf_ .Lfunc_end1: .size logf_, .Lfunc_end1-logf_ # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function findk -.LCPI2_0: - .word 0xd8635fa9 # float -9.99999986E+14 - .text - .globl findk + .globl findk # -- Begin function findk .p2align 5 .type findk,@function findk: # @findk @@ -298,9 +291,11 @@ findk: # @findk ld.d $a1, $sp, 64 # 8-byte Folded Reload bltu $a1, $a0, .LBB2_22 # %bb.3: # %.lr.ph61.preheader - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.s $fs2, $a0, %pc_lo12(.LCPI2_0) ori $s3, $zero, 1 + lu12i.w $a0, -162251 + ori $a0, $a0, 4009 + lu32i.d $a0, 0 + movgr2fr.w $fs2, $a0 pcalau12i $s2, %pc_hi20(debug) pcalau12i $a0, %got_pc_hi20(primes) ld.d $a1, $a0, %got_pc_lo12(primes) diff --git a/results/MultiSource/Benchmarks/MallocBench/cfrac/CMakeFiles/cfrac.dir/pfloat.s b/results/MultiSource/Benchmarks/MallocBench/cfrac/CMakeFiles/cfrac.dir/pfloat.s index 8987df5f..95d8d91e 100644 --- a/results/MultiSource/Benchmarks/MallocBench/cfrac/CMakeFiles/cfrac.dir/pfloat.s +++ b/results/MultiSource/Benchmarks/MallocBench/cfrac/CMakeFiles/cfrac.dir/pfloat.s @@ -1,12 +1,6 @@ .file "pfloat.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function dtop -.LCPI0_0: - .dword 0x40f0000000000000 # double 65536 -.LCPI0_1: - .dword 0x3ef0000000000000 # double 1.52587890625E-5 .text - .globl dtop + .globl dtop # -- Begin function dtop .p2align 5 .type dtop,@function dtop: # @dtop @@ -19,8 +13,7 @@ dtop: # @dtop fst.d $fs0, $sp, 56 # 8-byte Folded Spill fst.d $fs1, $sp, 48 # 8-byte Folded Spill fst.d $fs2, $sp, 40 # 8-byte Folded Spill - fst.d $fs3, $sp, 32 # 8-byte Folded Spill - fmov.d $fs1, $fa0 + fmov.d $fs0, $fa0 ori $a0, $zero, 129 pcaddu18i $ra, %call36(palloc) jirl $ra, $ra, 0 @@ -28,16 +21,16 @@ dtop: # @dtop # %bb.1: move $fp, $a0 move $s0, $zero - movgr2fr.d $fs2, $zero - fcmp.clt.d $fcc0, $fs1, $fs2 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_0) - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI0_1) - fneg.d $fa0, $fs1 - fsel $fa0, $fs1, $fa0, $fcc0 + movgr2fr.d $fs1, $zero + fcmp.clt.d $fcc0, $fs0, $fs1 + fneg.d $fa0, $fs0 + fsel $fa0, $fs0, $fa0, $fcc0 movcf2gr $a0, $fcc0 st.b $a0, $fp, 6 + lu52i.d $a0, $zero, 1039 + movgr2fr.d $fs0, $a0 + lu52i.d $a0, $zero, 1007 + movgr2fr.d $fs2, $a0 .p2align 4, , 16 .LBB0_2: # =>This Inner Loop Header: Depth=1 add.d $s1, $fp, $s0 @@ -50,17 +43,16 @@ dtop: # @dtop movfr2gr.d $a0, $fa0 st.h $a0, $s1, 8 vld $vr0, $sp, 16 # 16-byte Folded Reload - fmul.d $fa0, $fa0, $fs3 + fmul.d $fa0, $fa0, $fs2 vreplvei.d $vr0, $vr0, 0 vfrintrm.d $vr0, $vr0 - fcmp.cune.d $fcc0, $fa0, $fs2 + fcmp.cune.d $fcc0, $fa0, $fs1 addi.d $s0, $s0, 2 bcnez $fcc0, .LBB0_2 # %bb.3: srli.d $a0, $s0, 1 st.h $a0, $fp, 4 move $a0, $fp - fld.d $fs3, $sp, 32 # 8-byte Folded Reload fld.d $fs2, $sp, 40 # 8-byte Folded Reload fld.d $fs1, $sp, 48 # 8-byte Folded Reload fld.d $fs0, $sp, 56 # 8-byte Folded Reload @@ -73,7 +65,6 @@ dtop: # @dtop jr $t8 .LBB0_4: move $a0, $zero - fld.d $fs3, $sp, 32 # 8-byte Folded Reload fld.d $fs2, $sp, 40 # 8-byte Folded Reload fld.d $fs1, $sp, 48 # 8-byte Folded Reload fld.d $fs0, $sp, 56 # 8-byte Folded Reload @@ -86,12 +77,7 @@ dtop: # @dtop .Lfunc_end0: .size dtop, .Lfunc_end0-dtop # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function ptod -.LCPI1_0: - .dword 0x40f0000000000000 # double 65536 - .text - .globl ptod + .globl ptod # -- Begin function ptod .p2align 5 .type ptod,@function ptod: # @ptod @@ -108,20 +94,20 @@ ptod: # @ptod fst.d $fs0, $sp, 8 # 8-byte Folded Spill ld.hu $a1, $a0, 4 slli.d $a1, $a1, 1 - pcalau12i $a2, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI1_0) - addi.d $a2, $a1, 6 + addi.d $a1, $a1, 6 movgr2fr.d $fs0, $zero - ori $a1, $zero, 10 + lu52i.d $a2, $zero, 1039 + movgr2fr.d $fa0, $a2 + ori $a2, $zero, 10 .p2align 4, , 16 .LBB1_3: # =>This Inner Loop Header: Depth=1 - ldx.hu $a3, $a0, $a2 + ldx.hu $a3, $a0, $a1 movgr2fr.w $fa1, $a3 ffint.d.w $fa1, $fa1 - addi.d $a3, $a2, 2 - addi.d $a2, $a2, -2 + addi.d $a3, $a1, 2 + addi.d $a1, $a1, -2 fmadd.d $fs0, $fs0, $fa0, $fa1 - bltu $a1, $a3, .LBB1_3 + bltu $a2, $a3, .LBB1_3 # %bb.4: ld.h $a1, $a0, 0 ld.bu $fp, $a0, 6 diff --git a/results/MultiSource/Benchmarks/MallocBench/espresso/CMakeFiles/espresso.dir/mincov.s b/results/MultiSource/Benchmarks/MallocBench/espresso/CMakeFiles/espresso.dir/mincov.s index e3ec5af1..736ba1c1 100644 --- a/results/MultiSource/Benchmarks/MallocBench/espresso/CMakeFiles/espresso.dir/mincov.s +++ b/results/MultiSource/Benchmarks/MallocBench/espresso/CMakeFiles/espresso.dir/mincov.s @@ -1,10 +1,6 @@ .file "mincov.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function sm_minimum_cover -.LCPI0_0: - .dword 0x4059000000000000 # double 100 .text - .globl sm_minimum_cover + .globl sm_minimum_cover # -- Begin function sm_minimum_cover .p2align 5 .type sm_minimum_cover,@function sm_minimum_cover: # @sm_minimum_cover @@ -138,10 +134,12 @@ sm_minimum_cover: # @sm_minimum_cover pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 .LBB0_12: - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_0) ld.w $a1, $fp, 48 ld.w $a2, $fp, 72 + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa0, $a0 fmul.d $fa0, $fs0, $fa0 movfr2gr.d $a4, $fa0 pcalau12i $a0, %pc_hi20(.L.str.2) diff --git a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gschar.s b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gschar.s index 06b22bd3..8270635c 100644 --- a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gschar.s +++ b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gschar.s @@ -1194,12 +1194,7 @@ show_move: # @show_move .Lfunc_end23: .size show_move, .Lfunc_end23-show_move # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function show_proceed -.LCPI24_0: - .dword 0x3f30000000000000 # double 2.44140625E-4 - .text - .globl show_proceed + .globl show_proceed # -- Begin function show_proceed .p2align 5 .type show_proceed,@function show_proceed: # @show_proceed @@ -1227,13 +1222,13 @@ show_proceed: # @show_proceed addi.w $a0, $zero, -14 st.d $a0, $sp, 24 # 8-byte Folded Spill ori $a0, $zero, 2048 - pcalau12i $a1, %pc_hi20(.LCPI24_0) - fld.d $fs0, $a1, %pc_lo12(.LCPI24_0) vreplgr2vr.d $vr0, $a0 vst $vr0, $sp, 48 # 16-byte Folded Spill lu12i.w $a0, -1 vreplgr2vr.d $vr0, $a0 vst $vr0, $sp, 32 # 16-byte Folded Spill + lu52i.d $a0, $zero, 1011 + movgr2fr.d $fs0, $a0 pcalau12i $a0, %pc_hi20(continue_show_update) addi.d $s1, $a0, %pc_lo12(continue_show_update) move $s2, $zero diff --git a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gscolor.s b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gscolor.s index a0d4ba66..9ae1be2d 100644 --- a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gscolor.s +++ b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gscolor.s @@ -1,10 +1,6 @@ .file "gscolor.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function gs_setgray -.LCPI0_0: - .word 0x477fff00 # float 65535 .text - .globl gs_setgray + .globl gs_setgray # -- Begin function gs_setgray .p2align 5 .type gs_setgray,@function gs_setgray: # @gs_setgray @@ -31,9 +27,10 @@ gs_setgray: # @gs_setgray ori $a2, $a0, 4095 b .LBB0_7 .LBB0_6: - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI0_0) fcvt.s.d $fa0, $fa0 + lu12i.w $a0, 292863 + ori $a0, $a0, 3840 + movgr2fr.w $fa1, $a0 fmul.s $fa0, $fa0, $fa1 ftintrz.l.s $fa0, $fa0 movfr2gr.d $a2, $fa0 @@ -79,18 +76,11 @@ check_unit: # @check_unit .Lfunc_end1: .size check_unit, .Lfunc_end1-check_unit # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function gs_currentgray -.LCPI2_0: - .word 0x477fff00 # float 65535 - .text - .globl gs_currentgray + .globl gs_currentgray # -- Begin function gs_currentgray .p2align 5 .type gs_currentgray,@function gs_currentgray: # @gs_currentgray # %bb.0: - addi.d $sp, $sp, -16 - st.d $ra, $sp, 8 # 8-byte Folded Spill ld.d $a0, $a0, 304 ld.bu $a1, $a0, 9 beqz $a1, .LBB2_2 @@ -98,16 +88,19 @@ gs_currentgray: # @gs_currentgray ld.hu $a0, $a0, 6 b .LBB2_3 .LBB2_2: + addi.d $sp, $sp, -16 + st.d $ra, $sp, 8 # 8-byte Folded Spill pcaddu18i $ra, %call36(gx_color_luminance) jirl $ra, $ra, 0 -.LBB2_3: - pcalau12i $a1, %pc_hi20(.LCPI2_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI2_0) - movgr2fr.w $fa1, $a0 - ffint.s.w $fa1, $fa1 - fdiv.s $fa0, $fa1, $fa0 ld.d $ra, $sp, 8 # 8-byte Folded Reload addi.d $sp, $sp, 16 +.LBB2_3: + movgr2fr.w $fa0, $a0 + ffint.s.w $fa0, $fa0 + lu12i.w $a0, 292863 + ori $a0, $a0, 3840 + movgr2fr.w $fa1, $a0 + fdiv.s $fa0, $fa0, $fa1 ret .Lfunc_end2: .size gs_currentgray, .Lfunc_end2-gs_currentgray @@ -148,12 +141,7 @@ gs_currentgscolor: # @gs_currentgscolor .Lfunc_end4: .size gs_currentgscolor, .Lfunc_end4-gs_currentgscolor # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function gs_sethsbcolor -.LCPI5_0: - .word 0x477fff00 # float 65535 - .text - .globl gs_sethsbcolor + .globl gs_sethsbcolor # -- Begin function gs_sethsbcolor .p2align 5 .type gs_sethsbcolor,@function gs_sethsbcolor: # @gs_sethsbcolor @@ -191,9 +179,10 @@ gs_sethsbcolor: # @gs_sethsbcolor bcnez $fcc0, .LBB5_10 b .LBB5_12 .LBB5_8: - pcalau12i $a1, %pc_hi20(.LCPI5_0) - fld.s $fa4, $a1, %pc_lo12(.LCPI5_0) fcvt.s.d $fa0, $fa0 + lu12i.w $a1, 292863 + ori $a1, $a1, 3840 + movgr2fr.w $fa4, $a1 fmul.s $fa0, $fa0, $fa4 ftintrz.l.s $fa0, $fa0 movfr2gr.d $a1, $fa0 @@ -207,10 +196,11 @@ gs_sethsbcolor: # @gs_sethsbcolor move $a3, $zero b .LBB5_15 .LBB5_11: - pcalau12i $a2, %pc_hi20(.LCPI5_0) - fld.s $fa0, $a2, %pc_lo12(.LCPI5_0) - fcvt.s.d $fa1, $fa1 - fmul.s $fa0, $fa1, $fa0 + fcvt.s.d $fa0, $fa1 + lu12i.w $a2, 292863 + ori $a2, $a2, 3840 + movgr2fr.w $fa1, $a2 + fmul.s $fa0, $fa0, $fa1 ftintrz.l.s $fa0, $fa0 movfr2gr.d $a2, $fa0 fcmp.clt.d $fcc0, $fa2, $fa3 @@ -223,10 +213,11 @@ gs_sethsbcolor: # @gs_sethsbcolor ori $a3, $a3, 4095 b .LBB5_15 .LBB5_14: - pcalau12i $a3, %pc_hi20(.LCPI5_0) - fld.s $fa0, $a3, %pc_lo12(.LCPI5_0) - fcvt.s.d $fa1, $fa2 - fmul.s $fa0, $fa1, $fa0 + fcvt.s.d $fa0, $fa2 + lu12i.w $a3, 292863 + ori $a3, $a3, 3840 + movgr2fr.w $fa1, $a3 + fmul.s $fa0, $fa0, $fa1 ftintrz.l.s $fa0, $fa0 movfr2gr.d $a3, $fa0 .LBB5_15: # %tri_param.exit @@ -242,12 +233,7 @@ gs_sethsbcolor: # @gs_sethsbcolor .Lfunc_end5: .size gs_sethsbcolor, .Lfunc_end5-gs_sethsbcolor # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function tri_param -.LCPI6_0: - .word 0x477fff00 # float 65535 - .text - .globl tri_param + .globl tri_param # -- Begin function tri_param .p2align 5 .type tri_param,@function tri_param: # @tri_param @@ -279,9 +265,10 @@ tri_param: # @tri_param bcnez $fcc0, .LBB6_8 b .LBB6_10 .LBB6_6: - pcalau12i $a1, %pc_hi20(.LCPI6_0) - fld.s $fa4, $a1, %pc_lo12(.LCPI6_0) fcvt.s.d $fa0, $fa0 + lu12i.w $a1, 292863 + ori $a1, $a1, 3840 + movgr2fr.w $fa4, $a1 fmul.s $fa0, $fa0, $fa4 ftintrz.l.s $fa0, $fa0 movfr2gr.d $a1, $fa0 @@ -295,10 +282,11 @@ tri_param: # @tri_param move $a2, $zero b .LBB6_13 .LBB6_9: - pcalau12i $a3, %pc_hi20(.LCPI6_0) - fld.s $fa0, $a3, %pc_lo12(.LCPI6_0) - fcvt.s.d $fa1, $fa1 - fmul.s $fa0, $fa1, $fa0 + fcvt.s.d $fa0, $fa1 + lu12i.w $a3, 292863 + ori $a3, $a3, 3840 + movgr2fr.w $fa1, $a3 + fmul.s $fa0, $fa0, $fa1 ftintrz.l.s $fa0, $fa0 movfr2gr.d $a3, $fa0 fcmp.clt.d $fcc0, $fa2, $fa3 @@ -311,10 +299,11 @@ tri_param: # @tri_param ori $a2, $a2, 4095 b .LBB6_13 .LBB6_12: - pcalau12i $a2, %pc_hi20(.LCPI6_0) - fld.s $fa0, $a2, %pc_lo12(.LCPI6_0) - fcvt.s.d $fa1, $fa2 - fmul.s $fa0, $fa1, $fa0 + fcvt.s.d $fa0, $fa2 + lu12i.w $a2, 292863 + ori $a2, $a2, 3840 + movgr2fr.w $fa1, $a2 + fmul.s $fa0, $fa0, $fa1 ftintrz.l.s $fa0, $fa0 movfr2gr.d $a2, $fa0 .LBB6_13: # %check_unit.exit14 @@ -326,12 +315,7 @@ tri_param: # @tri_param .Lfunc_end6: .size tri_param, .Lfunc_end6-tri_param # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function gs_currenthsbcolor -.LCPI7_0: - .word 0x477fff00 # float 65535 - .text - .globl gs_currenthsbcolor + .globl gs_currenthsbcolor # -- Begin function gs_currenthsbcolor .p2align 5 .type gs_currenthsbcolor,@function gs_currenthsbcolor: # @gs_currenthsbcolor @@ -346,20 +330,21 @@ gs_currenthsbcolor: # @gs_currenthsbcolor jirl $ra, $ra, 0 ld.hu $a0, $sp, 10 ld.hu $a1, $sp, 12 - pcalau12i $a2, %pc_hi20(.LCPI7_0) - fld.s $fa0, $a2, %pc_lo12(.LCPI7_0) ld.hu $a2, $sp, 14 + movgr2fr.w $fa0, $a0 + ffint.s.w $fa0, $fa0 + lu12i.w $a0, 292863 + ori $a0, $a0, 3840 movgr2fr.w $fa1, $a0 - ffint.s.w $fa1, $fa1 - fdiv.s $fa1, $fa1, $fa0 - fst.s $fa1, $fp, 0 - movgr2fr.w $fa1, $a1 - ffint.s.w $fa1, $fa1 - fdiv.s $fa1, $fa1, $fa0 - fst.s $fa1, $fp, 4 - movgr2fr.w $fa1, $a2 - ffint.s.w $fa1, $fa1 - fdiv.s $fa0, $fa1, $fa0 + fdiv.s $fa0, $fa0, $fa1 + fst.s $fa0, $fp, 0 + movgr2fr.w $fa0, $a1 + ffint.s.w $fa0, $fa0 + fdiv.s $fa0, $fa0, $fa1 + fst.s $fa0, $fp, 4 + movgr2fr.w $fa0, $a2 + ffint.s.w $fa0, $fa0 + fdiv.s $fa0, $fa0, $fa1 fst.s $fa0, $fp, 8 move $a0, $zero ld.d $fp, $sp, 16 # 8-byte Folded Reload @@ -369,40 +354,31 @@ gs_currenthsbcolor: # @gs_currenthsbcolor .Lfunc_end7: .size gs_currenthsbcolor, .Lfunc_end7-gs_currenthsbcolor # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function tri_return -.LCPI8_0: - .word 0x477fff00 # float 65535 - .text - .globl tri_return + .globl tri_return # -- Begin function tri_return .p2align 5 .type tri_return,@function tri_return: # @tri_return # %bb.0: - pcalau12i $a4, %pc_hi20(.LCPI8_0) - fld.s $fa0, $a4, %pc_lo12(.LCPI8_0) + movgr2fr.w $fa0, $a0 + ffint.s.w $fa0, $fa0 + lu12i.w $a0, 292863 + ori $a0, $a0, 3840 movgr2fr.w $fa1, $a0 - ffint.s.w $fa1, $fa1 - fdiv.s $fa1, $fa1, $fa0 - fst.s $fa1, $a3, 0 - movgr2fr.w $fa1, $a1 - ffint.s.w $fa1, $fa1 - fdiv.s $fa1, $fa1, $fa0 - fst.s $fa1, $a3, 4 - movgr2fr.w $fa1, $a2 - ffint.s.w $fa1, $fa1 - fdiv.s $fa0, $fa1, $fa0 + fdiv.s $fa0, $fa0, $fa1 + fst.s $fa0, $a3, 0 + movgr2fr.w $fa0, $a1 + ffint.s.w $fa0, $fa0 + fdiv.s $fa0, $fa0, $fa1 + fst.s $fa0, $a3, 4 + movgr2fr.w $fa0, $a2 + ffint.s.w $fa0, $fa0 + fdiv.s $fa0, $fa0, $fa1 fst.s $fa0, $a3, 8 ret .Lfunc_end8: .size tri_return, .Lfunc_end8-tri_return # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function gs_setrgbcolor -.LCPI9_0: - .word 0x477fff00 # float 65535 - .text - .globl gs_setrgbcolor + .globl gs_setrgbcolor # -- Begin function gs_setrgbcolor .p2align 5 .type gs_setrgbcolor,@function gs_setrgbcolor: # @gs_setrgbcolor @@ -440,9 +416,10 @@ gs_setrgbcolor: # @gs_setrgbcolor bcnez $fcc0, .LBB9_10 b .LBB9_12 .LBB9_8: - pcalau12i $a1, %pc_hi20(.LCPI9_0) - fld.s $fa4, $a1, %pc_lo12(.LCPI9_0) fcvt.s.d $fa0, $fa0 + lu12i.w $a1, 292863 + ori $a1, $a1, 3840 + movgr2fr.w $fa4, $a1 fmul.s $fa0, $fa0, $fa4 ftintrz.l.s $fa0, $fa0 movfr2gr.d $a1, $fa0 @@ -456,10 +433,11 @@ gs_setrgbcolor: # @gs_setrgbcolor move $a2, $zero b .LBB9_15 .LBB9_11: - pcalau12i $a3, %pc_hi20(.LCPI9_0) - fld.s $fa0, $a3, %pc_lo12(.LCPI9_0) - fcvt.s.d $fa1, $fa1 - fmul.s $fa0, $fa1, $fa0 + fcvt.s.d $fa0, $fa1 + lu12i.w $a3, 292863 + ori $a3, $a3, 3840 + movgr2fr.w $fa1, $a3 + fmul.s $fa0, $fa0, $fa1 ftintrz.l.s $fa0, $fa0 movfr2gr.d $a3, $fa0 fcmp.clt.d $fcc0, $fa2, $fa3 @@ -472,10 +450,11 @@ gs_setrgbcolor: # @gs_setrgbcolor ori $a2, $a2, 4095 b .LBB9_15 .LBB9_14: - pcalau12i $a2, %pc_hi20(.LCPI9_0) - fld.s $fa0, $a2, %pc_lo12(.LCPI9_0) - fcvt.s.d $fa1, $fa2 - fmul.s $fa0, $fa1, $fa0 + fcvt.s.d $fa0, $fa2 + lu12i.w $a2, 292863 + ori $a2, $a2, 3840 + movgr2fr.w $fa1, $a2 + fmul.s $fa0, $fa0, $fa1 ftintrz.l.s $fa0, $fa0 movfr2gr.d $a2, $fa0 .LBB9_15: # %tri_param.exit @@ -494,77 +473,64 @@ gs_setrgbcolor: # @gs_setrgbcolor .Lfunc_end9: .size gs_setrgbcolor, .Lfunc_end9-gs_setrgbcolor # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function gs_currentrgbcolor -.LCPI10_0: - .word 0x477fff00 # float 65535 - .text - .globl gs_currentrgbcolor + .globl gs_currentrgbcolor # -- Begin function gs_currentrgbcolor .p2align 5 .type gs_currentrgbcolor,@function gs_currentrgbcolor: # @gs_currentrgbcolor # %bb.0: ld.d $a0, $a0, 304 ld.hu $a2, $a0, 0 - pcalau12i $a3, %pc_hi20(.LCPI10_0) - fld.s $fa0, $a3, %pc_lo12(.LCPI10_0) ld.hu $a3, $a0, 2 ld.hu $a0, $a0, 4 + movgr2fr.w $fa0, $a2 + ffint.s.w $fa0, $fa0 + lu12i.w $a2, 292863 + ori $a2, $a2, 3840 movgr2fr.w $fa1, $a2 - ffint.s.w $fa1, $fa1 - fdiv.s $fa1, $fa1, $fa0 - fst.s $fa1, $a1, 0 - movgr2fr.w $fa1, $a3 - ffint.s.w $fa1, $fa1 - fdiv.s $fa1, $fa1, $fa0 - fst.s $fa1, $a1, 4 - movgr2fr.w $fa1, $a0 - ffint.s.w $fa1, $fa1 - fdiv.s $fa0, $fa1, $fa0 + fdiv.s $fa0, $fa0, $fa1 + fst.s $fa0, $a1, 0 + movgr2fr.w $fa0, $a3 + ffint.s.w $fa0, $fa0 + fdiv.s $fa0, $fa0, $fa1 + fst.s $fa0, $a1, 4 + movgr2fr.w $fa0, $a0 + ffint.s.w $fa0, $fa0 + fdiv.s $fa0, $fa0, $fa1 fst.s $fa0, $a1, 8 move $a0, $zero ret .Lfunc_end10: .size gs_currentrgbcolor, .Lfunc_end10-gs_currentrgbcolor # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function gs_colorrgb -.LCPI11_0: - .word 0x477fff00 # float 65535 - .text - .globl gs_colorrgb + .globl gs_colorrgb # -- Begin function gs_colorrgb .p2align 5 .type gs_colorrgb,@function gs_colorrgb: # @gs_colorrgb # %bb.0: ld.hu $a2, $a0, 0 - pcalau12i $a3, %pc_hi20(.LCPI11_0) - fld.s $fa0, $a3, %pc_lo12(.LCPI11_0) ld.hu $a3, $a0, 2 ld.hu $a0, $a0, 4 + movgr2fr.w $fa0, $a2 + ffint.s.w $fa0, $fa0 + lu12i.w $a2, 292863 + ori $a2, $a2, 3840 movgr2fr.w $fa1, $a2 - ffint.s.w $fa1, $fa1 - fdiv.s $fa1, $fa1, $fa0 - fst.s $fa1, $a1, 0 - movgr2fr.w $fa1, $a3 - ffint.s.w $fa1, $fa1 - fdiv.s $fa1, $fa1, $fa0 - fst.s $fa1, $a1, 4 - movgr2fr.w $fa1, $a0 - ffint.s.w $fa1, $fa1 - fdiv.s $fa0, $fa1, $fa0 + fdiv.s $fa0, $fa0, $fa1 + fst.s $fa0, $a1, 0 + movgr2fr.w $fa0, $a3 + ffint.s.w $fa0, $fa0 + fdiv.s $fa0, $fa0, $fa1 + fst.s $fa0, $a1, 4 + movgr2fr.w $fa0, $a0 + ffint.s.w $fa0, $fa0 + fdiv.s $fa0, $fa0, $fa1 fst.s $fa0, $a1, 8 move $a0, $zero ret .Lfunc_end11: .size gs_colorrgb, .Lfunc_end11-gs_colorrgb # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function gs_setscreen -.LCPI12_0: - .dword 0x40dfffc000000000 # double 32767 - .text - .globl gs_setscreen + .globl gs_setscreen # -- Begin function gs_setscreen .p2align 5 .type gs_setscreen,@function gs_setscreen: # @gs_setscreen @@ -597,8 +563,10 @@ gs_setscreen: # @gs_setscreen b .LBB12_10 .LBB12_4: # %.lr.ph addi.w $s1, $zero, -15 - pcalau12i $a0, %pc_hi20(.LCPI12_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI12_0) + ori $a0, $zero, 0 + lu32i.d $a0, -64 + lu52i.d $a0, $a0, 1037 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 7 ori $s2, $a0, 4095 b .LBB12_6 @@ -656,12 +624,7 @@ gs_setscreen: # @gs_setscreen .Lfunc_end12: .size gs_setscreen, .Lfunc_end12-gs_setscreen # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function gs_screen_init -.LCPI13_0: - .dword 0x4052000000000000 # double 72 - .text - .globl gs_screen_init + .globl gs_screen_init # -- Begin function gs_screen_init .p2align 5 .type gs_screen_init,@function gs_screen_init: # @gs_screen_init @@ -684,8 +647,10 @@ gs_screen_init: # @gs_screen_init move $fp, $a1 move $s1, $a0 fmov.d $fs0, $fa1 - pcalau12i $a0, %pc_hi20(.LCPI13_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI13_0) + ori $a0, $zero, 0 + lu32i.d $a0, 131072 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa1, $a0 fmov.d $fs1, $fa0 fdiv.d $fa0, $fa1, $fa0 fcvt.s.d $fs2, $fa0 @@ -946,12 +911,7 @@ gs_screen_currentpoint: # @gs_screen_currentpoint .Lfunc_end14: .size gs_screen_currentpoint, .Lfunc_end14-gs_screen_currentpoint # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function gs_screen_next -.LCPI15_0: - .dword 0x40dfffc000000000 # double 32767 - .text - .globl gs_screen_next + .globl gs_screen_next # -- Begin function gs_screen_next .p2align 5 .type gs_screen_next,@function gs_screen_next: # @gs_screen_next @@ -964,9 +924,11 @@ gs_screen_next: # @gs_screen_next addi.w $a0, $zero, -15 ret .LBB15_2: - pcalau12i $a1, %pc_hi20(.LCPI15_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI15_0) ld.d $a2, $a0, 8 + ori $a1, $zero, 0 + lu32i.d $a1, -64 + lu52i.d $a1, $a1, 1037 + movgr2fr.d $fa1, $a1 fmul.d $fa0, $fa0, $fa1 ftintrz.l.d $fa0, $fa0 movfr2gr.d $a3, $fa0 diff --git a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gscoord.s b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gscoord.s index 050edb2d..b466e07d 100644 --- a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gscoord.s +++ b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gscoord.s @@ -392,28 +392,23 @@ gs_idtransform: # @gs_idtransform .Lfunc_end11: .size gs_idtransform, .Lfunc_end11-gs_idtransform # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function gs_translate_to_fixed -.LCPI12_0: - .dword 0x3f30000000000000 # double 2.44140625E-4 - .text - .globl gs_translate_to_fixed + .globl gs_translate_to_fixed # -- Begin function gs_translate_to_fixed .p2align 5 .type gs_translate_to_fixed,@function gs_translate_to_fixed: # @gs_translate_to_fixed # %bb.0: - pcalau12i $a3, %pc_hi20(.LCPI12_0) - fld.d $fa0, $a3, %pc_lo12(.LCPI12_0) st.d $a1, $a0, 120 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + lu52i.d $a1, $zero, 1011 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fmul.d $fa1, $fa1, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa1, $a0, 88 + fmul.d $fa0, $fa0, $fa1 + fcvt.s.d $fa0, $fa0 + fst.s $fa0, $a0, 88 st.d $a2, $a0, 128 - movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + fmul.d $fa0, $fa0, $fa1 fcvt.s.d $fa0, $fa0 fst.s $fa0, $a0, 104 st.w $zero, $a0, 232 diff --git a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gsdevice.s b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gsdevice.s index 07ecb2cd..9b9cc966 100644 --- a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gsdevice.s +++ b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gsdevice.s @@ -10,28 +10,22 @@ gx_default_open_device: # @gx_default_open_device .Lfunc_end0: .size gx_default_open_device, .Lfunc_end0-gx_default_open_device # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function gx_default_get_initial_matrix -.LCPI1_0: - .word 0x42900000 # float 72 -.LCPI1_1: - .word 0xc2900000 # float -72 - .text - .globl gx_default_get_initial_matrix + .globl gx_default_get_initial_matrix # -- Begin function gx_default_get_initial_matrix .p2align 5 .type gx_default_get_initial_matrix,@function gx_default_get_initial_matrix: # @gx_default_get_initial_matrix # %bb.0: fld.s $fa0, $a0, 32 - pcalau12i $a2, %pc_hi20(.LCPI1_0) - fld.s $fa1, $a2, %pc_lo12(.LCPI1_0) + lu12i.w $a2, 272640 + movgr2fr.w $fa1, $a2 fdiv.s $fa0, $fa0, $fa1 fst.s $fa0, $a1, 0 - fld.s $fa0, $a0, 36 - pcalau12i $a2, %pc_hi20(.LCPI1_1) - fld.s $fa1, $a2, %pc_lo12(.LCPI1_1) st.w $zero, $a1, 16 st.w $zero, $a1, 32 + fld.s $fa0, $a0, 36 + lu12i.w $a2, -251648 + lu32i.d $a2, 0 + movgr2fr.w $fa1, $a2 ld.w $a0, $a0, 28 fdiv.s $fa0, $fa0, $fa1 fst.s $fa0, $a1, 48 @@ -388,18 +382,7 @@ gs_makedevice: # @gs_makedevice .Lfunc_end21: .size gs_makedevice, .Lfunc_end21-gs_makedevice # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function gs_makeimagedevice -.LCPI22_0: - .dword 0xbf50624dd2f1a9fc # double -0.001 -.LCPI22_1: - .dword 0x3ff004189374bc6a # double 1.0009999999999999 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI22_2: - .word 0x437f0000 # float 255 - .text - .globl gs_makeimagedevice + .globl gs_makeimagedevice # -- Begin function gs_makeimagedevice .p2align 5 .type gs_makeimagedevice,@function gs_makeimagedevice: # @gs_makeimagedevice @@ -469,13 +452,19 @@ gs_makeimagedevice: # @gs_makeimagedevice addi.d $t1, $sp, 40 lu12i.w $t2, -349526 ori $t2, $t2, 2731 - pcalau12i $t3, %pc_hi20(.LCPI22_0) - fld.d $fa0, $t3, %pc_lo12(.LCPI22_0) - pcalau12i $t3, %pc_hi20(.LCPI22_1) - fld.d $fa1, $t3, %pc_lo12(.LCPI22_1) - pcalau12i $t3, %pc_hi20(.LCPI22_2) - fld.s $fa2, $t3, %pc_lo12(.LCPI22_2) lu32i.d $t2, 0 + lu12i.w $t3, -184550 + ori $t3, $t3, 2556 + lu32i.d $t3, 25165 + lu52i.d $t3, $t3, -1035 + movgr2fr.d $fa0, $t3 + lu12i.w $t3, -444597 + ori $t3, $t3, 3178 + lu32i.d $t3, 1048 + lu52i.d $t3, $t3, 1023 + movgr2fr.d $fa1, $t3 + lu12i.w $t3, 276464 + movgr2fr.w $fa2, $t3 vldi $vr3, -928 ori $t3, $zero, 255 move $t4, $s3 diff --git a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gsimage.s b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gsimage.s index 8b1d2151..2bed144a 100644 --- a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gsimage.s +++ b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gsimage.s @@ -62,12 +62,7 @@ gs_image_init: # @gs_image_init .Lfunc_end0: .size gs_image_init, .Lfunc_end0-gs_image_init # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function image_init -.LCPI1_0: - .word 0x45800000 # float 4096 - .text - .globl image_init + .globl image_init # -- Begin function image_init .p2align 5 .type image_init,@function image_init: # @image_init @@ -129,15 +124,15 @@ image_init: # @image_init st.w $s6, $s7, 0 move $t1, $s5 st.w $s5, $s7, 4 - fld.s $fa0, $sp, 16 - pcalau12i $a2, %pc_hi20(.LCPI1_0) - fld.s $fa2, $a2, %pc_lo12(.LCPI1_0) move $a2, $s4 st.w $s4, $s7, 8 move $a5, $s3 st.w $s3, $s7, 12 + fld.s $fa0, $sp, 16 move $a3, $s2 st.w $s2, $s7, 16 + lu12i.w $t0, 284672 + movgr2fr.w $fa2, $t0 fmul.s $fa0, $fa0, $fa2 fld.s $fa1, $sp, 64 ftintrz.l.s $fa0, $fa0 diff --git a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gsline.s b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gsline.s index defbe166..04124f41 100644 --- a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gsline.s +++ b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gsline.s @@ -80,18 +80,7 @@ gs_currentlinejoin: # @gs_currentlinejoin .Lfunc_end5: .size gs_currentlinejoin, .Lfunc_end5-gs_currentlinejoin # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function gs_setmiterlimit -.LCPI6_0: - .word 0x49742400 # float 1.0E+6 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI6_1: - .dword 0x400000346dc5d639 # double 2.0001000000000002 -.LCPI6_2: - .dword 0x3fffff972474538f # double 1.9999 - .text - .globl gs_setmiterlimit + .globl gs_setmiterlimit # -- Begin function gs_setmiterlimit .p2align 5 .type gs_setmiterlimit,@function gs_setmiterlimit: # @gs_setmiterlimit @@ -107,22 +96,29 @@ gs_setmiterlimit: # @gs_setmiterlimit st.d $ra, $sp, 24 # 8-byte Folded Spill st.d $fp, $sp, 16 # 8-byte Folded Spill fst.d $fs0, $sp, 8 # 8-byte Folded Spill - pcalau12i $a1, %pc_hi20(.LCPI6_1) - fld.d $fa1, $a1, %pc_lo12(.LCPI6_1) + fcvt.s.d $fa1, $fa0 ld.d $fp, $a0, 280 - fcvt.s.d $fa2, $fa0 fmul.d $fs0, $fa0, $fa0 - fcmp.cule.d $fcc0, $fa1, $fs0 - fst.s $fa2, $fp, 12 + lu12i.w $a0, 449629 + ori $a0, $a0, 1593 + lu32i.d $a0, 52 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fa0, $a0 + fcmp.cule.d $fcc0, $fa0, $fs0 + fst.s $fa1, $fp, 12 bcnez $fcc0, .LBB6_5 # %bb.3: - pcalau12i $a0, %pc_hi20(.LCPI6_2) - fld.d $fa0, $a0, %pc_lo12(.LCPI6_2) + lu12i.w $a0, 149317 + ori $a0, $a0, 911 + lu32i.d $a0, -105 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa0, $a0 fcmp.clt.d $fcc0, $fa0, $fs0 bceqz $fcc0, .LBB6_5 # %bb.4: - pcalau12i $a0, %pc_hi20(.LCPI6_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI6_0) + lu12i.w $a0, 300866 + ori $a0, $a0, 1024 + movgr2fr.w $fa0, $a0 b .LBB6_7 .LBB6_5: vldi $vr0, -784 diff --git a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gsmatrix.s b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gsmatrix.s index dd802418..390aedbc 100644 --- a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gsmatrix.s +++ b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gsmatrix.s @@ -79,12 +79,7 @@ gs_make_scaling: # @gs_make_scaling .Lfunc_end2: .size gs_make_scaling, .Lfunc_end2-gs_make_scaling # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function gs_make_rotation -.LCPI3_0: - .dword 0x3f91df46a2529d39 # double 0.017453292519943295 - .text - .globl gs_make_rotation + .globl gs_make_rotation # -- Begin function gs_make_rotation .p2align 5 .type gs_make_rotation,@function gs_make_rotation: # @gs_make_rotation @@ -93,9 +88,12 @@ gs_make_rotation: # @gs_make_rotation st.d $ra, $sp, 24 # 8-byte Folded Spill st.d $fp, $sp, 16 # 8-byte Folded Spill fst.d $fs0, $sp, 8 # 8-byte Folded Spill - pcalau12i $a1, %pc_hi20(.LCPI3_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI3_0) move $fp, $a0 + lu12i.w $a0, -383703 + ori $a0, $a0, 3385 + lu32i.d $a0, 122694 + lu52i.d $a0, $a0, 1017 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 fcvt.s.d $fs0, $fa0 pcalau12i $a0, %pc_hi20(gs_identity_matrix) @@ -294,14 +292,7 @@ gs_matrix_invert: # @gs_matrix_invert .Lfunc_end5: .size gs_matrix_invert, .Lfunc_end5-gs_matrix_invert # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function gs_matrix_rotate -.LCPI6_0: - .dword 0x4076800000000000 # double 360 -.LCPI6_1: - .dword 0x3f91df46a2529d39 # double 0.017453292519943295 - .text - .globl gs_matrix_rotate + .globl gs_matrix_rotate # -- Begin function gs_matrix_rotate .p2align 5 .type gs_matrix_rotate,@function gs_matrix_rotate: # @gs_matrix_rotate @@ -312,10 +303,12 @@ gs_matrix_rotate: # @gs_matrix_rotate st.d $s0, $sp, 24 # 8-byte Folded Spill fst.d $fs0, $sp, 16 # 8-byte Folded Spill fst.d $fs1, $sp, 8 # 8-byte Folded Spill - pcalau12i $a2, %pc_hi20(.LCPI6_0) - fld.d $fa1, $a2, %pc_lo12(.LCPI6_0) - fabs.d $fa2, $fa0 - fcmp.cult.d $fcc0, $fa1, $fa2 + fabs.d $fa1, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, 425984 + lu52i.d $a2, $a2, 1031 + movgr2fr.d $fa2, $a2 + fcmp.cult.d $fcc0, $fa2, $fa1 move $fp, $a1 move $s0, $a0 bcnez $fcc0, .LBB6_6 @@ -357,8 +350,11 @@ gs_matrix_rotate: # @gs_matrix_rotate ffint.s.w $fa0, $fa0 b .LBB6_7 .LBB6_6: - pcalau12i $a0, %pc_hi20(.LCPI6_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI6_1) + lu12i.w $a0, -383703 + ori $a0, $a0, 3385 + lu32i.d $a0, 122694 + lu52i.d $a0, $a0, 1017 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 fcvt.s.d $fa0, $fa0 fcvt.d.s $fs0, $fa0 @@ -651,12 +647,7 @@ gs_distance_transform_inverse: # @gs_distance_transform_inverse .Lfunc_end10: .size gs_distance_transform_inverse, .Lfunc_end10-gs_distance_transform_inverse # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function gs_bbox_transform_inverse -.LCPI11_0: - .dword 0xb690000000000000 # double -7.0064923216240854E-46 - .text - .globl gs_bbox_transform_inverse + .globl gs_bbox_transform_inverse # -- Begin function gs_bbox_transform_inverse .p2align 5 .type gs_bbox_transform_inverse,@function gs_bbox_transform_inverse: # @gs_bbox_transform_inverse @@ -783,8 +774,8 @@ gs_bbox_transform_inverse: # @gs_bbox_transform_inverse fdiv.d $fa3, $fa3, $ft0 .LBB11_5: move $a0, $zero - pcalau12i $a1, %pc_hi20(.LCPI11_0) - fld.d $fa6, $a1, %pc_lo12(.LCPI11_0) + lu52i.d $a1, $zero, -1175 + movgr2fr.d $fa6, $a1 fcvt.s.d $fa7, $fa3 fadd.s $ft0, $fa5, $fa0 movgr2fr.w $ft1, $zero @@ -813,21 +804,16 @@ gs_bbox_transform_inverse: # @gs_bbox_transform_inverse .Lfunc_end11: .size gs_bbox_transform_inverse, .Lfunc_end11-gs_bbox_transform_inverse # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function gs_point_transform2fixed -.LCPI12_0: - .dword 0x40b0000000000000 # double 4096 - .text - .globl gs_point_transform2fixed + .globl gs_point_transform2fixed # -- Begin function gs_point_transform2fixed .p2align 5 .type gs_point_transform2fixed,@function gs_point_transform2fixed: # @gs_point_transform2fixed # %bb.0: - fld.s $fa3, $a0, 0 - pcalau12i $a2, %pc_hi20(.LCPI12_0) - fld.d $fa2, $a2, %pc_lo12(.LCPI12_0) - fcvt.d.s $fa3, $fa3 - fmul.d $fa3, $fa0, $fa3 + fld.s $fa2, $a0, 0 + fcvt.d.s $fa2, $fa2 + fmul.d $fa3, $fa0, $fa2 + lu52i.d $a2, $zero, 1035 + movgr2fr.d $fa2, $a2 fmul.d $fa3, $fa3, $fa2 ld.d $a2, $a0, 96 ftintrz.l.d $fa3, $fa3 @@ -874,21 +860,16 @@ gs_point_transform2fixed: # @gs_point_transform2fixed .Lfunc_end12: .size gs_point_transform2fixed, .Lfunc_end12-gs_point_transform2fixed # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function gs_distance_transform2fixed -.LCPI13_0: - .dword 0x40b0000000000000 # double 4096 - .text - .globl gs_distance_transform2fixed + .globl gs_distance_transform2fixed # -- Begin function gs_distance_transform2fixed .p2align 5 .type gs_distance_transform2fixed,@function gs_distance_transform2fixed: # @gs_distance_transform2fixed # %bb.0: - fld.s $fa3, $a0, 0 - pcalau12i $a2, %pc_hi20(.LCPI13_0) - fld.d $fa2, $a2, %pc_lo12(.LCPI13_0) - fcvt.d.s $fa3, $fa3 - fmul.d $fa3, $fa0, $fa3 + fld.s $fa2, $a0, 0 + fcvt.d.s $fa2, $fa2 + fmul.d $fa3, $fa0, $fa2 + lu52i.d $a2, $zero, 1035 + movgr2fr.d $fa2, $a2 fld.s $fa5, $a0, 48 fmul.d $fa3, $fa3, $fa2 ftintrz.l.d $fa4, $fa3 diff --git a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gspath.s b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gspath.s index 44049482..fe270f89 100644 --- a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gspath.s +++ b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gspath.s @@ -35,12 +35,7 @@ gs_closepath: # @gs_closepath .Lfunc_end1: .size gs_closepath, .Lfunc_end1-gs_closepath # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function gs_currentpoint -.LCPI2_0: - .dword 0x3f30000000000000 # double 2.44140625E-4 - .text - .globl gs_currentpoint + .globl gs_currentpoint # -- Begin function gs_currentpoint .p2align 5 .type gs_currentpoint,@function gs_currentpoint: # @gs_currentpoint @@ -58,10 +53,10 @@ gs_currentpoint: # @gs_currentpoint bltz $a0, .LBB2_2 # %bb.1: ld.d $a0, $sp, 8 - pcalau12i $a1, %pc_hi20(.LCPI2_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI2_0) movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 + lu52i.d $a0, $zero, 1011 + movgr2fr.d $fa1, $a0 ld.d $a0, $sp, 16 fmul.d $fa0, $fa0, $fa1 fcvt.s.d $fa0, $fa0 @@ -225,42 +220,32 @@ gs_arc: # @gs_arc .Lfunc_end7: .size gs_arc, .Lfunc_end7-gs_arc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function arc_either -.LCPI8_0: - .dword 0x40b0000000000000 # double 4096 -.LCPI8_1: - .dword 0x3f30000000000000 # double 2.44140625E-4 -.LCPI8_2: - .dword 0x3f91df46a2529d39 # double 0.017453292519943295 -.LCPI8_3: - .dword 0x3f81df46a2529d39 # double 0.0087266462599716477 - .text - .globl arc_either + .globl arc_either # -- Begin function arc_either .p2align 5 .type arc_either,@function arc_either: # @arc_either # %bb.0: - addi.d $sp, $sp, -160 - st.d $ra, $sp, 152 # 8-byte Folded Spill - st.d $fp, $sp, 144 # 8-byte Folded Spill - st.d $s0, $sp, 136 # 8-byte Folded Spill - st.d $s1, $sp, 128 # 8-byte Folded Spill - st.d $s2, $sp, 120 # 8-byte Folded Spill - st.d $s3, $sp, 112 # 8-byte Folded Spill - st.d $s4, $sp, 104 # 8-byte Folded Spill - fst.d $fs0, $sp, 96 # 8-byte Folded Spill - fst.d $fs1, $sp, 88 # 8-byte Folded Spill - fst.d $fs2, $sp, 80 # 8-byte Folded Spill - fst.d $fs3, $sp, 72 # 8-byte Folded Spill - fst.d $fs4, $sp, 64 # 8-byte Folded Spill - fst.d $fs5, $sp, 56 # 8-byte Folded Spill - fst.d $fs6, $sp, 48 # 8-byte Folded Spill - fst.d $fs7, $sp, 40 # 8-byte Folded Spill + addi.d $sp, $sp, -176 + st.d $ra, $sp, 168 # 8-byte Folded Spill + st.d $fp, $sp, 160 # 8-byte Folded Spill + st.d $s0, $sp, 152 # 8-byte Folded Spill + st.d $s1, $sp, 144 # 8-byte Folded Spill + st.d $s2, $sp, 136 # 8-byte Folded Spill + st.d $s3, $sp, 128 # 8-byte Folded Spill + st.d $s4, $sp, 120 # 8-byte Folded Spill + st.d $s5, $sp, 112 # 8-byte Folded Spill + fst.d $fs0, $sp, 104 # 8-byte Folded Spill + fst.d $fs1, $sp, 96 # 8-byte Folded Spill + fst.d $fs2, $sp, 88 # 8-byte Folded Spill + fst.d $fs3, $sp, 80 # 8-byte Folded Spill + fst.d $fs4, $sp, 72 # 8-byte Folded Spill + fst.d $fs5, $sp, 64 # 8-byte Folded Spill + fst.d $fs6, $sp, 56 # 8-byte Folded Spill + fst.d $fs7, $sp, 48 # 8-byte Folded Spill fmov.d $fs6, $fa2 movgr2fr.d $fa2, $zero fcmp.clt.d $fcc0, $fs6, $fa2 - fst.d $fa1, $sp, 32 # 8-byte Folded Spill + fst.d $fa1, $sp, 40 # 8-byte Folded Spill bceqz $fcc0, .LBB8_2 # %bb.1: addi.w $a0, $zero, -15 @@ -269,8 +254,8 @@ arc_either: # @arc_either move $s0, $a1 fmov.d $fs2, $fa0 move $fp, $a0 - pcalau12i $a0, %pc_hi20(.LCPI8_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI8_0) + lu52i.d $a0, $zero, 1035 + movgr2fr.d $fa0, $a0 fmul.d $fa1, $fa3, $fa0 ftintrz.l.d $fa1, $fa1 movfr2gr.d $s1, $fa1 @@ -279,7 +264,7 @@ arc_either: # @arc_either movfr2gr.d $a0, $fa0 bne $s1, $a0, .LBB8_4 # %bb.3: - move $s3, $s1 + move $s4, $s1 b .LBB8_7 .LBB8_4: lu12i.w $a1, -512638 @@ -292,7 +277,7 @@ arc_either: # @arc_either add.d $a3, $a1, $a3 lu12i.w $a1, 360 mul.d $a3, $a3, $a1 - sub.d $s3, $s1, $a3 + sub.d $s4, $s1, $a3 mulh.d $a2, $a0, $a2 srli.d $a3, $a2, 63 srai.d $a2, $a2, 18 @@ -301,28 +286,31 @@ arc_either: # @arc_either sub.d $s1, $a0, $a2 beqz $s0, .LBB8_6 # %bb.5: - slt $a0, $s1, $s3 - add.d $a1, $s3, $a1 + slt $a0, $s1, $s4 + add.d $a1, $s4, $a1 masknez $a1, $a1, $a0 - maskeqz $a0, $s3, $a0 - or $s3, $a0, $a1 + maskeqz $a0, $s4, $a0 + or $s4, $a0, $a1 b .LBB8_7 .LBB8_6: - slt $a0, $s3, $s1 + slt $a0, $s4, $s1 add.d $a1, $s1, $a1 masknez $a1, $a1, $a0 maskeqz $a0, $s1, $a0 or $s1, $a0, $a1 .LBB8_7: - movgr2fr.d $fa0, $s3 - pcalau12i $a0, %pc_hi20(.LCPI8_1) - fld.d $fs4, $a0, %pc_lo12(.LCPI8_1) + movgr2fr.d $fa0, $s4 ffint.d.l $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI8_2) - fld.d $fs5, $a0, %pc_lo12(.LCPI8_2) + lu52i.d $a0, $zero, 1011 + movgr2fr.d $fs4, $a0 fmul.d $fa0, $fa0, $fs4 fcvt.s.d $fa0, $fa0 fcvt.d.s $fa0, $fa0 + lu12i.w $a0, -383703 + ori $s2, $a0, 3385 + lu32i.d $s2, 122694 + lu52i.d $a0, $s2, 1017 + movgr2fr.d $fs5, $a0 fmul.d $fa0, $fa0, $fs5 fcvt.s.d $fa0, $fa0 fcvt.d.s $fs3, $fa0 @@ -340,20 +328,20 @@ arc_either: # @arc_either fadd.d $fa1, $fs2, $fa0 fcvt.s.d $fa4, $fa1 fcvt.d.s $fa1, $fs0 - fld.d $fa2, $sp, 32 # 8-byte Folded Reload + fld.d $fa2, $sp, 40 # 8-byte Folded Reload fadd.d $fa2, $fa2, $fa1 fcvt.s.d $fa5, $fa2 - sub.d $s2, $s1, $s3 + sub.d $s3, $s1, $s4 beqz $s0, .LBB8_14 # %bb.8: # %.preheader158 lu12i.w $a0, -91 - ori $s4, $a0, 4095 + ori $s5, $a0, 4095 ori $s0, $zero, 1 - blt $s4, $s2, .LBB8_20 + blt $s5, $s3, .LBB8_20 # %bb.9: # %.lr.ph.preheader fadd.d $fa1, $fs2, $fa1 fcvt.s.d $fs7, $fa1 - fld.d $fa1, $sp, 32 # 8-byte Folded Reload + fld.d $fa1, $sp, 40 # 8-byte Folded Reload fsub.d $fa0, $fa1, $fa0 fcvt.s.d $fs1, $fa0 fcvt.d.s $fa0, $fa4 @@ -370,12 +358,12 @@ arc_either: # @arc_either jirl $ra, $ra, 0 bltz $a0, .LBB8_24 # %bb.10: - fst.d $fs6, $sp, 24 # 8-byte Folded Spill - sub.d $a0, $s1, $s3 + fst.d $fs6, $sp, 32 # 8-byte Folded Spill + sub.d $a0, $s1, $s4 lu12i.w $s0, 90 - add.d $s2, $a0, $s0 + add.d $s3, $a0, $s0 fneg.s $fs6, $fs3 - blt $s4, $s2, .LBB8_19 + blt $s5, $s3, .LBB8_19 .p2align 4, , 16 .LBB8_11: # %.lr.ph # =>This Inner Loop Header: Depth=1 @@ -386,7 +374,7 @@ arc_either: # @arc_either fadd.s $fa4, $fs7, $fs6 fcvt.s.d $fs7, $fa1 fcvt.d.s $fa1, $fs0 - fld.d $fa2, $sp, 32 # 8-byte Folded Reload + fld.d $fa2, $sp, 40 # 8-byte Folded Reload fsub.d $fa2, $fa2, $fa1 fcvt.d.s $fa1, $fs1 fsub.s $fa5, $fs1, $fs0 @@ -401,24 +389,24 @@ arc_either: # @arc_either jirl $ra, $ra, 0 bltz $a0, .LBB8_24 # %bb.12: # in Loop: Header=BB8_11 Depth=1 - add.d $s2, $s2, $s0 + add.d $s3, $s3, $s0 fneg.s $fs6, $fs0 fmov.s $fs0, $fs3 - bge $s4, $s2, .LBB8_11 + bge $s5, $s3, .LBB8_11 # %bb.13: move $s0, $zero fmov.s $fs0, $fs3 - bnez $s2, .LBB8_21 + bnez $s3, .LBB8_21 b .LBB8_23 .LBB8_14: # %.preheader lu12i.w $a0, 90 - ori $s4, $a0, 1 + ori $s5, $a0, 1 ori $s0, $zero, 1 - blt $s2, $s4, .LBB8_20 + blt $s3, $s5, .LBB8_20 # %bb.15: # %.lr.ph193.preheader fsub.d $fa1, $fs2, $fa1 fcvt.s.d $fs7, $fa1 - fld.d $fa1, $sp, 32 # 8-byte Folded Reload + fld.d $fa1, $sp, 40 # 8-byte Folded Reload fadd.d $fa0, $fa1, $fa0 fcvt.s.d $fs1, $fa0 fcvt.d.s $fa0, $fa4 @@ -435,12 +423,12 @@ arc_either: # @arc_either jirl $ra, $ra, 0 bltz $a0, .LBB8_24 # %bb.16: - fst.d $fs6, $sp, 24 # 8-byte Folded Spill - sub.d $a0, $s1, $s3 + fst.d $fs6, $sp, 32 # 8-byte Folded Spill + sub.d $a0, $s1, $s4 lu12i.w $s0, -90 - add.d $s2, $a0, $s0 + add.d $s3, $a0, $s0 fneg.s $fs0, $fs0 - blt $s2, $s4, .LBB8_22 + blt $s3, $s5, .LBB8_22 .p2align 4, , 16 .LBB8_17: # %.lr.ph193 # =>This Inner Loop Header: Depth=1 @@ -451,7 +439,7 @@ arc_either: # @arc_either fsub.s $fa4, $fs7, $fs3 fcvt.s.d $fs7, $fa1 fcvt.d.s $fa1, $fs0 - fld.d $fa2, $sp, 32 # 8-byte Folded Reload + fld.d $fa2, $sp, 40 # 8-byte Folded Reload fadd.d $fa2, $fa2, $fa1 fcvt.d.s $fa1, $fs1 fadd.s $fa5, $fs1, $fs0 @@ -466,29 +454,29 @@ arc_either: # @arc_either jirl $ra, $ra, 0 bltz $a0, .LBB8_24 # %bb.18: # in Loop: Header=BB8_17 Depth=1 - add.d $s2, $s2, $s0 + add.d $s3, $s3, $s0 fneg.s $fs0, $fs3 fmov.s $fs3, $fs6 - bge $s2, $s4, .LBB8_17 + bge $s3, $s5, .LBB8_17 .LBB8_19: move $s0, $zero - bnez $s2, .LBB8_21 + bnez $s3, .LBB8_21 b .LBB8_23 .LBB8_20: - fst.d $fs6, $sp, 24 # 8-byte Folded Spill + fst.d $fs6, $sp, 32 # 8-byte Folded Spill fmov.s $fs7, $fa4 fmov.s $fs1, $fa5 fmov.s $fs6, $fs0 fmov.s $fs0, $fs3 - beqz $s2, .LBB8_23 + beqz $s3, .LBB8_23 .LBB8_21: - movgr2fr.d $fa0, $s2 + movgr2fr.d $fa0, $s3 ffint.d.l $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI8_3) - fld.d $fa1, $a0, %pc_lo12(.LCPI8_3) fmul.d $fa0, $fa0, $fs4 fcvt.s.d $fa0, $fa0 fcvt.d.s $fa0, $fa0 + lu52i.d $a0, $s2, 1016 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 pcaddu18i $ra, %call36(tan) jirl $ra, $ra, 0 @@ -501,19 +489,19 @@ arc_either: # @arc_either fmul.d $fa0, $fa0, $fs5 fcvt.s.d $fa0, $fa0 fcvt.d.s $fa1, $fs7 - fst.d $fa1, $sp, 16 # 8-byte Folded Spill + fst.d $fa1, $sp, 24 # 8-byte Folded Spill fcvt.d.s $fa1, $fs1 - fst.d $fa1, $sp, 8 # 8-byte Folded Spill + fst.d $fa1, $sp, 16 # 8-byte Folded Spill fcvt.d.s $fs5, $fa0 fmov.d $fa0, $fs5 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 - fld.d $fs4, $sp, 24 # 8-byte Folded Reload + fld.d $fs4, $sp, 32 # 8-byte Folded Reload fmadd.d $fs2, $fs4, $fa0, $fs2 fmov.d $fa0, $fs5 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fld.d $fa1, $sp, 32 # 8-byte Folded Reload + fld.d $fa1, $sp, 40 # 8-byte Folded Reload fmadd.d $fa3, $fs4, $fa0, $fa1 fneg.s $fa0, $fs3 fmadd.s $fa0, $fa0, $fs6, $fs7 @@ -521,51 +509,53 @@ arc_either: # @arc_either fmadd.s $fa0, $fs3, $fs0, $fs1 fcvt.d.s $fa5, $fa0 move $a0, $fp - fld.d $fa0, $sp, 16 # 8-byte Folded Reload - fld.d $fa1, $sp, 8 # 8-byte Folded Reload + fld.d $fa0, $sp, 24 # 8-byte Folded Reload + fld.d $fa1, $sp, 16 # 8-byte Folded Reload fmov.d $fa2, $fs2 move $a1, $s0 - fld.d $fs7, $sp, 40 # 8-byte Folded Reload - fld.d $fs6, $sp, 48 # 8-byte Folded Reload - fld.d $fs5, $sp, 56 # 8-byte Folded Reload - fld.d $fs4, $sp, 64 # 8-byte Folded Reload - fld.d $fs3, $sp, 72 # 8-byte Folded Reload - fld.d $fs2, $sp, 80 # 8-byte Folded Reload - fld.d $fs1, $sp, 88 # 8-byte Folded Reload - fld.d $fs0, $sp, 96 # 8-byte Folded Reload - ld.d $s4, $sp, 104 # 8-byte Folded Reload - ld.d $s3, $sp, 112 # 8-byte Folded Reload - ld.d $s2, $sp, 120 # 8-byte Folded Reload - ld.d $s1, $sp, 128 # 8-byte Folded Reload - ld.d $s0, $sp, 136 # 8-byte Folded Reload - ld.d $fp, $sp, 144 # 8-byte Folded Reload - ld.d $ra, $sp, 152 # 8-byte Folded Reload - addi.d $sp, $sp, 160 + fld.d $fs7, $sp, 48 # 8-byte Folded Reload + fld.d $fs6, $sp, 56 # 8-byte Folded Reload + fld.d $fs5, $sp, 64 # 8-byte Folded Reload + fld.d $fs4, $sp, 72 # 8-byte Folded Reload + fld.d $fs3, $sp, 80 # 8-byte Folded Reload + fld.d $fs2, $sp, 88 # 8-byte Folded Reload + fld.d $fs1, $sp, 96 # 8-byte Folded Reload + fld.d $fs0, $sp, 104 # 8-byte Folded Reload + ld.d $s5, $sp, 112 # 8-byte Folded Reload + ld.d $s4, $sp, 120 # 8-byte Folded Reload + ld.d $s3, $sp, 128 # 8-byte Folded Reload + ld.d $s2, $sp, 136 # 8-byte Folded Reload + ld.d $s1, $sp, 144 # 8-byte Folded Reload + ld.d $s0, $sp, 152 # 8-byte Folded Reload + ld.d $fp, $sp, 160 # 8-byte Folded Reload + ld.d $ra, $sp, 168 # 8-byte Folded Reload + addi.d $sp, $sp, 176 pcaddu18i $t8, %call36(arc_add) jr $t8 .LBB8_22: move $s0, $zero fmov.s $fs6, $fs3 - bnez $s2, .LBB8_21 + bnez $s3, .LBB8_21 .LBB8_23: move $a0, $zero .LBB8_24: # %.thread - fld.d $fs7, $sp, 40 # 8-byte Folded Reload - fld.d $fs6, $sp, 48 # 8-byte Folded Reload - fld.d $fs5, $sp, 56 # 8-byte Folded Reload - fld.d $fs4, $sp, 64 # 8-byte Folded Reload - fld.d $fs3, $sp, 72 # 8-byte Folded Reload - fld.d $fs2, $sp, 80 # 8-byte Folded Reload - fld.d $fs1, $sp, 88 # 8-byte Folded Reload - fld.d $fs0, $sp, 96 # 8-byte Folded Reload - ld.d $s4, $sp, 104 # 8-byte Folded Reload - ld.d $s3, $sp, 112 # 8-byte Folded Reload - ld.d $s2, $sp, 120 # 8-byte Folded Reload - ld.d $s1, $sp, 128 # 8-byte Folded Reload - ld.d $s0, $sp, 136 # 8-byte Folded Reload - ld.d $fp, $sp, 144 # 8-byte Folded Reload - ld.d $ra, $sp, 152 # 8-byte Folded Reload - addi.d $sp, $sp, 160 + fld.d $fs7, $sp, 48 # 8-byte Folded Reload + fld.d $fs6, $sp, 56 # 8-byte Folded Reload + fld.d $fs5, $sp, 64 # 8-byte Folded Reload + fld.d $fs4, $sp, 72 # 8-byte Folded Reload + fld.d $fs3, $sp, 80 # 8-byte Folded Reload + fld.d $fs2, $sp, 88 # 8-byte Folded Reload + fld.d $fs1, $sp, 96 # 8-byte Folded Reload + fld.d $fs0, $sp, 104 # 8-byte Folded Reload + ld.d $s5, $sp, 112 # 8-byte Folded Reload + ld.d $s4, $sp, 120 # 8-byte Folded Reload + ld.d $s3, $sp, 128 # 8-byte Folded Reload + ld.d $s2, $sp, 136 # 8-byte Folded Reload + ld.d $s1, $sp, 144 # 8-byte Folded Reload + ld.d $s0, $sp, 152 # 8-byte Folded Reload + ld.d $fp, $sp, 160 # 8-byte Folded Reload + ld.d $ra, $sp, 168 # 8-byte Folded Reload + addi.d $sp, $sp, 176 ret .Lfunc_end8: .size arc_either, .Lfunc_end8-arc_either @@ -668,14 +658,7 @@ arc_add: # @arc_add .Lfunc_end10: .size arc_add, .Lfunc_end10-arc_add # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function gs_arcto -.LCPI11_0: - .dword 0x3f30000000000000 # double 2.44140625E-4 -.LCPI11_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 - .text - .globl gs_arcto + .globl gs_arcto # -- Begin function gs_arcto .p2align 5 .type gs_arcto,@function gs_arcto: # @gs_arcto @@ -713,10 +696,10 @@ gs_arcto: # @gs_arcto bltz $a0, .LBB11_12 # %bb.3: # %gs_currentpoint.exit ld.d $a0, $sp, 40 - pcalau12i $a1, %pc_hi20(.LCPI11_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI11_0) movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 + lu52i.d $a0, $zero, 1011 + movgr2fr.d $fa1, $a0 ld.d $a0, $sp, 48 fmul.d $fa0, $fa0, $fa1 fcvt.s.d $fa0, $fa0 @@ -759,9 +742,12 @@ gs_arcto: # @gs_arcto fcvt.d.s $fs7, $fa2 bceqz $fcc0, .LBB11_13 .LBB11_5: # %.split - pcalau12i $a0, %pc_hi20(.LCPI11_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI11_1) fabs.d $fa1, $fs7 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fa2, $a0 fcmp.clt.d $fcc0, $fa1, $fa2 fld.d $fa1, $sp, 24 # 8-byte Folded Reload bcnez $fcc0, .LBB11_8 diff --git a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gspath2.s b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gspath2.s index b6fffadc..7fa4a6b1 100644 --- a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gspath2.s +++ b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gspath2.s @@ -48,12 +48,7 @@ gs_reversepath: # @gs_reversepath .Lfunc_end1: .size gs_reversepath, .Lfunc_end1-gs_reversepath # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function gs_pathbbox -.LCPI2_0: - .dword 0x3f30000000000000 # double 2.44140625E-4 - .text - .globl gs_pathbbox + .globl gs_pathbbox # -- Begin function gs_pathbbox .p2align 5 .type gs_pathbbox,@function gs_pathbbox: # @gs_pathbbox @@ -71,29 +66,29 @@ gs_pathbbox: # @gs_pathbbox bltz $a0, .LBB2_2 # %bb.1: ld.d $a0, $sp, 24 - pcalau12i $a1, %pc_hi20(.LCPI2_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI2_0) + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + lu52i.d $a0, $zero, 1011 movgr2fr.d $fa1, $a0 - ffint.d.l $fa1, $fa1 ld.d $a0, $sp, 32 - fmul.d $fa1, $fa1, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa1, $sp, 8 - movgr2fr.d $fa1, $a0 - ffint.d.l $fa1, $fa1 + fmul.d $fa0, $fa0, $fa1 + fcvt.s.d $fa0, $fa0 + fst.s $fa0, $sp, 8 + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 ld.d $a0, $sp, 40 - fmul.d $fa1, $fa1, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa1, $sp, 12 - movgr2fr.d $fa1, $a0 - ffint.d.l $fa1, $fa1 + fmul.d $fa0, $fa0, $fa1 + fcvt.s.d $fa0, $fa0 + fst.s $fa0, $sp, 12 + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 ld.d $a0, $sp, 48 - fmul.d $fa1, $fa1, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa1, $sp, 16 - movgr2fr.d $fa1, $a0 - ffint.d.l $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + fmul.d $fa0, $fa0, $fa1 + fcvt.s.d $fa0, $fa0 + fst.s $fa0, $sp, 16 + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + fmul.d $fa0, $fa0, $fa1 fcvt.s.d $fa0, $fa0 fst.s $fa0, $sp, 20 addi.d $a1, $s0, 24 @@ -123,12 +118,7 @@ gs_path_enum_init: # @gs_path_enum_init .Lfunc_end3: .size gs_path_enum_init, .Lfunc_end3-gs_path_enum_init # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function gs_path_enum_next -.LCPI4_0: - .dword 0x3f30000000000000 # double 2.44140625E-4 - .text - .globl gs_path_enum_next + .globl gs_path_enum_next # -- Begin function gs_path_enum_next .p2align 5 .type gs_path_enum_next,@function gs_path_enum_next: # @gs_path_enum_next @@ -152,10 +142,10 @@ gs_path_enum_next: # @gs_path_enum_next # %bb.2: move $fp, $a1 ld.d $a0, $s1, 24 - pcalau12i $a1, %pc_hi20(.LCPI4_0) - fld.d $fs0, $a1, %pc_lo12(.LCPI4_0) movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 + lu52i.d $a0, $zero, 1011 + movgr2fr.d $fs0, $a0 ld.d $a0, $s1, 32 fmul.d $fa0, $fa0, $fs0 fcvt.s.d $fa0, $fa0 diff --git a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gstype1.s b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gstype1.s index 1e19fd71..3994e119 100644 --- a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gstype1.s +++ b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gstype1.s @@ -282,16 +282,7 @@ gs_type1_init: # @gs_type1_init .Lfunc_end3: .size gs_type1_init, .Lfunc_end3-gs_type1_init # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function gs_type1_interpret -.LCPI4_0: - .dword 0x3f30000000000000 # double 2.44140625E-4 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI4_1: - .word 0x45800000 # float 4096 - .text - .globl gs_type1_interpret + .globl gs_type1_interpret # -- Begin function gs_type1_interpret .p2align 5 .type gs_type1_interpret,@function gs_type1_interpret: # @gs_type1_interpret @@ -308,6 +299,7 @@ gs_type1_interpret: # @gs_type1_interpret st.d $s6, $sp, 392 # 8-byte Folded Spill st.d $s7, $sp, 384 # 8-byte Folded Spill st.d $s8, $sp, 376 # 8-byte Folded Spill + fst.d $fs0, $sp, 368 # 8-byte Folded Spill ld.d $a2, $a0, 8 ld.d $s6, $a2, 256 ld.d $a3, $a0, 16 @@ -374,6 +366,8 @@ gs_type1_interpret: # @gs_type1_interpret lu12i.w $a0, 1 ori $a0, $a0, 234 st.d $a0, $sp, 24 # 8-byte Folded Spill + lu12i.w $a0, 284672 + movgr2fr.w $fs0, $a0 ld.d $s3, $sp, 64 # 8-byte Folded Reload .LBB4_6: # =>This Loop Header: Depth=1 # Child Loop BB4_9 Depth 2 @@ -1354,16 +1348,14 @@ gs_type1_interpret: # @gs_type1_interpret b .LBB4_14 .LBB4_118: # in Loop: Header=BB4_14 Depth=2 ld.d $a0, $s6, -8 + ld.d $a1, $s6, 0 addi.d $s8, $s6, -8 movgr2fr.d $fa0, $a0 - ld.d $a0, $s6, 0 ffint.s.l $fa0, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI4_1) - fld.s $fa1, $a1, %pc_lo12(.LCPI4_1) - movgr2fr.d $fa2, $a0 - ffint.s.l $fa2, $fa2 - fdiv.s $fa0, $fa0, $fa2 - fmul.s $fa0, $fa0, $fa1 + movgr2fr.d $fa1, $a1 + ffint.s.l $fa1, $fa1 + fdiv.s $fa0, $fa0, $fa1 + fmul.s $fa0, $fa0, $fs0 ftintrz.l.s $fa0, $fa0 fst.d $fa0, $s6, -8 b .LBB4_14 @@ -1438,6 +1430,7 @@ gs_type1_interpret: # @gs_type1_interpret .LBB4_129: # %.loopexit ld.d $a0, $sp, 96 # 8-byte Folded Reload addi.w $a0, $a0, 0 + fld.d $fs0, $sp, 368 # 8-byte Folded Reload ld.d $s8, $sp, 376 # 8-byte Folded Reload ld.d $s7, $sp, 384 # 8-byte Folded Reload ld.d $s6, $sp, 392 # 8-byte Folded Reload @@ -1463,11 +1456,11 @@ gs_type1_interpret: # @gs_type1_interpret beqz $a0, .LBB4_133 # %bb.131: ld.d $a1, $s0, 480 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_0) ld.d $a0, $s0, 0 movgr2fr.d $fa0, $a1 ffint.d.l $fa0, $fa0 + lu52i.d $a1, $zero, 1011 + movgr2fr.d $fa1, $a1 ld.d $a1, $s0, 488 fmul.d $fa0, $fa0, $fa1 fcvt.s.d $fa0, $fa0 @@ -1609,11 +1602,11 @@ gs_type1_interpret: # @gs_type1_interpret fld.s $fa5, $sp, 172 .LBB4_145: ld.d $a1, $s0, 480 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_0) ld.d $a0, $s0, 0 movgr2fr.d $fa0, $a1 ffint.d.l $fa0, $fa0 + lu52i.d $a1, $zero, 1011 + movgr2fr.d $fa1, $a1 ld.d $a1, $s0, 488 fmul.d $fa0, $fa0, $fa1 fcvt.s.d $fa0, $fa0 diff --git a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gxcache.s b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gxcache.s index 930bfd03..60b9afa2 100644 --- a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gxcache.s +++ b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gxcache.s @@ -327,12 +327,7 @@ gx_lookup_cached_char: # @gx_lookup_cached_char .Lfunc_end5: .size gx_lookup_cached_char, .Lfunc_end5-gx_lookup_cached_char # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function gx_copy_cached_char -.LCPI6_0: - .dword 0x3f30000000000000 # double 2.44140625E-4 - .text - .globl gx_copy_cached_char + .globl gx_copy_cached_char # -- Begin function gx_copy_cached_char .p2align 5 .type gx_copy_cached_char,@function gx_copy_cached_char: # @gx_copy_cached_char @@ -388,18 +383,18 @@ gx_copy_cached_char: # @gx_copy_cached_char ori $a2, $zero, 96 pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI6_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI6_0) - movgr2fr.d $fa1, $s2 - fld.s $fa2, $sp, 88 - ffint.d.l $fa1, $fa1 - fmul.d $fa1, $fa1, $fa0 - fcvt.s.d $fa1, $fa1 - fsub.s $fa1, $fa2, $fa1 - fst.s $fa1, $sp, 88 - movgr2fr.d $fa1, $s3 - ffint.d.l $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.d $fa0, $s2 + ffint.d.l $fa0, $fa0 + lu52i.d $a0, $zero, 1011 + fld.s $fa1, $sp, 88 + movgr2fr.d $fa2, $a0 + fmul.d $fa0, $fa0, $fa2 + fcvt.s.d $fa0, $fa0 + fsub.s $fa0, $fa1, $fa0 + fst.s $fa0, $sp, 88 + movgr2fr.d $fa0, $s3 + ffint.d.l $fa0, $fa0 + fmul.d $fa0, $fa0, $fa2 fld.s $fa1, $sp, 104 fcvt.s.d $fa0, $fa0 ld.hu $a0, $s0, 24 diff --git a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gxcolor.s b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gxcolor.s index bab767ed..8eb05fa1 100644 --- a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gxcolor.s +++ b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gxcolor.s @@ -144,24 +144,18 @@ gx_color_to_hsb: # @gx_color_to_hsb .Lfunc_end2: .size gx_color_to_hsb, .Lfunc_end2-gx_color_to_hsb # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function gx_color_from_hsb -.LCPI3_0: - .word 0x477fff00 # float 65535 -.LCPI3_1: - .word 0x462aac00 # float 10923 - .text - .globl gx_color_from_hsb + .globl gx_color_from_hsb # -- Begin function gx_color_from_hsb .p2align 5 .type gx_color_from_hsb,@function gx_color_from_hsb: # @gx_color_from_hsb # %bb.0: beqz $a2, .LBB3_4 # %bb.1: - pcalau12i $a4, %pc_hi20(.LCPI3_0) - fld.s $fa0, $a4, %pc_lo12(.LCPI3_0) - movgr2fr.w $fa1, $a3 - ffint.s.w $fa1, $fa1 + movgr2fr.w $fa0, $a3 + ffint.s.w $fa1, $fa0 + lu12i.w $a3, 292863 + ori $a3, $a3, 3840 + movgr2fr.w $fa0, $a3 fdiv.s $fa2, $fa1, $fa0 movgr2fr.w $fa1, $a2 ffint.s.w $fa1, $fa1 @@ -174,12 +168,13 @@ gx_color_from_hsb: # @gx_color_from_hsb ori $a3, $a3, 2731 mul.d $a3, $a2, $a3 sub.d $a1, $a1, $a3 - pcalau12i $a3, %pc_hi20(.LCPI3_1) - fld.s $fa1, $a3, %pc_lo12(.LCPI3_1) bstrpick.d $a1, $a1, 15, 0 + movgr2fr.w $fa1, $a1 + ffint.s.w $fa1, $fa1 + lu12i.w $a1, 287402 + ori $a1, $a1, 3072 movgr2fr.w $fa3, $a1 - ffint.s.w $fa3, $fa3 - fdiv.s $fa3, $fa3, $fa1 + fdiv.s $fa3, $fa1, $fa3 vldi $vr4, -1168 fsub.s $fa1, $fa4, $fa5 fmul.s $fa1, $fa2, $fa1 diff --git a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gxpath.s b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gxpath.s index 68d42431..f427c105 100644 --- a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gxpath.s +++ b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gxpath.s @@ -810,36 +810,35 @@ gx_path_add_curve: # @gx_path_add_curve .Lfunc_end11: .size gx_path_add_curve, .Lfunc_end11-gx_path_add_curve # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function gx_path_add_arc -.LCPI12_0: - .dword 0x3fe199999999999a # double 0.55000000000000004 -.LCPI12_1: - .dword 0x3fdccccccccccccc # double 0.44999999999999996 - .text - .globl gx_path_add_arc + .globl gx_path_add_arc # -- Begin function gx_path_add_arc .p2align 5 .type gx_path_add_arc,@function gx_path_add_arc: # @gx_path_add_arc # %bb.0: - pcalau12i $a7, %pc_hi20(.LCPI12_0) - fld.d $fa0, $a7, %pc_lo12(.LCPI12_0) move $a7, $a4 move $t0, $a3 - movgr2fr.d $fa1, $a5 - ffint.d.l $fa1, $fa1 - fmul.d $fa1, $fa1, $fa0 - ftintrz.l.d $fa1, $fa1 - movfr2gr.d $a3, $fa1 - movgr2fr.d $fa1, $a6 - ffint.d.l $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.d $fa0, $a5 + ffint.d.l $fa0, $fa0 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1022 + movgr2fr.d $fa1, $a3 + fmul.d $fa0, $fa0, $fa1 + ftintrz.l.d $fa0, $fa0 + movfr2gr.d $a3, $fa0 + movgr2fr.d $fa0, $a6 + ffint.d.l $fa0, $fa0 + fmul.d $fa0, $fa0, $fa1 ftintrz.l.d $fa0, $fa0 - pcalau12i $a4, %pc_hi20(.LCPI12_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI12_1) movfr2gr.d $a4, $fa0 movgr2fr.d $fa0, $a1 ffint.d.l $fa0, $fa0 + lu12i.w $a1, -209716 + ori $a1, $a1, 3276 + lu32i.d $a1, -209716 + lu52i.d $a1, $a1, 1021 + movgr2fr.d $fa1, $a1 fmul.d $fa0, $fa0, $fa1 ftintrz.l.d $fa0, $fa0 movfr2gr.d $a1, $fa0 diff --git a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gxpath2.s b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gxpath2.s index 63087a27..7add1b91 100644 --- a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gxpath2.s +++ b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gxpath2.s @@ -629,29 +629,25 @@ gx_path_translate: # @gx_path_translate .Lfunc_end10: .size gx_path_translate, .Lfunc_end10-gx_path_translate # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function gx_path_flatten -.LCPI11_0: - .dword 0x40b0000000000000 # double 4096 -.LCPI11_1: - .dword 0x3fdccccccccccccd # double 0.45000000000000001 - .text - .globl gx_path_flatten + .globl gx_path_flatten # -- Begin function gx_path_flatten .p2align 5 .type gx_path_flatten,@function gx_path_flatten: # @gx_path_flatten # %bb.0: - pcalau12i $a2, %pc_hi20(.LCPI11_0) - fld.d $fa1, $a2, %pc_lo12(.LCPI11_0) + lu52i.d $a2, $zero, 1035 + movgr2fr.d $fa1, $a2 fmul.d $fa0, $fa0, $fa1 fcvt.s.d $fa0, $fa0 ftintrz.l.s $fa1, $fa0 - pcalau12i $a2, %pc_hi20(.LCPI11_1) - fld.d $fa2, $a2, %pc_lo12(.LCPI11_1) pcalau12i $a2, %pc_hi20(scaled_flat) fst.d $fa1, $a2, %pc_lo12(scaled_flat) fcvt.d.s $fa0, $fa0 - fmul.d $fa0, $fa0, $fa2 + lu12i.w $a2, -209716 + ori $a2, $a2, 3277 + lu32i.d $a2, -209716 + lu52i.d $a2, $a2, 1021 + movgr2fr.d $fa1, $a2 + fmul.d $fa0, $fa0, $fa1 fcvt.s.d $fa0, $fa0 pcalau12i $a2, %pc_hi20(scaled_flat_sq) fst.s $fa0, $a2, %pc_lo12(scaled_flat_sq) diff --git a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gxstroke.s b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gxstroke.s index 493dfc33..ce28e54f 100644 --- a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gxstroke.s +++ b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/gxstroke.s @@ -45,14 +45,7 @@ gx_stroke_fill: # @gx_stroke_fill .Lfunc_end0: .size gx_stroke_fill, .Lfunc_end0-gx_stroke_fill # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function stroke -.LCPI1_0: - .word 0x45800000 # float 4096 -.LCPI1_1: - .word 0x39800000 # float 2.44140625E-4 - .text - .globl stroke + .globl stroke # -- Begin function stroke .p2align 5 .type stroke,@function stroke: # @stroke @@ -83,7 +76,7 @@ stroke: # @stroke ld.d $a0, $a3, 24 st.d $a0, $sp, 128 # 8-byte Folded Spill fld.s $fa0, $a3, 0 - st.d $a3, $sp, 40 # 8-byte Folded Spill + st.d $a3, $sp, 32 # 8-byte Folded Spill ld.w $a0, $a3, 32 st.d $a0, $sp, 136 # 8-byte Folded Spill ld.d $a1, $a2, 40 @@ -101,9 +94,10 @@ stroke: # @stroke movgr2fr.w $fa0, $a0 beqz $s1, .LBB1_8 # %bb.2: # %.thread - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI1_0) - fmul.s $fs2, $fa0, $fa1 + lu12i.w $a0, 284672 + movgr2fr.w $fa1, $a0 + fmul.s $fa0, $fa0, $fa1 + fst.s $fa0, $sp, 52 # 4-byte Folded Spill ori $a0, $zero, 1 st.d $a0, $sp, 56 # 8-byte Folded Spill ld.w $a0, $fp, 112 @@ -114,7 +108,8 @@ stroke: # @stroke lu12i.w $a0, 246333 ori $a0, $a0, 1802 st.w $a0, $sp, 656 - # implicit-def: $f26 + # implicit-def: $f0 + # kill: killed $f0 ld.w $a0, $fp, 112 beqz $a0, .LBB1_9 .LBB1_4: @@ -148,13 +143,14 @@ stroke: # @stroke fcmp.clt.s $fcc0, $fa2, $fa1 fsel $fa1, $fa2, $fa1, $fcc0 fmul.s $fa1, $fa1, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.s $fa2, $a0, %pc_lo12(.LCPI1_0) - vldi $vr3, -1176 - fcmp.cule.s $fcc0, $fa3, $fa1 + vldi $vr2, -1176 + fcmp.cule.s $fcc0, $fa2, $fa1 movcf2gr $a0, $fcc0 st.d $a0, $sp, 56 # 8-byte Folded Spill - fmul.s $fs2, $fa0, $fa2 + lu12i.w $a0, 284672 + movgr2fr.w $fa1, $a0 + fmul.s $fa0, $fa0, $fa1 + fst.s $fa0, $sp, 52 # 4-byte Folded Spill ld.w $a0, $fp, 112 bnez $a0, .LBB1_4 .LBB1_9: @@ -173,10 +169,12 @@ stroke: # @stroke st.d $a0, $sp, 104 # 8-byte Folded Spill vrepli.b $vr0, 0 vst $vr0, $sp, 64 # 16-byte Folded Spill - st.d $fp, $sp, 32 # 8-byte Folded Spill + lu12i.w $a0, 235520 + movgr2fr.w $fs3, $a0 + st.d $fp, $sp, 24 # 8-byte Folded Spill st.d $s1, $sp, 88 # 8-byte Folded Spill - st.d $s2, $sp, 24 # 8-byte Folded Spill - st.d $s3, $sp, 16 # 8-byte Folded Spill + st.d $s2, $sp, 16 # 8-byte Folded Spill + st.d $s3, $sp, 8 # 8-byte Folded Spill b .LBB1_12 .p2align 4, , 16 .LBB1_11: # in Loop: Header=BB1_12 Depth=1 @@ -190,11 +188,11 @@ stroke: # @stroke # %bb.13: # %.lr.ph268.preheader # in Loop: Header=BB1_12 Depth=1 ld.d $s7, $s4, 24 - st.d $s4, $sp, 48 # 8-byte Folded Spill + st.d $s4, $sp, 40 # 8-byte Folded Spill ld.d $fp, $s4, 32 - ld.d $a0, $sp, 40 # 8-byte Folded Reload + ld.d $a0, $sp, 32 # 8-byte Folded Reload ld.bu $s6, $a0, 40 - fld.s $fs3, $a0, 48 + fld.s $fs4, $a0, 48 ld.w $s5, $a0, 44 move $s4, $zero move $s1, $zero @@ -231,14 +229,14 @@ stroke: # @stroke sub.d $a0, $s7, $s2 sub.d $a1, $fp, $s3 movgr2fr.d $fa0, $a0 - ffint.s.l $fs4, $fa0 - movgr2fr.d $fa0, $a1 ffint.s.l $fs5, $fa0 + movgr2fr.d $fa0, $a1 + ffint.s.l $fs6, $fa0 ld.d $a0, $sp, 88 # 8-byte Folded Reload beqz $a0, .LBB1_26 # %bb.20: # in Loop: Header=BB1_16 Depth=2 - fcvt.d.s $fa0, $fs4 - fcvt.d.s $fa1, $fs5 + fcvt.d.s $fa0, $fs5 + fcvt.d.s $fa1, $fs6 addi.d $a1, $sp, 144 ld.d $a0, $sp, 120 # 8-byte Folded Reload pcaddu18i $ra, %call36(gs_idtransform) @@ -280,8 +278,8 @@ stroke: # @stroke jirl $ra, $a4, 0 b .LBB1_14 .LBB1_26: # in Loop: Header=BB1_16 Depth=2 - fdiv.s $fa1, $fs4, $fs0 - fdiv.s $fa2, $fs5, $fs1 + fdiv.s $fa1, $fs5, $fs0 + fdiv.s $fa2, $fs6, $fs1 .LBB1_27: # in Loop: Header=BB1_16 Depth=2 fmul.s $fa0, $fa2, $fa2 fmadd.s $fa0, $fa1, $fa1, $fa0 @@ -289,7 +287,8 @@ stroke: # @stroke ld.d $a0, $sp, 56 # 8-byte Folded Reload beqz $a0, .LBB1_32 # %bb.28: # in Loop: Header=BB1_16 Depth=2 - fdiv.s $fa3, $fs2, $fa0 + fld.s $fa3, $sp, 52 # 4-byte Folded Reload + fdiv.s $fa3, $fa3, $fa0 fmul.s $fa1, $fa3, $fa1 fst.s $fa1, $sp, 144 fmul.s $fa2, $fa3, $fa2 @@ -387,14 +386,12 @@ stroke: # @stroke ld.d $a5, $sp, 136 # 8-byte Folded Reload beqz $a5, .LBB1_50 # %bb.36: # in Loop: Header=BB1_16 Depth=2 - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.s $fa1, $a0, %pc_lo12(.LCPI1_1) - fmul.s $fs7, $fa0, $fa1 - fcmp.cule.s $fcc0, $fs7, $fs3 + fmul.s $fs2, $fa0, $fs3 + fcmp.cule.s $fcc0, $fs2, $fs4 bcnez $fcc0, .LBB1_52 # %bb.37: # %.lr.ph.preheader # in Loop: Header=BB1_16 Depth=2 - fmov.s $fs6, $fs7 + fmov.s $fs7, $fs2 b .LBB1_43 .p2align 4, , 16 .LBB1_38: # in Loop: Header=BB1_43 Depth=3 @@ -420,7 +417,7 @@ stroke: # @stroke move $s4, $zero .LBB1_42: # %.thread238 # in Loop: Header=BB1_43 Depth=3 - fsub.s $fs6, $fs6, $fs3 + fsub.s $fs7, $fs7, $fs4 addi.w $a0, $s5, 1 ld.d $a1, $sp, 136 # 8-byte Folded Reload xor $a1, $a0, $a1 @@ -428,8 +425,8 @@ stroke: # @stroke masknez $s5, $a0, $a1 slli.d $a0, $s5, 2 ld.d $a1, $sp, 128 # 8-byte Folded Reload - fldx.s $fs3, $a1, $a0 - fcmp.clt.s $fcc0, $fs3, $fs6 + fldx.s $fs4, $a1, $a0 + fcmp.clt.s $fcc0, $fs4, $fs7 bceqz $fcc0, .LBB1_53 .LBB1_43: # %.lr.ph # Parent Loop BB1_12 Depth=1 @@ -437,12 +434,12 @@ stroke: # @stroke # => This Inner Loop Header: Depth=3 move $a1, $s2 move $a0, $s3 - fdiv.s $fa0, $fs3, $fs7 - fmul.s $fa1, $fa0, $fs4 + fdiv.s $fa0, $fs4, $fs2 + fmul.s $fa1, $fa0, $fs5 ftintrz.l.s $fa1, $fa1 movfr2gr.d $a2, $fa1 add.d $s2, $s2, $a2 - fmul.s $fa0, $fa0, $fs5 + fmul.s $fa0, $fa0, $fs6 ftintrz.l.s $fa0, $fa0 movfr2gr.d $a2, $fa0 add.d $s3, $s3, $a2 @@ -540,7 +537,7 @@ stroke: # @stroke st.d $a3, $sp, 608 b .LBB1_22 .LBB1_52: # in Loop: Header=BB1_16 Depth=2 - fmov.s $fs6, $fs7 + fmov.s $fs7, $fs2 .LBB1_53: # %._crit_edge # in Loop: Header=BB1_16 Depth=2 beqz $s6, .LBB1_59 @@ -571,7 +568,7 @@ stroke: # @stroke pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 .LBB1_59: # in Loop: Header=BB1_16 Depth=2 - fsub.s $fs3, $fs3, $fs6 + fsub.s $fs4, $fs4, $fs7 b .LBB1_15 .LBB1_60: # in Loop: Header=BB1_16 Depth=2 ld.d $a0, $sp, 616 @@ -610,14 +607,14 @@ stroke: # @stroke # in Loop: Header=BB1_12 Depth=1 beqz $s4, .LBB1_69 # %bb.63: # in Loop: Header=BB1_12 Depth=1 - ld.d $a0, $sp, 48 # 8-byte Folded Reload + ld.d $a0, $sp, 40 # 8-byte Folded Reload ld.bu $a0, $a0, 56 - ld.d $fp, $sp, 32 # 8-byte Folded Reload + ld.d $fp, $sp, 24 # 8-byte Folded Reload beqz $a0, .LBB1_66 # %bb.64: # in Loop: Header=BB1_12 Depth=1 beqz $s6, .LBB1_66 # %bb.65: # in Loop: Header=BB1_12 Depth=1 - ld.d $a0, $sp, 40 # 8-byte Folded Reload + ld.d $a0, $sp, 32 # 8-byte Folded Reload ld.w $a0, $a0, 40 addi.d $a2, $sp, 152 bnez $a0, .LBB1_67 @@ -640,11 +637,11 @@ stroke: # @stroke .p2align 4, , 16 .LBB1_69: # in Loop: Header=BB1_12 Depth=1 move $s4, $s0 - ld.d $fp, $sp, 32 # 8-byte Folded Reload + ld.d $fp, $sp, 24 # 8-byte Folded Reload .LBB1_70: # %._crit_edge269.thread # in Loop: Header=BB1_12 Depth=1 - ld.d $s2, $sp, 24 # 8-byte Folded Reload - ld.d $s3, $sp, 16 # 8-byte Folded Reload + ld.d $s2, $sp, 16 # 8-byte Folded Reload + ld.d $s3, $sp, 8 # 8-byte Folded Reload ld.d $a0, $s2, %pc_lo12(stroke_path) bne $a0, $s3, .LBB1_11 .LBB1_71: # in Loop: Header=BB1_12 Depth=1 diff --git a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/iscan.s b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/iscan.s index 499608f2..2de5fb0a 100644 --- a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/iscan.s +++ b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/iscan.s @@ -1862,12 +1862,8 @@ scan_string: # @scan_string .word .LBB4_51-.LJTI4_0 .word .LBB4_47-.LJTI4_0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function scan_number -.LCPI5_0: - .dword 0x412e848000000000 # double 1.0E+6 .text - .globl scan_number + .globl scan_number # -- Begin function scan_number .p2align 5 .type scan_number,@function scan_number: # @scan_number @@ -2463,8 +2459,10 @@ scan_number: # @scan_number ori $a0, $zero, 7 bltu $s3, $a0, .LBB5_120 # %bb.118: # %.lr.ph277.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa0, $a0 ori $a0, $zero, 6 .p2align 4, , 16 .LBB5_119: # %.lr.ph277 @@ -2486,8 +2484,10 @@ scan_number: # @scan_number addi.w $a0, $zero, -7 bltu $a0, $s3, .LBB5_125 # %bb.123: # %.lr.ph271.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa0, $a0 addi.w $a0, $zero, -12 .LBB5_124: # %.lr.ph271 # =>This Inner Loop Header: Depth=1 diff --git a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/iutil.s b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/iutil.s index 2d28b0c4..dc496605 100644 --- a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/iutil.s +++ b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/iutil.s @@ -457,14 +457,7 @@ num_params: # @num_params .Lfunc_end6: .size num_params, .Lfunc_end6-num_params # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function real_param -.LCPI7_0: - .dword 0x3ff0000a7c5ac472 # double 1.0000100000000001 -.LCPI7_1: - .dword 0xbee4f8b588e368f1 # double -1.0000000000000001E-5 - .text - .globl real_param + .globl real_param # -- Begin function real_param .p2align 5 .type real_param,@function real_param: # @real_param @@ -479,26 +472,29 @@ real_param: # @real_param # %bb.2: ld.d $a0, $a0, 0 movgr2fr.d $fa0, $a0 - ffint.s.l $fa1, $fa0 + ffint.s.l $fa2, $fa0 beqz $a2, .LBB7_7 .LBB7_3: movgr2fr.w $fa0, $zero - fcmp.cule.s $fcc0, $fa0, $fa1 - fcvt.d.s $fa2, $fa1 + fcmp.cule.s $fcc0, $fa0, $fa2 + fcvt.d.s $fa1, $fa2 bcnez $fcc0, .LBB7_10 # %bb.4: - pcalau12i $a0, %pc_hi20(.LCPI7_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI7_1) - fcmp.clt.d $fcc0, $fa2, $fa1 + lu12i.w $a0, -487882 + ori $a0, $a0, 2289 + lu32i.d $a0, 325813 + lu52i.d $a0, $a0, -1042 + movgr2fr.d $fa2, $a0 + fcmp.clt.d $fcc0, $fa1, $fa2 bceqz $fcc0, .LBB7_8 # %bb.5: addi.w $a0, $zero, -15 ret .LBB7_6: - fld.s $fa1, $a0, 0 + fld.s $fa2, $a0, 0 bnez $a2, .LBB7_3 .LBB7_7: - fmov.s $fa0, $fa1 + fmov.s $fa0, $fa2 .LBB7_8: move $a0, $zero fst.s $fa0, $a1, 0 @@ -508,13 +504,16 @@ real_param: # @real_param ret .LBB7_10: vldi $vr0, -1168 - fcmp.cule.s $fcc0, $fa1, $fa0 - fmov.s $fa0, $fa1 + fcmp.cule.s $fcc0, $fa2, $fa0 + fmov.s $fa0, $fa2 bcnez $fcc0, .LBB7_8 # %bb.11: - pcalau12i $a0, %pc_hi20(.LCPI7_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI7_0) - fcmp.clt.d $fcc0, $fa0, $fa2 + lu12i.w $a0, 509356 + ori $a0, $a0, 1138 + lu32i.d $a0, 10 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa0, $a0 + fcmp.clt.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB7_13 # %bb.12: addi.w $a0, $zero, -15 diff --git a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/zarith.s b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/zarith.s index cb660e1c..56d638ab 100644 --- a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/zarith.s +++ b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/zarith.s @@ -163,12 +163,7 @@ zdiv: # @zdiv .Lfunc_end1: .size zdiv, .Lfunc_end1-zdiv # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function zmul -.LCPI2_0: - .word 0xcf000000 # float -2.14748365E+9 - .text - .globl zmul + .globl zmul # -- Begin function zmul .p2align 5 .type zmul,@function zmul: # @zmul @@ -250,8 +245,9 @@ zmul: # @zmul fmul.s $fa0, $fa0, $fa1 bne $a1, $a2, .LBB2_10 # %bb.15: - pcalau12i $a2, %pc_hi20(.LCPI2_0) - fld.s $fa1, $a2, %pc_lo12(.LCPI2_0) + lu12i.w $a2, -200704 + lu32i.d $a2, 0 + movgr2fr.w $fa1, $a2 fcmp.ceq.s $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB2_10 .LBB2_16: diff --git a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/zmath.s b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/zmath.s index b1c498e4..bb71462a 100644 --- a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/zmath.s +++ b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/zmath.s @@ -129,20 +129,7 @@ zarcsin: # @zarcsin .Lfunc_end3: .size zarcsin, .Lfunc_end3-zarcsin # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function zatan -.LCPI4_0: - .dword 0xb690000000000000 # double -7.0064923216240854E-46 -.LCPI4_1: - .dword 0x40667ffff0000000 # double 179.99999237060547 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI4_2: - .word 0xc3340000 # float -180 -.LCPI4_3: - .word 0x43340000 # float 180 - .text - .globl zatan + .globl zatan # -- Begin function zatan .p2align 5 .type zatan,@function zatan: # @zatan @@ -178,15 +165,15 @@ zatan: # @zatan jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(radians_to_degrees) fld.d $fa1, $a0, %pc_lo12(radians_to_degrees) - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI4_0) fmul.d $fa1, $fa0, $fa1 - fcmp.cule.d $fcc0, $fa2, $fa1 + lu52i.d $a0, $zero, -1175 + movgr2fr.d $fa0, $a0 + fcmp.cule.d $fcc0, $fa0, $fa1 fcvt.s.d $fa0, $fa1 bcnez $fcc0, .LBB4_8 # %bb.5: - pcalau12i $a0, %pc_hi20(.LCPI4_3) - fld.s $fa1, $a0, %pc_lo12(.LCPI4_3) + lu12i.w $a0, 275264 + movgr2fr.w $fa1, $a0 .p2align 4, , 16 .LBB4_6: # %.preheader # =>This Inner Loop Header: Depth=1 @@ -195,21 +182,24 @@ zatan: # @zatan bcnez $fcc0, .LBB4_6 b .LBB4_11 .LBB4_7: - pcalau12i $a0, %pc_hi20(.LCPI4_3) - fld.s $fa1, $a0, %pc_lo12(.LCPI4_3) fcmp.clt.s $fcc0, $fa0, $fs0 - fsel $fa0, $fs0, $fa1, $fcc0 + lu12i.w $a0, 275264 + movgr2fr.w $fa0, $a0 + fsel $fa0, $fs0, $fa0, $fcc0 b .LBB4_13 .LBB4_8: # %.preheader19 - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI4_1) + lu12i.w $a0, -65536 + lu32i.d $a0, 425983 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa2, $a0 fcmp.cult.d $fcc0, $fa1, $fa2 bcnez $fcc0, .LBB4_11 # %bb.9: - pcalau12i $a0, %pc_hi20(.LCPI4_2) - fld.s $fa1, $a0, %pc_lo12(.LCPI4_2) - pcalau12i $a0, %pc_hi20(.LCPI4_3) - fld.s $fa2, $a0, %pc_lo12(.LCPI4_3) + lu12i.w $a0, -249024 + lu32i.d $a0, 0 + movgr2fr.w $fa1, $a0 + lu12i.w $a0, 275264 + movgr2fr.w $fa2, $a0 .p2align 4, , 16 .LBB4_10: # %.lr.ph # =>This Inner Loop Header: Depth=1 @@ -220,8 +210,8 @@ zatan: # @zatan fcmp.cule.s $fcc0, $fs0, $fs1 bcnez $fcc0, .LBB4_13 # %bb.12: - pcalau12i $a0, %pc_hi20(.LCPI4_3) - fld.s $fa1, $a0, %pc_lo12(.LCPI4_3) + lu12i.w $a0, 275264 + movgr2fr.w $fa1, $a0 fadd.s $fa0, $fa0, $fa1 .LBB4_13: fst.s $fa0, $fp, -16 diff --git a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/zmisc.s b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/zmisc.s index c45c88f5..c111faa5 100644 --- a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/zmisc.s +++ b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/zmisc.s @@ -128,14 +128,7 @@ zbind: # @zbind .Lfunc_end0: .size zbind, .Lfunc_end0-zbind # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function zcurrenttime -.LCPI1_0: - .dword 0x40ed4c0000000000 # double 6.0E+4 -.LCPI1_1: - .dword 0x4096800000000000 # double 1440 - .text - .globl zcurrenttime + .globl zcurrenttime # -- Begin function zcurrenttime .p2align 5 .type zcurrenttime,@function zcurrenttime: # @zcurrenttime @@ -163,23 +156,27 @@ zcurrenttime: # @zcurrenttime addi.d $sp, $sp, 32 ret .LBB1_2: - move $a0, $zero ld.d $a1, $sp, 0 ld.d $a2, $sp, 8 - pcalau12i $a3, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a3, %pc_lo12(.LCPI1_0) - movgr2fr.d $fa1, $a1 - movgr2fr.d $fa2, $a2 - ffint.d.l $fa2, $fa2 - fdiv.d $fa0, $fa2, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI1_1) - fld.d $fa2, $a1, %pc_lo12(.LCPI1_1) - ori $a1, $zero, 44 - st.h $a1, $fp, 24 + move $a0, $zero + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + movgr2fr.d $fa1, $a2 ffint.d.l $fa1, $fa1 - fmadd.d $fa0, $fa1, $fa2, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -177152 + lu52i.d $a1, $a1, 1038 + movgr2fr.d $fa2, $a1 + ori $a1, $zero, 0 + fdiv.d $fa1, $fa1, $fa2 + lu32i.d $a1, 425984 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fa2, $a1 + fmadd.d $fa0, $fa0, $fa2, $fa1 fcvt.s.d $fa0, $fa0 fst.s $fa0, $fp, 16 + ori $a1, $zero, 44 + st.h $a1, $fp, 24 ld.d $fp, $sp, 16 # 8-byte Folded Reload ld.d $ra, $sp, 24 # 8-byte Folded Reload addi.d $sp, $sp, 32 diff --git a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/ztype.s b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/ztype.s index e51b93b9..27c18194 100644 --- a/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/ztype.s +++ b/results/MultiSource/Benchmarks/MallocBench/gs/CMakeFiles/gs.dir/ztype.s @@ -449,16 +449,7 @@ zwcheck: # @zwcheck .Lfunc_end9: .size zwcheck, .Lfunc_end9-zwcheck # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function zcvi -.LCPI10_0: - .word 0x4f000000 # float 2.14748365E+9 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI10_1: - .dword 0xc1e0000000200000 # double -2147483649 - .text - .globl zcvi + .globl zcvi # -- Begin function zcvi .p2align 5 .type zcvi,@function zcvi: # @zcvi @@ -480,16 +471,17 @@ zcvi: # @zcvi # %bb.3: fld.s $fa0, $a0, 0 .LBB10_4: - pcalau12i $a1, %pc_hi20(.LCPI10_0) - fld.s $fa1, $a1, %pc_lo12(.LCPI10_0) + lu12i.w $a1, 323584 + movgr2fr.w $fa1, $a1 fcmp.cle.s $fcc0, $fa1, $fa0 addi.w $a1, $zero, -15 bcnez $fcc0, .LBB10_16 # %bb.5: - pcalau12i $a2, %pc_hi20(.LCPI10_1) - fld.d $fa1, $a2, %pc_lo12(.LCPI10_1) - fcvt.d.s $fa2, $fa0 - fcmp.cle.d $fcc0, $fa2, $fa1 + fcvt.d.s $fa1, $fa0 + lu12i.w $a2, 512 + lu52i.d $a2, $a2, -994 + movgr2fr.d $fa2, $a2 + fcmp.cle.d $fcc0, $fa1, $fa2 bcnez $fcc0, .LBB10_16 # %bb.6: move $a1, $zero diff --git a/results/MultiSource/Benchmarks/McCat/04-bisect/CMakeFiles/bisect.dir/dbisect.s b/results/MultiSource/Benchmarks/McCat/04-bisect/CMakeFiles/bisect.dir/dbisect.s index 03e0f053..b5acd1ec 100644 --- a/results/MultiSource/Benchmarks/McCat/04-bisect/CMakeFiles/bisect.dir/dbisect.s +++ b/results/MultiSource/Benchmarks/McCat/04-bisect/CMakeFiles/bisect.dir/dbisect.s @@ -1,69 +1,50 @@ .file "dbisect.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function sturm -.LCPI0_0: - .dword 0x4330000000000000 # double 4503599627370496 .text - .globl sturm + .globl sturm # -- Begin function sturm .p2align 5 .type sturm,@function sturm: # @sturm # %bb.0: + move $a4, $zero blez $a0, .LBB0_6 # %bb.1: # %.lr.ph.preheader - pcalau12i $a4, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a4, %pc_lo12(.LCPI0_0) - move $a4, $zero vldi $vr3, -912 - movgr2fr.d $fa2, $zero + movgr2fr.d $fa1, $zero + lu52i.d $a5, $zero, 1075 + movgr2fr.d $fa2, $a5 b .LBB0_4 .p2align 4, , 16 .LBB0_2: # in Loop: Header=BB0_4 Depth=1 fld.d $fa3, $a2, 0 fabs.d $fa3, $fa3 - fmul.d $fa3, $fa3, $fa1 + fmul.d $fa3, $fa3, $fa2 .LBB0_3: # in Loop: Header=BB0_4 Depth=1 fld.d $fa4, $a1, 0 fsub.d $fa4, $fa4, $fa0 fsub.d $fa3, $fa4, $fa3 - fcmp.clt.d $fcc0, $fa3, $fa2 + fcmp.clt.d $fcc0, $fa3, $fa1 movcf2gr $a5, $fcc0 add.w $a4, $a4, $a5 addi.d $a2, $a2, 8 addi.d $a3, $a3, 8 addi.d $a0, $a0, -1 addi.d $a1, $a1, 8 - beqz $a0, .LBB0_7 + beqz $a0, .LBB0_6 .LBB0_4: # %.lr.ph # =>This Inner Loop Header: Depth=1 - fcmp.ceq.d $fcc0, $fa3, $fa2 + fcmp.ceq.d $fcc0, $fa3, $fa1 bcnez $fcc0, .LBB0_2 # %bb.5: # in Loop: Header=BB0_4 Depth=1 fld.d $fa4, $a3, 0 fdiv.d $fa3, $fa4, $fa3 b .LBB0_3 -.LBB0_6: - move $a4, $zero -.LBB0_7: # %._crit_edge +.LBB0_6: # %._crit_edge move $a0, $a4 ret .Lfunc_end0: .size sturm, .Lfunc_end0-sturm # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function dbisect -.LCPI1_0: - .dword 0xbff028f5c28f5c29 # double -1.01 -.LCPI1_1: - .dword 0x3ff028f5c28f5c29 # double 1.01 -.LCPI1_2: - .dword 0x3cb0000000000000 # double 2.2204460492503131E-16 -.LCPI1_3: - .dword 0x3cc0000000000000 # double 4.4408920985006262E-16 -.LCPI1_4: - .dword 0x4330000000000000 # double 4503599627370496 - .text - .globl dbisect + .globl dbisect # -- Begin function dbisect .p2align 5 .type dbisect,@function dbisect: # @dbisect @@ -82,39 +63,42 @@ dbisect: # @dbisect fst.d $fs1, $sp, 24 # 8-byte Folded Spill fst.d $fs2, $sp, 16 # 8-byte Folded Spill fst.d $fs3, $sp, 8 # 8-byte Folded Spill - move $fp, $a7 - move $s5, $a5 - move $s0, $a4 - move $s1, $a3 - move $s2, $a2 - move $s3, $a1 - move $s4, $a0 + move $fp, $a3 + move $s0, $a2 + move $s1, $a1 ld.d $s6, $sp, 112 st.d $zero, $a1, 0 st.d $zero, $a2, 0 - slli.d $a0, $a3, 3 - addi.d $a0, $a0, -8 - fldx.d $fa2, $s4, $a0 - fldx.d $fa3, $a1, $a0 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa4, $a0, %pc_lo12(.LCPI1_0) - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_1) - fabs.d $fa3, $fa3 - fmadd.d $fs0, $fa3, $fa4, $fa2 + slli.d $a1, $a3, 3 + addi.d $a1, $a1, -8 + fldx.d $fa1, $s1, $a1 + move $s2, $a7 + move $s5, $a5 + move $s3, $a4 + move $s4, $a0 + fldx.d $fa2, $a0, $a1 + fabs.d $fa3, $fa1 + lu12i.w $a0, -251659 + ori $a0, $a0, 3113 + lu32i.d $a0, 10485 + lu52i.d $a1, $a0, -1025 + movgr2fr.d $fa1, $a1 + fmadd.d $fs0, $fa3, $fa1, $fa2 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa1, $a0 ori $a0, $zero, 2 fmadd.d $fs1, $fa3, $fa1, $fa2 blt $a3, $a0, .LBB1_3 # %bb.1: # %.lr.ph.preheader - addi.d $a0, $s1, -2 + addi.d $a0, $fp, -2 bstrpick.d $a0, $a0, 31, 0 slli.d $a0, $a0, 3 addi.w $a1, $zero, -8 .p2align 4, , 16 .LBB1_2: # %.lr.ph # =>This Inner Loop Header: Depth=1 - fldx.d $fa2, $s3, $a0 - add.d $a2, $s3, $a0 + fldx.d $fa2, $s1, $a0 + add.d $a2, $s1, $a0 fld.d $fa3, $a2, 8 fabs.d $fa2, $fa2 fldx.d $fa4, $s4, $a0 @@ -131,12 +115,12 @@ dbisect: # @dbisect bne $a0, $a1, .LBB1_2 .LBB1_3: # %._crit_edge fadd.d $fa1, $fs0, $fs1 + fneg.d $fa2, $fs0 movgr2fr.d $fs2, $zero - pcalau12i $a0, %pc_hi20(.LCPI1_2) - fld.d $fa2, $a0, %pc_lo12(.LCPI1_2) fcmp.clt.d $fcc0, $fs2, $fa1 - fneg.d $fa1, $fs0 - fsel $fa1, $fa1, $fs1, $fcc0 + fsel $fa1, $fa2, $fs1, $fcc0 + lu52i.d $a0, $zero, 971 + movgr2fr.d $fa2, $a0 fmul.d $fa1, $fa1, $fa2 fcmp.cult.d $fcc0, $fs2, $fa0 fsel $fs3, $fa1, $fa0, $fcc0 @@ -145,15 +129,15 @@ dbisect: # @dbisect vldi $vr1, -928 fmadd.d $fa0, $fs3, $fa1, $fa0 fst.d $fa0, $a6, 0 - addi.w $a0, $s1, 1 + addi.w $a0, $fp, 1 ori $a1, $zero, 8 pcaddu18i $ra, %call36(calloc) jirl $ra, $ra, 0 beqz $a0, .LBB1_35 # %bb.4: # %.preheader133 - bge $s5, $s0, .LBB1_7 + bge $s5, $s3, .LBB1_7 # %bb.5: # %._crit_edge141.thread - st.w $zero, $fp, 0 + st.w $zero, $s2, 0 .LBB1_6: # %._crit_edge155 fld.d $fs3, $sp, 8 # 8-byte Folded Reload fld.d $fs2, $sp, 16 # 8-byte Folded Reload @@ -172,7 +156,7 @@ dbisect: # @dbisect pcaddu18i $t8, %call36(free) jr $t8 .LBB1_7: # %.lr.ph140.preheader - sub.d $a1, $s5, $s0 + sub.d $a1, $s5, $s3 addi.d $a2, $a1, 1 ori $a3, $zero, 2 move $a1, $s5 @@ -209,31 +193,33 @@ dbisect: # @dbisect addi.d $a2, $a2, -1 addi.d $a3, $a3, -8 addi.d $a1, $a1, -8 - blt $s0, $a2, .LBB1_12 + blt $s3, $a2, .LBB1_12 .LBB1_13: # %.preheader.lr.ph move $a1, $zero - st.w $zero, $fp, 0 - bstrpick.d $a2, $s1, 31, 0 - alsl.d $a3, $s0, $a0, 3 - pcalau12i $a4, %pc_hi20(.LCPI1_3) - fld.d $fa0, $a4, %pc_lo12(.LCPI1_3) + st.w $zero, $s2, 0 + bstrpick.d $a2, $fp, 31, 0 + alsl.d $a3, $s3, $a0, 3 addi.d $a4, $s5, 1 alsl.d $a5, $s5, $a0, 3 + lu52i.d $a6, $zero, 972 + movgr2fr.d $fa0, $a6 vldi $vr1, -928 + lu52i.d $a6, $zero, 1075 + movgr2fr.d $fa2, $a6 b .LBB1_16 .p2align 4, , 16 .LBB1_14: # %._crit_edge149 # in Loop: Header=BB1_16 Depth=1 - st.w $a1, $fp, 0 + st.w $a1, $s2, 0 .LBB1_15: # %._crit_edge164 # in Loop: Header=BB1_16 Depth=1 alsl.d $a7, $a6, $s6, 3 - fmul.d $fa2, $fa3, $fa1 - fst.d $fa2, $a7, 0 + fmul.d $fa3, $fa4, $fa1 + fst.d $fa3, $a7, 0 addi.d $s5, $a6, -1 addi.d $a4, $a4, -1 addi.d $a5, $a5, -8 - bge $s0, $a6, .LBB1_6 + bge $s3, $a6, .LBB1_6 .LBB1_16: # %.preheader # =>This Loop Header: Depth=1 # Child Loop BB1_17 Depth 2 @@ -245,69 +231,67 @@ dbisect: # @dbisect .p2align 4, , 16 .LBB1_17: # Parent Loop BB1_16 Depth=1 # => This Inner Loop Header: Depth=2 - fld.d $fa2, $a7, 0 - fcmp.cule.d $fcc0, $fa2, $fs0 + fld.d $fa3, $a7, 0 + fcmp.cule.d $fcc0, $fa3, $fs0 bceqz $fcc0, .LBB1_20 # %bb.18: # in Loop: Header=BB1_17 Depth=2 addi.d $t0, $t0, -1 addi.d $a7, $a7, -8 - blt $s0, $t0, .LBB1_17 + blt $s3, $t0, .LBB1_17 # %bb.19: # in Loop: Header=BB1_16 Depth=1 - fmov.d $fa2, $fs0 + fmov.d $fa3, $fs0 .LBB1_20: # in Loop: Header=BB1_16 Depth=1 slli.d $a7, $a6, 3 - fldx.d $fa3, $s6, $a7 - fcmp.clt.d $fcc0, $fa3, $fs1 - fsel $fs1, $fs1, $fa3, $fcc0 - fsub.d $fa3, $fs1, $fa2 - fabs.d $fa4, $fa2 - fabs.d $fa5, $fs1 - fadd.d $fa4, $fa4, $fa5 - fmadd.d $fa4, $fa4, $fa0, $fs3 - fcmp.cule.d $fcc0, $fa3, $fa4 - fadd.d $fa3, $fa2, $fs1 + fldx.d $fa4, $s6, $a7 + fcmp.clt.d $fcc0, $fa4, $fs1 + fsel $fs1, $fs1, $fa4, $fcc0 + fsub.d $fa4, $fs1, $fa3 + fabs.d $fa5, $fa3 + fabs.d $fa6, $fs1 + fadd.d $fa5, $fa5, $fa6 + fmadd.d $fa5, $fa5, $fa0, $fs3 + fcmp.cule.d $fcc0, $fa4, $fa5 + fadd.d $fa4, $fa3, $fs1 bceqz $fcc0, .LBB1_23 b .LBB1_15 .p2align 4, , 16 .LBB1_21: # in Loop: Header=BB1_23 Depth=2 - fmov.d $fs1, $fa3 + fmov.d $fs1, $fa4 .LBB1_22: # in Loop: Header=BB1_23 Depth=2 addi.d $a1, $a1, 1 - fsub.d $fa3, $fs1, $fa2 - fabs.d $fa4, $fa2 - fabs.d $fa5, $fs1 - fadd.d $fa4, $fa4, $fa5 - fmadd.d $fa4, $fa4, $fa0, $fs3 - fcmp.clt.d $fcc0, $fa4, $fa3 - fadd.d $fa3, $fa2, $fs1 + fsub.d $fa4, $fs1, $fa3 + fabs.d $fa5, $fa3 + fabs.d $fa6, $fs1 + fadd.d $fa5, $fa5, $fa6 + fmadd.d $fa5, $fa5, $fa0, $fs3 + fcmp.clt.d $fcc0, $fa5, $fa4 + fadd.d $fa4, $fa3, $fs1 bceqz $fcc0, .LBB1_14 .LBB1_23: # %.lr.ph148 # Parent Loop BB1_16 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB1_27 Depth 3 - fmul.d $fa3, $fa3, $fa1 + fmul.d $fa4, $fa4, $fa1 move $a7, $zero - blez $s1, .LBB1_29 + blez $fp, .LBB1_29 # %bb.24: # %.lr.ph.i.preheader # in Loop: Header=BB1_23 Depth=2 - vldi $vr4, -912 + vldi $vr5, -912 move $t0, $s4 - move $t1, $s3 + move $t1, $s1 move $t2, $a2 - move $t3, $s2 + move $t3, $s0 b .LBB1_27 .p2align 4, , 16 .LBB1_25: # in Loop: Header=BB1_27 Depth=3 - fld.d $fa4, $t1, 0 - pcalau12i $t4, %pc_hi20(.LCPI1_4) - fld.d $fa5, $t4, %pc_lo12(.LCPI1_4) - fabs.d $fa4, $fa4 - fmul.d $fa4, $fa4, $fa5 + fld.d $fa5, $t1, 0 + fabs.d $fa5, $fa5 + fmul.d $fa5, $fa5, $fa2 .LBB1_26: # in Loop: Header=BB1_27 Depth=3 - fld.d $fa5, $t0, 0 - fsub.d $fa5, $fa5, $fa3 - fsub.d $fa4, $fa5, $fa4 - fcmp.clt.d $fcc0, $fa4, $fs2 + fld.d $fa6, $t0, 0 + fsub.d $fa6, $fa6, $fa4 + fsub.d $fa5, $fa6, $fa5 + fcmp.clt.d $fcc0, $fa5, $fs2 movcf2gr $t4, $fcc0 add.w $a7, $a7, $t4 addi.d $t3, $t3, 8 @@ -319,34 +303,34 @@ dbisect: # @dbisect # Parent Loop BB1_16 Depth=1 # Parent Loop BB1_23 Depth=2 # => This Inner Loop Header: Depth=3 - fcmp.ceq.d $fcc0, $fa4, $fs2 + fcmp.ceq.d $fcc0, $fa5, $fs2 bcnez $fcc0, .LBB1_25 # %bb.28: # in Loop: Header=BB1_27 Depth=3 - fld.d $fa5, $t3, 0 - fdiv.d $fa4, $fa5, $fa4 + fld.d $fa6, $t3, 0 + fdiv.d $fa5, $fa6, $fa5 b .LBB1_26 .p2align 4, , 16 .LBB1_29: # %sturm.exit # in Loop: Header=BB1_23 Depth=2 bge $a7, $a6, .LBB1_21 # %bb.30: # in Loop: Header=BB1_23 Depth=2 - bge $a7, $s0, .LBB1_32 + bge $a7, $s3, .LBB1_32 # %bb.31: # in Loop: Header=BB1_23 Depth=2 - fst.d $fa3, $a3, 0 + fst.d $fa4, $a3, 0 b .LBB1_34 .p2align 4, , 16 .LBB1_32: # in Loop: Header=BB1_23 Depth=2 slli.d $t0, $a7, 3 - fldx.d $fa2, $s6, $t0 + fldx.d $fa3, $s6, $t0 alsl.d $t0, $a7, $a0, 3 - fcmp.cule.d $fcc0, $fa2, $fa3 - fst.d $fa3, $t0, 8 + fcmp.cule.d $fcc0, $fa3, $fa4 + fst.d $fa4, $t0, 8 bcnez $fcc0, .LBB1_34 # %bb.33: # in Loop: Header=BB1_23 Depth=2 alsl.d $a7, $a7, $s6, 3 - fst.d $fa3, $a7, 0 + fst.d $fa4, $a7, 0 .LBB1_34: # in Loop: Header=BB1_23 Depth=2 - fmov.d $fa2, $fa3 + fmov.d $fa3, $fa4 b .LBB1_22 .LBB1_35: pcalau12i $a0, %got_pc_hi20(stderr) diff --git a/results/MultiSource/Benchmarks/McCat/05-eks/CMakeFiles/eks.dir/Divsol.s b/results/MultiSource/Benchmarks/McCat/05-eks/CMakeFiles/eks.dir/Divsol.s index e46f9696..35c65b0a 100644 --- a/results/MultiSource/Benchmarks/McCat/05-eks/CMakeFiles/eks.dir/Divsol.s +++ b/results/MultiSource/Benchmarks/McCat/05-eks/CMakeFiles/eks.dir/Divsol.s @@ -496,14 +496,7 @@ WeirdHouse: # @WeirdHouse .Lfunc_end2: .size WeirdHouse, .Lfunc_end2-WeirdHouse # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function DivideAndSolve -.LCPI3_0: - .dword 0x4049000000000000 # double 50 -.LCPI3_1: - .dword 0x3ddb7cdfd9d7bdbb # double 1.0E-10 - .text - .globl DivideAndSolve + .globl DivideAndSolve # -- Begin function DivideAndSolve .p2align 5 .type DivideAndSolve,@function DivideAndSolve: # @DivideAndSolve @@ -550,16 +543,23 @@ DivideAndSolve: # @DivideAndSolve addi.d $a0, $s0, 16 st.d $a0, $sp, 16 # 8-byte Folded Spill vldi $vr2, -784 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fs3, $a0, %pc_lo12(.LCPI3_0) + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fs3, $a0 movgr2fr.d $fs4, $zero + lu12i.w $a0, -156293 + ori $a0, $a0, 3515 + lu32i.d $a0, -295713 + lu52i.d $a0, $a0, 989 + movgr2fr.d $fs5, $a0 ori $s5, $zero, 51 ori $s3, $zero, 408 b .LBB3_3 .p2align 4, , 16 .LBB3_2: # %._crit_edge110 # in Loop: Header=BB3_3 Depth=1 - ftintrz.w.d $fa0, $fs5 + ftintrz.w.d $fa0, $fs6 movfr2gr.s $s8, $fa0 vldi $vr2, -784 bge $s8, $s5, .LBB3_37 @@ -583,7 +583,7 @@ DivideAndSolve: # @DivideAndSolve fadd.d $fa1, $fa1, $fa2 ftintrz.w.d $fa1, $fa1 movfr2gr.s $s1, $fa1 - fadd.d $fs5, $fs2, $fa0 + fadd.d $fs6, $fs2, $fa0 bge $s1, $s2, .LBB3_5 # %bb.4: # %._crit_edge108.thread # in Loop: Header=BB3_3 Depth=1 @@ -595,7 +595,7 @@ DivideAndSolve: # @DivideAndSolve .p2align 4, , 16 .LBB3_5: # %.lr.ph107 # in Loop: Header=BB3_3 Depth=1 - fadd.d $fa0, $fs5, $fa2 + fadd.d $fa0, $fs6, $fa2 fmin.d $fa0, $fa0, $fs3 ftintrz.w.d $fa0, $fa0 move $a1, $s2 @@ -1007,11 +1007,9 @@ DivideAndSolve: # @DivideAndSolve st.d $s8, $sp, 56 # 8-byte Folded Spill slli.d $s8, $a2, 3 fldx.d $fa1, $a1, $s8 - pcalau12i $a0, %pc_hi20(.LCPI3_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI3_1) fldx.d $fa2, $a1, $s7 fsub.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fs1 + fmul.d $fa0, $fa0, $fs5 fcmp.cule.d $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB3_2 # %bb.34: # %.lr.ph109 @@ -1029,7 +1027,7 @@ DivideAndSolve: # @DivideAndSolve ld.d $a1, $s6, 0 fldx.d $fa0, $a0, $s7 fldx.d $fa1, $a1, $s8 - fldx.d $fs6, $a1, $s7 + fldx.d $fs1, $a1, $s7 fsub.d $fa0, $fa0, $fa1 vldi $vr2, -928 fmul.d $fs0, $fa0, $fa2 @@ -1039,7 +1037,7 @@ DivideAndSolve: # @DivideAndSolve jirl $ra, $ra, 0 movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 - fmul.d $fa1, $fs6, $fs6 + fmul.d $fa1, $fs1, $fs1 ld.d $a0, $s1, 0 ld.d $a1, $s6, 0 fmadd.d $fa1, $fs0, $fs0, $fa1 @@ -1074,7 +1072,7 @@ DivideAndSolve: # @DivideAndSolve fldx.d $fa1, $a1, $s8 fldx.d $fa2, $a1, $s7 fsub.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fs1 + fmul.d $fa0, $fa0, $fs5 fcmp.clt.d $fcc0, $fa0, $fa2 bcnez $fcc0, .LBB3_35 b .LBB3_2 diff --git a/results/MultiSource/Benchmarks/McCat/05-eks/CMakeFiles/eks.dir/MM.s b/results/MultiSource/Benchmarks/McCat/05-eks/CMakeFiles/eks.dir/MM.s index 00ac0616..334df563 100644 --- a/results/MultiSource/Benchmarks/McCat/05-eks/CMakeFiles/eks.dir/MM.s +++ b/results/MultiSource/Benchmarks/McCat/05-eks/CMakeFiles/eks.dir/MM.s @@ -1,12 +1,6 @@ .file "MM.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function MakeMatrix -.LCPI0_0: - .dword 0xbfd47ae147ae147b # double -0.32000000000000001 -.LCPI0_1: - .dword 0x3fd46d04297691db # double 0.3191538243211462 .text - .globl MakeMatrix + .globl MakeMatrix # -- Begin function MakeMatrix .p2align 5 .type MakeMatrix,@function MakeMatrix: # @MakeMatrix @@ -32,12 +26,18 @@ MakeMatrix: # @MakeMatrix move $s0, $a0 move $s4, $zero move $s2, $zero - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_0) - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI0_1) - movgr2fr.d $fs2, $zero + movgr2fr.d $fs0, $zero ori $s3, $zero, 408 + lu12i.w $a0, 293601 + ori $a0, $a0, 1147 + lu32i.d $a0, 293601 + lu52i.d $a0, $a0, -1027 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, 169833 + ori $a0, $a0, 475 + lu32i.d $a0, 290052 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fs2, $a0 b .LBB0_2 .p2align 4, , 16 .LBB0_1: # in Loop: Header=BB0_2 Depth=1 @@ -65,16 +65,16 @@ MakeMatrix: # @MakeMatrix beq $s1, $s3, .LBB0_1 .LBB0_4: # Parent Loop BB0_2 Depth=1 # => This Inner Loop Header: Depth=2 - fmov.d $fa0, $fs2 + fmov.d $fa0, $fs0 blt $fp, $s7, .LBB0_3 # %bb.5: # in Loop: Header=BB0_4 Depth=2 movgr2fr.w $fa0, $s8 ffint.d.w $fa0, $fa0 - fmul.d $fa1, $fa0, $fs0 + fmul.d $fa1, $fa0, $fs1 fmul.d $fa0, $fa1, $fa0 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 - fmul.d $fa0, $fa0, $fs1 + fmul.d $fa0, $fa0, $fs2 b .LBB0_3 .LBB0_6: move $a0, $s0 diff --git a/results/MultiSource/Benchmarks/McCat/05-eks/CMakeFiles/eks.dir/QRfact.s b/results/MultiSource/Benchmarks/McCat/05-eks/CMakeFiles/eks.dir/QRfact.s index 86c4c186..65a3bbae 100644 --- a/results/MultiSource/Benchmarks/McCat/05-eks/CMakeFiles/eks.dir/QRfact.s +++ b/results/MultiSource/Benchmarks/McCat/05-eks/CMakeFiles/eks.dir/QRfact.s @@ -83,12 +83,7 @@ ApplyRGivens: # @ApplyRGivens .Lfunc_end2: .size ApplyRGivens, .Lfunc_end2-ApplyRGivens # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function QRiterate -.LCPI3_0: - .dword 0x3ddb7cdfd9d7bdbb # double 1.0E-10 - .text - .globl QRiterate + .globl QRiterate # -- Begin function QRiterate .p2align 5 .type QRiterate,@function QRiterate: # @QRiterate @@ -108,8 +103,11 @@ QRiterate: # @QRiterate move $t7, $zero ld.d $a2, $a0, 0 addi.d $a3, $a0, 392 - pcalau12i $a4, %pc_hi20(.LCPI3_0) - fld.d $fa0, $a4, %pc_lo12(.LCPI3_0) + lu12i.w $a4, -156293 + ori $a4, $a4, 3515 + lu32i.d $a4, -295713 + lu52i.d $a4, $a4, 989 + movgr2fr.d $fa0, $a4 ori $t8, $zero, 50 movgr2fr.d $fa1, $zero ori $a5, $zero, 1 diff --git a/results/MultiSource/Benchmarks/McCat/08-main/CMakeFiles/main.dir/main.s b/results/MultiSource/Benchmarks/McCat/08-main/CMakeFiles/main.dir/main.s index 4e1d3d34..d94b8da9 100644 --- a/results/MultiSource/Benchmarks/McCat/08-main/CMakeFiles/main.dir/main.s +++ b/results/MultiSource/Benchmarks/McCat/08-main/CMakeFiles/main.dir/main.s @@ -1,14 +1,6 @@ .file "main.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function MakeSphere -.LCPI0_0: - .dword 0x401921fb54442eea # double 6.2831853071800001 -.LCPI0_1: - .dword 0xc00921fb54442eea # double -3.1415926535900001 -.LCPI0_2: - .dword 0x400921fbda7bebf0 # double 3.1415936535900002 .text - .globl MakeSphere + .globl MakeSphere # -- Begin function MakeSphere .p2align 5 .type MakeSphere,@function MakeSphere: # @MakeSphere @@ -24,29 +16,35 @@ MakeSphere: # @MakeSphere fst.d $fs5, $sp, 128 # 8-byte Folded Spill fst.d $fs6, $sp, 120 # 8-byte Folded Spill fst.d $fs7, $sp, 112 # 8-byte Folded Spill - pcalau12i $a3, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a3, %pc_lo12(.LCPI0_0) fmov.d $fs0, $fa0 move $fp, $a0 movgr2fr.w $fa0, $a1 ffint.d.w $fa0, $fa0 + lu12i.w $a0, 345154 + ori $a0, $a0, 3818 + lu32i.d $a0, -450053 + lu52i.d $a1, $a0, 1025 + movgr2fr.d $fa1, $a1 fdiv.d $fa0, $fa1, $fa0 fst.d $fa0, $sp, 8 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fs6, $a0, %pc_lo12(.LCPI0_1) - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fs7, $a0, %pc_lo12(.LCPI0_2) movgr2fr.w $fa0, $a2 ffint.d.w $fa0, $fa0 - fdiv.d $fs5, $fa1, $fa0 - fmov.d $fs1, $fs6 + fdiv.d $fs6, $fa1, $fa0 + lu52i.d $a0, $a0, -1024 + movgr2fr.d $fs7, $a0 + lu12i.w $a0, -153666 + ori $a0, $a0, 3056 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fs5, $a0 + fmov.d $fs1, $fs7 .p2align 4, , 16 .LBB0_1: # %.preheader # =>This Loop Header: Depth=1 # Child Loop BB0_2 Depth 2 fld.d $fa0, $sp, 8 # 8-byte Folded Reload fadd.d $fs2, $fa0, $fs1 - fmov.d $fs3, $fs6 + fmov.d $fs3, $fs7 .p2align 4, , 16 .LBB0_2: # Parent Loop BB0_1 Depth=1 # => This Inner Loop Header: Depth=2 @@ -100,7 +98,7 @@ MakeSphere: # @MakeSphere jirl $ra, $ra, 0 fmul.d $fa0, $fs0, $fa0 fst.d $fa0, $sp, 56 - fadd.d $fs3, $fs5, $fs3 + fadd.d $fs3, $fs6, $fs3 fmov.d $fa0, $fs3 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 @@ -157,10 +155,10 @@ MakeSphere: # @MakeSphere move $a3, $zero pcaddu18i $ra, %call36(InsertPoly4) jirl $ra, $ra, 0 - fcmp.clt.d $fcc0, $fs3, $fs7 + fcmp.clt.d $fcc0, $fs3, $fs5 bcnez $fcc0, .LBB0_2 # %bb.3: # in Loop: Header=BB0_1 Depth=1 - fcmp.clt.d $fcc0, $fs2, $fs7 + fcmp.clt.d $fcc0, $fs2, $fs5 fmov.d $fs1, $fs2 bcnez $fcc0, .LBB0_1 # %bb.4: diff --git a/results/MultiSource/Benchmarks/McCat/08-main/CMakeFiles/main.dir/trig.s b/results/MultiSource/Benchmarks/McCat/08-main/CMakeFiles/main.dir/trig.s index 5a4ccf0b..6ffa91ce 100644 --- a/results/MultiSource/Benchmarks/McCat/08-main/CMakeFiles/main.dir/trig.s +++ b/results/MultiSource/Benchmarks/McCat/08-main/CMakeFiles/main.dir/trig.s @@ -426,12 +426,7 @@ TranslateMatrix: # @TranslateMatrix .Lfunc_end4: .size TranslateMatrix, .Lfunc_end4-TranslateMatrix # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function RotateMatrix -.LCPI5_0: - .dword 0x3f91df46a2529e84 # double 0.017453292519944444 - .text - .globl RotateMatrix + .globl RotateMatrix # -- Begin function RotateMatrix .p2align 5 .type RotateMatrix,@function RotateMatrix: # @RotateMatrix @@ -449,10 +444,13 @@ RotateMatrix: # @RotateMatrix fst.d $fs3, $sp, 48 # 8-byte Folded Spill fst.d $fs4, $sp, 40 # 8-byte Folded Spill fst.d $fs5, $sp, 32 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.d $fs5, $a0, %pc_lo12(.LCPI5_0) fmov.d $fs3, $fa2 fmov.d $fs2, $fa1 + lu12i.w $a0, -383703 + ori $a0, $a0, 3716 + lu32i.d $a0, 122694 + lu52i.d $a0, $a0, 1017 + movgr2fr.d $fs5, $a0 fmul.d $fs1, $fa0, $fs5 fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(cos) diff --git a/results/MultiSource/Benchmarks/McCat/09-vor/CMakeFiles/vor.dir/vor.s b/results/MultiSource/Benchmarks/McCat/09-vor/CMakeFiles/vor.dir/vor.s index 9deef7dc..da39d827 100644 --- a/results/MultiSource/Benchmarks/McCat/09-vor/CMakeFiles/vor.dir/vor.s +++ b/results/MultiSource/Benchmarks/McCat/09-vor/CMakeFiles/vor.dir/vor.s @@ -132,14 +132,7 @@ add_point: # @add_point .Lfunc_end3: .size add_point, .Lfunc_end3-add_point # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function compute_v -.LCPI4_0: - .dword 0xc0c3880000000000 # double -1.0E+4 -.LCPI4_1: - .dword 0x40c3880000000000 # double 1.0E+4 - .text - .globl compute_v + .globl compute_v # -- Begin function compute_v .p2align 5 .type compute_v,@function compute_v: # @compute_v @@ -183,10 +176,12 @@ compute_v: # @compute_v # %bb.1: bge $s2, $s4, .LBB4_4 # %bb.2: - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI4_1) + ori $s0, $zero, 0 ori $a1, $zero, 0 lu32i.d $a1, 1 + lu32i.d $s0, 231424 + lu52i.d $a0, $s0, 1036 + movgr2fr.d $fs1, $a0 move $a0, $fp fmov.d $fs0, $fa0 fmov.d $fa1, $fs1 @@ -196,8 +191,8 @@ compute_v: # @compute_v fcmp.cule.d $fcc0, $fa2, $fs1 bcnez $fcc0, .LBB4_21 # %bb.3: - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_0) + lu52i.d $a0, $s0, -1012 + movgr2fr.d $fa1, $a0 ori $a1, $zero, 1 move $a0, $fp fmov.d $fa0, $fs0 @@ -207,21 +202,25 @@ compute_v: # @compute_v # %bb.5: bne $s2, $s4, .LBB4_8 # %bb.6: - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_1) -.LBB4_7: + ori $a0, $zero, 0 ori $a1, $zero, 0 lu32i.d $a1, 1 + lu32i.d $a0, 231424 + lu52i.d $a0, $a0, 1036 +.LBB4_7: + movgr2fr.d $fa1, $a0 b .LBB4_19 .LBB4_8: bge $s3, $s5, .LBB4_12 # %bb.9: bge $s4, $s2, .LBB4_12 # %bb.10: - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fs0, $a0, %pc_lo12(.LCPI4_1) + ori $a0, $zero, 0 ori $a1, $zero, 0 lu32i.d $a1, 1 + lu32i.d $a0, 231424 + lu52i.d $a0, $a0, 1036 + movgr2fr.d $fs0, $a0 move $a0, $fp fmov.d $fs1, $fa0 fmov.d $fa1, $fs0 @@ -241,17 +240,20 @@ compute_v: # @compute_v # %bb.13: bge $s2, $s4, .LBB4_15 # %bb.14: - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_0) + ori $a0, $zero, 0 + lu32i.d $a0, 231424 + lu52i.d $a0, $a0, -1012 b .LBB4_18 .LBB4_15: bne $s3, $s5, .LBB4_22 # %bb.16: bge $s4, $s2, .LBB4_22 # %bb.17: - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_1) + ori $a0, $zero, 0 + lu32i.d $a0, 231424 + lu52i.d $a0, $a0, 1036 .LBB4_18: + movgr2fr.d $fa1, $a0 ori $a1, $zero, 1 .LBB4_19: move $a0, $fp @@ -281,19 +283,21 @@ compute_v: # @compute_v # %bb.23: bge $s2, $s4, .LBB4_25 # %bb.24: - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI4_0) + ori $s0, $zero, 0 ori $a1, $zero, 0 lu32i.d $a1, 1 + lu32i.d $s0, 231424 + lu52i.d $a0, $s0, -1012 + movgr2fr.d $fs0, $a0 move $a0, $fp fmov.d $fs1, $fa0 fmov.d $fa1, $fs0 pcaddu18i $ra, %call36(intersect) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI4_1) - fabs.d $fa3, $fa0 - fcmp.cule.d $fcc0, $fa3, $fa2 + fabs.d $fa2, $fa0 + lu52i.d $a0, $s0, 1036 + movgr2fr.d $fa3, $a0 + fcmp.cule.d $fcc0, $fa2, $fa3 bceqz $fcc0, .LBB4_11 b .LBB4_21 .LBB4_25: @@ -301,25 +305,30 @@ compute_v: # @compute_v # %bb.26: bne $s2, $s4, .LBB4_28 # %bb.27: - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_0) + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, 1 + lu32i.d $a0, 231424 + lu52i.d $a0, $a0, -1012 b .LBB4_7 .LBB4_28: bge $s5, $s3, .LBB4_32 # %bb.29: bge $s4, $s2, .LBB4_32 # %bb.30: - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_0) + ori $s0, $zero, 0 ori $a1, $zero, 0 lu32i.d $a1, 1 + lu32i.d $s0, 231424 + lu52i.d $a0, $s0, -1012 + movgr2fr.d $fa1, $a0 move $a0, $fp fmov.d $fs0, $fa0 pcaddu18i $ra, %call36(intersect) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI4_1) fabs.d $fa3, $fa0 + lu52i.d $a0, $s0, 1036 + movgr2fr.d $fa2, $a0 fcmp.cule.d $fcc0, $fa3, $fa2 bcnez $fcc0, .LBB4_21 # %bb.31: diff --git a/results/MultiSource/Benchmarks/McCat/18-imp/CMakeFiles/imp.dir/L_canny.s b/results/MultiSource/Benchmarks/McCat/18-imp/CMakeFiles/imp.dir/L_canny.s index 6de755a0..dfcd73a3 100644 --- a/results/MultiSource/Benchmarks/McCat/18-imp/CMakeFiles/imp.dir/L_canny.s +++ b/results/MultiSource/Benchmarks/McCat/18-imp/CMakeFiles/imp.dir/L_canny.s @@ -170,12 +170,7 @@ L_canny: # @L_canny .Lfunc_end0: .size L_canny, .Lfunc_end0-L_canny # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function GaussianMask -.LCPI1_0: - .dword 0x3ff6a09e667f3bcd # double 1.4142135623730951 - .text - .globl GaussianMask + .globl GaussianMask # -- Begin function GaussianMask .p2align 5 .type GaussianMask,@function GaussianMask: # @GaussianMask @@ -240,10 +235,13 @@ GaussianMask: # @GaussianMask addi.d $sp, $sp, 96 ret .LBB1_5: # %.lr.ph - pcalau12i $a2, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI1_0) - fcvt.d.s $fa1, $fs0 - fmul.d $fs1, $fa1, $fa0 + fcvt.d.s $fa0, $fs0 + lu12i.w $a2, 419827 + ori $a2, $a2, 3021 + lu32i.d $a2, 434334 + lu52i.d $a2, $a2, 1023 + movgr2fr.d $fa1, $a2 + fmul.d $fs1, $fa0, $fa1 vldi $vr1, -928 move $s4, $s1 b .LBB1_8 @@ -303,14 +301,7 @@ GaussianMask: # @GaussianMask .Lfunc_end1: .size GaussianMask, .Lfunc_end1-GaussianMask # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function DGaussianMask -.LCPI2_0: - .dword 0x4026a09e667f3bcd # double 11.313708498984761 -.LCPI2_1: - .dword 0x40040d931ff62705 # double 2.5066282746310002 - .text - .globl DGaussianMask + .globl DGaussianMask # -- Begin function DGaussianMask .p2align 5 .type DGaussianMask,@function DGaussianMask: # @DGaussianMask @@ -329,14 +320,17 @@ DGaussianMask: # @DGaussianMask fst.d $fs3, $sp, 24 # 8-byte Folded Spill fst.d $fs4, $sp, 16 # 8-byte Folded Spill fst.d $fs5, $sp, 8 # 8-byte Folded Spill - pcalau12i $a3, %pc_hi20(.LCPI2_0) - fld.d $fa1, $a3, %pc_lo12(.LCPI2_0) move $s3, $a2 move $fp, $a1 move $s0, $a0 fcvt.s.d $fa0, $fa0 fcvt.d.s $fs0, $fa0 - fmul.d $fa0, $fs0, $fa1 + lu12i.w $a0, 419827 + ori $a0, $a0, 3021 + lu32i.d $a0, 434334 + lu52i.d $a0, $a0, 1026 + movgr2fr.d $fa0, $a0 + fmul.d $fa0, $fs0, $fa0 ftintrz.l.d $fa0, $fa0 movfr2gr.d $a0, $fa0 ori $s2, $a0, 1 @@ -383,10 +377,13 @@ DGaussianMask: # @DGaussianMask addi.d $sp, $sp, 112 ret .LBB2_5: # %.lr.ph - pcalau12i $a2, %pc_hi20(.LCPI2_1) - fld.d $fa0, $a2, %pc_lo12(.LCPI2_1) - fadd.d $fa1, $fs0, $fs0 - fmul.d $fs1, $fa1, $fs0 + fadd.d $fa0, $fs0, $fs0 + fmul.d $fs1, $fa0, $fs0 + lu12i.w $a2, 130914 + ori $a2, $a2, 1797 + lu32i.d $a2, 265619 + lu52i.d $a2, $a2, 1024 + movgr2fr.d $fa0, $a2 fmul.d $fs2, $fs0, $fa0 vldi $vr0, -784 fdiv.d $fa0, $fa0, $fs2 @@ -686,20 +683,7 @@ dfilter: # @dfilter .Lfunc_end3: .size dfilter, .Lfunc_end3-dfilter # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function dnon_max -.LCPI4_0: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI4_1: - .dword 0x3fd999999999999a # double 0.40000000000000002 -.LCPI4_2: - .dword 0xbfd999999999999a # double -0.40000000000000002 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI4_3: - .word 0x437f0000 # float 255 - .text - .globl dnon_max + .globl dnon_max # -- Begin function dnon_max .p2align 5 .type dnon_max,@function dnon_max: # @dnon_max @@ -738,17 +722,23 @@ dnon_max: # @dnon_max add.d $a6, $s1, $t0 alsl.d $a7, $s0, $fp, 3 add.d $t0, $a0, $t0 - pcalau12i $t1, %pc_hi20(.LCPI4_0) - fld.d $fa0, $t1, %pc_lo12(.LCPI4_0) - pcalau12i $t1, %pc_hi20(.LCPI4_1) - fld.d $fa1, $t1, %pc_lo12(.LCPI4_1) - pcalau12i $t1, %pc_hi20(.LCPI4_2) - fld.d $fa2, $t1, %pc_lo12(.LCPI4_2) - pcalau12i $t1, %pc_hi20(.LCPI4_3) - fld.s $fa3, $t1, %pc_lo12(.LCPI4_3) ori $t1, $zero, 1 - movgr2fr.w $fa4, $zero - vldi $vr5, -912 + lu12i.w $t2, -122104 + ori $t2, $t2, 3130 + lu32i.d $t2, 358798 + lu52i.d $t2, $t2, 996 + movgr2fr.d $fa0, $t2 + movgr2fr.w $fa1, $zero + lu12i.w $t2, -419431 + ori $t2, $t2, 2458 + lu32i.d $t2, -419431 + lu52i.d $t3, $t2, 1021 + movgr2fr.d $fa2, $t3 + vldi $vr3, -912 + lu52i.d $t2, $t2, -1027 + movgr2fr.d $fa4, $t2 + lu12i.w $t2, 276464 + movgr2fr.w $fa5, $t2 b .LBB4_4 .p2align 4, , 16 .LBB4_3: # %._crit_edge.us @@ -784,11 +774,11 @@ dnon_max: # @dnon_max # %bb.8: # in Loop: Header=BB4_7 Depth=2 fldx.s $fa7, $a6, $t2 fdiv.s $fa6, $fa7, $fa6 - fcmp.cult.s $fcc0, $fa6, $fa4 + fcmp.cult.s $fcc0, $fa6, $fa1 fcvt.d.s $fa7, $fa6 bcnez $fcc0, .LBB4_12 # %bb.9: # in Loop: Header=BB4_7 Depth=2 - fcmp.clt.d $fcc0, $fa7, $fa1 + fcmp.clt.d $fcc0, $fa7, $fa2 bceqz $fcc0, .LBB4_12 # %bb.10: # in Loop: Header=BB4_7 Depth=2 add.d $t4, $a7, $t2 @@ -796,7 +786,7 @@ dnon_max: # @dnon_max fld.s $ft1, $t4, 4 fmul.s $ft0, $fa6, $ft0 fcvt.d.s $ft0, $ft0 - fsub.d $fa7, $fa5, $fa7 + fsub.d $fa7, $fa3, $fa7 fcvt.d.s $ft1, $ft1 fldx.s $ft2, $fp, $t2 fmadd.d $ft0, $fa7, $ft1, $ft0 @@ -814,12 +804,12 @@ dnon_max: # @dnon_max b .LBB4_16 .p2align 4, , 16 .LBB4_12: # in Loop: Header=BB4_7 Depth=2 - fcmp.cult.s $fcc0, $fa4, $fa6 - fmov.s $ft0, $fa4 + fcmp.cult.s $fcc0, $fa1, $fa6 + fmov.s $ft0, $fa1 bcnez $fcc0, .LBB4_5 # %bb.13: # in Loop: Header=BB4_7 Depth=2 - fcmp.cule.d $fcc0, $fa7, $fa2 - fmov.s $ft0, $fa4 + fcmp.cule.d $fcc0, $fa7, $fa4 + fmov.s $ft0, $fa1 bcnez $fcc0, .LBB4_5 # %bb.14: # in Loop: Header=BB4_7 Depth=2 fldx.s $ft0, $a7, $t2 @@ -830,7 +820,7 @@ dnon_max: # @dnon_max fld.s $ft1, $t4, 4 add.d $t4, $fp, $t2 fld.s $ft2, $t4, 8 - fadd.d $fa7, $fa7, $fa5 + fadd.d $fa7, $fa7, $fa3 fcvt.d.s $ft1, $ft1 fmadd.d $ft0, $fa7, $ft1, $ft0 fneg.s $ft1, $ft2 @@ -853,7 +843,7 @@ dnon_max: # @dnon_max fcvt.d.s $fa6, $fa7 fadd.d $fa6, $fa6, $fa0 fcmp.cule.d $fcc0, $ft0, $fa6 - fmov.s $ft0, $fa3 + fmov.s $ft0, $fa5 bceqz $fcc0, .LBB4_5 b .LBB4_6 .LBB4_18: # %._crit_edge104 diff --git a/results/MultiSource/Benchmarks/McCat/18-imp/CMakeFiles/imp.dir/textloc.s b/results/MultiSource/Benchmarks/McCat/18-imp/CMakeFiles/imp.dir/textloc.s index 21c1f18e..8651e1f3 100644 --- a/results/MultiSource/Benchmarks/McCat/18-imp/CMakeFiles/imp.dir/textloc.s +++ b/results/MultiSource/Benchmarks/McCat/18-imp/CMakeFiles/imp.dir/textloc.s @@ -92,12 +92,7 @@ ConvertToFloat: # @ConvertToFloat .Lfunc_end0: .size ConvertToFloat, .Lfunc_end0-ConvertToFloat # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function HorzVariance -.LCPI1_0: - .word 0x437f0000 # float 255 - .text - .globl HorzVariance + .globl HorzVariance # -- Begin function HorzVariance .p2align 5 .type HorzVariance,@function HorzVariance: # @HorzVariance @@ -141,11 +136,11 @@ HorzVariance: # @HorzVariance bstrpick.d $a4, $a4, 31, 0 add.d $a5, $a5, $a6 addi.d $a5, $a5, 10 - pcalau12i $a6, %pc_hi20(.LCPI1_0) - fld.s $fa2, $a6, %pc_lo12(.LCPI1_0) + movgr2fr.w $fa2, $zero addi.d $a6, $fp, 1 sltui $a6, $a6, 1 - movgr2fr.w $fa3, $zero + lu12i.w $a7, 276464 + movgr2fr.w $fa3, $a7 movgr2cf $fcc0, $a6 .p2align 4, , 16 .LBB1_4: # %.preheader.us @@ -165,7 +160,7 @@ HorzVariance: # @HorzVariance add.d $t1, $a7, $a6 move $t2, $a2 move $t3, $t0 - fmov.s $fa5, $fa3 + fmov.s $fa5, $fa2 .p2align 4, , 16 .LBB1_6: # Parent Loop BB1_4 Depth=1 # Parent Loop BB1_5 Depth=2 @@ -202,7 +197,7 @@ HorzVariance: # @HorzVariance # in Loop: Header=BB1_5 Depth=2 fdiv.s $fa4, $fa4, $fa0 fcmp.cult.s $fcc1, $fa1, $fa4 - fsel $fa5, $fa3, $fa2, $fcc1 + fsel $fa5, $fa2, $fa3, $fcc1 fsel $fa4, $fa5, $fa4, $fcc0 fst.s $fa4, $t1, 0 addi.d $a7, $a7, 1 @@ -236,10 +231,9 @@ HorzVariance: # @HorzVariance addi.d $a6, $a0, 56 slli.d $a7, $s2, 2 ori $t0, $zero, 8 - pcalau12i $t1, %pc_hi20(.LCPI1_0) - fld.s $fa4, $t1, %pc_lo12(.LCPI1_0) - movgr2fr.w $fa5, $zero lu12i.w $t1, 276464 + movgr2fr.w $fa4, $t1 + movgr2fr.w $fa5, $zero vreplgr2vr.w $vr6, $t1 b .LBB1_15 .p2align 4, , 16 diff --git a/results/MultiSource/Benchmarks/MiBench/automotive-basicmath/CMakeFiles/automotive-basicmath.dir/basicmath.s b/results/MultiSource/Benchmarks/MiBench/automotive-basicmath/CMakeFiles/automotive-basicmath.dir/basicmath.s index 56f41f57..a03716c8 100644 --- a/results/MultiSource/Benchmarks/MiBench/automotive-basicmath/CMakeFiles/automotive-basicmath.dir/basicmath.s +++ b/results/MultiSource/Benchmarks/MiBench/automotive-basicmath/CMakeFiles/automotive-basicmath.dir/basicmath.s @@ -1,97 +1,46 @@ .file "basicmath.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x4040000000000000 # double 32 -.LCPI0_1: - .dword 0xc02b666666666666 # double -13.699999999999999 -.LCPI0_2: - .dword 0xc041800000000000 # double -35 -.LCPI0_3: - .dword 0x4028ae147ae147ae # double 12.34 -.LCPI0_4: - .dword 0xc050f8f5c28f5c29 # double -67.890000000000001 -.LCPI0_5: - .dword 0xc03799999999999a # double -23.600000000000001 -.LCPI0_6: - .dword 0x4046800000000000 # double 45 -.LCPI0_7: - .dword 0x4021570a3d70a3d7 # double 8.6699999999999999 -.LCPI0_8: - .dword 0x4041000000000000 # double 34 -.LCPI0_9: - .dword 0xbffb333333333333 # double -1.7 -.LCPI0_10: - .dword 0x4015333333333333 # double 5.2999999999999998 -.LCPI0_11: - .dword 0xbff7374bc6a7ef9e # double -1.4510000000000001 -.LCPI0_12: - .dword 0xbffe6e978d4fdf3c # double -1.9020000000000001 -.LCPI0_13: - .dword 0xc002d2f1a9fbe76d # double -2.3530000000000002 -.LCPI0_14: - .dword 0xc0066e978d4fdf3c # double -2.8040000000000003 -.LCPI0_15: - .dword 0xc00a0a3d70a3d70b # double -3.2550000000000003 -.LCPI0_16: - .dword 0xc00da5e353f7ceda # double -3.7060000000000004 -.LCPI0_17: - .dword 0xc010a0c49ba5e354 # double -4.157 -.LCPI0_18: - .dword 0xc0126e978d4fdf3b # double -4.6079999999999997 -.LCPI0_19: - .dword 0x3fe3851eb851eb85 # double 0.60999999999999999 -.LCPI0_20: - .dword 0x400921fb54442d18 # double 3.1415926535897931 -.LCPI0_21: - .dword 0x4066800000000000 # double 180 -.LCPI0_22: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI0_23: - .dword 0x4076800000000000 # double 360 -.LCPI0_24: - .dword 0x3f41df46a2529d39 # double 5.4541539124822798E-4 -.LCPI0_25: - .dword 0x401921fb97600b9b # double 6.2831863071795864 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main # %bb.0: - addi.d $sp, $sp, -208 - st.d $ra, $sp, 200 # 8-byte Folded Spill - st.d $fp, $sp, 192 # 8-byte Folded Spill - st.d $s0, $sp, 184 # 8-byte Folded Spill - st.d $s1, $sp, 176 # 8-byte Folded Spill - st.d $s2, $sp, 168 # 8-byte Folded Spill - st.d $s3, $sp, 160 # 8-byte Folded Spill - st.d $s4, $sp, 152 # 8-byte Folded Spill - fst.d $fs0, $sp, 144 # 8-byte Folded Spill - fst.d $fs1, $sp, 136 # 8-byte Folded Spill - fst.d $fs2, $sp, 128 # 8-byte Folded Spill - fst.d $fs3, $sp, 120 # 8-byte Folded Spill - fst.d $fs4, $sp, 112 # 8-byte Folded Spill - fst.d $fs5, $sp, 104 # 8-byte Folded Spill + addi.d $sp, $sp, -240 + st.d $ra, $sp, 232 # 8-byte Folded Spill + st.d $fp, $sp, 224 # 8-byte Folded Spill + st.d $s0, $sp, 216 # 8-byte Folded Spill + st.d $s1, $sp, 208 # 8-byte Folded Spill + st.d $s2, $sp, 200 # 8-byte Folded Spill + st.d $s3, $sp, 192 # 8-byte Folded Spill + st.d $s4, $sp, 184 # 8-byte Folded Spill + st.d $s5, $sp, 176 # 8-byte Folded Spill + fst.d $fs0, $sp, 168 # 8-byte Folded Spill + fst.d $fs1, $sp, 160 # 8-byte Folded Spill + fst.d $fs2, $sp, 152 # 8-byte Folded Spill + fst.d $fs3, $sp, 144 # 8-byte Folded Spill + fst.d $fs4, $sp, 136 # 8-byte Folded Spill + fst.d $fs5, $sp, 128 # 8-byte Folded Spill + fst.d $fs6, $sp, 120 # 8-byte Folded Spill + fst.d $fs7, $sp, 112 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.Lstr) addi.d $a0, $a0, %pc_lo12(.Lstr) pcaddu18i $ra, %call36(puts) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_0) + lu52i.d $a0, $zero, 1028 + movgr2fr.d $fa2, $a0 vldi $vr0, -912 vldi $vr1, -859 vldi $vr3, -834 - addi.d $a0, $sp, 76 - addi.d $a1, $sp, 80 - addi.d $s0, $sp, 80 + addi.d $a0, $sp, 84 + addi.d $a1, $sp, 88 + addi.d $s0, $sp, 88 pcaddu18i $ra, %call36(SolveCubic) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a0, $a0, %pc_lo12(.L.str.1) pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 + ld.w $a0, $sp, 84 blez $a0, .LBB0_3 # %bb.1: # %.lr.ph.preheader pcalau12i $a0, %pc_hi20(.L.str.2) @@ -104,7 +53,7 @@ main: # @main move $a0, $fp pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 + ld.w $a0, $sp, 84 addi.d $s1, $s1, 1 addi.d $s0, $s0, 8 blt $s1, $a0, .LBB0_2 @@ -116,16 +65,16 @@ main: # @main vldi $vr1, -878 vldi $vr2, -975 vldi $vr3, -834 - addi.d $a0, $sp, 76 - addi.d $a1, $sp, 80 - addi.d $s0, $sp, 80 + addi.d $a0, $sp, 84 + addi.d $a1, $sp, 88 + addi.d $s0, $sp, 88 pcaddu18i $ra, %call36(SolveCubic) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a0, $a0, %pc_lo12(.L.str.1) pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 + ld.w $a0, $sp, 84 blez $a0, .LBB0_6 # %bb.4: # %.lr.ph110.preheader pcalau12i $a0, %pc_hi20(.L.str.2) @@ -138,7 +87,7 @@ main: # @main move $a0, $fp pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 + ld.w $a0, $sp, 84 addi.d $s1, $s1, 1 addi.d $s0, $s0, 8 blt $s1, $a0, .LBB0_5 @@ -150,16 +99,16 @@ main: # @main vldi $vr1, -884 vldi $vr2, -970 vldi $vr3, -833 - addi.d $a0, $sp, 76 - addi.d $a1, $sp, 80 - addi.d $s0, $sp, 80 + addi.d $a0, $sp, 84 + addi.d $a1, $sp, 88 + addi.d $s0, $sp, 88 pcaddu18i $ra, %call36(SolveCubic) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a0, $a0, %pc_lo12(.L.str.1) pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 + ld.w $a0, $sp, 84 blez $a0, .LBB0_9 # %bb.7: # %.lr.ph114.preheader pcalau12i $a0, %pc_hi20(.L.str.2) @@ -172,7 +121,7 @@ main: # @main move $a0, $fp pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 + ld.w $a0, $sp, 84 addi.d $s1, $s1, 1 addi.d $s0, $s0, 8 blt $s1, $a0, .LBB0_8 @@ -180,22 +129,27 @@ main: # @main ori $a0, $zero, 10 pcaddu18i $ra, %call36(putchar) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_1) - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_2) + ori $a0, $zero, 0 + lu32i.d $a0, 98304 + lu52i.d $a0, $a0, -1020 + movgr2fr.d $fa3, $a0 + lu12i.w $a0, 419430 + ori $a0, $a0, 1638 + lu32i.d $a0, -301466 + lu52i.d $a0, $a0, -1022 + movgr2fr.d $fa1, $a0 vldi $vr0, -912 vldi $vr2, -912 - addi.d $a0, $sp, 76 - addi.d $a1, $sp, 80 - addi.d $s0, $sp, 80 + addi.d $a0, $sp, 84 + addi.d $a1, $sp, 88 + addi.d $s0, $sp, 88 pcaddu18i $ra, %call36(SolveCubic) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a0, $a0, %pc_lo12(.L.str.1) pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 + ld.w $a0, $sp, 84 blez $a0, .LBB0_12 # %bb.10: # %.lr.ph118.preheader pcalau12i $a0, %pc_hi20(.L.str.2) @@ -208,7 +162,7 @@ main: # @main move $a0, $fp pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 + ld.w $a0, $sp, 84 addi.d $s1, $s1, 1 addi.d $s0, $s0, 8 blt $s1, $a0, .LBB0_11 @@ -216,21 +170,24 @@ main: # @main ori $a0, $zero, 10 pcaddu18i $ra, %call36(putchar) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_3) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_3) + lu12i.w $a0, 503316 + ori $a0, $a0, 1966 + lu32i.d $a0, -479724 + lu52i.d $a0, $a0, 1026 + movgr2fr.d $fa1, $a0 vldi $vr0, -1016 vldi $vr2, -1004 vldi $vr3, -984 - addi.d $a0, $sp, 76 - addi.d $a1, $sp, 80 - addi.d $s0, $sp, 80 + addi.d $a0, $sp, 84 + addi.d $a1, $sp, 88 + addi.d $s0, $sp, 88 pcaddu18i $ra, %call36(SolveCubic) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a0, $a0, %pc_lo12(.L.str.1) pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 + ld.w $a0, $sp, 84 blez $a0, .LBB0_15 # %bb.13: # %.lr.ph122.preheader pcalau12i $a0, %pc_hi20(.L.str.2) @@ -243,7 +200,7 @@ main: # @main move $a0, $fp pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 + ld.w $a0, $sp, 84 addi.d $s1, $s1, 1 addi.d $s0, $s0, 8 blt $s1, $a0, .LBB0_14 @@ -251,22 +208,28 @@ main: # @main ori $a0, $zero, 10 pcaddu18i $ra, %call36(putchar) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_4) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_4) - pcalau12i $a0, %pc_hi20(.LCPI0_5) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_5) + lu12i.w $a0, -251659 + ori $a0, $a0, 3113 + lu32i.d $a0, 63733 + lu52i.d $a0, $a0, -1019 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, 498073 + lu52i.d $a0, $a0, -1021 + movgr2fr.d $fa3, $a0 vldi $vr0, -864 vldi $vr2, -1000 - addi.d $a0, $sp, 76 - addi.d $a1, $sp, 80 - addi.d $s0, $sp, 80 + addi.d $a0, $sp, 84 + addi.d $a1, $sp, 88 + addi.d $s0, $sp, 88 pcaddu18i $ra, %call36(SolveCubic) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a0, $a0, %pc_lo12(.L.str.1) pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 + ld.w $a0, $sp, 84 blez $a0, .LBB0_18 # %bb.16: # %.lr.ph126.preheader pcalau12i $a0, %pc_hi20(.L.str.2) @@ -279,7 +242,7 @@ main: # @main move $a0, $fp pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 + ld.w $a0, $sp, 84 addi.d $s1, $s1, 1 addi.d $s0, $s0, 8 blt $s1, $a0, .LBB0_17 @@ -287,23 +250,30 @@ main: # @main ori $a0, $zero, 10 pcaddu18i $ra, %call36(putchar) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_6) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_6) - pcalau12i $a0, %pc_hi20(.LCPI0_7) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_7) - pcalau12i $a0, %pc_hi20(.LCPI0_8) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_8) + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, 425984 + lu52i.d $a1, $a1, 1028 + movgr2fr.d $fa0, $a1 + lu32i.d $a0, 65536 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fa3, $a0 + lu12i.w $a0, 251658 + ori $a0, $a0, 983 + lu32i.d $a0, 87818 + lu52i.d $a0, $a0, 1026 + movgr2fr.d $fa1, $a0 vldi $vr2, -994 - addi.d $a0, $sp, 76 - addi.d $a1, $sp, 80 - addi.d $s0, $sp, 80 + addi.d $a0, $sp, 84 + addi.d $a1, $sp, 88 + addi.d $s0, $sp, 88 pcaddu18i $ra, %call36(SolveCubic) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a0, $a0, %pc_lo12(.L.str.1) pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 + ld.w $a0, $sp, 84 blez $a0, .LBB0_21 # %bb.19: # %.lr.ph130.preheader pcalau12i $a0, %pc_hi20(.L.str.2) @@ -316,7 +286,7 @@ main: # @main move $a0, $fp pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 + ld.w $a0, $sp, 84 addi.d $s1, $s1, 1 addi.d $s0, $s0, 8 blt $s1, $a0, .LBB0_20 @@ -325,22 +295,27 @@ main: # @main ori $s1, $zero, 10 pcaddu18i $ra, %call36(putchar) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_9) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_9) - pcalau12i $a0, %pc_hi20(.LCPI0_10) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_10) + lu12i.w $a0, 209715 + ori $a0, $a0, 819 + move $a1, $a0 + lu32i.d $a1, -314573 + lu52i.d $a1, $a1, -1025 + movgr2fr.d $fa1, $a1 + lu32i.d $a0, 340787 + lu52i.d $a0, $a0, 1025 + movgr2fr.d $fa2, $a0 vldi $vr0, -856 vldi $vr3, -976 - addi.d $a0, $sp, 76 - addi.d $a1, $sp, 80 - addi.d $s0, $sp, 80 + addi.d $a0, $sp, 84 + addi.d $a1, $sp, 88 + addi.d $s0, $sp, 88 pcaddu18i $ra, %call36(SolveCubic) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a0, $a0, %pc_lo12(.L.str.1) pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 + ld.w $a0, $sp, 84 blez $a0, .LBB0_24 # %bb.22: # %.lr.ph134.preheader pcalau12i $a0, %pc_hi20(.L.str.2) @@ -353,7 +328,7 @@ main: # @main move $a0, $fp pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 + ld.w $a0, $sp, 84 addi.d $s2, $s2, 1 addi.d $s0, $s0, 8 blt $s2, $a0, .LBB0_23 @@ -366,17 +341,47 @@ main: # @main addi.d $fp, $a0, %pc_lo12(.L.str.1) pcalau12i $a0, %pc_hi20(.L.str.2) addi.d $s0, $a0, %pc_lo12(.L.str.2) - pcalau12i $a0, %pc_hi20(.LCPI0_11) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_11) - pcalau12i $a0, %pc_hi20(.LCPI0_12) - fld.d $fs1, $a0, %pc_lo12(.LCPI0_12) - pcalau12i $a0, %pc_hi20(.LCPI0_13) - fld.d $fs2, $a0, %pc_lo12(.LCPI0_13) - pcalau12i $a0, %pc_hi20(.LCPI0_14) - fld.d $fs3, $a0, %pc_lo12(.LCPI0_14) - pcalau12i $a0, %pc_hi20(.LCPI0_15) - fld.d $fs4, $a0, %pc_lo12(.LCPI0_15) - movgr2fr.d $fs5, $zero + lu12i.w $a0, -234882 + ori $a0, $a0, 3998 + lu32i.d $a0, 472907 + lu52i.d $a0, $a0, -1025 + lu12i.w $s3, -469763 + ori $a1, $s3, 3900 + movgr2fr.d $fs0, $a0 + move $a0, $a1 + lu32i.d $a0, -102761 + lu52i.d $a0, $a0, -1025 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -352322 + ori $a0, $a0, 1901 + lu32i.d $a0, 185073 + lu52i.d $a0, $a0, -1024 + movgr2fr.d $fs2, $a0 + lu32i.d $a1, 421527 + lu52i.d $a0, $a1, -1024 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, 461373 + ori $a0, $a0, 1803 + lu32i.d $a0, -390595 + lu52i.d $a0, $a0, -1024 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, 343932 + ori $a0, $a0, 3802 + lu32i.d $a0, -154141 + lu52i.d $a0, $a0, -1024 + movgr2fr.d $fs5, $a0 + lu12i.w $a0, -411042 + ori $a0, $a0, 852 + lu32i.d $a0, 41156 + lu52i.d $a0, $a0, -1023 + movgr2fr.d $fs6, $a0 + lu12i.w $a0, -293602 + ori $a0, $a0, 2949 + lu32i.d $a0, 230686 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fs7, $a0 + movgr2fr.d $fa0, $zero + fst.d $fa0, $sp, 8 # 8-byte Folded Spill vldi $vr0, -912 vst $vr0, $sp, 16 # 16-byte Folded Spill b .LBB0_26 @@ -409,8 +414,9 @@ main: # @main vldi $vr0, -816 vld $vr1, $sp, 32 # 16-byte Folded Reload fadd.d $fa1, $fa1, $fa0 + fld.d $fa0, $sp, 8 # 8-byte Folded Reload vst $vr1, $sp, 32 # 16-byte Folded Spill - fcmp.clt.d $fcc0, $fs5, $fa1 + fcmp.clt.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB0_25 .LBB0_28: # %.preheader105 # Parent Loop BB0_26 Depth=1 @@ -433,10 +439,8 @@ main: # @main ori $a0, $zero, 10 pcaddu18i $ra, %call36(putchar) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_19) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_19) vld $vr2, $sp, 48 # 16-byte Folded Reload - fadd.d $fa2, $fa2, $fa0 + fadd.d $fa2, $fa2, $fs7 vldi $vr0, -978 fcmp.clt.d $fcc0, $fa2, $fa0 bceqz $fcc0, .LBB0_27 @@ -454,8 +458,8 @@ main: # @main # Child Loop BB0_53 Depth 4 # Child Loop BB0_56 Depth 4 vldi $vr3, -784 - addi.d $a0, $sp, 76 - addi.d $a1, $sp, 80 + addi.d $a0, $sp, 84 + addi.d $a1, $sp, 88 vld $vr0, $sp, 16 # 16-byte Folded Reload # kill: def $f0_64 killed $f0_64 killed $vr0 vld $vr1, $sp, 32 # 16-byte Folded Reload @@ -467,33 +471,33 @@ main: # @main move $a0, $fp pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 + ld.w $a0, $sp, 84 blez $a0, .LBB0_33 # %bb.31: # %.lr.ph138.preheader # in Loop: Header=BB0_30 Depth=3 - move $s3, $zero - addi.d $s4, $sp, 80 + move $s4, $zero + addi.d $s5, $sp, 88 .p2align 4, , 16 .LBB0_32: # %.lr.ph138 # Parent Loop BB0_26 Depth=1 # Parent Loop BB0_28 Depth=2 # Parent Loop BB0_30 Depth=3 # => This Inner Loop Header: Depth=4 - ld.d $a1, $s4, 0 + ld.d $a1, $s5, 0 move $a0, $s0 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 - addi.d $s3, $s3, 1 - addi.d $s4, $s4, 8 - blt $s3, $a0, .LBB0_32 + ld.w $a0, $sp, 84 + addi.d $s4, $s4, 1 + addi.d $s5, $s5, 8 + blt $s4, $a0, .LBB0_32 .LBB0_33: # %._crit_edge139 # in Loop: Header=BB0_30 Depth=3 ori $a0, $zero, 10 pcaddu18i $ra, %call36(putchar) jirl $ra, $ra, 0 - addi.d $a0, $sp, 76 - addi.d $a1, $sp, 80 + addi.d $a0, $sp, 84 + addi.d $a1, $sp, 88 vld $vr0, $sp, 16 # 16-byte Folded Reload # kill: def $f0_64 killed $f0_64 killed $vr0 vld $vr1, $sp, 32 # 16-byte Folded Reload @@ -506,33 +510,33 @@ main: # @main move $a0, $fp pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 + ld.w $a0, $sp, 84 blez $a0, .LBB0_36 # %bb.34: # %.lr.ph138.1.preheader # in Loop: Header=BB0_30 Depth=3 - move $s3, $zero - addi.d $s4, $sp, 80 + move $s4, $zero + addi.d $s5, $sp, 88 .p2align 4, , 16 .LBB0_35: # %.lr.ph138.1 # Parent Loop BB0_26 Depth=1 # Parent Loop BB0_28 Depth=2 # Parent Loop BB0_30 Depth=3 # => This Inner Loop Header: Depth=4 - ld.d $a1, $s4, 0 + ld.d $a1, $s5, 0 move $a0, $s0 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 - addi.d $s3, $s3, 1 - addi.d $s4, $s4, 8 - blt $s3, $a0, .LBB0_35 + ld.w $a0, $sp, 84 + addi.d $s4, $s4, 1 + addi.d $s5, $s5, 8 + blt $s4, $a0, .LBB0_35 .LBB0_36: # %._crit_edge139.1 # in Loop: Header=BB0_30 Depth=3 ori $a0, $zero, 10 pcaddu18i $ra, %call36(putchar) jirl $ra, $ra, 0 - addi.d $a0, $sp, 76 - addi.d $a1, $sp, 80 + addi.d $a0, $sp, 84 + addi.d $a1, $sp, 88 vld $vr0, $sp, 16 # 16-byte Folded Reload # kill: def $f0_64 killed $f0_64 killed $vr0 vld $vr1, $sp, 32 # 16-byte Folded Reload @@ -545,33 +549,33 @@ main: # @main move $a0, $fp pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 + ld.w $a0, $sp, 84 blez $a0, .LBB0_39 # %bb.37: # %.lr.ph138.2.preheader # in Loop: Header=BB0_30 Depth=3 - move $s3, $zero - addi.d $s4, $sp, 80 + move $s4, $zero + addi.d $s5, $sp, 88 .p2align 4, , 16 .LBB0_38: # %.lr.ph138.2 # Parent Loop BB0_26 Depth=1 # Parent Loop BB0_28 Depth=2 # Parent Loop BB0_30 Depth=3 # => This Inner Loop Header: Depth=4 - ld.d $a1, $s4, 0 + ld.d $a1, $s5, 0 move $a0, $s0 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 - addi.d $s3, $s3, 1 - addi.d $s4, $s4, 8 - blt $s3, $a0, .LBB0_38 + ld.w $a0, $sp, 84 + addi.d $s4, $s4, 1 + addi.d $s5, $s5, 8 + blt $s4, $a0, .LBB0_38 .LBB0_39: # %._crit_edge139.2 # in Loop: Header=BB0_30 Depth=3 ori $a0, $zero, 10 pcaddu18i $ra, %call36(putchar) jirl $ra, $ra, 0 - addi.d $a0, $sp, 76 - addi.d $a1, $sp, 80 + addi.d $a0, $sp, 84 + addi.d $a1, $sp, 88 vld $vr0, $sp, 16 # 16-byte Folded Reload # kill: def $f0_64 killed $f0_64 killed $vr0 vld $vr1, $sp, 32 # 16-byte Folded Reload @@ -584,33 +588,33 @@ main: # @main move $a0, $fp pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 + ld.w $a0, $sp, 84 blez $a0, .LBB0_42 # %bb.40: # %.lr.ph138.3.preheader # in Loop: Header=BB0_30 Depth=3 - move $s3, $zero - addi.d $s4, $sp, 80 + move $s4, $zero + addi.d $s5, $sp, 88 .p2align 4, , 16 .LBB0_41: # %.lr.ph138.3 # Parent Loop BB0_26 Depth=1 # Parent Loop BB0_28 Depth=2 # Parent Loop BB0_30 Depth=3 # => This Inner Loop Header: Depth=4 - ld.d $a1, $s4, 0 + ld.d $a1, $s5, 0 move $a0, $s0 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 - addi.d $s3, $s3, 1 - addi.d $s4, $s4, 8 - blt $s3, $a0, .LBB0_41 + ld.w $a0, $sp, 84 + addi.d $s4, $s4, 1 + addi.d $s5, $s5, 8 + blt $s4, $a0, .LBB0_41 .LBB0_42: # %._crit_edge139.3 # in Loop: Header=BB0_30 Depth=3 ori $a0, $zero, 10 pcaddu18i $ra, %call36(putchar) jirl $ra, $ra, 0 - addi.d $a0, $sp, 76 - addi.d $a1, $sp, 80 + addi.d $a0, $sp, 84 + addi.d $a1, $sp, 88 vld $vr0, $sp, 16 # 16-byte Folded Reload # kill: def $f0_64 killed $f0_64 killed $vr0 vld $vr1, $sp, 32 # 16-byte Folded Reload @@ -623,33 +627,33 @@ main: # @main move $a0, $fp pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 + ld.w $a0, $sp, 84 blez $a0, .LBB0_45 # %bb.43: # %.lr.ph138.4.preheader # in Loop: Header=BB0_30 Depth=3 - move $s3, $zero - addi.d $s4, $sp, 80 + move $s4, $zero + addi.d $s5, $sp, 88 .p2align 4, , 16 .LBB0_44: # %.lr.ph138.4 # Parent Loop BB0_26 Depth=1 # Parent Loop BB0_28 Depth=2 # Parent Loop BB0_30 Depth=3 # => This Inner Loop Header: Depth=4 - ld.d $a1, $s4, 0 + ld.d $a1, $s5, 0 move $a0, $s0 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 - addi.d $s3, $s3, 1 - addi.d $s4, $s4, 8 - blt $s3, $a0, .LBB0_44 + ld.w $a0, $sp, 84 + addi.d $s4, $s4, 1 + addi.d $s5, $s5, 8 + blt $s4, $a0, .LBB0_44 .LBB0_45: # %._crit_edge139.4 # in Loop: Header=BB0_30 Depth=3 ori $a0, $zero, 10 pcaddu18i $ra, %call36(putchar) jirl $ra, $ra, 0 - addi.d $a0, $sp, 76 - addi.d $a1, $sp, 80 + addi.d $a0, $sp, 84 + addi.d $a1, $sp, 88 vld $vr0, $sp, 16 # 16-byte Folded Reload # kill: def $f0_64 killed $f0_64 killed $vr0 vld $vr1, $sp, 32 # 16-byte Folded Reload @@ -662,119 +666,119 @@ main: # @main move $a0, $fp pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 + ld.w $a0, $sp, 84 blez $a0, .LBB0_48 # %bb.46: # %.lr.ph138.5.preheader # in Loop: Header=BB0_30 Depth=3 - move $s3, $zero - addi.d $s4, $sp, 80 + move $s4, $zero + addi.d $s5, $sp, 88 .p2align 4, , 16 .LBB0_47: # %.lr.ph138.5 # Parent Loop BB0_26 Depth=1 # Parent Loop BB0_28 Depth=2 # Parent Loop BB0_30 Depth=3 # => This Inner Loop Header: Depth=4 - ld.d $a1, $s4, 0 + ld.d $a1, $s5, 0 move $a0, $s0 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 - addi.d $s3, $s3, 1 - addi.d $s4, $s4, 8 - blt $s3, $a0, .LBB0_47 + ld.w $a0, $sp, 84 + addi.d $s4, $s4, 1 + addi.d $s5, $s5, 8 + blt $s4, $a0, .LBB0_47 .LBB0_48: # %._crit_edge139.5 # in Loop: Header=BB0_30 Depth=3 ori $a0, $zero, 10 pcaddu18i $ra, %call36(putchar) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_16) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_16) - addi.d $a0, $sp, 76 - addi.d $a1, $sp, 80 + addi.d $a0, $sp, 84 + addi.d $a1, $sp, 88 vld $vr0, $sp, 16 # 16-byte Folded Reload # kill: def $f0_64 killed $f0_64 killed $vr0 vld $vr1, $sp, 32 # 16-byte Folded Reload # kill: def $f1_64 killed $f1_64 killed $vr1 vld $vr2, $sp, 48 # 16-byte Folded Reload # kill: def $f2_64 killed $f2_64 killed $vr2 + fmov.d $fa3, $fs5 pcaddu18i $ra, %call36(SolveCubic) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a0, $a0, %pc_lo12(.L.str.1) pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 + ld.w $a0, $sp, 84 blez $a0, .LBB0_51 # %bb.49: # %.lr.ph138.6.preheader # in Loop: Header=BB0_30 Depth=3 - move $s3, $zero - addi.d $s4, $sp, 80 + move $s4, $zero + addi.d $s5, $sp, 88 .p2align 4, , 16 .LBB0_50: # %.lr.ph138.6 # Parent Loop BB0_26 Depth=1 # Parent Loop BB0_28 Depth=2 # Parent Loop BB0_30 Depth=3 # => This Inner Loop Header: Depth=4 - ld.d $a1, $s4, 0 + ld.d $a1, $s5, 0 pcalau12i $a0, %pc_hi20(.L.str.2) addi.d $a0, $a0, %pc_lo12(.L.str.2) pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 - addi.d $s3, $s3, 1 - addi.d $s4, $s4, 8 - blt $s3, $a0, .LBB0_50 + ld.w $a0, $sp, 84 + addi.d $s4, $s4, 1 + addi.d $s5, $s5, 8 + blt $s4, $a0, .LBB0_50 .LBB0_51: # %._crit_edge139.6 # in Loop: Header=BB0_30 Depth=3 ori $a0, $zero, 10 pcaddu18i $ra, %call36(putchar) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_17) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_17) - addi.d $a0, $sp, 76 - addi.d $a1, $sp, 80 + addi.d $a0, $sp, 84 + addi.d $a1, $sp, 88 vld $vr0, $sp, 16 # 16-byte Folded Reload # kill: def $f0_64 killed $f0_64 killed $vr0 vld $vr1, $sp, 32 # 16-byte Folded Reload # kill: def $f1_64 killed $f1_64 killed $vr1 vld $vr2, $sp, 48 # 16-byte Folded Reload # kill: def $f2_64 killed $f2_64 killed $vr2 + fmov.d $fa3, $fs6 pcaddu18i $ra, %call36(SolveCubic) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a0, $a0, %pc_lo12(.L.str.1) pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 + ld.w $a0, $sp, 84 blez $a0, .LBB0_54 # %bb.52: # %.lr.ph138.7.preheader # in Loop: Header=BB0_30 Depth=3 - move $s3, $zero - addi.d $s4, $sp, 80 + move $s4, $zero + addi.d $s5, $sp, 88 .p2align 4, , 16 .LBB0_53: # %.lr.ph138.7 # Parent Loop BB0_26 Depth=1 # Parent Loop BB0_28 Depth=2 # Parent Loop BB0_30 Depth=3 # => This Inner Loop Header: Depth=4 - ld.d $a1, $s4, 0 + ld.d $a1, $s5, 0 pcalau12i $a0, %pc_hi20(.L.str.2) addi.d $a0, $a0, %pc_lo12(.L.str.2) pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 - addi.d $s3, $s3, 1 - addi.d $s4, $s4, 8 - blt $s3, $a0, .LBB0_53 + ld.w $a0, $sp, 84 + addi.d $s4, $s4, 1 + addi.d $s5, $s5, 8 + blt $s4, $a0, .LBB0_53 .LBB0_54: # %._crit_edge139.7 # in Loop: Header=BB0_30 Depth=3 ori $a0, $zero, 10 pcaddu18i $ra, %call36(putchar) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_18) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_18) - addi.d $a0, $sp, 76 - addi.d $a1, $sp, 80 + ori $a0, $s3, 3899 + lu32i.d $a0, 159383 + lu52i.d $a0, $a0, -1023 + movgr2fr.d $fa3, $a0 + addi.d $a0, $sp, 84 + addi.d $a1, $sp, 88 vld $vr0, $sp, 16 # 16-byte Folded Reload # kill: def $f0_64 killed $f0_64 killed $vr0 vld $vr1, $sp, 32 # 16-byte Folded Reload @@ -787,27 +791,27 @@ main: # @main addi.d $a0, $a0, %pc_lo12(.L.str.1) pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 + ld.w $a0, $sp, 84 blez $a0, .LBB0_29 # %bb.55: # %.lr.ph138.8.preheader # in Loop: Header=BB0_30 Depth=3 - move $s3, $zero - addi.d $s4, $sp, 80 + move $s4, $zero + addi.d $s5, $sp, 88 .p2align 4, , 16 .LBB0_56: # %.lr.ph138.8 # Parent Loop BB0_26 Depth=1 # Parent Loop BB0_28 Depth=2 # Parent Loop BB0_30 Depth=3 # => This Inner Loop Header: Depth=4 - ld.d $a1, $s4, 0 + ld.d $a1, $s5, 0 pcalau12i $a0, %pc_hi20(.L.str.2) addi.d $a0, $a0, %pc_lo12(.L.str.2) pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $sp, 76 - addi.d $s3, $s3, 1 - addi.d $s4, $s4, 8 - blt $s3, $a0, .LBB0_56 + ld.w $a0, $sp, 84 + addi.d $s4, $s4, 1 + addi.d $s5, $s5, 8 + blt $s4, $a0, .LBB0_56 b .LBB0_29 .LBB0_57: pcalau12i $a0, %pc_hi20(.Lstr.1) @@ -822,11 +826,11 @@ main: # @main .p2align 4, , 16 .LBB0_58: # =>This Inner Loop Header: Depth=1 addi.w $fp, $fp, 2 - addi.d $a1, $sp, 68 + addi.d $a1, $sp, 76 move $a0, $fp pcaddu18i $ra, %call36(usqrt) jirl $ra, $ra, 0 - ld.w $a2, $sp, 68 + ld.w $a2, $sp, 76 move $a0, $s0 move $a1, $fp pcaddu18i $ra, %call36(printf) @@ -841,13 +845,14 @@ main: # @main ori $fp, $a0, 361 pcalau12i $a0, %pc_hi20(.L.str.6) addi.d $s0, $a0, %pc_lo12(.L.str.6) + fld.d $fs4, $sp, 8 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_60: # =>This Inner Loop Header: Depth=1 - addi.d $a1, $sp, 68 + addi.d $a1, $sp, 76 move $a0, $fp pcaddu18i $ra, %call36(usqrt) jirl $ra, $ra, 0 - ld.w $a2, $sp, 68 + ld.w $a2, $sp, 76 move $a0, $s0 move $a1, $fp pcaddu18i $ra, %call36(printf) @@ -860,67 +865,88 @@ main: # @main addi.d $a0, $a0, %pc_lo12(.Lstr.2) pcaddu18i $ra, %call36(puts) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_20) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_20) - pcalau12i $a0, %pc_hi20(.LCPI0_21) - fld.d $fs1, $a0, %pc_lo12(.LCPI0_21) - pcalau12i $a0, %pc_hi20(.L.str.8) - addi.d $fp, $a0, %pc_lo12(.L.str.8) - pcalau12i $a0, %pc_hi20(.LCPI0_22) - fld.d $fs2, $a0, %pc_lo12(.LCPI0_22) - pcalau12i $a0, %pc_hi20(.LCPI0_23) - fld.d $fs3, $a0, %pc_lo12(.LCPI0_23) + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fs0, $a0 + ori $a0, $zero, 0 + lu32i.d $a0, 425984 + lu52i.d $a1, $a0, 1030 + movgr2fr.d $fs1, $a1 + pcalau12i $a1, %pc_hi20(.L.str.8) + addi.d $fp, $a1, %pc_lo12(.L.str.8) + lu12i.w $a1, -184550 + ori $a1, $a1, 2556 + lu32i.d $a1, 25165 + lu52i.d $a1, $a1, 1013 + movgr2fr.d $fs2, $a1 + lu52i.d $a0, $a0, 1031 + movgr2fr.d $fs3, $a0 .p2align 4, , 16 .LBB0_62: # =>This Inner Loop Header: Depth=1 - fmul.d $fa0, $fs5, $fs0 + fmul.d $fa0, $fs4, $fs0 fdiv.d $fa0, $fa0, $fs1 movfr2gr.d $a2, $fa0 - movfr2gr.d $a1, $fs5 + movfr2gr.d $a1, $fs4 move $a0, $fp pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - fadd.d $fs5, $fs5, $fs2 - fcmp.cle.d $fcc0, $fs5, $fs3 + fadd.d $fs4, $fs4, $fs2 + fcmp.cle.d $fcc0, $fs4, $fs3 bcnez $fcc0, .LBB0_62 # %bb.63: ori $a0, $zero, 10 pcaddu18i $ra, %call36(putchar) jirl $ra, $ra, 0 - movgr2fr.d $fs2, $zero + movgr2fr.d $fs1, $zero + ori $a0, $zero, 0 + lu32i.d $a0, 425984 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fs2, $a0 pcalau12i $a0, %pc_hi20(.L.str.10) addi.d $fp, $a0, %pc_lo12(.L.str.10) - pcalau12i $a0, %pc_hi20(.LCPI0_24) - fld.d $fs3, $a0, %pc_lo12(.LCPI0_24) - pcalau12i $a0, %pc_hi20(.LCPI0_25) - fld.d $fs4, $a0, %pc_lo12(.LCPI0_25) + lu12i.w $a0, -383703 + ori $a0, $a0, 3385 + lu32i.d $a0, 122694 + lu52i.d $a0, $a0, 1012 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -428544 + ori $a0, $a0, 2971 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1025 + movgr2fr.d $fs4, $a0 .p2align 4, , 16 .LBB0_64: # =>This Inner Loop Header: Depth=1 - fmul.d $fa0, $fs2, $fs1 + fmul.d $fa0, $fs1, $fs2 fdiv.d $fa0, $fa0, $fs0 movfr2gr.d $a2, $fa0 - movfr2gr.d $a1, $fs2 + movfr2gr.d $a1, $fs1 move $a0, $fp pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - fadd.d $fs2, $fs2, $fs3 - fcmp.cle.d $fcc0, $fs2, $fs4 + fadd.d $fs1, $fs1, $fs3 + fcmp.cle.d $fcc0, $fs1, $fs4 bcnez $fcc0, .LBB0_64 # %bb.65: move $a0, $zero - fld.d $fs5, $sp, 104 # 8-byte Folded Reload - fld.d $fs4, $sp, 112 # 8-byte Folded Reload - fld.d $fs3, $sp, 120 # 8-byte Folded Reload - fld.d $fs2, $sp, 128 # 8-byte Folded Reload - fld.d $fs1, $sp, 136 # 8-byte Folded Reload - fld.d $fs0, $sp, 144 # 8-byte Folded Reload - ld.d $s4, $sp, 152 # 8-byte Folded Reload - ld.d $s3, $sp, 160 # 8-byte Folded Reload - ld.d $s2, $sp, 168 # 8-byte Folded Reload - ld.d $s1, $sp, 176 # 8-byte Folded Reload - ld.d $s0, $sp, 184 # 8-byte Folded Reload - ld.d $fp, $sp, 192 # 8-byte Folded Reload - ld.d $ra, $sp, 200 # 8-byte Folded Reload - addi.d $sp, $sp, 208 + fld.d $fs7, $sp, 112 # 8-byte Folded Reload + fld.d $fs6, $sp, 120 # 8-byte Folded Reload + fld.d $fs5, $sp, 128 # 8-byte Folded Reload + fld.d $fs4, $sp, 136 # 8-byte Folded Reload + fld.d $fs3, $sp, 144 # 8-byte Folded Reload + fld.d $fs2, $sp, 152 # 8-byte Folded Reload + fld.d $fs1, $sp, 160 # 8-byte Folded Reload + fld.d $fs0, $sp, 168 # 8-byte Folded Reload + ld.d $s5, $sp, 176 # 8-byte Folded Reload + ld.d $s4, $sp, 184 # 8-byte Folded Reload + ld.d $s3, $sp, 192 # 8-byte Folded Reload + ld.d $s2, $sp, 200 # 8-byte Folded Reload + ld.d $s1, $sp, 208 # 8-byte Folded Reload + ld.d $s0, $sp, 216 # 8-byte Folded Reload + ld.d $fp, $sp, 224 # 8-byte Folded Reload + ld.d $ra, $sp, 232 # 8-byte Folded Reload + addi.d $sp, $sp, 240 ret .Lfunc_end0: .size main, .Lfunc_end0-main diff --git a/results/MultiSource/Benchmarks/MiBench/automotive-basicmath/CMakeFiles/automotive-basicmath.dir/cubic.s b/results/MultiSource/Benchmarks/MiBench/automotive-basicmath/CMakeFiles/automotive-basicmath.dir/cubic.s index 7b399025..ec7fa3fb 100644 --- a/results/MultiSource/Benchmarks/MiBench/automotive-basicmath/CMakeFiles/automotive-basicmath.dir/cubic.s +++ b/results/MultiSource/Benchmarks/MiBench/automotive-basicmath/CMakeFiles/automotive-basicmath.dir/cubic.s @@ -1,14 +1,6 @@ .file "cubic.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function SolveCubic -.LCPI0_0: - .dword 0x401921fb54442d18 # double 6.2831853071795862 -.LCPI0_1: - .dword 0x402921fb54442d18 # double 12.566370614359172 -.LCPI0_2: - .dword 0x3fd5555555555555 # double 0.33333333333333331 .text - .globl SolveCubic + .globl SolveCubic # -- Begin function SolveCubic .p2align 5 .type SolveCubic,@function SolveCubic: # @SolveCubic @@ -200,8 +192,11 @@ SolveCubic: # @SolveCubic jirl $ra, $ra, 0 fabs.d $fa0, $fa0 fadd.d $fa0, $fa0, $fs0 - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_2) + lu12i.w $a0, 349525 + ori $a0, $a0, 1365 + lu32i.d $a0, 349525 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa1, $a0 pcaddu18i $ra, %call36(pow) jirl $ra, $ra, 0 pcaddu18i $ra, %call36(__extenddftf2) @@ -231,7 +226,7 @@ SolveCubic: # @SolveCubic fneg.d $fa0, $fs0 movgr2cf $fcc0, $a0 fsel $fs2, $fa0, $fs0, $fcc0 - ld.d $fp, $sp, 48 # 8-byte Folded Reload + ld.d $s2, $sp, 48 # 8-byte Folded Reload b .LBB0_8 .LBB0_3: ori $a0, $zero, 3 @@ -267,7 +262,7 @@ SolveCubic: # @SolveCubic fsqrt.d $fs1, $fa0 fcmp.cor.d $fcc0, $fs1, $fs1 fmov.d $fs4, $fs1 - ld.d $fp, $sp, 48 # 8-byte Folded Reload + ld.d $s2, $sp, 48 # 8-byte Folded Reload movcf2gr $a0, $fcc0 st.d $a0, $sp, 40 bceqz $fcc0, .LBB0_11 @@ -282,12 +277,15 @@ SolveCubic: # @SolveCubic movgr2cf $fcc0, $a0 bceqz $fcc0, .LBB0_12 .LBB0_6: # %.split.split.split - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) - vldi $vr2, -896 - fmul.d $fs4, $fs4, $fa2 - fmul.d $fs5, $fa0, $fa2 - fadd.d $fa0, $fs0, $fa1 + vldi $vr1, -896 + fmul.d $fs4, $fs4, $fa1 + fmul.d $fs5, $fa0, $fa1 + lu12i.w $a0, 345154 + ori $fp, $a0, 3352 + lu32i.d $fp, -450053 + lu52i.d $a0, $fp, 1025 + movgr2fr.d $fa0, $a0 + fadd.d $fa0, $fs0, $fa0 vldi $vr1, -1016 fdiv.d $fa0, $fa0, $fa1 pcaddu18i $ra, %call36(cos) @@ -301,16 +299,16 @@ SolveCubic: # @SolveCubic jirl $ra, $ra, 0 pcaddu18i $ra, %call36(__trunctfdf2) jirl $ra, $ra, 0 - fst.d $fa0, $fp, 8 + fst.d $fa0, $s2, 8 ld.d $a0, $sp, 40 movgr2cf $fcc0, $a0 bceqz $fcc0, .LBB0_13 .LBB0_7: # %.split.split.split.split - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_1) fmul.d $fs2, $fs4, $fs3 - vldi $vr1, -896 - fmul.d $fs1, $fs1, $fa1 + vldi $vr0, -896 + fmul.d $fs1, $fs1, $fa0 + lu52i.d $a0, $fp, 1026 + movgr2fr.d $fa0, $a0 fadd.d $fa0, $fs0, $fa0 vldi $vr1, -1016 fdiv.d $fa0, $fa0, $fa1 @@ -325,7 +323,7 @@ SolveCubic: # @SolveCubic jirl $ra, $ra, 0 pcaddu18i $ra, %call36(__trunctfdf2) jirl $ra, $ra, 0 - fst.d $fa0, $fp, 16 + fst.d $fa0, $s2, 16 .LBB0_8: fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(__extenddftf2) @@ -336,7 +334,7 @@ SolveCubic: # @SolveCubic jirl $ra, $ra, 0 pcaddu18i $ra, %call36(__trunctfdf2) jirl $ra, $ra, 0 - fst.d $fa0, $fp, 0 + fst.d $fa0, $s2, 0 fld.d $fs5, $sp, 56 # 8-byte Folded Reload fld.d $fs4, $sp, 64 # 8-byte Folded Reload fld.d $fs3, $sp, 72 # 8-byte Folded Reload diff --git a/results/MultiSource/Benchmarks/MiBench/automotive-basicmath/CMakeFiles/automotive-basicmath.dir/rad2deg.s b/results/MultiSource/Benchmarks/MiBench/automotive-basicmath/CMakeFiles/automotive-basicmath.dir/rad2deg.s index 6b519897..608aad87 100644 --- a/results/MultiSource/Benchmarks/MiBench/automotive-basicmath/CMakeFiles/automotive-basicmath.dir/rad2deg.s +++ b/results/MultiSource/Benchmarks/MiBench/automotive-basicmath/CMakeFiles/automotive-basicmath.dir/rad2deg.s @@ -1,44 +1,41 @@ .file "rad2deg.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function rad2deg -.LCPI0_0: - .dword 0x4066800000000000 # double 180 -.LCPI0_1: - .dword 0x400921fb54442d18 # double 3.1415926535897931 .text - .globl rad2deg + .globl rad2deg # -- Begin function rad2deg .p2align 5 .type rad2deg,@function rad2deg: # @rad2deg # %bb.0: - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_1) + ori $a0, $zero, 0 + lu32i.d $a0, 425984 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 - fdiv.d $fa0, $fa0, $fa2 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 ret .Lfunc_end0: .size rad2deg, .Lfunc_end0-rad2deg # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function deg2rad -.LCPI1_0: - .dword 0x400921fb54442d18 # double 3.1415926535897931 -.LCPI1_1: - .dword 0x4066800000000000 # double 180 - .text - .globl deg2rad + .globl deg2rad # -- Begin function deg2rad .p2align 5 .type deg2rad,@function deg2rad: # @deg2rad # %bb.0: - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_0) - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI1_1) + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 - fdiv.d $fa0, $fa0, $fa2 + ori $a0, $zero, 0 + lu32i.d $a0, 425984 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 ret .Lfunc_end1: .size deg2rad, .Lfunc_end1-deg2rad diff --git a/results/MultiSource/Benchmarks/MiBench/automotive-susan/CMakeFiles/automotive-susan.dir/susan.s b/results/MultiSource/Benchmarks/MiBench/automotive-susan/CMakeFiles/automotive-susan.dir/susan.s index f8ca03e8..97810d79 100644 --- a/results/MultiSource/Benchmarks/MiBench/automotive-susan/CMakeFiles/automotive-susan.dir/susan.s +++ b/results/MultiSource/Benchmarks/MiBench/automotive-susan/CMakeFiles/automotive-susan.dir/susan.s @@ -514,12 +514,7 @@ int_to_uchar: # @int_to_uchar .Lfunc_end4: .size int_to_uchar, .Lfunc_end4-int_to_uchar # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function setup_brightness_lut -.LCPI5_0: - .dword 0x4059000000000000 # double 100 - .text - .globl setup_brightness_lut + .globl setup_brightness_lut # -- Begin function setup_brightness_lut .p2align 5 .type setup_brightness_lut,@function setup_brightness_lut: # @setup_brightness_lut @@ -545,9 +540,11 @@ setup_brightness_lut: # @setup_brightness_lut ffint.s.w $fs0, $fa0 bne $s0, $a0, .LBB5_3 # %bb.1: # %.split.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI5_0) addi.w $s0, $zero, -256 + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fs1, $a0 ori $s1, $zero, 257 move $s2, $s0 .p2align 4, , 16 @@ -574,9 +571,11 @@ setup_brightness_lut: # @setup_brightness_lut bne $s2, $s1, .LBB5_2 b .LBB5_5 .LBB5_3: # %.split.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI5_0) addi.w $s0, $zero, -256 + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fs1, $a0 ori $s1, $zero, 257 move $s2, $s0 .p2align 4, , 16 @@ -1345,14 +1344,7 @@ enlarge: # @enlarge .Lfunc_end9: .size enlarge, .Lfunc_end9-enlarge # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function susan_smoothing -.LCPI10_0: - .dword 0x402e000010000000 # double 15.000000476837158 -.LCPI10_1: - .dword 0x4059000000000000 # double 100 - .text - .globl susan_smoothing + .globl susan_smoothing # -- Begin function susan_smoothing .p2align 5 .type susan_smoothing,@function susan_smoothing: # @susan_smoothing @@ -1374,12 +1366,14 @@ susan_smoothing: # @susan_smoothing st.d $a4, $sp, 88 # 8-byte Folded Spill move $s2, $a0 fcvt.s.d $fs0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI10_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI10_0) st.d $a1, $sp, 96 # 8-byte Folded Spill st.d $a1, $sp, 112 st.w $a2, $sp, 108 st.w $a3, $sp, 104 + lu12i.w $a0, 65536 + lu32i.d $a0, -131072 + lu52i.d $a0, $a0, 1026 + movgr2fr.d $fa1, $a0 fcmp.cule.d $fcc0, $fa0, $fa1 fcvt.d.s $fa0, $fs0 bceqz $fcc0, .LBB10_42 @@ -1756,11 +1750,13 @@ susan_smoothing: # @susan_smoothing blt $fp, $s4, .LBB10_20 # %bb.16: # %.preheader259.preheader fneg.s $fa0, $fs0 - pcalau12i $a0, %pc_hi20(.LCPI10_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI10_1) fmul.s $fs0, $fs0, $fa0 addi.d $a0, $zero, -2 sub.w $s6, $a0, $s5 + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fs1, $a0 ld.d $a0, $sp, 48 # 8-byte Folded Reload move $a1, $s4 .p2align 4, , 16 @@ -2711,12 +2707,7 @@ edge_draw: # @edge_draw .Lfunc_end11: .size edge_draw, .Lfunc_end11-edge_draw # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function susan_thin -.LCPI12_0: - .dword 0x3fe6666666666666 # double 0.69999999999999996 - .text - .globl susan_thin + .globl susan_thin # -- Begin function susan_thin .p2align 5 .type susan_thin,@function susan_thin: # @susan_thin @@ -2744,6 +2735,11 @@ susan_thin: # @susan_thin ori $s5, $zero, 100 ori $t0, $zero, 2 ori $fp, $zero, 8 + lu12i.w $a5, 419430 + ori $a5, $a5, 1638 + lu32i.d $a5, 419430 + lu52i.d $a5, $a5, 1022 + movgr2fr.d $fa0, $a5 ori $t5, $zero, 5 ori $s8, $zero, 1 ori $t6, $zero, 4 @@ -3048,15 +3044,13 @@ susan_thin: # @susan_thin ldx.w $t1, $a0, $t1 slli.d $t2, $s7, 2 ldx.w $t2, $a0, $t2 - movgr2fr.w $fa0, $t1 - ffint.s.w $fa0, $fa0 - movgr2fr.w $fa1, $t2 - pcalau12i $t1, %pc_hi20(.LCPI12_0) - fld.d $fa2, $t1, %pc_lo12(.LCPI12_0) + movgr2fr.w $fa1, $t1 ffint.s.w $fa1, $fa1 - fdiv.s $fa0, $fa0, $fa1 - fcvt.d.s $fa0, $fa0 - fcmp.cule.d $fcc0, $fa0, $fa2 + movgr2fr.w $fa2, $t2 + ffint.s.w $fa2, $fa2 + fdiv.s $fa1, $fa1, $fa2 + fcvt.d.s $fa1, $fa1 + fcmp.cule.d $fcc0, $fa1, $fa0 bcnez $fcc0, .LBB12_54 # %bb.30: # in Loop: Header=BB12_3 Depth=1 beqz $a7, .LBB12_34 @@ -3315,16 +3309,7 @@ susan_thin: # @susan_thin .Lfunc_end12: .size susan_thin, .Lfunc_end12-susan_thin # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function susan_edges -.LCPI13_0: - .dword 0x3feccccccccccccd # double 0.90000000000000002 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI13_1: - .word 0x49742400 # float 1.0E+6 - .text - .globl susan_edges + .globl susan_edges # -- Begin function susan_edges .p2align 5 .type susan_edges,@function susan_edges: # @susan_edges @@ -3346,19 +3331,19 @@ susan_edges: # @susan_edges st.d $a3, $sp, 296 # 8-byte Folded Spill move $fp, $a2 move $s3, $a0 - st.d $a5, $sp, 264 # 8-byte Folded Spill + st.d $a5, $sp, 256 # 8-byte Folded Spill mul.w $a0, $a6, $a5 slli.d $a2, $a0, 2 - st.d $a1, $sp, 240 # 8-byte Folded Spill + st.d $a1, $sp, 232 # 8-byte Folded Spill move $a0, $a1 move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a1, $zero, 7 - blt $s5, $a1, .LBB13_44 + blt $s5, $a1, .LBB13_42 # %bb.1: # %.preheader872.lr.ph st.d $fp, $sp, 288 # 8-byte Folded Spill - ld.d $a3, $sp, 264 # 8-byte Folded Reload + ld.d $a3, $sp, 256 # 8-byte Folded Reload addi.w $a0, $a3, -3 addi.w $a2, $a3, -5 addi.w $a4, $a3, -6 @@ -3366,9 +3351,9 @@ susan_edges: # @susan_edges # %bb.2: # %.preheader872.us.preheader addi.w $a1, $s5, -3 bstrpick.d $a7, $a0, 31, 0 - ld.d $a3, $sp, 240 # 8-byte Folded Reload + ld.d $a3, $sp, 232 # 8-byte Folded Reload addi.d $a3, $a3, 12 - ld.d $a5, $sp, 264 # 8-byte Folded Reload + ld.d $a5, $sp, 256 # 8-byte Folded Reload alsl.w $a5, $a5, $a5, 1 addi.d $a6, $s3, 3 addi.d $a7, $a7, -3 @@ -3396,7 +3381,7 @@ susan_edges: # @susan_edges .LBB13_3: # %._crit_edge.us # in Loop: Header=BB13_4 Depth=1 addi.d $t6, $t6, 1 - ld.d $t8, $sp, 264 # 8-byte Folded Reload + ld.d $t8, $sp, 256 # 8-byte Folded Reload add.w $a5, $a5, $t8 add.d $t7, $t7, $t8 add.d $t0, $t0, $t8 @@ -3585,19 +3570,19 @@ susan_edges: # @susan_edges .LBB13_8: # %.preheader871 ori $a1, $zero, 9 ld.d $a5, $sp, 288 # 8-byte Folded Reload - blt $s5, $a1, .LBB13_44 + blt $s5, $a1, .LBB13_42 # %bb.9: # %.preheader.lr.ph - ld.d $a3, $sp, 264 # 8-byte Folded Reload - blt $a3, $a1, .LBB13_44 + ld.d $a3, $sp, 256 # 8-byte Folded Reload + blt $a3, $a1, .LBB13_42 # %bb.10: # %.preheader.us.preheader addi.w $a1, $s5, -4 st.d $a1, $sp, 32 # 8-byte Folded Spill - ld.d $a6, $sp, 264 # 8-byte Folded Reload + ld.d $a6, $sp, 256 # 8-byte Folded Reload addi.d $a1, $a6, -4 bstrpick.d $a1, $a1, 31, 0 addi.d $a1, $a1, -4 st.d $a1, $sp, 200 # 8-byte Folded Spill - ld.d $a1, $sp, 240 # 8-byte Folded Reload + ld.d $a1, $sp, 232 # 8-byte Folded Reload addi.d $a1, $a1, 16 st.d $a1, $sp, 24 # 8-byte Folded Spill slli.d $a7, $a6, 2 @@ -3606,34 +3591,43 @@ susan_edges: # @susan_edges addi.d $a1, $a5, 4 st.d $a1, $sp, 8 # 8-byte Folded Spill add.d $a1, $s3, $a6 - st.d $a1, $sp, 104 # 8-byte Folded Spill + st.d $a1, $sp, 112 # 8-byte Folded Spill add.d $a1, $a0, $a6 add.d $a3, $s3, $a1 - st.d $a3, $sp, 96 # 8-byte Folded Spill + st.d $a3, $sp, 104 # 8-byte Folded Spill add.d $a3, $a1, $a2 add.d $a5, $a3, $a4 add.d $a5, $a5, $s3 addi.d $a5, $a5, 21 - st.d $a5, $sp, 272 # 8-byte Folded Spill + st.d $a5, $sp, 264 # 8-byte Folded Spill add.d $a5, $s3, $a3 - st.d $a5, $sp, 88 # 8-byte Folded Spill + st.d $a5, $sp, 96 # 8-byte Folded Spill alsl.d $a3, $a4, $a3, 1 add.d $a3, $a3, $s3 - addi.d $t1, $a3, 27 + addi.d $t6, $a3, 27 slli.d $a2, $a2, 1 alsl.d $a2, $a4, $a2, 1 alsl.d $a0, $a0, $a2, 1 add.d $a0, $a0, $a6 add.d $a0, $a0, $s3 - addi.d $t6, $a0, 32 + addi.d $t7, $a0, 32 add.d $a0, $a1, $a2 add.d $a0, $a0, $s3 - addi.d $t5, $a0, 27 + addi.d $a0, $a0, 27 + st.d $a0, $sp, 288 # 8-byte Folded Spill ori $a0, $zero, 4 - st.d $a0, $sp, 248 # 8-byte Folded Spill - movgr2fr.w $fa0, $zero - vldi $vr1, -1184 - vldi $vr2, -1280 + st.d $a0, $sp, 240 # 8-byte Folded Spill + lu12i.w $a0, -209716 + ori $a0, $a0, 3277 + lu32i.d $a0, -209716 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa0, $a0 + lu12i.w $a0, 300866 + ori $a0, $a0, 1024 + movgr2fr.w $fa1, $a0 + movgr2fr.w $fa2, $zero + vldi $vr3, -1184 + vldi $vr4, -1280 ori $a2, $zero, 4 b .LBB13_12 .p2align 4, , 16 @@ -3641,29 +3635,31 @@ susan_edges: # @susan_edges # in Loop: Header=BB13_12 Depth=1 ld.d $a2, $sp, 40 # 8-byte Folded Reload addi.d $a2, $a2, 1 - ld.d $a0, $sp, 248 # 8-byte Folded Reload + ld.d $a0, $sp, 240 # 8-byte Folded Reload addi.w $a0, $a0, 1 - st.d $a0, $sp, 248 # 8-byte Folded Spill - ld.d $a0, $sp, 264 # 8-byte Folded Reload + st.d $a0, $sp, 240 # 8-byte Folded Spill + ld.d $a0, $sp, 256 # 8-byte Folded Reload ld.d $a7, $sp, 48 # 8-byte Folded Reload add.w $a7, $a7, $a0 + ld.d $a1, $sp, 112 # 8-byte Folded Reload + add.d $a1, $a1, $a0 + st.d $a1, $sp, 112 # 8-byte Folded Spill ld.d $a1, $sp, 104 # 8-byte Folded Reload add.d $a1, $a1, $a0 st.d $a1, $sp, 104 # 8-byte Folded Spill + ld.d $a1, $sp, 264 # 8-byte Folded Reload + add.d $a1, $a1, $a0 + st.d $a1, $sp, 264 # 8-byte Folded Spill ld.d $a1, $sp, 96 # 8-byte Folded Reload add.d $a1, $a1, $a0 st.d $a1, $sp, 96 # 8-byte Folded Spill - ld.d $a1, $sp, 272 # 8-byte Folded Reload - add.d $a1, $a1, $a0 - st.d $a1, $sp, 272 # 8-byte Folded Spill - ld.d $a1, $sp, 88 # 8-byte Folded Reload - add.d $a1, $a1, $a0 - st.d $a1, $sp, 88 # 8-byte Folded Spill - add.d $t1, $t1, $a0 add.d $t6, $t6, $a0 - add.d $t5, $t5, $a0 + add.d $t7, $t7, $a0 + ld.d $a1, $sp, 288 # 8-byte Folded Reload + add.d $a1, $a1, $a0 + st.d $a1, $sp, 288 # 8-byte Folded Spill ld.d $a0, $sp, 32 # 8-byte Folded Reload - beq $a2, $a0, .LBB13_44 + beq $a2, $a0, .LBB13_42 .LBB13_12: # %.preheader.us # =>This Loop Header: Depth=1 # Child Loop BB13_15 Depth 2 @@ -3675,14 +3671,15 @@ susan_edges: # @susan_edges alsl.d $s3, $a0, $a1, 2 ld.d $a1, $sp, 16 # 8-byte Folded Reload add.d $a1, $a1, $a0 - st.d $a1, $sp, 80 # 8-byte Folded Spill + st.d $a1, $sp, 88 # 8-byte Folded Spill ld.d $a1, $sp, 8 # 8-byte Folded Reload add.d $a0, $a1, $a0 st.d $a0, $sp, 56 # 8-byte Folded Spill ori $s6, $zero, 4 - st.d $t1, $sp, 232 # 8-byte Folded Spill st.d $t6, $sp, 224 # 8-byte Folded Spill - st.d $t5, $sp, 216 # 8-byte Folded Spill + st.d $t7, $sp, 216 # 8-byte Folded Spill + ld.d $a0, $sp, 288 # 8-byte Folded Reload + st.d $a0, $sp, 288 # 8-byte Folded Spill b .LBB13_15 .LBB13_13: # %.critedge.us.sink.split # in Loop: Header=BB13_15 Depth=2 @@ -3701,35 +3698,35 @@ susan_edges: # @susan_edges ld.w $a2, $s3, 0 blez $a2, .LBB13_14 # %bb.16: # in Loop: Header=BB13_15 Depth=2 - ld.d $a0, $sp, 80 # 8-byte Folded Reload + ld.d $a0, $sp, 88 # 8-byte Folded Reload ldx.bu $a0, $a0, $t8 ld.d $a1, $sp, 304 # 8-byte Folded Reload - st.d $a2, $sp, 256 # 8-byte Folded Spill + st.d $a2, $sp, 248 # 8-byte Folded Spill sub.w $a6, $a1, $a2 ld.d $a1, $sp, 296 # 8-byte Folded Reload add.d $s8, $a1, $a0 + ld.d $a0, $sp, 112 # 8-byte Folded Reload + add.d $a3, $a0, $t8 ld.d $a0, $sp, 104 # 8-byte Folded Reload - add.d $s4, $a0, $t8 - ld.d $a0, $sp, 96 # 8-byte Folded Reload add.d $s5, $a0, $t8 - ld.d $a0, $sp, 88 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload add.d $a5, $a0, $t8 - ld.d $a0, $sp, 272 # 8-byte Folded Reload + ld.d $a0, $sp, 264 # 8-byte Folded Reload add.d $a2, $a0, $t8 - add.d $a4, $t1, $t8 - add.d $a0, $t5, $t8 + add.d $a4, $t6, $t8 + ld.d $a0, $sp, 288 # 8-byte Folded Reload + add.d $a0, $a0, $t8 + st.d $a0, $sp, 272 # 8-byte Folded Spill + add.d $a0, $t7, $t8 st.d $a0, $sp, 280 # 8-byte Folded Spill - add.d $a0, $t6, $t8 - st.d $a0, $sp, 288 # 8-byte Folded Spill ori $a0, $zero, 601 - blt $a6, $a0, .LBB13_20 + blt $a6, $a0, .LBB13_22 # %bb.17: # in Loop: Header=BB13_15 Depth=2 - ld.bu $a0, $s4, 3 - ld.bu $a1, $s4, 4 + ld.bu $a0, $a3, 3 + ld.bu $a1, $a3, 4 sub.d $a0, $s8, $a0 - ld.bu $t2, $a0, 0 - st.d $t2, $sp, 176 # 8-byte Folded Spill - ld.bu $a0, $s4, 5 + ld.bu $t5, $a0, 0 + ld.bu $a0, $a3, 5 sub.d $a1, $s8, $a1 ld.bu $a1, $a1, 0 ld.bu $a7, $s5, 5 @@ -3737,14 +3734,14 @@ susan_edges: # @susan_edges ld.bu $t0, $s5, 6 ld.bu $t1, $a0, 0 sub.d $a0, $s8, $a7 - ld.bu $a3, $a0, 0 + ld.bu $s4, $a0, 0 sub.d $a0, $s8, $t0 ld.bu $fp, $a0, 0 - add.d $a0, $a1, $t2 + add.d $a0, $a1, $t5 ld.bu $a1, $s5, 7 add.d $a0, $a0, $t1 - st.d $a0, $sp, 208 # 8-byte Folded Spill - add.d $a0, $fp, $a3 + st.d $a0, $sp, 192 # 8-byte Folded Spill + add.d $a0, $fp, $s4 ld.bu $a7, $s5, 8 sub.d $a1, $s8, $a1 ld.bu $t0, $s5, 9 @@ -3757,29 +3754,30 @@ susan_edges: # @susan_edges ld.bu $a1, $a5, 9 add.d $a0, $a0, $s2 add.d $a0, $a0, $s1 - st.d $a0, $sp, 192 # 8-byte Folded Spill + st.d $a0, $sp, 208 # 8-byte Folded Spill ld.bu $a0, $a5, 10 sub.d $a1, $s8, $a1 ld.bu $t4, $a1, 0 - st.d $t4, $sp, 136 # 8-byte Folded Spill + st.d $t4, $sp, 144 # 8-byte Folded Spill ld.bu $a1, $a5, 11 sub.d $a0, $s8, $a0 - ld.bu $t5, $a0, 0 + ld.bu $t6, $a0, 0 + st.d $t6, $sp, 136 # 8-byte Folded Spill ld.bu $a0, $a5, 13 sub.d $a1, $s8, $a1 ld.bu $a1, $a1, 0 - st.d $a1, $sp, 168 # 8-byte Folded Spill + st.d $a1, $sp, 176 # 8-byte Folded Spill ld.bu $a1, $a5, 14 sub.d $a0, $s8, $a0 ld.bu $a0, $a0, 0 - st.d $a0, $sp, 160 # 8-byte Folded Spill + st.d $a0, $sp, 168 # 8-byte Folded Spill ld.bu $a0, $a5, 15 sub.d $a1, $s8, $a1 - ld.bu $ra, $a1, 0 + ld.bu $t0, $a1, 0 ld.bu $a1, $a2, -6 sub.d $a0, $s8, $a0 - ld.bu $t0, $a0, 0 - st.d $t0, $sp, 144 # 8-byte Folded Spill + ld.bu $ra, $a0, 0 + st.d $ra, $sp, 152 # 8-byte Folded Spill ld.bu $a0, $a2, -5 sub.d $a1, $s8, $a1 ld.bu $t7, $a1, 0 @@ -3793,7 +3791,7 @@ susan_edges: # @susan_edges sub.d $s0, $s8, $s0 ld.bu $s7, $s0, 0 st.d $a6, $sp, 184 # 8-byte Folded Spill - ld.d $a6, $sp, 272 # 8-byte Folded Reload + ld.d $a6, $sp, 264 # 8-byte Folded Reload ldx.bu $s0, $a6, $t8 sub.d $a1, $s8, $a1 ld.bu $a1, $a1, 0 @@ -3802,70 +3800,67 @@ susan_edges: # @susan_edges ld.bu $t2, $a4, -5 ld.bu $t3, $s0, 0 sub.d $a6, $s8, $a6 - ld.bu $t6, $a6, 0 - st.d $t6, $sp, 152 # 8-byte Folded Spill + ld.bu $s0, $a6, 0 + st.d $s0, $sp, 160 # 8-byte Folded Spill sub.d $a6, $s8, $t2 add.d $t2, $t4, $t7 - add.d $t3, $t0, $t3 - add.d $t2, $t2, $t6 + add.d $t3, $ra, $t3 + add.d $t2, $t2, $s0 sub.d $t2, $t3, $t2 ld.bu $t3, $a4, -4 - move $t6, $t5 - add.d $t7, $a3, $t5 + add.d $t7, $s4, $t6 add.d $t7, $t7, $a0 ld.bu $t4, $a4, -2 - ld.bu $a3, $a6, 0 - st.d $a3, $sp, 128 # 8-byte Folded Spill + ld.bu $s4, $a6, 0 sub.d $a0, $s8, $t3 - ld.bu $s0, $a0, 0 + ld.bu $a0, $a0, 0 sub.d $a6, $s8, $t4 - add.d $t3, $s1, $ra - move $t0, $ra + add.d $t3, $s1, $t0 + move $s0, $t0 add.d $a1, $t3, $a1 ld.bu $t3, $a4, -1 - add.d $t4, $t7, $a3 + add.d $t4, $t7, $s4 sub.d $t4, $a1, $t4 - ld.d $a1, $sp, 232 # 8-byte Folded Reload + ld.d $a1, $sp, 224 # 8-byte Folded Reload ldx.bu $a1, $a1, $t8 ld.bu $s1, $a6, 0 sub.d $a6, $s8, $t3 ld.bu $t7, $a6, 0 sub.d $a1, $s8, $a1 - ld.d $a0, $sp, 176 # 8-byte Folded Reload - add.d $a6, $a0, $fp - ld.d $a0, $sp, 216 # 8-byte Folded Reload - ldx.bu $t3, $a0, $t8 + add.d $a6, $t5, $fp + ld.d $t3, $sp, 288 # 8-byte Folded Reload + ldx.bu $t3, $t3, $t8 add.d $fp, $t1, $s2 - move $a0, $a2 - ld.d $a2, $sp, 280 # 8-byte Folded Reload - ld.bu $s2, $a2, 1 + ld.d $ra, $sp, 272 # 8-byte Folded Reload + ld.bu $s2, $ra, 1 ld.bu $a1, $a1, 0 sub.d $t1, $s8, $t3 ld.bu $t1, $t1, 0 sub.d $t3, $s8, $s2 - ld.d $ra, $sp, 168 # 8-byte Folded Reload - add.d $a6, $a6, $ra + ld.d $t6, $sp, 176 # 8-byte Folded Reload + add.d $a6, $a6, $t6 add.d $a6, $a6, $a7 - ld.bu $a7, $a2, 3 - ld.d $a3, $sp, 160 # 8-byte Folded Reload - add.d $fp, $fp, $a3 + ld.bu $a7, $ra, 3 + move $t0, $a2 + ld.d $a2, $sp, 168 # 8-byte Folded Reload + add.d $fp, $fp, $a2 add.d $fp, $fp, $s7 - ld.bu $s2, $a2, 4 + ld.bu $s2, $ra, 4 ld.bu $t3, $t3, 0 sub.d $a7, $s8, $a7 ld.bu $s7, $a7, 0 sub.d $a7, $s8, $s2 - add.d $a6, $a6, $s0 + add.d $a6, $a6, $a0 add.d $fp, $fp, $s1 alsl.d $a6, $t1, $a6, 1 add.w $a6, $a6, $t3 sub.d $a6, $fp, $a6 - ld.d $t5, $sp, 288 # 8-byte Folded Reload + ld.d $t5, $sp, 280 # 8-byte Folded Reload ld.bu $fp, $t5, -1 add.d $t2, $t2, $a1 alsl.d $t2, $t2, $t2, 1 add.d $a6, $a6, $t2 - ld.d $t2, $sp, 224 # 8-byte Folded Reload + ld.d $t2, $sp, 216 # 8-byte Folded Reload ldx.bu $t2, $t2, $t8 ld.bu $s2, $a7, 0 sub.d $a7, $s8, $fp @@ -3878,10 +3873,9 @@ susan_edges: # @susan_edges add.d $a6, $a6, $s7 alsl.w $a6, $s2, $a6, 1 sub.d $a6, $a6, $a7 - ld.d $t5, $sp, 208 # 8-byte Folded Reload + ld.d $t5, $sp, 192 # 8-byte Folded Reload sub.d $a7, $a7, $t5 - ld.bu $fp, $a2, 2 - move $a2, $a0 + ld.bu $fp, $ra, 2 sub.d $t4, $s8, $t4 ld.bu $t4, $t4, 0 add.d $t2, $a7, $t2 @@ -3890,220 +3884,229 @@ susan_edges: # @susan_edges ld.bu $fp, $a7, 0 add.d $a7, $a6, $t4 add.d $a6, $t2, $t4 - ld.d $t2, $sp, 192 # 8-byte Folded Reload + ld.d $t2, $sp, 208 # 8-byte Folded Reload sub.d $t2, $t3, $t2 add.d $t2, $t2, $fp sub.d $t3, $s8, $t5 - ld.d $t5, $sp, 216 # 8-byte Folded Reload ld.bu $t3, $t3, 0 add.w $t2, $t2, $s7 - ld.d $a0, $sp, 136 # 8-byte Folded Reload - add.d $t4, $t6, $a0 - add.d $t4, $t4, $ra + ld.d $t4, $sp, 144 # 8-byte Folded Reload + ld.d $t5, $sp, 136 # 8-byte Folded Reload + add.d $t4, $t5, $t4 + add.d $t4, $t4, $t6 add.d $t3, $t4, $t3 ld.bu $t4, $a4, -3 - add.d $t3, $t3, $a3 - add.d $t3, $t3, $t0 - ld.d $a0, $sp, 144 # 8-byte Folded Reload - add.d $t0, $t3, $a0 + add.d $t3, $t3, $a2 + move $a2, $t0 + add.d $t3, $t3, $s0 + ld.d $t0, $sp, 152 # 8-byte Folded Reload + add.d $t0, $t3, $t0 sub.d $t3, $s8, $t4 ld.bu $t3, $t3, 0 - ld.d $a0, $sp, 152 # 8-byte Folded Reload - sub.d $t0, $a0, $t0 - ld.d $a0, $sp, 128 # 8-byte Folded Reload - add.d $t0, $t0, $a0 - add.d $a0, $t0, $s0 + ld.d $t4, $sp, 160 # 8-byte Folded Reload + sub.d $t0, $t4, $t0 + add.d $t0, $t0, $s4 + ld.d $t4, $sp, 216 # 8-byte Folded Reload + add.d $a0, $t0, $a0 add.w $a0, $a0, $t3 add.d $a0, $a0, $s1 add.d $a0, $a0, $t7 add.d $a0, $a0, $a1 alsl.d $a0, $t1, $a0, 1 - ld.d $t1, $sp, 232 # 8-byte Folded Reload + ld.d $t6, $sp, 224 # 8-byte Folded Reload alsl.w $a0, $s2, $a0, 1 + move $t7, $t4 alsl.d $a1, $a6, $a6, 1 alsl.d $a0, $t2, $a0, 1 add.d $a0, $a0, $a1 mul.d $a1, $a7, $a7 mul.d $a6, $a0, $a0 add.d $a1, $a6, $a1 - ld.d $t6, $sp, 224 # 8-byte Folded Reload bstrpick.d $a1, $a1, 31, 0 - movgr2fr.d $fa3, $a1 + movgr2fr.d $fa5, $a1 ld.d $a1, $sp, 184 # 8-byte Folded Reload bstrpick.d $a1, $a1, 31, 0 - movgr2fr.d $fa4, $a1 - pcalau12i $a1, %pc_hi20(.LCPI13_0) - fld.d $fa5, $a1, %pc_lo12(.LCPI13_0) - ffint.s.l $fa3, $fa3 - fsqrt.s $fa3, $fa3 - fcvt.d.s $fa3, $fa3 - ffint.s.l $fa4, $fa4 - fcvt.d.s $fa4, $fa4 - fmul.d $fa4, $fa4, $fa5 - fcmp.clt.d $fcc0, $fa4, $fa3 - bceqz $fcc0, .LBB13_20 + movgr2fr.d $fa6, $a1 + ffint.s.l $fa5, $fa5 + fsqrt.s $fa5, $fa5 + fcvt.d.s $fa5, $fa5 + ffint.s.l $fa6, $fa6 + fcvt.d.s $fa6, $fa6 + fmul.d $fa6, $fa6, $fa0 + fcmp.clt.d $fcc0, $fa6, $fa5 + bceqz $fcc0, .LBB13_22 # %bb.18: # in Loop: Header=BB13_15 Depth=2 addi.w $a1, $a7, 0 - beqz $a1, .LBB13_33 + fmov.s $fa5, $fa1 + beqz $a1, .LBB13_20 # %bb.19: # in Loop: Header=BB13_15 Depth=2 addi.w $a0, $a0, 0 - movgr2fr.w $fa3, $a0 - ffint.s.w $fa3, $fa3 - movgr2fr.w $fa4, $a1 - ffint.s.w $fa4, $fa4 - fdiv.s $fa3, $fa3, $fa4 - b .LBB13_34 - .p2align 4, , 16 + movgr2fr.w $fa5, $a0 + ffint.s.w $fa5, $fa5 + movgr2fr.w $fa6, $a1 + ffint.s.w $fa6, $fa6 + fdiv.s $fa5, $fa5, $fa6 .LBB13_20: # in Loop: Header=BB13_15 Depth=2 + fneg.s $fa6, $fa5 + fcmp.cule.s $fcc0, $fa2, $fa5 + fsel $fa6, $fa6, $fa5, $fcc0 + fcmp.clt.s $fcc0, $fa6, $fa3 + ld.d $a4, $sp, 248 # 8-byte Folded Reload + bceqz $fcc0, .LBB13_28 +# %bb.21: # in Loop: Header=BB13_15 Depth=2 + move $a0, $zero + b .LBB13_37 + .p2align 4, , 16 +.LBB13_22: # in Loop: Header=BB13_15 Depth=2 st.d $s6, $sp, 184 # 8-byte Folded Spill st.d $s3, $sp, 192 # 8-byte Folded Spill - st.d $a2, $sp, 152 # 8-byte Folded Spill - ld.bu $a0, $s4, 3 - ld.bu $a1, $s4, 4 + st.d $a2, $sp, 160 # 8-byte Folded Spill + ld.bu $a0, $a3, 3 + ld.bu $a1, $a3, 4 sub.d $a0, $s8, $a0 - ld.bu $a6, $a0, 0 - ld.bu $a0, $s4, 5 + ld.bu $s4, $a0, 0 + ld.bu $a0, $a3, 5 sub.d $a1, $s8, $a1 ld.bu $s0, $a1, 0 ld.bu $a1, $s5, 5 sub.d $a0, $s8, $a0 - ld.bu $fp, $a0, 0 + ld.bu $a3, $a0, 0 ld.bu $a0, $s5, 6 sub.d $a1, $s8, $a1 - ld.bu $ra, $a1, 0 + ld.bu $t5, $a1, 0 ld.bu $a1, $s5, 7 sub.d $a0, $s8, $a0 ld.bu $s2, $a0, 0 - ld.bu $a3, $s5, 9 - sub.d $a0, $s8, $a1 - ld.bu $a1, $s5, 8 - ld.bu $a0, $a0, 0 - sub.d $a3, $s8, $a3 - ld.bu $a7, $a3, 0 - sub.d $a1, $s8, $a1 - ld.bu $a3, $a5, 9 - ld.d $a2, $sp, 280 # 8-byte Folded Reload - ld.bu $s3, $a1, 0 - st.d $a7, $sp, 72 # 8-byte Folded Spill - slli.d $t3, $a7, 2 - ld.bu $a1, $a5, 10 - sub.d $a3, $s8, $a3 - ld.bu $s6, $a3, 0 - ld.bu $a3, $a5, 11 + ld.bu $a0, $s5, 9 sub.d $a1, $s8, $a1 + ld.bu $a6, $s5, 8 ld.bu $a1, $a1, 0 st.d $a1, $sp, 176 # 8-byte Folded Spill - ld.bu $a1, $a5, 12 - sub.d $a3, $s8, $a3 - ld.bu $a3, $a3, 0 - st.d $a3, $sp, 168 # 8-byte Folded Spill - ld.bu $t0, $a5, 13 + sub.d $a0, $s8, $a0 + ld.bu $a7, $a0, 0 + sub.d $a0, $s8, $a6 + ld.bu $a1, $a5, 9 + ld.bu $ra, $a0, 0 + st.d $a7, $sp, 80 # 8-byte Folded Spill + slli.d $t1, $a7, 2 + ld.bu $a0, $a5, 10 sub.d $a1, $s8, $a1 ld.bu $a1, $a1, 0 - st.d $a1, $sp, 160 # 8-byte Folded Spill - ld.bu $t2, $a5, 14 - sub.d $a1, $s8, $t0 - ld.bu $a7, $a1, 0 - ld.bu $t0, $a5, 15 - sub.d $a5, $s8, $t2 - ld.bu $a1, $a5, 0 st.d $a1, $sp, 208 # 8-byte Folded Spill - ld.bu $t2, $a4, -6 - sub.d $t0, $s8, $t0 - ld.bu $a5, $t0, 0 - ld.bu $t4, $a4, -5 + ld.bu $a1, $a5, 11 + sub.d $a0, $s8, $a0 + ld.bu $fp, $a0, 0 + ld.bu $a0, $a5, 12 + sub.d $a1, $s8, $a1 + ld.d $a2, $sp, 272 # 8-byte Folded Reload + ld.bu $s3, $a1, 0 + ld.bu $a1, $a5, 13 + sub.d $a0, $s8, $a0 + ld.bu $a0, $a0, 0 + st.d $a0, $sp, 168 # 8-byte Folded Spill + ld.bu $t0, $a5, 14 + sub.d $a1, $s8, $a1 + ld.bu $a6, $a1, 0 + ld.bu $t2, $a5, 15 + sub.d $a5, $s8, $t0 + ld.bu $a0, $a5, 0 + ld.bu $t3, $a4, -6 + sub.d $t0, $s8, $t2 + ld.bu $t0, $t0, 0 + ld.bu $t2, $a4, -5 + sub.d $t3, $s8, $t3 + ld.bu $t3, $t3, 0 + move $a1, $t7 + ld.bu $t4, $a4, -4 sub.d $t2, $s8, $t2 - ld.bu $a1, $t2, 0 - move $a3, $t5 - ld.bu $t5, $a4, -4 - sub.d $t2, $s8, $t4 - ld.bu $t2, $t2, 0 - ld.bu $t4, $a4, -3 - sub.d $t5, $s8, $t5 - ld.bu $t0, $t5, 0 - ld.bu $s1, $a4, -2 + ld.bu $a5, $t2, 0 + ld.bu $t7, $a4, -3 sub.d $t4, $s8, $t4 - ld.bu $t7, $t4, 0 - ld.bu $t4, $a4, -1 - sub.d $a4, $s8, $s1 + ld.bu $s6, $t4, 0 + ld.bu $t4, $a4, -2 + sub.d $t7, $s8, $t7 + ld.bu $t7, $t7, 0 + ld.bu $s1, $a4, -1 + sub.d $a4, $s8, $t4 ld.bu $a4, $a4, 0 - ldx.bu $s1, $t1, $t8 - sub.d $t4, $s8, $t4 - ld.bu $t5, $t4, 0 - ld.d $t1, $sp, 288 # 8-byte Folded Reload - ld.bu $s5, $t1, -1 - sub.d $s7, $s8, $s1 - ldx.bu $t6, $t6, $t8 - ld.bu $s4, $a2, 1 - sub.d $s1, $s8, $s5 + ldx.bu $s5, $t6, $t8 + sub.d $t4, $s8, $s1 + ld.bu $t2, $t4, 0 + ld.d $a7, $sp, 280 # 8-byte Folded Reload + ld.bu $s1, $a7, -1 + sub.d $s5, $s8, $s5 + ldx.bu $s7, $a1, $t8 + ld.bu $t6, $a2, 1 + sub.d $s1, $s8, $s1 ld.bu $s1, $s1, 0 + sub.d $s7, $s8, $s7 + ld.bu $s7, $s7, 0 + st.d $s4, $sp, 152 # 8-byte Folded Spill + add.d $s0, $s0, $s4 + st.d $a3, $sp, 136 # 8-byte Folded Spill + add.d $s0, $s0, $a3 + add.d $s0, $s0, $s1 + add.d $s7, $s0, $s7 + ld.bu $s4, $a2, 2 + ld.bu $s5, $s5, 0 sub.d $t6, $s8, $t6 + ld.bu $s0, $t6, 0 + sub.d $t6, $s8, $s4 ld.bu $t6, $t6, 0 - st.d $a6, $sp, 144 # 8-byte Folded Spill - add.d $s0, $s0, $a6 - st.d $fp, $sp, 136 # 8-byte Folded Spill - add.d $s0, $s0, $fp - add.d $s0, $s0, $s1 - add.d $t6, $s0, $t6 - ld.bu $a6, $a2, 2 - ld.bu $s5, $s7, 0 - sub.d $s0, $s8, $s4 - ld.bu $s0, $s0, 0 - sub.d $a6, $s8, $a6 - ld.bu $a6, $a6, 0 - st.d $s2, $sp, 128 # 8-byte Folded Spill - add.d $a0, $a0, $s2 + st.d $s2, $sp, 144 # 8-byte Folded Spill + ld.d $a1, $sp, 176 # 8-byte Folded Reload + add.d $a3, $a1, $s2 + st.d $ra, $sp, 176 # 8-byte Folded Spill + add.d $a3, $a3, $ra + add.d $a3, $a3, $s0 + add.d $t6, $a3, $t6 + move $a1, $t5 + st.d $t1, $sp, 128 # 8-byte Folded Spill + alsl.d $a3, $t5, $t1, 2 + ld.d $t5, $sp, 208 # 8-byte Folded Reload + add.d $a3, $a3, $t5 + add.d $a3, $a3, $fp st.d $s3, $sp, 120 # 8-byte Folded Spill + add.d $a3, $a3, $s3 + move $s3, $a0 + ld.d $a0, $sp, 168 # 8-byte Folded Reload + add.d $a0, $a3, $a0 + ld.d $a3, $sp, 288 # 8-byte Folded Reload + ldx.bu $a3, $a3, $t8 + move $s2, $a6 + add.d $a0, $a0, $a6 add.d $a0, $a0, $s3 - ld.d $s3, $sp, 176 # 8-byte Folded Reload - add.d $a0, $a0, $s0 - add.d $a6, $a0, $a6 - st.d $t3, $sp, 112 # 8-byte Folded Spill - alsl.d $a0, $ra, $t3, 2 - ld.d $t3, $sp, 168 # 8-byte Folded Reload - move $s2, $s6 - add.d $a0, $a0, $s6 - move $s6, $a1 - ld.d $a1, $sp, 208 # 8-byte Folded Reload - add.d $a0, $a0, $s3 + add.d $a0, $a0, $t0 add.d $a0, $a0, $t3 - ld.d $t4, $sp, 160 # 8-byte Folded Reload - add.d $a0, $a0, $t4 - ldx.bu $a3, $a3, $t8 - st.d $a7, $sp, 160 # 8-byte Folded Spill - add.d $a0, $a0, $a7 - add.d $a0, $a0, $a1 add.d $a0, $a0, $a5 add.d $a0, $a0, $s6 - add.d $a0, $a0, $t2 - add.d $a0, $a0, $t0 add.d $s4, $a0, $t7 - ld.bu $t7, $a2, 3 - sub.d $a0, $s8, $a3 - ld.bu $a0, $a0, 0 - ld.bu $a2, $a2, 4 - sub.d $a3, $s8, $t7 - ld.bu $t7, $t1, 1 + ld.bu $a0, $a2, 3 + sub.d $a3, $s8, $a3 ld.bu $a3, $a3, 0 - sub.d $a2, $s8, $a2 - ld.bu $a2, $a2, 0 - sub.d $t7, $s8, $t7 - ld.bu $t7, $t7, 0 - add.d $a6, $a6, $a3 - add.d $s4, $s4, $a4 - add.w $s4, $s4, $t5 - add.d $s4, $s4, $s5 - alsl.d $s4, $a0, $s4, 2 - alsl.d $s4, $a2, $s4, 2 - alsl.d $a6, $a6, $s4, 2 - add.d $t6, $t6, $t7 + ld.bu $t7, $a2, 4 + sub.d $a0, $s8, $a0 + ld.bu $a6, $a7, 1 + ld.bu $a2, $a0, 0 + sub.d $a0, $s8, $t7 + ld.bu $a0, $a0, 0 + sub.d $a6, $s8, $a6 + ld.bu $t7, $a6, 0 + add.d $a6, $t6, $a2 + add.d $t6, $s4, $a4 + add.w $t6, $t6, $t2 + add.d $t6, $t6, $s5 + alsl.d $t6, $a3, $t6, 2 + alsl.d $t6, $a0, $t6, 2 + alsl.d $a6, $a6, $t6, 2 + add.d $t6, $s7, $t7 alsl.d $t6, $t6, $t6, 3 add.w $s7, $a6, $t6 - beqz $s7, .LBB13_23 -# %bb.21: # in Loop: Header=BB13_15 Depth=2 - move $fp, $ra - move $t4, $t0 - ld.d $t1, $sp, 152 # 8-byte Folded Reload + beqz $s7, .LBB13_25 +# %bb.23: # in Loop: Header=BB13_15 Depth=2 + move $t4, $s6 + move $s6, $a4 + ld.d $t1, $sp, 160 # 8-byte Folded Reload ld.bu $a6, $t1, -6 ld.bu $t6, $t1, -5 sub.d $a6, $s8, $a6 @@ -4113,51 +4116,54 @@ susan_edges: # @susan_edges ld.bu $a7, $t1, -2 ld.bu $ra, $t1, -1 sub.d $s4, $s8, $s4 - ld.d $t1, $sp, 272 # 8-byte Folded Reload + ld.d $t1, $sp, 264 # 8-byte Folded Reload ldx.bu $t1, $t1, $t8 sub.d $a7, $s8, $a7 sub.d $ra, $s8, $ra ld.bu $ra, $ra, 0 sub.d $t1, $s8, $t1 - add.d $s8, $a1, $s3 - st.d $t2, $sp, 280 # 8-byte Folded Spill + st.d $fp, $sp, 280 # 8-byte Folded Spill + st.d $s3, $sp, 272 # 8-byte Folded Spill + add.d $s8, $s3, $fp + st.d $a5, $sp, 160 # 8-byte Folded Spill + add.d $s8, $s8, $a5 + st.d $t2, $sp, 64 # 8-byte Folded Spill add.d $s8, $s8, $t2 - st.d $t5, $sp, 64 # 8-byte Folded Spill - add.d $s8, $s8, $t5 add.d $t6, $s8, $t6 ld.bu $a6, $a6, 0 ld.bu $t1, $t1, 0 add.d $t6, $t6, $ra - move $t5, $s2 - st.d $a5, $sp, 288 # 8-byte Folded Spill - add.d $s8, $a5, $s2 - st.d $s6, $sp, 152 # 8-byte Folded Spill - add.d $s8, $s8, $s6 + st.d $t0, $sp, 168 # 8-byte Folded Spill + add.d $s8, $t0, $t5 + st.d $t3, $sp, 72 # 8-byte Folded Spill + add.d $s8, $s8, $t3 add.d $s8, $s8, $s5 add.d $a6, $s8, $a6 - move $ra, $fp - slli.d $s8, $fp, 2 + move $a5, $a1 + slli.d $s8, $a1, 2 add.d $a6, $a6, $t1 - ld.d $s2, $sp, 144 # 8-byte Folded Reload - ld.d $a1, $sp, 136 # 8-byte Folded Reload - add.d $t1, $a1, $s2 + ld.d $ra, $sp, 136 # 8-byte Folded Reload + ld.d $a1, $sp, 152 # 8-byte Folded Reload + add.d $t1, $ra, $a1 add.d $t1, $t1, $s8 - ld.d $a5, $sp, 128 # 8-byte Folded Reload - add.d $t1, $t1, $a5 + ld.d $t5, $sp, 144 # 8-byte Folded Reload + add.d $t1, $t1, $t5 + ld.d $t0, $sp, 176 # 8-byte Folded Reload + add.d $t1, $t1, $t0 + ld.d $t2, $sp, 128 # 8-byte Folded Reload + add.d $t1, $t1, $t2 + slli.d $fp, $a3, 2 ld.d $t2, $sp, 120 # 8-byte Folded Reload add.d $t1, $t1, $t2 - ld.d $t0, $sp, 112 # 8-byte Folded Reload - add.d $t1, $t1, $t0 - slli.d $fp, $a0, 2 - add.d $t1, $t1, $t3 - ld.d $t0, $sp, 160 # 8-byte Folded Reload - add.d $t1, $t1, $t0 + add.d $t1, $t1, $s2 + move $t3, $t4 add.d $t1, $t1, $t4 - add.d $t1, $t1, $a4 + move $t4, $a4 + add.d $t1, $t1, $s6 add.d $t1, $t1, $fp - slli.d $fp, $a2, 2 + slli.d $fp, $a0, 2 add.d $t1, $t1, $s0 - add.d $t1, $t1, $a3 + add.d $t1, $t1, $a2 add.d $t1, $t1, $fp ld.bu $fp, $s4, 0 ld.bu $a7, $a7, 0 @@ -4169,205 +4175,198 @@ susan_edges: # @susan_edges alsl.d $a6, $a6, $a6, 3 add.d $a6, $a7, $a6 bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa3, $a6 + movgr2fr.d $fa5, $a6 bstrpick.d $a6, $s7, 31, 0 - movgr2fr.d $fa4, $a6 - ffint.s.l $fa3, $fa3 - ffint.s.l $fa4, $fa4 - fdiv.s $fa3, $fa3, $fa4 - fcmp.clt.s $fcc0, $fa3, $fa1 - move $t6, $a5 - move $a5, $t2 - move $t2, $s3 + movgr2fr.d $fa6, $a6 + ffint.s.l $fa5, $fa5 + ffint.s.l $fa6, $fa6 + fdiv.s $fa5, $fa5, $fa6 + fcmp.clt.s $fcc0, $fa5, $fa3 ld.d $s3, $sp, 192 # 8-byte Folded Reload ld.d $s6, $sp, 184 # 8-byte Folded Reload - bceqz $fcc0, .LBB13_24 -# %bb.22: # in Loop: Header=BB13_15 Depth=2 + bceqz $fcc0, .LBB13_26 +# %bb.24: # in Loop: Header=BB13_15 Depth=2 move $a1, $zero - b .LBB13_27 -.LBB13_23: # in Loop: Header=BB13_15 Depth=2 + ori $a0, $zero, 1 + ld.d $t6, $sp, 224 # 8-byte Folded Reload + ld.d $t7, $sp, 216 # 8-byte Folded Reload + b .LBB13_32 +.LBB13_25: # in Loop: Header=BB13_15 Depth=2 move $a0, $zero ori $a1, $zero, 1 + ld.d $t7, $sp, 216 # 8-byte Folded Reload ld.d $t6, $sp, 224 # 8-byte Folded Reload - ld.d $t1, $sp, 232 # 8-byte Folded Reload ld.d $s3, $sp, 192 # 8-byte Folded Reload ld.d $s6, $sp, 184 # 8-byte Folded Reload - b .LBB13_29 -.LBB13_24: # in Loop: Header=BB13_15 Depth=2 - fcmp.clt.s $fcc0, $fa2, $fa3 - bceqz $fcc0, .LBB13_26 -# %bb.25: # in Loop: Header=BB13_15 Depth=2 + b .LBB13_32 +.LBB13_26: # in Loop: Header=BB13_15 Depth=2 + fcmp.clt.s $fcc0, $fa4, $fa5 + ld.d $fp, $sp, 216 # 8-byte Folded Reload + bceqz $fcc0, .LBB13_30 +# %bb.27: # in Loop: Header=BB13_15 Depth=2 move $a0, $zero ori $a1, $zero, 1 - b .LBB13_28 -.LBB13_26: # in Loop: Header=BB13_15 Depth=2 - add.d $a6, $s2, $t5 - ld.d $a7, $sp, 288 # 8-byte Folded Reload - add.d $a7, $a1, $a7 - ld.d $a1, $sp, 152 # 8-byte Folded Reload - add.d $a7, $a7, $a1 + b .LBB13_31 +.LBB13_28: # in Loop: Header=BB13_15 Depth=2 + fcmp.clt.s $fcc0, $fa4, $fa6 + bceqz $fcc0, .LBB13_36 +# %bb.29: # in Loop: Header=BB13_15 Depth=2 + move $a1, $zero + ori $a0, $zero, 1 + b .LBB13_38 +.LBB13_30: # in Loop: Header=BB13_15 Depth=2 + move $a4, $s2 + move $s2, $a1 + move $t1, $a5 + move $a1, $t0 + move $t0, $t2 + move $t2, $a4 + ld.d $a4, $sp, 208 # 8-byte Folded Reload + add.d $a6, $s2, $a4 + ld.d $a4, $sp, 168 # 8-byte Folded Reload + add.d $a7, $ra, $a4 + ld.d $a4, $sp, 72 # 8-byte Folded Reload + add.d $a7, $a7, $a4 add.d $a6, $a6, $s5 add.d $a7, $a7, $s1 sub.d $a6, $a6, $a7 add.d $a6, $a6, $t7 alsl.d $a6, $a6, $a6, 1 - add.d $a7, $t6, $t2 - ld.d $a1, $sp, 208 # 8-byte Folded Reload - add.d $a5, $a5, $a1 - ld.d $a1, $sp, 280 # 8-byte Folded Reload + ld.d $a5, $sp, 280 # 8-byte Folded Reload + add.d $a7, $t5, $a5 + ld.d $a4, $sp, 272 # 8-byte Folded Reload + add.d $a5, $a1, $a4 + ld.d $a1, $sp, 160 # 8-byte Folded Reload add.d $a5, $a5, $a1 ld.d $a1, $sp, 64 # 8-byte Folded Reload add.d $a7, $a7, $a1 add.d $a5, $a5, $s0 sub.d $a5, $a7, $a5 - add.d $a3, $a5, $a3 - alsl.d $a5, $ra, $t3, 2 - ld.d $a1, $sp, 72 # 8-byte Folded Reload - alsl.d $a1, $a1, $t0, 2 - add.d $a1, $a1, $t4 - add.d $a4, $a5, $a4 + add.d $a2, $a5, $a2 + alsl.d $a5, $t1, $t0, 2 + ld.d $a1, $sp, 80 # 8-byte Folded Reload + alsl.d $a1, $a1, $t2, 2 + add.d $a1, $a1, $t3 + add.d $a4, $a5, $t4 + alsl.d $a1, $a3, $a1, 2 + sub.d $a1, $a4, $a1 alsl.d $a0, $a0, $a1, 2 - sub.d $a0, $a4, $a0 - alsl.d $a0, $a2, $a0, 2 - alsl.d $a0, $a3, $a0, 1 + alsl.d $a0, $a2, $a0, 1 add.w $a0, $a0, $a6 slt $a0, $zero, $a0 sub.d $a0, $zero, $a0 ori $a1, $a0, 1 -.LBB13_27: # %.thread755.us - # in Loop: Header=BB13_15 Depth=2 ori $a0, $zero, 1 -.LBB13_28: # %.thread755.us +.LBB13_31: # %.thread755.us # in Loop: Header=BB13_15 Depth=2 - ld.d $t1, $sp, 232 # 8-byte Folded Reload ld.d $t6, $sp, 224 # 8-byte Folded Reload -.LBB13_29: # %.thread755.us + move $t7, $fp +.LBB13_32: # %.thread755.us # in Loop: Header=BB13_15 Depth=2 - ld.d $a2, $sp, 248 # 8-byte Folded Reload + ld.d $a2, $sp, 240 # 8-byte Folded Reload add.d $a2, $a2, $a1 - ld.d $a3, $sp, 264 # 8-byte Folded Reload + ld.d $a3, $sp, 256 # 8-byte Folded Reload mul.d $a2, $a3, $a2 add.d $a2, $a0, $a2 add.w $a2, $s6, $a2 slli.d $a2, $a2, 2 - ld.d $a3, $sp, 240 # 8-byte Folded Reload + ld.d $a3, $sp, 232 # 8-byte Folded Reload ldx.w $a2, $a3, $a2 - ld.d $t5, $sp, 216 # 8-byte Folded Reload - ld.d $a4, $sp, 256 # 8-byte Folded Reload + ld.d $a4, $sp, 248 # 8-byte Folded Reload bge $a2, $a4, .LBB13_14 -# %bb.30: # in Loop: Header=BB13_15 Depth=2 - ld.d $a2, $sp, 248 # 8-byte Folded Reload +# %bb.33: # in Loop: Header=BB13_15 Depth=2 + ld.d $a2, $sp, 240 # 8-byte Folded Reload sub.d $a2, $a2, $a1 - ld.d $a3, $sp, 264 # 8-byte Folded Reload + ld.d $a3, $sp, 256 # 8-byte Folded Reload mul.d $a2, $a3, $a2 sub.d $a2, $a2, $a0 add.w $a2, $s6, $a2 slli.d $a2, $a2, 2 - ld.d $a3, $sp, 240 # 8-byte Folded Reload + ld.d $a3, $sp, 232 # 8-byte Folded Reload ldx.w $a2, $a3, $a2 blt $a4, $a2, .LBB13_14 -# %bb.31: # in Loop: Header=BB13_15 Depth=2 - ld.d $a2, $sp, 248 # 8-byte Folded Reload +# %bb.34: # in Loop: Header=BB13_15 Depth=2 + ld.d $a2, $sp, 240 # 8-byte Folded Reload alsl.d $a2, $a1, $a2, 1 - ld.d $a3, $sp, 264 # 8-byte Folded Reload + ld.d $a3, $sp, 256 # 8-byte Folded Reload mul.d $a2, $a3, $a2 alsl.d $a2, $a0, $a2, 1 add.w $a2, $s6, $a2 slli.d $a2, $a2, 2 - ld.d $a3, $sp, 240 # 8-byte Folded Reload + ld.d $a3, $sp, 232 # 8-byte Folded Reload ldx.w $a2, $a3, $a2 bge $a2, $a4, .LBB13_14 -# %bb.32: # in Loop: Header=BB13_15 Depth=2 +# %bb.35: # in Loop: Header=BB13_15 Depth=2 slli.d $a1, $a1, 1 slli.d $a0, $a0, 1 - ld.d $a2, $sp, 248 # 8-byte Folded Reload + ld.d $a2, $sp, 240 # 8-byte Folded Reload sub.d $a1, $a2, $a1 - ld.d $a2, $sp, 264 # 8-byte Folded Reload + ld.d $a2, $sp, 256 # 8-byte Folded Reload mul.d $a1, $a2, $a1 sub.d $a0, $a1, $a0 add.w $a0, $s6, $a0 slli.d $a0, $a0, 2 - ld.d $a1, $sp, 240 # 8-byte Folded Reload + ld.d $a1, $sp, 232 # 8-byte Folded Reload ldx.w $a1, $a1, $a0 ori $a0, $zero, 2 bge $a4, $a1, .LBB13_13 b .LBB13_14 -.LBB13_33: # in Loop: Header=BB13_15 Depth=2 - pcalau12i $a0, %pc_hi20(.LCPI13_1) - fld.s $fa3, $a0, %pc_lo12(.LCPI13_1) -.LBB13_34: # in Loop: Header=BB13_15 Depth=2 - ld.d $a4, $sp, 256 # 8-byte Folded Reload - fneg.s $fa4, $fa3 - fcmp.cule.s $fcc0, $fa0, $fa3 - fsel $fa4, $fa4, $fa3, $fcc0 - fcmp.clt.s $fcc0, $fa4, $fa1 - bceqz $fcc0, .LBB13_36 -# %bb.35: # in Loop: Header=BB13_15 Depth=2 - move $a0, $zero - b .LBB13_39 .LBB13_36: # in Loop: Header=BB13_15 Depth=2 - fcmp.clt.s $fcc0, $fa2, $fa4 - bceqz $fcc0, .LBB13_38 -# %bb.37: # in Loop: Header=BB13_15 Depth=2 - move $a1, $zero - ori $a0, $zero, 1 - b .LBB13_40 -.LBB13_38: # in Loop: Header=BB13_15 Depth=2 - fcmp.clt.s $fcc0, $fa3, $fa0 + fcmp.clt.s $fcc0, $fa5, $fa2 movcf2gr $a0, $fcc0 sub.d $a0, $zero, $a0 ori $a0, $a0, 1 -.LBB13_39: # in Loop: Header=BB13_15 Depth=2 +.LBB13_37: # in Loop: Header=BB13_15 Depth=2 ori $a1, $zero, 1 -.LBB13_40: # in Loop: Header=BB13_15 Depth=2 - ld.d $a2, $sp, 248 # 8-byte Folded Reload +.LBB13_38: # in Loop: Header=BB13_15 Depth=2 + ld.d $a2, $sp, 240 # 8-byte Folded Reload add.d $a2, $a2, $a0 - ld.d $a3, $sp, 264 # 8-byte Folded Reload + ld.d $a3, $sp, 256 # 8-byte Folded Reload mul.d $a2, $a3, $a2 add.d $a2, $a1, $a2 add.w $a2, $s6, $a2 slli.d $a2, $a2, 2 - ld.d $a3, $sp, 240 # 8-byte Folded Reload + ld.d $a3, $sp, 232 # 8-byte Folded Reload ldx.w $a2, $a3, $a2 bge $a2, $a4, .LBB13_14 -# %bb.41: # in Loop: Header=BB13_15 Depth=2 - ld.d $a2, $sp, 248 # 8-byte Folded Reload +# %bb.39: # in Loop: Header=BB13_15 Depth=2 + ld.d $a2, $sp, 240 # 8-byte Folded Reload sub.d $a2, $a2, $a0 - ld.d $a3, $sp, 264 # 8-byte Folded Reload + ld.d $a3, $sp, 256 # 8-byte Folded Reload mul.d $a2, $a3, $a2 sub.d $a2, $a2, $a1 add.w $a2, $s6, $a2 slli.d $a2, $a2, 2 - ld.d $a3, $sp, 240 # 8-byte Folded Reload + ld.d $a3, $sp, 232 # 8-byte Folded Reload ldx.w $a2, $a3, $a2 blt $a4, $a2, .LBB13_14 -# %bb.42: # in Loop: Header=BB13_15 Depth=2 - ld.d $a2, $sp, 248 # 8-byte Folded Reload +# %bb.40: # in Loop: Header=BB13_15 Depth=2 + ld.d $a2, $sp, 240 # 8-byte Folded Reload alsl.d $a2, $a0, $a2, 1 - ld.d $a3, $sp, 264 # 8-byte Folded Reload + ld.d $a3, $sp, 256 # 8-byte Folded Reload mul.d $a2, $a3, $a2 alsl.d $a2, $a1, $a2, 1 add.w $a2, $s6, $a2 slli.d $a2, $a2, 2 - ld.d $a3, $sp, 240 # 8-byte Folded Reload + ld.d $a3, $sp, 232 # 8-byte Folded Reload ldx.w $a2, $a3, $a2 bge $a2, $a4, .LBB13_14 -# %bb.43: # in Loop: Header=BB13_15 Depth=2 +# %bb.41: # in Loop: Header=BB13_15 Depth=2 slli.d $a0, $a0, 1 slli.d $a1, $a1, 1 - ld.d $a2, $sp, 248 # 8-byte Folded Reload + ld.d $a2, $sp, 240 # 8-byte Folded Reload sub.d $a0, $a2, $a0 - ld.d $a2, $sp, 264 # 8-byte Folded Reload + ld.d $a2, $sp, 256 # 8-byte Folded Reload mul.d $a0, $a2, $a0 sub.d $a0, $a0, $a1 add.w $a0, $s6, $a0 slli.d $a0, $a0, 2 - ld.d $a1, $sp, 240 # 8-byte Folded Reload + ld.d $a1, $sp, 232 # 8-byte Folded Reload ldx.w $a1, $a1, $a0 ori $a0, $zero, 1 bge $a4, $a1, .LBB13_13 b .LBB13_14 -.LBB13_44: # %._crit_edge877 +.LBB13_42: # %._crit_edge877 move $a0, $zero ld.d $s8, $sp, 312 # 8-byte Folded Reload ld.d $s7, $sp, 320 # 8-byte Folded Reload @@ -4385,16 +4384,7 @@ susan_edges: # @susan_edges .Lfunc_end13: .size susan_edges, .Lfunc_end13-susan_edges # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function susan_edges_small -.LCPI14_0: - .dword 0x3fd999999999999a # double 0.40000000000000002 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI14_1: - .word 0x49742400 # float 1.0E+6 - .text - .globl susan_edges_small + .globl susan_edges_small # -- Begin function susan_edges_small .p2align 5 .type susan_edges_small,@function susan_edges_small: # @susan_edges_small @@ -4424,7 +4414,7 @@ susan_edges_small: # @susan_edges_small pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $zero, 3 - blt $s4, $a0, .LBB14_40 + blt $s4, $a0, .LBB14_39 # %bb.1: # %.preheader276.lr.ph addi.w $t0, $fp, -2 blt $fp, $a0, .LBB14_8 @@ -4511,9 +4501,9 @@ susan_edges_small: # @susan_edges_small b .LBB14_5 .LBB14_8: # %.preheader275 ori $a0, $zero, 5 - blt $s4, $a0, .LBB14_40 + blt $s4, $a0, .LBB14_39 # %bb.9: # %.preheader.lr.ph - blt $fp, $a0, .LBB14_40 + blt $fp, $a0, .LBB14_39 # %bb.10: # %.preheader.us.preheader addi.w $a0, $s4, -2 st.d $a0, $sp, 40 # 8-byte Folded Spill @@ -4534,10 +4524,18 @@ susan_edges_small: # @susan_edges_small add.d $a0, $a0, $s2 addi.d $t0, $a0, 5 ori $t1, $zero, 2 - ori $t4, $zero, 480 - movgr2fr.w $fa0, $zero - vldi $vr1, -1184 - vldi $vr2, -1280 + ori $t3, $zero, 480 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa0, $a0 + lu12i.w $a0, 300866 + ori $a0, $a0, 1024 + movgr2fr.w $fa1, $a0 + movgr2fr.w $fa2, $zero + vldi $vr3, -1184 + vldi $vr4, -1280 ori $a3, $zero, 2 b .LBB14_12 .p2align 4, , 16 @@ -4552,7 +4550,7 @@ susan_edges_small: # @susan_edges_small addi.d $t1, $t1, 1 add.d $t0, $t0, $fp ld.d $a0, $sp, 40 # 8-byte Folded Reload - beq $a3, $a0, .LBB14_40 + beq $a3, $a0, .LBB14_39 .LBB14_12: # %.preheader.us # =>This Loop Header: Depth=1 # Child Loop BB14_15 Depth 2 @@ -4588,145 +4586,160 @@ susan_edges_small: # @susan_edges_small ldx.bu $a0, $t7, $t5 add.d $s4, $s0, $a0 add.d $a0, $a7, $t5 - add.d $ra, $a4, $t5 - bgeu $s3, $t4, .LBB14_20 + add.d $s8, $a4, $t5 + bgeu $s3, $t3, .LBB14_22 # %bb.17: # in Loop: Header=BB14_15 Depth=2 ld.bu $s5, $a0, 1 ori $a2, $zero, 730 sub.d $a2, $a2, $s3 ld.bu $s6, $a0, 2 - bstrpick.d $s8, $a2, 31, 0 + bstrpick.d $a6, $a2, 31, 0 sub.d $a2, $s4, $s5 - ld.bu $a5, $a2, 0 - sub.d $a2, $s4, $s6 + ld.bu $a2, $a2, 0 + sub.d $a5, $s4, $s6 ld.bu $s7, $a0, 3 - ld.bu $a6, $a2, 0 + ld.bu $a5, $a5, 0 add.d $a0, $t0, $t5 ld.bu $a0, $a0, -2 - sub.d $a2, $s4, $s7 - ld.bu $t3, $a2, 0 - ldx.bu $a2, $t0, $t5 + sub.d $ra, $s4, $s7 + ld.bu $t3, $ra, 0 + ldx.bu $a3, $t0, $t5 sub.d $a0, $s4, $a0 + ld.bu $t8, $a0, 0 + ldx.bu $ra, $a4, $t5 + sub.d $a0, $s4, $a3 ld.bu $a3, $a0, 0 - ldx.bu $a0, $a4, $t5 - sub.d $a2, $s4, $a2 - ld.bu $t8, $a2, 0 - ld.bu $a2, $ra, 1 - sub.d $t4, $s4, $a0 + ld.bu $a0, $s8, 1 + sub.d $t4, $s4, $ra ld.bu $t4, $t4, 0 - add.d $a6, $a6, $a5 - sub.d $t2, $s4, $a2 + add.d $a5, $a5, $a2 + sub.d $t2, $s4, $a0 ld.bu $t2, $t2, 0 - ld.bu $ra, $ra, 2 - add.d $a6, $a6, $t3 - sub.d $a6, $t4, $a6 - add.d $a6, $a6, $t2 - sub.d $t2, $s4, $ra + ld.bu $s8, $s8, 2 + add.d $a5, $a5, $t3 + sub.d $a5, $t4, $a5 + add.d $a5, $a5, $t2 + sub.d $t2, $s4, $s8 ld.bu $t2, $t2, 0 - add.d $a3, $a5, $a3 - add.d $a5, $t3, $t8 - add.d $a3, $a3, $t4 - sub.d $a3, $a5, $a3 - add.d $a5, $a3, $t2 - add.d $a6, $a6, $t2 - mul.d $a3, $a5, $a5 - mul.d $t2, $a6, $a6 + add.d $a2, $a2, $t8 + add.d $a3, $t3, $a3 + add.d $a2, $a2, $t4 + sub.d $a2, $a3, $a2 + add.d $a2, $a2, $t2 + add.d $a5, $a5, $t2 + mul.d $a3, $a2, $a2 + mul.d $t2, $a5, $a5 add.d $a3, $a3, $t2 bstrpick.d $a3, $a3, 31, 0 - movgr2fr.d $fa3, $a3 - ffint.s.l $fa3, $fa3 - fsqrt.s $fa3, $fa3 - pcalau12i $a3, %pc_hi20(.LCPI14_0) - fld.d $fa4, $a3, %pc_lo12(.LCPI14_0) - fcvt.d.s $fa3, $fa3 - movgr2fr.d $fa5, $s8 - ffint.d.l $fa5, $fa5 - fmul.d $fa4, $fa5, $fa4 - fcmp.clt.d $fcc0, $fa4, $fa3 - bceqz $fcc0, .LBB14_21 + movgr2fr.d $fa5, $a3 + ffint.s.l $fa5, $fa5 + fsqrt.s $fa5, $fa5 + fcvt.d.s $fa5, $fa5 + movgr2fr.d $fa6, $a6 + ffint.d.l $fa6, $fa6 + fmul.d $fa6, $fa6, $fa0 + fcmp.clt.d $fcc0, $fa6, $fa5 + bceqz $fcc0, .LBB14_23 # %bb.18: # in Loop: Header=BB14_15 Depth=2 - beqz $a5, .LBB14_31 + fmov.s $fa5, $fa1 + beqz $a2, .LBB14_20 # %bb.19: # in Loop: Header=BB14_15 Depth=2 - movgr2fr.w $fa3, $a6 - ffint.s.w $fa3, $fa3 - movgr2fr.w $fa4, $a5 - ffint.s.w $fa4, $fa4 - fdiv.s $fa3, $fa3, $fa4 - b .LBB14_32 + movgr2fr.w $fa5, $a5 + ffint.s.w $fa5, $fa5 + movgr2fr.w $fa6, $a2 + ffint.s.w $fa6, $fa6 + fdiv.s $fa5, $fa5, $fa6 +.LBB14_20: # in Loop: Header=BB14_15 Depth=2 + fneg.s $fa6, $fa5 + fcmp.cule.s $fcc0, $fa2, $fa5 + fsel $fa6, $fa6, $fa5, $fcc0 + fcmp.clt.s $fcc0, $fa6, $fa3 + ori $t3, $zero, 480 + bceqz $fcc0, .LBB14_29 +# %bb.21: # in Loop: Header=BB14_15 Depth=2 + move $a2, $zero + b .LBB14_36 .p2align 4, , 16 -.LBB14_20: # %._crit_edge +.LBB14_22: # %._crit_edge # in Loop: Header=BB14_15 Depth=2 ld.bu $a2, $a0, 1 ld.bu $a5, $a0, 2 ld.bu $a0, $a0, 3 - ldx.bu $a6, $a4, $t5 - ld.bu $t3, $ra, 1 - ld.bu $ra, $ra, 2 - sub.d $s8, $zero, $a2 + ldx.bu $s7, $a4, $t5 + ld.bu $a6, $s8, 1 + ld.bu $s8, $s8, 2 + sub.d $a2, $zero, $a2 sub.d $s5, $zero, $a5 sub.d $s6, $zero, $a0 + sub.d $ra, $zero, $s7 sub.d $s7, $zero, $a6 - sub.d $a2, $zero, $t3 - sub.d $ra, $zero, $ra - b .LBB14_22 -.LBB14_21: # in Loop: Header=BB14_15 Depth=2 - sub.d $s8, $zero, $s5 + sub.d $s8, $zero, $s8 + b .LBB14_24 +.LBB14_23: # in Loop: Header=BB14_15 Depth=2 + sub.d $a2, $zero, $s5 sub.d $s5, $zero, $s6 sub.d $s6, $zero, $s7 - sub.d $s7, $zero, $a0 - sub.d $a2, $zero, $a2 sub.d $ra, $zero, $ra - ori $t4, $zero, 480 -.LBB14_22: # in Loop: Header=BB14_15 Depth=2 - ldx.bu $a0, $s4, $s8 + sub.d $s7, $zero, $a0 + sub.d $s8, $zero, $s8 + ori $t3, $zero, 480 +.LBB14_24: # in Loop: Header=BB14_15 Depth=2 + ldx.bu $a0, $s4, $a2 ldx.bu $a3, $s4, $s6 - ldx.bu $a5, $s4, $s7 - ldx.bu $t2, $s4, $s5 - ldx.bu $t3, $s4, $a2 - ldx.bu $a2, $s4, $ra + ldx.bu $a5, $s4, $ra + ldx.bu $a6, $s4, $s5 + ldx.bu $t2, $s4, $s7 + ldx.bu $a2, $s4, $s8 add.d $s5, $a5, $a3 - add.d $a6, $s5, $a0 - add.d $a3, $a6, $t2 - add.d $a3, $a3, $t3 + add.d $s6, $s5, $a0 + add.d $a3, $s6, $a6 + add.d $a3, $a3, $t2 add.d $a5, $a3, $a2 - beqz $a5, .LBB14_26 -# %bb.23: # in Loop: Header=BB14_15 Depth=2 + beqz $a5, .LBB14_28 +# %bb.25: # in Loop: Header=BB14_15 Depth=2 add.d $a3, $t0, $t5 ld.bu $a3, $a3, -2 - ldx.bu $t2, $t0, $t5 + ldx.bu $a6, $t0, $t5 sub.d $a3, $s4, $a3 ld.bu $a3, $a3, 0 - sub.d $t2, $s4, $t2 - ld.bu $t2, $t2, 0 - add.d $a6, $a6, $a2 - add.d $a3, $a6, $a3 - add.d $a3, $a3, $t2 - movgr2fr.w $fa3, $a3 - ffint.s.w $fa3, $fa3 - movgr2fr.w $fa4, $a5 - ffint.s.w $fa4, $fa4 - fdiv.s $fa3, $fa3, $fa4 - fcmp.clt.s $fcc0, $fa3, $fa1 - bceqz $fcc0, .LBB14_25 -# %bb.24: # in Loop: Header=BB14_15 Depth=2 - move $a2, $zero - b .LBB14_28 -.LBB14_25: # in Loop: Header=BB14_15 Depth=2 - fcmp.clt.s $fcc0, $fa2, $fa3 + sub.d $a6, $s4, $a6 + ld.bu $a6, $a6, 0 + add.d $t2, $s6, $a2 + add.d $a3, $t2, $a3 + add.d $a3, $a3, $a6 + movgr2fr.w $fa5, $a3 + ffint.s.w $fa5, $fa5 + movgr2fr.w $fa6, $a5 + ffint.s.w $fa6, $fa6 + fdiv.s $fa5, $fa5, $fa6 + fcmp.clt.s $fcc0, $fa5, $fa3 bceqz $fcc0, .LBB14_27 -.LBB14_26: # in Loop: Header=BB14_15 Depth=2 +# %bb.26: # in Loop: Header=BB14_15 Depth=2 + move $a2, $zero + b .LBB14_32 +.LBB14_27: # in Loop: Header=BB14_15 Depth=2 + fcmp.clt.s $fcc0, $fa4, $fa5 + bceqz $fcc0, .LBB14_31 +.LBB14_28: # in Loop: Header=BB14_15 Depth=2 move $a0, $zero ori $a2, $zero, 1 - b .LBB14_29 -.LBB14_27: # in Loop: Header=BB14_15 Depth=2 + b .LBB14_33 +.LBB14_29: # in Loop: Header=BB14_15 Depth=2 + fcmp.clt.s $fcc0, $fa4, $fa6 + bceqz $fcc0, .LBB14_35 +# %bb.30: # in Loop: Header=BB14_15 Depth=2 + move $a0, $zero + ori $a2, $zero, 1 + b .LBB14_37 +.LBB14_31: # in Loop: Header=BB14_15 Depth=2 add.d $a0, $a2, $a0 sltu $a0, $s5, $a0 sub.d $a0, $zero, $a0 ori $a2, $a0, 1 -.LBB14_28: # %.thread262.us +.LBB14_32: # %.thread262.us # in Loop: Header=BB14_15 Depth=2 ori $a0, $zero, 1 -.LBB14_29: # %.thread262.us +.LBB14_33: # %.thread262.us # in Loop: Header=BB14_15 Depth=2 add.d $a3, $t1, $a2 mul.d $a3, $fp, $a3 @@ -4735,7 +4748,7 @@ susan_edges_small: # @susan_edges_small slli.d $a3, $a3, 2 ldx.w $a3, $s1, $a3 bge $a3, $s3, .LBB14_14 -# %bb.30: # in Loop: Header=BB14_15 Depth=2 +# %bb.34: # in Loop: Header=BB14_15 Depth=2 sub.d $a2, $t1, $a2 mul.d $a2, $fp, $a2 sub.d $a0, $a2, $a0 @@ -4745,34 +4758,14 @@ susan_edges_small: # @susan_edges_small ori $a0, $zero, 2 bge $s3, $a2, .LBB14_13 b .LBB14_14 -.LBB14_31: # in Loop: Header=BB14_15 Depth=2 - pcalau12i $a0, %pc_hi20(.LCPI14_1) - fld.s $fa3, $a0, %pc_lo12(.LCPI14_1) -.LBB14_32: # in Loop: Header=BB14_15 Depth=2 - ori $t4, $zero, 480 - fneg.s $fa4, $fa3 - fcmp.cule.s $fcc0, $fa0, $fa3 - fsel $fa4, $fa4, $fa3, $fcc0 - fcmp.clt.s $fcc0, $fa4, $fa1 - bceqz $fcc0, .LBB14_34 -# %bb.33: # in Loop: Header=BB14_15 Depth=2 - move $a2, $zero - b .LBB14_37 -.LBB14_34: # in Loop: Header=BB14_15 Depth=2 - fcmp.clt.s $fcc0, $fa2, $fa4 - bceqz $fcc0, .LBB14_36 -# %bb.35: # in Loop: Header=BB14_15 Depth=2 - move $a0, $zero - ori $a2, $zero, 1 - b .LBB14_38 -.LBB14_36: # in Loop: Header=BB14_15 Depth=2 - fcmp.clt.s $fcc0, $fa3, $fa0 +.LBB14_35: # in Loop: Header=BB14_15 Depth=2 + fcmp.clt.s $fcc0, $fa5, $fa2 movcf2gr $a0, $fcc0 sub.d $a0, $zero, $a0 ori $a2, $a0, 1 -.LBB14_37: # in Loop: Header=BB14_15 Depth=2 +.LBB14_36: # in Loop: Header=BB14_15 Depth=2 ori $a0, $zero, 1 -.LBB14_38: # in Loop: Header=BB14_15 Depth=2 +.LBB14_37: # in Loop: Header=BB14_15 Depth=2 add.d $a3, $t1, $a2 mul.d $a3, $fp, $a3 add.d $a3, $a0, $a3 @@ -4780,7 +4773,7 @@ susan_edges_small: # @susan_edges_small slli.d $a3, $a3, 2 ldx.w $a3, $s1, $a3 bge $a3, $s3, .LBB14_14 -# %bb.39: # in Loop: Header=BB14_15 Depth=2 +# %bb.38: # in Loop: Header=BB14_15 Depth=2 sub.d $a2, $t1, $a2 mul.d $a2, $fp, $a2 sub.d $a0, $a2, $a0 @@ -4790,7 +4783,7 @@ susan_edges_small: # @susan_edges_small ori $a0, $zero, 1 bge $s3, $a2, .LBB14_13 b .LBB14_14 -.LBB14_40: # %._crit_edge281 +.LBB14_39: # %._crit_edge281 move $a0, $zero ld.d $s8, $sp, 72 # 8-byte Folded Reload ld.d $s7, $sp, 80 # 8-byte Folded Reload @@ -6623,15 +6616,9 @@ susan_corners_quick: # @susan_corners_quick .Lfunc_end17: .size susan_corners_quick, .Lfunc_end17-susan_corners_quick # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI18_0: - .dword 0xb690000000000000 # double -7.0064923216240854E-46 -.LCPI18_1: - .dword 0x4059000000000000 # double 100 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI18_2: + .p2align 4, 0x0 # -- Begin function main +.LCPI18_0: .byte 0 # 0x0 .byte 4 # 0x4 .byte 8 # 0x8 @@ -6648,28 +6635,28 @@ susan_corners_quick: # @susan_corners_quick .byte 255 # 0xff .byte 255 # 0xff .byte 255 # 0xff -.LCPI18_3: +.LCPI18_1: .dword 0 # 0x0 .dword 1 # 0x1 -.LCPI18_4: +.LCPI18_2: .dword 14 # 0xe .dword 15 # 0xf -.LCPI18_5: +.LCPI18_3: .dword 12 # 0xc .dword 13 # 0xd -.LCPI18_6: +.LCPI18_4: .dword 10 # 0xa .dword 11 # 0xb -.LCPI18_7: +.LCPI18_5: .dword 8 # 0x8 .dword 9 # 0x9 -.LCPI18_8: +.LCPI18_6: .dword 6 # 0x6 .dword 7 # 0x7 -.LCPI18_9: +.LCPI18_7: .dword 4 # 0x4 .dword 5 # 0x5 -.LCPI18_10: +.LCPI18_8: .dword 2 # 0x2 .dword 3 # 0x3 .text @@ -6739,6 +6726,8 @@ main: # @main move $s5, $zero st.d $zero, $sp, 56 # 8-byte Folded Spill move $s8, $zero + lu52i.d $a0, $zero, -1175 + movgr2fr.d $fs0, $a0 b .LBB18_6 .LBB18_4: # in Loop: Header=BB18_6 Depth=1 ori $a0, $zero, 1 @@ -6787,9 +6776,7 @@ main: # @main move $a1, $zero pcaddu18i $ra, %call36(strtod) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI18_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI18_0) - fcmp.cule.d $fcc0, $fa1, $fa0 + fcmp.cule.d $fcc0, $fs0, $fa0 fcvt.s.d $fa0, $fa0 vst $vr0, $sp, 16 # 16-byte Folded Spill bceqz $fcc0, .LBB18_4 @@ -6861,10 +6848,12 @@ main: # @main jirl $ra, $ra, 0 move $s2, $a0 addi.d $s7, $a0, 258 - pcalau12i $a0, %pc_hi20(.LCPI18_1) - fld.d $fs0, $a0, %pc_lo12(.LCPI18_1) ori $s4, $zero, 2 addi.w $s6, $zero, -256 + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fs0, $a0 ori $s1, $zero, 515 .p2align 4, , 16 .LBB18_24: # %.split.us.i @@ -6930,11 +6919,13 @@ main: # @main pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 move $s2, $a0 - pcalau12i $a0, %pc_hi20(.LCPI18_1) - fld.d $fs0, $a0, %pc_lo12(.LCPI18_1) - addi.d $a0, $s2, 258 + addi.d $a0, $a0, 258 st.d $a0, $sp, 56 # 8-byte Folded Spill addi.w $s1, $zero, -256 + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fs0, $a0 ori $s3, $zero, 515 .p2align 4, , 16 .LBB18_29: # %.split.us.i92 @@ -6991,10 +6982,12 @@ main: # @main jirl $ra, $ra, 0 move $s0, $a0 addi.d $s3, $a0, 258 - pcalau12i $a0, %pc_hi20(.LCPI18_1) - fld.d $fs0, $a0, %pc_lo12(.LCPI18_1) ori $s1, $zero, 2 addi.w $s2, $zero, -256 + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fs0, $a0 ori $s4, $zero, 515 .p2align 4, , 16 .LBB18_35: # %.split.i @@ -7143,8 +7136,8 @@ main: # @main bltu $a1, $a0, .LBB18_88 # %bb.51: # %vector.main.loop.iter.check vreplgr2vr.d $vr0, $s4 - pcalau12i $a1, %pc_hi20(.LCPI18_3) - pcalau12i $a2, %pc_hi20(.LCPI18_10) + pcalau12i $a1, %pc_hi20(.LCPI18_1) + pcalau12i $a2, %pc_hi20(.LCPI18_8) bgeu $s3, $a0, .LBB18_91 # %bb.52: move $a3, $zero @@ -7238,8 +7231,8 @@ main: # @main bstrpick.d $a2, $s3, 30, 3 slli.d $a2, $a2, 3 vreplgr2vr.w $vr0, $a0 - pcalau12i $a4, %pc_hi20(.LCPI18_2) - vld $vr1, $a4, %pc_lo12(.LCPI18_2) + pcalau12i $a4, %pc_hi20(.LCPI18_0) + vld $vr1, $a4, %pc_lo12(.LCPI18_0) vreplgr2vr.w $vr2, $a1 addi.d $a4, $s0, 16 vrepli.w $vr3, 255 @@ -7421,8 +7414,8 @@ main: # @main bstrpick.d $a2, $s5, 30, 3 slli.d $a2, $a2, 3 vreplgr2vr.w $vr0, $a0 - pcalau12i $a4, %pc_hi20(.LCPI18_2) - vld $vr1, $a4, %pc_lo12(.LCPI18_2) + pcalau12i $a4, %pc_hi20(.LCPI18_0) + vld $vr1, $a4, %pc_lo12(.LCPI18_0) vreplgr2vr.w $vr2, $a1 addi.d $a4, $s4, 16 vrepli.w $vr3, 255 @@ -7467,20 +7460,20 @@ main: # @main bne $a4, $a1, .LBB18_90 b .LBB18_138 .LBB18_91: # %vector.ph262 - vld $vr1, $a1, %pc_lo12(.LCPI18_3) + vld $vr1, $a1, %pc_lo12(.LCPI18_1) + pcalau12i $a0, %pc_hi20(.LCPI18_2) + vld $vr2, $a0, %pc_lo12(.LCPI18_2) + pcalau12i $a0, %pc_hi20(.LCPI18_3) + vld $vr3, $a0, %pc_lo12(.LCPI18_3) pcalau12i $a0, %pc_hi20(.LCPI18_4) - vld $vr2, $a0, %pc_lo12(.LCPI18_4) + vld $vr4, $a0, %pc_lo12(.LCPI18_4) pcalau12i $a0, %pc_hi20(.LCPI18_5) - vld $vr3, $a0, %pc_lo12(.LCPI18_5) + vld $vr5, $a0, %pc_lo12(.LCPI18_5) pcalau12i $a0, %pc_hi20(.LCPI18_6) - vld $vr4, $a0, %pc_lo12(.LCPI18_6) + vld $vr6, $a0, %pc_lo12(.LCPI18_6) pcalau12i $a0, %pc_hi20(.LCPI18_7) - vld $vr5, $a0, %pc_lo12(.LCPI18_7) - pcalau12i $a0, %pc_hi20(.LCPI18_8) - vld $vr6, $a0, %pc_lo12(.LCPI18_8) - pcalau12i $a0, %pc_hi20(.LCPI18_9) - vld $vr7, $a0, %pc_lo12(.LCPI18_9) - vld $vr8, $a2, %pc_lo12(.LCPI18_10) + vld $vr7, $a0, %pc_lo12(.LCPI18_7) + vld $vr8, $a2, %pc_lo12(.LCPI18_8) bstrpick.d $a0, $s3, 30, 4 slli.d $a3, $a0, 4 alsl.d $a0, $a0, $s4, 4 @@ -7706,8 +7699,8 @@ main: # @main beqz $a4, .LBB18_139 .LBB18_127: # %vec.epilog.ph bstrpick.d $a4, $s3, 30, 2 - vld $vr1, $a1, %pc_lo12(.LCPI18_3) - vld $vr2, $a2, %pc_lo12(.LCPI18_10) + vld $vr1, $a1, %pc_lo12(.LCPI18_1) + vld $vr2, $a2, %pc_lo12(.LCPI18_8) slli.d $a2, $a4, 2 alsl.d $a1, $a4, $s4, 2 sub.d $a3, $a3, $a2 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jcdctmgr.s b/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jcdctmgr.s index 141ca95c..197891b8 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jcdctmgr.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jcdctmgr.s @@ -75,22 +75,7 @@ jinit_forward_dct: # @jinit_forward_dct .Lfunc_end0: .size jinit_forward_dct, .Lfunc_end0-jinit_forward_dct # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function start_pass_fdctmgr -.LCPI1_0: - .dword 0x3ff63150b14861ef # double 1.3870398450000001 -.LCPI1_1: - .dword 0x3ff4e7ae914d6fca # double 1.3065629649999999 -.LCPI1_2: - .dword 0x3ff2d062ef6c11aa # double 1.1758756020000001 -.LCPI1_3: - .dword 0x3fe92469c0a7bf3b # double 0.785694958 -.LCPI1_4: - .dword 0x3fe1517a7bc720bb # double 0.54119609999999996 -.LCPI1_5: - .dword 0x3fd1a855de72ab5d # double 0.275899379 - .text - .p2align 5 + .p2align 5 # -- Begin function start_pass_fdctmgr .type start_pass_fdctmgr,@function start_pass_fdctmgr: # @start_pass_fdctmgr # %bb.0: @@ -126,18 +111,36 @@ start_pass_fdctmgr: # @start_pass_fdctmgr addi.d $s8, $a0, %pc_lo12(start_pass_fdctmgr.aanscalefactor) move $s5, $zero vldi $vr5, -992 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI1_0) - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI1_1) - pcalau12i $a0, %pc_hi20(.LCPI1_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI1_2) - pcalau12i $a0, %pc_hi20(.LCPI1_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI1_3) - pcalau12i $a0, %pc_hi20(.LCPI1_4) - fld.d $fs4, $a0, %pc_lo12(.LCPI1_4) - pcalau12i $a0, %pc_hi20(.LCPI1_5) - fld.d $fs5, $a0, %pc_lo12(.LCPI1_5) + lu12i.w $a0, -322426 + ori $a0, $a0, 495 + lu32i.d $a0, 405840 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fs0, $a0 + lu12i.w $a0, -453418 + ori $a0, $a0, 4042 + lu32i.d $a0, 321454 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -67903 + ori $a0, $a0, 426 + lu32i.d $a0, 184418 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -259461 + ori $a0, $a0, 3899 + lu32i.d $a0, -449431 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, 506994 + ori $a0, $a0, 187 + lu32i.d $a0, 86394 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -137430 + ori $a0, $a0, 2909 + lu32i.d $a0, 108629 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fs5, $a0 ori $s2, $zero, 64 vrepli.b $vr6, 0 ori $a0, $zero, 1024 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jddctmgr.s b/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jddctmgr.s index 6aaa75ce..40bba127 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jddctmgr.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jddctmgr.s @@ -63,22 +63,7 @@ jinit_inverse_dct: # @jinit_inverse_dct .Lfunc_end0: .size jinit_inverse_dct, .Lfunc_end0-jinit_inverse_dct # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function start_pass -.LCPI1_0: - .dword 0x3ff63150b14861ef # double 1.3870398450000001 -.LCPI1_1: - .dword 0x3ff4e7ae914d6fca # double 1.3065629649999999 -.LCPI1_2: - .dword 0x3ff2d062ef6c11aa # double 1.1758756020000001 -.LCPI1_3: - .dword 0x3fe92469c0a7bf3b # double 0.785694958 -.LCPI1_4: - .dword 0x3fe1517a7bc720bb # double 0.54119609999999996 -.LCPI1_5: - .dword 0x3fd1a855de72ab5d # double 0.275899379 - .text - .p2align 5 + .p2align 5 # -- Begin function start_pass .type start_pass,@function start_pass: # @start_pass # %bb.0: @@ -111,19 +96,37 @@ start_pass: # @start_pass pcalau12i $a0, %got_pc_hi20(jpeg_idct_1x1) ld.d $s3, $a0, %got_pc_lo12(jpeg_idct_1x1) ori $a7, $zero, 7 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI1_0) - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI1_1) - pcalau12i $a0, %pc_hi20(.LCPI1_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI1_2) - pcalau12i $a0, %pc_hi20(.LCPI1_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI1_3) - pcalau12i $a0, %pc_hi20(.LCPI1_4) - fld.d $fs4, $a0, %pc_lo12(.LCPI1_4) - pcalau12i $a0, %pc_hi20(.LCPI1_5) - fld.d $fs5, $a0, %pc_lo12(.LCPI1_5) vrepli.b $vr5, 0 + lu12i.w $a0, -322426 + ori $a0, $a0, 495 + lu32i.d $a0, 405840 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fs0, $a0 + lu12i.w $a0, -453418 + ori $a0, $a0, 4042 + lu32i.d $a0, 321454 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -67903 + ori $a0, $a0, 426 + lu32i.d $a0, 184418 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -259461 + ori $a0, $a0, 3899 + lu32i.d $a0, -449431 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, 506994 + ori $a0, $a0, 187 + lu32i.d $a0, 86394 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -137430 + ori $a0, $a0, 2909 + lu32i.d $a0, 108629 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(start_pass.aanscales) addi.d $s7, $a0, %pc_lo12(start_pass.aanscales) ori $a0, $zero, 2048 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jidctflt.s b/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jidctflt.s index a7f52a72..3eef6b76 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jidctflt.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-jpeg/CMakeFiles/consumer-jpeg.dir/jidctflt.s @@ -1,16 +1,6 @@ .file "jidctflt.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function jpeg_idct_float -.LCPI0_0: - .word 0x3fb504f3 # float 1.41421354 -.LCPI0_1: - .word 0x3fec835e # float 1.84775901 -.LCPI0_2: - .word 0x3f8a8bd4 # float 1.08239222 -.LCPI0_3: - .word 0xc0273d75 # float -2.61312604 .text - .globl jpeg_idct_float + .globl jpeg_idct_float # -- Begin function jpeg_idct_float .p2align 5 .type jpeg_idct_float,@function jpeg_idct_float: # @jpeg_idct_float @@ -20,17 +10,22 @@ jpeg_idct_float: # @jpeg_idct_float ld.d $a0, $a0, 408 addi.d $a1, $a2, 64 addi.d $a2, $a5, 128 - pcalau12i $a5, %pc_hi20(.LCPI0_0) - fld.s $fa0, $a5, %pc_lo12(.LCPI0_0) - pcalau12i $a5, %pc_hi20(.LCPI0_1) - fld.s $fa1, $a5, %pc_lo12(.LCPI0_1) - pcalau12i $a5, %pc_hi20(.LCPI0_2) - fld.s $fa2, $a5, %pc_lo12(.LCPI0_2) - pcalau12i $a5, %pc_hi20(.LCPI0_3) - fld.s $fa3, $a5, %pc_lo12(.LCPI0_3) addi.d $a5, $sp, 16 ori $a6, $zero, 9 ori $a7, $zero, 1 + lu12i.w $t0, 260944 + ori $t0, $t0, 1267 + movgr2fr.w $fa0, $t0 + lu12i.w $t0, 261832 + ori $t0, $t0, 862 + movgr2fr.w $fa1, $t0 + lu12i.w $t0, 260264 + ori $t0, $t0, 3028 + movgr2fr.w $fa2, $t0 + lu12i.w $t0, -261517 + ori $t0, $t0, 3445 + lu32i.d $t0, 0 + movgr2fr.w $fa3, $t0 b .LBB0_3 .p2align 4, , 16 .LBB0_1: # in Loop: Header=BB0_3 Depth=1 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/VbrTag.s b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/VbrTag.s index 0a3ae9bc..8f4e51fa 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/VbrTag.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/VbrTag.s @@ -348,18 +348,7 @@ InitVbrTag: # @InitVbrTag .Lfunc_end4: .size InitVbrTag, .Lfunc_end4-InitVbrTag # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function PutVbrTag -.LCPI5_0: - .dword 0x3f847ae147ae147b # double 0.01 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI5_1: - .word 0x43800000 # float 256 -.LCPI5_2: - .word 0x437f0000 # float 255 - .text - .globl PutVbrTag + .globl PutVbrTag # -- Begin function PutVbrTag .p2align 5 .type PutVbrTag,@function PutVbrTag: # @PutVbrTag @@ -460,21 +449,24 @@ PutVbrTag: # @PutVbrTag movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 ld.d $a1, $s3, %pc_lo12(pVbrFrames) - pcalau12i $a2, %pc_hi20(.LCPI5_0) - fld.d $fa1, $a2, %pc_lo12(.LCPI5_0) - pcalau12i $a2, %pc_hi20(.LCPI5_1) - fld.s $fa2, $a2, %pc_lo12(.LCPI5_1) - pcalau12i $a2, %pc_hi20(.LCPI5_2) - fld.s $fa3, $a2, %pc_lo12(.LCPI5_2) - movgr2fr.d $fa4, $s2 - ffint.s.l $fa4, $fa4 + movgr2fr.d $fa1, $s2 + ffint.s.l $fa1, $fa1 + lu12i.w $a2, 293601 + ori $a2, $a2, 1147 + lu32i.d $a2, 293601 + lu52i.d $a2, $a2, 1016 + movgr2fr.d $fa2, $a2 + lu12i.w $a2, 276480 + movgr2fr.w $fa3, $a2 + lu12i.w $a2, 276464 + movgr2fr.w $fa4, $a2 ori $a2, $zero, 1 .p2align 4, , 16 .LBB5_5: # =>This Inner Loop Header: Depth=1 bstrpick.d $a3, $s7, 31, 0 movgr2fr.d $fa5, $a3 ffint.d.l $fa5, $fa5 - fmul.d $fa5, $fa5, $fa1 + fmul.d $fa5, $fa5, $fa2 fmul.d $fa5, $fa5, $fa0 vreplvei.d $vr5, $vr5, 0 vfrintrm.d $vr5, $vr5 @@ -484,10 +476,10 @@ PutVbrTag: # @PutVbrTag ldx.w $a3, $a1, $a3 movgr2fr.w $fa5, $a3 ffint.s.w $fa5, $fa5 - fmul.s $fa5, $fa5, $fa2 - fdiv.s $fa5, $fa5, $fa4 - fcmp.clt.s $fcc0, $fa3, $fa5 - fsel $fa5, $fa5, $fa3, $fcc0 + fmul.s $fa5, $fa5, $fa3 + fdiv.s $fa5, $fa5, $fa1 + fcmp.clt.s $fcc0, $fa4, $fa5 + fsel $fa5, $fa5, $fa4, $fcc0 ftintrz.l.s $fa5, $fa5 movfr2gr.d $a3, $fa5 stx.b $a3, $a2, $s8 @@ -574,25 +566,16 @@ PutVbrTag: # @PutVbrTag .Lfunc_end5: .size PutVbrTag, .Lfunc_end5-PutVbrTag # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function SeekPoint -.LCPI6_0: - .word 0x42c80000 # float 100 -.LCPI6_1: - .word 0x43800000 # float 256 -.LCPI6_2: - .word 0x3b800000 # float 0.00390625 - .text - .globl SeekPoint + .globl SeekPoint # -- Begin function SeekPoint .p2align 5 .type SeekPoint,@function SeekPoint: # @SeekPoint # %bb.0: - pcalau12i $a2, %pc_hi20(.LCPI6_0) - fld.s $fa1, $a2, %pc_lo12(.LCPI6_0) - movgr2fr.w $fa2, $zero - fcmp.clt.s $fcc0, $fa0, $fa2 - fsel $fa0, $fa0, $fa2, $fcc0 + movgr2fr.w $fa1, $zero + fcmp.clt.s $fcc0, $fa0, $fa1 + fsel $fa0, $fa0, $fa1, $fcc0 + lu12i.w $a2, 273536 + movgr2fr.w $fa1, $a2 fcmp.clt.s $fcc0, $fa1, $fa0 fsel $fa0, $fa0, $fa1, $fcc0 ftintrz.w.s $fa1, $fa0 @@ -612,19 +595,19 @@ SeekPoint: # @SeekPoint ffint.s.w $fa1, $fa1 b .LBB6_3 .LBB6_2: - pcalau12i $a0, %pc_hi20(.LCPI6_1) - fld.s $fa1, $a0, %pc_lo12(.LCPI6_1) + lu12i.w $a0, 276480 + movgr2fr.w $fa1, $a0 .LBB6_3: movgr2fr.w $fa2, $a3 + ffint.s.w $fa2, $fa2 + fsub.s $fa1, $fa1, $fa2 movgr2fr.w $fa3, $a2 ffint.s.w $fa3, $fa3 fsub.s $fa0, $fa0, $fa3 - pcalau12i $a0, %pc_hi20(.LCPI6_2) - fld.s $fa3, $a0, %pc_lo12(.LCPI6_2) - ffint.s.w $fa2, $fa2 - fsub.s $fa1, $fa1, $fa2 fmadd.s $fa0, $fa1, $fa0, $fa2 - fmul.s $fa0, $fa0, $fa3 + lu12i.w $a0, 243712 + movgr2fr.w $fa1, $a0 + fmul.s $fa0, $fa0, $fa1 movgr2fr.w $fa1, $a1 ffint.s.w $fa1, $fa1 fmul.s $fa0, $fa0, $fa1 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/decode_i386.s b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/decode_i386.s index bade2f6d..745cb0c3 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/decode_i386.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/decode_i386.s @@ -93,14 +93,7 @@ synth_1to1_mono: # @synth_1to1_mono .Lfunc_end0: .size synth_1to1_mono, .Lfunc_end0-synth_1to1_mono # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function synth_1to1 -.LCPI1_0: - .dword 0x40dfffc000000000 # double 32767 -.LCPI1_1: - .dword 0xc0e0000000000000 # double -32768 - .text - .globl synth_1to1 + .globl synth_1to1 # -- Begin function synth_1to1 .p2align 5 .type synth_1to1,@function synth_1to1: # @synth_1to1 @@ -180,12 +173,14 @@ synth_1to1: # @synth_1to1 sub.d $a1, $a1, $a3 add.d $a1, $a1, $a2 addi.d $t0, $a1, 8 - pcalau12i $a1, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI1_0) - pcalau12i $a1, %pc_hi20(.LCPI1_1) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_1) ori $a7, $zero, 16 + ori $a1, $zero, 0 + lu32i.d $a1, -64 + lu52i.d $a1, $a1, 1037 + movgr2fr.d $fa1, $a1 ori $a1, $s1, 4095 + lu52i.d $a2, $zero, -1010 + movgr2fr.d $fa0, $a2 lu12i.w $a2, 8 b .LBB1_7 .p2align 4, , 16 @@ -259,12 +254,12 @@ synth_1to1: # @synth_1to1 fmadd.d $fa2, $fa5, $fa6, $fa2 fneg.d $fa3, $fa7 fmadd.d $fa2, $fa3, $ft0, $fa2 - fcmp.cule.d $fcc0, $fa2, $fa0 + fcmp.cule.d $fcc0, $fa2, $fa1 move $t1, $a4 move $a3, $t0 bceqz $fcc0, .LBB1_4 # %bb.8: # in Loop: Header=BB1_7 Depth=1 - fcmp.cule.d $fcc0, $fa1, $fa2 + fcmp.cule.d $fcc0, $fa0, $fa2 bcnez $fcc0, .LBB1_10 # %bb.9: # in Loop: Header=BB1_7 Depth=1 st.h $a2, $t1, 0 @@ -276,51 +271,59 @@ synth_1to1: # @synth_1to1 st.h $a4, $t1, 0 b .LBB1_6 .LBB1_11: - fld.d $fa2, $a6, 256 - fld.d $fa3, $a5, 128 - fld.d $fa4, $a6, 272 - fld.d $fa5, $a5, 144 - fld.d $fa6, $a6, 288 - fld.d $fa7, $a5, 160 - fld.d $ft0, $a6, 304 - fld.d $ft1, $a5, 176 - fmul.d $fa2, $fa2, $fa3 - fmadd.d $fa2, $fa4, $fa5, $fa2 - fmadd.d $fa2, $fa6, $fa7, $fa2 - fmadd.d $fa2, $ft0, $ft1, $fa2 - fld.d $fa3, $a6, 320 - fld.d $fa4, $a5, 192 - fld.d $fa5, $a6, 336 - fld.d $fa6, $a5, 208 - fld.d $fa7, $a6, 352 - fld.d $ft0, $a5, 224 - fld.d $ft1, $a6, 368 - fld.d $ft2, $a5, 240 - fmadd.d $fa2, $fa3, $fa4, $fa2 - fmadd.d $fa2, $fa5, $fa6, $fa2 - fmadd.d $fa2, $fa7, $ft0, $fa2 - fmadd.d $fa2, $ft1, $ft2, $fa2 - fcmp.cule.d $fcc0, $fa2, $fa0 + fld.d $fa1, $a6, 256 + fld.d $fa2, $a5, 128 + fld.d $fa3, $a6, 272 + fld.d $fa4, $a5, 144 + fld.d $fa5, $a6, 288 + fld.d $fa6, $a5, 160 + fld.d $fa7, $a6, 304 + fld.d $ft0, $a5, 176 + fmul.d $fa1, $fa1, $fa2 + fmadd.d $fa1, $fa3, $fa4, $fa1 + fmadd.d $fa1, $fa5, $fa6, $fa1 + fmadd.d $fa1, $fa7, $ft0, $fa1 + fld.d $fa2, $a6, 320 + fld.d $fa3, $a5, 192 + fld.d $fa4, $a6, 336 + fld.d $fa5, $a5, 208 + fld.d $fa6, $a6, 352 + fld.d $fa7, $a5, 224 + fld.d $ft0, $a6, 368 + fld.d $ft1, $a5, 240 + fmadd.d $fa1, $fa2, $fa3, $fa1 + fmadd.d $fa1, $fa4, $fa5, $fa1 + fmadd.d $fa1, $fa6, $fa7, $fa1 + fmadd.d $fa1, $ft0, $ft1, $fa1 + ori $a5, $zero, 0 + lu32i.d $a5, -64 + lu52i.d $a5, $a5, 1037 + movgr2fr.d $fa2, $a5 + fcmp.cule.d $fcc0, $fa1, $fa2 bcnez $fcc0, .LBB1_13 # %bb.12: st.h $a1, $a4, 0 addi.w $a0, $a0, 1 b .LBB1_16 .LBB1_13: - fcmp.cule.d $fcc0, $fa1, $fa2 + fcmp.cule.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB1_15 # %bb.14: st.h $a2, $a4, 0 addi.w $a0, $a0, 1 b .LBB1_16 .LBB1_15: - ftintrz.l.d $fa2, $fa2 - movfr2gr.d $a5, $fa2 + ftintrz.l.d $fa1, $fa1 + movfr2gr.d $a5, $fa1 st.h $a5, $a4, 0 .LBB1_16: addi.d $a4, $t1, 8 addi.w $a5, $zero, -128 addi.w $a6, $zero, -15 + ori $a7, $zero, 0 + lu32i.d $a7, -64 + lu52i.d $a7, $a7, 1037 + movgr2fr.d $fa1, $a7 b .LBB1_20 .p2align 4, , 16 .LBB1_17: # in Loop: Header=BB1_20 Depth=1 @@ -401,10 +404,10 @@ synth_1to1: # @synth_1to1 fmadd.d $fa2, $fa3, $fa4, $fa2 fneg.d $fa3, $fa5 fmadd.d $fa2, $fa3, $fa6, $fa2 - fcmp.cule.d $fcc0, $fa2, $fa0 + fcmp.cule.d $fcc0, $fa2, $fa1 bceqz $fcc0, .LBB1_17 # %bb.21: # in Loop: Header=BB1_20 Depth=1 - fcmp.cule.d $fcc0, $fa1, $fa2 + fcmp.cule.d $fcc0, $fa0, $fa2 bcnez $fcc0, .LBB1_23 # %bb.22: # in Loop: Header=BB1_20 Depth=1 st.h $a2, $a4, 0 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/fft.s b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/fft.s index 57206490..e55b62e2 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/fft.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/fft.s @@ -1,10 +1,6 @@ .file "fft.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function fft_short -.LCPI0_0: - .word 0x3f3504f3 # float 0.707106769 .text - .globl fft_short + .globl fft_short # -- Begin function fft_short .p2align 5 .type fft_short,@function fft_short: # @fft_short @@ -337,8 +333,9 @@ fft_short: # @fft_short pcalau12i $a0, %pc_hi20(window_s) addi.d $s2, $a0, %pc_lo12(window_s) move $s3, $zero - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI0_0) + lu12i.w $a0, 258896 + ori $a0, $a0, 1267 + movgr2fr.w $fs0, $a0 ori $s4, $zero, 127 ori $s5, $zero, 63 ori $s6, $zero, 126 @@ -486,8 +483,9 @@ fft_short: # @fft_short pcalau12i $a0, %pc_hi20(window_s) addi.d $s2, $a0, %pc_lo12(window_s) move $s3, $zero - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI0_0) + lu12i.w $a0, 258896 + ori $a0, $a0, 1267 + movgr2fr.w $fs0, $a0 ori $s4, $zero, 127 ori $s5, $zero, 63 ori $s6, $zero, 126 @@ -646,12 +644,7 @@ fft_short: # @fft_short .Lfunc_end0: .size fft_short, .Lfunc_end0-fft_short # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function fht -.LCPI1_0: - .dword 0x3ff6a09e667f3bcd # double 1.4142135623730951 - .text - .p2align 5 + .p2align 5 # -- Begin function fht .type fht,@function fht: # @fht # %bb.0: @@ -669,8 +662,11 @@ fht: # @fht ori $a3, $zero, 4 pcalau12i $a4, %pc_hi20(costab) addi.d $a4, $a4, %pc_lo12(costab) - pcalau12i $a5, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a5, %pc_lo12(.LCPI1_0) + lu12i.w $a5, 419827 + ori $a5, $a5, 3021 + lu32i.d $a5, 434334 + lu52i.d $a5, $a5, 1023 + movgr2fr.d $fa0, $a5 ori $a5, $zero, 2 vldi $vr1, -1168 b .LBB1_2 @@ -873,12 +869,7 @@ fht: # @fht .Lfunc_end1: .size fht, .Lfunc_end1-fht # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function fft_long -.LCPI2_0: - .word 0x3f3504f3 # float 0.707106769 - .text - .globl fft_long + .globl fft_long # -- Begin function fft_long .p2align 5 .type fft_long,@function fft_long: # @fft_long @@ -1004,8 +995,9 @@ fft_long: # @fft_long addi.d $a5, $a5, %pc_lo12(rv_tbl) pcalau12i $a6, %pc_hi20(window) addi.d $a6, $a6, %pc_lo12(window) - pcalau12i $a7, %pc_hi20(.LCPI2_0) - fld.s $fa0, $a7, %pc_lo12(.LCPI2_0) + lu12i.w $a7, 258896 + ori $a7, $a7, 1267 + movgr2fr.w $fa0, $a7 ori $a7, $zero, 511 ori $t0, $zero, 255 ori $t1, $zero, 510 @@ -1138,8 +1130,9 @@ fft_long: # @fft_long addi.d $a5, $a5, %pc_lo12(rv_tbl) pcalau12i $a6, %pc_hi20(window) addi.d $a6, $a6, %pc_lo12(window) - pcalau12i $a7, %pc_hi20(.LCPI2_0) - fld.s $fa0, $a7, %pc_lo12(.LCPI2_0) + lu12i.w $a7, 258896 + ori $a7, $a7, 1267 + movgr2fr.w $fa0, $a7 ori $a7, $zero, 511 ori $t0, $zero, 255 ori $t1, $zero, 510 @@ -1282,14 +1275,6 @@ fft_long: # @fft_long .word 0x3cc90ab0 # float 0.024541229 .word 0x3f7ffec4 # float 0.999981164 .word 0x3bc90f88 # float 0.00613588467 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI3_2: - .dword 0x401921fb54442d18 # double 6.2831853071795862 -.LCPI3_3: - .dword 0x3f50000000000000 # double 9.765625E-4 -.LCPI3_4: - .dword 0x3f70000000000000 # double 0.00390625 .text .globl init_fft .p2align 5 @@ -1308,14 +1293,17 @@ init_fft: # @init_fft vld $vr0, $a0, %pc_lo12(.LCPI3_0) pcalau12i $a0, %pc_hi20(costab) addi.d $a0, $a0, %pc_lo12(costab) - vst $vr0, $a0, 0 pcalau12i $a1, %pc_hi20(.LCPI3_1) - vld $vr0, $a1, %pc_lo12(.LCPI3_1) - pcalau12i $a1, %pc_hi20(.LCPI3_2) - fld.d $fs0, $a1, %pc_lo12(.LCPI3_2) - pcalau12i $a1, %pc_hi20(.LCPI3_3) - fld.d $fs1, $a1, %pc_lo12(.LCPI3_3) - vst $vr0, $a0, 16 + vld $vr1, $a1, %pc_lo12(.LCPI3_1) + vst $vr0, $a0, 0 + vst $vr1, $a0, 16 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1025 + movgr2fr.d $fs0, $a0 + lu52i.d $a0, $zero, 1013 + movgr2fr.d $fs1, $a0 pcalau12i $a0, %pc_hi20(window) addi.d $fp, $a0, %pc_lo12(window) move $s0, $zero @@ -1343,8 +1331,8 @@ init_fft: # @init_fft addi.w $s1, $s1, 1 bne $s0, $s2, .LBB3_1 # %bb.2: # %.preheader.preheader - pcalau12i $a0, %pc_hi20(.LCPI3_4) - fld.d $fs1, $a0, %pc_lo12(.LCPI3_4) + lu52i.d $a0, $zero, 1015 + movgr2fr.d $fs1, $a0 pcalau12i $a0, %pc_hi20(window_s) addi.d $fp, $a0, %pc_lo12(window_s) move $s0, $zero diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/get_audio.s b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/get_audio.s index 0a052b7d..a33ab98c 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/get_audio.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/get_audio.s @@ -39,16 +39,7 @@ lame_init_infile: # @lame_init_infile .Lfunc_end0: .size lame_init_infile, .Lfunc_end0-lame_init_infile # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function OpenSndFile -.LCPI1_0: - .dword 0x408f400000000000 # double 1000 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI1_1: - .word 0x5f000000 # float 9.22337203E+18 - .text - .globl OpenSndFile + .globl OpenSndFile # -- Begin function OpenSndFile .p2align 5 .type OpenSndFile,@function OpenSndFile: # @OpenSndFile @@ -163,23 +154,25 @@ OpenSndFile: # @OpenSndFile bne $a1, $a2, .LBB1_15 # %bb.13: movgr2fr.d $fa0, $a0 + ld.w $a0, $s6, %pc_lo12(input_bitrate) ffint.d.l $fa0, $fa0 vldi $vr1, -992 - ld.w $a0, $s6, %pc_lo12(input_bitrate) - pcalau12i $a1, %pc_hi20(.LCPI1_0) - fld.d $fa2, $a1, %pc_lo12(.LCPI1_0) fmul.d $fa0, $fa0, $fa1 movgr2fr.w $fa1, $a0 ffint.d.w $fa1, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa2, $a0 + ld.w $a0, $s3, %pc_lo12(samp_freq) fmul.d $fa1, $fa1, $fa2 fdiv.d $fa0, $fa0, $fa1 - ld.w $a0, $s3, %pc_lo12(samp_freq) fcvt.s.d $fa0, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI1_1) - fld.s $fa1, $a1, %pc_lo12(.LCPI1_1) - movgr2fr.w $fa2, $a0 - ffint.s.w $fa2, $fa2 - fmul.s $fa0, $fa2, $fa0 + movgr2fr.w $fa1, $a0 + ffint.s.w $fa1, $fa1 + fmul.s $fa0, $fa1, $fa0 + lu12i.w $a0, 389120 + movgr2fr.w $fa1, $a0 fcmp.clt.s $fcc0, $fa0, $fa1 ftintrz.l.s $fa2, $fa0 movfr2gr.d $a0, $fa2 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/ieeefloat.s b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/ieeefloat.s index b4f60553..629fb30a 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/ieeefloat.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/ieeefloat.s @@ -1,10 +1,6 @@ .file "ieeefloat.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function ConvertFromIeeeSingle -.LCPI0_0: - .dword 0x7ff0000000000000 # double +Inf .text - .globl ConvertFromIeeeSingle + .globl ConvertFromIeeeSingle # -- Begin function ConvertFromIeeeSingle .p2align 5 .type ConvertFromIeeeSingle,@function ConvertFromIeeeSingle: # @ConvertFromIeeeSingle @@ -37,8 +33,8 @@ ConvertFromIeeeSingle: # @ConvertFromIeeeSingle movgr2fr.d $fa0, $zero ret .LBB0_5: - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_0) + lu52i.d $a0, $zero, 2047 + movgr2fr.d $fa0, $a0 ret .LBB0_6: srli.d $a1, $a1, 23 @@ -53,12 +49,7 @@ ConvertFromIeeeSingle: # @ConvertFromIeeeSingle .Lfunc_end0: .size ConvertFromIeeeSingle, .Lfunc_end0-ConvertFromIeeeSingle # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function ConvertToIeeeSingle -.LCPI1_0: - .dword 0x4170000000000000 # double 16777216 - .text - .globl ConvertToIeeeSingle + .globl ConvertToIeeeSingle # -- Begin function ConvertToIeeeSingle .p2align 5 .type ConvertToIeeeSingle,@function ConvertToIeeeSingle: # @ConvertToIeeeSingle @@ -112,8 +103,8 @@ ConvertToIeeeSingle: # @ConvertToIeeeSingle or $s0, $s0, $a0 b .LBB1_9 .LBB1_8: - pcalau12i $a1, %pc_hi20(.LCPI1_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_0) + lu52i.d $a1, $zero, 1047 + movgr2fr.d $fa1, $a1 fmul.d $fa0, $fa0, $fa1 vreplvei.d $vr0, $vr0, 0 vfrintrm.d $vr0, $vr0 @@ -135,12 +126,7 @@ ConvertToIeeeSingle: # @ConvertToIeeeSingle .Lfunc_end1: .size ConvertToIeeeSingle, .Lfunc_end1-ConvertToIeeeSingle # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function ConvertFromIeeeDouble -.LCPI2_0: - .dword 0x7ff0000000000000 # double +Inf - .text - .globl ConvertFromIeeeDouble + .globl ConvertFromIeeeDouble # -- Begin function ConvertFromIeeeDouble .p2align 5 .type ConvertFromIeeeDouble,@function ConvertFromIeeeDouble: # @ConvertFromIeeeDouble @@ -188,8 +174,8 @@ ConvertFromIeeeDouble: # @ConvertFromIeeeDouble addi.w $a0, $zero, -1074 b .LBB2_7 .LBB2_5: - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI2_0) + lu52i.d $a0, $zero, 2047 + movgr2fr.d $fa0, $a0 b .LBB2_8 .LBB2_6: ori $a1, $zero, 1 @@ -222,14 +208,7 @@ ConvertFromIeeeDouble: # @ConvertFromIeeeDouble .Lfunc_end2: .size ConvertFromIeeeDouble, .Lfunc_end2-ConvertFromIeeeDouble # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function ConvertToIeeeDouble -.LCPI3_0: - .dword 0xc130000000000000 # double -1048576 -.LCPI3_1: - .dword 0xc1e0000000000000 # double -2147483648 - .text - .globl ConvertToIeeeDouble + .globl ConvertToIeeeDouble # -- Begin function ConvertToIeeeDouble .p2align 5 .type ConvertToIeeeDouble,@function ConvertToIeeeDouble: # @ConvertToIeeeDouble @@ -287,12 +266,12 @@ ConvertToIeeeDouble: # @ConvertToIeeeDouble # kill: def $f0_64 killed $f0_64 def $vr0 vreplvei.d $vr1, $vr0, 0 vfrintrm.d $vr1, $vr1 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI3_0) ftintrz.l.d $fa1, $fa1 movfr2gr.d $a0, $fa1 addu16i.d $a0, $a0, -16 - fadd.d $fa0, $fa0, $fa2 + lu52i.d $a1, $zero, -1005 + movgr2fr.d $fa1, $a1 + fadd.d $fa0, $fa0, $fa1 slli.d $a1, $s1, 20 add.d $a1, $a1, $s0 addu16i.d $a1, $a1, 16352 @@ -319,11 +298,11 @@ ConvertToIeeeDouble: # @ConvertToIeeeDouble .LBB3_12: pcaddu18i $ra, %call36(ldexp) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI3_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI3_1) # kill: def $f0_64 killed $f0_64 def $vr0 vreplvei.d $vr0, $vr0, 0 vfrintrm.d $vr0, $vr0 + lu52i.d $a0, $zero, -994 + movgr2fr.d $fa1, $a0 fadd.d $fa0, $fa0, $fa1 ftintrz.l.d $fa0, $fa0 movfr2gr.d $a0, $fa0 @@ -357,12 +336,7 @@ ConvertToIeeeDouble: # @ConvertToIeeeDouble .Lfunc_end3: .size ConvertToIeeeDouble, .Lfunc_end3-ConvertToIeeeDouble # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function ConvertFromIeeeExtended -.LCPI4_0: - .dword 0x7ff0000000000000 # double +Inf - .text - .globl ConvertFromIeeeExtended + .globl ConvertFromIeeeExtended # -- Begin function ConvertFromIeeeExtended .p2align 5 .type ConvertFromIeeeExtended,@function ConvertFromIeeeExtended: # @ConvertFromIeeeExtended @@ -396,8 +370,8 @@ ConvertFromIeeeExtended: # @ConvertFromIeeeExtended ori $a3, $a3, 4095 bne $fp, $a3, .LBB4_6 # %bb.5: - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI4_0) + lu52i.d $a0, $zero, 2047 + movgr2fr.d $fa0, $a0 b .LBB4_7 .LBB4_6: movgr2fr.d $fa0, $a2 @@ -435,12 +409,7 @@ ConvertFromIeeeExtended: # @ConvertFromIeeeExtended .Lfunc_end4: .size ConvertFromIeeeExtended, .Lfunc_end4-ConvertFromIeeeExtended # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function ConvertToIeeeExtended -.LCPI5_0: - .dword 0xc1e0000000000000 # double -2147483648 - .text - .globl ConvertToIeeeExtended + .globl ConvertToIeeeExtended # -- Begin function ConvertToIeeeExtended .p2align 5 .type ConvertToIeeeExtended,@function ConvertToIeeeExtended: # @ConvertToIeeeExtended @@ -493,11 +462,11 @@ ConvertToIeeeExtended: # @ConvertToIeeeExtended ori $a0, $zero, 32 pcaddu18i $ra, %call36(ldexp) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI5_0) # kill: def $f0_64 killed $f0_64 def $vr0 vreplvei.d $vr1, $vr0, 0 vfrintrm.d $vr1, $vr1 + lu52i.d $a0, $zero, -994 + movgr2fr.d $fs0, $a0 fadd.d $fa2, $fa1, $fs0 ftintrz.l.d $fa2, $fa2 movfr2gr.d $a0, $fa2 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/lame.s b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/lame.s index 994508a4..2bd2fffb 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/lame.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/lame.s @@ -1,39 +1,17 @@ .file "lame.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function lame_init_params -.LCPI0_0: - .dword 0x408f400000000000 # double 1000 -.LCPI0_1: - .dword 0x402a000010000000 # double 13.000000476837158 -.LCPI0_2: - .dword 0x40c3880000000000 # double 1.0E+4 -.LCPI0_3: - .dword 0x4026000010000000 # double 11.000000476837158 -.LCPI0_4: - .dword 0x401199999999999a # double 4.4000000000000004 -.LCPI0_5: - .dword 0x4021fffff0000000 # double 8.9999995231628418 -.LCPI0_6: - .dword 0x3fb0000000000000 # double 0.0625 -.LCPI0_7: - .dword 0xb690000000000000 # double -7.0064923216240854E-46 -.LCPI0_8: - .dword 0x3ff921fb54442d18 # double 1.5707963267948966 -.LCPI0_9: - .dword 0x3f964bf964bf964c # double 0.021774193548387097 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI0_10: + .p2align 4, 0x0 # -- Begin function lame_init_params +.LCPI0_0: .word 0 # 0x0 .word 1 # 0x1 .word 1 # 0x1 .word 1 # 0x1 -.LCPI0_11: +.LCPI0_1: .word 0 # 0x0 .word 1 # 0x1 .word 1 # 0x1 .word 0 # 0x0 -.LCPI0_12: +.LCPI0_2: .word 0 # 0x0 .word 0 # 0x0 .word 1 # 0x1 @@ -76,146 +54,157 @@ lame_init_params: # @lame_init_params pcaddu18i $ra, %call36(InitFormatBitStream) jirl $ra, $ra, 0 ld.w $a0, $fp, 8 - ori $a6, $zero, 1 - bne $a0, $a6, .LBB0_2 + ori $a5, $zero, 1 + bne $a0, $a5, .LBB0_6 # %bb.1: # %.thread431 move $a0, $zero ori $a1, $zero, 3 st.w $a1, $fp, 36 - b .LBB0_3 -.LBB0_2: + ld.w $a1, $fp, 16 + st.w $a5, $fp, 204 + lu12i.w $a3, 5 + beqz $a1, .LBB0_7 +.LBB0_2: # %._crit_edge395 + ld.w $a2, $fp, 48 +.LBB0_3: + ori $a3, $a3, 3520 + slt $a4, $a3, $a1 + addi.d $a3, $a4, 1 + st.w $a3, $fp, 200 + ori $a3, $zero, 800 + st.w $a3, $fp, 184 + ori $a3, $zero, 576 + sll.w $a3, $a3, $a4 + st.w $a3, $fp, 188 + beqz $a2, .LBB0_9 +# %bb.4: + ld.w $a4, $fp, 12 + lu12i.w $a5, 260096 + st.w $a5, $fp, 216 + bne $a1, $a4, .LBB0_10 +.LBB0_5: + vldi $vr0, -1168 + b .LBB0_11 +.LBB0_6: ld.w $a0, $fp, 36 addi.d $a0, $a0, -3 sltu $a0, $zero, $a0 - addi.d $a6, $a0, 1 -.LBB0_3: - ld.w $a2, $fp, 16 - st.w $a6, $fp, 204 - lu12i.w $a4, 5 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - beqz $a2, .LBB0_8 -# %bb.4: # %._crit_edge395 - ld.w $a3, $fp, 48 -.LBB0_5: - ori $a4, $a4, 3520 - slt $a5, $a4, $a2 - addi.d $a4, $a5, 1 - st.w $a4, $fp, 200 - ori $a4, $zero, 800 - st.w $a4, $fp, 184 - ori $a4, $zero, 576 - sll.w $a4, $a4, $a5 - st.w $a4, $fp, 188 - beqz $a3, .LBB0_10 -# %bb.6: - ld.w $a5, $fp, 12 - lu12i.w $a6, 260096 - st.w $a6, $fp, 216 - bne $a2, $a5, .LBB0_11 + addi.d $a5, $a0, 1 + ld.w $a1, $fp, 16 + st.w $a5, $fp, 204 + lu12i.w $a3, 5 + bnez $a1, .LBB0_2 .LBB0_7: - vldi $vr0, -1168 - b .LBB0_12 -.LBB0_8: - ld.w $a2, $fp, 12 - lu12i.w $a5, 11 - ori $a3, $a5, 2943 - bge $a3, $a2, .LBB0_44 -# %bb.9: - ori $a2, $a5, 2944 - b .LBB0_101 + ld.w $a1, $fp, 12 + lu12i.w $a4, 11 + ori $a2, $a4, 2943 + bge $a2, $a1, .LBB0_43 +# %bb.8: + ori $a1, $a4, 2944 + b .LBB0_100 +.LBB0_9: + ori $a2, $zero, 64 + masknez $a2, $a2, $a4 + ori $a5, $zero, 128 + maskeqz $a4, $a5, $a4 + or $a2, $a4, $a2 + st.w $a2, $fp, 48 + ld.w $a4, $fp, 12 + lu12i.w $a5, 260096 + st.w $a5, $fp, 216 + beq $a1, $a4, .LBB0_5 .LBB0_10: - ori $a3, $zero, 64 - masknez $a3, $a3, $a5 - ori $a6, $zero, 128 - maskeqz $a5, $a6, $a5 - or $a3, $a5, $a3 - st.w $a3, $fp, 48 - ld.w $a5, $fp, 12 - lu12i.w $a6, 260096 - st.w $a6, $fp, 216 - beq $a2, $a5, .LBB0_7 -.LBB0_11: - movgr2fr.w $fa0, $a5 + movgr2fr.w $fa0, $a4 ffint.s.w $fa0, $fa0 - movgr2fr.w $fa1, $a2 + movgr2fr.w $fa1, $a1 ffint.s.w $fa1, $fa1 fdiv.s $fa0, $fa0, $fa1 fst.s $fa0, $fp, 216 -.LBB0_12: - ld.d $a5, $fp, 0 - srli.d $a6, $a5, 1 - andi $a7, $a5, 1 - or $a6, $a7, $a6 - movgr2fr.d $fa1, $a6 +.LBB0_11: + ld.d $a4, $fp, 0 + srli.d $a5, $a4, 1 + andi $a6, $a4, 1 + or $a5, $a6, $a5 + movgr2fr.d $fa1, $a5 ffint.s.l $fa1, $fa1 fadd.s $fa1, $fa1, $fa1 - slti $a6, $a5, 0 - movgr2fr.d $fa2, $a5 + slti $a5, $a4, 0 + movgr2fr.d $fa2, $a4 ffint.s.l $fa2, $fa2 - movgr2cf $fcc0, $a6 + movgr2cf $fcc0, $a5 fsel $fa1, $fa2, $fa1, $fcc0 - bstrpick.d $a4, $a4, 31, 0 - movgr2fr.d $fa2, $a4 + bstrpick.d $a3, $a3, 31, 0 + movgr2fr.d $fa2, $a3 ffint.s.l $fa2, $fa2 fmul.s $fa0, $fa0, $fa2 fdiv.s $fa0, $fa1, $fa0 vldi $vr1, -1280 fadd.s $fa0, $fa0, $fa1 ftintrz.l.s $fa0, $fa0 - ori $a4, $zero, 319 + ori $a3, $zero, 319 fst.d $fa0, $fp, 176 - bge $a4, $a3, .LBB0_14 -# %bb.13: - move $a4, $zero + bge $a3, $a2, .LBB0_13 +# %bb.12: + move $a3, $zero st.w $zero, $fp, 88 - b .LBB0_15 -.LBB0_14: # %._crit_edge398 - ld.w $a4, $fp, 88 - sltu $a4, $zero, $a4 -.LBB0_15: - slli.d $a2, $a2, 4 - sll.w $a2, $a2, $a0 - movgr2fr.w $fa0, $a2 - fld.d $fa1, $a1, %pc_lo12(.LCPI0_0) + b .LBB0_14 +.LBB0_13: # %._crit_edge398 + ld.w $a3, $fp, 88 + sltu $a3, $zero, $a3 +.LBB0_14: + slli.d $a1, $a1, 4 + sll.w $a1, $a1, $a0 + movgr2fr.w $fa0, $a1 ffint.d.w $fa0, $fa0 - movgr2fr.w $fa2, $a3 - ffint.d.w $fa2, $fa2 - fmul.d $fa1, $fa2, $fa1 + movgr2fr.w $fa1, $a2 + ffint.d.w $fa1, $fa1 + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1032 + movgr2fr.d $fa2, $a1 + fmul.d $fa1, $fa1, $fa2 fdiv.d $fa0, $fa0, $fa1 - beqz $a4, .LBB0_18 -# %bb.16: - pcalau12i $a1, %pc_hi20(.LCPI0_3) - fld.d $fa1, $a1, %pc_lo12(.LCPI0_3) + beqz $a3, .LBB0_17 +# %bb.15: + lu12i.w $a1, 65536 + lu32i.d $a1, 393216 + lu52i.d $a1, $a1, 1026 + movgr2fr.d $fa1, $a1 fcmp.cule.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB0_18 -# %bb.17: + bcnez $fcc0, .LBB0_17 +# %bb.16: ld.w $a1, $fp, 92 - pcalau12i $a2, %pc_hi20(.LCPI0_4) - fld.d $fa0, $a2, %pc_lo12(.LCPI0_4) - movgr2fr.w $fa1, $a1 - ffint.d.w $fa1, $fa1 - fadd.d $fa0, $fa1, $fa0 -.LBB0_18: + movgr2fr.w $fa0, $a1 + ffint.d.w $fa0, $fa0 + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, 104857 + lu52i.d $a1, $a1, 1025 + movgr2fr.d $fa1, $a1 + fadd.d $fa0, $fa0, $fa1 +.LBB0_17: ld.w $a1, $fp, 40 sltu $a1, $zero, $a1 xori $a0, $a0, 1 or $a0, $a1, $a0 - bnez $a0, .LBB0_21 -# %bb.19: - pcalau12i $a0, %pc_hi20(.LCPI0_5) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_5) + bnez $a0, .LBB0_20 +# %bb.18: + lu12i.w $a0, -65536 + lu32i.d $a0, 131071 + lu52i.d $a0, $a0, 1026 + movgr2fr.d $fa1, $a0 fcmp.cule.d $fcc0, $fa1, $fa0 - bcnez $fcc0, .LBB0_21 -# %bb.20: + bcnez $fcc0, .LBB0_20 +# %bb.19: st.w $zero, $fp, 36 -.LBB0_21: +.LBB0_20: ld.w $a0, $fp, 104 - bnez $a0, .LBB0_24 -# %bb.22: - pcalau12i $a0, %pc_hi20(.LCPI0_6) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_6) + bnez $a0, .LBB0_23 +# %bb.21: fcvt.s.d $fa0, $fa0 fcvt.d.s $fa0, $fa0 + lu52i.d $a0, $zero, 1019 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 pcaddu18i $ra, %call36(log) jirl $ra, $ra, 0 @@ -229,8 +218,8 @@ lame_init_params: # @lame_init_params ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 ori $a1, $zero, 30 - blt $a1, $a0, .LBB0_24 -# %bb.23: + blt $a1, $a0, .LBB0_23 +# %bb.22: movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 vldi $vr1, -961 @@ -238,10 +227,10 @@ lame_init_params: # @lame_init_params fcvt.s.d $fa0, $fa0 fst.s $fa0, $fp, 232 fst.s $fa0, $fp, 236 -.LBB0_24: +.LBB0_23: ld.w $a0, $fp, 108 - blez $a0, .LBB0_28 -# %bb.25: + blez $a0, .LBB0_27 +# %bb.24: slli.d $a1, $a0, 1 bstrpick.d $a1, $a1, 31, 1 ld.w $a2, $fp, 16 @@ -254,8 +243,8 @@ lame_init_params: # @lame_init_params fdiv.d $fa0, $fa0, $fa1 fcvt.s.d $fa0, $fa0 fmov.s $fa2, $fa0 - bltz $a1, .LBB0_27 -# %bb.26: + bltz $a1, .LBB0_26 +# %bb.25: add.d $a0, $a1, $a0 slli.d $a0, $a0, 1 bstrpick.d $a0, $a0, 31, 1 @@ -264,7 +253,7 @@ lame_init_params: # @lame_init_params ffint.d.l $fa2, $fa2 fdiv.d $fa1, $fa2, $fa1 fcvt.s.d $fa2, $fa1 -.LBB0_27: +.LBB0_26: vldi $vr1, -1168 fcmp.clt.s $fcc0, $fa1, $fa0 fsel $fa0, $fa0, $fa1, $fcc0 @@ -272,10 +261,10 @@ lame_init_params: # @lame_init_params fcmp.clt.s $fcc0, $fa1, $fa2 fsel $fa0, $fa2, $fa1, $fcc0 fst.s $fa0, $fp, 244 -.LBB0_28: +.LBB0_27: ld.w $a0, $fp, 104 - blez $a0, .LBB0_34 -# %bb.29: + blez $a0, .LBB0_33 +# %bb.28: slli.d $a1, $a0, 1 bstrpick.d $a1, $a1, 31, 1 ld.w $a2, $fp, 16 @@ -284,44 +273,44 @@ lame_init_params: # @lame_init_params ffint.d.l $fa0, $fa0 movgr2fr.w $fa1, $a2 ld.w $a1, $fp, 112 - ffint.d.w $fa1, $fa1 - fdiv.d $fa0, $fa0, $fa1 + ffint.d.w $fa2, $fa1 + fdiv.d $fa0, $fa0, $fa2 fcvt.s.d $fa0, $fa0 - fmov.s $fa2, $fa0 - bltz $a1, .LBB0_33 -# %bb.30: + fmov.s $fa1, $fa0 + bltz $a1, .LBB0_32 +# %bb.29: sub.d $a0, $a0, $a1 - movgr2fr.w $fa2, $a0 - pcalau12i $a0, %pc_hi20(.LCPI0_7) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_7) - ffint.d.w $fa2, $fa2 - fadd.d $fa2, $fa2, $fa2 - fdiv.d $fa1, $fa2, $fa1 - fcmp.cule.d $fcc0, $fa3, $fa1 - bcnez $fcc0, .LBB0_32 -# %bb.31: - movgr2fr.w $fa2, $zero - b .LBB0_33 + movgr2fr.w $fa1, $a0 + ffint.d.w $fa1, $fa1 + fadd.d $fa1, $fa1, $fa1 + fdiv.d $fa1, $fa1, $fa2 + lu52i.d $a0, $zero, -1175 + movgr2fr.d $fa2, $a0 + fcmp.cule.d $fcc0, $fa2, $fa1 + bcnez $fcc0, .LBB0_31 +# %bb.30: + movgr2fr.w $fa1, $zero + b .LBB0_32 +.LBB0_31: + fcvt.s.d $fa1, $fa1 .LBB0_32: - fcvt.s.d $fa2, $fa1 -.LBB0_33: - vldi $vr1, -1168 - fcmp.clt.s $fcc0, $fa1, $fa2 - fsel $fa2, $fa2, $fa1, $fcc0 - fst.s $fa2, $fp, 232 - fcmp.clt.s $fcc0, $fa1, $fa0 - fsel $fa0, $fa0, $fa1, $fcc0 + vldi $vr2, -1168 + fcmp.clt.s $fcc0, $fa2, $fa1 + fsel $fa1, $fa1, $fa2, $fcc0 + fst.s $fa1, $fp, 232 + fcmp.clt.s $fcc0, $fa2, $fa0 + fsel $fa0, $fa0, $fa2, $fcc0 fst.s $fa0, $fp, 236 -.LBB0_34: +.LBB0_33: ld.w $a0, $fp, 256 - bnez $a0, .LBB0_59 -# %bb.35: + bnez $a0, .LBB0_58 +# %bb.34: fld.s $fa0, $fp, 232 movgr2fr.w $fs0, $zero fcmp.cule.s $fcc0, $fa0, $fs0 - pcalau12i $s0, %pc_hi20(.LCPI0_8) - bcnez $fcc0, .LBB0_47 -# %bb.36: # %.preheader382 + lu12i.w $s0, 345154 + bcnez $fcc0, .LBB0_46 +# %bb.35: # %.preheader382 fld.s $fa1, $fp, 236 move $s1, $zero fcvt.d.s $fs1, $fa1 @@ -330,35 +319,38 @@ lame_init_params: # @lame_init_params fcvt.d.s $fs3, $fa0 movgr2fr.d $fs4, $zero ori $s2, $zero, 999 - fld.d $fs5, $s0, %pc_lo12(.LCPI0_8) vldi $vr2, -961 + ori $a0, $s0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fs5, $a0 vldi $vr3, -912 ori $s3, $zero, 32 - b .LBB0_38 + b .LBB0_37 .p2align 4, , 16 -.LBB0_37: # %cdce.end - # in Loop: Header=BB0_38 Depth=1 +.LBB0_36: # %cdce.end + # in Loop: Header=BB0_37 Depth=1 addi.w $s1, $s1, 1 fadd.d $fs4, $fs4, $fa3 - beq $s1, $s3, .LBB0_46 -.LBB0_38: # =>This Inner Loop Header: Depth=1 + beq $s1, $s3, .LBB0_45 +.LBB0_37: # =>This Inner Loop Header: Depth=1 fdiv.d $fa0, $fs4, $fa2 fcmp.cult.d $fcc0, $fa0, $fs1 - bcnez $fcc0, .LBB0_40 -# %bb.39: # in Loop: Header=BB0_38 Depth=1 + bcnez $fcc0, .LBB0_39 +# %bb.38: # in Loop: Header=BB0_37 Depth=1 ld.w $a0, $fp, 248 slt $a1, $a0, $s1 masknez $a2, $s1, $a1 maskeqz $a0, $a0, $a1 or $a0, $a0, $a2 st.w $a0, $fp, 248 -.LBB0_40: # in Loop: Header=BB0_38 Depth=1 +.LBB0_39: # in Loop: Header=BB0_37 Depth=1 fcmp.cule.d $fcc0, $fa0, $fs2 - bcnez $fcc0, .LBB0_37 -# %bb.41: # in Loop: Header=BB0_38 Depth=1 + bcnez $fcc0, .LBB0_36 +# %bb.40: # in Loop: Header=BB0_37 Depth=1 fcmp.cule.d $fcc0, $fs1, $fa0 - bcnez $fcc0, .LBB0_37 -# %bb.42: # in Loop: Header=BB0_38 Depth=1 + bcnez $fcc0, .LBB0_36 +# %bb.41: # in Loop: Header=BB0_37 Depth=1 slt $a0, $s2, $s1 masknez $a1, $s1, $a0 maskeqz $a0, $s2, $a0 @@ -371,22 +363,22 @@ lame_init_params: # @lame_init_params sltu $a2, $zero, $a2 andi $a2, $a2, 1 or $s2, $a0, $a1 - beqz $a2, .LBB0_37 -# %bb.43: # %cdce.call - # in Loop: Header=BB0_38 Depth=1 + beqz $a2, .LBB0_36 +# %bb.42: # %cdce.call + # in Loop: Header=BB0_37 Depth=1 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 vldi $vr3, -912 vldi $vr2, -961 - b .LBB0_37 -.LBB0_44: - lu12i.w $a3, 10 - ori $a7, $a3, 3139 - bge $a7, $a2, .LBB0_96 -# %bb.45: - ori $a2, $a3, 3140 - b .LBB0_101 -.LBB0_46: + b .LBB0_36 +.LBB0_43: + lu12i.w $a2, 10 + ori $a6, $a2, 3139 + bge $a6, $a1, .LBB0_95 +# %bb.44: + ori $a1, $a2, 3140 + b .LBB0_100 +.LBB0_45: ld.w $a0, $fp, 248 addi.d $a1, $s2, -999 sltui $a1, $a1, 1 @@ -406,20 +398,23 @@ lame_init_params: # @lame_init_params fdiv.d $fa0, $fa0, $fa1 fcvt.s.d $fa0, $fa0 fst.s $fa0, $fp, 236 -.LBB0_47: +.LBB0_46: fld.s $fa0, $fp, 244 fcmp.cule.s $fcc0, $fa0, $fs0 - bcnez $fcc0, .LBB0_49 -# %bb.48: - pcalau12i $a0, %pc_hi20(.LCPI0_9) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_9) - fcvt.d.s $fa2, $fa0 - fcmp.cule.d $fcc0, $fa1, $fa2 - bceqz $fcc0, .LBB0_111 -.LBB0_49: + bcnez $fcc0, .LBB0_48 +# %bb.47: + fcvt.d.s $fa1, $fa0 + lu12i.w $a0, 412665 + ori $a0, $a0, 1612 + lu32i.d $a0, 412665 + lu52i.d $a0, $a0, 1017 + movgr2fr.d $fa2, $a0 + fcmp.cule.d $fcc0, $fa2, $fa1 + bceqz $fcc0, .LBB0_110 +.LBB0_48: fcmp.cule.s $fcc0, $fa0, $fs0 - bcnez $fcc0, .LBB0_59 -.LBB0_50: # %.preheader381 + bcnez $fcc0, .LBB0_58 +.LBB0_49: # %.preheader381 fld.s $fa1, $fp, 240 move $s1, $zero fcvt.d.s $fs0, $fa1 @@ -428,35 +423,38 @@ lame_init_params: # @lame_init_params fcvt.d.s $fs2, $fa0 movgr2fr.d $fs3, $zero addi.w $s2, $zero, -1 - fld.d $fs4, $s0, %pc_lo12(.LCPI0_8) vldi $vr2, -961 + ori $a0, $s0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fs4, $a0 vldi $vr3, -912 ori $s0, $zero, 32 - b .LBB0_52 + b .LBB0_51 .p2align 4, , 16 -.LBB0_51: # %cdce.end379 - # in Loop: Header=BB0_52 Depth=1 +.LBB0_50: # %cdce.end379 + # in Loop: Header=BB0_51 Depth=1 addi.w $s1, $s1, 1 fadd.d $fs3, $fs3, $fa3 - beq $s1, $s0, .LBB0_58 -.LBB0_52: # =>This Inner Loop Header: Depth=1 + beq $s1, $s0, .LBB0_57 +.LBB0_51: # =>This Inner Loop Header: Depth=1 fdiv.d $fa0, $fs3, $fa2 fcmp.cult.d $fcc0, $fs0, $fa0 - bcnez $fcc0, .LBB0_54 -# %bb.53: # in Loop: Header=BB0_52 Depth=1 + bcnez $fcc0, .LBB0_53 +# %bb.52: # in Loop: Header=BB0_51 Depth=1 ld.w $a0, $fp, 252 slt $a1, $s1, $a0 masknez $a2, $s1, $a1 maskeqz $a0, $a0, $a1 or $a0, $a0, $a2 st.w $a0, $fp, 252 -.LBB0_54: # in Loop: Header=BB0_52 Depth=1 +.LBB0_53: # in Loop: Header=BB0_51 Depth=1 fcmp.cule.d $fcc0, $fa0, $fs0 - bcnez $fcc0, .LBB0_51 -# %bb.55: # in Loop: Header=BB0_52 Depth=1 + bcnez $fcc0, .LBB0_50 +# %bb.54: # in Loop: Header=BB0_51 Depth=1 fcmp.cule.d $fcc0, $fs1, $fa0 - bcnez $fcc0, .LBB0_51 -# %bb.56: # in Loop: Header=BB0_52 Depth=1 + bcnez $fcc0, .LBB0_50 +# %bb.55: # in Loop: Header=BB0_51 Depth=1 slt $a0, $s1, $s2 masknez $a1, $s1, $a0 maskeqz $a0, $s2, $a0 @@ -469,15 +467,15 @@ lame_init_params: # @lame_init_params sltu $a2, $zero, $a2 andi $a2, $a2, 1 or $s2, $a0, $a1 - beqz $a2, .LBB0_51 -# %bb.57: # %cdce.call378 - # in Loop: Header=BB0_52 Depth=1 + beqz $a2, .LBB0_50 +# %bb.56: # %cdce.call378 + # in Loop: Header=BB0_51 Depth=1 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 vldi $vr3, -912 vldi $vr2, -961 - b .LBB0_51 -.LBB0_58: # %.sink.split + b .LBB0_50 +.LBB0_57: # %.sink.split ld.w $a0, $fp, 252 movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 @@ -496,7 +494,7 @@ lame_init_params: # @lame_init_params fdiv.d $fa0, $fa0, $fa1 fcvt.s.d $fa0, $fa0 fst.s $fa0, $fp, 244 -.LBB0_59: +.LBB0_58: ld.w $a0, $fp, 36 st.w $zero, $fp, 228 addi.d $a0, $a0, -3 @@ -509,43 +507,43 @@ lame_init_params: # @lame_init_params pcaddu18i $ra, %call36(SmpFrqIndex) jirl $ra, $ra, 0 st.w $a0, $fp, 224 - bltz $a0, .LBB0_112 -# %bb.60: + bltz $a0, .LBB0_111 +# %bb.59: ld.w $a0, $fp, 48 ld.w $a1, $fp, 192 ld.w $a2, $fp, 16 pcaddu18i $ra, %call36(BitrateIndex) jirl $ra, $ra, 0 st.w $a0, $fp, 220 - bltz $a0, .LBB0_112 -# %bb.61: + bltz $a0, .LBB0_111 +# %bb.60: ld.w $a0, $fp, 88 ori $s0, $zero, 1 - beqz $a0, .LBB0_68 -# %bb.62: + beqz $a0, .LBB0_67 +# %bb.61: ld.w $a0, $fp, 100 - beqz $a0, .LBB0_82 -# %bb.63: + beqz $a0, .LBB0_81 +# %bb.62: ld.w $a1, $fp, 192 ld.w $a2, $fp, 16 pcaddu18i $ra, %call36(BitrateIndex) jirl $ra, $ra, 0 st.w $a0, $fp, 212 - bltz $a0, .LBB0_112 -# %bb.64: # %._crit_edge402 + bltz $a0, .LBB0_111 +# %bb.63: # %._crit_edge402 ld.w $a0, $fp, 96 - beqz $a0, .LBB0_83 -.LBB0_65: + beqz $a0, .LBB0_82 +.LBB0_64: ld.w $a1, $fp, 192 ld.w $a2, $fp, 16 pcaddu18i $ra, %call36(BitrateIndex) jirl $ra, $ra, 0 st.w $a0, $fp, 208 - bltz $a0, .LBB0_112 -# %bb.66: + bltz $a0, .LBB0_111 +# %bb.65: ld.w $a0, $fp, 88 - beqz $a0, .LBB0_68 -.LBB0_67: + beqz $a0, .LBB0_67 +.LBB0_66: ld.w $a0, $fp, 28 move $s0, $zero slti $a1, $a0, 2 @@ -554,63 +552,63 @@ lame_init_params: # @lame_init_params maskeqz $a0, $a0, $a1 or $a0, $a0, $a2 st.w $a0, $fp, 28 -.LBB0_68: # %.thread +.LBB0_67: # %.thread ld.w $a0, $fp, 36 ori $a1, $zero, 3 - beq $a0, $a1, .LBB0_71 -# %bb.69: - bnez $s0, .LBB0_72 -.LBB0_70: + beq $a0, $a1, .LBB0_70 +# %bb.68: + bnez $s0, .LBB0_71 +.LBB0_69: ld.d $a0, $fp, 136 - bnez $a0, .LBB0_73 - b .LBB0_75 -.LBB0_71: + bnez $a0, .LBB0_72 + b .LBB0_74 +.LBB0_70: st.w $zero, $fp, 44 - beqz $s0, .LBB0_70 -.LBB0_72: + beqz $s0, .LBB0_69 +.LBB0_71: st.w $zero, $fp, 24 ld.d $a0, $fp, 136 - beqz $a0, .LBB0_75 -.LBB0_73: + beqz $a0, .LBB0_74 +.LBB0_72: ld.bu $a2, $a0, 0 ori $a1, $zero, 45 - bne $a2, $a1, .LBB0_76 -# %bb.74: + bne $a2, $a1, .LBB0_75 +# %bb.73: st.w $zero, $fp, 24 ld.bu $a0, $a0, 0 - bne $a0, $a1, .LBB0_76 -.LBB0_75: + bne $a0, $a1, .LBB0_75 +.LBB0_74: pcalau12i $a0, %got_pc_hi20(id3tag) ld.d $a0, $a0, %got_pc_lo12(id3tag) st.w $zero, $a0, 0 -.LBB0_76: # %.thread361 +.LBB0_75: # %.thread361 ld.w $a0, $fp, 20 - beqz $a0, .LBB0_78 -# %bb.77: + beqz $a0, .LBB0_77 +# %bb.76: st.w $zero, $fp, 24 -.LBB0_78: +.LBB0_77: pcalau12i $a0, %pc_hi20(bs) addi.d $a0, $a0, %pc_lo12(bs) pcaddu18i $ra, %call36(init_bit_stream_w) jirl $ra, $ra, 0 ld.w $a0, $fp, 28 ori $a1, $zero, 9 - bltu $a1, $a0, .LBB0_93 -# %bb.79: + bltu $a1, $a0, .LBB0_92 +# %bb.78: slli.d $a0, $a0, 2 pcalau12i $a1, %pc_hi20(.LJTI0_0) addi.d $a1, $a1, %pc_lo12(.LJTI0_0) ldx.w $a0, $a1, $a0 add.d $a0, $a1, $a0 jr $a0 -.LBB0_80: # %.thread376.sink.split +.LBB0_79: # %.thread376.sink.split ori $a0, $zero, 2 st.w $a0, $fp, 28 -.LBB0_81: # %.thread376 - pcalau12i $a0, %pc_hi20(.LCPI0_11) - vld $vr0, $a0, %pc_lo12(.LCPI0_11) - b .LBB0_88 -.LBB0_82: +.LBB0_80: # %.thread376 + pcalau12i $a0, %pc_hi20(.LCPI0_1) + vld $vr0, $a0, %pc_lo12(.LCPI0_1) + b .LBB0_87 +.LBB0_81: ld.w $a0, $fp, 96 ld.w $a1, $fp, 92 ori $a2, $zero, 255 @@ -631,46 +629,46 @@ lame_init_params: # @lame_init_params maskeqz $a1, $a3, $a1 or $a1, $a1, $a2 st.w $a1, $fp, 212 - bnez $a0, .LBB0_65 -.LBB0_83: + bnez $a0, .LBB0_64 +.LBB0_82: ori $a0, $zero, 1 st.w $a0, $fp, 208 ld.w $a0, $fp, 88 - bnez $a0, .LBB0_67 - b .LBB0_68 -.LBB0_84: # %.thread433 + bnez $a0, .LBB0_66 + b .LBB0_67 +.LBB0_83: # %.thread433 addi.d $a0, $fp, 256 st.d $zero, $a0, 16 vld $vr0, $sp, 16 # 16-byte Folded Reload vst $vr0, $a0, 0 - b .LBB0_93 -.LBB0_85: # %.thread364 + b .LBB0_92 +.LBB0_84: # %.thread364 ori $a0, $zero, 7 st.w $a0, $fp, 28 -.LBB0_86: # %.thread365 +.LBB0_85: # %.thread365 ori $a0, $zero, 1 st.d $a0, $fp, 272 vld $vr0, $sp, 16 # 16-byte Folded Reload - b .LBB0_92 -.LBB0_87: - pcalau12i $a0, %pc_hi20(.LCPI0_10) - vld $vr0, $a0, %pc_lo12(.LCPI0_10) -.LBB0_88: # %.preheader380.preheader + b .LBB0_91 +.LBB0_86: + pcalau12i $a0, %pc_hi20(.LCPI0_0) + vld $vr0, $a0, %pc_lo12(.LCPI0_0) +.LBB0_87: # %.preheader380.preheader ori $a0, $zero, 1 lu32i.d $a0, 1 - b .LBB0_91 -.LBB0_89: # %.thread368 + b .LBB0_90 +.LBB0_88: # %.thread368 ori $a0, $zero, 5 st.w $a0, $fp, 28 -.LBB0_90: # %.thread369 - pcalau12i $a0, %pc_hi20(.LCPI0_12) - vld $vr0, $a0, %pc_lo12(.LCPI0_12) +.LBB0_89: # %.thread369 + pcalau12i $a0, %pc_hi20(.LCPI0_2) + vld $vr0, $a0, %pc_lo12(.LCPI0_2) ori $a0, $zero, 1 -.LBB0_91: # %.preheader380.preheader +.LBB0_90: # %.preheader380.preheader st.d $a0, $fp, 272 -.LBB0_92: # %.preheader380.preheader +.LBB0_91: # %.preheader380.preheader vst $vr0, $fp, 256 -.LBB0_93: # %.preheader380.preheader +.LBB0_92: # %.preheader380.preheader ld.w $a0, $fp, 192 ld.w $a1, $fp, 224 alsl.d $a0, $a0, $a0, 1 @@ -709,8 +707,8 @@ lame_init_params: # @lame_init_params st.w $a1, $a5, 144 ld.w $a1, $fp, 24 vst $vr0, $a5, 0 - beqz $a1, .LBB0_95 -# %bb.94: + beqz $a1, .LBB0_94 +# %bb.93: ld.w $a2, $fp, 36 ori $a1, $zero, 1 sub.w $a1, $a1, $a0 @@ -731,7 +729,7 @@ lame_init_params: # @lame_init_params addi.d $sp, $sp, 128 pcaddu18i $t8, %call36(InitVbrTag) jr $t8 -.LBB0_95: +.LBB0_94: fld.d $fs5, $sp, 32 # 8-byte Folded Reload fld.d $fs4, $sp, 40 # 8-byte Folded Reload fld.d $fs3, $sp, 48 # 8-byte Folded Reload @@ -746,83 +744,90 @@ lame_init_params: # @lame_init_params ld.d $ra, $sp, 120 # 8-byte Folded Reload addi.d $sp, $sp, 128 ret -.LBB0_96: - lu12i.w $a3, 7 - ori $a7, $a3, 3327 - bge $a7, $a2, .LBB0_98 -# %bb.97: - ori $a2, $a3, 3328 - b .LBB0_101 -.LBB0_98: - ori $a3, $a4, 3519 - bge $a3, $a2, .LBB0_100 -# %bb.99: - ori $a2, $a4, 3520 - b .LBB0_101 +.LBB0_95: + lu12i.w $a2, 7 + ori $a6, $a2, 3327 + bge $a6, $a1, .LBB0_97 +# %bb.96: + ori $a1, $a2, 3328 + b .LBB0_100 +.LBB0_97: + ori $a2, $a3, 3519 + bge $a2, $a1, .LBB0_99 +# %bb.98: + ori $a1, $a3, 3520 + b .LBB0_100 +.LBB0_99: + ori $a2, $a3, 1570 + slt $a1, $a1, $a2 + masknez $a2, $a2, $a1 + lu12i.w $a6, 3 + ori $a6, $a6, 3712 + maskeqz $a1, $a6, $a1 + or $a1, $a1, $a2 .LBB0_100: - ori $a3, $a4, 1570 - slt $a2, $a2, $a3 - masknez $a3, $a3, $a2 - lu12i.w $a7, 3 - ori $a7, $a7, 3712 - maskeqz $a2, $a7, $a2 - or $a2, $a2, $a3 -.LBB0_101: - ld.w $a3, $fp, 48 - st.w $a2, $fp, 16 - blez $a3, .LBB0_5 + ld.w $a2, $fp, 48 + st.w $a1, $fp, 16 + blez $a2, .LBB0_3 +# %bb.101: + ld.w $a6, $fp, 88 + bnez $a6, .LBB0_3 # %bb.102: - ld.w $a7, $fp, 88 - bnez $a7, .LBB0_5 -# %bb.103: - slli.d $a7, $a2, 4 - sll.w $a7, $a7, $a0 - bstrpick.d $a7, $a7, 31, 0 - movgr2fr.d $fa0, $a7 + slli.d $a6, $a1, 4 + sll.w $a6, $a6, $a0 + bstrpick.d $a6, $a6, 31, 0 + movgr2fr.d $fa0, $a6 ffint.d.l $fa1, $fa0 - bstrpick.d $a7, $a3, 31, 0 - movgr2fr.d $fa0, $a7 - fld.d $fa2, $a1, %pc_lo12(.LCPI0_0) - pcalau12i $a7, %pc_hi20(.LCPI0_1) - fld.d $fa3, $a7, %pc_lo12(.LCPI0_1) + bstrpick.d $a6, $a2, 31, 0 + movgr2fr.d $fa0, $a6 ffint.d.l $fa0, $fa0 + ori $a6, $zero, 0 + lu32i.d $a6, -49152 + lu52i.d $a6, $a6, 1032 + movgr2fr.d $fa2, $a6 fmul.d $fa2, $fa0, $fa2 fdiv.d $fa1, $fa1, $fa2 - fcmp.cule.d $fcc0, $fa1, $fa3 - bcnez $fcc0, .LBB0_5 -# %bb.104: - pcalau12i $a2, %pc_hi20(.LCPI0_2) - fld.d $fa1, $a2, %pc_lo12(.LCPI0_2) + lu12i.w $a6, 65536 + lu32i.d $a6, -393216 + lu52i.d $a6, $a6, 1026 + movgr2fr.d $fa2, $a6 + fcmp.cule.d $fcc0, $fa1, $fa2 + bcnez $fcc0, .LBB0_3 +# %bb.103: + ori $a1, $zero, 0 + lu32i.d $a1, 231424 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fa1, $a1 fmul.d $fa0, $fa0, $fa1 - slli.d $a2, $a6, 4 - movgr2fr.w $fa1, $a2 + slli.d $a1, $a5, 4 + movgr2fr.w $fa1, $a1 ffint.d.w $fa1, $fa1 fdiv.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a6, $fa0 - lu12i.w $a2, 3 - ori $a2, $a2, 3712 - bge $a2, $a6, .LBB0_110 + movfr2gr.s $a5, $fa0 + lu12i.w $a1, 3 + ori $a1, $a1, 3712 + bge $a1, $a5, .LBB0_109 +# %bb.104: + ori $a1, $a3, 1570 + bgeu $a1, $a5, .LBB0_109 # %bb.105: - ori $a2, $a4, 1570 - bgeu $a2, $a6, .LBB0_110 + ori $a1, $a3, 3520 + bgeu $a1, $a5, .LBB0_109 # %bb.106: - ori $a2, $a4, 3520 - bgeu $a2, $a6, .LBB0_110 + lu12i.w $a1, 7 + ori $a1, $a1, 3328 + bgeu $a1, $a5, .LBB0_109 # %bb.107: - lu12i.w $a2, 7 - ori $a2, $a2, 3328 - bgeu $a2, $a6, .LBB0_110 + lu12i.w $a1, 10 + ori $a1, $a1, 3140 + bgeu $a1, $a5, .LBB0_109 # %bb.108: - lu12i.w $a2, 10 - ori $a2, $a2, 3140 - bgeu $a2, $a6, .LBB0_110 -# %bb.109: - ori $a2, $a5, 2944 + ori $a1, $a4, 2944 +.LBB0_109: + st.w $a1, $fp, 16 + b .LBB0_3 .LBB0_110: - st.w $a2, $fp, 16 - b .LBB0_5 -.LBB0_111: st.d $zero, $fp, 240 pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) @@ -835,9 +840,9 @@ lame_init_params: # @lame_init_params jirl $ra, $ra, 0 fld.s $fa0, $fp, 244 fcmp.cule.s $fcc0, $fa0, $fs0 - bceqz $fcc0, .LBB0_50 - b .LBB0_59 -.LBB0_112: + bceqz $fcc0, .LBB0_49 + b .LBB0_58 +.LBB0_111: pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) ld.d $a0, $a0, 0 @@ -846,7 +851,7 @@ lame_init_params: # @lame_init_params ori $a0, $zero, 1 pcaddu18i $ra, %call36(exit) jirl $ra, $ra, 0 -.LBB0_113: +.LBB0_112: ori $a0, $zero, 1 ori $a1, $zero, 1 lu32i.d $a1, 1 @@ -864,27 +869,19 @@ lame_init_params: # @lame_init_params .section .rodata,"a",@progbits .p2align 2, 0x0 .LJTI0_0: - .word .LBB0_113-.LJTI0_0 - .word .LBB0_87-.LJTI0_0 - .word .LBB0_81-.LJTI0_0 - .word .LBB0_80-.LJTI0_0 + .word .LBB0_112-.LJTI0_0 + .word .LBB0_86-.LJTI0_0 .word .LBB0_80-.LJTI0_0 - .word .LBB0_90-.LJTI0_0 + .word .LBB0_79-.LJTI0_0 + .word .LBB0_79-.LJTI0_0 .word .LBB0_89-.LJTI0_0 - .word .LBB0_86-.LJTI0_0 + .word .LBB0_88-.LJTI0_0 .word .LBB0_85-.LJTI0_0 .word .LBB0_84-.LJTI0_0 + .word .LBB0_83-.LJTI0_0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function lame_print_config -.LCPI1_0: - .dword 0x408f400000000000 # double 1000 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI1_1: - .word 0x43fa0000 # float 500 .text - .globl lame_print_config + .globl lame_print_config # -- Begin function lame_print_config .p2align 5 .type lame_print_config,@function lame_print_config: # @lame_print_config @@ -900,17 +897,18 @@ lame_print_config: # @lame_print_config st.d $s5, $sp, 32 # 8-byte Folded Spill fst.d $fs0, $sp, 24 # 8-byte Folded Spill fst.d $fs1, $sp, 16 # 8-byte Folded Spill - fst.d $fs2, $sp, 8 # 8-byte Folded Spill move $fp, $a0 ld.w $a0, $a0, 16 - pcalau12i $a1, %pc_hi20(.LCPI1_0) - fld.d $fs0, $a1, %pc_lo12(.LCPI1_0) - fld.s $fs2, $fp, 216 - ld.w $s5, $fp, 204 - ld.w $s4, $fp, 48 movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 - fdiv.d $fs1, $fa0, $fs0 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + fld.s $fs1, $fp, 216 + ld.w $s5, $fp, 204 + ld.w $s4, $fp, 48 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa1, $a0 + fdiv.d $fs0, $fa0, $fa1 pcalau12i $a0, %got_pc_hi20(stderr) ld.d $s3, $a0, %got_pc_lo12(stderr) ld.d $a0, $s3, 0 @@ -935,14 +933,14 @@ lame_print_config: # @lame_print_config fld.s $fa0, $fp, 216 vldi $vr1, -1168 fcmp.ceq.s $fcc0, $fa0, $fa1 - fcvt.s.d $fs1, $fs1 + fcvt.s.d $fs0, $fs0 bcnez $fcc0, .LBB1_5 # %bb.4: - fmul.s $fa0, $fs2, $fs1 + fmul.s $fa0, $fs1, $fs0 ld.d $a0, $s3, 0 ftintrz.w.s $fa0, $fa0 movfr2gr.s $a2, $fa0 - ftintrz.w.s $fa0, $fs1 + ftintrz.w.s $fa0, $fs0 movfr2gr.s $a3, $fa0 pcalau12i $a1, %pc_hi20(.L.str.6) addi.d $a1, $a1, %pc_lo12(.L.str.6) @@ -950,13 +948,13 @@ lame_print_config: # @lame_print_config jirl $ra, $ra, 0 .LBB1_5: fld.s $fa0, $fp, 244 - movgr2fr.w $fs2, $zero - fcmp.cule.s $fcc0, $fa0, $fs2 - pcalau12i $s0, %pc_hi20(.LCPI1_1) + movgr2fr.w $fs1, $zero + fcmp.cule.s $fcc0, $fa0, $fs1 + lu12i.w $s0, 278432 bceqz $fcc0, .LBB1_11 # %bb.6: fld.s $fa0, $fp, 232 - fcmp.cule.s $fcc0, $fa0, $fs2 + fcmp.cule.s $fcc0, $fa0, $fs1 bceqz $fcc0, .LBB1_12 .LBB1_7: ld.w $a0, $fp, 20 @@ -980,12 +978,12 @@ lame_print_config: # @lame_print_config b .LBB1_17 .LBB1_11: fld.s $fa1, $fp, 240 - fld.s $fa2, $s0, %pc_lo12(.LCPI1_1) ld.d $a0, $s3, 0 - fmul.s $fa1, $fa1, $fs1 + fmul.s $fa1, $fa1, $fs0 + movgr2fr.w $fa2, $s0 fmul.s $fa1, $fa1, $fa2 fcvt.d.s $fa1, $fa1 - fmul.s $fa0, $fa0, $fs1 + fmul.s $fa0, $fa0, $fs0 fmul.s $fa0, $fa0, $fa2 fcvt.d.s $fa0, $fa0 movfr2gr.d $a3, $fa0 @@ -995,17 +993,17 @@ lame_print_config: # @lame_print_config pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 fld.s $fa0, $fp, 232 - fcmp.cule.s $fcc0, $fa0, $fs2 + fcmp.cule.s $fcc0, $fa0, $fs1 bcnez $fcc0, .LBB1_7 .LBB1_12: - fld.s $fa1, $s0, %pc_lo12(.LCPI1_1) ld.d $a0, $s3, 0 - fld.s $fa2, $fp, 236 - fmul.s $fa0, $fa0, $fs1 - fmul.s $fa0, $fa0, $fa1 + fmul.s $fa0, $fa0, $fs0 + fld.s $fa1, $fp, 236 + movgr2fr.w $fa2, $s0 + fmul.s $fa0, $fa0, $fa2 fcvt.d.s $fa0, $fa0 - fmul.s $fa2, $fa2, $fs1 - fmul.s $fa1, $fa2, $fa1 + fmul.s $fa1, $fa1, $fs0 + fmul.s $fa1, $fa1, $fa2 fcvt.d.s $fa1, $fa1 movfr2gr.d $a2, $fa0 movfr2gr.d $a3, $fa1 @@ -1066,17 +1064,21 @@ lame_print_config: # @lame_print_config ld.w $a1, $fp, 16 ld.w $a2, $fp, 88 ld.d $a0, $s3, 0 - ld.w $a7, $fp, 28 movgr2fr.w $fa0, $a1 ffint.d.w $fa0, $fa0 - fdiv.d $fa0, $fa0, $fs0 + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + ld.w $a7, $fp, 28 + lu52i.d $a1, $a1, 1032 + movgr2fr.d $fa1, $a1 + fdiv.d $fa0, $fa0, $fa1 bnez $a2, .LBB1_20 # %bb.19: slli.d $a1, $s5, 4 movgr2fr.w $fa1, $a1 ffint.s.w $fa1, $fa1 movgr2fr.w $fa2, $s4 - fmul.s $fa1, $fs1, $fa1 + fmul.s $fa1, $fs0, $fa1 ld.w $a1, $fp, 36 ld.w $a3, $fp, 48 ffint.s.w $fa2, $fa2 @@ -1114,7 +1116,6 @@ lame_print_config: # @lame_print_config jirl $ra, $ra, 0 .LBB1_21: ld.d $a0, $s3, 0 - fld.d $fs2, $sp, 8 # 8-byte Folded Reload fld.d $fs1, $sp, 16 # 8-byte Folded Reload fld.d $fs0, $sp, 24 # 8-byte Folded Reload ld.d $s5, $sp, 32 # 8-byte Folded Reload @@ -1131,16 +1132,7 @@ lame_print_config: # @lame_print_config .Lfunc_end1: .size lame_print_config, .Lfunc_end1-lame_print_config # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function lame_encode_frame -.LCPI2_0: - .dword 0x408f400000000000 # double 1000 -.LCPI2_1: - .dword 0x3e112e0be826d695 # double 1.0000000000000001E-9 -.LCPI2_2: - .dword 0x3fd6666666666666 # double 0.34999999999999998 - .text - .globl lame_encode_frame + .globl lame_encode_frame # -- Begin function lame_encode_frame .p2align 5 .type lame_encode_frame,@function lame_encode_frame: # @lame_encode_frame @@ -1209,21 +1201,26 @@ lame_encode_frame: # @lame_encode_frame .LBB2_6: ld.w $a0, $fp, 16 movgr2fr.w $fa0, $a0 - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI2_0) + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa1, $a0 ld.w $a0, $fp, 48 ld.w $a2, $fp, 188 ffint.d.w $fa0, $fa0 fdiv.d $fa0, $fa0, $fa1 st.d $zero, $s5, %pc_lo12(lame_encode_frame.sentBits) mul.d $a0, $a2, $a0 - vldi $vr1, -992 - fmul.d $fa0, $fa0, $fa1 - pcalau12i $a2, %pc_hi20(.LCPI2_1) - fld.d $fa1, $a2, %pc_lo12(.LCPI2_1) - movgr2fr.w $fa2, $a0 - ffint.d.w $fa2, $fa2 - fdiv.d $fa0, $fa2, $fa0 + movgr2fr.w $fa1, $a0 + ffint.d.w $fa1, $fa1 + vldi $vr2, -992 + fmul.d $fa0, $fa0, $fa2 + fdiv.d $fa0, $fa1, $fa0 + lu12i.w $a0, -97683 + ori $a0, $a0, 1685 + lu32i.d $a0, 77323 + lu52i.d $a0, $a0, 993 + movgr2fr.d $fa1, $a0 fadd.d $fa2, $fa0, $fa1 vreplvei.d $vr2, $vr2, 0 vfrintrm.d $vr2, $vr2 @@ -1648,15 +1645,18 @@ lame_encode_frame: # @lame_encode_frame addi.d $a0, $a0, %pc_lo12(lame_encode_frame.ms_ratio) fld.d $fa0, $a0, 0 fld.d $fa1, $a0, 8 + fld.d $fa2, $sp, 96 fadd.d $fa0, $fa0, $fa1 - fld.d $fa1, $sp, 96 fadd.d $fa0, $fs0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI2_2) - fld.d $fa2, $a0, %pc_lo12(.LCPI2_2) - fadd.d $fa0, $fa1, $fa0 + fadd.d $fa0, $fa2, $fa0 vldi $vr1, -944 fmul.d $fa0, $fa0, $fa1 - fcmp.cule.d $fcc0, $fa2, $fa0 + lu12i.w $a0, 419430 + ori $a0, $a0, 1638 + lu32i.d $a0, 419430 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa1, $a0 + fcmp.cule.d $fcc0, $fa1, $fa0 bcnez $fcc0, .LBB2_61 # %bb.60: ori $a0, $zero, 2 @@ -1833,12 +1833,7 @@ lame_encode_frame: # @lame_encode_frame .Lfunc_end2: .size lame_encode_frame, .Lfunc_end2-lame_encode_frame # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function fill_buffer_resample -.LCPI3_0: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 - .text - .globl fill_buffer_resample + .globl fill_buffer_resample # -- Begin function fill_buffer_resample .p2align 5 .type fill_buffer_resample,@function fill_buffer_resample: # @fill_buffer_resample @@ -1875,12 +1870,15 @@ fill_buffer_resample: # @fill_buffer_resample vreplvei.d $vr2, $vr2, 0 vfrintrm.d $vr2, $vr2 fsub.d $fa1, $fa1, $fa2 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI3_0) - fabs.d $fa3, $fa1 + fabs.d $fa2, $fa1 fldx.d $fa1, $a7, $t1 alsl.d $a0, $a6, $t1, 1 - fcmp.clt.d $fcc0, $fa3, $fa2 + lu12i.w $t2, -85564 + ori $t2, $t2, 813 + lu32i.d $t2, -379166 + lu52i.d $t2, $t2, 1009 + movgr2fr.d $fa3, $t2 + fcmp.clt.d $fcc0, $fa2, $fa3 add.d $t2, $t0, $a0 move $a0, $zero vldi $vr2, -784 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/layer3.s b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/layer3.s index aa8f2ba4..716b5612 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/layer3.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/layer3.s @@ -1,559 +1,539 @@ .file "layer3.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function init_layer3 -.LCPI0_0: - .dword 0x3ff5555555555555 # double 1.3333333333333333 -.LCPI0_1: - .dword 0x3fa657184ae74487 # double 0.043633231299858237 -.LCPI0_2: - .dword 0x400921fb54442d18 # double 3.1415926535897931 -.LCPI0_3: - .dword 0x4052000000000000 # double 72 -.LCPI0_16: - .dword 0x3fc0c152382d7365 # double 0.1308996938995747 -.LCPI0_17: - .dword 0x7ff0000000000000 # double +Inf -.LCPI0_25: - .dword 0x3ff6a09e667f3bcd # double 1.4142135623730951 -.LCPI0_26: - .dword 0x3feae89f995ad3ad # double 0.8408964152537145 -.LCPI0_27: - .dword 0x3fe6a09e667f3bcd # double 0.70710678118654757 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI0_4: + .p2align 4, 0x0 # -- Begin function init_layer3 +.LCPI0_0: .dword 0xbfe5b3935c0c9409 # double -0.67817085245462849 .dword 0xbfe42ae51f3af2b9 # double -0.63023620700513228 -.LCPI0_5: +.LCPI0_1: .dword 0xbffa9aa4bcad1baf # double -1.6627547617115217 .dword 0xbff4e7ae9144f0fc # double -1.3065629648763766 -.LCPI0_6: +.LCPI0_2: .dword 0xbfe03f8e65fdf0fc # double -0.50775833053879138 .dword 0xbfddd1ba8e917e2b # double -0.46592582628906837 -.LCPI0_7: +.LCPI0_3: .dword 0x3fd34c45a2782fb5 # double 0.30153027406845051 .dword 0x3ff7746ea3a45f90 # double 1.4659258262890695 -.LCPI0_8: +.LCPI0_4: .dword 0xbfe2f895141f4826 # double -0.59284452371708052 .dword 0xbfe209c1a6fe449c # double -0.56369097343317121 -.LCPI0_9: +.LCPI0_5: .dword 0xbff1535055b4bd6a # double -1.0828402851000996 .dword 0xbfedc74ea7f7f7fc # double -0.93057949835178943 -.LCPI0_10: +.LCPI0_6: .dword 0xbfd969579af13b11 # double -0.3970545781239006 .dword 0xbfd37fb982271a06 # double -0.30467069349506948 -.LCPI0_11: +.LCPI0_7: .dword 0x401be994a779f926 # double 6.9781061332890086 .dword 0xc0223026a975a6c3 # double -9.0940449672810981 -.LCPI0_12: +.LCPI0_8: .dword 0xbfe1517a7bdb3895 # double -0.54119610014619701 .dword 0xbfe0c6c679d621e4 # double -0.52426456257040543 -.LCPI0_13: +.LCPI0_9: .dword 0xbfea486a6fff9fc1 # double -0.82133981585229077 .dword 0xbfe7aed8ced5b9bd # double -0.74009361646113059 -.LCPI0_14: +.LCPI0_10: .dword 0xbfc8b3fcea414726 # double -0.19299279630882288 .dword 0xbfb11ced80d03287 # double -0.066847652386238562 -.LCPI0_15: +.LCPI0_11: .dword 0xc00c4ffda58811f0 # double -3.5390580112600603 .dword 0xc00252a2ed14a1b7 # double -2.2903498193665786 -.LCPI0_18: +.LCPI0_12: .dword 0xbfe5b3935c0c9409 # double -0.67817085245462849 .dword 0x3fe42ae51f3af2b9 # double 0.63023620700513228 -.LCPI0_19: +.LCPI0_13: .dword 0xbfe2f895141f4826 # double -0.59284452371708052 .dword 0x3fe209c1a6fe449c # double 0.56369097343317121 -.LCPI0_20: +.LCPI0_14: .dword 0xbfe1517a7bdb3895 # double -0.54119610014619701 .dword 0x3fe0c6c679d621e4 # double 0.52426456257040543 -.LCPI0_21: +.LCPI0_15: .dword 0xbfe03f8e65fdf0fc # double -0.50775833053879138 .dword 0x3fddd1ba8e917e2b # double 0.46592582628906837 -.LCPI0_22: +.LCPI0_16: .dword 0xbfd969579af13b11 # double -0.3970545781239006 .dword 0x3fd37fb982271a06 # double 0.30467069349506948 -.LCPI0_23: +.LCPI0_17: .dword 0xbfc8b3fcea414726 # double -0.19299279630882288 .dword 0x3fb11ced80d03287 # double 0.066847652386238562 -.LCPI0_24: +.LCPI0_18: .dword 0x0000000000000000 # double 0 .dword 0x8000000000000000 # double -0 -.LCPI0_28: +.LCPI0_19: .word 12288 # 0x3000 .word 12352 # 0x3040 .word 12416 # 0x3080 .word 12480 # 0x30c0 -.LCPI0_29: +.LCPI0_20: .word 12544 # 0x3100 .word 12608 # 0x3140 .word 12296 # 0x3008 .word 12360 # 0x3048 -.LCPI0_30: +.LCPI0_21: .word 12424 # 0x3088 .word 12488 # 0x30c8 .word 12552 # 0x3108 .word 12616 # 0x3148 -.LCPI0_31: +.LCPI0_22: .word 12304 # 0x3010 .word 12368 # 0x3050 .word 12432 # 0x3090 .word 12496 # 0x30d0 -.LCPI0_32: +.LCPI0_23: .word 12560 # 0x3110 .word 12624 # 0x3150 .word 12312 # 0x3018 .word 12376 # 0x3058 -.LCPI0_33: +.LCPI0_24: .word 12440 # 0x3098 .word 12504 # 0x30d8 .word 12568 # 0x3118 .word 12632 # 0x3158 -.LCPI0_34: +.LCPI0_25: .word 12320 # 0x3020 .word 12384 # 0x3060 .word 12448 # 0x30a0 .word 12512 # 0x30e0 -.LCPI0_35: +.LCPI0_26: .word 12576 # 0x3120 .word 12640 # 0x3160 .word 12328 # 0x3028 .word 12392 # 0x3068 -.LCPI0_36: +.LCPI0_27: .word 12456 # 0x30a8 .word 12520 # 0x30e8 .word 12584 # 0x3128 .word 12648 # 0x3168 -.LCPI0_37: +.LCPI0_28: .word 12289 # 0x3001 .word 12353 # 0x3041 .word 12417 # 0x3081 .word 12481 # 0x30c1 -.LCPI0_38: +.LCPI0_29: .word 12545 # 0x3101 .word 12609 # 0x3141 .word 12297 # 0x3009 .word 12361 # 0x3049 -.LCPI0_39: +.LCPI0_30: .word 12425 # 0x3089 .word 12489 # 0x30c9 .word 12553 # 0x3109 .word 12617 # 0x3149 -.LCPI0_40: +.LCPI0_31: .word 12305 # 0x3011 .word 12369 # 0x3051 .word 12433 # 0x3091 .word 12497 # 0x30d1 -.LCPI0_41: +.LCPI0_32: .word 12561 # 0x3111 .word 12625 # 0x3151 .word 12313 # 0x3019 .word 12377 # 0x3059 -.LCPI0_42: +.LCPI0_33: .word 12441 # 0x3099 .word 12505 # 0x30d9 .word 12569 # 0x3119 .word 12633 # 0x3159 -.LCPI0_43: +.LCPI0_34: .word 12321 # 0x3021 .word 12385 # 0x3061 .word 12449 # 0x30a1 .word 12513 # 0x30e1 -.LCPI0_44: +.LCPI0_35: .word 12577 # 0x3121 .word 12641 # 0x3161 .word 12329 # 0x3029 .word 12393 # 0x3069 -.LCPI0_45: +.LCPI0_36: .word 12457 # 0x30a9 .word 12521 # 0x30e9 .word 12585 # 0x3129 .word 12649 # 0x3169 -.LCPI0_46: +.LCPI0_37: .word 12290 # 0x3002 .word 12354 # 0x3042 .word 12418 # 0x3082 .word 12482 # 0x30c2 -.LCPI0_47: +.LCPI0_38: .word 12546 # 0x3102 .word 12610 # 0x3142 .word 12298 # 0x300a .word 12362 # 0x304a -.LCPI0_48: +.LCPI0_39: .word 12426 # 0x308a .word 12490 # 0x30ca .word 12554 # 0x310a .word 12618 # 0x314a -.LCPI0_49: +.LCPI0_40: .word 12306 # 0x3012 .word 12370 # 0x3052 .word 12434 # 0x3092 .word 12498 # 0x30d2 -.LCPI0_50: +.LCPI0_41: .word 12562 # 0x3112 .word 12626 # 0x3152 .word 12314 # 0x301a .word 12378 # 0x305a -.LCPI0_51: +.LCPI0_42: .word 12442 # 0x309a .word 12506 # 0x30da .word 12570 # 0x311a .word 12634 # 0x315a -.LCPI0_52: +.LCPI0_43: .word 12322 # 0x3022 .word 12386 # 0x3062 .word 12450 # 0x30a2 .word 12514 # 0x30e2 -.LCPI0_53: +.LCPI0_44: .word 12578 # 0x3122 .word 12642 # 0x3162 .word 12330 # 0x302a .word 12394 # 0x306a -.LCPI0_54: +.LCPI0_45: .word 12458 # 0x30aa .word 12522 # 0x30ea .word 12586 # 0x312a .word 12650 # 0x316a -.LCPI0_55: +.LCPI0_46: .word 12291 # 0x3003 .word 12355 # 0x3043 .word 12419 # 0x3083 .word 12483 # 0x30c3 -.LCPI0_56: +.LCPI0_47: .word 12547 # 0x3103 .word 12611 # 0x3143 .word 12299 # 0x300b .word 12363 # 0x304b -.LCPI0_57: +.LCPI0_48: .word 12427 # 0x308b .word 12491 # 0x30cb .word 12555 # 0x310b .word 12619 # 0x314b -.LCPI0_58: +.LCPI0_49: .word 12307 # 0x3013 .word 12371 # 0x3053 .word 12435 # 0x3093 .word 12499 # 0x30d3 -.LCPI0_59: +.LCPI0_50: .word 12563 # 0x3113 .word 12627 # 0x3153 .word 12315 # 0x301b .word 12379 # 0x305b -.LCPI0_60: +.LCPI0_51: .word 12443 # 0x309b .word 12507 # 0x30db .word 12571 # 0x311b .word 12635 # 0x315b -.LCPI0_61: +.LCPI0_52: .word 12323 # 0x3023 .word 12387 # 0x3063 .word 12451 # 0x30a3 .word 12515 # 0x30e3 -.LCPI0_62: +.LCPI0_53: .word 12579 # 0x3123 .word 12643 # 0x3163 .word 12331 # 0x302b .word 12395 # 0x306b -.LCPI0_63: +.LCPI0_54: .word 12459 # 0x30ab .word 12523 # 0x30eb .word 12587 # 0x312b .word 12651 # 0x316b -.LCPI0_64: +.LCPI0_55: .word 12292 # 0x3004 .word 12356 # 0x3044 .word 12420 # 0x3084 .word 12484 # 0x30c4 -.LCPI0_65: +.LCPI0_56: .word 12548 # 0x3104 .word 12612 # 0x3144 .word 12300 # 0x300c .word 12364 # 0x304c -.LCPI0_66: +.LCPI0_57: .word 12428 # 0x308c .word 12492 # 0x30cc .word 12556 # 0x310c .word 12620 # 0x314c -.LCPI0_67: +.LCPI0_58: .word 12308 # 0x3014 .word 12372 # 0x3054 .word 12436 # 0x3094 .word 12500 # 0x30d4 -.LCPI0_68: +.LCPI0_59: .word 12564 # 0x3114 .word 12628 # 0x3154 .word 12316 # 0x301c .word 12380 # 0x305c -.LCPI0_69: +.LCPI0_60: .word 12444 # 0x309c .word 12508 # 0x30dc .word 12572 # 0x311c .word 12636 # 0x315c -.LCPI0_70: +.LCPI0_61: .word 12324 # 0x3024 .word 12388 # 0x3064 .word 12452 # 0x30a4 .word 12516 # 0x30e4 -.LCPI0_71: +.LCPI0_62: .word 12580 # 0x3124 .word 12644 # 0x3164 .word 12332 # 0x302c .word 12396 # 0x306c -.LCPI0_72: +.LCPI0_63: .word 12460 # 0x30ac .word 12524 # 0x30ec .word 12588 # 0x312c .word 12652 # 0x316c -.LCPI0_73: +.LCPI0_64: .word 16384 # 0x4000 .word 16448 # 0x4040 .word 16512 # 0x4080 .word 16576 # 0x40c0 -.LCPI0_74: +.LCPI0_65: .word 16392 # 0x4008 .word 16456 # 0x4048 .word 16520 # 0x4088 .word 16584 # 0x40c8 -.LCPI0_75: +.LCPI0_66: .word 16400 # 0x4010 .word 16464 # 0x4050 .word 16528 # 0x4090 .word 16592 # 0x40d0 -.LCPI0_76: +.LCPI0_67: .word 16408 # 0x4018 .word 16472 # 0x4058 .word 16536 # 0x4098 .word 16600 # 0x40d8 -.LCPI0_77: +.LCPI0_68: .word 16385 # 0x4001 .word 16449 # 0x4041 .word 16513 # 0x4081 .word 16577 # 0x40c1 -.LCPI0_78: +.LCPI0_69: .word 16393 # 0x4009 .word 16457 # 0x4049 .word 16521 # 0x4089 .word 16585 # 0x40c9 -.LCPI0_79: +.LCPI0_70: .word 16401 # 0x4011 .word 16465 # 0x4051 .word 16529 # 0x4091 .word 16593 # 0x40d1 -.LCPI0_80: +.LCPI0_71: .word 16409 # 0x4019 .word 16473 # 0x4059 .word 16537 # 0x4099 .word 16601 # 0x40d9 -.LCPI0_81: +.LCPI0_72: .word 16386 # 0x4002 .word 16450 # 0x4042 .word 16514 # 0x4082 .word 16578 # 0x40c2 -.LCPI0_82: +.LCPI0_73: .word 16394 # 0x400a .word 16458 # 0x404a .word 16522 # 0x408a .word 16586 # 0x40ca -.LCPI0_83: +.LCPI0_74: .word 16402 # 0x4012 .word 16466 # 0x4052 .word 16530 # 0x4092 .word 16594 # 0x40d2 -.LCPI0_84: +.LCPI0_75: .word 16410 # 0x401a .word 16474 # 0x405a .word 16538 # 0x409a .word 16602 # 0x40da -.LCPI0_85: +.LCPI0_76: .word 16387 # 0x4003 .word 16451 # 0x4043 .word 16515 # 0x4083 .word 16579 # 0x40c3 -.LCPI0_86: +.LCPI0_77: .word 16395 # 0x400b .word 16459 # 0x404b .word 16523 # 0x408b .word 16587 # 0x40cb -.LCPI0_87: +.LCPI0_78: .word 16403 # 0x4013 .word 16467 # 0x4053 .word 16531 # 0x4093 .word 16595 # 0x40d3 -.LCPI0_88: +.LCPI0_79: .word 16411 # 0x401b .word 16475 # 0x405b .word 16539 # 0x409b .word 16603 # 0x40db -.LCPI0_89: +.LCPI0_80: .word 20480 # 0x5000 .word 20488 # 0x5008 .word 20496 # 0x5010 .word 20481 # 0x5001 -.LCPI0_90: +.LCPI0_81: .word 40960 # 0xa000 .word 40968 # 0xa008 .word 40976 # 0xa010 .word 40961 # 0xa001 -.LCPI0_91: +.LCPI0_82: .word 20489 # 0x5009 .word 20497 # 0x5011 .word 20482 # 0x5002 .word 20490 # 0x500a -.LCPI0_92: +.LCPI0_83: .word 40969 # 0xa009 .word 40977 # 0xa011 .word 40962 # 0xa002 .word 40970 # 0xa00a -.LCPI0_93: +.LCPI0_84: .word 20498 # 0x5012 .word 20483 # 0x5003 .word 20491 # 0x500b .word 20499 # 0x5013 -.LCPI0_94: +.LCPI0_85: .word 40978 # 0xa012 .word 40963 # 0xa003 .word 40971 # 0xa00b .word 40979 # 0xa013 -.LCPI0_95: +.LCPI0_86: .word 0 # 0x0 .word 512 # 0x200 .word 1024 # 0x400 .word 1536 # 0x600 -.LCPI0_96: +.LCPI0_87: .word 64 # 0x40 .word 576 # 0x240 .word 1088 # 0x440 .word 1600 # 0x640 -.LCPI0_97: +.LCPI0_88: .word 128 # 0x80 .word 640 # 0x280 .word 1152 # 0x480 .word 1664 # 0x680 -.LCPI0_98: +.LCPI0_89: .word 192 # 0xc0 .word 704 # 0x2c0 .word 1216 # 0x4c0 .word 1728 # 0x6c0 -.LCPI0_99: +.LCPI0_90: .word 4096 # 0x1000 .word 4160 # 0x1040 .word 4224 # 0x1080 .word 4288 # 0x10c0 -.LCPI0_100: +.LCPI0_91: .word 4104 # 0x1008 .word 4168 # 0x1048 .word 4232 # 0x1088 .word 4296 # 0x10c8 -.LCPI0_101: +.LCPI0_92: .word 4112 # 0x1010 .word 4176 # 0x1050 .word 4240 # 0x1090 .word 4304 # 0x10d0 -.LCPI0_102: +.LCPI0_93: .word 4120 # 0x1018 .word 4184 # 0x1058 .word 4248 # 0x1098 .word 4312 # 0x10d8 -.LCPI0_103: +.LCPI0_94: .word 4128 # 0x1020 .word 4192 # 0x1060 .word 4256 # 0x10a0 .word 4320 # 0x10e0 -.LCPI0_104: +.LCPI0_95: .word 4097 # 0x1001 .word 4161 # 0x1041 .word 4225 # 0x1081 .word 4289 # 0x10c1 -.LCPI0_105: +.LCPI0_96: .word 4105 # 0x1009 .word 4169 # 0x1049 .word 4233 # 0x1089 .word 4297 # 0x10c9 -.LCPI0_106: +.LCPI0_97: .word 4113 # 0x1011 .word 4177 # 0x1051 .word 4241 # 0x1091 .word 4305 # 0x10d1 -.LCPI0_107: +.LCPI0_98: .word 4121 # 0x1019 .word 4185 # 0x1059 .word 4249 # 0x1099 .word 4313 # 0x10d9 -.LCPI0_108: +.LCPI0_99: .word 4129 # 0x1021 .word 4193 # 0x1061 .word 4257 # 0x10a1 .word 4321 # 0x10e1 -.LCPI0_109: +.LCPI0_100: .word 4098 # 0x1002 .word 4162 # 0x1042 .word 4226 # 0x1082 .word 4290 # 0x10c2 -.LCPI0_110: +.LCPI0_101: .word 4106 # 0x100a .word 4170 # 0x104a .word 4234 # 0x108a .word 4298 # 0x10ca -.LCPI0_111: +.LCPI0_102: .word 4114 # 0x1012 .word 4178 # 0x1052 .word 4242 # 0x1092 .word 4306 # 0x10d2 -.LCPI0_112: +.LCPI0_103: .word 4122 # 0x101a .word 4186 # 0x105a .word 4250 # 0x109a .word 4314 # 0x10da -.LCPI0_113: +.LCPI0_104: .word 4130 # 0x1022 .word 4194 # 0x1062 .word 4258 # 0x10a2 .word 4322 # 0x10e2 -.LCPI0_114: +.LCPI0_105: .word 4099 # 0x1003 .word 4163 # 0x1043 .word 4227 # 0x1083 .word 4291 # 0x10c3 -.LCPI0_115: +.LCPI0_106: .word 4107 # 0x100b .word 4171 # 0x104b .word 4235 # 0x108b .word 4299 # 0x10cb -.LCPI0_116: +.LCPI0_107: .word 4115 # 0x1013 .word 4179 # 0x1053 .word 4243 # 0x1093 .word 4307 # 0x10d3 -.LCPI0_117: +.LCPI0_108: .word 4123 # 0x101b .word 4187 # 0x105b .word 4251 # 0x109b .word 4315 # 0x10db -.LCPI0_118: +.LCPI0_109: .word 4131 # 0x1023 .word 4195 # 0x1063 .word 4259 # 0x10a3 .word 4323 # 0x10e3 -.LCPI0_119: +.LCPI0_110: .word 4100 # 0x1004 .word 4164 # 0x1044 .word 4228 # 0x1084 .word 4292 # 0x10c4 -.LCPI0_120: +.LCPI0_111: .word 4108 # 0x100c .word 4172 # 0x104c .word 4236 # 0x108c .word 4300 # 0x10cc -.LCPI0_121: +.LCPI0_112: .word 4116 # 0x1014 .word 4180 # 0x1054 .word 4244 # 0x1094 .word 4308 # 0x10d4 -.LCPI0_122: +.LCPI0_113: .word 4124 # 0x101c .word 4188 # 0x105c .word 4252 # 0x109c .word 4316 # 0x10dc -.LCPI0_123: +.LCPI0_114: .word 4132 # 0x1024 .word 4196 # 0x1064 .word 4260 # 0x10a4 @@ -604,11 +584,14 @@ init_layer3: # @init_layer3 # %bb.2: # %.preheader344.preheader pcalau12i $a0, %pc_hi20(ispow) addi.d $s0, $a0, %pc_lo12(ispow) - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_0) move $s1, $zero lu12i.w $a0, 2 ori $s2, $a0, 15 + lu12i.w $a0, 349525 + ori $a0, $a0, 1365 + lu32i.d $a0, 349525 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB0_3: # %.preheader344 # =>This Inner Loop Header: Depth=1 @@ -715,18 +698,26 @@ init_layer3: # @init_layer3 lu52i.d $a1, $a1, 1022 st.d $a1, $a0, %pc_lo12(aa_cs.7) pcalau12i $a0, %pc_hi20(aa_ca.7) - pcalau12i $a1, %pc_hi20(.LCPI0_1) - fld.d $fs0, $a1, %pc_lo12(.LCPI0_1) - pcalau12i $a1, %pc_hi20(.LCPI0_2) - fld.d $fs3, $a1, %pc_lo12(.LCPI0_2) - pcalau12i $a1, %pc_hi20(.LCPI0_3) - fld.d $fs1, $a1, %pc_lo12(.LCPI0_3) lu12i.w $a1, -233331 ori $a1, $a1, 1012 lu32i.d $a1, -110744 lu52i.d $a1, $a1, -1034 st.d $a1, $a0, %pc_lo12(aa_ca.7) ori $s1, $zero, 55 + lu12i.w $a0, 306804 + ori $a0, $a0, 1159 + lu32i.d $a0, 415512 + lu52i.d $a0, $a0, 1018 + movgr2fr.d $fs0, $a0 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fs3, $a0 + ori $a0, $zero, 0 + lu32i.d $a0, 131072 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fs1, $a0 pcalau12i $a0, %pc_hi20(win) addi.d $s0, $a0, %pc_lo12(win) move $s2, $zero @@ -778,46 +769,46 @@ init_layer3: # @init_layer3 addi.w $s1, $s1, 2 bne $s2, $s3, .LBB0_5 # %bb.6: # %.preheader341.preheader - pcalau12i $a0, %pc_hi20(.LCPI0_4) - vld $vr0, $a0, %pc_lo12(.LCPI0_4) + pcalau12i $a0, %pc_hi20(.LCPI0_0) + vld $vr0, $a0, %pc_lo12(.LCPI0_0) move $s2, $zero - pcalau12i $a0, %pc_hi20(.LCPI0_5) - vld $vr1, $a0, %pc_lo12(.LCPI0_5) + pcalau12i $a0, %pc_hi20(.LCPI0_1) + vld $vr1, $a0, %pc_lo12(.LCPI0_1) vst $vr0, $s0, 432 - pcalau12i $a0, %pc_hi20(.LCPI0_6) - vld $vr0, $a0, %pc_lo12(.LCPI0_6) + pcalau12i $a0, %pc_hi20(.LCPI0_2) + vld $vr0, $a0, %pc_lo12(.LCPI0_2) vst $vr1, $s0, 960 - pcalau12i $a0, %pc_hi20(.LCPI0_7) - vld $vr1, $a0, %pc_lo12(.LCPI0_7) + pcalau12i $a0, %pc_hi20(.LCPI0_3) + vld $vr1, $a0, %pc_lo12(.LCPI0_3) vst $vr0, $s0, 480 vrepli.b $vr0, 0 vst $vr0, $s0, 864 vst $vr1, $s0, 912 - pcalau12i $a0, %pc_hi20(.LCPI0_8) - vld $vr1, $a0, %pc_lo12(.LCPI0_8) - pcalau12i $a0, %pc_hi20(.LCPI0_9) - vld $vr2, $a0, %pc_lo12(.LCPI0_9) - pcalau12i $a0, %pc_hi20(.LCPI0_10) - vld $vr3, $a0, %pc_lo12(.LCPI0_10) + pcalau12i $a0, %pc_hi20(.LCPI0_4) + vld $vr1, $a0, %pc_lo12(.LCPI0_4) + pcalau12i $a0, %pc_hi20(.LCPI0_5) + vld $vr2, $a0, %pc_lo12(.LCPI0_5) + pcalau12i $a0, %pc_hi20(.LCPI0_6) + vld $vr3, $a0, %pc_lo12(.LCPI0_6) vst $vr0, $s0, 528 vst $vr1, $s0, 448 vst $vr2, $s0, 976 vst $vr3, $s0, 496 vst $vr0, $s0, 880 - pcalau12i $a0, %pc_hi20(.LCPI0_11) - vld $vr1, $a0, %pc_lo12(.LCPI0_11) + pcalau12i $a0, %pc_hi20(.LCPI0_7) + vld $vr1, $a0, %pc_lo12(.LCPI0_7) vst $vr0, $s0, 544 - pcalau12i $a0, %pc_hi20(.LCPI0_12) - vld $vr2, $a0, %pc_lo12(.LCPI0_12) + pcalau12i $a0, %pc_hi20(.LCPI0_8) + vld $vr2, $a0, %pc_lo12(.LCPI0_8) vst $vr1, $s0, 928 - pcalau12i $a0, %pc_hi20(.LCPI0_13) - vld $vr1, $a0, %pc_lo12(.LCPI0_13) + pcalau12i $a0, %pc_hi20(.LCPI0_9) + vld $vr1, $a0, %pc_lo12(.LCPI0_9) vst $vr2, $s0, 464 - pcalau12i $a0, %pc_hi20(.LCPI0_14) - vld $vr2, $a0, %pc_lo12(.LCPI0_14) + pcalau12i $a0, %pc_hi20(.LCPI0_10) + vld $vr2, $a0, %pc_lo12(.LCPI0_10) vst $vr1, $s0, 992 - pcalau12i $a0, %pc_hi20(.LCPI0_15) - vld $vr1, $a0, %pc_lo12(.LCPI0_15) + pcalau12i $a0, %pc_hi20(.LCPI0_11) + vld $vr1, $a0, %pc_lo12(.LCPI0_11) vst $vr2, $s0, 512 vst $vr0, $s0, 896 vst $vr0, $s0, 560 @@ -941,10 +932,13 @@ init_layer3: # @init_layer3 ori $s6, $zero, 49 ori $s7, $zero, 35 ori $s8, $zero, 21 - pcalau12i $a0, %pc_hi20(.LCPI0_16) - fld.d $fs1, $a0, %pc_lo12(.LCPI0_16) - pcalau12i $a0, %pc_hi20(.LCPI0_17) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_17) + lu12i.w $a0, 230103 + ori $a0, $a0, 869 + lu32i.d $a0, 49490 + lu52i.d $a0, $a0, 1020 + movgr2fr.d $fs1, $a0 + lu52i.d $a0, $zero, 2047 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB0_7: # =>This Inner Loop Header: Depth=1 bstrpick.d $s1, $s3, 31, 0 @@ -1195,24 +1189,24 @@ init_layer3: # @init_layer3 fneg.d $fa0, $fa0 fst.d $fa0, $a0, 408 fneg.d $fa0, $fa2 - pcalau12i $a1, %pc_hi20(.LCPI0_18) - vld $vr1, $a1, %pc_lo12(.LCPI0_18) - pcalau12i $a1, %pc_hi20(.LCPI0_19) - vld $vr2, $a1, %pc_lo12(.LCPI0_19) - pcalau12i $a1, %pc_hi20(.LCPI0_20) - vld $vr3, $a1, %pc_lo12(.LCPI0_20) + pcalau12i $a1, %pc_hi20(.LCPI0_12) + vld $vr1, $a1, %pc_lo12(.LCPI0_12) + pcalau12i $a1, %pc_hi20(.LCPI0_13) + vld $vr2, $a1, %pc_lo12(.LCPI0_13) + pcalau12i $a1, %pc_hi20(.LCPI0_14) + vld $vr3, $a1, %pc_lo12(.LCPI0_14) fst.d $fa0, $a0, 424 vst $vr1, $a0, 432 vst $vr2, $a0, 448 vst $vr3, $a0, 464 - pcalau12i $a1, %pc_hi20(.LCPI0_21) - vld $vr0, $a1, %pc_lo12(.LCPI0_21) - pcalau12i $a1, %pc_hi20(.LCPI0_22) - vld $vr1, $a1, %pc_lo12(.LCPI0_22) - pcalau12i $a1, %pc_hi20(.LCPI0_23) - vld $vr2, $a1, %pc_lo12(.LCPI0_23) - pcalau12i $a1, %pc_hi20(.LCPI0_24) - vld $vr3, $a1, %pc_lo12(.LCPI0_24) + pcalau12i $a1, %pc_hi20(.LCPI0_15) + vld $vr0, $a1, %pc_lo12(.LCPI0_15) + pcalau12i $a1, %pc_hi20(.LCPI0_16) + vld $vr1, $a1, %pc_lo12(.LCPI0_16) + pcalau12i $a1, %pc_hi20(.LCPI0_17) + vld $vr2, $a1, %pc_lo12(.LCPI0_17) + pcalau12i $a1, %pc_hi20(.LCPI0_18) + vld $vr3, $a1, %pc_lo12(.LCPI0_18) vst $vr0, $a0, 480 vst $vr1, $a0, 496 vst $vr2, $a0, 512 @@ -1353,12 +1347,12 @@ init_layer3: # @init_layer3 st.d $a0, $sp, 168 # 8-byte Folded Spill st.d $zero, $a0, 0 pcalau12i $a0, %pc_hi20(tan2_2) - addi.d $a1, $a0, %pc_lo12(tan2_2) - ori $a0, $fp, 3021 - lu32i.d $a0, 434334 - lu52i.d $fp, $a0, 1023 - st.d $a1, $sp, 160 # 8-byte Folded Spill - st.d $fp, $a1, 0 + addi.d $a0, $a0, %pc_lo12(tan2_2) + ori $s3, $fp, 3021 + lu32i.d $s3, 434334 + lu52i.d $fp, $s3, 1023 + st.d $a0, $sp, 160 # 8-byte Folded Spill + st.d $fp, $a0, 0 vldi $vr0, -816 pcaddu18i $ra, %call36(exp2) jirl $ra, $ra, 0 @@ -1381,16 +1375,18 @@ init_layer3: # @init_layer3 st.d $s2, $s7, 128 st.d $s1, $sp, 144 # 8-byte Folded Spill st.d $fp, $s1, 128 - st.d $fp, $sp, 152 # 8-byte Folded Spill st.d $fp, $s0, 128 ori $s4, $zero, 1 ori $s2, $zero, 8 - pcalau12i $a0, %pc_hi20(.LCPI0_25) - fld.d $fs4, $a0, %pc_lo12(.LCPI0_25) - pcalau12i $a0, %pc_hi20(.LCPI0_26) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_26) - pcalau12i $a0, %pc_hi20(.LCPI0_27) - fld.d $fs1, $a0, %pc_lo12(.LCPI0_27) + st.d $fp, $sp, 152 # 8-byte Folded Spill + movgr2fr.d $fs4, $fp + lu12i.w $a0, -420435 + ori $a0, $a0, 941 + lu32i.d $a0, -333665 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fs0, $a0 + lu52i.d $a0, $s3, 1022 + movgr2fr.d $fs1, $a0 ori $s8, $zero, 128 b .LBB0_23 .p2align 4, , 16 @@ -2423,193 +2419,193 @@ init_layer3: # @init_layer3 addi.d $a2, $a2, 56 bne $a0, $a6, .LBB0_28 # %bb.29: # %.preheader328.preheader - pcalau12i $a0, %pc_hi20(.LCPI0_28) - vld $vr0, $a0, %pc_lo12(.LCPI0_28) + pcalau12i $a0, %pc_hi20(.LCPI0_19) + vld $vr0, $a0, %pc_lo12(.LCPI0_19) pcalau12i $a0, %pc_hi20(i_slen2) addi.d $a0, $a0, %pc_lo12(i_slen2) - pcalau12i $a1, %pc_hi20(.LCPI0_29) - vld $vr1, $a1, %pc_lo12(.LCPI0_29) - pcalau12i $a1, %pc_hi20(.LCPI0_30) - vld $vr2, $a1, %pc_lo12(.LCPI0_30) + pcalau12i $a1, %pc_hi20(.LCPI0_20) + vld $vr1, $a1, %pc_lo12(.LCPI0_20) + pcalau12i $a1, %pc_hi20(.LCPI0_21) + vld $vr2, $a1, %pc_lo12(.LCPI0_21) vst $vr0, $a0, 0 vst $vr1, $a0, 16 vst $vr2, $a0, 32 - pcalau12i $a1, %pc_hi20(.LCPI0_31) - vld $vr0, $a1, %pc_lo12(.LCPI0_31) - pcalau12i $a1, %pc_hi20(.LCPI0_32) - vld $vr1, $a1, %pc_lo12(.LCPI0_32) - pcalau12i $a1, %pc_hi20(.LCPI0_33) - vld $vr2, $a1, %pc_lo12(.LCPI0_33) - pcalau12i $a1, %pc_hi20(.LCPI0_34) - vld $vr3, $a1, %pc_lo12(.LCPI0_34) + pcalau12i $a1, %pc_hi20(.LCPI0_22) + vld $vr0, $a1, %pc_lo12(.LCPI0_22) + pcalau12i $a1, %pc_hi20(.LCPI0_23) + vld $vr1, $a1, %pc_lo12(.LCPI0_23) + pcalau12i $a1, %pc_hi20(.LCPI0_24) + vld $vr2, $a1, %pc_lo12(.LCPI0_24) + pcalau12i $a1, %pc_hi20(.LCPI0_25) + vld $vr3, $a1, %pc_lo12(.LCPI0_25) vst $vr0, $a0, 48 vst $vr1, $a0, 64 vst $vr2, $a0, 80 vst $vr3, $a0, 96 - pcalau12i $a1, %pc_hi20(.LCPI0_35) - vld $vr0, $a1, %pc_lo12(.LCPI0_35) - pcalau12i $a1, %pc_hi20(.LCPI0_36) - vld $vr1, $a1, %pc_lo12(.LCPI0_36) - pcalau12i $a1, %pc_hi20(.LCPI0_37) - vld $vr2, $a1, %pc_lo12(.LCPI0_37) - pcalau12i $a1, %pc_hi20(.LCPI0_38) - vld $vr3, $a1, %pc_lo12(.LCPI0_38) + pcalau12i $a1, %pc_hi20(.LCPI0_26) + vld $vr0, $a1, %pc_lo12(.LCPI0_26) + pcalau12i $a1, %pc_hi20(.LCPI0_27) + vld $vr1, $a1, %pc_lo12(.LCPI0_27) + pcalau12i $a1, %pc_hi20(.LCPI0_28) + vld $vr2, $a1, %pc_lo12(.LCPI0_28) + pcalau12i $a1, %pc_hi20(.LCPI0_29) + vld $vr3, $a1, %pc_lo12(.LCPI0_29) vst $vr0, $a0, 112 vst $vr1, $a0, 128 vst $vr2, $a0, 144 vst $vr3, $a0, 160 - pcalau12i $a1, %pc_hi20(.LCPI0_39) - vld $vr0, $a1, %pc_lo12(.LCPI0_39) - pcalau12i $a1, %pc_hi20(.LCPI0_40) - vld $vr1, $a1, %pc_lo12(.LCPI0_40) - pcalau12i $a1, %pc_hi20(.LCPI0_41) - vld $vr2, $a1, %pc_lo12(.LCPI0_41) - pcalau12i $a1, %pc_hi20(.LCPI0_42) - vld $vr3, $a1, %pc_lo12(.LCPI0_42) + pcalau12i $a1, %pc_hi20(.LCPI0_30) + vld $vr0, $a1, %pc_lo12(.LCPI0_30) + pcalau12i $a1, %pc_hi20(.LCPI0_31) + vld $vr1, $a1, %pc_lo12(.LCPI0_31) + pcalau12i $a1, %pc_hi20(.LCPI0_32) + vld $vr2, $a1, %pc_lo12(.LCPI0_32) + pcalau12i $a1, %pc_hi20(.LCPI0_33) + vld $vr3, $a1, %pc_lo12(.LCPI0_33) vst $vr0, $a0, 176 vst $vr1, $a0, 192 vst $vr2, $a0, 208 vst $vr3, $a0, 224 - pcalau12i $a1, %pc_hi20(.LCPI0_43) - vld $vr0, $a1, %pc_lo12(.LCPI0_43) - pcalau12i $a1, %pc_hi20(.LCPI0_44) - vld $vr1, $a1, %pc_lo12(.LCPI0_44) - pcalau12i $a1, %pc_hi20(.LCPI0_45) - vld $vr2, $a1, %pc_lo12(.LCPI0_45) - pcalau12i $a1, %pc_hi20(.LCPI0_46) - vld $vr3, $a1, %pc_lo12(.LCPI0_46) + pcalau12i $a1, %pc_hi20(.LCPI0_34) + vld $vr0, $a1, %pc_lo12(.LCPI0_34) + pcalau12i $a1, %pc_hi20(.LCPI0_35) + vld $vr1, $a1, %pc_lo12(.LCPI0_35) + pcalau12i $a1, %pc_hi20(.LCPI0_36) + vld $vr2, $a1, %pc_lo12(.LCPI0_36) + pcalau12i $a1, %pc_hi20(.LCPI0_37) + vld $vr3, $a1, %pc_lo12(.LCPI0_37) vst $vr0, $a0, 240 vst $vr1, $a0, 256 vst $vr2, $a0, 272 vst $vr3, $a0, 288 - pcalau12i $a1, %pc_hi20(.LCPI0_47) - vld $vr0, $a1, %pc_lo12(.LCPI0_47) - pcalau12i $a1, %pc_hi20(.LCPI0_48) - vld $vr1, $a1, %pc_lo12(.LCPI0_48) - pcalau12i $a1, %pc_hi20(.LCPI0_49) - vld $vr2, $a1, %pc_lo12(.LCPI0_49) - pcalau12i $a1, %pc_hi20(.LCPI0_50) - vld $vr3, $a1, %pc_lo12(.LCPI0_50) + pcalau12i $a1, %pc_hi20(.LCPI0_38) + vld $vr0, $a1, %pc_lo12(.LCPI0_38) + pcalau12i $a1, %pc_hi20(.LCPI0_39) + vld $vr1, $a1, %pc_lo12(.LCPI0_39) + pcalau12i $a1, %pc_hi20(.LCPI0_40) + vld $vr2, $a1, %pc_lo12(.LCPI0_40) + pcalau12i $a1, %pc_hi20(.LCPI0_41) + vld $vr3, $a1, %pc_lo12(.LCPI0_41) vst $vr0, $a0, 304 vst $vr1, $a0, 320 vst $vr2, $a0, 336 vst $vr3, $a0, 352 - pcalau12i $a1, %pc_hi20(.LCPI0_51) - vld $vr0, $a1, %pc_lo12(.LCPI0_51) - pcalau12i $a1, %pc_hi20(.LCPI0_52) - vld $vr1, $a1, %pc_lo12(.LCPI0_52) - pcalau12i $a1, %pc_hi20(.LCPI0_53) - vld $vr2, $a1, %pc_lo12(.LCPI0_53) - pcalau12i $a1, %pc_hi20(.LCPI0_54) - vld $vr3, $a1, %pc_lo12(.LCPI0_54) + pcalau12i $a1, %pc_hi20(.LCPI0_42) + vld $vr0, $a1, %pc_lo12(.LCPI0_42) + pcalau12i $a1, %pc_hi20(.LCPI0_43) + vld $vr1, $a1, %pc_lo12(.LCPI0_43) + pcalau12i $a1, %pc_hi20(.LCPI0_44) + vld $vr2, $a1, %pc_lo12(.LCPI0_44) + pcalau12i $a1, %pc_hi20(.LCPI0_45) + vld $vr3, $a1, %pc_lo12(.LCPI0_45) vst $vr0, $a0, 368 vst $vr1, $a0, 384 vst $vr2, $a0, 400 vst $vr3, $a0, 416 - pcalau12i $a1, %pc_hi20(.LCPI0_55) - vld $vr0, $a1, %pc_lo12(.LCPI0_55) - pcalau12i $a1, %pc_hi20(.LCPI0_56) - vld $vr1, $a1, %pc_lo12(.LCPI0_56) - pcalau12i $a1, %pc_hi20(.LCPI0_57) - vld $vr2, $a1, %pc_lo12(.LCPI0_57) - pcalau12i $a1, %pc_hi20(.LCPI0_58) - vld $vr3, $a1, %pc_lo12(.LCPI0_58) + pcalau12i $a1, %pc_hi20(.LCPI0_46) + vld $vr0, $a1, %pc_lo12(.LCPI0_46) + pcalau12i $a1, %pc_hi20(.LCPI0_47) + vld $vr1, $a1, %pc_lo12(.LCPI0_47) + pcalau12i $a1, %pc_hi20(.LCPI0_48) + vld $vr2, $a1, %pc_lo12(.LCPI0_48) + pcalau12i $a1, %pc_hi20(.LCPI0_49) + vld $vr3, $a1, %pc_lo12(.LCPI0_49) vst $vr0, $a0, 432 vst $vr1, $a0, 448 vst $vr2, $a0, 464 vst $vr3, $a0, 480 - pcalau12i $a1, %pc_hi20(.LCPI0_59) - vld $vr0, $a1, %pc_lo12(.LCPI0_59) - pcalau12i $a1, %pc_hi20(.LCPI0_60) - vld $vr1, $a1, %pc_lo12(.LCPI0_60) - pcalau12i $a1, %pc_hi20(.LCPI0_61) - vld $vr2, $a1, %pc_lo12(.LCPI0_61) - pcalau12i $a1, %pc_hi20(.LCPI0_62) - vld $vr3, $a1, %pc_lo12(.LCPI0_62) + pcalau12i $a1, %pc_hi20(.LCPI0_50) + vld $vr0, $a1, %pc_lo12(.LCPI0_50) + pcalau12i $a1, %pc_hi20(.LCPI0_51) + vld $vr1, $a1, %pc_lo12(.LCPI0_51) + pcalau12i $a1, %pc_hi20(.LCPI0_52) + vld $vr2, $a1, %pc_lo12(.LCPI0_52) + pcalau12i $a1, %pc_hi20(.LCPI0_53) + vld $vr3, $a1, %pc_lo12(.LCPI0_53) vst $vr0, $a0, 496 vst $vr1, $a0, 512 vst $vr2, $a0, 528 vst $vr3, $a0, 544 - pcalau12i $a1, %pc_hi20(.LCPI0_63) - vld $vr0, $a1, %pc_lo12(.LCPI0_63) - pcalau12i $a1, %pc_hi20(.LCPI0_64) - vld $vr1, $a1, %pc_lo12(.LCPI0_64) - pcalau12i $a1, %pc_hi20(.LCPI0_65) - vld $vr2, $a1, %pc_lo12(.LCPI0_65) - pcalau12i $a1, %pc_hi20(.LCPI0_66) - vld $vr3, $a1, %pc_lo12(.LCPI0_66) + pcalau12i $a1, %pc_hi20(.LCPI0_54) + vld $vr0, $a1, %pc_lo12(.LCPI0_54) + pcalau12i $a1, %pc_hi20(.LCPI0_55) + vld $vr1, $a1, %pc_lo12(.LCPI0_55) + pcalau12i $a1, %pc_hi20(.LCPI0_56) + vld $vr2, $a1, %pc_lo12(.LCPI0_56) + pcalau12i $a1, %pc_hi20(.LCPI0_57) + vld $vr3, $a1, %pc_lo12(.LCPI0_57) vst $vr0, $a0, 560 vst $vr1, $a0, 576 vst $vr2, $a0, 592 vst $vr3, $a0, 608 - pcalau12i $a1, %pc_hi20(.LCPI0_67) - vld $vr0, $a1, %pc_lo12(.LCPI0_67) - pcalau12i $a1, %pc_hi20(.LCPI0_68) - vld $vr1, $a1, %pc_lo12(.LCPI0_68) - pcalau12i $a1, %pc_hi20(.LCPI0_69) - vld $vr2, $a1, %pc_lo12(.LCPI0_69) - pcalau12i $a1, %pc_hi20(.LCPI0_70) - vld $vr3, $a1, %pc_lo12(.LCPI0_70) + pcalau12i $a1, %pc_hi20(.LCPI0_58) + vld $vr0, $a1, %pc_lo12(.LCPI0_58) + pcalau12i $a1, %pc_hi20(.LCPI0_59) + vld $vr1, $a1, %pc_lo12(.LCPI0_59) + pcalau12i $a1, %pc_hi20(.LCPI0_60) + vld $vr2, $a1, %pc_lo12(.LCPI0_60) + pcalau12i $a1, %pc_hi20(.LCPI0_61) + vld $vr3, $a1, %pc_lo12(.LCPI0_61) vst $vr0, $a0, 624 vst $vr1, $a0, 640 vst $vr2, $a0, 656 vst $vr3, $a0, 672 - pcalau12i $a1, %pc_hi20(.LCPI0_71) - vld $vr0, $a1, %pc_lo12(.LCPI0_71) - pcalau12i $a1, %pc_hi20(.LCPI0_72) - vld $vr1, $a1, %pc_lo12(.LCPI0_72) - pcalau12i $a1, %pc_hi20(.LCPI0_73) - vld $vr2, $a1, %pc_lo12(.LCPI0_73) - pcalau12i $a1, %pc_hi20(.LCPI0_74) - vld $vr3, $a1, %pc_lo12(.LCPI0_74) + pcalau12i $a1, %pc_hi20(.LCPI0_62) + vld $vr0, $a1, %pc_lo12(.LCPI0_62) + pcalau12i $a1, %pc_hi20(.LCPI0_63) + vld $vr1, $a1, %pc_lo12(.LCPI0_63) + pcalau12i $a1, %pc_hi20(.LCPI0_64) + vld $vr2, $a1, %pc_lo12(.LCPI0_64) + pcalau12i $a1, %pc_hi20(.LCPI0_65) + vld $vr3, $a1, %pc_lo12(.LCPI0_65) vst $vr0, $a0, 688 vst $vr1, $a0, 704 vst $vr2, $a0, 720 vst $vr3, $a0, 736 - pcalau12i $a1, %pc_hi20(.LCPI0_75) - vld $vr0, $a1, %pc_lo12(.LCPI0_75) - pcalau12i $a1, %pc_hi20(.LCPI0_76) - vld $vr1, $a1, %pc_lo12(.LCPI0_76) - pcalau12i $a1, %pc_hi20(.LCPI0_77) - vld $vr2, $a1, %pc_lo12(.LCPI0_77) - pcalau12i $a1, %pc_hi20(.LCPI0_78) - vld $vr3, $a1, %pc_lo12(.LCPI0_78) + pcalau12i $a1, %pc_hi20(.LCPI0_66) + vld $vr0, $a1, %pc_lo12(.LCPI0_66) + pcalau12i $a1, %pc_hi20(.LCPI0_67) + vld $vr1, $a1, %pc_lo12(.LCPI0_67) + pcalau12i $a1, %pc_hi20(.LCPI0_68) + vld $vr2, $a1, %pc_lo12(.LCPI0_68) + pcalau12i $a1, %pc_hi20(.LCPI0_69) + vld $vr3, $a1, %pc_lo12(.LCPI0_69) vst $vr0, $a0, 752 vst $vr1, $a0, 768 vst $vr2, $a0, 784 vst $vr3, $a0, 800 - pcalau12i $a1, %pc_hi20(.LCPI0_79) - vld $vr0, $a1, %pc_lo12(.LCPI0_79) - pcalau12i $a1, %pc_hi20(.LCPI0_80) - vld $vr1, $a1, %pc_lo12(.LCPI0_80) - pcalau12i $a1, %pc_hi20(.LCPI0_81) - vld $vr2, $a1, %pc_lo12(.LCPI0_81) - pcalau12i $a1, %pc_hi20(.LCPI0_82) - vld $vr3, $a1, %pc_lo12(.LCPI0_82) + pcalau12i $a1, %pc_hi20(.LCPI0_70) + vld $vr0, $a1, %pc_lo12(.LCPI0_70) + pcalau12i $a1, %pc_hi20(.LCPI0_71) + vld $vr1, $a1, %pc_lo12(.LCPI0_71) + pcalau12i $a1, %pc_hi20(.LCPI0_72) + vld $vr2, $a1, %pc_lo12(.LCPI0_72) + pcalau12i $a1, %pc_hi20(.LCPI0_73) + vld $vr3, $a1, %pc_lo12(.LCPI0_73) vst $vr0, $a0, 816 vst $vr1, $a0, 832 vst $vr2, $a0, 848 vst $vr3, $a0, 864 - pcalau12i $a1, %pc_hi20(.LCPI0_83) - vld $vr0, $a1, %pc_lo12(.LCPI0_83) - pcalau12i $a1, %pc_hi20(.LCPI0_84) - vld $vr1, $a1, %pc_lo12(.LCPI0_84) - pcalau12i $a1, %pc_hi20(.LCPI0_85) - vld $vr2, $a1, %pc_lo12(.LCPI0_85) - pcalau12i $a1, %pc_hi20(.LCPI0_86) - vld $vr3, $a1, %pc_lo12(.LCPI0_86) + pcalau12i $a1, %pc_hi20(.LCPI0_74) + vld $vr0, $a1, %pc_lo12(.LCPI0_74) + pcalau12i $a1, %pc_hi20(.LCPI0_75) + vld $vr1, $a1, %pc_lo12(.LCPI0_75) + pcalau12i $a1, %pc_hi20(.LCPI0_76) + vld $vr2, $a1, %pc_lo12(.LCPI0_76) + pcalau12i $a1, %pc_hi20(.LCPI0_77) + vld $vr3, $a1, %pc_lo12(.LCPI0_77) vst $vr0, $a0, 880 vst $vr1, $a0, 896 vst $vr2, $a0, 912 vst $vr3, $a0, 928 - pcalau12i $a1, %pc_hi20(.LCPI0_87) - vld $vr0, $a1, %pc_lo12(.LCPI0_87) - pcalau12i $a1, %pc_hi20(.LCPI0_88) - vld $vr1, $a1, %pc_lo12(.LCPI0_88) - pcalau12i $a1, %pc_hi20(.LCPI0_89) - vld $vr2, $a1, %pc_lo12(.LCPI0_89) - pcalau12i $a1, %pc_hi20(.LCPI0_90) - vld $vr3, $a1, %pc_lo12(.LCPI0_90) + pcalau12i $a1, %pc_hi20(.LCPI0_78) + vld $vr0, $a1, %pc_lo12(.LCPI0_78) + pcalau12i $a1, %pc_hi20(.LCPI0_79) + vld $vr1, $a1, %pc_lo12(.LCPI0_79) + pcalau12i $a1, %pc_hi20(.LCPI0_80) + vld $vr2, $a1, %pc_lo12(.LCPI0_80) + pcalau12i $a1, %pc_hi20(.LCPI0_81) + vld $vr3, $a1, %pc_lo12(.LCPI0_81) vst $vr0, $a0, 944 vst $vr1, $a0, 960 vst $vr2, $a0, 976 @@ -2618,14 +2614,14 @@ init_layer3: # @init_layer3 move $a2, $zero move $a3, $zero vst $vr3, $a1, 2000 - pcalau12i $a4, %pc_hi20(.LCPI0_91) - vld $vr0, $a4, %pc_lo12(.LCPI0_91) - pcalau12i $a4, %pc_hi20(.LCPI0_92) - vld $vr1, $a4, %pc_lo12(.LCPI0_92) - pcalau12i $a4, %pc_hi20(.LCPI0_93) - vld $vr2, $a4, %pc_lo12(.LCPI0_93) - pcalau12i $a4, %pc_hi20(.LCPI0_94) - vld $vr3, $a4, %pc_lo12(.LCPI0_94) + pcalau12i $a4, %pc_hi20(.LCPI0_82) + vld $vr0, $a4, %pc_lo12(.LCPI0_82) + pcalau12i $a4, %pc_hi20(.LCPI0_83) + vld $vr1, $a4, %pc_lo12(.LCPI0_83) + pcalau12i $a4, %pc_hi20(.LCPI0_84) + vld $vr2, $a4, %pc_lo12(.LCPI0_84) + pcalau12i $a4, %pc_hi20(.LCPI0_85) + vld $vr3, $a4, %pc_lo12(.LCPI0_85) vst $vr0, $a0, 992 vst $vr1, $a1, 2016 vst $vr2, $a0, 1008 @@ -2635,18 +2631,18 @@ init_layer3: # @init_layer3 .p2align 4, , 16 .LBB0_30: # %.preheader320 # =>This Inner Loop Header: Depth=1 - pcalau12i $a5, %pc_hi20(.LCPI0_95) - vld $vr0, $a5, %pc_lo12(.LCPI0_95) - pcalau12i $a5, %pc_hi20(.LCPI0_96) - vld $vr1, $a5, %pc_lo12(.LCPI0_96) + pcalau12i $a5, %pc_hi20(.LCPI0_86) + vld $vr0, $a5, %pc_lo12(.LCPI0_86) + pcalau12i $a5, %pc_hi20(.LCPI0_87) + vld $vr1, $a5, %pc_lo12(.LCPI0_87) vreplgr2vr.w $vr2, $a2 vadd.w $vr3, $vr2, $vr0 vadd.w $vr2, $vr2, $vr1 vst $vr2, $a0, -144 - pcalau12i $a5, %pc_hi20(.LCPI0_97) - vld $vr2, $a5, %pc_lo12(.LCPI0_97) - pcalau12i $a5, %pc_hi20(.LCPI0_98) - vld $vr4, $a5, %pc_lo12(.LCPI0_98) + pcalau12i $a5, %pc_hi20(.LCPI0_88) + vld $vr2, $a5, %pc_lo12(.LCPI0_88) + pcalau12i $a5, %pc_hi20(.LCPI0_89) + vld $vr4, $a5, %pc_lo12(.LCPI0_89) vst $vr3, $a0, -160 vreplgr2vr.w $vr3, $a3 vadd.w $vr5, $vr3, $vr2 @@ -2698,77 +2694,77 @@ init_layer3: # @init_layer3 addi.d $a0, $a0, 320 bne $a3, $a4, .LBB0_30 # %bb.31: # %.preheader316.preheader - pcalau12i $a0, %pc_hi20(.LCPI0_99) - vld $vr0, $a0, %pc_lo12(.LCPI0_99) + pcalau12i $a0, %pc_hi20(.LCPI0_90) + vld $vr0, $a0, %pc_lo12(.LCPI0_90) vst $vr0, $a1, 1600 - pcalau12i $a0, %pc_hi20(.LCPI0_100) - vld $vr0, $a0, %pc_lo12(.LCPI0_100) - pcalau12i $a0, %pc_hi20(.LCPI0_101) - vld $vr1, $a0, %pc_lo12(.LCPI0_101) - pcalau12i $a0, %pc_hi20(.LCPI0_102) - vld $vr2, $a0, %pc_lo12(.LCPI0_102) - pcalau12i $a0, %pc_hi20(.LCPI0_103) - vld $vr3, $a0, %pc_lo12(.LCPI0_103) + pcalau12i $a0, %pc_hi20(.LCPI0_91) + vld $vr0, $a0, %pc_lo12(.LCPI0_91) + pcalau12i $a0, %pc_hi20(.LCPI0_92) + vld $vr1, $a0, %pc_lo12(.LCPI0_92) + pcalau12i $a0, %pc_hi20(.LCPI0_93) + vld $vr2, $a0, %pc_lo12(.LCPI0_93) + pcalau12i $a0, %pc_hi20(.LCPI0_94) + vld $vr3, $a0, %pc_lo12(.LCPI0_94) vst $vr0, $a1, 1616 vst $vr1, $a1, 1632 vst $vr2, $a1, 1648 vst $vr3, $a1, 1664 - pcalau12i $a0, %pc_hi20(.LCPI0_104) - vld $vr0, $a0, %pc_lo12(.LCPI0_104) - pcalau12i $a0, %pc_hi20(.LCPI0_105) - vld $vr1, $a0, %pc_lo12(.LCPI0_105) - pcalau12i $a0, %pc_hi20(.LCPI0_106) - vld $vr2, $a0, %pc_lo12(.LCPI0_106) - pcalau12i $a0, %pc_hi20(.LCPI0_107) - vld $vr3, $a0, %pc_lo12(.LCPI0_107) + pcalau12i $a0, %pc_hi20(.LCPI0_95) + vld $vr0, $a0, %pc_lo12(.LCPI0_95) + pcalau12i $a0, %pc_hi20(.LCPI0_96) + vld $vr1, $a0, %pc_lo12(.LCPI0_96) + pcalau12i $a0, %pc_hi20(.LCPI0_97) + vld $vr2, $a0, %pc_lo12(.LCPI0_97) + pcalau12i $a0, %pc_hi20(.LCPI0_98) + vld $vr3, $a0, %pc_lo12(.LCPI0_98) vst $vr0, $a1, 1680 vst $vr1, $a1, 1696 vst $vr2, $a1, 1712 vst $vr3, $a1, 1728 - pcalau12i $a0, %pc_hi20(.LCPI0_108) - vld $vr0, $a0, %pc_lo12(.LCPI0_108) - pcalau12i $a0, %pc_hi20(.LCPI0_109) - vld $vr1, $a0, %pc_lo12(.LCPI0_109) - pcalau12i $a0, %pc_hi20(.LCPI0_110) - vld $vr2, $a0, %pc_lo12(.LCPI0_110) - pcalau12i $a0, %pc_hi20(.LCPI0_111) - vld $vr3, $a0, %pc_lo12(.LCPI0_111) + pcalau12i $a0, %pc_hi20(.LCPI0_99) + vld $vr0, $a0, %pc_lo12(.LCPI0_99) + pcalau12i $a0, %pc_hi20(.LCPI0_100) + vld $vr1, $a0, %pc_lo12(.LCPI0_100) + pcalau12i $a0, %pc_hi20(.LCPI0_101) + vld $vr2, $a0, %pc_lo12(.LCPI0_101) + pcalau12i $a0, %pc_hi20(.LCPI0_102) + vld $vr3, $a0, %pc_lo12(.LCPI0_102) vst $vr0, $a1, 1744 vst $vr1, $a1, 1760 vst $vr2, $a1, 1776 vst $vr3, $a1, 1792 - pcalau12i $a0, %pc_hi20(.LCPI0_112) - vld $vr0, $a0, %pc_lo12(.LCPI0_112) - pcalau12i $a0, %pc_hi20(.LCPI0_113) - vld $vr1, $a0, %pc_lo12(.LCPI0_113) - pcalau12i $a0, %pc_hi20(.LCPI0_114) - vld $vr2, $a0, %pc_lo12(.LCPI0_114) - pcalau12i $a0, %pc_hi20(.LCPI0_115) - vld $vr3, $a0, %pc_lo12(.LCPI0_115) + pcalau12i $a0, %pc_hi20(.LCPI0_103) + vld $vr0, $a0, %pc_lo12(.LCPI0_103) + pcalau12i $a0, %pc_hi20(.LCPI0_104) + vld $vr1, $a0, %pc_lo12(.LCPI0_104) + pcalau12i $a0, %pc_hi20(.LCPI0_105) + vld $vr2, $a0, %pc_lo12(.LCPI0_105) + pcalau12i $a0, %pc_hi20(.LCPI0_106) + vld $vr3, $a0, %pc_lo12(.LCPI0_106) vst $vr0, $a1, 1808 vst $vr1, $a1, 1824 vst $vr2, $a1, 1840 vst $vr3, $a1, 1856 - pcalau12i $a0, %pc_hi20(.LCPI0_116) - vld $vr0, $a0, %pc_lo12(.LCPI0_116) - pcalau12i $a0, %pc_hi20(.LCPI0_117) - vld $vr1, $a0, %pc_lo12(.LCPI0_117) - pcalau12i $a0, %pc_hi20(.LCPI0_118) - vld $vr2, $a0, %pc_lo12(.LCPI0_118) - pcalau12i $a0, %pc_hi20(.LCPI0_119) - vld $vr3, $a0, %pc_lo12(.LCPI0_119) + pcalau12i $a0, %pc_hi20(.LCPI0_107) + vld $vr0, $a0, %pc_lo12(.LCPI0_107) + pcalau12i $a0, %pc_hi20(.LCPI0_108) + vld $vr1, $a0, %pc_lo12(.LCPI0_108) + pcalau12i $a0, %pc_hi20(.LCPI0_109) + vld $vr2, $a0, %pc_lo12(.LCPI0_109) + pcalau12i $a0, %pc_hi20(.LCPI0_110) + vld $vr3, $a0, %pc_lo12(.LCPI0_110) vst $vr0, $a1, 1872 vst $vr1, $a1, 1888 vst $vr2, $a1, 1904 vst $vr3, $a1, 1920 - pcalau12i $a0, %pc_hi20(.LCPI0_120) - vld $vr0, $a0, %pc_lo12(.LCPI0_120) - pcalau12i $a0, %pc_hi20(.LCPI0_121) - vld $vr1, $a0, %pc_lo12(.LCPI0_121) - pcalau12i $a0, %pc_hi20(.LCPI0_122) - vld $vr2, $a0, %pc_lo12(.LCPI0_122) - pcalau12i $a0, %pc_hi20(.LCPI0_123) - vld $vr3, $a0, %pc_lo12(.LCPI0_123) + pcalau12i $a0, %pc_hi20(.LCPI0_111) + vld $vr0, $a0, %pc_lo12(.LCPI0_111) + pcalau12i $a0, %pc_hi20(.LCPI0_112) + vld $vr1, $a0, %pc_lo12(.LCPI0_112) + pcalau12i $a0, %pc_hi20(.LCPI0_113) + vld $vr2, $a0, %pc_lo12(.LCPI0_113) + pcalau12i $a0, %pc_hi20(.LCPI0_114) + vld $vr3, $a0, %pc_lo12(.LCPI0_114) vst $vr0, $a1, 1936 vst $vr1, $a1, 1952 vst $vr2, $a1, 1968 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/newmdct.s b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/newmdct.s index 1a2814db..3531e8f2 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/newmdct.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/newmdct.s @@ -1,10 +1,6 @@ .file "newmdct.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function mdct_sub48 -.LCPI0_0: - .dword 0x3ff921fb54442d18 # double 1.5707963267948966 .text - .globl mdct_sub48 + .globl mdct_sub48 # -- Begin function mdct_sub48 .p2align 5 .type mdct_sub48,@function mdct_sub48: # @mdct_sub48 @@ -31,9 +27,9 @@ mdct_sub48: # @mdct_sub48 fst.d $fs7, $sp, 472 # 8-byte Folded Spill pcalau12i $fp, %pc_hi20(mdct_sub48.init) ld.w $a5, $fp, %pc_lo12(mdct_sub48.init) - st.d $a4, $sp, 32 # 8-byte Folded Spill - st.d $a3, $sp, 24 # 8-byte Folded Spill - st.d $a2, $sp, 48 # 8-byte Folded Spill + st.d $a4, $sp, 24 # 8-byte Folded Spill + st.d $a3, $sp, 16 # 8-byte Folded Spill + st.d $a2, $sp, 40 # 8-byte Folded Spill move $s3, $a1 move $s2, $a0 bnez $a5, .LBB0_2 @@ -52,23 +48,29 @@ mdct_sub48: # @mdct_sub48 move $fp, $zero lu12i.w $a0, 1 ori $a0, $a0, 520 - st.d $a1, $sp, 16 # 8-byte Folded Spill + st.d $a1, $sp, 8 # 8-byte Folded Spill add.d $s0, $a1, $a0 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fs5, $a0 lu12i.w $a0, -2 ori $a0, $a0, 3584 - st.d $a0, $sp, 72 # 8-byte Folded Spill + st.d $a0, $sp, 64 # 8-byte Folded Spill lu12i.w $a0, 2 ori $a0, $a0, 1024 - st.d $a0, $sp, 40 # 8-byte Folded Spill + st.d $a0, $sp, 32 # 8-byte Folded Spill + fst.d $fs5, $sp, 72 # 8-byte Folded Spill b .LBB0_5 .p2align 4, , 16 .LBB0_4: # %._crit_edge.thread # in Loop: Header=BB0_5 Depth=1 ld.w $a0, $s2, 204 addi.d $fp, $fp, 1 - ld.d $a1, $sp, 40 # 8-byte Folded Reload + ld.d $a1, $sp, 32 # 8-byte Folded Reload add.d $s0, $s0, $a1 - ld.d $s3, $sp, 48 # 8-byte Folded Reload + ld.d $s3, $sp, 40 # 8-byte Folded Reload bge $fp, $a0, .LBB0_35 .LBB0_5: # %.preheader215 # =>This Loop Header: Depth=1 @@ -84,20 +86,20 @@ mdct_sub48: # @mdct_sub48 move $s7, $zero alsl.d $a0, $fp, $fp, 3 slli.d $a1, $a0, 9 - ld.d $a2, $sp, 24 # 8-byte Folded Reload + ld.d $a2, $sp, 16 # 8-byte Folded Reload add.d $a1, $a2, $a1 st.d $a1, $sp, 88 # 8-byte Folded Spill slli.d $a0, $a0, 10 - ld.d $a1, $sp, 16 # 8-byte Folded Reload + ld.d $a1, $sp, 8 # 8-byte Folded Reload add.d $s4, $a1, $a0 - st.d $fp, $sp, 64 # 8-byte Folded Spill + st.d $fp, $sp, 56 # 8-byte Folded Spill ori $a0, $zero, 120 mul.d $a0, $fp, $a0 - ld.d $a1, $sp, 32 # 8-byte Folded Reload + ld.d $a1, $sp, 24 # 8-byte Folded Reload add.d $a0, $a1, $a0 addi.d $a0, $a0, 72 st.d $a0, $sp, 80 # 8-byte Folded Spill - st.d $s0, $sp, 56 # 8-byte Folded Spill + st.d $s0, $sp, 48 # 8-byte Folded Spill st.d $s0, $sp, 112 # 8-byte Folded Spill st.d $s4, $sp, 96 # 8-byte Folded Spill b .LBB0_8 @@ -106,10 +108,11 @@ mdct_sub48: # @mdct_sub48 ld.w $a0, $s2, 200 ld.d $s7, $sp, 104 # 8-byte Folded Reload addi.d $s7, $s7, 1 - ld.d $a1, $sp, 72 # 8-byte Folded Reload + ld.d $a1, $sp, 64 # 8-byte Folded Reload ld.d $a2, $sp, 112 # 8-byte Folded Reload add.d $a2, $a2, $a1 st.d $a2, $sp, 112 # 8-byte Folded Spill + fld.d $fs5, $sp, 72 # 8-byte Folded Reload ld.d $s4, $sp, 96 # 8-byte Folded Reload bge $s7, $a0, .LBB0_33 .LBB0_8: # Parent Loop BB0_5 Depth=1 @@ -235,10 +238,8 @@ mdct_sub48: # @mdct_sub48 bcnez $fcc0, .LBB0_17 # %bb.16: # %.loopexit213.loopexit # in Loop: Header=BB0_14 Depth=3 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) - fsub.d $fa2, $fs2, $fs4 - fmul.d $fa1, $fa2, $fa1 + fsub.d $fa1, $fs2, $fs4 + fmul.d $fa1, $fa1, $fs5 fsub.s $fa0, $fa0, $fs0 fcvt.d.s $fa0, $fa0 fdiv.d $fa0, $fa1, $fa0 @@ -319,10 +320,8 @@ mdct_sub48: # @mdct_sub48 bcnez $fcc0, .LBB0_13 # %bb.19: # %.loopexit212.loopexit # in Loop: Header=BB0_14 Depth=3 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_0) fsub.d $fa1, $fa1, $fs4 - fmul.d $fa1, $fa1, $fa2 + fmul.d $fa1, $fa1, $fs5 fsub.s $fa0, $fa0, $fs1 fcvt.d.s $fa0, $fa0 fdiv.d $fa0, $fa1, $fa0 @@ -446,31 +445,31 @@ mdct_sub48: # @mdct_sub48 pcalau12i $a0, %pc_hi20(ca.7) fld.d $fa0, $a0, %pc_lo12(ca.7) pcalau12i $a0, %pc_hi20(cs.7) - fld.d $fs2, $a0, %pc_lo12(cs.7) + fld.d $fs3, $a0, %pc_lo12(cs.7) pcalau12i $a0, %pc_hi20(ca.6) fld.d $fa1, $a0, %pc_lo12(ca.6) pcalau12i $a0, %pc_hi20(cs.6) - fld.d $fs4, $a0, %pc_lo12(cs.6) + fld.d $fs5, $a0, %pc_lo12(cs.6) pcalau12i $a0, %pc_hi20(ca.5) fld.d $fa2, $a0, %pc_lo12(ca.5) pcalau12i $a0, %pc_hi20(cs.5) - fld.d $fs6, $a0, %pc_lo12(cs.5) + fld.d $fs7, $a0, %pc_lo12(cs.5) pcalau12i $a0, %pc_hi20(ca.4) fld.d $fa3, $a0, %pc_lo12(ca.4) pcalau12i $a0, %pc_hi20(cs.4) - fld.d $fs0, $a0, %pc_lo12(cs.4) + fld.d $fs1, $a0, %pc_lo12(cs.4) pcalau12i $a0, %pc_hi20(ca.3) fld.d $fa4, $a0, %pc_lo12(ca.3) pcalau12i $a0, %pc_hi20(cs.3) - fld.d $fs3, $a0, %pc_lo12(cs.3) + fld.d $fs4, $a0, %pc_lo12(cs.3) pcalau12i $a0, %pc_hi20(ca.2) fld.d $fa5, $a0, %pc_lo12(ca.2) pcalau12i $a0, %pc_hi20(cs.2) - fld.d $fs7, $a0, %pc_lo12(cs.2) + fld.d $fs0, $a0, %pc_lo12(cs.2) pcalau12i $a0, %pc_hi20(ca.1) fld.d $fa6, $a0, %pc_lo12(ca.1) pcalau12i $a0, %pc_hi20(cs.1) - fld.d $fs5, $a0, %pc_lo12(cs.1) + fld.d $fs6, $a0, %pc_lo12(cs.1) pcalau12i $a0, %pc_hi20(ca.0) fld.d $fa7, $a0, %pc_lo12(ca.0) pcalau12i $a0, %pc_hi20(cs.0) @@ -503,7 +502,7 @@ mdct_sub48: # @mdct_sub48 fst.d $fa4, $sp, 432 # 8-byte Folded Spill fneg.d $fa0, $fa4 fst.d $fa0, $sp, 368 # 8-byte Folded Spill - fld.d $fs1, $a0, %pc_lo12(cs.0) + fld.d $fs2, $a0, %pc_lo12(cs.0) fst.d $fa5, $sp, 424 # 8-byte Folded Spill fneg.d $fa0, $fa5 fst.d $fa0, $sp, 360 # 8-byte Folded Spill @@ -1195,87 +1194,87 @@ mdct_sub48: # @mdct_sub48 # in Loop: Header=BB0_24 Depth=3 fld.d $fa2, $s5, -64 fld.d $fa3, $s5, 56 - fmul.d $fa4, $fa2, $fs2 + fmul.d $fa4, $fa2, $fs3 fld.d $fa5, $sp, 464 # 8-byte Folded Reload fmadd.d $fa4, $fa3, $fa5, $fa4 fld.d $fa5, $sp, 400 # 8-byte Folded Reload fmul.d $fa2, $fa2, $fa5 - fmadd.d $fa2, $fa3, $fs2, $fa2 + fmadd.d $fa2, $fa3, $fs3, $fa2 fld.d $fa3, $s5, -56 fld.d $fa5, $s5, 48 fst.d $fa4, $s5, -64 fst.d $fa2, $s5, 56 - fmul.d $fa2, $fa3, $fs4 + fmul.d $fa2, $fa3, $fs5 fld.d $fa4, $sp, 456 # 8-byte Folded Reload fmadd.d $fa2, $fa5, $fa4, $fa2 fld.d $fa4, $sp, 392 # 8-byte Folded Reload fmul.d $fa3, $fa3, $fa4 - fmadd.d $fa3, $fa5, $fs4, $fa3 + fmadd.d $fa3, $fa5, $fs5, $fa3 fld.d $fa4, $s5, -48 fld.d $fa5, $s5, 40 fst.d $fa2, $s5, -56 fst.d $fa3, $s5, 48 - fmul.d $fa2, $fa4, $fs6 + fmul.d $fa2, $fa4, $fs7 fld.d $fa3, $sp, 448 # 8-byte Folded Reload fmadd.d $fa2, $fa5, $fa3, $fa2 fld.d $fa3, $sp, 384 # 8-byte Folded Reload fmul.d $fa3, $fa4, $fa3 fld.d $fa4, $s5, -40 - fmadd.d $fa3, $fa5, $fs6, $fa3 + fmadd.d $fa3, $fa5, $fs7, $fa3 fst.d $fa2, $s5, -48 fst.d $fa3, $s5, 40 - fmul.d $fa2, $fa4, $fs0 + fmul.d $fa2, $fa4, $fs1 fld.d $fa3, $sp, 440 # 8-byte Folded Reload fmadd.d $fa2, $fa1, $fa3, $fa2 fld.d $fa3, $sp, 376 # 8-byte Folded Reload fmul.d $fa3, $fa4, $fa3 - fmadd.d $fa1, $fa1, $fs0, $fa3 + fmadd.d $fa1, $fa1, $fs1, $fa3 fld.d $fa3, $s5, -32 fld.d $fa4, $s5, 24 fst.d $fa2, $s5, -40 fst.d $fa1, $s5, 32 - fmul.d $fa1, $fa3, $fs3 + fmul.d $fa1, $fa3, $fs4 fld.d $fa2, $sp, 432 # 8-byte Folded Reload fmadd.d $fa1, $fa4, $fa2, $fa1 fld.d $fa2, $sp, 368 # 8-byte Folded Reload fmul.d $fa2, $fa3, $fa2 - fmadd.d $fa2, $fa4, $fs3, $fa2 + fmadd.d $fa2, $fa4, $fs4, $fa2 fld.d $fa3, $s5, -24 fld.d $fa4, $s5, 16 fst.d $fa1, $s5, -32 fst.d $fa2, $s5, 24 - fmul.d $fa1, $fa3, $fs7 + fmul.d $fa1, $fa3, $fs0 fld.d $fa2, $sp, 424 # 8-byte Folded Reload fmadd.d $fa1, $fa4, $fa2, $fa1 fld.d $fa2, $sp, 360 # 8-byte Folded Reload fmul.d $fa2, $fa3, $fa2 fld.d $fa3, $s5, -16 - fmadd.d $fa2, $fa4, $fs7, $fa2 + fmadd.d $fa2, $fa4, $fs0, $fa2 fst.d $fa1, $s5, -24 fst.d $fa2, $s5, 16 - fmul.d $fa1, $fa3, $fs5 + fmul.d $fa1, $fa3, $fs6 fld.d $fa2, $sp, 416 # 8-byte Folded Reload fmadd.d $fa1, $fa0, $fa2, $fa1 fld.d $fa2, $sp, 352 # 8-byte Folded Reload fmul.d $fa2, $fa3, $fa2 - fmadd.d $fa0, $fa0, $fs5, $fa2 + fmadd.d $fa0, $fa0, $fs6, $fa2 fld.d $fa2, $s5, -8 fld.d $fa3, $s5, 0 fst.d $fa1, $s5, -16 fst.d $fa0, $s5, 8 - fmul.d $fa0, $fa2, $fs1 + fmul.d $fa0, $fa2, $fs2 fld.d $fa1, $sp, 408 # 8-byte Folded Reload fmadd.d $fa1, $fa3, $fa1, $fa0 fld.d $fa0, $sp, 344 # 8-byte Folded Reload fmul.d $fa0, $fa2, $fa0 - fmadd.d $fa0, $fa3, $fs1, $fa0 + fmadd.d $fa0, $fa3, $fs2, $fa0 fst.d $fa1, $s5, -8 b .LBB0_22 .p2align 4, , 16 .LBB0_33: # %._crit_edge # in Loop: Header=BB0_5 Depth=1 - ld.d $fp, $sp, 64 # 8-byte Folded Reload - ld.d $s0, $sp, 56 # 8-byte Folded Reload + ld.d $fp, $sp, 56 # 8-byte Folded Reload + ld.d $s0, $sp, 48 # 8-byte Folded Reload ori $a1, $zero, 1 bne $a0, $a1, .LBB0_4 # %bb.34: # in Loop: Header=BB0_5 Depth=1 @@ -1403,247 +1402,111 @@ mdct_sub48: # @mdct_sub48 .LCPI1_29: .dword 0x3fc0b5150f6da2d0 # double 0.13052619222005157 .dword 0x3fa65547c4694e11 # double 0.043619387365336 -.LCPI1_49: +.LCPI1_30: .dword 0xbfb150dd8dd9d8bf # double -0.067640158778746504 .dword 0x3fa5c53b3ed42489 # double 0.042520381373898415 -.LCPI1_50: +.LCPI1_31: .dword 0xbf8db3b3a9a67635 # double -0.014502910246672144 .dword 0x3fb69105d1310418 # double 0.088150371143470685 -.LCPI1_51: +.LCPI1_32: .dword 0xbfba477c4665d2fd # double -0.10265328139014325 .dword 0x3fbc337b2d43eb39 # double 0.11016054015264566 -.LCPI1_52: +.LCPI1_33: .dword 0x3f8db3b3a9a6765a # double 0.014502910246672208 .dword 0x3fba477c4665d2ef # double 0.10265328139014306 -.LCPI1_53: +.LCPI1_34: .dword 0xbfb150dd8dd9d8c0 # double -0.067640158778746517 .dword 0xbfbc337b2d43eb38 # double -0.11016054015264565 -.LCPI1_54: +.LCPI1_35: .dword 0x3fa5c53b3ed424bf # double 0.04252038137389879 .dword 0x3fb69105d1310415 # double 0.088150371143470643 -.LCPI1_55: +.LCPI1_36: .dword 0x3fbc337b2d43eb35 # double 0.11016054015264561 .dword 0x3fa5c53b3ed424be # double 0.042520381373898783 -.LCPI1_56: +.LCPI1_37: .dword 0xbfb69105d1310415 # double -0.088150371143470643 .dword 0x3f8db3b3a9a676b8 # double 0.014502910246672371 -.LCPI1_57: +.LCPI1_38: .dword 0xbfba477c4665d2ea # double -0.10265328139014299 .dword 0xbfb150dd8dd9d8cb # double -0.06764015877874667 -.LCPI1_58: +.LCPI1_39: .dword 0xbfb69105d131040f # double -0.08815037114347056 .dword 0xbfba477c4665d2e8 # double -0.10265328139014296 -.LCPI1_59: +.LCPI1_40: .dword 0xbfbc337b2d43eb34 # double -0.11016054015264559 .dword 0xbfb150dd8dd9d8d2 # double -0.067640158778746767 -.LCPI1_60: +.LCPI1_41: .dword 0xbfa5c53b3ed424d2 # double -0.042520381373898922 .dword 0xbf8db3b3a9a67701 # double -0.014502910246672497 -.LCPI1_61: +.LCPI1_42: .dword 0xbfba477c4665d2f0 # double -0.10265328139014307 .dword 0xbfa5c53b3ed424c1 # double -0.042520381373898804 -.LCPI1_62: +.LCPI1_43: .dword 0x3fa5c53b3ed424d4 # double 0.042520381373898936 .dword 0xbfba477c4665d2e6 # double -0.10265328139014293 -.LCPI1_97: +.LCPI1_44: .dword 0x3fefb9ea92ec689b # double 0.99144486137381038 .dword 0x3fed906bcf328d46 # double 0.92387953251128674 -.LCPI1_98: +.LCPI1_45: .dword 0x3fc0d9fd31c98bf8 # double 0.13165249758739583 .dword 0x3fda827999fcef32 # double 0.41421356237309503 -.LCPI1_100: +.LCPI1_46: .dword 0x3fe37af93f9513ea # double 0.60876142900872066 .dword 0x3fd87de2a6aea964 # double 0.38268343236508984 -.LCPI1_101: +.LCPI1_47: .dword 0xbfe963268b572491 # double -0.79335334029123505 .dword 0xbfed906bcf328d46 # double -0.92387953251128674 -.LCPI1_102: +.LCPI1_48: .dword 0xbfed906bcf328d46 # double -0.92387953251128674 .dword 0xbfed906bcf328d47 # double -0.92387953251128685 -.LCPI1_103: +.LCPI1_49: .dword 0x3fd87de2a6aea96f # double 0.38268343236509045 .dword 0xbfd87de2a6aea965 # double -0.38268343236508989 -.LCPI1_104: +.LCPI1_50: .dword 0xbfc0b5150f6da2f1 # double -0.13052619222005249 .dword 0x3fed906bcf328d44 # double 0.92387953251128651 -.LCPI1_105: +.LCPI1_51: .dword 0x3fefb9ea92ec689a # double 0.99144486137381027 .dword 0x3fd87de2a6aea991 # double 0.38268343236509234 -.LCPI1_106: +.LCPI1_52: .dword 0x3fc0b5150f6da293 # double 0.13052619222004988 .dword 0x3fed906bcf328d4d # double 0.92387953251128752 -.LCPI1_107: +.LCPI1_53: .dword 0xbfd87de2a6aea965 # double -0.38268343236508989 .dword 0xbfd87de2a6aea971 # double -0.38268343236509056 -.LCPI1_108: +.LCPI1_54: .dword 0xbfed906bcf328d4e # double -0.92387953251128763 .dword 0x3fed906bcf328d47 # double 0.92387953251128685 -.LCPI1_109: +.LCPI1_55: .dword 0xbfe963268b572493 # double -0.79335334029123528 .dword 0x3fed906bcf328d4c # double 0.9238795325112874 -.LCPI1_110: +.LCPI1_56: .dword 0xbfe37af93f9513d7 # double -0.60876142900871855 .dword 0x3fd87de2a6aea91b # double 0.38268343236508578 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI1_30: - .dword 0x3f9657184ae74487 # double 0.021816615649929118 -.LCPI1_31: - .dword 0x4043000000000000 # double 38 -.LCPI1_32: - .dword 0x4045000000000000 # double 42 -.LCPI1_33: - .dword 0x4047000000000000 # double 46 -.LCPI1_34: - .dword 0x4049000000000000 # double 50 -.LCPI1_35: - .dword 0x404b000000000000 # double 54 -.LCPI1_36: - .dword 0x404d000000000000 # double 58 -.LCPI1_37: - .dword 0x404f000000000000 # double 62 -.LCPI1_38: - .dword 0x4050800000000000 # double 66 -.LCPI1_39: - .dword 0x4051800000000000 # double 70 -.LCPI1_40: - .dword 0x405b800000000000 # double 110 -.LCPI1_41: - .dword 0x405c800000000000 # double 114 -.LCPI1_42: - .dword 0x405d800000000000 # double 118 -.LCPI1_43: - .dword 0x405e800000000000 # double 122 -.LCPI1_44: - .dword 0x405f800000000000 # double 126 -.LCPI1_45: - .dword 0x4060400000000000 # double 130 -.LCPI1_46: - .dword 0x4060c00000000000 # double 134 -.LCPI1_47: - .dword 0x4061400000000000 # double 138 -.LCPI1_48: - .dword 0x4061c00000000000 # double 142 -.LCPI1_63: - .dword 0x400921fb54442d18 # double 3.1415926535897931 -.LCPI1_64: - .dword 0x3f90000000000000 # double 0.015625 -.LCPI1_65: - .dword 0x3feff621e3796d7e # double 0.99879545620517241 -.LCPI1_66: - .dword 0x3fefd88da3d12526 # double 0.99518472667219693 -.LCPI1_67: - .dword 0x3fefa7557f08a517 # double 0.98917650996478101 -.LCPI1_68: - .dword 0x3fef6297cff75cb0 # double 0.98078528040323043 -.LCPI1_69: - .dword 0x3fef0a7efb9230d7 # double 0.97003125319454397 -.LCPI1_70: - .dword 0x3fee9f4156c62dda # double 0.95694033573220882 -.LCPI1_71: - .dword 0x3fee212104f686e5 # double 0.94154406518302081 -.LCPI1_72: - .dword 0x3fed906bcf328d46 # double 0.92387953251128674 -.LCPI1_73: - .dword 0x3feced7af43cc773 # double 0.90398929312344334 -.LCPI1_74: - .dword 0x3fec38b2f180bdb1 # double 0.88192126434835505 -.LCPI1_75: - .dword 0x3feb728345196e3e # double 0.85772861000027212 -.LCPI1_76: - .dword 0x3fea9b66290ea1a3 # double 0.83146961230254524 -.LCPI1_77: - .dword 0x3fe9b3e047f38741 # double 0.80320753148064494 -.LCPI1_78: - .dword 0x3fe8bc806b151741 # double 0.77301045336273699 -.LCPI1_79: - .dword 0x3fe7b5df226aafaf # double 0.74095112535495911 -.LCPI1_80: - .dword 0x3fe6a09e667f3bcd # double 0.70710678118654757 -.LCPI1_81: - .dword 0x3fe57d69348cec9f # double 0.67155895484701833 -.LCPI1_82: - .dword 0x3fe44cf325091dd6 # double 0.63439328416364549 -.LCPI1_83: - .dword 0x3fe30ff7fce17036 # double 0.59569930449243347 -.LCPI1_84: - .dword 0x3fe1c73b39ae68c9 # double 0.55557023301960229 -.LCPI1_85: - .dword 0x3fe073879922ffed # double 0.51410274419322166 -.LCPI1_86: - .dword 0x3fde2b5d3806f63e # double 0.47139673682599781 -.LCPI1_87: - .dword 0x3fdb5d1009e15cc2 # double 0.4275550934302822 -.LCPI1_88: - .dword 0x3fd87de2a6aea964 # double 0.38268343236508984 -.LCPI1_89: - .dword 0x3fd58f9a75ab1fdd # double 0.33688985339222005 -.LCPI1_90: - .dword 0x3fd294062ed59f05 # double 0.29028467725446233 -.LCPI1_91: - .dword 0x3fcf19f97b215f1e # double 0.24298017990326398 -.LCPI1_92: - .dword 0x3fc8f8b83c69a60d # double 0.19509032201612833 -.LCPI1_93: - .dword 0x3fc2c8106e8e613a # double 0.14673047445536175 -.LCPI1_94: - .dword 0x3fb917a6bc29b438 # double 0.09801714032956077 -.LCPI1_95: - .dword 0x3fa91f65f10dd824 # double 0.049067674327418126 -.LCPI1_96: - .dword 0x3f00000000000000 # double 3.0517578125E-5 -.LCPI1_99: - .dword 0x3fefb9ea92ec689c # double 0.99144486137381049 -.LCPI1_111: - .dword 0x3fe963268b572493 # double 0.79335334029123528 -.LCPI1_112: - .dword 0x3fd5555555555555 # double 0.33333333333333331 -.LCPI1_113: - .dword 0x3fc0b5150f6da2d5 # double 0.13052619222005171 -.LCPI1_114: - .dword 0xbfefb9ea92ec689b # double -0.99144486137381038 -.LCPI1_115: - .dword 0xbfd87de2a6aea96d # double -0.38268343236509034 -.LCPI1_116: - .dword 0xbfed906bcf328d43 # double -0.92387953251128641 -.LCPI1_117: - .dword 0x3fe37af93f9513f3 # double 0.60876142900872166 -.LCPI1_118: - .dword 0xbfe963268b572484 # double -0.79335334029123361 -.LCPI1_119: - .dword 0xbfe963268b572493 # double -0.79335334029123528 -.LCPI1_120: - .dword 0xbfe37af93f9513d8 # double -0.60876142900871866 -.LCPI1_121: - .dword 0xbfd87de2a6aea959 # double -0.38268343236508923 -.LCPI1_122: - .dword 0xbfefb9ea92ec689d # double -0.9914448613738106 -.LCPI1_123: - .dword 0xbfc0b5150f6da27e # double -0.1305261922200493 .text .globl mdct_init48 .p2align 5 .type mdct_init48,@function mdct_init48: # @mdct_init48 # %bb.0: # %.preheader204 - addi.d $sp, $sp, -528 - st.d $ra, $sp, 520 # 8-byte Folded Spill - st.d $fp, $sp, 512 # 8-byte Folded Spill - st.d $s0, $sp, 504 # 8-byte Folded Spill - st.d $s1, $sp, 496 # 8-byte Folded Spill - st.d $s2, $sp, 488 # 8-byte Folded Spill - st.d $s3, $sp, 480 # 8-byte Folded Spill - st.d $s4, $sp, 472 # 8-byte Folded Spill - st.d $s5, $sp, 464 # 8-byte Folded Spill - st.d $s6, $sp, 456 # 8-byte Folded Spill - fst.d $fs0, $sp, 448 # 8-byte Folded Spill - fst.d $fs1, $sp, 440 # 8-byte Folded Spill - fst.d $fs2, $sp, 432 # 8-byte Folded Spill - fst.d $fs3, $sp, 424 # 8-byte Folded Spill - fst.d $fs4, $sp, 416 # 8-byte Folded Spill - fst.d $fs5, $sp, 408 # 8-byte Folded Spill - fst.d $fs6, $sp, 400 # 8-byte Folded Spill - fst.d $fs7, $sp, 392 # 8-byte Folded Spill + addi.d $sp, $sp, -512 + st.d $ra, $sp, 504 # 8-byte Folded Spill + st.d $fp, $sp, 496 # 8-byte Folded Spill + st.d $s0, $sp, 488 # 8-byte Folded Spill + st.d $s1, $sp, 480 # 8-byte Folded Spill + st.d $s2, $sp, 472 # 8-byte Folded Spill + st.d $s3, $sp, 464 # 8-byte Folded Spill + st.d $s4, $sp, 456 # 8-byte Folded Spill + st.d $s5, $sp, 448 # 8-byte Folded Spill + st.d $s6, $sp, 440 # 8-byte Folded Spill + fst.d $fs0, $sp, 432 # 8-byte Folded Spill + fst.d $fs1, $sp, 424 # 8-byte Folded Spill + fst.d $fs2, $sp, 416 # 8-byte Folded Spill + fst.d $fs3, $sp, 408 # 8-byte Folded Spill + fst.d $fs4, $sp, 400 # 8-byte Folded Spill + fst.d $fs5, $sp, 392 # 8-byte Folded Spill + fst.d $fs6, $sp, 384 # 8-byte Folded Spill + fst.d $fs7, $sp, 376 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(ca.0) lu12i.w $a1, -207109 ori $a1, $a1, 3789 @@ -1879,63 +1742,100 @@ mdct_init48: # @mdct_init48 ori $s1, $zero, 44 pcalau12i $a0, %pc_hi20(all) addi.d $s2, $a0, %pc_lo12(all) - pcalau12i $a0, %pc_hi20(.LCPI1_30) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_30) - fst.d $fa0, $sp, 104 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_31) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_31) - fst.d $fa0, $sp, 96 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_32) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_32) + lu12i.w $a0, 306804 + ori $a0, $a0, 1159 + lu32i.d $a0, 415512 + lu52i.d $a0, $a0, 1017 + movgr2fr.d $fa0, $a0 fst.d $fa0, $sp, 88 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_33) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_33) + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, 196608 + lu52i.d $a1, $a1, 1028 + movgr2fr.d $fa0, $a1 fst.d $fa0, $sp, 80 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_34) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_34) + ori $a1, $zero, 0 + lu32i.d $a1, 327680 + lu52i.d $a1, $a1, 1028 + movgr2fr.d $fa0, $a1 fst.d $fa0, $sp, 72 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_35) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_35) + lu12i.w $a1, 116508 + ori $a1, $a1, 1820 + lu32i.d $a1, -233017 + lu52i.d $a1, $a1, 1019 + vreplgr2vr.d $vr0, $a1 + vst $vr0, $sp, 112 # 16-byte Folded Spill + ori $a1, $zero, 0 + lu32i.d $a1, 458752 + lu52i.d $a1, $a1, 1028 + movgr2fr.d $fa0, $a1 fst.d $fa0, $sp, 64 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_36) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_36) + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1028 + movgr2fr.d $fa0, $a1 fst.d $fa0, $sp, 56 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_37) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_37) + ori $a1, $zero, 0 + lu32i.d $a1, -327680 + lu52i.d $a1, $a1, 1028 + movgr2fr.d $fa0, $a1 fst.d $fa0, $sp, 48 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_38) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_38) + ori $a1, $zero, 0 + lu32i.d $a1, -196608 + lu52i.d $a1, $a1, 1028 + movgr2fr.d $fa0, $a1 fst.d $fa0, $sp, 40 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_39) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_39) + ori $a1, $zero, 0 + lu32i.d $a1, -65536 + lu52i.d $a1, $a1, 1028 + movgr2fr.d $fa0, $a1 fst.d $fa0, $sp, 32 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_40) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_40) + ori $a1, $zero, 0 + lu32i.d $a1, 32768 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa0, $a1 fst.d $fa0, $sp, 24 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_41) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_41) + ori $a1, $zero, 0 + lu32i.d $a1, 98304 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa0, $a1 fst.d $fa0, $sp, 16 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_42) - fld.d $fs4, $a0, %pc_lo12(.LCPI1_42) - pcalau12i $a0, %pc_hi20(.LCPI1_43) - fld.d $fs5, $a0, %pc_lo12(.LCPI1_43) - pcalau12i $a0, %pc_hi20(.LCPI1_44) - fld.d $fs6, $a0, %pc_lo12(.LCPI1_44) - pcalau12i $a0, %pc_hi20(.LCPI1_45) - fld.d $fs7, $a0, %pc_lo12(.LCPI1_45) - pcalau12i $a0, %pc_hi20(.LCPI1_46) - fld.d $fs0, $a0, %pc_lo12(.LCPI1_46) - pcalau12i $a0, %pc_hi20(.LCPI1_47) - fld.d $fs1, $a0, %pc_lo12(.LCPI1_47) - pcalau12i $a0, %pc_hi20(.LCPI1_48) - fld.d $fs2, $a0, %pc_lo12(.LCPI1_48) - lu12i.w $a0, 116508 - ori $a0, $a0, 1820 - lu32i.d $a0, -233017 - lu52i.d $a0, $a0, 1019 - vreplgr2vr.d $vr0, $a0 - vst $vr0, $sp, 128 # 16-byte Folded Spill - addi.w $s3, $zero, -4 + ori $a1, $zero, 0 + lu32i.d $a1, -294912 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa0, $a1 + fst.d $fa0, $sp, 8 # 8-byte Folded Spill + ori $a1, $zero, 0 + lu32i.d $a1, -229376 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fs3, $a1 + ori $a1, $zero, 0 + lu32i.d $a1, -163840 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fs4, $a1 + ori $a1, $zero, 0 + lu32i.d $a1, -98304 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fs5, $a1 + ori $a1, $zero, 0 + lu32i.d $a1, -32768 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fs6, $a1 + ori $a1, $zero, 0 + lu32i.d $a1, 16384 + lu52i.d $a1, $a1, 1030 + movgr2fr.d $fs7, $a1 + ori $a1, $zero, 0 + lu32i.d $a1, 49152 + lu52i.d $a1, $a1, 1030 + movgr2fr.d $fs0, $a1 + ori $a1, $zero, 0 + lu32i.d $a1, 81920 + lu52i.d $a1, $a1, 1030 + movgr2fr.d $fs1, $a1 + lu32i.d $a0, 114688 + lu52i.d $s3, $a0, 1030 + addi.w $s4, $zero, -4 .p2align 4, , 16 .LBB1_1: # %.preheader203 # =>This Inner Loop Header: Depth=1 @@ -1944,187 +1844,187 @@ mdct_init48: # @mdct_init48 addi.d $a0, $a0, 1 movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 - fld.d $fa1, $sp, 104 # 8-byte Folded Reload - fmul.d $fs3, $fa0, $fa1 - fld.d $fa0, $sp, 96 # 8-byte Folded Reload - fmul.d $fa0, $fs3, $fa0 + fld.d $fa1, $sp, 88 # 8-byte Folded Reload + fmul.d $fs2, $fa0, $fa1 + fld.d $fa0, $sp, 80 # 8-byte Folded Reload + fmul.d $fa0, $fs2, $fa0 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vst $vr0, $sp, 112 # 16-byte Folded Spill - fld.d $fa0, $sp, 88 # 8-byte Folded Reload - fmul.d $fa0, $fs3, $fa0 + vst $vr0, $sp, 96 # 16-byte Folded Spill + fld.d $fa0, $sp, 72 # 8-byte Folded Reload + fmul.d $fa0, $fs2, $fa0 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vld $vr1, $sp, 112 # 16-byte Folded Reload + vld $vr1, $sp, 96 # 16-byte Folded Reload vextrins.d $vr1, $vr0, 16 - vld $vr0, $sp, 128 # 16-byte Folded Reload + vld $vr0, $sp, 112 # 16-byte Folded Reload vfmul.d $vr0, $vr1, $vr0 vst $vr0, $s0, -112 - fld.d $fa0, $sp, 80 # 8-byte Folded Reload - fmul.d $fa0, $fs3, $fa0 + fld.d $fa0, $sp, 64 # 8-byte Folded Reload + fmul.d $fa0, $fs2, $fa0 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vst $vr0, $sp, 112 # 16-byte Folded Spill - fld.d $fa0, $sp, 72 # 8-byte Folded Reload - fmul.d $fa0, $fs3, $fa0 + vst $vr0, $sp, 96 # 16-byte Folded Spill + fld.d $fa0, $sp, 56 # 8-byte Folded Reload + fmul.d $fa0, $fs2, $fa0 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vld $vr1, $sp, 112 # 16-byte Folded Reload + vld $vr1, $sp, 96 # 16-byte Folded Reload vextrins.d $vr1, $vr0, 16 - vld $vr0, $sp, 128 # 16-byte Folded Reload + vld $vr0, $sp, 112 # 16-byte Folded Reload vfmul.d $vr0, $vr1, $vr0 vst $vr0, $s0, -96 - fld.d $fa0, $sp, 64 # 8-byte Folded Reload - fmul.d $fa0, $fs3, $fa0 + fld.d $fa0, $sp, 48 # 8-byte Folded Reload + fmul.d $fa0, $fs2, $fa0 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vst $vr0, $sp, 112 # 16-byte Folded Spill - fld.d $fa0, $sp, 56 # 8-byte Folded Reload - fmul.d $fa0, $fs3, $fa0 + vst $vr0, $sp, 96 # 16-byte Folded Spill + fld.d $fa0, $sp, 40 # 8-byte Folded Reload + fmul.d $fa0, $fs2, $fa0 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vld $vr1, $sp, 112 # 16-byte Folded Reload + vld $vr1, $sp, 96 # 16-byte Folded Reload vextrins.d $vr1, $vr0, 16 - vld $vr0, $sp, 128 # 16-byte Folded Reload + vld $vr0, $sp, 112 # 16-byte Folded Reload vfmul.d $vr0, $vr1, $vr0 vst $vr0, $s0, -80 - fld.d $fa0, $sp, 48 # 8-byte Folded Reload - fmul.d $fa0, $fs3, $fa0 + fld.d $fa0, $sp, 32 # 8-byte Folded Reload + fmul.d $fa0, $fs2, $fa0 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vst $vr0, $sp, 112 # 16-byte Folded Spill - fld.d $fa0, $sp, 40 # 8-byte Folded Reload - fmul.d $fa0, $fs3, $fa0 + vst $vr0, $sp, 96 # 16-byte Folded Spill + fld.d $fa0, $sp, 24 # 8-byte Folded Reload + fmul.d $fa0, $fs2, $fa0 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vld $vr1, $sp, 112 # 16-byte Folded Reload + vld $vr1, $sp, 96 # 16-byte Folded Reload vextrins.d $vr1, $vr0, 16 - vld $vr0, $sp, 128 # 16-byte Folded Reload + vld $vr0, $sp, 112 # 16-byte Folded Reload vfmul.d $vr0, $vr1, $vr0 vst $vr0, $s0, -64 - fld.d $fa0, $sp, 32 # 8-byte Folded Reload - fmul.d $fa0, $fs3, $fa0 + fld.d $fa0, $sp, 16 # 8-byte Folded Reload + fmul.d $fa0, $fs2, $fa0 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vst $vr0, $sp, 112 # 16-byte Folded Spill - fld.d $fa0, $sp, 24 # 8-byte Folded Reload - fmul.d $fa0, $fs3, $fa0 + vst $vr0, $sp, 96 # 16-byte Folded Spill + fld.d $fa0, $sp, 8 # 8-byte Folded Reload + fmul.d $fa0, $fs2, $fa0 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vld $vr1, $sp, 112 # 16-byte Folded Reload + vld $vr1, $sp, 96 # 16-byte Folded Reload vextrins.d $vr1, $vr0, 16 - vld $vr0, $sp, 128 # 16-byte Folded Reload + vld $vr0, $sp, 112 # 16-byte Folded Reload vfmul.d $vr0, $vr1, $vr0 vst $vr0, $s0, -48 - fld.d $fa0, $sp, 16 # 8-byte Folded Reload - fmul.d $fa0, $fs3, $fa0 + fmul.d $fa0, $fs2, $fs3 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vst $vr0, $sp, 112 # 16-byte Folded Spill - fmul.d $fa0, $fs3, $fs4 + vst $vr0, $sp, 96 # 16-byte Folded Spill + fmul.d $fa0, $fs2, $fs4 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vld $vr1, $sp, 112 # 16-byte Folded Reload + vld $vr1, $sp, 96 # 16-byte Folded Reload vextrins.d $vr1, $vr0, 16 - vld $vr0, $sp, 128 # 16-byte Folded Reload + vld $vr0, $sp, 112 # 16-byte Folded Reload vfmul.d $vr0, $vr1, $vr0 vst $vr0, $s0, -32 - fmul.d $fa0, $fs3, $fs5 + fmul.d $fa0, $fs2, $fs5 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vst $vr0, $sp, 112 # 16-byte Folded Spill - fmul.d $fa0, $fs3, $fs6 + vst $vr0, $sp, 96 # 16-byte Folded Spill + fmul.d $fa0, $fs2, $fs6 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vld $vr1, $sp, 112 # 16-byte Folded Reload + vld $vr1, $sp, 96 # 16-byte Folded Reload vextrins.d $vr1, $vr0, 16 - vld $vr0, $sp, 128 # 16-byte Folded Reload + vld $vr0, $sp, 112 # 16-byte Folded Reload vfmul.d $vr0, $vr1, $vr0 vst $vr0, $s0, -16 - fmul.d $fa0, $fs3, $fs7 + fmul.d $fa0, $fs2, $fs7 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vst $vr0, $sp, 112 # 16-byte Folded Spill - fmul.d $fa0, $fs3, $fs0 + vst $vr0, $sp, 96 # 16-byte Folded Spill + fmul.d $fa0, $fs2, $fs0 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vld $vr1, $sp, 112 # 16-byte Folded Reload + vld $vr1, $sp, 96 # 16-byte Folded Reload vextrins.d $vr1, $vr0, 16 - vld $vr0, $sp, 128 # 16-byte Folded Reload + vld $vr0, $sp, 112 # 16-byte Folded Reload vfmul.d $vr0, $vr1, $vr0 vst $vr0, $s0, 0 - fmul.d $fa0, $fs3, $fs1 + fmul.d $fa0, $fs2, $fs1 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vst $vr0, $sp, 112 # 16-byte Folded Spill - fmul.d $fa0, $fs3, $fs2 + vst $vr0, $sp, 96 # 16-byte Folded Spill + movgr2fr.d $fa0, $s3 + fmul.d $fa0, $fs2, $fa0 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vld $vr1, $sp, 112 # 16-byte Folded Reload + vld $vr1, $sp, 96 # 16-byte Folded Reload vextrins.d $vr1, $vr0, 16 - vld $vr0, $sp, 128 # 16-byte Folded Reload + vld $vr0, $sp, 112 # 16-byte Folded Reload vfmul.d $vr0, $vr1, $vr0 vst $vr0, $s0, 16 addi.d $s1, $s1, -4 addi.d $s0, $s0, 144 - bne $s1, $s3, .LBB1_1 + bne $s1, $s4, .LBB1_1 # %bb.2: # %.preheader201.preheader - pcalau12i $a0, %pc_hi20(.LCPI1_49) - vld $vr0, $a0, %pc_lo12(.LCPI1_49) - pcalau12i $a0, %pc_hi20(.LCPI1_50) - vld $vr1, $a0, %pc_lo12(.LCPI1_50) + pcalau12i $a0, %pc_hi20(.LCPI1_30) + vld $vr0, $a0, %pc_lo12(.LCPI1_30) + pcalau12i $a0, %pc_hi20(.LCPI1_31) + vld $vr1, $a0, %pc_lo12(.LCPI1_31) vst $vr0, $s0, -112 vst $vr1, $s0, -96 - pcalau12i $a0, %pc_hi20(.LCPI1_51) - vld $vr0, $a0, %pc_lo12(.LCPI1_51) - pcalau12i $a0, %pc_hi20(.LCPI1_52) - vld $vr1, $a0, %pc_lo12(.LCPI1_52) - pcalau12i $a0, %pc_hi20(.LCPI1_53) - vld $vr2, $a0, %pc_lo12(.LCPI1_53) - pcalau12i $a0, %pc_hi20(.LCPI1_54) - vld $vr3, $a0, %pc_lo12(.LCPI1_54) + pcalau12i $a0, %pc_hi20(.LCPI1_32) + vld $vr0, $a0, %pc_lo12(.LCPI1_32) + pcalau12i $a0, %pc_hi20(.LCPI1_33) + vld $vr1, $a0, %pc_lo12(.LCPI1_33) + pcalau12i $a0, %pc_hi20(.LCPI1_34) + vld $vr2, $a0, %pc_lo12(.LCPI1_34) + pcalau12i $a0, %pc_hi20(.LCPI1_35) + vld $vr3, $a0, %pc_lo12(.LCPI1_35) vst $vr0, $s0, -80 vst $vr1, $s0, -64 vst $vr2, $s0, -48 vst $vr3, $s0, -32 - pcalau12i $a0, %pc_hi20(.LCPI1_55) - vld $vr0, $a0, %pc_lo12(.LCPI1_55) - pcalau12i $a0, %pc_hi20(.LCPI1_56) - vld $vr1, $a0, %pc_lo12(.LCPI1_56) - pcalau12i $a0, %pc_hi20(.LCPI1_57) - vld $vr2, $a0, %pc_lo12(.LCPI1_57) - pcalau12i $a0, %pc_hi20(.LCPI1_58) - vld $vr3, $a0, %pc_lo12(.LCPI1_58) + pcalau12i $a0, %pc_hi20(.LCPI1_36) + vld $vr0, $a0, %pc_lo12(.LCPI1_36) + pcalau12i $a0, %pc_hi20(.LCPI1_37) + vld $vr1, $a0, %pc_lo12(.LCPI1_37) + pcalau12i $a0, %pc_hi20(.LCPI1_38) + vld $vr2, $a0, %pc_lo12(.LCPI1_38) + pcalau12i $a0, %pc_hi20(.LCPI1_39) + vld $vr3, $a0, %pc_lo12(.LCPI1_39) vst $vr0, $s0, -16 vst $vr1, $s0, 0 vst $vr2, $s0, 16 vst $vr3, $s0, 32 - pcalau12i $a0, %pc_hi20(.LCPI1_59) - vld $vr0, $a0, %pc_lo12(.LCPI1_59) - pcalau12i $a0, %pc_hi20(.LCPI1_60) - vld $vr1, $a0, %pc_lo12(.LCPI1_60) - pcalau12i $a0, %pc_hi20(.LCPI1_61) - vld $vr2, $a0, %pc_lo12(.LCPI1_61) - pcalau12i $a0, %pc_hi20(.LCPI1_62) - vld $vr3, $a0, %pc_lo12(.LCPI1_62) + pcalau12i $a0, %pc_hi20(.LCPI1_40) + vld $vr0, $a0, %pc_lo12(.LCPI1_40) + pcalau12i $a0, %pc_hi20(.LCPI1_41) + vld $vr1, $a0, %pc_lo12(.LCPI1_41) + pcalau12i $a0, %pc_hi20(.LCPI1_42) + vld $vr2, $a0, %pc_lo12(.LCPI1_42) + pcalau12i $a0, %pc_hi20(.LCPI1_43) + vld $vr3, $a0, %pc_lo12(.LCPI1_43) vst $vr0, $s0, 48 vst $vr1, $s0, 64 vst $vr2, $s0, 80 @@ -2136,7 +2036,7 @@ mdct_init48: # @mdct_init48 fdiv.d $fa1, $fa0, $fa4 vld $vr2, $a0, 8 vld $vr3, $a0, 24 - fst.d $fa1, $sp, 264 + fst.d $fa1, $sp, 248 vreplvei.d $vr1, $vr0, 0 vfdiv.d $vr2, $vr2, $vr1 vfdiv.d $vr1, $vr3, $vr1 @@ -2155,7 +2055,7 @@ mdct_init48: # @mdct_init48 addi.d $a0, $a0, 120 ori $a2, $zero, 128 ori $a3, $zero, 112 - addi.d $a4, $sp, 144 + addi.d $a4, $sp, 128 ori $a5, $zero, 248 .p2align 4, , 16 .LBB1_3: # %.preheader198 @@ -2235,18 +2135,21 @@ mdct_init48: # @mdct_init48 fdiv.d $fa0, $fa0, $fa4 fst.d $fa0, $a1, 160 fld.d $fa0, $a0, 0 - vst $vr4, $sp, 128 # 16-byte Folded Spill + vst $vr4, $sp, 112 # 16-byte Folded Spill fdiv.d $fa0, $fa0, $fa4 fst.d $fa0, $a1, 168 ori $s2, $zero, 31 pcalau12i $a0, %pc_hi20(mm) addi.d $s0, $a0, %pc_lo12(mm) - pcalau12i $a0, %pc_hi20(.LCPI1_63) - fld.d $fs0, $a0, %pc_lo12(.LCPI1_63) - pcalau12i $a0, %pc_hi20(.LCPI1_64) - fld.d $fs1, $a0, %pc_lo12(.LCPI1_64) move $s1, $zero - addi.d $s3, $sp, 144 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fs0, $a0 + lu52i.d $a0, $zero, 1017 + movgr2fr.d $fs1, $a0 + addi.d $s3, $sp, 128 ori $s4, $zero, 248 .p2align 4, , 16 .LBB1_5: # =>This Inner Loop Header: Depth=1 @@ -2266,7 +2169,7 @@ mdct_init48: # @mdct_init48 # %bb.6: # %.preheader195.1.preheader move $s2, $zero ori $s3, $zero, 29 - addi.d $s4, $sp, 144 + addi.d $s4, $sp, 128 ori $s5, $zero, 248 .p2align 4, , 16 .LBB1_7: # %.preheader195.1 @@ -2291,7 +2194,7 @@ mdct_init48: # @mdct_init48 add.d $a0, $a0, $s2 addi.d $s0, $a0, 120 ori $s1, $zero, 27 - addi.d $s2, $sp, 144 + addi.d $s2, $sp, 128 ori $s4, $zero, 248 .p2align 4, , 16 .LBB1_9: # %.preheader195.2 @@ -2313,7 +2216,7 @@ mdct_init48: # @mdct_init48 # %bb.10: # %.preheader195.3.preheader move $s1, $zero ori $s2, $zero, 25 - addi.d $s3, $sp, 144 + addi.d $s3, $sp, 128 ori $s4, $zero, 248 .p2align 4, , 16 .LBB1_11: # %.preheader195.3 @@ -2335,7 +2238,7 @@ mdct_init48: # @mdct_init48 # %bb.12: # %.preheader195.4.preheader move $s1, $zero ori $s2, $zero, 23 - addi.d $s3, $sp, 144 + addi.d $s3, $sp, 128 ori $s4, $zero, 248 .p2align 4, , 16 .LBB1_13: # %.preheader195.4 @@ -2357,7 +2260,7 @@ mdct_init48: # @mdct_init48 # %bb.14: # %.preheader195.5.preheader move $s1, $zero ori $s2, $zero, 21 - addi.d $s3, $sp, 144 + addi.d $s3, $sp, 128 ori $s4, $zero, 248 .p2align 4, , 16 .LBB1_15: # %.preheader195.5 @@ -2379,7 +2282,7 @@ mdct_init48: # @mdct_init48 # %bb.16: # %.preheader195.6.preheader move $s1, $zero ori $s2, $zero, 19 - addi.d $s3, $sp, 144 + addi.d $s3, $sp, 128 ori $s4, $zero, 248 .p2align 4, , 16 .LBB1_17: # %.preheader195.6 @@ -2401,7 +2304,7 @@ mdct_init48: # @mdct_init48 # %bb.18: # %.preheader195.7.preheader move $s1, $zero ori $s2, $zero, 17 - addi.d $s3, $sp, 144 + addi.d $s3, $sp, 128 ori $s4, $zero, 248 .p2align 4, , 16 .LBB1_19: # %.preheader195.7 @@ -2423,7 +2326,7 @@ mdct_init48: # @mdct_init48 # %bb.20: # %.preheader195.8.preheader move $s1, $zero ori $s2, $zero, 15 - addi.d $s3, $sp, 144 + addi.d $s3, $sp, 128 ori $s4, $zero, 248 .p2align 4, , 16 .LBB1_21: # %.preheader195.8 @@ -2445,7 +2348,7 @@ mdct_init48: # @mdct_init48 # %bb.22: # %.preheader195.9.preheader move $s1, $zero ori $s2, $zero, 13 - addi.d $s3, $sp, 144 + addi.d $s3, $sp, 128 ori $s4, $zero, 248 .p2align 4, , 16 .LBB1_23: # %.preheader195.9 @@ -2467,7 +2370,7 @@ mdct_init48: # @mdct_init48 # %bb.24: # %.preheader195.10.preheader move $s1, $zero ori $s2, $zero, 11 - addi.d $s3, $sp, 144 + addi.d $s3, $sp, 128 ori $s4, $zero, 248 .p2align 4, , 16 .LBB1_25: # %.preheader195.10 @@ -2489,7 +2392,7 @@ mdct_init48: # @mdct_init48 # %bb.26: # %.preheader195.11.preheader move $s1, $zero ori $s2, $zero, 9 - addi.d $s3, $sp, 144 + addi.d $s3, $sp, 128 ori $s4, $zero, 248 .p2align 4, , 16 .LBB1_27: # %.preheader195.11 @@ -2511,7 +2414,7 @@ mdct_init48: # @mdct_init48 # %bb.28: # %.preheader195.12.preheader move $s1, $zero ori $s2, $zero, 7 - addi.d $s3, $sp, 144 + addi.d $s3, $sp, 128 ori $s4, $zero, 248 .p2align 4, , 16 .LBB1_29: # %.preheader195.12 @@ -2533,7 +2436,7 @@ mdct_init48: # @mdct_init48 # %bb.30: # %.preheader195.13.preheader move $s1, $zero ori $s2, $zero, 5 - addi.d $s3, $sp, 144 + addi.d $s3, $sp, 128 ori $s4, $zero, 248 .p2align 4, , 16 .LBB1_31: # %.preheader195.13 @@ -2555,7 +2458,7 @@ mdct_init48: # @mdct_init48 # %bb.32: # %.preheader195.14.preheader move $s1, $zero ori $s2, $zero, 3 - addi.d $s3, $sp, 144 + addi.d $s3, $sp, 128 ori $s4, $zero, 248 .p2align 4, , 16 .LBB1_33: # %.preheader195.14 @@ -2575,261 +2478,354 @@ mdct_init48: # @mdct_init48 addi.d $s0, $s0, 8 bne $s1, $s4, .LBB1_33 # %bb.34: # %.preheader195.15 - fld.d $fa0, $sp, 144 - pcalau12i $a0, %pc_hi20(.LCPI1_65) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_65) - fld.d $fa2, $sp, 152 - pcalau12i $a0, %pc_hi20(.LCPI1_66) - fld.d $fa3, $a0, %pc_lo12(.LCPI1_66) + fld.d $fa0, $sp, 128 + lu12i.w $a0, -116842 + ori $a0, $a0, 3454 + lu32i.d $a0, -2527 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 fst.d $fa0, $s0, -120 - fmul.d $fa0, $fa2, $fa3 - fld.d $fa1, $sp, 160 - pcalau12i $a0, %pc_hi20(.LCPI1_67) - fld.d $fa2, $a0, %pc_lo12(.LCPI1_67) - fld.d $fa3, $sp, 168 - pcalau12i $a0, %pc_hi20(.LCPI1_68) - fld.d $fa4, $a0, %pc_lo12(.LCPI1_68) + fld.d $fa0, $sp, 136 + lu12i.w $a0, -377582 + ori $a0, $a0, 1318 + lu32i.d $a0, -10099 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 fst.d $fa0, $s0, -112 - fmul.d $fa0, $fa1, $fa2 + fld.d $fa0, $sp, 144 + lu12i.w $a0, 520330 + ori $a0, $a0, 1303 + lu32i.d $a0, -22699 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 fst.d $fa0, $s0, -104 - fmul.d $fa0, $fa3, $fa4 - fld.d $fa1, $sp, 176 - pcalau12i $a0, %pc_hi20(.LCPI1_69) - fld.d $fa2, $a0, %pc_lo12(.LCPI1_69) - fld.d $fa3, $sp, 184 - pcalau12i $a0, %pc_hi20(.LCPI1_70) - fld.d $fa4, $a0, %pc_lo12(.LCPI1_70) + fld.d $fa0, $sp, 152 + lu12i.w $a0, -196747 + ori $a0, $a0, 3248 + lu32i.d $a0, -40297 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 fst.d $fa0, $s0, -96 - fmul.d $fa0, $fa1, $fa2 + fld.d $fa0, $sp, 160 + lu12i.w $a0, -18141 + ori $a0, $a0, 215 + lu32i.d $a0, -62850 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 fst.d $fa0, $s0, -88 - fmul.d $fa1, $fa3, $fa4 - fld.d $fa2, $sp, 192 - pcalau12i $a0, %pc_hi20(.LCPI1_71) - fld.d $fa3, $a0, %pc_lo12(.LCPI1_71) - fld.d $fa4, $sp, 200 - pcalau12i $a0, %pc_hi20(.LCPI1_72) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_72) - fst.d $fa1, $s0, -80 - fmul.d $fa1, $fa2, $fa3 - fst.d $fa1, $s0, -72 - fmul.d $fa1, $fa4, $fa0 - fld.d $fa2, $sp, 208 - pcalau12i $a0, %pc_hi20(.LCPI1_73) - fld.d $fa3, $a0, %pc_lo12(.LCPI1_73) - fld.d $fa4, $sp, 216 - pcalau12i $a0, %pc_hi20(.LCPI1_74) - fld.d $fa5, $a0, %pc_lo12(.LCPI1_74) + fld.d $fa0, $sp, 168 + lu12i.w $a0, 355426 + ori $a0, $a0, 3546 + lu32i.d $a0, -90303 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 + fst.d $fa0, $s0, -80 + fld.d $fa0, $sp, 176 + lu12i.w $a0, 20328 + ori $a0, $a0, 1765 + lu32i.d $a0, -122591 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 + fst.d $fa0, $s0, -72 + fld.d $fa1, $sp, 184 + lu12i.w $a0, -199896 + ori $a1, $a0, 3398 + lu32i.d $a1, -159637 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fa0, $a1 + fmul.d $fa1, $fa1, $fa0 fst.d $fa1, $s0, -64 - fmul.d $fa1, $fa2, $fa3 + fld.d $fa1, $sp, 192 + lu12i.w $a1, -48180 + ori $a1, $a1, 1907 + lu32i.d $a1, -201350 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fa2, $a1 + fmul.d $fa1, $fa1, $fa2 fst.d $fa1, $s0, -56 - fmul.d $fa1, $fa4, $fa5 - fld.d $fa2, $sp, 224 - pcalau12i $a0, %pc_hi20(.LCPI1_75) - fld.d $fa3, $a0, %pc_lo12(.LCPI1_75) - fld.d $fa4, $sp, 232 - pcalau12i $a0, %pc_hi20(.LCPI1_76) - fld.d $fa5, $a0, %pc_lo12(.LCPI1_76) + fld.d $fa1, $sp, 200 + lu12i.w $a1, -59381 + ori $a1, $a1, 3505 + lu32i.d $a1, -247630 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fa2, $a1 + fmul.d $fa1, $fa1, $fa2 fst.d $fa1, $s0, -48 - fmul.d $fa1, $fa2, $fa3 + fld.d $fa1, $sp, 208 + lu12i.w $a1, 283030 + ori $a1, $a1, 3646 + lu32i.d $a1, -298365 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fa2, $a1 + fmul.d $fa1, $fa1, $fa2 fst.d $fa1, $s0, -40 - fmul.d $fa1, $fa4, $fa5 - fld.d $fa2, $sp, 240 - pcalau12i $a0, %pc_hi20(.LCPI1_77) - fld.d $fa3, $a0, %pc_lo12(.LCPI1_77) - fld.d $fa4, $sp, 248 - pcalau12i $a0, %pc_hi20(.LCPI1_78) - fld.d $fa5, $a0, %pc_lo12(.LCPI1_78) + fld.d $fa1, $sp, 216 + lu12i.w $a1, 168170 + ori $a1, $a1, 419 + lu32i.d $a1, -353434 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fa2, $a1 + fmul.d $fa1, $fa1, $fa2 fst.d $fa1, $s0, -32 - fmul.d $fa1, $fa2, $fa3 + fld.d $fa1, $sp, 224 + lu12i.w $a1, 294712 + ori $a1, $a1, 1857 + lu32i.d $a1, -412704 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fa2, $a1 + fmul.d $fa1, $fa1, $fa2 fst.d $fa1, $s0, -24 - fmul.d $fa1, $fa4, $fa5 - fld.d $fa2, $sp, 256 - pcalau12i $a0, %pc_hi20(.LCPI1_79) - fld.d $fa3, $a0, %pc_lo12(.LCPI1_79) - fld.d $fa4, $sp, 264 - pcalau12i $a0, %pc_hi20(.LCPI1_80) - fld.d $fa5, $a0, %pc_lo12(.LCPI1_80) + fld.d $fa1, $sp, 232 + lu12i.w $a1, 438609 + ori $a1, $a1, 1857 + lu32i.d $a1, -476032 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fa2, $a1 + fmul.d $fa1, $fa1, $fa2 fst.d $fa1, $s0, -16 - fmul.d $fa1, $fa2, $fa3 + fld.d $fa1, $sp, 240 + lu12i.w $a1, 140970 + ori $a1, $a1, 4015 + lu32i.d $a1, 505311 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fa2, $a1 + fmul.d $fa1, $fa1, $fa2 fst.d $fa1, $s0, -8 - fmul.d $fa1, $fa4, $fa5 - fld.d $fa2, $sp, 272 - pcalau12i $a0, %pc_hi20(.LCPI1_81) - fld.d $fa3, $a0, %pc_lo12(.LCPI1_81) - fld.d $fa4, $sp, 280 - pcalau12i $a0, %pc_hi20(.LCPI1_82) - fld.d $fa5, $a0, %pc_lo12(.LCPI1_82) + fld.d $fa1, $sp, 248 + lu12i.w $a1, 419827 + ori $a1, $a1, 3021 + lu32i.d $a1, 434334 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fa2, $a1 + fmul.d $fa1, $fa1, $fa2 fst.d $fa1, $s0, 0 - fmul.d $fa1, $fa2, $fa3 + fld.d $fa1, $sp, 256 + lu12i.w $a1, 215246 + ori $a1, $a1, 3231 + lu32i.d $a1, 359785 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fa2, $a1 + fmul.d $fa1, $fa1, $fa2 fst.d $fa1, $s0, 8 - fmul.d $fa1, $fa4, $fa5 - fld.d $fa2, $sp, 288 - pcalau12i $a0, %pc_hi20(.LCPI1_83) - fld.d $fa3, $a0, %pc_lo12(.LCPI1_83) - fld.d $fa4, $sp, 296 - pcalau12i $a0, %pc_hi20(.LCPI1_84) - fld.d $fa5, $a0, %pc_lo12(.LCPI1_84) + fld.d $fa1, $sp, 264 + lu12i.w $a1, 151697 + ori $a1, $a1, 3542 + lu32i.d $a1, 281843 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fa2, $a1 + fmul.d $fa1, $fa1, $fa2 fst.d $fa1, $s0, 16 - fmul.d $fa1, $fa2, $fa3 + fld.d $fa1, $sp, 272 + lu12i.w $a1, -12777 + ori $a1, $a1, 54 + lu32i.d $a1, 200695 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fa2, $a1 + fmul.d $fa1, $fa1, $fa2 fst.d $fa1, $s0, 24 - fmul.d $fa1, $fa4, $fa5 - fld.d $fa2, $sp, 304 - pcalau12i $a0, %pc_hi20(.LCPI1_85) - fld.d $fa3, $a0, %pc_lo12(.LCPI1_85) - fld.d $fa4, $sp, 312 - pcalau12i $a0, %pc_hi20(.LCPI1_86) - fld.d $fa5, $a0, %pc_lo12(.LCPI1_86) + fld.d $fa1, $sp, 280 + lu12i.w $a1, 236262 + ori $a1, $a1, 2249 + lu32i.d $a1, 116539 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fa2, $a1 + fmul.d $fa1, $fa1, $fa2 fst.d $fa1, $s0, 32 - fmul.d $fa1, $fa2, $fa3 + fld.d $fa1, $sp, 288 + lu12i.w $a1, -421329 + ori $a1, $a1, 4077 + lu32i.d $a1, 29575 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fa2, $a1 + fmul.d $fa1, $fa1, $fa2 fst.d $fa1, $s0, 40 - fmul.d $fa1, $fa4, $fa5 - fld.d $fa2, $sp, 320 - pcalau12i $a0, %pc_hi20(.LCPI1_87) - fld.d $fa3, $a0, %pc_lo12(.LCPI1_87) - fld.d $fa4, $sp, 328 - pcalau12i $a0, %pc_hi20(.LCPI1_88) - fld.d $fa5, $a0, %pc_lo12(.LCPI1_88) + fld.d $fa1, $sp, 296 + lu12i.w $a1, 229487 + ori $a1, $a1, 1598 + lu32i.d $a1, -119971 + lu52i.d $a1, $a1, 1021 + movgr2fr.d $fa2, $a1 + fmul.d $fa1, $fa1, $fa2 fst.d $fa1, $s0, 48 - fmul.d $fa1, $fa2, $fa3 + fld.d $fa1, $sp, 304 + lu12i.w $a1, 40469 + ori $a1, $a1, 3266 + lu32i.d $a1, -303856 + lu52i.d $a1, $a1, 1021 + movgr2fr.d $fa2, $a1 + fmul.d $fa1, $fa1, $fa2 fst.d $fa1, $s0, 56 - fmul.d $fa1, $fa4, $fa5 - fld.d $fa2, $sp, 336 - pcalau12i $a0, %pc_hi20(.LCPI1_89) - fld.d $fa3, $a0, %pc_lo12(.LCPI1_89) - fld.d $fa4, $sp, 344 - pcalau12i $a0, %pc_hi20(.LCPI1_90) - fld.d $fa5, $a0, %pc_lo12(.LCPI1_90) + fld.d $fa1, $sp, 312 + lu12i.w $a1, -365846 + ori $a2, $a1, 2404 + lu32i.d $a2, -492062 + lu52i.d $a2, $a2, 1021 + movgr2fr.d $fa2, $a2 + fmul.d $fa1, $fa1, $fa2 fst.d $fa1, $s0, 64 - fmul.d $fa1, $fa2, $fa3 + fld.d $fa1, $sp, 320 + lu12i.w $a2, 481969 + ori $a2, $a2, 4061 + lu32i.d $a2, 364442 + lu52i.d $a2, $a2, 1021 + movgr2fr.d $fa2, $a2 + fmul.d $fa1, $fa1, $fa2 fst.d $fa1, $s0, 72 - fmul.d $fa1, $fa4, $fa5 - fld.d $fa2, $sp, 352 - pcalau12i $a0, %pc_hi20(.LCPI1_91) - fld.d $fa3, $a0, %pc_lo12(.LCPI1_91) - fld.d $fa4, $sp, 360 - pcalau12i $a0, %pc_hi20(.LCPI1_92) - fld.d $fa5, $a0, %pc_lo12(.LCPI1_92) + fld.d $fa1, $sp, 328 + lu12i.w $a2, 191833 + ori $a2, $a2, 3845 + lu32i.d $a2, 168966 + lu52i.d $a2, $a2, 1021 + movgr2fr.d $fa2, $a2 + fmul.d $fa1, $fa1, $fa2 fst.d $fa1, $s0, 80 - fmul.d $fa1, $fa2, $fa3 + fld.d $fa1, $sp, 336 + lu12i.w $a2, 504341 + ori $a2, $a2, 3870 + lu32i.d $a2, -58887 + lu52i.d $a2, $a2, 1020 + movgr2fr.d $fa2, $a2 + fmul.d $fa1, $fa1, $fa2 fst.d $fa1, $s0, 88 - fmul.d $fa1, $fa4, $fa5 - fld.d $fa2, $sp, 368 - pcalau12i $a0, %pc_hi20(.LCPI1_93) - fld.d $fa3, $a0, %pc_lo12(.LCPI1_93) - fld.d $fa4, $sp, 376 - pcalau12i $a0, %pc_hi20(.LCPI1_94) - fld.d $fa5, $a0, %pc_lo12(.LCPI1_94) + fld.d $fa1, $sp, 344 + lu12i.w $a2, 247450 + ori $a2, $a2, 1549 + lu32i.d $a2, -460616 + lu52i.d $a2, $a2, 1020 + movgr2fr.d $fa2, $a2 + fmul.d $fa1, $fa1, $fa2 fst.d $fa1, $s0, 96 - fmul.d $fa1, $fa2, $fa3 + fld.d $fa1, $sp, 352 + lu12i.w $a2, 452838 + ori $a2, $a2, 314 + lu32i.d $a2, 182288 + lu52i.d $a2, $a2, 1020 + movgr2fr.d $fa2, $a2 + fmul.d $fa1, $fa1, $fa2 fst.d $fa1, $s0, 104 - fmul.d $fa1, $fa4, $fa5 - fld.d $fa2, $sp, 384 - pcalau12i $a0, %pc_hi20(.LCPI1_95) - fld.d $fa3, $a0, %pc_lo12(.LCPI1_95) + fld.d $fa1, $sp, 360 + lu12i.w $a2, -277861 + ori $a2, $a2, 1080 + lu32i.d $a2, -452698 + lu52i.d $a2, $a2, 1019 + movgr2fr.d $fa2, $a2 + fmul.d $fa1, $fa1, $fa2 fst.d $fa1, $s0, 112 + fld.d $fa1, $sp, 368 + lu12i.w $a2, -61219 + ori $a2, $a2, 2084 + lu32i.d $a2, -450715 + lu52i.d $a2, $a2, 1018 + movgr2fr.d $fa2, $a2 + fmul.d $fa1, $fa1, $fa2 + fst.d $fa1, $s0, 120 fld.d $fa1, $fp, 72 - fld.d $fa4, $fp, 136 - fmul.d $fa2, $fa2, $fa3 - fst.d $fa2, $s0, 120 + fld.d $fa2, $fp, 136 + fld.d $fa3, $fp, 216 + fld.d $fa4, $fp, 280 fst.d $fa1, $fp, 136 - fst.d $fa4, $fp, 72 - fld.d $fa1, $fp, 216 - fld.d $fa2, $fp, 280 - fld.d $fa3, $fp, 360 - fld.d $fa4, $fp, 424 - fst.d $fa1, $fp, 280 - fst.d $fa2, $fp, 216 - fst.d $fa3, $fp, 424 - fst.d $fa4, $fp, 360 - fld.d $fa1, $fp, 504 - fld.d $fa2, $fp, 568 - fld.d $fa3, $fp, 936 - fld.d $fa4, $fp, 1000 - fst.d $fa1, $fp, 568 - fst.d $fa2, $fp, 504 - fst.d $fa3, $fp, 1000 - fst.d $fa4, $fp, 936 - fld.d $fa1, $fp, 1080 - fld.d $fa2, $fp, 1144 - fld.d $fa3, $fp, 80 - fld.d $fa4, $fp, 128 - fst.d $fa1, $fp, 1144 - fst.d $fa2, $fp, 1080 - fst.d $fa3, $fp, 128 - fst.d $fa4, $fp, 80 - fld.d $fa1, $fp, 224 - fld.d $fa2, $fp, 272 - fld.d $fa3, $fp, 368 - fld.d $fa4, $fp, 416 - fst.d $fa1, $fp, 272 - fst.d $fa2, $fp, 224 - fst.d $fa3, $fp, 416 - fst.d $fa4, $fp, 368 - fld.d $fa1, $fp, 512 - fld.d $fa2, $fp, 560 - fld.d $fa3, $fp, 944 - fld.d $fa4, $fp, 992 - fst.d $fa1, $fp, 560 - fst.d $fa2, $fp, 512 - fst.d $fa3, $fp, 992 - fst.d $fa4, $fp, 944 - fld.d $fa1, $fp, 1088 - fld.d $fa2, $fp, 1136 - fld.d $fa3, $fp, 88 - fld.d $fa4, $fp, 120 - fst.d $fa1, $fp, 1136 - fst.d $fa2, $fp, 1088 - fst.d $fa3, $fp, 120 - fst.d $fa4, $fp, 88 - fld.d $fa1, $fp, 232 - fld.d $fa2, $fp, 264 - fld.d $fa3, $fp, 376 - fld.d $fa4, $fp, 408 - fst.d $fa1, $fp, 264 - fst.d $fa2, $fp, 232 - fst.d $fa3, $fp, 408 - fst.d $fa4, $fp, 376 - fld.d $fa1, $fp, 520 - fld.d $fa2, $fp, 552 - fld.d $fa3, $fp, 952 - fld.d $fa4, $fp, 984 - fst.d $fa1, $fp, 552 - fst.d $fa2, $fp, 520 - fst.d $fa3, $fp, 984 - fst.d $fa4, $fp, 952 - fld.d $fa1, $fp, 1096 - fld.d $fa2, $fp, 1128 - fld.d $fa3, $fp, 96 - fld.d $fa4, $fp, 112 - fst.d $fa1, $fp, 1128 - fst.d $fa2, $fp, 1096 - fst.d $fa3, $fp, 112 - fst.d $fa4, $fp, 96 - fld.d $fa1, $fp, 240 - fld.d $fa2, $fp, 256 - fld.d $fa3, $fp, 384 - fld.d $fa4, $fp, 400 - fst.d $fa1, $fp, 256 - fst.d $fa2, $fp, 240 - fst.d $fa3, $fp, 400 - fst.d $fa4, $fp, 384 - fld.d $fa1, $fp, 528 - fld.d $fa2, $fp, 544 - fld.d $fa3, $fp, 960 - fld.d $fa4, $fp, 976 - fst.d $fa1, $fp, 544 - fst.d $fa2, $fp, 528 - fst.d $fa3, $fp, 976 - fst.d $fa4, $fp, 960 - fld.d $fa2, $fp, 1104 - fld.d $fa3, $fp, 1120 - pcalau12i $a0, %pc_hi20(.LCPI1_96) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_96) - fst.d $fa2, $fp, 1120 - fst.d $fa3, $fp, 1104 + fst.d $fa2, $fp, 72 + fst.d $fa3, $fp, 280 + fst.d $fa4, $fp, 216 + fld.d $fa1, $fp, 360 + fld.d $fa2, $fp, 424 + fld.d $fa3, $fp, 504 + fld.d $fa4, $fp, 568 + fst.d $fa1, $fp, 424 + fst.d $fa2, $fp, 360 + fst.d $fa3, $fp, 568 + fst.d $fa4, $fp, 504 + fld.d $fa1, $fp, 936 + fld.d $fa2, $fp, 1000 + fld.d $fa3, $fp, 1080 + fld.d $fa4, $fp, 1144 + fst.d $fa1, $fp, 1000 + fst.d $fa2, $fp, 936 + fst.d $fa3, $fp, 1144 + fst.d $fa4, $fp, 1080 + fld.d $fa1, $fp, 80 + fld.d $fa2, $fp, 128 + fld.d $fa3, $fp, 224 + fld.d $fa4, $fp, 272 + fst.d $fa1, $fp, 128 + fst.d $fa2, $fp, 80 + fst.d $fa3, $fp, 272 + fst.d $fa4, $fp, 224 + fld.d $fa1, $fp, 368 + fld.d $fa2, $fp, 416 + fld.d $fa3, $fp, 512 + fld.d $fa4, $fp, 560 + fst.d $fa1, $fp, 416 + fst.d $fa2, $fp, 368 + fst.d $fa3, $fp, 560 + fst.d $fa4, $fp, 512 + fld.d $fa1, $fp, 944 + fld.d $fa2, $fp, 992 + fld.d $fa3, $fp, 1088 + fld.d $fa4, $fp, 1136 + fst.d $fa1, $fp, 992 + fst.d $fa2, $fp, 944 + fst.d $fa3, $fp, 1136 + fst.d $fa4, $fp, 1088 + fld.d $fa1, $fp, 88 + fld.d $fa2, $fp, 120 + fld.d $fa3, $fp, 232 + fld.d $fa4, $fp, 264 + fst.d $fa1, $fp, 120 + fst.d $fa2, $fp, 88 + fst.d $fa3, $fp, 264 + fst.d $fa4, $fp, 232 + fld.d $fa1, $fp, 376 + fld.d $fa2, $fp, 408 + fld.d $fa3, $fp, 520 + fld.d $fa4, $fp, 552 + fst.d $fa1, $fp, 408 + fst.d $fa2, $fp, 376 + fst.d $fa3, $fp, 552 + fst.d $fa4, $fp, 520 + fld.d $fa1, $fp, 952 + fld.d $fa2, $fp, 984 + fld.d $fa3, $fp, 1096 + fld.d $fa4, $fp, 1128 + fst.d $fa1, $fp, 984 + fst.d $fa2, $fp, 952 + fst.d $fa3, $fp, 1128 + fst.d $fa4, $fp, 1096 + fld.d $fa1, $fp, 96 + fld.d $fa2, $fp, 112 + fld.d $fa3, $fp, 240 + fld.d $fa4, $fp, 256 + fst.d $fa1, $fp, 112 + fst.d $fa2, $fp, 96 + fst.d $fa3, $fp, 256 + fst.d $fa4, $fp, 240 + fld.d $fa1, $fp, 384 + fld.d $fa2, $fp, 400 + fld.d $fa3, $fp, 528 + fld.d $fa4, $fp, 544 + fst.d $fa1, $fp, 400 + fst.d $fa2, $fp, 384 + fst.d $fa3, $fp, 544 + fst.d $fa4, $fp, 528 + fld.d $fa1, $fp, 960 + fld.d $fa2, $fp, 976 + fld.d $fa3, $fp, 1104 + fld.d $fa4, $fp, 1120 + fst.d $fa1, $fp, 976 + fst.d $fa2, $fp, 960 + fst.d $fa3, $fp, 1120 + fst.d $fa4, $fp, 1104 + lu52i.d $a2, $zero, 1008 + movgr2fr.d $fa1, $a2 vld $vr3, $fp, 0 - vld $vr6, $sp, 128 # 16-byte Folded Reload + vld $vr6, $sp, 112 # 16-byte Folded Reload fmul.d $fa2, $fa6, $fa1 vreplvei.d $vr2, $vr2, 0 vld $vr4, $fp, 288 @@ -2992,151 +2988,177 @@ mdct_init48: # @mdct_init48 vfmul.d $vr3, $vr2, $vr3 vst $vr3, $fp, 560 vfmul.d $vr2, $vr2, $vr5 - pcalau12i $a0, %pc_hi20(.LCPI1_97) - vld $vr3, $a0, %pc_lo12(.LCPI1_97) - pcalau12i $a0, %pc_hi20(.LCPI1_98) - vld $vr4, $a0, %pc_lo12(.LCPI1_98) + pcalau12i $a3, %pc_hi20(.LCPI1_44) + vld $vr3, $a3, %pc_lo12(.LCPI1_44) + pcalau12i $a3, %pc_hi20(.LCPI1_45) + vld $vr4, $a3, %pc_lo12(.LCPI1_45) vst $vr2, $fp, 1136 vreplvei.d $vr2, $vr6, 0 vfmul.d $vr2, $vr2, $vr3 vst $vr4, $fp, 576 - lu52i.d $a0, $zero, 1008 - vreplgr2vr.d $vr3, $a0 + vreplgr2vr.d $vr3, $a2 vfmul.d $vr2, $vr2, $vr3 - lu12i.w $a0, 349525 - ori $a0, $a0, 1365 - lu32i.d $a0, 349525 - lu52i.d $a0, $a0, 1021 - pcalau12i $a1, %pc_hi20(.LCPI1_99) - fld.d $fa3, $a1, %pc_lo12(.LCPI1_99) - vreplgr2vr.d $vr4, $a0 - vfmul.d $vr2, $vr2, $vr4 - vreplvei.d $vr4, $vr2, 0 - fmul.d $fa3, $fa4, $fa3 - pcalau12i $a0, %pc_hi20(cos_s) - addi.d $a0, $a0, %pc_lo12(cos_s) - pcalau12i $a1, %pc_hi20(.LCPI1_100) - vld $vr4, $a1, %pc_lo12(.LCPI1_100) - fst.d $fa3, $a0, 144 + lu12i.w $a2, 349525 + ori $a2, $a2, 1365 + lu32i.d $a2, 349525 + lu52i.d $a4, $a2, 1021 + vreplgr2vr.d $vr3, $a4 + vfmul.d $vr2, $vr2, $vr3 + vreplvei.d $vr3, $vr2, 0 + lu12i.w $a3, -446778 + ori $a2, $a3, 2204 + lu32i.d $a2, -17942 + lu52i.d $a2, $a2, 1022 + movgr2fr.d $fa4, $a2 + fmul.d $fa3, $fa3, $fa4 + pcalau12i $a2, %pc_hi20(cos_s) + addi.d $a2, $a2, %pc_lo12(cos_s) + pcalau12i $a5, %pc_hi20(.LCPI1_46) + vld $vr4, $a5, %pc_lo12(.LCPI1_46) + fst.d $fa3, $a2, 144 vfmul.d $vr3, $vr2, $vr4 - pcalau12i $a1, %pc_hi20(.LCPI1_101) - vld $vr4, $a1, %pc_lo12(.LCPI1_101) - pcalau12i $a1, %pc_hi20(.LCPI1_102) - vld $vr5, $a1, %pc_lo12(.LCPI1_102) - vst $vr3, $a0, 0 + pcalau12i $a5, %pc_hi20(.LCPI1_47) + vld $vr4, $a5, %pc_lo12(.LCPI1_47) + pcalau12i $a5, %pc_hi20(.LCPI1_48) + vld $vr5, $a5, %pc_lo12(.LCPI1_48) + vst $vr3, $a2, 0 vfmul.d $vr3, $vr2, $vr4 - vst $vr3, $a0, 24 + vst $vr3, $a2, 24 vfmul.d $vr3, $vr2, $vr5 - pcalau12i $a1, %pc_hi20(.LCPI1_103) - vld $vr4, $a1, %pc_lo12(.LCPI1_103) - pcalau12i $a1, %pc_hi20(.LCPI1_104) - vld $vr5, $a1, %pc_lo12(.LCPI1_104) - vst $vr3, $a0, 48 + pcalau12i $a5, %pc_hi20(.LCPI1_49) + vld $vr4, $a5, %pc_lo12(.LCPI1_49) + pcalau12i $a5, %pc_hi20(.LCPI1_50) + vld $vr5, $a5, %pc_lo12(.LCPI1_50) + vst $vr3, $a2, 48 vfmul.d $vr3, $vr2, $vr4 - vst $vr3, $a0, 72 + vst $vr3, $a2, 72 vfmul.d $vr4, $vr2, $vr5 - pcalau12i $a1, %pc_hi20(.LCPI1_105) - vld $vr5, $a1, %pc_lo12(.LCPI1_105) - vst $vr4, $a0, 96 - pcalau12i $a1, %pc_hi20(.LCPI1_106) - vld $vr4, $a1, %pc_lo12(.LCPI1_106) + pcalau12i $a5, %pc_hi20(.LCPI1_51) + vld $vr5, $a5, %pc_lo12(.LCPI1_51) + vst $vr4, $a2, 96 + pcalau12i $a5, %pc_hi20(.LCPI1_52) + vld $vr4, $a5, %pc_lo12(.LCPI1_52) vfmul.d $vr5, $vr2, $vr5 - vst $vr5, $a0, 120 - vstelm.d $vr3, $a0, 152, 1 + vst $vr5, $a2, 120 + vstelm.d $vr3, $a2, 152, 1 vfmul.d $vr3, $vr2, $vr4 - pcalau12i $a1, %pc_hi20(.LCPI1_107) - vld $vr4, $a1, %pc_lo12(.LCPI1_107) - pcalau12i $a1, %pc_hi20(.LCPI1_108) - vld $vr5, $a1, %pc_lo12(.LCPI1_108) - vst $vr3, $a0, 168 + pcalau12i $a5, %pc_hi20(.LCPI1_53) + vld $vr4, $a5, %pc_lo12(.LCPI1_53) + pcalau12i $a5, %pc_hi20(.LCPI1_54) + vld $vr5, $a5, %pc_lo12(.LCPI1_54) + vst $vr3, $a2, 168 vfmul.d $vr3, $vr2, $vr4 - vst $vr3, $a0, 192 + vst $vr3, $a2, 192 vfmul.d $vr3, $vr2, $vr5 - pcalau12i $a1, %pc_hi20(.LCPI1_109) - vld $vr4, $a1, %pc_lo12(.LCPI1_109) - pcalau12i $a1, %pc_hi20(.LCPI1_110) - vld $vr5, $a1, %pc_lo12(.LCPI1_110) - vst $vr3, $a0, 216 + pcalau12i $a5, %pc_hi20(.LCPI1_55) + vld $vr4, $a5, %pc_lo12(.LCPI1_55) + pcalau12i $a5, %pc_hi20(.LCPI1_56) + vld $vr5, $a5, %pc_lo12(.LCPI1_56) + vst $vr3, $a2, 216 vfmul.d $vr3, $vr2, $vr4 - vst $vr3, $a0, 240 + vst $vr3, $a2, 240 vfmul.d $vr2, $vr2, $vr5 - pcalau12i $a1, %pc_hi20(.LCPI1_111) - fld.d $fa3, $a1, %pc_lo12(.LCPI1_111) - pcalau12i $a1, %pc_hi20(.LCPI1_112) - fld.d $fa4, $a1, %pc_lo12(.LCPI1_112) - vst $vr2, $a0, 264 - fmul.d $fa2, $fa6, $fa3 + vst $vr2, $a2, 264 + lu12i.w $a6, -477838 + ori $a5, $a6, 1171 + lu32i.d $a5, -433370 + lu52i.d $a7, $a5, 1022 + movgr2fr.d $fa2, $a7 + fmul.d $fa2, $fa6, $fa2 fmul.d $fa1, $fa2, $fa1 - fmul.d $fa1, $fa1, $fa4 - pcalau12i $a1, %pc_hi20(.LCPI1_113) - fld.d $fa2, $a1, %pc_lo12(.LCPI1_113) - pcalau12i $a1, %pc_hi20(.LCPI1_114) - fld.d $fa3, $a1, %pc_lo12(.LCPI1_114) - lu12i.w $a1, 343402 - ori $a1, $a1, 1652 - lu32i.d $a1, -487951 - lu52i.d $a1, $a1, 1022 - st.d $a1, $fp, 592 + movgr2fr.d $fa2, $a4 + fmul.d $fa1, $fa1, $fa2 + lu12i.w $a4, 343402 + ori $a4, $a4, 1652 + lu32i.d $a4, -487951 + lu52i.d $a4, $a4, 1022 + st.d $a4, $fp, 592 + lu12i.w $a4, 63194 + ori $a7, $a4, 725 + lu32i.d $a7, 46357 + lu52i.d $a7, $a7, 1020 + movgr2fr.d $fa2, $a7 + fmul.d $fa2, $fa1, $fa2 + fst.d $fa2, $a2, 16 + ori $a7, $a3, 2203 + lu32i.d $a7, -17942 + lu52i.d $a7, $a7, -1026 + movgr2fr.d $fa2, $a7 + fmul.d $fa2, $fa1, $fa2 + fst.d $fa2, $a2, 40 + ori $a7, $a1, 2413 + lu32i.d $a7, -492062 + lu52i.d $a7, $a7, -1027 + movgr2fr.d $fa2, $a7 fmul.d $fa2, $fa1, $fa2 - fst.d $fa2, $a0, 16 - fmul.d $fa2, $fa1, $fa3 - pcalau12i $a1, %pc_hi20(.LCPI1_115) - fld.d $fa3, $a1, %pc_lo12(.LCPI1_115) - pcalau12i $a1, %pc_hi20(.LCPI1_116) - fld.d $fa4, $a1, %pc_lo12(.LCPI1_116) - fst.d $fa2, $a0, 40 - fmul.d $fa2, $fa1, $fa3 - fst.d $fa2, $a0, 64 - fmul.d $fa2, $fa1, $fa4 - pcalau12i $a1, %pc_hi20(.LCPI1_117) - fld.d $fa3, $a1, %pc_lo12(.LCPI1_117) - pcalau12i $a1, %pc_hi20(.LCPI1_118) - fld.d $fa4, $a1, %pc_lo12(.LCPI1_118) - fst.d $fa2, $a0, 88 - fmul.d $fa2, $fa1, $fa3 - fst.d $fa2, $a0, 112 - fmul.d $fa2, $fa1, $fa4 - pcalau12i $a1, %pc_hi20(.LCPI1_119) - fld.d $fa3, $a1, %pc_lo12(.LCPI1_119) - pcalau12i $a1, %pc_hi20(.LCPI1_120) - fld.d $fa4, $a1, %pc_lo12(.LCPI1_120) - fst.d $fa2, $a0, 136 - fmul.d $fa2, $fa1, $fa3 - fst.d $fa2, $a0, 160 - fmul.d $fa2, $fa1, $fa4 - pcalau12i $a1, %pc_hi20(.LCPI1_121) - fld.d $fa3, $a1, %pc_lo12(.LCPI1_121) - fst.d $fa2, $a0, 184 + fst.d $fa2, $a2, 64 + ori $a0, $a0, 3395 + lu32i.d $a0, -159637 + lu52i.d $a0, $a0, -1026 + movgr2fr.d $fa2, $a0 + fmul.d $fa2, $fa1, $fa2 + fst.d $fa2, $a2, 88 + lu12i.w $a0, 260433 + ori $a7, $a0, 1011 + lu32i.d $a7, 228089 + lu52i.d $a7, $a7, 1022 + movgr2fr.d $fa2, $a7 + fmul.d $fa2, $fa1, $fa2 + fst.d $fa2, $a2, 112 + ori $a6, $a6, 1156 + lu32i.d $a6, -433370 + lu52i.d $a6, $a6, -1026 + movgr2fr.d $fa2, $a6 + fmul.d $fa2, $fa1, $fa2 + fst.d $fa2, $a2, 136 + lu52i.d $a5, $a5, -1026 + movgr2fr.d $fa2, $a5 + fmul.d $fa2, $fa1, $fa2 + fst.d $fa2, $a2, 160 + ori $a0, $a0, 984 + lu32i.d $a0, 228089 + lu52i.d $a0, $a0, -1026 + movgr2fr.d $fa2, $a0 + fmul.d $fa2, $fa1, $fa2 + fst.d $fa2, $a2, 184 + fmul.d $fa0, $fa1, $fa0 + fst.d $fa0, $a2, 208 + ori $a0, $a1, 2393 + lu32i.d $a0, -492062 + lu52i.d $a0, $a0, -1027 + movgr2fr.d $fa0, $a0 + fmul.d $fa0, $fa1, $fa0 + fst.d $fa0, $a2, 232 + ori $a0, $a3, 2205 + lu32i.d $a0, -17942 + lu52i.d $a0, $a0, -1026 + movgr2fr.d $fa0, $a0 + fmul.d $fa0, $fa1, $fa0 + fst.d $fa0, $a2, 256 + ori $a0, $a4, 638 + lu32i.d $a0, 46357 + lu52i.d $a0, $a0, -1028 + movgr2fr.d $fa0, $a0 fmul.d $fa0, $fa1, $fa0 - fst.d $fa0, $a0, 208 - fmul.d $fa0, $fa1, $fa3 - pcalau12i $a1, %pc_hi20(.LCPI1_122) - fld.d $fa2, $a1, %pc_lo12(.LCPI1_122) - pcalau12i $a1, %pc_hi20(.LCPI1_123) - fld.d $fa3, $a1, %pc_lo12(.LCPI1_123) - fst.d $fa0, $a0, 232 - fmul.d $fa0, $fa1, $fa2 - fst.d $fa0, $a0, 256 - fmul.d $fa0, $fa1, $fa3 - fst.d $fa0, $a0, 280 - fld.d $fs7, $sp, 392 # 8-byte Folded Reload - fld.d $fs6, $sp, 400 # 8-byte Folded Reload - fld.d $fs5, $sp, 408 # 8-byte Folded Reload - fld.d $fs4, $sp, 416 # 8-byte Folded Reload - fld.d $fs3, $sp, 424 # 8-byte Folded Reload - fld.d $fs2, $sp, 432 # 8-byte Folded Reload - fld.d $fs1, $sp, 440 # 8-byte Folded Reload - fld.d $fs0, $sp, 448 # 8-byte Folded Reload - ld.d $s6, $sp, 456 # 8-byte Folded Reload - ld.d $s5, $sp, 464 # 8-byte Folded Reload - ld.d $s4, $sp, 472 # 8-byte Folded Reload - ld.d $s3, $sp, 480 # 8-byte Folded Reload - ld.d $s2, $sp, 488 # 8-byte Folded Reload - ld.d $s1, $sp, 496 # 8-byte Folded Reload - ld.d $s0, $sp, 504 # 8-byte Folded Reload - ld.d $fp, $sp, 512 # 8-byte Folded Reload - ld.d $ra, $sp, 520 # 8-byte Folded Reload - addi.d $sp, $sp, 528 + fst.d $fa0, $a2, 280 + fld.d $fs7, $sp, 376 # 8-byte Folded Reload + fld.d $fs6, $sp, 384 # 8-byte Folded Reload + fld.d $fs5, $sp, 392 # 8-byte Folded Reload + fld.d $fs4, $sp, 400 # 8-byte Folded Reload + fld.d $fs3, $sp, 408 # 8-byte Folded Reload + fld.d $fs2, $sp, 416 # 8-byte Folded Reload + fld.d $fs1, $sp, 424 # 8-byte Folded Reload + fld.d $fs0, $sp, 432 # 8-byte Folded Reload + ld.d $s6, $sp, 440 # 8-byte Folded Reload + ld.d $s5, $sp, 448 # 8-byte Folded Reload + ld.d $s4, $sp, 456 # 8-byte Folded Reload + ld.d $s3, $sp, 464 # 8-byte Folded Reload + ld.d $s2, $sp, 472 # 8-byte Folded Reload + ld.d $s1, $sp, 480 # 8-byte Folded Reload + ld.d $s0, $sp, 488 # 8-byte Folded Reload + ld.d $fp, $sp, 496 # 8-byte Folded Reload + ld.d $ra, $sp, 504 # 8-byte Folded Reload + addi.d $sp, $sp, 512 ret .Lfunc_end1: .size mdct_init48, .Lfunc_end1-mdct_init48 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/parse.s b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/parse.s index 3b8314e9..ce4efe37 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/parse.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/parse.s @@ -1067,35 +1067,30 @@ lame_presets_info: # @lame_presets_info .word 8 # 0x8 .word 56 # 0x38 .word 3700 # 0xe74 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI3_7: - .dword 0x3690000000000000 # double 7.0064923216240854E-46 -.LCPI3_8: - .dword 0x408f400000000000 # double 1000 -.LCPI3_9: - .dword 0x3feffffff0000000 # double 0.99999997019767761 .text .globl lame_parse_args .p2align 5 .type lame_parse_args,@function lame_parse_args: # @lame_parse_args # %bb.0: - addi.d $sp, $sp, -224 - st.d $ra, $sp, 216 # 8-byte Folded Spill - st.d $fp, $sp, 208 # 8-byte Folded Spill - st.d $s0, $sp, 200 # 8-byte Folded Spill - st.d $s1, $sp, 192 # 8-byte Folded Spill - st.d $s2, $sp, 184 # 8-byte Folded Spill - st.d $s3, $sp, 176 # 8-byte Folded Spill - st.d $s4, $sp, 168 # 8-byte Folded Spill - st.d $s5, $sp, 160 # 8-byte Folded Spill - st.d $s6, $sp, 152 # 8-byte Folded Spill - st.d $s7, $sp, 144 # 8-byte Folded Spill - st.d $s8, $sp, 136 # 8-byte Folded Spill + addi.d $sp, $sp, -240 + st.d $ra, $sp, 232 # 8-byte Folded Spill + st.d $fp, $sp, 224 # 8-byte Folded Spill + st.d $s0, $sp, 216 # 8-byte Folded Spill + st.d $s1, $sp, 208 # 8-byte Folded Spill + st.d $s2, $sp, 200 # 8-byte Folded Spill + st.d $s3, $sp, 192 # 8-byte Folded Spill + st.d $s4, $sp, 184 # 8-byte Folded Spill + st.d $s5, $sp, 176 # 8-byte Folded Spill + st.d $s6, $sp, 168 # 8-byte Folded Spill + st.d $s7, $sp, 160 # 8-byte Folded Spill + st.d $s8, $sp, 152 # 8-byte Folded Spill + fst.d $fs0, $sp, 144 # 8-byte Folded Spill + fst.d $fs1, $sp, 136 # 8-byte Folded Spill + fst.d $fs2, $sp, 128 # 8-byte Folded Spill move $s4, $a2 ld.d $a2, $a2, 0 - st.d $a2, $sp, 80 # 8-byte Folded Spill + st.d $a2, $sp, 72 # 8-byte Folded Spill move $s5, $a1 move $fp, $a0 pcalau12i $a0, %pc_hi20(inPath) @@ -1104,7 +1099,7 @@ lame_parse_args: # @lame_parse_args pcalau12i $a0, %pc_hi20(outPath) addi.d $s2, $a0, %pc_lo12(outPath) st.b $zero, $s2, 0 - st.d $a1, $sp, 112 # 8-byte Folded Spill + st.d $a1, $sp, 104 # 8-byte Folded Spill st.d $a1, $fp, 128 st.d $s2, $fp, 136 pcalau12i $a0, %got_pc_hi20(id3tag) @@ -1116,31 +1111,37 @@ lame_parse_args: # @lame_parse_args st.w $zero, $s1, 0 blt $s5, $a0, .LBB3_159 # %bb.1: # %.lr.ph420 - st.d $s2, $sp, 88 # 8-byte Folded Spill - st.d $s1, $sp, 72 # 8-byte Folded Spill + st.d $s2, $sp, 80 # 8-byte Folded Spill + st.d $s1, $sp, 64 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.L.str.110) addi.d $a0, $a0, %pc_lo12(.L.str.110) - st.d $a0, $sp, 120 # 8-byte Folded Spill - st.d $zero, $sp, 104 # 8-byte Folded Spill + st.d $a0, $sp, 112 # 8-byte Folded Spill st.d $zero, $sp, 96 # 8-byte Folded Spill + st.d $zero, $sp, 88 # 8-byte Folded Spill move $s3, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs0, $a0 + lu12i.w $a0, -65536 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fs1, $a0 ori $a0, $zero, 160 lu32i.d $a0, 12000 - st.d $a0, $sp, 64 # 8-byte Folded Spill + st.d $a0, $sp, 56 # 8-byte Folded Spill + lu52i.d $a0, $zero, 873 + movgr2fr.d $fs2, $a0 ori $a0, $zero, 260 lu32i.d $a0, 300 - st.d $a0, $sp, 56 # 8-byte Folded Spill + st.d $a0, $sp, 48 # 8-byte Folded Spill lu12i.w $a0, 3 ori $a0, $a0, 3712 - st.d $a0, $sp, 48 # 8-byte Folded Spill + st.d $a0, $sp, 40 # 8-byte Folded Spill ori $a0, $zero, 100 lu32i.d $a0, 2000 - st.d $a0, $sp, 40 # 8-byte Folded Spill + st.d $a0, $sp, 32 # 8-byte Folded Spill lu12i.w $a0, 5 ori $a0, $a0, 3520 - st.d $a0, $sp, 32 # 8-byte Folded Spill - ori $a0, $zero, 0 - lu32i.d $a0, 1 st.d $a0, $sp, 24 # 8-byte Folded Spill ori $s2, $zero, 57 ori $s0, $zero, 1 @@ -1152,12 +1153,12 @@ lame_parse_args: # @lame_parse_args ldx.d $s8, $s4, $a0 ld.bu $a0, $s8, 0 addi.d $s1, $s8, 1 - st.d $s1, $sp, 128 + st.d $s1, $sp, 120 ori $a1, $zero, 45 bne $a0, $a1, .LBB3_6 # %bb.3: # in Loop: Header=BB3_2 Depth=1 addi.w $a0, $s3, 2 - ld.d $s6, $sp, 120 # 8-byte Folded Reload + ld.d $s6, $sp, 112 # 8-byte Folded Reload blt $a0, $s5, .LBB3_9 # %bb.4: # in Loop: Header=BB3_2 Depth=1 ld.bu $a1, $s1, 0 @@ -1185,11 +1186,11 @@ lame_parse_args: # @lame_parse_args ld.bu $a1, $s1, 0 bnez $a1, .LBB3_5 .LBB3_10: # in Loop: Header=BB3_2 Depth=1 - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload ld.bu $a1, $a0, 0 beqz $a1, .LBB3_16 # %bb.11: # in Loop: Header=BB3_2 Depth=1 - ld.d $a0, $sp, 88 # 8-byte Folded Reload + ld.d $a0, $sp, 80 # 8-byte Folded Reload ld.bu $a1, $a0, 0 beqz $a1, .LBB3_16 # %bb.12: # %thread-pre-split @@ -1223,7 +1224,7 @@ lame_parse_args: # @lame_parse_args ori $a0, $zero, 45 bne $a1, $a0, .LBB3_62 # %bb.18: # in Loop: Header=BB3_2 Depth=1 - st.d $s7, $sp, 128 + st.d $s7, $sp, 120 pcalau12i $a0, %pc_hi20(.L.str.111) addi.d $a1, $a0, %pc_lo12(.L.str.111) move $a0, $s7 @@ -1478,7 +1479,8 @@ lame_parse_args: # @lame_parse_args lu32i.d $a0, 0 st.w $a0, $fp, 108 vst $vr0, $fp, 92 - ld.d $a0, $sp, 24 # 8-byte Folded Reload + ori $a0, $zero, 0 + lu32i.d $a0, 1 st.d $a0, $fp, 36 ori $a0, $zero, 2 st.w $a0, $fp, 28 @@ -1503,7 +1505,7 @@ lame_parse_args: # @lame_parse_args ori $a2, $zero, 9 masknez $a1, $a2, $a1 or $a0, $a0, $a1 - st.d $a0, $sp, 104 # 8-byte Folded Spill + st.d $a0, $sp, 96 # 8-byte Folded Spill .p2align 4, , 16 .LBB3_64: # in Loop: Header=BB3_66 Depth=2 xor $a0, $s8, $s7 @@ -1571,7 +1573,7 @@ lame_parse_args: # @lame_parse_args lu32i.d $a0, 1 st.d $a0, $fp, 36 ori $a0, $zero, 1 - st.d $a0, $sp, 96 # 8-byte Folded Spill + st.d $a0, $sp, 88 # 8-byte Folded Spill b .LBB3_65 .LBB3_76: # in Loop: Header=BB3_66 Depth=2 ori $a0, $zero, 9 @@ -1674,12 +1676,14 @@ lame_parse_args: # @lame_parse_args move $a1, $zero pcaddu18i $ra, %call36(strtod) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI3_8) - fld.d $fa1, $a0, %pc_lo12(.LCPI3_8) fcvt.s.d $fa0, $fa0 fcvt.d.s $fa0, $fa0 - vldi $vr2, -928 - fmadd.d $fa0, $fa0, $fa1, $fa2 + vldi $vr1, -928 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa2, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 st.w $a0, $fp, 12 @@ -1746,7 +1750,7 @@ lame_parse_args: # @lame_parse_args ld.d $a0, $a0, 0 pcalau12i $a1, %pc_hi20(.L.str.151) addi.d $a1, $a1, %pc_lo12(.L.str.151) - ld.d $a2, $sp, 80 # 8-byte Folded Reload + ld.d $a2, $sp, 72 # 8-byte Folded Reload move $a3, $s8 pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 @@ -1758,7 +1762,7 @@ lame_parse_args: # @lame_parse_args ext.w.b $a3, $a1 pcalau12i $a1, %pc_hi20(.L.str.153) addi.d $a1, $a1, %pc_lo12(.L.str.153) - ld.d $a2, $sp, 80 # 8-byte Folded Reload + ld.d $a2, $sp, 72 # 8-byte Folded Reload pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 ori $s1, $zero, 1 @@ -1784,7 +1788,7 @@ lame_parse_args: # @lame_parse_args pcalau12i $a1, %pc_hi20(.L.str.150) addi.d $a1, $a1, %pc_lo12(.L.str.150) .LBB3_113: # in Loop: Header=BB3_66 Depth=2 - ld.d $a2, $sp, 80 # 8-byte Folded Reload + ld.d $a2, $sp, 72 # 8-byte Folded Reload move $a3, $s8 pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 @@ -1795,18 +1799,14 @@ lame_parse_args: # @lame_parse_args move $a1, $zero pcaddu18i $ra, %call36(strtod) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI3_8) - fld.d $fa1, $a0, %pc_lo12(.LCPI3_8) - fcvt.s.d $fa2, $fa0 - fcvt.d.s $fa2, $fa2 - vldi $vr3, -928 - fmadd.d $fa1, $fa2, $fa1, $fa3 - pcalau12i $a0, %pc_hi20(.LCPI3_9) - fld.d $fa2, $a0, %pc_lo12(.LCPI3_9) + fcvt.s.d $fa1, $fa0 + fcvt.d.s $fa1, $fa1 + vldi $vr2, -928 + fmadd.d $fa1, $fa1, $fs0, $fa2 ftintrz.w.d $fa1, $fa1 movfr2gr.s $a0, $fa1 st.w $a0, $fp, 16 - fcmp.cule.d $fcc0, $fa2, $fa0 + fcmp.cule.d $fcc0, $fs1, $fa0 ori $s3, $zero, 1 bcnez $fcc0, .LBB3_119 b .LBB3_184 @@ -1817,7 +1817,7 @@ lame_parse_args: # @lame_parse_args b .LBB3_119 .LBB3_116: # in Loop: Header=BB3_2 Depth=1 move $s3, $zero - ld.d $a0, $sp, 64 # 8-byte Folded Reload + ld.d $a0, $sp, 56 # 8-byte Folded Reload st.d $a0, $fp, 100 b .LBB3_118 .LBB3_117: # in Loop: Header=BB3_2 Depth=1 @@ -1858,10 +1858,8 @@ lame_parse_args: # @lame_parse_args move $a1, $zero pcaddu18i $ra, %call36(strtod) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI3_8) - fld.d $fa1, $a0, %pc_lo12(.LCPI3_8) - vldi $vr2, -928 - fmadd.d $fa0, $fa0, $fa1, $fa2 + vldi $vr1, -928 + fmadd.d $fa0, $fa0, $fs0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 st.w $a0, $fp, 104 @@ -1873,10 +1871,8 @@ lame_parse_args: # @lame_parse_args move $a1, $zero pcaddu18i $ra, %call36(strtod) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI3_8) - fld.d $fa1, $a0, %pc_lo12(.LCPI3_8) - vldi $vr2, -928 - fmadd.d $fa0, $fa0, $fa1, $fa2 + vldi $vr1, -928 + fmadd.d $fa0, $fa0, $fs0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 st.w $a0, $fp, 112 @@ -1885,7 +1881,7 @@ lame_parse_args: # @lame_parse_args b .LBB3_186 .LBB3_127: # in Loop: Header=BB3_2 Depth=1 ori $s3, $zero, 1 - ld.d $a0, $sp, 72 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload st.w $s3, $a0, 0 addi.d $a0, $a0, 8 b .LBB3_134 @@ -1894,10 +1890,8 @@ lame_parse_args: # @lame_parse_args move $a1, $zero pcaddu18i $ra, %call36(strtod) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI3_8) - fld.d $fa1, $a0, %pc_lo12(.LCPI3_8) - vldi $vr2, -928 - fmadd.d $fa0, $fa0, $fa1, $fa2 + vldi $vr1, -928 + fmadd.d $fa0, $fa0, $fs0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 st.w $a0, $fp, 108 @@ -1906,13 +1900,13 @@ lame_parse_args: # @lame_parse_args b .LBB3_187 .LBB3_129: # in Loop: Header=BB3_2 Depth=1 ori $s3, $zero, 1 - ld.d $a0, $sp, 72 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload st.w $s3, $a0, 0 addi.d $a0, $a0, 39 b .LBB3_134 .LBB3_130: # in Loop: Header=BB3_2 Depth=1 ori $s3, $zero, 1 - ld.d $a0, $sp, 72 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload st.w $s3, $a0, 0 addi.d $a0, $a0, 70 b .LBB3_134 @@ -1921,10 +1915,8 @@ lame_parse_args: # @lame_parse_args move $a1, $zero pcaddu18i $ra, %call36(strtod) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI3_8) - fld.d $fa1, $a0, %pc_lo12(.LCPI3_8) - vldi $vr2, -928 - fmadd.d $fa0, $fa0, $fa1, $fa2 + vldi $vr1, -928 + fmadd.d $fa0, $fa0, $fs0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 st.w $a0, $fp, 116 @@ -1933,14 +1925,14 @@ lame_parse_args: # @lame_parse_args b .LBB3_188 .LBB3_132: # in Loop: Header=BB3_2 Depth=1 ori $s3, $zero, 1 - ld.d $a0, $sp, 72 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload st.w $s3, $a0, 0 addi.d $a0, $a0, 101 ori $a2, $zero, 4 b .LBB3_135 .LBB3_133: # in Loop: Header=BB3_2 Depth=1 ori $s3, $zero, 1 - ld.d $a0, $sp, 72 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload st.w $s3, $a0, 0 addi.d $a0, $a0, 106 .LBB3_134: # in Loop: Header=BB3_2 Depth=1 @@ -1955,17 +1947,15 @@ lame_parse_args: # @lame_parse_args move $a1, $zero pcaddu18i $ra, %call36(strtod) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI3_7) - fld.d $fa1, $a0, %pc_lo12(.LCPI3_7) - fcvt.s.d $fa2, $fa0 - fst.s $fa2, $fp, 152 - fcmp.cult.d $fcc0, $fa1, $fa0 + fcvt.s.d $fa1, $fa0 + fst.s $fa1, $fp, 152 + fcmp.cult.d $fcc0, $fs2, $fa0 ori $s3, $zero, 1 bcnez $fcc0, .LBB3_119 b .LBB3_189 .LBB3_137: # in Loop: Header=BB3_2 Depth=1 ori $s3, $zero, 1 - ld.d $s1, $sp, 72 # 8-byte Folded Reload + ld.d $s1, $sp, 64 # 8-byte Folded Reload st.w $s3, $s1, 0 ori $a2, $zero, 10 move $a0, $s6 @@ -1985,12 +1975,12 @@ lame_parse_args: # @lame_parse_args st.b $a0, $s1, 266 b .LBB3_119 .LBB3_138: # in Loop: Header=BB3_2 Depth=1 - addi.d $a1, $sp, 128 + addi.d $a1, $sp, 120 ori $a2, $zero, 10 move $a0, $s6 pcaddu18i $ra, %call36(strtol) jirl $ra, $ra, 0 - ld.d $a1, $sp, 128 + ld.d $a1, $sp, 120 move $s7, $a0 pcalau12i $a0, %got_pc_hi20(genre_last) ld.d $a0, $a0, %got_pc_lo12(genre_last) @@ -2002,7 +1992,7 @@ lame_parse_args: # @lame_parse_args blt $s1, $a0, .LBB3_157 .LBB3_140: # in Loop: Header=BB3_2 Depth=1 ori $s3, $zero, 1 - ld.d $a0, $sp, 72 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload st.w $s3, $a0, 0 st.b $s7, $a0, 265 b .LBB3_119 @@ -2011,14 +2001,14 @@ lame_parse_args: # @lame_parse_args vld $vr0, $a0, %pc_lo12(.LCPI3_6) ori $a0, $zero, 16 st.w $a0, $fp, 48 - ld.d $a0, $sp, 56 # 8-byte Folded Reload + ld.d $a0, $sp, 48 # 8-byte Folded Reload st.d $a0, $fp, 108 ori $a0, $zero, 40 st.w $a0, $fp, 116 vst $vr0, $fp, 92 ori $s3, $zero, 1 st.w $s3, $fp, 160 - ld.d $a0, $sp, 48 # 8-byte Folded Reload + ld.d $a0, $sp, 40 # 8-byte Folded Reload st.w $a0, $fp, 16 ori $a0, $zero, 3 b .LBB3_155 @@ -2027,7 +2017,7 @@ lame_parse_args: # @lame_parse_args vld $vr0, $a0, %pc_lo12(.LCPI3_5) ori $a0, $zero, 56 st.w $a0, $fp, 48 - ld.d $a0, $sp, 40 # 8-byte Folded Reload + ld.d $a0, $sp, 32 # 8-byte Folded Reload st.d $a0, $fp, 108 ori $a0, $zero, 20 st.w $a0, $fp, 116 @@ -2037,7 +2027,7 @@ lame_parse_args: # @lame_parse_args ori $a0, $zero, 3 lu32i.d $a0, 1 st.d $a0, $fp, 36 - ld.d $a0, $sp, 32 # 8-byte Folded Reload + ld.d $a0, $sp, 24 # 8-byte Folded Reload st.w $a0, $fp, 16 b .LBB3_156 .LBB3_143: # %.preheader @@ -2105,7 +2095,7 @@ lame_parse_args: # @lame_parse_args ld.d $a0, $a0, 0 pcalau12i $a1, %pc_hi20(.L.str.148) addi.d $a1, $a1, %pc_lo12(.L.str.148) - ld.d $a2, $sp, 80 # 8-byte Folded Reload + ld.d $a2, $sp, 72 # 8-byte Folded Reload move $a3, $s7 pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 @@ -2156,15 +2146,15 @@ lame_parse_args: # @lame_parse_args .LBB3_160: # %._crit_edge.loopexit.loopexit sltu $a0, $zero, $s1 .LBB3_161: # %._crit_edge.loopexit - ld.d $a1, $sp, 96 # 8-byte Folded Reload + ld.d $a1, $sp, 88 # 8-byte Folded Reload sltui $s0, $a1, 1 - ld.d $s1, $sp, 72 # 8-byte Folded Reload - ld.d $s2, $sp, 88 # 8-byte Folded Reload - ld.d $s3, $sp, 104 # 8-byte Folded Reload + ld.d $s1, $sp, 64 # 8-byte Folded Reload + ld.d $s2, $sp, 80 # 8-byte Folded Reload + ld.d $s3, $sp, 96 # 8-byte Folded Reload .LBB3_162: # %._crit_edge bnez $a0, .LBB3_183 # %bb.163: # %._crit_edge - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload ld.bu $a0, $a0, 0 beqz $a0, .LBB3_183 # %bb.164: @@ -2194,7 +2184,7 @@ lame_parse_args: # @lame_parse_args ori $a1, $zero, 3 beq $a0, $a1, .LBB3_172 # %bb.171: - ld.d $s1, $sp, 112 # 8-byte Folded Reload + ld.d $s1, $sp, 104 # 8-byte Folded Reload move $a0, $s1 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 @@ -2248,18 +2238,21 @@ lame_parse_args: # @lame_parse_args .LBB3_178: st.w $s3, $fp, 28 .LBB3_179: - ld.d $s8, $sp, 136 # 8-byte Folded Reload - ld.d $s7, $sp, 144 # 8-byte Folded Reload - ld.d $s6, $sp, 152 # 8-byte Folded Reload - ld.d $s5, $sp, 160 # 8-byte Folded Reload - ld.d $s4, $sp, 168 # 8-byte Folded Reload - ld.d $s3, $sp, 176 # 8-byte Folded Reload - ld.d $s2, $sp, 184 # 8-byte Folded Reload - ld.d $s1, $sp, 192 # 8-byte Folded Reload - ld.d $s0, $sp, 200 # 8-byte Folded Reload - ld.d $fp, $sp, 208 # 8-byte Folded Reload - ld.d $ra, $sp, 216 # 8-byte Folded Reload - addi.d $sp, $sp, 224 + fld.d $fs2, $sp, 128 # 8-byte Folded Reload + fld.d $fs1, $sp, 136 # 8-byte Folded Reload + fld.d $fs0, $sp, 144 # 8-byte Folded Reload + ld.d $s8, $sp, 152 # 8-byte Folded Reload + ld.d $s7, $sp, 160 # 8-byte Folded Reload + ld.d $s6, $sp, 168 # 8-byte Folded Reload + ld.d $s5, $sp, 176 # 8-byte Folded Reload + ld.d $s4, $sp, 184 # 8-byte Folded Reload + ld.d $s3, $sp, 192 # 8-byte Folded Reload + ld.d $s2, $sp, 200 # 8-byte Folded Reload + ld.d $s1, $sp, 208 # 8-byte Folded Reload + ld.d $s0, $sp, 216 # 8-byte Folded Reload + ld.d $fp, $sp, 224 # 8-byte Folded Reload + ld.d $ra, $sp, 232 # 8-byte Folded Reload + addi.d $sp, $sp, 240 ret .LBB3_180: st.w $zero, $s1, 0 @@ -2279,7 +2272,7 @@ lame_parse_args: # @lame_parse_args ld.d $a0, $a0, 0 pcalau12i $a1, %pc_hi20(.L.str.154) addi.d $a1, $a1, %pc_lo12(.L.str.154) - ld.d $a2, $sp, 80 # 8-byte Folded Reload + ld.d $a2, $sp, 72 # 8-byte Folded Reload move $a3, $s8 pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 @@ -2287,11 +2280,11 @@ lame_parse_args: # @lame_parse_args b .LBB3_161 .LBB3_182: move $a0, $fp - ld.d $a1, $sp, 80 # 8-byte Folded Reload + ld.d $a1, $sp, 72 # 8-byte Folded Reload pcaddu18i $ra, %call36(lame_help) jirl $ra, $ra, 0 .LBB3_183: - ld.d $a1, $sp, 80 # 8-byte Folded Reload + ld.d $a1, $sp, 72 # 8-byte Folded Reload pcaddu18i $ra, %call36(lame_usage) jirl $ra, $ra, 0 .LBB3_184: @@ -2388,7 +2381,7 @@ lame_parse_args: # @lame_parse_args ld.d $a0, $a0, 0 pcalau12i $a1, %pc_hi20(.L.str.147) addi.d $a1, $a1, %pc_lo12(.L.str.147) - ld.d $a2, $sp, 80 # 8-byte Folded Reload + ld.d $a2, $sp, 72 # 8-byte Folded Reload move $a3, $s6 pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/psymodel.s b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/psymodel.s index 9ac7025b..77d9f198 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/psymodel.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/psymodel.s @@ -1,82 +1,52 @@ .file "psymodel.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function L3psycho_anal -.LCPI0_0: - .dword 0x408f400000000000 # double 1000 -.LCPI0_1: - .dword 0x4090000000000000 # double 1024 -.LCPI0_2: - .dword 0x416153d9a3000000 # double 9084621.09375 -.LCPI0_19: - .dword 0x3fd0137987dd704c # double 0.25118864315095801 -.LCPI0_20: - .dword 0x3fcd791c5f888823 # double 0.23025850929940458 -.LCPI0_21: - .dword 0x3fd999999999999a # double 0.40000000000000002 -.LCPI0_22: - .dword 0x3fb0270ac3f8a9f9 # double 0.063095734448019317 -.LCPI0_23: - .dword 0x3fa8f6869e6f084d # double 0.048755843010000001 -.LCPI0_24: - .dword 0x3fdfedfbdeea22f7 # double 0.49890038269999998 -.LCPI0_25: - .dword 0x3fea6ff6e4078667 # double 0.82616753136626364 -.LCPI0_26: - .dword 0x3ff30298b36105e3 # double 1.1881339079849276 -.LCPI0_27: - .dword 0x40a7700000000000 # double 3000 -.LCPI0_28: - .dword 0x3ff947ae147ae148 # double 1.5800000000000001 -.LCPI0_29: - .dword 0x3fe6666666666666 # double 0.69999999999999996 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI0_3: + .p2align 4, 0x0 # -- Begin function L3psycho_anal +.LCPI0_0: .dword 0x3f69e7c6e43390b7 # double 0.0031622776601683794 .dword 0x3f6c9323c534cddb # double 0.0034881304397005159 -.LCPI0_4: +.LCPI0_1: .dword 0x3f730c0cd2dc51a4 # double 0.0046501637387063345 .dword 0x3f7e1800bd1fad13 # double 0.0073471096857063043 -.LCPI0_5: +.LCPI0_2: .dword 0x3f8b4f7e2b2c2a91 # double 0.013335214321633234 .dword 0x3f9b56adfe1a8d8a # double 0.026697844156513585 -.LCPI0_6: +.LCPI0_3: .dword 0x3faccab8602d2692 # double 0.056234132519034877 .dword 0x3fbe5289b7ccc08b # double 0.11844692933368799 -.LCPI0_7: +.LCPI0_4: .dword 0x3fce5a84719edcce # double 0.23713737056616541 .dword 0x3fdb8bdb0dfb81ef # double 0.43041111340974586 -.LCPI0_8: +.LCPI0_5: .dword 0x3fe5c2da5de54871 # double 0.68003576601974036 .dword 0x3fed02b917109c2c # double 0.90658239846096 -.LCPI0_9: +.LCPI0_6: .dword 0x3f69e7c6e43390b7 # double 0.0031622776601683794 .dword 0x3f6ac06f83763a3b # double 0.0032655885694972387 -.LCPI0_10: +.LCPI0_7: .dword 0x3f6d706639d5c887 # double 0.0035936352348296427 .dword 0x3f71397ea6f6b13a # double 0.0042052218772535236 -.LCPI0_11: +.LCPI0_8: .dword 0x3f755ba8c82a8ba8 # double 0.0052143662493645895 .dword 0x3f7beda7a0c5893e # double 0.0068184421510031883 -.LCPI0_12: +.LCPI0_9: .dword 0x3f832421767ffbc6 # double 0.0093462576187429806 .dword 0x3f8b4f7e2b2c2a91 # double 0.013335214321633234 -.LCPI0_13: +.LCPI0_10: .dword 0x3f941eab43952c27 # double 0.019648242950364072 .dword 0x3f9e596c3b309b4b # double 0.029637995827353419 -.LCPI0_14: +.LCPI0_11: .dword 0x3fa73840d9d607da # double 0.045351053801837529 .dword 0x3fb1d9c04928942a # double 0.069728868351902534 -.LCPI0_15: +.LCPI0_12: .dword 0x3fbb507a530a8d1b # double 0.10669674422620228 .dword 0x3fc499d4c1c1c1dc # double 0.16094455204758062 -.LCPI0_16: +.LCPI0_13: .dword 0x3fce5a84719edcce # double 0.23713737056616541 .dword 0x3fd5a779ebdbd985 # double 0.33834693940243482 -.LCPI0_17: +.LCPI0_14: .dword 0x3fddae9ef4bedb09 # double 0.4637830152600349 .dword 0x3fe36813fc23e21a # double 0.60645484205366817 -.LCPI0_18: +.LCPI0_15: .dword 0x3fe81049c5a7f437 # double 0.75198830227566915 .dword 0x3fec28aef2028a7f # double 0.87996623294414167 .text @@ -85,100 +55,92 @@ .type L3psycho_anal,@function L3psycho_anal: # @L3psycho_anal # %bb.0: - addi.d $sp, $sp, -1248 - st.d $ra, $sp, 1240 # 8-byte Folded Spill - st.d $fp, $sp, 1232 # 8-byte Folded Spill - st.d $s0, $sp, 1224 # 8-byte Folded Spill - st.d $s1, $sp, 1216 # 8-byte Folded Spill - st.d $s2, $sp, 1208 # 8-byte Folded Spill - st.d $s3, $sp, 1200 # 8-byte Folded Spill - st.d $s4, $sp, 1192 # 8-byte Folded Spill - st.d $s5, $sp, 1184 # 8-byte Folded Spill - st.d $s6, $sp, 1176 # 8-byte Folded Spill - st.d $s7, $sp, 1168 # 8-byte Folded Spill - st.d $s8, $sp, 1160 # 8-byte Folded Spill - fst.d $fs0, $sp, 1152 # 8-byte Folded Spill - fst.d $fs1, $sp, 1144 # 8-byte Folded Spill - fst.d $fs2, $sp, 1136 # 8-byte Folded Spill - fst.d $fs3, $sp, 1128 # 8-byte Folded Spill - fst.d $fs4, $sp, 1120 # 8-byte Folded Spill - fst.d $fs5, $sp, 1112 # 8-byte Folded Spill - fst.d $fs6, $sp, 1104 # 8-byte Folded Spill - fst.d $fs7, $sp, 1096 # 8-byte Folded Spill - move $s8, $a7 - move $s1, $a6 - st.d $a5, $sp, 104 # 8-byte Folded Spill - st.d $a4, $sp, 96 # 8-byte Folded Spill - st.d $a3, $sp, 88 # 8-byte Folded Spill - st.d $a1, $sp, 192 # 8-byte Folded Spill + addi.d $sp, $sp, -1296 + st.d $ra, $sp, 1288 # 8-byte Folded Spill + st.d $fp, $sp, 1280 # 8-byte Folded Spill + st.d $s0, $sp, 1272 # 8-byte Folded Spill + st.d $s1, $sp, 1264 # 8-byte Folded Spill + st.d $s2, $sp, 1256 # 8-byte Folded Spill + st.d $s3, $sp, 1248 # 8-byte Folded Spill + st.d $s4, $sp, 1240 # 8-byte Folded Spill + st.d $s5, $sp, 1232 # 8-byte Folded Spill + st.d $s6, $sp, 1224 # 8-byte Folded Spill + st.d $s7, $sp, 1216 # 8-byte Folded Spill + st.d $s8, $sp, 1208 # 8-byte Folded Spill + fst.d $fs0, $sp, 1200 # 8-byte Folded Spill + fst.d $fs1, $sp, 1192 # 8-byte Folded Spill + fst.d $fs2, $sp, 1184 # 8-byte Folded Spill + fst.d $fs3, $sp, 1176 # 8-byte Folded Spill + fst.d $fs4, $sp, 1168 # 8-byte Folded Spill + fst.d $fs5, $sp, 1160 # 8-byte Folded Spill + fst.d $fs6, $sp, 1152 # 8-byte Folded Spill + fst.d $fs7, $sp, 1144 # 8-byte Folded Spill + st.d $a5, $sp, 96 # 8-byte Folded Spill + st.d $a4, $sp, 88 # 8-byte Folded Spill + st.d $a3, $sp, 80 # 8-byte Folded Spill + move $s5, $a2 + st.d $a1, $sp, 184 # 8-byte Folded Spill move $s3, $a0 pcalau12i $a0, %pc_hi20(L3psycho_anal.cw_lower_index) st.d $a0, $sp, 312 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI0_0) - st.d $a0, $sp, 112 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(L3psycho_anal.cw_upper_index) st.d $a0, $sp, 320 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(L3psycho_anal.cw) - addi.d $a0, $a0, %pc_lo12(L3psycho_anal.cw) - st.d $a0, $sp, 424 # 8-byte Folded Spill + addi.d $s0, $a0, %pc_lo12(L3psycho_anal.cw) pcalau12i $a0, %pc_hi20(L3psycho_anal.w2_s) addi.d $a0, $a0, %pc_lo12(L3psycho_anal.w2_s) - st.d $a0, $sp, 304 # 8-byte Folded Spill + st.d $a0, $sp, 352 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(L3psycho_anal.w1_s) addi.d $a0, $a0, %pc_lo12(L3psycho_anal.w1_s) - st.d $a0, $sp, 296 # 8-byte Folded Spill + st.d $a0, $sp, 304 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(L3psycho_anal.bo_s) addi.d $s4, $a0, %pc_lo12(L3psycho_anal.bo_s) pcalau12i $a0, %pc_hi20(L3psycho_anal.bu_s) addi.d $s6, $a0, %pc_lo12(L3psycho_anal.bu_s) pcalau12i $a0, %pc_hi20(L3psycho_anal.w2_l) addi.d $a0, $a0, %pc_lo12(L3psycho_anal.w2_l) - st.d $a0, $sp, 288 # 8-byte Folded Spill + st.d $a0, $sp, 344 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(L3psycho_anal.w1_l) addi.d $a0, $a0, %pc_lo12(L3psycho_anal.w1_l) - st.d $a0, $sp, 280 # 8-byte Folded Spill + st.d $a0, $sp, 296 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(L3psycho_anal.bo_l) addi.d $a0, $a0, %pc_lo12(L3psycho_anal.bo_l) - st.d $a0, $sp, 344 # 8-byte Folded Spill + st.d $a0, $sp, 336 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(L3psycho_anal.bu_l) addi.d $a0, $a0, %pc_lo12(L3psycho_anal.bu_l) - st.d $a0, $sp, 272 # 8-byte Folded Spill + st.d $a0, $sp, 288 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(L3psycho_anal.npart_l_orig) - st.d $a0, $sp, 264 # 8-byte Folded Spill + st.d $a0, $sp, 280 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(L3psycho_anal.npart_s_orig) - st.d $a0, $sp, 256 # 8-byte Folded Spill + st.d $a0, $sp, 272 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(L3psycho_anal.npart_l) - st.d $a0, $sp, 512 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(L3psycho_anal.npart_s) - st.d $a0, $sp, 336 # 8-byte Folded Spill + st.d $a0, $sp, 560 # 8-byte Folded Spill + pcalau12i $s8, %pc_hi20(L3psycho_anal.npart_s) pcalau12i $a0, %pc_hi20(L3psycho_anal.s3ind_s) addi.d $s2, $a0, %pc_lo12(L3psycho_anal.s3ind_s) pcalau12i $a0, %pc_hi20(L3psycho_anal.s3ind) addi.d $a0, $a0, %pc_lo12(L3psycho_anal.s3ind) - st.d $a0, $sp, 560 # 8-byte Folded Spill - st.d $s3, $sp, 208 # 8-byte Folded Spill - bnez $a2, .LBB0_69 + st.d $a0, $sp, 608 # 8-byte Folded Spill + st.d $s0, $sp, 472 # 8-byte Folded Spill + st.d $s8, $sp, 360 # 8-byte Folded Spill + bnez $a2, .LBB0_68 # %bb.1: ld.d $a0, $s3, 168 - bnez $a0, .LBB0_69 + bnez $a0, .LBB0_68 # %bb.2: - move $s5, $s1 pcalau12i $a1, %pc_hi20(L3psycho_anal.blocktype_old) - ori $a4, $zero, 3 + ori $a2, $zero, 3 ld.w $s7, $s3, 16 - lu32i.d $a4, 3 + lu32i.d $a2, 3 lu12i.w $a0, 7 ori $a3, $a0, 3327 - st.d $a4, $a1, %pc_lo12(L3psycho_anal.blocktype_old) - st.d $s8, $sp, 544 # 8-byte Folded Spill - st.d $a2, $sp, 528 # 8-byte Folded Spill + st.d $a2, $a1, %pc_lo12(L3psycho_anal.blocktype_old) + st.d $a6, $sp, 592 # 8-byte Folded Spill + st.d $a7, $sp, 576 # 8-byte Folded Spill blt $a3, $s7, .LBB0_6 # %bb.3: lu12i.w $a0, 3 ori $a0, $a0, 3712 - ld.d $s0, $sp, 424 # 8-byte Folded Reload - ld.d $s1, $sp, 304 # 8-byte Folded Reload - ld.d $s3, $sp, 288 # 8-byte Folded Reload beq $s7, $a0, .LBB0_9 # %bb.4: lu12i.w $a0, 5 @@ -187,12 +149,9 @@ L3psycho_anal: # @L3psycho_anal # %bb.5: ori $a0, $a0, 3520 beq $s7, $a0, .LBB0_9 - b .LBB0_275 + b .LBB0_274 .LBB0_6: ori $a0, $a0, 3328 - ld.d $s0, $sp, 424 # 8-byte Folded Reload - ld.d $s1, $sp, 304 # 8-byte Folded Reload - ld.d $s3, $sp, 288 # 8-byte Folded Reload beq $s7, $a0, .LBB0_9 # %bb.7: lu12i.w $a0, 10 @@ -201,7 +160,7 @@ L3psycho_anal: # @L3psycho_anal # %bb.8: lu12i.w $a0, 11 ori $a0, $a0, 2944 - bne $s7, $a0, .LBB0_275 + bne $s7, $a0, .LBB0_274 .LBB0_9: # %vector.ph pcalau12i $a0, %pc_hi20(L3psycho_anal.rx_sav) addi.d $a0, $a0, %pc_lo12(L3psycho_anal.rx_sav) @@ -235,22 +194,25 @@ L3psycho_anal: # @L3psycho_anal move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ld.d $fp, $sp, 208 # 8-byte Folded Reload - fld.s $fa0, $fp, 152 + fld.s $fa0, $s3, 152 + ori $a0, $zero, 1 + ld.d $a1, $sp, 312 # 8-byte Folded Reload + st.b $a0, $a1, %pc_lo12(L3psycho_anal.cw_lower_index) movgr2fr.w $fa1, $zero fcmp.clt.s $fcc0, $fa1, $fa0 - ld.d $a0, $sp, 112 # 8-byte Folded Reload - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) fcvt.d.s $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_1) + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_2) - fmul.d $fa0, $fa0, $fa2 - ori $a0, $zero, 1 - ld.d $a1, $sp, 312 # 8-byte Folded Reload - st.b $a0, $a1, %pc_lo12(L3psycho_anal.cw_lower_index) + lu52i.d $a0, $zero, 1033 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 + lu12i.w $a0, -380928 + lu32i.d $a0, 87001 + lu52i.d $a0, $a0, 1046 + movgr2fr.d $fa1, $a0 fsel $fa0, $fa1, $fa0, $fcc0 bstrpick.d $a0, $s7, 31, 0 movgr2fr.d $fa1, $a0 @@ -399,57 +361,57 @@ L3psycho_anal: # @L3psycho_anal vst $vr0, $s0, 1968 vst $vr0, $s0, 1984 vst $vr0, $s0, 2000 - pcalau12i $a1, %pc_hi20(.LCPI0_3) - vld $vr1, $a1, %pc_lo12(.LCPI0_3) + pcalau12i $a1, %pc_hi20(.LCPI0_0) + vld $vr1, $a1, %pc_lo12(.LCPI0_0) vst $vr0, $s0, 2016 vst $vr0, $s0, 2032 stptr.w $a0, $s0, 2048 pcalau12i $a0, %pc_hi20(L3psycho_anal.mld_s) addi.d $a0, $a0, %pc_lo12(L3psycho_anal.mld_s) - pcalau12i $a1, %pc_hi20(.LCPI0_4) - vld $vr0, $a1, %pc_lo12(.LCPI0_4) - pcalau12i $a1, %pc_hi20(.LCPI0_5) - vld $vr2, $a1, %pc_lo12(.LCPI0_5) + pcalau12i $a1, %pc_hi20(.LCPI0_1) + vld $vr0, $a1, %pc_lo12(.LCPI0_1) + pcalau12i $a1, %pc_hi20(.LCPI0_2) + vld $vr2, $a1, %pc_lo12(.LCPI0_2) vst $vr1, $a0, 0 vst $vr0, $a0, 16 vst $vr2, $a0, 32 + pcalau12i $a1, %pc_hi20(.LCPI0_3) + vld $vr0, $a1, %pc_lo12(.LCPI0_3) + pcalau12i $a1, %pc_hi20(.LCPI0_4) + vld $vr1, $a1, %pc_lo12(.LCPI0_4) + pcalau12i $a1, %pc_hi20(.LCPI0_5) + vld $vr2, $a1, %pc_lo12(.LCPI0_5) pcalau12i $a1, %pc_hi20(.LCPI0_6) - vld $vr0, $a1, %pc_lo12(.LCPI0_6) - pcalau12i $a1, %pc_hi20(.LCPI0_7) - vld $vr1, $a1, %pc_lo12(.LCPI0_7) - pcalau12i $a1, %pc_hi20(.LCPI0_8) - vld $vr2, $a1, %pc_lo12(.LCPI0_8) - pcalau12i $a1, %pc_hi20(.LCPI0_9) - vld $vr3, $a1, %pc_lo12(.LCPI0_9) + vld $vr3, $a1, %pc_lo12(.LCPI0_6) vst $vr0, $a0, 48 vst $vr1, $a0, 64 vst $vr2, $a0, 80 pcalau12i $a0, %pc_hi20(L3psycho_anal.mld_l) addi.d $a0, $a0, %pc_lo12(L3psycho_anal.mld_l) - pcalau12i $a1, %pc_hi20(.LCPI0_10) - vld $vr0, $a1, %pc_lo12(.LCPI0_10) + pcalau12i $a1, %pc_hi20(.LCPI0_7) + vld $vr0, $a1, %pc_lo12(.LCPI0_7) vst $vr3, $a0, 0 vst $vr0, $a0, 16 + pcalau12i $a1, %pc_hi20(.LCPI0_8) + vld $vr0, $a1, %pc_lo12(.LCPI0_8) + pcalau12i $a1, %pc_hi20(.LCPI0_9) + vld $vr1, $a1, %pc_lo12(.LCPI0_9) + pcalau12i $a1, %pc_hi20(.LCPI0_10) + vld $vr2, $a1, %pc_lo12(.LCPI0_10) pcalau12i $a1, %pc_hi20(.LCPI0_11) - vld $vr0, $a1, %pc_lo12(.LCPI0_11) - pcalau12i $a1, %pc_hi20(.LCPI0_12) - vld $vr1, $a1, %pc_lo12(.LCPI0_12) - pcalau12i $a1, %pc_hi20(.LCPI0_13) - vld $vr2, $a1, %pc_lo12(.LCPI0_13) - pcalau12i $a1, %pc_hi20(.LCPI0_14) - vld $vr3, $a1, %pc_lo12(.LCPI0_14) + vld $vr3, $a1, %pc_lo12(.LCPI0_11) vst $vr0, $a0, 32 vst $vr1, $a0, 48 vst $vr2, $a0, 64 vst $vr3, $a0, 80 + pcalau12i $a1, %pc_hi20(.LCPI0_12) + vld $vr0, $a1, %pc_lo12(.LCPI0_12) + pcalau12i $a1, %pc_hi20(.LCPI0_13) + vld $vr1, $a1, %pc_lo12(.LCPI0_13) + pcalau12i $a1, %pc_hi20(.LCPI0_14) + vld $vr2, $a1, %pc_lo12(.LCPI0_14) pcalau12i $a1, %pc_hi20(.LCPI0_15) - vld $vr0, $a1, %pc_lo12(.LCPI0_15) - pcalau12i $a1, %pc_hi20(.LCPI0_16) - vld $vr1, $a1, %pc_lo12(.LCPI0_16) - pcalau12i $a1, %pc_hi20(.LCPI0_17) - vld $vr2, $a1, %pc_lo12(.LCPI0_17) - pcalau12i $a1, %pc_hi20(.LCPI0_18) - vld $vr3, $a1, %pc_lo12(.LCPI0_18) + vld $vr3, $a1, %pc_lo12(.LCPI0_15) vst $vr0, $a0, 96 vst $vr1, $a0, 112 vst $vr2, $a0, 128 @@ -466,22 +428,25 @@ L3psycho_anal: # @L3psycho_anal move $a0, $s0 pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ld.w $a0, $fp, 16 + move $s1, $s3 + ld.w $a0, $s3, 16 movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 - st.d $s1, $sp, 64 - ld.d $a0, $sp, 296 # 8-byte Folded Reload + ld.d $a0, $sp, 352 # 8-byte Folded Reload + st.d $a0, $sp, 64 + ld.d $a0, $sp, 304 # 8-byte Folded Reload st.d $a0, $sp, 56 st.d $s4, $sp, 48 st.d $s6, $sp, 40 - st.d $s3, $sp, 32 - ld.d $a0, $sp, 280 # 8-byte Folded Reload - st.d $a0, $sp, 24 ld.d $a0, $sp, 344 # 8-byte Folded Reload + st.d $a0, $sp, 32 + ld.d $a0, $sp, 296 # 8-byte Folded Reload + st.d $a0, $sp, 24 + ld.d $a0, $sp, 336 # 8-byte Folded Reload st.d $a0, $sp, 16 - ld.d $a0, $sp, 272 # 8-byte Folded Reload + ld.d $a0, $sp, 288 # 8-byte Folded Reload st.d $a0, $sp, 8 - addi.d $a0, $sp, 576 + addi.d $a0, $sp, 624 st.d $a0, $sp, 0 pcalau12i $a0, %pc_hi20(L3psycho_anal.numlines_l) addi.d $a0, $a0, %pc_lo12(L3psycho_anal.numlines_l) @@ -530,12 +495,12 @@ L3psycho_anal: # @L3psycho_anal maskeqz $a0, $a0, $a2 or $t1, $a0, $a1 addi.w $a6, $t1, 1 - ld.d $a0, $sp, 264 # 8-byte Folded Reload + ld.d $a0, $sp, 280 # 8-byte Folded Reload st.w $a6, $a0, %pc_lo12(L3psycho_anal.npart_l_orig) addi.d $a2, $s7, -16 addi.d $a3, $zero, -2 addi.w $a4, $zero, -1 - ld.d $t3, $sp, 512 # 8-byte Folded Reload + ld.d $t5, $sp, 560 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_12: # =>This Inner Loop Header: Depth=1 ld.w $a0, $fp, 0 @@ -545,40 +510,39 @@ L3psycho_anal: # @L3psycho_anal addi.d $fp, $fp, 4 bgez $a0, .LBB0_12 # %bb.13: - ld.d $t2, $sp, 344 # 8-byte Folded Reload - ld.w $t0, $t2, 80 + ld.d $t4, $sp, 336 # 8-byte Folded Reload + ld.w $t0, $t4, 80 addi.w $a5, $a3, 1 ld.w $a7, $s4, 44 - ld.d $a0, $sp, 256 # 8-byte Folded Reload + ld.d $a0, $sp, 272 # 8-byte Folded Reload st.w $a5, $a0, %pc_lo12(L3psycho_anal.npart_s_orig) addi.w $a1, $t0, 1 - st.w $a1, $t3, %pc_lo12(L3psycho_anal.npart_l) + st.w $a1, $t5, %pc_lo12(L3psycho_anal.npart_l) addi.w $a0, $a7, 1 - ld.d $t4, $sp, 336 # 8-byte Folded Reload - st.w $a0, $t4, %pc_lo12(L3psycho_anal.npart_s) + ld.d $t2, $sp, 360 # 8-byte Folded Reload + st.w $a0, $t2, %pc_lo12(L3psycho_anal.npart_s) + ld.d $t2, $sp, 352 # 8-byte Folded Reload + ld.d $t3, $sp, 344 # 8-byte Folded Reload blt $t1, $t0, .LBB0_16 # %bb.14: - ld.d $s3, $sp, 336 # 8-byte Folded Reload bge $a7, $a5, .LBB0_17 .LBB0_15: - move $s1, $s5 bgtz $a1, .LBB0_18 b .LBB0_28 .LBB0_16: - st.w $a6, $t3, %pc_lo12(L3psycho_anal.npart_l) - st.w $t1, $t2, 80 + st.w $a6, $t5, %pc_lo12(L3psycho_anal.npart_l) + st.w $t1, $t4, 80 lu52i.d $a1, $zero, 1023 - st.d $a1, $s3, 160 + st.d $a1, $t3, 160 move $a1, $a6 - ld.d $s3, $sp, 336 # 8-byte Folded Reload blt $a7, $a5, .LBB0_15 .LBB0_17: - st.w $a5, $s3, %pc_lo12(L3psycho_anal.npart_s) + ld.d $a0, $sp, 360 # 8-byte Folded Reload + st.w $a5, $a0, %pc_lo12(L3psycho_anal.npart_s) st.w $a3, $s4, 44 lu52i.d $a0, $zero, 1023 - st.d $a0, $s1, 88 + st.d $a0, $t2, 88 move $a0, $a5 - move $s1, $s5 blez $a1, .LBB0_28 .LBB0_18: # %.preheader969.lr.ph bstrpick.d $a7, $a6, 31, 0 @@ -627,7 +591,7 @@ L3psycho_anal: # @L3psycho_anal move $t5, $a6 .LBB0_25: # %._crit_edge # in Loop: Header=BB0_21 Depth=1 - ld.d $t8, $sp, 560 # 8-byte Folded Reload + ld.d $t8, $sp, 608 # 8-byte Folded Reload alsl.d $t6, $t3, $t8, 3 slli.d $t7, $t3, 3 stx.w $t5, $t8, $t7 @@ -708,10 +672,13 @@ L3psycho_anal: # @L3psycho_anal .LBB0_39: # %.preheader966 blez $a1, .LBB0_51 # %bb.40: # %.lr.ph1008.preheader - pcalau12i $a2, %pc_hi20(.LCPI0_19) - fld.d $fa0, $a2, %pc_lo12(.LCPI0_19) move $a2, $zero - movgr2fr.d $fa1, $zero + movgr2fr.d $fa0, $zero + lu12i.w $a3, -492073 + ori $a3, $a3, 76 + lu32i.d $a3, 4985 + lu52i.d $a3, $a3, 1021 + movgr2fr.d $fa1, $a3 ori $a3, $zero, 3 b .LBB0_42 .p2align 4, , 16 @@ -725,7 +692,7 @@ L3psycho_anal: # @L3psycho_anal # Child Loop BB0_44 Depth 2 # Child Loop BB0_47 Depth 2 # Child Loop BB0_50 Depth 2 - ld.d $a6, $sp, 560 # 8-byte Folded Reload + ld.d $a6, $sp, 608 # 8-byte Folded Reload alsl.d $a5, $a2, $a6, 3 slli.d $a4, $a2, 3 ldx.w $a4, $a6, $a4 @@ -737,7 +704,7 @@ L3psycho_anal: # @L3psycho_anal alsl.d $t0, $a4, $s8, 3 sub.w $a7, $a5, $a4 addi.d $t1, $a7, 1 - fmov.d $fa2, $fa1 + fmov.d $fa2, $fa0 .p2align 4, , 16 .LBB0_44: # Parent Loop BB0_42 Depth=1 # => This Inner Loop Header: Depth=2 @@ -748,7 +715,7 @@ L3psycho_anal: # @L3psycho_anal bnez $t1, .LBB0_44 # %bb.45: # %.lr.ph1005 # in Loop: Header=BB0_42 Depth=1 - fdiv.d $fa2, $fa0, $fa2 + fdiv.d $fa2, $fa1, $fa2 bltu $a7, $a3, .LBB0_49 # %bb.46: # %vector.ph1480 # in Loop: Header=BB0_42 Depth=1 @@ -793,18 +760,21 @@ L3psycho_anal: # @L3psycho_anal bnez $a4, .LBB0_50 b .LBB0_41 .LBB0_51: # %._crit_edge1009 - ld.d $a1, $sp, 208 # 8-byte Folded Reload - ld.w $a1, $a1, 192 + move $s3, $s1 + ld.w $a1, $s1, 192 ori $a2, $zero, 1 - bne $a1, $a2, .LBB0_67 + bne $a1, $a2, .LBB0_55 # %bb.52: # %._crit_edge1009 - ld.d $s8, $sp, 544 # 8-byte Folded Reload + ld.d $s8, $sp, 360 # 8-byte Folded Reload blez $a0, .LBB0_55 # %bb.53: # %.lr.ph1011.preheader - pcalau12i $a0, %pc_hi20(.LCPI0_20) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_20) move $fp, $zero - addi.d $s0, $sp, 576 + addi.d $s0, $sp, 624 + lu12i.w $a0, 391304 + ori $a0, $a0, 2083 + lu32i.d $a0, -165604 + lu52i.d $a0, $a0, 1020 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB0_54: # %.lr.ph1011 # =>This Inner Loop Header: Depth=1 @@ -812,17 +782,17 @@ L3psycho_anal: # @L3psycho_anal fmul.d $fa0, $fa0, $fs0 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 - ld.w $a0, $s3, %pc_lo12(L3psycho_anal.npart_s) + ld.w $a0, $s8, %pc_lo12(L3psycho_anal.npart_s) fst.d $fa0, $s0, 0 addi.d $fp, $fp, 1 addi.d $s0, $s0, 8 blt $fp, $a0, .LBB0_54 .LBB0_55: # %.loopexit964 - blez $a0, .LBB0_68 -.LBB0_56: # %.lr.ph1025.preheader + blez $a0, .LBB0_67 +# %bb.56: # %.lr.ph1025.preheader move $a1, $zero movgr2fr.d $fa0, $zero - addi.d $a2, $sp, 576 + addi.d $a2, $sp, 624 ori $a3, $zero, 3 b .LBB0_58 .p2align 4, , 16 @@ -830,7 +800,7 @@ L3psycho_anal: # @L3psycho_anal # in Loop: Header=BB0_58 Depth=1 addi.d $a1, $a1, 1 addi.d $s7, $s7, 512 - beq $a1, $a0, .LBB0_68 + beq $a1, $a0, .LBB0_67 .LBB0_58: # %.lr.ph1025 # =>This Loop Header: Depth=1 # Child Loop BB0_60 Depth 2 @@ -903,234 +873,269 @@ L3psycho_anal: # @L3psycho_anal addi.d $a6, $a6, 8 bnez $a4, .LBB0_66 b .LBB0_57 -.LBB0_67: - ld.d $s8, $sp, 544 # 8-byte Folded Reload - bgtz $a0, .LBB0_56 -.LBB0_68: # %._crit_edge1026 +.LBB0_67: # %._crit_edge1026 pcaddu18i $ra, %call36(init_fft) jirl $ra, $ra, 0 - ld.d $s3, $sp, 208 # 8-byte Folded Reload - ld.d $a2, $sp, 528 # 8-byte Folded Reload -.LBB0_69: + ld.d $a6, $sp, 592 # 8-byte Folded Reload + ld.d $a7, $sp, 576 # 8-byte Folded Reload +.LBB0_68: ld.w $a1, $s3, 36 ld.w $a0, $s3, 204 - addi.d $a3, $a1, -1 - sltui $a4, $a3, 1 - masknez $a0, $a0, $a4 + addi.d $a2, $a1, -1 + sltui $a2, $a2, 1 + masknez $a0, $a0, $a2 ori $a3, $zero, 4 - maskeqz $a3, $a3, $a4 - or $a3, $a3, $a0 + maskeqz $a2, $a3, $a2 + or $a2, $a2, $a0 pcalau12i $a0, %pc_hi20(L3psycho_anal.thm) - addi.d $s5, $a0, %pc_lo12(L3psycho_anal.thm) - st.d $a3, $sp, 328 # 8-byte Folded Spill - blez $a3, .LBB0_202 -# %bb.70: # %.lr.ph1133 - ld.d $a0, $sp, 1256 - st.d $a0, $sp, 184 # 8-byte Folded Spill - ld.d $a0, $sp, 1248 + addi.d $s7, $a0, %pc_lo12(L3psycho_anal.thm) + st.d $a2, $sp, 328 # 8-byte Folded Spill + blez $a2, .LBB0_201 +# %bb.69: # %.lr.ph1133 + st.d $s3, $sp, 192 # 8-byte Folded Spill + ld.d $a0, $sp, 1304 st.d $a0, $sp, 176 # 8-byte Folded Spill + ld.d $a0, $sp, 1296 + st.d $a0, $sp, 168 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(L3psycho_anal.s3_s) - addi.d $t6, $a0, %pc_lo12(L3psycho_anal.s3_s) + addi.d $t7, $a0, %pc_lo12(L3psycho_anal.s3_s) pcalau12i $a0, %pc_hi20(L3psycho_anal.numlines_s) addi.d $t8, $a0, %pc_lo12(L3psycho_anal.numlines_s) pcalau12i $a0, %pc_hi20(L3psycho_anal.s3_l) addi.d $a0, $a0, %pc_lo12(L3psycho_anal.s3_l) st.d $a0, $sp, 200 # 8-byte Folded Spill ori $a0, $zero, 1952 - mul.d $a0, $a2, $a0 - add.d $a1, $s8, $a0 - st.d $a1, $sp, 168 # 8-byte Folded Spill - add.d $a0, $s1, $a0 - st.d $a0, $sp, 160 # 8-byte Folded Spill + mul.d $a0, $s5, $a0 + add.d $a1, $a7, $a0 + st.d $a1, $sp, 160 # 8-byte Folded Spill + add.d $a0, $a6, $a0 + st.d $a0, $sp, 152 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(L3psycho_anal.ax_sav) addi.d $a1, $a0, %pc_lo12(L3psycho_anal.ax_sav) ori $a0, $zero, 2056 - st.d $a1, $sp, 152 # 8-byte Folded Spill + st.d $a1, $sp, 144 # 8-byte Folded Spill add.d $a2, $a1, $a0 pcalau12i $a1, %pc_hi20(L3psycho_anal.bx_sav) addi.d $a1, $a1, %pc_lo12(L3psycho_anal.bx_sav) - st.d $a1, $sp, 144 # 8-byte Folded Spill + st.d $a1, $sp, 136 # 8-byte Folded Spill add.d $a1, $a1, $a0 - st.d $a1, $sp, 448 # 8-byte Folded Spill + st.d $a1, $sp, 496 # 8-byte Folded Spill pcalau12i $a1, %pc_hi20(L3psycho_anal.rx_sav) addi.d $a1, $a1, %pc_lo12(L3psycho_anal.rx_sav) - st.d $a1, $sp, 136 # 8-byte Folded Spill + st.d $a1, $sp, 128 # 8-byte Folded Spill add.d $a0, $a1, $a0 - st.d $a0, $sp, 432 # 8-byte Folded Spill + st.d $a0, $sp, 480 # 8-byte Folded Spill + ori $s0, $zero, 3072 pcalau12i $a0, %pc_hi20(L3psycho_anal.wsamp_L) - addi.d $s1, $a0, %pc_lo12(L3psycho_anal.wsamp_L) + addi.d $s5, $a0, %pc_lo12(L3psycho_anal.wsamp_L) pcalau12i $a0, %pc_hi20(L3psycho_anal.wsamp_S) addi.d $a3, $a0, %pc_lo12(L3psycho_anal.wsamp_S) - move $s8, $zero + st.d $zero, $sp, 424 # 8-byte Folded Spill move $a1, $zero - ori $s7, $zero, 1 + ori $s8, $zero, 1 lu12i.w $a0, 258048 vreplgr2vr.w $vr7, $a0 movgr2fr.w $fa0, $zero lu12i.w $a0, -1 ori $a0, $a0, 2044 - st.d $a0, $sp, 416 # 8-byte Folded Spill - lu12i.w $a0, 1 - ori $a0, $a0, 8 + st.d $a0, $sp, 464 # 8-byte Folded Spill + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1021 st.d $a0, $sp, 216 # 8-byte Folded Spill + lu12i.w $a0, -245878 + ori $a0, $a0, 2553 + lu32i.d $a0, 9994 + lu52i.d $a0, $a0, 1019 + st.d $a0, $sp, 440 # 8-byte Folded Spill + lu12i.w $a0, -399632 + ori $a0, $a0, 2125 + lu32i.d $a0, -461178 + lu52i.d $a0, $a0, 1018 + st.d $a0, $sp, 456 # 8-byte Folded Spill + lu12i.w $a0, -135518 + ori $a0, $a0, 759 + lu32i.d $a0, -4613 + lu52i.d $a0, $a0, 1021 + st.d $a0, $sp, 432 # 8-byte Folded Spill + lu12i.w $a0, -114568 + ori $a0, $a0, 1639 + lu32i.d $a0, -364554 + lu52i.d $a0, $a0, 1022 + st.d $a0, $sp, 376 # 8-byte Folded Spill + lu12i.w $a0, -313840 + ori $a0, $a0, 1507 + lu32i.d $a0, 197272 + lu52i.d $a0, $a0, 1023 + st.d $a0, $sp, 368 # 8-byte Folded Spill + ori $a0, $zero, 0 + lu32i.d $a0, 487424 + lu52i.d $a0, $a0, 1034 + st.d $a0, $sp, 104 # 8-byte Folded Spill + lu12i.w $a4, 1 lu12i.w $a0, 258896 ori $a0, $a0, 1267 - st.d $a0, $sp, 120 # 8-byte Folded Spill - st.d $s5, $sp, 392 # 8-byte Folded Spill - st.d $s1, $sp, 368 # 8-byte Folded Spill - st.d $t6, $sp, 248 # 8-byte Folded Spill - st.d $t8, $sp, 240 # 8-byte Folded Spill - st.d $a3, $sp, 384 # 8-byte Folded Spill + st.d $a0, $sp, 112 # 8-byte Folded Spill + ori $a0, $a4, 8 + st.d $a0, $sp, 208 # 8-byte Folded Spill + st.d $s5, $sp, 248 # 8-byte Folded Spill + st.d $s7, $sp, 400 # 8-byte Folded Spill + st.d $t7, $sp, 264 # 8-byte Folded Spill + st.d $t8, $sp, 256 # 8-byte Folded Spill + st.d $a3, $sp, 392 # 8-byte Folded Spill vst $vr7, $sp, 224 # 16-byte Folded Spill - fst.s $fa0, $sp, 132 # 4-byte Folded Spill - b .LBB0_72 + fst.s $fa0, $sp, 124 # 4-byte Folded Spill + b .LBB0_71 .p2align 4, , 16 -.LBB0_71: # in Loop: Header=BB0_72 Depth=1 - ld.d $a1, $sp, 408 # 8-byte Folded Reload +.LBB0_70: # in Loop: Header=BB0_71 Depth=1 + ld.d $a1, $sp, 448 # 8-byte Folded Reload addi.d $a1, $a1, 1 - ld.d $s8, $sp, 360 # 8-byte Folded Reload - xori $s8, $s8, 1 - ld.d $a2, $sp, 400 # 8-byte Folded Reload - ld.d $a3, $sp, 216 # 8-byte Folded Reload + ld.d $a0, $sp, 424 # 8-byte Folded Reload + xori $a0, $a0, 1 + st.d $a0, $sp, 424 # 8-byte Folded Spill + ld.d $a2, $sp, 416 # 8-byte Folded Reload + ld.d $a3, $sp, 208 # 8-byte Folded Reload add.d $a2, $a2, $a3 - ld.d $a0, $sp, 448 # 8-byte Folded Reload + ld.d $a0, $sp, 496 # 8-byte Folded Reload add.d $a0, $a0, $a3 - st.d $a0, $sp, 448 # 8-byte Folded Spill - ld.d $a0, $sp, 432 # 8-byte Folded Reload + st.d $a0, $sp, 496 # 8-byte Folded Spill + ld.d $a0, $sp, 480 # 8-byte Folded Reload add.d $a0, $a0, $a3 - st.d $a0, $sp, 432 # 8-byte Folded Spill - ld.d $s5, $sp, 392 # 8-byte Folded Reload + st.d $a0, $sp, 480 # 8-byte Folded Spill + ld.d $s7, $sp, 400 # 8-byte Folded Reload ld.d $a0, $sp, 328 # 8-byte Folded Reload - ld.d $a3, $sp, 384 # 8-byte Folded Reload - beq $a1, $a0, .LBB0_196 -.LBB0_72: # =>This Loop Header: Depth=1 - # Child Loop BB0_76 Depth 2 - # Child Loop BB0_78 Depth 2 - # Child Loop BB0_80 Depth 2 - # Child Loop BB0_82 Depth 2 - # Child Loop BB0_84 Depth 2 - # Child Loop BB0_86 Depth 2 - # Child Loop BB0_88 Depth 2 - # Child Loop BB0_90 Depth 2 - # Child Loop BB0_101 Depth 2 - # Child Loop BB0_113 Depth 2 - # Child Loop BB0_129 Depth 2 - # Child Loop BB0_131 Depth 3 - # Child Loop BB0_137 Depth 2 - # Child Loop BB0_139 Depth 3 - # Child Loop BB0_144 Depth 2 - # Child Loop BB0_146 Depth 3 - # Child Loop BB0_163 Depth 2 - # Child Loop BB0_170 Depth 2 - # Child Loop BB0_172 Depth 3 - # Child Loop BB0_175 Depth 2 - # Child Loop BB0_178 Depth 3 - # Child Loop BB0_180 Depth 4 - # Child Loop BB0_186 Depth 3 - # Child Loop BB0_188 Depth 4 - # Child Loop BB0_191 Depth 3 - # Child Loop BB0_193 Depth 4 - st.d $a2, $sp, 400 # 8-byte Folded Spill + ori $s0, $zero, 3072 + ld.d $s5, $sp, 248 # 8-byte Folded Reload + ld.d $a3, $sp, 392 # 8-byte Folded Reload + beq $a1, $a0, .LBB0_195 +.LBB0_71: # =>This Loop Header: Depth=1 + # Child Loop BB0_75 Depth 2 + # Child Loop BB0_77 Depth 2 + # Child Loop BB0_79 Depth 2 + # Child Loop BB0_81 Depth 2 + # Child Loop BB0_83 Depth 2 + # Child Loop BB0_85 Depth 2 + # Child Loop BB0_87 Depth 2 + # Child Loop BB0_89 Depth 2 + # Child Loop BB0_100 Depth 2 + # Child Loop BB0_112 Depth 2 + # Child Loop BB0_128 Depth 2 + # Child Loop BB0_130 Depth 3 + # Child Loop BB0_136 Depth 2 + # Child Loop BB0_138 Depth 3 + # Child Loop BB0_143 Depth 2 + # Child Loop BB0_145 Depth 3 + # Child Loop BB0_162 Depth 2 + # Child Loop BB0_169 Depth 2 + # Child Loop BB0_171 Depth 3 + # Child Loop BB0_174 Depth 2 + # Child Loop BB0_177 Depth 3 + # Child Loop BB0_179 Depth 4 + # Child Loop BB0_185 Depth 3 + # Child Loop BB0_187 Depth 4 + # Child Loop BB0_190 Depth 3 + # Child Loop BB0_192 Depth 4 + st.d $a2, $sp, 416 # 8-byte Folded Spill andi $a0, $a1, 1 - ori $a2, $zero, 3072 - mul.d $a2, $a0, $a2 + mul.d $a2, $a0, $s0 add.d $s3, $a3, $a2 slli.d $a0, $a0, 12 - add.d $a5, $s1, $a0 + add.d $a5, $s5, $a0 pcalau12i $a0, %pc_hi20(L3psycho_anal.pe) addi.d $a2, $a0, %pc_lo12(L3psycho_anal.pe) slli.d $a4, $a1, 3 ori $a0, $zero, 488 mul.d $s1, $a1, $a0 pcalau12i $a0, %pc_hi20(L3psycho_anal.en) - addi.d $s0, $a0, %pc_lo12(L3psycho_anal.en) - st.d $a1, $sp, 408 # 8-byte Folded Spill - st.d $s0, $sp, 376 # 8-byte Folded Spill - st.d $s1, $sp, 352 # 8-byte Folded Spill - st.d $a2, $sp, 544 # 8-byte Folded Spill - st.d $a4, $sp, 528 # 8-byte Folded Spill - st.d $s3, $sp, 496 # 8-byte Folded Spill - st.d $a5, $sp, 480 # 8-byte Folded Spill - bltu $s7, $a1, .LBB0_74 -# %bb.73: # in Loop: Header=BB0_72 Depth=1 + addi.d $a0, $a0, %pc_lo12(L3psycho_anal.en) + st.d $a0, $sp, 408 # 8-byte Folded Spill + st.d $a1, $sp, 448 # 8-byte Folded Spill + st.d $s1, $sp, 384 # 8-byte Folded Spill + st.d $a2, $sp, 592 # 8-byte Folded Spill + st.d $a4, $sp, 576 # 8-byte Folded Spill + st.d $s3, $sp, 544 # 8-byte Folded Spill + st.d $a5, $sp, 528 # 8-byte Folded Spill + bltu $s8, $a1, .LBB0_73 +# %bb.72: # in Loop: Header=BB0_71 Depth=1 move $a0, $a5 - ld.d $fp, $sp, 192 # 8-byte Folded Reload - move $s0, $a2 + ld.d $fp, $sp, 184 # 8-byte Folded Reload + move $s7, $a2 move $a2, $fp - move $s5, $a4 + move $s0, $a4 pcaddu18i $ra, %call36(fft_long) jirl $ra, $ra, 0 move $a0, $s3 - ld.d $a1, $sp, 408 # 8-byte Folded Reload + ld.d $a1, $sp, 448 # 8-byte Folded Reload move $a2, $fp pcaddu18i $ra, %call36(fft_short) jirl $ra, $ra, 0 - fldx.d $fa0, $s0, $s5 - ld.d $s0, $sp, 376 # 8-byte Folded Reload - ld.d $a0, $sp, 176 # 8-byte Folded Reload - fstx.d $fa0, $a0, $s5 - ld.d $a0, $sp, 408 # 8-byte Folded Reload + fldx.d $fa0, $s7, $s0 + ld.d $s7, $sp, 400 # 8-byte Folded Reload + ld.d $s3, $sp, 408 # 8-byte Folded Reload + ld.d $a0, $sp, 168 # 8-byte Folded Reload + fstx.d $fa0, $a0, $s0 + ori $s0, $zero, 3072 + ld.d $a0, $sp, 448 # 8-byte Folded Reload ori $a1, $zero, 976 mul.d $a0, $a0, $a1 - ld.d $a1, $sp, 160 # 8-byte Folded Reload + ld.d $a1, $sp, 152 # 8-byte Folded Reload add.d $fp, $a1, $a0 - ld.d $a0, $sp, 392 # 8-byte Folded Reload - add.d $a1, $a0, $s1 + add.d $a1, $s7, $s1 ori $a2, $zero, 488 move $a0, $fp pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 addi.d $a0, $fp, 488 - add.d $a1, $s0, $s1 + add.d $a1, $s3, $s1 ori $a2, $zero, 488 pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 - ld.d $t1, $sp, 408 # 8-byte Folded Reload - ld.d $a4, $sp, 384 # 8-byte Folded Reload + ld.d $t1, $sp, 448 # 8-byte Folded Reload + ld.d $a4, $sp, 392 # 8-byte Folded Reload vldi $vr6, -1184 ori $t2, $zero, 2044 - ld.d $t0, $sp, 368 # 8-byte Folded Reload - b .LBB0_83 + b .LBB0_82 .p2align 4, , 16 -.LBB0_74: # in Loop: Header=BB0_72 Depth=1 +.LBB0_73: # in Loop: Header=BB0_71 Depth=1 fldx.d $fa0, $a2, $a4 addi.d $a0, $a1, -2 slli.d $a1, $a0, 3 - ld.d $a2, $sp, 184 # 8-byte Folded Reload + ld.d $a2, $sp, 176 # 8-byte Folded Reload fstx.d $fa0, $a2, $a1 ori $a1, $zero, 976 mul.d $a0, $a0, $a1 - ld.d $a1, $sp, 168 # 8-byte Folded Reload + ld.d $a1, $sp, 160 # 8-byte Folded Reload add.d $fp, $a1, $a0 addi.d $a0, $fp, 488 - add.d $a1, $s0, $s1 + ld.d $a1, $sp, 408 # 8-byte Folded Reload + add.d $a1, $a1, $s1 ori $a2, $zero, 488 pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 - add.d $a1, $s5, $s1 + add.d $a1, $s7, $s1 ori $a2, $zero, 488 move $a0, $fp pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 - ld.d $t1, $sp, 408 # 8-byte Folded Reload - ld.d $a4, $sp, 384 # 8-byte Folded Reload + ld.d $t1, $sp, 448 # 8-byte Folded Reload + ld.d $a4, $sp, 392 # 8-byte Folded Reload vldi $vr6, -1184 ori $t2, $zero, 2044 ori $a0, $zero, 2 ori $a6, $zero, 4080 - ld.d $t0, $sp, 368 # 8-byte Folded Reload - bne $t1, $a0, .LBB0_83 -# %bb.75: # %vector.body1574.preheader - # in Loop: Header=BB0_72 Depth=1 + bne $t1, $a0, .LBB0_82 +# %bb.74: # %vector.body1574.preheader + # in Loop: Header=BB0_71 Depth=1 lu12i.w $a5, 1 move $a0, $a5 - ld.d $a2, $sp, 120 # 8-byte Folded Reload + ld.d $a2, $sp, 112 # 8-byte Folded Reload ori $a7, $zero, 3056 .p2align 4, , 16 -.LBB0_76: # %vector.body1574 - # Parent Loop BB0_72 Depth=1 +.LBB0_75: # %vector.body1574 + # Parent Loop BB0_71 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $a1, $t0, $a0 + add.d $a1, $s5, $a0 vld $vr1, $a1, -16 vldx $vr2, $a1, $a6 vfadd.s $vr3, $vr1, $vr2 @@ -1141,13 +1146,13 @@ L3psycho_anal: # @L3psycho_anal vfmul.s $vr1, $vr1, $vr0 addi.d $a0, $a0, -16 vstx $vr1, $a1, $a6 - bnez $a0, .LBB0_76 -# %bb.77: # %vector.body1561.preheader - # in Loop: Header=BB0_72 Depth=1 + bnez $a0, .LBB0_75 +# %bb.76: # %vector.body1561.preheader + # in Loop: Header=BB0_71 Depth=1 move $a1, $zero .p2align 4, , 16 -.LBB0_78: # %vector.body1561 - # Parent Loop BB0_72 Depth=1 +.LBB0_77: # %vector.body1561 + # Parent Loop BB0_71 Depth=1 # => This Inner Loop Header: Depth=2 add.d $a2, $a4, $a1 vldx $vr1, $a2, $a7 @@ -1161,13 +1166,13 @@ L3psycho_anal: # @L3psycho_anal addi.d $a1, $a1, -16 addi.w $a0, $zero, -1024 vstx $vr1, $a2, $a3 - bne $a1, $a0, .LBB0_78 -# %bb.79: # %vector.body1548.preheader - # in Loop: Header=BB0_72 Depth=1 + bne $a1, $a0, .LBB0_77 +# %bb.78: # %vector.body1548.preheader + # in Loop: Header=BB0_71 Depth=1 move $a1, $zero .p2align 4, , 16 -.LBB0_80: # %vector.body1548 - # Parent Loop BB0_72 Depth=1 +.LBB0_79: # %vector.body1548 + # Parent Loop BB0_71 Depth=1 # => This Inner Loop Header: Depth=2 add.d $a2, $a4, $a1 vld $vr1, $a2, 2032 @@ -1180,13 +1185,13 @@ L3psycho_anal: # @L3psycho_anal vfmul.s $vr1, $vr1, $vr0 addi.d $a1, $a1, -16 vstx $vr1, $a2, $a3 - bne $a1, $a0, .LBB0_80 -# %bb.81: # %vector.body1535.preheader - # in Loop: Header=BB0_72 Depth=1 + bne $a1, $a0, .LBB0_79 +# %bb.80: # %vector.body1535.preheader + # in Loop: Header=BB0_71 Depth=1 move $a1, $zero .p2align 4, , 16 -.LBB0_82: # %vector.body1535 - # Parent Loop BB0_72 Depth=1 +.LBB0_81: # %vector.body1535 + # Parent Loop BB0_71 Depth=1 # => This Inner Loop Header: Depth=2 add.d $a2, $a4, $a1 vld $vr1, $a2, 1008 @@ -1198,51 +1203,50 @@ L3psycho_anal: # @L3psycho_anal vfmul.s $vr1, $vr1, $vr0 addi.d $a1, $a1, -16 vstx $vr1, $a2, $a6 - bne $a1, $a0, .LBB0_82 -.LBB0_83: # %.loopexit960 - # in Loop: Header=BB0_72 Depth=1 - st.d $s8, $sp, 360 # 8-byte Folded Spill - andi $a0, $s8, 1 - ori $a1, $zero, 3072 - mul.d $t4, $a0, $a1 + bne $a1, $a0, .LBB0_81 +.LBB0_82: # %.loopexit960 + # in Loop: Header=BB0_71 Depth=1 + ld.d $a0, $sp, 424 # 8-byte Folded Reload + andi $a0, $a0, 1 + mul.d $t3, $a0, $s0 slli.d $a0, $a0, 12 - add.d $s8, $t0, $a0 - add.d $a4, $a4, $t4 + add.d $s7, $s5, $a0 + add.d $a4, $a4, $t3 addi.d $a0, $a4, 4 addi.d $a1, $a4, 1008 addi.d $a2, $a4, 1028 addi.d $a3, $a4, 2032 - ld.d $a5, $sp, 480 # 8-byte Folded Reload + ld.d $a5, $sp, 528 # 8-byte Folded Reload fld.s $fs0, $a5, 0 addi.d $a5, $a4, 2047 addi.d $a4, $a5, 5 addi.d $a5, $a5, 1009 fmul.s $fa0, $fs0, $fs0 pcalau12i $a6, %pc_hi20(L3psycho_anal.energy) - addi.d $t3, $a6, %pc_lo12(L3psycho_anal.energy) - fst.s $fa0, $t3, 0 - addi.d $a6, $sp, 576 + addi.d $t4, $a6, %pc_lo12(L3psycho_anal.energy) + fst.s $fa0, $t4, 0 + addi.d $a6, $sp, 624 alsl.d $a6, $t1, $a6, 2 ori $a7, $zero, 4092 ori $t0, $zero, 4 vld $vr7, $sp, 224 # 16-byte Folded Reload .p2align 4, , 16 -.LBB0_84: # Parent Loop BB0_72 Depth=1 +.LBB0_83: # Parent Loop BB0_71 Depth=1 # => This Inner Loop Header: Depth=2 - fldx.s $fa1, $s8, $a7 - fldx.s $fa2, $s8, $t0 + fldx.s $fa1, $s7, $a7 + fldx.s $fa2, $s7, $t0 fmul.s $fa1, $fa1, $fa1 fmadd.s $fa1, $fa2, $fa2, $fa1 fmul.s $fa1, $fa1, $fa6 - fstx.s $fa1, $t3, $t0 + fstx.s $fa1, $t4, $t0 fadd.s $fa0, $fa0, $fa1 addi.d $a7, $a7, -4 addi.d $t0, $t0, 4 - bne $a7, $t2, .LBB0_84 -# %bb.85: # %.preheader958 - # in Loop: Header=BB0_72 Depth=1 + bne $a7, $t2, .LBB0_83 +# %bb.84: # %.preheader958 + # in Loop: Header=BB0_71 Depth=1 ori $a7, $zero, 2048 - ld.d $t5, $sp, 496 # 8-byte Folded Reload + ld.d $t5, $sp, 544 # 8-byte Folded Reload fldx.s $fa1, $t5, $a7 fst.s $fa0, $a6, 0 fmul.s $fa0, $fa1, $fa1 @@ -1250,12 +1254,12 @@ L3psycho_anal: # @L3psycho_anal addi.d $fp, $a6, %pc_lo12(L3psycho_anal.energy_s) move $a6, $zero fst.s $fa0, $fp, 1032 - ld.d $t0, $sp, 512 # 8-byte Folded Reload + ld.d $t0, $sp, 560 # 8-byte Folded Reload ori $t2, $zero, 512 ori $ra, $zero, 3 .p2align 4, , 16 -.LBB0_86: # %vector.body1525 - # Parent Loop BB0_72 Depth=1 +.LBB0_85: # %vector.body1525 + # Parent Loop BB0_71 Depth=1 # => This Inner Loop Header: Depth=2 vld $vr0, $a5, 0 vldx $vr1, $a4, $a6 @@ -1267,16 +1271,16 @@ L3psycho_anal: # @L3psycho_anal vst $vr0, $a7, 1036 addi.d $a6, $a6, 16 addi.d $a5, $a5, -16 - bne $a6, $t2, .LBB0_86 -# %bb.87: # %middle.block1532 - # in Loop: Header=BB0_72 Depth=1 + bne $a6, $t2, .LBB0_85 +# %bb.86: # %middle.block1532 + # in Loop: Header=BB0_71 Depth=1 fld.s $fa0, $t5, 1024 move $a4, $zero fmul.s $fa0, $fa0, $fa0 fst.s $fa0, $fp, 516 .p2align 4, , 16 -.LBB0_88: # %vector.body1515 - # Parent Loop BB0_72 Depth=1 +.LBB0_87: # %vector.body1515 + # Parent Loop BB0_71 Depth=1 # => This Inner Loop Header: Depth=2 vld $vr0, $a3, 0 vldx $vr1, $a2, $a4 @@ -1288,16 +1292,16 @@ L3psycho_anal: # @L3psycho_anal vst $vr0, $a5, 520 addi.d $a4, $a4, 16 addi.d $a3, $a3, -16 - bne $a4, $t2, .LBB0_88 -# %bb.89: # %middle.block1522 - # in Loop: Header=BB0_72 Depth=1 + bne $a4, $t2, .LBB0_87 +# %bb.88: # %middle.block1522 + # in Loop: Header=BB0_71 Depth=1 fld.s $fa0, $t5, 0 move $a2, $zero fmul.s $fa0, $fa0, $fa0 fst.s $fa0, $fp, 0 .p2align 4, , 16 -.LBB0_90: # %vector.body1506 - # Parent Loop BB0_72 Depth=1 +.LBB0_89: # %vector.body1506 + # Parent Loop BB0_71 Depth=1 # => This Inner Loop Header: Depth=2 vld $vr0, $a1, 0 vldx $vr1, $a0, $a2 @@ -1309,22 +1313,22 @@ L3psycho_anal: # @L3psycho_anal vst $vr0, $a3, 4 addi.d $a2, $a2, 16 addi.d $a1, $a1, -16 - bne $a2, $t2, .LBB0_90 -# %bb.91: # %.preheader957 - # in Loop: Header=BB0_72 Depth=1 + bne $a2, $t2, .LBB0_89 +# %bb.90: # %.preheader957 + # in Loop: Header=BB0_71 Depth=1 ld.d $a3, $sp, 312 # 8-byte Folded Reload ld.bu $a0, $a3, %pc_lo12(L3psycho_anal.cw_lower_index) - ld.d $a5, $sp, 400 # 8-byte Folded Reload - beqz $a0, .LBB0_109 -# %bb.92: # %.lr.ph1036 - # in Loop: Header=BB0_72 Depth=1 + ld.d $a5, $sp, 416 # 8-byte Folded Reload + beqz $a0, .LBB0_108 +# %bb.91: # %.lr.ph1036 + # in Loop: Header=BB0_71 Depth=1 slli.d $a0, $t1, 12 alsl.d $a0, $t1, $a0, 3 - ld.d $a4, $sp, 152 # 8-byte Folded Reload + ld.d $a4, $sp, 144 # 8-byte Folded Reload add.d $a1, $a4, $a0 - ld.d $a6, $sp, 144 # 8-byte Folded Reload + ld.d $a6, $sp, 136 # 8-byte Folded Reload add.d $a2, $a6, $a0 - ld.d $a7, $sp, 136 # 8-byte Folded Reload + ld.d $a7, $sp, 128 # 8-byte Folded Reload add.d $s0, $a7, $a0 ori $t2, $zero, 2052 fldx.s $fs5, $a1, $t2 @@ -1334,31 +1338,31 @@ L3psycho_anal: # @L3psycho_anal fldx.s $fs6, $a6, $a0 fldx.s $fs3, $a7, $a0 fstx.s $fs2, $a1, $t2 - fld.s $fa1, $t3, 0 + fld.s $fa1, $t4, 0 fstx.s $fs6, $a2, $t2 fstx.s $fs3, $s0, $t2 fstx.s $fs0, $a4, $a0 fsqrt.s $fa0, $fa1 fcmp.cor.s $fcc0, $fa0, $fa0 fstx.s $fs0, $a6, $a0 - ld.d $s1, $sp, 424 # 8-byte Folded Reload - bceqz $fcc0, .LBB0_194 -# %bb.93: # %.lr.ph1036.split - # in Loop: Header=BB0_72 Depth=1 - fld.s $fa4, $sp, 132 # 4-byte Folded Reload + bceqz $fcc0, .LBB0_193 +# %bb.92: # %.lr.ph1036.split + # in Loop: Header=BB0_71 Depth=1 + fld.s $fa4, $sp, 124 # 4-byte Folded Reload fcmp.ceq.s $fcc0, $fs3, $fa4 fst.s $fa0, $s0, 0 - bcnez $fcc0, .LBB0_195 -.LBB0_94: # in Loop: Header=BB0_72 Depth=1 + bcnez $fcc0, .LBB0_194 +.LBB0_93: # in Loop: Header=BB0_71 Depth=1 fmul.s $fa3, $fs2, $fs6 fneg.s $fa1, $fs6 fmul.s $fa1, $fs6, $fa1 fmadd.s $fa1, $fs2, $fs2, $fa1 fmul.s $fa2, $fa1, $fa6 fmul.s $fa1, $fs3, $fs3 + ld.d $a1, $sp, 472 # 8-byte Folded Reload fcmp.ceq.s $fcc0, $fs1, $fa4 - bcnez $fcc0, .LBB0_96 -.LBB0_95: # in Loop: Header=BB0_72 Depth=1 + bcnez $fcc0, .LBB0_95 +.LBB0_94: # in Loop: Header=BB0_71 Depth=1 fadd.s $fa4, $fa2, $fa3 fadd.s $fa5, $fs5, $fs4 fmul.s $fa4, $fa5, $fa4 @@ -1369,15 +1373,15 @@ L3psycho_anal: # @L3psycho_anal fmadd.s $fa3, $fa3, $fa2, $fa4 fmul.s $fa1, $fs1, $fa1 fmov.s $fa2, $fa5 -.LBB0_96: # in Loop: Header=BB0_72 Depth=1 +.LBB0_95: # in Loop: Header=BB0_71 Depth=1 vldi $vr4, -1280 fmsub.s $fa4, $fs3, $fa4, $fs1 fabs.s $fa5, $fa4 fadd.s $fa0, $fa5, $fa0 movgr2fr.w $ft0, $zero fcmp.ceq.s $fcc0, $fa0, $ft0 - bcnez $fcc0, .LBB0_98 -# %bb.97: # in Loop: Header=BB0_72 Depth=1 + bcnez $fcc0, .LBB0_97 +# %bb.96: # in Loop: Header=BB0_71 Depth=1 fadd.s $fa5, $fs0, $fs0 fneg.s $fa4, $fa4 fdiv.s $fa1, $fa4, $fa1 @@ -1393,20 +1397,20 @@ L3psycho_anal: # @L3psycho_anal fcvt.d.s $fa0, $fa0 fdiv.d $fa0, $fa1, $fa0 fcvt.s.d $fa0, $fa0 -.LBB0_98: # in Loop: Header=BB0_72 Depth=1 +.LBB0_97: # in Loop: Header=BB0_71 Depth=1 ld.bu $a0, $a3, %pc_lo12(L3psycho_anal.cw_lower_index) - fst.s $fa0, $s1, 0 - beqz $a0, .LBB0_109 -# %bb.99: # %.peel.next.preheader - # in Loop: Header=BB0_72 Depth=1 + fst.s $fa0, $a1, 0 + beqz $a0, .LBB0_108 +# %bb.98: # %.peel.next.preheader + # in Loop: Header=BB0_71 Depth=1 move $s0, $zero ori $s1, $zero, 1 ori $s3, $zero, 4092 - b .LBB0_101 + b .LBB0_100 .p2align 4, , 16 -.LBB0_100: # in Loop: Header=BB0_101 Depth=2 +.LBB0_99: # in Loop: Header=BB0_100 Depth=2 ld.bu $a0, $a3, %pc_lo12(L3psycho_anal.cw_lower_index) - ld.d $a1, $sp, 424 # 8-byte Folded Reload + ld.d $a1, $sp, 472 # 8-byte Folded Reload add.d $a1, $a1, $s0 fst.s $fa0, $a1, 4 addi.d $s1, $s1, 1 @@ -1414,42 +1418,42 @@ L3psycho_anal: # @L3psycho_anal maskeqz $s5, $a1, $a0 addi.d $s3, $s3, -4 addi.d $s0, $s0, 4 - bgeu $s1, $s5, .LBB0_110 -.LBB0_101: # %.peel.next - # Parent Loop BB0_72 Depth=1 + bgeu $s1, $s5, .LBB0_109 +.LBB0_100: # %.peel.next + # Parent Loop BB0_71 Depth=1 # => This Inner Loop Header: Depth=2 add.d $a0, $a5, $s0 fldx.s $fs6, $a5, $s0 - ld.d $a2, $sp, 448 # 8-byte Folded Reload + ld.d $a2, $sp, 496 # 8-byte Folded Reload add.d $a1, $a2, $s0 fldx.s $fs7, $a2, $s0 - ld.d $a6, $sp, 416 # 8-byte Folded Reload + ld.d $a6, $sp, 464 # 8-byte Folded Reload fldx.s $fs2, $a0, $a6 fldx.s $fs1, $a1, $a6 - ld.d $a4, $sp, 432 # 8-byte Folded Reload + ld.d $a4, $sp, 480 # 8-byte Folded Reload add.d $s5, $a4, $s0 fldx.s $fs3, $a4, $s0 fstx.s $fs2, $a5, $s0 fstx.s $fs1, $a2, $s0 fldx.s $fs5, $s5, $a6 - add.d $a2, $s8, $s0 + add.d $a2, $s7, $s0 fld.s $fs0, $a2, 4 - add.d $a2, $t3, $s0 + add.d $a2, $t4, $s0 fld.s $fa1, $a2, 4 fstx.s $fs5, $a4, $s0 - fldx.s $fs4, $s8, $s3 + fldx.s $fs4, $s7, $s3 fstx.s $fs0, $a0, $a6 move $a0, $a6 fsqrt.s $fa0, $fa1 fcmp.cor.s $fcc0, $fa0, $fa0 fstx.s $fs4, $a1, $a6 - bceqz $fcc0, .LBB0_107 -# %bb.102: # %.peel.next.split - # in Loop: Header=BB0_101 Depth=2 + bceqz $fcc0, .LBB0_106 +# %bb.101: # %.peel.next.split + # in Loop: Header=BB0_100 Depth=2 fcmp.ceq.s $fcc0, $fs5, $ft0 fstx.s $fa0, $s5, $a0 - bcnez $fcc0, .LBB0_108 -.LBB0_103: # in Loop: Header=BB0_101 Depth=2 + bcnez $fcc0, .LBB0_107 +.LBB0_102: # in Loop: Header=BB0_100 Depth=2 fmul.s $fa1, $fs2, $fs1 fneg.s $fa2, $fs1 fmul.s $fa2, $fs1, $fa2 @@ -1457,8 +1461,8 @@ L3psycho_anal: # @L3psycho_anal fmul.s $fa3, $fa2, $fa6 fmul.s $fa2, $fs5, $fs5 fcmp.ceq.s $fcc0, $fs3, $ft0 - bcnez $fcc0, .LBB0_105 -.LBB0_104: # in Loop: Header=BB0_101 Depth=2 + bcnez $fcc0, .LBB0_104 +.LBB0_103: # in Loop: Header=BB0_100 Depth=2 fadd.s $fa4, $fa3, $fa1 fadd.s $fa5, $fs6, $fs7 fmul.s $fa4, $fa5, $fa4 @@ -1469,15 +1473,15 @@ L3psycho_anal: # @L3psycho_anal fmadd.s $fa1, $fa1, $fa3, $fa4 fmul.s $fa2, $fs3, $fa2 fmov.s $fa3, $fa5 -.LBB0_105: # in Loop: Header=BB0_101 Depth=2 +.LBB0_104: # in Loop: Header=BB0_100 Depth=2 vldi $vr4, -1280 fmsub.s $fa4, $fs5, $fa4, $fs3 fabs.s $fa5, $fa4 fadd.s $fa0, $fa5, $fa0 movgr2fr.w $fa5, $zero fcmp.ceq.s $fcc0, $fa0, $fa5 - bcnez $fcc0, .LBB0_100 -# %bb.106: # in Loop: Header=BB0_101 Depth=2 + bcnez $fcc0, .LBB0_99 +# %bb.105: # in Loop: Header=BB0_100 Depth=2 fadd.s $fa5, $fs0, $fs4 fneg.s $fa4, $fa4 fdiv.s $fa2, $fa4, $fa2 @@ -1493,53 +1497,53 @@ L3psycho_anal: # @L3psycho_anal fcvt.d.s $fa0, $fa0 fdiv.d $fa0, $fa1, $fa0 fcvt.s.d $fa0, $fa0 - b .LBB0_100 -.LBB0_107: # %call.sqrt1991 - # in Loop: Header=BB0_101 Depth=2 + b .LBB0_99 +.LBB0_106: # %call.sqrt1991 + # in Loop: Header=BB0_100 Depth=2 fmov.s $fa0, $fa1 - st.d $t3, $sp, 496 # 8-byte Folded Spill - st.d $t4, $sp, 480 # 8-byte Folded Spill - fst.s $ft0, $sp, 464 # 4-byte Folded Spill + st.d $t4, $sp, 544 # 8-byte Folded Spill + st.d $t3, $sp, 528 # 8-byte Folded Spill + fst.s $ft0, $sp, 512 # 4-byte Folded Spill pcaddu18i $ra, %call36(sqrtf) jirl $ra, $ra, 0 - fld.s $ft0, $sp, 464 # 4-byte Folded Reload - ld.d $t4, $sp, 480 # 8-byte Folded Reload - ld.d $t3, $sp, 496 # 8-byte Folded Reload + fld.s $ft0, $sp, 512 # 4-byte Folded Reload + ld.d $t3, $sp, 528 # 8-byte Folded Reload + ld.d $t4, $sp, 544 # 8-byte Folded Reload ori $ra, $zero, 3 - ld.d $a0, $sp, 416 # 8-byte Folded Reload + ld.d $a0, $sp, 464 # 8-byte Folded Reload vldi $vr6, -1184 - ld.d $t1, $sp, 408 # 8-byte Folded Reload - ld.d $a5, $sp, 400 # 8-byte Folded Reload - ld.d $t0, $sp, 512 # 8-byte Folded Reload + ld.d $t1, $sp, 448 # 8-byte Folded Reload + ld.d $a5, $sp, 416 # 8-byte Folded Reload + ld.d $t0, $sp, 560 # 8-byte Folded Reload ld.d $a3, $sp, 312 # 8-byte Folded Reload fcmp.ceq.s $fcc0, $fs5, $ft0 fstx.s $fa0, $s5, $a0 - bceqz $fcc0, .LBB0_103 + bceqz $fcc0, .LBB0_102 .p2align 4, , 16 -.LBB0_108: # in Loop: Header=BB0_101 Depth=2 +.LBB0_107: # in Loop: Header=BB0_100 Depth=2 vldi $vr2, -1168 fmov.s $fa3, $ft0 vldi $vr1, -1168 fcmp.ceq.s $fcc0, $fs3, $ft0 - bceqz $fcc0, .LBB0_104 - b .LBB0_105 + bceqz $fcc0, .LBB0_103 + b .LBB0_104 .p2align 4, , 16 -.LBB0_109: # in Loop: Header=BB0_72 Depth=1 +.LBB0_108: # in Loop: Header=BB0_71 Depth=1 move $s5, $zero -.LBB0_110: # %.preheader956 - # in Loop: Header=BB0_72 Depth=1 +.LBB0_109: # %.preheader956 + # in Loop: Header=BB0_71 Depth=1 ld.d $a3, $sp, 320 # 8-byte Folded Reload ld.w $a2, $a3, %pc_lo12(L3psycho_anal.cw_upper_index) - bge $s5, $a2, .LBB0_125 -# %bb.111: # %.lr.ph1039 - # in Loop: Header=BB0_72 Depth=1 - ld.d $a0, $sp, 424 # 8-byte Folded Reload + bge $s5, $a2, .LBB0_124 +# %bb.110: # %.lr.ph1039 + # in Loop: Header=BB0_71 Depth=1 + ld.d $a0, $sp, 472 # 8-byte Folded Reload alsl.d $a0, $s5, $a0, 2 - addi.d $s8, $a0, 8 + addi.d $s7, $a0, 8 addi.d $a0, $s5, 2 andi $a0, $a0, 12 - sub.d $a1, $t4, $a0 - ld.d $a2, $sp, 384 # 8-byte Folded Reload + sub.d $a1, $t3, $a0 + ld.d $a2, $sp, 392 # 8-byte Folded Reload addi.d $a2, $a2, 2047 addi.d $a2, $a2, 1 add.d $s0, $a2, $a1 @@ -1547,29 +1551,29 @@ L3psycho_anal: # @L3psycho_anal addi.d $a1, $a1, %pc_lo12(L3psycho_anal.energy_s) add.d $a1, $a1, $a0 addi.d $s1, $a1, 516 - add.d $a0, $t4, $a0 + add.d $a0, $t3, $a0 add.d $s3, $a2, $a0 - b .LBB0_113 + b .LBB0_112 .p2align 4, , 16 -.LBB0_112: # in Loop: Header=BB0_113 Depth=2 - fst.s $fa0, $s8, -8 - fst.s $fa0, $s8, 4 - fst.s $fa0, $s8, 0 - fst.s $fa0, $s8, -4 +.LBB0_111: # in Loop: Header=BB0_112 Depth=2 + fst.s $fa0, $s7, -8 + fst.s $fa0, $s7, 4 + fst.s $fa0, $s7, 0 + fst.s $fa0, $s7, -4 addi.d $s5, $s5, 4 ld.w $a2, $a3, %pc_lo12(L3psycho_anal.cw_upper_index) - addi.d $s8, $s8, 16 + addi.d $s7, $s7, 16 addi.d $s0, $s0, -4 addi.d $s1, $s1, 4 addi.d $s3, $s3, 4 - bge $s5, $a2, .LBB0_125 -.LBB0_113: # Parent Loop BB0_72 Depth=1 + bge $s5, $a2, .LBB0_124 +.LBB0_112: # Parent Loop BB0_71 Depth=1 # => This Inner Loop Header: Depth=2 fld.s $fa5, $s1, -516 movgr2fr.w $fs4, $zero fcmp.ceq.s $fcc0, $fa5, $fs4 - bcnez $fcc0, .LBB0_116 -# %bb.114: # in Loop: Header=BB0_113 Depth=2 + bcnez $fcc0, .LBB0_115 +# %bb.113: # in Loop: Header=BB0_112 Depth=2 fld.s $fa0, $s3, -2048 fld.s $fa1, $s0, -1024 fmul.s $ft0, $fa0, $fa1 @@ -1579,43 +1583,43 @@ L3psycho_anal: # @L3psycho_anal fsqrt.s $fs0, $fa5 fcmp.cor.s $fcc0, $fs0, $fs0 fmul.s $fs3, $fa0, $fa6 - bcnez $fcc0, .LBB0_117 -# %bb.115: # %call.sqrt1992 - # in Loop: Header=BB0_113 Depth=2 + bcnez $fcc0, .LBB0_116 +# %bb.114: # %call.sqrt1992 + # in Loop: Header=BB0_112 Depth=2 fmov.s $fa0, $fa5 - st.d $t3, $sp, 496 # 8-byte Folded Spill - vst $vr5, $sp, 480 # 16-byte Folded Spill - vst $vr8, $sp, 464 # 16-byte Folded Spill + st.d $t4, $sp, 544 # 8-byte Folded Spill + vst $vr5, $sp, 528 # 16-byte Folded Spill + vst $vr8, $sp, 512 # 16-byte Folded Spill pcaddu18i $ra, %call36(sqrtf) jirl $ra, $ra, 0 - vld $vr8, $sp, 464 # 16-byte Folded Reload - vld $vr5, $sp, 480 # 16-byte Folded Reload - ld.d $t3, $sp, 496 # 8-byte Folded Reload + vld $vr8, $sp, 512 # 16-byte Folded Reload + vld $vr5, $sp, 528 # 16-byte Folded Reload + ld.d $t4, $sp, 544 # 8-byte Folded Reload ori $ra, $zero, 3 vldi $vr6, -1184 - ld.d $t1, $sp, 408 # 8-byte Folded Reload - ld.d $t0, $sp, 512 # 8-byte Folded Reload + ld.d $t1, $sp, 448 # 8-byte Folded Reload + ld.d $t0, $sp, 560 # 8-byte Folded Reload ld.d $a3, $sp, 320 # 8-byte Folded Reload fmov.s $fs0, $fa0 - b .LBB0_117 + b .LBB0_116 .p2align 4, , 16 -.LBB0_116: # in Loop: Header=BB0_113 Depth=2 +.LBB0_115: # in Loop: Header=BB0_112 Depth=2 vldi $vr8, -1168 fmov.s $fs0, $fa5 fmov.s $fs3, $fs4 vldi $vr5, -1168 -.LBB0_117: # in Loop: Header=BB0_113 Depth=2 +.LBB0_116: # in Loop: Header=BB0_112 Depth=2 fld.s $fs1, $s1, 516 fcmp.ceq.s $fcc0, $fs1, $fs4 - bcnez $fcc0, .LBB0_120 -# %bb.118: # in Loop: Header=BB0_113 Depth=2 + bcnez $fcc0, .LBB0_119 +# %bb.117: # in Loop: Header=BB0_112 Depth=2 fld.s $fs4, $s3, 0 fld.s $fs2, $s0, 1024 fsqrt.s $fa0, $fs1 fcmp.cor.s $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB0_124 -.LBB0_119: # %.split1993 - # in Loop: Header=BB0_113 Depth=2 + bceqz $fcc0, .LBB0_123 +.LBB0_118: # %.split1993 + # in Loop: Header=BB0_112 Depth=2 fadd.s $fa1, $ft0, $fs3 fadd.s $fa2, $fs4, $fs2 fmul.s $fa1, $fa1, $fa2 @@ -1627,21 +1631,21 @@ L3psycho_anal: # @L3psycho_anal fmul.s $fa5, $fa5, $fa0 fmov.s $fs1, $fa0 fmov.s $fs3, $fa2 -.LBB0_120: # in Loop: Header=BB0_113 Depth=2 +.LBB0_119: # in Loop: Header=BB0_112 Depth=2 fld.s $fa1, $s1, 0 fsqrt.s $fa0, $fa1 fcmp.cor.s $fcc0, $fa0, $fa0 - bceqz $fcc0, .LBB0_123 -.LBB0_121: # %.split1995 - # in Loop: Header=BB0_113 Depth=2 + bceqz $fcc0, .LBB0_122 +.LBB0_120: # %.split1995 + # in Loop: Header=BB0_112 Depth=2 vldi $vr1, -1280 fmsub.s $fa1, $fs0, $fa1, $fs1 fabs.s $fa2, $fa1 fadd.s $fa0, $fa2, $fa0 movgr2fr.w $fa2, $zero fcmp.ceq.s $fcc0, $fa0, $fa2 - bcnez $fcc0, .LBB0_112 -# %bb.122: # in Loop: Header=BB0_113 Depth=2 + bcnez $fcc0, .LBB0_111 +# %bb.121: # in Loop: Header=BB0_112 Depth=2 fld.s $fa2, $s3, -1024 fld.s $fa3, $s0, 0 fadd.s $fa4, $fa2, $fa3 @@ -1659,91 +1663,92 @@ L3psycho_anal: # @L3psycho_anal fcvt.d.s $fa0, $fa0 fdiv.d $fa0, $fa1, $fa0 fcvt.s.d $fa0, $fa0 - b .LBB0_112 -.LBB0_123: # %call.sqrt1996 - # in Loop: Header=BB0_113 Depth=2 + b .LBB0_111 +.LBB0_122: # %call.sqrt1996 + # in Loop: Header=BB0_112 Depth=2 fmov.s $fa0, $fa1 - st.d $t3, $sp, 496 # 8-byte Folded Spill - vst $vr5, $sp, 480 # 16-byte Folded Spill - vst $vr8, $sp, 464 # 16-byte Folded Spill + st.d $t4, $sp, 544 # 8-byte Folded Spill + vst $vr5, $sp, 528 # 16-byte Folded Spill + vst $vr8, $sp, 512 # 16-byte Folded Spill pcaddu18i $ra, %call36(sqrtf) jirl $ra, $ra, 0 - vld $vr8, $sp, 464 # 16-byte Folded Reload - vld $vr5, $sp, 480 # 16-byte Folded Reload - ld.d $t3, $sp, 496 # 8-byte Folded Reload + vld $vr8, $sp, 512 # 16-byte Folded Reload + vld $vr5, $sp, 528 # 16-byte Folded Reload + ld.d $t4, $sp, 544 # 8-byte Folded Reload ori $ra, $zero, 3 vldi $vr6, -1184 - ld.d $t1, $sp, 408 # 8-byte Folded Reload - ld.d $t0, $sp, 512 # 8-byte Folded Reload + ld.d $t1, $sp, 448 # 8-byte Folded Reload + ld.d $t0, $sp, 560 # 8-byte Folded Reload ld.d $a3, $sp, 320 # 8-byte Folded Reload - b .LBB0_121 -.LBB0_124: # %call.sqrt1994 - # in Loop: Header=BB0_113 Depth=2 + b .LBB0_120 +.LBB0_123: # %call.sqrt1994 + # in Loop: Header=BB0_112 Depth=2 fmov.s $fa0, $fs1 - st.d $t3, $sp, 496 # 8-byte Folded Spill - vst $vr5, $sp, 480 # 16-byte Folded Spill - vst $vr8, $sp, 464 # 16-byte Folded Spill + st.d $t4, $sp, 544 # 8-byte Folded Spill + vst $vr5, $sp, 528 # 16-byte Folded Spill + vst $vr8, $sp, 512 # 16-byte Folded Spill pcaddu18i $ra, %call36(sqrtf) jirl $ra, $ra, 0 - vld $vr8, $sp, 464 # 16-byte Folded Reload - vld $vr5, $sp, 480 # 16-byte Folded Reload - ld.d $t3, $sp, 496 # 8-byte Folded Reload + vld $vr8, $sp, 512 # 16-byte Folded Reload + vld $vr5, $sp, 528 # 16-byte Folded Reload + ld.d $t4, $sp, 544 # 8-byte Folded Reload ori $ra, $zero, 3 vldi $vr6, -1184 - ld.d $t1, $sp, 408 # 8-byte Folded Reload - ld.d $t0, $sp, 512 # 8-byte Folded Reload + ld.d $t1, $sp, 448 # 8-byte Folded Reload + ld.d $t0, $sp, 560 # 8-byte Folded Reload ld.d $a3, $sp, 320 # 8-byte Folded Reload - b .LBB0_119 + b .LBB0_118 .p2align 4, , 16 -.LBB0_125: # %.preheader955 - # in Loop: Header=BB0_72 Depth=1 +.LBB0_124: # %.preheader955 + # in Loop: Header=BB0_71 Depth=1 pcalau12i $a0, %pc_hi20(L3psycho_anal.numlines_l) - addi.d $t4, $a0, %pc_lo12(L3psycho_anal.numlines_l) + addi.d $t5, $a0, %pc_lo12(L3psycho_anal.numlines_l) pcalau12i $a0, %pc_hi20(L3psycho_anal.eb) - addi.d $s8, $a0, %pc_lo12(L3psycho_anal.eb) + addi.d $s7, $a0, %pc_lo12(L3psycho_anal.eb) pcalau12i $a0, %pc_hi20(L3psycho_anal.cb) - addi.d $t5, $a0, %pc_lo12(L3psycho_anal.cb) - blez $a2, .LBB0_133 -# %bb.126: # %.lr.ph1054.preheader - # in Loop: Header=BB0_72 Depth=1 + addi.d $t6, $a0, %pc_lo12(L3psycho_anal.cb) + blez $a2, .LBB0_132 +# %bb.125: # %.lr.ph1054.preheader + # in Loop: Header=BB0_71 Depth=1 move $a0, $zero move $a1, $zero - ld.d $a7, $sp, 424 # 8-byte Folded Reload + ld.d $a7, $sp, 472 # 8-byte Folded Reload ori $t2, $zero, 2 - b .LBB0_129 + ld.d $t3, $sp, 216 # 8-byte Folded Reload + b .LBB0_128 .p2align 4, , 16 -.LBB0_127: # in Loop: Header=BB0_129 Depth=2 +.LBB0_126: # in Loop: Header=BB0_128 Depth=2 move $a1, $a5 -.LBB0_128: # %._crit_edge1048 - # in Loop: Header=BB0_129 Depth=2 +.LBB0_127: # %._crit_edge1048 + # in Loop: Header=BB0_128 Depth=2 slli.d $a3, $a0, 3 - fstx.d $fa0, $s8, $a3 - fstx.d $fa1, $t5, $a3 + fstx.d $fa0, $s7, $a3 + fstx.d $fa1, $t6, $a3 addi.d $a0, $a0, 1 - bge $a1, $a2, .LBB0_134 -.LBB0_129: # %.lr.ph1054 - # Parent Loop BB0_72 Depth=1 + bge $a1, $a2, .LBB0_133 +.LBB0_128: # %.lr.ph1054 + # Parent Loop BB0_71 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_131 Depth 3 + # Child Loop BB0_130 Depth 3 slli.d $a3, $a1, 2 - fldx.s $fa1, $t3, $a3 + fldx.s $fa1, $t4, $a3 fldx.s $fa2, $a7, $a3 slli.d $a3, $a0, 2 - ldx.w $a3, $t4, $a3 + ldx.w $a3, $t5, $a3 fcvt.d.s $fa0, $fa1 fmul.s $fa1, $fa1, $fa2 fcvt.d.s $fa1, $fa1 addi.w $a5, $a1, 1 - blt $a3, $t2, .LBB0_127 -# %bb.130: # %.lr.ph1047.preheader - # in Loop: Header=BB0_129 Depth=2 - alsl.d $a4, $a5, $t3, 2 + blt $a3, $t2, .LBB0_126 +# %bb.129: # %.lr.ph1047.preheader + # in Loop: Header=BB0_128 Depth=2 + alsl.d $a4, $a5, $t4, 2 alsl.d $a5, $a5, $a7, 2 addi.d $a6, $a3, 1 .p2align 4, , 16 -.LBB0_131: # %.lr.ph1047 - # Parent Loop BB0_72 Depth=1 - # Parent Loop BB0_129 Depth=2 +.LBB0_130: # %.lr.ph1047 + # Parent Loop BB0_71 Depth=1 + # Parent Loop BB0_128 Depth=2 # => This Inner Loop Header: Depth=3 fld.s $fa2, $a4, 0 fld.s $fa3, $a5, 0 @@ -1755,130 +1760,130 @@ L3psycho_anal: # @L3psycho_anal addi.d $a4, $a4, 4 addi.w $a6, $a6, -1 addi.d $a5, $a5, 4 - bltu $t2, $a6, .LBB0_131 -# %bb.132: # %._crit_edge1048.loopexit - # in Loop: Header=BB0_129 Depth=2 + bltu $t2, $a6, .LBB0_130 +# %bb.131: # %._crit_edge1048.loopexit + # in Loop: Header=BB0_128 Depth=2 add.w $a1, $a1, $a3 - b .LBB0_128 + b .LBB0_127 .p2align 4, , 16 -.LBB0_133: # in Loop: Header=BB0_72 Depth=1 +.LBB0_132: # in Loop: Header=BB0_71 Depth=1 move $a1, $zero move $a0, $zero ori $t2, $zero, 2 -.LBB0_134: # %.preheader954 - # in Loop: Header=BB0_72 Depth=1 - ld.d $a2, $sp, 264 # 8-byte Folded Reload + ld.d $t3, $sp, 216 # 8-byte Folded Reload +.LBB0_133: # %.preheader954 + # in Loop: Header=BB0_71 Depth=1 + ld.d $a2, $sp, 280 # 8-byte Folded Reload ld.w $a2, $a2, %pc_lo12(L3psycho_anal.npart_l_orig) addi.w $a3, $a0, 0 - bge $a3, $a2, .LBB0_141 -# %bb.135: # in Loop: Header=BB0_72 Depth=1 + bge $a3, $a2, .LBB0_140 +# %bb.134: # in Loop: Header=BB0_71 Depth=1 bstrpick.d $a2, $a2, 31, 0 move $a3, $a0 - b .LBB0_137 + b .LBB0_136 .p2align 4, , 16 -.LBB0_136: # %._crit_edge1063 - # in Loop: Header=BB0_137 Depth=2 - pcalau12i $a1, %pc_hi20(.LCPI0_21) - fld.d $fa1, $a1, %pc_lo12(.LCPI0_21) +.LBB0_135: # %._crit_edge1063 + # in Loop: Header=BB0_136 Depth=2 slli.d $a1, $a0, 3 - fstx.d $fa0, $s8, $a1 + fstx.d $fa0, $s7, $a1 + movgr2fr.d $fa1, $t3 fmul.d $fa0, $fa0, $fa1 - fstx.d $fa0, $t5, $a1 + fstx.d $fa0, $t6, $a1 addi.d $a3, $a3, 1 addi.d $a0, $a0, 1 move $a1, $a5 - beq $a3, $a2, .LBB0_141 -.LBB0_137: # %.lr.ph1068 - # Parent Loop BB0_72 Depth=1 + beq $a3, $a2, .LBB0_140 +.LBB0_136: # %.lr.ph1068 + # Parent Loop BB0_71 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_139 Depth 3 + # Child Loop BB0_138 Depth 3 slli.d $a4, $a1, 2 - fldx.s $fa0, $t3, $a4 + fldx.s $fa0, $t4, $a4 slli.d $a4, $a0, 2 - ldx.w $a4, $t4, $a4 + ldx.w $a4, $t5, $a4 fcvt.d.s $fa0, $fa0 addi.w $a5, $a1, 1 - blt $a4, $t2, .LBB0_136 -# %bb.138: # %.lr.ph1062.preheader - # in Loop: Header=BB0_137 Depth=2 - alsl.d $a5, $a5, $t3, 2 + blt $a4, $t2, .LBB0_135 +# %bb.137: # %.lr.ph1062.preheader + # in Loop: Header=BB0_136 Depth=2 + alsl.d $a5, $a5, $t4, 2 addi.d $a6, $a4, 1 .p2align 4, , 16 -.LBB0_139: # %.lr.ph1062 - # Parent Loop BB0_72 Depth=1 - # Parent Loop BB0_137 Depth=2 +.LBB0_138: # %.lr.ph1062 + # Parent Loop BB0_71 Depth=1 + # Parent Loop BB0_136 Depth=2 # => This Inner Loop Header: Depth=3 fld.s $fa1, $a5, 0 fcvt.d.s $fa1, $fa1 fadd.d $fa0, $fa0, $fa1 addi.w $a6, $a6, -1 addi.d $a5, $a5, 4 - bltu $t2, $a6, .LBB0_139 -# %bb.140: # %._crit_edge1063.loopexit - # in Loop: Header=BB0_137 Depth=2 + bltu $t2, $a6, .LBB0_138 +# %bb.139: # %._crit_edge1063.loopexit + # in Loop: Header=BB0_136 Depth=2 add.w $a5, $a1, $a4 - b .LBB0_136 + b .LBB0_135 .p2align 4, , 16 -.LBB0_141: # %._crit_edge1069 - # in Loop: Header=BB0_72 Depth=1 +.LBB0_140: # %._crit_edge1069 + # in Loop: Header=BB0_71 Depth=1 ld.w $a0, $t0, %pc_lo12(L3psycho_anal.npart_l) - ld.d $a2, $sp, 544 # 8-byte Folded Reload - ld.d $a1, $sp, 528 # 8-byte Folded Reload + ld.d $a2, $sp, 592 # 8-byte Folded Reload + ld.d $a1, $sp, 576 # 8-byte Folded Reload stx.d $zero, $a2, $a1 pcalau12i $a1, %pc_hi20(L3psycho_anal.thr) addi.d $s5, $a1, %pc_lo12(L3psycho_anal.thr) - blez $a0, .LBB0_156 -# %bb.142: # %.lr.ph1081 - # in Loop: Header=BB0_72 Depth=1 - st.d $t5, $sp, 480 # 8-byte Folded Spill - st.d $t4, $sp, 496 # 8-byte Folded Spill + blez $a0, .LBB0_155 +# %bb.141: # %.lr.ph1081 + # in Loop: Header=BB0_71 Depth=1 + st.d $t6, $sp, 528 # 8-byte Folded Spill + st.d $t5, $sp, 544 # 8-byte Folded Spill alsl.d $a0, $t1, $a2, 3 - st.d $a0, $sp, 464 # 8-byte Folded Spill + st.d $a0, $sp, 512 # 8-byte Folded Spill ori $a0, $zero, 504 mul.d $a0, $t1, $a0 pcalau12i $a1, %pc_hi20(L3psycho_anal.nb_1) addi.d $a1, $a1, %pc_lo12(L3psycho_anal.nb_1) add.d $a1, $a1, $a0 - st.d $a1, $sp, 544 # 8-byte Folded Spill + st.d $a1, $sp, 592 # 8-byte Folded Spill pcalau12i $a1, %pc_hi20(L3psycho_anal.nb_2) addi.d $a1, $a1, %pc_lo12(L3psycho_anal.nb_2) - move $s0, $zero + move $s1, $zero movgr2fr.d $fs1, $zero add.d $a0, $a1, $a0 - st.d $a0, $sp, 528 # 8-byte Folded Spill - ld.d $s3, $sp, 200 # 8-byte Folded Reload + st.d $a0, $sp, 576 # 8-byte Folded Spill + ld.d $s0, $sp, 200 # 8-byte Folded Reload fmov.d $fs0, $fs1 - b .LBB0_144 + b .LBB0_143 .p2align 4, , 16 -.LBB0_143: # in Loop: Header=BB0_144 Depth=2 +.LBB0_142: # in Loop: Header=BB0_143 Depth=2 ld.w $a0, $t0, %pc_lo12(L3psycho_anal.npart_l) - addi.d $s0, $s0, 1 - addi.d $s3, $s3, 512 - bge $s0, $a0, .LBB0_157 -.LBB0_144: # Parent Loop BB0_72 Depth=1 + addi.d $s1, $s1, 1 + addi.d $s0, $s0, 512 + bge $s1, $a0, .LBB0_156 +.LBB0_143: # Parent Loop BB0_71 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_146 Depth 3 - ld.d $a1, $sp, 560 # 8-byte Folded Reload - alsl.d $a0, $s0, $a1, 3 - slli.d $s1, $s0, 3 - ldx.w $a3, $a1, $s1 + # Child Loop BB0_145 Depth 3 + ld.d $a1, $sp, 608 # 8-byte Folded Reload + alsl.d $a0, $s1, $a1, 3 + slli.d $s3, $s1, 3 + ldx.w $a3, $a1, $s3 ld.w $a4, $a0, 4 fmov.d $fs3, $fs1 fmov.d $fa0, $fs1 - blt $a4, $a3, .LBB0_154 -# %bb.145: # %.lr.ph1075 - # in Loop: Header=BB0_144 Depth=2 - alsl.d $a0, $a3, $s3, 3 - alsl.d $a1, $a3, $s8, 3 - ld.d $a2, $sp, 480 # 8-byte Folded Reload + blt $a4, $a3, .LBB0_153 +# %bb.144: # %.lr.ph1075 + # in Loop: Header=BB0_143 Depth=2 + alsl.d $a0, $a3, $s0, 3 + alsl.d $a1, $a3, $s7, 3 + ld.d $a2, $sp, 528 # 8-byte Folded Reload alsl.d $a2, $a3, $a2, 3 sub.d $a3, $a4, $a3 movgr2fr.d $fa0, $zero addi.d $a3, $a3, 1 fmov.d $fs3, $fa0 .p2align 4, , 16 -.LBB0_146: # Parent Loop BB0_72 Depth=1 - # Parent Loop BB0_144 Depth=2 +.LBB0_145: # Parent Loop BB0_71 Depth=1 + # Parent Loop BB0_143 Depth=2 # => This Inner Loop Header: Depth=3 fld.d $fa1, $a0, 0 fld.d $fa2, $a1, 0 @@ -1889,57 +1894,57 @@ L3psycho_anal: # @L3psycho_anal addi.d $a1, $a1, 8 addi.w $a3, $a3, -1 addi.d $a2, $a2, 8 - bnez $a3, .LBB0_146 -# %bb.147: # %._crit_edge1076 - # in Loop: Header=BB0_144 Depth=2 + bnez $a3, .LBB0_145 +# %bb.146: # %._crit_edge1076 + # in Loop: Header=BB0_143 Depth=2 movgr2fr.d $fa1, $zero fcmp.ceq.d $fcc0, $fs3, $fa1 - bcnez $fcc0, .LBB0_151 -# %bb.148: # in Loop: Header=BB0_144 Depth=2 - pcalau12i $a0, %pc_hi20(.LCPI0_23) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_23) + bcnez $fcc0, .LBB0_150 +# %bb.147: # in Loop: Header=BB0_143 Depth=2 fdiv.d $fa0, $fa0, $fs3 + ld.d $a0, $sp, 456 # 8-byte Folded Reload + movgr2fr.d $fa1, $a0 fcmp.cle.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB0_152 -# %bb.149: # in Loop: Header=BB0_144 Depth=2 - pcalau12i $a0, %pc_hi20(.LCPI0_24) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_24) + bcnez $fcc0, .LBB0_151 +# %bb.148: # in Loop: Header=BB0_143 Depth=2 + ld.d $a0, $sp, 432 # 8-byte Folded Reload + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa1, $fa0 - bceqz $fcc0, .LBB0_153 -# %bb.150: # in Loop: Header=BB0_144 Depth=2 + bceqz $fcc0, .LBB0_152 +# %bb.149: # in Loop: Header=BB0_143 Depth=2 vldi $vr0, -912 - b .LBB0_154 -.LBB0_151: # in Loop: Header=BB0_144 Depth=2 + b .LBB0_153 +.LBB0_150: # in Loop: Header=BB0_143 Depth=2 fmov.d $fa0, $fs3 - b .LBB0_154 -.LBB0_152: # in Loop: Header=BB0_144 Depth=2 - pcalau12i $a0, %pc_hi20(.LCPI0_22) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_22) - b .LBB0_154 -.LBB0_153: # in Loop: Header=BB0_144 Depth=2 + b .LBB0_153 +.LBB0_151: # in Loop: Header=BB0_143 Depth=2 + ld.d $a0, $sp, 440 # 8-byte Folded Reload + movgr2fr.d $fa0, $a0 + b .LBB0_153 +.LBB0_152: # in Loop: Header=BB0_143 Depth=2 pcaddu18i $ra, %call36(log) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_25) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_25) - pcalau12i $a0, %pc_hi20(.LCPI0_26) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_26) + ld.d $a0, $sp, 376 # 8-byte Folded Reload + movgr2fr.d $fa1, $a0 + ld.d $a0, $sp, 368 # 8-byte Folded Reload + movgr2fr.d $fa2, $a0 fmadd.d $fa0, $fa0, $fa2, $fa1 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 ori $ra, $zero, 3 - ld.d $t0, $sp, 512 # 8-byte Folded Reload + ld.d $t0, $sp, 560 # 8-byte Folded Reload # kill: def $f0_64 killed $f0_64 def $vr0 .p2align 4, , 16 -.LBB0_154: # %._crit_edge1076.thread - # in Loop: Header=BB0_144 Depth=2 +.LBB0_153: # %._crit_edge1076.thread + # in Loop: Header=BB0_143 Depth=2 pcalau12i $a0, %pc_hi20(L3psycho_anal.minval) addi.d $a0, $a0, %pc_lo12(L3psycho_anal.minval) - fldx.d $fa1, $a0, $s1 + fldx.d $fa1, $a0, $s3 fcmp.clt.d $fcc0, $fa1, $fa0 - ld.d $a1, $sp, 544 # 8-byte Folded Reload - fldx.d $fa2, $a1, $s1 - ld.d $a2, $sp, 528 # 8-byte Folded Reload - fldx.d $fa3, $a2, $s1 + ld.d $a1, $sp, 592 # 8-byte Folded Reload + fldx.d $fa2, $a1, $s3 + ld.d $a2, $sp, 576 # 8-byte Folded Reload + fldx.d $fa3, $a2, $s3 fsel $fa0, $fa0, $fa1, $fcc0 fmul.d $fa4, $fs3, $fa0 fadd.d $fa0, $fa2, $fa2 @@ -1951,18 +1956,18 @@ L3psycho_anal: # @L3psycho_anal fsel $fa1, $fa0, $fa4, $fcc0 pcalau12i $a0, %pc_hi20(L3psycho_anal.qthr_l) addi.d $a0, $a0, %pc_lo12(L3psycho_anal.qthr_l) - fldx.d $fa3, $a0, $s1 + fldx.d $fa3, $a0, $s3 fcmp.clt.d $fcc0, $fa1, $fa3 - fldx.d $fa0, $s8, $s1 + fldx.d $fa0, $s7, $s3 fsel $fa1, $fa1, $fa3, $fcc0 - fstx.d $fa1, $s5, $s1 - fstx.d $fa2, $a2, $s1 + fstx.d $fa1, $s5, $s3 + fstx.d $fa2, $a2, $s3 fcmp.cule.d $fcc0, $fa0, $fa1 - fstx.d $fa4, $a1, $s1 - bcnez $fcc0, .LBB0_143 -# %bb.155: # in Loop: Header=BB0_144 Depth=2 - slli.d $a0, $s0, 2 - ld.d $a1, $sp, 496 # 8-byte Folded Reload + fstx.d $fa4, $a1, $s3 + bcnez $fcc0, .LBB0_142 +# %bb.154: # in Loop: Header=BB0_143 Depth=2 + slli.d $a0, $s1, 2 + ld.d $a1, $sp, 544 # 8-byte Folded Reload ldx.w $a0, $a1, $a0 movgr2fr.w $fa2, $a0 ffint.d.w $fs2, $fa2 @@ -1970,61 +1975,60 @@ L3psycho_anal: # @L3psycho_anal pcaddu18i $ra, %call36(log) jirl $ra, $ra, 0 ori $ra, $zero, 3 - ld.d $t0, $sp, 512 # 8-byte Folded Reload + ld.d $t0, $sp, 560 # 8-byte Folded Reload fneg.d $fa1, $fs2 fmadd.d $fs0, $fa1, $fa0, $fs0 - ld.d $a0, $sp, 464 # 8-byte Folded Reload + ld.d $a0, $sp, 512 # 8-byte Folded Reload fst.d $fs0, $a0, 0 - b .LBB0_143 + b .LBB0_142 .p2align 4, , 16 -.LBB0_156: # in Loop: Header=BB0_72 Depth=1 +.LBB0_155: # in Loop: Header=BB0_71 Depth=1 movgr2fr.d $fs0, $zero -.LBB0_157: # %._crit_edge1082 - # in Loop: Header=BB0_72 Depth=1 - ld.d $t4, $sp, 304 # 8-byte Folded Reload - ld.d $t5, $sp, 296 # 8-byte Folded Reload - ld.d $t0, $sp, 288 # 8-byte Folded Reload - ld.d $t1, $sp, 280 # 8-byte Folded Reload - ld.d $t2, $sp, 344 # 8-byte Folded Reload - ld.d $t3, $sp, 272 # 8-byte Folded Reload - ld.d $t6, $sp, 336 # 8-byte Folded Reload - ld.d $t7, $sp, 376 # 8-byte Folded Reload - ld.d $t8, $sp, 240 # 8-byte Folded Reload - ld.d $s1, $sp, 368 # 8-byte Folded Reload - ld.d $a0, $sp, 408 # 8-byte Folded Reload +.LBB0_156: # %._crit_edge1082 + # in Loop: Header=BB0_71 Depth=1 + ld.d $t4, $sp, 352 # 8-byte Folded Reload + ld.d $t5, $sp, 304 # 8-byte Folded Reload + ld.d $t0, $sp, 344 # 8-byte Folded Reload + ld.d $t1, $sp, 296 # 8-byte Folded Reload + ld.d $t2, $sp, 336 # 8-byte Folded Reload + ld.d $t3, $sp, 288 # 8-byte Folded Reload + ld.d $t6, $sp, 408 # 8-byte Folded Reload + ld.d $t7, $sp, 264 # 8-byte Folded Reload + ld.d $t8, $sp, 256 # 8-byte Folded Reload + ld.d $a0, $sp, 448 # 8-byte Folded Reload ori $a2, $zero, 260 ori $s0, $zero, 21 - bltu $s7, $a0, .LBB0_168 -# %bb.158: # in Loop: Header=BB0_72 Depth=1 - ld.d $a0, $sp, 208 # 8-byte Folded Reload + bltu $s8, $a0, .LBB0_167 +# %bb.157: # in Loop: Header=BB0_71 Depth=1 + ld.d $a0, $sp, 192 # 8-byte Folded Reload ld.w $a0, $a0, 160 - beqz $a0, .LBB0_160 -# %bb.159: # in Loop: Header=BB0_72 Depth=1 - ld.d $a0, $sp, 408 # 8-byte Folded Reload + beqz $a0, .LBB0_159 +# %bb.158: # in Loop: Header=BB0_71 Depth=1 + ld.d $a0, $sp, 448 # 8-byte Folded Reload slli.d $a0, $a0, 2 - addi.d $a1, $sp, 1080 - stx.w $s7, $a0, $a1 - b .LBB0_168 -.LBB0_160: # in Loop: Header=BB0_72 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI0_27) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_27) + addi.d $a1, $sp, 1128 + stx.w $s8, $a0, $a1 + b .LBB0_167 +.LBB0_159: # in Loop: Header=BB0_71 Depth=1 + ld.d $a0, $sp, 104 # 8-byte Folded Reload + movgr2fr.d $fa0, $a0 fcmp.clt.d $fcc0, $fa0, $fs0 - bceqz $fcc0, .LBB0_162 -# %bb.161: # in Loop: Header=BB0_72 Depth=1 - ld.d $a0, $sp, 408 # 8-byte Folded Reload + bceqz $fcc0, .LBB0_161 +# %bb.160: # in Loop: Header=BB0_71 Depth=1 + ld.d $a0, $sp, 448 # 8-byte Folded Reload slli.d $a0, $a0, 2 - addi.d $a1, $sp, 1080 + addi.d $a1, $sp, 1128 stx.w $zero, $a0, $a1 - b .LBB0_168 -.LBB0_162: # %.preheader953.preheader - # in Loop: Header=BB0_72 Depth=1 + b .LBB0_167 +.LBB0_161: # %.preheader953.preheader + # in Loop: Header=BB0_71 Depth=1 movgr2fr.w $fa0, $zero move $a0, $zero fmov.s $fa1, $fa0 fmov.s $fa2, $fa0 .p2align 4, , 16 -.LBB0_163: # %.preheader953 - # Parent Loop BB0_72 Depth=1 +.LBB0_162: # %.preheader953 + # Parent Loop BB0_71 Depth=1 # => This Inner Loop Header: Depth=2 pcalau12i $a1, %pc_hi20(L3psycho_anal.energy_s) addi.d $a1, $a1, %pc_lo12(L3psycho_anal.energy_s) @@ -2036,8 +2040,8 @@ L3psycho_anal: # @L3psycho_anal fadd.s $fa1, $fa1, $fa4 addi.d $a0, $a0, 4 fadd.s $fa0, $fa0, $fa5 - bne $a0, $a2, .LBB0_163 -# %bb.164: # in Loop: Header=BB0_72 Depth=1 + bne $a0, $a2, .LBB0_162 +# %bb.163: # in Loop: Header=BB0_71 Depth=1 fcmp.clt.s $fcc0, $fa2, $fa1 fsel $fa3, $fa1, $fa2, $fcc0 fcmp.clt.s $fcc0, $fa3, $fa0 @@ -2046,45 +2050,47 @@ L3psycho_anal: # @L3psycho_anal fsel $fa1, $fa1, $fa2, $fcc0 fcmp.clt.s $fcc0, $fa0, $fa1 fsel $fa0, $fa0, $fa1, $fcc0 - ld.d $a1, $sp, 408 # 8-byte Folded Reload - addi.d $a2, $sp, 1080 + ld.d $a1, $sp, 448 # 8-byte Folded Reload + addi.d $a2, $sp, 1128 alsl.d $a0, $a1, $a2, 2 slli.d $a1, $a1, 2 vldi $vr1, -1218 fmul.s $fa1, $fa3, $fa1 fcmp.cule.s $fcc0, $fa0, $fa1 - stx.w $s7, $a1, $a2 - bceqz $fcc0, .LBB0_167 -# %bb.165: # in Loop: Header=BB0_72 Depth=1 - ld.d $a1, $sp, 112 # 8-byte Folded Reload - fld.d $fa1, $a1, %pc_lo12(.LCPI0_0) + stx.w $s8, $a1, $a2 + bceqz $fcc0, .LBB0_166 +# %bb.164: # in Loop: Header=BB0_71 Depth=1 + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1032 + movgr2fr.d $fa1, $a1 fcmp.cule.d $fcc0, $fs0, $fa1 - bcnez $fcc0, .LBB0_168 -# %bb.166: # in Loop: Header=BB0_72 Depth=1 + bcnez $fcc0, .LBB0_167 +# %bb.165: # in Loop: Header=BB0_71 Depth=1 vldi $vr1, -1244 fmul.s $fa1, $fa3, $fa1 fcmp.cule.s $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB0_168 -.LBB0_167: # in Loop: Header=BB0_72 Depth=1 + bcnez $fcc0, .LBB0_167 +.LBB0_166: # in Loop: Header=BB0_71 Depth=1 st.w $zero, $a0, 0 .p2align 4, , 16 -.LBB0_168: # in Loop: Header=BB0_72 Depth=1 +.LBB0_167: # in Loop: Header=BB0_71 Depth=1 move $a0, $zero - ld.d $a2, $sp, 352 # 8-byte Folded Reload - add.d $a3, $t7, $a2 - ld.d $a1, $sp, 392 # 8-byte Folded Reload + ld.d $a2, $sp, 384 # 8-byte Folded Reload + add.d $a3, $t6, $a2 + ld.d $a1, $sp, 400 # 8-byte Folded Reload add.d $a4, $a1, $a2 - b .LBB0_170 + b .LBB0_169 .p2align 4, , 16 -.LBB0_169: # %._crit_edge1093 - # in Loop: Header=BB0_170 Depth=2 +.LBB0_168: # %._crit_edge1093 + # in Loop: Header=BB0_169 Depth=2 fstx.d $fa0, $a3, $a1 addi.d $a0, $a0, 1 fstx.d $fa1, $a4, $a1 - beq $a0, $s0, .LBB0_173 -.LBB0_170: # Parent Loop BB0_72 Depth=1 + beq $a0, $s0, .LBB0_172 +.LBB0_169: # Parent Loop BB0_71 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_172 Depth 3 + # Child Loop BB0_171 Depth 3 slli.d $a1, $a0, 3 slli.d $a2, $a0, 2 ldx.w $a6, $t2, $a2 @@ -2092,9 +2098,9 @@ L3psycho_anal: # @L3psycho_anal ldx.w $a7, $t3, $a2 fldx.d $fa2, $t0, $a1 slli.d $a2, $a6, 3 - fldx.d $fa0, $s8, $a2 + fldx.d $fa0, $s7, $a2 slli.d $a5, $a7, 3 - fldx.d $fa3, $s8, $a5 + fldx.d $fa3, $s7, $a5 fldx.d $fa4, $s5, $a2 fmul.d $fa0, $fa2, $fa0 fldx.d $fa5, $s5, $a5 @@ -2102,19 +2108,19 @@ L3psycho_anal: # @L3psycho_anal fmul.d $fa2, $fa2, $fa4 addi.w $a2, $a7, 1 fmadd.d $fa1, $fa1, $fa5, $fa2 - bge $a2, $a6, .LBB0_169 -# %bb.171: # %.lr.ph1092.preheader - # in Loop: Header=BB0_170 Depth=2 - alsl.d $a2, $a7, $s8, 3 + bge $a2, $a6, .LBB0_168 +# %bb.170: # %.lr.ph1092.preheader + # in Loop: Header=BB0_169 Depth=2 + alsl.d $a2, $a7, $s7, 3 addi.d $a2, $a2, 8 alsl.d $a5, $a7, $s5, 3 addi.d $a5, $a5, 8 nor $a7, $a7, $zero add.d $a6, $a7, $a6 .p2align 4, , 16 -.LBB0_172: # %.lr.ph1092 - # Parent Loop BB0_72 Depth=1 - # Parent Loop BB0_170 Depth=2 +.LBB0_171: # %.lr.ph1092 + # Parent Loop BB0_71 Depth=1 + # Parent Loop BB0_169 Depth=2 # => This Inner Loop Header: Depth=3 fld.d $fa2, $a2, 0 fld.d $fa3, $a5, 0 @@ -2123,37 +2129,37 @@ L3psycho_anal: # @L3psycho_anal addi.d $a2, $a2, 8 addi.w $a6, $a6, -1 addi.d $a5, $a5, 8 - bnez $a6, .LBB0_172 - b .LBB0_169 + bnez $a6, .LBB0_171 + b .LBB0_168 .p2align 4, , 16 -.LBB0_173: # %.preheader952 - # in Loop: Header=BB0_72 Depth=1 - ld.d $a0, $sp, 256 # 8-byte Folded Reload +.LBB0_172: # %.preheader952 + # in Loop: Header=BB0_71 Depth=1 + ld.d $a0, $sp, 272 # 8-byte Folded Reload ld.w $a0, $a0, %pc_lo12(L3psycho_anal.npart_s_orig) - ld.w $a1, $t6, %pc_lo12(L3psycho_anal.npart_s) + ld.d $a1, $sp, 360 # 8-byte Folded Reload + ld.w $a1, $a1, %pc_lo12(L3psycho_anal.npart_s) move $a2, $zero addi.d $a3, $a3, 176 addi.d $a4, $a4, 176 - ld.d $t6, $sp, 248 # 8-byte Folded Reload ori $s0, $zero, 12 - b .LBB0_175 + b .LBB0_174 .p2align 4, , 16 -.LBB0_174: # in Loop: Header=BB0_175 Depth=2 +.LBB0_173: # in Loop: Header=BB0_174 Depth=2 addi.d $a2, $a2, 1 addi.d $fp, $fp, 516 - beq $a2, $ra, .LBB0_71 -.LBB0_175: # %.preheader950 - # Parent Loop BB0_72 Depth=1 + beq $a2, $ra, .LBB0_70 +.LBB0_174: # %.preheader950 + # Parent Loop BB0_71 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_178 Depth 3 - # Child Loop BB0_180 Depth 4 - # Child Loop BB0_186 Depth 3 - # Child Loop BB0_188 Depth 4 - # Child Loop BB0_191 Depth 3 - # Child Loop BB0_193 Depth 4 - blez $a0, .LBB0_182 -# %bb.176: # %.lr.ph1108 - # in Loop: Header=BB0_175 Depth=2 + # Child Loop BB0_177 Depth 3 + # Child Loop BB0_179 Depth 4 + # Child Loop BB0_185 Depth 3 + # Child Loop BB0_187 Depth 4 + # Child Loop BB0_190 Depth 3 + # Child Loop BB0_192 Depth 4 + blez $a0, .LBB0_181 +# %bb.175: # %.lr.ph1108 + # in Loop: Header=BB0_174 Depth=2 slli.d $a5, $a2, 9 alsl.d $a6, $a2, $a5, 2 pcalau12i $a5, %pc_hi20(L3psycho_anal.energy_s) @@ -2161,60 +2167,60 @@ L3psycho_anal: # @L3psycho_anal move $a5, $zero move $a7, $zero add.d $a6, $t0, $a6 - b .LBB0_178 + b .LBB0_177 .p2align 4, , 16 -.LBB0_177: # %._crit_edge1103 - # in Loop: Header=BB0_178 Depth=3 +.LBB0_176: # %._crit_edge1103 + # in Loop: Header=BB0_177 Depth=3 fcvt.d.s $fa0, $fa0 slli.d $a7, $a5, 3 addi.d $a5, $a5, 1 - fstx.d $fa0, $s8, $a7 + fstx.d $fa0, $s7, $a7 move $a7, $t1 - beq $a5, $a0, .LBB0_182 -.LBB0_178: # Parent Loop BB0_72 Depth=1 - # Parent Loop BB0_175 Depth=2 + beq $a5, $a0, .LBB0_181 +.LBB0_177: # Parent Loop BB0_71 Depth=1 + # Parent Loop BB0_174 Depth=2 # => This Loop Header: Depth=3 - # Child Loop BB0_180 Depth 4 + # Child Loop BB0_179 Depth 4 slli.d $t1, $a7, 2 slli.d $t0, $a5, 2 ldx.w $t0, $t8, $t0 fldx.s $fa0, $a6, $t1 addi.w $t1, $a7, 1 - blez $t0, .LBB0_177 -# %bb.179: # %.lr.ph1102.preheader - # in Loop: Header=BB0_178 Depth=3 + blez $t0, .LBB0_176 +# %bb.178: # %.lr.ph1102.preheader + # in Loop: Header=BB0_177 Depth=3 alsl.d $t1, $t1, $fp, 2 addi.d $t2, $t0, 1 .p2align 4, , 16 -.LBB0_180: # %.lr.ph1102 - # Parent Loop BB0_72 Depth=1 - # Parent Loop BB0_175 Depth=2 - # Parent Loop BB0_178 Depth=3 +.LBB0_179: # %.lr.ph1102 + # Parent Loop BB0_71 Depth=1 + # Parent Loop BB0_174 Depth=2 + # Parent Loop BB0_177 Depth=3 # => This Inner Loop Header: Depth=4 fld.s $fa1, $t1, 0 fadd.s $fa0, $fa0, $fa1 addi.w $t2, $t2, -1 addi.d $t1, $t1, 4 - bltu $s7, $t2, .LBB0_180 -# %bb.181: # %._crit_edge1103.loopexit - # in Loop: Header=BB0_178 Depth=3 + bltu $s8, $t2, .LBB0_179 +# %bb.180: # %._crit_edge1103.loopexit + # in Loop: Header=BB0_177 Depth=3 add.d $a7, $a7, $t0 addi.w $t1, $a7, 1 - b .LBB0_177 + b .LBB0_176 .p2align 4, , 16 -.LBB0_182: # %.preheader949 - # in Loop: Header=BB0_175 Depth=2 - blez $a1, .LBB0_189 -# %bb.183: # %.lr.ph1117.preheader - # in Loop: Header=BB0_175 Depth=2 +.LBB0_181: # %.preheader949 + # in Loop: Header=BB0_174 Depth=2 + blez $a1, .LBB0_188 +# %bb.182: # %.lr.ph1117.preheader + # in Loop: Header=BB0_174 Depth=2 move $a5, $zero - move $a6, $t6 - b .LBB0_186 + move $a6, $t7 + b .LBB0_185 .p2align 4, , 16 -.LBB0_184: # in Loop: Header=BB0_186 Depth=3 +.LBB0_183: # in Loop: Header=BB0_185 Depth=3 movgr2fr.d $fa0, $zero -.LBB0_185: # %._crit_edge1114 - # in Loop: Header=BB0_186 Depth=3 +.LBB0_184: # %._crit_edge1114 + # in Loop: Header=BB0_185 Depth=3 pcalau12i $t0, %pc_hi20(L3psycho_anal.qthr_s) addi.d $t0, $t0, %pc_lo12(L3psycho_anal.qthr_s) fldx.d $fa1, $t0, $a7 @@ -2223,28 +2229,28 @@ L3psycho_anal: # @L3psycho_anal fstx.d $fa0, $s5, $a7 addi.d $a5, $a5, 1 addi.d $a6, $a6, 512 - beq $a5, $a1, .LBB0_189 -.LBB0_186: # %.lr.ph1117 - # Parent Loop BB0_72 Depth=1 - # Parent Loop BB0_175 Depth=2 + beq $a5, $a1, .LBB0_188 +.LBB0_185: # %.lr.ph1117 + # Parent Loop BB0_71 Depth=1 + # Parent Loop BB0_174 Depth=2 # => This Loop Header: Depth=3 - # Child Loop BB0_188 Depth 4 + # Child Loop BB0_187 Depth 4 alsl.d $t0, $a5, $s2, 3 slli.d $a7, $a5, 3 ldx.w $t2, $s2, $a7 ld.w $t3, $t0, 4 - blt $t3, $t2, .LBB0_184 -# %bb.187: # %.lr.ph1113 - # in Loop: Header=BB0_186 Depth=3 + blt $t3, $t2, .LBB0_183 +# %bb.186: # %.lr.ph1113 + # in Loop: Header=BB0_185 Depth=3 alsl.d $t0, $t2, $a6, 3 - alsl.d $t1, $t2, $s8, 3 + alsl.d $t1, $t2, $s7, 3 sub.d $t2, $t3, $t2 addi.d $t2, $t2, 1 movgr2fr.d $fa0, $zero .p2align 4, , 16 -.LBB0_188: # Parent Loop BB0_72 Depth=1 - # Parent Loop BB0_175 Depth=2 - # Parent Loop BB0_186 Depth=3 +.LBB0_187: # Parent Loop BB0_71 Depth=1 + # Parent Loop BB0_174 Depth=2 + # Parent Loop BB0_185 Depth=3 # => This Inner Loop Header: Depth=4 fld.d $fa1, $t0, 0 fld.d $fa2, $t1, 0 @@ -2252,28 +2258,28 @@ L3psycho_anal: # @L3psycho_anal addi.d $t0, $t0, 8 addi.w $t2, $t2, -1 addi.d $t1, $t1, 8 - bnez $t2, .LBB0_188 - b .LBB0_185 + bnez $t2, .LBB0_187 + b .LBB0_184 .p2align 4, , 16 -.LBB0_189: # %.preheader948 - # in Loop: Header=BB0_175 Depth=2 +.LBB0_188: # %.preheader948 + # in Loop: Header=BB0_174 Depth=2 move $a5, $zero alsl.d $a6, $a2, $a3, 3 alsl.d $a7, $a2, $a4, 3 - b .LBB0_191 + b .LBB0_190 .p2align 4, , 16 -.LBB0_190: # %._crit_edge1124 - # in Loop: Header=BB0_191 Depth=3 +.LBB0_189: # %._crit_edge1124 + # in Loop: Header=BB0_190 Depth=3 slli.d $t0, $a5, 4 alsl.d $t0, $a5, $t0, 3 fstx.d $fa0, $a6, $t0 addi.d $a5, $a5, 1 fstx.d $fa1, $a7, $t0 - beq $a5, $s0, .LBB0_174 -.LBB0_191: # Parent Loop BB0_72 Depth=1 - # Parent Loop BB0_175 Depth=2 + beq $a5, $s0, .LBB0_173 +.LBB0_190: # Parent Loop BB0_71 Depth=1 + # Parent Loop BB0_174 Depth=2 # => This Loop Header: Depth=3 - # Child Loop BB0_193 Depth 4 + # Child Loop BB0_192 Depth 4 slli.d $t0, $a5, 3 slli.d $t1, $a5, 2 ldx.w $t2, $s4, $t1 @@ -2281,9 +2287,9 @@ L3psycho_anal: # @L3psycho_anal ldx.w $t3, $s6, $t1 fldx.d $fa2, $t4, $t0 slli.d $t0, $t2, 3 - fldx.d $fa0, $s8, $t0 + fldx.d $fa0, $s7, $t0 slli.d $t1, $t3, 3 - fldx.d $fa3, $s8, $t1 + fldx.d $fa3, $s7, $t1 fldx.d $fa4, $s5, $t0 fmul.d $fa0, $fa2, $fa0 fldx.d $fa5, $s5, $t1 @@ -2291,20 +2297,20 @@ L3psycho_anal: # @L3psycho_anal fmul.d $fa2, $fa2, $fa4 addi.w $t0, $t3, 1 fmadd.d $fa1, $fa1, $fa5, $fa2 - bge $t0, $t2, .LBB0_190 -# %bb.192: # %.lr.ph1123.preheader - # in Loop: Header=BB0_191 Depth=3 - alsl.d $t0, $t3, $s8, 3 + bge $t0, $t2, .LBB0_189 +# %bb.191: # %.lr.ph1123.preheader + # in Loop: Header=BB0_190 Depth=3 + alsl.d $t0, $t3, $s7, 3 addi.d $t0, $t0, 8 alsl.d $t1, $t3, $s5, 3 addi.d $t1, $t1, 8 nor $t3, $t3, $zero add.d $t2, $t3, $t2 .p2align 4, , 16 -.LBB0_193: # %.lr.ph1123 - # Parent Loop BB0_72 Depth=1 - # Parent Loop BB0_175 Depth=2 - # Parent Loop BB0_191 Depth=3 +.LBB0_192: # %.lr.ph1123 + # Parent Loop BB0_71 Depth=1 + # Parent Loop BB0_174 Depth=2 + # Parent Loop BB0_190 Depth=3 # => This Inner Loop Header: Depth=4 fld.d $fa2, $t0, 0 fld.d $fa3, $t1, 0 @@ -2313,68 +2319,72 @@ L3psycho_anal: # @L3psycho_anal addi.d $t0, $t0, 8 addi.w $t2, $t2, -1 addi.d $t1, $t1, 8 - bnez $t2, .LBB0_193 - b .LBB0_190 -.LBB0_194: # %call.sqrt - # in Loop: Header=BB0_72 Depth=1 + bnez $t2, .LBB0_192 + b .LBB0_189 +.LBB0_193: # %call.sqrt + # in Loop: Header=BB0_71 Depth=1 fmov.s $fa0, $fa1 + move $s1, $t4 move $s3, $t3 - move $s5, $t4 pcaddu18i $ra, %call36(sqrtf) jirl $ra, $ra, 0 - move $t4, $s5 move $t3, $s3 + move $t4, $s1 ori $ra, $zero, 3 vldi $vr6, -1184 - ld.d $t1, $sp, 408 # 8-byte Folded Reload - ld.d $a5, $sp, 400 # 8-byte Folded Reload - ld.d $t0, $sp, 512 # 8-byte Folded Reload + ld.d $t1, $sp, 448 # 8-byte Folded Reload + ld.d $a5, $sp, 416 # 8-byte Folded Reload + ld.d $t0, $sp, 560 # 8-byte Folded Reload ld.d $a3, $sp, 312 # 8-byte Folded Reload - fld.s $fa4, $sp, 132 # 4-byte Folded Reload + fld.s $fa4, $sp, 124 # 4-byte Folded Reload fcmp.ceq.s $fcc0, $fs3, $fa4 fst.s $fa0, $s0, 0 - bceqz $fcc0, .LBB0_94 -.LBB0_195: # in Loop: Header=BB0_72 Depth=1 + bceqz $fcc0, .LBB0_93 +.LBB0_194: # in Loop: Header=BB0_71 Depth=1 vldi $vr1, -1168 fmov.s $fa2, $fa4 vldi $vr3, -1168 + ld.d $a1, $sp, 472 # 8-byte Folded Reload fcmp.ceq.s $fcc0, $fs1, $fa4 - bceqz $fcc0, .LBB0_95 - b .LBB0_96 -.LBB0_196: # %._crit_edge1134 + bceqz $fcc0, .LBB0_94 + b .LBB0_95 +.LBB0_195: # %._crit_edge1134 ori $a1, $zero, 4 ld.d $a2, $sp, 328 # 8-byte Folded Reload addi.d $a0, $a2, -4 - ld.d $s3, $sp, 208 # 8-byte Folded Reload - bne $a2, $a1, .LBB0_215 -# %bb.197: # %.preheader947.preheader - pcalau12i $a1, %pc_hi20(.LCPI0_28) - fld.d $fa0, $a1, %pc_lo12(.LCPI0_28) + ld.d $s3, $sp, 192 # 8-byte Folded Reload + bne $a2, $a1, .LBB0_214 +# %bb.196: # %.preheader947.preheader + lu12i.w $a1, 83886 + ori $a1, $a1, 328 + lu32i.d $a1, -440402 + lu52i.d $a1, $a1, 1023 + movgr2fr.d $fa0, $a1 ori $a1, $zero, 168 pcalau12i $a2, %pc_hi20(L3psycho_anal.mld_l) addi.d $a2, $a2, %pc_lo12(L3psycho_anal.mld_l) move $a3, $zero - b .LBB0_199 + b .LBB0_198 .p2align 4, , 16 -.LBB0_198: # in Loop: Header=BB0_199 Depth=1 +.LBB0_197: # in Loop: Header=BB0_198 Depth=1 addi.d $a3, $a3, 8 - beq $a3, $a1, .LBB0_204 -.LBB0_199: # %.preheader947 + beq $a3, $a1, .LBB0_203 +.LBB0_198: # %.preheader947 # =>This Inner Loop Header: Depth=1 - add.d $a4, $s5, $a3 - fldx.d $fa1, $s5, $a3 + add.d $a4, $s7, $a3 + fldx.d $fa1, $s7, $a3 fld.d $fa2, $a4, 488 fmul.d $fa3, $fa1, $fa0 fcmp.cult.d $fcc0, $fa3, $fa2 - bcnez $fcc0, .LBB0_198 -# %bb.200: # %.preheader947 - # in Loop: Header=BB0_199 Depth=1 + bcnez $fcc0, .LBB0_197 +# %bb.199: # %.preheader947 + # in Loop: Header=BB0_198 Depth=1 fmul.d $fa2, $fa2, $fa0 fcmp.cult.d $fcc0, $fa2, $fa1 - bcnez $fcc0, .LBB0_198 -# %bb.201: # in Loop: Header=BB0_199 Depth=1 + bcnez $fcc0, .LBB0_197 +# %bb.200: # in Loop: Header=BB0_198 Depth=1 fldx.d $fa1, $a2, $a3 - add.d $a5, $t7, $a3 + add.d $a5, $t6, $a3 fld.d $fa2, $a5, 1464 fld.d $fa3, $a4, 1464 fmul.d $fa2, $fa1, $fa2 @@ -2391,44 +2401,44 @@ L3psycho_anal: # @L3psycho_anal fsel $fa1, $fa1, $fa3, $fcc0 fst.d $fa2, $a4, 976 fst.d $fa1, $a4, 1464 - b .LBB0_198 -.LBB0_202: + b .LBB0_197 +.LBB0_201: move $fp, $zero ori $a0, $zero, 1 - beq $a1, $a0, .LBB0_216 -.LBB0_203: + beq $a1, $a0, .LBB0_215 +.LBB0_202: movgr2fr.d $fs1, $zero fmov.d $fs0, $fs1 ld.w $s2, $s3, 204 - ld.d $s0, $sp, 1264 - bgtz $s2, .LBB0_250 - b .LBB0_269 -.LBB0_204: # %.preheader945.preheader + ld.d $s0, $sp, 1312 + bgtz $s2, .LBB0_249 + b .LBB0_268 +.LBB0_203: # %.preheader945.preheader pcalau12i $a1, %pc_hi20(L3psycho_anal.mld_s) addi.d $a1, $a1, %pc_lo12(L3psycho_anal.mld_s) move $a2, $zero ori $a3, $zero, 288 - b .LBB0_206 + b .LBB0_205 .p2align 4, , 16 -.LBB0_205: # in Loop: Header=BB0_206 Depth=1 +.LBB0_204: # in Loop: Header=BB0_205 Depth=1 addi.d $a2, $a2, 24 addi.d $a1, $a1, 8 - beq $a2, $a3, .LBB0_215 -.LBB0_206: # %.preheader945 + beq $a2, $a3, .LBB0_214 +.LBB0_205: # %.preheader945 # =>This Inner Loop Header: Depth=1 - add.d $a4, $s5, $a2 + add.d $a4, $s7, $a2 fld.d $fa1, $a4, 176 fld.d $fa2, $a4, 664 fmul.d $fa3, $fa1, $fa0 fcmp.cult.d $fcc0, $fa3, $fa2 - add.d $a5, $t7, $a2 - bcnez $fcc0, .LBB0_209 -# %bb.207: # %.preheader945 - # in Loop: Header=BB0_206 Depth=1 + add.d $a5, $t6, $a2 + bcnez $fcc0, .LBB0_208 +# %bb.206: # %.preheader945 + # in Loop: Header=BB0_205 Depth=1 fmul.d $fa2, $fa2, $fa0 fcmp.cult.d $fcc0, $fa2, $fa1 - bcnez $fcc0, .LBB0_209 -# %bb.208: # in Loop: Header=BB0_206 Depth=1 + bcnez $fcc0, .LBB0_208 +# %bb.207: # in Loop: Header=BB0_205 Depth=1 addi.d $a6, $a5, 1656 fld.d $fa1, $a1, 0 fld.d $fa2, $a6, -16 @@ -2448,17 +2458,17 @@ L3psycho_anal: # @L3psycho_anal fsel $fa1, $fa1, $fa3, $fcc0 fst.d $fa2, $a7, 0 fst.d $fa1, $a7, 488 -.LBB0_209: # in Loop: Header=BB0_206 Depth=1 +.LBB0_208: # in Loop: Header=BB0_205 Depth=1 fld.d $fa1, $a4, 184 fld.d $fa2, $a4, 672 fmul.d $fa3, $fa1, $fa0 fcmp.cult.d $fcc0, $fa3, $fa2 - bcnez $fcc0, .LBB0_212 -# %bb.210: # in Loop: Header=BB0_206 Depth=1 + bcnez $fcc0, .LBB0_211 +# %bb.209: # in Loop: Header=BB0_205 Depth=1 fmul.d $fa2, $fa2, $fa0 fcmp.cult.d $fcc0, $fa2, $fa1 - bcnez $fcc0, .LBB0_212 -# %bb.211: # in Loop: Header=BB0_206 Depth=1 + bcnez $fcc0, .LBB0_211 +# %bb.210: # in Loop: Header=BB0_205 Depth=1 fld.d $fa1, $a1, 0 fld.d $fa2, $a5, 1648 fld.d $fa3, $a4, 1648 @@ -2476,17 +2486,17 @@ L3psycho_anal: # @L3psycho_anal fsel $fa1, $fa1, $fa3, $fcc0 fst.d $fa2, $a4, 1160 fst.d $fa1, $a4, 1648 -.LBB0_212: # in Loop: Header=BB0_206 Depth=1 +.LBB0_211: # in Loop: Header=BB0_205 Depth=1 fld.d $fa1, $a4, 192 fld.d $fa2, $a4, 680 fmul.d $fa3, $fa1, $fa0 fcmp.cult.d $fcc0, $fa3, $fa2 - bcnez $fcc0, .LBB0_205 -# %bb.213: # in Loop: Header=BB0_206 Depth=1 + bcnez $fcc0, .LBB0_204 +# %bb.212: # in Loop: Header=BB0_205 Depth=1 fmul.d $fa2, $fa2, $fa0 fcmp.cult.d $fcc0, $fa2, $fa1 - bcnez $fcc0, .LBB0_205 -# %bb.214: # in Loop: Header=BB0_206 Depth=1 + bcnez $fcc0, .LBB0_204 +# %bb.213: # in Loop: Header=BB0_205 Depth=1 fld.d $fa1, $a1, 0 fld.d $fa2, $a5, 1656 fld.d $fa3, $a4, 1656 @@ -2504,84 +2514,90 @@ L3psycho_anal: # @L3psycho_anal fsel $fa1, $fa1, $fa3, $fcc0 fst.d $fa2, $a4, 1168 fst.d $fa1, $a4, 1656 - b .LBB0_205 -.LBB0_215: # %.loopexitthread-pre-split + b .LBB0_204 +.LBB0_214: # %.loopexitthread-pre-split ld.w $a1, $s3, 36 sltui $fp, $a0, 1 ori $a0, $zero, 1 - bne $a1, $a0, .LBB0_203 -.LBB0_216: # %.preheader944.preheader + bne $a1, $a0, .LBB0_202 +.LBB0_215: # %.preheader944.preheader move $s0, $zero - ld.d $a0, $sp, 112 # 8-byte Folded Reload - fld.d $fs1, $a0, %pc_lo12(.LCPI0_0) movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs2, $a0 vldi $vr3, -912 ori $s1, $zero, 128 - fmov.d $fs2, $fs0 - b .LBB0_218 + fmov.d $fs1, $fs0 + b .LBB0_217 .p2align 4, , 16 -.LBB0_217: # in Loop: Header=BB0_218 Depth=1 +.LBB0_216: # in Loop: Header=BB0_217 Depth=1 fdiv.d $fa0, $fa1, $fa0 pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 vldi $vr3, -912 # kill: def $f0_64 killed $f0_64 def $vr0 - fadd.d $fs2, $fs2, $fa0 + fadd.d $fs1, $fs1, $fa0 addi.d $s0, $s0, 8 fadd.d $fs0, $fs0, $fa3 - beq $s0, $s1, .LBB0_220 -.LBB0_218: # %.preheader944 + beq $s0, $s1, .LBB0_219 +.LBB0_217: # %.preheader944 # =>This Inner Loop Header: Depth=1 - add.d $a0, $s5, $s0 + add.d $a0, $s7, $s0 fld.d $fa1, $a0, 40 fld.d $fa2, $a0, 528 fcmp.clt.d $fcc0, $fa1, $fa2 fsel $fa0, $fa2, $fa1, $fcc0 fcmp.clt.d $fcc0, $fa2, $fa1 fsel $fa1, $fa2, $fa1, $fcc0 - fmul.d $fa2, $fa0, $fs1 + fmul.d $fa2, $fa0, $fs2 fcmp.cle.d $fcc0, $fa2, $fa1 - bceqz $fcc0, .LBB0_217 -# %bb.219: # in Loop: Header=BB0_218 Depth=1 + bceqz $fcc0, .LBB0_216 +# %bb.218: # in Loop: Header=BB0_217 Depth=1 vldi $vr0, -1016 - fadd.d $fs2, $fs2, $fa0 + fadd.d $fs1, $fs1, $fa0 addi.d $s0, $s0, 8 fadd.d $fs0, $fs0, $fa3 - bne $s0, $s1, .LBB0_218 -.LBB0_220: # %.preheader.preheader + bne $s0, $s1, .LBB0_217 +.LBB0_219: # %.preheader.preheader vrepli.b $vr3, 0 addi.w $s0, $zero, -24 - b .LBB0_223 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs2, $a0 + b .LBB0_222 .p2align 4, , 16 -.LBB0_221: # in Loop: Header=BB0_223 Depth=1 +.LBB0_220: # in Loop: Header=BB0_222 Depth=1 fdiv.d $fa0, $fa1, $fa0 - vst $vr4, $sp, 432 # 16-byte Folded Spill + vst $vr4, $sp, 480 # 16-byte Folded Spill pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 - vld $vr4, $sp, 432 # 16-byte Folded Reload + vld $vr4, $sp, 480 # 16-byte Folded Reload # kill: def $f0_64 killed $f0_64 def $vr0 -.LBB0_222: # in Loop: Header=BB0_223 Depth=1 +.LBB0_221: # in Loop: Header=BB0_222 Depth=1 vldi $vr3, -912 - vld $vr1, $sp, 544 # 16-byte Folded Reload + vld $vr1, $sp, 592 # 16-byte Folded Reload vpackev.d $vr1, $vr1, $vr3 - vld $vr2, $sp, 560 # 16-byte Folded Reload + vld $vr2, $sp, 608 # 16-byte Folded Reload vfadd.d $vr1, $vr2, $vr1 - vld $vr2, $sp, 528 # 16-byte Folded Reload + vld $vr2, $sp, 576 # 16-byte Folded Reload vpackev.d $vr2, $vr2, $vr3 vfadd.d $vr1, $vr1, $vr2 - vld $vr2, $sp, 512 # 16-byte Folded Reload + vld $vr2, $sp, 560 # 16-byte Folded Reload vpackev.d $vr2, $vr2, $vr3 vfadd.d $vr1, $vr1, $vr2 - vld $vr2, $sp, 496 # 16-byte Folded Reload + vld $vr2, $sp, 544 # 16-byte Folded Reload vpackev.d $vr2, $vr2, $vr3 vfadd.d $vr1, $vr1, $vr2 - vld $vr2, $sp, 480 # 16-byte Folded Reload + vld $vr2, $sp, 528 # 16-byte Folded Reload vpackev.d $vr2, $vr2, $vr3 vfadd.d $vr1, $vr1, $vr2 - vld $vr2, $sp, 464 # 16-byte Folded Reload + vld $vr2, $sp, 512 # 16-byte Folded Reload vpackev.d $vr2, $vr2, $vr3 vfadd.d $vr1, $vr1, $vr2 - vld $vr2, $sp, 448 # 16-byte Folded Reload + vld $vr2, $sp, 496 # 16-byte Folded Reload vpackev.d $vr2, $vr2, $vr3 vfadd.d $vr1, $vr1, $vr2 vpackev.d $vr2, $vr4, $vr3 @@ -2589,236 +2605,239 @@ L3psycho_anal: # @L3psycho_anal vpackev.d $vr0, $vr0, $vr3 addi.d $s0, $s0, 8 vfadd.d $vr3, $vr1, $vr0 - beqz $s0, .LBB0_249 -.LBB0_223: # %.preheader + beqz $s0, .LBB0_248 +.LBB0_222: # %.preheader # =>This Inner Loop Header: Depth=1 - add.d $s1, $s5, $s0 + add.d $s1, $s7, $s0 fld.d $fa1, $s1, 272 fld.d $fa2, $s1, 760 fcmp.clt.d $fcc0, $fa1, $fa2 fsel $fa0, $fa2, $fa1, $fcc0 fcmp.clt.d $fcc0, $fa2, $fa1 fsel $fa1, $fa2, $fa1, $fcc0 - fmul.d $fa2, $fa0, $fs1 + fmul.d $fa2, $fa0, $fs2 fcmp.cle.d $fcc0, $fa2, $fa1 - vst $vr3, $sp, 560 # 16-byte Folded Spill - bcnez $fcc0, .LBB0_225 -# %bb.224: # in Loop: Header=BB0_223 Depth=1 + vst $vr3, $sp, 608 # 16-byte Folded Spill + bcnez $fcc0, .LBB0_224 +# %bb.223: # in Loop: Header=BB0_222 Depth=1 fdiv.d $fa0, $fa1, $fa0 pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - b .LBB0_226 + b .LBB0_225 .p2align 4, , 16 -.LBB0_225: # in Loop: Header=BB0_223 Depth=1 +.LBB0_224: # in Loop: Header=BB0_222 Depth=1 vldi $vr0, -1016 -.LBB0_226: # in Loop: Header=BB0_223 Depth=1 - vst $vr0, $sp, 544 # 16-byte Folded Spill +.LBB0_225: # in Loop: Header=BB0_222 Depth=1 + vst $vr0, $sp, 592 # 16-byte Folded Spill fld.d $fa1, $s1, 296 fld.d $fa2, $s1, 784 fcmp.clt.d $fcc0, $fa1, $fa2 fsel $fa0, $fa2, $fa1, $fcc0 fcmp.clt.d $fcc0, $fa2, $fa1 fsel $fa1, $fa2, $fa1, $fcc0 - fmul.d $fa2, $fa0, $fs1 + fmul.d $fa2, $fa0, $fs2 fcmp.cle.d $fcc0, $fa2, $fa1 - bcnez $fcc0, .LBB0_228 -# %bb.227: # in Loop: Header=BB0_223 Depth=1 + bcnez $fcc0, .LBB0_227 +# %bb.226: # in Loop: Header=BB0_222 Depth=1 fdiv.d $fa0, $fa1, $fa0 pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - b .LBB0_229 + b .LBB0_228 .p2align 4, , 16 -.LBB0_228: # in Loop: Header=BB0_223 Depth=1 +.LBB0_227: # in Loop: Header=BB0_222 Depth=1 vldi $vr0, -1016 -.LBB0_229: # in Loop: Header=BB0_223 Depth=1 - vst $vr0, $sp, 528 # 16-byte Folded Spill +.LBB0_228: # in Loop: Header=BB0_222 Depth=1 + vst $vr0, $sp, 576 # 16-byte Folded Spill fld.d $fa1, $s1, 320 fld.d $fa2, $s1, 808 fcmp.clt.d $fcc0, $fa1, $fa2 fsel $fa0, $fa2, $fa1, $fcc0 fcmp.clt.d $fcc0, $fa2, $fa1 fsel $fa1, $fa2, $fa1, $fcc0 - fmul.d $fa2, $fa0, $fs1 + fmul.d $fa2, $fa0, $fs2 fcmp.cle.d $fcc0, $fa2, $fa1 - bcnez $fcc0, .LBB0_231 -# %bb.230: # in Loop: Header=BB0_223 Depth=1 + bcnez $fcc0, .LBB0_230 +# %bb.229: # in Loop: Header=BB0_222 Depth=1 fdiv.d $fa0, $fa1, $fa0 pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - b .LBB0_232 + b .LBB0_231 .p2align 4, , 16 -.LBB0_231: # in Loop: Header=BB0_223 Depth=1 +.LBB0_230: # in Loop: Header=BB0_222 Depth=1 vldi $vr0, -1016 -.LBB0_232: # in Loop: Header=BB0_223 Depth=1 - vst $vr0, $sp, 512 # 16-byte Folded Spill +.LBB0_231: # in Loop: Header=BB0_222 Depth=1 + vst $vr0, $sp, 560 # 16-byte Folded Spill fld.d $fa1, $s1, 344 fld.d $fa2, $s1, 832 fcmp.clt.d $fcc0, $fa1, $fa2 fsel $fa0, $fa2, $fa1, $fcc0 fcmp.clt.d $fcc0, $fa2, $fa1 fsel $fa1, $fa2, $fa1, $fcc0 - fmul.d $fa2, $fa0, $fs1 + fmul.d $fa2, $fa0, $fs2 fcmp.cle.d $fcc0, $fa2, $fa1 - bcnez $fcc0, .LBB0_234 -# %bb.233: # in Loop: Header=BB0_223 Depth=1 + bcnez $fcc0, .LBB0_233 +# %bb.232: # in Loop: Header=BB0_222 Depth=1 fdiv.d $fa0, $fa1, $fa0 pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - b .LBB0_235 + b .LBB0_234 .p2align 4, , 16 -.LBB0_234: # in Loop: Header=BB0_223 Depth=1 +.LBB0_233: # in Loop: Header=BB0_222 Depth=1 vldi $vr0, -1016 -.LBB0_235: # in Loop: Header=BB0_223 Depth=1 - vst $vr0, $sp, 496 # 16-byte Folded Spill +.LBB0_234: # in Loop: Header=BB0_222 Depth=1 + vst $vr0, $sp, 544 # 16-byte Folded Spill fld.d $fa1, $s1, 368 fld.d $fa2, $s1, 856 fcmp.clt.d $fcc0, $fa1, $fa2 fsel $fa0, $fa2, $fa1, $fcc0 fcmp.clt.d $fcc0, $fa2, $fa1 fsel $fa1, $fa2, $fa1, $fcc0 - fmul.d $fa2, $fa0, $fs1 + fmul.d $fa2, $fa0, $fs2 fcmp.cle.d $fcc0, $fa2, $fa1 - bcnez $fcc0, .LBB0_237 -# %bb.236: # in Loop: Header=BB0_223 Depth=1 + bcnez $fcc0, .LBB0_236 +# %bb.235: # in Loop: Header=BB0_222 Depth=1 fdiv.d $fa0, $fa1, $fa0 pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - b .LBB0_238 + b .LBB0_237 .p2align 4, , 16 -.LBB0_237: # in Loop: Header=BB0_223 Depth=1 +.LBB0_236: # in Loop: Header=BB0_222 Depth=1 vldi $vr0, -1016 -.LBB0_238: # in Loop: Header=BB0_223 Depth=1 - vst $vr0, $sp, 480 # 16-byte Folded Spill +.LBB0_237: # in Loop: Header=BB0_222 Depth=1 + vst $vr0, $sp, 528 # 16-byte Folded Spill fld.d $fa1, $s1, 392 fld.d $fa2, $s1, 880 fcmp.clt.d $fcc0, $fa1, $fa2 fsel $fa0, $fa2, $fa1, $fcc0 fcmp.clt.d $fcc0, $fa2, $fa1 fsel $fa1, $fa2, $fa1, $fcc0 - fmul.d $fa2, $fa0, $fs1 + fmul.d $fa2, $fa0, $fs2 fcmp.cle.d $fcc0, $fa2, $fa1 - bcnez $fcc0, .LBB0_240 -# %bb.239: # in Loop: Header=BB0_223 Depth=1 + bcnez $fcc0, .LBB0_239 +# %bb.238: # in Loop: Header=BB0_222 Depth=1 fdiv.d $fa0, $fa1, $fa0 pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - b .LBB0_241 + b .LBB0_240 .p2align 4, , 16 -.LBB0_240: # in Loop: Header=BB0_223 Depth=1 +.LBB0_239: # in Loop: Header=BB0_222 Depth=1 vldi $vr0, -1016 -.LBB0_241: # in Loop: Header=BB0_223 Depth=1 - vst $vr0, $sp, 464 # 16-byte Folded Spill +.LBB0_240: # in Loop: Header=BB0_222 Depth=1 + vst $vr0, $sp, 512 # 16-byte Folded Spill fld.d $fa1, $s1, 416 fld.d $fa2, $s1, 904 fcmp.clt.d $fcc0, $fa1, $fa2 fsel $fa0, $fa2, $fa1, $fcc0 fcmp.clt.d $fcc0, $fa2, $fa1 fsel $fa1, $fa2, $fa1, $fcc0 - fmul.d $fa2, $fa0, $fs1 + fmul.d $fa2, $fa0, $fs2 fcmp.cle.d $fcc0, $fa2, $fa1 - bcnez $fcc0, .LBB0_243 -# %bb.242: # in Loop: Header=BB0_223 Depth=1 + bcnez $fcc0, .LBB0_242 +# %bb.241: # in Loop: Header=BB0_222 Depth=1 fdiv.d $fa0, $fa1, $fa0 pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - b .LBB0_244 + b .LBB0_243 .p2align 4, , 16 -.LBB0_243: # in Loop: Header=BB0_223 Depth=1 +.LBB0_242: # in Loop: Header=BB0_222 Depth=1 vldi $vr0, -1016 -.LBB0_244: # in Loop: Header=BB0_223 Depth=1 - vst $vr0, $sp, 448 # 16-byte Folded Spill +.LBB0_243: # in Loop: Header=BB0_222 Depth=1 + vst $vr0, $sp, 496 # 16-byte Folded Spill fld.d $fa1, $s1, 440 fld.d $fa2, $s1, 928 fcmp.clt.d $fcc0, $fa1, $fa2 fsel $fa0, $fa2, $fa1, $fcc0 fcmp.clt.d $fcc0, $fa2, $fa1 fsel $fa1, $fa2, $fa1, $fcc0 - fmul.d $fa2, $fa0, $fs1 + fmul.d $fa2, $fa0, $fs2 fcmp.cle.d $fcc0, $fa2, $fa1 - bcnez $fcc0, .LBB0_246 -# %bb.245: # in Loop: Header=BB0_223 Depth=1 + bcnez $fcc0, .LBB0_245 +# %bb.244: # in Loop: Header=BB0_222 Depth=1 fdiv.d $fa0, $fa1, $fa0 pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 fmov.d $fa4, $fa0 - b .LBB0_247 + b .LBB0_246 .p2align 4, , 16 -.LBB0_246: # in Loop: Header=BB0_223 Depth=1 +.LBB0_245: # in Loop: Header=BB0_222 Depth=1 vldi $vr4, -1016 -.LBB0_247: # in Loop: Header=BB0_223 Depth=1 +.LBB0_246: # in Loop: Header=BB0_222 Depth=1 fld.d $fa1, $s1, 464 fld.d $fa2, $s1, 952 fcmp.clt.d $fcc0, $fa1, $fa2 fsel $fa0, $fa2, $fa1, $fcc0 fcmp.clt.d $fcc0, $fa2, $fa1 fsel $fa1, $fa2, $fa1, $fcc0 - fmul.d $fa2, $fa0, $fs1 + fmul.d $fa2, $fa0, $fs2 fcmp.cle.d $fcc0, $fa2, $fa1 - bceqz $fcc0, .LBB0_221 -# %bb.248: # in Loop: Header=BB0_223 Depth=1 + bceqz $fcc0, .LBB0_220 +# %bb.247: # in Loop: Header=BB0_222 Depth=1 vldi $vr0, -1016 - b .LBB0_222 -.LBB0_249: - pcalau12i $a0, %pc_hi20(.LCPI0_29) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_29) - fdiv.d $fa1, $fs2, $fs0 - fmul.d $fa1, $fa1, $fa0 + b .LBB0_221 +.LBB0_248: + fdiv.d $fa0, $fs1, $fs0 + lu12i.w $a0, 419430 + ori $a0, $a0, 1638 + lu32i.d $a0, 419430 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 vldi $vr2, -928 - fmin.d $fs0, $fa1, $fa2 - vreplvei.d $vr1, $vr3, 0 + fmin.d $fs0, $fa0, $fa2 + vreplvei.d $vr0, $vr3, 0 vreplvei.d $vr3, $vr3, 1 - fdiv.d $fa1, $fa3, $fa1 - fmul.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa3, $fa0 + fmul.d $fa0, $fa0, $fa1 fmin.d $fs1, $fa0, $fa2 ld.w $s2, $s3, 204 - ld.d $s0, $sp, 1264 - blez $s2, .LBB0_269 -.LBB0_250: # %._crit_edge1154 + ld.d $s0, $sp, 1312 + blez $s2, .LBB0_268 +.LBB0_249: # %._crit_edge1154 slli.d $a2, $s2, 2 - addi.d $a0, $sp, 1088 - addi.d $s1, $sp, 1088 + addi.d $a0, $sp, 1136 + addi.d $s1, $sp, 1136 move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $zero, 2 - bne $s2, $a0, .LBB0_256 -# %bb.251: + bne $s2, $a0, .LBB0_255 +# %bb.250: ld.w $a0, $s3, 156 - beqz $a0, .LBB0_253 -# %bb.252: + beqz $a0, .LBB0_252 +# %bb.251: ld.w $a0, $s3, 36 ori $a1, $zero, 1 - bne $a0, $a1, .LBB0_256 -.LBB0_253: - ld.w $a0, $sp, 1080 - beqz $a0, .LBB0_255 -# %bb.254: - ld.w $a0, $sp, 1084 - bnez $a0, .LBB0_256 -.LBB0_255: - st.d $zero, $sp, 1080 -.LBB0_256: # %.lr.ph1158.preheader + bne $a0, $a1, .LBB0_255 +.LBB0_252: + ld.w $a0, $sp, 1128 + beqz $a0, .LBB0_254 +# %bb.253: + ld.w $a0, $sp, 1132 + bnez $a0, .LBB0_255 +.LBB0_254: + st.d $zero, $sp, 1128 +.LBB0_255: # %.lr.ph1158.preheader pcalau12i $a0, %pc_hi20(L3psycho_anal.blocktype_old) addi.d $a0, $a0, %pc_lo12(L3psycho_anal.blocktype_old) - addi.d $a1, $sp, 1080 + addi.d $a1, $sp, 1128 ori $a2, $zero, 3 pcalau12i $a3, %pc_hi20(.LJTI0_0) addi.d $a3, $a3, %pc_lo12(.LJTI0_0) move $a4, $zero move $a5, $s0 - b .LBB0_259 + b .LBB0_258 .p2align 4, , 16 -.LBB0_257: # in Loop: Header=BB0_259 Depth=1 +.LBB0_256: # in Loop: Header=BB0_258 Depth=1 move $a7, $zero st.w $zero, $s1, 0 -.LBB0_258: # in Loop: Header=BB0_259 Depth=1 +.LBB0_257: # in Loop: Header=BB0_258 Depth=1 st.w $a6, $a5, 0 st.w $a7, $a0, 0 addi.d $a4, $a4, 1 @@ -2827,45 +2846,45 @@ L3psycho_anal: # @L3psycho_anal addi.d $a5, $a5, 4 addi.d $a1, $a1, 4 addi.d $a0, $a0, 4 - bge $a4, $a6, .LBB0_269 -.LBB0_259: # %.lr.ph1158 + bge $a4, $a6, .LBB0_268 +.LBB0_258: # %.lr.ph1158 # =>This Inner Loop Header: Depth=1 ld.w $a6, $a1, 0 - beqz $a6, .LBB0_263 -# %bb.260: # in Loop: Header=BB0_259 Depth=1 + beqz $a6, .LBB0_262 +# %bb.259: # in Loop: Header=BB0_258 Depth=1 ld.w $a6, $a0, 0 - bltu $a2, $a6, .LBB0_266 -# %bb.261: # in Loop: Header=BB0_259 Depth=1 + bltu $a2, $a6, .LBB0_265 +# %bb.260: # in Loop: Header=BB0_258 Depth=1 slli.d $a7, $a6, 2 ldx.w $a7, $a3, $a7 add.d $a7, $a3, $a7 jr $a7 -.LBB0_262: # in Loop: Header=BB0_259 Depth=1 +.LBB0_261: # in Loop: Header=BB0_258 Depth=1 ori $a7, $zero, 3 st.w $a7, $s1, 0 - b .LBB0_268 + b .LBB0_267 .p2align 4, , 16 -.LBB0_263: # in Loop: Header=BB0_259 Depth=1 +.LBB0_262: # in Loop: Header=BB0_258 Depth=1 ld.w $a6, $a0, 0 ori $a7, $zero, 2 st.w $a7, $s1, 0 - beq $a6, $a2, .LBB0_267 -# %bb.264: # in Loop: Header=BB0_259 Depth=1 - bnez $a6, .LBB0_258 -# %bb.265: # %.thread - # in Loop: Header=BB0_259 Depth=1 + beq $a6, $a2, .LBB0_266 +# %bb.263: # in Loop: Header=BB0_258 Depth=1 + bnez $a6, .LBB0_257 +# %bb.264: # %.thread + # in Loop: Header=BB0_258 Depth=1 ori $a6, $zero, 1 - b .LBB0_258 -.LBB0_266: # %._crit_edge1340 - # in Loop: Header=BB0_259 Depth=1 + b .LBB0_257 +.LBB0_265: # %._crit_edge1340 + # in Loop: Header=BB0_258 Depth=1 ld.w $a7, $s1, 0 - b .LBB0_258 -.LBB0_267: # in Loop: Header=BB0_259 Depth=1 + b .LBB0_257 +.LBB0_266: # in Loop: Header=BB0_258 Depth=1 ori $a7, $zero, 2 -.LBB0_268: # in Loop: Header=BB0_259 Depth=1 +.LBB0_267: # in Loop: Header=BB0_258 Depth=1 ori $a6, $zero, 2 - b .LBB0_258 -.LBB0_269: # %._crit_edge1159 + b .LBB0_257 +.LBB0_268: # %._crit_edge1159 ld.w $a0, $s0, 0 addi.d $a0, $a0, -2 sltui $a0, $a0, 1 @@ -2877,56 +2896,56 @@ L3psycho_anal: # @L3psycho_anal maskeqz $a0, $a1, $a0 or $a0, $a0, $a3 fld.d $fa0, $a0, 0 - ld.d $a0, $sp, 88 # 8-byte Folded Reload + ld.d $a0, $sp, 80 # 8-byte Folded Reload fst.d $fa0, $a0, 0 fst.d $fs1, $a1, 0 fst.d $fs0, $a2, 0 - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 88 # 8-byte Folded Reload fst.d $fs0, $a0, 0 - beqz $fp, .LBB0_272 -# %bb.270: - fld.s $fa0, $sp, 588 - fld.s $fa1, $sp, 584 + beqz $fp, .LBB0_271 +# %bb.269: + fld.s $fa0, $sp, 636 + fld.s $fa1, $sp, 632 pcalau12i $a0, %pc_hi20(L3psycho_anal.ms_ener_ratio_old) fld.d $fa2, $a0, %pc_lo12(L3psycho_anal.ms_ener_ratio_old) fadd.s $fa1, $fa0, $fa1 - ld.d $a1, $sp, 104 # 8-byte Folded Reload + ld.d $a1, $sp, 96 # 8-byte Folded Reload fst.d $fa2, $a1, 0 movgr2fr.w $fa2, $zero fcmp.cule.s $fcc0, $fa1, $fa2 st.d $zero, $a0, %pc_lo12(L3psycho_anal.ms_ener_ratio_old) - bcnez $fcc0, .LBB0_273 -# %bb.271: + bcnez $fcc0, .LBB0_272 +# %bb.270: fdiv.s $fa0, $fa0, $fa1 fcvt.d.s $fa0, $fa0 fst.d $fa0, $a0, %pc_lo12(L3psycho_anal.ms_ener_ratio_old) - b .LBB0_273 -.LBB0_272: - ld.d $a0, $sp, 104 # 8-byte Folded Reload + b .LBB0_272 +.LBB0_271: + ld.d $a0, $sp, 96 # 8-byte Folded Reload st.d $zero, $a0, 0 -.LBB0_273: - fld.d $fs7, $sp, 1096 # 8-byte Folded Reload - fld.d $fs6, $sp, 1104 # 8-byte Folded Reload - fld.d $fs5, $sp, 1112 # 8-byte Folded Reload - fld.d $fs4, $sp, 1120 # 8-byte Folded Reload - fld.d $fs3, $sp, 1128 # 8-byte Folded Reload - fld.d $fs2, $sp, 1136 # 8-byte Folded Reload - fld.d $fs1, $sp, 1144 # 8-byte Folded Reload - fld.d $fs0, $sp, 1152 # 8-byte Folded Reload - ld.d $s8, $sp, 1160 # 8-byte Folded Reload - ld.d $s7, $sp, 1168 # 8-byte Folded Reload - ld.d $s6, $sp, 1176 # 8-byte Folded Reload - ld.d $s5, $sp, 1184 # 8-byte Folded Reload - ld.d $s4, $sp, 1192 # 8-byte Folded Reload - ld.d $s3, $sp, 1200 # 8-byte Folded Reload - ld.d $s2, $sp, 1208 # 8-byte Folded Reload - ld.d $s1, $sp, 1216 # 8-byte Folded Reload - ld.d $s0, $sp, 1224 # 8-byte Folded Reload - ld.d $fp, $sp, 1232 # 8-byte Folded Reload - ld.d $ra, $sp, 1240 # 8-byte Folded Reload - addi.d $sp, $sp, 1248 +.LBB0_272: + fld.d $fs7, $sp, 1144 # 8-byte Folded Reload + fld.d $fs6, $sp, 1152 # 8-byte Folded Reload + fld.d $fs5, $sp, 1160 # 8-byte Folded Reload + fld.d $fs4, $sp, 1168 # 8-byte Folded Reload + fld.d $fs3, $sp, 1176 # 8-byte Folded Reload + fld.d $fs2, $sp, 1184 # 8-byte Folded Reload + fld.d $fs1, $sp, 1192 # 8-byte Folded Reload + fld.d $fs0, $sp, 1200 # 8-byte Folded Reload + ld.d $s8, $sp, 1208 # 8-byte Folded Reload + ld.d $s7, $sp, 1216 # 8-byte Folded Reload + ld.d $s6, $sp, 1224 # 8-byte Folded Reload + ld.d $s5, $sp, 1232 # 8-byte Folded Reload + ld.d $s4, $sp, 1240 # 8-byte Folded Reload + ld.d $s3, $sp, 1248 # 8-byte Folded Reload + ld.d $s2, $sp, 1256 # 8-byte Folded Reload + ld.d $s1, $sp, 1264 # 8-byte Folded Reload + ld.d $s0, $sp, 1272 # 8-byte Folded Reload + ld.d $fp, $sp, 1280 # 8-byte Folded Reload + ld.d $ra, $sp, 1288 # 8-byte Folded Reload + addi.d $sp, $sp, 1296 ret -.LBB0_274: +.LBB0_273: pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) ld.d $a3, $a0, 0 @@ -2938,7 +2957,7 @@ L3psycho_anal: # @L3psycho_anal jirl $ra, $ra, 0 pcaddu18i $ra, %call36(abort) jirl $ra, $ra, 0 -.LBB0_275: +.LBB0_274: pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) ld.d $a0, $a0, 0 @@ -2955,29 +2974,13 @@ L3psycho_anal: # @L3psycho_anal .section .rodata,"a",@progbits .p2align 2, 0x0 .LJTI0_0: - .word .LBB0_257-.LJTI0_0 - .word .LBB0_274-.LJTI0_0 - .word .LBB0_262-.LJTI0_0 - .word .LBB0_257-.LJTI0_0 + .word .LBB0_256-.LJTI0_0 + .word .LBB0_273-.LJTI0_0 + .word .LBB0_261-.LJTI0_0 + .word .LBB0_256-.LJTI0_0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function L3para_read -.LCPI1_0: - .dword 0xbfcd791c5f888823 # double -0.23025850929940458 -.LCPI1_1: - .dword 0x3fde5604189374bc # double 0.47399999999999998 -.LCPI1_2: - .dword 0x402f9f6e6106ab15 # double 15.811389 -.LCPI1_3: - .dword 0xc031800000000000 # double -17.5 -.LCPI1_4: - .dword 0xc04e000000000000 # double -60 -.LCPI1_5: - .dword 0x3fcd791c5f888823 # double 0.23025850929940458 -.LCPI1_6: - .dword 0x3f847ae147ae147b # double 0.01 .text - .globl L3para_read + .globl L3para_read # -- Begin function L3para_read .p2align 5 .type L3para_read,@function L3para_read: # @L3para_read @@ -3021,7 +3024,7 @@ L3para_read: # @L3para_read ld.d $t0, $sp, 1296 st.d $t0, $sp, 88 # 8-byte Folded Spill st.d $a7, $sp, 80 # 8-byte Folded Spill - st.d $a6, $sp, 112 # 8-byte Folded Spill + st.d $a6, $sp, 120 # 8-byte Folded Spill st.d $a5, $sp, 104 # 8-byte Folded Spill move $s3, $a4 move $s4, $a3 @@ -3033,8 +3036,12 @@ L3para_read: # @L3para_read ld.d $s7, $a0, %got_pc_lo12(psy_data) move $s0, $zero move $s8, $zero - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI1_0) + lu12i.w $a0, 391304 + ori $a0, $a0, 2083 + lu32i.d $a0, -165604 + st.d $a0, $sp, 112 # 8-byte Folded Spill + lu52i.d $a0, $a0, -1028 + movgr2fr.d $fs1, $a0 ori $a1, $zero, 6 b .LBB1_3 .p2align 4, , 16 @@ -3133,56 +3140,64 @@ L3para_read: # @L3para_read move $s0, $a2 b .LBB1_2 .LBB1_14: # %.preheader273 - pcalau12i $s3, %pc_hi20(.LCPI1_1) - pcalau12i $s6, %pc_hi20(.LCPI1_2) - pcalau12i $s5, %pc_hi20(.LCPI1_3) - pcalau12i $a0, %pc_hi20(.LCPI1_4) - st.d $a0, $sp, 120 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_5) - st.d $a0, $sp, 128 # 8-byte Folded Spill - ld.d $s8, $sp, 104 # 8-byte Folded Reload + lu12i.w $s4, 100663 + ld.d $s6, $sp, 120 # 8-byte Folded Reload + ld.d $s8, $sp, 112 # 8-byte Folded Reload + ld.d $s5, $sp, 104 # 8-byte Folded Reload blez $s0, .LBB1_21 # %bb.15: # %.preheader272.us.preheader - move $fp, $zero + move $s1, $zero vldi $vr3, -904 vldi $vr4, -1016 - fld.d $fs1, $s3, %pc_lo12(.LCPI1_1) - fld.d $fs2, $s6, %pc_lo12(.LCPI1_2) + ori $a0, $s4, 1212 + lu32i.d $a0, -109052 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fs1, $a0 vldi $vr5, -994 + lu12i.w $a0, 397418 + ori $a0, $a0, 2837 + lu32i.d $a0, -24722 + lu52i.d $a0, $a0, 1026 + movgr2fr.d $fs2, $a0 vldi $vr6, -912 - fld.d $fs3, $s5, %pc_lo12(.LCPI1_3) + ori $a0, $zero, 0 + lu32i.d $a0, 98304 + lu52i.d $a0, $a0, -1021 + movgr2fr.d $fs3, $a0 + ori $a0, $zero, 0 movgr2fr.d $fs4, $zero - ld.d $a0, $sp, 120 # 8-byte Folded Reload - fld.d $fs5, $a0, %pc_lo12(.LCPI1_4) - ld.d $a0, $sp, 128 # 8-byte Folded Reload - fld.d $fs6, $a0, %pc_lo12(.LCPI1_5) + lu32i.d $a0, -131072 + lu52i.d $a0, $a0, -1020 + movgr2fr.d $fs5, $a0 + lu52i.d $a0, $s8, 1020 + movgr2fr.d $fs6, $a0 b .LBB1_17 .p2align 4, , 16 .LBB1_16: # %._crit_edge307.us # in Loop: Header=BB1_17 Depth=1 - addi.d $fp, $fp, 1 - addi.d $s8, $s4, 512 - beq $fp, $s0, .LBB1_21 + addi.d $s1, $s1, 1 + addi.d $s5, $s3, 512 + beq $s1, $s0, .LBB1_21 .LBB1_17: # %.preheader272.us # =>This Loop Header: Depth=1 # Child Loop BB1_19 Depth 2 - slli.d $a0, $fp, 3 - addi.d $s1, $sp, 640 - fldx.d $fs7, $a0, $s1 + slli.d $a0, $s1, 3 + addi.d $fp, $sp, 640 + fldx.d $fs7, $a0, $fp move $s2, $zero - move $s4, $s8 + move $s3, $s5 b .LBB1_19 .p2align 4, , 16 .LBB1_18: # in Loop: Header=BB1_19 Depth=2 - fst.d $fa0, $s8, 0 + fst.d $fa0, $s5, 0 addi.d $s2, $s2, 1 - addi.d $s8, $s8, 8 - addi.d $s1, $s1, 8 + addi.d $s5, $s5, 8 + addi.d $fp, $fp, 8 beq $s0, $s2, .LBB1_16 .LBB1_19: # Parent Loop BB1_17 Depth=1 # => This Inner Loop Header: Depth=2 - fld.d $fa0, $s1, 0 - sltu $a0, $fp, $s2 + fld.d $fa0, $fp, 0 + sltu $a0, $s1, $s2 fsub.d $fa0, $fs7, $fa0 movgr2cf $fcc0, $a0 fsel $fa1, $fa4, $fa3, $fcc0 @@ -3228,7 +3243,6 @@ L3para_read: # @L3para_read addi.w $a0, $a2, 1 bceqz $fcc0, .LBB1_27 # %bb.22: # %.preheader270 - ld.d $s8, $sp, 112 # 8-byte Folded Reload bltz $a2, .LBB1_28 # %bb.23: # %.lr.ph312.preheader move $a2, $zero @@ -3276,7 +3290,6 @@ L3para_read: # @L3para_read alsl.w $a0, $a0, $a2, 1 alsl.d $a1, $a0, $a1, 3 move $a0, $s0 - ld.d $s8, $sp, 112 # 8-byte Folded Reload ld.d $t2, $sp, 96 # 8-byte Folded Reload ld.d $t3, $sp, 88 # 8-byte Folded Reload ld.d $t4, $sp, 80 # 8-byte Folded Reload @@ -3534,13 +3547,13 @@ L3para_read: # @L3para_read ftintrz.w.d $fa0, $fa0 movfr2gr.s $a1, $fa0 fcmp.ceq.d $fcc0, $fs0, $fa1 - addi.w $s7, $a1, 1 + addi.w $s5, $a1, 1 bceqz $fcc0, .LBB1_69 # %bb.63: # %.preheader270.5 move $a0, $zero bltz $a1, .LBB1_68 # %bb.64: # %.lr.ph312.preheader.5 - bstrpick.d $a1, $s7, 31, 0 + bstrpick.d $a1, $s5, 31, 0 addi.d $a2, $sp, 136 move $a3, $t2 .p2align 4, , 16 @@ -3572,61 +3585,73 @@ L3para_read: # @L3para_read addi.d $a3, $a3, 4 bne $a1, $a0, .LBB1_65 # %bb.67: # %._crit_edge313.loopexit.5 - move $a0, $s7 + move $a0, $s5 .LBB1_68: # %._crit_edge313.5 slli.d $a0, $a0, 2 addi.w $a1, $zero, -1 lu32i.d $a1, 0 stx.w $a1, $t2, $a0 - bgtz $s7, .LBB1_70 + bgtz $s5, .LBB1_70 b .LBB1_76 .LBB1_69: - slli.d $a1, $s7, 2 - alsl.w $a1, $s7, $a1, 1 + slli.d $a1, $s5, 2 + alsl.w $a1, $s5, $a1, 1 alsl.d $s2, $a1, $s2, 3 - move $s7, $a0 - blez $s7, .LBB1_76 + move $s5, $a0 + blez $s5, .LBB1_76 .LBB1_70: # %.preheader268.us.preheader - move $fp, $zero - fld.d $fs1, $s3, %pc_lo12(.LCPI1_1) + move $s0, $zero + ori $a0, $s4, 1212 + lu32i.d $a0, -109052 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fs1, $a0 vldi $vr3, -904 vldi $vr4, -1016 - fld.d $fs2, $s6, %pc_lo12(.LCPI1_2) vldi $vr5, -994 + lu12i.w $a0, 397418 + ori $a0, $a0, 2837 + lu32i.d $a0, -24722 + lu52i.d $a0, $a0, 1026 + movgr2fr.d $fs2, $a0 vldi $vr6, -912 - fld.d $fs3, $s5, %pc_lo12(.LCPI1_3) + ori $a0, $zero, 0 + lu32i.d $a0, 98304 + lu52i.d $a0, $a0, -1021 + movgr2fr.d $fs3, $a0 + ori $a0, $zero, 0 movgr2fr.d $fs4, $zero - ld.d $a0, $sp, 120 # 8-byte Folded Reload - fld.d $fs5, $a0, %pc_lo12(.LCPI1_4) - ld.d $a0, $sp, 128 # 8-byte Folded Reload - fld.d $fs6, $a0, %pc_lo12(.LCPI1_5) + lu32i.d $a0, -131072 + lu52i.d $a0, $a0, -1020 + movgr2fr.d $fs5, $a0 + lu52i.d $a0, $s8, 1020 + movgr2fr.d $fs6, $a0 b .LBB1_72 .p2align 4, , 16 .LBB1_71: # %._crit_edge321.us # in Loop: Header=BB1_72 Depth=1 - addi.d $fp, $fp, 1 - addi.d $s8, $s8, 512 - beq $fp, $s7, .LBB1_76 + addi.d $s0, $s0, 1 + addi.d $s6, $s6, 512 + beq $s0, $s5, .LBB1_76 .LBB1_72: # %.preheader268.us # =>This Loop Header: Depth=1 # Child Loop BB1_74 Depth 2 - slli.d $a0, $fp, 3 - addi.d $s0, $sp, 136 - fldx.d $fs7, $a0, $s0 + slli.d $a0, $s0, 3 + addi.d $fp, $sp, 136 + fldx.d $fs7, $a0, $fp move $s1, $zero - move $s3, $s8 + move $s3, $s6 b .LBB1_74 .p2align 4, , 16 .LBB1_73: # in Loop: Header=BB1_74 Depth=2 fst.d $fa0, $s3, 0 addi.d $s1, $s1, 1 addi.d $s3, $s3, 8 - addi.d $s0, $s0, 8 - beq $s7, $s1, .LBB1_71 + addi.d $fp, $fp, 8 + beq $s5, $s1, .LBB1_71 .LBB1_74: # Parent Loop BB1_72 Depth=1 # => This Inner Loop Header: Depth=2 - fld.d $fa0, $s0, 0 - sltu $a0, $fp, $s1 + fld.d $fa0, $fp, 0 + sltu $a0, $s0, $s1 fsub.d $fa0, $fs7, $fa0 movgr2cf $fcc0, $a0 fsel $fa1, $fa4, $fa3, $fcc0 @@ -3668,11 +3693,14 @@ L3para_read: # @L3para_read addi.d $a1, $s3, 8 ld.d $s4, $sp, 24 # 8-byte Folded Reload addi.d $a2, $s4, 4 - pcalau12i $a3, %pc_hi20(.LCPI1_6) - fld.d $fa0, $a3, %pc_lo12(.LCPI1_6) ld.d $s5, $sp, 16 # 8-byte Folded Reload addi.d $a3, $s5, 4 vldi $vr1, -912 + lu12i.w $a4, 293601 + ori $a4, $a4, 1147 + lu32i.d $a4, 293601 + lu52i.d $a4, $a4, 1016 + movgr2fr.d $fa0, $a4 ori $a4, $zero, 6 ld.d $t5, $sp, 72 # 8-byte Folded Reload ld.d $t6, $sp, 64 # 8-byte Folded Reload diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/quantize-pvt.s b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/quantize-pvt.s index 6ad63721..8f1436fa 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/quantize-pvt.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/quantize-pvt.s @@ -1,10 +1,6 @@ .file "quantize-pvt.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function iteration_init -.LCPI0_0: - .dword 0x3ff5555555555555 # double 1.3333333333333333 .text - .globl iteration_init + .globl iteration_init # -- Begin function iteration_init .p2align 5 .type iteration_init,@function iteration_init: # @iteration_init @@ -74,11 +70,14 @@ iteration_init: # @iteration_init jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(pow43) addi.d $s2, $a0, %pc_lo12(pow43) - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_0) move $s3, $zero lu12i.w $s1, 2 ori $s4, $s1, 16 + lu12i.w $a0, 349525 + ori $a0, $a0, 1365 + lu32i.d $a0, 349525 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fs0, $a0 .p2align 4, , 16 .LBB0_2: # =>This Inner Loop Header: Depth=1 bstrpick.d $a0, $s3, 31, 0 @@ -295,31 +294,9 @@ iteration_init: # @iteration_init .Lfunc_end0: .size iteration_init, .Lfunc_end0-iteration_init # -- End function - .section .rodata.cst8,"aM",@progbits,8 + .section .rodata.cst16,"aM",@progbits,16 .p2align 3, 0x0 # -- Begin function compute_ath .LCPI1_0: - .dword 0x408f400000000000 # double 1000 -.LCPI1_1: - .dword 0x547d42aea2879f2e # double 9.9999999999999997E+98 -.LCPI1_2: - .dword 0x4092000000000000 # double 1152 -.LCPI1_3: - .dword 0x3f947ae147ae147b # double 0.02 -.LCPI1_4: - .dword 0xbfe999999999999a # double -0.80000000000000004 -.LCPI1_5: - .dword 0xc00a666666666666 # double -3.2999999999999998 -.LCPI1_6: - .dword 0xbfe3333333333333 # double -0.59999999999999998 -.LCPI1_7: - .dword 0x400d1eb851eb851f # double 3.6400000000000001 -.LCPI1_8: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI1_10: - .dword 0x4078000000000000 # double 384 - .section .rodata.cst16,"aM",@progbits,16 - .p2align 3, 0x0 -.LCPI1_9: .dword 0xc069000000000000 # double -200 .dword 0xc05c800000000000 # double -114 .text @@ -328,111 +305,131 @@ iteration_init: # @iteration_init .type compute_ath,@function compute_ath: # @compute_ath # %bb.0: - addi.d $sp, $sp, -224 - st.d $ra, $sp, 216 # 8-byte Folded Spill - st.d $fp, $sp, 208 # 8-byte Folded Spill - st.d $s0, $sp, 200 # 8-byte Folded Spill - st.d $s1, $sp, 192 # 8-byte Folded Spill - st.d $s2, $sp, 184 # 8-byte Folded Spill - st.d $s3, $sp, 176 # 8-byte Folded Spill - st.d $s4, $sp, 168 # 8-byte Folded Spill - st.d $s5, $sp, 160 # 8-byte Folded Spill - st.d $s6, $sp, 152 # 8-byte Folded Spill - st.d $s7, $sp, 144 # 8-byte Folded Spill - st.d $s8, $sp, 136 # 8-byte Folded Spill - fst.d $fs0, $sp, 128 # 8-byte Folded Spill - fst.d $fs1, $sp, 120 # 8-byte Folded Spill - fst.d $fs2, $sp, 112 # 8-byte Folded Spill - fst.d $fs3, $sp, 104 # 8-byte Folded Spill - fst.d $fs4, $sp, 96 # 8-byte Folded Spill - fst.d $fs5, $sp, 88 # 8-byte Folded Spill - fst.d $fs6, $sp, 80 # 8-byte Folded Spill - fst.d $fs7, $sp, 72 # 8-byte Folded Spill + addi.d $sp, $sp, -208 + st.d $ra, $sp, 200 # 8-byte Folded Spill + st.d $fp, $sp, 192 # 8-byte Folded Spill + st.d $s0, $sp, 184 # 8-byte Folded Spill + st.d $s1, $sp, 176 # 8-byte Folded Spill + st.d $s2, $sp, 168 # 8-byte Folded Spill + st.d $s3, $sp, 160 # 8-byte Folded Spill + st.d $s4, $sp, 152 # 8-byte Folded Spill + st.d $s5, $sp, 144 # 8-byte Folded Spill + st.d $s6, $sp, 136 # 8-byte Folded Spill + st.d $s7, $sp, 128 # 8-byte Folded Spill + st.d $s8, $sp, 120 # 8-byte Folded Spill + fst.d $fs0, $sp, 112 # 8-byte Folded Spill + fst.d $fs1, $sp, 104 # 8-byte Folded Spill + fst.d $fs2, $sp, 96 # 8-byte Folded Spill + fst.d $fs3, $sp, 88 # 8-byte Folded Spill + fst.d $fs4, $sp, 80 # 8-byte Folded Spill + fst.d $fs5, $sp, 72 # 8-byte Folded Spill + fst.d $fs6, $sp, 64 # 8-byte Folded Spill + fst.d $fs7, $sp, 56 # 8-byte Folded Spill move $fp, $a0 ld.w $a0, $a0, 16 - pcalau12i $a3, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a3, %pc_lo12(.LCPI1_0) move $s0, $a2 move $s1, $a1 - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fdiv.d $fa2, $fa1, $fa0 - pcalau12i $a0, %pc_hi20(scalefac_band) - addi.d $s4, $a0, %pc_lo12(scalefac_band) - lu12i.w $a0, -382855 - ori $a0, $a0, 3886 - pcalau12i $a1, %pc_hi20(.LCPI1_1) - st.d $a1, $sp, 16 # 8-byte Folded Spill - fld.d $fa0, $a1, %pc_lo12(.LCPI1_1) - fst.d $fa0, $sp, 24 # 8-byte Folded Spill - pcalau12i $a1, %pc_hi20(.LCPI1_2) - fld.d $fs7, $a1, %pc_lo12(.LCPI1_2) - pcalau12i $a1, %pc_hi20(.LCPI1_3) - fld.d $fs4, $a1, %pc_lo12(.LCPI1_3) - pcalau12i $a1, %pc_hi20(.LCPI1_4) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_4) - pcalau12i $a1, %pc_hi20(.LCPI1_5) - fld.d $fs0, $a1, %pc_lo12(.LCPI1_5) - pcalau12i $a1, %pc_hi20(.LCPI1_6) - fld.d $fs6, $a1, %pc_lo12(.LCPI1_6) - pcalau12i $a1, %pc_hi20(.LCPI1_7) - fld.d $fa0, $a1, %pc_lo12(.LCPI1_7) - pcalau12i $a1, %pc_hi20(.LCPI1_8) - fld.d $fs1, $a1, %pc_lo12(.LCPI1_8) - lu32i.d $a0, -179538 - lu52i.d $s2, $a0, 1351 - pcalau12i $a0, %pc_hi20(.LCPI1_9) - addi.d $s3, $a0, %pc_lo12(.LCPI1_9) - move $s7, $zero + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1032 + movgr2fr.d $fa1, $a1 + fdiv.d $fs6, $fa0, $fa1 + pcalau12i $a1, %pc_hi20(scalefac_band) + addi.d $s4, $a1, %pc_lo12(scalefac_band) + lu12i.w $a1, -382855 + ori $a1, $a1, 3886 + lu32i.d $a1, -179538 + lu52i.d $s2, $a1, 1351 + movgr2fr.d $fa0, $s2 + fst.d $fa0, $sp, 16 # 8-byte Folded Spill + lu32i.d $a0, 131072 + lu52i.d $a0, $a0, 1033 + movgr2fr.d $fs5, $a0 + lu12i.w $a0, 293601 + ori $a0, $a0, 1147 + lu32i.d $a0, 293601 + lu52i.d $a0, $a0, 1017 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, -1026 + movgr2fr.d $fs7, $a0 + lu12i.w $a0, 419430 + ori $a0, $a0, 1638 + lu32i.d $a0, -367002 + lu52i.d $a0, $a0, -1024 + movgr2fr.d $fa0, $a0 + fst.d $fa0, $sp, 40 # 8-byte Folded Spill + lu12i.w $a0, 209715 + ori $a0, $a0, 819 + lu32i.d $a0, 209715 + lu52i.d $a0, $a0, -1026 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, 335544 + ori $a0, $a0, 1311 + lu32i.d $a0, -188744 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fa0, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fa1, $a0 + fst.d $fa1, $sp, 48 # 8-byte Folded Spill + pcalau12i $a0, %pc_hi20(.LCPI1_0) + addi.d $s3, $a0, %pc_lo12(.LCPI1_0) + move $s6, $zero ori $a2, $zero, 21 - fst.d $fa2, $sp, 64 # 8-byte Folded Spill - fst.d $fa1, $sp, 56 # 8-byte Folded Spill - fst.d $fa0, $sp, 48 # 8-byte Folded Spill - fst.d $fs1, $sp, 40 # 8-byte Folded Spill + fst.d $fa0, $sp, 32 # 8-byte Folded Spill b .LBB1_2 .p2align 4, , 16 .LBB1_1: # %.loopexit53 # in Loop: Header=BB1_2 Depth=1 - beq $s7, $a2, .LBB1_6 + beq $s6, $a2, .LBB1_6 .LBB1_2: # =>This Loop Header: Depth=1 # Child Loop BB1_4 Depth 2 - move $a0, $s7 - slli.d $a1, $s7, 2 + move $a0, $s6 + slli.d $a1, $s6, 2 + ldx.w $s7, $s4, $a1 + addi.d $s6, $s6, 1 + slli.d $a1, $s6, 2 ldx.w $s8, $s4, $a1 - addi.d $s7, $s7, 1 - slli.d $a1, $s7, 2 - ldx.w $s5, $s4, $a1 slli.d $a1, $a0, 3 stx.d $s2, $s1, $a1 - bge $s8, $s5, .LBB1_1 + bge $s7, $s8, .LBB1_1 # %bb.3: # %.lr.ph.preheader # in Loop: Header=BB1_2 Depth=1 - alsl.d $s6, $a0, $s1, 3 - fld.d $fs5, $sp, 24 # 8-byte Folded Reload + alsl.d $s5, $a0, $s1, 3 + fld.d $fs0, $sp, 16 # 8-byte Folded Reload .p2align 4, , 16 .LBB1_4: # %.lr.ph # Parent Loop BB1_2 Depth=1 # => This Inner Loop Header: Depth=2 - movgr2fr.w $fa0, $s8 + movgr2fr.w $fa0, $s7 ffint.d.w $fa0, $fa0 - fmul.d $fa0, $fa2, $fa0 - fdiv.d $fa0, $fa0, $fs7 + fmul.d $fa0, $fs6, $fa0 + fdiv.d $fa0, $fa0, $fs5 fcmp.clt.d $fcc0, $fa0, $fs4 fsel $fs1, $fa0, $fs4, $fcc0 fmov.d $fa0, $fs1 + fmov.d $fa1, $fs7 pcaddu18i $ra, %call36(pow) jirl $ra, $ra, 0 fmov.d $fs2, $fa0 - fadd.d $fa0, $fs1, $fs0 + fld.d $fa0, $sp, 40 # 8-byte Folded Reload + fadd.d $fa0, $fs1, $fa0 fmul.d $fa0, $fa0, $fa0 - fmul.d $fa0, $fa0, $fs6 + fmul.d $fa0, $fa0, $fs3 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 vldi $vr1, -870 fmul.d $fa0, $fa0, $fa1 - fmov.d $fs3, $fs0 - fld.d $fa1, $sp, 48 # 8-byte Folded Reload - fmadd.d $fs0, $fs2, $fa1, $fa0 + fld.d $fa1, $sp, 32 # 8-byte Folded Reload + fmadd.d $fs2, $fs2, $fa1, $fa0 vldi $vr1, -1008 fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(pow) @@ -441,155 +438,137 @@ compute_ath: # @compute_ath sltui $a0, $a0, 1 slli.d $a0, $a0, 3 fldx.d $fa1, $s3, $a0 - fld.d $fa2, $sp, 40 # 8-byte Folded Reload - fmadd.d $fa0, $fa0, $fa2, $fs0 - fmov.d $fs0, $fs3 + fld.d $fa2, $sp, 48 # 8-byte Folded Reload + fmadd.d $fa0, $fa0, $fa2, $fs2 fadd.d $fa0, $fa0, $fa1 vldi $vr1, -988 fdiv.d $fa1, $fa0, $fa1 vldi $vr0, -988 pcaddu18i $ra, %call36(pow) jirl $ra, $ra, 0 - fld.d $fa1, $sp, 56 # 8-byte Folded Reload - fld.d $fa2, $sp, 64 # 8-byte Folded Reload - fcmp.clt.d $fcc0, $fs5, $fa0 - addi.w $s8, $s8, 1 - fsel $fs5, $fa0, $fs5, $fcc0 - bne $s5, $s8, .LBB1_4 + fcmp.clt.d $fcc0, $fs0, $fa0 + addi.w $s7, $s7, 1 + fsel $fs0, $fa0, $fs0, $fcc0 + bne $s8, $s7, .LBB1_4 # %bb.5: # %..loopexit53_crit_edge # in Loop: Header=BB1_2 Depth=1 - fst.d $fs5, $s6, 0 - fld.d $fs1, $sp, 40 # 8-byte Folded Reload + fst.d $fs0, $s5, 0 + fld.d $fa0, $sp, 32 # 8-byte Folded Reload ori $a2, $zero, 21 b .LBB1_1 .LBB1_6: # %.preheader.preheader - move $s6, $zero + move $s5, $zero addi.d $s1, $s4, 92 - ld.d $a0, $sp, 16 # 8-byte Folded Reload - fld.d $fa0, $a0, %pc_lo12(.LCPI1_1) - fst.d $fa0, $sp, 24 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_10) - fld.d $fa3, $a0, %pc_lo12(.LCPI1_10) + movgr2fr.d $fa1, $s2 + fst.d $fa1, $sp, 16 # 8-byte Folded Spill + ori $a0, $zero, 0 + lu32i.d $a0, -524288 + lu52i.d $a0, $a0, 1031 + movgr2fr.d $fa5, $a0 ori $s4, $zero, 12 - fst.d $fs6, $sp, 32 # 8-byte Folded Spill + fst.d $fs3, $sp, 24 # 8-byte Folded Spill b .LBB1_8 .p2align 4, , 16 .LBB1_7: # %.loopexit # in Loop: Header=BB1_8 Depth=1 - beq $s6, $s4, .LBB1_12 + beq $s5, $s4, .LBB1_12 .LBB1_8: # %.preheader # =>This Loop Header: Depth=1 # Child Loop BB1_10 Depth 2 - move $a0, $s6 - slli.d $a1, $s6, 2 - ldx.w $s5, $s1, $a1 - addi.d $s6, $s6, 1 - slli.d $a1, $s6, 2 + move $a0, $s5 + slli.d $a1, $s5, 2 + ldx.w $s6, $s1, $a1 + addi.d $s5, $s5, 1 + slli.d $a1, $s5, 2 ldx.w $s7, $s1, $a1 slli.d $a1, $a0, 3 stx.d $s2, $s0, $a1 - bge $s5, $s7, .LBB1_7 + bge $s6, $s7, .LBB1_7 # %bb.9: # %.lr.ph58.preheader # in Loop: Header=BB1_8 Depth=1 alsl.d $s8, $a0, $s0, 3 - fmov.d $fs3, $fs0 - fld.d $fs0, $sp, 24 # 8-byte Folded Reload - fld.d $fs7, $sp, 48 # 8-byte Folded Reload + fld.d $fs5, $sp, 16 # 8-byte Folded Reload + fmov.d $fs0, $fa0 .p2align 4, , 16 .LBB1_10: # %.lr.ph58 # Parent Loop BB1_8 Depth=1 # => This Inner Loop Header: Depth=2 - movgr2fr.w $fa0, $s5 + movgr2fr.w $fa0, $s6 ffint.d.w $fa0, $fa0 - fmul.d $fa0, $fa2, $fa0 - fdiv.d $fa0, $fa0, $fa3 + fmul.d $fa0, $fs6, $fa0 + fdiv.d $fa0, $fa0, $fa5 fcmp.clt.d $fcc0, $fa0, $fs4 - fmov.d $fs5, $fs1 fsel $fs1, $fa0, $fs4, $fcc0 fmov.d $fa0, $fs1 - fmov.d $fs6, $fa3 + fmov.d $fa1, $fs7 + fmov.d $fs3, $fa5 pcaddu18i $ra, %call36(pow) jirl $ra, $ra, 0 fmov.d $fs2, $fa0 - fadd.d $fa0, $fs1, $fs3 + fld.d $fa0, $sp, 40 # 8-byte Folded Reload + fadd.d $fa0, $fs1, $fa0 fmul.d $fa0, $fa0, $fa0 - fld.d $fa1, $sp, 32 # 8-byte Folded Reload + fld.d $fa1, $sp, 24 # 8-byte Folded Reload fmul.d $fa0, $fa0, $fa1 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 vldi $vr1, -870 fmul.d $fa0, $fa0, $fa1 - fmadd.d $fs2, $fs2, $fs7, $fa0 + fmadd.d $fs2, $fs2, $fs0, $fa0 vldi $vr1, -1008 fmov.d $fa0, $fs1 - fmov.d $fs1, $fs5 pcaddu18i $ra, %call36(pow) jirl $ra, $ra, 0 ld.w $a0, $fp, 148 sltui $a0, $a0, 1 slli.d $a0, $a0, 3 fldx.d $fa1, $s3, $a0 - fmadd.d $fa0, $fa0, $fs5, $fs2 + fld.d $fa2, $sp, 48 # 8-byte Folded Reload + fmadd.d $fa0, $fa0, $fa2, $fs2 fadd.d $fa0, $fa0, $fa1 vldi $vr1, -988 fdiv.d $fa1, $fa0, $fa1 vldi $vr0, -988 pcaddu18i $ra, %call36(pow) jirl $ra, $ra, 0 - fmov.d $fa3, $fs6 - fld.d $fa1, $sp, 56 # 8-byte Folded Reload - fld.d $fa2, $sp, 64 # 8-byte Folded Reload - fcmp.clt.d $fcc0, $fs0, $fa0 - addi.w $s5, $s5, 1 - fsel $fs0, $fa0, $fs0, $fcc0 - bne $s7, $s5, .LBB1_10 + fmov.d $fa5, $fs3 + fcmp.clt.d $fcc0, $fs5, $fa0 + addi.w $s6, $s6, 1 + fsel $fs5, $fa0, $fs5, $fcc0 + bne $s7, $s6, .LBB1_10 # %bb.11: # %..loopexit_crit_edge # in Loop: Header=BB1_8 Depth=1 - fst.d $fs0, $s8, 0 - fmov.d $fs0, $fs3 + fst.d $fs5, $s8, 0 + fmov.d $fa0, $fs0 b .LBB1_7 .LBB1_12: - fld.d $fs7, $sp, 72 # 8-byte Folded Reload - fld.d $fs6, $sp, 80 # 8-byte Folded Reload - fld.d $fs5, $sp, 88 # 8-byte Folded Reload - fld.d $fs4, $sp, 96 # 8-byte Folded Reload - fld.d $fs3, $sp, 104 # 8-byte Folded Reload - fld.d $fs2, $sp, 112 # 8-byte Folded Reload - fld.d $fs1, $sp, 120 # 8-byte Folded Reload - fld.d $fs0, $sp, 128 # 8-byte Folded Reload - ld.d $s8, $sp, 136 # 8-byte Folded Reload - ld.d $s7, $sp, 144 # 8-byte Folded Reload - ld.d $s6, $sp, 152 # 8-byte Folded Reload - ld.d $s5, $sp, 160 # 8-byte Folded Reload - ld.d $s4, $sp, 168 # 8-byte Folded Reload - ld.d $s3, $sp, 176 # 8-byte Folded Reload - ld.d $s2, $sp, 184 # 8-byte Folded Reload - ld.d $s1, $sp, 192 # 8-byte Folded Reload - ld.d $s0, $sp, 200 # 8-byte Folded Reload - ld.d $fp, $sp, 208 # 8-byte Folded Reload - ld.d $ra, $sp, 216 # 8-byte Folded Reload - addi.d $sp, $sp, 224 + fld.d $fs7, $sp, 56 # 8-byte Folded Reload + fld.d $fs6, $sp, 64 # 8-byte Folded Reload + fld.d $fs5, $sp, 72 # 8-byte Folded Reload + fld.d $fs4, $sp, 80 # 8-byte Folded Reload + fld.d $fs3, $sp, 88 # 8-byte Folded Reload + fld.d $fs2, $sp, 96 # 8-byte Folded Reload + fld.d $fs1, $sp, 104 # 8-byte Folded Reload + fld.d $fs0, $sp, 112 # 8-byte Folded Reload + ld.d $s8, $sp, 120 # 8-byte Folded Reload + ld.d $s7, $sp, 128 # 8-byte Folded Reload + ld.d $s6, $sp, 136 # 8-byte Folded Reload + ld.d $s5, $sp, 144 # 8-byte Folded Reload + ld.d $s4, $sp, 152 # 8-byte Folded Reload + ld.d $s3, $sp, 160 # 8-byte Folded Reload + ld.d $s2, $sp, 168 # 8-byte Folded Reload + ld.d $s1, $sp, 176 # 8-byte Folded Reload + ld.d $s0, $sp, 184 # 8-byte Folded Reload + ld.d $fp, $sp, 192 # 8-byte Folded Reload + ld.d $ra, $sp, 200 # 8-byte Folded Reload + addi.d $sp, $sp, 208 ret .Lfunc_end1: .size compute_ath, .Lfunc_end1-compute_ath # -- End function - .section .rodata.cst8,"aM",@progbits,8 + .section .rodata.cst16,"aM",@progbits,16 .p2align 3, 0x0 # -- Begin function ATHformula .LCPI2_0: - .dword 0x3f947ae147ae147b # double 0.02 -.LCPI2_1: - .dword 0xbfe999999999999a # double -0.80000000000000004 -.LCPI2_2: - .dword 0xc00a666666666666 # double -3.2999999999999998 -.LCPI2_3: - .dword 0xbfe3333333333333 # double -0.59999999999999998 -.LCPI2_4: - .dword 0x400d1eb851eb851f # double 3.6400000000000001 -.LCPI2_5: - .dword 0x3f50624dd2f1a9fc # double 0.001 - .section .rodata.cst16,"aM",@progbits,16 - .p2align 3, 0x0 -.LCPI2_6: .dword 0xc069000000000000 # double -200 .dword 0xc05c800000000000 # double -114 .text @@ -603,43 +582,61 @@ ATHformula: # @ATHformula st.d $fp, $sp, 16 # 8-byte Folded Spill fst.d $fs0, $sp, 8 # 8-byte Folded Spill fst.d $fs1, $sp, 0 # 8-byte Folded Spill - pcalau12i $a1, %pc_hi20(.LCPI2_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI2_0) move $fp, $a0 + lu12i.w $a0, 293601 + ori $a0, $a0, 1147 + lu32i.d $a0, 293601 + lu52i.d $a0, $a0, 1017 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 fsel $fs0, $fa0, $fa1, $fcc0 - pcalau12i $a0, %pc_hi20(.LCPI2_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI2_1) + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, -1026 + movgr2fr.d $fa1, $a0 fmov.d $fa0, $fs0 pcaddu18i $ra, %call36(pow) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI2_2) - fld.d $fa1, $a0, %pc_lo12(.LCPI2_2) - pcalau12i $a0, %pc_hi20(.LCPI2_3) - fld.d $fa2, $a0, %pc_lo12(.LCPI2_3) fmov.d $fs1, $fa0 - fadd.d $fa0, $fs0, $fa1 + lu12i.w $a0, 419430 + ori $a0, $a0, 1638 + lu32i.d $a0, -367002 + lu52i.d $a0, $a0, -1024 + movgr2fr.d $fa0, $a0 + fadd.d $fa0, $fs0, $fa0 fmul.d $fa0, $fa0, $fa0 - fmul.d $fa0, $fa0, $fa2 + lu12i.w $a0, 209715 + ori $a0, $a0, 819 + lu32i.d $a0, 209715 + lu52i.d $a0, $a0, -1026 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI2_4) - fld.d $fa1, $a0, %pc_lo12(.LCPI2_4) - vldi $vr2, -870 - fmul.d $fa0, $fa0, $fa2 + vldi $vr1, -870 + fmul.d $fa0, $fa0, $fa1 + lu12i.w $a0, 335544 + ori $a0, $a0, 1311 + lu32i.d $a0, -188744 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fa1, $a0 fmadd.d $fs1, $fs1, $fa1, $fa0 vldi $vr1, -1008 fmov.d $fa0, $fs0 pcaddu18i $ra, %call36(pow) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI2_5) - fld.d $fa1, $a0, %pc_lo12(.LCPI2_5) - ld.w $a0, $fp, 148 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + ld.w $a1, $fp, 148 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fa1, $a0 fmadd.d $fa0, $fa0, $fa1, $fs1 - sltui $a0, $a0, 1 + sltui $a0, $a1, 1 slli.d $a0, $a0, 3 - pcalau12i $a1, %pc_hi20(.LCPI2_6) - addi.d $a1, $a1, %pc_lo12(.LCPI2_6) + pcalau12i $a1, %pc_hi20(.LCPI2_0) + addi.d $a1, $a1, %pc_lo12(.LCPI2_0) fldx.d $fa1, $a1, $a0 fadd.d $fa0, $fa0, $fa1 vldi $vr1, -988 @@ -655,12 +652,7 @@ ATHformula: # @ATHformula .Lfunc_end2: .size ATHformula, .Lfunc_end2-ATHformula # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function ms_convert -.LCPI3_0: - .dword 0x3fe6a09e667f3bcd # double 0.70710678118654757 - .text - .globl ms_convert + .globl ms_convert # -- Begin function ms_convert .p2align 5 .type ms_convert,@function ms_convert: # @ms_convert @@ -674,11 +666,14 @@ ms_convert: # @ms_convert bgeu $a1, $a3, .LBB3_4 # %bb.2: # %scalar.ph.preheader lu12i.w $a3, -2 - pcalau12i $a4, %pc_hi20(.LCPI3_0) - fld.d $fa0, $a4, %pc_lo12(.LCPI3_0) ori $a3, $a3, 3584 lu12i.w $a4, 1 ori $a4, $a4, 512 + lu12i.w $a5, 419827 + ori $a5, $a5, 3021 + lu32i.d $a5, 434334 + lu52i.d $a5, $a5, 1022 + movgr2fr.d $fa0, $a5 .p2align 4, , 16 .LBB3_3: # %scalar.ph # =>This Inner Loop Header: Depth=1 @@ -725,14 +720,7 @@ ms_convert: # @ms_convert .Lfunc_end3: .size ms_convert, .Lfunc_end3-ms_convert # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function on_pe -.LCPI4_0: - .dword 0xc087700000000000 # double -750 -.LCPI4_1: - .dword 0x3ff8cccccccccccd # double 1.55 - .text - .globl on_pe + .globl on_pe # -- Begin function on_pe .p2align 5 .type on_pe,@function on_pe: # @on_pe @@ -755,28 +743,33 @@ on_pe: # @on_pe move $a3, $a5 pcaddu18i $ra, %call36(ResvMaxBits) jirl $ra, $ra, 0 - ld.w $a4, $s0, 204 - blez $a4, .LBB4_3 + ld.w $a1, $s0, 204 + blez $a1, .LBB4_3 # %bb.1: # %.lr.ph move $a0, $zero - ld.w $a1, $sp, 12 - ld.w $a2, $sp, 8 - alsl.d $a3, $s1, $s3, 4 + ld.w $a2, $sp, 12 + ld.w $a3, $sp, 8 + alsl.d $a4, $s1, $s3, 4 ori $a5, $zero, 240 mul.d $a5, $s1, $a5 add.d $a5, $a5, $s2 - pcalau12i $a6, %pc_hi20(.LCPI4_0) - fld.d $fa0, $a6, %pc_lo12(.LCPI4_0) - pcalau12i $a6, %pc_hi20(.LCPI4_1) - fld.d $fa1, $a6, %pc_lo12(.LCPI4_1) addi.d $a5, $a5, 72 + ori $a6, $zero, 0 + lu32i.d $a6, 487424 + lu52i.d $a6, $a6, -1016 + movgr2fr.d $fa0, $a6 + lu12i.w $a6, -209716 + ori $a6, $a6, 3277 + lu32i.d $a6, -471860 + lu52i.d $a6, $a6, 1023 + movgr2fr.d $fa1, $a6 ori $a6, $zero, 500 ori $a7, $zero, 4095 .p2align 4, , 16 .LBB4_2: # =>This Inner Loop Header: Depth=1 - fld.d $fa2, $a3, 0 - div.w $a4, $a2, $a4 - st.w $a4, $fp, 0 + fld.d $fa2, $a4, 0 + div.w $a1, $a3, $a1 + st.w $a1, $fp, 0 fadd.d $fa2, $fa2, $fa0 ld.w $t0, $a5, 0 fdiv.d $fa2, $fa2, $fa1 @@ -789,25 +782,25 @@ on_pe: # @on_pe masknez $t1, $t1, $t2 maskeqz $t0, $t0, $t2 or $t0, $t0, $t1 - slt $t1, $t0, $a1 + slt $t1, $t0, $a2 maskeqz $t0, $t0, $t1 - masknez $t1, $a1, $t1 + masknez $t1, $a2, $t1 or $t0, $t0, $t1 - add.w $t1, $t0, $a4 + add.w $t1, $t0, $a1 slt $t1, $a7, $t1 - sub.d $t2, $a7, $a4 + sub.d $t2, $a7, $a1 masknez $t0, $t0, $t1 maskeqz $t1, $t2, $t1 or $t0, $t1, $t0 - add.d $a4, $t0, $a4 - st.w $a4, $fp, 0 - sub.w $a1, $a1, $t0 - ld.w $a4, $s0, 204 + add.d $a1, $t0, $a1 + st.w $a1, $fp, 0 + sub.w $a2, $a2, $t0 + ld.w $a1, $s0, 204 addi.d $a0, $a0, 1 addi.d $fp, $fp, 4 - addi.d $a3, $a3, 8 + addi.d $a4, $a4, 8 addi.d $a5, $a5, 120 - blt $a0, $a4, .LBB4_2 + blt $a0, $a1, .LBB4_2 .LBB4_3: # %._crit_edge ld.d $s3, $sp, 16 # 8-byte Folded Reload ld.d $s2, $sp, 24 # 8-byte Folded Reload @@ -820,18 +813,7 @@ on_pe: # @on_pe .Lfunc_end4: .size on_pe, .Lfunc_end4-on_pe # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function reduce_side -.LCPI5_0: - .dword 0x3fd51eb851eb851f # double 0.33000000000000002 -.LCPI5_1: - .dword 0xb690000000000000 # double -7.0064923216240854E-46 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI5_2: - .word 0x42fa0000 # float 125 - .text - .globl reduce_side + .globl reduce_side # -- Begin function reduce_side .p2align 5 .type reduce_side,@function reduce_side: # @reduce_side @@ -841,24 +823,27 @@ reduce_side: # @reduce_side blt $a2, $a3, .LBB5_5 # %bb.1: vldi $vr1, -928 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - fld.d $fa2, $a3, %pc_lo12(.LCPI5_0) fsub.d $fa0, $fa1, $fa0 - pcalau12i $a3, %pc_hi20(.LCPI5_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI5_1) - fmul.d $fa0, $fa0, $fa2 + lu12i.w $a3, 335544 + ori $a3, $a3, 1311 + lu32i.d $a3, 335544 + lu52i.d $a3, $a3, 1021 + movgr2fr.d $fa1, $a3 + fmul.d $fa0, $fa0, $fa1 fadd.d $fa0, $fa0, $fa0 - fcvt.s.d $fa2, $fa0 - fcmp.clt.d $fcc0, $fa0, $fa1 + fcvt.s.d $fa1, $fa0 + lu52i.d $a3, $zero, -1175 + movgr2fr.d $fa2, $a3 + fcmp.clt.d $fcc0, $fa0, $fa2 movgr2fr.w $fa0, $zero - fsel $fa0, $fa2, $fa0, $fcc0 + fsel $fa0, $fa1, $fa0, $fcc0 bstrpick.d $a3, $a2, 31, 0 movgr2fr.d $fa1, $a3 - pcalau12i $a3, %pc_hi20(.LCPI5_2) - fld.s $fa3, $a3, %pc_lo12(.LCPI5_2) ffint.s.l $fa2, $fa1 fneg.s $fa1, $fa2 fmadd.s $fa1, $fa1, $fa0, $fa2 + lu12i.w $a3, 274336 + movgr2fr.w $fa3, $a3 fcmp.cule.s $fcc0, $fa1, $fa3 bcnez $fcc0, .LBB5_3 # %bb.2: @@ -3061,12 +3046,7 @@ quantize_xrpow: # @quantize_xrpow .Lfunc_end12: .size quantize_xrpow, .Lfunc_end12-quantize_xrpow # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function quantize_xrpow_ISO -.LCPI13_0: - .dword 0x3fe306f694467382 # double 0.59460000000000002 - .text - .globl quantize_xrpow_ISO + .globl quantize_xrpow_ISO # -- Begin function quantize_xrpow_ISO .p2align 5 .type quantize_xrpow_ISO,@function quantize_xrpow_ISO: # @quantize_xrpow_ISO @@ -3076,8 +3056,11 @@ quantize_xrpow_ISO: # @quantize_xrpow_ISO pcalau12i $a3, %pc_hi20(ipow20) addi.d $a3, $a3, %pc_lo12(ipow20) fldx.d $fa0, $a3, $a2 - pcalau12i $a2, %pc_hi20(.LCPI13_0) - fld.d $fa1, $a2, %pc_lo12(.LCPI13_0) + lu12i.w $a2, -441241 + ori $a2, $a2, 898 + lu32i.d $a2, 198390 + lu52i.d $a2, $a2, 1022 + movgr2fr.d $fa1, $a2 fdiv.d $fa1, $fa1, $fa0 vreplvei.d $vr0, $vr0, 0 vreplvei.d $vr1, $vr1, 0 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/quantize.s b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/quantize.s index 620478dc..7d2292d8 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/quantize.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/quantize.s @@ -310,14 +310,6 @@ iteration_loop: # @iteration_loop .word 0 # 0x0 .word 0 # 0x0 .word 210 # 0xd2 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI1_1: - .dword 0x3d719799812dea11 # double 9.9999999999999998E-13 -.LCPI1_2: - .dword 0x3fe62e42fefa39ef # double 0.69314718055994529 -.LCPI1_3: - .dword 0x2b617f7d4ed8c33e # double 1.0E-99 .text .globl init_outer_loop .p2align 5 @@ -372,8 +364,11 @@ init_outer_loop: # @init_outer_loop addi.d $a0, $a0, 24 bnez $a1, .LBB1_3 # %bb.4: # %.preheader83.preheader - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fa3, $a0, %pc_lo12(.LCPI1_1) + lu12i.w $a0, -519458 + ori $a0, $a0, 2577 + lu32i.d $a0, 104345 + lu52i.d $a0, $a0, 983 + movgr2fr.d $fa3, $a0 fcmp.clt.d $fcc0, $fa0, $fa3 fsel $fa4, $fa0, $fa3, $fcc0 fcmp.clt.d $fcc0, $fa2, $fa4 @@ -389,10 +384,13 @@ init_outer_loop: # @init_outer_loop fmov.d $fa0, $fs0 pcaddu18i $ra, %call36(log) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI1_2) - fld.d $fs3, $a0, %pc_lo12(.LCPI1_2) vldi $vr1, -928 fmul.d $fa0, $fa0, $fa1 + lu12i.w $a0, -4189 + ori $a0, $a0, 2543 + lu32i.d $a0, 405058 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fs3, $a0 fdiv.d $fa0, $fa0, $fs3 fsub.d $fa0, $fa1, $fa0 ftintrz.w.d $fa0, $fa0 @@ -435,12 +433,15 @@ init_outer_loop: # @init_outer_loop slti $a1, $a0, 2 maskeqz $a0, $a0, $a1 masknez $a1, $s0, $a1 - pcalau12i $a2, %pc_hi20(.LCPI1_3) - fld.d $fa0, $a2, %pc_lo12(.LCPI1_3) or $a0, $a0, $a1 - fadd.d $fa1, $fs0, $fs1 - fadd.d $fa1, $fa1, $fs2 - fcmp.clt.d $fcc0, $fa0, $fa1 + fadd.d $fa0, $fs0, $fs1 + fadd.d $fa0, $fa0, $fs2 + lu12i.w $a1, 322956 + ori $a1, $a1, 830 + lu32i.d $a1, 98173 + lu52i.d $a1, $a1, 694 + movgr2fr.d $fa1, $a1 + fcmp.clt.d $fcc0, $fa1, $fa0 st.w $a0, $fp, 52 movcf2gr $a0, $fcc0 fld.d $fs3, $sp, 8 # 8-byte Folded Reload @@ -454,11 +455,14 @@ init_outer_loop: # @init_outer_loop ret .LBB1_5: # %.thread.preheader lu12i.w $a0, -2 - pcalau12i $a2, %pc_hi20(.LCPI1_3) - fld.d $fa0, $a2, %pc_lo12(.LCPI1_3) ori $a0, $a0, 3584 lu12i.w $a2, 1 ori $a2, $a2, 512 + lu12i.w $a3, 322956 + ori $a3, $a3, 830 + lu32i.d $a3, 98173 + lu52i.d $a3, $a3, 694 + movgr2fr.d $fa0, $a3 .p2align 4, , 16 .LBB1_6: # %.thread # =>This Inner Loop Header: Depth=1 @@ -796,12 +800,7 @@ outer_loop: # @outer_loop .Lfunc_end2: .size outer_loop, .Lfunc_end2-outer_loop # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function set_masking_lower -.LCPI3_0: - .dword 0x40a28e0000000000 # double 2375 - .text - .globl set_masking_lower + .globl set_masking_lower # -- Begin function set_masking_lower .p2align 5 .type set_masking_lower,@function set_masking_lower: # @set_masking_lower @@ -812,12 +811,14 @@ set_masking_lower: # @set_masking_lower addi.d $a0, $a0, -6 movgr2fr.w $fa0, $a0 ffint.s.w $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI3_0) addi.d $a0, $a1, -125 - movgr2fr.w $fa2, $a0 - ffint.d.w $fa2, $fa2 - fdiv.d $fa1, $fa2, $fa1 + movgr2fr.w $fa1, $a0 + ffint.d.w $fa1, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, 167424 + lu52i.d $a0, $a0, 1034 + movgr2fr.d $fa2, $a0 + fdiv.d $fa1, $fa1, $fa2 fcvt.s.d $fa1, $fa1 vldi $vr2, -1040 fadd.s $fa1, $fa1, $fa2 @@ -840,16 +841,7 @@ set_masking_lower: # @set_masking_lower .Lfunc_end3: .size set_masking_lower, .Lfunc_end3-set_masking_lower # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function VBR_iteration_loop -.LCPI4_0: - .dword 0x4091300000000000 # double 1100 -.LCPI4_1: - .dword 0x40a28e0000000000 # double 2375 -.LCPI4_2: - .dword 0x3fd51eb851eb851f # double 0.33000000000000002 - .text - .globl VBR_iteration_loop + .globl VBR_iteration_loop # -- Begin function VBR_iteration_loop .p2align 5 .type VBR_iteration_loop,@function VBR_iteration_loop: # @VBR_iteration_loop @@ -867,13 +859,14 @@ VBR_iteration_loop: # @VBR_iteration_loop st.d $s7, $sp, 1952 # 8-byte Folded Spill st.d $s8, $sp, 1944 # 8-byte Folded Spill fst.d $fs0, $sp, 1936 # 8-byte Folded Spill + fst.d $fs1, $sp, 1928 # 8-byte Folded Spill addi.d $sp, $sp, -1712 st.d $a7, $sp, 112 # 8-byte Folded Spill move $s7, $a6 move $s3, $a5 st.d $a4, $sp, 64 # 8-byte Folded Spill st.d $a3, $sp, 96 # 8-byte Folded Spill - st.d $a2, $sp, 24 # 8-byte Folded Spill + move $s1, $a2 st.d $a1, $sp, 40 # 8-byte Folded Spill move $s4, $a0 move $a1, $a5 @@ -886,11 +879,11 @@ VBR_iteration_loop: # @VBR_iteration_loop blez $a1, .LBB4_6 # %bb.1: # %.lr.ph move $fp, $zero - addi.d $s0, $sp, 260 + addi.d $s0, $sp, 252 b .LBB4_3 .p2align 4, , 16 .LBB4_2: # in Loop: Header=BB4_3 Depth=1 - ld.w $a3, $sp, 256 + ld.w $a3, $sp, 248 move $a0, $s4 move $a1, $s3 pcaddu18i $ra, %call36(ResvFrameBegin) @@ -903,14 +896,14 @@ VBR_iteration_loop: # @VBR_iteration_loop st.w $a0, $s4, 220 bge $a2, $a1, .LBB4_5 .LBB4_3: # =>This Inner Loop Header: Depth=1 - addi.d $a1, $sp, 256 - addi.d $a2, $sp, 252 + addi.d $a1, $sp, 248 + addi.d $a2, $sp, 244 move $a0, $s4 pcaddu18i $ra, %call36(getframebits) jirl $ra, $ra, 0 ld.w $a0, $s4, 220 ld.w $a1, $s4, 208 - ld.w $a2, $sp, 252 + ld.w $a2, $sp, 244 bne $a0, $a1, .LBB4_2 # %bb.4: # in Loop: Header=BB4_3 Depth=1 ld.w $a0, $s4, 204 @@ -929,13 +922,12 @@ VBR_iteration_loop: # @VBR_iteration_loop st.d $a0, $sp, 120 # 8-byte Folded Spill ld.w $a0, $s4, 200 st.w $a1, $s4, 220 - pcalau12i $a1, %pc_hi20(.LCPI4_1) - st.d $a1, $sp, 216 # 8-byte Folded Spill st.d $s3, $sp, 80 # 8-byte Folded Spill st.d $s7, $sp, 72 # 8-byte Folded Spill - st.d $s4, $sp, 240 # 8-byte Folded Spill + st.d $s4, $sp, 232 # 8-byte Folded Spill blez $a0, .LBB4_35 # %bb.8: # %.lr.ph368 + st.d $s1, $sp, 24 # 8-byte Folded Spill addi.d $a0, $s3, 48 st.d $a0, $sp, 32 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(reduce_sidechannel) @@ -945,13 +937,18 @@ VBR_iteration_loop: # @VBR_iteration_loop ld.d $a0, $a0, %got_pc_lo12(convert_mdct) st.d $a0, $sp, 48 # 8-byte Folded Spill move $s5, $zero - move $s0, $zero - st.d $zero, $sp, 208 # 8-byte Folded Spill - movgr2fr.d $fs0, $zero + move $s1, $zero + move $fp, $zero + ori $a0, $zero, 0 + lu32i.d $a0, 77824 + lu52i.d $a0, $a0, 1033 + movgr2fr.d $fs0, $a0 + movgr2fr.d $fs1, $zero b .LBB4_11 .p2align 4, , 16 .LBB4_9: # in Loop: Header=BB4_11 Depth=1 - move $s0, $s2 + move $fp, $s2 + move $s1, $s3 .LBB4_10: # %._crit_edge361 # in Loop: Header=BB4_11 Depth=1 ld.w $a0, $s4, 200 @@ -963,16 +960,17 @@ VBR_iteration_loop: # @VBR_iteration_loop .LBB4_11: # =>This Loop Header: Depth=1 # Child Loop BB4_16 Depth 2 # Child Loop BB4_22 Depth 3 - move $s2, $s0 - ld.w $fp, $s4, 204 + move $s3, $s1 + move $s2, $fp + ld.w $s0, $s4, 204 ld.d $a0, $sp, 48 # 8-byte Folded Reload ld.w $a0, $a0, 0 ld.d $a1, $sp, 56 # 8-byte Folded Reload ld.w $s1, $a1, 0 - alsl.d $s0, $s5, $s5, 3 + alsl.d $fp, $s5, $s5, 3 beqz $a0, .LBB4_13 # %bb.12: # in Loop: Header=BB4_11 Depth=1 - slli.d $a0, $s0, 10 + slli.d $a0, $fp, 10 ld.d $a1, $sp, 96 # 8-byte Folded Reload add.d $a0, $a1, $a0 move $a1, $a0 @@ -982,20 +980,20 @@ VBR_iteration_loop: # @VBR_iteration_loop sltui $a0, $s1, 1 ori $a1, $zero, 1 masknez $a1, $a1, $a0 - maskeqz $a0, $fp, $a0 + maskeqz $a0, $s0, $a0 or $a0, $a0, $a1 st.d $a0, $sp, 176 # 8-byte Folded Spill st.d $s5, $sp, 88 # 8-byte Folded Spill blez $a0, .LBB4_9 # %bb.14: # %.lr.ph360 # in Loop: Header=BB4_11 Depth=1 - move $fp, $zero + move $s0, $zero ori $a0, $zero, 240 mul.d $a0, $s5, $a0 ld.d $a1, $sp, 32 # 8-byte Folded Reload add.d $a0, $a1, $a0 st.d $a0, $sp, 168 # 8-byte Folded Spill - slli.d $a0, $s0, 10 + slli.d $a0, $fp, 10 ld.d $a1, $sp, 96 # 8-byte Folded Reload add.d $a0, $a1, $a0 st.d $a0, $sp, 160 # 8-byte Folded Spill @@ -1007,7 +1005,7 @@ VBR_iteration_loop: # @VBR_iteration_loop ld.d $a0, $sp, 40 # 8-byte Folded Reload alsl.d $a0, $s5, $a0, 4 st.d $a0, $sp, 104 # 8-byte Folded Spill - slli.d $a0, $s0, 9 + slli.d $a0, $fp, 9 add.d $a0, $s7, $a0 st.d $a0, $sp, 152 # 8-byte Folded Spill ori $a0, $zero, 488 @@ -1015,10 +1013,11 @@ VBR_iteration_loop: # @VBR_iteration_loop ld.d $a1, $sp, 112 # 8-byte Folded Reload add.d $a0, $a1, $a0 st.d $a0, $sp, 144 # 8-byte Folded Spill - addi.d $a0, $sp, 352 + addi.d $a0, $sp, 344 alsl.d $a0, $s5, $a0, 3 st.d $a0, $sp, 136 # 8-byte Folded Spill - move $s0, $s2 + move $fp, $s2 + move $s1, $s3 b .LBB4_16 .p2align 4, , 16 .LBB4_15: # in Loop: Header=BB4_16 Depth=2 @@ -1026,22 +1025,21 @@ VBR_iteration_loop: # @VBR_iteration_loop ld.d $a1, $sp, 136 # 8-byte Folded Reload ld.d $a2, $sp, 192 # 8-byte Folded Reload stx.w $a0, $a1, $a2 - ld.d $a1, $sp, 208 # 8-byte Folded Reload - add.w $a1, $a0, $a1 - st.d $a1, $sp, 208 # 8-byte Folded Spill - ld.d $s4, $sp, 240 # 8-byte Folded Reload - ld.d $s0, $sp, 200 # 8-byte Folded Reload - addi.d $fp, $fp, 1 + ld.d $fp, $sp, 208 # 8-byte Folded Reload + add.w $fp, $a0, $fp + ld.d $s4, $sp, 232 # 8-byte Folded Reload + ld.d $s1, $sp, 200 # 8-byte Folded Reload + addi.d $s0, $s0, 1 ld.d $a0, $sp, 176 # 8-byte Folded Reload - beq $fp, $a0, .LBB4_10 + beq $s0, $a0, .LBB4_10 .LBB4_16: # Parent Loop BB4_11 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB4_22 Depth 3 ori $a0, $zero, 120 - mul.d $a0, $fp, $a0 + mul.d $a0, $s0, $a0 ld.d $a1, $sp, 168 # 8-byte Folded Reload add.d $s7, $a1, $a0 - alsl.d $s2, $fp, $fp, 3 + alsl.d $s2, $s0, $s0, 3 slli.d $a0, $s2, 9 ld.d $a1, $sp, 160 # 8-byte Folded Reload add.d $s8, $a1, $a0 @@ -1051,11 +1049,11 @@ VBR_iteration_loop: # @VBR_iteration_loop pcaddu18i $ra, %call36(init_outer_loop) jirl $ra, $ra, 0 ori $a1, $zero, 244 - mul.d $s6, $fp, $a1 - slli.d $s1, $fp, 2 + mul.d $s6, $s0, $a1 + slli.d $s3, $s0, 2 beqz $a0, .LBB4_30 # %bb.17: # in Loop: Header=BB4_16 Depth=2 - ori $a0, $zero, 3408 + ori $a0, $zero, 3400 add.d $a0, $sp, $a0 ori $a2, $zero, 120 move $a1, $s7 @@ -1075,16 +1073,16 @@ VBR_iteration_loop: # @VBR_iteration_loop fcvt.s.d $fa0, $fa0 pcalau12i $a0, %got_pc_hi20(masking_lower) ld.d $a0, $a0, %got_pc_lo12(masking_lower) - st.d $a0, $sp, 232 # 8-byte Folded Spill + st.d $a0, $sp, 224 # 8-byte Folded Spill fst.s $fa0, $a0, 0 ori $a0, $zero, 976 - mul.d $a0, $fp, $a0 + mul.d $a0, $s0, $a0 ld.d $a1, $sp, 128 # 8-byte Folded Reload add.d $a2, $a1, $a0 - addi.d $a4, $sp, 368 + addi.d $a4, $sp, 360 move $a0, $s4 move $a1, $s8 - st.d $a2, $sp, 224 # 8-byte Folded Spill + st.d $a2, $sp, 216 # 8-byte Folded Spill move $a3, $s7 pcaddu18i $ra, %call36(calc_xmin) jirl $ra, $ra, 0 @@ -1096,16 +1094,15 @@ VBR_iteration_loop: # @VBR_iteration_loop maskeqz $a3, $a3, $a1 or $a0, $a3, $a0 ori $a3, $zero, 2 - st.d $s1, $sp, 192 # 8-byte Folded Spill + st.d $fp, $sp, 208 # 8-byte Folded Spill + st.d $s3, $sp, 192 # 8-byte Folded Spill bne $a2, $a3, .LBB4_19 # %bb.18: # in Loop: Header=BB4_16 Depth=2 - slli.d $a2, $fp, 3 + slli.d $a2, $s0, 3 ld.d $a3, $sp, 104 # 8-byte Folded Reload fldx.d $fa0, $a3, $a2 - pcalau12i $a2, %pc_hi20(.LCPI4_0) - fld.d $fa1, $a2, %pc_lo12(.LCPI4_0) - fcmp.clt.d $fcc0, $fa0, $fa1 - fsel $fa0, $fa0, $fa1, $fcc0 + fcmp.clt.d $fcc0, $fa0, $fs0 + fsel $fa0, $fa0, $fs0, $fcc0 bstrpick.d $a0, $a0, 31, 0 movgr2fr.d $fa1, $a0 ffint.d.l $fa1, $fa1 @@ -1119,11 +1116,11 @@ VBR_iteration_loop: # @VBR_iteration_loop or $a0, $a0, $a2 .LBB4_19: # in Loop: Header=BB4_16 Depth=2 ld.w $a2, $s4, 212 - masknez $a3, $s0, $a1 + masknez $a3, $s1, $a1 slli.d $a2, $a2, 2 ld.w $a4, $s4, 204 ld.w $a5, $s4, 200 - addi.d $a6, $sp, 260 + addi.d $a6, $sp, 252 ldx.w $a2, $a2, $a6 ori $a6, $zero, 1 maskeqz $a1, $a6, $a1 @@ -1148,7 +1145,7 @@ VBR_iteration_loop: # @VBR_iteration_loop add.d $a0, $a3, $a0 bstrpick.d $a1, $a0, 31, 31 add.w $a0, $a0, $a1 - srai.d $s0, $a0, 1 + srai.d $fp, $a0, 1 st.d $a3, $sp, 184 # 8-byte Folded Spill addi.w $s3, $a3, 1 slli.d $a0, $s2, 8 @@ -1160,25 +1157,25 @@ VBR_iteration_loop: # @VBR_iteration_loop .p2align 4, , 16 .LBB4_20: # in Loop: Header=BB4_22 Depth=3 ld.w $s3, $s7, 0 - ori $a0, $zero, 3160 + ori $a0, $zero, 3152 add.d $a0, $sp, $a0 ori $a2, $zero, 244 move $a1, $s6 pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 - addi.d $a0, $sp, 856 + addi.d $a0, $sp, 848 ori $a2, $zero, 2304 move $a1, $s4 pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 - ori $a0, $zero, 3528 + ori $a0, $zero, 3520 add.d $a0, $sp, $a0 ori $a2, $zero, 120 move $a1, $s7 pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 .LBB4_21: # in Loop: Header=BB4_22 Depth=3 - sub.w $s0, $s0, $s1 + sub.w $fp, $fp, $s1 addi.w $a0, $s1, 0 bstrpick.d $s1, $s1, 31, 1 ori $a1, $zero, 21 @@ -1186,27 +1183,29 @@ VBR_iteration_loop: # @VBR_iteration_loop .LBB4_22: # Parent Loop BB4_11 Depth=1 # Parent Loop BB4_16 Depth=2 # => This Inner Loop Header: Depth=3 - addi.w $s2, $s0, 0 + addi.w $s2, $fp, 0 bge $s2, $s3, .LBB4_21 # %bb.23: # in Loop: Header=BB4_22 Depth=3 - ori $a0, $zero, 3408 + ori $a0, $zero, 3400 add.d $a1, $sp, $a0 ori $a2, $zero, 120 move $a0, $s7 pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 - ld.d $s5, $sp, 240 # 8-byte Folded Reload + ld.d $s5, $sp, 232 # 8-byte Folded Reload ld.w $a0, $s5, 92 slli.d $a0, $a0, 1 addi.d $a0, $a0, -6 movgr2fr.w $fa0, $a0 ffint.s.w $fa0, $fa0 - ld.d $a0, $sp, 216 # 8-byte Folded Reload - fld.d $fa1, $a0, %pc_lo12(.LCPI4_1) - addi.d $a0, $s0, -125 - movgr2fr.w $fa2, $a0 - ffint.d.w $fa2, $fa2 - fdiv.d $fa1, $fa2, $fa1 + addi.d $a0, $fp, -125 + movgr2fr.w $fa1, $a0 + ffint.d.w $fa1, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, 167424 + lu52i.d $a0, $a0, 1034 + movgr2fr.d $fa2, $a0 + fdiv.d $fa1, $fa1, $fa2 fcvt.s.d $fa1, $fa1 vldi $vr2, -1040 fadd.s $fa1, $fa1, $fa2 @@ -1220,18 +1219,18 @@ VBR_iteration_loop: # @VBR_iteration_loop pcaddu18i $ra, %call36(pow) jirl $ra, $ra, 0 fcvt.s.d $fa0, $fa0 - ld.d $a0, $sp, 232 # 8-byte Folded Reload + ld.d $a0, $sp, 224 # 8-byte Folded Reload fst.s $fa0, $a0, 0 - addi.d $a4, $sp, 368 + addi.d $a4, $sp, 360 move $a0, $s5 move $a1, $s8 - ld.d $a2, $sp, 224 # 8-byte Folded Reload + ld.d $a2, $sp, 216 # 8-byte Folded Reload move $a3, $s7 pcaddu18i $ra, %call36(calc_xmin) jirl $ra, $ra, 0 - addi.d $a3, $sp, 320 - addi.d $a4, $sp, 368 - st.d $fp, $sp, 8 + addi.d $a3, $sp, 312 + addi.d $a4, $sp, 360 + st.d $s0, $sp, 8 move $a0, $s5 move $a1, $s8 move $a2, $s2 @@ -1240,24 +1239,24 @@ VBR_iteration_loop: # @VBR_iteration_loop move $a7, $s7 pcaddu18i $ra, %call36(outer_loop) jirl $ra, $ra, 0 - fld.d $fa0, $sp, 320 + fld.d $fa0, $sp, 312 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 bgtz $a0, .LBB4_27 # %bb.24: # in Loop: Header=BB4_22 Depth=3 - fld.d $fa0, $sp, 336 - fcmp.cult.d $fcc0, $fs0, $fa0 + fld.d $fa0, $sp, 328 + fcmp.cult.d $fcc0, $fs1, $fa0 bcnez $fcc0, .LBB4_27 # %bb.25: # in Loop: Header=BB4_22 Depth=3 - fld.d $fa0, $sp, 344 - fcmp.cult.d $fcc0, $fs0, $fa0 + fld.d $fa0, $sp, 336 + fcmp.cult.d $fcc0, $fs1, $fa0 bcnez $fcc0, .LBB4_27 # %bb.26: # in Loop: Header=BB4_22 Depth=3 - fld.d $fa0, $sp, 328 - fcmp.cult.d $fcc0, $fs0, $fa0 + fld.d $fa0, $sp, 320 + fcmp.cult.d $fcc0, $fs1, $fa0 bceqz $fcc0, .LBB4_20 .LBB4_27: # in Loop: Header=BB4_22 Depth=3 - add.w $s0, $s0, $s1 + add.w $fp, $fp, $s1 addi.w $a0, $s1, 0 bstrpick.d $s1, $s1, 31, 1 ori $a1, $zero, 21 @@ -1266,19 +1265,19 @@ VBR_iteration_loop: # @VBR_iteration_loop ld.d $a0, $sp, 184 # 8-byte Folded Reload blt $a0, $s3, .LBB4_15 # %bb.29: # in Loop: Header=BB4_16 Depth=2 - ori $a0, $zero, 3528 + ori $a0, $zero, 3520 add.d $a1, $sp, $a0 ori $a2, $zero, 120 move $a0, $s7 pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 - ori $a0, $zero, 3160 + ori $a0, $zero, 3152 add.d $a1, $sp, $a0 ori $a2, $zero, 244 move $a0, $s6 pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 - addi.d $a1, $sp, 856 + addi.d $a1, $sp, 848 ori $a2, $zero, 2304 move $a0, $s4 pcaddu18i $ra, %call36(memcpy) @@ -1293,7 +1292,7 @@ VBR_iteration_loop: # @VBR_iteration_loop pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ori $a0, $zero, 2304 - mul.d $a0, $fp, $a0 + mul.d $a0, $s0, $a0 ld.d $a1, $sp, 152 # 8-byte Folded Reload add.d $a0, $a1, $a0 ori $a2, $zero, 2304 @@ -1301,56 +1300,53 @@ VBR_iteration_loop: # @VBR_iteration_loop pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ld.d $a0, $sp, 136 # 8-byte Folded Reload - stx.w $zero, $a0, $s1 - ori $s0, $zero, 1 - addi.d $fp, $fp, 1 + stx.w $zero, $a0, $s3 + ori $s1, $zero, 1 + addi.d $s0, $s0, 1 ld.d $a0, $sp, 176 # 8-byte Folded Reload - bne $fp, $a0, .LBB4_16 + bne $s0, $a0, .LBB4_16 b .LBB4_10 .LBB4_31: # %._crit_edge369 ld.d $a1, $sp, 56 # 8-byte Folded Reload ld.w $a1, $a1, 0 - beqz $a1, .LBB4_36 + beqz $a1, .LBB4_41 # %bb.32: # %._crit_edge369 - ld.d $fp, $sp, 208 # 8-byte Folded Reload - blez $a0, .LBB4_42 + blez $a0, .LBB4_41 # %bb.33: # %.lr.ph376.preheader - ori $a1, $zero, 4 - bgeu $a0, $a1, .LBB4_37 + ori $a2, $zero, 4 + lu12i.w $a1, 335544 + bgeu $a0, $a2, .LBB4_36 # %bb.34: - move $a1, $zero - b .LBB4_40 + move $a2, $zero + ld.d $t2, $sp, 24 # 8-byte Folded Reload + b .LBB4_39 .LBB4_35: move $fp, $zero - b .LBB4_43 -.LBB4_36: - ld.d $fp, $sp, 208 # 8-byte Folded Reload b .LBB4_42 -.LBB4_37: # %vector.ph - bstrpick.d $a1, $a0, 30, 2 +.LBB4_36: # %vector.ph + bstrpick.d $a2, $a0, 30, 2 vrepli.b $vr0, 0 - slli.d $a1, $a1, 2 + slli.d $a2, $a2, 2 vori.b $vr1, $vr0, 0 vinsgr2vr.w $vr1, $fp, 0 - ld.d $a2, $sp, 24 # 8-byte Folded Reload - addi.d $a2, $a2, 16 - addi.d $a3, $sp, 368 - lu52i.d $a4, $zero, 1022 - vreplgr2vr.d $vr2, $a4 - lu12i.w $a4, 335544 - ori $a4, $a4, 1311 - lu32i.d $a4, 335544 - lu52i.d $a4, $a4, 1021 - vreplgr2vr.d $vr3, $a4 - lu52i.d $a4, $zero, 1023 - vreplgr2vr.d $vr4, $a4 + ld.d $t2, $sp, 24 # 8-byte Folded Reload + addi.d $a3, $t2, 16 + addi.d $a4, $sp, 360 + lu52i.d $a5, $zero, 1022 + vreplgr2vr.d $vr2, $a5 + ori $a5, $a1, 1311 + lu32i.d $a5, 335544 + lu52i.d $a5, $a5, 1021 + vreplgr2vr.d $vr3, $a5 + lu52i.d $a5, $zero, 1023 + vreplgr2vr.d $vr4, $a5 vrepli.w $vr5, 125 - move $a4, $a1 + move $a5, $a2 .p2align 4, , 16 -.LBB4_38: # %vector.body +.LBB4_37: # %vector.body # =>This Inner Loop Header: Depth=1 - vld $vr6, $a2, -16 - vld $vr7, $a2, 0 + vld $vr6, $a3, -16 + vld $vr7, $a3, 0 vfsub.d $vr6, $vr2, $vr6 vfsub.d $vr7, $vr2, $vr7 vfmul.d $vr6, $vr6, $vr3 @@ -1363,173 +1359,174 @@ VBR_iteration_loop: # @VBR_iteration_loop vfadd.d $vr7, $vr7, $vr4 vfdiv.d $vr6, $vr8, $vr6 vfdiv.d $vr7, $vr9, $vr7 - ld.w $a5, $a3, -8 - ld.w $a6, $a3, -16 - ld.w $a7, $a3, 0 - ld.w $t0, $a3, 8 - movgr2fr.w $ft0, $a5 + ld.w $a6, $a4, -8 + ld.w $a7, $a4, -16 + ld.w $t0, $a4, 0 + ld.w $t1, $a4, 8 + movgr2fr.w $ft0, $a6 ffint.d.w $ft0, $ft0 - movgr2fr.w $ft1, $a6 + movgr2fr.w $ft1, $a7 ffint.d.w $ft1, $ft1 vextrins.d $vr9, $vr8, 16 - movgr2fr.w $ft0, $t0 + movgr2fr.w $ft0, $t1 ffint.d.w $ft0, $ft0 - movgr2fr.w $ft2, $a7 + movgr2fr.w $ft2, $t0 ffint.d.w $ft2, $ft2 vextrins.d $vr10, $vr8, 16 vfmul.d $vr6, $vr6, $vr9 vfmul.d $vr7, $vr7, $vr10 vreplvei.d $vr8, $vr6, 0 ftintrz.w.d $ft0, $ft0 - movfr2gr.s $a5, $ft0 - vinsgr2vr.w $vr8, $a5, 0 + movfr2gr.s $a6, $ft0 + vinsgr2vr.w $vr8, $a6, 0 vreplvei.d $vr6, $vr6, 1 ftintrz.w.d $fa6, $fa6 - movfr2gr.s $a5, $fa6 - vinsgr2vr.w $vr8, $a5, 1 + movfr2gr.s $a6, $fa6 + vinsgr2vr.w $vr8, $a6, 1 vreplvei.d $vr6, $vr7, 0 ftintrz.w.d $fa6, $fa6 - movfr2gr.s $a5, $fa6 - vinsgr2vr.w $vr6, $a5, 0 + movfr2gr.s $a6, $fa6 + vinsgr2vr.w $vr6, $a6, 0 vreplvei.d $vr7, $vr7, 1 ftintrz.w.d $fa7, $fa7 - movfr2gr.s $a5, $fa7 - vinsgr2vr.w $vr6, $a5, 1 + movfr2gr.s $a6, $fa7 + vinsgr2vr.w $vr6, $a6, 1 vmax.w $vr7, $vr8, $vr5 vmax.w $vr6, $vr6, $vr5 - vstelm.w $vr7, $a3, -12, 0 - vstelm.w $vr7, $a3, -4, 1 - vstelm.w $vr6, $a3, 4, 0 - vstelm.w $vr6, $a3, 12, 1 + vstelm.w $vr7, $a4, -12, 0 + vstelm.w $vr7, $a4, -4, 1 + vstelm.w $vr6, $a4, 4, 0 + vstelm.w $vr6, $a4, 12, 1 vadd.w $vr1, $vr7, $vr1 vadd.w $vr0, $vr6, $vr0 - addi.d $a4, $a4, -4 - addi.d $a2, $a2, 32 + addi.d $a5, $a5, -4 addi.d $a3, $a3, 32 - bnez $a4, .LBB4_38 -# %bb.39: # %middle.block + addi.d $a4, $a4, 32 + bnez $a5, .LBB4_37 +# %bb.38: # %middle.block vadd.w $vr0, $vr0, $vr1 vhaddw.d.w $vr0, $vr0, $vr0 vpickve2gr.w $fp, $vr0, 0 - beq $a1, $a0, .LBB4_42 -.LBB4_40: # %.lr.ph376.preheader538 - ld.d $a2, $sp, 24 # 8-byte Folded Reload - alsl.d $a2, $a1, $a2, 3 - addi.d $a3, $sp, 352 - alsl.d $a3, $a1, $a3, 3 - addi.d $a3, $a3, 4 - sub.d $a0, $a0, $a1 - pcalau12i $a1, %pc_hi20(.LCPI4_2) - fld.d $fa0, $a1, %pc_lo12(.LCPI4_2) - vldi $vr1, -928 + beq $a2, $a0, .LBB4_41 +.LBB4_39: # %.lr.ph376.preheader538 + alsl.d $a3, $a2, $t2, 3 + addi.d $a4, $sp, 344 + alsl.d $a4, $a2, $a4, 3 + addi.d $a4, $a4, 4 + sub.d $a0, $a0, $a2 + vldi $vr0, -928 + ori $a1, $a1, 1311 + lu32i.d $a1, 335544 + lu52i.d $a1, $a1, 1021 + movgr2fr.d $fa1, $a1 vldi $vr2, -912 ori $a1, $zero, 125 .p2align 4, , 16 -.LBB4_41: # %.lr.ph376 +.LBB4_40: # %.lr.ph376 # =>This Inner Loop Header: Depth=1 - fld.d $fa3, $a2, 0 - fsub.d $fa3, $fa1, $fa3 - fmul.d $fa3, $fa3, $fa0 + fld.d $fa3, $a3, 0 + fsub.d $fa3, $fa0, $fa3 + fmul.d $fa3, $fa3, $fa1 fadd.d $fa3, $fa3, $fa3 - ld.w $a4, $a3, -4 + ld.w $a2, $a4, -4 fsub.d $fa4, $fa2, $fa3 fadd.d $fa3, $fa3, $fa2 fdiv.d $fa3, $fa4, $fa3 - movgr2fr.w $fa4, $a4 + movgr2fr.w $fa4, $a2 ffint.d.w $fa4, $fa4 fmul.d $fa3, $fa3, $fa4 ftintrz.w.d $fa3, $fa3 - movfr2gr.s $a4, $fa3 - slt $a5, $a1, $a4 - maskeqz $a4, $a4, $a5 + movfr2gr.s $a2, $fa3 + slt $a5, $a1, $a2 + maskeqz $a2, $a2, $a5 masknez $a5, $a1, $a5 - or $a4, $a4, $a5 - st.w $a4, $a3, 0 - add.w $fp, $a4, $fp - addi.d $a2, $a2, 8 - addi.d $a0, $a0, -1 + or $a2, $a2, $a5 + st.w $a2, $a4, 0 + add.w $fp, $a2, $fp addi.d $a3, $a3, 8 - bnez $a0, .LBB4_41 -.LBB4_42: # %.loopexit348 + addi.d $a0, $a0, -1 + addi.d $a4, $a4, 8 + bnez $a0, .LBB4_40 +.LBB4_41: # %.loopexit348 ori $a0, $zero, 1 - bnez $s0, .LBB4_44 -.LBB4_43: # %.loopexit348.thread + bnez $s1, .LBB4_43 +.LBB4_42: # %.loopexit348.thread ld.w $a0, $s4, 208 -.LBB4_44: +.LBB4_43: ld.w $a1, $s4, 212 - bge $a0, $a1, .LBB4_49 -# %bb.45: # %.lr.ph380.preheader - addi.d $a2, $sp, 260 + bge $a0, $a1, .LBB4_48 +# %bb.44: # %.lr.ph380.preheader + addi.d $a2, $sp, 252 alsl.d $a2, $a0, $a2, 2 .p2align 4, , 16 -.LBB4_46: # %.lr.ph380 +.LBB4_45: # %.lr.ph380 # =>This Inner Loop Header: Depth=1 ld.w $a3, $a2, 0 - bge $a3, $fp, .LBB4_49 -# %bb.47: # in Loop: Header=BB4_46 Depth=1 + bge $a3, $fp, .LBB4_48 +# %bb.46: # in Loop: Header=BB4_45 Depth=1 addi.w $a0, $a0, 1 addi.d $a2, $a2, 4 - bne $a1, $a0, .LBB4_46 -# %bb.48: + bne $a1, $a0, .LBB4_45 +# %bb.47: move $a0, $a1 -.LBB4_49: # %._crit_edge381 +.LBB4_48: # %._crit_edge381 st.w $a0, $s4, 220 - addi.d $a1, $sp, 256 - addi.d $a2, $sp, 252 + addi.d $a1, $sp, 248 + addi.d $a2, $sp, 244 move $a0, $s4 pcaddu18i $ra, %call36(getframebits) jirl $ra, $ra, 0 - ld.w $a2, $sp, 252 - ld.w $a3, $sp, 256 + ld.w $a2, $sp, 244 + ld.w $a3, $sp, 248 move $a0, $s4 move $a1, $s3 pcaddu18i $ra, %call36(ResvFrameBegin) jirl $ra, $ra, 0 ld.w $a1, $s4, 200 move $s5, $a0 - bge $a0, $fp, .LBB4_61 -# %bb.50: # %.preheader346 - blez $a1, .LBB4_107 -# %bb.51: # %.preheader345.lr.ph + bge $a0, $fp, .LBB4_60 +# %bb.49: # %.preheader346 + blez $a1, .LBB4_106 +# %bb.50: # %.preheader345.lr.ph ld.w $a0, $s4, 204 - blez $a0, .LBB4_62 -# %bb.52: # %.preheader345.lr.ph.split.us + blez $a0, .LBB4_61 +# %bb.51: # %.preheader345.lr.ph.split.us ld.w $a2, $s4, 220 slli.d $a2, $a2, 2 - addi.d $a3, $sp, 260 + addi.d $a3, $sp, 252 ldx.w $a2, $a2, $a3 move $a3, $zero bstrpick.d $a4, $a0, 30, 3 slli.d $a4, $a4, 3 vreplgr2vr.w $vr0, $a2 vreplgr2vr.w $vr1, $fp - addi.d $a5, $sp, 352 - addi.d $a6, $sp, 368 + addi.d $a5, $sp, 344 + addi.d $a6, $sp, 360 ori $a7, $zero, 8 - b .LBB4_54 + b .LBB4_53 .p2align 4, , 16 -.LBB4_53: # %._crit_edge387.us - # in Loop: Header=BB4_54 Depth=1 +.LBB4_52: # %._crit_edge387.us + # in Loop: Header=BB4_53 Depth=1 addi.d $a3, $a3, 1 addi.d $a6, $a6, 8 addi.d $a5, $a5, 8 - bgeu $a3, $a1, .LBB4_62 -.LBB4_54: # %.preheader345.us + bgeu $a3, $a1, .LBB4_61 +.LBB4_53: # %.preheader345.us # =>This Loop Header: Depth=1 - # Child Loop BB4_57 Depth 2 - # Child Loop BB4_60 Depth 2 - bgeu $a0, $a7, .LBB4_56 -# %bb.55: # in Loop: Header=BB4_54 Depth=1 + # Child Loop BB4_56 Depth 2 + # Child Loop BB4_59 Depth 2 + bgeu $a0, $a7, .LBB4_55 +# %bb.54: # in Loop: Header=BB4_53 Depth=1 move $t1, $zero - b .LBB4_59 + b .LBB4_58 .p2align 4, , 16 -.LBB4_56: # %vector.body525.preheader - # in Loop: Header=BB4_54 Depth=1 +.LBB4_55: # %vector.body525.preheader + # in Loop: Header=BB4_53 Depth=1 move $t0, $a6 move $t1, $a4 .p2align 4, , 16 -.LBB4_57: # %vector.body525 - # Parent Loop BB4_54 Depth=1 +.LBB4_56: # %vector.body525 + # Parent Loop BB4_53 Depth=1 # => This Inner Loop Header: Depth=2 vld $vr2, $t0, -16 vld $vr3, $t0, 0 @@ -1541,18 +1538,18 @@ VBR_iteration_loop: # @VBR_iteration_loop vst $vr3, $t0, 0 addi.d $t1, $t1, -8 addi.d $t0, $t0, 32 - bnez $t1, .LBB4_57 -# %bb.58: # %middle.block530 - # in Loop: Header=BB4_54 Depth=1 + bnez $t1, .LBB4_56 +# %bb.57: # %middle.block530 + # in Loop: Header=BB4_53 Depth=1 move $t1, $a4 - beq $a4, $a0, .LBB4_53 -.LBB4_59: # %scalar.ph518.preheader - # in Loop: Header=BB4_54 Depth=1 + beq $a4, $a0, .LBB4_52 +.LBB4_58: # %scalar.ph518.preheader + # in Loop: Header=BB4_53 Depth=1 alsl.d $t0, $t1, $a5, 2 sub.d $t1, $a0, $t1 .p2align 4, , 16 -.LBB4_60: # %scalar.ph518 - # Parent Loop BB4_54 Depth=1 +.LBB4_59: # %scalar.ph518 + # Parent Loop BB4_53 Depth=1 # => This Inner Loop Header: Depth=2 ld.w $t2, $t0, 0 mul.w $t2, $a2, $t2 @@ -1560,16 +1557,16 @@ VBR_iteration_loop: # @VBR_iteration_loop st.w $t2, $t0, 0 addi.d $t1, $t1, -1 addi.d $t0, $t0, 4 - bnez $t1, .LBB4_60 - b .LBB4_53 -.LBB4_61: # %.loopexit - blez $a1, .LBB4_107 -.LBB4_62: # %.preheader342.lr.ph + bnez $t1, .LBB4_59 + b .LBB4_52 +.LBB4_60: # %.loopexit + blez $a1, .LBB4_106 +.LBB4_61: # %.preheader342.lr.ph ld.w $a0, $s4, 204 addi.d $s6, $s3, 48 lu12i.w $a2, 1 - blez $a0, .LBB4_86 -# %bb.63: # %.preheader342.preheader + blez $a0, .LBB4_85 +# %bb.62: # %.preheader342.preheader move $s0, $zero addi.d $a4, $s3, 288 move $a3, $a2 @@ -1582,18 +1579,22 @@ VBR_iteration_loop: # @VBR_iteration_loop addi.d $a6, $a2, 488 ori $a3, $a3, 512 add.d $a7, $s7, $a3 - addi.d $t0, $sp, 360 + addi.d $t0, $sp, 352 ld.d $a2, $sp, 64 # 8-byte Folded Reload addi.d $t1, $a2, 1952 + ori $a2, $zero, 0 + lu32i.d $a2, 167424 + lu52i.d $a2, $a2, 1034 + movgr2fr.d $fs0, $a2 st.d $fp, $sp, 208 # 8-byte Folded Spill - st.d $a3, $sp, 232 # 8-byte Folded Spill - st.d $s5, $sp, 224 # 8-byte Folded Spill + st.d $a3, $sp, 224 # 8-byte Folded Spill + st.d $s5, $sp, 216 # 8-byte Folded Spill st.d $s6, $sp, 136 # 8-byte Folded Spill - b .LBB4_66 + b .LBB4_65 .p2align 4, , 16 -.LBB4_64: # %._crit_edge399.loopexit - # in Loop: Header=BB4_66 Depth=1 - ld.d $s4, $sp, 240 # 8-byte Folded Reload +.LBB4_63: # %._crit_edge399.loopexit + # in Loop: Header=BB4_65 Depth=1 + ld.d $s4, $sp, 232 # 8-byte Folded Reload ld.w $a1, $s4, 200 ld.d $s3, $sp, 80 # 8-byte Folded Reload ld.d $s7, $sp, 72 # 8-byte Folded Reload @@ -1605,8 +1606,8 @@ VBR_iteration_loop: # @VBR_iteration_loop ld.d $a7, $sp, 176 # 8-byte Folded Reload ld.d $t0, $sp, 168 # 8-byte Folded Reload ld.d $t1, $sp, 160 # 8-byte Folded Reload -.LBB4_65: # %._crit_edge399 - # in Loop: Header=BB4_66 Depth=1 +.LBB4_64: # %._crit_edge399 + # in Loop: Header=BB4_65 Depth=1 addi.d $s0, $s0, 1 addi.d $a4, $a4, 240 ld.d $a2, $sp, 144 # 8-byte Folded Reload @@ -1615,13 +1616,13 @@ VBR_iteration_loop: # @VBR_iteration_loop add.d $a7, $a7, $a3 addi.d $t0, $t0, 8 addi.d $t1, $t1, 1952 - bge $s0, $a1, .LBB4_85 -.LBB4_66: # %.preheader342 + bge $s0, $a1, .LBB4_84 +.LBB4_65: # %.preheader342 # =>This Loop Header: Depth=1 - # Child Loop BB4_82 Depth 2 - blez $a0, .LBB4_65 -# %bb.67: # %.lr.ph398 - # in Loop: Header=BB4_66 Depth=1 + # Child Loop BB4_81 Depth 2 + blez $a0, .LBB4_64 +# %bb.66: # %.lr.ph398 + # in Loop: Header=BB4_65 Depth=1 st.d $t1, $sp, 160 # 8-byte Folded Spill st.d $t0, $sp, 168 # 8-byte Folded Spill st.d $a7, $sp, 176 # 8-byte Folded Spill @@ -1635,7 +1636,7 @@ VBR_iteration_loop: # @VBR_iteration_loop slli.d $a1, $a0, 10 ld.d $a2, $sp, 96 # 8-byte Folded Reload add.d $s8, $a2, $a1 - addi.d $a1, $sp, 352 + addi.d $a1, $sp, 344 alsl.d $s1, $s0, $a1, 3 ori $a1, $zero, 1952 mul.d $a1, $s0, $a1 @@ -1648,27 +1649,25 @@ VBR_iteration_loop: # @VBR_iteration_loop ld.d $a1, $sp, 112 # 8-byte Folded Reload add.d $s7, $a1, $a0 st.d $s0, $sp, 152 # 8-byte Folded Spill - bge $s5, $fp, .LBB4_70 -# %bb.68: # in Loop: Header=BB4_66 Depth=1 + bge $s5, $fp, .LBB4_69 +# %bb.67: # in Loop: Header=BB4_65 Depth=1 move $a0, $s4 move $a1, $s8 move $a2, $s2 pcaddu18i $ra, %call36(init_outer_loop) jirl $ra, $ra, 0 - beqz $a0, .LBB4_73 -# %bb.69: # in Loop: Header=BB4_66 Depth=1 + beqz $a0, .LBB4_72 +# %bb.68: # in Loop: Header=BB4_65 Depth=1 ld.w $a0, $s4, 92 slli.d $a0, $a0, 1 - addi.d $a0, $a0, -6 ld.w $s4, $s1, 0 + addi.d $a0, $a0, -6 movgr2fr.w $fa0, $a0 ffint.s.w $fa0, $fa0 - ld.d $a0, $sp, 216 # 8-byte Folded Reload - fld.d $fa1, $a0, %pc_lo12(.LCPI4_1) addi.d $a0, $s4, -125 - movgr2fr.w $fa2, $a0 - ffint.d.w $fa2, $fa2 - fdiv.d $fa1, $fa2, $fa1 + movgr2fr.w $fa1, $a0 + ffint.d.w $fa1, $fa1 + fdiv.d $fa1, $fa1, $fs0 fcvt.s.d $fa1, $fa1 vldi $vr2, -1040 fadd.s $fa1, $fa1, $fa2 @@ -1685,50 +1684,50 @@ VBR_iteration_loop: # @VBR_iteration_loop pcalau12i $a0, %got_pc_hi20(masking_lower) ld.d $a0, $a0, %got_pc_lo12(masking_lower) fst.s $fa0, $a0, 0 - addi.d $a4, $sp, 368 - ld.d $a0, $sp, 240 # 8-byte Folded Reload + addi.d $a4, $sp, 360 + ld.d $a0, $sp, 232 # 8-byte Folded Reload move $a1, $s8 move $a2, $s3 move $a3, $s2 pcaddu18i $ra, %call36(calc_xmin) jirl $ra, $ra, 0 - addi.d $a3, $sp, 320 - addi.d $a4, $sp, 368 + addi.d $a3, $sp, 312 + addi.d $a4, $sp, 360 st.d $zero, $sp, 8 - ld.d $a0, $sp, 240 # 8-byte Folded Reload + ld.d $a0, $sp, 232 # 8-byte Folded Reload move $a1, $s8 move $a2, $s4 - ld.d $s4, $sp, 240 # 8-byte Folded Reload + ld.d $s4, $sp, 232 # 8-byte Folded Reload move $a5, $s6 move $a6, $s7 move $a7, $s2 pcaddu18i $ra, %call36(outer_loop) jirl $ra, $ra, 0 ld.w $a0, $s4, 204 - ld.d $a3, $sp, 232 # 8-byte Folded Reload + ld.d $a3, $sp, 224 # 8-byte Folded Reload ori $a1, $zero, 1 - bge $a1, $a0, .LBB4_64 - b .LBB4_74 + bge $a1, $a0, .LBB4_63 + b .LBB4_73 .p2align 4, , 16 -.LBB4_70: # %.thread - # in Loop: Header=BB4_66 Depth=1 +.LBB4_69: # %.thread + # in Loop: Header=BB4_65 Depth=1 ld.w $a0, $s4, 204 ori $a1, $zero, 2 - blt $a0, $a1, .LBB4_64 -# %bb.71: # %.peel.next.thread - # in Loop: Header=BB4_66 Depth=1 + blt $a0, $a1, .LBB4_63 +# %bb.70: # %.peel.next.thread + # in Loop: Header=BB4_65 Depth=1 pcalau12i $a0, %got_pc_hi20(reduce_sidechannel) ld.d $a0, $a0, %got_pc_lo12(reduce_sidechannel) ld.w $a0, $a0, 0 - bnez $a0, .LBB4_74 + bnez $a0, .LBB4_73 .p2align 4, , 16 -# %bb.72: # in Loop: Header=BB4_66 Depth=1 - ld.d $a0, $sp, 240 # 8-byte Folded Reload +# %bb.71: # in Loop: Header=BB4_65 Depth=1 + ld.d $a0, $sp, 232 # 8-byte Folded Reload ld.w $a0, $a0, 204 ori $a1, $zero, 3 - blt $a0, $a1, .LBB4_64 - b .LBB4_78 -.LBB4_73: # in Loop: Header=BB4_66 Depth=1 + blt $a0, $a1, .LBB4_63 + b .LBB4_77 +.LBB4_72: # in Loop: Header=BB4_65 Depth=1 ori $a2, $zero, 244 move $a0, $s7 move $a1, $zero @@ -1740,34 +1739,32 @@ VBR_iteration_loop: # @VBR_iteration_loop pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ld.w $a0, $s4, 204 - ld.d $a3, $sp, 232 # 8-byte Folded Reload + ld.d $a3, $sp, 224 # 8-byte Folded Reload ori $a1, $zero, 1 - bge $a1, $a0, .LBB4_64 -.LBB4_74: # %.peel.next - # in Loop: Header=BB4_66 Depth=1 + bge $a1, $a0, .LBB4_63 +.LBB4_73: # %.peel.next + # in Loop: Header=BB4_65 Depth=1 addi.d $s2, $s2, 120 add.d $s8, $s8, $a3 - ld.d $fp, $sp, 240 # 8-byte Folded Reload + ld.d $fp, $sp, 232 # 8-byte Folded Reload move $a0, $fp move $a1, $s8 move $a2, $s2 pcaddu18i $ra, %call36(init_outer_loop) jirl $ra, $ra, 0 addi.d $s7, $s7, 244 - beqz $a0, .LBB4_76 -# %bb.75: # in Loop: Header=BB4_66 Depth=1 + beqz $a0, .LBB4_75 +# %bb.74: # in Loop: Header=BB4_65 Depth=1 ld.w $a0, $fp, 92 slli.d $a0, $a0, 1 - addi.d $a0, $a0, -6 ld.w $s4, $s1, 4 + addi.d $a0, $a0, -6 movgr2fr.w $fa0, $a0 ffint.s.w $fa0, $fa0 - ld.d $a0, $sp, 216 # 8-byte Folded Reload - fld.d $fa1, $a0, %pc_lo12(.LCPI4_1) addi.d $a0, $s4, -125 - movgr2fr.w $fa2, $a0 - ffint.d.w $fa2, $fa2 - fdiv.d $fa1, $fa2, $fa1 + movgr2fr.w $fa1, $a0 + ffint.d.w $fa1, $fa1 + fdiv.d $fa1, $fa1, $fs0 fcvt.s.d $fa1, $fa1 vldi $vr2, -1040 fadd.s $fa1, $fa1, $fa2 @@ -1785,7 +1782,7 @@ VBR_iteration_loop: # @VBR_iteration_loop ld.d $a0, $a0, %got_pc_lo12(masking_lower) fst.s $fa0, $a0, 0 addi.d $a2, $s3, 976 - addi.d $a4, $sp, 368 + addi.d $a4, $sp, 360 move $a0, $fp move $a1, $s8 move $a3, $s2 @@ -1793,8 +1790,8 @@ VBR_iteration_loop: # @VBR_iteration_loop jirl $ra, $ra, 0 addi.d $a0, $s6, 2047 addi.d $a5, $a0, 257 - addi.d $a3, $sp, 320 - addi.d $a4, $sp, 368 + addi.d $a3, $sp, 312 + addi.d $a4, $sp, 360 ori $a0, $zero, 1 st.d $a0, $sp, 8 move $a0, $fp @@ -1804,8 +1801,8 @@ VBR_iteration_loop: # @VBR_iteration_loop move $a7, $s2 pcaddu18i $ra, %call36(outer_loop) jirl $ra, $ra, 0 - b .LBB4_77 -.LBB4_76: # in Loop: Header=BB4_66 Depth=1 + b .LBB4_76 +.LBB4_75: # in Loop: Header=BB4_65 Depth=1 ori $a2, $zero, 244 move $a0, $s7 move $a1, $zero @@ -1817,15 +1814,15 @@ VBR_iteration_loop: # @VBR_iteration_loop move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 -.LBB4_77: # in Loop: Header=BB4_66 Depth=1 +.LBB4_76: # in Loop: Header=BB4_65 Depth=1 ld.d $fp, $sp, 208 # 8-byte Folded Reload - ld.d $a3, $sp, 232 # 8-byte Folded Reload - ld.d $a0, $sp, 240 # 8-byte Folded Reload + ld.d $a3, $sp, 224 # 8-byte Folded Reload + ld.d $a0, $sp, 232 # 8-byte Folded Reload ld.w $a0, $a0, 204 ori $a1, $zero, 3 - blt $a0, $a1, .LBB4_64 -.LBB4_78: # %.peel.next444.preheader - # in Loop: Header=BB4_66 Depth=1 + blt $a0, $a1, .LBB4_63 +.LBB4_77: # %.peel.next444.preheader + # in Loop: Header=BB4_65 Depth=1 ori $s1, $zero, 2 ld.d $s4, $sp, 160 # 8-byte Folded Reload ld.d $s3, $sp, 168 # 8-byte Folded Reload @@ -1834,21 +1831,19 @@ VBR_iteration_loop: # @VBR_iteration_loop ld.d $s8, $sp, 192 # 8-byte Folded Reload ld.d $s2, $sp, 200 # 8-byte Folded Reload ori $s0, $zero, 2 - b .LBB4_82 + b .LBB4_81 .p2align 4, , 16 -.LBB4_79: # in Loop: Header=BB4_82 Depth=2 +.LBB4_78: # in Loop: Header=BB4_81 Depth=2 ld.w $a0, $fp, 92 slli.d $a0, $a0, 1 - addi.d $a0, $a0, -6 ld.w $s5, $s3, 0 + addi.d $a0, $a0, -6 movgr2fr.w $fa0, $a0 ffint.s.w $fa0, $fa0 - ld.d $a0, $sp, 216 # 8-byte Folded Reload - fld.d $fa1, $a0, %pc_lo12(.LCPI4_1) addi.d $a0, $s5, -125 - movgr2fr.w $fa2, $a0 - ffint.d.w $fa2, $fa2 - fdiv.d $fa1, $fa2, $fa1 + movgr2fr.w $fa1, $a0 + ffint.d.w $fa1, $fa1 + fdiv.d $fa1, $fa1, $fs0 fcvt.s.d $fa1, $fa1 vldi $vr2, -1040 fadd.s $fa1, $fa1, $fa2 @@ -1865,31 +1860,31 @@ VBR_iteration_loop: # @VBR_iteration_loop pcalau12i $a0, %got_pc_hi20(masking_lower) ld.d $a0, $a0, %got_pc_lo12(masking_lower) fst.s $fa0, $a0, 0 - addi.d $a4, $sp, 368 + addi.d $a4, $sp, 360 move $a0, $fp move $a1, $s8 move $a2, $s4 move $a3, $s2 pcaddu18i $ra, %call36(calc_xmin) jirl $ra, $ra, 0 - addi.d $a3, $sp, 320 - addi.d $a4, $sp, 368 + addi.d $a3, $sp, 312 + addi.d $a4, $sp, 360 st.d $s1, $sp, 8 move $a0, $fp move $a1, $s8 move $a2, $s5 - ld.d $s5, $sp, 224 # 8-byte Folded Reload + ld.d $s5, $sp, 216 # 8-byte Folded Reload move $a5, $s6 move $a6, $s7 move $a7, $s2 pcaddu18i $ra, %call36(outer_loop) jirl $ra, $ra, 0 -.LBB4_80: # in Loop: Header=BB4_82 Depth=2 +.LBB4_79: # in Loop: Header=BB4_81 Depth=2 ld.d $fp, $sp, 208 # 8-byte Folded Reload - ld.d $a3, $sp, 232 # 8-byte Folded Reload -.LBB4_81: # in Loop: Header=BB4_82 Depth=2 + ld.d $a3, $sp, 224 # 8-byte Folded Reload +.LBB4_80: # in Loop: Header=BB4_81 Depth=2 addi.d $s0, $s0, 1 - ld.d $a0, $sp, 240 # 8-byte Folded Reload + ld.d $a0, $sp, 232 # 8-byte Folded Reload ld.w $a0, $a0, 204 addi.w $s1, $s1, 1 addi.d $s2, $s2, 120 @@ -1899,20 +1894,20 @@ VBR_iteration_loop: # @VBR_iteration_loop addi.d $s6, $a1, 257 addi.d $s3, $s3, 4 addi.d $s4, $s4, 976 - bge $s0, $a0, .LBB4_64 -.LBB4_82: # %.peel.next444 - # Parent Loop BB4_66 Depth=1 + bge $s0, $a0, .LBB4_63 +.LBB4_81: # %.peel.next444 + # Parent Loop BB4_65 Depth=1 # => This Inner Loop Header: Depth=2 - bge $s5, $fp, .LBB4_81 -# %bb.83: # in Loop: Header=BB4_82 Depth=2 - ld.d $fp, $sp, 240 # 8-byte Folded Reload + bge $s5, $fp, .LBB4_80 +# %bb.82: # in Loop: Header=BB4_81 Depth=2 + ld.d $fp, $sp, 232 # 8-byte Folded Reload move $a0, $fp move $a1, $s8 move $a2, $s2 pcaddu18i $ra, %call36(init_outer_loop) jirl $ra, $ra, 0 - bnez $a0, .LBB4_79 -# %bb.84: # in Loop: Header=BB4_82 Depth=2 + bnez $a0, .LBB4_78 +# %bb.83: # in Loop: Header=BB4_81 Depth=2 ori $a2, $zero, 244 move $a0, $s7 move $a1, $zero @@ -1923,48 +1918,48 @@ VBR_iteration_loop: # @VBR_iteration_loop move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - b .LBB4_80 -.LBB4_85: # %.preheader341 + b .LBB4_79 +.LBB4_84: # %.preheader341 lu12i.w $a2, 1 - blez $a1, .LBB4_107 -.LBB4_86: # %.preheader340.lr.ph + blez $a1, .LBB4_106 +.LBB4_85: # %.preheader340.lr.ph ld.w $a0, $s4, 204 - blez $a0, .LBB4_96 -# %bb.87: # %.preheader340.preheader + blez $a0, .LBB4_95 +# %bb.86: # %.preheader340.preheader move $s2, $zero ori $a3, $a2, 512 - st.d $a3, $sp, 240 # 8-byte Folded Spill + st.d $a3, $sp, 232 # 8-byte Folded Spill move $s0, $s7 - b .LBB4_90 + b .LBB4_89 .p2align 4, , 16 -.LBB4_88: # %._crit_edge403.loopexit - # in Loop: Header=BB4_90 Depth=1 +.LBB4_87: # %._crit_edge403.loopexit + # in Loop: Header=BB4_89 Depth=1 ld.w $a1, $s8, 200 move $s4, $s8 lu12i.w $a2, 1 move $s6, $fp -.LBB4_89: # %._crit_edge403 - # in Loop: Header=BB4_90 Depth=1 +.LBB4_88: # %._crit_edge403 + # in Loop: Header=BB4_89 Depth=1 addi.d $s2, $s2, 1 addi.d $s6, $s6, 240 - ld.d $a3, $sp, 240 # 8-byte Folded Reload + ld.d $a3, $sp, 232 # 8-byte Folded Reload add.d $s0, $s0, $a3 - bge $s2, $a1, .LBB4_95 -.LBB4_90: # %.preheader340 + bge $s2, $a1, .LBB4_94 +.LBB4_89: # %.preheader340 # =>This Loop Header: Depth=1 - # Child Loop BB4_93 Depth 2 - blez $a0, .LBB4_89 -# %bb.91: # %.lr.ph402 - # in Loop: Header=BB4_90 Depth=1 + # Child Loop BB4_92 Depth 2 + blez $a0, .LBB4_88 +# %bb.90: # %.lr.ph402 + # in Loop: Header=BB4_89 Depth=1 move $s8, $s4 move $s4, $zero move $s1, $zero move $s5, $s0 move $fp, $s6 - b .LBB4_93 + b .LBB4_92 .p2align 4, , 16 -.LBB4_92: # in Loop: Header=BB4_93 Depth=2 - ld.w $a3, $sp, 252 +.LBB4_91: # in Loop: Header=BB4_92 Depth=2 + ld.w $a3, $sp, 244 move $a0, $s8 move $a1, $s6 move $a2, $s3 @@ -1976,8 +1971,8 @@ VBR_iteration_loop: # @VBR_iteration_loop addi.d $s6, $s6, 120 addi.d $a1, $s5, 2047 addi.d $s5, $a1, 257 - bge $s1, $a0, .LBB4_88 -.LBB4_93: # Parent Loop BB4_90 Depth=1 + bge $s1, $a0, .LBB4_87 +.LBB4_92: # Parent Loop BB4_89 Depth=1 # => This Inner Loop Header: Depth=2 move $a0, $s8 move $a1, $s2 @@ -1988,21 +1983,21 @@ VBR_iteration_loop: # @VBR_iteration_loop pcaddu18i $ra, %call36(best_scalefac_store) jirl $ra, $ra, 0 ld.w $a0, $s6, 24 - bnez $a0, .LBB4_92 -# %bb.94: # in Loop: Header=BB4_93 Depth=2 + bnez $a0, .LBB4_91 +# %bb.93: # in Loop: Header=BB4_92 Depth=2 move $a0, $s2 move $a1, $s4 move $a2, $s6 move $a3, $s5 pcaddu18i $ra, %call36(best_huffman_divide) jirl $ra, $ra, 0 - b .LBB4_92 -.LBB4_95: # %.preheader339 - blez $a1, .LBB4_107 -.LBB4_96: # %.preheader338.lr.ph + b .LBB4_91 +.LBB4_94: # %.preheader339 + blez $a1, .LBB4_106 +.LBB4_95: # %.preheader338.lr.ph ld.w $a5, $s4, 204 - blez $a5, .LBB4_107 -# %bb.97: # %.preheader338.preheader + blez $a5, .LBB4_106 +# %bb.96: # %.preheader338.preheader move $a0, $zero move $a3, $a2 lu12i.w $a2, -2 @@ -2011,69 +2006,70 @@ VBR_iteration_loop: # @VBR_iteration_loop movgr2fr.d $fa0, $zero lu12i.w $a4, 2 ori $a4, $a4, 1024 - b .LBB4_100 + b .LBB4_99 .p2align 4, , 16 -.LBB4_98: # %._crit_edge407.loopexit - # in Loop: Header=BB4_100 Depth=1 +.LBB4_97: # %._crit_edge407.loopexit + # in Loop: Header=BB4_99 Depth=1 ld.w $a1, $s4, 200 -.LBB4_99: # %._crit_edge407 - # in Loop: Header=BB4_100 Depth=1 +.LBB4_98: # %._crit_edge407 + # in Loop: Header=BB4_99 Depth=1 addi.d $a0, $a0, 1 ld.d $a6, $sp, 96 # 8-byte Folded Reload add.d $a6, $a6, $a4 st.d $a6, $sp, 96 # 8-byte Folded Spill add.d $s7, $s7, $a3 - bge $a0, $a1, .LBB4_107 -.LBB4_100: # %.preheader338 + bge $a0, $a1, .LBB4_106 +.LBB4_99: # %.preheader338 # =>This Loop Header: Depth=1 - # Child Loop BB4_103 Depth 2 - # Child Loop BB4_105 Depth 3 - blez $a5, .LBB4_99 -# %bb.101: # %.preheader.lr.ph - # in Loop: Header=BB4_100 Depth=1 + # Child Loop BB4_102 Depth 2 + # Child Loop BB4_104 Depth 3 + blez $a5, .LBB4_98 +# %bb.100: # %.preheader.lr.ph + # in Loop: Header=BB4_99 Depth=1 move $a1, $zero move $a6, $s7 ld.d $a7, $sp, 96 # 8-byte Folded Reload - b .LBB4_103 + b .LBB4_102 .p2align 4, , 16 -.LBB4_102: # in Loop: Header=BB4_103 Depth=2 +.LBB4_101: # in Loop: Header=BB4_102 Depth=2 ld.w $a5, $s4, 204 addi.d $a1, $a1, 1 add.d $a7, $a7, $a3 addi.d $a6, $a6, 2047 addi.d $a6, $a6, 257 - bge $a1, $a5, .LBB4_98 -.LBB4_103: # %.preheader - # Parent Loop BB4_100 Depth=1 + bge $a1, $a5, .LBB4_97 +.LBB4_102: # %.preheader + # Parent Loop BB4_99 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB4_105 Depth 3 + # Child Loop BB4_104 Depth 3 move $a5, $a6 move $t0, $a2 - b .LBB4_105 + b .LBB4_104 .p2align 4, , 16 -.LBB4_104: # in Loop: Header=BB4_105 Depth=3 +.LBB4_103: # in Loop: Header=BB4_104 Depth=3 addi.d $t0, $t0, 8 addi.d $a5, $a5, 4 - beqz $t0, .LBB4_102 -.LBB4_105: # Parent Loop BB4_100 Depth=1 - # Parent Loop BB4_103 Depth=2 + beqz $t0, .LBB4_101 +.LBB4_104: # Parent Loop BB4_99 Depth=1 + # Parent Loop BB4_102 Depth=2 # => This Inner Loop Header: Depth=3 add.d $t1, $a7, $t0 fldx.d $fa1, $t1, $a3 fcmp.cule.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB4_104 -# %bb.106: # in Loop: Header=BB4_105 Depth=3 + bcnez $fcc0, .LBB4_103 +# %bb.105: # in Loop: Header=BB4_104 Depth=3 ld.w $t1, $a5, 0 sub.d $t1, $zero, $t1 st.w $t1, $a5, 0 - b .LBB4_104 -.LBB4_107: # %._crit_edge409 - ld.w $a2, $sp, 252 + b .LBB4_103 +.LBB4_106: # %._crit_edge409 + ld.w $a2, $sp, 244 move $a0, $s4 move $a1, $s3 pcaddu18i $ra, %call36(ResvFrameEnd) jirl $ra, $ra, 0 addi.d $sp, $sp, 1712 + fld.d $fs1, $sp, 1928 # 8-byte Folded Reload fld.d $fs0, $sp, 1936 # 8-byte Folded Reload ld.d $s8, $sp, 1944 # 8-byte Folded Reload ld.d $s7, $sp, 1952 # 8-byte Folded Reload @@ -2111,12 +2107,7 @@ VBR_compare: # @VBR_compare .Lfunc_end5: .size VBR_compare, .Lfunc_end5-VBR_compare # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function calc_noise1 -.LCPI6_0: - .dword 0x3f50624dd2f1a9fc # double 0.001 - .text - .globl calc_noise1 + .globl calc_noise1 # -- Begin function calc_noise1 .p2align 5 .type calc_noise1,@function calc_noise1: # @calc_noise1 @@ -2153,7 +2144,7 @@ calc_noise1: # @calc_noise1 lu52i.d $a1, $a1, -1016 st.d $a1, $s7, 0 addi.w $a1, $a4, 0 - pcalau12i $s3, %pc_hi20(.LCPI6_0) + lu12i.w $a3, -184550 st.d $a4, $sp, 104 # 8-byte Folded Spill st.d $a2, $sp, 24 # 8-byte Folded Spill beqz $a1, .LBB6_12 @@ -2162,25 +2153,28 @@ calc_noise1: # @calc_noise1 ld.w $a1, $a2, 68 ld.w $a2, $a2, 12 st.d $a2, $sp, 176 # 8-byte Folded Spill - addi.d $a1, $a1, 1 - st.d $a1, $sp, 160 # 8-byte Folded Spill + addi.d $s3, $a1, 1 pcalau12i $a1, %got_pc_hi20(pow20) - ld.d $s4, $a1, %got_pc_lo12(pow20) + ld.d $fp, $a1, %got_pc_lo12(pow20) pcalau12i $a1, %got_pc_hi20(scalefac_band) - ld.d $s5, $a1, %got_pc_lo12(scalefac_band) + ld.d $s4, $a1, %got_pc_lo12(scalefac_band) movgr2fr.d $fs0, $zero pcalau12i $a1, %got_pc_hi20(pow43) - ld.d $s6, $a1, %got_pc_lo12(pow43) - move $fp, $zero + ld.d $s5, $a1, %got_pc_lo12(pow43) + move $s0, $zero st.d $zero, $sp, 168 # 8-byte Folded Spill + ori $a1, $a3, 2556 + lu32i.d $a1, 25165 + lu52i.d $a1, $a1, 1013 + movgr2fr.d $fs1, $a1 lu32i.d $a0, -131072 - lu52i.d $s0, $a0, -1021 + lu52i.d $s6, $a0, -1021 b .LBB6_4 .p2align 4, , 16 .LBB6_2: # %.thread # in Loop: Header=BB6_4 Depth=1 ld.d $a0, $sp, 136 # 8-byte Folded Reload - stx.d $s0, $a0, $s2 + stx.d $s6, $a0, $s2 vldi $vr0, -834 .LBB6_3: # in Loop: Header=BB6_4 Depth=1 ld.d $a4, $sp, 104 # 8-byte Folded Reload @@ -2191,11 +2185,11 @@ calc_noise1: # @calc_noise1 fcmp.clt.d $fcc0, $fa0, $fa1 fsel $fa0, $fa0, $fa1, $fcc0 fst.d $fa0, $s7, 0 - beq $fp, $a4, .LBB6_13 + beq $s0, $a4, .LBB6_13 .LBB6_4: # =>This Loop Header: Depth=1 # Child Loop BB6_8 Depth 2 - move $a0, $fp - slli.d $a1, $fp, 2 + move $a0, $s0 + slli.d $a1, $s0, 2 ld.d $a2, $sp, 120 # 8-byte Folded Reload ldx.w $a2, $a2, $a1 beqz $s1, .LBB6_6 @@ -2205,21 +2199,20 @@ calc_noise1: # @calc_noise1 ldx.w $a3, $a3, $a1 add.d $a2, $a3, $a2 .LBB6_6: # in Loop: Header=BB6_4 Depth=1 - ldx.w $a3, $s5, $a1 - addi.d $fp, $a0, 1 - slli.d $a1, $fp, 2 - ldx.w $a4, $s5, $a1 + ldx.w $a3, $s4, $a1 + addi.d $s0, $a0, 1 + slli.d $a1, $s0, 2 + ldx.w $a4, $s4, $a1 sub.d $a1, $a4, $a3 fmov.d $fa0, $fs0 bge $a3, $a4, .LBB6_9 # %bb.7: # %.lr.ph.preheader # in Loop: Header=BB6_4 Depth=1 - ld.d $a4, $sp, 160 # 8-byte Folded Reload - sll.w $a2, $a2, $a4 + sll.w $a2, $a2, $s3 ld.d $a4, $sp, 176 # 8-byte Folded Reload sub.w $a2, $a4, $a2 slli.d $a2, $a2, 3 - fldx.d $fa1, $s4, $a2 + fldx.d $fa1, $fp, $a2 ld.d $a2, $sp, 96 # 8-byte Folded Reload alsl.d $a2, $a3, $a2, 3 ld.d $a4, $sp, 88 # 8-byte Folded Reload @@ -2233,7 +2226,7 @@ calc_noise1: # @calc_noise1 ld.w $a5, $a3, 0 fld.d $fa2, $a2, 0 slli.d $a5, $a5, 3 - fldx.d $fa3, $s6, $a5 + fldx.d $fa3, $s5, $a5 fabs.d $fa2, $fa2 fneg.d $fa3, $fa3 fmadd.d $fa2, $fa3, $fa1, $fa2 @@ -2252,9 +2245,8 @@ calc_noise1: # @calc_noise1 fstx.d $fa0, $a0, $s2 ld.d $a0, $sp, 128 # 8-byte Folded Reload fldx.d $fa1, $a0, $s2 - fld.d $fa2, $s3, %pc_lo12(.LCPI6_0) fdiv.d $fa0, $fa0, $fa1 - fcmp.cule.d $fcc0, $fa2, $fa0 + fcmp.cule.d $fcc0, $fs1, $fa0 bceqz $fcc0, .LBB6_2 # %bb.10: # in Loop: Header=BB6_4 Depth=1 pcaddu18i $ra, %call36(log10) @@ -2278,9 +2270,9 @@ calc_noise1: # @calc_noise1 st.d $zero, $sp, 168 # 8-byte Folded Spill .LBB6_13: # %.preheader ld.d $a3, $sp, 24 # 8-byte Folded Reload - ld.w $s0, $a3, 84 + ld.w $s2, $a3, 84 ori $a0, $zero, 11 - bltu $a0, $s0, .LBB6_24 + bltu $a0, $s2, .LBB6_24 # %bb.14: # %.preheader.split.us ld.w $a0, $a3, 68 ld.d $a1, $sp, 120 # 8-byte Folded Reload @@ -2288,7 +2280,7 @@ calc_noise1: # @calc_noise1 st.d $a1, $sp, 72 # 8-byte Folded Spill ld.w $a1, $a3, 44 ld.d $a2, $sp, 128 # 8-byte Folded Reload - addi.d $s6, $a2, 176 + addi.d $s3, $a2, 176 addi.d $a0, $a0, 1 st.d $a0, $sp, 160 # 8-byte Folded Spill ld.w $a0, $a3, 12 @@ -2296,29 +2288,33 @@ calc_noise1: # @calc_noise1 slli.d $a0, $a1, 3 st.d $a0, $sp, 64 # 8-byte Folded Spill ld.d $a0, $sp, 112 # 8-byte Folded Reload - addi.d $s2, $a0, 168 + addi.d $s4, $a0, 168 ld.d $a0, $sp, 136 # 8-byte Folded Reload addi.d $s1, $a0, 168 pcalau12i $a0, %got_pc_hi20(scalefac_band) ld.d $a0, $a0, %got_pc_lo12(scalefac_band) st.d $a0, $sp, 176 # 8-byte Folded Spill - alsl.d $a0, $s0, $a0, 2 - ld.w $s4, $a0, 92 + alsl.d $a0, $s2, $a0, 2 + ld.w $s6, $a0, 92 pcalau12i $a0, %got_pc_hi20(pow20) ld.d $a0, $a0, %got_pc_lo12(pow20) st.d $a0, $sp, 144 # 8-byte Folded Spill movgr2fr.d $fs1, $zero pcalau12i $a0, %got_pc_hi20(pow43) ld.d $s5, $a0, %got_pc_lo12(pow43) - fld.d $fs0, $s3, %pc_lo12(.LCPI6_0) + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs0, $a0 ori $a0, $zero, 0 lu32i.d $a0, -131072 lu52i.d $a0, $a0, -1021 st.d $a0, $sp, 48 # 8-byte Folded Spill ori $t1, $zero, 12 - move $a0, $s4 - st.d $s0, $sp, 16 # 8-byte Folded Spill - st.d $s4, $sp, 56 # 8-byte Folded Spill + move $a0, $s6 + st.d $s2, $sp, 16 # 8-byte Folded Spill + st.d $s6, $sp, 56 # 8-byte Folded Spill b .LBB6_17 .p2align 4, , 16 .LBB6_15: # %.thread149.us @@ -2334,18 +2330,18 @@ calc_noise1: # @calc_noise1 fcmp.clt.d $fcc0, $fa0, $fa1 fsel $fa0, $fa0, $fa1, $fcc0 fst.d $fa0, $s7, 0 - move $a0, $s3 - beq $s0, $t1, .LBB6_26 + move $a0, $s0 + beq $s2, $t1, .LBB6_26 .LBB6_17: # =>This Loop Header: Depth=1 # Child Loop BB6_19 Depth 2 - move $a1, $s0 - addi.d $s0, $s0, 1 + move $a1, $s2 + addi.d $s2, $s2, 1 ld.d $a2, $sp, 176 # 8-byte Folded Reload - alsl.d $a2, $s0, $a2, 2 - ld.w $s3, $a2, 92 + alsl.d $a2, $s2, $a2, 2 + ld.w $s0, $a2, 92 slli.d $fp, $a1, 3 fmov.d $fa1, $fs1 - bge $a0, $s3, .LBB6_20 + bge $a0, $s0, .LBB6_20 # %bb.18: # %.lr.ph164.us.preheader # in Loop: Header=BB6_17 Depth=1 alsl.d $a2, $a1, $fp, 2 @@ -2368,7 +2364,7 @@ calc_noise1: # @calc_noise1 alsl.d $a3, $a0, $a3, 3 ld.d $a4, $sp, 96 # 8-byte Folded Reload add.d $a3, $a4, $a3 - sub.d $a4, $s3, $a0 + sub.d $a4, $s0, $a0 fmov.d $fa1, $fs1 .p2align 4, , 16 .LBB6_19: # %.lr.ph164.us @@ -2388,19 +2384,19 @@ calc_noise1: # @calc_noise1 bnez $a4, .LBB6_19 .LBB6_20: # %._crit_edge165.us # in Loop: Header=BB6_17 Depth=1 - sub.d $a0, $s3, $a0 + sub.d $a0, $s0, $a0 movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 fdiv.d $fa0, $fa1, $fa0 - fstx.d $fa0, $s2, $fp + fstx.d $fa0, $s4, $fp slli.d $a0, $a1, 4 alsl.d $a0, $a1, $a0, 3 - fldx.d $fa1, $s6, $a0 + fldx.d $fa1, $s3, $a0 fdiv.d $fa0, $fa0, $fa1 fcmp.clt.d $fcc0, $fa0, $fs0 bcnez $fcc0, .LBB6_15 # %bb.21: # in Loop: Header=BB6_17 Depth=1 - move $s4, $s8 + move $s6, $s8 pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 vldi $vr1, -988 @@ -2417,11 +2413,12 @@ calc_noise1: # @calc_noise1 fadd.d $fa1, $fa0, $fa1 fst.d $fa1, $a0, 0 .LBB6_23: # in Loop: Header=BB6_17 Depth=1 - move $s8, $s4 - ld.d $s4, $sp, 56 # 8-byte Folded Reload + move $s8, $s6 + ld.d $s6, $sp, 56 # 8-byte Folded Reload ori $t1, $zero, 12 b .LBB6_16 .LBB6_24: # %.split.us + ld.d $a0, $sp, 168 # 8-byte Folded Reload addi.w $a1, $a4, 0 ori $a3, $zero, 2 blt $a1, $a3, .LBB6_47 @@ -2432,7 +2429,6 @@ calc_noise1: # @calc_noise1 ffint.d.l $fa1, $fa1 fdiv.d $fa0, $fa0, $fa1 fst.d $fa0, $s8, 0 - ld.d $a0, $sp, 168 # 8-byte Folded Reload bge $a0, $a3, .LBB6_48 b .LBB6_49 .LBB6_26: # %._crit_edge172.us @@ -2444,9 +2440,9 @@ calc_noise1: # @calc_noise1 slli.d $a0, $a0, 3 st.d $a0, $sp, 64 # 8-byte Folded Spill ld.d $a0, $sp, 112 # 8-byte Folded Reload - addi.d $s6, $a0, 336 + addi.d $fp, $a0, 336 ld.d $a0, $sp, 128 # 8-byte Folded Reload - addi.d $fp, $a0, 184 + addi.d $s3, $a0, 184 ld.d $a0, $sp, 136 # 8-byte Folded Reload addi.d $s1, $a0, 336 ld.d $a0, $sp, 88 # 8-byte Folded Reload @@ -2461,14 +2457,14 @@ calc_noise1: # @calc_noise1 lu52i.d $a0, $a0, -1021 st.d $a0, $sp, 32 # 8-byte Folded Spill ori $t1, $zero, 12 - move $a0, $s4 + move $a0, $s6 ld.d $s2, $sp, 16 # 8-byte Folded Reload b .LBB6_29 .p2align 4, , 16 .LBB6_27: # %.thread149.us.1 # in Loop: Header=BB6_29 Depth=1 ld.d $a0, $sp, 32 # 8-byte Folded Reload - stx.d $a0, $s1, $s3 + stx.d $a0, $s1, $s4 vldi $vr0, -834 .LBB6_28: # in Loop: Header=BB6_29 Depth=1 fld.d $fa1, $s8, 0 @@ -2487,12 +2483,12 @@ calc_noise1: # @calc_noise1 ld.d $a2, $sp, 176 # 8-byte Folded Reload alsl.d $a2, $s2, $a2, 2 ld.w $s0, $a2, 92 - slli.d $s3, $a1, 3 + slli.d $s4, $a1, 3 fmov.d $fa1, $fs1 bge $a0, $s0, .LBB6_32 # %bb.30: # %.lr.ph164.us.preheader.1 # in Loop: Header=BB6_29 Depth=1 - alsl.d $a2, $a1, $s3, 2 + alsl.d $a2, $a1, $s4, 2 ld.d $a3, $sp, 72 # 8-byte Folded Reload ldx.w $a2, $a3, $a2 ld.d $a3, $sp, 160 # 8-byte Folded Reload @@ -2536,10 +2532,10 @@ calc_noise1: # @calc_noise1 movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 fdiv.d $fa0, $fa1, $fa0 - fstx.d $fa0, $s6, $s3 + fstx.d $fa0, $fp, $s4 slli.d $a0, $a1, 4 alsl.d $a0, $a1, $a0, 3 - fldx.d $fa1, $fp, $a0 + fldx.d $fa1, $s3, $a0 fdiv.d $fa0, $fa0, $fa1 fcmp.clt.d $fcc0, $fa0, $fs0 bcnez $fcc0, .LBB6_27 @@ -2549,7 +2545,7 @@ calc_noise1: # @calc_noise1 vldi $vr1, -988 fmul.d $fa0, $fa0, $fa1 fcmp.cule.d $fcc0, $fa0, $fs1 - fstx.d $fa0, $s1, $s3 + fstx.d $fa0, $s1, $s4 bcnez $fcc0, .LBB6_35 # %bb.34: # in Loop: Header=BB6_29 Depth=1 ld.d $a0, $sp, 80 # 8-byte Folded Reload @@ -2573,9 +2569,9 @@ calc_noise1: # @calc_noise1 ld.d $a0, $sp, 112 # 8-byte Folded Reload addi.d $a5, $a0, 504 ld.d $a0, $sp, 128 # 8-byte Folded Reload - addi.d $s0, $a0, 192 + addi.d $fp, $a0, 192 ld.d $a0, $sp, 136 # 8-byte Folded Reload - addi.d $fp, $a0, 504 + addi.d $s3, $a0, 504 ld.d $a0, $sp, 88 # 8-byte Folded Reload addi.d $s1, $a0, 8 ld.d $a0, $sp, 96 # 8-byte Folded Reload @@ -2593,7 +2589,7 @@ calc_noise1: # @calc_noise1 .LBB6_37: # %.thread149.us.2 # in Loop: Header=BB6_39 Depth=1 ld.d $a0, $sp, 128 # 8-byte Folded Reload - stx.d $a0, $fp, $s3 + stx.d $a0, $s3, $s4 vldi $vr0, -834 .LBB6_38: # in Loop: Header=BB6_39 Depth=1 fld.d $fa1, $s8, 0 @@ -2603,7 +2599,7 @@ calc_noise1: # @calc_noise1 fcmp.clt.d $fcc0, $fa0, $fa1 fsel $fa0, $fa0, $fa1, $fcc0 fst.d $fa0, $s7, 0 - move $s4, $s6 + move $s6, $s0 beq $s2, $a6, .LBB6_46 .LBB6_39: # =>This Loop Header: Depth=1 # Child Loop BB6_41 Depth 2 @@ -2611,13 +2607,13 @@ calc_noise1: # @calc_noise1 addi.d $s2, $s2, 1 ld.d $a1, $sp, 176 # 8-byte Folded Reload alsl.d $a1, $s2, $a1, 2 - ld.w $s6, $a1, 92 - slli.d $s3, $a0, 3 + ld.w $s0, $a1, 92 + slli.d $s4, $a0, 3 fmov.d $fa1, $fs1 - bge $s4, $s6, .LBB6_42 + bge $s6, $s0, .LBB6_42 # %bb.40: # %.lr.ph164.us.preheader.2 # in Loop: Header=BB6_39 Depth=1 - alsl.d $a1, $a0, $s3, 2 + alsl.d $a1, $a0, $s4, 2 ld.d $a2, $sp, 120 # 8-byte Folded Reload ldx.w $a1, $a2, $a1 ld.d $a2, $sp, 160 # 8-byte Folded Reload @@ -2629,12 +2625,12 @@ calc_noise1: # @calc_noise1 slli.d $a1, $a1, 3 ld.d $a2, $sp, 144 # 8-byte Folded Reload fldx.d $fa0, $a2, $a1 - sub.d $a1, $s6, $s4 - slli.d $a2, $s4, 3 - alsl.d $a2, $s4, $a2, 2 + sub.d $a1, $s0, $s6 + slli.d $a2, $s6, 3 + alsl.d $a2, $s6, $a2, 2 add.d $a2, $s1, $a2 - slli.d $a3, $s4, 4 - alsl.d $a3, $s4, $a3, 3 + slli.d $a3, $s6, 4 + alsl.d $a3, $s6, $a3, 3 ld.d $a4, $sp, 136 # 8-byte Folded Reload add.d $a3, $a4, $a3 fmov.d $fa1, $fs1 @@ -2656,25 +2652,25 @@ calc_noise1: # @calc_noise1 bnez $a1, .LBB6_41 .LBB6_42: # %._crit_edge165.us.2 # in Loop: Header=BB6_39 Depth=1 - sub.d $a1, $s6, $s4 + sub.d $a1, $s0, $s6 movgr2fr.w $fa0, $a1 ffint.d.w $fa0, $fa0 fdiv.d $fa0, $fa1, $fa0 - fstx.d $fa0, $a5, $s3 + fstx.d $fa0, $a5, $s4 slli.d $a1, $a0, 4 alsl.d $a0, $a0, $a1, 3 - fldx.d $fa1, $s0, $a0 + fldx.d $fa1, $fp, $a0 fdiv.d $fa0, $fa0, $fa1 fcmp.clt.d $fcc0, $fa0, $fs0 bcnez $fcc0, .LBB6_37 # %bb.43: # in Loop: Header=BB6_39 Depth=1 - move $s4, $a5 + move $s6, $a5 pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 vldi $vr1, -988 fmul.d $fa0, $fa0, $fa1 fcmp.cule.d $fcc0, $fa0, $fs1 - fstx.d $fa0, $fp, $s3 + fstx.d $fa0, $s3, $s4 bcnez $fcc0, .LBB6_45 # %bb.44: # in Loop: Header=BB6_39 Depth=1 ld.d $a0, $sp, 80 # 8-byte Folded Reload @@ -2685,7 +2681,7 @@ calc_noise1: # @calc_noise1 fadd.d $fa1, $fa0, $fa1 fst.d $fa1, $a0, 0 .LBB6_45: # in Loop: Header=BB6_39 Depth=1 - move $a5, $s4 + move $a5, $s6 ori $a6, $zero, 12 b .LBB6_38 .LBB6_46: # %._crit_edge172.us.2 @@ -2694,11 +2690,11 @@ calc_noise1: # @calc_noise1 ld.d $a1, $sp, 104 # 8-byte Folded Reload sub.d $a0, $a1, $a0 addi.w $a4, $a0, 36 + ld.d $a0, $sp, 168 # 8-byte Folded Reload addi.w $a1, $a4, 0 ori $a3, $zero, 2 bge $a1, $a3, .LBB6_25 .LBB6_47: - ld.d $a0, $sp, 168 # 8-byte Folded Reload blt $a0, $a3, .LBB6_49 .LBB6_48: ld.d $a1, $sp, 80 # 8-byte Folded Reload @@ -2888,12 +2884,6 @@ quant_compare: # @quant_compare .LCPI8_0: .dword 0x3ffae89f995ad3ae # double 1.6817928305074292 .dword 0x3ff4bfdad5362a27 # double 1.2968395546510096 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI8_1: - .dword 0xc08c200000000000 # double -900 -.LCPI8_2: - .dword 0x3ff0cccccccccccd # double 1.05 .text .globl amp_scalefac_bands .p2align 5 @@ -2907,11 +2897,13 @@ amp_scalefac_bands: # @amp_scalefac_bands ld.w $a5, $a1, 68 ld.w $a4, $a1, 80 sltui $a5, $a5, 1 - pcalau12i $a6, %pc_hi20(.LCPI8_1) - fld.d $fa1, $a6, %pc_lo12(.LCPI8_1) + ori $a6, $zero, 0 + lu32i.d $a6, -253952 + lu52i.d $a7, $a6, -1016 beqz $a4, .LBB8_3 # %bb.1: # %.lr.ph.preheader bstrpick.d $a6, $a4, 31, 0 + movgr2fr.d $fa1, $a7 move $a7, $a3 .p2align 4, , 16 .LBB8_2: # %.lr.ph @@ -2922,20 +2914,23 @@ amp_scalefac_bands: # @amp_scalefac_bands addi.d $a6, $a6, -1 addi.d $a7, $a7, 8 bnez $a6, .LBB8_2 -.LBB8_3: # %._crit_edge + b .LBB8_4 +.LBB8_3: + movgr2fr.d $fa1, $a7 +.LBB8_4: # %._crit_edge slli.d $a6, $a5, 3 pcalau12i $a5, %pc_hi20(.LCPI8_0) addi.d $a7, $a5, %pc_lo12(.LCPI8_0) ld.w $a5, $a1, 84 ori $t0, $zero, 11 - bltu $t0, $a5, .LBB8_6 -# %bb.4: # %.preheader74.preheader + bltu $t0, $a5, .LBB8_7 +# %bb.5: # %.preheader74.preheader alsl.d $t0, $a5, $a3, 3 addi.d $t0, $t0, 336 addi.d $t1, $a5, 1 ori $t2, $zero, 12 .p2align 4, , 16 -.LBB8_5: # %.preheader74 +.LBB8_6: # %.preheader74 # =>This Inner Loop Header: Depth=1 fld.d $fa0, $t0, -168 fld.d $fa2, $t0, 0 @@ -2949,40 +2944,43 @@ amp_scalefac_bands: # @amp_scalefac_bands bstrpick.d $t3, $t1, 31, 0 addi.d $t0, $t0, 8 addi.d $t1, $t1, 1 - bne $t3, $t2, .LBB8_5 -.LBB8_6: # %._crit_edge81 - pcalau12i $t0, %pc_hi20(.LCPI8_2) - fld.d $fa2, $t0, %pc_lo12(.LCPI8_2) + bne $t3, $t2, .LBB8_6 +.LBB8_7: # %._crit_edge81 fldx.d $fa0, $a7, $a6 + lu12i.w $a6, -209716 + ori $a6, $a6, 3277 + lu32i.d $a6, 52428 + lu52i.d $a6, $a6, 1023 + movgr2fr.d $fa2, $a6 fmul.d $fa1, $fa1, $fa2 movgr2fr.d $fa2, $zero fmin.d $fa1, $fa1, $fa2 - beqz $a4, .LBB8_20 -# %bb.7: # %.lr.ph88.preheader + beqz $a4, .LBB8_21 +# %bb.8: # %.lr.ph88.preheader vreplvei.d $vr2, $vr0, 0 addi.d $a4, $a0, 16 pcalau12i $a5, %got_pc_hi20(scalefac_band) ld.d $a5, $a5, %got_pc_lo12(scalefac_band) move $a6, $zero ori $a7, $zero, 4 - b .LBB8_10 + b .LBB8_11 .p2align 4, , 16 -.LBB8_8: # %.lr.ph88..loopexit73_crit_edge - # in Loop: Header=BB8_10 Depth=1 +.LBB8_9: # %.lr.ph88..loopexit73_crit_edge + # in Loop: Header=BB8_11 Depth=1 addi.d $a6, $a6, 1 -.LBB8_9: # %.loopexit73 - # in Loop: Header=BB8_10 Depth=1 +.LBB8_10: # %.loopexit73 + # in Loop: Header=BB8_11 Depth=1 ld.wu $t0, $a1, 80 - bgeu $a6, $t0, .LBB8_19 -.LBB8_10: # %.lr.ph88 + bgeu $a6, $t0, .LBB8_20 +.LBB8_11: # %.lr.ph88 # =>This Loop Header: Depth=1 - # Child Loop BB8_15 Depth 2 - # Child Loop BB8_18 Depth 2 + # Child Loop BB8_16 Depth 2 + # Child Loop BB8_19 Depth 2 slli.d $t0, $a6, 3 fldx.d $fa3, $a3, $t0 fcmp.clt.d $fcc0, $fa1, $fa3 - bceqz $fcc0, .LBB8_8 -# %bb.11: # in Loop: Header=BB8_10 Depth=1 + bceqz $fcc0, .LBB8_9 +# %bb.12: # in Loop: Header=BB8_11 Depth=1 slli.d $t0, $a6, 2 ldx.w $t1, $a2, $t0 addi.d $t1, $t1, 1 @@ -2991,24 +2989,24 @@ amp_scalefac_bands: # @amp_scalefac_bands addi.d $a6, $a6, 1 slli.d $t0, $a6, 2 ldx.w $t0, $a5, $t0 - bge $t4, $t0, .LBB8_9 -# %bb.12: # %.lr.ph85.preheader - # in Loop: Header=BB8_10 Depth=1 + bge $t4, $t0, .LBB8_10 +# %bb.13: # %.lr.ph85.preheader + # in Loop: Header=BB8_11 Depth=1 sub.d $t2, $t0, $t4 - bgeu $t2, $a7, .LBB8_14 -# %bb.13: # in Loop: Header=BB8_10 Depth=1 + bgeu $t2, $a7, .LBB8_15 +# %bb.14: # in Loop: Header=BB8_11 Depth=1 move $t1, $t4 - b .LBB8_17 -.LBB8_14: # %vector.ph - # in Loop: Header=BB8_10 Depth=1 + b .LBB8_18 +.LBB8_15: # %vector.ph + # in Loop: Header=BB8_11 Depth=1 move $t3, $t2 bstrins.d $t3, $zero, 1, 0 add.d $t1, $t3, $t4 alsl.d $t4, $t4, $a4, 3 move $t5, $t3 .p2align 4, , 16 -.LBB8_15: # %vector.body - # Parent Loop BB8_10 Depth=1 +.LBB8_16: # %vector.body + # Parent Loop BB8_11 Depth=1 # => This Inner Loop Header: Depth=2 vld $vr3, $t4, -16 vld $vr4, $t4, 0 @@ -3018,51 +3016,51 @@ amp_scalefac_bands: # @amp_scalefac_bands vst $vr4, $t4, 0 addi.d $t5, $t5, -4 addi.d $t4, $t4, 32 - bnez $t5, .LBB8_15 -# %bb.16: # %middle.block - # in Loop: Header=BB8_10 Depth=1 - beq $t2, $t3, .LBB8_9 -.LBB8_17: # %.lr.ph85.preheader186 - # in Loop: Header=BB8_10 Depth=1 + bnez $t5, .LBB8_16 +# %bb.17: # %middle.block + # in Loop: Header=BB8_11 Depth=1 + beq $t2, $t3, .LBB8_10 +.LBB8_18: # %.lr.ph85.preheader186 + # in Loop: Header=BB8_11 Depth=1 alsl.d $t2, $t1, $a0, 3 sub.d $t0, $t0, $t1 .p2align 4, , 16 -.LBB8_18: # %.lr.ph85 - # Parent Loop BB8_10 Depth=1 +.LBB8_19: # %.lr.ph85 + # Parent Loop BB8_11 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa3, $t2, 0 fmul.d $fa3, $fa0, $fa3 fst.d $fa3, $t2, 0 addi.d $t0, $t0, -1 addi.d $t2, $t2, 8 - bnez $t0, .LBB8_18 - b .LBB8_9 -.LBB8_19: # %.preheader.loopexit + bnez $t0, .LBB8_19 + b .LBB8_10 +.LBB8_20: # %.preheader.loopexit ld.w $a5, $a1, 84 -.LBB8_20: # %.preheader +.LBB8_21: # %.preheader ori $a6, $zero, 12 - bgeu $a5, $a6, .LBB8_58 -# %bb.21: # %.lr.ph94 + bgeu $a5, $a6, .LBB8_59 +# %bb.22: # %.lr.ph94 addi.d $a7, $a2, 88 addi.d $t0, $a3, 168 pcalau12i $a4, %got_pc_hi20(scalefac_band) ld.d $a4, $a4, %got_pc_lo12(scalefac_band) ori $t1, $zero, 2 - b .LBB8_23 + b .LBB8_24 .p2align 4, , 16 -.LBB8_22: # %.loopexit - # in Loop: Header=BB8_23 Depth=1 +.LBB8_23: # %.loopexit + # in Loop: Header=BB8_24 Depth=1 addi.d $a5, $a5, 1 bstrpick.d $t2, $a5, 31, 0 - beq $t2, $a6, .LBB8_32 -.LBB8_23: # =>This Loop Header: Depth=1 - # Child Loop BB8_28 Depth 2 - # Child Loop BB8_31 Depth 2 + beq $t2, $a6, .LBB8_33 +.LBB8_24: # =>This Loop Header: Depth=1 + # Child Loop BB8_29 Depth 2 + # Child Loop BB8_32 Depth 2 slli.d $t2, $a5, 3 fldx.d $fa2, $t0, $t2 fcmp.cule.d $fcc0, $fa2, $fa1 - bcnez $fcc0, .LBB8_22 -# %bb.24: # in Loop: Header=BB8_23 Depth=1 + bcnez $fcc0, .LBB8_23 +# %bb.25: # in Loop: Header=BB8_24 Depth=1 alsl.d $t2, $a5, $t2, 2 ldx.w $t3, $a7, $t2 addi.d $t3, $t3, 1 @@ -3070,16 +3068,16 @@ amp_scalefac_bands: # @amp_scalefac_bands alsl.d $t2, $a5, $a4, 2 ld.w $t6, $t2, 92 ld.w $t2, $t2, 96 - bge $t6, $t2, .LBB8_22 -# %bb.25: # %.lr.ph91.preheader - # in Loop: Header=BB8_23 Depth=1 + bge $t6, $t2, .LBB8_23 +# %bb.26: # %.lr.ph91.preheader + # in Loop: Header=BB8_24 Depth=1 sub.d $t4, $t2, $t6 - bgeu $t4, $t1, .LBB8_27 -# %bb.26: # in Loop: Header=BB8_23 Depth=1 + bgeu $t4, $t1, .LBB8_28 +# %bb.27: # in Loop: Header=BB8_24 Depth=1 move $t3, $t6 - b .LBB8_30 -.LBB8_27: # %vector.ph144 - # in Loop: Header=BB8_23 Depth=1 + b .LBB8_31 +.LBB8_28: # %vector.ph144 + # in Loop: Header=BB8_24 Depth=1 move $t5, $t4 bstrins.d $t5, $zero, 0, 0 add.d $t3, $t5, $t6 @@ -3088,8 +3086,8 @@ amp_scalefac_bands: # @amp_scalefac_bands add.d $t6, $a0, $t6 move $t7, $t5 .p2align 4, , 16 -.LBB8_28: # %vector.body147 - # Parent Loop BB8_23 Depth=1 +.LBB8_29: # %vector.body147 + # Parent Loop BB8_24 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa2, $t6, 0 fld.d $fa3, $t6, 24 @@ -3099,32 +3097,32 @@ amp_scalefac_bands: # @amp_scalefac_bands fst.d $fa3, $t6, 24 addi.d $t7, $t7, -2 addi.d $t6, $t6, 48 - bnez $t7, .LBB8_28 -# %bb.29: # %middle.block151 - # in Loop: Header=BB8_23 Depth=1 - beq $t4, $t5, .LBB8_22 -.LBB8_30: # %.lr.ph91.preheader185 - # in Loop: Header=BB8_23 Depth=1 + bnez $t7, .LBB8_29 +# %bb.30: # %middle.block151 + # in Loop: Header=BB8_24 Depth=1 + beq $t4, $t5, .LBB8_23 +.LBB8_31: # %.lr.ph91.preheader185 + # in Loop: Header=BB8_24 Depth=1 slli.d $t4, $t3, 4 alsl.d $t4, $t3, $t4, 3 add.d $t4, $a0, $t4 sub.d $t2, $t2, $t3 .p2align 4, , 16 -.LBB8_31: # %.lr.ph91 - # Parent Loop BB8_23 Depth=1 +.LBB8_32: # %.lr.ph91 + # Parent Loop BB8_24 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa2, $t4, 0 fmul.d $fa2, $fa0, $fa2 fst.d $fa2, $t4, 0 addi.d $t2, $t2, -1 addi.d $t4, $t4, 24 - bnez $t2, .LBB8_31 - b .LBB8_22 -.LBB8_32: # %._crit_edge95 + bnez $t2, .LBB8_32 + b .LBB8_23 +.LBB8_33: # %._crit_edge95 ld.w $a5, $a1, 84 ori $a6, $zero, 11 - bltu $a6, $a5, .LBB8_58 -# %bb.33: # %.lr.ph94.1 + bltu $a6, $a5, .LBB8_59 +# %bb.34: # %.lr.ph94.1 addi.d $a6, $a3, 336 addi.d $a7, $a2, 92 addi.d $t0, $a0, 8 @@ -3132,21 +3130,21 @@ amp_scalefac_bands: # @amp_scalefac_bands ori $t2, $zero, 2 ori $t3, $zero, 24 ori $t4, $zero, 12 - b .LBB8_35 + b .LBB8_36 .p2align 4, , 16 -.LBB8_34: # %.loopexit.1 - # in Loop: Header=BB8_35 Depth=1 +.LBB8_35: # %.loopexit.1 + # in Loop: Header=BB8_36 Depth=1 addi.d $a5, $a5, 1 bstrpick.d $t5, $a5, 31, 0 - beq $t5, $t4, .LBB8_45 -.LBB8_35: # =>This Loop Header: Depth=1 - # Child Loop BB8_43 Depth 2 - # Child Loop BB8_39 Depth 2 + beq $t5, $t4, .LBB8_46 +.LBB8_36: # =>This Loop Header: Depth=1 + # Child Loop BB8_44 Depth 2 + # Child Loop BB8_40 Depth 2 slli.d $t5, $a5, 3 fldx.d $fa2, $a6, $t5 fcmp.cule.d $fcc0, $fa2, $fa1 - bcnez $fcc0, .LBB8_34 -# %bb.36: # in Loop: Header=BB8_35 Depth=1 + bcnez $fcc0, .LBB8_35 +# %bb.37: # in Loop: Header=BB8_36 Depth=1 alsl.d $t5, $a5, $t5, 2 ldx.w $t6, $a7, $t5 addi.d $t6, $t6, 1 @@ -3154,52 +3152,52 @@ amp_scalefac_bands: # @amp_scalefac_bands alsl.d $t6, $a5, $a4, 2 ld.w $t5, $t6, 92 ld.w $t6, $t6, 96 - bge $t5, $t6, .LBB8_34 -# %bb.37: # %.lr.ph91.preheader.1 - # in Loop: Header=BB8_35 Depth=1 + bge $t5, $t6, .LBB8_35 +# %bb.38: # %.lr.ph91.preheader.1 + # in Loop: Header=BB8_36 Depth=1 sub.d $t7, $t6, $t5 - bgeu $t7, $t2, .LBB8_40 -.LBB8_38: # %.lr.ph91.1.preheader - # in Loop: Header=BB8_35 Depth=1 + bgeu $t7, $t2, .LBB8_41 +.LBB8_39: # %.lr.ph91.1.preheader + # in Loop: Header=BB8_36 Depth=1 slli.d $t7, $t5, 4 alsl.d $t7, $t5, $t7, 3 add.d $t7, $t0, $t7 sub.d $t5, $t6, $t5 .p2align 4, , 16 -.LBB8_39: # %.lr.ph91.1 - # Parent Loop BB8_35 Depth=1 +.LBB8_40: # %.lr.ph91.1 + # Parent Loop BB8_36 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa2, $t7, 0 fmul.d $fa2, $fa0, $fa2 fst.d $fa2, $t7, 0 addi.d $t5, $t5, -1 addi.d $t7, $t7, 24 - bnez $t5, .LBB8_39 - b .LBB8_34 -.LBB8_40: # %vector.scevcheck - # in Loop: Header=BB8_35 Depth=1 + bnez $t5, .LBB8_40 + b .LBB8_35 +.LBB8_41: # %vector.scevcheck + # in Loop: Header=BB8_36 Depth=1 nor $t8, $t5, $zero add.d $t8, $t8, $t6 mul.d $fp, $t5, $t3 add.d $s0, $t0, $fp mul.d $s1, $t8, $t3 add.d $s1, $s0, $s1 - bltu $s1, $s0, .LBB8_38 -# %bb.41: # %vector.scevcheck - # in Loop: Header=BB8_35 Depth=1 + bltu $s1, $s0, .LBB8_39 +# %bb.42: # %vector.scevcheck + # in Loop: Header=BB8_36 Depth=1 mulh.du $t8, $t8, $t3 sltu $t8, $zero, $t8 - bnez $t8, .LBB8_38 -# %bb.42: # %vector.ph157 - # in Loop: Header=BB8_35 Depth=1 + bnez $t8, .LBB8_39 +# %bb.43: # %vector.ph157 + # in Loop: Header=BB8_36 Depth=1 move $t8, $t7 bstrins.d $t8, $zero, 0, 0 add.d $t5, $t8, $t5 add.d $fp, $t1, $fp move $s0, $t8 .p2align 4, , 16 -.LBB8_43: # %vector.body160 - # Parent Loop BB8_35 Depth=1 +.LBB8_44: # %vector.body160 + # Parent Loop BB8_36 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa2, $fp, -24 fld.d $fa3, $fp, 0 @@ -3209,16 +3207,16 @@ amp_scalefac_bands: # @amp_scalefac_bands fst.d $fa3, $fp, 0 addi.d $s0, $s0, -2 addi.d $fp, $fp, 48 - bnez $s0, .LBB8_43 -# %bb.44: # %middle.block164 - # in Loop: Header=BB8_35 Depth=1 - beq $t7, $t8, .LBB8_34 - b .LBB8_38 -.LBB8_45: # %._crit_edge95.1 + bnez $s0, .LBB8_44 +# %bb.45: # %middle.block164 + # in Loop: Header=BB8_36 Depth=1 + beq $t7, $t8, .LBB8_35 + b .LBB8_39 +.LBB8_46: # %._crit_edge95.1 ld.w $a1, $a1, 84 ori $a5, $zero, 11 - bltu $a5, $a1, .LBB8_58 -# %bb.46: # %.lr.ph94.2 + bltu $a5, $a1, .LBB8_59 +# %bb.47: # %.lr.ph94.2 addi.d $a3, $a3, 504 addi.d $a2, $a2, 96 addi.d $a5, $a0, 16 @@ -3226,21 +3224,21 @@ amp_scalefac_bands: # @amp_scalefac_bands ori $a6, $zero, 2 ori $a7, $zero, 24 ori $t0, $zero, 12 - b .LBB8_48 + b .LBB8_49 .p2align 4, , 16 -.LBB8_47: # %.loopexit.2 - # in Loop: Header=BB8_48 Depth=1 +.LBB8_48: # %.loopexit.2 + # in Loop: Header=BB8_49 Depth=1 addi.d $a1, $a1, 1 bstrpick.d $t1, $a1, 31, 0 - beq $t1, $t0, .LBB8_58 -.LBB8_48: # =>This Loop Header: Depth=1 - # Child Loop BB8_56 Depth 2 - # Child Loop BB8_52 Depth 2 + beq $t1, $t0, .LBB8_59 +.LBB8_49: # =>This Loop Header: Depth=1 + # Child Loop BB8_57 Depth 2 + # Child Loop BB8_53 Depth 2 slli.d $t1, $a1, 3 fldx.d $fa2, $a3, $t1 fcmp.cule.d $fcc0, $fa2, $fa1 - bcnez $fcc0, .LBB8_47 -# %bb.49: # in Loop: Header=BB8_48 Depth=1 + bcnez $fcc0, .LBB8_48 +# %bb.50: # in Loop: Header=BB8_49 Depth=1 alsl.d $t1, $a1, $t1, 2 ldx.w $t2, $a2, $t1 addi.d $t2, $t2, 1 @@ -3248,52 +3246,52 @@ amp_scalefac_bands: # @amp_scalefac_bands alsl.d $t2, $a1, $a4, 2 ld.w $t1, $t2, 92 ld.w $t2, $t2, 96 - bge $t1, $t2, .LBB8_47 -# %bb.50: # %.lr.ph91.preheader.2 - # in Loop: Header=BB8_48 Depth=1 + bge $t1, $t2, .LBB8_48 +# %bb.51: # %.lr.ph91.preheader.2 + # in Loop: Header=BB8_49 Depth=1 sub.d $t3, $t2, $t1 - bgeu $t3, $a6, .LBB8_53 -.LBB8_51: # %.lr.ph91.2.preheader - # in Loop: Header=BB8_48 Depth=1 + bgeu $t3, $a6, .LBB8_54 +.LBB8_52: # %.lr.ph91.2.preheader + # in Loop: Header=BB8_49 Depth=1 slli.d $t3, $t1, 4 alsl.d $t3, $t1, $t3, 3 add.d $t3, $a5, $t3 sub.d $t1, $t2, $t1 .p2align 4, , 16 -.LBB8_52: # %.lr.ph91.2 - # Parent Loop BB8_48 Depth=1 +.LBB8_53: # %.lr.ph91.2 + # Parent Loop BB8_49 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa2, $t3, 0 fmul.d $fa2, $fa0, $fa2 fst.d $fa2, $t3, 0 addi.d $t1, $t1, -1 addi.d $t3, $t3, 24 - bnez $t1, .LBB8_52 - b .LBB8_47 -.LBB8_53: # %vector.scevcheck167 - # in Loop: Header=BB8_48 Depth=1 + bnez $t1, .LBB8_53 + b .LBB8_48 +.LBB8_54: # %vector.scevcheck167 + # in Loop: Header=BB8_49 Depth=1 nor $t4, $t1, $zero add.d $t4, $t4, $t2 mul.d $t5, $t1, $a7 add.d $t6, $a5, $t5 mul.d $t7, $t4, $a7 add.d $t7, $t6, $t7 - bltu $t7, $t6, .LBB8_51 -# %bb.54: # %vector.scevcheck167 - # in Loop: Header=BB8_48 Depth=1 + bltu $t7, $t6, .LBB8_52 +# %bb.55: # %vector.scevcheck167 + # in Loop: Header=BB8_49 Depth=1 mulh.du $t4, $t4, $a7 sltu $t4, $zero, $t4 - bnez $t4, .LBB8_51 -# %bb.55: # %vector.ph175 - # in Loop: Header=BB8_48 Depth=1 + bnez $t4, .LBB8_52 +# %bb.56: # %vector.ph175 + # in Loop: Header=BB8_49 Depth=1 move $t4, $t3 bstrins.d $t4, $zero, 0, 0 add.d $t1, $t4, $t1 add.d $t5, $a0, $t5 move $t6, $t4 .p2align 4, , 16 -.LBB8_56: # %vector.body178 - # Parent Loop BB8_48 Depth=1 +.LBB8_57: # %vector.body178 + # Parent Loop BB8_49 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa2, $t5, -24 fld.d $fa3, $t5, 0 @@ -3303,12 +3301,12 @@ amp_scalefac_bands: # @amp_scalefac_bands fst.d $fa3, $t5, 0 addi.d $t6, $t6, -2 addi.d $t5, $t5, 48 - bnez $t6, .LBB8_56 -# %bb.57: # %middle.block182 - # in Loop: Header=BB8_48 Depth=1 - beq $t3, $t4, .LBB8_47 - b .LBB8_51 -.LBB8_58: # %.split.us + bnez $t6, .LBB8_57 +# %bb.58: # %middle.block182 + # in Loop: Header=BB8_49 Depth=1 + beq $t3, $t4, .LBB8_48 + b .LBB8_52 +.LBB8_59: # %.split.us ld.d $s1, $sp, 8 # 8-byte Folded Reload ld.d $s0, $sp, 16 # 8-byte Folded Reload ld.d $fp, $sp, 24 # 8-byte Folded Reload diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/reservoir.s b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/reservoir.s index 92812fce..6a0bb7dc 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/reservoir.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/reservoir.s @@ -45,12 +45,7 @@ ResvFrameBegin: # @ResvFrameBegin .Lfunc_end0: .size ResvFrameBegin, .Lfunc_end0-ResvFrameBegin # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function ResvMaxBits -.LCPI1_0: - .dword 0x402e666666666666 # double 15.199999999999999 - .text - .globl ResvMaxBits + .globl ResvMaxBits # -- Begin function ResvMaxBits .p2align 5 .type ResvMaxBits,@function ResvMaxBits: # @ResvMaxBits @@ -71,12 +66,15 @@ ResvMaxBits: # @ResvMaxBits add.d $a0, $a5, $a0 b .LBB1_3 .LBB1_2: - pcalau12i $a5, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a5, %pc_lo12(.LCPI1_0) move $a5, $zero - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + lu12i.w $a6, 419430 + ori $a6, $a6, 1638 + lu32i.d $a6, -104858 + lu52i.d $a6, $a6, 1026 + movgr2fr.d $fa1, $a6 + fdiv.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a6, $fa0 sub.d $a0, $a0, $a6 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/tabinit.s b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/tabinit.s index 93413083..02279491 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/tabinit.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/tabinit.s @@ -46,10 +46,6 @@ .LCPI0_14: .dword 0x3fe1517a7bdb3895 # double 0.54119610014619701 .dword 0x3ff4e7ae9144f0fb # double 1.3065629648763764 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI0_15: - .dword 0x3ef0000000000000 # double 1.52587890625E-5 .text .globl make_decode_tables .p2align 5 @@ -123,8 +119,8 @@ make_decode_tables: # @make_decode_tables move $a6, $zero move $a3, $zero move $a7, $zero - pcalau12i $a2, %pc_hi20(.LCPI0_15) - fld.d $fa0, $a2, %pc_lo12(.LCPI0_15) + lu52i.d $a2, $zero, 1007 + movgr2fr.d $fa0, $a2 lu12i.w $a2, -2 ori $a2, $a2, 8 ori $t0, $zero, 2048 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/takehiro.s b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/takehiro.s index 6c941cb1..5c3f540c 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/takehiro.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/takehiro.s @@ -1,10 +1,6 @@ .file "takehiro.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function count_bits -.LCPI0_0: - .dword 0x40c0070000000000 # double 8206 .text - .globl count_bits + .globl count_bits # -- Begin function count_bits .p2align 5 .type count_bits,@function count_bits: # @count_bits @@ -25,8 +21,10 @@ count_bits: # @count_bits pcalau12i $a3, %got_pc_hi20(ipow20) ld.d $a3, $a3, %got_pc_lo12(ipow20) fldx.d $fa0, $a3, $a0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) + ori $a0, $zero, 0 + lu32i.d $a0, 1792 + lu52i.d $a0, $a0, 1036 + movgr2fr.d $fa1, $a0 fdiv.d $fa0, $fa1, $fa0 lu12i.w $a0, -2 ori $a3, $a0, 3584 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/timestatus.s b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/timestatus.s index 415f46ac..381271b7 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/timestatus.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/timestatus.s @@ -32,12 +32,7 @@ ts_real_time: # @ts_real_time .Lfunc_end0: .size ts_real_time, .Lfunc_end0-ts_real_time # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function ts_process_time -.LCPI1_0: - .word 0x49742400 # float 1.0E+6 - .text - .globl ts_process_time + .globl ts_process_time # -- Begin function ts_process_time .p2align 5 .type ts_process_time,@function ts_process_time: # @ts_process_time @@ -57,12 +52,13 @@ ts_process_time: # @ts_process_time st.d $a0, $a1, %pc_lo12(ts_process_time.initial_time) move $a1, $a0 .LBB1_3: - pcalau12i $a2, %pc_hi20(.LCPI1_0) - fld.s $fa0, $a2, %pc_lo12(.LCPI1_0) sub.d $a0, $a0, $a1 - movgr2fr.d $fa1, $a0 - ffint.s.l $fa1, $fa1 - fdiv.s $fa0, $fa1, $fa0 + movgr2fr.d $fa0, $a0 + ffint.s.l $fa0, $fa0 + lu12i.w $a0, 300866 + ori $a0, $a0, 1024 + movgr2fr.w $fa1, $a0 + fdiv.s $fa0, $fa0, $fa1 ld.d $fp, $sp, 0 # 8-byte Folded Reload ld.d $ra, $sp, 8 # 8-byte Folded Reload addi.d $sp, $sp, 16 @@ -109,18 +105,7 @@ ts_calc_times: # @ts_calc_times .Lfunc_end2: .size ts_calc_times, .Lfunc_end2-ts_calc_times # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function timestatus -.LCPI3_0: - .word 0x49742400 # float 1.0E+6 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI3_1: - .dword 0x4059000000000000 # double 100 -.LCPI3_2: - .dword 0x404e000000000000 # double 60 - .text - .globl timestatus + .globl timestatus # -- Begin function timestatus .p2align 5 .type timestatus,@function timestatus: # @timestatus @@ -174,12 +159,13 @@ timestatus: # @timestatus .LBB3_5: ld.d $a1, $a1, %pc_lo12(ts_process_time.initial_time) fcvt.s.d $fa2, $fs0 - pcalau12i $a2, %pc_hi20(.LCPI3_0) - fld.s $fa0, $a2, %pc_lo12(.LCPI3_0) sub.d $a0, $a0, $a1 - movgr2fr.d $fa1, $a0 - ffint.s.l $fa1, $fa1 - fdiv.s $fa4, $fa1, $fa0 + movgr2fr.d $fa0, $a0 + ffint.s.l $fa0, $fa0 + lu12i.w $a0, 300866 + ori $a0, $a0, 1024 + movgr2fr.w $fa1, $a0 + fdiv.s $fa4, $fa0, $fa1 movgr2fr.d $fa6, $fp blez $fp, .LBB3_8 # %bb.6: @@ -228,9 +214,11 @@ timestatus: # @timestatus ori $a0, $zero, 1 bge $a0, $s0, .LBB3_9 .LBB3_12: - pcalau12i $a0, %pc_hi20(.LCPI3_1) - fld.d $fa7, $a0, %pc_lo12(.LCPI3_1) ffint.d.l $fa6, $fa6 + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa7, $a0 fmul.d $fa6, $fa6, $fa7 addi.d $a3, $s0, -1 movgr2fr.d $fa7, $a3 @@ -252,11 +240,13 @@ timestatus: # @timestatus lu32i.d $a1, 88546 lu52i.d $a1, $a1, 1165 mulh.d $a2, $a7, $a1 - pcalau12i $a5, %pc_hi20(.LCPI3_2) - fld.d $fa4, $a5, %pc_lo12(.LCPI3_2) srli.d $a5, $a2, 63 srli.d $a2, $a2, 10 add.w $a5, $a2, $a5 + ori $a2, $zero, 0 + lu32i.d $a2, -131072 + lu52i.d $a2, $a2, 1028 + movgr2fr.d $fa4, $a2 fdiv.d $fa7, $fa7, $fa4 ftintrz.l.d $fa7, $fa7 movfr2gr.d $a6, $fa7 @@ -381,11 +371,11 @@ timestatus: # @timestatus st.d $a2, $sp, 96 st.d $t7, $sp, 64 st.d $t3, $sp, 16 + st.d $t6, $sp, 40 st.d $s3, $sp, 88 st.d $s1, $sp, 56 - st.d $t6, $sp, 40 - st.d $t2, $sp, 8 st.d $t5, $sp, 32 + st.d $t2, $sp, 8 pcalau12i $a1, %pc_hi20(.L.str.1) addi.d $a1, $a1, %pc_lo12(.L.str.1) move $a2, $fp diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/util.s b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/util.s index a9f84d11..151549ef 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/util.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/util.s @@ -1,12 +1,6 @@ .file "util.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function getframebits -.LCPI0_0: - .dword 0x408f400000000000 # double 1000 -.LCPI0_1: - .dword 0x3e112e0be826d695 # double 1.0000000000000001E-9 .text - .globl getframebits + .globl getframebits # -- Begin function getframebits .p2align 5 .type getframebits,@function getframebits: # @getframebits @@ -35,41 +29,46 @@ getframebits: # @getframebits ori $t1, $zero, 104 maskeqz $a6, $t1, $a6 or $a6, $a6, $t0 + ld.w $t0, $a0, 60 masknez $a6, $a6, $a5 maskeqz $a5, $a7, $a5 or $a5, $a5, $a6 - ld.w $a6, $a0, 60 + sltui $a6, $t0, 1 + addi.d $a7, $a5, 16 + maskeqz $a5, $a5, $a6 movgr2fr.w $fa0, $a3 ffint.d.w $fa0, $fa0 - pcalau12i $a3, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a3, %pc_lo12(.LCPI0_0) - ld.w $a3, $a0, 188 + movgr2fr.w $fa1, $a4 + ffint.d.w $fa1, $fa1 + ori $a3, $zero, 0 + lu32i.d $a3, -49152 + ld.w $a4, $a0, 188 + lu52i.d $a3, $a3, 1032 + movgr2fr.d $fa2, $a3 + fdiv.d $fa1, $fa1, $fa2 movgr2fr.w $fa2, $a4 ffint.d.w $fa2, $fa2 fdiv.d $fa1, $fa2, $fa1 - movgr2fr.w $fa2, $a3 - ffint.d.w $fa2, $fa2 - fdiv.d $fa1, $fa2, $fa1 vldi $vr2, -960 fmul.d $fa0, $fa0, $fa2 - pcalau12i $a3, %pc_hi20(.LCPI0_1) - fld.d $fa2, $a3, %pc_lo12(.LCPI0_1) - sltui $a3, $a6, 1 - addi.d $a4, $a5, 16 - maskeqz $a5, $a5, $a3 + lu12i.w $a3, -97683 + ori $a3, $a3, 1685 + lu32i.d $a3, 77323 + lu52i.d $a3, $a3, 993 + movgr2fr.d $fa2, $a3 fmadd.d $fa0, $fa1, $fa0, $fa2 vreplvei.d $vr0, $vr0, 0 - ld.w $a6, $a0, 196 + ld.w $a3, $a0, 196 vfrintrm.d $vr0, $vr0 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a7, $fa0 - add.d $a6, $a6, $a7 - slli.d $a6, $a6, 3 - st.w $a6, $a1, 0 + movfr2gr.s $a4, $fa0 + add.d $a3, $a3, $a4 + slli.d $a3, $a3, 3 + st.w $a3, $a1, 0 ld.w $a0, $a0, 200 - masknez $a1, $a4, $a3 + masknez $a1, $a7, $a6 or $a1, $a5, $a1 - sub.w $a1, $a6, $a1 + sub.w $a1, $a3, $a1 div.w $a0, $a1, $a0 st.w $a0, $a2, 0 ret diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/vbrquantize.s b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/vbrquantize.s index fd72905d..626647fb 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/vbrquantize.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/vbrquantize.s @@ -89,32 +89,7 @@ calc_sfb_ave_noise: # @calc_sfb_ave_noise .Lfunc_end0: .size calc_sfb_ave_noise, .Lfunc_end0-calc_sfb_ave_noise # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function find_scalefac -.LCPI1_0: - .dword 0xc034800000000000 # double -20.5 -.LCPI1_1: - .dword 0xc042400000000000 # double -36.5 -.LCPI1_2: - .dword 0xc046400000000000 # double -44.5 -.LCPI1_3: - .dword 0xc048400000000000 # double -48.5 -.LCPI1_4: - .dword 0xc049400000000000 # double -50.5 -.LCPI1_5: - .dword 0xc049c00000000000 # double -51.5 -.LCPI1_6: - .dword 0xc04a000000000000 # double -52 -.LCPI1_7: - .dword 0x40c3884000000000 # double 10000.5 -.LCPI1_8: - .dword 0x40c3880000000000 # double 1.0E+4 -.LCPI1_9: - .dword 0x4040000000000000 # double 32 -.LCPI1_10: - .dword 0x3f847ae147ae147b # double 0.01 - .text - .globl find_scalefac + .globl find_scalefac # -- Begin function find_scalefac .p2align 5 .type find_scalefac,@function find_scalefac: # @find_scalefac @@ -147,12 +122,16 @@ find_scalefac: # @find_scalefac blez $s2, .LBB1_15 # %bb.1: # %.lr.ph.preheader.i.us.preheader slli.d $s3, $fp, 3 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI1_0) - pcalau12i $a0, %pc_hi20(.LCPI1_8) - fld.d $fs5, $a0, %pc_lo12(.LCPI1_8) - pcalau12i $a0, %pc_hi20(.LCPI1_9) - fld.d $fs4, $a0, %pc_lo12(.LCPI1_9) + lu52i.d $a0, $zero, 1028 + movgr2fr.d $fs4, $a0 + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, 294912 + lu52i.d $a1, $a1, -1021 + movgr2fr.d $fs1, $a1 + lu32i.d $a0, 231424 + lu52i.d $a0, $a0, 1036 + movgr2fr.d $fs5, $a0 movgr2fr.d $fs6, $zero lu12i.w $a0, 2 ori $s4, $a0, 14 @@ -252,11 +231,14 @@ find_scalefac: # @find_scalefac .LBB1_15: # %.split movgr2fr.d $fa0, $zero fdiv.d $fs1, $fa0, $fs3 - pcalau12i $a0, %pc_hi20(.LCPI1_0) + ori $a0, $zero, 0 + lu32i.d $a0, 294912 + lu52i.d $a0, $a0, -1021 fcmp.cule.d $fcc0, $fa0, $fs1 bcnez $fcc0, .LBB1_17 # %bb.16: # %calc_sfb_ave_noise.exit.us100.preheader - fld.d $fa0, $a0, %pc_lo12(.LCPI1_0) + movgr2fr.d $fa0, $a0 + ori $s3, $zero, 0 pcaddu18i $ra, %call36(exp2) jirl $ra, $ra, 0 vldi $vr0, -878 @@ -277,13 +259,17 @@ find_scalefac: # @find_scalefac vldi $vr0, -986 pcaddu18i $ra, %call36(exp2) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI1_7) - fld.d $fa4, $a0, %pc_lo12(.LCPI1_7) - pcalau12i $a0, %pc_hi20(.LCPI1_8) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_8) + ori $a0, $zero, 0 + lu32i.d $a0, 231488 + lu52i.d $a0, $a0, 1036 + movgr2fr.d $fa4, $a0 + lu32i.d $s3, 231424 + lu52i.d $a0, $s3, 1036 + movgr2fr.d $fa0, $a0 b .LBB1_20 .LBB1_17: # %.split.split - fld.d $fa0, $a0, %pc_lo12(.LCPI1_0) + movgr2fr.d $fa0, $a0 + ori $s3, $zero, 0 vst $vr0, $sp, 16 # 16-byte Folded Spill # kill: def $f0_64 killed $f0_64 killed $vr0 pcaddu18i $ra, %call36(exp2) @@ -291,28 +277,39 @@ find_scalefac: # @find_scalefac fcmp.clt.d $fcc0, $fs0, $fs1 bceqz $fcc0, .LBB1_19 # %bb.18: # %calc_sfb_ave_noise.exit.us122.preheader - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_1) + ori $a0, $zero, 0 + lu32i.d $a0, 147456 + lu52i.d $a0, $a0, -1020 + movgr2fr.d $fa0, $a0 pcaddu18i $ra, %call36(exp2) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI1_2) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_2) + ori $a0, $zero, 0 + lu32i.d $a0, 409600 + lu52i.d $a0, $a0, -1020 + movgr2fr.d $fa0, $a0 pcaddu18i $ra, %call36(exp2) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI1_3) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_3) + ori $a0, $zero, 0 + lu32i.d $a0, -507904 + lu52i.d $a0, $a0, -1020 + movgr2fr.d $fa0, $a0 pcaddu18i $ra, %call36(exp2) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI1_4) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_4) + ori $a0, $zero, 0 + lu32i.d $a0, -442368 + lu52i.d $a0, $a0, -1020 + movgr2fr.d $fa0, $a0 pcaddu18i $ra, %call36(exp2) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI1_5) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_5) + ori $a0, $zero, 0 + lu32i.d $a0, -409600 + lu52i.d $a0, $a0, -1020 + movgr2fr.d $fa0, $a0 pcaddu18i $ra, %call36(exp2) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI1_6) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_6) + lu32i.d $s3, -393216 + lu52i.d $a0, $s3, -1020 + movgr2fr.d $fa0, $a0 pcaddu18i $ra, %call36(exp2) jirl $ra, $ra, 0 vldi $vr4, -844 @@ -340,10 +337,13 @@ find_scalefac: # @find_scalefac vldi $vr0, -986 vldi $vr4, -985 .LBB1_20: # %.split96.us - pcalau12i $a0, %pc_hi20(.LCPI1_10) - fld.d $fs4, $a0, %pc_lo12(.LCPI1_10) vldi $vr1, -920 fadd.d $fa3, $fa0, $fa1 + lu12i.w $a0, 293601 + ori $a0, $a0, 1147 + lu32i.d $a0, 293601 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fs4, $a0 fadd.d $fs5, $fa0, $fs4 fcmp.cule.d $fcc0, $fa3, $fs5 bcnez $fcc0, .LBB1_39 @@ -507,12 +507,7 @@ find_scalefac: # @find_scalefac .Lfunc_end1: .size find_scalefac, .Lfunc_end1-find_scalefac # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function compute_scalefacs_short -.LCPI2_0: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 - .text - .globl compute_scalefacs_short + .globl compute_scalefacs_short # -- Begin function compute_scalefacs_short .p2align 5 .type compute_scalefacs_short,@function compute_scalefacs_short: # @compute_scalefacs_short @@ -557,512 +552,515 @@ compute_scalefacs_short: # @compute_scalefacs_short fld.d $ft5, $a0, 96 fld.d $ft6, $a0, 104 fld.d $ft7, $a0, 112 - fld.d $ft10, $a0, 120 + fld.d $ft8, $a0, 120 movgr2fr.w $fa0, $a3 - ffint.d.w $ft8, $fa0 + ffint.d.w $ft9, $fa0 fneg.d $fa0, $fa1 - vldi $vr17, -920 - fmadd.d $fa0, $fa0, $ft8, $ft9 + vldi $vr18, -920 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 336 # 8-byte Folded Spill - pcalau12i $a3, %pc_hi20(.LCPI2_0) fneg.d $fa0, $fa2 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 328 # 8-byte Folded Spill fneg.d $fa0, $fa3 - fmov.d $fa2, $fa3 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmov.d $fa1, $fa3 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 320 # 8-byte Folded Spill fneg.d $fa0, $fa4 - fmov.d $fa3, $fa4 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmov.d $fa2, $fa4 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 312 # 8-byte Folded Spill fneg.d $fa0, $fa5 - fmov.d $fa4, $fa5 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmov.d $fa3, $fa5 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 304 # 8-byte Folded Spill fneg.d $fa0, $fa6 - fmov.d $fa5, $fa6 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmov.d $fa4, $fa6 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 296 # 8-byte Folded Spill fneg.d $fa0, $fa7 - fmov.d $fa6, $fa7 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmov.d $fa5, $fa7 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 288 # 8-byte Folded Spill fneg.d $fa0, $ft0 - fmov.d $fa7, $ft0 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmov.d $fa6, $ft0 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 280 # 8-byte Folded Spill fneg.d $fa0, $ft1 - fmov.d $ft0, $ft1 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmov.d $fa7, $ft1 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 272 # 8-byte Folded Spill fneg.d $fa0, $ft2 - fmov.d $ft1, $ft2 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmov.d $ft0, $ft2 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 264 # 8-byte Folded Spill fneg.d $fa0, $ft3 - fmov.d $ft2, $ft3 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmov.d $ft1, $ft3 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 256 # 8-byte Folded Spill fneg.d $fa0, $ft4 - fmov.d $ft3, $ft4 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmov.d $ft2, $ft4 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 248 # 8-byte Folded Spill fneg.d $fa0, $ft5 - fmov.d $ft4, $ft5 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmov.d $ft3, $ft5 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 240 # 8-byte Folded Spill fneg.d $fa0, $ft6 - fmov.d $ft5, $ft6 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmov.d $ft4, $ft6 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 232 # 8-byte Folded Spill fneg.d $fa0, $ft7 - fmov.d $ft6, $ft7 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmov.d $ft5, $ft7 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 224 # 8-byte Folded Spill fld.d $fs5, $a0, 128 - fneg.d $fa0, $ft10 - fmov.d $fa1, $ft10 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fneg.d $fa0, $ft8 + fmov.d $ft6, $ft8 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 216 # 8-byte Folded Spill - fld.d $ft11, $a0, 136 + fld.d $ft8, $a0, 136 fneg.d $fa0, $fs5 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 208 # 8-byte Folded Spill fld.d $ft7, $a0, 144 fst.d $ft7, $sp, 192 # 8-byte Folded Spill - fneg.d $fa0, $ft11 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fneg.d $fa0, $ft8 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 200 # 8-byte Folded Spill - fld.d $ft10, $a0, 152 - fst.d $ft10, $sp, 184 # 8-byte Folded Spill + fld.d $ft11, $a0, 152 + fst.d $ft11, $sp, 184 # 8-byte Folded Spill fneg.d $fa0, $ft7 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 176 # 8-byte Folded Spill fld.d $ft7, $a0, 160 fst.d $ft7, $sp, 160 # 8-byte Folded Spill - fneg.d $fa0, $ft10 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fneg.d $fa0, $ft11 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 168 # 8-byte Folded Spill - fld.d $ft10, $a0, 168 - fst.d $ft10, $sp, 144 # 8-byte Folded Spill + fld.d $ft11, $a0, 168 + fst.d $ft11, $sp, 144 # 8-byte Folded Spill fneg.d $fa0, $ft7 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 152 # 8-byte Folded Spill fld.d $ft7, $a0, 176 fst.d $ft7, $sp, 128 # 8-byte Folded Spill - fneg.d $fa0, $ft10 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fneg.d $fa0, $ft11 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 136 # 8-byte Folded Spill - fld.d $ft10, $a0, 184 - fst.d $ft10, $sp, 112 # 8-byte Folded Spill + fld.d $ft11, $a0, 184 + fst.d $ft11, $sp, 112 # 8-byte Folded Spill fneg.d $fa0, $ft7 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 120 # 8-byte Folded Spill fld.d $ft7, $a0, 192 fst.d $ft7, $sp, 96 # 8-byte Folded Spill - fneg.d $fa0, $ft10 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fneg.d $fa0, $ft11 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 104 # 8-byte Folded Spill fld.d $fs6, $a0, 200 fneg.d $fa0, $ft7 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 88 # 8-byte Folded Spill fld.d $fs4, $a0, 208 fneg.d $fa0, $fs6 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 80 # 8-byte Folded Spill fld.d $fs3, $a0, 216 fneg.d $fa0, $fs4 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 72 # 8-byte Folded Spill fld.d $fs2, $a0, 224 fneg.d $fa0, $fs3 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 64 # 8-byte Folded Spill fld.d $fs1, $a0, 232 fneg.d $fa0, $fs2 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 56 # 8-byte Folded Spill fld.d $fs0, $a0, 240 fneg.d $fa0, $fs1 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 48 # 8-byte Folded Spill fld.d $ft15, $a0, 248 fneg.d $fa0, $fs0 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 40 # 8-byte Folded Spill - fld.d $ft14, $a0, 256 + fld.d $ft13, $a0, 256 fneg.d $fa0, $ft15 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmadd.d $fa0, $fa0, $ft9, $ft10 fst.d $fa0, $sp, 32 # 8-byte Folded Spill - fld.d $ft13, $a0, 264 - fneg.d $fa0, $ft14 - fmadd.d $fa0, $fa0, $ft8, $ft9 - fst.d $fa0, $sp, 24 # 8-byte Folded Spill - fld.d $ft10, $a0, 272 + fld.d $ft12, $a0, 264 fneg.d $fa0, $ft13 - fmadd.d $fa0, $fa0, $ft8, $ft9 + fmadd.d $fa0, $fa0, $ft9, $ft10 + fst.d $fa0, $sp, 24 # 8-byte Folded Spill + fld.d $ft7, $a0, 272 + fld.d $ft14, $a0, 280 + fneg.d $ft11, $ft12 + fmadd.d $fa0, $ft11, $ft9, $ft10 fst.d $fa0, $sp, 16 # 8-byte Folded Spill - fld.d $ft7, $a0, 280 - fld.d $fa0, $a3, %pc_lo12(.LCPI2_0) - fneg.d $ft12, $ft10 - fmadd.d $ft12, $ft12, $ft8, $ft9 - fst.d $ft12, $sp, 8 # 8-byte Folded Spill - fneg.d $fs7, $ft7 - fmadd.d $ft8, $fs7, $ft8, $ft9 - fld.d $ft9, $sp, 336 # 8-byte Folded Reload - fadd.d $ft9, $ft9, $fa0 - vreplvei.d $vr17, $vr17, 0 - vfrintrm.d $vr17, $vr17 - ftintrz.w.d $ft9, $ft9 - movfr2gr.s $s1, $ft9 - fld.d $ft9, $sp, 328 # 8-byte Folded Reload - fadd.d $ft9, $ft9, $fa0 - vreplvei.d $vr17, $vr17, 0 - vfrintrm.d $vr17, $vr17 - ftintrz.w.d $ft9, $ft9 - movfr2gr.s $s0, $ft9 - fld.d $ft9, $sp, 320 # 8-byte Folded Reload - fadd.d $ft9, $ft9, $fa0 - vreplvei.d $vr17, $vr17, 0 - vfrintrm.d $vr17, $vr17 - ftintrz.w.d $ft9, $ft9 - movfr2gr.s $t8, $ft9 - fld.d $ft9, $sp, 312 # 8-byte Folded Reload - fadd.d $ft9, $ft9, $fa0 - vreplvei.d $vr17, $vr17, 0 - vfrintrm.d $vr17, $vr17 - ftintrz.w.d $ft9, $ft9 - movfr2gr.s $t6, $ft9 - fld.d $ft9, $sp, 304 # 8-byte Folded Reload - fadd.d $ft9, $ft9, $fa0 - vreplvei.d $vr17, $vr17, 0 - vfrintrm.d $vr17, $vr17 - ftintrz.w.d $ft9, $ft9 - movfr2gr.s $t4, $ft9 - fld.d $ft9, $sp, 296 # 8-byte Folded Reload - fadd.d $ft9, $ft9, $fa0 - vreplvei.d $vr17, $vr17, 0 - vfrintrm.d $vr17, $vr17 - ftintrz.w.d $ft9, $ft9 - movfr2gr.s $t2, $ft9 - fld.d $ft9, $sp, 288 # 8-byte Folded Reload - fadd.d $ft9, $ft9, $fa0 - vreplvei.d $vr17, $vr17, 0 - vfrintrm.d $vr17, $vr17 - ftintrz.w.d $ft9, $ft9 - movfr2gr.s $t0, $ft9 - fld.d $ft9, $sp, 280 # 8-byte Folded Reload - fadd.d $ft9, $ft9, $fa0 - vreplvei.d $vr17, $vr17, 0 - vfrintrm.d $vr17, $vr17 - ftintrz.w.d $ft9, $ft9 - movfr2gr.s $a6, $ft9 - fld.d $ft9, $sp, 272 # 8-byte Folded Reload - fadd.d $ft9, $ft9, $fa0 - vreplvei.d $vr17, $vr17, 0 - vfrintrm.d $vr17, $vr17 - ftintrz.w.d $ft9, $ft9 - movfr2gr.s $a4, $ft9 - fld.d $ft9, $sp, 264 # 8-byte Folded Reload - fadd.d $ft9, $ft9, $fa0 - vreplvei.d $vr17, $vr17, 0 - vfrintrm.d $vr17, $vr17 - ftintrz.w.d $ft9, $ft9 - movfr2gr.s $a0, $ft9 - fld.d $ft9, $sp, 256 # 8-byte Folded Reload - fadd.d $ft9, $ft9, $fa0 - vreplvei.d $vr17, $vr17, 0 - vfrintrm.d $vr17, $vr17 - ftintrz.w.d $ft9, $ft9 - movfr2gr.s $a3, $ft9 - fld.d $ft9, $sp, 248 # 8-byte Folded Reload - fadd.d $ft9, $ft9, $fa0 - vreplvei.d $vr17, $vr17, 0 - vfrintrm.d $vr17, $vr17 - ftintrz.w.d $ft9, $ft9 - movfr2gr.s $a5, $ft9 - fld.d $ft9, $sp, 240 # 8-byte Folded Reload - fadd.d $ft9, $ft9, $fa0 - vreplvei.d $vr17, $vr17, 0 - vfrintrm.d $vr17, $vr17 - ftintrz.w.d $ft9, $ft9 - movfr2gr.s $a7, $ft9 - fld.d $ft9, $sp, 232 # 8-byte Folded Reload - fadd.d $ft9, $ft9, $fa0 - vreplvei.d $vr17, $vr17, 0 - vfrintrm.d $vr17, $vr17 - ftintrz.w.d $ft9, $ft9 - movfr2gr.s $t1, $ft9 - fld.d $ft9, $sp, 224 # 8-byte Folded Reload - fadd.d $ft9, $ft9, $fa0 - vreplvei.d $vr17, $vr17, 0 - vfrintrm.d $vr17, $vr17 - ftintrz.w.d $ft9, $ft9 - movfr2gr.s $t3, $ft9 - fld.d $ft9, $sp, 216 # 8-byte Folded Reload - fadd.d $ft9, $ft9, $fa0 - vreplvei.d $vr17, $vr17, 0 - vfrintrm.d $vr17, $vr17 - ftintrz.w.d $ft9, $ft9 - movfr2gr.s $t5, $ft9 - fld.d $ft9, $sp, 208 # 8-byte Folded Reload - fadd.d $ft9, $ft9, $fa0 - vreplvei.d $vr17, $vr17, 0 - vfrintrm.d $vr17, $vr17 - ftintrz.w.d $ft9, $ft9 - movfr2gr.s $t7, $ft9 - fld.d $ft9, $sp, 200 # 8-byte Folded Reload - fadd.d $ft9, $ft9, $fa0 - vreplvei.d $vr17, $vr17, 0 - vfrintrm.d $vr17, $vr17 - ftintrz.w.d $ft9, $ft9 - movfr2gr.s $fp, $ft9 + fneg.d $ft11, $ft7 + fmadd.d $fa0, $ft11, $ft9, $ft10 + fst.d $fa0, $sp, 8 # 8-byte Folded Spill + fneg.d $fs7, $ft14 + fmadd.d $ft9, $fs7, $ft9, $ft10 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $ft10, $a0 + fld.d $fa0, $sp, 336 # 8-byte Folded Reload + fadd.d $fs7, $fa0, $ft10 + vreplvei.d $vr31, $vr31, 0 + vfrintrm.d $vr31, $vr31 + ftintrz.w.d $fs7, $fs7 + movfr2gr.s $s1, $fs7 + fld.d $fa0, $sp, 328 # 8-byte Folded Reload + fadd.d $fs7, $fa0, $ft10 + vreplvei.d $vr31, $vr31, 0 + vfrintrm.d $vr31, $vr31 + ftintrz.w.d $fs7, $fs7 + movfr2gr.s $s0, $fs7 + fld.d $fa0, $sp, 320 # 8-byte Folded Reload + fadd.d $fs7, $fa0, $ft10 + vreplvei.d $vr31, $vr31, 0 + vfrintrm.d $vr31, $vr31 + ftintrz.w.d $fs7, $fs7 + movfr2gr.s $t8, $fs7 + fld.d $fa0, $sp, 312 # 8-byte Folded Reload + fadd.d $fs7, $fa0, $ft10 + vreplvei.d $vr31, $vr31, 0 + vfrintrm.d $vr31, $vr31 + ftintrz.w.d $fs7, $fs7 + movfr2gr.s $t6, $fs7 + fld.d $fa0, $sp, 304 # 8-byte Folded Reload + fadd.d $fs7, $fa0, $ft10 + vreplvei.d $vr31, $vr31, 0 + vfrintrm.d $vr31, $vr31 + ftintrz.w.d $fs7, $fs7 + movfr2gr.s $t4, $fs7 + fld.d $fa0, $sp, 296 # 8-byte Folded Reload + fadd.d $fs7, $fa0, $ft10 + vreplvei.d $vr31, $vr31, 0 + vfrintrm.d $vr31, $vr31 + ftintrz.w.d $fs7, $fs7 + movfr2gr.s $t2, $fs7 + fld.d $fa0, $sp, 288 # 8-byte Folded Reload + fadd.d $fs7, $fa0, $ft10 + vreplvei.d $vr31, $vr31, 0 + vfrintrm.d $vr31, $vr31 + ftintrz.w.d $fs7, $fs7 + movfr2gr.s $t0, $fs7 + fld.d $fa0, $sp, 280 # 8-byte Folded Reload + fadd.d $fs7, $fa0, $ft10 + vreplvei.d $vr31, $vr31, 0 + vfrintrm.d $vr31, $vr31 + ftintrz.w.d $fs7, $fs7 + movfr2gr.s $a6, $fs7 + fld.d $fa0, $sp, 272 # 8-byte Folded Reload + fadd.d $fs7, $fa0, $ft10 + vreplvei.d $vr31, $vr31, 0 + vfrintrm.d $vr31, $vr31 + ftintrz.w.d $fs7, $fs7 + movfr2gr.s $a4, $fs7 + fld.d $fa0, $sp, 264 # 8-byte Folded Reload + fadd.d $fs7, $fa0, $ft10 + vreplvei.d $vr31, $vr31, 0 + vfrintrm.d $vr31, $vr31 + ftintrz.w.d $fs7, $fs7 + movfr2gr.s $a0, $fs7 + fld.d $fa0, $sp, 256 # 8-byte Folded Reload + fadd.d $fs7, $fa0, $ft10 + vreplvei.d $vr31, $vr31, 0 + vfrintrm.d $vr31, $vr31 + ftintrz.w.d $fs7, $fs7 + movfr2gr.s $a3, $fs7 + fld.d $fa0, $sp, 248 # 8-byte Folded Reload + fadd.d $fs7, $fa0, $ft10 + vreplvei.d $vr31, $vr31, 0 + vfrintrm.d $vr31, $vr31 + ftintrz.w.d $fs7, $fs7 + movfr2gr.s $a5, $fs7 + fld.d $fa0, $sp, 240 # 8-byte Folded Reload + fadd.d $fs7, $fa0, $ft10 + vreplvei.d $vr31, $vr31, 0 + vfrintrm.d $vr31, $vr31 + ftintrz.w.d $fs7, $fs7 + movfr2gr.s $a7, $fs7 + fld.d $fa0, $sp, 232 # 8-byte Folded Reload + fadd.d $fs7, $fa0, $ft10 + vreplvei.d $vr31, $vr31, 0 + vfrintrm.d $vr31, $vr31 + ftintrz.w.d $fs7, $fs7 + movfr2gr.s $t1, $fs7 + fld.d $fa0, $sp, 224 # 8-byte Folded Reload + fadd.d $fs7, $fa0, $ft10 + vreplvei.d $vr31, $vr31, 0 + vfrintrm.d $vr31, $vr31 + ftintrz.w.d $fs7, $fs7 + movfr2gr.s $t3, $fs7 + fld.d $fa0, $sp, 216 # 8-byte Folded Reload + fadd.d $fs7, $fa0, $ft10 + vreplvei.d $vr31, $vr31, 0 + vfrintrm.d $vr31, $vr31 + ftintrz.w.d $fs7, $fs7 + movfr2gr.s $t5, $fs7 + fld.d $fa0, $sp, 208 # 8-byte Folded Reload + fadd.d $fs7, $fa0, $ft10 + vreplvei.d $vr31, $vr31, 0 + vfrintrm.d $vr31, $vr31 + ftintrz.w.d $fs7, $fs7 + movfr2gr.s $t7, $fs7 + fld.d $fa0, $sp, 200 # 8-byte Folded Reload + fadd.d $fs7, $fa0, $ft10 + vreplvei.d $vr31, $vr31, 0 + vfrintrm.d $vr31, $vr31 + ftintrz.w.d $fs7, $fs7 + movfr2gr.s $fp, $fs7 slli.d $s2, $a1, 52 ori $a1, $zero, 0 lu32i.d $a1, -131072 lu52i.d $a1, $a1, 1026 sub.d $a1, $a1, $s2 - movgr2fr.d $ft9, $a1 - fadd.d $ft11, $ft9, $ft11 - fadd.d $fs5, $ft9, $fs5 - fadd.d $fs7, $ft9, $fa1 - fadd.d $ft6, $ft9, $ft6 - fadd.d $ft5, $ft9, $ft5 - fadd.d $ft4, $ft9, $ft4 - fadd.d $ft3, $ft9, $ft3 - fadd.d $ft2, $ft9, $ft2 - fadd.d $ft1, $ft9, $ft1 - fadd.d $ft0, $ft9, $ft0 - fadd.d $fa7, $ft9, $fa7 - fadd.d $fa6, $ft9, $fa6 - fadd.d $fa5, $ft9, $fa5 - fadd.d $fa4, $ft9, $fa4 - fadd.d $fa3, $ft9, $fa3 - fadd.d $fa2, $ft9, $fa2 - fld.d $fa1, $sp, 344 # 8-byte Folded Reload - fadd.d $fa1, $ft9, $fa1 - fld.d $ft12, $sp, 352 # 8-byte Folded Reload - fadd.d $ft9, $ft9, $ft12 - movgr2fr.d $ft12, $zero - fmax.d $ft9, $ft9, $ft12 - fcmp.clt.d $fcc0, $ft9, $fa1 - fsel $fa1, $ft9, $fa1, $fcc0 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fa2, $fcc0 - fcmp.clt.d $fcc0, $fa1, $fa3 - fsel $fa1, $fa1, $fa3, $fcc0 - fcmp.clt.d $fcc0, $fa1, $fa4 - fsel $fa1, $fa1, $fa4, $fcc0 - fcmp.clt.d $fcc0, $fa1, $fa5 - fsel $fa1, $fa1, $fa5, $fcc0 - fcmp.clt.d $fcc0, $fa1, $fa6 - fsel $fa1, $fa1, $fa6, $fcc0 - fcmp.clt.d $fcc0, $fa1, $fa7 - fsel $fa1, $fa1, $fa7, $fcc0 - fcmp.clt.d $fcc0, $fa1, $ft0 - fsel $fa1, $fa1, $ft0, $fcc0 - fcmp.clt.d $fcc0, $fa1, $ft1 - fsel $fa1, $fa1, $ft1, $fcc0 - fcmp.clt.d $fcc0, $fa1, $ft2 - fsel $fa1, $fa1, $ft2, $fcc0 - fcmp.clt.d $fcc0, $fa1, $ft3 - fsel $fa1, $fa1, $ft3, $fcc0 - fcmp.clt.d $fcc0, $fa1, $ft4 - fsel $fa1, $fa1, $ft4, $fcc0 - fcmp.clt.d $fcc0, $fa1, $ft5 - fsel $fa1, $fa1, $ft5, $fcc0 - fcmp.clt.d $fcc0, $fa1, $ft6 - fsel $fa1, $fa1, $ft6, $fcc0 - fcmp.clt.d $fcc0, $fa1, $fs7 - fsel $fa1, $fa1, $fs7, $fcc0 - fcmp.clt.d $fcc0, $fa1, $fs5 - fsel $fa1, $fa1, $fs5, $fcc0 - fcmp.clt.d $fcc0, $fa1, $ft11 - fsel $fa1, $fa1, $ft11, $fcc0 - fld.d $fa2, $sp, 176 # 8-byte Folded Reload - fadd.d $fa2, $fa2, $fa0 - vreplvei.d $vr2, $vr2, 0 - vfrintrm.d $vr2, $vr2 - ftintrz.w.d $fa2, $fa2 - movfr2gr.s $a1, $fa2 + movgr2fr.d $fs7, $a1 + fadd.d $ft8, $fs7, $ft8 + fadd.d $fs5, $fs7, $fs5 + fadd.d $ft6, $fs7, $ft6 + fadd.d $ft5, $fs7, $ft5 + fadd.d $ft4, $fs7, $ft4 + fadd.d $ft3, $fs7, $ft3 + fadd.d $ft2, $fs7, $ft2 + fadd.d $ft1, $fs7, $ft1 + fadd.d $ft0, $fs7, $ft0 + fadd.d $fa7, $fs7, $fa7 + fadd.d $fa6, $fs7, $fa6 + fadd.d $fa5, $fs7, $fa5 + fadd.d $fa4, $fs7, $fa4 + fadd.d $fa3, $fs7, $fa3 + fadd.d $fa2, $fs7, $fa2 + fadd.d $fa1, $fs7, $fa1 + fld.d $fa0, $sp, 344 # 8-byte Folded Reload + fadd.d $fa0, $fs7, $fa0 + fld.d $ft11, $sp, 352 # 8-byte Folded Reload + fadd.d $fs7, $fs7, $ft11 + movgr2fr.d $ft11, $zero + fmax.d $ft11, $fs7, $ft11 + fcmp.clt.d $fcc0, $ft11, $fa0 + fsel $fa0, $ft11, $fa0, $fcc0 + fcmp.clt.d $fcc0, $fa0, $fa1 + fsel $fa0, $fa0, $fa1, $fcc0 + fcmp.clt.d $fcc0, $fa0, $fa2 + fsel $fa0, $fa0, $fa2, $fcc0 + fcmp.clt.d $fcc0, $fa0, $fa3 + fsel $fa0, $fa0, $fa3, $fcc0 + fcmp.clt.d $fcc0, $fa0, $fa4 + fsel $fa0, $fa0, $fa4, $fcc0 + fcmp.clt.d $fcc0, $fa0, $fa5 + fsel $fa0, $fa0, $fa5, $fcc0 + fcmp.clt.d $fcc0, $fa0, $fa6 + fsel $fa0, $fa0, $fa6, $fcc0 + fcmp.clt.d $fcc0, $fa0, $fa7 + fsel $fa0, $fa0, $fa7, $fcc0 + fcmp.clt.d $fcc0, $fa0, $ft0 + fsel $fa0, $fa0, $ft0, $fcc0 + fcmp.clt.d $fcc0, $fa0, $ft1 + fsel $fa0, $fa0, $ft1, $fcc0 + fcmp.clt.d $fcc0, $fa0, $ft2 + fsel $fa0, $fa0, $ft2, $fcc0 + fcmp.clt.d $fcc0, $fa0, $ft3 + fsel $fa0, $fa0, $ft3, $fcc0 + fcmp.clt.d $fcc0, $fa0, $ft4 + fsel $fa0, $fa0, $ft4, $fcc0 + fcmp.clt.d $fcc0, $fa0, $ft5 + fsel $fa0, $fa0, $ft5, $fcc0 + fcmp.clt.d $fcc0, $fa0, $ft6 + fsel $fa0, $fa0, $ft6, $fcc0 + fcmp.clt.d $fcc0, $fa0, $fs5 + fsel $fa0, $fa0, $fs5, $fcc0 + fcmp.clt.d $fcc0, $fa0, $ft8 + fsel $fa0, $fa0, $ft8, $fcc0 + fld.d $fa1, $sp, 176 # 8-byte Folded Reload + fadd.d $fa1, $fa1, $ft10 + vreplvei.d $vr1, $vr1, 0 + vfrintrm.d $vr1, $vr1 + ftintrz.w.d $fa1, $fa1 + movfr2gr.s $a1, $fa1 ori $s3, $zero, 0 lu32i.d $s3, -262144 lu52i.d $s3, $s3, 1025 xor $s2, $s2, $s3 - movgr2fr.d $ft9, $s2 - fld.d $fa2, $sp, 192 # 8-byte Folded Reload - fadd.d $fa2, $ft9, $fa2 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fa2, $fcc0 - fld.d $fa2, $sp, 168 # 8-byte Folded Reload - fadd.d $fa2, $fa2, $fa0 - vreplvei.d $vr2, $vr2, 0 - vfrintrm.d $vr2, $vr2 - ftintrz.w.d $fa2, $fa2 - movfr2gr.s $s2, $fa2 - fld.d $fa2, $sp, 184 # 8-byte Folded Reload - fadd.d $fa2, $ft9, $fa2 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fa2, $fcc0 - fld.d $fa2, $sp, 152 # 8-byte Folded Reload - fadd.d $fa2, $fa2, $fa0 - vreplvei.d $vr2, $vr2, 0 - vfrintrm.d $vr2, $vr2 - ftintrz.w.d $fa2, $fa2 - movfr2gr.s $s3, $fa2 - fld.d $fa2, $sp, 160 # 8-byte Folded Reload - fadd.d $fa2, $ft9, $fa2 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fa2, $fcc0 - fld.d $fa2, $sp, 136 # 8-byte Folded Reload - fadd.d $fa2, $fa2, $fa0 - vreplvei.d $vr2, $vr2, 0 - vfrintrm.d $vr2, $vr2 - ftintrz.w.d $fa2, $fa2 - movfr2gr.s $s4, $fa2 - fld.d $fa2, $sp, 144 # 8-byte Folded Reload - fadd.d $fa2, $ft9, $fa2 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fa2, $fcc0 - fld.d $fa2, $sp, 120 # 8-byte Folded Reload - fadd.d $fa2, $fa2, $fa0 - vreplvei.d $vr2, $vr2, 0 - vfrintrm.d $vr2, $vr2 - ftintrz.w.d $fa2, $fa2 - movfr2gr.s $s5, $fa2 - fld.d $fa2, $sp, 128 # 8-byte Folded Reload - fadd.d $fa2, $ft9, $fa2 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fa2, $fcc0 - fld.d $fa2, $sp, 104 # 8-byte Folded Reload - fadd.d $fa2, $fa2, $fa0 - vreplvei.d $vr2, $vr2, 0 - vfrintrm.d $vr2, $vr2 - ftintrz.w.d $fa2, $fa2 - movfr2gr.s $s6, $fa2 - fld.d $fa2, $sp, 112 # 8-byte Folded Reload - fadd.d $fa2, $ft9, $fa2 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fa2, $fcc0 - fld.d $fa2, $sp, 88 # 8-byte Folded Reload - fadd.d $fa2, $fa2, $fa0 - vreplvei.d $vr2, $vr2, 0 - vfrintrm.d $vr2, $vr2 - ftintrz.w.d $fa2, $fa2 - movfr2gr.s $s7, $fa2 - fld.d $fa2, $sp, 96 # 8-byte Folded Reload - fadd.d $fa2, $ft9, $fa2 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fa2, $fcc0 - fld.d $fa2, $sp, 80 # 8-byte Folded Reload - fadd.d $fa2, $fa2, $fa0 - vreplvei.d $vr2, $vr2, 0 - vfrintrm.d $vr2, $vr2 - ftintrz.w.d $fa2, $fa2 - movfr2gr.s $s8, $fa2 - fadd.d $fa2, $ft9, $fs6 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fa2, $fcc0 - fld.d $fa2, $sp, 72 # 8-byte Folded Reload - fadd.d $fa2, $fa2, $fa0 - vreplvei.d $vr2, $vr2, 0 - vfrintrm.d $vr2, $vr2 - ftintrz.w.d $fa2, $fa2 - movfr2gr.s $ra, $fa2 - fadd.d $fa2, $ft9, $fs4 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fa2, $fcc0 + movgr2fr.d $ft8, $s2 + fld.d $fa1, $sp, 192 # 8-byte Folded Reload + fadd.d $fa1, $ft8, $fa1 + fcmp.clt.d $fcc0, $fa0, $fa1 + fsel $fa0, $fa0, $fa1, $fcc0 + fld.d $fa1, $sp, 168 # 8-byte Folded Reload + fadd.d $fa1, $fa1, $ft10 + vreplvei.d $vr1, $vr1, 0 + vfrintrm.d $vr1, $vr1 + ftintrz.w.d $fa1, $fa1 + movfr2gr.s $s2, $fa1 + fld.d $fa1, $sp, 184 # 8-byte Folded Reload + fadd.d $fa1, $ft8, $fa1 + fcmp.clt.d $fcc0, $fa0, $fa1 + fsel $fa0, $fa0, $fa1, $fcc0 + fld.d $fa1, $sp, 152 # 8-byte Folded Reload + fadd.d $fa1, $fa1, $ft10 + vreplvei.d $vr1, $vr1, 0 + vfrintrm.d $vr1, $vr1 + ftintrz.w.d $fa1, $fa1 + movfr2gr.s $s3, $fa1 + fld.d $fa1, $sp, 160 # 8-byte Folded Reload + fadd.d $fa1, $ft8, $fa1 + fcmp.clt.d $fcc0, $fa0, $fa1 + fsel $fa0, $fa0, $fa1, $fcc0 + fld.d $fa1, $sp, 136 # 8-byte Folded Reload + fadd.d $fa1, $fa1, $ft10 + vreplvei.d $vr1, $vr1, 0 + vfrintrm.d $vr1, $vr1 + ftintrz.w.d $fa1, $fa1 + movfr2gr.s $s4, $fa1 + fld.d $fa1, $sp, 144 # 8-byte Folded Reload + fadd.d $fa1, $ft8, $fa1 + fcmp.clt.d $fcc0, $fa0, $fa1 + fsel $fa0, $fa0, $fa1, $fcc0 + fld.d $fa1, $sp, 120 # 8-byte Folded Reload + fadd.d $fa1, $fa1, $ft10 + vreplvei.d $vr1, $vr1, 0 + vfrintrm.d $vr1, $vr1 + ftintrz.w.d $fa1, $fa1 + movfr2gr.s $s5, $fa1 + fld.d $fa1, $sp, 128 # 8-byte Folded Reload + fadd.d $fa1, $ft8, $fa1 + fcmp.clt.d $fcc0, $fa0, $fa1 + fsel $fa0, $fa0, $fa1, $fcc0 + fld.d $fa1, $sp, 104 # 8-byte Folded Reload + fadd.d $fa1, $fa1, $ft10 + vreplvei.d $vr1, $vr1, 0 + vfrintrm.d $vr1, $vr1 + ftintrz.w.d $fa1, $fa1 + movfr2gr.s $s6, $fa1 + fld.d $fa1, $sp, 112 # 8-byte Folded Reload + fadd.d $fa1, $ft8, $fa1 + fcmp.clt.d $fcc0, $fa0, $fa1 + fsel $fa0, $fa0, $fa1, $fcc0 + fld.d $fa1, $sp, 88 # 8-byte Folded Reload + fadd.d $fa1, $fa1, $ft10 + vreplvei.d $vr1, $vr1, 0 + vfrintrm.d $vr1, $vr1 + ftintrz.w.d $fa1, $fa1 + movfr2gr.s $s7, $fa1 + fld.d $fa1, $sp, 96 # 8-byte Folded Reload + fadd.d $fa1, $ft8, $fa1 + fcmp.clt.d $fcc0, $fa0, $fa1 + fsel $fa0, $fa0, $fa1, $fcc0 + fld.d $fa1, $sp, 80 # 8-byte Folded Reload + fadd.d $fa1, $fa1, $ft10 + vreplvei.d $vr1, $vr1, 0 + vfrintrm.d $vr1, $vr1 + ftintrz.w.d $fa1, $fa1 + movfr2gr.s $s8, $fa1 + fadd.d $fa1, $ft8, $fs6 + fcmp.clt.d $fcc0, $fa0, $fa1 + fsel $fa0, $fa0, $fa1, $fcc0 + fld.d $fa1, $sp, 72 # 8-byte Folded Reload + fadd.d $fa1, $fa1, $ft10 + vreplvei.d $vr1, $vr1, 0 + vfrintrm.d $vr1, $vr1 + ftintrz.w.d $fa1, $fa1 + movfr2gr.s $ra, $fa1 + fadd.d $fa1, $ft8, $fs4 + fcmp.clt.d $fcc0, $fa0, $fa1 + fsel $fa0, $fa0, $fa1, $fcc0 st.w $s1, $a2, 0 - fld.d $fa2, $sp, 64 # 8-byte Folded Reload - fadd.d $fa2, $fa2, $fa0 - vreplvei.d $vr2, $vr2, 0 - vfrintrm.d $vr2, $vr2 - ftintrz.w.d $fa2, $fa2 - movfr2gr.s $s1, $fa2 - fadd.d $fa2, $ft9, $fs3 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fa2, $fcc0 + fld.d $fa1, $sp, 64 # 8-byte Folded Reload + fadd.d $fa1, $fa1, $ft10 + vreplvei.d $vr1, $vr1, 0 + vfrintrm.d $vr1, $vr1 + ftintrz.w.d $fa1, $fa1 + movfr2gr.s $s1, $fa1 + fadd.d $fa1, $ft8, $fs3 + fcmp.clt.d $fcc0, $fa0, $fa1 + fsel $fa0, $fa0, $fa1, $fcc0 st.w $s0, $a2, 4 - fld.d $fa2, $sp, 56 # 8-byte Folded Reload - fadd.d $fa2, $fa2, $fa0 - vreplvei.d $vr2, $vr2, 0 - vfrintrm.d $vr2, $vr2 - ftintrz.w.d $fa2, $fa2 - movfr2gr.s $s0, $fa2 - fadd.d $fa2, $ft9, $fs2 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fa2, $fcc0 + fld.d $fa1, $sp, 56 # 8-byte Folded Reload + fadd.d $fa1, $fa1, $ft10 + vreplvei.d $vr1, $vr1, 0 + vfrintrm.d $vr1, $vr1 + ftintrz.w.d $fa1, $fa1 + movfr2gr.s $s0, $fa1 + fadd.d $fa1, $ft8, $fs2 + fcmp.clt.d $fcc0, $fa0, $fa1 + fsel $fa0, $fa0, $fa1, $fcc0 st.w $t8, $a2, 8 - fld.d $fa2, $sp, 48 # 8-byte Folded Reload - fadd.d $fa2, $fa2, $fa0 - vreplvei.d $vr2, $vr2, 0 - vfrintrm.d $vr2, $vr2 - ftintrz.w.d $fa2, $fa2 - movfr2gr.s $t8, $fa2 - fadd.d $fa2, $ft9, $fs1 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fa2, $fcc0 + fld.d $fa1, $sp, 48 # 8-byte Folded Reload + fadd.d $fa1, $fa1, $ft10 + vreplvei.d $vr1, $vr1, 0 + vfrintrm.d $vr1, $vr1 + ftintrz.w.d $fa1, $fa1 + movfr2gr.s $t8, $fa1 + fadd.d $fa1, $ft8, $fs1 + fcmp.clt.d $fcc0, $fa0, $fa1 + fsel $fa0, $fa0, $fa1, $fcc0 st.w $t6, $a2, 12 - fld.d $fa2, $sp, 40 # 8-byte Folded Reload - fadd.d $fa2, $fa2, $fa0 - vreplvei.d $vr2, $vr2, 0 - vfrintrm.d $vr2, $vr2 - ftintrz.w.d $fa2, $fa2 - movfr2gr.s $t6, $fa2 - fadd.d $fa2, $ft9, $fs0 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fa2, $fcc0 + fld.d $fa1, $sp, 40 # 8-byte Folded Reload + fadd.d $fa1, $fa1, $ft10 + vreplvei.d $vr1, $vr1, 0 + vfrintrm.d $vr1, $vr1 + ftintrz.w.d $fa1, $fa1 + movfr2gr.s $t6, $fa1 + fadd.d $fa1, $ft8, $fs0 + fcmp.clt.d $fcc0, $fa0, $fa1 + fsel $fa0, $fa0, $fa1, $fcc0 st.w $t4, $a2, 16 - fld.d $fa2, $sp, 32 # 8-byte Folded Reload - fadd.d $fa2, $fa2, $fa0 - vreplvei.d $vr2, $vr2, 0 - vfrintrm.d $vr2, $vr2 - ftintrz.w.d $fa2, $fa2 - movfr2gr.s $t4, $fa2 - fadd.d $fa2, $ft9, $ft15 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fa2, $fcc0 + fld.d $fa1, $sp, 32 # 8-byte Folded Reload + fadd.d $fa1, $fa1, $ft10 + vreplvei.d $vr1, $vr1, 0 + vfrintrm.d $vr1, $vr1 + ftintrz.w.d $fa1, $fa1 + movfr2gr.s $t4, $fa1 + fadd.d $fa1, $ft8, $ft15 + fcmp.clt.d $fcc0, $fa0, $fa1 + fsel $fa0, $fa0, $fa1, $fcc0 st.w $t2, $a2, 20 - fld.d $fa2, $sp, 24 # 8-byte Folded Reload - fadd.d $fa2, $fa2, $fa0 - vreplvei.d $vr2, $vr2, 0 - vfrintrm.d $vr2, $vr2 - ftintrz.w.d $fa2, $fa2 - movfr2gr.s $t2, $fa2 - fadd.d $fa2, $ft9, $ft14 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fa2, $fcc0 + fld.d $fa1, $sp, 24 # 8-byte Folded Reload + fadd.d $fa1, $fa1, $ft10 + vreplvei.d $vr1, $vr1, 0 + vfrintrm.d $vr1, $vr1 + ftintrz.w.d $fa1, $fa1 + movfr2gr.s $t2, $fa1 + fadd.d $fa1, $ft8, $ft13 + fcmp.clt.d $fcc0, $fa0, $fa1 + fsel $fa0, $fa0, $fa1, $fcc0 st.w $t0, $a2, 24 - fld.d $fa2, $sp, 16 # 8-byte Folded Reload - fadd.d $fa2, $fa2, $fa0 - vreplvei.d $vr2, $vr2, 0 - vfrintrm.d $vr2, $vr2 - ftintrz.w.d $fa2, $fa2 - movfr2gr.s $t0, $fa2 - fadd.d $fa2, $ft9, $ft13 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fa2, $fcc0 + fld.d $fa1, $sp, 16 # 8-byte Folded Reload + fadd.d $fa1, $fa1, $ft10 + vreplvei.d $vr1, $vr1, 0 + vfrintrm.d $vr1, $vr1 + ftintrz.w.d $fa1, $fa1 + movfr2gr.s $t0, $fa1 + fadd.d $fa1, $ft8, $ft12 + fcmp.clt.d $fcc0, $fa0, $fa1 + fsel $fa0, $fa0, $fa1, $fcc0 st.w $a6, $a2, 28 - fld.d $fa2, $sp, 8 # 8-byte Folded Reload - fadd.d $fa2, $fa2, $fa0 - vreplvei.d $vr2, $vr2, 0 - vfrintrm.d $vr2, $vr2 - ftintrz.w.d $fa2, $fa2 - movfr2gr.s $a6, $fa2 - fadd.d $fa2, $ft9, $ft10 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa1, $fa1, $fa2, $fcc0 - fadd.d $fa0, $ft8, $fa0 + fld.d $fa1, $sp, 8 # 8-byte Folded Reload + fadd.d $fa1, $fa1, $ft10 + vreplvei.d $vr1, $vr1, 0 + vfrintrm.d $vr1, $vr1 + ftintrz.w.d $fa1, $fa1 + movfr2gr.s $a6, $fa1 + fadd.d $fa1, $ft8, $ft7 + fcmp.clt.d $fcc0, $fa0, $fa1 + fsel $fa0, $fa0, $fa1, $fcc0 + fadd.d $fa1, $ft9, $ft10 st.w $a4, $a2, 32 - vreplvei.d $vr0, $vr0, 0 - vfrintrm.d $vr0, $vr0 - ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a4, $fa0 - fadd.d $fa0, $ft9, $ft7 - fcmp.clt.d $fcc0, $fa1, $fa0 - fsel $fa0, $fa1, $fa0, $fcc0 + vreplvei.d $vr1, $vr1, 0 + vfrintrm.d $vr1, $vr1 + ftintrz.w.d $fa1, $fa1 + movfr2gr.s $a4, $fa1 + fadd.d $fa1, $ft8, $ft14 + fcmp.clt.d $fcc0, $fa0, $fa1 + fsel $fa0, $fa0, $fa1, $fcc0 st.w $a0, $a2, 36 st.w $a3, $a2, 40 st.w $a5, $a2, 44 @@ -1114,12 +1112,7 @@ compute_scalefacs_short: # @compute_scalefacs_short .Lfunc_end2: .size compute_scalefacs_short, .Lfunc_end2-compute_scalefacs_short # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function compute_scalefacs_long -.LCPI3_0: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 - .text - .globl compute_scalefacs_long + .globl compute_scalefacs_long # -- Begin function compute_scalefacs_long .p2align 5 .type compute_scalefacs_long,@function compute_scalefacs_long: # @compute_scalefacs_long @@ -1277,8 +1270,11 @@ compute_scalefacs_long: # @compute_scalefacs_long movgr2fr.w $fa1, $s2 ffint.d.w $fa1, $fa1 vldi $vr2, -920 - pcalau12i $a1, %pc_hi20(.LCPI3_0) - fld.d $fa3, $a1, %pc_lo12(.LCPI3_0) + lu12i.w $a1, -85564 + ori $a1, $a1, 813 + lu32i.d $a1, -379166 + lu52i.d $a1, $a1, 1009 + movgr2fr.d $fa3, $a1 vldi $vr4, -978 vldi $vr5, -996 ori $a1, $zero, 21 @@ -1315,14 +1311,7 @@ compute_scalefacs_long: # @compute_scalefacs_long .Lfunc_end3: .size compute_scalefacs_long, .Lfunc_end3-compute_scalefacs_long # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function VBR_iteration_loop_new -.LCPI4_0: - .dword 0x405fc00000000000 # double 127 -.LCPI4_1: - .dword 0x406a400000000000 # double 210 - .text - .globl VBR_iteration_loop_new + .globl VBR_iteration_loop_new # -- Begin function VBR_iteration_loop_new .p2align 5 .type VBR_iteration_loop_new,@function VBR_iteration_loop_new: # @VBR_iteration_loop_new @@ -1341,8 +1330,9 @@ VBR_iteration_loop_new: # @VBR_iteration_loop_new st.d $s8, $sp, 1944 # 8-byte Folded Spill fst.d $fs0, $sp, 1936 # 8-byte Folded Spill fst.d $fs1, $sp, 1928 # 8-byte Folded Spill + fst.d $fs2, $sp, 1920 # 8-byte Folded Spill lu12i.w $a1, 1 - ori $a1, $a1, 1232 + ori $a1, $a1, 1248 sub.d $sp, $sp, $a1 st.d $a7, $sp, 32 # 8-byte Folded Spill move $a2, $a6 @@ -1357,12 +1347,14 @@ VBR_iteration_loop_new: # @VBR_iteration_loop_new slli.d $a0, $a0, 1 addi.d $a0, $a0, -10 movgr2fr.w $fa0, $a0 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI4_0) ffint.d.w $fa0, $fa0 vldi $vr1, -988 fdiv.d $fa1, $fa0, $fa1 - fcmp.cule.d $fcc0, $fa1, $fa2 + ori $a0, $zero, 0 + lu32i.d $a0, -16384 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa0, $a0 + fcmp.cule.d $fcc0, $fa1, $fa0 bceqz $fcc0, .LBB4_26 .LBB4_1: # %cdce.end pcalau12i $a0, %got_pc_hi20(masking_lower) @@ -1382,7 +1374,7 @@ VBR_iteration_loop_new: # @VBR_iteration_loop_new st.d $a0, $sp, 104 # 8-byte Folded Spill lu12i.w $a0, 1 ori $s3, $a0, 512 - addi.d $fp, $sp, 208 + addi.d $fp, $sp, 216 ori $s4, $zero, 2 pcalau12i $a0, %got_pc_hi20(scalefac_band) ld.d $a0, $a0, %got_pc_lo12(scalefac_band) @@ -1391,11 +1383,15 @@ VBR_iteration_loop_new: # @VBR_iteration_loop_new addi.d $a0, $a0, 96 st.d $a0, $sp, 88 # 8-byte Folded Spill movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -376832 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fs1, $a0 lu12i.w $a0, 2 ori $a0, $a0, 1024 st.d $a0, $sp, 40 # 8-byte Folded Spill lu12i.w $a0, 1 - ori $a0, $a0, 1208 + ori $a0, $a0, 1216 add.d $a0, $sp, $a0 st.d $a0, $sp, 80 # 8-byte Folded Spill ld.d $a0, $sp, 56 # 8-byte Folded Reload @@ -1457,7 +1453,7 @@ VBR_iteration_loop_new: # @VBR_iteration_loop_new ori $a0, $zero, 976 mul.d $a0, $a2, $a0 lu12i.w $a1, 1 - ori $a1, $a1, 1208 + ori $a1, $a1, 1216 add.d $a1, $sp, $a1 add.d $a0, $a1, $a0 st.d $a0, $sp, 128 # 8-byte Folded Spill @@ -1558,7 +1554,7 @@ VBR_iteration_loop_new: # @VBR_iteration_loop_new ld.w $a1, $s4, 0 add.d $s2, $s8, $s1 lu12i.w $a2, 1 - ori $a2, $a2, 912 + ori $a2, $a2, 920 add.d $s7, $sp, $a2 move $s0, $s8 add.d $s8, $s7, $s1 @@ -1578,7 +1574,7 @@ VBR_iteration_loop_new: # @VBR_iteration_loop_new fst.d $fa0, $s8, -16 vld $vr1, $sp, 192 # 16-byte Folded Reload fcmp.clt.d $fcc0, $fa1, $fa0 - fsel $fs1, $fa1, $fa0, $fcc0 + fsel $fs2, $fa1, $fa0, $fcc0 sub.w $a4, $a1, $a0 alsl.w $a0, $a0, $a0, 1 slli.d $a0, $a0, 3 @@ -1596,8 +1592,8 @@ VBR_iteration_loop_new: # @VBR_iteration_loop_new ld.w $a1, $s4, 0 fst.d $fa0, $s8, -8 move $s8, $s0 - fcmp.clt.d $fcc0, $fs1, $fa0 - fsel $fs1, $fs1, $fa0, $fcc0 + fcmp.clt.d $fcc0, $fs2, $fa0 + fsel $fs2, $fs2, $fa0, $fcc0 sub.w $a4, $a1, $a0 alsl.w $a0, $a0, $a0, 1 slli.d $a0, $a0, 3 @@ -1612,8 +1608,8 @@ VBR_iteration_loop_new: # @VBR_iteration_loop_new pcaddu18i $ra, %call36(find_scalefac) jirl $ra, $ra, 0 fstx.d $fa0, $s7, $s1 - fcmp.clt.d $fcc0, $fs1, $fa0 - fsel $fa4, $fs1, $fa0, $fcc0 + fcmp.clt.d $fcc0, $fs2, $fa0 + fsel $fa4, $fs2, $fa0, $fcc0 addi.d $s1, $s1, 24 addi.d $s4, $s4, 4 ori $a0, $zero, 288 @@ -1646,7 +1642,7 @@ VBR_iteration_loop_new: # @VBR_iteration_loop_new jirl $ra, $ra, 0 vld $vr4, $sp, 192 # 16-byte Folded Reload lu12i.w $a0, 1 - ori $a0, $a0, 720 + ori $a0, $a0, 728 add.d $a0, $sp, $a0 fstx.d $fa0, $s1, $a0 fcmp.clt.d $fcc0, $fa4, $fa0 @@ -1657,10 +1653,8 @@ VBR_iteration_loop_new: # @VBR_iteration_loop_new bne $s1, $a0, .LBB4_18 .LBB4_19: # %.loopexit # in Loop: Header=BB4_10 Depth=2 - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI4_1) - vldi $vr1, -1008 - fmadd.d $fa0, $fa4, $fa1, $fa0 + vldi $vr0, -1008 + fmadd.d $fa0, $fa4, $fa0, $fs1 vldi $vr1, -928 fadd.d $fa0, $fa0, $fa1 vreplvei.d $vr0, $vr0, 0 @@ -1678,166 +1672,166 @@ VBR_iteration_loop_new: # @VBR_iteration_loop_new # %bb.20: # %.preheader.preheader # in Loop: Header=BB4_10 Depth=2 lu12i.w $a1, 1 - ori $a1, $a1, 912 + ori $a1, $a1, 920 add.d $a1, $sp, $a1 vld $vr1, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 896 + ori $a1, $a1, 904 add.d $a1, $sp, $a1 vld $vr2, $a1, 0 vreplvei.d $vr0, $vr4, 0 vfsub.d $vr1, $vr1, $vr0 vfsub.d $vr2, $vr2, $vr0 lu12i.w $a1, 1 - ori $a1, $a1, 928 + ori $a1, $a1, 936 add.d $a1, $sp, $a1 vld $vr3, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 896 + ori $a1, $a1, 904 add.d $a1, $sp, $a1 vst $vr2, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 912 + ori $a1, $a1, 920 add.d $a1, $sp, $a1 vst $vr1, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 944 + ori $a1, $a1, 952 add.d $a1, $sp, $a1 vld $vr1, $a1, 0 vfsub.d $vr2, $vr3, $vr0 lu12i.w $a1, 1 - ori $a1, $a1, 928 + ori $a1, $a1, 936 add.d $a1, $sp, $a1 vst $vr2, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 976 + ori $a1, $a1, 984 add.d $a1, $sp, $a1 vld $vr2, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 960 + ori $a1, $a1, 968 add.d $a1, $sp, $a1 vld $vr3, $a1, 0 vfsub.d $vr1, $vr1, $vr0 lu12i.w $a1, 1 - ori $a1, $a1, 944 + ori $a1, $a1, 952 add.d $a1, $sp, $a1 vst $vr1, $a1, 0 vfsub.d $vr1, $vr2, $vr0 vfsub.d $vr2, $vr3, $vr0 lu12i.w $a1, 1 - ori $a1, $a1, 992 + ori $a1, $a1, 1000 add.d $a1, $sp, $a1 vld $vr3, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 960 + ori $a1, $a1, 968 add.d $a1, $sp, $a1 vst $vr2, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 976 + ori $a1, $a1, 984 add.d $a1, $sp, $a1 vst $vr1, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 1008 + ori $a1, $a1, 1016 add.d $a1, $sp, $a1 vld $vr1, $a1, 0 vfsub.d $vr2, $vr3, $vr0 lu12i.w $a1, 1 - ori $a1, $a1, 992 + ori $a1, $a1, 1000 add.d $a1, $sp, $a1 vst $vr2, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 1040 + ori $a1, $a1, 1048 add.d $a1, $sp, $a1 vld $vr2, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 1024 + ori $a1, $a1, 1032 add.d $a1, $sp, $a1 vld $vr3, $a1, 0 vfsub.d $vr1, $vr1, $vr0 lu12i.w $a1, 1 - ori $a1, $a1, 1008 + ori $a1, $a1, 1016 add.d $a1, $sp, $a1 vst $vr1, $a1, 0 vfsub.d $vr1, $vr2, $vr0 vfsub.d $vr2, $vr3, $vr0 lu12i.w $a1, 1 - ori $a1, $a1, 1056 + ori $a1, $a1, 1064 add.d $a1, $sp, $a1 vld $vr3, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 1024 + ori $a1, $a1, 1032 add.d $a1, $sp, $a1 vst $vr2, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 1040 + ori $a1, $a1, 1048 add.d $a1, $sp, $a1 vst $vr1, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 1072 + ori $a1, $a1, 1080 add.d $a1, $sp, $a1 vld $vr1, $a1, 0 vfsub.d $vr2, $vr3, $vr0 lu12i.w $a1, 1 - ori $a1, $a1, 1056 + ori $a1, $a1, 1064 add.d $a1, $sp, $a1 vst $vr2, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 1104 + ori $a1, $a1, 1112 add.d $a1, $sp, $a1 vld $vr2, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 1088 + ori $a1, $a1, 1096 add.d $a1, $sp, $a1 vld $vr3, $a1, 0 vfsub.d $vr1, $vr1, $vr0 lu12i.w $a1, 1 - ori $a1, $a1, 1072 + ori $a1, $a1, 1080 add.d $a1, $sp, $a1 vst $vr1, $a1, 0 vfsub.d $vr1, $vr2, $vr0 vfsub.d $vr2, $vr3, $vr0 lu12i.w $a1, 1 - ori $a1, $a1, 1120 + ori $a1, $a1, 1128 add.d $a1, $sp, $a1 vld $vr3, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 1088 + ori $a1, $a1, 1096 add.d $a1, $sp, $a1 vst $vr2, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 1104 + ori $a1, $a1, 1112 add.d $a1, $sp, $a1 vst $vr1, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 1136 + ori $a1, $a1, 1144 add.d $a1, $sp, $a1 vld $vr1, $a1, 0 vfsub.d $vr2, $vr3, $vr0 lu12i.w $a1, 1 - ori $a1, $a1, 1120 + ori $a1, $a1, 1128 add.d $a1, $sp, $a1 vst $vr2, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 1168 + ori $a1, $a1, 1176 add.d $a1, $sp, $a1 vld $vr2, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 1152 + ori $a1, $a1, 1160 add.d $a1, $sp, $a1 vld $vr3, $a1, 0 vfsub.d $vr1, $vr1, $vr0 lu12i.w $a1, 1 - ori $a1, $a1, 1136 + ori $a1, $a1, 1144 add.d $a1, $sp, $a1 vst $vr1, $a1, 0 vfsub.d $vr1, $vr2, $vr0 vfsub.d $vr0, $vr3, $vr0 lu12i.w $a1, 1 - ori $a1, $a1, 1152 + ori $a1, $a1, 1160 add.d $a1, $sp, $a1 vst $vr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 1168 + ori $a1, $a1, 1176 add.d $a1, $sp, $a1 vst $vr1, $a1, 0 st.w $zero, $s1, 68 @@ -1845,7 +1839,7 @@ VBR_iteration_loop_new: # @VBR_iteration_loop_new add.d $a0, $a1, $a0 addi.d $s5, $a0, 88 lu12i.w $a0, 1 - ori $a0, $a0, 896 + ori $a0, $a0, 904 add.d $a0, $sp, $a0 move $a1, $s1 move $a2, $s5 @@ -1859,7 +1853,7 @@ VBR_iteration_loop_new: # @VBR_iteration_loop_new ori $a0, $zero, 1 st.w $a0, $s1, 68 lu12i.w $a0, 1 - ori $a0, $a0, 896 + ori $a0, $a0, 904 add.d $a0, $sp, $a0 move $a1, $s1 move $a2, $s5 @@ -1870,110 +1864,110 @@ VBR_iteration_loop_new: # @VBR_iteration_loop_new .LBB4_22: # %.preheader133.preheader # in Loop: Header=BB4_10 Depth=2 lu12i.w $a1, 1 - ori $a1, $a1, 720 + ori $a1, $a1, 728 add.d $a1, $sp, $a1 vld $vr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 736 + ori $a1, $a1, 744 add.d $a1, $sp, $a1 vld $vr1, $a1, 0 vreplvei.d $vr2, $vr4, 0 vfsub.d $vr0, $vr0, $vr2 lu12i.w $a1, 1 - ori $a1, $a1, 720 + ori $a1, $a1, 728 add.d $a1, $sp, $a1 vst $vr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 768 + ori $a1, $a1, 776 add.d $a1, $sp, $a1 vld $vr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 752 + ori $a1, $a1, 760 add.d $a1, $sp, $a1 vld $vr3, $a1, 0 vfsub.d $vr1, $vr1, $vr2 lu12i.w $a1, 1 - ori $a1, $a1, 736 + ori $a1, $a1, 744 add.d $a1, $sp, $a1 vst $vr1, $a1, 0 vfsub.d $vr0, $vr0, $vr2 vfsub.d $vr1, $vr3, $vr2 lu12i.w $a1, 1 - ori $a1, $a1, 784 + ori $a1, $a1, 792 add.d $a1, $sp, $a1 vld $vr3, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 752 + ori $a1, $a1, 760 add.d $a1, $sp, $a1 vst $vr1, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 768 + ori $a1, $a1, 776 add.d $a1, $sp, $a1 vst $vr0, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 800 + ori $a1, $a1, 808 add.d $a1, $sp, $a1 vld $vr0, $a1, 0 vfsub.d $vr1, $vr3, $vr2 lu12i.w $a1, 1 - ori $a1, $a1, 784 + ori $a1, $a1, 792 add.d $a1, $sp, $a1 vst $vr1, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 832 + ori $a1, $a1, 840 add.d $a1, $sp, $a1 vld $vr1, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 816 + ori $a1, $a1, 824 add.d $a1, $sp, $a1 vld $vr3, $a1, 0 vfsub.d $vr0, $vr0, $vr2 lu12i.w $a1, 1 - ori $a1, $a1, 800 + ori $a1, $a1, 808 add.d $a1, $sp, $a1 vst $vr0, $a1, 0 vfsub.d $vr0, $vr1, $vr2 vfsub.d $vr1, $vr3, $vr2 lu12i.w $a1, 1 - ori $a1, $a1, 864 + ori $a1, $a1, 872 add.d $a1, $sp, $a1 vld $vr3, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 816 + ori $a1, $a1, 824 add.d $a1, $sp, $a1 vst $vr1, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 848 + ori $a1, $a1, 856 add.d $a1, $sp, $a1 vld $vr1, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 832 + ori $a1, $a1, 840 add.d $a1, $sp, $a1 vst $vr0, $a1, 0 vfsub.d $vr0, $vr3, $vr2 lu12i.w $a1, 1 - ori $a1, $a1, 880 + ori $a1, $a1, 888 add.d $a1, $sp, $a1 fld.d $fa3, $a1, 0 vfsub.d $vr1, $vr1, $vr2 lu12i.w $a1, 1 - ori $a1, $a1, 848 + ori $a1, $a1, 856 add.d $a1, $sp, $a1 vst $vr1, $a1, 0 lu12i.w $a1, 1 - ori $a1, $a1, 864 + ori $a1, $a1, 872 add.d $a1, $sp, $a1 vst $vr0, $a1, 0 fsub.d $fa0, $fa3, $fa4 lu12i.w $a1, 1 - ori $a1, $a1, 880 + ori $a1, $a1, 888 add.d $a1, $sp, $a1 fst.d $fa0, $a1, 0 st.w $zero, $s1, 68 ld.d $a1, $sp, 120 # 8-byte Folded Reload add.d $s5, $a1, $a0 lu12i.w $a0, 1 - ori $a0, $a0, 720 + ori $a0, $a0, 728 add.d $a0, $sp, $a0 move $a1, $s1 move $a2, $s5 @@ -1987,7 +1981,7 @@ VBR_iteration_loop_new: # @VBR_iteration_loop_new ori $a0, $zero, 1 st.w $a0, $s1, 68 lu12i.w $a0, 1 - ori $a0, $a0, 720 + ori $a0, $a0, 728 add.d $a0, $sp, $a0 move $a1, $s1 move $a2, $s5 @@ -1996,8 +1990,9 @@ VBR_iteration_loop_new: # @VBR_iteration_loop_new b .LBB4_8 .LBB4_24: # %._crit_edge150 lu12i.w $a0, 1 - ori $a0, $a0, 1232 + ori $a0, $a0, 1248 add.d $sp, $sp, $a0 + fld.d $fs2, $sp, 1920 # 8-byte Folded Reload fld.d $fs1, $sp, 1928 # 8-byte Folded Reload fld.d $fs0, $sp, 1936 # 8-byte Folded Reload ld.d $s8, $sp, 1944 # 8-byte Folded Reload diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z01.s b/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z01.s index cc1b9233..5b1411fa 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z01.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z01.s @@ -1,14 +1,6 @@ .file "z01.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function main -.LCPI0_0: - .word 0x42f00000 # float 120 -.LCPI0_1: - .word 0x44b40000 # float 1440 -.LCPI0_2: - .word 0x440dc000 # float 567 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -427,8 +419,7 @@ main: # @main ori $a0, $zero, 2336 add.d $a0, $sp, $a0 fld.s $fa0, $a0, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.s $fa1, $a0, %pc_lo12(.LCPI0_2) + lu12i.w $a0, 278748 b .LBB0_177 .LBB0_34: # in Loop: Header=BB0_10 Depth=1 st.d $s2, $sp, 152 # 8-byte Folded Spill @@ -1454,16 +1445,15 @@ main: # @main ori $a0, $zero, 2336 add.d $a0, $sp, $a0 fld.s $fa0, $a0, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI0_0) + lu12i.w $a0, 274176 b .LBB0_177 .LBB0_176: # in Loop: Header=BB0_10 Depth=1 ori $a0, $zero, 2336 add.d $a0, $sp, $a0 fld.s $fa0, $a0, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.s $fa1, $a0, %pc_lo12(.LCPI0_1) + lu12i.w $a0, 281408 .LBB0_177: # in Loop: Header=BB0_10 Depth=1 + movgr2fr.w $fa1, $a0 fmul.s $fa0, $fa0, $fa1 b .LBB0_179 .LBB0_178: # in Loop: Header=BB0_10 Depth=1 @@ -1490,20 +1480,18 @@ main: # @main jr $a0 .LBB0_181: # in Loop: Header=BB0_10 Depth=1 fld.s $fa0, $sp, 276 - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.s $fa1, $a0, %pc_lo12(.LCPI0_2) + lu12i.w $a0, 278748 b .LBB0_184 .LBB0_182: # in Loop: Header=BB0_10 Depth=1 fld.s $fa0, $sp, 276 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI0_0) + lu12i.w $a0, 274176 b .LBB0_184 .LBB0_183: # in Loop: Header=BB0_10 Depth=1 fld.s $fa0, $sp, 276 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.s $fa1, $a0, %pc_lo12(.LCPI0_1) + lu12i.w $a0, 281408 .LBB0_184: # %GetArg.exit205 # in Loop: Header=BB0_10 Depth=1 + movgr2fr.w $fa1, $a0 fmul.s $fa0, $fa0, $fa1 b .LBB0_186 .LBB0_185: # in Loop: Header=BB0_10 Depth=1 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z08.s b/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z08.s index 412720f0..276ad37c 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z08.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z08.s @@ -761,12 +761,7 @@ ReplaceWithTidy: # @ReplaceWithTidy .Lfunc_end0: .size ReplaceWithTidy, .Lfunc_end0-ReplaceWithTidy # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function Manifest -.LCPI1_0: - .word 0x43000000 # float 128 - .text - .globl Manifest + .globl Manifest # -- Begin function Manifest .p2align 5 .type Manifest,@function Manifest: # @Manifest @@ -4943,8 +4938,8 @@ Manifest: # @Manifest # %bb.391: pcaddu18i $ra, %call36(GetScaleFactor) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI1_0) + lu12i.w $a0, 274432 + movgr2fr.w $fs0, $a0 fmul.s $fa0, $fa0, $fs0 ftintrz.w.s $fa0, $fa0 movfr2gr.s $a0, $fa0 @@ -4965,8 +4960,8 @@ Manifest: # @Manifest .LBB1_394: # %.thread2438 pcaddu18i $ra, %call36(GetScaleFactor) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI1_0) + lu12i.w $a0, 274432 + movgr2fr.w $fa1, $a0 fmul.s $fa0, $fa0, $fa1 ftintrz.w.s $fa0, $fa0 movfr2gr.s $a0, $fa0 @@ -10945,16 +10940,7 @@ insert_split: # @insert_split .Lfunc_end3: .size insert_split, .Lfunc_end3-insert_split # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function GetScaleFactor -.LCPI4_0: - .dword 0x3f847ae147ae147b # double 0.01 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI4_1: - .word 0x42c80000 # float 100 - .text - .p2align 5 + .p2align 5 # -- Begin function GetScaleFactor .type GetScaleFactor,@function GetScaleFactor: # @GetScaleFactor # %bb.0: @@ -10979,10 +10965,13 @@ GetScaleFactor: # @GetScaleFactor bne $a0, $a1, .LBB4_5 # %bb.2: fld.s $fa0, $sp, 4 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_0) - fcvt.d.s $fa2, $fa0 - fcmp.cule.d $fcc0, $fa1, $fa2 + fcvt.d.s $fa1, $fa0 + lu12i.w $a0, 293601 + ori $a0, $a0, 1147 + lu32i.d $a0, 293601 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa2, $a0 + fcmp.cule.d $fcc0, $fa2, $fa1 bcnez $fcc0, .LBB4_9 # %bb.3: pcalau12i $a0, %pc_hi20(.L.str.67) @@ -11021,8 +11010,8 @@ GetScaleFactor: # @GetScaleFactor addi.d $sp, $sp, 32 ret .LBB4_9: - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.s $fa1, $a0, %pc_lo12(.LCPI4_1) + lu12i.w $a0, 273536 + movgr2fr.w $fa1, $a0 fcmp.cule.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB4_8 # %bb.10: diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z13.s b/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z13.s index c2299b09..a08ac73e 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z13.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z13.s @@ -1,14 +1,6 @@ .file "z13.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function BreakObject -.LCPI0_0: - .word 0x3c000000 # float 0.0078125 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI0_1: - .dword 0x3ff199999999999a # double 1.1000000000000001 .text - .globl BreakObject + .globl BreakObject # -- Begin function BreakObject .p2align 5 .type BreakObject,@function BreakObject: # @BreakObject @@ -204,14 +196,14 @@ BreakObject: # @BreakObject ld.bu $a0, $s0, 32 beqz $a0, .LBB0_27 # %bb.28: - addi.d $a4, $s0, 32 ld.w $a0, $s0, 64 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI0_0) + addi.d $a4, $s0, 32 addi.d $a5, $fp, 64 + movgr2fr.w $fa0, $a0 + ffint.s.w $fa0, $fa0 + lu12i.w $a0, 245760 movgr2fr.w $fa1, $a0 - ffint.s.w $fa1, $fa1 - fmul.s $fa0, $fa1, $fa0 + fmul.s $fa0, $fa0, $fa1 fcvt.d.s $fa0, $fa0 movfr2gr.d $a6, $fa0 pcalau12i $a0, %pc_hi20(.L.str.6) @@ -250,14 +242,14 @@ BreakObject: # @BreakObject masknez $a1, $a1, $a0 pcalau12i $a2, %pc_hi20(.L.str.9) addi.d $a2, $a2, %pc_lo12(.L.str.9) + ld.w $a3, $fp, 64 maskeqz $a0, $a2, $a0 - ld.w $a2, $fp, 64 - pcalau12i $a3, %pc_hi20(.LCPI0_0) - fld.s $fa0, $a3, %pc_lo12(.LCPI0_0) or $a5, $a0, $a1 - movgr2fr.w $fa1, $a2 - ffint.s.w $fa1, $fa1 - fmul.s $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a3 + ffint.s.w $fa0, $fa0 + lu12i.w $a0, 245760 + movgr2fr.w $fa1, $a0 + fmul.s $fa0, $fa0, $fa1 fcvt.d.s $fa0, $fa0 movfr2gr.d $a6, $fa0 pcalau12i $a0, %pc_hi20(.L.str.8) @@ -324,12 +316,12 @@ BreakObject: # @BreakObject beqz $a0, .LBB0_41 # %bb.42: ld.w $a0, $fp, 64 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI0_0) addi.d $a4, $fp, 32 + movgr2fr.w $fa0, $a0 + ffint.s.w $fa0, $fa0 + lu12i.w $a0, 245760 movgr2fr.w $fa1, $a0 - ffint.s.w $fa1, $fa1 - fmul.s $fa0, $fa1, $fa0 + fmul.s $fa0, $fa0, $fa1 fcvt.d.s $fa0, $fa0 movfr2gr.d $a6, $fa0 pcalau12i $a0, %pc_hi20(.L.str.2) @@ -741,11 +733,14 @@ BreakObject: # @BreakObject move $a1, $fp pcaddu18i $ra, %call36(FontSize) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI0_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI0_1) - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, 104857 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 ftintrz.l.d $fa0, $fa0 ld.hu $a0, $s1, 68 movfr2gr.d $a1, $fa0 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z15.s b/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z15.s index 06a733e4..b69030db 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z15.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z15.s @@ -72,12 +72,7 @@ EnlargeToConstraint: # @EnlargeToConstraint .Lfunc_end2: .size EnlargeToConstraint, .Lfunc_end2-EnlargeToConstraint # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function ScaleToConstraint -.LCPI3_0: - .word 0x43000000 # float 128 - .text - .globl ScaleToConstraint + .globl ScaleToConstraint # -- Begin function ScaleToConstraint .p2align 5 .type ScaleToConstraint,@function ScaleToConstraint: # @ScaleToConstraint @@ -119,8 +114,8 @@ ScaleToConstraint: # @ScaleToConstraint fcmp.clt.s $fcc0, $fa0, $fa1 fsel $fa0, $fa1, $fa0, $fcc0 .LBB3_5: - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI3_0) + lu12i.w $a0, 274432 + movgr2fr.w $fa1, $a0 fmul.s $fa0, $fa0, $fa1 ftintrz.w.s $fa0, $fa0 movfr2gr.s $a0, $fa0 @@ -218,36 +213,7 @@ InvScaleConstraint: # @InvScaleConstraint .Lfunc_end4: .size InvScaleConstraint, .Lfunc_end4-InvScaleConstraint # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function RotateConstraint -.LCPI5_0: - .dword 0x400921fb54442d18 # double 3.1415926535897931 -.LCPI5_1: - .dword 0x40e6800000000000 # double 46080 -.LCPI5_2: - .dword 0xb690000000000000 # double -7.0064923216240854E-46 -.LCPI5_3: - .dword 0x401921fb54442d18 # double 6.2831853071795862 -.LCPI5_4: - .dword 0xc01921fb54442d18 # double -6.2831853071795862 -.LCPI5_5: - .dword 0x3ff921fb54442d18 # double 1.5707963267948966 -.LCPI5_6: - .dword 0xbff921fb54442d18 # double -1.5707963267948966 -.LCPI5_7: - .dword 0x4012d97c7f3321d2 # double 4.7123889803846897 -.LCPI5_8: - .dword 0xc00921fb54442d18 # double -3.1415926535897931 -.LCPI5_9: - .dword 0xc012d97c7f3321d2 # double -4.7123889803846897 -.LCPI5_10: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI5_11: - .word 0x4afffffe # float 8388607 - .text - .globl RotateConstraint + .globl RotateConstraint # -- Begin function RotateConstraint .p2align 5 .type RotateConstraint,@function RotateConstraint: # @RotateConstraint @@ -275,22 +241,27 @@ RotateConstraint: # @RotateConstraint move $fp, $a0 movgr2fr.w $fa0, $a2 ffint.s.w $fa0, $fa0 - fadd.s $fa1, $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI5_0) - pcalau12i $a0, %pc_hi20(.LCPI5_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI5_1) - pcalau12i $a0, %pc_hi20(.LCPI5_2) - fld.d $fa0, $a0, %pc_lo12(.LCPI5_2) - fcvt.d.s $fa1, $fa1 - fmul.d $fa1, $fa1, $fs1 - fdiv.d $fa1, $fa1, $fa2 + fadd.s $fa0, $fa0, $fa0 + fcvt.d.s $fa0, $fa0 + lu12i.w $a0, 345154 + ori $s4, $a0, 3352 + lu32i.d $s4, -450053 + lu52i.d $a0, $s4, 1024 + movgr2fr.d $fs1, $a0 + fmul.d $fa0, $fa0, $fs1 + ori $a0, $zero, 0 + lu32i.d $a0, 425984 + lu52i.d $a0, $a0, 1038 + movgr2fr.d $fa1, $a0 + fdiv.d $fa1, $fa0, $fa1 + lu52i.d $a0, $zero, -1175 + movgr2fr.d $fa0, $a0 fcmp.cule.d $fcc0, $fa0, $fa1 fcvt.s.d $fs0, $fa1 bcnez $fcc0, .LBB5_3 # %bb.1: - pcalau12i $a0, %pc_hi20(.LCPI5_3) - fld.d $fa1, $a0, %pc_lo12(.LCPI5_3) + lu52i.d $a0, $s4, 1025 + movgr2fr.d $fa1, $a0 .p2align 4, , 16 .LBB5_2: # %.lr.ph # =>This Inner Loop Header: Depth=1 @@ -300,14 +271,14 @@ RotateConstraint: # @RotateConstraint fcvt.s.d $fs0, $fa2 bcnez $fcc0, .LBB5_2 .LBB5_3: # %.preheader - pcalau12i $a0, %pc_hi20(.LCPI5_3) - fld.d $fa0, $a0, %pc_lo12(.LCPI5_3) fcvt.d.s $fs2, $fs0 + lu52i.d $a0, $s4, 1025 + movgr2fr.d $fa0, $a0 fcmp.cult.d $fcc0, $fs2, $fa0 bcnez $fcc0, .LBB5_6 # %bb.4: - pcalau12i $a0, %pc_hi20(.LCPI5_4) - fld.d $fa1, $a0, %pc_lo12(.LCPI5_4) + lu52i.d $a0, $s4, -1023 + movgr2fr.d $fa1, $a0 .p2align 4, , 16 .LBB5_5: # %.lr.ph99 # =>This Inner Loop Header: Depth=1 @@ -324,21 +295,24 @@ RotateConstraint: # @RotateConstraint fcmp.cle.d $fcc0, $fs2, $fa0 bceqz $fcc0, .LBB5_12 # %bb.8: - pcalau12i $a0, %pc_hi20(.LCPI5_5) - fld.d $fa0, $a0, %pc_lo12(.LCPI5_5) + lu52i.d $a0, $s4, 1023 + movgr2fr.d $fa0, $a0 fcmp.cult.d $fcc0, $fa0, $fs2 bceqz $fcc0, .LBB5_13 .LBB5_9: fcmp.cult.d $fcc0, $fs1, $fs2 bceqz $fcc0, .LBB5_14 # %bb.10: - pcalau12i $a0, %pc_hi20(.LCPI5_7) - fld.d $fa1, $a0, %pc_lo12(.LCPI5_7) + lu12i.w $a0, 521010 + ori $a0, $a0, 466 + lu32i.d $a0, 186748 + lu52i.d $a1, $a0, 1025 + movgr2fr.d $fa1, $a1 fcmp.cult.d $fcc0, $fa1, $fs2 bceqz $fcc0, .LBB5_15 # %bb.11: - pcalau12i $a0, %pc_hi20(.LCPI5_9) - fld.d $fa1, $a0, %pc_lo12(.LCPI5_9) + lu52i.d $a0, $a0, -1023 + movgr2fr.d $fa1, $a0 fadd.d $fa1, $fs2, $fa1 fcvt.s.d $fs0, $fa1 addi.d $a0, $s2, 4 @@ -361,8 +335,8 @@ RotateConstraint: # @RotateConstraint move $a3, $zero pcaddu18i $ra, %call36(Error) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_5) - fld.d $fa0, $a0, %pc_lo12(.LCPI5_5) + lu52i.d $a0, $s4, 1023 + movgr2fr.d $fa0, $a0 fcmp.cult.d $fcc0, $fa0, $fs2 bcnez $fcc0, .LBB5_9 .LBB5_13: @@ -374,8 +348,8 @@ RotateConstraint: # @RotateConstraint move $a3, $s3 b .LBB5_16 .LBB5_14: - pcalau12i $a0, %pc_hi20(.LCPI5_6) - fld.d $fa1, $a0, %pc_lo12(.LCPI5_6) + lu52i.d $a0, $s4, -1025 + movgr2fr.d $fa1, $a0 fadd.d $fa1, $fs2, $fa1 fcvt.s.d $fs0, $fa1 addi.d $a3, $s2, 8 @@ -385,8 +359,8 @@ RotateConstraint: # @RotateConstraint move $a2, $s3 b .LBB5_16 .LBB5_15: - pcalau12i $a0, %pc_hi20(.LCPI5_8) - fld.d $fa1, $a0, %pc_lo12(.LCPI5_8) + lu52i.d $a0, $s4, -1024 + movgr2fr.d $fa1, $a0 fadd.d $fa1, $fs2, $fa1 fcvt.s.d $fs0, $fa1 addi.d $a3, $s3, 8 @@ -408,7 +382,7 @@ RotateConstraint: # @RotateConstraint fcvt.s.d $fs1, $fa0 beqz $s1, .LBB5_21 # %bb.17: - st.d $s4, $sp, 8 # 8-byte Folded Spill + move $s7, $s4 move $s4, $s3 ld.w $s3, $s0, 48 ld.w $s2, $s0, 56 @@ -421,13 +395,16 @@ RotateConstraint: # @RotateConstraint pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 fabs.s $fa1, $fs2 - pcalau12i $a0, %pc_hi20(.LCPI5_10) - fld.d $fs1, $a0, %pc_lo12(.LCPI5_10) fcvt.d.s $fa1, $fa1 lu12i.w $a0, 2047 ori $s1, $a0, 4095 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs1, $a0 fcmp.clt.d $fcc0, $fa1, $fs1 - pcalau12i $s7, %pc_hi20(.LCPI5_11) + lu12i.w $a3, 307199 move $a0, $s1 move $a1, $s1 move $a2, $s1 @@ -438,10 +415,11 @@ RotateConstraint: # @RotateConstraint ffint.s.w $fa1, $fa1 movgr2fr.w $fa2, $s3 ffint.s.w $fa2, $fa2 - fld.s $fa3, $s7, %pc_lo12(.LCPI5_11) fneg.s $fa2, $fa2 fmadd.s $fa1, $fa2, $fa0, $fa1 fdiv.s $fa1, $fa1, $fs2 + ori $a0, $a3, 4094 + movgr2fr.w $fa3, $a0 fcmp.cle.s $fcc0, $fa3, $fa1 fsel $fa1, $fa1, $fa3, $fcc0 ftintrz.w.s $fa1, $fa1 @@ -467,6 +445,7 @@ RotateConstraint: # @RotateConstraint ftintrz.w.s $fa0, $fa0 movfr2gr.s $a2, $fa0 .LBB5_19: # %SemiRotateConstraint.exit70 + move $s3, $a3 st.w $a0, $fp, 0 st.w $a1, $fp, 4 st.w $a2, $fp, 8 @@ -492,10 +471,11 @@ RotateConstraint: # @RotateConstraint ffint.s.w $fa1, $fa1 movgr2fr.w $fa2, $s2 ffint.s.w $fa2, $fa2 - fld.s $fa3, $s7, %pc_lo12(.LCPI5_11) fneg.s $fa2, $fa2 fmadd.s $fa1, $fa2, $fa0, $fa1 fdiv.s $fa1, $fa1, $fs2 + ori $a0, $s3, 4094 + movgr2fr.w $fa3, $a0 fcmp.cle.s $fcc0, $fa3, $fa1 fsel $fa1, $fa1, $fa3, $fcc0 ftintrz.w.s $fa1, $fa1 @@ -512,8 +492,7 @@ RotateConstraint: # @RotateConstraint fsel $fa1, $fa1, $fa3, $fcc0 ftintrz.w.s $fa1, $fa1 movfr2gr.s $a0, $fa1 - ld.d $a1, $sp, 8 # 8-byte Folded Reload - movgr2fr.w $fa1, $a1 + movgr2fr.w $fa1, $s7 ffint.s.w $fa1, $fa1 fmadd.s $fa0, $fa2, $fa0, $fa1 fdiv.s $fa0, $fa0, $fs2 @@ -529,11 +508,14 @@ RotateConstraint: # @RotateConstraint pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 fabs.s $fa1, $fs3 - pcalau12i $a0, %pc_hi20(.LCPI5_10) - fld.d $fs2, $a0, %pc_lo12(.LCPI5_10) fcvt.d.s $fa1, $fa1 lu12i.w $a0, 2047 ori $s1, $a0, 4095 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs2, $a0 fcmp.clt.d $fcc0, $fa1, $fs2 move $a0, $s1 move $a1, $s1 @@ -545,11 +527,12 @@ RotateConstraint: # @RotateConstraint ffint.s.w $fa1, $fa1 movgr2fr.w $fa2, $s2 ffint.s.w $fa2, $fa2 - pcalau12i $a0, %pc_hi20(.LCPI5_11) - fld.s $fa3, $a0, %pc_lo12(.LCPI5_11) fneg.s $fa2, $fa2 fmadd.s $fa1, $fa2, $fa0, $fa1 fdiv.s $fa1, $fa1, $fs3 + lu12i.w $a0, 307199 + ori $a0, $a0, 4094 + movgr2fr.w $fa3, $a0 fcmp.cle.s $fcc0, $fa3, $fa1 fsel $fa1, $fa1, $fa3, $fcc0 ftintrz.w.s $fa1, $fa1 @@ -600,11 +583,12 @@ RotateConstraint: # @RotateConstraint ffint.s.w $fa1, $fa1 movgr2fr.w $fa2, $s2 ffint.s.w $fa2, $fa2 - pcalau12i $a0, %pc_hi20(.LCPI5_11) - fld.s $fa3, $a0, %pc_lo12(.LCPI5_11) fneg.s $fa2, $fa2 fmadd.s $fa1, $fa2, $fa0, $fa1 fdiv.s $fa1, $fa1, $fs1 + lu12i.w $a0, 307199 + ori $a0, $a0, 4094 + movgr2fr.w $fa3, $a0 fcmp.cle.s $fcc0, $fa3, $fa1 fsel $fa1, $fa1, $fa3, $fcc0 ftintrz.w.s $fa1, $fa1 @@ -670,12 +654,7 @@ RotateConstraint: # @RotateConstraint .Lfunc_end5: .size RotateConstraint, .Lfunc_end5-RotateConstraint # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function InsertScale -.LCPI6_0: - .word 0x43000000 # float 128 - .text - .globl InsertScale + .globl InsertScale # -- Begin function InsertScale .p2align 5 .type InsertScale,@function InsertScale: # @InsertScale @@ -728,8 +707,8 @@ InsertScale: # @InsertScale fcmp.clt.s $fcc0, $fa0, $fa1 fsel $fa0, $fa1, $fa0, $fcc0 .LBB6_5: # %ScaleToConstraint.exit - pcalau12i $a0, %pc_hi20(.LCPI6_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI6_0) + lu12i.w $a0, 274432 + movgr2fr.w $fa1, $a0 fmul.s $fa0, $fa0, $fa1 ftintrz.w.s $fa0, $fa0 movfr2gr.s $s4, $fa0 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z17.s b/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z17.s index 4d13d01a..7c8a5eed 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z17.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z17.s @@ -1,26 +1,6 @@ .file "z17.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function GetGap -.LCPI0_0: - .word 0x43340000 # float 180 -.LCPI0_1: - .word 0xc3b40000 # float -360 -.LCPI0_2: - .word 0xc3340000 # float -180 -.LCPI0_3: - .word 0x43b40000 # float 360 -.LCPI0_4: - .word 0x43000000 # float 128 -.LCPI0_5: - .word 0x45800000 # float 4096 -.LCPI0_6: - .word 0x42f00000 # float 120 -.LCPI0_7: - .word 0x44b40000 # float 1440 -.LCPI0_8: - .word 0x440dc000 # float 567 .text - .globl GetGap + .globl GetGap # -- Begin function GetGap .p2align 5 .type GetGap,@function GetGap: # @GetGap @@ -51,7 +31,7 @@ GetGap: # @GetGap bgeu $a4, $a5, .LBB0_5 # %bb.1: ld.bu $a4, $a0, 64 - beqz $a4, .LBB0_66 + beqz $a4, .LBB0_67 # %bb.2: move $s4, $a1 move $s3, $a2 @@ -127,11 +107,11 @@ GetGap: # @GetGap jr $a0 .LBB0_14: fld.s $fa0, $sp, 20 - pcalau12i $a0, %pc_hi20(.LCPI0_5) - fld.s $fa1, $a0, %pc_lo12(.LCPI0_5) + lu12i.w $a0, 284672 + movgr2fr.w $fa1, $a0 fmul.s $fa0, $fa0, $fa1 ori $a0, $zero, 2048 - b .LBB0_45 + b .LBB0_46 .LBB0_15: pcalau12i $a0, %pc_hi20(.L.str.2) addi.d $a2, $a0, %pc_lo12(.L.str.2) @@ -156,31 +136,29 @@ GetGap: # @GetGap move $a4, $fp pcaddu18i $ra, %call36(Error) jirl $ra, $ra, 0 - b .LBB0_66 + b .LBB0_67 .LBB0_16: fld.s $fa0, $sp, 20 - pcalau12i $a0, %pc_hi20(.LCPI0_5) - fld.s $fa1, $a0, %pc_lo12(.LCPI0_5) + lu12i.w $a0, 284672 + movgr2fr.w $fa1, $a0 fmul.s $fa0, $fa0, $fa1 lu12i.w $a0, 1 ori $a0, $a0, 1024 - b .LBB0_45 + b .LBB0_46 .LBB0_17: fld.s $fa0, $sp, 20 - pcalau12i $a0, %pc_hi20(.LCPI0_7) - fld.s $fa1, $a0, %pc_lo12(.LCPI0_7) - b .LBB0_43 + lu12i.w $a0, 281408 + b .LBB0_37 .LBB0_18: ld.h $a0, $s4, 10 - b .LBB0_42 + b .LBB0_43 .LBB0_19: fld.s $fa0, $sp, 20 - pcalau12i $a0, %pc_hi20(.LCPI0_6) - fld.s $fa1, $a0, %pc_lo12(.LCPI0_6) - b .LBB0_43 + lu12i.w $a0, 274176 + b .LBB0_37 .LBB0_20: ld.h $a0, $s4, 8 - b .LBB0_42 + b .LBB0_43 .LBB0_21: fld.s $fs0, $sp, 20 ld.wu $a0, $s4, 12 @@ -192,13 +170,13 @@ GetGap: # @GetGap movgr2fr.w $fa0, $a0 ffint.s.w $fa0, $fa0 fmul.s $fa0, $fs0, $fa0 - b .LBB0_44 + b .LBB0_45 .LBB0_22: pcalau12i $a0, %pc_hi20(.L.str.9) addi.d $a2, $a0, %pc_lo12(.L.str.9) ori $a0, $zero, 17 ori $a1, $zero, 4 - b .LBB0_65 + b .LBB0_66 .LBB0_23: ld.w $a0, $s5, 0 fld.s $fa0, $sp, 20 @@ -208,15 +186,16 @@ GetGap: # @GetGap fneg.s $fa0, $fa0 fst.s $fa0, $sp, 20 .LBB0_25: - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI0_0) ori $a0, $zero, 158 + lu12i.w $a2, 275264 + movgr2fr.w $fa1, $a2 fcmp.cule.s $fcc0, $fa0, $fa1 st.w $a0, $s5, 0 - bcnez $fcc0, .LBB0_32 + bcnez $fcc0, .LBB0_29 # %bb.26: # %.lr.ph.preheader - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.s $fa2, $a0, %pc_lo12(.LCPI0_1) + lu12i.w $a0, -246976 + lu32i.d $a0, 0 + movgr2fr.w $fa2, $a0 .p2align 4, , 16 .LBB0_27: # %.lr.ph # =>This Inner Loop Header: Depth=1 @@ -225,15 +204,28 @@ GetGap: # @GetGap bcnez $fcc0, .LBB0_27 # %bb.28: # %.thread-pre-split_crit_edge fst.s $fa0, $sp, 20 - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.s $fa2, $a0, %pc_lo12(.LCPI0_2) +.LBB0_29: # %thread-pre-split + lu12i.w $a0, -249024 + lu32i.d $a0, 0 + movgr2fr.w $fa2, $a0 fcmp.cule.s $fcc0, $fa2, $fa0 - bceqz $fcc0, .LBB0_33 -.LBB0_29: + bcnez $fcc0, .LBB0_33 +# %bb.30: # %.lr.ph95.preheader + lu12i.w $a0, 277312 + movgr2fr.w $fa3, $a0 + .p2align 4, , 16 +.LBB0_31: # %.lr.ph95 + # =>This Inner Loop Header: Depth=1 + fadd.s $fa0, $fa0, $fa3 + fcmp.clt.s $fcc0, $fa0, $fa2 + bcnez $fcc0, .LBB0_31 +# %bb.32: # %._crit_edge + fst.s $fa0, $sp, 20 +.LBB0_33: fabs.s $fa2, $fa0 fcmp.cle.s $fcc0, $fa2, $fa1 - bcnez $fcc0, .LBB0_31 -.LBB0_30: + bcnez $fcc0, .LBB0_35 +# %bb.34: pcalau12i $a0, %got_pc_hi20(no_fpos) ld.d $a0, $a0, %got_pc_lo12(no_fpos) ld.d $a4, $a0, 0 @@ -248,46 +240,27 @@ GetGap: # @GetGap jirl $ra, $ra, 0 move $a1, $s3 fld.s $fa0, $sp, 20 -.LBB0_31: - pcalau12i $a0, %pc_hi20(.LCPI0_4) - fld.s $fa1, $a0, %pc_lo12(.LCPI0_4) +.LBB0_35: + lu12i.w $a0, 274432 + movgr2fr.w $fa1, $a0 fmul.s $fa0, $fa0, $fa1 lu12i.w $a0, 1 - b .LBB0_45 -.LBB0_32: # %thread-pre-split - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.s $fa2, $a0, %pc_lo12(.LCPI0_2) - fcmp.cule.s $fcc0, $fa2, $fa0 - bcnez $fcc0, .LBB0_29 -.LBB0_33: # %.lr.ph95.preheader - pcalau12i $a0, %pc_hi20(.LCPI0_3) - fld.s $fa3, $a0, %pc_lo12(.LCPI0_3) - .p2align 4, , 16 -.LBB0_34: # %.lr.ph95 - # =>This Inner Loop Header: Depth=1 - fadd.s $fa0, $fa0, $fa3 - fcmp.clt.s $fcc0, $fa0, $fa2 - bcnez $fcc0, .LBB0_34 -# %bb.35: # %._crit_edge - fst.s $fa0, $sp, 20 - fabs.s $fa2, $fa0 - fcmp.cle.s $fcc0, $fa2, $fa1 - bceqz $fcc0, .LBB0_30 - b .LBB0_31 + b .LBB0_46 .LBB0_36: fld.s $fa0, $sp, 20 - pcalau12i $a0, %pc_hi20(.LCPI0_8) - fld.s $fa1, $a0, %pc_lo12(.LCPI0_8) - b .LBB0_43 -.LBB0_37: + lu12i.w $a0, 278748 +.LBB0_37: # %.thread + movgr2fr.w $fa1, $a0 + b .LBB0_44 +.LBB0_38: fld.s $fa0, $sp, 20 vldi $vr1, -1228 fmul.s $fa0, $fa0, $fa1 - b .LBB0_44 -.LBB0_38: + b .LBB0_45 +.LBB0_39: fld.s $fa0, $sp, 20 - pcalau12i $a0, %pc_hi20(.LCPI0_5) - fld.s $fa1, $a0, %pc_lo12(.LCPI0_5) + lu12i.w $a0, 284672 + movgr2fr.w $fa1, $a0 ld.hu $a0, $a1, 0 fmul.s $fa0, $fa0, $fa1 lu12i.w $a2, 14 @@ -299,8 +272,8 @@ GetGap: # @GetGap lu12i.w $s2, 1 ori $a3, $s2, 1 st.h $a2, $a1, 0 - blt $a0, $a3, .LBB0_46 -# %bb.39: + blt $a0, $a3, .LBB0_47 +# %bb.40: fld.s $fa0, $sp, 20 fcvt.d.s $fa0, $fa0 movfr2gr.d $a5, $fa0 @@ -314,21 +287,21 @@ GetGap: # @GetGap jirl $ra, $ra, 0 move $a1, $s3 move $a0, $s2 - b .LBB0_46 -.LBB0_40: - ld.h $a0, $s4, 2 - b .LBB0_42 + b .LBB0_47 .LBB0_41: + ld.h $a0, $s4, 2 + b .LBB0_43 +.LBB0_42: ld.h $a0, $s4, 6 -.LBB0_42: # %.thread +.LBB0_43: # %.thread fld.s $fa0, $sp, 20 movgr2fr.w $fa1, $a0 ffint.s.w $fa1, $fa1 -.LBB0_43: # %.thread - fmul.s $fa0, $fa0, $fa1 .LBB0_44: # %.thread - ori $a0, $zero, 1024 + fmul.s $fa0, $fa0, $fa1 .LBB0_45: # %.thread + ori $a0, $zero, 1024 +.LBB0_46: # %.thread ld.hu $a2, $a1, 0 lu12i.w $a3, 14 ori $a3, $a3, 1023 @@ -337,70 +310,70 @@ GetGap: # @GetGap st.h $a0, $a1, 0 ftintrz.w.s $fa0, $fa0 movfr2gr.s $a0, $fa0 -.LBB0_46: +.LBB0_47: st.h $a0, $a1, 2 ld.bu $a0, $s1, 0 addi.d $a2, $a0, -101 ori $a3, $zero, 19 - bltu $a3, $a2, .LBB0_49 -# %bb.47: + bltu $a3, $a2, .LBB0_50 +# %bb.48: slli.d $a0, $a2, 2 pcalau12i $a2, %pc_hi20(.LJTI0_1) addi.d $a2, $a2, %pc_lo12(.LJTI0_1) ldx.w $a0, $a2, $a0 add.d $a0, $a2, $a0 jr $a0 -.LBB0_48: +.LBB0_49: ld.hu $a0, $a1, 0 ori $a2, $zero, 1 - b .LBB0_57 -.LBB0_49: - bnez $a0, .LBB0_51 + b .LBB0_58 .LBB0_50: + bnez $a0, .LBB0_52 +.LBB0_51: ld.hu $a0, $a1, 0 ori $a2, $zero, 1 bstrins.d $a0, $a2, 63, 13 st.h $a0, $a1, 0 - b .LBB0_58 -.LBB0_51: + b .LBB0_59 +.LBB0_52: pcalau12i $a0, %pc_hi20(.L.str.11) addi.d $a2, $a0, %pc_lo12(.L.str.11) ori $a0, $zero, 17 ori $a1, $zero, 7 - b .LBB0_65 -.LBB0_52: + b .LBB0_66 +.LBB0_53: ld.hu $a0, $a1, 0 ori $a2, $zero, 3 - b .LBB0_57 -.LBB0_53: + b .LBB0_58 +.LBB0_54: ld.hu $a0, $a1, 0 ori $a2, $zero, 2 - b .LBB0_57 -.LBB0_54: + b .LBB0_58 +.LBB0_55: ld.hu $a0, $a1, 0 ori $a2, $zero, 5 - b .LBB0_57 -.LBB0_55: + b .LBB0_58 +.LBB0_56: ld.hu $a0, $a1, 0 ori $a2, $zero, 4 - b .LBB0_57 -.LBB0_56: + b .LBB0_58 +.LBB0_57: ld.hu $a0, $a1, 0 ori $a2, $zero, 6 -.LBB0_57: +.LBB0_58: bstrins.d $a0, $a2, 63, 13 st.h $a0, $a1, 0 addi.d $s1, $s1, 1 -.LBB0_58: +.LBB0_59: ld.bu $a2, $s1, 0 ori $a3, $zero, 117 - bne $a2, $a3, .LBB0_63 -# %bb.59: + bne $a2, $a3, .LBB0_64 +# %bb.60: lu12i.w $a2, 14 and $a2, $a0, $a2 lu12i.w $a3, 4 - bne $a2, $a3, .LBB0_61 -# %bb.60: + bne $a2, $a3, .LBB0_62 +# %bb.61: pcalau12i $a0, %pc_hi20(.L.str.12) addi.d $a2, $a0, %pc_lo12(.L.str.12) ori $a0, $zero, 17 @@ -410,26 +383,26 @@ GetGap: # @GetGap move $a5, $s0 pcaddu18i $ra, %call36(Error) jirl $ra, $ra, 0 - b .LBB0_62 -.LBB0_61: + b .LBB0_63 +.LBB0_62: ori $a0, $a0, 128 st.h $a0, $a1, 0 -.LBB0_62: - ld.bu $a2, $s1, 1 .LBB0_63: - beqz $a2, .LBB0_66 -# %bb.64: + ld.bu $a2, $s1, 1 +.LBB0_64: + beqz $a2, .LBB0_67 +# %bb.65: pcalau12i $a0, %pc_hi20(.L.str.13) addi.d $a2, $a0, %pc_lo12(.L.str.13) ori $a0, $zero, 17 ori $a1, $zero, 8 -.LBB0_65: +.LBB0_66: ori $a3, $zero, 2 move $a4, $fp move $a5, $s0 pcaddu18i $ra, %call36(Error) jirl $ra, $ra, 0 -.LBB0_66: +.LBB0_67: fld.d $fs0, $sp, 24 # 8-byte Folded Reload ld.d $s5, $sp, 32 # 8-byte Folded Reload ld.d $s4, $sp, 40 # 8-byte Folded Reload @@ -460,45 +433,41 @@ GetGap: # @GetGap .word .LBB0_19-.LJTI0_0 .word .LBB0_22-.LJTI0_0 .word .LBB0_22-.LJTI0_0 - .word .LBB0_37-.LJTI0_0 - .word .LBB0_22-.LJTI0_0 .word .LBB0_38-.LJTI0_0 - .word .LBB0_41-.LJTI0_0 + .word .LBB0_22-.LJTI0_0 + .word .LBB0_39-.LJTI0_0 + .word .LBB0_42-.LJTI0_0 .word .LBB0_22-.LJTI0_0 .word .LBB0_22-.LJTI0_0 - .word .LBB0_40-.LJTI0_0 + .word .LBB0_41-.LJTI0_0 .word .LBB0_16-.LJTI0_0 .word .LBB0_22-.LJTI0_0 .word .LBB0_20-.LJTI0_0 .word .LBB0_18-.LJTI0_0 .LJTI0_1: - .word .LBB0_48-.LJTI0_1 - .word .LBB0_51-.LJTI0_1 - .word .LBB0_51-.LJTI0_1 - .word .LBB0_53-.LJTI0_1 - .word .LBB0_51-.LJTI0_1 - .word .LBB0_51-.LJTI0_1 + .word .LBB0_49-.LJTI0_1 + .word .LBB0_52-.LJTI0_1 + .word .LBB0_52-.LJTI0_1 .word .LBB0_54-.LJTI0_1 - .word .LBB0_51-.LJTI0_1 - .word .LBB0_51-.LJTI0_1 - .word .LBB0_51-.LJTI0_1 + .word .LBB0_52-.LJTI0_1 + .word .LBB0_52-.LJTI0_1 .word .LBB0_55-.LJTI0_1 - .word .LBB0_51-.LJTI0_1 - .word .LBB0_51-.LJTI0_1 - .word .LBB0_51-.LJTI0_1 - .word .LBB0_51-.LJTI0_1 + .word .LBB0_52-.LJTI0_1 + .word .LBB0_52-.LJTI0_1 + .word .LBB0_52-.LJTI0_1 .word .LBB0_56-.LJTI0_1 - .word .LBB0_50-.LJTI0_1 - .word .LBB0_51-.LJTI0_1 + .word .LBB0_52-.LJTI0_1 + .word .LBB0_52-.LJTI0_1 + .word .LBB0_52-.LJTI0_1 + .word .LBB0_52-.LJTI0_1 + .word .LBB0_57-.LJTI0_1 .word .LBB0_51-.LJTI0_1 .word .LBB0_52-.LJTI0_1 + .word .LBB0_52-.LJTI0_1 + .word .LBB0_53-.LJTI0_1 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function MinGap -.LCPI1_0: - .dword 0x3fb999999999999a # double 0.10000000000000001 .text - .globl MinGap + .globl MinGap # -- Begin function MinGap .p2align 5 .type MinGap,@function MinGap: # @MinGap @@ -606,12 +575,15 @@ MinGap: # @MinGap add.w $a0, $a1, $a0 beqz $a3, .LBB1_16 # %bb.12: - pcalau12i $a1, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI1_0) addi.w $a1, $a2, 0 - movgr2fr.w $fa1, $a2 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a2 + ffint.d.w $fa0, $fa0 + lu12i.w $a2, -419431 + ori $a2, $a2, 2458 + lu32i.d $a2, -419431 + lu52i.d $a2, $a2, 1019 + movgr2fr.d $fa1, $a2 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a2, $fa0 add.w $a0, $a0, $a2 @@ -657,12 +629,8 @@ MinGap: # @MinGap .word .LBB1_10-.LJTI1_0 .word .LBB1_8-.LJTI1_0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function ExtraGap -.LCPI2_0: - .dword 0x3feccccccccccccd # double 0.90000000000000002 .text - .globl ExtraGap + .globl ExtraGap # -- Begin function ExtraGap .p2align 5 .type ExtraGap,@function ExtraGap: # @ExtraGap @@ -732,11 +700,14 @@ ExtraGap: # @ExtraGap ld.w $a2, $a2, 36 beqz $a2, .LBB2_10 # %bb.8: - pcalau12i $a2, %pc_hi20(.LCPI2_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI2_0) - movgr2fr.w $fa1, $a4 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a4 + ffint.d.w $fa0, $fa0 + lu12i.w $a2, -209716 + ori $a2, $a2, 3277 + lu32i.d $a2, -209716 + lu52i.d $a2, $a2, 1022 + movgr2fr.d $fa1, $a2 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a2, $fa0 add.d $a0, $a1, $a0 @@ -768,12 +739,8 @@ ExtraGap: # @ExtraGap .word .LBB2_4-.LJTI2_0 .word .LBB2_4-.LJTI2_0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function ActualGap -.LCPI3_0: - .dword 0x3fb999999999999a # double 0.10000000000000001 .text - .globl ActualGap + .globl ActualGap # -- Begin function ActualGap .p2align 5 .type ActualGap,@function ActualGap: # @ActualGap @@ -931,12 +898,15 @@ ActualGap: # @ActualGap add.w $a0, $a1, $a0 beqz $a3, .LBB3_16 # %bb.14: - pcalau12i $a1, %pc_hi20(.LCPI3_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI3_0) addi.w $a1, $a2, 0 - movgr2fr.w $fa1, $a2 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a2 + ffint.d.w $fa0, $fa0 + lu12i.w $a2, -419431 + ori $a2, $a2, 2458 + lu32i.d $a2, -419431 + lu52i.d $a2, $a2, 1019 + movgr2fr.d $fa1, $a2 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a2, $fa0 add.w $a0, $a0, $a2 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z19.s b/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z19.s index 772b24dd..2c9edb0c 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z19.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z19.s @@ -520,12 +520,7 @@ SearchGalley: # @SearchGalley .Lfunc_end1: .size SearchGalley, .Lfunc_end1-SearchGalley # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function AttachGalley -.LCPI2_0: - .word 0x440dc000 # float 567 - .text - .globl AttachGalley + .globl AttachGalley # -- Begin function AttachGalley .p2align 5 .type AttachGalley,@function AttachGalley: # @AttachGalley @@ -624,6 +619,8 @@ AttachGalley: # @AttachGalley ori $a1, $a1, 4095 lu32i.d $a1, 0 st.d $a1, $sp, 192 # 8-byte Folded Spill + lu12i.w $a1, 278748 + movgr2fr.w $fs0, $a1 # implicit-def: $r5 # kill: killed $r5 st.d $s1, $sp, 216 # 8-byte Folded Spill @@ -1961,8 +1958,6 @@ AttachGalley: # @AttachGalley add.w $a0, $a0, $a3 blez $a0, .LBB2_266 # %bb.163: # in Loop: Header=BB2_7 Depth=1 - pcalau12i $a1, %pc_hi20(.LCPI2_0) - fld.s $fs0, $a1, %pc_lo12(.LCPI2_0) bstrpick.d $a0, $a0, 31, 0 movgr2fr.d $fa0, $a0 ffint.s.l $fa0, $fa0 @@ -2385,8 +2380,6 @@ AttachGalley: # @AttachGalley .LBB2_215: # in Loop: Header=BB2_7 Depth=1 addi.d $a0, $fp, 56 ld.w $a0, $a0, 0 - pcalau12i $a1, %pc_hi20(.LCPI2_0) - fld.s $fs0, $a1, %pc_lo12(.LCPI2_0) add.d $a0, $a0, $a3 movgr2fr.w $fa0, $a0 ffint.s.w $fa0, $fa0 @@ -2490,8 +2483,6 @@ AttachGalley: # @AttachGalley blt $a1, $a0, .LBB2_266 # %bb.223: # in Loop: Header=BB2_7 Depth=1 st.d $a1, $sp, 56 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI2_0) bstrpick.d $a0, $s1, 31, 0 movgr2fr.d $fa0, $a0 ffint.s.l $fa0, $fa0 @@ -2664,8 +2655,6 @@ AttachGalley: # @AttachGalley add.w $a0, $a1, $a0 blez $a0, .LBB2_266 # %bb.243: # in Loop: Header=BB2_7 Depth=1 - pcalau12i $a1, %pc_hi20(.LCPI2_0) - fld.s $fs0, $a1, %pc_lo12(.LCPI2_0) bstrpick.d $a0, $a0, 31, 0 movgr2fr.d $fa0, $a0 ffint.s.l $fa0, $fa0 @@ -2723,8 +2712,6 @@ AttachGalley: # @AttachGalley addi.d $a0, $s1, 56 ld.w $a1, $s5, 0 ld.w $a0, $a0, 0 - pcalau12i $a2, %pc_hi20(.LCPI2_0) - fld.s $fs0, $a2, %pc_lo12(.LCPI2_0) add.d $a0, $a0, $a1 movgr2fr.w $fa0, $a0 ffint.s.w $fa0, $fa0 @@ -3744,8 +3731,6 @@ AttachGalley: # @AttachGalley addi.d $a0, $s1, 56 ld.w $a1, $s5, 0 ld.w $a0, $a0, 0 - pcalau12i $a2, %pc_hi20(.LCPI2_0) - fld.s $fs0, $a2, %pc_lo12(.LCPI2_0) add.d $a0, $a0, $a1 movgr2fr.w $fa0, $a0 ffint.s.w $fa0, $fa0 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z21.s b/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z21.s index 4599371f..58f2da79 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z21.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z21.s @@ -1,16 +1,6 @@ .file "z21.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function SizeGalley -.LCPI0_0: - .word 0x42c80000 # float 100 -.LCPI0_2: - .word 0x43000000 # float 128 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI0_1: - .dword 0x3f847ae147ae147b # double 0.01 .text - .globl SizeGalley + .globl SizeGalley # -- Begin function SizeGalley .p2align 5 .type SizeGalley,@function SizeGalley: # @SizeGalley @@ -1305,13 +1295,16 @@ SizeGalley: # @SizeGalley pcalau12i $a1, %pc_hi20(.L.str.19) addi.d $a1, $a1, %pc_lo12(.L.str.19) st.d $a1, $sp, 136 # 8-byte Folded Spill - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.s $fs0, $a1, %pc_lo12(.LCPI0_0) + lu12i.w $a1, 273536 + movgr2fr.w $fs0, $a1 pcalau12i $a1, %pc_hi20(.L.str.20) addi.d $a1, $a1, %pc_lo12(.L.str.20) st.d $a1, $sp, 112 # 8-byte Folded Spill - pcalau12i $a1, %pc_hi20(.LCPI0_1) - fld.d $fs1, $a1, %pc_lo12(.LCPI0_1) + lu12i.w $a1, 293601 + ori $a1, $a1, 1147 + lu32i.d $a1, 293601 + lu52i.d $a1, $a1, 1016 + movgr2fr.d $fs1, $a1 pcalau12i $a1, %pc_hi20(.L.str.21) addi.d $a1, $a1, %pc_lo12(.L.str.21) st.d $a1, $sp, 32 # 8-byte Folded Spill @@ -1601,6 +1594,8 @@ SizeGalley: # @SizeGalley sltu $a0, $zero, $a0 st.d $a0, $sp, 136 # 8-byte Folded Spill ori $s4, $zero, 9 + lu12i.w $a0, 274432 + movgr2fr.w $fs0, $a0 b .LBB0_192 .p2align 4, , 16 .LBB0_189: # in Loop: Header=BB0_192 Depth=1 @@ -2097,12 +2092,10 @@ SizeGalley: # @SizeGalley ffint.s.w $fa1, $fa1 movgr2fr.w $fa2, $s6 ffint.s.w $fa2, $fa2 - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.s $fa3, $a0, %pc_lo12(.LCPI0_2) fdiv.s $fa1, $fa1, $fa2 fcmp.clt.s $fcc0, $fa0, $fa1 fsel $fa0, $fa0, $fa1, $fcc0 - fmul.s $fa0, $fa0, $fa3 + fmul.s $fa0, $fa0, $fs0 ftintrz.w.s $fa0, $fa0 movfr2gr.s $a0, $fa0 st.d $a0, $sp, 112 # 8-byte Folded Spill diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z23.s b/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z23.s index 0914b1da..9b644d95 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z23.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z23.s @@ -1,10 +1,6 @@ .file "z23.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function FixAndPrintObject -.LCPI0_0: - .word 0x3c000000 # float 0.0078125 .text - .globl FixAndPrintObject + .globl FixAndPrintObject # -- Begin function FixAndPrintObject .p2align 5 .type FixAndPrintObject,@function FixAndPrintObject: # @FixAndPrintObject @@ -498,7 +494,7 @@ FixAndPrintObject: # @FixAndPrintObject beqz $a0, .LBB0_60 .LBB0_66: st.d $s8, $sp, 168 # 8-byte Folded Spill - st.d $s6, $sp, 112 # 8-byte Folded Spill + st.d $s6, $sp, 104 # 8-byte Folded Spill st.d $s4, $sp, 144 # 8-byte Folded Spill ld.d $s1, $s0, 8 beq $s1, $fp, .LBB0_291 @@ -949,7 +945,7 @@ FixAndPrintObject: # @FixAndPrintObject ld.d $a7, $sp, 192 # 8-byte Folded Reload b .LBB0_115 .LBB0_124: - move $s7, $a4 + st.d $a4, $sp, 192 # 8-byte Folded Spill ld.d $s5, $fp, 8 move $s1, $zero .p2align 4, , 16 @@ -965,12 +961,13 @@ FixAndPrintObject: # @FixAndPrintObject ld.w $a1, $a0, 16 beqz $a1, .LBB0_215 # %bb.127: - beqz $s7, .LBB0_261 + ld.d $a1, $sp, 192 # 8-byte Folded Reload + beqz $a1, .LBB0_261 # %bb.128: st.d $s2, $sp, 176 # 8-byte Folded Spill st.d $a6, $sp, 184 # 8-byte Folded Spill - move $s2, $s3 - st.d $s6, $sp, 112 # 8-byte Folded Spill + move $s7, $s3 + move $s3, $s6 ld.w $a1, $fp, 72 bgtz $a1, .LBB0_130 # %bb.129: @@ -989,9 +986,9 @@ FixAndPrintObject: # @FixAndPrintObject ld.w $a1, $fp, 72 ld.d $a0, $s0, 0 .LBB0_130: - slli.w $a2, $s2, 7 + slli.w $a2, $s7, 7 ld.d $a3, $a0, 160 - div.w $s3, $a2, $a1 + div.w $s2, $a2, $a1 slli.w $a0, $s8, 7 div.w $s6, $a0, $a1 move $a0, $s5 @@ -1004,15 +1001,15 @@ FixAndPrintObject: # @FixAndPrintObject sub.w $a1, $a1, $a3 jirl $ra, $a2, 0 ld.d $a0, $s0, 0 - ld.d $a0, $a0, 152 ld.w $a1, $fp, 64 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.s $fa1, $a2, %pc_lo12(.LCPI0_0) - ld.w $a2, $fp, 72 + ld.d $a0, $a0, 152 movgr2fr.w $fa0, $a1 ffint.s.w $fa0, $fa0 + ld.w $a1, $fp, 72 + lu12i.w $a2, 245760 + movgr2fr.w $fa1, $a2 fmul.s $fa0, $fa0, $fa1 - movgr2fr.w $fa2, $a2 + movgr2fr.w $fa2, $a1 ffint.s.w $fa2, $fa2 fmul.s $fa1, $fa2, $fa1 jirl $ra, $a0, 0 @@ -1022,9 +1019,9 @@ FixAndPrintObject: # @FixAndPrintObject st.d $a0, $sp, 0 move $a0, $s5 move $a1, $zero - move $a2, $s3 + move $a2, $s2 move $a3, $s6 - move $a4, $s7 + ld.d $a4, $sp, 192 # 8-byte Folded Reload move $a5, $zero move $a6, $zero move $a7, $s1 @@ -1033,8 +1030,8 @@ FixAndPrintObject: # @FixAndPrintObject ld.d $a0, $s0, 0 ld.d $a0, $a0, 168 jirl $ra, $a0, 0 - ld.d $s6, $sp, 112 # 8-byte Folded Reload - move $s3, $s2 + move $s6, $s3 + move $s3, $s7 b .LBB0_276 .LBB0_131: ld.d $a0, $sp, 144 # 8-byte Folded Reload @@ -1087,7 +1084,7 @@ FixAndPrintObject: # @FixAndPrintObject st.d $a4, $sp, 192 # 8-byte Folded Spill beqz $a4, .LBB0_218 # %bb.136: # %.preheader2179 - st.d $s6, $sp, 112 # 8-byte Folded Spill + st.d $s6, $sp, 104 # 8-byte Folded Spill beq $s0, $fp, .LBB0_563 # %bb.137: # %.preheader2178.lr.ph sub.w $a0, $a6, $s2 @@ -1252,7 +1249,7 @@ FixAndPrintObject: # @FixAndPrintObject st.d $s6, $sp, 0 b .LBB0_39 .LBB0_156: - st.d $s6, $sp, 112 # 8-byte Folded Spill + st.d $s6, $sp, 104 # 8-byte Folded Spill st.d $s4, $sp, 144 # 8-byte Folded Spill ld.d $s3, $fp, 8 beq $s3, $fp, .LBB0_278 @@ -1438,7 +1435,7 @@ FixAndPrintObject: # @FixAndPrintObject beqz $a4, .LBB0_270 # %bb.192: st.d $s3, $sp, 160 # 8-byte Folded Spill - st.d $s6, $sp, 112 # 8-byte Folded Spill + st.d $s6, $sp, 104 # 8-byte Folded Spill ld.d $s6, $fp, 8 .LBB0_193: # =>This Inner Loop Header: Depth=1 ld.d $s6, $s6, 16 @@ -1574,7 +1571,7 @@ FixAndPrintObject: # @FixAndPrintObject move $a1, $s2 move $a2, $s3 move $a3, $s8 - move $a4, $s7 + ld.d $a4, $sp, 192 # 8-byte Folded Reload b .LBB0_275 .LBB0_218: st.d $s2, $sp, 176 # 8-byte Folded Spill @@ -1612,7 +1609,7 @@ FixAndPrintObject: # @FixAndPrintObject jirl $ra, $ra, 0 beqz $a0, .LBB0_221 .LBB0_227: - st.d $s6, $sp, 112 # 8-byte Folded Spill + st.d $s6, $sp, 104 # 8-byte Folded Spill ld.w $a0, $s1, 48 ld.d $s3, $s0, 8 ld.d $a1, $sp, 160 # 8-byte Folded Reload @@ -2071,7 +2068,7 @@ FixAndPrintObject: # @FixAndPrintObject ld.d $a0, $s0, 0 ld.d $a0, $a0, 168 jirl $ra, $a0, 0 - ld.d $s6, $sp, 112 # 8-byte Folded Reload + ld.d $s6, $sp, 104 # 8-byte Folded Reload ld.d $s3, $sp, 160 # 8-byte Folded Reload b .LBB0_276 .LBB0_287: # %ScaleFactor.exit2106.thread @@ -2178,7 +2175,7 @@ FixAndPrintObject: # @FixAndPrintObject ld.d $a0, $sp, 192 # 8-byte Folded Reload alsl.d $a2, $a0, $fp, 2 ld.w $a0, $a2, 48 - st.d $zero, $sp, 104 # 8-byte Folded Spill + st.d $zero, $sp, 112 # 8-byte Folded Spill ld.d $a1, $sp, 168 # 8-byte Folded Reload add.w $a1, $a0, $a1 st.d $a1, $sp, 128 # 8-byte Folded Spill @@ -2198,7 +2195,7 @@ FixAndPrintObject: # @FixAndPrintObject ld.d $a2, $sp, 192 # 8-byte Folded Reload pcaddu18i $ra, %call36(FindAdjustIncrement) jirl $ra, $ra, 0 - st.d $a0, $sp, 104 # 8-byte Folded Spill + st.d $a0, $sp, 112 # 8-byte Folded Spill .LBB0_310: ld.d $a0, $sp, 192 # 8-byte Folded Reload alsl.d $a0, $a0, $s7, 2 @@ -2658,7 +2655,7 @@ FixAndPrintObject: # @FixAndPrintObject masknez $a0, $a0, $a1 maskeqz $a1, $a2, $a1 or $a0, $a1, $a0 - ld.d $a1, $sp, 112 # 8-byte Folded Reload + ld.d $a1, $sp, 104 # 8-byte Folded Reload st.w $zero, $a1, 0 ld.d $s4, $sp, 144 # 8-byte Folded Reload st.w $a0, $s4, 0 @@ -2666,7 +2663,7 @@ FixAndPrintObject: # @FixAndPrintObject .LBB0_374: # %._crit_edge2495 ld.w $a1, $sp, 232 ld.w $a0, $sp, 216 - ld.d $a2, $sp, 112 # 8-byte Folded Reload + ld.d $a2, $sp, 104 # 8-byte Folded Reload st.w $a1, $a2, 0 st.w $a0, $s4, 0 b .LBB0_278 @@ -2745,7 +2742,7 @@ FixAndPrintObject: # @FixAndPrintObject .LBB0_384: # in Loop: Header=BB0_381 Depth=1 ld.w $a1, $a3, 56 ld.w $a2, $a3, 48 - ld.d $a3, $sp, 104 # 8-byte Folded Reload + ld.d $a3, $sp, 112 # 8-byte Folded Reload add.w $a3, $a1, $a3 .LBB0_385: # in Loop: Header=BB0_381 Depth=1 addi.d $a1, $sp, 248 @@ -2877,7 +2874,7 @@ FixAndPrintObject: # @FixAndPrintObject masknez $a2, $a2, $a1 maskeqz $a0, $a0, $a1 or $a0, $a0, $a2 - ld.d $a1, $sp, 112 # 8-byte Folded Reload + ld.d $a1, $sp, 104 # 8-byte Folded Reload st.w $a0, $a1, 0 ld.w $a1, $s0, 56 ld.d $a2, $sp, 136 # 8-byte Folded Reload @@ -3091,14 +3088,14 @@ FixAndPrintObject: # @FixAndPrintObject ld.d $a2, $fp, 0 bne $a1, $a2, .LBB0_559 # %bb.438: - addi.d $s2, $a0, 32 ld.w $a1, $s1, 64 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.s $fa0, $a2, %pc_lo12(.LCPI0_0) + addi.d $s2, $a0, 32 addi.d $s6, $a0, 64 - movgr2fr.w $fa1, $a1 - ffint.s.w $fa1, $fa1 - fmul.s $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a1 + ffint.s.w $fa0, $fa0 + lu12i.w $a0, 245760 + movgr2fr.w $fa1, $a0 + fmul.s $fa0, $fa0, $fa1 fcvt.d.s $fs0, $fa0 ld.d $a0, $sp, 136 # 8-byte Folded Reload pcaddu18i $ra, %call36(EchoLength) @@ -3553,7 +3550,7 @@ FixAndPrintObject: # @FixAndPrintObject pcalau12i $a0, %got_pc_hi20(zz_lengths) ld.d $a0, $a0, %got_pc_lo12(zz_lengths) st.d $a0, $sp, 72 # 8-byte Folded Spill - st.d $zero, $sp, 104 # 8-byte Folded Spill + st.d $zero, $sp, 112 # 8-byte Folded Spill st.d $zero, $sp, 88 # 8-byte Folded Spill st.d $zero, $sp, 80 # 8-byte Folded Spill ori $s0, $zero, 9 @@ -3600,7 +3597,7 @@ FixAndPrintObject: # @FixAndPrintObject lu12i.w $a2, 262144 bne $a1, $a2, .LBB0_523 # %bb.509: # in Loop: Header=BB0_508 Depth=1 - ld.d $a1, $sp, 104 # 8-byte Folded Reload + ld.d $a1, $sp, 112 # 8-byte Folded Reload bnez $a1, .LBB0_511 # %bb.510: # in Loop: Header=BB0_508 Depth=1 ld.bu $a1, $a0, 32 @@ -3624,7 +3621,7 @@ FixAndPrintObject: # @FixAndPrintObject lu12i.w $a2, 393216 and $a1, $a1, $a2 ori $a2, $zero, 1 - st.d $a2, $sp, 104 # 8-byte Folded Spill + st.d $a2, $sp, 112 # 8-byte Folded Spill lu12i.w $a2, 131072 bne $a1, $a2, .LBB0_523 # %bb.512: # in Loop: Header=BB0_508 Depth=1 @@ -3741,7 +3738,7 @@ FixAndPrintObject: # @FixAndPrintObject st.d $s1, $a4, 0 bnez $a1, .LBB0_522 # %bb.520: # in Loop: Header=BB0_508 Depth=1 - st.d $zero, $sp, 104 # 8-byte Folded Spill + st.d $zero, $sp, 112 # 8-byte Folded Spill b .LBB0_523 .LBB0_521: # %.thread2771 # in Loop: Header=BB0_508 Depth=1 @@ -3751,7 +3748,7 @@ FixAndPrintObject: # @FixAndPrintObject pcalau12i $a3, %got_pc_hi20(zz_tmp) ld.d $a3, $a3, %got_pc_lo12(zz_tmp) ld.d $a4, $a1, 16 - st.d $zero, $sp, 104 # 8-byte Folded Spill + st.d $zero, $sp, 112 # 8-byte Folded Spill st.d $a2, $a3, 0 st.d $a4, $s1, 16 st.d $s1, $a4, 24 @@ -3876,7 +3873,7 @@ FixAndPrintObject: # @FixAndPrintObject move $s4, $zero b .LBB0_507 .LBB0_541: # %._crit_edge2432.loopexit - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 112 # 8-byte Folded Reload sltui $s0, $a0, 1 b .LBB0_543 .LBB0_542: @@ -4079,12 +4076,12 @@ FixAndPrintObject: # @FixAndPrintObject pcaddu18i $ra, %call36(EchoLength) jirl $ra, $ra, 0 ld.w $a1, $s1, 64 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.s $fa0, $a2, %pc_lo12(.LCPI0_0) move $s2, $a0 - movgr2fr.w $fa1, $a1 - ffint.s.w $fa1, $fa1 - fmul.s $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a1 + ffint.s.w $fa0, $fa0 + lu12i.w $a0, 245760 + movgr2fr.w $fa1, $a0 + fmul.s $fa0, $fa0, $fa1 fcvt.d.s $fs0, $fa0 ld.d $a0, $sp, 136 # 8-byte Folded Reload pcaddu18i $ra, %call36(EchoLength) @@ -4119,7 +4116,7 @@ FixAndPrintObject: # @FixAndPrintObject pcaddu18i $ra, %call36(FixAndPrintObject) jirl $ra, $ra, 0 .LBB0_563: # %.thread2121 - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload ld.d $a1, $sp, 160 # 8-byte Folded Reload st.w $a1, $a0, 0 ld.d $a0, $sp, 144 # 8-byte Folded Reload diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z34.s b/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z34.s index b8fd36f5..193e06ed 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z34.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z34.s @@ -1,16 +1,6 @@ .file "z34.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function RotateSize -.LCPI0_0: - .dword 0x400921fb54442d18 # double 3.1415926535897931 -.LCPI0_1: - .dword 0x40e6800000000000 # double 46080 -.LCPI0_2: - .dword 0x415fffffc0000000 # double 8388607 -.LCPI0_3: - .dword 0xc15fffffc0000000 # double -8388607 .text - .globl RotateSize + .globl RotateSize # -- Begin function RotateSize .p2align 5 .type RotateSize,@function RotateSize: # @RotateSize @@ -39,18 +29,23 @@ RotateSize: # @RotateSize move $s2, $a0 movgr2fr.w $fa0, $a5 ffint.d.w $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_1) fadd.d $fa0, $fa0, $fa0 - ld.w $s3, $a4, 56 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 - fdiv.d $fs3, $fa0, $fa2 + ori $a0, $zero, 0 + lu32i.d $a0, 425984 + lu52i.d $a0, $a0, 1038 + ld.w $s3, $a4, 56 + movgr2fr.d $fa1, $a0 + fdiv.d $fs4, $fa0, $fa1 ld.w $s6, $a4, 52 movgr2fr.w $fa0, $s3 ffint.s.w $fa0, $fa0 - fcvt.d.s $fs4, $fa0 + fcvt.d.s $fs3, $fa0 movgr2fr.w $fa0, $s6 ld.w $s5, $a4, 48 ffint.s.w $fa0, $fa0 @@ -70,17 +65,17 @@ RotateSize: # @RotateSize beqz $a0, .LBB0_2 # %bb.1: fmov.d $fa0, $fs5 - fmov.d $fa1, $fs4 + fmov.d $fa1, $fs3 pcaddu18i $ra, %call36(atan2) jirl $ra, $ra, 0 - fmadd.d $fa1, $fs4, $fs4, $fs7 + fmadd.d $fa1, $fs3, $fs3, $fs7 fsqrt.d $fa1, $fa1 .LBB0_2: fst.d $fa1, $sp, 56 # 8-byte Folded Spill - fst.d $fs4, $sp, 16 # 8-byte Folded Spill + fst.d $fs3, $sp, 16 # 8-byte Folded Spill fcvt.d.s $fs6, $fs1 - fcvt.d.s $fs4, $fs2 - fadd.d $fs2, $fs3, $fa0 + fcvt.d.s $fs3, $fs2 + fadd.d $fs2, $fs4, $fa0 fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 @@ -102,8 +97,8 @@ RotateSize: # @RotateSize fsqrt.d $fs1, $fa1 .LBB0_4: or $s5, $s5, $s4 - fst.d $fs3, $sp, 64 # 8-byte Folded Spill - fadd.d $fs2, $fs3, $fa0 + fst.d $fs4, $sp, 64 # 8-byte Folded Spill + fadd.d $fs2, $fs4, $fa0 fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 @@ -112,16 +107,16 @@ RotateSize: # @RotateSize pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 fst.d $fa0, $sp, 24 # 8-byte Folded Spill - fmul.d $fs3, $fs4, $fs4 + fmul.d $fs4, $fs3, $fs3 fmov.d $fa0, $fs0 fmov.d $fs5, $fs0 beqz $s5, .LBB0_6 # %bb.5: - fmov.d $fa0, $fs4 + fmov.d $fa0, $fs3 fmov.d $fa1, $fs6 pcaddu18i $ra, %call36(atan2) jirl $ra, $ra, 0 - fmadd.d $fa1, $fs6, $fs6, $fs3 + fmadd.d $fa1, $fs6, $fs6, $fs4 fsqrt.d $fs5, $fa1 .LBB0_6: or $s3, $s3, $s4 @@ -138,52 +133,53 @@ RotateSize: # @RotateSize fmov.d $fs7, $fs0 beqz $s3, .LBB0_8 # %bb.7: - fmov.d $fa0, $fs4 - fld.d $fs4, $sp, 16 # 8-byte Folded Reload - fmov.d $fa1, $fs4 + fmov.d $fa0, $fs3 + fld.d $fs3, $sp, 16 # 8-byte Folded Reload + fmov.d $fa1, $fs3 pcaddu18i $ra, %call36(atan2) jirl $ra, $ra, 0 fmov.d $fs0, $fa0 - fmadd.d $fa0, $fs4, $fs4, $fs3 + fmadd.d $fa0, $fs3, $fs3, $fs4 fsqrt.d $fs7, $fa0 .LBB0_8: - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_2) - fld.d $fa6, $sp, 56 # 8-byte Folded Reload - fld.d $fa1, $sp, 40 # 8-byte Folded Reload - fmul.d $fa1, $fa6, $fa1 - fcmp.clt.d $fcc0, $fa0, $fa1 - fsel $fa2, $fa1, $fa0, $fcc0 + fld.d $fa5, $sp, 56 # 8-byte Folded Reload + fld.d $fa0, $sp, 40 # 8-byte Folded Reload + fmul.d $fa0, $fa5, $fa0 + lu12i.w $a0, -262144 + lu52i.d $a1, $a0, 1045 + movgr2fr.d $fa1, $a1 + fcmp.clt.d $fcc0, $fa1, $fa0 + fsel $fa2, $fa0, $fa1, $fcc0 fld.d $fa3, $sp, 24 # 8-byte Folded Reload fmul.d $fa3, $fs1, $fa3 fcmp.clt.d $fcc0, $fa2, $fa3 fsel $fa2, $fa3, $fa2, $fcc0 - pcalau12i $a0, %pc_hi20(.LCPI0_3) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_3) - fmul.d $fa5, $fs5, $fs2 - fcmp.clt.d $fcc0, $fa2, $fa5 - fsel $fs2, $fa5, $fa2, $fcc0 - fmax.d $fa1, $fa1, $fa4 - fcmp.clt.d $fcc0, $fa1, $fa3 - fsel $fa1, $fa1, $fa3, $fcc0 - fcmp.clt.d $fcc0, $fa1, $fa5 - fsel $fs3, $fa1, $fa5, $fcc0 - fld.d $fa1, $sp, 48 # 8-byte Folded Reload - fmul.d $fa1, $fa6, $fa1 - fcmp.clt.d $fcc0, $fa0, $fa1 - fsel $fa0, $fa1, $fa0, $fcc0 - fld.d $fa2, $sp, 32 # 8-byte Folded Reload - fmul.d $fa2, $fs1, $fa2 - fcmp.clt.d $fcc0, $fa0, $fa2 - fsel $fa0, $fa2, $fa0, $fcc0 - fmul.d $fa3, $fs5, $fs6 + fmul.d $fa4, $fs5, $fs2 + fcmp.clt.d $fcc0, $fa2, $fa4 + fsel $fs2, $fa4, $fa2, $fcc0 + lu52i.d $a0, $a0, -1003 + movgr2fr.d $fa2, $a0 + fmax.d $fa0, $fa0, $fa2 fcmp.clt.d $fcc0, $fa0, $fa3 - fsel $fs1, $fa3, $fa0, $fcc0 - fmax.d $fa0, $fa1, $fa4 - fcmp.clt.d $fcc0, $fa0, $fa2 - fsel $fa0, $fa0, $fa2, $fcc0 + fsel $fa0, $fa0, $fa3, $fcc0 + fcmp.clt.d $fcc0, $fa0, $fa4 + fsel $fs3, $fa0, $fa4, $fcc0 + fld.d $fa0, $sp, 48 # 8-byte Folded Reload + fmul.d $fa0, $fa5, $fa0 + fcmp.clt.d $fcc0, $fa1, $fa0 + fsel $fa1, $fa0, $fa1, $fcc0 + fld.d $fa3, $sp, 32 # 8-byte Folded Reload + fmul.d $fa3, $fs1, $fa3 + fcmp.clt.d $fcc0, $fa1, $fa3 + fsel $fa1, $fa3, $fa1, $fcc0 + fmul.d $fa4, $fs5, $fs6 + fcmp.clt.d $fcc0, $fa1, $fa4 + fsel $fs1, $fa4, $fa1, $fcc0 + fmax.d $fa0, $fa0, $fa2 fcmp.clt.d $fcc0, $fa0, $fa3 - fsel $fs4, $fa0, $fa3, $fcc0 + fsel $fa0, $fa0, $fa3, $fcc0 + fcmp.clt.d $fcc0, $fa0, $fa4 + fsel $fs4, $fa0, $fa4, $fcc0 fld.d $fa0, $sp, 64 # 8-byte Folded Reload fadd.d $fs0, $fa0, $fs0 fmov.d $fa0, $fs0 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z48.s b/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z48.s index 8b031776..c54354a1 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z48.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z48.s @@ -4463,14 +4463,7 @@ PDFFile_Cleanup: # @PDFFile_Cleanup .Lfunc_end26: .size PDFFile_Cleanup, .Lfunc_end26-PDFFile_Cleanup # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function PDFPage_EvalExpr -.LCPI27_0: - .dword 0x400921fb54442d18 # double 3.1415926535897931 -.LCPI27_1: - .dword 0x4066800000000000 # double 180 - .text - .p2align 5 + .p2align 5 # -- Begin function PDFPage_EvalExpr .type PDFPage_EvalExpr,@function PDFPage_EvalExpr: # @PDFPage_EvalExpr # %bb.0: @@ -4897,13 +4890,18 @@ PDFPage_EvalExpr: # @PDFPage_EvalExpr b .LBB27_11 .LBB27_66: fld.s $fa0, $sp, 16 - pcalau12i $a0, %pc_hi20(.LCPI27_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI27_0) - pcalau12i $a0, %pc_hi20(.LCPI27_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI27_1) fcvt.d.s $fa0, $fa0 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 - fdiv.d $fa0, $fa0, $fa2 + ori $a0, $zero, 0 + lu32i.d $a0, 425984 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 fcvt.s.d $fa0, $fa0 @@ -4942,13 +4940,18 @@ PDFPage_EvalExpr: # @PDFPage_EvalExpr b .LBB27_11 .LBB27_72: fld.s $fa0, $sp, 16 - pcalau12i $a0, %pc_hi20(.LCPI27_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI27_0) - pcalau12i $a0, %pc_hi20(.LCPI27_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI27_1) fcvt.d.s $fa0, $fa0 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 - fdiv.d $fa0, $fa0, $fa2 + ori $a0, $zero, 0 + lu32i.d $a0, 425984 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 fcvt.s.d $fa0, $fa0 diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z49.s b/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z49.s index 0b451ba0..c86ebd64 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z49.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z49.s @@ -1472,20 +1472,15 @@ PS_PrintInitialize: # @PS_PrintInitialize .Lfunc_end9: .size PS_PrintInitialize, .Lfunc_end9-PS_PrintInitialize # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function PS_PrintLength -.LCPI10_0: - .word 0x440dc000 # float 567 - .text - .p2align 5 + .p2align 5 # -- Begin function PS_PrintLength .type PS_PrintLength,@function PS_PrintLength: # @PS_PrintLength # %bb.0: - pcalau12i $a2, %pc_hi20(.LCPI10_0) - fld.s $fa0, $a2, %pc_lo12(.LCPI10_0) + movgr2fr.w $fa0, $a1 + ffint.s.w $fa0, $fa0 + lu12i.w $a1, 278748 movgr2fr.w $fa1, $a1 - ffint.s.w $fa1, $fa1 - fdiv.s $fa0, $fa1, $fa0 + fdiv.s $fa0, $fa0, $fa1 fcvt.d.s $fa0, $fa0 movfr2gr.d $a2, $fa0 pcalau12i $a1, %pc_hi20(.L.str.40) @@ -3946,24 +3941,19 @@ PS_PrintUnderline: # @PS_PrintUnderline .Lfunc_end19: .size PS_PrintUnderline, .Lfunc_end19-PS_PrintUnderline # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function PS_CoordRotate -.LCPI20_0: - .word 0x3c000000 # float 0.0078125 - .text - .p2align 5 + .p2align 5 # -- Begin function PS_CoordRotate .type PS_CoordRotate,@function PS_CoordRotate: # @PS_CoordRotate # %bb.0: addi.d $sp, $sp, -16 st.d $ra, $sp, 8 # 8-byte Folded Spill pcalau12i $a1, %pc_hi20(out_fp) - pcalau12i $a2, %pc_hi20(.LCPI20_0) - fld.s $fa0, $a2, %pc_lo12(.LCPI20_0) ld.d $a3, $a1, %pc_lo12(out_fp) + movgr2fr.w $fa0, $a0 + ffint.s.w $fa0, $fa0 + lu12i.w $a0, 245760 movgr2fr.w $fa1, $a0 - ffint.s.w $fa1, $fa1 - fmul.s $fa0, $fa1, $fa0 + fmul.s $fa0, $fa0, $fa1 fcvt.d.s $fa0, $fa0 movfr2gr.d $a2, $fa0 pcalau12i $a0, %pc_hi20(.L.str.192) diff --git a/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z50.s b/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z50.s index 333369ff..2f34cf03 100644 --- a/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z50.s +++ b/results/MultiSource/Benchmarks/MiBench/consumer-typeset/CMakeFiles/consumer-typeset.dir/z50.s @@ -725,20 +725,15 @@ PDF_PrintInitialize: # @PDF_PrintInitialize .Lfunc_end8: .size PDF_PrintInitialize, .Lfunc_end8-PDF_PrintInitialize # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function PDF_PrintLength -.LCPI9_0: - .word 0x440dc000 # float 567 - .text - .p2align 5 + .p2align 5 # -- Begin function PDF_PrintLength .type PDF_PrintLength,@function PDF_PrintLength: # @PDF_PrintLength # %bb.0: - pcalau12i $a2, %pc_hi20(.LCPI9_0) - fld.s $fa0, $a2, %pc_lo12(.LCPI9_0) + movgr2fr.w $fa0, $a1 + ffint.s.w $fa0, $fa0 + lu12i.w $a1, 278748 movgr2fr.w $fa1, $a1 - ffint.s.w $fa1, $fa1 - fdiv.s $fa0, $fa1, $fa0 + fdiv.s $fa0, $fa0, $fa1 fcvt.d.s $fa0, $fa0 movfr2gr.d $a2, $fa0 pcalau12i $a1, %pc_hi20(.L.str.12) @@ -869,12 +864,7 @@ PDF_PrintMapping: # @PDF_PrintMapping .Lfunc_end12: .size PDF_PrintMapping, .Lfunc_end12-PDF_PrintMapping # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function PDF_PrintBeforeFirstPage -.LCPI13_0: - .word 0x3d4ccccd # float 0.0500000007 - .text - .p2align 5 + .p2align 5 # -- Begin function PDF_PrintBeforeFirstPage .type PDF_PrintBeforeFirstPage,@function PDF_PrintBeforeFirstPage: # @PDF_PrintBeforeFirstPage # %bb.0: @@ -905,8 +895,9 @@ PDF_PrintBeforeFirstPage: # @PDF_PrintBeforeFirstPage pcaddu18i $ra, %call36(FontPrintPageSetup) jirl $ra, $ra, 0 ld.d $a0, $fp, %pc_lo12(out_fp) - pcalau12i $a1, %pc_hi20(.LCPI13_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI13_0) + lu12i.w $a1, 251084 + ori $a1, $a1, 3277 + movgr2fr.w $fa0, $a1 ori $a1, $zero, 10 pcaddu18i $ra, %call36(PDFPage_Init) jirl $ra, $ra, 0 @@ -925,12 +916,7 @@ PDF_PrintBeforeFirstPage: # @PDF_PrintBeforeFirstPage .Lfunc_end13: .size PDF_PrintBeforeFirstPage, .Lfunc_end13-PDF_PrintBeforeFirstPage # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function PDF_PrintBetweenPages -.LCPI14_0: - .word 0x3d4ccccd # float 0.0500000007 - .text - .p2align 5 + .p2align 5 # -- Begin function PDF_PrintBetweenPages .type PDF_PrintBetweenPages,@function PDF_PrintBetweenPages: # @PDF_PrintBetweenPages # %bb.0: @@ -942,8 +928,9 @@ PDF_PrintBetweenPages: # @PDF_PrintBetweenPages pcaddu18i $ra, %call36(PDFPage_Cleanup) jirl $ra, $ra, 0 ld.d $a0, $fp, %pc_lo12(out_fp) - pcalau12i $a1, %pc_hi20(.LCPI14_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI14_0) + lu12i.w $a1, 251084 + ori $a1, $a1, 3277 + movgr2fr.w $fa0, $a1 ori $a1, $zero, 10 pcaddu18i $ra, %call36(PDFPage_Init) jirl $ra, $ra, 0 @@ -1336,14 +1323,7 @@ PDF_PrintUnderline: # @PDF_PrintUnderline .Lfunc_end17: .size PDF_PrintUnderline, .Lfunc_end17-PDF_PrintUnderline # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function PDF_CoordRotate -.LCPI18_0: - .dword 0x400921fb54442d18 # double 3.1415926535897931 -.LCPI18_1: - .dword 0x4066800000000000 # double 180 - .text - .p2align 5 + .p2align 5 # -- Begin function PDF_CoordRotate .type PDF_CoordRotate,@function PDF_CoordRotate: # @PDF_CoordRotate # %bb.0: @@ -1367,13 +1347,18 @@ PDF_CoordRotate: # @PDF_CoordRotate st.d $ra, $sp, 8 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(out_fp) ld.d $a0, $a0, %pc_lo12(out_fp) - pcalau12i $a2, %pc_hi20(.LCPI18_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI18_0) - pcalau12i $a2, %pc_hi20(.LCPI18_1) - fld.d $fa1, $a2, %pc_lo12(.LCPI18_1) - movgr2fr.w $fa2, $a1 - ffint.d.w $fa2, $fa2 - fmul.d $fa0, $fa2, $fa0 + movgr2fr.w $fa0, $a1 + ffint.d.w $fa0, $fa0 + lu12i.w $a1, 345154 + ori $a1, $a1, 3352 + lu32i.d $a1, -450053 + lu52i.d $a1, $a1, 1024 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 + ori $a1, $zero, 0 + lu32i.d $a1, 425984 + lu52i.d $a1, $a1, 1030 + movgr2fr.d $fa1, $a1 fdiv.d $fa0, $fa0, $fa1 fcvt.s.d $fa0, $fa0 pcaddu18i $ra, %call36(PDFPage_Rotate) @@ -1387,25 +1372,24 @@ PDF_CoordRotate: # @PDF_CoordRotate .Lfunc_end18: .size PDF_CoordRotate, .Lfunc_end18-PDF_CoordRotate # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function PDF_CoordScale -.LCPI19_0: - .dword 0x3f847ae147ae147b # double 0.01 - .text - .p2align 5 + .p2align 5 # -- Begin function PDF_CoordScale .type PDF_CoordScale,@function PDF_CoordScale: # @PDF_CoordScale # %bb.0: - fcvt.d.s $fa3, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI19_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI19_0) - vldi $vr4, -784 - fadd.d $fa3, $fa3, $fa4 - fabs.d $fa3, $fa3 + fcvt.d.s $fa2, $fa0 + vldi $vr3, -784 + fadd.d $fa2, $fa2, $fa3 + fabs.d $fa3, $fa2 + lu12i.w $a0, 293601 + ori $a0, $a0, 1147 + lu32i.d $a0, 293601 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa2, $a0 fcmp.clt.d $fcc0, $fa2, $fa3 bcnez $fcc0, .LBB19_2 # %bb.1: fcvt.d.s $fa3, $fa1 + vldi $vr4, -784 fadd.d $fa3, $fa3, $fa4 fabs.d $fa3, $fa3 fcmp.cule.d $fcc0, $fa3, $fa2 diff --git a/results/MultiSource/Benchmarks/MiBench/telecomm-FFT/CMakeFiles/telecomm-fft.dir/fourierf.s b/results/MultiSource/Benchmarks/MiBench/telecomm-FFT/CMakeFiles/telecomm-fft.dir/fourierf.s index 85fa560e..723271fc 100644 --- a/results/MultiSource/Benchmarks/MiBench/telecomm-FFT/CMakeFiles/telecomm-fft.dir/fourierf.s +++ b/results/MultiSource/Benchmarks/MiBench/telecomm-FFT/CMakeFiles/telecomm-fft.dir/fourierf.s @@ -4,22 +4,6 @@ .LCPI0_0: .dword 0xc01921fb54442d18 # double -6.2831853071795862 .dword 0x401921fb54442d18 # double 6.2831853071795862 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI0_1: - .dword 0x3fe6a09e667f3bcd # double 0.70710678118654757 -.LCPI0_2: - .dword 0x3fe6a09e667f3bcc # double 0.70710678118654746 -.LCPI0_3: - .dword 0x3fe921fb54442d18 # double 0.78539816339744828 -.LCPI0_4: - .dword 0x3fc921fb54442d18 # double 0.19634954084936207 -.LCPI0_5: - .dword 0x3fc8f8b83c69a60a # double 0.19509032201612825 -.LCPI0_6: - .dword 0xbfe921fb54442d18 # double -0.78539816339744828 -.LCPI0_7: - .dword 0xbfc921fb54442d18 # double -0.19634954084936207 .text .globl fft_float .p2align 5 @@ -162,26 +146,37 @@ fft_float: # @fft_float addi.d $a1, $a1, %pc_lo12(.LCPI0_0) fldx.d $fa0, $a1, $a0 fst.d $fa0, $sp, 56 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fs6, $a0, %pc_lo12(.LCPI0_1) - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fs7, $a0, %pc_lo12(.LCPI0_2) - pcalau12i $a0, %pc_hi20(.LCPI0_3) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_3) + ori $a0, $zero, 2 + lu12i.w $a1, 419827 + ori $a2, $a1, 3021 + lu32i.d $a2, 434334 + lu52i.d $a2, $a2, 1022 + movgr2fr.d $fs6, $a2 + ori $a1, $a1, 3020 + lu32i.d $a1, 434334 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fs7, $a1 + lu12i.w $a1, 345154 + ori $a1, $a1, 3352 + lu32i.d $a1, -450053 + lu52i.d $a2, $a1, 1022 + movgr2fr.d $fa0, $a2 fst.d $fa0, $sp, 48 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI0_6) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_6) + lu52i.d $a2, $a1, -1026 + movgr2fr.d $fa0, $a2 fst.d $fa0, $sp, 40 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI0_7) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_7) + lu52i.d $a2, $a1, -1028 + movgr2fr.d $fa0, $a2 fst.d $fa0, $sp, 24 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI0_5) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_5) + lu12i.w $a2, 247450 + ori $a2, $a2, 1546 + lu32i.d $a2, -460616 + lu52i.d $a2, $a2, 1020 + movgr2fr.d $fa0, $a2 fst.d $fa0, $sp, 32 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI0_4) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_4) + lu52i.d $a1, $a1, 1020 + movgr2fr.d $fa0, $a1 fst.d $fa0, $sp, 16 # 8-byte Folded Spill - ori $a0, $zero, 2 b .LBB0_17 .p2align 4, , 16 .LBB0_16: # %._crit_edge124 @@ -388,22 +383,6 @@ fft_float: # @fft_float .LCPI1_0: .dword 0xc01921fb54442d18 # double -6.2831853071795862 .dword 0x401921fb54442d18 # double 6.2831853071795862 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI1_1: - .dword 0x3fe6a09e667f3bcd # double 0.70710678118654757 -.LCPI1_2: - .dword 0x3fe6a09e667f3bcc # double 0.70710678118654746 -.LCPI1_3: - .dword 0x3fe921fb54442d18 # double 0.78539816339744828 -.LCPI1_4: - .dword 0x3fc921fb54442d18 # double 0.19634954084936207 -.LCPI1_5: - .dword 0x3fc8f8b83c69a60a # double 0.19509032201612825 -.LCPI1_6: - .dword 0xbfe921fb54442d18 # double -0.78539816339744828 -.LCPI1_7: - .dword 0xbfc921fb54442d18 # double -0.19634954084936207 .text .globl fft_float_StrictFP .p2align 5 @@ -546,26 +525,37 @@ fft_float_StrictFP: # @fft_float_StrictFP addi.d $a1, $a1, %pc_lo12(.LCPI1_0) fldx.d $fa0, $a1, $a0 fst.d $fa0, $sp, 56 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fs6, $a0, %pc_lo12(.LCPI1_1) - pcalau12i $a0, %pc_hi20(.LCPI1_2) - fld.d $fs7, $a0, %pc_lo12(.LCPI1_2) - pcalau12i $a0, %pc_hi20(.LCPI1_3) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_3) + ori $a0, $zero, 2 + lu12i.w $a1, 419827 + ori $a2, $a1, 3021 + lu32i.d $a2, 434334 + lu52i.d $a2, $a2, 1022 + movgr2fr.d $fs6, $a2 + ori $a1, $a1, 3020 + lu32i.d $a1, 434334 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fs7, $a1 + lu12i.w $a1, 345154 + ori $a1, $a1, 3352 + lu32i.d $a1, -450053 + lu52i.d $a2, $a1, 1022 + movgr2fr.d $fa0, $a2 fst.d $fa0, $sp, 48 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_6) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_6) + lu52i.d $a2, $a1, -1026 + movgr2fr.d $fa0, $a2 fst.d $fa0, $sp, 40 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_7) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_7) + lu52i.d $a2, $a1, -1028 + movgr2fr.d $fa0, $a2 fst.d $fa0, $sp, 24 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_5) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_5) + lu12i.w $a2, 247450 + ori $a2, $a2, 1546 + lu32i.d $a2, -460616 + lu52i.d $a2, $a2, 1020 + movgr2fr.d $fa0, $a2 fst.d $fa0, $sp, 32 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_4) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_4) + lu52i.d $a1, $a1, 1020 + movgr2fr.d $fa0, $a1 fst.d $fa0, $sp, 16 # 8-byte Folded Spill - ori $a0, $zero, 2 b .LBB1_17 .p2align 4, , 16 .LBB1_16: # %._crit_edge124 diff --git a/results/MultiSource/Benchmarks/MiBench/telecomm-FFT/CMakeFiles/telecomm-fft.dir/main.s b/results/MultiSource/Benchmarks/MiBench/telecomm-FFT/CMakeFiles/telecomm-fft.dir/main.s index 310710ff..0d6fccb5 100644 --- a/results/MultiSource/Benchmarks/MiBench/telecomm-FFT/CMakeFiles/telecomm-fft.dir/main.s +++ b/results/MultiSource/Benchmarks/MiBench/telecomm-FFT/CMakeFiles/telecomm-fft.dir/main.s @@ -51,10 +51,6 @@ srand: # @srand .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI2_1: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 .text .globl main .p2align 5 @@ -348,8 +344,11 @@ main: # @main addi.d $a0, $a0, %pc_lo12(.Lstr) pcaddu18i $ra, %call36(puts) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI2_1) - fld.d $fs0, $a0, %pc_lo12(.LCPI2_1) + lu12i.w $a0, -487882 + ori $a0, $a0, 2289 + lu32i.d $a0, 325813 + lu52i.d $s2, $a0, 1006 + movgr2fr.d $fs0, $s2 pcalau12i $a0, %pc_hi20(.L.str.6) addi.d $s1, $a0, %pc_lo12(.L.str.6) move $fp, $s8 @@ -388,11 +387,11 @@ main: # @main pcalau12i $a0, %pc_hi20(.L.str.6) addi.d $s1, $a0, %pc_lo12(.L.str.6) move $s0, $s4 - move $s2, $s7 + move $s3, $s7 .p2align 4, , 16 .LBB2_28: # %.lr.ph119 # =>This Inner Loop Header: Depth=1 - fld.s $fa1, $s2, 0 + fld.s $fa1, $s3, 0 fld.s $fa0, $s0, 0 fsub.s $fa2, $fa1, $fa0 fabs.s $fa2, $fa2 @@ -406,7 +405,7 @@ main: # @main move $a0, $s1 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - addi.d $s2, $s2, 4 + addi.d $s3, $s3, 4 addi.d $fp, $fp, -1 addi.d $s0, $s0, 4 bnez $fp, .LBB2_28 @@ -451,10 +450,7 @@ main: # @main movfr2gr.d $a3, $fa0 pcalau12i $a1, %pc_hi20(.L.str.9) addi.d $a1, $a1, %pc_lo12(.L.str.9) - lu12i.w $a4, -487882 - ori $a4, $a4, 2289 - lu32i.d $a4, 325813 - lu52i.d $a4, $a4, 1006 + move $a4, $s2 pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 ori $a0, $zero, 1 diff --git a/results/MultiSource/Benchmarks/NPB-serial/is/CMakeFiles/is.dir/is.s b/results/MultiSource/Benchmarks/NPB-serial/is/CMakeFiles/is.dir/is.s index 367ce2f0..a65d6fd3 100644 --- a/results/MultiSource/Benchmarks/NPB-serial/is/CMakeFiles/is.dir/is.s +++ b/results/MultiSource/Benchmarks/NPB-serial/is/CMakeFiles/is.dir/is.s @@ -1,16 +1,6 @@ .file "is.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function randlc -.LCPI0_0: - .dword 0x3e80000000000000 # double 1.1920928955078125E-7 -.LCPI0_1: - .dword 0x4160000000000000 # double 8388608 -.LCPI0_2: - .dword 0x3d10000000000000 # double 1.4210854715202004E-14 -.LCPI0_3: - .dword 0x42d0000000000000 # double 70368744177664 .text - .globl randlc + .globl randlc # -- Begin function randlc .p2align 5 .type randlc,@function randlc: # @randlc @@ -22,35 +12,31 @@ randlc: # @randlc pcalau12i $a2, %pc_hi20(randlc.R23) fld.d $fa3, $a2, %pc_lo12(randlc.R23) pcalau12i $a2, %pc_hi20(randlc.T23) - fld.d $fa1, $a2, %pc_lo12(randlc.T23) + fld.d $fa2, $a2, %pc_lo12(randlc.T23) pcalau12i $a2, %pc_hi20(randlc.R46) fld.d $fa0, $a2, %pc_lo12(randlc.R46) pcalau12i $a2, %pc_hi20(randlc.T46) - fld.d $fa2, $a2, %pc_lo12(randlc.T46) + fld.d $fa1, $a2, %pc_lo12(randlc.T46) b .LBB0_3 .LBB0_2: # %.preheader pcalau12i $a3, %pc_hi20(randlc.R23) lu52i.d $a4, $zero, 1000 st.d $a4, $a3, %pc_lo12(randlc.R23) pcalau12i $a3, %pc_hi20(randlc.T23) - lu52i.d $a4, $zero, 1046 - st.d $a4, $a3, %pc_lo12(randlc.T23) + lu52i.d $a5, $zero, 1046 + st.d $a5, $a3, %pc_lo12(randlc.T23) pcalau12i $a3, %pc_hi20(randlc.R46) - lu52i.d $a4, $zero, 977 - st.d $a4, $a3, %pc_lo12(randlc.R46) + lu52i.d $a6, $zero, 977 + st.d $a6, $a3, %pc_lo12(randlc.R46) pcalau12i $a3, %pc_hi20(randlc.T46) - pcalau12i $a4, %pc_hi20(.LCPI0_0) - fld.d $fa3, $a4, %pc_lo12(.LCPI0_0) - pcalau12i $a4, %pc_hi20(.LCPI0_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI0_1) - pcalau12i $a4, %pc_hi20(.LCPI0_2) - fld.d $fa0, $a4, %pc_lo12(.LCPI0_2) - pcalau12i $a4, %pc_hi20(.LCPI0_3) - fld.d $fa2, $a4, %pc_lo12(.LCPI0_3) - lu52i.d $a4, $zero, 1069 - st.d $a4, $a3, %pc_lo12(randlc.T46) + lu52i.d $a7, $zero, 1069 + st.d $a7, $a3, %pc_lo12(randlc.T46) ori $a3, $zero, 1 st.b $a3, $a2, %pc_lo12(randlc.KS) + movgr2fr.d $fa3, $a4 + movgr2fr.d $fa2, $a5 + movgr2fr.d $fa0, $a6 + movgr2fr.d $fa1, $a7 .LBB0_3: fld.d $fa4, $a1, 0 fmul.d $fa5, $fa3, $fa4 @@ -59,7 +45,7 @@ randlc: # @randlc movgr2fr.w $fa5, $a1 fld.d $fa6, $a0, 0 ffint.d.w $fa5, $fa5 - fneg.d $fa7, $fa1 + fneg.d $fa7, $fa2 fmadd.d $fa4, $fa7, $fa5, $fa4 fmul.d $ft0, $fa3, $fa6 ftintrz.w.d $ft0, $ft0 @@ -76,34 +62,21 @@ randlc: # @randlc ffint.d.w $fa3, $fa3 fmadd.d $fa3, $fa7, $fa3, $fa5 fmul.d $fa4, $fa4, $fa6 - fmadd.d $fa1, $fa1, $fa3, $fa4 - fmul.d $fa3, $fa0, $fa1 + fmadd.d $fa2, $fa2, $fa3, $fa4 + fmul.d $fa3, $fa0, $fa2 ftintrz.w.d $fa3, $fa3 movfr2gr.s $a1, $fa3 movgr2fr.w $fa3, $a1 ffint.d.w $fa3, $fa3 - fneg.d $fa2, $fa2 - fmadd.d $fa1, $fa2, $fa3, $fa1 + fneg.d $fa1, $fa1 + fmadd.d $fa1, $fa1, $fa3, $fa2 fmul.d $fa0, $fa0, $fa1 fst.d $fa1, $a0, 0 ret .Lfunc_end0: .size randlc, .Lfunc_end0-randlc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function create_seq -.LCPI1_0: - .dword 0x3e80000000000000 # double 1.1920928955078125E-7 -.LCPI1_1: - .dword 0x4160000000000000 # double 8388608 -.LCPI1_2: - .dword 0x3d10000000000000 # double 1.4210854715202004E-14 -.LCPI1_3: - .dword 0x42d0000000000000 # double 70368744177664 -.LCPI1_4: - .dword 0x4120000000000000 # double 524288 - .text - .globl create_seq + .globl create_seq # -- Begin function create_seq .p2align 5 .type create_seq,@function create_seq: # @create_seq @@ -129,17 +102,13 @@ create_seq: # @create_seq lu52i.d $a3, $zero, 977 st.d $a3, $a2, %pc_lo12(randlc.R46) lu52i.d $a2, $zero, 1069 - pcalau12i $a3, %pc_hi20(.LCPI1_0) - fld.d $fa2, $a3, %pc_lo12(.LCPI1_0) - pcalau12i $a3, %pc_hi20(.LCPI1_1) - fld.d $fa3, $a3, %pc_lo12(.LCPI1_1) - pcalau12i $a3, %pc_hi20(.LCPI1_2) - fld.d $fa4, $a3, %pc_lo12(.LCPI1_2) - pcalau12i $a3, %pc_hi20(.LCPI1_3) - fld.d $fa7, $a3, %pc_lo12(.LCPI1_3) st.d $a2, $a1, %pc_lo12(randlc.T46) ori $a1, $zero, 1 st.b $a1, $a0, %pc_lo12(randlc.KS) + movgr2fr.d $fa2, $a5 + movgr2fr.d $fa3, $a4 + movgr2fr.d $fa4, $a3 + movgr2fr.d $fa7, $a2 .LBB1_3: # %.peel.next fmul.d $fa5, $fa1, $fa2 ftintrz.w.d $fa5, $fa5 @@ -235,17 +204,17 @@ create_seq: # @create_seq ffint.d.w $ft2, $ft2 fmadd.d $ft0, $fa6, $ft2, $ft0 fmul.d $fa0, $fa1, $fa0 - fmadd.d $ft0, $fa3, $ft0, $fa0 + fmadd.d $fa0, $fa3, $ft0, $fa0 + fmul.d $ft0, $fa4, $fa0 + ftintrz.w.d $ft0, $ft0 + movfr2gr.s $a0, $ft0 + movgr2fr.w $ft0, $a0 + ffint.d.w $ft0, $ft0 + fmadd.d $ft0, $fa7, $ft0, $fa0 fmul.d $fa0, $fa4, $ft0 - ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a0, $fa0 - movgr2fr.w $fa0, $a0 - ffint.d.w $ft2, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI1_4) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_4) - fmadd.d $ft0, $fa7, $ft2, $ft0 - fmul.d $ft2, $fa4, $ft0 - fadd.d $ft1, $ft1, $ft2 + fadd.d $ft1, $ft1, $fa0 + lu52i.d $a0, $zero, 1042 + movgr2fr.d $fa0, $a0 fmul.d $ft1, $ft1, $fa0 ftintrz.w.d $ft1, $ft1 movfr2gr.s $a0, $ft1 @@ -878,14 +847,7 @@ rank: # @rank .Lfunc_end4: .size rank, .Lfunc_end4-rank # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI5_0: - .dword 0x41b2b9b0a1000000 # double 314159265 -.LCPI5_1: - .dword 0x41d2309ce5400000 # double 1220703125 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -927,10 +889,14 @@ main: # @main ori $a1, $zero, 10 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI5_0) - pcalau12i $a0, %pc_hi20(.LCPI5_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI5_1) + lu12i.w $a0, -389120 + lu32i.d $a0, 178608 + lu52i.d $a0, $a0, 1051 + movgr2fr.d $fa0, $a0 + lu12i.w $a0, -109568 + lu32i.d $a0, 143516 + lu52i.d $a0, $a0, 1053 + movgr2fr.d $fa1, $a0 pcaddu18i $ra, %call36(create_seq) jirl $ra, $ra, 0 ori $a0, $zero, 1 diff --git a/results/MultiSource/Benchmarks/Olden/bh/CMakeFiles/bh.dir/newbh.s b/results/MultiSource/Benchmarks/Olden/bh/CMakeFiles/bh.dir/newbh.s index b5925b57..4b9da8d2 100644 --- a/results/MultiSource/Benchmarks/Olden/bh/CMakeFiles/bh.dir/newbh.s +++ b/results/MultiSource/Benchmarks/Olden/bh/CMakeFiles/bh.dir/newbh.s @@ -31,12 +31,6 @@ main: # @main .LCPI1_0: .dword 0xc000000000000000 # double -2 .dword 0x4010000000000000 # double 4 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI1_1: - .dword 0x3f8999999999999a # double 0.012500000000000001 -.LCPI1_2: - .dword 0x4000028f5c28f5c3 # double 2.0012500000000002 .text .globl old_main .p2align 5 @@ -237,12 +231,18 @@ old_main: # @old_main addi.d $s3, $s3, 8 blt $s6, $a0, .LBB1_9 .LBB1_10: # %.preheader.preheader - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fs0, $a0, %pc_lo12(.LCPI1_1) - pcalau12i $a0, %pc_hi20(.LCPI1_2) - fld.d $fs1, $a0, %pc_lo12(.LCPI1_2) move $a0, $zero - movgr2fr.d $fs2, $zero + movgr2fr.d $fs0, $zero + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, 1016 + movgr2fr.d $fs1, $a1 + lu12i.w $a1, 377487 + ori $a1, $a1, 1475 + lu32i.d $a1, 655 + lu52i.d $a1, $a1, 1024 + movgr2fr.d $fs2, $a1 ori $s1, $zero, 9 .p2align 4, , 16 .LBB1_11: # %.preheader @@ -252,8 +252,8 @@ old_main: # @old_main move $a1, $s0 pcaddu18i $ra, %call36(stepsystem) jirl $ra, $ra, 0 - fadd.d $fs2, $fs2, $fs0 - fcmp.cule.d $fcc0, $fs1, $fs2 + fadd.d $fs0, $fs0, $fs1 + fcmp.cule.d $fcc0, $fs2, $fs0 bcnez $fcc0, .LBB1_13 # %bb.12: # %.preheader # in Loop: Header=BB1_11 Depth=1 @@ -279,22 +279,7 @@ old_main: # @old_main .Lfunc_end1: .size old_main, .Lfunc_end1-old_main # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function uniform_testdata -.LCPI2_0: - .dword 0x405ec00000000000 # double 123 -.LCPI2_1: - .dword 0x3feff7ced916872b # double 0.99899999999999999 -.LCPI2_2: - .dword 0xbfe5555555555555 # double -0.66666666666666663 -.LCPI2_3: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI2_4: - .dword 0x3ff6a09e667f3bcd # double 1.4142135623730951 -.LCPI2_5: - .dword 0x3ff4d8d7a58fa312 # double 1.3029400317411199 - .text - .globl uniform_testdata + .globl uniform_testdata # -- Begin function uniform_testdata .p2align 5 .type uniform_testdata,@function uniform_testdata: # @uniform_testdata @@ -309,14 +294,16 @@ uniform_testdata: # @uniform_testdata st.d $s4, $sp, 184 # 8-byte Folded Spill st.d $s5, $sp, 176 # 8-byte Folded Spill st.d $s6, $sp, 168 # 8-byte Folded Spill - fst.d $fs0, $sp, 160 # 8-byte Folded Spill - fst.d $fs1, $sp, 152 # 8-byte Folded Spill - fst.d $fs2, $sp, 144 # 8-byte Folded Spill - fst.d $fs3, $sp, 136 # 8-byte Folded Spill - fst.d $fs4, $sp, 128 # 8-byte Folded Spill - fst.d $fs5, $sp, 120 # 8-byte Folded Spill - fst.d $fs6, $sp, 112 # 8-byte Folded Spill - fst.d $fs7, $sp, 104 # 8-byte Folded Spill + st.d $s7, $sp, 160 # 8-byte Folded Spill + st.d $s8, $sp, 152 # 8-byte Folded Spill + fst.d $fs0, $sp, 144 # 8-byte Folded Spill + fst.d $fs1, $sp, 136 # 8-byte Folded Spill + fst.d $fs2, $sp, 128 # 8-byte Folded Spill + fst.d $fs3, $sp, 120 # 8-byte Folded Spill + fst.d $fs4, $sp, 112 # 8-byte Folded Spill + fst.d $fs5, $sp, 104 # 8-byte Folded Spill + fst.d $fs6, $sp, 96 # 8-byte Folded Spill + fst.d $fs7, $sp, 88 # 8-byte Folded Spill move $s4, $a3 move $s1, $a2 move $s2, $a1 @@ -337,46 +324,59 @@ uniform_testdata: # @uniform_testdata move $s3, $a0 blez $s1, .LBB2_12 # %bb.1: # %.lr.ph - vst $vr0, $sp, 80 # 16-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI2_0) + vst $vr0, $sp, 64 # 16-byte Folded Spill move $s6, $zero - movgr2fr.w $fa1, $s4 - ffint.d.w $fa1, $fa1 - fmul.d $fs4, $fa1, $fa0 + movgr2fr.w $fa0, $s4 + ffint.d.w $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -81920 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa1, $a0 + fmul.d $fs4, $fa0, $fa1 bstrpick.d $a0, $s1, 31, 0 movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 frecip.d $fa0, $fa0 - fst.d $fa0, $sp, 48 # 8-byte Folded Spill - movgr2fr.d $fs0, $zero - pcalau12i $a0, %pc_hi20(.LCPI2_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI2_1) - pcalau12i $a0, %pc_hi20(.LCPI2_2) - fld.d $fa0, $a0, %pc_lo12(.LCPI2_2) fst.d $fa0, $sp, 32 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI2_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI2_3) - pcalau12i $a0, %pc_hi20(.LCPI2_4) - fld.d $fa0, $a0, %pc_lo12(.LCPI2_4) - fst.d $fa0, $sp, 24 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI2_5) - fld.d $fa0, $a0, %pc_lo12(.LCPI2_5) + movgr2fr.d $fs0, $zero + lu12i.w $a0, -159384 + ori $a0, $a0, 1835 + lu32i.d $a0, -2098 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, 349525 + ori $a0, $a0, 1365 + lu32i.d $a0, 349525 + lu52i.d $a0, $a0, -1026 + movgr2fr.d $fa0, $a0 fst.d $fa0, $sp, 16 # 8-byte Folded Spill + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1019 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, 419827 + ori $a0, $a0, 3021 + lu32i.d $a0, 434334 + lu52i.d $s4, $a0, 1023 + lu12i.w $a0, -370438 + ori $a0, $a0, 786 + lu32i.d $a0, 317655 + lu52i.d $s7, $a0, 1023 fmov.d $fa1, $fs0 fmov.d $fa2, $fs0 fmov.d $fs2, $fs0 fmov.d $fs5, $fs0 move $s3, $s0 - fst.d $fs1, $sp, 40 # 8-byte Folded Spill + fst.d $fs1, $sp, 24 # 8-byte Folded Spill .p2align 4, , 16 .LBB2_2: # %.preheader99 # =>This Loop Header: Depth=1 # Child Loop BB2_4 Depth 2 # Child Loop BB2_6 Depth 2 - fst.d $fa2, $sp, 64 # 8-byte Folded Spill - fst.d $fa1, $sp, 72 # 8-byte Folded Spill - move $s4, $s3 + fst.d $fa2, $sp, 48 # 8-byte Folded Spill + fst.d $fa1, $sp, 56 # 8-byte Folded Spill + move $s8, $s3 ori $a0, $zero, 144 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 @@ -384,9 +384,9 @@ uniform_testdata: # @uniform_testdata st.w $s2, $a0, 40 st.d $zero, $a0, 136 st.w $s2, $a0, 44 - st.d $a0, $s4, 128 + st.d $a0, $s8, 128 st.h $s5, $a0, 0 - fld.d $fa0, $sp, 48 # 8-byte Folded Reload + fld.d $fa0, $sp, 32 # 8-byte Folded Reload fst.d $fa0, $a0, 8 fmov.d $fa0, $fs4 pcaddu18i $ra, %call36(my_rand) @@ -397,7 +397,7 @@ uniform_testdata: # @uniform_testdata fmov.d $fa2, $fs4 pcaddu18i $ra, %call36(xrand) jirl $ra, $ra, 0 - fld.d $fa1, $sp, 32 # 8-byte Folded Reload + fld.d $fa1, $sp, 16 # 8-byte Folded Reload pcaddu18i $ra, %call36(pow) jirl $ra, $ra, 0 vldi $vr1, -784 @@ -445,11 +445,11 @@ uniform_testdata: # @uniform_testdata vldi $vr0, -1008 fmul.d $fa0, $fs6, $fa0 fst.d $fa0, $s3, 32 - vld $vr2, $sp, 80 # 16-byte Folded Reload + vld $vr2, $sp, 64 # 16-byte Folded Reload vfadd.d $vr2, $vr2, $vr1 - vst $vr2, $sp, 80 # 16-byte Folded Spill + vst $vr2, $sp, 64 # 16-byte Folded Spill fadd.d $fs5, $fs5, $fa0 - fst.d $fs5, $sp, 56 # 8-byte Folded Spill + fst.d $fs5, $sp, 40 # 8-byte Folded Spill .p2align 4, , 16 .LBB2_4: # %.preheader98 # Parent Loop BB2_2 Depth=1 @@ -528,24 +528,24 @@ uniform_testdata: # @uniform_testdata fld.d $fs6, $s3, 56 fmadd.d $fa1, $fs7, $fs7, $fs0 fmadd.d $fa1, $fs6, $fs6, $fa1 - fmadd.d $fa1, $fa0, $fa0, $fa1 - vldi $vr2, -912 - fcmp.clt.d $fcc0, $fa2, $fa1 + fmadd.d $fa2, $fa0, $fa0, $fa1 + vldi $vr1, -912 + fcmp.clt.d $fcc0, $fa1, $fa2 fst.d $fa0, $s3, 64 bcnez $fcc0, .LBB2_6 # %bb.7: # %.preheader97 # in Loop: Header=BB2_2 Depth=1 - fsqrt.d $fa2, $fa1 - fcmp.cor.d $fcc0, $fa2, $fa2 + fsqrt.d $fa1, $fa2 + fcmp.cor.d $fcc0, $fa1, $fa1 bceqz $fcc0, .LBB2_10 .LBB2_8: # %.preheader97.split # in Loop: Header=BB2_2 Depth=1 - fld.d $fa1, $sp, 24 # 8-byte Folded Reload - fmul.d $fa1, $fs5, $fa1 - fdiv.d $fa1, $fa1, $fs1 - fld.d $fa3, $sp, 16 # 8-byte Folded Reload - fmul.d $fa1, $fa1, $fa3 - fdiv.d $fa1, $fa1, $fa2 + movgr2fr.d $fa2, $s4 + fmul.d $fa2, $fs5, $fa2 + fdiv.d $fa2, $fa2, $fs1 + movgr2fr.d $fa3, $s7 + fmul.d $fa2, $fa2, $fa3 + fdiv.d $fa1, $fa2, $fa1 fmul.d $fa2, $fa1, $fs7 fst.d $fa2, $s3, 48 fmul.d $fa3, $fa1, $fs6 @@ -553,13 +553,13 @@ uniform_testdata: # @uniform_testdata fmul.d $fa0, $fa1, $fa0 fst.d $fa0, $s3, 64 fadd.d $fs2, $fs2, $fa2 - fld.d $fa2, $sp, 64 # 8-byte Folded Reload + fld.d $fa2, $sp, 48 # 8-byte Folded Reload fadd.d $fa2, $fa2, $fa3 addi.w $s6, $s6, 1 - fld.d $fa1, $sp, 72 # 8-byte Folded Reload + fld.d $fa1, $sp, 56 # 8-byte Folded Reload fadd.d $fa1, $fa1, $fa0 - fld.d $fs1, $sp, 40 # 8-byte Folded Reload - fld.d $fs5, $sp, 56 # 8-byte Folded Reload + fld.d $fs1, $sp, 24 # 8-byte Folded Reload + fld.d $fs5, $sp, 40 # 8-byte Folded Reload bne $s6, $s1, .LBB2_2 b .LBB2_11 .LBB2_9: # %call.sqrt @@ -570,14 +570,14 @@ uniform_testdata: # @uniform_testdata .LBB2_10: # %call.sqrt176 # in Loop: Header=BB2_2 Depth=1 fst.d $fa0, $sp, 8 # 8-byte Folded Spill - fmov.d $fa0, $fa1 + fmov.d $fa0, $fa2 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - fmov.d $fa2, $fa0 + fmov.d $fa1, $fa0 fld.d $fa0, $sp, 8 # 8-byte Folded Reload b .LBB2_8 .LBB2_11: # %._crit_edge.loopexit - vld $vr0, $sp, 80 # 16-byte Folded Reload + vld $vr0, $sp, 64 # 16-byte Folded Reload vst $vr0, $fp, 0 fst.d $fs5, $fp, 16 fst.d $fs2, $fp, 24 @@ -588,14 +588,16 @@ uniform_testdata: # @uniform_testdata ld.d $a0, $s0, 128 st.d $a0, $fp, 48 st.d $s3, $fp, 56 - fld.d $fs7, $sp, 104 # 8-byte Folded Reload - fld.d $fs6, $sp, 112 # 8-byte Folded Reload - fld.d $fs5, $sp, 120 # 8-byte Folded Reload - fld.d $fs4, $sp, 128 # 8-byte Folded Reload - fld.d $fs3, $sp, 136 # 8-byte Folded Reload - fld.d $fs2, $sp, 144 # 8-byte Folded Reload - fld.d $fs1, $sp, 152 # 8-byte Folded Reload - fld.d $fs0, $sp, 160 # 8-byte Folded Reload + fld.d $fs7, $sp, 88 # 8-byte Folded Reload + fld.d $fs6, $sp, 96 # 8-byte Folded Reload + fld.d $fs5, $sp, 104 # 8-byte Folded Reload + fld.d $fs4, $sp, 112 # 8-byte Folded Reload + fld.d $fs3, $sp, 120 # 8-byte Folded Reload + fld.d $fs2, $sp, 128 # 8-byte Folded Reload + fld.d $fs1, $sp, 136 # 8-byte Folded Reload + fld.d $fs0, $sp, 144 # 8-byte Folded Reload + ld.d $s8, $sp, 152 # 8-byte Folded Reload + ld.d $s7, $sp, 160 # 8-byte Folded Reload ld.d $s6, $sp, 168 # 8-byte Folded Reload ld.d $s5, $sp, 176 # 8-byte Folded Reload ld.d $s4, $sp, 184 # 8-byte Folded Reload @@ -610,12 +612,7 @@ uniform_testdata: # @uniform_testdata .Lfunc_end2: .size uniform_testdata, .Lfunc_end2-uniform_testdata # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function intcoord -.LCPI3_0: - .dword 0x41d0000000000000 # double 1073741824 - .text - .globl intcoord + .globl intcoord # -- Begin function intcoord .p2align 5 .type intcoord,@function intcoord: # @intcoord @@ -629,26 +626,26 @@ intcoord: # @intcoord fdiv.d $fa0, $fa0, $fa1 movgr2fr.d $fa2, $zero fcmp.cle.d $fcc0, $fa2, $fa0 - vldi $vr3, -912 fld.d $fa5, $a1, 8 + vldi $vr3, -912 fcmp.clt.d $fcc1, $fa0, $fa3 fld.d $fa3, $a0, 32 movcf2gr $a0, $fcc0 movcf2gr $a3, $fcc1 - and $a0, $a0, $a3 fsub.d $fa4, $fa4, $fa5 fdiv.d $fa4, $fa4, $fa1 fcmp.cult.d $fcc0, $fa4, $fa2 - pcalau12i $a3, %pc_hi20(.LCPI3_0) + and $a0, $a0, $a3 bcnez $fcc0, .LBB3_3 # %bb.1: vldi $vr5, -912 fcmp.cule.d $fcc0, $fa5, $fa4 - move $a4, $a2 + move $a3, $a2 bcnez $fcc0, .LBB3_4 # %bb.2: - fld.d $fa5, $a3, %pc_lo12(.LCPI3_0) - slli.d $a4, $a0, 32 + slli.d $a3, $a0, 32 + lu52i.d $a2, $zero, 1053 + movgr2fr.d $fa5, $a2 fmul.d $fa4, $fa4, $fa5 vreplvei.d $vr4, $vr4, 0 vfrintrm.d $vr4, $vr4 @@ -657,7 +654,7 @@ intcoord: # @intcoord slli.d $a2, $a2, 32 b .LBB3_4 .LBB3_3: - move $a4, $a2 + move $a3, $a2 .LBB3_4: fld.d $fa4, $a1, 16 fsub.d $fa3, $fa3, $fa4 @@ -670,16 +667,18 @@ intcoord: # @intcoord fcmp.cule.d $fcc0, $fa2, $fa1 bcnez $fcc0, .LBB3_7 # %bb.6: - fld.d $fa2, $a3, %pc_lo12(.LCPI3_0) + lu52i.d $a1, $zero, 1053 + movgr2fr.d $fa2, $a1 fmul.d $fa1, $fa1, $fa2 vreplvei.d $vr1, $vr1, 0 vfrintrm.d $vr1, $vr1 ftintrz.w.d $fa1, $fa1 movfr2gr.s $a1, $fa1 bstrpick.d $a1, $a1, 31, 0 - or $a1, $a4, $a1 + or $a1, $a3, $a1 .LBB3_7: - fld.d $fa1, $a3, %pc_lo12(.LCPI3_0) + lu52i.d $a3, $zero, 1053 + movgr2fr.d $fa1, $a3 fmul.d $fa0, $fa0, $fa1 vreplvei.d $vr0, $vr0, 0 vfrintrm.d $vr0, $vr0 @@ -860,12 +859,7 @@ freetree1: # @freetree1 .Lfunc_end7: .size freetree1, .Lfunc_end7-freetree1 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function maketree -.LCPI8_0: - .dword 0x41d0000000000000 # double 1073741824 - .text - .globl maketree + .globl maketree # -- Begin function maketree .p2align 5 .type maketree,@function maketree: # @maketree @@ -892,9 +886,9 @@ maketree: # @maketree # %bb.1: # %.lr.ph31 addi.d $s3, $fp, 552 ori $s4, $zero, 2 - pcalau12i $a1, %pc_hi20(.LCPI8_0) - fld.d $fs0, $a1, %pc_lo12(.LCPI8_0) - movgr2fr.d $fs1, $zero + movgr2fr.d $fs0, $zero + lu52i.d $a1, $zero, 1053 + movgr2fr.d $fs1, $a1 lu12i.w $s0, 131072 b .LBB8_3 .p2align 4, , 16 @@ -912,7 +906,7 @@ maketree: # @maketree .p2align 4, , 16 .LBB8_4: # %intcoord.exit # in Loop: Header=BB8_6 Depth=2 - fmul.d $fa0, $fa0, $fs0 + fmul.d $fa0, $fa0, $fs1 vreplvei.d $vr0, $vr0, 0 vfrintrm.d $vr0, $vr0 ftintrz.w.d $fa0, $fa0 @@ -934,7 +928,7 @@ maketree: # @maketree # Parent Loop BB8_3 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa0, $s1, 8 - fcmp.ceq.d $fcc0, $fa0, $fs1 + fcmp.ceq.d $fcc0, $fa0, $fs0 bcnez $fcc0, .LBB8_5 # %bb.7: # in Loop: Header=BB8_6 Depth=2 move $a0, $s1 @@ -949,14 +943,14 @@ maketree: # @maketree fsub.d $fa0, $fa0, $fa2 fdiv.d $fa0, $fa0, $fa1 fld.d $fa2, $fp, 8 - fcmp.cle.d $fcc0, $fs1, $fa0 + fcmp.cle.d $fcc0, $fs0, $fa0 fcmp.clt.d $fcc1, $fa0, $fa4 move $a1, $zero movcf2gr $a0, $fcc0 movcf2gr $a2, $fcc1 fsub.d $fa2, $fa3, $fa2 fdiv.d $fa2, $fa2, $fa1 - fcmp.cult.d $fcc0, $fa2, $fs1 + fcmp.cult.d $fcc0, $fa2, $fs0 and $a0, $a0, $a2 bcnez $fcc0, .LBB8_10 # %bb.8: # in Loop: Header=BB8_6 Depth=2 @@ -965,7 +959,7 @@ maketree: # @maketree bcnez $fcc0, .LBB8_11 # %bb.9: # in Loop: Header=BB8_6 Depth=2 slli.d $a3, $a0, 32 - fmul.d $fa2, $fa2, $fs0 + fmul.d $fa2, $fa2, $fs1 vreplvei.d $vr2, $vr2, 0 vfrintrm.d $vr2, $vr2 ftintrz.w.d $fa2, $fa2 @@ -979,14 +973,14 @@ maketree: # @maketree fld.d $fa3, $fp, 16 fsub.d $fa2, $fa2, $fa3 fdiv.d $fa1, $fa2, $fa1 - fcmp.cult.d $fcc0, $fa1, $fs1 + fcmp.cult.d $fcc0, $fa1, $fs0 move $a2, $zero bcnez $fcc0, .LBB8_4 # %bb.12: # in Loop: Header=BB8_6 Depth=2 fcmp.cule.d $fcc0, $fa4, $fa1 bcnez $fcc0, .LBB8_4 # %bb.13: # in Loop: Header=BB8_6 Depth=2 - fmul.d $fa1, $fa1, $fs0 + fmul.d $fa1, $fa1, $fs1 vreplvei.d $vr1, $vr1, 0 vfrintrm.d $vr1, $vr1 ftintrz.w.d $fa1, $fa1 @@ -1109,16 +1103,7 @@ computegrav: # @computegrav .Lfunc_end9: .size computegrav, .Lfunc_end9-computegrav # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function vp -.LCPI10_0: - .dword 0x3f7999999999999a # double 0.0062500000000000003 -.LCPI10_1: - .dword 0x40c3880000000000 # double 1.0E+4 -.LCPI10_2: - .dword 0x3f8999999999999a # double 0.012500000000000001 - .text - .globl vp + .globl vp # -- Begin function vp .p2align 5 .type vp,@function vp: # @vp @@ -1127,18 +1112,19 @@ vp: # @vp st.d $ra, $sp, 8 # 8-byte Folded Spill beqz $a0, .LBB10_26 # %bb.1: # %.preheader128.lr.ph - pcalau12i $a2, %pc_hi20(.LCPI10_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI10_0) lu12i.w $a2, -419431 ori $a2, $a2, 2458 lu32i.d $a2, -419431 - pcalau12i $a3, %pc_hi20(.LCPI10_1) - fld.d $fa1, $a3, %pc_lo12(.LCPI10_1) - pcalau12i $a3, %pc_hi20(.LCPI10_2) - fld.d $fa2, $a3, %pc_lo12(.LCPI10_2) - lu52i.d $a2, $a2, 1015 - vreplgr2vr.d $vr3, $a2 - vldi $vr4, -988 + lu52i.d $a3, $a2, 1015 + movgr2fr.d $fa0, $a3 + vreplgr2vr.d $vr1, $a3 + vldi $vr2, -988 + ori $a3, $zero, 0 + lu32i.d $a3, 231424 + lu52i.d $a3, $a3, 1036 + movgr2fr.d $fa3, $a3 + lu52i.d $a2, $a2, 1016 + movgr2fr.d $fa4, $a2 .p2align 4, , 16 .LBB10_2: # %.preheader128 # =>This Inner Loop Header: Depth=1 @@ -1155,7 +1141,7 @@ vp: # @vp vld $vr10, $a0, 48 fadd.d $fa7, $ft0, $fa7 vfsub.d $vr8, $vr5, $vr9 - vfmul.d $vr8, $vr8, $vr3 + vfmul.d $vr8, $vr8, $vr1 vfadd.d $vr8, $vr10, $vr8 vst $vr8, $a0, 48 fst.d $fa7, $a0, 64 @@ -1174,15 +1160,15 @@ vp: # @vp bceqz $fcc0, .LBB10_29 # %bb.7: # in Loop: Header=BB10_2 Depth=1 fabs.d $ft2, $fa7 - fcmp.clt.d $fcc0, $ft2, $fa4 + fcmp.clt.d $fcc0, $ft2, $fa2 bceqz $fcc0, .LBB10_43 # %bb.8: # in Loop: Header=BB10_2 Depth=1 fabs.d $ft2, $ft0 - fcmp.clt.d $fcc0, $ft2, $fa4 + fcmp.clt.d $fcc0, $ft2, $fa2 bceqz $fcc0, .LBB10_42 # %bb.9: # in Loop: Header=BB10_2 Depth=1 fabs.d $ft2, $ft1 - fcmp.cule.d $fcc0, $fa4, $ft2 + fcmp.cule.d $fcc0, $fa2, $ft2 bcnez $fcc0, .LBB10_41 # %bb.10: # %.preheader123 # in Loop: Header=BB10_2 Depth=1 @@ -1200,15 +1186,15 @@ vp: # @vp bceqz $fcc0, .LBB10_32 # %bb.13: # in Loop: Header=BB10_2 Depth=1 fabs.d $ft2, $ft2 - fcmp.clt.d $fcc0, $ft2, $fa1 + fcmp.clt.d $fcc0, $ft2, $fa3 bceqz $fcc0, .LBB10_40 # %bb.14: # in Loop: Header=BB10_2 Depth=1 fabs.d $ft2, $ft3 - fcmp.clt.d $fcc0, $ft2, $fa1 + fcmp.clt.d $fcc0, $ft2, $fa3 bceqz $fcc0, .LBB10_39 # %bb.15: # in Loop: Header=BB10_2 Depth=1 fabs.d $ft2, $fa6 - fcmp.cule.d $fcc0, $fa1, $ft2 + fcmp.cule.d $fcc0, $fa3, $ft2 bcnez $fcc0, .LBB10_38 # %bb.16: # %.preheader122.preheader # in Loop: Header=BB10_2 Depth=1 @@ -1226,26 +1212,26 @@ vp: # @vp bceqz $fcc0, .LBB10_35 # %bb.19: # in Loop: Header=BB10_2 Depth=1 fabs.d $ft4, $ft4 - fcmp.clt.d $fcc0, $ft4, $fa1 + fcmp.clt.d $fcc0, $ft4, $fa3 bceqz $fcc0, .LBB10_37 # %bb.20: # in Loop: Header=BB10_2 Depth=1 fabs.d $ft4, $ft5 - fcmp.clt.d $fcc0, $ft4, $fa1 + fcmp.clt.d $fcc0, $ft4, $fa3 bceqz $fcc0, .LBB10_36 # %bb.21: # in Loop: Header=BB10_2 Depth=1 fabs.d $ft4, $ft3 - fcmp.clt.d $fcc0, $ft4, $fa1 + fcmp.clt.d $fcc0, $ft4, $fa3 bceqz $fcc0, .LBB10_47 # %bb.22: # in Loop: Header=BB10_2 Depth=1 - vfmul.d $vr12, $vr5, $vr3 + vfmul.d $vr12, $vr5, $vr1 fmul.d $ft5, $fa6, $fa0 vfadd.d $vr10, $vr10, $vr12 fadd.d $ft3, $ft3, $ft5 vreplvei.d $vr5, $vr10, 0 - fmul.d $fa5, $fa5, $fa2 + fmul.d $fa5, $fa5, $fa4 vreplvei.d $vr6, $vr10, 1 - fmul.d $fa6, $fa6, $fa2 - fmul.d $ft6, $ft3, $fa2 + fmul.d $fa6, $fa6, $fa4 + fmul.d $ft6, $ft3, $fa4 fadd.d $fa7, $fa7, $fa5 fadd.d $fa6, $ft0, $fa6 fadd.d $fa5, $ft1, $ft6 @@ -1256,16 +1242,16 @@ vp: # @vp vst $vr8, $a0, 48 fadd.d $ft0, $ft3, $ft5 fabs.d $fa7, $fa7 - fcmp.clt.d $fcc0, $fa7, $fa1 + fcmp.clt.d $fcc0, $fa7, $fa3 fst.d $ft0, $a0, 64 bceqz $fcc0, .LBB10_46 # %bb.23: # in Loop: Header=BB10_2 Depth=1 fabs.d $fa6, $fa6 - fcmp.clt.d $fcc0, $fa6, $fa1 + fcmp.clt.d $fcc0, $fa6, $fa3 bceqz $fcc0, .LBB10_45 # %bb.24: # in Loop: Header=BB10_2 Depth=1 fabs.d $fa5, $fa5 - fcmp.clt.d $fcc0, $fa5, $fa1 + fcmp.clt.d $fcc0, $fa5, $fa3 bceqz $fcc0, .LBB10_44 # %bb.25: # in Loop: Header=BB10_2 Depth=1 ld.d $a0, $a0, 136 @@ -1767,12 +1753,7 @@ hackgrav: # @hackgrav .Lfunc_end17: .size hackgrav, .Lfunc_end17-hackgrav # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function gravsub -.LCPI18_0: - .dword 0x3f647ae147ae147c # double 0.0025000000000000005 - .text - .globl gravsub + .globl gravsub # -- Begin function gravsub .p2align 5 .type gravsub,@function gravsub: # @gravsub @@ -1793,11 +1774,14 @@ gravsub: # @gravsub vreplvei.d $vr0, $vr3, 0 movgr2fr.d $fa1, $zero fmadd.d $fa0, $fa0, $fa0, $fa1 - pcalau12i $a3, %pc_hi20(.LCPI18_0) - fld.d $fa1, $a3, %pc_lo12(.LCPI18_0) - vreplvei.d $vr2, $vr3, 1 - fmadd.d $fa0, $fa2, $fa2, $fa0 + vreplvei.d $vr1, $vr3, 1 + fmadd.d $fa0, $fa1, $fa1, $fa0 fmadd.d $fa0, $fs1, $fs1, $fa0 + lu12i.w $a3, 293601 + ori $a3, $a3, 1148 + lu32i.d $a3, 293601 + lu52i.d $a3, $a3, 1014 + movgr2fr.d $fa1, $a3 fadd.d $fs0, $fa0, $fa1 fsqrt.d $fa0, $fs0 fcmp.cor.d $fcc0, $fa0, $fa0 @@ -1884,14 +1868,7 @@ subdivp: # @subdivp .Lfunc_end19: .size subdivp, .Lfunc_end19-subdivp # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function expandbox -.LCPI20_0: - .dword 0x408f400000000000 # double 1000 -.LCPI20_1: - .dword 0x41d0000000000000 # double 1073741824 - .text - .globl expandbox + .globl expandbox # -- Begin function expandbox .p2align 5 .type expandbox,@function expandbox: # @expandbox @@ -1946,21 +1923,23 @@ expandbox: # @expandbox fcmp.clt.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB20_31 .LBB20_6: # %.lr.ph - pcalau12i $a0, %pc_hi20(.LCPI20_0) - fld.d $fs4, $a0, %pc_lo12(.LCPI20_0) + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs4, $a0 fcmp.cule.d $fcc0, $fs4, $fs1 bcnez $fcc0, .LBB20_32 # %bb.7: # %.preheader52.preheader.preheader vldi $vr7, -928 pcalau12i $s1, %pc_hi20(cp_free_list) ori $s2, $zero, 2 - pcalau12i $a0, %pc_hi20(.LCPI20_1) - fld.d $ft0, $a0, %pc_lo12(.LCPI20_1) - vrepli.b $vr9, 0 - vldi $vr10, -912 + vrepli.b $vr8, 0 + vldi $vr9, -912 + lu52i.d $a0, $zero, 1053 + movgr2fr.d $ft2, $a0 lu12i.w $s3, 131072 - fst.d $ft0, $sp, 32 # 8-byte Folded Spill - vst $vr9, $sp, 16 # 16-byte Folded Spill + vst $vr8, $sp, 16 # 16-byte Folded Spill + fst.d $ft2, $sp, 8 # 8-byte Folded Spill b .LBB20_9 .p2align 4, , 16 .LBB20_8: # %.backedge @@ -2026,9 +2005,9 @@ expandbox: # @expandbox fst.d $fa6, $sp, 40 # 8-byte Folded Spill pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 - vldi $vr10, -912 - vld $vr9, $sp, 16 # 16-byte Folded Reload - fld.d $ft0, $sp, 32 # 8-byte Folded Reload + fld.d $ft2, $sp, 8 # 8-byte Folded Reload + vldi $vr9, -912 + vld $vr8, $sp, 16 # 16-byte Folded Reload vldi $vr7, -928 fld.d $fa6, $sp, 40 # 8-byte Folded Reload .LBB20_19: # %cell_alloc.exit @@ -2036,14 +2015,14 @@ expandbox: # @expandbox move $a1, $zero st.h $s2, $a0, 0 st.w $zero, $a0, 40 - vst $vr9, $a0, 48 - vst $vr9, $a0, 64 - vst $vr9, $a0, 80 + vst $vr8, $a0, 48 + vst $vr8, $a0, 64 + vst $vr8, $a0, 80 fsub.d $fa0, $fs5, $fs3 fdiv.d $fa0, $fa0, $fs1 fcmp.cult.d $fcc0, $fa0, $fs2 - fcmp.cule.d $fcc1, $ft2, $fa0 - vst $vr9, $a0, 96 + fcmp.cule.d $fcc1, $ft1, $fa0 + vst $vr8, $a0, 96 movcf2gr $a2, $fcc0 movcf2gr $a3, $fcc1 or $a2, $a2, $a3 @@ -2054,10 +2033,10 @@ expandbox: # @expandbox bcnez $fcc0, .LBB20_22 # %bb.20: # %cell_alloc.exit # in Loop: Header=BB20_9 Depth=1 - fcmp.cule.d $fcc0, $ft2, $fa1 + fcmp.cule.d $fcc0, $ft1, $fa1 bcnez $fcc0, .LBB20_22 # %bb.21: # in Loop: Header=BB20_9 Depth=1 - fmul.d $fa1, $fa1, $ft0 + fmul.d $fa1, $fa1, $ft2 vreplvei.d $vr1, $vr1, 0 vfrintrm.d $vr1, $vr1 ftintrz.w.d $fa1, $fa1 @@ -2071,18 +2050,18 @@ expandbox: # @expandbox fcmp.cult.d $fcc0, $fa1, $fs2 bcnez $fcc0, .LBB20_33 # %bb.23: # in Loop: Header=BB20_9 Depth=1 - fcmp.cule.d $fcc0, $ft2, $fa1 + fcmp.cule.d $fcc0, $ft1, $fa1 bcnez $fcc0, .LBB20_33 # %bb.24: # in Loop: Header=BB20_9 Depth=1 bnez $a3, .LBB20_33 # %bb.25: # in Loop: Header=BB20_9 Depth=1 addi.d $a3, $a0, 48 - fmul.d $fa1, $fa1, $ft0 + fmul.d $fa1, $fa1, $ft2 vreplvei.d $vr1, $vr1, 0 vfrintrm.d $vr1, $vr1 ftintrz.w.d $fa1, $fa1 movfr2gr.s $a4, $fa1 - fmul.d $fa0, $fa0, $ft0 + fmul.d $fa0, $fa0, $ft2 vreplvei.d $vr0, $vr0, 0 vfrintrm.d $vr0, $vr0 ftintrz.w.d $fa0, $fa0 @@ -2103,7 +2082,7 @@ expandbox: # @expandbox st.d $a0, $fp, 32 bcnez $fcc0, .LBB20_8 # %bb.26: # in Loop: Header=BB20_9 Depth=1 - fcmp.cule.d $fcc0, $ft2, $fa0 + fcmp.cule.d $fcc0, $ft1, $fa0 bcnez $fcc0, .LBB20_8 # %bb.27: # in Loop: Header=BB20_9 Depth=1 fld.d $fa0, $s0, 24 @@ -2112,7 +2091,7 @@ expandbox: # @expandbox fcmp.cult.d $fcc0, $fa0, $fs2 bcnez $fcc0, .LBB20_8 # %bb.28: # in Loop: Header=BB20_9 Depth=1 - fcmp.cule.d $fcc0, $ft2, $fa0 + fcmp.cule.d $fcc0, $ft1, $fa0 bcnez $fcc0, .LBB20_8 # %bb.29: # in Loop: Header=BB20_9 Depth=1 fld.d $fa0, $s0, 16 @@ -2121,7 +2100,7 @@ expandbox: # @expandbox fcmp.cult.d $fcc0, $fa0, $fs2 bcnez $fcc0, .LBB20_8 # %bb.30: # in Loop: Header=BB20_9 Depth=1 - fcmp.clt.d $fcc0, $fa0, $ft2 + fcmp.clt.d $fcc0, $fa0, $ft1 bceqz $fcc0, .LBB20_8 .LBB20_31: # %._crit_edge fld.d $fs7, $sp, 48 # 8-byte Folded Reload @@ -2494,12 +2473,7 @@ ic_test: # @ic_test .Lfunc_end23: .size ic_test, .Lfunc_end23-ic_test # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function intcoord1 -.LCPI24_0: - .dword 0x41d0000000000000 # double 1073741824 - .text - .globl intcoord1 + .globl intcoord1 # -- Begin function intcoord1 .p2align 5 .type intcoord1,@function intcoord1: # @intcoord1 @@ -2508,28 +2482,28 @@ intcoord1: # @intcoord1 fld.d $fa3, $a0, 24 fsub.d $fa0, $fa0, $fa4 fdiv.d $fa0, $fa0, $fa3 - movgr2fr.d $fa4, $zero - vldi $vr5, -912 - fcmp.clt.d $fcc0, $fa0, $fa5 + vldi $vr4, -912 + fcmp.clt.d $fcc0, $fa0, $fa4 fld.d $fa5, $a0, 8 + movgr2fr.d $fa4, $zero fcmp.cle.d $fcc1, $fa4, $fa0 move $a3, $zero movcf2gr $a1, $fcc1 movcf2gr $a2, $fcc0 - and $a2, $a1, $a2 fsub.d $fa1, $fa1, $fa5 fdiv.d $fa1, $fa1, $fa3 fcmp.cult.d $fcc0, $fa1, $fa4 - pcalau12i $a4, %pc_hi20(.LCPI24_0) + and $a2, $a1, $a2 bcnez $fcc0, .LBB24_3 # %bb.1: vldi $vr5, -912 fcmp.cule.d $fcc0, $fa5, $fa1 - move $a5, $a3 + move $a4, $a3 bcnez $fcc0, .LBB24_4 # %bb.2: - fld.d $fa5, $a4, %pc_lo12(.LCPI24_0) - slli.d $a5, $a2, 32 + slli.d $a4, $a2, 32 + lu52i.d $a1, $zero, 1053 + movgr2fr.d $fa5, $a1 fmul.d $fa1, $fa1, $fa5 vreplvei.d $vr1, $vr1, 0 vfrintrm.d $vr1, $vr1 @@ -2538,7 +2512,7 @@ intcoord1: # @intcoord1 slli.d $a3, $a1, 32 b .LBB24_4 .LBB24_3: - move $a5, $a3 + move $a4, $a3 .LBB24_4: fld.d $fa1, $a0, 16 fsub.d $fa1, $fa2, $fa1 @@ -2551,16 +2525,18 @@ intcoord1: # @intcoord1 fcmp.cule.d $fcc0, $fa2, $fa1 bcnez $fcc0, .LBB24_7 # %bb.6: - fld.d $fa2, $a4, %pc_lo12(.LCPI24_0) + lu52i.d $a0, $zero, 1053 + movgr2fr.d $fa2, $a0 fmul.d $fa1, $fa1, $fa2 vreplvei.d $vr1, $vr1, 0 vfrintrm.d $vr1, $vr1 ftintrz.w.d $fa1, $fa1 movfr2gr.s $a0, $fa1 bstrpick.d $a0, $a0, 31, 0 - or $a1, $a5, $a0 + or $a1, $a4, $a0 .LBB24_7: - fld.d $fa1, $a4, %pc_lo12(.LCPI24_0) + lu52i.d $a0, $zero, 1053 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 vreplvei.d $vr0, $vr0, 0 vfrintrm.d $vr0, $vr0 @@ -2573,12 +2549,7 @@ intcoord1: # @intcoord1 .Lfunc_end24: .size intcoord1, .Lfunc_end24-intcoord1 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function subindex -.LCPI25_0: - .dword 0x41d0000000000000 # double 1073741824 - .text - .globl subindex + .globl subindex # -- Begin function subindex .p2align 5 .type subindex,@function subindex: # @subindex @@ -2620,8 +2591,8 @@ subindex: # @subindex fcmp.clt.d $fcc0, $fa1, $fa2 bceqz $fcc0, .LBB25_9 # %bb.6: - pcalau12i $a0, %pc_hi20(.LCPI25_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI25_0) + lu52i.d $a0, $zero, 1053 + movgr2fr.d $fa2, $a0 fmul.d $fa3, $fa3, $fa2 vreplvei.d $vr3, $vr3, 0 vfrintrm.d $vr3, $vr3 diff --git a/results/MultiSource/Benchmarks/Olden/bh/CMakeFiles/bh.dir/util.s b/results/MultiSource/Benchmarks/Olden/bh/CMakeFiles/bh.dir/util.s index 798bb559..f8c696a2 100644 --- a/results/MultiSource/Benchmarks/Olden/bh/CMakeFiles/bh.dir/util.s +++ b/results/MultiSource/Benchmarks/Olden/bh/CMakeFiles/bh.dir/util.s @@ -1,49 +1,40 @@ .file "util.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function my_rand -.LCPI0_0: - .dword 0x40d069c000000000 # double 16807 -.LCPI0_1: - .dword 0x41dfffffffc00000 # double 2147483647 -.LCPI0_2: - .dword 0xc1dfffffffc00000 # double -2147483647 .text - .globl my_rand + .globl my_rand # -- Begin function my_rand .p2align 5 .type my_rand,@function my_rand: # @my_rand # %bb.0: - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) - vldi $vr2, -912 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_1) - fmadd.d $fa0, $fa0, $fa1, $fa2 - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_2) - fdiv.d $fa2, $fa0, $fa3 - vreplvei.d $vr2, $vr2, 0 - vfrintrm.d $vr2, $vr2 - fmadd.d $fa0, $fa2, $fa1, $fa0 + vldi $vr1, -912 + ori $a0, $zero, 0 + lu32i.d $a0, 27072 + lu52i.d $a0, $a0, 1037 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa2, $fa1 + lu12i.w $a0, -1024 + lu52i.d $a1, $a0, 1053 + movgr2fr.d $fa1, $a1 + fdiv.d $fa1, $fa0, $fa1 + vreplvei.d $vr1, $vr1, 0 + vfrintrm.d $vr1, $vr1 + lu52i.d $a0, $a0, -995 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa2, $fa0 ret .Lfunc_end0: .size my_rand, .Lfunc_end0-my_rand # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function xrand -.LCPI1_0: - .dword 0x41dfffffffc00000 # double 2147483647 - .text - .globl xrand + .globl xrand # -- Begin function xrand .p2align 5 .type xrand,@function xrand: # @xrand # %bb.0: - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa3, $a0, %pc_lo12(.LCPI1_0) fsub.d $fa1, $fa1, $fa0 fmul.d $fa1, $fa1, $fa2 - fdiv.d $fa1, $fa1, $fa3 + lu12i.w $a0, -1024 + lu52i.d $a0, $a0, 1053 + movgr2fr.d $fa2, $a0 + fdiv.d $fa1, $fa1, $fa2 fadd.d $fa0, $fa0, $fa1 ret .Lfunc_end1: diff --git a/results/MultiSource/Benchmarks/Olden/em3d/CMakeFiles/em3d.dir/util.s b/results/MultiSource/Benchmarks/Olden/em3d/CMakeFiles/em3d.dir/util.s index 6e804cbc..b153650e 100644 --- a/results/MultiSource/Benchmarks/Olden/em3d/CMakeFiles/em3d.dir/util.s +++ b/results/MultiSource/Benchmarks/Olden/em3d/CMakeFiles/em3d.dir/util.s @@ -64,12 +64,7 @@ gen_uniform_double: # @gen_uniform_double .Lfunc_end3: .size gen_uniform_double, .Lfunc_end3-gen_uniform_double # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function check_percent -.LCPI4_0: - .dword 0x4059000000000000 # double 100 - .text - .globl check_percent + .globl check_percent # -- Begin function check_percent .p2align 5 .type check_percent,@function check_percent: # @check_percent @@ -80,16 +75,18 @@ check_percent: # @check_percent move $fp, $a0 pcaddu18i $ra, %call36(drand48) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_0) - pcalau12i $a0, %pc_hi20(percentcheck) - ld.w $a1, $a0, %pc_lo12(percentcheck) - movgr2fr.w $fa2, $fp - ffint.d.w $fa2, $fa2 - fdiv.d $fa1, $fa2, $fa1 - addi.d $a1, $a1, 1 + movgr2fr.w $fa1, $fp + ffint.d.w $fa1, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + pcalau12i $a1, %pc_hi20(percentcheck) + ld.w $a2, $a1, %pc_lo12(percentcheck) + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa2, $a0 + fdiv.d $fa1, $fa1, $fa2 + addi.d $a0, $a2, 1 fcmp.cule.d $fcc0, $fa1, $fa0 - st.w $a1, $a0, %pc_lo12(percentcheck) + st.w $a0, $a1, %pc_lo12(percentcheck) bcnez $fcc0, .LBB4_2 # %bb.1: pcalau12i $a0, %pc_hi20(numlocal) diff --git a/results/MultiSource/Benchmarks/Olden/health/CMakeFiles/health.dir/health.s b/results/MultiSource/Benchmarks/Olden/health/CMakeFiles/health.dir/health.s index fc3d2353..c285916b 100644 --- a/results/MultiSource/Benchmarks/Olden/health/CMakeFiles/health.dir/health.s +++ b/results/MultiSource/Benchmarks/Olden/health/CMakeFiles/health.dir/health.s @@ -276,16 +276,7 @@ check_patients_inside: # @check_patients_inside .Lfunc_end2: .size check_patients_inside, .Lfunc_end2-check_patients_inside # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function check_patients_assess -.LCPI3_0: - .word 0x4f000000 # float 2.14748365E+9 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI3_1: - .dword 0x3fb999999999999a # double 0.10000000000000001 - .text - .globl check_patients_assess + .globl check_patients_assess # -- Begin function check_patients_assess .p2align 5 .type check_patients_assess,@function check_patients_assess: # @check_patients_assess @@ -308,12 +299,15 @@ check_patients_assess: # @check_patients_assess move $s0, $a0 move $s4, $zero addi.d $s1, $a0, 104 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI3_0) - pcalau12i $a0, %pc_hi20(.LCPI3_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI3_1) - addi.d $s2, $s0, 152 - addi.d $s3, $s0, 128 + addi.d $s2, $a0, 152 + addi.d $s3, $a0, 128 + lu12i.w $a0, 323584 + movgr2fr.w $fs0, $a0 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1019 + movgr2fr.d $fs1, $a0 ori $s6, $zero, 10 b .LBB3_4 .LBB3_2: # in Loop: Header=BB3_4 Depth=1 @@ -501,16 +495,7 @@ put_in_hosp: # @put_in_hosp .Lfunc_end5: .size put_in_hosp, .Lfunc_end5-put_in_hosp # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function generate_patient -.LCPI6_0: - .word 0x4f000000 # float 2.14748365E+9 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI6_1: - .dword 0x3fe54fdf3b645a1d # double 0.66600000000000004 - .text - .globl generate_patient + .globl generate_patient # -- Begin function generate_patient .p2align 5 .type generate_patient,@function generate_patient: # @generate_patient @@ -522,13 +507,16 @@ generate_patient: # @generate_patient ld.d $a0, $a0, 184 pcaddu18i $ra, %call36(my_rand) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI6_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI6_0) - pcalau12i $a0, %pc_hi20(.LCPI6_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI6_1) + lu12i.w $a0, 323584 + movgr2fr.w $fa1, $a0 fmul.s $fa1, $fa0, $fa1 ftintrz.l.s $fa1, $fa1 fcvt.d.s $fa0, $fa0 + lu12i.w $a0, 243269 + ori $a0, $a0, 2589 + lu32i.d $a0, 348127 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa2, $a0 fcmp.cule.d $fcc0, $fa0, $fa2 fst.d $fa1, $fp, 184 bcnez $fcc0, .LBB6_2 @@ -673,16 +661,7 @@ main: # @main .Lfunc_end7: .size main, .Lfunc_end7-main # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function sim -.LCPI8_0: - .word 0x4f000000 # float 2.14748365E+9 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI8_1: - .dword 0x3fe54fdf3b645a1d # double 0.66600000000000004 - .text - .globl sim + .globl sim # -- Begin function sim .p2align 5 .type sim,@function sim: # @sim @@ -979,13 +958,16 @@ sim: # @sim ld.d $a0, $fp, 184 pcaddu18i $ra, %call36(my_rand) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI8_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI8_0) - pcalau12i $a0, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI8_1) + lu12i.w $a0, 323584 + movgr2fr.w $fa1, $a0 fmul.s $fa1, $fa0, $fa1 ftintrz.l.s $fa1, $fa1 fcvt.d.s $fa0, $fa0 + lu12i.w $a0, 243269 + ori $a0, $a0, 2589 + lu32i.d $a0, 348127 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa2, $a0 fcmp.cule.d $fcc0, $fa0, $fa2 fst.d $fa1, $fp, 184 bcnez $fcc0, .LBB8_46 diff --git a/results/MultiSource/Benchmarks/Olden/health/CMakeFiles/health.dir/poisson.s b/results/MultiSource/Benchmarks/Olden/health/CMakeFiles/health.dir/poisson.s index 06decca1..f3988551 100644 --- a/results/MultiSource/Benchmarks/Olden/health/CMakeFiles/health.dir/poisson.s +++ b/results/MultiSource/Benchmarks/Olden/health/CMakeFiles/health.dir/poisson.s @@ -1,10 +1,6 @@ .file "poisson.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function my_rand -.LCPI0_0: - .dword 0x3e00000000200000 # double 4.6566128752457969E-10 .text - .globl my_rand + .globl my_rand # -- Begin function my_rand .p2align 5 .type my_rand,@function my_rand: # @my_rand @@ -38,12 +34,13 @@ my_rand: # @my_rand add.d $a2, $a1, $a2 masknez $a1, $a1, $a0 maskeqz $a0, $a2, $a0 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI0_0) or $a0, $a0, $a1 + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + lu12i.w $a0, 512 + lu52i.d $a0, $a0, 992 movgr2fr.d $fa1, $a0 - ffint.d.l $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + fmul.d $fa0, $fa0, $fa1 fcvt.s.d $fa0, $fa0 ret .Lfunc_end0: diff --git a/results/MultiSource/Benchmarks/Olden/power/CMakeFiles/power.dir/compute.s b/results/MultiSource/Benchmarks/Olden/power/CMakeFiles/power.dir/compute.s index a9a4141f..4c0f0246 100644 --- a/results/MultiSource/Benchmarks/Olden/power/CMakeFiles/power.dir/compute.s +++ b/results/MultiSource/Benchmarks/Olden/power/CMakeFiles/power.dir/compute.s @@ -736,22 +736,7 @@ Compute_Leaf: # @Compute_Leaf .Lfunc_end3: .size Compute_Leaf, .Lfunc_end3-Compute_Leaf # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function optimize_node -.LCPI4_0: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI4_1: - .dword 0xc01465655f122ff6 # double -5.0990195135927845 -.LCPI4_2: - .dword 0x3fc91a556151761c # double 0.19611613513818404 -.LCPI4_3: - .dword 0xbfef60eab9a5d3a3 # double -0.98058067569092022 -.LCPI4_4: - .dword 0xbfe999999999999a # double -0.80000000000000004 -.LCPI4_5: - .dword 0xbfc91a556151761c # double -0.19611613513818404 - .text - .globl optimize_node + .globl optimize_node # -- Begin function optimize_node .p2align 5 .type optimize_node,@function optimize_node: # @optimize_node @@ -774,19 +759,34 @@ optimize_node: # @optimize_node pcalau12i $s0, %pc_hi20(Q) fld.d $ft2, $s0, %pc_lo12(Q) vldi $vr11, -876 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $ft4, $a0, %pc_lo12(.LCPI4_0) - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $ft5, $a0, %pc_lo12(.LCPI4_1) - pcalau12i $a0, %pc_hi20(.LCPI4_2) - fld.d $ft6, $a0, %pc_lo12(.LCPI4_2) - pcalau12i $a0, %pc_hi20(.LCPI4_3) - fld.d $ft7, $a0, %pc_lo12(.LCPI4_3) - pcalau12i $a0, %pc_hi20(.LCPI4_4) - fld.d $ft8, $a0, %pc_lo12(.LCPI4_4) + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $ft4, $a0 + lu12i.w $a0, 389410 + ori $a0, $a0, 4086 + lu32i.d $a0, 288101 + lu52i.d $a0, $a0, -1023 + movgr2fr.d $ft5, $a0 + lu12i.w $a0, 398615 + ori $a0, $a0, 1564 + lu32i.d $a0, -452011 + lu52i.d $a1, $a0, 1020 + movgr2fr.d $ft6, $a1 + lu12i.w $a1, -288163 + ori $a1, $a1, 931 + lu32i.d $a1, -40726 + lu52i.d $a1, $a1, -1026 + movgr2fr.d $ft7, $a1 + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a1, $a1, -1026 + movgr2fr.d $ft8, $a1 movgr2fr.d $ft9, $zero vldi $vr18, -912 - pcalau12i $s1, %pc_hi20(.LCPI4_5) + lu52i.d $s1, $a0, -1028 .p2align 4, , 16 .LBB4_1: # %.critedge # =>This Inner Loop Header: Depth=1 @@ -933,11 +933,11 @@ optimize_node: # @optimize_node movgr2fr.d $fa4, $zero fmadd.d $fa4, $fa2, $fa2, $fa4 fmadd.d $fa4, $fa3, $fa3, $fa4 - fld.d $fa5, $s1, %pc_lo12(.LCPI4_5) fsqrt.d $fa4, $fa4 fdiv.d $fa2, $fa2, $fa4 fdiv.d $fa3, $fa3, $fa4 - fmul.d $fa3, $fa3, $fa5 + movgr2fr.d $fa4, $s1 + fmul.d $fa3, $fa3, $fa4 fmadd.d $fa2, $fa2, $ft7, $fa3 fabs.d $fa2, $fa2 fcmp.clt.d $fcc0, $ft4, $fa2 @@ -1026,10 +1026,6 @@ optimize_node: # @optimize_node .LCPI5_0: .dword 0x3fc91a556151761c # double 0.19611613513818404 .dword 0xbfef60eab9a5d3a3 # double -0.98058067569092022 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI5_1: - .dword 0x401465655f122ff6 # double 5.0990195135927845 .text .globl find_gradient_h .p2align 5 @@ -1038,8 +1034,11 @@ find_gradient_h: # @find_gradient_h # %bb.0: pcalau12i $a1, %pc_hi20(.LCPI5_0) vld $vr1, $a1, %pc_lo12(.LCPI5_0) - pcalau12i $a1, %pc_hi20(.LCPI5_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI5_1) + lu12i.w $a1, 389410 + ori $a1, $a1, 4086 + lu32i.d $a1, 288101 + lu52i.d $a1, $a1, 1025 + movgr2fr.d $fa0, $a1 vst $vr1, $a0, 0 ret .Lfunc_end5: @@ -1247,25 +1246,23 @@ find_dd_grad_f: # @find_dd_grad_f .Lfunc_end9: .size find_dd_grad_f, .Lfunc_end9-find_dd_grad_f # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function find_g -.LCPI10_0: - .dword 0xbfe999999999999a # double -0.80000000000000004 - .text - .globl find_g + .globl find_g # -- Begin function find_g .p2align 5 .type find_g,@function find_g: # @find_g # %bb.0: pcalau12i $a0, %pc_hi20(P) - fld.d $fa0, $a0, %pc_lo12(P) - pcalau12i $a0, %pc_hi20(Q) - fld.d $fa1, $a0, %pc_lo12(Q) - pcalau12i $a0, %pc_hi20(.LCPI10_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI10_0) - fmul.d $fa1, $fa1, $fa1 - fmadd.d $fa0, $fa0, $fa0, $fa1 - fadd.d $fa0, $fa0, $fa2 + pcalau12i $a1, %pc_hi20(Q) + fld.d $fa0, $a1, %pc_lo12(Q) + fld.d $fa1, $a0, %pc_lo12(P) + fmul.d $fa0, $fa0, $fa0 + fmadd.d $fa0, $fa1, $fa1, $fa0 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, -1026 + movgr2fr.d $fa1, $a0 + fadd.d $fa0, $fa0, $fa1 ret .Lfunc_end10: .size find_g, .Lfunc_end10-find_g diff --git a/results/MultiSource/Benchmarks/Olden/power/CMakeFiles/power.dir/main.s b/results/MultiSource/Benchmarks/Olden/power/CMakeFiles/power.dir/main.s index 34ad9b38..3bb8249e 100644 --- a/results/MultiSource/Benchmarks/Olden/power/CMakeFiles/power.dir/main.s +++ b/results/MultiSource/Benchmarks/Olden/power/CMakeFiles/power.dir/main.s @@ -1,24 +1,6 @@ .file "main.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x3fc1eb851eb851ec # double 0.14000000000000001 -.LCPI0_1: - .dword 0x40c3880000000000 # double 1.0E+4 -.LCPI0_2: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 -.LCPI0_3: - .dword 0xbfe4cccccccccccd # double -0.65000000000000002 -.LCPI0_4: - .dword 0x3f847ae147ae147b # double 0.01 -.LCPI0_5: - .dword 0xc059000000000000 # double -100 -.LCPI0_6: - .dword 0xbfc0a3d70a3d70a4 # double -0.13 -.LCPI0_7: - .dword 0x3f60624dd2f1a9fc # double 0.002 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -63,31 +45,53 @@ main: # @main vst $vr1, $fp, 48 lu12i.w $a0, 419430 ori $a0, $a0, 1638 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI0_0) lu32i.d $a0, 419430 lu52i.d $a0, $a0, 1022 st.d $a0, $fp, 16 + lu12i.w $a0, 125829 + ori $a0, $a0, 492 + lu32i.d $a0, 125829 + lu52i.d $a0, $a0, 1020 + movgr2fr.d $fa0, $a0 pcalau12i $a0, %pc_hi20(.L.str.3) addi.d $s0, $a0, %pc_lo12(.L.str.3) - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_1) - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fs1, $a0, %pc_lo12(.LCPI0_2) - pcalau12i $a0, %pc_hi20(.LCPI0_3) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_3) + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, 231424 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs0, $a1 + lu12i.w $a1, -487882 + ori $a1, $a1, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fs1, $a1 + lu12i.w $a1, -209716 + ori $a1, $a1, 3277 + lu32i.d $a1, 314572 + lu52i.d $a1, $a1, -1026 + movgr2fr.d $fa1, $a1 fst.d $fa1, $sp, 16 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI0_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI0_4) + lu12i.w $a1, 293601 + ori $a1, $a1, 1147 + lu32i.d $a1, 293601 + lu52i.d $a1, $a1, 1016 + movgr2fr.d $fs3, $a1 ori $s2, $zero, 35 - pcalau12i $a0, %pc_hi20(map_P) - addi.d $s3, $a0, %pc_lo12(map_P) - pcalau12i $a0, %pc_hi20(.LCPI0_5) - fld.d $fs4, $a0, %pc_lo12(.LCPI0_5) - pcalau12i $a0, %pc_hi20(.LCPI0_6) - fld.d $fs5, $a0, %pc_lo12(.LCPI0_6) - pcalau12i $a0, %pc_hi20(.LCPI0_7) - fld.d $fs6, $a0, %pc_lo12(.LCPI0_7) + pcalau12i $a1, %pc_hi20(map_P) + addi.d $s3, $a1, %pc_lo12(map_P) + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, -1019 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, 41943 + ori $a0, $a0, 164 + lu32i.d $a0, 41943 + lu52i.d $a0, $a0, -1028 + movgr2fr.d $fs5, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1014 + movgr2fr.d $fs6, $a0 pcalau12i $a0, %pc_hi20(map_Q) addi.d $s4, $a0, %pc_lo12(map_Q) pcalau12i $a0, %pc_hi20(.L.str.4) diff --git a/results/MultiSource/Benchmarks/Olden/tsp/CMakeFiles/tsp.dir/build.s b/results/MultiSource/Benchmarks/Olden/tsp/CMakeFiles/tsp.dir/build.s index fc61eea6..30be73e8 100644 --- a/results/MultiSource/Benchmarks/Olden/tsp/CMakeFiles/tsp.dir/build.s +++ b/results/MultiSource/Benchmarks/Olden/tsp/CMakeFiles/tsp.dir/build.s @@ -1,12 +1,6 @@ .file "build.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function build_tree -.LCPI0_0: - .dword 0xc113de0e54d37c9a # double -325507.58283800783 -.LCPI0_1: - .dword 0x4103de1654d37c9a # double 162754.79141900392 .text - .globl build_tree + .globl build_tree # -- Begin function build_tree .p2align 5 .type build_tree,@function build_tree: # @build_tree @@ -47,14 +41,19 @@ build_tree: # @build_tree fcmp.clt.d $fcc0, $fa2, $fa0 fsel $fa0, $fa0, $fa1, $fcc0 vldi $vr1, -984 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_0) - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_1) - vldi $vr4, -856 - fsel $fs4, $fa4, $fa1, $fcc0 - fmul.d $fa0, $fa0, $fa2 - fdiv.d $fa0, $fa0, $fa3 + vldi $vr2, -856 + lu12i.w $a0, 347447 + ori $a0, $a0, 3226 + fsel $fs4, $fa2, $fa1, $fcc0 + move $a1, $a0 + lu32i.d $a1, 253454 + lu52i.d $a1, $a1, -1007 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 + lu32i.d $a0, 253462 + lu52i.d $a0, $a0, 1040 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 vldi $vr1, -912 fadd.d $fa0, $fa0, $fa1 pcaddu18i $ra, %call36(log) diff --git a/results/MultiSource/Benchmarks/Olden/voronoi/CMakeFiles/voronoi.dir/newvor.s b/results/MultiSource/Benchmarks/Olden/voronoi/CMakeFiles/voronoi.dir/newvor.s index 63964cb0..08f48c7b 100644 --- a/results/MultiSource/Benchmarks/Olden/voronoi/CMakeFiles/voronoi.dir/newvor.s +++ b/results/MultiSource/Benchmarks/Olden/voronoi/CMakeFiles/voronoi.dir/newvor.s @@ -1805,12 +1805,7 @@ main: # @main .Lfunc_end23: .size main, .Lfunc_end23-main # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function get_points -.LCPI24_0: - .dword 0x41dfffffffc00000 # double 2147483647 - .text - .globl get_points + .globl get_points # -- Begin function get_points .p2align 5 .type get_points,@function get_points: # @get_points @@ -1878,11 +1873,12 @@ get_points: # @get_points mul.d $a0, $a0, $s8 add.d $a0, $a0, $a1 addi.d $a0, $a0, 1 - pcalau12i $a1, %pc_hi20(.LCPI24_0) - fld.d $fs1, $a1, %pc_lo12(.LCPI24_0) addi.w $s3, $a0, 0 movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 + lu12i.w $a0, -1024 + lu52i.d $a0, $a0, 1053 + movgr2fr.d $fs1, $a0 fdiv.d $fa0, $fa0, $fs1 pcaddu18i $ra, %call36(log) jirl $ra, $ra, 0 diff --git a/results/MultiSource/Benchmarks/PAQ8p/CMakeFiles/paq8p.dir/paq8p.s b/results/MultiSource/Benchmarks/PAQ8p/CMakeFiles/paq8p.dir/paq8p.s index db7c16d8..62bca598 100644 --- a/results/MultiSource/Benchmarks/PAQ8p/CMakeFiles/paq8p.dir/paq8p.s +++ b/results/MultiSource/Benchmarks/PAQ8p/CMakeFiles/paq8p.dir/paq8p.s @@ -17126,12 +17126,8 @@ _ZN3APM1pEiii: # @_ZN3APM1pEiii .size _ZN3APM1pEiii, .Lfunc_end60-_ZN3APM1pEiii .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z8wavModelR5Mixer -.LCPI61_0: - .dword 0x3fefdf3b645a1cac # double 0.99599999999999999 .text - .globl _Z8wavModelR5Mixer + .globl _Z8wavModelR5Mixer # -- Begin function _Z8wavModelR5Mixer .p2align 5 .type _Z8wavModelR5Mixer,@function _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer @@ -17260,7 +17256,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer ori $a1, $zero, 97 bne $a0, $a1, .LBB61_45 # %bb.15: # %.preheader511 - st.d $s0, $sp, 160 # 8-byte Folded Spill + st.d $s0, $sp, 168 # 8-byte Folded Spill ori $a7, $zero, 32 addi.d $a3, $zero, -32 ori $s8, $zero, 102 @@ -17286,12 +17282,12 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer ori $s5, $zero, 1001 ori $s3, $zero, 32 st.d $s7, $sp, 80 # 8-byte Folded Spill - st.d $a5, $sp, 184 # 8-byte Folded Spill + st.d $a5, $sp, 192 # 8-byte Folded Spill b .LBB61_18 .LBB61_16: # in Loop: Header=BB61_18 Depth=1 st.w $a0, $fp, %pc_lo12(_ZZ8wavModelR5MixerE3eof) ld.d $s7, $sp, 80 # 8-byte Folded Reload - ld.d $a5, $sp, 184 # 8-byte Folded Reload + ld.d $a5, $sp, 192 # 8-byte Folded Reload .p2align 4, , 16 .LBB61_17: # in Loop: Header=BB61_18 Depth=1 addi.w $a0, $s3, 1 @@ -17337,7 +17333,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer ldx.bu $a4, $a1, $a4 slli.d $a5, $a5, 8 or $a4, $a5, $a4 - ld.d $a5, $sp, 184 # 8-byte Folded Reload + ld.d $a5, $sp, 192 # 8-byte Folded Reload bstrpick.d $a4, $a4, 15, 0 beq $a4, $t1, .LBB61_24 # %bb.23: # in Loop: Header=BB61_18 Depth=1 @@ -17387,8 +17383,8 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer st.d $t5, $sp, 136 # 8-byte Folded Spill st.d $t4, $sp, 144 # 8-byte Folded Spill st.d $t3, $sp, 152 # 8-byte Folded Spill - st.d $t2, $sp, 176 # 8-byte Folded Spill - st.d $t1, $sp, 192 # 8-byte Folded Spill + st.d $t2, $sp, 160 # 8-byte Folded Spill + st.d $t1, $sp, 184 # 8-byte Folded Spill st.d $ra, $sp, 120 # 8-byte Folded Spill ld.bu $a3, $ra, %pc_lo12(_ZL1D) st.d $t8, $sp, 128 # 8-byte Folded Spill @@ -17402,7 +17398,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer # %bb.28: # %.preheader510.preheader # in Loop: Header=BB61_18 Depth=1 add.d $a0, $a2, $a0 - st.d $s6, $sp, 168 # 8-byte Folded Spill + st.d $s6, $sp, 176 # 8-byte Folded Spill addi.d $a0, $a0, 1 st.d $a0, $sp, 112 # 8-byte Folded Spill slli.d $s6, $s7, 2 @@ -17421,7 +17417,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer move $a0, $zero ld.d $a1, $sp, 112 # 8-byte Folded Reload bstrpick.d $a1, $a1, 31, 0 - ld.d $s6, $sp, 168 # 8-byte Folded Reload + ld.d $s6, $sp, 176 # 8-byte Folded Reload lu12i.w $a2, 4 ori $a2, $a2, 3048 add.d $a2, $sp, $a2 @@ -17570,11 +17566,11 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer st.b $s2, $ra, %pc_lo12(_ZL1D) .LBB61_43: # in Loop: Header=BB61_18 Depth=1 ld.d $s7, $sp, 80 # 8-byte Folded Reload - ld.d $a5, $sp, 184 # 8-byte Folded Reload + ld.d $a5, $sp, 192 # 8-byte Folded Reload ori $a7, $zero, 32 ori $t0, $zero, 116 - ld.d $t1, $sp, 192 # 8-byte Folded Reload - ld.d $t2, $sp, 176 # 8-byte Folded Reload + ld.d $t1, $sp, 184 # 8-byte Folded Reload + ld.d $t2, $sp, 160 # 8-byte Folded Reload ld.d $t3, $sp, 152 # 8-byte Folded Reload ld.d $t4, $sp, 144 # 8-byte Folded Reload ld.d $t5, $sp, 136 # 8-byte Folded Reload @@ -17583,7 +17579,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer b .LBB61_17 .LBB61_44: # %.loopexit512.loopexit ld.w $a2, $a5, %pc_lo12(pos) - ld.d $s0, $sp, 160 # 8-byte Folded Reload + ld.d $s0, $sp, 168 # 8-byte Folded Reload .LBB61_45: # %.loopexit512 ld.w $a0, $fp, %pc_lo12(_ZZ8wavModelR5MixerE3eof) bge $a0, $a2, .LBB61_47 @@ -17594,7 +17590,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer st.w $zero, $a1, %pc_lo12(_ZZ8wavModelR5MixerE4bits) b .LBB61_132 .LBB61_47: - st.d $a5, $sp, 184 # 8-byte Folded Spill + st.d $a5, $sp, 192 # 8-byte Folded Spill ld.w $a1, $s0, %pc_lo12(bpos) pcalau12i $a3, %pc_hi20(buf) addi.d $s8, $a3, %pc_lo12(buf) @@ -17614,7 +17610,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer addi.d $fp, $a3, %pc_lo12(_ZZ8wavModelR5MixerE4scm8) bnez $a1, .LBB61_131 # %bb.48: - st.d $ra, $sp, 104 # 8-byte Folded Spill + st.d $ra, $sp, 112 # 8-byte Folded Spill pcalau12i $a1, %pc_hi20(_ZZ8wavModelR5MixerE2cm) addi.d $t1, $a1, %pc_lo12(_ZZ8wavModelR5MixerE2cm) pcalau12i $a1, %pc_hi20(_ZZ8wavModelR5MixerE1s) @@ -17628,16 +17624,16 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer mod.w $a3, $a0, $a4 mod.w $t8, $a0, $a1 div.w $a0, $t8, $a4 - st.d $a0, $sp, 176 # 8-byte Folded Spill - st.d $s6, $sp, 168 # 8-byte Folded Spill - st.d $s0, $sp, 160 # 8-byte Folded Spill - st.d $fp, $sp, 152 # 8-byte Folded Spill - st.d $s1, $sp, 144 # 8-byte Folded Spill - st.d $s2, $sp, 136 # 8-byte Folded Spill - st.d $s3, $sp, 128 # 8-byte Folded Spill - st.d $s4, $sp, 120 # 8-byte Folded Spill - st.d $s5, $sp, 112 # 8-byte Folded Spill - st.d $t1, $sp, 96 # 8-byte Folded Spill + st.d $a0, $sp, 184 # 8-byte Folded Spill + st.d $s6, $sp, 176 # 8-byte Folded Spill + st.d $s0, $sp, 168 # 8-byte Folded Spill + st.d $fp, $sp, 160 # 8-byte Folded Spill + st.d $s1, $sp, 152 # 8-byte Folded Spill + st.d $s2, $sp, 144 # 8-byte Folded Spill + st.d $s3, $sp, 136 # 8-byte Folded Spill + st.d $s4, $sp, 128 # 8-byte Folded Spill + st.d $s5, $sp, 120 # 8-byte Folded Spill + st.d $t1, $sp, 104 # 8-byte Folded Spill beqz $a3, .LBB61_54 # %bb.49: # %_Z1ciiiii.exit395 st.d $s7, $sp, 80 # 8-byte Folded Spill @@ -17680,7 +17676,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer add.d $a2, $a2, $t2 slli.d $t2, $t2, 2 stx.w $a2, $a4, $t2 - ld.d $t6, $sp, 184 # 8-byte Folded Reload + ld.d $t6, $sp, 192 # 8-byte Folded Reload ld.w $a2, $t6, %pc_lo12(pos) ld.w $t2, $s8, 0 addi.w $t3, $a2, -1 @@ -17698,8 +17694,8 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer mul.d $t3, $t3, $t0 mul.d $t4, $t2, $a5 lu12i.w $t5, 17091 - ori $s5, $t5, 71 - mul.d $a2, $a2, $s5 + ori $s4, $t5, 71 + mul.d $a2, $a2, $s4 lu12i.w $t5, 26856 ori $t5, $t5, 323 mul.d $t5, $t8, $t5 @@ -17807,9 +17803,9 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer or $t6, $t6, $fp slli.d $t6, $t6, 5 srli.d $fp, $s2, 3 - alsl.d $s4, $a1, $a1, 2 + alsl.d $s5, $a1, $a1, 2 alsl.d $s2, $a1, $s0, 1 - sub.w $s3, $t2, $s4 + sub.w $s3, $t2, $s5 and $s3, $t5, $s3 sub.w $t2, $t2, $s2 and $t2, $t5, $t2 @@ -17822,8 +17818,8 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer bstrins.d $t2, $t5, 9, 5 mul.d $t5, $t6, $t0 mul.d $s3, $t2, $a5 - move $s1, $s5 - mul.d $a2, $t8, $s5 + move $s1, $s4 + mul.d $a2, $t8, $s4 st.d $a2, $sp, 32 # 8-byte Folded Spill add.d $t5, $a2, $t5 add.w $t5, $t5, $s3 @@ -17852,22 +17848,22 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer add.d $t2, $t2, $t5 slli.d $t5, $t5, 2 stx.w $t2, $a4, $t5 - ld.w $s5, $a3, %pc_lo12(pos) + ld.w $s4, $a3, %pc_lo12(pos) ld.w $t2, $s8, 0 - sub.w $t5, $s5, $a1 + sub.w $t5, $s4, $a1 addi.w $s7, $t2, -1 and $t2, $s7, $t5 - sub.w $t5, $s5, $t4 + sub.w $t5, $s4, $t4 and $t5, $s7, $t5 ldx.bu $t5, $a0, $t5 ldx.bu $t2, $a0, $t2 srli.d $t5, $t5, 4 srli.d $t2, $t2, 4 bstrins.d $t5, $t2, 7, 4 - sub.w $t2, $s5, $t3 + sub.w $t2, $s4, $t3 and $t2, $s7, $t2 ldx.bu $t2, $a0, $t2 - sub.w $t7, $s5, $s0 + sub.w $t7, $s4, $s0 and $t7, $s7, $t7 ldx.bu $t7, $a0, $t7 slli.d $t5, $t5, 8 @@ -17875,9 +17871,9 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer bstrins.d $t5, $t2, 7, 4 srli.d $t2, $t7, 4 or $t2, $t5, $t2 - sub.w $t5, $s5, $s4 + sub.w $t5, $s4, $s5 and $t5, $s7, $t5 - sub.w $t7, $s5, $s2 + sub.w $t7, $s4, $s2 and $t7, $s7, $t7 ldx.bu $t7, $a0, $t7 ldx.b $t5, $a0, $t5 @@ -17886,15 +17882,15 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer srli.d $t7, $t7, 5 srli.d $t5, $t5, 5 bstrins.d $t7, $t5, 5, 3 - sub.w $t5, $s5, $s3 + sub.w $t5, $s4, $s3 and $t5, $s7, $t5 ldx.bu $ra, $a0, $t5 slli.d $t5, $t7, 6 alsl.d $s6, $a1, $a1, 3 - sub.w $t7, $s5, $s6 + sub.w $t7, $s4, $s6 and $t7, $s7, $t7 alsl.d $a2, $a1, $fp, 1 - sub.w $t6, $s5, $a2 + sub.w $t6, $s4, $a2 and $t6, $s7, $t6 ldx.bu $t6, $a0, $t6 ldx.b $t7, $a0, $t7 @@ -17903,22 +17899,22 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer srli.d $t6, $t6, 6 srli.d $t7, $t7, 6 bstrins.d $t6, $t7, 3, 2 - sub.w $t7, $s5, $fp + sub.w $t7, $s4, $fp and $t8, $s7, $t7 - alsl.d $t7, $s4, $a1, 1 - sub.w $ra, $s5, $t7 + alsl.d $t7, $s5, $a1, 1 + sub.w $ra, $s4, $t7 and $ra, $s7, $ra ldx.b $a3, $a0, $ra alsl.d $ra, $a1, $fp, 2 - sub.w $s5, $s5, $ra - and $s5, $s7, $s5 + sub.w $s4, $s4, $ra + and $s4, $s7, $s4 ldx.bu $t8, $a0, $t8 bstrpick.d $a3, $a3, 7, 6 - ldx.b $s5, $a0, $s5 + ldx.b $s4, $a0, $s4 slli.d $t6, $t6, 4 slli.d $a3, $a3, 2 or $a3, $t6, $a3 - bstrpick.d $t6, $s5, 7, 6 + bstrpick.d $t6, $s4, 7, 6 or $a3, $a3, $t6 srli.d $t6, $t8, 5 or $t6, $t5, $t6 @@ -17960,25 +17956,25 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer stx.w $a3, $a4, $t2 move $s1, $s8 ld.w $a3, $s8, 0 - ld.d $a4, $sp, 184 # 8-byte Folded Reload - ld.w $s5, $a4, %pc_lo12(pos) + ld.d $a4, $sp, 192 # 8-byte Folded Reload + ld.w $s4, $a4, %pc_lo12(pos) addi.w $s7, $a3, -1 - sub.w $a3, $s5, $a1 + sub.w $a3, $s4, $a1 and $a3, $s7, $a3 ldx.b $a3, $a0, $a3 st.d $t4, $sp, 8 # 8-byte Folded Spill - sub.w $t2, $s5, $t4 + sub.w $t2, $s4, $t4 and $t2, $s7, $t2 ldx.bu $t2, $a0, $t2 - sub.w $t4, $s5, $t3 + sub.w $t4, $s4, $t3 and $t4, $s7, $t4 ldx.b $t4, $a0, $t4 srli.d $t2, $t2, 6 srli.d $a3, $a3, 6 bstrins.d $t2, $a3, 3, 2 bstrpick.d $a3, $t4, 7, 6 - st.d $s0, $sp, 192 # 8-byte Folded Spill - sub.w $t4, $s5, $s0 + st.d $s0, $sp, 96 # 8-byte Folded Spill + sub.w $t4, $s4, $s0 and $t4, $s7, $t4 ldx.b $t4, $a0, $t4 slli.d $t2, $t2, 4 @@ -17986,20 +17982,20 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer or $a3, $t2, $a3 bstrpick.d $t2, $t4, 7, 6 or $t4, $a3, $t2 - sub.w $a3, $s5, $s4 + sub.w $a3, $s4, $s5 and $a3, $s7, $a3 ldx.b $a3, $a0, $a3 - sub.w $t2, $s5, $s2 + sub.w $t2, $s4, $s2 and $t2, $s7, $t2 ldx.bu $t2, $a0, $t2 - sub.w $t5, $s5, $s3 + sub.w $t5, $s4, $s3 and $t5, $s7, $t5 ldx.b $t5, $a0, $t5 srli.d $t2, $t2, 6 srli.d $a3, $a3, 6 bstrins.d $t2, $a3, 3, 2 bstrpick.d $a3, $t5, 7, 6 - sub.w $t5, $s5, $fp + sub.w $t5, $s4, $fp and $t5, $s7, $t5 ldx.b $t5, $a0, $t5 slli.d $t2, $t2, 4 @@ -18007,20 +18003,20 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer or $a3, $t2, $a3 bstrpick.d $t2, $t5, 7, 6 or $fp, $a3, $t2 - sub.w $a3, $s5, $s6 + sub.w $a3, $s4, $s6 and $a3, $s7, $a3 ldx.b $a3, $a0, $a3 - sub.w $t2, $s5, $a2 + sub.w $t2, $s4, $a2 and $t2, $s7, $t2 ldx.bu $t2, $a0, $t2 - sub.w $t5, $s5, $t7 + sub.w $t5, $s4, $t7 and $t5, $s7, $t5 ldx.b $t5, $a0, $t5 srli.d $t2, $t2, 6 srli.d $a3, $a3, 6 bstrins.d $t2, $a3, 3, 2 bstrpick.d $a3, $t5, 7, 6 - sub.w $t5, $s5, $ra + sub.w $t5, $s4, $ra and $t5, $s7, $t5 ldx.b $t5, $a0, $t5 slli.d $t2, $t2, 4 @@ -18034,20 +18030,20 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer mul.d $t5, $a1, $t5 slli.d $t6, $a1, 4 sub.d $t7, $a1, $t6 - add.w $t2, $s5, $t2 + add.w $t2, $s4, $t2 and $t2, $s7, $t2 ldx.b $t2, $a0, $t2 - add.w $t5, $s5, $t5 + add.w $t5, $s4, $t5 and $t5, $s7, $t5 ldx.bu $t5, $a0, $t5 - add.w $t7, $s5, $t7 + add.w $t7, $s4, $t7 and $t7, $s7, $t7 ldx.b $t7, $a0, $t7 srli.d $t5, $t5, 6 srli.d $t2, $t2, 6 bstrins.d $t5, $t2, 3, 2 bstrpick.d $t2, $t7, 7, 6 - sub.w $t6, $s5, $t6 + sub.w $t6, $s4, $t6 and $t6, $s7, $t6 ldx.b $t6, $a0, $t6 slli.d $t5, $t5, 4 @@ -18055,7 +18051,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer or $t2, $t5, $t2 bstrpick.d $t5, $t6, 7, 6 alsl.d $t6, $a1, $a1, 4 - sub.w $t6, $s5, $t6 + sub.w $t6, $s4, $t6 and $t6, $s7, $t6 ldx.bu $t6, $a0, $t6 or $t5, $t2, $t5 @@ -18068,7 +18064,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer mul.d $a3, $a1, $a3 addi.d $t4, $zero, -18 mul.d $t4, $a1, $t4 - add.w $t4, $s5, $t4 + add.w $t4, $s4, $t4 and $t4, $s7, $t4 ldx.bu $t4, $a0, $t4 addi.d $t6, $zero, -19 @@ -18076,10 +18072,10 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer slli.d $t8, $s3, 4 srli.d $t4, $t4, 6 bstrins.d $t8, $t4, 3, 2 - add.w $t4, $s5, $t6 + add.w $t4, $s4, $t6 and $t4, $s7, $t4 ldx.bu $t4, $a0, $t4 - add.w $a3, $s5, $a3 + add.w $a3, $s4, $a3 and $a3, $s7, $a3 ldx.bu $a3, $a0, $a3 srli.d $t4, $t4, 6 @@ -18088,14 +18084,14 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer srli.d $a3, $a3, 6 or $s3, $t4, $a3 .LBB61_51: # %_Z1ciiiii.exit402 - ld.d $a2, $sp, 104 # 8-byte Folded Reload + ld.d $a2, $sp, 112 # 8-byte Folded Reload alsl.w $a3, $a1, $a1, 1 st.d $a3, $sp, 16 # 8-byte Folded Spill alsl.w $a3, $a1, $a1, 2 - st.d $a3, $sp, 104 # 8-byte Folded Spill + st.d $a3, $sp, 112 # 8-byte Folded Spill addi.d $a3, $zero, -21 mul.d $a3, $a1, $a3 - add.w $a3, $s5, $a3 + add.w $a3, $s4, $a3 and $a3, $s7, $a3 ldx.bu $a3, $a0, $a3 or $t2, $t2, $fp @@ -18109,7 +18105,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer mul.d $a3, $a1, $a3 addi.d $t6, $zero, -22 mul.d $t6, $a1, $t6 - add.w $t6, $s5, $t6 + add.w $t6, $s4, $t6 and $t6, $s7, $t6 ldx.bu $t6, $a0, $t6 addi.d $t8, $zero, -23 @@ -18117,10 +18113,10 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer slli.d $fp, $fp, 4 srli.d $t6, $t6, 6 bstrins.d $fp, $t6, 3, 2 - add.w $t6, $s5, $t8 + add.w $t6, $s4, $t8 and $t6, $s7, $t6 ldx.bu $t6, $a0, $t6 - add.w $a3, $s5, $a3 + add.w $a3, $s4, $a3 and $a3, $s7, $a3 ldx.bu $a3, $a0, $a3 srli.d $t6, $t6, 6 @@ -18130,7 +18126,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer or $fp, $t6, $a3 .LBB61_53: # %_Z1ciiiii.exit409 ld.d $s7, $sp, 80 # 8-byte Folded Reload - ld.d $s6, $sp, 96 # 8-byte Folded Reload + ld.d $s6, $sp, 104 # 8-byte Folded Reload or $a3, $fp, $t7 ld.d $t0, $sp, 48 # 8-byte Folded Reload mul.d $t6, $t2, $t0 @@ -18171,7 +18167,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer stx.w $a3, $a5, $t2 pcalau12i $a3, %pc_hi20(_ZZ8wavModelR5MixerE2pr) addi.d $a3, $a3, %pc_lo12(_ZZ8wavModelR5MixerE2pr) - ld.d $a5, $sp, 176 # 8-byte Folded Reload + ld.d $a5, $sp, 184 # 8-byte Folded Reload slli.d $t2, $a5, 2 ldx.w $t8, $a3, $t2 srai.d $t2, $t8, 8 @@ -18200,7 +18196,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer add.d $t2, $t2, $t6 slli.d $t5, $t6, 2 stx.w $t2, $s2, $t5 - ld.d $s5, $sp, 184 # 8-byte Folded Reload + ld.d $s5, $sp, 192 # 8-byte Folded Reload ld.w $t2, $s5, %pc_lo12(pos) ld.w $t5, $s8, 0 addi.d $s0, $a1, 1 @@ -18389,7 +18385,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer add.d $a5, $a5, $a7 add.d $a7, $t0, $t1 sub.w $a5, $a5, $a7 - ld.d $s5, $sp, 112 # 8-byte Folded Reload + ld.d $s5, $sp, 120 # 8-byte Folded Reload ld.wu $a7, $s5, 0 srli.d $t0, $a5, 1 addi.w $a5, $zero, -256 @@ -18434,7 +18430,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer or $t5, $t5, $t6 ldx.bu $t6, $a0, $a6 srli.d $t5, $t5, 16 - ld.d $s4, $sp, 120 # 8-byte Folded Reload + ld.d $s4, $sp, 128 # 8-byte Folded Reload ld.wu $t7, $s4, 0 add.d $t3, $t8, $t3 add.d $t5, $t5, $t6 @@ -18469,7 +18465,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer ldx.bu $s0, $a0, $a6 sub.d $t6, $fp, $t6 alsl.d $t6, $t6, $t6, 1 - ld.d $s2, $sp, 136 # 8-byte Folded Reload + ld.d $s2, $sp, 144 # 8-byte Folded Reload ld.wu $fp, $s2, 0 sub.d $t7, $t7, $s0 add.d $t6, $t7, $t6 @@ -18497,7 +18493,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer ldx.bu $t7, $a0, $a6 srli.d $t6, $t6, 16 alsl.d $a1, $t8, $a1, 1 - ld.d $s3, $sp, 128 # 8-byte Folded Reload + ld.d $s3, $sp, 136 # 8-byte Folded Reload ld.wu $fp, $s3, 0 sub.d $a1, $a1, $t7 add.w $a1, $a1, $t6 @@ -18525,7 +18521,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer slli.w $a1, $t7, 24 slli.w $t7, $fp, 16 or $t7, $a1, $t7 - ld.d $fp, $sp, 192 # 8-byte Folded Reload + ld.d $fp, $sp, 96 # 8-byte Folded Reload sub.w $a1, $fp, $a3 andn $a1, $a4, $a1 move $ra, $a2 @@ -18541,7 +18537,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer ldx.bu $fp, $a0, $a6 srli.d $t7, $t7, 16 alsl.d $t6, $t6, $s0, 2 - ld.d $s1, $sp, 144 # 8-byte Folded Reload + ld.d $s1, $sp, 152 # 8-byte Folded Reload ld.wu $s0, $s1, 0 add.d $t7, $t7, $fp sub.d $t6, $t6, $t7 @@ -18565,7 +18561,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer slli.w $t1, $t2, 24 slli.w $t2, $t3, 16 or $t1, $t1, $t2 - ld.d $t3, $sp, 104 # 8-byte Folded Reload + ld.d $t3, $sp, 112 # 8-byte Folded Reload sub.w $t2, $t3, $a3 sub.w $a3, $a3, $t3 ldx.b $a2, $a0, $a2 @@ -18592,7 +18588,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer sub.d $a0, $t8, $a0 srli.d $a2, $a2, 16 add.d $a0, $a0, $a3 - ld.d $fp, $sp, 152 # 8-byte Folded Reload + ld.d $fp, $sp, 160 # 8-byte Folded Reload ld.wu $a3, $fp, 0 add.d $a0, $a0, $a2 alsl.d $a1, $a1, $a1, 2 @@ -18602,7 +18598,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer addi.d $a1, $a1, -256 and $a0, $a1, $a0 st.w $a0, $fp, 24 - ld.d $s0, $sp, 160 # 8-byte Folded Reload + ld.d $s0, $sp, 168 # 8-byte Folded Reload b .LBB61_130 .LBB61_54: # %.preheader508 st.d $t8, $sp, 48 # 8-byte Folded Spill @@ -18618,22 +18614,24 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer add.w $a2, $a2, $a0 pcalau12i $a3, %pc_hi20(_ZZ8wavModelR5MixerE7counter) addi.d $a4, $a3, %pc_lo12(_ZZ8wavModelR5MixerE7counter) - pcalau12i $a6, %pc_hi20(.LCPI61_0) st.d $a5, $sp, 40 # 8-byte Folded Spill st.d $a4, $sp, 72 # 8-byte Folded Spill - st.d $a6, $sp, 192 # 8-byte Folded Spill bltz $a2, .LBB61_61 # %bb.55: # %.lr.ph move $s2, $zero move $s3, $zero - ld.d $a3, $sp, 176 # 8-byte Folded Reload + ld.d $a3, $sp, 184 # 8-byte Folded Reload alsl.d $s4, $a3, $a4, 2 - fld.d $fs0, $a6, %pc_lo12(.LCPI61_0) lu12i.w $a2, 4 ori $a2, $a2, 3048 add.d $a2, $sp, $a2 alsl.d $s6, $a3, $a2, 3 addi.w $s8, $zero, -1 + lu12i.w $a2, 411041 + ori $a2, $a2, 3244 + lu32i.d $a2, -8389 + lu52i.d $a2, $a2, 1022 + movgr2fr.d $fs0, $a2 b .LBB61_58 .p2align 4, , 16 .LBB61_56: # in Loop: Header=BB61_58 Depth=1 @@ -18678,7 +18676,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer blt $a3, $a2, .LBB61_56 b .LBB61_57 .LBB61_61: # %._crit_edge529 - ld.d $s6, $sp, 168 # 8-byte Folded Reload + ld.d $s6, $sp, 176 # 8-byte Folded Reload ld.w $a2, $s6, %pc_lo12(_ZZ8wavModelR5MixerE8channels) ori $a3, $zero, 2 bne $a2, $a3, .LBB61_72 @@ -18686,7 +18684,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer andi $a1, $a1, 1 beqz $a1, .LBB61_67 # %bb.63: # %.lr.ph536 - ld.d $a2, $sp, 176 # 8-byte Folded Reload + ld.d $a2, $sp, 184 # 8-byte Folded Reload ld.d $a1, $sp, 72 # 8-byte Folded Reload alsl.d $fp, $a2, $a1, 2 slli.d $a1, $a0, 4 @@ -18695,10 +18693,13 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer ori $a2, $a2, 3048 add.d $a2, $sp, $a2 add.d $s2, $a2, $a1 - ld.d $a1, $sp, 192 # 8-byte Folded Reload - fld.d $fs0, $a1, %pc_lo12(.LCPI61_0) ori $a1, $zero, 1 ori $s3, $zero, 784 + lu12i.w $a2, 411041 + ori $a2, $a2, 3244 + lu32i.d $a2, -8389 + lu52i.d $a2, $a2, 1022 + movgr2fr.d $fs0, $a2 ori $s4, $zero, 12 move $s5, $a0 move $s0, $a0 @@ -18745,17 +18746,20 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer blez $a0, .LBB61_72 # %bb.68: # %.lr.ph539 move $fp, $zero - ld.d $s4, $sp, 176 # 8-byte Folded Reload + ld.d $s4, $sp, 184 # 8-byte Folded Reload ld.d $a1, $sp, 72 # 8-byte Folded Reload alsl.d $s2, $s4, $a1, 2 lu12i.w $a1, 4 ori $a1, $a1, 3048 add.d $a1, $sp, $a1 - ld.d $a2, $sp, 192 # 8-byte Folded Reload - fld.d $fs0, $a2, %pc_lo12(.LCPI61_0) alsl.d $a1, $s4, $a1, 3 addi.d $s3, $a1, 800 ori $s0, $zero, 1 + lu12i.w $a1, 411041 + ori $a1, $a1, 3244 + lu32i.d $a1, -8389 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fs0, $a1 ld.d $s8, $sp, 208 # 8-byte Folded Reload b .LBB61_70 .p2align 4, , 16 @@ -18789,7 +18793,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer fstx.d $fa0, $s3, $a1 b .LBB61_69 .LBB61_72: - ld.d $s4, $sp, 176 # 8-byte Folded Reload + ld.d $s4, $sp, 184 # 8-byte Folded Reload ld.d $s8, $sp, 208 # 8-byte Folded Reload .LBB61_73: # %.loopexit506 pcalau12i $a1, %pc_hi20(_ZZ8wavModelR5MixerE1n) @@ -18818,30 +18822,34 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer st.d $a5, $sp, 32 # 8-byte Folded Spill bne $a3, $a4, .LBB61_99 # %bb.75: # %.preheader499 - ld.d $a3, $sp, 192 # 8-byte Folded Reload blez $a2, .LBB61_110 # %bb.76: # %.preheader498.preheader st.d $s7, $sp, 80 # 8-byte Folded Spill - fld.d $fs0, $a3, %pc_lo12(.LCPI61_0) move $s1, $zero - move $s8, $zero + move $a4, $zero ori $a3, $zero, 1 - ori $s6, $zero, 12 + ori $s4, $zero, 12 + lu12i.w $a2, 411041 + ori $a2, $a2, 3244 + lu32i.d $a2, -8389 + lu52i.d $a2, $a2, 1022 + movgr2fr.d $fs0, $a2 .LBB61_77: # %.preheader498 # =>This Loop Header: Depth=1 # Child Loop BB61_79 Depth 2 andi $a2, $a1, 1 - maskeqz $a2, $s6, $a2 + maskeqz $a2, $s4, $a2 add.w $a2, $a2, $a0 blt $a2, $a3, .LBB61_81 # %bb.78: # %.lr.ph560 # in Loop: Header=BB61_77 Depth=1 - move $s4, $a3 + move $s6, $a3 addi.d $s0, $a3, -1 - st.d $s1, $sp, 192 # 8-byte Folded Spill + st.d $s1, $sp, 96 # 8-byte Folded Spill move $s7, $s5 - st.d $s8, $sp, 88 # 8-byte Folded Spill - ld.d $fp, $sp, 208 # 8-byte Folded Reload + move $fp, $s8 + st.d $a4, $sp, 88 # 8-byte Folded Spill + move $s8, $a4 ld.d $s3, $sp, 200 # 8-byte Folded Reload .p2align 4, , 16 .LBB61_79: # Parent Loop BB61_77 Depth=1 @@ -18864,7 +18872,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer ld.w $a0, $fp, %pc_lo12(_ZL1S) fdiv.d $fa0, $fa0, $fs0 fst.d $fa0, $s7, 800 - maskeqz $a2, $s6, $a1 + maskeqz $a2, $s4, $a1 add.w $a2, $a2, $a0 addi.d $s8, $s8, 1 addi.d $s7, $s7, 16 @@ -18872,13 +18880,14 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer blt $s8, $a2, .LBB61_79 # %bb.80: # %._crit_edge561 # in Loop: Header=BB61_77 Depth=1 - addi.d $a3, $s4, 1 - ld.d $s8, $sp, 88 # 8-byte Folded Reload - addi.d $s8, $s8, 1 + addi.d $a3, $s6, 1 + ld.d $a4, $sp, 88 # 8-byte Folded Reload + addi.d $a4, $a4, 1 addi.d $s5, $s5, 800 - ld.d $s1, $sp, 192 # 8-byte Folded Reload + ld.d $s1, $sp, 96 # 8-byte Folded Reload addi.w $s1, $s1, 1 - blt $s4, $a2, .LBB61_77 + move $s8, $fp + blt $s6, $a2, .LBB61_77 .LBB61_81: # %.loopexit500 blez $a2, .LBB61_108 # %bb.82: # %.lr.ph597.preheader @@ -18888,7 +18897,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer lu12i.w $a2, 4 ori $a2, $a2, 3048 add.d $t2, $sp, $a2 - ld.d $a3, $sp, 176 # 8-byte Folded Reload + ld.d $a3, $sp, 184 # 8-byte Folded Reload alsl.d $a2, $a3, $t2, 3 addi.d $t3, $a2, 816 ori $t4, $zero, 2 @@ -18968,7 +18977,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer bgeu $t5, $s1, .LBB61_95 # %bb.91: # %.lr.ph582.us.preheader # in Loop: Header=BB61_84 Depth=1 - ld.d $a2, $sp, 176 # 8-byte Folded Reload + ld.d $a2, $sp, 184 # 8-byte Folded Reload alsl.d $a2, $a2, $s5, 3 add.d $a5, $a0, $a5 addi.w $a5, $a5, 1 @@ -19023,7 +19032,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer b .LBB61_83 .LBB61_97: # %call.sqrt # in Loop: Header=BB61_84 Depth=1 - st.d $t1, $sp, 192 # 8-byte Folded Spill + st.d $t1, $sp, 96 # 8-byte Folded Spill st.d $t3, $sp, 88 # 8-byte Folded Spill st.d $t7, $sp, 24 # 8-byte Folded Spill pcaddu18i $ra, %call36(sqrt) @@ -19038,13 +19047,13 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer lu12i.w $a0, 4 ori $a0, $a0, 3048 add.d $t2, $sp, $a0 - ld.d $t1, $sp, 192 # 8-byte Folded Reload + ld.d $t1, $sp, 96 # 8-byte Folded Reload fmov.d $fa1, $fa0 b .LBB61_89 .LBB61_98: # %.loopexit506._crit_edge ld.d $a1, $sp, 200 # 8-byte Folded Reload ld.b $a1, $a1, %pc_lo12(_ZL1D) - ld.d $s6, $sp, 96 # 8-byte Folded Reload + ld.d $s6, $sp, 104 # 8-byte Folded Reload ld.d $s3, $sp, 48 # 8-byte Folded Reload b .LBB61_124 .LBB61_99: # %.preheader503 @@ -19054,45 +19063,50 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer move $s7, $zero move $s3, $zero ori $a3, $zero, 1 - ori $s6, $zero, 12 + ori $s4, $zero, 12 + lu12i.w $a2, 411041 + ori $a2, $a2, 3244 + lu32i.d $a2, -8389 + lu52i.d $a2, $a2, 1022 + movgr2fr.d $fs0, $a2 b .LBB61_102 .p2align 4, , 16 .LBB61_101: # %.loopexit502 # in Loop: Header=BB61_102 Depth=1 - addi.d $a3, $s8, 1 + addi.d $a3, $s6, 1 andi $a2, $a1, 1 - maskeqz $a2, $s6, $a2 + maskeqz $a2, $s4, $a2 add.w $a2, $a2, $a0 addi.d $s5, $s5, 800 addi.w $s3, $s3, 1 addi.d $s7, $s7, 1 - bge $s8, $a2, .LBB61_81 + bge $s6, $a2, .LBB61_81 .LBB61_102: # %.lr.ph556 # =>This Loop Header: Depth=1 # Child Loop BB61_106 Depth 2 addi.d $a2, $a0, 1 bstrpick.d $a2, $a2, 31, 0 - move $s8, $a3 + move $s6, $a3 beq $a3, $a2, .LBB61_101 # %bb.103: # %.preheader501 # in Loop: Header=BB61_102 Depth=1 andi $a2, $a1, 1 - maskeqz $a2, $s6, $a2 + maskeqz $a2, $s4, $a2 add.w $a2, $a2, $a0 - blt $a2, $s8, .LBB61_101 + blt $a2, $s6, .LBB61_101 # %bb.104: # %.lr.ph543 # in Loop: Header=BB61_102 Depth=1 - addi.d $s0, $s8, -1 + addi.d $s0, $s6, -1 move $fp, $s7 move $s1, $s3 - move $s4, $s5 + move $s8, $s5 b .LBB61_106 .p2align 4, , 16 .LBB61_105: # in Loop: Header=BB61_106 Depth=2 andi $a2, $a1, 1 - maskeqz $a2, $s6, $a2 + maskeqz $a2, $s4, $a2 add.w $a2, $a2, $a0 - addi.d $s4, $s4, 16 + addi.d $s8, $s8, 16 addi.w $s1, $s1, 1 bge $fp, $a2, .LBB61_101 .LBB61_106: # Parent Loop BB61_102 Depth=1 @@ -19102,7 +19116,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer bstrpick.d $a2, $a2, 31, 0 beq $fp, $a2, .LBB61_105 # %bb.107: # in Loop: Header=BB61_106 Depth=2 - fld.d $fs0, $s4, 0 + fld.d $fs1, $s8, 0 ori $a1, $zero, 1 move $a0, $s0 pcaddu18i $ra, %call36(_Z1Xii) @@ -19115,21 +19129,19 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer mul.d $a0, $a0, $s2 movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 - ld.d $a0, $sp, 192 # 8-byte Folded Reload - fld.d $fa1, $a0, %pc_lo12(.LCPI61_0) ld.d $a0, $sp, 208 # 8-byte Folded Reload ld.w $a0, $a0, %pc_lo12(_ZL1S) ld.d $a1, $sp, 200 # 8-byte Folded Reload ld.b $a1, $a1, %pc_lo12(_ZL1D) - fsub.d $fa0, $fs0, $fa0 - fdiv.d $fa0, $fa0, $fa1 - fst.d $fa0, $s4, 800 + fsub.d $fa0, $fs1, $fa0 + fdiv.d $fa0, $fa0, $fs0 + fst.d $fa0, $s8, 800 b .LBB61_105 .LBB61_108: ld.d $s7, $sp, 80 # 8-byte Folded Reload -.LBB61_109: # %.thread - ld.d $s4, $sp, 176 # 8-byte Folded Reload -.LBB61_110: # %.thread +.LBB61_109: + ld.d $s4, $sp, 184 # 8-byte Folded Reload +.LBB61_110: ld.d $fp, $sp, 72 # 8-byte Folded Reload ld.d $a3, $sp, 64 # 8-byte Folded Reload ldx.w $a3, $fp, $a3 @@ -19256,8 +19268,8 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer .LBB61_123: # %.loopexit ld.d $a2, $sp, 32 # 8-byte Folded Reload st.w $zero, $a2, 0 - ld.d $s6, $sp, 96 # 8-byte Folded Reload - ld.d $s4, $sp, 176 # 8-byte Folded Reload + ld.d $s6, $sp, 104 # 8-byte Folded Reload + ld.d $s4, $sp, 184 # 8-byte Folded Reload ld.d $s3, $sp, 48 # 8-byte Folded Reload ld.d $s8, $sp, 208 # 8-byte Folded Reload .LBB61_124: @@ -19322,7 +19334,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer st.w $a3, $a4, 8 stx.w $a1, $a0, $a7 addi.d $a0, $a5, 1 - ld.d $s8, $sp, 184 # 8-byte Folded Reload + ld.d $s8, $sp, 192 # 8-byte Folded Reload ld.w $a4, $s8, %pc_lo12(pos) ld.d $ra, $sp, 56 # 8-byte Folded Reload ld.w $a5, $ra, 0 @@ -19637,7 +19649,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer addi.w $a3, $a3, 1 and $a3, $a2, $a3 ldx.b $a6, $a0, $a3 - ld.d $s5, $sp, 112 # 8-byte Folded Reload + ld.d $s5, $sp, 120 # 8-byte Folded Reload ld.w $a7, $s5, 0 bstrins.d $a5, $a6, 8, 8 slli.d $a5, $a5, 8 @@ -19654,7 +19666,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer ldx.b $t1, $a0, $a4 slli.d $a7, $a7, 16 slli.d $t0, $t0, 8 - ld.d $ra, $sp, 104 # 8-byte Folded Reload + ld.d $ra, $sp, 112 # 8-byte Folded Reload ld.w $t2, $ra, 0 or $a7, $a7, $t0 slli.d $t0, $t1, 9 @@ -19686,7 +19698,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer alsl.d $a2, $a3, $a3, 1 add.d $a2, $a4, $a2 slli.d $a1, $a1, 8 - ld.d $s4, $sp, 120 # 8-byte Folded Reload + ld.d $s4, $sp, 128 # 8-byte Folded Reload ld.w $a3, $s4, 0 or $a0, $a1, $a0 add.d $a0, $a2, $a0 @@ -19696,13 +19708,13 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer and $a0, $a1, $a0 and $a0, $a0, $t0 st.w $a0, $s4, 24 - ld.d $s0, $sp, 160 # 8-byte Folded Reload - ld.d $fp, $sp, 152 # 8-byte Folded Reload - ld.d $s1, $sp, 144 # 8-byte Folded Reload - ld.d $s2, $sp, 136 # 8-byte Folded Reload - ld.d $s3, $sp, 128 # 8-byte Folded Reload + ld.d $s0, $sp, 168 # 8-byte Folded Reload + ld.d $fp, $sp, 160 # 8-byte Folded Reload + ld.d $s1, $sp, 152 # 8-byte Folded Reload + ld.d $s2, $sp, 144 # 8-byte Folded Reload + ld.d $s3, $sp, 136 # 8-byte Folded Reload .LBB61_130: - ld.d $s6, $sp, 168 # 8-byte Folded Reload + ld.d $s6, $sp, 176 # 8-byte Folded Reload .LBB61_131: ld.d $a0, $s5, 32 pcalau12i $a1, %pc_hi20(y) @@ -19858,7 +19870,7 @@ _Z8wavModelR5Mixer: # @_Z8wavModelR5Mixer addi.d $a1, $a4, 8 st.w $a1, $s7, 96 st.h $a0, $a6, 14 - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload ld.w $a0, $a0, %pc_lo12(pos) ld.w $a1, $s8, 0 ld.w $a3, $s0, %pc_lo12(bpos) diff --git a/results/MultiSource/Benchmarks/Prolangs-C++/employ/CMakeFiles/employ.dir/driver.s b/results/MultiSource/Benchmarks/Prolangs-C++/employ/CMakeFiles/employ.dir/driver.s index 503f604b..26ae1753 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C++/employ/CMakeFiles/employ.dir/driver.s +++ b/results/MultiSource/Benchmarks/Prolangs-C++/employ/CMakeFiles/employ.dir/driver.s @@ -565,12 +565,7 @@ _ZN4Boss7NewWeekEv: # @_ZN4Boss7NewWeekEv .Lfunc_end18: .size _ZN4Boss7NewWeekEv, .Lfunc_end18-_ZN4Boss7NewWeekEv # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN18CommissionedWorkerC2EPKcS1_ff -.LCPI19_0: - .word 0x42700000 # float 60 - .text - .globl _ZN18CommissionedWorkerC2EPKcS1_ff + .globl _ZN18CommissionedWorkerC2EPKcS1_ff # -- Begin function _ZN18CommissionedWorkerC2EPKcS1_ff .p2align 5 .type _ZN18CommissionedWorkerC2EPKcS1_ff,@function _ZN18CommissionedWorkerC2EPKcS1_ff: # @_ZN18CommissionedWorkerC2EPKcS1_ff @@ -631,12 +626,11 @@ _ZN18CommissionedWorkerC2EPKcS1_ff: # @_ZN18CommissionedWorkerC2EPKcS1_ff st.w $zero, $fp, 32 b .LBB19_5 .LBB19_2: - pcalau12i $a0, %pc_hi20(.LCPI19_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI19_0) + lu12i.w $a0, 272128 + movgr2fr.w $fa0, $a0 fcmp.cule.s $fcc0, $fs0, $fa0 bcnez $fcc0, .LBB19_4 # %bb.3: - lu12i.w $a0, 272128 st.w $a0, $fp, 32 b .LBB19_5 .LBB19_4: @@ -668,12 +662,7 @@ _ZN18CommissionedWorker15SetWeeklySalaryEf: # @_ZN18CommissionedWorker15SetWeekl .Lfunc_end20: .size _ZN18CommissionedWorker15SetWeeklySalaryEf, .Lfunc_end20-_ZN18CommissionedWorker15SetWeeklySalaryEf # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN18CommissionedWorker17SetCommissionRateEf -.LCPI21_0: - .word 0x42700000 # float 60 - .text - .globl _ZN18CommissionedWorker17SetCommissionRateEf + .globl _ZN18CommissionedWorker17SetCommissionRateEf # -- Begin function _ZN18CommissionedWorker17SetCommissionRateEf .p2align 5 .type _ZN18CommissionedWorker17SetCommissionRateEf,@function _ZN18CommissionedWorker17SetCommissionRateEf: # @_ZN18CommissionedWorker17SetCommissionRateEf @@ -685,12 +674,11 @@ _ZN18CommissionedWorker17SetCommissionRateEf: # @_ZN18CommissionedWorker17SetCom st.w $zero, $a0, 32 ret .LBB21_2: - pcalau12i $a1, %pc_hi20(.LCPI21_0) - fld.s $fa1, $a1, %pc_lo12(.LCPI21_0) + lu12i.w $a1, 272128 + movgr2fr.w $fa1, $a1 fcmp.cule.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB21_4 # %bb.3: - lu12i.w $a1, 272128 st.w $a1, $a0, 32 ret .LBB21_4: @@ -709,26 +697,23 @@ _ZN18CommissionedWorker13SalesThisWeekEf: # @_ZN18CommissionedWorker13SalesThisW .Lfunc_end22: .size _ZN18CommissionedWorker13SalesThisWeekEf, .Lfunc_end22-_ZN18CommissionedWorker13SalesThisWeekEf # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN18CommissionedWorker8EarningsEv -.LCPI23_0: - .dword 0x4059000000000000 # double 100 - .text - .globl _ZN18CommissionedWorker8EarningsEv + .globl _ZN18CommissionedWorker8EarningsEv # -- Begin function _ZN18CommissionedWorker8EarningsEv .p2align 5 .type _ZN18CommissionedWorker8EarningsEv,@function _ZN18CommissionedWorker8EarningsEv: # @_ZN18CommissionedWorker8EarningsEv # %bb.0: - fld.s $fa0, $a0, 28 - fld.s $fa1, $a0, 32 - pcalau12i $a1, %pc_hi20(.LCPI23_0) - fld.d $fa2, $a1, %pc_lo12(.LCPI23_0) - fld.s $fa3, $a0, 36 + fld.s $fa0, $a0, 32 + fld.s $fa1, $a0, 28 fcvt.d.s $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + fld.s $fa2, $a0, 36 + movgr2fr.d $fa3, $a1 + fdiv.d $fa0, $fa0, $fa3 fcvt.d.s $fa1, $fa1 - fdiv.d $fa1, $fa1, $fa2 - fcvt.d.s $fa2, $fa3 - fmadd.d $fa0, $fa1, $fa2, $fa0 + fcvt.d.s $fa2, $fa2 + fmadd.d $fa0, $fa0, $fa2, $fa1 fcvt.s.d $fa0, $fa0 ret .Lfunc_end23: @@ -836,34 +821,29 @@ _ZN18CommissionedWorker5PrintEv: # @_ZN18CommissionedWorker5PrintEv .size _ZN18CommissionedWorker5PrintEv, .Lfunc_end24-_ZN18CommissionedWorker5PrintEv .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN18CommissionedWorker5RaiseEi -.LCPI25_0: - .word 0x42700000 # float 60 - .text - .globl _ZN18CommissionedWorker5RaiseEi + .globl _ZN18CommissionedWorker5RaiseEi # -- Begin function _ZN18CommissionedWorker5RaiseEi .p2align 5 .type _ZN18CommissionedWorker5RaiseEi,@function _ZN18CommissionedWorker5RaiseEi: # @_ZN18CommissionedWorker5RaiseEi # %bb.0: blez $a1, .LBB25_2 # %bb.1: # %_ZN18CommissionedWorker17SetCommissionRateEf.exit - pcalau12i $a2, %pc_hi20(.LCPI25_0) - fld.s $fa0, $a2, %pc_lo12(.LCPI25_0) - fld.s $fa1, $a0, 32 + fld.s $fa0, $a0, 32 bstrpick.d $a1, $a1, 31, 0 - movgr2fr.d $fa2, $a1 - ffint.s.l $fa2, $fa2 - fadd.s $fa1, $fa1, $fa2 - fcmp.clt.s $fcc0, $fa0, $fa1 - fsel $fa0, $fa1, $fa0, $fcc0 + movgr2fr.d $fa1, $a1 + ffint.s.l $fa1, $fa1 + fadd.s $fa0, $fa0, $fa1 + lu12i.w $a1, 272128 + movgr2fr.w $fa2, $a1 + fcmp.clt.s $fcc0, $fa2, $fa0 + fsel $fa2, $fa0, $fa2, $fcc0 movgr2fr.w $fa3, $zero fld.s $fa4, $a0, 24 fld.s $fa5, $a0, 28 - fcmp.clt.s $fcc0, $fa1, $fa3 - fsel $fa0, $fa0, $fa3, $fcc0 + fcmp.clt.s $fcc0, $fa0, $fa3 + fsel $fa0, $fa2, $fa3, $fcc0 fst.s $fa0, $a0, 32 - fmadd.s $fa0, $fa2, $fa4, $fa5 + fmadd.s $fa0, $fa1, $fa4, $fa5 fst.s $fa0, $a0, 28 .LBB25_2: ret @@ -1536,25 +1516,19 @@ _ZN20HourlyWorkerOvertimeC2EPKcS1_f: # @_ZN20HourlyWorkerOvertimeC2EPKcS1_f .size _ZN20HourlyWorkerOvertimeC2EPKcS1_f, .Lfunc_end43-_ZN20HourlyWorkerOvertimeC2EPKcS1_f .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN20HourlyWorkerOvertime8EarningsEv -.LCPI44_0: - .word 0x42200000 # float 40 -.LCPI44_1: - .word 0xc2200000 # float -40 - .text - .globl _ZN20HourlyWorkerOvertime8EarningsEv + .globl _ZN20HourlyWorkerOvertime8EarningsEv # -- Begin function _ZN20HourlyWorkerOvertime8EarningsEv .p2align 5 .type _ZN20HourlyWorkerOvertime8EarningsEv,@function _ZN20HourlyWorkerOvertime8EarningsEv: # @_ZN20HourlyWorkerOvertime8EarningsEv # %bb.0: fld.s $fa0, $a0, 32 - pcalau12i $a1, %pc_hi20(.LCPI44_0) - fld.s $fa1, $a1, %pc_lo12(.LCPI44_0) - pcalau12i $a1, %pc_hi20(.LCPI44_1) - fld.s $fa2, $a1, %pc_lo12(.LCPI44_1) + lu12i.w $a1, 270848 + movgr2fr.w $fa1, $a1 fcmp.clt.s $fcc0, $fa1, $fa0 - fadd.s $fa1, $fa0, $fa2 + lu12i.w $a1, -253440 + lu32i.d $a1, 0 + movgr2fr.w $fa1, $a1 + fadd.s $fa1, $fa0, $fa1 fld.s $fa2, $a0, 28 fcvt.d.s $fa1, $fa1 movgr2fr.d $fa3, $zero diff --git a/results/MultiSource/Benchmarks/Prolangs-C++/ocean/CMakeFiles/ocean.dir/ocean.s b/results/MultiSource/Benchmarks/Prolangs-C++/ocean/CMakeFiles/ocean.dir/ocean.s index 40617e53..e168ec51 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C++/ocean/CMakeFiles/ocean.dir/ocean.s +++ b/results/MultiSource/Benchmarks/Prolangs-C++/ocean/CMakeFiles/ocean.dir/ocean.s @@ -979,12 +979,8 @@ GCC_except_table16: .Lcst_end4: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN6Random8randRealEv -.LCPI17_0: - .word 0x30000000 # float 4.65661287E-10 .text - .globl _ZN6Random8randRealEv + .globl _ZN6Random8randRealEv # -- Begin function _ZN6Random8randRealEv .p2align 5 .type _ZN6Random8randRealEv,@function _ZN6Random8randRealEv: # @_ZN6Random8randRealEv @@ -993,11 +989,11 @@ _ZN6Random8randRealEv: # @_ZN6Random8randRealEv st.d $ra, $sp, 8 # 8-byte Folded Spill pcaddu18i $ra, %call36(random) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI17_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI17_0) - movgr2fr.d $fa1, $a0 - ffint.s.l $fa1, $fa1 - fmul.s $fa0, $fa1, $fa0 + movgr2fr.d $fa0, $a0 + ffint.s.l $fa0, $fa0 + lu12i.w $a0, 196608 + movgr2fr.w $fa1, $a0 + fmul.s $fa0, $fa0, $fa1 ld.d $ra, $sp, 8 # 8-byte Folded Reload addi.d $sp, $sp, 16 ret diff --git a/results/MultiSource/Benchmarks/Prolangs-C++/trees/CMakeFiles/trees.dir/tree.s b/results/MultiSource/Benchmarks/Prolangs-C++/trees/CMakeFiles/trees.dir/tree.s index bc789925..3e5811e9 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C++/trees/CMakeFiles/trees.dir/tree.s +++ b/results/MultiSource/Benchmarks/Prolangs-C++/trees/CMakeFiles/trees.dir/tree.s @@ -622,12 +622,7 @@ _ZN10BinaryNodeC2EPc4TreeS1_: # @_ZN10BinaryNodeC2EPc4TreeS1_ .size _ZN10BinaryNodeC2EPc4TreeS1_, .Lfunc_end11-_ZN10BinaryNodeC2EPc4TreeS1_ .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZN10BinaryNode9nodeValueEv -.LCPI12_0: - .word 0x4b18967f # float 9999999 - .text - .globl _ZN10BinaryNode9nodeValueEv + .globl _ZN10BinaryNode9nodeValueEv # -- Begin function _ZN10BinaryNode9nodeValueEv .p2align 5 .type _ZN10BinaryNode9nodeValueEv,@function _ZN10BinaryNode9nodeValueEv: # @_ZN10BinaryNode9nodeValueEv @@ -659,9 +654,10 @@ _ZN10BinaryNode9nodeValueEv: # @_ZN10BinaryNode9nodeValueEv ld.d $a1, $a1, 0 fmov.s $fs0, $fa0 jirl $ra, $a1, 0 - pcalau12i $a0, %pc_hi20(.LCPI12_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI12_0) fadd.s $fa0, $fs0, $fa0 + lu12i.w $a0, 307593 + ori $a0, $a0, 1663 + movgr2fr.w $fa1, $a0 movgr2cf $fcc0, $s0 fsel $fa0, $fa1, $fa0, $fcc0 fld.d $fs0, $sp, 0 # 8-byte Folded Reload diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/config2.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/config2.s index 46ad74da..1e65149d 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/config2.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/config2.s @@ -1,34 +1,21 @@ .file "config2.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function config2 -.LCPI0_0: - .dword 0x3ffccccccccccccd # double 1.8 -.LCPI0_1: - .dword 0x3ff0cccccccccccd # double 1.05 -.LCPI0_2: - .dword 0x3f847ae147ae147b # double 0.01 -.LCPI0_3: - .dword 0x3ff199999999999a # double 1.1000000000000001 -.LCPI0_4: - .dword 0x3f947ae147ae147b # double 0.02 -.LCPI0_5: - .dword 0x3fa47ae147ae147b # double 0.040000000000000001 .text - .globl config2 + .globl config2 # -- Begin function config2 .p2align 5 .type config2,@function config2: # @config2 # %bb.0: - addi.d $sp, $sp, -96 - st.d $ra, $sp, 88 # 8-byte Folded Spill - st.d $fp, $sp, 80 # 8-byte Folded Spill - st.d $s0, $sp, 72 # 8-byte Folded Spill - st.d $s1, $sp, 64 # 8-byte Folded Spill - st.d $s2, $sp, 56 # 8-byte Folded Spill - st.d $s3, $sp, 48 # 8-byte Folded Spill - st.d $s4, $sp, 40 # 8-byte Folded Spill - fst.d $fs0, $sp, 32 # 8-byte Folded Spill - fst.d $fs1, $sp, 24 # 8-byte Folded Spill + addi.d $sp, $sp, -112 + st.d $ra, $sp, 104 # 8-byte Folded Spill + st.d $fp, $sp, 96 # 8-byte Folded Spill + st.d $s0, $sp, 88 # 8-byte Folded Spill + st.d $s1, $sp, 80 # 8-byte Folded Spill + st.d $s2, $sp, 72 # 8-byte Folded Spill + st.d $s3, $sp, 64 # 8-byte Folded Spill + st.d $s4, $sp, 56 # 8-byte Folded Spill + st.d $s5, $sp, 48 # 8-byte Folded Spill + fst.d $fs0, $sp, 40 # 8-byte Folded Spill + fst.d $fs1, $sp, 32 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(numcells) ld.d $s0, $a0, %got_pc_lo12(numcells) ld.w $a1, $s0, 0 @@ -153,11 +140,15 @@ config2: # @config2 ld.d $a1, $a1, %got_pc_lo12(wire_est_factor) ld.w $a1, $a1, 0 movgr2fr.w $fa0, $a1 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI0_0) ffint.d.w $fa0, $fa0 - vldi $vr2, -988 - fdiv.d $fa0, $fa0, $fa2 + vldi $vr1, -988 + lu12i.w $a1, -209716 + ori $s4, $a1, 3277 + fdiv.d $fa0, $fa0, $fa1 + move $a1, $s4 + lu32i.d $a1, -209716 + lu52i.d $a1, $a1, 1023 + movgr2fr.d $fa1, $a1 fadd.d $fa0, $fa0, $fa1 movgr2fr.w $fa1, $a0 ffint.d.w $fa1, $fa1 @@ -166,8 +157,8 @@ config2: # @config2 movfr2gr.s $a0, $fa0 addi.d $a0, $a0, 2 pcalau12i $a1, %got_pc_hi20(aveChanWid) - ld.d $s3, $a1, %got_pc_lo12(aveChanWid) - st.w $a0, $s3, 0 + ld.d $s5, $a1, %got_pc_lo12(aveChanWid) + st.w $a0, $s5, 0 pcalau12i $a0, %got_pc_hi20(fpo) ld.d $s1, $a0, %got_pc_lo12(fpo) ld.d $a0, $s1, 0 @@ -182,7 +173,7 @@ config2: # @config2 pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 ld.d $a0, $s1, 0 - ld.w $a2, $s3, 0 + ld.w $a2, $s5, 0 pcalau12i $a1, %pc_hi20(.L.str.2) addi.d $a1, $a1, %pc_lo12(.L.str.2) pcaddu18i $ra, %call36(fprintf) @@ -195,123 +186,125 @@ config2: # @config2 ld.w $a0, $a0, 0 b .LBB0_41 .LBB0_13: # %.preheader - ld.w $a1, $s0, 0 + ld.w $a0, $s0, 0 pcalau12i $fp, %pc_hi20(expandExtra) - pcalau12i $a0, %pc_hi20(.LCPI0_1) - pcalau12i $s2, %pc_hi20(.LCPI0_3) - blez $a1, .LBB0_25 + lu12i.w $s3, 293601 + lu12i.w $s2, -419431 + blez $a0, .LBB0_25 # %bb.14: # %.lr.ph133 - pcalau12i $a2, %got_pc_hi20(cellarray) - ld.d $a2, $a2, %got_pc_lo12(cellarray) - ld.d $a2, $a2, 0 - pcalau12i $a3, %got_pc_hi20(maxWeight) - ld.d $a3, $a3, %got_pc_lo12(maxWeight) - ld.w $a3, $a3, 0 - ld.w $a4, $s3, 0 - mul.d $a3, $a3, $a3 - mul.d $a3, $a3, $a4 - addi.d $a4, $a1, 1 - bstrpick.d $a4, $a4, 31, 0 + pcalau12i $a1, %got_pc_hi20(cellarray) + ld.d $a1, $a1, %got_pc_lo12(cellarray) + ld.d $a1, $a1, 0 + pcalau12i $a2, %got_pc_hi20(maxWeight) + ld.d $a2, $a2, %got_pc_lo12(maxWeight) + ld.w $a2, $a2, 0 + ld.w $a3, $s5, 0 + mul.d $a2, $a2, $a2 + mul.d $a2, $a2, $a3 + addi.d $a3, $a0, 1 + bstrpick.d $a3, $a3, 31, 0 movgr2fr.d $fs0, $zero + ori $a4, $zero, 1 ori $a5, $zero, 1 - ori $a6, $zero, 1 b .LBB0_17 .p2align 4, , 16 .LBB0_15: # in Loop: Header=BB0_17 Depth=1 - ld.w $t0, $a7, 56 - ld.w $t1, $a7, 60 - ld.w $t2, $a7, 64 - ld.w $a7, $a7, 68 - sub.d $t0, $t1, $t0 - add.d $t0, $t0, $a3 - sub.d $a7, $a7, $t2 - add.d $a7, $a7, $a3 - mulw.d.w $a7, $t0, $a7 + ld.w $a7, $a6, 56 + ld.w $t0, $a6, 60 + ld.w $t1, $a6, 64 + ld.w $a6, $a6, 68 + sub.d $a7, $t0, $a7 + add.d $a7, $a7, $a2 + sub.d $a6, $a6, $t1 + add.d $a6, $a6, $a2 + mulw.d.w $a6, $a7, $a6 .LBB0_16: # in Loop: Header=BB0_17 Depth=1 - movgr2fr.w $fa0, $a7 + movgr2fr.w $fa0, $a6 ffint.d.w $fa0, $fa0 - addi.d $a6, $a6, 1 + addi.d $a5, $a5, 1 fadd.d $fs0, $fs0, $fa0 - beq $a6, $a4, .LBB0_23 + beq $a5, $a3, .LBB0_23 .LBB0_17: # =>This Loop Header: Depth=1 # Child Loop BB0_20 Depth 2 - slli.d $a7, $a6, 3 - ldx.d $a7, $a2, $a7 - ld.w $t0, $a7, 56 - ld.w $t1, $a7, 60 - alsl.d $a7, $t0, $a7, 3 - ld.d $a7, $a7, 152 - beq $t1, $a5, .LBB0_15 + slli.d $a6, $a5, 3 + ldx.d $a6, $a1, $a6 + ld.w $a7, $a6, 56 + ld.w $t0, $a6, 60 + alsl.d $a6, $a7, $a6, 3 + ld.d $a6, $a6, 152 + beq $t0, $a4, .LBB0_15 # %bb.18: # in Loop: Header=BB0_17 Depth=1 - ld.d $t1, $a7, 0 - beqz $t1, .LBB0_21 + ld.d $t0, $a6, 0 + beqz $t0, .LBB0_21 # %bb.19: # %.lr.ph127.preheader # in Loop: Header=BB0_17 Depth=1 - move $t0, $zero + move $a7, $zero .p2align 4, , 16 .LBB0_20: # %.lr.ph127 # Parent Loop BB0_17 Depth=1 # => This Inner Loop Header: Depth=2 - ld.w $t2, $t1, 56 - ld.w $t3, $t1, 60 - ld.w $t4, $t1, 64 - ld.w $t5, $t1, 68 - ld.d $t1, $t1, 0 - sub.d $t2, $t3, $t2 - sub.d $t3, $t5, $t4 - mul.d $t2, $t3, $t2 - add.d $t0, $t2, $t0 - bnez $t1, .LBB0_20 + ld.w $t1, $t0, 56 + ld.w $t2, $t0, 60 + ld.w $t3, $t0, 64 + ld.w $t4, $t0, 68 + ld.d $t0, $t0, 0 + sub.d $t1, $t2, $t1 + sub.d $t2, $t4, $t3 + mul.d $t1, $t2, $t1 + add.d $a7, $t1, $a7 + bnez $t0, .LBB0_20 b .LBB0_22 .LBB0_21: # in Loop: Header=BB0_17 Depth=1 - move $t0, $zero + move $a7, $zero .LBB0_22: # %._crit_edge128 # in Loop: Header=BB0_17 Depth=1 - ld.w $t1, $a7, 56 - ld.w $t2, $a7, 60 - ld.w $t3, $a7, 64 - ld.w $a7, $a7, 68 - sub.d $t1, $t2, $t1 - add.d $t2, $a3, $t1 - sub.d $a7, $a7, $t3 - add.d $t3, $a3, $a7 - mul.d $t2, $t2, $t3 - mul.d $a7, $a7, $t1 - sub.d $a7, $t0, $a7 - add.w $a7, $a7, $t2 + ld.w $t0, $a6, 56 + ld.w $t1, $a6, 60 + ld.w $t2, $a6, 64 + ld.w $a6, $a6, 68 + sub.d $t0, $t1, $t0 + add.d $t1, $a2, $t0 + sub.d $a6, $a6, $t2 + add.d $t2, $a2, $a6 + mul.d $t1, $t1, $t2 + mul.d $a6, $a6, $t0 + sub.d $a6, $a7, $a6 + add.w $a6, $a6, $t1 b .LBB0_16 .LBB0_23: # %._crit_edge134 - lu12i.w $a2, -209716 - ori $a2, $a2, 3277 - lu32i.d $a2, 52428 - lu52i.d $a2, $a2, 1023 - ori $a3, $zero, 9 - st.d $a2, $fp, %pc_lo12(expandExtra) - bge $a3, $a1, .LBB0_26 + move $a1, $s4 + lu32i.d $a1, 52428 + lu52i.d $a1, $a1, 1023 + ori $a2, $zero, 9 + st.d $a1, $fp, %pc_lo12(expandExtra) + bge $a2, $a0, .LBB0_26 # %bb.24: - fld.d $fs1, $a0, %pc_lo12(.LCPI0_1) + movgr2fr.d $fs1, $a1 b .LBB0_29 .LBB0_25: movgr2fr.d $fs0, $zero .LBB0_26: # %._crit_edge134.thread - ori $a2, $zero, 10 - sub.d $a1, $a2, $a1 - bstrpick.d $a1, $a1, 31, 0 - fld.d $fa0, $a0, %pc_lo12(.LCPI0_1) - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_2) - fld.d $fs1, $s2, %pc_lo12(.LCPI0_3) - movgr2fr.d $fa2, $a1 - ffint.d.l $fa2, $fa2 - fmadd.d $fa0, $fa2, $fa1, $fa0 + ori $a1, $zero, 10 + sub.d $a0, $a1, $a0 + bstrpick.d $a0, $a0, 31, 0 + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + lu32i.d $s4, 52428 + lu52i.d $a0, $s4, 1023 + movgr2fr.d $fa1, $a0 + ori $a0, $s3, 1147 + lu32i.d $a0, 293601 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa2, $fa1 + ori $a0, $s2, 2458 + lu32i.d $a0, 104857 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fs1, $a0 fcmp.cule.d $fcc0, $fa0, $fs1 fst.d $fa0, $fp, %pc_lo12(expandExtra) bcnez $fcc0, .LBB0_28 # %bb.27: - lu12i.w $a0, -419431 - ori $a0, $a0, 2458 - lu32i.d $a0, 104857 - lu52i.d $a0, $a0, 1023 st.d $a0, $fp, %pc_lo12(expandExtra) b .LBB0_29 .LBB0_28: @@ -326,18 +319,18 @@ config2: # @config2 movfr2gr.s $a0, $fa0 addi.d $a1, $a0, 1 pcalau12i $a0, %got_pc_hi20(blockt) - ld.d $s4, $a0, %got_pc_lo12(blockt) - st.w $a1, $s4, 0 + ld.d $s5, $a0, %got_pc_lo12(blockt) + st.w $a1, $s5, 0 pcalau12i $a0, %got_pc_hi20(blockr) - ld.d $s3, $a0, %got_pc_lo12(blockr) + ld.d $s4, $a0, %got_pc_lo12(blockr) ld.d $a0, $s1, 0 - st.w $a1, $s3, 0 + st.w $a1, $s4, 0 movfr2gr.d $a2, $fs1 pcalau12i $a1, %pc_hi20(.L.str.3) addi.d $a1, $a1, %pc_lo12(.L.str.3) pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 - ld.w $a0, $s3, 0 + ld.w $a0, $s4, 0 ld.w $a1, $s0, 0 movgr2fr.w $fa2, $a0 movgr2fr.w $fa0, $a1 @@ -360,17 +353,22 @@ config2: # @config2 bcnez $fcc0, .LBB0_35 # %bb.33: vldi $vr1, -888 - pcalau12i $a0, %pc_hi20(.LCPI0_4) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_4) - pcalau12i $a0, %pc_hi20(.LCPI0_5) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_5) - fld.d $fa4, $fp, %pc_lo12(expandExtra) - fld.d $fa5, $s2, %pc_lo12(.LCPI0_3) fadd.d $fa0, $fa0, $fa1 - fmadd.d $fa0, $fa0, $fa3, $fa2 - fadd.d $fa0, $fa0, $fa4 - fcmp.clt.d $fcc0, $fa5, $fa0 - fsel $fs1, $fa0, $fa5, $fcc0 + ori $a0, $s3, 1147 + lu32i.d $a0, 293601 + lu52i.d $a1, $a0, 1017 + movgr2fr.d $fa1, $a1 + fld.d $fa2, $fp, %pc_lo12(expandExtra) + lu52i.d $a0, $a0, 1018 + movgr2fr.d $fa3, $a0 + fmadd.d $fa0, $fa0, $fa3, $fa1 + fadd.d $fa0, $fa0, $fa2 + ori $a0, $s2, 2458 + lu32i.d $a0, 104857 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa1, $a0 + fcmp.clt.d $fcc0, $fa1, $fa0 + fsel $fs1, $fa0, $fa1, $fcc0 fsqrt.d $fa0, $fs0 fcmp.cor.d $fcc0, $fa0, $fa0 fst.d $fs1, $fp, %pc_lo12(expandExtra) @@ -381,8 +379,8 @@ config2: # @config2 movfr2gr.s $a1, $fa0 ld.d $a0, $s1, 0 addi.d $a1, $a1, 1 + st.w $a1, $s5, 0 st.w $a1, $s4, 0 - st.w $a1, $s3, 0 movfr2gr.d $a2, $fs1 pcalau12i $a1, %pc_hi20(.L.str.3) addi.d $a1, $a1, %pc_lo12(.L.str.3) @@ -405,8 +403,8 @@ config2: # @config2 movfr2gr.s $a0, $fa0 addi.d $a1, $a0, 1 ld.d $a0, $s1, 0 + st.w $a1, $s5, 0 st.w $a1, $s4, 0 - st.w $a1, $s3, 0 movfr2gr.d $a2, $fs1 pcalau12i $a1, %pc_hi20(.L.str.3) addi.d $a1, $a1, %pc_lo12(.L.str.3) @@ -421,17 +419,17 @@ config2: # @config2 fmov.d $fa0, $fs0 bceqz $fcc0, .LBB0_45 .LBB0_39: # %.split170 - ld.w $a0, $s4, 0 + ld.w $a0, $s5, 0 movgr2fr.w $fa1, $a0 ffint.d.w $fa1, $fa1 fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 addi.d $a0, $a0, 1 - st.w $a0, $s4, 0 + st.w $a0, $s5, 0 bceqz $fcc0, .LBB0_46 .LBB0_40: # %.split170.split - ld.w $a0, $s3, 0 + ld.w $a0, $s4, 0 frecip.d $fa0, $fs0 movgr2fr.w $fa1, $a0 ffint.d.w $fa1, $fa1 @@ -439,7 +437,7 @@ config2: # @config2 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 addi.w $a0, $a0, 1 - st.w $a0, $s3, 0 + st.w $a0, $s4, 0 .LBB0_41: pcalau12i $a1, %got_pc_hi20(maxWeight) ld.d $a1, $a1, %got_pc_lo12(maxWeight) @@ -545,16 +543,17 @@ config2: # @config2 ld.d $a1, $a1, %got_pc_lo12(binOffsetY) st.w $a0, $a1, 0 move $a0, $zero - fld.d $fs1, $sp, 24 # 8-byte Folded Reload - fld.d $fs0, $sp, 32 # 8-byte Folded Reload - ld.d $s4, $sp, 40 # 8-byte Folded Reload - ld.d $s3, $sp, 48 # 8-byte Folded Reload - ld.d $s2, $sp, 56 # 8-byte Folded Reload - ld.d $s1, $sp, 64 # 8-byte Folded Reload - ld.d $s0, $sp, 72 # 8-byte Folded Reload - ld.d $fp, $sp, 80 # 8-byte Folded Reload - ld.d $ra, $sp, 88 # 8-byte Folded Reload - addi.d $sp, $sp, 96 + fld.d $fs1, $sp, 32 # 8-byte Folded Reload + fld.d $fs0, $sp, 40 # 8-byte Folded Reload + ld.d $s5, $sp, 48 # 8-byte Folded Reload + ld.d $s4, $sp, 56 # 8-byte Folded Reload + ld.d $s3, $sp, 64 # 8-byte Folded Reload + ld.d $s2, $sp, 72 # 8-byte Folded Reload + ld.d $s1, $sp, 80 # 8-byte Folded Reload + ld.d $s0, $sp, 88 # 8-byte Folded Reload + ld.d $fp, $sp, 96 # 8-byte Folded Reload + ld.d $ra, $sp, 104 # 8-byte Folded Reload + addi.d $sp, $sp, 112 pcaddu18i $t8, %call36(loadbins) jr $t8 .LBB0_42: # %call.sqrt @@ -578,10 +577,10 @@ config2: # @config2 .LBB0_45: # %call.sqrt171 fmov.d $fa0, $fs1 movcf2gr $a0, $fcc0 - st.d $a0, $sp, 16 + st.d $a0, $sp, 24 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ld.d $a0, $sp, 16 + ld.d $a0, $sp, 24 movgr2cf $fcc0, $a0 b .LBB0_39 .LBB0_46: # %call.sqrt172 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/finalout.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/finalout.s index 5bd9a72a..fe3ff0ff 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/finalout.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/finalout.s @@ -1,12 +1,6 @@ .file "finalout.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function finalout -.LCPI0_0: - .dword 0x408ff80000000000 # double 1023 -.LCPI0_1: - .dword 0xc090c80000000000 # double -1074 .text - .globl finalout + .globl finalout # -- Begin function finalout .p2align 5 .type finalout,@function finalout: # @finalout @@ -28,16 +22,20 @@ finalout: # @finalout fadd.d $fa1, $fa0, $fa1 fadd.d $fa1, $fa1, $fa1 vldi $vr2, -1000 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_0) fcmp.clt.d $fcc0, $fa2, $fa0 vldi $vr0, -988 fsel $fa0, $fa0, $fa1, $fcc0 - fcmp.clt.d $fcc0, $fa3, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -2048 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa1, $a0 + fcmp.clt.d $fcc0, $fa1, $fa0 + ori $a0, $zero, 0 bcnez $fcc0, .LBB0_16 # %bb.1: - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_1) + lu32i.d $a0, 51200 + lu52i.d $a0, $a0, -1015 + movgr2fr.d $fa1, $a0 fcmp.cule.d $fcc0, $fa1, $fa0 bceqz $fcc0, .LBB0_16 .LBB0_2: # %cdce.end diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/findcost.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/findcost.s index 52a2257b..64503d00 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/findcost.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/findcost.s @@ -1,10 +1,6 @@ .file "findcost.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function findcost -.LCPI0_0: - .dword 0x40f86a0000000000 # double 1.0E+5 .text - .globl findcost + .globl findcost # -- Begin function findcost .p2align 5 .type findcost,@function findcost: # @findcost @@ -292,8 +288,10 @@ findcost: # @findcost st.w $zero, $a0, 0 blez $a1, .LBB0_48 # %bb.40: # %.lr.ph179 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI0_0) + ori $a2, $zero, 0 + lu32i.d $a2, -497152 + lu52i.d $a2, $a2, 1039 + movgr2fr.d $fa0, $a2 fdiv.d $fa0, $fs0, $fa0 pcalau12i $a2, %got_pc_hi20(cellarray) ld.d $a3, $a2, %got_pc_lo12(cellarray) diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/fuloop.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/fuloop.s index 462a304c..4c3dc58f 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/fuloop.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/fuloop.s @@ -1,10 +1,6 @@ .file "fuloop.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function fuloop -.LCPI0_0: - .dword 0x41dfffffffc00000 # double 2147483647 .text - .globl fuloop + .globl fuloop # -- Begin function fuloop .p2align 5 .type fuloop,@function fuloop: # @fuloop @@ -166,6 +162,9 @@ fuloop: # @fuloop ori $s8, $a0, 3693 lu12i.w $a0, 3 ori $s4, $a0, 57 + lu12i.w $a0, -1024 + lu52i.d $a0, $a0, 1053 + movgr2fr.d $fs0, $a0 pcalau12i $a0, %got_pc_hi20(gridGiven) ld.d $a0, $a0, %got_pc_lo12(gridGiven) st.d $a0, $sp, 32 # 8-byte Folded Spill @@ -226,29 +225,29 @@ fuloop: # @fuloop div.w $t4, $t4, $s2 sub.d $t3, $t3, $t1 addi.d $t3, $t3, 1 - movgr2fr.w $fa2, $t3 - ffint.d.w $fa2, $fa2 + movgr2fr.w $fa1, $t3 + ffint.d.w $fa1, $fa1 mul.d $a6, $a6, $s8 add.d $a6, $a6, $s4 bstrpick.d $t3, $a6, 30, 0 - movgr2fr.w $fa3, $t3 - ffint.d.w $fa3, $fa3 - fdiv.d $fa3, $fa3, $fa1 - fmul.d $fa2, $fa3, $fa2 - ftintrz.w.d $fa2, $fa2 - movfr2gr.s $t3, $fa2 + movgr2fr.w $fa2, $t3 + ffint.d.w $fa2, $fa2 + fdiv.d $fa2, $fa2, $fs0 + fmul.d $fa1, $fa2, $fa1 + ftintrz.w.d $fa1, $fa1 + movfr2gr.s $t3, $fa1 add.w $t1, $t1, $t3 sub.d $t3, $t4, $t2 addi.d $t3, $t3, 1 - movgr2fr.w $fa2, $t3 - ffint.d.w $fa2, $fa2 + movgr2fr.w $fa1, $t3 + ffint.d.w $fa1, $fa1 mul.d $a6, $a6, $s8 add.w $a6, $a6, $s4 bstrpick.d $t3, $a6, 30, 0 - movgr2fr.w $fa3, $t3 - ffint.d.w $fa3, $fa3 - fdiv.d $fa1, $fa3, $fa1 - fmul.d $fa1, $fa1, $fa2 + movgr2fr.w $fa2, $t3 + ffint.d.w $fa2, $fa2 + fdiv.d $fa2, $fa2, $fs0 + fmul.d $fa1, $fa2, $fa1 ftintrz.w.d $fa1, $fa1 movfr2gr.s $t3, $fa1 add.w $t2, $t2, $t3 @@ -260,15 +259,13 @@ fuloop: # @fuloop # => This Inner Loop Header: Depth=2 mul.d $a6, $a6, $s8 add.w $a6, $a6, $s4 - pcalau12i $a7, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a7, %pc_lo12(.LCPI0_0) bstrpick.d $a7, $a6, 30, 0 - movgr2fr.w $fa2, $a7 - ffint.d.w $fa2, $fa2 - fdiv.d $fa2, $fa2, $fa1 - fmul.d $fa2, $fa2, $fa0 - ftintrz.w.d $fa2, $fa2 - movfr2gr.s $a7, $fa2 + movgr2fr.w $fa1, $a7 + ffint.d.w $fa1, $fa1 + fdiv.d $fa1, $fa1, $fs0 + fmul.d $fa1, $fa1, $fa0 + ftintrz.w.d $fa1, $fa1 + movfr2gr.s $a7, $fa1 beq $a0, $a7, .LBB0_6 # %bb.7: # in Loop: Header=BB0_6 Depth=2 addi.w $s6, $a7, 1 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/main.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/main.s index 615c4f62..6c8d9de1 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/main.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/main.s @@ -1,26 +1,6 @@ .file "main.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x3fd999999999999a # double 0.40000000000000002 -.LCPI0_1: - .dword 0x405fc00000000000 # double 127 -.LCPI0_2: - .dword 0x40f86a0000000000 # double 1.0E+5 -.LCPI0_3: - .dword 0x3ffccccccccccccd # double 1.8 -.LCPI0_4: - .dword 0x3fb47ae147ae147b # double 0.080000000000000002 -.LCPI0_5: - .dword 0xbfb999999999999a # double -0.10000000000000001 -.LCPI0_6: - .dword 0xbf947ae147ae147b # double -0.02 -.LCPI0_7: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI0_8: - .dword 0x3f947ae147ae147b # double 0.02 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -39,7 +19,6 @@ main: # @main st.d $s8, $sp, 1144 # 8-byte Folded Spill fst.d $fs0, $sp, 1136 # 8-byte Folded Spill fst.d $fs1, $sp, 1128 # 8-byte Folded Spill - fst.d $fs2, $sp, 1120 # 8-byte Folded Spill pcalau12i $a2, %got_pc_hi20(offset) ld.d $s2, $a2, %got_pc_lo12(offset) st.w $zero, $s2, 0 @@ -89,13 +68,13 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.2) addi.d $a1, $a0, %pc_lo12(.L.str.2) - addi.d $a0, $sp, 96 + addi.d $a0, $sp, 104 move $a2, $s0 pcaddu18i $ra, %call36(sprintf) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.3) addi.d $a1, $a0, %pc_lo12(.L.str.3) - addi.d $a0, $sp, 96 + addi.d $a0, $sp, 104 pcaddu18i $ra, %call36(fopen) jirl $ra, $ra, 0 move $a3, $a0 @@ -106,7 +85,7 @@ main: # @main # %bb.2: pcalau12i $a0, %pc_hi20(.L.str.4) addi.d $a0, $a0, %pc_lo12(.L.str.4) - addi.d $a1, $sp, 96 + addi.d $a1, $sp, 104 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 move $a0, $zero @@ -211,12 +190,12 @@ main: # @main ld.d $a2, $s1, 0 pcalau12i $a0, %pc_hi20(.L.str.7) addi.d $a1, $a0, %pc_lo12(.L.str.7) - addi.d $a0, $sp, 96 + addi.d $a0, $sp, 104 pcaddu18i $ra, %call36(sprintf) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.8) addi.d $a1, $a0, %pc_lo12(.L.str.8) - addi.d $a0, $sp, 96 + addi.d $a0, $sp, 104 pcaddu18i $ra, %call36(fopen) jirl $ra, $ra, 0 beqz $a0, .LBB0_15 @@ -230,11 +209,11 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %got_pc_hi20(blockr) ld.d $a0, $a0, %got_pc_lo12(blockr) - st.d $a0, $sp, 80 # 8-byte Folded Spill + st.d $a0, $sp, 88 # 8-byte Folded Spill ld.w $a0, $a0, 0 pcalau12i $a1, %got_pc_hi20(blockl) ld.d $a1, $a1, %got_pc_lo12(blockl) - st.d $a1, $sp, 72 # 8-byte Folded Spill + st.d $a1, $sp, 80 # 8-byte Folded Spill ld.w $a1, $a1, 0 sub.w $a2, $a0, $a1 pcalau12i $a0, %got_pc_hi20(bdxlength) @@ -250,7 +229,7 @@ main: # @main pcalau12i $a0, %got_pc_hi20(bdylength) ld.d $a1, $a0, %got_pc_lo12(bdylength) ld.d $a0, $s0, 0 - st.d $a1, $sp, 88 # 8-byte Folded Spill + st.d $a1, $sp, 96 # 8-byte Folded Spill st.w $a3, $a1, 0 pcalau12i $a1, %pc_hi20(.L.str.9) addi.d $a1, $a1, %pc_lo12(.L.str.9) @@ -265,12 +244,12 @@ main: # @main ld.d $a2, $s1, 0 pcalau12i $a0, %pc_hi20(.L.str.10) addi.d $a1, $a0, %pc_lo12(.L.str.10) - addi.d $a0, $sp, 96 + addi.d $a0, $sp, 104 pcaddu18i $ra, %call36(sprintf) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.8) addi.d $a1, $a0, %pc_lo12(.L.str.8) - addi.d $a0, $sp, 96 + addi.d $a0, $sp, 104 pcaddu18i $ra, %call36(fopen) jirl $ra, $ra, 0 beqz $a0, .LBB0_15 @@ -280,12 +259,12 @@ main: # @main ld.d $a2, $s1, 0 pcalau12i $a0, %pc_hi20(.L.str.11) addi.d $a1, $a0, %pc_lo12(.L.str.11) - addi.d $a0, $sp, 96 + addi.d $a0, $sp, 104 pcaddu18i $ra, %call36(sprintf) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.3) addi.d $a1, $a0, %pc_lo12(.L.str.3) - addi.d $a0, $sp, 96 + addi.d $a0, $sp, 104 pcaddu18i $ra, %call36(fopen) jirl $ra, $ra, 0 bnez $a0, .LBB0_17 @@ -293,7 +272,7 @@ main: # @main ld.d $a0, $s0, 0 pcalau12i $a1, %pc_hi20(.L.str.4) addi.d $a1, $a1, %pc_lo12(.L.str.4) - addi.d $a2, $sp, 96 + addi.d $a2, $sp, 104 pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 .LBB0_16: @@ -311,7 +290,7 @@ main: # @main pcaddu18i $ra, %call36(initcheck) jirl $ra, $ra, 0 ld.d $a0, $s0, 0 - st.d $fp, $sp, 64 # 8-byte Folded Spill + st.d $fp, $sp, 72 # 8-byte Folded Spill ld.w $a2, $fp, 0 pcalau12i $a1, %pc_hi20(.L.str.12) addi.d $a1, $a1, %pc_lo12(.L.str.12) @@ -328,7 +307,7 @@ main: # @main ld.d $a0, $s0, 0 pcalau12i $a1, %got_pc_hi20(overfill) ld.d $a1, $a1, %got_pc_lo12(overfill) - st.d $a1, $sp, 56 # 8-byte Folded Spill + st.d $a1, $sp, 64 # 8-byte Folded Spill ld.w $a2, $a1, 0 pcalau12i $a1, %pc_hi20(.L.str.14) addi.d $a1, $a1, %pc_lo12(.L.str.14) @@ -345,14 +324,14 @@ main: # @main bnez $a0, .LBB0_16 # %bb.18: fmov.d $fs0, $fa0 - st.d $s5, $sp, 32 # 8-byte Folded Spill - st.d $fp, $sp, 48 # 8-byte Folded Spill + st.d $s5, $sp, 40 # 8-byte Folded Spill + st.d $fp, $sp, 56 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(T) ld.d $a1, $a0, %got_pc_lo12(T) ori $a0, $zero, 0 lu32i.d $a0, -144027 lu52i.d $a0, $a0, 1052 - st.d $a1, $sp, 40 # 8-byte Folded Spill + st.d $a1, $sp, 48 # 8-byte Folded Spill st.d $a0, $a1, 0 pcalau12i $a0, %got_pc_hi20(numcells) ld.d $s5, $a0, %got_pc_lo12(numcells) @@ -360,18 +339,18 @@ main: # @main alsl.d $a1, $a0, $a0, 2 alsl.d $a1, $a1, $a0, 1 pcalau12i $a2, %pc_hi20(bigcell) - st.d $a2, $sp, 16 # 8-byte Folded Spill + st.d $a2, $sp, 24 # 8-byte Folded Spill st.w $a1, $a2, %pc_lo12(bigcell) addi.d $a1, $a1, 1 pcalau12i $a2, %pc_hi20(toobig) - st.d $a2, $sp, 24 # 8-byte Folded Spill + st.d $a2, $sp, 32 # 8-byte Folded Spill st.w $a1, $a2, %pc_lo12(toobig) ori $a1, $zero, 100 mul.w $a0, $a0, $a1 pcaddu18i $ra, %call36(testloop) jirl $ra, $ra, 0 pcalau12i $a1, %pc_hi20(totFunc) - st.d $a1, $sp, 8 # 8-byte Folded Spill + st.d $a1, $sp, 16 # 8-byte Folded Spill fld.d $fa0, $a1, %pc_lo12(totFunc) movgr2fr.w $fa1, $a0 ffint.d.w $fa1, $fa1 @@ -474,14 +453,14 @@ main: # @main st.w $a0, $s7, 0 pcaddu18i $ra, %call36(config2) jirl $ra, $ra, 0 - ld.d $a0, $sp, 80 # 8-byte Folded Reload + ld.d $a0, $sp, 88 # 8-byte Folded Reload ld.w $a0, $a0, 0 - ld.d $a1, $sp, 72 # 8-byte Folded Reload + ld.d $a1, $sp, 80 # 8-byte Folded Reload ld.w $a1, $a1, 0 ld.w $a2, $s8, 0 ld.w $a3, $s6, 0 sub.d $a0, $a0, $a1 - ld.d $a1, $sp, 32 # 8-byte Folded Reload + ld.d $a1, $sp, 40 # 8-byte Folded Reload st.w $a0, $a1, 0 sub.d $a1, $a2, $a3 movgr2fr.w $fa0, $a0 @@ -495,7 +474,7 @@ main: # @main fdiv.d $fa1, $fa0, $fa1 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 - ld.d $a0, $sp, 88 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload st.w $a1, $a0, 0 bcnez $fcc0, .LBB0_32 # %bb.31: # %call.sqrt246 @@ -504,9 +483,9 @@ main: # @main jirl $ra, $ra, 0 .LBB0_32: # %._crit_edge.split.split ld.d $a0, $s0, 0 - ld.d $s8, $sp, 32 # 8-byte Folded Reload + ld.d $s8, $sp, 40 # 8-byte Folded Reload ld.w $a2, $s8, 0 - ld.d $fp, $sp, 88 # 8-byte Folded Reload + ld.d $fp, $sp, 96 # 8-byte Folded Reload ld.w $a3, $fp, 0 pcalau12i $s6, %pc_hi20(aveCellSide) fst.d $fa0, $s6, %pc_lo12(aveCellSide) @@ -517,7 +496,7 @@ main: # @main pcaddu18i $ra, %call36(findcost) jirl $ra, $ra, 0 ld.d $a3, $s0, 0 - ld.d $s7, $sp, 64 # 8-byte Folded Reload + ld.d $s7, $sp, 72 # 8-byte Folded Reload st.w $a0, $s7, 0 pcalau12i $a0, %pc_hi20(.L.str.15) addi.d $a0, $a0, %pc_lo12(.L.str.15) @@ -527,9 +506,9 @@ main: # @main jirl $ra, $ra, 0 ld.d $a0, $s0, 0 ld.w $a2, $s7, 0 - ld.d $a1, $sp, 48 # 8-byte Folded Reload - ld.w $a3, $a1, 0 ld.d $a1, $sp, 56 # 8-byte Folded Reload + ld.w $a3, $a1, 0 + ld.d $a1, $sp, 64 # 8-byte Folded Reload ld.w $a4, $a1, 0 pcalau12i $a1, %pc_hi20(.L.str.16) addi.d $a1, $a1, %pc_lo12(.L.str.16) @@ -541,28 +520,31 @@ main: # @main ld.w $a0, $s5, 0 alsl.d $a1, $a0, $a0, 2 alsl.d $a1, $a1, $a0, 1 - ld.d $s7, $sp, 16 # 8-byte Folded Reload - st.w $a1, $s7, %pc_lo12(bigcell) - addi.d $a1, $a1, 1 ld.d $a2, $sp, 24 # 8-byte Folded Reload + st.w $a1, $a2, %pc_lo12(bigcell) + addi.d $a1, $a1, 1 + ld.d $a2, $sp, 32 # 8-byte Folded Reload st.w $a1, $a2, %pc_lo12(toobig) ori $a1, $zero, 100 mul.w $a0, $a0, $a1 pcaddu18i $ra, %call36(test2loop) jirl $ra, $ra, 0 movgr2fr.w $fa0, $a0 - ld.d $a2, $sp, 8 # 8-byte Folded Reload - fld.d $fa1, $a2, %pc_lo12(totFunc) + ld.d $a1, $sp, 16 # 8-byte Folded Reload + fld.d $fa1, $a1, %pc_lo12(totFunc) pcalau12i $a0, %pc_hi20(totPen) fld.d $fa2, $a0, %pc_lo12(totPen) - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fa3, $a1, %pc_lo12(.LCPI0_0) ffint.d.w $fa0, $fa0 fdiv.d $fa1, $fa1, $fa0 + fst.d $fa1, $a1, %pc_lo12(totFunc) fdiv.d $fa0, $fa2, $fa0 - fst.d $fa1, $a2, %pc_lo12(totFunc) fst.d $fa0, $a0, %pc_lo12(totPen) - fmul.d $fa1, $fa1, $fa3 + lu12i.w $a0, -419431 + ori $s7, $a0, 2458 + lu32i.d $s7, -419431 + lu52i.d $a0, $s7, 1021 + movgr2fr.d $fa2, $a0 + fmul.d $fa1, $fa1, $fa2 fdiv.d $fa0, $fa1, $fa0 fst.d $fa0, $s3, 0 pcalau12i $a0, %got_pc_hi20(numnets) @@ -590,14 +572,18 @@ main: # @main pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 .LBB0_34: # %._crit_edge.split.split.split - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_1) - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fs1, $a0, %pc_lo12(.LCPI0_2) - ld.d $a0, $s0, 0 + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -16384 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 fdiv.d $fa1, $fa0, $fa1 - fmul.d $fa1, $fa1, $fs1 - ld.d $a1, $sp, 40 # 8-byte Folded Reload + lu32i.d $a0, -497152 + lu52i.d $a1, $a0, 1039 + ld.d $a0, $s0, 0 + movgr2fr.d $fa2, $a1 + fmul.d $fa1, $fa1, $fa2 + ld.d $a1, $sp, 48 # 8-byte Folded Reload fst.d $fa1, $a1, 0 movfr2gr.d $a2, $fs0 movfr2gr.d $a3, $fa0 @@ -609,47 +595,58 @@ main: # @main ld.d $fp, $a0, %got_pc_lo12(aveChanWid) ld.w $a0, $fp, 0 movgr2fr.w $fa0, $a0 - pcalau12i $a0, %pc_hi20(.LCPI0_3) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_3) - fld.d $fa2, $s6, %pc_lo12(aveCellSide) - pcalau12i $a0, %pc_hi20(.LCPI0_4) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_4) ffint.d.w $fa0, $fa0 - fmul.d $fa0, $fa0, $fa1 - fdiv.d $fa0, $fa0, $fa2 - fcmp.cult.d $fcc0, $fa3, $fa0 - pcalau12i $s6, %pc_hi20(.LCPI0_8) + lu12i.w $a0, -209716 + ori $a0, $a0, 3277 + lu32i.d $a0, -209716 + fld.d $fa1, $s6, %pc_lo12(aveCellSide) + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa2, $a0 + fmul.d $fa0, $fa0, $fa2 + fdiv.d $fa0, $fa0, $fa1 + lu12i.w $a0, 293601 + ori $s6, $a0, 1147 + lu32i.d $s6, 293601 + lu52i.d $a0, $s6, 1019 + movgr2fr.d $fa1, $a0 + fcmp.cult.d $fcc0, $fa1, $fa0 bceqz $fcc0, .LBB0_36 # %bb.35: fld.d $fa1, $s3, 0 - fld.d $fa0, $s6, %pc_lo12(.LCPI0_8) + lu52i.d $a0, $s6, 1017 + movgr2fr.d $fa0, $a0 fcmp.clt.d $fcc0, $fa1, $fa0 bcnez $fcc0, .LBB0_39 b .LBB0_40 .LBB0_36: # %condstore.split pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_5) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_5) + lu52i.d $a0, $s7, -1029 + movgr2fr.d $fa1, $a0 fsub.d $fa1, $fa1, $fa0 vldi $vr0, -988 pcaddu18i $ra, %call36(pow) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_6) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_6) - pcalau12i $a0, %pc_hi20(.LCPI0_7) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_7) + lu52i.d $a0, $s6, -1031 + movgr2fr.d $fa2, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 fld.d $fa1, $s3, 0 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fa3, $a0 fmadd.d $fa0, $fa0, $fa3, $fa2 fcmp.clt.d $fcc0, $fa1, $fa0 fsel $fa0, $fa1, $fa0, $fcc0 bcnez $fcc0, .LBB0_38 # %bb.37: # %condstore.split - fld.d $fa2, $s6, %pc_lo12(.LCPI0_8) + lu52i.d $a0, $s6, 1017 + movgr2fr.d $fa2, $a0 fcmp.clt.d $fcc0, $fa0, $fa2 bceqz $fcc0, .LBB0_40 .LBB0_38: - fld.d $fa1, $s6, %pc_lo12(.LCPI0_8) + lu52i.d $a0, $s6, 1017 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 fsel $fa0, $fa0, $fa1, $fcc0 .LBB0_39: # %.sink.split @@ -674,7 +671,7 @@ main: # @main movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 movgr2fr.d $fs0, $zero - fdiv.d $fs2, $fs0, $fa0 + fdiv.d $fs1, $fs0, $fa0 b .LBB0_53 .LBB0_42: # %.lr.ph118 pcalau12i $a0, %got_pc_hi20(cellarray) @@ -724,7 +721,7 @@ main: # @main .LBB0_46: # %._crit_edge119 movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 - fdiv.d $fs2, $fs0, $fa0 + fdiv.d $fs1, $fs0, $fa0 bgtz $a0, .LBB0_48 # %bb.47: movgr2fr.d $fs0, $zero @@ -761,7 +758,7 @@ main: # @main .LBB0_50: # %.split249 # in Loop: Header=BB0_49 Depth=1 ld.w $a0, $s5, 0 - fsub.d $fa0, $fa0, $fs2 + fsub.d $fa0, $fa0, $fs1 fmadd.d $fs0, $fa0, $fa0, $fs0 addi.d $s2, $s2, 1 addi.d $s3, $s3, 8 @@ -780,9 +777,9 @@ main: # @main fdiv.d $fa0, $fs0, $fa0 fsqrt.d $fs0, $fa0 fcmp.cor.d $fcc0, $fs0, $fs0 - ld.d $s2, $sp, 64 # 8-byte Folded Reload - ld.d $s3, $sp, 56 # 8-byte Folded Reload - ld.d $s4, $sp, 40 # 8-byte Folded Reload + ld.d $s2, $sp, 72 # 8-byte Folded Reload + ld.d $s3, $sp, 64 # 8-byte Folded Reload + ld.d $s4, $sp, 48 # 8-byte Folded Reload bcnez $fcc0, .LBB0_55 # %bb.54: # %call.sqrt251 pcaddu18i $ra, %call36(sqrt) @@ -797,27 +794,28 @@ main: # @main pcaddu18i $ra, %call36(fwrite) jirl $ra, $ra, 0 ld.d $a0, $s0, 0 - movfr2gr.d $a2, $fs2 + movfr2gr.d $a2, $fs1 movfr2gr.d $a3, $fs0 pcalau12i $a1, %pc_hi20(.L.str.20) addi.d $a1, $a1, %pc_lo12(.L.str.20) pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 vldi $vr0, -1024 - fmadd.d $fa1, $fs0, $fa0, $fs2 + fmadd.d $fa0, $fs0, $fa0, $fs1 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a1, $fa0 + pcalau12i $a2, %pc_hi20(rangeLimit) fld.d $fa0, $s4, 0 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 - pcalau12i $a1, %pc_hi20(rangeLimit) - fcmp.cule.d $fcc0, $fs1, $fa0 - st.w $a0, $a1, %pc_lo12(rangeLimit) - bcnez $fcc0, .LBB0_57 -# %bb.56: ori $a0, $zero, 0 lu32i.d $a0, -497152 lu52i.d $a0, $a0, 1039 + movgr2fr.d $fa1, $a0 + fcmp.cule.d $fcc0, $fa1, $fa0 + st.w $a1, $a2, %pc_lo12(rangeLimit) + bcnez $fcc0, .LBB0_57 +# %bb.56: st.d $a0, $s4, 0 - fmov.d $fa0, $fs1 + fmov.d $fa0, $fa1 .LBB0_57: pcalau12i $fp, %pc_hi20(Tsave) fst.d $fa0, $fp, %pc_lo12(Tsave) @@ -847,7 +845,7 @@ main: # @main jirl $ra, $ra, 0 ld.d $a0, $s0, 0 ld.w $a2, $s2, 0 - ld.d $fp, $sp, 48 # 8-byte Folded Reload + ld.d $fp, $sp, 56 # 8-byte Folded Reload ld.w $a3, $fp, 0 ld.w $a4, $s3, 0 pcalau12i $a1, %pc_hi20(.L.str.22) @@ -860,12 +858,12 @@ main: # @main ld.d $a2, $s1, 0 pcalau12i $a0, %pc_hi20(.L.str.23) addi.d $a1, $a0, %pc_lo12(.L.str.23) - addi.d $a0, $sp, 96 + addi.d $a0, $sp, 104 pcaddu18i $ra, %call36(sprintf) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.8) addi.d $a1, $a0, %pc_lo12(.L.str.8) - addi.d $a0, $sp, 96 + addi.d $a0, $sp, 104 pcaddu18i $ra, %call36(fopen) jirl $ra, $ra, 0 ld.d $a3, $s0, 0 @@ -873,13 +871,13 @@ main: # @main # %bb.60: pcalau12i $a0, %pc_hi20(.L.str.24) addi.d $a1, $a0, %pc_lo12(.L.str.24) - addi.d $a2, $sp, 96 + addi.d $a2, $sp, 104 move $a0, $a3 b .LBB0_62 .LBB0_61: pcalau12i $a1, %pc_hi20(.L.str.25) addi.d $a1, $a1, %pc_lo12(.L.str.25) - addi.d $a2, $sp, 96 + addi.d $a2, $sp, 104 move $s1, $fp move $fp, $a0 move $a0, $a3 @@ -1021,11 +1019,12 @@ main: # @main .LBB0_73: alsl.d $a1, $a0, $a0, 2 alsl.d $a1, $a1, $a0, 1 - st.w $a1, $s7, %pc_lo12(bigcell) + ld.d $a2, $sp, 24 # 8-byte Folded Reload + st.w $a1, $a2, %pc_lo12(bigcell) pcalau12i $a2, %pc_hi20(choose) st.w $a0, $a2, %pc_lo12(choose) addi.d $a0, $a1, 1 - ld.d $a1, $sp, 24 # 8-byte Folded Reload + ld.d $a1, $sp, 32 # 8-byte Folded Reload st.w $a0, $a1, %pc_lo12(toobig) pcaddu18i $ra, %call36(prepSpots) jirl $ra, $ra, 0 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/makebins.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/makebins.s index 89ce0875..03b13fc7 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/makebins.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/makebins.s @@ -1,10 +1,6 @@ .file "makebins.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function makebins -.LCPI0_0: - .dword 0x4059000000000000 # double 100 .text - .globl makebins + .globl makebins # -- Begin function makebins .p2align 5 .type makebins,@function makebins: # @makebins @@ -30,8 +26,10 @@ makebins: # @makebins movgr2fr.w $fa0, $a0 ffint.d.w $fs1, $fa0 ori $a0, $zero, 1 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fs4, $a1, %pc_lo12(.LCPI0_0) + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fs4, $a1 fsqrt.d $fs5, $fs1 fcmp.cor.d $fcc1, $fs5, $fs5 movgr2fr.d $fs6, $zero diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/makesite.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/makesite.s index 3f653954..5c95986a 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/makesite.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/makesite.s @@ -1,10 +1,6 @@ .file "makesite.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function Vside -.LCPI0_0: - .dword 0x3f847ae147ae147b # double 0.01 .text - .globl Vside + .globl Vside # -- Begin function Vside .p2align 5 .type Vside,@function Vside: # @Vside @@ -32,11 +28,14 @@ Vside: # @Vside slli.d $a0, $a0, 3 ldx.d $a0, $a1, $a0 fld.d $fa0, $a0, 120 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI0_0) - fld.d $fa2, $a0, 112 - fadd.d $fa1, $fa0, $fa1 - fcmp.clt.d $fcc0, $fa2, $fa1 + lu12i.w $a1, 293601 + ori $a1, $a1, 1147 + lu32i.d $a1, 293601 + fld.d $fa1, $a0, 112 + lu52i.d $a1, $a1, 1016 + movgr2fr.d $fa2, $a1 + fadd.d $fa2, $fa0, $fa2 + fcmp.clt.d $fcc0, $fa1, $fa2 bceqz $fcc0, .LBB0_2 # %bb.1: vldi $vr0, -912 @@ -387,12 +386,7 @@ Vside: # @Vside .Lfunc_end0: .size Vside, .Lfunc_end0-Vside # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function Hside -.LCPI1_0: - .dword 0x3f847ae147ae147b # double 0.01 - .text - .globl Hside + .globl Hside # -- Begin function Hside .p2align 5 .type Hside,@function Hside: # @Hside @@ -419,10 +413,13 @@ Hside: # @Hside ld.d $a1, $a1, 0 slli.d $a0, $a0, 3 ldx.d $a0, $a1, $a0 - fld.d $fa1, $a0, 120 - pcalau12i $a1, %pc_hi20(.LCPI1_0) - fld.d $fa2, $a1, %pc_lo12(.LCPI1_0) fld.d $fa0, $a0, 112 + fld.d $fa1, $a0, 120 + lu12i.w $a1, 293601 + ori $a1, $a1, 1147 + lu32i.d $a1, 293601 + lu52i.d $a1, $a1, 1016 + movgr2fr.d $fa2, $a1 fadd.d $fa1, $fa1, $fa2 fcmp.clt.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB1_2 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/mshortest.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/mshortest.s index 19dd3d08..cbd3f766 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/mshortest.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/mshortest.s @@ -1,11 +1,7 @@ .file "mshortest.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function mshortest -.LCPI0_0: - .dword 0x3fb999999999999a # double 0.10000000000000001 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI0_1: + .p2align 4, 0x0 # -- Begin function mshortest +.LCPI0_0: .half 7 # 0x7 .half 6 # 0x6 .half 5 # 0x5 @@ -101,11 +97,14 @@ mshortest: # @mshortest # %bb.6: # %._crit_edge460.loopexit ld.w $a0, $s5, 0 .LBB0_7: # %._crit_edge460 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI0_0) - movgr2fr.w $fa1, $a1 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a1 + ffint.d.w $fa0, $fa0 + lu12i.w $a2, -419431 + ori $a2, $a2, 2458 + lu32i.d $a2, -419431 + lu52i.d $a2, $a2, 1019 + movgr2fr.d $fa1, $a2 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a2, $fa0 alsl.d $a0, $a0, $a2, 1 @@ -521,7 +520,7 @@ mshortest: # @mshortest bstrpick.d $a4, $s0, 31, 0 addi.d $a3, $s0, 1 bstrpick.d $a3, $a3, 31, 0 - pcalau12i $s6, %pc_hi20(.LCPI0_1) + pcalau12i $s6, %pc_hi20(.LCPI0_0) ori $a5, $zero, 8 bltu $s0, $a5, .LBB0_52 # %bb.47: # %vector.memcheck810 @@ -553,7 +552,7 @@ mshortest: # @mshortest # Parent Loop BB0_30 Depth=1 # => This Inner Loop Header: Depth=2 vld $vr0, $a5, 0 - vld $vr1, $s6, %pc_lo12(.LCPI0_1) + vld $vr1, $s6, %pc_lo12(.LCPI0_0) vshuf.h $vr1, $vr0, $vr0 vst $vr1, $a7, 0 addi.d $a5, $a5, 16 @@ -1361,7 +1360,7 @@ mshortest: # @mshortest # Parent Loop BB0_79 Depth=3 # => This Inner Loop Header: Depth=4 vld $vr0, $a6, 0 - vld $vr1, $s0, %pc_lo12(.LCPI0_1) + vld $vr1, $s0, %pc_lo12(.LCPI0_0) vshuf.h $vr1, $vr0, $vr0 vst $vr1, $t0, 0 addi.d $a6, $a6, 16 @@ -1460,7 +1459,7 @@ mshortest: # @mshortest # Parent Loop BB0_79 Depth=3 # => This Inner Loop Header: Depth=4 vld $vr0, $t0, 0 - vld $vr1, $s0, %pc_lo12(.LCPI0_1) + vld $vr1, $s0, %pc_lo12(.LCPI0_0) vshuf.h $vr1, $vr0, $vr0 vst $vr1, $a3, 0 addi.d $a3, $a3, 16 @@ -1774,7 +1773,7 @@ mshortest: # @mshortest # Parent Loop BB0_58 Depth=2 # => This Inner Loop Header: Depth=3 vld $vr0, $a6, 0 - vld $vr1, $s6, %pc_lo12(.LCPI0_1) + vld $vr1, $s6, %pc_lo12(.LCPI0_0) vshuf.h $vr1, $vr0, $vr0 vst $vr1, $t0, 0 addi.d $a6, $a6, 16 @@ -1863,7 +1862,7 @@ mshortest: # @mshortest # Parent Loop BB0_58 Depth=2 # => This Inner Loop Header: Depth=3 vld $vr0, $a7, 0 - vld $vr1, $s6, %pc_lo12(.LCPI0_1) + vld $vr1, $s6, %pc_lo12(.LCPI0_0) vshuf.h $vr1, $vr0, $vr0 vst $vr1, $a2, 0 addi.d $a2, $a2, 16 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/neworient.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/neworient.s index c9b10f90..2b24ea1d 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/neworient.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/neworient.s @@ -1,10 +1,6 @@ .file "neworient.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function newOrient -.LCPI0_0: - .dword 0x41dfffffffc00000 # double 2147483647 .text - .globl newOrient + .globl newOrient # -- Begin function newOrient .p2align 5 .type newOrient,@function newOrient: # @newOrient @@ -39,11 +35,12 @@ newOrient: # @newOrient ld.d $a5, $a3, %got_pc_lo12(randVar) ld.w $a6, $a5, 0 lu12i.w $a3, 269412 - pcalau12i $a7, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a7, %pc_lo12(.LCPI0_0) ori $a7, $a3, 3693 lu12i.w $a3, 3 ori $t0, $a3, 57 + lu12i.w $a3, -1024 + lu52i.d $a3, $a3, 1053 + movgr2fr.d $fa1, $a3 .p2align 4, , 16 .LBB0_4: # =>This Inner Loop Header: Depth=1 mul.d $a3, $a6, $a7 @@ -108,11 +105,12 @@ newOrient: # @newOrient ld.d $a5, $a3, %got_pc_lo12(randVar) ld.w $a6, $a5, 0 lu12i.w $a3, 269412 - pcalau12i $a7, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a7, %pc_lo12(.LCPI0_0) ori $a7, $a3, 3693 lu12i.w $a3, 3 ori $t0, $a3, 57 + lu12i.w $a3, -1024 + lu52i.d $a3, $a3, 1053 + movgr2fr.d $fa1, $a3 .p2align 4, , 16 .LBB0_14: # =>This Inner Loop Header: Depth=1 mul.d $a3, $a6, $a7 @@ -186,11 +184,12 @@ newOrient: # @newOrient ld.d $t3, $t1, %got_pc_lo12(randVar) ld.w $t4, $t3, 0 lu12i.w $t1, 269412 - pcalau12i $t5, %pc_hi20(.LCPI0_0) - fld.d $fa1, $t5, %pc_lo12(.LCPI0_0) ori $t5, $t1, 3693 lu12i.w $t1, 3 ori $t6, $t1, 57 + lu12i.w $t1, -1024 + lu52i.d $t1, $t1, 1053 + movgr2fr.d $fa1, $t1 .p2align 4, , 16 .LBB0_27: # =>This Inner Loop Header: Depth=1 mul.d $t1, $t4, $t5 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/outbig.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/outbig.s index ceb22a89..e9f0cc53 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/outbig.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/outbig.s @@ -1,10 +1,6 @@ .file "outbig.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function outbig -.LCPI0_0: - .dword 0x407f400000000000 # double 500 .text - .globl outbig + .globl outbig # -- Begin function outbig .p2align 5 .type outbig,@function outbig: # @outbig @@ -68,12 +64,14 @@ outbig: # @outbig slt $a0, $s1, $s0 masknez $a1, $s1, $a0 maskeqz $a0, $s0, $a0 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI0_0) + ori $a2, $zero, 0 or $a0, $a0, $a1 - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + lu32i.d $a2, -49152 + movgr2fr.w $fa0, $a0 + lu52i.d $a0, $a2, 1031 + ffint.d.w $fa0, $fa0 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 addi.w $a0, $a0, 1 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/outsmall.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/outsmall.s index 33047872..e17d5b46 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/outsmall.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/outsmall.s @@ -1,10 +1,6 @@ .file "outsmall.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function outsmall -.LCPI0_0: - .dword 0x407f400000000000 # double 500 .text - .globl outsmall + .globl outsmall # -- Begin function outsmall .p2align 5 .type outsmall,@function outsmall: # @outsmall @@ -46,12 +42,14 @@ outsmall: # @outsmall slt $a0, $s1, $s0 masknez $a1, $s1, $a0 maskeqz $a0, $s0, $a0 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI0_0) or $a0, $a0, $a1 - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1031 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 addi.w $a0, $a0, 1 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/placepads.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/placepads.s index 2d737267..f4207533 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/placepads.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/placepads.s @@ -1,10 +1,6 @@ .file "placepads.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function placepads -.LCPI0_0: - .dword 0x3fb999999999999a # double 0.10000000000000001 .text - .globl placepads + .globl placepads # -- Begin function placepads .p2align 5 .type placepads,@function placepads: # @placepads @@ -68,6 +64,11 @@ placepads: # @placepads st.d $a3, $sp, 8 # 8-byte Folded Spill sub.d $s4, $s3, $a3 ori $s5, $zero, 4 + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, -419431 + lu52i.d $a3, $a3, 1019 + movgr2fr.d $fa0, $a3 move $a3, $t1 # implicit-def: $r16 move $s0, $a6 @@ -76,7 +77,7 @@ placepads: # @placepads .p2align 4, , 16 .LBB0_2: # in Loop: Header=BB0_4 Depth=1 move $s7, $t5 - move $s0, $s8 + move $s0, $t6 .LBB0_3: # %.loopexit423 # in Loop: Header=BB0_4 Depth=1 move $a4, $t3 @@ -142,54 +143,52 @@ placepads: # @placepads slt $a4, $t3, $s0 masknez $a5, $t3, $a4 maskeqz $a4, $s0, $a4 - or $s8, $a4, $a5 - move $t6, $s8 + or $t6, $a4, $a5 + move $a4, $t6 b .LBB0_14 .LBB0_13: # in Loop: Header=BB0_4 Depth=1 - sub.w $t6, $s0, $t3 - div.w $a4, $t6, $a5 - srai.d $a5, $a4, 63 - andn $t4, $a4, $a5 - slt $a4, $s0, $t3 - masknez $a5, $s0, $a4 - maskeqz $a4, $t3, $a4 - or $s8, $a4, $a5 + sub.w $a4, $s0, $t3 + div.w $a5, $a4, $a5 + srai.d $a7, $a5, 63 + andn $t4, $a5, $a7 + slt $a5, $s0, $t3 + masknez $a7, $s0, $a5 + maskeqz $a5, $t3, $a5 + or $t6, $a5, $a7 .LBB0_14: # %.lr.ph448 # in Loop: Header=BB0_4 Depth=1 - slt $a4, $s0, $t3 - sub.d $a5, $t3, $s0 - bstrpick.d $a5, $a5, 31, 1 - maskeqz $a5, $a5, $a4 - ld.d $a7, $sp, 80 # 8-byte Folded Reload - masknez $a4, $a7, $a4 - or $a4, $a5, $a4 - st.d $a4, $sp, 80 # 8-byte Folded Spill - pcalau12i $a4, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a4, %pc_lo12(.LCPI0_0) - sub.d $a4, $a3, $s7 + slt $a5, $s0, $t3 + sub.d $a7, $t3, $s0 + bstrpick.d $a7, $a7, 31, 1 + maskeqz $a7, $a7, $a5 + ld.d $t0, $sp, 80 # 8-byte Folded Reload + masknez $a5, $t0, $a5 + or $a5, $a7, $a5 + st.d $a5, $sp, 80 # 8-byte Folded Spill + sub.d $a5, $a3, $s7 + movgr2fr.w $fa1, $a5 + ffint.d.w $fa1, $fa1 + fmul.d $fa1, $fa1, $fa0 + ftintrz.w.d $fa1, $fa1 + movfr2gr.s $a5, $fa1 + add.d $a5, $s6, $a5 + add.w $t5, $a5, $s7 + ld.d $a7, $sp, 48 # 8-byte Folded Reload + st.w $t5, $a7, 0 + add.w $a3, $a5, $a3 + ld.d $a5, $sp, 64 # 8-byte Folded Reload + st.w $a3, $a5, 0 movgr2fr.w $fa1, $a4 ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 - ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a4, $fa0 - add.d $a4, $s6, $a4 - add.w $t5, $a4, $s7 - ld.d $a5, $sp, 48 # 8-byte Folded Reload - st.w $t5, $a5, 0 - add.w $a3, $a4, $a3 - ld.d $a4, $sp, 64 # 8-byte Folded Reload - st.w $a3, $a4, 0 - movgr2fr.w $fa0, $t6 - ffint.d.w $fa0, $fa0 ld.d $t3, $sp, 72 # 8-byte Folded Reload - move $t6, $a0 + move $t7, $a0 b .LBB0_17 .LBB0_15: # in Loop: Header=BB0_17 Depth=2 move $t3, $a2 .p2align 4, , 16 .LBB0_16: # %.loopexit422 # in Loop: Header=BB0_17 Depth=2 - move $t6, $t3 + move $t7, $t3 addi.w $a4, $t3, 0 addi.w $t3, $t3, 1 bge $a4, $a1, .LBB0_2 @@ -197,11 +196,11 @@ placepads: # @placepads # => This Loop Header: Depth=2 # Child Loop BB0_25 Depth 3 slli.d $a4, $t3, 3 - ldx.d $t7, $s1, $a4 - ld.w $a4, $t7, 80 + ldx.d $t8, $s1, $a4 + ld.w $a4, $t8, 80 bne $a4, $s5, .LBB0_16 # %bb.18: # in Loop: Header=BB0_17 Depth=2 - ld.d $a5, $t7, 152 + ld.d $a5, $t8, 152 ld.w $a4, $a5, 60 ld.w $a7, $a5, 56 ld.w $t2, $s2, 8 @@ -210,10 +209,10 @@ placepads: # @placepads # %bb.19: # in Loop: Header=BB0_17 Depth=2 sub.w $a7, $t3, $a0 slli.d $a7, $a7, 3 - fldx.d $fa1, $s3, $a7 - fmul.d $fa1, $fa1, $fa0 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a7, $fa1 + fldx.d $fa2, $s3, $a7 + fmul.d $fa2, $fa2, $fa1 + ftintrz.w.d $fa2, $fa2 + movfr2gr.s $a7, $fa2 b .LBB0_21 .LBB0_20: # in Loop: Header=BB0_17 Depth=2 bstrpick.d $a7, $a4, 31, 31 @@ -224,31 +223,31 @@ placepads: # @placepads ld.w $t2, $a5, 68 ld.w $a5, $a5, 64 sub.w $a5, $t2, $a5 - st.w $a7, $t7, 12 + st.w $a7, $t8, 12 bstrpick.d $a7, $a5, 31, 31 add.w $a7, $a5, $a7 srli.d $a7, $a7, 1 sub.d $a5, $s6, $a5 add.d $a5, $a5, $a7 - addi.w $t3, $t6, 2 - st.w $a5, $t7, 16 + addi.w $t3, $t7, 2 + st.w $a5, $t8, 16 blt $a1, $t3, .LBB0_16 # %bb.22: # %.lr.ph439.preheader # in Loop: Header=BB0_17 Depth=2 add.d $fp, $a4, $t4 - sub.d $t6, $a2, $t3 - alsl.d $s0, $t3, $s4, 3 + sub.d $s0, $a2, $t3 + alsl.d $s7, $t3, $s4, 3 alsl.d $t7, $t3, $s1, 3 b .LBB0_25 .p2align 4, , 16 .LBB0_23: # in Loop: Header=BB0_25 Depth=3 - fld.d $fa1, $s0, 0 - fmul.d $fa1, $fa1, $fa0 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a7, $fa1 + fld.d $fa2, $s7, 0 + fmul.d $fa2, $fa2, $fa1 + ftintrz.w.d $fa2, $fa2 + movfr2gr.s $a7, $fa2 .LBB0_24: # in Loop: Header=BB0_25 Depth=3 - ld.w $t2, $s7, 68 - ld.w $fp, $s7, 64 + ld.w $t2, $s8, 68 + ld.w $fp, $s8, 64 sub.w $t2, $t2, $fp st.w $a7, $t8, 12 bstrpick.d $a7, $t2, 31, 31 @@ -259,10 +258,10 @@ placepads: # @placepads st.w $a7, $t8, 16 add.d $fp, $a5, $a4 addi.d $t3, $t3, 1 - addi.d $t6, $t6, -1 - addi.d $s0, $s0, 8 + addi.d $s0, $s0, -1 + addi.d $s7, $s7, 8 addi.d $t7, $t7, 8 - beqz $t6, .LBB0_15 + beqz $s0, .LBB0_15 .LBB0_25: # %.lr.ph439 # Parent Loop BB0_4 Depth=1 # Parent Loop BB0_17 Depth=2 @@ -271,9 +270,9 @@ placepads: # @placepads ld.w $a4, $t8, 80 bne $a4, $s5, .LBB0_16 # %bb.26: # in Loop: Header=BB0_25 Depth=3 - ld.d $s7, $t8, 152 - ld.w $a4, $s7, 60 - ld.w $a5, $s7, 56 + ld.d $s8, $t8, 152 + ld.w $a4, $s8, 60 + ld.w $a5, $s8, 56 ld.w $a7, $s2, 8 sub.w $a4, $a4, $a5 add.d $a5, $fp, $t4 @@ -291,10 +290,8 @@ placepads: # @placepads ld.d $s2, $a4, 0 ld.d $a4, $sp, 16 # 8-byte Folded Reload ld.d $s3, $a4, 0 - movgr2fr.w $fa0, $a3 - ffint.d.w $fa1, $fa0 - pcalau12i $a4, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a4, %pc_lo12(.LCPI0_0) + movgr2fr.w $fa1, $a3 + ffint.d.w $fa1, $fa1 sub.d $a4, $a3, $t5 movgr2fr.w $fa2, $a4 ffint.d.w $fa2, $fa2 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/placepin.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/placepin.s index f6591968..f42759ee 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/placepin.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/placepin.s @@ -1,56 +1,55 @@ .file "placepin.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function placepin -.LCPI0_0: - .dword 0x41dfffffffc00000 # double 2147483647 .text - .globl placepin + .globl placepin # -- Begin function placepin .p2align 5 .type placepin,@function placepin: # @placepin # %bb.0: - addi.d $sp, $sp, -128 - st.d $ra, $sp, 120 # 8-byte Folded Spill - st.d $fp, $sp, 112 # 8-byte Folded Spill - st.d $s0, $sp, 104 # 8-byte Folded Spill - st.d $s1, $sp, 96 # 8-byte Folded Spill - st.d $s2, $sp, 88 # 8-byte Folded Spill - st.d $s3, $sp, 80 # 8-byte Folded Spill - st.d $s4, $sp, 72 # 8-byte Folded Spill - st.d $s5, $sp, 64 # 8-byte Folded Spill - st.d $s6, $sp, 56 # 8-byte Folded Spill - st.d $s7, $sp, 48 # 8-byte Folded Spill - st.d $s8, $sp, 40 # 8-byte Folded Spill + addi.d $sp, $sp, -112 + st.d $ra, $sp, 104 # 8-byte Folded Spill + st.d $fp, $sp, 96 # 8-byte Folded Spill + st.d $s0, $sp, 88 # 8-byte Folded Spill + st.d $s1, $sp, 80 # 8-byte Folded Spill + st.d $s2, $sp, 72 # 8-byte Folded Spill + st.d $s3, $sp, 64 # 8-byte Folded Spill + st.d $s4, $sp, 56 # 8-byte Folded Spill + st.d $s5, $sp, 48 # 8-byte Folded Spill + st.d $s6, $sp, 40 # 8-byte Folded Spill + st.d $s7, $sp, 32 # 8-byte Folded Spill + st.d $s8, $sp, 24 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(numcells) ld.d $a0, $a0, %got_pc_lo12(numcells) - st.d $a0, $sp, 24 # 8-byte Folded Spill - ld.w $a0, $a0, 0 - blez $a0, .LBB0_88 + st.d $a0, $sp, 16 # 8-byte Folded Spill + ld.w $t4, $a0, 0 + blez $t4, .LBB0_88 # %bb.1: # %.lr.ph265 - pcalau12i $a1, %got_pc_hi20(cellarray) - ld.d $a1, $a1, %got_pc_lo12(cellarray) - ld.d $a2, $a1, 0 + pcalau12i $a0, %got_pc_hi20(cellarray) + ld.d $a0, $a0, %got_pc_lo12(cellarray) + ld.d $a1, $a0, 0 ori $a3, $zero, 44 ori $a4, $zero, 2 - lu12i.w $a1, 269412 - ori $a5, $a1, 3693 - lu12i.w $a1, 3 - ori $a6, $a1, 57 - vrepli.b $vr0, 0 - ori $a1, $zero, 1 - # implicit-def: $r28 - st.d $a2, $sp, 16 # 8-byte Folded Spill + lu12i.w $a0, 269412 + ori $a5, $a0, 3693 + lu12i.w $a0, 3 + ori $a6, $a0, 57 + lu12i.w $a0, -1024 + lu52i.d $a0, $a0, 1053 + movgr2fr.d $fa0, $a0 + vrepli.b $vr1, 0 + ori $a0, $zero, 1 + # implicit-def: $r27 + st.d $a1, $sp, 8 # 8-byte Folded Spill b .LBB0_4 .p2align 4, , 16 .LBB0_2: # %.loopexit195.loopexit # in Loop: Header=BB0_4 Depth=1 - ld.d $a0, $sp, 24 # 8-byte Folded Reload - ld.w $a0, $a0, 0 - ld.d $a2, $sp, 16 # 8-byte Folded Reload + ld.d $a0, $sp, 16 # 8-byte Folded Reload + ld.w $t4, $a0, 0 + ld.d $a1, $sp, 8 # 8-byte Folded Reload .LBB0_3: # %.loopexit195 # in Loop: Header=BB0_4 Depth=1 - addi.d $a1, $fp, 1 - bge $fp, $a0, .LBB0_88 + addi.d $a0, $fp, 1 + bge $fp, $t4, .LBB0_88 .LBB0_4: # =>This Loop Header: Depth=1 # Child Loop BB0_9 Depth 2 # Child Loop BB0_56 Depth 3 @@ -70,14 +69,14 @@ placepin: # @placepin # Child Loop BB0_82 Depth 4 # Child Loop BB0_84 Depth 3 # Child Loop BB0_87 Depth 3 - move $fp, $a1 - slli.d $a1, $a1, 3 - ldx.d $t2, $a2, $a1 - ld.w $a1, $t2, 76 - beqz $a1, .LBB0_3 + move $fp, $a0 + slli.d $a0, $a0, 3 + ldx.d $t2, $a1, $a0 + ld.w $a0, $t2, 76 + beqz $a0, .LBB0_3 # %bb.5: # in Loop: Header=BB0_4 Depth=1 - ld.w $a1, $t2, 128 - beqz $a1, .LBB0_3 + ld.w $a0, $t2, 128 + beqz $a0, .LBB0_3 # %bb.6: # in Loop: Header=BB0_4 Depth=1 ld.w $t3, $t2, 132 blez $t3, .LBB0_3 @@ -85,8 +84,7 @@ placepin: # @placepin # in Loop: Header=BB0_4 Depth=1 ld.d $t4, $t2, 136 ld.d $t5, $t2, 144 - addi.d $a0, $t4, 88 - st.d $a0, $sp, 32 # 8-byte Folded Spill + addi.d $t6, $t4, 88 addi.d $t7, $t4, 8 addi.d $t8, $t5, 48 addi.d $s0, $t5, 4 @@ -119,586 +117,579 @@ placepin: # @placepin move $s1, $a0 mul.d $a0, $a0, $a3 add.d $s2, $t5, $a0 - ld.w $s4, $s2, 28 - beqz $s4, .LBB0_8 + ld.w $s3, $s2, 28 + beqz $s3, .LBB0_8 # %bb.10: # in Loop: Header=BB0_9 Depth=2 - ld.w $s6, $s2, 16 - pcalau12i $s3, %pc_hi20(.LCPI0_0) - beq $s6, $a4, .LBB0_14 + ld.w $s5, $s2, 16 + beq $s5, $a4, .LBB0_14 # %bb.11: # in Loop: Header=BB0_9 Depth=2 ori $a0, $zero, 1 - bne $s6, $a0, .LBB0_16 + bne $s5, $a0, .LBB0_16 # %bb.12: # in Loop: Header=BB0_9 Depth=2 ld.d $a0, $t2, 216 ld.w $a1, $s2, 20 alsl.d $a7, $a1, $a0, 3 slli.d $a1, $a1, 3 - ldx.w $a2, $a0, $a1 - ld.w $s5, $a7, 4 - bge $s5, $a2, .LBB0_20 + ldx.w $a1, $a0, $a1 + ld.w $s4, $a7, 4 + bge $s4, $a1, .LBB0_20 # %bb.13: # in Loop: Header=BB0_9 Depth=2 move $t0, $zero b .LBB0_30 .p2align 4, , 16 .LBB0_14: # in Loop: Header=BB0_9 Depth=2 - ld.d $s5, $t2, 216 + ld.d $s4, $t2, 216 ld.w $a0, $s2, 20 - alsl.d $a1, $a0, $s5, 3 + alsl.d $a7, $a0, $s4, 3 slli.d $a0, $a0, 3 - ldx.w $a2, $s5, $a0 - ld.w $s6, $a1, 4 - bge $s6, $a2, .LBB0_22 + ldx.w $a1, $s4, $a0 + ld.w $s5, $a7, 4 + bge $s5, $a1, .LBB0_22 # %bb.15: # in Loop: Header=BB0_9 Depth=2 move $t0, $zero b .LBB0_37 .p2align 4, , 16 .LBB0_16: # in Loop: Header=BB0_9 Depth=2 ori $a0, $zero, 3 - blt $s6, $a0, .LBB0_24 + blt $s5, $a0, .LBB0_24 # %bb.17: # %.preheader194 # in Loop: Header=BB0_9 Depth=2 ld.d $a0, $t2, 216 ld.w $a1, $t2, 64 alsl.d $a0, $a1, $a0, 3 - ld.w $t1, $a0, 4 - blez $t1, .LBB0_54 + ld.w $s4, $a0, 4 + blez $s4, .LBB0_54 # %bb.18: # %.lr.ph231.preheader # in Loop: Header=BB0_9 Depth=2 ori $a0, $zero, 8 - bgeu $t1, $a0, .LBB0_55 + bgeu $s4, $a0, .LBB0_55 # %bb.19: # in Loop: Header=BB0_9 Depth=2 - move $a1, $zero - ori $s5, $zero, 1 + move $t0, $zero + ori $s6, $zero, 1 b .LBB0_58 .LBB0_20: # %.lr.ph225.preheader # in Loop: Header=BB0_9 Depth=2 - sub.w $a0, $s5, $a2 - ori $a1, $zero, 7 - bgeu $a0, $a1, .LBB0_25 + sub.w $a0, $s4, $a1 + ori $a2, $zero, 7 + bgeu $a0, $a2, .LBB0_25 # %bb.21: # in Loop: Header=BB0_9 Depth=2 move $t0, $zero - move $t1, $a2 + move $s5, $a1 b .LBB0_28 .LBB0_22: # %.lr.ph.preheader # in Loop: Header=BB0_9 Depth=2 - sub.w $a0, $s6, $a2 - ori $a1, $zero, 7 - bgeu $a0, $a1, .LBB0_32 + sub.w $a0, $s5, $a1 + ori $a2, $zero, 7 + bgeu $a0, $a2, .LBB0_32 # %bb.23: # in Loop: Header=BB0_9 Depth=2 move $t0, $zero - move $t1, $a2 + move $s6, $a1 b .LBB0_35 .LBB0_24: # in Loop: Header=BB0_9 Depth=2 - move $a0, $s6 + move $a0, $s5 bnez $a0, .LBB0_64 b .LBB0_53 .LBB0_25: # %vector.ph406 # in Loop: Header=BB0_9 Depth=2 bstrpick.d $a0, $a0, 31, 0 - addi.d $a0, $a0, 1 - bstrpick.d $a7, $a0, 32, 3 - slli.d $a1, $a7, 3 - alsl.d $t1, $a7, $a2, 3 - slli.d $a7, $a2, 4 - alsl.d $a2, $a2, $a7, 2 - ld.d $a7, $sp, 32 # 8-byte Folded Reload - add.d $t0, $a7, $a2 - move $s6, $a1 - vori.b $vr1, $vr0, 0 - vori.b $vr2, $vr0, 0 + addi.d $t1, $a0, 1 + bstrpick.d $a7, $t1, 32, 3 + slli.d $a0, $a7, 3 + alsl.d $s5, $a7, $a1, 3 + slli.d $a7, $a1, 4 + alsl.d $a1, $a1, $a7, 2 + add.d $t0, $t6, $a1 + move $s6, $a0 + vori.b $vr2, $vr1, 0 + vori.b $vr3, $vr1, 0 .p2align 4, , 16 .LBB0_26: # %vector.body409 # Parent Loop BB0_4 Depth=1 # Parent Loop BB0_9 Depth=2 # => This Inner Loop Header: Depth=3 - ld.w $a2, $t0, -80 + ld.w $a1, $t0, -80 ld.w $a7, $t0, -60 ld.w $s7, $t0, -40 ld.w $s8, $t0, -20 - vinsgr2vr.w $vr3, $a2, 0 - vinsgr2vr.w $vr3, $a7, 1 - vinsgr2vr.w $vr3, $s7, 2 - vinsgr2vr.w $vr3, $s8, 3 - ld.w $a2, $t0, 0 - ld.w $a7, $t0, 20 - ld.w $s7, $t0, 40 - ld.w $s8, $t0, 60 - vinsgr2vr.w $vr4, $a2, 0 + vinsgr2vr.w $vr4, $a1, 0 vinsgr2vr.w $vr4, $a7, 1 vinsgr2vr.w $vr4, $s7, 2 vinsgr2vr.w $vr4, $s8, 3 - vadd.w $vr1, $vr3, $vr1 + ld.w $a1, $t0, 0 + ld.w $a7, $t0, 20 + ld.w $s7, $t0, 40 + ld.w $s8, $t0, 60 + vinsgr2vr.w $vr5, $a1, 0 + vinsgr2vr.w $vr5, $a7, 1 + vinsgr2vr.w $vr5, $s7, 2 + vinsgr2vr.w $vr5, $s8, 3 vadd.w $vr2, $vr4, $vr2 + vadd.w $vr3, $vr5, $vr3 addi.d $s6, $s6, -8 addi.d $t0, $t0, 160 bnez $s6, .LBB0_26 # %bb.27: # %middle.block415 # in Loop: Header=BB0_9 Depth=2 - vadd.w $vr1, $vr2, $vr1 - vhaddw.d.w $vr1, $vr1, $vr1 - vhaddw.q.d $vr1, $vr1, $vr1 - vpickve2gr.d $t0, $vr1, 0 - beq $a0, $a1, .LBB0_30 + vadd.w $vr2, $vr3, $vr2 + vhaddw.d.w $vr2, $vr2, $vr2 + vhaddw.q.d $vr2, $vr2, $vr2 + vpickve2gr.d $t0, $vr2, 0 + beq $t1, $a0, .LBB0_30 .LBB0_28: # %.lr.ph225.preheader455 # in Loop: Header=BB0_9 Depth=2 - slli.d $a0, $t1, 4 - alsl.d $a0, $t1, $a0, 2 + slli.d $a0, $s5, 4 + alsl.d $a0, $s5, $a0, 2 add.d $a0, $t7, $a0 - sub.d $a1, $s5, $t1 + sub.d $a1, $s4, $s5 addi.d $a1, $a1, 1 .p2align 4, , 16 .LBB0_29: # %.lr.ph225 # Parent Loop BB0_4 Depth=1 # Parent Loop BB0_9 Depth=2 # => This Inner Loop Header: Depth=3 - ld.w $a2, $a0, 0 - add.d $t0, $a2, $t0 + ld.w $a7, $a0, 0 + add.d $t0, $a7, $t0 addi.w $a1, $a1, -1 addi.d $a0, $a0, 20 bnez $a1, .LBB0_29 .LBB0_30: # %._crit_edge226 # in Loop: Header=BB0_9 Depth=2 addi.w $a0, $t0, 0 - blt $a0, $s4, .LBB0_89 + blt $a0, $s3, .LBB0_89 # %bb.31: # %thread-pre-split.thread # in Loop: Header=BB0_9 Depth=2 - ld.w $s5, $s2, 20 + ld.w $s4, $s2, 20 b .LBB0_64 .LBB0_32: # %vector.ph438 # in Loop: Header=BB0_9 Depth=2 bstrpick.d $a0, $a0, 31, 0 - addi.d $a0, $a0, 1 - bstrpick.d $a7, $a0, 32, 3 - slli.d $a1, $a7, 3 - alsl.d $t1, $a7, $a2, 3 - slli.d $a7, $a2, 4 - alsl.d $a2, $a2, $a7, 2 - ld.d $a7, $sp, 32 # 8-byte Folded Reload - add.d $t0, $a7, $a2 - move $s7, $a1 - vori.b $vr1, $vr0, 0 - vori.b $vr2, $vr0, 0 + addi.d $t1, $a0, 1 + bstrpick.d $a7, $t1, 32, 3 + slli.d $a0, $a7, 3 + alsl.d $s6, $a7, $a1, 3 + slli.d $a7, $a1, 4 + alsl.d $a1, $a1, $a7, 2 + add.d $t0, $t6, $a1 + move $s7, $a0 + vori.b $vr2, $vr1, 0 + vori.b $vr3, $vr1, 0 .p2align 4, , 16 .LBB0_33: # %vector.body441 # Parent Loop BB0_4 Depth=1 # Parent Loop BB0_9 Depth=2 # => This Inner Loop Header: Depth=3 - ld.w $a2, $t0, -80 + ld.w $a1, $t0, -80 ld.w $a7, $t0, -60 ld.w $s8, $t0, -40 ld.w $ra, $t0, -20 - vinsgr2vr.w $vr3, $a2, 0 - vinsgr2vr.w $vr3, $a7, 1 - vinsgr2vr.w $vr3, $s8, 2 - vinsgr2vr.w $vr3, $ra, 3 - ld.w $a2, $t0, 0 - ld.w $a7, $t0, 20 - ld.w $s8, $t0, 40 - ld.w $ra, $t0, 60 - vinsgr2vr.w $vr4, $a2, 0 + vinsgr2vr.w $vr4, $a1, 0 vinsgr2vr.w $vr4, $a7, 1 vinsgr2vr.w $vr4, $s8, 2 vinsgr2vr.w $vr4, $ra, 3 - vadd.w $vr1, $vr3, $vr1 + ld.w $a1, $t0, 0 + ld.w $a7, $t0, 20 + ld.w $s8, $t0, 40 + ld.w $ra, $t0, 60 + vinsgr2vr.w $vr5, $a1, 0 + vinsgr2vr.w $vr5, $a7, 1 + vinsgr2vr.w $vr5, $s8, 2 + vinsgr2vr.w $vr5, $ra, 3 vadd.w $vr2, $vr4, $vr2 + vadd.w $vr3, $vr5, $vr3 addi.d $s7, $s7, -8 addi.d $t0, $t0, 160 bnez $s7, .LBB0_33 # %bb.34: # %middle.block447 # in Loop: Header=BB0_9 Depth=2 - vadd.w $vr1, $vr2, $vr1 - vhaddw.d.w $vr1, $vr1, $vr1 - vhaddw.q.d $vr1, $vr1, $vr1 - vpickve2gr.d $t0, $vr1, 0 - beq $a0, $a1, .LBB0_37 + vadd.w $vr2, $vr3, $vr2 + vhaddw.d.w $vr2, $vr2, $vr2 + vhaddw.q.d $vr2, $vr2, $vr2 + vpickve2gr.d $t0, $vr2, 0 + beq $t1, $a0, .LBB0_37 .LBB0_35: # %.lr.ph.preheader457 # in Loop: Header=BB0_9 Depth=2 - slli.d $a0, $t1, 4 - alsl.d $a0, $t1, $a0, 2 + slli.d $a0, $s6, 4 + alsl.d $a0, $s6, $a0, 2 add.d $a0, $t7, $a0 - sub.d $a1, $s6, $t1 + sub.d $a1, $s5, $s6 addi.d $a1, $a1, 1 .p2align 4, , 16 .LBB0_36: # %.lr.ph # Parent Loop BB0_4 Depth=1 # Parent Loop BB0_9 Depth=2 # => This Inner Loop Header: Depth=3 - ld.w $a2, $a0, 0 - add.d $t0, $a2, $t0 + ld.w $a7, $a0, 0 + add.d $t0, $a7, $t0 addi.w $a1, $a1, -1 addi.d $a0, $a0, 20 bnez $a1, .LBB0_36 .LBB0_37: # %._crit_edge # in Loop: Header=BB0_9 Depth=2 addi.w $a0, $t0, 0 - blt $a0, $s4, .LBB0_89 + blt $a0, $s3, .LBB0_89 # %bb.38: # in Loop: Header=BB0_9 Depth=2 ld.w $a0, $s2, 24 - alsl.d $a1, $a0, $s5, 3 + alsl.d $a7, $a0, $s4, 3 slli.d $a0, $a0, 3 - ldx.w $a2, $s5, $a0 - ld.w $s5, $a1, 4 - bge $s5, $a2, .LBB0_40 + ldx.w $a1, $s4, $a0 + ld.w $s4, $a7, 4 + bge $s4, $a1, .LBB0_40 # %bb.39: # in Loop: Header=BB0_9 Depth=2 move $t0, $zero b .LBB0_47 .LBB0_40: # %.lr.ph218.preheader # in Loop: Header=BB0_9 Depth=2 - sub.w $a0, $s5, $a2 - ori $a1, $zero, 7 - bgeu $a0, $a1, .LBB0_42 + sub.w $a0, $s4, $a1 + ori $a2, $zero, 7 + bgeu $a0, $a2, .LBB0_42 # %bb.41: # in Loop: Header=BB0_9 Depth=2 move $t0, $zero - move $t1, $a2 + move $s5, $a1 b .LBB0_45 .LBB0_42: # %vector.ph422 # in Loop: Header=BB0_9 Depth=2 bstrpick.d $a0, $a0, 31, 0 - addi.d $a0, $a0, 1 - bstrpick.d $a7, $a0, 32, 3 - slli.d $a1, $a7, 3 - alsl.d $t1, $a7, $a2, 3 - slli.d $a7, $a2, 4 - alsl.d $a2, $a2, $a7, 2 - ld.d $a7, $sp, 32 # 8-byte Folded Reload - add.d $t0, $a7, $a2 - move $s6, $a1 - vori.b $vr1, $vr0, 0 - vori.b $vr2, $vr0, 0 + addi.d $t1, $a0, 1 + bstrpick.d $a7, $t1, 32, 3 + slli.d $a0, $a7, 3 + alsl.d $s5, $a7, $a1, 3 + slli.d $a7, $a1, 4 + alsl.d $a1, $a1, $a7, 2 + add.d $t0, $t6, $a1 + move $s6, $a0 + vori.b $vr2, $vr1, 0 + vori.b $vr3, $vr1, 0 .p2align 4, , 16 .LBB0_43: # %vector.body425 # Parent Loop BB0_4 Depth=1 # Parent Loop BB0_9 Depth=2 # => This Inner Loop Header: Depth=3 - ld.w $a2, $t0, -80 + ld.w $a1, $t0, -80 ld.w $a7, $t0, -60 ld.w $s7, $t0, -40 ld.w $s8, $t0, -20 - vinsgr2vr.w $vr3, $a2, 0 - vinsgr2vr.w $vr3, $a7, 1 - vinsgr2vr.w $vr3, $s7, 2 - vinsgr2vr.w $vr3, $s8, 3 - ld.w $a2, $t0, 0 - ld.w $a7, $t0, 20 - ld.w $s7, $t0, 40 - ld.w $s8, $t0, 60 - vinsgr2vr.w $vr4, $a2, 0 + vinsgr2vr.w $vr4, $a1, 0 vinsgr2vr.w $vr4, $a7, 1 vinsgr2vr.w $vr4, $s7, 2 vinsgr2vr.w $vr4, $s8, 3 - vadd.w $vr1, $vr3, $vr1 + ld.w $a1, $t0, 0 + ld.w $a7, $t0, 20 + ld.w $s7, $t0, 40 + ld.w $s8, $t0, 60 + vinsgr2vr.w $vr5, $a1, 0 + vinsgr2vr.w $vr5, $a7, 1 + vinsgr2vr.w $vr5, $s7, 2 + vinsgr2vr.w $vr5, $s8, 3 vadd.w $vr2, $vr4, $vr2 + vadd.w $vr3, $vr5, $vr3 addi.d $s6, $s6, -8 addi.d $t0, $t0, 160 bnez $s6, .LBB0_43 # %bb.44: # %middle.block431 # in Loop: Header=BB0_9 Depth=2 - vadd.w $vr1, $vr2, $vr1 - vhaddw.d.w $vr1, $vr1, $vr1 - vhaddw.q.d $vr1, $vr1, $vr1 - vpickve2gr.d $t0, $vr1, 0 - beq $a0, $a1, .LBB0_47 + vadd.w $vr2, $vr3, $vr2 + vhaddw.d.w $vr2, $vr2, $vr2 + vhaddw.q.d $vr2, $vr2, $vr2 + vpickve2gr.d $t0, $vr2, 0 + beq $t1, $a0, .LBB0_47 .LBB0_45: # %.lr.ph218.preheader456 # in Loop: Header=BB0_9 Depth=2 - slli.d $a0, $t1, 4 - alsl.d $a0, $t1, $a0, 2 + slli.d $a0, $s5, 4 + alsl.d $a0, $s5, $a0, 2 add.d $a0, $t7, $a0 - sub.d $a1, $s5, $t1 + sub.d $a1, $s4, $s5 addi.d $a1, $a1, 1 .p2align 4, , 16 .LBB0_46: # %.lr.ph218 # Parent Loop BB0_4 Depth=1 # Parent Loop BB0_9 Depth=2 # => This Inner Loop Header: Depth=3 - ld.w $a2, $a0, 0 - add.d $t0, $a2, $t0 + ld.w $a7, $a0, 0 + add.d $t0, $a7, $t0 addi.w $a1, $a1, -1 addi.d $a0, $a0, 20 bnez $a1, .LBB0_46 .LBB0_47: # %._crit_edge219 # in Loop: Header=BB0_9 Depth=2 addi.w $a0, $t0, 0 - blt $a0, $s4, .LBB0_89 + blt $a0, $s3, .LBB0_89 # %bb.48: # %.preheader193.preheader # in Loop: Header=BB0_9 Depth=2 pcalau12i $a0, %got_pc_hi20(randVar) ld.d $a1, $a0, %got_pc_lo12(randVar) - ld.w $a7, $a1, 0 - ori $a2, $zero, 2 + ld.w $t0, $a1, 0 + ori $a7, $zero, 2 .p2align 4, , 16 .LBB0_49: # %.preheader193 # Parent Loop BB0_4 Depth=1 # Parent Loop BB0_9 Depth=2 # => This Inner Loop Header: Depth=3 - movgr2fr.w $fa1, $a2 - ffint.d.w $fa1, $fa1 - mul.d $a0, $a7, $a5 - add.w $a7, $a0, $a6 - st.w $a7, $a1, 0 - fld.d $fa2, $s3, %pc_lo12(.LCPI0_0) - bstrpick.d $a0, $a7, 30, 0 + movgr2fr.w $fa2, $a7 + ffint.d.w $fa2, $fa2 + mul.d $a0, $t0, $a5 + add.w $t0, $a0, $a6 + st.w $t0, $a1, 0 + bstrpick.d $a0, $t0, 30, 0 movgr2fr.w $fa3, $a0 ffint.d.w $fa3, $fa3 - fdiv.d $fa2, $fa3, $fa2 + fdiv.d $fa3, $fa3, $fa0 ld.w $a0, $s2, 16 - fmul.d $fa1, $fa2, $fa1 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a2, $fa1 - beq $a0, $a2, .LBB0_49 + fmul.d $fa2, $fa3, $fa2 + ftintrz.w.d $fa2, $fa2 + movfr2gr.s $a7, $fa2 + beq $a0, $a7, .LBB0_49 # %bb.50: # in Loop: Header=BB0_9 Depth=2 - beqz $a2, .LBB0_52 + beqz $a7, .LBB0_52 # %bb.51: # in Loop: Header=BB0_9 Depth=2 - ld.w $s5, $s2, 24 + ld.w $s4, $s2, 24 bnez $a0, .LBB0_64 b .LBB0_53 .LBB0_52: # in Loop: Header=BB0_9 Depth=2 - ld.w $s5, $s2, 20 + ld.w $s4, $s2, 20 bnez $a0, .LBB0_64 .LBB0_53: # in Loop: Header=BB0_9 Depth=2 - ld.w $ra, $s2, 20 - vldi $vr1, -912 - move $t1, $ra + ld.w $s8, $s2, 20 + vldi $vr2, -912 + move $ra, $s8 b .LBB0_65 .LBB0_54: # in Loop: Header=BB0_9 Depth=2 - move $a1, $zero + move $t0, $zero b .LBB0_60 .LBB0_55: # %vector.ph393 # in Loop: Header=BB0_9 Depth=2 - bstrpick.d $a0, $t1, 30, 3 + bstrpick.d $a0, $s4, 30, 3 slli.d $a0, $a0, 3 - srli.d $a1, $t1, 3 - ori $s5, $zero, 1 - bstrins.d $s5, $a1, 30, 3 - ld.d $a1, $sp, 32 # 8-byte Folded Reload - move $t0, $a0 - vori.b $vr1, $vr0, 0 - vori.b $vr2, $vr0, 0 + srli.d $a1, $s4, 3 + ori $s6, $zero, 1 + bstrins.d $s6, $a1, 30, 3 + move $t0, $t6 + move $t1, $a0 + vori.b $vr2, $vr1, 0 + vori.b $vr3, $vr1, 0 .p2align 4, , 16 .LBB0_56: # %vector.body396 # Parent Loop BB0_4 Depth=1 # Parent Loop BB0_9 Depth=2 # => This Inner Loop Header: Depth=3 - ld.w $a2, $a1, -60 - ld.w $a7, $a1, -40 - ld.w $s7, $a1, -20 - ld.w $s8, $a1, 0 - vinsgr2vr.w $vr3, $a2, 0 - vinsgr2vr.w $vr3, $a7, 1 - vinsgr2vr.w $vr3, $s7, 2 - vinsgr2vr.w $vr3, $s8, 3 - ld.w $a2, $a1, 20 - ld.w $a7, $a1, 40 - ld.w $s7, $a1, 60 - ld.w $s8, $a1, 80 - vinsgr2vr.w $vr4, $a2, 0 + ld.w $a1, $t0, -60 + ld.w $a7, $t0, -40 + ld.w $s7, $t0, -20 + ld.w $s8, $t0, 0 + vinsgr2vr.w $vr4, $a1, 0 vinsgr2vr.w $vr4, $a7, 1 vinsgr2vr.w $vr4, $s7, 2 vinsgr2vr.w $vr4, $s8, 3 - vadd.w $vr1, $vr3, $vr1 + ld.w $a1, $t0, 20 + ld.w $a7, $t0, 40 + ld.w $s7, $t0, 60 + ld.w $s8, $t0, 80 + vinsgr2vr.w $vr5, $a1, 0 + vinsgr2vr.w $vr5, $a7, 1 + vinsgr2vr.w $vr5, $s7, 2 + vinsgr2vr.w $vr5, $s8, 3 vadd.w $vr2, $vr4, $vr2 - addi.d $t0, $t0, -8 - addi.d $a1, $a1, 160 - bnez $t0, .LBB0_56 + vadd.w $vr3, $vr5, $vr3 + addi.d $t1, $t1, -8 + addi.d $t0, $t0, 160 + bnez $t1, .LBB0_56 # %bb.57: # %middle.block401 # in Loop: Header=BB0_9 Depth=2 - vadd.w $vr1, $vr2, $vr1 - vhaddw.d.w $vr1, $vr1, $vr1 - vhaddw.q.d $vr1, $vr1, $vr1 - vpickve2gr.d $a1, $vr1, 0 - beq $a0, $t1, .LBB0_60 + vadd.w $vr2, $vr3, $vr2 + vhaddw.d.w $vr2, $vr2, $vr2 + vhaddw.q.d $vr2, $vr2, $vr2 + vpickve2gr.d $t0, $vr2, 0 + beq $a0, $s4, .LBB0_60 .LBB0_58: # %.lr.ph231.preheader454 # in Loop: Header=BB0_9 Depth=2 - addi.d $a0, $t1, 1 - bstrpick.d $a2, $a0, 31, 0 - slli.d $a0, $s5, 4 - alsl.d $a0, $s5, $a0, 2 + addi.d $a0, $s4, 1 + bstrpick.d $a1, $a0, 31, 0 + slli.d $a0, $s6, 4 + alsl.d $a0, $s6, $a0, 2 add.d $a0, $t7, $a0 - sub.d $a2, $a2, $s5 + sub.d $a1, $a1, $s6 .p2align 4, , 16 .LBB0_59: # %.lr.ph231 # Parent Loop BB0_4 Depth=1 # Parent Loop BB0_9 Depth=2 # => This Inner Loop Header: Depth=3 ld.w $a7, $a0, 0 - add.d $a1, $a7, $a1 - addi.d $a2, $a2, -1 + add.d $t0, $a7, $t0 + addi.d $a1, $a1, -1 addi.d $a0, $a0, 20 - bnez $a2, .LBB0_59 + bnez $a1, .LBB0_59 .LBB0_60: # %._crit_edge232 # in Loop: Header=BB0_9 Depth=2 - addi.w $a0, $a1, 0 - blt $a0, $s4, .LBB0_89 + addi.w $a0, $t0, 0 + blt $a0, $s3, .LBB0_89 # %bb.61: # %.preheader.preheader # in Loop: Header=BB0_9 Depth=2 pcalau12i $a0, %got_pc_hi20(randVar) ld.d $a1, $a0, %got_pc_lo12(randVar) - ld.w $a7, $a1, 0 + ld.w $t0, $a1, 0 .p2align 4, , 16 .LBB0_62: # %.preheader # Parent Loop BB0_4 Depth=1 # Parent Loop BB0_9 Depth=2 # => This Inner Loop Header: Depth=3 - movgr2fr.w $fa1, $s6 - ffint.d.w $fa1, $fa1 - mul.d $a0, $a7, $a5 - add.w $a7, $a0, $a6 - st.w $a7, $a1, 0 - fld.d $fa2, $s3, %pc_lo12(.LCPI0_0) - bstrpick.d $a0, $a7, 30, 0 + movgr2fr.w $fa2, $s5 + ffint.d.w $fa2, $fa2 + mul.d $a0, $t0, $a5 + add.w $t0, $a0, $a6 + st.w $t0, $a1, 0 + bstrpick.d $a0, $t0, 30, 0 movgr2fr.w $fa3, $a0 ffint.d.w $fa3, $fa3 - fdiv.d $fa2, $fa3, $fa2 + fdiv.d $fa3, $fa3, $fa0 ld.w $a0, $s2, 16 - fmul.d $fa1, $fa2, $fa1 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $s6, $fa1 - beq $a0, $s6, .LBB0_62 + fmul.d $fa2, $fa3, $fa2 + ftintrz.w.d $fa2, $fa2 + movfr2gr.s $s5, $fa2 + beq $a0, $s5, .LBB0_62 # %bb.63: # %.loopexit192 # in Loop: Header=BB0_9 Depth=2 - addi.d $s5, $s6, 1 + addi.d $s4, $s5, 1 beqz $a0, .LBB0_53 .p2align 4, , 16 .LBB0_64: # in Loop: Header=BB0_9 Depth=2 ld.d $a0, $t2, 216 - addi.w $a1, $s5, 0 - alsl.d $a2, $a1, $a0, 3 + addi.w $a1, $s4, 0 + alsl.d $a7, $a1, $a0, 3 slli.d $a1, $a1, 3 - ldx.w $ra, $a0, $a1 - ld.w $t1, $a2, 4 - sub.d $a0, $t1, $ra + ldx.w $s8, $a0, $a1 + ld.w $ra, $a7, 4 + sub.d $a0, $ra, $s8 addi.d $a0, $a0, 1 - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 + movgr2fr.w $fa2, $a0 + ffint.d.w $fa2, $fa2 .LBB0_65: # in Loop: Header=BB0_9 Depth=2 - addi.w $a0, $t1, 1 - pcalau12i $a1, %got_pc_hi20(randVar) - ld.d $t0, $a1, %got_pc_lo12(randVar) - ld.w $a1, $t0, 0 + addi.w $t1, $ra, 1 + pcalau12i $a0, %got_pc_hi20(randVar) + ld.d $a0, $a0, %got_pc_lo12(randVar) + ld.w $t0, $a0, 0 .LBB0_66: # %.critedge # Parent Loop BB0_4 Depth=1 # Parent Loop BB0_9 Depth=2 # => This Loop Header: Depth=3 # Child Loop BB0_68 Depth 4 - mul.d $a1, $a1, $a5 - add.w $a1, $a1, $a6 - fld.d $fa2, $s3, %pc_lo12(.LCPI0_0) - bstrpick.d $a2, $a1, 30, 0 - movgr2fr.w $fa3, $a2 + mul.d $a1, $t0, $a5 + add.w $t0, $a1, $a6 + bstrpick.d $a1, $t0, 30, 0 + movgr2fr.w $fa3, $a1 ffint.d.w $fa3, $fa3 - fdiv.d $fa2, $fa3, $fa2 - fmul.d $fa2, $fa1, $fa2 - ftintrz.w.d $fa2, $fa2 - movfr2gr.s $s4, $fa2 - add.w $a7, $ra, $s4 - st.w $a1, $t0, 0 - blt $t1, $a7, .LBB0_66 + fdiv.d $fa3, $fa3, $fa0 + fmul.d $fa3, $fa2, $fa3 + ftintrz.w.d $fa3, $fa3 + movfr2gr.s $s3, $fa3 + add.w $a1, $s8, $s3 + st.w $t0, $a0, 0 + blt $ra, $a1, .LBB0_66 # %bb.67: # %.lr.ph237 # in Loop: Header=BB0_66 Depth=3 - move $a2, $zero - ld.w $s6, $s2, 28 - add.d $s8, $ra, $s4 - slli.d $s4, $s8, 4 - alsl.d $s4, $s8, $s4, 2 - add.d $s7, $t4, $s4 - move $s4, $a7 + move $a7, $zero + ld.w $s5, $s2, 28 + add.d $s7, $s8, $s3 + slli.d $s3, $s7, 4 + alsl.d $s3, $s7, $s3, 2 + add.d $s6, $t4, $s3 + move $s3, $a1 .p2align 4, , 16 .LBB0_68: # Parent Loop BB0_4 Depth=1 # Parent Loop BB0_9 Depth=2 # Parent Loop BB0_66 Depth=3 # => This Inner Loop Header: Depth=4 - ld.w $t6, $s7, 8 - add.w $a2, $t6, $a2 - bge $a2, $s6, .LBB0_70 + ld.w $a2, $s6, 8 + add.w $a7, $a2, $a7 + bge $a7, $s5, .LBB0_70 # %bb.69: # in Loop: Header=BB0_68 Depth=4 - addi.w $s4, $s4, 1 - addi.d $s7, $s7, 20 - bne $a0, $s4, .LBB0_68 + addi.w $s3, $s3, 1 + addi.d $s6, $s6, 20 + bne $t1, $s3, .LBB0_68 b .LBB0_66 .p2align 4, , 16 .LBB0_70: # %.loopexit # in Loop: Header=BB0_9 Depth=2 - move $s3, $zero + move $s8, $zero move $s2, $s1 - blt $a7, $s4, .LBB0_75 + blt $a1, $s3, .LBB0_75 .LBB0_71: # %._crit_edge247 # in Loop: Header=BB0_9 Depth=2 - ld.w $a1, $s7, 0 - sub.w $a0, $s6, $s3 + ld.w $a1, $s6, 0 + sub.w $a0, $s5, $s8 add.d $a1, $a1, $a0 - st.w $a1, $s7, 0 + st.w $a1, $s6, 0 blez $a0, .LBB0_8 # %bb.72: # %.lr.ph252.preheader # in Loop: Header=BB0_9 Depth=2 add.w $a0, $s2, $a0 - addi.w $t0, $s2, 0 - addi.d $a1, $t0, 1 + addi.w $t1, $s2, 0 + addi.d $a1, $t1, 1 slt $a2, $a0, $a1 masknez $a7, $a0, $a2 maskeqz $a1, $a1, $a2 or $a1, $a1, $a7 - sub.d $a7, $a1, $t0 - bgeu $a7, $a4, .LBB0_83 + sub.d $t0, $a1, $t1 + bgeu $t0, $a4, .LBB0_83 # %bb.73: # in Loop: Header=BB0_9 Depth=2 - move $a1, $t0 + move $a1, $t1 b .LBB0_86 .p2align 4, , 16 .LBB0_74: # %._crit_edge241 # in Loop: Header=BB0_75 Depth=3 - addi.d $s8, $s8, 1 - add.d $s3, $t1, $s3 - beq $s8, $s4, .LBB0_71 + addi.d $s7, $s7, 1 + add.d $s8, $ra, $s8 + beq $s7, $s3, .LBB0_71 .LBB0_75: # %.lr.ph246 # Parent Loop BB0_4 Depth=1 # Parent Loop BB0_9 Depth=2 # => This Loop Header: Depth=3 # Child Loop BB0_79 Depth 4 # Child Loop BB0_82 Depth 4 - slli.d $a0, $s8, 4 - alsl.d $a1, $s8, $a0, 2 + slli.d $a0, $s7, 4 + alsl.d $a1, $s7, $a0, 2 add.d $a0, $t4, $a1 - ld.w $t1, $a0, 8 + ld.w $ra, $a0, 8 ldx.w $a2, $t4, $a1 move $a0, $s2 - add.d $a2, $a2, $t1 + add.d $a2, $a2, $ra stx.w $a2, $t4, $a1 - add.w $s2, $t1, $s2 - blez $t1, .LBB0_74 + add.w $s2, $ra, $s2 + blez $ra, .LBB0_74 # %bb.76: # %.lr.ph240.preheader # in Loop: Header=BB0_75 Depth=3 - addi.w $a7, $a0, 0 - addi.d $a0, $a7, 1 + addi.w $t0, $a0, 0 + addi.d $a0, $t0, 1 slt $a1, $s2, $a0 masknez $a2, $s2, $a1 maskeqz $a0, $a0, $a1 or $a0, $a0, $a2 - sub.d $a1, $a0, $a7 + sub.d $a1, $a0, $t0 bgeu $a1, $a4, .LBB0_78 # %bb.77: # in Loop: Header=BB0_75 Depth=3 - move $a0, $a7 + move $a0, $t0 b .LBB0_81 .p2align 4, , 16 .LBB0_78: # %vector.ph381 # in Loop: Header=BB0_75 Depth=3 - move $a2, $a1 - bstrins.d $a2, $zero, 0, 0 - add.d $a0, $a2, $a7 - mul.d $a7, $a7, $a3 - add.d $a7, $t8, $a7 - move $t0, $a2 + move $a7, $a1 + bstrins.d $a7, $zero, 0, 0 + add.d $a0, $a7, $t0 + mul.d $a2, $t0, $a3 + add.d $t0, $t8, $a2 + move $t1, $a7 .p2align 4, , 16 .LBB0_79: # %vector.body384 # Parent Loop BB0_4 Depth=1 # Parent Loop BB0_9 Depth=2 # Parent Loop BB0_75 Depth=3 # => This Inner Loop Header: Depth=4 - st.w $s8, $a7, -44 - st.w $s8, $a7, 0 - addi.d $t0, $t0, -2 - addi.d $a7, $a7, 88 - bnez $t0, .LBB0_79 + st.w $s7, $t0, -44 + st.w $s7, $t0, 0 + addi.d $t1, $t1, -2 + addi.d $t0, $t0, 88 + bnez $t1, .LBB0_79 # %bb.80: # %middle.block388 # in Loop: Header=BB0_75 Depth=3 - beq $a1, $a2, .LBB0_74 + beq $a1, $a7, .LBB0_74 .LBB0_81: # %.lr.ph240.preheader452 # in Loop: Header=BB0_75 Depth=3 mul.d $a1, $a0, $a3 @@ -709,59 +700,59 @@ placepin: # @placepin # Parent Loop BB0_9 Depth=2 # Parent Loop BB0_75 Depth=3 # => This Inner Loop Header: Depth=4 - st.w $s8, $a1, 0 + st.w $s7, $a1, 0 addi.d $a0, $a0, 1 addi.d $a1, $a1, 44 blt $a0, $s2, .LBB0_82 b .LBB0_74 .LBB0_83: # %vector.ph # in Loop: Header=BB0_9 Depth=2 - move $a2, $a7 - bstrins.d $a2, $zero, 0, 0 - add.d $a1, $a2, $t0 - mul.d $t0, $t0, $a3 - add.d $t0, $t8, $t0 - move $t1, $a2 + move $a7, $t0 + bstrins.d $a7, $zero, 0, 0 + add.d $a1, $a7, $t1 + mul.d $a2, $t1, $a3 + add.d $t1, $t8, $a2 + move $s2, $a7 .p2align 4, , 16 .LBB0_84: # %vector.body # Parent Loop BB0_4 Depth=1 # Parent Loop BB0_9 Depth=2 # => This Inner Loop Header: Depth=3 - st.w $s4, $t0, -44 - st.w $s4, $t0, 0 - addi.d $t1, $t1, -2 - addi.d $t0, $t0, 88 - bnez $t1, .LBB0_84 + st.w $s3, $t1, -44 + st.w $s3, $t1, 0 + addi.d $s2, $s2, -2 + addi.d $t1, $t1, 88 + bnez $s2, .LBB0_84 # %bb.85: # %middle.block # in Loop: Header=BB0_9 Depth=2 - beq $a7, $a2, .LBB0_8 + beq $t0, $a7, .LBB0_8 .LBB0_86: # %.lr.ph252.preheader453 # in Loop: Header=BB0_9 Depth=2 mul.d $a2, $a1, $a3 - add.d $a2, $s0, $a2 + add.d $a7, $s0, $a2 .p2align 4, , 16 .LBB0_87: # %.lr.ph252 # Parent Loop BB0_4 Depth=1 # Parent Loop BB0_9 Depth=2 # => This Inner Loop Header: Depth=3 - st.w $s4, $a2, 0 + st.w $s3, $a7, 0 addi.d $a1, $a1, 1 - addi.d $a2, $a2, 44 + addi.d $a7, $a7, 44 blt $a1, $a0, .LBB0_87 b .LBB0_8 .LBB0_88: # %._crit_edge266 - ld.d $s8, $sp, 40 # 8-byte Folded Reload - ld.d $s7, $sp, 48 # 8-byte Folded Reload - ld.d $s6, $sp, 56 # 8-byte Folded Reload - ld.d $s5, $sp, 64 # 8-byte Folded Reload - ld.d $s4, $sp, 72 # 8-byte Folded Reload - ld.d $s3, $sp, 80 # 8-byte Folded Reload - ld.d $s2, $sp, 88 # 8-byte Folded Reload - ld.d $s1, $sp, 96 # 8-byte Folded Reload - ld.d $s0, $sp, 104 # 8-byte Folded Reload - ld.d $fp, $sp, 112 # 8-byte Folded Reload - ld.d $ra, $sp, 120 # 8-byte Folded Reload - addi.d $sp, $sp, 128 + ld.d $s8, $sp, 24 # 8-byte Folded Reload + ld.d $s7, $sp, 32 # 8-byte Folded Reload + ld.d $s6, $sp, 40 # 8-byte Folded Reload + ld.d $s5, $sp, 48 # 8-byte Folded Reload + ld.d $s4, $sp, 56 # 8-byte Folded Reload + ld.d $s3, $sp, 64 # 8-byte Folded Reload + ld.d $s2, $sp, 72 # 8-byte Folded Reload + ld.d $s1, $sp, 80 # 8-byte Folded Reload + ld.d $s0, $sp, 88 # 8-byte Folded Reload + ld.d $fp, $sp, 96 # 8-byte Folded Reload + ld.d $ra, $sp, 104 # 8-byte Folded Reload + addi.d $sp, $sp, 112 ret .LBB0_89: pcalau12i $a0, %got_pc_hi20(fpo) diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/routenet.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/routenet.s index 7bcb9cf5..4402f302 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/routenet.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/routenet.s @@ -1,26 +1,22 @@ .file "routenet.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function routenet -.LCPI0_0: - .dword 0x4054000000000000 # double 80 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI0_1: + .p2align 4, 0x0 # -- Begin function routenet +.LCPI0_0: .word 5 # 0x5 .word 6 # 0x6 .word 7 # 0x7 .word 8 # 0x8 -.LCPI0_2: +.LCPI0_1: .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 .word 4 # 0x4 -.LCPI0_3: +.LCPI0_2: .word 4 # 0x4 .word 5 # 0x5 .word 6 # 0x6 .word 7 # 0x7 -.LCPI0_4: +.LCPI0_3: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -64,8 +60,10 @@ routenet: # @routenet ffint.d.l $fs0, $fa0 pcalau12i $a0, %got_pc_hi20(Mpaths) ld.d $s3, $a0, %got_pc_lo12(Mpaths) - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI0_0) + ori $a0, $zero, 0 + lu32i.d $a0, 262144 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fs1, $a0 .p2align 4, , 16 .LBB0_2: # =>This Inner Loop Header: Depth=1 st.w $a1, $s3, 0 @@ -162,10 +160,10 @@ routenet: # @routenet .LBB0_17: # %vec.epilog.ph bstrpick.d $a6, $fp, 30, 3 slli.d $a6, $a6, 3 + pcalau12i $a7, %pc_hi20(.LCPI0_2) + vld $vr2, $a7, %pc_lo12(.LCPI0_2) pcalau12i $a7, %pc_hi20(.LCPI0_3) - vld $vr2, $a7, %pc_lo12(.LCPI0_3) - pcalau12i $a7, %pc_hi20(.LCPI0_4) - vld $vr3, $a7, %pc_lo12(.LCPI0_4) + vld $vr3, $a7, %pc_lo12(.LCPI0_3) srli.d $a7, $fp, 3 bstrins.d $a3, $a7, 30, 3 vreplgr2vr.w $vr4, $a5 @@ -199,10 +197,10 @@ routenet: # @routenet srli.d $a6, $fp, 4 ori $a5, $zero, 1 bstrins.d $a5, $a6, 30, 4 + pcalau12i $a6, %pc_hi20(.LCPI0_0) + vld $vr2, $a6, %pc_lo12(.LCPI0_0) pcalau12i $a6, %pc_hi20(.LCPI0_1) - vld $vr2, $a6, %pc_lo12(.LCPI0_1) - pcalau12i $a6, %pc_hi20(.LCPI0_2) - vld $vr3, $a6, %pc_lo12(.LCPI0_2) + vld $vr3, $a6, %pc_lo12(.LCPI0_1) vaddi.wu $vr4, $vr0, 8 addi.d $a6, $a1, 18 addi.d $a7, $a2, 18 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/savewolf.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/savewolf.s index a24c983a..5dc4a1a8 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/savewolf.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/savewolf.s @@ -117,12 +117,7 @@ savewolf: # @savewolf .Lfunc_end0: .size savewolf, .Lfunc_end0-savewolf # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function TW_oldinput -.LCPI1_0: - .dword 0x3f847ae147ae147b # double 0.01 - .text - .globl TW_oldinput + .globl TW_oldinput # -- Begin function TW_oldinput .p2align 5 .type TW_oldinput,@function TW_oldinput: # @TW_oldinput @@ -156,8 +151,11 @@ TW_oldinput: # @TW_oldinput pcalau12i $a0, %got_pc_hi20(cellarray) ld.d $s3, $a0, %got_pc_lo12(cellarray) ori $s8, $zero, 1 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI1_0) + lu12i.w $a0, 293601 + ori $a0, $a0, 1147 + lu32i.d $a0, 293601 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fs0, $a0 vldi $vr3, -928 ori $s4, $zero, 44 st.d $s2, $sp, 24 # 8-byte Folded Spill diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/scrapnet.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/scrapnet.s index 702fad65..53fd057f 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/scrapnet.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/scrapnet.s @@ -1,10 +1,6 @@ .file "scrapnet.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function scrapnet -.LCPI0_0: - .dword 0x3fe6666666666666 # double 0.69999999999999996 .text - .globl scrapnet + .globl scrapnet # -- Begin function scrapnet .p2align 5 .type scrapnet,@function scrapnet: # @scrapnet @@ -33,11 +29,14 @@ scrapnet: # @scrapnet ld.w $a0, $a0, 0 blez $a0, .LBB0_22 # %bb.1: # %.preheader.lr.ph - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI0_0) - movgr2fr.w $fa1, $s2 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $s2 + ffint.d.w $fa0, $fa0 + lu12i.w $a1, 419430 + ori $a1, $a1, 1638 + lu32i.d $a1, 419430 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a1, $fa0 ori $a2, $zero, 8 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/selectpin.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/selectpin.s index bde0c07f..b61fc03d 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/selectpin.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/selectpin.s @@ -1,10 +1,6 @@ .file "selectpin.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function selectpin -.LCPI0_0: - .dword 0x41dfffffffc00000 # double 2147483647 .text - .globl selectpin + .globl selectpin # -- Begin function selectpin .p2align 5 .type selectpin,@function selectpin: # @selectpin @@ -28,11 +24,12 @@ selectpin: # @selectpin ld.d $s2, $a1, %got_pc_lo12(randVar) ld.w $a1, $s2, 0 lu12i.w $a3, 269412 - pcalau12i $a4, %pc_hi20(.LCPI0_0) - fld.d $fs0, $a4, %pc_lo12(.LCPI0_0) ori $s3, $a3, 3693 lu12i.w $a3, 3 ori $s4, $a3, 57 + lu12i.w $a3, -1024 + lu52i.d $a3, $a3, 1053 + movgr2fr.d $fs0, $a3 .p2align 4, , 16 .LBB0_1: # =>This Inner Loop Header: Depth=1 mul.d $a1, $a1, $s3 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/test2loop.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/test2loop.s index acbc8eaa..983d9efc 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/test2loop.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/test2loop.s @@ -1,12 +1,6 @@ .file "test2loop.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function test2loop -.LCPI0_0: - .dword 0x3f50000000000000 # double 9.765625E-4 -.LCPI0_1: - .dword 0x41dfffffffc00000 # double 2147483647 .text - .globl test2loop + .globl test2loop # -- Begin function test2loop .p2align 5 .type test2loop,@function test2loop: # @test2loop @@ -41,8 +35,8 @@ test2loop: # @test2loop fadd.d $fa0, $fa0, $fa0 pcaddu18i $ra, %call36(exp2) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_0) + lu52i.d $a0, $zero, 1013 + movgr2fr.d $fs0, $a0 fmul.d $fa0, $fa0, $fs0 pcalau12i $a0, %got_pc_hi20(bdxlength) ld.d $fp, $a0, %got_pc_lo12(bdxlength) @@ -159,44 +153,47 @@ test2loop: # @test2loop pcalau12i $a0, %got_pc_hi20(blockt) ld.d $a0, $a0, %got_pc_lo12(blockt) st.d $a0, $sp, 88 # 8-byte Folded Spill - move $a0, $zero - lu12i.w $a1, 269412 - ori $s4, $a1, 3693 - lu12i.w $a1, 3 - ori $fp, $a1, 57 + move $a2, $zero + lu12i.w $a0, 269412 + ori $s4, $a0, 3693 + lu12i.w $a0, 3 + ori $fp, $a0, 57 + lu12i.w $a0, -1024 + lu52i.d $a0, $a0, 1053 + movgr2fr.d $fs0, $a0 ld.d $ra, $sp, 152 # 8-byte Folded Reload b .LBB0_3 .p2align 4, , 16 .LBB0_2: # %.outer.sink.split # in Loop: Header=BB0_3 Depth=1 - ld.d $a0, $sp, 168 # 8-byte Folded Reload - addi.w $a0, $a0, 1 - pcalau12i $a1, %got_pc_hi20(funccost) - ld.d $a1, $a1, %got_pc_lo12(funccost) - ld.w $a2, $a1, 0 + ld.d $a2, $sp, 168 # 8-byte Folded Reload + addi.w $a2, $a2, 1 + pcalau12i $a0, %got_pc_hi20(funccost) + ld.d $a0, $a0, %got_pc_lo12(funccost) + ld.w $a0, $a0, 0 ld.d $a1, $sp, 80 # 8-byte Folded Reload fld.d $fa0, $a1, 0 - movgr2fr.w $fa1, $a2 + movgr2fr.w $fa1, $a0 ffint.d.w $fa1, $fa1 fadd.d $fa0, $fa0, $fa1 fst.d $fa0, $a1, 0 - pcalau12i $a1, %got_pc_hi20(penalty) - ld.d $a1, $a1, %got_pc_lo12(penalty) - ld.w $a2, $a1, 0 + pcalau12i $a0, %got_pc_hi20(penalty) + ld.d $a0, $a0, %got_pc_lo12(penalty) + ld.w $a0, $a0, 0 ld.d $a1, $sp, 72 # 8-byte Folded Reload fld.d $fa0, $a1, 0 - movgr2fr.w $fa1, $a2 + movgr2fr.w $fa1, $a0 ffint.d.w $fa1, $fa1 fadd.d $fa0, $fa0, $fa1 fst.d $fa0, $a1, 0 ld.d $ra, $sp, 152 # 8-byte Folded Reload - ld.d $a1, $sp, 144 # 8-byte Folded Reload - bge $a0, $a1, .LBB0_40 + ld.d $a0, $sp, 144 # 8-byte Folded Reload + bge $a2, $a0, .LBB0_40 .LBB0_3: # %.preheader240.lr.ph # =>This Loop Header: Depth=1 # Child Loop BB0_5 Depth 2 # Child Loop BB0_6 Depth 3 - st.d $a0, $sp, 168 # 8-byte Folded Spill + st.d $a2, $sp, 168 # 8-byte Folded Spill ld.d $a0, $sp, 136 # 8-byte Folded Reload ld.w $a0, $a0, 0 ld.d $a1, $sp, 176 # 8-byte Folded Reload @@ -229,15 +226,13 @@ test2loop: # @test2loop # Child Loop BB0_6 Depth 3 mul.d $a7, $a7, $s4 add.w $a7, $a7, $fp - pcalau12i $t0, %pc_hi20(.LCPI0_1) - fld.d $fa2, $t0, %pc_lo12(.LCPI0_1) bstrpick.d $t0, $a7, 30, 0 - movgr2fr.w $fa3, $t0 - ffint.d.w $fa3, $fa3 - fdiv.d $fa3, $fa3, $fa2 - fmul.d $fa3, $fa3, $fa0 - ftintrz.w.d $fa3, $fa3 - movfr2gr.s $t0, $fa3 + movgr2fr.w $fa2, $t0 + ffint.d.w $fa2, $fa2 + fdiv.d $fa2, $fa2, $fs0 + fmul.d $fa2, $fa2, $fa0 + ftintrz.w.d $fa2, $fa2 + movfr2gr.s $t0, $fa2 beq $a0, $t0, .LBB0_5 .p2align 4, , 16 .LBB0_6: # %.preheader @@ -247,12 +242,12 @@ test2loop: # @test2loop mul.d $a7, $a7, $s4 add.w $a7, $a7, $fp bstrpick.d $t1, $a7, 30, 0 - movgr2fr.w $fa3, $t1 - ffint.d.w $fa3, $fa3 - fdiv.d $fa3, $fa3, $fa2 - fmul.d $fa3, $fa3, $fa1 - ftintrz.w.d $fa3, $fa3 - movfr2gr.s $t1, $fa3 + movgr2fr.w $fa2, $t1 + ffint.d.w $fa2, $fa2 + fdiv.d $fa2, $fa2, $fs0 + fmul.d $fa2, $fa2, $fa1 + ftintrz.w.d $fa2, $fa2 + movfr2gr.s $t1, $fa2 addi.w $s7, $t1, 1 beq $s7, $a1, .LBB0_6 # %bb.7: # in Loop: Header=BB0_5 Depth=2 @@ -330,29 +325,29 @@ test2loop: # @test2loop div.w $t5, $t5, $s7 sub.d $t4, $t4, $t2 addi.d $t4, $t4, 1 - movgr2fr.w $fa3, $t4 - ffint.d.w $fa3, $fa3 + movgr2fr.w $fa2, $t4 + ffint.d.w $fa2, $fa2 mul.d $a7, $a7, $s4 add.d $a7, $a7, $fp bstrpick.d $t4, $a7, 30, 0 - movgr2fr.w $fa4, $t4 - ffint.d.w $fa4, $fa4 - fdiv.d $fa4, $fa4, $fa2 - fmul.d $fa3, $fa4, $fa3 - ftintrz.w.d $fa3, $fa3 - movfr2gr.s $t4, $fa3 + movgr2fr.w $fa3, $t4 + ffint.d.w $fa3, $fa3 + fdiv.d $fa3, $fa3, $fs0 + fmul.d $fa2, $fa3, $fa2 + ftintrz.w.d $fa2, $fa2 + movfr2gr.s $t4, $fa2 add.w $t2, $t2, $t4 sub.d $t4, $t5, $t3 addi.d $t4, $t4, 1 - movgr2fr.w $fa3, $t4 - ffint.d.w $fa3, $fa3 + movgr2fr.w $fa2, $t4 + ffint.d.w $fa2, $fa2 mul.d $a7, $a7, $s4 add.w $a7, $a7, $fp bstrpick.d $t4, $a7, 30, 0 - movgr2fr.w $fa4, $t4 - ffint.d.w $fa4, $fa4 - fdiv.d $fa2, $fa4, $fa2 - fmul.d $fa2, $fa2, $fa3 + movgr2fr.w $fa3, $t4 + ffint.d.w $fa3, $fa3 + fdiv.d $fa3, $fa3, $fs0 + fmul.d $fa2, $fa3, $fa2 ftintrz.w.d $fa2, $fa2 movfr2gr.s $t4, $fa2 add.w $t3, $t3, $t4 @@ -598,13 +593,14 @@ test2loop: # @test2loop bnez $a0, .LBB0_2 .LBB0_38: # in Loop: Header=BB0_3 Depth=1 ld.d $ra, $sp, 152 # 8-byte Folded Reload - ld.d $a0, $sp, 168 # 8-byte Folded Reload - ld.d $a1, $sp, 144 # 8-byte Folded Reload - blt $a0, $a1, .LBB0_3 + ld.d $a2, $sp, 168 # 8-byte Folded Reload + ld.d $a0, $sp, 144 # 8-byte Folded Reload + blt $a2, $a0, .LBB0_3 b .LBB0_40 .LBB0_39: - move $a0, $zero + move $a2, $zero .LBB0_40: # %.outer._crit_edge + move $a0, $a2 fld.d $fs0, $sp, 192 # 8-byte Folded Reload ld.d $s8, $sp, 200 # 8-byte Folded Reload ld.d $s7, $sp, 208 # 8-byte Folded Reload diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/testloop.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/testloop.s index be7e11a5..4499850f 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/testloop.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/testloop.s @@ -1,12 +1,6 @@ .file "testloop.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function testloop -.LCPI0_0: - .dword 0x3f50000000000000 # double 9.765625E-4 -.LCPI0_1: - .dword 0x41dfffffffc00000 # double 2147483647 .text - .globl testloop + .globl testloop # -- Begin function testloop .p2align 5 .type testloop,@function testloop: # @testloop @@ -37,8 +31,8 @@ testloop: # @testloop fadd.d $fa0, $fa0, $fa0 pcaddu18i $ra, %call36(exp2) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_0) + lu52i.d $a0, $zero, 1013 + movgr2fr.d $fs0, $a0 fmul.d $fa0, $fa0, $fs0 pcalau12i $a0, %got_pc_hi20(bdxlength) ld.d $fp, $a0, %got_pc_lo12(bdxlength) @@ -160,6 +154,9 @@ testloop: # @testloop ori $s4, $a1, 3693 lu12i.w $a1, 3 ori $fp, $a1, 57 + lu12i.w $a1, -1024 + lu52i.d $a1, $a1, 1053 + movgr2fr.d $fs0, $a1 ld.d $ra, $sp, 136 # 8-byte Folded Reload b .LBB0_3 .p2align 4, , 16 @@ -216,15 +213,13 @@ testloop: # @testloop # Child Loop BB0_6 Depth 3 mul.d $a7, $a7, $s4 add.w $a7, $a7, $fp - pcalau12i $t0, %pc_hi20(.LCPI0_1) - fld.d $fa2, $t0, %pc_lo12(.LCPI0_1) bstrpick.d $t0, $a7, 30, 0 - movgr2fr.w $fa3, $t0 - ffint.d.w $fa3, $fa3 - fdiv.d $fa3, $fa3, $fa2 - fmul.d $fa3, $fa3, $fa0 - ftintrz.w.d $fa3, $fa3 - movfr2gr.s $t0, $fa3 + movgr2fr.w $fa2, $t0 + ffint.d.w $fa2, $fa2 + fdiv.d $fa2, $fa2, $fs0 + fmul.d $fa2, $fa2, $fa0 + ftintrz.w.d $fa2, $fa2 + movfr2gr.s $t0, $fa2 beq $a0, $t0, .LBB0_5 .p2align 4, , 16 .LBB0_6: # %.preheader @@ -234,12 +229,12 @@ testloop: # @testloop mul.d $a7, $a7, $s4 add.w $a7, $a7, $fp bstrpick.d $t1, $a7, 30, 0 - movgr2fr.w $fa3, $t1 - ffint.d.w $fa3, $fa3 - fdiv.d $fa3, $fa3, $fa2 - fmul.d $fa3, $fa3, $fa1 - ftintrz.w.d $fa3, $fa3 - movfr2gr.s $t1, $fa3 + movgr2fr.w $fa2, $t1 + ffint.d.w $fa2, $fa2 + fdiv.d $fa2, $fa2, $fs0 + fmul.d $fa2, $fa2, $fa1 + ftintrz.w.d $fa2, $fa2 + movfr2gr.s $t1, $fa2 addi.w $s7, $t1, 1 beq $s7, $a1, .LBB0_6 # %bb.7: # in Loop: Header=BB0_5 Depth=2 @@ -317,29 +312,29 @@ testloop: # @testloop div.w $t5, $t5, $s7 sub.d $t4, $t4, $t2 addi.d $t4, $t4, 1 - movgr2fr.w $fa3, $t4 - ffint.d.w $fa3, $fa3 + movgr2fr.w $fa2, $t4 + ffint.d.w $fa2, $fa2 mul.d $a7, $a7, $s4 add.d $a7, $a7, $fp bstrpick.d $t4, $a7, 30, 0 - movgr2fr.w $fa4, $t4 - ffint.d.w $fa4, $fa4 - fdiv.d $fa4, $fa4, $fa2 - fmul.d $fa3, $fa4, $fa3 - ftintrz.w.d $fa3, $fa3 - movfr2gr.s $t4, $fa3 + movgr2fr.w $fa3, $t4 + ffint.d.w $fa3, $fa3 + fdiv.d $fa3, $fa3, $fs0 + fmul.d $fa2, $fa3, $fa2 + ftintrz.w.d $fa2, $fa2 + movfr2gr.s $t4, $fa2 add.w $t2, $t2, $t4 sub.d $t4, $t5, $t3 addi.d $t4, $t4, 1 - movgr2fr.w $fa3, $t4 - ffint.d.w $fa3, $fa3 + movgr2fr.w $fa2, $t4 + ffint.d.w $fa2, $fa2 mul.d $a7, $a7, $s4 add.w $a7, $a7, $fp bstrpick.d $t4, $a7, 30, 0 - movgr2fr.w $fa4, $t4 - ffint.d.w $fa4, $fa4 - fdiv.d $fa2, $fa4, $fa2 - fmul.d $fa2, $fa2, $fa3 + movgr2fr.w $fa3, $t4 + ffint.d.w $fa3, $fa3 + fdiv.d $fa3, $fa3, $fs0 + fmul.d $fa2, $fa3, $fa2 ftintrz.w.d $fa2, $fa2 movfr2gr.s $t4, $fa2 add.w $t3, $t3, $t4 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/twstats.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/twstats.s index 176b0142..a2f94a70 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/twstats.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/twstats.s @@ -1,10 +1,6 @@ .file "twstats.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function twstats -.LCPI0_0: - .dword 0x4059000000000000 # double 100 .text - .globl twstats + .globl twstats # -- Begin function twstats .p2align 5 .type twstats,@function twstats: # @twstats @@ -15,32 +11,33 @@ twstats: # @twstats st.d $s0, $sp, 24 # 8-byte Folded Spill st.d $s1, $sp, 16 # 8-byte Folded Spill st.d $s2, $sp, 8 # 8-byte Folded Spill - st.d $s3, $sp, 0 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(fpo) ld.d $fp, $a0, %got_pc_lo12(fpo) ld.d $a0, $fp, 0 pcalau12i $a1, %got_pc_hi20(icost) - ld.d $s1, $a1, %got_pc_lo12(icost) - ld.w $a2, $s1, 0 + ld.d $s0, $a1, %got_pc_lo12(icost) + ld.w $a2, $s0, 0 pcalau12i $a1, %got_pc_hi20(fcost) - ld.d $s2, $a1, %got_pc_lo12(fcost) - ld.w $a3, $s2, 0 + ld.d $s1, $a1, %got_pc_lo12(fcost) + ld.w $a3, $s1, 0 pcalau12i $a1, %pc_hi20(.L.str) addi.d $a1, $a1, %pc_lo12(.L.str) pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 - ld.w $a1, $s1, 0 - pcalau12i $s0, %pc_hi20(.LCPI0_0) + ld.w $a1, $s0, 0 beqz $a1, .LBB0_2 # %bb.1: - ld.w $a2, $s2, 0 + ld.w $a2, $s1, 0 ld.d $a0, $fp, 0 movgr2fr.w $fa0, $a2 ffint.d.w $fa0, $fa0 - fld.d $fa1, $s0, %pc_lo12(.LCPI0_0) - movgr2fr.w $fa2, $a1 - ffint.d.w $fa2, $fa2 - fdiv.d $fa0, $fa0, $fa2 + movgr2fr.w $fa1, $a1 + ffint.d.w $fa1, $fa1 + fdiv.d $fa0, $fa0, $fa1 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a1, $fa0 @@ -53,27 +50,30 @@ twstats: # @twstats .LBB0_2: ld.d $a0, $fp, 0 pcalau12i $a1, %got_pc_hi20(iwire) - ld.d $s2, $a1, %got_pc_lo12(iwire) - ld.w $a2, $s2, 0 + ld.d $s1, $a1, %got_pc_lo12(iwire) + ld.w $a2, $s1, 0 pcalau12i $a1, %got_pc_hi20(fwire) - ld.d $s3, $a1, %got_pc_lo12(fwire) - ld.w $a3, $s3, 0 + ld.d $s2, $a1, %got_pc_lo12(fwire) + ld.w $a3, $s2, 0 pcalau12i $a1, %pc_hi20(.L.str.2) addi.d $a1, $a1, %pc_lo12(.L.str.2) pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 - ld.w $a0, $s1, 0 + ld.w $a0, $s0, 0 beqz $a0, .LBB0_4 # %bb.3: - ld.w $a1, $s3, 0 + ld.w $a1, $s2, 0 + ld.w $a2, $s1, 0 ld.d $a0, $fp, 0 - ld.w $a2, $s2, 0 movgr2fr.w $fa0, $a1 ffint.d.w $fa0, $fa0 - fld.d $fa1, $s0, %pc_lo12(.LCPI0_0) - movgr2fr.w $fa2, $a2 - ffint.d.w $fa2, $fa2 - fdiv.d $fa0, $fa0, $fa2 + movgr2fr.w $fa1, $a2 + ffint.d.w $fa1, $fa1 + fdiv.d $fa0, $fa0, $fa1 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a1, $fa0 @@ -86,26 +86,29 @@ twstats: # @twstats .LBB0_4: ld.d $a0, $fp, 0 pcalau12i $a1, %got_pc_hi20(iwirex) - ld.d $s2, $a1, %got_pc_lo12(iwirex) - ld.w $a2, $s2, 0 + ld.d $s1, $a1, %got_pc_lo12(iwirex) + ld.w $a2, $s1, 0 pcalau12i $a1, %got_pc_hi20(fwirex) - ld.d $s1, $a1, %got_pc_lo12(fwirex) - ld.w $a3, $s1, 0 + ld.d $s0, $a1, %got_pc_lo12(fwirex) + ld.w $a3, $s0, 0 pcalau12i $a1, %pc_hi20(.L.str.4) addi.d $a1, $a1, %pc_lo12(.L.str.4) pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 - ld.w $a1, $s2, 0 + ld.w $a1, $s1, 0 beqz $a1, .LBB0_6 # %bb.5: - ld.w $a2, $s1, 0 + ld.w $a2, $s0, 0 ld.d $a0, $fp, 0 movgr2fr.w $fa0, $a2 ffint.d.w $fa0, $fa0 - fld.d $fa1, $s0, %pc_lo12(.LCPI0_0) - movgr2fr.w $fa2, $a1 - ffint.d.w $fa2, $fa2 - fdiv.d $fa0, $fa0, $fa2 + movgr2fr.w $fa1, $a1 + ffint.d.w $fa1, $fa1 + fdiv.d $fa0, $fa0, $fa1 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a1, $fa0 @@ -118,26 +121,29 @@ twstats: # @twstats .LBB0_6: ld.d $a0, $fp, 0 pcalau12i $a1, %got_pc_hi20(iwirey) - ld.d $s2, $a1, %got_pc_lo12(iwirey) - ld.w $a2, $s2, 0 + ld.d $s1, $a1, %got_pc_lo12(iwirey) + ld.w $a2, $s1, 0 pcalau12i $a1, %got_pc_hi20(fwirey) - ld.d $s1, $a1, %got_pc_lo12(fwirey) - ld.w $a3, $s1, 0 + ld.d $s0, $a1, %got_pc_lo12(fwirey) + ld.w $a3, $s0, 0 pcalau12i $a1, %pc_hi20(.L.str.6) addi.d $a1, $a1, %pc_lo12(.L.str.6) pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 - ld.w $a1, $s2, 0 + ld.w $a1, $s1, 0 beqz $a1, .LBB0_8 # %bb.7: - ld.w $a2, $s1, 0 + ld.w $a2, $s0, 0 ld.d $a0, $fp, 0 movgr2fr.w $fa0, $a2 ffint.d.w $fa0, $fa0 - fld.d $fa1, $s0, %pc_lo12(.LCPI0_0) - movgr2fr.w $fa2, $a1 - ffint.d.w $fa2, $fa2 - fdiv.d $fa0, $fa0, $fa2 + movgr2fr.w $fa1, $a1 + ffint.d.w $fa1, $fa1 + fdiv.d $fa0, $fa0, $fa1 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a1, $fa0 @@ -185,7 +191,6 @@ twstats: # @twstats ld.w $a2, $a1, 0 pcalau12i $a1, %pc_hi20(.L.str.12) addi.d $a1, $a1, %pc_lo12(.L.str.12) - ld.d $s3, $sp, 0 # 8-byte Folded Reload ld.d $s2, $sp, 8 # 8-byte Folded Reload ld.d $s1, $sp, 16 # 8-byte Folded Reload ld.d $s0, $sp, 24 # 8-byte Folded Reload diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/uaspect.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/uaspect.s index 5cbb61cb..e067976e 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/uaspect.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/uaspect.s @@ -1,10 +1,6 @@ .file "uaspect.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function uaspect -.LCPI0_0: - .dword 0x41dfffffffc00000 # double 2147483647 .text - .globl uaspect + .globl uaspect # -- Begin function uaspect .p2align 5 .type uaspect,@function uaspect: # @uaspect @@ -313,12 +309,13 @@ uaspect: # @uaspect lu12i.w $a2, 3 ori $a2, $a2, 57 add.d $a1, $a1, $a2 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a2, %pc_lo12(.LCPI0_0) bstrpick.d $a2, $a1, 30, 0 - movgr2fr.w $fa2, $a2 - ffint.d.w $fa2, $fa2 - fdiv.d $fa1, $fa2, $fa1 + movgr2fr.w $fa1, $a2 + ffint.d.w $fa1, $fa1 + lu12i.w $a2, -1024 + lu52i.d $a2, $a2, 1053 + movgr2fr.d $fa2, $a2 + fdiv.d $fa1, $fa1, $fa2 fcmp.cule.d $fcc0, $fa0, $fa1 st.w $a1, $a0, 0 bcnez $fcc0, .LBB0_27 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/uloop.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/uloop.s index 7d812273..6fb3ca0a 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/uloop.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/uloop.s @@ -1,12 +1,6 @@ .file "uloop.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function uloop -.LCPI0_0: - .dword 0x41dfffffffc00000 # double 2147483647 -.LCPI0_1: - .dword 0x3f847ae147ae147b # double 0.01 .text - .globl uloop + .globl uloop # -- Begin function uloop .p2align 5 .type uloop,@function uloop: # @uloop @@ -24,18 +18,19 @@ uloop: # @uloop st.d $s7, $sp, 384 # 8-byte Folded Spill st.d $s8, $sp, 376 # 8-byte Folded Spill fst.d $fs0, $sp, 368 # 8-byte Folded Spill + fst.d $fs1, $sp, 360 # 8-byte Folded Spill pcalau12i $s6, %pc_hi20(flips) st.w $zero, $s6, %pc_lo12(flips) pcalau12i $a0, %pc_hi20(flipp) - st.d $a0, $sp, 48 # 8-byte Folded Spill + st.d $a0, $sp, 40 # 8-byte Folded Spill st.w $zero, $a0, %pc_lo12(flipp) pcalau12i $a0, %pc_hi20(attp) - st.d $a0, $sp, 40 # 8-byte Folded Spill + st.d $a0, $sp, 32 # 8-byte Folded Spill st.w $zero, $a0, %pc_lo12(attp) pcalau12i $a0, %got_pc_hi20(rangeLimit) ld.d $a0, $a0, %got_pc_lo12(rangeLimit) ld.w $a0, $a0, 0 - st.d $a0, $sp, 184 # 8-byte Folded Spill + st.d $a0, $sp, 176 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(count) ld.d $a0, $a0, %got_pc_lo12(count) ld.w $a0, $a0, 0 @@ -46,7 +41,7 @@ uloop: # @uloop ld.d $a0, $fp, 0 pcalau12i $a1, %pc_hi20(.L.str) addi.d $a1, $a1, %pc_lo12(.L.str) - ld.d $s0, $sp, 184 # 8-byte Folded Reload + ld.d $s0, $sp, 176 # 8-byte Folded Reload move $a2, $s0 pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 @@ -75,65 +70,71 @@ uloop: # @uloop .LBB0_2: pcalau12i $a0, %got_pc_hi20(attmax) ld.d $a0, $a0, %got_pc_lo12(attmax) - st.d $a0, $sp, 336 # 8-byte Folded Spill + st.d $a0, $sp, 328 # 8-byte Folded Spill ld.w $a0, $a0, 0 blez $a0, .LBB0_4 # %bb.3: # %.preheader856.lr.ph.preheader pcalau12i $a0, %got_pc_hi20(randVar) - ld.d $s2, $a0, %got_pc_lo12(randVar) + ld.d $s3, $a0, %got_pc_lo12(randVar) pcalau12i $a0, %got_pc_hi20(choose) ld.d $a0, $a0, %got_pc_lo12(choose) - st.d $a0, $sp, 328 # 8-byte Folded Spill + st.d $a0, $sp, 320 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(bigcell) ld.d $a0, $a0, %got_pc_lo12(bigcell) - st.d $a0, $sp, 320 # 8-byte Folded Spill + st.d $a0, $sp, 312 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(toobig) ld.d $a0, $a0, %got_pc_lo12(toobig) - st.d $a0, $sp, 312 # 8-byte Folded Spill + st.d $a0, $sp, 304 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(cellarray) ld.d $a0, $a0, %got_pc_lo12(cellarray) - st.d $a0, $sp, 304 # 8-byte Folded Spill + st.d $a0, $sp, 296 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(numcells) ld.d $a6, $a0, %got_pc_lo12(numcells) - st.d $zero, $sp, 352 # 8-byte Folded Spill - st.d $zero, $sp, 272 # 8-byte Folded Spill - st.d $zero, $sp, 120 # 8-byte Folded Spill - st.d $zero, $sp, 72 # 8-byte Folded Spill - st.d $zero, $sp, 200 # 8-byte Folded Spill + st.d $zero, $sp, 344 # 8-byte Folded Spill + st.d $zero, $sp, 264 # 8-byte Folded Spill st.d $zero, $sp, 112 # 8-byte Folded Spill - st.d $zero, $sp, 296 # 8-byte Folded Spill - st.d $zero, $sp, 192 # 8-byte Folded Spill - st.d $zero, $sp, 96 # 8-byte Folded Spill st.d $zero, $sp, 64 # 8-byte Folded Spill - st.d $zero, $sp, 128 # 8-byte Folded Spill - st.d $zero, $sp, 80 # 8-byte Folded Spill - st.d $zero, $sp, 248 # 8-byte Folded Spill + st.d $zero, $sp, 192 # 8-byte Folded Spill + st.d $zero, $sp, 104 # 8-byte Folded Spill + st.d $zero, $sp, 288 # 8-byte Folded Spill + st.d $zero, $sp, 184 # 8-byte Folded Spill + st.d $zero, $sp, 88 # 8-byte Folded Spill + st.d $zero, $sp, 56 # 8-byte Folded Spill + st.d $zero, $sp, 120 # 8-byte Folded Spill + st.d $zero, $sp, 72 # 8-byte Folded Spill + st.d $zero, $sp, 240 # 8-byte Folded Spill lu12i.w $a0, 269412 ori $s7, $a0, 3693 lu12i.w $a0, 3 ori $fp, $a0, 57 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_0) + lu12i.w $a0, -1024 + lu52i.d $a0, $a0, 1053 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 244 ori $a0, $a0, 576 - st.d $a0, $sp, 56 # 8-byte Folded Spill - st.d $s6, $sp, 136 # 8-byte Folded Spill - st.d $s2, $sp, 264 # 8-byte Folded Spill - st.d $a6, $sp, 344 # 8-byte Folded Spill + st.d $a0, $sp, 48 # 8-byte Folded Spill + lu12i.w $a0, 293601 + ori $a0, $a0, 1147 + lu32i.d $a0, 293601 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fs1, $a0 + st.d $s6, $sp, 128 # 8-byte Folded Spill + st.d $s3, $sp, 256 # 8-byte Folded Spill + st.d $a6, $sp, 336 # 8-byte Folded Spill b .LBB0_13 .LBB0_4: - st.d $zero, $sp, 248 # 8-byte Folded Spill - st.d $zero, $sp, 80 # 8-byte Folded Spill - st.d $zero, $sp, 128 # 8-byte Folded Spill - st.d $zero, $sp, 64 # 8-byte Folded Spill - st.d $zero, $sp, 96 # 8-byte Folded Spill - st.d $zero, $sp, 192 # 8-byte Folded Spill - st.d $zero, $sp, 296 # 8-byte Folded Spill - st.d $zero, $sp, 112 # 8-byte Folded Spill - st.d $zero, $sp, 200 # 8-byte Folded Spill + st.d $zero, $sp, 240 # 8-byte Folded Spill st.d $zero, $sp, 72 # 8-byte Folded Spill st.d $zero, $sp, 120 # 8-byte Folded Spill - st.d $zero, $sp, 272 # 8-byte Folded Spill + st.d $zero, $sp, 56 # 8-byte Folded Spill + st.d $zero, $sp, 88 # 8-byte Folded Spill + st.d $zero, $sp, 184 # 8-byte Folded Spill + st.d $zero, $sp, 288 # 8-byte Folded Spill + st.d $zero, $sp, 104 # 8-byte Folded Spill + st.d $zero, $sp, 192 # 8-byte Folded Spill + st.d $zero, $sp, 64 # 8-byte Folded Spill + st.d $zero, $sp, 112 # 8-byte Folded Spill + st.d $zero, $sp, 264 # 8-byte Folded Spill .LBB0_5: # %.outer._crit_edge pcalau12i $a0, %got_pc_hi20(fpo) ld.d $fp, $a0, %got_pc_lo12(fpo) @@ -147,28 +148,28 @@ uloop: # @uloop ld.d $a0, $fp, 0 pcalau12i $a1, %pc_hi20(.L.str.5) addi.d $a1, $a1, %pc_lo12(.L.str.5) - ld.d $a2, $sp, 192 # 8-byte Folded Reload - ld.d $a3, $sp, 272 # 8-byte Folded Reload + ld.d $a2, $sp, 184 # 8-byte Folded Reload + ld.d $a3, $sp, 264 # 8-byte Folded Reload pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 ld.d $a0, $fp, 0 pcalau12i $a1, %pc_hi20(.L.str.6) addi.d $a1, $a1, %pc_lo12(.L.str.6) - ld.d $a2, $sp, 80 # 8-byte Folded Reload - ld.d $a3, $sp, 112 # 8-byte Folded Reload + ld.d $a2, $sp, 72 # 8-byte Folded Reload + ld.d $a3, $sp, 104 # 8-byte Folded Reload pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 ld.d $a0, $fp, 0 pcalau12i $a1, %pc_hi20(.L.str.7) addi.d $a1, $a1, %pc_lo12(.L.str.7) - ld.d $a2, $sp, 248 # 8-byte Folded Reload - ld.d $a3, $sp, 296 # 8-byte Folded Reload + ld.d $a2, $sp, 240 # 8-byte Folded Reload + ld.d $a3, $sp, 288 # 8-byte Folded Reload pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 ld.d $a0, $fp, 0 - ld.d $a1, $sp, 48 # 8-byte Folded Reload - ld.w $a2, $a1, %pc_lo12(flipp) ld.d $a1, $sp, 40 # 8-byte Folded Reload + ld.w $a2, $a1, %pc_lo12(flipp) + ld.d $a1, $sp, 32 # 8-byte Folded Reload ld.w $a3, $a1, %pc_lo12(attp) pcalau12i $a1, %pc_hi20(.L.str.8) addi.d $a1, $a1, %pc_lo12(.L.str.8) @@ -177,28 +178,29 @@ uloop: # @uloop ld.d $a0, $fp, 0 pcalau12i $a1, %pc_hi20(.L.str.9) addi.d $a1, $a1, %pc_lo12(.L.str.9) - ld.d $a2, $sp, 96 # 8-byte Folded Reload - ld.d $a3, $sp, 120 # 8-byte Folded Reload + ld.d $a2, $sp, 88 # 8-byte Folded Reload + ld.d $a3, $sp, 112 # 8-byte Folded Reload pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 ld.d $a0, $fp, 0 pcalau12i $a1, %pc_hi20(.L.str.10) addi.d $a1, $a1, %pc_lo12(.L.str.10) - ld.d $a2, $sp, 128 # 8-byte Folded Reload - ld.d $a3, $sp, 200 # 8-byte Folded Reload + ld.d $a2, $sp, 120 # 8-byte Folded Reload + ld.d $a3, $sp, 192 # 8-byte Folded Reload pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 ld.d $a0, $fp, 0 pcalau12i $a1, %pc_hi20(.L.str.11) addi.d $a1, $a1, %pc_lo12(.L.str.11) - ld.d $a2, $sp, 64 # 8-byte Folded Reload - ld.d $a3, $sp, 72 # 8-byte Folded Reload + ld.d $a2, $sp, 56 # 8-byte Folded Reload + ld.d $a3, $sp, 64 # 8-byte Folded Reload pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 ld.d $a0, $fp, 0 ld.w $a2, $s6, %pc_lo12(flips) pcalau12i $a1, %pc_hi20(.L.str.12) addi.d $a1, $a1, %pc_lo12(.L.str.12) + fld.d $fs1, $sp, 360 # 8-byte Folded Reload fld.d $fs0, $sp, 368 # 8-byte Folded Reload ld.d $s8, $sp, 376 # 8-byte Folded Reload ld.d $s7, $sp, 384 # 8-byte Folded Reload @@ -225,61 +227,61 @@ uloop: # @uloop .LBB0_8: # %.thread826 # in Loop: Header=BB0_13 Depth=1 move $a0, $s8 - move $a1, $s1 + move $a1, $s2 pcaddu18i $ra, %call36(usite2) jirl $ra, $ra, 0 - ld.d $a1, $sp, 200 # 8-byte Folded Reload + ld.d $a1, $sp, 192 # 8-byte Folded Reload addi.w $a1, $a1, 1 - st.d $a1, $sp, 200 # 8-byte Folded Spill + st.d $a1, $sp, 192 # 8-byte Folded Spill beqz $a0, .LBB0_46 # %bb.9: # in Loop: Header=BB0_13 Depth=1 - ld.d $s6, $sp, 136 # 8-byte Folded Reload + ld.d $s6, $sp, 128 # 8-byte Folded Reload ld.w $a0, $s6, %pc_lo12(flips) addi.d $a0, $a0, 1 st.w $a0, $s6, %pc_lo12(flips) - ld.d $a0, $sp, 128 # 8-byte Folded Reload + ld.d $a0, $sp, 120 # 8-byte Folded Reload addi.w $a0, $a0, 1 - st.d $a0, $sp, 128 # 8-byte Folded Spill - ld.d $a0, $sp, 352 # 8-byte Folded Reload + st.d $a0, $sp, 120 # 8-byte Folded Spill + ld.d $a0, $sp, 344 # 8-byte Folded Reload addi.w $a0, $a0, 1 .LBB0_10: # %.outer # in Loop: Header=BB0_13 Depth=1 - st.d $a0, $sp, 352 # 8-byte Folded Spill + st.d $a0, $sp, 344 # 8-byte Folded Spill move $a0, $s5 - ld.d $a1, $sp, 216 # 8-byte Folded Reload - ld.d $a2, $sp, 208 # 8-byte Folded Reload - ld.d $a3, $sp, 144 # 8-byte Folded Reload + ld.d $a1, $sp, 208 # 8-byte Folded Reload + ld.d $a2, $sp, 200 # 8-byte Folded Reload + ld.d $a3, $sp, 136 # 8-byte Folded Reload pcaddu18i $ra, %call36(fixSpot) jirl $ra, $ra, 0 move $a0, $s4 - ld.d $a1, $sp, 232 # 8-byte Folded Reload - ld.d $a2, $sp, 224 # 8-byte Folded Reload - ld.d $a3, $sp, 152 # 8-byte Folded Reload + ld.d $a1, $sp, 224 # 8-byte Folded Reload + ld.d $a2, $sp, 216 # 8-byte Folded Reload + ld.d $a3, $sp, 144 # 8-byte Folded Reload pcaddu18i $ra, %call36(fixSpot) jirl $ra, $ra, 0 .LBB0_11: # %.outer # in Loop: Header=BB0_13 Depth=1 - ld.d $a6, $sp, 344 # 8-byte Folded Reload + ld.d $a6, $sp, 336 # 8-byte Folded Reload .LBB0_12: # %.outer # in Loop: Header=BB0_13 Depth=1 - ld.d $a0, $sp, 336 # 8-byte Folded Reload + ld.d $a0, $sp, 328 # 8-byte Folded Reload ld.w $a0, $a0, 0 - ld.d $a1, $sp, 352 # 8-byte Folded Reload + ld.d $a1, $sp, 344 # 8-byte Folded Reload bge $a1, $a0, .LBB0_5 .LBB0_13: # %.preheader856.lr.ph # =>This Loop Header: Depth=1 # Child Loop BB0_15 Depth 2 # Child Loop BB0_16 Depth 3 # Child Loop BB0_38 Depth 2 - ld.d $a0, $sp, 328 # 8-byte Folded Reload + ld.d $a0, $sp, 320 # 8-byte Folded Reload ld.w $a2, $a0, 0 - ld.w $a0, $s2, 0 + ld.w $a0, $s3, 0 movgr2fr.w $fa0, $a2 - ld.d $a1, $sp, 320 # 8-byte Folded Reload - ld.w $a5, $a1, 0 ld.d $a1, $sp, 312 # 8-byte Folded Reload - ld.w $a3, $a1, 0 + ld.w $a5, $a1, 0 ld.d $a1, $sp, 304 # 8-byte Folded Reload + ld.w $a3, $a1, 0 + ld.d $a1, $sp, 296 # 8-byte Folded Reload ld.d $a4, $a1, 0 ld.w $ra, $a6, 0 ffint.d.w $fa0, $fa0 @@ -316,38 +318,38 @@ uloop: # @uloop fdiv.d $fa2, $fa2, $fs0 fmul.d $fa2, $fa2, $fa1 ftintrz.w.d $fa2, $fa2 - movfr2gr.s $s3, $fa2 - addi.w $s1, $s3, 1 - beq $s1, $a3, .LBB0_16 + movfr2gr.s $s1, $fa2 + addi.w $s2, $s1, 1 + beq $s2, $a3, .LBB0_16 # %bb.17: # in Loop: Header=BB0_15 Depth=2 - beq $s0, $s3, .LBB0_15 + beq $s0, $s1, .LBB0_15 # %bb.18: # in Loop: Header=BB0_15 Depth=2 addi.w $s8, $s0, 1 slli.d $a5, $s8, 3 ldx.d $s5, $a4, $a5 blt $s0, $ra, .LBB0_20 # %bb.19: # in Loop: Header=BB0_15 Depth=2 - blt $s3, $ra, .LBB0_26 + blt $s1, $ra, .LBB0_26 .LBB0_20: # in Loop: Header=BB0_15 Depth=2 ld.w $a5, $s5, 8 bge $s0, $ra, .LBB0_22 # %bb.21: # in Loop: Header=BB0_15 Depth=2 - bge $s3, $ra, .LBB0_14 + bge $s1, $ra, .LBB0_14 .LBB0_22: # in Loop: Header=BB0_15 Depth=2 addi.w $a6, $zero, -1 beq $a5, $a6, .LBB0_15 # %bb.23: # in Loop: Header=BB0_15 Depth=2 - slli.d $a6, $s1, 3 + slli.d $a6, $s2, 3 ldx.d $s4, $a4, $a6 ld.w $a6, $s4, 8 bne $a5, $a6, .LBB0_15 # %bb.24: # in Loop: Header=BB0_13 Depth=1 ld.w $t7, $s5, 56 ld.w $t6, $s4, 56 - st.w $a0, $s2, 0 + st.w $a0, $s3, 0 pcalau12i $a0, %got_pc_hi20(gridGiven) - ld.d $s2, $a0, %got_pc_lo12(gridGiven) - ld.w $a0, $s2, 0 + ld.d $s3, $a0, %got_pc_lo12(gridGiven) + ld.w $a0, $s3, 0 beqz $a0, .LBB0_49 # %bb.25: # in Loop: Header=BB0_13 Depth=1 alsl.d $a0, $t7, $s5, 3 @@ -459,7 +461,7 @@ uloop: # @uloop b .LBB0_50 .p2align 4, , 16 .LBB0_26: # in Loop: Header=BB0_13 Depth=1 - st.w $a0, $s2, 0 + st.w $a0, $s3, 0 ori $a1, $zero, 8 move $a0, $s5 pcaddu18i $ra, %call36(newOrient) @@ -475,22 +477,22 @@ uloop: # @uloop ld.w $a0, $s6, %pc_lo12(flips) addi.d $a0, $a0, 1 st.w $a0, $s6, %pc_lo12(flips) - ld.d $a0, $sp, 248 # 8-byte Folded Reload + ld.d $a0, $sp, 240 # 8-byte Folded Reload addi.w $a0, $a0, 1 - st.d $a0, $sp, 248 # 8-byte Folded Spill + st.d $a0, $sp, 240 # 8-byte Folded Spill .LBB0_29: # in Loop: Header=BB0_13 Depth=1 - ld.d $a6, $sp, 344 # 8-byte Folded Reload - ld.d $a0, $sp, 296 # 8-byte Folded Reload + ld.d $a6, $sp, 336 # 8-byte Folded Reload + ld.d $a0, $sp, 288 # 8-byte Folded Reload addi.w $a0, $a0, 1 - st.d $a0, $sp, 296 # 8-byte Folded Spill + st.d $a0, $sp, 288 # 8-byte Folded Spill b .LBB0_12 .p2align 4, , 16 .LBB0_30: # in Loop: Header=BB0_13 Depth=1 ld.w $s4, $s5, 56 ld.w $s1, $s5, 12 ld.w $s0, $s5, 16 - st.w $a0, $s2, 0 - ld.d $a7, $sp, 184 # 8-byte Folded Reload + st.w $a0, $s3, 0 + ld.d $a7, $sp, 176 # 8-byte Folded Reload sub.w $a0, $s1, $a7 pcalau12i $a1, %got_pc_hi20(blockl) ld.d $a1, $a1, %got_pc_lo12(blockl) @@ -539,8 +541,8 @@ uloop: # @uloop masknez $a0, $a0, $a6 maskeqz $a1, $a5, $a6 or $a5, $a1, $a0 - addi.d $a6, $sp, 364 - addi.d $a7, $sp, 360 + addi.d $a6, $sp, 356 + addi.d $a7, $sp, 352 move $a0, $s5 move $a1, $s4 pcaddu18i $ra, %call36(pickSpot) @@ -548,8 +550,8 @@ uloop: # @uloop pcalau12i $a0, %got_pc_hi20(gridGiven) ld.d $a0, $a0, %got_pc_lo12(gridGiven) ld.w $a0, $a0, 0 - ld.w $s2, $sp, 364 - ld.w $s3, $sp, 360 + ld.w $s2, $sp, 356 + ld.w $s3, $sp, 352 beqz $a0, .LBB0_32 # %bb.31: # in Loop: Header=BB0_13 Depth=1 alsl.d $a0, $s4, $s5, 3 @@ -610,9 +612,9 @@ uloop: # @uloop pcalau12i $a4, %pc_hi20(newyy) st.w $a3, $a4, %pc_lo12(newyy) sub.w $s2, $a2, $a1 - st.w $s2, $sp, 364 + st.w $s2, $sp, 356 sub.w $s3, $a3, $a0 - st.w $s3, $sp, 360 + st.w $s3, $sp, 352 .LBB0_32: # in Loop: Header=BB0_13 Depth=1 move $a0, $s8 move $a1, $s2 @@ -624,9 +626,9 @@ uloop: # @uloop ld.w $a0, $s6, %pc_lo12(flips) addi.d $a0, $a0, 1 st.w $a0, $s6, %pc_lo12(flips) - ld.d $a0, $sp, 192 # 8-byte Folded Reload + ld.d $a0, $sp, 184 # 8-byte Folded Reload addi.w $a0, $a0, 1 - st.d $a0, $sp, 192 # 8-byte Folded Spill + st.d $a0, $sp, 184 # 8-byte Folded Spill .LBB0_34: # in Loop: Header=BB0_13 Depth=1 move $a0, $s5 move $a1, $s1 @@ -634,7 +636,7 @@ uloop: # @uloop move $a3, $s4 pcaddu18i $ra, %call36(fixSpot) jirl $ra, $ra, 0 - ld.d $s2, $sp, 264 # 8-byte Folded Reload + ld.d $s3, $sp, 256 # 8-byte Folded Reload .LBB0_35: # in Loop: Header=BB0_13 Depth=1 ld.w $a0, $s5, 132 beqz $a0, .LBB0_39 @@ -658,28 +660,26 @@ uloop: # @uloop .LBB0_39: # %.loopexit # in Loop: Header=BB0_13 Depth=1 ld.w $a0, $s5, 76 - ld.d $a1, $sp, 352 # 8-byte Folded Reload + ld.d $a1, $sp, 344 # 8-byte Folded Reload addi.w $a1, $a1, 1 - st.d $a1, $sp, 352 # 8-byte Folded Spill - ld.d $a1, $sp, 272 # 8-byte Folded Reload + st.d $a1, $sp, 344 # 8-byte Folded Spill + ld.d $a1, $sp, 264 # 8-byte Folded Reload addi.w $a1, $a1, 1 - st.d $a1, $sp, 272 # 8-byte Folded Spill + st.d $a1, $sp, 264 # 8-byte Folded Spill beqz $a0, .LBB0_11 # %bb.40: # in Loop: Header=BB0_13 Depth=1 fld.d $fa0, $s5, 120 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_1) fld.d $fa1, $s5, 112 - fadd.d $fa2, $fa0, $fa2 + fadd.d $fa2, $fa0, $fs1 fcmp.cule.d $fcc0, $fa1, $fa2 bcnez $fcc0, .LBB0_11 # %bb.41: # in Loop: Header=BB0_13 Depth=1 - ld.w $a0, $s2, 0 + ld.w $a0, $s3, 0 fsub.d $fa1, $fa1, $fa0 mul.d $a0, $a0, $s7 ld.w $a1, $s5, 56 add.d $a0, $a0, $fp - st.w $a0, $s2, 0 + st.w $a0, $s3, 0 bstrpick.d $a0, $a0, 30, 0 alsl.d $a1, $a1, $s5, 3 ld.d $a1, $a1, 152 @@ -699,16 +699,16 @@ uloop: # @uloop beqz $a0, .LBB0_71 # %bb.42: # in Loop: Header=BB0_13 Depth=1 add.w $a4, $s6, $s2 - ld.d $s6, $sp, 136 # 8-byte Folded Reload + ld.d $s6, $sp, 128 # 8-byte Folded Reload ld.w $a0, $s6, %pc_lo12(flips) add.w $a3, $s4, $s2 add.w $a2, $s3, $s0 add.w $a1, $s1, $s0 addi.d $a0, $a0, 1 st.w $a0, $s6, %pc_lo12(flips) - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 88 # 8-byte Folded Reload addi.w $a0, $a0, 1 - st.d $a0, $sp, 96 # 8-byte Folded Spill + st.d $a0, $sp, 88 # 8-byte Folded Spill move $a0, $s5 pcaddu18i $ra, %call36(fixSpotAsp) jirl $ra, $ra, 0 @@ -726,37 +726,37 @@ uloop: # @uloop move $a2, $s3 pcaddu18i $ra, %call36(usiteo1) jirl $ra, $ra, 0 - ld.d $a1, $sp, 112 # 8-byte Folded Reload + ld.d $a1, $sp, 104 # 8-byte Folded Reload addi.w $a1, $a1, 1 - st.d $a1, $sp, 112 # 8-byte Folded Spill + st.d $a1, $sp, 104 # 8-byte Folded Spill beqz $a0, .LBB0_68 # %bb.45: # in Loop: Header=BB0_13 Depth=1 ld.w $a0, $s6, %pc_lo12(flips) addi.d $a0, $a0, 1 st.w $a0, $s6, %pc_lo12(flips) - ld.d $a0, $sp, 80 # 8-byte Folded Reload + ld.d $a0, $sp, 72 # 8-byte Folded Reload addi.w $a0, $a0, 1 - st.d $a0, $sp, 80 # 8-byte Folded Spill + st.d $a0, $sp, 72 # 8-byte Folded Spill b .LBB0_34 .LBB0_46: # in Loop: Header=BB0_13 Depth=1 ori $a1, $zero, 4 move $a0, $s5 pcaddu18i $ra, %call36(newOrient) jirl $ra, $ra, 0 - move $s2, $a0 + move $s3, $a0 ori $a1, $zero, 4 move $a0, $s4 pcaddu18i $ra, %call36(newOrient) jirl $ra, $ra, 0 - ld.d $s6, $sp, 136 # 8-byte Folded Reload - bgez $s2, .LBB0_73 + ld.d $s6, $sp, 128 # 8-byte Folded Reload + bgez $s3, .LBB0_73 # %bb.47: # in Loop: Header=BB0_13 Depth=1 bgez $a0, .LBB0_73 # %bb.48: # in Loop: Header=BB0_13 Depth=1 - ld.d $a0, $sp, 352 # 8-byte Folded Reload + ld.d $a0, $sp, 344 # 8-byte Folded Reload addi.w $a0, $a0, 1 - st.d $a0, $sp, 352 # 8-byte Folded Spill - ld.d $s2, $sp, 264 # 8-byte Folded Reload + st.d $a0, $sp, 344 # 8-byte Folded Spill + ld.d $s3, $sp, 256 # 8-byte Folded Reload b .LBB0_11 .LBB0_49: # in Loop: Header=BB0_13 Depth=1 ld.w $t5, $s4, 12 @@ -766,8 +766,8 @@ uloop: # @uloop alsl.d $a0, $t7, $s5, 3 ld.d $a1, $a0, 152 .LBB0_50: # in Loop: Header=BB0_13 Depth=1 - st.d $t8, $sp, 224 # 8-byte Folded Spill - st.d $t5, $sp, 232 # 8-byte Folded Spill + st.d $t8, $sp, 216 # 8-byte Folded Spill + st.d $t5, $sp, 224 # 8-byte Folded Spill ld.w $a0, $a1, 56 ld.w $a2, $a1, 60 ld.w $a3, $a1, 64 @@ -777,124 +777,124 @@ uloop: # @uloop add.w $s6, $a2, $t5 add.w $a1, $a3, $t8 add.w $t8, $a4, $t8 - st.d $t3, $sp, 216 # 8-byte Folded Spill - st.d $t4, $sp, 208 # 8-byte Folded Spill - st.d $t6, $sp, 152 # 8-byte Folded Spill - st.d $t7, $sp, 144 # 8-byte Folded Spill - st.d $s2, $sp, 88 # 8-byte Folded Spill - st.d $a0, $sp, 288 # 8-byte Folded Spill + st.d $t3, $sp, 208 # 8-byte Folded Spill + st.d $t4, $sp, 200 # 8-byte Folded Spill + st.d $t6, $sp, 144 # 8-byte Folded Spill + st.d $t7, $sp, 136 # 8-byte Folded Spill + st.d $s3, $sp, 80 # 8-byte Folded Spill + st.d $a0, $sp, 280 # 8-byte Folded Spill bge $s0, $ra, .LBB0_52 # %bb.51: # in Loop: Header=BB0_13 Depth=1 - st.d $a5, $sp, 280 # 8-byte Folded Spill + st.d $a5, $sp, 272 # 8-byte Folded Spill fld.d $fa0, $a5, 8 move $a2, $t8 - move $s2, $a1 - st.d $t8, $sp, 240 # 8-byte Folded Spill + move $s3, $a1 + st.d $t8, $sp, 232 # 8-byte Folded Spill pcaddu18i $ra, %call36(wireestx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 280 # 8-byte Folded Reload + ld.d $a1, $sp, 272 # 8-byte Folded Reload fld.d $fa0, $a1, 16 - ld.d $a1, $sp, 288 # 8-byte Folded Reload + ld.d $a1, $sp, 280 # 8-byte Folded Reload sub.w $a1, $a1, $a0 - st.d $a1, $sp, 288 # 8-byte Folded Spill + st.d $a1, $sp, 280 # 8-byte Folded Spill move $a0, $s6 - move $a1, $s2 - ld.d $a2, $sp, 240 # 8-byte Folded Reload + move $a1, $s3 + ld.d $a2, $sp, 232 # 8-byte Folded Reload pcaddu18i $ra, %call36(wireestx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 280 # 8-byte Folded Reload + ld.d $a1, $sp, 272 # 8-byte Folded Reload fld.d $fa0, $a1, 24 add.w $s6, $a0, $s6 - move $a0, $s2 - ld.d $a1, $sp, 288 # 8-byte Folded Reload + move $a0, $s3 + ld.d $a1, $sp, 280 # 8-byte Folded Reload move $a2, $s6 pcaddu18i $ra, %call36(wireesty) jirl $ra, $ra, 0 - ld.d $a1, $sp, 280 # 8-byte Folded Reload + ld.d $a1, $sp, 272 # 8-byte Folded Reload fld.d $fa0, $a1, 32 - sub.w $s2, $s2, $a0 - st.d $s2, $sp, 176 # 8-byte Folded Spill - ld.d $a0, $sp, 240 # 8-byte Folded Reload - ld.d $a1, $sp, 288 # 8-byte Folded Reload - st.d $s6, $sp, 168 # 8-byte Folded Spill + sub.w $s3, $s3, $a0 + st.d $s3, $sp, 168 # 8-byte Folded Spill + ld.d $a0, $sp, 232 # 8-byte Folded Reload + ld.d $a1, $sp, 280 # 8-byte Folded Reload + st.d $s6, $sp, 160 # 8-byte Folded Spill move $a2, $s6 pcaddu18i $ra, %call36(wireesty) jirl $ra, $ra, 0 - ld.d $t8, $sp, 240 # 8-byte Folded Reload - ld.d $t4, $sp, 208 # 8-byte Folded Reload - ld.d $t3, $sp, 216 # 8-byte Folded Reload + ld.d $t8, $sp, 232 # 8-byte Folded Reload + ld.d $t4, $sp, 200 # 8-byte Folded Reload + ld.d $t3, $sp, 208 # 8-byte Folded Reload ld.w $a2, $s4, 56 - ld.d $a1, $sp, 344 # 8-byte Folded Reload + ld.d $a1, $sp, 336 # 8-byte Folded Reload ld.w $ra, $a1, 0 add.w $t8, $a0, $t8 b .LBB0_53 .LBB0_52: # in Loop: Header=BB0_13 Depth=1 - st.d $s6, $sp, 168 # 8-byte Folded Spill - st.d $a1, $sp, 176 # 8-byte Folded Spill + st.d $s6, $sp, 160 # 8-byte Folded Spill + st.d $a1, $sp, 168 # 8-byte Folded Spill move $a2, $t6 .LBB0_53: # in Loop: Header=BB0_13 Depth=1 addi.d $a3, $s4, 152 slli.d $a0, $a2, 3 - st.d $a3, $sp, 104 # 8-byte Folded Spill - ldx.d $s2, $a3, $a0 - ld.w $a0, $s2, 56 - ld.w $a2, $s2, 60 - ld.w $a3, $s2, 64 - ld.w $a4, $s2, 68 + st.d $a3, $sp, 96 # 8-byte Folded Spill + ldx.d $s3, $a3, $a0 + ld.w $a0, $s3, 56 + ld.w $a2, $s3, 60 + ld.w $a3, $s3, 64 + ld.w $a4, $s3, 68 add.w $a6, $a0, $t3 add.w $s6, $a2, $t3 add.w $a1, $a3, $t4 add.w $t0, $a4, $t4 - bge $s3, $ra, .LBB0_55 + bge $s1, $ra, .LBB0_55 # %bb.54: # in Loop: Header=BB0_13 Depth=1 - fld.d $fa0, $s2, 8 + fld.d $fa0, $s3, 8 move $a0, $a6 move $a2, $t0 - st.d $t8, $sp, 240 # 8-byte Folded Spill - st.d $a6, $sp, 280 # 8-byte Folded Spill - st.d $a1, $sp, 256 # 8-byte Folded Spill - st.d $t0, $sp, 160 # 8-byte Folded Spill + st.d $t8, $sp, 232 # 8-byte Folded Spill + st.d $a6, $sp, 272 # 8-byte Folded Spill + st.d $a1, $sp, 248 # 8-byte Folded Spill + st.d $t0, $sp, 152 # 8-byte Folded Spill pcaddu18i $ra, %call36(wireestx) jirl $ra, $ra, 0 - fld.d $fa0, $s2, 16 - ld.d $a1, $sp, 280 # 8-byte Folded Reload + fld.d $fa0, $s3, 16 + ld.d $a1, $sp, 272 # 8-byte Folded Reload sub.w $a1, $a1, $a0 - st.d $a1, $sp, 280 # 8-byte Folded Spill + st.d $a1, $sp, 272 # 8-byte Folded Spill move $a0, $s6 - ld.d $a1, $sp, 256 # 8-byte Folded Reload - ld.d $a2, $sp, 160 # 8-byte Folded Reload + ld.d $a1, $sp, 248 # 8-byte Folded Reload + ld.d $a2, $sp, 152 # 8-byte Folded Reload pcaddu18i $ra, %call36(wireestx) jirl $ra, $ra, 0 - fld.d $fa0, $s2, 24 + fld.d $fa0, $s3, 24 add.w $s6, $a0, $s6 - ld.d $a0, $sp, 256 # 8-byte Folded Reload - ld.d $a1, $sp, 280 # 8-byte Folded Reload + ld.d $a0, $sp, 248 # 8-byte Folded Reload + ld.d $a1, $sp, 272 # 8-byte Folded Reload move $a2, $s6 pcaddu18i $ra, %call36(wireesty) jirl $ra, $ra, 0 - fld.d $fa0, $s2, 32 - ld.d $a1, $sp, 256 # 8-byte Folded Reload + fld.d $fa0, $s3, 32 + ld.d $a1, $sp, 248 # 8-byte Folded Reload sub.w $a1, $a1, $a0 - st.d $a1, $sp, 256 # 8-byte Folded Spill - ld.d $a0, $sp, 160 # 8-byte Folded Reload - ld.d $a1, $sp, 280 # 8-byte Folded Reload + st.d $a1, $sp, 248 # 8-byte Folded Spill + ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a1, $sp, 272 # 8-byte Folded Reload move $a2, $s6 pcaddu18i $ra, %call36(wireesty) jirl $ra, $ra, 0 - ld.d $t0, $sp, 160 # 8-byte Folded Reload - ld.d $a1, $sp, 256 # 8-byte Folded Reload - ld.d $a6, $sp, 280 # 8-byte Folded Reload - ld.d $t8, $sp, 240 # 8-byte Folded Reload - ld.d $t4, $sp, 208 # 8-byte Folded Reload - ld.d $t3, $sp, 216 # 8-byte Folded Reload + ld.d $t0, $sp, 152 # 8-byte Folded Reload + ld.d $a1, $sp, 248 # 8-byte Folded Reload + ld.d $a6, $sp, 272 # 8-byte Folded Reload + ld.d $t8, $sp, 232 # 8-byte Folded Reload + ld.d $t4, $sp, 200 # 8-byte Folded Reload + ld.d $t3, $sp, 208 # 8-byte Folded Reload add.w $t0, $a0, $t0 .LBB0_55: # in Loop: Header=BB0_13 Depth=1 - ld.d $s2, $sp, 264 # 8-byte Folded Reload - ld.d $t1, $sp, 232 # 8-byte Folded Reload - ld.d $t2, $sp, 224 # 8-byte Folded Reload - ld.d $t5, $sp, 288 # 8-byte Folded Reload - ld.d $t6, $sp, 176 # 8-byte Folded Reload - ld.d $t7, $sp, 168 # 8-byte Folded Reload + ld.d $s3, $sp, 256 # 8-byte Folded Reload + ld.d $t1, $sp, 224 # 8-byte Folded Reload + ld.d $t2, $sp, 216 # 8-byte Folded Reload + ld.d $t5, $sp, 280 # 8-byte Folded Reload + ld.d $t6, $sp, 168 # 8-byte Folded Reload + ld.d $t7, $sp, 160 # 8-byte Folded Reload bge $a6, $t7, .LBB0_59 # %bb.56: # in Loop: Header=BB0_13 Depth=1 bge $t5, $s6, .LBB0_59 @@ -910,9 +910,9 @@ uloop: # @uloop slt $a0, $a1, $t8 slt $ra, $t5, $s6 slt $a2, $a6, $t7 - st.d $a6, $sp, 280 # 8-byte Folded Spill + st.d $a6, $sp, 272 # 8-byte Folded Spill sub.w $a4, $t7, $a6 - ld.d $a6, $sp, 56 # 8-byte Folded Reload + ld.d $a6, $sp, 48 # 8-byte Folded Reload slt $a3, $a4, $a6 maskeqz $a4, $a4, $a3 masknez $a5, $a6, $a3 @@ -920,7 +920,7 @@ uloop: # @uloop maskeqz $a4, $a4, $a2 masknez $a5, $a6, $a2 or $a5, $a4, $a5 - st.d $s6, $sp, 288 # 8-byte Folded Spill + st.d $s6, $sp, 280 # 8-byte Folded Spill sub.w $a6, $s6, $t5 slt $a4, $a6, $a5 masknez $a7, $a5, $a4 @@ -929,7 +929,7 @@ uloop: # @uloop maskeqz $a6, $a6, $ra masknez $a5, $a5, $ra or $a6, $a6, $a5 - st.d $a1, $sp, 256 # 8-byte Folded Spill + st.d $a1, $sp, 248 # 8-byte Folded Spill sub.w $a7, $t8, $a1 slt $a5, $a7, $a6 move $s6, $t0 @@ -958,7 +958,7 @@ uloop: # @uloop ld.w $a2, $a2, 0 blt $a2, $a1, .LBB0_6 # %bb.62: # in Loop: Header=BB0_13 Depth=1 - ld.d $a1, $sp, 256 # 8-byte Folded Reload + ld.d $a1, $sp, 248 # 8-byte Folded Reload sub.w $a0, $a1, $a0 pcalau12i $a1, %got_pc_hi20(blockb) ld.d $a1, $a1, %got_pc_lo12(blockb) @@ -985,8 +985,8 @@ uloop: # @uloop move $a3, $t2 move $a4, $t3 move $a5, $t4 - ld.d $s6, $sp, 280 # 8-byte Folded Reload - ld.d $ra, $sp, 288 # 8-byte Folded Reload + ld.d $s6, $sp, 272 # 8-byte Folded Reload + ld.d $ra, $sp, 280 # 8-byte Folded Reload beqz $a7, .LBB0_8 # %bb.64: # in Loop: Header=BB0_13 Depth=1 bstrpick.d $a0, $a6, 31, 31 @@ -1014,7 +1014,7 @@ uloop: # @uloop blt $a0, $a1, .LBB0_106 b .LBB0_107 .LBB0_68: # in Loop: Header=BB0_13 Depth=1 - ld.d $s2, $sp, 264 # 8-byte Folded Reload + ld.d $s3, $sp, 256 # 8-byte Folded Reload ori $a1, $zero, 8 move $a0, $s5 pcaddu18i $ra, %call36(newOrient) @@ -1025,17 +1025,17 @@ uloop: # @uloop move $a0, $s8 pcaddu18i $ra, %call36(usite0) jirl $ra, $ra, 0 - ld.d $a1, $sp, 296 # 8-byte Folded Reload + ld.d $a1, $sp, 288 # 8-byte Folded Reload addi.w $a1, $a1, 1 - st.d $a1, $sp, 296 # 8-byte Folded Spill + st.d $a1, $sp, 288 # 8-byte Folded Spill beqz $a0, .LBB0_35 # %bb.70: # in Loop: Header=BB0_13 Depth=1 ld.w $a0, $s6, %pc_lo12(flips) addi.d $a0, $a0, 1 st.w $a0, $s6, %pc_lo12(flips) - ld.d $a0, $sp, 248 # 8-byte Folded Reload + ld.d $a0, $sp, 240 # 8-byte Folded Reload addi.w $a0, $a0, 1 - st.d $a0, $sp, 248 # 8-byte Folded Spill + st.d $a0, $sp, 240 # 8-byte Folded Spill move $a0, $s5 move $a1, $s1 move $a2, $s0 @@ -1044,41 +1044,41 @@ uloop: # @uloop jirl $ra, $ra, 0 b .LBB0_35 .LBB0_71: # in Loop: Header=BB0_13 Depth=1 - ld.d $s6, $sp, 136 # 8-byte Folded Reload + ld.d $s6, $sp, 128 # 8-byte Folded Reload .LBB0_72: # in Loop: Header=BB0_13 Depth=1 - ld.d $a6, $sp, 344 # 8-byte Folded Reload - ld.d $a0, $sp, 120 # 8-byte Folded Reload + ld.d $a6, $sp, 336 # 8-byte Folded Reload + ld.d $a0, $sp, 112 # 8-byte Folded Reload addi.w $a0, $a0, 1 - st.d $a0, $sp, 120 # 8-byte Folded Spill - ld.d $s2, $sp, 264 # 8-byte Folded Reload + st.d $a0, $sp, 112 # 8-byte Folded Spill + ld.d $s3, $sp, 256 # 8-byte Folded Reload b .LBB0_12 .LBB0_73: # in Loop: Header=BB0_13 Depth=1 addi.d $a1, $s5, 152 - slti $a2, $s2, 0 + slti $a2, $s3, 0 slti $a3, $a0, 0 masknez $a4, $a0, $a3 - ld.d $a5, $sp, 152 # 8-byte Folded Reload + ld.d $a5, $sp, 144 # 8-byte Folded Reload maskeqz $a3, $a5, $a3 or $a3, $a3, $a4 masknez $a3, $a3, $a2 maskeqz $a0, $a0, $a2 or $a4, $a0, $a3 - ld.d $a0, $sp, 88 # 8-byte Folded Reload + ld.d $a0, $sp, 80 # 8-byte Folded Reload ld.w $a0, $a0, 0 - masknez $a3, $s2, $a2 - ld.d $t6, $sp, 144 # 8-byte Folded Reload + masknez $a3, $s3, $a2 + ld.d $t6, $sp, 136 # 8-byte Folded Reload maskeqz $a2, $t6, $a2 or $a2, $a2, $a3 - st.d $a4, $sp, 280 # 8-byte Folded Spill + st.d $a4, $sp, 272 # 8-byte Folded Spill slli.d $t8, $a4, 3 - ld.d $t5, $sp, 344 # 8-byte Folded Reload - st.d $a2, $sp, 256 # 8-byte Folded Spill + ld.d $t5, $sp, 336 # 8-byte Folded Reload + st.d $a2, $sp, 248 # 8-byte Folded Spill beqz $a0, .LBB0_80 # %bb.74: # in Loop: Header=BB0_13 Depth=1 slli.d $a0, $a2, 3 - ldx.d $s2, $a1, $a0 - ld.d $a1, $s2, 88 - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ldx.d $s3, $a1, $a0 + ld.d $a1, $s3, 88 + ld.d $a0, $sp, 96 # 8-byte Folded Reload ldx.d $a2, $a0, $t8 ld.w $a3, $s4, 12 ld.w $a0, $a1, 8 @@ -1220,149 +1220,149 @@ uloop: # @uloop ld.w $t3, $s5, 12 ld.w $a5, $s5, 16 slli.d $a0, $a2, 3 - ldx.d $s2, $a1, $a0 + ldx.d $s3, $a1, $a0 .LBB0_81: # in Loop: Header=BB0_13 Depth=1 - ld.w $a0, $s2, 56 - ld.w $a2, $s2, 60 - ld.w $a3, $s2, 64 - ld.w $a4, $s2, 68 + ld.w $a0, $s3, 56 + ld.w $a2, $s3, 60 + ld.w $a3, $s3, 64 + ld.w $a4, $s3, 68 ld.w $a6, $t5, 0 add.w $a0, $a0, $t4 add.w $s6, $a2, $t4 add.w $a1, $a3, $t7 add.w $ra, $a4, $t7 - st.d $t4, $sp, 240 # 8-byte Folded Spill - st.d $t7, $sp, 176 # 8-byte Folded Spill - st.d $t3, $sp, 168 # 8-byte Folded Spill - st.d $a5, $sp, 88 # 8-byte Folded Spill + st.d $t4, $sp, 232 # 8-byte Folded Spill + st.d $t7, $sp, 168 # 8-byte Folded Spill + st.d $t3, $sp, 160 # 8-byte Folded Spill + st.d $a5, $sp, 80 # 8-byte Folded Spill bge $s0, $a6, .LBB0_83 # %bb.82: # in Loop: Header=BB0_13 Depth=1 - fld.d $fa0, $s2, 8 + fld.d $fa0, $s3, 8 move $a2, $ra move $s0, $a0 - st.d $s2, $sp, 160 # 8-byte Folded Spill - move $s2, $a1 - st.d $t8, $sp, 16 # 8-byte Folded Spill - st.d $ra, $sp, 288 # 8-byte Folded Spill + st.d $s3, $sp, 152 # 8-byte Folded Spill + move $s3, $a1 + st.d $t8, $sp, 8 # 8-byte Folded Spill + st.d $ra, $sp, 280 # 8-byte Folded Spill pcaddu18i $ra, %call36(wireestx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 160 # 8-byte Folded Reload + ld.d $a1, $sp, 152 # 8-byte Folded Reload fld.d $fa0, $a1, 16 sub.w $s0, $s0, $a0 move $a0, $s6 - move $a1, $s2 - ld.d $a2, $sp, 288 # 8-byte Folded Reload + move $a1, $s3 + ld.d $a2, $sp, 280 # 8-byte Folded Reload pcaddu18i $ra, %call36(wireestx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 160 # 8-byte Folded Reload + ld.d $a1, $sp, 152 # 8-byte Folded Reload fld.d $fa0, $a1, 24 add.w $s6, $a0, $s6 - move $a0, $s2 + move $a0, $s3 move $a1, $s0 move $a2, $s6 pcaddu18i $ra, %call36(wireesty) jirl $ra, $ra, 0 - ld.d $a1, $sp, 160 # 8-byte Folded Reload + ld.d $a1, $sp, 152 # 8-byte Folded Reload fld.d $fa0, $a1, 32 - sub.w $s2, $s2, $a0 - st.d $s2, $sp, 32 # 8-byte Folded Spill - ld.d $a0, $sp, 288 # 8-byte Folded Reload - st.d $s0, $sp, 160 # 8-byte Folded Spill + sub.w $s3, $s3, $a0 + st.d $s3, $sp, 24 # 8-byte Folded Spill + ld.d $a0, $sp, 280 # 8-byte Folded Reload + st.d $s0, $sp, 152 # 8-byte Folded Spill move $a1, $s0 - st.d $s6, $sp, 24 # 8-byte Folded Spill + st.d $s6, $sp, 16 # 8-byte Folded Spill move $a2, $s6 pcaddu18i $ra, %call36(wireesty) jirl $ra, $ra, 0 - ld.d $ra, $sp, 288 # 8-byte Folded Reload - ld.d $t8, $sp, 16 # 8-byte Folded Reload - ld.d $a5, $sp, 88 # 8-byte Folded Reload - ld.d $t3, $sp, 168 # 8-byte Folded Reload - ld.d $a1, $sp, 344 # 8-byte Folded Reload + ld.d $ra, $sp, 280 # 8-byte Folded Reload + ld.d $t8, $sp, 8 # 8-byte Folded Reload + ld.d $a5, $sp, 80 # 8-byte Folded Reload + ld.d $t3, $sp, 160 # 8-byte Folded Reload + ld.d $a1, $sp, 336 # 8-byte Folded Reload ld.w $a6, $a1, 0 add.w $ra, $a0, $ra b .LBB0_84 .LBB0_83: # in Loop: Header=BB0_13 Depth=1 - st.d $s6, $sp, 24 # 8-byte Folded Spill - st.d $a1, $sp, 32 # 8-byte Folded Spill - st.d $a0, $sp, 160 # 8-byte Folded Spill + st.d $s6, $sp, 16 # 8-byte Folded Spill + st.d $a1, $sp, 24 # 8-byte Folded Spill + st.d $a0, $sp, 152 # 8-byte Folded Spill .LBB0_84: # in Loop: Header=BB0_13 Depth=1 - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ldx.d $s0, $a0, $t8 ld.w $a0, $s0, 56 ld.w $a2, $s0, 60 ld.w $a3, $s0, 64 ld.w $a4, $s0, 68 add.w $t8, $a0, $t3 - add.w $s2, $a2, $t3 + add.w $s3, $a2, $t3 add.w $s6, $a3, $a5 add.w $a2, $a4, $a5 - bge $s3, $a6, .LBB0_86 + bge $s1, $a6, .LBB0_86 # %bb.85: # in Loop: Header=BB0_13 Depth=1 fld.d $fa0, $s0, 8 move $a0, $t8 move $a1, $s6 - st.d $ra, $sp, 288 # 8-byte Folded Spill - move $s3, $t8 - st.d $a2, $sp, 104 # 8-byte Folded Spill + st.d $ra, $sp, 280 # 8-byte Folded Spill + move $s1, $t8 + st.d $a2, $sp, 96 # 8-byte Folded Spill pcaddu18i $ra, %call36(wireestx) jirl $ra, $ra, 0 fld.d $fa0, $s0, 16 - sub.w $s3, $s3, $a0 - move $a0, $s2 + sub.w $s1, $s1, $a0 + move $a0, $s3 move $a1, $s6 - ld.d $a2, $sp, 104 # 8-byte Folded Reload + ld.d $a2, $sp, 96 # 8-byte Folded Reload pcaddu18i $ra, %call36(wireestx) jirl $ra, $ra, 0 fld.d $fa0, $s0, 24 - add.w $s2, $a0, $s2 + add.w $s3, $a0, $s3 move $a0, $s6 - move $a1, $s3 - move $a2, $s2 + move $a1, $s1 + move $a2, $s3 pcaddu18i $ra, %call36(wireesty) jirl $ra, $ra, 0 fld.d $fa0, $s0, 32 sub.w $s6, $s6, $a0 - ld.d $a0, $sp, 104 # 8-byte Folded Reload - move $a1, $s3 - move $a2, $s2 + ld.d $a0, $sp, 96 # 8-byte Folded Reload + move $a1, $s1 + move $a2, $s3 pcaddu18i $ra, %call36(wireesty) jirl $ra, $ra, 0 - ld.d $a2, $sp, 104 # 8-byte Folded Reload - move $t8, $s3 - ld.d $ra, $sp, 288 # 8-byte Folded Reload - ld.d $a5, $sp, 88 # 8-byte Folded Reload - ld.d $t3, $sp, 168 # 8-byte Folded Reload + ld.d $a2, $sp, 96 # 8-byte Folded Reload + move $t8, $s1 + ld.d $ra, $sp, 280 # 8-byte Folded Reload + ld.d $a5, $sp, 80 # 8-byte Folded Reload + ld.d $t3, $sp, 160 # 8-byte Folded Reload add.w $a2, $a0, $a2 .LBB0_86: # in Loop: Header=BB0_13 Depth=1 - ld.d $t1, $sp, 240 # 8-byte Folded Reload - ld.d $t2, $sp, 176 # 8-byte Folded Reload - ld.d $t5, $sp, 160 # 8-byte Folded Reload - ld.d $t6, $sp, 32 # 8-byte Folded Reload - ld.d $t7, $sp, 24 # 8-byte Folded Reload + ld.d $t1, $sp, 232 # 8-byte Folded Reload + ld.d $t2, $sp, 168 # 8-byte Folded Reload + ld.d $t5, $sp, 152 # 8-byte Folded Reload + ld.d $t6, $sp, 24 # 8-byte Folded Reload + ld.d $t7, $sp, 16 # 8-byte Folded Reload bge $t8, $t7, .LBB0_91 # %bb.87: # in Loop: Header=BB0_13 Depth=1 - bge $t5, $s2, .LBB0_91 + bge $t5, $s3, .LBB0_91 # %bb.88: # in Loop: Header=BB0_13 Depth=1 bge $s6, $ra, .LBB0_91 # %bb.89: # in Loop: Header=BB0_13 Depth=1 bge $t6, $a2, .LBB0_91 # %bb.90: # in Loop: Header=BB0_13 Depth=1 - ld.d $s6, $sp, 136 # 8-byte Folded Reload + ld.d $s6, $sp, 128 # 8-byte Folded Reload b .LBB0_100 .LBB0_91: # in Loop: Header=BB0_13 Depth=1 slt $a0, $s6, $ra - slt $a1, $t5, $s2 - slt $s3, $t8, $t7 + slt $a1, $t5, $s3 + slt $s1, $t8, $t7 sub.w $a4, $t7, $t8 - ld.d $a6, $sp, 56 # 8-byte Folded Reload + ld.d $a6, $sp, 48 # 8-byte Folded Reload slt $a3, $a4, $a6 maskeqz $a4, $a4, $a3 masknez $a7, $a6, $a3 or $a4, $a4, $a7 - maskeqz $a4, $a4, $s3 - masknez $a6, $a6, $s3 + maskeqz $a4, $a4, $s1 + masknez $a6, $a6, $s1 or $a4, $a4, $a6 - sub.w $a6, $s2, $t5 + sub.w $a6, $s3, $t5 slt $t4, $a6, $a4 masknez $a7, $a4, $t4 maskeqz $a6, $a6, $t4 @@ -1395,16 +1395,16 @@ uloop: # @uloop pcalau12i $a2, %got_pc_hi20(blockt) ld.d $a2, $a2, %got_pc_lo12(blockt) ld.w $a2, $a2, 0 - ld.d $s2, $sp, 264 # 8-byte Folded Reload + ld.d $s3, $sp, 256 # 8-byte Folded Reload bge $a2, $a1, .LBB0_112 # %bb.94: # in Loop: Header=BB0_13 Depth=1 sub.d $a0, $a1, $a2 sub.w $a5, $a5, $a0 sub.w $t2, $t2, $a0 - ld.d $s6, $sp, 136 # 8-byte Folded Reload + ld.d $s6, $sp, 128 # 8-byte Folded Reload b .LBB0_101 .LBB0_95: # in Loop: Header=BB0_13 Depth=1 - and $a2, $s3, $a3 + and $a2, $s1, $a3 masknez $a3, $a2, $t4 ori $a7, $zero, 2 maskeqz $a7, $a7, $t4 @@ -1420,15 +1420,15 @@ uloop: # @uloop maskeqz $a2, $a2, $a0 masknez $a0, $a1, $a0 or $a2, $a2, $a0 - ld.d $s6, $sp, 136 # 8-byte Folded Reload + ld.d $s6, $sp, 128 # 8-byte Folded Reload beqz $a2, .LBB0_100 # %bb.96: # in Loop: Header=BB0_13 Depth=1 bstrpick.d $a0, $a4, 31, 31 add.w $a0, $a4, $a0 srai.d $a1, $a0, 1 sub.d $a0, $a4, $a1 - ld.d $a7, $sp, 280 # 8-byte Folded Reload - ld.d $a6, $sp, 256 # 8-byte Folded Reload + ld.d $a7, $sp, 272 # 8-byte Folded Reload + ld.d $a6, $sp, 248 # 8-byte Folded Reload beq $a2, $t0, .LBB0_108 # %bb.97: # in Loop: Header=BB0_13 Depth=1 ori $a3, $zero, 2 @@ -1445,15 +1445,15 @@ uloop: # @uloop sub.w $a0, $t8, $a0 b .LBB0_118 .LBB0_100: # in Loop: Header=BB0_13 Depth=1 - ld.d $s2, $sp, 264 # 8-byte Folded Reload + ld.d $s3, $sp, 256 # 8-byte Folded Reload .LBB0_101: # %.thread840 # in Loop: Header=BB0_13 Depth=1 - ld.d $a7, $sp, 280 # 8-byte Folded Reload - ld.d $a6, $sp, 256 # 8-byte Folded Reload + ld.d $a7, $sp, 272 # 8-byte Folded Reload + ld.d $a6, $sp, 248 # 8-byte Folded Reload .LBB0_102: # %.thread840 # in Loop: Header=BB0_13 Depth=1 move $a0, $s8 - move $a1, $s1 + move $a1, $s2 move $a2, $t1 move $a3, $t2 move $a4, $t3 @@ -1464,22 +1464,22 @@ uloop: # @uloop ld.w $a0, $s6, %pc_lo12(flips) addi.d $a0, $a0, 1 st.w $a0, $s6, %pc_lo12(flips) + ld.d $a0, $sp, 56 # 8-byte Folded Reload + addi.w $a0, $a0, 1 + st.d $a0, $sp, 56 # 8-byte Folded Spill ld.d $a0, $sp, 64 # 8-byte Folded Reload addi.w $a0, $a0, 1 st.d $a0, $sp, 64 # 8-byte Folded Spill - ld.d $a0, $sp, 72 # 8-byte Folded Reload - addi.w $a0, $a0, 1 - st.d $a0, $sp, 72 # 8-byte Folded Spill - ld.d $a0, $sp, 352 # 8-byte Folded Reload + ld.d $a0, $sp, 344 # 8-byte Folded Reload addi.w $a0, $a0, 2 b .LBB0_10 .LBB0_104: # in Loop: Header=BB0_13 Depth=1 - ld.d $a0, $sp, 72 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload addi.w $a0, $a0, 1 - st.d $a0, $sp, 72 # 8-byte Folded Spill - ld.d $a0, $sp, 352 # 8-byte Folded Reload + st.d $a0, $sp, 64 # 8-byte Folded Spill + ld.d $a0, $sp, 344 # 8-byte Folded Reload addi.w $a0, $a0, 2 - st.d $a0, $sp, 352 # 8-byte Folded Spill + st.d $a0, $sp, 344 # 8-byte Folded Spill b .LBB0_11 .LBB0_105: # in Loop: Header=BB0_13 Depth=1 sub.w $a0, $t5, $a0 @@ -1502,7 +1502,7 @@ uloop: # @uloop pcalau12i $a2, %got_pc_hi20(blockt) ld.d $a2, $a2, %got_pc_lo12(blockt) ld.w $a2, $a2, 0 - ld.d $s2, $sp, 264 # 8-byte Folded Reload + ld.d $s3, $sp, 256 # 8-byte Folded Reload bge $a2, $a1, .LBB0_115 # %bb.109: # in Loop: Header=BB0_13 Depth=1 sub.d $a0, $a1, $a2 @@ -1512,7 +1512,7 @@ uloop: # @uloop .LBB0_110: # in Loop: Header=BB0_13 Depth=1 add.w $t3, $a1, $t3 sub.w $t1, $t1, $a0 - add.w $a1, $a1, $s2 + add.w $a1, $a1, $s3 pcalau12i $a2, %got_pc_hi20(blockr) ld.d $a2, $a2, %got_pc_lo12(blockr) ld.w $a2, $a2, 0 @@ -1521,15 +1521,15 @@ uloop: # @uloop sub.d $a0, $a1, $a2 sub.w $t3, $t3, $a0 sub.w $t1, $t1, $a0 - ld.d $s2, $sp, 264 # 8-byte Folded Reload + ld.d $s3, $sp, 256 # 8-byte Folded Reload b .LBB0_102 .LBB0_112: # in Loop: Header=BB0_13 Depth=1 sub.w $a0, $s6, $a0 pcalau12i $a1, %got_pc_hi20(blockb) ld.d $a1, $a1, %got_pc_lo12(blockb) ld.w $a1, $a1, 0 - ld.d $a7, $sp, 280 # 8-byte Folded Reload - ld.d $a6, $sp, 256 # 8-byte Folded Reload + ld.d $a7, $sp, 272 # 8-byte Folded Reload + ld.d $a6, $sp, 248 # 8-byte Folded Reload bge $a0, $a1, .LBB0_114 # %bb.113: # in Loop: Header=BB0_13 Depth=1 sub.d $a0, $a1, $a0 @@ -1537,7 +1537,7 @@ uloop: # @uloop add.w $t2, $a0, $t2 .LBB0_114: # %.thread840 # in Loop: Header=BB0_13 Depth=1 - ld.d $s6, $sp, 136 # 8-byte Folded Reload + ld.d $s6, $sp, 128 # 8-byte Folded Reload b .LBB0_102 .LBB0_115: # in Loop: Header=BB0_13 Depth=1 sub.w $a0, $t6, $a0 @@ -1556,7 +1556,7 @@ uloop: # @uloop pcalau12i $a1, %got_pc_hi20(blockl) ld.d $a1, $a1, %got_pc_lo12(blockl) ld.w $a1, $a1, 0 - ld.d $s2, $sp, 264 # 8-byte Folded Reload + ld.d $s3, $sp, 256 # 8-byte Folded Reload bge $a0, $a1, .LBB0_102 # %bb.119: # in Loop: Header=BB0_13 Depth=1 sub.d $a0, $a1, $a0 @@ -1566,12 +1566,7 @@ uloop: # @uloop .Lfunc_end0: .size uloop, .Lfunc_end0-uloop # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function pickSpot -.LCPI1_0: - .dword 0x41dfffffffc00000 # double 2147483647 - .text - .globl pickSpot + .globl pickSpot # -- Begin function pickSpot .p2align 5 .type pickSpot,@function pickSpot: # @pickSpot @@ -1693,6 +1688,9 @@ pickSpot: # @pickSpot ori $a1, $a1, 3693 lu12i.w $a2, 3 ori $a2, $a2, 57 + lu12i.w $t2, -1024 + lu52i.d $t2, $t2, 1053 + movgr2fr.d $fa0, $t2 ori $t2, $zero, 1 ori $t3, $zero, 2 .p2align 4, , 16 @@ -1701,8 +1699,6 @@ pickSpot: # @pickSpot mul.d $t4, $t4, $a1 add.d $t4, $t4, $a2 st.w $t4, $a0, 0 - pcalau12i $t5, %pc_hi20(.LCPI1_0) - fld.d $fa0, $t5, %pc_lo12(.LCPI1_0) bstrpick.d $t4, $t4, 30, 0 movgr2fr.w $fa3, $t4 ffint.d.w $fa3, $fa3 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/upin.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/upin.s index 477c7767..91ab3997 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/upin.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/upin.s @@ -1,12 +1,6 @@ .file "upin.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function upin -.LCPI0_0: - .dword 0x40f86a0000000000 # double 1.0E+5 -.LCPI0_1: - .dword 0x41dfffffffc00000 # double 2147483647 .text - .globl upin + .globl upin # -- Begin function upin .p2align 5 .type upin,@function upin: # @upin @@ -143,8 +137,10 @@ upin: # @upin st.d $t5, $sp, 16 # 8-byte Folded Spill blez $t8, .LBB0_57 # %bb.16: # %.lr.ph274 - pcalau12i $a3, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a3, %pc_lo12(.LCPI0_0) + ori $a3, $zero, 0 + lu32i.d $a3, -497152 + lu52i.d $a3, $a3, 1039 + movgr2fr.d $fa1, $a3 fdiv.d $fa0, $fa0, $fa1 vldi $vr1, -988 fmul.d $fa1, $fa0, $fa1 @@ -476,12 +472,13 @@ upin: # @upin lu12i.w $a2, 3 ori $a2, $a2, 57 add.d $a1, $a1, $a2 - pcalau12i $a2, %pc_hi20(.LCPI0_1) - fld.d $fa1, $a2, %pc_lo12(.LCPI0_1) bstrpick.d $a2, $a1, 30, 0 - movgr2fr.w $fa2, $a2 - ffint.d.w $fa2, $fa2 - fdiv.d $fa1, $fa2, $fa1 + movgr2fr.w $fa1, $a2 + ffint.d.w $fa1, $fa1 + lu12i.w $a2, -1024 + lu52i.d $a2, $a2, 1053 + movgr2fr.d $fa2, $a2 + fdiv.d $fa1, $fa1, $fa2 fcmp.cule.d $fcc0, $fa0, $fa1 st.w $a1, $a0, 0 bcnez $fcc0, .LBB0_73 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/upinswap.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/upinswap.s index 27349caf..98d3aa72 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/upinswap.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/upinswap.s @@ -1,10 +1,6 @@ .file "upinswap.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function upinswap -.LCPI0_0: - .dword 0x41dfffffffc00000 # double 2147483647 .text - .globl upinswap + .globl upinswap # -- Begin function upinswap .p2align 5 .type upinswap,@function upinswap: # @upinswap @@ -332,12 +328,13 @@ upinswap: # @upinswap lu12i.w $a2, 3 ori $a2, $a2, 57 add.d $a1, $a1, $a2 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a2, %pc_lo12(.LCPI0_0) bstrpick.d $a2, $a1, 30, 0 - movgr2fr.w $fa2, $a2 - ffint.d.w $fa2, $fa2 - fdiv.d $fa1, $fa2, $fa1 + movgr2fr.w $fa1, $a2 + ffint.d.w $fa1, $fa1 + lu12i.w $a2, -1024 + lu52i.d $a2, $a2, 1053 + movgr2fr.d $fa2, $a2 + fdiv.d $fa1, $fa1, $fa2 fcmp.cule.d $fcc0, $fa0, $fa1 st.w $a1, $a0, 0 bcnez $fcc0, .LBB0_49 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/usite0.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/usite0.s index a8643040..02203408 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/usite0.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/usite0.s @@ -1,10 +1,6 @@ .file "usite0.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function usite0 -.LCPI0_0: - .dword 0x41dfffffffc00000 # double 2147483647 .text - .globl usite0 + .globl usite0 # -- Begin function usite0 .p2align 5 .type usite0,@function usite0: # @usite0 @@ -139,12 +135,13 @@ usite0: # @usite0 lu12i.w $a2, 3 ori $a2, $a2, 57 add.d $a1, $a1, $a2 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a2, %pc_lo12(.LCPI0_0) bstrpick.d $a2, $a1, 30, 0 - movgr2fr.w $fa2, $a2 - ffint.d.w $fa2, $fa2 - fdiv.d $fa1, $fa2, $fa1 + movgr2fr.w $fa1, $a2 + ffint.d.w $fa1, $fa1 + lu12i.w $a2, -1024 + lu52i.d $a2, $a2, 1053 + movgr2fr.d $fa2, $a2 + fdiv.d $fa1, $fa1, $fa2 fcmp.cule.d $fcc0, $fa0, $fa1 st.w $a1, $a0, 0 bcnez $fcc0, .LBB0_7 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/usite1.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/usite1.s index 9f475ebb..007e4285 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/usite1.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/usite1.s @@ -1,10 +1,6 @@ .file "usite1.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function usite1 -.LCPI0_0: - .dword 0x41dfffffffc00000 # double 2147483647 .text - .globl usite1 + .globl usite1 # -- Begin function usite1 .p2align 5 .type usite1,@function usite1: # @usite1 @@ -151,12 +147,13 @@ usite1: # @usite1 lu12i.w $a2, 3 ori $a2, $a2, 57 add.d $a1, $a1, $a2 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a2, %pc_lo12(.LCPI0_0) bstrpick.d $a2, $a1, 30, 0 - movgr2fr.w $fa2, $a2 - ffint.d.w $fa2, $fa2 - fdiv.d $fa1, $fa2, $fa1 + movgr2fr.w $fa1, $a2 + ffint.d.w $fa1, $fa1 + lu12i.w $a2, -1024 + lu52i.d $a2, $a2, 1053 + movgr2fr.d $fa2, $a2 + fdiv.d $fa1, $fa1, $fa2 fcmp.cule.d $fcc0, $fa0, $fa1 st.w $a1, $a0, 0 bcnez $fcc0, .LBB0_12 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/usite2.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/usite2.s index 131d2d92..81aaafa1 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/usite2.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/usite2.s @@ -1,10 +1,6 @@ .file "usite2.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function usite2 -.LCPI0_0: - .dword 0x41dfffffffc00000 # double 2147483647 .text - .globl usite2 + .globl usite2 # -- Begin function usite2 .p2align 5 .type usite2,@function usite2: # @usite2 @@ -237,12 +233,13 @@ usite2: # @usite2 lu12i.w $a2, 3 ori $a2, $a2, 57 add.d $a1, $a1, $a2 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a2, %pc_lo12(.LCPI0_0) bstrpick.d $a2, $a1, 30, 0 - movgr2fr.w $fa2, $a2 - ffint.d.w $fa2, $fa2 - fdiv.d $fa1, $fa2, $fa1 + movgr2fr.w $fa1, $a2 + ffint.d.w $fa1, $fa1 + lu12i.w $a2, -1024 + lu52i.d $a2, $a2, 1053 + movgr2fr.d $fa2, $a2 + fdiv.d $fa1, $fa1, $fa2 fcmp.cule.d $fcc0, $fa0, $fa1 st.w $a1, $a0, 0 bcnez $fcc0, .LBB0_11 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/usiteo1.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/usiteo1.s index afa5f229..8ec81fac 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/usiteo1.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/usiteo1.s @@ -1,10 +1,6 @@ .file "usiteo1.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function usiteo1 -.LCPI0_0: - .dword 0x41dfffffffc00000 # double 2147483647 .text - .globl usiteo1 + .globl usiteo1 # -- Begin function usiteo1 .p2align 5 .type usiteo1,@function usiteo1: # @usiteo1 @@ -142,12 +138,13 @@ usiteo1: # @usiteo1 lu12i.w $a2, 3 ori $a2, $a2, 57 add.d $a1, $a1, $a2 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a2, %pc_lo12(.LCPI0_0) bstrpick.d $a2, $a1, 30, 0 - movgr2fr.w $fa2, $a2 - ffint.d.w $fa2, $fa2 - fdiv.d $fa1, $fa2, $fa1 + movgr2fr.w $fa1, $a2 + ffint.d.w $fa1, $fa1 + lu12i.w $a2, -1024 + lu52i.d $a2, $a2, 1053 + movgr2fr.d $fa2, $a2 + fdiv.d $fa1, $fa1, $fa2 fcmp.cule.d $fcc0, $fa0, $fa1 st.w $a1, $a0, 0 bcnez $fcc0, .LBB0_7 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/usiteo2.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/usiteo2.s index 29c8c864..69a863bf 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/usiteo2.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/usiteo2.s @@ -1,10 +1,6 @@ .file "usiteo2.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function usiteo2 -.LCPI0_0: - .dword 0x41dfffffffc00000 # double 2147483647 .text - .globl usiteo2 + .globl usiteo2 # -- Begin function usiteo2 .p2align 5 .type usiteo2,@function usiteo2: # @usiteo2 @@ -237,12 +233,13 @@ usiteo2: # @usiteo2 lu12i.w $a2, 3 ori $a2, $a2, 57 add.d $a1, $a1, $a2 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a2, %pc_lo12(.LCPI0_0) bstrpick.d $a2, $a1, 30, 0 - movgr2fr.w $fa2, $a2 - ffint.d.w $fa2, $fa2 - fdiv.d $fa1, $fa2, $fa1 + movgr2fr.w $fa1, $a2 + ffint.d.w $fa1, $fa1 + lu12i.w $a2, -1024 + lu52i.d $a2, $a2, 1053 + movgr2fr.d $fa2, $a2 + fdiv.d $fa1, $fa1, $fa2 fcmp.cule.d $fcc0, $fa0, $fa1 st.w $a1, $a0, 0 bcnez $fcc0, .LBB0_11 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/utemp.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/utemp.s index 475be7e9..e37576c4 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/utemp.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/utemp.s @@ -1,26 +1,10 @@ .file "utemp.c" - .section .rodata.cst8,"aM",@progbits,8 + .section .rodata.cst16,"aM",@progbits,16 .p2align 3, 0x0 # -- Begin function utemp .LCPI0_0: - .dword 0x40f86a0000000000 # double 1.0E+5 -.LCPI0_1: - .dword 0x40a7700000000000 # double 3000 -.LCPI0_2: - .dword 0x4069000000000000 # double 200 -.LCPI0_4: - .dword 0x3fec28f5c28f5c29 # double 0.88 -.LCPI0_5: - .dword 0x3fee8f5c28f5c28f # double 0.95499999999999996 -.LCPI0_7: - .dword 0x3fe999999999999a # double 0.80000000000000004 -.LCPI0_8: - .dword 0x3fc999999999999a # double 0.20000000000000001 - .section .rodata.cst16,"aM",@progbits,16 - .p2align 3, 0x0 -.LCPI0_3: .dword 0x3fea3d70a3d70a3d # double 0.81999999999999995 .dword 0x3fe6666666666666 # double 0.69999999999999996 -.LCPI0_6: +.LCPI0_1: .dword 0x3fe999999999999a # double 0.80000000000000004 .dword 0x3feb333333333333 # double 0.84999999999999998 .text @@ -52,19 +36,25 @@ utemp: # @utemp pcalau12i $a0, %got_pc_hi20(Tsave) ld.d $a0, $a0, %got_pc_lo12(Tsave) fld.d $fa0, $a0, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -497152 + lu52i.d $a1, $a1, 1039 + movgr2fr.d $fa1, $a1 fdiv.d $fa0, $fa0, $fa1 pcalau12i $s8, %pc_hi20(count) st.w $zero, $s8, %pc_lo12(count) - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_1) - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_2) - vldi $vr3, -988 - fmul.d $fs0, $fa0, $fa3 + vldi $vr1, -988 + fmul.d $fs0, $fa0, $fa1 + ori $a1, $zero, 0 + lu32i.d $a1, 487424 + lu52i.d $a1, $a1, 1034 + movgr2fr.d $fa1, $a1 fmul.d $fs1, $fa0, $fa1 - fmul.d $fa1, $fa0, $fa2 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa1, $a0 + fmul.d $fa1, $fa0, $fa1 fst.d $fa1, $sp, 32 # 8-byte Folded Spill vldi $vr1, -972 fmul.d $fa1, $fa0, $fa1 @@ -77,12 +67,18 @@ utemp: # @utemp ld.w $a0, $a0, 0 pcalau12i $a1, %got_pc_hi20(T) ld.d $s2, $a1, %got_pc_lo12(T) - pcalau12i $a1, %pc_hi20(.LCPI0_4) - fld.d $fs2, $a1, %pc_lo12(.LCPI0_4) - pcalau12i $a1, %pc_hi20(.LCPI0_5) - fld.d $fs6, $a1, %pc_lo12(.LCPI0_5) - pcalau12i $a1, %pc_hi20(.LCPI0_6) - addi.d $a1, $a1, %pc_lo12(.LCPI0_6) + lu12i.w $a1, -251659 + ori $a1, $a1, 3113 + lu32i.d $a1, -251659 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fs2, $a1 + lu12i.w $a1, 167772 + ori $a1, $a1, 655 + lu32i.d $a1, -94372 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fs6, $a1 + pcalau12i $a1, %pc_hi20(.LCPI0_1) + addi.d $a1, $a1, %pc_lo12(.LCPI0_1) st.d $a1, $sp, 16 # 8-byte Folded Spill pcalau12i $a1, %got_pc_hi20(fpo) ld.d $s7, $a1, %got_pc_lo12(fpo) @@ -130,12 +126,15 @@ utemp: # @utemp ld.d $s5, $a1, %got_pc_lo12(stdout) pcalau12i $a1, %got_pc_hi20(doCompaction) ld.d $s4, $a1, %got_pc_lo12(doCompaction) - pcalau12i $a1, %pc_hi20(.LCPI0_7) - fld.d $fs7, $a1, %pc_lo12(.LCPI0_7) - pcalau12i $a1, %pc_hi20(.LCPI0_8) - fld.d $fs3, $a1, %pc_lo12(.LCPI0_8) - pcalau12i $a1, %pc_hi20(.LCPI0_3) - addi.d $a1, $a1, %pc_lo12(.LCPI0_3) + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a2, $a1, 1022 + movgr2fr.d $fs7, $a2 + lu52i.d $a1, $a1, 1020 + movgr2fr.d $fs3, $a1 + pcalau12i $a1, %pc_hi20(.LCPI0_0) + addi.d $a1, $a1, %pc_lo12(.LCPI0_0) st.d $a1, $sp, 40 # 8-byte Folded Spill .p2align 4, , 16 .LBB0_1: # =>This Inner Loop Header: Depth=1 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/wireratio.s b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/wireratio.s index 83ba135b..43714ce3 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/wireratio.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/wireratio.s @@ -1,12 +1,6 @@ .file "wireratio.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function wireratio -.LCPI0_0: - .dword 0x3f847ae147ae147b # double 0.01 -.LCPI0_1: - .dword 0x400a666666666666 # double 3.2999999999999998 .text - .globl wireratio + .globl wireratio # -- Begin function wireratio .p2align 5 .type wireratio,@function wireratio: # @wireratio @@ -91,8 +85,8 @@ wireratio: # @wireratio lu12i.w $a0, 293601 ori $a0, $a0, 1147 lu32i.d $a0, 293601 - lu52i.d $a0, $a0, 1016 - st.d $a0, $s1, %pc_lo12(a) + lu52i.d $s2, $a0, 1016 + st.d $s2, $s1, %pc_lo12(a) addi.d $a0, $sp, 24 addi.d $a1, $sp, 16 pcaddu18i $ra, %call36(findratio) @@ -101,20 +95,22 @@ wireratio: # @wireratio fld.d $fa1, $sp, 24 vldi $vr2, -892 fadd.d $fa2, $fs1, $fa2 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_0) - vldi $vr4, -1020 - fadd.d $fa4, $fa2, $fa4 - vldi $vr5, -908 - fmadd.d $fa4, $fa4, $fa5, $fa3 - movgr2fr.d $fa5, $zero - fcmp.clt.d $fcc0, $fa5, $fa2 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_1) - fsel $fa3, $fa3, $fa4, $fcc0 - fst.d $fa3, $s1, %pc_lo12(a) + vldi $vr3, -1020 + fadd.d $fa3, $fa2, $fa3 + vldi $vr4, -908 + movgr2fr.d $fa5, $s2 + fmadd.d $fa3, $fa3, $fa4, $fa5 + movgr2fr.d $fa4, $zero + fcmp.clt.d $fcc0, $fa4, $fa2 + fsel $fa2, $fa5, $fa3, $fcc0 + fst.d $fa2, $s1, %pc_lo12(a) fsub.d $fs3, $fa0, $fa1 - fsub.d $fa1, $fa2, $fs0 + lu12i.w $a0, 419430 + ori $a0, $a0, 1638 + lu32i.d $a0, -367002 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fa0, $a0 + fsub.d $fa1, $fa0, $fs0 vldi $vr0, -988 pcaddu18i $ra, %call36(pow) jirl $ra, $ra, 0 @@ -252,44 +248,12 @@ probtree: # @probtree .Lfunc_end1: .size probtree, .Lfunc_end1-probtree # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function findratio -.LCPI2_0: - .dword 0x412e848000000000 # double 1.0E+6 -.LCPI2_1: - .dword 0x4059000000000000 # double 100 -.LCPI2_2: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI2_3: - .dword 0xbf847ae147ae147b # double -0.01 -.LCPI2_4: - .dword 0x4040000000000000 # double 32 -.LCPI2_5: - .dword 0xc042000000000000 # double -36 -.LCPI2_6: - .dword 0x3fd999999999999a # double 0.40000000000000002 -.LCPI2_7: - .dword 0x4044000000000000 # double 40 -.LCPI2_8: - .dword 0xc04e000000000000 # double -60 -.LCPI2_9: - .dword 0x404e000000000000 # double 60 -.LCPI2_10: - .dword 0xc056800000000000 # double -90 -.LCPI2_11: - .dword 0x3fe5555555555555 # double 0.66666666666666663 -.LCPI2_12: - .dword 0x3fc5555555555555 # double 0.16666666666666666 -.LCPI2_13: - .dword 0x3ff5555555555555 # double 1.3333333333333333 -.LCPI2_14: - .dword 0xbfe5555555555555 # double -0.66666666666666663 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI2_15: + .p2align 4, 0x0 # -- Begin function findratio +.LCPI2_0: .dword 0xc000000000000000 # double -2 .dword 0xc010000000000000 # double -4 -.LCPI2_16: +.LCPI2_1: .dword 0xc010000000000000 # double -4 .dword 0x4000000000000000 # double 2 .text @@ -298,21 +262,22 @@ probtree: # @probtree .type findratio,@function findratio: # @findratio # %bb.0: - addi.d $sp, $sp, -1696 - st.d $ra, $sp, 1688 # 8-byte Folded Spill - st.d $fp, $sp, 1680 # 8-byte Folded Spill - st.d $s0, $sp, 1672 # 8-byte Folded Spill - st.d $s1, $sp, 1664 # 8-byte Folded Spill - st.d $s2, $sp, 1656 # 8-byte Folded Spill - st.d $s3, $sp, 1648 # 8-byte Folded Spill - fst.d $fs0, $sp, 1640 # 8-byte Folded Spill - fst.d $fs1, $sp, 1632 # 8-byte Folded Spill - fst.d $fs2, $sp, 1624 # 8-byte Folded Spill - fst.d $fs3, $sp, 1616 # 8-byte Folded Spill - fst.d $fs4, $sp, 1608 # 8-byte Folded Spill - fst.d $fs5, $sp, 1600 # 8-byte Folded Spill - fst.d $fs6, $sp, 1592 # 8-byte Folded Spill - fst.d $fs7, $sp, 1584 # 8-byte Folded Spill + addi.d $sp, $sp, -1712 + st.d $ra, $sp, 1704 # 8-byte Folded Spill + st.d $fp, $sp, 1696 # 8-byte Folded Spill + st.d $s0, $sp, 1688 # 8-byte Folded Spill + st.d $s1, $sp, 1680 # 8-byte Folded Spill + st.d $s2, $sp, 1672 # 8-byte Folded Spill + st.d $s3, $sp, 1664 # 8-byte Folded Spill + st.d $s4, $sp, 1656 # 8-byte Folded Spill + fst.d $fs0, $sp, 1648 # 8-byte Folded Spill + fst.d $fs1, $sp, 1640 # 8-byte Folded Spill + fst.d $fs2, $sp, 1632 # 8-byte Folded Spill + fst.d $fs3, $sp, 1624 # 8-byte Folded Spill + fst.d $fs4, $sp, 1616 # 8-byte Folded Spill + fst.d $fs5, $sp, 1608 # 8-byte Folded Spill + fst.d $fs6, $sp, 1600 # 8-byte Folded Spill + fst.d $fs7, $sp, 1592 # 8-byte Folded Spill pcalau12i $s0, %pc_hi20(N) fld.d $fs0, $s0, %pc_lo12(N) fsqrt.d $fa0, $fs0 @@ -320,50 +285,57 @@ findratio: # @findratio move $fp, $a1 bceqz $fcc0, .LBB2_40 .LBB2_1: # %.split588 - move $a3, $zero + move $a4, $zero pcalau12i $s1, %pc_hi20(rootN) fst.d $fa0, $s1, %pc_lo12(rootN) vldi $vr1, -784 fadd.d $ft13, $fa0, $fa1 vldi $vr2, -1000 - fmul.d $fa4, $fs0, $fa2 + fmul.d $fa3, $fs0, $fa2 vldi $vr2, -1024 - vldi $vr3, -864 - fmadd.d $fa6, $fa0, $fa3, $fa2 + vldi $vr4, -864 + fmadd.d $fa5, $fa0, $fa4, $fa2 vldi $vr2, -856 fmul.d $fa2, $fa0, $fa2 - vldi $vr3, -984 - fmadd.d $fa2, $fs0, $fa3, $fa2 - fadd.d $fa7, $fa2, $fa1 + vldi $vr4, -984 + fmadd.d $fa2, $fs0, $fa4, $fa2 + fadd.d $fa6, $fa2, $fa1 vldi $vr1, -880 fmul.d $fa1, $fa0, $fa1 - fmadd.d $fa1, $fs0, $fa3, $fa1 + fmadd.d $fa1, $fs0, $fa4, $fa1 vldi $vr2, -896 - fadd.d $ft0, $fa1, $fa2 + fadd.d $fa7, $fa1, $fa2 pcalau12i $a1, %pc_hi20(CC) fld.d $fa2, $a1, %pc_lo12(CC) - pcalau12i $a4, %pc_hi20(.LCPI2_0) - fld.d $fa5, $a4, %pc_lo12(.LCPI2_0) - pcalau12i $a1, %pc_hi20(.LCPI2_2) - fld.d $ft1, $a1, %pc_lo12(.LCPI2_2) - pcalau12i $a1, %pc_hi20(.LCPI2_1) - fld.d $fa3, $a1, %pc_lo12(.LCPI2_1) + ori $a2, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fa4, $a1 + lu12i.w $a1, -419431 + ori $a1, $a1, 2458 + lu32i.d $a1, -419431 + lu52i.d $a3, $a1, 1019 + movgr2fr.d $ft0, $a3 + lu32i.d $a2, -458752 + lu52i.d $a2, $a2, 1029 + movgr2fr.d $ft1, $a2 movgr2fr.d $fa1, $zero ori $a6, $zero, 100 - lu12i.w $a1, 244 - ori $a1, $a1, 575 - # implicit-def: $r6 + lu12i.w $a2, 244 + ori $a2, $a2, 575 + # implicit-def: $r7 .LBB2_2: # =>This Loop Header: Depth=1 # Child Loop BB2_4 Depth 2 # Child Loop BB2_10 Depth 2 - bstrpick.d $a5, $a3, 31, 0 + bstrpick.d $a5, $a4, 31, 0 movgr2fr.d $ft2, $a5 ffint.d.l $ft2, $ft2 - beqz $a3, .LBB2_8 + beqz $a4, .LBB2_8 # %bb.3: # %.split.us.preheader # in Loop: Header=BB2_2 Depth=1 move $a5, $zero - addi.w $a7, $a2, 0 + addi.w $a7, $a3, 0 .p2align 4, , 16 .LBB2_4: # %.split.us # Parent Loop BB2_2 Depth=1 @@ -371,17 +343,17 @@ findratio: # @findratio bstrpick.d $t0, $a5, 31, 0 movgr2fr.d $ft3, $t0 ffint.d.l $ft3, $ft3 - fdiv.d $ft3, $ft3, $fa3 + fdiv.d $ft3, $ft3, $ft1 fadd.d $ft12, $ft3, $ft2 fcmp.clt.d $fcc0, $ft13, $ft12 bcnez $fcc0, .LBB2_15 # %bb.5: # in Loop: Header=BB2_4 Depth=2 - fdiv.d $ft3, $ft12, $fa4 + fdiv.d $ft3, $ft12, $fa3 fmul.d $ft4, $ft12, $ft12 - fmul.d $ft5, $fa6, $ft4 + fmul.d $ft5, $fa5, $ft4 fmadd.d $ft4, $ft4, $ft12, $ft5 - fmadd.d $ft4, $fa7, $ft12, $ft4 - fadd.d $ft4, $ft0, $ft4 + fmadd.d $ft4, $fa6, $ft12, $ft4 + fadd.d $ft4, $fa7, $ft4 fmul.d $ft3, $ft3, $ft4 fcmp.cule.d $fcc0, $ft3, $fa2 movcf2gr $t0, $fcc0 @@ -389,79 +361,79 @@ findratio: # @findratio ori $t0, $t0, 1 bne $t0, $a7, .LBB2_18 # %bb.6: # in Loop: Header=BB2_4 Depth=2 - fsub.d $fa5, $ft3, $fa2 - fneg.d $ft3, $fa5 - fcmp.cult.d $fcc0, $fa5, $fa1 + fsub.d $fa4, $ft3, $fa2 + fneg.d $ft3, $fa4 + fcmp.cult.d $fcc0, $fa4, $fa1 addi.w $a5, $a5, 1 - fsel $fa5, $fa5, $ft3, $fcc0 + fsel $fa4, $fa4, $ft3, $fcc0 bne $a5, $a6, .LBB2_4 # %bb.7: # %.loopexit228 # in Loop: Header=BB2_2 Depth=1 - move $a5, $a3 - addi.w $a3, $a3, 1 - blt $a5, $a1, .LBB2_2 + move $a5, $a4 + addi.w $a4, $a4, 1 + blt $a5, $a2, .LBB2_2 b .LBB2_14 .p2align 4, , 16 .LBB2_8: # %.split.peel # in Loop: Header=BB2_2 Depth=1 - fadd.d $ft12, $ft2, $ft1 + fadd.d $ft12, $ft2, $ft0 fcmp.clt.d $fcc0, $ft13, $ft12 ori $a7, $zero, 1 bcnez $fcc0, .LBB2_39 # %bb.9: # %.split.peel.next # in Loop: Header=BB2_2 Depth=1 - fdiv.d $ft3, $ft12, $fa4 + fdiv.d $ft3, $ft12, $fa3 fmul.d $ft4, $ft12, $ft12 - fmul.d $ft5, $fa6, $ft4 + fmul.d $ft5, $fa5, $ft4 fmadd.d $ft4, $ft4, $ft12, $ft5 - fmadd.d $ft4, $fa7, $ft12, $ft4 - fadd.d $ft4, $ft0, $ft4 + fmadd.d $ft4, $fa6, $ft12, $ft4 + fadd.d $ft4, $fa7, $ft4 fmul.d $ft3, $ft3, $ft4 fcmp.cule.d $fcc0, $ft3, $fa2 - movcf2gr $a2, $fcc0 - sub.d $a2, $zero, $a2 - ori $t0, $a2, 1 + movcf2gr $a3, $fcc0 + sub.d $a3, $zero, $a3 + ori $t0, $a3, 1 ori $a5, $zero, 11 .p2align 4, , 16 .LBB2_10: # %.split # Parent Loop BB2_2 Depth=1 # => This Inner Loop Header: Depth=2 - bstrpick.d $a2, $a5, 31, 0 - movgr2fr.d $ft3, $a2 + bstrpick.d $a3, $a5, 31, 0 + movgr2fr.d $ft3, $a3 ffint.d.l $ft3, $ft3 - fdiv.d $ft3, $ft3, $fa3 + fdiv.d $ft3, $ft3, $ft1 fadd.d $ft12, $ft3, $ft2 fcmp.clt.d $fcc0, $ft13, $ft12 bcnez $fcc0, .LBB2_16 # %bb.11: # in Loop: Header=BB2_10 Depth=2 - fdiv.d $ft3, $ft12, $fa4 + fdiv.d $ft3, $ft12, $fa3 fmul.d $ft4, $ft12, $ft12 - fmul.d $ft5, $fa6, $ft4 + fmul.d $ft5, $fa5, $ft4 fmadd.d $ft4, $ft4, $ft12, $ft5 - fmadd.d $ft4, $fa7, $ft12, $ft4 - fadd.d $ft4, $ft0, $ft4 + fmadd.d $ft4, $fa6, $ft12, $ft4 + fadd.d $ft4, $fa7, $ft4 fmul.d $ft3, $ft3, $ft4 fcmp.cule.d $fcc0, $ft3, $fa2 - movcf2gr $a2, $fcc0 - sub.d $a2, $zero, $a2 - ori $a2, $a2, 1 - addi.w $a3, $t0, 0 - bne $a2, $a3, .LBB2_17 + movcf2gr $a3, $fcc0 + sub.d $a3, $zero, $a3 + ori $a3, $a3, 1 + addi.w $a4, $t0, 0 + bne $a3, $a4, .LBB2_17 # %bb.12: # in Loop: Header=BB2_10 Depth=2 - fsub.d $fa5, $ft3, $fa2 - fneg.d $ft3, $fa5 - fcmp.cult.d $fcc0, $fa5, $fa1 + fsub.d $fa4, $ft3, $fa2 + fneg.d $ft3, $fa4 + fcmp.cult.d $fcc0, $fa4, $fa1 addi.w $a5, $a5, 1 - fsel $fa5, $fa5, $ft3, $fcc0 - move $t0, $a2 + fsel $fa4, $fa4, $ft3, $fcc0 + move $t0, $a3 bne $a5, $a6, .LBB2_10 # %bb.13: # in Loop: Header=BB2_2 Depth=1 - move $a3, $zero - move $a5, $a3 - addi.w $a3, $a3, 1 - blt $a5, $a1, .LBB2_2 + move $a4, $zero + move $a5, $a4 + addi.w $a4, $a4, 1 + blt $a5, $a2, .LBB2_2 .LBB2_14: - move $a3, $zero + move $a4, $zero move $a7, $zero # implicit-def: $r9 bnez $a7, .LBB2_19 @@ -471,197 +443,217 @@ findratio: # @findratio bnez $a7, .LBB2_19 b .LBB2_32 .LBB2_16: - move $a3, $zero - move $a2, $t0 + move $a4, $zero + move $a3, $t0 bnez $a7, .LBB2_19 b .LBB2_32 .LBB2_17: - move $a2, $t0 + move $a3, $t0 .LBB2_18: # %.split255.us - pcalau12i $a3, %pc_hi20(.LCPI2_3) - fld.d $fa6, $a3, %pc_lo12(.LCPI2_3) - move $a3, $zero + move $a4, $zero move $a7, $zero - fsub.d $fa7, $ft3, $fa2 - fabs.d $fa7, $fa7 + fsub.d $fa5, $ft3, $fa2 + fabs.d $fa5, $fa5 + lu12i.w $a5, 293601 + ori $a5, $a5, 1147 + lu32i.d $a5, 293601 + lu52i.d $a5, $a5, -1032 + movgr2fr.d $fa6, $a5 fadd.d $fa6, $ft12, $fa6 - fcmp.cult.d $fcc0, $fa7, $fa5 + fcmp.cult.d $fcc0, $fa5, $fa4 fsel $ft12, $fa6, $ft12, $fcc0 # implicit-def: $r9 beqz $a7, .LBB2_32 .LBB2_19: # %.loopexit368 - blt $a1, $a3, .LBB2_32 + blt $a2, $a4, .LBB2_32 # %bb.20: # %.lr.ph291 - frecip.d $fa4, $fa4 - vldi $vr5, -896 - vldi $vr6, -992 - fmadd.d $fa5, $fa0, $fa6, $fa5 - vldi $vr6, -840 - fmul.d $fa7, $fs0, $fa6 - vldi $vr6, -984 - pcalau12i $a6, %pc_hi20(.LCPI2_4) - fld.d $ft0, $a6, %pc_lo12(.LCPI2_4) - fmadd.d $fa6, $fa0, $fa6, $fa7 - vldi $vr9, -912 - fadd.d $fa6, $fa6, $ft1 - fmul.d $ft5, $fs0, $ft0 - fmadd.d $fa7, $ft5, $fa0, $fa7 - vldi $vr12, -880 - fmadd.d $ft0, $fa0, $ft4, $fa7 - vldi $vr7, -1024 - fadd.d $ft0, $ft0, $fa7 - fmul.d $ft1, $ft13, $ft13 + frecip.d $fa3, $fa3 + vldi $vr4, -896 + vldi $vr5, -992 + fmadd.d $fa4, $fa0, $fa5, $fa4 + vldi $vr5, -840 + fmul.d $fa6, $fs0, $fa5 + vldi $vr5, -984 + fmadd.d $fa5, $fa0, $fa5, $fa6 + vldi $vr7, -912 + fadd.d $fa5, $fa5, $fa7 + lu52i.d $a6, $zero, 1028 + movgr2fr.d $fa7, $a6 + fmul.d $ft4, $fs0, $fa7 + fmadd.d $fa6, $ft4, $fa0, $fa6 + vldi $vr11, -880 + fmadd.d $fa7, $fa0, $ft3, $fa6 + vldi $vr6, -1024 + fadd.d $fa7, $fa7, $fa6 + fmul.d $ft0, $ft13, $ft13 + fmul.d $ft1, $ft13, $ft0 fmul.d $ft2, $ft13, $ft1 - pcalau12i $a6, %pc_hi20(.LCPI2_5) - fld.d $ft6, $a6, %pc_lo12(.LCPI2_5) - fmul.d $ft3, $ft13, $ft2 - vldi $vr15, -976 - fnmadd.d $ft4, $fa0, $ft7, $ft4 - fmul.d $ft7, $fs0, $ft6 - vldi $vr14, -968 - fmadd.d $ft6, $fa0, $ft6, $ft7 - fadd.d $ft6, $ft6, $fa7 - fneg.d $ft6, $ft6 - fmadd.d $ft5, $ft5, $fa0, $ft7 - vldi $vr15, -1008 - fadd.d $ft7, $ft5, $ft7 - fld.d $ft5, $a4, %pc_lo12(.LCPI2_0) - fneg.d $ft7, $ft7 - ori $a4, $zero, 99 - addi.w $a6, $a5, 0 - move $t0, $a3 + vldi $vr13, -976 + fnmadd.d $ft3, $fa0, $ft5, $ft3 + ori $a7, $zero, 0 + ori $a6, $zero, 0 + lu32i.d $a6, 131072 + lu52i.d $a6, $a6, -1020 + movgr2fr.d $ft5, $a6 + fmul.d $ft6, $fs0, $ft5 + vldi $vr13, -968 + fmadd.d $ft5, $fa0, $ft5, $ft6 + fadd.d $ft5, $ft5, $fa6 + fneg.d $ft5, $ft5 + fmadd.d $ft4, $ft4, $fa0, $ft6 + vldi $vr14, -1008 + fadd.d $ft4, $ft4, $ft6 + fneg.d $ft6, $ft4 + ori $a6, $zero, 0 + lu32i.d $a6, -97152 + lu52i.d $a6, $a6, 1042 + movgr2fr.d $ft4, $a6 + ori $a6, $zero, 99 + lu32i.d $a7, -458752 + lu52i.d $a7, $a7, 1029 + movgr2fr.d $ft7, $a7 + addi.w $a7, $a5, 0 + move $t1, $a4 b .LBB2_23 .LBB2_21: # in Loop: Header=BB2_23 Depth=1 - move $a2, $t0 - move $t0, $a3 + move $a3, $t1 + move $t1, $a4 .LBB2_22: # %.loopexit # in Loop: Header=BB2_23 Depth=1 - move $a7, $t0 - addi.w $t0, $t0, 1 - bge $a7, $a1, .LBB2_32 + move $t0, $t1 + addi.w $t1, $t1, 1 + bge $t0, $a2, .LBB2_32 .LBB2_23: # =>This Loop Header: Depth=1 # Child Loop BB2_25 Depth 2 # Child Loop BB2_28 Depth 2 - slt $a7, $a3, $t0 - masknez $a7, $a5, $a7 - addi.w $a7, $a7, 0 - blt $a4, $a7, .LBB2_22 + slt $t0, $a4, $t1 + masknez $t0, $a5, $t0 + addi.w $t0, $t0, 0 + blt $a6, $t0, .LBB2_22 # %bb.24: # %.lr.ph # in Loop: Header=BB2_23 Depth=1 - bstrpick.d $a5, $t0, 31, 0 + bstrpick.d $a5, $t1, 31, 0 movgr2fr.d $ft8, $a5 ffint.d.l $ft8, $ft8 - beq $t0, $a3, .LBB2_28 + beq $t1, $a4, .LBB2_28 .p2align 4, , 16 .LBB2_25: # %.lr.ph.split.us # Parent Loop BB2_23 Depth=1 # => This Inner Loop Header: Depth=2 - movgr2fr.w $ft9, $a7 + movgr2fr.w $ft9, $t0 ffint.d.w $ft9, $ft9 - fdiv.d $ft9, $ft9, $fa3 + fdiv.d $ft9, $ft9, $ft7 fadd.d $ft12, $ft9, $ft8 fmul.d $ft9, $ft12, $ft12 fmul.d $ft10, $ft12, $ft9 - fmul.d $ft11, $fa5, $ft10 + fmul.d $ft11, $fa4, $ft10 fneg.d $ft10, $ft10 fmadd.d $ft10, $ft10, $ft12, $ft11 - fmadd.d $ft9, $fa6, $ft9, $ft10 - fmadd.d $ft9, $ft0, $ft12, $ft9 - fmadd.d $ft9, $ft3, $fa7, $ft9 - fmadd.d $ft9, $ft4, $ft2, $ft9 - fmadd.d $ft9, $ft6, $ft1, $ft9 - fmadd.d $ft9, $ft7, $ft13, $ft9 - fmul.d $ft9, $fa4, $ft9 + fmadd.d $ft9, $fa5, $ft9, $ft10 + fmadd.d $ft9, $fa7, $ft12, $ft9 + fmadd.d $ft9, $ft2, $fa6, $ft9 + fmadd.d $ft9, $ft3, $ft1, $ft9 + fmadd.d $ft9, $ft5, $ft0, $ft9 + fmadd.d $ft9, $ft6, $ft13, $ft9 + fmul.d $ft9, $fa3, $ft9 fcmp.cule.d $fcc0, $ft9, $fa2 movcf2gr $a5, $fcc0 sub.d $a5, $zero, $a5 ori $a5, $a5, 1 - addi.w $t1, $a2, 0 - bne $a5, $t1, .LBB2_31 + addi.w $t2, $a3, 0 + bne $a5, $t2, .LBB2_31 # %bb.26: # %select.unfold.us # in Loop: Header=BB2_25 Depth=2 - fsub.d $ft5, $ft9, $fa2 - fneg.d $ft9, $ft5 - fcmp.cult.d $fcc0, $ft5, $fa1 - addi.w $a7, $a7, 1 + fsub.d $ft4, $ft9, $fa2 + fneg.d $ft9, $ft4 + fcmp.cult.d $fcc0, $ft4, $fa1 + addi.w $t0, $t0, 1 ori $a5, $zero, 100 - fsel $ft5, $ft5, $ft9, $fcc0 - bne $a7, $a5, .LBB2_25 + fsel $ft4, $ft4, $ft9, $fcc0 + bne $t0, $a5, .LBB2_25 b .LBB2_22 .p2align 4, , 16 .LBB2_27: # %select.unfold # in Loop: Header=BB2_28 Depth=2 - addi.w $a7, $a7, 1 + addi.w $t0, $t0, 1 ori $a5, $zero, 100 - move $a2, $t0 - beq $a7, $a5, .LBB2_21 + move $a3, $t1 + beq $t0, $a5, .LBB2_21 .LBB2_28: # %.lr.ph.split # Parent Loop BB2_23 Depth=1 # => This Inner Loop Header: Depth=2 - movgr2fr.w $ft9, $a7 + movgr2fr.w $ft9, $t0 ffint.d.w $ft9, $ft9 - fdiv.d $ft9, $ft9, $fa3 + fdiv.d $ft9, $ft9, $ft7 fadd.d $ft12, $ft9, $ft8 fmul.d $ft9, $ft12, $ft12 fmul.d $ft10, $ft12, $ft9 - fmul.d $ft11, $fa5, $ft10 + fmul.d $ft11, $fa4, $ft10 fneg.d $ft10, $ft10 fmadd.d $ft10, $ft10, $ft12, $ft11 - fmadd.d $ft9, $fa6, $ft9, $ft10 - fmadd.d $ft9, $ft0, $ft12, $ft9 - fmadd.d $ft9, $ft3, $fa7, $ft9 - fmadd.d $ft9, $ft4, $ft2, $ft9 - fmadd.d $ft9, $ft6, $ft1, $ft9 - fmadd.d $ft9, $ft7, $ft13, $ft9 - fmul.d $ft9, $fa4, $ft9 + fmadd.d $ft9, $fa5, $ft9, $ft10 + fmadd.d $ft9, $fa7, $ft12, $ft9 + fmadd.d $ft9, $ft2, $fa6, $ft9 + fmadd.d $ft9, $ft3, $ft1, $ft9 + fmadd.d $ft9, $ft5, $ft0, $ft9 + fmadd.d $ft9, $ft6, $ft13, $ft9 + fmul.d $ft9, $fa3, $ft9 fcmp.cule.d $fcc0, $ft9, $fa2 movcf2gr $a5, $fcc0 sub.d $a5, $zero, $a5 - ori $t0, $a5, 1 - beq $a6, $a7, .LBB2_27 + ori $t1, $a5, 1 + beq $a7, $t0, .LBB2_27 # %bb.29: # in Loop: Header=BB2_28 Depth=2 - addi.w $a2, $a2, 0 - bne $t0, $a2, .LBB2_31 + addi.w $a3, $a3, 0 + bne $t1, $a3, .LBB2_31 # %bb.30: # in Loop: Header=BB2_28 Depth=2 - fsub.d $ft5, $ft9, $fa2 - fneg.d $ft9, $ft5 - fcmp.cult.d $fcc0, $ft5, $fa1 - fsel $ft5, $ft5, $ft9, $fcc0 + fsub.d $ft4, $ft9, $fa2 + fneg.d $ft9, $ft4 + fcmp.cult.d $fcc0, $ft4, $fa1 + fsel $ft4, $ft4, $ft9, $fcc0 b .LBB2_27 .LBB2_31: # %.loopexit.thread - pcalau12i $a1, %pc_hi20(.LCPI2_3) - fld.d $fa3, $a1, %pc_lo12(.LCPI2_3) - fsub.d $fa4, $ft9, $fa2 - fabs.d $fa4, $fa4 - fadd.d $fa3, $ft12, $fa3 - fcmp.cult.d $fcc0, $fa4, $ft5 - fsel $ft12, $fa3, $ft12, $fcc0 + fsub.d $fa3, $ft9, $fa2 + fabs.d $fa3, $fa3 + lu12i.w $a2, 293601 + ori $a2, $a2, 1147 + lu32i.d $a2, 293601 + lu52i.d $a2, $a2, -1032 + movgr2fr.d $fa4, $a2 + fadd.d $fa4, $ft12, $fa4 + fcmp.cult.d $fcc0, $fa3, $ft4 + fsel $ft12, $fa4, $ft12, $fcc0 .LBB2_32: # %.loopexit227 fmul.d $ft1, $ft12, $ft12 - fmul.d $ft0, $ft12, $ft1 + fmul.d $fa7, $ft12, $ft1 fcmp.cult.d $fcc0, $ft13, $ft12 - fmul.d $fs7, $ft12, $ft0 + fmul.d $fs7, $ft12, $fa7 vst $vr9, $sp, 1488 # 16-byte Folded Spill - fst.d $ft0, $sp, 1528 # 8-byte Folded Spill + fst.d $fa7, $sp, 1528 # 8-byte Folded Spill bceqz $fcc0, .LBB2_34 # %bb.33: fmul.d $fa3, $ft12, $fs7 vldi $vr4, -876 vldi $vr5, -978 fmadd.d $fa4, $fa0, $fa5, $fa4 - pcalau12i $a1, %pc_hi20(.LCPI2_7) - fld.d $fa5, $a1, %pc_lo12(.LCPI2_7) fmul.d $fa4, $fa4, $fs7 - vldi $vr6, -896 - fmadd.d $fa3, $fa3, $fa6, $fa4 - fmul.d $fa4, $fs0, $fa5 - pcalau12i $a1, %pc_hi20(.LCPI2_8) - fld.d $fa5, $a1, %pc_lo12(.LCPI2_8) - vldi $vr6, -962 - fmsub.d $fa7, $fa0, $fa6, $fa4 - fmadd.d $fa3, $fa7, $ft0, $fa3 - fmul.d $fa5, $fs0, $fa5 - fmadd.d $fa7, $fa4, $fa0, $fa5 + vldi $vr5, -896 + fmadd.d $fa3, $fa3, $fa5, $fa4 + ori $a2, $zero, 0 + ori $a3, $zero, 0 + lu32i.d $a3, 262144 + lu52i.d $a3, $a3, 1028 + movgr2fr.d $fa4, $a3 + fmul.d $fa4, $fs0, $fa4 + vldi $vr5, -962 + fmsub.d $fa6, $fa0, $fa5, $fa4 + fmadd.d $fa3, $fa6, $fa7, $fa3 + ori $a3, $zero, 0 + lu32i.d $a3, -131072 + lu52i.d $a4, $a3, -1020 + movgr2fr.d $fa6, $a4 + fmul.d $fa6, $fs0, $fa6 + fmadd.d $fa7, $fa4, $fa0, $fa6 vldi $vr8, -1004 fmadd.d $fa7, $fa0, $ft0, $fa7 fadd.d $fa7, $fa7, $ft0 @@ -678,18 +670,19 @@ findratio: # @findratio fmul.d $ft1, $ft13, $fa7 fmul.d $ft2, $ft13, $ft1 fmul.d $ft3, $ft13, $ft2 - fmadd.d $fa6, $fa0, $fa6, $ft0 + fmadd.d $fa5, $fa0, $fa5, $ft0 fneg.d $ft2, $ft2 - fmul.d $fa6, $fa6, $ft2 + fmul.d $fa5, $fa5, $ft2 vldi $vr10, -1008 - pcalau12i $a1, %pc_hi20(.LCPI2_9) - fld.d $ft4, $a1, %pc_lo12(.LCPI2_9) - pcalau12i $a1, %pc_hi20(.LCPI2_10) - fld.d $ft5, $a1, %pc_lo12(.LCPI2_10) - fmadd.d $fa6, $ft3, $ft2, $fa6 - fnmadd.d $fa5, $fa0, $ft4, $fa5 - fmadd.d $fa5, $fa5, $ft1, $fa6 - fmul.d $fa6, $fs0, $ft5 + fmadd.d $fa5, $ft3, $ft2, $fa5 + lu52i.d $a3, $a3, 1028 + movgr2fr.d $ft3, $a3 + fnmadd.d $fa6, $fa0, $ft3, $fa6 + fmadd.d $fa5, $fa6, $ft1, $fa5 + lu32i.d $a2, 425984 + lu52i.d $a2, $a2, -1019 + movgr2fr.d $fa6, $a2 + fmul.d $fa6, $fs0, $fa6 fmadd.d $fa6, $fa4, $fa0, $fa6 vldi $vr9, -972 fmadd.d $fa6, $fa0, $ft1, $fa6 @@ -702,12 +695,12 @@ findratio: # @findratio fmadd.d $fa4, $fa4, $fa0, $fa6 fmadd.d $fa4, $fa0, $ft0, $fa4 fadd.d $fa4, $fa4, $ft2 - pcalau12i $a1, %pc_hi20(.LCPI2_6) - fld.d $fa6, $a1, %pc_lo12(.LCPI2_6) fneg.d $fa4, $fa4 fmadd.d $fa4, $fa4, $ft13, $fa5 fadd.d $fa3, $fa4, $fa3 - fmul.d $fa3, $fa3, $fa6 + lu52i.d $a1, $a1, 1021 + movgr2fr.d $fa4, $a1 + fmul.d $fa3, $fa3, $fa4 vldi $vr4, -1000 fmul.d $fa2, $fa2, $fa4 fmul.d $fa2, $fs0, $fa2 @@ -717,7 +710,7 @@ findratio: # @findratio vldi $vr3, -1004 vldi $vr4, -850 fmadd.d $fa3, $fa0, $fa4, $fa3 - fmul.d $fa3, $fa3, $ft0 + fmul.d $fa3, $fa3, $fa7 vldi $vr5, -1024 fmadd.d $fa3, $fs7, $fa5, $fa3 vldi $vr5, -834 @@ -737,27 +730,30 @@ findratio: # @findratio fadd.d $fa3, $fa4, $fa3 vldi $vr4, -1000 fmul.d $fa2, $fa2, $fa4 - pcalau12i $a1, %pc_hi20(.LCPI2_6) - fld.d $fa4, $a1, %pc_lo12(.LCPI2_6) fmul.d $fa2, $fs0, $fa2 fdiv.d $fa2, $fa2, $ft12 fdiv.d $fa2, $fa3, $fa2 - fmul.d $fa2, $fa2, $fa4 + lu52i.d $a1, $a1, 1021 + movgr2fr.d $fa3, $a1 + fmul.d $fa2, $fa2, $fa3 .LBB2_35: - pcalau12i $a1, %pc_hi20(.LCPI2_11) - fld.d $fa3, $a1, %pc_lo12(.LCPI2_11) + lu12i.w $a1, 349525 + ori $s2, $a1, 1365 + lu32i.d $s2, 349525 + lu52i.d $a1, $s2, 1022 + movgr2fr.d $fa3, $a1 fst.d $fa3, $sp, 1456 # 8-byte Folded Spill fmul.d $fa0, $fa0, $fa3 fst.d $fa0, $sp, 1480 # 8-byte Folded Spill fdiv.d $fa0, $fa0, $fa2 fst.d $fa0, $a0, 0 - pcalau12i $s3, %pc_hi20(a) - fld.d $fa2, $s3, %pc_lo12(a) + pcalau12i $s4, %pc_hi20(a) + fld.d $fa2, $s4, %pc_lo12(a) pcalau12i $a0, %pc_hi20(bb) fld.d $fa0, $a0, %pc_lo12(bb) fadd.d $fa3, $fa2, $fa0 - pcalau12i $s2, %pc_hi20(c) - fst.d $fa3, $s2, %pc_lo12(c) + pcalau12i $s3, %pc_hi20(c) + fst.d $fa3, $s3, %pc_lo12(c) fneg.d $fa0, $fa1 fmul.d $fs1, $fa2, $fa0 fcmp.cule.d $fcc0, $ft12, $ft13 @@ -805,13 +801,13 @@ findratio: # @findratio fmov.d $fa0, $fs0 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI2_12) - fld.d $fs5, $a0, %pc_lo12(.LCPI2_12) vld $vr1, $sp, 1568 # 16-byte Folded Reload fdiv.d $fa0, $fa0, $fa1 vld $vr1, $sp, 1552 # 16-byte Folded Reload fdiv.d $fa1, $fs4, $fa1 fsub.d $fa0, $fa0, $fa1 + lu52i.d $a0, $s2, 1020 + movgr2fr.d $fs5, $a0 fmul.d $fa0, $fa0, $fs5 vst $vr0, $sp, 1232 # 16-byte Folded Spill fmov.d $fa0, $fs3 @@ -880,7 +876,7 @@ findratio: # @findratio pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vst $vr0, $sp, 544 # 16-byte Folded Spill + vst $vr0, $sp, 576 # 16-byte Folded Spill fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 @@ -896,7 +892,7 @@ findratio: # @findratio pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vst $vr0, $sp, 448 # 16-byte Folded Spill + vst $vr0, $sp, 480 # 16-byte Folded Spill fmov.d $fa0, $fs0 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 @@ -906,7 +902,7 @@ findratio: # @findratio pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vst $vr0, $sp, 480 # 16-byte Folded Spill + vst $vr0, $sp, 496 # 16-byte Folded Spill fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 @@ -1028,12 +1024,12 @@ findratio: # @findratio pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vst $vr0, $sp, 592 # 16-byte Folded Spill + vst $vr0, $sp, 608 # 16-byte Folded Spill fmov.d $fa0, $fs0 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vst $vr0, $sp, 528 # 16-byte Folded Spill + vst $vr0, $sp, 544 # 16-byte Folded Spill vld $vr0, $sp, 1504 # 16-byte Folded Reload vldi $vr1, -784 fadd.d $fa0, $fa0, $fa1 @@ -1077,11 +1073,11 @@ findratio: # @findratio pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 fmov.d $fs4, $fa0 - fst.d $fa0, $sp, 576 # 8-byte Folded Spill + fst.d $fa0, $sp, 536 # 8-byte Folded Spill fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 - fst.d $fa0, $sp, 568 # 8-byte Folded Spill + fst.d $fa0, $sp, 528 # 8-byte Folded Spill fld.d $fa2, $s1, %pc_lo12(rootN) vst $vr2, $sp, 1536 # 16-byte Folded Spill vld $vr1, $sp, 1568 # 16-byte Folded Reload @@ -1090,16 +1086,16 @@ findratio: # @findratio fdiv.d $fa1, $fs4, $fa0 fsub.d $fa0, $fa3, $fa1 fmul.d $fa0, $fa2, $fa0 - fst.d $fa0, $sp, 584 # 8-byte Folded Spill + fst.d $fa0, $sp, 568 # 8-byte Folded Spill fmov.d $fa0, $fs3 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 fmov.d $fs4, $fa0 - fst.d $fa0, $sp, 472 # 8-byte Folded Spill + fst.d $fa0, $sp, 464 # 8-byte Folded Spill fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 - fst.d $fa0, $sp, 464 # 8-byte Folded Spill + fst.d $fa0, $sp, 456 # 8-byte Folded Spill vld $vr1, $sp, 1568 # 16-byte Folded Reload fdiv.d $fa2, $fa0, $fa1 vld $vr0, $sp, 1552 # 16-byte Folded Reload @@ -1107,7 +1103,7 @@ findratio: # @findratio fsub.d $fa0, $fa2, $fa1 vld $vr1, $sp, 1536 # 16-byte Folded Reload fmul.d $fa0, $fa1, $fa0 - fst.d $fa0, $sp, 504 # 8-byte Folded Spill + fst.d $fa0, $sp, 472 # 8-byte Folded Spill fmov.d $fa0, $fs0 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 @@ -1138,20 +1134,20 @@ findratio: # @findratio pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vst $vr0, $sp, 896 # 16-byte Folded Spill + vst $vr0, $sp, 880 # 16-byte Folded Spill fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI2_13) - fld.d $fa1, $a0, %pc_lo12(.LCPI2_13) # kill: def $f0_64 killed $f0_64 def $vr0 - vst $vr0, $sp, 832 # 16-byte Folded Spill + vst $vr0, $sp, 848 # 16-byte Folded Spill vld $vr0, $sp, 1536 # 16-byte Folded Reload - vldi $vr2, -1000 - fmul.d $fa0, $fa0, $fa2 - fst.d $fa0, $sp, 808 # 8-byte Folded Spill - vld $vr0, $sp, 1360 # 16-byte Folded Reload + vldi $vr1, -1000 fmul.d $fa0, $fa0, $fa1 + fst.d $fa0, $sp, 792 # 8-byte Folded Spill + lu52i.d $a0, $s2, 1023 + movgr2fr.d $fa0, $a0 + vld $vr1, $sp, 1360 # 16-byte Folded Reload + fmul.d $fa0, $fa1, $fa0 vst $vr0, $sp, 736 # 16-byte Folded Spill fmov.d $fa0, $fs3 pcaddu18i $ra, %call36(exp) @@ -1172,7 +1168,7 @@ findratio: # @findratio pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vst $vr0, $sp, 880 # 16-byte Folded Spill + vst $vr0, $sp, 896 # 16-byte Folded Spill fmov.d $fa0, $fs3 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 @@ -1225,7 +1221,7 @@ findratio: # @findratio vst $vr0, $sp, 320 # 16-byte Folded Spill vld $vr1, $sp, 1568 # 16-byte Folded Reload fdiv.d $fa2, $fa0, $fa1 - fld.d $fa3, $s3, %pc_lo12(a) + fld.d $fa3, $s4, %pc_lo12(a) vst $vr3, $sp, 1408 # 16-byte Folded Spill vld $vr1, $sp, 1344 # 16-byte Folded Reload vld $vr0, $sp, 1552 # 16-byte Folded Reload @@ -1233,7 +1229,7 @@ findratio: # @findratio fsub.d $fa0, $fa2, $fa1 vld $vr1, $sp, 1536 # 16-byte Folded Reload fmul.d $fa0, $fa1, $fa0 - vst $vr0, $sp, 608 # 16-byte Folded Spill + vst $vr0, $sp, 592 # 16-byte Folded Spill fld.d $fs3, $sp, 1312 # 8-byte Folded Reload fmul.d $fs0, $fa3, $fs3 fmov.d $fa0, $fs0 @@ -1259,7 +1255,7 @@ findratio: # @findratio fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 - fld.d $fa1, $s2, %pc_lo12(c) + fld.d $fa1, $s3, %pc_lo12(c) vst $vr1, $sp, 1424 # 16-byte Folded Spill # kill: def $f0_64 killed $f0_64 def $vr0 vst $vr0, $sp, 1056 # 16-byte Folded Spill @@ -1295,12 +1291,12 @@ findratio: # @findratio pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vst $vr0, $sp, 848 # 16-byte Folded Spill + vst $vr0, $sp, 832 # 16-byte Folded Spill fmov.d $fa0, $fs3 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 # kill: def $f0_64 killed $f0_64 def $vr0 - vst $vr0, $sp, 784 # 16-byte Folded Spill + vst $vr0, $sp, 800 # 16-byte Folded Spill vld $vr10, $sp, 1424 # 16-byte Folded Reload vori.b $vr0, $vr10, 0 vld $vr1, $sp, 1568 # 16-byte Folded Reload @@ -1400,14 +1396,14 @@ findratio: # @findratio vextrins.d $vr3, $vr12, 16 vfmadd.d $vr0, $vr1, $vr0, $vr3 vst $vr0, $sp, 1296 # 16-byte Folded Spill - vld $vr25, $sp, 512 # 16-byte Folded Reload - fdiv.d $fa0, $fs1, $ft5 - vld $vr12, $sp, 544 # 16-byte Folded Reload + vld $vr26, $sp, 512 # 16-byte Folded Reload + fdiv.d $fa0, $fs2, $ft5 + vld $vr12, $sp, 576 # 16-byte Folded Reload fdiv.d $fa1, $ft4, $ft6 fsub.d $fa0, $fa0, $fa1 vld $vr4, $sp, 400 # 16-byte Folded Reload fdiv.d $fa1, $fa4, $ft5 - vld $vr11, $sp, 448 # 16-byte Folded Reload + vld $vr11, $sp, 480 # 16-byte Folded Reload fdiv.d $ft9, $ft3, $ft6 fsub.d $fa1, $fa1, $ft9 vori.b $vr17, $vr10, 0 @@ -1424,7 +1420,7 @@ findratio: # @findratio vld $vr10, $sp, 416 # 16-byte Folded Reload fdiv.d $ft11, $ft2, $ft9 vreplvei.d $vr18, $vr21, 1 - vld $vr9, $sp, 480 # 16-byte Folded Reload + vld $vr9, $sp, 496 # 16-byte Folded Reload fdiv.d $ft14, $ft1, $ft10 fsub.d $ft11, $ft11, $ft14 vld $vr15, $sp, 384 # 16-byte Folded Reload @@ -1434,11 +1430,11 @@ findratio: # @findratio fsub.d $ft14, $ft14, $ft15 fneg.d $fa0, $fa0 fld.d $fa2, $sp, 640 # 8-byte Folded Reload - fmul.d $fs0, $fa2, $fa0 + fmul.d $fs1, $fa2, $fa0 fneg.d $fa0, $fa1 fmul.d $fa0, $fa2, $fa0 fneg.d $fa1, $ft11 - fmul.d $fs2, $fa2, $fa1 + fmul.d $fs0, $fa2, $fa1 fneg.d $fa1, $ft14 fmul.d $ft11, $fa2, $fa1 vld $vr3, $sp, 240 # 16-byte Folded Reload @@ -1446,74 +1442,74 @@ findratio: # @findratio vld $vr2, $sp, 256 # 16-byte Folded Reload fdiv.d $ft14, $fa2, $ft6 fsub.d $fa1, $fa1, $ft14 - vextrins.d $vr3, $vr25, 16 + vextrins.d $vr3, $vr26, 16 vextrins.d $vr2, $vr12, 16 - vori.b $vr25, $vr28, 0 - vshuf4i.d $vr25, $vr20, 12 + vori.b $vr26, $vr28, 0 + vshuf4i.d $vr26, $vr20, 12 vori.b $vr29, $vr27, 0 vshuf4i.d $vr29, $vr21, 12 - vfdiv.d $vr22, $vr3, $vr25 + vfdiv.d $vr22, $vr3, $vr26 vfdiv.d $vr23, $vr2, $vr29 vfsub.d $vr22, $vr22, $vr23 vldi $vr23, -1008 vld $vr5, $sp, 272 # 16-byte Folded Reload vpackev.d $vr23, $vr23, $vr5 - vextrins.d $vr1, $vr24, 16 + vextrins.d $vr1, $vr25, 16 vfmadd.d $vr22, $vr22, $vr23, $vr1 vld $vr12, $sp, 288 # 16-byte Folded Reload fdiv.d $fa1, $ft4, $ft5 vld $vr30, $sp, 304 # 16-byte Folded Reload - fdiv.d $fs0, $fs6, $ft6 - fsub.d $fa1, $fa1, $fs0 + fdiv.d $fs1, $fs6, $ft6 + fsub.d $fa1, $fa1, $fs1 vextrins.d $vr12, $vr4, 16 vextrins.d $vr30, $vr11, 16 - vfdiv.d $vr24, $vr12, $vr25 - vfdiv.d $vr25, $vr30, $vr29 + vfdiv.d $vr25, $vr12, $vr26 + vfdiv.d $vr26, $vr30, $vr29 vori.b $vr11, $vr30, 0 - vfsub.d $vr24, $vr24, $vr25 + vfsub.d $vr25, $vr25, $vr26 vextrins.d $vr1, $vr0, 16 - vfmadd.d $vr0, $vr24, $vr23, $vr1 + vfmadd.d $vr0, $vr25, $vr23, $vr1 vld $vr5, $sp, 352 # 16-byte Folded Reload fdiv.d $fa1, $fa5, $ft9 - vld $vr25, $sp, 368 # 16-byte Folded Reload - fdiv.d $fs0, $fs1, $ft10 - fsub.d $fa1, $fa1, $fs0 + vld $vr26, $sp, 368 # 16-byte Folded Reload + fdiv.d $fs1, $fs2, $ft10 + fsub.d $fa1, $fa1, $fs1 vextrins.d $vr5, $vr10, 16 - vori.b $vr10, $vr25, 0 + vori.b $vr10, $vr26, 0 vextrins.d $vr10, $vr9, 16 vfmul.d $vr4, $vr7, $vr20 vst $vr4, $sp, 640 # 16-byte Folded Spill - vfmul.d $vr25, $vr6, $vr21 + vfmul.d $vr26, $vr6, $vr21 vld $vr7, $sp, 1456 # 16-byte Folded Reload vpackod.d $vr20, $vr4, $vr7 vfdiv.d $vr21, $vr5, $vr20 - vpackod.d $vr29, $vr25, $vr8 + vpackod.d $vr29, $vr26, $vr8 vfdiv.d $vr30, $vr10, $vr29 vfsub.d $vr21, $vr21, $vr30 - vextrins.d $vr1, $vr26, 16 + vextrins.d $vr1, $vr24, 16 vfmadd.d $vr1, $vr21, $vr23, $vr1 - vld $vr9, $sp, 528 # 16-byte Folded Reload + vld $vr9, $sp, 544 # 16-byte Folded Reload fdiv.d $ft13, $ft1, $ft9 - vld $vr4, $sp, 592 # 16-byte Folded Reload - fdiv.d $fs2, $fa4, $ft10 - fsub.d $ft13, $ft13, $fs2 + vld $vr4, $sp, 608 # 16-byte Folded Reload + fdiv.d $fs0, $fa4, $ft10 + fsub.d $ft13, $ft13, $fs0 vextrins.d $vr9, $vr15, 16 vextrins.d $vr4, $vr16, 16 vfdiv.d $vr20, $vr9, $vr20 - vfdiv.d $vr26, $vr4, $vr29 - vfsub.d $vr20, $vr20, $vr26 + vfdiv.d $vr24, $vr4, $vr29 + vfsub.d $vr20, $vr20, $vr24 vextrins.d $vr21, $vr19, 16 vfmadd.d $vr23, $vr20, $vr23, $vr21 vld $vr20, $sp, 1312 # 16-byte Folded Reload vld $vr6, $sp, 1568 # 16-byte Folded Reload vextrins.d $vr20, $vr6, 0 - vld $vr24, $sp, 16 # 16-byte Folded Reload - vori.b $vr21, $vr24, 0 + vld $vr25, $sp, 16 # 16-byte Folded Reload + vori.b $vr21, $vr25, 0 vld $vr6, $sp, 1552 # 16-byte Folded Reload vextrins.d $vr21, $vr6, 0 vfdiv.d $vr19, $vr3, $vr20 - vfdiv.d $vr26, $vr2, $vr21 - vfsub.d $vr19, $vr19, $vr26 + vfdiv.d $vr24, $vr2, $vr21 + vfsub.d $vr19, $vr19, $vr24 vld $vr2, $sp, 1440 # 16-byte Folded Reload vld $vr3, $sp, 1024 # 16-byte Folded Reload vextrins.d $vr2, $vr3, 16 @@ -1530,8 +1526,8 @@ findratio: # @findratio vori.b $vr22, $vr27, 0 vshuf4i.d $vr22, $vr8, 12 vori.b $vr20, $vr8, 0 - vfdiv.d $vr26, $vr10, $vr22 - vfsub.d $vr21, $vr21, $vr26 + vfdiv.d $vr24, $vr10, $vr22 + vfsub.d $vr21, $vr21, $vr24 vfmadd.d $vr1, $vr2, $vr21, $vr1 vst $vr1, $sp, 1440 # 16-byte Folded Spill vfdiv.d $vr0, $vr9, $vr0 @@ -1539,24 +1535,24 @@ findratio: # @findratio vfsub.d $vr0, $vr0, $vr1 vfmadd.d $vr0, $vr2, $vr0, $vr23 vst $vr0, $sp, 1024 # 16-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI2_14) - fld.d $fa0, $a0, %pc_lo12(.LCPI2_14) - fld.d $fa1, $sp, 568 # 8-byte Folded Reload - fdiv.d $fa1, $fa1, $fs7 - fld.d $fa2, $sp, 576 # 8-byte Folded Reload + fld.d $fa0, $sp, 528 # 8-byte Folded Reload + fdiv.d $fa0, $fa0, $fs7 + fld.d $fa1, $sp, 536 # 8-byte Folded Reload vld $vr7, $sp, 32 # 16-byte Folded Reload - fdiv.d $ft15, $fa2, $fa7 - fsub.d $fa1, $fa1, $ft15 - fld.d $fa2, $sp, 584 # 8-byte Folded Reload - fmadd.d $fs2, $fa1, $fa0, $fa2 - fld.d $fa1, $sp, 464 # 8-byte Folded Reload - fdiv.d $fa1, $fa1, $fs7 + fdiv.d $fa1, $fa1, $fa7 + fsub.d $fa0, $fa0, $fa1 + lu52i.d $a0, $s2, -1026 + movgr2fr.d $fa1, $a0 + fld.d $fa2, $sp, 568 # 8-byte Folded Reload + fmadd.d $fs0, $fa0, $fa1, $fa2 + fld.d $fa0, $sp, 456 # 8-byte Folded Reload + fdiv.d $fa0, $fa0, $fs7 vori.b $vr6, $vr31, 0 - fld.d $fa2, $sp, 472 # 8-byte Folded Reload + fld.d $fa2, $sp, 464 # 8-byte Folded Reload fdiv.d $ft15, $fa2, $fa7 - fsub.d $fa1, $fa1, $ft15 - fld.d $fa2, $sp, 504 # 8-byte Folded Reload - fmadd.d $ft15, $fa1, $fa0, $fa2 + fsub.d $fa0, $fa0, $ft15 + fld.d $fa2, $sp, 472 # 8-byte Folded Reload + fmadd.d $ft15, $fa0, $fa1, $fa2 vld $vr2, $sp, 656 # 16-byte Folded Reload vld $vr0, $sp, 320 # 16-byte Folded Reload vextrins.d $vr0, $vr2, 16 @@ -1598,7 +1594,7 @@ findratio: # @findratio vextrins.d $vr2, $vr5, 16 vfdiv.d $vr30, $vr2, $vr27 vfsub.d $vr0, $vr0, $vr30 - vld $vr2, $sp, 608 # 16-byte Folded Reload + vld $vr2, $sp, 592 # 16-byte Folded Reload vextrins.d $vr2, $vr28, 16 vfmul.d $vr28, $vr3, $vr15 lu52i.d $a0, $zero, -1026 @@ -1607,24 +1603,24 @@ findratio: # @findratio vfmadd.d $vr22, $vr0, $vr30, $vr2 vld $vr0, $sp, 1328 # 16-byte Folded Reload vextrins.d $vr0, $vr6, 16 - vori.b $vr15, $vr24, 0 - vshuf4i.d $vr27, $vr24, 6 + vori.b $vr15, $vr25, 0 + vshuf4i.d $vr27, $vr25, 6 vfdiv.d $vr0, $vr0, $vr27 vfsub.d $vr0, $vr1, $vr0 vld $vr1, $sp, 624 # 16-byte Folded Reload vextrins.d $vr1, $vr16, 16 vfmadd.d $vr21, $vr0, $vr30, $vr1 - vld $vr10, $sp, 832 # 16-byte Folded Reload + vld $vr10, $sp, 848 # 16-byte Folded Reload fdiv.d $fa0, $ft2, $ft9 - vld $vr16, $sp, 896 # 16-byte Folded Reload + vld $vr16, $sp, 880 # 16-byte Folded Reload fdiv.d $fa1, $ft8, $ft10 fsub.d $fa0, $fa0, $fa1 fdiv.d $fa1, $ft2, $ft5 fdiv.d $fs3, $ft8, $ft6 fsub.d $fa1, $fa1, $fs3 - pcalau12i $a0, %pc_hi20(.LCPI2_15) - vld $vr30, $a0, %pc_lo12(.LCPI2_15) - fld.d $fa5, $sp, 808 # 8-byte Folded Reload + pcalau12i $a0, %pc_hi20(.LCPI2_0) + vld $vr30, $a0, %pc_lo12(.LCPI2_0) + fld.d $fa5, $sp, 792 # 8-byte Folded Reload fmul.d $fa1, $fa5, $fa1 vldi $vr31, -880 fmadd.d $fs3, $fa0, $fs7, $fa1 @@ -1639,19 +1635,19 @@ findratio: # @findratio fsub.d $fa1, $fa1, $ft5 fmul.d $fa1, $fa5, $fa1 fmadd.d $ft5, $fa0, $fs7, $fa1 - pcalau12i $a0, %pc_hi20(.LCPI2_16) - vld $vr0, $a0, %pc_lo12(.LCPI2_16) - vld $vr4, $sp, 880 # 16-byte Folded Reload + pcalau12i $a0, %pc_hi20(.LCPI2_1) + vld $vr0, $a0, %pc_lo12(.LCPI2_1) + vld $vr4, $sp, 896 # 16-byte Folded Reload fdiv.d $fa1, $fa4, $ft9 vld $vr12, $sp, 912 # 16-byte Folded Reload fdiv.d $ft6, $ft4, $ft10 fsub.d $fa6, $fa1, $ft6 vfmul.d $vr14, $vr3, $vr0 - vreplvei.d $vr29, $vr25, 0 + vreplvei.d $vr29, $vr26, 0 vld $vr0, $sp, 1408 # 16-byte Folded Reload fmul.d $fa2, $fa0, $fs5 - vld $vr24, $sp, 640 # 16-byte Folded Reload - vreplvei.d $vr3, $vr24, 0 + vld $vr25, $sp, 640 # 16-byte Folded Reload + vreplvei.d $vr3, $vr25, 0 vld $vr0, $sp, 1424 # 16-byte Folded Reload fmul.d $fa1, $fa0, $fa3 vld $vr9, $sp, 688 # 16-byte Folded Reload @@ -1662,8 +1658,8 @@ findratio: # @findratio vextrins.d $vr9, $vr4, 16 vextrins.d $vr8, $vr12, 16 vshuf4i.d $vr14, $vr0, 1 - vfdiv.d $vr11, $vr9, $vr24 - vfdiv.d $vr12, $vr8, $vr25 + vfdiv.d $vr11, $vr9, $vr25 + vfdiv.d $vr12, $vr8, $vr26 vfsub.d $vr11, $vr11, $vr12 vori.b $vr12, $vr14, 0 vextrins.d $vr12, $vr31, 16 @@ -1682,16 +1678,16 @@ findratio: # @findratio fdiv.d $ft3, $ft9, $ft10 fsub.d $fa4, $fa4, $ft3 fmul.d $fa4, $fa5, $fa4 - vld $vr6, $sp, 784 # 16-byte Folded Reload + vld $vr6, $sp, 800 # 16-byte Folded Reload fdiv.d $fa1, $fa6, $fa1 - vld $vr5, $sp, 848 # 16-byte Folded Reload + vld $vr5, $sp, 832 # 16-byte Folded Reload fdiv.d $fa2, $fa5, $fa2 fsub.d $fa1, $fa1, $fa2 vextrins.d $vr6, $vr30, 16 vextrins.d $vr5, $vr17, 16 - vfdiv.d $vr2, $vr6, $vr24 + vfdiv.d $vr2, $vr6, $vr25 vori.b $vr18, $vr6, 0 - vfdiv.d $vr11, $vr5, $vr25 + vfdiv.d $vr11, $vr5, $vr26 vori.b $vr17, $vr5, 0 vfsub.d $vr2, $vr2, $vr11 fneg.d $fa1, $fa1 @@ -1790,7 +1786,7 @@ findratio: # @findratio vld $vr8, $sp, 1024 # 16-byte Folded Reload vfsub.d $vr4, $vr4, $vr8 fld.d $ft0, $sp, 1528 # 8-byte Folded Reload - fmul.d $ft0, $ft0, $fs2 + fmul.d $ft0, $ft0, $fs0 vld $vr12, $sp, 1136 # 16-byte Folded Reload vori.b $vr9, $vr12, 0 vld $vr10, $sp, 1152 # 16-byte Folded Reload @@ -1851,13 +1847,13 @@ findratio: # @findratio fmov.d $fa0, $fs0 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI2_12) - fld.d $fs7, $a0, %pc_lo12(.LCPI2_12) vld $vr1, $sp, 1568 # 16-byte Folded Reload fdiv.d $fa0, $fa0, $fa1 vld $vr1, $sp, 1552 # 16-byte Folded Reload fdiv.d $fa1, $fs4, $fa1 fsub.d $fa0, $fa0, $fa1 + lu52i.d $a0, $s2, 1020 + movgr2fr.d $fs7, $a0 fmul.d $fa0, $fa0, $fs7 vst $vr0, $sp, 1392 # 16-byte Folded Spill fmov.d $fa0, $fs2 @@ -2305,24 +2301,25 @@ findratio: # @findratio fld.d $fa1, $sp, 1480 # 8-byte Folded Reload fdiv.d $fa0, $fa1, $fa0 fst.d $fa0, $fp, 0 - fld.d $fs7, $sp, 1584 # 8-byte Folded Reload - fld.d $fs6, $sp, 1592 # 8-byte Folded Reload - fld.d $fs5, $sp, 1600 # 8-byte Folded Reload - fld.d $fs4, $sp, 1608 # 8-byte Folded Reload - fld.d $fs3, $sp, 1616 # 8-byte Folded Reload - fld.d $fs2, $sp, 1624 # 8-byte Folded Reload - fld.d $fs1, $sp, 1632 # 8-byte Folded Reload - fld.d $fs0, $sp, 1640 # 8-byte Folded Reload - ld.d $s3, $sp, 1648 # 8-byte Folded Reload - ld.d $s2, $sp, 1656 # 8-byte Folded Reload - ld.d $s1, $sp, 1664 # 8-byte Folded Reload - ld.d $s0, $sp, 1672 # 8-byte Folded Reload - ld.d $fp, $sp, 1680 # 8-byte Folded Reload - ld.d $ra, $sp, 1688 # 8-byte Folded Reload - addi.d $sp, $sp, 1696 + fld.d $fs7, $sp, 1592 # 8-byte Folded Reload + fld.d $fs6, $sp, 1600 # 8-byte Folded Reload + fld.d $fs5, $sp, 1608 # 8-byte Folded Reload + fld.d $fs4, $sp, 1616 # 8-byte Folded Reload + fld.d $fs3, $sp, 1624 # 8-byte Folded Reload + fld.d $fs2, $sp, 1632 # 8-byte Folded Reload + fld.d $fs1, $sp, 1640 # 8-byte Folded Reload + fld.d $fs0, $sp, 1648 # 8-byte Folded Reload + ld.d $s4, $sp, 1656 # 8-byte Folded Reload + ld.d $s3, $sp, 1664 # 8-byte Folded Reload + ld.d $s2, $sp, 1672 # 8-byte Folded Reload + ld.d $s1, $sp, 1680 # 8-byte Folded Reload + ld.d $s0, $sp, 1688 # 8-byte Folded Reload + ld.d $fp, $sp, 1696 # 8-byte Folded Reload + ld.d $ra, $sp, 1704 # 8-byte Folded Reload + addi.d $sp, $sp, 1712 ret .LBB2_39: - move $a3, $zero + move $a4, $zero ori $a5, $zero, 10 bnez $a7, .LBB2_19 b .LBB2_32 @@ -2383,12 +2380,7 @@ Nterm1: # @Nterm1 .Lfunc_end3: .size Nterm1, .Lfunc_end3-Nterm1 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function Nterm2 -.LCPI4_0: - .dword 0x3fe5555555555555 # double 0.66666666666666663 - .text - .globl Nterm2 + .globl Nterm2 # -- Begin function Nterm2 .p2align 5 .type Nterm2,@function Nterm2: # @Nterm2 @@ -2426,12 +2418,15 @@ Nterm2: # @Nterm2 fld.d $fa3, $a0, %pc_lo12(rootN) fdiv.d $fa0, $fa0, $fs4 fdiv.d $fa4, $fs1, $fs2 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fa5, $a0, %pc_lo12(.LCPI4_0) fsub.d $fa0, $fa0, $fa4 fneg.d $fa0, $fa0 fmul.d $fa0, $fa3, $fa0 - fmadd.d $fa0, $fa2, $fa5, $fa0 + lu12i.w $a0, 349525 + ori $a0, $a0, 1365 + lu32i.d $a0, 349525 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa3, $a0 + fmadd.d $fa0, $fa2, $fa3, $fa0 fmul.d $fa0, $fa1, $fa0 fld.d $fs4, $sp, 0 # 8-byte Folded Reload fld.d $fs3, $sp, 8 # 8-byte Folded Reload @@ -2900,12 +2895,7 @@ NNterm1: # @NNterm1 .Lfunc_end12: .size NNterm1, .Lfunc_end12-NNterm1 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function NNterm2 -.LCPI13_0: - .dword 0xbfe5555555555555 # double -0.66666666666666663 - .text - .globl NNterm2 + .globl NNterm2 # -- Begin function NNterm2 .p2align 5 .type NNterm2,@function NNterm2: # @NNterm2 @@ -2942,12 +2932,15 @@ NNterm2: # @NNterm2 pcalau12i $a0, %pc_hi20(rootN) fld.d $fa3, $a0, %pc_lo12(rootN) fdiv.d $fa0, $fa0, $fs4 - pcalau12i $a0, %pc_hi20(.LCPI13_0) - fld.d $fa4, $a0, %pc_lo12(.LCPI13_0) - fdiv.d $fa5, $fs1, $fs2 - fsub.d $fa0, $fa0, $fa5 + fdiv.d $fa4, $fs1, $fs2 + fsub.d $fa0, $fa0, $fa4 fmul.d $fa0, $fa3, $fa0 - fmadd.d $fa0, $fa2, $fa4, $fa0 + lu12i.w $a0, 349525 + ori $a0, $a0, 1365 + lu32i.d $a0, 349525 + lu52i.d $a0, $a0, -1026 + movgr2fr.d $fa3, $a0 + fmadd.d $fa0, $fa2, $fa3, $fa0 fmul.d $fa0, $fa1, $fa0 fld.d $fs4, $sp, 0 # 8-byte Folded Reload fld.d $fs3, $sp, 8 # 8-byte Folded Reload @@ -3024,12 +3017,7 @@ NNterm3: # @NNterm3 .Lfunc_end14: .size NNterm3, .Lfunc_end14-NNterm3 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function NNterm4 -.LCPI15_0: - .dword 0x3ff5555555555555 # double 1.3333333333333333 - .text - .globl NNterm4 + .globl NNterm4 # -- Begin function NNterm4 .p2align 5 .type NNterm4,@function NNterm4: # @NNterm4 @@ -3079,12 +3067,15 @@ NNterm4: # @NNterm4 fld.d $fa3, $a0, %pc_lo12(N) fdiv.d $fa4, $fa0, $fa4 fdiv.d $fa1, $fs1, $fa1 - pcalau12i $a0, %pc_hi20(.LCPI15_0) - fld.d $fa6, $a0, %pc_lo12(.LCPI15_0) fsub.d $fa1, $fa4, $fa1 fmul.d $fa4, $fa3, $fa5 fmadd.d $fa1, $fa4, $fa1, $fa2 - fmul.d $fa2, $fa3, $fa6 + lu12i.w $a0, 349525 + ori $a0, $a0, 1365 + lu32i.d $a0, 349525 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa2, $a0 + fmul.d $fa2, $fa3, $fa2 fmul.d $fa2, $fa7, $fa2 fdiv.d $fa0, $fa0, $fs4 fdiv.d $fa3, $fs1, $fs2 @@ -3102,12 +3093,7 @@ NNterm4: # @NNterm4 .Lfunc_end15: .size NNterm4, .Lfunc_end15-NNterm4 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function NNterm5 -.LCPI16_0: - .dword 0x3ff5555555555555 # double 1.3333333333333333 - .text - .globl NNterm5 + .globl NNterm5 # -- Begin function NNterm5 .p2align 5 .type NNterm5,@function NNterm5: # @NNterm5 @@ -3157,12 +3143,15 @@ NNterm5: # @NNterm5 fld.d $fa4, $a0, %pc_lo12(N) fdiv.d $fa6, $fa0, $fa6 fdiv.d $fa2, $fs0, $fa2 - pcalau12i $a0, %pc_hi20(.LCPI16_0) - fld.d $ft0, $a0, %pc_lo12(.LCPI16_0) fsub.d $fa2, $fa6, $fa2 fmul.d $fa6, $fa4, $fa7 fmadd.d $fa2, $fa6, $fa2, $fa3 - fmul.d $fa3, $fa4, $ft0 + lu12i.w $a0, 349525 + ori $a0, $a0, 1365 + lu32i.d $a0, 349525 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa3, $a0 + fmul.d $fa3, $fa4, $fa3 fmul.d $fa3, $ft1, $fa3 fdiv.d $fa0, $fa0, $fa5 fdiv.d $fa1, $fs0, $fa1 @@ -3338,12 +3327,7 @@ DDterm3: # @DDterm3 .Lfunc_end19: .size DDterm3, .Lfunc_end19-DDterm3 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function DDterm4 -.LCPI20_0: - .dword 0x3ff5555555555555 # double 1.3333333333333333 - .text - .globl DDterm4 + .globl DDterm4 # -- Begin function DDterm4 .p2align 5 .type DDterm4,@function DDterm4: # @DDterm4 @@ -3389,12 +3373,15 @@ DDterm4: # @DDterm4 fdiv.d $fa4, $fa0, $fa4 fdiv.d $fa1, $fs0, $fa1 fsub.d $fa1, $fa4, $fa1 - pcalau12i $a0, %pc_hi20(.LCPI20_0) - fld.d $fa4, $a0, %pc_lo12(.LCPI20_0) - vldi $vr5, -896 - fmul.d $fa5, $fa3, $fa5 - fmadd.d $fa1, $fa5, $fa1, $fa2 - fmul.d $fa2, $fa3, $fa4 + vldi $vr4, -896 + fmul.d $fa4, $fa3, $fa4 + fmadd.d $fa1, $fa4, $fa1, $fa2 + lu12i.w $a0, 349525 + ori $a0, $a0, 1365 + lu32i.d $a0, 349525 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa2, $a0 + fmul.d $fa2, $fa3, $fa2 fmul.d $fa2, $fa7, $fa2 fdiv.d $fa0, $fa0, $fs3 fdiv.d $fa3, $fs0, $fs1 @@ -3410,12 +3397,7 @@ DDterm4: # @DDterm4 .Lfunc_end20: .size DDterm4, .Lfunc_end20-DDterm4 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function getptree -.LCPI21_0: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 - .text - .globl getptree + .globl getptree # -- Begin function getptree .p2align 5 .type getptree,@function getptree: # @getptree @@ -3799,11 +3781,14 @@ getptree: # @getptree .LBB21_56: # %._crit_edge115 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI21_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI21_0) - fdiv.d $fa1, $fs1, $fs2 - fcmp.clt.d $fcc0, $fs1, $fa0 - fsel $fa0, $fa1, $fs0, $fcc0 + fdiv.d $fa0, $fs1, $fs2 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fa1, $a0 + fcmp.clt.d $fcc0, $fs1, $fa1 + fsel $fa0, $fa0, $fs0, $fcc0 fld.d $fs2, $sp, 8 # 8-byte Folded Reload fld.d $fs1, $sp, 16 # 8-byte Folded Reload fld.d $fs0, $sp, 24 # 8-byte Folded Reload diff --git a/results/MultiSource/Benchmarks/Prolangs-C/allroots/CMakeFiles/allroots.dir/newton.s b/results/MultiSource/Benchmarks/Prolangs-C/allroots/CMakeFiles/allroots.dir/newton.s index 65003b6f..c77aca3d 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/allroots/CMakeFiles/allroots.dir/newton.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/allroots/CMakeFiles/allroots.dir/newton.s @@ -1,10 +1,6 @@ .file "newton.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function newton -.LCPI0_0: - .dword 0x3ed4f8b588e368f1 # double 5.0000000000000004E-6 .text - .globl newton + .globl newton # -- Begin function newton .p2align 5 .type newton,@function newton: # @newton @@ -44,9 +40,12 @@ newton: # @newton fmov.d $fa0, $fs0 pcaddu18i $ra, %call36(d_abs) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fs2, $a0, %pc_lo12(.LCPI0_0) fdiv.d $fa0, $fs1, $fa0 + lu12i.w $a0, -487882 + ori $a0, $a0, 2289 + lu32i.d $a0, 325813 + lu52i.d $a0, $a0, 1005 + movgr2fr.d $fs2, $a0 fcmp.cule.d $fcc0, $fa0, $fs2 bcnez $fcc0, .LBB0_4 # %bb.1: # %.lr.ph.preheader diff --git a/results/MultiSource/Benchmarks/Prolangs-C/football/CMakeFiles/football.dir/common.s b/results/MultiSource/Benchmarks/Prolangs-C/football/CMakeFiles/football.dir/common.s index 7b6ef92a..c3f22fd1 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/football/CMakeFiles/football.dir/common.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/football/CMakeFiles/football.dir/common.s @@ -1078,12 +1078,7 @@ div_conf_rec: # @div_conf_rec .Lfunc_end6: .size div_conf_rec, .Lfunc_end6-div_conf_rec # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function break_net_points -.LCPI7_0: - .dword 0xc0c3880000000000 # double -1.0E+4 - .text - .globl break_net_points + .globl break_net_points # -- Begin function break_net_points .p2align 5 .type break_net_points,@function break_net_points: # @break_net_points @@ -1100,9 +1095,11 @@ break_net_points: # @break_net_points ori $a2, $zero, 1 beq $fp, $a2, .LBB7_7 # %bb.3: # %.lr.ph - pcalau12i $a2, %pc_hi20(.LCPI7_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI7_0) addi.d $a2, $sp, 16 + ori $a3, $zero, 0 + lu32i.d $a3, 231424 + lu52i.d $a3, $a3, -1012 + movgr2fr.d $fa0, $a3 ori $a3, $zero, 92 bnez $fp, .LBB7_9 # %bb.4: # %.lr.ph.split.us42.preheader @@ -1138,9 +1135,11 @@ break_net_points: # @break_net_points move $fp, $zero b .LBB7_16 .LBB7_7: # %.lr.ph.split.us.preheader - pcalau12i $a2, %pc_hi20(.LCPI7_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI7_0) addi.d $a2, $sp, 16 + ori $a3, $zero, 0 + lu32i.d $a3, 231424 + lu52i.d $a3, $a3, -1012 + movgr2fr.d $fa0, $a3 ori $a3, $zero, 92 pcalau12i $a4, %got_pc_hi20(team_info_wi_div) ld.d $a4, $a4, %got_pc_lo12(team_info_wi_div) diff --git a/results/MultiSource/Benchmarks/Prolangs-C/football/CMakeFiles/football.dir/io.s b/results/MultiSource/Benchmarks/Prolangs-C/football/CMakeFiles/football.dir/io.s index 0a51d73c..6915e172 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/football/CMakeFiles/football.dir/io.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/football/CMakeFiles/football.dir/io.s @@ -947,12 +947,7 @@ find_nth_place_team: # @find_nth_place_team .Lfunc_end12: .size find_nth_place_team, .Lfunc_end12-find_nth_place_team # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function display_info -.LCPI13_0: - .dword 0x408f400000000000 # double 1000 - .text - .globl display_info + .globl display_info # -- Begin function display_info .p2align 5 .type display_info,@function display_info: # @display_info @@ -971,6 +966,7 @@ display_info: # @display_info st.d $s8, $sp, 216 # 8-byte Folded Spill fst.d $fs0, $sp, 208 # 8-byte Folded Spill fst.d $fs1, $sp, 200 # 8-byte Folded Spill + fst.d $fs2, $sp, 192 # 8-byte Folded Spill move $fp, $a1 move $s0, $a0 pcalau12i $a0, %pc_hi20(.L.str.1) @@ -985,7 +981,7 @@ display_info: # @display_info mul.d $a0, $fp, $a0 pcalau12i $a1, %got_pc_hi20(team) ld.d $a1, $a1, %got_pc_lo12(team) - st.d $a1, $sp, 152 # 8-byte Folded Spill + st.d $a1, $sp, 144 # 8-byte Folded Spill add.d $a0, $a1, $a0 addi.d $a2, $a0, 15 pcalau12i $a0, %pc_hi20(.L.str.3) @@ -997,69 +993,73 @@ display_info: # @display_info mul.d $a0, $fp, $a0 pcalau12i $a1, %got_pc_hi20(team_info) ld.d $a1, $a1, %got_pc_lo12(team_info) - st.d $a1, $sp, 72 # 8-byte Folded Spill + st.d $a1, $sp, 64 # 8-byte Folded Spill add.d $a1, $a1, $a0 - st.d $a1, $sp, 136 # 8-byte Folded Spill + st.d $a1, $sp, 128 # 8-byte Folded Spill pcalau12i $a1, %got_pc_hi20(divisions) ld.d $s1, $a1, %got_pc_lo12(divisions) - st.d $fp, $sp, 184 # 8-byte Folded Spill + st.d $fp, $sp, 176 # 8-byte Folded Spill alsl.d $a1, $fp, $s1, 3 - st.d $a1, $sp, 176 # 8-byte Folded Spill + st.d $a1, $sp, 168 # 8-byte Folded Spill pcalau12i $a1, %got_pc_hi20(team_info_wi_conf) ld.d $a1, $a1, %got_pc_lo12(team_info_wi_conf) add.d $a1, $a1, $a0 - st.d $a1, $sp, 112 # 8-byte Folded Spill + st.d $a1, $sp, 104 # 8-byte Folded Spill pcalau12i $a1, %got_pc_hi20(team_info_wi_div) ld.d $a1, $a1, %got_pc_lo12(team_info_wi_div) add.d $a0, $a1, $a0 - st.d $a0, $sp, 104 # 8-byte Folded Spill + st.d $a0, $sp, 96 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(sched) ld.d $a0, $a0, %got_pc_lo12(sched) - st.d $a0, $sp, 168 # 8-byte Folded Spill + st.d $a0, $sp, 160 # 8-byte Folded Spill addi.d $s3, $a0, 192 pcalau12i $a0, %got_pc_hi20(num_games) ld.d $s8, $a0, %got_pc_lo12(num_games) pcalau12i $a0, %pc_hi20(.L.str.4) addi.d $a0, $a0, %pc_lo12(.L.str.4) - st.d $a0, $sp, 160 # 8-byte Folded Spill + st.d $a0, $sp, 152 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.LJTI13_0) addi.d $a0, $a0, %pc_lo12(.LJTI13_0) - st.d $a0, $sp, 192 # 8-byte Folded Spill + st.d $a0, $sp, 184 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.L.str.14) addi.d $a0, $a0, %pc_lo12(.L.str.14) - st.d $a0, $sp, 128 # 8-byte Folded Spill + st.d $a0, $sp, 120 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.L.str.13) addi.d $a0, $a0, %pc_lo12(.L.str.13) - st.d $a0, $sp, 120 # 8-byte Folded Spill + st.d $a0, $sp, 112 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.L.str.17) addi.d $a0, $a0, %pc_lo12(.L.str.17) - st.d $a0, $sp, 96 # 8-byte Folded Spill + st.d $a0, $sp, 88 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.L.str.16) addi.d $a0, $a0, %pc_lo12(.L.str.16) - st.d $a0, $sp, 88 # 8-byte Folded Spill + st.d $a0, $sp, 80 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.L.str.15) addi.d $a0, $a0, %pc_lo12(.L.str.15) - st.d $a0, $sp, 80 # 8-byte Folded Spill + st.d $a0, $sp, 72 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.L.str.12) addi.d $a0, $a0, %pc_lo12(.L.str.12) - st.d $a0, $sp, 48 # 8-byte Folded Spill + st.d $a0, $sp, 40 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.L.str.18) addi.d $a0, $a0, %pc_lo12(.L.str.18) - st.d $a0, $sp, 40 # 8-byte Folded Spill + st.d $a0, $sp, 32 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(standings) ld.d $s7, $a0, %got_pc_lo12(standings) ori $s6, $zero, 116 - movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs0, $a0 + movgr2fr.d $fs1, $zero ori $fp, $zero, 13 - st.d $s8, $sp, 144 # 8-byte Folded Spill + st.d $s8, $sp, 136 # 8-byte Folded Spill b .LBB13_4 .LBB13_1: # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 176 # 8-byte Folded Reload + ld.d $a0, $sp, 168 # 8-byte Folded Reload ld.w $a0, $a0, 0 sltui $s5, $a0, 1 pcalau12i $a0, %got_pc_hi20(net_ranks) ld.d $a0, $a0, %got_pc_lo12(net_ranks) - ld.d $fp, $sp, 184 # 8-byte Folded Reload + ld.d $fp, $sp, 176 # 8-byte Folded Reload move $a1, $fp pcaddu18i $ra, %call36(find_teams_rank) jirl $ra, $ra, 0 @@ -1073,7 +1073,7 @@ display_info: # @display_info pcaddu18i $ra, %call36(find_teams_rank) jirl $ra, $ra, 0 move $a5, $a0 - movfr2gr.d $a3, $fs1 + movfr2gr.d $a3, $fs2 pcalau12i $a0, %pc_hi20(.L.str.28) addi.d $a1, $a0, %pc_lo12(.L.str.28) .LBB13_2: # %find_nth_place_team.exit.thread @@ -1105,7 +1105,7 @@ display_info: # @display_info # %bb.5: # in Loop: Header=BB13_4 Depth=1 ori $a1, $zero, 28 ori $a2, $zero, 1 - ld.d $a0, $sp, 160 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload .LBB13_6: # in Loop: Header=BB13_4 Depth=1 move $a3, $s0 pcaddu18i $ra, %call36(fwrite) @@ -1119,12 +1119,12 @@ display_info: # @display_info # in Loop: Header=BB13_4 Depth=1 ori $a0, $zero, 180 mul.d $a7, $s2, $a0 - ld.d $a1, $sp, 168 # 8-byte Folded Reload + ld.d $a1, $sp, 160 # 8-byte Folded Reload add.d $a4, $a1, $a7 ld.w $a3, $a4, 20 addi.d $a1, $a4, 12 ori $a2, $zero, 1 - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 176 # 8-byte Folded Reload bne $a3, $a0, .LBB13_9 # %bb.8: # in Loop: Header=BB13_4 Depth=1 move $a3, $a0 @@ -1210,7 +1210,7 @@ display_info: # @display_info or $s4, $a3, $a2 ori $a2, $zero, 30 mul.d $a3, $a4, $a2 - ld.d $a2, $sp, 152 # 8-byte Folded Reload + ld.d $a2, $sp, 144 # 8-byte Folded Reload add.d $a2, $a2, $a3 addi.d $a2, $a2, 15 pcaddu18i $ra, %call36(fprintf) @@ -1258,7 +1258,7 @@ display_info: # @display_info pcaddu18i $ra, %call36(fwrite) jirl $ra, $ra, 0 .LBB13_21: # in Loop: Header=BB13_4 Depth=1 - ld.d $s8, $sp, 144 # 8-byte Folded Reload + ld.d $s8, $sp, 136 # 8-byte Folded Reload ori $s6, $zero, 116 move $s7, $s1 move $s1, $fp @@ -1268,12 +1268,12 @@ display_info: # @display_info bltu $a1, $a0, .LBB13_3 .LBB13_22: # in Loop: Header=BB13_4 Depth=1 slli.d $a0, $a0, 2 - ld.d $a1, $sp, 192 # 8-byte Folded Reload + ld.d $a1, $sp, 184 # 8-byte Folded Reload ldx.w $a0, $a1, $a0 add.d $a0, $a1, $a0 jr $a0 .LBB13_23: # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 176 # 8-byte Folded Reload + ld.d $a0, $sp, 168 # 8-byte Folded Reload ld.w $a1, $a0, 0 ld.w $a0, $a0, 4 addi.d $a3, $s2, -2 @@ -1371,7 +1371,7 @@ display_info: # @display_info addi.d $a3, $a0, %pc_lo12(.L.str.20) b .LBB13_77 .LBB13_46: # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload ld.w $a2, $a0, 0 ld.w $a3, $a0, 4 ld.w $a4, $a0, 8 @@ -1383,11 +1383,11 @@ display_info: # @display_info addi.d $a0, $a0, %pc_lo12(.L.str.33) b .LBB13_69 .LBB13_48: # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 136 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.w $s4, $a0, 0 ld.w $s5, $a0, 4 ld.w $a0, $a0, 8 - st.d $a0, $sp, 64 # 8-byte Folded Spill + st.d $a0, $sp, 56 # 8-byte Folded Spill slti $a0, $s4, 10 slti $a1, $s5, 10 pcalau12i $a2, %pc_hi20(.L.str.22) @@ -1397,16 +1397,16 @@ display_info: # @display_info addi.d $a4, $a4, %pc_lo12(.L.str.25) maskeqz $a1, $a4, $a1 or $a1, $a1, $a3 - ld.d $a3, $sp, 176 # 8-byte Folded Reload + ld.d $a3, $sp, 168 # 8-byte Folded Reload ld.w $a3, $a3, 0 maskeqz $a1, $a1, $a0 masknez $a0, $a2, $a0 or $a0, $a1, $a0 - st.d $a0, $sp, 56 # 8-byte Folded Spill + st.d $a0, $sp, 48 # 8-byte Folded Spill sltui $s8, $a3, 1 pcalau12i $a0, %got_pc_hi20(conf_standings) ld.d $a0, $a0, %got_pc_lo12(conf_standings) - ld.d $fp, $sp, 184 # 8-byte Folded Reload + ld.d $fp, $sp, 176 # 8-byte Folded Reload move $a1, $fp pcaddu18i $ra, %call36(find_teams_rank) jirl $ra, $ra, 0 @@ -1426,15 +1426,15 @@ display_info: # @display_info move $a0, $s0 move $a2, $s4 move $a3, $s5 - ld.d $a4, $sp, 64 # 8-byte Folded Reload - ld.d $a5, $sp, 56 # 8-byte Folded Reload + ld.d $a4, $sp, 56 # 8-byte Folded Reload + ld.d $a5, $sp, 48 # 8-byte Folded Reload move $a6, $s8 - ld.d $s8, $sp, 144 # 8-byte Folded Reload + ld.d $s8, $sp, 136 # 8-byte Folded Reload pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 b .LBB13_3 .LBB13_49: # in Loop: Header=BB13_4 Depth=1 - ld.d $a4, $sp, 136 # 8-byte Folded Reload + ld.d $a4, $sp, 128 # 8-byte Folded Reload ld.w $a1, $a4, 12 ld.w $a0, $a4, 0 ld.w $a2, $a4, 4 @@ -1443,48 +1443,48 @@ display_info: # @display_info add.d $a0, $a2, $a0 add.w $a0, $a0, $a3 sub.w $s4, $a1, $a4 - fmov.d $fs1, $fs0 + fmov.d $fs2, $fs1 beqz $a0, .LBB13_1 # %bb.50: # in Loop: Header=BB13_4 Depth=1 movgr2fr.w $fa0, $s4 ffint.d.w $fa0, $fa0 movgr2fr.w $fa1, $a0 ffint.d.w $fa1, $fa1 - fdiv.d $fs1, $fa0, $fa1 + fdiv.d $fs2, $fa0, $fa1 b .LBB13_1 .LBB13_51: # in Loop: Header=BB13_4 Depth=1 - ld.d $a1, $sp, 176 # 8-byte Folded Reload + ld.d $a1, $sp, 168 # 8-byte Folded Reload ld.w $a0, $a1, 0 sltui $a0, $a0, 1 ld.w $a1, $a1, 4 - ld.d $a2, $sp, 128 # 8-byte Folded Reload + ld.d $a2, $sp, 120 # 8-byte Folded Reload masknez $a2, $a2, $a0 - ld.d $a3, $sp, 120 # 8-byte Folded Reload + ld.d $a3, $sp, 112 # 8-byte Folded Reload maskeqz $a0, $a3, $a0 or $a2, $a0, $a2 sltui $a0, $a1, 1 addi.d $a1, $a1, -1 sltui $a1, $a1, 1 - ld.d $a3, $sp, 96 # 8-byte Folded Reload + ld.d $a3, $sp, 88 # 8-byte Folded Reload masknez $a3, $a3, $a1 - ld.d $a4, $sp, 88 # 8-byte Folded Reload + ld.d $a4, $sp, 80 # 8-byte Folded Reload maskeqz $a1, $a4, $a1 or $a1, $a1, $a3 masknez $a1, $a1, $a0 - ld.d $a3, $sp, 80 # 8-byte Folded Reload + ld.d $a3, $sp, 72 # 8-byte Folded Reload maskeqz $a0, $a3, $a0 or $a3, $a0, $a1 move $a0, $s0 - ld.d $a1, $sp, 48 # 8-byte Folded Reload + ld.d $a1, $sp, 40 # 8-byte Folded Reload b .LBB13_75 .LBB13_52: # in Loop: Header=BB13_4 Depth=1 - ld.d $a3, $sp, 112 # 8-byte Folded Reload + ld.d $a3, $sp, 104 # 8-byte Folded Reload ld.w $a0, $a3, 0 ld.w $a1, $a3, 4 ld.w $a2, $a3, 8 add.d $a0, $a1, $a0 add.w $a0, $a0, $a2 - fmov.d $fa0, $fs0 + fmov.d $fa0, $fs1 beqz $a0, .LBB13_54 # %bb.53: # in Loop: Header=BB13_4 Depth=1 ld.w $a1, $a3, 12 @@ -1496,12 +1496,12 @@ display_info: # @display_info ffint.d.w $fa1, $fa1 fdiv.d $fa0, $fa0, $fa1 .LBB13_54: # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 176 # 8-byte Folded Reload + ld.d $a0, $sp, 168 # 8-byte Folded Reload ld.w $a0, $a0, 0 sltui $a0, $a0, 1 - ld.d $a1, $sp, 128 # 8-byte Folded Reload + ld.d $a1, $sp, 120 # 8-byte Folded Reload masknez $a1, $a1, $a0 - ld.d $a2, $sp, 120 # 8-byte Folded Reload + ld.d $a2, $sp, 112 # 8-byte Folded Reload maskeqz $a0, $a2, $a0 or $a2, $a0, $a1 movfr2gr.d $a3, $fa0 @@ -1511,10 +1511,10 @@ display_info: # @display_info .LBB13_55: # in Loop: Header=BB13_4 Depth=1 ori $a1, $zero, 38 ori $a2, $zero, 1 - ld.d $a0, $sp, 40 # 8-byte Folded Reload + ld.d $a0, $sp, 32 # 8-byte Folded Reload b .LBB13_70 .LBB13_56: # in Loop: Header=BB13_4 Depth=1 - ld.d $a4, $sp, 104 # 8-byte Folded Reload + ld.d $a4, $sp, 96 # 8-byte Folded Reload ld.w $a1, $a4, 12 ld.w $a0, $a4, 0 ld.w $a2, $a4, 4 @@ -1523,7 +1523,7 @@ display_info: # @display_info add.d $a0, $a2, $a0 add.w $a0, $a0, $a3 sub.w $a4, $a1, $a4 - fmov.d $fa0, $fs0 + fmov.d $fa0, $fs1 beqz $a0, .LBB13_58 # %bb.57: # in Loop: Header=BB13_4 Depth=1 movgr2fr.w $fa0, $a4 @@ -1532,25 +1532,25 @@ display_info: # @display_info ffint.d.w $fa1, $fa1 fdiv.d $fa0, $fa0, $fa1 .LBB13_58: # in Loop: Header=BB13_4 Depth=1 - ld.d $a1, $sp, 176 # 8-byte Folded Reload + ld.d $a1, $sp, 168 # 8-byte Folded Reload ld.w $a0, $a1, 0 sltui $a0, $a0, 1 ld.w $a1, $a1, 4 - ld.d $a2, $sp, 128 # 8-byte Folded Reload + ld.d $a2, $sp, 120 # 8-byte Folded Reload masknez $a2, $a2, $a0 - ld.d $a3, $sp, 120 # 8-byte Folded Reload + ld.d $a3, $sp, 112 # 8-byte Folded Reload maskeqz $a0, $a3, $a0 or $a2, $a0, $a2 sltui $a0, $a1, 1 addi.d $a1, $a1, -1 sltui $a1, $a1, 1 - ld.d $a3, $sp, 96 # 8-byte Folded Reload + ld.d $a3, $sp, 88 # 8-byte Folded Reload masknez $a3, $a3, $a1 - ld.d $a5, $sp, 88 # 8-byte Folded Reload + ld.d $a5, $sp, 80 # 8-byte Folded Reload maskeqz $a1, $a5, $a1 or $a1, $a1, $a3 masknez $a1, $a1, $a0 - ld.d $a3, $sp, 80 # 8-byte Folded Reload + ld.d $a3, $sp, 72 # 8-byte Folded Reload maskeqz $a0, $a3, $a0 or $a3, $a0, $a1 movfr2gr.d $a5, $fa0 @@ -1561,7 +1561,7 @@ display_info: # @display_info jirl $ra, $ra, 0 b .LBB13_3 .LBB13_59: # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.w $a2, $a0, 0 ld.w $a3, $a0, 4 ld.w $a4, $a0, 8 @@ -1574,28 +1574,28 @@ display_info: # @display_info jirl $ra, $ra, 0 b .LBB13_3 .LBB13_61: # in Loop: Header=BB13_4 Depth=1 - ld.d $a3, $sp, 136 # 8-byte Folded Reload + ld.d $a3, $sp, 128 # 8-byte Folded Reload ld.w $a0, $a3, 0 ld.w $a1, $a3, 4 ld.w $a2, $a3, 8 ld.w $s4, $a3, 12 add.d $a0, $a1, $a0 add.w $a0, $a0, $a2 - fmov.d $fs1, $fs0 + fmov.d $fs2, $fs1 beqz $a0, .LBB13_63 # %bb.62: # in Loop: Header=BB13_4 Depth=1 movgr2fr.w $fa0, $s4 ffint.d.w $fa0, $fa0 movgr2fr.w $fa1, $a0 ffint.d.w $fa1, $fa1 - fdiv.d $fs1, $fa0, $fa1 + fdiv.d $fs2, $fa0, $fa1 .LBB13_63: # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 176 # 8-byte Folded Reload + ld.d $a0, $sp, 168 # 8-byte Folded Reload ld.w $a0, $a0, 0 sltui $s5, $a0, 1 pcalau12i $a0, %got_pc_hi20(offence_ranks) ld.d $a0, $a0, %got_pc_lo12(offence_ranks) - ld.d $fp, $sp, 184 # 8-byte Folded Reload + ld.d $fp, $sp, 176 # 8-byte Folded Reload move $a1, $fp pcaddu18i $ra, %call36(find_teams_rank) jirl $ra, $ra, 0 @@ -1609,7 +1609,7 @@ display_info: # @display_info pcaddu18i $ra, %call36(find_teams_rank) jirl $ra, $ra, 0 move $a5, $a0 - movfr2gr.d $a3, $fs1 + movfr2gr.d $a3, $fs2 pcalau12i $a0, %pc_hi20(.L.str.26) addi.d $a1, $a0, %pc_lo12(.L.str.26) b .LBB13_2 @@ -1618,28 +1618,28 @@ display_info: # @display_info addi.d $a0, $a0, %pc_lo12(.L.str.29) b .LBB13_69 .LBB13_65: # in Loop: Header=BB13_4 Depth=1 - ld.d $a3, $sp, 136 # 8-byte Folded Reload + ld.d $a3, $sp, 128 # 8-byte Folded Reload ld.w $a0, $a3, 0 ld.w $a1, $a3, 4 ld.w $a2, $a3, 8 ld.w $s4, $a3, 16 add.d $a0, $a1, $a0 add.w $a0, $a0, $a2 - fmov.d $fs1, $fs0 + fmov.d $fs2, $fs1 beqz $a0, .LBB13_67 # %bb.66: # in Loop: Header=BB13_4 Depth=1 movgr2fr.w $fa0, $s4 ffint.d.w $fa0, $fa0 movgr2fr.w $fa1, $a0 ffint.d.w $fa1, $fa1 - fdiv.d $fs1, $fa0, $fa1 + fdiv.d $fs2, $fa0, $fa1 .LBB13_67: # in Loop: Header=BB13_4 Depth=1 - ld.d $a0, $sp, 176 # 8-byte Folded Reload + ld.d $a0, $sp, 168 # 8-byte Folded Reload ld.w $a0, $a0, 0 sltui $s5, $a0, 1 pcalau12i $a0, %got_pc_hi20(defence_ranks) ld.d $a0, $a0, %got_pc_lo12(defence_ranks) - ld.d $fp, $sp, 184 # 8-byte Folded Reload + ld.d $fp, $sp, 176 # 8-byte Folded Reload move $a1, $fp pcaddu18i $ra, %call36(find_teams_rank) jirl $ra, $ra, 0 @@ -1653,7 +1653,7 @@ display_info: # @display_info pcaddu18i $ra, %call36(find_teams_rank) jirl $ra, $ra, 0 move $a5, $a0 - movfr2gr.d $a3, $fs1 + movfr2gr.d $a3, $fs2 pcalau12i $a0, %pc_hi20(.L.str.27) addi.d $a1, $a0, %pc_lo12(.L.str.27) b .LBB13_2 @@ -1671,7 +1671,7 @@ display_info: # @display_info jirl $ra, $ra, 0 b .LBB13_3 .LBB13_71: # in Loop: Header=BB13_4 Depth=1 - ld.d $a4, $sp, 136 # 8-byte Folded Reload + ld.d $a4, $sp, 128 # 8-byte Folded Reload ld.w $a1, $a4, 12 ld.w $a0, $a4, 0 ld.w $a2, $a4, 4 @@ -1680,7 +1680,7 @@ display_info: # @display_info add.d $a0, $a2, $a0 add.w $a0, $a0, $a3 sub.w $a2, $a1, $a4 - fmov.d $fa0, $fs0 + fmov.d $fa0, $fs1 beqz $a0, .LBB13_73 # %bb.72: # in Loop: Header=BB13_4 Depth=1 movgr2fr.w $fa0, $a2 @@ -1715,7 +1715,7 @@ display_info: # @display_info .LBB13_77: # in Loop: Header=BB13_4 Depth=1 ori $a0, $zero, 92 mul.d $a1, $s4, $a0 - ld.d $a2, $sp, 72 # 8-byte Folded Reload + ld.d $a2, $sp, 64 # 8-byte Folded Reload add.d $a0, $a2, $a1 ldx.w $a4, $a2, $a1 ld.w $a5, $a0, 4 @@ -1746,9 +1746,7 @@ display_info: # @display_info bcnez $fcc0, .LBB13_81 # %bb.79: # %.critedge213 # in Loop: Header=BB13_4 Depth=1 - pcalau12i $a1, %pc_hi20(.LCPI13_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI13_0) - fmadd.d $fa0, $fa0, $fa1, $fa2 + fmadd.d $fa0, $fa0, $fs0, $fa2 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a1, $fa0 b .LBB13_82 @@ -1763,7 +1761,7 @@ display_info: # @display_info ld.w $a0, $a0, 16 ori $a2, $zero, 30 mul.d $a2, $s4, $a2 - ld.d $t1, $sp, 152 # 8-byte Folded Reload + ld.d $t1, $sp, 144 # 8-byte Folded Reload add.d $a2, $t1, $a2 addi.d $a2, $a2, 15 st.d $a0, $sp, 16 @@ -1776,6 +1774,7 @@ display_info: # @display_info jirl $ra, $ra, 0 b .LBB13_3 .LBB13_83: + fld.d $fs2, $sp, 192 # 8-byte Folded Reload fld.d $fs1, $sp, 200 # 8-byte Folded Reload fld.d $fs0, $sp, 208 # 8-byte Folded Reload ld.d $s8, $sp, 216 # 8-byte Folded Reload @@ -2527,28 +2526,24 @@ display_tiebreaker: # @display_tiebreaker .Lfunc_end16: .size display_tiebreaker, .Lfunc_end16-display_tiebreaker # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function display_records -.LCPI17_0: - .dword 0x408f400000000000 # double 1000 - .text - .globl display_records + .globl display_records # -- Begin function display_records .p2align 5 .type display_records,@function display_records: # @display_records # %bb.0: - addi.d $sp, $sp, -128 - st.d $ra, $sp, 120 # 8-byte Folded Spill - st.d $fp, $sp, 112 # 8-byte Folded Spill - st.d $s0, $sp, 104 # 8-byte Folded Spill - st.d $s1, $sp, 96 # 8-byte Folded Spill - st.d $s2, $sp, 88 # 8-byte Folded Spill - st.d $s3, $sp, 80 # 8-byte Folded Spill - st.d $s4, $sp, 72 # 8-byte Folded Spill - st.d $s5, $sp, 64 # 8-byte Folded Spill - st.d $s6, $sp, 56 # 8-byte Folded Spill - st.d $s7, $sp, 48 # 8-byte Folded Spill - st.d $s8, $sp, 40 # 8-byte Folded Spill + addi.d $sp, $sp, -144 + st.d $ra, $sp, 136 # 8-byte Folded Spill + st.d $fp, $sp, 128 # 8-byte Folded Spill + st.d $s0, $sp, 120 # 8-byte Folded Spill + st.d $s1, $sp, 112 # 8-byte Folded Spill + st.d $s2, $sp, 104 # 8-byte Folded Spill + st.d $s3, $sp, 96 # 8-byte Folded Spill + st.d $s4, $sp, 88 # 8-byte Folded Spill + st.d $s5, $sp, 80 # 8-byte Folded Spill + st.d $s6, $sp, 72 # 8-byte Folded Spill + st.d $s7, $sp, 64 # 8-byte Folded Spill + st.d $s8, $sp, 56 # 8-byte Folded Spill + fst.d $fs0, $sp, 48 # 8-byte Folded Spill move $fp, $a0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a0, $a0, %pc_lo12(.L.str.1) @@ -2589,7 +2584,7 @@ display_records: # @display_records jirl $ra, $ra, 0 ori $a1, $zero, 4 ori $a2, $zero, 1 - st.d $s1, $sp, 32 # 8-byte Folded Spill + st.d $s1, $sp, 40 # 8-byte Folded Spill move $a0, $s1 move $a3, $fp pcaddu18i $ra, %call36(fwrite) @@ -2603,7 +2598,7 @@ display_records: # @display_records jirl $ra, $ra, 0 pcalau12i $a0, %got_pc_hi20(conf_standings) ld.d $a0, $a0, %got_pc_lo12(conf_standings) - st.d $a0, $sp, 24 # 8-byte Folded Spill + st.d $a0, $sp, 32 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(abs_standings) ld.d $s2, $a0, %got_pc_lo12(abs_standings) pcalau12i $a0, %got_pc_hi20(team) @@ -2614,16 +2609,20 @@ display_records: # @display_records ld.d $s7, $a0, %got_pc_lo12(team_info) pcalau12i $a0, %pc_hi20(.L.str.22) addi.d $a0, $a0, %pc_lo12(.L.str.22) - st.d $a0, $sp, 8 # 8-byte Folded Spill + st.d $a0, $sp, 16 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.L.str.69) addi.d $a0, $a0, %pc_lo12(.L.str.69) - st.d $a0, $sp, 16 # 8-byte Folded Spill + st.d $a0, $sp, 24 # 8-byte Folded Spill move $s6, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs0, $a0 b .LBB17_3 .p2align 4, , 16 .LBB17_1: # in Loop: Header=BB17_3 Depth=1 move $a6, $zero - ld.d $a5, $sp, 8 # 8-byte Folded Reload + ld.d $a5, $sp, 16 # 8-byte Folded Reload .LBB17_2: # %.critedge101.thread # in Loop: Header=BB17_3 Depth=1 pcalau12i $a0, %pc_hi20(.L.str.70) @@ -2636,7 +2635,7 @@ display_records: # @display_records ori $a0, $zero, 56 beq $s6, $a0, .LBB17_16 .LBB17_3: # =>This Inner Loop Header: Depth=1 - ld.d $a0, $sp, 24 # 8-byte Folded Reload + ld.d $a0, $sp, 32 # 8-byte Folded Reload add.d $s4, $a0, $s6 ld.w $s5, $s4, 4 move $a0, $s2 @@ -2705,16 +2704,14 @@ display_records: # @display_records bcnez $fcc0, .LBB17_9 # %bb.7: # %.critedge # in Loop: Header=BB17_3 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI17_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI17_0) - fmadd.d $fa0, $fa0, $fa1, $fa2 + fmadd.d $fa0, $fa0, $fs0, $fa2 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a6, $fa0 b .LBB17_10 .p2align 4, , 16 .LBB17_8: # in Loop: Header=BB17_3 Depth=1 move $a6, $zero - ld.d $a5, $sp, 8 # 8-byte Folded Reload + ld.d $a5, $sp, 16 # 8-byte Folded Reload b .LBB17_10 .p2align 4, , 16 .LBB17_9: # in Loop: Header=BB17_3 Depth=1 @@ -2722,12 +2719,12 @@ display_records: # @display_records .LBB17_10: # %.critedge.thread # in Loop: Header=BB17_3 Depth=1 move $a0, $fp - ld.d $a1, $sp, 16 # 8-byte Folded Reload + ld.d $a1, $sp, 24 # 8-byte Folded Reload pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 ori $a1, $zero, 4 ori $a2, $zero, 1 - ld.d $a0, $sp, 32 # 8-byte Folded Reload + ld.d $a0, $sp, 40 # 8-byte Folded Reload move $a3, $fp pcaddu18i $ra, %call36(fwrite) jirl $ra, $ra, 0 @@ -2796,9 +2793,7 @@ display_records: # @display_records bcnez $fcc0, .LBB17_15 # %bb.14: # %.critedge101 # in Loop: Header=BB17_3 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI17_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI17_0) - fmadd.d $fa0, $fa0, $fa1, $fa2 + fmadd.d $fa0, $fa0, $fs0, $fa2 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a6, $fa0 b .LBB17_2 @@ -2812,18 +2807,19 @@ display_records: # @display_records ori $a1, $zero, 30 ori $a2, $zero, 1 move $a3, $fp - ld.d $s8, $sp, 40 # 8-byte Folded Reload - ld.d $s7, $sp, 48 # 8-byte Folded Reload - ld.d $s6, $sp, 56 # 8-byte Folded Reload - ld.d $s5, $sp, 64 # 8-byte Folded Reload - ld.d $s4, $sp, 72 # 8-byte Folded Reload - ld.d $s3, $sp, 80 # 8-byte Folded Reload - ld.d $s2, $sp, 88 # 8-byte Folded Reload - ld.d $s1, $sp, 96 # 8-byte Folded Reload - ld.d $s0, $sp, 104 # 8-byte Folded Reload - ld.d $fp, $sp, 112 # 8-byte Folded Reload - ld.d $ra, $sp, 120 # 8-byte Folded Reload - addi.d $sp, $sp, 128 + fld.d $fs0, $sp, 48 # 8-byte Folded Reload + ld.d $s8, $sp, 56 # 8-byte Folded Reload + ld.d $s7, $sp, 64 # 8-byte Folded Reload + ld.d $s6, $sp, 72 # 8-byte Folded Reload + ld.d $s5, $sp, 80 # 8-byte Folded Reload + ld.d $s4, $sp, 88 # 8-byte Folded Reload + ld.d $s3, $sp, 96 # 8-byte Folded Reload + ld.d $s2, $sp, 104 # 8-byte Folded Reload + ld.d $s1, $sp, 112 # 8-byte Folded Reload + ld.d $s0, $sp, 120 # 8-byte Folded Reload + ld.d $fp, $sp, 128 # 8-byte Folded Reload + ld.d $ra, $sp, 136 # 8-byte Folded Reload + addi.d $sp, $sp, 144 pcaddu18i $t8, %call36(fwrite) jr $t8 .Lfunc_end17: @@ -4676,28 +4672,24 @@ display_net: # @display_net .Lfunc_end24: .size display_net, .Lfunc_end24-display_net # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function display_standings -.LCPI25_0: - .dword 0x408f400000000000 # double 1000 - .text - .globl display_standings + .globl display_standings # -- Begin function display_standings .p2align 5 .type display_standings,@function display_standings: # @display_standings # %bb.0: - addi.d $sp, $sp, -144 - st.d $ra, $sp, 136 # 8-byte Folded Spill - st.d $fp, $sp, 128 # 8-byte Folded Spill - st.d $s0, $sp, 120 # 8-byte Folded Spill - st.d $s1, $sp, 112 # 8-byte Folded Spill - st.d $s2, $sp, 104 # 8-byte Folded Spill - st.d $s3, $sp, 96 # 8-byte Folded Spill - st.d $s4, $sp, 88 # 8-byte Folded Spill - st.d $s5, $sp, 80 # 8-byte Folded Spill - st.d $s6, $sp, 72 # 8-byte Folded Spill - st.d $s7, $sp, 64 # 8-byte Folded Spill - st.d $s8, $sp, 56 # 8-byte Folded Spill + addi.d $sp, $sp, -160 + st.d $ra, $sp, 152 # 8-byte Folded Spill + st.d $fp, $sp, 144 # 8-byte Folded Spill + st.d $s0, $sp, 136 # 8-byte Folded Spill + st.d $s1, $sp, 128 # 8-byte Folded Spill + st.d $s2, $sp, 120 # 8-byte Folded Spill + st.d $s3, $sp, 112 # 8-byte Folded Spill + st.d $s4, $sp, 104 # 8-byte Folded Spill + st.d $s5, $sp, 96 # 8-byte Folded Spill + st.d $s6, $sp, 88 # 8-byte Folded Spill + st.d $s7, $sp, 80 # 8-byte Folded Spill + st.d $s8, $sp, 72 # 8-byte Folded Spill + fst.d $fs0, $sp, 64 # 8-byte Folded Spill move $fp, $a0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a0, $a0, %pc_lo12(.L.str.1) @@ -4732,7 +4724,7 @@ display_standings: # @display_standings addi.d $a0, $a0, %pc_lo12(.L.str.99) ori $a1, $zero, 3 ori $a2, $zero, 1 - st.d $a0, $sp, 32 # 8-byte Folded Spill + st.d $a0, $sp, 48 # 8-byte Folded Spill move $a3, $fp pcaddu18i $ra, %call36(fwrite) jirl $ra, $ra, 0 @@ -4754,15 +4746,19 @@ display_standings: # @display_standings addi.d $s8, $a0, %pc_lo12(.L.str.20) pcalau12i $a0, %pc_hi20(.L.str.21) addi.d $a0, $a0, %pc_lo12(.L.str.21) - st.d $a0, $sp, 40 # 8-byte Folded Spill + st.d $a0, $sp, 56 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.L.str.22) addi.d $s3, $a0, %pc_lo12(.L.str.22) pcalau12i $a0, %got_pc_hi20(team_info) ld.d $s6, $a0, %got_pc_lo12(team_info) pcalau12i $a0, %pc_hi20(.L.str.19) - addi.d $s1, $a0, %pc_lo12(.L.str.19) + addi.d $a0, $a0, %pc_lo12(.L.str.19) + st.d $a0, $sp, 40 # 8-byte Folded Spill move $s5, $zero - st.d $s2, $sp, 48 # 8-byte Folded Spill + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs0, $a0 b .LBB25_3 .p2align 4, , 16 .LBB25_1: # in Loop: Header=BB25_3 Depth=1 @@ -4789,10 +4785,11 @@ display_standings: # @display_standings pcaddu18i $ra, %call36(fputc) jirl $ra, $ra, 0 addi.d $s5, $s5, 4 - ld.d $s2, $sp, 48 # 8-byte Folded Reload + move $s2, $s1 ori $a0, $zero, 20 beq $s5, $a0, .LBB25_16 .LBB25_3: # =>This Inner Loop Header: Depth=1 + move $s1, $s2 add.d $s0, $s2, $s5 ld.w $s2, $s0, 4 move $a0, $s2 @@ -4805,7 +4802,7 @@ display_standings: # @display_standings pcaddu18i $ra, %call36(a_wild_card) jirl $ra, $ra, 0 sltui $a0, $a0, 1 - ld.d $a1, $sp, 40 # 8-byte Folded Reload + ld.d $a1, $sp, 56 # 8-byte Folded Reload masknez $a1, $a1, $a0 maskeqz $a0, $s3, $a0 or $a3, $a0, $a1 @@ -4843,9 +4840,7 @@ display_standings: # @display_standings bcnez $fcc0, .LBB25_9 # %bb.7: # %.critedge # in Loop: Header=BB25_3 Depth=1 - pcalau12i $a1, %pc_hi20(.LCPI25_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI25_0) - fmadd.d $fa0, $fa0, $fa1, $fa2 + fmadd.d $fa0, $fa0, $fs0, $fa2 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a1, $fa0 b .LBB25_10 @@ -4869,12 +4864,12 @@ display_standings: # @display_standings st.d $t0, $sp, 8 st.d $a1, $sp, 0 move $a0, $fp - move $a1, $s1 + ld.d $a1, $sp, 40 # 8-byte Folded Reload pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 ori $a1, $zero, 3 ori $a2, $zero, 1 - ld.d $a0, $sp, 32 # 8-byte Folded Reload + ld.d $a0, $sp, 48 # 8-byte Folded Reload move $a3, $fp pcaddu18i $ra, %call36(fwrite) jirl $ra, $ra, 0 @@ -4889,7 +4884,7 @@ display_standings: # @display_standings pcaddu18i $ra, %call36(a_wild_card) jirl $ra, $ra, 0 sltui $a0, $a0, 1 - ld.d $a1, $sp, 40 # 8-byte Folded Reload + ld.d $a1, $sp, 56 # 8-byte Folded Reload masknez $a1, $a1, $a0 maskeqz $a0, $s3, $a0 or $a3, $a0, $a1 @@ -4925,9 +4920,7 @@ display_standings: # @display_standings bcnez $fcc0, .LBB25_15 # %bb.14: # %.critedge270 # in Loop: Header=BB25_3 Depth=1 - pcalau12i $a1, %pc_hi20(.LCPI25_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI25_0) - fmadd.d $fa0, $fa0, $fa1, $fa2 + fmadd.d $fa0, $fa0, $fs0, $fa2 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a1, $fa0 b .LBB25_2 @@ -4949,10 +4942,15 @@ display_standings: # @display_standings addi.d $s3, $a0, %pc_lo12(.L.str.22) pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $a0, $a0, %pc_lo12(.L.str.19) - st.d $a0, $sp, 32 # 8-byte Folded Spill + st.d $a0, $sp, 48 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.L.str.99) - addi.d $s1, $a0, %pc_lo12(.L.str.99) + addi.d $a0, $a0, %pc_lo12(.L.str.99) + st.d $a0, $sp, 40 # 8-byte Folded Spill move $s5, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs0, $a0 b .LBB25_19 .p2align 4, , 16 .LBB25_17: # in Loop: Header=BB25_19 Depth=1 @@ -4979,7 +4977,7 @@ display_standings: # @display_standings pcaddu18i $ra, %call36(fputc) jirl $ra, $ra, 0 addi.d $s5, $s5, 4 - ld.d $s2, $sp, 48 # 8-byte Folded Reload + move $s2, $s1 ori $a0, $zero, 16 beq $s5, $a0, .LBB25_32 .LBB25_19: # =>This Inner Loop Header: Depth=1 @@ -4995,7 +4993,7 @@ display_standings: # @display_standings pcaddu18i $ra, %call36(a_wild_card) jirl $ra, $ra, 0 sltui $a0, $a0, 1 - ld.d $a1, $sp, 40 # 8-byte Folded Reload + ld.d $a1, $sp, 56 # 8-byte Folded Reload masknez $a1, $a1, $a0 maskeqz $a0, $s3, $a0 or $a3, $a0, $a1 @@ -5031,9 +5029,7 @@ display_standings: # @display_standings bcnez $fcc0, .LBB25_25 # %bb.23: # %.critedge272 # in Loop: Header=BB25_19 Depth=1 - pcalau12i $a1, %pc_hi20(.LCPI25_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI25_0) - fmadd.d $fa0, $fa0, $fa1, $fa2 + fmadd.d $fa0, $fa0, $fs0, $fa2 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a1, $fa0 b .LBB25_26 @@ -5057,12 +5053,12 @@ display_standings: # @display_standings st.d $t0, $sp, 8 st.d $a1, $sp, 0 move $a0, $fp - ld.d $a1, $sp, 32 # 8-byte Folded Reload + ld.d $a1, $sp, 48 # 8-byte Folded Reload pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 ori $a1, $zero, 3 ori $a2, $zero, 1 - move $a0, $s1 + ld.d $a0, $sp, 40 # 8-byte Folded Reload move $a3, $fp pcaddu18i $ra, %call36(fwrite) jirl $ra, $ra, 0 @@ -5077,7 +5073,7 @@ display_standings: # @display_standings pcaddu18i $ra, %call36(a_wild_card) jirl $ra, $ra, 0 sltui $a0, $a0, 1 - ld.d $a1, $sp, 40 # 8-byte Folded Reload + ld.d $a1, $sp, 56 # 8-byte Folded Reload masknez $a1, $a1, $a0 maskeqz $a0, $s3, $a0 or $a3, $a0, $a1 @@ -5113,9 +5109,7 @@ display_standings: # @display_standings bcnez $fcc0, .LBB25_31 # %bb.30: # %.critedge274 # in Loop: Header=BB25_19 Depth=1 - pcalau12i $a1, %pc_hi20(.LCPI25_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI25_0) - fmadd.d $fa0, $fa0, $fa1, $fa2 + fmadd.d $fa0, $fa0, $fs0, $fa2 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a1, $fa0 b .LBB25_18 @@ -5140,7 +5134,7 @@ display_standings: # @display_standings pcaddu18i $ra, %call36(a_wild_card) jirl $ra, $ra, 0 sltui $a0, $a0, 1 - ld.d $a1, $sp, 40 # 8-byte Folded Reload + ld.d $a1, $sp, 56 # 8-byte Folded Reload masknez $a1, $a1, $a0 pcalau12i $a2, %pc_hi20(.L.str.22) addi.d $a2, $a2, %pc_lo12(.L.str.22) @@ -5178,9 +5172,12 @@ display_standings: # @display_standings or $a7, $a7, $a1 bcnez $fcc0, .LBB25_39 # %bb.37: # %.critedge276 - pcalau12i $a1, %pc_hi20(.LCPI25_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI25_0) - fmadd.d $fa0, $fa0, $fa1, $fa2 + vldi $vr1, -928 + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1032 + movgr2fr.d $fa2, $a1 + fmadd.d $fa0, $fa0, $fa2, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a1, $fa0 b .LBB25_40 @@ -5234,7 +5231,7 @@ display_standings: # @display_standings pcaddu18i $ra, %call36(a_wild_card) jirl $ra, $ra, 0 sltui $a0, $a0, 1 - ld.d $a1, $sp, 40 # 8-byte Folded Reload + ld.d $a1, $sp, 56 # 8-byte Folded Reload masknez $a1, $a1, $a0 pcalau12i $a2, %pc_hi20(.L.str.22) addi.d $a2, $a2, %pc_lo12(.L.str.22) @@ -5274,9 +5271,12 @@ display_standings: # @display_standings or $a7, $a7, $a1 bcnez $fcc0, .LBB25_47 # %bb.45: # %.critedge278 - pcalau12i $a1, %pc_hi20(.LCPI25_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI25_0) - fmadd.d $fa0, $fa0, $fa1, $fa2 + vldi $vr1, -928 + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1032 + movgr2fr.d $fa2, $a1 + fmadd.d $fa0, $fa0, $fa2, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a1, $fa0 b .LBB25_48 @@ -5295,7 +5295,7 @@ display_standings: # @display_standings pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $a1, $a0, %pc_lo12(.L.str.19) move $a0, $fp - st.d $a1, $sp, 32 # 8-byte Folded Spill + st.d $a1, $sp, 48 # 8-byte Folded Spill pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 ori $a0, $zero, 10 @@ -5305,8 +5305,13 @@ display_standings: # @display_standings pcalau12i $a0, %pc_hi20(.L.str.20) addi.d $s8, $a0, %pc_lo12(.L.str.20) pcalau12i $a0, %pc_hi20(.L.str.99) - addi.d $s1, $a0, %pc_lo12(.L.str.99) + addi.d $a0, $a0, %pc_lo12(.L.str.99) + st.d $a0, $sp, 40 # 8-byte Folded Spill move $s0, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs0, $a0 b .LBB25_51 .p2align 4, , 16 .LBB25_49: # in Loop: Header=BB25_51 Depth=1 @@ -5333,7 +5338,7 @@ display_standings: # @display_standings pcaddu18i $ra, %call36(fputc) jirl $ra, $ra, 0 addi.d $s0, $s0, 4 - ld.d $s2, $sp, 48 # 8-byte Folded Reload + move $s2, $s1 ori $a0, $zero, 16 beq $s0, $a0, .LBB25_64 .LBB25_51: # =>This Inner Loop Header: Depth=1 @@ -5349,7 +5354,7 @@ display_standings: # @display_standings pcaddu18i $ra, %call36(a_wild_card) jirl $ra, $ra, 0 sltui $a0, $a0, 1 - ld.d $a1, $sp, 40 # 8-byte Folded Reload + ld.d $a1, $sp, 56 # 8-byte Folded Reload masknez $a1, $a1, $a0 maskeqz $a0, $s5, $a0 or $a3, $a0, $a1 @@ -5385,9 +5390,7 @@ display_standings: # @display_standings bcnez $fcc0, .LBB25_57 # %bb.55: # %.critedge280 # in Loop: Header=BB25_51 Depth=1 - pcalau12i $a1, %pc_hi20(.LCPI25_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI25_0) - fmadd.d $fa0, $fa0, $fa1, $fa2 + fmadd.d $fa0, $fa0, $fs0, $fa2 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a1, $fa0 b .LBB25_58 @@ -5411,12 +5414,12 @@ display_standings: # @display_standings st.d $t0, $sp, 8 st.d $a1, $sp, 0 move $a0, $fp - ld.d $a1, $sp, 32 # 8-byte Folded Reload + ld.d $a1, $sp, 48 # 8-byte Folded Reload pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 ori $a1, $zero, 3 ori $a2, $zero, 1 - move $a0, $s1 + ld.d $a0, $sp, 40 # 8-byte Folded Reload move $a3, $fp pcaddu18i $ra, %call36(fwrite) jirl $ra, $ra, 0 @@ -5431,7 +5434,7 @@ display_standings: # @display_standings pcaddu18i $ra, %call36(a_wild_card) jirl $ra, $ra, 0 sltui $a0, $a0, 1 - ld.d $a1, $sp, 40 # 8-byte Folded Reload + ld.d $a1, $sp, 56 # 8-byte Folded Reload masknez $a1, $a1, $a0 maskeqz $a0, $s5, $a0 or $a3, $a0, $a1 @@ -5467,9 +5470,7 @@ display_standings: # @display_standings bcnez $fcc0, .LBB25_63 # %bb.62: # %.critedge282 # in Loop: Header=BB25_51 Depth=1 - pcalau12i $a1, %pc_hi20(.LCPI25_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI25_0) - fmadd.d $fa0, $fa0, $fa1, $fa2 + fmadd.d $fa0, $fa0, $fs0, $fa2 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a1, $fa0 b .LBB25_50 @@ -5480,18 +5481,19 @@ display_standings: # @display_standings .LBB25_64: ori $a0, $zero, 10 move $a1, $fp - ld.d $s8, $sp, 56 # 8-byte Folded Reload - ld.d $s7, $sp, 64 # 8-byte Folded Reload - ld.d $s6, $sp, 72 # 8-byte Folded Reload - ld.d $s5, $sp, 80 # 8-byte Folded Reload - ld.d $s4, $sp, 88 # 8-byte Folded Reload - ld.d $s3, $sp, 96 # 8-byte Folded Reload - ld.d $s2, $sp, 104 # 8-byte Folded Reload - ld.d $s1, $sp, 112 # 8-byte Folded Reload - ld.d $s0, $sp, 120 # 8-byte Folded Reload - ld.d $fp, $sp, 128 # 8-byte Folded Reload - ld.d $ra, $sp, 136 # 8-byte Folded Reload - addi.d $sp, $sp, 144 + fld.d $fs0, $sp, 64 # 8-byte Folded Reload + ld.d $s8, $sp, 72 # 8-byte Folded Reload + ld.d $s7, $sp, 80 # 8-byte Folded Reload + ld.d $s6, $sp, 88 # 8-byte Folded Reload + ld.d $s5, $sp, 96 # 8-byte Folded Reload + ld.d $s4, $sp, 104 # 8-byte Folded Reload + ld.d $s3, $sp, 112 # 8-byte Folded Reload + ld.d $s2, $sp, 120 # 8-byte Folded Reload + ld.d $s1, $sp, 128 # 8-byte Folded Reload + ld.d $s0, $sp, 136 # 8-byte Folded Reload + ld.d $fp, $sp, 144 # 8-byte Folded Reload + ld.d $ra, $sp, 152 # 8-byte Folded Reload + addi.d $sp, $sp, 160 pcaddu18i $t8, %call36(fputc) jr $t8 .Lfunc_end25: diff --git a/results/MultiSource/Benchmarks/Prolangs-C/plot2fig/CMakeFiles/plot2fig.dir/alabel.s b/results/MultiSource/Benchmarks/Prolangs-C/plot2fig/CMakeFiles/plot2fig.dir/alabel.s index aca53ef9..3a3f7075 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/plot2fig/CMakeFiles/plot2fig.dir/alabel.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/plot2fig/CMakeFiles/plot2fig.dir/alabel.s @@ -1,14 +1,6 @@ .file "alabel.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function alabel -.LCPI0_0: - .dword 0x3ff199999999999a # double 1.1000000000000001 -.LCPI0_1: - .dword 0x4052000000000000 # double 72 -.LCPI0_2: - .dword 0x4054000000000000 # double 80 .text - .globl alabel + .globl alabel # -- Begin function alabel .p2align 5 .type alabel,@function alabel: # @alabel @@ -39,18 +31,21 @@ alabel: # @alabel andi $a1, $a2, 255 beqz $a1, .LBB0_4 # %bb.3: - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI0_0) addi.d $a1, $s0, -116 sltui $a1, $a1, 1 addi.d $a2, $s0, -99 sltui $a2, $a2, 1 - vldi $vr1, -928 - movgr2fr.d $fa2, $zero + vldi $vr0, -928 + movgr2fr.d $fa1, $zero movgr2cf $fcc0, $a2 - fsel $fa1, $fa2, $fa1, $fcc0 - movgr2cf $fcc0, $a1 fsel $fa0, $fa1, $fa0, $fcc0 + lu12i.w $a2, -419431 + ori $a2, $a2, 2458 + lu32i.d $a2, 104857 + lu52i.d $a2, $a2, 1023 + movgr2fr.d $fa1, $a2 + movgr2cf $fcc0, $a1 + fsel $fa0, $fa0, $fa1, $fcc0 addi.d $a1, $fp, -114 sltui $a1, $a1, 1 addi.d $a2, $fp, -99 @@ -106,14 +101,18 @@ alabel: # @alabel fld.d $fa3, $a5, 0 fadd.d $fa2, $fa2, $fa3 movgr2fr.w $fa3, $a4 - pcalau12i $a5, %pc_hi20(.LCPI0_1) - fld.d $fa4, $a5, %pc_lo12(.LCPI0_1) - pcalau12i $a5, %pc_hi20(.LCPI0_2) - fld.d $fa5, $a5, %pc_lo12(.LCPI0_2) ffint.d.w $fa3, $fa3 fmul.d $fa0, $fa0, $fa3 - fmul.d $fa0, $fa0, $fa4 - fdiv.d $fa0, $fa0, $fa5 + ori $a5, $zero, 0 + ori $a6, $zero, 0 + lu32i.d $a6, 131072 + lu52i.d $a6, $a6, 1029 + movgr2fr.d $fa3, $a6 + fmul.d $fa0, $fa0, $fa3 + lu32i.d $a5, 262144 + lu52i.d $a5, $a5, 1029 + movgr2fr.d $fa3, $a5 + fdiv.d $fa0, $fa0, $fa3 fadd.d $fa0, $fa0, $fa2 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a5, $fa0 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/plot2fig/CMakeFiles/plot2fig.dir/fill.s b/results/MultiSource/Benchmarks/Prolangs-C/plot2fig/CMakeFiles/plot2fig.dir/fill.s index 19f9a8b4..97459f4f 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/plot2fig/CMakeFiles/plot2fig.dir/fill.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/plot2fig/CMakeFiles/plot2fig.dir/fill.s @@ -1,10 +1,6 @@ .file "fill.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function fill -.LCPI0_0: - .dword 0x40efffc000000000 # double 65534 .text - .globl fill + .globl fill # -- Begin function fill .p2align 5 .type fill,@function fill: # @fill @@ -15,12 +11,14 @@ fill: # @fill movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 vldi $vr1, -784 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_0) fadd.d $fa0, $fa0, $fa1 vldi $vr1, -1004 fmul.d $fa0, $fa0, $fa1 - fdiv.d $fa0, $fa0, $fa2 + ori $a0, $zero, 0 + lu32i.d $a0, -64 + lu52i.d $a0, $a0, 1038 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a2, $fa0 ori $a0, $zero, 5 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/plot2fig/CMakeFiles/plot2fig.dir/linemod.s b/results/MultiSource/Benchmarks/Prolangs-C/plot2fig/CMakeFiles/plot2fig.dir/linemod.s index 5744c830..5ba0e290 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/plot2fig/CMakeFiles/plot2fig.dir/linemod.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/plot2fig/CMakeFiles/plot2fig.dir/linemod.s @@ -1,10 +1,6 @@ .file "linemod.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function linemod -.LCPI0_0: - .word 0x4479c000 # float 999 .text - .globl linemod + .globl linemod # -- Begin function linemod .p2align 5 .type linemod,@function linemod: # @linemod @@ -97,8 +93,8 @@ linemod: # @linemod vldi $vr0, -1264 b .LBB0_11 .LBB0_13: - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI0_0) + lu12i.w $a0, 280476 + movgr2fr.w $fa0, $a0 ori $s0, $zero, 2 b .LBB0_11 .LBB0_14: diff --git a/results/MultiSource/Benchmarks/Prolangs-C/plot2fig/CMakeFiles/plot2fig.dir/rotate.s b/results/MultiSource/Benchmarks/Prolangs-C/plot2fig/CMakeFiles/plot2fig.dir/rotate.s index fc7b244e..cc12749c 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/plot2fig/CMakeFiles/plot2fig.dir/rotate.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/plot2fig/CMakeFiles/plot2fig.dir/rotate.s @@ -1,23 +1,22 @@ .file "rotate.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function rotate -.LCPI0_0: - .dword 0x400921fb54442d18 # double 3.1415926535897931 -.LCPI0_1: - .dword 0x4066800000000000 # double 180 .text - .globl rotate + .globl rotate # -- Begin function rotate .p2align 5 .type rotate,@function rotate: # @rotate # %bb.0: - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_0) - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_1) - movgr2fr.w $fa2, $a2 - ffint.d.w $fa2, $fa2 - fmul.d $fa0, $fa2, $fa0 + movgr2fr.w $fa0, $a2 + ffint.d.w $fa0, $fa0 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, 425984 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa1, $a0 fdiv.d $fa0, $fa0, $fa1 fcvt.s.d $fa0, $fa0 pcalau12i $a0, %pc_hi20(text_rotation) diff --git a/results/MultiSource/Benchmarks/Prolangs-C/simulator/CMakeFiles/simulator.dir/instruct.s b/results/MultiSource/Benchmarks/Prolangs-C/simulator/CMakeFiles/simulator.dir/instruct.s index 243816df..1f7c369d 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/simulator/CMakeFiles/simulator.dir/instruct.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/simulator/CMakeFiles/simulator.dir/instruct.s @@ -1026,14 +1026,7 @@ LDX_P: # @LDX_P .Lfunc_end15: .size LDX_P, .Lfunc_end15-LDX_P # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function MUL_P -.LCPI16_0: - .dword 0x4160000000000000 # double 8388608 -.LCPI16_1: - .dword 0x3e70000000000000 # double 5.9604644775390625E-8 - .text - .globl MUL_P + .globl MUL_P # -- Begin function MUL_P .p2align 5 .type MUL_P,@function MUL_P: # @MUL_P @@ -1064,11 +1057,11 @@ MUL_P: # @MUL_P # %bb.2: movgr2fr.w $fa0, $a1 ffint.d.w $fa0, $fa0 - pcalau12i $a5, %pc_hi20(.LCPI16_0) - fld.d $fa1, $a5, %pc_lo12(.LCPI16_0) - movgr2fr.w $fa2, $a2 - ffint.d.w $fa2, $fa2 - fmul.d $fa0, $fa0, $fa2 + movgr2fr.w $fa1, $a2 + ffint.d.w $fa1, $fa1 + fmul.d $fa0, $fa0, $fa1 + lu52i.d $a5, $zero, 1046 + movgr2fr.d $fa1, $a5 fcmp.cle.d $fcc0, $fa1, $fa0 ori $a5, $zero, 1 bcnez $fcc0, .LBB16_4 @@ -1086,9 +1079,7 @@ MUL_P: # @MUL_P sub.d $a6, $a6, $a1 movgr2fr.w $fa0, $a6 ffint.d.w $fa0, $fa0 - pcalau12i $a6, %pc_hi20(.LCPI16_1) - fld.d $fa1, $a6, %pc_lo12(.LCPI16_1) - movgr2fr.w $fa2, $a2 + movgr2fr.w $fa1, $a2 b .LBB16_8 .LBB16_6: xori $a6, $a3, 1 @@ -1099,21 +1090,21 @@ MUL_P: # @MUL_P ffint.d.w $fa0, $fa0 lu12i.w $a6, 4096 sub.d $a6, $a6, $a2 - pcalau12i $a7, %pc_hi20(.LCPI16_1) - fld.d $fa1, $a7, %pc_lo12(.LCPI16_1) - movgr2fr.w $fa2, $a6 + movgr2fr.w $fa1, $a6 .LBB16_8: - ffint.d.w $fa2, $fa2 - fmul.d $fa0, $fa2, $fa0 + ffint.d.w $fa1, $fa1 + fmul.d $fa0, $fa1, $fa0 + lu52i.d $a6, $zero, 999 + movgr2fr.d $fa1, $a6 fmul.d $fa1, $fa0, $fa1 ftintrz.w.d $fa1, $fa1 movfr2gr.s $a6, $fa1 slli.d $a6, $a6, 24 - pcalau12i $a7, %pc_hi20(.LCPI16_0) - fld.d $fa1, $a7, %pc_lo12(.LCPI16_0) - movgr2fr.w $fa2, $a6 - ffint.d.w $fa2, $fa2 - fsub.d $fa0, $fa0, $fa2 + movgr2fr.w $fa1, $a6 + ffint.d.w $fa1, $fa1 + fsub.d $fa0, $fa0, $fa1 + lu52i.d $a6, $zero, 1046 + movgr2fr.d $fa1, $a6 fcmp.clt.d $fcc0, $fa1, $fa0 ori $a6, $zero, 1 bcnez $fcc0, .LBB16_11 @@ -1134,11 +1125,11 @@ MUL_P: # @MUL_P movgr2fr.w $fa0, $a4 ffint.d.w $fa0, $fa0 sub.d $a2, $a3, $a2 - pcalau12i $a3, %pc_hi20(.LCPI16_0) - fld.d $fa1, $a3, %pc_lo12(.LCPI16_0) - movgr2fr.w $fa2, $a2 - ffint.d.w $fa2, $fa2 - fmul.d $fa0, $fa2, $fa0 + movgr2fr.w $fa1, $a2 + ffint.d.w $fa1, $fa1 + fmul.d $fa0, $fa1, $fa0 + lu52i.d $a2, $zero, 1046 + movgr2fr.d $fa1, $a2 fcmp.cle.d $fcc0, $fa1, $fa0 ori $s0, $zero, 1 bcnez $fcc0, .LBB16_15 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/simulator/CMakeFiles/simulator.dir/instruct2.s b/results/MultiSource/Benchmarks/Prolangs-C/simulator/CMakeFiles/simulator.dir/instruct2.s index d9e93e52..93640bff 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/simulator/CMakeFiles/simulator.dir/instruct2.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/simulator/CMakeFiles/simulator.dir/instruct2.s @@ -380,14 +380,7 @@ DIVR_P: # @DIVR_P .Lfunc_end4: .size DIVR_P, .Lfunc_end4-DIVR_P # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function MULR_P -.LCPI5_0: - .dword 0x4160000000000000 # double 8388608 -.LCPI5_1: - .dword 0x3e70000000000000 # double 5.9604644775390625E-8 - .text - .globl MULR_P + .globl MULR_P # -- Begin function MULR_P .p2align 5 .type MULR_P,@function MULR_P: # @MULR_P @@ -431,11 +424,11 @@ MULR_P: # @MULR_P # %bb.5: movgr2fr.w $fa0, $a4 ffint.d.w $fa0, $fa0 - pcalau12i $a5, %pc_hi20(.LCPI5_0) - fld.d $fa1, $a5, %pc_lo12(.LCPI5_0) - movgr2fr.w $fa2, $a7 - ffint.d.w $fa2, $fa2 - fmul.d $fa0, $fa2, $fa0 + movgr2fr.w $fa1, $a7 + ffint.d.w $fa1, $fa1 + fmul.d $fa0, $fa1, $fa0 + lu52i.d $a5, $zero, 1046 + movgr2fr.d $fa1, $a5 fcmp.cle.d $fcc0, $fa1, $fa0 ori $a5, $zero, 1 bcnez $fcc0, .LBB5_7 @@ -451,24 +444,24 @@ MULR_P: # @MULR_P bnez $a7, .LBB5_10 # %bb.8: lu12i.w $a7, 4096 + ld.w $t0, $a6, 0 sub.d $a7, $a7, $a4 movgr2fr.w $fa0, $a7 - ld.w $a7, $a6, 0 ffint.d.w $fa0, $fa0 - pcalau12i $t0, %pc_hi20(.LCPI5_1) - fld.d $fa1, $t0, %pc_lo12(.LCPI5_1) - movgr2fr.w $fa2, $a7 - ffint.d.w $fa2, $fa2 - fmul.d $fa0, $fa0, $fa2 + movgr2fr.w $fa1, $t0 + ffint.d.w $fa1, $fa1 + fmul.d $fa0, $fa0, $fa1 + lu52i.d $a7, $zero, 999 + movgr2fr.d $fa1, $a7 fmul.d $fa1, $fa0, $fa1 ftintrz.w.d $fa1, $fa1 movfr2gr.s $a7, $fa1 slli.d $a7, $a7, 24 - pcalau12i $t0, %pc_hi20(.LCPI5_0) - fld.d $fa1, $t0, %pc_lo12(.LCPI5_0) - movgr2fr.w $fa2, $a7 - ffint.d.w $fa2, $fa2 - fsub.d $fa0, $fa0, $fa2 + movgr2fr.w $fa1, $a7 + ffint.d.w $fa1, $fa1 + fsub.d $fa0, $fa0, $fa1 + lu52i.d $a7, $zero, 1046 + movgr2fr.d $fa1, $a7 fcmp.cult.d $fcc0, $fa0, $fa1 ori $a7, $zero, 1 bcnez $fcc0, .LBB5_12 @@ -486,20 +479,20 @@ MULR_P: # @MULR_P ffint.d.w $fa0, $fa0 lu12i.w $t0, 4096 sub.d $a7, $t0, $a7 - pcalau12i $t0, %pc_hi20(.LCPI5_1) - fld.d $fa1, $t0, %pc_lo12(.LCPI5_1) - movgr2fr.w $fa2, $a7 - ffint.d.w $fa2, $fa2 - fmul.d $fa0, $fa0, $fa2 + movgr2fr.w $fa1, $a7 + ffint.d.w $fa1, $fa1 + fmul.d $fa0, $fa0, $fa1 + lu52i.d $a7, $zero, 999 + movgr2fr.d $fa1, $a7 fmul.d $fa1, $fa0, $fa1 ftintrz.w.d $fa1, $fa1 movfr2gr.s $a7, $fa1 slli.d $a7, $a7, 24 - pcalau12i $t0, %pc_hi20(.LCPI5_0) - fld.d $fa1, $t0, %pc_lo12(.LCPI5_0) - movgr2fr.w $fa2, $a7 - ffint.d.w $fa2, $fa2 - fsub.d $fa0, $fa0, $fa2 + movgr2fr.w $fa1, $a7 + ffint.d.w $fa1, $fa1 + fsub.d $fa0, $fa0, $fa1 + lu52i.d $a7, $zero, 1046 + movgr2fr.d $fa1, $a7 fcmp.cle.d $fcc0, $fa1, $fa0 ori $a7, $zero, 1 bcnez $fcc0, .LBB5_9 @@ -520,11 +513,11 @@ MULR_P: # @MULR_P movgr2fr.w $fa0, $a4 ffint.d.w $fa0, $fa0 sub.d $a2, $a2, $a3 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - fld.d $fa1, $a3, %pc_lo12(.LCPI5_0) - movgr2fr.w $fa2, $a2 - ffint.d.w $fa2, $fa2 - fmul.d $fa0, $fa0, $fa2 + movgr2fr.w $fa1, $a2 + ffint.d.w $fa1, $fa1 + fmul.d $fa0, $fa0, $fa1 + lu52i.d $a2, $zero, 1046 + movgr2fr.d $fa1, $a2 fcmp.cle.d $fcc0, $fa1, $fa0 ori $s1, $zero, 1 bcnez $fcc0, .LBB5_17 diff --git a/results/MultiSource/Benchmarks/Prolangs-C/simulator/CMakeFiles/simulator.dir/machine.s b/results/MultiSource/Benchmarks/Prolangs-C/simulator/CMakeFiles/simulator.dir/machine.s index 527e99a6..6897acb4 100644 --- a/results/MultiSource/Benchmarks/Prolangs-C/simulator/CMakeFiles/simulator.dir/machine.s +++ b/results/MultiSource/Benchmarks/Prolangs-C/simulator/CMakeFiles/simulator.dir/machine.s @@ -845,14 +845,7 @@ PRINT_ADDRESS: # @PRINT_ADDRESS .Lfunc_end11: .size PRINT_ADDRESS, .Lfunc_end11-PRINT_ADDRESS # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function PRINT_CONSTANT -.LCPI12_0: - .dword 0x40062e42fefa39ef # double 2.7725887222397811 -.LCPI12_1: - .dword 0x40026bb1bbb55516 # double 2.3025850929940459 - .text - .globl PRINT_CONSTANT + .globl PRINT_CONSTANT # -- Begin function PRINT_CONSTANT .p2align 5 .type PRINT_CONSTANT,@function PRINT_CONSTANT: # @PRINT_CONSTANT @@ -876,8 +869,11 @@ PRINT_CONSTANT: # @PRINT_CONSTANT ffint.d.w $fa0, $fa0 pcaddu18i $ra, %call36(log) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI12_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI12_1) + lu12i.w $a0, -279723 + ori $a0, $a0, 1302 + lu32i.d $a0, 158641 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fa1, $a0 fdiv.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 @@ -890,8 +886,11 @@ PRINT_CONSTANT: # @PRINT_CONSTANT ffint.d.w $fa0, $fa0 pcaddu18i $ra, %call36(log) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI12_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI12_0) + lu12i.w $a0, -4189 + ori $a0, $a0, 2543 + lu32i.d $a0, 405058 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fa1, $a0 fdiv.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 diff --git a/results/MultiSource/Benchmarks/Ptrdist/ks/CMakeFiles/ks.dir/KS-2.s b/results/MultiSource/Benchmarks/Ptrdist/ks/CMakeFiles/ks.dir/KS-2.s index cc805e4a..696fd256 100644 --- a/results/MultiSource/Benchmarks/Ptrdist/ks/CMakeFiles/ks.dir/KS-2.s +++ b/results/MultiSource/Benchmarks/Ptrdist/ks/CMakeFiles/ks.dir/KS-2.s @@ -159,12 +159,7 @@ UpdateDs: # @UpdateDs .Lfunc_end2: .size UpdateDs, .Lfunc_end2-UpdateDs # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function FindMaxGpAndSwap -.LCPI3_0: - .word 0xcb18967f # float -9999999 - .text - .globl FindMaxGpAndSwap + .globl FindMaxGpAndSwap # -- Begin function FindMaxGpAndSwap .p2align 5 .type FindMaxGpAndSwap,@function FindMaxGpAndSwap: # @FindMaxGpAndSwap @@ -175,8 +170,10 @@ FindMaxGpAndSwap: # @FindMaxGpAndSwap pcalau12i $a0, %got_pc_hi20(groupA) ld.d $a3, $a0, %got_pc_lo12(groupA) ld.d $a4, $a3, 0 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI3_0) + lu12i.w $a0, -216695 + ori $a0, $a0, 1663 + lu32i.d $a0, 0 + movgr2fr.w $fa0, $a0 beqz $a4, .LBB3_18 # %bb.1: # %.preheader.lr.ph pcalau12i $a1, %got_pc_hi20(groupB) @@ -184,7 +181,7 @@ FindMaxGpAndSwap: # @FindMaxGpAndSwap ld.d $a5, $a1, 0 beqz $a5, .LBB3_18 # %bb.2: # %.preheader.preheader - fld.s $fa0, $a0, %pc_lo12(.LCPI3_0) + movgr2fr.w $fa0, $a0 pcalau12i $a0, %got_pc_hi20(D) ld.d $a6, $a0, %got_pc_lo12(D) pcalau12i $a0, %got_pc_hi20(modules) @@ -506,12 +503,7 @@ FindMaxGpAndSwap: # @FindMaxGpAndSwap .Lfunc_end3: .size FindMaxGpAndSwap, .Lfunc_end3-FindMaxGpAndSwap # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function FindGMax -.LCPI4_0: - .word 0xcb18967f # float -9999999 - .text - .globl FindGMax + .globl FindGMax # -- Begin function FindGMax .p2align 5 .type FindGMax,@function FindGMax: # @FindGMax @@ -522,17 +514,19 @@ FindGMax: # @FindGMax pcalau12i $a1, %got_pc_hi20(numModules) ld.d $a1, $a1, %got_pc_lo12(numModules) ld.d $a2, $a1, 0 + lu12i.w $a3, -216695 + ori $a5, $a3, 1663 ori $a3, $zero, 2 - pcalau12i $a4, %pc_hi20(.LCPI4_0) + lu32i.d $a5, 0 bgeu $a2, $a3, .LBB4_2 # %bb.1: - fld.s $fa0, $a4, %pc_lo12(.LCPI4_0) + movgr2fr.w $fa0, $a5 ret .LBB4_2: # %.lr.ph.preheader pcalau12i $a3, %got_pc_hi20(GP) ld.d $a3, $a3, %got_pc_lo12(GP) - fld.s $fa0, $a4, %pc_lo12(.LCPI4_0) move $a4, $zero + movgr2fr.w $fa0, $a5 b .LBB4_4 .p2align 4, , 16 .LBB4_3: # in Loop: Header=BB4_4 Depth=1 @@ -1065,12 +1059,7 @@ PrintResults: # @PrintResults .Lfunc_end6: .size PrintResults, .Lfunc_end6-PrintResults # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function main -.LCPI7_0: - .word 0xcb18967f # float -9999999 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -1111,11 +1100,13 @@ main: # @main ori $s6, $zero, 1 pcalau12i $a0, %got_pc_hi20(numModules) ld.d $s7, $a0, %got_pc_lo12(numModules) - pcalau12i $a0, %pc_hi20(.LCPI7_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI7_0) addi.w $a0, $zero, -1 lu32i.d $a0, 0 st.d $a0, $sp, 64 # 8-byte Folded Spill + lu12i.w $a0, -216695 + ori $a0, $a0, 1663 + lu32i.d $a0, 0 + movgr2fr.w $fs1, $a0 pcalau12i $a0, %got_pc_hi20(stdout) ld.d $a0, $a0, %got_pc_lo12(stdout) st.d $a0, $sp, 56 # 8-byte Folded Spill diff --git a/results/MultiSource/Benchmarks/Rodinia/backprop/CMakeFiles/backprop.dir/backpropKernel.s b/results/MultiSource/Benchmarks/Rodinia/backprop/CMakeFiles/backprop.dir/backpropKernel.s index 073312a5..354e5ad7 100644 --- a/results/MultiSource/Benchmarks/Rodinia/backprop/CMakeFiles/backprop.dir/backpropKernel.s +++ b/results/MultiSource/Benchmarks/Rodinia/backprop/CMakeFiles/backprop.dir/backpropKernel.s @@ -1,10 +1,6 @@ .file "backpropKernel.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function bpnn_train_kernel -.LCPI0_0: - .dword 0x3fd3333333333333 # double 0.29999999999999999 .text - .globl bpnn_train_kernel + .globl bpnn_train_kernel # -- Begin function bpnn_train_kernel .p2align 5 .type bpnn_train_kernel,@function bpnn_train_kernel: # @bpnn_train_kernel @@ -67,8 +63,11 @@ bpnn_train_kernel: # @bpnn_train_kernel st.d $a0, $sp, 56 # 8-byte Folded Spill addi.d $a0, $s4, -1 st.d $a0, $sp, 48 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_0) + lu12i.w $a0, 209715 + ori $a0, $a0, 819 + lu32i.d $a0, 209715 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fs0, $a0 lu12i.w $t2, 260096 movgr2fr.w $fs1, $zero movgr2fr.d $fs2, $zero diff --git a/results/MultiSource/Benchmarks/Rodinia/backprop/CMakeFiles/backprop.dir/main.s b/results/MultiSource/Benchmarks/Rodinia/backprop/CMakeFiles/backprop.dir/main.s index 093dcdf0..e9a3dd85 100644 --- a/results/MultiSource/Benchmarks/Rodinia/backprop/CMakeFiles/backprop.dir/main.s +++ b/results/MultiSource/Benchmarks/Rodinia/backprop/CMakeFiles/backprop.dir/main.s @@ -1,10 +1,6 @@ .file "main.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function main -.LCPI0_0: - .word 0x30000000 # float 4.65661287E-10 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -68,10 +64,10 @@ main: # @main ori $a0, $zero, 8 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.s $fs0, $a1, %pc_lo12(.LCPI0_0) st.d $a0, $sp, 120 # 8-byte Folded Spill move $fp, $zero + lu12i.w $a0, 196608 + movgr2fr.w $fs0, $a0 st.d $s0, $sp, 160 # 8-byte Folded Spill .p2align 4, , 16 .LBB0_1: # %.preheader81 @@ -462,7 +458,7 @@ main: # @main jirl $ra, $ra, 0 move $s5, $zero move $a1, $zero - move $s8, $zero + move $fp, $zero st.d $s3, $sp, 104 # 8-byte Folded Spill addi.d $a0, $s3, 32 st.d $a0, $sp, 144 # 8-byte Folded Spill @@ -500,7 +496,7 @@ main: # @main .p2align 4, , 16 .LBB0_5: # %._crit_edge.i # in Loop: Header=BB0_6 Depth=1 - addi.d $s8, $s8, 68 + addi.d $fp, $fp, 68 ld.d $a1, $sp, 152 # 8-byte Folded Reload addi.w $a1, $a1, 17 addi.w $s5, $s5, -17 @@ -543,7 +539,7 @@ main: # @main addi.w $a0, $a0, 17 st.d $a0, $sp, 248 # 8-byte Folded Spill ld.d $a0, $sp, 160 # 8-byte Folded Reload - beq $s8, $a0, .LBB0_40 + beq $fp, $a0, .LBB0_40 .LBB0_6: # %.preheader34.i # =>This Inner Loop Header: Depth=1 st.d $a1, $sp, 152 # 8-byte Folded Spill @@ -553,10 +549,10 @@ main: # @main addi.d $a1, $zero, -1000 mul.w $a0, $a0, $a1 ld.d $a1, $sp, 144 # 8-byte Folded Reload - add.d $fp, $a1, $s8 + add.d $s8, $a1, $fp bne $a0, $s5, .LBB0_8 # %bb.7: # in Loop: Header=BB0_6 Depth=1 - fld.s $fa0, $fp, -32 + fld.s $fa0, $s8, -32 fcvt.d.s $fa0, $fa0 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str) @@ -572,7 +568,7 @@ main: # @main sub.w $a0, $a1, $a0 bne $a0, $s5, .LBB0_10 # %bb.9: # in Loop: Header=BB0_6 Depth=1 - fld.s $fa0, $fp, -28 + fld.s $fa0, $s8, -28 fcvt.d.s $fa0, $fa0 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str) @@ -588,7 +584,7 @@ main: # @main sub.w $a0, $a1, $a0 bne $a0, $s5, .LBB0_12 # %bb.11: # in Loop: Header=BB0_6 Depth=1 - fld.s $fa0, $fp, -24 + fld.s $fa0, $s8, -24 fcvt.d.s $fa0, $fa0 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str) @@ -604,7 +600,7 @@ main: # @main sub.w $a0, $a1, $a0 bne $a0, $s5, .LBB0_14 # %bb.13: # in Loop: Header=BB0_6 Depth=1 - fld.s $fa0, $fp, -20 + fld.s $fa0, $s8, -20 fcvt.d.s $fa0, $fa0 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str) @@ -621,7 +617,7 @@ main: # @main sub.w $a0, $a1, $a0 bne $a0, $s5, .LBB0_16 # %bb.15: # in Loop: Header=BB0_6 Depth=1 - fld.s $fa0, $fp, -16 + fld.s $fa0, $s8, -16 fcvt.d.s $fa0, $fa0 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str) @@ -638,7 +634,7 @@ main: # @main sub.w $a0, $a1, $a0 bne $a0, $s5, .LBB0_18 # %bb.17: # in Loop: Header=BB0_6 Depth=1 - fld.s $fa0, $fp, -12 + fld.s $fa0, $s8, -12 fcvt.d.s $fa0, $fa0 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str) @@ -655,7 +651,7 @@ main: # @main sub.w $a0, $a1, $a0 bne $a0, $s5, .LBB0_20 # %bb.19: # in Loop: Header=BB0_6 Depth=1 - fld.s $fa0, $fp, -8 + fld.s $fa0, $s8, -8 fcvt.d.s $fa0, $fa0 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str) @@ -672,7 +668,7 @@ main: # @main sub.w $a0, $a1, $a0 bne $a0, $s5, .LBB0_22 # %bb.21: # in Loop: Header=BB0_6 Depth=1 - fld.s $fa0, $fp, -4 + fld.s $fa0, $s8, -4 fcvt.d.s $fa0, $fa0 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str) @@ -690,7 +686,7 @@ main: # @main bne $a0, $s5, .LBB0_24 # %bb.23: # in Loop: Header=BB0_6 Depth=1 ld.d $a0, $sp, 144 # 8-byte Folded Reload - fldx.s $fa0, $a0, $s8 + fldx.s $fa0, $a0, $fp fcvt.d.s $fa0, $fa0 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str) @@ -707,7 +703,7 @@ main: # @main sub.w $a0, $a1, $a0 bne $a0, $s5, .LBB0_26 # %bb.25: # in Loop: Header=BB0_6 Depth=1 - fld.s $fa0, $fp, 4 + fld.s $fa0, $s8, 4 fcvt.d.s $fa0, $fa0 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str) @@ -724,7 +720,7 @@ main: # @main sub.w $a0, $a1, $a0 bne $a0, $s5, .LBB0_28 # %bb.27: # in Loop: Header=BB0_6 Depth=1 - fld.s $fa0, $fp, 8 + fld.s $fa0, $s8, 8 fcvt.d.s $fa0, $fa0 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str) @@ -741,7 +737,7 @@ main: # @main sub.w $a0, $a1, $a0 bne $a0, $s5, .LBB0_30 # %bb.29: # in Loop: Header=BB0_6 Depth=1 - fld.s $fa0, $fp, 12 + fld.s $fa0, $s8, 12 fcvt.d.s $fa0, $fa0 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str) @@ -758,7 +754,7 @@ main: # @main sub.w $a0, $a1, $a0 bne $a0, $s5, .LBB0_32 # %bb.31: # in Loop: Header=BB0_6 Depth=1 - fld.s $fa0, $fp, 16 + fld.s $fa0, $s8, 16 fcvt.d.s $fa0, $fa0 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str) @@ -775,7 +771,7 @@ main: # @main sub.w $a0, $a1, $a0 bne $a0, $s5, .LBB0_34 # %bb.33: # in Loop: Header=BB0_6 Depth=1 - fld.s $fa0, $fp, 20 + fld.s $fa0, $s8, 20 fcvt.d.s $fa0, $fa0 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str) @@ -792,7 +788,7 @@ main: # @main sub.w $a0, $a1, $a0 bne $a0, $s5, .LBB0_36 # %bb.35: # in Loop: Header=BB0_6 Depth=1 - fld.s $fa0, $fp, 24 + fld.s $fa0, $s8, 24 fcvt.d.s $fa0, $fa0 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str) @@ -808,7 +804,7 @@ main: # @main sub.w $a0, $a1, $a0 bne $a0, $s5, .LBB0_38 # %bb.37: # in Loop: Header=BB0_6 Depth=1 - fld.s $fa0, $fp, 28 + fld.s $fa0, $s8, 28 fcvt.d.s $fa0, $fa0 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str) @@ -824,7 +820,7 @@ main: # @main sub.w $a0, $a1, $a0 bne $a0, $s5, .LBB0_5 # %bb.39: # in Loop: Header=BB0_6 Depth=1 - fld.s $fa0, $fp, 32 + fld.s $fa0, $s8, 32 fcvt.d.s $fa0, $fa0 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str) diff --git a/results/MultiSource/Benchmarks/Rodinia/hotspot/CMakeFiles/hotspot.dir/main.s b/results/MultiSource/Benchmarks/Rodinia/hotspot/CMakeFiles/hotspot.dir/main.s index 29814d1e..1e41f3df 100644 --- a/results/MultiSource/Benchmarks/Rodinia/hotspot/CMakeFiles/hotspot.dir/main.s +++ b/results/MultiSource/Benchmarks/Rodinia/hotspot/CMakeFiles/hotspot.dir/main.s @@ -1,91 +1,65 @@ .file "main.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function compute_tran_temp -.LCPI0_0: - .dword 0x3f60000000000000 # double 0.001953125 -.LCPI0_1: - .dword 0x412ab3f000000000 # double 875000 -.LCPI0_2: - .dword 0x4069000000000000 # double 200 -.LCPI0_3: - .dword 0x4059000000000000 # double 100 -.LCPI0_4: - .dword 0x413ab3f000000000 # double 1.75E+6 -.LCPI0_5: - .dword 0x4146e36000000000 # double 3.0E+6 -.LCPI0_6: - .dword 0x3f50624dd2f1a9fc # double 0.001 .text - .globl compute_tran_temp + .globl compute_tran_temp # -- Begin function compute_tran_temp .p2align 5 .type compute_tran_temp,@function compute_tran_temp: # @compute_tran_temp # %bb.0: pcalau12i $a3, %pc_hi20(chip_height) fld.d $fa0, $a3, %pc_lo12(chip_height) - pcalau12i $a3, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a3, %pc_lo12(.LCPI0_0) - fmul.d $fa3, $fa0, $fa1 pcalau12i $a3, %pc_hi20(chip_width) - fld.d $fa0, $a3, %pc_lo12(chip_width) + fld.d $fa1, $a3, %pc_lo12(chip_width) + lu52i.d $a3, $zero, 1014 + movgr2fr.d $fa2, $a3 + fmul.d $fa3, $fa0, $fa2 + fmul.d $fa4, $fa1, $fa2 pcalau12i $a3, %pc_hi20(t_chip) - fld.d $fa4, $a3, %pc_lo12(t_chip) - pcalau12i $a3, %pc_hi20(.LCPI0_1) - fld.d $fa2, $a3, %pc_lo12(.LCPI0_1) - fmul.d $fa5, $fa0, $fa1 - pcalau12i $a3, %pc_hi20(.LCPI0_2) - fld.d $fa1, $a3, %pc_lo12(.LCPI0_2) - fmul.d $fa0, $fa4, $fa2 + fld.d $fa5, $a3, %pc_lo12(t_chip) + ori $a3, $zero, 0 + ori $a4, $zero, 0 + lu32i.d $a4, -347152 + lu52i.d $a5, $a4, 1042 + movgr2fr.d $fa0, $a5 fmul.d $fa0, $fa5, $fa0 + fmul.d $fa0, $fa4, $fa0 fmul.d $fa0, $fa3, $fa0 - fmul.d $fa2, $fa4, $fa1 + ori $a5, $zero, 0 + lu32i.d $a5, -458752 + lu52i.d $a6, $a5, 1030 + movgr2fr.d $fa1, $a6 + fmul.d $fa2, $fa5, $fa1 fmul.d $fa1, $fa3, $fa2 - pcalau12i $a3, %pc_hi20(.LCPI0_3) - fld.d $fa6, $a3, %pc_lo12(.LCPI0_3) - fdiv.d $fa1, $fa5, $fa1 - fmul.d $fa2, $fa5, $fa2 + fdiv.d $fa1, $fa4, $fa1 + fmul.d $fa2, $fa4, $fa2 fdiv.d $fa2, $fa3, $fa2 + lu52i.d $a5, $a5, 1029 + movgr2fr.d $fa6, $a5 fmul.d $fa3, $fa3, $fa6 - fmul.d $fa3, $fa3, $fa5 - fdiv.d $fa3, $fa4, $fa3 - vldi $vr5, -928 + fmul.d $fa3, $fa3, $fa4 + fdiv.d $fa3, $fa5, $fa3 + vldi $vr4, -928 + fmul.d $fa4, $fa5, $fa4 + lu52i.d $a4, $a4, 1043 + movgr2fr.d $fa5, $a4 fmul.d $fa4, $fa4, $fa5 - pcalau12i $a3, %pc_hi20(.LCPI0_4) - fld.d $fa6, $a3, %pc_lo12(.LCPI0_4) - pcalau12i $a3, %pc_hi20(.LCPI0_5) - fld.d $fa7, $a3, %pc_lo12(.LCPI0_5) - pcalau12i $a3, %pc_hi20(.LCPI0_6) - fld.d $ft0, $a3, %pc_lo12(.LCPI0_6) - pcalau12i $a3, %pc_hi20(amb_temp) - fld.d $fa5, $a3, %pc_lo12(amb_temp) - fmul.d $fa4, $fa4, $fa6 - fdiv.d $fa4, $fa7, $fa4 - fdiv.d $fa4, $ft0, $fa4 + lu32i.d $a3, 451424 + lu52i.d $a3, $a3, 1044 + movgr2fr.d $fa5, $a3 + fdiv.d $fa4, $fa5, $fa4 + lu12i.w $a3, -184550 + ori $a3, $a3, 2556 + lu32i.d $a3, 25165 + pcalau12i $a4, %pc_hi20(amb_temp) + fld.d $fa5, $a4, %pc_lo12(amb_temp) + lu52i.d $a3, $a3, 1013 + movgr2fr.d $fa6, $a3 + fdiv.d $fa4, $fa6, $fa4 pcaddu18i $t8, %call36(hotspotKernel) jr $t8 .Lfunc_end0: .size compute_tran_temp, .Lfunc_end0-compute_tran_temp # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI1_0: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI1_1: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI1_2: - .dword 0x3f60000000000000 # double 0.001953125 -.LCPI1_3: - .dword 0x412ab3f000000000 # double 875000 -.LCPI1_4: - .dword 0x4069000000000000 # double 200 -.LCPI1_5: - .dword 0x4059000000000000 # double 100 -.LCPI1_6: - .dword 0x413ab3f000000000 # double 1.75E+6 -.LCPI1_7: - .dword 0x4146e36000000000 # double 3.0E+6 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -132,11 +106,17 @@ main: # @main jirl $ra, $ra, 0 move $s2, $zero move $s4, $zero - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI1_0) - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fs0, $a0, %pc_lo12(.LCPI1_1) + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs1, $a0 lu12i.w $s5, -1 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs0, $a0 lu12i.w $s3, 1 ori $s6, $zero, 512 .p2align 4, , 16 @@ -195,41 +175,47 @@ main: # @main # %bb.7: pcalau12i $a0, %pc_hi20(chip_height) fld.d $fa0, $a0, %pc_lo12(chip_height) - pcalau12i $a0, %pc_hi20(.LCPI1_2) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_2) - fmul.d $fa3, $fa0, $fa1 pcalau12i $a0, %pc_hi20(chip_width) - fld.d $fa0, $a0, %pc_lo12(chip_width) + fld.d $fa1, $a0, %pc_lo12(chip_width) + lu52i.d $a0, $zero, 1014 + movgr2fr.d $fa2, $a0 + fmul.d $fa3, $fa0, $fa2 + fmul.d $fa4, $fa1, $fa2 pcalau12i $a0, %pc_hi20(t_chip) - fld.d $fa4, $a0, %pc_lo12(t_chip) - pcalau12i $a0, %pc_hi20(.LCPI1_3) - fld.d $fa2, $a0, %pc_lo12(.LCPI1_3) - fmul.d $fa5, $fa0, $fa1 - pcalau12i $a0, %pc_hi20(.LCPI1_4) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_4) - fmul.d $fa0, $fa4, $fa2 + fld.d $fa5, $a0, %pc_lo12(t_chip) + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -347152 + lu52i.d $a2, $a1, 1042 + movgr2fr.d $fa0, $a2 fmul.d $fa0, $fa5, $fa0 + fmul.d $fa0, $fa4, $fa0 fmul.d $fa0, $fa3, $fa0 - fmul.d $fa2, $fa4, $fa1 + ori $a2, $zero, 0 + lu32i.d $a2, -458752 + lu52i.d $a3, $a2, 1030 + movgr2fr.d $fa1, $a3 + fmul.d $fa2, $fa5, $fa1 fmul.d $fa1, $fa3, $fa2 - pcalau12i $a0, %pc_hi20(.LCPI1_5) - fld.d $fa6, $a0, %pc_lo12(.LCPI1_5) - fdiv.d $fa1, $fa5, $fa1 - fmul.d $fa2, $fa5, $fa2 + fdiv.d $fa1, $fa4, $fa1 + fmul.d $fa2, $fa4, $fa2 fdiv.d $fa2, $fa3, $fa2 + lu52i.d $a2, $a2, 1029 + movgr2fr.d $fa6, $a2 fmul.d $fa3, $fa3, $fa6 - fmul.d $fa3, $fa3, $fa5 - fdiv.d $fa3, $fa4, $fa3 - vldi $vr5, -928 + fmul.d $fa3, $fa3, $fa4 + fdiv.d $fa3, $fa5, $fa3 + vldi $vr4, -928 + fmul.d $fa4, $fa5, $fa4 + lu52i.d $a1, $a1, 1043 + movgr2fr.d $fa5, $a1 fmul.d $fa4, $fa4, $fa5 - pcalau12i $a0, %pc_hi20(.LCPI1_6) - fld.d $fa6, $a0, %pc_lo12(.LCPI1_6) - pcalau12i $a0, %pc_hi20(.LCPI1_7) - fld.d $fa7, $a0, %pc_lo12(.LCPI1_7) - pcalau12i $a0, %pc_hi20(amb_temp) - fld.d $fa5, $a0, %pc_lo12(amb_temp) - fmul.d $fa4, $fa4, $fa6 - fdiv.d $fa4, $fa7, $fa4 + lu32i.d $a0, 451424 + lu52i.d $a0, $a0, 1044 + pcalau12i $a1, %pc_hi20(amb_temp) + fld.d $fa5, $a1, %pc_lo12(amb_temp) + movgr2fr.d $fa6, $a0 + fdiv.d $fa4, $fa6, $fa4 fdiv.d $fa4, $fs0, $fa4 ld.d $a0, $sp, 32 # 8-byte Folded Reload st.d $s8, $sp, 8 # 8-byte Folded Spill diff --git a/results/MultiSource/Benchmarks/Rodinia/srad/CMakeFiles/srad.dir/main.s b/results/MultiSource/Benchmarks/Rodinia/srad/CMakeFiles/srad.dir/main.s index 229cab23..51d37352 100644 --- a/results/MultiSource/Benchmarks/Rodinia/srad/CMakeFiles/srad.dir/main.s +++ b/results/MultiSource/Benchmarks/Rodinia/srad/CMakeFiles/srad.dir/main.s @@ -1,10 +1,6 @@ .file "main.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function main -.LCPI0_0: - .word 0x30000000 # float 4.65661287E-10 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -62,9 +58,9 @@ main: # @main pcaddu18i $ra, %call36(glibc_compat_srand) jirl $ra, $ra, 0 move $fp, $zero - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI0_0) lu12i.w $s0, -1 + lu12i.w $a0, 196608 + movgr2fr.w $fs0, $a0 lu12i.w $s7, 1 ori $s1, $zero, 2048 move $s2, $s8 @@ -221,12 +217,7 @@ main: # @main .Lfunc_end0: .size main, .Lfunc_end0-main # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function random_matrix -.LCPI1_0: - .word 0x30000000 # float 4.65661287E-10 - .text - .globl random_matrix + .globl random_matrix # -- Begin function random_matrix .p2align 5 .type random_matrix,@function random_matrix: # @random_matrix @@ -245,9 +236,9 @@ random_matrix: # @random_matrix pcaddu18i $ra, %call36(glibc_compat_srand) jirl $ra, $ra, 0 move $s0, $zero - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI1_0) lu12i.w $s1, -1 + lu12i.w $a0, 196608 + movgr2fr.w $fs0, $a0 lu12i.w $s2, 1 ori $s3, $zero, 2048 .p2align 4, , 16 diff --git a/results/MultiSource/Benchmarks/Rodinia/srad/CMakeFiles/srad.dir/sradKernel.s b/results/MultiSource/Benchmarks/Rodinia/srad/CMakeFiles/srad.dir/sradKernel.s index 2801a2c5..9987df7a 100644 --- a/results/MultiSource/Benchmarks/Rodinia/srad/CMakeFiles/srad.dir/sradKernel.s +++ b/results/MultiSource/Benchmarks/Rodinia/srad/CMakeFiles/srad.dir/sradKernel.s @@ -1,18 +1,6 @@ .file "sradKernel.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function srad_kernel -.LCPI0_0: - .word 0x38800000 # float 6.10351563E-5 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI0_1: - .dword 0xbfb0000000000000 # double -0.0625 -.LCPI0_2: - .dword 0xb690000000000000 # double -7.0064923216240854E-46 -.LCPI0_3: - .dword 0x3ff0000010000000 # double 1.0000000596046448 .text - .globl srad_kernel + .globl srad_kernel # -- Begin function srad_kernel .p2align 5 .type srad_kernel,@function srad_kernel: # @srad_kernel @@ -48,7 +36,7 @@ srad_kernel: # @srad_kernel add.d $s3, $s7, $t1 add.d $s4, $a2, $t1 add.d $s5, $a3, $t1 - st.d $t1, $sp, 112 # 8-byte Folded Spill + st.d $t1, $sp, 120 # 8-byte Folded Spill add.d $t1, $a6, $t1 add.d $t2, $a6, $t7 addi.d $t3, $a5, 4 @@ -67,7 +55,7 @@ srad_kernel: # @srad_kernel sltu $t6, $t1, $t8 and $t5, $t5, $t6 or $t4, $t4, $t5 - st.d $s2, $sp, 104 # 8-byte Folded Spill + st.d $s2, $sp, 112 # 8-byte Folded Spill sltu $t5, $s2, $a7 sltu $t6, $t3, $t8 and $t5, $t5, $t6 @@ -84,7 +72,7 @@ srad_kernel: # @srad_kernel sltu $t6, $t1, $a4 and $t5, $t5, $t6 or $t4, $t4, $t5 - st.d $s3, $sp, 96 # 8-byte Folded Spill + st.d $s3, $sp, 104 # 8-byte Folded Spill sltu $t5, $s3, $a7 sltu $t6, $t3, $a4 and $t5, $t5, $t6 @@ -97,7 +85,7 @@ srad_kernel: # @srad_kernel sltu $t6, $t1, $fp and $t5, $t5, $t6 or $t4, $t4, $t5 - st.d $s4, $sp, 88 # 8-byte Folded Spill + st.d $s4, $sp, 96 # 8-byte Folded Spill sltu $t5, $s4, $a7 sltu $t6, $t3, $fp and $t5, $t5, $t6 @@ -106,54 +94,54 @@ srad_kernel: # @srad_kernel sltu $t6, $t1, $s0 and $t5, $t5, $t6 or $t4, $t4, $t5 - st.d $s5, $sp, 80 # 8-byte Folded Spill + st.d $s5, $sp, 88 # 8-byte Folded Spill sltu $t5, $s5, $a7 sltu $t6, $t3, $s0 and $t5, $t5, $t6 add.d $t6, $s8, $a0 or $t4, $t4, $t5 - add.d $s6, $a5, $a0 + add.d $s2, $a5, $a0 sltu $t2, $t3, $t2 addu16i.d $t3, $a6, 128 sltu $t1, $t1, $a7 and $t1, $t1, $t2 or $t1, $t4, $t1 - sltu $t2, $s6, $t3 + sltu $t2, $s2, $t3 sltu $t4, $ra, $a7 and $t2, $t2, $t4 addu16i.d $t4, $s8, 128 addi.d $t4, $t4, -4 - sltu $t4, $s6, $t4 + sltu $t4, $s2, $t4 sltu $t5, $t6, $a7 and $t4, $t4, $t5 or $t2, $t2, $t4 addu16i.d $t4, $s7, 128 addi.d $t4, $t4, -4 - sltu $t4, $s6, $t4 - add.d $s2, $s7, $a0 - sltu $t5, $s2, $a7 + sltu $t4, $s2, $t4 + add.d $s3, $s7, $a0 + sltu $t5, $s3, $a7 and $t4, $t4, $t5 or $t2, $t2, $t4 addu16i.d $t4, $a2, 128 addi.d $t4, $t4, -4 - sltu $t4, $s6, $t4 - add.d $t5, $a2, $a0 - st.d $t5, $sp, 232 # 8-byte Folded Spill - sltu $t5, $t5, $a7 + sltu $t4, $s2, $t4 + add.d $s4, $a2, $a0 + sltu $t5, $s4, $a7 and $t4, $t4, $t5 or $t2, $t2, $t4 addu16i.d $t4, $a3, 128 addi.d $t4, $t4, -4 - sltu $t4, $s6, $t4 + st.d $s2, $sp, 80 # 8-byte Folded Spill + sltu $t4, $s2, $t4 add.d $t5, $a3, $a0 - st.d $t5, $sp, 224 # 8-byte Folded Spill + st.d $t5, $sp, 232 # 8-byte Folded Spill sltu $a7, $t5, $a7 and $a7, $t4, $a7 or $a7, $t2, $a7 - st.d $a7, $sp, 64 # 8-byte Folded Spill + st.d $a7, $sp, 72 # 8-byte Folded Spill addi.d $a7, $t3, -4 add.d $t3, $a5, $t7 - st.d $a7, $sp, 208 # 8-byte Folded Spill + st.d $a7, $sp, 216 # 8-byte Folded Spill sltu $a7, $a5, $a7 sltu $t2, $a6, $t3 and $a7, $a7, $t2 @@ -169,63 +157,66 @@ srad_kernel: # @srad_kernel sltu $a7, $a2, $t3 and $a7, $t2, $a7 add.d $a1, $a5, $a1 - st.d $a1, $sp, 216 # 8-byte Folded Spill + st.d $a1, $sp, 224 # 8-byte Folded Spill or $a1, $a4, $a7 sltu $a7, $a5, $s0 sltu $a4, $a3, $t3 and $a4, $a7, $a4 or $a1, $a1, $a4 - st.d $a1, $sp, 256 # 8-byte Folded Spill + st.d $a1, $sp, 264 # 8-byte Folded Spill lu12i.w $a1, 2 ori $a1, $a1, 4 add.d $a1, $a5, $a1 - st.d $a1, $sp, 56 # 8-byte Folded Spill + st.d $a1, $sp, 64 # 8-byte Folded Spill ori $a0, $a0, 4 add.d $a0, $a6, $a0 - st.d $a0, $sp, 48 # 8-byte Folded Spill + st.d $a0, $sp, 56 # 8-byte Folded Spill movgr2fr.w $fa0, $zero - vldi $vr1, -928 - vldi $vr2, -944 - vldi $vr3, -912 - vldi $vr4, -1168 + lu12i.w $a0, 231424 + movgr2fr.w $fa1, $a0 + vldi $vr2, -928 + lu52i.d $a0, $zero, -1029 + movgr2fr.d $fa3, $a0 + vldi $vr4, -944 + vldi $vr5, -912 + vldi $vr6, -1168 + lu52i.d $a0, $zero, -1175 + movgr2fr.d $fa7, $a0 andi $a0, $t1, 1 - st.d $a0, $sp, 248 # 8-byte Folded Spill - vldi $vr5, -960 + st.d $a0, $sp, 256 # 8-byte Folded Spill + vldi $vr8, -960 + lu12i.w $a0, 65536 + lu52i.d $s6, $a0, 1023 lu12i.w $a0, -2 ori $a0, $a0, 4080 - st.d $a0, $sp, 240 # 8-byte Folded Spill - lu12i.w $a0, 65536 - lu52i.d $t7, $a0, 1023 + st.d $a0, $sp, 248 # 8-byte Folded Spill add.d $a0, $a6, $t0 - st.d $a0, $sp, 184 # 8-byte Folded Spill + st.d $a0, $sp, 192 # 8-byte Folded Spill add.d $a0, $a3, $t0 - st.d $a0, $sp, 40 # 8-byte Folded Spill + st.d $a0, $sp, 48 # 8-byte Folded Spill add.d $a0, $s8, $t0 - st.d $a0, $sp, 32 # 8-byte Folded Spill + st.d $a0, $sp, 40 # 8-byte Folded Spill add.d $a0, $a5, $t0 - st.d $a0, $sp, 24 # 8-byte Folded Spill + st.d $a0, $sp, 32 # 8-byte Folded Spill add.d $a0, $s7, $t0 - st.d $a0, $sp, 16 # 8-byte Folded Spill + st.d $a0, $sp, 24 # 8-byte Folded Spill add.d $a0, $a2, $t0 - st.d $a0, $sp, 8 # 8-byte Folded Spill + st.d $a0, $sp, 16 # 8-byte Folded Spill ori $t1, $zero, 512 ori $t2, $zero, 128 - ori $t4, $zero, 4092 - lu12i.w $t3, -1 - ori $t5, $zero, 4080 - lu52i.d $s0, $zero, 1022 - lu52i.d $s3, $zero, -1029 - st.d $a6, $sp, 168 # 8-byte Folded Spill - st.d $a5, $sp, 160 # 8-byte Folded Spill - st.d $a3, $sp, 152 # 8-byte Folded Spill - st.d $a2, $sp, 144 # 8-byte Folded Spill - st.d $s7, $sp, 136 # 8-byte Folded Spill - st.d $s8, $sp, 128 # 8-byte Folded Spill - st.d $ra, $sp, 120 # 8-byte Folded Spill - st.d $t6, $sp, 200 # 8-byte Folded Spill - st.d $s6, $sp, 72 # 8-byte Folded Spill - st.d $s2, $sp, 192 # 8-byte Folded Spill - st.d $t7, $sp, 264 # 8-byte Folded Spill + ori $t3, $zero, 4092 + lu12i.w $t5, -1 + ori $s2, $zero, 4080 + st.d $a6, $sp, 176 # 8-byte Folded Spill + st.d $a5, $sp, 168 # 8-byte Folded Spill + st.d $a3, $sp, 160 # 8-byte Folded Spill + st.d $a2, $sp, 152 # 8-byte Folded Spill + st.d $s7, $sp, 144 # 8-byte Folded Spill + st.d $s8, $sp, 136 # 8-byte Folded Spill + st.d $ra, $sp, 128 # 8-byte Folded Spill + st.d $t6, $sp, 208 # 8-byte Folded Spill + st.d $s3, $sp, 200 # 8-byte Folded Spill + st.d $s4, $sp, 240 # 8-byte Folded Spill .p2align 4, , 16 .LBB0_1: # %.preheader624 # =>This Loop Header: Depth=1 @@ -241,9 +232,9 @@ srad_kernel: # @srad_kernel # Child Loop BB0_57 Depth 3 move $a0, $zero move $a1, $a5 - fmov.s $fa7, $fa0 - fmov.s $fa6, $fa0 - ld.d $a7, $sp, 224 # 8-byte Folded Reload + fmov.s $ft2, $fa0 + fmov.s $ft1, $fa0 + ld.d $a7, $sp, 232 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_2: # %.preheader622 # Parent Loop BB0_1 Depth=1 @@ -254,736 +245,728 @@ srad_kernel: # @srad_kernel .LBB0_3: # Parent Loop BB0_1 Depth=1 # Parent Loop BB0_2 Depth=2 # => This Inner Loop Header: Depth=3 - fldx.s $ft0, $a1, $a4 - fadd.s $fa7, $fa7, $ft0 - fmul.s $ft0, $ft0, $ft0 + fldx.s $ft3, $a1, $a4 + fadd.s $ft2, $ft2, $ft3 + fmul.s $ft3, $ft3, $ft3 addi.d $a4, $a4, 4 - fadd.s $fa6, $fa6, $ft0 + fadd.s $ft1, $ft1, $ft3 bne $a4, $t1, .LBB0_3 # %bb.4: # in Loop: Header=BB0_2 Depth=2 addi.d $a0, $a0, 1 add.d $a1, $a1, $t0 bne $a0, $t2, .LBB0_2 # %bb.5: # in Loop: Header=BB0_1 Depth=1 - fld.s $ft0, $a5, 0 - fsub.s $ft1, $ft0, $ft0 - fst.s $ft1, $s8, 0 - fldx.s $ft1, $a5, $t0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.s $ft2, $a0, %pc_lo12(.LCPI0_0) - fsub.s $ft1, $ft1, $ft0 - fst.s $ft1, $s7, 0 - fld.s $ft1, $a5, 0 - fmul.s $fa7, $fa7, $ft2 - fsub.s $ft1, $ft1, $ft0 - fst.s $ft1, $a2, 0 - fld.s $ft1, $a5, 4 - fmul.s $fa6, $fa6, $ft2 - fmul.s $fa7, $fa7, $fa7 - fsub.s $fa6, $fa6, $fa7 - fsub.s $ft1, $ft1, $ft0 - fst.s $ft1, $a3, 0 - fld.s $ft2, $s8, 0 - fld.s $ft3, $s7, 0 - fdiv.s $fa6, $fa6, $fa7 - fld.s $fa7, $a2, 0 + fld.s $ft3, $a5, 0 + fsub.s $ft4, $ft3, $ft3 + fst.s $ft4, $s8, 0 + fldx.s $ft4, $a5, $t0 + fsub.s $ft4, $ft4, $ft3 + fst.s $ft4, $s7, 0 + fld.s $ft4, $a5, 0 + fmul.s $ft2, $ft2, $fa1 + fsub.s $ft4, $ft4, $ft3 + fst.s $ft4, $a2, 0 + fld.s $ft4, $a5, 4 + fmul.s $ft1, $ft1, $fa1 + fmul.s $ft2, $ft2, $ft2 + fsub.s $ft1, $ft1, $ft2 + fsub.s $ft4, $ft4, $ft3 + fst.s $ft4, $a3, 0 + fld.s $ft5, $s8, 0 + fld.s $ft6, $s7, 0 + fdiv.s $ft1, $ft1, $ft2 + fld.s $ft2, $a2, 0 + fmul.s $ft7, $ft5, $ft5 + fmul.s $ft8, $ft6, $ft6 + fadd.s $ft7, $ft7, $ft8 + fmul.s $ft8, $ft2, $ft2 + fadd.s $ft7, $ft7, $ft8 + fmul.s $ft8, $ft4, $ft4 + fadd.s $ft7, $ft8, $ft7 + fmul.s $ft8, $ft3, $ft3 + fdiv.s $ft7, $ft7, $ft8 + fadd.s $ft5, $ft5, $ft6 + fadd.s $ft2, $ft5, $ft2 + fadd.s $ft2, $ft4, $ft2 + fdiv.s $ft2, $ft2, $ft3 + fcvt.d.s $ft3, $ft7 + fmul.d $ft3, $ft3, $fa2 fmul.s $ft4, $ft2, $ft2 - fmul.s $ft5, $ft3, $ft3 - fadd.s $ft4, $ft4, $ft5 - fmul.s $ft5, $fa7, $fa7 - fadd.s $ft4, $ft4, $ft5 - fmul.s $ft5, $ft1, $ft1 - fadd.s $ft4, $ft5, $ft4 - fmul.s $ft5, $ft0, $ft0 - fdiv.s $ft4, $ft4, $ft5 - fadd.s $ft2, $ft2, $ft3 - fadd.s $fa7, $ft2, $fa7 - fadd.s $fa7, $ft1, $fa7 - fdiv.s $ft0, $fa7, $ft0 - fcvt.d.s $ft1, $ft4 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa7, $a0, %pc_lo12(.LCPI0_1) - fmul.d $ft1, $ft1, $fa1 - fmul.s $ft2, $ft0, $ft0 + fcvt.d.s $ft4, $ft4 + fmul.d $ft4, $ft4, $fa3 + fadd.d $ft3, $ft3, $ft4 + fcvt.s.d $ft3, $ft3 fcvt.d.s $ft2, $ft2 - fmul.d $ft2, $ft2, $fa7 - fadd.d $ft1, $ft1, $ft2 - fcvt.s.d $ft1, $ft1 - fcvt.d.s $ft0, $ft0 - fmul.d $ft0, $ft0, $fa2 - fadd.d $ft0, $ft0, $fa3 - fcvt.s.d $ft0, $ft0 - fmul.s $ft0, $ft0, $ft0 - fdiv.s $ft0, $ft1, $ft0 - fsub.s $ft1, $ft0, $fa6 - fadd.s $ft0, $fa6, $fa4 - fmul.s $ft0, $fa6, $ft0 - fdiv.s $ft1, $ft1, $ft0 - fcvt.d.s $ft1, $ft1 - fadd.d $ft2, $ft1, $fa3 - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $ft1, $a0, %pc_lo12(.LCPI0_2) - frecip.d $ft2, $ft2 - fcvt.s.d $ft3, $ft2 - fst.s $ft3, $a6, 0 - fcmp.clt.d $fcc0, $ft2, $ft1 - pcalau12i $s6, %pc_hi20(.LCPI0_3) - fmov.s $ft3, $fa0 - ld.d $a1, $sp, 232 # 8-byte Folded Reload + fmul.d $ft2, $ft2, $fa4 + fadd.d $ft2, $ft2, $fa5 + fcvt.s.d $ft2, $ft2 + fmul.s $ft2, $ft2, $ft2 + fdiv.s $ft2, $ft3, $ft2 + fsub.s $ft3, $ft2, $ft1 + fadd.s $ft2, $ft1, $fa6 + fmul.s $ft2, $ft1, $ft2 + fdiv.s $ft3, $ft3, $ft2 + fcvt.d.s $ft3, $ft3 + fadd.d $ft3, $ft3, $fa5 + frecip.d $ft3, $ft3 + fcvt.s.d $ft4, $ft3 + fcmp.clt.d $fcc0, $ft3, $fa7 + fst.s $ft4, $a6, 0 + fmov.s $ft4, $fa0 bcnez $fcc0, .LBB0_8 # %bb.6: # in Loop: Header=BB0_1 Depth=1 - fld.d $ft3, $s6, %pc_lo12(.LCPI0_3) - fcmp.cule.d $fcc0, $ft2, $ft3 + movgr2fr.d $ft4, $s6 + fcmp.cule.d $fcc0, $ft3, $ft4 bcnez $fcc0, .LBB0_9 # %bb.7: # in Loop: Header=BB0_1 Depth=1 - vldi $vr11, -1168 + vldi $vr12, -1168 .LBB0_8: # %.sink.split # in Loop: Header=BB0_1 Depth=1 - fst.s $ft3, $a6, 0 + fst.s $ft4, $a6, 0 .LBB0_9: # in Loop: Header=BB0_1 Depth=1 - fldx.s $ft2, $a5, $t4 - fsub.s $ft3, $ft2, $ft2 - fstx.s $ft3, $s8, $t4 + fldx.s $ft3, $a5, $t3 + fsub.s $ft4, $ft3, $ft3 + fstx.s $ft4, $s8, $t3 ori $a0, $t0, 4092 st.d $a0, $sp, 272 # 8-byte Folded Spill - fldx.s $ft3, $a5, $a0 - fsub.s $ft3, $ft3, $ft2 - fstx.s $ft3, $s7, $t4 + fldx.s $ft4, $a5, $a0 + fsub.s $ft4, $ft4, $ft3 + fstx.s $ft4, $s7, $t3 ori $a0, $zero, 4088 - fldx.s $ft3, $a5, $a0 - fsub.s $ft3, $ft3, $ft2 - fstx.s $ft3, $a2, $t4 - fldx.s $ft3, $a5, $t4 - fsub.s $ft3, $ft3, $ft2 - fstx.s $ft3, $a3, $t4 - fldx.s $ft4, $s8, $t4 - fldx.s $ft5, $s7, $t4 - fldx.s $ft6, $a2, $t4 - fmul.s $ft7, $ft4, $ft4 + fldx.s $ft4, $a5, $a0 + fsub.s $ft4, $ft4, $ft3 + fstx.s $ft4, $a2, $t3 + fldx.s $ft4, $a5, $t3 + fsub.s $ft4, $ft4, $ft3 + fstx.s $ft4, $a3, $t3 + fldx.s $ft5, $s8, $t3 + fldx.s $ft6, $s7, $t3 + fldx.s $ft7, $a2, $t3 fmul.s $ft8, $ft5, $ft5 - fadd.s $ft7, $ft7, $ft8 - fmul.s $ft8, $ft6, $ft6 - fadd.s $ft7, $ft7, $ft8 - fmul.s $ft8, $ft3, $ft3 - fadd.s $ft7, $ft8, $ft7 - fmul.s $ft8, $ft2, $ft2 - fdiv.s $ft7, $ft7, $ft8 + fmul.s $ft9, $ft6, $ft6 + fadd.s $ft8, $ft8, $ft9 + fmul.s $ft9, $ft7, $ft7 + fadd.s $ft8, $ft8, $ft9 + fmul.s $ft9, $ft4, $ft4 + fadd.s $ft8, $ft9, $ft8 + fmul.s $ft9, $ft3, $ft3 + fdiv.s $ft8, $ft8, $ft9 + fadd.s $ft5, $ft5, $ft6 + fadd.s $ft5, $ft5, $ft7 fadd.s $ft4, $ft4, $ft5 - fadd.s $ft4, $ft4, $ft6 - fadd.s $ft3, $ft3, $ft4 - fdiv.s $ft2, $ft3, $ft2 - fcvt.d.s $ft3, $ft7 - fmul.d $ft3, $ft3, $fa1 - fmul.s $ft4, $ft2, $ft2 - fcvt.d.s $ft4, $ft4 - fmul.d $ft4, $ft4, $fa7 - fadd.d $ft3, $ft3, $ft4 + fdiv.s $ft3, $ft4, $ft3 + fcvt.d.s $ft4, $ft8 + fmul.d $ft4, $ft4, $fa2 + fmul.s $ft5, $ft3, $ft3 + fcvt.d.s $ft5, $ft5 + fmul.d $ft5, $ft5, $fa3 + fadd.d $ft4, $ft4, $ft5 + fcvt.s.d $ft4, $ft4 + fcvt.d.s $ft3, $ft3 + fmul.d $ft3, $ft3, $fa4 + fadd.d $ft3, $ft3, $fa5 fcvt.s.d $ft3, $ft3 - fcvt.d.s $ft2, $ft2 - fmul.d $ft2, $ft2, $fa2 - fadd.d $ft2, $ft2, $fa3 - fcvt.s.d $ft2, $ft2 - fmul.s $ft2, $ft2, $ft2 - fdiv.s $ft2, $ft3, $ft2 - fsub.s $ft2, $ft2, $fa6 - fdiv.s $ft2, $ft2, $ft0 - fcvt.d.s $ft2, $ft2 - fadd.d $ft2, $ft2, $fa3 - frecip.d $ft2, $ft2 - fcvt.s.d $ft3, $ft2 - fcmp.clt.d $fcc0, $ft2, $ft1 - fstx.s $ft3, $a6, $t4 - fmov.s $ft3, $fa0 + fmul.s $ft3, $ft3, $ft3 + fdiv.s $ft3, $ft4, $ft3 + fsub.s $ft3, $ft3, $ft1 + fdiv.s $ft3, $ft3, $ft2 + fcvt.d.s $ft3, $ft3 + fadd.d $ft3, $ft3, $fa5 + frecip.d $ft3, $ft3 + fcvt.s.d $ft4, $ft3 + fcmp.clt.d $fcc0, $ft3, $fa7 + fstx.s $ft4, $a6, $t3 + fmov.s $ft4, $fa0 bcnez $fcc0, .LBB0_12 # %bb.10: # in Loop: Header=BB0_1 Depth=1 - fld.d $ft3, $s6, %pc_lo12(.LCPI0_3) - fcmp.cule.d $fcc0, $ft2, $ft3 + movgr2fr.d $ft4, $s6 + fcmp.cule.d $fcc0, $ft3, $ft4 bcnez $fcc0, .LBB0_13 # %bb.11: # in Loop: Header=BB0_1 Depth=1 - vldi $vr11, -1168 + vldi $vr12, -1168 .LBB0_12: # %.sink.split663 # in Loop: Header=BB0_1 Depth=1 - fstx.s $ft3, $a6, $t4 + fstx.s $ft4, $a6, $t3 .LBB0_13: # in Loop: Header=BB0_1 Depth=1 - ld.d $a4, $sp, 216 # 8-byte Folded Reload - fldx.s $ft2, $a4, $t0 - fld.s $ft3, $a4, 0 - fsub.s $ft3, $ft3, $ft2 - fst.s $ft3, $t6, 0 - fldx.s $ft3, $a4, $t0 - fsub.s $ft3, $ft3, $ft2 - fst.s $ft3, $s2, 0 - fldx.s $ft3, $a4, $t0 - fsub.s $ft3, $ft3, $ft2 - fst.s $ft3, $a1, 0 - ld.d $a0, $sp, 112 # 8-byte Folded Reload - fldx.s $ft3, $a4, $a0 - fsub.s $ft3, $ft3, $ft2 - fst.s $ft3, $a7, 0 - fld.s $ft4, $t6, 0 - fld.s $ft5, $s2, 0 - fld.s $ft6, $a1, 0 - fmul.s $ft7, $ft4, $ft4 + ld.d $a1, $sp, 224 # 8-byte Folded Reload + fldx.s $ft3, $a1, $t0 + fld.s $ft4, $a1, 0 + fsub.s $ft4, $ft4, $ft3 + fst.s $ft4, $t6, 0 + fldx.s $ft4, $a1, $t0 + fsub.s $ft4, $ft4, $ft3 + fst.s $ft4, $s3, 0 + fldx.s $ft4, $a1, $t0 + fsub.s $ft4, $ft4, $ft3 + fst.s $ft4, $s4, 0 + ld.d $a0, $sp, 120 # 8-byte Folded Reload + fldx.s $ft4, $a1, $a0 + fsub.s $ft4, $ft4, $ft3 + fst.s $ft4, $a7, 0 + fld.s $ft5, $t6, 0 + fld.s $ft6, $s3, 0 + fld.s $ft7, $s4, 0 fmul.s $ft8, $ft5, $ft5 - fadd.s $ft7, $ft7, $ft8 - fmul.s $ft8, $ft6, $ft6 - fadd.s $ft7, $ft7, $ft8 - fmul.s $ft8, $ft3, $ft3 - fadd.s $ft7, $ft8, $ft7 - fmul.s $ft8, $ft2, $ft2 - fdiv.s $ft7, $ft7, $ft8 + fmul.s $ft9, $ft6, $ft6 + fadd.s $ft8, $ft8, $ft9 + fmul.s $ft9, $ft7, $ft7 + fadd.s $ft8, $ft8, $ft9 + fmul.s $ft9, $ft4, $ft4 + fadd.s $ft8, $ft9, $ft8 + fmul.s $ft9, $ft3, $ft3 + fdiv.s $ft8, $ft8, $ft9 + fadd.s $ft5, $ft5, $ft6 + fadd.s $ft5, $ft5, $ft7 fadd.s $ft4, $ft4, $ft5 - fadd.s $ft4, $ft4, $ft6 - fadd.s $ft3, $ft3, $ft4 - fdiv.s $ft2, $ft3, $ft2 - fcvt.d.s $ft3, $ft7 - fmul.d $ft3, $ft3, $fa1 - fmul.s $ft4, $ft2, $ft2 - fcvt.d.s $ft4, $ft4 - fmul.d $ft4, $ft4, $fa7 - fadd.d $ft3, $ft3, $ft4 + fdiv.s $ft3, $ft4, $ft3 + fcvt.d.s $ft4, $ft8 + fmul.d $ft4, $ft4, $fa2 + fmul.s $ft5, $ft3, $ft3 + fcvt.d.s $ft5, $ft5 + fmul.d $ft5, $ft5, $fa3 + fadd.d $ft4, $ft4, $ft5 + fcvt.s.d $ft4, $ft4 + fcvt.d.s $ft3, $ft3 + fmul.d $ft3, $ft3, $fa4 + fadd.d $ft3, $ft3, $fa5 fcvt.s.d $ft3, $ft3 - fcvt.d.s $ft2, $ft2 - fmul.d $ft2, $ft2, $fa2 - fadd.d $ft2, $ft2, $fa3 - fcvt.s.d $ft2, $ft2 - fmul.s $ft2, $ft2, $ft2 - fdiv.s $ft2, $ft3, $ft2 - fsub.s $ft2, $ft2, $fa6 - fdiv.s $ft2, $ft2, $ft0 - fcvt.d.s $ft2, $ft2 - fadd.d $ft2, $ft2, $fa3 - frecip.d $ft2, $ft2 - fcvt.s.d $ft3, $ft2 - fcmp.clt.d $fcc0, $ft2, $ft1 - fst.s $ft3, $ra, 0 - fmov.s $ft3, $fa0 + fmul.s $ft3, $ft3, $ft3 + fdiv.s $ft3, $ft4, $ft3 + fsub.s $ft3, $ft3, $ft1 + fdiv.s $ft3, $ft3, $ft2 + fcvt.d.s $ft3, $ft3 + fadd.d $ft3, $ft3, $fa5 + frecip.d $ft3, $ft3 + fcvt.s.d $ft4, $ft3 + fcmp.clt.d $fcc0, $ft3, $fa7 + fst.s $ft4, $ra, 0 + fmov.s $ft4, $fa0 bcnez $fcc0, .LBB0_16 # %bb.14: # in Loop: Header=BB0_1 Depth=1 - fld.d $ft3, $s6, %pc_lo12(.LCPI0_3) - fcmp.cule.d $fcc0, $ft2, $ft3 + movgr2fr.d $ft4, $s6 + fcmp.cule.d $fcc0, $ft3, $ft4 bcnez $fcc0, .LBB0_17 # %bb.15: # in Loop: Header=BB0_1 Depth=1 - vldi $vr11, -1168 + vldi $vr12, -1168 .LBB0_16: # %.sink.split665 # in Loop: Header=BB0_1 Depth=1 - fst.s $ft3, $ra, 0 + fst.s $ft4, $ra, 0 .LBB0_17: # in Loop: Header=BB0_1 Depth=1 - ld.d $a0, $sp, 216 # 8-byte Folded Reload - ld.d $a2, $sp, 272 # 8-byte Folded Reload - fldx.s $ft2, $a0, $a2 - fldx.s $ft3, $a0, $t4 - fsub.s $ft3, $ft3, $ft2 - fstx.s $ft3, $t6, $t4 - fldx.s $ft3, $a0, $a2 - fsub.s $ft3, $ft3, $ft2 - fstx.s $ft3, $s2, $t4 - ori $t2, $t0, 4088 - fldx.s $ft3, $a0, $t2 - fsub.s $ft3, $ft3, $ft2 - fstx.s $ft3, $a1, $t4 - fldx.s $ft3, $a0, $a2 - fsub.s $ft3, $ft3, $ft2 - fstx.s $ft3, $a7, $t4 - fldx.s $ft4, $t6, $t4 - fldx.s $ft5, $s2, $t4 - fldx.s $ft6, $a1, $t4 - fmul.s $ft7, $ft4, $ft4 + ld.d $a0, $sp, 224 # 8-byte Folded Reload + ld.d $a1, $sp, 272 # 8-byte Folded Reload + fldx.s $ft3, $a0, $a1 + fldx.s $ft4, $a0, $t3 + fsub.s $ft4, $ft4, $ft3 + fstx.s $ft4, $t6, $t3 + fldx.s $ft4, $a0, $a1 + fsub.s $ft4, $ft4, $ft3 + fstx.s $ft4, $s3, $t3 + ori $a3, $t0, 4088 + fldx.s $ft4, $a0, $a3 + fsub.s $ft4, $ft4, $ft3 + fstx.s $ft4, $s4, $t3 + fldx.s $ft4, $a0, $a1 + fsub.s $ft4, $ft4, $ft3 + fstx.s $ft4, $a7, $t3 + fldx.s $ft5, $t6, $t3 + fldx.s $ft6, $s3, $t3 + fldx.s $ft7, $s4, $t3 fmul.s $ft8, $ft5, $ft5 - fadd.s $ft7, $ft7, $ft8 - fmul.s $ft8, $ft6, $ft6 - fadd.s $ft7, $ft7, $ft8 - fmul.s $ft8, $ft3, $ft3 - fadd.s $ft7, $ft8, $ft7 - fmul.s $ft8, $ft2, $ft2 - fdiv.s $ft7, $ft7, $ft8 + fmul.s $ft9, $ft6, $ft6 + fadd.s $ft8, $ft8, $ft9 + fmul.s $ft9, $ft7, $ft7 + fadd.s $ft8, $ft8, $ft9 + fmul.s $ft9, $ft4, $ft4 + fadd.s $ft8, $ft9, $ft8 + fmul.s $ft9, $ft3, $ft3 + fdiv.s $ft8, $ft8, $ft9 + fadd.s $ft5, $ft5, $ft6 + fadd.s $ft5, $ft5, $ft7 fadd.s $ft4, $ft4, $ft5 - fadd.s $ft4, $ft4, $ft6 - fadd.s $ft3, $ft3, $ft4 - fdiv.s $ft2, $ft3, $ft2 - fcvt.d.s $ft3, $ft7 - fmul.d $ft3, $ft3, $fa1 - fmul.s $ft4, $ft2, $ft2 - fcvt.d.s $ft4, $ft4 - fmul.d $ft4, $ft4, $fa7 - fadd.d $ft3, $ft3, $ft4 + fdiv.s $ft3, $ft4, $ft3 + fcvt.d.s $ft4, $ft8 + fmul.d $ft4, $ft4, $fa2 + fmul.s $ft5, $ft3, $ft3 + fcvt.d.s $ft5, $ft5 + fmul.d $ft5, $ft5, $fa3 + fadd.d $ft4, $ft4, $ft5 + fcvt.s.d $ft4, $ft4 + fcvt.d.s $ft3, $ft3 + fmul.d $ft3, $ft3, $fa4 + fadd.d $ft3, $ft3, $fa5 fcvt.s.d $ft3, $ft3 - fcvt.d.s $ft2, $ft2 - fmul.d $ft2, $ft2, $fa2 - fadd.d $ft2, $ft2, $fa3 - fcvt.s.d $ft2, $ft2 - fmul.s $ft2, $ft2, $ft2 - fdiv.s $ft2, $ft3, $ft2 - fsub.s $ft2, $ft2, $fa6 - fdiv.s $ft2, $ft2, $ft0 - fcvt.d.s $ft2, $ft2 - fadd.d $ft2, $ft2, $fa3 - frecip.d $ft2, $ft2 - fcvt.s.d $ft3, $ft2 - fcmp.clt.d $fcc0, $ft2, $ft1 - ld.d $a0, $sp, 208 # 8-byte Folded Reload - fst.s $ft3, $a0, 0 + fmul.s $ft3, $ft3, $ft3 + fdiv.s $ft3, $ft4, $ft3 + fsub.s $ft3, $ft3, $ft1 + fdiv.s $ft3, $ft3, $ft2 + fcvt.d.s $ft3, $ft3 + fadd.d $ft3, $ft3, $fa5 + frecip.d $ft3, $ft3 + fcvt.s.d $ft4, $ft3 + fcmp.clt.d $fcc0, $ft3, $fa7 + ld.d $a0, $sp, 216 # 8-byte Folded Reload + fst.s $ft4, $a0, 0 bceqz $fcc0, .LBB0_19 # %bb.18: # in Loop: Header=BB0_1 Depth=1 - movgr2fr.w $ft2, $zero + movgr2fr.w $ft3, $zero + lu52i.d $s7, $zero, 1022 + lu52i.d $s8, $zero, 1021 b .LBB0_21 .p2align 4, , 16 .LBB0_19: # in Loop: Header=BB0_1 Depth=1 - fld.d $ft3, $s6, %pc_lo12(.LCPI0_3) - fcmp.cule.d $fcc0, $ft2, $ft3 + movgr2fr.d $ft4, $s6 + fcmp.cule.d $fcc0, $ft3, $ft4 + lu52i.d $s7, $zero, 1022 + lu52i.d $s8, $zero, 1021 bcnez $fcc0, .LBB0_22 # %bb.20: # in Loop: Header=BB0_1 Depth=1 - vldi $vr10, -1168 + vldi $vr11, -1168 .LBB0_21: # %.sink.split667 # in Loop: Header=BB0_1 Depth=1 - ld.d $a0, $sp, 208 # 8-byte Folded Reload - fst.s $ft2, $a0, 0 + ld.d $a0, $sp, 216 # 8-byte Folded Reload + fst.s $ft3, $a0, 0 .LBB0_22: # %.preheader621.preheader # in Loop: Header=BB0_1 Depth=1 - st.d $s1, $sp, 176 # 8-byte Folded Spill - vreplvei.w $vr10, $vr6, 0 - vreplvei.w $vr11, $vr8, 0 - ori $a5, $zero, 1 - ld.d $a4, $sp, 8 # 8-byte Folded Reload - ld.d $a2, $sp, 16 # 8-byte Folded Reload - ld.d $a1, $sp, 24 # 8-byte Folded Reload - ld.d $a0, $sp, 32 # 8-byte Folded Reload - ld.d $s7, $sp, 40 # 8-byte Folded Reload - ld.d $ra, $sp, 184 # 8-byte Folded Reload - ld.d $t8, $sp, 104 # 8-byte Folded Reload - ld.d $fp, $sp, 56 # 8-byte Folded Reload + st.d $s1, $sp, 184 # 8-byte Folded Spill + vreplvei.w $vr11, $vr9, 0 + vreplvei.w $vr12, $vr10, 0 + ori $t4, $zero, 1 + ld.d $ra, $sp, 16 # 8-byte Folded Reload + ld.d $a4, $sp, 24 # 8-byte Folded Reload + ld.d $a2, $sp, 32 # 8-byte Folded Reload + ld.d $a1, $sp, 40 # 8-byte Folded Reload + ld.d $a0, $sp, 48 # 8-byte Folded Reload + ld.d $t2, $sp, 192 # 8-byte Folded Reload + ld.d $t6, $sp, 112 # 8-byte Folded Reload + ld.d $t8, $sp, 64 # 8-byte Folded Reload + ld.d $fp, $sp, 104 # 8-byte Folded Reload ld.d $s4, $sp, 96 # 8-byte Folded Reload - ld.d $s8, $sp, 88 # 8-byte Folded Reload - ld.d $t4, $sp, 80 # 8-byte Folded Reload - move $s5, $a6 + ld.d $s1, $sp, 88 # 8-byte Folded Reload + move $t1, $a6 lu12i.w $a6, 2 b .LBB0_24 .p2align 4, , 16 .LBB0_23: # in Loop: Header=BB0_24 Depth=2 - addi.d $a5, $a5, 1 - add.d $s5, $s5, $t0 - add.d $t4, $t4, $t0 - add.d $s8, $s8, $t0 + addi.d $t4, $t4, 1 + add.d $t1, $t1, $t0 + add.d $s1, $s1, $t0 add.d $s4, $s4, $t0 add.d $fp, $fp, $t0 add.d $t8, $t8, $t0 - add.d $ra, $ra, $t0 - add.d $s7, $s7, $t0 + add.d $t6, $t6, $t0 + add.d $t2, $t2, $t0 add.d $a0, $a0, $t0 add.d $a1, $a1, $t0 add.d $a2, $a2, $t0 add.d $a4, $a4, $t0 - ld.d $t7, $sp, 264 # 8-byte Folded Reload - ori $a3, $zero, 2047 - lu52i.d $s0, $zero, 1022 - lu52i.d $s3, $zero, -1029 - beq $a5, $a3, .LBB0_43 + add.d $ra, $ra, $t0 + ori $a5, $zero, 2047 + lu52i.d $s7, $zero, 1022 + lu52i.d $s8, $zero, 1021 + beq $t4, $a5, .LBB0_43 .LBB0_24: # %.preheader621 # Parent Loop BB0_1 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB0_27 Depth 3 # Child Loop BB0_40 Depth 3 - ori $a3, $zero, 1 - ld.d $a7, $sp, 248 # 8-byte Folded Reload + ori $a5, $zero, 1 + ld.d $a7, $sp, 256 # 8-byte Folded Reload bnez $a7, .LBB0_36 # %bb.25: # %vector.body814.preheader # in Loop: Header=BB0_24 Depth=2 - ori $s2, $t3, 16 - ld.d $t6, $sp, 240 # 8-byte Folded Reload + ori $s5, $t5, 16 + lu52i.d $t7, $zero, -1029 + lu52i.d $s0, $zero, -1175 + ld.d $s3, $sp, 248 # 8-byte Folded Reload b .LBB0_27 .p2align 4, , 16 .LBB0_26: # %pred.store.continue829 # in Loop: Header=BB0_27 Depth=3 - addi.d $s2, $s2, 16 - beqz $s2, .LBB0_35 + addi.d $s5, $s5, 16 + beqz $s5, .LBB0_35 .LBB0_27: # %vector.body814 # Parent Loop BB0_1 Depth=1 # Parent Loop BB0_24 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $a3, $fp, $s2 - vld $vr12, $a3, -16 - vldx $vr13, $a3, $t6 - vldx $vr14, $a3, $t5 - vfsub.s $vr13, $vr13, $vr12 - add.d $a7, $t8, $s2 - vstx $vr13, $a7, $t5 - vfsub.s $vr14, $vr14, $vr12 - vld $vr15, $a3, -20 - add.d $a7, $s4, $s2 - vstx $vr14, $a7, $t5 - vld $vr16, $a3, -12 - vfsub.s $vr15, $vr15, $vr12 - add.d $a3, $s8, $s2 - vstx $vr15, $a3, $t5 - vfsub.s $vr16, $vr16, $vr12 - add.d $a3, $t4, $s2 - vstx $vr16, $a3, $t5 - vfmul.s $vr17, $vr13, $vr13 + add.d $a5, $t8, $s5 + vld $vr13, $a5, -16 + vldx $vr14, $a5, $s3 + vldx $vr15, $a5, $s2 + vfsub.s $vr14, $vr14, $vr13 + add.d $a7, $t6, $s5 + vstx $vr14, $a7, $s2 + vfsub.s $vr15, $vr15, $vr13 + vld $vr16, $a5, -20 + add.d $a7, $fp, $s5 + vstx $vr15, $a7, $s2 + vld $vr17, $a5, -12 + vfsub.s $vr16, $vr16, $vr13 + add.d $a5, $s4, $s5 + vstx $vr16, $a5, $s2 + vfsub.s $vr17, $vr17, $vr13 + add.d $a5, $s1, $s5 + vstx $vr17, $a5, $s2 vfmul.s $vr18, $vr14, $vr14 - vfadd.s $vr17, $vr17, $vr18 - vfmul.s $vr18, $vr15, $vr15 - vfadd.s $vr17, $vr17, $vr18 - vfmul.s $vr18, $vr16, $vr16 - vfadd.s $vr17, $vr18, $vr17 - vfmul.s $vr18, $vr12, $vr12 - vfdiv.s $vr17, $vr17, $vr18 - vfadd.s $vr13, $vr13, $vr14 - vfadd.s $vr13, $vr13, $vr15 - vfadd.s $vr13, $vr16, $vr13 - vfdiv.s $vr12, $vr13, $vr12 - vreplvei.w $vr13, $vr17, 3 - fcvt.d.s $ft5, $ft5 - vreplvei.w $vr14, $vr17, 2 + vfmul.s $vr19, $vr15, $vr15 + vfadd.s $vr18, $vr18, $vr19 + vfmul.s $vr19, $vr16, $vr16 + vfadd.s $vr18, $vr18, $vr19 + vfmul.s $vr19, $vr17, $vr17 + vfadd.s $vr18, $vr19, $vr18 + vfmul.s $vr19, $vr13, $vr13 + vfdiv.s $vr18, $vr18, $vr19 + vfadd.s $vr14, $vr14, $vr15 + vfadd.s $vr14, $vr14, $vr16 + vfadd.s $vr14, $vr17, $vr14 + vfdiv.s $vr13, $vr14, $vr13 + vreplvei.w $vr14, $vr18, 3 fcvt.d.s $ft6, $ft6 - vextrins.d $vr14, $vr13, 16 - vreplvei.w $vr13, $vr17, 1 - fcvt.d.s $ft5, $ft5 - vreplvei.w $vr15, $vr17, 0 + vreplvei.w $vr15, $vr18, 2 fcvt.d.s $ft7, $ft7 - vextrins.d $vr15, $vr13, 16 - vreplgr2vr.d $vr13, $s0 - vfmul.d $vr15, $vr15, $vr13 - vfmul.d $vr13, $vr14, $vr13 - vfmul.s $vr14, $vr12, $vr12 - vreplvei.w $vr16, $vr14, 3 + vextrins.d $vr15, $vr14, 16 + vreplvei.w $vr14, $vr18, 1 + fcvt.d.s $ft6, $ft6 + vreplvei.w $vr16, $vr18, 0 fcvt.d.s $ft8, $ft8 - vreplvei.w $vr17, $vr14, 2 + vextrins.d $vr16, $vr14, 16 + vreplgr2vr.d $vr14, $s7 + vfmul.d $vr16, $vr16, $vr14 + vfmul.d $vr14, $vr15, $vr14 + vfmul.s $vr15, $vr13, $vr13 + vreplvei.w $vr17, $vr15, 3 fcvt.d.s $ft9, $ft9 - vextrins.d $vr17, $vr16, 16 - vreplvei.w $vr16, $vr14, 1 - fcvt.d.s $ft8, $ft8 - vreplvei.w $vr14, $vr14, 0 - fcvt.d.s $ft6, $ft6 - vextrins.d $vr14, $vr16, 16 - vreplgr2vr.d $vr16, $s3 - vfmul.d $vr14, $vr14, $vr16 - vfmul.d $vr16, $vr17, $vr16 - vfadd.d $vr16, $vr13, $vr16 - vfadd.d $vr13, $vr15, $vr14 - vreplvei.d $vr14, $vr13, 1 + vreplvei.w $vr18, $vr15, 2 + fcvt.d.s $ft10, $ft10 + vextrins.d $vr18, $vr17, 16 + vreplvei.w $vr17, $vr15, 1 + fcvt.d.s $ft9, $ft9 + vreplvei.w $vr15, $vr15, 0 + fcvt.d.s $ft7, $ft7 + vextrins.d $vr15, $vr17, 16 + vreplgr2vr.d $vr17, $t7 + vfmul.d $vr15, $vr15, $vr17 + vfmul.d $vr17, $vr18, $vr17 + vfadd.d $vr17, $vr14, $vr17 + vfadd.d $vr14, $vr16, $vr15 + vreplvei.d $vr15, $vr14, 1 + fcvt.s.d $ft7, $ft7 + vreplvei.d $vr14, $vr14, 0 fcvt.s.d $ft6, $ft6 + vextrins.w $vr14, $vr15, 16 + vreplvei.d $vr15, $vr17, 0 + fcvt.s.d $ft7, $ft7 + vextrins.w $vr14, $vr15, 32 + vreplvei.d $vr15, $vr17, 1 + fcvt.s.d $ft7, $ft7 + vextrins.w $vr14, $vr15, 48 + vreplvei.w $vr15, $vr13, 3 + fcvt.d.s $ft7, $ft7 + vreplvei.w $vr16, $vr13, 2 + fcvt.d.s $ft8, $ft8 + vextrins.d $vr16, $vr15, 16 + vreplvei.w $vr15, $vr13, 1 + fcvt.d.s $ft7, $ft7 + vreplvei.w $vr13, $vr13, 0 + fcvt.d.s $ft5, $ft5 + vextrins.d $vr13, $vr15, 16 + vreplgr2vr.d $vr15, $s8 + vfmul.d $vr13, $vr13, $vr15 + vfmul.d $vr15, $vr16, $vr15 + lu52i.d $a5, $zero, 1023 + vreplgr2vr.d $vr16, $a5 + vfadd.d $vr15, $vr15, $vr16 + vfadd.d $vr13, $vr13, $vr16 + vreplvei.d $vr17, $vr13, 1 + fcvt.s.d $ft9, $ft9 vreplvei.d $vr13, $vr13, 0 fcvt.s.d $ft5, $ft5 - vextrins.w $vr13, $vr14, 16 - vreplvei.d $vr14, $vr16, 0 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr13, $vr14, 32 - vreplvei.d $vr14, $vr16, 1 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr13, $vr14, 48 - vreplvei.w $vr14, $vr12, 3 + vextrins.w $vr13, $vr17, 16 + vreplvei.d $vr17, $vr15, 0 + fcvt.s.d $ft9, $ft9 + vextrins.w $vr13, $vr17, 32 + vreplvei.d $vr15, $vr15, 1 + fcvt.s.d $ft7, $ft7 + vextrins.w $vr13, $vr15, 48 + vfmul.s $vr13, $vr13, $vr13 + vfdiv.s $vr13, $vr14, $vr13 + vfsub.s $vr13, $vr13, $vr11 + vfdiv.s $vr13, $vr13, $vr12 + vreplvei.w $vr14, $vr13, 3 fcvt.d.s $ft6, $ft6 - vreplvei.w $vr15, $vr12, 2 + vreplvei.w $vr15, $vr13, 2 fcvt.d.s $ft7, $ft7 vextrins.d $vr15, $vr14, 16 - vreplvei.w $vr14, $vr12, 1 - fcvt.d.s $ft6, $ft6 - vreplvei.w $vr12, $vr12, 0 - fcvt.d.s $ft4, $ft4 - vextrins.d $vr12, $vr14, 16 - lu52i.d $a3, $zero, 1021 - vreplgr2vr.d $vr14, $a3 - vfmul.d $vr12, $vr12, $vr14 - vfmul.d $vr14, $vr15, $vr14 - lu52i.d $a3, $zero, 1023 - vreplgr2vr.d $vr15, $a3 - vfadd.d $vr14, $vr14, $vr15 - vfadd.d $vr12, $vr12, $vr15 - vreplvei.d $vr16, $vr12, 1 - fcvt.s.d $ft8, $ft8 - vreplvei.d $vr12, $vr12, 0 - fcvt.s.d $ft4, $ft4 - vextrins.w $vr12, $vr16, 16 - vreplvei.d $vr16, $vr14, 0 - fcvt.s.d $ft8, $ft8 - vextrins.w $vr12, $vr16, 32 - vreplvei.d $vr14, $vr14, 1 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr12, $vr14, 48 - vfmul.s $vr12, $vr12, $vr12 - vfdiv.s $vr12, $vr13, $vr12 - vfsub.s $vr12, $vr12, $vr10 - vfdiv.s $vr12, $vr12, $vr11 - vreplvei.w $vr13, $vr12, 3 - fcvt.d.s $ft5, $ft5 - vreplvei.w $vr14, $vr12, 2 + vreplvei.w $vr14, $vr13, 1 fcvt.d.s $ft6, $ft6 - vextrins.d $vr14, $vr13, 16 - vreplvei.w $vr13, $vr12, 1 + vreplvei.w $vr13, $vr13, 0 fcvt.d.s $ft5, $ft5 - vreplvei.w $vr12, $vr12, 0 - fcvt.d.s $ft4, $ft4 - vextrins.d $vr12, $vr13, 16 - vfadd.d $vr12, $vr12, $vr15 - vfadd.d $vr13, $vr14, $vr15 + vextrins.d $vr13, $vr14, 16 + vfadd.d $vr13, $vr13, $vr16 + vfadd.d $vr14, $vr15, $vr16 + vfrecip.d $vr14, $vr14 vfrecip.d $vr13, $vr13 - vfrecip.d $vr12, $vr12 - vreplvei.d $vr14, $vr12, 1 - fcvt.s.d $ft6, $ft6 - vreplvei.d $vr15, $vr12, 0 + vreplvei.d $vr15, $vr13, 1 fcvt.s.d $ft7, $ft7 - vextrins.w $vr15, $vr14, 16 - vreplvei.d $vr14, $vr13, 0 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr15, $vr14, 32 - vreplvei.d $vr14, $vr13, 1 - fcvt.s.d $ft6, $ft6 - vextrins.w $vr15, $vr14, 48 - add.d $a3, $s5, $s2 + vreplvei.d $vr16, $vr13, 0 + fcvt.s.d $ft8, $ft8 + vextrins.w $vr16, $vr15, 16 + vreplvei.d $vr15, $vr14, 0 + fcvt.s.d $ft7, $ft7 + vextrins.w $vr16, $vr15, 32 + vreplvei.d $vr15, $vr14, 1 + fcvt.s.d $ft7, $ft7 + vextrins.w $vr16, $vr15, 48 + add.d $a5, $t1, $s5 ori $a7, $t0, 4084 - vstx $vr15, $a3, $a7 - lu52i.d $t1, $zero, -1175 - vreplgr2vr.d $vr14, $t1 - vfcmp.clt.d $vr15, $vr12, $vr14 - vfcmp.clt.d $vr14, $vr13, $vr14 - vpickev.w $vr14, $vr14, $vr15 - vreplgr2vr.d $vr15, $t7 - vfcmp.clt.d $vr12, $vr15, $vr12 - vfcmp.clt.d $vr13, $vr15, $vr13 - vpickev.w $vr12, $vr13, $vr12 - vor.v $vr12, $vr12, $vr14 - lu12i.w $t1, 260096 - vreplgr2vr.w $vr13, $t1 - vrepli.b $vr15, 0 - vpickve2gr.w $t1, $vr12, 0 - andi $t1, $t1, 1 - vbitsel.v $vr13, $vr13, $vr15, $vr14 - bnez $t1, .LBB0_31 + vstx $vr16, $a5, $a7 + vreplgr2vr.d $vr15, $s0 + vfcmp.clt.d $vr16, $vr13, $vr15 + vfcmp.clt.d $vr15, $vr14, $vr15 + vpickev.w $vr15, $vr15, $vr16 + vreplgr2vr.d $vr16, $s6 + vfcmp.clt.d $vr13, $vr16, $vr13 + vfcmp.clt.d $vr14, $vr16, $vr14 + vpickev.w $vr13, $vr14, $vr13 + vor.v $vr13, $vr13, $vr15 + lu12i.w $t3, 260096 + vreplgr2vr.w $vr14, $t3 + vrepli.b $vr16, 0 + vpickve2gr.w $t3, $vr13, 0 + andi $t3, $t3, 1 + vbitsel.v $vr14, $vr14, $vr16, $vr15 + bnez $t3, .LBB0_31 # %bb.28: # %pred.store.continue # in Loop: Header=BB0_27 Depth=3 - vpickve2gr.w $a7, $vr12, 1 + vpickve2gr.w $a7, $vr13, 1 andi $a7, $a7, 1 bnez $a7, .LBB0_32 .LBB0_29: # %pred.store.continue825 # in Loop: Header=BB0_27 Depth=3 - vpickve2gr.w $a7, $vr12, 2 + vpickve2gr.w $a7, $vr13, 2 andi $a7, $a7, 1 bnez $a7, .LBB0_33 .LBB0_30: # %pred.store.continue827 # in Loop: Header=BB0_27 Depth=3 - vpickve2gr.w $a7, $vr12, 3 + vpickve2gr.w $a7, $vr13, 3 andi $a7, $a7, 1 beqz $a7, .LBB0_26 b .LBB0_34 .p2align 4, , 16 .LBB0_31: # %pred.store.if # in Loop: Header=BB0_27 Depth=3 - add.d $a7, $a3, $a7 - vstelm.w $vr13, $a7, 0, 0 - vpickve2gr.w $a7, $vr12, 1 + add.d $a7, $a5, $a7 + vstelm.w $vr14, $a7, 0, 0 + vpickve2gr.w $a7, $vr13, 1 andi $a7, $a7, 1 beqz $a7, .LBB0_29 .LBB0_32: # %pred.store.if824 # in Loop: Header=BB0_27 Depth=3 - add.d $a7, $a3, $t2 - vstelm.w $vr13, $a7, 0, 1 - vpickve2gr.w $a7, $vr12, 2 + add.d $a7, $a5, $a3 + vstelm.w $vr14, $a7, 0, 1 + vpickve2gr.w $a7, $vr13, 2 andi $a7, $a7, 1 beqz $a7, .LBB0_30 .LBB0_33: # %pred.store.if826 # in Loop: Header=BB0_27 Depth=3 ld.d $a7, $sp, 272 # 8-byte Folded Reload - add.d $a7, $a3, $a7 - vstelm.w $vr13, $a7, 0, 2 - vpickve2gr.w $a7, $vr12, 3 + add.d $a7, $a5, $a7 + vstelm.w $vr14, $a7, 0, 2 + vpickve2gr.w $a7, $vr13, 3 andi $a7, $a7, 1 beqz $a7, .LBB0_26 .LBB0_34: # %pred.store.if828 # in Loop: Header=BB0_27 Depth=3 - add.d $a3, $a3, $a6 - vstelm.w $vr13, $a3, 0, 3 + add.d $a5, $a5, $a6 + vstelm.w $vr14, $a5, 0, 3 b .LBB0_26 .p2align 4, , 16 .LBB0_35: # in Loop: Header=BB0_24 Depth=2 - ori $a3, $zero, 1021 + ori $a5, $zero, 1021 .LBB0_36: # %scalar.ph810.preheader # in Loop: Header=BB0_24 Depth=2 - slli.d $s2, $a3, 2 - ori $a3, $zero, 4092 + slli.d $s5, $a5, 2 + ori $s8, $zero, 4092 + move $a5, $ra move $s3, $a4 move $a7, $a2 - move $t1, $a1 + move $t3, $a1 move $t7, $a0 - move $s0, $s7 - move $t6, $ra + move $s0, $t2 b .LBB0_40 .p2align 4, , 16 .LBB0_37: # in Loop: Header=BB0_40 Depth=3 - movgr2fr.w $ft4, $zero + movgr2fr.w $ft5, $zero .LBB0_38: # %.sink.split669 # in Loop: Header=BB0_40 Depth=3 - fstx.s $ft4, $t6, $s2 + fstx.s $ft5, $s0, $s5 .LBB0_39: # in Loop: Header=BB0_40 Depth=3 - addi.d $a3, $a3, -4 - addi.d $t6, $t6, 4 + addi.d $s8, $s8, -4 addi.d $s0, $s0, 4 addi.d $t7, $t7, 4 - addi.d $t1, $t1, 4 + addi.d $t3, $t3, 4 addi.d $a7, $a7, 4 addi.d $s3, $s3, 4 - beq $s2, $a3, .LBB0_23 + addi.d $a5, $a5, 4 + beq $s5, $s8, .LBB0_23 .LBB0_40: # %scalar.ph810 # Parent Loop BB0_1 Depth=1 # Parent Loop BB0_24 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $s1, $t1, $s2 - fldx.s $ft4, $t1, $s2 - fldx.s $ft5, $s1, $t3 - fsub.s $ft5, $ft5, $ft4 - fstx.s $ft5, $t7, $s2 - fldx.s $ft5, $s1, $t0 - fsub.s $ft5, $ft5, $ft4 - fstx.s $ft5, $a7, $s2 - fld.s $ft5, $s1, -4 - fsub.s $ft5, $ft5, $ft4 - fstx.s $ft5, $s3, $s2 - fld.s $ft5, $s1, 4 - fsub.s $ft5, $ft5, $ft4 - fstx.s $ft5, $s0, $s2 - fldx.s $ft6, $t7, $s2 - fldx.s $ft7, $a7, $s2 - fldx.s $ft8, $s3, $s2 - fmul.s $ft9, $ft6, $ft6 + add.d $s7, $a7, $s5 + fldx.s $ft5, $a7, $s5 + fldx.s $ft6, $s7, $t5 + fsub.s $ft6, $ft6, $ft5 + fstx.s $ft6, $t3, $s5 + fldx.s $ft6, $s7, $t0 + fsub.s $ft6, $ft6, $ft5 + fstx.s $ft6, $s3, $s5 + fld.s $ft6, $s7, -4 + fsub.s $ft6, $ft6, $ft5 + fstx.s $ft6, $a5, $s5 + fld.s $ft6, $s7, 4 + fsub.s $ft6, $ft6, $ft5 + fstx.s $ft6, $t7, $s5 + fldx.s $ft7, $t3, $s5 + fldx.s $ft8, $s3, $s5 + fldx.s $ft9, $a5, $s5 fmul.s $ft10, $ft7, $ft7 - fadd.s $ft9, $ft9, $ft10 - fmul.s $ft10, $ft8, $ft8 - fadd.s $ft9, $ft9, $ft10 - fmul.s $ft10, $ft5, $ft5 - fadd.s $ft9, $ft10, $ft9 - fmul.s $ft10, $ft4, $ft4 - fdiv.s $ft9, $ft9, $ft10 + fmul.s $ft11, $ft8, $ft8 + fadd.s $ft10, $ft10, $ft11 + fmul.s $ft11, $ft9, $ft9 + fadd.s $ft10, $ft10, $ft11 + fmul.s $ft11, $ft6, $ft6 + fadd.s $ft10, $ft11, $ft10 + fmul.s $ft11, $ft5, $ft5 + fdiv.s $ft10, $ft10, $ft11 + fadd.s $ft7, $ft7, $ft8 + fadd.s $ft7, $ft7, $ft9 fadd.s $ft6, $ft6, $ft7 - fadd.s $ft6, $ft6, $ft8 - fadd.s $ft5, $ft5, $ft6 - fdiv.s $ft4, $ft5, $ft4 - fcvt.d.s $ft5, $ft9 - fmul.d $ft5, $ft5, $fa1 - fmul.s $ft6, $ft4, $ft4 - fcvt.d.s $ft6, $ft6 - fmul.d $ft6, $ft6, $fa7 - fadd.d $ft5, $ft5, $ft6 + fdiv.s $ft5, $ft6, $ft5 + fcvt.d.s $ft6, $ft10 + fmul.d $ft6, $ft6, $fa2 + fmul.s $ft7, $ft5, $ft5 + fcvt.d.s $ft7, $ft7 + fmul.d $ft7, $ft7, $fa3 + fadd.d $ft6, $ft6, $ft7 + fcvt.s.d $ft6, $ft6 + fcvt.d.s $ft5, $ft5 + fmul.d $ft5, $ft5, $fa4 + fadd.d $ft5, $ft5, $fa5 fcvt.s.d $ft5, $ft5 - fcvt.d.s $ft4, $ft4 - fmul.d $ft4, $ft4, $fa2 - fadd.d $ft4, $ft4, $fa3 - fcvt.s.d $ft4, $ft4 - fmul.s $ft4, $ft4, $ft4 - fdiv.s $ft4, $ft5, $ft4 - fsub.s $ft4, $ft4, $fa6 - fdiv.s $ft4, $ft4, $ft0 - fcvt.d.s $ft4, $ft4 - fadd.d $ft4, $ft4, $fa3 - frecip.d $ft4, $ft4 - fcvt.s.d $ft5, $ft4 - fcmp.clt.d $fcc0, $ft4, $ft1 - fstx.s $ft5, $t6, $s2 + fmul.s $ft5, $ft5, $ft5 + fdiv.s $ft5, $ft6, $ft5 + fsub.s $ft5, $ft5, $ft1 + fdiv.s $ft5, $ft5, $ft2 + fcvt.d.s $ft5, $ft5 + fadd.d $ft5, $ft5, $fa5 + frecip.d $ft5, $ft5 + fcvt.s.d $ft6, $ft5 + fcmp.clt.d $fcc0, $ft5, $fa7 + fstx.s $ft6, $s0, $s5 bcnez $fcc0, .LBB0_37 # %bb.41: # in Loop: Header=BB0_40 Depth=3 - fld.d $ft5, $s6, %pc_lo12(.LCPI0_3) - fcmp.cule.d $fcc0, $ft4, $ft5 + movgr2fr.d $ft6, $s6 + fcmp.cule.d $fcc0, $ft5, $ft6 bcnez $fcc0, .LBB0_39 # %bb.42: # in Loop: Header=BB0_40 Depth=3 - vldi $vr12, -1168 + vldi $vr13, -1168 b .LBB0_38 .p2align 4, , 16 .LBB0_43: # %vector.memcheck698 # in Loop: Header=BB0_1 Depth=1 lu52i.d $a0, $zero, 1020 - vreplgr2vr.d $vr6, $a0 - ld.d $a4, $sp, 232 # 8-byte Folded Reload - ld.d $a0, $sp, 64 # 8-byte Folded Reload + vreplgr2vr.d $vr9, $a0 + ld.d $t7, $sp, 208 # 8-byte Folded Reload + ld.d $t8, $sp, 200 # 8-byte Folded Reload + ld.d $a4, $sp, 240 # 8-byte Folded Reload + ld.d $t4, $sp, 232 # 8-byte Folded Reload + ld.d $a0, $sp, 72 # 8-byte Folded Reload beqz $a0, .LBB0_45 # %bb.44: # in Loop: Header=BB0_1 Depth=1 move $a1, $zero - ld.d $a6, $sp, 168 # 8-byte Folded Reload - ld.d $a5, $sp, 160 # 8-byte Folded Reload - ld.d $a3, $sp, 152 # 8-byte Folded Reload - ld.d $a2, $sp, 144 # 8-byte Folded Reload - ld.d $s7, $sp, 136 # 8-byte Folded Reload - ld.d $s8, $sp, 128 # 8-byte Folded Reload - ld.d $ra, $sp, 120 # 8-byte Folded Reload - ld.d $t8, $sp, 200 # 8-byte Folded Reload - ld.d $s6, $sp, 72 # 8-byte Folded Reload - ld.d $fp, $sp, 192 # 8-byte Folded Reload - ld.d $t6, $sp, 224 # 8-byte Folded Reload + ld.d $a6, $sp, 176 # 8-byte Folded Reload + ld.d $a5, $sp, 168 # 8-byte Folded Reload + ld.d $a3, $sp, 160 # 8-byte Folded Reload + ld.d $a2, $sp, 152 # 8-byte Folded Reload + ld.d $s7, $sp, 144 # 8-byte Folded Reload + ld.d $s8, $sp, 136 # 8-byte Folded Reload + ld.d $ra, $sp, 128 # 8-byte Folded Reload + ld.d $t6, $sp, 80 # 8-byte Folded Reload b .LBB0_48 .p2align 4, , 16 .LBB0_45: # %vector.body726.preheader # in Loop: Header=BB0_1 Depth=1 - ori $a0, $t3, 16 - ld.d $a6, $sp, 168 # 8-byte Folded Reload - ld.d $a5, $sp, 160 # 8-byte Folded Reload - ld.d $a3, $sp, 152 # 8-byte Folded Reload - ld.d $a2, $sp, 144 # 8-byte Folded Reload - ld.d $s7, $sp, 136 # 8-byte Folded Reload - ld.d $s8, $sp, 128 # 8-byte Folded Reload - ld.d $ra, $sp, 120 # 8-byte Folded Reload - ld.d $t8, $sp, 200 # 8-byte Folded Reload - ld.d $s6, $sp, 72 # 8-byte Folded Reload - ld.d $fp, $sp, 192 # 8-byte Folded Reload - ld.d $t6, $sp, 224 # 8-byte Folded Reload + ori $a0, $t5, 16 + ld.d $a6, $sp, 176 # 8-byte Folded Reload + ld.d $a5, $sp, 168 # 8-byte Folded Reload + ld.d $a3, $sp, 160 # 8-byte Folded Reload + ld.d $a2, $sp, 152 # 8-byte Folded Reload + ld.d $s7, $sp, 144 # 8-byte Folded Reload + ld.d $s8, $sp, 136 # 8-byte Folded Reload + ld.d $ra, $sp, 128 # 8-byte Folded Reload + ld.d $t6, $sp, 80 # 8-byte Folded Reload ori $a7, $zero, 4084 .p2align 4, , 16 .LBB0_46: # %vector.body726 # Parent Loop BB0_1 Depth=1 # => This Inner Loop Header: Depth=2 add.d $a1, $ra, $a0 - vldx $vr7, $a1, $t5 - vldx $vr8, $a1, $a7 + vldx $vr10, $a1, $s2 + vldx $vr11, $a1, $a7 + add.d $a1, $t7, $a0 + vldx $vr12, $a1, $s2 add.d $a1, $t8, $a0 - vldx $vr9, $a1, $t5 - add.d $a1, $fp, $a0 - vldx $vr10, $a1, $t5 + vldx $vr13, $a1, $s2 add.d $a1, $a4, $a0 - vldx $vr11, $a1, $t5 - vfmul.s $vr9, $vr7, $vr9 - vfmul.s $vr10, $vr7, $vr10 - vfadd.s $vr9, $vr9, $vr10 - vfmul.s $vr7, $vr7, $vr11 + vldx $vr14, $a1, $s2 + vfmul.s $vr12, $vr10, $vr12 + vfmul.s $vr13, $vr10, $vr13 + vfadd.s $vr12, $vr12, $vr13 + vfmul.s $vr10, $vr10, $vr14 + add.d $a1, $t4, $a0 + vldx $vr13, $a1, $s2 add.d $a1, $t6, $a0 - vldx $vr10, $a1, $t5 - add.d $a1, $s6, $a0 - vldx $vr11, $a1, $t5 - vfadd.s $vr7, $vr9, $vr7 - vfmul.s $vr8, $vr8, $vr10 - vfadd.s $vr7, $vr7, $vr8 - vreplvei.w $vr8, $vr11, 1 - fcvt.d.s $ft0, $ft0 - vreplvei.w $vr9, $vr11, 0 - fcvt.d.s $ft1, $ft1 - vextrins.d $vr9, $vr8, 16 - vreplvei.w $vr8, $vr11, 3 - fcvt.d.s $ft0, $ft0 - vreplvei.w $vr10, $vr11, 2 - fcvt.d.s $ft2, $ft2 - vextrins.d $vr10, $vr8, 16 - vreplvei.w $vr8, $vr7, 3 - fcvt.d.s $ft0, $ft0 - vreplvei.w $vr11, $vr7, 2 + vldx $vr14, $a1, $s2 + vfadd.s $vr10, $vr12, $vr10 + vfmul.s $vr11, $vr11, $vr13 + vfadd.s $vr10, $vr10, $vr11 + vreplvei.w $vr11, $vr14, 1 + fcvt.d.s $ft3, $ft3 + vreplvei.w $vr12, $vr14, 0 + fcvt.d.s $ft4, $ft4 + vextrins.d $vr12, $vr11, 16 + vreplvei.w $vr11, $vr14, 3 + fcvt.d.s $ft3, $ft3 + vreplvei.w $vr13, $vr14, 2 + fcvt.d.s $ft5, $ft5 + vextrins.d $vr13, $vr11, 16 + vreplvei.w $vr11, $vr10, 3 + fcvt.d.s $ft3, $ft3 + vreplvei.w $vr14, $vr10, 2 + fcvt.d.s $ft6, $ft6 + vextrins.d $vr14, $vr11, 16 + vreplvei.w $vr11, $vr10, 1 fcvt.d.s $ft3, $ft3 - vextrins.d $vr11, $vr8, 16 - vreplvei.w $vr8, $vr7, 1 - fcvt.d.s $ft0, $ft0 - vreplvei.w $vr7, $vr7, 0 - fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr8, 16 - vfmul.d $vr7, $vr7, $vr6 - vfmul.d $vr8, $vr11, $vr6 - vfadd.d $vr8, $vr8, $vr10 - vfadd.d $vr7, $vr7, $vr9 - vreplvei.d $vr9, $vr7, 1 - fcvt.s.d $ft1, $ft1 - vreplvei.d $vr7, $vr7, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr9, 16 - vreplvei.d $vr9, $vr8, 0 - fcvt.s.d $ft1, $ft1 - vextrins.w $vr7, $vr9, 32 - vreplvei.d $vr8, $vr8, 1 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr7, $vr8, 48 + vreplvei.w $vr10, $vr10, 0 + fcvt.d.s $ft2, $ft2 + vextrins.d $vr10, $vr11, 16 + vfmul.d $vr10, $vr10, $vr9 + vfmul.d $vr11, $vr14, $vr9 + vfadd.d $vr11, $vr11, $vr13 + vfadd.d $vr10, $vr10, $vr12 + vreplvei.d $vr12, $vr10, 1 + fcvt.s.d $ft4, $ft4 + vreplvei.d $vr10, $vr10, 0 + fcvt.s.d $ft2, $ft2 + vextrins.w $vr10, $vr12, 16 + vreplvei.d $vr12, $vr11, 0 + fcvt.s.d $ft4, $ft4 + vextrins.w $vr10, $vr12, 32 + vreplvei.d $vr11, $vr11, 1 + fcvt.s.d $ft3, $ft3 + vextrins.w $vr10, $vr11, 48 addi.d $a0, $a0, 16 - vstx $vr7, $a1, $t5 + vstx $vr10, $a1, $s2 bnez $a0, .LBB0_46 # %bb.47: # in Loop: Header=BB0_1 Depth=1 ori $a1, $zero, 1020 @@ -991,84 +974,83 @@ srad_kernel: # @srad_kernel # in Loop: Header=BB0_1 Depth=1 slli.d $a0, $a1, 2 addi.d $a1, $a1, -1023 - move $t7, $s6 - move $t4, $fp - move $t2, $t8 - ld.d $a7, $sp, 48 # 8-byte Folded Reload + move $t3, $t8 + move $t2, $t7 + ld.d $a7, $sp, 56 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_49: # %.preheader623 # Parent Loop BB0_1 Depth=1 # => This Inner Loop Header: Depth=2 add.d $t1, $a7, $a0 - fld.s $fa7, $t1, -4 - fldx.s $ft0, $t2, $a0 - fldx.s $ft1, $t4, $a0 - fldx.s $ft2, $a7, $a0 - fmul.s $ft0, $fa7, $ft0 - fldx.s $ft3, $a4, $a0 - fmul.s $ft1, $fa7, $ft1 - fadd.s $ft0, $ft0, $ft1 - fldx.s $ft1, $t6, $a0 - fmul.s $fa7, $fa7, $ft3 - fldx.s $ft3, $t7, $a0 - fadd.s $fa7, $ft0, $fa7 - fmul.s $ft0, $ft2, $ft1 - fadd.s $fa7, $fa7, $ft0 - fcvt.d.s $ft0, $ft3 - fcvt.d.s $fa7, $fa7 - fmul.d $fa7, $fa7, $fa5 - fadd.d $fa7, $fa7, $ft0 - fcvt.s.d $fa7, $fa7 - fstx.s $fa7, $t7, $a0 + fld.s $ft2, $t1, -4 + fldx.s $ft3, $t2, $a0 + fldx.s $ft4, $t3, $a0 + fldx.s $ft5, $a7, $a0 + fmul.s $ft3, $ft2, $ft3 + fldx.s $ft6, $a4, $a0 + fmul.s $ft4, $ft2, $ft4 + fadd.s $ft3, $ft3, $ft4 + fldx.s $ft4, $t4, $a0 + fmul.s $ft2, $ft2, $ft6 + fldx.s $ft6, $t6, $a0 + fadd.s $ft2, $ft3, $ft2 + fmul.s $ft3, $ft5, $ft4 + fadd.s $ft2, $ft2, $ft3 + fcvt.d.s $ft3, $ft6 + fcvt.d.s $ft2, $ft2 + fmul.d $ft2, $ft2, $ft0 + fadd.d $ft2, $ft2, $ft3 + fcvt.s.d $ft2, $ft2 + fstx.s $ft2, $t6, $a0 addi.d $a7, $a7, 4 addi.d $t2, $t2, 4 - addi.d $t4, $t4, 4 + addi.d $t3, $t3, 4 addi.d $a4, $a4, 4 - addi.d $t6, $t6, 4 + addi.d $t4, $t4, 4 addi.d $a1, $a1, 1 - addi.d $t7, $t7, 4 + addi.d $t6, $t6, 4 bnez $a1, .LBB0_49 # %bb.50: # in Loop: Header=BB0_1 Depth=1 - ld.d $a0, $sp, 208 # 8-byte Folded Reload - fld.s $fa7, $a0, 0 + ld.d $a0, $sp, 216 # 8-byte Folded Reload + fld.s $ft2, $a0, 0 ori $a1, $zero, 4092 - fldx.s $ft0, $t8, $a1 - fldx.s $ft1, $fp, $a1 - move $s3, $zero - fmul.s $ft0, $fa7, $ft0 + fldx.s $ft3, $t7, $a1 + fldx.s $ft4, $t8, $a1 + move $s4, $zero + fmul.s $ft3, $ft2, $ft3 + ld.d $a0, $sp, 240 # 8-byte Folded Reload + fldx.s $ft5, $a0, $a1 + fmul.s $ft4, $ft2, $ft4 + fadd.s $ft3, $ft3, $ft4 ld.d $a0, $sp, 232 # 8-byte Folded Reload - fldx.s $ft2, $a0, $a1 - fmul.s $ft1, $fa7, $ft1 - fadd.s $ft0, $ft0, $ft1 + fldx.s $ft4, $a0, $a1 + fmul.s $ft5, $ft2, $ft5 ld.d $a0, $sp, 224 # 8-byte Folded Reload - fldx.s $ft1, $a0, $a1 - fmul.s $ft2, $fa7, $ft2 - ld.d $a0, $sp, 216 # 8-byte Folded Reload ld.d $a1, $sp, 272 # 8-byte Folded Reload - fldx.s $ft3, $a0, $a1 - fadd.s $ft0, $ft0, $ft2 - fmul.s $fa7, $fa7, $ft1 - fadd.s $fa7, $ft0, $fa7 - fcvt.d.s $ft0, $ft3 - fcvt.d.s $fa7, $fa7 - fmul.d $fa7, $fa7, $fa5 - fadd.d $fa7, $fa7, $ft0 - fcvt.s.d $fa7, $fa7 - fstx.s $fa7, $a0, $a1 - move $s2, $a6 - move $a7, $s8 - move $t1, $s7 - move $t2, $a2 - move $t4, $a3 - move $t6, $a5 - ld.d $t7, $sp, 184 # 8-byte Folded Reload + fldx.s $ft6, $a0, $a1 + fadd.s $ft3, $ft3, $ft5 + fmul.s $ft2, $ft2, $ft4 + fadd.s $ft2, $ft3, $ft2 + fcvt.d.s $ft3, $ft6 + fcvt.d.s $ft2, $ft2 + fmul.d $ft2, $ft2, $ft0 + fadd.d $ft2, $ft2, $ft3 + fcvt.s.d $ft2, $ft2 + fstx.s $ft2, $a0, $a1 + move $s3, $a6 + move $t2, $s8 + move $a7, $s7 + move $t1, $a2 + move $t3, $a3 + move $t4, $a5 + ld.d $t6, $sp, 192 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_51: # %.preheader # Parent Loop BB0_1 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB0_54 Depth 3 # Child Loop BB0_57 Depth 3 - ld.d $a0, $sp, 256 # 8-byte Folded Reload + ld.d $a0, $sp, 264 # 8-byte Folded Reload beqz $a0, .LBB0_53 # %bb.52: # in Loop: Header=BB0_51 Depth=2 move $a1, $zero @@ -1076,70 +1058,70 @@ srad_kernel: # @srad_kernel .p2align 4, , 16 .LBB0_53: # %vector.body.preheader # in Loop: Header=BB0_51 Depth=2 - ori $a4, $t3, 16 + ori $a4, $t5, 16 .p2align 4, , 16 .LBB0_54: # %vector.body # Parent Loop BB0_1 Depth=1 # Parent Loop BB0_51 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $a0, $t7, $a4 - vld $vr7, $a0, -16 - vldx $vr8, $a0, $t5 - add.d $a1, $t1, $a4 - vldx $vr9, $a1, $t5 + add.d $a0, $t6, $a4 + vld $vr10, $a0, -16 + vldx $vr11, $a0, $s2 add.d $a1, $a7, $a4 - vldx $vr10, $a1, $t5 + vldx $vr12, $a1, $s2 add.d $a1, $t2, $a4 - vldx $vr11, $a1, $t5 - vfmul.s $vr8, $vr8, $vr9 - vld $vr9, $a0, -12 - vfmul.s $vr10, $vr7, $vr10 - vfadd.s $vr8, $vr10, $vr8 - vfmul.s $vr7, $vr7, $vr11 + vldx $vr13, $a1, $s2 + add.d $a1, $t1, $a4 + vldx $vr14, $a1, $s2 + vfmul.s $vr11, $vr11, $vr12 + vld $vr12, $a0, -12 + vfmul.s $vr13, $vr10, $vr13 + vfadd.s $vr11, $vr13, $vr11 + vfmul.s $vr10, $vr10, $vr14 + add.d $a0, $t3, $a4 + vldx $vr13, $a0, $s2 add.d $a0, $t4, $a4 - vldx $vr10, $a0, $t5 - add.d $a0, $t6, $a4 - vldx $vr11, $a0, $t5 - vfadd.s $vr7, $vr8, $vr7 - vfmul.s $vr8, $vr9, $vr10 - vfadd.s $vr7, $vr7, $vr8 - vreplvei.w $vr8, $vr11, 1 - fcvt.d.s $ft0, $ft0 - vreplvei.w $vr9, $vr11, 0 - fcvt.d.s $ft1, $ft1 - vextrins.d $vr9, $vr8, 16 - vreplvei.w $vr8, $vr11, 3 - fcvt.d.s $ft0, $ft0 - vreplvei.w $vr10, $vr11, 2 - fcvt.d.s $ft2, $ft2 - vextrins.d $vr10, $vr8, 16 - vreplvei.w $vr8, $vr7, 3 - fcvt.d.s $ft0, $ft0 - vreplvei.w $vr11, $vr7, 2 + vldx $vr14, $a0, $s2 + vfadd.s $vr10, $vr11, $vr10 + vfmul.s $vr11, $vr12, $vr13 + vfadd.s $vr10, $vr10, $vr11 + vreplvei.w $vr11, $vr14, 1 + fcvt.d.s $ft3, $ft3 + vreplvei.w $vr12, $vr14, 0 + fcvt.d.s $ft4, $ft4 + vextrins.d $vr12, $vr11, 16 + vreplvei.w $vr11, $vr14, 3 + fcvt.d.s $ft3, $ft3 + vreplvei.w $vr13, $vr14, 2 + fcvt.d.s $ft5, $ft5 + vextrins.d $vr13, $vr11, 16 + vreplvei.w $vr11, $vr10, 3 + fcvt.d.s $ft3, $ft3 + vreplvei.w $vr14, $vr10, 2 + fcvt.d.s $ft6, $ft6 + vextrins.d $vr14, $vr11, 16 + vreplvei.w $vr11, $vr10, 1 fcvt.d.s $ft3, $ft3 - vextrins.d $vr11, $vr8, 16 - vreplvei.w $vr8, $vr7, 1 - fcvt.d.s $ft0, $ft0 - vreplvei.w $vr7, $vr7, 0 - fcvt.d.s $fa7, $fa7 - vextrins.d $vr7, $vr8, 16 - vfmul.d $vr7, $vr7, $vr6 - vfmul.d $vr8, $vr11, $vr6 - vfadd.d $vr8, $vr8, $vr10 - vfadd.d $vr7, $vr7, $vr9 - vreplvei.d $vr9, $vr7, 1 - fcvt.s.d $ft1, $ft1 - vreplvei.d $vr7, $vr7, 0 - fcvt.s.d $fa7, $fa7 - vextrins.w $vr7, $vr9, 16 - vreplvei.d $vr9, $vr8, 0 - fcvt.s.d $ft1, $ft1 - vextrins.w $vr7, $vr9, 32 - vreplvei.d $vr8, $vr8, 1 - fcvt.s.d $ft0, $ft0 - vextrins.w $vr7, $vr8, 48 + vreplvei.w $vr10, $vr10, 0 + fcvt.d.s $ft2, $ft2 + vextrins.d $vr10, $vr11, 16 + vfmul.d $vr10, $vr10, $vr9 + vfmul.d $vr11, $vr14, $vr9 + vfadd.d $vr11, $vr11, $vr13 + vfadd.d $vr10, $vr10, $vr12 + vreplvei.d $vr12, $vr10, 1 + fcvt.s.d $ft4, $ft4 + vreplvei.d $vr10, $vr10, 0 + fcvt.s.d $ft2, $ft2 + vextrins.w $vr10, $vr12, 16 + vreplvei.d $vr12, $vr11, 0 + fcvt.s.d $ft4, $ft4 + vextrins.w $vr10, $vr12, 32 + vreplvei.d $vr11, $vr11, 1 + fcvt.s.d $ft3, $ft3 + vextrins.w $vr10, $vr11, 48 addi.d $a4, $a4, 16 - vstx $vr7, $a0, $t5 + vstx $vr10, $a0, $s2 bnez $a4, .LBB0_54 # %bb.55: # in Loop: Header=BB0_51 Depth=2 ori $a1, $zero, 1020 @@ -1147,70 +1129,68 @@ srad_kernel: # @srad_kernel # in Loop: Header=BB0_51 Depth=2 slli.d $a0, $a1, 2 addi.d $a1, $a1, -1023 - move $s5, $t6 - move $s4, $t4 - move $a4, $t2 - move $t8, $t1 - move $fp, $a7 - move $s0, $s2 + move $s5, $t4 + move $a4, $t3 + move $t7, $t1 + move $t8, $a7 + move $fp, $t2 + move $s0, $s3 .p2align 4, , 16 .LBB0_57: # %scalar.ph # Parent Loop BB0_1 Depth=1 # Parent Loop BB0_51 Depth=2 # => This Inner Loop Header: Depth=3 add.d $s1, $s0, $a0 - fldx.s $fa7, $s1, $t0 - fldx.s $ft0, $t8, $a0 - fldx.s $ft1, $s0, $a0 - fldx.s $ft2, $fp, $a0 - fld.s $ft3, $s1, 4 - fmul.s $fa7, $fa7, $ft0 - fldx.s $ft0, $a4, $a0 - fmul.s $ft2, $ft1, $ft2 - fadd.s $fa7, $ft2, $fa7 - fldx.s $ft2, $s4, $a0 - fmul.s $ft0, $ft1, $ft0 - fldx.s $ft1, $s5, $a0 - fadd.s $fa7, $fa7, $ft0 - fmul.s $ft0, $ft3, $ft2 - fadd.s $fa7, $fa7, $ft0 - fcvt.d.s $ft0, $ft1 - fcvt.d.s $fa7, $fa7 - fmul.d $fa7, $fa7, $fa5 - fadd.d $fa7, $fa7, $ft0 - fcvt.s.d $fa7, $fa7 - fstx.s $fa7, $s5, $a0 + fldx.s $ft2, $s1, $t0 + fldx.s $ft3, $t8, $a0 + fldx.s $ft4, $s0, $a0 + fldx.s $ft5, $fp, $a0 + fld.s $ft6, $s1, 4 + fmul.s $ft2, $ft2, $ft3 + fldx.s $ft3, $t7, $a0 + fmul.s $ft5, $ft4, $ft5 + fadd.s $ft2, $ft5, $ft2 + fldx.s $ft5, $a4, $a0 + fmul.s $ft3, $ft4, $ft3 + fldx.s $ft4, $s5, $a0 + fadd.s $ft2, $ft2, $ft3 + fmul.s $ft3, $ft6, $ft5 + fadd.s $ft2, $ft2, $ft3 + fcvt.d.s $ft3, $ft4 + fcvt.d.s $ft2, $ft2 + fmul.d $ft2, $ft2, $ft0 + fadd.d $ft2, $ft2, $ft3 + fcvt.s.d $ft2, $ft2 + fstx.s $ft2, $s5, $a0 addi.d $s0, $s0, 4 addi.d $fp, $fp, 4 addi.d $t8, $t8, 4 + addi.d $t7, $t7, 4 addi.d $a4, $a4, 4 - addi.d $s4, $s4, 4 addi.d $a1, $a1, 1 addi.d $s5, $s5, 4 bnez $a1, .LBB0_57 # %bb.58: # in Loop: Header=BB0_51 Depth=2 - addi.d $s3, $s3, 1 - add.d $t7, $t7, $t0 + addi.d $s4, $s4, 1 add.d $t6, $t6, $t0 add.d $t4, $t4, $t0 - add.d $t2, $t2, $t0 + add.d $t3, $t3, $t0 add.d $t1, $t1, $t0 add.d $a7, $a7, $t0 - add.d $s2, $s2, $t0 + add.d $t2, $t2, $t0 + add.d $s3, $s3, $t0 ori $a0, $zero, 2047 - bne $s3, $a0, .LBB0_51 + bne $s4, $a0, .LBB0_51 # %bb.59: # in Loop: Header=BB0_1 Depth=1 - ld.d $s1, $sp, 176 # 8-byte Folded Reload + ld.d $s1, $sp, 184 # 8-byte Folded Reload addi.w $s1, $s1, 1 - ld.d $t6, $sp, 200 # 8-byte Folded Reload - ld.d $s2, $sp, 192 # 8-byte Folded Reload - ld.d $t7, $sp, 264 # 8-byte Folded Reload + ld.d $t6, $sp, 208 # 8-byte Folded Reload + ld.d $s3, $sp, 200 # 8-byte Folded Reload + ld.d $s4, $sp, 240 # 8-byte Folded Reload ori $t1, $zero, 512 ori $t2, $zero, 128 - ori $t4, $zero, 4092 + ori $t3, $zero, 4092 ori $a0, $zero, 10 - lu52i.d $s0, $zero, 1022 - lu52i.d $s3, $zero, -1029 bne $s1, $a0, .LBB0_1 # %bb.60: ld.d $s8, $sp, 280 # 8-byte Folded Reload diff --git a/results/MultiSource/Benchmarks/SciMark2-C/CMakeFiles/scimark2.dir/FFT.s b/results/MultiSource/Benchmarks/SciMark2-C/CMakeFiles/scimark2.dir/FFT.s index d508a195..d550e1f0 100644 --- a/results/MultiSource/Benchmarks/SciMark2-C/CMakeFiles/scimark2.dir/FFT.s +++ b/results/MultiSource/Benchmarks/SciMark2-C/CMakeFiles/scimark2.dir/FFT.s @@ -116,12 +116,7 @@ FFT_transform: # @FFT_transform .Lfunc_end2: .size FFT_transform, .Lfunc_end2-FFT_transform # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function FFT_transform_internal -.LCPI3_0: - .dword 0x400921fb54442d18 # double 3.1415926535897931 - .text - .p2align 5 + .p2align 5 # -- Begin function FFT_transform_internal .type FFT_transform_internal,@function FFT_transform_internal: # @FFT_transform_internal # %bb.0: @@ -211,10 +206,13 @@ FFT_transform_internal: # @FFT_transform_internal blez $s1, .LBB3_17 # %bb.14: # %.lr.ph114 movgr2fr.w $fa0, $a2 - pcalau12i $a1, %pc_hi20(.LCPI3_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI3_0) ffint.d.w $fa0, $fa0 fadd.d $fa0, $fa0, $fa0 + lu12i.w $a1, 345154 + ori $a1, $a1, 3352 + lu32i.d $a1, -450053 + lu52i.d $a1, $a1, 1024 + movgr2fr.d $fa1, $a1 ori $s2, $zero, 1 fmul.d $fs2, $fa0, $fa1 bge $s2, $a0, .LBB3_16 diff --git a/results/MultiSource/Benchmarks/SciMark2-C/CMakeFiles/scimark2.dir/kernel.s b/results/MultiSource/Benchmarks/SciMark2-C/CMakeFiles/scimark2.dir/kernel.s index cecfa955..2c6afe70 100644 --- a/results/MultiSource/Benchmarks/SciMark2-C/CMakeFiles/scimark2.dir/kernel.s +++ b/results/MultiSource/Benchmarks/SciMark2-C/CMakeFiles/scimark2.dir/kernel.s @@ -1,12 +1,6 @@ .file "kernel.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function kernel_measureFFT -.LCPI0_0: - .dword 0x40c0000000000000 # double 8192 -.LCPI0_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 .text - .globl kernel_measureFFT + .globl kernel_measureFFT # -- Begin function kernel_measureFFT .p2align 5 .type kernel_measureFFT,@function kernel_measureFFT: # @kernel_measureFFT @@ -716,15 +710,18 @@ kernel_measureFFT: # @kernel_measureFFT move $a0, $s1 pcaddu18i $ra, %call36(FFT_num_flops) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) + lu52i.d $a0, $zero, 1036 + movgr2fr.d $fa1, $a0 fmul.d $fs0, $fa0, $fa1 move $a0, $s0 pcaddu18i $ra, %call36(Stopwatch_read) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_1) fdiv.d $fa0, $fs0, $fa0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fa1, $a0 fmul.d $fs0, $fa0, $fa1 move $a0, $s0 pcaddu18i $ra, %call36(Stopwatch_delete) @@ -745,12 +742,7 @@ kernel_measureFFT: # @kernel_measureFFT .Lfunc_end0: .size kernel_measureFFT, .Lfunc_end0-kernel_measureFFT # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function kernel_measureSOR -.LCPI1_0: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 - .text - .globl kernel_measureSOR + .globl kernel_measureSOR # -- Begin function kernel_measureSOR .p2align 5 .type kernel_measureSOR,@function kernel_measureSOR: # @kernel_measureSOR @@ -962,9 +954,12 @@ kernel_measureSOR: # @kernel_measureSOR move $a0, $s1 pcaddu18i $ra, %call36(Stopwatch_read) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_0) fdiv.d $fa0, $fs0, $fa0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fa1, $a0 fmul.d $fs0, $fa0, $fa1 move $a0, $s1 pcaddu18i $ra, %call36(Stopwatch_delete) @@ -986,12 +981,7 @@ kernel_measureSOR: # @kernel_measureSOR .Lfunc_end1: .size kernel_measureSOR, .Lfunc_end1-kernel_measureSOR # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function kernel_measureMonteCarlo -.LCPI2_0: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 - .text - .globl kernel_measureMonteCarlo + .globl kernel_measureMonteCarlo # -- Begin function kernel_measureMonteCarlo .p2align 5 .type kernel_measureMonteCarlo,@function kernel_measureMonteCarlo: # @kernel_measureMonteCarlo @@ -1135,9 +1125,12 @@ kernel_measureMonteCarlo: # @kernel_measureMonteCarlo move $a0, $fp pcaddu18i $ra, %call36(Stopwatch_read) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI2_0) fdiv.d $fa0, $fs0, $fa0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fa1, $a0 fmul.d $fs0, $fa0, $fa1 move $a0, $fp pcaddu18i $ra, %call36(Stopwatch_delete) @@ -1158,10 +1151,6 @@ kernel_measureMonteCarlo: # @kernel_measureMonteCarlo .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI3_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 .text .globl kernel_measureSparseMatMult .p2align 5 @@ -1554,9 +1543,12 @@ kernel_measureSparseMatMult: # @kernel_measureSparseMatMult move $a0, $s6 pcaddu18i $ra, %call36(Stopwatch_read) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI3_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI3_1) fdiv.d $fa0, $fs0, $fa0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fa1, $a0 fmul.d $fs0, $fa0, $fa1 move $a0, $s6 pcaddu18i $ra, %call36(Stopwatch_delete) @@ -1593,14 +1585,7 @@ kernel_measureSparseMatMult: # @kernel_measureSparseMatMult .Lfunc_end3: .size kernel_measureSparseMatMult, .Lfunc_end3-kernel_measureSparseMatMult # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function kernel_measureLU -.LCPI4_0: - .dword 0x40c0000000000000 # double 8192 -.LCPI4_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 - .text - .globl kernel_measureLU + .globl kernel_measureLU # -- Begin function kernel_measureLU .p2align 5 .type kernel_measureLU,@function kernel_measureLU: # @kernel_measureLU @@ -2261,15 +2246,18 @@ kernel_measureLU: # @kernel_measureLU move $a0, $fp pcaddu18i $ra, %call36(LU_num_flops) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_0) + lu52i.d $a0, $zero, 1036 + movgr2fr.d $fa1, $a0 fmul.d $fs0, $fa0, $fa1 move $a0, $s1 pcaddu18i $ra, %call36(Stopwatch_read) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI4_1) fdiv.d $fa0, $fs0, $fa0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fa1, $a0 fmul.d $fs0, $fa0, $fa1 move $a0, $s1 pcaddu18i $ra, %call36(Stopwatch_delete) diff --git a/results/MultiSource/Benchmarks/SciMark2-C/CMakeFiles/scimark2.dir/scimark2.s b/results/MultiSource/Benchmarks/SciMark2-C/CMakeFiles/scimark2.dir/scimark2.s index e285f58e..7b30034d 100644 --- a/results/MultiSource/Benchmarks/SciMark2-C/CMakeFiles/scimark2.dir/scimark2.s +++ b/results/MultiSource/Benchmarks/SciMark2-C/CMakeFiles/scimark2.dir/scimark2.s @@ -1,10 +1,6 @@ .file "scimark2.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x416312d000000000 # double 1.0E+7 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -126,20 +122,20 @@ main: # @main move $a1, $fp pcaddu18i $ra, %call36(kernel_measureFFT) jirl $ra, $ra, 0 - fmov.d $fs1, $fa0 + fmov.d $fs2, $fa0 move $a0, $s0 vld $vr0, $sp, 16 # 16-byte Folded Reload # kill: def $f0_64 killed $f0_64 killed $vr0 move $a1, $fp pcaddu18i $ra, %call36(kernel_measureSOR) jirl $ra, $ra, 0 - fmov.d $fs2, $fa0 + fmov.d $fs3, $fa0 vld $vr0, $sp, 16 # 16-byte Folded Reload # kill: def $f0_64 killed $f0_64 killed $vr0 move $a0, $fp pcaddu18i $ra, %call36(kernel_measureMonteCarlo) jirl $ra, $ra, 0 - fmov.d $fs3, $fa0 + fmov.d $fs4, $fa0 move $a0, $s1 move $a1, $s2 vld $vr0, $sp, 16 # 16-byte Folded Reload @@ -147,7 +143,7 @@ main: # @main move $a2, $fp pcaddu18i $ra, %call36(kernel_measureSparseMatMult) jirl $ra, $ra, 0 - fmov.d $fs4, $fa0 + fmov.d $fs1, $fa0 move $a0, $s0 vld $vr0, $sp, 16 # 16-byte Folded Reload # kill: def $f0_64 killed $f0_64 killed $vr0 @@ -155,9 +151,9 @@ main: # @main pcaddu18i $ra, %call36(kernel_measureLU) jirl $ra, $ra, 0 fmov.d $fs0, $fa0 - fadd.d $fa0, $fs1, $fs2 - fadd.d $fa0, $fa0, $fs3 + fadd.d $fa0, $fs2, $fs3 fadd.d $fa0, $fa0, $fs4 + fadd.d $fa0, $fa0, $fs1 fadd.d $fa0, $fa0, $fs0 vldi $vr1, -1004 fdiv.d $fs5, $fa0, $fa1 @@ -165,22 +161,24 @@ main: # @main addi.d $a0, $a0, %pc_lo12(.Lstr) pcaddu18i $ra, %call36(puts) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fs6, $a0, %pc_lo12(.LCPI0_0) + ori $a0, $zero, 0 + lu32i.d $a0, 201424 + lu52i.d $a0, $a0, 1046 + movgr2fr.d $fs6, $a0 fdiv.d $fa0, $fs5, $fs6 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str.6) addi.d $a0, $a0, %pc_lo12(.L.str.6) pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - fdiv.d $fa0, $fs1, $fs6 + fdiv.d $fa0, $fs2, $fs6 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str.7) addi.d $a0, $a0, %pc_lo12(.L.str.7) move $a2, $s3 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - fdiv.d $fa0, $fs2, $fs6 + fdiv.d $fa0, $fs3, $fs6 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str.8) addi.d $a0, $a0, %pc_lo12(.L.str.8) @@ -188,13 +186,13 @@ main: # @main move $a3, $s0 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - fdiv.d $fa0, $fs3, $fs6 + fdiv.d $fa0, $fs4, $fs6 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str.9) addi.d $a0, $a0, %pc_lo12(.L.str.9) pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - fdiv.d $fa0, $fs4, $fs6 + fdiv.d $fa0, $fs1, $fs6 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str.10) addi.d $a0, $a0, %pc_lo12(.L.str.10) diff --git a/results/MultiSource/Benchmarks/TSVC/CrossingThresholds-dbl/CMakeFiles/CrossingThresholds-dbl.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/CrossingThresholds-dbl/CMakeFiles/CrossingThresholds-dbl.dir/tsc.s index 29f439c7..ead0e4d2 100644 --- a/results/MultiSource/Benchmarks/TSVC/CrossingThresholds-dbl/CMakeFiles/CrossingThresholds-dbl.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/CrossingThresholds-dbl/CMakeFiles/CrossingThresholds-dbl.dir/tsc.s @@ -17123,12 +17123,7 @@ s291: # @s291 .Lfunc_end8: .size s291, .Lfunc_end8-s291 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function s292 -.LCPI9_0: - .dword 0x3fd54fdf3b645a1d # double 0.33300000000000002 - .text - .globl s292 + .globl s292 # -- Begin function s292 .p2align 5 .type s292,@function s292: # @s292 @@ -17167,8 +17162,11 @@ s292: # @s292 lu12i.w $a0, 62 ori $a1, $a0, 2080 add.d $s0, $fp, $a1 - pcalau12i $a1, %pc_hi20(.LCPI9_0) - fld.d $fs1, $a1, %pc_lo12(.LCPI9_0) + lu12i.w $a1, 243269 + ori $a1, $a1, 2589 + lu32i.d $a1, 348127 + lu52i.d $a1, $a1, 1021 + movgr2fr.d $fs1, $a1 ori $s8, $a0, 2048 lu12i.w $a0, 125 ori $a0, $a0, 64 diff --git a/results/MultiSource/Benchmarks/TSVC/CrossingThresholds-flt/CMakeFiles/CrossingThresholds-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/CrossingThresholds-flt/CMakeFiles/CrossingThresholds-flt.dir/tsc.s index 14863a8b..9d10ed3f 100644 --- a/results/MultiSource/Benchmarks/TSVC/CrossingThresholds-flt/CMakeFiles/CrossingThresholds-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/CrossingThresholds-flt/CMakeFiles/CrossingThresholds-flt.dir/tsc.s @@ -12327,12 +12327,7 @@ s291: # @s291 .Lfunc_end8: .size s291, .Lfunc_end8-s291 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function s292 -.LCPI9_0: - .word 0x3eaa7efa # float 0.333000004 - .text - .globl s292 + .globl s292 # -- Begin function s292 .p2align 5 .type s292,@function s292: # @s292 @@ -12371,8 +12366,9 @@ s292: # @s292 lu12i.w $a0, 31 ori $a1, $a0, 1040 add.d $s0, $fp, $a1 - pcalau12i $a1, %pc_hi20(.LCPI9_0) - fld.s $fs1, $a1, %pc_lo12(.LCPI9_0) + lu12i.w $a1, 256679 + ori $a1, $a1, 3834 + movgr2fr.w $fs1, $a1 ori $s8, $a0, 1024 lu12i.w $a0, 62 ori $a0, $a0, 2096 diff --git a/results/MultiSource/Benchmarks/TSVC/Reductions-dbl/CMakeFiles/Reductions-dbl.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Reductions-dbl/CMakeFiles/Reductions-dbl.dir/tsc.s index 0305a254..15c28129 100644 --- a/results/MultiSource/Benchmarks/TSVC/Reductions-dbl/CMakeFiles/Reductions-dbl.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Reductions-dbl/CMakeFiles/Reductions-dbl.dir/tsc.s @@ -17688,12 +17688,7 @@ s316: # @s316 .Lfunc_end13: .size s316, .Lfunc_end13-s316 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function s317 -.LCPI14_0: - .dword 0x3fefae147ae147ae # double 0.98999999999999999 - .text - .globl s317 + .globl s317 # -- Begin function s317 .p2align 5 .type s317,@function s317: # @s317 @@ -17720,10 +17715,13 @@ s317: # @s317 ld.w $a0, $a0, %pc_lo12(ntimes) blez $a0, .LBB14_5 # %bb.1: # %.preheader.preheader - pcalau12i $a0, %pc_hi20(.LCPI14_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI14_0) lu12i.w $a0, 3 ori $s8, $a0, 3712 + lu12i.w $a0, 503316 + ori $a0, $a0, 1966 + lu32i.d $a0, -20972 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fs0, $a0 pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) move $s7, $zero @@ -18268,12 +18266,7 @@ s3110: # @s3110 .Lfunc_end17: .size s3110, .Lfunc_end17-s3110 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function s13110 -.LCPI18_0: - .dword 0x7ff8000000000000 # double NaN - .text - .globl s13110 + .globl s13110 # -- Begin function s13110 .p2align 5 .type s13110,@function s13110: # @s13110 @@ -18390,8 +18383,10 @@ s13110: # @s13110 fadd.d $fs0, $fa1, $fa0 b .LBB18_9 .LBB18_8: - pcalau12i $a0, %pc_hi20(.LCPI18_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI18_0) + ori $a0, $zero, 0 + lu32i.d $a0, -524288 + lu52i.d $a0, $a0, 2047 + movgr2fr.d $fs0, $a0 .LBB18_9: # %._crit_edge pcalau12i $a0, %pc_hi20(.L.str.148) addi.d $a0, $a0, %pc_lo12(.L.str.148) @@ -19436,12 +19431,7 @@ set: # @set .Lfunc_end23: .size set, .Lfunc_end23-set # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI24_0: - .dword 0x3fefae147ae147ae # double 0.98999999999999999 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -19619,11 +19609,14 @@ main: # @main ld.w $a0, $a0, %pc_lo12(ntimes) blez $a0, .LBB24_15 # %bb.11: # %.preheader.i4.preheader - move $s8, $zero - pcalau12i $a0, %pc_hi20(.LCPI24_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI24_0) + move $s7, $zero + lu12i.w $a0, 503316 + ori $a0, $a0, 1966 + lu32i.d $a0, -20972 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 3 - ori $s7, $a0, 3712 + ori $s8, $a0, 3712 lu12i.w $a0, 62 ori $a0, $a0, 2080 add.d $s0, $fp, $a0 @@ -19650,7 +19643,7 @@ main: # @main # =>This Loop Header: Depth=1 # Child Loop BB24_13 Depth 2 vldi $vr0, -912 - move $a0, $s7 + move $a0, $s8 .p2align 4, , 16 .LBB24_13: # Parent Loop BB24_12 Depth=1 # => This Inner Loop Header: Depth=2 @@ -19672,9 +19665,9 @@ main: # @main jirl $ra, $ra, 0 ld.d $a0, $sp, 48 # 8-byte Folded Reload ld.w $a0, $a0, %pc_lo12(ntimes) - addi.w $s8, $s8, 1 + addi.w $s7, $s7, 1 alsl.w $a0, $a0, $a0, 2 - blt $s8, $a0, .LBB24_12 + blt $s7, $a0, .LBB24_12 b .LBB24_16 .LBB24_15: # implicit-def: $f0_64 diff --git a/results/MultiSource/Benchmarks/TSVC/Reductions-flt/CMakeFiles/Reductions-flt.dir/tsc.s b/results/MultiSource/Benchmarks/TSVC/Reductions-flt/CMakeFiles/Reductions-flt.dir/tsc.s index e5a21ceb..1a67d581 100644 --- a/results/MultiSource/Benchmarks/TSVC/Reductions-flt/CMakeFiles/Reductions-flt.dir/tsc.s +++ b/results/MultiSource/Benchmarks/TSVC/Reductions-flt/CMakeFiles/Reductions-flt.dir/tsc.s @@ -12895,12 +12895,7 @@ s316: # @s316 .Lfunc_end13: .size s316, .Lfunc_end13-s316 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function s317 -.LCPI14_0: - .word 0x3f7d70a4 # float 0.990000009 - .text - .globl s317 + .globl s317 # -- Begin function s317 .p2align 5 .type s317,@function s317: # @s317 @@ -12927,10 +12922,11 @@ s317: # @s317 ld.w $a0, $a0, %pc_lo12(ntimes) blez $a0, .LBB14_5 # %bb.1: # %.preheader.preheader - pcalau12i $a0, %pc_hi20(.LCPI14_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI14_0) lu12i.w $a0, 3 ori $s8, $a0, 3712 + lu12i.w $a0, 260055 + ori $a0, $a0, 164 + movgr2fr.w $fs0, $a0 pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) move $s7, $zero @@ -13475,12 +13471,7 @@ s3110: # @s3110 .Lfunc_end17: .size s3110, .Lfunc_end17-s3110 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function s13110 -.LCPI18_0: - .word 0x7fc00000 # float NaN - .text - .globl s13110 + .globl s13110 # -- Begin function s13110 .p2align 5 .type s13110,@function s13110: # @s13110 @@ -13593,8 +13584,8 @@ s13110: # @s13110 fadd.s $fs0, $fa1, $fa0 b .LBB18_9 .LBB18_8: - pcalau12i $a0, %pc_hi20(.LCPI18_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI18_0) + lu12i.w $a0, 523264 + movgr2fr.w $fs0, $a0 .LBB18_9: # %._crit_edge pcalau12i $a0, %pc_hi20(.L.str.148) addi.d $a0, $a0, %pc_lo12(.L.str.148) @@ -14451,12 +14442,7 @@ set: # @set .Lfunc_end23: .size set, .Lfunc_end23-set # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function main -.LCPI24_0: - .word 0x3f7d70a4 # float 0.990000009 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -14547,10 +14533,11 @@ main: # @main blez $a0, .LBB24_9 # %bb.5: # %.preheader.i.preheader st.d $s3, $sp, 24 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI24_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI24_0) lu12i.w $a0, 3 ori $s7, $a0, 3712 + lu12i.w $a0, 260055 + ori $a0, $a0, 164 + movgr2fr.w $fs0, $a0 pcalau12i $a0, %pc_hi20(global_data) addi.d $fp, $a0, %pc_lo12(global_data) move $s8, $zero diff --git a/results/MultiSource/Benchmarks/VersaBench/beamformer/CMakeFiles/beamformer.dir/beamformer.s b/results/MultiSource/Benchmarks/VersaBench/beamformer/CMakeFiles/beamformer.dir/beamformer.s index 415d59ea..cd9bd1ea 100644 --- a/results/MultiSource/Benchmarks/VersaBench/beamformer/CMakeFiles/beamformer.dir/beamformer.s +++ b/results/MultiSource/Benchmarks/VersaBench/beamformer/CMakeFiles/beamformer.dir/beamformer.s @@ -2661,10 +2661,6 @@ begin_StrictFP: # @begin_StrictFP .LCPI2_1: .dword 0 # 0x0 .dword 1 # 0x1 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI2_2: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 .text .globl begin .p2align 5 @@ -2852,7 +2848,7 @@ begin: # @begin ori $a0, $a0, 2440 add.d $a1, $sp, $a0 ori $a0, $zero, 1 - ori $s5, $zero, 1 + ori $s6, $zero, 1 pcaddu18i $ra, %call36(BeamFormWeights) jirl $ra, $ra, 0 lu12i.w $a0, 208 @@ -2943,83 +2939,88 @@ begin: # @begin ori $a0, $a1, 4 lu12i.w $a2, 184 ori $a2, $a2, 2728 - add.d $a7, $sp, $a2 - st.d $a0, $sp, 208 # 8-byte Folded Spill - add.d $a0, $a7, $a0 + add.d $a2, $sp, $a2 + st.d $a0, $sp, 120 # 8-byte Folded Spill + add.d $a0, $a2, $a0 st.d $a0, $sp, 192 # 8-byte Folded Spill - lu12i.w $s3, 2 + lu12i.w $s4, 2 ld.d $a0, $sp, 216 # 8-byte Folded Reload - add.d $a2, $a0, $s3 - st.d $a2, $sp, 184 # 8-byte Folded Spill - lu12i.w $a2, 4 - add.d $a3, $a0, $a2 - st.d $a3, $sp, 176 # 8-byte Folded Spill + add.d $a3, $a0, $s4 + st.d $a3, $sp, 184 # 8-byte Folded Spill + lu12i.w $a3, 4 + add.d $a4, $a0, $a3 + st.d $a4, $sp, 176 # 8-byte Folded Spill add.d $a0, $a0, $a1 st.d $a0, $sp, 168 # 8-byte Folded Spill - add.d $a0, $s8, $s3 + add.d $a0, $s8, $s4 st.d $a0, $sp, 160 # 8-byte Folded Spill - add.d $a0, $s8, $a2 + add.d $a0, $s8, $a3 st.d $a0, $sp, 152 # 8-byte Folded Spill add.d $a0, $s8, $a1 st.d $a0, $sp, 144 # 8-byte Folded Spill - ori $a0, $s3, 4 - st.d $a0, $sp, 136 # 8-byte Folded Spill - add.d $a0, $a7, $a0 - st.d $a0, $sp, 128 # 8-byte Folded Spill + ori $a1, $s4, 4 lu12i.w $a0, 12 ori $a0, $a0, 8 - lu12i.w $a1, 16 - ori $a1, $a1, 2728 - add.d $a1, $sp, $a1 - add.d $a0, $a1, $a0 - st.d $a0, $sp, 96 # 8-byte Folded Spill + lu12i.w $a4, 16 + ori $a4, $a4, 2728 + add.d $a4, $sp, $a4 + add.d $a0, $a4, $a0 + st.d $a0, $sp, 88 # 8-byte Folded Spill lu12i.w $a0, 24 ori $a0, $a0, 8 - add.d $a0, $a1, $a0 - st.d $a0, $sp, 88 # 8-byte Folded Spill + add.d $a0, $a4, $a0 + st.d $a0, $sp, 80 # 8-byte Folded Spill lu12i.w $a0, 36 ori $a0, $a0, 8 - add.d $a0, $a1, $a0 - st.d $a0, $sp, 80 # 8-byte Folded Spill - addu16i.d $a0, $a1, 3 - addi.d $a0, $a0, 8 + add.d $a0, $a4, $a0 st.d $a0, $sp, 72 # 8-byte Folded Spill + addu16i.d $a0, $a4, 3 + addi.d $a0, $a0, 8 + st.d $a0, $sp, 64 # 8-byte Folded Spill lu12i.w $a0, 60 ori $a0, $a0, 8 - add.d $a0, $a1, $a0 - st.d $a0, $sp, 64 # 8-byte Folded Spill + add.d $a0, $a4, $a0 + st.d $a0, $sp, 56 # 8-byte Folded Spill lu12i.w $a0, 72 ori $a0, $a0, 8 - add.d $a0, $a1, $a0 - st.d $a0, $sp, 56 # 8-byte Folded Spill + add.d $a0, $a4, $a0 + st.d $a0, $sp, 48 # 8-byte Folded Spill lu12i.w $a0, 84 ori $a0, $a0, 8 - add.d $a0, $a1, $a0 - st.d $a0, $sp, 48 # 8-byte Folded Spill - addu16i.d $a0, $a1, 6 - addi.d $a0, $a0, 8 + add.d $a0, $a4, $a0 st.d $a0, $sp, 40 # 8-byte Folded Spill + addu16i.d $a0, $a4, 6 + addi.d $a0, $a0, 8 + st.d $a0, $sp, 32 # 8-byte Folded Spill lu12i.w $a0, 108 ori $a0, $a0, 8 - add.d $a0, $a1, $a0 - st.d $a0, $sp, 32 # 8-byte Folded Spill + add.d $a0, $a4, $a0 + st.d $a0, $sp, 24 # 8-byte Folded Spill lu12i.w $a0, 120 ori $a0, $a0, 8 - add.d $a0, $a1, $a0 - st.d $a0, $sp, 24 # 8-byte Folded Spill + add.d $a0, $a4, $a0 + st.d $a0, $sp, 16 # 8-byte Folded Spill lu12i.w $a0, 132 ori $a0, $a0, 8 - add.d $a0, $a1, $a0 - st.d $a0, $sp, 16 # 8-byte Folded Spill + add.d $a0, $a4, $a0 + st.d $a0, $sp, 8 # 8-byte Folded Spill lu12i.w $a0, 268288 lu32i.d $a0, -524288 lu52i.d $a0, $a0, 1048 - st.d $a0, $sp, 120 # 8-byte Folded Spill - ori $a0, $a2, 4 st.d $a0, $sp, 112 # 8-byte Folded Spill - add.d $a0, $a7, $a0 + lu12i.w $a0, -487882 + ori $a0, $a0, 2289 + lu32i.d $a0, 325813 + lu52i.d $a0, $a0, 1006 + st.d $a0, $sp, 136 # 8-byte Folded Spill + st.d $a1, $sp, 208 # 8-byte Folded Spill + add.d $a0, $a2, $a1 st.d $a0, $sp, 104 # 8-byte Folded Spill - ori $t0, $zero, 1024 + ori $a0, $a3, 4 + st.d $a0, $sp, 128 # 8-byte Folded Spill + add.d $a0, $a2, $a0 + st.d $a0, $sp, 96 # 8-byte Folded Spill + ori $a7, $zero, 1024 st.d $s8, $sp, 200 # 8-byte Folded Spill .p2align 4, , 16 .LBB2_3: # %.preheader119 @@ -3082,22 +3083,22 @@ begin: # @begin blez $a0, .LBB2_191 .LBB2_5: # %.preheader115.preheader # in Loop: Header=BB2_3 Depth=1 - move $s2, $zero - move $s4, $zero + move $s3, $zero + move $s5, $zero vldi $vr7, -1168 b .LBB2_7 .p2align 4, , 16 .LBB2_6: # in Loop: Header=BB2_7 Depth=2 st.w $a1, $fp, 8 st.w $a0, $fp, 4 - ld.d $s4, $sp, 272 # 8-byte Folded Reload - addi.d $s4, $s4, 1 - addi.d $s2, $s2, 1 + ld.d $s5, $sp, 272 # 8-byte Folded Reload + addi.d $s5, $s5, 1 + addi.d $s3, $s3, 1 ld.d $s8, $sp, 200 # 8-byte Folded Reload - ori $s5, $zero, 1 - lu12i.w $s3, 2 + ori $s6, $zero, 1 + lu12i.w $s4, 2 ori $a0, $zero, 12 - beq $s4, $a0, .LBB2_146 + beq $s5, $a0, .LBB2_146 .LBB2_7: # %.preheader115 # Parent Loop BB2_3 Depth=1 # => This Loop Header: Depth=2 @@ -3131,7 +3132,7 @@ begin: # @begin # Child Loop BB2_141 Depth 4 ld.d $a2, $sp, 208 # 8-byte Folded Reload move $fp, $zero - bne $s4, $s5, .LBB2_14 + bne $s5, $s6, .LBB2_14 # %bb.8: # %.lr.ph.split.i.us.preheader # in Loop: Header=BB2_7 Depth=2 ori $s0, $zero, 1024 @@ -3139,15 +3140,15 @@ begin: # @begin ori $a0, $a0, 2732 add.d $s1, $sp, $a0 ld.d $a0, $sp, 216 # 8-byte Folded Reload - ld.d $s3, $sp, 120 # 8-byte Folded Reload - ori $a1, $zero, 768 + ld.d $s2, $sp, 112 # 8-byte Folded Reload + ori $s4, $zero, 768 b .LBB2_10 .p2align 4, , 16 .LBB2_9: # in Loop: Header=BB2_10 Depth=3 - lu12i.w $a2, 185 - ori $a2, $a2, 680 - add.d $a2, $sp, $a2 - stptr.d $s3, $a2, 0 + lu12i.w $a1, 185 + ori $a1, $a1, 680 + add.d $a1, $sp, $a1 + stptr.d $s2, $a1, 0 addi.w $fp, $fp, 1 addi.d $s0, $s0, -1 addi.d $s1, $s1, 8 @@ -3156,7 +3157,7 @@ begin: # @begin # Parent Loop BB2_3 Depth=1 # Parent Loop BB2_7 Depth=2 # => This Inner Loop Header: Depth=3 - beq $s0, $a1, .LBB2_9 + beq $s0, $s4, .LBB2_9 # %bb.11: # in Loop: Header=BB2_10 Depth=3 movgr2fr.w $fa0, $fp ffint.d.w $fa1, $fa0 @@ -3181,20 +3182,19 @@ begin: # @begin fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 - ori $a1, $zero, 768 + ori $a7, $zero, 1024 vldi $vr7, -1168 - lu12i.w $a0, 184 - ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 ld.d $a0, $sp, 216 # 8-byte Folded Reload b .LBB2_12 .p2align 4, , 16 .LBB2_14: # %.lr.ph.split.us.i.preheader.preheader # in Loop: Header=BB2_7 Depth=2 lu12i.w $s0, -2 - lu12i.w $a1, 6 - ld.d $s1, $sp, 136 # 8-byte Folded Reload + lu12i.w $s1, 6 + ld.d $s2, $sp, 120 # 8-byte Folded Reload + lu12i.w $a0, 184 + ori $a0, $a0, 2728 + add.d $a1, $sp, $a0 .p2align 4, , 16 .LBB2_15: # %.lr.ph.split.us.i # Parent Loop BB2_3 Depth=1 @@ -3209,13 +3209,13 @@ begin: # @begin # in Loop: Header=BB2_15 Depth=3 fcvt.s.d $fa0, $fa0 fneg.s $fa1, $fa0 - add.d $a0, $a7, $s0 - fstx.s $fa1, $a0, $s3 + add.d $a0, $a1, $s0 + fstx.s $fa1, $a0, $s4 fadd.s $fa0, $fa0, $fa7 fneg.s $fa0, $fa0 - fstx.s $fa0, $a0, $s1 + fstx.s $fa0, $a0, $a2 addi.d $s0, $s0, 8 - add.w $fp, $fp, $s2 + add.w $fp, $fp, $s3 bnez $s0, .LBB2_15 b .LBB2_24 .LBB2_17: # %call.sqrt880 @@ -3223,27 +3223,26 @@ begin: # @begin fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 + ori $a7, $zero, 1024 vldi $vr7, -1168 + ld.d $a2, $sp, 208 # 8-byte Folded Reload lu12i.w $a0, 184 ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 - ld.d $a2, $sp, 208 # 8-byte Folded Reload - lu12i.w $a1, 6 + add.d $a1, $sp, $a0 b .LBB2_16 .p2align 4, , 16 .LBB2_18: # %.lr.ph.split.i.us.1.preheader # in Loop: Header=BB2_7 Depth=2 move $fp, $zero ori $s0, $zero, 1024 - ld.d $s1, $sp, 128 # 8-byte Folded Reload + ld.d $s1, $sp, 104 # 8-byte Folded Reload b .LBB2_20 .p2align 4, , 16 .LBB2_19: # in Loop: Header=BB2_20 Depth=3 - lu12i.w $a2, 187 - ori $a2, $a2, 680 - add.d $a2, $sp, $a2 - stptr.d $s3, $a2, 0 + lu12i.w $a1, 187 + ori $a1, $a1, 680 + add.d $a1, $sp, $a1 + stptr.d $s2, $a1, 0 addi.w $fp, $fp, 1 addi.d $s0, $s0, -1 addi.d $s1, $s1, 8 @@ -3252,7 +3251,7 @@ begin: # @begin # Parent Loop BB2_3 Depth=1 # Parent Loop BB2_7 Depth=2 # => This Inner Loop Header: Depth=3 - beq $s0, $a1, .LBB2_19 + beq $s0, $s4, .LBB2_19 # %bb.21: # in Loop: Header=BB2_20 Depth=3 movgr2fr.w $fa0, $fp ffint.d.w $fa1, $fa0 @@ -3277,12 +3276,8 @@ begin: # @begin fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 - ori $a1, $zero, 768 + ori $a7, $zero, 1024 vldi $vr7, -1168 - lu12i.w $a0, 184 - ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 ld.d $a0, $sp, 216 # 8-byte Folded Reload b .LBB2_22 .p2align 4, , 16 @@ -3290,8 +3285,8 @@ begin: # @begin # in Loop: Header=BB2_7 Depth=2 move $fp, $zero lu12i.w $s0, -2 - lu12i.w $s1, 4 - ld.d $s3, $sp, 112 # 8-byte Folded Reload + lu12i.w $s4, 4 + ld.d $a2, $sp, 128 # 8-byte Folded Reload .p2align 4, , 16 .LBB2_25: # %.lr.ph.split.us.i.1 # Parent Loop BB2_3 Depth=1 @@ -3306,13 +3301,13 @@ begin: # @begin # in Loop: Header=BB2_25 Depth=3 fcvt.s.d $fa0, $fa0 fneg.s $fa1, $fa0 - add.d $a0, $a7, $s0 - fstx.s $fa1, $a0, $s1 + add.d $a0, $a1, $s0 + fstx.s $fa1, $a0, $s4 fadd.s $fa0, $fa0, $fa7 fneg.s $fa0, $fa0 - fstx.s $fa0, $a0, $s3 + fstx.s $fa0, $a0, $a2 addi.d $s0, $s0, 8 - add.w $fp, $fp, $s2 + add.w $fp, $fp, $s3 bnez $s0, .LBB2_25 b .LBB2_34 .LBB2_27: # %call.sqrt881 @@ -3320,27 +3315,26 @@ begin: # @begin fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 + ori $a7, $zero, 1024 + ld.d $a2, $sp, 128 # 8-byte Folded Reload vldi $vr7, -1168 lu12i.w $a0, 184 ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 - ld.d $a2, $sp, 208 # 8-byte Folded Reload - lu12i.w $a1, 6 + add.d $a1, $sp, $a0 b .LBB2_26 .p2align 4, , 16 .LBB2_28: # %.lr.ph.split.i.us.2.preheader # in Loop: Header=BB2_7 Depth=2 move $fp, $zero ori $s0, $zero, 1024 - ld.d $s1, $sp, 104 # 8-byte Folded Reload + ld.d $s1, $sp, 96 # 8-byte Folded Reload b .LBB2_30 .p2align 4, , 16 .LBB2_29: # in Loop: Header=BB2_30 Depth=3 - lu12i.w $a2, 189 - ori $a2, $a2, 680 - add.d $a2, $sp, $a2 - stptr.d $s3, $a2, 0 + lu12i.w $a1, 189 + ori $a1, $a1, 680 + add.d $a1, $sp, $a1 + stptr.d $s2, $a1, 0 addi.w $fp, $fp, 1 addi.d $s0, $s0, -1 addi.d $s1, $s1, 8 @@ -3349,7 +3343,7 @@ begin: # @begin # Parent Loop BB2_3 Depth=1 # Parent Loop BB2_7 Depth=2 # => This Inner Loop Header: Depth=3 - beq $s0, $a1, .LBB2_29 + beq $s0, $s4, .LBB2_29 # %bb.31: # in Loop: Header=BB2_30 Depth=3 movgr2fr.w $fa0, $fp ffint.d.w $fa1, $fa0 @@ -3374,12 +3368,8 @@ begin: # @begin fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 - ori $a1, $zero, 768 + ori $a7, $zero, 1024 vldi $vr7, -1168 - lu12i.w $a0, 184 - ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 ld.d $a0, $sp, 216 # 8-byte Folded Reload b .LBB2_32 .p2align 4, , 16 @@ -3401,13 +3391,13 @@ begin: # @begin # in Loop: Header=BB2_35 Depth=3 fcvt.s.d $fa0, $fa0 fneg.s $fa1, $fa0 - add.d $a0, $a7, $s0 - fstx.s $fa1, $a0, $a1 + add.d $a0, $a1, $s0 + fstx.s $fa1, $a0, $s1 fadd.s $fa0, $fa0, $fa7 fneg.s $fa0, $fa0 - fstx.s $fa0, $a0, $a2 + fstx.s $fa0, $a0, $s2 addi.d $s0, $s0, 8 - add.w $fp, $fp, $s2 + add.w $fp, $fp, $s3 bnez $s0, .LBB2_35 b .LBB2_44 .LBB2_37: # %call.sqrt882 @@ -3415,13 +3405,11 @@ begin: # @begin fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 + ori $a7, $zero, 1024 vldi $vr7, -1168 lu12i.w $a0, 184 ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 - ld.d $a2, $sp, 208 # 8-byte Folded Reload - lu12i.w $a1, 6 + add.d $a1, $sp, $a0 b .LBB2_36 .p2align 4, , 16 .LBB2_38: # %.lr.ph.split.i.us.3.preheader @@ -3432,10 +3420,10 @@ begin: # @begin b .LBB2_40 .p2align 4, , 16 .LBB2_39: # in Loop: Header=BB2_40 Depth=3 - lu12i.w $a2, 191 - ori $a2, $a2, 680 - add.d $a2, $sp, $a2 - stptr.d $s3, $a2, 0 + lu12i.w $a1, 191 + ori $a1, $a1, 680 + add.d $a1, $sp, $a1 + stptr.d $s2, $a1, 0 addi.w $fp, $fp, 1 addi.d $s0, $s0, -1 addi.d $s1, $s1, 8 @@ -3444,7 +3432,7 @@ begin: # @begin # Parent Loop BB2_3 Depth=1 # Parent Loop BB2_7 Depth=2 # => This Inner Loop Header: Depth=3 - beq $s0, $a1, .LBB2_39 + beq $s0, $s4, .LBB2_39 # %bb.41: # in Loop: Header=BB2_40 Depth=3 movgr2fr.w $fa0, $fp ffint.d.w $fa1, $fa0 @@ -3469,12 +3457,8 @@ begin: # @begin fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 - ori $a1, $zero, 768 + ori $a7, $zero, 1024 vldi $vr7, -1168 - lu12i.w $a0, 184 - ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 ld.d $a0, $sp, 216 # 8-byte Folded Reload b .LBB2_42 .p2align 4, , 16 @@ -3504,7 +3488,7 @@ begin: # @begin fst.s $fa0, $s1, 0 addi.d $s0, $s0, -1 addi.d $s1, $s1, 8 - add.w $fp, $fp, $s2 + add.w $fp, $fp, $s3 bnez $s0, .LBB2_45 b .LBB2_54 .LBB2_47: # %call.sqrt883 @@ -3512,11 +3496,8 @@ begin: # @begin fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 + ori $a7, $zero, 1024 vldi $vr7, -1168 - lu12i.w $a0, 184 - ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 ld.d $a0, $sp, 216 # 8-byte Folded Reload b .LBB2_46 .p2align 4, , 16 @@ -3528,7 +3509,7 @@ begin: # @begin b .LBB2_50 .p2align 4, , 16 .LBB2_49: # in Loop: Header=BB2_50 Depth=3 - st.d $s3, $a0, 2044 + st.d $s2, $a0, 2044 addi.w $fp, $fp, 1 addi.d $s0, $s0, -1 addi.d $s1, $s1, 8 @@ -3537,7 +3518,7 @@ begin: # @begin # Parent Loop BB2_3 Depth=1 # Parent Loop BB2_7 Depth=2 # => This Inner Loop Header: Depth=3 - beq $s0, $a1, .LBB2_49 + beq $s0, $s4, .LBB2_49 # %bb.51: # in Loop: Header=BB2_50 Depth=3 movgr2fr.w $fa0, $fp ffint.d.w $fa1, $fa0 @@ -3562,12 +3543,8 @@ begin: # @begin fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 - ori $a1, $zero, 768 + ori $a7, $zero, 1024 vldi $vr7, -1168 - lu12i.w $a0, 184 - ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 ld.d $a0, $sp, 216 # 8-byte Folded Reload b .LBB2_52 .p2align 4, , 16 @@ -3596,7 +3573,7 @@ begin: # @begin fst.s $fa0, $s1, 0 addi.d $s0, $s0, -1 addi.d $s1, $s1, 8 - add.w $fp, $fp, $s2 + add.w $fp, $fp, $s3 bnez $s0, .LBB2_55 b .LBB2_64 .LBB2_57: # %call.sqrt884 @@ -3604,11 +3581,8 @@ begin: # @begin fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 + ori $a7, $zero, 1024 vldi $vr7, -1168 - lu12i.w $a0, 184 - ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 b .LBB2_56 .p2align 4, , 16 .LBB2_58: # %.lr.ph.split.i.us.5.preheader @@ -3619,7 +3593,7 @@ begin: # @begin b .LBB2_60 .p2align 4, , 16 .LBB2_59: # in Loop: Header=BB2_60 Depth=3 - stptr.d $s3, $a0, 10236 + stptr.d $s2, $a0, 10236 addi.w $fp, $fp, 1 addi.d $s0, $s0, -1 addi.d $s1, $s1, 8 @@ -3628,7 +3602,7 @@ begin: # @begin # Parent Loop BB2_3 Depth=1 # Parent Loop BB2_7 Depth=2 # => This Inner Loop Header: Depth=3 - beq $s0, $a1, .LBB2_59 + beq $s0, $s4, .LBB2_59 # %bb.61: # in Loop: Header=BB2_60 Depth=3 movgr2fr.w $fa0, $fp ffint.d.w $fa1, $fa0 @@ -3653,12 +3627,8 @@ begin: # @begin fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 - ori $a1, $zero, 768 + ori $a7, $zero, 1024 vldi $vr7, -1168 - lu12i.w $a0, 184 - ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 ld.d $a0, $sp, 216 # 8-byte Folded Reload b .LBB2_62 .p2align 4, , 16 @@ -3687,7 +3657,7 @@ begin: # @begin fst.s $fa0, $s1, 0 addi.d $s0, $s0, -1 addi.d $s1, $s1, 8 - add.w $fp, $fp, $s2 + add.w $fp, $fp, $s3 bnez $s0, .LBB2_65 b .LBB2_74 .LBB2_67: # %call.sqrt885 @@ -3695,11 +3665,8 @@ begin: # @begin fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 + ori $a7, $zero, 1024 vldi $vr7, -1168 - lu12i.w $a0, 184 - ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 b .LBB2_66 .p2align 4, , 16 .LBB2_68: # %.lr.ph.split.i.us.6.preheader @@ -3710,7 +3677,7 @@ begin: # @begin b .LBB2_70 .p2align 4, , 16 .LBB2_69: # in Loop: Header=BB2_70 Depth=3 - stptr.d $s3, $a0, 18428 + stptr.d $s2, $a0, 18428 addi.w $fp, $fp, 1 addi.d $s0, $s0, -1 addi.d $s1, $s1, 8 @@ -3719,7 +3686,7 @@ begin: # @begin # Parent Loop BB2_3 Depth=1 # Parent Loop BB2_7 Depth=2 # => This Inner Loop Header: Depth=3 - beq $s0, $a1, .LBB2_69 + beq $s0, $s4, .LBB2_69 # %bb.71: # in Loop: Header=BB2_70 Depth=3 movgr2fr.w $fa0, $fp ffint.d.w $fa1, $fa0 @@ -3744,12 +3711,8 @@ begin: # @begin fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 - ori $a1, $zero, 768 + ori $a7, $zero, 1024 vldi $vr7, -1168 - lu12i.w $a0, 184 - ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 ld.d $a0, $sp, 216 # 8-byte Folded Reload b .LBB2_72 .p2align 4, , 16 @@ -3778,7 +3741,7 @@ begin: # @begin fst.s $fa0, $s1, 0 addi.d $s0, $s0, -1 addi.d $s1, $s1, 8 - add.w $fp, $fp, $s2 + add.w $fp, $fp, $s3 bnez $s0, .LBB2_75 b .LBB2_84 .LBB2_77: # %call.sqrt886 @@ -3786,11 +3749,8 @@ begin: # @begin fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 + ori $a7, $zero, 1024 vldi $vr7, -1168 - lu12i.w $a0, 184 - ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 b .LBB2_76 .p2align 4, , 16 .LBB2_78: # %.lr.ph.split.i.us.7.preheader @@ -3801,7 +3761,7 @@ begin: # @begin b .LBB2_80 .p2align 4, , 16 .LBB2_79: # in Loop: Header=BB2_80 Depth=3 - stptr.d $s3, $a0, 26620 + stptr.d $s2, $a0, 26620 addi.w $fp, $fp, 1 addi.d $s0, $s0, -1 addi.d $s1, $s1, 8 @@ -3810,7 +3770,7 @@ begin: # @begin # Parent Loop BB2_3 Depth=1 # Parent Loop BB2_7 Depth=2 # => This Inner Loop Header: Depth=3 - beq $s0, $a1, .LBB2_79 + beq $s0, $s4, .LBB2_79 # %bb.81: # in Loop: Header=BB2_80 Depth=3 movgr2fr.w $fa0, $fp ffint.d.w $fa1, $fa0 @@ -3835,12 +3795,8 @@ begin: # @begin fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 - ori $a1, $zero, 768 + ori $a7, $zero, 1024 vldi $vr7, -1168 - lu12i.w $a0, 184 - ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 ld.d $a0, $sp, 216 # 8-byte Folded Reload b .LBB2_82 .p2align 4, , 16 @@ -3869,7 +3825,7 @@ begin: # @begin fst.s $fa0, $s1, 0 addi.d $s0, $s0, -1 addi.d $s1, $s1, 8 - add.w $fp, $fp, $s2 + add.w $fp, $fp, $s3 bnez $s0, .LBB2_85 b .LBB2_94 .LBB2_87: # %call.sqrt887 @@ -3877,11 +3833,8 @@ begin: # @begin fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 + ori $a7, $zero, 1024 vldi $vr7, -1168 - lu12i.w $a0, 184 - ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 b .LBB2_86 .p2align 4, , 16 .LBB2_88: # %.lr.ph.split.i.us.8.preheader @@ -3892,7 +3845,7 @@ begin: # @begin b .LBB2_90 .p2align 4, , 16 .LBB2_89: # in Loop: Header=BB2_90 Depth=3 - st.d $s3, $s8, 2044 + st.d $s2, $s8, 2044 addi.w $fp, $fp, 1 addi.d $s0, $s0, -1 addi.d $s1, $s1, 8 @@ -3901,7 +3854,7 @@ begin: # @begin # Parent Loop BB2_3 Depth=1 # Parent Loop BB2_7 Depth=2 # => This Inner Loop Header: Depth=3 - beq $s0, $a1, .LBB2_89 + beq $s0, $s4, .LBB2_89 # %bb.91: # in Loop: Header=BB2_90 Depth=3 movgr2fr.w $fa0, $fp ffint.d.w $fa1, $fa0 @@ -3926,12 +3879,8 @@ begin: # @begin fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 - ori $a1, $zero, 768 + ori $a7, $zero, 1024 vldi $vr7, -1168 - lu12i.w $a0, 184 - ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 b .LBB2_92 .p2align 4, , 16 .LBB2_94: # %.lr.ph.split.us.i.8.preheader @@ -3959,7 +3908,7 @@ begin: # @begin fst.s $fa0, $s1, 0 addi.d $s0, $s0, -1 addi.d $s1, $s1, 8 - add.w $fp, $fp, $s2 + add.w $fp, $fp, $s3 bnez $s0, .LBB2_95 b .LBB2_104 .LBB2_97: # %call.sqrt888 @@ -3967,11 +3916,8 @@ begin: # @begin fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 + ori $a7, $zero, 1024 vldi $vr7, -1168 - lu12i.w $a0, 184 - ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 b .LBB2_96 .p2align 4, , 16 .LBB2_98: # %.lr.ph.split.i.us.9.preheader @@ -3982,7 +3928,7 @@ begin: # @begin b .LBB2_100 .p2align 4, , 16 .LBB2_99: # in Loop: Header=BB2_100 Depth=3 - stptr.d $s3, $s8, 10236 + stptr.d $s2, $s8, 10236 addi.w $fp, $fp, 1 addi.d $s0, $s0, -1 addi.d $s1, $s1, 8 @@ -3991,7 +3937,7 @@ begin: # @begin # Parent Loop BB2_3 Depth=1 # Parent Loop BB2_7 Depth=2 # => This Inner Loop Header: Depth=3 - beq $s0, $a1, .LBB2_99 + beq $s0, $s4, .LBB2_99 # %bb.101: # in Loop: Header=BB2_100 Depth=3 movgr2fr.w $fa0, $fp ffint.d.w $fa1, $fa0 @@ -4016,12 +3962,8 @@ begin: # @begin fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 - ori $a1, $zero, 768 + ori $a7, $zero, 1024 vldi $vr7, -1168 - lu12i.w $a0, 184 - ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 b .LBB2_102 .p2align 4, , 16 .LBB2_104: # %.lr.ph.split.us.i.9.preheader @@ -4049,7 +3991,7 @@ begin: # @begin fst.s $fa0, $s1, 0 addi.d $s0, $s0, -1 addi.d $s1, $s1, 8 - add.w $fp, $fp, $s2 + add.w $fp, $fp, $s3 bnez $s0, .LBB2_105 b .LBB2_114 .LBB2_107: # %call.sqrt889 @@ -4057,11 +3999,8 @@ begin: # @begin fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 + ori $a7, $zero, 1024 vldi $vr7, -1168 - lu12i.w $a0, 184 - ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 b .LBB2_106 .p2align 4, , 16 .LBB2_108: # %.lr.ph.split.i.us.10.preheader @@ -4072,7 +4011,7 @@ begin: # @begin b .LBB2_110 .p2align 4, , 16 .LBB2_109: # in Loop: Header=BB2_110 Depth=3 - stptr.d $s3, $s8, 18428 + stptr.d $s2, $s8, 18428 addi.w $fp, $fp, 1 addi.d $s0, $s0, -1 addi.d $s1, $s1, 8 @@ -4081,7 +4020,7 @@ begin: # @begin # Parent Loop BB2_3 Depth=1 # Parent Loop BB2_7 Depth=2 # => This Inner Loop Header: Depth=3 - beq $s0, $a1, .LBB2_109 + beq $s0, $s4, .LBB2_109 # %bb.111: # in Loop: Header=BB2_110 Depth=3 movgr2fr.w $fa0, $fp ffint.d.w $fa1, $fa0 @@ -4106,12 +4045,8 @@ begin: # @begin fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 - ori $a1, $zero, 768 + ori $a7, $zero, 1024 vldi $vr7, -1168 - lu12i.w $a0, 184 - ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 b .LBB2_112 .p2align 4, , 16 .LBB2_114: # %.lr.ph.split.us.i.10.preheader @@ -4139,7 +4074,7 @@ begin: # @begin fst.s $fa0, $s1, 0 addi.d $s0, $s0, -1 addi.d $s1, $s1, 8 - add.w $fp, $fp, $s2 + add.w $fp, $fp, $s3 bnez $s0, .LBB2_115 b .LBB2_124 .LBB2_117: # %call.sqrt890 @@ -4147,11 +4082,8 @@ begin: # @begin fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 + ori $a7, $zero, 1024 vldi $vr7, -1168 - lu12i.w $a0, 184 - ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 b .LBB2_116 .p2align 4, , 16 .LBB2_118: # %.lr.ph.split.i.us.11.preheader @@ -4162,7 +4094,7 @@ begin: # @begin b .LBB2_120 .p2align 4, , 16 .LBB2_119: # in Loop: Header=BB2_120 Depth=3 - stptr.d $s3, $s8, 26620 + stptr.d $s2, $s8, 26620 addi.w $fp, $fp, 1 addi.d $s0, $s0, -1 addi.d $s1, $s1, 8 @@ -4171,7 +4103,7 @@ begin: # @begin # Parent Loop BB2_3 Depth=1 # Parent Loop BB2_7 Depth=2 # => This Inner Loop Header: Depth=3 - beq $s0, $a1, .LBB2_119 + beq $s0, $s4, .LBB2_119 # %bb.121: # in Loop: Header=BB2_120 Depth=3 movgr2fr.w $fa0, $fp ffint.d.w $fa1, $fa0 @@ -4196,12 +4128,8 @@ begin: # @begin fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 - ori $a1, $zero, 768 + ori $a7, $zero, 1024 vldi $vr7, -1168 - lu12i.w $a0, 184 - ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 b .LBB2_122 .p2align 4, , 16 .LBB2_124: # %.lr.ph.split.us.i.11.preheader @@ -4229,7 +4157,7 @@ begin: # @begin fst.s $fa0, $s1, 0 addi.d $s0, $s0, -1 addi.d $s1, $s1, 8 - add.w $fp, $fp, $s2 + add.w $fp, $fp, $s3 bnez $s0, .LBB2_125 b .LBB2_128 .LBB2_127: # %call.sqrt891 @@ -4237,58 +4165,58 @@ begin: # @begin fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 + ori $a7, $zero, 1024 vldi $vr7, -1168 - lu12i.w $a0, 184 - ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 b .LBB2_126 .p2align 4, , 16 .LBB2_128: # %.preheader114 # in Loop: Header=BB2_7 Depth=2 - st.d $s4, $sp, 272 # 8-byte Folded Spill - slli.d $a1, $s4, 5 + st.d $s5, $sp, 272 # 8-byte Folded Spill + slli.d $a1, $s5, 5 lu12i.w $a0, 208 ori $a0, $a0, 3240 add.d $a0, $sp, $a0 - ldx.w $s3, $a1, $a0 - move $s6, $zero - move $s5, $a1 + ldx.w $s4, $a1, $a0 + move $s0, $zero + move $s6, $a1 add.d $fp, $a0, $a1 - addi.d $s7, $s3, -1 - slli.w $s8, $s3, 1 - ld.d $s0, $fp, 24 + addi.d $s8, $s4, -1 + slli.w $s7, $s4, 1 + ld.d $s1, $fp, 24 ld.w $a1, $fp, 8 ld.w $a0, $fp, 4 ori $a4, $zero, 1 - slt $a2, $a4, $s8 - maskeqz $a3, $s8, $a2 + slt $a2, $a4, $s7 + maskeqz $a3, $s7, $a2 masknez $a2, $a4, $a2 or $a2, $a3, $a2 ld.d $a3, $sp, 248 # 8-byte Folded Reload - alsl.w $s4, $s3, $a3, 1 - slli.d $s1, $a2, 2 + alsl.w $s5, $s4, $a3, 1 + slli.d $s2, $a2, 2 b .LBB2_130 .p2align 4, , 16 .LBB2_129: # %BeamFirFilter.exit # in Loop: Header=BB2_130 Depth=3 - addi.d $s6, $s6, 1 - beq $s6, $t0, .LBB2_137 + addi.d $s0, $s0, 1 + beq $s0, $a7, .LBB2_137 .LBB2_130: # Parent Loop BB2_3 Depth=1 # Parent Loop BB2_7 Depth=2 # => This Loop Header: Depth=3 # Child Loop BB2_132 Depth 4 - alsl.d $a3, $s6, $a7, 3 - slli.d $a2, $s6, 3 - fldx.s $fa0, $a2, $a7 - sub.d $a2, $s7, $a1 + lu12i.w $a2, 184 + ori $a2, $a2, 2728 + add.d $a4, $sp, $a2 + alsl.d $a3, $s0, $a4, 3 + slli.d $a2, $s0, 3 + fldx.s $fa0, $a2, $a4 + sub.d $a2, $s8, $a1 slli.w $a2, $a2, 1 slli.d $a4, $a2, 2 - fstx.s $fa0, $s0, $a4 + fstx.s $fa0, $s1, $a4 fld.s $fa0, $a3, 4 - alsl.d $a3, $a2, $s0, 2 + alsl.d $a3, $a2, $s1, 2 fst.s $fa0, $a3, 4 - blez $s3, .LBB2_133 + blez $s4, .LBB2_133 # %bb.131: # %.lr.ph.i # in Loop: Header=BB2_130 Depth=3 ld.d $a4, $fp, 16 @@ -4301,9 +4229,9 @@ begin: # @begin # Parent Loop BB2_7 Depth=2 # Parent Loop BB2_130 Depth=3 # => This Inner Loop Header: Depth=4 - alsl.d $a5, $a2, $s0, 2 + alsl.d $a5, $a2, $s1, 2 slli.d $a6, $a2, 2 - fldx.s $fa2, $s0, $a6 + fldx.s $fa2, $s1, $a6 fld.s $fa3, $a5, 4 fld.s $fa4, $a4, 0 fld.s $fa5, $a4, -4 @@ -4314,10 +4242,10 @@ begin: # @begin fadd.s $fa1, $fa1, $fa6 fadd.s $fa0, $fa0, $fa2 addi.w $a2, $a2, 2 - and $a2, $a2, $s4 + and $a2, $a2, $s5 addi.d $a3, $a3, 2 addi.d $a4, $a4, 8 - blt $a3, $s8, .LBB2_132 + blt $a3, $s7, .LBB2_132 b .LBB2_134 .p2align 4, , 16 .LBB2_133: # in Loop: Header=BB2_130 Depth=3 @@ -4328,9 +4256,9 @@ begin: # @begin lu12i.w $a2, 160 ori $a2, $a2, 2728 add.d $a2, $sp, $a2 - alsl.d $a2, $s6, $a2, 3 + alsl.d $a2, $s0, $a2, 3 addi.d $a1, $a1, 1 - and $a1, $a1, $s7 + and $a1, $a1, $s8 fst.s $fa1, $a2, 0 fst.s $fa0, $a2, 4 addi.w $a2, $a0, 1 @@ -4338,36 +4266,33 @@ begin: # @begin sltu $a3, $zero, $a0 maskeqz $a0, $a2, $a3 maskeqz $a1, $a1, $a3 - bne $a2, $t0, .LBB2_129 + bne $a2, $a7, .LBB2_129 # %bb.135: # %._crit_edge.i # in Loop: Header=BB2_130 Depth=3 - blez $s3, .LBB2_129 + blez $s4, .LBB2_129 # %bb.136: # %.lr.ph63.preheader.i # in Loop: Header=BB2_130 Depth=3 - move $a0, $s0 + move $a0, $s1 move $a1, $zero - move $a2, $s1 + move $a2, $s2 pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 + ori $a7, $zero, 1024 vldi $vr7, -1168 - lu12i.w $a0, 184 - ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 move $a0, $zero move $a1, $zero b .LBB2_129 .p2align 4, , 16 .LBB2_137: # %.preheader113 # in Loop: Header=BB2_7 Depth=2 - move $s3, $zero + move $s4, $zero st.w $a1, $fp, 8 st.w $a0, $fp, 4 lu12i.w $a0, 208 ori $a0, $a0, 2856 add.d $a0, $sp, $a0 - add.d $fp, $a0, $s5 - ldx.w $s6, $s5, $a0 + add.d $fp, $a0, $s6 + ldx.w $s0, $s6, $a0 ld.d $a0, $sp, 272 # 8-byte Folded Reload alsl.d $a0, $a0, $a0, 1 slli.d $a0, $a0, 14 @@ -4375,31 +4300,31 @@ begin: # @begin ori $a1, $a1, 2728 add.d $a1, $sp, $a1 add.d $s7, $a1, $a0 - addi.d $s8, $s6, -1 - slli.w $s4, $s6, 1 - ld.d $s0, $fp, 24 + addi.d $s8, $s0, -1 + slli.w $s5, $s0, 1 + ld.d $s1, $fp, 24 ld.w $a1, $fp, 8 ld.w $a0, $fp, 4 ori $a4, $zero, 1 - slt $a2, $a4, $s4 - maskeqz $a3, $s4, $a2 + slt $a2, $a4, $s5 + maskeqz $a3, $s5, $a2 masknez $a2, $a4, $a2 or $a2, $a3, $a2 ld.d $a3, $sp, 248 # 8-byte Folded Reload - alsl.w $s5, $s6, $a3, 1 - slli.d $s1, $a2, 2 + alsl.w $s6, $s0, $a3, 1 + slli.d $s2, $a2, 2 b .LBB2_139 .p2align 4, , 16 .LBB2_138: # %BeamFirFilter.exit85 # in Loop: Header=BB2_139 Depth=3 - addi.d $s3, $s3, 1 + addi.d $s4, $s4, 1 ori $a2, $zero, 512 - beq $s3, $a2, .LBB2_6 + beq $s4, $a2, .LBB2_6 .LBB2_139: # Parent Loop BB2_3 Depth=1 # Parent Loop BB2_7 Depth=2 # => This Loop Header: Depth=3 # Child Loop BB2_141 Depth 4 - slli.d $a2, $s3, 4 + slli.d $a2, $s4, 4 lu12i.w $a3, 160 ori $a3, $a3, 2728 add.d $a3, $sp, $a3 @@ -4407,8 +4332,8 @@ begin: # @begin sub.d $a2, $s8, $a1 slli.w $a2, $a2, 1 slli.d $a4, $a2, 2 - stx.d $a3, $s0, $a4 - blez $s6, .LBB2_142 + stx.d $a3, $s1, $a4 + blez $s0, .LBB2_142 # %bb.140: # %.lr.ph.i79 # in Loop: Header=BB2_139 Depth=3 ld.d $a4, $fp, 16 @@ -4421,9 +4346,9 @@ begin: # @begin # Parent Loop BB2_7 Depth=2 # Parent Loop BB2_139 Depth=3 # => This Inner Loop Header: Depth=4 - alsl.d $a5, $a2, $s0, 2 + alsl.d $a5, $a2, $s1, 2 slli.d $a6, $a2, 2 - fldx.s $fa2, $s0, $a6 + fldx.s $fa2, $s1, $a6 fld.s $fa3, $a5, 4 fld.s $fa4, $a4, 0 fld.s $fa5, $a4, -4 @@ -4434,10 +4359,10 @@ begin: # @begin fadd.s $fa1, $fa1, $fa6 fadd.s $fa0, $fa0, $fa2 addi.w $a2, $a2, 2 - and $a2, $a2, $s5 + and $a2, $a2, $s6 addi.d $a3, $a3, 2 addi.d $a4, $a4, 8 - blt $a3, $s4, .LBB2_141 + blt $a3, $s5, .LBB2_141 b .LBB2_143 .p2align 4, , 16 .LBB2_142: # in Loop: Header=BB2_139 Depth=3 @@ -4445,7 +4370,7 @@ begin: # @begin fmov.s $fa1, $fa0 .LBB2_143: # %._crit_edge.i74 # in Loop: Header=BB2_139 Depth=3 - alsl.d $a2, $s3, $s7, 3 + alsl.d $a2, $s4, $s7, 3 addi.d $a1, $a1, 1 and $a1, $a1, $s8 fst.s $fa1, $a2, 0 @@ -4455,22 +4380,19 @@ begin: # @begin sltu $a3, $zero, $a0 maskeqz $a0, $a2, $a3 maskeqz $a1, $a1, $a3 - bne $a2, $t0, .LBB2_138 + bne $a2, $a7, .LBB2_138 # %bb.144: # %._crit_edge.i74 # in Loop: Header=BB2_139 Depth=3 - blez $s6, .LBB2_138 + blez $s0, .LBB2_138 # %bb.145: # %.lr.ph63.preheader.i77 # in Loop: Header=BB2_139 Depth=3 - move $a0, $s0 + move $a0, $s1 move $a1, $zero - move $a2, $s1 + move $a2, $s2 pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ori $t0, $zero, 1024 + ori $a7, $zero, 1024 vldi $vr7, -1168 - lu12i.w $a0, 184 - ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 move $a0, $zero move $a1, $zero b .LBB2_138 @@ -4480,16 +4402,17 @@ begin: # @begin lu12i.w $a0, 4 ori $a0, $a0, 2444 add.d $a0, $sp, $a0 - lu12i.w $t0, -1 - move $a1, $t0 + lu12i.w $a6, -1 + move $a1, $a6 + ld.d $a4, $sp, 88 # 8-byte Folded Reload lu12i.w $a2, 16 ori $a2, $a2, 2736 - add.d $a4, $sp, $a2 + add.d $a5, $sp, $a2 .p2align 4, , 16 .LBB2_147: # %vector.body393 # Parent Loop BB2_3 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $a2, $a4, $a1 + add.d $a2, $a5, $a1 ldptr.d $a3, $a2, 4088 ldptr.d $a2, $a2, 4096 st.d $a3, $a0, -100 @@ -4502,11 +4425,8 @@ begin: # @begin lu12i.w $a0, 4 ori $a0, $a0, 2452 add.d $a0, $sp, $a0 - move $a1, $t0 - ld.d $a4, $sp, 96 # 8-byte Folded Reload - ld.d $a5, $sp, 88 # 8-byte Folded Reload - ld.d $a6, $sp, 80 # 8-byte Folded Reload - ld.d $a7, $sp, 72 # 8-byte Folded Reload + move $a1, $a6 + ld.d $a5, $sp, 8 # 8-byte Folded Reload .p2align 4, , 16 .LBB2_149: # %vector.body387 # Parent Loop BB2_3 Depth=1 @@ -4524,12 +4444,13 @@ begin: # @begin lu12i.w $a0, 4 ori $a0, $a0, 2460 add.d $a0, $sp, $a0 - move $a1, $t0 + move $a1, $a6 + ld.d $a4, $sp, 80 # 8-byte Folded Reload .p2align 4, , 16 .LBB2_151: # %vector.body381 # Parent Loop BB2_3 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $a2, $a5, $a1 + add.d $a2, $a4, $a1 ldptr.d $a3, $a2, 4088 ldptr.d $a2, $a2, 4096 st.d $a3, $a0, -100 @@ -4542,12 +4463,13 @@ begin: # @begin lu12i.w $a0, 4 ori $a0, $a0, 2468 add.d $a0, $sp, $a0 - move $a1, $t0 + move $a1, $a6 + ld.d $a4, $sp, 72 # 8-byte Folded Reload .p2align 4, , 16 .LBB2_153: # %vector.body375 # Parent Loop BB2_3 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $a2, $a6, $a1 + add.d $a2, $a4, $a1 ldptr.d $a3, $a2, 4088 ldptr.d $a2, $a2, 4096 st.d $a3, $a0, -100 @@ -4560,12 +4482,13 @@ begin: # @begin lu12i.w $a0, 4 ori $a0, $a0, 2476 add.d $a0, $sp, $a0 - move $a1, $t0 + move $a1, $a6 + ld.d $a4, $sp, 64 # 8-byte Folded Reload .p2align 4, , 16 .LBB2_155: # %vector.body369 # Parent Loop BB2_3 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $a2, $a7, $a1 + add.d $a2, $a4, $a1 ldptr.d $a3, $a2, 4088 ldptr.d $a2, $a2, 4096 st.d $a3, $a0, -100 @@ -4578,8 +4501,8 @@ begin: # @begin lu12i.w $a0, 4 ori $a0, $a0, 2484 add.d $a0, $sp, $a0 - move $a1, $t0 - ld.d $a4, $sp, 64 # 8-byte Folded Reload + move $a1, $a6 + ld.d $a4, $sp, 56 # 8-byte Folded Reload .p2align 4, , 16 .LBB2_157: # %vector.body363 # Parent Loop BB2_3 Depth=1 @@ -4597,11 +4520,8 @@ begin: # @begin lu12i.w $a0, 4 ori $a0, $a0, 2492 add.d $a0, $sp, $a0 - move $a1, $t0 - ld.d $a4, $sp, 56 # 8-byte Folded Reload - ld.d $a5, $sp, 48 # 8-byte Folded Reload - ld.d $a6, $sp, 40 # 8-byte Folded Reload - ld.d $a7, $sp, 32 # 8-byte Folded Reload + move $a1, $a6 + ld.d $a4, $sp, 48 # 8-byte Folded Reload .p2align 4, , 16 .LBB2_159: # %vector.body357 # Parent Loop BB2_3 Depth=1 @@ -4619,12 +4539,13 @@ begin: # @begin lu12i.w $a0, 4 ori $a0, $a0, 2500 add.d $a0, $sp, $a0 - move $a1, $t0 + move $a1, $a6 + ld.d $a4, $sp, 40 # 8-byte Folded Reload .p2align 4, , 16 .LBB2_161: # %vector.body351 # Parent Loop BB2_3 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $a2, $a5, $a1 + add.d $a2, $a4, $a1 ldptr.d $a3, $a2, 4088 ldptr.d $a2, $a2, 4096 st.d $a3, $a0, -100 @@ -4637,12 +4558,13 @@ begin: # @begin lu12i.w $a0, 4 ori $a0, $a0, 2508 add.d $a0, $sp, $a0 - move $a1, $t0 + move $a1, $a6 + ld.d $a4, $sp, 32 # 8-byte Folded Reload .p2align 4, , 16 .LBB2_163: # %vector.body345 # Parent Loop BB2_3 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $a2, $a6, $a1 + add.d $a2, $a4, $a1 ldptr.d $a3, $a2, 4088 ldptr.d $a2, $a2, 4096 st.d $a3, $a0, -100 @@ -4655,12 +4577,13 @@ begin: # @begin lu12i.w $a0, 4 ori $a0, $a0, 2516 add.d $a0, $sp, $a0 - move $a1, $t0 + move $a1, $a6 + ld.d $a4, $sp, 24 # 8-byte Folded Reload .p2align 4, , 16 .LBB2_165: # %vector.body339 # Parent Loop BB2_3 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $a2, $a7, $a1 + add.d $a2, $a4, $a1 ldptr.d $a3, $a2, 4088 ldptr.d $a2, $a2, 4096 st.d $a3, $a0, -100 @@ -4673,8 +4596,8 @@ begin: # @begin lu12i.w $a0, 4 ori $a0, $a0, 2524 add.d $a0, $sp, $a0 - move $a1, $t0 - ld.d $a4, $sp, 24 # 8-byte Folded Reload + move $a1, $a6 + ld.d $a4, $sp, 16 # 8-byte Folded Reload .p2align 4, , 16 .LBB2_167: # %vector.body333 # Parent Loop BB2_3 Depth=1 @@ -4692,13 +4615,12 @@ begin: # @begin lu12i.w $a0, 4 ori $a0, $a0, 2532 add.d $a0, $sp, $a0 - move $a1, $t0 - ld.d $a4, $sp, 16 # 8-byte Folded Reload + move $a1, $a6 .p2align 4, , 16 .LBB2_169: # %vector.body327 # Parent Loop BB2_3 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $a2, $a4, $a1 + add.d $a2, $a5, $a1 ldptr.d $a3, $a2, 4088 ldptr.d $a2, $a2, 4096 st.d $a3, $a0, -100 @@ -5063,51 +4985,48 @@ begin: # @begin # in Loop: Header=BB2_171 Depth=2 lu12i.w $a0, 3 ori $a0, $a0, 2344 - add.d $s0, $sp, $a0 + add.d $s1, $sp, $a0 st.d $t6, $sp, 272 # 8-byte Folded Spill slli.d $a0, $t6, 5 lu12i.w $a1, 208 ori $a1, $a1, 2728 add.d $a1, $sp, $a1 ldx.w $fp, $a0, $a1 - move $s6, $zero - add.d $s3, $a1, $a0 + move $s0, $zero + add.d $s4, $a1, $a0 addi.d $s7, $fp, -1 - slli.w $s4, $fp, 1 - ld.d $s1, $s3, 24 - ld.w $a1, $s3, 8 - ld.w $a0, $s3, 4 - slt $a2, $s5, $s4 - maskeqz $a3, $s4, $a2 - masknez $a2, $s5, $a2 + slli.w $s5, $fp, 1 + ld.d $s2, $s4, 24 + ld.w $a1, $s4, 8 + ld.w $a0, $s4, 4 + slt $a2, $s6, $s5 + maskeqz $a3, $s5, $a2 + masknez $a2, $s6, $a2 or $a2, $a3, $a2 ld.d $a3, $sp, 248 # 8-byte Folded Reload - alsl.w $s5, $fp, $a3, 1 - slli.d $s2, $a2, 2 - lu12i.w $a2, 2 - ori $a2, $a2, 2344 - add.d $t0, $sp, $a2 + alsl.w $s6, $fp, $a3, 1 + slli.d $s3, $a2, 2 b .LBB2_175 .p2align 4, , 16 .LBB2_174: # %BeamFirFilter.exit97 # in Loop: Header=BB2_175 Depth=3 - addi.d $s6, $s6, 1 + addi.d $s0, $s0, 1 ori $a2, $zero, 512 - beq $s6, $a2, .LBB2_182 + beq $s0, $a2, .LBB2_182 .LBB2_175: # Parent Loop BB2_3 Depth=1 # Parent Loop BB2_171 Depth=2 # => This Loop Header: Depth=3 # Child Loop BB2_177 Depth 4 - slli.d $a2, $s6, 3 - ldx.d $a3, $a2, $s0 + slli.d $a2, $s0, 3 + ldx.d $a3, $a2, $s1 sub.d $a2, $s7, $a1 slli.w $a2, $a2, 1 slli.d $a4, $a2, 2 - stx.d $a3, $s1, $a4 + stx.d $a3, $s2, $a4 blez $fp, .LBB2_178 # %bb.176: # %.lr.ph.i91 # in Loop: Header=BB2_175 Depth=3 - ld.d $a4, $s3, 16 + ld.d $a4, $s4, 16 move $a3, $zero movgr2fr.w $fa1, $zero addi.d $a4, $a4, 4 @@ -5117,9 +5036,9 @@ begin: # @begin # Parent Loop BB2_171 Depth=2 # Parent Loop BB2_175 Depth=3 # => This Inner Loop Header: Depth=4 - alsl.d $a5, $a2, $s1, 2 + alsl.d $a5, $a2, $s2, 2 slli.d $a6, $a2, 2 - fldx.s $fa2, $s1, $a6 + fldx.s $fa2, $s2, $a6 fld.s $fa3, $a5, 4 fld.s $fa4, $a4, 0 fld.s $fa5, $a4, -4 @@ -5130,10 +5049,10 @@ begin: # @begin fadd.s $fa1, $fa1, $fa6 fadd.s $fa0, $fa0, $fa2 addi.w $a2, $a2, 2 - and $a2, $a2, $s5 + and $a2, $a2, $s6 addi.d $a3, $a3, 2 addi.d $a4, $a4, 8 - blt $a3, $s4, .LBB2_177 + blt $a3, $s5, .LBB2_177 b .LBB2_179 .p2align 4, , 16 .LBB2_178: # in Loop: Header=BB2_175 Depth=3 @@ -5141,7 +5060,10 @@ begin: # @begin fmov.s $fa1, $fa0 .LBB2_179: # %._crit_edge.i86 # in Loop: Header=BB2_175 Depth=3 - alsl.d $a2, $s6, $t0, 3 + lu12i.w $a2, 2 + ori $a2, $a2, 2344 + add.d $a2, $sp, $a2 + alsl.d $a2, $s0, $a2, 3 addi.d $a1, $a1, 1 and $a1, $a1, $s7 fst.s $fa1, $a2, 0 @@ -5158,14 +5080,11 @@ begin: # @begin blez $fp, .LBB2_174 # %bb.181: # %.lr.ph63.preheader.i89 # in Loop: Header=BB2_175 Depth=3 - move $a0, $s1 + move $a0, $s2 move $a1, $zero - move $a2, $s2 + move $a2, $s3 pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - lu12i.w $a0, 2 - ori $a0, $a0, 2344 - add.d $t0, $sp, $a0 move $a0, $zero move $a1, $zero b .LBB2_174 @@ -5177,13 +5096,16 @@ begin: # @begin ld.d $a2, $sp, 224 # 8-byte Folded Reload vld $vr1, $a2, %pc_lo12(.LCPI2_1) move $a2, $zero - st.w $a1, $s3, 8 - st.w $a0, $s3, 4 + st.w $a1, $s4, 8 + st.w $a0, $s4, 4 lu12i.w $s2, 1 lu12i.w $a0, 2 + ori $a0, $a0, 2344 + add.d $t0, $sp, $a0 + lu12i.w $a0, 2 ori $a0, $a0, 296 add.d $t1, $sp, $a0 - ori $s4, $zero, 2048 + ori $s3, $zero, 2048 .p2align 4, , 16 .LBB2_183: # %vector.body # Parent Loop BB2_3 Depth=1 @@ -5220,7 +5142,7 @@ begin: # @begin vaddi.du $vr1, $vr1, 4 addi.d $a2, $a2, 16 vaddi.du $vr0, $vr0, 4 - bne $a2, $s4, .LBB2_183 + bne $a2, $s3, .LBB2_183 # %bb.184: # %Magnitude.exit # in Loop: Header=BB2_171 Depth=2 lu12i.w $a0, 2 @@ -5232,8 +5154,8 @@ begin: # @begin jirl $ra, $ra, 0 ld.d $t6, $sp, 272 # 8-byte Folded Reload addi.d $t6, $t6, 1 - ori $s5, $zero, 1 - lu12i.w $s3, 2 + ori $s6, $zero, 1 + lu12i.w $s4, 2 ori $a0, $zero, 4 bne $t6, $a0, .LBB2_171 # %bb.185: # %.preheader.preheader @@ -5248,13 +5170,13 @@ begin: # @begin pcalau12i $a0, %pc_hi20(detector_out_StrictFP) addi.d $a0, $a0, %pc_lo12(detector_out_StrictFP) add.d $s1, $a0, $fp - fldx.s $fa1, $s0, $s4 - fldx.s $fa0, $s1, $s4 - pcalau12i $a0, %pc_hi20(.LCPI2_2) - fld.d $fs0, $a0, %pc_lo12(.LCPI2_2) + fldx.s $fa1, $s0, $s3 + fldx.s $fa0, $s1, $s3 fsub.s $fa2, $fa1, $fa0 fabs.s $fa2, $fa2 fcvt.d.s $fa2, $fa2 + ld.d $a0, $sp, 136 # 8-byte Folded Reload + movgr2fr.d $fs0, $a0 fcmp.cule.d $fcc0, $fa2, $fs0 bceqz $fcc0, .LBB2_192 # %bb.187: # %check_FP.exit @@ -5296,8 +5218,8 @@ begin: # @begin addi.d $a0, $a0, %pc_lo12(.L.str.1) pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - fldx.s $fa1, $s0, $s3 - fldx.s $fa0, $s1, $s3 + fldx.s $fa1, $s0, $s4 + fldx.s $fa0, $s1, $s4 fsub.s $fa2, $fa1, $fa0 fabs.s $fa2, $fa2 fcvt.d.s $fa2, $fa2 @@ -5312,10 +5234,7 @@ begin: # @begin pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 addi.d $fp, $fp, 4 - lu12i.w $a0, 184 - ori $a0, $a0, 2728 - add.d $a7, $sp, $a0 - ori $t0, $zero, 1024 + ori $a7, $zero, 1024 bnez $fp, .LBB2_186 b .LBB2_3 .LBB2_191: @@ -5353,10 +5272,7 @@ begin: # @begin movfr2gr.d $a3, $fa0 pcalau12i $a1, %pc_hi20(.L.str.2) addi.d $a1, $a1, %pc_lo12(.L.str.2) - lu12i.w $a4, -487882 - ori $a4, $a4, 2289 - lu32i.d $a4, 325813 - lu52i.d $a4, $a4, 1006 + ld.d $a4, $sp, 136 # 8-byte Folded Reload pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 ori $a0, $zero, 1 diff --git a/results/MultiSource/Benchmarks/VersaBench/dbms/CMakeFiles/dbms.dir/calcMetricsData.s b/results/MultiSource/Benchmarks/VersaBench/dbms/CMakeFiles/dbms.dir/calcMetricsData.s index 476acc3e..6fbe76e2 100644 --- a/results/MultiSource/Benchmarks/VersaBench/dbms/CMakeFiles/dbms.dir/calcMetricsData.s +++ b/results/MultiSource/Benchmarks/VersaBench/dbms/CMakeFiles/dbms.dir/calcMetricsData.s @@ -1,10 +1,6 @@ .file "calcMetricsData.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function calcMetricsData -.LCPI0_0: - .dword 0xc7efffffe091ff3d # double -3.4028234699999998E+38 .text - .globl calcMetricsData + .globl calcMetricsData # -- Begin function calcMetricsData .p2align 5 .type calcMetricsData,@function calcMetricsData: # @calcMetricsData @@ -35,11 +31,10 @@ calcMetricsData: # @calcMetricsData bcnez $fcc0, .LBB0_3 .LBB0_2: lu12i.w $a0, -128737 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI0_0) ori $a0, $a0, 3901 lu52i.d $a0, $a0, -898 st.d $a0, $fp, 72 + movgr2fr.d $fa0, $a0 ld.d $a0, $fp, 96 fst.d $fa0, $fp, 80 bgtz $a0, .LBB0_5 @@ -68,11 +63,10 @@ calcMetricsData: # @calcMetricsData bcnez $fcc0, .LBB0_7 .LBB0_6: lu12i.w $a0, -128737 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI0_0) ori $a0, $a0, 3901 lu52i.d $a0, $a0, -898 st.d $a0, $fp, 136 + movgr2fr.d $fa0, $a0 ld.d $a0, $fp, 160 fst.d $fa0, $fp, 144 bgtz $a0, .LBB0_9 @@ -101,11 +95,10 @@ calcMetricsData: # @calcMetricsData bcnez $fcc0, .LBB0_11 .LBB0_10: lu12i.w $a0, -128737 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI0_0) ori $a0, $a0, 3901 lu52i.d $a0, $a0, -898 st.d $a0, $fp, 200 + movgr2fr.d $fa0, $a0 b .LBB0_12 .LBB0_11: fdiv.d $fa1, $fa1, $fa0 diff --git a/results/MultiSource/Benchmarks/VersaBench/dbms/CMakeFiles/dbms.dir/getFloat.s b/results/MultiSource/Benchmarks/VersaBench/dbms/CMakeFiles/dbms.dir/getFloat.s index be3dae43..d5f00d87 100644 --- a/results/MultiSource/Benchmarks/VersaBench/dbms/CMakeFiles/dbms.dir/getFloat.s +++ b/results/MultiSource/Benchmarks/VersaBench/dbms/CMakeFiles/dbms.dir/getFloat.s @@ -1,18 +1,6 @@ .file "getFloat.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function getFloat -.LCPI0_0: - .word 0xff7fffff # float -3.40282347E+38 -.LCPI0_2: - .word 0x7f7fffff # float 3.40282347E+38 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI0_1: - .dword 0xc7efffffe091ff3d # double -3.4028234699999998E+38 -.LCPI0_3: - .dword 0x47efffffe091ff3d # double 3.4028234699999998E+38 .text - .globl getFloat + .globl getFloat # -- Begin function getFloat .p2align 5 .type getFloat,@function getFloat: # @getFloat @@ -25,8 +13,10 @@ getFloat: # @getFloat move $fp, $a1 pcaddu18i $ra, %call36(getString) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.s $fs0, $a1, %pc_lo12(.LCPI0_0) + lu12i.w $a1, -2049 + ori $a1, $a1, 4095 + lu32i.d $a1, 0 + movgr2fr.w $fs0, $a1 beqz $a0, .LBB0_7 # %bb.1: addi.d $a1, $sp, 8 @@ -49,20 +39,23 @@ getFloat: # @getFloat ori $a0, $zero, 3 beq $a1, $a2, .LBB0_8 .LBB0_4: - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_1) fcvt.d.s $fa0, $fs1 + lu12i.w $a0, -128737 + ori $a1, $a0, 3901 + lu52i.d $a0, $a1, -898 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 ori $a0, $zero, 2 bcnez $fcc0, .LBB0_8 # %bb.5: - pcalau12i $a1, %pc_hi20(.LCPI0_3) - fld.d $fa1, $a1, %pc_lo12(.LCPI0_3) + lu52i.d $a1, $a1, 1150 + movgr2fr.d $fa1, $a1 fcmp.cule.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB0_10 # %bb.6: - pcalau12i $a1, %pc_hi20(.LCPI0_2) - fld.s $fs0, $a1, %pc_lo12(.LCPI0_2) + lu12i.w $a1, 522239 + ori $a1, $a1, 4095 + movgr2fr.w $fs0, $a1 b .LBB0_8 .LBB0_7: ori $a0, $zero, 1 diff --git a/results/MultiSource/Benchmarks/VersaBench/dbms/CMakeFiles/dbms.dir/valid.s b/results/MultiSource/Benchmarks/VersaBench/dbms/CMakeFiles/dbms.dir/valid.s index 37c2e3b0..36ef089e 100644 --- a/results/MultiSource/Benchmarks/VersaBench/dbms/CMakeFiles/dbms.dir/valid.s +++ b/results/MultiSource/Benchmarks/VersaBench/dbms/CMakeFiles/dbms.dir/valid.s @@ -59,12 +59,7 @@ validIndexKey: # @validIndexKey .Lfunc_end0: .size validIndexKey, .Lfunc_end0-validIndexKey # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function validAttributes -.LCPI1_0: - .dword 0x47efffffe091ff3d # double 3.4028234699999998E+38 - .text - .globl validAttributes + .globl validAttributes # -- Begin function validAttributes .p2align 5 .type validAttributes,@function validAttributes: # @validAttributes @@ -81,8 +76,10 @@ validAttributes: # @validAttributes ori $a4, $zero, 7 pcalau12i $a5, %pc_hi20(.L.str.5) addi.d $a5, $a5, %pc_lo12(.L.str.5) - pcalau12i $a6, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a6, %pc_lo12(.LCPI1_0) + lu12i.w $a6, -128737 + ori $a6, $a6, 3901 + lu52i.d $a6, $a6, 1150 + movgr2fr.d $fa0, $a6 pcalau12i $a6, %pc_hi20(.L.str.6) addi.d $a6, $a6, %pc_lo12(.L.str.6) b .LBB1_4 diff --git a/results/MultiSource/Benchmarks/llubenchmark/CMakeFiles/llu.dir/llubenchmark.s b/results/MultiSource/Benchmarks/llubenchmark/CMakeFiles/llu.dir/llubenchmark.s index c359a2b3..a7f381e5 100644 --- a/results/MultiSource/Benchmarks/llubenchmark/CMakeFiles/llu.dir/llubenchmark.s +++ b/results/MultiSource/Benchmarks/llubenchmark/CMakeFiles/llu.dir/llubenchmark.s @@ -60,12 +60,7 @@ allocate: # @allocate .Lfunc_end1: .size allocate, .Lfunc_end1-allocate # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function main -.LCPI2_0: - .word 0x3eaa7efa # float 0.333000004 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -90,15 +85,16 @@ main: # @main addi.d $a0, $a0, %pc_lo12(.Lstr.7) pcaddu18i $ra, %call36(puts) jirl $ra, $ra, 0 + lu12i.w $a0, 256679 ori $a1, $zero, 2 - pcalau12i $a0, %pc_hi20(.LCPI2_0) + ori $a0, $a0, 3834 blt $s3, $a1, .LBB2_13 # %bb.1: # %.lr.ph.preheader ori $a1, $zero, 196 st.d $a1, $sp, 48 # 8-byte Folded Spill ori $a2, $zero, 1 - fld.s $fs0, $a0, %pc_lo12(.LCPI2_0) ori $s0, $zero, 1000 + movgr2fr.w $fs0, $a0 ori $s4, $zero, 45 ori $s5, $zero, 16 pcalau12i $a0, %pc_hi20(.LJTI2_0) @@ -179,11 +175,11 @@ main: # @main move $s2, $a0 b .LBB2_3 .LBB2_13: - fld.s $fs0, $a0, %pc_lo12(.LCPI2_0) ori $s0, $zero, 1000 ori $s2, $zero, 1 - ori $a0, $zero, 196 - st.d $a0, $sp, 48 # 8-byte Folded Spill + ori $a1, $zero, 196 + st.d $a1, $sp, 48 # 8-byte Folded Spill + movgr2fr.w $fs0, $a0 .LBB2_14: # %._crit_edge ld.d $a0, $sp, 48 # 8-byte Folded Reload addi.w $s5, $a0, 0 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Calignm1.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Calignm1.s index 09bef9e9..51fb9d76 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Calignm1.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Calignm1.s @@ -274,16 +274,12 @@ tracking: # @tracking .Lfunc_end0: .size tracking, .Lfunc_end0-tracking # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function Calignm1 -.LCPI1_0: - .dword 0x3ff199999999999a # double 1.1000000000000001 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI1_1: + .p2align 4, 0x0 # -- Begin function Calignm1 +.LCPI1_0: .dword 0 # 0x0 .dword 1 # 0x1 -.LCPI1_2: +.LCPI1_1: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -475,21 +471,24 @@ Calignm1: # @Calignm1 # %bb.18: # %.thread bge $fp, $a1, .LBB1_28 .LBB1_19: # %.thread370 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_0) ld.d $a0, $sp, 144 # 8-byte Folded Reload - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fmul.d $fa1, $fa1, $fa0 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, 104857 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a0, $fa0 slt $a1, $a0, $s5 masknez $a0, $a0, $a1 maskeqz $a1, $s5, $a1 or $a0, $a1, $a0 - movgr2fr.w $fa1, $s6 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $s6 + ffint.d.w $fa0, $fa0 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a1, $fa0 slt $a2, $a1, $fp @@ -1787,8 +1786,8 @@ Calignm1: # @Calignm1 move $a6, $zero b .LBB1_136 .LBB1_133: # %vector.ph706 - pcalau12i $a2, %pc_hi20(.LCPI1_1) - vld $vr0, $a2, %pc_lo12(.LCPI1_1) + pcalau12i $a2, %pc_hi20(.LCPI1_0) + vld $vr0, $a2, %pc_lo12(.LCPI1_0) bstrpick.d $a2, $a5, 31, 2 slli.d $a6, $a2, 2 addi.d $a2, $a3, 16 @@ -1842,8 +1841,8 @@ Calignm1: # @Calignm1 .LBB1_141: # %vector.ph719 bstrpick.d $a2, $t0, 31, 3 slli.d $a2, $a2, 3 - pcalau12i $a4, %pc_hi20(.LCPI1_2) - vld $vr0, $a4, %pc_lo12(.LCPI1_2) + pcalau12i $a4, %pc_hi20(.LCPI1_1) + vld $vr0, $a4, %pc_lo12(.LCPI1_1) addi.d $a4, $a0, 16 vrepli.b $vr1, -1 vrepli.w $vr2, -5 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Galign11.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Galign11.s index 3c419ca8..fd292387 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Galign11.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Galign11.s @@ -1,14 +1,10 @@ .file "Galign11.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function G__align11 -.LCPI0_0: - .dword 0x3ff4cccccccccccd # double 1.3 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI0_1: + .p2align 4, 0x0 # -- Begin function G__align11 +.LCPI0_0: .dword 0 # 0x0 .dword 1 # 0x1 -.LCPI0_2: +.LCPI0_1: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -159,23 +155,26 @@ G__align11: # @G__align11 ld.d $a0, $sp, 40 # 8-byte Folded Reload ld.w $s7, $a0, %pc_lo12(G__align11.orlgth2) .LBB0_9: - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_0) ld.d $a0, $sp, 88 # 8-byte Folded Reload - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fmul.d $fa1, $fa1, $fa0 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + lu12i.w $a0, -209716 + ori $a0, $a0, 3277 + lu32i.d $a0, 314572 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a0, $fa0 slt $a1, $a0, $s5 masknez $a0, $a0, $a1 maskeqz $a1, $s5, $a1 or $s5, $a1, $a0 addi.w $s3, $s5, 100 ld.d $a0, $sp, 144 # 8-byte Folded Reload - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 slt $a1, $a0, $s7 @@ -719,8 +718,8 @@ G__align11: # @G__align11 move $a2, $zero b .LBB0_70 .LBB0_67: # %vector.ph311 - pcalau12i $a3, %pc_hi20(.LCPI0_1) - vld $vr0, $a3, %pc_lo12(.LCPI0_1) + pcalau12i $a3, %pc_hi20(.LCPI0_0) + vld $vr0, $a3, %pc_lo12(.LCPI0_0) bstrpick.d $a2, $a2, 31, 2 slli.d $a2, $a2, 2 addi.d $a3, $s6, 16 @@ -775,8 +774,8 @@ G__align11: # @G__align11 .LBB0_75: # %vector.ph324 bstrpick.d $a3, $a3, 31, 3 slli.d $a3, $a3, 3 - pcalau12i $a4, %pc_hi20(.LCPI0_2) - vld $vr0, $a4, %pc_lo12(.LCPI0_2) + pcalau12i $a4, %pc_hi20(.LCPI0_1) + vld $vr0, $a4, %pc_lo12(.LCPI0_1) addi.d $a4, $a1, 16 vrepli.b $vr1, -1 vrepli.w $vr2, -5 @@ -994,12 +993,7 @@ G__align11: # @G__align11 .Lfunc_end0: .size G__align11, .Lfunc_end0-G__align11 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function G__align11_noalign -.LCPI1_0: - .dword 0x3ff4cccccccccccd # double 1.3 - .text - .globl G__align11_noalign + .globl G__align11_noalign # -- Begin function G__align11_noalign .p2align 5 .type G__align11_noalign,@function G__align11_noalign: # @G__align11_noalign @@ -1104,20 +1098,23 @@ G__align11_noalign: # @G__align11_noalign ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.w $a0, $a0, %pc_lo12(G__align11_noalign.orlgth2) .LBB1_7: - pcalau12i $a2, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI1_0) - movgr2fr.w $fa1, $s5 - ffint.d.w $fa1, $fa1 - fmul.d $fa1, $fa1, $fa0 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a2, $fa1 + movgr2fr.w $fa0, $s5 + ffint.d.w $fa0, $fa0 + lu12i.w $a2, -209716 + ori $a2, $a2, 3277 + lu32i.d $a2, 314572 + lu52i.d $a2, $a2, 1023 + movgr2fr.d $fa1, $a2 + fmul.d $fa0, $fa0, $fa1 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a2, $fa0 slt $a3, $a2, $a1 masknez $a2, $a2, $a3 maskeqz $a1, $a1, $a3 or $s3, $a1, $a2 - movgr2fr.w $fa1, $s0 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $s0 + ffint.d.w $fa0, $fa0 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a1, $fa0 slt $a2, $a1, $a0 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Halignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Halignmm.s index e1a00289..7a9783fc 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Halignmm.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Halignmm.s @@ -429,60 +429,53 @@ imp_match_init_strictH: # @imp_match_init_strictH .Lfunc_end1: .size imp_match_init_strictH, .Lfunc_end1-imp_match_init_strictH # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function H__align -.LCPI2_0: - .dword 0x3ff4cccccccccccd # double 1.3 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI2_1: + .p2align 4, 0x0 # -- Begin function H__align +.LCPI2_0: .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 .word 4 # 0x4 -.LCPI2_3: +.LCPI2_1: .dword 3 # 0x3 .dword 4 # 0x4 -.LCPI2_4: +.LCPI2_2: .dword 1 # 0x1 .dword 2 # 0x2 -.LCPI2_5: +.LCPI2_3: .dword 0 # 0x0 .dword 1 # 0x1 -.LCPI2_6: +.LCPI2_4: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI2_2: - .word 0x461c4000 # float 1.0E+4 .text .globl H__align .p2align 5 .type H__align,@function H__align: # @H__align # %bb.0: - addi.d $sp, $sp, -496 - st.d $ra, $sp, 488 # 8-byte Folded Spill - st.d $fp, $sp, 480 # 8-byte Folded Spill - st.d $s0, $sp, 472 # 8-byte Folded Spill - st.d $s1, $sp, 464 # 8-byte Folded Spill - st.d $s2, $sp, 456 # 8-byte Folded Spill - st.d $s3, $sp, 448 # 8-byte Folded Spill - st.d $s4, $sp, 440 # 8-byte Folded Spill - st.d $s5, $sp, 432 # 8-byte Folded Spill - st.d $s6, $sp, 424 # 8-byte Folded Spill - st.d $s7, $sp, 416 # 8-byte Folded Spill - st.d $s8, $sp, 408 # 8-byte Folded Spill - fst.d $fs0, $sp, 400 # 8-byte Folded Spill - fst.d $fs1, $sp, 392 # 8-byte Folded Spill - fst.d $fs2, $sp, 384 # 8-byte Folded Spill - st.d $a7, $sp, 32 # 8-byte Folded Spill - st.d $a6, $sp, 8 # 8-byte Folded Spill + addi.d $sp, $sp, -512 + st.d $ra, $sp, 504 # 8-byte Folded Spill + st.d $fp, $sp, 496 # 8-byte Folded Spill + st.d $s0, $sp, 488 # 8-byte Folded Spill + st.d $s1, $sp, 480 # 8-byte Folded Spill + st.d $s2, $sp, 472 # 8-byte Folded Spill + st.d $s3, $sp, 464 # 8-byte Folded Spill + st.d $s4, $sp, 456 # 8-byte Folded Spill + st.d $s5, $sp, 448 # 8-byte Folded Spill + st.d $s6, $sp, 440 # 8-byte Folded Spill + st.d $s7, $sp, 432 # 8-byte Folded Spill + st.d $s8, $sp, 424 # 8-byte Folded Spill + fst.d $fs0, $sp, 416 # 8-byte Folded Spill + fst.d $fs1, $sp, 408 # 8-byte Folded Spill + fst.d $fs2, $sp, 400 # 8-byte Folded Spill + fst.d $fs3, $sp, 392 # 8-byte Folded Spill + st.d $a7, $sp, 40 # 8-byte Folded Spill + st.d $a6, $sp, 16 # 8-byte Folded Spill move $s8, $a5 - st.d $a4, $sp, 120 # 8-byte Folded Spill + st.d $a4, $sp, 128 # 8-byte Folded Spill move $s1, $a3 move $s2, $a2 move $s4, $a1 @@ -494,9 +487,9 @@ H__align: # @H__align ld.w $a0, $a0, 0 st.d $a0, $sp, 208 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(H__align.mseq1) - st.d $a0, $sp, 80 # 8-byte Folded Spill + st.d $a0, $sp, 88 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(H__align.mseq2) - st.d $a0, $sp, 40 # 8-byte Folded Spill + st.d $a0, $sp, 48 # 8-byte Folded Spill bnez $s3, .LBB2_2 # %bb.1: pcalau12i $a0, %got_pc_hi20(njob) @@ -506,39 +499,39 @@ H__align: # @H__align pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 ld.w $a1, $fp, 0 - ld.d $a2, $sp, 80 # 8-byte Folded Reload + ld.d $a2, $sp, 88 # 8-byte Folded Reload st.d $a0, $a2, %pc_lo12(H__align.mseq1) move $a0, $a1 move $a1, $zero pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 ld.w $s3, $s7, %pc_lo12(H__align.orlgth1) - ld.d $a1, $sp, 40 # 8-byte Folded Reload + ld.d $a1, $sp, 48 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(H__align.mseq2) .LBB2_2: - st.d $s0, $sp, 88 # 8-byte Folded Spill + st.d $s0, $sp, 96 # 8-byte Folded Spill ld.d $a0, $s0, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 ld.d $a1, $s4, 0 - st.d $a0, $sp, 72 # 8-byte Folded Spill + st.d $a0, $sp, 80 # 8-byte Folded Spill addi.w $fp, $a0, 0 move $a0, $a1 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 pcalau12i $a1, %pc_hi20(H__align.orlgth2) - st.d $a1, $sp, 200 # 8-byte Folded Spill + st.d $a1, $sp, 192 # 8-byte Folded Spill ld.w $s5, $a1, %pc_lo12(H__align.orlgth2) - st.d $a0, $sp, 176 # 8-byte Folded Spill + st.d $a0, $sp, 184 # 8-byte Folded Spill addi.w $s6, $a0, 0 pcalau12i $a0, %pc_hi20(H__align.w1) - st.d $a0, $sp, 368 # 8-byte Folded Spill + st.d $a0, $sp, 376 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(H__align.w2) st.d $a0, $sp, 312 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(H__align.initverticalw) st.d $a0, $sp, 304 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(H__align.lastverticalw) - st.d $a0, $sp, 64 # 8-byte Folded Spill + st.d $a0, $sp, 72 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(H__align.m) st.d $a0, $sp, 296 # 8-byte Folded Spill pcalau12i $s0, %pc_hi20(H__align.mseq) @@ -547,19 +540,19 @@ H__align: # @H__align pcalau12i $a0, %pc_hi20(H__align.digf2) st.d $a0, $sp, 320 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(H__align.diaf1) - st.d $a0, $sp, 344 # 8-byte Folded Spill + st.d $a0, $sp, 352 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(H__align.diaf2) - st.d $a0, $sp, 360 # 8-byte Folded Spill + st.d $a0, $sp, 368 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(H__align.gappat1) - st.d $a0, $sp, 48 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(H__align.gappat2) st.d $a0, $sp, 56 # 8-byte Folded Spill + pcalau12i $a0, %pc_hi20(H__align.gappat2) + st.d $a0, $sp, 64 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(H__align.gapz1) st.d $a0, $sp, 264 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(H__align.gapz2) st.d $a0, $sp, 272 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(H__align.gapf1) - st.d $a0, $sp, 192 # 8-byte Folded Spill + st.d $a0, $sp, 200 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(H__align.gapf2) st.d $a0, $sp, 248 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(H__align.ogcp1g) @@ -571,24 +564,24 @@ H__align: # @H__align pcalau12i $a0, %pc_hi20(H__align.fgcp2g) st.d $a0, $sp, 240 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(H__align.cpmx1) - st.d $a0, $sp, 376 # 8-byte Folded Spill + st.d $a0, $sp, 384 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(H__align.cpmx2) - st.d $a0, $sp, 352 # 8-byte Folded Spill + st.d $a0, $sp, 360 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(H__align.floatwork) st.d $a0, $sp, 288 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(H__align.intwork) st.d $a0, $sp, 280 # 8-byte Folded Spill - st.d $s8, $sp, 104 # 8-byte Folded Spill - st.d $s4, $sp, 96 # 8-byte Folded Spill - st.d $s6, $sp, 336 # 8-byte Folded Spill - st.d $fp, $sp, 112 # 8-byte Folded Spill + st.d $s8, $sp, 112 # 8-byte Folded Spill + st.d $s4, $sp, 104 # 8-byte Folded Spill + st.d $s6, $sp, 344 # 8-byte Folded Spill + st.d $fp, $sp, 120 # 8-byte Folded Spill blt $s3, $fp, .LBB2_4 # %bb.3: bge $s5, $s6, .LBB2_8 .LBB2_4: pcalau12i $s6, %pc_hi20(H__align.match) pcalau12i $a0, %pc_hi20(H__align.ogcp1) - st.d $a0, $sp, 184 # 8-byte Folded Spill + st.d $a0, $sp, 176 # 8-byte Folded Spill pcalau12i $s8, %pc_hi20(H__align.ogcp2) pcalau12i $s4, %pc_hi20(H__align.fgcp1) pcalau12i $fp, %pc_hi20(H__align.fgcp2) @@ -596,7 +589,7 @@ H__align: # @H__align # %bb.5: blez $s5, .LBB2_7 # %bb.6: - ld.d $a0, $sp, 368 # 8-byte Folded Reload + ld.d $a0, $sp, 376 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.w1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 @@ -611,7 +604,7 @@ H__align: # @H__align ld.d $a0, $a0, %pc_lo12(H__align.initverticalw) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 64 # 8-byte Folded Reload + ld.d $a0, $sp, 72 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.lastverticalw) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 @@ -626,11 +619,11 @@ H__align: # @H__align ld.d $a0, $s0, %pc_lo12(H__align.mseq) pcaddu18i $ra, %call36(FreeCharMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 48 # 8-byte Folded Reload + ld.d $a0, $sp, 56 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.gappat1) pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 56 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.gappat2) pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 @@ -642,11 +635,11 @@ H__align: # @H__align ld.d $a0, $a0, %pc_lo12(H__align.digf2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 344 # 8-byte Folded Reload + ld.d $a0, $sp, 352 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.diaf1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 360 # 8-byte Folded Reload + ld.d $a0, $sp, 368 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.diaf2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 @@ -658,7 +651,7 @@ H__align: # @H__align ld.d $a0, $a0, %pc_lo12(H__align.gapz2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 192 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.gapf1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 @@ -666,7 +659,7 @@ H__align: # @H__align ld.d $a0, $a0, %pc_lo12(H__align.gapf2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 176 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.ogcp1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 @@ -695,11 +688,11 @@ H__align: # @H__align ld.d $a0, $a0, %pc_lo12(H__align.fgcp2g) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 376 # 8-byte Folded Reload + ld.d $a0, $sp, 384 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.cpmx1) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 352 # 8-byte Folded Reload + ld.d $a0, $sp, 360 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.cpmx2) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 @@ -712,27 +705,30 @@ H__align: # @H__align pcaddu18i $ra, %call36(FreeIntMtx) jirl $ra, $ra, 0 ld.w $s3, $s7, %pc_lo12(H__align.orlgth1) - ld.d $a0, $sp, 200 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload ld.w $s5, $a0, %pc_lo12(H__align.orlgth2) .LBB2_7: - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI2_0) - ld.d $a0, $sp, 72 # 8-byte Folded Reload - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fmul.d $fa1, $fa1, $fa0 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 + ld.d $a0, $sp, 80 # 8-byte Folded Reload + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + lu12i.w $a0, -209716 + ori $a0, $a0, 3277 + lu32i.d $a0, 314572 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a0, $fa0 slt $a1, $a0, $s3 masknez $a0, $a0, $a1 maskeqz $a1, $s3, $a1 or $s3, $a1, $a0 st.d $s8, $sp, 152 # 8-byte Folded Spill addi.w $s8, $s3, 100 - ld.d $a0, $sp, 176 # 8-byte Folded Reload - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + ld.d $a0, $sp, 184 # 8-byte Folded Reload + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 slt $a1, $a0, $s5 @@ -747,7 +743,7 @@ H__align: # @H__align move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 368 # 8-byte Folded Reload + ld.d $a1, $sp, 376 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(H__align.w1) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -767,7 +763,7 @@ H__align: # @H__align move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 64 # 8-byte Folded Reload + ld.d $a1, $sp, 72 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(H__align.lastverticalw) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -799,24 +795,24 @@ H__align: # @H__align move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 344 # 8-byte Folded Reload + ld.d $a1, $sp, 352 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(H__align.diaf1) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 360 # 8-byte Folded Reload + ld.d $a1, $sp, 368 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(H__align.diaf2) ori $a1, $zero, 8 move $a0, $s6 pcaddu18i $ra, %call36(calloc) jirl $ra, $ra, 0 - ld.d $a1, $sp, 48 # 8-byte Folded Reload + ld.d $a1, $sp, 56 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(H__align.gappat1) ori $a1, $zero, 8 move $a0, $s4 pcaddu18i $ra, %call36(calloc) jirl $ra, $ra, 0 - ld.d $a1, $sp, 56 # 8-byte Folded Reload + ld.d $a1, $sp, 64 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(H__align.gappat2) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -831,7 +827,7 @@ H__align: # @H__align move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 192 # 8-byte Folded Reload + ld.d $a1, $sp, 200 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(H__align.gapf1) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -841,7 +837,7 @@ H__align: # @H__align move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 184 # 8-byte Folded Reload + ld.d $a1, $sp, 176 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(H__align.ogcp1) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -882,13 +878,13 @@ H__align: # @H__align move $a1, $s6 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 376 # 8-byte Folded Reload + ld.d $a1, $sp, 384 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(H__align.cpmx1) ori $a0, $zero, 26 move $a1, $s4 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 352 # 8-byte Folded Reload + ld.d $a1, $sp, 360 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(H__align.cpmx2) slt $a0, $fp, $s8 masknez $a1, $fp, $a0 @@ -908,21 +904,21 @@ H__align: # @H__align ld.d $a1, $sp, 280 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(H__align.intwork) st.w $s3, $s7, %pc_lo12(H__align.orlgth1) - ld.d $a0, $sp, 200 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload st.w $s5, $a0, %pc_lo12(H__align.orlgth2) - ld.d $s8, $sp, 104 # 8-byte Folded Reload - ld.d $s4, $sp, 96 # 8-byte Folded Reload - ld.d $s6, $sp, 336 # 8-byte Folded Reload - ld.d $fp, $sp, 112 # 8-byte Folded Reload + ld.d $s8, $sp, 112 # 8-byte Folded Reload + ld.d $s4, $sp, 104 # 8-byte Folded Reload + ld.d $s6, $sp, 344 # 8-byte Folded Reload + ld.d $fp, $sp, 120 # 8-byte Folded Reload .LBB2_8: - ld.d $a0, $sp, 120 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload blez $a0, .LBB2_11 # %bb.9: # %.lr.ph ld.d $a0, $s0, %pc_lo12(H__align.mseq) - ld.d $a1, $sp, 80 # 8-byte Folded Reload + ld.d $a1, $sp, 88 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(H__align.mseq1) - ld.d $a2, $sp, 120 # 8-byte Folded Reload - ld.d $a3, $sp, 88 # 8-byte Folded Reload + ld.d $a2, $sp, 128 # 8-byte Folded Reload + ld.d $a3, $sp, 96 # 8-byte Folded Reload .p2align 4, , 16 .LBB2_10: # =>This Inner Loop Header: Depth=1 ld.d $a4, $a0, 0 @@ -938,9 +934,9 @@ H__align: # @H__align blez $s8, .LBB2_14 # %bb.12: # %.lr.ph665 ld.d $a1, $s0, %pc_lo12(H__align.mseq) - ld.d $a0, $sp, 40 # 8-byte Folded Reload + ld.d $a0, $sp, 48 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.mseq2) - ld.d $a2, $sp, 120 # 8-byte Folded Reload + ld.d $a2, $sp, 128 # 8-byte Folded Reload alsl.d $a1, $a2, $a1, 3 move $a2, $s8 .p2align 4, , 16 @@ -955,7 +951,7 @@ H__align: # @H__align addi.d $a0, $a0, 8 bnez $a2, .LBB2_13 .LBB2_14: # %._crit_edge - ld.d $s4, $sp, 504 + ld.d $s4, $sp, 520 pcalau12i $a0, %got_pc_hi20(commonAlloc1) ld.d $fp, $a0, %got_pc_lo12(commonAlloc1) ld.w $a0, $fp, 0 @@ -982,7 +978,7 @@ H__align: # @H__align jirl $ra, $ra, 0 ld.w $s3, $s7, %pc_lo12(H__align.orlgth1) ld.w $a0, $fp, 0 - ld.d $a1, $sp, 200 # 8-byte Folded Reload + ld.d $a1, $sp, 192 # 8-byte Folded Reload ld.w $s5, $a1, %pc_lo12(H__align.orlgth2) ld.w $a1, $s0, 0 .LBB2_20: @@ -1004,25 +1000,25 @@ H__align: # @H__align st.w $s3, $fp, 0 st.w $s5, $s0, 0 .LBB2_21: - ld.d $a1, $sp, 376 # 8-byte Folded Reload + ld.d $a1, $sp, 384 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(H__align.cpmx1) ld.d $a2, $sp, 208 # 8-byte Folded Reload movgr2fr.w $fs0, $a2 pcalau12i $a2, %pc_hi20(H__align.ijp) - st.d $a2, $sp, 24 # 8-byte Folded Spill + st.d $a2, $sp, 32 # 8-byte Folded Spill st.d $a0, $a2, %pc_lo12(H__align.ijp) - ld.d $s3, $sp, 88 # 8-byte Folded Reload + ld.d $s3, $sp, 96 # 8-byte Folded Reload move $a0, $s3 move $a2, $s2 - ld.d $s5, $sp, 112 # 8-byte Folded Reload + ld.d $s5, $sp, 120 # 8-byte Folded Reload move $a3, $s5 - ld.d $s0, $sp, 120 # 8-byte Folded Reload + ld.d $s0, $sp, 128 # 8-byte Folded Reload move $a4, $s0 pcaddu18i $ra, %call36(cpmx_calc_new) jirl $ra, $ra, 0 - ld.d $a0, $sp, 352 # 8-byte Folded Reload + ld.d $a0, $sp, 360 # 8-byte Folded Reload ld.d $a1, $a0, %pc_lo12(H__align.cpmx2) - ld.d $fp, $sp, 96 # 8-byte Folded Reload + ld.d $fp, $sp, 104 # 8-byte Folded Reload move $a0, $fp move $a2, $s1 move $a3, $s6 @@ -1063,7 +1059,7 @@ H__align: # @H__align move $a4, $s6 pcaddu18i $ra, %call36(st_FinalGapCount_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 48 # 8-byte Folded Reload + ld.d $a0, $sp, 56 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.gappat1) move $a1, $s0 move $a2, $s3 @@ -1071,7 +1067,7 @@ H__align: # @H__align move $a4, $s5 pcaddu18i $ra, %call36(st_getGapPattern) jirl $ra, $ra, 0 - ld.d $a0, $sp, 56 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.gappat2) move $a1, $s8 move $a2, $fp @@ -1095,7 +1091,7 @@ H__align: # @H__align move $a4, $s6 pcaddu18i $ra, %call36(getdigapfreq_st) jirl $ra, $ra, 0 - ld.d $a0, $sp, 344 # 8-byte Folded Reload + ld.d $a0, $sp, 352 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.diaf1) move $a1, $s0 move $a2, $s3 @@ -1103,7 +1099,7 @@ H__align: # @H__align move $a4, $s5 pcaddu18i $ra, %call36(getdiaminofreq_x) jirl $ra, $ra, 0 - ld.d $a0, $sp, 360 # 8-byte Folded Reload + ld.d $a0, $sp, 368 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.diaf2) move $a1, $s8 move $a2, $fp @@ -1113,9 +1109,9 @@ H__align: # @H__align jirl $ra, $ra, 0 b .LBB2_24 .LBB2_23: - ld.d $s6, $sp, 528 - ld.d $s7, $sp, 520 - ld.d $s8, $sp, 512 + ld.d $s6, $sp, 544 + ld.d $s7, $sp, 536 + ld.d $s8, $sp, 528 move $a1, $s0 move $a2, $s3 move $a3, $s2 @@ -1126,10 +1122,10 @@ H__align: # @H__align jirl $ra, $ra, 0 ld.d $a0, $sp, 224 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.ogcp2g) - ld.d $a1, $sp, 104 # 8-byte Folded Reload + ld.d $a1, $sp, 112 # 8-byte Folded Reload move $a2, $fp move $a3, $s1 - ld.d $a4, $sp, 336 # 8-byte Folded Reload + ld.d $a4, $sp, 344 # 8-byte Folded Reload move $a5, $s8 move $a6, $s7 pcaddu18i $ra, %call36(new_OpeningGapCount_zure) @@ -1146,10 +1142,10 @@ H__align: # @H__align jirl $ra, $ra, 0 ld.d $a0, $sp, 240 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.fgcp2g) - ld.d $a1, $sp, 104 # 8-byte Folded Reload + ld.d $a1, $sp, 112 # 8-byte Folded Reload move $a2, $fp move $a3, $s1 - ld.d $a4, $sp, 336 # 8-byte Folded Reload + ld.d $a4, $sp, 344 # 8-byte Folded Reload move $a5, $s4 move $a6, $s6 pcaddu18i $ra, %call36(new_FinalGapCount_zure) @@ -1166,16 +1162,16 @@ H__align: # @H__align jirl $ra, $ra, 0 ld.d $a0, $sp, 320 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.digf2) - ld.d $a1, $sp, 104 # 8-byte Folded Reload + ld.d $a1, $sp, 112 # 8-byte Folded Reload move $a2, $fp move $a3, $s1 - ld.d $a4, $sp, 336 # 8-byte Folded Reload + ld.d $a4, $sp, 344 # 8-byte Folded Reload move $a5, $s8 - ld.d $s8, $sp, 104 # 8-byte Folded Reload + ld.d $s8, $sp, 112 # 8-byte Folded Reload move $a6, $s6 pcaddu18i $ra, %call36(getdigapfreq_part) jirl $ra, $ra, 0 - ld.d $a0, $sp, 344 # 8-byte Folded Reload + ld.d $a0, $sp, 352 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.diaf1) move $a1, $s0 move $a2, $s3 @@ -1185,19 +1181,19 @@ H__align: # @H__align move $a6, $s7 pcaddu18i $ra, %call36(getdiaminofreq_part) jirl $ra, $ra, 0 - ld.d $a0, $sp, 360 # 8-byte Folded Reload + ld.d $a0, $sp, 368 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.diaf2) move $a1, $s8 move $a2, $fp move $a3, $s1 - ld.d $a4, $sp, 336 # 8-byte Folded Reload + ld.d $a4, $sp, 344 # 8-byte Folded Reload move $a5, $s4 move $a6, $s6 - ld.d $s6, $sp, 336 # 8-byte Folded Reload + ld.d $s6, $sp, 344 # 8-byte Folded Reload pcaddu18i $ra, %call36(getdiaminofreq_part) jirl $ra, $ra, 0 .LBB2_24: - ld.d $a0, $sp, 192 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.gapf1) ffint.s.w $fa0, $fs0 vst $vr0, $sp, 320 # 16-byte Folded Spill @@ -1231,16 +1227,16 @@ H__align: # @H__align move $a4, $s6 pcaddu18i $ra, %call36(getgapfreq_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 368 # 8-byte Folded Reload + ld.d $a0, $sp, 376 # 8-byte Folded Reload ld.d $s1, $a0, %pc_lo12(H__align.w1) ld.d $a0, $sp, 312 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.w2) - st.d $a0, $sp, 368 # 8-byte Folded Spill + st.d $a0, $sp, 376 # 8-byte Folded Spill ld.d $a0, $sp, 304 # 8-byte Folded Reload ld.d $s0, $a0, %pc_lo12(H__align.initverticalw) - ld.d $a0, $sp, 352 # 8-byte Folded Reload + ld.d $a0, $sp, 360 # 8-byte Folded Reload ld.d $s2, $a0, %pc_lo12(H__align.cpmx2) - ld.d $a0, $sp, 376 # 8-byte Folded Reload + ld.d $a0, $sp, 384 # 8-byte Folded Reload ld.d $s8, $a0, %pc_lo12(H__align.cpmx1) ld.d $a0, $sp, 288 # 8-byte Folded Reload ld.d $fp, $a0, %pc_lo12(H__align.floatwork) @@ -1257,15 +1253,13 @@ H__align: # @H__align pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(impmtx) - st.d $a0, $sp, 152 # 8-byte Folded Spill - ld.d $s7, $sp, 176 # 8-byte Folded Reload - slli.d $a1, $s7, 32 - pcalau12i $a0, %pc_hi20(.LCPI2_2) - st.d $a0, $sp, 240 # 8-byte Folded Spill - ld.d $s3, $sp, 32 # 8-byte Folded Reload + st.d $a0, $sp, 160 # 8-byte Folded Spill + ld.d $s7, $sp, 184 # 8-byte Folded Reload + slli.d $a0, $s7, 32 + ld.d $s3, $sp, 40 # 8-byte Folded Reload st.d $s4, $sp, 304 # 8-byte Folded Spill st.d $fp, $sp, 312 # 8-byte Folded Spill - st.d $a1, $sp, 184 # 8-byte Folded Spill + st.d $a0, $sp, 192 # 8-byte Folded Spill bnez $s3, .LBB2_26 # %bb.25: # %.critedge ori $a7, $zero, 1 @@ -1283,9 +1277,9 @@ H__align: # @H__align .LBB2_26: blez $s5, .LBB2_29 # %bb.27: # %.lr.ph.i - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a0, $sp, 160 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(impmtx) - ld.d $a1, $sp, 72 # 8-byte Folded Reload + ld.d $a1, $sp, 80 # 8-byte Folded Reload bstrpick.d $a1, $a1, 30, 0 move $a2, $s0 .p2align 4, , 16 @@ -1320,7 +1314,7 @@ H__align: # @H__align beq $a0, $fp, .LBB2_37 b .LBB2_47 .LBB2_31: # %.lr.ph.preheader.i - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a0, $sp, 160 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(impmtx) ld.d $a5, $a0, 0 bstrpick.d $a3, $s7, 31, 0 @@ -1362,14 +1356,14 @@ H__align: # @H__align ori $a1, $zero, 1 bne $a0, $a1, .LBB2_40 .LBB2_37: # %.preheader649 - ld.d $t7, $sp, 112 # 8-byte Folded Reload + ld.d $t7, $sp, 120 # 8-byte Folded Reload bgtz $t7, .LBB2_147 .LBB2_38: # %.preheader647 bgtz $s6, .LBB2_163 # %bb.39: # %.loopexit648.thread ld.d $a0, $sp, 296 # 8-byte Folded Reload - ld.d $t2, $a0, %pc_lo12(H__align.m) - st.w $zero, $t2, 0 + ld.d $t3, $a0, %pc_lo12(H__align.m) + st.w $zero, $t3, 0 b .LBB2_55 .LBB2_40: # %.preheader652 blez $s6, .LBB2_47 @@ -1390,8 +1384,8 @@ H__align: # @H__align move $a3, $a2 bstrins.d $a3, $a5, 1, 0 vreplgr2vr.w $vr0, $a0 - pcalau12i $a5, %pc_hi20(.LCPI2_1) - vld $vr1, $a5, %pc_lo12(.LCPI2_1) + pcalau12i $a5, %pc_hi20(.LCPI2_0) + vld $vr1, $a5, %pc_lo12(.LCPI2_0) addi.d $a5, $s1, 4 lu52i.d $a6, $zero, -1026 vreplgr2vr.d $vr2, $a6 @@ -1468,13 +1462,13 @@ H__align: # @H__align addi.d $a4, $a4, 4 bnez $a1, .LBB2_46 .LBB2_47: # %.preheader650 - ld.d $t7, $sp, 112 # 8-byte Folded Reload + ld.d $t7, $sp, 120 # 8-byte Folded Reload blez $t7, .LBB2_54 # %bb.48: # %.lr.ph671 pcalau12i $a0, %got_pc_hi20(offset) ld.d $a0, $a0, %got_pc_lo12(offset) ld.w $a0, $a0, 0 - ld.d $a1, $sp, 72 # 8-byte Folded Reload + ld.d $a1, $sp, 80 # 8-byte Folded Reload addi.d $a1, $a1, 1 bstrpick.d $a1, $a1, 31, 0 addi.d $a2, $a1, -1 @@ -1488,8 +1482,8 @@ H__align: # @H__align move $a3, $a2 bstrins.d $a3, $a5, 1, 0 vreplgr2vr.w $vr0, $a0 - pcalau12i $a5, %pc_hi20(.LCPI2_1) - vld $vr1, $a5, %pc_lo12(.LCPI2_1) + pcalau12i $a5, %pc_hi20(.LCPI2_0) + vld $vr1, $a5, %pc_lo12(.LCPI2_0) addi.d $a5, $s0, 4 lu52i.d $a6, $zero, -1026 vreplgr2vr.d $vr2, $a6 @@ -1567,11 +1561,11 @@ H__align: # @H__align bnez $a1, .LBB2_53 .LBB2_54: # %.loopexit648 ld.d $a0, $sp, 296 # 8-byte Folded Reload - ld.d $t2, $a0, %pc_lo12(H__align.m) - st.w $zero, $t2, 0 + ld.d $t3, $a0, %pc_lo12(H__align.m) + st.w $zero, $t3, 0 bgtz $s6, .LBB2_57 .LBB2_55: # %._crit_edge684 - ld.d $t1, $sp, 184 # 8-byte Folded Reload + ld.d $t2, $sp, 192 # 8-byte Folded Reload ori $a0, $zero, 1 bnez $s6, .LBB2_65 # %bb.56: @@ -1584,164 +1578,165 @@ H__align: # @H__align ld.d $a0, $a0, %pc_lo12(H__align.mp) addi.d $a1, $s7, 1 bstrpick.d $a1, $a1, 31, 0 - addi.d $a2, $a1, -1 - ori $a4, $zero, 8 - ori $a3, $zero, 1 - ld.d $t1, $sp, 184 # 8-byte Folded Reload - bltu $a2, $a4, .LBB2_62 + addi.d $a3, $a1, -1 + ori $a4, $zero, 1 + ori $a5, $zero, 8 + lu12i.w $a2, 287172 + ld.d $t2, $sp, 192 # 8-byte Folded Reload + bltu $a3, $a5, .LBB2_62 # %bb.58: # %vector.memcheck946 - sub.d $a4, $t2, $s1 - addi.d $a4, $a4, 4 - ori $a5, $zero, 32 - bltu $a4, $a5, .LBB2_62 + sub.d $a5, $t3, $s1 + addi.d $a5, $a5, 4 + ori $a6, $zero, 32 + bltu $a5, $a6, .LBB2_62 # %bb.59: # %vector.ph949 - move $a5, $zero - move $a4, $a2 - bstrins.d $a4, $zero, 2, 0 - ori $a6, $zero, 1 - move $a3, $a2 - bstrins.d $a3, $a6, 2, 0 + move $a6, $zero + move $a5, $a3 + bstrins.d $a5, $zero, 2, 0 + ori $a7, $zero, 1 + move $a4, $a3 + bstrins.d $a4, $a7, 2, 0 vreplvei.w $vr0, $vr10, 0 - addi.d $a6, $a0, 20 + addi.d $a7, $a0, 20 vrepli.b $vr1, 0 - lu12i.w $a7, 287172 - vreplgr2vr.w $vr2, $a7 - move $a7, $a4 + vreplgr2vr.w $vr2, $a2 + move $t0, $a5 .p2align 4, , 16 .LBB2_60: # %vector.body954 # =>This Inner Loop Header: Depth=1 - add.d $t0, $s1, $a5 - vldx $vr3, $s1, $a5 - vld $vr4, $t0, 16 - add.d $t0, $a6, $a5 - vst $vr1, $t0, -16 - vstx $vr1, $a6, $a5 + add.d $t1, $s1, $a6 + vldx $vr3, $s1, $a6 + vld $vr4, $t1, 16 + add.d $t1, $a7, $a6 + vst $vr1, $t1, -16 + vstx $vr1, $a7, $a6 vfadd.s $vr3, $vr3, $vr1 vfadd.s $vr4, $vr4, $vr1 vfmadd.s $vr3, $vr0, $vr2, $vr3 vfmadd.s $vr4, $vr0, $vr2, $vr4 - add.d $t0, $t2, $a5 - vst $vr3, $t0, 4 - vst $vr4, $t0, 20 - addi.d $a7, $a7, -8 - addi.d $a5, $a5, 32 - bnez $a7, .LBB2_60 + add.d $t1, $t3, $a6 + vst $vr3, $t1, 4 + vst $vr4, $t1, 20 + addi.d $t0, $t0, -8 + addi.d $a6, $a6, 32 + bnez $t0, .LBB2_60 # %bb.61: # %middle.block960 - beq $a2, $a4, .LBB2_64 + beq $a3, $a5, .LBB2_64 .LBB2_62: # %scalar.ph947.preheader - slli.d $a2, $a3, 2 - ld.d $a4, $sp, 240 # 8-byte Folded Reload - fld.s $fa0, $a4, %pc_lo12(.LCPI2_2) - addi.d $a4, $s1, -4 - sub.d $a1, $a1, $a3 - movgr2fr.w $fa1, $zero + slli.d $a3, $a4, 2 + addi.d $a5, $s1, -4 + sub.d $a1, $a1, $a4 + movgr2fr.w $fa0, $zero + movgr2fr.w $fa1, $a2 .p2align 4, , 16 .LBB2_63: # %scalar.ph947 # =>This Inner Loop Header: Depth=1 - fldx.s $fa2, $a4, $a2 - stx.w $zero, $a0, $a2 - fadd.s $fa2, $fa2, $fa1 - fmadd.s $fa2, $ft2, $fa0, $fa2 - fstx.s $fa2, $t2, $a2 + fldx.s $fa2, $a5, $a3 + stx.w $zero, $a0, $a3 + fadd.s $fa2, $fa2, $fa0 + fmadd.s $fa2, $ft2, $fa1, $fa2 + fstx.s $fa2, $t3, $a3 addi.d $a1, $a1, -1 - addi.d $a2, $a2, 4 + addi.d $a3, $a3, 4 bnez $a1, .LBB2_63 .LBB2_64: move $a0, $zero .LBB2_65: # %._crit_edge684.thread ori $a1, $zero, 0 lu32i.d $a1, -1 - add.d $a1, $t1, $a1 + add.d $a1, $t2, $a1 srai.d $a1, $a1, 30 fldx.s $fa0, $s1, $a1 st.d $a0, $sp, 296 # 8-byte Folded Spill move $a0, $zero .LBB2_66: - ld.d $a1, $sp, 64 # 8-byte Folded Reload + ld.d $a1, $sp, 72 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(H__align.lastverticalw) st.d $a1, $sp, 288 # 8-byte Folded Spill fst.s $fa0, $a1, 0 pcalau12i $a1, %got_pc_hi20(outgap) ld.d $a1, $a1, %got_pc_lo12(outgap) - st.d $a1, $sp, 16 # 8-byte Folded Spill + st.d $a1, $sp, 24 # 8-byte Folded Spill ld.w $a1, $a1, 0 sltu $a2, $zero, $a1 - ld.d $a3, $sp, 72 # 8-byte Folded Reload + ld.d $a3, $sp, 80 # 8-byte Folded Reload add.w $a3, $a2, $a3 ori $a2, $zero, 2 st.d $a3, $sp, 280 # 8-byte Folded Spill blt $a3, $a2, .LBB2_118 # %bb.67: # %.lr.ph705 - st.d $t2, $sp, 232 # 8-byte Folded Spill + st.d $t3, $sp, 240 # 8-byte Folded Spill st.d $s8, $sp, 264 # 8-byte Folded Spill st.d $s2, $sp, 272 # 8-byte Folded Spill - st.d $s0, $sp, 352 # 8-byte Folded Spill + st.d $s0, $sp, 360 # 8-byte Folded Spill sltui $a1, $s3, 1 or $a0, $a1, $a0 st.d $a0, $sp, 256 # 8-byte Folded Spill - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a0, $sp, 160 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(impmtx) - st.d $a0, $sp, 224 # 8-byte Folded Spill - ld.d $a0, $sp, 24 # 8-byte Folded Reload + st.d $a0, $sp, 232 # 8-byte Folded Spill + ld.d $a0, $sp, 32 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.ijp) - st.d $a0, $sp, 376 # 8-byte Folded Spill + st.d $a0, $sp, 384 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(H__align.mp) ld.d $a0, $a0, %pc_lo12(H__align.mp) - st.d $a0, $sp, 216 # 8-byte Folded Spill - ld.d $a0, $sp, 56 # 8-byte Folded Reload + st.d $a0, $sp, 224 # 8-byte Folded Spill + ld.d $a0, $sp, 64 # 8-byte Folded Reload ld.d $fp, $a0, %pc_lo12(H__align.gappat2) - ld.d $a0, $sp, 344 # 8-byte Folded Reload + ld.d $a0, $sp, 352 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.diaf1) - st.d $a0, $sp, 208 # 8-byte Folded Spill - ld.d $a0, $sp, 48 # 8-byte Folded Reload + st.d $a0, $sp, 216 # 8-byte Folded Spill + ld.d $a0, $sp, 56 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.gappat1) - st.d $a0, $sp, 200 # 8-byte Folded Spill - ld.d $a0, $sp, 360 # 8-byte Folded Reload + st.d $a0, $sp, 208 # 8-byte Folded Spill + ld.d $a0, $sp, 368 # 8-byte Folded Reload ld.d $s0, $a0, %pc_lo12(H__align.diaf2) - ld.d $a0, $sp, 192 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.gapf1) - st.d $a0, $sp, 192 # 8-byte Folded Spill + st.d $a0, $sp, 200 # 8-byte Folded Spill fcvt.d.s $fs1, $ft2 ld.d $a0, $sp, 248 # 8-byte Folded Reload ld.d $s3, $a0, %pc_lo12(H__align.gapf2) ori $a0, $zero, 0 lu32i.d $a0, -1 - add.d $a0, $t1, $a0 + add.d $a0, $t2, $a0 srai.d $a0, $a0, 30 st.d $a0, $sp, 248 # 8-byte Folded Spill addi.d $a0, $s7, 1 bstrpick.d $s2, $a0, 31, 0 addi.w $a0, $s7, -1 - st.d $a0, $sp, 184 # 8-byte Folded Spill + st.d $a0, $sp, 192 # 8-byte Folded Spill bstrpick.d $a0, $a0, 31, 0 slli.d $a1, $a0, 2 addi.d $a1, $a1, 4 - st.d $a1, $sp, 168 # 8-byte Folded Spill + st.d $a1, $sp, 176 # 8-byte Folded Spill addi.d $a0, $a0, 1 - st.d $a0, $sp, 144 # 8-byte Folded Spill + st.d $a0, $sp, 152 # 8-byte Folded Spill bstrpick.d $a0, $a0, 32, 3 slli.d $a1, $a0, 3 slli.d $a0, $a0, 5 - st.d $a0, $sp, 136 # 8-byte Folded Spill - st.d $a1, $sp, 160 # 8-byte Folded Spill + st.d $a0, $sp, 144 # 8-byte Folded Spill + st.d $a1, $sp, 168 # 8-byte Folded Spill sub.d $a0, $s7, $a1 - st.d $a0, $sp, 128 # 8-byte Folded Spill + st.d $a0, $sp, 136 # 8-byte Folded Spill movgr2fr.w $fs2, $zero ori $s4, $zero, 1 + lu12i.w $a0, 287172 + movgr2fr.w $fs3, $a0 fmov.s $fs0, $fs2 b .LBB2_69 .p2align 4, , 16 .LBB2_68: # %._crit_edge697 # in Loop: Header=BB2_69 Depth=1 ld.d $a0, $sp, 248 # 8-byte Folded Reload - ld.d $a2, $sp, 360 # 8-byte Folded Reload + ld.d $a2, $sp, 368 # 8-byte Folded Reload fldx.s $fa0, $a2, $a0 addi.d $s4, $s4, 1 ld.d $a0, $sp, 288 # 8-byte Folded Reload - ld.d $a1, $sp, 344 # 8-byte Folded Reload + ld.d $a1, $sp, 352 # 8-byte Folded Reload fstx.s $fa0, $a0, $a1 move $s1, $a2 - ld.d $s6, $sp, 336 # 8-byte Folded Reload + ld.d $s6, $sp, 344 # 8-byte Folded Reload ld.d $a0, $sp, 280 # 8-byte Folded Reload beq $s4, $a0, .LBB2_119 .LBB2_69: # =>This Loop Header: Depth=1 @@ -1756,10 +1751,10 @@ H__align: # @H__align # Child Loop BB2_107 Depth 4 addi.d $s8, $s4, -1 slli.d $a0, $s8, 2 - ld.d $a1, $sp, 352 # 8-byte Folded Reload + ld.d $a1, $sp, 360 # 8-byte Folded Reload fldx.s $fa0, $a1, $a0 - ld.d $s5, $sp, 368 # 8-byte Folded Reload - st.d $s1, $sp, 368 # 8-byte Folded Spill + ld.d $s5, $sp, 376 # 8-byte Folded Reload + st.d $s1, $sp, 376 # 8-byte Folded Spill fst.s $fa0, $s1, 0 move $a0, $s5 ld.d $a1, $sp, 264 # 8-byte Folded Reload @@ -1776,14 +1771,14 @@ H__align: # @H__align bnez $a0, .LBB2_75 # %bb.70: # %.lr.ph.preheader.i534 # in Loop: Header=BB2_69 Depth=1 - ld.d $a0, $sp, 224 # 8-byte Folded Reload + ld.d $a0, $sp, 232 # 8-byte Folded Reload ldx.d $a4, $a0, $a3 - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload ori $a1, $zero, 7 bltu $a0, $a1, .LBB2_73 # %bb.71: # %vector.memcheck963 # in Loop: Header=BB2_69 Depth=1 - ld.d $a1, $sp, 168 # 8-byte Folded Reload + ld.d $a1, $sp, 176 # 8-byte Folded Reload add.d $a0, $a4, $a1 bgeu $s5, $a0, .LBB2_115 # %bb.72: # %vector.memcheck963 @@ -1792,7 +1787,7 @@ H__align: # @H__align bgeu $a4, $a0, .LBB2_115 .LBB2_73: # in Loop: Header=BB2_69 Depth=1 move $a0, $a4 - ld.d $a2, $sp, 176 # 8-byte Folded Reload + ld.d $a2, $sp, 184 # 8-byte Folded Reload move $a1, $s5 .p2align 4, , 16 .LBB2_74: # %.lr.ph.i535 @@ -1810,10 +1805,10 @@ H__align: # @H__align .LBB2_75: # %imp_match_out_veadH.exit540 # in Loop: Header=BB2_69 Depth=1 slli.d $a1, $s4, 2 - ld.d $a0, $sp, 352 # 8-byte Folded Reload - st.d $a1, $sp, 344 # 8-byte Folded Spill + ld.d $a0, $sp, 360 # 8-byte Folded Reload + st.d $a1, $sp, 352 # 8-byte Folded Spill fldx.s $fa0, $a0, $a1 - st.d $s5, $sp, 360 # 8-byte Folded Spill + st.d $s5, $sp, 368 # 8-byte Folded Spill fst.s $fa0, $s5, 0 vld $vr11, $sp, 320 # 16-byte Folded Reload ld.d $a0, $sp, 296 # 8-byte Folded Reload @@ -1821,27 +1816,25 @@ H__align: # @H__align bnez $a0, .LBB2_68 # %bb.76: # %.lr.ph696 # in Loop: Header=BB2_69 Depth=1 - ld.d $t4, $sp, 368 # 8-byte Folded Reload + ld.d $t4, $sp, 376 # 8-byte Folded Reload fld.s $fa0, $t4, 0 - ld.d $a0, $sp, 240 # 8-byte Folded Reload - fld.s $fa1, $a0, %pc_lo12(.LCPI2_2) move $a1, $zero fadd.s $fa0, $fa0, $fs2 - fmadd.s $fa0, $ft3, $fa1, $fa0 - ld.d $a0, $sp, 200 # 8-byte Folded Reload + fmadd.s $fa0, $ft3, $fs3, $fa0 + ld.d $a0, $sp, 208 # 8-byte Folded Reload ldx.d $s6, $a0, $a3 - ld.d $a0, $sp, 376 # 8-byte Folded Reload + ld.d $a0, $sp, 384 # 8-byte Folded Reload ldx.d $a3, $a0, $a3 - ld.d $a0, $sp, 208 # 8-byte Folded Reload + ld.d $a0, $sp, 216 # 8-byte Folded Reload alsl.d $a4, $s4, $a0, 2 - ld.d $a0, $sp, 192 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload alsl.d $a5, $s4, $a0, 2 addi.d $a6, $s6, 8 addi.d $a7, $s6, 16 ori $t0, $zero, 1 - ld.d $t1, $sp, 360 # 8-byte Folded Reload - ld.d $t2, $sp, 232 # 8-byte Folded Reload - ld.d $t3, $sp, 216 # 8-byte Folded Reload + ld.d $t1, $sp, 368 # 8-byte Folded Reload + ld.d $t2, $sp, 240 # 8-byte Folded Reload + ld.d $t3, $sp, 224 # 8-byte Folded Reload b .LBB2_78 .p2align 4, , 16 .LBB2_77: # in Loop: Header=BB2_78 Depth=2 @@ -1969,7 +1962,7 @@ H__align: # @H__align # %bb.91: # in Loop: Header=BB2_78 Depth=2 ld.w $a0, $t3, 0 slli.d $a2, $a0, 3 - ld.d $t6, $sp, 376 # 8-byte Folded Reload + ld.d $t6, $sp, 384 # 8-byte Folded Reload ldx.d $a2, $t6, $a2 fcvt.d.s $fa3, $fa3 alsl.d $a2, $t0, $a2, 2 @@ -2101,12 +2094,12 @@ H__align: # @H__align b .LBB2_77 .LBB2_115: # %vector.ph971 # in Loop: Header=BB2_69 Depth=1 - ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $a1, $sp, 144 # 8-byte Folded Reload add.d $a0, $a4, $a1 add.d $a1, $s5, $a1 addi.d $a2, $s5, 16 addi.d $a4, $a4, 16 - ld.d $a5, $sp, 160 # 8-byte Folded Reload + ld.d $a5, $sp, 168 # 8-byte Folded Reload .p2align 4, , 16 .LBB2_116: # %vector.body975 # Parent Loop BB2_69 Depth=1 @@ -2125,26 +2118,26 @@ H__align: # @H__align bnez $a5, .LBB2_116 # %bb.117: # %middle.block986 # in Loop: Header=BB2_69 Depth=1 - ld.d $a2, $sp, 128 # 8-byte Folded Reload - ld.d $a4, $sp, 144 # 8-byte Folded Reload - ld.d $a5, $sp, 160 # 8-byte Folded Reload + ld.d $a2, $sp, 136 # 8-byte Folded Reload + ld.d $a4, $sp, 152 # 8-byte Folded Reload + ld.d $a5, $sp, 168 # 8-byte Folded Reload bne $a4, $a5, .LBB2_74 b .LBB2_75 .LBB2_118: movgr2fr.w $fs0, $zero - ld.d $s0, $sp, 96 # 8-byte Folded Reload - ld.d $t2, $sp, 88 # 8-byte Folded Reload + ld.d $s0, $sp, 104 # 8-byte Folded Reload + ld.d $t2, $sp, 96 # 8-byte Folded Reload beqz $a1, .LBB2_120 b .LBB2_134 .LBB2_119: # %._crit_edge706.loopexit - ld.d $a0, $sp, 16 # 8-byte Folded Reload + ld.d $a0, $sp, 24 # 8-byte Folded Reload ld.w $a1, $a0, 0 move $s1, $a2 - ld.d $s7, $sp, 176 # 8-byte Folded Reload - ld.d $t7, $sp, 112 # 8-byte Folded Reload - ld.d $s3, $sp, 32 # 8-byte Folded Reload - ld.d $s0, $sp, 96 # 8-byte Folded Reload - ld.d $t2, $sp, 88 # 8-byte Folded Reload + ld.d $s7, $sp, 184 # 8-byte Folded Reload + ld.d $t7, $sp, 120 # 8-byte Folded Reload + ld.d $s3, $sp, 40 # 8-byte Folded Reload + ld.d $s0, $sp, 104 # 8-byte Folded Reload + ld.d $t2, $sp, 96 # 8-byte Folded Reload bnez $a1, .LBB2_134 .LBB2_120: # %.preheader646 ld.d $a0, $sp, 296 # 8-byte Folded Reload @@ -2167,10 +2160,10 @@ H__align: # @H__align bstrins.d $a3, $a5, 1, 0 vreplgr2vr.w $vr0, $a0 vreplgr2vr.d $vr1, $s7 - pcalau12i $a5, %pc_hi20(.LCPI2_3) - vld $vr2, $a5, %pc_lo12(.LCPI2_3) - pcalau12i $a5, %pc_hi20(.LCPI2_4) - vld $vr3, $a5, %pc_lo12(.LCPI2_4) + pcalau12i $a5, %pc_hi20(.LCPI2_1) + vld $vr2, $a5, %pc_lo12(.LCPI2_1) + pcalau12i $a5, %pc_hi20(.LCPI2_2) + vld $vr3, $a5, %pc_lo12(.LCPI2_2) addi.d $a5, $s1, 4 lu52i.d $a6, $zero, -1026 vreplgr2vr.d $vr4, $a6 @@ -2254,7 +2247,7 @@ H__align: # @H__align .LBB2_127: # %.preheader645 blez $t7, .LBB2_134 # %bb.128: # %.lr.ph714 - ld.d $a2, $sp, 72 # 8-byte Folded Reload + ld.d $a2, $sp, 80 # 8-byte Folded Reload bstrpick.d $a0, $a2, 31, 0 pcalau12i $a1, %got_pc_hi20(offset) ld.d $a1, $a1, %got_pc_lo12(offset) @@ -2263,7 +2256,7 @@ H__align: # @H__align ffint.d.w $fa1, $fa0 movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 - ld.d $a0, $sp, 64 # 8-byte Folded Reload + ld.d $a0, $sp, 72 # 8-byte Folded Reload ld.d $a1, $a0, %pc_lo12(H__align.lastverticalw) fneg.d $fa1, $fa1 addi.d $a0, $a2, 1 @@ -2331,15 +2324,15 @@ H__align: # @H__align addi.d $a1, $a1, 4 bnez $a2, .LBB2_133 .LBB2_134: # %.loopexit - ld.d $a0, $sp, 64 # 8-byte Folded Reload + ld.d $a0, $sp, 72 # 8-byte Folded Reload ld.d $fp, $a0, %pc_lo12(H__align.lastverticalw) - ld.d $a0, $sp, 80 # 8-byte Folded Reload + ld.d $a0, $sp, 88 # 8-byte Folded Reload ld.d $s2, $a0, %pc_lo12(H__align.mseq1) - ld.d $a0, $sp, 40 # 8-byte Folded Reload + ld.d $a0, $sp, 48 # 8-byte Folded Reload ld.d $s7, $a0, %pc_lo12(H__align.mseq2) - ld.d $a0, $sp, 24 # 8-byte Folded Reload + ld.d $a0, $sp, 32 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.ijp) - st.d $a0, $sp, 368 # 8-byte Folded Spill + st.d $a0, $sp, 376 # 8-byte Folded Spill ld.d $a0, $t2, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 @@ -2354,24 +2347,24 @@ H__align: # @H__align move $a0, $s8 pcaddu18i $ra, %call36(AllocateCharVec) jirl $ra, $ra, 0 - st.d $a0, $sp, 360 # 8-byte Folded Spill + st.d $a0, $sp, 368 # 8-byte Folded Spill move $a0, $s8 pcaddu18i $ra, %call36(AllocateCharVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 16 # 8-byte Folded Reload + ld.d $a1, $sp, 24 # 8-byte Folded Reload ld.w $a2, $a1, 0 addi.w $a1, $s6, 0 - st.d $a1, $sp, 376 # 8-byte Folded Spill + st.d $a1, $sp, 384 # 8-byte Folded Spill addi.w $a1, $s4, 0 - st.d $a0, $sp, 352 # 8-byte Folded Spill + st.d $a0, $sp, 360 # 8-byte Folded Spill ori $a3, $zero, 1 bnez $s3, .LBB2_141 # %bb.135: - ld.d $s3, $sp, 368 # 8-byte Folded Reload + ld.d $s3, $sp, 376 # 8-byte Folded Reload beq $a2, $a3, .LBB2_184 # %bb.136: fld.s $fa0, $fp, 0 - ld.d $a2, $sp, 376 # 8-byte Folded Reload + ld.d $a2, $sp, 384 # 8-byte Folded Reload blez $a2, .LBB2_179 # %bb.137: # %.lr.ph.i639 slli.d $a2, $s6, 3 @@ -2398,11 +2391,11 @@ H__align: # @H__align fmov.s $fa0, $fa1 b .LBB2_138 .LBB2_141: - ld.d $ra, $sp, 368 # 8-byte Folded Reload + ld.d $ra, $sp, 376 # 8-byte Folded Reload beq $a2, $a3, .LBB2_192 # %bb.142: fld.s $fa0, $fp, 0 - ld.d $a2, $sp, 376 # 8-byte Folded Reload + ld.d $a2, $sp, 384 # 8-byte Folded Reload blez $a2, .LBB2_187 # %bb.143: # %.lr.ph.i586 slli.d $a2, $s6, 3 @@ -2429,18 +2422,18 @@ H__align: # @H__align fmov.s $fa0, $fa1 b .LBB2_144 .LBB2_147: # %.lr.ph675 - ld.d $a0, $sp, 344 # 8-byte Folded Reload + ld.d $a0, $sp, 352 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.diaf1) ld.d $a1, $sp, 248 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(H__align.gapf2) fcvt.d.s $fa0, $ft2 - ld.d $a2, $sp, 48 # 8-byte Folded Reload + ld.d $a2, $sp, 56 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(H__align.gappat1) - ld.d $a3, $sp, 56 # 8-byte Folded Reload + ld.d $a3, $sp, 64 # 8-byte Folded Reload ld.d $a3, $a3, %pc_lo12(H__align.gappat2) - ld.d $a4, $sp, 360 # 8-byte Folded Reload + ld.d $a4, $sp, 368 # 8-byte Folded Reload ld.d $a4, $a4, %pc_lo12(H__align.diaf2) - ld.d $a5, $sp, 72 # 8-byte Folded Reload + ld.d $a5, $sp, 80 # 8-byte Folded Reload addi.d $a5, $a5, 1 bstrpick.d $a5, $a5, 31, 0 ori $a6, $zero, 1 @@ -2552,16 +2545,16 @@ H__align: # @H__align fmadd.s $fa7, $ft0, $ft1, $fa7 b .LBB2_160 .LBB2_163: # %.lr.ph679 - ld.d $a0, $sp, 360 # 8-byte Folded Reload + ld.d $a0, $sp, 368 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.diaf2) - ld.d $a1, $sp, 192 # 8-byte Folded Reload + ld.d $a1, $sp, 200 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(H__align.gapf1) fcvt.d.s $fa0, $ft2 - ld.d $a2, $sp, 56 # 8-byte Folded Reload + ld.d $a2, $sp, 64 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(H__align.gappat2) - ld.d $a3, $sp, 48 # 8-byte Folded Reload + ld.d $a3, $sp, 56 # 8-byte Folded Reload ld.d $a3, $a3, %pc_lo12(H__align.gappat1) - ld.d $a4, $sp, 344 # 8-byte Folded Reload + ld.d $a4, $sp, 352 # 8-byte Folded Reload ld.d $a4, $a4, %pc_lo12(H__align.diaf1) addi.d $a5, $s7, 1 bstrpick.d $a5, $a5, 31, 0 @@ -2700,7 +2693,7 @@ H__align: # @H__align fmov.s $fa0, $fa1 b .LBB2_181 .LBB2_184: # %.loopexit.i592 - ld.d $a2, $sp, 376 # 8-byte Folded Reload + ld.d $a2, $sp, 384 # 8-byte Folded Reload bltz $a2, .LBB2_200 # %bb.185: # %.lr.ph10.preheader.i594 addi.d $a3, $s6, 1 @@ -2737,7 +2730,7 @@ H__align: # @H__align fmov.s $fa0, $fa1 b .LBB2_189 .LBB2_192: # %.loopexit.i - ld.d $a2, $sp, 376 # 8-byte Folded Reload + ld.d $a2, $sp, 384 # 8-byte Folded Reload bltz $a2, .LBB2_208 # %bb.193: # %.lr.ph10.preheader.i addi.d $a3, $s6, 1 @@ -2748,8 +2741,8 @@ H__align: # @H__align move $a3, $zero b .LBB2_206 .LBB2_195: # %vector.ph1099 - pcalau12i $a4, %pc_hi20(.LCPI2_5) - vld $vr0, $a4, %pc_lo12(.LCPI2_5) + pcalau12i $a4, %pc_hi20(.LCPI2_3) + vld $vr0, $a4, %pc_lo12(.LCPI2_3) bstrpick.d $a3, $a3, 31, 2 slli.d $a3, $a3, 2 addi.d $a4, $s3, 16 @@ -2801,8 +2794,8 @@ H__align: # @H__align move $a4, $zero b .LBB2_214 .LBB2_203: # %vector.ph1025 - pcalau12i $a4, %pc_hi20(.LCPI2_5) - vld $vr0, $a4, %pc_lo12(.LCPI2_5) + pcalau12i $a4, %pc_hi20(.LCPI2_3) + vld $vr0, $a4, %pc_lo12(.LCPI2_3) bstrpick.d $a3, $a3, 31, 2 slli.d $a3, $a3, 2 addi.d $a4, $ra, 16 @@ -2843,7 +2836,7 @@ H__align: # @H__align addi.d $a3, $a3, 1 bnez $a2, .LBB2_207 .LBB2_208: # %.preheader1.i - ld.d $a2, $sp, 496 + ld.d $a2, $sp, 512 bltz $a1, .LBB2_258 # %bb.209: # %.lr.ph13.i ld.d $a3, $ra, 0 @@ -2857,8 +2850,8 @@ H__align: # @H__align .LBB2_211: # %vector.ph1115 bstrpick.d $a4, $a4, 31, 3 slli.d $a4, $a4, 3 - pcalau12i $a5, %pc_hi20(.LCPI2_6) - vld $vr0, $a5, %pc_lo12(.LCPI2_6) + pcalau12i $a5, %pc_hi20(.LCPI2_4) + vld $vr0, $a5, %pc_lo12(.LCPI2_4) addi.d $a5, $a2, 16 vrepli.b $vr1, -1 vrepli.w $vr2, -5 @@ -2889,8 +2882,8 @@ H__align: # @H__align addi.d $a5, $a5, -1 bnez $a3, .LBB2_215 .LBB2_216: # %._crit_edge.i606 - ld.d $a2, $sp, 360 # 8-byte Folded Reload - ld.d $a3, $sp, 376 # 8-byte Folded Reload + ld.d $a2, $sp, 368 # 8-byte Folded Reload + ld.d $a3, $sp, 384 # 8-byte Folded Reload add.d $a2, $a2, $a3 add.d $s8, $a2, $a1 stx.b $zero, $a2, $a1 @@ -3087,15 +3080,15 @@ H__align: # @H__align move $s6, $a6 bge $s5, $a5, .LBB2_218 .LBB2_246: # %._crit_edge42.i618 - ld.d $s5, $sp, 120 # 8-byte Folded Reload - ld.d $s6, $sp, 88 # 8-byte Folded Reload + ld.d $s5, $sp, 128 # 8-byte Folded Reload + ld.d $s6, $sp, 96 # 8-byte Folded Reload bgtz $s5, .LBB2_248 b .LBB2_250 .LBB2_247: move $s8, $t3 move $s1, $t2 - ld.d $s5, $sp, 120 # 8-byte Folded Reload - ld.d $s6, $sp, 88 # 8-byte Folded Reload + ld.d $s5, $sp, 128 # 8-byte Folded Reload + ld.d $s6, $sp, 96 # 8-byte Folded Reload blez $s5, .LBB2_250 .LBB2_248: # %.lr.ph50.preheader.i627 move $fp, $s6 @@ -3113,9 +3106,9 @@ H__align: # @H__align addi.d $s2, $s2, 8 bnez $s0, .LBB2_249 .LBB2_250: # %.preheader.i620 - ld.d $s2, $sp, 104 # 8-byte Folded Reload - ld.d $s3, $sp, 96 # 8-byte Folded Reload - ld.d $s4, $sp, 336 # 8-byte Folded Reload + ld.d $s2, $sp, 112 # 8-byte Folded Reload + ld.d $s3, $sp, 104 # 8-byte Folded Reload + ld.d $s4, $sp, 344 # 8-byte Folded Reload blez $s2, .LBB2_298 # %bb.251: # %.lr.ph52.preheader.i621 move $fp, $s3 @@ -3136,8 +3129,8 @@ H__align: # @H__align .LBB2_253: # %vector.ph1040 bstrpick.d $a5, $a5, 31, 3 slli.d $a5, $a5, 3 - pcalau12i $a6, %pc_hi20(.LCPI2_6) - vld $vr0, $a6, %pc_lo12(.LCPI2_6) + pcalau12i $a6, %pc_hi20(.LCPI2_4) + vld $vr0, $a6, %pc_lo12(.LCPI2_4) addi.d $a6, $a3, 16 vrepli.b $vr1, -1 vrepli.w $vr2, -5 @@ -3168,8 +3161,8 @@ H__align: # @H__align addi.d $a6, $a6, -1 bnez $a4, .LBB2_257 .LBB2_258: # %._crit_edge.i - ld.d $a3, $sp, 360 # 8-byte Folded Reload - ld.d $a4, $sp, 376 # 8-byte Folded Reload + ld.d $a3, $sp, 368 # 8-byte Folded Reload + ld.d $a4, $sp, 384 # 8-byte Folded Reload add.d $a3, $a3, $a4 add.d $s8, $a3, $a1 stx.b $zero, $a3, $a1 @@ -3284,7 +3277,7 @@ H__align: # @H__align bnez $fp, .LBB2_272 .LBB2_273: # %._crit_edge30.i # in Loop: Header=BB2_260 Depth=1 - ld.d $fp, $sp, 376 # 8-byte Folded Reload + ld.d $fp, $sp, 384 # 8-byte Folded Reload bne $t2, $fp, .LBB2_286 b .LBB2_288 .p2align 4, , 16 @@ -3325,7 +3318,7 @@ H__align: # @H__align bnez $ra, .LBB2_279 # %bb.280: # %middle.block1092 # in Loop: Header=BB2_260 Depth=1 - ld.d $ra, $sp, 368 # 8-byte Folded Reload + ld.d $ra, $sp, 376 # 8-byte Folded Reload bne $s5, $s3, .LBB2_282 b .LBB2_284 .p2align 4, , 16 @@ -3359,14 +3352,14 @@ H__align: # @H__align .LBB2_285: # in Loop: Header=BB2_260 Depth=1 move $t8, $s8 move $t7, $s1 - ld.d $fp, $sp, 376 # 8-byte Folded Reload + ld.d $fp, $sp, 384 # 8-byte Folded Reload beq $t2, $fp, .LBB2_288 .LBB2_286: # %._crit_edge30.i # in Loop: Header=BB2_260 Depth=1 addi.w $fp, $s4, 0 beq $t3, $fp, .LBB2_288 # %bb.287: # in Loop: Header=BB2_260 Depth=1 - ld.d $fp, $sp, 152 # 8-byte Folded Reload + ld.d $fp, $sp, 160 # 8-byte Folded Reload ld.d $fp, $fp, %pc_lo12(impmtx) ldx.d $t5, $fp, $t5 fldx.s $fa3, $t5, $t6 @@ -3387,15 +3380,15 @@ H__align: # @H__align move $s6, $t1 bge $s5, $t0, .LBB2_260 .LBB2_291: # %._crit_edge42.i - ld.d $s5, $sp, 120 # 8-byte Folded Reload - ld.d $s6, $sp, 88 # 8-byte Folded Reload + ld.d $s5, $sp, 128 # 8-byte Folded Reload + ld.d $s6, $sp, 96 # 8-byte Folded Reload bgtz $s5, .LBB2_293 b .LBB2_295 .LBB2_292: move $s8, $t8 move $s1, $t7 - ld.d $s5, $sp, 120 # 8-byte Folded Reload - ld.d $s6, $sp, 88 # 8-byte Folded Reload + ld.d $s5, $sp, 128 # 8-byte Folded Reload + ld.d $s6, $sp, 96 # 8-byte Folded Reload blez $s5, .LBB2_295 .LBB2_293: # %.lr.ph50.preheader.i move $fp, $s6 @@ -3413,9 +3406,9 @@ H__align: # @H__align addi.d $s2, $s2, 8 bnez $s0, .LBB2_294 .LBB2_295: # %.preheader.i585 - ld.d $s2, $sp, 104 # 8-byte Folded Reload - ld.d $s3, $sp, 96 # 8-byte Folded Reload - ld.d $s4, $sp, 336 # 8-byte Folded Reload + ld.d $s2, $sp, 112 # 8-byte Folded Reload + ld.d $s3, $sp, 104 # 8-byte Folded Reload + ld.d $s4, $sp, 344 # 8-byte Folded Reload blez $s2, .LBB2_298 # %bb.296: # %.lr.ph52.preheader.i move $fp, $s3 @@ -3433,21 +3426,21 @@ H__align: # @H__align addi.d $s7, $s7, 8 bnez $s0, .LBB2_297 .LBB2_298: # %Atracking_localhom.exit - ld.d $a0, $sp, 360 # 8-byte Folded Reload + ld.d $a0, $sp, 368 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 352 # 8-byte Folded Reload + ld.d $a0, $sp, 360 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 80 # 8-byte Folded Reload + ld.d $a0, $sp, 88 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(H__align.mseq1) ld.d $a0, $a0, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 addi.w $a3, $a0, 0 lu12i.w $a4, 1220 - ld.d $s1, $sp, 176 # 8-byte Folded Reload - ld.d $a2, $sp, 8 # 8-byte Folded Reload + ld.d $s1, $sp, 184 # 8-byte Folded Reload + ld.d $a2, $sp, 16 # 8-byte Folded Reload blt $a2, $a3, .LBB2_300 # %bb.299: # %Atracking_localhom.exit ori $a0, $a4, 2881 @@ -3468,7 +3461,7 @@ H__align: # @H__align .LBB2_301: blez $s5, .LBB2_304 # %bb.302: # %.lr.ph717 - ld.d $a0, $sp, 80 # 8-byte Folded Reload + ld.d $a0, $sp, 88 # 8-byte Folded Reload ld.d $fp, $a0, %pc_lo12(H__align.mseq1) .p2align 4, , 16 .LBB2_303: # =>This Inner Loop Header: Depth=1 @@ -3483,7 +3476,7 @@ H__align: # @H__align .LBB2_304: # %.preheader644 blez $s2, .LBB2_307 # %bb.305: # %.lr.ph719 - ld.d $a0, $sp, 40 # 8-byte Folded Reload + ld.d $a0, $sp, 48 # 8-byte Folded Reload ld.d $fp, $a0, %pc_lo12(H__align.mseq2) .p2align 4, , 16 .LBB2_306: # =>This Inner Loop Header: Depth=1 @@ -3505,12 +3498,12 @@ H__align: # @H__align addi.d $a1, $a1, %pc_lo12(.L.str.2) pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 120 # 8-byte Folded Reload bltz $a0, .LBB2_310 # %bb.308: # %.lr.ph724 - ld.d $a0, $sp, 48 # 8-byte Folded Reload + ld.d $a0, $sp, 56 # 8-byte Folded Reload ld.d $fp, $a0, %pc_lo12(H__align.gappat1) - ld.d $a0, $sp, 72 # 8-byte Folded Reload + ld.d $a0, $sp, 80 # 8-byte Folded Reload addi.d $a0, $a0, 1 bstrpick.d $s0, $a0, 31, 0 .p2align 4, , 16 @@ -3525,7 +3518,7 @@ H__align: # @H__align .LBB2_310: # %.preheader bltz $s4, .LBB2_313 # %bb.311: # %.lr.ph727 - ld.d $a0, $sp, 56 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload ld.d $fp, $a0, %pc_lo12(H__align.gappat2) addi.d $a0, $s1, 1 bstrpick.d $s0, $a0, 31, 0 @@ -3540,21 +3533,22 @@ H__align: # @H__align bnez $s0, .LBB2_312 .LBB2_313: # %._crit_edge728 fmov.s $fa0, $fs0 - fld.d $fs2, $sp, 384 # 8-byte Folded Reload - fld.d $fs1, $sp, 392 # 8-byte Folded Reload - fld.d $fs0, $sp, 400 # 8-byte Folded Reload - ld.d $s8, $sp, 408 # 8-byte Folded Reload - ld.d $s7, $sp, 416 # 8-byte Folded Reload - ld.d $s6, $sp, 424 # 8-byte Folded Reload - ld.d $s5, $sp, 432 # 8-byte Folded Reload - ld.d $s4, $sp, 440 # 8-byte Folded Reload - ld.d $s3, $sp, 448 # 8-byte Folded Reload - ld.d $s2, $sp, 456 # 8-byte Folded Reload - ld.d $s1, $sp, 464 # 8-byte Folded Reload - ld.d $s0, $sp, 472 # 8-byte Folded Reload - ld.d $fp, $sp, 480 # 8-byte Folded Reload - ld.d $ra, $sp, 488 # 8-byte Folded Reload - addi.d $sp, $sp, 496 + fld.d $fs3, $sp, 392 # 8-byte Folded Reload + fld.d $fs2, $sp, 400 # 8-byte Folded Reload + fld.d $fs1, $sp, 408 # 8-byte Folded Reload + fld.d $fs0, $sp, 416 # 8-byte Folded Reload + ld.d $s8, $sp, 424 # 8-byte Folded Reload + ld.d $s7, $sp, 432 # 8-byte Folded Reload + ld.d $s6, $sp, 440 # 8-byte Folded Reload + ld.d $s5, $sp, 448 # 8-byte Folded Reload + ld.d $s4, $sp, 456 # 8-byte Folded Reload + ld.d $s3, $sp, 464 # 8-byte Folded Reload + ld.d $s2, $sp, 472 # 8-byte Folded Reload + ld.d $s1, $sp, 480 # 8-byte Folded Reload + ld.d $s0, $sp, 488 # 8-byte Folded Reload + ld.d $fp, $sp, 496 # 8-byte Folded Reload + ld.d $ra, $sp, 504 # 8-byte Folded Reload + addi.d $sp, $sp, 512 ret .LBB2_314: # %vector.ph bstrpick.d $a0, $s7, 31, 3 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Lalign11.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Lalign11.s index 659266aa..17ce6f0c 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Lalign11.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Lalign11.s @@ -1,14 +1,6 @@ .file "Lalign11.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function L__align11 -.LCPI0_0: - .dword 0x3ff4cccccccccccd # double 1.3 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI0_1: - .word 0xce6e6b28 # float -1.0E+9 .text - .globl L__align11 + .globl L__align11 # -- Begin function L__align11 .p2align 5 .type L__align11,@function L__align11: # @L__align11 @@ -91,8 +83,8 @@ L__align11: # @L__align11 st.d $a0, $sp, 112 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(L__align11.lastverticalw) st.d $a0, $sp, 80 # 8-byte Folded Spill - pcalau12i $a1, %pc_hi20(L__align11.m) - pcalau12i $s5, %pc_hi20(L__align11.mp) + pcalau12i $s5, %pc_hi20(L__align11.m) + pcalau12i $a1, %pc_hi20(L__align11.mp) pcalau12i $s1, %pc_hi20(L__align11.mseq) st.d $s6, $sp, 48 # 8-byte Folded Spill st.d $s4, $sp, 16 # 8-byte Folded Spill @@ -131,11 +123,11 @@ L__align11: # @L__align11 ld.d $a0, $a0, %pc_lo12(L__align11.lastverticalw) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $s6, %pc_lo12(L__align11.m) + ld.d $a0, $sp, 8 # 8-byte Folded Reload + ld.d $a0, $a0, %pc_lo12(L__align11.m) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 8 # 8-byte Folded Reload - ld.d $a0, $a0, %pc_lo12(L__align11.mp) + ld.d $a0, $s6, %pc_lo12(L__align11.mp) pcaddu18i $ra, %call36(FreeIntVec) jirl $ra, $ra, 0 ld.d $a0, $s1, %pc_lo12(L__align11.mseq) @@ -149,21 +141,24 @@ L__align11: # @L__align11 ld.d $s4, $sp, 104 # 8-byte Folded Reload move $s6, $a1 .LBB0_9: - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_0) - movgr2fr.w $fa1, $s4 - ffint.d.w $fa1, $fa1 - fmul.d $fa1, $fa1, $fa0 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 + movgr2fr.w $fa0, $s4 + ffint.d.w $fa0, $fa0 + lu12i.w $a0, -209716 + ori $a0, $a0, 3277 + lu32i.d $a0, 314572 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a0, $fa0 slt $a1, $a0, $s7 masknez $a0, $a0, $a1 maskeqz $a1, $s7, $a1 or $s7, $a1, $a0 ld.d $a0, $sp, 136 # 8-byte Folded Reload - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 slt $a1, $a0, $s8 @@ -201,12 +196,12 @@ L__align11: # @L__align11 move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 + ld.d $s5, $sp, 8 # 8-byte Folded Reload st.d $a0, $s5, %pc_lo12(L__align11.m) move $a0, $s4 pcaddu18i $ra, %call36(AllocateIntVec) jirl $ra, $ra, 0 - ld.d $s5, $sp, 8 # 8-byte Folded Reload - st.d $a0, $s5, %pc_lo12(L__align11.mp) + st.d $a0, $s6, %pc_lo12(L__align11.mp) pcalau12i $a0, %got_pc_hi20(njob) ld.d $a0, $a0, %got_pc_lo12(njob) ld.w $a0, $a0, 0 @@ -350,8 +345,8 @@ L__align11: # @L__align11 # %bb.23: # %match_calc.exit184 blez $s2, .LBB0_32 # %bb.24: # %.lr.ph - ld.d $a4, $s6, %pc_lo12(L__align11.m) - ld.d $a5, $s5, %pc_lo12(L__align11.mp) + ld.d $a4, $s5, %pc_lo12(L__align11.m) + ld.d $a5, $s6, %pc_lo12(L__align11.mp) ld.d $a0, $sp, 136 # 8-byte Folded Reload addi.d $a6, $a0, 1 bstrpick.d $a6, $a6, 31, 0 @@ -415,15 +410,17 @@ L__align11: # @L__align11 slli.d $a5, $a0, 32 ori $a6, $zero, 0 lu32i.d $a6, -1 - add.d $a6, $a5, $a6 - srai.d $a5, $a6, 30 + add.d $a7, $a5, $a6 + srai.d $a5, $a7, 30 fldx.s $fa0, $t0, $a5 ld.d $a3, $sp, 80 # 8-byte Folded Reload ld.d $a5, $a3, %pc_lo12(L__align11.lastverticalw) fst.s $fa0, $a5, 0 - addi.w $a7, $t8, 1 - add.w $s7, $a7, $a0 - pcalau12i $t1, %pc_hi20(.LCPI0_1) + addi.w $t1, $t8, 1 + add.w $s7, $t1, $a0 + lu12i.w $a6, -203034 + ori $a6, $a6, 2856 + lu32i.d $a6, 0 blez $t7, .LBB0_47 # %bb.33: # %.lr.ph224 move $s3, $zero @@ -438,15 +435,15 @@ L__align11: # @L__align11 ffint.s.w $fa0, $fa0 ffint.s.w $fa1, $fa1 ffint.s.w $fa2, $fa2 - ld.d $t2, $s6, %pc_lo12(L__align11.m) - srai.d $a6, $a6, 32 - ld.d $t3, $s5, %pc_lo12(L__align11.mp) - bstrpick.d $a7, $a7, 31, 0 + ld.d $t2, $s5, %pc_lo12(L__align11.m) + ld.d $t3, $s6, %pc_lo12(L__align11.mp) + srai.d $t4, $a7, 32 + bstrpick.d $a7, $t1, 31, 0 addi.d $a0, $t2, 4 - fld.s $fs0, $t1, %pc_lo12(.LCPI0_1) addi.d $t1, $t3, 4 ori $t2, $zero, 1 - slli.d $t3, $a6, 2 + movgr2fr.w $fs0, $a6 + slli.d $t3, $t4, 2 b .LBB0_35 .p2align 4, , 16 .LBB0_34: # %._crit_edge210 @@ -579,9 +576,9 @@ L__align11: # @L__align11 fmov.s $fa6, $fa0 b .LBB0_40 .LBB0_47: - fld.s $fs0, $t1, %pc_lo12(.LCPI0_1) move $s8, $zero move $s3, $zero + movgr2fr.w $fs0, $a6 .LBB0_48: # %._crit_edge225 addi.w $a2, $s8, 0 slli.d $a0, $a2, 3 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Lalignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Lalignmm.s index 5ef6907a..ae91143d 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Lalignmm.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Lalignmm.s @@ -1,10 +1,6 @@ .file "Lalignmm.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function Lalignmm_hmout -.LCPI0_0: - .word 0xcb189680 # float -1.0E+7 .text - .globl Lalignmm_hmout + .globl Lalignmm_hmout # -- Begin function Lalignmm_hmout .p2align 5 .type Lalignmm_hmout,@function Lalignmm_hmout: # @Lalignmm_hmout @@ -1198,10 +1194,12 @@ Lalignmm_hmout: # @Lalignmm_hmout add.d $s6, $t5, $a0 ld.d $a1, $sp, 152 # 8-byte Folded Reload add.d $fp, $a1, $a0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI0_0) addi.d $a0, $t7, -2 st.d $a0, $sp, 136 # 8-byte Folded Spill + lu12i.w $a0, -216695 + ori $a0, $a0, 1664 + lu32i.d $a0, 0 + movgr2fr.w $fs0, $a0 addi.d $a0, $t4, 8 st.d $a0, $sp, 128 # 8-byte Folded Spill movgr2fr.w $fa3, $zero @@ -2289,12 +2287,7 @@ Lalignmm_hmout: # @Lalignmm_hmout .Lfunc_end0: .size Lalignmm_hmout, .Lfunc_end0-Lalignmm_hmout # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function Lalign2m2m_hmout -.LCPI1_0: - .word 0xcb189680 # float -1.0E+7 - .text - .globl Lalign2m2m_hmout + .globl Lalign2m2m_hmout # -- Begin function Lalign2m2m_hmout .p2align 5 .type Lalign2m2m_hmout,@function Lalign2m2m_hmout: # @Lalign2m2m_hmout @@ -3507,10 +3500,12 @@ Lalign2m2m_hmout: # @Lalign2m2m_hmout add.d $s6, $t5, $a0 ld.d $a1, $sp, 152 # 8-byte Folded Reload add.d $fp, $a1, $a0 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI1_0) addi.d $a0, $t7, -2 st.d $a0, $sp, 136 # 8-byte Folded Spill + lu12i.w $a0, -216695 + ori $a0, $a0, 1664 + lu32i.d $a0, 0 + movgr2fr.w $fs0, $a0 addi.d $a0, $t4, 8 st.d $a0, $sp, 128 # 8-byte Folded Spill movgr2fr.w $fa3, $zero diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/MSalign11.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/MSalign11.s index 3a0c68bc..89b83117 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/MSalign11.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/MSalign11.s @@ -639,20 +639,12 @@ extendmseq: # @extendmseq .Lfunc_end1: .size extendmseq, .Lfunc_end1-extendmseq # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function MSalign11 -.LCPI2_0: - .dword 0x3ff4cccccccccccd # double 1.3 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI2_1: - .word 0xc479f99a # float -999.900024 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI2_2: + .p2align 4, 0x0 # -- Begin function MSalign11 +.LCPI2_0: .dword 0 # 0x0 .dword 1 # 0x1 -.LCPI2_3: +.LCPI2_1: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -709,7 +701,7 @@ MSalign11: # @MSalign11 ld.w $s4, $a1, %pc_lo12(MSalign11.orlgth1) st.d $a0, $s2, %pc_lo12(MSalign11.mseq2) .LBB2_2: - st.d $s2, $sp, 120 # 8-byte Folded Spill + st.d $s2, $sp, 128 # 8-byte Folded Spill ld.d $a0, $s0, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 @@ -728,7 +720,7 @@ MSalign11: # @MSalign11 pcalau12i $a0, %pc_hi20(MSalign11.w2) st.d $a0, $sp, 144 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(MSalign11.initverticalw) - st.d $a0, $sp, 272 # 8-byte Folded Spill + st.d $a0, $sp, 264 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(MSalign11.lastverticalw) st.d $a0, $sp, 256 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(MSalign11.m) @@ -736,27 +728,27 @@ MSalign11: # @MSalign11 pcalau12i $a0, %pc_hi20(MSalign11.mp) st.d $a0, $sp, 192 # 8-byte Folded Spill pcalau12i $s6, %pc_hi20(MSalign11.mseq) - st.d $s6, $sp, 112 # 8-byte Folded Spill + st.d $s6, $sp, 120 # 8-byte Folded Spill st.d $s1, $sp, 136 # 8-byte Folded Spill st.d $s5, $sp, 96 # 8-byte Folded Spill + st.d $s8, $sp, 248 # 8-byte Folded Spill blt $s4, $s1, .LBB2_5 # %bb.3: blt $s7, $s3, .LBB2_5 # %bb.4: move $t6, $zero - st.d $zero, $sp, 128 # 8-byte Folded Spill + move $s8, $zero b .LBB2_9 .LBB2_5: - st.d $s2, $sp, 264 # 8-byte Folded Spill + st.d $s2, $sp, 240 # 8-byte Folded Spill pcalau12i $s2, %pc_hi20(MSalign11.match) pcalau12i $a0, %pc_hi20(MSalign11.cpmx1) - st.d $a0, $sp, 232 # 8-byte Folded Spill + st.d $a0, $sp, 208 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(MSalign11.cpmx2) - st.d $a0, $sp, 240 # 8-byte Folded Spill + st.d $a0, $sp, 224 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(MSalign11.floatwork) - st.d $a0, $sp, 248 # 8-byte Folded Spill + st.d $a0, $sp, 232 # 8-byte Folded Spill pcalau12i $s1, %pc_hi20(MSalign11.intwork) - st.d $s8, $sp, 224 # 8-byte Folded Spill blez $s4, .LBB2_8 # %bb.6: blez $s7, .LBB2_8 @@ -772,7 +764,7 @@ MSalign11: # @MSalign11 ld.d $a0, $s2, %pc_lo12(MSalign11.match) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 272 # 8-byte Folded Reload + ld.d $a0, $sp, 264 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(MSalign11.initverticalw) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 @@ -797,15 +789,15 @@ MSalign11: # @MSalign11 ld.d $a0, $s6, %pc_lo12(MSalign11.mseq) pcaddu18i $ra, %call36(FreeCharMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 232 # 8-byte Folded Reload + ld.d $a0, $sp, 208 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(MSalign11.cpmx1) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 240 # 8-byte Folded Reload + ld.d $a0, $sp, 224 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(MSalign11.cpmx2) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 248 # 8-byte Folded Reload + ld.d $a0, $sp, 232 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(MSalign11.floatwork) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 @@ -814,26 +806,29 @@ MSalign11: # @MSalign11 jirl $ra, $ra, 0 ld.d $a0, $sp, 280 # 8-byte Folded Reload ld.w $s4, $a0, %pc_lo12(MSalign11.orlgth1) - ld.d $a0, $sp, 264 # 8-byte Folded Reload + ld.d $a0, $sp, 240 # 8-byte Folded Reload ld.w $s7, $a0, %pc_lo12(MSalign11.orlgth2) .LBB2_8: - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI2_0) ld.d $a0, $sp, 168 # 8-byte Folded Reload - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fmul.d $fa1, $fa1, $fa0 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + lu12i.w $a0, -209716 + ori $a0, $a0, 3277 + lu32i.d $a0, 314572 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a0, $fa0 slt $a1, $a0, $s4 masknez $a0, $a0, $a1 maskeqz $a1, $s4, $a1 or $s4, $a1, $a0 addi.w $s6, $s4, 100 ld.d $a0, $sp, 216 # 8-byte Folded Reload - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 slt $a1, $a0, $s7 @@ -841,7 +836,7 @@ MSalign11: # @MSalign11 maskeqz $a1, $s7, $a1 or $s7, $a1, $a0 addi.w $s5, $s7, 100 - st.d $s1, $sp, 208 # 8-byte Folded Spill + st.d $s1, $sp, 184 # 8-byte Folded Spill addi.w $s1, $s7, 102 move $a0, $s1 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -861,7 +856,7 @@ MSalign11: # @MSalign11 move $a0, $s2 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 272 # 8-byte Folded Reload + ld.d $a1, $sp, 264 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(MSalign11.initverticalw) move $a0, $s2 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -871,7 +866,7 @@ MSalign11: # @MSalign11 move $a0, $s2 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - move $s8, $a0 + st.d $a0, $sp, 272 # 8-byte Folded Spill move $a0, $s1 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 @@ -885,51 +880,50 @@ MSalign11: # @MSalign11 move $a0, $s1 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - st.d $a0, $sp, 128 # 8-byte Folded Spill + move $s8, $a0 pcalau12i $a0, %got_pc_hi20(njob) ld.d $a0, $a0, %got_pc_lo12(njob) ld.w $a0, $a0, 0 add.w $a1, $s5, $s6 pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 112 # 8-byte Folded Reload + ld.d $a1, $sp, 120 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(MSalign11.mseq) ori $a0, $zero, 26 move $a1, $s2 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 232 # 8-byte Folded Reload + ld.d $a1, $sp, 208 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(MSalign11.cpmx1) ori $a0, $zero, 26 move $a1, $s1 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 240 # 8-byte Folded Reload + ld.d $a1, $sp, 224 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(MSalign11.cpmx2) slt $a0, $s5, $s6 masknez $a1, $s5, $a0 maskeqz $a0, $s6, $a0 - ld.d $s6, $sp, 112 # 8-byte Folded Reload + ld.d $s6, $sp, 120 # 8-byte Folded Reload or $a0, $a0, $a1 addi.w $s1, $a0, 2 ori $a0, $zero, 26 move $a1, $s1 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 248 # 8-byte Folded Reload + ld.d $a1, $sp, 232 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(MSalign11.floatwork) ori $a0, $zero, 26 move $a1, $s1 pcaddu18i $ra, %call36(AllocateIntMtx) jirl $ra, $ra, 0 - move $t6, $s8 - ld.d $a1, $sp, 208 # 8-byte Folded Reload + ld.d $t6, $sp, 272 # 8-byte Folded Reload + ld.d $a1, $sp, 184 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(MSalign11.intwork) ld.d $a0, $sp, 280 # 8-byte Folded Reload st.w $s4, $a0, %pc_lo12(MSalign11.orlgth1) - ld.d $s2, $sp, 264 # 8-byte Folded Reload + ld.d $s2, $sp, 240 # 8-byte Folded Reload st.w $s7, $s2, %pc_lo12(MSalign11.orlgth2) - ld.d $s8, $sp, 224 # 8-byte Folded Reload .LBB2_9: ld.d $a0, $s6, %pc_lo12(MSalign11.mseq) ld.d $a1, $a0, 0 @@ -937,7 +931,7 @@ MSalign11: # @MSalign11 ld.d $a2, $a2, %pc_lo12(MSalign11.mseq1) st.d $a1, $a2, 0 ld.d $a0, $a0, 8 - ld.d $a1, $sp, 120 # 8-byte Folded Reload + ld.d $a1, $sp, 128 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(MSalign11.mseq2) st.d $a0, $a1, 0 pcalau12i $a0, %got_pc_hi20(commonAlloc1) @@ -948,7 +942,7 @@ MSalign11: # @MSalign11 ld.w $a1, $s6, 0 pcalau12i $a2, %pc_hi20(MSalign11.WMMTX) st.d $a2, $sp, 208 # 8-byte Folded Spill - st.d $t6, $sp, 264 # 8-byte Folded Spill + st.d $t6, $sp, 272 # 8-byte Folded Spill blt $a0, $s4, .LBB2_11 # %bb.10: bge $a1, $s7, .LBB2_15 @@ -997,7 +991,7 @@ MSalign11: # @MSalign11 st.d $a0, $a1, %pc_lo12(MSalign11.WMMTX) st.w $s4, $s5, 0 st.w $s7, $s6, 0 - ld.d $t6, $sp, 264 # 8-byte Folded Reload + ld.d $t6, $sp, 272 # 8-byte Folded Reload .LBB2_15: pcalau12i $a0, %got_pc_hi20(commonIP) ld.d $a0, $a0, %got_pc_lo12(commonIP) @@ -1006,7 +1000,7 @@ MSalign11: # @MSalign11 ld.d $s2, $a0, %pc_lo12(MSalign11.w1) ld.d $a0, $sp, 144 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(MSalign11.w2) - ld.d $a1, $sp, 272 # 8-byte Folded Reload + ld.d $a1, $sp, 264 # 8-byte Folded Reload ld.d $a2, $a1, %pc_lo12(MSalign11.initverticalw) ld.d $a1, $fp, 0 ld.d $a3, $s0, 0 @@ -1038,8 +1032,8 @@ MSalign11: # @MSalign11 move $a6, $t0 bnez $a5, .LBB2_17 .LBB2_18: # %match_calc.exit - movgr2fr.w $fa0, $s8 - ld.d $t7, $sp, 128 # 8-byte Folded Reload + ld.d $a4, $sp, 248 # 8-byte Folded Reload + movgr2fr.w $fa0, $a4 beqz $s3, .LBB2_21 # %bb.19: # %.lr.ph.i309 ld.b $a3, $a3, 0 @@ -1069,7 +1063,7 @@ MSalign11: # @MSalign11 ld.d $a4, $a3, 0 ffint.s.w $fs1, $fa0 fst.s $fa1, $a4, 0 - fst.s $fa1, $t7, 0 + fst.s $fa1, $s8, 0 ld.d $a1, $sp, 168 # 8-byte Folded Reload addi.d $a1, $a1, 1 blez $t5, .LBB2_26 @@ -1092,12 +1086,12 @@ MSalign11: # @MSalign11 fadd.s $fa0, $fa0, $fs1 fst.s $fa0, $a2, 0 fst.s $fa0, $a6, 0 - fld.s $fa1, $t7, 0 + fld.s $fa1, $s8, 0 fld.s $fa0, $a2, 0 fcmp.cule.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB2_23 # %bb.25: # in Loop: Header=BB2_24 Depth=1 - fst.s $fa0, $t7, 0 + fst.s $fa0, $s8, 0 b .LBB2_23 .LBB2_26: # %._crit_edge fld.s $fa0, $s2, 0 @@ -1198,17 +1192,20 @@ MSalign11: # @MSalign11 ld.d $a3, $sp, 256 # 8-byte Folded Reload ld.d $a3, $a3, %pc_lo12(MSalign11.lastverticalw) fst.s $fa0, $a3, 0 - pcalau12i $a3, %pc_hi20(.LCPI2_1) + lu12i.w $a3, -243809 + ori $a3, $a3, 2458 + lu32i.d $a3, 0 + st.d $s8, $sp, 112 # 8-byte Folded Spill blez $t5, .LBB2_58 # %bb.39: # %.lr.ph356 st.d $a3, $sp, 88 # 8-byte Folded Spill bstrpick.d $a1, $a1, 31, 0 st.d $a1, $sp, 240 # 8-byte Folded Spill - bstrpick.d $s8, $a2, 31, 0 - addi.d $a1, $t7, 4 + bstrpick.d $s1, $a2, 31, 0 + addi.d $a1, $s8, 4 st.d $a1, $sp, 176 # 8-byte Folded Spill movgr2fr.w $fs0, $zero - ori $s1, $zero, 1 + ori $s8, $zero, 1 pcalau12i $s4, %pc_hi20(MSalign11.mi) pcalau12i $s7, %pc_hi20(MSalign11.mpi) pcalau12i $a1, %got_pc_hi20(stderr) @@ -1225,19 +1222,19 @@ MSalign11: # @MSalign11 fldx.s $fa0, $s5, $a0 ld.d $a0, $sp, 256 # 8-byte Folded Reload ld.d $a1, $a0, %pc_lo12(MSalign11.lastverticalw) - addi.d $s1, $s1, 1 + addi.d $s8, $s8, 1 ld.d $a0, $sp, 280 # 8-byte Folded Reload fstx.s $fa0, $a1, $a0 move $a0, $s2 move $s2, $s5 ld.d $a2, $sp, 240 # 8-byte Folded Reload - beq $s1, $a2, .LBB2_59 + beq $s8, $a2, .LBB2_59 .LBB2_41: # =>This Loop Header: Depth=1 # Child Loop BB2_43 Depth 2 # Child Loop BB2_47 Depth 2 - ld.d $a1, $sp, 272 # 8-byte Folded Reload + ld.d $a1, $sp, 264 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(MSalign11.initverticalw) - addi.d $s6, $s1, -1 + addi.d $s6, $s8, -1 slli.d $a2, $s6, 2 fldx.s $fa0, $a1, $a2 move $s5, $a0 @@ -1246,7 +1243,7 @@ MSalign11: # @MSalign11 # %bb.42: # %.lr.ph.i316 # in Loop: Header=BB2_41 Depth=1 ld.d $a0, $s0, 0 - ldx.b $a0, $a0, $s1 + ldx.b $a0, $a0, $s8 ld.d $a2, $fp, 0 slli.d $a0, $a0, 9 pcalau12i $a3, %got_pc_hi20(amino_dis) @@ -1270,7 +1267,7 @@ MSalign11: # @MSalign11 bnez $a3, .LBB2_43 .LBB2_44: # %match_calc.exit321 # in Loop: Header=BB2_41 Depth=1 - slli.d $a0, $s1, 2 + slli.d $a0, $s8, 2 fldx.s $fa0, $a1, $a0 fst.s $fa0, $s5, 0 fld.s $fa1, $s2, 0 @@ -1288,17 +1285,17 @@ MSalign11: # @MSalign11 ld.d $a1, $sp, 224 # 8-byte Folded Reload pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 - ld.d $t6, $sp, 264 # 8-byte Folded Reload + ld.d $t6, $sp, 272 # 8-byte Folded Reload blez $s3, .LBB2_40 # %bb.45: # %.lr.ph348 # in Loop: Header=BB2_41 Depth=1 move $a0, $zero ld.d $a1, $sp, 184 # 8-byte Folded Reload ld.d $a2, $a1, %pc_lo12(MSalign11.ijp) - alsl.d $a1, $s1, $t6, 2 + alsl.d $a1, $s8, $t6, 2 ld.d $a3, $sp, 208 # 8-byte Folded Reload ld.d $a3, $a3, %pc_lo12(MSalign11.WMMTX) - slli.d $a4, $s1, 3 + slli.d $a4, $s8, 3 ldx.d $a5, $a2, $a4 ld.d $a2, $sp, 200 # 8-byte Folded Reload ld.d $a6, $a2, %pc_lo12(MSalign11.m) @@ -1330,7 +1327,7 @@ MSalign11: # @MSalign11 addi.d $a5, $a5, 4 addi.d $a6, $a6, 4 addi.d $a7, $a7, 4 - beq $s8, $t0, .LBB2_40 + beq $s1, $t0, .LBB2_40 .LBB2_47: # Parent Loop BB2_41 Depth=1 # => This Inner Loop Header: Depth=2 fld.s $fa1, $t3, 0 @@ -1355,7 +1352,7 @@ MSalign11: # @MSalign11 bcnez $fcc0, .LBB2_51 # %bb.50: # in Loop: Header=BB2_47 Depth=2 ld.w $t4, $a6, 0 - sub.d $t4, $s1, $t4 + sub.d $t4, $s8, $t4 st.w $t4, $a3, 0 fmov.s $fs0, $fa2 .LBB2_51: # in Loop: Header=BB2_47 Depth=2 @@ -1386,22 +1383,22 @@ MSalign11: # @MSalign11 fst.s $fa1, $t2, 0 b .LBB2_46 .LBB2_58: - fld.s $fs2, $a3, %pc_lo12(.LCPI2_1) move $s1, $zero move $s8, $zero movgr2fr.w $fs0, $zero + movgr2fr.w $fs2, $a3 ld.d $a0, $sp, 168 # 8-byte Folded Reload addi.w $s6, $a0, -1 bgtz $s3, .LBB2_62 b .LBB2_64 .LBB2_59: # %.lr.ph363 - ld.d $a0, $sp, 88 # 8-byte Folded Reload - fld.s $fs2, $a0, %pc_lo12(.LCPI2_1) move $a0, $zero move $s8, $zero move $s1, $zero ld.d $a2, $sp, 168 # 8-byte Folded Reload bstrpick.d $a2, $a2, 30, 0 + ld.d $a3, $sp, 88 # 8-byte Folded Reload + movgr2fr.w $fs2, $a3 ld.d $a6, $sp, 104 # 8-byte Folded Reload .p2align 4, , 16 .LBB2_60: # =>This Inner Loop Header: Depth=1 @@ -1456,7 +1453,7 @@ MSalign11: # @MSalign11 addi.d $a0, $a0, %pc_lo12(.L.str.4) st.d $a0, $sp, 280 # 8-byte Folded Spill move $s4, $zero - ld.d $s2, $sp, 264 # 8-byte Folded Reload + ld.d $s2, $sp, 272 # 8-byte Folded Reload .p2align 4, , 16 .LBB2_66: # %.lr.ph376 # =>This Inner Loop Header: Depth=1 @@ -1483,7 +1480,7 @@ MSalign11: # @MSalign11 addi.d $a0, $a0, %pc_lo12(.L.str.5) st.d $a0, $sp, 280 # 8-byte Folded Spill move $s4, $zero - ld.d $s2, $sp, 128 # 8-byte Folded Reload + ld.d $s2, $sp, 112 # 8-byte Folded Reload .p2align 4, , 16 .LBB2_69: # %.lr.ph378 # =>This Inner Loop Header: Depth=1 @@ -1537,7 +1534,7 @@ MSalign11: # @MSalign11 add.d $a2, $a0, $s5 st.d $a2, $a1, 0 stx.b $zero, $a0, $s5 - ld.d $s7, $sp, 120 # 8-byte Folded Reload + ld.d $s7, $sp, 128 # 8-byte Folded Reload ld.d $a2, $s7, %pc_lo12(MSalign11.mseq2) ld.d $a0, $a2, 0 add.d $a3, $a0, $s5 @@ -1549,7 +1546,7 @@ MSalign11: # @MSalign11 ld.d $a6, $a3, %pc_lo12(MSalign11.w1) ld.d $a3, $sp, 144 # 8-byte Folded Reload ld.d $a7, $a3, %pc_lo12(MSalign11.w2) - ld.d $a3, $sp, 272 # 8-byte Folded Reload + ld.d $a3, $sp, 264 # 8-byte Folded Reload ld.d $a3, $a3, %pc_lo12(MSalign11.initverticalw) ld.d $a4, $sp, 200 # 8-byte Folded Reload ld.d $a4, $a4, %pc_lo12(MSalign11.m) @@ -1565,8 +1562,8 @@ MSalign11: # @MSalign11 st.d $a4, $sp, 8 st.d $a3, $sp, 0 fmov.s $fa0, $fs1 - ld.d $a1, $sp, 264 # 8-byte Folded Reload - ld.d $a2, $sp, 128 # 8-byte Folded Reload + ld.d $a1, $sp, 272 # 8-byte Folded Reload + ld.d $a2, $sp, 112 # 8-byte Folded Reload ld.d $a3, $sp, 136 # 8-byte Folded Reload move $a4, $s3 pcaddu18i $ra, %call36(backdp) @@ -1593,7 +1590,7 @@ MSalign11: # @MSalign11 move $a1, $s1 pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 120 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(MSalign11.mseq) ld.d $a1, $a0, 0 ld.d $s4, $s4, %pc_lo12(MSalign11.mseq1) @@ -1630,8 +1627,8 @@ MSalign11: # @MSalign11 move $a2, $zero b .LBB2_76 .LBB2_73: # %vector.ph460 - pcalau12i $a3, %pc_hi20(.LCPI2_2) - vld $vr0, $a3, %pc_lo12(.LCPI2_2) + pcalau12i $a3, %pc_hi20(.LCPI2_0) + vld $vr0, $a3, %pc_lo12(.LCPI2_0) bstrpick.d $a2, $a2, 31, 2 slli.d $a2, $a2, 2 addi.d $a3, $s5, 16 @@ -1686,8 +1683,8 @@ MSalign11: # @MSalign11 .LBB2_81: # %vector.ph473 bstrpick.d $a3, $a3, 31, 3 slli.d $a3, $a3, 3 - pcalau12i $a4, %pc_hi20(.LCPI2_3) - vld $vr0, $a4, %pc_lo12(.LCPI2_3) + pcalau12i $a4, %pc_hi20(.LCPI2_1) + vld $vr0, $a4, %pc_lo12(.LCPI2_1) addi.d $a4, $a1, 16 vrepli.b $vr1, -1 vrepli.w $vr2, -5 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/MSalignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/MSalignmm.s index 4cc52c43..27869491 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/MSalignmm.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/MSalignmm.s @@ -737,16 +737,12 @@ MSalignmm: # @MSalignmm .Lfunc_end0: .size MSalignmm, .Lfunc_end0-MSalignmm # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function MSalignmm_rec -.LCPI1_0: - .word 0xcb189680 # float -1.0E+7 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI1_1: + .p2align 4, 0x0 # -- Begin function MSalignmm_rec +.LCPI1_0: .dword 0 # 0x0 .dword 1 # 0x1 -.LCPI1_2: +.LCPI1_1: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -1649,8 +1645,8 @@ MSalignmm_rec: # @MSalignmm_rec bne $a2, $a3, .LBB1_34 b .LBB1_36 .LBB1_73: # %vector.ph310 - pcalau12i $a2, %pc_hi20(.LCPI1_1) - vld $vr0, $a2, %pc_lo12(.LCPI1_1) + pcalau12i $a2, %pc_hi20(.LCPI1_0) + vld $vr0, $a2, %pc_lo12(.LCPI1_0) bstrpick.d $a2, $a0, 31, 2 slli.d $a2, $a2, 2 ld.d $a3, $sp, 496 # 8-byte Folded Reload @@ -1705,8 +1701,8 @@ MSalignmm_rec: # @MSalignmm_rec .LBB1_80: # %vector.ph323 bstrpick.d $a3, $a2, 31, 3 slli.d $a3, $a3, 3 - pcalau12i $a4, %pc_hi20(.LCPI1_2) - vld $vr0, $a4, %pc_lo12(.LCPI1_2) + pcalau12i $a4, %pc_hi20(.LCPI1_1) + vld $vr0, $a4, %pc_lo12(.LCPI1_1) addi.d $a4, $a0, 16 vrepli.b $vr1, -1 vrepli.w $vr2, -5 @@ -2508,13 +2504,15 @@ MSalignmm_rec: # @MSalignmm_rec alsl.d $a0, $a0, $a4, 2 addi.d $fp, $a0, 4 add.d $a0, $a1, $t3 - pcalau12i $a1, %pc_hi20(.LCPI1_0) - fld.s $fs0, $a1, %pc_lo12(.LCPI1_0) addi.d $s8, $a0, 4 addi.d $a0, $a3, 8 st.d $a0, $sp, 272 # 8-byte Folded Spill addi.d $a0, $s0, -2 st.d $a0, $sp, 264 # 8-byte Folded Spill + lu12i.w $a0, -216695 + ori $a0, $a0, 1664 + lu32i.d $a0, 0 + movgr2fr.w $fs0, $a0 b .LBB1_160 .p2align 4, , 16 .LBB1_158: # in Loop: Header=BB1_160 Depth=1 @@ -2529,7 +2527,9 @@ MSalignmm_rec: # @MSalignmm_rec maskeqz $a0, $s3, $a0 or $a0, $a0, $a1 st.d $a0, $sp, 480 # 8-byte Folded Spill + move $ra, $t4 ld.d $s3, $sp, 408 # 8-byte Folded Reload + ld.d $t4, $sp, 512 # 8-byte Folded Reload ld.d $a0, $sp, 488 # 8-byte Folded Reload beq $s0, $a0, .LBB1_212 .LBB1_160: # =>This Loop Header: Depth=1 @@ -2550,9 +2550,9 @@ MSalignmm_rec: # @MSalignmm_rec fstx.s $fa0, $t7, $a1 ld.d $a1, $sp, 432 # 8-byte Folded Reload ld.d $a2, $sp, 440 # 8-byte Folded Reload + st.d $t7, $sp, 400 # 8-byte Folded Spill move $a3, $s0 ld.d $a4, $sp, 520 # 8-byte Folded Reload - st.d $t7, $sp, 400 # 8-byte Folded Spill ld.d $a5, $sp, 464 # 8-byte Folded Reload ld.d $a6, $sp, 456 # 8-byte Folded Reload move $a7, $zero @@ -2690,8 +2690,6 @@ MSalignmm_rec: # @MSalignmm_rec fadd.s $fa0, $fs0, $fa0 fst.s $fa0, $a0, 0 .LBB1_175: # in Loop: Header=BB1_160 Depth=1 - move $ra, $t4 - ld.d $t4, $sp, 512 # 8-byte Folded Reload bne $s3, $s4, .LBB1_159 # %bb.176: # %.lr.ph80.preheader # in Loop: Header=BB1_160 Depth=1 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Qalignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Qalignmm.s index 79e1a861..ac9a91dc 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Qalignmm.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Qalignmm.s @@ -451,35 +451,27 @@ imp_match_init_strictQ: # @imp_match_init_strictQ .Lfunc_end2: .size imp_match_init_strictQ, .Lfunc_end2-imp_match_init_strictQ # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function Q__align -.LCPI3_0: - .dword 0x3ff4cccccccccccd # double 1.3 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI3_1: + .p2align 4, 0x0 # -- Begin function Q__align +.LCPI3_0: .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 .word 4 # 0x4 -.LCPI3_3: +.LCPI3_1: .dword 3 # 0x3 .dword 4 # 0x4 -.LCPI3_4: +.LCPI3_2: .dword 1 # 0x1 .dword 2 # 0x2 -.LCPI3_5: +.LCPI3_3: .dword 0 # 0x0 .dword 1 # 0x1 -.LCPI3_6: +.LCPI3_4: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI3_2: - .word 0x461c4000 # float 1.0E+4 .text .globl Q__align .p2align 5 @@ -499,11 +491,12 @@ Q__align: # @Q__align st.d $s7, $sp, 448 # 8-byte Folded Spill st.d $s8, $sp, 440 # 8-byte Folded Spill fst.d $fs0, $sp, 432 # 8-byte Folded Spill - st.d $a7, $sp, 48 # 8-byte Folded Spill - st.d $a6, $sp, 56 # 8-byte Folded Spill + fst.d $fs1, $sp, 424 # 8-byte Folded Spill + st.d $a7, $sp, 40 # 8-byte Folded Spill + st.d $a6, $sp, 48 # 8-byte Folded Spill move $s7, $a5 move $s3, $a4 - st.d $a3, $sp, 80 # 8-byte Folded Spill + st.d $a3, $sp, 32 # 8-byte Folded Spill move $s8, $a2 move $s5, $a1 move $s1, $a0 @@ -512,11 +505,11 @@ Q__align: # @Q__align pcalau12i $s4, %pc_hi20(Q__align.orlgth1) ld.w $s6, $s4, %pc_lo12(Q__align.orlgth1) ld.w $a0, $a0, 0 - st.d $a0, $sp, 72 # 8-byte Folded Spill + st.d $a0, $sp, 64 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.mseq1) - st.d $a0, $sp, 144 # 8-byte Folded Spill + st.d $a0, $sp, 136 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.mseq2) - st.d $a0, $sp, 112 # 8-byte Folded Spill + st.d $a0, $sp, 104 # 8-byte Folded Spill bnez $s6, .LBB3_2 # %bb.1: pcalau12i $a0, %got_pc_hi20(njob) @@ -526,14 +519,14 @@ Q__align: # @Q__align pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 ld.w $a1, $fp, 0 - ld.d $a2, $sp, 144 # 8-byte Folded Reload + ld.d $a2, $sp, 136 # 8-byte Folded Reload st.d $a0, $a2, %pc_lo12(Q__align.mseq1) move $a0, $a1 move $a1, $zero pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 ld.w $s6, $s4, %pc_lo12(Q__align.orlgth1) - ld.d $a1, $sp, 112 # 8-byte Folded Reload + ld.d $a1, $sp, 104 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.mseq2) .LBB3_2: ld.d $a0, $s1, 0 @@ -546,89 +539,89 @@ Q__align: # @Q__align pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 pcalau12i $a1, %pc_hi20(Q__align.orlgth2) - st.d $a1, $sp, 40 # 8-byte Folded Spill + st.d $a1, $sp, 24 # 8-byte Folded Spill ld.w $s0, $a1, %pc_lo12(Q__align.orlgth2) - st.d $a0, $sp, 208 # 8-byte Folded Spill + st.d $a0, $sp, 200 # 8-byte Folded Spill addi.w $a5, $a0, 0 pcalau12i $a0, %pc_hi20(Q__align.w1) - st.d $a0, $sp, 304 # 8-byte Folded Spill + st.d $a0, $sp, 192 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.w2) - st.d $a0, $sp, 200 # 8-byte Folded Spill + st.d $a0, $sp, 144 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.initverticalw) - st.d $a0, $sp, 328 # 8-byte Folded Spill + st.d $a0, $sp, 304 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.lastverticalw) - st.d $a0, $sp, 128 # 8-byte Folded Spill + st.d $a0, $sp, 120 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.m) - st.d $a0, $sp, 312 # 8-byte Folded Spill + st.d $a0, $sp, 296 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.mp) - st.d $a0, $sp, 184 # 8-byte Folded Spill + st.d $a0, $sp, 176 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.mseq) - st.d $a0, $sp, 336 # 8-byte Folded Spill + st.d $a0, $sp, 320 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.digf1) - st.d $a0, $sp, 392 # 8-byte Folded Spill + st.d $a0, $sp, 384 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.digf2) - st.d $a0, $sp, 376 # 8-byte Folded Spill + st.d $a0, $sp, 368 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.diaf1) - st.d $a0, $sp, 96 # 8-byte Folded Spill + st.d $a0, $sp, 80 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.diaf2) - st.d $a0, $sp, 104 # 8-byte Folded Spill + st.d $a0, $sp, 88 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.gapz1) - st.d $a0, $sp, 400 # 8-byte Folded Spill + st.d $a0, $sp, 392 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.gapz2) - st.d $a0, $sp, 384 # 8-byte Folded Spill + st.d $a0, $sp, 376 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.gapf1) - st.d $a0, $sp, 136 # 8-byte Folded Spill + st.d $a0, $sp, 96 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.gapf2) - st.d $a0, $sp, 152 # 8-byte Folded Spill + st.d $a0, $sp, 128 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.ogcp1g) - st.d $a0, $sp, 424 # 8-byte Folded Spill + st.d $a0, $sp, 416 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.ogcp2g) - st.d $a0, $sp, 408 # 8-byte Folded Spill + st.d $a0, $sp, 400 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.fgcp1g) - st.d $a0, $sp, 416 # 8-byte Folded Spill + st.d $a0, $sp, 408 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.fgcp2g) - st.d $a0, $sp, 368 # 8-byte Folded Spill + st.d $a0, $sp, 360 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.og_h_dg_n1_p) - st.d $a0, $sp, 264 # 8-byte Folded Spill + st.d $a0, $sp, 256 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.og_h_dg_n2_p) - st.d $a0, $sp, 280 # 8-byte Folded Spill + st.d $a0, $sp, 272 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.fg_h_dg_n1_p) - st.d $a0, $sp, 256 # 8-byte Folded Spill + st.d $a0, $sp, 248 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.fg_h_dg_n2_p) - st.d $a0, $sp, 272 # 8-byte Folded Spill + st.d $a0, $sp, 264 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.og_t_fg_h_dg_n1_p) - st.d $a0, $sp, 248 # 8-byte Folded Spill + st.d $a0, $sp, 240 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.og_t_fg_h_dg_n2_p) - st.d $a0, $sp, 224 # 8-byte Folded Spill + st.d $a0, $sp, 216 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.fg_t_og_h_dg_n1_p) - st.d $a0, $sp, 232 # 8-byte Folded Spill + st.d $a0, $sp, 224 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.fg_t_og_h_dg_n2_p) - st.d $a0, $sp, 192 # 8-byte Folded Spill + st.d $a0, $sp, 184 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.gapz_n1) - st.d $a0, $sp, 240 # 8-byte Folded Spill + st.d $a0, $sp, 232 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.gapz_n2) - st.d $a0, $sp, 216 # 8-byte Folded Spill + st.d $a0, $sp, 208 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.cpmx1) - st.d $a0, $sp, 352 # 8-byte Folded Spill + st.d $a0, $sp, 344 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.cpmx2) - st.d $a0, $sp, 320 # 8-byte Folded Spill + st.d $a0, $sp, 312 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align.floatwork) - st.d $a0, $sp, 296 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(Q__align.intwork) st.d $a0, $sp, 288 # 8-byte Folded Spill - st.d $s5, $sp, 168 # 8-byte Folded Spill - st.d $s3, $sp, 176 # 8-byte Folded Spill - st.d $s1, $sp, 160 # 8-byte Folded Spill - st.d $fp, $sp, 120 # 8-byte Folded Spill - st.d $a5, $sp, 360 # 8-byte Folded Spill - st.d $s4, $sp, 32 # 8-byte Folded Spill - st.d $s2, $sp, 88 # 8-byte Folded Spill + pcalau12i $a0, %pc_hi20(Q__align.intwork) + st.d $a0, $sp, 280 # 8-byte Folded Spill + st.d $s5, $sp, 160 # 8-byte Folded Spill + st.d $s3, $sp, 168 # 8-byte Folded Spill + st.d $s1, $sp, 152 # 8-byte Folded Spill + st.d $fp, $sp, 112 # 8-byte Folded Spill + st.d $a5, $sp, 352 # 8-byte Folded Spill + st.d $s4, $sp, 16 # 8-byte Folded Spill + st.d $s2, $sp, 72 # 8-byte Folded Spill blt $s6, $fp, .LBB3_5 # %bb.3: blt $s0, $a5, .LBB3_5 # %bb.4: - ld.d $fp, $sp, 336 # 8-byte Folded Reload - ld.d $s2, $sp, 120 # 8-byte Folded Reload + ld.d $fp, $sp, 320 # 8-byte Folded Reload + ld.d $s2, $sp, 112 # 8-byte Folded Reload bgtz $s3, .LBB3_9 b .LBB3_11 .LBB3_5: @@ -637,161 +630,164 @@ Q__align: # @Q__align # %bb.6: blez $s0, .LBB3_8 # %bb.7: - ld.d $a0, $sp, 304 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.w1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 200 # 8-byte Folded Reload + ld.d $a0, $sp, 144 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.w2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 ld.d $a0, $fp, %pc_lo12(Q__align.match) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 328 # 8-byte Folded Reload + ld.d $a0, $sp, 304 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.initverticalw) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 128 # 8-byte Folded Reload + ld.d $a0, $sp, 120 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.lastverticalw) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 312 # 8-byte Folded Reload + ld.d $a0, $sp, 296 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.m) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 176 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.mp) pcaddu18i $ra, %call36(FreeIntVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 336 # 8-byte Folded Reload + ld.d $a0, $sp, 320 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.mseq) pcaddu18i $ra, %call36(FreeCharMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 392 # 8-byte Folded Reload + ld.d $a0, $sp, 384 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.digf1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 376 # 8-byte Folded Reload + ld.d $a0, $sp, 368 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.digf2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 80 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.diaf1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 88 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.diaf2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 400 # 8-byte Folded Reload + ld.d $a0, $sp, 392 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.gapz1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 384 # 8-byte Folded Reload + ld.d $a0, $sp, 376 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.gapz2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 136 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.gapf1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.gapf2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 424 # 8-byte Folded Reload + ld.d $a0, $sp, 416 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.ogcp1g) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 408 # 8-byte Folded Reload + ld.d $a0, $sp, 400 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.ogcp2g) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 416 # 8-byte Folded Reload + ld.d $a0, $sp, 408 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.fgcp1g) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 368 # 8-byte Folded Reload + ld.d $a0, $sp, 360 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.fgcp2g) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 264 # 8-byte Folded Reload + ld.d $a0, $sp, 256 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.og_h_dg_n1_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 280 # 8-byte Folded Reload + ld.d $a0, $sp, 272 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.og_h_dg_n2_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 256 # 8-byte Folded Reload + ld.d $a0, $sp, 248 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.fg_h_dg_n1_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 272 # 8-byte Folded Reload + ld.d $a0, $sp, 264 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.fg_h_dg_n2_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 248 # 8-byte Folded Reload + ld.d $a0, $sp, 240 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.og_t_fg_h_dg_n1_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 224 # 8-byte Folded Reload + ld.d $a0, $sp, 216 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.og_t_fg_h_dg_n2_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 232 # 8-byte Folded Reload + ld.d $a0, $sp, 224 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.fg_t_og_h_dg_n1_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 192 # 8-byte Folded Reload + ld.d $a0, $sp, 184 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.fg_t_og_h_dg_n2_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 240 # 8-byte Folded Reload + ld.d $a0, $sp, 232 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.gapz_n1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 216 # 8-byte Folded Reload + ld.d $a0, $sp, 208 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.gapz_n2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 352 # 8-byte Folded Reload + ld.d $a0, $sp, 344 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.cpmx1) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 320 # 8-byte Folded Reload + ld.d $a0, $sp, 312 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.cpmx2) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 296 # 8-byte Folded Reload + ld.d $a0, $sp, 288 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.floatwork) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 288 # 8-byte Folded Reload + ld.d $a0, $sp, 280 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.intwork) pcaddu18i $ra, %call36(FreeIntMtx) jirl $ra, $ra, 0 ld.w $s6, $s4, %pc_lo12(Q__align.orlgth1) - ld.d $a0, $sp, 40 # 8-byte Folded Reload + ld.d $a0, $sp, 24 # 8-byte Folded Reload ld.w $s0, $a0, %pc_lo12(Q__align.orlgth2) .LBB3_8: - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI3_0) - movgr2fr.w $fa1, $s2 - ffint.d.w $fa1, $fa1 - fmul.d $fa1, $fa1, $fa0 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 + movgr2fr.w $fa0, $s2 + ffint.d.w $fa0, $fa0 + lu12i.w $a0, -209716 + ori $a0, $a0, 3277 + lu32i.d $a0, 314572 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a0, $fa0 slt $a1, $a0, $s6 masknez $a0, $a0, $a1 maskeqz $a1, $s6, $a1 or $s6, $a1, $a0 addi.w $s1, $s6, 100 - ld.d $a0, $sp, 208 # 8-byte Folded Reload - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + ld.d $a0, $sp, 200 # 8-byte Folded Reload + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 slt $a1, $a0, $s0 @@ -803,12 +799,12 @@ Q__align: # @Q__align move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 304 # 8-byte Folded Reload + ld.d $a1, $sp, 192 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.w1) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 200 # 8-byte Folded Reload + ld.d $a1, $sp, 144 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.w2) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -818,22 +814,22 @@ Q__align: # @Q__align move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 328 # 8-byte Folded Reload + ld.d $a1, $sp, 304 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.initverticalw) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 128 # 8-byte Folded Reload + ld.d $a1, $sp, 120 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.lastverticalw) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 312 # 8-byte Folded Reload + ld.d $a1, $sp, 296 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.m) move $a0, $s4 pcaddu18i $ra, %call36(AllocateIntVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 184 # 8-byte Folded Reload + ld.d $a1, $sp, 176 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.mp) pcalau12i $a0, %got_pc_hi20(njob) ld.d $a0, $a0, %got_pc_lo12(njob) @@ -841,129 +837,129 @@ Q__align: # @Q__align add.w $a1, $s2, $s1 pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 - ld.d $fp, $sp, 336 # 8-byte Folded Reload + ld.d $fp, $sp, 320 # 8-byte Folded Reload st.d $a0, $fp, %pc_lo12(Q__align.mseq) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 392 # 8-byte Folded Reload + ld.d $a1, $sp, 384 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.digf1) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 376 # 8-byte Folded Reload + ld.d $a1, $sp, 368 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.digf2) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 96 # 8-byte Folded Reload + ld.d $a1, $sp, 80 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.diaf1) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 104 # 8-byte Folded Reload + ld.d $a1, $sp, 88 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.diaf2) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 400 # 8-byte Folded Reload + ld.d $a1, $sp, 392 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.gapz1) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 384 # 8-byte Folded Reload + ld.d $a1, $sp, 376 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.gapz2) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $a1, $sp, 96 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.gapf1) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 152 # 8-byte Folded Reload + ld.d $a1, $sp, 128 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.gapf2) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 424 # 8-byte Folded Reload + ld.d $a1, $sp, 416 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.ogcp1g) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 408 # 8-byte Folded Reload + ld.d $a1, $sp, 400 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.ogcp2g) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 416 # 8-byte Folded Reload + ld.d $a1, $sp, 408 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.fgcp1g) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 368 # 8-byte Folded Reload + ld.d $a1, $sp, 360 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.fgcp2g) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 264 # 8-byte Folded Reload + ld.d $a1, $sp, 256 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.og_h_dg_n1_p) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 280 # 8-byte Folded Reload + ld.d $a1, $sp, 272 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.og_h_dg_n2_p) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 256 # 8-byte Folded Reload + ld.d $a1, $sp, 248 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.fg_h_dg_n1_p) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 272 # 8-byte Folded Reload + ld.d $a1, $sp, 264 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.fg_h_dg_n2_p) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 248 # 8-byte Folded Reload + ld.d $a1, $sp, 240 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.og_t_fg_h_dg_n1_p) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 224 # 8-byte Folded Reload + ld.d $a1, $sp, 216 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.og_t_fg_h_dg_n2_p) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 232 # 8-byte Folded Reload + ld.d $a1, $sp, 224 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.fg_t_og_h_dg_n1_p) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 192 # 8-byte Folded Reload + ld.d $a1, $sp, 184 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.fg_t_og_h_dg_n2_p) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 240 # 8-byte Folded Reload + ld.d $a1, $sp, 232 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.gapz_n1) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 216 # 8-byte Folded Reload + ld.d $a1, $sp, 208 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.gapz_n2) ori $a0, $zero, 26 move $a1, $s5 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 352 # 8-byte Folded Reload + ld.d $a1, $sp, 344 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.cpmx1) ori $a0, $zero, 26 move $a1, $s4 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 320 # 8-byte Folded Reload + ld.d $a1, $sp, 312 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.cpmx2) slt $a0, $s2, $s1 masknez $a1, $s2, $a0 @@ -974,27 +970,27 @@ Q__align: # @Q__align move $a0, $s3 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 296 # 8-byte Folded Reload + ld.d $a1, $sp, 288 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.floatwork) ori $a1, $zero, 27 move $a0, $s3 pcaddu18i $ra, %call36(AllocateIntMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 288 # 8-byte Folded Reload + ld.d $a1, $sp, 280 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align.intwork) - ld.d $a0, $sp, 32 # 8-byte Folded Reload + ld.d $a0, $sp, 16 # 8-byte Folded Reload st.w $s6, $a0, %pc_lo12(Q__align.orlgth1) - ld.d $a0, $sp, 40 # 8-byte Folded Reload + ld.d $a0, $sp, 24 # 8-byte Folded Reload st.w $s0, $a0, %pc_lo12(Q__align.orlgth2) - ld.d $s5, $sp, 168 # 8-byte Folded Reload - ld.d $s3, $sp, 176 # 8-byte Folded Reload - ld.d $s1, $sp, 160 # 8-byte Folded Reload - ld.d $a5, $sp, 360 # 8-byte Folded Reload - ld.d $s2, $sp, 120 # 8-byte Folded Reload + ld.d $s5, $sp, 160 # 8-byte Folded Reload + ld.d $s3, $sp, 168 # 8-byte Folded Reload + ld.d $s1, $sp, 152 # 8-byte Folded Reload + ld.d $a5, $sp, 352 # 8-byte Folded Reload + ld.d $s2, $sp, 112 # 8-byte Folded Reload blez $s3, .LBB3_11 .LBB3_9: # %.lr.ph ld.d $a0, $fp, %pc_lo12(Q__align.mseq) - ld.d $a1, $sp, 144 # 8-byte Folded Reload + ld.d $a1, $sp, 136 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(Q__align.mseq1) move $a2, $s3 .p2align 4, , 16 @@ -1012,7 +1008,7 @@ Q__align: # @Q__align blez $s7, .LBB3_14 # %bb.12: # %.lr.ph568 ld.d $a1, $fp, %pc_lo12(Q__align.mseq) - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.mseq2) alsl.d $a1, $s3, $a1, 3 move $a2, $s7 @@ -1030,7 +1026,7 @@ Q__align: # @Q__align bnez $a2, .LBB3_13 .LBB3_14: # %._crit_edge ld.d $s4, $sp, 536 - ld.d $a0, $sp, 72 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload movgr2fr.w $fs0, $a0 pcalau12i $a0, %got_pc_hi20(commonAlloc1) ld.d $fp, $a0, %got_pc_lo12(commonAlloc1) @@ -1056,10 +1052,10 @@ Q__align: # @Q__align ld.d $a0, $a0, 0 pcaddu18i $ra, %call36(FreeIntMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 32 # 8-byte Folded Reload + ld.d $a0, $sp, 16 # 8-byte Folded Reload ld.w $s6, $a0, %pc_lo12(Q__align.orlgth1) ld.w $a0, $fp, 0 - ld.d $a1, $sp, 40 # 8-byte Folded Reload + ld.d $a1, $sp, 24 # 8-byte Folded Reload ld.w $s0, $a1, %pc_lo12(Q__align.orlgth2) ld.w $a1, $s1, 0 .LBB3_20: @@ -1080,40 +1076,40 @@ Q__align: # @Q__align st.d $a0, $a1, 0 st.w $s2, $fp, 0 st.w $s0, $s1, 0 - ld.d $s2, $sp, 120 # 8-byte Folded Reload + ld.d $s2, $sp, 112 # 8-byte Folded Reload .LBB3_21: - ld.d $a1, $sp, 352 # 8-byte Folded Reload + ld.d $a1, $sp, 344 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(Q__align.cpmx1) ffint.s.w $fa0, $fs0 - vst $vr0, $sp, 336 # 16-byte Folded Spill + vst $vr0, $sp, 320 # 16-byte Folded Spill pcalau12i $a2, %pc_hi20(Q__align.ijp) - st.d $a2, $sp, 72 # 8-byte Folded Spill + st.d $a2, $sp, 64 # 8-byte Folded Spill st.d $a0, $a2, %pc_lo12(Q__align.ijp) - ld.d $s0, $sp, 160 # 8-byte Folded Reload + ld.d $s0, $sp, 152 # 8-byte Folded Reload move $a0, $s0 move $a2, $s8 move $a3, $s2 move $a4, $s3 pcaddu18i $ra, %call36(cpmx_calc_new) jirl $ra, $ra, 0 - ld.d $a0, $sp, 320 # 8-byte Folded Reload + ld.d $a0, $sp, 312 # 8-byte Folded Reload ld.d $a1, $a0, %pc_lo12(Q__align.cpmx2) move $a0, $s5 - ld.d $fp, $sp, 80 # 8-byte Folded Reload + ld.d $fp, $sp, 32 # 8-byte Folded Reload move $a2, $fp - ld.d $s1, $sp, 360 # 8-byte Folded Reload + ld.d $s1, $sp, 352 # 8-byte Folded Reload move $a3, $s1 move $a4, $s7 pcaddu18i $ra, %call36(cpmx_calc_new) jirl $ra, $ra, 0 - ld.d $a0, $sp, 424 # 8-byte Folded Reload + ld.d $a0, $sp, 416 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.ogcp1g) beqz $s4, .LBB3_23 # %bb.22: ld.d $s5, $sp, 560 ld.d $s3, $sp, 552 ld.d $s6, $sp, 544 - ld.d $a1, $sp, 176 # 8-byte Folded Reload + ld.d $a1, $sp, 168 # 8-byte Folded Reload move $a2, $s0 move $a3, $s8 move $a4, $s2 @@ -1121,19 +1117,19 @@ Q__align: # @Q__align move $a6, $s3 pcaddu18i $ra, %call36(new_OpeningGapCount_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 408 # 8-byte Folded Reload + ld.d $a0, $sp, 400 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.ogcp2g) move $a1, $s7 - ld.d $a2, $sp, 168 # 8-byte Folded Reload + ld.d $a2, $sp, 160 # 8-byte Folded Reload move $a3, $fp move $a4, $s1 move $a5, $s6 move $a6, $s5 pcaddu18i $ra, %call36(new_OpeningGapCount_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 416 # 8-byte Folded Reload + ld.d $a0, $sp, 408 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.fgcp1g) - ld.d $a1, $sp, 176 # 8-byte Folded Reload + ld.d $a1, $sp, 168 # 8-byte Folded Reload move $a2, $s0 move $a3, $s8 move $a4, $s2 @@ -1141,19 +1137,19 @@ Q__align: # @Q__align move $a6, $s3 pcaddu18i $ra, %call36(new_FinalGapCount_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 368 # 8-byte Folded Reload + ld.d $a0, $sp, 360 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.fgcp2g) move $a1, $s7 - ld.d $a2, $sp, 168 # 8-byte Folded Reload + ld.d $a2, $sp, 160 # 8-byte Folded Reload move $a3, $fp move $a4, $s1 move $a5, $s6 move $a6, $s5 pcaddu18i $ra, %call36(new_FinalGapCount_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 392 # 8-byte Folded Reload + ld.d $a0, $sp, 384 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.digf1) - ld.d $a1, $sp, 176 # 8-byte Folded Reload + ld.d $a1, $sp, 168 # 8-byte Folded Reload move $a2, $s0 move $a3, $s8 move $a4, $s2 @@ -1161,40 +1157,40 @@ Q__align: # @Q__align move $a6, $s3 pcaddu18i $ra, %call36(getdigapfreq_part) jirl $ra, $ra, 0 - ld.d $a0, $sp, 376 # 8-byte Folded Reload + ld.d $a0, $sp, 368 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.digf2) move $a1, $s7 - ld.d $a2, $sp, 168 # 8-byte Folded Reload + ld.d $a2, $sp, 160 # 8-byte Folded Reload move $a3, $fp move $a4, $s1 move $a5, $s6 move $a6, $s5 pcaddu18i $ra, %call36(getdigapfreq_part) jirl $ra, $ra, 0 - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 80 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.diaf1) - ld.d $a1, $sp, 176 # 8-byte Folded Reload + ld.d $a1, $sp, 168 # 8-byte Folded Reload move $a2, $s0 move $a3, $s8 move $a4, $s2 move $a5, $s4 move $a6, $s3 move $s3, $fp - ld.d $fp, $sp, 176 # 8-byte Folded Reload + ld.d $fp, $sp, 168 # 8-byte Folded Reload pcaddu18i $ra, %call36(getdiaminofreq_part) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 88 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.diaf2) move $a1, $s7 - ld.d $a2, $sp, 168 # 8-byte Folded Reload + ld.d $a2, $sp, 160 # 8-byte Folded Reload move $a3, $s3 move $a4, $s1 move $a5, $s6 move $a6, $s5 - ld.d $s5, $sp, 168 # 8-byte Folded Reload + ld.d $s5, $sp, 160 # 8-byte Folded Reload pcaddu18i $ra, %call36(getdiaminofreq_part) jirl $ra, $ra, 0 - ld.d $a0, $sp, 136 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.gapf1) move $a1, $fp move $a2, $s0 @@ -1202,7 +1198,7 @@ Q__align: # @Q__align move $a4, $s2 pcaddu18i $ra, %call36(getgapfreq) jirl $ra, $ra, 0 - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.gapf2) move $a1, $s7 move $a2, $s5 @@ -1210,7 +1206,7 @@ Q__align: # @Q__align move $a4, $s1 pcaddu18i $ra, %call36(getgapfreq) jirl $ra, $ra, 0 - ld.d $a0, $sp, 400 # 8-byte Folded Reload + ld.d $a0, $sp, 392 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.gapz1) move $a1, $fp move $a2, $s0 @@ -1219,7 +1215,7 @@ Q__align: # @Q__align move $a5, $s4 pcaddu18i $ra, %call36(getgapfreq_zure_part) jirl $ra, $ra, 0 - ld.d $a0, $sp, 384 # 8-byte Folded Reload + ld.d $a0, $sp, 376 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.gapz2) move $a1, $s7 move $a2, $s5 @@ -1236,7 +1232,7 @@ Q__align: # @Q__align move $a4, $s2 pcaddu18i $ra, %call36(st_OpeningGapCount) jirl $ra, $ra, 0 - ld.d $a0, $sp, 408 # 8-byte Folded Reload + ld.d $a0, $sp, 400 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.ogcp2g) move $a1, $s7 move $a2, $s5 @@ -1244,7 +1240,7 @@ Q__align: # @Q__align move $a4, $s1 pcaddu18i $ra, %call36(st_OpeningGapCount) jirl $ra, $ra, 0 - ld.d $a0, $sp, 416 # 8-byte Folded Reload + ld.d $a0, $sp, 408 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.fgcp1g) move $a1, $s3 move $a2, $s0 @@ -1252,7 +1248,7 @@ Q__align: # @Q__align move $a4, $s2 pcaddu18i $ra, %call36(st_FinalGapCount_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 368 # 8-byte Folded Reload + ld.d $a0, $sp, 360 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.fgcp2g) move $a1, $s7 move $a2, $s5 @@ -1260,7 +1256,7 @@ Q__align: # @Q__align move $a4, $s1 pcaddu18i $ra, %call36(st_FinalGapCount_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 392 # 8-byte Folded Reload + ld.d $a0, $sp, 384 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.digf1) move $a1, $s3 move $a2, $s0 @@ -1268,7 +1264,7 @@ Q__align: # @Q__align move $a4, $s2 pcaddu18i $ra, %call36(getdigapfreq_st) jirl $ra, $ra, 0 - ld.d $a0, $sp, 376 # 8-byte Folded Reload + ld.d $a0, $sp, 368 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.digf2) move $a1, $s7 move $a2, $s5 @@ -1276,7 +1272,7 @@ Q__align: # @Q__align move $a4, $s1 pcaddu18i $ra, %call36(getdigapfreq_st) jirl $ra, $ra, 0 - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 80 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.diaf1) move $a1, $s3 move $a2, $s0 @@ -1284,7 +1280,7 @@ Q__align: # @Q__align move $a4, $s2 pcaddu18i $ra, %call36(getdiaminofreq_x) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 88 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.diaf2) move $a1, $s7 move $a2, $s5 @@ -1292,7 +1288,7 @@ Q__align: # @Q__align move $a4, $s1 pcaddu18i $ra, %call36(getdiaminofreq_x) jirl $ra, $ra, 0 - ld.d $a0, $sp, 136 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.gapf1) move $a1, $s3 move $a2, $s0 @@ -1300,7 +1296,7 @@ Q__align: # @Q__align move $a4, $s2 pcaddu18i $ra, %call36(getgapfreq) jirl $ra, $ra, 0 - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.gapf2) move $a1, $s7 move $a2, $s5 @@ -1308,7 +1304,7 @@ Q__align: # @Q__align move $a4, $s1 pcaddu18i $ra, %call36(getgapfreq) jirl $ra, $ra, 0 - ld.d $a0, $sp, 400 # 8-byte Folded Reload + ld.d $a0, $sp, 392 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.gapz1) move $a1, $s3 move $a2, $s0 @@ -1316,7 +1312,7 @@ Q__align: # @Q__align move $a4, $s2 pcaddu18i $ra, %call36(getgapfreq_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 384 # 8-byte Folded Reload + ld.d $a0, $sp, 376 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.gapz2) move $a1, $s7 move $a2, $s5 @@ -1325,35 +1321,33 @@ Q__align: # @Q__align pcaddu18i $ra, %call36(getgapfreq_zure) jirl $ra, $ra, 0 .LBB3_24: - ld.d $s4, $sp, 88 # 8-byte Folded Reload - ld.d $s6, $sp, 328 # 8-byte Folded Reload - addi.w $a0, $zero, -1 - vld $vr18, $sp, 336 # 16-byte Folded Reload + ld.d $s4, $sp, 72 # 8-byte Folded Reload + ld.d $s6, $sp, 200 # 8-byte Folded Reload + addi.w $s8, $zero, -1 + vld $vr18, $sp, 320 # 16-byte Folded Reload fcvt.d.s $fa0, $ft10 - ld.d $s8, $sp, 208 # 8-byte Folded Reload move $ra, $s1 - st.d $a0, $sp, 80 # 8-byte Folded Spill - blt $s1, $a0, .LBB3_29 + blt $s1, $s8, .LBB3_29 # %bb.25: # %.lr.ph571 - ld.d $a0, $sp, 408 # 8-byte Folded Reload + ld.d $a0, $sp, 400 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.ogcp2g) - ld.d $a1, $sp, 376 # 8-byte Folded Reload + ld.d $a1, $sp, 368 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(Q__align.digf2) - ld.d $a2, $sp, 280 # 8-byte Folded Reload + ld.d $a2, $sp, 272 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(Q__align.og_h_dg_n2_p) - ld.d $a3, $sp, 368 # 8-byte Folded Reload + ld.d $a3, $sp, 360 # 8-byte Folded Reload ld.d $a3, $a3, %pc_lo12(Q__align.fgcp2g) - ld.d $a4, $sp, 272 # 8-byte Folded Reload + ld.d $a4, $sp, 264 # 8-byte Folded Reload ld.d $a4, $a4, %pc_lo12(Q__align.fg_h_dg_n2_p) - ld.d $a5, $sp, 224 # 8-byte Folded Reload + ld.d $a5, $sp, 216 # 8-byte Folded Reload ld.d $a5, $a5, %pc_lo12(Q__align.og_t_fg_h_dg_n2_p) - ld.d $a6, $sp, 192 # 8-byte Folded Reload + ld.d $a6, $sp, 184 # 8-byte Folded Reload ld.d $a6, $a6, %pc_lo12(Q__align.fg_t_og_h_dg_n2_p) - ld.d $a7, $sp, 384 # 8-byte Folded Reload + ld.d $a7, $sp, 376 # 8-byte Folded Reload ld.d $a7, $a7, %pc_lo12(Q__align.gapz2) - ld.d $t0, $sp, 216 # 8-byte Folded Reload + ld.d $t0, $sp, 208 # 8-byte Folded Reload ld.d $t0, $t0, %pc_lo12(Q__align.gapz_n2) - addi.d $t3, $s8, 2 + addi.d $t3, $s6, 2 bstrpick.d $t1, $t3, 31, 0 ori $t2, $zero, 40 bgeu $t1, $t2, .LBB3_44 @@ -1429,26 +1423,25 @@ Q__align: # @Q__align addi.d $a0, $a0, 4 bnez $t1, .LBB3_28 .LBB3_29: # %._crit_edge572 - ld.d $a0, $sp, 80 # 8-byte Folded Reload - blt $s2, $a0, .LBB3_34 + blt $s2, $s8, .LBB3_34 # %bb.30: # %.lr.ph575 - ld.d $a0, $sp, 424 # 8-byte Folded Reload + ld.d $a0, $sp, 416 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.ogcp1g) - ld.d $a1, $sp, 392 # 8-byte Folded Reload + ld.d $a1, $sp, 384 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(Q__align.digf1) - ld.d $a2, $sp, 264 # 8-byte Folded Reload + ld.d $a2, $sp, 256 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(Q__align.og_h_dg_n1_p) - ld.d $a3, $sp, 416 # 8-byte Folded Reload + ld.d $a3, $sp, 408 # 8-byte Folded Reload ld.d $a3, $a3, %pc_lo12(Q__align.fgcp1g) - ld.d $a4, $sp, 256 # 8-byte Folded Reload + ld.d $a4, $sp, 248 # 8-byte Folded Reload ld.d $a4, $a4, %pc_lo12(Q__align.fg_h_dg_n1_p) - ld.d $a5, $sp, 248 # 8-byte Folded Reload + ld.d $a5, $sp, 240 # 8-byte Folded Reload ld.d $a5, $a5, %pc_lo12(Q__align.og_t_fg_h_dg_n1_p) - ld.d $a6, $sp, 232 # 8-byte Folded Reload + ld.d $a6, $sp, 224 # 8-byte Folded Reload ld.d $a6, $a6, %pc_lo12(Q__align.fg_t_og_h_dg_n1_p) - ld.d $a7, $sp, 400 # 8-byte Folded Reload + ld.d $a7, $sp, 392 # 8-byte Folded Reload ld.d $a7, $a7, %pc_lo12(Q__align.gapz1) - ld.d $t0, $sp, 240 # 8-byte Folded Reload + ld.d $t0, $sp, 232 # 8-byte Folded Reload ld.d $t0, $t0, %pc_lo12(Q__align.gapz_n1) addi.d $t3, $s4, 2 bstrpick.d $t1, $t3, 31, 0 @@ -1526,24 +1519,26 @@ Q__align: # @Q__align addi.d $a0, $a0, 4 bnez $t1, .LBB3_33 .LBB3_34: # %._crit_edge576 - ld.d $a0, $sp, 304 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload ld.d $t3, $a0, %pc_lo12(Q__align.w1) - ld.d $a0, $sp, 200 # 8-byte Folded Reload + ld.d $a0, $sp, 144 # 8-byte Folded Reload ld.d $t4, $a0, %pc_lo12(Q__align.w2) pcalau12i $a0, %got_pc_hi20(RNAscoremtx) ld.d $a0, $a0, %got_pc_lo12(RNAscoremtx) - st.d $a0, $sp, 400 # 8-byte Folded Spill + st.d $a0, $sp, 392 # 8-byte Folded Spill ld.bu $a0, $a0, 0 ori $a1, $zero, 114 pcalau12i $a2, %pc_hi20(impmtx) - st.d $a2, $sp, 152 # 8-byte Folded Spill + st.d $a2, $sp, 144 # 8-byte Folded Spill move $s0, $t3 move $s1, $t4 + st.d $s8, $sp, 24 # 8-byte Folded Spill bne $a0, $a1, .LBB3_37 # %bb.35: beqz $s2, .LBB3_110 # %bb.36: # %.lr.ph.preheader.i - ld.d $s3, $s6, %pc_lo12(Q__align.initverticalw) + ld.d $a0, $sp, 304 # 8-byte Folded Reload + ld.d $s3, $a0, %pc_lo12(Q__align.initverticalw) slli.d $a0, $s4, 2 bstrpick.d $a0, $a0, 33, 2 slli.d $a2, $a0, 2 @@ -1553,14 +1548,15 @@ Q__align: # @Q__align jirl $ra, $ra, 0 b .LBB3_38 .LBB3_37: - ld.d $s3, $s6, %pc_lo12(Q__align.initverticalw) - ld.d $a0, $sp, 320 # 8-byte Folded Reload + ld.d $a0, $sp, 304 # 8-byte Folded Reload + ld.d $s3, $a0, %pc_lo12(Q__align.initverticalw) + ld.d $a0, $sp, 312 # 8-byte Folded Reload ld.d $a1, $a0, %pc_lo12(Q__align.cpmx2) - ld.d $a0, $sp, 352 # 8-byte Folded Reload + ld.d $a0, $sp, 344 # 8-byte Folded Reload ld.d $a2, $a0, %pc_lo12(Q__align.cpmx1) - ld.d $a0, $sp, 296 # 8-byte Folded Reload - ld.d $a5, $a0, %pc_lo12(Q__align.floatwork) ld.d $a0, $sp, 288 # 8-byte Folded Reload + ld.d $a5, $a0, %pc_lo12(Q__align.floatwork) + ld.d $a0, $sp, 280 # 8-byte Folded Reload ld.d $a6, $a0, %pc_lo12(Q__align.intwork) ori $a7, $zero, 1 move $a0, $s3 @@ -1569,16 +1565,16 @@ Q__align: # @Q__align pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 .LBB3_38: # %clearvec.exit - ld.d $ra, $sp, 360 # 8-byte Folded Reload - vld $vr18, $sp, 336 # 16-byte Folded Reload - ld.d $a0, $sp, 48 # 8-byte Folded Reload + ld.d $ra, $sp, 352 # 8-byte Folded Reload + vld $vr18, $sp, 320 # 16-byte Folded Reload + ld.d $a0, $sp, 40 # 8-byte Folded Reload move $t3, $s0 move $t4, $s1 beqz $a0, .LBB3_43 # %bb.39: blez $s2, .LBB3_42 # %bb.40: # %.lr.ph.i - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a0, $sp, 144 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(impmtx) bstrpick.d $a1, $s4, 30, 0 .p2align 4, , 16 @@ -1593,7 +1589,7 @@ Q__align: # @Q__align addi.d $a0, $a0, 8 bnez $a1, .LBB3_41 .LBB3_42: - st.d $zero, $sp, 200 # 8-byte Folded Spill + st.d $zero, $sp, 192 # 8-byte Folded Spill b .LBB3_112 .LBB3_43: ori $a0, $zero, 1 @@ -1712,7 +1708,7 @@ Q__align: # @Q__align sltu $s1, $a5, $fp sltu $s0, $a7, $s0 and $s0, $s1, $s0 - ld.d $s2, $sp, 120 # 8-byte Folded Reload + ld.d $s2, $sp, 112 # 8-byte Folded Reload bnez $s0, .LBB3_27 # %bb.65: # %vector.memcheck sltu $s0, $a6, $t4 @@ -1910,7 +1906,7 @@ Q__align: # @Q__align addi.d $t3, $t3, 16 bnez $s2, .LBB3_75 # %bb.76: # %middle.block - ld.d $s2, $sp, 120 # 8-byte Folded Reload + ld.d $s2, $sp, 112 # 8-byte Folded Reload bne $t1, $t2, .LBB3_27 b .LBB3_29 .LBB3_77: # %vector.memcheck900 @@ -2027,7 +2023,7 @@ Q__align: # @Q__align sltu $s1, $a5, $fp sltu $s0, $a7, $s0 and $s0, $s1, $s0 - ld.d $s2, $sp, 120 # 8-byte Folded Reload + ld.d $s2, $sp, 112 # 8-byte Folded Reload bnez $s0, .LBB3_32 # %bb.98: # %vector.memcheck900 sltu $s0, $a6, $t4 @@ -2225,26 +2221,25 @@ Q__align: # @Q__align addi.d $t3, $t3, 16 bnez $s2, .LBB3_108 # %bb.109: # %middle.block1050 - ld.d $s2, $sp, 120 # 8-byte Folded Reload + ld.d $s2, $sp, 112 # 8-byte Folded Reload bne $t1, $t2, .LBB3_32 b .LBB3_34 .LBB3_110: # %clearvec.exit.thread - ld.d $a0, $sp, 48 # 8-byte Folded Reload + ld.d $a0, $sp, 40 # 8-byte Folded Reload sltui $a0, $a0, 1 .LBB3_111: # %imp_match_out_vead_tateQ.exit - st.d $a0, $sp, 200 # 8-byte Folded Spill + st.d $a0, $sp, 192 # 8-byte Folded Spill .LBB3_112: # %imp_match_out_vead_tateQ.exit - ld.d $a0, $sp, 400 # 8-byte Folded Reload + ld.d $a0, $sp, 392 # 8-byte Folded Reload ld.bu $a0, $a0, 0 ori $a1, $zero, 114 - slli.d $s3, $s8, 32 - pcalau12i $a2, %pc_hi20(.LCPI3_2) - st.d $a2, $sp, 328 # 8-byte Folded Spill + slli.d $s8, $s6, 32 + lu12i.w $s3, 287172 bne $a0, $a1, .LBB3_115 # %bb.113: beqz $ra, .LBB3_139 # %bb.114: # %.lr.ph.preheader.i523 - slli.d $a0, $s8, 2 + slli.d $a0, $s6, 2 bstrpick.d $a0, $a0, 33, 2 slli.d $a2, $a0, 2 move $a0, $t3 @@ -2254,13 +2249,13 @@ Q__align: # @Q__align jirl $ra, $ra, 0 b .LBB3_116 .LBB3_115: - ld.d $a0, $sp, 352 # 8-byte Folded Reload + ld.d $a0, $sp, 344 # 8-byte Folded Reload ld.d $a1, $a0, %pc_lo12(Q__align.cpmx1) - ld.d $a0, $sp, 320 # 8-byte Folded Reload + ld.d $a0, $sp, 312 # 8-byte Folded Reload ld.d $a2, $a0, %pc_lo12(Q__align.cpmx2) - ld.d $a0, $sp, 296 # 8-byte Folded Reload - ld.d $a5, $a0, %pc_lo12(Q__align.floatwork) ld.d $a0, $sp, 288 # 8-byte Folded Reload + ld.d $a5, $a0, %pc_lo12(Q__align.floatwork) + ld.d $a0, $sp, 280 # 8-byte Folded Reload ld.d $a6, $a0, %pc_lo12(Q__align.intwork) ori $a7, $zero, 1 move $a0, $t3 @@ -2271,24 +2266,24 @@ Q__align: # @Q__align jirl $ra, $ra, 0 .LBB3_116: # %clearvec.exit524 sltui $a0, $fp, 1 - ld.d $a1, $sp, 200 # 8-byte Folded Reload + ld.d $a1, $sp, 192 # 8-byte Folded Reload or $a0, $a1, $a0 move $ra, $fp - vld $vr18, $sp, 336 # 16-byte Folded Reload + vld $vr18, $sp, 320 # 16-byte Folded Reload move $t3, $s0 move $t4, $s1 bnez $a0, .LBB3_122 # %bb.117: # %.lr.ph.preheader.i525 - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a0, $sp, 144 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(impmtx) ld.d $a5, $a0, 0 - bstrpick.d $a3, $s8, 31, 0 + bstrpick.d $a3, $s6, 31, 0 ori $a0, $zero, 8 bltu $a3, $a0, .LBB3_120 # %bb.118: # %vector.memcheck1053 addi.w $a0, $zero, -4 lu32i.d $a0, 3 - alsl.d $a0, $s8, $a0, 2 + alsl.d $a0, $s6, $a0, 2 bstrpick.d $a0, $a0, 33, 2 slli.d $a0, $a0, 2 addi.d $a0, $a0, 4 @@ -2299,7 +2294,7 @@ Q__align: # @Q__align bgeu $a5, $a0, .LBB3_290 .LBB3_120: move $a1, $a5 - move $a0, $s8 + move $a0, $s6 move $a2, $t3 .p2align 4, , 16 .LBB3_121: # %.lr.ph.i526 @@ -2320,30 +2315,31 @@ Q__align: # @Q__align ori $a1, $zero, 1 bne $a0, $a1, .LBB3_132 .LBB3_123: - ld.d $a0, $sp, 424 # 8-byte Folded Reload + ld.d $a0, $sp, 416 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.ogcp1g) - ld.d $a1, $sp, 280 # 8-byte Folded Reload + ld.d $a1, $sp, 272 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(Q__align.og_h_dg_n2_p) - ld.d $a2, $sp, 408 # 8-byte Folded Reload + ld.d $a2, $sp, 400 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(Q__align.ogcp2g) fld.s $fa0, $a0, 0 fld.s $fa1, $a1, 0 fld.s $fa2, $a2, 0 - ld.d $a0, $sp, 264 # 8-byte Folded Reload + ld.d $a0, $sp, 256 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.og_h_dg_n1_p) - ld.d $a1, $sp, 416 # 8-byte Folded Reload + ld.d $a1, $sp, 408 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(Q__align.fgcp1g) - ld.d $a2, $sp, 272 # 8-byte Folded Reload + ld.d $a2, $sp, 264 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(Q__align.fg_h_dg_n2_p) - ld.d $a3, $sp, 368 # 8-byte Folded Reload + ld.d $a3, $sp, 360 # 8-byte Folded Reload ld.d $a3, $a3, %pc_lo12(Q__align.fgcp2g) fld.s $fa3, $a0, 0 fld.s $fa4, $a1, 0 fld.s $fa5, $a2, 0 fld.s $fa6, $a3, 0 - ld.d $a0, $sp, 256 # 8-byte Folded Reload + ld.d $a0, $sp, 248 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.fg_h_dg_n1_p) - ld.d $a2, $s6, %pc_lo12(Q__align.initverticalw) + ld.d $a1, $sp, 304 # 8-byte Folded Reload + ld.d $a2, $a1, %pc_lo12(Q__align.initverticalw) movgr2fr.w $fa7, $zero fmadd.s $fa0, $fa0, $fa1, $fa7 fld.s $fa1, $a0, 0 @@ -2358,11 +2354,11 @@ Q__align: # @Q__align fst.s $fa0, $t3, 0 blez $s2, .LBB3_127 # %bb.124: # %.lr.ph586 - ld.d $a0, $sp, 216 # 8-byte Folded Reload + ld.d $a0, $sp, 208 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.gapz_n2) - ld.d $a1, $sp, 248 # 8-byte Folded Reload + ld.d $a1, $sp, 240 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(Q__align.og_t_fg_h_dg_n1_p) - ld.d $a3, $sp, 232 # 8-byte Folded Reload + ld.d $a3, $sp, 224 # 8-byte Folded Reload ld.d $a3, $a3, %pc_lo12(Q__align.fg_t_og_h_dg_n1_p) addi.d $a4, $s4, 1 bstrpick.d $a4, $a4, 31, 0 @@ -2395,13 +2391,13 @@ Q__align: # @Q__align .LBB3_127: # %.preheader556 blez $ra, .LBB3_156 # %bb.128: # %.lr.ph589 - ld.d $a0, $sp, 240 # 8-byte Folded Reload + ld.d $a0, $sp, 232 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.gapz_n1) - ld.d $a1, $sp, 224 # 8-byte Folded Reload + ld.d $a1, $sp, 216 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(Q__align.og_t_fg_h_dg_n2_p) - ld.d $a2, $sp, 192 # 8-byte Folded Reload + ld.d $a2, $sp, 184 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(Q__align.fg_t_og_h_dg_n2_p) - addi.d $a3, $s8, 1 + addi.d $a3, $s6, 1 bstrpick.d $a3, $a3, 31, 0 addi.d $a5, $a3, -1 ori $a6, $zero, 8 @@ -2430,7 +2426,7 @@ Q__align: # @Q__align addi.d $a5, $a5, 4 bnez $a3, .LBB3_130 .LBB3_131: # %.loopexit557.thread751 - ld.d $a0, $sp, 312 # 8-byte Folded Reload + ld.d $a0, $sp, 296 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.m) st.w $zero, $a0, 0 b .LBB3_148 @@ -2440,7 +2436,7 @@ Q__align: # @Q__align pcalau12i $a0, %got_pc_hi20(offset) ld.d $a0, $a0, %got_pc_lo12(offset) ld.w $a0, $a0, 0 - addi.d $a1, $s8, 1 + addi.d $a1, $s6, 1 bstrpick.d $a1, $a1, 31, 0 addi.d $a2, $a1, -1 ori $a4, $zero, 4 @@ -2453,8 +2449,8 @@ Q__align: # @Q__align move $a3, $a2 bstrins.d $a3, $a5, 1, 0 vreplgr2vr.w $vr0, $a0 - pcalau12i $a5, %pc_hi20(.LCPI3_1) - vld $vr1, $a5, %pc_lo12(.LCPI3_1) + pcalau12i $a5, %pc_hi20(.LCPI3_0) + vld $vr1, $a5, %pc_lo12(.LCPI3_0) addi.d $a5, $t3, 4 lu52i.d $a6, $zero, -1026 vreplgr2vr.d $vr2, $a6 @@ -2543,7 +2539,8 @@ Q__align: # @Q__align pcalau12i $a0, %got_pc_hi20(offset) ld.d $a0, $a0, %got_pc_lo12(offset) ld.w $a0, $a0, 0 - ld.d $a1, $s6, %pc_lo12(Q__align.initverticalw) + ld.d $a1, $sp, 304 # 8-byte Folded Reload + ld.d $a1, $a1, %pc_lo12(Q__align.initverticalw) addi.d $a2, $s4, 1 bstrpick.d $a2, $a2, 31, 0 addi.d $a3, $a2, -1 @@ -2557,8 +2554,8 @@ Q__align: # @Q__align move $a4, $a3 bstrins.d $a4, $a6, 1, 0 vreplgr2vr.w $vr0, $a0 - pcalau12i $a6, %pc_hi20(.LCPI3_1) - vld $vr1, $a6, %pc_lo12(.LCPI3_1) + pcalau12i $a6, %pc_hi20(.LCPI3_0) + vld $vr1, $a6, %pc_lo12(.LCPI3_0) addi.d $a6, $a1, 4 lu52i.d $a7, $zero, -1026 vreplgr2vr.d $vr2, $a7 @@ -2635,14 +2632,14 @@ Q__align: # @Q__align addi.d $a1, $a1, 4 bnez $a2, .LBB3_146 .LBB3_147: # %.loopexit557 - ld.d $a0, $sp, 312 # 8-byte Folded Reload + ld.d $a0, $sp, 296 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.m) st.w $zero, $a0, 0 blez $ra, .LBB3_157 .LBB3_148: # %.lr.ph593 - ld.d $a1, $sp, 184 # 8-byte Folded Reload + ld.d $a1, $sp, 176 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(Q__align.mp) - addi.d $a2, $s8, 1 + addi.d $a2, $s6, 1 bstrpick.d $a2, $a2, 31, 0 addi.d $a3, $a2, -1 ori $a5, $zero, 8 @@ -2663,8 +2660,7 @@ Q__align: # @Q__align vreplvei.w $vr0, $vr18, 0 addi.d $a7, $a1, 20 vrepli.b $vr1, 0 - lu12i.w $t0, 287172 - vreplgr2vr.w $vr2, $t0 + vreplgr2vr.w $vr2, $s3 move $t0, $a5 .p2align 4, , 16 .LBB3_151: # %vector.body1194 @@ -2686,11 +2682,10 @@ Q__align: # @Q__align # %bb.152: # %middle.block1200 beq $a3, $a5, .LBB3_155 .LBB3_153: # %scalar.ph1187.preheader - ld.d $a3, $sp, 328 # 8-byte Folded Reload - fld.s $fa0, $a3, %pc_lo12(.LCPI3_2) slli.d $a3, $a4, 2 addi.d $a5, $t3, -4 sub.d $a2, $a2, $a4 + movgr2fr.w $fa0, $s3 .p2align 4, , 16 .LBB3_154: # %scalar.ph1187 # =>This Inner Loop Header: Depth=1 @@ -2705,7 +2700,7 @@ Q__align: # @Q__align move $t2, $zero b .LBB3_158 .LBB3_156: # %.loopexit557.thread - ld.d $a0, $sp, 312 # 8-byte Folded Reload + ld.d $a0, $sp, 296 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.m) st.w $zero, $a0, 0 .LBB3_157: # %._crit_edge594 @@ -2714,15 +2709,15 @@ Q__align: # @Q__align .LBB3_158: # %._crit_edge594.thread ori $a1, $zero, 0 lu32i.d $a1, -1 - add.d $a1, $s3, $a1 + add.d $a1, $s8, $a1 srai.d $a1, $a1, 30 fldx.s $fa0, $t3, $a1 - st.d $t2, $sp, 392 # 8-byte Folded Spill + st.d $t2, $sp, 384 # 8-byte Folded Spill move $t2, $zero .LBB3_159: - ld.d $a1, $sp, 128 # 8-byte Folded Reload + ld.d $a1, $sp, 120 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(Q__align.lastverticalw) - st.d $a1, $sp, 384 # 8-byte Folded Spill + st.d $a1, $sp, 376 # 8-byte Folded Spill fst.s $fa0, $a1, 0 pcalau12i $a1, %got_pc_hi20(outgap) ld.d $s0, $a1, %got_pc_lo12(outgap) @@ -2730,83 +2725,86 @@ Q__align: # @Q__align sltu $a2, $zero, $a1 add.w $a3, $a2, $s4 ori $a2, $zero, 2 - st.d $a3, $sp, 376 # 8-byte Folded Spill - st.d $s7, $sp, 64 # 8-byte Folded Spill + st.d $a3, $sp, 368 # 8-byte Folded Spill + st.d $s7, $sp, 56 # 8-byte Folded Spill blt $a3, $a2, .LBB3_183 # %bb.160: # %.lr.ph630 - st.d $s0, $sp, 24 # 8-byte Folded Spill - ld.d $t5, $s6, %pc_lo12(Q__align.initverticalw) - ld.d $a1, $sp, 352 # 8-byte Folded Reload + st.d $s0, $sp, 16 # 8-byte Folded Spill + ld.d $a1, $sp, 304 # 8-byte Folded Reload + ld.d $t5, $a1, %pc_lo12(Q__align.initverticalw) + ld.d $a1, $sp, 344 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(Q__align.cpmx1) - st.d $a1, $sp, 312 # 8-byte Folded Spill - ld.d $a1, $sp, 320 # 8-byte Folded Reload - ld.d $a1, $a1, %pc_lo12(Q__align.cpmx2) st.d $a1, $sp, 304 # 8-byte Folded Spill - ld.d $a1, $sp, 296 # 8-byte Folded Reload - ld.d $a1, $a1, %pc_lo12(Q__align.floatwork) + ld.d $a1, $sp, 312 # 8-byte Folded Reload + ld.d $a1, $a1, %pc_lo12(Q__align.cpmx2) st.d $a1, $sp, 296 # 8-byte Folded Spill ld.d $a1, $sp, 288 # 8-byte Folded Reload - ld.d $a1, $a1, %pc_lo12(Q__align.intwork) + ld.d $a1, $a1, %pc_lo12(Q__align.floatwork) st.d $a1, $sp, 288 # 8-byte Folded Spill - slli.d $a1, $s8, 2 + ld.d $a1, $sp, 280 # 8-byte Folded Reload + ld.d $a1, $a1, %pc_lo12(Q__align.intwork) + st.d $a1, $sp, 280 # 8-byte Folded Spill + slli.d $a1, $s6, 2 bstrpick.d $a1, $a1, 33, 2 slli.d $a1, $a1, 2 - st.d $a1, $sp, 136 # 8-byte Folded Spill - ld.d $a1, $sp, 184 # 8-byte Folded Reload + st.d $a1, $sp, 128 # 8-byte Folded Spill + ld.d $a1, $sp, 176 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(Q__align.mp) - ld.d $a2, $sp, 192 # 8-byte Folded Reload + ld.d $a2, $sp, 184 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(Q__align.fg_t_og_h_dg_n2_p) - ld.d $a3, $sp, 216 # 8-byte Folded Reload + ld.d $a3, $sp, 208 # 8-byte Folded Reload ld.d $a3, $a3, %pc_lo12(Q__align.gapz_n2) - ld.d $a4, $sp, 224 # 8-byte Folded Reload + ld.d $a4, $sp, 216 # 8-byte Folded Reload ld.d $a4, $a4, %pc_lo12(Q__align.og_t_fg_h_dg_n2_p) - ld.d $a5, $sp, 280 # 8-byte Folded Reload + ld.d $a5, $sp, 272 # 8-byte Folded Reload ld.d $a5, $a5, %pc_lo12(Q__align.og_h_dg_n2_p) - ld.d $a6, $sp, 272 # 8-byte Folded Reload + ld.d $a6, $sp, 264 # 8-byte Folded Reload ld.d $a6, $a6, %pc_lo12(Q__align.fg_h_dg_n2_p) addi.d $s1, $a3, 8 - ld.d $a3, $sp, 368 # 8-byte Folded Reload + ld.d $a3, $sp, 360 # 8-byte Folded Reload ld.d $a3, $a3, %pc_lo12(Q__align.fgcp2g) - ld.d $a7, $sp, 408 # 8-byte Folded Reload + ld.d $a7, $sp, 400 # 8-byte Folded Reload ld.d $a7, $a7, %pc_lo12(Q__align.ogcp2g) - ld.d $t0, $sp, 232 # 8-byte Folded Reload + ld.d $t0, $sp, 224 # 8-byte Folded Reload ld.d $t0, $t0, %pc_lo12(Q__align.fg_t_og_h_dg_n1_p) - st.d $t0, $sp, 280 # 8-byte Folded Spill - ld.d $t0, $sp, 248 # 8-byte Folded Reload - ld.d $t0, $t0, %pc_lo12(Q__align.og_t_fg_h_dg_n1_p) st.d $t0, $sp, 272 # 8-byte Folded Spill - ld.d $t0, $sp, 264 # 8-byte Folded Reload - ld.d $t0, $t0, %pc_lo12(Q__align.og_h_dg_n1_p) + ld.d $t0, $sp, 240 # 8-byte Folded Reload + ld.d $t0, $t0, %pc_lo12(Q__align.og_t_fg_h_dg_n1_p) st.d $t0, $sp, 264 # 8-byte Folded Spill ld.d $t0, $sp, 256 # 8-byte Folded Reload - ld.d $t0, $t0, %pc_lo12(Q__align.fg_h_dg_n1_p) + ld.d $t0, $t0, %pc_lo12(Q__align.og_h_dg_n1_p) st.d $t0, $sp, 256 # 8-byte Folded Spill - ld.d $t0, $sp, 240 # 8-byte Folded Reload - ld.d $t0, $t0, %pc_lo12(Q__align.gapz_n1) + ld.d $t0, $sp, 248 # 8-byte Folded Reload + ld.d $t0, $t0, %pc_lo12(Q__align.fg_h_dg_n1_p) st.d $t0, $sp, 248 # 8-byte Folded Spill - ld.d $t0, $sp, 416 # 8-byte Folded Reload - ld.d $t0, $t0, %pc_lo12(Q__align.fgcp1g) + ld.d $t0, $sp, 232 # 8-byte Folded Reload + ld.d $t0, $t0, %pc_lo12(Q__align.gapz_n1) st.d $t0, $sp, 240 # 8-byte Folded Spill - ld.d $t0, $sp, 424 # 8-byte Folded Reload - ld.d $t0, $t0, %pc_lo12(Q__align.ogcp1g) + ld.d $t0, $sp, 408 # 8-byte Folded Reload + ld.d $t0, $t0, %pc_lo12(Q__align.fgcp1g) st.d $t0, $sp, 232 # 8-byte Folded Spill + ld.d $t0, $sp, 416 # 8-byte Folded Reload + ld.d $t0, $t0, %pc_lo12(Q__align.ogcp1g) + st.d $t0, $sp, 224 # 8-byte Folded Spill ori $t0, $zero, 0 lu32i.d $t0, -1 - add.d $t0, $s3, $t0 + add.d $t0, $s8, $t0 srai.d $t0, $t0, 30 - st.d $t0, $sp, 368 # 8-byte Folded Spill - addi.w $t0, $s8, -1 - st.d $t0, $sp, 192 # 8-byte Folded Spill + st.d $t0, $sp, 360 # 8-byte Folded Spill + addi.w $t0, $s6, -1 + st.d $t0, $sp, 184 # 8-byte Folded Spill bstrpick.d $t0, $t0, 31, 0 slli.d $t1, $t0, 2 addi.d $t1, $t1, 4 - st.d $t1, $sp, 104 # 8-byte Folded Spill + st.d $t1, $sp, 96 # 8-byte Folded Spill addi.d $t0, $t0, 1 - st.d $t0, $sp, 48 # 8-byte Folded Spill + st.d $t0, $sp, 80 # 8-byte Folded Spill bstrpick.d $t0, $t0, 32, 3 + movgr2fr.w $fs1, $s3 slli.d $t1, $t0, 3 slli.d $t0, $t0, 5 st.d $t0, $sp, 40 # 8-byte Folded Spill + move $t0, $s6 addi.d $s6, $a0, 4 addi.d $fp, $a1, 4 addi.d $s4, $a2, 4 @@ -2816,32 +2814,32 @@ Q__align: # @Q__align addi.d $s5, $a3, 4 addi.d $s3, $a7, 4 movgr2fr.w $fs0, $zero - ld.d $a0, $sp, 200 # 8-byte Folded Reload - st.d $t2, $sp, 320 # 8-byte Folded Spill + ld.d $a0, $sp, 192 # 8-byte Folded Reload + st.d $t2, $sp, 312 # 8-byte Folded Spill or $a0, $t2, $a0 - st.d $a0, $sp, 224 # 8-byte Folded Spill - ld.d $a0, $sp, 152 # 8-byte Folded Reload + st.d $a0, $sp, 216 # 8-byte Folded Spill + ld.d $a0, $sp, 144 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(impmtx) - st.d $a0, $sp, 184 # 8-byte Folded Spill - ld.d $a0, $sp, 72 # 8-byte Folded Reload + st.d $a0, $sp, 176 # 8-byte Folded Spill + ld.d $a0, $sp, 64 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.ijp) - st.d $a0, $sp, 216 # 8-byte Folded Spill - st.d $t1, $sp, 96 # 8-byte Folded Spill - sub.d $a0, $s8, $t1 + st.d $a0, $sp, 208 # 8-byte Folded Spill + st.d $t1, $sp, 88 # 8-byte Folded Spill + sub.d $a0, $t0, $t1 st.d $a0, $sp, 32 # 8-byte Folded Spill ori $s8, $zero, 1 - st.d $t5, $sp, 352 # 8-byte Folded Spill + st.d $t5, $sp, 344 # 8-byte Folded Spill b .LBB3_162 .p2align 4, , 16 .LBB3_161: # %._crit_edge622 # in Loop: Header=BB3_162 Depth=1 - ld.d $a2, $sp, 368 # 8-byte Folded Reload + ld.d $a2, $sp, 360 # 8-byte Folded Reload fldx.s $fa0, $t2, $a2 - ld.d $a2, $sp, 384 # 8-byte Folded Reload + ld.d $a2, $sp, 376 # 8-byte Folded Reload fstx.s $fa0, $a2, $a1 move $s8, $a0 move $t3, $t2 - ld.d $a1, $sp, 376 # 8-byte Folded Reload + ld.d $a1, $sp, 368 # 8-byte Folded Reload beq $a0, $a1, .LBB3_184 .LBB3_162: # =>This Loop Header: Depth=1 # Child Loop BB3_181 Depth 2 @@ -2853,65 +2851,65 @@ Q__align: # @Q__align move $t2, $t4 move $t4, $t3 fst.s $fa0, $t3, 0 - ld.d $a0, $sp, 400 # 8-byte Folded Reload + ld.d $a0, $sp, 392 # 8-byte Folded Reload ld.bu $a0, $a0, 0 ori $a1, $zero, 114 bne $a0, $a1, .LBB3_165 # %bb.163: # in Loop: Header=BB3_162 Depth=1 - ld.d $a0, $sp, 320 # 8-byte Folded Reload + ld.d $a0, $sp, 312 # 8-byte Folded Reload bnez $a0, .LBB3_171 # %bb.164: # %clearvec.exit529.thread # in Loop: Header=BB3_162 Depth=1 move $a0, $t2 move $a1, $zero - ld.d $a2, $sp, 136 # 8-byte Folded Reload - st.d $t4, $sp, 424 # 8-byte Folded Spill - st.d $t2, $sp, 416 # 8-byte Folded Spill - st.d $t6, $sp, 408 # 8-byte Folded Spill + ld.d $a2, $sp, 128 # 8-byte Folded Reload + st.d $t4, $sp, 416 # 8-byte Folded Spill + st.d $t2, $sp, 408 # 8-byte Folded Spill + st.d $t6, $sp, 400 # 8-byte Folded Spill pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ld.d $t6, $sp, 408 # 8-byte Folded Reload - ld.d $t2, $sp, 416 # 8-byte Folded Reload - ld.d $t5, $sp, 352 # 8-byte Folded Reload - ld.d $t4, $sp, 424 # 8-byte Folded Reload - ld.d $ra, $sp, 360 # 8-byte Folded Reload - ld.d $a0, $sp, 200 # 8-byte Folded Reload + ld.d $t6, $sp, 400 # 8-byte Folded Reload + ld.d $t2, $sp, 408 # 8-byte Folded Reload + ld.d $t5, $sp, 344 # 8-byte Folded Reload + ld.d $t4, $sp, 416 # 8-byte Folded Reload + ld.d $ra, $sp, 352 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload beqz $a0, .LBB3_166 b .LBB3_171 .p2align 4, , 16 .LBB3_165: # %clearvec.exit529 # in Loop: Header=BB3_162 Depth=1 move $a0, $t2 - ld.d $a1, $sp, 312 # 8-byte Folded Reload - ld.d $a2, $sp, 304 # 8-byte Folded Reload + ld.d $a1, $sp, 304 # 8-byte Folded Reload + ld.d $a2, $sp, 296 # 8-byte Folded Reload move $a3, $s8 move $a4, $ra - ld.d $a5, $sp, 296 # 8-byte Folded Reload - ld.d $a6, $sp, 288 # 8-byte Folded Reload + ld.d $a5, $sp, 288 # 8-byte Folded Reload + ld.d $a6, $sp, 280 # 8-byte Folded Reload move $a7, $zero - st.d $t4, $sp, 424 # 8-byte Folded Spill - st.d $t2, $sp, 416 # 8-byte Folded Spill - st.d $t6, $sp, 408 # 8-byte Folded Spill + st.d $t4, $sp, 416 # 8-byte Folded Spill + st.d $t2, $sp, 408 # 8-byte Folded Spill + st.d $t6, $sp, 400 # 8-byte Folded Spill pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 - ld.d $t6, $sp, 408 # 8-byte Folded Reload - ld.d $t2, $sp, 416 # 8-byte Folded Reload - ld.d $t5, $sp, 352 # 8-byte Folded Reload - ld.d $t4, $sp, 424 # 8-byte Folded Reload - ld.d $ra, $sp, 360 # 8-byte Folded Reload - ld.d $a0, $sp, 224 # 8-byte Folded Reload + ld.d $t6, $sp, 400 # 8-byte Folded Reload + ld.d $t2, $sp, 408 # 8-byte Folded Reload + ld.d $t5, $sp, 344 # 8-byte Folded Reload + ld.d $t4, $sp, 416 # 8-byte Folded Reload + ld.d $ra, $sp, 352 # 8-byte Folded Reload + ld.d $a0, $sp, 216 # 8-byte Folded Reload bnez $a0, .LBB3_171 .LBB3_166: # %.lr.ph.preheader.i531 # in Loop: Header=BB3_162 Depth=1 slli.d $a0, $s8, 3 - ld.d $a1, $sp, 184 # 8-byte Folded Reload + ld.d $a1, $sp, 176 # 8-byte Folded Reload ldx.d $a3, $a1, $a0 - ld.d $a0, $sp, 192 # 8-byte Folded Reload + ld.d $a0, $sp, 184 # 8-byte Folded Reload ori $a1, $zero, 7 bltu $a0, $a1, .LBB3_169 # %bb.167: # %vector.memcheck1203 # in Loop: Header=BB3_162 Depth=1 - ld.d $a1, $sp, 104 # 8-byte Folded Reload + ld.d $a1, $sp, 96 # 8-byte Folded Reload add.d $a0, $a3, $a1 bgeu $t2, $a0, .LBB3_180 # %bb.168: # %vector.memcheck1203 @@ -2920,7 +2918,7 @@ Q__align: # @Q__align bgeu $a3, $a0, .LBB3_180 .LBB3_169: # in Loop: Header=BB3_162 Depth=1 move $a0, $a3 - ld.d $a2, $sp, 208 # 8-byte Folded Reload + ld.d $a2, $sp, 200 # 8-byte Folded Reload move $a1, $t2 .p2align 4, , 16 .LBB3_170: # %.lr.ph.i532 @@ -2941,37 +2939,35 @@ Q__align: # @Q__align fldx.s $fa0, $t5, $a1 fst.s $fa0, $t2, 0 addi.d $a0, $s8, 1 - ld.d $a2, $sp, 392 # 8-byte Folded Reload + ld.d $a2, $sp, 384 # 8-byte Folded Reload bnez $a2, .LBB3_161 # %bb.172: # %.lr.ph621.preheader # in Loop: Header=BB3_162 Depth=1 move $a2, $zero move $a3, $zero move $a4, $zero - ld.d $a5, $sp, 280 # 8-byte Folded Reload - fldx.s $fa0, $a5, $a1 ld.d $a5, $sp, 272 # 8-byte Folded Reload - fldx.s $fa1, $a5, $a1 + fldx.s $fa0, $a5, $a1 ld.d $a5, $sp, 264 # 8-byte Folded Reload - fldx.s $fa2, $a5, $a1 + fldx.s $fa1, $a5, $a1 ld.d $a5, $sp, 256 # 8-byte Folded Reload + fldx.s $fa2, $a5, $a1 + ld.d $a5, $sp, 248 # 8-byte Folded Reload fldx.s $fa3, $a5, $a1 - ld.d $a6, $sp, 248 # 8-byte Folded Reload + ld.d $a6, $sp, 240 # 8-byte Folded Reload fldx.s $fa4, $a6, $a1 slli.d $a5, $a0, 2 fldx.s $fa5, $a6, $a5 - ld.d $a5, $sp, 240 # 8-byte Folded Reload + ld.d $a5, $sp, 232 # 8-byte Folded Reload fldx.s $fa6, $a5, $a1 fld.s $ft0, $t4, 0 - ld.d $a5, $sp, 328 # 8-byte Folded Reload - fld.s $ft1, $a5, %pc_lo12(.LCPI3_2) slli.d $a5, $s8, 3 - ld.d $a6, $sp, 216 # 8-byte Folded Reload + ld.d $a6, $sp, 208 # 8-byte Folded Reload ldx.d $a6, $a6, $a5 - ld.d $a5, $sp, 232 # 8-byte Folded Reload + ld.d $a5, $sp, 224 # 8-byte Folded Reload fldx.s $fa7, $a5, $a1 - vld $vr10, $sp, 336 # 16-byte Folded Reload - fmadd.s $ft0, $ft2, $ft1, $ft0 + vld $vr9, $sp, 320 # 16-byte Folded Reload + fmadd.s $ft0, $ft1, $fs1, $ft0 addi.d $a5, $t2, 4 addi.d $a6, $a6, 4 addi.d $a7, $zero, -1 @@ -3050,7 +3046,7 @@ Q__align: # @Q__align add.d $a1, $t2, $a1 addi.d $a2, $t2, 16 addi.d $a3, $a3, 16 - ld.d $a4, $sp, 96 # 8-byte Folded Reload + ld.d $a4, $sp, 88 # 8-byte Folded Reload .p2align 4, , 16 .LBB3_181: # %vector.body1215 # Parent Loop BB3_162 Depth=1 @@ -3070,8 +3066,8 @@ Q__align: # @Q__align # %bb.182: # %middle.block1226 # in Loop: Header=BB3_162 Depth=1 ld.d $a2, $sp, 32 # 8-byte Folded Reload - ld.d $a3, $sp, 48 # 8-byte Folded Reload - ld.d $a4, $sp, 96 # 8-byte Folded Reload + ld.d $a3, $sp, 80 # 8-byte Folded Reload + ld.d $a4, $sp, 88 # 8-byte Folded Reload bne $a3, $a4, .LBB3_170 b .LBB3_171 .LBB3_183: @@ -3079,23 +3075,23 @@ Q__align: # @Q__align bnez $a1, .LBB3_199 b .LBB3_185 .LBB3_184: # %._crit_edge631.loopexit - ld.d $s0, $sp, 24 # 8-byte Folded Reload + ld.d $s0, $sp, 16 # 8-byte Folded Reload ld.w $a1, $s0, 0 move $t3, $t2 - ld.d $s7, $sp, 64 # 8-byte Folded Reload - ld.d $s5, $sp, 168 # 8-byte Folded Reload - ld.d $s4, $sp, 88 # 8-byte Folded Reload - ld.d $s2, $sp, 120 # 8-byte Folded Reload - ld.d $s8, $sp, 208 # 8-byte Folded Reload + ld.d $s7, $sp, 56 # 8-byte Folded Reload + ld.d $s5, $sp, 160 # 8-byte Folded Reload + ld.d $s4, $sp, 72 # 8-byte Folded Reload + ld.d $s2, $sp, 112 # 8-byte Folded Reload + ld.d $s6, $sp, 200 # 8-byte Folded Reload bnez $a1, .LBB3_199 .LBB3_185: # %.preheader555 - ld.d $a0, $sp, 392 # 8-byte Folded Reload + ld.d $a0, $sp, 384 # 8-byte Folded Reload bnez $a0, .LBB3_192 # %bb.186: # %.lr.ph636 pcalau12i $a0, %got_pc_hi20(offset) ld.d $a0, $a0, %got_pc_lo12(offset) ld.w $a0, $a0, 0 - addi.d $a1, $s8, 1 + addi.d $a1, $s6, 1 bstrpick.d $a1, $a1, 31, 0 addi.d $a2, $a1, -1 ori $a4, $zero, 4 @@ -3108,11 +3104,11 @@ Q__align: # @Q__align move $a3, $a2 bstrins.d $a3, $a5, 1, 0 vreplgr2vr.w $vr0, $a0 - vreplgr2vr.d $vr1, $s8 - pcalau12i $a5, %pc_hi20(.LCPI3_3) - vld $vr2, $a5, %pc_lo12(.LCPI3_3) - pcalau12i $a5, %pc_hi20(.LCPI3_4) - vld $vr3, $a5, %pc_lo12(.LCPI3_4) + vreplgr2vr.d $vr1, $s6 + pcalau12i $a5, %pc_hi20(.LCPI3_1) + vld $vr2, $a5, %pc_lo12(.LCPI3_1) + pcalau12i $a5, %pc_hi20(.LCPI3_2) + vld $vr3, $a5, %pc_lo12(.LCPI3_2) addi.d $a5, $t3, 4 lu52i.d $a6, $zero, -1026 vreplgr2vr.d $vr4, $a6 @@ -3173,7 +3169,7 @@ Q__align: # @Q__align # %bb.189: # %middle.block1247 beq $a2, $a4, .LBB3_192 .LBB3_190: # %scalar.ph1231.preheader - sub.w $a2, $s8, $a3 + sub.w $a2, $s6, $a3 mul.d $a2, $a0, $a2 alsl.d $a4, $a3, $t3, 2 sub.d $a1, $a1, $a3 @@ -3204,7 +3200,7 @@ Q__align: # @Q__align ffint.d.w $fa1, $fa0 movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 - ld.d $a0, $sp, 128 # 8-byte Folded Reload + ld.d $a0, $sp, 120 # 8-byte Folded Reload ld.d $a1, $a0, %pc_lo12(Q__align.lastverticalw) fneg.d $fa1, $fa1 addi.d $a0, $s4, 1 @@ -3272,32 +3268,32 @@ Q__align: # @Q__align addi.d $a1, $a1, 4 bnez $a2, .LBB3_198 .LBB3_199: # %.loopexit - ld.d $a0, $sp, 128 # 8-byte Folded Reload + ld.d $a0, $sp, 120 # 8-byte Folded Reload ld.d $s3, $a0, %pc_lo12(Q__align.lastverticalw) - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 136 # 8-byte Folded Reload ld.d $s6, $a0, %pc_lo12(Q__align.mseq1) - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload ld.d $s4, $a0, %pc_lo12(Q__align.mseq2) - ld.d $a0, $sp, 72 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload ld.d $s2, $a0, %pc_lo12(Q__align.ijp) - ld.d $a0, $sp, 200 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload beqz $a0, .LBB3_201 # %bb.200: st.d $s7, $sp, 0 move $a0, $t3 move $a1, $s3 - ld.d $a2, $sp, 160 # 8-byte Folded Reload + ld.d $a2, $sp, 152 # 8-byte Folded Reload move $a3, $s5 move $a4, $s6 move $a5, $s4 move $a6, $s2 - ld.d $s0, $sp, 176 # 8-byte Folded Reload + ld.d $s0, $sp, 168 # 8-byte Folded Reload move $a7, $s0 pcaddu18i $ra, %call36(Atracking) jirl $ra, $ra, 0 b .LBB3_280 .LBB3_201: - ld.d $a0, $sp, 160 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a0, $a0, 0 move $fp, $t3 pcaddu18i $ra, %call36(strlen) @@ -3309,7 +3305,7 @@ Q__align: # @Q__align jirl $ra, $ra, 0 move $s5, $a0 add.w $a0, $a0, $s7 - st.d $a0, $sp, 424 # 8-byte Folded Spill + st.d $a0, $sp, 416 # 8-byte Folded Spill addi.w $s8, $a0, 1 move $a0, $s8 pcaddu18i $ra, %call36(AllocateCharVec) @@ -3321,9 +3317,9 @@ Q__align: # @Q__align ld.w $a1, $s0, 0 ori $a3, $zero, 1 addi.w $ra, $s7, 0 - st.d $s5, $sp, 416 # 8-byte Folded Spill + st.d $s5, $sp, 408 # 8-byte Folded Spill addi.w $a2, $s5, 0 - ld.d $s8, $sp, 80 # 8-byte Folded Reload + ld.d $s8, $sp, 24 # 8-byte Folded Reload beq $a1, $a3, .LBB3_212 # %bb.202: move $a7, $fp @@ -3333,7 +3329,7 @@ Q__align: # @Q__align slli.d $a1, $s7, 3 bstrpick.d $a1, $a1, 33, 3 slli.d $a1, $a1, 3 - ld.d $a3, $sp, 416 # 8-byte Folded Reload + ld.d $a3, $sp, 408 # 8-byte Folded Reload slli.d $a3, $a3, 32 srai.d $a3, $a3, 30 bstrpick.d $a4, $s7, 30, 0 @@ -3359,7 +3355,7 @@ Q__align: # @Q__align # %bb.208: # %.lr.ph7.i slli.d $a1, $s7, 32 srai.d $a1, $a1, 29 - ld.d $a5, $sp, 416 # 8-byte Folded Reload + ld.d $a5, $sp, 408 # 8-byte Folded Reload slli.d $a3, $a5, 2 bstrpick.d $a3, $a3, 32, 2 slli.d $a3, $a3, 2 @@ -3490,8 +3486,8 @@ Q__align: # @Q__align bne $a5, $t0, .LBB3_129 b .LBB3_131 .LBB3_227: # %vector.ph1265 - pcalau12i $a4, %pc_hi20(.LCPI3_5) - vld $vr0, $a4, %pc_lo12(.LCPI3_5) + pcalau12i $a4, %pc_hi20(.LCPI3_3) + vld $vr0, $a4, %pc_lo12(.LCPI3_3) bstrpick.d $a3, $a3, 31, 2 slli.d $a3, $a3, 2 addi.d $a4, $s2, 16 @@ -3536,7 +3532,7 @@ Q__align: # @Q__align bltz $a2, .LBB3_240 # %bb.233: # %.lr.ph13.i ld.d $a3, $s2, 0 - ld.d $a4, $sp, 416 # 8-byte Folded Reload + ld.d $a4, $sp, 408 # 8-byte Folded Reload addi.d $a5, $a4, 1 bstrpick.d $a4, $a5, 31, 0 ori $a6, $zero, 8 @@ -3547,8 +3543,8 @@ Q__align: # @Q__align .LBB3_235: # %vector.ph1280 bstrpick.d $a5, $a5, 31, 3 slli.d $a5, $a5, 3 - pcalau12i $a6, %pc_hi20(.LCPI3_6) - vld $vr0, $a6, %pc_lo12(.LCPI3_6) + pcalau12i $a6, %pc_hi20(.LCPI3_4) + vld $vr0, $a6, %pc_lo12(.LCPI3_4) addi.d $a6, $a3, 16 vrepli.b $vr1, -1 vrepli.w $vr2, -5 @@ -3579,16 +3575,16 @@ Q__align: # @Q__align addi.d $a6, $a6, -1 bnez $a4, .LBB3_239 .LBB3_240: # %._crit_edge.i - st.d $s1, $sp, 408 # 8-byte Folded Spill + st.d $s1, $sp, 400 # 8-byte Folded Spill add.d $a3, $s1, $ra add.d $s3, $a3, $a2 stx.b $zero, $a3, $a2 - st.d $a0, $sp, 400 # 8-byte Folded Spill + st.d $a0, $sp, 392 # 8-byte Folded Spill add.d $a3, $a0, $ra add.d $s5, $a3, $a2 stx.b $zero, $a3, $a2 st.w $zero, $a1, 0 - ld.d $a0, $sp, 424 # 8-byte Folded Reload + ld.d $a0, $sp, 416 # 8-byte Folded Reload bltz $a0, .LBB3_272 # %bb.241: # %.lr.ph41.i.preheader move $a7, $zero @@ -3599,7 +3595,7 @@ Q__align: # @Q__align vrepli.b $vr1, 45 vrepli.b $vr2, 111 ori $a5, $zero, 16 - ld.d $a6, $sp, 416 # 8-byte Folded Reload + ld.d $a6, $sp, 408 # 8-byte Folded Reload move $a0, $s2 .p2align 4, , 16 .LBB3_242: # %.lr.ph41.i @@ -3766,11 +3762,11 @@ Q__align: # @Q__align beq $t1, $ra, .LBB3_268 .LBB3_266: # %._crit_edge30.i # in Loop: Header=BB3_242 Depth=1 - ld.d $t8, $sp, 416 # 8-byte Folded Reload + ld.d $t8, $sp, 408 # 8-byte Folded Reload addi.w $t8, $t8, 0 beq $t2, $t8, .LBB3_268 # %bb.267: # in Loop: Header=BB3_242 Depth=1 - ld.d $t8, $sp, 152 # 8-byte Folded Reload + ld.d $t8, $sp, 144 # 8-byte Folded Reload ld.d $t8, $t8, %pc_lo12(impmtx) ldx.d $t4, $t8, $t4 fldx.s $fa3, $t4, $t5 @@ -3789,7 +3785,7 @@ Q__align: # @Q__align addi.w $a7, $a7, 2 st.b $a2, $t6, -1 move $s7, $t0 - ld.d $t0, $sp, 424 # 8-byte Folded Reload + ld.d $t0, $sp, 416 # 8-byte Folded Reload bge $t0, $a7, .LBB3_242 b .LBB3_272 .p2align 4, , 16 @@ -3801,17 +3797,17 @@ Q__align: # @Q__align bnez $t7, .LBB3_245 b .LBB3_251 .LBB3_272: # %._crit_edge42.i - ld.d $a0, $sp, 176 # 8-byte Folded Reload + ld.d $a0, $sp, 168 # 8-byte Folded Reload bgtz $a0, .LBB3_274 b .LBB3_276 .LBB3_273: move $s3, $t7 move $s5, $t6 - ld.d $a0, $sp, 176 # 8-byte Folded Reload + ld.d $a0, $sp, 168 # 8-byte Folded Reload blez $a0, .LBB3_276 .LBB3_274: # %.lr.ph50.preheader.i - ld.d $fp, $sp, 160 # 8-byte Folded Reload - ld.d $s0, $sp, 176 # 8-byte Folded Reload + ld.d $fp, $sp, 152 # 8-byte Folded Reload + ld.d $s0, $sp, 168 # 8-byte Folded Reload .p2align 4, , 16 .LBB3_275: # %.lr.ph50.i # =>This Inner Loop Header: Depth=1 @@ -3825,10 +3821,10 @@ Q__align: # @Q__align addi.d $s6, $s6, 8 bnez $s0, .LBB3_275 .LBB3_276: # %.preheader.i - ld.d $s7, $sp, 64 # 8-byte Folded Reload + ld.d $s7, $sp, 56 # 8-byte Folded Reload blez $s7, .LBB3_279 # %bb.277: # %.lr.ph52.preheader.i - ld.d $fp, $sp, 168 # 8-byte Folded Reload + ld.d $fp, $sp, 160 # 8-byte Folded Reload move $s0, $s7 .p2align 4, , 16 .LBB3_278: # %.lr.ph52.i @@ -3843,32 +3839,32 @@ Q__align: # @Q__align addi.d $s4, $s4, 8 bnez $s0, .LBB3_278 .LBB3_279: # %Atracking_localhom.exit - ld.d $a0, $sp, 408 # 8-byte Folded Reload + ld.d $a0, $sp, 400 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 400 # 8-byte Folded Reload + ld.d $a0, $sp, 392 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $s5, $sp, 168 # 8-byte Folded Reload - ld.d $s0, $sp, 176 # 8-byte Folded Reload + ld.d $s5, $sp, 160 # 8-byte Folded Reload + ld.d $s0, $sp, 168 # 8-byte Folded Reload .LBB3_280: - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 136 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align.mseq1) ld.d $a0, $a0, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 addi.w $a3, $a0, 0 lu12i.w $a4, 1220 - ld.d $a2, $sp, 56 # 8-byte Folded Reload + ld.d $a2, $sp, 48 # 8-byte Folded Reload blt $a2, $a3, .LBB3_293 # %bb.281: ori $a0, $a4, 2881 bge $a3, $a0, .LBB3_293 # %bb.282: - ld.d $s1, $sp, 160 # 8-byte Folded Reload + ld.d $s1, $sp, 152 # 8-byte Folded Reload blez $s0, .LBB3_285 .LBB3_283: # %.lr.ph642 - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 136 # 8-byte Folded Reload ld.d $fp, $a0, %pc_lo12(Q__align.mseq1) .p2align 4, , 16 .LBB3_284: # =>This Inner Loop Header: Depth=1 @@ -3883,7 +3879,7 @@ Q__align: # @Q__align .LBB3_285: # %.preheader blez $s7, .LBB3_288 # %bb.286: # %.lr.ph644 - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload ld.d $fp, $a0, %pc_lo12(Q__align.mseq2) .p2align 4, , 16 .LBB3_287: # =>This Inner Loop Header: Depth=1 @@ -3897,6 +3893,7 @@ Q__align: # @Q__align bnez $s7, .LBB3_287 .LBB3_288: # %._crit_edge645 fmov.s $fa0, $fs0 + fld.d $fs1, $sp, 424 # 8-byte Folded Reload fld.d $fs0, $sp, 432 # 8-byte Folded Reload ld.d $s8, $sp, 440 # 8-byte Folded Reload ld.d $s7, $sp, 448 # 8-byte Folded Reload @@ -3914,14 +3911,14 @@ Q__align: # @Q__align .LBB3_289: movgr2fr.w $fa0, $zero ori $a1, $zero, 1 - st.d $a1, $sp, 392 # 8-byte Folded Spill + st.d $a1, $sp, 384 # 8-byte Folded Spill b .LBB3_159 .LBB3_290: # %vector.ph1061 - bstrpick.d $a0, $s8, 31, 3 + bstrpick.d $a0, $s6, 31, 3 slli.d $a4, $a0, 3 slli.d $a2, $a0, 5 add.d $a1, $a5, $a2 - andi $a0, $s8, 7 + andi $a0, $s6, 7 add.d $a2, $t3, $a2 addi.d $a6, $t3, 16 addi.d $a5, $a5, 16 @@ -3957,14 +3954,14 @@ Q__align: # @Q__align addi.d $a0, $a0, %pc_lo12(.L.str.1) pcaddu18i $ra, %call36(ErrorExit) jirl $ra, $ra, 0 - ld.d $s1, $sp, 160 # 8-byte Folded Reload + ld.d $s1, $sp, 152 # 8-byte Folded Reload bgtz $s0, .LBB3_283 b .LBB3_285 .LBB3_294: - ld.d $s2, $sp, 120 # 8-byte Folded Reload + ld.d $s2, $sp, 112 # 8-byte Folded Reload b .LBB3_27 .LBB3_295: - ld.d $s2, $sp, 120 # 8-byte Folded Reload + ld.d $s2, $sp, 112 # 8-byte Folded Reload b .LBB3_32 .Lfunc_end3: .size Q__align, .Lfunc_end3-Q__align @@ -4879,35 +4876,27 @@ Atracking: # @Atracking .Lfunc_end5: .size Atracking, .Lfunc_end5-Atracking # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function Q__align_gapmap -.LCPI6_0: - .dword 0x3ff4cccccccccccd # double 1.3 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI6_1: + .p2align 4, 0x0 # -- Begin function Q__align_gapmap +.LCPI6_0: .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 .word 4 # 0x4 -.LCPI6_3: +.LCPI6_1: .dword 3 # 0x3 .dword 4 # 0x4 -.LCPI6_4: +.LCPI6_2: .dword 1 # 0x1 .dword 2 # 0x2 -.LCPI6_5: +.LCPI6_3: .dword 0 # 0x0 .dword 1 # 0x1 -.LCPI6_6: +.LCPI6_4: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI6_2: - .word 0x461c4000 # float 1.0E+4 .text .globl Q__align_gapmap .p2align 5 @@ -4927,10 +4916,11 @@ Q__align_gapmap: # @Q__align_gapmap st.d $s7, $sp, 464 # 8-byte Folded Spill st.d $s8, $sp, 456 # 8-byte Folded Spill fst.d $fs0, $sp, 448 # 8-byte Folded Spill - st.d $a7, $sp, 128 # 8-byte Folded Spill - st.d $a6, $sp, 64 # 8-byte Folded Spill - st.d $a5, $sp, 392 # 8-byte Folded Spill - st.d $a4, $sp, 440 # 8-byte Folded Spill + fst.d $fs1, $sp, 440 # 8-byte Folded Spill + st.d $a7, $sp, 64 # 8-byte Folded Spill + st.d $a6, $sp, 72 # 8-byte Folded Spill + st.d $a5, $sp, 384 # 8-byte Folded Spill + st.d $a4, $sp, 432 # 8-byte Folded Spill st.d $a3, $sp, 56 # 8-byte Folded Spill move $s8, $a2 move $s7, $a1 @@ -4943,7 +4933,7 @@ Q__align_gapmap: # @Q__align_gapmap ld.w $fp, $a0, 0 pcalau12i $s3, %pc_hi20(Q__align_gapmap.mseq1) pcalau12i $a0, %pc_hi20(Q__align_gapmap.mseq2) - st.d $a0, $sp, 112 # 8-byte Folded Spill + st.d $a0, $sp, 120 # 8-byte Folded Spill bnez $s6, .LBB6_2 # %bb.1: pcalau12i $a0, %got_pc_hi20(njob) @@ -4962,10 +4952,10 @@ Q__align_gapmap: # @Q__align_gapmap jirl $ra, $ra, 0 ld.d $a1, $sp, 304 # 8-byte Folded Reload ld.w $s6, $a1, %pc_lo12(Q__align_gapmap.orlgth1) - ld.d $a1, $sp, 112 # 8-byte Folded Reload + ld.d $a1, $sp, 120 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.mseq2) .LBB6_2: - st.d $s0, $sp, 432 # 8-byte Folded Spill + st.d $s0, $sp, 424 # 8-byte Folded Spill ld.d $a0, $s0, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 @@ -4976,80 +4966,80 @@ Q__align_gapmap: # @Q__align_gapmap pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 pcalau12i $a1, %pc_hi20(Q__align_gapmap.orlgth2) - st.d $a1, $sp, 72 # 8-byte Folded Spill + st.d $a1, $sp, 80 # 8-byte Folded Spill ld.w $s0, $a1, %pc_lo12(Q__align_gapmap.orlgth2) - st.d $a0, $sp, 328 # 8-byte Folded Spill + st.d $a0, $sp, 320 # 8-byte Folded Spill addi.w $a6, $a0, 0 pcalau12i $a0, %pc_hi20(Q__align_gapmap.w1) - st.d $a0, $sp, 272 # 8-byte Folded Spill + st.d $a0, $sp, 160 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.w2) st.d $a0, $sp, 152 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.initverticalw) - st.d $a0, $sp, 248 # 8-byte Folded Spill + st.d $a0, $sp, 256 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.lastverticalw) - st.d $a0, $sp, 120 # 8-byte Folded Spill + st.d $a0, $sp, 128 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.m) - st.d $a0, $sp, 224 # 8-byte Folded Spill + st.d $a0, $sp, 232 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.mp) - st.d $a0, $sp, 88 # 8-byte Folded Spill + st.d $a0, $sp, 96 # 8-byte Folded Spill pcalau12i $a7, %pc_hi20(Q__align_gapmap.mseq) pcalau12i $a0, %pc_hi20(Q__align_gapmap.digf1) - st.d $a0, $sp, 376 # 8-byte Folded Spill + st.d $a0, $sp, 368 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.digf2) - st.d $a0, $sp, 360 # 8-byte Folded Spill + st.d $a0, $sp, 352 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.diaf1) st.d $a0, $sp, 144 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.diaf2) st.d $a0, $sp, 280 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.gapz1) - st.d $a0, $sp, 384 # 8-byte Folded Spill + st.d $a0, $sp, 376 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.gapz2) - st.d $a0, $sp, 368 # 8-byte Folded Spill + st.d $a0, $sp, 360 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.gapf1) st.d $a0, $sp, 288 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.gapf2) st.d $a0, $sp, 296 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.ogcp1g) - st.d $a0, $sp, 424 # 8-byte Folded Spill + st.d $a0, $sp, 416 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.ogcp2g) - st.d $a0, $sp, 408 # 8-byte Folded Spill + st.d $a0, $sp, 400 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.fgcp1g) - st.d $a0, $sp, 416 # 8-byte Folded Spill + st.d $a0, $sp, 408 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.fgcp2g) - st.d $a0, $sp, 400 # 8-byte Folded Spill + st.d $a0, $sp, 392 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.og_h_dg_n1_p) - st.d $a0, $sp, 336 # 8-byte Folded Spill + st.d $a0, $sp, 328 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.og_h_dg_n2_p) - st.d $a0, $sp, 216 # 8-byte Folded Spill + st.d $a0, $sp, 224 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.fg_h_dg_n1_p) - st.d $a0, $sp, 264 # 8-byte Folded Spill + st.d $a0, $sp, 272 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.fg_h_dg_n2_p) - st.d $a0, $sp, 208 # 8-byte Folded Spill + st.d $a0, $sp, 216 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.og_t_fg_h_dg_n1_p) - st.d $a0, $sp, 200 # 8-byte Folded Spill + st.d $a0, $sp, 208 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.og_t_fg_h_dg_n2_p) - st.d $a0, $sp, 176 # 8-byte Folded Spill + st.d $a0, $sp, 184 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.fg_t_og_h_dg_n1_p) - st.d $a0, $sp, 192 # 8-byte Folded Spill + st.d $a0, $sp, 200 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.fg_t_og_h_dg_n2_p) - st.d $a0, $sp, 160 # 8-byte Folded Spill + st.d $a0, $sp, 168 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.gapz_n1) - st.d $a0, $sp, 184 # 8-byte Folded Spill + st.d $a0, $sp, 192 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.gapz_n2) - st.d $a0, $sp, 168 # 8-byte Folded Spill + st.d $a0, $sp, 176 # 8-byte Folded Spill pcalau12i $a1, %pc_hi20(Q__align_gapmap.cpmx1) pcalau12i $a0, %pc_hi20(Q__align_gapmap.cpmx2) - st.d $a0, $sp, 352 # 8-byte Folded Spill + st.d $a0, $sp, 336 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.floatwork) - st.d $a0, $sp, 240 # 8-byte Folded Spill + st.d $a0, $sp, 248 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(Q__align_gapmap.intwork) - st.d $a0, $sp, 232 # 8-byte Folded Spill - st.d $s3, $sp, 104 # 8-byte Folded Spill - st.d $s2, $sp, 96 # 8-byte Folded Spill + st.d $a0, $sp, 240 # 8-byte Folded Spill + st.d $s3, $sp, 112 # 8-byte Folded Spill + st.d $s2, $sp, 104 # 8-byte Folded Spill st.d $a6, $sp, 344 # 8-byte Folded Spill move $s4, $a1 - st.d $a1, $sp, 256 # 8-byte Folded Spill - st.d $s1, $sp, 80 # 8-byte Folded Spill + st.d $a1, $sp, 264 # 8-byte Folded Spill + st.d $s1, $sp, 88 # 8-byte Folded Spill st.d $s8, $sp, 136 # 8-byte Folded Spill blt $s6, $s2, .LBB6_4 # %bb.3: @@ -5063,7 +5053,7 @@ Q__align_gapmap: # @Q__align_gapmap # %bb.5: blez $s0, .LBB6_7 # %bb.6: - ld.d $a0, $sp, 272 # 8-byte Folded Reload + ld.d $a0, $sp, 160 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.w1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 @@ -5074,19 +5064,19 @@ Q__align_gapmap: # @Q__align_gapmap ld.d $a0, $fp, %pc_lo12(Q__align_gapmap.match) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 248 # 8-byte Folded Reload + ld.d $a0, $sp, 256 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.initverticalw) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 120 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.lastverticalw) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 224 # 8-byte Folded Reload + ld.d $a0, $sp, 232 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.m) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 88 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.mp) pcaddu18i $ra, %call36(FreeIntVec) jirl $ra, $ra, 0 @@ -5094,11 +5084,11 @@ Q__align_gapmap: # @Q__align_gapmap ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.mseq) pcaddu18i $ra, %call36(FreeCharMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 376 # 8-byte Folded Reload + ld.d $a0, $sp, 368 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.digf1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 360 # 8-byte Folded Reload + ld.d $a0, $sp, 352 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.digf2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 @@ -5110,11 +5100,11 @@ Q__align_gapmap: # @Q__align_gapmap ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.diaf2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 384 # 8-byte Folded Reload + ld.d $a0, $sp, 376 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.gapz1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 368 # 8-byte Folded Reload + ld.d $a0, $sp, 360 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.gapz2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 @@ -5126,98 +5116,101 @@ Q__align_gapmap: # @Q__align_gapmap ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.gapf2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 424 # 8-byte Folded Reload + ld.d $a0, $sp, 416 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.ogcp1g) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 408 # 8-byte Folded Reload + ld.d $a0, $sp, 400 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.ogcp2g) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 416 # 8-byte Folded Reload + ld.d $a0, $sp, 408 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.fgcp1g) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 400 # 8-byte Folded Reload + ld.d $a0, $sp, 392 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.fgcp2g) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 336 # 8-byte Folded Reload + ld.d $a0, $sp, 328 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.og_h_dg_n1_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 216 # 8-byte Folded Reload + ld.d $a0, $sp, 224 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.og_h_dg_n2_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 264 # 8-byte Folded Reload + ld.d $a0, $sp, 272 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.fg_h_dg_n1_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 208 # 8-byte Folded Reload + ld.d $a0, $sp, 216 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.fg_h_dg_n2_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 200 # 8-byte Folded Reload + ld.d $a0, $sp, 208 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.og_t_fg_h_dg_n1_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 176 # 8-byte Folded Reload + ld.d $a0, $sp, 184 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.og_t_fg_h_dg_n2_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 192 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.fg_t_og_h_dg_n1_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 160 # 8-byte Folded Reload + ld.d $a0, $sp, 168 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.fg_t_og_h_dg_n2_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.gapz_n1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 168 # 8-byte Folded Reload + ld.d $a0, $sp, 176 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.gapz_n2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 ld.d $a0, $s3, %pc_lo12(Q__align_gapmap.cpmx1) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 352 # 8-byte Folded Reload + ld.d $a0, $sp, 336 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.cpmx2) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 240 # 8-byte Folded Reload + ld.d $a0, $sp, 248 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.floatwork) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 232 # 8-byte Folded Reload + ld.d $a0, $sp, 240 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.intwork) pcaddu18i $ra, %call36(FreeIntMtx) jirl $ra, $ra, 0 ld.d $a0, $sp, 304 # 8-byte Folded Reload ld.w $s6, $a0, %pc_lo12(Q__align_gapmap.orlgth1) - ld.d $a0, $sp, 72 # 8-byte Folded Reload + ld.d $a0, $sp, 80 # 8-byte Folded Reload ld.w $s0, $a0, %pc_lo12(Q__align_gapmap.orlgth2) .LBB6_7: - pcalau12i $a0, %pc_hi20(.LCPI6_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI6_0) - movgr2fr.w $fa1, $s1 - ffint.d.w $fa1, $fa1 - fmul.d $fa1, $fa1, $fa0 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 + movgr2fr.w $fa0, $s1 + ffint.d.w $fa0, $fa0 + lu12i.w $a0, -209716 + ori $a0, $a0, 3277 + lu32i.d $a0, 314572 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a0, $fa0 slt $a1, $a0, $s6 masknez $a0, $a0, $a1 maskeqz $a1, $s6, $a1 or $s6, $a1, $a0 addi.w $s1, $s6, 100 - ld.d $a0, $sp, 328 # 8-byte Folded Reload - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + ld.d $a0, $sp, 320 # 8-byte Folded Reload + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 slt $a1, $a0, $s0 @@ -5229,7 +5222,7 @@ Q__align_gapmap: # @Q__align_gapmap move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 272 # 8-byte Folded Reload + ld.d $a1, $sp, 160 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.w1) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -5244,22 +5237,22 @@ Q__align_gapmap: # @Q__align_gapmap move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 248 # 8-byte Folded Reload + ld.d $a1, $sp, 256 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.initverticalw) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 120 # 8-byte Folded Reload + ld.d $a1, $sp, 128 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.lastverticalw) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 224 # 8-byte Folded Reload + ld.d $a1, $sp, 232 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.m) move $a0, $s4 pcaddu18i $ra, %call36(AllocateIntVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 88 # 8-byte Folded Reload + ld.d $a1, $sp, 96 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.mp) pcalau12i $a0, %got_pc_hi20(njob) ld.d $a0, $a0, %got_pc_lo12(njob) @@ -5272,12 +5265,12 @@ Q__align_gapmap: # @Q__align_gapmap move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 376 # 8-byte Folded Reload + ld.d $a1, $sp, 368 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.digf1) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 360 # 8-byte Folded Reload + ld.d $a1, $sp, 352 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.digf2) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -5292,12 +5285,12 @@ Q__align_gapmap: # @Q__align_gapmap move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 384 # 8-byte Folded Reload + ld.d $a1, $sp, 376 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.gapz1) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 368 # 8-byte Folded Reload + ld.d $a1, $sp, 360 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.gapz2) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -5312,72 +5305,72 @@ Q__align_gapmap: # @Q__align_gapmap move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 424 # 8-byte Folded Reload + ld.d $a1, $sp, 416 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.ogcp1g) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 408 # 8-byte Folded Reload + ld.d $a1, $sp, 400 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.ogcp2g) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 416 # 8-byte Folded Reload + ld.d $a1, $sp, 408 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.fgcp1g) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 400 # 8-byte Folded Reload + ld.d $a1, $sp, 392 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.fgcp2g) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 336 # 8-byte Folded Reload + ld.d $a1, $sp, 328 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.og_h_dg_n1_p) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 216 # 8-byte Folded Reload + ld.d $a1, $sp, 224 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.og_h_dg_n2_p) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 264 # 8-byte Folded Reload + ld.d $a1, $sp, 272 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.fg_h_dg_n1_p) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 208 # 8-byte Folded Reload + ld.d $a1, $sp, 216 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.fg_h_dg_n2_p) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 200 # 8-byte Folded Reload + ld.d $a1, $sp, 208 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.og_t_fg_h_dg_n1_p) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 176 # 8-byte Folded Reload + ld.d $a1, $sp, 184 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.og_t_fg_h_dg_n2_p) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 192 # 8-byte Folded Reload + ld.d $a1, $sp, 200 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.fg_t_og_h_dg_n1_p) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 160 # 8-byte Folded Reload + ld.d $a1, $sp, 168 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.fg_t_og_h_dg_n2_p) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 184 # 8-byte Folded Reload + ld.d $a1, $sp, 192 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.gapz_n1) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 168 # 8-byte Folded Reload + ld.d $a1, $sp, 176 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.gapz_n2) ori $a0, $zero, 26 move $a1, $s5 @@ -5389,7 +5382,7 @@ Q__align_gapmap: # @Q__align_gapmap ld.d $s8, $sp, 304 # 8-byte Folded Reload pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 352 # 8-byte Folded Reload + ld.d $a1, $sp, 336 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.cpmx2) slt $a0, $s2, $s1 masknez $a1, $s2, $a0 @@ -5400,33 +5393,33 @@ Q__align_gapmap: # @Q__align_gapmap move $a0, $s3 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 240 # 8-byte Folded Reload + ld.d $a1, $sp, 248 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.floatwork) ori $a1, $zero, 27 move $a0, $s3 - ld.d $s4, $sp, 256 # 8-byte Folded Reload + ld.d $s4, $sp, 264 # 8-byte Folded Reload pcaddu18i $ra, %call36(AllocateIntMtx) jirl $ra, $ra, 0 move $a7, $fp - ld.d $a1, $sp, 232 # 8-byte Folded Reload + ld.d $a1, $sp, 240 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(Q__align_gapmap.intwork) st.w $s6, $s8, %pc_lo12(Q__align_gapmap.orlgth1) - ld.d $a0, $sp, 72 # 8-byte Folded Reload + ld.d $a0, $sp, 80 # 8-byte Folded Reload st.w $s0, $a0, %pc_lo12(Q__align_gapmap.orlgth2) - ld.d $s3, $sp, 104 # 8-byte Folded Reload - ld.d $s2, $sp, 96 # 8-byte Folded Reload + ld.d $s3, $sp, 112 # 8-byte Folded Reload + ld.d $s2, $sp, 104 # 8-byte Folded Reload ld.d $a6, $sp, 344 # 8-byte Folded Reload ld.d $s8, $sp, 136 # 8-byte Folded Reload ld.d $fp, $sp, 40 # 8-byte Folded Reload .LBB6_8: - ld.d $a5, $sp, 392 # 8-byte Folded Reload - ld.d $a0, $sp, 440 # 8-byte Folded Reload + ld.d $a5, $sp, 384 # 8-byte Folded Reload + ld.d $a0, $sp, 432 # 8-byte Folded Reload blez $a0, .LBB6_11 # %bb.9: # %.lr.ph ld.d $a0, $a7, %pc_lo12(Q__align_gapmap.mseq) ld.d $a1, $s3, %pc_lo12(Q__align_gapmap.mseq1) - ld.d $a2, $sp, 440 # 8-byte Folded Reload - ld.d $a3, $sp, 432 # 8-byte Folded Reload + ld.d $a2, $sp, 432 # 8-byte Folded Reload + ld.d $a3, $sp, 424 # 8-byte Folded Reload .p2align 4, , 16 .LBB6_10: # =>This Inner Loop Header: Depth=1 ld.d $a4, $a0, 0 @@ -5442,9 +5435,9 @@ Q__align_gapmap: # @Q__align_gapmap blez $a5, .LBB6_14 # %bb.12: # %.lr.ph564 ld.d $a1, $a7, %pc_lo12(Q__align_gapmap.mseq) - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 120 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.mseq2) - ld.d $a2, $sp, 440 # 8-byte Folded Reload + ld.d $a2, $sp, 432 # 8-byte Folded Reload alsl.d $a1, $a2, $a1, 3 move $a2, $a5 move $a3, $s7 @@ -5489,7 +5482,7 @@ Q__align_gapmap: # @Q__align_gapmap ld.d $a0, $sp, 304 # 8-byte Folded Reload ld.w $s6, $a0, %pc_lo12(Q__align_gapmap.orlgth1) ld.w $a0, $fp, 0 - ld.d $a1, $sp, 72 # 8-byte Folded Reload + ld.d $a1, $sp, 80 # 8-byte Folded Reload ld.w $s0, $a1, %pc_lo12(Q__align_gapmap.orlgth2) ld.w $a1, $s1, 0 .LBB6_20: @@ -5510,34 +5503,34 @@ Q__align_gapmap: # @Q__align_gapmap st.d $a0, $a1, 0 st.w $s2, $fp, 0 st.w $s0, $s1, 0 - ld.d $s2, $sp, 96 # 8-byte Folded Reload + ld.d $s2, $sp, 104 # 8-byte Folded Reload .LBB6_21: ld.d $a1, $s4, %pc_lo12(Q__align_gapmap.cpmx1) ffint.s.w $fa0, $fs0 vst $vr0, $sp, 304 # 16-byte Folded Spill pcalau12i $a2, %pc_hi20(Q__align_gapmap.ijp) - st.d $a2, $sp, 72 # 8-byte Folded Spill + st.d $a2, $sp, 80 # 8-byte Folded Spill st.d $a0, $a2, %pc_lo12(Q__align_gapmap.ijp) - ld.d $s3, $sp, 432 # 8-byte Folded Reload + ld.d $s3, $sp, 424 # 8-byte Folded Reload move $a0, $s3 move $a2, $s8 move $a3, $s2 - ld.d $s6, $sp, 440 # 8-byte Folded Reload + ld.d $s6, $sp, 432 # 8-byte Folded Reload move $a4, $s6 pcaddu18i $ra, %call36(cpmx_calc_new) jirl $ra, $ra, 0 - ld.d $a0, $sp, 352 # 8-byte Folded Reload + ld.d $a0, $sp, 336 # 8-byte Folded Reload ld.d $a1, $a0, %pc_lo12(Q__align_gapmap.cpmx2) move $a0, $s7 ld.d $s4, $sp, 56 # 8-byte Folded Reload move $a2, $s4 ld.d $s1, $sp, 344 # 8-byte Folded Reload move $a3, $s1 - ld.d $fp, $sp, 392 # 8-byte Folded Reload + ld.d $fp, $sp, 384 # 8-byte Folded Reload move $a4, $fp pcaddu18i $ra, %call36(cpmx_calc_new) jirl $ra, $ra, 0 - ld.d $a0, $sp, 424 # 8-byte Folded Reload + ld.d $a0, $sp, 416 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.ogcp1g) beqz $s5, .LBB6_23 # %bb.22: @@ -5545,7 +5538,7 @@ Q__align_gapmap: # @Q__align_gapmap st.d $a1, $sp, 40 # 8-byte Folded Spill ld.d $a6, $sp, 568 ld.d $s6, $sp, 560 - ld.d $a1, $sp, 440 # 8-byte Folded Reload + ld.d $a1, $sp, 432 # 8-byte Folded Reload move $a2, $s3 move $a3, $s8 move $a4, $s2 @@ -5556,7 +5549,7 @@ Q__align_gapmap: # @Q__align_gapmap move $s3, $a6 pcaddu18i $ra, %call36(new_OpeningGapCount_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 408 # 8-byte Folded Reload + ld.d $a0, $sp, 400 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.ogcp2g) move $a1, $fp move $a2, $s7 @@ -5569,9 +5562,9 @@ Q__align_gapmap: # @Q__align_gapmap move $a6, $s6 pcaddu18i $ra, %call36(new_OpeningGapCount_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 416 # 8-byte Folded Reload + ld.d $a0, $sp, 408 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.fgcp1g) - ld.d $a1, $sp, 440 # 8-byte Folded Reload + ld.d $a1, $sp, 432 # 8-byte Folded Reload move $a2, $s8 move $s8, $s1 ld.d $s1, $sp, 136 # 8-byte Folded Reload @@ -5583,7 +5576,7 @@ Q__align_gapmap: # @Q__align_gapmap st.d $s3, $sp, 32 # 8-byte Folded Spill pcaddu18i $ra, %call36(new_FinalGapCount_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 400 # 8-byte Folded Reload + ld.d $a0, $sp, 392 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.fgcp2g) move $a1, $fp move $a2, $s7 @@ -5594,10 +5587,10 @@ Q__align_gapmap: # @Q__align_gapmap move $s3, $s6 pcaddu18i $ra, %call36(new_FinalGapCount_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 376 # 8-byte Folded Reload + ld.d $a0, $sp, 368 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.digf1) - ld.d $a1, $sp, 440 # 8-byte Folded Reload - ld.d $fp, $sp, 432 # 8-byte Folded Reload + ld.d $a1, $sp, 432 # 8-byte Folded Reload + ld.d $fp, $sp, 424 # 8-byte Folded Reload move $a2, $fp move $a3, $s1 move $a4, $s2 @@ -5606,9 +5599,9 @@ Q__align_gapmap: # @Q__align_gapmap move $a6, $s0 pcaddu18i $ra, %call36(getdigapfreq_part) jirl $ra, $ra, 0 - ld.d $a0, $sp, 360 # 8-byte Folded Reload + ld.d $a0, $sp, 352 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.digf2) - ld.d $s0, $sp, 392 # 8-byte Folded Reload + ld.d $s0, $sp, 384 # 8-byte Folded Reload move $a1, $s0 move $a2, $s7 move $s6, $s4 @@ -5621,7 +5614,7 @@ Q__align_gapmap: # @Q__align_gapmap jirl $ra, $ra, 0 ld.d $a0, $sp, 144 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.diaf1) - ld.d $a1, $sp, 440 # 8-byte Folded Reload + ld.d $a1, $sp, 432 # 8-byte Folded Reload move $a2, $fp move $a3, $s1 move $a4, $s2 @@ -5638,7 +5631,7 @@ Q__align_gapmap: # @Q__align_gapmap move $s3, $s6 move $a4, $s8 move $a5, $s4 - ld.d $s6, $sp, 440 # 8-byte Folded Reload + ld.d $s6, $sp, 432 # 8-byte Folded Reload ld.d $a6, $sp, 40 # 8-byte Folded Reload pcaddu18i $ra, %call36(getdiaminofreq_part) jirl $ra, $ra, 0 @@ -5660,7 +5653,7 @@ Q__align_gapmap: # @Q__align_gapmap move $a4, $s8 pcaddu18i $ra, %call36(getgapfreq) jirl $ra, $ra, 0 - ld.d $a0, $sp, 384 # 8-byte Folded Reload + ld.d $a0, $sp, 376 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.gapz1) move $a1, $s6 move $a2, $fp @@ -5670,7 +5663,7 @@ Q__align_gapmap: # @Q__align_gapmap move $a5, $s0 pcaddu18i $ra, %call36(getgapfreq_zure_part) jirl $ra, $ra, 0 - ld.d $a0, $sp, 368 # 8-byte Folded Reload + ld.d $a0, $sp, 360 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.gapz2) move $a1, $s5 move $a2, $s7 @@ -5687,7 +5680,7 @@ Q__align_gapmap: # @Q__align_gapmap move $a4, $s2 pcaddu18i $ra, %call36(st_OpeningGapCount) jirl $ra, $ra, 0 - ld.d $a0, $sp, 408 # 8-byte Folded Reload + ld.d $a0, $sp, 400 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.ogcp2g) move $a1, $fp move $a2, $s7 @@ -5695,7 +5688,7 @@ Q__align_gapmap: # @Q__align_gapmap move $a4, $s1 pcaddu18i $ra, %call36(st_OpeningGapCount) jirl $ra, $ra, 0 - ld.d $a0, $sp, 416 # 8-byte Folded Reload + ld.d $a0, $sp, 408 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.fgcp1g) move $a1, $s6 move $a2, $s3 @@ -5703,7 +5696,7 @@ Q__align_gapmap: # @Q__align_gapmap move $a4, $s2 pcaddu18i $ra, %call36(st_FinalGapCount_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 400 # 8-byte Folded Reload + ld.d $a0, $sp, 392 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.fgcp2g) move $a1, $fp move $a2, $s7 @@ -5711,7 +5704,7 @@ Q__align_gapmap: # @Q__align_gapmap move $a4, $s1 pcaddu18i $ra, %call36(st_FinalGapCount_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 376 # 8-byte Folded Reload + ld.d $a0, $sp, 368 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.digf1) move $a1, $s6 move $a2, $s3 @@ -5719,7 +5712,7 @@ Q__align_gapmap: # @Q__align_gapmap move $a4, $s2 pcaddu18i $ra, %call36(getdigapfreq_st) jirl $ra, $ra, 0 - ld.d $a0, $sp, 360 # 8-byte Folded Reload + ld.d $a0, $sp, 352 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.digf2) move $a1, $fp move $a2, $s7 @@ -5759,7 +5752,7 @@ Q__align_gapmap: # @Q__align_gapmap move $a4, $s1 pcaddu18i $ra, %call36(getgapfreq) jirl $ra, $ra, 0 - ld.d $a0, $sp, 384 # 8-byte Folded Reload + ld.d $a0, $sp, 376 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.gapz1) move $a1, $s6 move $a2, $s3 @@ -5767,7 +5760,7 @@ Q__align_gapmap: # @Q__align_gapmap move $a4, $s2 pcaddu18i $ra, %call36(getgapfreq_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 368 # 8-byte Folded Reload + ld.d $a0, $sp, 360 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.gapz2) move $a1, $fp move $a2, $s7 @@ -5776,35 +5769,35 @@ Q__align_gapmap: # @Q__align_gapmap pcaddu18i $ra, %call36(getgapfreq_zure) jirl $ra, $ra, 0 .LBB6_24: - ld.d $s4, $sp, 80 # 8-byte Folded Reload + ld.d $s4, $sp, 88 # 8-byte Folded Reload addi.w $a0, $zero, -1 vld $vr18, $sp, 304 # 16-byte Folded Reload fcvt.d.s $fa0, $ft10 - ld.d $ra, $sp, 392 # 8-byte Folded Reload + ld.d $ra, $sp, 384 # 8-byte Folded Reload st.d $a0, $sp, 296 # 8-byte Folded Spill - ld.d $s8, $sp, 248 # 8-byte Folded Reload - ld.d $s5, $sp, 104 # 8-byte Folded Reload + ld.d $s8, $sp, 256 # 8-byte Folded Reload + ld.d $s5, $sp, 112 # 8-byte Folded Reload blt $s1, $a0, .LBB6_29 # %bb.25: # %.lr.ph567 - ld.d $a0, $sp, 408 # 8-byte Folded Reload + ld.d $a0, $sp, 400 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.ogcp2g) - ld.d $a1, $sp, 360 # 8-byte Folded Reload + ld.d $a1, $sp, 352 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(Q__align_gapmap.digf2) - ld.d $a2, $sp, 216 # 8-byte Folded Reload + ld.d $a2, $sp, 224 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(Q__align_gapmap.og_h_dg_n2_p) - ld.d $a3, $sp, 400 # 8-byte Folded Reload + ld.d $a3, $sp, 392 # 8-byte Folded Reload ld.d $a3, $a3, %pc_lo12(Q__align_gapmap.fgcp2g) - ld.d $a4, $sp, 208 # 8-byte Folded Reload + ld.d $a4, $sp, 216 # 8-byte Folded Reload ld.d $a4, $a4, %pc_lo12(Q__align_gapmap.fg_h_dg_n2_p) - ld.d $a5, $sp, 176 # 8-byte Folded Reload + ld.d $a5, $sp, 184 # 8-byte Folded Reload ld.d $a5, $a5, %pc_lo12(Q__align_gapmap.og_t_fg_h_dg_n2_p) - ld.d $a6, $sp, 160 # 8-byte Folded Reload + ld.d $a6, $sp, 168 # 8-byte Folded Reload ld.d $a6, $a6, %pc_lo12(Q__align_gapmap.fg_t_og_h_dg_n2_p) - ld.d $a7, $sp, 368 # 8-byte Folded Reload + ld.d $a7, $sp, 360 # 8-byte Folded Reload ld.d $a7, $a7, %pc_lo12(Q__align_gapmap.gapz2) - ld.d $t0, $sp, 168 # 8-byte Folded Reload + ld.d $t0, $sp, 176 # 8-byte Folded Reload ld.d $t0, $t0, %pc_lo12(Q__align_gapmap.gapz_n2) - ld.d $t1, $sp, 328 # 8-byte Folded Reload + ld.d $t1, $sp, 320 # 8-byte Folded Reload addi.d $t3, $t1, 2 bstrpick.d $t1, $t3, 31, 0 ori $t2, $zero, 40 @@ -5884,23 +5877,23 @@ Q__align_gapmap: # @Q__align_gapmap ld.d $a0, $sp, 296 # 8-byte Folded Reload blt $s2, $a0, .LBB6_34 # %bb.30: # %.lr.ph571 - ld.d $a0, $sp, 424 # 8-byte Folded Reload + ld.d $a0, $sp, 416 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(Q__align_gapmap.ogcp1g) - ld.d $a1, $sp, 376 # 8-byte Folded Reload + ld.d $a1, $sp, 368 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(Q__align_gapmap.digf1) - ld.d $a2, $sp, 336 # 8-byte Folded Reload + ld.d $a2, $sp, 328 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(Q__align_gapmap.og_h_dg_n1_p) - ld.d $a3, $sp, 416 # 8-byte Folded Reload + ld.d $a3, $sp, 408 # 8-byte Folded Reload ld.d $a3, $a3, %pc_lo12(Q__align_gapmap.fgcp1g) - ld.d $a4, $sp, 264 # 8-byte Folded Reload + ld.d $a4, $sp, 272 # 8-byte Folded Reload ld.d $a4, $a4, %pc_lo12(Q__align_gapmap.fg_h_dg_n1_p) - ld.d $a5, $sp, 200 # 8-byte Folded Reload + ld.d $a5, $sp, 208 # 8-byte Folded Reload ld.d $a5, $a5, %pc_lo12(Q__align_gapmap.og_t_fg_h_dg_n1_p) - ld.d $a6, $sp, 192 # 8-byte Folded Reload + ld.d $a6, $sp, 200 # 8-byte Folded Reload ld.d $a6, $a6, %pc_lo12(Q__align_gapmap.fg_t_og_h_dg_n1_p) - ld.d $a7, $sp, 384 # 8-byte Folded Reload + ld.d $a7, $sp, 376 # 8-byte Folded Reload ld.d $a7, $a7, %pc_lo12(Q__align_gapmap.gapz1) - ld.d $t0, $sp, 184 # 8-byte Folded Reload + ld.d $t0, $sp, 192 # 8-byte Folded Reload ld.d $t0, $t0, %pc_lo12(Q__align_gapmap.gapz_n1) addi.d $t3, $s4, 2 bstrpick.d $t1, $t3, 31, 0 @@ -5982,13 +5975,13 @@ Q__align_gapmap: # @Q__align_gapmap st.d $a0, $sp, 288 # 8-byte Folded Spill ld.d $a0, $sp, 584 st.d $a0, $sp, 280 # 8-byte Folded Spill - ld.d $a0, $sp, 272 # 8-byte Folded Reload + ld.d $a0, $sp, 160 # 8-byte Folded Reload ld.d $t5, $a0, %pc_lo12(Q__align_gapmap.w1) ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $t6, $a0, %pc_lo12(Q__align_gapmap.w2) pcalau12i $a0, %got_pc_hi20(RNAscoremtx) ld.d $a0, $a0, %got_pc_lo12(RNAscoremtx) - st.d $a0, $sp, 384 # 8-byte Folded Spill + st.d $a0, $sp, 376 # 8-byte Folded Spill ld.bu $a0, $a0, 0 ori $a1, $zero, 114 move $s0, $t5 @@ -6008,13 +6001,13 @@ Q__align_gapmap: # @Q__align_gapmap b .LBB6_38 .LBB6_37: ld.d $s3, $s8, %pc_lo12(Q__align_gapmap.initverticalw) - ld.d $a0, $sp, 352 # 8-byte Folded Reload + ld.d $a0, $sp, 336 # 8-byte Folded Reload ld.d $a1, $a0, %pc_lo12(Q__align_gapmap.cpmx2) - ld.d $a0, $sp, 256 # 8-byte Folded Reload + ld.d $a0, $sp, 264 # 8-byte Folded Reload ld.d $a2, $a0, %pc_lo12(Q__align_gapmap.cpmx1) - ld.d $a0, $sp, 240 # 8-byte Folded Reload + ld.d $a0, $sp, 248 # 8-byte Folded Reload ld.d $a5, $a0, %pc_lo12(Q__align_gapmap.floatwork) - ld.d $a0, $sp, 232 # 8-byte Folded Reload + ld.d $a0, $sp, 240 # 8-byte Folded Reload ld.d $a6, $a0, %pc_lo12(Q__align_gapmap.intwork) ori $a7, $zero, 1 move $a0, $s3 @@ -6023,10 +6016,10 @@ Q__align_gapmap: # @Q__align_gapmap pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 .LBB6_38: # %clearvec.exit - ld.d $ra, $sp, 392 # 8-byte Folded Reload + ld.d $ra, $sp, 384 # 8-byte Folded Reload ld.d $t4, $sp, 344 # 8-byte Folded Reload vld $vr18, $sp, 304 # 16-byte Folded Reload - ld.d $a0, $sp, 128 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload move $t5, $s0 move $t6, $s1 beqz $a0, .LBB6_43 @@ -6055,11 +6048,11 @@ Q__align_gapmap: # @Q__align_gapmap move $s3, $a4 bnez $a2, .LBB6_41 .LBB6_42: - st.d $zero, $sp, 152 # 8-byte Folded Spill + st.d $zero, $sp, 160 # 8-byte Folded Spill b .LBB6_111 .LBB6_43: ori $a0, $zero, 1 - st.d $a0, $sp, 152 # 8-byte Folded Spill + st.d $a0, $sp, 160 # 8-byte Folded Spill b .LBB6_111 .LBB6_44: # %vector.memcheck alsl.d $s2, $t1, $a2, 2 @@ -6068,114 +6061,114 @@ Q__align_gapmap: # @Q__align_gapmap sltu $t4, $a4, $s2 and $t4, $t2, $t4 move $t2, $zero - bnez $t4, .LBB6_282 + bnez $t4, .LBB6_283 # %bb.45: # %vector.memcheck alsl.d $s0, $t1, $a5, 2 sltu $t4, $a2, $s0 sltu $t5, $a5, $s2 and $t4, $t4, $t5 - bnez $t4, .LBB6_282 + bnez $t4, .LBB6_283 # %bb.46: # %vector.memcheck alsl.d $t5, $t1, $a6, 2 sltu $t4, $a2, $t5 sltu $t6, $a6, $s2 and $t4, $t4, $t6 - bnez $t4, .LBB6_282 + bnez $t4, .LBB6_283 # %bb.47: # %vector.memcheck alsl.d $t4, $t1, $t0, 2 sltu $t6, $a2, $t4 sltu $t7, $t0, $s2 and $t6, $t6, $t7 - bnez $t6, .LBB6_282 + bnez $t6, .LBB6_283 # %bb.48: # %vector.memcheck alsl.d $t6, $t1, $a0, 2 sltu $t7, $a2, $t6 sltu $t8, $a0, $s2 and $t7, $t7, $t8 - bnez $t7, .LBB6_282 + bnez $t7, .LBB6_283 # %bb.49: # %vector.memcheck alsl.d $t7, $t1, $a1, 2 sltu $t8, $a2, $t7 sltu $fp, $a1, $s2 and $t8, $t8, $fp - bnez $t8, .LBB6_282 + bnez $t8, .LBB6_283 # %bb.50: # %vector.memcheck alsl.d $t8, $t1, $a3, 2 sltu $fp, $a2, $t8 sltu $s3, $a3, $s2 and $fp, $fp, $s3 - bnez $fp, .LBB6_282 + bnez $fp, .LBB6_283 # %bb.51: # %vector.memcheck alsl.d $fp, $t1, $a7, 2 sltu $s3, $a2, $fp sltu $s2, $a7, $s2 and $s2, $s3, $s2 - bnez $s2, .LBB6_282 + bnez $s2, .LBB6_283 # %bb.52: # %vector.memcheck sltu $s2, $a4, $s0 sltu $s3, $a5, $s1 and $s2, $s2, $s3 - bnez $s2, .LBB6_282 + bnez $s2, .LBB6_283 # %bb.53: # %vector.memcheck sltu $s2, $a4, $t5 sltu $s3, $a6, $s1 and $s2, $s2, $s3 - bnez $s2, .LBB6_282 + bnez $s2, .LBB6_283 # %bb.54: # %vector.memcheck sltu $s2, $a4, $t4 sltu $s3, $t0, $s1 and $s2, $s2, $s3 - bnez $s2, .LBB6_282 + bnez $s2, .LBB6_283 # %bb.55: # %vector.memcheck sltu $s2, $a4, $t6 sltu $s3, $a0, $s1 and $s2, $s2, $s3 - bnez $s2, .LBB6_282 + bnez $s2, .LBB6_283 # %bb.56: # %vector.memcheck sltu $s2, $a4, $t7 sltu $s3, $a1, $s1 and $s2, $s2, $s3 - bnez $s2, .LBB6_282 + bnez $s2, .LBB6_283 # %bb.57: # %vector.memcheck sltu $s2, $a4, $t8 sltu $s3, $a3, $s1 and $s2, $s2, $s3 - bnez $s2, .LBB6_282 + bnez $s2, .LBB6_283 # %bb.58: # %vector.memcheck sltu $s2, $a4, $fp sltu $s1, $a7, $s1 and $s1, $s2, $s1 - bnez $s1, .LBB6_282 + bnez $s1, .LBB6_283 # %bb.59: # %vector.memcheck sltu $s1, $a5, $t5 sltu $s2, $a6, $s0 and $s1, $s1, $s2 - bnez $s1, .LBB6_282 + bnez $s1, .LBB6_283 # %bb.60: # %vector.memcheck sltu $s1, $a5, $t4 sltu $s2, $t0, $s0 and $s1, $s1, $s2 - bnez $s1, .LBB6_282 + bnez $s1, .LBB6_283 # %bb.61: # %vector.memcheck sltu $s1, $a5, $t6 sltu $s2, $a0, $s0 and $s1, $s1, $s2 - bnez $s1, .LBB6_282 + bnez $s1, .LBB6_283 # %bb.62: # %vector.memcheck sltu $s1, $a5, $t7 sltu $s2, $a1, $s0 and $s1, $s1, $s2 - bnez $s1, .LBB6_282 + bnez $s1, .LBB6_283 # %bb.63: # %vector.memcheck sltu $s1, $a5, $t8 sltu $s2, $a3, $s0 and $s1, $s1, $s2 - bnez $s1, .LBB6_282 + bnez $s1, .LBB6_283 # %bb.64: # %vector.memcheck sltu $s1, $a5, $fp sltu $s0, $a7, $s0 and $s0, $s1, $s0 - ld.d $s2, $sp, 96 # 8-byte Folded Reload + ld.d $s2, $sp, 104 # 8-byte Folded Reload bnez $s0, .LBB6_27 # %bb.65: # %vector.memcheck sltu $s0, $a6, $t4 @@ -6373,7 +6366,7 @@ Q__align_gapmap: # @Q__align_gapmap addi.d $t3, $t3, 16 bnez $s2, .LBB6_75 # %bb.76: # %middle.block - ld.d $s2, $sp, 96 # 8-byte Folded Reload + ld.d $s2, $sp, 104 # 8-byte Folded Reload bne $t1, $t2, .LBB6_27 b .LBB6_29 .LBB6_77: # %vector.memcheck883 @@ -6383,114 +6376,114 @@ Q__align_gapmap: # @Q__align_gapmap sltu $t4, $a4, $s2 and $t4, $t2, $t4 move $t2, $zero - bnez $t4, .LBB6_283 + bnez $t4, .LBB6_284 # %bb.78: # %vector.memcheck883 alsl.d $s0, $t1, $a5, 2 sltu $t4, $a2, $s0 sltu $t5, $a5, $s2 and $t4, $t4, $t5 - bnez $t4, .LBB6_283 + bnez $t4, .LBB6_284 # %bb.79: # %vector.memcheck883 alsl.d $t5, $t1, $a6, 2 sltu $t4, $a2, $t5 sltu $t6, $a6, $s2 and $t4, $t4, $t6 - bnez $t4, .LBB6_283 + bnez $t4, .LBB6_284 # %bb.80: # %vector.memcheck883 alsl.d $t4, $t1, $t0, 2 sltu $t6, $a2, $t4 sltu $t7, $t0, $s2 and $t6, $t6, $t7 - bnez $t6, .LBB6_283 + bnez $t6, .LBB6_284 # %bb.81: # %vector.memcheck883 alsl.d $t6, $t1, $a0, 2 sltu $t7, $a2, $t6 sltu $t8, $a0, $s2 and $t7, $t7, $t8 - bnez $t7, .LBB6_283 + bnez $t7, .LBB6_284 # %bb.82: # %vector.memcheck883 alsl.d $t7, $t1, $a1, 2 sltu $t8, $a2, $t7 sltu $fp, $a1, $s2 and $t8, $t8, $fp - bnez $t8, .LBB6_283 + bnez $t8, .LBB6_284 # %bb.83: # %vector.memcheck883 alsl.d $t8, $t1, $a3, 2 sltu $fp, $a2, $t8 sltu $s3, $a3, $s2 and $fp, $fp, $s3 - bnez $fp, .LBB6_283 + bnez $fp, .LBB6_284 # %bb.84: # %vector.memcheck883 alsl.d $fp, $t1, $a7, 2 sltu $s3, $a2, $fp sltu $s2, $a7, $s2 and $s2, $s3, $s2 - bnez $s2, .LBB6_283 + bnez $s2, .LBB6_284 # %bb.85: # %vector.memcheck883 sltu $s2, $a4, $s0 sltu $s3, $a5, $s1 and $s2, $s2, $s3 - bnez $s2, .LBB6_283 + bnez $s2, .LBB6_284 # %bb.86: # %vector.memcheck883 sltu $s2, $a4, $t5 sltu $s3, $a6, $s1 and $s2, $s2, $s3 - bnez $s2, .LBB6_283 + bnez $s2, .LBB6_284 # %bb.87: # %vector.memcheck883 sltu $s2, $a4, $t4 sltu $s3, $t0, $s1 and $s2, $s2, $s3 - bnez $s2, .LBB6_283 + bnez $s2, .LBB6_284 # %bb.88: # %vector.memcheck883 sltu $s2, $a4, $t6 sltu $s3, $a0, $s1 and $s2, $s2, $s3 - bnez $s2, .LBB6_283 + bnez $s2, .LBB6_284 # %bb.89: # %vector.memcheck883 sltu $s2, $a4, $t7 sltu $s3, $a1, $s1 and $s2, $s2, $s3 - bnez $s2, .LBB6_283 + bnez $s2, .LBB6_284 # %bb.90: # %vector.memcheck883 sltu $s2, $a4, $t8 sltu $s3, $a3, $s1 and $s2, $s2, $s3 - bnez $s2, .LBB6_283 + bnez $s2, .LBB6_284 # %bb.91: # %vector.memcheck883 sltu $s2, $a4, $fp sltu $s1, $a7, $s1 and $s1, $s2, $s1 - bnez $s1, .LBB6_283 + bnez $s1, .LBB6_284 # %bb.92: # %vector.memcheck883 sltu $s1, $a5, $t5 sltu $s2, $a6, $s0 and $s1, $s1, $s2 - bnez $s1, .LBB6_283 + bnez $s1, .LBB6_284 # %bb.93: # %vector.memcheck883 sltu $s1, $a5, $t4 sltu $s2, $t0, $s0 and $s1, $s1, $s2 - bnez $s1, .LBB6_283 + bnez $s1, .LBB6_284 # %bb.94: # %vector.memcheck883 sltu $s1, $a5, $t6 sltu $s2, $a0, $s0 and $s1, $s1, $s2 - bnez $s1, .LBB6_283 + bnez $s1, .LBB6_284 # %bb.95: # %vector.memcheck883 sltu $s1, $a5, $t7 sltu $s2, $a1, $s0 and $s1, $s1, $s2 - bnez $s1, .LBB6_283 + bnez $s1, .LBB6_284 # %bb.96: # %vector.memcheck883 sltu $s1, $a5, $t8 sltu $s2, $a3, $s0 and $s1, $s1, $s2 - bnez $s1, .LBB6_283 + bnez $s1, .LBB6_284 # %bb.97: # %vector.memcheck883 sltu $s1, $a5, $fp sltu $s0, $a7, $s0 and $s0, $s1, $s0 - ld.d $s2, $sp, 96 # 8-byte Folded Reload + ld.d $s2, $sp, 104 # 8-byte Folded Reload bnez $s0, .LBB6_32 # %bb.98: # %vector.memcheck883 sltu $s0, $a6, $t4 @@ -6688,24 +6681,24 @@ Q__align_gapmap: # @Q__align_gapmap addi.d $t3, $t3, 16 bnez $s2, .LBB6_108 # %bb.109: # %middle.block1033 - ld.d $s2, $sp, 96 # 8-byte Folded Reload + ld.d $s2, $sp, 104 # 8-byte Folded Reload bne $t1, $t2, .LBB6_32 b .LBB6_34 .LBB6_110: # %clearvec.exit.thread - ld.d $a0, $sp, 128 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload sltui $a0, $a0, 1 - st.d $a0, $sp, 152 # 8-byte Folded Spill + st.d $a0, $sp, 160 # 8-byte Folded Spill ld.d $t4, $sp, 344 # 8-byte Folded Reload .LBB6_111: # %imp_match_out_vead_tateQ_gapmap.exit - ld.d $a0, $sp, 384 # 8-byte Folded Reload + ld.d $a0, $sp, 376 # 8-byte Folded Reload ld.bu $a0, $a0, 0 ori $a1, $zero, 114 - ld.d $fp, $sp, 256 # 8-byte Folded Reload + ld.d $fp, $sp, 264 # 8-byte Folded Reload bne $a0, $a1, .LBB6_114 # %bb.112: beqz $t4, .LBB6_119 # %bb.113: # %.lr.ph.preheader.i531 - ld.d $a0, $sp, 328 # 8-byte Folded Reload + ld.d $a0, $sp, 320 # 8-byte Folded Reload slli.d $a0, $a0, 2 bstrpick.d $a0, $a0, 33, 2 slli.d $a2, $a0, 2 @@ -6716,11 +6709,11 @@ Q__align_gapmap: # @Q__align_gapmap b .LBB6_115 .LBB6_114: ld.d $a1, $fp, %pc_lo12(Q__align_gapmap.cpmx1) - ld.d $a0, $sp, 352 # 8-byte Folded Reload + ld.d $a0, $sp, 336 # 8-byte Folded Reload ld.d $a2, $a0, %pc_lo12(Q__align_gapmap.cpmx2) - ld.d $a0, $sp, 240 # 8-byte Folded Reload + ld.d $a0, $sp, 248 # 8-byte Folded Reload ld.d $a5, $a0, %pc_lo12(Q__align_gapmap.floatwork) - ld.d $a0, $sp, 232 # 8-byte Folded Reload + ld.d $a0, $sp, 240 # 8-byte Folded Reload ld.d $a6, $a0, %pc_lo12(Q__align_gapmap.intwork) ori $a7, $zero, 1 move $a0, $t5 @@ -6729,10 +6722,10 @@ Q__align_gapmap: # @Q__align_gapmap pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 .LBB6_115: # %clearvec.exit532 - ld.d $ra, $sp, 392 # 8-byte Folded Reload + ld.d $ra, $sp, 384 # 8-byte Folded Reload ld.d $t4, $sp, 344 # 8-byte Folded Reload vld $vr18, $sp, 304 # 16-byte Folded Reload - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a0, $sp, 160 # 8-byte Folded Reload move $t5, $s0 move $t6, $s1 bnez $a0, .LBB6_119 @@ -6747,7 +6740,7 @@ Q__align_gapmap: # @Q__align_gapmap ldx.d $a0, $a1, $a0 ld.d $a3, $sp, 288 # 8-byte Folded Reload move $a2, $t5 - ld.d $a1, $sp, 328 # 8-byte Folded Reload + ld.d $a1, $sp, 320 # 8-byte Folded Reload .p2align 4, , 16 .LBB6_118: # %.lr.ph.i533 # =>This Inner Loop Header: Depth=1 @@ -6765,173 +6758,172 @@ Q__align_gapmap: # @Q__align_gapmap .LBB6_119: # %imp_match_out_veadQ_gapmap.exit pcalau12i $a0, %got_pc_hi20(outgap) ld.d $t7, $a0, %got_pc_lo12(outgap) - ld.w $a1, $t7, 0 - ori $a2, $zero, 1 - ld.d $a0, $sp, 328 # 8-byte Folded Reload - slli.d $a0, $a0, 32 - pcalau12i $a3, %pc_hi20(.LCPI6_2) - st.d $a3, $sp, 272 # 8-byte Folded Spill - bne $a1, $a2, .LBB6_129 + ld.w $a2, $t7, 0 + ori $a3, $zero, 1 + ld.d $a0, $sp, 320 # 8-byte Folded Reload + slli.d $a1, $a0, 32 + lu12i.w $a0, 287172 + bne $a2, $a3, .LBB6_129 # %bb.120: - ld.d $a1, $sp, 424 # 8-byte Folded Reload - ld.d $a1, $a1, %pc_lo12(Q__align_gapmap.ogcp1g) - ld.d $a2, $sp, 216 # 8-byte Folded Reload - ld.d $a2, $a2, %pc_lo12(Q__align_gapmap.og_h_dg_n2_p) - ld.d $a3, $sp, 408 # 8-byte Folded Reload - ld.d $a3, $a3, %pc_lo12(Q__align_gapmap.ogcp2g) - fld.s $fa0, $a1, 0 - fld.s $fa1, $a2, 0 - fld.s $fa2, $a3, 0 - ld.d $a1, $sp, 336 # 8-byte Folded Reload - ld.d $a1, $a1, %pc_lo12(Q__align_gapmap.og_h_dg_n1_p) ld.d $a2, $sp, 416 # 8-byte Folded Reload - ld.d $a2, $a2, %pc_lo12(Q__align_gapmap.fgcp1g) - ld.d $a3, $sp, 208 # 8-byte Folded Reload - ld.d $a3, $a3, %pc_lo12(Q__align_gapmap.fg_h_dg_n2_p) + ld.d $a2, $a2, %pc_lo12(Q__align_gapmap.ogcp1g) + ld.d $a3, $sp, 224 # 8-byte Folded Reload + ld.d $a3, $a3, %pc_lo12(Q__align_gapmap.og_h_dg_n2_p) ld.d $a4, $sp, 400 # 8-byte Folded Reload - ld.d $a4, $a4, %pc_lo12(Q__align_gapmap.fgcp2g) - fld.s $fa3, $a1, 0 - fld.s $fa4, $a2, 0 - fld.s $fa5, $a3, 0 - fld.s $fa6, $a4, 0 - ld.d $a1, $sp, 264 # 8-byte Folded Reload - ld.d $a1, $a1, %pc_lo12(Q__align_gapmap.fg_h_dg_n1_p) - ld.d $a3, $s8, %pc_lo12(Q__align_gapmap.initverticalw) + ld.d $a4, $a4, %pc_lo12(Q__align_gapmap.ogcp2g) + fld.s $fa0, $a2, 0 + fld.s $fa1, $a3, 0 + fld.s $fa2, $a4, 0 + ld.d $a2, $sp, 328 # 8-byte Folded Reload + ld.d $a2, $a2, %pc_lo12(Q__align_gapmap.og_h_dg_n1_p) + ld.d $a3, $sp, 408 # 8-byte Folded Reload + ld.d $a3, $a3, %pc_lo12(Q__align_gapmap.fgcp1g) + ld.d $a4, $sp, 216 # 8-byte Folded Reload + ld.d $a4, $a4, %pc_lo12(Q__align_gapmap.fg_h_dg_n2_p) + ld.d $a5, $sp, 392 # 8-byte Folded Reload + ld.d $a5, $a5, %pc_lo12(Q__align_gapmap.fgcp2g) + fld.s $fa3, $a2, 0 + fld.s $fa4, $a3, 0 + fld.s $fa5, $a4, 0 + fld.s $fa6, $a5, 0 + ld.d $a2, $sp, 272 # 8-byte Folded Reload + ld.d $a2, $a2, %pc_lo12(Q__align_gapmap.fg_h_dg_n1_p) + ld.d $a4, $s8, %pc_lo12(Q__align_gapmap.initverticalw) movgr2fr.w $fa7, $zero fmadd.s $fa0, $fa0, $fa1, $fa7 - fld.s $fa1, $a1, 0 - fld.s $fa7, $a3, 0 + fld.s $fa1, $a2, 0 + fld.s $fa7, $a4, 0 fmadd.s $fa0, $fa2, $fa3, $fa0 fmadd.s $fa0, $fa4, $fa5, $fa0 fmadd.s $fa0, $fa6, $fa1, $fa0 fadd.s $fa1, $fa0, $fa7 - fst.s $fa1, $a3, 0 + fst.s $fa1, $a4, 0 fld.s $fa1, $t5, 0 fadd.s $fa0, $fa0, $fa1 fst.s $fa0, $t5, 0 blez $s2, .LBB6_124 # %bb.121: # %.lr.ph582 - ld.d $a1, $sp, 168 # 8-byte Folded Reload - ld.d $a1, $a1, %pc_lo12(Q__align_gapmap.gapz_n2) - ld.d $a2, $sp, 200 # 8-byte Folded Reload - ld.d $a2, $a2, %pc_lo12(Q__align_gapmap.og_t_fg_h_dg_n1_p) - ld.d $a4, $sp, 192 # 8-byte Folded Reload - ld.d $a4, $a4, %pc_lo12(Q__align_gapmap.fg_t_og_h_dg_n1_p) - addi.d $a5, $s4, 1 - bstrpick.d $a5, $a5, 31, 0 - addi.d $a7, $a5, -1 - ori $t0, $zero, 8 - ori $a6, $zero, 1 - bgeu $a7, $t0, .LBB6_205 + ld.d $a2, $sp, 176 # 8-byte Folded Reload + ld.d $a2, $a2, %pc_lo12(Q__align_gapmap.gapz_n2) + ld.d $a3, $sp, 208 # 8-byte Folded Reload + ld.d $a3, $a3, %pc_lo12(Q__align_gapmap.og_t_fg_h_dg_n1_p) + ld.d $a5, $sp, 200 # 8-byte Folded Reload + ld.d $a5, $a5, %pc_lo12(Q__align_gapmap.fg_t_og_h_dg_n1_p) + addi.d $a6, $s4, 1 + bstrpick.d $a6, $a6, 31, 0 + addi.d $t0, $a6, -1 + ori $t1, $zero, 8 + ori $a7, $zero, 1 + bgeu $t0, $t1, .LBB6_205 .LBB6_122: # %scalar.ph1085.preheader - alsl.d $a4, $a6, $a4, 2 - alsl.d $a3, $a6, $a3, 2 - sub.d $a5, $a5, $a6 + alsl.d $a5, $a7, $a5, 2 + alsl.d $a4, $a7, $a4, 2 + sub.d $a6, $a6, $a7 .p2align 4, , 16 .LBB6_123: # %scalar.ph1085 # =>This Inner Loop Header: Depth=1 - fld.s $fa0, $a1, 0 - fld.s $fa1, $a2, 0 - fld.s $fa2, $a3, 0 + fld.s $fa0, $a2, 0 + fld.s $fa1, $a3, 0 + fld.s $fa2, $a4, 0 fmul.s $fa0, $fa0, $fa1 fadd.s $fa0, $fa2, $fa0 - fst.s $fa0, $a3, 0 - fld.s $fa1, $a1, 4 - fld.s $fa2, $a4, 0 + fst.s $fa0, $a4, 0 + fld.s $fa1, $a2, 4 + fld.s $fa2, $a5, 0 fmul.s $fa1, $fa1, $fa2 fadd.s $fa0, $fa0, $fa1 - fst.s $fa0, $a3, 0 + fst.s $fa0, $a4, 0 + addi.d $a5, $a5, 4 + addi.d $a6, $a6, -1 addi.d $a4, $a4, 4 - addi.d $a5, $a5, -1 - addi.d $a3, $a3, 4 - bnez $a5, .LBB6_123 + bnez $a6, .LBB6_123 .LBB6_124: # %.preheader556 blez $t4, .LBB6_152 # %bb.125: # %.lr.ph585 - ld.d $a1, $sp, 184 # 8-byte Folded Reload - ld.d $a1, $a1, %pc_lo12(Q__align_gapmap.gapz_n1) - ld.d $a2, $sp, 176 # 8-byte Folded Reload - ld.d $a2, $a2, %pc_lo12(Q__align_gapmap.og_t_fg_h_dg_n2_p) - ld.d $a3, $sp, 160 # 8-byte Folded Reload - ld.d $a3, $a3, %pc_lo12(Q__align_gapmap.fg_t_og_h_dg_n2_p) - ld.d $a4, $sp, 328 # 8-byte Folded Reload - addi.d $a4, $a4, 1 - bstrpick.d $a4, $a4, 31, 0 - addi.d $a6, $a4, -1 - ori $a7, $zero, 8 - ori $a5, $zero, 1 - bgeu $a6, $a7, .LBB6_211 + ld.d $a2, $sp, 192 # 8-byte Folded Reload + ld.d $a2, $a2, %pc_lo12(Q__align_gapmap.gapz_n1) + ld.d $a3, $sp, 184 # 8-byte Folded Reload + ld.d $a3, $a3, %pc_lo12(Q__align_gapmap.og_t_fg_h_dg_n2_p) + ld.d $a4, $sp, 168 # 8-byte Folded Reload + ld.d $a4, $a4, %pc_lo12(Q__align_gapmap.fg_t_og_h_dg_n2_p) + ld.d $a5, $sp, 320 # 8-byte Folded Reload + addi.d $a5, $a5, 1 + bstrpick.d $a5, $a5, 31, 0 + addi.d $a7, $a5, -1 + ori $t0, $zero, 8 + ori $a6, $zero, 1 + bgeu $a7, $t0, .LBB6_211 .LBB6_126: # %scalar.ph1123.preheader - alsl.d $a3, $a5, $a3, 2 - alsl.d $a6, $a5, $t5, 2 - sub.d $a4, $a4, $a5 + alsl.d $a4, $a6, $a4, 2 + alsl.d $a7, $a6, $t5, 2 + sub.d $a5, $a5, $a6 .p2align 4, , 16 .LBB6_127: # %scalar.ph1123 # =>This Inner Loop Header: Depth=1 - fld.s $fa0, $a1, 0 - fld.s $fa1, $a2, 0 - fld.s $fa2, $a6, 0 + fld.s $fa0, $a2, 0 + fld.s $fa1, $a3, 0 + fld.s $fa2, $a7, 0 fmul.s $fa0, $fa0, $fa1 fadd.s $fa0, $fa2, $fa0 - fst.s $fa0, $a6, 0 - fld.s $fa1, $a1, 4 - fld.s $fa2, $a3, 0 + fst.s $fa0, $a7, 0 + fld.s $fa1, $a2, 4 + fld.s $fa2, $a4, 0 fmul.s $fa1, $fa1, $fa2 fadd.s $fa0, $fa0, $fa1 - fst.s $fa0, $a6, 0 - addi.d $a3, $a3, 4 - addi.d $a4, $a4, -1 - addi.d $a6, $a6, 4 - bnez $a4, .LBB6_127 + fst.s $fa0, $a7, 0 + addi.d $a4, $a4, 4 + addi.d $a5, $a5, -1 + addi.d $a7, $a7, 4 + bnez $a5, .LBB6_127 .LBB6_128: # %.loopexit557.thread740 - ld.d $a1, $sp, 224 # 8-byte Folded Reload - ld.d $a1, $a1, %pc_lo12(Q__align_gapmap.m) - st.w $zero, $a1, 0 + ld.d $a2, $sp, 232 # 8-byte Folded Reload + ld.d $a2, $a2, %pc_lo12(Q__align_gapmap.m) + st.w $zero, $a2, 0 b .LBB6_144 .LBB6_129: # %.preheader560 blez $t4, .LBB6_136 # %bb.130: # %.lr.ph575 - pcalau12i $a1, %got_pc_hi20(offset) - ld.d $a1, $a1, %got_pc_lo12(offset) - ld.w $a1, $a1, 0 - ld.d $a2, $sp, 328 # 8-byte Folded Reload - addi.d $a2, $a2, 1 - bstrpick.d $a2, $a2, 31, 0 - addi.d $a3, $a2, -1 - ori $a5, $zero, 4 - ori $a4, $zero, 1 - bltu $a3, $a5, .LBB6_134 + pcalau12i $a2, %got_pc_hi20(offset) + ld.d $a2, $a2, %got_pc_lo12(offset) + ld.w $a2, $a2, 0 + ld.d $a3, $sp, 320 # 8-byte Folded Reload + addi.d $a3, $a3, 1 + bstrpick.d $a3, $a3, 31, 0 + addi.d $a4, $a3, -1 + ori $a6, $zero, 4 + ori $a5, $zero, 1 + bltu $a4, $a6, .LBB6_134 # %bb.131: # %vector.ph1038 - move $a5, $a3 - bstrins.d $a5, $zero, 1, 0 - ori $a6, $zero, 1 - move $a4, $a3 - bstrins.d $a4, $a6, 1, 0 - vreplgr2vr.w $vr0, $a1 - pcalau12i $a6, %pc_hi20(.LCPI6_1) - vld $vr1, $a6, %pc_lo12(.LCPI6_1) - addi.d $a6, $t5, 4 - lu52i.d $a7, $zero, -1026 - vreplgr2vr.d $vr2, $a7 - move $a7, $a5 + move $a6, $a4 + bstrins.d $a6, $zero, 1, 0 + ori $a7, $zero, 1 + move $a5, $a4 + bstrins.d $a5, $a7, 1, 0 + vreplgr2vr.w $vr0, $a2 + pcalau12i $a7, %pc_hi20(.LCPI6_0) + vld $vr1, $a7, %pc_lo12(.LCPI6_0) + addi.d $a7, $t5, 4 + lu52i.d $t0, $zero, -1026 + vreplgr2vr.d $vr2, $t0 + move $t0, $a6 .p2align 4, , 16 .LBB6_132: # %vector.body1043 # =>This Inner Loop Header: Depth=1 vmul.w $vr3, $vr0, $vr1 - vpickve2gr.w $t0, $vr3, 3 - movgr2fr.w $fa4, $t0 + vpickve2gr.w $t1, $vr3, 3 + movgr2fr.w $fa4, $t1 ffint.d.w $fa4, $fa4 - vpickve2gr.w $t0, $vr3, 2 - movgr2fr.w $fa5, $t0 + vpickve2gr.w $t1, $vr3, 2 + movgr2fr.w $fa5, $t1 ffint.d.w $fa5, $fa5 vextrins.d $vr5, $vr4, 16 - vpickve2gr.w $t0, $vr3, 1 - movgr2fr.w $fa4, $t0 + vpickve2gr.w $t1, $vr3, 1 + movgr2fr.w $fa4, $t1 ffint.d.w $fa4, $fa4 - vpickve2gr.w $t0, $vr3, 0 - movgr2fr.w $fa3, $t0 + vpickve2gr.w $t1, $vr3, 0 + movgr2fr.w $fa3, $t1 ffint.d.w $fa3, $fa3 - vld $vr6, $a6, 0 + vld $vr6, $a7, 0 vextrins.d $vr3, $vr4, 16 vfmul.d $vr3, $vr3, $vr2 vfmul.d $vr4, $vr5, $vr2 @@ -6958,77 +6950,77 @@ Q__align_gapmap: # @Q__align_gapmap vreplvei.d $vr4, $vr4, 1 fcvt.s.d $fa4, $fa4 vextrins.w $vr3, $vr4, 48 - vst $vr3, $a6, 0 + vst $vr3, $a7, 0 vaddi.wu $vr1, $vr1, 4 - addi.d $a7, $a7, -4 - addi.d $a6, $a6, 16 - bnez $a7, .LBB6_132 + addi.d $t0, $t0, -4 + addi.d $a7, $a7, 16 + bnez $t0, .LBB6_132 # %bb.133: # %middle.block1047 - beq $a3, $a5, .LBB6_136 + beq $a4, $a6, .LBB6_136 .LBB6_134: # %scalar.ph1036.preheader - mul.d $a3, $a1, $a4 - alsl.d $a5, $a4, $t5, 2 - sub.d $a2, $a2, $a4 + mul.d $a4, $a2, $a5 + alsl.d $a6, $a5, $t5, 2 + sub.d $a3, $a3, $a5 vldi $vr0, -800 .p2align 4, , 16 .LBB6_135: # %scalar.ph1036 # =>This Inner Loop Header: Depth=1 - fld.s $fa1, $a5, 0 - movgr2fr.w $fa2, $a3 + fld.s $fa1, $a6, 0 + movgr2fr.w $fa2, $a4 ffint.d.w $fa2, $fa2 fmul.d $fa2, $fa2, $fa0 fcvt.d.s $fa1, $fa1 fadd.d $fa1, $fa1, $fa2 fcvt.s.d $fa1, $fa1 - fst.s $fa1, $a5, 0 - add.w $a3, $a3, $a1 - addi.d $a2, $a2, -1 - addi.d $a5, $a5, 4 - bnez $a2, .LBB6_135 -.LBB6_136: # %.preheader558 - blez $s2, .LBB6_143 -# %bb.137: # %.lr.ph578 - pcalau12i $a1, %got_pc_hi20(offset) - ld.d $a1, $a1, %got_pc_lo12(offset) - ld.w $a1, $a1, 0 - ld.d $a2, $s8, %pc_lo12(Q__align_gapmap.initverticalw) - addi.d $a3, $s4, 1 - bstrpick.d $a3, $a3, 31, 0 - addi.d $a4, $a3, -1 - ori $a6, $zero, 4 - ori $a5, $zero, 1 - bltu $a4, $a6, .LBB6_141 + fst.s $fa1, $a6, 0 + add.w $a4, $a4, $a2 + addi.d $a3, $a3, -1 + addi.d $a6, $a6, 4 + bnez $a3, .LBB6_135 +.LBB6_136: # %.preheader558 + blez $s2, .LBB6_143 +# %bb.137: # %.lr.ph578 + pcalau12i $a2, %got_pc_hi20(offset) + ld.d $a2, $a2, %got_pc_lo12(offset) + ld.w $a2, $a2, 0 + ld.d $a3, $s8, %pc_lo12(Q__align_gapmap.initverticalw) + addi.d $a4, $s4, 1 + bstrpick.d $a4, $a4, 31, 0 + addi.d $a5, $a4, -1 + ori $a7, $zero, 4 + ori $a6, $zero, 1 + bltu $a5, $a7, .LBB6_141 # %bb.138: # %vector.ph1052 - move $a6, $a4 - bstrins.d $a6, $zero, 1, 0 - ori $a7, $zero, 1 - move $a5, $a4 - bstrins.d $a5, $a7, 1, 0 - vreplgr2vr.w $vr0, $a1 - pcalau12i $a7, %pc_hi20(.LCPI6_1) - vld $vr1, $a7, %pc_lo12(.LCPI6_1) - addi.d $a7, $a2, 4 - lu52i.d $t0, $zero, -1026 - vreplgr2vr.d $vr2, $t0 - move $t0, $a6 + move $a7, $a5 + bstrins.d $a7, $zero, 1, 0 + ori $t0, $zero, 1 + move $a6, $a5 + bstrins.d $a6, $t0, 1, 0 + vreplgr2vr.w $vr0, $a2 + pcalau12i $t0, %pc_hi20(.LCPI6_0) + vld $vr1, $t0, %pc_lo12(.LCPI6_0) + addi.d $t0, $a3, 4 + lu52i.d $t1, $zero, -1026 + vreplgr2vr.d $vr2, $t1 + move $t1, $a7 .p2align 4, , 16 .LBB6_139: # %vector.body1057 # =>This Inner Loop Header: Depth=1 vmul.w $vr3, $vr0, $vr1 - vpickve2gr.w $t1, $vr3, 3 - movgr2fr.w $fa4, $t1 + vpickve2gr.w $t2, $vr3, 3 + movgr2fr.w $fa4, $t2 ffint.d.w $fa4, $fa4 - vpickve2gr.w $t1, $vr3, 2 - movgr2fr.w $fa5, $t1 + vpickve2gr.w $t2, $vr3, 2 + movgr2fr.w $fa5, $t2 ffint.d.w $fa5, $fa5 vextrins.d $vr5, $vr4, 16 - vpickve2gr.w $t1, $vr3, 1 - movgr2fr.w $fa4, $t1 + vpickve2gr.w $t2, $vr3, 1 + movgr2fr.w $fa4, $t2 ffint.d.w $fa4, $fa4 - vpickve2gr.w $t1, $vr3, 0 - movgr2fr.w $fa3, $t1 + vpickve2gr.w $t2, $vr3, 0 + movgr2fr.w $fa3, $t2 ffint.d.w $fa3, $fa3 - vld $vr6, $a7, 0 + vld $vr6, $t0, 0 vextrins.d $vr3, $vr4, 16 vfmul.d $vr3, $vr3, $vr2 vfmul.d $vr4, $vr5, $vr2 @@ -7055,300 +7047,300 @@ Q__align_gapmap: # @Q__align_gapmap vreplvei.d $vr4, $vr4, 1 fcvt.s.d $fa4, $fa4 vextrins.w $vr3, $vr4, 48 - vst $vr3, $a7, 0 + vst $vr3, $t0, 0 vaddi.wu $vr1, $vr1, 4 - addi.d $t0, $t0, -4 - addi.d $a7, $a7, 16 - bnez $t0, .LBB6_139 + addi.d $t1, $t1, -4 + addi.d $t0, $t0, 16 + bnez $t1, .LBB6_139 # %bb.140: # %middle.block1064 - beq $a4, $a6, .LBB6_143 + beq $a5, $a7, .LBB6_143 .LBB6_141: # %scalar.ph1050.preheader - mul.d $a4, $a1, $a5 - alsl.d $a2, $a5, $a2, 2 - sub.d $a3, $a3, $a5 + mul.d $a5, $a2, $a6 + alsl.d $a3, $a6, $a3, 2 + sub.d $a4, $a4, $a6 vldi $vr0, -800 .p2align 4, , 16 .LBB6_142: # %scalar.ph1050 # =>This Inner Loop Header: Depth=1 - fld.s $fa1, $a2, 0 - movgr2fr.w $fa2, $a4 + fld.s $fa1, $a3, 0 + movgr2fr.w $fa2, $a5 ffint.d.w $fa2, $fa2 fmul.d $fa2, $fa2, $fa0 fcvt.d.s $fa1, $fa1 fadd.d $fa1, $fa1, $fa2 fcvt.s.d $fa1, $fa1 - fst.s $fa1, $a2, 0 - add.w $a4, $a4, $a1 - addi.d $a3, $a3, -1 - addi.d $a2, $a2, 4 - bnez $a3, .LBB6_142 + fst.s $fa1, $a3, 0 + add.w $a5, $a5, $a2 + addi.d $a4, $a4, -1 + addi.d $a3, $a3, 4 + bnez $a4, .LBB6_142 .LBB6_143: # %.loopexit557 - ld.d $a1, $sp, 224 # 8-byte Folded Reload - ld.d $a1, $a1, %pc_lo12(Q__align_gapmap.m) - st.w $zero, $a1, 0 + ld.d $a2, $sp, 232 # 8-byte Folded Reload + ld.d $a2, $a2, %pc_lo12(Q__align_gapmap.m) + st.w $zero, $a2, 0 blez $t4, .LBB6_153 .LBB6_144: # %.lr.ph589 - ld.d $a2, $sp, 88 # 8-byte Folded Reload - ld.d $a2, $a2, %pc_lo12(Q__align_gapmap.mp) - ld.d $a3, $sp, 328 # 8-byte Folded Reload - addi.d $a3, $a3, 1 - bstrpick.d $a3, $a3, 31, 0 - addi.d $a4, $a3, -1 - ori $a6, $zero, 8 - ori $a5, $zero, 1 - bltu $a4, $a6, .LBB6_149 + ld.d $a3, $sp, 96 # 8-byte Folded Reload + ld.d $a3, $a3, %pc_lo12(Q__align_gapmap.mp) + ld.d $a4, $sp, 320 # 8-byte Folded Reload + addi.d $a4, $a4, 1 + bstrpick.d $a4, $a4, 31, 0 + addi.d $a5, $a4, -1 + ori $a7, $zero, 8 + ori $a6, $zero, 1 + bltu $a5, $a7, .LBB6_149 # %bb.145: # %vector.memcheck1143 - sub.d $a6, $a1, $t5 - addi.d $a6, $a6, 4 - ori $a7, $zero, 32 - bltu $a6, $a7, .LBB6_149 + sub.d $a7, $a2, $t5 + addi.d $a7, $a7, 4 + ori $t0, $zero, 32 + bltu $a7, $t0, .LBB6_149 # %bb.146: # %vector.ph1146 - move $a7, $zero - move $a6, $a4 - bstrins.d $a6, $zero, 2, 0 - ori $t0, $zero, 1 - move $a5, $a4 - bstrins.d $a5, $t0, 2, 0 + move $t0, $zero + move $a7, $a5 + bstrins.d $a7, $zero, 2, 0 + ori $t1, $zero, 1 + move $a6, $a5 + bstrins.d $a6, $t1, 2, 0 vreplvei.w $vr0, $vr18, 0 - addi.d $t0, $a2, 20 + addi.d $t1, $a3, 20 vrepli.b $vr1, 0 - lu12i.w $t1, 287172 - vreplgr2vr.w $vr2, $t1 - move $t1, $a6 + vreplgr2vr.w $vr2, $a0 + move $t2, $a7 .p2align 4, , 16 .LBB6_147: # %vector.body1151 # =>This Inner Loop Header: Depth=1 - add.d $t2, $t0, $a7 - add.d $t3, $t5, $a7 - vldx $vr3, $t5, $a7 - vld $vr4, $t3, 16 - vst $vr1, $t2, -16 - vstx $vr1, $t0, $a7 + add.d $t3, $t1, $t0 + add.d $t4, $t5, $t0 + vldx $vr3, $t5, $t0 + vld $vr4, $t4, 16 + vst $vr1, $t3, -16 + vstx $vr1, $t1, $t0 vfmadd.s $vr3, $vr0, $vr2, $vr3 vfmadd.s $vr4, $vr0, $vr2, $vr4 - add.d $t2, $a1, $a7 - vst $vr3, $t2, 4 - vst $vr4, $t2, 20 - addi.d $t1, $t1, -8 - addi.d $a7, $a7, 32 - bnez $t1, .LBB6_147 + add.d $t3, $a2, $t0 + vst $vr3, $t3, 4 + vst $vr4, $t3, 20 + addi.d $t2, $t2, -8 + addi.d $t0, $t0, 32 + bnez $t2, .LBB6_147 # %bb.148: # %middle.block1157 - beq $a4, $a6, .LBB6_151 + ld.d $t4, $sp, 344 # 8-byte Folded Reload + beq $a5, $a7, .LBB6_151 .LBB6_149: # %scalar.ph1144.preheader - ld.d $a4, $sp, 272 # 8-byte Folded Reload - fld.s $fa0, $a4, %pc_lo12(.LCPI6_2) - slli.d $a4, $a5, 2 - addi.d $a6, $t5, -4 - sub.d $a3, $a3, $a5 + slli.d $a5, $a6, 2 + addi.d $a7, $t5, -4 + sub.d $a4, $a4, $a6 + movgr2fr.w $fa0, $a0 .p2align 4, , 16 .LBB6_150: # %scalar.ph1144 # =>This Inner Loop Header: Depth=1 - fldx.s $fa1, $a6, $a4 - stx.w $zero, $a2, $a4 + fldx.s $fa1, $a7, $a5 + stx.w $zero, $a3, $a5 fmadd.s $fa1, $ft10, $fa0, $fa1 - fstx.s $fa1, $a1, $a4 - addi.d $a3, $a3, -1 - addi.d $a4, $a4, 4 - bnez $a3, .LBB6_150 + fstx.s $fa1, $a2, $a5 + addi.d $a4, $a4, -1 + addi.d $a5, $a5, 4 + bnez $a4, .LBB6_150 .LBB6_151: - move $t2, $zero + move $t3, $zero b .LBB6_154 .LBB6_152: # %.loopexit557.thread - ld.d $a1, $sp, 224 # 8-byte Folded Reload - ld.d $a1, $a1, %pc_lo12(Q__align_gapmap.m) - st.w $zero, $a1, 0 + ld.d $a2, $sp, 232 # 8-byte Folded Reload + ld.d $a2, $a2, %pc_lo12(Q__align_gapmap.m) + st.w $zero, $a2, 0 .LBB6_153: # %._crit_edge590 - ori $t2, $zero, 1 + ori $t3, $zero, 1 beqz $t4, .LBB6_280 .LBB6_154: # %._crit_edge590.thread - ori $a2, $zero, 0 - lu32i.d $a2, -1 - add.d $a2, $a0, $a2 - srai.d $a2, $a2, 30 - fldx.s $fa0, $t5, $a2 - st.d $t2, $sp, 376 # 8-byte Folded Spill - move $t2, $zero + ori $a3, $zero, 0 + lu32i.d $a3, -1 + add.d $a3, $a1, $a3 + srai.d $a3, $a3, 30 + fldx.s $fa0, $t5, $a3 + st.d $t3, $sp, 368 # 8-byte Folded Spill + move $t3, $zero .LBB6_155: ld.w $s0, $t7, 0 - ld.d $a2, $sp, 120 # 8-byte Folded Reload - ld.d $a3, $a2, %pc_lo12(Q__align_gapmap.lastverticalw) - sltu $a2, $zero, $s0 - add.w $a4, $a2, $s4 - ori $a2, $zero, 2 - st.d $a3, $sp, 368 # 8-byte Folded Spill - fst.s $fa0, $a3, 0 - st.d $s7, $sp, 144 # 8-byte Folded Spill + ld.d $a3, $sp, 128 # 8-byte Folded Reload + ld.d $a4, $a3, %pc_lo12(Q__align_gapmap.lastverticalw) + sltu $a3, $zero, $s0 + add.w $a5, $a3, $s4 + ori $a3, $zero, 2 st.d $a4, $sp, 360 # 8-byte Folded Spill - blt $a4, $a2, .LBB6_173 + fst.s $fa0, $a4, 0 + st.d $s7, $sp, 152 # 8-byte Folded Spill + st.d $a5, $sp, 352 # 8-byte Folded Spill + blt $a5, $a3, .LBB6_173 # %bb.156: # %.lr.ph626 - st.d $t7, $sp, 56 # 8-byte Folded Spill - ld.d $t3, $s8, %pc_lo12(Q__align_gapmap.initverticalw) - ld.d $a2, $fp, %pc_lo12(Q__align_gapmap.cpmx1) - st.d $a2, $sp, 256 # 8-byte Folded Spill - ld.d $a2, $sp, 352 # 8-byte Folded Reload - ld.d $a2, $a2, %pc_lo12(Q__align_gapmap.cpmx2) - st.d $a2, $sp, 248 # 8-byte Folded Spill - ld.d $a2, $sp, 240 # 8-byte Folded Reload - ld.d $a2, $a2, %pc_lo12(Q__align_gapmap.floatwork) - st.d $a2, $sp, 240 # 8-byte Folded Spill - ld.d $a2, $sp, 232 # 8-byte Folded Reload - ld.d $a2, $a2, %pc_lo12(Q__align_gapmap.intwork) - st.d $a2, $sp, 232 # 8-byte Folded Spill - ld.d $a2, $sp, 328 # 8-byte Folded Reload - slli.d $a2, $a2, 2 - bstrpick.d $a2, $a2, 33, 2 - slli.d $a2, $a2, 2 - st.d $a2, $sp, 128 # 8-byte Folded Spill - pcalau12i $a2, %pc_hi20(impmtx) - ld.d $a2, $a2, %pc_lo12(impmtx) - st.d $a2, $sp, 136 # 8-byte Folded Spill - ld.d $a2, $sp, 72 # 8-byte Folded Reload - ld.d $a2, $a2, %pc_lo12(Q__align_gapmap.ijp) - st.d $a2, $sp, 224 # 8-byte Folded Spill - ld.d $a2, $sp, 88 # 8-byte Folded Reload - ld.d $a2, $a2, %pc_lo12(Q__align_gapmap.mp) - ld.d $a3, $sp, 160 # 8-byte Folded Reload - ld.d $a3, $a3, %pc_lo12(Q__align_gapmap.fg_t_og_h_dg_n2_p) + st.d $t7, $sp, 64 # 8-byte Folded Spill + ld.d $t7, $s8, %pc_lo12(Q__align_gapmap.initverticalw) + ld.d $a3, $fp, %pc_lo12(Q__align_gapmap.cpmx1) + st.d $a3, $sp, 264 # 8-byte Folded Spill + ld.d $a3, $sp, 336 # 8-byte Folded Reload + ld.d $a3, $a3, %pc_lo12(Q__align_gapmap.cpmx2) + st.d $a3, $sp, 256 # 8-byte Folded Spill + ld.d $a3, $sp, 248 # 8-byte Folded Reload + ld.d $a3, $a3, %pc_lo12(Q__align_gapmap.floatwork) + st.d $a3, $sp, 248 # 8-byte Folded Spill + ld.d $a3, $sp, 240 # 8-byte Folded Reload + ld.d $a3, $a3, %pc_lo12(Q__align_gapmap.intwork) + st.d $a3, $sp, 240 # 8-byte Folded Spill + ld.d $a3, $sp, 320 # 8-byte Folded Reload + slli.d $a3, $a3, 2 + bstrpick.d $a3, $a3, 33, 2 + slli.d $a3, $a3, 2 + st.d $a3, $sp, 136 # 8-byte Folded Spill + pcalau12i $a3, %pc_hi20(impmtx) + ld.d $a3, $a3, %pc_lo12(impmtx) + st.d $a3, $sp, 144 # 8-byte Folded Spill + ld.d $a3, $sp, 80 # 8-byte Folded Reload + ld.d $a3, $a3, %pc_lo12(Q__align_gapmap.ijp) + st.d $a3, $sp, 232 # 8-byte Folded Spill + ld.d $a3, $sp, 96 # 8-byte Folded Reload + ld.d $a3, $a3, %pc_lo12(Q__align_gapmap.mp) ld.d $a4, $sp, 168 # 8-byte Folded Reload - ld.d $a4, $a4, %pc_lo12(Q__align_gapmap.gapz_n2) + ld.d $a4, $a4, %pc_lo12(Q__align_gapmap.fg_t_og_h_dg_n2_p) ld.d $a5, $sp, 176 # 8-byte Folded Reload - ld.d $a5, $a5, %pc_lo12(Q__align_gapmap.og_t_fg_h_dg_n2_p) - ld.d $a6, $sp, 216 # 8-byte Folded Reload - ld.d $a6, $a6, %pc_lo12(Q__align_gapmap.og_h_dg_n2_p) - ld.d $a7, $sp, 208 # 8-byte Folded Reload - ld.d $a7, $a7, %pc_lo12(Q__align_gapmap.fg_h_dg_n2_p) - addi.d $s5, $a4, 8 - ld.d $a4, $sp, 400 # 8-byte Folded Reload - ld.d $a4, $a4, %pc_lo12(Q__align_gapmap.fgcp2g) - ld.d $t0, $sp, 408 # 8-byte Folded Reload - ld.d $t0, $t0, %pc_lo12(Q__align_gapmap.ogcp2g) - ld.d $t1, $sp, 192 # 8-byte Folded Reload - ld.d $t1, $t1, %pc_lo12(Q__align_gapmap.fg_t_og_h_dg_n1_p) - st.d $t1, $sp, 216 # 8-byte Folded Spill - ld.d $t1, $sp, 200 # 8-byte Folded Reload - ld.d $t1, $t1, %pc_lo12(Q__align_gapmap.og_t_fg_h_dg_n1_p) - st.d $t1, $sp, 208 # 8-byte Folded Spill - ld.d $t1, $sp, 336 # 8-byte Folded Reload - ld.d $t1, $t1, %pc_lo12(Q__align_gapmap.og_h_dg_n1_p) - st.d $t1, $sp, 200 # 8-byte Folded Spill - ld.d $t1, $sp, 264 # 8-byte Folded Reload - ld.d $t1, $t1, %pc_lo12(Q__align_gapmap.fg_h_dg_n1_p) - st.d $t1, $sp, 192 # 8-byte Folded Spill - ld.d $t1, $sp, 184 # 8-byte Folded Reload - ld.d $t1, $t1, %pc_lo12(Q__align_gapmap.gapz_n1) - st.d $t1, $sp, 184 # 8-byte Folded Spill - ld.d $t1, $sp, 416 # 8-byte Folded Reload - ld.d $t1, $t1, %pc_lo12(Q__align_gapmap.fgcp1g) - st.d $t1, $sp, 176 # 8-byte Folded Spill - ld.d $t1, $sp, 424 # 8-byte Folded Reload - ld.d $t1, $t1, %pc_lo12(Q__align_gapmap.ogcp1g) - st.d $t1, $sp, 168 # 8-byte Folded Spill - ori $t1, $zero, 0 - lu32i.d $t1, -1 - add.d $a0, $a0, $t1 - srai.d $a0, $a0, 30 - st.d $a0, $sp, 352 # 8-byte Folded Spill - ld.d $a0, $sp, 152 # 8-byte Folded Reload - st.d $t2, $sp, 264 # 8-byte Folded Spill - or $a0, $a0, $t2 - st.d $a0, $sp, 160 # 8-byte Folded Spill - addi.d $s4, $a1, 4 - addi.d $s0, $a2, 4 - addi.d $fp, $a3, 4 - addi.d $s1, $a5, 4 - addi.d $s7, $a6, 4 - addi.d $s2, $a7, 4 - addi.d $s6, $a4, 4 - addi.d $s3, $t0, 4 + ld.d $a5, $a5, %pc_lo12(Q__align_gapmap.gapz_n2) + ld.d $a6, $sp, 184 # 8-byte Folded Reload + ld.d $a6, $a6, %pc_lo12(Q__align_gapmap.og_t_fg_h_dg_n2_p) + ld.d $a7, $sp, 224 # 8-byte Folded Reload + ld.d $a7, $a7, %pc_lo12(Q__align_gapmap.og_h_dg_n2_p) + ld.d $t0, $sp, 216 # 8-byte Folded Reload + ld.d $t0, $t0, %pc_lo12(Q__align_gapmap.fg_h_dg_n2_p) + addi.d $s5, $a5, 8 + ld.d $a5, $sp, 392 # 8-byte Folded Reload + ld.d $a5, $a5, %pc_lo12(Q__align_gapmap.fgcp2g) + ld.d $t1, $sp, 400 # 8-byte Folded Reload + ld.d $t1, $t1, %pc_lo12(Q__align_gapmap.ogcp2g) + ld.d $t2, $sp, 200 # 8-byte Folded Reload + ld.d $t2, $t2, %pc_lo12(Q__align_gapmap.fg_t_og_h_dg_n1_p) + st.d $t2, $sp, 224 # 8-byte Folded Spill + ld.d $t2, $sp, 208 # 8-byte Folded Reload + ld.d $t2, $t2, %pc_lo12(Q__align_gapmap.og_t_fg_h_dg_n1_p) + st.d $t2, $sp, 216 # 8-byte Folded Spill + ld.d $t2, $sp, 328 # 8-byte Folded Reload + ld.d $t2, $t2, %pc_lo12(Q__align_gapmap.og_h_dg_n1_p) + st.d $t2, $sp, 208 # 8-byte Folded Spill + ld.d $t2, $sp, 272 # 8-byte Folded Reload + ld.d $t2, $t2, %pc_lo12(Q__align_gapmap.fg_h_dg_n1_p) + st.d $t2, $sp, 200 # 8-byte Folded Spill + ld.d $t2, $sp, 192 # 8-byte Folded Reload + ld.d $t2, $t2, %pc_lo12(Q__align_gapmap.gapz_n1) + st.d $t2, $sp, 192 # 8-byte Folded Spill + ld.d $t2, $sp, 408 # 8-byte Folded Reload + ld.d $t2, $t2, %pc_lo12(Q__align_gapmap.fgcp1g) + st.d $t2, $sp, 184 # 8-byte Folded Spill + ld.d $t2, $sp, 416 # 8-byte Folded Reload + ld.d $t2, $t2, %pc_lo12(Q__align_gapmap.ogcp1g) + st.d $t2, $sp, 176 # 8-byte Folded Spill + ori $t2, $zero, 0 + lu32i.d $t2, -1 + add.d $a1, $a1, $t2 + srai.d $a1, $a1, 30 + st.d $a1, $sp, 336 # 8-byte Folded Spill + ld.d $a1, $sp, 160 # 8-byte Folded Reload + st.d $t3, $sp, 272 # 8-byte Folded Spill + or $a1, $a1, $t3 + st.d $a1, $sp, 168 # 8-byte Folded Spill + addi.d $s4, $a2, 4 + addi.d $s0, $a3, 4 + addi.d $fp, $a4, 4 + addi.d $s1, $a6, 4 + addi.d $s7, $a7, 4 + addi.d $s2, $t0, 4 + addi.d $s6, $a5, 4 + addi.d $s3, $t1, 4 movgr2fr.w $fs0, $zero ori $s8, $zero, 1 - st.d $t3, $sp, 336 # 8-byte Folded Spill + movgr2fr.w $fs1, $a0 + st.d $t7, $sp, 328 # 8-byte Folded Spill b .LBB6_158 .p2align 4, , 16 .LBB6_157: # %._crit_edge618 # in Loop: Header=BB6_158 Depth=1 - ld.d $a1, $sp, 352 # 8-byte Folded Reload + ld.d $a1, $sp, 336 # 8-byte Folded Reload fldx.s $fa0, $t1, $a1 - ld.d $a1, $sp, 368 # 8-byte Folded Reload - fstx.s $fa0, $a1, $t5 + ld.d $a1, $sp, 360 # 8-byte Folded Reload + fstx.s $fa0, $a1, $t3 move $s8, $a0 move $t5, $t1 - ld.d $a1, $sp, 360 # 8-byte Folded Reload + ld.d $a1, $sp, 352 # 8-byte Folded Reload beq $a0, $a1, .LBB6_174 .LBB6_158: # =>This Loop Header: Depth=1 # Child Loop BB6_163 Depth 2 # Child Loop BB6_167 Depth 2 addi.d $t2, $s8, -1 slli.d $a0, $t2, 2 - fldx.s $fa0, $t3, $a0 + fldx.s $fa0, $t7, $a0 move $t1, $t6 move $t6, $t5 fst.s $fa0, $t5, 0 - ld.d $a0, $sp, 384 # 8-byte Folded Reload + ld.d $a0, $sp, 376 # 8-byte Folded Reload ld.bu $a0, $a0, 0 - slli.d $t5, $s8, 2 + slli.d $t3, $s8, 2 ori $a1, $zero, 114 bne $a0, $a1, .LBB6_161 # %bb.159: # in Loop: Header=BB6_158 Depth=1 - ld.d $a0, $sp, 264 # 8-byte Folded Reload + ld.d $a0, $sp, 272 # 8-byte Folded Reload bnez $a0, .LBB6_164 # %bb.160: # %clearvec.exit537.thread # in Loop: Header=BB6_158 Depth=1 move $a0, $t1 move $a1, $zero - ld.d $a2, $sp, 128 # 8-byte Folded Reload - st.d $t6, $sp, 424 # 8-byte Folded Spill - st.d $t1, $sp, 416 # 8-byte Folded Spill - st.d $t2, $sp, 408 # 8-byte Folded Spill - st.d $t5, $sp, 400 # 8-byte Folded Spill + ld.d $a2, $sp, 136 # 8-byte Folded Reload + st.d $t6, $sp, 416 # 8-byte Folded Spill + st.d $t1, $sp, 408 # 8-byte Folded Spill + st.d $t2, $sp, 400 # 8-byte Folded Spill + st.d $t3, $sp, 392 # 8-byte Folded Spill pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ld.d $t5, $sp, 400 # 8-byte Folded Reload - ld.d $t2, $sp, 408 # 8-byte Folded Reload - ld.d $t1, $sp, 416 # 8-byte Folded Reload - ld.d $t3, $sp, 336 # 8-byte Folded Reload - ld.d $t6, $sp, 424 # 8-byte Folded Reload + ld.d $t3, $sp, 392 # 8-byte Folded Reload + ld.d $t2, $sp, 400 # 8-byte Folded Reload + ld.d $t1, $sp, 408 # 8-byte Folded Reload + ld.d $t7, $sp, 328 # 8-byte Folded Reload + ld.d $t6, $sp, 416 # 8-byte Folded Reload ld.d $t4, $sp, 344 # 8-byte Folded Reload - ld.d $ra, $sp, 392 # 8-byte Folded Reload - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $ra, $sp, 384 # 8-byte Folded Reload + ld.d $a0, $sp, 160 # 8-byte Folded Reload beqz $a0, .LBB6_162 b .LBB6_164 .p2align 4, , 16 .LBB6_161: # %clearvec.exit537 # in Loop: Header=BB6_158 Depth=1 move $a0, $t1 - ld.d $a1, $sp, 256 # 8-byte Folded Reload - ld.d $a2, $sp, 248 # 8-byte Folded Reload + ld.d $a1, $sp, 264 # 8-byte Folded Reload + ld.d $a2, $sp, 256 # 8-byte Folded Reload move $a3, $s8 move $a4, $t4 - ld.d $a5, $sp, 240 # 8-byte Folded Reload - ld.d $a6, $sp, 232 # 8-byte Folded Reload + ld.d $a5, $sp, 248 # 8-byte Folded Reload + ld.d $a6, $sp, 240 # 8-byte Folded Reload move $a7, $zero - st.d $t6, $sp, 424 # 8-byte Folded Spill - st.d $t1, $sp, 416 # 8-byte Folded Spill - st.d $t2, $sp, 408 # 8-byte Folded Spill - st.d $t5, $sp, 400 # 8-byte Folded Spill + st.d $t6, $sp, 416 # 8-byte Folded Spill + st.d $t1, $sp, 408 # 8-byte Folded Spill + st.d $t2, $sp, 400 # 8-byte Folded Spill + st.d $t3, $sp, 392 # 8-byte Folded Spill pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 - ld.d $t5, $sp, 400 # 8-byte Folded Reload - ld.d $t2, $sp, 408 # 8-byte Folded Reload - ld.d $t1, $sp, 416 # 8-byte Folded Reload - ld.d $t3, $sp, 336 # 8-byte Folded Reload - ld.d $t6, $sp, 424 # 8-byte Folded Reload + ld.d $t3, $sp, 392 # 8-byte Folded Reload + ld.d $t2, $sp, 400 # 8-byte Folded Reload + ld.d $t1, $sp, 408 # 8-byte Folded Reload + ld.d $t7, $sp, 328 # 8-byte Folded Reload + ld.d $t6, $sp, 416 # 8-byte Folded Reload ld.d $t4, $sp, 344 # 8-byte Folded Reload - ld.d $ra, $sp, 392 # 8-byte Folded Reload - ld.d $a0, $sp, 160 # 8-byte Folded Reload + ld.d $ra, $sp, 384 # 8-byte Folded Reload + ld.d $a0, $sp, 168 # 8-byte Folded Reload bnez $a0, .LBB6_164 .LBB6_162: # %.lr.ph.i539.preheader # in Loop: Header=BB6_158 Depth=1 ld.d $a0, $sp, 280 # 8-byte Folded Reload - ldx.w $a0, $a0, $t5 + ldx.w $a0, $a0, $t3 slli.d $a0, $a0, 3 - ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $a1, $sp, 144 # 8-byte Folded Reload ldx.d $a0, $a1, $a0 ld.d $a3, $sp, 288 # 8-byte Folded Reload move $a2, $t1 - ld.d $a1, $sp, 328 # 8-byte Folded Reload + ld.d $a1, $sp, 320 # 8-byte Folded Reload .p2align 4, , 16 .LBB6_163: # %.lr.ph.i539 # Parent Loop BB6_158 Depth=1 @@ -7366,40 +7358,38 @@ Q__align_gapmap: # @Q__align_gapmap bnez $a1, .LBB6_163 .LBB6_164: # %imp_match_out_veadQ_gapmap.exit544 # in Loop: Header=BB6_158 Depth=1 - fldx.s $fa0, $t3, $t5 + fldx.s $fa0, $t7, $t3 fst.s $fa0, $t1, 0 addi.d $a0, $s8, 1 - ld.d $a1, $sp, 376 # 8-byte Folded Reload + ld.d $a1, $sp, 368 # 8-byte Folded Reload bnez $a1, .LBB6_157 # %bb.165: # %.lr.ph617.preheader # in Loop: Header=BB6_158 Depth=1 move $a1, $zero move $a2, $zero move $a3, $zero + ld.d $a4, $sp, 224 # 8-byte Folded Reload + fldx.s $fa0, $a4, $t3 ld.d $a4, $sp, 216 # 8-byte Folded Reload - fldx.s $fa0, $a4, $t5 + fldx.s $fa1, $a4, $t3 ld.d $a4, $sp, 208 # 8-byte Folded Reload - fldx.s $fa1, $a4, $t5 + fldx.s $fa2, $a4, $t3 ld.d $a4, $sp, 200 # 8-byte Folded Reload - fldx.s $fa2, $a4, $t5 - ld.d $a4, $sp, 192 # 8-byte Folded Reload - fldx.s $fa3, $a4, $t5 - ld.d $a5, $sp, 184 # 8-byte Folded Reload - fldx.s $fa4, $a5, $t5 + fldx.s $fa3, $a4, $t3 + ld.d $a5, $sp, 192 # 8-byte Folded Reload + fldx.s $fa4, $a5, $t3 slli.d $a4, $a0, 2 fldx.s $fa5, $a5, $a4 - ld.d $a4, $sp, 176 # 8-byte Folded Reload - fldx.s $fa6, $a4, $t5 + ld.d $a4, $sp, 184 # 8-byte Folded Reload + fldx.s $fa6, $a4, $t3 fld.s $ft0, $t6, 0 - ld.d $a4, $sp, 272 # 8-byte Folded Reload - fld.s $ft1, $a4, %pc_lo12(.LCPI6_2) slli.d $a4, $s8, 3 - ld.d $a5, $sp, 224 # 8-byte Folded Reload + ld.d $a5, $sp, 232 # 8-byte Folded Reload ldx.d $a5, $a5, $a4 - ld.d $a4, $sp, 168 # 8-byte Folded Reload - fldx.s $fa7, $a4, $t5 - vld $vr10, $sp, 304 # 16-byte Folded Reload - fmadd.s $ft0, $ft2, $ft1, $ft0 + ld.d $a4, $sp, 176 # 8-byte Folded Reload + fldx.s $fa7, $a4, $t3 + vld $vr9, $sp, 304 # 16-byte Folded Reload + fmadd.s $ft0, $ft1, $fs1, $ft0 addi.d $a4, $t1, 4 addi.d $a5, $a5, 4 addi.d $a6, $zero, -1 @@ -7476,23 +7466,23 @@ Q__align_gapmap: # @Q__align_gapmap bnez $s0, .LBB6_189 b .LBB6_175 .LBB6_174: # %._crit_edge627.loopexit - ld.d $a0, $sp, 56 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload ld.w $s0, $a0, 0 move $t5, $t1 - ld.d $s7, $sp, 144 # 8-byte Folded Reload - ld.d $s6, $sp, 440 # 8-byte Folded Reload - ld.d $s5, $sp, 104 # 8-byte Folded Reload - ld.d $s4, $sp, 80 # 8-byte Folded Reload - ld.d $s2, $sp, 96 # 8-byte Folded Reload + ld.d $s7, $sp, 152 # 8-byte Folded Reload + ld.d $s6, $sp, 432 # 8-byte Folded Reload + ld.d $s5, $sp, 112 # 8-byte Folded Reload + ld.d $s4, $sp, 88 # 8-byte Folded Reload + ld.d $s2, $sp, 104 # 8-byte Folded Reload bnez $s0, .LBB6_189 .LBB6_175: # %.preheader555 - ld.d $a0, $sp, 376 # 8-byte Folded Reload + ld.d $a0, $sp, 368 # 8-byte Folded Reload bnez $a0, .LBB6_182 # %bb.176: # %.lr.ph632 pcalau12i $a0, %got_pc_hi20(offset) ld.d $a0, $a0, %got_pc_lo12(offset) ld.w $a0, $a0, 0 - ld.d $a1, $sp, 328 # 8-byte Folded Reload + ld.d $a1, $sp, 320 # 8-byte Folded Reload addi.d $a1, $a1, 1 bstrpick.d $a1, $a1, 31, 0 addi.d $a2, $a1, -1 @@ -7506,12 +7496,12 @@ Q__align_gapmap: # @Q__align_gapmap move $a3, $a2 bstrins.d $a3, $a5, 1, 0 vreplgr2vr.w $vr0, $a0 - ld.d $a5, $sp, 328 # 8-byte Folded Reload + ld.d $a5, $sp, 320 # 8-byte Folded Reload vreplgr2vr.d $vr1, $a5 - pcalau12i $a5, %pc_hi20(.LCPI6_3) - vld $vr2, $a5, %pc_lo12(.LCPI6_3) - pcalau12i $a5, %pc_hi20(.LCPI6_4) - vld $vr3, $a5, %pc_lo12(.LCPI6_4) + pcalau12i $a5, %pc_hi20(.LCPI6_1) + vld $vr2, $a5, %pc_lo12(.LCPI6_1) + pcalau12i $a5, %pc_hi20(.LCPI6_2) + vld $vr3, $a5, %pc_lo12(.LCPI6_2) addi.d $a5, $t5, 4 lu52i.d $a6, $zero, -1026 vreplgr2vr.d $vr4, $a6 @@ -7572,7 +7562,7 @@ Q__align_gapmap: # @Q__align_gapmap # %bb.179: # %middle.block1176 beq $a2, $a4, .LBB6_182 .LBB6_180: # %scalar.ph1160.preheader - ld.d $a2, $sp, 328 # 8-byte Folded Reload + ld.d $a2, $sp, 320 # 8-byte Folded Reload sub.w $a2, $a2, $a3 mul.d $a2, $a0, $a2 alsl.d $a4, $a3, $t5, 2 @@ -7604,7 +7594,7 @@ Q__align_gapmap: # @Q__align_gapmap ffint.d.w $fa1, $fa0 movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 - ld.d $a0, $sp, 120 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $a1, $a0, %pc_lo12(Q__align_gapmap.lastverticalw) fneg.d $fa1, $fa1 addi.d $a0, $s4, 1 @@ -7672,21 +7662,21 @@ Q__align_gapmap: # @Q__align_gapmap addi.d $a1, $a1, 4 bnez $a2, .LBB6_188 .LBB6_189: # %.loopexit - ld.d $a0, $sp, 120 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $s8, $a0, %pc_lo12(Q__align_gapmap.lastverticalw) ld.d $s4, $s5, %pc_lo12(Q__align_gapmap.mseq1) - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 120 # 8-byte Folded Reload move $fp, $s5 ld.d $s5, $a0, %pc_lo12(Q__align_gapmap.mseq2) - ld.d $a0, $sp, 72 # 8-byte Folded Reload + ld.d $a0, $sp, 80 # 8-byte Folded Reload ld.d $s3, $a0, %pc_lo12(Q__align_gapmap.ijp) - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a0, $sp, 160 # 8-byte Folded Reload beqz $a0, .LBB6_191 # %bb.190: st.d $ra, $sp, 0 move $a0, $t5 move $a1, $s8 - ld.d $s8, $sp, 432 # 8-byte Folded Reload + ld.d $s8, $sp, 424 # 8-byte Folded Reload move $a2, $s8 move $a3, $s7 move $a4, $s4 @@ -7698,7 +7688,7 @@ Q__align_gapmap: # @Q__align_gapmap ld.d $s4, $fp, %pc_lo12(Q__align_gapmap.mseq1) b .LBB6_271 .LBB6_191: - ld.d $a0, $sp, 432 # 8-byte Folded Reload + ld.d $a0, $sp, 424 # 8-byte Folded Reload ld.d $a0, $a0, 0 move $fp, $t5 pcaddu18i $ra, %call36(strlen) @@ -7767,8 +7757,8 @@ Q__align_gapmap: # @Q__align_gapmap fmov.s $fa0, $fa1 b .LBB6_199 .LBB6_202: # %.loopexit.i - ld.d $ra, $sp, 392 # 8-byte Folded Reload - ld.d $s8, $sp, 432 # 8-byte Folded Reload + ld.d $ra, $sp, 384 # 8-byte Folded Reload + ld.d $s8, $sp, 424 # 8-byte Folded Reload bltz $a1, .LBB6_222 # %bb.203: # %.lr.ph17.preheader.i addi.d $a4, $s7, 1 @@ -7779,106 +7769,108 @@ Q__align_gapmap: # @Q__align_gapmap move $a4, $zero b .LBB6_220 .LBB6_205: # %vector.memcheck1067 - addi.d $t0, $a3, 4 - alsl.d $t2, $a5, $a3, 2 - addi.d $t1, $a1, 8 - sltu $t1, $t0, $t1 - sltu $t3, $a1, $t2 - and $t1, $t1, $t3 - bnez $t1, .LBB6_122 -# %bb.206: # %vector.memcheck1067 - addi.d $t1, $a2, 4 - sltu $t1, $t0, $t1 - sltu $t3, $a2, $t2 - and $t1, $t1, $t3 - bnez $t1, .LBB6_122 -# %bb.207: # %vector.memcheck1067 addi.d $t1, $a4, 4 - alsl.d $t3, $a5, $a4, 2 - sltu $t3, $t0, $t3 + alsl.d $t3, $a6, $a4, 2 + addi.d $t2, $a2, 8 sltu $t2, $t1, $t2 - and $t2, $t3, $t2 - bnez $t2, .LBB6_122 + sltu $t4, $a2, $t3 + and $t2, $t2, $t4 + bnez $t2, .LBB6_282 +# %bb.206: # %vector.memcheck1067 + addi.d $t2, $a3, 4 + sltu $t2, $t1, $t2 + sltu $t4, $a3, $t3 + and $t2, $t2, $t4 + bnez $t2, .LBB6_282 +# %bb.207: # %vector.memcheck1067 + addi.d $t2, $a5, 4 + alsl.d $t4, $a6, $a5, 2 + sltu $t4, $t1, $t4 + sltu $t3, $t2, $t3 + and $t3, $t4, $t3 + ld.d $t4, $sp, 344 # 8-byte Folded Reload + bnez $t3, .LBB6_122 # %bb.208: # %vector.ph1087 - move $t2, $a7 - bstrins.d $t2, $zero, 1, 0 - fld.s $fa1, $a1, 0 - fld.s $fa2, $a2, 0 - ori $t3, $zero, 1 - move $a6, $a7 - vldrepl.w $vr0, $a1, 4 - bstrins.d $a6, $t3, 1, 0 + move $t3, $t0 + bstrins.d $t3, $zero, 1, 0 + fld.s $fa1, $a2, 0 + fld.s $fa2, $a3, 0 + ori $t4, $zero, 1 + move $a7, $t0 + vldrepl.w $vr0, $a2, 4 + bstrins.d $a7, $t4, 1, 0 fmul.s $fa1, $fa1, $fa2 vreplvei.w $vr1, $vr1, 0 - move $t3, $t2 + move $t4, $t3 .p2align 4, , 16 .LBB6_209: # %vector.body1090 # =>This Inner Loop Header: Depth=1 - vld $vr2, $t0, 0 - vld $vr3, $t1, 0 + vld $vr2, $t1, 0 + vld $vr3, $t2, 0 vfadd.s $vr2, $vr2, $vr1 vfmul.s $vr3, $vr0, $vr3 vfadd.s $vr2, $vr2, $vr3 - vst $vr2, $t0, 0 - addi.d $t0, $t0, 16 - addi.d $t3, $t3, -4 + vst $vr2, $t1, 0 addi.d $t1, $t1, 16 - bnez $t3, .LBB6_209 + addi.d $t4, $t4, -4 + addi.d $t2, $t2, 16 + bnez $t4, .LBB6_209 # %bb.210: # %middle.block1102 - bne $a7, $t2, .LBB6_122 + ld.d $t4, $sp, 344 # 8-byte Folded Reload + bne $t0, $t3, .LBB6_122 b .LBB6_124 .LBB6_211: # %vector.memcheck1105 - addi.d $a7, $t5, 4 - alsl.d $t1, $a4, $t5, 2 - addi.d $t0, $a1, 8 - sltu $t0, $a7, $t0 - sltu $t2, $a1, $t1 - and $t0, $t0, $t2 - bnez $t0, .LBB6_126 + addi.d $t0, $t5, 4 + alsl.d $t2, $a5, $t5, 2 + addi.d $t1, $a2, 8 + sltu $t1, $t0, $t1 + sltu $t3, $a2, $t2 + and $t1, $t1, $t3 + bnez $t1, .LBB6_126 # %bb.212: # %vector.memcheck1105 - addi.d $t0, $a2, 4 - sltu $t0, $a7, $t0 - sltu $t2, $a2, $t1 - and $t0, $t0, $t2 - bnez $t0, .LBB6_126 -# %bb.213: # %vector.memcheck1105 - addi.d $t0, $a3, 4 - alsl.d $t2, $a4, $a3, 2 - sltu $t2, $a7, $t2 + addi.d $t1, $a3, 4 sltu $t1, $t0, $t1 - and $t1, $t2, $t1 + sltu $t3, $a3, $t2 + and $t1, $t1, $t3 bnez $t1, .LBB6_126 +# %bb.213: # %vector.memcheck1105 + addi.d $t1, $a4, 4 + alsl.d $t3, $a5, $a4, 2 + sltu $t3, $t0, $t3 + sltu $t2, $t1, $t2 + and $t2, $t3, $t2 + bnez $t2, .LBB6_126 # %bb.214: # %vector.ph1125 - move $t1, $a6 - bstrins.d $t1, $zero, 1, 0 - fld.s $fa1, $a1, 0 - fld.s $fa2, $a2, 0 - ori $t2, $zero, 1 - move $a5, $a6 - vldrepl.w $vr0, $a1, 4 - bstrins.d $a5, $t2, 1, 0 + move $t2, $a7 + bstrins.d $t2, $zero, 1, 0 + fld.s $fa1, $a2, 0 + fld.s $fa2, $a3, 0 + ori $t3, $zero, 1 + move $a6, $a7 + vldrepl.w $vr0, $a2, 4 + bstrins.d $a6, $t3, 1, 0 fmul.s $fa1, $fa1, $fa2 vreplvei.w $vr1, $vr1, 0 - move $t2, $t1 + move $t3, $t2 .p2align 4, , 16 .LBB6_215: # %vector.body1128 # =>This Inner Loop Header: Depth=1 - vld $vr2, $a7, 0 - vld $vr3, $t0, 0 + vld $vr2, $t0, 0 + vld $vr3, $t1, 0 vfadd.s $vr2, $vr2, $vr1 vfmul.s $vr3, $vr0, $vr3 vfadd.s $vr2, $vr2, $vr3 - vst $vr2, $a7, 0 - addi.d $a7, $a7, 16 - addi.d $t2, $t2, -4 + vst $vr2, $t0, 0 addi.d $t0, $t0, 16 - bnez $t2, .LBB6_215 + addi.d $t3, $t3, -4 + addi.d $t1, $t1, 16 + bnez $t3, .LBB6_215 # %bb.216: # %middle.block1140 - bne $a6, $t1, .LBB6_126 + bne $a7, $t2, .LBB6_126 b .LBB6_128 .LBB6_217: # %vector.ph1194 - pcalau12i $a5, %pc_hi20(.LCPI6_5) - vld $vr0, $a5, %pc_lo12(.LCPI6_5) + pcalau12i $a5, %pc_hi20(.LCPI6_3) + vld $vr0, $a5, %pc_lo12(.LCPI6_3) bstrpick.d $a4, $a4, 31, 2 slli.d $a4, $a4, 2 addi.d $a5, $s3, 16 @@ -7932,8 +7924,8 @@ Q__align_gapmap: # @Q__align_gapmap .LBB6_225: # %vector.ph1209 bstrpick.d $a4, $a4, 31, 3 slli.d $a4, $a4, 3 - pcalau12i $a5, %pc_hi20(.LCPI6_6) - vld $vr0, $a5, %pc_lo12(.LCPI6_6) + pcalau12i $a5, %pc_hi20(.LCPI6_4) + vld $vr0, $a5, %pc_lo12(.LCPI6_4) addi.d $a5, $a2, 16 vrepli.b $vr1, -1 vrepli.w $vr2, -5 @@ -8052,8 +8044,8 @@ Q__align_gapmap: # @Q__align_gapmap .LBB6_245: # %._crit_edge29.i # in Loop: Header=BB6_246 Depth=2 addi.d $t8, $t8, -1 - ld.d $s6, $sp, 440 # 8-byte Folded Reload - ld.d $s8, $sp, 432 # 8-byte Folded Reload + ld.d $s6, $sp, 432 # 8-byte Folded Reload + ld.d $s8, $sp, 424 # 8-byte Folded Reload beqz $t8, .LBB6_250 .LBB6_246: # %.preheader3.i # Parent Loop BB6_239 Depth=1 @@ -8106,7 +8098,7 @@ Q__align_gapmap: # @Q__align_gapmap .LBB6_251: # %._crit_edge32.i # in Loop: Header=BB6_239 Depth=1 add.w $t1, $t6, $t1 - ld.d $s7, $sp, 144 # 8-byte Folded Reload + ld.d $s7, $sp, 152 # 8-byte Folded Reload ld.d $t7, $sp, 296 # 8-byte Folded Reload beq $t6, $t7, .LBB6_258 # %bb.252: # %.preheader1.preheader.i @@ -8230,16 +8222,16 @@ Q__align_gapmap: # @Q__align_gapmap bnez $t5, .LBB6_268 b .LBB6_238 .LBB6_269: - ld.d $s7, $sp, 144 # 8-byte Folded Reload + ld.d $s7, $sp, 152 # 8-byte Folded Reload .LBB6_270: # %Atracking_localhom_gapmap.exit - ld.d $fp, $sp, 104 # 8-byte Folded Reload + ld.d $fp, $sp, 112 # 8-byte Folded Reload .LBB6_271: # %Atracking_localhom_gapmap.exit ld.d $a0, $s4, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 addi.w $a3, $a0, 0 lu12i.w $a4, 1220 - ld.d $a2, $sp, 64 # 8-byte Folded Reload + ld.d $a2, $sp, 72 # 8-byte Folded Reload blt $a2, $a3, .LBB6_281 # %bb.272: # %Atracking_localhom_gapmap.exit ori $a0, $a4, 2881 @@ -8259,10 +8251,10 @@ Q__align_gapmap: # @Q__align_gapmap addi.d $s8, $s8, 8 bnez $s6, .LBB6_275 .LBB6_276: # %.preheader - ld.d $a2, $sp, 392 # 8-byte Folded Reload + ld.d $a2, $sp, 384 # 8-byte Folded Reload blez $a2, .LBB6_279 # %bb.277: # %.lr.ph640 - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 120 # 8-byte Folded Reload ld.d $fp, $a0, %pc_lo12(Q__align_gapmap.mseq2) .p2align 4, , 16 .LBB6_278: # =>This Inner Loop Header: Depth=1 @@ -8277,6 +8269,7 @@ Q__align_gapmap: # @Q__align_gapmap bnez $a2, .LBB6_278 .LBB6_279: # %._crit_edge641 fmov.s $fa0, $fs0 + fld.d $fs1, $sp, 440 # 8-byte Folded Reload fld.d $fs0, $sp, 448 # 8-byte Folded Reload ld.d $s8, $sp, 456 # 8-byte Folded Reload ld.d $s7, $sp, 464 # 8-byte Folded Reload @@ -8293,8 +8286,8 @@ Q__align_gapmap: # @Q__align_gapmap ret .LBB6_280: movgr2fr.w $fa0, $zero - ori $a2, $zero, 1 - st.d $a2, $sp, 376 # 8-byte Folded Spill + ori $a3, $zero, 1 + st.d $a3, $sp, 368 # 8-byte Folded Spill b .LBB6_155 .LBB6_281: pcalau12i $a0, %got_pc_hi20(stderr) @@ -8312,10 +8305,13 @@ Q__align_gapmap: # @Q__align_gapmap bgtz $s6, .LBB6_274 b .LBB6_276 .LBB6_282: - ld.d $s2, $sp, 96 # 8-byte Folded Reload - b .LBB6_27 + ld.d $t4, $sp, 344 # 8-byte Folded Reload + b .LBB6_122 .LBB6_283: - ld.d $s2, $sp, 96 # 8-byte Folded Reload + ld.d $s2, $sp, 104 # 8-byte Folded Reload + b .LBB6_27 +.LBB6_284: + ld.d $s2, $sp, 104 # 8-byte Folded Reload b .LBB6_32 .Lfunc_end6: .size Q__align_gapmap, .Lfunc_end6-Q__align_gapmap diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Ralignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Ralignmm.s index fa74ae53..f7811a73 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Ralignmm.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Ralignmm.s @@ -429,35 +429,27 @@ imp_match_init_strictR: # @imp_match_init_strictR .Lfunc_end1: .size imp_match_init_strictR, .Lfunc_end1-imp_match_init_strictR # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function R__align -.LCPI2_0: - .dword 0x3ff4cccccccccccd # double 1.3 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI2_1: + .p2align 4, 0x0 # -- Begin function R__align +.LCPI2_0: .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 .word 4 # 0x4 -.LCPI2_3: +.LCPI2_1: .dword 3 # 0x3 .dword 4 # 0x4 -.LCPI2_4: +.LCPI2_2: .dword 1 # 0x1 .dword 2 # 0x2 -.LCPI2_5: +.LCPI2_3: .dword 0 # 0x0 .dword 1 # 0x1 -.LCPI2_6: +.LCPI2_4: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI2_2: - .word 0x461c4000 # float 1.0E+4 .text .globl R__align .p2align 5 @@ -478,24 +470,25 @@ R__align: # @R__align st.d $s8, $sp, 456 # 8-byte Folded Spill fst.d $fs0, $sp, 448 # 8-byte Folded Spill fst.d $fs1, $sp, 440 # 8-byte Folded Spill - st.d $a7, $sp, 40 # 8-byte Folded Spill - st.d $a6, $sp, 24 # 8-byte Folded Spill + fst.d $fs2, $sp, 432 # 8-byte Folded Spill + st.d $a7, $sp, 56 # 8-byte Folded Spill + st.d $a6, $sp, 8 # 8-byte Folded Spill move $s6, $a5 - st.d $a4, $sp, 104 # 8-byte Folded Spill - st.d $a3, $sp, 288 # 8-byte Folded Spill - st.d $a2, $sp, 280 # 8-byte Folded Spill + st.d $a4, $sp, 96 # 8-byte Folded Spill + st.d $a3, $sp, 272 # 8-byte Folded Spill + move $s4, $a2 move $s8, $a1 move $s3, $a0 pcalau12i $a0, %got_pc_hi20(penalty) ld.d $a0, $a0, %got_pc_lo12(penalty) - pcalau12i $s4, %pc_hi20(R__align.orlgth1) - ld.w $s2, $s4, %pc_lo12(R__align.orlgth1) + pcalau12i $s0, %pc_hi20(R__align.orlgth1) + ld.w $s2, $s0, %pc_lo12(R__align.orlgth1) ld.w $a0, $a0, 0 - st.d $a0, $sp, 416 # 8-byte Folded Spill + st.d $a0, $sp, 400 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.mseq1) - st.d $a0, $sp, 96 # 8-byte Folded Spill + st.d $a0, $sp, 88 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.mseq2) - st.d $a0, $sp, 80 # 8-byte Folded Spill + st.d $a0, $sp, 64 # 8-byte Folded Spill bnez $s2, .LBB2_2 # %bb.1: pcalau12i $a0, %got_pc_hi20(njob) @@ -505,247 +498,251 @@ R__align: # @R__align pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 ld.w $a1, $fp, 0 - ld.d $a2, $sp, 96 # 8-byte Folded Reload + ld.d $a2, $sp, 88 # 8-byte Folded Reload st.d $a0, $a2, %pc_lo12(R__align.mseq1) move $a0, $a1 move $a1, $zero pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 - ld.w $s2, $s4, %pc_lo12(R__align.orlgth1) - ld.d $a1, $sp, 80 # 8-byte Folded Reload + ld.w $s2, $s0, %pc_lo12(R__align.orlgth1) + ld.d $a1, $sp, 64 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.mseq2) .LBB2_2: ld.d $a0, $s3, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 ld.d $a1, $s8, 0 - move $s5, $a0 - addi.w $s0, $a0, 0 + st.d $a0, $sp, 48 # 8-byte Folded Spill + addi.w $s5, $a0, 0 move $a0, $a1 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 pcalau12i $a1, %pc_hi20(R__align.orlgth2) - st.d $a1, $sp, 224 # 8-byte Folded Spill + st.d $a1, $sp, 216 # 8-byte Folded Spill ld.w $fp, $a1, %pc_lo12(R__align.orlgth2) - st.d $a0, $sp, 184 # 8-byte Folded Spill + st.d $a0, $sp, 176 # 8-byte Folded Spill addi.w $a1, $a0, 0 pcalau12i $a0, %pc_hi20(R__align.w1) - st.d $a0, $sp, 392 # 8-byte Folded Spill + st.d $a0, $sp, 368 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.w2) - st.d $a0, $sp, 376 # 8-byte Folded Spill + st.d $a0, $sp, 360 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.initverticalw) - st.d $a0, $sp, 368 # 8-byte Folded Spill + st.d $a0, $sp, 352 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.lastverticalw) - st.d $a0, $sp, 88 # 8-byte Folded Spill + st.d $a0, $sp, 72 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.m) - st.d $a0, $sp, 384 # 8-byte Folded Spill + st.d $a0, $sp, 376 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.mp) - st.d $a0, $sp, 320 # 8-byte Folded Spill + st.d $a0, $sp, 304 # 8-byte Folded Spill pcalau12i $s1, %pc_hi20(R__align.mseq) pcalau12i $a0, %pc_hi20(R__align.digf1) - st.d $a0, $sp, 272 # 8-byte Folded Spill + st.d $a0, $sp, 264 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.digf2) - st.d $a0, $sp, 256 # 8-byte Folded Spill + st.d $a0, $sp, 248 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.diaf1) - st.d $a0, $sp, 264 # 8-byte Folded Spill + st.d $a0, $sp, 256 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.diaf2) - st.d $a0, $sp, 240 # 8-byte Folded Spill + st.d $a0, $sp, 232 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.gapz1) - st.d $a0, $sp, 248 # 8-byte Folded Spill + st.d $a0, $sp, 240 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.gapz2) - st.d $a0, $sp, 232 # 8-byte Folded Spill + st.d $a0, $sp, 224 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.gapf1) - st.d $a0, $sp, 304 # 8-byte Folded Spill + st.d $a0, $sp, 280 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.gapf2) - st.d $a0, $sp, 344 # 8-byte Folded Spill + st.d $a0, $sp, 328 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.ogcp1g) - st.d $a0, $sp, 336 # 8-byte Folded Spill + st.d $a0, $sp, 320 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.ogcp2g) - st.d $a0, $sp, 328 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(R__align.fgcp1g) st.d $a0, $sp, 312 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(R__align.fgcp2g) + pcalau12i $a0, %pc_hi20(R__align.fgcp1g) st.d $a0, $sp, 296 # 8-byte Folded Spill + pcalau12i $a0, %pc_hi20(R__align.fgcp2g) + st.d $a0, $sp, 288 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.cpmx1) - st.d $a0, $sp, 408 # 8-byte Folded Spill + st.d $a0, $sp, 392 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.cpmx2) - st.d $a0, $sp, 400 # 8-byte Folded Spill + st.d $a0, $sp, 384 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.floatwork) - st.d $a0, $sp, 360 # 8-byte Folded Spill + st.d $a0, $sp, 344 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(R__align.intwork) - st.d $a0, $sp, 352 # 8-byte Folded Spill - st.d $s6, $sp, 48 # 8-byte Folded Spill - st.d $s8, $sp, 112 # 8-byte Folded Spill - st.d $s3, $sp, 120 # 8-byte Folded Spill - st.d $s0, $sp, 72 # 8-byte Folded Spill - st.d $a1, $sp, 432 # 8-byte Folded Spill - st.d $s5, $sp, 64 # 8-byte Folded Spill - blt $s2, $s0, .LBB2_4 + st.d $a0, $sp, 336 # 8-byte Folded Spill + st.d $s6, $sp, 32 # 8-byte Folded Spill + st.d $s8, $sp, 104 # 8-byte Folded Spill + st.d $s3, $sp, 112 # 8-byte Folded Spill + st.d $s5, $sp, 80 # 8-byte Folded Spill + st.d $a1, $sp, 424 # 8-byte Folded Spill + blt $s2, $s5, .LBB2_4 # %bb.3: bge $fp, $a1, .LBB2_9 .LBB2_4: pcalau12i $s6, %pc_hi20(R__align.match) pcalau12i $a0, %pc_hi20(R__align.ogcp1) - st.d $a0, $sp, 216 # 8-byte Folded Spill - pcalau12i $s7, %pc_hi20(R__align.ogcp2) - pcalau12i $s0, %pc_hi20(R__align.fgcp1) + st.d $a0, $sp, 200 # 8-byte Folded Spill + pcalau12i $a0, %pc_hi20(R__align.ogcp2) + st.d $a0, $sp, 208 # 8-byte Folded Spill + pcalau12i $s7, %pc_hi20(R__align.fgcp1) pcalau12i $s3, %pc_hi20(R__align.fgcp2) + st.d $s4, $sp, 192 # 8-byte Folded Spill blez $s2, .LBB2_7 # %bb.5: - ld.d $a2, $sp, 184 # 8-byte Folded Reload + ld.d $s5, $sp, 176 # 8-byte Folded Reload blez $fp, .LBB2_8 # %bb.6: - ld.d $a0, $sp, 392 # 8-byte Folded Reload + ld.d $a0, $sp, 368 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.w1) - move $fp, $a2 pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 376 # 8-byte Folded Reload + ld.d $a0, $sp, 360 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.w2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 ld.d $a0, $s6, %pc_lo12(R__align.match) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 368 # 8-byte Folded Reload + ld.d $a0, $sp, 352 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.initverticalw) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 88 # 8-byte Folded Reload + ld.d $a0, $sp, 72 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.lastverticalw) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 384 # 8-byte Folded Reload + ld.d $a0, $sp, 376 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.m) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 320 # 8-byte Folded Reload + ld.d $a0, $sp, 304 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.mp) pcaddu18i $ra, %call36(FreeIntVec) jirl $ra, $ra, 0 ld.d $a0, $s1, %pc_lo12(R__align.mseq) pcaddu18i $ra, %call36(FreeCharMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 272 # 8-byte Folded Reload + ld.d $a0, $sp, 264 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.digf1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 256 # 8-byte Folded Reload + ld.d $a0, $sp, 248 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.digf2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 264 # 8-byte Folded Reload + ld.d $a0, $sp, 256 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.diaf1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 240 # 8-byte Folded Reload + ld.d $a0, $sp, 232 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.diaf2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 248 # 8-byte Folded Reload + ld.d $a0, $sp, 240 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.gapz1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 232 # 8-byte Folded Reload + ld.d $a0, $sp, 224 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.gapz2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 304 # 8-byte Folded Reload + ld.d $a0, $sp, 280 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.gapf1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 344 # 8-byte Folded Reload + ld.d $a0, $sp, 328 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.gapf2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 216 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.ogcp1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $s7, %pc_lo12(R__align.ogcp2) + ld.d $a0, $sp, 208 # 8-byte Folded Reload + ld.d $a0, $a0, %pc_lo12(R__align.ogcp2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $s0, %pc_lo12(R__align.fgcp1) + ld.d $a0, $s7, %pc_lo12(R__align.fgcp1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 ld.d $a0, $s3, %pc_lo12(R__align.fgcp2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 336 # 8-byte Folded Reload + ld.d $a0, $sp, 320 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.ogcp1g) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 328 # 8-byte Folded Reload + ld.d $a0, $sp, 312 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.ogcp2g) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 312 # 8-byte Folded Reload + ld.d $a0, $sp, 296 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.fgcp1g) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 296 # 8-byte Folded Reload + ld.d $a0, $sp, 288 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.fgcp2g) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 408 # 8-byte Folded Reload + ld.d $a0, $sp, 392 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.cpmx1) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 400 # 8-byte Folded Reload + ld.d $a0, $sp, 384 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.cpmx2) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 360 # 8-byte Folded Reload + ld.d $a0, $sp, 344 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.floatwork) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 352 # 8-byte Folded Reload + ld.d $a0, $sp, 336 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.intwork) pcaddu18i $ra, %call36(FreeIntMtx) jirl $ra, $ra, 0 - move $a2, $fp - ld.w $s2, $s4, %pc_lo12(R__align.orlgth1) - ld.d $a0, $sp, 224 # 8-byte Folded Reload + ld.w $s2, $s0, %pc_lo12(R__align.orlgth1) + ld.d $a0, $sp, 216 # 8-byte Folded Reload ld.w $fp, $a0, %pc_lo12(R__align.orlgth2) b .LBB2_8 .LBB2_7: - ld.d $a2, $sp, 184 # 8-byte Folded Reload + ld.d $s5, $sp, 176 # 8-byte Folded Reload .LBB2_8: - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI2_0) - movgr2fr.w $fa1, $s5 - ffint.d.w $fa1, $fa1 - fmul.d $fa1, $fa1, $fa0 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 + ld.d $a0, $sp, 48 # 8-byte Folded Reload + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + lu12i.w $a0, -209716 + ori $a0, $a0, 3277 + lu32i.d $a0, 314572 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a0, $fa0 slt $a1, $a0, $s2 masknez $a0, $a0, $a1 maskeqz $a1, $s2, $a1 or $s2, $a1, $a0 - move $s8, $s4 - move $s4, $s7 - st.d $s3, $sp, 208 # 8-byte Folded Spill + st.d $s3, $sp, 184 # 8-byte Folded Spill addi.w $s3, $s2, 100 - movgr2fr.w $fa1, $a2 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $s5 + ffint.d.w $fa0, $fa0 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 slt $a1, $a0, $fp masknez $a0, $a0, $a1 maskeqz $a1, $fp, $a1 or $fp, $a1, $a0 + move $s4, $s0 + move $s0, $s7 addi.w $s7, $fp, 100 addi.w $s5, $fp, 102 move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 392 # 8-byte Folded Reload + ld.d $a1, $sp, 368 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.w1) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 376 # 8-byte Folded Reload + ld.d $a1, $sp, 360 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.w2) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -755,22 +752,22 @@ R__align: # @R__align move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 368 # 8-byte Folded Reload + ld.d $a1, $sp, 352 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.initverticalw) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 88 # 8-byte Folded Reload + ld.d $a1, $sp, 72 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.lastverticalw) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 384 # 8-byte Folded Reload + ld.d $a1, $sp, 376 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.m) move $a0, $s5 pcaddu18i $ra, %call36(AllocateIntVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 320 # 8-byte Folded Reload + ld.d $a1, $sp, 304 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.mp) pcalau12i $a0, %got_pc_hi20(njob) ld.d $a0, $a0, %got_pc_lo12(njob) @@ -782,93 +779,95 @@ R__align: # @R__align move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 272 # 8-byte Folded Reload + ld.d $a1, $sp, 264 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.digf1) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 256 # 8-byte Folded Reload + ld.d $a1, $sp, 248 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.digf2) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 264 # 8-byte Folded Reload + ld.d $a1, $sp, 256 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.diaf1) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 240 # 8-byte Folded Reload + ld.d $a1, $sp, 232 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.diaf2) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 248 # 8-byte Folded Reload + ld.d $a1, $sp, 240 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.gapz1) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 232 # 8-byte Folded Reload + ld.d $a1, $sp, 224 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.gapz2) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 304 # 8-byte Folded Reload + ld.d $a1, $sp, 280 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.gapf1) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 344 # 8-byte Folded Reload + ld.d $a1, $sp, 328 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.gapf2) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 216 # 8-byte Folded Reload + ld.d $a1, $sp, 200 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.ogcp1) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - st.d $a0, $s4, %pc_lo12(R__align.ogcp2) + ld.d $a1, $sp, 208 # 8-byte Folded Reload + st.d $a0, $a1, %pc_lo12(R__align.ogcp2) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 st.d $a0, $s0, %pc_lo12(R__align.fgcp1) + ld.d $s8, $sp, 216 # 8-byte Folded Reload + move $s0, $s4 move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 208 # 8-byte Folded Reload + ld.d $a1, $sp, 184 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.fgcp2) - move $s4, $s8 move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 336 # 8-byte Folded Reload + ld.d $a1, $sp, 320 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.ogcp1g) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 328 # 8-byte Folded Reload + ld.d $a1, $sp, 312 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.ogcp2g) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 312 # 8-byte Folded Reload + ld.d $a1, $sp, 296 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.fgcp1g) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 296 # 8-byte Folded Reload + ld.d $a1, $sp, 288 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.fgcp2g) ori $a0, $zero, 26 move $a1, $s6 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 408 # 8-byte Folded Reload + ld.d $a1, $sp, 392 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.cpmx1) ori $a0, $zero, 26 move $a1, $s5 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 400 # 8-byte Folded Reload + ld.d $a1, $sp, 384 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.cpmx2) slt $a0, $s7, $s3 masknez $a1, $s7, $a0 @@ -879,48 +878,48 @@ R__align: # @R__align move $a0, $s3 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 360 # 8-byte Folded Reload + ld.d $a1, $sp, 344 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.floatwork) ori $a1, $zero, 27 move $a0, $s3 pcaddu18i $ra, %call36(AllocateIntMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 352 # 8-byte Folded Reload + ld.d $a1, $sp, 336 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(R__align.intwork) - st.w $s2, $s8, %pc_lo12(R__align.orlgth1) - ld.d $a0, $sp, 224 # 8-byte Folded Reload - st.w $fp, $a0, %pc_lo12(R__align.orlgth2) - ld.d $s6, $sp, 48 # 8-byte Folded Reload - ld.d $s8, $sp, 112 # 8-byte Folded Reload - ld.d $s3, $sp, 120 # 8-byte Folded Reload + st.w $s2, $s4, %pc_lo12(R__align.orlgth1) + st.w $fp, $s8, %pc_lo12(R__align.orlgth2) + ld.d $s6, $sp, 32 # 8-byte Folded Reload + ld.d $s8, $sp, 104 # 8-byte Folded Reload + ld.d $s3, $sp, 112 # 8-byte Folded Reload + ld.d $s5, $sp, 80 # 8-byte Folded Reload + ld.d $s4, $sp, 192 # 8-byte Folded Reload .LBB2_9: - ld.d $a0, $sp, 104 # 8-byte Folded Reload - ld.d $a5, $sp, 72 # 8-byte Folded Reload - ld.d $a6, $sp, 432 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload blez $a0, .LBB2_12 # %bb.10: # %.lr.ph ld.d $a0, $s1, %pc_lo12(R__align.mseq) - ld.d $a1, $sp, 96 # 8-byte Folded Reload + ld.d $a1, $sp, 88 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(R__align.mseq1) - ld.d $a2, $sp, 104 # 8-byte Folded Reload + ld.d $a2, $sp, 96 # 8-byte Folded Reload .p2align 4, , 16 .LBB2_11: # =>This Inner Loop Header: Depth=1 ld.d $a4, $a0, 0 st.d $a4, $a1, 0 ld.d $a4, $s3, 0 - stx.b $zero, $a4, $a5 + stx.b $zero, $a4, $s5 addi.d $s3, $s3, 8 addi.d $a1, $a1, 8 addi.d $a2, $a2, -1 addi.d $a0, $a0, 8 bnez $a2, .LBB2_11 .LBB2_12: # %.preheader580 + ld.d $a5, $sp, 424 # 8-byte Folded Reload blez $s6, .LBB2_15 # %bb.13: # %.lr.ph592 ld.d $a1, $s1, %pc_lo12(R__align.mseq) - ld.d $a0, $sp, 80 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.mseq2) - ld.d $a2, $sp, 104 # 8-byte Folded Reload + ld.d $a2, $sp, 96 # 8-byte Folded Reload alsl.d $a1, $a2, $a1, 3 move $a2, $s6 move $a3, $s8 @@ -929,7 +928,7 @@ R__align: # @R__align ld.d $a4, $a1, 0 st.d $a4, $a0, 0 ld.d $a4, $a3, 0 - stx.b $zero, $a4, $a6 + stx.b $zero, $a4, $a5 addi.d $a1, $a1, 8 addi.d $a3, $a3, 8 addi.d $a2, $a2, -1 @@ -937,7 +936,7 @@ R__align: # @R__align bnez $a2, .LBB2_14 .LBB2_15: # %._crit_edge ld.d $s7, $sp, 552 - ld.d $a0, $sp, 416 # 8-byte Folded Reload + ld.d $a0, $sp, 400 # 8-byte Folded Reload movgr2fr.w $fs0, $a0 pcalau12i $a0, %got_pc_hi20(commonAlloc1) ld.d $s1, $a0, %got_pc_lo12(commonAlloc1) @@ -963,9 +962,9 @@ R__align: # @R__align ld.d $a0, $a0, 0 pcaddu18i $ra, %call36(FreeIntMtx) jirl $ra, $ra, 0 - ld.w $s2, $s4, %pc_lo12(R__align.orlgth1) + ld.w $s2, $s0, %pc_lo12(R__align.orlgth1) ld.w $a0, $s1, 0 - ld.d $a1, $sp, 224 # 8-byte Folded Reload + ld.d $a1, $sp, 216 # 8-byte Folded Reload ld.w $fp, $a1, %pc_lo12(R__align.orlgth2) ld.w $a1, $s3, 0 .LBB2_21: @@ -987,325 +986,318 @@ R__align: # @R__align st.w $s0, $s1, 0 st.w $fp, $s3, 0 .LBB2_22: - ld.d $a1, $sp, 408 # 8-byte Folded Reload + ld.d $a1, $sp, 392 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(R__align.cpmx1) ffint.s.w $fa0, $fs0 - vst $vr0, $sp, 416 # 16-byte Folded Spill - pcalau12i $s1, %pc_hi20(R__align.ijp) - st.d $a0, $s1, %pc_lo12(R__align.ijp) - ld.d $s0, $sp, 120 # 8-byte Folded Reload + vst $vr0, $sp, 400 # 16-byte Folded Spill + pcalau12i $s2, %pc_hi20(R__align.ijp) + st.d $a0, $s2, %pc_lo12(R__align.ijp) + ld.d $s0, $sp, 112 # 8-byte Folded Reload move $a0, $s0 - ld.d $s3, $sp, 280 # 8-byte Folded Reload - move $a2, $s3 - ld.d $s2, $sp, 72 # 8-byte Folded Reload - move $a3, $s2 - ld.d $fp, $sp, 104 # 8-byte Folded Reload + move $a2, $s4 + ld.d $s1, $sp, 80 # 8-byte Folded Reload + move $a3, $s1 + ld.d $fp, $sp, 96 # 8-byte Folded Reload move $a4, $fp pcaddu18i $ra, %call36(cpmx_calc_new) jirl $ra, $ra, 0 - ld.d $a0, $sp, 400 # 8-byte Folded Reload + ld.d $a0, $sp, 384 # 8-byte Folded Reload ld.d $a1, $a0, %pc_lo12(R__align.cpmx2) move $a0, $s8 - ld.d $s4, $sp, 288 # 8-byte Folded Reload + move $s5, $s4 + ld.d $s4, $sp, 272 # 8-byte Folded Reload move $a2, $s4 - ld.d $s5, $sp, 432 # 8-byte Folded Reload - move $a3, $s5 + ld.d $s3, $sp, 424 # 8-byte Folded Reload + move $a3, $s3 move $a4, $s6 pcaddu18i $ra, %call36(cpmx_calc_new) jirl $ra, $ra, 0 - ld.d $a0, $sp, 336 # 8-byte Folded Reload + ld.d $a0, $sp, 320 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.ogcp1g) - st.d $s1, $sp, 32 # 8-byte Folded Spill + st.d $s2, $sp, 24 # 8-byte Folded Spill bnez $s7, .LBB2_24 # %bb.23: move $a1, $fp move $a2, $s0 - move $a3, $s3 - move $a4, $s2 + move $a3, $s5 + move $a4, $s1 pcaddu18i $ra, %call36(st_OpeningGapCount) jirl $ra, $ra, 0 - ld.d $a0, $sp, 328 # 8-byte Folded Reload + ld.d $a0, $sp, 312 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.ogcp2g) move $a1, $s6 move $a2, $s8 move $a3, $s4 - move $a4, $s5 + move $a4, $s3 pcaddu18i $ra, %call36(st_OpeningGapCount) jirl $ra, $ra, 0 - ld.d $a0, $sp, 312 # 8-byte Folded Reload + ld.d $a0, $sp, 296 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.fgcp1g) move $a1, $fp move $a2, $s0 - move $a3, $s3 - move $a4, $s2 + move $a3, $s5 + move $a4, $s1 pcaddu18i $ra, %call36(st_FinalGapCount_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 296 # 8-byte Folded Reload + ld.d $a0, $sp, 288 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.fgcp2g) move $a1, $s6 move $a2, $s8 move $a3, $s4 - move $a4, $s5 + move $a4, $s3 pcaddu18i $ra, %call36(st_FinalGapCount_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 272 # 8-byte Folded Reload + ld.d $a0, $sp, 264 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.digf1) move $a1, $fp move $a2, $s0 - move $a3, $s3 - move $a4, $s2 + move $a3, $s5 + move $a4, $s1 pcaddu18i $ra, %call36(getdigapfreq_st) jirl $ra, $ra, 0 - ld.d $a0, $sp, 256 # 8-byte Folded Reload + ld.d $a0, $sp, 248 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.digf2) move $a1, $s6 move $a2, $s8 move $a3, $s4 - move $a4, $s5 + move $a4, $s3 pcaddu18i $ra, %call36(getdigapfreq_st) jirl $ra, $ra, 0 - ld.d $a0, $sp, 264 # 8-byte Folded Reload + ld.d $a0, $sp, 256 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.diaf1) move $a1, $fp move $a2, $s0 - move $a3, $s3 - move $a4, $s2 + move $a3, $s5 + move $a4, $s1 pcaddu18i $ra, %call36(getdiaminofreq_x) jirl $ra, $ra, 0 - ld.d $a0, $sp, 240 # 8-byte Folded Reload + ld.d $a0, $sp, 232 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.diaf2) move $a1, $s6 move $a2, $s8 move $a3, $s4 - move $a4, $s5 + move $a4, $s3 pcaddu18i $ra, %call36(getdiaminofreq_x) jirl $ra, $ra, 0 - ld.d $a0, $sp, 304 # 8-byte Folded Reload + ld.d $a0, $sp, 280 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.gapf1) move $a1, $fp move $a2, $s0 - move $a3, $s3 - move $a4, $s2 + move $a3, $s5 + move $a4, $s1 pcaddu18i $ra, %call36(getgapfreq) jirl $ra, $ra, 0 - ld.d $a0, $sp, 344 # 8-byte Folded Reload + ld.d $a0, $sp, 328 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.gapf2) move $a1, $s6 move $a2, $s8 move $a3, $s4 - move $a4, $s5 + move $a4, $s3 pcaddu18i $ra, %call36(getgapfreq) jirl $ra, $ra, 0 - ld.d $a0, $sp, 248 # 8-byte Folded Reload + ld.d $a0, $sp, 240 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.gapz1) move $a1, $fp move $a2, $s0 - move $a3, $s3 - move $a4, $s2 + move $a3, $s5 + move $a4, $s1 pcaddu18i $ra, %call36(getgapfreq_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 232 # 8-byte Folded Reload + ld.d $a0, $sp, 224 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.gapz2) move $a1, $s6 move $a2, $s8 move $a3, $s4 - move $a4, $s5 + move $s2, $s3 + move $a4, $s3 pcaddu18i $ra, %call36(getgapfreq_zure) jirl $ra, $ra, 0 b .LBB2_25 .LBB2_24: - ld.d $a1, $sp, 576 - st.d $a1, $sp, 224 # 8-byte Folded Spill - ld.d $s0, $sp, 568 + ld.d $s2, $sp, 576 + ld.d $s3, $sp, 568 ld.d $s8, $sp, 560 move $a1, $fp - ld.d $a2, $sp, 120 # 8-byte Folded Reload - move $a3, $s3 - move $a4, $s2 + ld.d $a2, $sp, 112 # 8-byte Folded Reload + move $a3, $s5 + move $a4, $s1 move $a5, $s7 - move $a6, $s0 + move $a6, $s3 pcaddu18i $ra, %call36(new_OpeningGapCount_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 328 # 8-byte Folded Reload + ld.d $a0, $sp, 312 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.ogcp2g) move $a1, $s6 - ld.d $a2, $sp, 112 # 8-byte Folded Reload + ld.d $a2, $sp, 104 # 8-byte Folded Reload move $a3, $s4 - move $a4, $s5 + ld.d $a4, $sp, 424 # 8-byte Folded Reload move $a5, $s8 - ld.d $s1, $sp, 224 # 8-byte Folded Reload - move $a6, $s1 + move $a6, $s2 pcaddu18i $ra, %call36(new_OpeningGapCount_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 312 # 8-byte Folded Reload + ld.d $a0, $sp, 296 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.fgcp1g) move $a1, $fp - ld.d $a2, $sp, 120 # 8-byte Folded Reload - move $a3, $s3 - move $a4, $s2 + ld.d $a2, $sp, 112 # 8-byte Folded Reload + move $a3, $s5 + move $a4, $s1 move $a5, $s7 - move $a6, $s0 + move $a6, $s3 pcaddu18i $ra, %call36(new_FinalGapCount_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 296 # 8-byte Folded Reload + ld.d $a0, $sp, 288 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.fgcp2g) move $a1, $s6 - ld.d $a2, $sp, 112 # 8-byte Folded Reload + ld.d $a2, $sp, 104 # 8-byte Folded Reload move $a3, $s4 - move $a4, $s5 + ld.d $a4, $sp, 424 # 8-byte Folded Reload move $a5, $s8 - move $a6, $s1 + move $a6, $s2 pcaddu18i $ra, %call36(new_FinalGapCount_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 272 # 8-byte Folded Reload + ld.d $a0, $sp, 264 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.digf1) move $a1, $fp - ld.d $a2, $sp, 120 # 8-byte Folded Reload - move $a3, $s3 - move $a4, $s2 + ld.d $a2, $sp, 112 # 8-byte Folded Reload + move $a3, $s5 + move $a4, $s1 move $a5, $s7 - move $a6, $s0 + move $a6, $s3 pcaddu18i $ra, %call36(getdigapfreq_part) jirl $ra, $ra, 0 - ld.d $a0, $sp, 256 # 8-byte Folded Reload + ld.d $a0, $sp, 248 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.digf2) move $a1, $s6 - ld.d $a2, $sp, 112 # 8-byte Folded Reload + ld.d $a2, $sp, 104 # 8-byte Folded Reload move $a3, $s4 - move $a4, $s5 + ld.d $a4, $sp, 424 # 8-byte Folded Reload move $a5, $s8 - move $a6, $s1 + move $a6, $s2 pcaddu18i $ra, %call36(getdigapfreq_part) jirl $ra, $ra, 0 - ld.d $a0, $sp, 264 # 8-byte Folded Reload + ld.d $a0, $sp, 256 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.diaf1) move $a1, $fp - ld.d $a2, $sp, 120 # 8-byte Folded Reload - move $a3, $s3 - move $a4, $s2 + ld.d $a2, $sp, 112 # 8-byte Folded Reload + move $a3, $s5 + move $a4, $s1 move $a5, $s7 - move $a6, $s0 - ld.d $s0, $sp, 120 # 8-byte Folded Reload + move $a6, $s3 + ld.d $s3, $sp, 424 # 8-byte Folded Reload + ld.d $s0, $sp, 112 # 8-byte Folded Reload pcaddu18i $ra, %call36(getdiaminofreq_part) jirl $ra, $ra, 0 - ld.d $a0, $sp, 240 # 8-byte Folded Reload + ld.d $a0, $sp, 232 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.diaf2) move $a1, $s6 - ld.d $a2, $sp, 112 # 8-byte Folded Reload + ld.d $a2, $sp, 104 # 8-byte Folded Reload move $a3, $s4 - move $a4, $s5 + move $a4, $s3 move $a5, $s8 - move $a6, $s1 - ld.d $s1, $sp, 112 # 8-byte Folded Reload + move $a6, $s2 + ld.d $s2, $sp, 104 # 8-byte Folded Reload pcaddu18i $ra, %call36(getdiaminofreq_part) jirl $ra, $ra, 0 - ld.d $a0, $sp, 304 # 8-byte Folded Reload + ld.d $a0, $sp, 280 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.gapf1) move $a1, $fp move $a2, $s0 - move $a3, $s3 - move $a4, $s2 + move $a3, $s5 + move $a4, $s1 pcaddu18i $ra, %call36(getgapfreq) jirl $ra, $ra, 0 - ld.d $a0, $sp, 344 # 8-byte Folded Reload + ld.d $a0, $sp, 328 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.gapf2) move $a1, $s6 - move $a2, $s1 + move $a2, $s2 move $a3, $s4 - move $a4, $s5 + move $a4, $s3 pcaddu18i $ra, %call36(getgapfreq) jirl $ra, $ra, 0 - ld.d $a0, $sp, 248 # 8-byte Folded Reload + ld.d $a0, $sp, 240 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.gapz1) move $a1, $fp move $a2, $s0 - move $a3, $s3 - move $a4, $s2 + move $a3, $s5 + move $a4, $s1 move $a5, $s7 pcaddu18i $ra, %call36(getgapfreq_zure_part) jirl $ra, $ra, 0 - ld.d $a0, $sp, 232 # 8-byte Folded Reload + ld.d $a0, $sp, 224 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.gapz2) move $a1, $s6 - move $a2, $s1 + move $a2, $s2 move $a3, $s4 - move $a4, $s5 + move $s2, $s3 + move $a4, $s3 move $a5, $s7 pcaddu18i $ra, %call36(getgapfreq_zure_part) jirl $ra, $ra, 0 .LBB2_25: - ld.d $a0, $sp, 392 # 8-byte Folded Reload + ld.d $a0, $sp, 368 # 8-byte Folded Reload ld.d $s8, $a0, %pc_lo12(R__align.w1) - ld.d $a0, $sp, 376 # 8-byte Folded Reload + ld.d $a0, $sp, 360 # 8-byte Folded Reload ld.d $s7, $a0, %pc_lo12(R__align.w2) - ld.d $a0, $sp, 368 # 8-byte Folded Reload + ld.d $a0, $sp, 352 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.initverticalw) - ld.d $a1, $sp, 400 # 8-byte Folded Reload + ld.d $a1, $sp, 384 # 8-byte Folded Reload ld.d $fp, $a1, %pc_lo12(R__align.cpmx2) - ld.d $a1, $sp, 408 # 8-byte Folded Reload - ld.d $s4, $a1, %pc_lo12(R__align.cpmx1) - ld.d $a1, $sp, 360 # 8-byte Folded Reload - ld.d $s1, $a1, %pc_lo12(R__align.floatwork) - ld.d $a1, $sp, 352 # 8-byte Folded Reload - ld.d $s0, $a1, %pc_lo12(R__align.intwork) + ld.d $a1, $sp, 392 # 8-byte Folded Reload + ld.d $s0, $a1, %pc_lo12(R__align.cpmx1) + ld.d $a1, $sp, 344 # 8-byte Folded Reload + ld.d $s4, $a1, %pc_lo12(R__align.floatwork) + ld.d $a1, $sp, 336 # 8-byte Folded Reload + ld.d $s5, $a1, %pc_lo12(R__align.intwork) ori $a7, $zero, 1 - st.d $a0, $sp, 400 # 8-byte Folded Spill + st.d $a0, $sp, 392 # 8-byte Folded Spill move $a1, $fp - move $a2, $s4 + move $a2, $s0 move $a3, $zero - move $a4, $s2 - move $a5, $s1 - move $a6, $s0 + move $a4, $s1 + move $a5, $s4 + move $a6, $s5 pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(impmtx) - st.d $a0, $sp, 152 # 8-byte Folded Spill - ld.d $s6, $sp, 184 # 8-byte Folded Reload + st.d $a0, $sp, 144 # 8-byte Folded Spill + ld.d $s6, $sp, 176 # 8-byte Folded Reload slli.d $s3, $s6, 32 addi.d $a0, $s6, 1 - st.d $a0, $sp, 56 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI2_2) - st.d $a0, $sp, 304 # 8-byte Folded Spill - vld $vr0, $sp, 416 # 16-byte Folded Reload + st.d $a0, $sp, 40 # 8-byte Folded Spill + vld $vr0, $sp, 400 # 16-byte Folded Reload fcvt.d.s $fs1, $fa0 - ld.d $a0, $sp, 40 # 8-byte Folded Reload - st.d $s4, $sp, 408 # 8-byte Folded Spill - move $s5, $a0 - st.d $fp, $sp, 392 # 8-byte Folded Spill + ld.d $a0, $sp, 56 # 8-byte Folded Reload + st.d $fp, $sp, 384 # 8-byte Folded Spill bnez $a0, .LBB2_27 # %bb.26: # %.critedge ori $a7, $zero, 1 move $a0, $s8 - move $a1, $s4 + move $a1, $s0 move $a2, $fp move $a3, $zero - ld.d $fp, $sp, 432 # 8-byte Folded Reload - move $a4, $fp - move $a5, $s1 - move $a6, $s0 + move $a4, $s2 + move $a5, $s4 + move $a6, $s5 pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 - ld.d $t7, $sp, 400 # 8-byte Folded Reload + move $t4, $s6 + ld.d $t7, $sp, 392 # 8-byte Folded Reload move $t8, $s7 - move $s4, $s1 + move $s1, $s0 move $t2, $s8 - ld.d $t3, $sp, 32 # 8-byte Folded Reload - move $s8, $s5 - move $t1, $fp - ld.d $t4, $sp, 64 # 8-byte Folded Reload - move $t5, $s2 - ld.d $s2, $sp, 112 # 8-byte Folded Reload + ld.d $t3, $sp, 24 # 8-byte Folded Reload + ld.d $s8, $sp, 56 # 8-byte Folded Reload + ld.d $s6, $sp, 104 # 8-byte Folded Reload + lu12i.w $t5, 287172 b .LBB2_41 .LBB2_27: - move $s4, $s1 - move $s1, $s2 - blez $s2, .LBB2_30 + blez $s1, .LBB2_30 # %bb.28: # %.lr.ph.i - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a0, $sp, 144 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(impmtx) - ld.d $a1, $sp, 64 # 8-byte Folded Reload + ld.d $a1, $sp, 48 # 8-byte Folded Reload bstrpick.d $a1, $a1, 30, 0 - ld.d $a2, $sp, 400 # 8-byte Folded Reload + ld.d $a2, $sp, 392 # 8-byte Folded Reload .p2align 4, , 16 .LBB2_29: # =>This Inner Loop Header: Depth=1 ld.d $a3, $a0, 0 @@ -1321,13 +1313,13 @@ R__align: # @R__align ori $a7, $zero, 1 ori $fp, $zero, 1 move $a0, $s8 - ld.d $a1, $sp, 408 # 8-byte Folded Reload - ld.d $a2, $sp, 392 # 8-byte Folded Reload + move $s1, $s0 + move $a1, $s0 + ld.d $a2, $sp, 384 # 8-byte Folded Reload move $a3, $zero - ld.d $s2, $sp, 432 # 8-byte Folded Reload move $a4, $s2 move $a5, $s4 - move $a6, $s0 + move $a6, $s5 pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 bnez $s2, .LBB2_32 @@ -1335,74 +1327,70 @@ R__align: # @R__align pcalau12i $a0, %got_pc_hi20(outgap) ld.d $a0, $a0, %got_pc_lo12(outgap) ld.w $a0, $a0, 0 - ld.d $t7, $sp, 400 # 8-byte Folded Reload + ld.d $t7, $sp, 392 # 8-byte Folded Reload move $t8, $s7 move $t2, $s8 - ld.d $t3, $sp, 32 # 8-byte Folded Reload - move $s8, $s5 - ld.d $t4, $sp, 64 # 8-byte Folded Reload - move $t5, $s1 - ld.d $s2, $sp, 112 # 8-byte Folded Reload - ld.d $t1, $sp, 432 # 8-byte Folded Reload + ld.d $t3, $sp, 24 # 8-byte Folded Reload + ld.d $s8, $sp, 56 # 8-byte Folded Reload + move $t4, $s6 + ld.d $s6, $sp, 104 # 8-byte Folded Reload + lu12i.w $t5, 287172 beq $a0, $fp, .LBB2_42 b .LBB2_54 .LBB2_32: # %.lr.ph.preheader.i - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a0, $sp, 144 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(impmtx) ld.d $a5, $a0, 0 + move $t4, $s6 bstrpick.d $a3, $s6, 31, 0 ori $a0, $zero, 8 - ld.d $t7, $sp, 400 # 8-byte Folded Reload + ld.d $t7, $sp, 392 # 8-byte Folded Reload move $t8, $s7 bgeu $a3, $a0, .LBB2_34 # %bb.33: move $a1, $a5 - move $a0, $s6 + move $a0, $t4 move $t2, $s8 move $a2, $s8 - ld.d $t3, $sp, 32 # 8-byte Folded Reload - move $s8, $s5 - ld.d $t4, $sp, 64 # 8-byte Folded Reload - move $t5, $s1 - ld.d $s2, $sp, 112 # 8-byte Folded Reload - ld.d $t1, $sp, 432 # 8-byte Folded Reload + ld.d $t3, $sp, 24 # 8-byte Folded Reload + ld.d $s8, $sp, 56 # 8-byte Folded Reload + ld.d $s6, $sp, 104 # 8-byte Folded Reload + lu12i.w $t5, 287172 b .LBB2_40 .LBB2_34: # %vector.memcheck addi.w $a0, $zero, -4 lu32i.d $a0, 3 - alsl.d $a0, $s6, $a0, 2 + alsl.d $a0, $t4, $a0, 2 bstrpick.d $a0, $a0, 33, 2 slli.d $a0, $a0, 2 addi.d $a0, $a0, 4 add.d $a1, $a5, $a0 move $t2, $s8 - ld.d $s2, $sp, 112 # 8-byte Folded Reload + ld.d $s6, $sp, 104 # 8-byte Folded Reload bgeu $s8, $a1, .LBB2_37 # %bb.35: # %vector.memcheck add.d $a0, $t2, $a0 bgeu $a5, $a0, .LBB2_37 # %bb.36: move $a1, $a5 - move $a0, $s6 + move $a0, $t4 move $a2, $t2 - ld.d $t3, $sp, 32 # 8-byte Folded Reload - move $s8, $s5 - ld.d $t4, $sp, 64 # 8-byte Folded Reload - move $t5, $s1 - ld.d $t1, $sp, 432 # 8-byte Folded Reload + ld.d $t3, $sp, 24 # 8-byte Folded Reload + ld.d $s8, $sp, 56 # 8-byte Folded Reload + lu12i.w $t5, 287172 b .LBB2_40 .LBB2_37: # %vector.ph - bstrpick.d $a0, $s6, 31, 3 + bstrpick.d $a0, $t4, 31, 3 slli.d $a4, $a0, 3 slli.d $a2, $a0, 5 add.d $a1, $a5, $a2 - andi $a0, $s6, 7 + andi $a0, $t4, 7 add.d $a2, $t2, $a2 addi.d $a6, $t2, 16 addi.d $a5, $a5, 16 move $a7, $a4 - ld.d $t3, $sp, 32 # 8-byte Folded Reload - move $s8, $s5 + ld.d $t3, $sp, 24 # 8-byte Folded Reload + lu12i.w $t5, 287172 .p2align 4, , 16 .LBB2_38: # %vector.body # =>This Inner Loop Header: Depth=1 @@ -1419,9 +1407,7 @@ R__align: # @R__align addi.d $a5, $a5, 32 bnez $a7, .LBB2_38 # %bb.39: # %middle.block - ld.d $t4, $sp, 64 # 8-byte Folded Reload - move $t5, $s1 - ld.d $t1, $sp, 432 # 8-byte Folded Reload + ld.d $s8, $sp, 56 # 8-byte Folded Reload beq $a3, $a4, .LBB2_41 .p2align 4, , 16 .LBB2_40: # %.lr.ph.i506 @@ -1442,9 +1428,9 @@ R__align: # @R__align ori $a1, $zero, 1 bne $a0, $a1, .LBB2_47 .LBB2_42: - ld.d $a0, $sp, 336 # 8-byte Folded Reload + ld.d $a0, $sp, 320 # 8-byte Folded Reload ld.d $a2, $a0, %pc_lo12(R__align.ogcp1g) - ld.d $a0, $sp, 328 # 8-byte Folded Reload + ld.d $a0, $sp, 312 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.ogcp2g) fld.s $fa0, $a2, 0 fld.s $fa1, $a0, 0 @@ -1460,9 +1446,9 @@ R__align: # @R__align fcvt.s.d $fa3, $fa3 fsub.d $fa0, $fa2, $fa0 fmul.d $fa0, $fa0, $fa1 - ld.d $a1, $sp, 312 # 8-byte Folded Reload - ld.d $a3, $a1, %pc_lo12(R__align.fgcp1g) ld.d $a1, $sp, 296 # 8-byte Folded Reload + ld.d $a3, $a1, %pc_lo12(R__align.fgcp1g) + ld.d $a1, $sp, 288 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(R__align.fgcp2g) fmul.d $fa0, $fa0, $fs1 fcvt.d.s $fa1, $fa3 @@ -1490,16 +1476,18 @@ R__align: # @R__align fld.s $fa1, $t2, 0 fadd.s $fa0, $fa1, $fa0 fst.s $fa0, $t2, 0 - vld $vr9, $sp, 416 # 16-byte Folded Reload - blez $t5, .LBB2_45 + ld.d $a4, $sp, 80 # 8-byte Folded Reload + vld $vr9, $sp, 400 # 16-byte Folded Reload + blez $a4, .LBB2_45 # %bb.43: # %.lr.ph603 - ld.d $a4, $sp, 232 # 8-byte Folded Reload + ld.d $a4, $sp, 224 # 8-byte Folded Reload ld.d $a4, $a4, %pc_lo12(R__align.gapz2) - ld.d $a5, $sp, 272 # 8-byte Folded Reload + ld.d $a5, $sp, 264 # 8-byte Folded Reload ld.d $a5, $a5, %pc_lo12(R__align.digf1) - ld.d $a6, $sp, 264 # 8-byte Folded Reload + ld.d $a6, $sp, 256 # 8-byte Folded Reload ld.d $a6, $a6, %pc_lo12(R__align.diaf1) - addi.d $a7, $t4, 1 + ld.d $a7, $sp, 48 # 8-byte Folded Reload + addi.d $a7, $a7, 1 bstrpick.d $a7, $a7, 31, 0 addi.d $a7, $a7, -1 movgr2fr.w $fa1, $zero @@ -1571,19 +1559,19 @@ R__align: # @R__align addi.d $t0, $t0, 4 bnez $a7, .LBB2_44 .LBB2_45: # %.preheader575 - bgtz $t1, .LBB2_64 + bgtz $s2, .LBB2_64 # %bb.46: # %.loopexit576.thread - ld.d $a0, $sp, 384 # 8-byte Folded Reload + ld.d $a0, $sp, 376 # 8-byte Folded Reload ld.d $a6, $a0, %pc_lo12(R__align.m) st.w $zero, $a6, 0 b .LBB2_62 .LBB2_47: # %.preheader579 - blez $t1, .LBB2_54 + blez $s2, .LBB2_54 # %bb.48: # %.lr.ph595 pcalau12i $a0, %got_pc_hi20(offset) ld.d $a0, $a0, %got_pc_lo12(offset) ld.w $a0, $a0, 0 - ld.d $a1, $sp, 56 # 8-byte Folded Reload + ld.d $a1, $sp, 40 # 8-byte Folded Reload bstrpick.d $a1, $a1, 31, 0 addi.d $a2, $a1, -1 ori $a4, $zero, 4 @@ -1596,8 +1584,8 @@ R__align: # @R__align move $a3, $a2 bstrins.d $a3, $a5, 1, 0 vreplgr2vr.w $vr0, $a0 - pcalau12i $a5, %pc_hi20(.LCPI2_1) - vld $vr1, $a5, %pc_lo12(.LCPI2_1) + pcalau12i $a5, %pc_hi20(.LCPI2_0) + vld $vr1, $a5, %pc_lo12(.LCPI2_0) addi.d $a5, $t2, 4 lu52i.d $a6, $zero, -1026 vreplgr2vr.d $vr2, $a6 @@ -1674,12 +1662,14 @@ R__align: # @R__align addi.d $a4, $a4, 4 bnez $a1, .LBB2_53 .LBB2_54: # %.preheader577 - blez $t5, .LBB2_61 + ld.d $a0, $sp, 80 # 8-byte Folded Reload + blez $a0, .LBB2_61 # %bb.55: # %.lr.ph598 pcalau12i $a0, %got_pc_hi20(offset) ld.d $a0, $a0, %got_pc_lo12(offset) ld.w $a0, $a0, 0 - addi.d $a1, $t4, 1 + ld.d $a1, $sp, 48 # 8-byte Folded Reload + addi.d $a1, $a1, 1 bstrpick.d $a1, $a1, 31, 0 addi.d $a2, $a1, -1 ori $a4, $zero, 4 @@ -1692,8 +1682,8 @@ R__align: # @R__align move $a3, $a2 bstrins.d $a3, $a5, 1, 0 vreplgr2vr.w $vr0, $a0 - pcalau12i $a5, %pc_hi20(.LCPI2_1) - vld $vr1, $a5, %pc_lo12(.LCPI2_1) + pcalau12i $a5, %pc_hi20(.LCPI2_0) + vld $vr1, $a5, %pc_lo12(.LCPI2_0) addi.d $a5, $t7, 4 lu52i.d $a6, $zero, -1026 vreplgr2vr.d $vr2, $a6 @@ -1770,28 +1760,27 @@ R__align: # @R__align addi.d $a4, $a4, 4 bnez $a1, .LBB2_60 .LBB2_61: # %.loopexit576 - ld.d $a0, $sp, 384 # 8-byte Folded Reload + ld.d $a0, $sp, 376 # 8-byte Folded Reload ld.d $a6, $a0, %pc_lo12(R__align.m) st.w $zero, $a6, 0 - vld $vr9, $sp, 416 # 16-byte Folded Reload - bgtz $t1, .LBB2_67 + vld $vr9, $sp, 400 # 16-byte Folded Reload + bgtz $s2, .LBB2_67 .LBB2_62: # %._crit_edge612 ori $a0, $zero, 1 - bnez $t1, .LBB2_75 + bnez $s2, .LBB2_75 # %bb.63: movgr2fr.w $fa0, $zero ori $a1, $zero, 1 - st.d $a1, $sp, 384 # 8-byte Folded Spill - ld.d $a3, $sp, 408 # 8-byte Folded Reload + st.d $a1, $sp, 376 # 8-byte Folded Spill b .LBB2_76 .LBB2_64: # %.lr.ph607 - ld.d $a2, $sp, 248 # 8-byte Folded Reload + ld.d $a2, $sp, 240 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(R__align.gapz1) - ld.d $a3, $sp, 256 # 8-byte Folded Reload + ld.d $a3, $sp, 248 # 8-byte Folded Reload ld.d $a3, $a3, %pc_lo12(R__align.digf2) - ld.d $a4, $sp, 240 # 8-byte Folded Reload + ld.d $a4, $sp, 232 # 8-byte Folded Reload ld.d $a4, $a4, %pc_lo12(R__align.diaf2) - ld.d $a5, $sp, 56 # 8-byte Folded Reload + ld.d $a5, $sp, 40 # 8-byte Folded Reload bstrpick.d $a5, $a5, 31, 0 addi.d $a5, $a5, -1 movgr2fr.w $fa1, $zero @@ -1863,13 +1852,13 @@ R__align: # @R__align addi.d $a6, $a6, 4 bnez $a5, .LBB2_65 # %bb.66: # %.loopexit576.thread774 - ld.d $a0, $sp, 384 # 8-byte Folded Reload + ld.d $a0, $sp, 376 # 8-byte Folded Reload ld.d $a6, $a0, %pc_lo12(R__align.m) st.w $zero, $a6, 0 .LBB2_67: # %.lr.ph611 - ld.d $a0, $sp, 320 # 8-byte Folded Reload + ld.d $a0, $sp, 304 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.mp) - ld.d $a1, $sp, 56 # 8-byte Folded Reload + ld.d $a1, $sp, 40 # 8-byte Folded Reload bstrpick.d $a1, $a1, 31, 0 addi.d $a2, $a1, -1 ori $a4, $zero, 8 @@ -1891,8 +1880,7 @@ R__align: # @R__align vreplvei.w $vr0, $vr9, 0 addi.d $a6, $a0, 20 vrepli.b $vr1, 0 - lu12i.w $a7, 287172 - vreplgr2vr.w $vr2, $a7 + vreplgr2vr.w $vr2, $t5 move $a7, $a4 .p2align 4, , 16 .LBB2_70: # %vector.body847 @@ -1915,11 +1903,10 @@ R__align: # @R__align move $a6, $fp beq $a2, $a4, .LBB2_74 .LBB2_72: # %scalar.ph840.preheader - ld.d $a2, $sp, 304 # 8-byte Folded Reload - fld.s $fa0, $a2, %pc_lo12(.LCPI2_2) slli.d $a2, $a3, 2 addi.d $a4, $t2, -4 sub.d $a1, $a1, $a3 + movgr2fr.w $fa0, $t5 .p2align 4, , 16 .LBB2_73: # %scalar.ph840 # =>This Inner Loop Header: Depth=1 @@ -1933,122 +1920,122 @@ R__align: # @R__align .LBB2_74: move $a0, $zero .LBB2_75: # %._crit_edge612.thread - ld.d $a3, $sp, 408 # 8-byte Folded Reload ori $a1, $zero, 0 lu32i.d $a1, -1 add.d $a1, $s3, $a1 srai.d $a1, $a1, 30 fldx.s $fa0, $t2, $a1 - st.d $a0, $sp, 384 # 8-byte Folded Spill + st.d $a0, $sp, 376 # 8-byte Folded Spill move $a0, $zero .LBB2_76: - ld.d $a1, $sp, 88 # 8-byte Folded Reload + ld.d $a1, $sp, 72 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(R__align.lastverticalw) - st.d $a1, $sp, 376 # 8-byte Folded Spill + st.d $a1, $sp, 368 # 8-byte Folded Spill fst.s $fa0, $a1, 0 pcalau12i $a1, %got_pc_hi20(outgap) - ld.d $s1, $a1, %got_pc_lo12(outgap) - ld.w $a1, $s1, 0 + ld.d $a1, $a1, %got_pc_lo12(outgap) + st.d $a1, $sp, 16 # 8-byte Folded Spill + ld.w $a1, $a1, 0 sltu $a2, $zero, $a1 - add.w $a4, $a2, $t4 + ld.d $a3, $sp, 48 # 8-byte Folded Reload + add.w $a3, $a2, $a3 ori $a2, $zero, 2 - st.d $a4, $sp, 368 # 8-byte Folded Spill - blt $a4, $a2, .LBB2_110 + st.d $a3, $sp, 360 # 8-byte Folded Spill + blt $a3, $a2, .LBB2_110 # %bb.77: # %.lr.ph633 - st.d $s1, $sp, 16 # 8-byte Folded Spill - st.d $s0, $sp, 352 # 8-byte Folded Spill - st.d $s4, $sp, 360 # 8-byte Folded Spill + st.d $s5, $sp, 336 # 8-byte Folded Spill + st.d $s4, $sp, 344 # 8-byte Folded Spill + st.d $s1, $sp, 352 # 8-byte Folded Spill sltui $a1, $s8, 1 or $a0, $a1, $a0 - st.d $a0, $sp, 344 # 8-byte Folded Spill - ld.d $a0, $sp, 152 # 8-byte Folded Reload + st.d $a0, $sp, 328 # 8-byte Folded Spill + ld.d $a0, $sp, 144 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(impmtx) - st.d $a0, $sp, 288 # 8-byte Folded Spill - ld.d $a0, $t3, %pc_lo12(R__align.ijp) st.d $a0, $sp, 280 # 8-byte Folded Spill + ld.d $a0, $t3, %pc_lo12(R__align.ijp) + st.d $a0, $sp, 272 # 8-byte Folded Spill + ld.d $a0, $sp, 304 # 8-byte Folded Reload + ld.d $a3, $a0, %pc_lo12(R__align.mp) ld.d $a0, $sp, 320 # 8-byte Folded Reload - ld.d $a4, $a0, %pc_lo12(R__align.mp) - ld.d $a0, $sp, 336 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.ogcp1g) - st.d $a0, $sp, 320 # 8-byte Folded Spill - ld.d $a0, $sp, 328 # 8-byte Folded Reload - ld.d $a5, $a0, %pc_lo12(R__align.ogcp2g) + st.d $a0, $sp, 304 # 8-byte Folded Spill ld.d $a0, $sp, 312 # 8-byte Folded Reload - ld.d $a0, $a0, %pc_lo12(R__align.fgcp1g) - st.d $a0, $sp, 312 # 8-byte Folded Spill + ld.d $a4, $a0, %pc_lo12(R__align.ogcp2g) ld.d $a0, $sp, 296 # 8-byte Folded Reload - ld.d $a7, $a0, %pc_lo12(R__align.fgcp2g) - ld.d $a0, $sp, 248 # 8-byte Folded Reload - ld.d $a0, $a0, %pc_lo12(R__align.gapz1) - st.d $a0, $sp, 248 # 8-byte Folded Spill - ld.d $a0, $sp, 256 # 8-byte Folded Reload - ld.d $t0, $a0, %pc_lo12(R__align.digf2) + ld.d $a0, $a0, %pc_lo12(R__align.fgcp1g) + st.d $a0, $sp, 296 # 8-byte Folded Spill + ld.d $a0, $sp, 288 # 8-byte Folded Reload + ld.d $a5, $a0, %pc_lo12(R__align.fgcp2g) ld.d $a0, $sp, 240 # 8-byte Folded Reload - ld.d $t1, $a0, %pc_lo12(R__align.diaf2) + ld.d $a0, $a0, %pc_lo12(R__align.gapz1) + st.d $a0, $sp, 240 # 8-byte Folded Spill + ld.d $a0, $sp, 248 # 8-byte Folded Reload + ld.d $a7, $a0, %pc_lo12(R__align.digf2) ld.d $a0, $sp, 232 # 8-byte Folded Reload - ld.d $t3, $a0, %pc_lo12(R__align.gapz2) - ld.d $a0, $sp, 272 # 8-byte Folded Reload - ld.d $a0, $a0, %pc_lo12(R__align.digf1) - st.d $a0, $sp, 216 # 8-byte Folded Spill + ld.d $t0, $a0, %pc_lo12(R__align.diaf2) + ld.d $a0, $sp, 224 # 8-byte Folded Reload + ld.d $t1, $a0, %pc_lo12(R__align.gapz2) ld.d $a0, $sp, 264 # 8-byte Folded Reload - ld.d $a0, $a0, %pc_lo12(R__align.diaf1) + ld.d $a0, $a0, %pc_lo12(R__align.digf1) st.d $a0, $sp, 208 # 8-byte Folded Spill + ld.d $a0, $sp, 256 # 8-byte Folded Reload + ld.d $a0, $a0, %pc_lo12(R__align.diaf1) + st.d $a0, $sp, 200 # 8-byte Folded Spill ori $a0, $zero, 0 lu32i.d $a0, -1 add.d $a0, $s3, $a0 srai.d $a0, $a0, 30 - st.d $a0, $sp, 336 # 8-byte Folded Spill - ld.d $a0, $sp, 56 # 8-byte Folded Reload + st.d $a0, $sp, 320 # 8-byte Folded Spill + ld.d $a0, $sp, 40 # 8-byte Folded Reload bstrpick.d $a0, $a0, 31, 0 - bstrpick.d $a1, $s6, 30, 0 - st.d $a1, $sp, 200 # 8-byte Folded Spill - addi.w $a1, $s6, -1 + bstrpick.d $a1, $t4, 30, 0 st.d $a1, $sp, 192 # 8-byte Folded Spill + addi.w $a1, $t4, -1 + st.d $a1, $sp, 184 # 8-byte Folded Spill bstrpick.d $a1, $a1, 31, 0 slli.d $a2, $a1, 2 addi.d $a2, $a2, 4 - st.d $a2, $sp, 176 # 8-byte Folded Spill + st.d $a2, $sp, 168 # 8-byte Folded Spill addi.d $a1, $a1, 1 - st.d $a1, $sp, 144 # 8-byte Folded Spill + st.d $a1, $sp, 136 # 8-byte Folded Spill bstrpick.d $a1, $a1, 32, 3 slli.d $a2, $a1, 3 slli.d $a1, $a1, 5 - st.d $a1, $sp, 136 # 8-byte Folded Spill - st.d $a2, $sp, 168 # 8-byte Folded Spill - sub.d $a1, $s6, $a2 st.d $a1, $sp, 128 # 8-byte Folded Spill - st.d $a6, $sp, 296 # 8-byte Folded Spill + st.d $a2, $sp, 160 # 8-byte Folded Spill + sub.d $a1, $t4, $a2 + st.d $a1, $sp, 120 # 8-byte Folded Spill + st.d $a6, $sp, 288 # 8-byte Folded Spill addi.d $fp, $a6, 8 - st.d $t1, $sp, 232 # 8-byte Folded Spill - addi.d $s4, $t1, 8 - st.d $t0, $sp, 240 # 8-byte Folded Spill - addi.d $s1, $t0, 8 - st.d $a7, $sp, 256 # 8-byte Folded Spill - addi.d $s2, $a7, 8 - st.d $a5, $sp, 264 # 8-byte Folded Spill - addi.d $s6, $a5, 8 + st.d $t0, $sp, 224 # 8-byte Folded Spill + addi.d $s4, $t0, 8 + st.d $a7, $sp, 232 # 8-byte Folded Spill + addi.d $s1, $a7, 8 + st.d $a5, $sp, 248 # 8-byte Folded Spill + addi.d $s2, $a5, 8 + movgr2fr.w $fs2, $t5 + st.d $a4, $sp, 256 # 8-byte Folded Spill + addi.d $s6, $a4, 8 addi.d $a0, $a0, -2 - st.d $a0, $sp, 160 # 8-byte Folded Spill + st.d $a0, $sp, 152 # 8-byte Folded Spill movgr2fr.w $fs0, $zero - st.d $t3, $sp, 224 # 8-byte Folded Spill - addi.d $s7, $t3, 12 - st.d $a4, $sp, 272 # 8-byte Folded Spill - addi.d $s3, $a4, 8 + st.d $t1, $sp, 216 # 8-byte Folded Spill + addi.d $s7, $t1, 12 + st.d $a3, $sp, 264 # 8-byte Folded Spill + addi.d $s3, $a3, 8 ori $s5, $zero, 1 - move $a1, $a3 - st.d $s3, $sp, 328 # 8-byte Folded Spill + st.d $s3, $sp, 312 # 8-byte Folded Spill b .LBB2_79 .p2align 4, , 16 .LBB2_78: # %._crit_edge625 # in Loop: Header=BB2_79 Depth=1 - ld.d $a1, $sp, 336 # 8-byte Folded Reload + ld.d $a1, $sp, 320 # 8-byte Folded Reload fldx.s $fa0, $s0, $a1 addi.d $s5, $s5, 1 - ld.d $a1, $sp, 376 # 8-byte Folded Reload + ld.d $a1, $sp, 368 # 8-byte Folded Reload fstx.s $fa0, $a1, $a0 move $t2, $s0 - ld.d $a1, $sp, 408 # 8-byte Folded Reload - ld.d $a0, $sp, 368 # 8-byte Folded Reload + ld.d $a0, $sp, 360 # 8-byte Folded Reload beq $s5, $a0, .LBB2_111 .LBB2_79: # =>This Loop Header: Depth=1 # Child Loop BB2_108 Depth 2 @@ -2066,29 +2053,30 @@ R__align: # @R__align move $s8, $t2 fst.s $fa0, $t2, 0 move $a0, $t8 - ld.d $a2, $sp, 392 # 8-byte Folded Reload + ld.d $a1, $sp, 352 # 8-byte Folded Reload + ld.d $a2, $sp, 384 # 8-byte Folded Reload move $a3, $s5 - ld.d $a4, $sp, 432 # 8-byte Folded Reload - ld.d $a5, $sp, 360 # 8-byte Folded Reload - ld.d $a6, $sp, 352 # 8-byte Folded Reload + ld.d $a4, $sp, 424 # 8-byte Folded Reload + ld.d $a5, $sp, 344 # 8-byte Folded Reload + ld.d $a6, $sp, 336 # 8-byte Folded Reload move $a7, $zero pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 move $a5, $s0 - vld $vr17, $sp, 416 # 16-byte Folded Reload - ld.d $a0, $sp, 344 # 8-byte Folded Reload + vld $vr17, $sp, 400 # 16-byte Folded Reload + ld.d $a0, $sp, 328 # 8-byte Folded Reload bnez $a0, .LBB2_85 # %bb.80: # %.lr.ph.preheader.i508 # in Loop: Header=BB2_79 Depth=1 slli.d $a0, $s5, 3 - ld.d $a1, $sp, 288 # 8-byte Folded Reload + ld.d $a1, $sp, 280 # 8-byte Folded Reload ldx.d $a3, $a1, $a0 - ld.d $a0, $sp, 192 # 8-byte Folded Reload + ld.d $a0, $sp, 184 # 8-byte Folded Reload ori $a1, $zero, 7 bltu $a0, $a1, .LBB2_83 # %bb.81: # %vector.memcheck856 # in Loop: Header=BB2_79 Depth=1 - ld.d $a1, $sp, 176 # 8-byte Folded Reload + ld.d $a1, $sp, 168 # 8-byte Folded Reload add.d $a0, $a3, $a1 bgeu $a5, $a0, .LBB2_107 # %bb.82: # %vector.memcheck856 @@ -2097,7 +2085,7 @@ R__align: # @R__align bgeu $a3, $a0, .LBB2_107 .LBB2_83: # in Loop: Header=BB2_79 Depth=1 move $a0, $a3 - ld.d $a2, $sp, 184 # 8-byte Folded Reload + ld.d $a2, $sp, 176 # 8-byte Folded Reload move $a1, $a5 .p2align 4, , 16 .LBB2_84: # %.lr.ph.i509 @@ -2115,12 +2103,12 @@ R__align: # @R__align .LBB2_85: # %imp_match_out_veadR.exit514 # in Loop: Header=BB2_79 Depth=1 slli.d $a0, $s5, 2 - ld.d $t7, $sp, 400 # 8-byte Folded Reload + ld.d $t7, $sp, 392 # 8-byte Folded Reload fldx.s $fa0, $t7, $a0 move $s0, $a5 fst.s $fa0, $a5, 0 move $t8, $s8 - ld.d $a1, $sp, 384 # 8-byte Folded Reload + ld.d $a1, $sp, 376 # 8-byte Folded Reload vldi $vr18, -912 vldi $vr19, -928 ori $s8, $zero, 1 @@ -2129,22 +2117,20 @@ R__align: # @R__align move $s1, $s2 move $s2, $s6 move $s6, $s3 - ld.d $s3, $sp, 328 # 8-byte Folded Reload + ld.d $s3, $sp, 312 # 8-byte Folded Reload bnez $a1, .LBB2_78 # %bb.86: # %.lr.ph624 # in Loop: Header=BB2_79 Depth=1 - fld.s $fa0, $t8, 0 - ld.d $a1, $sp, 304 # 8-byte Folded Reload - fld.s $fa1, $a1, %pc_lo12(.LCPI2_2) - fmadd.s $fa1, $ft9, $fa1, $fa0 + fld.s $fa1, $t8, 0 + fmadd.s $fa0, $ft9, $fs2, $fa1 slli.d $a1, $s5, 3 - ld.d $a2, $sp, 320 # 8-byte Folded Reload + ld.d $a2, $sp, 304 # 8-byte Folded Reload fldx.s $fa4, $a2, $a0 - ld.d $a2, $sp, 264 # 8-byte Folded Reload + ld.d $a2, $sp, 256 # 8-byte Folded Reload fld.s $fa2, $a2, 4 - ld.d $a2, $sp, 280 # 8-byte Folded Reload + ld.d $a2, $sp, 272 # 8-byte Folded Reload ldx.d $t1, $a2, $a1 - ld.d $a1, $sp, 248 # 8-byte Folded Reload + ld.d $a1, $sp, 240 # 8-byte Folded Reload alsl.d $a1, $s5, $a1, 2 fcvt.d.s $ft2, $fa4 fcvt.d.s $fa5, $fa2 @@ -2153,14 +2139,14 @@ R__align: # @R__align fmul.d $fa3, $fa3, $fs1 fmul.d $fa3, $fa3, $ft11 fcvt.s.d $fa3, $fa3 - fadd.s $fa3, $fa0, $fa3 + fadd.s $fa3, $fa1, $fa3 fsub.d $fa7, $ft10, $ft2 fmul.d $fa6, $fa7, $fa5 fmul.d $fa6, $fa6, $fs1 fmul.d $ft0, $fa6, $ft11 - ld.d $a2, $sp, 312 # 8-byte Folded Reload + ld.d $a2, $sp, 296 # 8-byte Folded Reload fldx.s $fa6, $a2, $a0 - ld.d $a2, $sp, 256 # 8-byte Folded Reload + ld.d $a2, $sp, 248 # 8-byte Folded Reload fld.s $ft3, $a2, 4 fcvt.s.d $ft0, $ft0 fadd.s $ft0, $fa3, $ft0 @@ -2180,10 +2166,10 @@ R__align: # @R__align fcvt.s.d $ft5, $ft5 fadd.s $fs0, $ft0, $ft5 fcvt.d.s $ft0, $ft6 - ld.d $a2, $sp, 240 # 8-byte Folded Reload + ld.d $a2, $sp, 232 # 8-byte Folded Reload fld.s $ft5, $a2, 4 fadd.d $ft4, $ft4, $fa5 - ld.d $a2, $sp, 232 # 8-byte Folded Reload + ld.d $a2, $sp, 224 # 8-byte Folded Reload fld.s $fa5, $a2, 4 fsub.d $ft6, $ft10, $ft0 fcvt.d.s $ft5, $ft5 @@ -2195,7 +2181,7 @@ R__align: # @R__align fmul.d $ft0, $ft0, $ft11 fmul.d $ft0, $ft0, $fs1 fcvt.s.d $ft0, $ft0 - fadd.s $ft0, $fa1, $ft0 + fadd.s $ft0, $fa0, $ft0 fcmp.cule.s $fcc0, $ft0, $fs0 st.w $zero, $t1, 4 bcnez $fcc0, .LBB2_88 @@ -2205,11 +2191,11 @@ R__align: # @R__align st.w $a2, $t1, 4 fmov.s $fs0, $ft0 .LBB2_88: # in Loop: Header=BB2_79 Depth=1 - ld.d $a6, $sp, 224 # 8-byte Folded Reload + ld.d $a6, $sp, 216 # 8-byte Folded Reload fld.s $ft4, $a6, 8 - ld.d $a2, $sp, 216 # 8-byte Folded Reload + ld.d $a2, $sp, 208 # 8-byte Folded Reload alsl.d $a2, $s5, $a2, 2 - ld.d $a3, $sp, 208 # 8-byte Folded Reload + ld.d $a3, $sp, 200 # 8-byte Folded Reload alsl.d $a3, $s5, $a3, 2 fld.s $ft0, $a1, 0 fcvt.d.s $ft4, $ft4 @@ -2223,7 +2209,7 @@ R__align: # @R__align fsub.d $ft3, $ft5, $ft3 fmul.d $ft4, $ft3, $ft4 fmadd.d $ft4, $ft6, $ft2, $ft4 - ld.d $a5, $sp, 296 # 8-byte Folded Reload + ld.d $a5, $sp, 288 # 8-byte Folded Reload fld.s $ft2, $a5, 4 fmul.d $ft4, $ft4, $ft11 fmul.d $ft4, $ft4, $fs1 @@ -2232,7 +2218,7 @@ R__align: # @R__align fcmp.cule.s $fcc0, $ft4, $fs0 bcnez $fcc0, .LBB2_90 # %bb.89: # in Loop: Header=BB2_79 Depth=1 - ld.d $a4, $sp, 272 # 8-byte Folded Reload + ld.d $a4, $sp, 264 # 8-byte Folded Reload ld.w $a4, $a4, 4 sub.d $a4, $s5, $a4 st.w $a4, $t1, 4 @@ -2247,7 +2233,7 @@ R__align: # @R__align fmul.d $fa7, $fa7, $ft11 fmul.d $fa7, $fa7, $fs1 fcvt.s.d $fa7, $fa7 - fadd.s $fa7, $fa0, $fa7 + fadd.s $fa7, $fa1, $fa7 fcmp.cult.s $fcc0, $fa7, $ft2 bceqz $fcc0, .LBB2_93 # %bb.91: # in Loop: Header=BB2_79 Depth=1 @@ -2265,22 +2251,22 @@ R__align: # @R__align .p2align 4, , 16 .LBB2_93: # in Loop: Header=BB2_79 Depth=1 fst.s $fa7, $a5, 4 - ld.d $a4, $sp, 272 # 8-byte Folded Reload + ld.d $a4, $sp, 264 # 8-byte Folded Reload st.w $ra, $a4, 4 move $a4, $s0 .LBB2_94: # in Loop: Header=BB2_79 Depth=1 fld.s $fa4, $a4, 4 fadd.s $fa4, $fs0, $fa4 fst.s $fa4, $a4, 4 - ld.d $a4, $sp, 200 # 8-byte Folded Reload + ld.d $a4, $sp, 192 # 8-byte Folded Reload beq $a4, $s8, .LBB2_78 # %bb.95: # %.peel.next # in Loop: Header=BB2_79 Depth=1 move $a4, $zero move $t5, $zero - ld.d $a5, $sp, 320 # 8-byte Folded Reload + ld.d $a5, $sp, 304 # 8-byte Folded Reload alsl.d $a5, $s5, $a5, 2 - ld.d $a6, $sp, 312 # 8-byte Folded Reload + ld.d $a6, $sp, 296 # 8-byte Folded Reload alsl.d $a6, $s5, $a6, 2 fcvt.d.s $fa4, $ft0 fsub.d $fa6, $ft10, $fa4 @@ -2290,15 +2276,15 @@ R__align: # @R__align fmul.d $fa2, $fa2, $ft11 fmul.d $fa2, $fa2, $fs1 fcvt.s.d $fa2, $fa2 - fadd.s $fa0, $fa0, $fa2 - fcmp.cle.s $fcc0, $fa1, $fa0 - fsel $fa0, $fa1, $fa0, $fcc0 + fadd.s $fa1, $fa1, $fa2 + fcmp.cle.s $fcc0, $fa0, $fa1 + fsel $fa0, $fa0, $fa1, $fcc0 addi.d $a7, $t8, 4 addi.d $t0, $s0, 8 addi.d $t1, $t1, 8 ori $t2, $zero, 1 addi.d $t3, $zero, -2 - ld.d $t4, $sp, 160 # 8-byte Folded Reload + ld.d $t4, $sp, 152 # 8-byte Folded Reload b .LBB2_98 .p2align 4, , 16 .LBB2_96: # in Loop: Header=BB2_98 Depth=2 @@ -2448,12 +2434,12 @@ R__align: # @R__align b .LBB2_97 .LBB2_107: # %vector.ph864 # in Loop: Header=BB2_79 Depth=1 - ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $a1, $sp, 128 # 8-byte Folded Reload add.d $a0, $a3, $a1 add.d $a1, $a5, $a1 addi.d $a2, $a5, 16 addi.d $a3, $a3, 16 - ld.d $a4, $sp, 168 # 8-byte Folded Reload + ld.d $a4, $sp, 160 # 8-byte Folded Reload .p2align 4, , 16 .LBB2_108: # %vector.body868 # Parent Loop BB2_79 Depth=1 @@ -2472,34 +2458,34 @@ R__align: # @R__align bnez $a4, .LBB2_108 # %bb.109: # %middle.block879 # in Loop: Header=BB2_79 Depth=1 - ld.d $a2, $sp, 128 # 8-byte Folded Reload - ld.d $a3, $sp, 144 # 8-byte Folded Reload - ld.d $a4, $sp, 168 # 8-byte Folded Reload + ld.d $a2, $sp, 120 # 8-byte Folded Reload + ld.d $a3, $sp, 136 # 8-byte Folded Reload + ld.d $a4, $sp, 160 # 8-byte Folded Reload bne $a3, $a4, .LBB2_84 b .LBB2_85 .LBB2_110: movgr2fr.w $fs0, $zero + ld.d $t0, $sp, 80 # 8-byte Folded Reload beqz $a1, .LBB2_112 b .LBB2_126 .LBB2_111: # %._crit_edge634.loopexit - ld.d $s1, $sp, 16 # 8-byte Folded Reload - ld.w $a1, $s1, 0 + ld.d $a0, $sp, 16 # 8-byte Folded Reload + ld.w $a1, $a0, 0 move $t2, $s0 - ld.d $s2, $sp, 112 # 8-byte Folded Reload - ld.d $s8, $sp, 40 # 8-byte Folded Reload - ld.d $t4, $sp, 64 # 8-byte Folded Reload - ld.d $t5, $sp, 72 # 8-byte Folded Reload - ld.d $s6, $sp, 184 # 8-byte Folded Reload - ld.d $t3, $sp, 32 # 8-byte Folded Reload + ld.d $s6, $sp, 104 # 8-byte Folded Reload + ld.d $s8, $sp, 56 # 8-byte Folded Reload + ld.d $t4, $sp, 176 # 8-byte Folded Reload + ld.d $t3, $sp, 24 # 8-byte Folded Reload + ld.d $t0, $sp, 80 # 8-byte Folded Reload bnez $a1, .LBB2_126 .LBB2_112: # %.preheader574 - ld.d $a0, $sp, 384 # 8-byte Folded Reload + ld.d $a0, $sp, 376 # 8-byte Folded Reload bnez $a0, .LBB2_119 # %bb.113: # %.lr.ph639 pcalau12i $a0, %got_pc_hi20(offset) ld.d $a0, $a0, %got_pc_lo12(offset) ld.w $a0, $a0, 0 - ld.d $a1, $sp, 56 # 8-byte Folded Reload + ld.d $a1, $sp, 40 # 8-byte Folded Reload bstrpick.d $a1, $a1, 31, 0 addi.d $a2, $a1, -1 ori $a4, $zero, 4 @@ -2512,11 +2498,11 @@ R__align: # @R__align move $a3, $a2 bstrins.d $a3, $a5, 1, 0 vreplgr2vr.w $vr0, $a0 - vreplgr2vr.d $vr1, $s6 - pcalau12i $a5, %pc_hi20(.LCPI2_3) - vld $vr2, $a5, %pc_lo12(.LCPI2_3) - pcalau12i $a5, %pc_hi20(.LCPI2_4) - vld $vr3, $a5, %pc_lo12(.LCPI2_4) + vreplgr2vr.d $vr1, $t4 + pcalau12i $a5, %pc_hi20(.LCPI2_1) + vld $vr2, $a5, %pc_lo12(.LCPI2_1) + pcalau12i $a5, %pc_hi20(.LCPI2_2) + vld $vr3, $a5, %pc_lo12(.LCPI2_2) addi.d $a5, $t2, 4 lu52i.d $a6, $zero, -1026 vreplgr2vr.d $vr4, $a6 @@ -2577,7 +2563,7 @@ R__align: # @R__align # %bb.116: # %middle.block900 beq $a2, $a4, .LBB2_119 .LBB2_117: # %scalar.ph884.preheader - sub.w $a2, $s6, $a3 + sub.w $a2, $t4, $a3 mul.d $a2, $a0, $a2 alsl.d $a4, $a3, $t2, 2 sub.d $a1, $a1, $a3 @@ -2598,9 +2584,10 @@ R__align: # @R__align addi.d $a4, $a4, 4 bnez $a1, .LBB2_118 .LBB2_119: # %.preheader573 - blez $t5, .LBB2_126 + blez $t0, .LBB2_126 # %bb.120: # %.lr.ph642 - bstrpick.d $a0, $t4, 31, 0 + ld.d $a2, $sp, 48 # 8-byte Folded Reload + bstrpick.d $a0, $a2, 31, 0 pcalau12i $a1, %got_pc_hi20(offset) ld.d $a1, $a1, %got_pc_lo12(offset) ld.w $a1, $a1, 0 @@ -2608,10 +2595,10 @@ R__align: # @R__align ffint.d.w $fa1, $fa0 movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 - ld.d $a0, $sp, 88 # 8-byte Folded Reload + ld.d $a0, $sp, 72 # 8-byte Folded Reload ld.d $a1, $a0, %pc_lo12(R__align.lastverticalw) fneg.d $fa1, $fa1 - addi.d $a0, $t4, 1 + addi.d $a0, $a2, 1 bstrpick.d $a2, $a0, 31, 0 addi.d $a3, $a2, -1 ori $a4, $zero, 2 @@ -2677,49 +2664,50 @@ R__align: # @R__align bnez $a2, .LBB2_125 .LBB2_126: # %.loopexit move $s0, $t2 - ld.d $a0, $sp, 88 # 8-byte Folded Reload + ld.d $a0, $sp, 72 # 8-byte Folded Reload ld.d $fp, $a0, %pc_lo12(R__align.lastverticalw) - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 88 # 8-byte Folded Reload ld.d $s5, $a0, %pc_lo12(R__align.mseq1) - ld.d $a0, $sp, 80 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload ld.d $s3, $a0, %pc_lo12(R__align.mseq2) ld.d $a0, $t3, %pc_lo12(R__align.ijp) - st.d $a0, $sp, 408 # 8-byte Folded Spill - ld.d $a0, $sp, 120 # 8-byte Folded Reload + st.d $a0, $sp, 392 # 8-byte Folded Spill + ld.d $a0, $sp, 112 # 8-byte Folded Reload ld.d $a0, $a0, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 - ld.d $a1, $s2, 0 + ld.d $a1, $s6, 0 move $s7, $a0 move $a0, $a1 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 move $s6, $a0 add.w $a0, $a0, $s7 - st.d $a0, $sp, 416 # 8-byte Folded Spill + st.d $a0, $sp, 424 # 8-byte Folded Spill addi.w $s4, $a0, 1 move $a0, $s4 pcaddu18i $ra, %call36(AllocateCharVec) jirl $ra, $ra, 0 - move $s2, $a0 + move $s1, $a0 move $a0, $s4 pcaddu18i $ra, %call36(AllocateCharVec) jirl $ra, $ra, 0 - ld.w $a2, $s1, 0 + ld.d $a1, $sp, 16 # 8-byte Folded Reload + ld.w $a2, $a1, 0 addi.w $a1, $s7, 0 - st.d $a1, $sp, 432 # 8-byte Folded Spill + st.d $a1, $sp, 400 # 8-byte Folded Spill addi.w $a1, $s6, 0 - st.d $s2, $sp, 400 # 8-byte Folded Spill - st.d $a0, $sp, 392 # 8-byte Folded Spill + st.d $s1, $sp, 384 # 8-byte Folded Spill + st.d $a0, $sp, 376 # 8-byte Folded Spill ori $a3, $zero, 1 bnez $s8, .LBB2_133 # %bb.127: - ld.d $s1, $sp, 408 # 8-byte Folded Reload + ld.d $s2, $sp, 392 # 8-byte Folded Reload beq $a2, $a3, .LBB2_144 # %bb.128: move $a7, $s0 fld.s $fa0, $fp, 0 - ld.d $a2, $sp, 432 # 8-byte Folded Reload + ld.d $a2, $sp, 400 # 8-byte Folded Reload blez $a2, .LBB2_139 # %bb.129: # %.lr.ph.i568 slli.d $a2, $s7, 3 @@ -2741,17 +2729,17 @@ R__align: # @R__align fcmp.cult.s $fcc0, $fa1, $fa0 bcnez $fcc0, .LBB2_130 # %bb.132: # in Loop: Header=BB2_131 Depth=1 - ldx.d $a6, $s1, $a2 + ldx.d $a6, $s2, $a2 stx.w $a5, $a6, $a3 fmov.s $fa0, $fa1 b .LBB2_130 .LBB2_133: - ld.d $ra, $sp, 408 # 8-byte Folded Reload + ld.d $ra, $sp, 392 # 8-byte Folded Reload beq $a2, $a3, .LBB2_152 # %bb.134: move $a7, $s0 fld.s $fa0, $fp, 0 - ld.d $a2, $sp, 432 # 8-byte Folded Reload + ld.d $a2, $sp, 400 # 8-byte Folded Reload blez $a2, .LBB2_147 # %bb.135: # %.lr.ph.i515 slli.d $a2, $s7, 3 @@ -2799,12 +2787,12 @@ R__align: # @R__align fcmp.cult.s $fcc0, $fa1, $fa0 bcnez $fcc0, .LBB2_141 # %bb.143: # in Loop: Header=BB2_142 Depth=1 - ldx.d $a6, $s1, $a2 + ldx.d $a6, $s2, $a2 stx.w $a5, $a6, $a3 fmov.s $fa0, $fa1 b .LBB2_141 .LBB2_144: # %.loopexit.i521 - ld.d $a2, $sp, 432 # 8-byte Folded Reload + ld.d $a2, $sp, 400 # 8-byte Folded Reload bltz $a2, .LBB2_160 # %bb.145: # %.lr.ph10.preheader.i523 addi.d $a3, $s7, 1 @@ -2841,7 +2829,7 @@ R__align: # @R__align fmov.s $fa0, $fa1 b .LBB2_149 .LBB2_152: # %.loopexit.i - ld.d $a2, $sp, 432 # 8-byte Folded Reload + ld.d $a2, $sp, 400 # 8-byte Folded Reload bltz $a2, .LBB2_168 # %bb.153: # %.lr.ph10.preheader.i addi.d $a3, $s7, 1 @@ -2852,11 +2840,11 @@ R__align: # @R__align move $a3, $zero b .LBB2_166 .LBB2_155: # %vector.ph992 - pcalau12i $a4, %pc_hi20(.LCPI2_5) - vld $vr0, $a4, %pc_lo12(.LCPI2_5) + pcalau12i $a4, %pc_hi20(.LCPI2_3) + vld $vr0, $a4, %pc_lo12(.LCPI2_3) bstrpick.d $a3, $a3, 31, 2 slli.d $a3, $a3, 2 - addi.d $a4, $s1, 16 + addi.d $a4, $s2, 16 move $a5, $a3 .p2align 4, , 16 .LBB2_156: # %vector.body995 @@ -2881,7 +2869,7 @@ R__align: # @R__align # %bb.157: # %middle.block1003 beq $a2, $a3, .LBB2_160 .LBB2_158: # %.lr.ph10.i525.preheader - alsl.d $a4, $a3, $s1, 3 + alsl.d $a4, $a3, $s2, 3 sub.d $a2, $a2, $a3 addi.d $a3, $a3, 1 .p2align 4, , 16 @@ -2896,7 +2884,7 @@ R__align: # @R__align .LBB2_160: # %.preheader1.i529 bltz $a1, .LBB2_176 # %bb.161: # %.lr.ph13.i530 - ld.d $a2, $s1, 0 + ld.d $a2, $s2, 0 addi.d $a4, $s6, 1 bstrpick.d $a3, $a4, 31, 0 ori $a5, $zero, 8 @@ -2905,8 +2893,8 @@ R__align: # @R__align move $a4, $zero b .LBB2_174 .LBB2_163: # %vector.ph918 - pcalau12i $a4, %pc_hi20(.LCPI2_5) - vld $vr0, $a4, %pc_lo12(.LCPI2_5) + pcalau12i $a4, %pc_hi20(.LCPI2_3) + vld $vr0, $a4, %pc_lo12(.LCPI2_3) bstrpick.d $a3, $a3, 31, 2 slli.d $a3, $a3, 2 addi.d $a4, $ra, 16 @@ -2961,8 +2949,8 @@ R__align: # @R__align .LBB2_171: # %vector.ph1008 bstrpick.d $a4, $a4, 31, 3 slli.d $a4, $a4, 3 - pcalau12i $a5, %pc_hi20(.LCPI2_6) - vld $vr0, $a5, %pc_lo12(.LCPI2_6) + pcalau12i $a5, %pc_hi20(.LCPI2_4) + vld $vr0, $a5, %pc_lo12(.LCPI2_4) addi.d $a5, $a2, 16 vrepli.b $vr1, -1 vrepli.w $vr2, -5 @@ -2993,15 +2981,15 @@ R__align: # @R__align addi.d $a5, $a5, -1 bnez $a3, .LBB2_175 .LBB2_176: # %._crit_edge.i535 - ld.d $a3, $sp, 432 # 8-byte Folded Reload - add.d $a2, $s2, $a3 + ld.d $a3, $sp, 400 # 8-byte Folded Reload + add.d $a2, $s1, $a3 add.d $s8, $a2, $a1 stx.b $zero, $a2, $a1 add.d $a0, $a0, $a3 add.d $s4, $a0, $a1 stx.b $zero, $a0, $a1 - ld.d $s2, $sp, 416 # 8-byte Folded Reload - bltz $s2, .LBB2_206 + ld.d $a0, $sp, 424 # 8-byte Folded Reload + bltz $a0, .LBB2_206 # %bb.177: # %.lr.ph41.i536.preheader move $a5, $zero addi.w $a0, $zero, -1 @@ -3021,7 +3009,7 @@ R__align: # @R__align # Child Loop BB2_190 Depth 2 addi.w $a7, $s7, 0 slli.d $a6, $a7, 3 - ldx.d $a6, $s1, $a6 + ldx.d $a6, $s2, $a6 addi.w $t0, $s6, 0 slli.d $t1, $t0, 2 ldx.w $t1, $a6, $t1 @@ -3189,18 +3177,19 @@ R__align: # @R__align addi.w $a5, $a5, 2 st.b $a1, $t2, -1 move $s7, $a6 - bge $s2, $a5, .LBB2_178 + ld.d $a6, $sp, 424 # 8-byte Folded Reload + bge $a6, $a5, .LBB2_178 .LBB2_206: # %._crit_edge42.i547 - ld.d $s6, $sp, 104 # 8-byte Folded Reload + ld.d $s6, $sp, 96 # 8-byte Folded Reload bgtz $s6, .LBB2_208 b .LBB2_210 .LBB2_207: move $s8, $t3 move $s4, $t2 - ld.d $s6, $sp, 104 # 8-byte Folded Reload + ld.d $s6, $sp, 96 # 8-byte Folded Reload blez $s6, .LBB2_210 .LBB2_208: # %.lr.ph50.preheader.i556 - ld.d $fp, $sp, 120 # 8-byte Folded Reload + ld.d $fp, $sp, 112 # 8-byte Folded Reload move $s0, $s6 .p2align 4, , 16 .LBB2_209: # %.lr.ph50.i558 @@ -3215,8 +3204,8 @@ R__align: # @R__align addi.d $s5, $s5, 8 bnez $s0, .LBB2_209 .LBB2_210: # %.preheader.i549 - ld.d $s1, $sp, 48 # 8-byte Folded Reload - ld.d $s2, $sp, 112 # 8-byte Folded Reload + ld.d $s1, $sp, 32 # 8-byte Folded Reload + ld.d $s2, $sp, 104 # 8-byte Folded Reload blez $s1, .LBB2_258 # %bb.211: # %.lr.ph52.preheader.i550 move $fp, $s2 @@ -3237,8 +3226,8 @@ R__align: # @R__align .LBB2_213: # %vector.ph933 bstrpick.d $a5, $a5, 31, 3 slli.d $a5, $a5, 3 - pcalau12i $a6, %pc_hi20(.LCPI2_6) - vld $vr0, $a6, %pc_lo12(.LCPI2_6) + pcalau12i $a6, %pc_hi20(.LCPI2_4) + vld $vr0, $a6, %pc_lo12(.LCPI2_4) addi.d $a6, $a3, 16 vrepli.b $vr1, -1 vrepli.w $vr2, -5 @@ -3269,15 +3258,15 @@ R__align: # @R__align addi.d $a6, $a6, -1 bnez $a4, .LBB2_217 .LBB2_218: # %._crit_edge.i - ld.d $a4, $sp, 432 # 8-byte Folded Reload - add.d $a3, $s2, $a4 + ld.d $a4, $sp, 400 # 8-byte Folded Reload + add.d $a3, $s1, $a4 add.d $s8, $a3, $a1 stx.b $zero, $a3, $a1 add.d $a3, $a0, $a4 add.d $s4, $a3, $a1 stx.b $zero, $a3, $a1 st.w $zero, $a2, 0 - ld.d $a0, $sp, 416 # 8-byte Folded Reload + ld.d $a0, $sp, 424 # 8-byte Folded Reload bltz $a0, .LBB2_251 # %bb.219: # %.lr.ph41.i.preheader move $t0, $zero @@ -3361,7 +3350,6 @@ R__align: # @R__align vhaddw.d.w $vr3, $vr3, $vr3 vhaddw.q.d $vr3, $vr3, $vr3 vpickve2gr.d $t0, $vr3, 0 - ld.d $a0, $sp, 416 # 8-byte Folded Reload bne $s1, $s0, .LBB2_231 b .LBB2_233 .p2align 4, , 16 @@ -3384,8 +3372,8 @@ R__align: # @R__align bnez $fp, .LBB2_232 .LBB2_233: # %._crit_edge30.i # in Loop: Header=BB2_220 Depth=1 - ld.d $fp, $sp, 432 # 8-byte Folded Reload - bne $t2, $fp, .LBB2_246 + ld.d $a0, $sp, 400 # 8-byte Folded Reload + bne $t2, $a0, .LBB2_246 b .LBB2_248 .p2align 4, , 16 .LBB2_234: # in Loop: Header=BB2_220 Depth=1 @@ -3425,7 +3413,7 @@ R__align: # @R__align bnez $ra, .LBB2_239 # %bb.240: # %middle.block985 # in Loop: Header=BB2_220 Depth=1 - ld.d $ra, $sp, 408 # 8-byte Folded Reload + ld.d $ra, $sp, 392 # 8-byte Folded Reload bne $s2, $s1, .LBB2_242 b .LBB2_244 .p2align 4, , 16 @@ -3454,19 +3442,18 @@ R__align: # @R__align add.d $t0, $t7, $t0 move $s8, $s0 move $s4, $fp - ld.d $a0, $sp, 416 # 8-byte Folded Reload bne $t4, $a1, .LBB2_225 .LBB2_245: # in Loop: Header=BB2_220 Depth=1 move $t8, $s8 move $t7, $s4 - ld.d $fp, $sp, 432 # 8-byte Folded Reload - beq $t2, $fp, .LBB2_248 + ld.d $a0, $sp, 400 # 8-byte Folded Reload + beq $t2, $a0, .LBB2_248 .LBB2_246: # %._crit_edge30.i # in Loop: Header=BB2_220 Depth=1 addi.w $fp, $s6, 0 beq $t3, $fp, .LBB2_248 # %bb.247: # in Loop: Header=BB2_220 Depth=1 - ld.d $fp, $sp, 152 # 8-byte Folded Reload + ld.d $fp, $sp, 144 # 8-byte Folded Reload ld.d $fp, $fp, %pc_lo12(impmtx) ldx.d $t5, $fp, $t5 fldx.s $fa3, $t5, $t6 @@ -3485,18 +3472,19 @@ R__align: # @R__align addi.w $t0, $t0, 2 st.b $a3, $t7, -1 move $s7, $t1 + ld.d $a0, $sp, 424 # 8-byte Folded Reload bge $a0, $t0, .LBB2_220 .LBB2_251: # %._crit_edge42.i - ld.d $s6, $sp, 104 # 8-byte Folded Reload + ld.d $s6, $sp, 96 # 8-byte Folded Reload bgtz $s6, .LBB2_253 b .LBB2_255 .LBB2_252: move $s8, $t8 move $s4, $t7 - ld.d $s6, $sp, 104 # 8-byte Folded Reload + ld.d $s6, $sp, 96 # 8-byte Folded Reload blez $s6, .LBB2_255 .LBB2_253: # %.lr.ph50.preheader.i - ld.d $fp, $sp, 120 # 8-byte Folded Reload + ld.d $fp, $sp, 112 # 8-byte Folded Reload move $s0, $s6 .p2align 4, , 16 .LBB2_254: # %.lr.ph50.i @@ -3511,8 +3499,8 @@ R__align: # @R__align addi.d $s5, $s5, 8 bnez $s0, .LBB2_254 .LBB2_255: # %.preheader.i - ld.d $s1, $sp, 48 # 8-byte Folded Reload - ld.d $s2, $sp, 112 # 8-byte Folded Reload + ld.d $s1, $sp, 32 # 8-byte Folded Reload + ld.d $s2, $sp, 104 # 8-byte Folded Reload blez $s1, .LBB2_258 # %bb.256: # %.lr.ph52.preheader.i move $fp, $s2 @@ -3530,20 +3518,20 @@ R__align: # @R__align addi.d $s3, $s3, 8 bnez $s0, .LBB2_257 .LBB2_258: # %Atracking_localhom.exit - ld.d $a0, $sp, 400 # 8-byte Folded Reload + ld.d $a0, $sp, 384 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 392 # 8-byte Folded Reload + ld.d $a0, $sp, 376 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 88 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(R__align.mseq1) ld.d $a0, $a0, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 addi.w $a3, $a0, 0 lu12i.w $a4, 1220 - ld.d $a2, $sp, 24 # 8-byte Folded Reload + ld.d $a2, $sp, 8 # 8-byte Folded Reload blt $a2, $a3, .LBB2_260 # %bb.259: # %Atracking_localhom.exit ori $a0, $a4, 2881 @@ -3562,10 +3550,10 @@ R__align: # @R__align pcaddu18i $ra, %call36(ErrorExit) jirl $ra, $ra, 0 .LBB2_261: - ld.d $s0, $sp, 120 # 8-byte Folded Reload + ld.d $s0, $sp, 112 # 8-byte Folded Reload blez $s6, .LBB2_264 # %bb.262: # %.lr.ph645 - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 88 # 8-byte Folded Reload ld.d $fp, $a0, %pc_lo12(R__align.mseq1) .p2align 4, , 16 .LBB2_263: # =>This Inner Loop Header: Depth=1 @@ -3580,7 +3568,7 @@ R__align: # @R__align .LBB2_264: # %.preheader blez $s1, .LBB2_267 # %bb.265: # %.lr.ph647 - ld.d $a0, $sp, 80 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload ld.d $fp, $a0, %pc_lo12(R__align.mseq2) .p2align 4, , 16 .LBB2_266: # =>This Inner Loop Header: Depth=1 @@ -3603,6 +3591,7 @@ R__align: # @R__align pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 fmov.s $fa0, $fs0 + fld.d $fs2, $sp, 432 # 8-byte Folded Reload fld.d $fs1, $sp, 440 # 8-byte Folded Reload fld.d $fs0, $sp, 448 # 8-byte Folded Reload ld.d $s8, $sp, 456 # 8-byte Folded Reload diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/SAalignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/SAalignmm.s index 7da804e6..c834748e 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/SAalignmm.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/SAalignmm.s @@ -1,14 +1,10 @@ .file "SAalignmm.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function Aalign -.LCPI0_0: - .dword 0x3ff199999999999a # double 1.1000000000000001 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI0_1: + .p2align 4, 0x0 # -- Begin function Aalign +.LCPI0_0: .dword 0 # 0x0 .dword 1 # 0x1 -.LCPI0_2: +.LCPI0_1: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -672,8 +668,8 @@ Aalign: # @Aalign move $a3, $zero b .LBB0_71 .LBB0_68: # %vector.ph338 - pcalau12i $a4, %pc_hi20(.LCPI0_1) - vld $vr0, $a4, %pc_lo12(.LCPI0_1) + pcalau12i $a4, %pc_hi20(.LCPI0_0) + vld $vr0, $a4, %pc_lo12(.LCPI0_0) bstrpick.d $a3, $a3, 31, 2 slli.d $a3, $a3, 2 addi.d $a4, $s6, 16 @@ -727,8 +723,8 @@ Aalign: # @Aalign .LBB0_76: # %vector.ph351 bstrpick.d $a3, $a3, 31, 3 slli.d $a3, $a3, 3 - pcalau12i $a4, %pc_hi20(.LCPI0_2) - vld $vr0, $a4, %pc_lo12(.LCPI0_2) + pcalau12i $a4, %pc_hi20(.LCPI0_1) + vld $vr0, $a4, %pc_lo12(.LCPI0_1) addi.d $a4, $a1, 16 vrepli.b $vr1, -1 vrepli.w $vr2, -5 @@ -1135,14 +1131,17 @@ Aalign: # @Aalign ld.d $a0, $sp, 72 # 8-byte Folded Reload ld.w $s3, $a0, %pc_lo12(Aalign.orlgth2) .LBB0_132: - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_0) ld.d $a0, $sp, 96 # 8-byte Folded Reload - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fmul.d $fa1, $fa1, $fa0 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, 104857 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a0, $fa0 slt $a1, $a0, $s8 masknez $a0, $a0, $a1 maskeqz $a1, $s8, $a1 @@ -1150,9 +1149,9 @@ Aalign: # @Aalign or $s4, $a1, $a0 addi.w $s8, $s4, 100 ld.d $a0, $sp, 88 # 8-byte Folded Reload - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 slt $a1, $a0, $s3 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Salignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Salignmm.s index 3e9bab95..66e782df 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Salignmm.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/Salignmm.s @@ -451,27 +451,23 @@ imp_match_init_strict: # @imp_match_init_strict .Lfunc_end2: .size imp_match_init_strict, .Lfunc_end2-imp_match_init_strict # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function A__align -.LCPI3_0: - .dword 0x3ff4cccccccccccd # double 1.3 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI3_1: + .p2align 4, 0x0 # -- Begin function A__align +.LCPI3_0: .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 .word 4 # 0x4 -.LCPI3_2: +.LCPI3_1: .dword 3 # 0x3 .dword 4 # 0x4 -.LCPI3_3: +.LCPI3_2: .dword 1 # 0x1 .dword 2 # 0x2 -.LCPI3_4: +.LCPI3_3: .dword 0 # 0x0 .dword 1 # 0x1 -.LCPI3_5: +.LCPI3_4: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -659,22 +655,25 @@ A__align: # @A__align .LBB3_7: ld.d $s1, $sp, 256 # 8-byte Folded Reload .LBB3_8: - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI3_0) ld.d $a0, $sp, 112 # 8-byte Folded Reload - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fmul.d $fa1, $fa1, $fa0 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + lu12i.w $a0, -209716 + ori $a0, $a0, 3277 + lu32i.d $a0, 314572 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a0, $fa0 slt $a1, $a0, $s4 masknez $a0, $a0, $a1 maskeqz $a1, $s4, $a1 or $s4, $a1, $a0 addi.w $s0, $s4, 100 - movgr2fr.w $fa1, $s1 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $s1 + ffint.d.w $fa0, $fa0 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 slt $a1, $a0, $s7 @@ -1481,8 +1480,8 @@ A__align: # @A__align move $a3, $a2 bstrins.d $a3, $a5, 1, 0 vreplgr2vr.w $vr0, $a0 - pcalau12i $a5, %pc_hi20(.LCPI3_1) - vld $vr1, $a5, %pc_lo12(.LCPI3_1) + pcalau12i $a5, %pc_hi20(.LCPI3_0) + vld $vr1, $a5, %pc_lo12(.LCPI3_0) addi.d $a5, $s5, 4 lu52i.d $a6, $zero, -1026 vreplgr2vr.d $vr2, $a6 @@ -1577,8 +1576,8 @@ A__align: # @A__align move $a3, $a2 bstrins.d $a3, $a5, 1, 0 vreplgr2vr.w $vr0, $a0 - pcalau12i $a5, %pc_hi20(.LCPI3_1) - vld $vr1, $a5, %pc_lo12(.LCPI3_1) + pcalau12i $a5, %pc_hi20(.LCPI3_0) + vld $vr1, $a5, %pc_lo12(.LCPI3_0) ld.d $a5, $sp, 312 # 8-byte Folded Reload addi.d $a5, $a5, 4 lu52i.d $a6, $zero, -1026 @@ -1996,10 +1995,10 @@ A__align: # @A__align bstrins.d $a3, $a5, 1, 0 vreplgr2vr.w $vr0, $a0 vreplgr2vr.d $vr1, $t3 + pcalau12i $a5, %pc_hi20(.LCPI3_1) + vld $vr2, $a5, %pc_lo12(.LCPI3_1) pcalau12i $a5, %pc_hi20(.LCPI3_2) - vld $vr2, $a5, %pc_lo12(.LCPI3_2) - pcalau12i $a5, %pc_hi20(.LCPI3_3) - vld $vr3, $a5, %pc_lo12(.LCPI3_3) + vld $vr3, $a5, %pc_lo12(.LCPI3_2) addi.d $a5, $s5, 4 lu52i.d $a6, $zero, -1026 vreplgr2vr.d $vr4, $a6 @@ -2321,8 +2320,8 @@ A__align: # @A__align move $a3, $zero b .LBB3_152 .LBB3_149: # %vector.ph765 - pcalau12i $a4, %pc_hi20(.LCPI3_4) - vld $vr0, $a4, %pc_lo12(.LCPI3_4) + pcalau12i $a4, %pc_hi20(.LCPI3_3) + vld $vr0, $a4, %pc_lo12(.LCPI3_3) bstrpick.d $a3, $a3, 31, 2 slli.d $a3, $a3, 2 addi.d $a4, $s8, 16 @@ -2377,8 +2376,8 @@ A__align: # @A__align .LBB3_157: # %vector.ph780 bstrpick.d $a5, $a5, 31, 3 slli.d $a5, $a5, 3 - pcalau12i $a6, %pc_hi20(.LCPI3_5) - vld $vr0, $a6, %pc_lo12(.LCPI3_5) + pcalau12i $a6, %pc_hi20(.LCPI3_4) + vld $vr0, $a6, %pc_lo12(.LCPI3_4) addi.d $a6, $a3, 16 vrepli.b $vr1, -1 vrepli.w $vr2, -5 @@ -3767,27 +3766,23 @@ Atracking: # @Atracking .Lfunc_end5: .size Atracking, .Lfunc_end5-Atracking # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function A__align_gapmap -.LCPI6_0: - .dword 0x3ff4cccccccccccd # double 1.3 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI6_1: + .p2align 4, 0x0 # -- Begin function A__align_gapmap +.LCPI6_0: .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 .word 4 # 0x4 -.LCPI6_2: +.LCPI6_1: .dword 3 # 0x3 .dword 4 # 0x4 -.LCPI6_3: +.LCPI6_2: .dword 1 # 0x1 .dword 2 # 0x2 -.LCPI6_4: +.LCPI6_3: .dword 0 # 0x0 .dword 1 # 0x1 -.LCPI6_5: +.LCPI6_4: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -3895,7 +3890,7 @@ A__align_gapmap: # @A__align_gapmap # %bb.3: bge $s3, $s8, .LBB6_9 .LBB6_4: - pcalau12i $fp, %pc_hi20(A__align_gapmap.match) + pcalau12i $s6, %pc_hi20(A__align_gapmap.match) blez $s4, .LBB6_7 # %bb.5: ld.d $s0, $sp, 56 # 8-byte Folded Reload @@ -3909,7 +3904,7 @@ A__align_gapmap: # @A__align_gapmap ld.d $a0, $a0, %pc_lo12(A__align_gapmap.w2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $fp, %pc_lo12(A__align_gapmap.match) + ld.d $a0, $s6, %pc_lo12(A__align_gapmap.match) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 ld.d $a0, $sp, 240 # 8-byte Folded Reload @@ -3969,22 +3964,25 @@ A__align_gapmap: # @A__align_gapmap .LBB6_7: ld.d $s0, $sp, 56 # 8-byte Folded Reload .LBB6_8: - pcalau12i $a0, %pc_hi20(.LCPI6_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI6_0) - movgr2fr.w $fa1, $s0 - ffint.d.w $fa1, $fa1 - fmul.d $fa1, $fa1, $fa0 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 + movgr2fr.w $fa0, $s0 + ffint.d.w $fa0, $fa0 + lu12i.w $a0, -209716 + ori $a0, $a0, 3277 + lu32i.d $a0, 314572 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a0, $fa0 slt $a1, $a0, $s4 masknez $a0, $a0, $a1 maskeqz $a1, $s4, $a1 or $s4, $a1, $a0 addi.w $s0, $s4, 100 ld.d $a0, $sp, 216 # 8-byte Folded Reload - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 slt $a1, $a0, $s3 @@ -4008,7 +4006,7 @@ A__align_gapmap: # @A__align_gapmap move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - st.d $a0, $fp, %pc_lo12(A__align_gapmap.match) + st.d $a0, $s6, %pc_lo12(A__align_gapmap.match) addi.w $s6, $s4, 102 move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -4478,8 +4476,8 @@ A__align_gapmap: # @A__align_gapmap move $a3, $a2 bstrins.d $a3, $a5, 1, 0 vreplgr2vr.w $vr0, $a0 - pcalau12i $a5, %pc_hi20(.LCPI6_1) - vld $vr1, $a5, %pc_lo12(.LCPI6_1) + pcalau12i $a5, %pc_hi20(.LCPI6_0) + vld $vr1, $a5, %pc_lo12(.LCPI6_0) addi.d $a5, $s5, 4 lu52i.d $a6, $zero, -1026 vreplgr2vr.d $vr2, $a6 @@ -4648,8 +4646,8 @@ A__align_gapmap: # @A__align_gapmap move $a3, $a2 bstrins.d $a3, $a5, 1, 0 vreplgr2vr.w $vr0, $a0 - pcalau12i $a5, %pc_hi20(.LCPI6_1) - vld $vr1, $a5, %pc_lo12(.LCPI6_1) + pcalau12i $a5, %pc_hi20(.LCPI6_0) + vld $vr1, $a5, %pc_lo12(.LCPI6_0) ld.d $a5, $sp, 280 # 8-byte Folded Reload addi.d $a5, $a5, 4 lu52i.d $a6, $zero, -1026 @@ -5011,10 +5009,10 @@ A__align_gapmap: # @A__align_gapmap vreplgr2vr.w $vr0, $a0 ld.d $a5, $sp, 216 # 8-byte Folded Reload vreplgr2vr.d $vr1, $a5 + pcalau12i $a5, %pc_hi20(.LCPI6_1) + vld $vr2, $a5, %pc_lo12(.LCPI6_1) pcalau12i $a5, %pc_hi20(.LCPI6_2) - vld $vr2, $a5, %pc_lo12(.LCPI6_2) - pcalau12i $a5, %pc_hi20(.LCPI6_3) - vld $vr3, $a5, %pc_lo12(.LCPI6_3) + vld $vr3, $a5, %pc_lo12(.LCPI6_2) addi.d $a5, $s5, 4 lu52i.d $a6, $zero, -1026 vreplgr2vr.d $vr4, $a6 @@ -5337,8 +5335,8 @@ A__align_gapmap: # @A__align_gapmap move $a3, $zero b .LBB6_132 .LBB6_129: # %vector.ph689 - pcalau12i $a4, %pc_hi20(.LCPI6_4) - vld $vr0, $a4, %pc_lo12(.LCPI6_4) + pcalau12i $a4, %pc_hi20(.LCPI6_3) + vld $vr0, $a4, %pc_lo12(.LCPI6_3) bstrpick.d $a3, $a3, 31, 2 slli.d $a3, $a3, 2 addi.d $a4, $s2, 16 @@ -5394,8 +5392,8 @@ A__align_gapmap: # @A__align_gapmap .LBB6_137: # %vector.ph704 bstrpick.d $a5, $a5, 31, 3 slli.d $a5, $a5, 3 - pcalau12i $a6, %pc_hi20(.LCPI6_5) - vld $vr0, $a6, %pc_lo12(.LCPI6_5) + pcalau12i $a6, %pc_hi20(.LCPI6_4) + vld $vr0, $a6, %pc_lo12(.LCPI6_4) addi.d $a6, $a3, 16 vrepli.b $vr1, -1 vrepli.w $vr2, -5 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/constants.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/constants.s index 9964c85b..f6e1dd6b 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/constants.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/constants.s @@ -475,10 +475,6 @@ .LCPI0_157: .dword 0x4045000000000000 # double 42 .dword 0x0000000000000000 # double 0 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI0_158: - .dword 0x4079000000000000 # double 400 .text .globl JTTmtx .p2align 5 @@ -1084,104 +1080,106 @@ JTTmtx: # @JTTmtx vstx $vr3, $a5, $a1 ori $a5, $zero, 2216 vstx $vr4, $a5, $a1 - lu32i.d $a0, 163840 - pcalau12i $a5, %pc_hi20(.LCPI0_142) - vld $vr3, $a5, %pc_lo12(.LCPI0_142) - lu52i.d $a0, $a0, 1028 - ori $a5, $zero, 2248 - add.d $a5, $sp, $a5 - stptr.d $a0, $a5, 0 - ori $a0, $zero, 2368 - vstx $vr3, $a0, $a1 + ori $a5, $zero, 0 + lu32i.d $a5, 163840 + pcalau12i $a6, %pc_hi20(.LCPI0_142) + vld $vr3, $a6, %pc_lo12(.LCPI0_142) + lu52i.d $a5, $a5, 1028 + ori $a6, $zero, 2248 + add.d $a6, $sp, $a6 + stptr.d $a5, $a6, 0 + ori $a5, $zero, 2368 + vstx $vr3, $a5, $a1 vreplgr2vr.d $vr3, $a2 - pcalau12i $a0, %pc_hi20(.LCPI0_143) - vld $vr4, $a0, %pc_lo12(.LCPI0_143) - ori $a0, $zero, 2384 - vstx $vr3, $a0, $a1 - ori $a0, $zero, 2536 - vstx $vr4, $a0, $a1 - pcalau12i $a0, %pc_hi20(.LCPI0_144) - vld $vr3, $a0, %pc_lo12(.LCPI0_144) - pcalau12i $a0, %pc_hi20(.LCPI0_145) - vld $vr4, $a0, %pc_lo12(.LCPI0_145) - ori $a0, $zero, 2552 - vstx $vr3, $a0, $a1 - ori $a0, $zero, 2704 - vstx $vr4, $a0, $a1 - pcalau12i $a0, %pc_hi20(.LCPI0_146) - vld $vr3, $a0, %pc_lo12(.LCPI0_146) - ori $a0, $zero, 2864 - pcalau12i $a2, %pc_hi20(.LCPI0_147) - vld $vr4, $a2, %pc_lo12(.LCPI0_147) - vstx $vr3, $a0, $a1 + pcalau12i $a2, %pc_hi20(.LCPI0_143) + vld $vr4, $a2, %pc_lo12(.LCPI0_143) + ori $a2, $zero, 2384 + vstx $vr3, $a2, $a1 + ori $a2, $zero, 2536 + vstx $vr4, $a2, $a1 + pcalau12i $a2, %pc_hi20(.LCPI0_144) + vld $vr3, $a2, %pc_lo12(.LCPI0_144) + pcalau12i $a2, %pc_hi20(.LCPI0_145) + vld $vr4, $a2, %pc_lo12(.LCPI0_145) + ori $a2, $zero, 2552 + vstx $vr3, $a2, $a1 + ori $a2, $zero, 2704 + vstx $vr4, $a2, $a1 + pcalau12i $a2, %pc_hi20(.LCPI0_146) + vld $vr3, $a2, %pc_lo12(.LCPI0_146) + ori $a2, $zero, 2864 + pcalau12i $a5, %pc_hi20(.LCPI0_147) + vld $vr4, $a5, %pc_lo12(.LCPI0_147) + vstx $vr3, $a2, $a1 st.d $zero, $sp, 16 vst $vr1, $sp, 184 vst $vr4, $sp, 352 st.d $zero, $sp, 520 st.d $zero, $sp, 688 - pcalau12i $a0, %pc_hi20(.LCPI0_148) - vld $vr3, $a0, %pc_lo12(.LCPI0_148) - pcalau12i $a0, %pc_hi20(.LCPI0_149) - vld $vr4, $a0, %pc_lo12(.LCPI0_149) - pcalau12i $a0, %pc_hi20(.LCPI0_150) - vld $vr5, $a0, %pc_lo12(.LCPI0_150) + pcalau12i $a2, %pc_hi20(.LCPI0_148) + vld $vr3, $a2, %pc_lo12(.LCPI0_148) + pcalau12i $a2, %pc_hi20(.LCPI0_149) + vld $vr4, $a2, %pc_lo12(.LCPI0_149) + pcalau12i $a2, %pc_hi20(.LCPI0_150) + vld $vr5, $a2, %pc_lo12(.LCPI0_150) vst $vr3, $sp, 856 vst $vr4, $sp, 1024 vst $vr0, $sp, 1192 vst $vr5, $sp, 1360 - pcalau12i $a0, %pc_hi20(.LCPI0_151) - vld $vr3, $a0, %pc_lo12(.LCPI0_151) - pcalau12i $a0, %pc_hi20(.LCPI0_152) - vld $vr4, $a0, %pc_lo12(.LCPI0_152) - pcalau12i $a0, %pc_hi20(.LCPI0_153) - vld $vr5, $a0, %pc_lo12(.LCPI0_153) + pcalau12i $a2, %pc_hi20(.LCPI0_151) + vld $vr3, $a2, %pc_lo12(.LCPI0_151) + pcalau12i $a2, %pc_hi20(.LCPI0_152) + vld $vr4, $a2, %pc_lo12(.LCPI0_152) + pcalau12i $a2, %pc_hi20(.LCPI0_153) + vld $vr5, $a2, %pc_lo12(.LCPI0_153) vst $vr3, $sp, 1528 vst $vr0, $sp, 1696 vst $vr4, $sp, 1864 vst $vr5, $sp, 2032 - ori $a0, $zero, 2184 - vstx $vr1, $a0, $a1 - pcalau12i $a0, %pc_hi20(.LCPI0_154) - vld $vr1, $a0, %pc_lo12(.LCPI0_154) - pcalau12i $a0, %pc_hi20(.LCPI0_155) - vld $vr3, $a0, %pc_lo12(.LCPI0_155) - ori $a0, $zero, 2352 - vstx $vr1, $a0, $a1 - ori $a0, $zero, 2520 - vstx $vr3, $a0, $a1 - pcalau12i $a0, %pc_hi20(.LCPI0_156) - vld $vr1, $a0, %pc_lo12(.LCPI0_156) - ori $a0, $zero, 2688 - vstx $vr0, $a0, $a1 - ori $a0, $zero, 2848 - vstx $vr1, $a0, $a1 - pcalau12i $a0, %pc_hi20(.LCPI0_157) - vld $vr0, $a0, %pc_lo12(.LCPI0_157) - ori $a0, $zero, 3024 - vstx $vr2, $a0, $a1 - ori $a0, $zero, 3184 - vstx $vr0, $a0, $a1 - addi.d $a0, $sp, 176 - ori $a1, $zero, 1 - addi.d $a2, $sp, 24 + ori $a2, $zero, 2184 + vstx $vr1, $a2, $a1 + pcalau12i $a2, %pc_hi20(.LCPI0_154) + vld $vr1, $a2, %pc_lo12(.LCPI0_154) + pcalau12i $a2, %pc_hi20(.LCPI0_155) + vld $vr3, $a2, %pc_lo12(.LCPI0_155) + ori $a2, $zero, 2352 + vstx $vr1, $a2, $a1 + ori $a2, $zero, 2520 + vstx $vr3, $a2, $a1 + pcalau12i $a2, %pc_hi20(.LCPI0_156) + vld $vr1, $a2, %pc_lo12(.LCPI0_156) + ori $a2, $zero, 2688 + vstx $vr0, $a2, $a1 + ori $a2, $zero, 2848 + vstx $vr1, $a2, $a1 + pcalau12i $a2, %pc_hi20(.LCPI0_157) + vld $vr0, $a2, %pc_lo12(.LCPI0_157) + ori $a2, $zero, 3024 + vstx $vr2, $a2, $a1 + ori $a2, $zero, 3184 + vstx $vr0, $a2, $a1 + addi.d $a1, $sp, 176 + ori $a2, $zero, 1 beqz $a4, .LBB0_6 # %bb.1: # %.preheader70.preheader - pcalau12i $a4, %pc_hi20(.L__const.JTTmtx.freq0_TM) - addi.d $a4, $a4, %pc_lo12(.L__const.JTTmtx.freq0_TM) - pcalau12i $a5, %pc_hi20(.LCPI0_158) - fld.d $fa0, $a5, %pc_lo12(.LCPI0_158) - ori $a5, $zero, 20 + addi.d $a4, $sp, 24 + pcalau12i $a5, %pc_hi20(.L__const.JTTmtx.freq0_TM) + addi.d $a5, $a5, %pc_lo12(.L__const.JTTmtx.freq0_TM) + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1031 + movgr2fr.d $fa0, $a0 + ori $a0, $zero, 20 .p2align 4, , 16 .LBB0_2: # %.preheader70 # =>This Loop Header: Depth=1 # Child Loop BB0_3 Depth 2 - slli.d $a6, $a1, 3 - fldx.d $fa1, $a4, $a6 + slli.d $a6, $a2, 3 + fldx.d $fa1, $a5, $a6 fmul.d $fa1, $fa1, $fa0 - move $a6, $a1 - move $a7, $a2 - move $t0, $a0 - move $t1, $a4 + move $a6, $a2 + move $a7, $a4 + move $t0, $a1 + move $t1, $a5 .p2align 4, , 16 .LBB0_3: # Parent Loop BB0_2 Depth=1 # => This Inner Loop Header: Depth=2 @@ -1197,31 +1195,33 @@ JTTmtx: # @JTTmtx addi.d $a7, $a7, 160 bnez $a6, .LBB0_3 # %bb.4: # in Loop: Header=BB0_2 Depth=1 - addi.d $a1, $a1, 1 - addi.d $a0, $a0, 160 - addi.d $a2, $a2, 8 - bne $a1, $a5, .LBB0_2 + addi.d $a2, $a2, 1 + addi.d $a1, $a1, 160 + addi.d $a4, $a4, 8 + bne $a2, $a0, .LBB0_2 # %bb.5: # %.preheader68.preheader pcalau12i $a0, %pc_hi20(.L__const.JTTmtx.freq0_TM) addi.d $a1, $a0, %pc_lo12(.L__const.JTTmtx.freq0_TM) b .LBB0_11 .LBB0_6: # %.preheader66.preheader - pcalau12i $a4, %pc_hi20(.L__const.BLOSUMmtx.freqd) - addi.d $a4, $a4, %pc_lo12(.L__const.BLOSUMmtx.freqd) - pcalau12i $a5, %pc_hi20(.LCPI0_158) - fld.d $fa0, $a5, %pc_lo12(.LCPI0_158) - ori $a5, $zero, 20 + addi.d $a4, $sp, 24 + pcalau12i $a5, %pc_hi20(.L__const.BLOSUMmtx.freqd) + addi.d $a5, $a5, %pc_lo12(.L__const.BLOSUMmtx.freqd) + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1031 + movgr2fr.d $fa0, $a0 + ori $a0, $zero, 20 .p2align 4, , 16 .LBB0_7: # %.preheader66 # =>This Loop Header: Depth=1 # Child Loop BB0_8 Depth 2 - slli.d $a6, $a1, 3 - fldx.d $fa1, $a4, $a6 + slli.d $a6, $a2, 3 + fldx.d $fa1, $a5, $a6 fmul.d $fa1, $fa1, $fa0 - move $a6, $a1 - move $a7, $a2 - move $t0, $a0 - move $t1, $a4 + move $a6, $a2 + move $a7, $a4 + move $t0, $a1 + move $t1, $a5 .p2align 4, , 16 .LBB0_8: # Parent Loop BB0_7 Depth=1 # => This Inner Loop Header: Depth=2 @@ -1237,10 +1237,10 @@ JTTmtx: # @JTTmtx addi.d $a7, $a7, 160 bnez $a6, .LBB0_8 # %bb.9: # in Loop: Header=BB0_7 Depth=1 - addi.d $a1, $a1, 1 - addi.d $a0, $a0, 160 - addi.d $a2, $a2, 8 - bne $a1, $a5, .LBB0_7 + addi.d $a2, $a2, 1 + addi.d $a1, $a1, 160 + addi.d $a4, $a4, 8 + bne $a2, $a0, .LBB0_7 # %bb.10: # %.preheader65.preheader pcalau12i $a0, %pc_hi20(.L__const.BLOSUMmtx.freqd) addi.d $a1, $a0, %pc_lo12(.L__const.BLOSUMmtx.freqd) @@ -1736,30 +1736,8 @@ BLOSUMmtx: # @BLOSUMmtx .word .LBB1_22-.LJTI1_0 .word .LBB1_8-.LJTI1_0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function constants -.LCPI2_0: - .dword 0x3fe3333333333333 # double 0.59999999999999998 -.LCPI2_1: - .dword 0xc08f400000000000 # double -1000 -.LCPI2_2: - .dword 0x3f847ae147ae147b # double 0.01 -.LCPI2_3: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 -.LCPI2_4: - .dword 0x408f400000000000 # double 1000 -.LCPI2_5: - .dword 0x4082c00000000000 # double 600 -.LCPI2_6: - .dword 0x3ffccccccccccccd # double 1.8 -.LCPI2_7: - .dword 0xbf50624dd2f1a9fc # double -0.001 -.LCPI2_8: - .dword 0xbf689374bc6a7efa # double -0.0030000000000000001 -.LCPI2_9: - .dword 0x3fb0000000000000 # double 0.0625 .text - .globl constants + .globl constants # -- Begin function constants .p2align 5 .type constants,@function constants: # @constants @@ -1815,20 +1793,20 @@ constants: # @constants lu32i.d $a1, 0 st.w $a1, $a0, 0 pcalau12i $a0, %got_pc_hi20(RNAppenalty) - ld.d $a1, $a0, %got_pc_lo12(RNAppenalty) - ld.w $a0, $a1, 0 + ld.d $a0, $a0, %got_pc_lo12(RNAppenalty) + ld.w $a1, $a0, 0 ori $a6, $s3, 1705 - beq $a0, $a6, .LBB2_51 + beq $a1, $a6, .LBB2_51 # %bb.2: - pcalau12i $a1, %got_pc_hi20(RNAppenalty_ex) - ld.d $a1, $a1, %got_pc_lo12(RNAppenalty_ex) - ld.w $a2, $a1, 0 + pcalau12i $a0, %got_pc_hi20(RNAppenalty_ex) + ld.d $a0, $a0, %got_pc_lo12(RNAppenalty_ex) + ld.w $a2, $a0, 0 beq $a2, $a6, .LBB2_52 .LBB2_3: - pcalau12i $a1, %got_pc_hi20(ppenalty) - ld.d $a3, $a1, %got_pc_lo12(ppenalty) - ld.w $a1, $a3, 0 - beq $a1, $a6, .LBB2_53 + pcalau12i $a0, %got_pc_hi20(ppenalty) + ld.d $a3, $a0, %got_pc_lo12(ppenalty) + ld.w $a0, $a3, 0 + beq $a0, $a6, .LBB2_53 .LBB2_4: pcalau12i $a3, %got_pc_hi20(ppenalty_OP) ld.d $a3, $a3, %got_pc_lo12(ppenalty_OP) @@ -1875,36 +1853,39 @@ constants: # @constants ori $a4, $zero, 2 st.w $a4, $s2, 0 .LBB2_14: - pcalau12i $a6, %pc_hi20(.LCPI2_6) - fld.d $fa1, $a6, %pc_lo12(.LCPI2_6) - movgr2fr.w $fa0, $a0 + movgr2fr.w $fa0, $a1 ffint.d.w $fa0, $fa0 - vldi $vr2, -928 - fmadd.d $fa0, $fa0, $fa1, $fa2 + vldi $vr1, -928 + lu12i.w $a1, -209716 + ori $a1, $a1, 3277 + lu32i.d $a1, -209716 + lu52i.d $a1, $a1, 1023 + movgr2fr.d $fa2, $a1 + fmadd.d $fa0, $fa0, $fa2, $fa1 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a0, $fa0 + movfr2gr.s $a1, $fa0 pcalau12i $a6, %got_pc_hi20(RNApenalty) ld.d $a6, $a6, %got_pc_lo12(RNApenalty) - st.w $a0, $a6, 0 + st.w $a1, $a6, 0 movgr2fr.w $fa0, $a2 ffint.d.w $fa0, $fa0 - fmadd.d $fa0, $fa0, $fa1, $fa2 + fmadd.d $fa0, $fa0, $fa2, $fa1 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a0, $fa0 + movfr2gr.s $a1, $fa0 pcalau12i $a2, %got_pc_hi20(RNApenalty_ex) ld.d $a2, $a2, %got_pc_lo12(RNApenalty_ex) - st.w $a0, $a2, 0 + st.w $a1, $a2, 0 movgr2fr.w $fa0, $t2 ffint.d.w $fa0, $fa0 - fmadd.d $fa0, $fa0, $fa1, $fa2 + fmadd.d $fa0, $fa0, $fa2, $fa1 ftintrz.w.d $fa0, $fa0 - movfr2gr.s $a0, $fa0 + movfr2gr.s $a1, $fa0 pcalau12i $a2, %got_pc_hi20(RNAthr) ld.d $a2, $a2, %got_pc_lo12(RNAthr) - st.w $a0, $a2, 0 - movgr2fr.w $fa0, $a1 + st.w $a1, $a2, 0 + movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 - fmadd.d $fa3, $fa0, $fa1, $fa2 + fmadd.d $fa3, $fa0, $fa2, $fa1 ftintrz.w.d $fa3, $fa3 movfr2gr.s $a0, $fa3 pcalau12i $a1, %got_pc_hi20(penalty) @@ -1912,7 +1893,7 @@ constants: # @constants st.w $a0, $a1, 0 movgr2fr.w $fa3, $a5 ffint.d.w $fa3, $fa3 - fmadd.d $fa3, $fa3, $fa1, $fa2 + fmadd.d $fa3, $fa3, $fa2, $fa1 ftintrz.w.d $fa3, $fa3 movfr2gr.s $a0, $fa3 pcalau12i $a1, %got_pc_hi20(penalty_OP) @@ -1920,7 +1901,7 @@ constants: # @constants st.w $a0, $a1, 0 movgr2fr.w $fa3, $a7 ffint.d.w $fa3, $fa3 - fmadd.d $fa3, $fa3, $fa1, $fa2 + fmadd.d $fa3, $fa3, $fa2, $fa1 ftintrz.w.d $fa3, $fa3 movfr2gr.s $a0, $fa3 pcalau12i $a1, %got_pc_hi20(penalty_ex) @@ -1928,7 +1909,7 @@ constants: # @constants st.w $a0, $a1, 0 movgr2fr.w $fa3, $t0 ffint.d.w $fa3, $fa3 - fmadd.d $fa3, $fa3, $fa1, $fa2 + fmadd.d $fa3, $fa3, $fa2, $fa1 ftintrz.w.d $fa3, $fa3 movfr2gr.s $a0, $fa3 pcalau12i $a1, %got_pc_hi20(penalty_EX) @@ -1936,7 +1917,7 @@ constants: # @constants st.w $a0, $a1, 0 movgr2fr.w $fa3, $t1 ffint.d.w $fa3, $fa3 - fmadd.d $fa1, $fa3, $fa1, $fa2 + fmadd.d $fa1, $fa3, $fa2, $fa1 ftintrz.w.d $fa1, $fa1 movfr2gr.s $a0, $fa1 pcalau12i $a1, %got_pc_hi20(offset) @@ -1969,18 +1950,24 @@ constants: # @constants masknez $a1, $a1, $a0 pcalau12i $a2, %pc_hi20(.L.str.3) addi.d $a2, $a2, %pc_lo12(.L.str.3) - pcalau12i $a5, %pc_hi20(.LCPI2_7) - fld.d $fa1, $a5, %pc_lo12(.LCPI2_7) - pcalau12i $a5, %pc_hi20(.LCPI2_8) - fld.d $fa2, $a5, %pc_lo12(.LCPI2_8) maskeqz $a0, $a2, $a0 or $a2, $a0, $a1 - fmul.d $fa4, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa2 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, -1035 + movgr2fr.d $fa1, $a0 + fmul.d $fa2, $fa0, $fa1 + lu12i.w $a0, -276825 + ori $a0, $a0, 3834 + lu32i.d $a0, -486540 + lu52i.d $a0, $a0, -1034 + movgr2fr.d $fa4, $a0 + fmul.d $fa0, $fa0, $fa4 fmul.d $fa1, $fa3, $fa1 - fmul.d $fa2, $fa3, $fa2 - fst.d $fa2, $sp, 0 - movfr2gr.d $a5, $fa4 + fmul.d $fa3, $fa3, $fa4 + fst.d $fa3, $sp, 0 + movfr2gr.d $a5, $fa2 movfr2gr.d $a6, $fa0 movfr2gr.d $a7, $fa1 pcalau12i $a0, %got_pc_hi20(modelname) @@ -2093,12 +2080,12 @@ constants: # @constants fadd.d $ft0, $ft0, $ft3 fadd.d $ft0, $ft0, $ft4 fadd.d $fa4, $ft0, $fa4 - pcalau12i $a4, %pc_hi20(.LCPI2_9) - fld.d $ft0, $a4, %pc_lo12(.LCPI2_9) fadd.d $fa4, $fa4, $fa5 fadd.d $fa4, $fa4, $fa6 fadd.d $fa4, $fa4, $fa7 - fmul.d $fa4, $fa4, $ft0 + lu52i.d $a4, $zero, 1019 + movgr2fr.d $fa5, $a4 + fmul.d $fa4, $fa4, $fa5 pcalau12i $a4, %got_pc_hi20(disp) ld.d $a4, $a4, %got_pc_lo12(disp) ld.w $a4, $a4, 0 @@ -2146,41 +2133,43 @@ constants: # @constants vst $vr0, $a2, 16 vld $vr0, $a3, 16 vld $vr1, $a3, 0 - pcalau12i $a4, %pc_hi20(.LCPI2_5) - fld.d $fa3, $a4, %pc_lo12(.LCPI2_5) vfsub.d $vr0, $vr0, $vr2 vfsub.d $vr1, $vr1, $vr2 vst $vr1, $a3, 0 vst $vr0, $a3, 16 - vld $vr0, $a0, 16 - vld $vr1, $a0, 0 - fdiv.d $fa2, $fa3, $fa4 - vreplvei.d $vr2, $vr2, 0 - vfmul.d $vr0, $vr2, $vr0 - vfmul.d $vr1, $vr2, $vr1 - vst $vr1, $a0, 0 - vst $vr0, $a0, 16 - vld $vr0, $a1, 16 - vld $vr1, $a1, 0 - vfmul.d $vr0, $vr2, $vr0 - vfmul.d $vr1, $vr2, $vr1 - vst $vr1, $a1, 0 - vst $vr0, $a1, 16 - vld $vr0, $a2, 16 - vld $vr1, $a2, 0 - vfmul.d $vr0, $vr2, $vr0 + ori $a3, $zero, 0 + lu32i.d $a3, 180224 + lu52i.d $a3, $a3, 1032 + movgr2fr.d $fa0, $a3 + vld $vr1, $a0, 16 + vld $vr2, $a0, 0 + fdiv.d $fa0, $fa0, $fa4 + vreplvei.d $vr0, $vr0, 0 + vfmul.d $vr1, $vr0, $vr1 + vfmul.d $vr2, $vr0, $vr2 + vst $vr2, $a0, 0 + vst $vr1, $a0, 16 + vld $vr1, $a1, 16 + vld $vr2, $a1, 0 + vfmul.d $vr1, $vr0, $vr1 + vfmul.d $vr2, $vr0, $vr2 + vst $vr2, $a1, 0 + vst $vr1, $a1, 16 + vld $vr1, $a2, 16 + vld $vr2, $a2, 0 + vfmul.d $vr1, $vr0, $vr1 ld.d $a0, $s1, 24 - vfmul.d $vr1, $vr2, $vr1 - vst $vr1, $a2, 0 - vst $vr0, $a2, 16 - vld $vr0, $a0, 16 - vld $vr1, $a0, 0 - vfmul.d $vr0, $vr2, $vr0 - vfmul.d $vr1, $vr2, $vr1 + vfmul.d $vr2, $vr0, $vr2 + vst $vr2, $a2, 0 + vst $vr1, $a2, 16 + vld $vr1, $a0, 16 + vld $vr2, $a0, 0 + vfmul.d $vr1, $vr0, $vr1 + vfmul.d $vr0, $vr0, $vr2 ld.w $a2, $s4, 0 ld.d $a1, $s1, 0 - vst $vr1, $a0, 0 - vst $vr0, $a0, 16 + vst $vr0, $a0, 0 + vst $vr1, $a0, 16 movgr2fr.w $fa0, $a2 vld $vr1, $a1, 16 ffint.d.w $fa0, $fa0 @@ -2238,14 +2227,14 @@ constants: # @constants move $s1, $a0 pcalau12i $a0, %got_pc_hi20(ppenalty) ld.d $s2, $a0, %got_pc_lo12(ppenalty) - ld.w $a0, $s2, 0 + ld.w $a1, $s2, 0 ori $a2, $s3, 1705 - beq $a0, $a2, .LBB2_134 + beq $a1, $a2, .LBB2_134 # %bb.21: - pcalau12i $a1, %got_pc_hi20(ppenalty_OP) - ld.d $a3, $a1, %got_pc_lo12(ppenalty_OP) - ld.w $a1, $a3, 0 - beq $a1, $a2, .LBB2_135 + pcalau12i $a0, %got_pc_hi20(ppenalty_OP) + ld.d $a3, $a0, %got_pc_lo12(ppenalty_OP) + ld.w $a0, $a3, 0 + beq $a0, $a2, .LBB2_135 .LBB2_22: pcalau12i $a3, %got_pc_hi20(ppenalty_ex) ld.d $s3, $a3, %got_pc_lo12(ppenalty_ex) @@ -2275,46 +2264,49 @@ constants: # @constants ori $a2, $zero, 1 st.w $a2, $a6, 0 .LBB2_28: - pcalau12i $a2, %pc_hi20(.LCPI2_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI2_0) - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - vldi $vr2, -928 - fmadd.d $fa1, $fa1, $fa0, $fa2 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 + movgr2fr.w $fa0, $a1 + ffint.d.w $fa0, $fa0 + vldi $vr1, -928 + lu12i.w $a1, 209715 + ori $a1, $a1, 819 + lu32i.d $a1, 209715 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fa2, $a1 + fmadd.d $fa0, $fa0, $fa2, $fa1 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a1, $fa0 pcalau12i $a2, %got_pc_hi20(penalty) ld.d $a2, $a2, %got_pc_lo12(penalty) st.d $a2, $sp, 120 # 8-byte Folded Spill - st.w $a0, $a2, 0 - movgr2fr.w $fa1, $a1 - ffint.d.w $fa1, $fa1 - fmadd.d $fa1, $fa1, $fa0, $fa2 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 + st.w $a1, $a2, 0 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + fmadd.d $fa0, $fa0, $fa2, $fa1 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a0, $fa0 pcalau12i $a1, %got_pc_hi20(penalty_OP) ld.d $a1, $a1, %got_pc_lo12(penalty_OP) st.w $a0, $a1, 0 - movgr2fr.w $fa1, $a3 - ffint.d.w $fa1, $fa1 - fmadd.d $fa1, $fa1, $fa0, $fa2 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 + movgr2fr.w $fa0, $a3 + ffint.d.w $fa0, $fa0 + fmadd.d $fa0, $fa0, $fa2, $fa1 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a0, $fa0 pcalau12i $a1, %got_pc_hi20(penalty_ex) ld.d $a1, $a1, %got_pc_lo12(penalty_ex) st.d $a1, $sp, 112 # 8-byte Folded Spill st.w $a0, $a1, 0 - movgr2fr.w $fa1, $a4 - ffint.d.w $fa1, $fa1 - fmadd.d $fa1, $fa1, $fa0, $fa2 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 + movgr2fr.w $fa0, $a4 + ffint.d.w $fa0, $fa0 + fmadd.d $fa0, $fa0, $fa2, $fa1 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a0, $fa0 pcalau12i $a1, %got_pc_hi20(penalty_EX) ld.d $a1, $a1, %got_pc_lo12(penalty_EX) st.w $a0, $a1, 0 - movgr2fr.w $fa1, $a5 - ffint.d.w $fa1, $fa1 - fmadd.d $fa0, $fa1, $fa0, $fa2 + movgr2fr.w $fa0, $a5 + ffint.d.w $fa0, $fa0 + fmadd.d $fa0, $fa0, $fa2, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 pcalau12i $a1, %got_pc_hi20(offset) @@ -2352,23 +2344,25 @@ constants: # @constants jirl $ra, $ra, 0 ld.w $a0, $s2, 0 ld.w $a2, $s7, 0 - pcalau12i $a1, %pc_hi20(.LCPI2_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI2_1) - movgr2fr.w $fa1, $a0 - ld.w $a0, $s5, 0 - ffint.d.w $fa1, $fa1 - fdiv.d $fa1, $fa1, $fa0 - ld.w $a1, $s3, 0 - movgr2fr.w $fa2, $a0 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1016 + ld.w $a1, $s5, 0 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 + ld.w $a0, $s3, 0 + movgr2fr.w $fa2, $a1 ffint.d.w $fa2, $fa2 - fdiv.d $fa2, $fa2, $fa0 - movgr2fr.w $fa3, $a1 + fdiv.d $fa2, $fa2, $fa1 + movgr2fr.w $fa3, $a0 ffint.d.w $fa3, $fa3 - fdiv.d $fa0, $fa3, $fa0 + fdiv.d $fa1, $fa3, $fa1 addi.w $s2, $zero, -1 - movfr2gr.d $a3, $fa1 + movfr2gr.d $a3, $fa0 movfr2gr.d $a4, $fa2 - movfr2gr.d $a5, $fa0 + movfr2gr.d $a5, $fa1 pcalau12i $a0, %got_pc_hi20(modelname) ld.d $a0, $a0, %got_pc_lo12(modelname) bne $a2, $s2, .LBB2_140 @@ -2415,15 +2409,15 @@ constants: # @constants jirl $ra, $ra, 0 st.d $a0, $sp, 96 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(ppenalty) - ld.d $a1, $a0, %got_pc_lo12(ppenalty) - ld.w $a0, $a1, 0 + ld.d $a0, $a0, %got_pc_lo12(ppenalty) + ld.w $a1, $a0, 0 ori $a2, $s3, 1705 - beq $a0, $a2, .LBB2_159 + beq $a1, $a2, .LBB2_159 # %bb.33: - pcalau12i $a1, %got_pc_hi20(ppenalty_OP) - ld.d $a3, $a1, %got_pc_lo12(ppenalty_OP) - ld.w $a1, $a3, 0 - beq $a1, $a2, .LBB2_160 + pcalau12i $a0, %got_pc_hi20(ppenalty_OP) + ld.d $a3, $a0, %got_pc_lo12(ppenalty_OP) + ld.w $a0, $a3, 0 + beq $a0, $a2, .LBB2_160 .LBB2_34: pcalau12i $a3, %got_pc_hi20(ppenalty_ex) ld.d $a3, $a3, %got_pc_lo12(ppenalty_ex) @@ -2458,21 +2452,24 @@ constants: # @constants ori $a2, $zero, 1 st.w $a2, $a7, 0 .LBB2_41: # %.preheader1201.preheader - pcalau12i $a2, %pc_hi20(.LCPI2_0) - fld.d $fa1, $a2, %pc_lo12(.LCPI2_0) - movgr2fr.w $fa0, $a0 + movgr2fr.w $fa0, $a1 ffint.d.w $fa0, $fa0 - vldi $vr2, -928 - fmadd.d $fa3, $fa0, $fa1, $fa2 + vldi $vr1, -928 + lu12i.w $a1, 209715 + ori $a1, $a1, 819 + lu32i.d $a1, 209715 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fa2, $a1 + fmadd.d $fa3, $fa0, $fa2, $fa1 ftintrz.w.d $fa3, $fa3 - movfr2gr.s $a0, $fa3 + movfr2gr.s $a1, $fa3 pcalau12i $a2, %got_pc_hi20(penalty) ld.d $a2, $a2, %got_pc_lo12(penalty) st.d $a2, $sp, 40 # 8-byte Folded Spill - st.w $a0, $a2, 0 - movgr2fr.w $fa3, $a1 + st.w $a1, $a2, 0 + movgr2fr.w $fa3, $a0 ffint.d.w $fa3, $fa3 - fmadd.d $fa3, $fa3, $fa1, $fa2 + fmadd.d $fa3, $fa3, $fa2, $fa1 ftintrz.w.d $fa3, $fa3 movfr2gr.s $a0, $fa3 pcalau12i $a1, %got_pc_hi20(penalty_OP) @@ -2480,7 +2477,7 @@ constants: # @constants st.w $a0, $a1, 0 movgr2fr.w $fa3, $a4 ffint.d.w $fa3, $fa3 - fmadd.d $fa3, $fa3, $fa1, $fa2 + fmadd.d $fa3, $fa3, $fa2, $fa1 ftintrz.w.d $fa3, $fa3 movfr2gr.s $a0, $fa3 pcalau12i $a1, %got_pc_hi20(penalty_ex) @@ -2489,7 +2486,7 @@ constants: # @constants st.w $a0, $a1, 0 movgr2fr.w $fa3, $a5 ffint.d.w $fa3, $fa3 - fmadd.d $fa3, $fa3, $fa1, $fa2 + fmadd.d $fa3, $fa3, $fa2, $fa1 ftintrz.w.d $fa3, $fa3 movfr2gr.s $a0, $fa3 pcalau12i $a1, %got_pc_hi20(penalty_EX) @@ -2497,7 +2494,7 @@ constants: # @constants st.w $a0, $a1, 0 movgr2fr.w $fa3, $a6 ffint.d.w $fa3, $fa3 - fmadd.d $fa1, $fa3, $fa1, $fa2 + fmadd.d $fa1, $fa3, $fa2, $fa1 ftintrz.w.d $fa1, $fa1 movfr2gr.s $a0, $fa1 pcalau12i $a1, %got_pc_hi20(offset) @@ -2531,12 +2528,14 @@ constants: # @constants st.d $a1, $sp, 112 # 8-byte Folded Spill masknez $a1, $a1, $a0 pcalau12i $a2, %pc_hi20(.L.str.33) - addi.d $a4, $a2, %pc_lo12(.L.str.33) - pcalau12i $a2, %pc_hi20(.LCPI2_1) - fld.d $fa1, $a2, %pc_lo12(.LCPI2_1) - move $s0, $a4 - maskeqz $a0, $a4, $a0 + addi.d $a2, $a2, %pc_lo12(.L.str.33) + move $s0, $a2 + maskeqz $a0, $a2, $a0 or $a2, $a0, $a1 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1016 + movgr2fr.d $fa1, $a0 fdiv.d $fa0, $fa0, $fa1 fdiv.d $fa1, $fa3, $fa1 movfr2gr.d $a4, $fa0 @@ -2794,9 +2793,12 @@ constants: # @constants fmadd.d $fa0, $fa2, $fa3, $fa0 bne $a0, $a1, .LBB2_44 # %bb.45: - pcalau12i $a0, %pc_hi20(.LCPI2_2) - fld.d $fa1, $a0, %pc_lo12(.LCPI2_2) move $a0, $zero + lu12i.w $a1, 293601 + ori $a1, $a1, 1147 + lu32i.d $a1, 293601 + lu52i.d $a1, $a1, 1016 + movgr2fr.d $fa1, $a1 fdiv.d $fa0, $fa1, $fa0 fneg.d $fa1, $fa0 vldi $vr2, -912 @@ -2843,24 +2845,24 @@ constants: # @constants bne $a2, $a1, .LBB2_49 b .LBB2_46 .LBB2_51: - addi.w $a0, $zero, -1530 - move $a2, $a0 + addi.w $a1, $zero, -1530 + move $a2, $a1 lu32i.d $a2, 0 - st.w $a2, $a1, 0 - pcalau12i $a1, %got_pc_hi20(RNAppenalty_ex) - ld.d $a1, $a1, %got_pc_lo12(RNAppenalty_ex) - ld.w $a2, $a1, 0 + st.w $a2, $a0, 0 + pcalau12i $a0, %got_pc_hi20(RNAppenalty_ex) + ld.d $a0, $a0, %got_pc_lo12(RNAppenalty_ex) + ld.w $a2, $a0, 0 bne $a2, $a6, .LBB2_3 .LBB2_52: move $a2, $zero - st.w $zero, $a1, 0 - pcalau12i $a1, %got_pc_hi20(ppenalty) - ld.d $a3, $a1, %got_pc_lo12(ppenalty) - ld.w $a1, $a3, 0 - bne $a1, $a6, .LBB2_4 + st.w $zero, $a0, 0 + pcalau12i $a0, %got_pc_hi20(ppenalty) + ld.d $a3, $a0, %got_pc_lo12(ppenalty) + ld.w $a0, $a3, 0 + bne $a0, $a6, .LBB2_4 .LBB2_53: - addi.w $a1, $zero, -1530 - move $a4, $a1 + addi.w $a0, $zero, -1530 + move $a4, $a0 lu32i.d $a4, 0 st.w $a4, $a3, 0 pcalau12i $a3, %got_pc_hi20(ppenalty_OP) @@ -2904,14 +2906,17 @@ constants: # @constants movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 addi.d $a0, $a0, 2 - pcalau12i $a1, %pc_hi20(.LCPI2_2) - fld.d $fa1, $a1, %pc_lo12(.LCPI2_2) - movgr2fr.w $fa2, $a0 - ffint.d.w $fa2, $fa2 - fdiv.d $fa0, $fa0, $fa2 - fmul.d $fa0, $fa0, $fa1 - frecip.d $fa2, $fa2 - fmul.d $fa1, $fa2, $fa1 + movgr2fr.w $fa1, $a0 + ffint.d.w $fa1, $fa1 + fdiv.d $fa0, $fa0, $fa1 + lu12i.w $a0, 293601 + ori $a0, $a0, 1147 + lu32i.d $a0, 293601 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa2, $a0 + fmul.d $fa0, $fa0, $fa2 + frecip.d $fa1, $fa1 + fmul.d $fa1, $fa1, $fa2 ld.d $s2, $sp, 56 # 8-byte Folded Reload ld.d $a0, $s2, 0 lu12i.w $a1, 503316 @@ -3143,20 +3148,22 @@ constants: # @constants ld.d $s2, $s1, 0 lu12i.w $a0, -487882 ori $a0, $a0, 2289 - pcalau12i $a1, %pc_hi20(.LCPI2_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI2_3) lu32i.d $a0, 325813 lu52i.d $a0, $a0, 1006 st.d $a0, $s2, 0 + movgr2fr.d $fa0, $a0 .LBB2_65: pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI2_4) - fld.d $fs1, $a0, %pc_lo12(.LCPI2_4) - fld.d $fa1, $s2, 8 - fmul.d $fa0, $fa0, $fs1 - fcmp.cune.d $fcc0, $fa1, $fs0 - fst.d $fa0, $s2, 0 + fmov.d $fa1, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + fld.d $fa0, $s2, 8 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs1, $a0 + fmul.d $fa1, $fa1, $fs1 + fcmp.cune.d $fcc0, $fa0, $fs0 + fst.d $fa1, $s2, 0 bcnez $fcc0, .LBB2_67 # %bb.66: ld.d $a3, $s8, 0 @@ -3169,13 +3176,11 @@ constants: # @constants ld.d $s2, $s1, 0 lu12i.w $a0, -487882 ori $a0, $a0, 2289 - pcalau12i $a1, %pc_hi20(.LCPI2_3) - fld.d $fa1, $a1, %pc_lo12(.LCPI2_3) lu32i.d $a0, 325813 lu52i.d $a0, $a0, 1006 st.d $a0, $s2, 8 + movgr2fr.d $fa0, $a0 .LBB2_67: - fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 fld.d $fa1, $s2, 16 @@ -3194,19 +3199,23 @@ constants: # @constants ld.d $s2, $s1, 0 lu12i.w $a0, -487882 ori $a0, $a0, 2289 - pcalau12i $a1, %pc_hi20(.LCPI2_3) - fld.d $fa1, $a1, %pc_lo12(.LCPI2_3) lu32i.d $a0, 325813 lu52i.d $a0, $a0, 1006 st.d $a0, $s2, 16 + movgr2fr.d $fa1, $a0 .LBB2_69: fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 - fld.d $fa1, $s2, 24 - fmul.d $fa0, $fa0, $fs1 - fcmp.cune.d $fcc0, $fa1, $fs0 - fst.d $fa0, $s2, 16 + fmov.d $fa1, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + fld.d $fa0, $s2, 24 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs1, $a0 + fmul.d $fa1, $fa1, $fs1 + fcmp.cune.d $fcc0, $fa0, $fs0 + fst.d $fa1, $s2, 16 ld.d $fp, $sp, 64 # 8-byte Folded Reload bcnez $fcc0, .LBB2_71 # %bb.70: @@ -3220,13 +3229,11 @@ constants: # @constants ld.d $s2, $s1, 0 lu12i.w $a0, -487882 ori $a0, $a0, 2289 - pcalau12i $a1, %pc_hi20(.LCPI2_3) - fld.d $fa1, $a1, %pc_lo12(.LCPI2_3) lu32i.d $a0, 325813 lu52i.d $a0, $a0, 1006 st.d $a0, $s2, 24 + movgr2fr.d $fa0, $a0 .LBB2_71: # %.preheader1153.1 - fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 ld.d $s4, $s1, 8 @@ -3246,18 +3253,22 @@ constants: # @constants ld.d $s4, $s1, 8 lu12i.w $a0, -487882 ori $a0, $a0, 2289 - pcalau12i $a1, %pc_hi20(.LCPI2_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI2_3) lu32i.d $a0, 325813 lu52i.d $a0, $a0, 1006 st.d $a0, $s4, 0 + movgr2fr.d $fa0, $a0 .LBB2_73: pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 - fld.d $fa1, $s4, 8 - fmul.d $fa0, $fa0, $fs1 - fcmp.cune.d $fcc0, $fa1, $fs0 - fst.d $fa0, $s4, 0 + fmov.d $fa1, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + fld.d $fa0, $s4, 8 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs1, $a0 + fmul.d $fa1, $fa1, $fs1 + fcmp.cune.d $fcc0, $fa0, $fs0 + fst.d $fa1, $s4, 0 bcnez $fcc0, .LBB2_75 # %bb.74: ld.d $a3, $s8, 0 @@ -3270,13 +3281,11 @@ constants: # @constants ld.d $s4, $s1, 8 lu12i.w $a0, -487882 ori $a0, $a0, 2289 - pcalau12i $a1, %pc_hi20(.LCPI2_3) - fld.d $fa1, $a1, %pc_lo12(.LCPI2_3) lu32i.d $a0, 325813 lu52i.d $a0, $a0, 1006 st.d $a0, $s4, 8 + movgr2fr.d $fa0, $a0 .LBB2_75: - fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 fld.d $fa1, $s4, 16 @@ -3295,19 +3304,23 @@ constants: # @constants ld.d $s4, $s1, 8 lu12i.w $a0, -487882 ori $a0, $a0, 2289 - pcalau12i $a1, %pc_hi20(.LCPI2_3) - fld.d $fa1, $a1, %pc_lo12(.LCPI2_3) lu32i.d $a0, 325813 lu52i.d $a0, $a0, 1006 st.d $a0, $s4, 16 + movgr2fr.d $fa1, $a0 .LBB2_77: fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 - fld.d $fa1, $s4, 24 - fmul.d $fa0, $fa0, $fs1 - fcmp.cune.d $fcc0, $fa1, $fs0 - fst.d $fa0, $s4, 16 + fmov.d $fa1, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + fld.d $fa0, $s4, 24 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs1, $a0 + fmul.d $fa1, $fa1, $fs1 + fcmp.cune.d $fcc0, $fa0, $fs0 + fst.d $fa1, $s4, 16 bcnez $fcc0, .LBB2_79 # %bb.78: ld.d $a3, $s8, 0 @@ -3320,13 +3333,11 @@ constants: # @constants ld.d $s4, $s1, 8 lu12i.w $a0, -487882 ori $a0, $a0, 2289 - pcalau12i $a1, %pc_hi20(.LCPI2_3) - fld.d $fa1, $a1, %pc_lo12(.LCPI2_3) lu32i.d $a0, 325813 lu52i.d $a0, $a0, 1006 st.d $a0, $s4, 24 + movgr2fr.d $fa0, $a0 .LBB2_79: # %.preheader1153.2 - fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 ld.d $s2, $s1, 16 @@ -3346,18 +3357,22 @@ constants: # @constants ld.d $s2, $s1, 16 lu12i.w $a0, -487882 ori $a0, $a0, 2289 - pcalau12i $a1, %pc_hi20(.LCPI2_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI2_3) lu32i.d $a0, 325813 lu52i.d $a0, $a0, 1006 st.d $a0, $s2, 0 + movgr2fr.d $fa0, $a0 .LBB2_81: pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 - fld.d $fa1, $s2, 8 - fmul.d $fa0, $fa0, $fs1 - fcmp.cune.d $fcc0, $fa1, $fs0 - fst.d $fa0, $s2, 0 + fmov.d $fa1, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + fld.d $fa0, $s2, 8 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs1, $a0 + fmul.d $fa1, $fa1, $fs1 + fcmp.cune.d $fcc0, $fa0, $fs0 + fst.d $fa1, $s2, 0 bcnez $fcc0, .LBB2_83 # %bb.82: ld.d $a3, $s8, 0 @@ -3370,13 +3385,11 @@ constants: # @constants ld.d $s2, $s1, 16 lu12i.w $a0, -487882 ori $a0, $a0, 2289 - pcalau12i $a1, %pc_hi20(.LCPI2_3) - fld.d $fa1, $a1, %pc_lo12(.LCPI2_3) lu32i.d $a0, 325813 lu52i.d $a0, $a0, 1006 st.d $a0, $s2, 8 + movgr2fr.d $fa0, $a0 .LBB2_83: - fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 fld.d $fa1, $s2, 16 @@ -3395,19 +3408,23 @@ constants: # @constants ld.d $s2, $s1, 16 lu12i.w $a0, -487882 ori $a0, $a0, 2289 - pcalau12i $a1, %pc_hi20(.LCPI2_3) - fld.d $fa1, $a1, %pc_lo12(.LCPI2_3) lu32i.d $a0, 325813 lu52i.d $a0, $a0, 1006 st.d $a0, $s2, 16 + movgr2fr.d $fa1, $a0 .LBB2_85: fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 - fld.d $fa1, $s2, 24 - fmul.d $fa0, $fa0, $fs1 - fcmp.cune.d $fcc0, $fa1, $fs0 - fst.d $fa0, $s2, 16 + fmov.d $fa1, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + fld.d $fa0, $s2, 24 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs1, $a0 + fmul.d $fa1, $fa1, $fs1 + fcmp.cune.d $fcc0, $fa0, $fs0 + fst.d $fa1, $s2, 16 bcnez $fcc0, .LBB2_87 # %bb.86: ld.d $a3, $s8, 0 @@ -3420,13 +3437,11 @@ constants: # @constants ld.d $s2, $s1, 16 lu12i.w $a0, -487882 ori $a0, $a0, 2289 - pcalau12i $a1, %pc_hi20(.LCPI2_3) - fld.d $fa1, $a1, %pc_lo12(.LCPI2_3) lu32i.d $a0, 325813 lu52i.d $a0, $a0, 1006 st.d $a0, $s2, 24 + movgr2fr.d $fa0, $a0 .LBB2_87: # %.preheader1153.3 - fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 ld.d $s4, $s1, 24 @@ -3446,18 +3461,22 @@ constants: # @constants ld.d $s4, $s1, 24 lu12i.w $a0, -487882 ori $a0, $a0, 2289 - pcalau12i $a1, %pc_hi20(.LCPI2_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI2_3) lu32i.d $a0, 325813 lu52i.d $a0, $a0, 1006 st.d $a0, $s4, 0 + movgr2fr.d $fa0, $a0 .LBB2_89: pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 - fld.d $fa1, $s4, 8 - fmul.d $fa0, $fa0, $fs1 - fcmp.cune.d $fcc0, $fa1, $fs0 - fst.d $fa0, $s4, 0 + fmov.d $fa1, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + fld.d $fa0, $s4, 8 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs1, $a0 + fmul.d $fa1, $fa1, $fs1 + fcmp.cune.d $fcc0, $fa0, $fs0 + fst.d $fa1, $s4, 0 bcnez $fcc0, .LBB2_91 # %bb.90: ld.d $a3, $s8, 0 @@ -3470,13 +3489,11 @@ constants: # @constants ld.d $s4, $s1, 24 lu12i.w $a0, -487882 ori $a0, $a0, 2289 - pcalau12i $a1, %pc_hi20(.LCPI2_3) - fld.d $fa1, $a1, %pc_lo12(.LCPI2_3) lu32i.d $a0, 325813 lu52i.d $a0, $a0, 1006 st.d $a0, $s4, 8 + movgr2fr.d $fa0, $a0 .LBB2_91: - fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 fld.d $fa1, $s4, 16 @@ -3495,19 +3512,23 @@ constants: # @constants ld.d $s4, $s1, 24 lu12i.w $a0, -487882 ori $a0, $a0, 2289 - pcalau12i $a1, %pc_hi20(.LCPI2_3) - fld.d $fa1, $a1, %pc_lo12(.LCPI2_3) lu32i.d $a0, 325813 lu52i.d $a0, $a0, 1006 st.d $a0, $s4, 16 + movgr2fr.d $fa1, $a0 .LBB2_93: fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 - fld.d $fa1, $s4, 24 - fmul.d $fa0, $fa0, $fs1 - fcmp.cune.d $fcc0, $fa1, $fs0 - fst.d $fa0, $s4, 16 + fmov.d $fa1, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + fld.d $fa0, $s4, 24 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs1, $a0 + fmul.d $fa1, $fa1, $fs1 + fcmp.cune.d $fcc0, $fa0, $fs0 + fst.d $fa1, $s4, 16 bcnez $fcc0, .LBB2_95 # %bb.94: ld.d $a3, $s8, 0 @@ -3520,13 +3541,11 @@ constants: # @constants ld.d $s4, $s1, 24 lu12i.w $a0, -487882 ori $a0, $a0, 2289 - pcalau12i $a1, %pc_hi20(.LCPI2_3) - fld.d $fa1, $a1, %pc_lo12(.LCPI2_3) lu32i.d $a0, 325813 lu52i.d $a0, $a0, 1006 st.d $a0, $s4, 24 + movgr2fr.d $fa0, $a0 .LBB2_95: - fmov.d $fa0, $fa1 pcaddu18i $ra, %call36(log10) jirl $ra, $ra, 0 ld.w $a0, $s3, 0 @@ -4063,11 +4082,12 @@ constants: # @constants lu12i.w $a0, -487882 ori $a0, $a0, 2289 lu32i.d $a0, 325813 - pcalau12i $a1, %pc_hi20(.LCPI2_3) - fld.d $fs0, $a1, %pc_lo12(.LCPI2_3) - pcalau12i $a1, %pc_hi20(.LCPI2_4) - fld.d $fs1, $a1, %pc_lo12(.LCPI2_4) lu52i.d $s7, $a0, 1006 + movgr2fr.d $fs0, $s7 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs1, $a0 ori $s6, $zero, 160 vst $vr23, $sp, 128 # 16-byte Folded Spill b .LBB2_109 @@ -4291,54 +4311,56 @@ constants: # @constants fmadd.d $fa0, $fa5, $fa6, $fa0 ld.d $a2, $fp, 72 fld.d $fa1, $a1, 64 - fld.d $fa2, $a3, 64 ld.d $a1, $fp, 80 + fld.d $fa2, $a3, 64 fld.d $fa3, $a2, 72 - ld.d $a2, $fp, 88 - fmadd.d $fa0, $fa1, $fa2, $fa0 - fld.d $fa1, $a1, 80 - ld.d $a1, $fp, 96 - fld.d $fa2, $a2, 88 - ld.d $a2, $fp, 104 fld.d $fa4, $a3, 72 - fld.d $fa5, $a1, 96 - ld.d $a1, $fp, 112 - fld.d $fa6, $a2, 104 - fld.d $fa7, $a3, 80 + fld.d $fa5, $a1, 80 + fld.d $fa6, $a3, 80 + fmadd.d $fa0, $fa1, $fa2, $fa0 + ld.d $a1, $fp, 88 fmadd.d $fa0, $fa3, $fa4, $fa0 - fld.d $fa3, $a1, 112 + fmadd.d $fa0, $fa5, $fa6, $fa0 + ld.d $a2, $fp, 96 + fld.d $fa1, $a1, 88 + ld.d $a1, $fp, 104 + fld.d $fa2, $a3, 88 + fld.d $fa3, $a2, 96 + ld.d $a2, $fp, 112 + fld.d $fa4, $a1, 104 ld.d $a1, $fp, 120 - fmadd.d $fa0, $fa1, $fa7, $fa0 - fld.d $fa1, $a3, 88 - fld.d $fa4, $a3, 96 + fld.d $fa5, $a3, 96 + fld.d $fa6, $a2, 112 + ld.d $a2, $fp, 128 fld.d $fa7, $a1, 120 - ld.d $a1, $fp, 128 - fmadd.d $fa0, $fa2, $fa1, $fa0 - fld.d $fa1, $a3, 104 - fmadd.d $fa0, $fa5, $fa4, $fa0 - fld.d $fa2, $a1, 128 + fmadd.d $fa0, $fa1, $fa2, $fa0 + fmadd.d $fa0, $fa3, $fa5, $fa0 + fld.d $fa1, $a2, 128 ld.d $a1, $fp, 136 - fmadd.d $fa0, $fa6, $fa1, $fa0 - fld.d $fa1, $a3, 112 - fld.d $fa4, $a3, 120 - fld.d $fa5, $a1, 136 + fld.d $fa2, $a3, 104 + fld.d $fa3, $a3, 112 + fld.d $fa5, $a3, 120 + fld.d $ft0, $a1, 136 ld.d $a1, $fp, 144 - fmadd.d $fa0, $fa3, $fa1, $fa0 - fld.d $fa1, $a3, 128 - fmadd.d $fa0, $fa7, $fa4, $fa0 - fld.d $fa3, $a1, 144 + fmadd.d $fa0, $fa4, $fa2, $fa0 + fmadd.d $fa0, $fa6, $fa3, $fa0 + fmadd.d $fa0, $fa7, $fa5, $fa0 + fld.d $fa2, $a1, 144 ld.d $a1, $fp, 152 - fmadd.d $fa0, $fa2, $fa1, $fa0 - fld.d $fa1, $a3, 136 - fld.d $fa2, $a3, 144 - fld.d $fa4, $a1, 152 - fld.d $fa6, $a3, 152 - pcalau12i $a1, %pc_hi20(.LCPI2_5) - fld.d $fa7, $a1, %pc_lo12(.LCPI2_5) - fmadd.d $fa0, $fa5, $fa1, $fa0 - fmadd.d $fa0, $fa3, $fa2, $fa0 - fmadd.d $fa0, $fa4, $fa6, $fa0 - fdiv.d $fa0, $fa7, $fa0 + fld.d $fa3, $a3, 128 + fld.d $fa4, $a3, 136 + fld.d $fa5, $a3, 144 + fld.d $fa6, $a1, 152 + fld.d $fa7, $a3, 152 + fmadd.d $fa0, $fa1, $fa3, $fa0 + fmadd.d $fa0, $ft0, $fa4, $fa0 + fmadd.d $fa0, $fa2, $fa5, $fa0 + fmadd.d $fa0, $fa6, $fa7, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, 180224 + lu52i.d $a1, $a1, 1032 + movgr2fr.d $fa1, $a1 + fdiv.d $fa0, $fa1, $fa0 vreplvei.d $vr0, $vr0, 0 ori $a1, $zero, 160 .p2align 4, , 16 @@ -5013,12 +5035,14 @@ constants: # @constants vld $vr6, $a2, 16 fmul.d $fa3, $fa4, $fa3 fadd.d $fa3, $fa3, $fs0 - pcalau12i $a0, %pc_hi20(.LCPI2_5) - fld.d $fa4, $a0, %pc_lo12(.LCPI2_5) fadd.d $fa0, $fa3, $fa0 fadd.d $fa0, $fa0, $fa2 fadd.d $fa0, $fa0, $fa1 - fdiv.d $fa0, $fa4, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, 180224 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa1, $fa0 vreplvei.d $vr0, $vr0, 0 vfmul.d $vr1, $vr0, $vr6 ld.d $a3, $s1, 8 @@ -5091,17 +5115,17 @@ constants: # @constants movfr2gr.s $a3, $fa0 b .LBB2_165 .LBB2_134: - addi.w $a0, $zero, -1530 - move $a1, $a0 - lu32i.d $a1, 0 - st.w $a1, $s2, 0 - pcalau12i $a1, %got_pc_hi20(ppenalty_OP) - ld.d $a3, $a1, %got_pc_lo12(ppenalty_OP) - ld.w $a1, $a3, 0 - bne $a1, $a2, .LBB2_22 -.LBB2_135: addi.w $a1, $zero, -1530 - move $a4, $a1 + move $a0, $a1 + lu32i.d $a0, 0 + st.w $a0, $s2, 0 + pcalau12i $a0, %got_pc_hi20(ppenalty_OP) + ld.d $a3, $a0, %got_pc_lo12(ppenalty_OP) + ld.w $a0, $a3, 0 + bne $a0, $a2, .LBB2_22 +.LBB2_135: + addi.w $a0, $zero, -1530 + move $a4, $a0 lu32i.d $a4, 0 st.w $a4, $a3, 0 pcalau12i $a3, %got_pc_hi20(ppenalty_ex) @@ -5441,54 +5465,56 @@ constants: # @constants fmadd.d $fa0, $fa5, $fa6, $fa0 ld.d $a2, $fp, 72 fld.d $fa1, $a1, 64 - fld.d $fa2, $s5, 64 ld.d $a1, $fp, 80 + fld.d $fa2, $s5, 64 fld.d $fa3, $a2, 72 - ld.d $a2, $fp, 88 - fmadd.d $fa0, $fa1, $fa2, $fa0 - fld.d $fa1, $a1, 80 - ld.d $a1, $fp, 96 - fld.d $fa2, $a2, 88 - ld.d $a2, $fp, 104 fld.d $fa4, $s5, 72 - fld.d $fa5, $a1, 96 - ld.d $a1, $fp, 112 - fld.d $fa6, $a2, 104 - fld.d $fa7, $s5, 80 + fld.d $fa5, $a1, 80 + fld.d $fa6, $s5, 80 + fmadd.d $fa0, $fa1, $fa2, $fa0 + ld.d $a1, $fp, 88 fmadd.d $fa0, $fa3, $fa4, $fa0 - fld.d $fa3, $a1, 112 + fmadd.d $fa0, $fa5, $fa6, $fa0 + ld.d $a2, $fp, 96 + fld.d $fa1, $a1, 88 + ld.d $a1, $fp, 104 + fld.d $fa2, $s5, 88 + fld.d $fa3, $a2, 96 + ld.d $a2, $fp, 112 + fld.d $fa4, $a1, 104 ld.d $a1, $fp, 120 - fmadd.d $fa0, $fa1, $fa7, $fa0 - fld.d $fa1, $s5, 88 - fld.d $fa4, $s5, 96 + fld.d $fa5, $s5, 96 + fld.d $fa6, $a2, 112 + ld.d $a2, $fp, 128 fld.d $fa7, $a1, 120 - ld.d $a1, $fp, 128 - fmadd.d $fa0, $fa2, $fa1, $fa0 - fld.d $fa1, $s5, 104 - fmadd.d $fa0, $fa5, $fa4, $fa0 - fld.d $fa2, $a1, 128 + fmadd.d $fa0, $fa1, $fa2, $fa0 + fmadd.d $fa0, $fa3, $fa5, $fa0 + fld.d $fa1, $a2, 128 ld.d $a1, $fp, 136 - fmadd.d $fa0, $fa6, $fa1, $fa0 - fld.d $fa1, $s5, 112 - fld.d $fa4, $s5, 120 - fld.d $fa5, $a1, 136 + fld.d $fa2, $s5, 104 + fld.d $fa3, $s5, 112 + fld.d $fa5, $s5, 120 + fld.d $ft0, $a1, 136 ld.d $a1, $fp, 144 - fmadd.d $fa0, $fa3, $fa1, $fa0 - fld.d $fa1, $s5, 128 - fmadd.d $fa0, $fa7, $fa4, $fa0 - fld.d $fa3, $a1, 144 + fmadd.d $fa0, $fa4, $fa2, $fa0 + fmadd.d $fa0, $fa6, $fa3, $fa0 + fmadd.d $fa0, $fa7, $fa5, $fa0 + fld.d $fa2, $a1, 144 ld.d $a1, $fp, 152 - fmadd.d $fa0, $fa2, $fa1, $fa0 - fld.d $fa1, $s5, 136 - fld.d $fa2, $s5, 144 - fld.d $fa4, $a1, 152 - fld.d $fa6, $s5, 152 - pcalau12i $a1, %pc_hi20(.LCPI2_5) - fld.d $fa7, $a1, %pc_lo12(.LCPI2_5) - fmadd.d $fa0, $fa5, $fa1, $fa0 - fmadd.d $fa0, $fa3, $fa2, $fa0 - fmadd.d $fa0, $fa4, $fa6, $fa0 - fdiv.d $fa0, $fa7, $fa0 + fld.d $fa3, $s5, 128 + fld.d $fa4, $s5, 136 + fld.d $fa5, $s5, 144 + fld.d $fa6, $a1, 152 + fld.d $fa7, $s5, 152 + fmadd.d $fa0, $fa1, $fa3, $fa0 + fmadd.d $fa0, $ft0, $fa4, $fa0 + fmadd.d $fa0, $fa2, $fa5, $fa0 + fmadd.d $fa0, $fa6, $fa7, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, 180224 + lu52i.d $a1, $a1, 1032 + movgr2fr.d $fa1, $a1 + fdiv.d $fa0, $fa1, $fa0 vreplvei.d $vr0, $vr0, 0 ori $a1, $zero, 160 .p2align 4, , 16 @@ -6054,17 +6080,17 @@ constants: # @constants jirl $ra, $ra, 0 b .LBB2_536 .LBB2_159: - addi.w $a0, $zero, -1530 - move $a3, $a0 + addi.w $a1, $zero, -1530 + move $a3, $a1 lu32i.d $a3, 0 - st.w $a3, $a1, 0 - pcalau12i $a1, %got_pc_hi20(ppenalty_OP) - ld.d $a3, $a1, %got_pc_lo12(ppenalty_OP) - ld.w $a1, $a3, 0 - bne $a1, $a2, .LBB2_34 + st.w $a3, $a0, 0 + pcalau12i $a0, %got_pc_hi20(ppenalty_OP) + ld.d $a3, $a0, %got_pc_lo12(ppenalty_OP) + ld.w $a0, $a3, 0 + bne $a0, $a2, .LBB2_34 .LBB2_160: - addi.w $a1, $zero, -1530 - move $a4, $a1 + addi.w $a0, $zero, -1530 + move $a4, $a0 lu32i.d $a4, 0 st.w $a4, $a3, 0 pcalau12i $a3, %got_pc_hi20(ppenalty_ex) @@ -7655,18 +7681,20 @@ constants: # @constants fld.d $fa1, $s7, 0 fld.d $fa2, $fp, 0 movgr2fr.d $fa0, $zero - fmadd.d $fa2, $fa1, $fa2, $fa0 fld.d $fa6, $s7, 40 fld.d $fa3, $fp, 8 fld.d $ft4, $s7, 80 fld.d $fa4, $fp, 16 fld.d $ft2, $s7, 120 fld.d $fa5, $fp, 24 - pcalau12i $a0, %pc_hi20(.LCPI2_5) - fld.d $ft1, $a0, %pc_lo12(.LCPI2_5) + fmadd.d $fa2, $fa1, $fa2, $fa0 fmadd.d $fa2, $fa6, $fa3, $fa2 fmadd.d $fa2, $ft4, $fa4, $fa2 fmadd.d $fa2, $ft2, $fa5, $fa2 + ori $a0, $zero, 0 + lu32i.d $a0, 180224 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $ft1, $a0 fdiv.d $ft3, $ft1, $fa2 fld.d $fa2, $s7, 8 fmul.d $fa1, $ft3, $fa1 @@ -11364,12 +11392,7 @@ constants: # @constants .Lfunc_end2: .size constants, .Lfunc_end2-constants # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function calcfreq_nuc -.LCPI3_0: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 - .text - .p2align 5 + .p2align 5 # -- Begin function calcfreq_nuc .type calcfreq_nuc,@function calcfreq_nuc: # @calcfreq_nuc # %bb.0: # %.preheader44 @@ -11387,7 +11410,7 @@ calcfreq_nuc: # @calcfreq_nuc vrepli.b $vr0, 0 vst $vr0, $a2, 16 vst $vr0, $a2, 0 - pcalau12i $s3, %pc_hi20(.LCPI3_0) + lu12i.w $s3, -85564 blez $a0, .LBB3_9 # %bb.1: # %.lr.ph50.preheader move $s0, $a1 @@ -11442,14 +11465,23 @@ calcfreq_nuc: # @calcfreq_nuc b .LBB3_5 .LBB3_8: # %.preheader43 fld.d $fa0, $fp, 0 - fld.d $fa1, $s3, %pc_lo12(.LCPI3_0) + ori $a0, $s3, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB3_10 .LBB3_9: # %.preheader43.thread - fld.d $fa0, $s3, %pc_lo12(.LCPI3_0) + ori $a0, $s3, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa0, $a0 .LBB3_10: # %.preheader42.preheader fld.d $fa1, $fp, 8 - fld.d $fa2, $s3, %pc_lo12(.LCPI3_0) + ori $a0, $s3, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa2, $a0 fld.d $fa3, $fp, 16 fcmp.clt.d $fcc0, $fa1, $fa2 fld.d $fa4, $fp, 24 @@ -11485,12 +11517,7 @@ calcfreq_nuc: # @calcfreq_nuc .Lfunc_end3: .size calcfreq_nuc, .Lfunc_end3-calcfreq_nuc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function calcfreq -.LCPI4_0: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 - .text - .p2align 5 + .p2align 5 # -- Begin function calcfreq .type calcfreq,@function calcfreq: # @calcfreq # %bb.0: # %.preheader48 @@ -11504,7 +11531,6 @@ calcfreq: # @calcfreq st.d $s4, $sp, 56 # 8-byte Folded Spill st.d $s5, $sp, 48 # 8-byte Folded Spill st.d $s6, $sp, 40 # 8-byte Folded Spill - st.d $s7, $sp, 32 # 8-byte Folded Spill move $fp, $a2 move $s0, $a1 move $s1, $a0 @@ -11514,23 +11540,22 @@ calcfreq: # @calcfreq pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 lu12i.w $s3, -85564 - pcalau12i $s4, %pc_hi20(.LCPI4_0) - blez $s1, .LBB4_30 + blez $s1, .LBB4_9 # %bb.1: # %.lr.ph54.preheader pcalau12i $a0, %got_pc_hi20(amino_n) - ld.d $s5, $a0, %got_pc_lo12(amino_n) - move $s6, $zero - ori $s7, $zero, 19 + ld.d $s4, $a0, %got_pc_lo12(amino_n) + move $s5, $zero + ori $s6, $zero, 19 b .LBB4_3 .p2align 4, , 16 .LBB4_2: # %._crit_edge # in Loop: Header=BB4_3 Depth=1 - addi.d $s6, $s6, 1 - beq $s6, $s1, .LBB4_8 + addi.d $s5, $s5, 1 + beq $s5, $s1, .LBB4_8 .LBB4_3: # %.lr.ph54 # =>This Loop Header: Depth=1 # Child Loop BB4_6 Depth 2 - slli.d $a0, $s6, 3 + slli.d $a0, $s5, 3 ldx.d $s2, $s0, $a0 move $a0, $s2 pcaddu18i $ra, %call36(strlen) @@ -11552,8 +11577,8 @@ calcfreq: # @calcfreq # => This Inner Loop Header: Depth=2 ld.b $a1, $s2, 0 slli.d $a1, $a1, 2 - ldx.w $a1, $s5, $a1 - bltu $s7, $a1, .LBB4_5 + ldx.w $a1, $s4, $a1 + bltu $s6, $a1, .LBB4_5 # %bb.7: # in Loop: Header=BB4_6 Depth=2 slli.d $a1, $a1, 3 fldx.d $fa0, $fp, $a1 @@ -11562,92 +11587,100 @@ calcfreq: # @calcfreq b .LBB4_5 .LBB4_8: # %.preheader47 fld.d $fa0, $fp, 0 - fld.d $fa1, $s4, %pc_lo12(.LCPI4_0) + ori $a0, $s3, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB4_30 -# %bb.9: + bceqz $fcc0, .LBB4_10 +.LBB4_9: # %.preheader47.thread + ori $a0, $s3, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + st.d $a0, $fp, 0 +.LBB4_10: fld.d $fa1, $fp, 8 - fld.d $fa0, $s4, %pc_lo12(.LCPI4_0) + ori $a0, $s3, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa0, $a0 fcmp.cule.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB4_31 -.LBB4_10: +# %bb.11: fld.d $fa1, $fp, 16 fcmp.cule.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB4_32 -.LBB4_11: +.LBB4_12: fld.d $fa1, $fp, 24 fcmp.cule.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB4_33 -.LBB4_12: +.LBB4_13: fld.d $fa1, $fp, 32 fcmp.cule.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB4_34 -.LBB4_13: +.LBB4_14: fld.d $fa1, $fp, 40 fcmp.cule.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB4_35 -.LBB4_14: +.LBB4_15: fld.d $fa1, $fp, 48 fcmp.cule.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB4_36 -.LBB4_15: +.LBB4_16: fld.d $fa1, $fp, 56 fcmp.cule.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB4_37 -.LBB4_16: +.LBB4_17: fld.d $fa1, $fp, 64 fcmp.cule.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB4_38 -.LBB4_17: +.LBB4_18: fld.d $fa1, $fp, 72 fcmp.cule.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB4_39 -.LBB4_18: +.LBB4_19: fld.d $fa1, $fp, 80 fcmp.cule.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB4_40 -.LBB4_19: +.LBB4_20: fld.d $fa1, $fp, 88 fcmp.cule.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB4_41 -.LBB4_20: +.LBB4_21: fld.d $fa1, $fp, 96 fcmp.cule.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB4_42 -.LBB4_21: +.LBB4_22: fld.d $fa1, $fp, 104 fcmp.cule.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB4_43 -.LBB4_22: +.LBB4_23: fld.d $fa1, $fp, 112 fcmp.cule.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB4_44 -.LBB4_23: +.LBB4_24: fld.d $fa1, $fp, 120 fcmp.cule.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB4_45 -.LBB4_24: +.LBB4_25: fld.d $fa1, $fp, 128 fcmp.cule.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB4_46 -.LBB4_25: +.LBB4_26: fld.d $fa1, $fp, 136 fcmp.cule.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB4_47 -.LBB4_26: +.LBB4_27: fld.d $fa1, $fp, 144 fcmp.cule.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB4_48 -.LBB4_27: +.LBB4_28: fld.d $fa1, $fp, 152 fcmp.cule.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB4_29 -.LBB4_28: - ori $a0, $s3, 813 - lu32i.d $a0, -379166 - lu52i.d $a0, $a0, 1009 + bcnez $fcc0, .LBB4_30 +.LBB4_29: st.d $a0, $fp, 152 -.LBB4_29: # %.preheader.preheader +.LBB4_30: # %.preheader.preheader pcalau12i $a0, %got_pc_hi20(stderr) ld.d $s1, $a0, %got_pc_lo12(stderr) ld.d $a3, $s1, 0 @@ -11839,7 +11872,6 @@ calcfreq: # @calcfreq vfdiv.d $vr1, $vr3, $vr2 vst $vr1, $fp, 128 vst $vr0, $fp, 144 - ld.d $s7, $sp, 32 # 8-byte Folded Reload ld.d $s6, $sp, 40 # 8-byte Folded Reload ld.d $s5, $sp, 48 # 8-byte Folded Reload ld.d $s4, $sp, 56 # 8-byte Folded Reload @@ -11851,160 +11883,97 @@ calcfreq: # @calcfreq ld.d $ra, $sp, 104 # 8-byte Folded Reload addi.d $sp, $sp, 112 ret -.LBB4_30: # %.preheader47.thread - ori $a0, $s3, 813 - lu32i.d $a0, -379166 - lu52i.d $a0, $a0, 1009 - st.d $a0, $fp, 0 - fld.d $fa1, $fp, 8 - fld.d $fa0, $s4, %pc_lo12(.LCPI4_0) - fcmp.cule.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB4_10 .LBB4_31: - ori $a0, $s3, 813 - lu32i.d $a0, -379166 - lu52i.d $a0, $a0, 1009 st.d $a0, $fp, 8 fld.d $fa1, $fp, 16 fcmp.cule.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB4_11 + bcnez $fcc0, .LBB4_12 .LBB4_32: - ori $a0, $s3, 813 - lu32i.d $a0, -379166 - lu52i.d $a0, $a0, 1009 st.d $a0, $fp, 16 fld.d $fa1, $fp, 24 fcmp.cule.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB4_12 + bcnez $fcc0, .LBB4_13 .LBB4_33: - ori $a0, $s3, 813 - lu32i.d $a0, -379166 - lu52i.d $a0, $a0, 1009 st.d $a0, $fp, 24 fld.d $fa1, $fp, 32 fcmp.cule.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB4_13 + bcnez $fcc0, .LBB4_14 .LBB4_34: - ori $a0, $s3, 813 - lu32i.d $a0, -379166 - lu52i.d $a0, $a0, 1009 st.d $a0, $fp, 32 fld.d $fa1, $fp, 40 fcmp.cule.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB4_14 + bcnez $fcc0, .LBB4_15 .LBB4_35: - ori $a0, $s3, 813 - lu32i.d $a0, -379166 - lu52i.d $a0, $a0, 1009 st.d $a0, $fp, 40 fld.d $fa1, $fp, 48 fcmp.cule.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB4_15 + bcnez $fcc0, .LBB4_16 .LBB4_36: - ori $a0, $s3, 813 - lu32i.d $a0, -379166 - lu52i.d $a0, $a0, 1009 st.d $a0, $fp, 48 fld.d $fa1, $fp, 56 fcmp.cule.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB4_16 + bcnez $fcc0, .LBB4_17 .LBB4_37: - ori $a0, $s3, 813 - lu32i.d $a0, -379166 - lu52i.d $a0, $a0, 1009 st.d $a0, $fp, 56 fld.d $fa1, $fp, 64 fcmp.cule.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB4_17 + bcnez $fcc0, .LBB4_18 .LBB4_38: - ori $a0, $s3, 813 - lu32i.d $a0, -379166 - lu52i.d $a0, $a0, 1009 st.d $a0, $fp, 64 fld.d $fa1, $fp, 72 fcmp.cule.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB4_18 + bcnez $fcc0, .LBB4_19 .LBB4_39: - ori $a0, $s3, 813 - lu32i.d $a0, -379166 - lu52i.d $a0, $a0, 1009 st.d $a0, $fp, 72 fld.d $fa1, $fp, 80 fcmp.cule.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB4_19 + bcnez $fcc0, .LBB4_20 .LBB4_40: - ori $a0, $s3, 813 - lu32i.d $a0, -379166 - lu52i.d $a0, $a0, 1009 st.d $a0, $fp, 80 fld.d $fa1, $fp, 88 fcmp.cule.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB4_20 + bcnez $fcc0, .LBB4_21 .LBB4_41: - ori $a0, $s3, 813 - lu32i.d $a0, -379166 - lu52i.d $a0, $a0, 1009 st.d $a0, $fp, 88 fld.d $fa1, $fp, 96 fcmp.cule.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB4_21 + bcnez $fcc0, .LBB4_22 .LBB4_42: - ori $a0, $s3, 813 - lu32i.d $a0, -379166 - lu52i.d $a0, $a0, 1009 st.d $a0, $fp, 96 fld.d $fa1, $fp, 104 fcmp.cule.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB4_22 + bcnez $fcc0, .LBB4_23 .LBB4_43: - ori $a0, $s3, 813 - lu32i.d $a0, -379166 - lu52i.d $a0, $a0, 1009 st.d $a0, $fp, 104 fld.d $fa1, $fp, 112 fcmp.cule.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB4_23 + bcnez $fcc0, .LBB4_24 .LBB4_44: - ori $a0, $s3, 813 - lu32i.d $a0, -379166 - lu52i.d $a0, $a0, 1009 st.d $a0, $fp, 112 fld.d $fa1, $fp, 120 fcmp.cule.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB4_24 + bcnez $fcc0, .LBB4_25 .LBB4_45: - ori $a0, $s3, 813 - lu32i.d $a0, -379166 - lu52i.d $a0, $a0, 1009 st.d $a0, $fp, 120 fld.d $fa1, $fp, 128 fcmp.cule.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB4_25 + bcnez $fcc0, .LBB4_26 .LBB4_46: - ori $a0, $s3, 813 - lu32i.d $a0, -379166 - lu52i.d $a0, $a0, 1009 st.d $a0, $fp, 128 fld.d $fa1, $fp, 136 fcmp.cule.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB4_26 + bcnez $fcc0, .LBB4_27 .LBB4_47: - ori $a0, $s3, 813 - lu32i.d $a0, -379166 - lu52i.d $a0, $a0, 1009 st.d $a0, $fp, 136 fld.d $fa1, $fp, 144 fcmp.cule.d $fcc0, $fa0, $fa1 - bcnez $fcc0, .LBB4_27 + bcnez $fcc0, .LBB4_28 .LBB4_48: - ori $a0, $s3, 813 - lu32i.d $a0, -379166 - lu52i.d $a0, $a0, 1009 st.d $a0, $fp, 144 fld.d $fa1, $fp, 152 fcmp.cule.d $fcc0, $fa0, $fa1 - bceqz $fcc0, .LBB4_28 - b .LBB4_29 + bceqz $fcc0, .LBB4_29 + b .LBB4_30 .Lfunc_end4: .size calcfreq, .Lfunc_end4-calcfreq # -- End function diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/fft.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/fft.s index 1d5ade94..f97cd26c 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/fft.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/fft.s @@ -1,10 +1,6 @@ .file "fft.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function fft -.LCPI0_0: - .dword 0x400921fb54442d18 # double 3.1415926535897931 .text - .globl fft + .globl fft # -- Begin function fft .p2align 5 .type fft,@function fft: # @fft @@ -54,11 +50,14 @@ fft: # @fft move $s1, $a0 beqz $a0, .LBB0_8 # %bb.4: - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_0) - movgr2fr.d $fa1, $s4 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa0, $fa1 + movgr2fr.d $fa0, $s4 + ffint.d.l $fa0, $fa0 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa1, $fa0 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 fadd.d $fa1, $fa0, $fa0 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/fftFunctions.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/fftFunctions.s index e37b4b14..d8586f17 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/fftFunctions.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/fftFunctions.s @@ -324,12 +324,7 @@ FreeFukusosuuMtx: # @FreeFukusosuuMtx .Lfunc_end7: .size FreeFukusosuuMtx, .Lfunc_end7-FreeFukusosuuMtx # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function getKouho -.LCPI8_0: - .dword 0xc0c387f333333333 # double -9999.8999999999996 - .text - .globl getKouho + .globl getKouho # -- Begin function getKouho .p2align 5 .type getKouho,@function getKouho: # @getKouho @@ -344,12 +339,11 @@ getKouho: # @getKouho # %bb.2: # %.preheader.us.preheader move $a5, $zero move $a6, $zero - pcalau12i $a7, %pc_hi20(.LCPI8_0) - fld.d $fa0, $a7, %pc_lo12(.LCPI8_0) lu12i.w $a7, 209715 ori $a7, $a7, 819 lu32i.d $a7, 231411 lu52i.d $a7, $a7, -1012 + movgr2fr.d $fa0, $a7 .p2align 4, , 16 .LBB8_3: # %.preheader.us # =>This Loop Header: Depth=1 @@ -710,14 +704,7 @@ zurasu: # @zurasu .Lfunc_end10: .size zurasu, .Lfunc_end10-zurasu # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function alignableReagion -.LCPI11_0: - .dword 0x4059000000000000 # double 100 -.LCPI11_1: - .dword 0x4082c00000000000 # double 600 - .text - .globl alignableReagion + .globl alignableReagion # -- Begin function alignableReagion .p2align 5 .type alignableReagion,@function alignableReagion: # @alignableReagion @@ -779,13 +766,17 @@ alignableReagion: # @alignableReagion pcalau12i $a1, %got_pc_hi20(fftThreshold) ld.d $a1, $a1, %got_pc_lo12(fftThreshold) ld.w $a1, $a1, 0 - pcalau12i $a2, %pc_hi20(.LCPI11_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI11_0) - pcalau12i $a2, %pc_hi20(.LCPI11_1) - fld.d $fa1, $a2, %pc_lo12(.LCPI11_1) - movgr2fr.w $fa2, $a1 - ffint.d.w $fa2, $fa2 - fdiv.d $fa0, $fa2, $fa0 + movgr2fr.w $fa0, $a1 + ffint.d.w $fa0, $fa0 + ori $a1, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -458752 + lu52i.d $a2, $a2, 1029 + movgr2fr.d $fa1, $a2 + fdiv.d $fa0, $fa0, $fa1 + lu32i.d $a1, 180224 + lu52i.d $a1, $a1, 1032 + movgr2fr.d $fa1, $a1 fmul.d $fa0, $fa0, $fa1 movgr2fr.w $fa1, $a0 ffint.d.w $fa1, $fa1 @@ -1174,12 +1165,7 @@ alignableReagion: # @alignableReagion .Lfunc_end11: .size alignableReagion, .Lfunc_end11-alignableReagion # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function blockAlign -.LCPI12_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 - .text - .globl blockAlign + .globl blockAlign # -- Begin function blockAlign .p2align 5 .type blockAlign,@function blockAlign: # @blockAlign @@ -1308,8 +1294,9 @@ blockAlign: # @blockAlign ld.d $a1, $s7, %pc_lo12(blockAlign.track) ori $a2, $zero, 1 lu52i.d $a3, $zero, 1107 - pcalau12i $a4, %pc_hi20(.LCPI12_0) - fld.d $fa0, $a4, %pc_lo12(.LCPI12_0) + lu12i.w $a4, 256 + lu52i.d $a4, $a4, 1107 + movgr2fr.d $fa0, $a4 lu12i.w $a4, 275200 addi.d $a5, $s3, 8 addi.d $a6, $s4, 8 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/genGalign11.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/genGalign11.s index 5401d806..25f43331 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/genGalign11.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/genGalign11.s @@ -1,51 +1,41 @@ .file "genGalign11.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function genG__align11 -.LCPI0_0: - .dword 0x3ff4cccccccccccd # double 1.3 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI0_1: - .word 0xcb189680 # float -1.0E+7 .text - .globl genG__align11 + .globl genG__align11 # -- Begin function genG__align11 .p2align 5 .type genG__align11,@function genG__align11: # @genG__align11 # %bb.0: - addi.d $sp, $sp, -288 - st.d $ra, $sp, 280 # 8-byte Folded Spill - st.d $fp, $sp, 272 # 8-byte Folded Spill - st.d $s0, $sp, 264 # 8-byte Folded Spill - st.d $s1, $sp, 256 # 8-byte Folded Spill - st.d $s2, $sp, 248 # 8-byte Folded Spill - st.d $s3, $sp, 240 # 8-byte Folded Spill - st.d $s4, $sp, 232 # 8-byte Folded Spill - st.d $s5, $sp, 224 # 8-byte Folded Spill - st.d $s6, $sp, 216 # 8-byte Folded Spill - st.d $s7, $sp, 208 # 8-byte Folded Spill - st.d $s8, $sp, 200 # 8-byte Folded Spill - fst.d $fs0, $sp, 192 # 8-byte Folded Spill - st.d $a2, $sp, 80 # 8-byte Folded Spill + addi.d $sp, $sp, -304 + st.d $ra, $sp, 296 # 8-byte Folded Spill + st.d $fp, $sp, 288 # 8-byte Folded Spill + st.d $s0, $sp, 280 # 8-byte Folded Spill + st.d $s1, $sp, 272 # 8-byte Folded Spill + st.d $s2, $sp, 264 # 8-byte Folded Spill + st.d $s3, $sp, 256 # 8-byte Folded Spill + st.d $s4, $sp, 248 # 8-byte Folded Spill + st.d $s5, $sp, 240 # 8-byte Folded Spill + st.d $s6, $sp, 232 # 8-byte Folded Spill + st.d $s7, $sp, 224 # 8-byte Folded Spill + st.d $s8, $sp, 216 # 8-byte Folded Spill + fst.d $fs0, $sp, 208 # 8-byte Folded Spill + st.d $a2, $sp, 88 # 8-byte Folded Spill move $fp, $a1 move $s0, $a0 pcalau12i $a0, %got_pc_hi20(penalty) ld.d $a0, $a0, %got_pc_lo12(penalty) - ld.w $a0, $a0, 0 - st.d $a0, $sp, 56 # 8-byte Folded Spill + ld.w $s5, $a0, 0 pcalau12i $a0, %got_pc_hi20(penalty_OP) ld.d $a0, $a0, %got_pc_lo12(penalty_OP) - ld.w $s8, $a0, 0 + ld.w $s2, $a0, 0 pcalau12i $a0, %got_pc_hi20(penalty_ex) ld.d $a0, $a0, %got_pc_lo12(penalty_ex) - pcalau12i $s6, %pc_hi20(genG__align11.orlgth1) - ld.w $a1, $s6, %pc_lo12(genG__align11.orlgth1) - ld.w $a0, $a0, 0 - st.d $a0, $sp, 48 # 8-byte Folded Spill + pcalau12i $s8, %pc_hi20(genG__align11.orlgth1) + ld.w $a1, $s8, %pc_lo12(genG__align11.orlgth1) + ld.w $s4, $a0, 0 pcalau12i $a0, %pc_hi20(genG__align11.mseq1) - st.d $a0, $sp, 88 # 8-byte Folded Spill + st.d $a0, $sp, 104 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(genG__align11.mseq2) - st.d $a0, $sp, 96 # 8-byte Folded Spill + st.d $a0, $sp, 112 # 8-byte Folded Spill bnez $a1, .LBB0_2 # %bb.1: pcalau12i $a0, %got_pc_hi20(njob) @@ -55,164 +45,170 @@ genG__align11: # @genG__align11 pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 ld.w $a1, $s1, 0 - ld.d $a2, $sp, 88 # 8-byte Folded Reload + ld.d $a2, $sp, 104 # 8-byte Folded Reload st.d $a0, $a2, %pc_lo12(genG__align11.mseq1) move $a0, $a1 move $a1, $zero pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 96 # 8-byte Folded Reload + ld.d $a1, $sp, 112 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(genG__align11.mseq2) .LBB0_2: ld.d $a0, $s0, 0 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 ld.d $a1, $fp, 0 - move $s2, $a0 - addi.w $s4, $a0, 0 + st.d $a0, $sp, 96 # 8-byte Folded Spill + addi.w $s1, $a0, 0 move $a0, $a1 pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 - st.d $a0, $sp, 168 # 8-byte Folded Spill + move $t0, $s1 + st.d $a0, $sp, 184 # 8-byte Folded Spill addi.w $s3, $a0, 0 - blez $s4, .LBB0_97 + st.d $s1, $sp, 168 # 8-byte Folded Spill + blez $s1, .LBB0_97 # %bb.3: blez $s3, .LBB0_97 .LBB0_4: - ld.w $s7, $s6, %pc_lo12(genG__align11.orlgth1) - pcalau12i $a0, %pc_hi20(genG__align11.orlgth2) - st.d $a0, $sp, 152 # 8-byte Folded Spill - ld.w $s1, $a0, %pc_lo12(genG__align11.orlgth2) + ld.w $s7, $s8, %pc_lo12(genG__align11.orlgth1) + pcalau12i $s6, %pc_hi20(genG__align11.orlgth2) + ld.w $s1, $s6, %pc_lo12(genG__align11.orlgth2) pcalau12i $a0, %pc_hi20(genG__align11.w1) - st.d $a0, $sp, 184 # 8-byte Folded Spill + st.d $a0, $sp, 200 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(genG__align11.w2) - st.d $a0, $sp, 176 # 8-byte Folded Spill + st.d $a0, $sp, 192 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(genG__align11.initverticalw) - st.d $a0, $sp, 160 # 8-byte Folded Spill + st.d $a0, $sp, 176 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(genG__align11.lastverticalw) - st.d $a0, $sp, 104 # 8-byte Folded Spill + st.d $a0, $sp, 120 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(genG__align11.m) - st.d $a0, $sp, 128 # 8-byte Folded Spill + st.d $a0, $sp, 144 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(genG__align11.mp) - st.d $a0, $sp, 136 # 8-byte Folded Spill + st.d $a0, $sp, 152 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(genG__align11.largeM) - st.d $a0, $sp, 112 # 8-byte Folded Spill + st.d $a0, $sp, 128 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(genG__align11.Mp) - st.d $a0, $sp, 120 # 8-byte Folded Spill - pcalau12i $s5, %pc_hi20(genG__align11.mseq) - st.d $s8, $sp, 72 # 8-byte Folded Spill - st.d $s2, $sp, 64 # 8-byte Folded Spill - blt $s7, $s4, .LBB0_6 + st.d $a0, $sp, 136 # 8-byte Folded Spill + pcalau12i $a0, %pc_hi20(genG__align11.mseq) + st.d $s2, $sp, 72 # 8-byte Folded Spill + st.d $s4, $sp, 64 # 8-byte Folded Spill + st.d $s5, $sp, 80 # 8-byte Folded Spill + blt $s7, $t0, .LBB0_6 # %bb.5: bge $s1, $s3, .LBB0_10 .LBB0_6: - st.d $s6, $sp, 144 # 8-byte Folded Spill + st.d $a0, $sp, 56 # 8-byte Folded Spill + st.d $s6, $sp, 160 # 8-byte Folded Spill pcalau12i $s6, %pc_hi20(genG__align11.match) pcalau12i $a0, %pc_hi20(genG__align11.cpmx1) - st.d $a0, $sp, 16 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(genG__align11.cpmx2) st.d $a0, $sp, 24 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(genG__align11.floatwork) + pcalau12i $a0, %pc_hi20(genG__align11.cpmx2) st.d $a0, $sp, 32 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(genG__align11.intwork) + pcalau12i $a0, %pc_hi20(genG__align11.floatwork) st.d $a0, $sp, 40 # 8-byte Folded Spill - st.d $s4, $sp, 8 # 8-byte Folded Spill + pcalau12i $a0, %pc_hi20(genG__align11.intwork) + st.d $a0, $sp, 48 # 8-byte Folded Spill blez $s7, .LBB0_9 # %bb.7: blez $s1, .LBB0_9 # %bb.8: - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(genG__align11.w1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 176 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(genG__align11.w2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 ld.d $a0, $s6, %pc_lo12(genG__align11.match) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 160 # 8-byte Folded Reload + ld.d $a0, $sp, 176 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(genG__align11.initverticalw) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 120 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(genG__align11.lastverticalw) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 128 # 8-byte Folded Reload + ld.d $a0, $sp, 144 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(genG__align11.m) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 136 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(genG__align11.mp) pcaddu18i $ra, %call36(FreeIntVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(genG__align11.largeM) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 120 # 8-byte Folded Reload + ld.d $a0, $sp, 136 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(genG__align11.Mp) pcaddu18i $ra, %call36(FreeIntVec) jirl $ra, $ra, 0 - ld.d $a0, $s5, %pc_lo12(genG__align11.mseq) + ld.d $a0, $sp, 56 # 8-byte Folded Reload + ld.d $a0, $a0, %pc_lo12(genG__align11.mseq) pcaddu18i $ra, %call36(FreeCharMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 16 # 8-byte Folded Reload + ld.d $a0, $sp, 24 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(genG__align11.cpmx1) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 24 # 8-byte Folded Reload + ld.d $a0, $sp, 32 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(genG__align11.cpmx2) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 32 # 8-byte Folded Reload + ld.d $a0, $sp, 40 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(genG__align11.floatwork) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 40 # 8-byte Folded Reload + ld.d $a0, $sp, 48 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(genG__align11.intwork) pcaddu18i $ra, %call36(FreeIntMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 144 # 8-byte Folded Reload - ld.w $s7, $a0, %pc_lo12(genG__align11.orlgth1) - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.w $s7, $s8, %pc_lo12(genG__align11.orlgth1) + ld.d $a0, $sp, 160 # 8-byte Folded Reload ld.w $s1, $a0, %pc_lo12(genG__align11.orlgth2) .LBB0_9: - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_0) - movgr2fr.w $fa1, $s2 - ffint.d.w $fa1, $fa1 - fmul.d $fa1, $fa1, $fa0 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 + ld.d $a0, $sp, 96 # 8-byte Folded Reload + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + lu12i.w $a0, -209716 + ori $a0, $a0, 3277 + lu32i.d $a0, 314572 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a0, $fa0 slt $a1, $a0, $s7 masknez $a0, $a0, $a1 maskeqz $a1, $s7, $a1 or $s7, $a1, $a0 addi.w $s2, $s7, 100 - ld.d $a0, $sp, 168 # 8-byte Folded Reload - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + ld.d $a0, $sp, 184 # 8-byte Folded Reload + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 slt $a1, $a0, $s1 masknez $a0, $a0, $a1 maskeqz $a1, $s1, $a1 or $s1, $a1, $a0 - addi.w $s8, $s1, 100 + addi.w $s5, $s1, 100 addi.w $s4, $s1, 102 move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 184 # 8-byte Folded Reload + ld.d $a1, $sp, 200 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(genG__align11.w1) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 176 # 8-byte Folded Reload + ld.d $a1, $sp, 192 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(genG__align11.w2) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -222,54 +218,56 @@ genG__align11: # @genG__align11 move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 160 # 8-byte Folded Reload + ld.d $a1, $sp, 176 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(genG__align11.initverticalw) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 104 # 8-byte Folded Reload + ld.d $a1, $sp, 120 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(genG__align11.lastverticalw) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 128 # 8-byte Folded Reload + ld.d $a1, $sp, 144 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(genG__align11.m) move $a0, $s4 pcaddu18i $ra, %call36(AllocateIntVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 136 # 8-byte Folded Reload + ld.d $a1, $sp, 152 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(genG__align11.mp) move $a0, $s4 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 112 # 8-byte Folded Reload + ld.d $a1, $sp, 128 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(genG__align11.largeM) move $a0, $s4 pcaddu18i $ra, %call36(AllocateIntVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 120 # 8-byte Folded Reload + ld.d $a1, $sp, 136 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(genG__align11.Mp) pcalau12i $a0, %got_pc_hi20(njob) ld.d $a0, $a0, %got_pc_lo12(njob) ld.w $a0, $a0, 0 - add.w $a1, $s8, $s2 + add.w $a1, $s5, $s2 pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 - st.d $a0, $s5, %pc_lo12(genG__align11.mseq) + st.d $s8, $sp, 16 # 8-byte Folded Spill + ld.d $s8, $sp, 56 # 8-byte Folded Reload + st.d $a0, $s8, %pc_lo12(genG__align11.mseq) ori $a0, $zero, 26 move $a1, $s6 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 16 # 8-byte Folded Reload + ld.d $a1, $sp, 24 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(genG__align11.cpmx1) ori $a0, $zero, 26 move $a1, $s4 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 24 # 8-byte Folded Reload + ld.d $a1, $sp, 32 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(genG__align11.cpmx2) - slt $a0, $s8, $s2 - masknez $a1, $s8, $a0 + slt $a0, $s5, $s2 + masknez $a1, $s5, $a0 maskeqz $a0, $s2, $a0 or $a0, $a0, $a1 addi.w $s4, $a0, 2 @@ -277,28 +275,28 @@ genG__align11: # @genG__align11 move $a1, $s4 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 32 # 8-byte Folded Reload + ld.d $a1, $sp, 40 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(genG__align11.floatwork) ori $a0, $zero, 26 move $a1, $s4 pcaddu18i $ra, %call36(AllocateIntMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 40 # 8-byte Folded Reload + ld.d $a1, $sp, 48 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(genG__align11.intwork) - ld.d $s6, $sp, 144 # 8-byte Folded Reload - st.w $s7, $s6, %pc_lo12(genG__align11.orlgth1) - ld.d $a0, $sp, 152 # 8-byte Folded Reload - st.w $s1, $a0, %pc_lo12(genG__align11.orlgth2) - ld.d $s8, $sp, 72 # 8-byte Folded Reload - ld.d $s4, $sp, 8 # 8-byte Folded Reload + move $a0, $s8 + ld.d $s8, $sp, 16 # 8-byte Folded Reload + st.w $s7, $s8, %pc_lo12(genG__align11.orlgth1) + ld.d $s6, $sp, 160 # 8-byte Folded Reload + st.w $s1, $s6, %pc_lo12(genG__align11.orlgth2) + ld.d $t0, $sp, 168 # 8-byte Folded Reload .LBB0_10: - ld.d $a0, $s5, %pc_lo12(genG__align11.mseq) + ld.d $a0, $a0, %pc_lo12(genG__align11.mseq) ld.d $a1, $a0, 0 - ld.d $a2, $sp, 88 # 8-byte Folded Reload + ld.d $a2, $sp, 104 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(genG__align11.mseq1) st.d $a1, $a2, 0 ld.d $a0, $a0, 8 - ld.d $a1, $sp, 96 # 8-byte Folded Reload + ld.d $a1, $sp, 112 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(genG__align11.mseq2) st.d $a0, $a1, 0 pcalau12i $a0, %got_pc_hi20(commonAlloc1) @@ -316,7 +314,6 @@ genG__align11: # @genG__align11 ld.d $s6, $a0, 0 b .LBB0_17 .LBB0_13: - move $s8, $s4 beqz $a0, .LBB0_16 # %bb.14: beqz $a1, .LBB0_16 @@ -331,10 +328,9 @@ genG__align11: # @genG__align11 ld.d $a0, $a0, 0 pcaddu18i $ra, %call36(FreeIntMtx) jirl $ra, $ra, 0 - ld.w $s7, $s6, %pc_lo12(genG__align11.orlgth1) + ld.w $s7, $s8, %pc_lo12(genG__align11.orlgth1) ld.w $a0, $s2, 0 - ld.d $a1, $sp, 152 # 8-byte Folded Reload - ld.w $s1, $a1, %pc_lo12(genG__align11.orlgth2) + ld.w $s1, $s6, %pc_lo12(genG__align11.orlgth2) ld.w $a1, $s5, 0 .LBB0_16: slt $a2, $a0, $s7 @@ -364,40 +360,39 @@ genG__align11: # @genG__align11 st.d $s6, $a0, 0 st.w $s7, $s2, 0 st.w $s1, $s5, 0 - move $s4, $s8 - ld.d $s8, $sp, 72 # 8-byte Folded Reload + ld.d $t0, $sp, 168 # 8-byte Folded Reload .LBB0_17: pcalau12i $a0, %got_pc_hi20(commonIP) ld.d $a0, $a0, %got_pc_lo12(commonIP) ld.d $s5, $a0, 0 pcalau12i $a0, %pc_hi20(genG__align11.ijpi) - ld.d $a1, $sp, 184 # 8-byte Folded Reload + ld.d $a1, $sp, 200 # 8-byte Folded Reload ld.d $a4, $a1, %pc_lo12(genG__align11.w1) - ld.d $a1, $sp, 176 # 8-byte Folded Reload + ld.d $a1, $sp, 192 # 8-byte Folded Reload ld.d $a5, $a1, %pc_lo12(genG__align11.w2) - ld.d $a1, $sp, 160 # 8-byte Folded Reload + ld.d $a1, $sp, 176 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(genG__align11.initverticalw) ld.d $a2, $fp, 0 - st.d $a2, $sp, 184 # 8-byte Folded Spill + st.d $a2, $sp, 200 # 8-byte Folded Spill ld.d $a2, $s0, 0 - st.d $a2, $sp, 176 # 8-byte Folded Spill + st.d $a2, $sp, 192 # 8-byte Folded Spill st.d $s5, $a0, %pc_lo12(genG__align11.ijpi) pcalau12i $a0, %pc_hi20(genG__align11.ijpj) st.d $s6, $a0, %pc_lo12(genG__align11.ijpj) pcalau12i $a0, %got_pc_hi20(amino_dis) ld.d $a0, $a0, %got_pc_lo12(amino_dis) - st.d $a0, $sp, 160 # 8-byte Folded Spill - ld.d $t7, $sp, 64 # 8-byte Folded Reload - beqz $s4, .LBB0_20 + st.d $a0, $sp, 176 # 8-byte Folded Spill + ld.d $s7, $sp, 96 # 8-byte Folded Reload + beqz $t0, .LBB0_20 # %bb.18: # %.lr.ph.i.preheader - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload ld.b $a0, $a0, 0 slli.d $a0, $a0, 9 - ld.d $a2, $sp, 160 # 8-byte Folded Reload + ld.d $a2, $sp, 176 # 8-byte Folded Reload add.d $a0, $a2, $a0 move $a2, $a1 - ld.d $a6, $sp, 176 # 8-byte Folded Reload - move $a3, $t7 + ld.d $a6, $sp, 192 # 8-byte Folded Reload + move $a3, $s7 .p2align 4, , 16 .LBB0_19: # %.lr.ph.i # =>This Inner Loop Header: Depth=1 @@ -413,18 +408,18 @@ genG__align11: # @genG__align11 move $a2, $a7 bnez $a3, .LBB0_19 .LBB0_20: # %match_calc.exit - ld.d $a0, $sp, 56 # 8-byte Folded Reload + ld.d $a0, $sp, 80 # 8-byte Folded Reload movgr2fr.w $fa0, $a0 beqz $s3, .LBB0_23 # %bb.21: # %.lr.ph.i241.preheader - ld.d $a0, $sp, 176 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload ld.b $a0, $a0, 0 slli.d $a0, $a0, 9 - ld.d $a2, $sp, 160 # 8-byte Folded Reload + ld.d $a2, $sp, 176 # 8-byte Folded Reload add.d $a0, $a2, $a0 move $a2, $a4 - ld.d $a6, $sp, 184 # 8-byte Folded Reload - ld.d $a3, $sp, 168 # 8-byte Folded Reload + ld.d $a6, $sp, 200 # 8-byte Folded Reload + ld.d $a3, $sp, 184 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_22: # %.lr.ph.i241 # =>This Inner Loop Header: Depth=1 @@ -445,14 +440,14 @@ genG__align11: # @genG__align11 ld.d $a7, $a0, %got_pc_lo12(outgap) ld.w $a2, $a7, 0 ori $a3, $zero, 1 - ld.d $a0, $sp, 168 # 8-byte Folded Reload + ld.d $a0, $sp, 184 # 8-byte Folded Reload slli.d $a6, $a0, 32 addi.d $a0, $a0, 1 bne $a2, $a3, .LBB0_38 # %bb.24: # %.preheader255 - blez $s4, .LBB0_31 + blez $t0, .LBB0_31 # %bb.25: # %.lr.ph.preheader - addi.d $a2, $t7, 1 + addi.d $a2, $s7, 1 bstrpick.d $a2, $a2, 31, 0 addi.d $a3, $a2, -1 ori $t1, $zero, 8 @@ -540,13 +535,13 @@ genG__align11: # @genG__align11 .LBB0_38: # %.loopexit blez $s3, .LBB0_42 .LBB0_39: # %.lr.ph263 - ld.d $a2, $sp, 128 # 8-byte Folded Reload + ld.d $a2, $sp, 144 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(genG__align11.m) - ld.d $a3, $sp, 136 # 8-byte Folded Reload + ld.d $a3, $sp, 152 # 8-byte Folded Reload ld.d $a3, $a3, %pc_lo12(genG__align11.mp) - ld.d $t0, $sp, 112 # 8-byte Folded Reload + ld.d $t0, $sp, 128 # 8-byte Folded Reload ld.d $t0, $t0, %pc_lo12(genG__align11.largeM) - ld.d $t1, $sp, 120 # 8-byte Folded Reload + ld.d $t1, $sp, 136 # 8-byte Folded Reload ld.d $t1, $t1, %pc_lo12(genG__align11.Mp) bstrpick.d $t2, $a0, 31, 0 addi.d $t4, $t2, -1 @@ -599,7 +594,6 @@ genG__align11: # @genG__align11 sub.d $t6, $t1, $a3 bltu $t6, $t5, .LBB0_40 # %bb.48: # %vector.ph392 - move $s7, $t7 move $t5, $t4 bstrins.d $t5, $zero, 2, 0 ori $t6, $zero, 1 @@ -633,7 +627,6 @@ genG__align11: # @genG__align11 addi.d $s2, $s2, 32 bnez $s4, .LBB0_49 # %bb.50: # %middle.block401 - move $t7, $s7 bne $t4, $t5, .LBB0_40 .LBB0_51: # %._crit_edge.thread ori $a2, $zero, 0 @@ -643,28 +636,29 @@ genG__align11: # @genG__align11 fldx.s $fa1, $a4, $a2 .LBB0_52: ld.w $a2, $a7, 0 - ld.d $a3, $sp, 104 # 8-byte Folded Reload + ld.d $a3, $sp, 120 # 8-byte Folded Reload ld.d $a7, $a3, %pc_lo12(genG__align11.lastverticalw) sltu $a2, $zero, $a2 - add.w $t0, $a2, $t7 + add.w $t0, $a2, $s7 ori $a2, $zero, 2 fst.s $fa1, $a7, 0 - st.d $s6, $sp, 152 # 8-byte Folded Spill - st.d $s5, $sp, 144 # 8-byte Folded Spill + st.d $s6, $sp, 168 # 8-byte Folded Spill + st.d $s5, $sp, 160 # 8-byte Folded Spill blt $t0, $a2, .LBB0_72 # %bb.53: # %.lr.ph296 - movgr2fr.w $fa1, $s8 - ld.d $a2, $sp, 48 # 8-byte Folded Reload + ld.d $a2, $sp, 72 # 8-byte Folded Reload + movgr2fr.w $fa1, $a2 + ld.d $a2, $sp, 64 # 8-byte Folded Reload movgr2fr.w $fa2, $a2 ffint.s.w $fa1, $fa1 ffint.s.w $fa2, $fa2 - ld.d $a2, $sp, 128 # 8-byte Folded Reload + ld.d $a2, $sp, 144 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(genG__align11.m) - ld.d $a3, $sp, 112 # 8-byte Folded Reload + ld.d $a3, $sp, 128 # 8-byte Folded Reload ld.d $a3, $a3, %pc_lo12(genG__align11.largeM) - ld.d $t1, $sp, 136 # 8-byte Folded Reload + ld.d $t1, $sp, 152 # 8-byte Folded Reload ld.d $t1, $t1, %pc_lo12(genG__align11.mp) - ld.d $t2, $sp, 120 # 8-byte Folded Reload + ld.d $t2, $sp, 136 # 8-byte Folded Reload ld.d $t2, $t2, %pc_lo12(genG__align11.Mp) ori $t3, $zero, 0 lu32i.d $t3, -1 @@ -672,19 +666,21 @@ genG__align11: # @genG__align11 srai.d $a6, $a6, 30 bstrpick.d $a0, $a0, 31, 0 addi.d $a2, $a2, 4 - st.d $a2, $sp, 136 # 8-byte Folded Spill + st.d $a2, $sp, 152 # 8-byte Folded Spill addi.d $a0, $a0, -1 - st.d $a0, $sp, 128 # 8-byte Folded Spill + st.d $a0, $sp, 144 # 8-byte Folded Spill addi.d $a0, $a3, 4 - st.d $a0, $sp, 120 # 8-byte Folded Spill + st.d $a0, $sp, 136 # 8-byte Folded Spill addi.d $a0, $t1, 4 - st.d $a0, $sp, 112 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.s $fa3, $a0, %pc_lo12(.LCPI0_1) + st.d $a0, $sp, 128 # 8-byte Folded Spill addi.d $a0, $t2, 4 - st.d $a0, $sp, 104 # 8-byte Folded Spill + st.d $a0, $sp, 120 # 8-byte Folded Spill movgr2fr.w $fs0, $zero ori $t5, $zero, 1 + lu12i.w $a0, -216695 + ori $a0, $a0, 1664 + lu32i.d $a0, 0 + movgr2fr.w $fa3, $a0 b .LBB0_56 .p2align 4, , 16 .LBB0_54: # %match_calc.exit253.thread @@ -711,14 +707,14 @@ genG__align11: # @genG__align11 beqz $s3, .LBB0_54 # %bb.57: # %.lr.ph.i248.preheader # in Loop: Header=BB0_56 Depth=1 - ld.d $a0, $sp, 176 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload ldx.b $a0, $a0, $t5 slli.d $a0, $a0, 9 - ld.d $a2, $sp, 160 # 8-byte Folded Reload + ld.d $a2, $sp, 176 # 8-byte Folded Reload add.d $a0, $a2, $a0 move $a2, $t6 - ld.d $t1, $sp, 184 # 8-byte Folded Reload - ld.d $a3, $sp, 168 # 8-byte Folded Reload + ld.d $t1, $sp, 200 # 8-byte Folded Reload + ld.d $a3, $sp, 184 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_58: # %.lr.ph.i248 # Parent Loop BB0_56 Depth=1 @@ -743,9 +739,9 @@ genG__align11: # @genG__align11 # in Loop: Header=BB0_56 Depth=1 fld.s $fa4, $a5, 0 slli.d $a0, $t5, 3 - ld.d $a2, $sp, 144 # 8-byte Folded Reload + ld.d $a2, $sp, 160 # 8-byte Folded Reload ldx.d $a2, $a2, $a0 - ld.d $a3, $sp, 152 # 8-byte Folded Reload + ld.d $a3, $sp, 168 # 8-byte Folded Reload ldx.d $a0, $a3, $a0 move $t8, $zero move $s7, $zero @@ -755,11 +751,11 @@ genG__align11: # @genG__align11 addi.d $ra, $t6, 4 addi.d $s2, $a2, 4 addi.d $s6, $a0, 4 - ld.d $t4, $sp, 104 # 8-byte Folded Reload - ld.d $t3, $sp, 112 # 8-byte Folded Reload - ld.d $t2, $sp, 120 # 8-byte Folded Reload - ld.d $t1, $sp, 128 # 8-byte Folded Reload - ld.d $a0, $sp, 136 # 8-byte Folded Reload + ld.d $t4, $sp, 120 # 8-byte Folded Reload + ld.d $t3, $sp, 128 # 8-byte Folded Reload + ld.d $t2, $sp, 136 # 8-byte Folded Reload + ld.d $t1, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload fmov.s $fa6, $fa3 move $a2, $a5 fmov.s $fa5, $fa4 @@ -866,20 +862,20 @@ genG__align11: # @genG__align11 .LBB0_72: movgr2fr.w $fs0, $zero .LBB0_73: # %._crit_edge297 - ld.d $a0, $sp, 88 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload ld.d $s3, $a0, %pc_lo12(genG__align11.mseq1) - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 112 # 8-byte Folded Reload ld.d $s1, $a0, %pc_lo12(genG__align11.mseq2) - ld.d $a0, $sp, 176 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 move $s2, $a0 addi.w $s4, $a0, 0 - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload pcaddu18i $ra, %call36(strlen) jirl $ra, $ra, 0 - ld.d $t5, $sp, 152 # 8-byte Folded Reload - ld.d $t6, $sp, 144 # 8-byte Folded Reload + ld.d $t5, $sp, 168 # 8-byte Folded Reload + ld.d $t6, $sp, 160 # 8-byte Folded Reload bltz $s4, .LBB0_76 # %bb.74: # %.lr.ph.preheader.i addi.d $a1, $s2, 1 @@ -1053,7 +1049,7 @@ genG__align11: # @genG__align11 jirl $ra, $ra, 0 addi.w $a3, $a0, 0 lu12i.w $a4, 1220 - ld.d $a2, $sp, 80 # 8-byte Folded Reload + ld.d $a2, $sp, 88 # 8-byte Folded Reload blt $a2, $a3, .LBB0_98 # %bb.95: # %genGtracking.exit ori $a0, $a4, 2881 @@ -1068,19 +1064,19 @@ genG__align11: # @genG__align11 pcaddu18i $ra, %call36(strcpy) jirl $ra, $ra, 0 fmov.s $fa0, $fs0 - fld.d $fs0, $sp, 192 # 8-byte Folded Reload - ld.d $s8, $sp, 200 # 8-byte Folded Reload - ld.d $s7, $sp, 208 # 8-byte Folded Reload - ld.d $s6, $sp, 216 # 8-byte Folded Reload - ld.d $s5, $sp, 224 # 8-byte Folded Reload - ld.d $s4, $sp, 232 # 8-byte Folded Reload - ld.d $s3, $sp, 240 # 8-byte Folded Reload - ld.d $s2, $sp, 248 # 8-byte Folded Reload - ld.d $s1, $sp, 256 # 8-byte Folded Reload - ld.d $s0, $sp, 264 # 8-byte Folded Reload - ld.d $fp, $sp, 272 # 8-byte Folded Reload - ld.d $ra, $sp, 280 # 8-byte Folded Reload - addi.d $sp, $sp, 288 + fld.d $fs0, $sp, 208 # 8-byte Folded Reload + ld.d $s8, $sp, 216 # 8-byte Folded Reload + ld.d $s7, $sp, 224 # 8-byte Folded Reload + ld.d $s6, $sp, 232 # 8-byte Folded Reload + ld.d $s5, $sp, 240 # 8-byte Folded Reload + ld.d $s4, $sp, 248 # 8-byte Folded Reload + ld.d $s3, $sp, 256 # 8-byte Folded Reload + ld.d $s2, $sp, 264 # 8-byte Folded Reload + ld.d $s1, $sp, 272 # 8-byte Folded Reload + ld.d $s0, $sp, 280 # 8-byte Folded Reload + ld.d $fp, $sp, 288 # 8-byte Folded Reload + ld.d $ra, $sp, 296 # 8-byte Folded Reload + addi.d $sp, $sp, 304 ret .LBB0_97: pcalau12i $a0, %got_pc_hi20(stderr) @@ -1088,10 +1084,11 @@ genG__align11: # @genG__align11 ld.d $a0, $a0, 0 pcalau12i $a1, %pc_hi20(.L.str) addi.d $a1, $a1, %pc_lo12(.L.str) - move $a2, $s4 + move $a2, $t0 move $a3, $s3 pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 + ld.d $t0, $sp, 168 # 8-byte Folded Reload b .LBB0_4 .LBB0_98: pcalau12i $a0, %got_pc_hi20(stderr) @@ -1106,10 +1103,10 @@ genG__align11: # @genG__align11 addi.d $a0, $a0, %pc_lo12(.L.str.2) pcaddu18i $ra, %call36(ErrorExit) jirl $ra, $ra, 0 - ld.d $a0, $sp, 88 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(genG__align11.mseq1) ld.d $s2, $a0, 0 - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 112 # 8-byte Folded Reload ld.d $s1, $a0, %pc_lo12(genG__align11.mseq2) b .LBB0_96 .Lfunc_end0: diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/genalign11.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/genalign11.s index cbd0fbca..f0a9633e 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/genalign11.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/genalign11.s @@ -1,16 +1,6 @@ .file "genalign11.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function genL__align11 -.LCPI0_0: - .dword 0x3ff4cccccccccccd # double 1.3 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI0_1: - .word 0xce6e6b28 # float -1.0E+9 -.LCPI0_2: - .word 0xc97423fe # float -999999.875 .text - .globl genL__align11 + .globl genL__align11 # -- Begin function genL__align11 .p2align 5 .type genL__align11,@function genL__align11: # @genL__align11 @@ -182,22 +172,25 @@ genL__align11: # @genL__align11 ld.d $a0, $sp, 184 # 8-byte Folded Reload ld.w $s3, $a0, %pc_lo12(genL__align11.orlgth2) .LBB0_7: - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_0) - movgr2fr.w $fa1, $s5 - ffint.d.w $fa1, $fa1 - fmul.d $fa1, $fa1, $fa0 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 + movgr2fr.w $fa0, $s5 + ffint.d.w $fa0, $fa0 + lu12i.w $a0, -209716 + ori $a0, $a0, 3277 + lu32i.d $a0, 314572 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a0, $fa0 slt $a1, $a0, $s6 masknez $a0, $a0, $a1 maskeqz $a1, $s6, $a1 or $s6, $a1, $a0 addi.w $s4, $s6, 100 ld.d $a0, $sp, 200 # 8-byte Folded Reload - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 slt $a1, $a0, $s3 @@ -497,9 +490,11 @@ genL__align11: # @genL__align11 ld.d $a0, $sp, 112 # 8-byte Folded Reload ld.d $a4, $a0, %pc_lo12(genL__align11.lastverticalw) fst.s $fa0, $a4, 0 - addi.w $a0, $s5, 1 - add.w $s6, $a0, $a6 - pcalau12i $t2, %pc_hi20(.LCPI0_1) + addi.w $t2, $s5, 1 + add.w $s6, $t2, $a6 + lu12i.w $a0, -203034 + ori $a0, $a0, 2856 + lu32i.d $a0, 0 blez $fp, .LBB0_47 # %bb.27: # %.lr.ph313 move $s1, $zero @@ -526,12 +521,14 @@ genL__align11: # @genL__align11 ld.d $t0, $t0, %pc_lo12(genL__align11.mp) ld.d $t1, $sp, 128 # 8-byte Folded Reload ld.d $t1, $t1, %pc_lo12(genL__align11.Mp) - fld.s $fs0, $t2, %pc_lo12(.LCPI0_1) - pcalau12i $t2, %pc_hi20(.LCPI0_2) - fld.s $fa4, $t2, %pc_lo12(.LCPI0_2) - bstrpick.d $t2, $a0, 31, 0 + bstrpick.d $t2, $t2, 31, 0 ori $t3, $zero, 1 + movgr2fr.w $fs0, $a0 slli.d $t4, $a3, 2 + lu12i.w $a0, -223422 + ori $a0, $a0, 1022 + lu32i.d $a0, 0 + movgr2fr.w $fa4, $a0 b .LBB0_29 .p2align 4, , 16 .LBB0_28: # %._crit_edge297 @@ -709,9 +706,9 @@ genL__align11: # @genL__align11 fmov.s $ft2, $fa3 b .LBB0_34 .LBB0_47: - fld.s $fs0, $t2, %pc_lo12(.LCPI0_1) move $s8, $zero move $s1, $zero + movgr2fr.w $fs0, $a0 .LBB0_48: # %._crit_edge314 addi.w $s7, $s8, 0 slli.d $s3, $s7, 3 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/io.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/io.s index a81cc5ae..26c72780 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/io.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/io.s @@ -1,12 +1,6 @@ .file "io.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function putlocalhom3 -.LCPI0_0: - .dword 0x4017333333333333 # double 5.7999999999999998 -.LCPI0_1: - .dword 0x4082c00000000000 # double 600 .text - .globl putlocalhom3 + .globl putlocalhom3 # -- Begin function putlocalhom3 .p2align 5 .type putlocalhom3,@function putlocalhom3: # @putlocalhom3 @@ -88,15 +82,20 @@ putlocalhom3: # @putlocalhom3 pcalau12i $a0, %got_pc_hi20(divpairscore) ld.d $a0, $a0, %got_pc_lo12(divpairscore) st.d $a0, $sp, 32 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fs3, $a0, %pc_lo12(.LCPI0_0) - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fs4, $a0, %pc_lo12(.LCPI0_1) move $s7, $zero move $a0, $zero st.d $zero, $sp, 24 # 8-byte Folded Spill move $s8, $zero move $a4, $zero + lu12i.w $a2, 209715 + ori $a2, $a2, 819 + lu32i.d $a2, 471859 + lu52i.d $a2, $a2, 1025 + movgr2fr.d $fs3, $a2 + ori $a2, $zero, 0 + lu32i.d $a2, 180224 + lu52i.d $a2, $a2, 1032 + movgr2fr.d $fs4, $a2 move $s6, $fp fmov.d $fs0, $fs2 fmov.d $fs1, $fs2 @@ -251,14 +250,19 @@ putlocalhom3: # @putlocalhom3 addi.d $a0, $a0, 1 st.w $a0, $s6, 48 movgr2fr.w $fa0, $a0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_1) ffint.d.w $fa0, $fa0 fdiv.d $fa0, $fs1, $fa0 + lu12i.w $a0, 209715 + ori $a0, $a0, 819 + lu32i.d $a0, 471859 + lu52i.d $a0, $a0, 1025 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 - fdiv.d $fa0, $fa0, $fa2 + ori $a0, $zero, 0 + lu32i.d $a0, 180224 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 fst.d $fa0, $s6, 40 .LBB0_26: ld.d $a0, $s2, 0 @@ -279,11 +283,16 @@ putlocalhom3: # @putlocalhom3 .LBB0_29: beqz $fp, .LBB0_32 # %bb.30: # %.lr.ph162 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_0) - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_1) + lu12i.w $a0, 209715 + ori $a0, $a0, 819 + lu32i.d $a0, 471859 + lu52i.d $a0, $a0, 1025 + movgr2fr.d $fa0, $a0 fmul.d $fa0, $fs0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, 180224 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa1, $a0 fdiv.d $fa0, $fa0, $fa1 movgr2fr.w $fa1, $s3 ffint.d.w $fa1, $fa1 @@ -323,14 +332,7 @@ putlocalhom3: # @putlocalhom3 .Lfunc_end0: .size putlocalhom3, .Lfunc_end0-putlocalhom3 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function putlocalhom_ext -.LCPI1_0: - .dword 0x4017333333333333 # double 5.7999999999999998 -.LCPI1_1: - .dword 0x4082c00000000000 # double 600 - .text - .globl putlocalhom_ext + .globl putlocalhom_ext # -- Begin function putlocalhom_ext .p2align 5 .type putlocalhom_ext,@function putlocalhom_ext: # @putlocalhom_ext @@ -353,32 +355,37 @@ putlocalhom_ext: # @putlocalhom_ext move $s0, $a4 move $s1, $a3 move $fp, $a2 - move $s7, $a1 + move $s8, $a1 + lu12i.w $s3, 209715 beqz $a5, .LBB1_16 # %bb.1: # %.lr.ph pcalau12i $a2, %got_pc_hi20(divpairscore) ld.d $a3, $a2, %got_pc_lo12(divpairscore) - move $s2, $zero + move $s6, $zero move $a2, $zero move $t1, $zero st.d $zero, $sp, 16 # 8-byte Folded Spill - move $s6, $zero - move $s3, $zero - move $s8, $zero + move $s7, $zero + move $s4, $zero + move $a4, $zero ld.w $a3, $a3, 0 st.d $a3, $sp, 24 # 8-byte Folded Spill - pcalau12i $a3, %pc_hi20(.LCPI1_0) - fld.d $fs0, $a3, %pc_lo12(.LCPI1_0) - pcalau12i $a3, %pc_hi20(.LCPI1_1) - fld.d $fs1, $a3, %pc_lo12(.LCPI1_1) ori $a6, $zero, 1 ori $a7, $zero, 45 - move $s4, $fp + ori $a3, $s3, 819 + lu32i.d $a3, 471859 + lu52i.d $a3, $a3, 1025 + movgr2fr.d $fs0, $a3 + ori $a3, $zero, 0 + lu32i.d $a3, 180224 + lu52i.d $a3, $a3, 1032 + movgr2fr.d $fs1, $a3 + move $s5, $fp b .LBB1_5 .LBB1_2: # in Loop: Header=BB1_5 Depth=1 - move $s6, $zero + move $s7, $zero move $a2, $zero - add.d $a0, $s0, $s2 + add.d $a0, $s0, $s6 ld.d $a1, $sp, 16 # 8-byte Folded Reload add.d $a1, $a0, $a1 st.d $a1, $sp, 16 # 8-byte Folded Spill @@ -386,83 +393,85 @@ putlocalhom_ext: # @putlocalhom_ext ori $a6, $zero, 1 ori $a7, $zero, 45 .LBB1_4: # in Loop: Header=BB1_5 Depth=1 - ld.bu $a1, $s5, 0 - addi.d $a0, $s5, 1 + ld.bu $a1, $s3, 0 + addi.d $a0, $s3, 1 addi.d $a1, $a1, -45 sltu $a1, $zero, $a1 - ld.bu $a3, $s7, 0 + ld.bu $a3, $s8, 0 add.d $s1, $s1, $a1 - addi.d $s7, $s7, 1 - ld.bu $a5, $s5, 1 + addi.d $s8, $s8, 1 + ld.bu $a5, $s3, 1 addi.d $a3, $a3, -45 sltu $a3, $zero, $a3 add.w $s0, $s0, $a3 - sub.d $s2, $zero, $s3 + sub.d $s6, $zero, $s4 beqz $a5, .LBB1_15 .LBB1_5: # =>This Inner Loop Header: Depth=1 - move $s5, $a0 + move $s3, $a0 andi $a0, $a5, 255 bne $a2, $a6, .LBB1_12 # %bb.6: # in Loop: Header=BB1_5 Depth=1 beq $a0, $a7, .LBB1_8 # %bb.7: # in Loop: Header=BB1_5 Depth=1 - ld.bu $a1, $s7, 0 + ld.bu $a1, $s8, 0 bne $a1, $a7, .LBB1_12 .LBB1_8: # in Loop: Header=BB1_5 Depth=1 blez $t1, .LBB1_10 # %bb.9: # in Loop: Header=BB1_5 Depth=1 ori $a0, $zero, 1 ori $a1, $zero, 80 - st.d $s4, $sp, 32 # 8-byte Folded Spill - move $s4, $s3 - move $s3, $t1 + move $s2, $a4 + st.d $s5, $sp, 32 # 8-byte Folded Spill + move $s5, $s4 + move $s4, $t1 pcaddu18i $ra, %call36(calloc) jirl $ra, $ra, 0 - move $t1, $s3 - move $s3, $s4 + move $t1, $s4 + move $s4, $s5 + move $a4, $s2 ld.d $a1, $sp, 32 # 8-byte Folded Reload st.d $a0, $a1, 8 st.d $zero, $a0, 8 - move $s4, $a0 + move $s5, $a0 .LBB1_10: # in Loop: Header=BB1_5 Depth=1 addi.d $a1, $s1, -1 addi.w $a0, $s0, -1 addi.w $t1, $t1, 1 - st.w $s8, $s4, 24 - st.w $s3, $s4, 32 - st.w $a1, $s4, 28 - st.w $a0, $s4, 36 + st.w $a4, $s5, 24 + st.w $s4, $s5, 32 + st.w $a1, $s5, 28 + st.w $a0, $s5, 36 ld.d $a1, $sp, 24 # 8-byte Folded Reload beqz $a1, .LBB1_2 # %bb.11: # in Loop: Header=BB1_5 Depth=1 move $a2, $zero - sub.d $a0, $a0, $s3 + sub.d $a0, $a0, $s4 addi.d $a0, $a0, 1 - st.w $a0, $s4, 48 - movgr2fr.w $fa0, $s6 + st.w $a0, $s5, 48 + movgr2fr.w $fa0, $s7 ffint.d.w $fa0, $fa0 movgr2fr.w $fa1, $a0 ffint.d.w $fa1, $fa1 fdiv.d $fa0, $fa0, $fa1 fmul.d $fa0, $fa0, $fs0 fdiv.d $fa0, $fa0, $fs1 - fst.d $fa0, $s4, 40 - move $s6, $zero + fst.d $fa0, $s5, 40 + move $s7, $zero b .LBB1_3 .p2align 4, , 16 .LBB1_12: # in Loop: Header=BB1_5 Depth=1 beq $a0, $a7, .LBB1_4 # %bb.13: # in Loop: Header=BB1_5 Depth=1 - ld.b $a0, $s7, 0 + ld.b $a0, $s8, 0 beq $a0, $a7, .LBB1_4 # %bb.14: # in Loop: Header=BB1_5 Depth=1 sltui $a1, $a2, 1 - masknez $a2, $s8, $a1 + masknez $a2, $a4, $a1 maskeqz $a3, $s1, $a1 - or $s8, $a3, $a2 - masknez $a2, $s3, $a1 + or $a4, $a3, $a2 + masknez $a2, $s4, $a1 maskeqz $a1, $s0, $a1 - or $s3, $a1, $a2 + or $s4, $a1, $a2 ext.w.b $a1, $a5 slli.d $a1, $a1, 2 pcalau12i $a2, %got_pc_hi20(amino_n) @@ -477,67 +486,74 @@ putlocalhom_ext: # @putlocalhom_ext add.d $a1, $a3, $a1 slli.d $a0, $a0, 2 ldx.w $a0, $a1, $a0 - add.w $s6, $a0, $s6 + add.w $s7, $a0, $s7 ori $a2, $zero, 1 b .LBB1_4 .LBB1_15: # %._crit_edge.loopexit slt $a2, $zero, $t1 - movgr2fr.w $fa0, $s6 + movgr2fr.w $fa0, $s7 ffint.d.w $fs0, $fa0 - ld.d $s5, $sp, 16 # 8-byte Folded Reload + lu12i.w $s3, 209715 + ld.d $s2, $sp, 16 # 8-byte Folded Reload ld.bu $a3, $a0, -1 ori $a0, $zero, 45 bne $a3, $a0, .LBB1_17 b .LBB1_22 .LBB1_16: - move $s8, $zero - move $s3, $zero - move $s5, $zero - move $a2, $zero + move $a4, $zero + move $s4, $zero move $s2, $zero + move $a2, $zero + move $s6, $zero movgr2fr.d $fs0, $zero - move $s4, $fp + move $s5, $fp ld.bu $a3, $a0, -1 ori $a0, $zero, 45 beq $a3, $a0, .LBB1_22 .LBB1_17: - ld.bu $a1, $s7, -1 + ld.bu $a1, $s8, -1 beq $a1, $a0, .LBB1_22 # %bb.18: beqz $a2, .LBB1_20 # %bb.19: ori $a0, $zero, 1 ori $a1, $zero, 80 + move $s7, $a4 pcaddu18i $ra, %call36(calloc) jirl $ra, $ra, 0 - st.d $a0, $s4, 8 + move $a4, $s7 + st.d $a0, $s5, 8 st.d $zero, $a0, 8 - move $s4, $a0 + move $s5, $a0 .LBB1_20: addi.d $a1, $s1, -1 addi.w $a0, $s0, -1 - st.w $s8, $s4, 24 - st.w $s3, $s4, 32 - st.w $a1, $s4, 28 - st.w $a0, $s4, 36 + st.w $a4, $s5, 24 + st.w $s4, $s5, 32 + st.w $a1, $s5, 28 + st.w $a0, $s5, 36 pcalau12i $a1, %got_pc_hi20(divpairscore) ld.d $a1, $a1, %got_pc_lo12(divpairscore) ld.w $a1, $a1, 0 beqz $a1, .LBB1_27 # %bb.21: - sub.d $a0, $a0, $s3 + sub.d $a0, $a0, $s4 addi.d $a0, $a0, 1 - st.w $a0, $s4, 48 + st.w $a0, $s5, 48 movgr2fr.w $fa0, $a0 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_0) - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI1_1) ffint.d.w $fa0, $fa0 fdiv.d $fa0, $fs0, $fa0 + ori $a0, $s3, 819 + lu32i.d $a0, 471859 + lu52i.d $a0, $a0, 1025 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 - fdiv.d $fa0, $fa0, $fa2 - fst.d $fa0, $s4, 40 + ori $a0, $zero, 0 + lu32i.d $a0, 180224 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 + fst.d $fa0, $s5, 40 .LBB1_22: pcalau12i $a0, %got_pc_hi20(divpairscore) ld.d $a0, $a0, %got_pc_lo12(divpairscore) @@ -546,14 +562,13 @@ putlocalhom_ext: # @putlocalhom_ext .LBB1_23: beqz $fp, .LBB1_26 # %bb.24: # %.lr.ph145.preheader - lu12i.w $a0, 209715 - ori $a0, $a0, 819 + ori $a0, $s3, 819 lu32i.d $a0, 471859 lu52i.d $a0, $a0, 1025 .p2align 4, , 16 .LBB1_25: # %.lr.ph145 # =>This Inner Loop Header: Depth=1 - st.w $s5, $fp, 48 + st.w $s2, $fp, 48 st.d $a0, $fp, 40 ld.d $fp, $fp, 8 bnez $fp, .LBB1_25 @@ -574,8 +589,8 @@ putlocalhom_ext: # @putlocalhom_ext addi.d $sp, $sp, 144 ret .LBB1_27: - add.d $a0, $s0, $s2 - add.d $s5, $a0, $s5 + add.d $a0, $s0, $s6 + add.d $s2, $a0, $s2 pcalau12i $a0, %got_pc_hi20(divpairscore) ld.d $a0, $a0, %got_pc_lo12(divpairscore) ld.w $a0, $a0, 0 @@ -584,14 +599,7 @@ putlocalhom_ext: # @putlocalhom_ext .Lfunc_end1: .size putlocalhom_ext, .Lfunc_end1-putlocalhom_ext # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function putlocalhom2 -.LCPI2_0: - .dword 0x4017333333333333 # double 5.7999999999999998 -.LCPI2_1: - .dword 0x4082c00000000000 # double 600 - .text - .globl putlocalhom2 + .globl putlocalhom2 # -- Begin function putlocalhom2 .p2align 5 .type putlocalhom2,@function putlocalhom2: # @putlocalhom2 @@ -608,131 +616,133 @@ putlocalhom2: # @putlocalhom2 st.d $s6, $sp, 72 # 8-byte Folded Spill st.d $s7, $sp, 64 # 8-byte Folded Spill st.d $s8, $sp, 56 # 8-byte Folded Spill + fst.d $fs0, $sp, 48 # 8-byte Folded Spill + fst.d $fs1, $sp, 40 # 8-byte Folded Spill ld.bu $a5, $a0, 0 move $s0, $a4 move $s1, $a3 move $fp, $a2 - pcalau12i $a2, %pc_hi20(.LCPI2_0) - st.d $a2, $sp, 16 # 8-byte Folded Spill - pcalau12i $a2, %pc_hi20(.LCPI2_1) - st.d $a2, $sp, 8 # 8-byte Folded Spill + lu12i.w $s4, 209715 beqz $a5, .LBB2_16 # %bb.1: # %.lr.ph pcalau12i $a2, %got_pc_hi20(divpairscore) ld.d $a3, $a2, %got_pc_lo12(divpairscore) - move $s4, $zero + move $s3, $zero move $a2, $zero + move $a7, $zero + st.d $zero, $sp, 16 # 8-byte Folded Spill + st.d $zero, $sp, 8 # 8-byte Folded Spill move $a6, $zero - st.d $zero, $sp, 32 # 8-byte Folded Spill - st.d $zero, $sp, 24 # 8-byte Folded Spill - move $s8, $zero + move $s5, $zero move $s6, $zero - move $a4, $zero ld.w $a3, $a3, 0 - st.d $a3, $sp, 40 # 8-byte Folded Spill - ori $a7, $zero, 1 - ori $t0, $zero, 45 + st.d $a3, $sp, 24 # 8-byte Folded Spill + ori $t0, $zero, 1 + ori $t1, $zero, 45 + ori $a3, $s4, 819 + lu32i.d $a3, 471859 + lu52i.d $a3, $a3, 1025 + movgr2fr.d $fs0, $a3 + ori $a3, $zero, 0 + lu32i.d $a3, 180224 + lu52i.d $a3, $a3, 1032 + movgr2fr.d $fs1, $a3 move $s7, $fp b .LBB2_5 .LBB2_2: # in Loop: Header=BB2_5 Depth=1 move $a2, $zero - ld.d $a0, $sp, 24 # 8-byte Folded Reload - add.w $a0, $a0, $s8 - st.d $a0, $sp, 24 # 8-byte Folded Spill - add.d $a0, $s0, $s4 - ld.d $a1, $sp, 32 # 8-byte Folded Reload + ld.d $a0, $sp, 8 # 8-byte Folded Reload + add.w $a0, $a0, $a6 + st.d $a0, $sp, 8 # 8-byte Folded Spill + add.d $a0, $s0, $s3 + ld.d $a1, $sp, 16 # 8-byte Folded Reload add.w $a1, $a0, $a1 - st.d $a1, $sp, 32 # 8-byte Folded Spill + st.d $a1, $sp, 16 # 8-byte Folded Spill .LBB2_3: # in Loop: Header=BB2_5 Depth=1 - move $s8, $zero - ori $a7, $zero, 1 - ori $t0, $zero, 45 + move $a6, $zero + ori $t0, $zero, 1 + ori $t1, $zero, 45 .LBB2_4: # in Loop: Header=BB2_5 Depth=1 - ld.bu $a1, $s2, 0 - addi.d $a0, $s2, 1 + ld.bu $a1, $s8, 0 + addi.d $a0, $s8, 1 addi.d $a1, $a1, -45 sltu $a1, $zero, $a1 - ld.bu $a3, $s3, 0 + ld.bu $a3, $s4, 0 add.d $s1, $s1, $a1 - addi.d $a1, $s3, 1 - ld.bu $a5, $s2, 1 + addi.d $a1, $s4, 1 + ld.bu $a5, $s8, 1 addi.d $a3, $a3, -45 sltu $a3, $zero, $a3 add.w $s0, $s0, $a3 - sub.d $s4, $zero, $s6 + sub.d $s3, $zero, $s5 beqz $a5, .LBB2_15 .LBB2_5: # =>This Inner Loop Header: Depth=1 - move $s2, $a0 - move $s3, $a1 + move $s8, $a0 + move $s4, $a1 andi $a0, $a5, 255 - bne $a2, $a7, .LBB2_12 + bne $a2, $t0, .LBB2_12 # %bb.6: # in Loop: Header=BB2_5 Depth=1 - beq $a0, $t0, .LBB2_8 + beq $a0, $t1, .LBB2_8 # %bb.7: # in Loop: Header=BB2_5 Depth=1 - ld.bu $a1, $s3, 0 - bne $a1, $t0, .LBB2_12 + ld.bu $a1, $s4, 0 + bne $a1, $t1, .LBB2_12 .LBB2_8: # in Loop: Header=BB2_5 Depth=1 - blez $a6, .LBB2_10 + blez $a7, .LBB2_10 # %bb.9: # in Loop: Header=BB2_5 Depth=1 ori $a0, $zero, 1 ori $a1, $zero, 80 - move $s5, $s8 - move $s8, $a4 - st.d $s7, $sp, 48 # 8-byte Folded Spill - move $s7, $s6 - move $s6, $a6 + move $s2, $a6 + st.d $s3, $sp, 32 # 8-byte Folded Spill + move $s3, $s7 + move $s7, $s5 + move $s5, $a7 pcaddu18i $ra, %call36(calloc) jirl $ra, $ra, 0 - move $a6, $s6 - move $s6, $s7 - move $a4, $s8 - move $s8, $s5 - ld.d $a1, $sp, 48 # 8-byte Folded Reload - st.d $a0, $a1, 8 + move $a7, $s5 + move $s5, $s7 + move $a6, $s2 + st.d $a0, $s3, 8 + ld.d $s3, $sp, 32 # 8-byte Folded Reload st.d $zero, $a0, 8 move $s7, $a0 .LBB2_10: # in Loop: Header=BB2_5 Depth=1 addi.d $a1, $s1, -1 addi.w $a0, $s0, -1 - addi.w $a6, $a6, 1 - st.w $a4, $s7, 24 - st.w $s6, $s7, 32 + addi.w $a7, $a7, 1 + st.w $s6, $s7, 24 + st.w $s5, $s7, 32 st.w $a1, $s7, 28 st.w $a0, $s7, 36 - ld.d $a1, $sp, 40 # 8-byte Folded Reload + ld.d $a1, $sp, 24 # 8-byte Folded Reload beqz $a1, .LBB2_2 # %bb.11: # in Loop: Header=BB2_5 Depth=1 move $a2, $zero - movgr2fr.w $fa0, $s8 - sub.d $a0, $a0, $s6 + sub.d $a0, $a0, $s5 addi.d $a0, $a0, 1 + st.w $a0, $s7, 48 + movgr2fr.w $fa0, $a6 ffint.d.w $fa0, $fa0 movgr2fr.w $fa1, $a0 - ld.d $a1, $sp, 16 # 8-byte Folded Reload - fld.d $fa2, $a1, %pc_lo12(.LCPI2_0) ffint.d.w $fa1, $fa1 - ld.d $a1, $sp, 8 # 8-byte Folded Reload - fld.d $fa3, $a1, %pc_lo12(.LCPI2_1) fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa2 - st.w $a0, $s7, 48 - fdiv.d $fa0, $fa0, $fa3 + fmul.d $fa0, $fa0, $fs0 + fdiv.d $fa0, $fa0, $fs1 fst.d $fa0, $s7, 40 b .LBB2_3 .p2align 4, , 16 .LBB2_12: # in Loop: Header=BB2_5 Depth=1 - beq $a0, $t0, .LBB2_4 + beq $a0, $t1, .LBB2_4 # %bb.13: # in Loop: Header=BB2_5 Depth=1 - ld.b $a0, $s3, 0 - beq $a0, $t0, .LBB2_4 + ld.b $a0, $s4, 0 + beq $a0, $t1, .LBB2_4 # %bb.14: # in Loop: Header=BB2_5 Depth=1 sltui $a1, $a2, 1 - masknez $a2, $a4, $a1 - maskeqz $a3, $s1, $a1 - or $a4, $a3, $a2 masknez $a2, $s6, $a1 + maskeqz $a3, $s1, $a1 + or $s6, $a3, $a2 + masknez $a2, $s5, $a1 maskeqz $a1, $s0, $a1 - or $s6, $a1, $a2 + or $s5, $a1, $a2 ext.w.b $a1, $a5 slli.d $a1, $a1, 2 pcalau12i $a2, %got_pc_hi20(amino_n) @@ -747,25 +757,26 @@ putlocalhom2: # @putlocalhom2 add.d $a1, $a3, $a1 slli.d $a0, $a0, 2 ldx.w $a0, $a1, $a0 - add.w $s8, $a0, $s8 + add.w $a6, $a0, $a6 ori $a2, $zero, 1 b .LBB2_4 .LBB2_15: # %._crit_edge.loopexit - slt $a2, $zero, $a6 - ld.d $s2, $sp, 32 # 8-byte Folded Reload - ld.d $s3, $sp, 24 # 8-byte Folded Reload + slt $a2, $zero, $a7 + lu12i.w $s4, 209715 + ld.d $s2, $sp, 16 # 8-byte Folded Reload + ld.d $s8, $sp, 8 # 8-byte Folded Reload ld.bu $a3, $a0, -1 ori $a0, $zero, 45 bne $a3, $a0, .LBB2_17 b .LBB2_22 .LBB2_16: - move $a4, $zero move $s6, $zero + move $s5, $zero + move $a6, $zero move $s8, $zero - move $s3, $zero move $s2, $zero move $a2, $zero - move $s4, $zero + move $s3, $zero move $s7, $fp ld.bu $a3, $a0, -1 ori $a0, $zero, 45 @@ -778,18 +789,20 @@ putlocalhom2: # @putlocalhom2 # %bb.19: ori $a0, $zero, 1 ori $a1, $zero, 80 - move $s5, $a4 + st.d $s6, $sp, 32 # 8-byte Folded Spill + move $s6, $a6 pcaddu18i $ra, %call36(calloc) jirl $ra, $ra, 0 - move $a4, $s5 + move $a6, $s6 + ld.d $s6, $sp, 32 # 8-byte Folded Reload st.d $a0, $s7, 8 st.d $zero, $a0, 8 move $s7, $a0 .LBB2_20: addi.d $a1, $s1, -1 addi.w $a0, $s0, -1 - st.w $a4, $s7, 24 - st.w $s6, $s7, 32 + st.w $s6, $s7, 24 + st.w $s5, $s7, 32 st.w $a1, $s7, 28 st.w $a0, $s7, 36 pcalau12i $a1, %got_pc_hi20(divpairscore) @@ -797,20 +810,24 @@ putlocalhom2: # @putlocalhom2 ld.w $a1, $a1, 0 beqz $a1, .LBB2_27 # %bb.21: - sub.d $a0, $a0, $s6 + sub.d $a0, $a0, $s5 addi.d $a0, $a0, 1 st.w $a0, $s7, 48 - movgr2fr.w $fa0, $s8 + movgr2fr.w $fa0, $a6 ffint.d.w $fa0, $fa0 movgr2fr.w $fa1, $a0 - ld.d $a0, $sp, 16 # 8-byte Folded Reload - fld.d $fa2, $a0, %pc_lo12(.LCPI2_0) - ld.d $a0, $sp, 8 # 8-byte Folded Reload - fld.d $fa3, $a0, %pc_lo12(.LCPI2_1) ffint.d.w $fa1, $fa1 fdiv.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa2 - fdiv.d $fa0, $fa0, $fa3 + ori $a0, $s4, 819 + lu32i.d $a0, 471859 + lu52i.d $a0, $a0, 1025 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, 180224 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 fst.d $fa0, $s7, 40 .LBB2_22: pcalau12i $a0, %got_pc_hi20(divpairscore) @@ -820,11 +837,13 @@ putlocalhom2: # @putlocalhom2 .LBB2_23: beqz $fp, .LBB2_26 # %bb.24: # %.lr.ph149 - ld.d $a0, $sp, 16 # 8-byte Folded Reload - fld.d $fa0, $a0, %pc_lo12(.LCPI2_0) - movgr2fr.w $fa1, $s3 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $s8 + ffint.d.w $fa0, $fa0 + ori $a0, $s4, 819 + lu32i.d $a0, 471859 + lu52i.d $a0, $a0, 1025 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 ori $a0, $zero, 600 mul.d $a0, $s2, $a0 movgr2fr.w $fa1, $a0 @@ -837,6 +856,8 @@ putlocalhom2: # @putlocalhom2 ld.d $fp, $fp, 8 bnez $fp, .LBB2_25 .LBB2_26: # %.loopexit + fld.d $fs1, $sp, 40 # 8-byte Folded Reload + fld.d $fs0, $sp, 48 # 8-byte Folded Reload ld.d $s8, $sp, 56 # 8-byte Folded Reload ld.d $s7, $sp, 64 # 8-byte Folded Reload ld.d $s6, $sp, 72 # 8-byte Folded Reload @@ -851,8 +872,8 @@ putlocalhom2: # @putlocalhom2 addi.d $sp, $sp, 144 ret .LBB2_27: - add.w $s3, $s3, $s8 - add.d $a0, $s0, $s4 + add.w $s8, $s8, $a6 + add.d $a0, $s0, $s3 add.w $s2, $a0, $s2 pcalau12i $a0, %got_pc_hi20(divpairscore) ld.d $a0, $a0, %got_pc_lo12(divpairscore) @@ -862,130 +883,126 @@ putlocalhom2: # @putlocalhom2 .Lfunc_end2: .size putlocalhom2, .Lfunc_end2-putlocalhom2 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function putlocalhom -.LCPI3_0: - .dword 0x4017333333333333 # double 5.7999999999999998 -.LCPI3_1: - .dword 0x4082c00000000000 # double 600 - .text - .globl putlocalhom + .globl putlocalhom # -- Begin function putlocalhom .p2align 5 .type putlocalhom,@function putlocalhom: # @putlocalhom # %bb.0: - addi.d $sp, $sp, -176 - st.d $ra, $sp, 168 # 8-byte Folded Spill - st.d $fp, $sp, 160 # 8-byte Folded Spill - st.d $s0, $sp, 152 # 8-byte Folded Spill - st.d $s1, $sp, 144 # 8-byte Folded Spill - st.d $s2, $sp, 136 # 8-byte Folded Spill - st.d $s3, $sp, 128 # 8-byte Folded Spill - st.d $s4, $sp, 120 # 8-byte Folded Spill - st.d $s5, $sp, 112 # 8-byte Folded Spill - st.d $s6, $sp, 104 # 8-byte Folded Spill - st.d $s7, $sp, 96 # 8-byte Folded Spill - st.d $s8, $sp, 88 # 8-byte Folded Spill - fst.d $fs0, $sp, 80 # 8-byte Folded Spill - fst.d $fs1, $sp, 72 # 8-byte Folded Spill - fst.d $fs2, $sp, 64 # 8-byte Folded Spill - fst.d $fs3, $sp, 56 # 8-byte Folded Spill - fst.d $fs4, $sp, 48 # 8-byte Folded Spill + addi.d $sp, $sp, -160 + st.d $ra, $sp, 152 # 8-byte Folded Spill + st.d $fp, $sp, 144 # 8-byte Folded Spill + st.d $s0, $sp, 136 # 8-byte Folded Spill + st.d $s1, $sp, 128 # 8-byte Folded Spill + st.d $s2, $sp, 120 # 8-byte Folded Spill + st.d $s3, $sp, 112 # 8-byte Folded Spill + st.d $s4, $sp, 104 # 8-byte Folded Spill + st.d $s5, $sp, 96 # 8-byte Folded Spill + st.d $s6, $sp, 88 # 8-byte Folded Spill + st.d $s7, $sp, 80 # 8-byte Folded Spill + st.d $s8, $sp, 72 # 8-byte Folded Spill + fst.d $fs0, $sp, 64 # 8-byte Folded Spill + fst.d $fs1, $sp, 56 # 8-byte Folded Spill + fst.d $fs2, $sp, 48 # 8-byte Folded Spill + fst.d $fs3, $sp, 40 # 8-byte Folded Spill + fst.d $fs4, $sp, 32 # 8-byte Folded Spill ld.bu $a5, $a0, 0 move $s0, $a4 move $s1, $a3 move $fp, $a2 - pcalau12i $a4, %pc_hi20(.LCPI3_0) - pcalau12i $a3, %pc_hi20(.LCPI3_1) - beqz $a5, .LBB3_18 + lu12i.w $a3, 209715 + beqz $a5, .LBB3_17 # %bb.1: # %.lr.ph move $s2, $a1 pcalau12i $a1, %got_pc_hi20(divpairscore) ld.d $a2, $a1, %got_pc_lo12(divpairscore) - move $s7, $zero + move $s6, $zero move $a1, $zero + move $s8, $zero + st.d $zero, $sp, 16 # 8-byte Folded Spill + move $s5, $zero move $s3, $zero - st.d $zero, $sp, 32 # 8-byte Folded Spill - move $s6, $zero - move $s4, $zero ld.w $a2, $a2, 0 - st.d $a2, $sp, 40 # 8-byte Folded Spill - addi.d $s5, $a0, 1 + st.d $a2, $sp, 24 # 8-byte Folded Spill + addi.d $s4, $a0, 1 movgr2fr.d $fs2, $zero - st.d $a4, $sp, 16 # 8-byte Folded Spill - fld.d $fs3, $a4, %pc_lo12(.LCPI3_0) - st.d $a3, $sp, 24 # 8-byte Folded Spill - fld.d $fs4, $a3, %pc_lo12(.LCPI3_1) - ori $a4, $zero, 1 + ori $a6, $zero, 1 ori $a7, $zero, 45 - move $s8, $fp + ori $a0, $a3, 819 + lu32i.d $a0, 471859 + lu52i.d $a0, $a0, 1025 + movgr2fr.d $fs3, $a0 + ori $a0, $zero, 0 + lu32i.d $a0, 180224 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs4, $a0 + move $s7, $fp fmov.d $fs0, $fs2 fmov.d $fs1, $fs2 b .LBB3_5 .LBB3_2: # in Loop: Header=BB3_5 Depth=1 move $a1, $zero fadd.d $fs0, $fs1, $fs0 - add.d $a0, $s0, $s7 - ld.d $a2, $sp, 32 # 8-byte Folded Reload + add.d $a0, $s0, $s6 + ld.d $a2, $sp, 16 # 8-byte Folded Reload add.d $a2, $a0, $a2 - st.d $a2, $sp, 32 # 8-byte Folded Spill + st.d $a2, $sp, 16 # 8-byte Folded Spill .LBB3_3: # in Loop: Header=BB3_5 Depth=1 fmov.d $fs1, $fs2 - ori $a4, $zero, 1 + ori $a6, $zero, 1 ori $a7, $zero, 45 .LBB3_4: # in Loop: Header=BB3_5 Depth=1 - ld.bu $a0, $s5, -1 + ld.bu $a0, $s4, -1 addi.d $a0, $a0, -45 ld.bu $a2, $s2, 0 sltu $a0, $zero, $a0 add.d $s1, $s1, $a0 addi.d $s2, $s2, 1 addi.d $a0, $a2, -45 - ld.bu $a5, $s5, 0 + ld.bu $a5, $s4, 0 sltu $a0, $zero, $a0 add.w $s0, $s0, $a0 - sub.d $s7, $zero, $s6 - addi.d $s5, $s5, 1 + sub.d $s6, $zero, $s5 + addi.d $s4, $s4, 1 beqz $a5, .LBB3_15 .LBB3_5: # =>This Inner Loop Header: Depth=1 andi $a0, $a5, 255 - bne $a1, $a4, .LBB3_12 + bne $a1, $a6, .LBB3_12 # %bb.6: # in Loop: Header=BB3_5 Depth=1 beq $a0, $a7, .LBB3_8 # %bb.7: # in Loop: Header=BB3_5 Depth=1 ld.bu $a2, $s2, 0 bne $a2, $a7, .LBB3_12 .LBB3_8: # in Loop: Header=BB3_5 Depth=1 - blez $s3, .LBB3_10 + blez $s8, .LBB3_10 # %bb.9: # in Loop: Header=BB3_5 Depth=1 ori $a0, $zero, 1 ori $a1, $zero, 80 pcaddu18i $ra, %call36(calloc) jirl $ra, $ra, 0 - st.d $a0, $s8, 8 + st.d $a0, $s7, 8 st.d $zero, $a0, 8 - move $s8, $a0 + move $s7, $a0 .LBB3_10: # in Loop: Header=BB3_5 Depth=1 addi.d $a1, $s1, -1 addi.w $a0, $s0, -1 - addi.w $s3, $s3, 1 - st.w $s4, $s8, 24 - st.w $s6, $s8, 32 - st.w $a1, $s8, 28 - st.w $a0, $s8, 36 - ld.d $a1, $sp, 40 # 8-byte Folded Reload + addi.w $s8, $s8, 1 + st.w $s3, $s7, 24 + st.w $s5, $s7, 32 + st.w $a1, $s7, 28 + st.w $a0, $s7, 36 + ld.d $a1, $sp, 24 # 8-byte Folded Reload beqz $a1, .LBB3_2 # %bb.11: # in Loop: Header=BB3_5 Depth=1 move $a1, $zero - sub.d $a0, $a0, $s6 + sub.d $a0, $a0, $s5 addi.d $a0, $a0, 1 - st.w $a0, $s8, 48 + st.w $a0, $s7, 48 movgr2fr.w $fa0, $a0 ffint.d.w $fa0, $fa0 fdiv.d $fa0, $fs1, $fa0 fmul.d $fa0, $fa0, $fs3 fdiv.d $fa0, $fa0, $fs4 - fst.d $fa0, $s8, 40 + fst.d $fa0, $s7, 40 b .LBB3_3 .p2align 4, , 16 .LBB3_12: # in Loop: Header=BB3_5 Depth=1 @@ -995,12 +1012,12 @@ putlocalhom: # @putlocalhom beq $a0, $a7, .LBB3_4 # %bb.14: # in Loop: Header=BB3_5 Depth=1 sltui $a1, $a1, 1 - masknez $a2, $s4, $a1 + masknez $a2, $s3, $a1 maskeqz $a3, $s1, $a1 - or $s4, $a3, $a2 - masknez $a2, $s6, $a1 + or $s3, $a3, $a2 + masknez $a2, $s5, $a1 maskeqz $a1, $s0, $a1 - or $s6, $a1, $a2 + or $s5, $a1, $a2 ext.w.b $a1, $a5 slli.d $a1, $a1, 2 pcalau12i $a2, %got_pc_hi20(amino_n) @@ -1021,89 +1038,100 @@ putlocalhom: # @putlocalhom ori $a1, $zero, 1 b .LBB3_4 .LBB3_15: # %._crit_edge - blez $s3, .LBB3_17 + blez $s8, .LBB3_18 # %bb.16: ori $a0, $zero, 1 ori $a1, $zero, 80 pcaddu18i $ra, %call36(calloc) jirl $ra, $ra, 0 - st.d $a0, $s8, 8 + st.d $a0, $s7, 8 st.d $zero, $a0, 8 - move $s8, $a0 -.LBB3_17: # %._crit_edge.thread - ld.d $a3, $sp, 24 # 8-byte Folded Reload - ld.d $a4, $sp, 16 # 8-byte Folded Reload - b .LBB3_19 -.LBB3_18: + move $s7, $a0 + b .LBB3_18 +.LBB3_17: movgr2fr.d $fs0, $zero - move $s7, $zero - st.d $zero, $sp, 32 # 8-byte Folded Spill move $s6, $zero - move $s4, $zero + st.d $zero, $sp, 16 # 8-byte Folded Spill + move $s5, $zero + move $s3, $zero fmov.d $fs1, $fs0 - move $s8, $fp -.LBB3_19: # %._crit_edge.thread + move $s7, $fp +.LBB3_18: # %._crit_edge.thread addi.d $a1, $s1, -1 addi.w $a0, $s0, -1 - st.w $s4, $s8, 24 - st.w $s6, $s8, 32 - st.w $a1, $s8, 28 - st.w $a0, $s8, 36 + st.w $s3, $s7, 24 + st.w $s5, $s7, 32 + st.w $a1, $s7, 28 + st.w $a0, $s7, 36 pcalau12i $a1, %got_pc_hi20(divpairscore) ld.d $a1, $a1, %got_pc_lo12(divpairscore) ld.w $a1, $a1, 0 - beqz $a1, .LBB3_21 -# %bb.20: # %.thread - sub.d $a0, $a0, $s6 + beqz $a1, .LBB3_20 +# %bb.19: # %.thread + sub.d $a0, $a0, $s5 addi.d $a0, $a0, 1 - st.w $a0, $s8, 48 + st.w $a0, $s7, 48 movgr2fr.w $fa0, $a0 - fld.d $fa1, $a4, %pc_lo12(.LCPI3_0) - fld.d $fa2, $a3, %pc_lo12(.LCPI3_1) ffint.d.w $fa0, $fa0 fdiv.d $fa0, $fs1, $fa0 + lu12i.w $a0, 209715 + ori $a0, $a0, 819 + lu32i.d $a0, 471859 + lu52i.d $a0, $a0, 1025 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 - fdiv.d $fa0, $fa0, $fa2 - fst.d $fa0, $s8, 40 - b .LBB3_24 -.LBB3_21: - beqz $fp, .LBB3_24 -# %bb.22: # %.lr.ph146 - add.d $a0, $s0, $s7 - fld.d $fa0, $a4, %pc_lo12(.LCPI3_0) - fld.d $fa1, $a3, %pc_lo12(.LCPI3_1) - ld.d $a1, $sp, 32 # 8-byte Folded Reload + ori $a0, $zero, 0 + lu32i.d $a0, 180224 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 + fst.d $fa0, $s7, 40 + b .LBB3_23 +.LBB3_20: + beqz $fp, .LBB3_23 +# %bb.21: # %.lr.ph146 + add.d $a0, $s0, $s6 + ld.d $a1, $sp, 16 # 8-byte Folded Reload add.w $a0, $a0, $a1 - fadd.d $fa2, $fs1, $fs0 - fmul.d $fa0, $fa2, $fa0 + fadd.d $fa0, $fs1, $fs0 + lu12i.w $a1, 209715 + ori $a1, $a1, 819 + lu32i.d $a1, 471859 + lu52i.d $a1, $a1, 1025 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 + ori $a1, $zero, 0 + lu32i.d $a1, 180224 + lu52i.d $a1, $a1, 1032 + movgr2fr.d $fa1, $a1 fdiv.d $fa0, $fa0, $fa1 movgr2fr.w $fa1, $a0 ffint.d.w $fa1, $fa1 fdiv.d $fa0, $fa0, $fa1 .p2align 4, , 16 -.LBB3_23: # =>This Inner Loop Header: Depth=1 +.LBB3_22: # =>This Inner Loop Header: Depth=1 st.w $a0, $fp, 48 fst.d $fa0, $fp, 40 ld.d $fp, $fp, 8 - bnez $fp, .LBB3_23 -.LBB3_24: # %.loopexit - fld.d $fs4, $sp, 48 # 8-byte Folded Reload - fld.d $fs3, $sp, 56 # 8-byte Folded Reload - fld.d $fs2, $sp, 64 # 8-byte Folded Reload - fld.d $fs1, $sp, 72 # 8-byte Folded Reload - fld.d $fs0, $sp, 80 # 8-byte Folded Reload - ld.d $s8, $sp, 88 # 8-byte Folded Reload - ld.d $s7, $sp, 96 # 8-byte Folded Reload - ld.d $s6, $sp, 104 # 8-byte Folded Reload - ld.d $s5, $sp, 112 # 8-byte Folded Reload - ld.d $s4, $sp, 120 # 8-byte Folded Reload - ld.d $s3, $sp, 128 # 8-byte Folded Reload - ld.d $s2, $sp, 136 # 8-byte Folded Reload - ld.d $s1, $sp, 144 # 8-byte Folded Reload - ld.d $s0, $sp, 152 # 8-byte Folded Reload - ld.d $fp, $sp, 160 # 8-byte Folded Reload - ld.d $ra, $sp, 168 # 8-byte Folded Reload - addi.d $sp, $sp, 176 + bnez $fp, .LBB3_22 +.LBB3_23: # %.loopexit + fld.d $fs4, $sp, 32 # 8-byte Folded Reload + fld.d $fs3, $sp, 40 # 8-byte Folded Reload + fld.d $fs2, $sp, 48 # 8-byte Folded Reload + fld.d $fs1, $sp, 56 # 8-byte Folded Reload + fld.d $fs0, $sp, 64 # 8-byte Folded Reload + ld.d $s8, $sp, 72 # 8-byte Folded Reload + ld.d $s7, $sp, 80 # 8-byte Folded Reload + ld.d $s6, $sp, 88 # 8-byte Folded Reload + ld.d $s5, $sp, 96 # 8-byte Folded Reload + ld.d $s4, $sp, 104 # 8-byte Folded Reload + ld.d $s3, $sp, 112 # 8-byte Folded Reload + ld.d $s2, $sp, 120 # 8-byte Folded Reload + ld.d $s1, $sp, 128 # 8-byte Folded Reload + ld.d $s0, $sp, 136 # 8-byte Folded Reload + ld.d $fp, $sp, 144 # 8-byte Folded Reload + ld.d $ra, $sp, 152 # 8-byte Folded Reload + addi.d $sp, $sp, 160 ret .Lfunc_end3: .size putlocalhom, .Lfunc_end3-putlocalhom @@ -3644,12 +3672,7 @@ countalpha: # @countalpha .Lfunc_end27: .size countalpha, .Lfunc_end27-countalpha # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function getnumlen_nogap -.LCPI28_0: - .dword 0x7ff8000000000000 # double NaN - .text - .globl getnumlen_nogap + .globl getnumlen_nogap # -- Begin function getnumlen_nogap .p2align 5 .type getnumlen_nogap,@function getnumlen_nogap: # @getnumlen_nogap @@ -3921,8 +3944,10 @@ getnumlen_nogap: # @getnumlen_nogap beq $a3, $a4, .LBB28_28 b .LBB28_29 .LBB28_37: - pcalau12i $a0, %pc_hi20(.LCPI28_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI28_0) + ori $a0, $zero, 0 + lu32i.d $a0, -524288 + lu52i.d $a0, $a0, 2047 + movgr2fr.d $fs0, $a0 b .LBB28_39 .LBB28_38: # %._crit_edge.loopexit movgr2fr.w $fa0, $s6 @@ -5365,12 +5390,7 @@ readhat2_float: # @readhat2_float .Lfunc_end37: .size readhat2_float, .Lfunc_end37-readhat2_float # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function readhat2_int -.LCPI38_0: - .dword 0x412e848000000000 # double 1.0E+6 - .text - .globl readhat2_int + .globl readhat2_int # -- Begin function readhat2_int .p2align 5 .type readhat2_int,@function readhat2_int: # @readhat2_int @@ -5484,28 +5504,30 @@ readhat2_int: # @readhat2_int addi.d $sp, $sp, 384 ret .LBB38_13: # %.lr.ph29 - move $s4, $zero + move $s6, $zero addi.w $s3, $s1, -1 bstrpick.d $s1, $s1, 31, 0 - pcalau12i $a0, %pc_hi20(.LCPI38_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI38_0) ori $fp, $zero, 4 - ori $s6, $zero, 10 + ori $s5, $zero, 10 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fs0, $a0 b .LBB38_15 .p2align 4, , 16 .LBB38_14: # %.loopexit # in Loop: Header=BB38_15 Depth=1 addi.d $s2, $s2, 1 addi.d $fp, $fp, 4 - beq $s4, $s3, .LBB38_12 + beq $s6, $s3, .LBB38_12 .LBB38_15: # %.lr.ph27 # =>This Loop Header: Depth=1 # Child Loop BB38_17 Depth 2 ld.d $a0, $sp, 8 # 8-byte Folded Reload - alsl.d $s7, $s4, $a0, 3 - addi.d $s4, $s4, 1 + alsl.d $s7, $s6, $a0, 3 + addi.d $s6, $s6, 1 move $s8, $s1 - move $s5, $fp + move $s4, $fp b .LBB38_17 .p2align 4, , 16 .LBB38_16: # %.lr.ph.i22 @@ -5546,16 +5568,16 @@ readhat2_int: # @readhat2_int fmadd.d $fa0, $fa0, $fs0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a1, $fa0 - stx.w $a1, $a0, $s5 + stx.w $a1, $a0, $s4 addi.d $s8, $s8, -1 - addi.d $s5, $s5, 4 + addi.d $s4, $s4, 4 beq $s2, $s8, .LBB38_14 .LBB38_17: # Parent Loop BB38_15 Depth=1 # => This Inner Loop Header: Depth=2 move $a0, $s0 pcaddu18i $ra, %call36(getc) jirl $ra, $ra, 0 - beq $a0, $s6, .LBB38_16 + beq $a0, $s5, .LBB38_16 # %bb.18: # in Loop: Header=BB38_17 Depth=2 move $a1, $s0 pcaddu18i $ra, %call36(ungetc) @@ -6164,12 +6186,7 @@ WriteFloatHat2: # @WriteFloatHat2 .Lfunc_end41: .size WriteFloatHat2, .Lfunc_end41-WriteFloatHat2 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function WriteHat2_int -.LCPI42_0: - .dword 0x412e848000000000 # double 1.0E+6 - .text - .globl WriteHat2_int + .globl WriteHat2_int # -- Begin function WriteHat2_int .p2align 5 .type WriteHat2_int,@function WriteHat2_int: # @WriteHat2_int @@ -6187,7 +6204,6 @@ WriteHat2_int: # @WriteHat2_int st.d $s7, $sp, 64 # 8-byte Folded Spill st.d $s8, $sp, 56 # 8-byte Folded Spill fst.d $fs0, $sp, 48 # 8-byte Folded Spill - fst.d $fs1, $sp, 40 # 8-byte Folded Spill move $s0, $a3 move $s2, $a2 move $s7, $a1 @@ -6229,9 +6245,11 @@ WriteHat2_int: # @WriteHat2_int .LBB42_5: movgr2fr.d $fa0, $zero .LBB42_6: # %._crit_edge - pcalau12i $a0, %pc_hi20(.LCPI42_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI42_0) - fdiv.d $fs1, $fa0, $fs0 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa1, $a0 + fdiv.d $fs0, $fa0, $fa1 pcalau12i $a0, %pc_hi20(.L.str.22) addi.d $s4, $a0, %pc_lo12(.L.str.22) ori $a2, $zero, 1 @@ -6246,7 +6264,7 @@ WriteHat2_int: # @WriteHat2_int pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 vldi $vr0, -1020 - fmul.d $fa0, $fs1, $fa0 + fmul.d $fa0, $fs0, $fa0 movfr2gr.d $a2, $fa0 pcalau12i $a0, %pc_hi20(.L.str.26) addi.d $a1, $a0, %pc_lo12(.L.str.26) @@ -6278,31 +6296,35 @@ WriteHat2_int: # @WriteHat2_int bstrpick.d $s3, $s5, 31, 0 ori $s4, $zero, 1 ori $fp, $zero, 4 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fs0, $a0 pcalau12i $a0, %pc_hi20(.L.str.28) addi.d $s2, $a0, %pc_lo12(.L.str.28) move $a1, $zero lu12i.w $a0, 174762 ori $s6, $a0, 2731 - st.d $s0, $sp, 24 # 8-byte Folded Spill - st.d $s7, $sp, 16 # 8-byte Folded Spill - st.d $s3, $sp, 8 # 8-byte Folded Spill + st.d $s0, $sp, 32 # 8-byte Folded Spill + st.d $s7, $sp, 24 # 8-byte Folded Spill + st.d $s3, $sp, 16 # 8-byte Folded Spill b .LBB42_12 .p2align 4, , 16 .LBB42_11: # %.loopexit # in Loop: Header=BB42_12 Depth=1 addi.d $s4, $s4, 1 addi.d $fp, $fp, 4 - ld.d $s0, $sp, 24 # 8-byte Folded Reload - ld.d $s7, $sp, 16 # 8-byte Folded Reload - ld.d $s3, $sp, 8 # 8-byte Folded Reload - ld.d $a1, $sp, 32 # 8-byte Folded Reload + ld.d $s0, $sp, 32 # 8-byte Folded Reload + ld.d $s7, $sp, 24 # 8-byte Folded Reload + ld.d $s3, $sp, 16 # 8-byte Folded Reload + ld.d $a1, $sp, 40 # 8-byte Folded Reload beq $a1, $s3, .LBB42_17 .LBB42_12: # %.lr.ph63 # =>This Loop Header: Depth=1 # Child Loop BB42_15 Depth 2 alsl.d $s8, $a1, $s0, 3 addi.d $a0, $a1, 1 - st.d $a0, $sp, 32 # 8-byte Folded Spill + st.d $a0, $sp, 40 # 8-byte Folded Spill move $s0, $s7 move $s5, $fp ori $s7, $zero, 1 @@ -6344,7 +6366,6 @@ WriteHat2_int: # @WriteHat2_int bnez $a0, .LBB42_14 b .LBB42_13 .LBB42_17: # %._crit_edge66 - fld.d $fs1, $sp, 40 # 8-byte Folded Reload fld.d $fs0, $sp, 48 # 8-byte Folded Reload ld.d $s8, $sp, 56 # 8-byte Folded Reload ld.d $s7, $sp, 64 # 8-byte Folded Reload @@ -8045,14 +8066,8 @@ ReadBlastm7: # @ReadBlastm7 .Lfunc_end49: .size ReadBlastm7, .Lfunc_end49-ReadBlastm7 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function addlocalhom_r -.LCPI50_0: - .dword 0x4017333333333333 # double 5.7999999999999998 -.LCPI50_1: - .dword 0x4082c00000000000 # double 600 .section .text.unlikely.,"ax",@progbits - .p2align 5 + .p2align 5 # -- Begin function addlocalhom_r .type addlocalhom_r,@function addlocalhom_r: # @addlocalhom_r # %bb.0: @@ -8354,13 +8369,18 @@ addlocalhom_r: # @addlocalhom_r beqz $fp, .LBB50_27 # %bb.25: # %.lr.ph40 add.d $a0, $s5, $s0 - pcalau12i $a1, %pc_hi20(.LCPI50_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI50_0) - pcalau12i $a1, %pc_hi20(.LCPI50_1) - fld.d $fa1, $a1, %pc_lo12(.LCPI50_1) add.w $a0, $a0, $s4 - fadd.d $fa2, $fs1, $fs0 - fmul.d $fa0, $fa2, $fa0 + fadd.d $fa0, $fs1, $fs0 + lu12i.w $a1, 209715 + ori $a1, $a1, 819 + lu32i.d $a1, 471859 + lu52i.d $a1, $a1, 1025 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 + ori $a1, $zero, 0 + lu32i.d $a1, 180224 + lu52i.d $a1, $a1, 1032 + movgr2fr.d $fa1, $a1 fdiv.d $fa0, $fa0, $fa1 movgr2fr.w $fa1, $a0 ffint.d.w $fa1, $fa1 @@ -10944,14 +10964,7 @@ WriteForFasta: # @WriteForFasta .Lfunc_end66: .size WriteForFasta, .Lfunc_end66-WriteForFasta # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function readlocalhomtable2 -.LCPI67_0: - .dword 0x4017333333333333 # double 5.7999999999999998 -.LCPI67_1: - .dword 0x4082c00000000000 # double 600 - .text - .globl readlocalhomtable2 + .globl readlocalhomtable2 # -- Begin function readlocalhomtable2 .p2align 5 .type readlocalhomtable2,@function readlocalhomtable2: # @readlocalhomtable2 @@ -10986,13 +10999,18 @@ readlocalhomtable2: # @readlocalhomtable2 addi.d $s1, $a0, %pc_lo12(readlocalhomtable2.buff) pcalau12i $a0, %pc_hi20(.L.str.75) addi.d $s2, $a0, %pc_lo12(.L.str.75) - pcalau12i $a0, %pc_hi20(.LCPI67_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI67_0) - pcalau12i $a0, %pc_hi20(.LCPI67_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI67_1) addi.w $s6, $zero, -1 lu32i.d $s6, 0 - movgr2fr.d $fs2, $zero + movgr2fr.d $fs0, $zero + lu12i.w $a0, 209715 + ori $a0, $a0, 819 + lu32i.d $a0, 471859 + lu52i.d $a0, $a0, 1025 + movgr2fr.d $fs1, $a0 + ori $a0, $zero, 0 + lu32i.d $a0, 180224 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs2, $a0 b .LBB67_4 .p2align 4, , 16 .LBB67_2: # in Loop: Header=BB67_4 Depth=1 @@ -11088,13 +11106,13 @@ readlocalhomtable2: # @readlocalhomtable2 st.w $a6, $a0, 28 ld.w $a1, $sp, 56 st.w $a7, $a0, 36 - fadd.d $fa0, $fa0, $fs2 + fadd.d $fa0, $fa0, $fs0 st.w $t0, $a0, 48 slli.d $a2, $a1, 3 ldx.d $a2, $fp, $a2 ld.w $a3, $sp, 60 - fdiv.d $fa0, $fa0, $fs0 - fmul.d $fs3, $fa0, $fs1 + fdiv.d $fa0, $fa0, $fs1 + fmul.d $fs3, $fa0, $fs2 fst.d $fs3, $a0, 40 blez $a5, .LBB67_2 # %bb.8: # in Loop: Header=BB67_4 Depth=1 @@ -11135,14 +11153,7 @@ readlocalhomtable2: # @readlocalhomtable2 .Lfunc_end67: .size readlocalhomtable2, .Lfunc_end67-readlocalhomtable2 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function readlocalhomtable -.LCPI68_0: - .dword 0x4017333333333333 # double 5.7999999999999998 -.LCPI68_1: - .dword 0x4082c00000000000 # double 600 - .text - .globl readlocalhomtable + .globl readlocalhomtable # -- Begin function readlocalhomtable .p2align 5 .type readlocalhomtable,@function readlocalhomtable: # @readlocalhomtable @@ -11258,13 +11269,18 @@ readlocalhomtable: # @readlocalhomtable addi.d $s2, $a0, %pc_lo12(readlocalhomtable.buff) pcalau12i $a0, %pc_hi20(.L.str.75) addi.d $s3, $a0, %pc_lo12(.L.str.75) - pcalau12i $a0, %pc_hi20(.LCPI68_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI68_0) - pcalau12i $a0, %pc_hi20(.LCPI68_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI68_1) move $s6, $zero move $s7, $zero - movgr2fr.d $fs2, $zero + movgr2fr.d $fs0, $zero + lu12i.w $a0, 209715 + ori $a0, $a0, 819 + lu32i.d $a0, 471859 + lu52i.d $a0, $a0, 1025 + movgr2fr.d $fs1, $a0 + ori $a0, $zero, 0 + lu32i.d $a0, 180224 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs2, $a0 b .LBB68_16 .p2align 4, , 16 .LBB68_14: # in Loop: Header=BB68_16 Depth=1 @@ -11349,12 +11365,12 @@ readlocalhomtable: # @readlocalhomtable ld.w $a0, $sp, 44 slli.d $a2, $a2, 3 ldx.d $a2, $s1, $a2 - fadd.d $fa0, $fa0, $fs2 + fadd.d $fa0, $fa0, $fs0 st.w $a0, $s7, 48 slli.d $a0, $a1, 2 ldx.w $a1, $a2, $a0 - fdiv.d $fa0, $fa0, $fs0 - fmul.d $fs3, $fa0, $fs1 + fdiv.d $fa0, $fa0, $fs1 + fmul.d $fs3, $fa0, $fs2 fst.d $fs3, $s7, 40 addi.d $a3, $a1, 1 stx.w $a3, $a2, $a0 @@ -12995,12 +13011,7 @@ miyataout_reorder_pointer: # @miyataout_reorder_pointer .Lfunc_end77: .size miyataout_reorder_pointer, .Lfunc_end77-miyataout_reorder_pointer # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function readmccaskill -.LCPI78_0: - .dword 0x3f847ae147ae147b # double 0.01 - .text - .globl readmccaskill + .globl readmccaskill # -- Begin function readmccaskill .p2align 5 .type readmccaskill,@function readmccaskill: # @readmccaskill @@ -13063,8 +13074,11 @@ readmccaskill: # @readmccaskill ori $s5, $zero, 62 pcalau12i $a0, %pc_hi20(.L.str.95) addi.d $s3, $a0, %pc_lo12(.L.str.95) - pcalau12i $a0, %pc_hi20(.LCPI78_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI78_0) + lu12i.w $a0, 293601 + ori $a0, $a0, 1147 + lu32i.d $a0, 293601 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fs0, $a0 movgr2fr.w $fs1, $zero ori $s6, $zero, 24 addi.w $a0, $zero, -1 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/mltaln9.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/mltaln9.s index 6eba187c..2ceda737 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/mltaln9.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/mltaln9.s @@ -1221,12 +1221,7 @@ intergroup_score_new: # @intergroup_score_new .Lfunc_end9: .size intergroup_score_new, .Lfunc_end9-intergroup_score_new # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function score_calc3 -.LCPI10_0: - .dword 0x7ff8000000000000 # double NaN - .text - .globl score_calc3 + .globl score_calc3 # -- Begin function score_calc3 .p2align 5 .type score_calc3,@function score_calc3: # @score_calc3 @@ -1489,9 +1484,11 @@ score_calc3: # @score_calc3 addi.d $s1, $a0, %pc_lo12(score_calc3.mseq1) pcalau12i $a0, %pc_hi20(score_calc3.mseq2) addi.d $s2, $a0, %pc_lo12(score_calc3.mseq2) - pcalau12i $a0, %pc_hi20(.LCPI10_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI10_0) move $s3, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -524288 + lu52i.d $a0, $a0, 2047 + movgr2fr.d $fs1, $a0 .p2align 4, , 16 .LBB10_31: # %.lr.ph95 # =>This Loop Header: Depth=1 @@ -2144,12 +2141,7 @@ score_calc4: # @score_calc4 .Lfunc_end12: .size score_calc4, .Lfunc_end12-score_calc4 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function upg2 -.LCPI13_0: - .word 0x461c3c00 # float 9999 - .text - .globl upg2 + .globl upg2 # -- Begin function upg2 .p2align 5 .type upg2,@function upg2: # @upg2 @@ -2238,8 +2230,9 @@ upg2: # @upg2 move $a0, $zero addi.w $a1, $s2, -1 addi.w $a2, $zero, -1 - pcalau12i $a3, %pc_hi20(.LCPI13_0) - fld.s $fa0, $a3, %pc_lo12(.LCPI13_0) + lu12i.w $a3, 287171 + ori $a3, $a3, 3072 + movgr2fr.w $fa0, $a3 addi.d $a3, $sp, 8 vldi $vr1, -928 ori $a4, $zero, 0 @@ -3958,35 +3951,28 @@ cluster_minimum_float: # @cluster_minimum_float .Lfunc_end18: .size cluster_minimum_float, .Lfunc_end18-cluster_minimum_float # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function fixed_musclesupg_float_realloc_nobk_halfmtx_treeout -.LCPI19_0: - .word 0x4479f99a # float 999.900024 -.LCPI19_1: - .word 0x43f9f99a # float 499.950012 - .text - .globl fixed_musclesupg_float_realloc_nobk_halfmtx_treeout + .globl fixed_musclesupg_float_realloc_nobk_halfmtx_treeout # -- Begin function fixed_musclesupg_float_realloc_nobk_halfmtx_treeout .p2align 5 .type fixed_musclesupg_float_realloc_nobk_halfmtx_treeout,@function fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_realloc_nobk_halfmtx_treeout # %bb.0: - addi.d $sp, $sp, -336 - st.d $ra, $sp, 328 # 8-byte Folded Spill - st.d $fp, $sp, 320 # 8-byte Folded Spill - st.d $s0, $sp, 312 # 8-byte Folded Spill - st.d $s1, $sp, 304 # 8-byte Folded Spill - st.d $s2, $sp, 296 # 8-byte Folded Spill - st.d $s3, $sp, 288 # 8-byte Folded Spill - st.d $s4, $sp, 280 # 8-byte Folded Spill - st.d $s5, $sp, 272 # 8-byte Folded Spill - st.d $s6, $sp, 264 # 8-byte Folded Spill - st.d $s7, $sp, 256 # 8-byte Folded Spill - st.d $s8, $sp, 248 # 8-byte Folded Spill - fst.d $fs0, $sp, 240 # 8-byte Folded Spill - fst.d $fs1, $sp, 232 # 8-byte Folded Spill + addi.d $sp, $sp, -320 + st.d $ra, $sp, 312 # 8-byte Folded Spill + st.d $fp, $sp, 304 # 8-byte Folded Spill + st.d $s0, $sp, 296 # 8-byte Folded Spill + st.d $s1, $sp, 288 # 8-byte Folded Spill + st.d $s2, $sp, 280 # 8-byte Folded Spill + st.d $s3, $sp, 272 # 8-byte Folded Spill + st.d $s4, $sp, 264 # 8-byte Folded Spill + st.d $s5, $sp, 256 # 8-byte Folded Spill + st.d $s6, $sp, 248 # 8-byte Folded Spill + st.d $s7, $sp, 240 # 8-byte Folded Spill + st.d $s8, $sp, 232 # 8-byte Folded Spill + fst.d $fs0, $sp, 224 # 8-byte Folded Spill + fst.d $fs1, $sp, 216 # 8-byte Folded Spill move $s5, $a4 - st.d $a3, $sp, 120 # 8-byte Folded Spill - st.d $a2, $sp, 160 # 8-byte Folded Spill + st.d $a3, $sp, 104 # 8-byte Folded Spill + move $a3, $a2 move $s2, $a1 st.d $a0, $sp, 152 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(sueff1) @@ -4020,48 +4006,49 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r pcalau12i $a0, %pc_hi20(cluster_minimum_float) addi.d $a0, $a0, %pc_lo12(cluster_minimum_float) .LBB19_6: - st.d $a0, $sp, 216 # 8-byte Folded Spill - pcalau12i $s8, %pc_hi20(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.hist) - ld.d $a0, $s8, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.hist) - pcalau12i $s4, %pc_hi20(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.tmptmplen) - pcalau12i $s6, %pc_hi20(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.ac) - pcalau12i $a1, %pc_hi20(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.nmemar) - st.d $a1, $sp, 112 # 8-byte Folded Spill - pcalau12i $s0, %pc_hi20(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.treetmp) + st.d $a0, $sp, 200 # 8-byte Folded Spill + pcalau12i $s6, %pc_hi20(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.hist) + ld.d $a0, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.hist) + pcalau12i $s1, %pc_hi20(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.tmptmplen) + pcalau12i $s0, %pc_hi20(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.ac) + pcalau12i $s4, %pc_hi20(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.nmemar) + pcalau12i $a1, %pc_hi20(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.treetmp) + st.d $a1, $sp, 88 # 8-byte Folded Spill pcalau12i $s7, %pc_hi20(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.nametmp) pcalau12i $a1, %pc_hi20(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.tree) - st.d $a1, $sp, 168 # 8-byte Folded Spill - st.d $s7, $sp, 96 # 8-byte Folded Spill - st.d $s0, $sp, 88 # 8-byte Folded Spill + st.d $a1, $sp, 160 # 8-byte Folded Spill + st.d $s0, $sp, 120 # 8-byte Folded Spill + st.d $s7, $sp, 24 # 8-byte Folded Spill + st.d $a3, $sp, 112 # 8-byte Folded Spill beqz $a0, .LBB19_98 # %bb.7: - move $s0, $zero + move $s8, $zero move $s3, $zero - st.d $s4, $sp, 104 # 8-byte Folded Spill + st.d $s8, $sp, 208 # 8-byte Folded Spill + st.d $s1, $sp, 96 # 8-byte Folded Spill ld.d $a2, $sp, 152 # 8-byte Folded Reload blez $a2, .LBB19_99 .LBB19_8: # %.preheader325.lr.ph - st.d $s0, $sp, 224 # 8-byte Folded Spill - st.d $s8, $sp, 72 # 8-byte Folded Spill - st.d $s6, $sp, 128 # 8-byte Folded Spill + st.d $s6, $sp, 72 # 8-byte Folded Spill + st.d $s4, $sp, 80 # 8-byte Folded Spill pcaddu18i $ra, %call36(__ctype_b_loc) jirl $ra, $ra, 0 move $s6, $a0 vrepli.b $vr0, 0 - vst $vr0, $sp, 192 # 16-byte Folded Spill + vst $vr0, $sp, 176 # 16-byte Folded Spill ori $fp, $zero, 95 ori $s0, $zero, 30 pcalau12i $a0, %pc_hi20(.L.str.11) addi.d $s7, $a0, %pc_lo12(.L.str.11) move $s8, $zero - ld.d $s1, $sp, 96 # 8-byte Folded Reload + ld.d $s1, $sp, 24 # 8-byte Folded Reload .p2align 4, , 16 .LBB19_9: # %.preheader325 # =>This Loop Header: Depth=1 # Child Loop BB19_10 Depth 2 ld.d $a1, $s1, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.nametmp) move $a0, $zero - vld $vr0, $sp, 192 # 16-byte Folded Reload + vld $vr0, $sp, 176 # 16-byte Folded Reload vst $vr0, $a1, 8 st.d $zero, $a1, 22 st.d $zero, $a1, 0 @@ -4083,7 +4070,7 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r addi.d $a0, $a0, 1 bne $a0, $s0, .LBB19_10 # %bb.11: # in Loop: Header=BB19_9 Depth=1 - ld.d $a0, $sp, 168 # 8-byte Folded Reload + ld.d $a0, $sp, 160 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.tree) st.b $zero, $a1, 30 slli.d $a2, $s8, 3 @@ -4097,16 +4084,17 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r ld.d $a0, $sp, 152 # 8-byte Folded Reload bne $s8, $a0, .LBB19_9 # %bb.12: # %.lr.ph.preheader - ld.d $s6, $sp, 128 # 8-byte Folded Reload - ld.d $a1, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.ac) + ld.d $s5, $sp, 120 # 8-byte Folded Reload + ld.d $a1, $s5, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.ac) move $a0, $zero move $a2, $zero ld.d $fp, $sp, 152 # 8-byte Folded Reload slli.d $a3, $fp, 4 alsl.d $a3, $fp, $a3, 3 - ld.d $s8, $sp, 72 # 8-byte Folded Reload - ld.d $t0, $sp, 104 # 8-byte Folded Reload - ld.d $s0, $sp, 224 # 8-byte Folded Reload + ld.d $s4, $sp, 80 # 8-byte Folded Reload + ld.d $s6, $sp, 72 # 8-byte Folded Reload + ld.d $t0, $sp, 96 # 8-byte Folded Reload + ld.d $t1, $sp, 208 # 8-byte Folded Reload .p2align 4, , 16 .LBB19_13: # %.lr.ph # =>This Inner Loop Header: Depth=1 @@ -4126,16 +4114,14 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r st.d $a3, $sp, 56 # 8-byte Folded Spill alsl.d $a2, $a3, $a2, 3 stx.d $zero, $a1, $a2 - ld.d $a1, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.ac) + ld.d $a1, $s5, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.ac) lu12i.w $a2, 280479 - pcalau12i $t1, %pc_hi20(.LCPI19_0) - fld.s $fa0, $t1, %pc_lo12(.LCPI19_0) addi.w $t2, $zero, -1 - ori $a2, $a2, 2458 - st.d $a2, $sp, 176 # 8-byte Folded Spill + ori $s0, $a2, 2458 st.d $t2, $sp, 64 # 8-byte Folded Spill lu32i.d $t2, 0 - ld.d $s5, $sp, 88 # 8-byte Folded Reload + movgr2fr.w $fa0, $s0 + ld.d $s8, $sp, 112 # 8-byte Folded Reload b .LBB19_16 .p2align 4, , 16 .LBB19_15: # %setnearest.exit @@ -4150,10 +4136,9 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r alsl.d $a3, $a0, $a3, 3 ldx.d $a4, $a1, $a3 slli.d $a5, $a0, 2 - alsl.d $a3, $a0, $s0, 2 - ld.d $a6, $sp, 176 # 8-byte Folded Reload - stx.w $a6, $s3, $a5 - stx.w $t2, $s0, $a5 + alsl.d $a3, $a0, $t1, 2 + stx.w $s0, $s3, $a5 + stx.w $t2, $t1, $a5 fmov.s $fa1, $fa0 beqz $a4, .LBB19_22 # %bb.17: # %.lr.ph.i @@ -4212,15 +4197,13 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r b .LBB19_23 .LBB19_27: # %.lr.ph338 st.d $t2, $sp, 32 # 8-byte Folded Spill - st.d $t1, $sp, 80 # 8-byte Folded Spill ld.d $a0, $t0, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.tmptmplen) slli.d $a2, $fp, 2 move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ld.d $a0, $s8, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.hist) - ld.d $a1, $sp, 112 # 8-byte Folded Reload - ld.d $a1, $a1, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.nmemar) + ld.d $a0, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.hist) + ld.d $a1, $s4, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.nmemar) ori $a3, $zero, 8 move $a2, $zero bltu $fp, $a3, .LBB19_32 @@ -4268,14 +4251,13 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r .LBB19_34: # %._crit_edge339 pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) - st.d $a0, $sp, 24 # 8-byte Folded Spill + st.d $a0, $sp, 16 # 8-byte Folded Spill ld.d $a1, $a0, 0 ori $a0, $zero, 10 pcaddu18i $ra, %call36(fputc) jirl $ra, $ra, 0 - ld.d $t3, $sp, 112 # 8-byte Folded Reload ori $a0, $zero, 1 - ld.d $t1, $sp, 80 # 8-byte Folded Reload + ld.d $s1, $sp, 208 # 8-byte Folded Reload beq $fp, $a0, .LBB19_100 # %bb.35: # %.lr.ph388.preheader lu12i.w $a0, -209716 @@ -4286,18 +4268,19 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r st.d $a0, $sp, 40 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.L.str.13) addi.d $a0, $a0, %pc_lo12(.L.str.13) - st.d $a0, $sp, 16 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI19_1) - fld.s $fs0, $a0, %pc_lo12(.LCPI19_1) + st.d $a0, $sp, 8 # 8-byte Folded Spill move $a2, $zero + lu12i.w $a0, 278431 + ori $a0, $a0, 2458 + movgr2fr.w $fs0, $a0 ld.d $s7, $sp, 64 # 8-byte Folded Reload + st.d $s0, $sp, 144 # 8-byte Folded Spill b .LBB19_37 .p2align 4, , 16 .LBB19_36: # %._crit_edge384 # in Loop: Header=BB19_37 Depth=1 - ld.d $a2, $sp, 184 # 8-byte Folded Reload addi.d $a2, $a2, 1 - ld.d $s1, $sp, 96 # 8-byte Folded Reload + ld.d $s8, $sp, 112 # 8-byte Folded Reload ld.d $a0, $sp, 56 # 8-byte Folded Reload beq $a2, $a0, .LBB19_100 .LBB19_37: # %.lr.ph388 @@ -4315,22 +4298,21 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r mul.d $a0, $a2, $a0 rotri.w $a0, $a0, 1 ld.d $a1, $sp, 40 # 8-byte Folded Reload - st.d $a2, $sp, 184 # 8-byte Folded Spill + st.d $a2, $sp, 168 # 8-byte Folded Spill bgeu $a1, $a0, .LBB19_42 # %bb.38: # in Loop: Header=BB19_37 Depth=1 - ld.d $a1, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.ac) + ld.d $a1, $s5, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.ac) ld.d $a0, $a1, 0 beqz $a0, .LBB19_43 .LBB19_39: # %.lr.ph346.preheader # in Loop: Header=BB19_37 Depth=1 - fld.s $fa0, $t1, %pc_lo12(.LCPI19_0) - ld.d $s6, $sp, 160 # 8-byte Folded Reload + movgr2fr.w $fa0, $s0 .p2align 4, , 16 .LBB19_40: # %.lr.ph346 # Parent Loop BB19_37 Depth=1 # => This Inner Loop Header: Depth=2 - ld.w $a2, $a1, 16 - slli.d $a1, $a2, 2 + ld.w $a5, $a1, 16 + slli.d $a1, $a5, 2 fldx.s $fa1, $s3, $a1 fcmp.clt.s $fcc0, $fa1, $fa0 move $a1, $a0 @@ -4338,66 +4320,62 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r fsel $fa0, $fa0, $fa1, $fcc0 movcf2gr $a3, $fcc0 masknez $a4, $s7, $a3 - maskeqz $a2, $a2, $a3 - or $s7, $a2, $a4 + maskeqz $a3, $a5, $a3 + or $s7, $a3, $a4 bnez $a0, .LBB19_40 # %bb.41: # %._crit_edge347.loopexit # in Loop: Header=BB19_37 Depth=1 vldi $vr1, -1184 fmul.s $fs1, $fa0, $fa1 - move $fp, $s7 - ld.d $a2, $sp, 184 # 8-byte Folded Reload + move $s0, $s7 b .LBB19_44 .LBB19_42: # in Loop: Header=BB19_37 Depth=1 - ld.d $a0, $sp, 24 # 8-byte Folded Reload + ld.d $a0, $sp, 16 # 8-byte Folded Reload ld.d $a0, $a0, 0 - ld.d $a1, $sp, 16 # 8-byte Folded Reload - ld.d $a2, $sp, 184 # 8-byte Folded Reload + ld.d $a1, $sp, 8 # 8-byte Folded Reload ld.d $a3, $sp, 152 # 8-byte Folded Reload - move $fp, $t3 pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 - move $t3, $fp - ld.d $a2, $sp, 184 # 8-byte Folded Reload - ld.d $t1, $sp, 80 # 8-byte Folded Reload - ld.d $a1, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.ac) + ld.d $a2, $sp, 168 # 8-byte Folded Reload + ld.d $s1, $sp, 208 # 8-byte Folded Reload + ld.d $a1, $s5, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.ac) ld.d $a0, $a1, 0 bnez $a0, .LBB19_39 .p2align 4, , 16 .LBB19_43: # in Loop: Header=BB19_37 Depth=1 - move $fp, $s7 + move $s0, $s7 fmov.s $fs1, $fs0 - ld.d $s6, $sp, 160 # 8-byte Folded Reload .LBB19_44: # %._crit_edge347 # in Loop: Header=BB19_37 Depth=1 - slli.d $a0, $fp, 2 - ldx.w $s1, $s0, $a0 - slt $a0, $s1, $fp + slli.d $a0, $s0, 2 + ldx.w $s1, $s1, $a0 + slt $a0, $s1, $s0 maskeqz $a1, $s1, $a0 - masknez $a0, $fp, $a0 + masknez $a0, $s0, $a0 or $s7, $a1, $a0 - ld.d $a3, $s8, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.hist) - ld.d $s8, $t3, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.nmemar) - slli.d $s4, $a2, 3 - ldx.d $a0, $s6, $s4 - slli.d $s0, $s7, 2 - ldx.w $a1, $s8, $s0 - st.d $a3, $sp, 192 # 8-byte Folded Spill - ldx.w $s5, $a3, $s0 + ld.d $a1, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.hist) + ld.d $a3, $s4, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.nmemar) + slli.d $s6, $a2, 3 + ldx.d $a0, $s8, $s6 + slli.d $fp, $s7, 2 + st.d $a3, $sp, 176 # 8-byte Folded Spill + ldx.w $a2, $a3, $fp + move $s4, $a1 + ldx.w $s5, $a1, $fp ld.d $a0, $a0, 0 - st.d $a1, $sp, 144 # 8-byte Folded Spill - slli.d $a1, $a1, 2 + st.d $a2, $sp, 136 # 8-byte Folded Spill + slli.d $a1, $a2, 2 addi.d $a1, $a1, 4 pcaddu18i $ra, %call36(realloc) jirl $ra, $ra, 0 - st.d $s4, $sp, 136 # 8-byte Folded Spill - ldx.d $a1, $s6, $s4 + st.d $s6, $sp, 128 # 8-byte Folded Spill + ldx.d $a1, $s8, $s6 st.d $a0, $a1, 0 ld.d $a2, $sp, 64 # 8-byte Folded Reload beq $s5, $a2, .LBB19_51 # %bb.45: # in Loop: Header=BB19_37 Depth=1 slli.d $a2, $s5, 3 - ldx.d $a2, $s6, $a2 + ldx.d $a2, $s8, $a2 ld.d $a5, $a2, 0 ld.d $a6, $a2, 8 ld.w $t0, $a5, 0 @@ -4430,6 +4408,7 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r # %bb.48: # %.preheader320.loopexit # in Loop: Header=BB19_37 Depth=1 ld.w $a4, $a3, 0 + ld.d $a6, $sp, 168 # 8-byte Folded Reload beq $a4, $a2, .LBB19_53 .LBB19_49: # %.lr.ph358.preheader # in Loop: Header=BB19_37 Depth=1 @@ -4450,12 +4429,14 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r st.w $s7, $a0, 0 ld.d $a2, $sp, 32 # 8-byte Folded Reload st.w $a2, $a0, 4 + ld.d $a6, $sp, 168 # 8-byte Folded Reload b .LBB19_55 .p2align 4, , 16 .LBB19_52: # in Loop: Header=BB19_37 Depth=1 masknez $a4, $t1, $a7 maskeqz $a5, $t0, $a7 or $a4, $a5, $a4 + ld.d $a6, $sp, 168 # 8-byte Folded Reload bne $a4, $a2, .LBB19_49 .LBB19_53: # in Loop: Header=BB19_37 Depth=1 move $a5, $a0 @@ -4464,32 +4445,30 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r lu32i.d $a2, 0 st.w $a2, $a5, 0 .LBB19_55: # in Loop: Header=BB19_37 Depth=1 - slt $a0, $fp, $s1 + slt $a0, $s0, $s1 maskeqz $a2, $s1, $a0 - masknez $a0, $fp, $a0 - or $fp, $a2, $a0 - slli.d $s5, $fp, 2 - move $s4, $s8 - ldx.w $s8, $s8, $s5 - ld.d $a0, $sp, 192 # 8-byte Folded Reload - ldx.w $s6, $a0, $s5 + masknez $a0, $s0, $a0 + or $s1, $a2, $a0 + slli.d $s6, $s1, 2 + ld.d $a0, $sp, 176 # 8-byte Folded Reload + ldx.w $s0, $a0, $s6 + ldx.w $s5, $s4, $s6 ld.d $a0, $a1, 8 - ld.d $a1, $sp, 160 # 8-byte Folded Reload - ld.d $a2, $sp, 184 # 8-byte Folded Reload - alsl.d $s1, $a2, $a1, 3 - slli.d $a1, $s8, 2 + alsl.d $s8, $a6, $s8, 3 + slli.d $a1, $s0, 2 addi.d $a1, $a1, 4 pcaddu18i $ra, %call36(realloc) jirl $ra, $ra, 0 - ld.d $a1, $s1, 0 + ld.d $a1, $s8, 0 st.d $a0, $a1, 8 beqz $a0, .LBB19_101 # %bb.56: # in Loop: Header=BB19_37 Depth=1 - addi.w $s1, $zero, -1 - beq $s6, $s1, .LBB19_63 + addi.w $s8, $zero, -1 + ld.d $t1, $sp, 208 # 8-byte Folded Reload + beq $s5, $s8, .LBB19_63 # %bb.57: # in Loop: Header=BB19_37 Depth=1 - slli.d $a1, $s6, 3 - ld.d $a2, $sp, 160 # 8-byte Folded Reload + slli.d $a1, $s5, 3 + ld.d $a2, $sp, 112 # 8-byte Folded Reload ldx.d $a1, $a2, $a1 ld.d $a3, $a1, 0 ld.d $a4, $a1, 8 @@ -4503,7 +4482,7 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r masknez $a1, $a4, $a5 maskeqz $t0, $a3, $a5 or $a1, $t0, $a1 - beq $a2, $s1, .LBB19_64 + beq $a2, $s8, .LBB19_64 # %bb.58: # %.lr.ph365.preheader # in Loop: Header=BB19_37 Depth=1 masknez $a3, $a3, $a5 @@ -4518,12 +4497,12 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r ld.w $a2, $a3, 0 addi.d $a0, $a0, 4 addi.d $a3, $a3, 4 - bne $a2, $s1, .LBB19_59 + bne $a2, $s8, .LBB19_59 # %bb.60: # %.preheader.loopexit # in Loop: Header=BB19_37 Depth=1 ld.w $a2, $a1, 0 - ld.d $s6, $sp, 128 # 8-byte Folded Reload - beq $a2, $s1, .LBB19_65 + ld.d $s5, $sp, 120 # 8-byte Folded Reload + beq $a2, $s8, .LBB19_65 .LBB19_61: # %.lr.ph370.preheader # in Loop: Header=BB19_37 Depth=1 addi.d $a1, $a1, 4 @@ -4536,98 +4515,97 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r addi.d $a3, $a0, 4 addi.d $a1, $a1, 4 move $a0, $a3 - bne $a2, $s1, .LBB19_62 + bne $a2, $s8, .LBB19_62 b .LBB19_66 .p2align 4, , 16 .LBB19_63: # in Loop: Header=BB19_37 Depth=1 - st.w $fp, $a0, 0 - move $a1, $s1 + st.w $s1, $a0, 0 + move $a1, $s8 lu32i.d $a1, 0 st.w $a1, $a0, 4 - ld.d $s6, $sp, 128 # 8-byte Folded Reload + ld.d $s5, $sp, 120 # 8-byte Folded Reload b .LBB19_67 .p2align 4, , 16 .LBB19_64: # in Loop: Header=BB19_37 Depth=1 masknez $a2, $a7, $a5 maskeqz $a3, $a6, $a5 or $a2, $a3, $a2 - ld.d $s6, $sp, 128 # 8-byte Folded Reload - bne $a2, $s1, .LBB19_61 + ld.d $s5, $sp, 120 # 8-byte Folded Reload + bne $a2, $s8, .LBB19_61 .LBB19_65: # in Loop: Header=BB19_37 Depth=1 move $a3, $a0 .LBB19_66: # %._crit_edge371 # in Loop: Header=BB19_37 Depth=1 - move $a0, $s1 + move $a0, $s8 lu32i.d $a0, 0 st.w $a0, $a3, 0 .LBB19_67: # in Loop: Header=BB19_37 Depth=1 - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.tmptmplen) - fldx.s $fa0, $a0, $s0 - ld.d $a1, $sp, 120 # 8-byte Folded Reload - ld.d $a2, $sp, 136 # 8-byte Folded Reload + fldx.s $fa0, $a0, $fp + ld.d $a1, $sp, 104 # 8-byte Folded Reload + ld.d $a2, $sp, 128 # 8-byte Folded Reload ldx.d $a1, $a1, $a2 fsub.s $fa0, $fs1, $fa0 fst.s $fa0, $a1, 0 - fldx.s $fa0, $a0, $s5 - ld.d $a2, $sp, 192 # 8-byte Folded Reload - alsl.d $a2, $s7, $a2, 2 - alsl.d $a3, $s7, $s4, 2 + fldx.s $fa0, $a0, $s6 + alsl.d $a2, $s7, $s4, 2 + ld.d $a3, $sp, 176 # 8-byte Folded Reload + alsl.d $a3, $s7, $a3, 2 fsub.s $fa0, $fs1, $fa0 fst.s $fa0, $a1, 4 - fstx.s $fs1, $a0, $s0 - ld.d $s5, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.ac) - ld.d $a0, $sp, 184 # 8-byte Folded Reload + fstx.s $fs1, $a0, $fp + ld.d $s6, $s5, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.ac) + ld.d $a0, $sp, 168 # 8-byte Folded Reload st.w $a0, $a2, 0 - ld.d $a0, $sp, 144 # 8-byte Folded Reload - add.d $a0, $s8, $a0 + ld.d $a0, $sp, 136 # 8-byte Folded Reload + add.d $a0, $s0, $a0 st.w $a0, $a3, 0 - ld.d $a0, $sp, 176 # 8-byte Folded Reload - stx.w $a0, $s3, $s0 - beqz $s5, .LBB19_81 + ld.d $a0, $sp, 144 # 8-byte Folded Reload + stx.w $a0, $s3, $fp + beqz $s6, .LBB19_81 # %bb.68: # %.lr.ph377 # in Loop: Header=BB19_37 Depth=1 - ld.d $a0, $sp, 120 # 8-byte Folded Reload - ld.d $a1, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a1, $sp, 168 # 8-byte Folded Reload alsl.d $a0, $a1, $a0, 3 - st.d $a0, $sp, 144 # 8-byte Folded Spill - alsl.d $s8, $s7, $s3, 2 - ld.d $a0, $sp, 224 # 8-byte Folded Reload - alsl.d $a0, $s7, $a0, 2 - st.d $a0, $sp, 192 # 8-byte Folded Spill + st.d $a0, $sp, 136 # 8-byte Folded Spill + alsl.d $s0, $s7, $s3, 2 + alsl.d $a0, $s7, $t1, 2 + st.d $a0, $sp, 176 # 8-byte Folded Spill b .LBB19_70 .p2align 4, , 16 .LBB19_69: # in Loop: Header=BB19_70 Depth=2 - ld.d $s5, $s5, 0 - beqz $s5, .LBB19_80 + ld.d $s6, $s6, 0 + beqz $s6, .LBB19_80 .LBB19_70: # Parent Loop BB19_37 Depth=1 # => This Inner Loop Header: Depth=2 - ld.w $s6, $s5, 16 - beq $s6, $s7, .LBB19_69 + ld.w $s5, $s6, 16 + beq $s5, $s7, .LBB19_69 # %bb.71: # in Loop: Header=BB19_70 Depth=2 - beq $s6, $fp, .LBB19_69 + beq $s5, $s1, .LBB19_69 # %bb.72: # in Loop: Header=BB19_70 Depth=2 - move $a1, $s6 - move $a2, $s6 - move $a0, $fp + move $a1, $s5 + move $a2, $s5 + move $a0, $s1 move $a3, $s7 - move $a4, $s6 - blt $s6, $s7, .LBB19_74 + move $a4, $s5 + blt $s5, $s7, .LBB19_74 # %bb.73: # in Loop: Header=BB19_70 Depth=2 - slt $a0, $fp, $s6 - masknez $a1, $fp, $a0 - maskeqz $a0, $s6, $a0 + slt $a0, $s1, $s5 + masknez $a1, $s1, $a0 + maskeqz $a0, $s5, $a0 or $a0, $a0, $a1 - slt $a1, $s6, $fp - masknez $a2, $fp, $a1 - maskeqz $a1, $s6, $a1 + slt $a1, $s5, $s1 + masknez $a2, $s1, $a1 + maskeqz $a1, $s5, $a1 or $a1, $a1, $a2 move $a2, $s7 - move $a3, $s6 + move $a3, $s5 move $a4, $s7 .LBB19_74: # in Loop: Header=BB19_70 Depth=2 - slli.d $s0, $a2, 3 - ldx.d $a2, $s2, $s0 + slli.d $fp, $a2, 3 + ldx.d $a2, $s2, $fp sub.w $a3, $a3, $a4 slli.d $s4, $a3, 2 slli.d $a3, $a1, 3 @@ -4636,59 +4614,59 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r sub.w $a0, $a0, $a1 slli.d $a0, $a0, 2 fldx.s $fa1, $a3, $a0 - ld.d $a0, $sp, 216 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload jirl $ra, $a0, 0 - ldx.d $a0, $s2, $s0 + ldx.d $a0, $s2, $fp fstx.s $fa0, $a0, $s4 - slli.d $a0, $s6, 2 + slli.d $a0, $s5, 2 fldx.s $fa1, $s3, $a0 fcmp.cule.s $fcc0, $fa1, $fa0 - ld.d $a2, $sp, 224 # 8-byte Folded Reload + ld.d $a2, $sp, 208 # 8-byte Folded Reload bceqz $fcc0, .LBB19_77 # %bb.75: # in Loop: Header=BB19_70 Depth=2 - fld.s $fa1, $s8, 0 + fld.s $fa1, $s0, 0 fcmp.cule.s $fcc0, $fa1, $fa0 bceqz $fcc0, .LBB19_78 .LBB19_76: # in Loop: Header=BB19_70 Depth=2 ldx.w $a0, $a2, $a0 - bne $a0, $fp, .LBB19_69 + bne $a0, $s1, .LBB19_69 b .LBB19_79 .p2align 4, , 16 .LBB19_77: # in Loop: Header=BB19_70 Depth=2 - alsl.d $a1, $s6, $s3, 2 + alsl.d $a1, $s5, $s3, 2 fst.s $fa0, $a1, 0 stx.w $s7, $a2, $a0 - fld.s $fa1, $s8, 0 + fld.s $fa1, $s0, 0 fcmp.cule.s $fcc0, $fa1, $fa0 bcnez $fcc0, .LBB19_76 .LBB19_78: # in Loop: Header=BB19_70 Depth=2 - fst.s $fa0, $s8, 0 - ld.d $a1, $sp, 192 # 8-byte Folded Reload - st.w $s6, $a1, 0 + fst.s $fa0, $s0, 0 + ld.d $a1, $sp, 176 # 8-byte Folded Reload + st.w $s5, $a1, 0 ldx.w $a0, $a2, $a0 - bne $a0, $fp, .LBB19_69 + bne $a0, $s1, .LBB19_69 .LBB19_79: # in Loop: Header=BB19_70 Depth=2 - alsl.d $a0, $s6, $a2, 2 + alsl.d $a0, $s5, $a2, 2 st.w $s7, $a0, 0 b .LBB19_69 .p2align 4, , 16 .LBB19_80: # %._crit_edge378.loopexit # in Loop: Header=BB19_37 Depth=1 - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 136 # 8-byte Folded Reload ld.d $a1, $a0, 0 - ld.d $s6, $sp, 128 # 8-byte Folded Reload + ld.d $s5, $sp, 120 # 8-byte Folded Reload .LBB19_81: # %._crit_edge378 # in Loop: Header=BB19_37 Depth=1 - ld.d $s8, $sp, 168 # 8-byte Folded Reload - ld.d $a3, $s8, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.tree) - ld.d $s5, $sp, 88 # 8-byte Folded Reload - ld.d $a0, $s5, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.treetmp) - slli.d $s4, $s7, 3 - ldx.d $a2, $a3, $s4 + ld.d $s6, $sp, 160 # 8-byte Folded Reload + ld.d $a3, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.tree) + ld.d $s4, $sp, 88 # 8-byte Folded Reload + ld.d $a0, $s4, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.treetmp) + slli.d $s0, $s7, 3 + ldx.d $a2, $a3, $s0 fld.s $fa0, $a1, 0 - slli.d $s0, $fp, 3 + slli.d $fp, $s1, 3 fld.s $fa1, $a1, 4 - ldx.d $a4, $a3, $s0 + ldx.d $a4, $a3, $fp fcvt.d.s $fa0, $fa0 fcvt.d.s $fa1, $fa1 movfr2gr.d $a3, $fa0 @@ -4697,14 +4675,14 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r addi.d $a1, $a1, %pc_lo12(.L.str.16) pcaddu18i $ra, %call36(sprintf) jirl $ra, $ra, 0 - ld.d $a0, $s8, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.tree) - ldx.d $a0, $a0, $s4 - ld.d $a1, $s5, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.treetmp) + ld.d $a0, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.tree) + ldx.d $a0, $a0, $s0 + ld.d $a1, $s4, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.treetmp) pcaddu18i $ra, %call36(strcpy) jirl $ra, $ra, 0 - ld.d $a1, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.ac) - slli.d $a0, $fp, 4 - alsl.d $a2, $fp, $a0, 3 + ld.d $a1, $s5, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.ac) + slli.d $a0, $s1, 4 + alsl.d $a2, $s1, $a0, 3 add.d $a0, $a1, $a2 ld.d $a0, $a0, 8 ldx.d $a1, $a1, $a2 @@ -4713,16 +4691,16 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r # %bb.82: # in Loop: Header=BB19_37 Depth=1 st.d $a0, $a1, 8 .LBB19_83: # in Loop: Header=BB19_37 Depth=1 - ldx.d $a0, $s2, $s0 + ldx.d $a0, $s2, $fp pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.ac) - stx.d $zero, $s2, $s0 - ld.d $t3, $sp, 112 # 8-byte Folded Reload - ld.d $s8, $sp, 72 # 8-byte Folded Reload - ld.d $s0, $sp, 224 # 8-byte Folded Reload - ld.d $t1, $sp, 80 # 8-byte Folded Reload - ld.d $t2, $sp, 176 # 8-byte Folded Reload + ld.d $a0, $s5, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.ac) + stx.d $zero, $s2, $fp + ld.d $s4, $sp, 80 # 8-byte Folded Reload + ld.d $s6, $sp, 72 # 8-byte Folded Reload + ld.d $s1, $sp, 208 # 8-byte Folded Reload + ld.d $s0, $sp, 144 # 8-byte Folded Reload + ld.d $a2, $sp, 168 # 8-byte Folded Reload beqz $a0, .LBB19_36 # %bb.84: # %.lr.ph383.preheader # in Loop: Header=BB19_37 Depth=1 @@ -4738,26 +4716,26 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r # => This Loop Header: Depth=2 # Child Loop BB19_90 Depth 3 # Child Loop BB19_95 Depth 3 - ld.w $a2, $a1, 16 - slli.d $a6, $a2, 2 - ldx.w $a3, $s0, $a6 + ld.w $t1, $a1, 16 + slli.d $a6, $t1, 2 + ldx.w $a3, $s1, $a6 bne $a3, $s7, .LBB19_85 # %bb.87: # in Loop: Header=BB19_86 Depth=2 - alsl.d $a3, $a2, $s0, 2 - slli.d $a4, $a2, 4 - alsl.d $a4, $a2, $a4, 3 + alsl.d $a3, $t1, $s1, 2 + slli.d $a4, $t1, 4 + alsl.d $a4, $t1, $a4, 3 ldx.d $a5, $a0, $a4 - alsl.d $a4, $a2, $s3, 2 - stx.w $t2, $s3, $a6 - move $a6, $s1 + alsl.d $a4, $t1, $s3, 2 + stx.w $s0, $s3, $a6 + move $a6, $s8 lu32i.d $a6, 0 st.w $a6, $a3, 0 beqz $a5, .LBB19_92 # %bb.88: # %.lr.ph.i308 # in Loop: Header=BB19_86 Depth=2 - slli.d $a6, $a2, 3 + slli.d $a6, $t1, 3 ldx.d $a6, $s2, $a6 - fld.s $fa0, $t1, %pc_lo12(.LCPI19_0) + movgr2fr.w $fa0, $s0 b .LBB19_90 .p2align 4, , 16 .LBB19_89: # in Loop: Header=BB19_90 Depth=3 @@ -4767,7 +4745,7 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r # Parent Loop BB19_86 Depth=2 # => This Inner Loop Header: Depth=3 ld.w $a7, $a5, 16 - sub.w $t0, $a7, $a2 + sub.w $t0, $a7, $t1 slli.d $t0, $t0, 2 fldx.s $fa1, $a6, $t0 fcmp.cule.s $fcc0, $fa0, $fa1 @@ -4778,7 +4756,7 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r fmov.s $fa0, $fa1 b .LBB19_89 .LBB19_92: # in Loop: Header=BB19_86 Depth=2 - fld.s $fa0, $t1, %pc_lo12(.LCPI19_0) + movgr2fr.w $fa0, $s0 .LBB19_93: # %.lr.ph42.i315.preheader # in Loop: Header=BB19_86 Depth=2 move $a5, $a0 @@ -4792,11 +4770,11 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r # Parent Loop BB19_86 Depth=2 # => This Inner Loop Header: Depth=3 ld.w $a6, $a5, 16 - beq $a2, $a6, .LBB19_85 + beq $t1, $a6, .LBB19_85 # %bb.96: # in Loop: Header=BB19_95 Depth=3 slli.d $a7, $a6, 3 ldx.d $a7, $s2, $a7 - sub.w $t0, $a2, $a6 + sub.w $t0, $t1, $a6 slli.d $t0, $t0, 2 fldx.s $fa1, $a7, $t0 fcmp.cule.s $fcc0, $fa0, $fa1 @@ -4813,23 +4791,22 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r pcaddu18i $ra, %call36(AllocateIntVec) jirl $ra, $ra, 0 ld.w $a1, $fp, 0 - st.d $a0, $s8, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.hist) + st.d $a0, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.hist) move $a0, $a1 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 ld.w $s3, $fp, 0 - st.d $a0, $s4, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.tmptmplen) + st.d $a0, $s1, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.tmptmplen) slli.d $a0, $s3, 4 alsl.d $a0, $s3, $a0, 3 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 - st.d $a0, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.ac) + st.d $a0, $s0, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.ac) move $a0, $s3 pcaddu18i $ra, %call36(AllocateIntVec) jirl $ra, $ra, 0 ld.w $a1, $fp, 0 - ld.d $a2, $sp, 112 # 8-byte Folded Reload - st.d $a0, $a2, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.nmemar) + st.d $a0, $s4, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.nmemar) move $a0, $a1 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 @@ -4839,30 +4816,31 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r pcaddu18i $ra, %call36(AllocateIntVec) jirl $ra, $ra, 0 ld.w $a1, $fp, 0 - move $s1, $a0 - ori $a0, $zero, 50 - mul.w $a0, $a1, $a0 + move $s8, $a0 + ori $s0, $zero, 50 + mul.w $a0, $a1, $s0 pcaddu18i $ra, %call36(AllocateCharVec) jirl $ra, $ra, 0 - st.d $a0, $s0, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.treetmp) - move $s0, $s1 + ld.d $a1, $sp, 88 # 8-byte Folded Reload + st.d $a0, $a1, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.treetmp) ori $a0, $zero, 30 pcaddu18i $ra, %call36(AllocateCharVec) jirl $ra, $ra, 0 ld.w $a2, $fp, 0 st.d $a0, $s7, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.nametmp) - ori $a0, $zero, 50 - mul.w $a1, $a2, $a0 + mul.w $a1, $a2, $s0 move $a0, $a2 pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 168 # 8-byte Folded Reload + ld.d $a1, $sp, 160 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.tree) - st.d $s4, $sp, 104 # 8-byte Folded Spill + st.d $s8, $sp, 208 # 8-byte Folded Spill + st.d $s1, $sp, 96 # 8-byte Folded Spill ld.d $a2, $sp, 152 # 8-byte Folded Reload bgtz $a2, .LBB19_8 .LBB19_99: # %.preheader322.thread - ld.d $a0, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.ac) + ld.d $s5, $sp, 120 # 8-byte Folded Reload + ld.d $a0, $s5, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.ac) slli.d $a1, $a2, 4 alsl.d $a1, $a2, $a1, 3 add.d $a0, $a0, $a1 @@ -4873,18 +4851,16 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r ori $a0, $zero, 10 pcaddu18i $ra, %call36(fputc) jirl $ra, $ra, 0 - ld.d $s1, $sp, 96 # 8-byte Folded Reload - ld.d $s5, $sp, 88 # 8-byte Folded Reload - ld.d $t3, $sp, 112 # 8-byte Folded Reload + ld.d $s1, $sp, 208 # 8-byte Folded Reload .LBB19_100: # %._crit_edge389 pcalau12i $a0, %pc_hi20(.L.str.17) addi.d $a0, $a0, %pc_lo12(.L.str.17) pcalau12i $a1, %pc_hi20(.L.str.18) addi.d $a1, $a1, %pc_lo12(.L.str.18) - move $s2, $t3 pcaddu18i $ra, %call36(fopen) jirl $ra, $ra, 0 - ld.d $a2, $s5, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.treetmp) + ld.d $s0, $sp, 88 # 8-byte Folded Reload + ld.d $a2, $s0, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.treetmp) move $fp, $a0 pcalau12i $a0, %pc_hi20(.L.str.4) addi.d $a1, $a0, %pc_lo12(.L.str.4) @@ -4894,55 +4870,56 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r move $a0, $fp pcaddu18i $ra, %call36(fclose) jirl $ra, $ra, 0 - ld.d $a0, $sp, 168 # 8-byte Folded Reload + ld.d $a0, $sp, 160 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.tree) pcaddu18i $ra, %call36(FreeCharMtx) jirl $ra, $ra, 0 - ld.d $a0, $s5, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.treetmp) + ld.d $a0, $s0, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.treetmp) pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $s1, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.nametmp) + ld.d $a0, $sp, 24 # 8-byte Folded Reload + ld.d $a0, $a0, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.nametmp) pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $fp, $sp, 104 # 8-byte Folded Reload + ld.d $fp, $sp, 96 # 8-byte Folded Reload ld.d $a0, $fp, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.tmptmplen) pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $s8, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.hist) + ld.d $a0, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.hist) st.d $zero, $fp, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.tmptmplen) pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.ac) - st.d $zero, $s8, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.hist) + ld.d $a0, $s5, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.ac) + st.d $zero, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.hist) pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $s2, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.nmemar) - st.d $zero, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.ac) + ld.d $a0, $s4, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.nmemar) + st.d $zero, $s5, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.ac) pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - st.d $zero, $s2, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.nmemar) + st.d $zero, $s4, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx_treeout.nmemar) move $a0, $s3 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - move $a0, $s0 - fld.d $fs1, $sp, 232 # 8-byte Folded Reload - fld.d $fs0, $sp, 240 # 8-byte Folded Reload - ld.d $s8, $sp, 248 # 8-byte Folded Reload - ld.d $s7, $sp, 256 # 8-byte Folded Reload - ld.d $s6, $sp, 264 # 8-byte Folded Reload - ld.d $s5, $sp, 272 # 8-byte Folded Reload - ld.d $s4, $sp, 280 # 8-byte Folded Reload - ld.d $s3, $sp, 288 # 8-byte Folded Reload - ld.d $s2, $sp, 296 # 8-byte Folded Reload - ld.d $s1, $sp, 304 # 8-byte Folded Reload - ld.d $s0, $sp, 312 # 8-byte Folded Reload - ld.d $fp, $sp, 320 # 8-byte Folded Reload - ld.d $ra, $sp, 328 # 8-byte Folded Reload - addi.d $sp, $sp, 336 + move $a0, $s1 + fld.d $fs1, $sp, 216 # 8-byte Folded Reload + fld.d $fs0, $sp, 224 # 8-byte Folded Reload + ld.d $s8, $sp, 232 # 8-byte Folded Reload + ld.d $s7, $sp, 240 # 8-byte Folded Reload + ld.d $s6, $sp, 248 # 8-byte Folded Reload + ld.d $s5, $sp, 256 # 8-byte Folded Reload + ld.d $s4, $sp, 264 # 8-byte Folded Reload + ld.d $s3, $sp, 272 # 8-byte Folded Reload + ld.d $s2, $sp, 280 # 8-byte Folded Reload + ld.d $s1, $sp, 288 # 8-byte Folded Reload + ld.d $s0, $sp, 296 # 8-byte Folded Reload + ld.d $fp, $sp, 304 # 8-byte Folded Reload + ld.d $ra, $sp, 312 # 8-byte Folded Reload + addi.d $sp, $sp, 320 pcaddu18i $t8, %call36(free) jr $t8 .LBB19_101: - ld.d $a0, $sp, 24 # 8-byte Folded Reload + ld.d $a0, $sp, 16 # 8-byte Folded Reload ld.d $a3, $a0, 0 pcalau12i $a0, %pc_hi20(.L.str.15) addi.d $a0, $a0, %pc_lo12(.L.str.15) @@ -4967,14 +4944,7 @@ fixed_musclesupg_float_realloc_nobk_halfmtx_treeout: # @fixed_musclesupg_float_r .Lfunc_end19: .size fixed_musclesupg_float_realloc_nobk_halfmtx_treeout, .Lfunc_end19-fixed_musclesupg_float_realloc_nobk_halfmtx_treeout # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function fixed_musclesupg_float_realloc_nobk_halfmtx -.LCPI20_0: - .word 0x4479f99a # float 999.900024 -.LCPI20_1: - .word 0x43f9f99a # float 499.950012 - .text - .globl fixed_musclesupg_float_realloc_nobk_halfmtx + .globl fixed_musclesupg_float_realloc_nobk_halfmtx # -- Begin function fixed_musclesupg_float_realloc_nobk_halfmtx .p2align 5 .type fixed_musclesupg_float_realloc_nobk_halfmtx,@function fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_nobk_halfmtx @@ -5029,17 +4999,18 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n addi.d $a0, $a0, %pc_lo12(cluster_minimum_float) .LBB20_6: st.d $a0, $sp, 168 # 8-byte Folded Spill - pcalau12i $s7, %pc_hi20(fixed_musclesupg_float_realloc_nobk_halfmtx.hist) - ld.d $a0, $s7, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.hist) - pcalau12i $s1, %pc_hi20(fixed_musclesupg_float_realloc_nobk_halfmtx.tmptmplen) + pcalau12i $s8, %pc_hi20(fixed_musclesupg_float_realloc_nobk_halfmtx.hist) + ld.d $a0, $s8, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.hist) + pcalau12i $a1, %pc_hi20(fixed_musclesupg_float_realloc_nobk_halfmtx.tmptmplen) + st.d $a1, $sp, 104 # 8-byte Folded Spill pcalau12i $s6, %pc_hi20(fixed_musclesupg_float_realloc_nobk_halfmtx.ac) - pcalau12i $s5, %pc_hi20(fixed_musclesupg_float_realloc_nobk_halfmtx.nmemar) - st.d $s1, $sp, 104 # 8-byte Folded Spill + pcalau12i $s1, %pc_hi20(fixed_musclesupg_float_realloc_nobk_halfmtx.nmemar) beqz $a0, .LBB20_92 # %bb.7: - move $s8, $zero + move $a0, $zero move $s3, $zero - ld.d $a0, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.ac) + ld.d $a6, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.ac) + st.d $a0, $sp, 176 # 8-byte Folded Spill blez $s0, .LBB20_93 .LBB20_8: # %.lr.ph.preheader ld.d $a1, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.ac) @@ -5050,19 +5021,19 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n .p2align 4, , 16 .LBB20_9: # %.lr.ph # =>This Inner Loop Header: Depth=1 - add.d $a5, $a0, $a2 + add.d $a5, $a6, $a2 addi.d $a5, $a5, 24 - stx.d $a5, $a0, $a2 - add.d $a0, $a1, $a2 - addi.d $a5, $a0, -24 - st.d $a5, $a0, 8 - st.w $a3, $a0, 16 + stx.d $a5, $a6, $a2 + add.d $a6, $a1, $a2 + addi.d $a5, $a6, -24 + st.d $a5, $a6, 8 + st.w $a3, $a6, 16 addi.d $a2, $a2, 24 addi.d $a3, $a3, 1 - move $a0, $a1 + move $a6, $a1 bne $a4, $a2, .LBB20_9 # %bb.10: # %.lr.ph298 - move $a0, $zero + move $t0, $zero addi.w $a3, $s0, -1 slli.d $a2, $a3, 4 st.d $a3, $sp, 80 # 8-byte Folded Spill @@ -5070,36 +5041,33 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n stx.d $zero, $a1, $a2 ld.d $a1, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.ac) lu12i.w $a2, 280479 - pcalau12i $fp, %pc_hi20(.LCPI20_0) - fld.s $fa0, $fp, %pc_lo12(.LCPI20_0) - addi.w $t0, $zero, -1 - ori $a2, $a2, 2458 - st.d $a2, $sp, 160 # 8-byte Folded Spill - st.d $t0, $sp, 88 # 8-byte Folded Spill - lu32i.d $t0, 0 + addi.w $t1, $zero, -1 + ori $fp, $a2, 2458 + st.d $t1, $sp, 88 # 8-byte Folded Spill + lu32i.d $t1, 0 + movgr2fr.w $fa0, $fp b .LBB20_12 .p2align 4, , 16 .LBB20_11: # %setnearest.exit # in Loop: Header=BB20_12 Depth=1 - addi.d $a0, $a0, 1 - beq $a0, $s0, .LBB20_23 + addi.d $t0, $t0, 1 + beq $t0, $s0, .LBB20_23 .LBB20_12: # =>This Loop Header: Depth=1 # Child Loop BB20_15 Depth 2 # Child Loop BB20_20 Depth 2 - alsl.d $a2, $a0, $s3, 2 - slli.d $a3, $a0, 4 - alsl.d $a3, $a0, $a3, 3 + alsl.d $a2, $t0, $s3, 2 + slli.d $a3, $t0, 4 + alsl.d $a3, $t0, $a3, 3 ldx.d $a4, $a1, $a3 - slli.d $a5, $a0, 2 - alsl.d $a3, $a0, $s8, 2 - ld.d $a6, $sp, 160 # 8-byte Folded Reload - stx.w $a6, $s3, $a5 - stx.w $t0, $s8, $a5 + slli.d $a5, $t0, 2 + alsl.d $a3, $t0, $a0, 2 + stx.w $fp, $s3, $a5 + stx.w $t1, $a0, $a5 fmov.s $fa1, $fa0 beqz $a4, .LBB20_18 # %bb.13: # %.lr.ph.i # in Loop: Header=BB20_12 Depth=1 - slli.d $a5, $a0, 3 + slli.d $a5, $t0, 3 ldx.d $a5, $s2, $a5 fmov.s $fa1, $fa0 b .LBB20_15 @@ -5110,7 +5078,7 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n .LBB20_15: # Parent Loop BB20_12 Depth=1 # => This Inner Loop Header: Depth=2 ld.w $a6, $a4, 16 - sub.w $a7, $a6, $a0 + sub.w $a7, $a6, $t0 slli.d $a7, $a7, 2 fldx.s $fa2, $a5, $a7 fcmp.cule.s $fcc0, $fa1, $fa2 @@ -5136,12 +5104,12 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n # Parent Loop BB20_12 Depth=1 # => This Inner Loop Header: Depth=2 ld.wu $a5, $a4, 16 - beq $a0, $a5, .LBB20_11 + beq $t0, $a5, .LBB20_11 # %bb.21: # in Loop: Header=BB20_20 Depth=2 addi.w $a6, $a5, 0 slli.d $a6, $a6, 3 ldx.d $a6, $s2, $a6 - sub.w $a7, $a0, $a5 + sub.w $a7, $t0, $a5 slli.d $a7, $a7, 2 fldx.s $fa2, $a6, $a7 fcmp.cule.s $fcc0, $fa1, $fa2 @@ -5152,15 +5120,15 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n fmov.s $fa1, $fa2 b .LBB20_19 .LBB20_23: # %.lr.ph302 - st.d $t0, $sp, 32 # 8-byte Folded Spill + st.d $t1, $sp, 40 # 8-byte Folded Spill ld.d $a0, $sp, 104 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.tmptmplen) slli.d $a2, $s0, 2 move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ld.d $a0, $s7, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.hist) - ld.d $a1, $s5, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.nmemar) + ld.d $a0, $s8, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.hist) + ld.d $a1, $s1, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.nmemar) ori $a3, $zero, 8 move $a2, $zero bltu $s0, $a3, .LBB20_28 @@ -5208,13 +5176,14 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n .LBB20_30: # %._crit_edge303 pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) - st.d $a0, $sp, 24 # 8-byte Folded Spill + st.d $a0, $sp, 32 # 8-byte Folded Spill ld.d $a1, $a0, 0 ori $a0, $zero, 10 pcaddu18i $ra, %call36(fputc) jirl $ra, $ra, 0 ori $a0, $zero, 1 - st.d $s0, $sp, 16 # 8-byte Folded Spill + st.d $s0, $sp, 24 # 8-byte Folded Spill + ld.d $t1, $sp, 176 # 8-byte Folded Reload beq $s0, $a0, .LBB20_94 # %bb.31: # %.lr.ph352.preheader lu12i.w $a0, -209716 @@ -5225,22 +5194,22 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n st.d $a0, $sp, 64 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.L.str.13) addi.d $a0, $a0, %pc_lo12(.L.str.13) - st.d $a0, $sp, 8 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI20_1) - fld.s $fs0, $a0, %pc_lo12(.LCPI20_1) + st.d $a0, $sp, 16 # 8-byte Folded Spill move $a2, $zero + lu12i.w $a0, 278431 + ori $a0, $a0, 2458 + movgr2fr.w $fs0, $a0 ld.d $s0, $sp, 88 # 8-byte Folded Reload - st.d $s4, $sp, 136 # 8-byte Folded Spill - st.d $s6, $sp, 128 # 8-byte Folded Spill - st.d $s7, $sp, 48 # 8-byte Folded Spill - st.d $fp, $sp, 40 # 8-byte Folded Spill - st.d $s5, $sp, 56 # 8-byte Folded Spill - st.d $s8, $sp, 176 # 8-byte Folded Spill + st.d $s4, $sp, 144 # 8-byte Folded Spill + st.d $s1, $sp, 56 # 8-byte Folded Spill + st.d $s6, $sp, 136 # 8-byte Folded Spill + st.d $s8, $sp, 48 # 8-byte Folded Spill + st.d $fp, $sp, 128 # 8-byte Folded Spill b .LBB20_33 .p2align 4, , 16 .LBB20_32: # %._crit_edge348 # in Loop: Header=BB20_33 Depth=1 - ld.d $a2, $sp, 144 # 8-byte Folded Reload + ld.d $a2, $sp, 152 # 8-byte Folded Reload addi.d $a2, $a2, 1 ld.d $a0, $sp, 80 # 8-byte Folded Reload beq $a2, $a0, .LBB20_94 @@ -5259,7 +5228,7 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n mul.d $a0, $a2, $a0 rotri.w $a0, $a0, 1 ld.d $a1, $sp, 64 # 8-byte Folded Reload - st.d $a2, $sp, 144 # 8-byte Folded Spill + st.d $a2, $sp, 152 # 8-byte Folded Spill bgeu $a1, $a0, .LBB20_38 # %bb.34: # in Loop: Header=BB20_33 Depth=1 ld.d $a1, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.ac) @@ -5267,7 +5236,7 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n beqz $a0, .LBB20_39 .LBB20_35: # %.lr.ph310.preheader # in Loop: Header=BB20_33 Depth=1 - fld.s $fa0, $fp, %pc_lo12(.LCPI20_0) + movgr2fr.w $fa0, $fp .p2align 4, , 16 .LBB20_36: # %.lr.ph310 # Parent Loop BB20_33 Depth=1 @@ -5288,53 +5257,56 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n # in Loop: Header=BB20_33 Depth=1 vldi $vr1, -1184 fmul.s $fs1, $fa0, $fa1 - move $fp, $s0 - ld.d $a2, $sp, 144 # 8-byte Folded Reload + move $s5, $s0 + ld.d $a2, $sp, 152 # 8-byte Folded Reload b .LBB20_40 .LBB20_38: # in Loop: Header=BB20_33 Depth=1 - ld.d $a0, $sp, 24 # 8-byte Folded Reload + ld.d $a0, $sp, 32 # 8-byte Folded Reload ld.d $a0, $a0, 0 - ld.d $a1, $sp, 8 # 8-byte Folded Reload - ld.d $a3, $sp, 16 # 8-byte Folded Reload + ld.d $a1, $sp, 16 # 8-byte Folded Reload + ld.d $a2, $sp, 152 # 8-byte Folded Reload + ld.d $a3, $sp, 24 # 8-byte Folded Reload pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 - ld.d $a2, $sp, 144 # 8-byte Folded Reload + ld.d $a2, $sp, 152 # 8-byte Folded Reload + ld.d $t1, $sp, 176 # 8-byte Folded Reload ld.d $a1, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.ac) ld.d $a0, $a1, 0 bnez $a0, .LBB20_35 .p2align 4, , 16 .LBB20_39: # in Loop: Header=BB20_33 Depth=1 - move $fp, $s0 + move $s5, $s0 fmov.s $fs1, $fs0 .LBB20_40: # %._crit_edge311 # in Loop: Header=BB20_33 Depth=1 - slli.d $a0, $fp, 2 - ldx.w $s1, $s8, $a0 - slt $a0, $s1, $fp - maskeqz $a1, $s1, $a0 - masknez $a0, $fp, $a0 + slli.d $a0, $s5, 2 + ldx.w $s7, $t1, $a0 + slt $a0, $s7, $s5 + maskeqz $a1, $s7, $a0 + masknez $a0, $s5, $a0 or $s0, $a1, $a0 - ld.d $s6, $s7, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.hist) - ld.d $a1, $s5, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.nmemar) - slli.d $s7, $a2, 3 - ldx.d $a0, $s4, $s7 - slli.d $s8, $s0, 2 - st.d $a1, $sp, 152 # 8-byte Folded Spill - ldx.w $a1, $a1, $s8 - ldx.w $s5, $s6, $s8 + ld.d $a1, $s8, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.hist) + ld.d $a3, $s1, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.nmemar) + slli.d $s8, $a2, 3 + ldx.d $a0, $s4, $s8 + slli.d $s1, $s0, 2 + st.d $a3, $sp, 160 # 8-byte Folded Spill + ldx.w $a2, $a3, $s1 + move $s6, $a1 + ldx.w $fp, $a1, $s1 ld.d $a0, $a0, 0 - st.d $a1, $sp, 120 # 8-byte Folded Spill - slli.d $a1, $a1, 2 + st.d $a2, $sp, 120 # 8-byte Folded Spill + slli.d $a1, $a2, 2 addi.d $a1, $a1, 4 pcaddu18i $ra, %call36(realloc) jirl $ra, $ra, 0 - st.d $s7, $sp, 112 # 8-byte Folded Spill - ldx.d $a1, $s4, $s7 + st.d $s8, $sp, 112 # 8-byte Folded Spill + ldx.d $a1, $s4, $s8 st.d $a0, $a1, 0 ld.d $a2, $sp, 88 # 8-byte Folded Reload - beq $s5, $a2, .LBB20_47 + beq $fp, $a2, .LBB20_47 # %bb.41: # in Loop: Header=BB20_33 Depth=1 - slli.d $a2, $s5, 3 + slli.d $a2, $fp, 3 ldx.d $a2, $s4, $a2 ld.d $a5, $a2, 0 ld.d $a6, $a2, 8 @@ -5356,6 +5328,7 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n maskeqz $a6, $a6, $a7 or $a5, $a6, $a5 addi.d $a5, $a5, 4 + move $s4, $s6 .p2align 4, , 16 .LBB20_43: # %.lr.ph317 # Parent Loop BB20_33 Depth=1 @@ -5386,14 +5359,16 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n .p2align 4, , 16 .LBB20_47: # in Loop: Header=BB20_33 Depth=1 st.w $s0, $a0, 0 - ld.d $a2, $sp, 32 # 8-byte Folded Reload + ld.d $a2, $sp, 40 # 8-byte Folded Reload st.w $a2, $a0, 4 + move $s4, $s6 b .LBB20_51 .p2align 4, , 16 .LBB20_48: # in Loop: Header=BB20_33 Depth=1 masknez $a4, $t1, $a7 maskeqz $a5, $t0, $a7 or $a4, $a5, $a4 + move $s4, $s6 bne $a4, $a2, .LBB20_45 .LBB20_49: # in Loop: Header=BB20_33 Depth=1 move $a5, $a0 @@ -5402,32 +5377,32 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n lu32i.d $a2, 0 st.w $a2, $a5, 0 .LBB20_51: # in Loop: Header=BB20_33 Depth=1 - slt $a0, $fp, $s1 - maskeqz $a2, $s1, $a0 - masknez $a0, $fp, $a0 - or $fp, $a2, $a0 - slli.d $s7, $fp, 2 - ld.d $a0, $sp, 152 # 8-byte Folded Reload - ldx.w $s5, $a0, $s7 - move $s4, $s6 - ldx.w $s6, $s6, $s7 + slt $a0, $s5, $s7 + maskeqz $a2, $s7, $a0 + masknez $a0, $s5, $a0 + or $s8, $a2, $a0 + slli.d $s6, $s8, 2 + ld.d $a0, $sp, 160 # 8-byte Folded Reload + ldx.w $s7, $a0, $s6 + ldx.w $fp, $s4, $s6 ld.d $a0, $a1, 8 - ld.d $a1, $sp, 136 # 8-byte Folded Reload - ld.d $a2, $sp, 144 # 8-byte Folded Reload - alsl.d $s1, $a2, $a1, 3 - slli.d $a1, $s5, 2 + ld.d $a1, $sp, 144 # 8-byte Folded Reload + ld.d $a2, $sp, 152 # 8-byte Folded Reload + alsl.d $s5, $a2, $a1, 3 + slli.d $a1, $s7, 2 addi.d $a1, $a1, 4 pcaddu18i $ra, %call36(realloc) jirl $ra, $ra, 0 - ld.d $a1, $s1, 0 + ld.d $a1, $s5, 0 st.d $a0, $a1, 8 beqz $a0, .LBB20_95 # %bb.52: # in Loop: Header=BB20_33 Depth=1 - addi.w $s1, $zero, -1 - beq $s6, $s1, .LBB20_59 + addi.w $s5, $zero, -1 + ld.d $t1, $sp, 176 # 8-byte Folded Reload + beq $fp, $s5, .LBB20_59 # %bb.53: # in Loop: Header=BB20_33 Depth=1 - slli.d $a1, $s6, 3 - ld.d $a2, $sp, 136 # 8-byte Folded Reload + slli.d $a1, $fp, 3 + ld.d $a2, $sp, 144 # 8-byte Folded Reload ldx.d $a1, $a2, $a1 ld.d $a3, $a1, 0 ld.d $a4, $a1, 8 @@ -5441,7 +5416,7 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n masknez $a1, $a4, $a5 maskeqz $t0, $a3, $a5 or $a1, $t0, $a1 - beq $a2, $s1, .LBB20_60 + beq $a2, $s5, .LBB20_60 # %bb.54: # %.lr.ph329.preheader # in Loop: Header=BB20_33 Depth=1 masknez $a3, $a3, $a5 @@ -5456,13 +5431,12 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n ld.w $a2, $a3, 0 addi.d $a0, $a0, 4 addi.d $a3, $a3, 4 - bne $a2, $s1, .LBB20_55 + bne $a2, $s5, .LBB20_55 # %bb.56: # %.preheader.loopexit # in Loop: Header=BB20_33 Depth=1 ld.w $a2, $a1, 0 ld.d $a4, $sp, 128 # 8-byte Folded Reload - ld.d $a5, $sp, 104 # 8-byte Folded Reload - beq $a2, $s1, .LBB20_61 + beq $a2, $s5, .LBB20_61 .LBB20_57: # %.lr.ph334.preheader # in Loop: Header=BB20_33 Depth=1 addi.d $a1, $a1, 4 @@ -5475,16 +5449,15 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n addi.d $a3, $a0, 4 addi.d $a1, $a1, 4 move $a0, $a3 - bne $a2, $s1, .LBB20_58 + bne $a2, $s5, .LBB20_58 b .LBB20_62 .p2align 4, , 16 .LBB20_59: # in Loop: Header=BB20_33 Depth=1 - st.w $fp, $a0, 0 - move $a1, $s1 + st.w $s8, $a0, 0 + move $a1, $s5 lu32i.d $a1, 0 st.w $a1, $a0, 4 ld.d $a4, $sp, 128 # 8-byte Folded Reload - ld.d $a5, $sp, 104 # 8-byte Folded Reload b .LBB20_63 .p2align 4, , 16 .LBB20_60: # in Loop: Header=BB20_33 Depth=1 @@ -5492,76 +5465,75 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n maskeqz $a3, $a6, $a5 or $a2, $a3, $a2 ld.d $a4, $sp, 128 # 8-byte Folded Reload - ld.d $a5, $sp, 104 # 8-byte Folded Reload - bne $a2, $s1, .LBB20_57 + bne $a2, $s5, .LBB20_57 .LBB20_61: # in Loop: Header=BB20_33 Depth=1 move $a3, $a0 .LBB20_62: # %._crit_edge335 # in Loop: Header=BB20_33 Depth=1 - move $a0, $s1 + move $a0, $s5 lu32i.d $a0, 0 st.w $a0, $a3, 0 .LBB20_63: # %.lr.ph341 # in Loop: Header=BB20_33 Depth=1 - ld.d $a0, $a5, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.tmptmplen) - fldx.s $fa0, $a0, $s8 + ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $a0, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.tmptmplen) + fldx.s $fa0, $a0, $s1 ld.d $a1, $sp, 96 # 8-byte Folded Reload ld.d $a2, $sp, 112 # 8-byte Folded Reload ldx.d $a1, $a1, $a2 fsub.s $fa0, $fs1, $fa0 fst.s $fa0, $a1, 0 - fldx.s $fa0, $a0, $s7 + fldx.s $fa0, $a0, $s6 alsl.d $a2, $s0, $s4, 2 - ld.d $a3, $sp, 152 # 8-byte Folded Reload + ld.d $a3, $sp, 160 # 8-byte Folded Reload alsl.d $a3, $s0, $a3, 2 fsub.s $fa0, $fs1, $fa0 fst.s $fa0, $a1, 4 - fstx.s $fs1, $a0, $s8 - ld.d $a0, $sp, 144 # 8-byte Folded Reload + fstx.s $fs1, $a0, $s1 + ld.d $a0, $sp, 152 # 8-byte Folded Reload st.w $a0, $a2, 0 ld.d $a0, $sp, 120 # 8-byte Folded Reload - add.d $a0, $s5, $a0 + add.d $a0, $s7, $a0 st.w $a0, $a3, 0 - ld.d $s5, $a4, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.ac) + ld.d $a0, $sp, 136 # 8-byte Folded Reload + ld.d $s6, $a0, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.ac) alsl.d $s7, $s0, $s3, 2 - ld.d $a0, $sp, 160 # 8-byte Folded Reload - stx.w $a0, $s3, $s8 - ld.d $s8, $sp, 176 # 8-byte Folded Reload - alsl.d $a0, $s0, $s8, 2 - st.d $a0, $sp, 152 # 8-byte Folded Spill + stx.w $a4, $s3, $s1 + alsl.d $a0, $s0, $t1, 2 + st.d $a0, $sp, 160 # 8-byte Folded Spill b .LBB20_65 .p2align 4, , 16 .LBB20_64: # in Loop: Header=BB20_65 Depth=2 - ld.d $s5, $s5, 0 - beqz $s5, .LBB20_75 + ld.d $s6, $s6, 0 + beqz $s6, .LBB20_75 .LBB20_65: # Parent Loop BB20_33 Depth=1 # => This Inner Loop Header: Depth=2 - ld.w $s6, $s5, 16 - beq $s6, $s0, .LBB20_64 + ld.w $fp, $s6, 16 + beq $fp, $s0, .LBB20_64 # %bb.66: # in Loop: Header=BB20_65 Depth=2 - beq $s6, $fp, .LBB20_64 + beq $fp, $s8, .LBB20_64 # %bb.67: # in Loop: Header=BB20_65 Depth=2 - move $a1, $s6 - move $a2, $s6 - move $a0, $fp + move $a1, $fp + move $a2, $fp + move $a0, $s8 move $a3, $s0 - move $a4, $s6 - blt $s6, $s0, .LBB20_69 + move $a4, $fp + blt $fp, $s0, .LBB20_69 # %bb.68: # in Loop: Header=BB20_65 Depth=2 - slt $a0, $fp, $s6 - masknez $a1, $fp, $a0 - maskeqz $a0, $s6, $a0 + slt $a0, $s8, $fp + masknez $a1, $s8, $a0 + maskeqz $a0, $fp, $a0 or $a0, $a0, $a1 - slt $a1, $s6, $fp - masknez $a2, $fp, $a1 - maskeqz $a1, $s6, $a1 + slt $a1, $fp, $s8 + masknez $a2, $s8, $a1 + maskeqz $a1, $fp, $a1 or $a1, $a1, $a2 move $a2, $s0 - move $a3, $s6 + move $a3, $fp move $a4, $s0 .LBB20_69: # in Loop: Header=BB20_65 Depth=2 - slli.d $s8, $a2, 3 - ldx.d $a2, $s2, $s8 + slli.d $s1, $a2, 3 + ldx.d $a2, $s2, $s1 sub.w $a3, $a3, $a4 slli.d $s4, $a3, 2 slli.d $a3, $a1, 3 @@ -5572,46 +5544,46 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n fldx.s $fa1, $a3, $a0 ld.d $a0, $sp, 168 # 8-byte Folded Reload jirl $ra, $a0, 0 - ldx.d $a0, $s2, $s8 + ldx.d $a0, $s2, $s1 fstx.s $fa0, $a0, $s4 - slli.d $a0, $s6, 2 + slli.d $a0, $fp, 2 fldx.s $fa1, $s3, $a0 fcmp.cule.s $fcc0, $fa1, $fa0 - ld.d $s8, $sp, 176 # 8-byte Folded Reload + ld.d $a2, $sp, 176 # 8-byte Folded Reload bceqz $fcc0, .LBB20_72 # %bb.70: # in Loop: Header=BB20_65 Depth=2 fld.s $fa1, $s7, 0 fcmp.cule.s $fcc0, $fa1, $fa0 bceqz $fcc0, .LBB20_73 .LBB20_71: # in Loop: Header=BB20_65 Depth=2 - ldx.w $a0, $s8, $a0 - bne $a0, $fp, .LBB20_64 + ldx.w $a0, $a2, $a0 + bne $a0, $s8, .LBB20_64 b .LBB20_74 .p2align 4, , 16 .LBB20_72: # in Loop: Header=BB20_65 Depth=2 - alsl.d $a1, $s6, $s3, 2 + alsl.d $a1, $fp, $s3, 2 fst.s $fa0, $a1, 0 - stx.w $s0, $s8, $a0 + stx.w $s0, $a2, $a0 fld.s $fa1, $s7, 0 fcmp.cule.s $fcc0, $fa1, $fa0 bcnez $fcc0, .LBB20_71 .LBB20_73: # in Loop: Header=BB20_65 Depth=2 fst.s $fa0, $s7, 0 - ld.d $a1, $sp, 152 # 8-byte Folded Reload - st.w $s6, $a1, 0 - ldx.w $a0, $s8, $a0 - bne $a0, $fp, .LBB20_64 + ld.d $a1, $sp, 160 # 8-byte Folded Reload + st.w $fp, $a1, 0 + ldx.w $a0, $a2, $a0 + bne $a0, $s8, .LBB20_64 .LBB20_74: # in Loop: Header=BB20_65 Depth=2 - alsl.d $a0, $s6, $s8, 2 + alsl.d $a0, $fp, $a2, 2 st.w $s0, $a0, 0 b .LBB20_64 .p2align 4, , 16 .LBB20_75: # %._crit_edge342.loopexit # in Loop: Header=BB20_33 Depth=1 - ld.d $s6, $sp, 128 # 8-byte Folded Reload + ld.d $s6, $sp, 136 # 8-byte Folded Reload ld.d $a1, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.ac) - slli.d $a0, $fp, 4 - alsl.d $a2, $fp, $a0, 3 + slli.d $a0, $s8, 4 + alsl.d $a2, $s8, $a0, 3 add.d $a0, $a1, $a2 ld.d $a0, $a0, 8 ldx.d $a1, $a1, $a2 @@ -5620,16 +5592,17 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n # %bb.76: # in Loop: Header=BB20_33 Depth=1 st.d $a0, $a1, 8 .LBB20_77: # in Loop: Header=BB20_33 Depth=1 - slli.d $fp, $fp, 3 + slli.d $fp, $s8, 3 ldx.d $a0, $s2, $fp pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 ld.d $a0, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.ac) stx.d $zero, $s2, $fp - ld.d $s4, $sp, 136 # 8-byte Folded Reload - ld.d $s5, $sp, 56 # 8-byte Folded Reload - ld.d $s7, $sp, 48 # 8-byte Folded Reload - ld.d $fp, $sp, 40 # 8-byte Folded Reload + ld.d $s4, $sp, 144 # 8-byte Folded Reload + ld.d $s1, $sp, 56 # 8-byte Folded Reload + ld.d $s8, $sp, 48 # 8-byte Folded Reload + ld.d $t1, $sp, 176 # 8-byte Folded Reload + ld.d $fp, $sp, 128 # 8-byte Folded Reload beqz $a0, .LBB20_32 # %bb.78: # %.lr.ph347.preheader # in Loop: Header=BB20_33 Depth=1 @@ -5647,17 +5620,16 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n # Child Loop BB20_89 Depth 3 ld.w $a2, $a1, 16 slli.d $a6, $a2, 2 - ldx.w $a3, $s8, $a6 + ldx.w $a3, $t1, $a6 bne $a3, $s0, .LBB20_79 # %bb.81: # in Loop: Header=BB20_80 Depth=2 - alsl.d $a3, $a2, $s8, 2 + alsl.d $a3, $a2, $t1, 2 slli.d $a4, $a2, 4 alsl.d $a4, $a2, $a4, 3 ldx.d $a5, $a0, $a4 alsl.d $a4, $a2, $s3, 2 - ld.d $a7, $sp, 160 # 8-byte Folded Reload - stx.w $a7, $s3, $a6 - move $a6, $s1 + stx.w $fp, $s3, $a6 + move $a6, $s5 lu32i.d $a6, 0 st.w $a6, $a3, 0 beqz $a5, .LBB20_86 @@ -5665,7 +5637,7 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n # in Loop: Header=BB20_80 Depth=2 slli.d $a6, $a2, 3 ldx.d $a6, $s2, $a6 - fld.s $fa0, $fp, %pc_lo12(.LCPI20_0) + movgr2fr.w $fa0, $fp b .LBB20_84 .p2align 4, , 16 .LBB20_83: # in Loop: Header=BB20_84 Depth=3 @@ -5686,7 +5658,7 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n fmov.s $fa0, $fa1 b .LBB20_83 .LBB20_86: # in Loop: Header=BB20_80 Depth=2 - fld.s $fa0, $fp, %pc_lo12(.LCPI20_0) + movgr2fr.w $fa0, $fp .LBB20_87: # %.lr.ph42.i285.preheader # in Loop: Header=BB20_80 Depth=2 move $a5, $a0 @@ -5721,12 +5693,13 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n pcaddu18i $ra, %call36(AllocateIntVec) jirl $ra, $ra, 0 ld.w $a1, $fp, 0 - st.d $a0, $s7, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.hist) + st.d $a0, $s8, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.hist) move $a0, $a1 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 ld.w $s3, $fp, 0 - st.d $a0, $s1, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.tmptmplen) + ld.d $a1, $sp, 104 # 8-byte Folded Reload + st.d $a0, $a1, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.tmptmplen) slli.d $a0, $s3, 4 alsl.d $a0, $s3, $a0, 3 pcaddu18i $ra, %call36(malloc) @@ -5736,7 +5709,7 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n pcaddu18i $ra, %call36(AllocateIntVec) jirl $ra, $ra, 0 ld.w $a1, $fp, 0 - st.d $a0, $s5, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.nmemar) + st.d $a0, $s1, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.nmemar) move $a0, $a1 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 @@ -5745,13 +5718,13 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n move $a0, $a1 pcaddu18i $ra, %call36(AllocateIntVec) jirl $ra, $ra, 0 - move $s8, $a0 - ld.d $a0, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.ac) + ld.d $a6, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.ac) + st.d $a0, $sp, 176 # 8-byte Folded Spill bgtz $s0, .LBB20_8 .LBB20_93: # %.preheader292.thread slli.d $a1, $s0, 4 alsl.d $a1, $s0, $a1, 3 - add.d $a0, $a0, $a1 + add.d $a0, $a6, $a1 st.d $zero, $a0, -24 pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) @@ -5759,28 +5732,30 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n ori $a0, $zero, 10 pcaddu18i $ra, %call36(fputc) jirl $ra, $ra, 0 + ld.d $t1, $sp, 176 # 8-byte Folded Reload .LBB20_94: # %._crit_edge353 ld.d $fp, $sp, 104 # 8-byte Folded Reload ld.d $a0, $fp, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.tmptmplen) + move $s0, $t1 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $s7, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.hist) + ld.d $a0, $s8, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.hist) st.d $zero, $fp, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.tmptmplen) pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 ld.d $a0, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.ac) - st.d $zero, $s7, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.hist) + st.d $zero, $s8, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.hist) pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $s5, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.nmemar) + ld.d $a0, $s1, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.nmemar) st.d $zero, $s6, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.ac) pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - st.d $zero, $s5, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.nmemar) + st.d $zero, $s1, %pc_lo12(fixed_musclesupg_float_realloc_nobk_halfmtx.nmemar) move $a0, $s3 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - move $a0, $s8 + move $a0, $s0 fld.d $fs1, $sp, 184 # 8-byte Folded Reload fld.d $fs0, $sp, 192 # 8-byte Folded Reload ld.d $s8, $sp, 200 # 8-byte Folded Reload @@ -5798,7 +5773,7 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n pcaddu18i $t8, %call36(free) jr $t8 .LBB20_95: - ld.d $a0, $sp, 24 # 8-byte Folded Reload + ld.d $a0, $sp, 32 # 8-byte Folded Reload ld.d $a3, $a0, 0 pcalau12i $a0, %pc_hi20(.L.str.15) addi.d $a0, $a0, %pc_lo12(.L.str.15) @@ -5823,14 +5798,7 @@ fixed_musclesupg_float_realloc_nobk_halfmtx: # @fixed_musclesupg_float_realloc_n .Lfunc_end20: .size fixed_musclesupg_float_realloc_nobk_halfmtx, .Lfunc_end20-fixed_musclesupg_float_realloc_nobk_halfmtx # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function veryfastsupg_double_loadtop -.LCPI21_0: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI21_1: - .dword 0x3feccccccccccccd # double 0.90000000000000002 - .text - .globl veryfastsupg_double_loadtop + .globl veryfastsupg_double_loadtop # -- Begin function veryfastsupg_double_loadtop .p2align 5 .type veryfastsupg_double_loadtop,@function veryfastsupg_double_loadtop: # @veryfastsupg_double_loadtop @@ -5848,7 +5816,8 @@ veryfastsupg_double_loadtop: # @veryfastsupg_double_loadtop st.d $s7, $sp, 144 # 8-byte Folded Spill st.d $s8, $sp, 136 # 8-byte Folded Spill fst.d $fs0, $sp, 128 # 8-byte Folded Spill - st.d $a3, $sp, 72 # 8-byte Folded Spill + fst.d $fs1, $sp, 120 # 8-byte Folded Spill + st.d $a3, $sp, 64 # 8-byte Folded Spill move $s2, $a2 move $s3, $a1 move $s7, $a0 @@ -5858,18 +5827,18 @@ veryfastsupg_double_loadtop: # @veryfastsupg_double_loadtop addi.d $a1, $a1, %pc_lo12(.L.str.9) pcaddu18i $ra, %call36(fopen) jirl $ra, $ra, 0 - st.d $a0, $sp, 80 # 8-byte Folded Spill + st.d $a0, $sp, 72 # 8-byte Folded Spill beqz $a0, .LBB21_60 # %bb.1: pcalau12i $a0, %pc_hi20(veryfastsupg_double_loadtop.hist) - st.d $a0, $sp, 96 # 8-byte Folded Spill + st.d $a0, $sp, 88 # 8-byte Folded Spill ld.d $a0, $a0, %pc_lo12(veryfastsupg_double_loadtop.hist) pcalau12i $s1, %pc_hi20(veryfastsupg_double_loadtop.treetmp) pcalau12i $s0, %pc_hi20(veryfastsupg_double_loadtop.tree) pcalau12i $a1, %pc_hi20(veryfastsupg_double_loadtop.tmptmplen) - st.d $a1, $sp, 88 # 8-byte Folded Spill + st.d $a1, $sp, 80 # 8-byte Folded Spill pcalau12i $a1, %pc_hi20(veryfastsupg_double_loadtop.ac) - st.d $a1, $sp, 104 # 8-byte Folded Spill + st.d $a1, $sp, 96 # 8-byte Folded Spill beqz $a0, .LBB21_7 # %bb.2: blez $s7, .LBB21_8 @@ -5892,7 +5861,7 @@ veryfastsupg_double_loadtop: # @veryfastsupg_double_loadtop addi.w $s5, $s5, 1 bne $s4, $fp, .LBB21_4 # %bb.5: # %.lr.ph206 - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.d $a1, $a0, %pc_lo12(veryfastsupg_double_loadtop.ac) ori $a0, $zero, 1 bne $s7, $a0, .LBB21_9 @@ -5919,22 +5888,22 @@ veryfastsupg_double_loadtop: # @veryfastsupg_double_loadtop pcaddu18i $ra, %call36(AllocateIntVec) jirl $ra, $ra, 0 ld.w $a1, $fp, 0 - ld.d $a2, $sp, 96 # 8-byte Folded Reload + ld.d $a2, $sp, 88 # 8-byte Folded Reload st.d $a0, $a2, %pc_lo12(veryfastsupg_double_loadtop.hist) slli.d $s4, $a1, 3 move $a0, $s4 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 - ld.d $a1, $sp, 88 # 8-byte Folded Reload + ld.d $a1, $sp, 80 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(veryfastsupg_double_loadtop.tmptmplen) move $a0, $s4 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 - ld.d $a1, $sp, 104 # 8-byte Folded Reload + ld.d $a1, $sp, 96 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(veryfastsupg_double_loadtop.ac) bgtz $s7, .LBB21_3 .LBB21_8: # %._crit_edge212.thread - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(veryfastsupg_double_loadtop.ac) alsl.d $a0, $s7, $a0, 3 addi.w $a1, $zero, -1 @@ -5987,9 +5956,9 @@ veryfastsupg_double_loadtop: # @veryfastsupg_double_loadtop .LBB21_14: # %._crit_edge212 addi.w $a2, $s7, -1 addi.w $fp, $zero, -1 - ld.d $a0, $sp, 88 # 8-byte Folded Reload + ld.d $a0, $sp, 80 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(veryfastsupg_double_loadtop.tmptmplen) - st.d $a2, $sp, 64 # 8-byte Folded Spill + st.d $a2, $sp, 56 # 8-byte Folded Spill slli.d $a2, $a2, 3 move $s5, $fp lu32i.d $s5, 0 @@ -5998,7 +5967,7 @@ veryfastsupg_double_loadtop: # @veryfastsupg_double_loadtop move $a2, $s4 pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 88 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(veryfastsupg_double_loadtop.hist) slli.d $a2, $s7, 2 ori $a1, $zero, 255 @@ -6006,16 +5975,16 @@ veryfastsupg_double_loadtop: # @veryfastsupg_double_loadtop jirl $ra, $ra, 0 pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) - st.d $a0, $sp, 32 # 8-byte Folded Spill + st.d $a0, $sp, 24 # 8-byte Folded Spill ld.d $a1, $a0, 0 ori $a0, $zero, 10 pcaddu18i $ra, %call36(fputc) jirl $ra, $ra, 0 ori $a0, $zero, 1 - st.d $s7, $sp, 24 # 8-byte Folded Spill + st.d $s7, $sp, 16 # 8-byte Folded Spill bne $s7, $a0, .LBB21_16 .LBB21_15: # %._crit_edge241 - ld.d $a0, $sp, 80 # 8-byte Folded Reload + ld.d $a0, $sp, 72 # 8-byte Folded Reload pcaddu18i $ra, %call36(fclose) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.17) @@ -6040,16 +6009,16 @@ veryfastsupg_double_loadtop: # @veryfastsupg_double_loadtop ori $a0, $zero, 10 pcaddu18i $ra, %call36(fputc) jirl $ra, $ra, 0 - ld.d $fp, $sp, 88 # 8-byte Folded Reload + ld.d $fp, $sp, 80 # 8-byte Folded Reload ld.d $a0, $fp, %pc_lo12(veryfastsupg_double_loadtop.tmptmplen) pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $s2, $sp, 96 # 8-byte Folded Reload + ld.d $s2, $sp, 88 # 8-byte Folded Reload ld.d $a0, $s2, %pc_lo12(veryfastsupg_double_loadtop.hist) st.d $zero, $fp, %pc_lo12(veryfastsupg_double_loadtop.tmptmplen) pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $fp, $sp, 104 # 8-byte Folded Reload + ld.d $fp, $sp, 96 # 8-byte Folded Reload ld.d $a0, $fp, %pc_lo12(veryfastsupg_double_loadtop.ac) st.d $zero, $s2, %pc_lo12(veryfastsupg_double_loadtop.hist) pcaddu18i $ra, %call36(free) @@ -6061,6 +6030,7 @@ veryfastsupg_double_loadtop: # @veryfastsupg_double_loadtop ld.d $a0, $s1, %pc_lo12(veryfastsupg_double_loadtop.treetmp) pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 + fld.d $fs1, $sp, 120 # 8-byte Folded Reload fld.d $fs0, $sp, 128 # 8-byte Folded Reload ld.d $s8, $sp, 136 # 8-byte Folded Reload ld.d $s7, $sp, 144 # 8-byte Folded Reload @@ -6077,23 +6047,28 @@ veryfastsupg_double_loadtop: # @veryfastsupg_double_loadtop ret .LBB21_16: # %.lr.ph240 lu12i.w $a0, -209716 - ori $a0, $a0, 3277 - st.d $a0, $sp, 56 # 8-byte Folded Spill + ori $s4, $a0, 3277 lu12i.w $a0, 104857 ori $a0, $a0, 2457 st.d $a0, $sp, 48 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.L.str.24) addi.d $a0, $a0, %pc_lo12(.L.str.24) - st.d $a0, $sp, 16 # 8-byte Folded Spill - lu12i.w $s4, -264192 - bstrins.d $s4, $s4, 62, 32 + st.d $a0, $sp, 8 # 8-byte Folded Spill + lu12i.w $a0, -264192 + bstrins.d $a0, $a0, 62, 32 + st.d $a0, $sp, 40 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(loadtreeoneline.gett) addi.d $s7, $a0, %pc_lo12(loadtreeoneline.gett) pcalau12i $a0, %pc_hi20(.L.str.38) addi.d $a0, $a0, %pc_lo12(.L.str.38) - st.d $a0, $sp, 40 # 8-byte Folded Spill + st.d $a0, $sp, 32 # 8-byte Folded Spill move $s6, $zero movgr2fr.d $fs0, $zero + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1019 + movgr2fr.d $fs1, $a0 b .LBB21_18 .p2align 4, , 16 .LBB21_17: # in Loop: Header=BB21_18 Depth=1 @@ -6113,7 +6088,7 @@ veryfastsupg_double_loadtop: # @veryfastsupg_double_loadtop pcaddu18i $ra, %call36(strcpy) jirl $ra, $ra, 0 addi.d $s6, $s6, 1 - ld.d $a0, $sp, 64 # 8-byte Folded Reload + ld.d $a0, $sp, 56 # 8-byte Folded Reload beq $s6, $a0, .LBB21_15 .LBB21_18: # =>This Loop Header: Depth=1 # Child Loop BB21_25 Depth 2 @@ -6121,42 +6096,42 @@ veryfastsupg_double_loadtop: # @veryfastsupg_double_loadtop # Child Loop BB21_36 Depth 2 # Child Loop BB21_39 Depth 2 # Child Loop BB21_51 Depth 2 - ld.d $a0, $sp, 56 # 8-byte Folded Reload - mul.d $a0, $s6, $a0 + mul.d $a0, $s6, $s4 rotri.w $a0, $a0, 1 ld.d $a1, $sp, 48 # 8-byte Folded Reload bgeu $a1, $a0, .LBB21_57 .LBB21_19: # in Loop: Header=BB21_18 Depth=1 - st.d $s4, $sp, 112 + ld.d $a0, $sp, 40 # 8-byte Folded Reload + st.d $a0, $sp, 104 ori $a1, $zero, 999 move $a0, $s7 - ld.d $a2, $sp, 80 # 8-byte Folded Reload + ld.d $a2, $sp, 72 # 8-byte Folded Reload pcaddu18i $ra, %call36(fgets) jirl $ra, $ra, 0 - addi.d $a2, $sp, 120 - addi.d $a4, $sp, 112 + addi.d $a2, $sp, 112 + addi.d $a4, $sp, 104 move $a0, $s7 - ld.d $a1, $sp, 40 # 8-byte Folded Reload - addi.d $a3, $sp, 124 - addi.d $a5, $sp, 116 + ld.d $a1, $sp, 32 # 8-byte Folded Reload + addi.d $a3, $sp, 116 + addi.d $a5, $sp, 108 pcaddu18i $ra, %call36(__isoc99_sscanf) jirl $ra, $ra, 0 - ld.w $a2, $sp, 120 - ld.w $a3, $sp, 124 + ld.w $a2, $sp, 112 + ld.w $a3, $sp, 116 addi.w $a0, $a2, -1 - st.w $a0, $sp, 120 + st.w $a0, $sp, 112 addi.w $a1, $a3, -1 - st.w $a1, $sp, 124 + st.w $a1, $sp, 116 bge $a2, $a3, .LBB21_59 # %bb.20: # %loadtreeoneline.exit # in Loop: Header=BB21_18 Depth=1 - fld.s $fa0, $sp, 112 + fld.s $fa0, $sp, 104 vldi $vr1, -1040 fcmp.cune.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB21_58 # %bb.21: # %loadtreeoneline.exit # in Loop: Header=BB21_18 Depth=1 - fld.s $fa0, $sp, 116 + fld.s $fa0, $sp, 108 fcmp.ceq.s $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB21_58 # %bb.22: # in Loop: Header=BB21_18 Depth=1 @@ -6164,14 +6139,14 @@ veryfastsupg_double_loadtop: # @veryfastsupg_double_loadtop ldx.d $a6, $s3, $s8 slli.d $a3, $a1, 3 slli.d $a4, $s6, 3 - ld.d $a2, $sp, 96 # 8-byte Folded Reload + ld.d $a2, $sp, 88 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(veryfastsupg_double_loadtop.hist) ldx.d $a5, $s2, $a4 fldx.d $fa0, $a6, $a3 slli.d $a6, $a0, 2 ldx.w $a7, $a2, $a6 ld.d $a6, $a5, 0 - vldi $vr5, -928 + vldi $vr3, -928 beq $a7, $fp, .LBB21_29 # %bb.23: # in Loop: Header=BB21_18 Depth=1 slli.d $a7, $a7, 3 @@ -6310,12 +6285,12 @@ veryfastsupg_double_loadtop: # @veryfastsupg_double_loadtop # in Loop: Header=BB21_18 Depth=1 st.w $s5, $t0, 0 .LBB21_43: # in Loop: Header=BB21_18 Depth=1 - ld.d $a5, $sp, 88 # 8-byte Folded Reload + ld.d $a5, $sp, 80 # 8-byte Folded Reload ld.d $a5, $a5, %pc_lo12(veryfastsupg_double_loadtop.tmptmplen) fldx.d $fa1, $a5, $s8 - ld.d $a6, $sp, 72 # 8-byte Folded Reload + ld.d $a6, $sp, 64 # 8-byte Folded Reload ldx.d $a6, $a6, $a4 - fmul.d $fa0, $fa0, $fa5 + fmul.d $fa0, $fa0, $fa3 fsub.d $fa2, $fa0, $fa1 fst.d $fa2, $a6, 0 fldx.d $fa1, $a5, $a3 @@ -6333,7 +6308,7 @@ veryfastsupg_double_loadtop: # @veryfastsupg_double_loadtop .LBB21_47: # in Loop: Header=BB21_18 Depth=1 move $a4, $zero alsl.d $a7, $a0, $a2, 2 - ld.d $a2, $sp, 104 # 8-byte Folded Reload + ld.d $a2, $sp, 96 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(veryfastsupg_double_loadtop.ac) alsl.d $a5, $a0, $a5, 3 fst.d $fa0, $a5, 0 @@ -6362,14 +6337,14 @@ veryfastsupg_double_loadtop: # @veryfastsupg_double_loadtop fldx.d $fa1, $a5, $a7 fcmp.clt.d $fcc0, $fa0, $fa1 fsel $fa2, $fa1, $fa0, $fcc0 - pcalau12i $a5, %pc_hi20(.LCPI21_0) - fld.d $fa3, $a5, %pc_lo12(.LCPI21_0) - pcalau12i $a5, %pc_hi20(.LCPI21_1) - fld.d $fa4, $a5, %pc_lo12(.LCPI21_1) fadd.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa5 fmul.d $fa0, $fa0, $fa3 - fmadd.d $fa0, $fa2, $fa4, $fa0 + fmul.d $fa0, $fa0, $fs1 + move $a5, $s4 + lu32i.d $a5, -209716 + lu52i.d $a5, $a5, 1022 + movgr2fr.d $fa1, $a5 + fmadd.d $fa0, $fa2, $fa1, $fa0 fstx.d $fa0, $t1, $t0 .LBB21_50: # in Loop: Header=BB21_51 Depth=2 slli.d $a4, $a4, 3 @@ -6402,16 +6377,16 @@ veryfastsupg_double_loadtop: # @veryfastsupg_double_loadtop st.w $a0, $a1, 4 b .LBB21_17 .LBB21_57: # in Loop: Header=BB21_18 Depth=1 - ld.d $a0, $sp, 32 # 8-byte Folded Reload + ld.d $a0, $sp, 24 # 8-byte Folded Reload ld.d $a0, $a0, 0 - ld.d $a1, $sp, 16 # 8-byte Folded Reload + ld.d $a1, $sp, 8 # 8-byte Folded Reload move $a2, $s6 - ld.d $a3, $sp, 24 # 8-byte Folded Reload + ld.d $a3, $sp, 16 # 8-byte Folded Reload pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 b .LBB21_19 .LBB21_58: - ld.d $a0, $sp, 32 # 8-byte Folded Reload + ld.d $a0, $sp, 24 # 8-byte Folded Reload ld.d $a3, $a0, 0 pcalau12i $a0, %pc_hi20(.L.str.25) addi.d $a0, $a0, %pc_lo12(.L.str.25) @@ -6423,7 +6398,7 @@ veryfastsupg_double_loadtop: # @veryfastsupg_double_loadtop pcaddu18i $ra, %call36(exit) jirl $ra, $ra, 0 .LBB21_59: - ld.d $a0, $sp, 32 # 8-byte Folded Reload + ld.d $a0, $sp, 24 # 8-byte Folded Reload ld.d $a3, $a0, 0 pcalau12i $a0, %pc_hi20(.L.str.39) addi.d $a0, $a0, %pc_lo12(.L.str.39) @@ -6450,14 +6425,7 @@ veryfastsupg_double_loadtop: # @veryfastsupg_double_loadtop .Lfunc_end21: .size veryfastsupg_double_loadtop, .Lfunc_end21-veryfastsupg_double_loadtop # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function veryfastsupg_double_loadtree -.LCPI22_0: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI22_1: - .dword 0x3feccccccccccccd # double 0.90000000000000002 - .text - .globl veryfastsupg_double_loadtree + .globl veryfastsupg_double_loadtree # -- Begin function veryfastsupg_double_loadtree .p2align 5 .type veryfastsupg_double_loadtree,@function veryfastsupg_double_loadtree: # @veryfastsupg_double_loadtree @@ -6475,7 +6443,8 @@ veryfastsupg_double_loadtree: # @veryfastsupg_double_loadtree st.d $s7, $sp, 144 # 8-byte Folded Spill st.d $s8, $sp, 136 # 8-byte Folded Spill fst.d $fs0, $sp, 128 # 8-byte Folded Spill - st.d $a3, $sp, 72 # 8-byte Folded Spill + fst.d $fs1, $sp, 120 # 8-byte Folded Spill + st.d $a3, $sp, 64 # 8-byte Folded Spill move $s2, $a2 move $s3, $a1 move $s7, $a0 @@ -6485,18 +6454,18 @@ veryfastsupg_double_loadtree: # @veryfastsupg_double_loadtree addi.d $a1, $a1, %pc_lo12(.L.str.9) pcaddu18i $ra, %call36(fopen) jirl $ra, $ra, 0 - st.d $a0, $sp, 80 # 8-byte Folded Spill + st.d $a0, $sp, 72 # 8-byte Folded Spill beqz $a0, .LBB22_60 # %bb.1: pcalau12i $a0, %pc_hi20(veryfastsupg_double_loadtree.hist) - st.d $a0, $sp, 96 # 8-byte Folded Spill + st.d $a0, $sp, 88 # 8-byte Folded Spill ld.d $a0, $a0, %pc_lo12(veryfastsupg_double_loadtree.hist) pcalau12i $s1, %pc_hi20(veryfastsupg_double_loadtree.treetmp) pcalau12i $s0, %pc_hi20(veryfastsupg_double_loadtree.tree) pcalau12i $a1, %pc_hi20(veryfastsupg_double_loadtree.tmptmplen) - st.d $a1, $sp, 88 # 8-byte Folded Spill + st.d $a1, $sp, 80 # 8-byte Folded Spill pcalau12i $a1, %pc_hi20(veryfastsupg_double_loadtree.ac) - st.d $a1, $sp, 104 # 8-byte Folded Spill + st.d $a1, $sp, 96 # 8-byte Folded Spill beqz $a0, .LBB22_7 # %bb.2: blez $s7, .LBB22_8 @@ -6519,7 +6488,7 @@ veryfastsupg_double_loadtree: # @veryfastsupg_double_loadtree addi.w $s5, $s5, 1 bne $s4, $fp, .LBB22_4 # %bb.5: # %.lr.ph195 - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.d $a1, $a0, %pc_lo12(veryfastsupg_double_loadtree.ac) ori $a0, $zero, 1 bne $s7, $a0, .LBB22_9 @@ -6546,22 +6515,22 @@ veryfastsupg_double_loadtree: # @veryfastsupg_double_loadtree pcaddu18i $ra, %call36(AllocateIntVec) jirl $ra, $ra, 0 ld.w $a1, $fp, 0 - ld.d $a2, $sp, 96 # 8-byte Folded Reload + ld.d $a2, $sp, 88 # 8-byte Folded Reload st.d $a0, $a2, %pc_lo12(veryfastsupg_double_loadtree.hist) slli.d $s4, $a1, 3 move $a0, $s4 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 - ld.d $a1, $sp, 88 # 8-byte Folded Reload + ld.d $a1, $sp, 80 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(veryfastsupg_double_loadtree.tmptmplen) move $a0, $s4 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 - ld.d $a1, $sp, 104 # 8-byte Folded Reload + ld.d $a1, $sp, 96 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(veryfastsupg_double_loadtree.ac) bgtz $s7, .LBB22_3 .LBB22_8: # %._crit_edge201.thread - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(veryfastsupg_double_loadtree.ac) alsl.d $a0, $s7, $a0, 3 addi.w $a1, $zero, -1 @@ -6614,9 +6583,9 @@ veryfastsupg_double_loadtree: # @veryfastsupg_double_loadtree .LBB22_14: # %._crit_edge201 addi.w $a2, $s7, -1 addi.w $fp, $zero, -1 - ld.d $a0, $sp, 88 # 8-byte Folded Reload + ld.d $a0, $sp, 80 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(veryfastsupg_double_loadtree.tmptmplen) - st.d $a2, $sp, 64 # 8-byte Folded Spill + st.d $a2, $sp, 56 # 8-byte Folded Spill slli.d $a2, $a2, 3 move $s5, $fp lu32i.d $s5, 0 @@ -6625,7 +6594,7 @@ veryfastsupg_double_loadtree: # @veryfastsupg_double_loadtree move $a2, $s4 pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 88 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(veryfastsupg_double_loadtree.hist) slli.d $a2, $s7, 2 ori $a1, $zero, 255 @@ -6633,16 +6602,16 @@ veryfastsupg_double_loadtree: # @veryfastsupg_double_loadtree jirl $ra, $ra, 0 pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) - st.d $a0, $sp, 32 # 8-byte Folded Spill + st.d $a0, $sp, 24 # 8-byte Folded Spill ld.d $a1, $a0, 0 ori $a0, $zero, 10 pcaddu18i $ra, %call36(fputc) jirl $ra, $ra, 0 ori $a0, $zero, 1 - st.d $s7, $sp, 24 # 8-byte Folded Spill + st.d $s7, $sp, 16 # 8-byte Folded Spill bne $s7, $a0, .LBB22_16 .LBB22_15: # %._crit_edge230 - ld.d $a0, $sp, 80 # 8-byte Folded Reload + ld.d $a0, $sp, 72 # 8-byte Folded Reload pcaddu18i $ra, %call36(fclose) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.17) @@ -6667,16 +6636,16 @@ veryfastsupg_double_loadtree: # @veryfastsupg_double_loadtree ori $a0, $zero, 10 pcaddu18i $ra, %call36(fputc) jirl $ra, $ra, 0 - ld.d $fp, $sp, 88 # 8-byte Folded Reload + ld.d $fp, $sp, 80 # 8-byte Folded Reload ld.d $a0, $fp, %pc_lo12(veryfastsupg_double_loadtree.tmptmplen) pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $s2, $sp, 96 # 8-byte Folded Reload + ld.d $s2, $sp, 88 # 8-byte Folded Reload ld.d $a0, $s2, %pc_lo12(veryfastsupg_double_loadtree.hist) st.d $zero, $fp, %pc_lo12(veryfastsupg_double_loadtree.tmptmplen) pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $fp, $sp, 104 # 8-byte Folded Reload + ld.d $fp, $sp, 96 # 8-byte Folded Reload ld.d $a0, $fp, %pc_lo12(veryfastsupg_double_loadtree.ac) st.d $zero, $s2, %pc_lo12(veryfastsupg_double_loadtree.hist) pcaddu18i $ra, %call36(free) @@ -6688,6 +6657,7 @@ veryfastsupg_double_loadtree: # @veryfastsupg_double_loadtree ld.d $a0, $s1, %pc_lo12(veryfastsupg_double_loadtree.treetmp) pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 + fld.d $fs1, $sp, 120 # 8-byte Folded Reload fld.d $fs0, $sp, 128 # 8-byte Folded Reload ld.d $s8, $sp, 136 # 8-byte Folded Reload ld.d $s7, $sp, 144 # 8-byte Folded Reload @@ -6704,23 +6674,28 @@ veryfastsupg_double_loadtree: # @veryfastsupg_double_loadtree ret .LBB22_16: # %.lr.ph229 lu12i.w $a0, -209716 - ori $a0, $a0, 3277 - st.d $a0, $sp, 56 # 8-byte Folded Spill + ori $s4, $a0, 3277 lu12i.w $a0, 104857 ori $a0, $a0, 2457 st.d $a0, $sp, 48 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.L.str.24) addi.d $a0, $a0, %pc_lo12(.L.str.24) - st.d $a0, $sp, 16 # 8-byte Folded Spill - lu12i.w $s4, -264192 - bstrins.d $s4, $s4, 62, 32 + st.d $a0, $sp, 8 # 8-byte Folded Spill + lu12i.w $a0, -264192 + bstrins.d $a0, $a0, 62, 32 + st.d $a0, $sp, 40 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(loadtreeoneline.gett) addi.d $s7, $a0, %pc_lo12(loadtreeoneline.gett) pcalau12i $a0, %pc_hi20(.L.str.38) addi.d $a0, $a0, %pc_lo12(.L.str.38) - st.d $a0, $sp, 40 # 8-byte Folded Spill + st.d $a0, $sp, 32 # 8-byte Folded Spill move $s6, $zero movgr2fr.w $fs0, $zero + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1019 + movgr2fr.d $fs1, $a0 b .LBB22_18 .p2align 4, , 16 .LBB22_17: # in Loop: Header=BB22_18 Depth=1 @@ -6740,7 +6715,7 @@ veryfastsupg_double_loadtree: # @veryfastsupg_double_loadtree pcaddu18i $ra, %call36(strcpy) jirl $ra, $ra, 0 addi.d $s6, $s6, 1 - ld.d $a0, $sp, 64 # 8-byte Folded Reload + ld.d $a0, $sp, 56 # 8-byte Folded Reload beq $s6, $a0, .LBB22_15 .LBB22_18: # =>This Loop Header: Depth=1 # Child Loop BB22_29 Depth 2 @@ -6748,42 +6723,42 @@ veryfastsupg_double_loadtree: # @veryfastsupg_double_loadtree # Child Loop BB22_40 Depth 2 # Child Loop BB22_43 Depth 2 # Child Loop BB22_51 Depth 2 - ld.d $a0, $sp, 56 # 8-byte Folded Reload - mul.d $a0, $s6, $a0 + mul.d $a0, $s6, $s4 rotri.w $a0, $a0, 1 ld.d $a1, $sp, 48 # 8-byte Folded Reload bgeu $a1, $a0, .LBB22_57 .LBB22_19: # in Loop: Header=BB22_18 Depth=1 - st.d $s4, $sp, 112 + ld.d $a0, $sp, 40 # 8-byte Folded Reload + st.d $a0, $sp, 104 ori $a1, $zero, 999 move $a0, $s7 - ld.d $a2, $sp, 80 # 8-byte Folded Reload + ld.d $a2, $sp, 72 # 8-byte Folded Reload pcaddu18i $ra, %call36(fgets) jirl $ra, $ra, 0 - addi.d $a2, $sp, 120 - addi.d $a4, $sp, 112 + addi.d $a2, $sp, 112 + addi.d $a4, $sp, 104 move $a0, $s7 - ld.d $a1, $sp, 40 # 8-byte Folded Reload - addi.d $a3, $sp, 124 - addi.d $a5, $sp, 116 + ld.d $a1, $sp, 32 # 8-byte Folded Reload + addi.d $a3, $sp, 116 + addi.d $a5, $sp, 108 pcaddu18i $ra, %call36(__isoc99_sscanf) jirl $ra, $ra, 0 - ld.w $a2, $sp, 120 - ld.w $a3, $sp, 124 + ld.w $a2, $sp, 112 + ld.w $a3, $sp, 116 addi.w $a0, $a2, -1 - st.w $a0, $sp, 120 + st.w $a0, $sp, 112 addi.w $a1, $a3, -1 - st.w $a1, $sp, 124 + st.w $a1, $sp, 116 bge $a2, $a3, .LBB22_59 # %bb.20: # %loadtreeoneline.exit # in Loop: Header=BB22_18 Depth=1 - fld.s $fa0, $sp, 112 + fld.s $fa0, $sp, 104 vldi $vr1, -1040 fcmp.ceq.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB22_58 # %bb.21: # %loadtreeoneline.exit # in Loop: Header=BB22_18 Depth=1 - fld.s $fa2, $sp, 116 + fld.s $fa2, $sp, 108 fcmp.cune.s $fcc0, $fa2, $fa1 bceqz $fcc0, .LBB22_58 # %bb.22: # in Loop: Header=BB22_18 Depth=1 @@ -6792,19 +6767,19 @@ veryfastsupg_double_loadtree: # @veryfastsupg_double_loadtree slli.d $a3, $a1, 3 fldx.d $fa1, $a2, $a3 fcmp.cule.s $fcc0, $fs0, $fa0 - vldi $vr5, -928 + vldi $vr3, -928 bcnez $fcc0, .LBB22_24 # %bb.23: # in Loop: Header=BB22_18 Depth=1 - st.w $zero, $sp, 112 + st.w $zero, $sp, 104 fmov.s $fa0, $fs0 .LBB22_24: # in Loop: Header=BB22_18 Depth=1 fcmp.cule.s $fcc0, $fs0, $fa2 bcnez $fcc0, .LBB22_26 # %bb.25: # in Loop: Header=BB22_18 Depth=1 - st.w $zero, $sp, 116 + st.w $zero, $sp, 108 .LBB22_26: # in Loop: Header=BB22_18 Depth=1 slli.d $a2, $s6, 3 - ld.d $a4, $sp, 96 # 8-byte Folded Reload + ld.d $a4, $sp, 88 # 8-byte Folded Reload ld.d $a4, $a4, %pc_lo12(veryfastsupg_double_loadtree.hist) ldx.d $a5, $s2, $a2 slli.d $a6, $a0, 2 @@ -6950,16 +6925,16 @@ veryfastsupg_double_loadtree: # @veryfastsupg_double_loadtree .LBB22_47: # in Loop: Header=BB22_18 Depth=1 move $a5, $zero alsl.d $a4, $a0, $a4, 2 - ld.d $a6, $sp, 72 # 8-byte Folded Reload + ld.d $a6, $sp, 64 # 8-byte Folded Reload ldx.d $a6, $a6, $a2 - fmul.d $fa1, $fa1, $fa5 - fld.s $fa2, $sp, 116 + fmul.d $fa1, $fa1, $fa3 + fld.s $fa2, $sp, 108 fcvt.d.s $fa0, $fa0 fst.d $fa0, $a6, 0 - ld.d $a2, $sp, 88 # 8-byte Folded Reload + ld.d $a2, $sp, 80 # 8-byte Folded Reload ld.d $a7, $a2, %pc_lo12(veryfastsupg_double_loadtree.tmptmplen) fcvt.d.s $fa0, $fa2 - ld.d $a2, $sp, 104 # 8-byte Folded Reload + ld.d $a2, $sp, 96 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(veryfastsupg_double_loadtree.ac) fst.d $fa0, $a6, 8 fstx.d $fa1, $a7, $s8 @@ -6988,14 +6963,14 @@ veryfastsupg_double_loadtree: # @veryfastsupg_double_loadtree fldx.d $fa1, $a4, $a7 fcmp.clt.d $fcc0, $fa0, $fa1 fsel $fa2, $fa1, $fa0, $fcc0 - pcalau12i $a4, %pc_hi20(.LCPI22_0) - fld.d $fa3, $a4, %pc_lo12(.LCPI22_0) - pcalau12i $a4, %pc_hi20(.LCPI22_1) - fld.d $fa4, $a4, %pc_lo12(.LCPI22_1) fadd.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa5 fmul.d $fa0, $fa0, $fa3 - fmadd.d $fa0, $fa2, $fa4, $fa0 + fmul.d $fa0, $fa0, $fs1 + move $a4, $s4 + lu32i.d $a4, -209716 + lu52i.d $a4, $a4, 1022 + movgr2fr.d $fa1, $a4 + fmadd.d $fa0, $fa2, $fa1, $fa0 fstx.d $fa0, $t1, $t0 .LBB22_50: # in Loop: Header=BB22_51 Depth=2 slli.d $a4, $a5, 3 @@ -7028,16 +7003,16 @@ veryfastsupg_double_loadtree: # @veryfastsupg_double_loadtree st.w $a0, $a1, 4 b .LBB22_17 .LBB22_57: # in Loop: Header=BB22_18 Depth=1 - ld.d $a0, $sp, 32 # 8-byte Folded Reload + ld.d $a0, $sp, 24 # 8-byte Folded Reload ld.d $a0, $a0, 0 - ld.d $a1, $sp, 16 # 8-byte Folded Reload + ld.d $a1, $sp, 8 # 8-byte Folded Reload move $a2, $s6 - ld.d $a3, $sp, 24 # 8-byte Folded Reload + ld.d $a3, $sp, 16 # 8-byte Folded Reload pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 b .LBB22_19 .LBB22_58: - ld.d $a0, $sp, 32 # 8-byte Folded Reload + ld.d $a0, $sp, 24 # 8-byte Folded Reload ld.d $a3, $a0, 0 pcalau12i $a0, %pc_hi20(.L.str.26) addi.d $a0, $a0, %pc_lo12(.L.str.26) @@ -7049,7 +7024,7 @@ veryfastsupg_double_loadtree: # @veryfastsupg_double_loadtree pcaddu18i $ra, %call36(exit) jirl $ra, $ra, 0 .LBB22_59: - ld.d $a0, $sp, 32 # 8-byte Folded Reload + ld.d $a0, $sp, 24 # 8-byte Folded Reload ld.d $a3, $a0, 0 pcalau12i $a0, %pc_hi20(.L.str.39) addi.d $a0, $a0, %pc_lo12(.L.str.39) @@ -7076,14 +7051,7 @@ veryfastsupg_double_loadtree: # @veryfastsupg_double_loadtree .Lfunc_end22: .size veryfastsupg_double_loadtree, .Lfunc_end22-veryfastsupg_double_loadtree # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function veryfastsupg_double_outtree -.LCPI23_0: - .dword 0x40e869fe66666666 # double 49999.949999999997 -.LCPI23_1: - .dword 0x40f869fe66666666 # double 99999.899999999994 - .text - .globl veryfastsupg_double_outtree + .globl veryfastsupg_double_outtree # -- Begin function veryfastsupg_double_outtree .p2align 5 .type veryfastsupg_double_outtree,@function veryfastsupg_double_outtree: # @veryfastsupg_double_outtree @@ -7157,13 +7125,13 @@ veryfastsupg_double_outtree: # @veryfastsupg_double_outtree beqz $a0, .LBB23_12 # %bb.7: st.d $s4, $sp, 104 # 8-byte Folded Spill - st.d $fp, $sp, 40 # 8-byte Folded Spill + st.d $fp, $sp, 32 # 8-byte Folded Spill blez $s6, .LBB23_13 .LBB23_8: # %.preheader228.lr.ph pcaddu18i $ra, %call36(__ctype_b_loc) jirl $ra, $ra, 0 move $s4, $a0 - st.d $s6, $sp, 32 # 8-byte Folded Spill + st.d $s6, $sp, 24 # 8-byte Folded Spill slli.d $s3, $s6, 3 addi.d $s8, $s5, 14 ori $s5, $zero, 1 @@ -7498,7 +7466,7 @@ veryfastsupg_double_outtree: # @veryfastsupg_double_outtree ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a1, $a0, %pc_lo12(veryfastsupg_double_outtree.ac) ori $a0, $zero, 1 - ld.d $s0, $sp, 32 # 8-byte Folded Reload + ld.d $s0, $sp, 24 # 8-byte Folded Reload bne $s0, $a0, .LBB23_14 # %bb.11: move $a0, $zero @@ -7544,7 +7512,7 @@ veryfastsupg_double_outtree: # @veryfastsupg_double_outtree jirl $ra, $ra, 0 st.d $a0, $s1, %pc_lo12(veryfastsupg_double_outtree.nametmp) st.d $s4, $sp, 104 # 8-byte Folded Spill - st.d $fp, $sp, 40 # 8-byte Folded Spill + st.d $fp, $sp, 32 # 8-byte Folded Spill bgtz $s6, .LBB23_8 .LBB23_13: # %._crit_edge238.thread ld.d $a0, $sp, 152 # 8-byte Folded Reload @@ -7620,7 +7588,7 @@ veryfastsupg_double_outtree: # @veryfastsupg_double_outtree jirl $ra, $ra, 0 pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) - st.d $a0, $sp, 24 # 8-byte Folded Spill + st.d $a0, $sp, 16 # 8-byte Folded Spill ld.d $a1, $a0, 0 ori $a0, $zero, 10 pcaddu18i $ra, %call36(fputc) @@ -7673,7 +7641,7 @@ veryfastsupg_double_outtree: # @veryfastsupg_double_outtree ld.d $a0, $s0, %pc_lo12(veryfastsupg_double_outtree.treetmp) pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 40 # 8-byte Folded Reload + ld.d $a0, $sp, 32 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(veryfastsupg_double_outtree.nametmp) fld.d $fs0, $sp, 160 # 8-byte Folded Reload ld.d $s8, $sp, 168 # 8-byte Folded Reload @@ -7696,10 +7664,14 @@ veryfastsupg_double_outtree: # @veryfastsupg_double_outtree st.d $a0, $sp, 56 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.L.str.24) addi.d $a0, $a0, %pc_lo12(.L.str.24) - st.d $a0, $sp, 16 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI23_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI23_0) + st.d $a0, $sp, 8 # 8-byte Folded Spill move $s4, $zero + lu12i.w $a0, 419430 + ori $a0, $a0, 1638 + lu32i.d $a0, -497154 + st.d $a0, $sp, 40 # 8-byte Folded Spill + lu52i.d $a0, $a0, 1038 + movgr2fr.d $fs0, $a0 move $s6, $s5 move $s8, $s5 b .LBB23_23 @@ -7744,46 +7716,47 @@ veryfastsupg_double_outtree: # @veryfastsupg_double_outtree .LBB23_24: # in Loop: Header=BB23_23 Depth=1 ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(veryfastsupg_double_outtree.ac) - ld.w $a4, $a0, 0 + ld.w $a2, $a0, 0 ld.d $t6, $sp, 80 # 8-byte Folded Reload - beq $a4, $s5, .LBB23_30 + beq $a2, $s5, .LBB23_30 # %bb.25: # %.preheader.preheader # in Loop: Header=BB23_23 Depth=1 - pcalau12i $a1, %pc_hi20(.LCPI23_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI23_1) move $a1, $zero + ld.d $a3, $sp, 40 # 8-byte Folded Reload + lu52i.d $a3, $a3, 1039 + movgr2fr.d $fa0, $a3 .p2align 4, , 16 .LBB23_26: # %.preheader # Parent Loop BB23_23 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB23_27 Depth 3 - slli.d $a2, $a1, 3 - ldx.d $a2, $s2, $a2 - move $a3, $a1 - move $a1, $a4 + slli.d $a3, $a1, 3 + ldx.d $a3, $s2, $a3 + move $a4, $a1 + move $a1, $a2 .p2align 4, , 16 .LBB23_27: # Parent Loop BB23_23 Depth=1 # Parent Loop BB23_26 Depth=2 # => This Inner Loop Header: Depth=3 - slli.d $a5, $a4, 3 - fldx.d $fa1, $a2, $a5 + slli.d $a5, $a2, 3 + fldx.d $fa1, $a3, $a5 fcmp.clt.d $fcc0, $fa1, $fa0 fsel $fa0, $fa0, $fa1, $fcc0 movcf2gr $a6, $fcc0 masknez $a7, $s8, $a6 - maskeqz $t0, $a3, $a6 - maskeqz $t1, $a4, $a6 - ldx.w $a4, $a0, $a5 + maskeqz $t0, $a4, $a6 + maskeqz $t1, $a2, $a6 + ldx.w $a2, $a0, $a5 or $s8, $t0, $a7 masknez $a6, $s6, $a6 addi.w $a5, $zero, -1 or $s6, $t1, $a6 - bne $a4, $a5, .LBB23_27 + bne $a2, $a5, .LBB23_27 # %bb.28: # %.loopexit # in Loop: Header=BB23_26 Depth=2 slli.d $a2, $a1, 3 - ldx.w $a4, $a0, $a2 - bne $a4, $a5, .LBB23_26 + ldx.w $a2, $a0, $a2 + bne $a2, $a5, .LBB23_26 # %bb.29: # %._crit_edge248.loopexit # in Loop: Header=BB23_23 Depth=1 vldi $vr1, -928 @@ -8022,11 +7995,11 @@ veryfastsupg_double_outtree: # @veryfastsupg_double_outtree st.w $a1, $a0, 4 b .LBB23_22 .LBB23_62: # in Loop: Header=BB23_23 Depth=1 - ld.d $a0, $sp, 24 # 8-byte Folded Reload + ld.d $a0, $sp, 16 # 8-byte Folded Reload ld.d $a0, $a0, 0 - ld.d $a1, $sp, 16 # 8-byte Folded Reload + ld.d $a1, $sp, 8 # 8-byte Folded Reload move $a2, $s4 - ld.d $a3, $sp, 32 # 8-byte Folded Reload + ld.d $a3, $sp, 24 # 8-byte Folded Reload pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 b .LBB23_24 @@ -8082,47 +8055,37 @@ cluster_minimum_double: # @cluster_minimum_double .Lfunc_end26: .size cluster_minimum_double, .Lfunc_end26-cluster_minimum_double # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function veryfastsupg -.LCPI27_0: - .dword 0x412e848000000000 # double 1.0E+6 -.LCPI27_1: - .dword 0x413e848000000000 # double 2.0E+6 -.LCPI27_2: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI27_3: - .dword 0x3feccccccccccccd # double 0.90000000000000002 - .text - .globl veryfastsupg + .globl veryfastsupg # -- Begin function veryfastsupg .p2align 5 .type veryfastsupg,@function veryfastsupg: # @veryfastsupg # %bb.0: - addi.d $sp, $sp, -176 - st.d $ra, $sp, 168 # 8-byte Folded Spill - st.d $fp, $sp, 160 # 8-byte Folded Spill - st.d $s0, $sp, 152 # 8-byte Folded Spill - st.d $s1, $sp, 144 # 8-byte Folded Spill - st.d $s2, $sp, 136 # 8-byte Folded Spill - st.d $s3, $sp, 128 # 8-byte Folded Spill - st.d $s4, $sp, 120 # 8-byte Folded Spill - st.d $s5, $sp, 112 # 8-byte Folded Spill - st.d $s6, $sp, 104 # 8-byte Folded Spill - st.d $s7, $sp, 96 # 8-byte Folded Spill - st.d $s8, $sp, 88 # 8-byte Folded Spill - fst.d $fs0, $sp, 80 # 8-byte Folded Spill - fst.d $fs1, $sp, 72 # 8-byte Folded Spill + addi.d $sp, $sp, -192 + st.d $ra, $sp, 184 # 8-byte Folded Spill + st.d $fp, $sp, 176 # 8-byte Folded Spill + st.d $s0, $sp, 168 # 8-byte Folded Spill + st.d $s1, $sp, 160 # 8-byte Folded Spill + st.d $s2, $sp, 152 # 8-byte Folded Spill + st.d $s3, $sp, 144 # 8-byte Folded Spill + st.d $s4, $sp, 136 # 8-byte Folded Spill + st.d $s5, $sp, 128 # 8-byte Folded Spill + st.d $s6, $sp, 120 # 8-byte Folded Spill + st.d $s7, $sp, 112 # 8-byte Folded Spill + st.d $s8, $sp, 104 # 8-byte Folded Spill + fst.d $fs0, $sp, 96 # 8-byte Folded Spill + fst.d $fs1, $sp, 88 # 8-byte Folded Spill + fst.d $fs2, $sp, 80 # 8-byte Folded Spill pcalau12i $s6, %pc_hi20(veryfastsupg.eff) ld.d $a4, $s6, %pc_lo12(veryfastsupg.eff) - st.d $a3, $sp, 32 # 8-byte Folded Spill + st.d $a3, $sp, 40 # 8-byte Folded Spill move $s1, $a2 move $s0, $a0 pcalau12i $a0, %pc_hi20(veryfastsupg.hist) - st.d $a0, $sp, 64 # 8-byte Folded Spill + st.d $a0, $sp, 72 # 8-byte Folded Spill pcalau12i $t4, %pc_hi20(veryfastsupg.tmptmplen) pcalau12i $t3, %pc_hi20(veryfastsupg.ac) - st.d $t3, $sp, 40 # 8-byte Folded Spill - st.d $t4, $sp, 48 # 8-byte Folded Spill + st.d $t3, $sp, 48 # 8-byte Folded Spill + st.d $t4, $sp, 56 # 8-byte Folded Spill beqz $a4, .LBB27_13 # %bb.1: blez $s0, .LBB27_14 @@ -8133,14 +8096,13 @@ veryfastsupg: # @veryfastsupg slli.d $a3, $a3, 2 ori $a4, $zero, 4 vldi $vr0, -928 - pcalau12i $a5, %pc_hi20(.LCPI27_0) - fld.d $fs0, $a5, %pc_lo12(.LCPI27_0) - lu52i.d $a5, $zero, 1022 - vreplgr2vr.d $vr1, $a5 ori $a5, $zero, 0 lu32i.d $a5, -97152 lu52i.d $a5, $a5, 1042 - vreplgr2vr.d $vr2, $a5 + movgr2fr.d $fa1, $a5 + lu52i.d $a6, $zero, 1022 + vreplgr2vr.d $vr2, $a6 + vreplgr2vr.d $vr3, $a5 b .LBB27_4 .p2align 4, , 16 .LBB27_3: # %._crit_edge.us @@ -8168,28 +8130,28 @@ veryfastsupg: # @veryfastsupg .LBB27_7: # %vector.body # Parent Loop BB27_4 Depth=1 # => This Inner Loop Header: Depth=2 - vld $vr3, $t0, -16 - vld $vr4, $t0, 0 - vfmadd.d $vr3, $vr3, $vr2, $vr1 - vfmadd.d $vr4, $vr4, $vr2, $vr1 - vreplvei.d $vr5, $vr3, 0 - ftintrz.w.d $fa5, $fa5 - movfr2gr.s $t2, $fa5 - vinsgr2vr.w $vr5, $t2, 0 - vreplvei.d $vr3, $vr3, 1 - ftintrz.w.d $fa3, $fa3 - movfr2gr.s $t2, $fa3 - vinsgr2vr.w $vr5, $t2, 1 - vreplvei.d $vr3, $vr4, 0 - ftintrz.w.d $fa3, $fa3 - movfr2gr.s $t2, $fa3 - vinsgr2vr.w $vr3, $t2, 0 + vld $vr4, $t0, -16 + vld $vr5, $t0, 0 + vfmadd.d $vr4, $vr4, $vr3, $vr2 + vfmadd.d $vr5, $vr5, $vr3, $vr2 + vreplvei.d $vr6, $vr4, 0 + ftintrz.w.d $fa6, $fa6 + movfr2gr.s $t2, $fa6 + vinsgr2vr.w $vr6, $t2, 0 vreplvei.d $vr4, $vr4, 1 ftintrz.w.d $fa4, $fa4 movfr2gr.s $t2, $fa4 - vinsgr2vr.w $vr3, $t2, 1 - vpackev.d $vr3, $vr3, $vr5 - vst $vr3, $a7, -8 + vinsgr2vr.w $vr6, $t2, 1 + vreplvei.d $vr4, $vr5, 0 + ftintrz.w.d $fa4, $fa4 + movfr2gr.s $t2, $fa4 + vinsgr2vr.w $vr4, $t2, 0 + vreplvei.d $vr5, $vr5, 1 + ftintrz.w.d $fa5, $fa5 + movfr2gr.s $t2, $fa5 + vinsgr2vr.w $vr4, $t2, 1 + vpackev.d $vr4, $vr4, $vr6 + vst $vr4, $a7, -8 addi.d $t1, $t1, -4 addi.d $a7, $a7, 16 addi.d $t0, $t0, 32 @@ -8207,10 +8169,10 @@ veryfastsupg: # @veryfastsupg .LBB27_10: # %scalar.ph # Parent Loop BB27_4 Depth=1 # => This Inner Loop Header: Depth=2 - fld.d $fa3, $a5, 0 - fmadd.d $fa3, $fa3, $fs0, $fa0 - ftintrz.w.d $fa3, $fa3 - movfr2gr.s $t0, $fa3 + fld.d $fa4, $a5, 0 + fmadd.d $fa4, $fa4, $fa1, $fa0 + ftintrz.w.d $fa4, $fa4 + movfr2gr.s $t0, $fa4 st.w $t0, $a6, 0 addi.d $a5, $a5, 8 addi.d $a7, $a7, -1 @@ -8238,19 +8200,19 @@ veryfastsupg: # @veryfastsupg pcaddu18i $ra, %call36(AllocateIntVec) jirl $ra, $ra, 0 ld.w $a1, $s2, 0 - ld.d $a2, $sp, 64 # 8-byte Folded Reload + ld.d $a2, $sp, 72 # 8-byte Folded Reload st.d $a0, $a2, %pc_lo12(veryfastsupg.hist) slli.d $s2, $a1, 3 move $a0, $s2 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 - ld.d $a1, $sp, 48 # 8-byte Folded Reload + ld.d $a1, $sp, 56 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(veryfastsupg.tmptmplen) move $a0, $s2 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 - ld.d $t4, $sp, 48 # 8-byte Folded Reload - ld.d $t3, $sp, 40 # 8-byte Folded Reload + ld.d $t4, $sp, 56 # 8-byte Folded Reload + ld.d $t3, $sp, 48 # 8-byte Folded Reload move $a1, $fp st.d $a0, $t3, %pc_lo12(veryfastsupg.ac) bgtz $s0, .LBB27_2 @@ -8311,13 +8273,13 @@ veryfastsupg: # @veryfastsupg move $a3, $s4 ld.d $a0, $t4, %pc_lo12(veryfastsupg.tmptmplen) lu32i.d $a3, 0 - st.d $a3, $sp, 24 # 8-byte Folded Spill + st.d $a3, $sp, 32 # 8-byte Folded Spill stx.w $a3, $a1, $a2 slli.d $a2, $s0, 3 move $a1, $zero pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ld.d $a0, $sp, 64 # 8-byte Folded Reload + ld.d $a0, $sp, 72 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(veryfastsupg.hist) slli.d $a2, $s0, 2 ori $a1, $zero, 255 @@ -8325,7 +8287,7 @@ veryfastsupg: # @veryfastsupg jirl $ra, $ra, 0 pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) - st.d $a0, $sp, 16 # 8-byte Folded Spill + st.d $a0, $sp, 24 # 8-byte Folded Spill ld.d $a1, $a0, 0 ori $a0, $zero, 10 pcaddu18i $ra, %call36(fputc) @@ -8334,8 +8296,8 @@ veryfastsupg: # @veryfastsupg bne $s0, $a0, .LBB27_23 .LBB27_21: # %._crit_edge217.._crit_edge262_crit_edge ld.d $a0, $s6, %pc_lo12(veryfastsupg.eff) - ld.d $t6, $sp, 40 # 8-byte Folded Reload - ld.d $t7, $sp, 48 # 8-byte Folded Reload + ld.d $t6, $sp, 48 # 8-byte Folded Reload + ld.d $t7, $sp, 56 # 8-byte Folded Reload .LBB27_22: # %._crit_edge262 move $fp, $t6 move $s1, $t7 @@ -8345,7 +8307,7 @@ veryfastsupg: # @veryfastsupg st.d $zero, $s6, %pc_lo12(veryfastsupg.eff) pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $s0, $sp, 64 # 8-byte Folded Reload + ld.d $s0, $sp, 72 # 8-byte Folded Reload ld.d $a0, $s0, %pc_lo12(veryfastsupg.hist) st.d $zero, $s1, %pc_lo12(veryfastsupg.tmptmplen) pcaddu18i $ra, %call36(free) @@ -8355,20 +8317,21 @@ veryfastsupg: # @veryfastsupg pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 st.d $zero, $fp, %pc_lo12(veryfastsupg.ac) - fld.d $fs1, $sp, 72 # 8-byte Folded Reload - fld.d $fs0, $sp, 80 # 8-byte Folded Reload - ld.d $s8, $sp, 88 # 8-byte Folded Reload - ld.d $s7, $sp, 96 # 8-byte Folded Reload - ld.d $s6, $sp, 104 # 8-byte Folded Reload - ld.d $s5, $sp, 112 # 8-byte Folded Reload - ld.d $s4, $sp, 120 # 8-byte Folded Reload - ld.d $s3, $sp, 128 # 8-byte Folded Reload - ld.d $s2, $sp, 136 # 8-byte Folded Reload - ld.d $s1, $sp, 144 # 8-byte Folded Reload - ld.d $s0, $sp, 152 # 8-byte Folded Reload - ld.d $fp, $sp, 160 # 8-byte Folded Reload - ld.d $ra, $sp, 168 # 8-byte Folded Reload - addi.d $sp, $sp, 176 + fld.d $fs2, $sp, 80 # 8-byte Folded Reload + fld.d $fs1, $sp, 88 # 8-byte Folded Reload + fld.d $fs0, $sp, 96 # 8-byte Folded Reload + ld.d $s8, $sp, 104 # 8-byte Folded Reload + ld.d $s7, $sp, 112 # 8-byte Folded Reload + ld.d $s6, $sp, 120 # 8-byte Folded Reload + ld.d $s5, $sp, 128 # 8-byte Folded Reload + ld.d $s4, $sp, 136 # 8-byte Folded Reload + ld.d $s3, $sp, 144 # 8-byte Folded Reload + ld.d $s2, $sp, 152 # 8-byte Folded Reload + ld.d $s1, $sp, 160 # 8-byte Folded Reload + ld.d $s0, $sp, 168 # 8-byte Folded Reload + ld.d $fp, $sp, 176 # 8-byte Folded Reload + ld.d $ra, $sp, 184 # 8-byte Folded Reload + addi.d $sp, $sp, 192 ret .LBB27_23: # %.lr.ph261.preheader lu12i.w $a0, -209716 @@ -8377,20 +8340,29 @@ veryfastsupg: # @veryfastsupg ori $s7, $a0, 2457 pcalau12i $a0, %pc_hi20(.L.str.24) addi.d $a0, $a0, %pc_lo12(.L.str.24) - st.d $a0, $sp, 8 # 8-byte Folded Spill + st.d $a0, $sp, 16 # 8-byte Folded Spill move $s3, $zero - pcalau12i $a0, %pc_hi20(.LCPI27_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI27_1) + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a1, $a0, 1043 + movgr2fr.d $fs0, $a1 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fs1, $a0 vldi $vr3, -928 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1019 + movgr2fr.d $fs2, $a0 lu12i.w $a0, 976 ori $a0, $a0, 2304 - st.d $a0, $sp, 56 # 8-byte Folded Spill + st.d $a0, $sp, 64 # 8-byte Folded Spill move $s2, $s4 move $fp, $s4 - ld.d $t6, $sp, 40 # 8-byte Folded Reload - ld.d $t7, $sp, 48 # 8-byte Folded Reload - ld.d $t8, $sp, 32 # 8-byte Folded Reload - ld.d $ra, $sp, 24 # 8-byte Folded Reload + ld.d $t6, $sp, 48 # 8-byte Folded Reload + ld.d $t7, $sp, 56 # 8-byte Folded Reload + ld.d $t8, $sp, 40 # 8-byte Folded Reload + ld.d $ra, $sp, 32 # 8-byte Folded Reload b .LBB27_25 .p2align 4, , 16 .LBB27_24: # in Loop: Header=BB27_25 Depth=1 @@ -8416,7 +8388,7 @@ veryfastsupg: # @veryfastsupg # in Loop: Header=BB27_25 Depth=1 ld.d $a0, $s6, %pc_lo12(veryfastsupg.eff) move $a2, $zero - ld.d $a3, $sp, 56 # 8-byte Folded Reload + ld.d $a3, $sp, 64 # 8-byte Folded Reload .p2align 4, , 16 .LBB27_28: # %.preheader # Parent Loop BB27_25 Depth=1 @@ -8458,28 +8430,28 @@ veryfastsupg: # @veryfastsupg fmul.d $fa0, $fa0, $fa3 b .LBB27_34 .LBB27_32: # in Loop: Header=BB27_25 Depth=1 - ld.d $a0, $sp, 16 # 8-byte Folded Reload + ld.d $a0, $sp, 24 # 8-byte Folded Reload ld.d $a0, $a0, 0 - ld.d $a1, $sp, 8 # 8-byte Folded Reload + ld.d $a1, $sp, 16 # 8-byte Folded Reload move $a2, $s3 move $a3, $s0 pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 vldi $vr3, -928 - ld.d $ra, $sp, 24 # 8-byte Folded Reload - ld.d $t8, $sp, 32 # 8-byte Folded Reload - ld.d $t7, $sp, 48 # 8-byte Folded Reload - ld.d $t6, $sp, 40 # 8-byte Folded Reload + ld.d $ra, $sp, 32 # 8-byte Folded Reload + ld.d $t8, $sp, 40 # 8-byte Folded Reload + ld.d $t7, $sp, 56 # 8-byte Folded Reload + ld.d $t6, $sp, 48 # 8-byte Folded Reload ld.d $a1, $t6, %pc_lo12(veryfastsupg.ac) ld.w $a6, $a1, 0 bne $a6, $s4, .LBB27_27 .p2align 4, , 16 .LBB27_33: # in Loop: Header=BB27_25 Depth=1 - fmov.d $fa0, $fs1 + fmov.d $fa0, $fs0 .LBB27_34: # %._crit_edge227 # in Loop: Header=BB27_25 Depth=1 slli.d $a2, $s3, 3 - ld.d $a0, $sp, 64 # 8-byte Folded Reload + ld.d $a0, $sp, 72 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(veryfastsupg.hist) ldx.d $a3, $s1, $a2 slli.d $a4, $fp, 2 @@ -8631,7 +8603,7 @@ veryfastsupg: # @veryfastsupg slli.d $a4, $fp, 3 fldx.d $fa1, $a3, $a4 ldx.d $a5, $t8, $a2 - fdiv.d $fa0, $fa0, $fs0 + fdiv.d $fa0, $fa0, $fs1 fsub.d $fa1, $fa0, $fa1 fst.d $fa1, $a5, 0 slli.d $a2, $s2, 3 @@ -8669,18 +8641,18 @@ veryfastsupg: # @veryfastsupg masknez $t1, $a4, $a5 maskeqz $a5, $t0, $a5 or $a5, $a5, $t1 - add.d $a4, $a4, $t0 - movgr2fr.w $fa0, $a4 + movgr2fr.w $fa0, $a5 ffint.d.w $fa0, $fa0 - pcalau12i $a4, %pc_hi20(.LCPI27_2) - fld.d $fa1, $a4, %pc_lo12(.LCPI27_2) - fmul.d $fa0, $fa0, $fa3 - pcalau12i $a4, %pc_hi20(.LCPI27_3) - fld.d $fa2, $a4, %pc_lo12(.LCPI27_3) - fmul.d $fa0, $fa0, $fa1 - movgr2fr.w $fa1, $a5 + add.d $a4, $a4, $t0 + movgr2fr.w $fa1, $a4 ffint.d.w $fa1, $fa1 - fmadd.d $fa0, $fa1, $fa2, $fa0 + fmul.d $fa1, $fa1, $fa3 + fmul.d $fa1, $fa1, $fs2 + move $a4, $s5 + lu32i.d $a4, -209716 + lu52i.d $a4, $a4, 1022 + movgr2fr.d $fa2, $a4 + fmadd.d $fa0, $fa0, $fa2, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a4, $fa0 stx.w $a4, $a7, $a6 @@ -8717,14 +8689,7 @@ veryfastsupg: # @veryfastsupg .Lfunc_end27: .size veryfastsupg, .Lfunc_end27-veryfastsupg # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function veryfastsupg_int -.LCPI28_0: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI28_1: - .dword 0x3feccccccccccccd # double 0.90000000000000002 - .text - .globl veryfastsupg_int + .globl veryfastsupg_int # -- Begin function veryfastsupg_int .p2align 5 .type veryfastsupg_int,@function veryfastsupg_int: # @veryfastsupg_int @@ -8741,17 +8706,18 @@ veryfastsupg_int: # @veryfastsupg_int st.d $s6, $sp, 104 # 8-byte Folded Spill st.d $s7, $sp, 96 # 8-byte Folded Spill st.d $s8, $sp, 88 # 8-byte Folded Spill + fst.d $fs0, $sp, 80 # 8-byte Folded Spill pcalau12i $s6, %pc_hi20(veryfastsupg_int.eff) ld.d $a4, $s6, %pc_lo12(veryfastsupg_int.eff) - st.d $a3, $sp, 72 # 8-byte Folded Spill + st.d $a3, $sp, 64 # 8-byte Folded Spill move $s1, $a2 move $s0, $a0 pcalau12i $a0, %pc_hi20(veryfastsupg_int.hist) - st.d $a0, $sp, 80 # 8-byte Folded Spill + st.d $a0, $sp, 72 # 8-byte Folded Spill pcalau12i $t4, %pc_hi20(veryfastsupg_int.tmptmplen) pcalau12i $t3, %pc_hi20(veryfastsupg_int.ac) - st.d $t3, $sp, 32 # 8-byte Folded Spill - st.d $t4, $sp, 40 # 8-byte Folded Spill + st.d $t3, $sp, 24 # 8-byte Folded Spill + st.d $t4, $sp, 32 # 8-byte Folded Spill beqz $a4, .LBB28_13 # %bb.1: blez $s0, .LBB28_14 @@ -8839,19 +8805,19 @@ veryfastsupg_int: # @veryfastsupg_int pcaddu18i $ra, %call36(AllocateIntVec) jirl $ra, $ra, 0 ld.w $a1, $s2, 0 - ld.d $a2, $sp, 80 # 8-byte Folded Reload + ld.d $a2, $sp, 72 # 8-byte Folded Reload st.d $a0, $a2, %pc_lo12(veryfastsupg_int.hist) move $a0, $a1 pcaddu18i $ra, %call36(AllocateIntVec) jirl $ra, $ra, 0 ld.w $a1, $s2, 0 - ld.d $a2, $sp, 40 # 8-byte Folded Reload + ld.d $a2, $sp, 32 # 8-byte Folded Reload st.d $a0, $a2, %pc_lo12(veryfastsupg_int.tmptmplen) slli.d $a0, $a1, 3 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 - ld.d $t4, $sp, 40 # 8-byte Folded Reload - ld.d $t3, $sp, 32 # 8-byte Folded Reload + ld.d $t4, $sp, 32 # 8-byte Folded Reload + ld.d $t3, $sp, 24 # 8-byte Folded Reload move $a1, $fp st.d $a0, $t3, %pc_lo12(veryfastsupg_int.ac) bgtz $s0, .LBB28_2 @@ -8912,14 +8878,14 @@ veryfastsupg_int: # @veryfastsupg_int move $a3, $s4 ld.d $a0, $t4, %pc_lo12(veryfastsupg_int.tmptmplen) lu32i.d $a3, 0 - st.d $a3, $sp, 64 # 8-byte Folded Spill + st.d $a3, $sp, 56 # 8-byte Folded Spill stx.w $a3, $a1, $a2 slli.d $s2, $s0, 2 move $a1, $zero move $a2, $s2 pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - ld.d $a0, $sp, 80 # 8-byte Folded Reload + ld.d $a0, $sp, 72 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(veryfastsupg_int.hist) ori $a1, $zero, 255 move $a2, $s2 @@ -8927,7 +8893,7 @@ veryfastsupg_int: # @veryfastsupg_int jirl $ra, $ra, 0 pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) - st.d $a0, $sp, 24 # 8-byte Folded Spill + st.d $a0, $sp, 16 # 8-byte Folded Spill ld.d $a1, $a0, 0 ori $a0, $zero, 10 pcaddu18i $ra, %call36(fputc) @@ -8936,8 +8902,8 @@ veryfastsupg_int: # @veryfastsupg_int bne $s0, $a0, .LBB28_23 .LBB28_21: # %._crit_edge217.._crit_edge262_crit_edge ld.d $a0, $s6, %pc_lo12(veryfastsupg_int.eff) - ld.d $t8, $sp, 32 # 8-byte Folded Reload - ld.d $ra, $sp, 40 # 8-byte Folded Reload + ld.d $t8, $sp, 24 # 8-byte Folded Reload + ld.d $ra, $sp, 32 # 8-byte Folded Reload .LBB28_22: # %._crit_edge262 move $fp, $t8 move $s1, $ra @@ -8947,7 +8913,7 @@ veryfastsupg_int: # @veryfastsupg_int st.d $zero, $s6, %pc_lo12(veryfastsupg_int.eff) pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $s0, $sp, 80 # 8-byte Folded Reload + ld.d $s0, $sp, 72 # 8-byte Folded Reload ld.d $a0, $s0, %pc_lo12(veryfastsupg_int.hist) st.d $zero, $s1, %pc_lo12(veryfastsupg_int.tmptmplen) pcaddu18i $ra, %call36(free) @@ -8957,6 +8923,7 @@ veryfastsupg_int: # @veryfastsupg_int pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 st.d $zero, $fp, %pc_lo12(veryfastsupg_int.ac) + fld.d $fs0, $sp, 80 # 8-byte Folded Reload ld.d $s8, $sp, 88 # 8-byte Folded Reload ld.d $s7, $sp, 96 # 8-byte Folded Reload ld.d $s6, $sp, 104 # 8-byte Folded Reload @@ -8977,19 +8944,24 @@ veryfastsupg_int: # @veryfastsupg_int ori $s7, $a0, 2457 pcalau12i $a0, %pc_hi20(.L.str.24) addi.d $a0, $a0, %pc_lo12(.L.str.24) - st.d $a0, $sp, 16 # 8-byte Folded Spill + st.d $a0, $sp, 8 # 8-byte Folded Spill move $s3, $zero lu12i.w $a0, 488 ori $a0, $a0, 1152 - st.d $a0, $sp, 48 # 8-byte Folded Spill + st.d $a0, $sp, 40 # 8-byte Folded Spill vldi $vr3, -928 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1019 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 976 ori $a0, $a0, 2304 - st.d $a0, $sp, 56 # 8-byte Folded Spill + st.d $a0, $sp, 48 # 8-byte Folded Spill move $fp, $s4 move $s5, $s4 - ld.d $t8, $sp, 32 # 8-byte Folded Reload - ld.d $ra, $sp, 40 # 8-byte Folded Reload + ld.d $t8, $sp, 24 # 8-byte Folded Reload + ld.d $ra, $sp, 32 # 8-byte Folded Reload b .LBB28_25 .p2align 4, , 16 .LBB28_24: # in Loop: Header=BB28_25 Depth=1 @@ -9015,7 +8987,7 @@ veryfastsupg_int: # @veryfastsupg_int # in Loop: Header=BB28_25 Depth=1 ld.d $a0, $s6, %pc_lo12(veryfastsupg_int.eff) move $a2, $zero - ld.d $a3, $sp, 56 # 8-byte Folded Reload + ld.d $a3, $sp, 48 # 8-byte Folded Reload .p2align 4, , 16 .LBB28_28: # %.preheader # Parent Loop BB28_25 Depth=1 @@ -9059,26 +9031,26 @@ veryfastsupg_int: # @veryfastsupg_int movfr2gr.s $a0, $fa0 b .LBB28_34 .LBB28_32: # in Loop: Header=BB28_25 Depth=1 - ld.d $a0, $sp, 24 # 8-byte Folded Reload + ld.d $a0, $sp, 16 # 8-byte Folded Reload ld.d $a0, $a0, 0 - ld.d $a1, $sp, 16 # 8-byte Folded Reload + ld.d $a1, $sp, 8 # 8-byte Folded Reload move $a2, $s3 move $a3, $s0 pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 vldi $vr3, -928 - ld.d $ra, $sp, 40 # 8-byte Folded Reload - ld.d $t8, $sp, 32 # 8-byte Folded Reload + ld.d $ra, $sp, 32 # 8-byte Folded Reload + ld.d $t8, $sp, 24 # 8-byte Folded Reload ld.d $a1, $t8, %pc_lo12(veryfastsupg_int.ac) ld.w $a6, $a1, 0 bne $a6, $s4, .LBB28_27 .p2align 4, , 16 .LBB28_33: # in Loop: Header=BB28_25 Depth=1 - ld.d $a0, $sp, 48 # 8-byte Folded Reload + ld.d $a0, $sp, 40 # 8-byte Folded Reload .LBB28_34: # %._crit_edge227 # in Loop: Header=BB28_25 Depth=1 slli.d $a3, $s3, 3 - ld.d $a2, $sp, 80 # 8-byte Folded Reload + ld.d $a2, $sp, 72 # 8-byte Folded Reload ld.d $a4, $a2, %pc_lo12(veryfastsupg_int.hist) ldx.d $a6, $s1, $a3 slli.d $a2, $s5, 2 @@ -9138,7 +9110,7 @@ veryfastsupg_int: # @veryfastsupg_int .p2align 4, , 16 .LBB28_41: # in Loop: Header=BB28_25 Depth=1 st.w $s5, $a5, 0 - ld.d $a7, $sp, 64 # 8-byte Folded Reload + ld.d $a7, $sp, 56 # 8-byte Folded Reload st.w $a7, $a5, 4 slli.d $a5, $fp, 2 ldx.w $a7, $a4, $a5 @@ -9146,7 +9118,7 @@ veryfastsupg_int: # @veryfastsupg_int bne $a7, $s4, .LBB28_46 .LBB28_42: # in Loop: Header=BB28_25 Depth=1 st.w $fp, $a6, 0 - ld.d $a7, $sp, 64 # 8-byte Folded Reload + ld.d $a7, $sp, 56 # 8-byte Folded Reload st.w $a7, $a6, 4 b .LBB28_55 .p2align 4, , 16 @@ -9233,7 +9205,7 @@ veryfastsupg_int: # @veryfastsupg_int move $a6, $zero sub.d $t0, $a0, $t0 movgr2fr.w $fa0, $t0 - ld.d $t0, $sp, 72 # 8-byte Folded Reload + ld.d $t0, $sp, 64 # 8-byte Folded Reload ldx.d $a3, $t0, $a3 ldx.w $a5, $a7, $a5 alsl.d $a4, $s5, $a4, 2 @@ -9272,20 +9244,20 @@ veryfastsupg_int: # @veryfastsupg_int masknez $t0, $a2, $a3 maskeqz $a3, $a7, $a3 or $a3, $a3, $t0 - add.d $a2, $a2, $a7 - movgr2fr.w $fa0, $a2 - pcalau12i $a2, %pc_hi20(.LCPI28_0) - fld.d $fa1, $a2, %pc_lo12(.LCPI28_0) + movgr2fr.w $fa0, $a3 ffint.s.w $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fmul.d $fa0, $fa0, $fa3 - fmul.d $fa0, $fa0, $fa1 - pcalau12i $a2, %pc_hi20(.LCPI28_1) - fld.d $fa1, $a2, %pc_lo12(.LCPI28_1) - movgr2fr.w $fa2, $a3 - ffint.s.w $fa2, $fa2 - fcvt.d.s $fa2, $fa2 - fmadd.d $fa0, $fa2, $fa1, $fa0 + add.d $a2, $a2, $a7 + movgr2fr.w $fa1, $a2 + ffint.s.w $fa1, $fa1 + fcvt.d.s $fa1, $fa1 + fmul.d $fa1, $fa1, $fa3 + fmul.d $fa1, $fa1, $fs0 + move $a2, $s2 + lu32i.d $a2, -209716 + lu52i.d $a2, $a2, 1022 + movgr2fr.d $fa2, $a2 + fmadd.d $fa0, $fa0, $fa2, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a2, $fa0 stx.w $a2, $a5, $a4 @@ -9324,37 +9296,26 @@ veryfastsupg_int: # @veryfastsupg_int .Lfunc_end28: .size veryfastsupg_int, .Lfunc_end28-veryfastsupg_int # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function fastsupg -.LCPI29_0: - .word 0x459c3c00 # float 4999.5 -.LCPI29_1: - .word 0x461c3c00 # float 9999 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI29_2: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI29_3: - .dword 0x3feccccccccccccd # double 0.90000000000000002 - .text - .globl fastsupg + .globl fastsupg # -- Begin function fastsupg .p2align 5 .type fastsupg,@function fastsupg: # @fastsupg # %bb.0: - addi.d $sp, $sp, -176 - st.d $ra, $sp, 168 # 8-byte Folded Spill - st.d $fp, $sp, 160 # 8-byte Folded Spill - st.d $s0, $sp, 152 # 8-byte Folded Spill - st.d $s1, $sp, 144 # 8-byte Folded Spill - st.d $s2, $sp, 136 # 8-byte Folded Spill - st.d $s3, $sp, 128 # 8-byte Folded Spill - st.d $s4, $sp, 120 # 8-byte Folded Spill - st.d $s5, $sp, 112 # 8-byte Folded Spill - st.d $s6, $sp, 104 # 8-byte Folded Spill - st.d $s7, $sp, 96 # 8-byte Folded Spill - st.d $s8, $sp, 88 # 8-byte Folded Spill - fst.d $fs0, $sp, 80 # 8-byte Folded Spill + addi.d $sp, $sp, -192 + st.d $ra, $sp, 184 # 8-byte Folded Spill + st.d $fp, $sp, 176 # 8-byte Folded Spill + st.d $s0, $sp, 168 # 8-byte Folded Spill + st.d $s1, $sp, 160 # 8-byte Folded Spill + st.d $s2, $sp, 152 # 8-byte Folded Spill + st.d $s3, $sp, 144 # 8-byte Folded Spill + st.d $s4, $sp, 136 # 8-byte Folded Spill + st.d $s5, $sp, 128 # 8-byte Folded Spill + st.d $s6, $sp, 120 # 8-byte Folded Spill + st.d $s7, $sp, 112 # 8-byte Folded Spill + st.d $s8, $sp, 104 # 8-byte Folded Spill + fst.d $fs0, $sp, 96 # 8-byte Folded Spill + fst.d $fs1, $sp, 88 # 8-byte Folded Spill + fst.d $fs2, $sp, 80 # 8-byte Folded Spill pcalau12i $s4, %pc_hi20(fastsupg.eff) ld.d $a4, $s4, %pc_lo12(fastsupg.eff) st.d $a3, $sp, 72 # 8-byte Folded Spill @@ -9584,19 +9545,21 @@ fastsupg: # @fastsupg ld.d $a0, $a0, %got_pc_lo12(stderr) ld.d $a1, $a0, 0 ori $a0, $zero, 10 - fld.d $fs0, $sp, 80 # 8-byte Folded Reload - ld.d $s8, $sp, 88 # 8-byte Folded Reload - ld.d $s7, $sp, 96 # 8-byte Folded Reload - ld.d $s6, $sp, 104 # 8-byte Folded Reload - ld.d $s5, $sp, 112 # 8-byte Folded Reload - ld.d $s4, $sp, 120 # 8-byte Folded Reload - ld.d $s3, $sp, 128 # 8-byte Folded Reload - ld.d $s2, $sp, 136 # 8-byte Folded Reload - ld.d $s1, $sp, 144 # 8-byte Folded Reload - ld.d $s0, $sp, 152 # 8-byte Folded Reload - ld.d $fp, $sp, 160 # 8-byte Folded Reload - ld.d $ra, $sp, 168 # 8-byte Folded Reload - addi.d $sp, $sp, 176 + fld.d $fs2, $sp, 80 # 8-byte Folded Reload + fld.d $fs1, $sp, 88 # 8-byte Folded Reload + fld.d $fs0, $sp, 96 # 8-byte Folded Reload + ld.d $s8, $sp, 104 # 8-byte Folded Reload + ld.d $s7, $sp, 112 # 8-byte Folded Reload + ld.d $s6, $sp, 120 # 8-byte Folded Reload + ld.d $s5, $sp, 128 # 8-byte Folded Reload + ld.d $s4, $sp, 136 # 8-byte Folded Reload + ld.d $s3, $sp, 144 # 8-byte Folded Reload + ld.d $s2, $sp, 152 # 8-byte Folded Reload + ld.d $s1, $sp, 160 # 8-byte Folded Reload + ld.d $s0, $sp, 168 # 8-byte Folded Reload + ld.d $fp, $sp, 176 # 8-byte Folded Reload + ld.d $ra, $sp, 184 # 8-byte Folded Reload + addi.d $sp, $sp, 192 pcaddu18i $t8, %call36(fputc) jr $t8 .LBB29_28: # %.lr.ph223 @@ -9608,10 +9571,19 @@ fastsupg: # @fastsupg pcalau12i $a0, %pc_hi20(.L.str.24) addi.d $a0, $a0, %pc_lo12(.L.str.24) st.d $a0, $sp, 16 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI29_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI29_0) move $s3, $zero + lu12i.w $a0, 285123 + ori $a0, $a0, 3072 + movgr2fr.w $fs0, $a0 vldi $vr4, -928 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1019 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, 287171 + ori $a0, $a0, 3072 + movgr2fr.w $fs2, $a0 vldi $vr5, -1184 move $s7, $t6 move $s2, $t6 @@ -9645,9 +9617,8 @@ fastsupg: # @fastsupg .LBB29_32: # %.preheader.lr.ph # in Loop: Header=BB29_30 Depth=1 ld.d $a1, $s4, %pc_lo12(fastsupg.eff) - pcalau12i $a2, %pc_hi20(.LCPI29_1) - fld.s $fa0, $a2, %pc_lo12(.LCPI29_1) move $a2, $zero + fmov.s $fa0, $fs2 .p2align 4, , 16 .LBB29_33: # %.preheader # Parent Loop BB29_30 Depth=1 @@ -9836,16 +9807,16 @@ fastsupg: # @fastsupg fldx.s $fa1, $a5, $a6 fcmp.clt.s $fcc0, $fa0, $fa1 fsel $fa2, $fa1, $fa0, $fcc0 + fcvt.d.s $fa2, $fa2 fadd.s $fa0, $fa0, $fa1 fcvt.d.s $fa0, $fa0 - pcalau12i $a5, %pc_hi20(.LCPI29_2) - fld.d $fa1, $a5, %pc_lo12(.LCPI29_2) - pcalau12i $a5, %pc_hi20(.LCPI29_3) - fld.d $fa3, $a5, %pc_lo12(.LCPI29_3) fmul.d $fa0, $fa0, $fa4 - fmul.d $fa0, $fa0, $fa1 - fcvt.d.s $fa1, $fa2 - fmadd.d $fa0, $fa1, $fa3, $fa0 + fmul.d $fa0, $fa0, $fs1 + move $a5, $s5 + lu32i.d $a5, -209716 + lu52i.d $a5, $a5, 1022 + movgr2fr.d $fa1, $a5 + fmadd.d $fa0, $fa2, $fa1, $fa0 fcvt.s.d $fa0, $fa0 fstx.s $fa0, $t0, $a7 .LBB29_54: # in Loop: Header=BB29_55 Depth=2 @@ -9869,18 +9840,7 @@ fastsupg: # @fastsupg .Lfunc_end29: .size fastsupg, .Lfunc_end29-fastsupg # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function supg -.LCPI30_0: - .word 0x461c3c00 # float 9999 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI30_1: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI30_2: - .dword 0x3feccccccccccccd # double 0.90000000000000002 - .text - .globl supg + .globl supg # -- Begin function supg .p2align 5 .type supg,@function supg: # @supg @@ -10045,11 +10005,20 @@ supg: # @supg ld.d $a2, $s3, %pc_lo12(supg.eff) ld.d $a3, $s4, %pc_lo12(supg.pair) ld.d $a4, $s5, %pc_lo12(supg.tmplen) - pcalau12i $a5, %pc_hi20(.LCPI30_0) - fld.s $fa0, $a5, %pc_lo12(.LCPI30_0) - vldi $vr1, -928 + lu12i.w $a5, -419431 + ori $a5, $a5, 2458 + lu32i.d $a5, -419431 + lu52i.d $a5, $a5, 1019 + movgr2fr.d $fa0, $a5 + lu12i.w $a5, -209716 + ori $a5, $a5, 3277 + lu32i.d $a5, -209716 + lu52i.d $a5, $a5, 1022 + movgr2fr.d $fa1, $a5 lu12i.w $a5, 287171 ori $a5, $a5, 3072 + movgr2fr.w $fa2, $a5 + vldi $vr3, -928 b .LBB30_22 .p2align 4, , 16 .LBB30_21: # %._crit_edge208 @@ -10069,7 +10038,7 @@ supg: # @supg ori $t0, $zero, 4 addi.d $a6, $zero, -1 addi.d $a7, $zero, -1 - fmov.s $fa2, $fa0 + fmov.s $fa4, $fa2 move $t2, $a2 .p2align 4, , 16 .LBB30_23: # %.lr.ph176.preheader @@ -10087,9 +10056,9 @@ supg: # @supg # Parent Loop BB30_22 Depth=1 # Parent Loop BB30_23 Depth=2 # => This Inner Loop Header: Depth=3 - fld.s $fa3, $t4, 0 - fcmp.clt.s $fcc0, $fa3, $fa2 - fsel $fa2, $fa2, $fa3, $fcc0 + fld.s $fa5, $t4, 0 + fcmp.clt.s $fcc0, $fa5, $fa4 + fsel $fa4, $fa4, $fa5, $fcc0 movcf2gr $t6, $fcc0 masknez $a7, $a7, $t6 maskeqz $t7, $t3, $t6 @@ -10164,20 +10133,20 @@ supg: # @supg move $t4, $zero st.w $t3, $t1, 0 slli.d $t1, $t0, 2 - fldx.s $fa3, $a4, $t1 + fldx.s $fa5, $a4, $t1 slli.d $t3, $a7, 2 - fldx.s $fa4, $a4, $t3 - fcvt.d.s $fa2, $fa2 - ldx.d $t2, $fp, $t2 - fmul.d $fa2, $fa2, $fa1 + fldx.s $fa6, $a4, $t3 fcvt.d.s $fa4, $fa4 - fsub.d $fa4, $fa2, $fa4 - fst.d $fa4, $t2, 0 - fcvt.d.s $fa3, $fa3 - fsub.d $fa3, $fa2, $fa3 - fst.d $fa3, $t2, 8 - fcvt.s.d $fa2, $fa2 - fstx.s $fa2, $a4, $t3 + ldx.d $t2, $fp, $t2 + fmul.d $fa4, $fa4, $fa3 + fcvt.d.s $fa6, $fa6 + fsub.d $fa6, $fa4, $fa6 + fst.d $fa6, $t2, 0 + fcvt.d.s $fa5, $fa5 + fsub.d $fa5, $fa4, $fa5 + fst.d $fa5, $t2, 8 + fcvt.s.d $fa4, $fa4 + fstx.s $fa4, $a4, $t3 alsl.d $t2, $t0, $a3, 3 alsl.d $t3, $a7, $a3, 3 .p2align 4, , 16 @@ -10215,24 +10184,20 @@ supg: # @supg slli.d $t5, $t5, 3 ldx.d $t5, $a2, $t5 slli.d $t8, $t8, 2 - fldx.s $fa2, $t7, $t8 + fldx.s $fa4, $t7, $t8 slli.d $t6, $t6, 2 - fldx.s $fa3, $t5, $t6 - fcmp.clt.s $fcc0, $fa2, $fa3 - fsel $fa4, $fa3, $fa2, $fcc0 - fadd.s $fa2, $fa2, $fa3 - fcvt.d.s $fa2, $fa2 - pcalau12i $s2, %pc_hi20(.LCPI30_1) - fld.d $fa3, $s2, %pc_lo12(.LCPI30_1) - pcalau12i $s2, %pc_hi20(.LCPI30_2) - fld.d $fa5, $s2, %pc_lo12(.LCPI30_2) - fmul.d $fa2, $fa2, $fa1 - fmul.d $fa2, $fa2, $fa3 - fcvt.d.s $fa3, $fa4 - fmadd.d $fa2, $fa3, $fa5, $fa2 + fldx.s $fa5, $t5, $t6 + fcmp.clt.s $fcc0, $fa4, $fa5 + fsel $fa6, $fa5, $fa4, $fcc0 + fcvt.d.s $fa6, $fa6 + fadd.s $fa4, $fa4, $fa5 + fcvt.d.s $fa4, $fa4 + fmul.d $fa4, $fa4, $fa3 + fmul.d $fa4, $fa4, $fa0 + fmadd.d $fa4, $fa6, $fa1, $fa4 ld.d $s2, $t4, 0 - fcvt.s.d $fa2, $fa2 - fstx.s $fa2, $t7, $t8 + fcvt.s.d $fa4, $fa4 + fstx.s $fa4, $t7, $t8 stx.w $a5, $t5, $t6 stx.w $a5, $s2, $t1 .LBB30_40: # in Loop: Header=BB30_41 Depth=2 @@ -10266,12 +10231,7 @@ supg: # @supg .Lfunc_end30: .size supg, .Lfunc_end30-supg # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function spg -.LCPI31_0: - .word 0x461c3c00 # float 9999 - .text - .globl spg + .globl spg # -- Begin function spg .p2align 5 .type spg,@function spg: # @spg @@ -10422,8 +10382,9 @@ spg: # @spg move $a0, $zero addi.w $a1, $s1, -1 addi.w $a2, $zero, -1 - pcalau12i $a3, %pc_hi20(.LCPI31_0) - fld.s $fa0, $a3, %pc_lo12(.LCPI31_0) + lu12i.w $a3, 287171 + ori $a3, $a3, 3072 + movgr2fr.w $fa0, $a3 addi.d $a3, $sp, 8 vldi $vr1, -928 ori $a4, $zero, 0 @@ -11107,12 +11068,7 @@ countnode_int: # @countnode_int .Lfunc_end34: .size countnode_int, .Lfunc_end34-countnode_int # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function counteff_simple_float -.LCPI35_0: - .dword 0x3f50624dd2f1a9fc # double 0.001 - .text - .globl counteff_simple_float + .globl counteff_simple_float # -- Begin function counteff_simple_float .p2align 5 .type counteff_simple_float,@function counteff_simple_float: # @counteff_simple_float @@ -11174,10 +11130,11 @@ counteff_simple_float: # @counteff_simple_float ori $a1, $zero, 1 bne $fp, $a1, .LBB35_10 .LBB35_8: # %.lr.ph61.preheader - ori $a0, $zero, 4 - bgeu $fp, $a0, .LBB35_18 + ori $a1, $zero, 4 + lu12i.w $a0, -184550 + bgeu $fp, $a1, .LBB35_18 # %bb.9: - move $a0, $zero + move $a1, $zero b .LBB35_21 .LBB35_10: # %.preheader50.preheader move $a1, $zero @@ -11247,45 +11204,46 @@ counteff_simple_float: # @counteff_simple_float bgez $a5, .LBB35_17 b .LBB35_11 .LBB35_18: # %vector.ph95 - addi.d $a1, $s1, 16 - bstrpick.d $a0, $fp, 30, 2 - slli.d $a0, $a0, 2 - lu12i.w $a2, -184550 - ori $a2, $a2, 2556 - lu32i.d $a2, 25165 - lu52i.d $a2, $a2, 1013 - vreplgr2vr.d $vr0, $a2 - move $a2, $a0 + addi.d $a2, $s1, 16 + bstrpick.d $a1, $fp, 30, 2 + slli.d $a1, $a1, 2 + ori $a3, $a0, 2556 + lu32i.d $a3, 25165 + lu52i.d $a3, $a3, 1013 + vreplgr2vr.d $vr0, $a3 + move $a3, $a1 .p2align 4, , 16 .LBB35_19: # %vector.body98 # =>This Inner Loop Header: Depth=1 - vld $vr1, $a1, -16 - vld $vr2, $a1, 0 + vld $vr1, $a2, -16 + vld $vr2, $a2, 0 vfadd.d $vr1, $vr1, $vr0 vfadd.d $vr2, $vr2, $vr0 - vst $vr1, $a1, -16 - vst $vr2, $a1, 0 - addi.d $a2, $a2, -4 - addi.d $a1, $a1, 32 - bnez $a2, .LBB35_19 + vst $vr1, $a2, -16 + vst $vr2, $a2, 0 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 32 + bnez $a3, .LBB35_19 # %bb.20: # %middle.block102 - beq $a0, $fp, .LBB35_23 + beq $a1, $fp, .LBB35_23 .LBB35_21: # %.lr.ph61.preheader119 - pcalau12i $a1, %pc_hi20(counteff_simple_float.rootnode) - addi.d $a1, $a1, %pc_lo12(counteff_simple_float.rootnode) - pcalau12i $a2, %pc_hi20(.LCPI35_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI35_0) - alsl.d $a1, $a0, $a1, 3 - sub.d $a0, $fp, $a0 + pcalau12i $a2, %pc_hi20(counteff_simple_float.rootnode) + addi.d $a2, $a2, %pc_lo12(counteff_simple_float.rootnode) + alsl.d $a2, $a1, $a2, 3 + sub.d $a1, $fp, $a1 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fa0, $a0 .p2align 4, , 16 .LBB35_22: # %.lr.ph61 # =>This Inner Loop Header: Depth=1 - fld.d $fa1, $a1, 0 + fld.d $fa1, $a2, 0 fadd.d $fa1, $fa1, $fa0 - fst.d $fa1, $a1, 0 - addi.d $a0, $a0, -1 - addi.d $a1, $a1, 8 - bnez $a0, .LBB35_22 + fst.d $fa1, $a2, 0 + addi.d $a1, $a1, -1 + addi.d $a2, $a2, 8 + bnez $a1, .LBB35_22 .LBB35_23: # %.lr.ph64.preheader movgr2fr.d $fa0, $zero move $a0, $fp @@ -11355,12 +11313,7 @@ counteff_simple_float: # @counteff_simple_float .Lfunc_end35: .size counteff_simple_float, .Lfunc_end35-counteff_simple_float # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function counteff_simple -.LCPI36_0: - .dword 0x3f50624dd2f1a9fc # double 0.001 - .text - .globl counteff_simple + .globl counteff_simple # -- Begin function counteff_simple .p2align 5 .type counteff_simple,@function counteff_simple: # @counteff_simple @@ -11422,10 +11375,11 @@ counteff_simple: # @counteff_simple ori $a1, $zero, 1 bne $fp, $a1, .LBB36_10 .LBB36_8: # %.lr.ph61.preheader - ori $a0, $zero, 4 - bgeu $fp, $a0, .LBB36_18 + ori $a1, $zero, 4 + lu12i.w $a0, -184550 + bgeu $fp, $a1, .LBB36_18 # %bb.9: - move $a0, $zero + move $a1, $zero b .LBB36_21 .LBB36_10: # %.preheader50.preheader move $a1, $zero @@ -11493,45 +11447,46 @@ counteff_simple: # @counteff_simple bgez $a5, .LBB36_17 b .LBB36_11 .LBB36_18: # %vector.ph95 - addi.d $a1, $s1, 16 - bstrpick.d $a0, $fp, 30, 2 - slli.d $a0, $a0, 2 - lu12i.w $a2, -184550 - ori $a2, $a2, 2556 - lu32i.d $a2, 25165 - lu52i.d $a2, $a2, 1013 - vreplgr2vr.d $vr0, $a2 - move $a2, $a0 + addi.d $a2, $s1, 16 + bstrpick.d $a1, $fp, 30, 2 + slli.d $a1, $a1, 2 + ori $a3, $a0, 2556 + lu32i.d $a3, 25165 + lu52i.d $a3, $a3, 1013 + vreplgr2vr.d $vr0, $a3 + move $a3, $a1 .p2align 4, , 16 .LBB36_19: # %vector.body98 # =>This Inner Loop Header: Depth=1 - vld $vr1, $a1, -16 - vld $vr2, $a1, 0 + vld $vr1, $a2, -16 + vld $vr2, $a2, 0 vfadd.d $vr1, $vr1, $vr0 vfadd.d $vr2, $vr2, $vr0 - vst $vr1, $a1, -16 - vst $vr2, $a1, 0 - addi.d $a2, $a2, -4 - addi.d $a1, $a1, 32 - bnez $a2, .LBB36_19 + vst $vr1, $a2, -16 + vst $vr2, $a2, 0 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 32 + bnez $a3, .LBB36_19 # %bb.20: # %middle.block102 - beq $a0, $fp, .LBB36_23 + beq $a1, $fp, .LBB36_23 .LBB36_21: # %.lr.ph61.preheader119 - pcalau12i $a1, %pc_hi20(counteff_simple.rootnode) - addi.d $a1, $a1, %pc_lo12(counteff_simple.rootnode) - pcalau12i $a2, %pc_hi20(.LCPI36_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI36_0) - alsl.d $a1, $a0, $a1, 3 - sub.d $a0, $fp, $a0 + pcalau12i $a2, %pc_hi20(counteff_simple.rootnode) + addi.d $a2, $a2, %pc_lo12(counteff_simple.rootnode) + alsl.d $a2, $a1, $a2, 3 + sub.d $a1, $fp, $a1 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fa0, $a0 .p2align 4, , 16 .LBB36_22: # %.lr.ph61 # =>This Inner Loop Header: Depth=1 - fld.d $fa1, $a1, 0 + fld.d $fa1, $a2, 0 fadd.d $fa1, $fa1, $fa0 - fst.d $fa1, $a1, 0 - addi.d $a0, $a0, -1 - addi.d $a1, $a1, 8 - bnez $a0, .LBB36_22 + fst.d $fa1, $a2, 0 + addi.d $a1, $a1, -1 + addi.d $a2, $a2, 8 + bnez $a1, .LBB36_22 .LBB36_23: # %.lr.ph64.preheader movgr2fr.d $fa0, $zero move $a0, $fp @@ -11601,12 +11556,7 @@ counteff_simple: # @counteff_simple .Lfunc_end36: .size counteff_simple, .Lfunc_end36-counteff_simple # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function counteff -.LCPI37_0: - .dword 0x3f50624dd2f1a9fc # double 0.001 - .text - .globl counteff + .globl counteff # -- Begin function counteff .p2align 5 .type counteff,@function counteff: # @counteff @@ -11824,8 +11774,9 @@ counteff: # @counteff ori $a1, $zero, 1 bne $s0, $a1, .LBB37_53 .LBB37_31: # %.lr.ph226.preheader - ori $a1, $zero, 4 - bgeu $s0, $a1, .LBB37_61 + ori $a2, $zero, 4 + lu12i.w $a1, -184550 + bgeu $s0, $a2, .LBB37_61 # %bb.32: move $a0, $zero b .LBB37_64 @@ -12058,45 +12009,46 @@ counteff: # @counteff b .LBB37_54 .LBB37_61: # %vector.ph315 slli.d $a0, $a0, 2 - lu12i.w $a1, 97 - ori $a1, $a1, 2720 - add.d $a1, $sp, $a1 - lu12i.w $a2, -184550 - ori $a2, $a2, 2556 - lu32i.d $a2, 25165 - lu52i.d $a2, $a2, 1013 - vreplgr2vr.d $vr0, $a2 - move $a2, $a0 + lu12i.w $a2, 97 + ori $a2, $a2, 2720 + add.d $a2, $sp, $a2 + ori $a3, $a1, 2556 + lu32i.d $a3, 25165 + lu52i.d $a3, $a3, 1013 + vreplgr2vr.d $vr0, $a3 + move $a3, $a0 .p2align 4, , 16 .LBB37_62: # %vector.body318 # =>This Inner Loop Header: Depth=1 - vld $vr1, $a1, -16 - vld $vr2, $a1, 0 + vld $vr1, $a2, -16 + vld $vr2, $a2, 0 vfadd.d $vr1, $vr1, $vr0 vfadd.d $vr2, $vr2, $vr0 - vst $vr1, $a1, -16 - vst $vr2, $a1, 0 - addi.d $a2, $a2, -4 - addi.d $a1, $a1, 32 - bnez $a2, .LBB37_62 + vst $vr1, $a2, -16 + vst $vr2, $a2, 0 + addi.d $a3, $a3, -4 + addi.d $a2, $a2, 32 + bnez $a3, .LBB37_62 # %bb.63: # %middle.block322 beq $a0, $s0, .LBB37_66 .LBB37_64: # %.lr.ph226.preheader326 - pcalau12i $a1, %pc_hi20(.LCPI37_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI37_0) - lu12i.w $a1, 97 - ori $a1, $a1, 2704 - add.d $a1, $sp, $a1 - alsl.d $a1, $a0, $a1, 3 + lu12i.w $a2, 97 + ori $a2, $a2, 2704 + add.d $a2, $sp, $a2 + alsl.d $a2, $a0, $a2, 3 sub.d $a0, $s0, $a0 + ori $a1, $a1, 2556 + lu32i.d $a1, 25165 + lu52i.d $a1, $a1, 1013 + movgr2fr.d $fa0, $a1 .p2align 4, , 16 .LBB37_65: # %.lr.ph226 # =>This Inner Loop Header: Depth=1 - fld.d $fa1, $a1, 0 + fld.d $fa1, $a2, 0 fadd.d $fa1, $fa1, $fa0 - fst.d $fa1, $a1, 0 + fst.d $fa1, $a2, 0 addi.d $a0, $a0, -1 - addi.d $a1, $a1, 8 + addi.d $a2, $a2, 8 bnez $a0, .LBB37_65 .LBB37_66: # %.preheader.us.preheader move $a0, $zero @@ -12411,12 +12363,7 @@ substitution_score: # @substitution_score .Lfunc_end41: .size substitution_score, .Lfunc_end41-substitution_score # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function substitution_hosei -.LCPI42_0: - .dword 0x3fee666666666666 # double 0.94999999999999996 - .text - .globl substitution_hosei + .globl substitution_hosei # -- Begin function substitution_hosei .p2align 5 .type substitution_hosei,@function substitution_hosei: # @substitution_hosei @@ -12452,12 +12399,15 @@ substitution_hosei: # @substitution_hosei ffint.s.l $fa0, $fa0 bstrpick.d $a0, $a2, 31, 0 movgr2fr.d $fa1, $a0 - pcalau12i $a0, %pc_hi20(.LCPI42_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI42_0) ffint.s.l $fa1, $fa1 fdiv.s $fa0, $fa0, $fa1 fcvt.d.s $fa0, $fa0 - fcmp.cule.d $fcc0, $fa2, $fa0 + lu12i.w $a0, 419430 + ori $a0, $a0, 1638 + lu32i.d $a0, -104858 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 + fcmp.cule.d $fcc0, $fa1, $fa0 bcnez $fcc0, .LBB42_8 # %bb.7: vldi $vr1, -912 @@ -12542,12 +12492,7 @@ substitution: # @substitution .Lfunc_end43: .size substitution, .Lfunc_end43-substitution # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function treeconstruction -.LCPI44_0: - .dword 0x3fee666666666666 # double 0.94999999999999996 - .text - .globl treeconstruction + .globl treeconstruction # -- Begin function treeconstruction .p2align 5 .type treeconstruction,@function treeconstruction: # @treeconstruction @@ -12587,9 +12532,12 @@ treeconstruction: # @treeconstruction move $s6, $zero addi.w $a0, $s0, -1 st.d $a0, $sp, 24 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI44_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI44_0) ori $s2, $zero, 1 + lu12i.w $a0, 419430 + ori $a0, $a0, 1638 + lu32i.d $a0, -104858 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fs0, $a0 ori $s7, $zero, 45 b .LBB44_5 .p2align 4, , 16 @@ -12770,12 +12718,7 @@ treeconstruction: # @treeconstruction .Lfunc_end44: .size treeconstruction, .Lfunc_end44-treeconstruction # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function bscore_calc -.LCPI45_0: - .dword 0x4079000000000000 # double 400 - .text - .globl bscore_calc + .globl bscore_calc # -- Begin function bscore_calc .p2align 5 .type bscore_calc,@function bscore_calc: # @bscore_calc @@ -12935,13 +12878,15 @@ bscore_calc: # @bscore_calc ld.d $a0, $a0, %got_pc_lo12(scoremtx) ld.w $a0, $a0, 0 sltui $a0, $a0, 1 - pcalau12i $a1, %pc_hi20(.LCPI45_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI45_0) - vldi $vr2, -912 - movgr2fr.d $fa3, $zero + vldi $vr1, -912 + movgr2fr.d $fa2, $zero movgr2cf $fcc0, $a0 - fsel $fa2, $fa3, $fa2, $fcc0 - fmadd.d $fa0, $fa2, $fa1, $fa0 + fsel $fa1, $fa2, $fa1, $fcc0 + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1031 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa2, $fa0 fcvt.s.d $fa0, $fa0 ld.d $s8, $sp, 8 # 8-byte Folded Reload ld.d $s7, $sp, 16 # 8-byte Folded Reload @@ -13626,14 +13571,7 @@ isaligned: # @isaligned .Lfunc_end54: .size isaligned, .Lfunc_end54-isaligned # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function score_calc_for_score -.LCPI55_0: - .dword 0x7ff8000000000000 # double NaN -.LCPI55_1: - .dword 0x4079000000000000 # double 400 - .text - .globl score_calc_for_score + .globl score_calc_for_score # -- Begin function score_calc_for_score .p2align 5 .type score_calc_for_score,@function score_calc_for_score: # @score_calc_for_score @@ -13783,8 +13721,10 @@ score_calc_for_score: # @score_calc_for_score movgr2fr.d $fs0, $zero b .LBB55_19 .LBB55_18: - pcalau12i $a0, %pc_hi20(.LCPI55_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI55_0) + ori $a0, $zero, 0 + lu32i.d $a0, -524288 + lu52i.d $a0, $a0, 2047 + movgr2fr.d $fs0, $a0 .LBB55_19: # %._crit_edge pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) @@ -13799,11 +13739,13 @@ score_calc_for_score: # @score_calc_for_score vldi $vr1, -784 fadd.d $fa1, $fa0, $fa1 fmul.d $fa0, $fa1, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI55_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI55_1) - vldi $vr2, -928 - fmul.d $fa0, $fa0, $fa2 + vldi $vr1, -928 + fmul.d $fa0, $fa0, $fa1 fdiv.d $fa0, $fs0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1031 + movgr2fr.d $fa1, $a0 fadd.d $fa0, $fa0, $fa1 fld.d $fs0, $sp, 8 # 8-byte Folded Reload ld.d $s1, $sp, 16 # 8-byte Folded Reload @@ -13871,12 +13813,7 @@ floatncpy: # @floatncpy .Lfunc_end56: .size floatncpy, .Lfunc_end56-floatncpy # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function score_calc_a -.LCPI57_0: - .dword 0x4079000000000000 # double 400 - .text - .globl score_calc_a + .globl score_calc_a # -- Begin function score_calc_a .p2align 5 .type score_calc_a,@function score_calc_a: # @score_calc_a @@ -14023,13 +13960,15 @@ score_calc_a: # @score_calc_a ld.d $a0, $a0, %got_pc_lo12(scoremtx) ld.w $a0, $a0, 0 sltui $a0, $a0, 1 - pcalau12i $a1, %pc_hi20(.LCPI57_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI57_0) - vldi $vr2, -912 - movgr2fr.d $fa3, $zero + vldi $vr1, -912 + movgr2fr.d $fa2, $zero movgr2cf $fcc0, $a0 - fsel $fa2, $fa3, $fa2, $fcc0 - fmadd.d $fa0, $fa2, $fa1, $fa0 + fsel $fa1, $fa2, $fa1, $fcc0 + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1031 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa2, $fa0 fcvt.s.d $fa0, $fa0 ld.d $s8, $sp, 8 # 8-byte Folded Reload ld.d $s7, $sp, 16 # 8-byte Folded Reload @@ -14047,12 +13986,7 @@ score_calc_a: # @score_calc_a .Lfunc_end57: .size score_calc_a, .Lfunc_end57-score_calc_a # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function score_calc_s -.LCPI58_0: - .word 0x43c80000 # float 400 - .text - .globl score_calc_s + .globl score_calc_s # -- Begin function score_calc_s .p2align 5 .type score_calc_s,@function score_calc_s: # @score_calc_s @@ -14184,10 +14118,10 @@ score_calc_s: # @score_calc_s .LBB58_9: fmov.s $fa0, $fa1 .LBB58_10: # %._crit_edge - pcalau12i $a0, %pc_hi20(.LCPI58_0) - fld.s $fa2, $a0, %pc_lo12(.LCPI58_0) fdiv.s $fa0, $fa0, $fa1 - fadd.s $fa0, $fa0, $fa2 + lu12i.w $a0, 277632 + movgr2fr.w $fa1, $a0 + fadd.s $fa0, $fa0, $fa1 ld.d $s6, $sp, 8 # 8-byte Folded Reload ld.d $s5, $sp, 16 # 8-byte Folded Reload ld.d $s4, $sp, 24 # 8-byte Folded Reload @@ -14202,12 +14136,7 @@ score_calc_s: # @score_calc_s .Lfunc_end58: .size score_calc_s, .Lfunc_end58-score_calc_s # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function score_calc_for_score_s -.LCPI59_0: - .dword 0x4079000000000000 # double 400 - .text - .globl score_calc_for_score_s + .globl score_calc_for_score_s # -- Begin function score_calc_for_score_s .p2align 5 .type score_calc_for_score_s,@function score_calc_for_score_s: # @score_calc_for_score_s @@ -14334,10 +14263,12 @@ score_calc_for_score_s: # @score_calc_for_score_s .LBB59_9: fmov.d $fa1, $fa0 .LBB59_10: # %._crit_edge - pcalau12i $a0, %pc_hi20(.LCPI59_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI59_0) fdiv.d $fa0, $fa1, $fa0 - fadd.d $fa0, $fa0, $fa2 + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1031 + movgr2fr.d $fa1, $a0 + fadd.d $fa0, $fa0, $fa1 ld.d $s4, $sp, 8 # 8-byte Folded Reload ld.d $s3, $sp, 16 # 8-byte Folded Reload ld.d $s2, $sp, 24 # 8-byte Folded Reload @@ -14777,14 +14708,7 @@ DSPscore: # @DSPscore .Lfunc_end62: .size DSPscore, .Lfunc_end62-DSPscore # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function searchAnchors -.LCPI63_0: - .dword 0x4059000000000000 # double 100 -.LCPI63_1: - .dword 0x4082c00000000000 # double 600 - .text - .globl searchAnchors + .globl searchAnchors # -- Begin function searchAnchors .p2align 5 .type searchAnchors,@function searchAnchors: # @searchAnchors @@ -14827,13 +14751,17 @@ searchAnchors: # @searchAnchors pcalau12i $a0, %got_pc_hi20(divThreshold) ld.d $a0, $a0, %got_pc_lo12(divThreshold) ld.w $a0, $a0, 0 - pcalau12i $a1, %pc_hi20(.LCPI63_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI63_0) - pcalau12i $a1, %pc_hi20(.LCPI63_1) - fld.d $fa1, $a1, %pc_lo12(.LCPI63_1) - movgr2fr.w $fa2, $a0 - ffint.d.w $fa2, $fa2 - fdiv.d $fa0, $fa2, $fa0 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fdiv.d $fa0, $fa0, $fa1 + lu32i.d $a0, 180224 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 pcalau12i $a0, %got_pc_hi20(divWinSize) ld.d $a0, $a0, %got_pc_lo12(divWinSize) @@ -15225,12 +15153,7 @@ dontcalcimportance: # @dontcalcimportance .Lfunc_end64: .size dontcalcimportance, .Lfunc_end64-dontcalcimportance # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function calcimportance -.LCPI65_0: - .dword 0x7ff8000000000000 # double NaN - .text - .globl calcimportance + .globl calcimportance # -- Begin function calcimportance .p2align 5 .type calcimportance,@function calcimportance: # @calcimportance @@ -15316,9 +15239,11 @@ calcimportance: # @calcimportance slli.d $s3, $a0, 3 addi.d $s6, $s2, 8 addi.d $s7, $s2, 16 - pcalau12i $a0, %pc_hi20(.LCPI65_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI65_0) vldi $vr3, -784 + ori $a0, $zero, 0 + lu32i.d $a0, -524288 + lu52i.d $a0, $a0, 2047 + movgr2fr.d $fs0, $a0 movgr2fr.d $fs1, $zero ori $s8, $zero, 5 b .LBB65_11 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/pairlocalalign.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/pairlocalalign.s index 108f5754..c4918ad3 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/pairlocalalign.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/pairlocalalign.s @@ -1,12 +1,6 @@ .file "pairlocalalign.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function arguments -.LCPI0_0: - .dword 0x408f400000000000 # double 1000 -.LCPI0_1: - .dword 0x3fe3333333333333 # double 0.59999999999999998 .text - .globl arguments + .globl arguments # -- Begin function arguments .p2align 5 .type arguments,@function arguments: # @arguments @@ -376,10 +370,12 @@ arguments: # @arguments ld.d $t1, $sp, 192 # 8-byte Folded Reload ld.d $t0, $sp, 200 # 8-byte Folded Reload ld.d $a7, $sp, 208 # 8-byte Folded Reload - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) - vldi $vr2, -800 - fmadd.d $fa0, $fa0, $fa1, $fa2 + vldi $vr1, -800 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa2, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 ld.d $a1, $sp, 72 # 8-byte Folded Reload @@ -398,10 +394,12 @@ arguments: # @arguments ld.d $t1, $sp, 192 # 8-byte Folded Reload ld.d $t0, $sp, 200 # 8-byte Folded Reload ld.d $a7, $sp, 208 # 8-byte Folded Reload - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) - vldi $vr2, -800 - fmadd.d $fa0, $fa0, $fa1, $fa2 + vldi $vr1, -800 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa2, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 ld.d $a1, $sp, 104 # 8-byte Folded Reload @@ -437,20 +435,25 @@ arguments: # @arguments ld.d $t1, $sp, 192 # 8-byte Folded Reload ld.d $t0, $sp, 200 # 8-byte Folded Reload ld.d $a7, $sp, 208 # 8-byte Folded Reload - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) - vldi $vr2, -928 - fmadd.d $fa0, $fa0, $fa1, $fa2 + vldi $vr1, -928 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa2, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 pcalau12i $a1, %got_pc_hi20(ppslocal) ld.d $a1, $a1, %got_pc_lo12(ppslocal) - pcalau12i $a2, %pc_hi20(.LCPI0_1) - fld.d $fa0, $a2, %pc_lo12(.LCPI0_1) st.w $a0, $a1, 0 - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fmadd.d $fa0, $fa1, $fa0, $fa2 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + lu12i.w $a0, 209715 + ori $a0, $a0, 819 + lu32i.d $a0, 209715 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa2, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 ld.d $a1, $sp, 112 # 8-byte Folded Reload @@ -469,10 +472,12 @@ arguments: # @arguments ld.d $t1, $sp, 192 # 8-byte Folded Reload ld.d $t0, $sp, 200 # 8-byte Folded Reload ld.d $a7, $sp, 208 # 8-byte Folded Reload - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) - vldi $vr2, -800 - fmadd.d $fa0, $fa0, $fa1, $fa2 + vldi $vr1, -800 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa2, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 ld.d $a1, $sp, 96 # 8-byte Folded Reload @@ -491,10 +496,12 @@ arguments: # @arguments ld.d $t1, $sp, 192 # 8-byte Folded Reload ld.d $t0, $sp, 200 # 8-byte Folded Reload ld.d $a7, $sp, 208 # 8-byte Folded Reload - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) - vldi $vr2, -800 - fmadd.d $fa0, $fa0, $fa1, $fa2 + vldi $vr1, -800 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa2, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 ld.d $a1, $sp, 88 # 8-byte Folded Reload @@ -530,10 +537,12 @@ arguments: # @arguments ld.d $t1, $sp, 192 # 8-byte Folded Reload ld.d $t0, $sp, 200 # 8-byte Folded Reload ld.d $a7, $sp, 208 # 8-byte Folded Reload - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) - vldi $vr2, -800 - fmadd.d $fa0, $fa0, $fa1, $fa2 + vldi $vr1, -800 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa2, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 ld.d $a1, $sp, 80 # 8-byte Folded Reload @@ -975,14 +984,7 @@ countamino: # @countamino .Lfunc_end1: .size countamino, .Lfunc_end1-countamino # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI2_0: - .dword 0x408f400000000000 # double 1000 -.LCPI2_1: - .dword 0xc0c3878000000000 # double -9999 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -1168,26 +1170,28 @@ main: # @main pcalau12i $a1, %got_pc_hi20(ppenalty) ld.d $s7, $a1, %got_pc_lo12(ppenalty) ld.w $a1, $s7, 0 - pcalau12i $a2, %pc_hi20(.LCPI2_0) - fld.d $fs0, $a2, %pc_lo12(.LCPI2_0) movgr2fr.w $fa0, $a1 ffint.d.w $fa0, $fa0 - fdiv.d $fa0, $fa0, $fs0 + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1032 + movgr2fr.d $fa1, $a1 + fdiv.d $fa0, $fa0, $fa1 pcalau12i $a1, %got_pc_hi20(ppenalty_ex) ld.d $s0, $a1, %got_pc_lo12(ppenalty_ex) ld.w $a1, $s0, 0 - movgr2fr.w $fa1, $a1 - ffint.d.w $fa1, $fa1 - fdiv.d $fa1, $fa1, $fs0 + movgr2fr.w $fa2, $a1 + ffint.d.w $fa2, $fa2 + fdiv.d $fa2, $fa2, $fa1 pcalau12i $a1, %got_pc_hi20(poffset) ld.d $s2, $a1, %got_pc_lo12(poffset) ld.w $a1, $s2, 0 - movgr2fr.w $fa2, $a1 - ffint.d.w $fa2, $fa2 - fdiv.d $fa2, $fa2, $fs0 + movgr2fr.w $fa3, $a1 + ffint.d.w $fa3, $fa3 + fdiv.d $fa1, $fa3, $fa1 movfr2gr.d $a2, $fa0 - movfr2gr.d $a3, $fa1 - movfr2gr.d $a4, $fa2 + movfr2gr.d $a3, $fa2 + movfr2gr.d $a4, $fa1 pcalau12i $a1, %pc_hi20(.L.str.26) addi.d $a1, $a1, %pc_lo12(.L.str.26) pcaddu18i $ra, %call36(fprintf) @@ -1300,19 +1304,23 @@ main: # @main .LBB2_32: ld.w $a0, $s7, 0 movgr2fr.w $fa0, $a0 - ld.w $a0, $s0, 0 ffint.d.w $fa0, $fa0 - fdiv.d $fa0, $fa0, $fs0 - ld.w $a1, $s2, 0 - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fdiv.d $fa1, $fa1, $fs0 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + ld.w $a1, $s0, 0 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 + ld.w $a0, $s2, 0 movgr2fr.w $fa2, $a1 ffint.d.w $fa2, $fa2 - fdiv.d $fa2, $fa2, $fs0 + fdiv.d $fa2, $fa2, $fa1 + movgr2fr.w $fa3, $a0 + ffint.d.w $fa3, $fa3 + fdiv.d $fa1, $fa3, $fa1 movfr2gr.d $a2, $fa0 - movfr2gr.d $a3, $fa1 - movfr2gr.d $a4, $fa2 + movfr2gr.d $a3, $fa2 + movfr2gr.d $a4, $fa1 pcalau12i $a0, %pc_hi20(.L.str.26) addi.d $a1, $a0, %pc_lo12(.L.str.26) move $a0, $fp @@ -2710,9 +2718,11 @@ main: # @main .LBB2_196: # %.lr.ph62.i move $a2, $zero addi.w $a3, $s7, -1 - pcalau12i $a4, %pc_hi20(.LCPI2_1) - fld.d $fa0, $a4, %pc_lo12(.LCPI2_1) ori $a4, $zero, 8 + ori $a5, $zero, 0 + lu32i.d $a5, 231296 + lu52i.d $a5, $a5, -1012 + movgr2fr.d $fa0, $a5 movgr2fr.d $fa1, $zero vldi $vr2, -912 b .LBB2_198 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/partQalignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/partQalignmm.s index 39935d0d..fdac3978 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/partQalignmm.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/partQalignmm.s @@ -963,60 +963,53 @@ part_imp_match_initQ: # @part_imp_match_initQ .Lfunc_end3: .size part_imp_match_initQ, .Lfunc_end3-part_imp_match_initQ # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function partQ__align -.LCPI4_0: - .dword 0x3ff4cccccccccccd # double 1.3 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI4_1: + .p2align 4, 0x0 # -- Begin function partQ__align +.LCPI4_0: .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 .word 4 # 0x4 -.LCPI4_3: +.LCPI4_1: .dword 3 # 0x3 .dword 4 # 0x4 -.LCPI4_4: +.LCPI4_2: .dword 1 # 0x1 .dword 2 # 0x2 -.LCPI4_5: +.LCPI4_3: .dword 0 # 0x0 .dword 1 # 0x1 -.LCPI4_6: +.LCPI4_4: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI4_2: - .word 0x461c4000 # float 1.0E+4 .text .globl partQ__align .p2align 5 .type partQ__align,@function partQ__align: # @partQ__align # %bb.0: - addi.d $sp, $sp, -512 - st.d $ra, $sp, 504 # 8-byte Folded Spill - st.d $fp, $sp, 496 # 8-byte Folded Spill - st.d $s0, $sp, 488 # 8-byte Folded Spill - st.d $s1, $sp, 480 # 8-byte Folded Spill - st.d $s2, $sp, 472 # 8-byte Folded Spill - st.d $s3, $sp, 464 # 8-byte Folded Spill - st.d $s4, $sp, 456 # 8-byte Folded Spill - st.d $s5, $sp, 448 # 8-byte Folded Spill - st.d $s6, $sp, 440 # 8-byte Folded Spill - st.d $s7, $sp, 432 # 8-byte Folded Spill - st.d $s8, $sp, 424 # 8-byte Folded Spill - fst.d $fs0, $sp, 416 # 8-byte Folded Spill - st.d $a7, $sp, 96 # 8-byte Folded Spill - st.d $a6, $sp, 32 # 8-byte Folded Spill + addi.d $sp, $sp, -528 + st.d $ra, $sp, 520 # 8-byte Folded Spill + st.d $fp, $sp, 512 # 8-byte Folded Spill + st.d $s0, $sp, 504 # 8-byte Folded Spill + st.d $s1, $sp, 496 # 8-byte Folded Spill + st.d $s2, $sp, 488 # 8-byte Folded Spill + st.d $s3, $sp, 480 # 8-byte Folded Spill + st.d $s4, $sp, 472 # 8-byte Folded Spill + st.d $s5, $sp, 464 # 8-byte Folded Spill + st.d $s6, $sp, 456 # 8-byte Folded Spill + st.d $s7, $sp, 448 # 8-byte Folded Spill + st.d $s8, $sp, 440 # 8-byte Folded Spill + fst.d $fs0, $sp, 432 # 8-byte Folded Spill + fst.d $fs1, $sp, 424 # 8-byte Folded Spill + st.d $a7, $sp, 104 # 8-byte Folded Spill + st.d $a6, $sp, 40 # 8-byte Folded Spill move $s8, $a5 - st.d $a4, $sp, 400 # 8-byte Folded Spill + st.d $a4, $sp, 408 # 8-byte Folded Spill st.d $a3, $sp, 224 # 8-byte Folded Spill - st.d $a2, $sp, 328 # 8-byte Folded Spill + st.d $a2, $sp, 336 # 8-byte Folded Spill move $s1, $a1 move $s2, $a0 pcalau12i $a0, %got_pc_hi20(penalty) @@ -1025,11 +1018,11 @@ partQ__align: # @partQ__align st.d $a1, $sp, 216 # 8-byte Folded Spill ld.w $s7, $a1, %pc_lo12(partQ__align.orlgth1) ld.w $a0, $a0, 0 - st.d $a0, $sp, 40 # 8-byte Folded Spill + st.d $a0, $sp, 48 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.mseq1) - st.d $a0, $sp, 104 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(partQ__align.mseq2) st.d $a0, $sp, 112 # 8-byte Folded Spill + pcalau12i $a0, %pc_hi20(partQ__align.mseq2) + st.d $a0, $sp, 120 # 8-byte Folded Spill bnez $s7, .LBB4_2 # %bb.1: pcalau12i $a0, %got_pc_hi20(njob) @@ -1039,7 +1032,7 @@ partQ__align: # @partQ__align pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 ld.w $a1, $fp, 0 - ld.d $a2, $sp, 104 # 8-byte Folded Reload + ld.d $a2, $sp, 112 # 8-byte Folded Reload st.d $a0, $a2, %pc_lo12(partQ__align.mseq1) move $a0, $a1 move $a1, $zero @@ -1047,7 +1040,7 @@ partQ__align: # @partQ__align jirl $ra, $ra, 0 ld.d $a1, $sp, 216 # 8-byte Folded Reload ld.w $s7, $a1, %pc_lo12(partQ__align.orlgth1) - ld.d $a1, $sp, 112 # 8-byte Folded Reload + ld.d $a1, $sp, 120 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.mseq2) .LBB4_2: ld.d $a0, $s2, 0 @@ -1066,79 +1059,79 @@ partQ__align: # @partQ__align addi.w $a1, $s5, 0 pcalau12i $s6, %pc_hi20(partQ__align.w1) pcalau12i $a0, %pc_hi20(partQ__align.w2) - st.d $a0, $sp, 312 # 8-byte Folded Spill + st.d $a0, $sp, 320 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.initverticalw) - st.d $a0, $sp, 304 # 8-byte Folded Spill + st.d $a0, $sp, 312 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.lastverticalw) - st.d $a0, $sp, 128 # 8-byte Folded Spill + st.d $a0, $sp, 136 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.m) - st.d $a0, $sp, 320 # 8-byte Folded Spill + st.d $a0, $sp, 328 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.mp) - st.d $a0, $sp, 56 # 8-byte Folded Spill + st.d $a0, $sp, 64 # 8-byte Folded Spill pcalau12i $s4, %pc_hi20(partQ__align.mseq) pcalau12i $a0, %pc_hi20(partQ__align.digf1) - st.d $a0, $sp, 360 # 8-byte Folded Spill + st.d $a0, $sp, 368 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.digf2) - st.d $a0, $sp, 336 # 8-byte Folded Spill + st.d $a0, $sp, 344 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.diaf1) st.d $a0, $sp, 232 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.diaf2) st.d $a0, $sp, 240 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.gapz1) - st.d $a0, $sp, 384 # 8-byte Folded Spill + st.d $a0, $sp, 392 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.gapz2) - st.d $a0, $sp, 344 # 8-byte Folded Spill + st.d $a0, $sp, 352 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.gapf1) - st.d $a0, $sp, 264 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(partQ__align.gapf2) st.d $a0, $sp, 272 # 8-byte Folded Spill + pcalau12i $a0, %pc_hi20(partQ__align.gapf2) + st.d $a0, $sp, 280 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.ogcp1g) - st.d $a0, $sp, 376 # 8-byte Folded Spill + st.d $a0, $sp, 384 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.ogcp2g) - st.d $a0, $sp, 200 # 8-byte Folded Spill + st.d $a0, $sp, 208 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.fgcp1g) - st.d $a0, $sp, 288 # 8-byte Folded Spill + st.d $a0, $sp, 296 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.fgcp2g) - st.d $a0, $sp, 192 # 8-byte Folded Spill + st.d $a0, $sp, 200 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.og_h_dg_n1_p) - st.d $a0, $sp, 184 # 8-byte Folded Spill + st.d $a0, $sp, 192 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.og_h_dg_n2_p) - st.d $a0, $sp, 280 # 8-byte Folded Spill + st.d $a0, $sp, 288 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.fg_h_dg_n1_p) - st.d $a0, $sp, 176 # 8-byte Folded Spill + st.d $a0, $sp, 184 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.fg_h_dg_n2_p) - st.d $a0, $sp, 168 # 8-byte Folded Spill + st.d $a0, $sp, 176 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.og_t_fg_h_dg_n1_p) - st.d $a0, $sp, 160 # 8-byte Folded Spill + st.d $a0, $sp, 168 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.og_t_fg_h_dg_n2_p) - st.d $a0, $sp, 80 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(partQ__align.fg_t_og_h_dg_n1_p) st.d $a0, $sp, 88 # 8-byte Folded Spill + pcalau12i $a0, %pc_hi20(partQ__align.fg_t_og_h_dg_n1_p) + st.d $a0, $sp, 96 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.fg_t_og_h_dg_n2_p) - st.d $a0, $sp, 64 # 8-byte Folded Spill + st.d $a0, $sp, 72 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.gapz_n1) - st.d $a0, $sp, 152 # 8-byte Folded Spill + st.d $a0, $sp, 160 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.gapz_n2) - st.d $a0, $sp, 72 # 8-byte Folded Spill + st.d $a0, $sp, 80 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.cpmx1) - st.d $a0, $sp, 392 # 8-byte Folded Spill + st.d $a0, $sp, 400 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.cpmx2) - st.d $a0, $sp, 352 # 8-byte Folded Spill + st.d $a0, $sp, 360 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.floatwork) - st.d $a0, $sp, 296 # 8-byte Folded Spill + st.d $a0, $sp, 304 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(partQ__align.intwork) - st.d $a0, $sp, 256 # 8-byte Folded Spill - st.d $s1, $sp, 136 # 8-byte Folded Spill - st.d $s3, $sp, 144 # 8-byte Folded Spill - st.d $s5, $sp, 48 # 8-byte Folded Spill - st.d $a1, $sp, 368 # 8-byte Folded Spill - st.d $fp, $sp, 120 # 8-byte Folded Spill - st.d $s6, $sp, 208 # 8-byte Folded Spill + st.d $a0, $sp, 264 # 8-byte Folded Spill + st.d $s1, $sp, 144 # 8-byte Folded Spill + st.d $s3, $sp, 152 # 8-byte Folded Spill + st.d $s5, $sp, 56 # 8-byte Folded Spill + st.d $a1, $sp, 376 # 8-byte Folded Spill + st.d $fp, $sp, 128 # 8-byte Folded Spill + st.d $s6, $sp, 32 # 8-byte Folded Spill blt $s7, $s3, .LBB4_4 # %bb.3: bge $s0, $a1, .LBB4_8 .LBB4_4: - st.d $s4, $sp, 408 # 8-byte Folded Spill + st.d $s4, $sp, 416 # 8-byte Folded Spill pcalau12i $s3, %pc_hi20(partQ__align.match) blez $s7, .LBB4_7 # %bb.5: @@ -1147,38 +1140,38 @@ partQ__align: # @partQ__align ld.d $a0, $s6, %pc_lo12(partQ__align.w1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 312 # 8-byte Folded Reload + ld.d $a0, $sp, 320 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.w2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 ld.d $a0, $s3, %pc_lo12(partQ__align.match) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 304 # 8-byte Folded Reload + ld.d $a0, $sp, 312 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.initverticalw) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 128 # 8-byte Folded Reload + ld.d $a0, $sp, 136 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.lastverticalw) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 320 # 8-byte Folded Reload + ld.d $a0, $sp, 328 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.m) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 56 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.mp) pcaddu18i $ra, %call36(FreeIntVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 408 # 8-byte Folded Reload + ld.d $a0, $sp, 416 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.mseq) pcaddu18i $ra, %call36(FreeCharMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 360 # 8-byte Folded Reload + ld.d $a0, $sp, 368 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.digf1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 336 # 8-byte Folded Reload + ld.d $a0, $sp, 344 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.digf2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 @@ -1190,91 +1183,91 @@ partQ__align: # @partQ__align ld.d $a0, $a0, %pc_lo12(partQ__align.diaf2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 384 # 8-byte Folded Reload + ld.d $a0, $sp, 392 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.gapz1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 344 # 8-byte Folded Reload + ld.d $a0, $sp, 352 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.gapz2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 264 # 8-byte Folded Reload + ld.d $a0, $sp, 272 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.gapf1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 272 # 8-byte Folded Reload + ld.d $a0, $sp, 280 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.gapf2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 376 # 8-byte Folded Reload + ld.d $a0, $sp, 384 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.ogcp1g) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 200 # 8-byte Folded Reload + ld.d $a0, $sp, 208 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.ogcp2g) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 288 # 8-byte Folded Reload + ld.d $a0, $sp, 296 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.fgcp1g) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 192 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.fgcp2g) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.og_h_dg_n1_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 280 # 8-byte Folded Reload + ld.d $a0, $sp, 288 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.og_h_dg_n2_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 176 # 8-byte Folded Reload + ld.d $a0, $sp, 184 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.fg_h_dg_n1_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 168 # 8-byte Folded Reload + ld.d $a0, $sp, 176 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.fg_h_dg_n2_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 160 # 8-byte Folded Reload + ld.d $a0, $sp, 168 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.og_t_fg_h_dg_n1_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 80 # 8-byte Folded Reload + ld.d $a0, $sp, 88 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.og_t_fg_h_dg_n2_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 88 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.fg_t_og_h_dg_n1_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 64 # 8-byte Folded Reload + ld.d $a0, $sp, 72 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.fg_t_og_h_dg_n2_p) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a0, $sp, 160 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.gapz_n1) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 72 # 8-byte Folded Reload + ld.d $a0, $sp, 80 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.gapz_n2) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $sp, 392 # 8-byte Folded Reload + ld.d $a0, $sp, 400 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.cpmx1) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 352 # 8-byte Folded Reload + ld.d $a0, $sp, 360 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.cpmx2) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 296 # 8-byte Folded Reload + ld.d $a0, $sp, 304 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.floatwork) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 - ld.d $a0, $sp, 256 # 8-byte Folded Reload + ld.d $a0, $sp, 264 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.intwork) pcaddu18i $ra, %call36(FreeIntMtx) jirl $ra, $ra, 0 @@ -1283,21 +1276,24 @@ partQ__align: # @partQ__align ld.d $a0, $sp, 24 # 8-byte Folded Reload ld.w $s0, $a0, %pc_lo12(partQ__align.orlgth2) .LBB4_7: - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI4_0) - movgr2fr.w $fa1, $fp - ffint.d.w $fa1, $fa1 - fmul.d $fa1, $fa1, $fa0 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 + movgr2fr.w $fa0, $fp + ffint.d.w $fa0, $fa0 + lu12i.w $a0, -209716 + ori $a0, $a0, 3277 + lu32i.d $a0, 314572 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a0, $fa0 slt $a1, $a0, $s7 masknez $a0, $a0, $a1 maskeqz $a1, $s7, $a1 or $s7, $a1, $a0 addi.w $fp, $s7, 100 - movgr2fr.w $fa1, $s5 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $s5 + ffint.d.w $fa0, $fa0 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 slt $a1, $a0, $s0 @@ -1313,7 +1309,7 @@ partQ__align: # @partQ__align move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 312 # 8-byte Folded Reload + ld.d $a1, $sp, 320 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.w2) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -1323,22 +1319,22 @@ partQ__align: # @partQ__align move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 304 # 8-byte Folded Reload + ld.d $a1, $sp, 312 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.initverticalw) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 128 # 8-byte Folded Reload + ld.d $a1, $sp, 136 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.lastverticalw) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 320 # 8-byte Folded Reload + ld.d $a1, $sp, 328 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.m) move $a0, $s5 pcaddu18i $ra, %call36(AllocateIntVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 56 # 8-byte Folded Reload + ld.d $a1, $sp, 64 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.mp) pcalau12i $a0, %got_pc_hi20(njob) ld.d $a0, $a0, %got_pc_lo12(njob) @@ -1346,17 +1342,17 @@ partQ__align: # @partQ__align add.w $a1, $s1, $fp pcaddu18i $ra, %call36(AllocateCharMtx) jirl $ra, $ra, 0 - ld.d $s4, $sp, 408 # 8-byte Folded Reload + ld.d $s4, $sp, 416 # 8-byte Folded Reload st.d $a0, $s4, %pc_lo12(partQ__align.mseq) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 360 # 8-byte Folded Reload + ld.d $a1, $sp, 368 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.digf1) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 336 # 8-byte Folded Reload + ld.d $a1, $sp, 344 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.digf2) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) @@ -1371,104 +1367,104 @@ partQ__align: # @partQ__align move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 384 # 8-byte Folded Reload + ld.d $a1, $sp, 392 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.gapz1) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 344 # 8-byte Folded Reload + ld.d $a1, $sp, 352 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.gapz2) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 264 # 8-byte Folded Reload + ld.d $a1, $sp, 272 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.gapf1) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 272 # 8-byte Folded Reload + ld.d $a1, $sp, 280 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.gapf2) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 376 # 8-byte Folded Reload + ld.d $a1, $sp, 384 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.ogcp1g) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 200 # 8-byte Folded Reload + ld.d $a1, $sp, 208 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.ogcp2g) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 288 # 8-byte Folded Reload + ld.d $a1, $sp, 296 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.fgcp1g) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 192 # 8-byte Folded Reload + ld.d $a1, $sp, 200 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.fgcp2g) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 184 # 8-byte Folded Reload + ld.d $a1, $sp, 192 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.og_h_dg_n1_p) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 280 # 8-byte Folded Reload + ld.d $a1, $sp, 288 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.og_h_dg_n2_p) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 176 # 8-byte Folded Reload + ld.d $a1, $sp, 184 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.fg_h_dg_n1_p) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 168 # 8-byte Folded Reload + ld.d $a1, $sp, 176 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.fg_h_dg_n2_p) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 160 # 8-byte Folded Reload + ld.d $a1, $sp, 168 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.og_t_fg_h_dg_n1_p) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 80 # 8-byte Folded Reload + ld.d $a1, $sp, 88 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.og_t_fg_h_dg_n2_p) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 88 # 8-byte Folded Reload + ld.d $a1, $sp, 96 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.fg_t_og_h_dg_n1_p) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 64 # 8-byte Folded Reload + ld.d $a1, $sp, 72 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.fg_t_og_h_dg_n2_p) move $a0, $s6 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 152 # 8-byte Folded Reload + ld.d $a1, $sp, 160 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.gapz_n1) move $a0, $s5 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - ld.d $a1, $sp, 72 # 8-byte Folded Reload + ld.d $a1, $sp, 80 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.gapz_n2) ori $a0, $zero, 26 move $a1, $s6 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 392 # 8-byte Folded Reload + ld.d $a1, $sp, 400 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.cpmx1) ori $a0, $zero, 26 move $a1, $s5 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 352 # 8-byte Folded Reload + ld.d $a1, $sp, 360 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.cpmx2) slt $a0, $s1, $fp masknez $a1, $s1, $a0 @@ -1479,25 +1475,25 @@ partQ__align: # @partQ__align move $a0, $s3 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 296 # 8-byte Folded Reload + ld.d $a1, $sp, 304 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.floatwork) ori $a1, $zero, 26 move $a0, $s3 pcaddu18i $ra, %call36(AllocateIntMtx) jirl $ra, $ra, 0 - ld.d $a1, $sp, 256 # 8-byte Folded Reload + ld.d $a1, $sp, 264 # 8-byte Folded Reload st.d $a0, $a1, %pc_lo12(partQ__align.intwork) ld.d $a0, $sp, 216 # 8-byte Folded Reload st.w $s7, $a0, %pc_lo12(partQ__align.orlgth1) ld.d $a0, $sp, 24 # 8-byte Folded Reload st.w $s0, $a0, %pc_lo12(partQ__align.orlgth2) .LBB4_8: - ld.d $a4, $sp, 400 # 8-byte Folded Reload + ld.d $a4, $sp, 408 # 8-byte Folded Reload ld.d $s3, $sp, 224 # 8-byte Folded Reload blez $a4, .LBB4_16 # %bb.9: # %.lr.ph ld.d $a0, $s4, %pc_lo12(partQ__align.mseq) - ld.d $a1, $sp, 104 # 8-byte Folded Reload + ld.d $a1, $sp, 112 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(partQ__align.mseq1) ori $a3, $zero, 4 move $a2, $zero @@ -1542,7 +1538,7 @@ partQ__align: # @partQ__align blez $s8, .LBB4_22 # %bb.17: # %.lr.ph647 ld.d $a0, $s4, %pc_lo12(partQ__align.mseq) - ld.d $a1, $sp, 112 # 8-byte Folded Reload + ld.d $a1, $sp, 120 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(partQ__align.mseq2) ori $a2, $zero, 8 bltu $s8, $a2, .LBB4_19 @@ -1569,14 +1565,14 @@ partQ__align: # @partQ__align addi.d $a1, $a1, 8 bnez $a2, .LBB4_21 .LBB4_22: # %._crit_edge - ld.d $s4, $sp, 568 + ld.d $s4, $sp, 584 pcalau12i $a0, %got_pc_hi20(commonAlloc1) ld.d $fp, $a0, %got_pc_lo12(commonAlloc1) ld.w $a0, $fp, 0 pcalau12i $a1, %got_pc_hi20(commonAlloc2) ld.d $s1, $a1, %got_pc_lo12(commonAlloc2) ld.w $a1, $s1, 0 - st.d $s2, $sp, 408 # 8-byte Folded Spill + st.d $s2, $sp, 416 # 8-byte Folded Spill blt $a0, $s7, .LBB4_25 # %bb.23: # %._crit_edge blt $a1, $s0, .LBB4_25 @@ -1619,111 +1615,115 @@ partQ__align: # @partQ__align st.d $a0, $a1, 0 st.w $s2, $fp, 0 st.w $s0, $s1, 0 - ld.d $a4, $sp, 400 # 8-byte Folded Reload - ld.d $s2, $sp, 408 # 8-byte Folded Reload + ld.d $a4, $sp, 408 # 8-byte Folded Reload + ld.d $s2, $sp, 416 # 8-byte Folded Reload .LBB4_29: - ld.d $a2, $sp, 40 # 8-byte Folded Reload - ld.d $a1, $sp, 392 # 8-byte Folded Reload + ld.d $a2, $sp, 48 # 8-byte Folded Reload + ld.d $a1, $sp, 400 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(partQ__align.cpmx1) movgr2fr.w $fs0, $a2 pcalau12i $a2, %pc_hi20(partQ__align.ijp) - st.d $a2, $sp, 40 # 8-byte Folded Spill + st.d $a2, $sp, 48 # 8-byte Folded Spill st.d $a0, $a2, %pc_lo12(partQ__align.ijp) move $a0, $s2 - ld.d $a2, $sp, 328 # 8-byte Folded Reload - ld.d $s6, $sp, 144 # 8-byte Folded Reload + ld.d $a2, $sp, 336 # 8-byte Folded Reload + ld.d $s6, $sp, 152 # 8-byte Folded Reload move $a3, $s6 - move $s1, $a4 + move $s7, $a4 pcaddu18i $ra, %call36(cpmx_calc_new) jirl $ra, $ra, 0 - ld.d $a0, $sp, 352 # 8-byte Folded Reload + ld.d $a0, $sp, 360 # 8-byte Folded Reload ld.d $a1, $a0, %pc_lo12(partQ__align.cpmx2) - ld.d $fp, $sp, 136 # 8-byte Folded Reload + ld.d $fp, $sp, 144 # 8-byte Folded Reload move $a0, $fp move $a2, $s3 - ld.d $s0, $sp, 368 # 8-byte Folded Reload - move $a3, $s0 + ld.d $s1, $sp, 376 # 8-byte Folded Reload + move $a3, $s1 move $a4, $s8 pcaddu18i $ra, %call36(cpmx_calc_new) jirl $ra, $ra, 0 - ld.d $a0, $sp, 376 # 8-byte Folded Reload + ld.d $a0, $sp, 384 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.ogcp1g) beqz $s4, .LBB4_31 # %bb.30: - ld.d $s0, $sp, 592 + move $a1, $s7 + ld.d $s0, $sp, 608 move $a5, $s4 st.d $s4, $sp, 16 # 8-byte Folded Spill - ld.d $s7, $sp, 328 # 8-byte Folded Reload - ld.d $s4, $sp, 584 - ld.d $s6, $sp, 576 - move $a1, $s1 + ld.d $s7, $sp, 336 # 8-byte Folded Reload + ld.d $s4, $sp, 600 + ld.d $s6, $sp, 592 + move $s1, $a1 move $a2, $s2 move $a3, $s7 - ld.d $a4, $sp, 144 # 8-byte Folded Reload + ld.d $a4, $sp, 152 # 8-byte Folded Reload move $a6, $s4 pcaddu18i $ra, %call36(new_OpeningGapCount_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 200 # 8-byte Folded Reload + ld.d $a0, $sp, 208 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.ogcp2g) move $a1, $s8 move $a2, $fp move $a3, $s3 move $s5, $s3 - ld.d $a4, $sp, 368 # 8-byte Folded Reload + ld.d $a4, $sp, 376 # 8-byte Folded Reload move $a5, $s6 move $a6, $s0 move $s3, $s0 pcaddu18i $ra, %call36(new_OpeningGapCount_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 288 # 8-byte Folded Reload + ld.d $a0, $sp, 296 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.fgcp1g) move $a1, $s1 move $a2, $s2 move $a3, $s7 - ld.d $a4, $sp, 144 # 8-byte Folded Reload + ld.d $a4, $sp, 152 # 8-byte Folded Reload ld.d $s0, $sp, 16 # 8-byte Folded Reload move $a5, $s0 st.d $s4, $sp, 216 # 8-byte Folded Spill move $a6, $s4 pcaddu18i $ra, %call36(new_FinalGapCount_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 192 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.fgcp2g) move $a1, $s8 move $a2, $fp move $a3, $s5 - ld.d $a4, $sp, 368 # 8-byte Folded Reload + ld.d $a4, $sp, 376 # 8-byte Folded Reload move $a5, $s6 move $a6, $s3 pcaddu18i $ra, %call36(new_FinalGapCount_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 360 # 8-byte Folded Reload + ld.d $a0, $sp, 368 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.digf1) move $a1, $s1 move $a2, $s2 move $a3, $s7 - ld.d $a4, $sp, 144 # 8-byte Folded Reload + ld.d $a4, $sp, 152 # 8-byte Folded Reload move $a5, $s0 move $a6, $s4 pcaddu18i $ra, %call36(getdigapfreq_part) jirl $ra, $ra, 0 - ld.d $a0, $sp, 336 # 8-byte Folded Reload + ld.d $a0, $sp, 344 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.digf2) move $a1, $s8 move $a2, $fp move $a3, $s5 - ld.d $a4, $sp, 368 # 8-byte Folded Reload + ld.d $a4, $sp, 376 # 8-byte Folded Reload move $a5, $s6 + move $s5, $s3 move $a6, $s3 pcaddu18i $ra, %call36(getdigapfreq_part) jirl $ra, $ra, 0 ld.d $a0, $sp, 232 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.diaf1) move $s4, $s1 - move $a1, $s1 + ld.d $s1, $sp, 376 # 8-byte Folded Reload + move $a1, $s4 move $a2, $s2 + move $s3, $s7 move $a3, $s7 - ld.d $a4, $sp, 144 # 8-byte Folded Reload + ld.d $a4, $sp, 152 # 8-byte Folded Reload move $a5, $s0 ld.d $a6, $sp, 216 # 8-byte Folded Reload pcaddu18i $ra, %call36(getdiaminofreq_part) @@ -1732,102 +1732,100 @@ partQ__align: # @partQ__align ld.d $a0, $a0, %pc_lo12(partQ__align.diaf2) move $a1, $s8 move $a2, $fp - ld.d $s1, $sp, 224 # 8-byte Folded Reload - move $a3, $s1 - ld.d $a4, $sp, 368 # 8-byte Folded Reload + ld.d $s7, $sp, 224 # 8-byte Folded Reload + move $a3, $s7 + move $a4, $s1 move $a5, $s6 - ld.d $s6, $sp, 144 # 8-byte Folded Reload - move $a6, $s3 + ld.d $s6, $sp, 152 # 8-byte Folded Reload + move $a6, $s5 pcaddu18i $ra, %call36(getdiaminofreq_part) jirl $ra, $ra, 0 - ld.d $a0, $sp, 264 # 8-byte Folded Reload + ld.d $a0, $sp, 272 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.gapf1) move $a1, $s4 move $a2, $s2 - move $a3, $s7 + move $a3, $s3 move $a4, $s6 pcaddu18i $ra, %call36(getgapfreq) jirl $ra, $ra, 0 - ld.d $a0, $sp, 272 # 8-byte Folded Reload + ld.d $a0, $sp, 280 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.gapf2) move $a1, $s8 move $a2, $fp - move $a3, $s1 - ld.d $a4, $sp, 368 # 8-byte Folded Reload + move $a3, $s7 + move $a4, $s1 pcaddu18i $ra, %call36(getgapfreq) jirl $ra, $ra, 0 - ld.d $a0, $sp, 384 # 8-byte Folded Reload + ld.d $a0, $sp, 392 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.gapz1) move $a1, $s4 move $a2, $s2 - move $a3, $s7 + move $a3, $s3 move $a4, $s6 - move $s3, $s0 - ld.d $s0, $sp, 368 # 8-byte Folded Reload - move $a5, $s3 + move $a5, $s0 pcaddu18i $ra, %call36(getgapfreq_zure_part) jirl $ra, $ra, 0 - ld.d $a0, $sp, 344 # 8-byte Folded Reload + ld.d $a0, $sp, 352 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.gapz2) move $a1, $s8 move $a2, $fp - move $a3, $s1 - move $a4, $s0 - move $a5, $s3 + move $a3, $s7 + move $a4, $s1 + move $a5, $s0 pcaddu18i $ra, %call36(getgapfreq_zure_part) jirl $ra, $ra, 0 b .LBB4_32 .LBB4_31: - move $a1, $s1 + move $a1, $s7 move $a2, $s2 - ld.d $s4, $sp, 328 # 8-byte Folded Reload + ld.d $s4, $sp, 336 # 8-byte Folded Reload move $a3, $s4 move $a4, $s6 pcaddu18i $ra, %call36(st_OpeningGapCount) jirl $ra, $ra, 0 - ld.d $a0, $sp, 200 # 8-byte Folded Reload + ld.d $a0, $sp, 208 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.ogcp2g) move $a1, $s8 move $a2, $fp move $a3, $s3 - move $a4, $s0 + move $a4, $s1 pcaddu18i $ra, %call36(st_OpeningGapCount) jirl $ra, $ra, 0 - ld.d $a0, $sp, 288 # 8-byte Folded Reload + ld.d $a0, $sp, 296 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.fgcp1g) - move $a1, $s1 + move $a1, $s7 move $a2, $s2 move $a3, $s4 move $a4, $s6 pcaddu18i $ra, %call36(st_FinalGapCount_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 192 # 8-byte Folded Reload + ld.d $a0, $sp, 200 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.fgcp2g) move $a1, $s8 move $a2, $fp move $a3, $s3 - move $a4, $s0 + move $a4, $s1 pcaddu18i $ra, %call36(st_FinalGapCount_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 360 # 8-byte Folded Reload + ld.d $a0, $sp, 368 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.digf1) - move $a1, $s1 + move $a1, $s7 move $a2, $s2 move $a3, $s4 move $a4, $s6 pcaddu18i $ra, %call36(getdigapfreq_st) jirl $ra, $ra, 0 - ld.d $a0, $sp, 336 # 8-byte Folded Reload + ld.d $a0, $sp, 344 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.digf2) move $a1, $s8 move $a2, $fp move $a3, $s3 - move $a4, $s0 + move $a4, $s1 pcaddu18i $ra, %call36(getdigapfreq_st) jirl $ra, $ra, 0 ld.d $a0, $sp, 232 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.diaf1) - move $a1, $s1 + move $a1, $s7 move $a2, $s2 move $a3, $s4 move $a4, $s6 @@ -1838,67 +1836,67 @@ partQ__align: # @partQ__align move $a1, $s8 move $a2, $fp move $a3, $s3 - move $a4, $s0 + move $a4, $s1 pcaddu18i $ra, %call36(getdiaminofreq_x) jirl $ra, $ra, 0 - ld.d $a0, $sp, 264 # 8-byte Folded Reload + ld.d $a0, $sp, 272 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.gapf1) - move $a1, $s1 + move $a1, $s7 move $a2, $s2 move $a3, $s4 move $a4, $s6 pcaddu18i $ra, %call36(getgapfreq) jirl $ra, $ra, 0 - ld.d $a0, $sp, 272 # 8-byte Folded Reload + ld.d $a0, $sp, 280 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.gapf2) move $a1, $s8 move $a2, $fp move $a3, $s3 - move $a4, $s0 + move $a4, $s1 pcaddu18i $ra, %call36(getgapfreq) jirl $ra, $ra, 0 - ld.d $a0, $sp, 384 # 8-byte Folded Reload + ld.d $a0, $sp, 392 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.gapz1) - move $a1, $s1 + move $a1, $s7 move $a2, $s2 move $a3, $s4 move $a4, $s6 pcaddu18i $ra, %call36(getgapfreq_zure) jirl $ra, $ra, 0 - ld.d $a0, $sp, 344 # 8-byte Folded Reload + ld.d $a0, $sp, 352 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.gapz2) move $a1, $s8 move $a2, $fp move $a3, $s3 - move $a4, $s0 + move $a4, $s1 pcaddu18i $ra, %call36(getgapfreq_zure) jirl $ra, $ra, 0 .LBB4_32: - ld.d $s7, $sp, 120 # 8-byte Folded Reload - ld.d $s5, $sp, 48 # 8-byte Folded Reload + ld.d $s7, $sp, 128 # 8-byte Folded Reload + ld.d $s3, $sp, 56 # 8-byte Folded Reload addi.w $ra, $zero, -1 ffint.s.w $ft10, $fs0 - blt $s0, $ra, .LBB4_37 + blt $s1, $ra, .LBB4_37 # %bb.33: # %.lr.ph650 - ld.d $a0, $sp, 200 # 8-byte Folded Reload + ld.d $a0, $sp, 208 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.ogcp2g) - ld.d $a1, $sp, 336 # 8-byte Folded Reload + ld.d $a1, $sp, 344 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(partQ__align.digf2) - ld.d $a2, $sp, 280 # 8-byte Folded Reload + ld.d $a2, $sp, 288 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(partQ__align.og_h_dg_n2_p) - ld.d $a3, $sp, 192 # 8-byte Folded Reload + ld.d $a3, $sp, 200 # 8-byte Folded Reload ld.d $a3, $a3, %pc_lo12(partQ__align.fgcp2g) - ld.d $a4, $sp, 168 # 8-byte Folded Reload + ld.d $a4, $sp, 176 # 8-byte Folded Reload ld.d $a4, $a4, %pc_lo12(partQ__align.fg_h_dg_n2_p) - ld.d $a5, $sp, 80 # 8-byte Folded Reload + ld.d $a5, $sp, 88 # 8-byte Folded Reload ld.d $a5, $a5, %pc_lo12(partQ__align.og_t_fg_h_dg_n2_p) - ld.d $a6, $sp, 64 # 8-byte Folded Reload + ld.d $a6, $sp, 72 # 8-byte Folded Reload ld.d $a6, $a6, %pc_lo12(partQ__align.fg_t_og_h_dg_n2_p) - ld.d $a7, $sp, 344 # 8-byte Folded Reload + ld.d $a7, $sp, 352 # 8-byte Folded Reload ld.d $a7, $a7, %pc_lo12(partQ__align.gapz2) - ld.d $t0, $sp, 72 # 8-byte Folded Reload + ld.d $t0, $sp, 80 # 8-byte Folded Reload ld.d $t0, $t0, %pc_lo12(partQ__align.gapz_n2) - addi.d $t3, $s5, 2 + addi.d $t3, $s3, 2 bstrpick.d $t1, $t3, 31, 0 ori $t2, $zero, 40 fcvt.d.s $fa0, $ft10 @@ -1977,23 +1975,23 @@ partQ__align: # @partQ__align .LBB4_37: # %._crit_edge651 blt $s6, $ra, .LBB4_42 # %bb.38: # %.lr.ph654 - ld.d $a0, $sp, 376 # 8-byte Folded Reload + ld.d $a0, $sp, 384 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.ogcp1g) - ld.d $a1, $sp, 360 # 8-byte Folded Reload + ld.d $a1, $sp, 368 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(partQ__align.digf1) - ld.d $a2, $sp, 184 # 8-byte Folded Reload + ld.d $a2, $sp, 192 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(partQ__align.og_h_dg_n1_p) - ld.d $a3, $sp, 288 # 8-byte Folded Reload + ld.d $a3, $sp, 296 # 8-byte Folded Reload ld.d $a3, $a3, %pc_lo12(partQ__align.fgcp1g) - ld.d $a4, $sp, 176 # 8-byte Folded Reload + ld.d $a4, $sp, 184 # 8-byte Folded Reload ld.d $a4, $a4, %pc_lo12(partQ__align.fg_h_dg_n1_p) - ld.d $a5, $sp, 160 # 8-byte Folded Reload + ld.d $a5, $sp, 168 # 8-byte Folded Reload ld.d $a5, $a5, %pc_lo12(partQ__align.og_t_fg_h_dg_n1_p) - ld.d $a6, $sp, 88 # 8-byte Folded Reload + ld.d $a6, $sp, 96 # 8-byte Folded Reload ld.d $a6, $a6, %pc_lo12(partQ__align.fg_t_og_h_dg_n1_p) - ld.d $a7, $sp, 384 # 8-byte Folded Reload + ld.d $a7, $sp, 392 # 8-byte Folded Reload ld.d $a7, $a7, %pc_lo12(partQ__align.gapz1) - ld.d $t0, $sp, 152 # 8-byte Folded Reload + ld.d $t0, $sp, 160 # 8-byte Folded Reload ld.d $t0, $t0, %pc_lo12(partQ__align.gapz_n1) addi.d $t3, $s7, 2 bstrpick.d $t1, $t3, 31, 0 @@ -2074,59 +2072,59 @@ partQ__align: # @partQ__align .LBB4_42: # %._crit_edge655 vst $vr18, $sp, 240 # 16-byte Folded Spill st.d $ra, $sp, 24 # 8-byte Folded Spill - ld.d $a0, $sp, 560 + ld.d $a0, $sp, 576 + st.d $a0, $sp, 280 # 8-byte Folded Spill + ld.d $a0, $sp, 568 st.d $a0, $sp, 272 # 8-byte Folded Spill ld.d $a0, $sp, 552 - st.d $a0, $sp, 264 # 8-byte Folded Spill - ld.d $a0, $sp, 536 - st.d $a0, $sp, 360 # 8-byte Folded Spill - ld.d $s0, $sp, 520 - ld.d $a0, $sp, 208 # 8-byte Folded Reload - ld.d $s3, $a0, %pc_lo12(partQ__align.w1) - ld.d $a0, $sp, 312 # 8-byte Folded Reload + st.d $a0, $sp, 368 # 8-byte Folded Spill + ld.d $s0, $sp, 536 + ld.d $a0, $sp, 32 # 8-byte Folded Reload + ld.d $s7, $a0, %pc_lo12(partQ__align.w1) + ld.d $a0, $sp, 320 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(partQ__align.w2) - st.d $a0, $sp, 384 # 8-byte Folded Spill - ld.d $a0, $sp, 304 # 8-byte Folded Reload + st.d $a0, $sp, 392 # 8-byte Folded Spill + ld.d $a0, $sp, 312 # 8-byte Folded Reload ld.d $a1, $a0, %pc_lo12(partQ__align.initverticalw) - ld.d $a0, $sp, 352 # 8-byte Folded Reload - ld.d $s7, $a0, %pc_lo12(partQ__align.cpmx2) - ld.d $a0, $sp, 392 # 8-byte Folded Reload - ld.d $fp, $a0, %pc_lo12(partQ__align.cpmx1) - ld.d $a0, $sp, 296 # 8-byte Folded Reload - ld.d $s1, $a0, %pc_lo12(partQ__align.floatwork) - ld.d $a0, $sp, 256 # 8-byte Folded Reload - ld.d $s6, $a0, %pc_lo12(partQ__align.intwork) + ld.d $a0, $sp, 360 # 8-byte Folded Reload + ld.d $s6, $a0, %pc_lo12(partQ__align.cpmx2) + ld.d $a0, $sp, 400 # 8-byte Folded Reload + ld.d $s5, $a0, %pc_lo12(partQ__align.cpmx1) + ld.d $a0, $sp, 304 # 8-byte Folded Reload + ld.d $fp, $a0, %pc_lo12(partQ__align.floatwork) + ld.d $a0, $sp, 264 # 8-byte Folded Reload + ld.d $s1, $a0, %pc_lo12(partQ__align.intwork) ori $a7, $zero, 1 - st.d $a1, $sp, 392 # 8-byte Folded Spill + st.d $a1, $sp, 400 # 8-byte Folded Spill move $a0, $a1 - move $a1, $s7 - move $a2, $fp + move $a1, $s6 + move $a2, $s5 move $a3, $zero - ld.d $s4, $sp, 144 # 8-byte Folded Reload + ld.d $s4, $sp, 152 # 8-byte Folded Reload move $a4, $s4 - move $a5, $s1 - move $a6, $s6 + move $a5, $fp + move $a6, $s1 pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload beqz $a0, .LBB4_49 # %bb.43: move $a1, $s4 - move $s4, $s3 - ld.d $a4, $sp, 368 # 8-byte Folded Reload + move $s4, $s7 + ld.d $a4, $sp, 376 # 8-byte Folded Reload blez $a1, .LBB4_46 # %bb.44: # %.lr.ph.i - ld.d $a0, $sp, 272 # 8-byte Folded Reload + ld.d $a0, $sp, 280 # 8-byte Folded Reload ld.w $a1, $a0, 0 pcalau12i $a0, %pc_hi20(impmtx) ld.d $a0, $a0, %pc_lo12(impmtx) - ld.d $a2, $sp, 360 # 8-byte Folded Reload + ld.d $a2, $sp, 368 # 8-byte Folded Reload add.w $a2, $a1, $a2 - ld.d $a1, $sp, 120 # 8-byte Folded Reload + ld.d $a1, $sp, 128 # 8-byte Folded Reload bstrpick.d $a1, $a1, 30, 0 slli.d $a2, $a2, 2 - ld.d $a3, $sp, 264 # 8-byte Folded Reload - ld.d $a6, $sp, 392 # 8-byte Folded Reload + ld.d $a3, $sp, 272 # 8-byte Folded Reload + ld.d $a6, $sp, 400 # 8-byte Folded Reload .p2align 4, , 16 .LBB4_45: # =>This Inner Loop Header: Depth=1 ld.w $a5, $a3, 0 @@ -2144,19 +2142,19 @@ partQ__align: # @partQ__align .LBB4_46: # %part_imp_match_out_vead_tate_gapmapQ.exit ori $a7, $zero, 1 move $a0, $s4 - move $a1, $fp - move $a2, $s7 + move $a1, $s5 + move $a2, $s6 move $a3, $zero - move $a5, $s1 - move $a6, $s6 + move $a5, $fp + move $a6, $s1 pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 - ld.d $t4, $sp, 368 # 8-byte Folded Reload - ld.d $a5, $sp, 360 # 8-byte Folded Reload - move $t8, $s5 - blez $t4, .LBB4_50 + ld.d $t5, $sp, 376 # 8-byte Folded Reload + ld.d $a5, $sp, 368 # 8-byte Folded Reload + move $t8, $s3 + blez $t5, .LBB4_50 # %bb.47: # %.lr.ph.i523 - ld.d $a0, $sp, 264 # 8-byte Folded Reload + ld.d $a0, $sp, 272 # 8-byte Folded Reload ld.w $a0, $a0, 0 pcalau12i $a1, %pc_hi20(impmtx) ld.d $a1, $a1, %pc_lo12(impmtx) @@ -2164,7 +2162,7 @@ partQ__align: # @partQ__align slli.d $a0, $a0, 3 ldx.d $a0, $a1, $a0 bstrpick.d $a1, $t8, 30, 0 - ld.d $a2, $sp, 272 # 8-byte Folded Reload + ld.d $a2, $sp, 280 # 8-byte Folded Reload move $a3, $s4 .p2align 4, , 16 .LBB4_48: # =>This Inner Loop Header: Depth=1 @@ -2182,195 +2180,193 @@ partQ__align: # @partQ__align b .LBB4_50 .LBB4_49: # %.critedge ori $a7, $zero, 1 - move $s4, $s3 - move $a0, $s3 - move $a1, $fp - move $a2, $s7 + move $s4, $s7 + move $a0, $s7 + move $a1, $s5 + move $a2, $s6 move $a3, $zero - ld.d $s3, $sp, 368 # 8-byte Folded Reload - move $a4, $s3 - move $a5, $s1 - move $a6, $s6 + ld.d $s7, $sp, 376 # 8-byte Folded Reload + move $a4, $s7 + move $a5, $fp + move $a6, $s1 pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 - move $t4, $s3 - move $t8, $s5 + move $t5, $s7 + move $t8, $s3 .LBB4_50: # %part_imp_match_out_vead_gapmapQ.exit pcalau12i $a0, %got_pc_hi20(outgap) - ld.d $s5, $a0, %got_pc_lo12(outgap) - ld.w $a1, $s5, 0 - ori $a2, $zero, 1 - slli.d $a0, $t8, 32 - pcalau12i $a3, %pc_hi20(.LCPI4_2) - st.d $a3, $sp, 232 # 8-byte Folded Spill + ld.d $s3, $a0, %got_pc_lo12(outgap) + ld.w $a2, $s3, 0 + ori $a3, $zero, 1 + slli.d $a1, $t8, 32 + lu12i.w $a0, 287172 vld $vr8, $sp, 240 # 16-byte Folded Reload - ld.d $t6, $sp, 384 # 8-byte Folded Reload - bne $a1, $a2, .LBB4_60 + bne $a2, $a3, .LBB4_60 # %bb.51: - ld.d $a1, $sp, 376 # 8-byte Folded Reload - ld.d $a1, $a1, %pc_lo12(partQ__align.ogcp1g) - ld.d $a2, $sp, 280 # 8-byte Folded Reload - ld.d $a2, $a2, %pc_lo12(partQ__align.og_h_dg_n2_p) - ld.d $a3, $sp, 200 # 8-byte Folded Reload - ld.d $a3, $a3, %pc_lo12(partQ__align.ogcp2g) - fld.s $fa0, $a1, 0 - fld.s $fa1, $a2, 0 - ld.d $a1, $sp, 184 # 8-byte Folded Reload - ld.d $a1, $a1, %pc_lo12(partQ__align.og_h_dg_n1_p) - fld.s $fa2, $a3, 0 - ld.d $a2, $sp, 288 # 8-byte Folded Reload - ld.d $a2, $a2, %pc_lo12(partQ__align.fgcp1g) - ld.d $a3, $sp, 168 # 8-byte Folded Reload - ld.d $a3, $a3, %pc_lo12(partQ__align.fg_h_dg_n2_p) - fld.s $fa3, $a1, 0 - ld.d $a1, $sp, 192 # 8-byte Folded Reload - ld.d $a1, $a1, %pc_lo12(partQ__align.fgcp2g) - fld.s $fa4, $a2, 0 - fld.s $fa5, $a3, 0 - ld.d $a2, $sp, 176 # 8-byte Folded Reload - ld.d $a2, $a2, %pc_lo12(partQ__align.fg_h_dg_n1_p) - fld.s $fa6, $a1, 0 + ld.d $a2, $sp, 384 # 8-byte Folded Reload + ld.d $a2, $a2, %pc_lo12(partQ__align.ogcp1g) + ld.d $a3, $sp, 288 # 8-byte Folded Reload + ld.d $a3, $a3, %pc_lo12(partQ__align.og_h_dg_n2_p) + ld.d $a4, $sp, 208 # 8-byte Folded Reload + ld.d $a4, $a4, %pc_lo12(partQ__align.ogcp2g) + fld.s $fa0, $a2, 0 + fld.s $fa1, $a3, 0 + ld.d $a2, $sp, 192 # 8-byte Folded Reload + ld.d $a2, $a2, %pc_lo12(partQ__align.og_h_dg_n1_p) + fld.s $fa2, $a4, 0 + ld.d $a3, $sp, 296 # 8-byte Folded Reload + ld.d $a3, $a3, %pc_lo12(partQ__align.fgcp1g) + ld.d $a4, $sp, 176 # 8-byte Folded Reload + ld.d $a4, $a4, %pc_lo12(partQ__align.fg_h_dg_n2_p) + fld.s $fa3, $a2, 0 + ld.d $a2, $sp, 200 # 8-byte Folded Reload + ld.d $a2, $a2, %pc_lo12(partQ__align.fgcp2g) + fld.s $fa4, $a3, 0 + fld.s $fa5, $a4, 0 + ld.d $a3, $sp, 184 # 8-byte Folded Reload + ld.d $a3, $a3, %pc_lo12(partQ__align.fg_h_dg_n1_p) + fld.s $fa6, $a2, 0 movgr2fr.w $fa7, $zero fmadd.s $fa0, $fa0, $fa1, $fa7 - fld.s $fa1, $a2, 0 - ld.d $a1, $sp, 392 # 8-byte Folded Reload - fld.s $fa7, $a1, 0 + fld.s $fa1, $a3, 0 + ld.d $a2, $sp, 400 # 8-byte Folded Reload + fld.s $fa7, $a2, 0 fmadd.s $fa0, $fa2, $fa3, $fa0 fmadd.s $fa0, $fa4, $fa5, $fa0 fmadd.s $fa0, $fa6, $fa1, $fa0 fadd.s $fa1, $fa7, $fa0 - fst.s $fa1, $a1, 0 - move $t5, $s4 + fst.s $fa1, $a2, 0 + move $t6, $s4 fld.s $fa1, $s4, 0 fadd.s $fa0, $fa0, $fa1 - fst.s $fa0, $s4, 0 - ld.d $t7, $sp, 120 # 8-byte Folded Reload - ld.d $s3, $sp, 144 # 8-byte Folded Reload - blez $s3, .LBB4_55 -# %bb.52: # %.lr.ph665 - ld.d $a1, $sp, 72 # 8-byte Folded Reload - ld.d $a1, $a1, %pc_lo12(partQ__align.gapz_n2) - ld.d $a2, $sp, 160 # 8-byte Folded Reload - ld.d $a2, $a2, %pc_lo12(partQ__align.og_t_fg_h_dg_n1_p) - ld.d $a3, $sp, 88 # 8-byte Folded Reload - ld.d $a3, $a3, %pc_lo12(partQ__align.fg_t_og_h_dg_n1_p) - addi.d $a4, $t7, 1 - bstrpick.d $a4, $a4, 31, 0 - addi.d $a6, $a4, -1 - ori $a7, $zero, 8 - ori $a5, $zero, 1 - bgeu $a6, $a7, .LBB4_267 + fst.s $fa0, $s4, 0 + ld.d $t7, $sp, 128 # 8-byte Folded Reload + ld.d $s4, $sp, 152 # 8-byte Folded Reload + blez $s4, .LBB4_55 +# %bb.52: # %.lr.ph665 + ld.d $a2, $sp, 80 # 8-byte Folded Reload + ld.d $a2, $a2, %pc_lo12(partQ__align.gapz_n2) + ld.d $a3, $sp, 168 # 8-byte Folded Reload + ld.d $a3, $a3, %pc_lo12(partQ__align.og_t_fg_h_dg_n1_p) + ld.d $a4, $sp, 96 # 8-byte Folded Reload + ld.d $a4, $a4, %pc_lo12(partQ__align.fg_t_og_h_dg_n1_p) + addi.d $a5, $t7, 1 + bstrpick.d $a5, $a5, 31, 0 + addi.d $a7, $a5, -1 + ori $t0, $zero, 8 + ori $a6, $zero, 1 + bgeu $a7, $t0, .LBB4_267 .LBB4_53: # %scalar.ph1208.preheader - alsl.d $a3, $a5, $a3, 2 - ld.d $a6, $sp, 392 # 8-byte Folded Reload - alsl.d $a6, $a5, $a6, 2 - sub.d $a4, $a4, $a5 + alsl.d $a4, $a6, $a4, 2 + ld.d $a7, $sp, 400 # 8-byte Folded Reload + alsl.d $a7, $a6, $a7, 2 + sub.d $a5, $a5, $a6 .p2align 4, , 16 .LBB4_54: # %scalar.ph1208 # =>This Inner Loop Header: Depth=1 - fld.s $fa0, $a1, 0 - fld.s $fa1, $a2, 0 - fld.s $fa2, $a6, 0 + fld.s $fa0, $a2, 0 + fld.s $fa1, $a3, 0 + fld.s $fa2, $a7, 0 fmul.s $fa0, $fa0, $fa1 fadd.s $fa0, $fa2, $fa0 - fst.s $fa0, $a6, 0 - fld.s $fa1, $a1, 4 - fld.s $fa2, $a3, 0 + fst.s $fa0, $a7, 0 + fld.s $fa1, $a2, 4 + fld.s $fa2, $a4, 0 fmul.s $fa1, $fa1, $fa2 fadd.s $fa0, $fa0, $fa1 - fst.s $fa0, $a6, 0 - addi.d $a3, $a3, 4 - addi.d $a4, $a4, -1 - addi.d $a6, $a6, 4 - bnez $a4, .LBB4_54 + fst.s $fa0, $a7, 0 + addi.d $a4, $a4, 4 + addi.d $a5, $a5, -1 + addi.d $a7, $a7, 4 + bnez $a5, .LBB4_54 .LBB4_55: # %.preheader638 - blez $t4, .LBB4_149 + blez $t5, .LBB4_149 # %bb.56: # %.lr.ph668 - ld.d $a1, $sp, 152 # 8-byte Folded Reload - ld.d $a1, $a1, %pc_lo12(partQ__align.gapz_n1) - ld.d $a2, $sp, 80 # 8-byte Folded Reload - ld.d $a2, $a2, %pc_lo12(partQ__align.og_t_fg_h_dg_n2_p) - ld.d $a3, $sp, 64 # 8-byte Folded Reload - ld.d $a3, $a3, %pc_lo12(partQ__align.fg_t_og_h_dg_n2_p) - addi.d $a4, $t8, 1 - bstrpick.d $a4, $a4, 31, 0 - addi.d $a6, $a4, -1 - ori $a7, $zero, 8 - ori $a5, $zero, 1 - bgeu $a6, $a7, .LBB4_273 + ld.d $a2, $sp, 160 # 8-byte Folded Reload + ld.d $a2, $a2, %pc_lo12(partQ__align.gapz_n1) + ld.d $a3, $sp, 88 # 8-byte Folded Reload + ld.d $a3, $a3, %pc_lo12(partQ__align.og_t_fg_h_dg_n2_p) + ld.d $a4, $sp, 72 # 8-byte Folded Reload + ld.d $a4, $a4, %pc_lo12(partQ__align.fg_t_og_h_dg_n2_p) + addi.d $a5, $t8, 1 + bstrpick.d $a5, $a5, 31, 0 + addi.d $a7, $a5, -1 + ori $t0, $zero, 8 + ori $a6, $zero, 1 + bgeu $a7, $t0, .LBB4_273 .LBB4_57: # %scalar.ph1246.preheader - alsl.d $a3, $a5, $a3, 2 - alsl.d $a6, $a5, $t5, 2 - sub.d $a4, $a4, $a5 + alsl.d $a4, $a6, $a4, 2 + alsl.d $a7, $a6, $t6, 2 + sub.d $a5, $a5, $a6 .p2align 4, , 16 .LBB4_58: # %scalar.ph1246 # =>This Inner Loop Header: Depth=1 - fld.s $fa0, $a1, 0 - fld.s $fa1, $a2, 0 - fld.s $fa2, $a6, 0 + fld.s $fa0, $a2, 0 + fld.s $fa1, $a3, 0 + fld.s $fa2, $a7, 0 fmul.s $fa0, $fa0, $fa1 fadd.s $fa0, $fa2, $fa0 - fst.s $fa0, $a6, 0 - fld.s $fa1, $a1, 4 - fld.s $fa2, $a3, 0 + fst.s $fa0, $a7, 0 + fld.s $fa1, $a2, 4 + fld.s $fa2, $a4, 0 fmul.s $fa1, $fa1, $fa2 fadd.s $fa0, $fa0, $fa1 - fst.s $fa0, $a6, 0 - addi.d $a3, $a3, 4 - addi.d $a4, $a4, -1 - addi.d $a6, $a6, 4 - bnez $a4, .LBB4_58 + fst.s $fa0, $a7, 0 + addi.d $a4, $a4, 4 + addi.d $a5, $a5, -1 + addi.d $a7, $a7, 4 + bnez $a5, .LBB4_58 .LBB4_59: # %.loopexit639.thread832 - ld.d $a1, $sp, 320 # 8-byte Folded Reload - ld.d $a1, $a1, %pc_lo12(partQ__align.m) - st.w $zero, $a1, 0 + ld.d $a2, $sp, 328 # 8-byte Folded Reload + ld.d $a2, $a2, %pc_lo12(partQ__align.m) + st.w $zero, $a2, 0 b .LBB4_75 .LBB4_60: # %.preheader642 - move $t5, $s4 - ld.d $t7, $sp, 120 # 8-byte Folded Reload - ld.d $s3, $sp, 144 # 8-byte Folded Reload - blez $t4, .LBB4_67 + move $t6, $s4 + ld.d $t7, $sp, 128 # 8-byte Folded Reload + ld.d $s4, $sp, 152 # 8-byte Folded Reload + blez $t5, .LBB4_67 # %bb.61: # %.lr.ph658 - pcalau12i $a1, %got_pc_hi20(offset) - ld.d $a1, $a1, %got_pc_lo12(offset) - ld.w $a1, $a1, 0 - addi.d $a2, $t8, 1 - bstrpick.d $a2, $a2, 31, 0 - addi.d $a3, $a2, -1 - ori $a5, $zero, 4 - ori $a4, $zero, 1 - bltu $a3, $a5, .LBB4_65 + pcalau12i $a2, %got_pc_hi20(offset) + ld.d $a2, $a2, %got_pc_lo12(offset) + ld.w $a2, $a2, 0 + addi.d $a3, $t8, 1 + bstrpick.d $a3, $a3, 31, 0 + addi.d $a4, $a3, -1 + ori $a6, $zero, 4 + ori $a5, $zero, 1 + bltu $a4, $a6, .LBB4_65 # %bb.62: # %vector.ph1161 - move $a5, $a3 - bstrins.d $a5, $zero, 1, 0 - ori $a6, $zero, 1 - move $a4, $a3 - bstrins.d $a4, $a6, 1, 0 - vreplgr2vr.w $vr0, $a1 - pcalau12i $a6, %pc_hi20(.LCPI4_1) - vld $vr1, $a6, %pc_lo12(.LCPI4_1) - addi.d $a6, $t5, 4 - lu52i.d $a7, $zero, -1026 - vreplgr2vr.d $vr2, $a7 - move $a7, $a5 + move $a6, $a4 + bstrins.d $a6, $zero, 1, 0 + ori $a7, $zero, 1 + move $a5, $a4 + bstrins.d $a5, $a7, 1, 0 + vreplgr2vr.w $vr0, $a2 + pcalau12i $a7, %pc_hi20(.LCPI4_0) + vld $vr1, $a7, %pc_lo12(.LCPI4_0) + addi.d $a7, $t6, 4 + lu52i.d $t0, $zero, -1026 + vreplgr2vr.d $vr2, $t0 + move $t0, $a6 .p2align 4, , 16 .LBB4_63: # %vector.body1166 # =>This Inner Loop Header: Depth=1 vmul.w $vr3, $vr0, $vr1 - vpickve2gr.w $t0, $vr3, 3 - movgr2fr.w $fa4, $t0 + vpickve2gr.w $t1, $vr3, 3 + movgr2fr.w $fa4, $t1 ffint.d.w $fa4, $fa4 - vpickve2gr.w $t0, $vr3, 2 - movgr2fr.w $fa5, $t0 + vpickve2gr.w $t1, $vr3, 2 + movgr2fr.w $fa5, $t1 ffint.d.w $fa5, $fa5 vextrins.d $vr5, $vr4, 16 - vpickve2gr.w $t0, $vr3, 1 - movgr2fr.w $fa4, $t0 + vpickve2gr.w $t1, $vr3, 1 + movgr2fr.w $fa4, $t1 ffint.d.w $fa4, $fa4 - vpickve2gr.w $t0, $vr3, 0 - movgr2fr.w $fa3, $t0 + vpickve2gr.w $t1, $vr3, 0 + movgr2fr.w $fa3, $t1 ffint.d.w $fa3, $fa3 - vld $vr6, $a6, 0 + vld $vr6, $a7, 0 vextrins.d $vr3, $vr4, 16 vfmul.d $vr3, $vr3, $vr2 vfmul.d $vr4, $vr5, $vr2 @@ -2397,77 +2393,77 @@ partQ__align: # @partQ__align vreplvei.d $vr4, $vr4, 1 fcvt.s.d $fa4, $fa4 vextrins.w $vr3, $vr4, 48 - vst $vr3, $a6, 0 + vst $vr3, $a7, 0 vaddi.wu $vr1, $vr1, 4 - addi.d $a7, $a7, -4 - addi.d $a6, $a6, 16 - bnez $a7, .LBB4_63 + addi.d $t0, $t0, -4 + addi.d $a7, $a7, 16 + bnez $t0, .LBB4_63 # %bb.64: # %middle.block1170 - beq $a3, $a5, .LBB4_67 + beq $a4, $a6, .LBB4_67 .LBB4_65: # %scalar.ph1159.preheader - mul.d $a3, $a1, $a4 - alsl.d $a5, $a4, $t5, 2 - sub.d $a2, $a2, $a4 + mul.d $a4, $a2, $a5 + alsl.d $a6, $a5, $t6, 2 + sub.d $a3, $a3, $a5 vldi $vr0, -800 .p2align 4, , 16 .LBB4_66: # %scalar.ph1159 # =>This Inner Loop Header: Depth=1 - fld.s $fa1, $a5, 0 - movgr2fr.w $fa2, $a3 + fld.s $fa1, $a6, 0 + movgr2fr.w $fa2, $a4 ffint.d.w $fa2, $fa2 fmul.d $fa2, $fa2, $fa0 fcvt.d.s $fa1, $fa1 fadd.d $fa1, $fa1, $fa2 fcvt.s.d $fa1, $fa1 - fst.s $fa1, $a5, 0 - add.w $a3, $a3, $a1 - addi.d $a2, $a2, -1 - addi.d $a5, $a5, 4 - bnez $a2, .LBB4_66 + fst.s $fa1, $a6, 0 + add.w $a4, $a4, $a2 + addi.d $a3, $a3, -1 + addi.d $a6, $a6, 4 + bnez $a3, .LBB4_66 .LBB4_67: # %.preheader640 - blez $s3, .LBB4_74 + blez $s4, .LBB4_74 # %bb.68: # %.lr.ph661 - pcalau12i $a1, %got_pc_hi20(offset) - ld.d $a1, $a1, %got_pc_lo12(offset) - ld.w $a1, $a1, 0 - addi.d $a2, $t7, 1 - bstrpick.d $a2, $a2, 31, 0 - addi.d $a3, $a2, -1 - ori $a5, $zero, 4 - ori $a4, $zero, 1 - bltu $a3, $a5, .LBB4_72 + pcalau12i $a2, %got_pc_hi20(offset) + ld.d $a2, $a2, %got_pc_lo12(offset) + ld.w $a2, $a2, 0 + addi.d $a3, $t7, 1 + bstrpick.d $a3, $a3, 31, 0 + addi.d $a4, $a3, -1 + ori $a6, $zero, 4 + ori $a5, $zero, 1 + bltu $a4, $a6, .LBB4_72 # %bb.69: # %vector.ph1175 - move $a5, $a3 - bstrins.d $a5, $zero, 1, 0 - ori $a6, $zero, 1 - move $a4, $a3 - bstrins.d $a4, $a6, 1, 0 - vreplgr2vr.w $vr0, $a1 - pcalau12i $a6, %pc_hi20(.LCPI4_1) - vld $vr1, $a6, %pc_lo12(.LCPI4_1) - ld.d $a6, $sp, 392 # 8-byte Folded Reload - addi.d $a6, $a6, 4 - lu52i.d $a7, $zero, -1026 - vreplgr2vr.d $vr2, $a7 - move $a7, $a5 + move $a6, $a4 + bstrins.d $a6, $zero, 1, 0 + ori $a7, $zero, 1 + move $a5, $a4 + bstrins.d $a5, $a7, 1, 0 + vreplgr2vr.w $vr0, $a2 + pcalau12i $a7, %pc_hi20(.LCPI4_0) + vld $vr1, $a7, %pc_lo12(.LCPI4_0) + ld.d $a7, $sp, 400 # 8-byte Folded Reload + addi.d $a7, $a7, 4 + lu52i.d $t0, $zero, -1026 + vreplgr2vr.d $vr2, $t0 + move $t0, $a6 .p2align 4, , 16 .LBB4_70: # %vector.body1180 # =>This Inner Loop Header: Depth=1 vmul.w $vr3, $vr0, $vr1 - vpickve2gr.w $t0, $vr3, 3 - movgr2fr.w $fa4, $t0 + vpickve2gr.w $t1, $vr3, 3 + movgr2fr.w $fa4, $t1 ffint.d.w $fa4, $fa4 - vpickve2gr.w $t0, $vr3, 2 - movgr2fr.w $fa5, $t0 + vpickve2gr.w $t1, $vr3, 2 + movgr2fr.w $fa5, $t1 ffint.d.w $fa5, $fa5 vextrins.d $vr5, $vr4, 16 - vpickve2gr.w $t0, $vr3, 1 - movgr2fr.w $fa4, $t0 + vpickve2gr.w $t1, $vr3, 1 + movgr2fr.w $fa4, $t1 ffint.d.w $fa4, $fa4 - vpickve2gr.w $t0, $vr3, 0 - movgr2fr.w $fa3, $t0 + vpickve2gr.w $t1, $vr3, 0 + movgr2fr.w $fa3, $t1 ffint.d.w $fa3, $fa3 - vld $vr6, $a6, 0 + vld $vr6, $a7, 0 vextrins.d $vr3, $vr4, 16 vfmul.d $vr3, $vr3, $vr2 vfmul.d $vr4, $vr5, $vr2 @@ -2494,103 +2490,101 @@ partQ__align: # @partQ__align vreplvei.d $vr4, $vr4, 1 fcvt.s.d $fa4, $fa4 vextrins.w $vr3, $vr4, 48 - vst $vr3, $a6, 0 + vst $vr3, $a7, 0 vaddi.wu $vr1, $vr1, 4 - addi.d $a7, $a7, -4 - addi.d $a6, $a6, 16 - bnez $a7, .LBB4_70 + addi.d $t0, $t0, -4 + addi.d $a7, $a7, 16 + bnez $t0, .LBB4_70 # %bb.71: # %middle.block1187 - beq $a3, $a5, .LBB4_74 + beq $a4, $a6, .LBB4_74 .LBB4_72: # %scalar.ph1173.preheader - mul.d $a3, $a1, $a4 - ld.d $a5, $sp, 392 # 8-byte Folded Reload - alsl.d $a5, $a4, $a5, 2 - sub.d $a2, $a2, $a4 + mul.d $a4, $a2, $a5 + ld.d $a6, $sp, 400 # 8-byte Folded Reload + alsl.d $a6, $a5, $a6, 2 + sub.d $a3, $a3, $a5 vldi $vr0, -800 .p2align 4, , 16 .LBB4_73: # %scalar.ph1173 # =>This Inner Loop Header: Depth=1 - fld.s $fa1, $a5, 0 - movgr2fr.w $fa2, $a3 + fld.s $fa1, $a6, 0 + movgr2fr.w $fa2, $a4 ffint.d.w $fa2, $fa2 fmul.d $fa2, $fa2, $fa0 fcvt.d.s $fa1, $fa1 fadd.d $fa1, $fa1, $fa2 fcvt.s.d $fa1, $fa1 - fst.s $fa1, $a5, 0 - add.w $a3, $a3, $a1 - addi.d $a2, $a2, -1 - addi.d $a5, $a5, 4 - bnez $a2, .LBB4_73 + fst.s $fa1, $a6, 0 + add.w $a4, $a4, $a2 + addi.d $a3, $a3, -1 + addi.d $a6, $a6, 4 + bnez $a3, .LBB4_73 .LBB4_74: # %.loopexit639 - ld.d $a1, $sp, 320 # 8-byte Folded Reload - ld.d $a1, $a1, %pc_lo12(partQ__align.m) - st.w $zero, $a1, 0 - blez $t4, .LBB4_150 + ld.d $a2, $sp, 328 # 8-byte Folded Reload + ld.d $a2, $a2, %pc_lo12(partQ__align.m) + st.w $zero, $a2, 0 + blez $t5, .LBB4_150 .LBB4_75: # %.lr.ph672 - ld.d $a2, $sp, 56 # 8-byte Folded Reload - ld.d $a2, $a2, %pc_lo12(partQ__align.mp) - addi.d $a3, $t8, 1 - bstrpick.d $a3, $a3, 31, 0 - addi.d $a4, $a3, -1 - ori $a6, $zero, 8 - ori $a5, $zero, 1 - bltu $a4, $a6, .LBB4_80 + ld.d $a3, $sp, 64 # 8-byte Folded Reload + ld.d $a3, $a3, %pc_lo12(partQ__align.mp) + addi.d $a4, $t8, 1 + bstrpick.d $a4, $a4, 31, 0 + addi.d $a5, $a4, -1 + ori $a7, $zero, 8 + ori $a6, $zero, 1 + bltu $a5, $a7, .LBB4_80 # %bb.76: # %vector.memcheck1266 - sub.d $a6, $a1, $t5 - addi.d $a6, $a6, 4 - ori $a7, $zero, 32 - bltu $a6, $a7, .LBB4_80 + sub.d $a7, $a2, $t6 + addi.d $a7, $a7, 4 + ori $t0, $zero, 32 + bltu $a7, $t0, .LBB4_80 # %bb.77: # %vector.ph1270 - move $a7, $zero - move $a6, $a4 - bstrins.d $a6, $zero, 2, 0 - ori $t0, $zero, 1 - move $a5, $a4 - bstrins.d $a5, $t0, 2, 0 + move $t0, $zero + move $a7, $a5 + bstrins.d $a7, $zero, 2, 0 + ori $t1, $zero, 1 + move $a6, $a5 + bstrins.d $a6, $t1, 2, 0 vreplvei.w $vr0, $vr8, 0 - addi.d $t0, $a2, 20 + addi.d $t1, $a3, 20 vrepli.b $vr1, 0 - lu12i.w $t1, 287172 - vreplgr2vr.w $vr2, $t1 - move $t1, $a6 + vreplgr2vr.w $vr2, $a0 + move $t2, $a7 .p2align 4, , 16 .LBB4_78: # %vector.body1275 # =>This Inner Loop Header: Depth=1 - add.d $t2, $t0, $a7 - add.d $t3, $t5, $a7 - vldx $vr3, $t5, $a7 - vld $vr4, $t3, 16 - vst $vr1, $t2, -16 - vstx $vr1, $t0, $a7 + add.d $t3, $t1, $t0 + add.d $t4, $t6, $t0 + vldx $vr3, $t6, $t0 + vld $vr4, $t4, 16 + vst $vr1, $t3, -16 + vstx $vr1, $t1, $t0 vfmadd.s $vr3, $vr0, $vr2, $vr3 vfmadd.s $vr4, $vr0, $vr2, $vr4 - add.d $t2, $a1, $a7 - vst $vr3, $t2, 4 - vst $vr4, $t2, 20 - addi.d $t1, $t1, -8 - addi.d $a7, $a7, 32 - bnez $t1, .LBB4_78 + add.d $t3, $a2, $t0 + vst $vr3, $t3, 4 + vst $vr4, $t3, 20 + addi.d $t2, $t2, -8 + addi.d $t0, $t0, 32 + bnez $t2, .LBB4_78 # %bb.79: # %middle.block1281 - beq $a4, $a6, .LBB4_82 + beq $a5, $a7, .LBB4_82 .LBB4_80: # %scalar.ph1268.preheader - ld.d $a4, $sp, 232 # 8-byte Folded Reload - fld.s $fa0, $a4, %pc_lo12(.LCPI4_2) - slli.d $a4, $a5, 2 - addi.d $a6, $t5, -4 - sub.d $a3, $a3, $a5 + slli.d $a5, $a6, 2 + addi.d $a7, $t6, -4 + sub.d $a4, $a4, $a6 + movgr2fr.w $fa0, $a0 .p2align 4, , 16 .LBB4_81: # %scalar.ph1268 # =>This Inner Loop Header: Depth=1 - fldx.s $fa1, $a6, $a4 - stx.w $zero, $a2, $a4 + fldx.s $fa1, $a7, $a5 + stx.w $zero, $a3, $a5 fmadd.s $fa1, $ft0, $fa0, $fa1 - fstx.s $fa1, $a1, $a4 - addi.d $a3, $a3, -1 - addi.d $a4, $a4, 4 - bnez $a3, .LBB4_81 + fstx.s $fa1, $a2, $a5 + addi.d $a4, $a4, -1 + addi.d $a5, $a5, 4 + bnez $a4, .LBB4_81 .LBB4_82: - st.d $zero, $sp, 352 # 8-byte Folded Spill + st.d $zero, $sp, 360 # 8-byte Folded Spill b .LBB4_151 .LBB4_83: # %vector.memcheck860 alsl.d $s2, $t1, $a2, 2 @@ -2599,160 +2593,162 @@ partQ__align: # @partQ__align sltu $t4, $a4, $s2 and $t4, $t2, $t4 move $t2, $zero - bnez $t4, .LBB4_339 + bnez $t4, .LBB4_344 # %bb.84: # %vector.memcheck860 alsl.d $s0, $t1, $a5, 2 sltu $t4, $a2, $s0 sltu $t5, $a5, $s2 and $t4, $t4, $t5 - bnez $t4, .LBB4_339 + bnez $t4, .LBB4_344 # %bb.85: # %vector.memcheck860 alsl.d $t5, $t1, $a6, 2 sltu $t4, $a2, $t5 sltu $t6, $a6, $s2 and $t4, $t4, $t6 - bnez $t4, .LBB4_339 + bnez $t4, .LBB4_344 # %bb.86: # %vector.memcheck860 + move $s4, $s3 alsl.d $t4, $t1, $t0, 2 sltu $t6, $a2, $t4 sltu $t7, $t0, $s2 and $t6, $t6, $t7 - bnez $t6, .LBB4_339 + bnez $t6, .LBB4_341 # %bb.87: # %vector.memcheck860 alsl.d $t6, $t1, $a0, 2 sltu $t7, $a2, $t6 sltu $t8, $a0, $s2 and $t7, $t7, $t8 - bnez $t7, .LBB4_339 + bnez $t7, .LBB4_341 # %bb.88: # %vector.memcheck860 alsl.d $t7, $t1, $a1, 2 sltu $t8, $a2, $t7 sltu $fp, $a1, $s2 and $t8, $t8, $fp - bnez $t8, .LBB4_339 + bnez $t8, .LBB4_341 # %bb.89: # %vector.memcheck860 alsl.d $t8, $t1, $a3, 2 sltu $fp, $a2, $t8 sltu $s3, $a3, $s2 and $fp, $fp, $s3 - bnez $fp, .LBB4_339 + bnez $fp, .LBB4_341 # %bb.90: # %vector.memcheck860 alsl.d $fp, $t1, $a7, 2 sltu $s3, $a2, $fp sltu $s2, $a7, $s2 and $s2, $s3, $s2 - bnez $s2, .LBB4_339 + bnez $s2, .LBB4_341 # %bb.91: # %vector.memcheck860 sltu $s2, $a4, $s0 sltu $s3, $a5, $s1 and $s2, $s2, $s3 - bnez $s2, .LBB4_339 + bnez $s2, .LBB4_341 # %bb.92: # %vector.memcheck860 sltu $s2, $a4, $t5 sltu $s3, $a6, $s1 and $s2, $s2, $s3 - bnez $s2, .LBB4_339 + bnez $s2, .LBB4_341 # %bb.93: # %vector.memcheck860 sltu $s2, $a4, $t4 sltu $s3, $t0, $s1 and $s2, $s2, $s3 - bnez $s2, .LBB4_339 + bnez $s2, .LBB4_341 # %bb.94: # %vector.memcheck860 sltu $s2, $a4, $t6 sltu $s3, $a0, $s1 and $s2, $s2, $s3 - bnez $s2, .LBB4_339 + bnez $s2, .LBB4_341 # %bb.95: # %vector.memcheck860 sltu $s2, $a4, $t7 sltu $s3, $a1, $s1 and $s2, $s2, $s3 - bnez $s2, .LBB4_339 + bnez $s2, .LBB4_341 # %bb.96: # %vector.memcheck860 sltu $s2, $a4, $t8 sltu $s3, $a3, $s1 and $s2, $s2, $s3 - bnez $s2, .LBB4_339 + bnez $s2, .LBB4_341 # %bb.97: # %vector.memcheck860 sltu $s2, $a4, $fp sltu $s1, $a7, $s1 and $s1, $s2, $s1 - bnez $s1, .LBB4_339 + bnez $s1, .LBB4_341 # %bb.98: # %vector.memcheck860 sltu $s1, $a5, $t5 sltu $s2, $a6, $s0 and $s1, $s1, $s2 - bnez $s1, .LBB4_339 + bnez $s1, .LBB4_341 # %bb.99: # %vector.memcheck860 sltu $s1, $a5, $t4 sltu $s2, $t0, $s0 and $s1, $s1, $s2 - bnez $s1, .LBB4_339 + bnez $s1, .LBB4_341 # %bb.100: # %vector.memcheck860 sltu $s1, $a5, $t6 sltu $s2, $a0, $s0 and $s1, $s1, $s2 - bnez $s1, .LBB4_339 + bnez $s1, .LBB4_341 # %bb.101: # %vector.memcheck860 sltu $s1, $a5, $t7 sltu $s2, $a1, $s0 and $s1, $s1, $s2 - bnez $s1, .LBB4_339 + bnez $s1, .LBB4_341 # %bb.102: # %vector.memcheck860 sltu $s1, $a5, $t8 sltu $s2, $a3, $s0 and $s1, $s1, $s2 - bnez $s1, .LBB4_339 + bnez $s1, .LBB4_341 # %bb.103: # %vector.memcheck860 sltu $s1, $a5, $fp sltu $s0, $a7, $s0 and $s0, $s1, $s0 - ld.d $s2, $sp, 408 # 8-byte Folded Reload - bnez $s0, .LBB4_340 + ld.d $s2, $sp, 416 # 8-byte Folded Reload + bnez $s0, .LBB4_342 # %bb.104: # %vector.memcheck860 sltu $s0, $a6, $t4 sltu $s1, $t0, $t5 and $s0, $s0, $s1 - bnez $s0, .LBB4_340 + bnez $s0, .LBB4_342 # %bb.105: # %vector.memcheck860 sltu $s0, $a6, $t6 sltu $s1, $a0, $t5 and $s0, $s0, $s1 - bnez $s0, .LBB4_340 + bnez $s0, .LBB4_342 # %bb.106: # %vector.memcheck860 sltu $s0, $a6, $t7 sltu $s1, $a1, $t5 and $s0, $s0, $s1 - bnez $s0, .LBB4_340 + bnez $s0, .LBB4_342 # %bb.107: # %vector.memcheck860 sltu $s0, $a6, $t8 sltu $s1, $a3, $t5 and $s0, $s0, $s1 - bnez $s0, .LBB4_340 + bnez $s0, .LBB4_342 # %bb.108: # %vector.memcheck860 sltu $s0, $a6, $fp sltu $t5, $a7, $t5 and $t5, $s0, $t5 - bnez $t5, .LBB4_340 + bnez $t5, .LBB4_342 # %bb.109: # %vector.memcheck860 sltu $t5, $t0, $t6 sltu $t6, $a0, $t4 and $t5, $t5, $t6 - ld.d $s6, $sp, 144 # 8-byte Folded Reload - bnez $t5, .LBB4_35 + ld.d $s6, $sp, 152 # 8-byte Folded Reload + bnez $t5, .LBB4_340 # %bb.110: # %vector.memcheck860 sltu $t5, $t0, $t7 sltu $t6, $a1, $t4 and $t5, $t5, $t6 - bnez $t5, .LBB4_35 + bnez $t5, .LBB4_340 # %bb.111: # %vector.memcheck860 sltu $t5, $t0, $t8 sltu $t6, $a3, $t4 and $t5, $t5, $t6 - bnez $t5, .LBB4_35 + bnez $t5, .LBB4_340 # %bb.112: # %vector.memcheck860 sltu $t5, $t0, $fp sltu $t4, $a7, $t4 and $t4, $t5, $t4 + move $s3, $s4 bnez $t4, .LBB4_35 # %bb.113: # %vector.ph986 bstrpick.d $t2, $t3, 31, 2 @@ -2905,8 +2901,8 @@ partQ__align: # @partQ__align addi.d $t3, $t3, 16 bnez $s5, .LBB4_114 # %bb.115: # %middle.block1003 - ld.d $s5, $sp, 48 # 8-byte Folded Reload - ld.d $s6, $sp, 144 # 8-byte Folded Reload + ld.d $s3, $sp, 56 # 8-byte Folded Reload + ld.d $s6, $sp, 152 # 8-byte Folded Reload bne $t1, $t2, .LBB4_35 b .LBB4_37 .LBB4_116: # %vector.memcheck1006 @@ -2916,159 +2912,161 @@ partQ__align: # @partQ__align sltu $t4, $a4, $s2 and $t4, $t2, $t4 move $t2, $zero - bnez $t4, .LBB4_341 + bnez $t4, .LBB4_345 # %bb.117: # %vector.memcheck1006 alsl.d $s0, $t1, $a5, 2 sltu $t4, $a2, $s0 sltu $t5, $a5, $s2 and $t4, $t4, $t5 - bnez $t4, .LBB4_341 + bnez $t4, .LBB4_345 # %bb.118: # %vector.memcheck1006 alsl.d $t5, $t1, $a6, 2 sltu $t4, $a2, $t5 sltu $t6, $a6, $s2 and $t4, $t4, $t6 - bnez $t4, .LBB4_341 + bnez $t4, .LBB4_345 # %bb.119: # %vector.memcheck1006 + move $s4, $s3 alsl.d $t4, $t1, $t0, 2 sltu $t6, $a2, $t4 sltu $t7, $t0, $s2 and $t6, $t6, $t7 - bnez $t6, .LBB4_341 + bnez $t6, .LBB4_343 # %bb.120: # %vector.memcheck1006 alsl.d $t6, $t1, $a0, 2 sltu $t7, $a2, $t6 sltu $t8, $a0, $s2 and $t7, $t7, $t8 - bnez $t7, .LBB4_341 + bnez $t7, .LBB4_343 # %bb.121: # %vector.memcheck1006 alsl.d $t7, $t1, $a1, 2 sltu $t8, $a2, $t7 sltu $fp, $a1, $s2 and $t8, $t8, $fp - bnez $t8, .LBB4_341 + bnez $t8, .LBB4_343 # %bb.122: # %vector.memcheck1006 alsl.d $t8, $t1, $a3, 2 sltu $fp, $a2, $t8 sltu $s3, $a3, $s2 and $fp, $fp, $s3 - bnez $fp, .LBB4_341 + bnez $fp, .LBB4_343 # %bb.123: # %vector.memcheck1006 alsl.d $fp, $t1, $a7, 2 sltu $s3, $a2, $fp sltu $s2, $a7, $s2 and $s2, $s3, $s2 - bnez $s2, .LBB4_341 + bnez $s2, .LBB4_343 # %bb.124: # %vector.memcheck1006 sltu $s2, $a4, $s0 sltu $s3, $a5, $s1 and $s2, $s2, $s3 - bnez $s2, .LBB4_341 + bnez $s2, .LBB4_343 # %bb.125: # %vector.memcheck1006 sltu $s2, $a4, $t5 sltu $s3, $a6, $s1 and $s2, $s2, $s3 - bnez $s2, .LBB4_341 + bnez $s2, .LBB4_343 # %bb.126: # %vector.memcheck1006 sltu $s2, $a4, $t4 sltu $s3, $t0, $s1 and $s2, $s2, $s3 - bnez $s2, .LBB4_341 + bnez $s2, .LBB4_343 # %bb.127: # %vector.memcheck1006 sltu $s2, $a4, $t6 sltu $s3, $a0, $s1 and $s2, $s2, $s3 - bnez $s2, .LBB4_341 + bnez $s2, .LBB4_343 # %bb.128: # %vector.memcheck1006 sltu $s2, $a4, $t7 sltu $s3, $a1, $s1 and $s2, $s2, $s3 - bnez $s2, .LBB4_341 + bnez $s2, .LBB4_343 # %bb.129: # %vector.memcheck1006 sltu $s2, $a4, $t8 sltu $s3, $a3, $s1 and $s2, $s2, $s3 - bnez $s2, .LBB4_341 + bnez $s2, .LBB4_343 # %bb.130: # %vector.memcheck1006 sltu $s2, $a4, $fp sltu $s1, $a7, $s1 and $s1, $s2, $s1 - bnez $s1, .LBB4_341 + bnez $s1, .LBB4_343 # %bb.131: # %vector.memcheck1006 sltu $s1, $a5, $t5 sltu $s2, $a6, $s0 and $s1, $s1, $s2 - bnez $s1, .LBB4_341 + bnez $s1, .LBB4_343 # %bb.132: # %vector.memcheck1006 sltu $s1, $a5, $t4 sltu $s2, $t0, $s0 and $s1, $s1, $s2 - bnez $s1, .LBB4_341 + bnez $s1, .LBB4_343 # %bb.133: # %vector.memcheck1006 sltu $s1, $a5, $t6 sltu $s2, $a0, $s0 and $s1, $s1, $s2 - bnez $s1, .LBB4_341 + bnez $s1, .LBB4_343 # %bb.134: # %vector.memcheck1006 sltu $s1, $a5, $t7 sltu $s2, $a1, $s0 and $s1, $s1, $s2 - bnez $s1, .LBB4_341 + bnez $s1, .LBB4_343 # %bb.135: # %vector.memcheck1006 sltu $s1, $a5, $t8 sltu $s2, $a3, $s0 and $s1, $s1, $s2 - bnez $s1, .LBB4_341 + bnez $s1, .LBB4_343 # %bb.136: # %vector.memcheck1006 sltu $s1, $a5, $fp sltu $s0, $a7, $s0 and $s0, $s1, $s0 - ld.d $s2, $sp, 408 # 8-byte Folded Reload - bnez $s0, .LBB4_40 + ld.d $s2, $sp, 416 # 8-byte Folded Reload + bnez $s0, .LBB4_338 # %bb.137: # %vector.memcheck1006 sltu $s0, $a6, $t4 sltu $s1, $t0, $t5 and $s0, $s0, $s1 - bnez $s0, .LBB4_40 + bnez $s0, .LBB4_338 # %bb.138: # %vector.memcheck1006 sltu $s0, $a6, $t6 sltu $s1, $a0, $t5 and $s0, $s0, $s1 - bnez $s0, .LBB4_40 + bnez $s0, .LBB4_338 # %bb.139: # %vector.memcheck1006 sltu $s0, $a6, $t7 sltu $s1, $a1, $t5 and $s0, $s0, $s1 - bnez $s0, .LBB4_40 + bnez $s0, .LBB4_338 # %bb.140: # %vector.memcheck1006 sltu $s0, $a6, $t8 sltu $s1, $a3, $t5 and $s0, $s0, $s1 - bnez $s0, .LBB4_40 + bnez $s0, .LBB4_338 # %bb.141: # %vector.memcheck1006 sltu $s0, $a6, $fp sltu $t5, $a7, $t5 and $t5, $s0, $t5 - bnez $t5, .LBB4_40 + bnez $t5, .LBB4_338 # %bb.142: # %vector.memcheck1006 sltu $t5, $t0, $t6 sltu $t6, $a0, $t4 and $t5, $t5, $t6 - bnez $t5, .LBB4_40 + bnez $t5, .LBB4_338 # %bb.143: # %vector.memcheck1006 sltu $t5, $t0, $t7 sltu $t6, $a1, $t4 and $t5, $t5, $t6 - bnez $t5, .LBB4_40 + bnez $t5, .LBB4_338 # %bb.144: # %vector.memcheck1006 sltu $t5, $t0, $t8 sltu $t6, $a3, $t4 and $t5, $t5, $t6 - bnez $t5, .LBB4_40 + bnez $t5, .LBB4_338 # %bb.145: # %vector.memcheck1006 sltu $t5, $t0, $fp sltu $t4, $a7, $t4 and $t4, $t5, $t4 + move $s3, $s4 bnez $t4, .LBB4_40 # %bb.146: # %vector.ph1137 bstrpick.d $t2, $t3, 31, 2 @@ -3221,158 +3219,160 @@ partQ__align: # @partQ__align addi.d $t3, $t3, 16 bnez $s5, .LBB4_147 # %bb.148: # %middle.block1156 - ld.d $s5, $sp, 48 # 8-byte Folded Reload + ld.d $s3, $sp, 56 # 8-byte Folded Reload bne $t1, $t2, .LBB4_40 b .LBB4_42 .LBB4_149: # %.loopexit639.thread - ld.d $a1, $sp, 320 # 8-byte Folded Reload - ld.d $a1, $a1, %pc_lo12(partQ__align.m) - st.w $zero, $a1, 0 + ld.d $a2, $sp, 328 # 8-byte Folded Reload + ld.d $a2, $a2, %pc_lo12(partQ__align.m) + st.w $zero, $a2, 0 .LBB4_150: # %._crit_edge673 - ori $a2, $zero, 1 - st.d $a2, $sp, 352 # 8-byte Folded Spill - beqz $t4, .LBB4_279 + ori $a3, $zero, 1 + st.d $a3, $sp, 360 # 8-byte Folded Spill + beqz $t5, .LBB4_279 .LBB4_151: # %._crit_edge673.thread - ori $a2, $zero, 0 - lu32i.d $a2, -1 - add.d $a2, $a0, $a2 - srai.d $a2, $a2, 30 - fldx.s $fa0, $t5, $a2 + ori $a3, $zero, 0 + lu32i.d $a3, -1 + add.d $a3, $a1, $a3 + srai.d $a3, $a3, 30 + fldx.s $fa0, $t6, $a3 .LBB4_152: - st.d $s0, $sp, 256 # 8-byte Folded Spill - ld.w $s0, $s5, 0 - ld.d $a2, $sp, 128 # 8-byte Folded Reload - ld.d $a3, $a2, %pc_lo12(partQ__align.lastverticalw) - sltu $a2, $zero, $s0 - add.w $a4, $a2, $t7 - ori $a2, $zero, 2 - st.d $a3, $sp, 344 # 8-byte Folded Spill - fst.s $fa0, $a3, 0 - st.d $a4, $sp, 336 # 8-byte Folded Spill - blt $a4, $a2, .LBB4_167 + st.d $s0, $sp, 264 # 8-byte Folded Spill + ld.w $s0, $s3, 0 + ld.d $a3, $sp, 136 # 8-byte Folded Reload + ld.d $a4, $a3, %pc_lo12(partQ__align.lastverticalw) + sltu $a3, $zero, $s0 + add.w $a5, $a3, $t7 + ori $a3, $zero, 2 + st.d $a4, $sp, 352 # 8-byte Folded Spill + fst.s $fa0, $a4, 0 + st.d $a5, $sp, 344 # 8-byte Folded Spill + blt $a5, $a3, .LBB4_167 # %bb.153: # %.lr.ph709 - st.d $s5, $sp, 8 # 8-byte Folded Spill - st.d $s6, $sp, 304 # 8-byte Folded Spill + st.d $s3, $sp, 16 # 8-byte Folded Spill st.d $s1, $sp, 312 # 8-byte Folded Spill st.d $fp, $sp, 320 # 8-byte Folded Spill - st.d $s7, $sp, 328 # 8-byte Folded Spill - st.d $s8, $sp, 16 # 8-byte Folded Spill - ld.d $a2, $sp, 96 # 8-byte Folded Reload - sltu $a2, $zero, $a2 - slt $a3, $zero, $t4 - and $a2, $a2, $a3 - st.d $a2, $sp, 296 # 8-byte Folded Spill - pcalau12i $a2, %pc_hi20(impmtx) - ld.d $a2, $a2, %pc_lo12(impmtx) - st.d $a2, $sp, 224 # 8-byte Folded Spill - bstrpick.d $a2, $t8, 30, 0 - st.d $a2, $sp, 216 # 8-byte Folded Spill - ld.d $a2, $sp, 40 # 8-byte Folded Reload - ld.d $a2, $a2, %pc_lo12(partQ__align.ijp) - st.d $a2, $sp, 208 # 8-byte Folded Spill - ld.d $a2, $sp, 56 # 8-byte Folded Reload - ld.d $a2, $a2, %pc_lo12(partQ__align.mp) + st.d $s5, $sp, 328 # 8-byte Folded Spill + st.d $s6, $sp, 336 # 8-byte Folded Spill + st.d $s8, $sp, 32 # 8-byte Folded Spill + ld.d $a3, $sp, 104 # 8-byte Folded Reload + sltu $a3, $zero, $a3 + slt $a4, $zero, $t5 + and $a3, $a3, $a4 + st.d $a3, $sp, 304 # 8-byte Folded Spill + pcalau12i $a3, %pc_hi20(impmtx) + ld.d $a3, $a3, %pc_lo12(impmtx) + st.d $a3, $sp, 232 # 8-byte Folded Spill + bstrpick.d $a3, $t8, 30, 0 + st.d $a3, $sp, 224 # 8-byte Folded Spill + ld.d $a3, $sp, 48 # 8-byte Folded Reload + ld.d $a3, $a3, %pc_lo12(partQ__align.ijp) + st.d $a3, $sp, 216 # 8-byte Folded Spill ld.d $a3, $sp, 64 # 8-byte Folded Reload - ld.d $a3, $a3, %pc_lo12(partQ__align.fg_t_og_h_dg_n2_p) + ld.d $a3, $a3, %pc_lo12(partQ__align.mp) ld.d $a4, $sp, 72 # 8-byte Folded Reload - ld.d $a4, $a4, %pc_lo12(partQ__align.gapz_n2) + ld.d $a4, $a4, %pc_lo12(partQ__align.fg_t_og_h_dg_n2_p) ld.d $a5, $sp, 80 # 8-byte Folded Reload - ld.d $a5, $a5, %pc_lo12(partQ__align.og_t_fg_h_dg_n2_p) - ld.d $a6, $sp, 280 # 8-byte Folded Reload - ld.d $a6, $a6, %pc_lo12(partQ__align.og_h_dg_n2_p) - ld.d $a7, $sp, 168 # 8-byte Folded Reload - ld.d $a7, $a7, %pc_lo12(partQ__align.fg_h_dg_n2_p) - addi.d $t3, $a4, 8 - ld.d $a4, $sp, 192 # 8-byte Folded Reload - ld.d $a4, $a4, %pc_lo12(partQ__align.fgcp2g) - ld.d $t0, $sp, 200 # 8-byte Folded Reload - ld.d $t0, $t0, %pc_lo12(partQ__align.ogcp2g) - ld.d $t1, $sp, 88 # 8-byte Folded Reload - ld.d $t1, $t1, %pc_lo12(partQ__align.fg_t_og_h_dg_n1_p) - st.d $t1, $sp, 200 # 8-byte Folded Spill - ld.d $t1, $sp, 160 # 8-byte Folded Reload - ld.d $t1, $t1, %pc_lo12(partQ__align.og_t_fg_h_dg_n1_p) - st.d $t1, $sp, 192 # 8-byte Folded Spill - ld.d $t1, $sp, 184 # 8-byte Folded Reload - ld.d $t1, $t1, %pc_lo12(partQ__align.og_h_dg_n1_p) - st.d $t1, $sp, 184 # 8-byte Folded Spill - ld.d $t1, $sp, 176 # 8-byte Folded Reload - ld.d $t1, $t1, %pc_lo12(partQ__align.fg_h_dg_n1_p) - st.d $t1, $sp, 176 # 8-byte Folded Spill - ld.d $t1, $sp, 152 # 8-byte Folded Reload - ld.d $t1, $t1, %pc_lo12(partQ__align.gapz_n1) - st.d $t1, $sp, 168 # 8-byte Folded Spill - ld.d $t1, $sp, 288 # 8-byte Folded Reload - ld.d $t1, $t1, %pc_lo12(partQ__align.fgcp1g) - st.d $t1, $sp, 160 # 8-byte Folded Spill - ld.d $t1, $sp, 376 # 8-byte Folded Reload - ld.d $t1, $t1, %pc_lo12(partQ__align.ogcp1g) - st.d $t1, $sp, 152 # 8-byte Folded Spill - ori $t1, $zero, 0 - lu32i.d $t1, -1 - add.d $a0, $a0, $t1 - srai.d $a0, $a0, 30 - st.d $a0, $sp, 288 # 8-byte Folded Spill - addi.d $s2, $a1, 4 - addi.d $s8, $a2, 4 - addi.d $s0, $a3, 4 - addi.d $fp, $a5, 4 - addi.d $s1, $a6, 4 - addi.d $s5, $a7, 4 - addi.d $s6, $a4, 4 - addi.d $s3, $t0, 4 + ld.d $a5, $a5, %pc_lo12(partQ__align.gapz_n2) + ld.d $a6, $sp, 88 # 8-byte Folded Reload + ld.d $a6, $a6, %pc_lo12(partQ__align.og_t_fg_h_dg_n2_p) + ld.d $a7, $sp, 288 # 8-byte Folded Reload + ld.d $a7, $a7, %pc_lo12(partQ__align.og_h_dg_n2_p) + ld.d $t0, $sp, 176 # 8-byte Folded Reload + ld.d $t0, $t0, %pc_lo12(partQ__align.fg_h_dg_n2_p) + addi.d $t4, $a5, 8 + ld.d $a5, $sp, 200 # 8-byte Folded Reload + ld.d $a5, $a5, %pc_lo12(partQ__align.fgcp2g) + ld.d $t1, $sp, 208 # 8-byte Folded Reload + ld.d $t1, $t1, %pc_lo12(partQ__align.ogcp2g) + ld.d $t2, $sp, 96 # 8-byte Folded Reload + ld.d $t2, $t2, %pc_lo12(partQ__align.fg_t_og_h_dg_n1_p) + st.d $t2, $sp, 208 # 8-byte Folded Spill + ld.d $t2, $sp, 168 # 8-byte Folded Reload + ld.d $t2, $t2, %pc_lo12(partQ__align.og_t_fg_h_dg_n1_p) + st.d $t2, $sp, 200 # 8-byte Folded Spill + ld.d $t2, $sp, 192 # 8-byte Folded Reload + ld.d $t2, $t2, %pc_lo12(partQ__align.og_h_dg_n1_p) + st.d $t2, $sp, 192 # 8-byte Folded Spill + ld.d $t2, $sp, 184 # 8-byte Folded Reload + ld.d $t2, $t2, %pc_lo12(partQ__align.fg_h_dg_n1_p) + st.d $t2, $sp, 184 # 8-byte Folded Spill + ld.d $t2, $sp, 160 # 8-byte Folded Reload + ld.d $t2, $t2, %pc_lo12(partQ__align.gapz_n1) + st.d $t2, $sp, 176 # 8-byte Folded Spill + ld.d $t2, $sp, 296 # 8-byte Folded Reload + ld.d $t2, $t2, %pc_lo12(partQ__align.fgcp1g) + st.d $t2, $sp, 168 # 8-byte Folded Spill + ld.d $t2, $sp, 384 # 8-byte Folded Reload + ld.d $t2, $t2, %pc_lo12(partQ__align.ogcp1g) + st.d $t2, $sp, 160 # 8-byte Folded Spill + ori $t2, $zero, 0 + lu32i.d $t2, -1 + add.d $a1, $a1, $t2 + srai.d $a1, $a1, 30 + st.d $a1, $sp, 296 # 8-byte Folded Spill + addi.d $s2, $a2, 4 + addi.d $s8, $a3, 4 + addi.d $s0, $a4, 4 + addi.d $fp, $a6, 4 + addi.d $s1, $a7, 4 + addi.d $s5, $t0, 4 + addi.d $s6, $a5, 4 + addi.d $s3, $t1, 4 movgr2fr.w $fs0, $zero ori $s7, $zero, 1 - st.d $t3, $sp, 280 # 8-byte Folded Spill + movgr2fr.w $fs1, $a0 + ld.d $t2, $sp, 392 # 8-byte Folded Reload + st.d $t4, $sp, 288 # 8-byte Folded Spill b .LBB4_155 .p2align 4, , 16 .LBB4_154: # %._crit_edge701 # in Loop: Header=BB4_155 Depth=1 - ld.d $a2, $sp, 288 # 8-byte Folded Reload + ld.d $a2, $sp, 296 # 8-byte Folded Reload fldx.s $fa0, $s4, $a2 - ld.d $a2, $sp, 344 # 8-byte Folded Reload + ld.d $a2, $sp, 352 # 8-byte Folded Reload fstx.s $fa0, $a2, $a0 move $s7, $a1 - move $t5, $s4 - ld.d $a0, $sp, 336 # 8-byte Folded Reload + move $t6, $s4 + ld.d $a0, $sp, 344 # 8-byte Folded Reload beq $a1, $a0, .LBB4_168 .LBB4_155: # =>This Loop Header: Depth=1 # Child Loop BB4_157 Depth 2 # Child Loop BB4_161 Depth 2 addi.d $a0, $s7, -1 - st.d $a0, $sp, 376 # 8-byte Folded Spill + st.d $a0, $sp, 384 # 8-byte Folded Spill slli.d $a0, $a0, 2 - ld.d $a1, $sp, 392 # 8-byte Folded Reload + ld.d $a1, $sp, 400 # 8-byte Folded Reload fldx.s $fa0, $a1, $a0 - move $s4, $t6 - st.d $t5, $sp, 384 # 8-byte Folded Spill - fst.s $fa0, $t5, 0 - move $a0, $t6 - ld.d $a1, $sp, 320 # 8-byte Folded Reload - ld.d $a2, $sp, 328 # 8-byte Folded Reload + move $s4, $t2 + st.d $t6, $sp, 392 # 8-byte Folded Spill + fst.s $fa0, $t6, 0 + move $a0, $t2 + ld.d $a1, $sp, 328 # 8-byte Folded Reload + ld.d $a2, $sp, 336 # 8-byte Folded Reload move $a3, $s7 - move $a4, $t4 - ld.d $a5, $sp, 312 # 8-byte Folded Reload - ld.d $a6, $sp, 304 # 8-byte Folded Reload + move $a4, $t5 + ld.d $a5, $sp, 320 # 8-byte Folded Reload + ld.d $a6, $sp, 312 # 8-byte Folded Reload move $a7, $zero pcaddu18i $ra, %call36(match_calc) jirl $ra, $ra, 0 slli.d $a0, $s7, 2 - ld.d $a6, $sp, 360 # 8-byte Folded Reload - ld.d $a1, $sp, 296 # 8-byte Folded Reload + ld.d $a6, $sp, 368 # 8-byte Folded Reload + ld.d $a1, $sp, 304 # 8-byte Folded Reload beqz $a1, .LBB4_158 # %bb.156: # %.lr.ph.i528 # in Loop: Header=BB4_155 Depth=1 - ld.d $a1, $sp, 264 # 8-byte Folded Reload + ld.d $a1, $sp, 272 # 8-byte Folded Reload ldx.w $a1, $a1, $a0 - ld.d $a2, $sp, 256 # 8-byte Folded Reload + ld.d $a2, $sp, 264 # 8-byte Folded Reload add.w $a1, $a1, $a2 slli.d $a1, $a1, 3 - ld.d $a2, $sp, 224 # 8-byte Folded Reload + ld.d $a2, $sp, 232 # 8-byte Folded Reload ldx.d $a1, $a2, $a1 - ld.d $a2, $sp, 272 # 8-byte Folded Reload + ld.d $a2, $sp, 280 # 8-byte Folded Reload move $a3, $s4 - ld.d $a4, $sp, 216 # 8-byte Folded Reload + ld.d $a4, $sp, 224 # 8-byte Folded Reload .p2align 4, , 16 .LBB4_157: # Parent Loop BB4_155 Depth=1 # => This Inner Loop Header: Depth=2 @@ -3389,45 +3389,43 @@ partQ__align: # @partQ__align bnez $a4, .LBB4_157 .LBB4_158: # %part_imp_match_out_vead_gapmapQ.exit533 # in Loop: Header=BB4_155 Depth=1 - ld.d $a1, $sp, 392 # 8-byte Folded Reload + ld.d $a1, $sp, 400 # 8-byte Folded Reload fldx.s $fa0, $a1, $a0 fst.s $fa0, $s4, 0 addi.d $a1, $s7, 1 - ld.d $t4, $sp, 368 # 8-byte Folded Reload - ld.d $t6, $sp, 384 # 8-byte Folded Reload - ld.d $a2, $sp, 352 # 8-byte Folded Reload - ld.d $t3, $sp, 280 # 8-byte Folded Reload ld.d $t5, $sp, 376 # 8-byte Folded Reload + ld.d $t2, $sp, 392 # 8-byte Folded Reload + ld.d $a2, $sp, 360 # 8-byte Folded Reload + ld.d $t4, $sp, 288 # 8-byte Folded Reload + ld.d $t6, $sp, 384 # 8-byte Folded Reload bnez $a2, .LBB4_154 # %bb.159: # %.lr.ph700.preheader # in Loop: Header=BB4_155 Depth=1 move $a2, $zero move $a3, $zero move $a4, $zero - ld.d $a5, $sp, 200 # 8-byte Folded Reload + ld.d $a5, $sp, 208 # 8-byte Folded Reload fldx.s $fa0, $a5, $a0 - ld.d $a5, $sp, 192 # 8-byte Folded Reload + ld.d $a5, $sp, 200 # 8-byte Folded Reload fldx.s $fa1, $a5, $a0 - ld.d $a5, $sp, 184 # 8-byte Folded Reload + ld.d $a5, $sp, 192 # 8-byte Folded Reload fldx.s $fa2, $a5, $a0 - ld.d $a5, $sp, 176 # 8-byte Folded Reload + ld.d $a5, $sp, 184 # 8-byte Folded Reload fldx.s $fa3, $a5, $a0 - ld.d $a6, $sp, 168 # 8-byte Folded Reload + ld.d $a6, $sp, 176 # 8-byte Folded Reload fldx.s $fa4, $a6, $a0 slli.d $a5, $a1, 2 fldx.s $fa5, $a6, $a5 - ld.d $a5, $sp, 160 # 8-byte Folded Reload + ld.d $a5, $sp, 168 # 8-byte Folded Reload fldx.s $fa6, $a5, $a0 - fld.s $ft0, $t6, 0 - ld.d $a5, $sp, 232 # 8-byte Folded Reload - fld.s $ft1, $a5, %pc_lo12(.LCPI4_2) + fld.s $ft0, $t2, 0 slli.d $a5, $s7, 3 - ld.d $a6, $sp, 208 # 8-byte Folded Reload + ld.d $a6, $sp, 216 # 8-byte Folded Reload ldx.d $a6, $a6, $a5 - ld.d $a5, $sp, 152 # 8-byte Folded Reload + ld.d $a5, $sp, 160 # 8-byte Folded Reload fldx.s $fa7, $a5, $a0 - vld $vr10, $sp, 240 # 16-byte Folded Reload - fmadd.s $ft0, $ft2, $ft1, $ft0 + vld $vr9, $sp, 240 # 16-byte Folded Reload + fmadd.s $ft0, $ft1, $fs1, $ft0 addi.d $a5, $s4, 4 addi.d $a6, $a6, 4 addi.d $a7, $zero, -1 @@ -3448,12 +3446,12 @@ partQ__align: # @partQ__align addi.d $a3, $a3, 4 addi.w $a2, $a2, 1 addi.d $a7, $a7, -1 - beq $t4, $a2, .LBB4_154 + beq $t5, $a2, .LBB4_154 .LBB4_161: # %.lr.ph700 # Parent Loop BB4_155 Depth=1 # => This Inner Loop Header: Depth=2 fldx.s $ft2, $s1, $a3 - fldx.s $ft1, $t6, $a3 + fldx.s $ft1, $t2, $a3 fldx.s $ft3, $s3, $a3 fmul.s $ft2, $fa7, $ft2 fadd.s $ft2, $ft1, $ft2 @@ -3476,7 +3474,7 @@ partQ__align: # @partQ__align stx.w $t0, $a6, $a3 fmov.s $fs0, $ft2 .LBB4_163: # in Loop: Header=BB4_161 Depth=2 - add.d $t0, $t3, $a3 + add.d $t0, $t4, $a3 fld.s $ft4, $t0, 0 fldx.s $ft3, $s2, $a3 fldx.s $ft2, $fp, $a3 @@ -3497,31 +3495,31 @@ partQ__align: # @partQ__align bcnez $fcc0, .LBB4_160 # %bb.166: # in Loop: Header=BB4_161 Depth=2 fstx.s $ft4, $s2, $a3 - stx.w $t5, $s8, $a3 + stx.w $t6, $s8, $a3 b .LBB4_160 .LBB4_167: movgr2fr.w $fs0, $zero - ld.d $s5, $sp, 136 # 8-byte Folded Reload + ld.d $s5, $sp, 144 # 8-byte Folded Reload bnez $s0, .LBB4_183 b .LBB4_169 .LBB4_168: # %._crit_edge710.loopexit - ld.d $a0, $sp, 8 # 8-byte Folded Reload + ld.d $a0, $sp, 16 # 8-byte Folded Reload ld.w $s0, $a0, 0 - move $t5, $s4 - ld.d $s8, $sp, 16 # 8-byte Folded Reload - ld.d $s5, $sp, 136 # 8-byte Folded Reload - ld.d $s2, $sp, 408 # 8-byte Folded Reload - ld.d $t7, $sp, 120 # 8-byte Folded Reload - ld.d $s3, $sp, 144 # 8-byte Folded Reload + move $t6, $s4 + ld.d $s8, $sp, 32 # 8-byte Folded Reload + ld.d $s5, $sp, 144 # 8-byte Folded Reload + ld.d $s2, $sp, 416 # 8-byte Folded Reload + ld.d $t7, $sp, 128 # 8-byte Folded Reload + ld.d $s4, $sp, 152 # 8-byte Folded Reload bnez $s0, .LBB4_183 .LBB4_169: # %.preheader637 - ld.d $a0, $sp, 352 # 8-byte Folded Reload + ld.d $a0, $sp, 360 # 8-byte Folded Reload bnez $a0, .LBB4_176 # %bb.170: # %.lr.ph715 pcalau12i $a0, %got_pc_hi20(offset) ld.d $a0, $a0, %got_pc_lo12(offset) ld.w $a0, $a0, 0 - ld.d $a1, $sp, 48 # 8-byte Folded Reload + ld.d $a1, $sp, 56 # 8-byte Folded Reload addi.d $a1, $a1, 1 bstrpick.d $a1, $a1, 31, 0 addi.d $a2, $a1, -1 @@ -3535,13 +3533,13 @@ partQ__align: # @partQ__align move $a3, $a2 bstrins.d $a3, $a5, 1, 0 vreplgr2vr.w $vr0, $a0 - ld.d $a5, $sp, 48 # 8-byte Folded Reload + ld.d $a5, $sp, 56 # 8-byte Folded Reload vreplgr2vr.d $vr1, $a5 - pcalau12i $a5, %pc_hi20(.LCPI4_3) - vld $vr2, $a5, %pc_lo12(.LCPI4_3) - pcalau12i $a5, %pc_hi20(.LCPI4_4) - vld $vr3, $a5, %pc_lo12(.LCPI4_4) - addi.d $a5, $t5, 4 + pcalau12i $a5, %pc_hi20(.LCPI4_1) + vld $vr2, $a5, %pc_lo12(.LCPI4_1) + pcalau12i $a5, %pc_hi20(.LCPI4_2) + vld $vr3, $a5, %pc_lo12(.LCPI4_2) + addi.d $a5, $t6, 4 lu52i.d $a6, $zero, -1026 vreplgr2vr.d $vr4, $a6 move $a6, $a4 @@ -3601,10 +3599,10 @@ partQ__align: # @partQ__align # %bb.173: # %middle.block1300 beq $a2, $a4, .LBB4_176 .LBB4_174: # %scalar.ph1284.preheader - ld.d $a2, $sp, 48 # 8-byte Folded Reload + ld.d $a2, $sp, 56 # 8-byte Folded Reload sub.w $a2, $a2, $a3 mul.d $a2, $a0, $a2 - alsl.d $a4, $a3, $t5, 2 + alsl.d $a4, $a3, $t6, 2 sub.d $a1, $a1, $a3 vldi $vr0, -800 .p2align 4, , 16 @@ -3623,7 +3621,7 @@ partQ__align: # @partQ__align addi.d $a4, $a4, 4 bnez $a1, .LBB4_175 .LBB4_176: # %.preheader636 - blez $s3, .LBB4_183 + blez $s4, .LBB4_183 # %bb.177: # %.lr.ph718 bstrpick.d $a0, $t7, 31, 0 pcalau12i $a1, %got_pc_hi20(offset) @@ -3633,7 +3631,7 @@ partQ__align: # @partQ__align ffint.d.w $fa1, $fa0 movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 - ld.d $a0, $sp, 128 # 8-byte Folded Reload + ld.d $a0, $sp, 136 # 8-byte Folded Reload ld.d $a1, $a0, %pc_lo12(partQ__align.lastverticalw) fneg.d $fa1, $fa1 addi.d $a0, $t7, 1 @@ -3701,14 +3699,14 @@ partQ__align: # @partQ__align addi.d $a1, $a1, 4 bnez $a2, .LBB4_182 .LBB4_183: # %.loopexit - move $s1, $t5 - ld.d $a0, $sp, 128 # 8-byte Folded Reload + move $s1, $t6 + ld.d $a0, $sp, 136 # 8-byte Folded Reload ld.d $fp, $a0, %pc_lo12(partQ__align.lastverticalw) - ld.d $a0, $sp, 104 # 8-byte Folded Reload - ld.d $s3, $a0, %pc_lo12(partQ__align.mseq1) ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $s3, $a0, %pc_lo12(partQ__align.mseq1) + ld.d $a0, $sp, 120 # 8-byte Folded Reload ld.d $s6, $a0, %pc_lo12(partQ__align.mseq2) - ld.d $a0, $sp, 40 # 8-byte Folded Reload + ld.d $a0, $sp, 48 # 8-byte Folded Reload ld.d $s7, $a0, %pc_lo12(partQ__align.ijp) ld.d $a0, $s2, 0 pcaddu18i $ra, %call36(strlen) @@ -3721,11 +3719,11 @@ partQ__align: # @partQ__align addi.w $a2, $s4, 0 addi.w $a3, $a0, 0 add.w $a1, $a0, $s4 - ld.d $a4, $sp, 96 # 8-byte Folded Reload + ld.d $a4, $sp, 104 # 8-byte Folded Reload beqz $a4, .LBB4_190 # %bb.184: ori $a4, $zero, 1 - ld.d $ra, $sp, 400 # 8-byte Folded Reload + ld.d $ra, $sp, 408 # 8-byte Folded Reload beq $s0, $a4, .LBB4_201 # %bb.185: fld.s $fa0, $fp, 0 @@ -3757,7 +3755,7 @@ partQ__align: # @partQ__align .LBB4_190: ori $a4, $zero, 1 move $a5, $s0 - ld.d $s0, $sp, 400 # 8-byte Folded Reload + ld.d $s0, $sp, 408 # 8-byte Folded Reload ld.d $ra, $sp, 24 # 8-byte Folded Reload beq $a5, $a4, .LBB4_261 # %bb.191: @@ -3824,8 +3822,8 @@ partQ__align: # @partQ__align move $a5, $zero b .LBB4_207 .LBB4_204: # %vector.ph1347 - pcalau12i $a6, %pc_hi20(.LCPI4_5) - vld $vr0, $a6, %pc_lo12(.LCPI4_5) + pcalau12i $a6, %pc_hi20(.LCPI4_3) + vld $vr0, $a6, %pc_lo12(.LCPI4_3) bstrpick.d $a5, $a5, 31, 2 slli.d $a5, $a5, 2 addi.d $a6, $s7, 16 @@ -3879,8 +3877,8 @@ partQ__align: # @partQ__align .LBB4_212: # %vector.ph1363 bstrpick.d $a5, $a5, 31, 3 slli.d $a5, $a5, 3 - pcalau12i $a6, %pc_hi20(.LCPI4_6) - vld $vr0, $a6, %pc_lo12(.LCPI4_6) + pcalau12i $a6, %pc_hi20(.LCPI4_4) + vld $vr0, $a6, %pc_lo12(.LCPI4_4) addi.d $a6, $a3, 16 vrepli.b $vr1, -1 vrepli.w $vr2, -5 @@ -3925,7 +3923,7 @@ partQ__align: # @partQ__align addi.d $a3, $a3, 8 bnez $a4, .LBB4_219 .LBB4_220: # %.preheader6.i - ld.d $a3, $sp, 512 + ld.d $a3, $sp, 528 blez $s8, .LBB4_223 # %bb.221: # %.lr.ph24.i move $a4, $s6 @@ -3946,7 +3944,7 @@ partQ__align: # @partQ__align move $a4, $zero pcalau12i $a5, %pc_hi20(impmtx) ld.d $a5, $a5, %pc_lo12(impmtx) - st.d $a5, $sp, 392 # 8-byte Folded Spill + st.d $a5, $sp, 400 # 8-byte Folded Spill bstrpick.d $a6, $ra, 31, 0 bstrpick.d $a7, $s8, 31, 0 addi.w $t0, $zero, -1 @@ -4000,8 +3998,8 @@ partQ__align: # @partQ__align .LBB4_232: # %._crit_edge29.i # in Loop: Header=BB4_233 Depth=2 addi.d $fp, $fp, -1 - ld.d $ra, $sp, 400 # 8-byte Folded Reload - ld.d $s2, $sp, 408 # 8-byte Folded Reload + ld.d $ra, $sp, 408 # 8-byte Folded Reload + ld.d $s2, $sp, 416 # 8-byte Folded Reload beqz $fp, .LBB4_237 .LBB4_233: # %.preheader3.i # Parent Loop BB4_226 Depth=1 @@ -4053,7 +4051,7 @@ partQ__align: # @partQ__align # in Loop: Header=BB4_226 Depth=1 add.d $a4, $s4, $a4 add.d $a4, $t8, $a4 - ld.d $s5, $sp, 136 # 8-byte Folded Reload + ld.d $s5, $sp, 144 # 8-byte Folded Reload .LBB4_238: # %._crit_edge32.i # in Loop: Header=BB4_226 Depth=1 add.w $t2, $t7, $t2 @@ -4121,16 +4119,16 @@ partQ__align: # @partQ__align beq $t5, $t7, .LBB4_248 # %bb.247: # in Loop: Header=BB4_226 Depth=1 slli.d $t7, $t4, 2 - ld.d $a5, $sp, 264 # 8-byte Folded Reload + ld.d $a5, $sp, 272 # 8-byte Folded Reload ldx.w $t7, $a5, $t7 - ld.d $a5, $sp, 256 # 8-byte Folded Reload + ld.d $a5, $sp, 264 # 8-byte Folded Reload add.w $t7, $t7, $a5 - ld.d $a5, $sp, 272 # 8-byte Folded Reload + ld.d $a5, $sp, 280 # 8-byte Folded Reload ldx.w $t6, $a5, $t6 slli.d $t7, $t7, 3 - ld.d $a5, $sp, 392 # 8-byte Folded Reload + ld.d $a5, $sp, 400 # 8-byte Folded Reload ldx.d $t7, $a5, $t7 - ld.d $a5, $sp, 360 # 8-byte Folded Reload + ld.d $a5, $sp, 368 # 8-byte Folded Reload add.w $t6, $t6, $a5 slli.d $t6, $t6, 2 fldx.s $fa0, $t7, $t6 @@ -4138,7 +4136,7 @@ partQ__align: # @partQ__align fadd.s $fa0, $fa0, $fa1 fst.s $fa0, $a3, 0 .LBB4_248: # in Loop: Header=BB4_226 Depth=1 - ld.d $s2, $sp, 408 # 8-byte Folded Reload + ld.d $s2, $sp, 416 # 8-byte Folded Reload blez $t4, .LBB4_329 # %bb.249: # in Loop: Header=BB4_226 Depth=1 blez $t5, .LBB4_329 @@ -4244,110 +4242,110 @@ partQ__align: # @partQ__align beq $a2, $s8, .LBB4_22 b .LBB4_20 .LBB4_267: # %vector.memcheck1190 - ld.d $t0, $sp, 392 # 8-byte Folded Reload - addi.d $a7, $t0, 4 - alsl.d $t1, $a4, $t0, 2 - addi.d $t0, $a1, 8 - sltu $t0, $a7, $t0 - sltu $t2, $a1, $t1 - and $t0, $t0, $t2 - bnez $t0, .LBB4_53 + ld.d $t1, $sp, 400 # 8-byte Folded Reload + addi.d $t0, $t1, 4 + alsl.d $t2, $a5, $t1, 2 + addi.d $t1, $a2, 8 + sltu $t1, $t0, $t1 + sltu $t3, $a2, $t2 + and $t1, $t1, $t3 + bnez $t1, .LBB4_53 # %bb.268: # %vector.memcheck1190 - addi.d $t0, $a2, 4 - sltu $t0, $a7, $t0 - sltu $t2, $a2, $t1 - and $t0, $t0, $t2 - bnez $t0, .LBB4_53 -# %bb.269: # %vector.memcheck1190 - addi.d $t0, $a3, 4 - alsl.d $t2, $a4, $a3, 2 - sltu $t2, $a7, $t2 + addi.d $t1, $a3, 4 sltu $t1, $t0, $t1 - and $t1, $t2, $t1 + sltu $t3, $a3, $t2 + and $t1, $t1, $t3 bnez $t1, .LBB4_53 +# %bb.269: # %vector.memcheck1190 + addi.d $t1, $a4, 4 + alsl.d $t3, $a5, $a4, 2 + sltu $t3, $t0, $t3 + sltu $t2, $t1, $t2 + and $t2, $t3, $t2 + bnez $t2, .LBB4_53 # %bb.270: # %vector.ph1210 - move $t1, $a6 - bstrins.d $t1, $zero, 1, 0 - fld.s $fa1, $a1, 0 - fld.s $fa2, $a2, 0 - ori $t2, $zero, 1 - move $a5, $a6 - vldrepl.w $vr0, $a1, 4 - bstrins.d $a5, $t2, 1, 0 + move $t2, $a7 + bstrins.d $t2, $zero, 1, 0 + fld.s $fa1, $a2, 0 + fld.s $fa2, $a3, 0 + ori $t3, $zero, 1 + move $a6, $a7 + vldrepl.w $vr0, $a2, 4 + bstrins.d $a6, $t3, 1, 0 fmul.s $fa1, $fa1, $fa2 vreplvei.w $vr1, $vr1, 0 - move $t2, $t1 + move $t3, $t2 .p2align 4, , 16 .LBB4_271: # %vector.body1213 # =>This Inner Loop Header: Depth=1 - vld $vr2, $a7, 0 - vld $vr3, $t0, 0 + vld $vr2, $t0, 0 + vld $vr3, $t1, 0 vfadd.s $vr2, $vr2, $vr1 vfmul.s $vr3, $vr0, $vr3 vfadd.s $vr2, $vr2, $vr3 - vst $vr2, $a7, 0 - addi.d $a7, $a7, 16 - addi.d $t2, $t2, -4 + vst $vr2, $t0, 0 addi.d $t0, $t0, 16 - bnez $t2, .LBB4_271 + addi.d $t3, $t3, -4 + addi.d $t1, $t1, 16 + bnez $t3, .LBB4_271 # %bb.272: # %middle.block1225 - bne $a6, $t1, .LBB4_53 + bne $a7, $t2, .LBB4_53 b .LBB4_55 .LBB4_273: # %vector.memcheck1228 - addi.d $a7, $t5, 4 - alsl.d $t1, $a4, $t5, 2 - addi.d $t0, $a1, 8 - sltu $t0, $a7, $t0 - sltu $t2, $a1, $t1 - and $t0, $t0, $t2 - bnez $t0, .LBB4_57 + addi.d $t0, $t6, 4 + alsl.d $t2, $a5, $t6, 2 + addi.d $t1, $a2, 8 + sltu $t1, $t0, $t1 + sltu $t3, $a2, $t2 + and $t1, $t1, $t3 + bnez $t1, .LBB4_57 # %bb.274: # %vector.memcheck1228 - addi.d $t0, $a2, 4 - sltu $t0, $a7, $t0 - sltu $t2, $a2, $t1 - and $t0, $t0, $t2 - bnez $t0, .LBB4_57 -# %bb.275: # %vector.memcheck1228 - addi.d $t0, $a3, 4 - alsl.d $t2, $a4, $a3, 2 - sltu $t2, $a7, $t2 + addi.d $t1, $a3, 4 sltu $t1, $t0, $t1 - and $t1, $t2, $t1 + sltu $t3, $a3, $t2 + and $t1, $t1, $t3 bnez $t1, .LBB4_57 +# %bb.275: # %vector.memcheck1228 + addi.d $t1, $a4, 4 + alsl.d $t3, $a5, $a4, 2 + sltu $t3, $t0, $t3 + sltu $t2, $t1, $t2 + and $t2, $t3, $t2 + bnez $t2, .LBB4_57 # %bb.276: # %vector.ph1248 - move $t1, $a6 - bstrins.d $t1, $zero, 1, 0 - fld.s $fa1, $a1, 0 - fld.s $fa2, $a2, 0 - ori $t2, $zero, 1 - move $a5, $a6 - vldrepl.w $vr0, $a1, 4 - bstrins.d $a5, $t2, 1, 0 + move $t2, $a7 + bstrins.d $t2, $zero, 1, 0 + fld.s $fa1, $a2, 0 + fld.s $fa2, $a3, 0 + ori $t3, $zero, 1 + move $a6, $a7 + vldrepl.w $vr0, $a2, 4 + bstrins.d $a6, $t3, 1, 0 fmul.s $fa1, $fa1, $fa2 vreplvei.w $vr1, $vr1, 0 - move $t2, $t1 + move $t3, $t2 .p2align 4, , 16 .LBB4_277: # %vector.body1251 # =>This Inner Loop Header: Depth=1 - vld $vr2, $a7, 0 - vld $vr3, $t0, 0 + vld $vr2, $t0, 0 + vld $vr3, $t1, 0 vfadd.s $vr2, $vr2, $vr1 vfmul.s $vr3, $vr0, $vr3 vfadd.s $vr2, $vr2, $vr3 - vst $vr2, $a7, 0 - addi.d $a7, $a7, 16 - addi.d $t2, $t2, -4 + vst $vr2, $t0, 0 addi.d $t0, $t0, 16 - bnez $t2, .LBB4_277 + addi.d $t3, $t3, -4 + addi.d $t1, $t1, 16 + bnez $t3, .LBB4_277 # %bb.278: # %middle.block1263 - bne $a6, $t1, .LBB4_57 + bne $a7, $t2, .LBB4_57 b .LBB4_59 .LBB4_279: movgr2fr.w $fa0, $zero b .LBB4_152 .LBB4_280: # %vector.ph1318 - pcalau12i $a5, %pc_hi20(.LCPI4_5) - vld $vr0, $a5, %pc_lo12(.LCPI4_5) + pcalau12i $a5, %pc_hi20(.LCPI4_3) + vld $vr0, $a5, %pc_lo12(.LCPI4_3) bstrpick.d $a4, $a4, 31, 2 slli.d $a4, $a4, 2 addi.d $a5, $s7, 16 @@ -4401,8 +4399,8 @@ partQ__align: # @partQ__align .LBB4_288: # %vector.ph1333 bstrpick.d $a4, $a4, 31, 3 slli.d $a4, $a4, 3 - pcalau12i $a5, %pc_hi20(.LCPI4_6) - vld $vr0, $a5, %pc_lo12(.LCPI4_6) + pcalau12i $a5, %pc_hi20(.LCPI4_4) + vld $vr0, $a5, %pc_lo12(.LCPI4_4) addi.d $a5, $a2, 16 vrepli.b $vr1, -1 vrepli.w $vr2, -5 @@ -4677,16 +4675,16 @@ partQ__align: # @partQ__align jirl $ra, $ra, 0 addi.w $a3, $a0, 0 lu12i.w $a4, 1220 - ld.d $a2, $sp, 32 # 8-byte Folded Reload - blt $a2, $a3, .LBB4_338 + ld.d $a2, $sp, 40 # 8-byte Folded Reload + blt $a2, $a3, .LBB4_339 # %bb.330: # %Atracking_localhom.exit ori $a0, $a4, 2881 - bge $a3, $a0, .LBB4_338 + bge $a3, $a0, .LBB4_339 # %bb.331: - ld.d $a2, $sp, 400 # 8-byte Folded Reload + ld.d $a2, $sp, 408 # 8-byte Folded Reload blez $a2, .LBB4_334 .LBB4_332: # %.lr.ph721 - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 112 # 8-byte Folded Reload ld.d $fp, $a0, %pc_lo12(partQ__align.mseq1) .p2align 4, , 16 .LBB4_333: # =>This Inner Loop Header: Depth=1 @@ -4702,7 +4700,7 @@ partQ__align: # @partQ__align .LBB4_334: # %.preheader blez $s8, .LBB4_337 # %bb.335: # %.lr.ph723 - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 120 # 8-byte Folded Reload ld.d $fp, $a0, %pc_lo12(partQ__align.mseq2) .p2align 4, , 16 .LBB4_336: # =>This Inner Loop Header: Depth=1 @@ -4716,21 +4714,25 @@ partQ__align: # @partQ__align bnez $s8, .LBB4_336 .LBB4_337: # %._crit_edge724 fmov.s $fa0, $fs0 - fld.d $fs0, $sp, 416 # 8-byte Folded Reload - ld.d $s8, $sp, 424 # 8-byte Folded Reload - ld.d $s7, $sp, 432 # 8-byte Folded Reload - ld.d $s6, $sp, 440 # 8-byte Folded Reload - ld.d $s5, $sp, 448 # 8-byte Folded Reload - ld.d $s4, $sp, 456 # 8-byte Folded Reload - ld.d $s3, $sp, 464 # 8-byte Folded Reload - ld.d $s2, $sp, 472 # 8-byte Folded Reload - ld.d $s1, $sp, 480 # 8-byte Folded Reload - ld.d $s0, $sp, 488 # 8-byte Folded Reload - ld.d $fp, $sp, 496 # 8-byte Folded Reload - ld.d $ra, $sp, 504 # 8-byte Folded Reload - addi.d $sp, $sp, 512 + fld.d $fs1, $sp, 424 # 8-byte Folded Reload + fld.d $fs0, $sp, 432 # 8-byte Folded Reload + ld.d $s8, $sp, 440 # 8-byte Folded Reload + ld.d $s7, $sp, 448 # 8-byte Folded Reload + ld.d $s6, $sp, 456 # 8-byte Folded Reload + ld.d $s5, $sp, 464 # 8-byte Folded Reload + ld.d $s4, $sp, 472 # 8-byte Folded Reload + ld.d $s3, $sp, 480 # 8-byte Folded Reload + ld.d $s2, $sp, 488 # 8-byte Folded Reload + ld.d $s1, $sp, 496 # 8-byte Folded Reload + ld.d $s0, $sp, 504 # 8-byte Folded Reload + ld.d $fp, $sp, 512 # 8-byte Folded Reload + ld.d $ra, $sp, 520 # 8-byte Folded Reload + addi.d $sp, $sp, 528 ret .LBB4_338: + move $s3, $s4 + b .LBB4_40 +.LBB4_339: pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) ld.d $a0, $a0, 0 @@ -4743,16 +4745,28 @@ partQ__align: # @partQ__align addi.d $a0, $a0, %pc_lo12(.L.str.7) pcaddu18i $ra, %call36(ErrorExit) jirl $ra, $ra, 0 - ld.d $a2, $sp, 400 # 8-byte Folded Reload + ld.d $a2, $sp, 408 # 8-byte Folded Reload bgtz $a2, .LBB4_332 b .LBB4_334 -.LBB4_339: - ld.d $s2, $sp, 408 # 8-byte Folded Reload .LBB4_340: - ld.d $s6, $sp, 144 # 8-byte Folded Reload + move $s3, $s4 b .LBB4_35 .LBB4_341: - ld.d $s2, $sp, 408 # 8-byte Folded Reload + ld.d $s2, $sp, 416 # 8-byte Folded Reload +.LBB4_342: + move $s3, $s4 + ld.d $s6, $sp, 152 # 8-byte Folded Reload + b .LBB4_35 +.LBB4_343: + ld.d $s2, $sp, 416 # 8-byte Folded Reload + move $s3, $s4 + b .LBB4_40 +.LBB4_344: + ld.d $s2, $sp, 416 # 8-byte Folded Reload + ld.d $s6, $sp, 152 # 8-byte Folded Reload + b .LBB4_35 +.LBB4_345: + ld.d $s2, $sp, 416 # 8-byte Folded Reload b .LBB4_40 .Lfunc_end4: .size partQ__align, .Lfunc_end4-partQ__align diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/partSalignmm.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/partSalignmm.s index c87245d6..ec0c674f 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/partSalignmm.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/partSalignmm.s @@ -963,27 +963,23 @@ part_imp_match_init: # @part_imp_match_init .Lfunc_end3: .size part_imp_match_init, .Lfunc_end3-part_imp_match_init # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function partA__align -.LCPI4_0: - .dword 0x3ff4cccccccccccd # double 1.3 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI4_1: + .p2align 4, 0x0 # -- Begin function partA__align +.LCPI4_0: .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 .word 4 # 0x4 -.LCPI4_2: +.LCPI4_1: .dword 3 # 0x3 .dword 4 # 0x4 -.LCPI4_3: +.LCPI4_2: .dword 1 # 0x1 .dword 2 # 0x2 -.LCPI4_4: +.LCPI4_3: .dword 0 # 0x0 .dword 1 # 0x1 -.LCPI4_5: +.LCPI4_4: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -1171,21 +1167,24 @@ partA__align: # @partA__align ld.d $a0, $sp, 176 # 8-byte Folded Reload ld.w $s7, $a0, %pc_lo12(partA__align.orlgth2) .LBB4_7: - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI4_0) - movgr2fr.w $fa1, $s0 - ffint.d.w $fa1, $fa1 - fmul.d $fa1, $fa1, $fa0 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a0, $fa1 + movgr2fr.w $fa0, $s0 + ffint.d.w $fa0, $fa0 + lu12i.w $a0, -209716 + ori $a0, $a0, 3277 + lu32i.d $a0, 314572 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a0, $fa0 slt $a1, $a0, $s4 masknez $a0, $a0, $a1 maskeqz $a1, $s4, $a1 or $s4, $a1, $a0 addi.w $s0, $s4, 100 - movgr2fr.w $fa1, $s3 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $s3 + ffint.d.w $fa0, $fa0 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a0, $fa0 slt $a1, $a0, $s7 @@ -1824,8 +1823,8 @@ partA__align: # @partA__align move $a3, $a2 bstrins.d $a3, $a5, 1, 0 vreplgr2vr.w $vr0, $a0 - pcalau12i $a5, %pc_hi20(.LCPI4_1) - vld $vr1, $a5, %pc_lo12(.LCPI4_1) + pcalau12i $a5, %pc_hi20(.LCPI4_0) + vld $vr1, $a5, %pc_lo12(.LCPI4_0) addi.d $a5, $s6, 4 lu52i.d $a6, $zero, -1026 vreplgr2vr.d $vr2, $a6 @@ -1920,8 +1919,8 @@ partA__align: # @partA__align move $a3, $a2 bstrins.d $a3, $a5, 1, 0 vreplgr2vr.w $vr0, $a0 - pcalau12i $a5, %pc_hi20(.LCPI4_1) - vld $vr1, $a5, %pc_lo12(.LCPI4_1) + pcalau12i $a5, %pc_hi20(.LCPI4_0) + vld $vr1, $a5, %pc_lo12(.LCPI4_0) ld.d $a5, $sp, 272 # 8-byte Folded Reload addi.d $a5, $a5, 4 lu52i.d $a6, $zero, -1026 @@ -2331,10 +2330,10 @@ partA__align: # @partA__align bstrins.d $a3, $a5, 1, 0 vreplgr2vr.w $vr0, $a0 vreplgr2vr.d $vr1, $t3 + pcalau12i $a5, %pc_hi20(.LCPI4_1) + vld $vr2, $a5, %pc_lo12(.LCPI4_1) pcalau12i $a5, %pc_hi20(.LCPI4_2) - vld $vr2, $a5, %pc_lo12(.LCPI4_2) - pcalau12i $a5, %pc_hi20(.LCPI4_3) - vld $vr3, $a5, %pc_lo12(.LCPI4_3) + vld $vr3, $a5, %pc_lo12(.LCPI4_2) addi.d $a5, $s6, 4 lu52i.d $a6, $zero, -1026 vreplgr2vr.d $vr4, $a6 @@ -2621,8 +2620,8 @@ partA__align: # @partA__align move $a5, $zero b .LBB4_143 .LBB4_140: # %vector.ph873 - pcalau12i $a6, %pc_hi20(.LCPI4_4) - vld $vr0, $a6, %pc_lo12(.LCPI4_4) + pcalau12i $a6, %pc_hi20(.LCPI4_3) + vld $vr0, $a6, %pc_lo12(.LCPI4_3) bstrpick.d $a5, $a5, 31, 2 slli.d $a5, $a5, 2 addi.d $a6, $ra, 16 @@ -2676,8 +2675,8 @@ partA__align: # @partA__align .LBB4_148: # %vector.ph889 bstrpick.d $a5, $a5, 31, 3 slli.d $a5, $a5, 3 - pcalau12i $a6, %pc_hi20(.LCPI4_5) - vld $vr0, $a6, %pc_lo12(.LCPI4_5) + pcalau12i $a6, %pc_hi20(.LCPI4_4) + vld $vr0, $a6, %pc_lo12(.LCPI4_4) addi.d $a6, $a3, 16 vrepli.b $vr1, -1 vrepli.w $vr2, -5 @@ -3292,8 +3291,8 @@ partA__align: # @partA__align bne $a4, $a5, .LBB4_60 b .LBB4_77 .LBB4_219: # %vector.ph844 - pcalau12i $a5, %pc_hi20(.LCPI4_4) - vld $vr0, $a5, %pc_lo12(.LCPI4_4) + pcalau12i $a5, %pc_hi20(.LCPI4_3) + vld $vr0, $a5, %pc_lo12(.LCPI4_3) bstrpick.d $a4, $a4, 31, 2 slli.d $a4, $a4, 2 addi.d $a5, $s5, 16 @@ -3347,8 +3346,8 @@ partA__align: # @partA__align .LBB4_227: # %vector.ph859 bstrpick.d $a4, $a4, 31, 3 slli.d $a4, $a4, 3 - pcalau12i $a5, %pc_hi20(.LCPI4_5) - vld $vr0, $a5, %pc_lo12(.LCPI4_5) + pcalau12i $a5, %pc_hi20(.LCPI4_4) + vld $vr0, $a5, %pc_lo12(.LCPI4_4) addi.d $a5, $a2, 16 vrepli.b $vr1, -1 vrepli.w $vr2, -5 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/rna.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/rna.s index 4afa7f98..59672478 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/rna.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/rna.s @@ -132,12 +132,7 @@ rnaalifoldcall: # @rnaalifoldcall .Lfunc_end0: .size rnaalifoldcall, .Lfunc_end0-rnaalifoldcall # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function foldrna -.LCPI1_0: - .word 0x44160000 # float 600 - .text - .globl foldrna + .globl foldrna # -- Begin function foldrna .p2align 5 .type foldrna,@function foldrna: # @foldrna @@ -1495,9 +1490,9 @@ foldrna: # @foldrna movgr2fr.w $fa0, $zero pcalau12i $a7, %got_pc_hi20(consweight_rna) ld.d $a7, $a7, %got_pc_lo12(consweight_rna) - pcalau12i $t0, %pc_hi20(.LCPI1_0) - fld.s $fa1, $t0, %pc_lo12(.LCPI1_0) move $t0, $zero + lu12i.w $t1, 278880 + movgr2fr.w $fa1, $t1 ld.d $fp, $sp, 160 # 8-byte Folded Reload b .LBB1_150 .p2align 4, , 16 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/suboptalign11.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/suboptalign11.s index 638391a1..c99c1580 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/suboptalign11.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/suboptalign11.s @@ -1,20 +1,6 @@ .file "suboptalign11.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function suboptalign11 -.LCPI0_0: - .dword 0x3ff4cccccccccccd # double 1.3 -.LCPI0_1: - .dword 0xc08f3f3340000000 # double -999.9000244140625 -.LCPI0_4: - .dword 0x3fd3333333333333 # double 0.29999999999999999 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI0_2: - .word 0xc479f99a # float -999.900024 -.LCPI0_3: - .word 0xc97423fe # float -999999.875 .section .text.unlikely.,"ax",@progbits - .globl suboptalign11 + .globl suboptalign11 # -- Begin function suboptalign11 .p2align 5 .type suboptalign11,@function suboptalign11: # @suboptalign11 @@ -120,7 +106,7 @@ suboptalign11: # @suboptalign11 jirl $ra, $ra, 0 pcalau12i $s2, %pc_hi20(suboptalign11.orlgth1) ld.w $a0, $s2, %pc_lo12(suboptalign11.orlgth1) - pcalau12i $s7, %pc_hi20(suboptalign11.orlgth2) + pcalau12i $s6, %pc_hi20(suboptalign11.orlgth2) addi.w $s8, $s1, 0 pcalau12i $a1, %pc_hi20(suboptalign11.w1) st.d $a1, $sp, 240 # 8-byte Folded Spill @@ -128,7 +114,7 @@ suboptalign11: # @suboptalign11 st.d $a1, $sp, 232 # 8-byte Folded Spill pcalau12i $a1, %pc_hi20(suboptalign11.initverticalw) st.d $a1, $sp, 224 # 8-byte Folded Spill - pcalau12i $s4, %pc_hi20(suboptalign11.lastverticalw) + pcalau12i $s7, %pc_hi20(suboptalign11.lastverticalw) pcalau12i $a1, %pc_hi20(suboptalign11.m) st.d $a1, $sp, 256 # 8-byte Folded Spill pcalau12i $a1, %pc_hi20(suboptalign11.mp) @@ -147,7 +133,7 @@ suboptalign11: # @suboptalign11 st.d $s5, $sp, 72 # 8-byte Folded Spill blt $a0, $s5, .LBB0_6 # %bb.5: # %middle.block - ld.w $a0, $s7, %pc_lo12(suboptalign11.orlgth2) + ld.w $a0, $s6, %pc_lo12(suboptalign11.orlgth2) bge $a0, $s8, .LBB0_10 .LBB0_6: ld.d $a3, $fp, 0 @@ -158,11 +144,11 @@ suboptalign11: # @suboptalign11 pcaddu18i $ra, %call36(fwrite) jirl $ra, $ra, 0 ld.w $a1, $s2, %pc_lo12(suboptalign11.orlgth1) - ld.w $a0, $s7, %pc_lo12(suboptalign11.orlgth2) + ld.w $a0, $s6, %pc_lo12(suboptalign11.orlgth2) pcalau12i $a2, %pc_hi20(suboptalign11.cpmx1) st.d $a2, $sp, 216 # 8-byte Folded Spill pcalau12i $s8, %pc_hi20(suboptalign11.cpmx2) - pcalau12i $s6, %pc_hi20(suboptalign11.floatwork) + pcalau12i $s4, %pc_hi20(suboptalign11.floatwork) pcalau12i $s3, %pc_hi20(suboptalign11.intwork) blez $a1, .LBB0_9 # %bb.7: @@ -187,7 +173,7 @@ suboptalign11: # @suboptalign11 ld.d $a0, $a0, %pc_lo12(suboptalign11.initverticalw) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 - ld.d $a0, $s4, %pc_lo12(suboptalign11.lastverticalw) + ld.d $a0, $s7, %pc_lo12(suboptalign11.lastverticalw) pcaddu18i $ra, %call36(FreeFloatVec) jirl $ra, $ra, 0 ld.d $a3, $fp, 0 @@ -234,39 +220,40 @@ suboptalign11: # @suboptalign11 ori $a2, $zero, 1 pcaddu18i $ra, %call36(fwrite) jirl $ra, $ra, 0 - ld.d $a0, $s6, %pc_lo12(suboptalign11.floatwork) + ld.d $a0, $s4, %pc_lo12(suboptalign11.floatwork) pcaddu18i $ra, %call36(FreeFloatMtx) jirl $ra, $ra, 0 ld.d $a0, $s3, %pc_lo12(suboptalign11.intwork) pcaddu18i $ra, %call36(FreeIntMtx) jirl $ra, $ra, 0 ld.w $a1, $s2, %pc_lo12(suboptalign11.orlgth1) - ld.w $a0, $s7, %pc_lo12(suboptalign11.orlgth2) + ld.w $a0, $s6, %pc_lo12(suboptalign11.orlgth2) .LBB0_9: - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI0_0) - movgr2fr.w $fa1, $s0 - ffint.d.w $fa1, $fa1 - fmul.d $fa1, $fa1, $fa0 - ftintrz.w.d $fa1, $fa1 - movfr2gr.s $a2, $fa1 + movgr2fr.w $fa0, $s0 + ffint.d.w $fa0, $fa0 + lu12i.w $a2, -209716 + ori $a2, $a2, 3277 + lu32i.d $a2, 314572 + lu52i.d $a2, $a2, 1023 + movgr2fr.d $fa1, $a2 + fmul.d $fa0, $fa0, $fa1 + ftintrz.w.d $fa0, $fa0 + movfr2gr.s $a2, $fa0 slt $a3, $a2, $a1 masknez $a2, $a2, $a3 maskeqz $a1, $a1, $a3 - st.d $s6, $sp, 176 # 8-byte Folded Spill st.d $s3, $sp, 184 # 8-byte Folded Spill or $s3, $a1, $a2 addi.w $fp, $s3, 100 - movgr2fr.w $fa1, $s1 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $s1 + ffint.d.w $fa0, $fa0 + fmul.d $fa0, $fa0, $fa1 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a1, $fa0 slt $a2, $a1, $a0 masknez $a1, $a1, $a2 maskeqz $a0, $a0, $a2 - move $s6, $s8 - move $s8, $s4 + st.d $s4, $sp, 176 # 8-byte Folded Spill or $s4, $a0, $a1 addi.w $s5, $s4, 100 st.d $s0, $sp, 192 # 8-byte Folded Spill @@ -290,7 +277,7 @@ suboptalign11: # @suboptalign11 move $a0, $s1 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 - st.d $a0, $s8, %pc_lo12(suboptalign11.lastverticalw) + st.d $a0, $s7, %pc_lo12(suboptalign11.lastverticalw) move $a0, $s0 pcaddu18i $ra, %call36(AllocateFloatVec) jirl $ra, $ra, 0 @@ -321,7 +308,7 @@ suboptalign11: # @suboptalign11 move $a1, $s0 pcaddu18i $ra, %call36(AllocateFloatMtx) jirl $ra, $ra, 0 - st.d $a0, $s6, %pc_lo12(suboptalign11.cpmx2) + st.d $a0, $s8, %pc_lo12(suboptalign11.cpmx2) slt $a0, $s5, $fp masknez $a1, $s5, $a0 maskeqz $a0, $fp, $a0 @@ -359,8 +346,7 @@ suboptalign11: # @suboptalign11 st.d $a0, $a1, %pc_lo12(suboptalign11.mseq2) st.w $s3, $s2, %pc_lo12(suboptalign11.orlgth1) ld.d $fp, $sp, 136 # 8-byte Folded Reload - st.w $s4, $s7, %pc_lo12(suboptalign11.orlgth2) - move $s4, $s8 + st.w $s4, $s6, %pc_lo12(suboptalign11.orlgth2) ld.d $s5, $sp, 72 # 8-byte Folded Reload ld.d $s8, $sp, 280 # 8-byte Folded Reload .LBB0_10: @@ -389,7 +375,7 @@ suboptalign11: # @suboptalign11 st.d $a3, $sp, 120 # 8-byte Folded Spill blt $a2, $a0, .LBB0_13 # %bb.11: - ld.w $a3, $s7, %pc_lo12(suboptalign11.orlgth2) + ld.w $a3, $s6, %pc_lo12(suboptalign11.orlgth2) blt $a1, $a3, .LBB0_13 # %bb.12: # %._crit_edge539 pcalau12i $a0, %got_pc_hi20(commonJP) @@ -397,8 +383,8 @@ suboptalign11: # @suboptalign11 ld.d $a0, $a0, 0 b .LBB0_17 .LBB0_13: - st.d $s4, $sp, 216 # 8-byte Folded Spill - move $s6, $s0 + st.d $s7, $sp, 216 # 8-byte Folded Spill + move $s7, $s0 beqz $a2, .LBB0_16 # %bb.14: beqz $a1, .LBB0_16 @@ -422,7 +408,7 @@ suboptalign11: # @suboptalign11 ld.w $a1, $s3, 0 .LBB0_16: slt $a3, $a2, $a0 - ld.w $a4, $s7, %pc_lo12(suboptalign11.orlgth2) + ld.w $a4, $s6, %pc_lo12(suboptalign11.orlgth2) masknez $a2, $a2, $a3 maskeqz $a0, $a0, $a3 or $s2, $a0, $a2 @@ -455,8 +441,8 @@ suboptalign11: # @suboptalign11 st.w $s2, $fp, 0 st.w $s4, $s3, 0 ld.d $s1, $sp, 64 # 8-byte Folded Reload - move $s0, $s6 - ld.d $s4, $sp, 216 # 8-byte Folded Reload + move $s0, $s7 + ld.d $s7, $sp, 216 # 8-byte Folded Reload .LBB0_17: pcalau12i $a1, %got_pc_hi20(commonIP) ld.d $a1, $a1, %got_pc_lo12(commonIP) @@ -476,29 +462,29 @@ suboptalign11: # @suboptalign11 pcaddu18i $ra, %call36(fwrite) jirl $ra, $ra, 0 ld.d $a0, $sp, 240 # 8-byte Folded Reload - ld.d $a6, $a0, %pc_lo12(suboptalign11.w1) + ld.d $s2, $a0, %pc_lo12(suboptalign11.w1) ld.d $a0, $sp, 232 # 8-byte Folded Reload ld.d $ra, $a0, %pc_lo12(suboptalign11.w2) ld.d $a0, $sp, 224 # 8-byte Folded Reload - ld.d $s2, $a0, %pc_lo12(suboptalign11.initverticalw) - ld.d $t0, $sp, 208 # 8-byte Folded Reload - ld.d $t1, $sp, 200 # 8-byte Folded Reload + ld.d $s4, $a0, %pc_lo12(suboptalign11.initverticalw) + ld.d $a7, $sp, 208 # 8-byte Folded Reload + ld.d $t0, $sp, 200 # 8-byte Folded Reload blez $s5, .LBB0_20 # %bb.18: # %.lr.ph.i - ld.d $a0, $t0, 0 - ld.d $a1, $t1, 0 + ld.d $a0, $a7, 0 + ld.d $a1, $t0, 0 bstrpick.d $a2, $s0, 30, 0 pcalau12i $a3, %got_pc_hi20(amino_dis) ld.d $a3, $a3, %got_pc_lo12(amino_dis) - move $a4, $s2 + move $a4, $s4 .p2align 4, , 16 .LBB0_19: # =>This Inner Loop Header: Depth=1 ld.b $a5, $a0, 0 - ld.b $a7, $a1, 0 + ld.b $a6, $a1, 0 slli.d $a5, $a5, 9 add.d $a5, $a3, $a5 - slli.d $a7, $a7, 2 - ldx.w $a5, $a5, $a7 + slli.d $a6, $a6, 2 + ldx.w $a5, $a5, $a6 movgr2fr.w $fa0, $a5 ffint.s.w $fa0, $fa0 fst.s $fa0, $a4, 0 @@ -507,24 +493,24 @@ suboptalign11: # @suboptalign11 addi.d $a1, $a1, 1 bnez $a2, .LBB0_19 .LBB0_20: # %match_calc.exit - addi.d $t8, $s1, 1 - ld.d $t7, $sp, 128 # 8-byte Folded Reload + addi.d $t7, $s1, 1 + ld.d $t6, $sp, 128 # 8-byte Folded Reload blez $s8, .LBB0_26 # %bb.21: # %.lr.ph.i362 - ld.d $a0, $t1, 0 - ld.d $a1, $t0, 0 + ld.d $a0, $t0, 0 + ld.d $a1, $a7, 0 bstrpick.d $a2, $s1, 30, 0 pcalau12i $a3, %got_pc_hi20(amino_dis) ld.d $a3, $a3, %got_pc_lo12(amino_dis) - move $a4, $a6 + move $a4, $s2 .p2align 4, , 16 .LBB0_22: # =>This Inner Loop Header: Depth=1 ld.b $a5, $a0, 0 - ld.b $a7, $a1, 0 + ld.b $a6, $a1, 0 slli.d $a5, $a5, 9 add.d $a5, $a3, $a5 - slli.d $a7, $a7, 2 - ldx.w $a5, $a5, $a7 + slli.d $a6, $a6, 2 + ldx.w $a5, $a5, $a6 movgr2fr.w $fa0, $a5 ffint.s.w $fa0, $fa0 fst.s $fa0, $a4, 0 @@ -541,14 +527,14 @@ suboptalign11: # @suboptalign11 ld.d $a2, $a2, %pc_lo12(suboptalign11.largeM) ld.d $a3, $sp, 264 # 8-byte Folded Reload ld.d $a3, $a3, %pc_lo12(suboptalign11.Mp) - bstrpick.d $a4, $t8, 31, 0 - addi.d $a7, $a4, -1 - ori $t0, $zero, 16 + bstrpick.d $a4, $t7, 31, 0 + addi.d $a6, $a4, -1 + ori $a7, $zero, 16 ori $a5, $zero, 1 - bgeu $a7, $t0, .LBB0_79 + bgeu $a6, $a7, .LBB0_79 .LBB0_24: # %scalar.ph623.preheader - alsl.d $a7, $a5, $a6, 2 - addi.d $a7, $a7, -4 + alsl.d $a6, $a5, $s2, 2 + addi.d $a6, $a6, -4 alsl.d $a0, $a5, $a0, 2 alsl.d $a1, $a5, $a1, 2 alsl.d $a2, $a5, $a2, 2 @@ -557,12 +543,12 @@ suboptalign11: # @suboptalign11 .p2align 4, , 16 .LBB0_25: # %scalar.ph623 # =>This Inner Loop Header: Depth=1 - fld.s $fa0, $a7, 0 + fld.s $fa0, $a6, 0 fst.s $fa0, $a0, 0 st.w $zero, $a1, 0 fst.s $fa0, $a2, 0 st.w $zero, $a3, 0 - addi.d $a7, $a7, 4 + addi.d $a6, $a6, 4 addi.d $a0, $a0, 4 addi.d $a1, $a1, 4 addi.d $a2, $a2, 4 @@ -575,8 +561,8 @@ suboptalign11: # @suboptalign11 lu32i.d $a1, -1 add.d $a0, $a0, $a1 srai.d $a1, $a0, 30 - fldx.s $fa0, $a6, $a1 - ld.d $a1, $s4, %pc_lo12(suboptalign11.lastverticalw) + fldx.s $fa0, $s2, $a1 + ld.d $a1, $s7, %pc_lo12(suboptalign11.lastverticalw) st.d $a1, $sp, 232 # 8-byte Folded Spill fst.s $fa0, $a1, 0 addi.w $a2, $s0, 1 @@ -587,15 +573,17 @@ suboptalign11: # @suboptalign11 st.d $a2, $sp, 40 # 8-byte Folded Spill bstrpick.d $a1, $a2, 31, 0 st.d $a1, $sp, 240 # 8-byte Folded Spill - st.d $t8, $sp, 48 # 8-byte Folded Spill + st.d $t7, $sp, 48 # 8-byte Folded Spill bgtz $s5, .LBB0_28 # %bb.27: - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI0_1) move $s5, $zero move $s6, $zero move $fp, $zero movgr2fr.w $fs0, $zero + lu12i.w $a0, 262144 + lu32i.d $a0, -49357 + lu52i.d $a0, $a0, -1016 + movgr2fr.d $fs1, $a0 b .LBB0_58 .LBB0_28: # %.lr.ph446 move $a1, $s1 @@ -615,7 +603,7 @@ suboptalign11: # @suboptalign11 ld.d $a1, $sp, 256 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(suboptalign11.m) st.d $a1, $sp, 168 # 8-byte Folded Spill - ld.d $t1, $t7, %pc_lo12(suboptalign11.shuryo) + ld.d $t1, $t6, %pc_lo12(suboptalign11.shuryo) ld.d $a1, $sp, 248 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(suboptalign11.largeM) st.d $a1, $sp, 160 # 8-byte Folded Spill @@ -626,36 +614,42 @@ suboptalign11: # @suboptalign11 ld.d $a1, $a1, %pc_lo12(suboptalign11.Mp) st.d $a1, $sp, 144 # 8-byte Folded Spill addi.d $t5, $t1, 8 - movgr2fr.w $fa1, $zero + movgr2fr.w $fa2, $zero ori $t6, $zero, 1 - pcalau12i $a1, %pc_hi20(.LCPI0_2) - fld.s $fa0, $a1, %pc_lo12(.LCPI0_2) + lu12i.w $a1, -243809 + ori $a1, $a1, 2458 + lu32i.d $a1, 0 + movgr2fr.w $fa0, $a1 slli.d $a0, $a0, 2 st.d $a0, $sp, 216 # 8-byte Folded Spill + lu12i.w $a0, -223422 + ori $a0, $a0, 1022 + lu32i.d $a0, 0 + movgr2fr.w $fa1, $a0 ori $t8, $zero, 99 lu12i.w $a0, -1 ori $a0, $a0, 896 st.d $a0, $sp, 272 # 8-byte Folded Spill ori $s7, $zero, 3200 - fmov.s $fs0, $fa1 - st.d $s2, $sp, 224 # 8-byte Folded Spill + fmov.s $fs0, $fa2 + st.d $s4, $sp, 224 # 8-byte Folded Spill b .LBB0_31 .p2align 4, , 16 .LBB0_29: # %._crit_edge422.critedge # in Loop: Header=BB0_31 Depth=1 - fldx.s $fa2, $s2, $t0 - fst.s $fa2, $ra, 0 + fldx.s $fa3, $s4, $t0 + fst.s $fa3, $ra, 0 .LBB0_30: # %._crit_edge422 # in Loop: Header=BB0_31 Depth=1 - ld.d $a6, $sp, 264 # 8-byte Folded Reload + ld.d $s2, $sp, 264 # 8-byte Folded Reload ld.d $a0, $sp, 216 # 8-byte Folded Reload - fldx.s $fa2, $a6, $a0 + fldx.s $fa3, $s2, $a0 addi.d $t6, $t6, 1 ld.d $a0, $sp, 232 # 8-byte Folded Reload ld.d $a1, $sp, 248 # 8-byte Folded Reload - fstx.s $fa2, $a0, $a1 + fstx.s $fa3, $a0, $a1 ld.d $ra, $sp, 256 # 8-byte Folded Reload - ld.d $s2, $sp, 224 # 8-byte Folded Reload + ld.d $s4, $sp, 224 # 8-byte Folded Reload ld.d $a0, $sp, 240 # 8-byte Folded Reload beq $t6, $a0, .LBB0_57 .LBB0_31: # =>This Loop Header: Depth=1 @@ -664,16 +658,15 @@ suboptalign11: # @suboptalign11 # Child Loop BB0_50 Depth 3 addi.d $s3, $t6, -1 slli.d $a0, $s3, 2 - fldx.s $fa2, $s2, $a0 - fst.s $fa2, $a6, 0 + fldx.s $fa3, $s4, $a0 + fst.s $fa3, $s2, 0 slli.d $t0, $t6, 2 st.d $ra, $sp, 264 # 8-byte Folded Spill - st.d $a6, $sp, 256 # 8-byte Folded Spill + st.d $s2, $sp, 256 # 8-byte Folded Spill st.d $t0, $sp, 248 # 8-byte Folded Spill blez $s8, .LBB0_29 # %bb.32: # %.lr.ph.i368 # in Loop: Header=BB0_31 Depth=1 - move $s0, $a6 ld.d $a0, $sp, 200 # 8-byte Folded Reload ld.d $a0, $a0, 0 ld.d $a1, $sp, 208 # 8-byte Folded Reload @@ -691,25 +684,23 @@ suboptalign11: # @suboptalign11 add.d $a4, $a6, $a4 slli.d $a6, $a7, 2 ldx.w $a4, $a4, $a6 - movgr2fr.w $fa2, $a4 - ffint.s.w $fa2, $fa2 - fst.s $fa2, $a2, 0 + movgr2fr.w $fa3, $a4 + ffint.s.w $fa3, $fa3 + fst.s $fa3, $a2, 0 addi.d $a5, $a5, -1 addi.d $a2, $a2, 4 addi.d $a1, $a1, 1 bnez $a5, .LBB0_33 # %bb.34: # %match_calc.exit373 # in Loop: Header=BB0_31 Depth=1 - fldx.s $fa2, $s2, $t0 - fst.s $fa2, $ra, 0 - fld.s $fa2, $s0, 0 + fldx.s $fa3, $s4, $t0 + fst.s $fa3, $ra, 0 + fld.s $fa3, $s2, 0 slli.d $a0, $t6, 3 ld.d $a1, $sp, 176 # 8-byte Folded Reload ldx.d $s4, $a1, $a0 ld.d $a1, $sp, 184 # 8-byte Folded Reload ldx.d $a6, $a1, $a0 - pcalau12i $a0, %pc_hi20(.LCPI0_3) - fld.s $fa3, $a0, %pc_lo12(.LCPI0_3) move $a0, $zero move $a7, $zero move $t0, $zero @@ -720,37 +711,38 @@ suboptalign11: # @suboptalign11 ld.d $a2, $sp, 152 # 8-byte Folded Reload ld.d $t7, $sp, 144 # 8-byte Folded Reload move $a1, $ra - move $ra, $s0 - fmov.s $fa4, $fa2 + fmov.s $fa5, $fa1 + move $ra, $s2 + fmov.s $fa4, $fa3 b .LBB0_36 .p2align 4, , 16 .LBB0_35: # in Loop: Header=BB0_36 Depth=2 addi.d $a1, $a1, 4 - fcmp.clt.s $fcc0, $fa2, $fa6 - fsel $fa2, $fa2, $fa6, $fcc0 + fcmp.clt.s $fcc0, $fa3, $fa7 + fsel $fa3, $fa3, $fa7, $fcc0 movcf2gr $a4, $fcc0 masknez $t0, $t0, $a4 maskeqz $a4, $a5, $a4 or $t0, $a4, $t0 - fadd.s $fa2, $fa2, $fs3 - fcmp.clt.s $fcc0, $fa4, $fa7 - fsel $fa4, $fa4, $fa7, $fcc0 + fadd.s $fa3, $fa3, $fs3 + fcmp.clt.s $fcc0, $fa4, $ft0 + fsel $fa4, $fa4, $ft0, $fcc0 movcf2gr $a4, $fcc0 masknez $t3, $t3, $a4 maskeqz $a4, $a5, $a4 or $t3, $a4, $t3 - fcmp.clt.s $fcc0, $fa0, $fa5 - fsel $fa0, $fa0, $fa5, $fcc0 + fcmp.clt.s $fcc0, $fa0, $fa6 + fsel $fa0, $fa0, $fa6, $fcc0 movcf2gr $a4, $fcc0 masknez $a5, $s5, $a4 maskeqz $s0, $t6, $a4 or $s5, $s0, $a5 - fld.s $fa5, $a1, 0 + fld.s $fa6, $a1, 0 masknez $a5, $s6, $a4 maskeqz $a4, $s2, $a4 or $s6, $a4, $a5 - fadd.s $fa5, $fs0, $fa5 - fst.s $fa5, $a1, 0 + fadd.s $fa6, $fs0, $fa6 + fst.s $fa6, $a1, 0 addi.d $ra, $ra, 4 addi.w $a5, $s2, 1 beq $s2, $s8, .LBB0_30 @@ -760,58 +752,58 @@ suboptalign11: # @suboptalign11 # Child Loop BB0_50 Depth 3 move $s2, $a5 addi.d $a2, $a2, 4 - fld.s $fa6, $ra, 0 + fld.s $fa7, $ra, 0 st.w $s3, $a6, 4 addi.d $a6, $a6, 4 addi.d $a5, $a5, -1 - fadd.s $fa5, $fa2, $fs1 - fcmp.clt.s $fcc0, $fa6, $fa5 - fsel $fa5, $fa6, $fa5, $fcc0 + fadd.s $fa6, $fa3, $fs1 + fcmp.clt.s $fcc0, $fa7, $fa6 + fsel $fa6, $fa7, $fa6, $fcc0 movcf2gr $a4, $fcc0 - fld.s $fa7, $t2, 4 + fld.s $ft0, $t2, 4 masknez $s0, $a5, $a4 maskeqz $a4, $t0, $a4 or $a4, $a4, $s0 - fadd.s $ft0, $fa7, $fs1 - fcmp.cule.s $fcc0, $ft0, $fa5 + fadd.s $ft1, $ft0, $fs1 + fcmp.cule.s $fcc0, $ft1, $fa6 st.w $a4, $s4, 4 bcnez $fcc0, .LBB0_38 # %bb.37: # in Loop: Header=BB0_36 Depth=2 ld.w $a4, $a2, 0 st.w $a4, $a6, 0 - fmov.s $fa5, $ft0 + fmov.s $fa6, $ft1 .LBB0_38: # in Loop: Header=BB0_36 Depth=2 - fcmp.cule.s $fcc0, $fa6, $fa7 + fcmp.cule.s $fcc0, $fa7, $ft0 bcnez $fcc0, .LBB0_40 # %bb.39: # in Loop: Header=BB0_36 Depth=2 st.w $s3, $a2, 0 - fmov.s $fa7, $fa6 + fmov.s $ft0, $fa7 .LBB0_40: # in Loop: Header=BB0_36 Depth=2 addi.d $t2, $t2, 4 addi.d $s4, $s4, 4 - fadd.s $ft0, $fa7, $fs3 - fadd.s $fa7, $fa3, $fs2 - fcmp.cule.s $fcc0, $fa7, $fa5 - fst.s $ft0, $t2, 0 + fadd.s $ft1, $ft0, $fs3 + fadd.s $ft0, $fa5, $fs2 + fcmp.cule.s $fcc0, $ft0, $fa6 + fst.s $ft1, $t2, 0 bcnez $fcc0, .LBB0_42 # %bb.41: # in Loop: Header=BB0_36 Depth=2 st.w $a7, $a6, 0 st.w $a0, $s4, 0 - fmov.s $fa5, $fa7 + fmov.s $fa6, $ft0 .LBB0_42: # in Loop: Header=BB0_36 Depth=2 addi.d $t4, $t4, 4 - fld.s $ft0, $t4, 0 - fcmp.clt.s $fcc0, $fa3, $fa4 - fsel $fa3, $fa3, $fa4, $fcc0 - fcmp.cule.s $fcc1, $ft0, $fa3 + fld.s $ft1, $t4, 0 + fcmp.clt.s $fcc0, $fa5, $fa4 + fsel $fa5, $fa5, $fa4, $fcc0 + fcmp.cule.s $fcc1, $ft1, $fa5 addi.d $t7, $t7, 4 bcnez $fcc1, .LBB0_44 # %bb.43: # in Loop: Header=BB0_36 Depth=2 ld.w $a7, $t7, 0 - fmov.s $fa3, $ft0 + fmov.s $fa5, $ft1 move $a0, $a5 - fld.s $fa7, $ra, 0 - fcmp.cule.s $fcc0, $fa7, $ft0 + fld.s $ft0, $ra, 0 + fcmp.cule.s $fcc0, $ft0, $ft1 bceqz $fcc0, .LBB0_45 b .LBB0_46 .p2align 4, , 16 @@ -823,12 +815,12 @@ suboptalign11: # @suboptalign11 masknez $a0, $a0, $a4 maskeqz $a4, $t3, $a4 or $a0, $a4, $a0 - fld.s $fa7, $ra, 0 - fcmp.cule.s $fcc0, $fa7, $ft0 + fld.s $ft0, $ra, 0 + fcmp.cule.s $fcc0, $ft0, $ft1 bcnez $fcc0, .LBB0_46 .LBB0_45: # in Loop: Header=BB0_36 Depth=2 - fst.s $fa7, $t4, 0 - fld.s $fa7, $ra, 0 + fst.s $ft0, $t4, 0 + fld.s $ft0, $ra, 0 st.w $s3, $t7, 0 .LBB0_46: # in Loop: Header=BB0_36 Depth=2 blt $t8, $fp, .LBB0_48 @@ -837,9 +829,9 @@ suboptalign11: # @suboptalign11 add.d $s0, $t1, $a4 stx.w $t6, $t1, $a4 st.w $s2, $s0, 4 - fst.s $fa5, $s0, 8 - fcmp.clt.s $fcc0, $fa5, $fa1 - fsel $fa1, $fa1, $fa5, $fcc0 + fst.s $fa6, $s0, 8 + fcmp.clt.s $fcc0, $fa6, $fa2 + fsel $fa2, $fa2, $fa6, $fcc0 movcf2gr $a4, $fcc0 masknez $s0, $s1, $a4 maskeqz $a4, $fp, $a4 @@ -848,7 +840,7 @@ suboptalign11: # @suboptalign11 b .LBB0_55 .p2align 4, , 16 .LBB0_48: # in Loop: Header=BB0_36 Depth=2 - fcmp.cule.s $fcc0, $fa5, $fa1 + fcmp.cule.s $fcc0, $fa6, $fa2 bcnez $fcc0, .LBB0_55 # %bb.49: # in Loop: Header=BB0_36 Depth=2 move $a4, $zero @@ -856,22 +848,22 @@ suboptalign11: # @suboptalign11 add.d $s8, $t1, $s0 stx.w $t6, $t1, $s0 st.w $s2, $s8, 4 - fst.s $fa5, $s8, 8 + fst.s $fa6, $s8, 8 ld.d $s0, $sp, 272 # 8-byte Folded Reload .p2align 4, , 16 .LBB0_50: # Parent Loop BB0_31 Depth=1 # Parent Loop BB0_36 Depth=2 # => This Inner Loop Header: Depth=3 add.d $s8, $t5, $s0 - fldx.s $fa1, $s8, $s7 - fcmp.clt.s $fcc0, $fa1, $fa5 + fldx.s $fa2, $s8, $s7 + fcmp.clt.s $fcc0, $fa2, $fa6 bcnez $fcc0, .LBB0_53 # %bb.51: # in Loop: Header=BB0_50 Depth=3 addi.d $s0, $s0, 32 addi.w $a4, $a4, 1 bnez $s0, .LBB0_50 # %bb.52: # in Loop: Header=BB0_36 Depth=2 - fmov.s $fa1, $fa5 + fmov.s $fa2, $fa6 b .LBB0_54 .LBB0_53: # in Loop: Header=BB0_36 Depth=2 move $s1, $a4 @@ -880,8 +872,8 @@ suboptalign11: # @suboptalign11 ld.d $s8, $sp, 280 # 8-byte Folded Reload .LBB0_55: # %.loopexit384 # in Loop: Header=BB0_36 Depth=2 - fcmp.cule.s $fcc0, $fs4, $fa5 - fmov.s $fs0, $fa5 + fcmp.cule.s $fcc0, $fs4, $fa6 + fmov.s $fs0, $fa6 bcnez $fcc0, .LBB0_35 # %bb.56: # in Loop: Header=BB0_36 Depth=2 st.w $a3, $a6, 0 @@ -1068,8 +1060,11 @@ suboptalign11: # @suboptalign11 lu12i.w $s2, 1220 blez $fp, .LBB0_115 # %bb.78: # %.lr.ph474.preheader - pcalau12i $a0, %pc_hi20(.LCPI0_4) - fld.d $fs1, $a0, %pc_lo12(.LCPI0_4) + lu12i.w $a0, 209715 + ori $a0, $a0, 819 + lu32i.d $a0, 209715 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fs1, $a0 pcalau12i $a0, %pc_hi20(.L.str.13) addi.d $a0, $a0, %pc_lo12(.L.str.13) st.d $a0, $sp, 256 # 8-byte Folded Spill @@ -1083,56 +1078,56 @@ suboptalign11: # @suboptalign11 st.d $s8, $sp, 248 # 8-byte Folded Spill b .LBB0_89 .LBB0_79: # %vector.memcheck - sub.d $t1, $a2, $a0 - ori $t0, $zero, 32 - bltu $t1, $t0, .LBB0_24 + sub.d $t0, $a2, $a0 + ori $a7, $zero, 32 + bltu $t0, $a7, .LBB0_24 # %bb.80: # %vector.memcheck - sub.d $t1, $a0, $a6 - addi.d $t1, $t1, 4 - bltu $t1, $t0, .LBB0_24 + sub.d $t0, $a0, $s2 + addi.d $t0, $t0, 4 + bltu $t0, $a7, .LBB0_24 # %bb.81: # %vector.memcheck - sub.d $t0, $a2, $a6 - addi.d $t1, $t0, 4 - ori $t0, $zero, 32 - bltu $t1, $t0, .LBB0_24 + sub.d $a7, $a2, $s2 + addi.d $t0, $a7, 4 + ori $a7, $zero, 32 + bltu $t0, $a7, .LBB0_24 # %bb.82: # %vector.memcheck - sub.d $t1, $a3, $a1 - bltu $t1, $t0, .LBB0_24 + sub.d $t0, $a3, $a1 + bltu $t0, $a7, .LBB0_24 # %bb.83: # %vector.ph624 - move $t0, $a7 - bstrins.d $t0, $zero, 2, 0 - ori $t1, $zero, 1 - move $a5, $a7 - bstrins.d $a5, $t1, 2, 0 - addi.d $t1, $a6, 16 - addi.d $t2, $a0, 20 - addi.d $t3, $a3, 20 - addi.d $t4, $a1, 20 - addi.d $t5, $a2, 20 + move $a7, $a6 + bstrins.d $a7, $zero, 2, 0 + ori $t0, $zero, 1 + move $a5, $a6 + bstrins.d $a5, $t0, 2, 0 + addi.d $t0, $s2, 16 + addi.d $t1, $a0, 20 + addi.d $t2, $a3, 20 + addi.d $t3, $a1, 20 + addi.d $t4, $a2, 20 vrepli.b $vr0, 0 - move $t6, $t0 + move $t5, $a7 .p2align 4, , 16 .LBB0_84: # %vector.body625 # =>This Inner Loop Header: Depth=1 - vld $vr1, $t1, -16 - vld $vr2, $t1, 0 - vst $vr1, $t2, -16 - vst $vr2, $t2, 0 - vst $vr0, $t4, -16 - vst $vr0, $t4, 0 - vst $vr1, $t5, -16 - vst $vr2, $t5, 0 + vld $vr1, $t0, -16 + vld $vr2, $t0, 0 + vst $vr1, $t1, -16 + vst $vr2, $t1, 0 vst $vr0, $t3, -16 vst $vr0, $t3, 0 - addi.d $t6, $t6, -8 + vst $vr1, $t4, -16 + vst $vr2, $t4, 0 + vst $vr0, $t2, -16 + vst $vr0, $t2, 0 + addi.d $t5, $t5, -8 + addi.d $t0, $t0, 32 addi.d $t1, $t1, 32 addi.d $t2, $t2, 32 addi.d $t3, $t3, 32 addi.d $t4, $t4, 32 - addi.d $t5, $t5, 32 - bnez $t6, .LBB0_84 + bnez $t5, .LBB0_84 # %bb.85: # %middle.block629 - bne $a7, $t0, .LBB0_24 + bne $a6, $a7, .LBB0_24 b .LBB0_26 .LBB0_86: # %.thread109.i # in Loop: Header=BB0_89 Depth=1 diff --git a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/tddis.s b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/tddis.s index a0d71534..aed8c06c 100644 --- a/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/tddis.s +++ b/results/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/tddis.s @@ -65,12 +65,7 @@ mdfymtx: # @mdfymtx .Lfunc_end0: .size mdfymtx, .Lfunc_end0-mdfymtx # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function score_calc -.LCPI1_0: - .dword 0x7ff8000000000000 # double NaN - .text - .globl score_calc + .globl score_calc # -- Begin function score_calc .p2align 5 .type score_calc,@function score_calc: # @score_calc @@ -219,8 +214,10 @@ score_calc: # @score_calc movgr2fr.d $fa0, $zero b .LBB1_20 .LBB1_18: - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_0) + ori $a0, $zero, 0 + lu32i.d $a0, -524288 + lu52i.d $a0, $a0, 2047 + movgr2fr.d $fa0, $a0 b .LBB1_20 .LBB1_19: # %._crit_edge.loopexit fcvt.d.s $fa0, $fa0 diff --git a/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jcdctmgr.s b/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jcdctmgr.s index 141ca95c..197891b8 100644 --- a/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jcdctmgr.s +++ b/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jcdctmgr.s @@ -75,22 +75,7 @@ jinit_forward_dct: # @jinit_forward_dct .Lfunc_end0: .size jinit_forward_dct, .Lfunc_end0-jinit_forward_dct # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function start_pass_fdctmgr -.LCPI1_0: - .dword 0x3ff63150b14861ef # double 1.3870398450000001 -.LCPI1_1: - .dword 0x3ff4e7ae914d6fca # double 1.3065629649999999 -.LCPI1_2: - .dword 0x3ff2d062ef6c11aa # double 1.1758756020000001 -.LCPI1_3: - .dword 0x3fe92469c0a7bf3b # double 0.785694958 -.LCPI1_4: - .dword 0x3fe1517a7bc720bb # double 0.54119609999999996 -.LCPI1_5: - .dword 0x3fd1a855de72ab5d # double 0.275899379 - .text - .p2align 5 + .p2align 5 # -- Begin function start_pass_fdctmgr .type start_pass_fdctmgr,@function start_pass_fdctmgr: # @start_pass_fdctmgr # %bb.0: @@ -126,18 +111,36 @@ start_pass_fdctmgr: # @start_pass_fdctmgr addi.d $s8, $a0, %pc_lo12(start_pass_fdctmgr.aanscalefactor) move $s5, $zero vldi $vr5, -992 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI1_0) - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI1_1) - pcalau12i $a0, %pc_hi20(.LCPI1_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI1_2) - pcalau12i $a0, %pc_hi20(.LCPI1_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI1_3) - pcalau12i $a0, %pc_hi20(.LCPI1_4) - fld.d $fs4, $a0, %pc_lo12(.LCPI1_4) - pcalau12i $a0, %pc_hi20(.LCPI1_5) - fld.d $fs5, $a0, %pc_lo12(.LCPI1_5) + lu12i.w $a0, -322426 + ori $a0, $a0, 495 + lu32i.d $a0, 405840 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fs0, $a0 + lu12i.w $a0, -453418 + ori $a0, $a0, 4042 + lu32i.d $a0, 321454 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -67903 + ori $a0, $a0, 426 + lu32i.d $a0, 184418 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -259461 + ori $a0, $a0, 3899 + lu32i.d $a0, -449431 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, 506994 + ori $a0, $a0, 187 + lu32i.d $a0, 86394 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -137430 + ori $a0, $a0, 2909 + lu32i.d $a0, 108629 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fs5, $a0 ori $s2, $zero, 64 vrepli.b $vr6, 0 ori $a0, $zero, 1024 diff --git a/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jddctmgr.s b/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jddctmgr.s index 6aaa75ce..40bba127 100644 --- a/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jddctmgr.s +++ b/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jddctmgr.s @@ -63,22 +63,7 @@ jinit_inverse_dct: # @jinit_inverse_dct .Lfunc_end0: .size jinit_inverse_dct, .Lfunc_end0-jinit_inverse_dct # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function start_pass -.LCPI1_0: - .dword 0x3ff63150b14861ef # double 1.3870398450000001 -.LCPI1_1: - .dword 0x3ff4e7ae914d6fca # double 1.3065629649999999 -.LCPI1_2: - .dword 0x3ff2d062ef6c11aa # double 1.1758756020000001 -.LCPI1_3: - .dword 0x3fe92469c0a7bf3b # double 0.785694958 -.LCPI1_4: - .dword 0x3fe1517a7bc720bb # double 0.54119609999999996 -.LCPI1_5: - .dword 0x3fd1a855de72ab5d # double 0.275899379 - .text - .p2align 5 + .p2align 5 # -- Begin function start_pass .type start_pass,@function start_pass: # @start_pass # %bb.0: @@ -111,19 +96,37 @@ start_pass: # @start_pass pcalau12i $a0, %got_pc_hi20(jpeg_idct_1x1) ld.d $s3, $a0, %got_pc_lo12(jpeg_idct_1x1) ori $a7, $zero, 7 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI1_0) - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI1_1) - pcalau12i $a0, %pc_hi20(.LCPI1_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI1_2) - pcalau12i $a0, %pc_hi20(.LCPI1_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI1_3) - pcalau12i $a0, %pc_hi20(.LCPI1_4) - fld.d $fs4, $a0, %pc_lo12(.LCPI1_4) - pcalau12i $a0, %pc_hi20(.LCPI1_5) - fld.d $fs5, $a0, %pc_lo12(.LCPI1_5) vrepli.b $vr5, 0 + lu12i.w $a0, -322426 + ori $a0, $a0, 495 + lu32i.d $a0, 405840 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fs0, $a0 + lu12i.w $a0, -453418 + ori $a0, $a0, 4042 + lu32i.d $a0, 321454 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -67903 + ori $a0, $a0, 426 + lu32i.d $a0, 184418 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -259461 + ori $a0, $a0, 3899 + lu32i.d $a0, -449431 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, 506994 + ori $a0, $a0, 187 + lu32i.d $a0, 86394 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -137430 + ori $a0, $a0, 2909 + lu32i.d $a0, 108629 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(start_pass.aanscales) addi.d $s7, $a0, %pc_lo12(start_pass.aanscales) ori $a0, $zero, 2048 diff --git a/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jidctflt.s b/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jidctflt.s index a7f52a72..3eef6b76 100644 --- a/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jidctflt.s +++ b/results/MultiSource/Benchmarks/mediabench/jpeg/jpeg-6a/CMakeFiles/cjpeg.dir/jidctflt.s @@ -1,16 +1,6 @@ .file "jidctflt.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function jpeg_idct_float -.LCPI0_0: - .word 0x3fb504f3 # float 1.41421354 -.LCPI0_1: - .word 0x3fec835e # float 1.84775901 -.LCPI0_2: - .word 0x3f8a8bd4 # float 1.08239222 -.LCPI0_3: - .word 0xc0273d75 # float -2.61312604 .text - .globl jpeg_idct_float + .globl jpeg_idct_float # -- Begin function jpeg_idct_float .p2align 5 .type jpeg_idct_float,@function jpeg_idct_float: # @jpeg_idct_float @@ -20,17 +10,22 @@ jpeg_idct_float: # @jpeg_idct_float ld.d $a0, $a0, 408 addi.d $a1, $a2, 64 addi.d $a2, $a5, 128 - pcalau12i $a5, %pc_hi20(.LCPI0_0) - fld.s $fa0, $a5, %pc_lo12(.LCPI0_0) - pcalau12i $a5, %pc_hi20(.LCPI0_1) - fld.s $fa1, $a5, %pc_lo12(.LCPI0_1) - pcalau12i $a5, %pc_hi20(.LCPI0_2) - fld.s $fa2, $a5, %pc_lo12(.LCPI0_2) - pcalau12i $a5, %pc_hi20(.LCPI0_3) - fld.s $fa3, $a5, %pc_lo12(.LCPI0_3) addi.d $a5, $sp, 16 ori $a6, $zero, 9 ori $a7, $zero, 1 + lu12i.w $t0, 260944 + ori $t0, $t0, 1267 + movgr2fr.w $fa0, $t0 + lu12i.w $t0, 261832 + ori $t0, $t0, 862 + movgr2fr.w $fa1, $t0 + lu12i.w $t0, 260264 + ori $t0, $t0, 3028 + movgr2fr.w $fa2, $t0 + lu12i.w $t0, -261517 + ori $t0, $t0, 3445 + lu32i.d $t0, 0 + movgr2fr.w $fa3, $t0 b .LBB0_3 .p2align 4, , 16 .LBB0_1: # in Loop: Header=BB0_3 Depth=1 diff --git a/results/MultiSource/Benchmarks/mediabench/mpeg2/mpeg2dec/CMakeFiles/mpeg2decode.dir/gethdr.s b/results/MultiSource/Benchmarks/mediabench/mpeg2/mpeg2dec/CMakeFiles/mpeg2decode.dir/gethdr.s index dcca2976..85829fbd 100644 --- a/results/MultiSource/Benchmarks/mediabench/mpeg2/mpeg2dec/CMakeFiles/mpeg2decode.dir/gethdr.s +++ b/results/MultiSource/Benchmarks/mediabench/mpeg2/mpeg2dec/CMakeFiles/mpeg2decode.dir/gethdr.s @@ -877,27 +877,23 @@ marker_bit: # @marker_bit .Lfunc_end3: .size marker_bit, .Lfunc_end3-marker_bit # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function extension_and_user_data -.LCPI4_0: - .dword 0x4079000000000000 # double 400 - .text - .p2align 5 + .p2align 5 # -- Begin function extension_and_user_data .type extension_and_user_data,@function extension_and_user_data: # @extension_and_user_data # %bb.0: - addi.d $sp, $sp, -256 - st.d $ra, $sp, 248 # 8-byte Folded Spill - st.d $fp, $sp, 240 # 8-byte Folded Spill - st.d $s0, $sp, 232 # 8-byte Folded Spill - st.d $s1, $sp, 224 # 8-byte Folded Spill - st.d $s2, $sp, 216 # 8-byte Folded Spill - st.d $s3, $sp, 208 # 8-byte Folded Spill - st.d $s4, $sp, 200 # 8-byte Folded Spill - st.d $s5, $sp, 192 # 8-byte Folded Spill - st.d $s6, $sp, 184 # 8-byte Folded Spill - st.d $s7, $sp, 176 # 8-byte Folded Spill - st.d $s8, $sp, 168 # 8-byte Folded Spill + addi.d $sp, $sp, -272 + st.d $ra, $sp, 264 # 8-byte Folded Spill + st.d $fp, $sp, 256 # 8-byte Folded Spill + st.d $s0, $sp, 248 # 8-byte Folded Spill + st.d $s1, $sp, 240 # 8-byte Folded Spill + st.d $s2, $sp, 232 # 8-byte Folded Spill + st.d $s3, $sp, 224 # 8-byte Folded Spill + st.d $s4, $sp, 216 # 8-byte Folded Spill + st.d $s5, $sp, 208 # 8-byte Folded Spill + st.d $s6, $sp, 200 # 8-byte Folded Spill + st.d $s7, $sp, 192 # 8-byte Folded Spill + st.d $s8, $sp, 184 # 8-byte Folded Spill + fst.d $fs0, $sp, 176 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(ld) ld.d $s4, $a0, %got_pc_lo12(ld) ld.d $a0, $s4, 0 @@ -925,62 +921,66 @@ extension_and_user_data: # @extension_and_user_data ori $s0, $zero, 434 pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) - st.d $a0, $sp, 16 # 8-byte Folded Spill + st.d $a0, $sp, 24 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.L.str.2) addi.d $a0, $a0, %pc_lo12(.L.str.2) - st.d $a0, $sp, 8 # 8-byte Folded Spill + st.d $a0, $sp, 16 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.LJTI4_0) addi.d $a0, $a0, %pc_lo12(.LJTI4_0) - st.d $a0, $sp, 160 # 8-byte Folded Spill + st.d $a0, $sp, 168 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(layer_id) ld.d $a0, $a0, %got_pc_lo12(layer_id) - st.d $a0, $sp, 152 # 8-byte Folded Spill + st.d $a0, $sp, 160 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(profile_and_level_indication) ld.d $a0, $a0, %got_pc_lo12(profile_and_level_indication) - st.d $a0, $sp, 128 # 8-byte Folded Spill + st.d $a0, $sp, 136 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(progressive_sequence) ld.d $a0, $a0, %got_pc_lo12(progressive_sequence) - st.d $a0, $sp, 144 # 8-byte Folded Spill + st.d $a0, $sp, 152 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(chroma_format) ld.d $a0, $a0, %got_pc_lo12(chroma_format) - st.d $a0, $sp, 120 # 8-byte Folded Spill + st.d $a0, $sp, 128 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(low_delay) ld.d $a0, $a0, %got_pc_lo12(low_delay) - st.d $a0, $sp, 112 # 8-byte Folded Spill + st.d $a0, $sp, 120 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(frame_rate_extension_n) ld.d $a0, $a0, %got_pc_lo12(frame_rate_extension_n) - st.d $a0, $sp, 104 # 8-byte Folded Spill + st.d $a0, $sp, 112 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(frame_rate_extension_d) ld.d $a0, $a0, %got_pc_lo12(frame_rate_extension_d) - st.d $a0, $sp, 96 # 8-byte Folded Spill + st.d $a0, $sp, 104 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(frame_rate_code) ld.d $a0, $a0, %got_pc_lo12(frame_rate_code) - st.d $a0, $sp, 88 # 8-byte Folded Spill + st.d $a0, $sp, 96 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(frame_rate_Table) addi.d $a0, $a0, %pc_lo12(frame_rate_Table) - st.d $a0, $sp, 80 # 8-byte Folded Spill + st.d $a0, $sp, 88 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(frame_rate) ld.d $a0, $a0, %got_pc_lo12(frame_rate) - st.d $a0, $sp, 72 # 8-byte Folded Spill + st.d $a0, $sp, 80 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(profile) ld.d $a0, $a0, %got_pc_lo12(profile) - st.d $a0, $sp, 32 # 8-byte Folded Spill + st.d $a0, $sp, 40 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(level) ld.d $a0, $a0, %got_pc_lo12(level) - st.d $a0, $sp, 24 # 8-byte Folded Spill + st.d $a0, $sp, 32 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(horizontal_size) ld.d $a0, $a0, %got_pc_lo12(horizontal_size) - st.d $a0, $sp, 64 # 8-byte Folded Spill + st.d $a0, $sp, 72 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(vertical_size) ld.d $a0, $a0, %got_pc_lo12(vertical_size) - st.d $a0, $sp, 56 # 8-byte Folded Spill + st.d $a0, $sp, 64 # 8-byte Folded Spill pcalau12i $a0, %got_pc_hi20(bit_rate_value) ld.d $a0, $a0, %got_pc_lo12(bit_rate_value) - st.d $a0, $sp, 48 # 8-byte Folded Spill + st.d $a0, $sp, 56 # 8-byte Folded Spill + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1031 + movgr2fr.d $fs0, $a0 ori $s8, $zero, 64 lu12i.w $a0, 3 ori $a0, $a0, 4095 - st.d $a0, $sp, 40 # 8-byte Folded Spill + st.d $a0, $sp, 48 # 8-byte Folded Spill .p2align 4, , 16 .LBB4_3: # %next_start_code.exit # =>This Loop Header: Depth=1 @@ -1011,69 +1011,69 @@ extension_and_user_data: # @extension_and_user_data bltu $a1, $a0, .LBB4_53 # %bb.6: # in Loop: Header=BB4_3 Depth=1 slli.d $a0, $a0, 2 - ld.d $a1, $sp, 160 # 8-byte Folded Reload + ld.d $a1, $sp, 168 # 8-byte Folded Reload ldx.w $a0, $a1, $a0 add.d $a0, $a1, $a0 jr $a0 .LBB4_7: # in Loop: Header=BB4_3 Depth=1 ld.d $a0, $s4, 0 stptr.d $s6, $a0, 3144 - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a0, $sp, 160 # 8-byte Folded Reload st.w $zero, $a0, 0 ori $a0, $zero, 8 pcaddu18i $ra, %call36(Get_Bits) jirl $ra, $ra, 0 - ld.d $fp, $sp, 128 # 8-byte Folded Reload + ld.d $fp, $sp, 136 # 8-byte Folded Reload st.w $a0, $fp, 0 ori $a0, $zero, 1 pcaddu18i $ra, %call36(Get_Bits) jirl $ra, $ra, 0 - ld.d $a1, $sp, 144 # 8-byte Folded Reload + ld.d $a1, $sp, 152 # 8-byte Folded Reload st.w $a0, $a1, 0 ori $a0, $zero, 2 pcaddu18i $ra, %call36(Get_Bits) jirl $ra, $ra, 0 - ld.d $a1, $sp, 120 # 8-byte Folded Reload + ld.d $a1, $sp, 128 # 8-byte Folded Reload st.w $a0, $a1, 0 ori $a0, $zero, 2 pcaddu18i $ra, %call36(Get_Bits) jirl $ra, $ra, 0 - move $s2, $a0 + move $s1, $a0 ori $a0, $zero, 2 pcaddu18i $ra, %call36(Get_Bits) jirl $ra, $ra, 0 - move $s1, $a0 + move $s0, $a0 ori $a0, $zero, 12 pcaddu18i $ra, %call36(Get_Bits) jirl $ra, $ra, 0 - move $s3, $a0 + move $s2, $a0 ori $a0, $zero, 1 pcaddu18i $ra, %call36(Get_Bits) jirl $ra, $ra, 0 ori $a0, $zero, 8 pcaddu18i $ra, %call36(Get_Bits) jirl $ra, $ra, 0 - move $s0, $a0 + move $s3, $a0 ori $a0, $zero, 1 pcaddu18i $ra, %call36(Get_Bits) jirl $ra, $ra, 0 - ld.d $a1, $sp, 112 # 8-byte Folded Reload + ld.d $a1, $sp, 120 # 8-byte Folded Reload st.w $a0, $a1, 0 ori $a0, $zero, 2 pcaddu18i $ra, %call36(Get_Bits) jirl $ra, $ra, 0 - ld.d $s5, $sp, 104 # 8-byte Folded Reload + ld.d $s5, $sp, 112 # 8-byte Folded Reload st.w $a0, $s5, 0 ori $a0, $zero, 5 pcaddu18i $ra, %call36(Get_Bits) jirl $ra, $ra, 0 - ld.d $a1, $sp, 88 # 8-byte Folded Reload + ld.d $a1, $sp, 96 # 8-byte Folded Reload ld.w $a1, $a1, 0 ld.w $a2, $s5, 0 - ld.d $a3, $sp, 96 # 8-byte Folded Reload + ld.d $a3, $sp, 104 # 8-byte Folded Reload st.w $a0, $a3, 0 slli.d $a1, $a1, 3 - ld.d $a3, $sp, 80 # 8-byte Folded Reload + ld.d $a3, $sp, 88 # 8-byte Folded Reload fldx.d $fa0, $a3, $a1 addi.w $a1, $a2, 1 addi.w $a0, $a0, 1 @@ -1082,14 +1082,14 @@ extension_and_user_data: # @extension_and_user_data ld.w $a1, $fp, 0 ffint.d.w $fa1, $fa1 fmul.d $fa0, $fa0, $fa1 - ld.d $a0, $sp, 72 # 8-byte Folded Reload + ld.d $a0, $sp, 80 # 8-byte Folded Reload fst.d $fa0, $a0, 0 andi $a2, $a1, 128 andi $a0, $a1, 15 bnez $a2, .LBB4_41 # %bb.8: # in Loop: Header=BB4_3 Depth=1 srli.d $a1, $a1, 4 - ld.d $a2, $sp, 32 # 8-byte Folded Reload + ld.d $a2, $sp, 40 # 8-byte Folded Reload st.w $a1, $a2, 0 b .LBB4_43 .p2align 4, , 16 @@ -1125,7 +1125,7 @@ extension_and_user_data: # @extension_and_user_data jirl $ra, $ra, 0 ld.d $a1, $s4, 0 ldptr.w $a1, $a1, 3148 - ld.d $a2, $sp, 152 # 8-byte Folded Reload + ld.d $a2, $sp, 160 # 8-byte Folded Reload st.w $a0, $a2, 0 ori $a0, $zero, 2 bne $a1, $a0, .LBB4_14 @@ -1196,7 +1196,7 @@ extension_and_user_data: # @extension_and_user_data ori $a0, $zero, 15 pcaddu18i $ra, %call36(Get_Bits) jirl $ra, $ra, 0 - ld.d $fp, $sp, 40 # 8-byte Folded Reload + ld.d $fp, $sp, 48 # 8-byte Folded Reload slt $a1, $fp, $a0 lu12i.w $s0, -8 add.d $a2, $a0, $s0 @@ -1243,7 +1243,7 @@ extension_and_user_data: # @extension_and_user_data ld.d $a0, $s4, 0 ori $a1, $zero, 2100 ldx.w $a0, $a0, $a1 - st.d $a0, $sp, 136 # 8-byte Folded Spill + st.d $a0, $sp, 144 # 8-byte Folded Spill ori $a0, $zero, 1 pcaddu18i $ra, %call36(Get_Bits) jirl $ra, $ra, 0 @@ -1297,7 +1297,7 @@ extension_and_user_data: # @extension_and_user_data ld.w $a0, $fp, 0 blez $a0, .LBB4_51 # %bb.18: # in Loop: Header=BB4_3 Depth=1 - ld.d $a0, $sp, 136 # 8-byte Folded Reload + ld.d $a0, $sp, 144 # 8-byte Folded Reload srai.d $a0, $a0, 3 addi.d $a1, $a0, -4 pcalau12i $a0, %pc_hi20(.L.str.15) @@ -1572,7 +1572,7 @@ extension_and_user_data: # @extension_and_user_data bne $s0, $s8, .LBB4_33 b .LBB4_51 .LBB4_34: # in Loop: Header=BB4_3 Depth=1 - ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.w $a0, $a0, 0 beqz $a0, .LBB4_45 # %bb.35: # in Loop: Header=BB4_3 Depth=1 @@ -1645,38 +1645,36 @@ extension_and_user_data: # @extension_and_user_data ori $a1, $zero, 5 bne $a0, $a1, .LBB4_44 # %bb.42: # in Loop: Header=BB4_3 Depth=1 - ld.d $a0, $sp, 32 # 8-byte Folded Reload + ld.d $a0, $sp, 40 # 8-byte Folded Reload ori $a1, $zero, 133 st.w $a1, $a0, 0 ori $a0, $zero, 8 .LBB4_43: # %.sink.split.i # in Loop: Header=BB4_3 Depth=1 - ld.d $a1, $sp, 24 # 8-byte Folded Reload + ld.d $a1, $sp, 32 # 8-byte Folded Reload st.w $a0, $a1, 0 .LBB4_44: # %sequence_extension.exit # in Loop: Header=BB4_3 Depth=1 - ld.d $a2, $sp, 64 # 8-byte Folded Reload + ld.d $a2, $sp, 72 # 8-byte Folded Reload ld.wu $a0, $a2, 0 - ld.d $a3, $sp, 56 # 8-byte Folded Reload + ld.d $a3, $sp, 64 # 8-byte Folded Reload ld.wu $a1, $a3, 0 - bstrins.d $a0, $s2, 63, 12 + bstrins.d $a0, $s1, 63, 12 st.w $a0, $a2, 0 - ld.d $a2, $sp, 48 # 8-byte Folded Reload + ld.d $a2, $sp, 56 # 8-byte Folded Reload ld.w $a0, $a2, 0 - bstrins.d $a1, $s1, 63, 12 + bstrins.d $a1, $s0, 63, 12 st.w $a1, $a3, 0 - slli.d $a1, $s3, 18 + slli.d $a1, $s2, 18 add.d $a0, $a0, $a1 - pcalau12i $a1, %pc_hi20(.LCPI4_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI4_0) st.w $a0, $a2, 0 - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + fmul.d $fa0, $fa0, $fs0 pcalau12i $a0, %got_pc_hi20(bit_rate) ld.d $a0, $a0, %got_pc_lo12(bit_rate) fst.d $fa0, $a0, 0 - slli.d $a0, $s0, 10 + slli.d $a0, $s3, 10 pcalau12i $a1, %got_pc_hi20(vbv_buffer_size) ld.d $a1, $a1, %got_pc_lo12(vbv_buffer_size) ld.w $a2, $a1, 0 @@ -1753,25 +1751,26 @@ extension_and_user_data: # @extension_and_user_data bne $a0, $s6, .LBB4_52 b .LBB4_3 .LBB4_53: # in Loop: Header=BB4_3 Depth=1 - ld.d $a0, $sp, 16 # 8-byte Folded Reload + ld.d $a0, $sp, 24 # 8-byte Folded Reload ld.d $a0, $a0, 0 - ld.d $a1, $sp, 8 # 8-byte Folded Reload + ld.d $a1, $sp, 16 # 8-byte Folded Reload pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 b .LBB4_51 .LBB4_54: - ld.d $s8, $sp, 168 # 8-byte Folded Reload - ld.d $s7, $sp, 176 # 8-byte Folded Reload - ld.d $s6, $sp, 184 # 8-byte Folded Reload - ld.d $s5, $sp, 192 # 8-byte Folded Reload - ld.d $s4, $sp, 200 # 8-byte Folded Reload - ld.d $s3, $sp, 208 # 8-byte Folded Reload - ld.d $s2, $sp, 216 # 8-byte Folded Reload - ld.d $s1, $sp, 224 # 8-byte Folded Reload - ld.d $s0, $sp, 232 # 8-byte Folded Reload - ld.d $fp, $sp, 240 # 8-byte Folded Reload - ld.d $ra, $sp, 248 # 8-byte Folded Reload - addi.d $sp, $sp, 256 + fld.d $fs0, $sp, 176 # 8-byte Folded Reload + ld.d $s8, $sp, 184 # 8-byte Folded Reload + ld.d $s7, $sp, 192 # 8-byte Folded Reload + ld.d $s6, $sp, 200 # 8-byte Folded Reload + ld.d $s5, $sp, 208 # 8-byte Folded Reload + ld.d $s4, $sp, 216 # 8-byte Folded Reload + ld.d $s3, $sp, 224 # 8-byte Folded Reload + ld.d $s2, $sp, 232 # 8-byte Folded Reload + ld.d $s1, $sp, 240 # 8-byte Folded Reload + ld.d $s0, $sp, 248 # 8-byte Folded Reload + ld.d $fp, $sp, 256 # 8-byte Folded Reload + ld.d $ra, $sp, 264 # 8-byte Folded Reload + addi.d $sp, $sp, 272 ret .Lfunc_end4: .size extension_and_user_data, .Lfunc_end4-extension_and_user_data diff --git a/results/MultiSource/Benchmarks/nbench/CMakeFiles/nbench.dir/nbench1.s b/results/MultiSource/Benchmarks/nbench/CMakeFiles/nbench.dir/nbench1.s index 08d150d9..6aa4d763 100644 --- a/results/MultiSource/Benchmarks/nbench/CMakeFiles/nbench.dir/nbench1.s +++ b/results/MultiSource/Benchmarks/nbench/CMakeFiles/nbench.dir/nbench1.s @@ -1,10 +1,6 @@ .file "nbench1.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function DoNumSort -.LCPI0_0: - .dword 0x4077700000000000 # double 375 .text - .globl DoNumSort + .globl DoNumSort # -- Begin function DoNumSort .p2align 5 .type DoNumSort,@function DoNumSort: # @DoNumSort @@ -46,10 +42,12 @@ DoNumSort: # @DoNumSort pcaddu18i $ra, %call36(ErrorExit) jirl $ra, $ra, 0 .LBB0_3: # %.loopexit - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI0_0) move $s0, $zero movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + lu32i.d $a0, 487424 + lu52i.d $a0, $a0, 1031 + movgr2fr.d $fs1, $a0 .p2align 4, , 16 .LBB0_4: # =>This Inner Loop Header: Depth=1 ld.d $a1, $s2, 32 @@ -407,12 +405,7 @@ DoNumSortIteration: # @DoNumSortIteration .Lfunc_end1: .size DoNumSortIteration, .Lfunc_end1-DoNumSortIteration # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function DoStringSort -.LCPI2_0: - .dword 0x405f400000000000 # double 125 - .text - .globl DoStringSort + .globl DoStringSort # -- Begin function DoStringSort .p2align 5 .type DoStringSort,@function DoStringSort: # @DoStringSort @@ -448,11 +441,13 @@ DoStringSort: # @DoStringSort pcaddu18i $ra, %call36(ErrorExit) jirl $ra, $ra, 0 .LBB2_3: # %.loopexit - ld.hu $a1, $s1, 24 - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI2_0) move $s0, $zero + ld.hu $a1, $s1, 24 movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fs1, $a0 .p2align 4, , 16 .LBB2_4: # =>This Inner Loop Header: Depth=1 ld.d $a2, $s1, 32 @@ -1257,14 +1252,7 @@ DoStringSortIteration: # @DoStringSortIteration .Lfunc_end3: .size DoStringSortIteration, .Lfunc_end3-DoStringSortIteration # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function DoBitops -.LCPI4_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI4_1: - .dword 0x413312d000000000 # double 1.25E+6 - .text - .globl DoBitops + .globl DoBitops # -- Begin function DoBitops .p2align 5 .type DoBitops,@function DoBitops: # @DoBitops @@ -1322,13 +1310,16 @@ DoBitops: # @DoBitops jirl $ra, $ra, 0 .LBB4_5: # %.loopexit move $s1, $zero - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI4_0) - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI4_1) movgr2fr.d $fs0, $zero lu52i.d $s3, $zero, 1107 + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 + movgr2fr.d $fs1, $a0 lu12i.w $s4, 275200 + ori $a0, $zero, 0 + lu32i.d $a0, 201424 + lu52i.d $a0, $a0, 1043 + movgr2fr.d $fs2, $a0 .p2align 4, , 16 .LBB4_6: # =>This Inner Loop Header: Depth=1 ld.d $a2, $s2, 24 @@ -1617,14 +1608,7 @@ DoBitfieldIteration: # @DoBitfieldIteration .Lfunc_end5: .size DoBitfieldIteration, .Lfunc_end5-DoBitfieldIteration # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function DoEmFloat -.LCPI6_0: - .dword 0x4052c00000000000 # double 75 -.LCPI6_1: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 - .text - .globl DoEmFloat + .globl DoEmFloat # -- Begin function DoEmFloat .p2align 5 .type DoEmFloat,@function DoEmFloat: # @DoEmFloat @@ -1950,10 +1934,12 @@ DoEmFloat: # @DoEmFloat .LBB6_28: # %.thread st.d $s2, $s3, 24 .LBB6_29: # %.preheader - pcalau12i $a0, %pc_hi20(.LCPI6_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI6_0) move $s2, $zero movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + lu32i.d $a0, 180224 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fs1, $a0 .p2align 4, , 16 .LBB6_30: # =>This Inner Loop Header: Depth=1 ld.d $a3, $s3, 16 @@ -1983,12 +1969,13 @@ DoEmFloat: # @DoEmFloat jirl $ra, $ra, 0 ld.d $a0, $s3, 24 srli.d $a1, $a0, 32 - pcalau12i $a2, %pc_hi20(.LCPI6_1) - fld.d $fa0, $a2, %pc_lo12(.LCPI6_1) lu52i.d $a2, $zero, 1107 or $a1, $a1, $a2 + movgr2fr.d $fa0, $a1 + lu12i.w $a1, 256 + lu52i.d $a1, $a1, 1107 movgr2fr.d $fa1, $a1 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 lu12i.w $a1, 275200 bstrins.d $a0, $a1, 63, 32 movgr2fr.d $fa1, $a0 @@ -2019,14 +2006,7 @@ DoEmFloat: # @DoEmFloat .Lfunc_end6: .size DoEmFloat, .Lfunc_end6-DoEmFloat # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function DoFourier -.LCPI7_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI7_1: - .dword 0x40c3880000000000 # double 1.0E+4 - .text - .globl DoFourier + .globl DoFourier # -- Begin function DoFourier .p2align 5 .type DoFourier,@function DoFourier: # @DoFourier @@ -2087,11 +2067,14 @@ DoFourier: # @DoFourier ld.d $a2, $s2, 16 movgr2fr.d $fs0, $zero lu52i.d $s3, $zero, 1107 - pcalau12i $a0, %pc_hi20(.LCPI7_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI7_0) - pcalau12i $a0, %pc_hi20(.LCPI7_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI7_1) + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 + movgr2fr.d $fs1, $a0 lu12i.w $s4, 275200 + ori $a0, $zero, 0 + lu32i.d $a0, 231424 + lu52i.d $a0, $a0, 1036 + movgr2fr.d $fs2, $a0 .p2align 4, , 16 .LBB7_6: # =>This Inner Loop Header: Depth=1 move $a0, $fp @@ -2211,16 +2194,7 @@ DoFourier: # @DoFourier .Lfunc_end7: .size DoFourier, .Lfunc_end7-DoFourier # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function DoFPUTransIteration -.LCPI8_0: - .dword 0x3f847ae147ae147b # double 0.01 -.LCPI8_1: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI8_2: - .dword 0x400921fb54442d18 # double 3.1415926535897931 - .text - .p2align 5 + .p2align 5 # -- Begin function DoFPUTransIteration .type DoFPUTransIteration,@function DoFPUTransIteration: # @DoFPUTransIteration # %bb.0: @@ -2253,9 +2227,12 @@ DoFPUTransIteration: # @DoFPUTransIteration vst $vr0, $sp, 16 # 16-byte Folded Spill pcaddu18i $ra, %call36(StartStopwatch) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI8_0) - fld.d $fs3, $a1, %pc_lo12(.LCPI8_0) move $s2, $a0 + lu12i.w $a0, 293601 + ori $a0, $a0, 1147 + lu32i.d $a0, 293601 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fs3, $a0 .p2align 4, , 16 .LBB8_1: # %thefunction.exit27.us.i # =>This Inner Loop Header: Depth=1 @@ -2285,12 +2262,16 @@ DoFPUTransIteration: # @DoFPUTransIteration # %bb.3: # %.lr.ph.preheader ori $s3, $zero, 1 lu52i.d $s4, $zero, 1107 - pcalau12i $a0, %pc_hi20(.LCPI8_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI8_1) + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 + movgr2fr.d $fa0, $a0 fst.d $fa0, $sp, 16 # 8-byte Folded Spill lu12i.w $s5, 275200 - pcalau12i $a0, %pc_hi20(.LCPI8_2) - fld.d $fs5, $a0, %pc_lo12(.LCPI8_2) + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fs5, $a0 movgr2fr.d $fs6, $zero addi.w $s6, $zero, -198 .p2align 4, , 16 @@ -2418,12 +2399,7 @@ DoFPUTransIteration: # @DoFPUTransIteration .Lfunc_end8: .size DoFPUTransIteration, .Lfunc_end8-DoFPUTransIteration # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function DoAssign -.LCPI9_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 - .text - .globl DoAssign + .globl DoAssign # -- Begin function DoAssign .p2align 5 .type DoAssign,@function DoAssign: # @DoAssign @@ -2509,12 +2485,13 @@ DoAssign: # @DoAssign jirl $ra, $ra, 0 ld.d $a0, $s1, 16 srli.d $a1, $a0, 32 - pcalau12i $a2, %pc_hi20(.LCPI9_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI9_0) lu52i.d $a2, $zero, 1107 or $a1, $a1, $a2 + movgr2fr.d $fa0, $a1 + lu12i.w $a1, 256 + lu52i.d $a1, $a1, 1107 movgr2fr.d $fa1, $a1 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 lu12i.w $a1, 275200 bstrins.d $a0, $a1, 63, 32 movgr2fr.d $fa1, $a0 @@ -4227,14 +4204,7 @@ DoAssignIteration: # @DoAssignIteration .Lfunc_end10: .size DoAssignIteration, .Lfunc_end10-DoAssignIteration # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function DoIDEA -.LCPI11_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI11_1: - .dword 0x409d4c0000000000 # double 1875 - .text - .globl DoIDEA + .globl DoIDEA # -- Begin function DoIDEA .p2align 5 .type DoIDEA,@function DoIDEA: # @DoIDEA @@ -5165,10 +5135,13 @@ DoIDEA: # @DoIDEA ld.hu $s3, $sp, 164 sub.d $s2, $s5, $s6 movgr2fr.d $fs0, $zero - pcalau12i $a0, %pc_hi20(.LCPI11_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI11_0) - pcalau12i $a0, %pc_hi20(.LCPI11_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI11_1) + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 + movgr2fr.d $fs1, $a0 + ori $a0, $zero, 0 + lu32i.d $a0, -177152 + lu52i.d $a0, $a0, 1033 + movgr2fr.d $fs2, $a0 addi.w $s1, $zero, -8 move $s4, $a2 b .LBB11_130 @@ -5641,14 +5614,7 @@ DoIDEA: # @DoIDEA .Lfunc_end11: .size DoIDEA, .Lfunc_end11-DoIDEA # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function DoHuffman -.LCPI12_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI12_1: - .dword 0x407f400000000000 # double 500 - .text - .globl DoHuffman + .globl DoHuffman # -- Begin function DoHuffman .p2align 5 .type DoHuffman,@function DoHuffman: # @DoHuffman @@ -5875,13 +5841,16 @@ DoHuffman: # @DoHuffman .LBB12_18: # %.loopexit move $s2, $zero ld.d $a4, $s1, 24 - pcalau12i $a0, %pc_hi20(.LCPI12_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI12_0) - pcalau12i $a0, %pc_hi20(.LCPI12_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI12_1) movgr2fr.d $fs0, $zero lu52i.d $fp, $zero, 1107 + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 + movgr2fr.d $fs1, $a0 lu12i.w $s0, 275200 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1031 + movgr2fr.d $fs2, $a0 .p2align 4, , 16 .LBB12_19: # =>This Inner Loop Header: Depth=1 ld.d $a3, $s1, 16 @@ -6437,16 +6406,7 @@ DoHuffIteration: # @DoHuffIteration .Lfunc_end13: .size DoHuffIteration, .Lfunc_end13-DoHuffIteration # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function DoNNET -.LCPI14_0: - .dword 0x3feccccccccccccd # double 0.90000000000000002 -.LCPI14_1: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI14_2: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 - .text - .globl DoNNET + .globl DoNNET # -- Begin function DoNNET .p2align 5 .type DoNNET,@function DoNNET: # @DoNNET @@ -6535,18 +6495,16 @@ DoNNET: # @DoNNET lu12i.w $a0, -209716 ori $a0, $a0, 3277 lu32i.d $a0, -209716 - lu52i.d $a0, $a0, 1022 - vreplgr2vr.d $vr0, $a0 + lu52i.d $s6, $a0, 1022 + vreplgr2vr.d $vr0, $s6 vst $vr0, $sp, 64 # 16-byte Folded Spill lu12i.w $a0, -419431 ori $a0, $a0, 2458 lu32i.d $a0, -419431 lu52i.d $a0, $a0, 1019 + st.d $a0, $sp, 80 # 8-byte Folded Spill vreplgr2vr.d $vr0, $a0 vst $vr0, $sp, 48 # 16-byte Folded Spill - pcalau12i $s6, %pc_hi20(.LCPI14_0) - pcalau12i $a0, %pc_hi20(.LCPI14_1) - st.d $a0, $sp, 80 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.L.str.72) addi.d $a0, $a0, %pc_lo12(.L.str.72) st.d $a0, $sp, 40 # 8-byte Folded Spill @@ -6942,7 +6900,7 @@ DoNNET: # @DoNNET .LBB14_76: # %scalar.ph # in Loop: Header=BB14_10 Depth=1 fld.d $fa1, $a0, 256 - fld.d $fa0, $s6, %pc_lo12(.LCPI14_0) + movgr2fr.d $fa0, $s6 fcmp.cle.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB14_79 b .LBB14_80 @@ -6956,12 +6914,12 @@ DoNNET: # @DoNNET # in Loop: Header=BB14_10 Depth=1 vstelm.d $vr0, $a0, 248, 1 fld.d $fa1, $a0, 256 - fld.d $fa0, $s6, %pc_lo12(.LCPI14_0) + movgr2fr.d $fa0, $s6 fcmp.cle.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB14_80 .LBB14_79: # in Loop: Header=BB14_10 Depth=1 ld.d $a1, $sp, 80 # 8-byte Folded Reload - fld.d $fa0, $a1, %pc_lo12(.LCPI14_1) + movgr2fr.d $fa0, $a1 fcmp.cult.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB14_81 .LBB14_80: # %.sink.split.i @@ -6970,12 +6928,12 @@ DoNNET: # @DoNNET .LBB14_81: # %scalar.ph.1 # in Loop: Header=BB14_10 Depth=1 fld.d $fa1, $a0, 264 - fld.d $fa0, $s6, %pc_lo12(.LCPI14_0) + movgr2fr.d $fa0, $s6 fcmp.cle.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB14_83 # %bb.82: # in Loop: Header=BB14_10 Depth=1 ld.d $a1, $sp, 80 # 8-byte Folded Reload - fld.d $fa0, $a1, %pc_lo12(.LCPI14_1) + movgr2fr.d $fa0, $a1 fcmp.cult.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB14_84 .LBB14_83: # %.sink.split.i.1 @@ -6984,12 +6942,12 @@ DoNNET: # @DoNNET .LBB14_84: # %scalar.ph.2 # in Loop: Header=BB14_10 Depth=1 fld.d $fa1, $a0, 272 - fld.d $fa0, $s6, %pc_lo12(.LCPI14_0) + movgr2fr.d $fa0, $s6 fcmp.cle.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB14_86 # %bb.85: # in Loop: Header=BB14_10 Depth=1 ld.d $a1, $sp, 80 # 8-byte Folded Reload - fld.d $fa0, $a1, %pc_lo12(.LCPI14_1) + movgr2fr.d $fa0, $a1 fcmp.cult.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB14_87 .LBB14_86: # %.sink.split.i.2 @@ -7095,9 +7053,10 @@ DoNNET: # @DoNNET .LBB14_95: # %.loopexit.preheader move $fp, $zero movgr2fr.d $fs0, $zero - pcalau12i $a0, %pc_hi20(.LCPI14_2) - fld.d $fs1, $a0, %pc_lo12(.LCPI14_2) lu52i.d $s1, $zero, 1107 + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 + movgr2fr.d $fs1, $a0 lu12i.w $s2, 275200 .p2align 4, , 16 .LBB14_96: # %.loopexit @@ -7151,18 +7110,7 @@ DoNNET: # @DoNNET .Lfunc_end14: .size DoNNET, .Lfunc_end14-DoNNET # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function DoNNetIteration -.LCPI15_0: - .dword 0x40f86a0000000000 # double 1.0E+5 -.LCPI15_1: - .dword 0x40c3880000000000 # double 1.0E+4 -.LCPI15_2: - .dword 0x3fb70a3d70a3d70a # double 0.089999999999999996 -.LCPI15_3: - .dword 0x3fb999999999999a # double 0.10000000000000001 - .text - .p2align 5 + .p2align 5 # -- Begin function DoNNetIteration .type DoNNetIteration,@function DoNNetIteration: # @DoNNetIteration # %bb.0: @@ -7186,37 +7134,45 @@ DoNNetIteration: # @DoNNetIteration fst.d $fs5, $sp, 920 # 8-byte Folded Spill fst.d $fs6, $sp, 912 # 8-byte Folded Spill fst.d $fs7, $sp, 904 # 8-byte Folded Spill - move $s1, $a0 + move $s0, $a0 pcaddu18i $ra, %call36(StartStopwatch) jirl $ra, $ra, 0 - st.d $a0, $sp, 16 # 8-byte Folded Spill - beqz $s1, .LBB15_95 + st.d $a0, $sp, 8 # 8-byte Folded Spill + beqz $s0, .LBB15_95 # %bb.1: lu12i.w $a0, 24 - pcalau12i $a1, %pc_hi20(.LCPI15_0) - fld.d $fs0, $a1, %pc_lo12(.LCPI15_0) - ori $s2, $a0, 1696 - pcalau12i $a0, %pc_hi20(mid_wts) - addi.d $s6, $a0, %pc_lo12(mid_wts) - ori $s7, $zero, 280 - pcalau12i $a0, %pc_hi20(out_wts) - addi.d $s8, $a0, %pc_lo12(out_wts) - pcalau12i $a0, %pc_hi20(.LCPI15_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI15_1) + ori $s1, $a0, 1696 + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -497152 + lu52i.d $a1, $a1, 1039 + movgr2fr.d $fs0, $a1 + pcalau12i $a1, %pc_hi20(mid_wts) + addi.d $s6, $a1, %pc_lo12(mid_wts) + ori $s2, $zero, 280 + pcalau12i $a1, %pc_hi20(out_wts) + addi.d $s8, $a1, %pc_lo12(out_wts) + lu32i.d $a0, 231424 + lu52i.d $a0, $a0, 1036 + movgr2fr.d $fs1, $a0 lu12i.w $a0, -1 ori $a0, $a0, 1856 st.d $a0, $sp, 72 # 8-byte Folded Spill vrepli.b $vr5, 0 - ori $fp, $zero, 2496 lu12i.w $a0, 461373 ori $a0, $a0, 1802 lu32i.d $a0, 461373 - lu52i.d $a1, $a0, 1019 + lu52i.d $s7, $a0, 1019 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1019 + st.d $a0, $sp, 32 # 8-byte Folded Spill + st.d $s1, $sp, 16 # 8-byte Folded Spill fst.d $fs0, $sp, 88 # 8-byte Folded Spill - st.d $s2, $sp, 24 # 8-byte Folded Spill fst.d $fs1, $sp, 80 # 8-byte Folded Spill vst $vr5, $sp, 96 # 16-byte Folded Spill - st.d $a1, $sp, 64 # 8-byte Folded Spill + st.d $s7, $sp, 64 # 8-byte Folded Spill b .LBB15_5 .p2align 4, , 16 .LBB15_2: # %._crit_edge.thread @@ -7235,12 +7191,13 @@ DoNNetIteration: # @DoNNetIteration ori $a0, $zero, 1 .LBB15_4: # %.loopexit # in Loop: Header=BB15_5 Depth=1 - ld.d $s1, $sp, 32 # 8-byte Folded Reload - ld.d $s2, $sp, 24 # 8-byte Folded Reload - addi.d $s1, $s1, -1 - ld.d $a2, $sp, 56 # 8-byte Folded Reload - st.w $a0, $a2, %pc_lo12(learned) - beqz $s1, .LBB15_95 + ld.d $s0, $sp, 24 # 8-byte Folded Reload + ld.d $s1, $sp, 16 # 8-byte Folded Reload + ori $s2, $zero, 280 + addi.d $s0, $s0, -1 + ld.d $a1, $sp, 56 # 8-byte Folded Reload + st.w $a0, $a1, %pc_lo12(learned) + beqz $s0, .LBB15_95 .LBB15_5: # %.preheader22 # =>This Loop Header: Depth=1 # Child Loop BB15_6 Depth 2 @@ -7261,11 +7218,11 @@ DoNNetIteration: # @DoNNetIteration # Child Loop BB15_83 Depth 3 # Child Loop BB15_90 Depth 2 # Child Loop BB15_93 Depth 2 - move $s0, $zero + move $fp, $zero .p2align 4, , 16 .LBB15_6: # Parent Loop BB15_5 Depth=1 # => This Inner Loop Header: Depth=2 - move $a0, $s2 + move $a0, $s1 pcaddu18i $ra, %call36(abs_randwc) jirl $ra, $ra, 0 vldi $vr1, -800 @@ -7276,17 +7233,17 @@ DoNNetIteration: # @DoNNetIteration fadd.d $fa0, $fa0, $fa1 vldi $vr1, -928 fmul.d $fa0, $fa0, $fa1 - fstx.d $fa0, $s6, $s0 - addi.d $s0, $s0, 8 - bne $s0, $s7, .LBB15_6 + fstx.d $fa0, $s6, $fp + addi.d $fp, $fp, 8 + bne $fp, $s2, .LBB15_6 # %bb.7: # %.preheader18.1.i.preheader # in Loop: Header=BB15_5 Depth=1 - move $s0, $zero + move $fp, $zero .p2align 4, , 16 .LBB15_8: # %.preheader18.1.i # Parent Loop BB15_5 Depth=1 # => This Inner Loop Header: Depth=2 - move $a0, $s2 + move $a0, $s1 pcaddu18i $ra, %call36(abs_randwc) jirl $ra, $ra, 0 vldi $vr1, -800 @@ -7297,18 +7254,18 @@ DoNNetIteration: # @DoNNetIteration fadd.d $fa0, $fa0, $fa1 vldi $vr1, -928 fmul.d $fa0, $fa0, $fa1 - add.d $a0, $s6, $s0 - addi.d $s0, $s0, 8 + add.d $a0, $s6, $fp + addi.d $fp, $fp, 8 fst.d $fa0, $a0, 280 - bne $s0, $s7, .LBB15_8 + bne $fp, $s2, .LBB15_8 # %bb.9: # %.preheader18.2.i.preheader # in Loop: Header=BB15_5 Depth=1 - move $s0, $zero + move $fp, $zero .p2align 4, , 16 .LBB15_10: # %.preheader18.2.i # Parent Loop BB15_5 Depth=1 # => This Inner Loop Header: Depth=2 - move $a0, $s2 + move $a0, $s1 pcaddu18i $ra, %call36(abs_randwc) jirl $ra, $ra, 0 vldi $vr1, -800 @@ -7319,18 +7276,18 @@ DoNNetIteration: # @DoNNetIteration fadd.d $fa0, $fa0, $fa1 vldi $vr1, -928 fmul.d $fa0, $fa0, $fa1 - add.d $a0, $s6, $s0 - addi.d $s0, $s0, 8 + add.d $a0, $s6, $fp + addi.d $fp, $fp, 8 fst.d $fa0, $a0, 560 - bne $s0, $s7, .LBB15_10 + bne $fp, $s2, .LBB15_10 # %bb.11: # %.preheader18.3.i.preheader # in Loop: Header=BB15_5 Depth=1 - move $s0, $zero + move $fp, $zero .p2align 4, , 16 .LBB15_12: # %.preheader18.3.i # Parent Loop BB15_5 Depth=1 # => This Inner Loop Header: Depth=2 - move $a0, $s2 + move $a0, $s1 pcaddu18i $ra, %call36(abs_randwc) jirl $ra, $ra, 0 vldi $vr1, -800 @@ -7341,18 +7298,18 @@ DoNNetIteration: # @DoNNetIteration fadd.d $fa0, $fa0, $fa1 vldi $vr1, -928 fmul.d $fa0, $fa0, $fa1 - add.d $a0, $s6, $s0 - addi.d $s0, $s0, 8 + add.d $a0, $s6, $fp + addi.d $fp, $fp, 8 fst.d $fa0, $a0, 840 - bne $s0, $s7, .LBB15_12 + bne $fp, $s2, .LBB15_12 # %bb.13: # %.preheader18.4.i.preheader # in Loop: Header=BB15_5 Depth=1 - move $s0, $zero + move $fp, $zero .p2align 4, , 16 .LBB15_14: # %.preheader18.4.i # Parent Loop BB15_5 Depth=1 # => This Inner Loop Header: Depth=2 - move $a0, $s2 + move $a0, $s1 pcaddu18i $ra, %call36(abs_randwc) jirl $ra, $ra, 0 vldi $vr1, -800 @@ -7363,18 +7320,18 @@ DoNNetIteration: # @DoNNetIteration fadd.d $fa0, $fa0, $fa1 vldi $vr1, -928 fmul.d $fa0, $fa0, $fa1 - add.d $a0, $s6, $s0 - addi.d $s0, $s0, 8 + add.d $a0, $s6, $fp + addi.d $fp, $fp, 8 fst.d $fa0, $a0, 1120 - bne $s0, $s7, .LBB15_14 + bne $fp, $s2, .LBB15_14 # %bb.15: # %.preheader18.5.i.preheader # in Loop: Header=BB15_5 Depth=1 - move $s0, $zero + move $fp, $zero .p2align 4, , 16 .LBB15_16: # %.preheader18.5.i # Parent Loop BB15_5 Depth=1 # => This Inner Loop Header: Depth=2 - move $a0, $s2 + move $a0, $s1 pcaddu18i $ra, %call36(abs_randwc) jirl $ra, $ra, 0 vldi $vr1, -800 @@ -7385,18 +7342,18 @@ DoNNetIteration: # @DoNNetIteration fadd.d $fa0, $fa0, $fa1 vldi $vr1, -928 fmul.d $fa0, $fa0, $fa1 - add.d $a0, $s6, $s0 - addi.d $s0, $s0, 8 + add.d $a0, $s6, $fp + addi.d $fp, $fp, 8 fst.d $fa0, $a0, 1400 - bne $s0, $s7, .LBB15_16 + bne $fp, $s2, .LBB15_16 # %bb.17: # %.preheader18.6.i.preheader # in Loop: Header=BB15_5 Depth=1 - move $s0, $zero + move $fp, $zero .p2align 4, , 16 .LBB15_18: # %.preheader18.6.i # Parent Loop BB15_5 Depth=1 # => This Inner Loop Header: Depth=2 - move $a0, $s2 + move $a0, $s1 pcaddu18i $ra, %call36(abs_randwc) jirl $ra, $ra, 0 vldi $vr1, -800 @@ -7407,18 +7364,18 @@ DoNNetIteration: # @DoNNetIteration fadd.d $fa0, $fa0, $fa1 vldi $vr1, -928 fmul.d $fa0, $fa0, $fa1 - add.d $a0, $s6, $s0 - addi.d $s0, $s0, 8 + add.d $a0, $s6, $fp + addi.d $fp, $fp, 8 fst.d $fa0, $a0, 1680 - bne $s0, $s7, .LBB15_18 + bne $fp, $s2, .LBB15_18 # %bb.19: # %.preheader18.7.i.preheader # in Loop: Header=BB15_5 Depth=1 - move $s0, $zero + move $fp, $zero .p2align 4, , 16 .LBB15_20: # %.preheader18.7.i # Parent Loop BB15_5 Depth=1 # => This Inner Loop Header: Depth=2 - move $a0, $s2 + move $a0, $s1 pcaddu18i $ra, %call36(abs_randwc) jirl $ra, $ra, 0 vldi $vr1, -800 @@ -7429,21 +7386,21 @@ DoNNetIteration: # @DoNNetIteration fadd.d $fa0, $fa0, $fa1 vldi $vr1, -928 fmul.d $fa0, $fa0, $fa1 - add.d $a0, $s6, $s0 - addi.d $s0, $s0, 8 + add.d $a0, $s6, $fp + addi.d $fp, $fp, 8 fst.d $fa0, $a0, 1960 - bne $s0, $s7, .LBB15_20 + bne $fp, $s2, .LBB15_20 # %bb.21: # %.preheader.i.preheader # in Loop: Header=BB15_5 Depth=1 - st.d $s1, $sp, 32 # 8-byte Folded Spill - move $s0, $zero - ori $s3, $zero, 512 + st.d $s0, $sp, 24 # 8-byte Folded Spill + move $fp, $zero + ori $s2, $zero, 512 .p2align 4, , 16 .LBB15_22: # %.preheader.i # Parent Loop BB15_5 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $s1, $s8, $s0 - move $a0, $s2 + add.d $s0, $s8, $fp + move $a0, $s1 pcaddu18i $ra, %call36(abs_randwc) jirl $ra, $ra, 0 bstrpick.d $a0, $a0, 31, 0 @@ -7454,8 +7411,8 @@ DoNNetIteration: # @DoNNetIteration fadd.d $fa0, $fa0, $fa1 vldi $vr1, -928 fmul.d $fa0, $fa0, $fa1 - fstx.d $fa0, $s8, $s0 - move $a0, $s2 + fstx.d $fa0, $s8, $fp + move $a0, $s1 pcaddu18i $ra, %call36(abs_randwc) jirl $ra, $ra, 0 bstrpick.d $a0, $a0, 31, 0 @@ -7466,8 +7423,8 @@ DoNNetIteration: # @DoNNetIteration fadd.d $fa0, $fa0, $fa1 vldi $vr1, -928 fmul.d $fa0, $fa0, $fa1 - fst.d $fa0, $s1, 8 - move $a0, $s2 + fst.d $fa0, $s0, 8 + move $a0, $s1 pcaddu18i $ra, %call36(abs_randwc) jirl $ra, $ra, 0 bstrpick.d $a0, $a0, 31, 0 @@ -7478,8 +7435,8 @@ DoNNetIteration: # @DoNNetIteration fadd.d $fa0, $fa0, $fa1 vldi $vr1, -928 fmul.d $fa0, $fa0, $fa1 - fst.d $fa0, $s1, 16 - move $a0, $s2 + fst.d $fa0, $s0, 16 + move $a0, $s1 pcaddu18i $ra, %call36(abs_randwc) jirl $ra, $ra, 0 bstrpick.d $a0, $a0, 31, 0 @@ -7490,8 +7447,8 @@ DoNNetIteration: # @DoNNetIteration fadd.d $fa0, $fa0, $fa1 vldi $vr1, -928 fmul.d $fa0, $fa0, $fa1 - fst.d $fa0, $s1, 24 - move $a0, $s2 + fst.d $fa0, $s0, 24 + move $a0, $s1 pcaddu18i $ra, %call36(abs_randwc) jirl $ra, $ra, 0 bstrpick.d $a0, $a0, 31, 0 @@ -7502,8 +7459,8 @@ DoNNetIteration: # @DoNNetIteration fadd.d $fa0, $fa0, $fa1 vldi $vr1, -928 fmul.d $fa0, $fa0, $fa1 - fst.d $fa0, $s1, 32 - move $a0, $s2 + fst.d $fa0, $s0, 32 + move $a0, $s1 pcaddu18i $ra, %call36(abs_randwc) jirl $ra, $ra, 0 bstrpick.d $a0, $a0, 31, 0 @@ -7514,8 +7471,8 @@ DoNNetIteration: # @DoNNetIteration fadd.d $fa0, $fa0, $fa1 vldi $vr1, -928 fmul.d $fa0, $fa0, $fa1 - fst.d $fa0, $s1, 40 - move $a0, $s2 + fst.d $fa0, $s0, 40 + move $a0, $s1 pcaddu18i $ra, %call36(abs_randwc) jirl $ra, $ra, 0 bstrpick.d $a0, $a0, 31, 0 @@ -7526,8 +7483,8 @@ DoNNetIteration: # @DoNNetIteration fadd.d $fa0, $fa0, $fa1 vldi $vr1, -928 fmul.d $fa0, $fa0, $fa1 - fst.d $fa0, $s1, 48 - move $a0, $s2 + fst.d $fa0, $s0, 48 + move $a0, $s1 pcaddu18i $ra, %call36(abs_randwc) jirl $ra, $ra, 0 vldi $vr1, -800 @@ -7538,9 +7495,9 @@ DoNNetIteration: # @DoNNetIteration fadd.d $fa0, $fa0, $fa1 vldi $vr1, -928 fmul.d $fa0, $fa0, $fa1 - addi.d $s0, $s0, 64 - fst.d $fa0, $s1, 56 - bne $s0, $s3, .LBB15_22 + addi.d $fp, $fp, 64 + fst.d $fa0, $s0, 56 + bne $fp, $s2, .LBB15_22 # %bb.23: # %randomize_wts.exit # in Loop: Header=BB15_5 Depth=1 pcalau12i $a0, %pc_hi20(mid_wt_change) @@ -7591,21 +7548,23 @@ DoNNetIteration: # @DoNNetIteration # %bb.24: # %.lr.ph.preheader # in Loop: Header=BB15_5 Depth=1 st.d $a3, $sp, 40 # 8-byte Folded Spill - move $ra, $zero - ori $t0, $zero, 2240 + st.d $zero, $sp, 560 # 8-byte Folded Spill + ori $a5, $zero, 2240 vld $vr5, $sp, 96 # 16-byte Folded Reload - ori $t1, $zero, 2256 - ori $t2, $zero, 2272 - ori $t3, $zero, 2288 - ori $t4, $zero, 2304 - ori $t5, $zero, 2320 - ori $t6, $zero, 2336 - ori $t7, $zero, 2352 - ori $t8, $zero, 2368 - ori $s0, $zero, 2384 - ori $s1, $zero, 2400 - ori $a6, $zero, 2464 - ori $a7, $zero, 2480 + ori $a7, $zero, 2256 + ori $t0, $zero, 2272 + ori $t1, $zero, 2288 + ori $t2, $zero, 2304 + ori $t3, $zero, 2320 + ori $t4, $zero, 2336 + ori $t5, $zero, 2352 + ori $t6, $zero, 2368 + ori $t7, $zero, 2384 + ori $t8, $zero, 2400 + ori $fp, $zero, 2416 + ori $s0, $zero, 2432 + ori $s1, $zero, 2448 + ori $ra, $zero, 2512 .p2align 4, , 16 .LBB15_25: # %.lr.ph # Parent Loop BB15_5 Depth=1 @@ -7619,70 +7578,69 @@ DoNNetIteration: # @DoNNetIteration ld.d $a0, $sp, 136 # 8-byte Folded Reload st.d $zero, $a0, %pc_lo12(worst_error) ld.d $a0, $sp, 72 # 8-byte Folded Reload - ori $a3, $zero, 2416 - ori $a4, $zero, 2432 - ori $a5, $zero, 2448 - ori $s7, $zero, 2512 + ori $a3, $zero, 2464 + ori $a4, $zero, 2480 + ori $a6, $zero, 2496 .p2align 4, , 16 .LBB15_26: # %.preheader23.i # Parent Loop BB15_5 Depth=1 # Parent Loop BB15_25 Depth=2 # => This Inner Loop Header: Depth=3 add.d $a1, $s3, $a0 - vldx $vr0, $a1, $t0 + vldx $vr0, $a1, $a5 add.d $a2, $s2, $a0 - vldx $vr1, $a1, $t1 - vldx $vr2, $a1, $t2 - vstx $vr0, $a2, $t0 + vldx $vr1, $a1, $a7 + vldx $vr2, $a1, $t0 + vstx $vr0, $a2, $a5 + vldx $vr0, $a1, $t1 + vstx $vr1, $a2, $a7 + vstx $vr2, $a2, $t0 + vldx $vr1, $a1, $t2 + vstx $vr0, $a2, $t1 vldx $vr0, $a1, $t3 - vstx $vr1, $a2, $t1 - vstx $vr2, $a2, $t2 - vldx $vr1, $a1, $t4 + vldx $vr2, $a1, $t4 + vstx $vr1, $a2, $t2 + vldx $vr1, $a1, $t5 vstx $vr0, $a2, $t3 - vldx $vr0, $a1, $t5 - vldx $vr2, $a1, $t6 - vstx $vr1, $a2, $t4 + vstx $vr2, $a2, $t4 + vldx $vr0, $a1, $t6 + vstx $vr1, $a2, $t5 vldx $vr1, $a1, $t7 - vstx $vr0, $a2, $t5 - vstx $vr2, $a2, $t6 - vldx $vr0, $a1, $t8 + vldx $vr2, $a1, $t8 + vstx $vr0, $a2, $t6 + vldx $vr0, $a1, $fp vstx $vr1, $a2, $t7 + vstx $vr2, $a2, $t8 vldx $vr1, $a1, $s0 - vldx $vr2, $a1, $s1 - vstx $vr0, $a2, $t8 - vldx $vr0, $a1, $a3 + vstx $vr0, $a2, $fp + vstx $vr5, $a1, $a5 + vstx $vr5, $a1, $a7 vstx $vr1, $a2, $s0 - vstx $vr2, $a2, $s1 - vldx $vr1, $a1, $a4 - vstx $vr0, $a2, $a3 + vldx $vr0, $a1, $s1 vstx $vr5, $a1, $t0 vstx $vr5, $a1, $t1 - vstx $vr1, $a2, $a4 - vldx $vr0, $a1, $a5 vstx $vr5, $a1, $t2 + vstx $vr0, $a2, $s1 + vldx $vr0, $a1, $a3 vstx $vr5, $a1, $t3 vstx $vr5, $a1, $t4 - vstx $vr0, $a2, $a5 - vldx $vr0, $a1, $a6 vstx $vr5, $a1, $t5 + vstx $vr0, $a2, $a3 + vldx $vr0, $a1, $a4 vstx $vr5, $a1, $t6 vstx $vr5, $a1, $t7 - vstx $vr0, $a2, $a6 - vldx $vr0, $a1, $a7 vstx $vr5, $a1, $t8 + vstx $vr0, $a2, $a4 + vldx $vr0, $a1, $a6 + vstx $vr5, $a1, $fp vstx $vr5, $a1, $s0 vstx $vr5, $a1, $s1 - vstx $vr0, $a2, $a7 - vldx $vr0, $a1, $fp + vstx $vr0, $a2, $a6 + fldx.d $fa0, $a1, $ra vstx $vr5, $a1, $a3 vstx $vr5, $a1, $a4 - vstx $vr5, $a1, $a5 - vstx $vr0, $a2, $fp - fldx.d $fa0, $a1, $s7 vstx $vr5, $a1, $a6 - vstx $vr5, $a1, $a7 - vstx $vr5, $a1, $fp - fstx.d $fa0, $a2, $s7 + fstx.d $fa0, $a2, $ra addi.d $a0, $a0, 280 stptr.d $zero, $a1, 2512 bnez $a0, .LBB15_26 @@ -7784,86 +7742,86 @@ DoNNetIteration: # @DoNNetIteration vst $vr5, $s5, 480 vst $vr0, $s4, 496 vst $vr5, $s5, 496 - st.d $ra, $sp, 560 # 8-byte Folded Spill ori $a0, $zero, 280 - mul.d $a0, $ra, $a0 + ld.d $a1, $sp, 560 # 8-byte Folded Reload + mul.d $a0, $a1, $a0 pcalau12i $a1, %pc_hi20(in_pats) addi.d $a1, $a1, %pc_lo12(in_pats) - add.d $s7, $a1, $a0 - fld.d $fa0, $s7, 136 + add.d $s0, $a1, $a0 + fld.d $fa0, $s0, 136 fst.d $fa0, $sp, 880 # 8-byte Folded Spill - fld.d $fa0, $s7, 144 + fld.d $fa0, $s0, 144 fst.d $fa0, $sp, 864 # 8-byte Folded Spill - fld.d $fa0, $s7, 152 + fld.d $fa0, $s0, 152 fst.d $fa0, $sp, 848 # 8-byte Folded Spill fldx.d $fa0, $a1, $a0 fst.d $fa0, $sp, 832 # 8-byte Folded Spill - fld.d $fa0, $s7, 8 + fld.d $fa0, $s0, 8 fst.d $fa0, $sp, 816 # 8-byte Folded Spill - fld.d $fa0, $s7, 16 + fld.d $fa0, $s0, 16 fst.d $fa0, $sp, 800 # 8-byte Folded Spill - fld.d $fa0, $s7, 24 + fld.d $fa0, $s0, 24 fst.d $fa0, $sp, 784 # 8-byte Folded Spill - fld.d $fa0, $s7, 32 + fld.d $fa0, $s0, 32 fst.d $fa0, $sp, 768 # 8-byte Folded Spill - fld.d $fa0, $s7, 40 + fld.d $fa0, $s0, 40 fst.d $fa0, $sp, 752 # 8-byte Folded Spill - fld.d $fa0, $s7, 48 + fld.d $fa0, $s0, 48 fst.d $fa0, $sp, 736 # 8-byte Folded Spill - fld.d $fa0, $s7, 56 + fld.d $fa0, $s0, 56 fst.d $fa0, $sp, 720 # 8-byte Folded Spill - fld.d $fa0, $s7, 64 + fld.d $fa0, $s0, 64 fst.d $fa0, $sp, 704 # 8-byte Folded Spill - fld.d $fa0, $s7, 72 + fld.d $fa0, $s0, 72 fst.d $fa0, $sp, 688 # 8-byte Folded Spill - fld.d $fa0, $s7, 80 + fld.d $fa0, $s0, 80 fst.d $fa0, $sp, 680 # 8-byte Folded Spill - fld.d $fa0, $s7, 88 + fld.d $fa0, $s0, 88 fst.d $fa0, $sp, 672 # 8-byte Folded Spill - fld.d $fa0, $s7, 96 + fld.d $fa0, $s0, 96 fst.d $fa0, $sp, 664 # 8-byte Folded Spill - fld.d $fa0, $s7, 104 + fld.d $fa0, $s0, 104 fst.d $fa0, $sp, 656 # 8-byte Folded Spill - fld.d $fa0, $s7, 112 + fld.d $fa0, $s0, 112 fst.d $fa0, $sp, 648 # 8-byte Folded Spill - fld.d $fa0, $s7, 120 + fld.d $fa0, $s0, 120 fst.d $fa0, $sp, 640 # 8-byte Folded Spill - fld.d $fa0, $s7, 128 + fld.d $fa0, $s0, 128 fst.d $fa0, $sp, 632 # 8-byte Folded Spill - fld.d $fa0, $s7, 160 + fld.d $fa0, $s0, 160 fst.d $fa0, $sp, 624 # 8-byte Folded Spill - fld.d $fa0, $s7, 168 + fld.d $fa0, $s0, 168 fst.d $fa0, $sp, 616 # 8-byte Folded Spill - fld.d $fa0, $s7, 176 + fld.d $fa0, $s0, 176 fst.d $fa0, $sp, 608 # 8-byte Folded Spill - fld.d $fa0, $s7, 184 + fld.d $fa0, $s0, 184 fst.d $fa0, $sp, 600 # 8-byte Folded Spill - fld.d $fa0, $s7, 192 + fld.d $fa0, $s0, 192 fst.d $fa0, $sp, 592 # 8-byte Folded Spill - fld.d $fa0, $s7, 200 + fld.d $fa0, $s0, 200 fst.d $fa0, $sp, 584 # 8-byte Folded Spill - fld.d $fa0, $s7, 208 + fld.d $fa0, $s0, 208 fst.d $fa0, $sp, 576 # 8-byte Folded Spill - fld.d $fa0, $s7, 216 + fld.d $fa0, $s0, 216 fst.d $fa0, $sp, 568 # 8-byte Folded Spill - fld.d $fs0, $s7, 224 - fld.d $fs1, $s7, 232 - fld.d $fs2, $s7, 240 - fld.d $fs3, $s7, 248 - fld.d $fs4, $s7, 256 - fld.d $fs5, $s7, 264 - fld.d $fs6, $s7, 272 - move $s0, $zero - addi.d $s1, $s6, 136 - ori $fp, $zero, 64 + fld.d $fs0, $s0, 224 + fld.d $fs1, $s0, 232 + fld.d $fs2, $s0, 240 + fld.d $fs3, $s0, 248 + fld.d $fs4, $s0, 256 + fld.d $fs5, $s0, 264 + fld.d $fs6, $s0, 272 + move $s1, $zero + addi.d $fp, $s6, 136 + ori $s7, $zero, 64 .p2align 4, , 16 .LBB15_28: # %.preheader.i.i # Parent Loop BB15_5 Depth=1 # Parent Loop BB15_25 Depth=2 # => This Inner Loop Header: Depth=3 - fld.d $fa0, $s1, -136 - fld.d $fa1, $s1, -128 - fld.d $fa2, $s1, -120 + fld.d $fa0, $fp, -136 + fld.d $fa1, $fp, -128 + fld.d $fa2, $fp, -120 movgr2fr.d $fs7, $zero fld.d $fa3, $sp, 832 # 8-byte Folded Reload fmadd.d $fa0, $fa0, $fa3, $fs7 @@ -7871,10 +7829,10 @@ DoNNetIteration: # @DoNNetIteration fmadd.d $fa0, $fa1, $fa3, $fa0 fld.d $fa1, $sp, 800 # 8-byte Folded Reload fmadd.d $fa0, $fa2, $fa1, $fa0 - fld.d $fa1, $s1, -112 - fld.d $fa2, $s1, -104 - fld.d $fa3, $s1, -96 - fld.d $fa4, $s1, -88 + fld.d $fa1, $fp, -112 + fld.d $fa2, $fp, -104 + fld.d $fa3, $fp, -96 + fld.d $fa4, $fp, -88 fld.d $fa5, $sp, 784 # 8-byte Folded Reload fmadd.d $fa0, $fa1, $fa5, $fa0 fld.d $fa1, $sp, 768 # 8-byte Folded Reload @@ -7883,10 +7841,10 @@ DoNNetIteration: # @DoNNetIteration fmadd.d $fa0, $fa3, $fa1, $fa0 fld.d $fa1, $sp, 736 # 8-byte Folded Reload fmadd.d $fa0, $fa4, $fa1, $fa0 - fld.d $fa1, $s1, -80 - fld.d $fa2, $s1, -72 - fld.d $fa3, $s1, -64 - fld.d $fa4, $s1, -56 + fld.d $fa1, $fp, -80 + fld.d $fa2, $fp, -72 + fld.d $fa3, $fp, -64 + fld.d $fa4, $fp, -56 fld.d $fa5, $sp, 720 # 8-byte Folded Reload fmadd.d $fa0, $fa1, $fa5, $fa0 fld.d $fa1, $sp, 704 # 8-byte Folded Reload @@ -7895,10 +7853,10 @@ DoNNetIteration: # @DoNNetIteration fmadd.d $fa0, $fa3, $fa1, $fa0 fld.d $fa1, $sp, 680 # 8-byte Folded Reload fmadd.d $fa0, $fa4, $fa1, $fa0 - fld.d $fa1, $s1, -48 - fld.d $fa2, $s1, -40 - fld.d $fa3, $s1, -32 - fld.d $fa4, $s1, -24 + fld.d $fa1, $fp, -48 + fld.d $fa2, $fp, -40 + fld.d $fa3, $fp, -32 + fld.d $fa4, $fp, -24 fld.d $fa5, $sp, 672 # 8-byte Folded Reload fmadd.d $fa0, $fa1, $fa5, $fa0 fld.d $fa1, $sp, 664 # 8-byte Folded Reload @@ -7907,10 +7865,10 @@ DoNNetIteration: # @DoNNetIteration fmadd.d $fa0, $fa3, $fa1, $fa0 fld.d $fa1, $sp, 648 # 8-byte Folded Reload fmadd.d $fa0, $fa4, $fa1, $fa0 - fld.d $fa1, $s1, -16 - fld.d $fa2, $s1, -8 - fld.d $fa3, $s1, 0 - fld.d $fa4, $s1, 8 + fld.d $fa1, $fp, -16 + fld.d $fa2, $fp, -8 + fld.d $fa3, $fp, 0 + fld.d $fa4, $fp, 8 fld.d $fa5, $sp, 640 # 8-byte Folded Reload fmadd.d $fa0, $fa1, $fa5, $fa0 fld.d $fa1, $sp, 632 # 8-byte Folded Reload @@ -7919,10 +7877,10 @@ DoNNetIteration: # @DoNNetIteration fmadd.d $fa0, $fa3, $fa1, $fa0 fld.d $fa1, $sp, 864 # 8-byte Folded Reload fmadd.d $fa0, $fa4, $fa1, $fa0 - fld.d $fa1, $s1, 16 - fld.d $fa2, $s1, 24 - fld.d $fa3, $s1, 32 - fld.d $fa4, $s1, 40 + fld.d $fa1, $fp, 16 + fld.d $fa2, $fp, 24 + fld.d $fa3, $fp, 32 + fld.d $fa4, $fp, 40 fld.d $fa5, $sp, 848 # 8-byte Folded Reload fmadd.d $fa0, $fa1, $fa5, $fa0 fld.d $fa1, $sp, 624 # 8-byte Folded Reload @@ -7931,10 +7889,10 @@ DoNNetIteration: # @DoNNetIteration fmadd.d $fa0, $fa3, $fa1, $fa0 fld.d $fa1, $sp, 608 # 8-byte Folded Reload fmadd.d $fa0, $fa4, $fa1, $fa0 - fld.d $fa1, $s1, 48 - fld.d $fa2, $s1, 56 - fld.d $fa3, $s1, 64 - fld.d $fa4, $s1, 72 + fld.d $fa1, $fp, 48 + fld.d $fa2, $fp, 56 + fld.d $fa3, $fp, 64 + fld.d $fa4, $fp, 72 fld.d $fa5, $sp, 600 # 8-byte Folded Reload fmadd.d $fa0, $fa1, $fa5, $fa0 fld.d $fa1, $sp, 592 # 8-byte Folded Reload @@ -7943,19 +7901,19 @@ DoNNetIteration: # @DoNNetIteration fmadd.d $fa0, $fa3, $fa1, $fa0 fld.d $fa1, $sp, 576 # 8-byte Folded Reload fmadd.d $fa0, $fa4, $fa1, $fa0 - fld.d $fa1, $s1, 80 - fld.d $fa2, $s1, 88 - fld.d $fa3, $s1, 96 - fld.d $fa4, $s1, 104 + fld.d $fa1, $fp, 80 + fld.d $fa2, $fp, 88 + fld.d $fa3, $fp, 96 + fld.d $fa4, $fp, 104 fld.d $fa5, $sp, 568 # 8-byte Folded Reload fmadd.d $fa0, $fa1, $fa5, $fa0 fmadd.d $fa0, $fa2, $fs0, $fa0 fmadd.d $fa0, $fa3, $fs1, $fa0 fmadd.d $fa0, $fa4, $fs2, $fa0 - fld.d $fa1, $s1, 112 - fld.d $fa2, $s1, 120 - fld.d $fa3, $s1, 128 - fld.d $fa4, $s1, 136 + fld.d $fa1, $fp, 112 + fld.d $fa2, $fp, 120 + fld.d $fa3, $fp, 128 + fld.d $fa4, $fp, 136 fmadd.d $fa0, $fa1, $fs3, $fa0 fmadd.d $fa0, $fa2, $fs4, $fa0 fmadd.d $fa0, $fa3, $fs5, $fa0 @@ -7967,10 +7925,10 @@ DoNNetIteration: # @DoNNetIteration frecip.d $fa0, $fa0 pcalau12i $a0, %pc_hi20(mid_out) addi.d $a0, $a0, %pc_lo12(mid_out) - fstx.d $fa0, $a0, $s0 - addi.d $s0, $s0, 8 - addi.d $s1, $s1, 280 - bne $s0, $fp, .LBB15_28 + fstx.d $fa0, $a0, $s1 + addi.d $s1, $s1, 8 + addi.d $fp, $fp, 280 + bne $s1, $s7, .LBB15_28 # %bb.29: # %do_forward_pass.exit # in Loop: Header=BB15_25 Depth=2 vld $vr1, $a0, 0 @@ -8069,8 +8027,8 @@ DoNNetIteration: # @DoNNetIteration vfrecip.d $vr0, $vr0 vst $vr0, $sp, 160 # 16-byte Folded Spill pcalau12i $a0, %pc_hi20(out_out) - addi.d $s0, $a0, %pc_lo12(out_out) - vst $vr0, $s0, 0 + addi.d $s1, $a0, %pc_lo12(out_out) + vst $vr0, $s1, 0 fld.d $fa0, $s8, 128 fld.d $fa1, $s8, 136 fld.d $fa2, $s8, 144 @@ -8147,7 +8105,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr0, $vr1, $vr0 vfrecip.d $vr0, $vr0 vst $vr0, $sp, 176 # 16-byte Folded Spill - vst $vr0, $s0, 16 + vst $vr0, $s1, 16 fld.d $fs6, $s8, 256 fld.d $fa1, $s8, 264 fld.d $fa2, $s8, 272 @@ -8223,7 +8181,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr0, $vr1, $vr0 vfrecip.d $vr0, $vr0 vst $vr0, $sp, 192 # 16-byte Folded Spill - vst $vr0, $s0, 32 + vst $vr0, $s1, 32 fld.d $fs3, $s8, 384 fld.d $fa1, $s8, 392 fld.d $fa2, $s8, 400 @@ -8297,9 +8255,9 @@ DoNNetIteration: # @DoNNetIteration vld $vr0, $sp, 752 # 16-byte Folded Reload vfadd.d $vr0, $vr1, $vr0 vfrecip.d $vr6, $vr0 - vst $vr6, $s0, 48 - ld.d $a3, $sp, 560 # 8-byte Folded Reload - slli.d $a0, $a3, 6 + vst $vr6, $s1, 48 + ld.d $a0, $sp, 560 # 8-byte Folded Reload + slli.d $a0, $a0, 6 pcalau12i $a1, %pc_hi20(out_pats) addi.d $a1, $a1, %pc_lo12(out_pats) fldx.d $fa0, $a1, $a0 @@ -8315,35 +8273,17 @@ DoNNetIteration: # @DoNNetIteration fsub.d $fa3, $fa0, $fa1 fneg.d $fa0, $fa1 ori $a7, $zero, 512 - ori $t0, $zero, 2240 - ori $t1, $zero, 2256 - ori $t2, $zero, 2272 - ori $t3, $zero, 2288 - ori $t4, $zero, 2304 - ori $t5, $zero, 2320 - ori $t6, $zero, 2336 - ori $t7, $zero, 2352 - ori $t8, $zero, 2368 - ori $s0, $zero, 2384 - ori $s1, $zero, 2400 - ori $fp, $zero, 2496 + ori $s1, $zero, 2448 + ori $ra, $zero, 2512 + ld.d $s7, $sp, 64 # 8-byte Folded Reload b .LBB15_35 .p2align 4, , 16 .LBB15_31: # in Loop: Header=BB15_25 Depth=2 fcmp.cule.d $fcc0, $fa1, $fs7 fadd.d $fa3, $fa1, $fs7 - ori $t0, $zero, 2240 - ori $t1, $zero, 2256 - ori $t2, $zero, 2272 - ori $t3, $zero, 2288 - ori $t4, $zero, 2304 - ori $t5, $zero, 2320 - ori $t6, $zero, 2336 - ori $t7, $zero, 2352 - ori $t8, $zero, 2368 - ori $s0, $zero, 2384 - ori $s1, $zero, 2400 - ori $fp, $zero, 2496 + ori $a7, $zero, 512 + ori $s1, $zero, 2448 + ld.d $s7, $sp, 64 # 8-byte Folded Reload bcnez $fcc0, .LBB15_33 # %bb.32: # in Loop: Header=BB15_25 Depth=2 fmov.d $fa0, $fa1 @@ -8352,7 +8292,7 @@ DoNNetIteration: # @DoNNetIteration .LBB15_33: # in Loop: Header=BB15_25 Depth=2 fmov.d $fa0, $fs7 .LBB15_34: # in Loop: Header=BB15_25 Depth=2 - ori $a7, $zero, 512 + ori $ra, $zero, 2512 .LBB15_35: # in Loop: Header=BB15_25 Depth=2 vld $vr17, $sp, 720 # 16-byte Folded Reload vld $vr18, $sp, 704 # 16-byte Folded Reload @@ -8532,7 +8472,8 @@ DoNNetIteration: # @DoNNetIteration # in Loop: Header=BB15_25 Depth=2 vldi $vr10, -960 fmul.d $ft2, $ft3, $ft2 - slli.d $a3, $a3, 3 + ld.d $a0, $sp, 560 # 8-byte Folded Reload + slli.d $a3, $a0, 3 pcalau12i $a0, %pc_hi20(avg_out_error) addi.d $a0, $a0, %pc_lo12(avg_out_error) fstx.d $ft2, $a0, $a3 @@ -8704,7 +8645,6 @@ DoNNetIteration: # @DoNNetIteration fst.d $fa5, $a3, 40 fst.d $fa4, $a3, 48 fst.d $fa3, $a3, 56 - ld.d $ra, $sp, 64 # 8-byte Folded Reload vld $vr16, $sp, 736 # 16-byte Folded Reload .p2align 4, , 16 .LBB15_71: # %.preheader.i.i10 @@ -8712,8 +8652,7 @@ DoNNetIteration: # @DoNNetIteration # Parent Loop BB15_25 Depth=2 # => This Inner Loop Header: Depth=3 fld.d $fa2, $a2, 0 - pcalau12i $a3, %pc_hi20(.LCPI15_2) - fld.d $fa1, $a3, %pc_lo12(.LCPI15_2) + movgr2fr.d $fa1, $s7 fmul.d $fa2, $fa2, $fa1 add.d $a3, $s4, $a4 add.d $a5, $s8, $a4 @@ -8722,7 +8661,7 @@ DoNNetIteration: # @DoNNetIteration vfmul.d $vr12, $vr16, $vr11 vldx $vr13, $s4, $a4 vldx $vr14, $s8, $a4 - vreplgr2vr.d $vr2, $ra + vreplgr2vr.d $vr2, $s7 vldx $vr15, $s5, $a4 vfmadd.d $vr12, $vr13, $vr2, $vr12 vfadd.d $vr13, $vr14, $vr12 @@ -8762,7 +8701,7 @@ DoNNetIteration: # @DoNNetIteration # %bb.72: # %adjust_out_wts.exit.i # in Loop: Header=BB15_25 Depth=2 fmul.d $ft2, $ft2, $fa1 - vld $vr12, $s7, 0 + vld $vr12, $s0, 0 vreplvei.d $vr11, $vr10, 0 vld $vr13, $s2, 0 vld $vr14, $s6, 0 @@ -8772,7 +8711,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr13, $vr14, $vr12 vst $vr13, $s6, 0 vfadd.d $vr12, $vr12, $vr15 - vld $vr13, $s7, 16 + vld $vr13, $s0, 16 vst $vr12, $s3, 0 vld $vr12, $s2, 16 vld $vr14, $s6, 16 @@ -8782,7 +8721,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr13, $vr14, $vr12 vst $vr13, $s6, 16 vfadd.d $vr12, $vr12, $vr15 - vld $vr13, $s7, 32 + vld $vr13, $s0, 32 vst $vr12, $s3, 16 vld $vr12, $s2, 32 vld $vr14, $s6, 32 @@ -8792,7 +8731,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr13, $vr14, $vr12 vst $vr13, $s6, 32 vfadd.d $vr12, $vr12, $vr15 - vld $vr13, $s7, 48 + vld $vr13, $s0, 48 vst $vr12, $s3, 32 vld $vr12, $s2, 48 vld $vr14, $s6, 48 @@ -8802,7 +8741,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr13, $vr14, $vr12 vst $vr13, $s6, 48 vfadd.d $vr12, $vr12, $vr15 - vld $vr13, $s7, 64 + vld $vr13, $s0, 64 vst $vr12, $s3, 48 vld $vr12, $s2, 64 vld $vr14, $s6, 64 @@ -8812,7 +8751,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr13, $vr14, $vr12 vst $vr13, $s6, 64 vfadd.d $vr12, $vr12, $vr15 - vld $vr13, $s7, 80 + vld $vr13, $s0, 80 vst $vr12, $s3, 64 vld $vr12, $s2, 80 vld $vr14, $s6, 80 @@ -8822,7 +8761,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr13, $vr14, $vr12 vst $vr13, $s6, 80 vfadd.d $vr12, $vr12, $vr15 - vld $vr13, $s7, 96 + vld $vr13, $s0, 96 vst $vr12, $s3, 80 vld $vr12, $s2, 96 vld $vr14, $s6, 96 @@ -8832,7 +8771,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr13, $vr14, $vr12 vst $vr13, $s6, 96 vfadd.d $vr12, $vr12, $vr15 - vld $vr13, $s7, 112 + vld $vr13, $s0, 112 vst $vr12, $s3, 96 vld $vr12, $s2, 112 vld $vr14, $s6, 112 @@ -8842,7 +8781,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr13, $vr14, $vr12 vst $vr13, $s6, 112 vfadd.d $vr12, $vr12, $vr15 - vld $vr13, $s7, 128 + vld $vr13, $s0, 128 vst $vr12, $s3, 112 vld $vr12, $s2, 128 vld $vr14, $s6, 128 @@ -8852,7 +8791,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr13, $vr14, $vr12 vst $vr13, $s6, 128 vfadd.d $vr12, $vr12, $vr15 - vld $vr13, $s7, 144 + vld $vr13, $s0, 144 vst $vr12, $s3, 128 vld $vr12, $s2, 144 vld $vr14, $s6, 144 @@ -8862,7 +8801,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr13, $vr14, $vr12 vst $vr13, $s6, 144 vfadd.d $vr12, $vr12, $vr15 - vld $vr13, $s7, 160 + vld $vr13, $s0, 160 vst $vr12, $s3, 144 vld $vr12, $s2, 160 vld $vr14, $s6, 160 @@ -8872,7 +8811,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr13, $vr14, $vr12 vst $vr13, $s6, 160 vfadd.d $vr12, $vr12, $vr15 - vld $vr13, $s7, 176 + vld $vr13, $s0, 176 vst $vr12, $s3, 160 vld $vr12, $s2, 176 vld $vr14, $s6, 176 @@ -8882,7 +8821,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr13, $vr14, $vr12 vst $vr13, $s6, 176 vfadd.d $vr12, $vr12, $vr15 - vld $vr13, $s7, 192 + vld $vr13, $s0, 192 vst $vr12, $s3, 176 vld $vr12, $s2, 192 vld $vr14, $s6, 192 @@ -8892,7 +8831,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr13, $vr14, $vr12 vst $vr13, $s6, 192 vfadd.d $vr12, $vr12, $vr15 - vld $vr13, $s7, 208 + vld $vr13, $s0, 208 vst $vr12, $s3, 192 vld $vr12, $s2, 208 vld $vr14, $s6, 208 @@ -8902,7 +8841,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr13, $vr14, $vr12 vst $vr13, $s6, 208 vfadd.d $vr12, $vr12, $vr15 - vld $vr13, $s7, 224 + vld $vr13, $s0, 224 vst $vr12, $s3, 208 vld $vr12, $s2, 224 vld $vr14, $s6, 224 @@ -8912,7 +8851,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr13, $vr14, $vr12 vst $vr13, $s6, 224 vfadd.d $vr12, $vr12, $vr15 - vld $vr13, $s7, 240 + vld $vr13, $s0, 240 vst $vr12, $s3, 224 vld $vr12, $s2, 240 vld $vr14, $s6, 240 @@ -8922,7 +8861,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr13, $vr14, $vr12 vst $vr13, $s6, 240 vfadd.d $vr12, $vr12, $vr15 - vld $vr13, $s7, 256 + vld $vr13, $s0, 256 vst $vr12, $s3, 240 vld $vr12, $s2, 256 vld $vr14, $s6, 256 @@ -8932,7 +8871,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr12, $vr14, $vr11 vst $vr12, $s6, 256 vfadd.d $vr11, $vr11, $vr13 - fld.d $ft4, $s7, 272 + fld.d $ft4, $s0, 272 vst $vr11, $s3, 256 fld.d $ft3, $s2, 272 fld.d $ft5, $s6, 272 @@ -8944,7 +8883,7 @@ DoNNetIteration: # @DoNNetIteration fadd.d $ft2, $ft2, $ft4 fst.d $ft2, $s3, 272 fmul.d $ft1, $ft1, $fa1 - vld $vr11, $s7, 0 + vld $vr11, $s0, 0 vreplvei.d $vr10, $vr9, 0 vld $vr12, $s2, 280 vld $vr13, $s6, 280 @@ -8954,7 +8893,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr12, $vr13, $vr11 vst $vr12, $s6, 280 vfadd.d $vr11, $vr11, $vr14 - vld $vr12, $s7, 16 + vld $vr12, $s0, 16 vst $vr11, $s3, 280 vld $vr11, $s2, 296 vld $vr13, $s6, 296 @@ -8964,7 +8903,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr12, $vr13, $vr11 vst $vr12, $s6, 296 vfadd.d $vr11, $vr11, $vr14 - vld $vr12, $s7, 32 + vld $vr12, $s0, 32 vst $vr11, $s3, 296 vld $vr11, $s2, 312 vld $vr13, $s6, 312 @@ -8974,7 +8913,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr12, $vr13, $vr11 vst $vr12, $s6, 312 vfadd.d $vr11, $vr11, $vr14 - vld $vr12, $s7, 48 + vld $vr12, $s0, 48 vst $vr11, $s3, 312 vld $vr11, $s2, 328 vld $vr13, $s6, 328 @@ -8984,7 +8923,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr12, $vr13, $vr11 vst $vr12, $s6, 328 vfadd.d $vr11, $vr11, $vr14 - vld $vr12, $s7, 64 + vld $vr12, $s0, 64 vst $vr11, $s3, 328 vld $vr11, $s2, 344 vld $vr13, $s6, 344 @@ -8994,7 +8933,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr12, $vr13, $vr11 vst $vr12, $s6, 344 vfadd.d $vr11, $vr11, $vr14 - vld $vr12, $s7, 80 + vld $vr12, $s0, 80 vst $vr11, $s3, 344 vld $vr11, $s2, 360 vld $vr13, $s6, 360 @@ -9004,7 +8943,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr12, $vr13, $vr11 vst $vr12, $s6, 360 vfadd.d $vr11, $vr11, $vr14 - vld $vr12, $s7, 96 + vld $vr12, $s0, 96 vst $vr11, $s3, 360 vld $vr11, $s2, 376 vld $vr13, $s6, 376 @@ -9014,7 +8953,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr12, $vr13, $vr11 vst $vr12, $s6, 376 vfadd.d $vr11, $vr11, $vr14 - vld $vr12, $s7, 112 + vld $vr12, $s0, 112 vst $vr11, $s3, 376 vld $vr11, $s2, 392 vld $vr13, $s6, 392 @@ -9024,7 +8963,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr12, $vr13, $vr11 vst $vr12, $s6, 392 vfadd.d $vr11, $vr11, $vr14 - vld $vr12, $s7, 128 + vld $vr12, $s0, 128 vst $vr11, $s3, 392 vld $vr11, $s2, 408 vld $vr13, $s6, 408 @@ -9034,7 +8973,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr12, $vr13, $vr11 vst $vr12, $s6, 408 vfadd.d $vr11, $vr11, $vr14 - vld $vr12, $s7, 144 + vld $vr12, $s0, 144 vst $vr11, $s3, 408 vld $vr11, $s2, 424 vld $vr13, $s6, 424 @@ -9044,7 +8983,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr12, $vr13, $vr11 vst $vr12, $s6, 424 vfadd.d $vr11, $vr11, $vr14 - vld $vr12, $s7, 160 + vld $vr12, $s0, 160 vst $vr11, $s3, 424 vld $vr11, $s2, 440 vld $vr13, $s6, 440 @@ -9054,7 +8993,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr12, $vr13, $vr11 vst $vr12, $s6, 440 vfadd.d $vr11, $vr11, $vr14 - vld $vr12, $s7, 176 + vld $vr12, $s0, 176 vst $vr11, $s3, 440 vld $vr11, $s2, 456 vld $vr13, $s6, 456 @@ -9064,7 +9003,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr12, $vr13, $vr11 vst $vr12, $s6, 456 vfadd.d $vr11, $vr11, $vr14 - vld $vr12, $s7, 192 + vld $vr12, $s0, 192 vst $vr11, $s3, 456 vld $vr11, $s2, 472 vld $vr13, $s6, 472 @@ -9074,7 +9013,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr12, $vr13, $vr11 vst $vr12, $s6, 472 vfadd.d $vr11, $vr11, $vr14 - vld $vr12, $s7, 208 + vld $vr12, $s0, 208 vst $vr11, $s3, 472 vld $vr11, $s2, 488 vld $vr13, $s6, 488 @@ -9084,7 +9023,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr12, $vr13, $vr11 vst $vr12, $s6, 488 vfadd.d $vr11, $vr11, $vr14 - vld $vr12, $s7, 224 + vld $vr12, $s0, 224 vst $vr11, $s3, 488 vld $vr11, $s2, 504 vld $vr13, $s6, 504 @@ -9094,7 +9033,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr12, $vr13, $vr11 vst $vr12, $s6, 504 vfadd.d $vr11, $vr11, $vr14 - vld $vr12, $s7, 240 + vld $vr12, $s0, 240 vst $vr11, $s3, 504 vld $vr11, $s2, 520 vld $vr13, $s6, 520 @@ -9104,7 +9043,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr12, $vr13, $vr11 vst $vr12, $s6, 520 vfadd.d $vr11, $vr11, $vr14 - vld $vr12, $s7, 256 + vld $vr12, $s0, 256 vst $vr11, $s3, 520 vld $vr11, $s2, 536 vld $vr13, $s6, 536 @@ -9114,7 +9053,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr11, $vr13, $vr10 vst $vr11, $s6, 536 vfadd.d $vr10, $vr10, $vr12 - fld.d $ft3, $s7, 272 + fld.d $ft3, $s0, 272 vst $vr10, $s3, 536 fld.d $ft2, $s2, 552 fld.d $ft4, $s6, 552 @@ -9126,7 +9065,7 @@ DoNNetIteration: # @DoNNetIteration fadd.d $ft1, $ft1, $ft3 fst.d $ft1, $s3, 552 fmul.d $ft0, $ft0, $fa1 - vld $vr10, $s7, 0 + vld $vr10, $s0, 0 vreplvei.d $vr9, $vr8, 0 vld $vr11, $s2, 560 vld $vr12, $s6, 560 @@ -9136,7 +9075,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr11, $vr12, $vr10 vst $vr11, $s6, 560 vfadd.d $vr10, $vr10, $vr13 - vld $vr11, $s7, 16 + vld $vr11, $s0, 16 vst $vr10, $s3, 560 vld $vr10, $s2, 576 vld $vr12, $s6, 576 @@ -9146,7 +9085,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr11, $vr12, $vr10 vst $vr11, $s6, 576 vfadd.d $vr10, $vr10, $vr13 - vld $vr11, $s7, 32 + vld $vr11, $s0, 32 vst $vr10, $s3, 576 vld $vr10, $s2, 592 vld $vr12, $s6, 592 @@ -9156,7 +9095,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr11, $vr12, $vr10 vst $vr11, $s6, 592 vfadd.d $vr10, $vr10, $vr13 - vld $vr11, $s7, 48 + vld $vr11, $s0, 48 vst $vr10, $s3, 592 vld $vr10, $s2, 608 vld $vr12, $s6, 608 @@ -9166,7 +9105,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr11, $vr12, $vr10 vst $vr11, $s6, 608 vfadd.d $vr10, $vr10, $vr13 - vld $vr11, $s7, 64 + vld $vr11, $s0, 64 vst $vr10, $s3, 608 vld $vr10, $s2, 624 vld $vr12, $s6, 624 @@ -9176,7 +9115,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr11, $vr12, $vr10 vst $vr11, $s6, 624 vfadd.d $vr10, $vr10, $vr13 - vld $vr11, $s7, 80 + vld $vr11, $s0, 80 vst $vr10, $s3, 624 vld $vr10, $s2, 640 vld $vr12, $s6, 640 @@ -9186,7 +9125,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr11, $vr12, $vr10 vst $vr11, $s6, 640 vfadd.d $vr10, $vr10, $vr13 - vld $vr11, $s7, 96 + vld $vr11, $s0, 96 vst $vr10, $s3, 640 vld $vr10, $s2, 656 vld $vr12, $s6, 656 @@ -9196,7 +9135,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr11, $vr12, $vr10 vst $vr11, $s6, 656 vfadd.d $vr10, $vr10, $vr13 - vld $vr11, $s7, 112 + vld $vr11, $s0, 112 vst $vr10, $s3, 656 vld $vr10, $s2, 672 vld $vr12, $s6, 672 @@ -9206,7 +9145,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr11, $vr12, $vr10 vst $vr11, $s6, 672 vfadd.d $vr10, $vr10, $vr13 - vld $vr11, $s7, 128 + vld $vr11, $s0, 128 vst $vr10, $s3, 672 vld $vr10, $s2, 688 vld $vr12, $s6, 688 @@ -9216,7 +9155,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr11, $vr12, $vr10 vst $vr11, $s6, 688 vfadd.d $vr10, $vr10, $vr13 - vld $vr11, $s7, 144 + vld $vr11, $s0, 144 vst $vr10, $s3, 688 vld $vr10, $s2, 704 vld $vr12, $s6, 704 @@ -9226,7 +9165,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr11, $vr12, $vr10 vst $vr11, $s6, 704 vfadd.d $vr10, $vr10, $vr13 - vld $vr11, $s7, 160 + vld $vr11, $s0, 160 vst $vr10, $s3, 704 vld $vr10, $s2, 720 vld $vr12, $s6, 720 @@ -9236,7 +9175,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr11, $vr12, $vr10 vst $vr11, $s6, 720 vfadd.d $vr10, $vr10, $vr13 - vld $vr11, $s7, 176 + vld $vr11, $s0, 176 vst $vr10, $s3, 720 vld $vr10, $s2, 736 vld $vr12, $s6, 736 @@ -9246,7 +9185,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr11, $vr12, $vr10 vst $vr11, $s6, 736 vfadd.d $vr10, $vr10, $vr13 - vld $vr11, $s7, 192 + vld $vr11, $s0, 192 vst $vr10, $s3, 736 vld $vr10, $s2, 752 vld $vr12, $s6, 752 @@ -9256,7 +9195,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr11, $vr12, $vr10 vst $vr11, $s6, 752 vfadd.d $vr10, $vr10, $vr13 - vld $vr11, $s7, 208 + vld $vr11, $s0, 208 vst $vr10, $s3, 752 vld $vr10, $s2, 768 vld $vr12, $s6, 768 @@ -9266,7 +9205,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr11, $vr12, $vr10 vst $vr11, $s6, 768 vfadd.d $vr10, $vr10, $vr13 - vld $vr11, $s7, 224 + vld $vr11, $s0, 224 vst $vr10, $s3, 768 vld $vr10, $s2, 784 vld $vr12, $s6, 784 @@ -9276,7 +9215,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr11, $vr12, $vr10 vst $vr11, $s6, 784 vfadd.d $vr10, $vr10, $vr13 - vld $vr11, $s7, 240 + vld $vr11, $s0, 240 vst $vr10, $s3, 784 vld $vr10, $s2, 800 vld $vr12, $s6, 800 @@ -9286,7 +9225,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr11, $vr12, $vr10 vst $vr11, $s6, 800 vfadd.d $vr10, $vr10, $vr13 - vld $vr11, $s7, 256 + vld $vr11, $s0, 256 vst $vr10, $s3, 800 vld $vr10, $s2, 816 vld $vr12, $s6, 816 @@ -9296,7 +9235,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr10, $vr12, $vr9 vst $vr10, $s6, 816 vfadd.d $vr9, $vr9, $vr11 - fld.d $ft2, $s7, 272 + fld.d $ft2, $s0, 272 vst $vr9, $s3, 816 fld.d $ft1, $s2, 832 fld.d $ft3, $s6, 832 @@ -9308,7 +9247,7 @@ DoNNetIteration: # @DoNNetIteration fadd.d $ft0, $ft0, $ft2 fst.d $ft0, $s3, 832 fmul.d $fa7, $fa7, $fa1 - vld $vr9, $s7, 0 + vld $vr9, $s0, 0 vreplvei.d $vr8, $vr7, 0 vld $vr10, $s2, 840 vld $vr11, $s6, 840 @@ -9318,7 +9257,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr10, $vr11, $vr9 vst $vr10, $s6, 840 vfadd.d $vr9, $vr9, $vr12 - vld $vr10, $s7, 16 + vld $vr10, $s0, 16 vst $vr9, $s3, 840 vld $vr9, $s2, 856 vld $vr11, $s6, 856 @@ -9328,7 +9267,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr10, $vr11, $vr9 vst $vr10, $s6, 856 vfadd.d $vr9, $vr9, $vr12 - vld $vr10, $s7, 32 + vld $vr10, $s0, 32 vst $vr9, $s3, 856 vld $vr9, $s2, 872 vld $vr11, $s6, 872 @@ -9338,7 +9277,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr10, $vr11, $vr9 vst $vr10, $s6, 872 vfadd.d $vr9, $vr9, $vr12 - vld $vr10, $s7, 48 + vld $vr10, $s0, 48 vst $vr9, $s3, 872 vld $vr9, $s2, 888 vld $vr11, $s6, 888 @@ -9348,7 +9287,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr10, $vr11, $vr9 vst $vr10, $s6, 888 vfadd.d $vr9, $vr9, $vr12 - vld $vr10, $s7, 64 + vld $vr10, $s0, 64 vst $vr9, $s3, 888 vld $vr9, $s2, 904 vld $vr11, $s6, 904 @@ -9358,7 +9297,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr10, $vr11, $vr9 vst $vr10, $s6, 904 vfadd.d $vr9, $vr9, $vr12 - vld $vr10, $s7, 80 + vld $vr10, $s0, 80 vst $vr9, $s3, 904 vld $vr9, $s2, 920 vld $vr11, $s6, 920 @@ -9368,7 +9307,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr10, $vr11, $vr9 vst $vr10, $s6, 920 vfadd.d $vr9, $vr9, $vr12 - vld $vr10, $s7, 96 + vld $vr10, $s0, 96 vst $vr9, $s3, 920 vld $vr9, $s2, 936 vld $vr11, $s6, 936 @@ -9378,7 +9317,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr10, $vr11, $vr9 vst $vr10, $s6, 936 vfadd.d $vr9, $vr9, $vr12 - vld $vr10, $s7, 112 + vld $vr10, $s0, 112 vst $vr9, $s3, 936 vld $vr9, $s2, 952 vld $vr11, $s6, 952 @@ -9388,7 +9327,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr10, $vr11, $vr9 vst $vr10, $s6, 952 vfadd.d $vr9, $vr9, $vr12 - vld $vr10, $s7, 128 + vld $vr10, $s0, 128 vst $vr9, $s3, 952 vld $vr9, $s2, 968 vld $vr11, $s6, 968 @@ -9398,7 +9337,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr10, $vr11, $vr9 vst $vr10, $s6, 968 vfadd.d $vr9, $vr9, $vr12 - vld $vr10, $s7, 144 + vld $vr10, $s0, 144 vst $vr9, $s3, 968 vld $vr9, $s2, 984 vld $vr11, $s6, 984 @@ -9408,7 +9347,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr10, $vr11, $vr9 vst $vr10, $s6, 984 vfadd.d $vr9, $vr9, $vr12 - vld $vr10, $s7, 160 + vld $vr10, $s0, 160 vst $vr9, $s3, 984 vld $vr9, $s2, 1000 vld $vr11, $s6, 1000 @@ -9418,7 +9357,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr10, $vr11, $vr9 vst $vr10, $s6, 1000 vfadd.d $vr9, $vr9, $vr12 - vld $vr10, $s7, 176 + vld $vr10, $s0, 176 vst $vr9, $s3, 1000 vld $vr9, $s2, 1016 vld $vr11, $s6, 1016 @@ -9428,7 +9367,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr10, $vr11, $vr9 vst $vr10, $s6, 1016 vfadd.d $vr9, $vr9, $vr12 - vld $vr10, $s7, 192 + vld $vr10, $s0, 192 vst $vr9, $s3, 1016 vld $vr9, $s2, 1032 vld $vr11, $s6, 1032 @@ -9438,7 +9377,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr10, $vr11, $vr9 vst $vr10, $s6, 1032 vfadd.d $vr9, $vr9, $vr12 - vld $vr10, $s7, 208 + vld $vr10, $s0, 208 vst $vr9, $s3, 1032 vld $vr9, $s2, 1048 vld $vr11, $s6, 1048 @@ -9448,7 +9387,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr10, $vr11, $vr9 vst $vr10, $s6, 1048 vfadd.d $vr9, $vr9, $vr12 - vld $vr10, $s7, 224 + vld $vr10, $s0, 224 vst $vr9, $s3, 1048 vld $vr9, $s2, 1064 vld $vr11, $s6, 1064 @@ -9458,7 +9397,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr10, $vr11, $vr9 vst $vr10, $s6, 1064 vfadd.d $vr9, $vr9, $vr12 - vld $vr10, $s7, 240 + vld $vr10, $s0, 240 vst $vr9, $s3, 1064 vld $vr9, $s2, 1080 vld $vr11, $s6, 1080 @@ -9468,7 +9407,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr10, $vr11, $vr9 vst $vr10, $s6, 1080 vfadd.d $vr9, $vr9, $vr12 - vld $vr10, $s7, 256 + vld $vr10, $s0, 256 vst $vr9, $s3, 1080 vld $vr9, $s2, 1096 vld $vr11, $s6, 1096 @@ -9478,7 +9417,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr9, $vr11, $vr8 vst $vr9, $s6, 1096 vfadd.d $vr8, $vr8, $vr10 - fld.d $ft1, $s7, 272 + fld.d $ft1, $s0, 272 vst $vr8, $s3, 1096 fld.d $ft0, $s2, 1112 fld.d $ft2, $s6, 1112 @@ -9490,7 +9429,7 @@ DoNNetIteration: # @DoNNetIteration fadd.d $fa7, $fa7, $ft1 fst.d $fa7, $s3, 1112 fmul.d $fa6, $fa6, $fa1 - vld $vr8, $s7, 0 + vld $vr8, $s0, 0 vreplvei.d $vr7, $vr6, 0 vld $vr9, $s2, 1120 vld $vr10, $s6, 1120 @@ -9500,7 +9439,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr9, $vr10, $vr8 vst $vr9, $s6, 1120 vfadd.d $vr8, $vr8, $vr11 - vld $vr9, $s7, 16 + vld $vr9, $s0, 16 vst $vr8, $s3, 1120 vld $vr8, $s2, 1136 vld $vr10, $s6, 1136 @@ -9510,7 +9449,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr9, $vr10, $vr8 vst $vr9, $s6, 1136 vfadd.d $vr8, $vr8, $vr11 - vld $vr9, $s7, 32 + vld $vr9, $s0, 32 vst $vr8, $s3, 1136 vld $vr8, $s2, 1152 vld $vr10, $s6, 1152 @@ -9520,7 +9459,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr9, $vr10, $vr8 vst $vr9, $s6, 1152 vfadd.d $vr8, $vr8, $vr11 - vld $vr9, $s7, 48 + vld $vr9, $s0, 48 vst $vr8, $s3, 1152 vld $vr8, $s2, 1168 vld $vr10, $s6, 1168 @@ -9530,7 +9469,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr9, $vr10, $vr8 vst $vr9, $s6, 1168 vfadd.d $vr8, $vr8, $vr11 - vld $vr9, $s7, 64 + vld $vr9, $s0, 64 vst $vr8, $s3, 1168 vld $vr8, $s2, 1184 vld $vr10, $s6, 1184 @@ -9540,7 +9479,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr9, $vr10, $vr8 vst $vr9, $s6, 1184 vfadd.d $vr8, $vr8, $vr11 - vld $vr9, $s7, 80 + vld $vr9, $s0, 80 vst $vr8, $s3, 1184 vld $vr8, $s2, 1200 vld $vr10, $s6, 1200 @@ -9550,7 +9489,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr9, $vr10, $vr8 vst $vr9, $s6, 1200 vfadd.d $vr8, $vr8, $vr11 - vld $vr9, $s7, 96 + vld $vr9, $s0, 96 vst $vr8, $s3, 1200 vld $vr8, $s2, 1216 vld $vr10, $s6, 1216 @@ -9560,7 +9499,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr9, $vr10, $vr8 vst $vr9, $s6, 1216 vfadd.d $vr8, $vr8, $vr11 - vld $vr9, $s7, 112 + vld $vr9, $s0, 112 vst $vr8, $s3, 1216 vld $vr8, $s2, 1232 vld $vr10, $s6, 1232 @@ -9570,7 +9509,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr9, $vr10, $vr8 vst $vr9, $s6, 1232 vfadd.d $vr8, $vr8, $vr11 - vld $vr9, $s7, 128 + vld $vr9, $s0, 128 vst $vr8, $s3, 1232 vld $vr8, $s2, 1248 vld $vr10, $s6, 1248 @@ -9580,7 +9519,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr9, $vr10, $vr8 vst $vr9, $s6, 1248 vfadd.d $vr8, $vr8, $vr11 - vld $vr9, $s7, 144 + vld $vr9, $s0, 144 vst $vr8, $s3, 1248 vld $vr8, $s2, 1264 vld $vr10, $s6, 1264 @@ -9590,7 +9529,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr9, $vr10, $vr8 vst $vr9, $s6, 1264 vfadd.d $vr8, $vr8, $vr11 - vld $vr9, $s7, 160 + vld $vr9, $s0, 160 vst $vr8, $s3, 1264 vld $vr8, $s2, 1280 vld $vr10, $s6, 1280 @@ -9600,7 +9539,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr9, $vr10, $vr8 vst $vr9, $s6, 1280 vfadd.d $vr8, $vr8, $vr11 - vld $vr9, $s7, 176 + vld $vr9, $s0, 176 vst $vr8, $s3, 1280 vld $vr8, $s2, 1296 vld $vr10, $s6, 1296 @@ -9610,7 +9549,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr9, $vr10, $vr8 vst $vr9, $s6, 1296 vfadd.d $vr8, $vr8, $vr11 - vld $vr9, $s7, 192 + vld $vr9, $s0, 192 vst $vr8, $s3, 1296 vld $vr8, $s2, 1312 vld $vr10, $s6, 1312 @@ -9620,7 +9559,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr9, $vr10, $vr8 vst $vr9, $s6, 1312 vfadd.d $vr8, $vr8, $vr11 - vld $vr9, $s7, 208 + vld $vr9, $s0, 208 vst $vr8, $s3, 1312 vld $vr8, $s2, 1328 vld $vr10, $s6, 1328 @@ -9630,7 +9569,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr9, $vr10, $vr8 vst $vr9, $s6, 1328 vfadd.d $vr8, $vr8, $vr11 - vld $vr9, $s7, 224 + vld $vr9, $s0, 224 vst $vr8, $s3, 1328 vld $vr8, $s2, 1344 vld $vr10, $s6, 1344 @@ -9640,7 +9579,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr9, $vr10, $vr8 vst $vr9, $s6, 1344 vfadd.d $vr8, $vr8, $vr11 - vld $vr9, $s7, 240 + vld $vr9, $s0, 240 vst $vr8, $s3, 1344 vld $vr8, $s2, 1360 vld $vr10, $s6, 1360 @@ -9650,7 +9589,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr9, $vr10, $vr8 vst $vr9, $s6, 1360 vfadd.d $vr8, $vr8, $vr11 - vld $vr9, $s7, 256 + vld $vr9, $s0, 256 vst $vr8, $s3, 1360 vld $vr8, $s2, 1376 vld $vr10, $s6, 1376 @@ -9660,7 +9599,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr8, $vr10, $vr7 vst $vr8, $s6, 1376 vfadd.d $vr7, $vr7, $vr9 - fld.d $ft0, $s7, 272 + fld.d $ft0, $s0, 272 vst $vr7, $s3, 1376 fld.d $fa7, $s2, 1392 fld.d $ft1, $s6, 1392 @@ -9672,7 +9611,7 @@ DoNNetIteration: # @DoNNetIteration fadd.d $fa6, $fa6, $ft0 fst.d $fa6, $s3, 1392 fmul.d $fa5, $fa5, $fa1 - vld $vr7, $s7, 0 + vld $vr7, $s0, 0 vreplvei.d $vr6, $vr5, 0 vld $vr8, $s2, 1400 vld $vr9, $s6, 1400 @@ -9682,7 +9621,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr8, $vr9, $vr7 vst $vr8, $s6, 1400 vfadd.d $vr7, $vr7, $vr10 - vld $vr8, $s7, 16 + vld $vr8, $s0, 16 vst $vr7, $s3, 1400 vld $vr7, $s2, 1416 vld $vr9, $s6, 1416 @@ -9692,7 +9631,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr8, $vr9, $vr7 vst $vr8, $s6, 1416 vfadd.d $vr7, $vr7, $vr10 - vld $vr8, $s7, 32 + vld $vr8, $s0, 32 vst $vr7, $s3, 1416 vld $vr7, $s2, 1432 vld $vr9, $s6, 1432 @@ -9702,7 +9641,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr8, $vr9, $vr7 vst $vr8, $s6, 1432 vfadd.d $vr7, $vr7, $vr10 - vld $vr8, $s7, 48 + vld $vr8, $s0, 48 vst $vr7, $s3, 1432 vld $vr7, $s2, 1448 vld $vr9, $s6, 1448 @@ -9712,7 +9651,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr8, $vr9, $vr7 vst $vr8, $s6, 1448 vfadd.d $vr7, $vr7, $vr10 - vld $vr8, $s7, 64 + vld $vr8, $s0, 64 vst $vr7, $s3, 1448 vld $vr7, $s2, 1464 vld $vr9, $s6, 1464 @@ -9722,7 +9661,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr8, $vr9, $vr7 vst $vr8, $s6, 1464 vfadd.d $vr7, $vr7, $vr10 - vld $vr8, $s7, 80 + vld $vr8, $s0, 80 vst $vr7, $s3, 1464 vld $vr7, $s2, 1480 vld $vr9, $s6, 1480 @@ -9732,7 +9671,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr8, $vr9, $vr7 vst $vr8, $s6, 1480 vfadd.d $vr7, $vr7, $vr10 - vld $vr8, $s7, 96 + vld $vr8, $s0, 96 vst $vr7, $s3, 1480 vld $vr7, $s2, 1496 vld $vr9, $s6, 1496 @@ -9742,7 +9681,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr8, $vr9, $vr7 vst $vr8, $s6, 1496 vfadd.d $vr7, $vr7, $vr10 - vld $vr8, $s7, 112 + vld $vr8, $s0, 112 vst $vr7, $s3, 1496 vld $vr7, $s2, 1512 vld $vr9, $s6, 1512 @@ -9752,7 +9691,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr8, $vr9, $vr7 vst $vr8, $s6, 1512 vfadd.d $vr7, $vr7, $vr10 - vld $vr8, $s7, 128 + vld $vr8, $s0, 128 vst $vr7, $s3, 1512 vld $vr7, $s2, 1528 vld $vr9, $s6, 1528 @@ -9762,7 +9701,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr8, $vr9, $vr7 vst $vr8, $s6, 1528 vfadd.d $vr7, $vr7, $vr10 - vld $vr8, $s7, 144 + vld $vr8, $s0, 144 vst $vr7, $s3, 1528 vld $vr7, $s2, 1544 vld $vr9, $s6, 1544 @@ -9772,7 +9711,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr8, $vr9, $vr7 vst $vr8, $s6, 1544 vfadd.d $vr7, $vr7, $vr10 - vld $vr8, $s7, 160 + vld $vr8, $s0, 160 vst $vr7, $s3, 1544 vld $vr7, $s2, 1560 vld $vr9, $s6, 1560 @@ -9782,7 +9721,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr8, $vr9, $vr7 vst $vr8, $s6, 1560 vfadd.d $vr7, $vr7, $vr10 - vld $vr8, $s7, 176 + vld $vr8, $s0, 176 vst $vr7, $s3, 1560 vld $vr7, $s2, 1576 vld $vr9, $s6, 1576 @@ -9792,7 +9731,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr8, $vr9, $vr7 vst $vr8, $s6, 1576 vfadd.d $vr7, $vr7, $vr10 - vld $vr8, $s7, 192 + vld $vr8, $s0, 192 vst $vr7, $s3, 1576 vld $vr7, $s2, 1592 vld $vr9, $s6, 1592 @@ -9802,7 +9741,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr8, $vr9, $vr7 vst $vr8, $s6, 1592 vfadd.d $vr7, $vr7, $vr10 - vld $vr8, $s7, 208 + vld $vr8, $s0, 208 vst $vr7, $s3, 1592 vld $vr7, $s2, 1608 vld $vr9, $s6, 1608 @@ -9812,7 +9751,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr8, $vr9, $vr7 vst $vr8, $s6, 1608 vfadd.d $vr7, $vr7, $vr10 - vld $vr8, $s7, 224 + vld $vr8, $s0, 224 vst $vr7, $s3, 1608 vld $vr7, $s2, 1624 vld $vr9, $s6, 1624 @@ -9822,7 +9761,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr8, $vr9, $vr7 vst $vr8, $s6, 1624 vfadd.d $vr7, $vr7, $vr10 - vld $vr8, $s7, 240 + vld $vr8, $s0, 240 vst $vr7, $s3, 1624 vld $vr7, $s2, 1640 vld $vr9, $s6, 1640 @@ -9832,7 +9771,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr8, $vr9, $vr7 vst $vr8, $s6, 1640 vfadd.d $vr7, $vr7, $vr10 - vld $vr8, $s7, 256 + vld $vr8, $s0, 256 vst $vr7, $s3, 1640 vld $vr7, $s2, 1656 vld $vr9, $s6, 1656 @@ -9842,7 +9781,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr7, $vr9, $vr6 vst $vr7, $s6, 1656 vfadd.d $vr6, $vr6, $vr8 - fld.d $fa7, $s7, 272 + fld.d $fa7, $s0, 272 vst $vr6, $s3, 1656 fld.d $fa6, $s2, 1672 fld.d $ft0, $s6, 1672 @@ -9854,7 +9793,7 @@ DoNNetIteration: # @DoNNetIteration fadd.d $fa5, $fa5, $fa7 fst.d $fa5, $s3, 1672 fmul.d $fa4, $fa4, $fa1 - vld $vr6, $s7, 0 + vld $vr6, $s0, 0 vreplvei.d $vr5, $vr4, 0 vld $vr7, $s2, 1680 vld $vr8, $s6, 1680 @@ -9864,7 +9803,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr7, $vr8, $vr6 vst $vr7, $s6, 1680 vfadd.d $vr6, $vr6, $vr9 - vld $vr7, $s7, 16 + vld $vr7, $s0, 16 vst $vr6, $s3, 1680 vld $vr6, $s2, 1696 vld $vr8, $s6, 1696 @@ -9874,7 +9813,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr7, $vr8, $vr6 vst $vr7, $s6, 1696 vfadd.d $vr6, $vr6, $vr9 - vld $vr7, $s7, 32 + vld $vr7, $s0, 32 vst $vr6, $s3, 1696 vld $vr6, $s2, 1712 vld $vr8, $s6, 1712 @@ -9884,7 +9823,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr7, $vr8, $vr6 vst $vr7, $s6, 1712 vfadd.d $vr6, $vr6, $vr9 - vld $vr7, $s7, 48 + vld $vr7, $s0, 48 vst $vr6, $s3, 1712 vld $vr6, $s2, 1728 vld $vr8, $s6, 1728 @@ -9894,7 +9833,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr7, $vr8, $vr6 vst $vr7, $s6, 1728 vfadd.d $vr6, $vr6, $vr9 - vld $vr7, $s7, 64 + vld $vr7, $s0, 64 vst $vr6, $s3, 1728 vld $vr6, $s2, 1744 vld $vr8, $s6, 1744 @@ -9904,7 +9843,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr7, $vr8, $vr6 vst $vr7, $s6, 1744 vfadd.d $vr6, $vr6, $vr9 - vld $vr7, $s7, 80 + vld $vr7, $s0, 80 vst $vr6, $s3, 1744 vld $vr6, $s2, 1760 vld $vr8, $s6, 1760 @@ -9914,7 +9853,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr7, $vr8, $vr6 vst $vr7, $s6, 1760 vfadd.d $vr6, $vr6, $vr9 - vld $vr7, $s7, 96 + vld $vr7, $s0, 96 vst $vr6, $s3, 1760 vld $vr6, $s2, 1776 vld $vr8, $s6, 1776 @@ -9924,7 +9863,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr7, $vr8, $vr6 vst $vr7, $s6, 1776 vfadd.d $vr6, $vr6, $vr9 - vld $vr7, $s7, 112 + vld $vr7, $s0, 112 vst $vr6, $s3, 1776 vld $vr6, $s2, 1792 vld $vr8, $s6, 1792 @@ -9934,7 +9873,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr7, $vr8, $vr6 vst $vr7, $s6, 1792 vfadd.d $vr6, $vr6, $vr9 - vld $vr7, $s7, 128 + vld $vr7, $s0, 128 vst $vr6, $s3, 1792 vld $vr6, $s2, 1808 vld $vr8, $s6, 1808 @@ -9944,7 +9883,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr7, $vr8, $vr6 vst $vr7, $s6, 1808 vfadd.d $vr6, $vr6, $vr9 - vld $vr7, $s7, 144 + vld $vr7, $s0, 144 vst $vr6, $s3, 1808 vld $vr6, $s2, 1824 vld $vr8, $s6, 1824 @@ -9954,7 +9893,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr7, $vr8, $vr6 vst $vr7, $s6, 1824 vfadd.d $vr6, $vr6, $vr9 - vld $vr7, $s7, 160 + vld $vr7, $s0, 160 vst $vr6, $s3, 1824 vld $vr6, $s2, 1840 vld $vr8, $s6, 1840 @@ -9964,7 +9903,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr7, $vr8, $vr6 vst $vr7, $s6, 1840 vfadd.d $vr6, $vr6, $vr9 - vld $vr7, $s7, 176 + vld $vr7, $s0, 176 vst $vr6, $s3, 1840 vld $vr6, $s2, 1856 vld $vr8, $s6, 1856 @@ -9974,7 +9913,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr7, $vr8, $vr6 vst $vr7, $s6, 1856 vfadd.d $vr6, $vr6, $vr9 - vld $vr7, $s7, 192 + vld $vr7, $s0, 192 vst $vr6, $s3, 1856 vld $vr6, $s2, 1872 vld $vr8, $s6, 1872 @@ -9984,7 +9923,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr7, $vr8, $vr6 vst $vr7, $s6, 1872 vfadd.d $vr6, $vr6, $vr9 - vld $vr7, $s7, 208 + vld $vr7, $s0, 208 vst $vr6, $s3, 1872 vld $vr6, $s2, 1888 vld $vr8, $s6, 1888 @@ -9994,7 +9933,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr7, $vr8, $vr6 vst $vr7, $s6, 1888 vfadd.d $vr6, $vr6, $vr9 - vld $vr7, $s7, 224 + vld $vr7, $s0, 224 vst $vr6, $s3, 1888 vld $vr6, $s2, 1904 vld $vr8, $s6, 1904 @@ -10004,7 +9943,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr7, $vr8, $vr6 vst $vr7, $s6, 1904 vfadd.d $vr6, $vr6, $vr9 - vld $vr7, $s7, 240 + vld $vr7, $s0, 240 vst $vr6, $s3, 1904 vld $vr6, $s2, 1920 vld $vr8, $s6, 1920 @@ -10014,7 +9953,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr7, $vr8, $vr6 vst $vr7, $s6, 1920 vfadd.d $vr6, $vr6, $vr9 - vld $vr7, $s7, 256 + vld $vr7, $s0, 256 vst $vr6, $s3, 1920 vld $vr6, $s2, 1936 vld $vr8, $s6, 1936 @@ -10024,7 +9963,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr6, $vr8, $vr5 vst $vr6, $s6, 1936 vfadd.d $vr5, $vr5, $vr7 - fld.d $fa6, $s7, 272 + fld.d $fa6, $s0, 272 vst $vr5, $s3, 1936 fld.d $fa5, $s2, 1952 fld.d $fa7, $s6, 1952 @@ -10036,7 +9975,7 @@ DoNNetIteration: # @DoNNetIteration fadd.d $fa4, $fa4, $fa6 fst.d $fa4, $s3, 1952 fmul.d $fa3, $fa3, $fa1 - vld $vr5, $s7, 0 + vld $vr5, $s0, 0 vreplvei.d $vr4, $vr3, 0 vld $vr6, $s2, 1960 vld $vr7, $s6, 1960 @@ -10046,7 +9985,7 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr6, $vr7, $vr5 vst $vr6, $s6, 1960 vfadd.d $vr5, $vr5, $vr8 - vld $vr6, $s7, 16 + vld $vr6, $s0, 16 vld $vr7, $s2, 1976 vst $vr5, $s3, 1960 vld $vr5, $s6, 1976 @@ -10058,7 +9997,7 @@ DoNNetIteration: # @DoNNetIteration vld $vr5, $s6, 1992 vfadd.d $vr6, $vr6, $vr7 vst $vr6, $s3, 1976 - vld $vr6, $s7, 32 + vld $vr6, $s0, 32 vld $vr7, $s2, 1992 vld $vr8, $s6, 2008 vld $vr9, $s3, 1992 @@ -10068,7 +10007,7 @@ DoNNetIteration: # @DoNNetIteration vst $vr5, $s6, 1992 vfadd.d $vr5, $vr6, $vr9 vst $vr5, $s3, 1992 - vld $vr5, $s7, 48 + vld $vr5, $s0, 48 vld $vr6, $s2, 2008 vld $vr7, $s6, 2024 vld $vr9, $s3, 2008 @@ -10078,7 +10017,7 @@ DoNNetIteration: # @DoNNetIteration vst $vr6, $s6, 2008 vfadd.d $vr5, $vr5, $vr9 vst $vr5, $s3, 2008 - vld $vr5, $s7, 64 + vld $vr5, $s0, 64 vld $vr6, $s2, 2024 vld $vr8, $s6, 2040 vld $vr9, $s3, 2024 @@ -10088,7 +10027,7 @@ DoNNetIteration: # @DoNNetIteration vst $vr6, $s6, 2024 vfadd.d $vr5, $vr5, $vr9 vst $vr5, $s3, 2024 - vld $vr5, $s7, 80 + vld $vr5, $s0, 80 vld $vr6, $s2, 2040 ori $a2, $zero, 2056 vldx $vr7, $s6, $a2 @@ -10099,7 +10038,7 @@ DoNNetIteration: # @DoNNetIteration vst $vr6, $s6, 2040 vfadd.d $vr5, $vr5, $vr9 vst $vr5, $s3, 2040 - vld $vr5, $s7, 96 + vld $vr5, $s0, 96 vldx $vr6, $s2, $a2 ori $a3, $zero, 2072 vldx $vr8, $s6, $a3 @@ -10110,7 +10049,7 @@ DoNNetIteration: # @DoNNetIteration vstx $vr6, $s6, $a2 vfadd.d $vr5, $vr5, $vr9 vstx $vr5, $s3, $a2 - vld $vr5, $s7, 112 + vld $vr5, $s0, 112 vldx $vr6, $s2, $a3 ori $a2, $zero, 2088 vldx $vr7, $s6, $a2 @@ -10121,7 +10060,7 @@ DoNNetIteration: # @DoNNetIteration vstx $vr6, $s6, $a3 vfadd.d $vr5, $vr5, $vr9 vstx $vr5, $s3, $a3 - vld $vr5, $s7, 128 + vld $vr5, $s0, 128 vldx $vr6, $s2, $a2 ori $a3, $zero, 2104 vldx $vr8, $s6, $a3 @@ -10132,7 +10071,7 @@ DoNNetIteration: # @DoNNetIteration vstx $vr6, $s6, $a2 vfadd.d $vr5, $vr5, $vr9 vstx $vr5, $s3, $a2 - vld $vr5, $s7, 144 + vld $vr5, $s0, 144 vldx $vr6, $s2, $a3 ori $a2, $zero, 2120 vldx $vr7, $s6, $a2 @@ -10143,7 +10082,7 @@ DoNNetIteration: # @DoNNetIteration vstx $vr6, $s6, $a3 vfadd.d $vr5, $vr5, $vr9 vstx $vr5, $s3, $a3 - vld $vr5, $s7, 160 + vld $vr5, $s0, 160 vldx $vr6, $s2, $a2 ori $a3, $zero, 2136 vldx $vr8, $s6, $a3 @@ -10154,7 +10093,7 @@ DoNNetIteration: # @DoNNetIteration vstx $vr6, $s6, $a2 vfadd.d $vr5, $vr5, $vr9 vstx $vr5, $s3, $a2 - vld $vr5, $s7, 176 + vld $vr5, $s0, 176 vldx $vr6, $s2, $a3 ori $a2, $zero, 2152 vldx $vr7, $s6, $a2 @@ -10165,7 +10104,7 @@ DoNNetIteration: # @DoNNetIteration vstx $vr6, $s6, $a3 vfadd.d $vr5, $vr5, $vr9 vstx $vr5, $s3, $a3 - vld $vr5, $s7, 192 + vld $vr5, $s0, 192 vldx $vr6, $s2, $a2 ori $a3, $zero, 2168 vldx $vr8, $s6, $a3 @@ -10179,10 +10118,10 @@ DoNNetIteration: # @DoNNetIteration ori $a4, $zero, 2184 vldx $vr5, $s6, $a4 vldx $vr6, $s2, $a4 - vld $vr7, $s7, 208 + vld $vr7, $s0, 208 vldx $vr9, $s2, $a3 vldx $vr10, $s3, $a3 - vld $vr11, $s7, 224 + vld $vr11, $s0, 224 vfmul.d $vr7, $vr4, $vr7 vfmadd.d $vr7, $vr9, $vr2, $vr7 vfadd.d $vr8, $vr8, $vr7 @@ -10193,9 +10132,9 @@ DoNNetIteration: # @DoNNetIteration vfmadd.d $vr6, $vr6, $vr2, $vr7 vfadd.d $vr5, $vr5, $vr6 vstx $vr5, $s6, $a4 - vld $vr5, $s7, 240 - vld $vr7, $s7, 256 - fld.d $ft0, $s7, 272 + vld $vr5, $s0, 240 + vld $vr7, $s0, 256 + fld.d $ft0, $s0, 272 ld.d $a5, $sp, 128 # 8-byte Folded Reload ld.w $a2, $a5, %pc_lo12(iteration_count) ori $a3, $zero, 2200 @@ -10213,8 +10152,8 @@ DoNNetIteration: # @DoNNetIteration vfadd.d $vr6, $vr9, $vr5 vstx $vr6, $s6, $a3 vldx $vr6, $s3, $a3 - ld.d $ra, $sp, 560 # 8-byte Folded Reload - addi.d $ra, $ra, 1 + ld.d $a6, $sp, 560 # 8-byte Folded Reload + addi.d $a6, $a6, 1 ori $a4, $zero, 2216 vldx $vr9, $s6, $a4 ori $a5, $zero, 2232 @@ -10238,19 +10177,30 @@ DoNNetIteration: # @DoNNetIteration fadd.d $fa1, $fa1, $fa2 fstx.d $fa1, $s3, $a5 fld.d $fs0, $sp, 88 # 8-byte Folded Reload - ori $s7, $zero, 280 fld.d $fs1, $sp, 80 # 8-byte Folded Reload + ori $a5, $zero, 2240 vld $vr5, $sp, 96 # 16-byte Folded Reload - ori $a6, $zero, 2464 - ori $a7, $zero, 2480 - blt $ra, $a2, .LBB15_25 + ori $a7, $zero, 2256 + ori $t0, $zero, 2272 + ori $t1, $zero, 2288 + ori $t2, $zero, 2304 + ori $t3, $zero, 2320 + ori $t4, $zero, 2336 + ori $t5, $zero, 2352 + ori $t6, $zero, 2368 + ori $t7, $zero, 2384 + ori $t8, $zero, 2400 + ori $fp, $zero, 2416 + ori $s0, $zero, 2432 + st.d $a6, $sp, 560 # 8-byte Folded Spill + blt $a6, $a2, .LBB15_25 # %bb.73: # %._crit_edge # in Loop: Header=BB15_25 Depth=2 ld.d $a4, $sp, 40 # 8-byte Folded Reload ld.w $a3, $a4, %pc_lo12(numpasses) addi.d $a3, $a3, 1 st.w $a3, $a4, %pc_lo12(numpasses) - ori $a5, $zero, 1 + ori $a6, $zero, 1 blez $a2, .LBB15_3 # %bb.74: # %.lr.ph.preheader.i.i # in Loop: Header=BB15_25 Depth=2 @@ -10277,13 +10227,13 @@ DoNNetIteration: # @DoNNetIteration ld.d $a0, $sp, 136 # 8-byte Folded Reload fst.d $fa1, $a0, %pc_lo12(worst_error) movgr2fr.d $fa2, $a3 - pcalau12i $a0, %pc_hi20(.LCPI15_3) - fld.d $fa3, $a0, %pc_lo12(.LCPI15_3) ffint.d.l $fa2, $fa2 fdiv.d $fa0, $fa0, $fa2 ld.d $a0, $sp, 48 # 8-byte Folded Reload fst.d $fa0, $a0, %pc_lo12(average_error) - fcmp.cult.d $fcc0, $fa1, $fa3 + ld.d $a0, $sp, 32 # 8-byte Folded Reload + movgr2fr.d $fa0, $a0 + fcmp.cult.d $fcc0, $fa1, $fa0 lu52i.d $a0, $zero, 1027 vreplgr2vr.d $vr0, $a0 ori $a0, $zero, 4 @@ -10341,7 +10291,7 @@ DoNNetIteration: # @DoNNetIteration fld.d $fa0, $a0, 0 fcmp.cult.d $fcc0, $fa0, $fa6 movcf2gr $a3, $fcc0 - masknez $a4, $a5, $a3 + masknez $a4, $a6, $a3 maskeqz $a1, $a1, $a3 or $a1, $a1, $a4 addi.d $a2, $a2, -1 @@ -10352,7 +10302,7 @@ DoNNetIteration: # @DoNNetIteration bnez $a1, .LBB15_88 # %bb.85: # %check_out_error.exit # in Loop: Header=BB15_25 Depth=2 - move $ra, $zero + st.d $zero, $sp, 560 # 8-byte Folded Spill ld.d $a0, $sp, 56 # 8-byte Folded Reload st.w $zero, $a0, %pc_lo12(learned) b .LBB15_25 @@ -10363,8 +10313,9 @@ DoNNetIteration: # @DoNNetIteration # %bb.87: # in Loop: Header=BB15_5 Depth=1 move $a0, $zero move $a1, $zero - ld.d $s1, $sp, 32 # 8-byte Folded Reload - ld.d $s2, $sp, 24 # 8-byte Folded Reload + ld.d $s0, $sp, 24 # 8-byte Folded Reload + ld.d $s1, $sp, 16 # 8-byte Folded Reload + ori $s2, $zero, 280 b .LBB15_92 .p2align 4, , 16 .LBB15_88: # in Loop: Header=BB15_5 Depth=1 @@ -10379,8 +10330,9 @@ DoNNetIteration: # @DoNNetIteration vrepli.b $vr1, 0 move $a4, $a0 vori.b $vr2, $vr1, 0 - ld.d $s1, $sp, 32 # 8-byte Folded Reload - ld.d $s2, $sp, 24 # 8-byte Folded Reload + ld.d $s0, $sp, 24 # 8-byte Folded Reload + ld.d $s1, $sp, 16 # 8-byte Folded Reload + ori $s2, $zero, 280 .p2align 4, , 16 .LBB15_90: # %vector.body # Parent Loop BB15_5 Depth=1 @@ -10415,7 +10367,7 @@ DoNNetIteration: # @DoNNetIteration fld.d $fa0, $a0, 0 fcmp.cult.d $fcc0, $fa0, $fa6 movcf2gr $a3, $fcc0 - masknez $a4, $a5, $a3 + masknez $a4, $a6, $a3 maskeqz $a1, $a1, $a3 or $a1, $a1, $a4 addi.d $a2, $a2, -1 @@ -10426,12 +10378,12 @@ DoNNetIteration: # @DoNNetIteration sltu $a0, $zero, $a1 sub.d $a0, $zero, $a0 ori $a0, $a0, 1 - addi.d $s1, $s1, -1 - ld.d $a2, $sp, 56 # 8-byte Folded Reload - st.w $a0, $a2, %pc_lo12(learned) - bnez $s1, .LBB15_5 + addi.d $s0, $s0, -1 + ld.d $a1, $sp, 56 # 8-byte Folded Reload + st.w $a0, $a1, %pc_lo12(learned) + bnez $s0, .LBB15_5 .LBB15_95: # %._crit_edge26 - ld.d $a0, $sp, 16 # 8-byte Folded Reload + ld.d $a0, $sp, 8 # 8-byte Folded Reload fld.d $fs7, $sp, 904 # 8-byte Folded Reload fld.d $fs6, $sp, 912 # 8-byte Folded Reload fld.d $fs5, $sp, 920 # 8-byte Folded Reload @@ -10457,14 +10409,7 @@ DoNNetIteration: # @DoNNetIteration .Lfunc_end15: .size DoNNetIteration, .Lfunc_end15-DoNNetIteration # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function DoLU -.LCPI16_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI16_1: - .dword 0x407f400000000000 # double 500 - .text - .globl DoLU + .globl DoLU # -- Begin function DoLU .p2align 5 .type DoLU,@function DoLU: # @DoLU @@ -11040,13 +10985,16 @@ DoLU: # @DoLU .LBB16_39: move $s1, $zero ld.d $a4, $s8, 16 - pcalau12i $a0, %pc_hi20(.LCPI16_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI16_0) - pcalau12i $a0, %pc_hi20(.LCPI16_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI16_1) movgr2fr.d $fs0, $zero lu52i.d $s4, $zero, 1107 + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 + movgr2fr.d $fs1, $a0 lu12i.w $s5, 275200 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1031 + movgr2fr.d $fs2, $a0 .p2align 4, , 16 .LBB16_40: # =>This Inner Loop Header: Depth=1 move $a0, $fp @@ -11126,12 +11074,7 @@ DoLU: # @DoLU .Lfunc_end16: .size DoLU, .Lfunc_end16-DoLU # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function DoLUIteration -.LCPI17_0: - .dword 0x3bc79ca10c924223 # double 9.9999999999999995E-21 - .text - .p2align 5 + .p2align 5 # -- Begin function DoLUIteration .type DoLUIteration,@function DoLUIteration: # @DoLUIteration # %bb.0: @@ -11364,6 +11307,7 @@ DoLUIteration: # @DoLUIteration lu32i.d $a0, 498849 lu52i.d $a0, $a0, 956 st.d $a0, $sp, 104 # 8-byte Folded Spill + movgr2fr.d $fa1, $a0 move $t6, $s1 move $t3, $s2 st.d $s0, $sp, 56 # 8-byte Folded Spill @@ -11414,24 +11358,24 @@ DoLUIteration: # @DoLUIteration # => This Loop Header: Depth=2 # Child Loop BB17_19 Depth 3 move $a1, $zero - fmov.d $fa1, $fa0 + fmov.d $fa2, $fa0 .p2align 4, , 16 .LBB17_19: # Parent Loop BB17_17 Depth=1 # Parent Loop BB17_18 Depth=2 # => This Inner Loop Header: Depth=3 - fldx.d $fa2, $a3, $a1 - fabs.d $fa2, $fa2 - fcmp.clt.d $fcc0, $fa1, $fa2 + fldx.d $fa3, $a3, $a1 + fabs.d $fa3, $fa3 + fcmp.clt.d $fcc0, $fa2, $fa3 addi.d $a1, $a1, 8 - fsel $fa1, $fa1, $fa2, $fcc0 + fsel $fa2, $fa2, $fa3, $fcc0 bne $a1, $a6, .LBB17_19 # %bb.20: # in Loop: Header=BB17_18 Depth=2 - fcmp.ceq.d $fcc0, $fa1, $fa0 + fcmp.ceq.d $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB17_16 # %bb.21: # in Loop: Header=BB17_18 Depth=2 - frecip.d $fa1, $fa1 + frecip.d $fa2, $fa2 slli.d $a1, $a0, 3 - fstx.d $fa1, $a2, $a1 + fstx.d $fa2, $a2, $a1 addi.d $a0, $a0, 1 addi.d $a3, $a3, 808 bne $a0, $t0, .LBB17_18 @@ -11482,7 +11426,7 @@ DoLUIteration: # @DoLUIteration .LBB17_26: # %.loopexit146.i.i # in Loop: Header=BB17_27 Depth=3 alsl.d $a1, $s7, $a7, 3 - fst.d $fa1, $a1, 0 + fst.d $fa2, $a1, 0 addi.d $a4, $a4, 1 addi.d $a5, $a5, 808 beq $a4, $s7, .LBB17_30 @@ -11492,7 +11436,7 @@ DoLUIteration: # @DoLUIteration # Child Loop BB17_29 Depth 4 mul.d $a1, $a4, $a6 add.d $a7, $t8, $a1 - fldx.d $fa1, $a7, $a0 + fldx.d $fa2, $a7, $a0 beqz $a4, .LBB17_26 # %bb.28: # %.preheader145.i.i.preheader # in Loop: Header=BB17_27 Depth=3 @@ -11505,10 +11449,10 @@ DoLUIteration: # @DoLUIteration # Parent Loop BB17_24 Depth=2 # Parent Loop BB17_27 Depth=3 # => This Inner Loop Header: Depth=4 - fld.d $fa2, $t4, 0 - fld.d $fa3, $t5, 0 - fneg.d $fa2, $fa2 - fmadd.d $fa1, $fa2, $fa3, $fa1 + fld.d $fa3, $t4, 0 + fld.d $fa4, $t5, 0 + fneg.d $fa3, $fa3 + fmadd.d $fa2, $fa3, $fa4, $fa2 addi.d $s0, $s0, -1 addi.d $t5, $t5, 808 addi.d $t4, $t4, 8 @@ -11525,19 +11469,19 @@ DoLUIteration: # @DoLUIteration sub.d $a7, $t1, $s7 move $t7, $fp move $a5, $s7 - fmov.d $fa1, $fa0 + fmov.d $fa2, $fa0 b .LBB17_32 .p2align 4, , 16 .LBB17_31: # %.loopexit.i.i # in Loop: Header=BB17_32 Depth=3 alsl.d $a1, $s7, $t5, 3 - fst.d $fa2, $a1, 0 + fst.d $fa3, $a1, 0 slli.d $a1, $a5, 3 - fldx.d $fa3, $s5, $a1 - fabs.d $fa2, $fa2 - fmul.d $fa2, $fa2, $fa3 - fcmp.cult.d $fcc0, $fa2, $fa1 - fsel $fa1, $fa2, $fa1, $fcc0 + fldx.d $fa4, $s5, $a1 + fabs.d $fa3, $fa3 + fmul.d $fa3, $fa3, $fa4 + fcmp.cult.d $fcc0, $fa3, $fa2 + fsel $fa2, $fa3, $fa2, $fcc0 movcf2gr $a1, $fcc0 masknez $a4, $a5, $a1 maskeqz $a1, $ra, $a1 @@ -11551,7 +11495,7 @@ DoLUIteration: # @DoLUIteration # Child Loop BB17_34 Depth 4 mul.d $a1, $a5, $a6 add.d $t5, $t8, $a1 - fldx.d $fa2, $t5, $a0 + fldx.d $fa3, $t5, $a0 beqz $s7, .LBB17_31 # %bb.33: # %.preheader.i.i.preheader # in Loop: Header=BB17_32 Depth=3 @@ -11563,10 +11507,10 @@ DoLUIteration: # @DoLUIteration # Parent Loop BB17_24 Depth=2 # Parent Loop BB17_32 Depth=3 # => This Inner Loop Header: Depth=4 - fld.d $fa3, $a4, 0 - fldx.d $fa4, $s1, $a1 - fneg.d $fa3, $fa3 - fmadd.d $fa2, $fa3, $fa4, $fa2 + fld.d $fa4, $a4, 0 + fldx.d $fa5, $s1, $a1 + fneg.d $fa4, $fa4 + fmadd.d $fa3, $fa4, $fa5, $fa3 addi.d $a1, $a1, 808 addi.d $a4, $a4, 8 bne $s6, $a1, .LBB17_34 @@ -11591,206 +11535,206 @@ DoLUIteration: # @DoLUIteration .p2align 4, , 16 .LBB17_39: # %vector.body92 # in Loop: Header=BB17_24 Depth=2 - vld $vr1, $s0, 0 - vld $vr2, $s0, 16 - vld $vr3, $t5, 0 - vld $vr4, $t5, 16 - vst $vr1, $t5, 0 - vst $vr2, $t5, 16 - vst $vr3, $s0, 0 - vst $vr4, $s0, 16 - vld $vr1, $s0, 32 - vld $vr2, $s0, 48 - vld $vr3, $t5, 32 - vld $vr4, $t5, 48 - vst $vr1, $t5, 32 - vst $vr2, $t5, 48 - vst $vr3, $s0, 32 - vst $vr4, $s0, 48 - vld $vr1, $s0, 64 - vld $vr2, $s0, 80 - vld $vr3, $t5, 64 - vld $vr4, $t5, 80 - vst $vr1, $t5, 64 - vst $vr2, $t5, 80 - vst $vr3, $s0, 64 - vst $vr4, $s0, 80 - vld $vr1, $s0, 96 - vld $vr2, $s0, 112 - vld $vr3, $t5, 96 - vld $vr4, $t5, 112 - vst $vr1, $t5, 96 - vst $vr2, $t5, 112 - vst $vr3, $s0, 96 - vst $vr4, $s0, 112 - vld $vr1, $s0, 128 - vld $vr2, $s0, 144 - vld $vr3, $t5, 128 - vld $vr4, $t5, 144 - vst $vr1, $t5, 128 - vst $vr2, $t5, 144 - vst $vr3, $s0, 128 - vst $vr4, $s0, 144 - vld $vr1, $s0, 160 - vld $vr2, $s0, 176 - vld $vr3, $t5, 160 - vld $vr4, $t5, 176 - vst $vr1, $t5, 160 - vst $vr2, $t5, 176 - vst $vr3, $s0, 160 - vst $vr4, $s0, 176 - vld $vr1, $s0, 192 - vld $vr2, $s0, 208 - vld $vr3, $t5, 192 - vld $vr4, $t5, 208 - vst $vr1, $t5, 192 - vst $vr2, $t5, 208 - vst $vr3, $s0, 192 - vst $vr4, $s0, 208 - vld $vr1, $s0, 224 - vld $vr2, $s0, 240 - vld $vr3, $t5, 224 - vld $vr4, $t5, 240 - vst $vr1, $t5, 224 - vst $vr2, $t5, 240 - vst $vr3, $s0, 224 - vst $vr4, $s0, 240 - vld $vr1, $s0, 256 - vld $vr2, $s0, 272 - vld $vr3, $t5, 256 - vld $vr4, $t5, 272 - vst $vr1, $t5, 256 - vst $vr2, $t5, 272 - vst $vr3, $s0, 256 - vst $vr4, $s0, 272 - vld $vr1, $s0, 288 - vld $vr2, $s0, 304 - vld $vr3, $t5, 288 - vld $vr4, $t5, 304 - vst $vr1, $t5, 288 - vst $vr2, $t5, 304 - vst $vr3, $s0, 288 - vst $vr4, $s0, 304 - vld $vr1, $s0, 320 - vld $vr2, $s0, 336 - vld $vr3, $t5, 320 - vld $vr4, $t5, 336 - vst $vr1, $t5, 320 - vst $vr2, $t5, 336 - vst $vr3, $s0, 320 - vst $vr4, $s0, 336 - vld $vr1, $s0, 352 - vld $vr2, $s0, 368 - vld $vr3, $t5, 352 - vld $vr4, $t5, 368 - vst $vr1, $t5, 352 - vst $vr2, $t5, 368 - vst $vr3, $s0, 352 - vst $vr4, $s0, 368 - vld $vr1, $s0, 384 - vld $vr2, $s0, 400 - vld $vr3, $t5, 384 - vld $vr4, $t5, 400 - vst $vr1, $t5, 384 - vst $vr2, $t5, 400 - vst $vr3, $s0, 384 - vst $vr4, $s0, 400 - vld $vr1, $s0, 416 - vld $vr2, $s0, 432 - vld $vr3, $t5, 416 - vld $vr4, $t5, 432 - vst $vr1, $t5, 416 - vst $vr2, $t5, 432 - vst $vr3, $s0, 416 - vst $vr4, $s0, 432 - vld $vr1, $s0, 448 - vld $vr2, $s0, 464 - vld $vr3, $t5, 448 - vld $vr4, $t5, 464 - vst $vr1, $t5, 448 - vst $vr2, $t5, 464 - vst $vr3, $s0, 448 - vst $vr4, $s0, 464 - vld $vr1, $s0, 480 - vld $vr2, $s0, 496 - vld $vr3, $t5, 480 - vld $vr4, $t5, 496 - vst $vr1, $t5, 480 - vst $vr2, $t5, 496 - vst $vr3, $s0, 480 - vst $vr4, $s0, 496 - vld $vr1, $s0, 512 - vld $vr2, $s0, 528 - vld $vr3, $t5, 512 - vld $vr4, $t5, 528 - vst $vr1, $t5, 512 - vst $vr2, $t5, 528 - vst $vr3, $s0, 512 - vst $vr4, $s0, 528 - vld $vr1, $s0, 544 - vld $vr2, $s0, 560 - vld $vr3, $t5, 544 - vld $vr4, $t5, 560 - vst $vr1, $t5, 544 - vst $vr2, $t5, 560 - vst $vr3, $s0, 544 - vst $vr4, $s0, 560 - vld $vr1, $s0, 576 - vld $vr2, $s0, 592 - vld $vr3, $t5, 576 - vld $vr4, $t5, 592 - vst $vr1, $t5, 576 - vst $vr2, $t5, 592 - vst $vr3, $s0, 576 - vst $vr4, $s0, 592 - vld $vr1, $s0, 608 - vld $vr2, $s0, 624 - vld $vr3, $t5, 608 - vld $vr4, $t5, 624 - vst $vr1, $t5, 608 - vst $vr2, $t5, 624 - vst $vr3, $s0, 608 - vst $vr4, $s0, 624 - vld $vr1, $s0, 640 - vld $vr2, $s0, 656 - vld $vr3, $t5, 640 - vld $vr4, $t5, 656 - vst $vr1, $t5, 640 - vst $vr2, $t5, 656 - vst $vr3, $s0, 640 - vst $vr4, $s0, 656 - vld $vr1, $s0, 672 - vld $vr2, $s0, 688 - vld $vr3, $t5, 672 - vld $vr4, $t5, 688 - vst $vr1, $t5, 672 - vst $vr2, $t5, 688 - vst $vr3, $s0, 672 - vst $vr4, $s0, 688 - vld $vr1, $s0, 704 - vld $vr2, $s0, 720 - vld $vr3, $t5, 704 - vld $vr4, $t5, 720 - vst $vr1, $t5, 704 - vst $vr2, $t5, 720 - vst $vr3, $s0, 704 - vst $vr4, $s0, 720 - vld $vr1, $s0, 736 - vld $vr2, $s0, 752 - vld $vr3, $t5, 736 - vld $vr4, $t5, 752 - vst $vr1, $t5, 736 - vst $vr2, $t5, 752 - vst $vr3, $s0, 736 - vst $vr4, $s0, 752 - vld $vr1, $s0, 768 - vld $vr2, $s0, 784 - vld $vr3, $t5, 768 - vld $vr4, $t5, 784 - vst $vr1, $t5, 768 - vst $vr2, $t5, 784 - vst $vr3, $s0, 768 - vst $vr4, $s0, 784 + vld $vr2, $s0, 0 + vld $vr3, $s0, 16 + vld $vr4, $t5, 0 + vld $vr5, $t5, 16 + vst $vr2, $t5, 0 + vst $vr3, $t5, 16 + vst $vr4, $s0, 0 + vst $vr5, $s0, 16 + vld $vr2, $s0, 32 + vld $vr3, $s0, 48 + vld $vr4, $t5, 32 + vld $vr5, $t5, 48 + vst $vr2, $t5, 32 + vst $vr3, $t5, 48 + vst $vr4, $s0, 32 + vst $vr5, $s0, 48 + vld $vr2, $s0, 64 + vld $vr3, $s0, 80 + vld $vr4, $t5, 64 + vld $vr5, $t5, 80 + vst $vr2, $t5, 64 + vst $vr3, $t5, 80 + vst $vr4, $s0, 64 + vst $vr5, $s0, 80 + vld $vr2, $s0, 96 + vld $vr3, $s0, 112 + vld $vr4, $t5, 96 + vld $vr5, $t5, 112 + vst $vr2, $t5, 96 + vst $vr3, $t5, 112 + vst $vr4, $s0, 96 + vst $vr5, $s0, 112 + vld $vr2, $s0, 128 + vld $vr3, $s0, 144 + vld $vr4, $t5, 128 + vld $vr5, $t5, 144 + vst $vr2, $t5, 128 + vst $vr3, $t5, 144 + vst $vr4, $s0, 128 + vst $vr5, $s0, 144 + vld $vr2, $s0, 160 + vld $vr3, $s0, 176 + vld $vr4, $t5, 160 + vld $vr5, $t5, 176 + vst $vr2, $t5, 160 + vst $vr3, $t5, 176 + vst $vr4, $s0, 160 + vst $vr5, $s0, 176 + vld $vr2, $s0, 192 + vld $vr3, $s0, 208 + vld $vr4, $t5, 192 + vld $vr5, $t5, 208 + vst $vr2, $t5, 192 + vst $vr3, $t5, 208 + vst $vr4, $s0, 192 + vst $vr5, $s0, 208 + vld $vr2, $s0, 224 + vld $vr3, $s0, 240 + vld $vr4, $t5, 224 + vld $vr5, $t5, 240 + vst $vr2, $t5, 224 + vst $vr3, $t5, 240 + vst $vr4, $s0, 224 + vst $vr5, $s0, 240 + vld $vr2, $s0, 256 + vld $vr3, $s0, 272 + vld $vr4, $t5, 256 + vld $vr5, $t5, 272 + vst $vr2, $t5, 256 + vst $vr3, $t5, 272 + vst $vr4, $s0, 256 + vst $vr5, $s0, 272 + vld $vr2, $s0, 288 + vld $vr3, $s0, 304 + vld $vr4, $t5, 288 + vld $vr5, $t5, 304 + vst $vr2, $t5, 288 + vst $vr3, $t5, 304 + vst $vr4, $s0, 288 + vst $vr5, $s0, 304 + vld $vr2, $s0, 320 + vld $vr3, $s0, 336 + vld $vr4, $t5, 320 + vld $vr5, $t5, 336 + vst $vr2, $t5, 320 + vst $vr3, $t5, 336 + vst $vr4, $s0, 320 + vst $vr5, $s0, 336 + vld $vr2, $s0, 352 + vld $vr3, $s0, 368 + vld $vr4, $t5, 352 + vld $vr5, $t5, 368 + vst $vr2, $t5, 352 + vst $vr3, $t5, 368 + vst $vr4, $s0, 352 + vst $vr5, $s0, 368 + vld $vr2, $s0, 384 + vld $vr3, $s0, 400 + vld $vr4, $t5, 384 + vld $vr5, $t5, 400 + vst $vr2, $t5, 384 + vst $vr3, $t5, 400 + vst $vr4, $s0, 384 + vst $vr5, $s0, 400 + vld $vr2, $s0, 416 + vld $vr3, $s0, 432 + vld $vr4, $t5, 416 + vld $vr5, $t5, 432 + vst $vr2, $t5, 416 + vst $vr3, $t5, 432 + vst $vr4, $s0, 416 + vst $vr5, $s0, 432 + vld $vr2, $s0, 448 + vld $vr3, $s0, 464 + vld $vr4, $t5, 448 + vld $vr5, $t5, 464 + vst $vr2, $t5, 448 + vst $vr3, $t5, 464 + vst $vr4, $s0, 448 + vst $vr5, $s0, 464 + vld $vr2, $s0, 480 + vld $vr3, $s0, 496 + vld $vr4, $t5, 480 + vld $vr5, $t5, 496 + vst $vr2, $t5, 480 + vst $vr3, $t5, 496 + vst $vr4, $s0, 480 + vst $vr5, $s0, 496 + vld $vr2, $s0, 512 + vld $vr3, $s0, 528 + vld $vr4, $t5, 512 + vld $vr5, $t5, 528 + vst $vr2, $t5, 512 + vst $vr3, $t5, 528 + vst $vr4, $s0, 512 + vst $vr5, $s0, 528 + vld $vr2, $s0, 544 + vld $vr3, $s0, 560 + vld $vr4, $t5, 544 + vld $vr5, $t5, 560 + vst $vr2, $t5, 544 + vst $vr3, $t5, 560 + vst $vr4, $s0, 544 + vst $vr5, $s0, 560 + vld $vr2, $s0, 576 + vld $vr3, $s0, 592 + vld $vr4, $t5, 576 + vld $vr5, $t5, 592 + vst $vr2, $t5, 576 + vst $vr3, $t5, 592 + vst $vr4, $s0, 576 + vst $vr5, $s0, 592 + vld $vr2, $s0, 608 + vld $vr3, $s0, 624 + vld $vr4, $t5, 608 + vld $vr5, $t5, 624 + vst $vr2, $t5, 608 + vst $vr3, $t5, 624 + vst $vr4, $s0, 608 + vst $vr5, $s0, 624 + vld $vr2, $s0, 640 + vld $vr3, $s0, 656 + vld $vr4, $t5, 640 + vld $vr5, $t5, 656 + vst $vr2, $t5, 640 + vst $vr3, $t5, 656 + vst $vr4, $s0, 640 + vst $vr5, $s0, 656 + vld $vr2, $s0, 672 + vld $vr3, $s0, 688 + vld $vr4, $t5, 672 + vld $vr5, $t5, 688 + vst $vr2, $t5, 672 + vst $vr3, $t5, 688 + vst $vr4, $s0, 672 + vst $vr5, $s0, 688 + vld $vr2, $s0, 704 + vld $vr3, $s0, 720 + vld $vr4, $t5, 704 + vld $vr5, $t5, 720 + vst $vr2, $t5, 704 + vst $vr3, $t5, 720 + vst $vr4, $s0, 704 + vst $vr5, $s0, 720 + vld $vr2, $s0, 736 + vld $vr3, $s0, 752 + vld $vr4, $t5, 736 + vld $vr5, $t5, 752 + vst $vr2, $t5, 736 + vst $vr3, $t5, 752 + vst $vr4, $s0, 736 + vst $vr5, $s0, 752 + vld $vr2, $s0, 768 + vld $vr3, $s0, 784 + vld $vr4, $t5, 768 + vld $vr5, $t5, 784 + vst $vr2, $t5, 768 + vst $vr3, $t5, 784 + vst $vr4, $s0, 768 + vst $vr5, $s0, 784 ori $a1, $zero, 100 .LBB17_40: # %scalar.ph90.preheader # in Loop: Header=BB17_24 Depth=2 @@ -11801,35 +11745,34 @@ DoLUIteration: # @DoLUIteration # Parent Loop BB17_17 Depth=1 # Parent Loop BB17_24 Depth=2 # => This Inner Loop Header: Depth=3 - fldx.d $fa1, $fp, $a1 - fldx.d $fa2, $a5, $a1 - fstx.d $fa1, $a5, $a1 - fstx.d $fa2, $fp, $a1 + fldx.d $fa2, $fp, $a1 + fldx.d $fa3, $a5, $a1 + fstx.d $fa2, $a5, $a1 + fstx.d $fa3, $fp, $a1 addi.d $a1, $a1, 8 bne $a1, $a6, .LBB17_41 # %bb.42: # in Loop: Header=BB17_24 Depth=2 slli.d $a1, $a4, 3 - fldx.d $fa1, $s5, $a0 - fldx.d $fa2, $s5, $a1 - fstx.d $fa1, $s5, $a1 - fstx.d $fa2, $s5, $a0 + fldx.d $fa2, $s5, $a0 + fldx.d $fa3, $s5, $a1 + fstx.d $fa2, $s5, $a1 + fstx.d $fa3, $s5, $a0 .LBB17_43: # in Loop: Header=BB17_24 Depth=2 - fldx.d $fa1, $s0, $a0 + fldx.d $fa2, $s0, $a0 slli.d $a0, $s7, 2 - fcmp.cune.d $fcc0, $fa1, $fa0 + fcmp.cune.d $fcc0, $fa2, $fa0 stx.w $ra, $a0, $t2 bcnez $fcc0, .LBB17_45 # %bb.44: # in Loop: Header=BB17_24 Depth=2 - pcalau12i $a0, %pc_hi20(.LCPI17_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI17_0) alsl.d $a0, $s7, $s0, 3 ld.d $a1, $sp, 104 # 8-byte Folded Reload st.d $a1, $a0, 0 + fmov.d $fa2, $fa1 .LBB17_45: # in Loop: Header=BB17_24 Depth=2 beq $s7, $t1, .LBB17_52 # %bb.46: # %.lr.ph173.preheader.i.i # in Loop: Header=BB17_24 Depth=2 - frecip.d $fa1, $fa1 + frecip.d $fa2, $fa2 move $a0, $s7 ori $a1, $zero, 2 bltu $a7, $a1, .LBB17_50 @@ -11844,12 +11787,12 @@ DoLUIteration: # @DoLUIteration # Parent Loop BB17_17 Depth=1 # Parent Loop BB17_24 Depth=2 # => This Inner Loop Header: Depth=3 - fld.d $fa2, $a1, -808 - fld.d $fa3, $a1, 0 - fmul.d $fa2, $fa1, $fa2 - fmul.d $fa3, $fa1, $fa3 - fst.d $fa2, $a1, -808 - fst.d $fa3, $a1, 0 + fld.d $fa3, $a1, -808 + fld.d $fa4, $a1, 0 + fmul.d $fa3, $fa2, $fa3 + fmul.d $fa4, $fa2, $fa4 + fst.d $fa3, $a1, -808 + fst.d $fa4, $a1, 0 addi.d $s8, $s8, -2 addi.d $a1, $a1, 1616 bnez $s8, .LBB17_48 @@ -11866,9 +11809,9 @@ DoLUIteration: # @DoLUIteration # Parent Loop BB17_17 Depth=1 # Parent Loop BB17_24 Depth=2 # => This Inner Loop Header: Depth=3 - fld.d $fa2, $a0, 0 - fmul.d $fa2, $fa1, $fa2 - fst.d $fa2, $a0, 0 + fld.d $fa3, $a0, 0 + fmul.d $fa3, $fa2, $fa3 + fst.d $fa3, $a0, 0 addi.d $a1, $a1, 1 addi.d $a0, $a0, 808 bnez $a1, .LBB17_51 @@ -11893,14 +11836,14 @@ DoLUIteration: # @DoLUIteration b .LBB17_56 .p2align 4, , 16 .LBB17_53: # in Loop: Header=BB17_56 Depth=2 - fcmp.ceq.d $fcc0, $fa1, $fa0 + fcmp.ceq.d $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB17_60 # %bb.54: # in Loop: Header=BB17_56 Depth=2 move $a4, $a2 .LBB17_55: # %.loopexit61.i.i # in Loop: Header=BB17_56 Depth=2 alsl.d $a1, $a2, $s3, 3 - fst.d $fa1, $a1, 0 + fst.d $fa2, $a1, 0 addi.d $a2, $a2, 1 addi.d $a3, $a3, 808 beq $a2, $t0, .LBB17_61 @@ -11912,10 +11855,10 @@ DoLUIteration: # @DoLUIteration ldx.w $a1, $a1, $t2 slli.d $a1, $a1, 3 slli.d $a5, $a2, 3 - fldx.d $fa2, $s3, $a5 - fldx.d $fa1, $s3, $a1 + fldx.d $fa3, $s3, $a5 + fldx.d $fa2, $s3, $a1 addi.w $a5, $a4, 0 - fstx.d $fa2, $s3, $a1 + fstx.d $fa3, $s3, $a1 beq $a5, $a0, .LBB17_53 # %bb.57: # %.preheader60.i.i # in Loop: Header=BB17_56 Depth=2 @@ -11928,10 +11871,10 @@ DoLUIteration: # @DoLUIteration .LBB17_59: # Parent Loop BB17_17 Depth=1 # Parent Loop BB17_56 Depth=2 # => This Inner Loop Header: Depth=3 - fld.d $fa2, $t4, 0 - fld.d $fa3, $a7, 0 - fneg.d $fa2, $fa2 - fmadd.d $fa1, $fa2, $fa3, $fa1 + fld.d $fa3, $t4, 0 + fld.d $fa4, $a7, 0 + fneg.d $fa3, $fa3 + fmadd.d $fa2, $fa3, $fa4, $fa2 addi.d $a5, $a5, 1 addi.d $a7, $a7, 8 addi.d $t4, $t4, 8 @@ -11953,10 +11896,10 @@ DoLUIteration: # @DoLUIteration # in Loop: Header=BB17_63 Depth=2 mul.d $a1, $a4, $a6 add.d $a1, $t8, $a1 - fldx.d $fa2, $a1, $a5 + fldx.d $fa3, $a1, $a5 alsl.d $a1, $a4, $s3, 3 - fdiv.d $fa1, $fa1, $fa2 - fst.d $fa1, $a1, 0 + fdiv.d $fa2, $fa2, $fa3 + fst.d $fa2, $a1, 0 addi.d $a1, $a4, -1 addi.d $a0, $a0, 1 addi.d $a3, $a3, -8 @@ -11968,7 +11911,7 @@ DoLUIteration: # @DoLUIteration # Child Loop BB17_65 Depth 3 move $a4, $a1 slli.d $a5, $a1, 3 - fldx.d $fa1, $s3, $a5 + fldx.d $fa2, $s3, $a5 ori $a1, $zero, 99 bltu $a1, $a4, .LBB17_62 # %bb.64: # %.lr.ph69.i.i @@ -11980,10 +11923,10 @@ DoLUIteration: # @DoLUIteration .LBB17_65: # Parent Loop BB17_17 Depth=1 # Parent Loop BB17_63 Depth=2 # => This Inner Loop Header: Depth=3 - fld.d $fa2, $a7, 0 - fld.d $fa3, $t4, 0 - fneg.d $fa2, $fa2 - fmadd.d $fa1, $fa2, $fa3, $fa1 + fld.d $fa3, $a7, 0 + fld.d $fa4, $t4, 0 + fneg.d $fa3, $fa3 + fmadd.d $fa2, $fa3, $fa4, $fa2 addi.d $t5, $t5, -1 addi.d $t4, $t4, 8 addi.d $a7, $a7, 8 diff --git a/results/MultiSource/Benchmarks/sim/CMakeFiles/sim.dir/sim.s b/results/MultiSource/Benchmarks/sim/CMakeFiles/sim.dir/sim.s index 49ea0aad..2088b67a 100644 --- a/results/MultiSource/Benchmarks/sim/CMakeFiles/sim.dir/sim.s +++ b/results/MultiSource/Benchmarks/sim/CMakeFiles/sim.dir/sim.s @@ -1,10 +1,6 @@ .file "sim.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x3fc999999999999a # double 0.20000000000000001 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -233,8 +229,11 @@ main: # @main st.d $s4, $sp, 24 # 8-byte Folded Spill st.d $s7, $sp, 56 # 8-byte Folded Spill st.d $s1, $sp, 40 # 8-byte Folded Spill + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 ori $s8, $zero, 5 - pcalau12i $a0, %pc_hi20(.LCPI0_0) + lu52i.d $a0, $a0, 1020 st.d $fp, $sp, 32 # 8-byte Folded Spill blt $s6, $s8, .LBB0_42 # %bb.33: # %.lr.ph.preheader @@ -245,9 +244,9 @@ main: # @main vldi $vr2, -912 vldi $vr0, -784 vst $vr0, $sp, 96 # 16-byte Folded Spill - fld.d $fs0, $a0, %pc_lo12(.LCPI0_0) vldi $vr0, -1000 vst $vr0, $sp, 112 # 16-byte Folded Spill + movgr2fr.d $fs0, $a0 ori $s6, $zero, 61 ori $s0, $zero, 17 pcalau12i $a0, %pc_hi20(.LJTI0_0) @@ -304,12 +303,12 @@ main: # @main vst $vr0, $sp, 112 # 16-byte Folded Spill b .LBB0_35 .LBB0_42: - fld.d $fs0, $a0, %pc_lo12(.LCPI0_0) vldi $vr0, -1000 vst $vr0, $sp, 112 # 16-byte Folded Spill vldi $vr0, -784 vst $vr0, $sp, 80 # 16-byte Folded Spill vldi $vr1, -912 + movgr2fr.d $fs0, $a0 vst $vr0, $sp, 96 # 16-byte Folded Spill .LBB0_43: # %._crit_edge vst $vr1, $sp, 128 # 16-byte Folded Spill @@ -5241,12 +5240,7 @@ ckopen: # @ckopen .Lfunc_end12: .size ckopen, .Lfunc_end12-ckopen # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function dtime -.LCPI13_0: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 - .text - .globl dtime + .globl dtime # -- Begin function dtime .p2align 5 .type dtime,@function dtime: # @dtime @@ -5261,14 +5255,17 @@ dtime: # @dtime pcaddu18i $ra, %call36(getrusage) jirl $ra, $ra, 0 ld.d $a0, $fp, 0 + ld.d $a1, $fp, 8 movgr2fr.d $fa0, $a0 - ld.d $a0, $fp, 8 - pcalau12i $a1, %pc_hi20(.LCPI13_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI13_0) ffint.d.l $fa0, $fa0 + movgr2fr.d $fa1, $a1 + ffint.d.l $fa1, $fa1 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 movgr2fr.d $fa2, $a0 - ffint.d.l $fa2, $fa2 - fmadd.d $fa0, $fa2, $fa1, $fa0 + fmadd.d $fa0, $fa1, $fa2, $fa0 ld.d $fp, $sp, 0 # 8-byte Folded Reload ld.d $ra, $sp, 8 # 8-byte Folded Reload addi.d $sp, $sp, 16 diff --git a/results/MultiSource/Benchmarks/tramp3d-v4/CMakeFiles/tramp3d-v4.dir/tramp3d-v4.s b/results/MultiSource/Benchmarks/tramp3d-v4/CMakeFiles/tramp3d-v4.dir/tramp3d-v4.s index dde614d7..f6929b3c 100644 --- a/results/MultiSource/Benchmarks/tramp3d-v4/CMakeFiles/tramp3d-v4.dir/tramp3d-v4.s +++ b/results/MultiSource/Benchmarks/tramp3d-v4/CMakeFiles/tramp3d-v4.dir/tramp3d-v4.s @@ -23165,16 +23165,12 @@ _Z15handle_cmd_argsiPPc: # @_Z15handle_cmd_argsiPPc .Lfunc_end157: .size _Z15handle_cmd_argsiPPc, .Lfunc_end157-_Z15handle_cmd_argsiPPc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI158_0: - .dword 0x4002666666666666 # double 2.2999999999999998 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI158_1: + .p2align 4, 0x0 # -- Begin function main +.LCPI158_0: .dword 0x4058c00000000000 # double 99 .dword 0x4059000000000000 # double 100 -.LCPI158_2: +.LCPI158_1: .dword 0x3fd0000000000000 # double 0.25 .dword 0x4000000000000000 # double 2 .text @@ -26330,13 +26326,12 @@ main: # @main ld.d $a0, $sp, 1536 ld.d $a1, $sp, 1528 fld.d $fa0, $s8, %pc_lo12(gamma_) - vldi $vr1, -784 - pcalau12i $a3, %pc_hi20(.LCPI158_0) - fld.d $fa2, $a3, %pc_lo12(.LCPI158_0) - fadd.d $fa0, $fa0, $fa1 ld.d $a0, $a0, 8 mul.d $a1, $a1, $a2 - fmul.d $fa0, $fa0, $fa2 + vldi $vr1, -784 + fadd.d $fa0, $fa0, $fa1 + movgr2fr.d $fa1, $a3 + fmul.d $fa0, $fa0, $fa1 frecip.d $fa0, $fa0 fstx.d $fa0, $a0, $a1 ld.d $a0, $sp, 1104 @@ -26591,13 +26586,13 @@ main: # @main ld.w $a1, $sp, 980 ld.w $a2, $sp, 988 .LBB158_120: # %_ZplI6NoMeshILi3EEd13ExpressionTagI10BinaryNodeI8OpDivide6ScalarIdE9UnaryNodeI5FnExpS3_I10OpMultiplyS6_S3_I5FnPowS7_I6FnNormS3_I10OpSubtract9ReferenceI5FieldIS1_6VectorILi3Ed4FullE10ViewEngineILi3E13IndexFunctionIN10GenericURMI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEE16PositionsFunctorEEEEES5_ISH_EEES6_EEEEEdEN15MakeFieldReturnIS3_I5OpAddN10CreateLeafISE_IT_T0_T1_EE6Leaf_tENS15_IT2_E6Leaf_tEEE12Expression_tERKS19_RKS1C_.exit - pcalau12i $a3, %pc_hi20(.LCPI158_1) - vld $vr0, $a3, %pc_lo12(.LCPI158_1) + pcalau12i $a3, %pc_hi20(.LCPI158_0) + vld $vr0, $a3, %pc_lo12(.LCPI158_0) vst $vr0, $sp, 432 lu52i.d $a3, $zero, 1021 vreplgr2vr.d $vr0, $a3 - pcalau12i $a3, %pc_hi20(.LCPI158_2) - vld $vr1, $a3, %pc_lo12(.LCPI158_2) + pcalau12i $a3, %pc_hi20(.LCPI158_1) + vld $vr1, $a3, %pc_lo12(.LCPI158_1) vst $vr0, $sp, 456 addi.d $a3, $sp, 880 st.d $a3, $sp, 448 @@ -115683,12 +115678,8 @@ _ZN15ReductionKernelId11FnMinAssign6EngineILi3Ed9BrickViewE15InlineKernelTagE3ru .Lfunc_end543: .size _ZN15ReductionKernelId11FnMinAssign6EngineILi3Ed9BrickViewE15InlineKernelTagE3runEv, .Lfunc_end543-_ZN15ReductionKernelId11FnMinAssign6EngineILi3Ed9BrickViewE15InlineKernelTagE3runEv # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN18ReductionEvaluatorI15InlineKernelTagE8evaluateId11FnMinAssign6EngineILi3Ed9BrickViewE8IntervalILi3EEEEvRT_RKT0_RKT1_RKT2_10WrappedIntILi3EE -.LCPI544_0: - .dword 0x7fefffffffffffff # double 1.7976931348623157E+308 .section .text._ZN18ReductionEvaluatorI15InlineKernelTagE8evaluateId11FnMinAssign6EngineILi3Ed9BrickViewE8IntervalILi3EEEEvRT_RKT0_RKT1_RKT2_10WrappedIntILi3EE,"axG",@progbits,_ZN18ReductionEvaluatorI15InlineKernelTagE8evaluateId11FnMinAssign6EngineILi3Ed9BrickViewE8IntervalILi3EEEEvRT_RKT0_RKT1_RKT2_10WrappedIntILi3EE,comdat - .weak _ZN18ReductionEvaluatorI15InlineKernelTagE8evaluateId11FnMinAssign6EngineILi3Ed9BrickViewE8IntervalILi3EEEEvRT_RKT0_RKT1_RKT2_10WrappedIntILi3EE + .weak _ZN18ReductionEvaluatorI15InlineKernelTagE8evaluateId11FnMinAssign6EngineILi3Ed9BrickViewE8IntervalILi3EEEEvRT_RKT0_RKT1_RKT2_10WrappedIntILi3EE # -- Begin function _ZN18ReductionEvaluatorI15InlineKernelTagE8evaluateId11FnMinAssign6EngineILi3Ed9BrickViewE8IntervalILi3EEEEvRT_RKT0_RKT1_RKT2_10WrappedIntILi3EE .p2align 5 .type _ZN18ReductionEvaluatorI15InlineKernelTagE8evaluateId11FnMinAssign6EngineILi3Ed9BrickViewE8IntervalILi3EEEEvRT_RKT0_RKT1_RKT2_10WrappedIntILi3EE,@function _ZN18ReductionEvaluatorI15InlineKernelTagE8evaluateId11FnMinAssign6EngineILi3Ed9BrickViewE8IntervalILi3EEEEvRT_RKT0_RKT1_RKT2_10WrappedIntILi3EE: # @_ZN18ReductionEvaluatorI15InlineKernelTagE8evaluateId11FnMinAssign6EngineILi3Ed9BrickViewE8IntervalILi3EEEEvRT_RKT0_RKT1_RKT2_10WrappedIntILi3EE @@ -115735,14 +115726,15 @@ _ZN18ReductionEvaluatorI15InlineKernelTagE8evaluateId11FnMinAssign6EngineILi3Ed9 addi.d $a2, $sp, 112 jirl $ra, $a3, 0 .LBB544_2: # %_ZN6EngineILi3Ed9BrickViewEC2ERKS1_.exit - ld.d $a2, $s1, 8 - ld.w $a1, $s0, 20 - pcalau12i $t0, %pc_hi20(.LCPI544_0) - fld.d $fa0, $t0, %pc_lo12(.LCPI544_0) + ld.d $a1, $s1, 8 addi.d $a0, $sp, 72 - alsl.d $a2, $s2, $a2, 3 - st.d $a2, $sp, 88 - blez $a1, .LBB544_11 + alsl.d $a1, $s2, $a1, 3 + ld.w $a2, $s0, 20 + st.d $a1, $sp, 88 + addi.w $a3, $zero, -1 + lu52i.d $t2, $a3, 2046 + movgr2fr.d $fa0, $t2 + blez $a2, .LBB544_11 # %bb.3: # %.preheader23.lr.ph ld.w $a3, $s0, 12 blez $a3, .LBB544_11 @@ -115753,9 +115745,9 @@ _ZN18ReductionEvaluatorI15InlineKernelTagE8evaluateId11FnMinAssign6EngineILi3Ed9 ld.w $a5, $sp, 40 ld.w $a6, $sp, 44 ld.w $a7, $sp, 48 - fld.d $fa0, $t0, %pc_lo12(.LCPI544_0) move $t0, $zero move $t1, $zero + movgr2fr.d $fa0, $t2 .p2align 4, , 16 .LBB544_6: # %.preheader23.us.us # =>This Loop Header: Depth=1 @@ -115775,7 +115767,7 @@ _ZN18ReductionEvaluatorI15InlineKernelTagE8evaluateId11FnMinAssign6EngineILi3Ed9 # Parent Loop BB544_7 Depth=2 # => This Inner Loop Header: Depth=3 slli.d $t6, $t5, 3 - fldx.d $fa1, $a2, $t6 + fldx.d $fa1, $a1, $t6 fcmp.clt.d $fcc0, $fa1, $fa0 fsel $fa0, $fa0, $fa1, $fcc0 addi.d $t4, $t4, -1 @@ -115790,7 +115782,7 @@ _ZN18ReductionEvaluatorI15InlineKernelTagE8evaluateId11FnMinAssign6EngineILi3Ed9 # in Loop: Header=BB544_6 Depth=1 addi.w $t1, $t1, 1 add.w $t0, $t0, $a7 - bne $t1, $a1, .LBB544_6 + bne $t1, $a2, .LBB544_6 .LBB544_11: # %._crit_edge fst.d $fa0, $fp, 0 pcaddu18i $ra, %call36(_ZN12DataBlockPtrIdLb0EED2Ev) diff --git a/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/functionobjects.dir/functionobjects.s b/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/functionobjects.dir/functionobjects.s index 75b57080..1edbc01e 100644 --- a/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/functionobjects.dir/functionobjects.s +++ b/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/functionobjects.dir/functionobjects.s @@ -62,12 +62,7 @@ _Z13record_resultdPKc: # @_Z13record_resultdPKc .Lfunc_end0: .size _Z13record_resultdPKc, .Lfunc_end0-_Z13record_resultdPKc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z9summarizePKciiii -.LCPI1_0: - .dword 0x412e848000000000 # double 1.0E+6 - .text - .globl _Z9summarizePKciiii + .globl _Z9summarizePKciiii # -- Begin function _Z9summarizePKciiii .p2align 5 .type _Z9summarizePKciiii,@function _Z9summarizePKciiii: # @_Z9summarizePKciiii @@ -135,14 +130,16 @@ _Z9summarizePKciiii: # @_Z9summarizePKciiii # %bb.5: # %.lr.ph45.preheader st.d $s0, $sp, 8 # 8-byte Folded Spill st.d $s5, $sp, 16 # 8-byte Folded Spill + ori $a0, $zero, 0 movgr2fr.w $fa0, $s3 movgr2fr.w $fa1, $s2 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI1_0) + lu32i.d $a0, -97152 ffint.d.w $fa0, $fa0 ffint.d.w $fa1, $fa1 + lu52i.d $a0, $a0, 1042 fmul.d $fa0, $fa0, $fa1 - fdiv.d $fs1, $fa0, $fa2 + movgr2fr.d $fa1, $a0 + fdiv.d $fs1, $fa0, $fa1 pcalau12i $a0, %pc_hi20(.L.str.4) addi.d $s2, $a0, %pc_lo12(.L.str.4) pcalau12i $a0, %pc_hi20(.L.str.5) @@ -409,12 +406,7 @@ _Z11start_timerv: # @_Z11start_timerv .Lfunc_end3: .size _Z11start_timerv, .Lfunc_end3-_Z11start_timerv # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z5timerv -.LCPI4_0: - .dword 0x412e848000000000 # double 1.0E+6 - .text - .globl _Z5timerv + .globl _Z5timerv # -- Begin function _Z5timerv .p2align 5 .type _Z5timerv,@function _Z5timerv: # @_Z5timerv @@ -426,12 +418,14 @@ _Z5timerv: # @_Z5timerv pcalau12i $a1, %pc_hi20(start_time) ld.d $a1, $a1, %pc_lo12(start_time) pcalau12i $a2, %pc_hi20(end_time) - pcalau12i $a3, %pc_hi20(.LCPI4_0) - fld.d $fa0, $a3, %pc_lo12(.LCPI4_0) sub.d $a1, $a0, $a1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 st.d $a0, $a2, %pc_lo12(end_time) ld.d $ra, $sp, 8 # 8-byte Folded Reload addi.d $sp, $sp, 16 diff --git a/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/loop_unroll.dir/loop_unroll.s b/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/loop_unroll.dir/loop_unroll.s index 8e324c02..672875f8 100644 --- a/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/loop_unroll.dir/loop_unroll.s +++ b/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/loop_unroll.dir/loop_unroll.s @@ -62,12 +62,7 @@ _Z13record_resultdPKc: # @_Z13record_resultdPKc .Lfunc_end0: .size _Z13record_resultdPKc, .Lfunc_end0-_Z13record_resultdPKc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z9summarizePKciiii -.LCPI1_0: - .dword 0x412e848000000000 # double 1.0E+6 - .text - .globl _Z9summarizePKciiii + .globl _Z9summarizePKciiii # -- Begin function _Z9summarizePKciiii .p2align 5 .type _Z9summarizePKciiii,@function _Z9summarizePKciiii: # @_Z9summarizePKciiii @@ -135,14 +130,16 @@ _Z9summarizePKciiii: # @_Z9summarizePKciiii # %bb.5: # %.lr.ph45.preheader st.d $s0, $sp, 8 # 8-byte Folded Spill st.d $s5, $sp, 16 # 8-byte Folded Spill + ori $a0, $zero, 0 movgr2fr.w $fa0, $s3 movgr2fr.w $fa1, $s2 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI1_0) + lu32i.d $a0, -97152 ffint.d.w $fa0, $fa0 ffint.d.w $fa1, $fa1 + lu52i.d $a0, $a0, 1042 fmul.d $fa0, $fa0, $fa1 - fdiv.d $fs1, $fa0, $fa2 + movgr2fr.d $fa1, $a0 + fdiv.d $fs1, $fa0, $fa1 pcalau12i $a0, %pc_hi20(.L.str.4) addi.d $s2, $a0, %pc_lo12(.L.str.4) pcalau12i $a0, %pc_hi20(.L.str.5) @@ -409,12 +406,7 @@ _Z11start_timerv: # @_Z11start_timerv .Lfunc_end3: .size _Z11start_timerv, .Lfunc_end3-_Z11start_timerv # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z5timerv -.LCPI4_0: - .dword 0x412e848000000000 # double 1.0E+6 - .text - .globl _Z5timerv + .globl _Z5timerv # -- Begin function _Z5timerv .p2align 5 .type _Z5timerv,@function _Z5timerv: # @_Z5timerv @@ -426,12 +418,14 @@ _Z5timerv: # @_Z5timerv pcalau12i $a1, %pc_hi20(start_time) ld.d $a1, $a1, %pc_lo12(start_time) pcalau12i $a2, %pc_hi20(end_time) - pcalau12i $a3, %pc_hi20(.LCPI4_0) - fld.d $fa0, $a3, %pc_lo12(.LCPI4_0) sub.d $a1, $a0, $a1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 st.d $a0, $a2, %pc_lo12(end_time) ld.d $ra, $sp, 8 # 8-byte Folded Reload addi.d $sp, $sp, 16 @@ -756,12 +750,8 @@ main: # @main .size main, .Lfunc_end5-main .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi32EiEvPKT0_iPKc -.LCPI6_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi32EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi32EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi32EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi32EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi32EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi32EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi32EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi32EiEvPKT0_iPKc @@ -1179,12 +1169,14 @@ _Z27test_for_loop_unroll_factorILi32EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB6_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI6_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI6_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -1218,12 +1210,8 @@ _Z27test_for_loop_unroll_factorILi32EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi32EiEvPKT0_iPKc, .Lfunc_end6-_Z27test_for_loop_unroll_factorILi32EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi31EiEvPKT0_iPKc -.LCPI7_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi31EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi31EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi31EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi31EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi31EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi31EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi31EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi31EiEvPKT0_iPKc @@ -1645,12 +1633,14 @@ _Z27test_for_loop_unroll_factorILi31EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB7_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI7_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI7_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -1684,12 +1674,8 @@ _Z27test_for_loop_unroll_factorILi31EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi31EiEvPKT0_iPKc, .Lfunc_end7-_Z27test_for_loop_unroll_factorILi31EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi30EiEvPKT0_iPKc -.LCPI8_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi30EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi30EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi30EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi30EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi30EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi30EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi30EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi30EiEvPKT0_iPKc @@ -2105,12 +2091,14 @@ _Z27test_for_loop_unroll_factorILi30EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB8_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI8_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI8_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -2223,12 +2211,8 @@ _ZN14for_loop_testsILi28EiE7do_testEPKiPKc: # @_ZN14for_loop_testsILi28EiE7do_te .size _ZN14for_loop_testsILi28EiE7do_testEPKiPKc, .Lfunc_end9-_ZN14for_loop_testsILi28EiE7do_testEPKiPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi29EiEvPKT0_iPKc -.LCPI10_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi29EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi29EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi29EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi29EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi29EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi29EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi29EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi29EiEvPKT0_iPKc @@ -2645,12 +2629,14 @@ _Z27test_for_loop_unroll_factorILi29EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB10_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI10_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI10_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -2684,12 +2670,8 @@ _Z27test_for_loop_unroll_factorILi29EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi29EiEvPKT0_iPKc, .Lfunc_end10-_Z27test_for_loop_unroll_factorILi29EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi28EiEvPKT0_iPKc -.LCPI11_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi28EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi28EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi28EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi28EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi28EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi28EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi28EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi28EiEvPKT0_iPKc @@ -3102,12 +3084,14 @@ _Z27test_for_loop_unroll_factorILi28EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB11_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI11_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI11_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -3141,12 +3125,8 @@ _Z27test_for_loop_unroll_factorILi28EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi28EiEvPKT0_iPKc, .Lfunc_end11-_Z27test_for_loop_unroll_factorILi28EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi27EiEvPKT0_iPKc -.LCPI12_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi27EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi27EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi27EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi27EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi27EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi27EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi27EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi27EiEvPKT0_iPKc @@ -3556,12 +3536,14 @@ _Z27test_for_loop_unroll_factorILi27EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB12_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI12_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI12_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -3595,12 +3577,8 @@ _Z27test_for_loop_unroll_factorILi27EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi27EiEvPKT0_iPKc, .Lfunc_end12-_Z27test_for_loop_unroll_factorILi27EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi26EiEvPKT0_iPKc -.LCPI13_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi26EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi26EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi26EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi26EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi26EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi26EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi26EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi26EiEvPKT0_iPKc @@ -4004,12 +3982,14 @@ _Z27test_for_loop_unroll_factorILi26EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB13_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI13_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI13_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -4043,12 +4023,8 @@ _Z27test_for_loop_unroll_factorILi26EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi26EiEvPKT0_iPKc, .Lfunc_end13-_Z27test_for_loop_unroll_factorILi26EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi25EiEvPKT0_iPKc -.LCPI14_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi25EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi25EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi25EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi25EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi25EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi25EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi25EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi25EiEvPKT0_iPKc @@ -4457,12 +4433,14 @@ _Z27test_for_loop_unroll_factorILi25EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB14_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI14_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI14_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -4496,12 +4474,8 @@ _Z27test_for_loop_unroll_factorILi25EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi25EiEvPKT0_iPKc, .Lfunc_end14-_Z27test_for_loop_unroll_factorILi25EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi24EiEvPKT0_iPKc -.LCPI15_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi24EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi24EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi24EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi24EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi24EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi24EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi24EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi24EiEvPKT0_iPKc @@ -4905,12 +4879,14 @@ _Z27test_for_loop_unroll_factorILi24EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB15_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI15_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI15_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -4944,12 +4920,8 @@ _Z27test_for_loop_unroll_factorILi24EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi24EiEvPKT0_iPKc, .Lfunc_end15-_Z27test_for_loop_unroll_factorILi24EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi23EiEvPKT0_iPKc -.LCPI16_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi23EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi23EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi23EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi23EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi23EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi23EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi23EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi23EiEvPKT0_iPKc @@ -5354,12 +5326,14 @@ _Z27test_for_loop_unroll_factorILi23EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB16_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI16_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI16_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -5393,12 +5367,8 @@ _Z27test_for_loop_unroll_factorILi23EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi23EiEvPKT0_iPKc, .Lfunc_end16-_Z27test_for_loop_unroll_factorILi23EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi22EiEvPKT0_iPKc -.LCPI17_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi22EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi22EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi22EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi22EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi22EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi22EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi22EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi22EiEvPKT0_iPKc @@ -5798,12 +5768,14 @@ _Z27test_for_loop_unroll_factorILi22EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB17_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI17_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI17_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -5837,12 +5809,8 @@ _Z27test_for_loop_unroll_factorILi22EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi22EiEvPKT0_iPKc, .Lfunc_end17-_Z27test_for_loop_unroll_factorILi22EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi21EiEvPKT0_iPKc -.LCPI18_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi21EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi21EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi21EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi21EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi21EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi21EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi21EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi21EiEvPKT0_iPKc @@ -6243,12 +6211,14 @@ _Z27test_for_loop_unroll_factorILi21EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB18_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI18_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI18_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -6360,12 +6330,8 @@ _ZN14for_loop_testsILi19EiE7do_testEPKiPKc: # @_ZN14for_loop_testsILi19EiE7do_te .size _ZN14for_loop_testsILi19EiE7do_testEPKiPKc, .Lfunc_end19-_ZN14for_loop_testsILi19EiE7do_testEPKiPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi20EiEvPKT0_iPKc -.LCPI20_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi20EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi20EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi20EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi20EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi20EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi20EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi20EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi20EiEvPKT0_iPKc @@ -6774,12 +6740,14 @@ _Z27test_for_loop_unroll_factorILi20EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB20_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI20_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI20_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -6813,12 +6781,8 @@ _Z27test_for_loop_unroll_factorILi20EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi20EiEvPKT0_iPKc, .Lfunc_end20-_Z27test_for_loop_unroll_factorILi20EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi19EiEvPKT0_iPKc -.LCPI21_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi19EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi19EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi19EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi19EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi19EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi19EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi19EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi19EiEvPKT0_iPKc @@ -7223,12 +7187,14 @@ _Z27test_for_loop_unroll_factorILi19EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB21_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI21_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI21_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -7262,12 +7228,8 @@ _Z27test_for_loop_unroll_factorILi19EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi19EiEvPKT0_iPKc, .Lfunc_end21-_Z27test_for_loop_unroll_factorILi19EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi18EiEvPKT0_iPKc -.LCPI22_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi18EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi18EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi18EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi18EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi18EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi18EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi18EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi18EiEvPKT0_iPKc @@ -7668,12 +7630,14 @@ _Z27test_for_loop_unroll_factorILi18EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB22_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI22_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI22_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -7707,12 +7671,8 @@ _Z27test_for_loop_unroll_factorILi18EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi18EiEvPKT0_iPKc, .Lfunc_end22-_Z27test_for_loop_unroll_factorILi18EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi17EiEvPKT0_iPKc -.LCPI23_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi17EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi17EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi17EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi17EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi17EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi17EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi17EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi17EiEvPKT0_iPKc @@ -8111,12 +8071,14 @@ _Z27test_for_loop_unroll_factorILi17EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB23_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI23_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI23_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -8150,12 +8112,8 @@ _Z27test_for_loop_unroll_factorILi17EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi17EiEvPKT0_iPKc, .Lfunc_end23-_Z27test_for_loop_unroll_factorILi17EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi16EiEvPKT0_iPKc -.LCPI24_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi16EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi16EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi16EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi16EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi16EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi16EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi16EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi16EiEvPKT0_iPKc @@ -8495,12 +8453,14 @@ _Z27test_for_loop_unroll_factorILi16EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB24_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI24_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI24_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -8534,12 +8494,8 @@ _Z27test_for_loop_unroll_factorILi16EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi16EiEvPKT0_iPKc, .Lfunc_end24-_Z27test_for_loop_unroll_factorILi16EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi15EiEvPKT0_iPKc -.LCPI25_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi15EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi15EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi15EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi15EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi15EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi15EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi15EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi15EiEvPKT0_iPKc @@ -8878,12 +8834,14 @@ _Z27test_for_loop_unroll_factorILi15EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB25_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI25_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI25_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -8917,12 +8875,8 @@ _Z27test_for_loop_unroll_factorILi15EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi15EiEvPKT0_iPKc, .Lfunc_end25-_Z27test_for_loop_unroll_factorILi15EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi14EiEvPKT0_iPKc -.LCPI26_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi14EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi14EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi14EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi14EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi14EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi14EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi14EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi14EiEvPKT0_iPKc @@ -9257,12 +9211,14 @@ _Z27test_for_loop_unroll_factorILi14EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB26_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI26_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI26_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -9296,12 +9252,8 @@ _Z27test_for_loop_unroll_factorILi14EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi14EiEvPKT0_iPKc, .Lfunc_end26-_Z27test_for_loop_unroll_factorILi14EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi13EiEvPKT0_iPKc -.LCPI27_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi13EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi13EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi13EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi13EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi13EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi13EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi13EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi13EiEvPKT0_iPKc @@ -9628,12 +9580,14 @@ _Z27test_for_loop_unroll_factorILi13EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB27_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI27_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI27_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -9667,12 +9621,8 @@ _Z27test_for_loop_unroll_factorILi13EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi13EiEvPKT0_iPKc, .Lfunc_end27-_Z27test_for_loop_unroll_factorILi13EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi12EiEvPKT0_iPKc -.LCPI28_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi12EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi12EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi12EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi12EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi12EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi12EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi12EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi12EiEvPKT0_iPKc @@ -10001,12 +9951,14 @@ _Z27test_for_loop_unroll_factorILi12EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB28_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI28_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI28_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -10121,12 +10073,8 @@ _ZN14for_loop_testsILi10EiE7do_testEPKiPKc: # @_ZN14for_loop_testsILi10EiE7do_te .size _ZN14for_loop_testsILi10EiE7do_testEPKiPKc, .Lfunc_end29-_ZN14for_loop_testsILi10EiE7do_testEPKiPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi11EiEvPKT0_iPKc -.LCPI30_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi11EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi11EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi11EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi11EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi11EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi11EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi11EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi11EiEvPKT0_iPKc @@ -10453,12 +10401,14 @@ _Z27test_for_loop_unroll_factorILi11EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB30_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI30_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI30_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -10492,12 +10442,8 @@ _Z27test_for_loop_unroll_factorILi11EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi11EiEvPKT0_iPKc, .Lfunc_end30-_Z27test_for_loop_unroll_factorILi11EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi10EiEvPKT0_iPKc -.LCPI31_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi10EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi10EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi10EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi10EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi10EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi10EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi10EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi10EiEvPKT0_iPKc @@ -10822,12 +10768,14 @@ _Z27test_for_loop_unroll_factorILi10EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB31_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI31_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI31_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -10861,12 +10809,8 @@ _Z27test_for_loop_unroll_factorILi10EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi10EiEvPKT0_iPKc, .Lfunc_end31-_Z27test_for_loop_unroll_factorILi10EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi9EiEvPKT0_iPKc -.LCPI32_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi9EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi9EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi9EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi9EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi9EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi9EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi9EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi9EiEvPKT0_iPKc @@ -11191,12 +11135,14 @@ _Z27test_for_loop_unroll_factorILi9EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB32_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI32_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI32_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -11230,12 +11176,8 @@ _Z27test_for_loop_unroll_factorILi9EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .size _Z27test_for_loop_unroll_factorILi9EiEvPKT0_iPKc, .Lfunc_end32-_Z27test_for_loop_unroll_factorILi9EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi8EiEvPKT0_iPKc -.LCPI33_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi8EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi8EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi8EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi8EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi8EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi8EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi8EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi8EiEvPKT0_iPKc @@ -11554,12 +11496,14 @@ _Z27test_for_loop_unroll_factorILi8EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB33_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI33_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI33_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -11593,12 +11537,8 @@ _Z27test_for_loop_unroll_factorILi8EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .size _Z27test_for_loop_unroll_factorILi8EiEvPKT0_iPKc, .Lfunc_end33-_Z27test_for_loop_unroll_factorILi8EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi7EiEvPKT0_iPKc -.LCPI34_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi7EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi7EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi7EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi7EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi7EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi7EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi7EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi7EiEvPKT0_iPKc @@ -11921,12 +11861,14 @@ _Z27test_for_loop_unroll_factorILi7EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB34_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI34_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI34_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -11960,12 +11902,8 @@ _Z27test_for_loop_unroll_factorILi7EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .size _Z27test_for_loop_unroll_factorILi7EiEvPKT0_iPKc, .Lfunc_end34-_Z27test_for_loop_unroll_factorILi7EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi6EiEvPKT0_iPKc -.LCPI35_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi6EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi6EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi6EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi6EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi6EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi6EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi6EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi6EiEvPKT0_iPKc @@ -12282,12 +12220,14 @@ _Z27test_for_loop_unroll_factorILi6EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB35_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI35_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI35_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -12321,12 +12261,8 @@ _Z27test_for_loop_unroll_factorILi6EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .size _Z27test_for_loop_unroll_factorILi6EiEvPKT0_iPKc, .Lfunc_end35-_Z27test_for_loop_unroll_factorILi6EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi5EiEvPKT0_iPKc -.LCPI36_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi5EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi5EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi5EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi5EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi5EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi5EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi5EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi5EiEvPKT0_iPKc @@ -12791,12 +12727,14 @@ _Z27test_for_loop_unroll_factorILi5EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB36_36: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI36_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI36_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -12830,12 +12768,8 @@ _Z27test_for_loop_unroll_factorILi5EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .size _Z27test_for_loop_unroll_factorILi5EiEvPKT0_iPKc, .Lfunc_end36-_Z27test_for_loop_unroll_factorILi5EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi4EiEvPKT0_iPKc -.LCPI37_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi4EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi4EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi4EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi4EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi4EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi4EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi4EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi4EiEvPKT0_iPKc @@ -13272,12 +13206,14 @@ _Z27test_for_loop_unroll_factorILi4EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB37_36: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI37_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI37_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -13311,12 +13247,8 @@ _Z27test_for_loop_unroll_factorILi4EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .size _Z27test_for_loop_unroll_factorILi4EiEvPKT0_iPKc, .Lfunc_end37-_Z27test_for_loop_unroll_factorILi4EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi3EiEvPKT0_iPKc -.LCPI38_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi3EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi3EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi3EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi3EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi3EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi3EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi3EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi3EiEvPKT0_iPKc @@ -13741,12 +13673,14 @@ _Z27test_for_loop_unroll_factorILi3EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB38_36: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI38_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI38_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -13780,12 +13714,8 @@ _Z27test_for_loop_unroll_factorILi3EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .size _Z27test_for_loop_unroll_factorILi3EiEvPKT0_iPKc, .Lfunc_end38-_Z27test_for_loop_unroll_factorILi3EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi2EiEvPKT0_iPKc -.LCPI39_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi2EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi2EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi2EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi2EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi2EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi2EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi2EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi2EiEvPKT0_iPKc @@ -14167,12 +14097,14 @@ _Z27test_for_loop_unroll_factorILi2EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB39_34: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI39_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI39_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -14206,12 +14138,8 @@ _Z27test_for_loop_unroll_factorILi2EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .size _Z27test_for_loop_unroll_factorILi2EiEvPKT0_iPKc, .Lfunc_end39-_Z27test_for_loop_unroll_factorILi2EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi1EiEvPKT0_iPKc -.LCPI40_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi1EiEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi1EiEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi1EiEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi1EiEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi1EiEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi1EiEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi1EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi1EiEvPKT0_iPKc @@ -14421,12 +14349,14 @@ _Z27test_for_loop_unroll_factorILi1EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa ld.w $a0, $s3, %pc_lo12(current_test) .LBB40_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI40_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI40_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -14459,12 +14389,8 @@ _Z27test_for_loop_unroll_factorILi1EiEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .size _Z27test_for_loop_unroll_factorILi1EiEvPKT0_iPKc, .Lfunc_end40-_Z27test_for_loop_unroll_factorILi1EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi32EiEvPKT0_iPKc -.LCPI41_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi32EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi32EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi32EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi32EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi32EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi32EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi32EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi32EiEvPKT0_iPKc @@ -14882,12 +14808,14 @@ _Z29test_while_loop_unroll_factorILi32EiEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB41_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI41_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI41_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -14921,12 +14849,8 @@ _Z29test_while_loop_unroll_factorILi32EiEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi32EiEvPKT0_iPKc, .Lfunc_end41-_Z29test_while_loop_unroll_factorILi32EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi31EiEvPKT0_iPKc -.LCPI42_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi31EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi31EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi31EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi31EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi31EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi31EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi31EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi31EiEvPKT0_iPKc @@ -15348,12 +15272,14 @@ _Z29test_while_loop_unroll_factorILi31EiEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB42_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI42_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI42_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -15387,12 +15313,8 @@ _Z29test_while_loop_unroll_factorILi31EiEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi31EiEvPKT0_iPKc, .Lfunc_end42-_Z29test_while_loop_unroll_factorILi31EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi30EiEvPKT0_iPKc -.LCPI43_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi30EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi30EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi30EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi30EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi30EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi30EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi30EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi30EiEvPKT0_iPKc @@ -15808,12 +15730,14 @@ _Z29test_while_loop_unroll_factorILi30EiEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB43_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI43_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI43_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -15926,12 +15850,8 @@ _ZN16while_loop_testsILi28EiE7do_testEPKiPKc: # @_ZN16while_loop_testsILi28EiE7d .size _ZN16while_loop_testsILi28EiE7do_testEPKiPKc, .Lfunc_end44-_ZN16while_loop_testsILi28EiE7do_testEPKiPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi29EiEvPKT0_iPKc -.LCPI45_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi29EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi29EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi29EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi29EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi29EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi29EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi29EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi29EiEvPKT0_iPKc @@ -16348,12 +16268,14 @@ _Z29test_while_loop_unroll_factorILi29EiEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB45_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI45_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI45_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -16387,12 +16309,8 @@ _Z29test_while_loop_unroll_factorILi29EiEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi29EiEvPKT0_iPKc, .Lfunc_end45-_Z29test_while_loop_unroll_factorILi29EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi28EiEvPKT0_iPKc -.LCPI46_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi28EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi28EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi28EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi28EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi28EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi28EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi28EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi28EiEvPKT0_iPKc @@ -16805,12 +16723,14 @@ _Z29test_while_loop_unroll_factorILi28EiEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB46_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI46_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI46_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -16844,12 +16764,8 @@ _Z29test_while_loop_unroll_factorILi28EiEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi28EiEvPKT0_iPKc, .Lfunc_end46-_Z29test_while_loop_unroll_factorILi28EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi27EiEvPKT0_iPKc -.LCPI47_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi27EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi27EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi27EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi27EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi27EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi27EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi27EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi27EiEvPKT0_iPKc @@ -17259,12 +17175,14 @@ _Z29test_while_loop_unroll_factorILi27EiEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB47_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI47_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI47_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -17298,12 +17216,8 @@ _Z29test_while_loop_unroll_factorILi27EiEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi27EiEvPKT0_iPKc, .Lfunc_end47-_Z29test_while_loop_unroll_factorILi27EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi26EiEvPKT0_iPKc -.LCPI48_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi26EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi26EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi26EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi26EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi26EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi26EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi26EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi26EiEvPKT0_iPKc @@ -17707,12 +17621,14 @@ _Z29test_while_loop_unroll_factorILi26EiEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB48_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI48_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI48_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -17746,12 +17662,8 @@ _Z29test_while_loop_unroll_factorILi26EiEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi26EiEvPKT0_iPKc, .Lfunc_end48-_Z29test_while_loop_unroll_factorILi26EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi25EiEvPKT0_iPKc -.LCPI49_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi25EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi25EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi25EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi25EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi25EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi25EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi25EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi25EiEvPKT0_iPKc @@ -18160,12 +18072,14 @@ _Z29test_while_loop_unroll_factorILi25EiEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB49_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI49_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI49_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -18199,12 +18113,8 @@ _Z29test_while_loop_unroll_factorILi25EiEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi25EiEvPKT0_iPKc, .Lfunc_end49-_Z29test_while_loop_unroll_factorILi25EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi24EiEvPKT0_iPKc -.LCPI50_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi24EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi24EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi24EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi24EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi24EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi24EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi24EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi24EiEvPKT0_iPKc @@ -18608,12 +18518,14 @@ _Z29test_while_loop_unroll_factorILi24EiEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB50_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI50_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI50_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -18647,12 +18559,8 @@ _Z29test_while_loop_unroll_factorILi24EiEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi24EiEvPKT0_iPKc, .Lfunc_end50-_Z29test_while_loop_unroll_factorILi24EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi23EiEvPKT0_iPKc -.LCPI51_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi23EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi23EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi23EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi23EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi23EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi23EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi23EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi23EiEvPKT0_iPKc @@ -19057,12 +18965,14 @@ _Z29test_while_loop_unroll_factorILi23EiEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB51_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI51_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI51_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -19096,12 +19006,8 @@ _Z29test_while_loop_unroll_factorILi23EiEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi23EiEvPKT0_iPKc, .Lfunc_end51-_Z29test_while_loop_unroll_factorILi23EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi22EiEvPKT0_iPKc -.LCPI52_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi22EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi22EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi22EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi22EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi22EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi22EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi22EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi22EiEvPKT0_iPKc @@ -19501,12 +19407,14 @@ _Z29test_while_loop_unroll_factorILi22EiEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB52_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI52_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI52_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -19540,12 +19448,8 @@ _Z29test_while_loop_unroll_factorILi22EiEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi22EiEvPKT0_iPKc, .Lfunc_end52-_Z29test_while_loop_unroll_factorILi22EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi21EiEvPKT0_iPKc -.LCPI53_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi21EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi21EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi21EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi21EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi21EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi21EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi21EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi21EiEvPKT0_iPKc @@ -19946,12 +19850,14 @@ _Z29test_while_loop_unroll_factorILi21EiEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB53_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI53_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI53_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -20063,12 +19969,8 @@ _ZN16while_loop_testsILi19EiE7do_testEPKiPKc: # @_ZN16while_loop_testsILi19EiE7d .size _ZN16while_loop_testsILi19EiE7do_testEPKiPKc, .Lfunc_end54-_ZN16while_loop_testsILi19EiE7do_testEPKiPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi20EiEvPKT0_iPKc -.LCPI55_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi20EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi20EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi20EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi20EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi20EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi20EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi20EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi20EiEvPKT0_iPKc @@ -20477,12 +20379,14 @@ _Z29test_while_loop_unroll_factorILi20EiEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB55_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI55_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI55_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -20516,12 +20420,8 @@ _Z29test_while_loop_unroll_factorILi20EiEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi20EiEvPKT0_iPKc, .Lfunc_end55-_Z29test_while_loop_unroll_factorILi20EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi19EiEvPKT0_iPKc -.LCPI56_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi19EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi19EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi19EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi19EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi19EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi19EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi19EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi19EiEvPKT0_iPKc @@ -20926,12 +20826,14 @@ _Z29test_while_loop_unroll_factorILi19EiEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB56_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI56_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI56_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -20965,12 +20867,8 @@ _Z29test_while_loop_unroll_factorILi19EiEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi19EiEvPKT0_iPKc, .Lfunc_end56-_Z29test_while_loop_unroll_factorILi19EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi18EiEvPKT0_iPKc -.LCPI57_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi18EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi18EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi18EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi18EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi18EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi18EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi18EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi18EiEvPKT0_iPKc @@ -21371,12 +21269,14 @@ _Z29test_while_loop_unroll_factorILi18EiEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB57_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI57_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI57_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -21410,12 +21310,8 @@ _Z29test_while_loop_unroll_factorILi18EiEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi18EiEvPKT0_iPKc, .Lfunc_end57-_Z29test_while_loop_unroll_factorILi18EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi17EiEvPKT0_iPKc -.LCPI58_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi17EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi17EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi17EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi17EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi17EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi17EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi17EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi17EiEvPKT0_iPKc @@ -21814,12 +21710,14 @@ _Z29test_while_loop_unroll_factorILi17EiEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB58_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI58_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI58_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -21853,12 +21751,8 @@ _Z29test_while_loop_unroll_factorILi17EiEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi17EiEvPKT0_iPKc, .Lfunc_end58-_Z29test_while_loop_unroll_factorILi17EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi16EiEvPKT0_iPKc -.LCPI59_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi16EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi16EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi16EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi16EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi16EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi16EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi16EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi16EiEvPKT0_iPKc @@ -22198,12 +22092,14 @@ _Z29test_while_loop_unroll_factorILi16EiEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB59_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI59_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI59_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -22237,12 +22133,8 @@ _Z29test_while_loop_unroll_factorILi16EiEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi16EiEvPKT0_iPKc, .Lfunc_end59-_Z29test_while_loop_unroll_factorILi16EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi15EiEvPKT0_iPKc -.LCPI60_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi15EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi15EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi15EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi15EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi15EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi15EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi15EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi15EiEvPKT0_iPKc @@ -22581,12 +22473,14 @@ _Z29test_while_loop_unroll_factorILi15EiEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB60_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI60_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI60_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -22620,12 +22514,8 @@ _Z29test_while_loop_unroll_factorILi15EiEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi15EiEvPKT0_iPKc, .Lfunc_end60-_Z29test_while_loop_unroll_factorILi15EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi14EiEvPKT0_iPKc -.LCPI61_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi14EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi14EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi14EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi14EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi14EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi14EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi14EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi14EiEvPKT0_iPKc @@ -22960,12 +22850,14 @@ _Z29test_while_loop_unroll_factorILi14EiEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB61_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI61_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI61_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -22999,12 +22891,8 @@ _Z29test_while_loop_unroll_factorILi14EiEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi14EiEvPKT0_iPKc, .Lfunc_end61-_Z29test_while_loop_unroll_factorILi14EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi13EiEvPKT0_iPKc -.LCPI62_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi13EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi13EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi13EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi13EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi13EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi13EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi13EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi13EiEvPKT0_iPKc @@ -23331,12 +23219,14 @@ _Z29test_while_loop_unroll_factorILi13EiEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB62_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI62_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI62_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -23370,12 +23260,8 @@ _Z29test_while_loop_unroll_factorILi13EiEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi13EiEvPKT0_iPKc, .Lfunc_end62-_Z29test_while_loop_unroll_factorILi13EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi12EiEvPKT0_iPKc -.LCPI63_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi12EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi12EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi12EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi12EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi12EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi12EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi12EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi12EiEvPKT0_iPKc @@ -23704,12 +23590,14 @@ _Z29test_while_loop_unroll_factorILi12EiEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB63_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI63_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI63_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -23824,12 +23712,8 @@ _ZN16while_loop_testsILi10EiE7do_testEPKiPKc: # @_ZN16while_loop_testsILi10EiE7d .size _ZN16while_loop_testsILi10EiE7do_testEPKiPKc, .Lfunc_end64-_ZN16while_loop_testsILi10EiE7do_testEPKiPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi11EiEvPKT0_iPKc -.LCPI65_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi11EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi11EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi11EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi11EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi11EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi11EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi11EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi11EiEvPKT0_iPKc @@ -24156,12 +24040,14 @@ _Z29test_while_loop_unroll_factorILi11EiEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB65_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI65_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI65_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -24195,12 +24081,8 @@ _Z29test_while_loop_unroll_factorILi11EiEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi11EiEvPKT0_iPKc, .Lfunc_end65-_Z29test_while_loop_unroll_factorILi11EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi10EiEvPKT0_iPKc -.LCPI66_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi10EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi10EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi10EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi10EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi10EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi10EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi10EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi10EiEvPKT0_iPKc @@ -24525,12 +24407,14 @@ _Z29test_while_loop_unroll_factorILi10EiEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB66_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI66_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI66_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -24564,12 +24448,8 @@ _Z29test_while_loop_unroll_factorILi10EiEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi10EiEvPKT0_iPKc, .Lfunc_end66-_Z29test_while_loop_unroll_factorILi10EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi9EiEvPKT0_iPKc -.LCPI67_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi9EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi9EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi9EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi9EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi9EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi9EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi9EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi9EiEvPKT0_iPKc @@ -24894,12 +24774,14 @@ _Z29test_while_loop_unroll_factorILi9EiEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB67_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI67_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI67_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -24933,12 +24815,8 @@ _Z29test_while_loop_unroll_factorILi9EiEvPKT0_iPKc: # @_Z29test_while_loop_unrol .size _Z29test_while_loop_unroll_factorILi9EiEvPKT0_iPKc, .Lfunc_end67-_Z29test_while_loop_unroll_factorILi9EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi8EiEvPKT0_iPKc -.LCPI68_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi8EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi8EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi8EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi8EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi8EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi8EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi8EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi8EiEvPKT0_iPKc @@ -25257,12 +25135,14 @@ _Z29test_while_loop_unroll_factorILi8EiEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB68_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI68_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI68_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -25296,12 +25176,8 @@ _Z29test_while_loop_unroll_factorILi8EiEvPKT0_iPKc: # @_Z29test_while_loop_unrol .size _Z29test_while_loop_unroll_factorILi8EiEvPKT0_iPKc, .Lfunc_end68-_Z29test_while_loop_unroll_factorILi8EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi7EiEvPKT0_iPKc -.LCPI69_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi7EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi7EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi7EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi7EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi7EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi7EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi7EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi7EiEvPKT0_iPKc @@ -25624,12 +25500,14 @@ _Z29test_while_loop_unroll_factorILi7EiEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB69_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI69_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI69_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -25663,12 +25541,8 @@ _Z29test_while_loop_unroll_factorILi7EiEvPKT0_iPKc: # @_Z29test_while_loop_unrol .size _Z29test_while_loop_unroll_factorILi7EiEvPKT0_iPKc, .Lfunc_end69-_Z29test_while_loop_unroll_factorILi7EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi6EiEvPKT0_iPKc -.LCPI70_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi6EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi6EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi6EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi6EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi6EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi6EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi6EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi6EiEvPKT0_iPKc @@ -25985,12 +25859,14 @@ _Z29test_while_loop_unroll_factorILi6EiEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB70_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI70_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI70_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -26024,12 +25900,8 @@ _Z29test_while_loop_unroll_factorILi6EiEvPKT0_iPKc: # @_Z29test_while_loop_unrol .size _Z29test_while_loop_unroll_factorILi6EiEvPKT0_iPKc, .Lfunc_end70-_Z29test_while_loop_unroll_factorILi6EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi5EiEvPKT0_iPKc -.LCPI71_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi5EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi5EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi5EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi5EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi5EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi5EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi5EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi5EiEvPKT0_iPKc @@ -26494,12 +26366,14 @@ _Z29test_while_loop_unroll_factorILi5EiEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB71_36: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI71_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI71_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -26533,12 +26407,8 @@ _Z29test_while_loop_unroll_factorILi5EiEvPKT0_iPKc: # @_Z29test_while_loop_unrol .size _Z29test_while_loop_unroll_factorILi5EiEvPKT0_iPKc, .Lfunc_end71-_Z29test_while_loop_unroll_factorILi5EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi4EiEvPKT0_iPKc -.LCPI72_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi4EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi4EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi4EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi4EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi4EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi4EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi4EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi4EiEvPKT0_iPKc @@ -26975,12 +26845,14 @@ _Z29test_while_loop_unroll_factorILi4EiEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB72_36: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI72_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI72_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -27014,12 +26886,8 @@ _Z29test_while_loop_unroll_factorILi4EiEvPKT0_iPKc: # @_Z29test_while_loop_unrol .size _Z29test_while_loop_unroll_factorILi4EiEvPKT0_iPKc, .Lfunc_end72-_Z29test_while_loop_unroll_factorILi4EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi3EiEvPKT0_iPKc -.LCPI73_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi3EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi3EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi3EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi3EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi3EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi3EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi3EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi3EiEvPKT0_iPKc @@ -27444,12 +27312,14 @@ _Z29test_while_loop_unroll_factorILi3EiEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB73_36: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI73_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI73_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -27483,12 +27353,8 @@ _Z29test_while_loop_unroll_factorILi3EiEvPKT0_iPKc: # @_Z29test_while_loop_unrol .size _Z29test_while_loop_unroll_factorILi3EiEvPKT0_iPKc, .Lfunc_end73-_Z29test_while_loop_unroll_factorILi3EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi2EiEvPKT0_iPKc -.LCPI74_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi2EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi2EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi2EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi2EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi2EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi2EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi2EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi2EiEvPKT0_iPKc @@ -27870,12 +27736,14 @@ _Z29test_while_loop_unroll_factorILi2EiEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB74_34: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI74_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI74_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -27909,12 +27777,8 @@ _Z29test_while_loop_unroll_factorILi2EiEvPKT0_iPKc: # @_Z29test_while_loop_unrol .size _Z29test_while_loop_unroll_factorILi2EiEvPKT0_iPKc, .Lfunc_end74-_Z29test_while_loop_unroll_factorILi2EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi1EiEvPKT0_iPKc -.LCPI75_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi1EiEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi1EiEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi1EiEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi1EiEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi1EiEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi1EiEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi1EiEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi1EiEvPKT0_iPKc @@ -28124,12 +27988,14 @@ _Z29test_while_loop_unroll_factorILi1EiEvPKT0_iPKc: # @_Z29test_while_loop_unrol ld.w $a0, $s3, %pc_lo12(current_test) .LBB75_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI75_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI75_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -28162,12 +28028,8 @@ _Z29test_while_loop_unroll_factorILi1EiEvPKT0_iPKc: # @_Z29test_while_loop_unrol .size _Z29test_while_loop_unroll_factorILi1EiEvPKT0_iPKc, .Lfunc_end75-_Z29test_while_loop_unroll_factorILi1EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi32EiEvPKT0_iPKc -.LCPI76_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi32EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi32EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi32EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi32EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi32EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi32EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi32EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi32EiEvPKT0_iPKc @@ -28582,12 +28444,14 @@ _Z26test_do_loop_unroll_factorILi32EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB76_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI76_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI76_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -28621,12 +28485,8 @@ _Z26test_do_loop_unroll_factorILi32EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi32EiEvPKT0_iPKc, .Lfunc_end76-_Z26test_do_loop_unroll_factorILi32EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi31EiEvPKT0_iPKc -.LCPI77_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi31EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi31EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi31EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi31EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi31EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi31EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi31EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi31EiEvPKT0_iPKc @@ -29044,12 +28904,14 @@ _Z26test_do_loop_unroll_factorILi31EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB77_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI77_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI77_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -29083,12 +28945,8 @@ _Z26test_do_loop_unroll_factorILi31EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi31EiEvPKT0_iPKc, .Lfunc_end77-_Z26test_do_loop_unroll_factorILi31EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi30EiEvPKT0_iPKc -.LCPI78_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi30EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi30EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi30EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi30EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi30EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi30EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi30EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi30EiEvPKT0_iPKc @@ -29502,12 +29360,14 @@ _Z26test_do_loop_unroll_factorILi30EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB78_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI78_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI78_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -29620,12 +29480,8 @@ _ZN13do_loop_testsILi28EiE7do_testEPKiPKc: # @_ZN13do_loop_testsILi28EiE7do_test .size _ZN13do_loop_testsILi28EiE7do_testEPKiPKc, .Lfunc_end79-_ZN13do_loop_testsILi28EiE7do_testEPKiPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi29EiEvPKT0_iPKc -.LCPI80_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi29EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi29EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi29EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi29EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi29EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi29EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi29EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi29EiEvPKT0_iPKc @@ -30040,12 +29896,14 @@ _Z26test_do_loop_unroll_factorILi29EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB80_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI80_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI80_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -30079,12 +29937,8 @@ _Z26test_do_loop_unroll_factorILi29EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi29EiEvPKT0_iPKc, .Lfunc_end80-_Z26test_do_loop_unroll_factorILi29EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi28EiEvPKT0_iPKc -.LCPI81_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi28EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi28EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi28EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi28EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi28EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi28EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi28EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi28EiEvPKT0_iPKc @@ -30495,12 +30349,14 @@ _Z26test_do_loop_unroll_factorILi28EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB81_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI81_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI81_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -30534,12 +30390,8 @@ _Z26test_do_loop_unroll_factorILi28EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi28EiEvPKT0_iPKc, .Lfunc_end81-_Z26test_do_loop_unroll_factorILi28EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi27EiEvPKT0_iPKc -.LCPI82_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi27EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi27EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi27EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi27EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi27EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi27EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi27EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi27EiEvPKT0_iPKc @@ -30947,12 +30799,14 @@ _Z26test_do_loop_unroll_factorILi27EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB82_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI82_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI82_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -30986,12 +30840,8 @@ _Z26test_do_loop_unroll_factorILi27EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi27EiEvPKT0_iPKc, .Lfunc_end82-_Z26test_do_loop_unroll_factorILi27EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi26EiEvPKT0_iPKc -.LCPI83_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi26EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi26EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi26EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi26EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi26EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi26EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi26EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi26EiEvPKT0_iPKc @@ -31393,12 +31243,14 @@ _Z26test_do_loop_unroll_factorILi26EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB83_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI83_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI83_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -31432,12 +31284,8 @@ _Z26test_do_loop_unroll_factorILi26EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi26EiEvPKT0_iPKc, .Lfunc_end83-_Z26test_do_loop_unroll_factorILi26EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi25EiEvPKT0_iPKc -.LCPI84_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi25EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi25EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi25EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi25EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi25EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi25EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi25EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi25EiEvPKT0_iPKc @@ -31844,12 +31692,14 @@ _Z26test_do_loop_unroll_factorILi25EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB84_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI84_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI84_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -31883,12 +31733,8 @@ _Z26test_do_loop_unroll_factorILi25EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi25EiEvPKT0_iPKc, .Lfunc_end84-_Z26test_do_loop_unroll_factorILi25EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi24EiEvPKT0_iPKc -.LCPI85_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi24EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi24EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi24EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi24EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi24EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi24EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi24EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi24EiEvPKT0_iPKc @@ -32290,12 +32136,14 @@ _Z26test_do_loop_unroll_factorILi24EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB85_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI85_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI85_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -32329,12 +32177,8 @@ _Z26test_do_loop_unroll_factorILi24EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi24EiEvPKT0_iPKc, .Lfunc_end85-_Z26test_do_loop_unroll_factorILi24EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi23EiEvPKT0_iPKc -.LCPI86_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi23EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi23EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi23EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi23EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi23EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi23EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi23EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi23EiEvPKT0_iPKc @@ -32737,12 +32581,14 @@ _Z26test_do_loop_unroll_factorILi23EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB86_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI86_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI86_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -32776,12 +32622,8 @@ _Z26test_do_loop_unroll_factorILi23EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi23EiEvPKT0_iPKc, .Lfunc_end86-_Z26test_do_loop_unroll_factorILi23EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi22EiEvPKT0_iPKc -.LCPI87_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi22EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi22EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi22EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi22EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi22EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi22EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi22EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi22EiEvPKT0_iPKc @@ -33179,12 +33021,14 @@ _Z26test_do_loop_unroll_factorILi22EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB87_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI87_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI87_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -33218,12 +33062,8 @@ _Z26test_do_loop_unroll_factorILi22EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi22EiEvPKT0_iPKc, .Lfunc_end87-_Z26test_do_loop_unroll_factorILi22EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi21EiEvPKT0_iPKc -.LCPI88_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi21EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi21EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi21EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi21EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi21EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi21EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi21EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi21EiEvPKT0_iPKc @@ -33622,12 +33462,14 @@ _Z26test_do_loop_unroll_factorILi21EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB88_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI88_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI88_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -33739,12 +33581,8 @@ _ZN13do_loop_testsILi19EiE7do_testEPKiPKc: # @_ZN13do_loop_testsILi19EiE7do_test .size _ZN13do_loop_testsILi19EiE7do_testEPKiPKc, .Lfunc_end89-_ZN13do_loop_testsILi19EiE7do_testEPKiPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi20EiEvPKT0_iPKc -.LCPI90_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi20EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi20EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi20EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi20EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi20EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi20EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi20EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi20EiEvPKT0_iPKc @@ -34155,12 +33993,14 @@ _Z26test_do_loop_unroll_factorILi20EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB90_36: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI90_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI90_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -34194,12 +34034,8 @@ _Z26test_do_loop_unroll_factorILi20EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi20EiEvPKT0_iPKc, .Lfunc_end90-_Z26test_do_loop_unroll_factorILi20EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi19EiEvPKT0_iPKc -.LCPI91_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi19EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi19EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi19EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi19EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi19EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi19EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi19EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi19EiEvPKT0_iPKc @@ -34606,12 +34442,14 @@ _Z26test_do_loop_unroll_factorILi19EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB91_36: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI91_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI91_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -34645,12 +34483,8 @@ _Z26test_do_loop_unroll_factorILi19EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi19EiEvPKT0_iPKc, .Lfunc_end91-_Z26test_do_loop_unroll_factorILi19EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi18EiEvPKT0_iPKc -.LCPI92_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi18EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi18EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi18EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi18EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi18EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi18EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi18EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi18EiEvPKT0_iPKc @@ -35053,12 +34887,14 @@ _Z26test_do_loop_unroll_factorILi18EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB92_36: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI92_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI92_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -35092,12 +34928,8 @@ _Z26test_do_loop_unroll_factorILi18EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi18EiEvPKT0_iPKc, .Lfunc_end92-_Z26test_do_loop_unroll_factorILi18EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi17EiEvPKT0_iPKc -.LCPI93_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi17EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi17EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi17EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi17EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi17EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi17EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi17EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi17EiEvPKT0_iPKc @@ -35500,12 +35332,14 @@ _Z26test_do_loop_unroll_factorILi17EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB93_36: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI93_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI93_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -35539,12 +35373,8 @@ _Z26test_do_loop_unroll_factorILi17EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi17EiEvPKT0_iPKc, .Lfunc_end93-_Z26test_do_loop_unroll_factorILi17EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi16EiEvPKT0_iPKc -.LCPI94_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi16EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi16EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi16EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi16EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi16EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi16EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi16EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi16EiEvPKT0_iPKc @@ -35881,12 +35711,14 @@ _Z26test_do_loop_unroll_factorILi16EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB94_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI94_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI94_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -35920,12 +35752,8 @@ _Z26test_do_loop_unroll_factorILi16EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi16EiEvPKT0_iPKc, .Lfunc_end94-_Z26test_do_loop_unroll_factorILi16EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi15EiEvPKT0_iPKc -.LCPI95_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi15EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi15EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi15EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi15EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi15EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi15EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi15EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi15EiEvPKT0_iPKc @@ -36260,12 +36088,14 @@ _Z26test_do_loop_unroll_factorILi15EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB95_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI95_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI95_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -36299,12 +36129,8 @@ _Z26test_do_loop_unroll_factorILi15EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi15EiEvPKT0_iPKc, .Lfunc_end95-_Z26test_do_loop_unroll_factorILi15EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi14EiEvPKT0_iPKc -.LCPI96_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi14EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi14EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi14EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi14EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi14EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi14EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi14EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi14EiEvPKT0_iPKc @@ -36637,12 +36463,14 @@ _Z26test_do_loop_unroll_factorILi14EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB96_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI96_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI96_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -36676,12 +36504,8 @@ _Z26test_do_loop_unroll_factorILi14EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi14EiEvPKT0_iPKc, .Lfunc_end96-_Z26test_do_loop_unroll_factorILi14EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi13EiEvPKT0_iPKc -.LCPI97_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi13EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi13EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi13EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi13EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi13EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi13EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi13EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi13EiEvPKT0_iPKc @@ -37006,12 +36830,14 @@ _Z26test_do_loop_unroll_factorILi13EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB97_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI97_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI97_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -37045,12 +36871,8 @@ _Z26test_do_loop_unroll_factorILi13EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi13EiEvPKT0_iPKc, .Lfunc_end97-_Z26test_do_loop_unroll_factorILi13EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi12EiEvPKT0_iPKc -.LCPI98_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi12EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi12EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi12EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi12EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi12EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi12EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi12EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi12EiEvPKT0_iPKc @@ -37377,12 +37199,14 @@ _Z26test_do_loop_unroll_factorILi12EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB98_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI98_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI98_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -37497,12 +37321,8 @@ _ZN13do_loop_testsILi10EiE7do_testEPKiPKc: # @_ZN13do_loop_testsILi10EiE7do_test .size _ZN13do_loop_testsILi10EiE7do_testEPKiPKc, .Lfunc_end99-_ZN13do_loop_testsILi10EiE7do_testEPKiPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi11EiEvPKT0_iPKc -.LCPI100_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi11EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi11EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi11EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi11EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi11EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi11EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi11EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi11EiEvPKT0_iPKc @@ -37827,12 +37647,14 @@ _Z26test_do_loop_unroll_factorILi11EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB100_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI100_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI100_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -37866,12 +37688,8 @@ _Z26test_do_loop_unroll_factorILi11EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi11EiEvPKT0_iPKc, .Lfunc_end100-_Z26test_do_loop_unroll_factorILi11EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi10EiEvPKT0_iPKc -.LCPI101_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi10EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi10EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi10EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi10EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi10EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi10EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi10EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi10EiEvPKT0_iPKc @@ -38194,12 +38012,14 @@ _Z26test_do_loop_unroll_factorILi10EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB101_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI101_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI101_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -38233,12 +38053,8 @@ _Z26test_do_loop_unroll_factorILi10EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi10EiEvPKT0_iPKc, .Lfunc_end101-_Z26test_do_loop_unroll_factorILi10EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi9EiEvPKT0_iPKc -.LCPI102_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi9EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi9EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi9EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi9EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi9EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi9EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi9EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi9EiEvPKT0_iPKc @@ -38561,12 +38377,14 @@ _Z26test_do_loop_unroll_factorILi9EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB102_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI102_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI102_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -38600,12 +38418,8 @@ _Z26test_do_loop_unroll_factorILi9EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .size _Z26test_do_loop_unroll_factorILi9EiEvPKT0_iPKc, .Lfunc_end102-_Z26test_do_loop_unroll_factorILi9EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi8EiEvPKT0_iPKc -.LCPI103_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi8EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi8EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi8EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi8EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi8EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi8EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi8EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi8EiEvPKT0_iPKc @@ -38921,12 +38735,14 @@ _Z26test_do_loop_unroll_factorILi8EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB103_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI103_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI103_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -38960,12 +38776,8 @@ _Z26test_do_loop_unroll_factorILi8EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .size _Z26test_do_loop_unroll_factorILi8EiEvPKT0_iPKc, .Lfunc_end103-_Z26test_do_loop_unroll_factorILi8EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi7EiEvPKT0_iPKc -.LCPI104_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi7EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi7EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi7EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi7EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi7EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi7EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi7EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi7EiEvPKT0_iPKc @@ -39284,12 +39096,14 @@ _Z26test_do_loop_unroll_factorILi7EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB104_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI104_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI104_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -39323,12 +39137,8 @@ _Z26test_do_loop_unroll_factorILi7EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .size _Z26test_do_loop_unroll_factorILi7EiEvPKT0_iPKc, .Lfunc_end104-_Z26test_do_loop_unroll_factorILi7EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi6EiEvPKT0_iPKc -.LCPI105_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi6EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi6EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi6EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi6EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi6EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi6EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi6EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi6EiEvPKT0_iPKc @@ -39643,12 +39453,14 @@ _Z26test_do_loop_unroll_factorILi6EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB105_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI105_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI105_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -39682,12 +39494,8 @@ _Z26test_do_loop_unroll_factorILi6EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .size _Z26test_do_loop_unroll_factorILi6EiEvPKT0_iPKc, .Lfunc_end105-_Z26test_do_loop_unroll_factorILi6EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi5EiEvPKT0_iPKc -.LCPI106_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi5EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi5EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi5EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi5EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi5EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi5EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi5EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi5EiEvPKT0_iPKc @@ -40150,12 +39958,14 @@ _Z26test_do_loop_unroll_factorILi5EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB106_36: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI106_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI106_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -40189,12 +39999,8 @@ _Z26test_do_loop_unroll_factorILi5EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .size _Z26test_do_loop_unroll_factorILi5EiEvPKT0_iPKc, .Lfunc_end106-_Z26test_do_loop_unroll_factorILi5EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi4EiEvPKT0_iPKc -.LCPI107_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi4EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi4EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi4EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi4EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi4EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi4EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi4EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi4EiEvPKT0_iPKc @@ -40627,12 +40433,14 @@ _Z26test_do_loop_unroll_factorILi4EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB107_36: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI107_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI107_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -40666,12 +40474,8 @@ _Z26test_do_loop_unroll_factorILi4EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .size _Z26test_do_loop_unroll_factorILi4EiEvPKT0_iPKc, .Lfunc_end107-_Z26test_do_loop_unroll_factorILi4EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi3EiEvPKT0_iPKc -.LCPI108_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi3EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi3EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi3EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi3EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi3EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi3EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi3EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi3EiEvPKT0_iPKc @@ -41094,12 +40898,14 @@ _Z26test_do_loop_unroll_factorILi3EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB108_36: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI108_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI108_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -41133,12 +40939,8 @@ _Z26test_do_loop_unroll_factorILi3EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .size _Z26test_do_loop_unroll_factorILi3EiEvPKT0_iPKc, .Lfunc_end108-_Z26test_do_loop_unroll_factorILi3EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi2EiEvPKT0_iPKc -.LCPI109_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi2EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi2EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi2EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi2EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi2EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi2EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi2EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi2EiEvPKT0_iPKc @@ -41515,12 +41317,14 @@ _Z26test_do_loop_unroll_factorILi2EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB109_34: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI109_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI109_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -41554,12 +41358,8 @@ _Z26test_do_loop_unroll_factorILi2EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .size _Z26test_do_loop_unroll_factorILi2EiEvPKT0_iPKc, .Lfunc_end109-_Z26test_do_loop_unroll_factorILi2EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi1EiEvPKT0_iPKc -.LCPI110_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi1EiEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi1EiEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi1EiEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi1EiEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi1EiEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi1EiEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi1EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi1EiEvPKT0_iPKc @@ -41769,12 +41569,14 @@ _Z26test_do_loop_unroll_factorILi1EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact ld.w $a0, $s3, %pc_lo12(current_test) .LBB110_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI110_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI110_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -41807,12 +41609,8 @@ _Z26test_do_loop_unroll_factorILi1EiEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .size _Z26test_do_loop_unroll_factorILi1EiEvPKT0_iPKc, .Lfunc_end110-_Z26test_do_loop_unroll_factorILi1EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi32EiEvPKT0_iPKc -.LCPI111_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi32EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi32EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi32EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi32EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi32EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi32EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi32EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi32EiEvPKT0_iPKc @@ -42227,12 +42025,14 @@ _Z28test_goto_loop_unroll_factorILi32EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB111_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI111_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI111_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -42266,12 +42066,8 @@ _Z28test_goto_loop_unroll_factorILi32EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi32EiEvPKT0_iPKc, .Lfunc_end111-_Z28test_goto_loop_unroll_factorILi32EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi31EiEvPKT0_iPKc -.LCPI112_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi31EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi31EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi31EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi31EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi31EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi31EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi31EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi31EiEvPKT0_iPKc @@ -42689,12 +42485,14 @@ _Z28test_goto_loop_unroll_factorILi31EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB112_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI112_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI112_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -42728,12 +42526,8 @@ _Z28test_goto_loop_unroll_factorILi31EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi31EiEvPKT0_iPKc, .Lfunc_end112-_Z28test_goto_loop_unroll_factorILi31EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi30EiEvPKT0_iPKc -.LCPI113_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi30EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi30EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi30EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi30EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi30EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi30EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi30EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi30EiEvPKT0_iPKc @@ -43147,12 +42941,14 @@ _Z28test_goto_loop_unroll_factorILi30EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB113_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI113_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI113_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -43265,12 +43061,8 @@ _ZN15goto_loop_testsILi28EiE7do_testEPKiPKc: # @_ZN15goto_loop_testsILi28EiE7do_ .size _ZN15goto_loop_testsILi28EiE7do_testEPKiPKc, .Lfunc_end114-_ZN15goto_loop_testsILi28EiE7do_testEPKiPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi29EiEvPKT0_iPKc -.LCPI115_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi29EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi29EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi29EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi29EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi29EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi29EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi29EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi29EiEvPKT0_iPKc @@ -43685,12 +43477,14 @@ _Z28test_goto_loop_unroll_factorILi29EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB115_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI115_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI115_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -43724,12 +43518,8 @@ _Z28test_goto_loop_unroll_factorILi29EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi29EiEvPKT0_iPKc, .Lfunc_end115-_Z28test_goto_loop_unroll_factorILi29EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi28EiEvPKT0_iPKc -.LCPI116_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi28EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi28EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi28EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi28EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi28EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi28EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi28EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi28EiEvPKT0_iPKc @@ -44140,12 +43930,14 @@ _Z28test_goto_loop_unroll_factorILi28EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB116_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI116_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI116_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -44179,12 +43971,8 @@ _Z28test_goto_loop_unroll_factorILi28EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi28EiEvPKT0_iPKc, .Lfunc_end116-_Z28test_goto_loop_unroll_factorILi28EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi27EiEvPKT0_iPKc -.LCPI117_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi27EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi27EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi27EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi27EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi27EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi27EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi27EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi27EiEvPKT0_iPKc @@ -44592,12 +44380,14 @@ _Z28test_goto_loop_unroll_factorILi27EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB117_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI117_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI117_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -44631,12 +44421,8 @@ _Z28test_goto_loop_unroll_factorILi27EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi27EiEvPKT0_iPKc, .Lfunc_end117-_Z28test_goto_loop_unroll_factorILi27EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi26EiEvPKT0_iPKc -.LCPI118_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi26EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi26EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi26EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi26EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi26EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi26EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi26EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi26EiEvPKT0_iPKc @@ -45038,12 +44824,14 @@ _Z28test_goto_loop_unroll_factorILi26EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB118_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI118_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI118_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -45077,12 +44865,8 @@ _Z28test_goto_loop_unroll_factorILi26EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi26EiEvPKT0_iPKc, .Lfunc_end118-_Z28test_goto_loop_unroll_factorILi26EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi25EiEvPKT0_iPKc -.LCPI119_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi25EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi25EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi25EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi25EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi25EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi25EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi25EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi25EiEvPKT0_iPKc @@ -45489,12 +45273,14 @@ _Z28test_goto_loop_unroll_factorILi25EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB119_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI119_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI119_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -45528,12 +45314,8 @@ _Z28test_goto_loop_unroll_factorILi25EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi25EiEvPKT0_iPKc, .Lfunc_end119-_Z28test_goto_loop_unroll_factorILi25EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi24EiEvPKT0_iPKc -.LCPI120_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi24EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi24EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi24EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi24EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi24EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi24EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi24EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi24EiEvPKT0_iPKc @@ -45935,12 +45717,14 @@ _Z28test_goto_loop_unroll_factorILi24EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB120_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI120_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI120_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -45974,12 +45758,8 @@ _Z28test_goto_loop_unroll_factorILi24EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi24EiEvPKT0_iPKc, .Lfunc_end120-_Z28test_goto_loop_unroll_factorILi24EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi23EiEvPKT0_iPKc -.LCPI121_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi23EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi23EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi23EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi23EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi23EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi23EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi23EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi23EiEvPKT0_iPKc @@ -46382,12 +46162,14 @@ _Z28test_goto_loop_unroll_factorILi23EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB121_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI121_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI121_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -46421,12 +46203,8 @@ _Z28test_goto_loop_unroll_factorILi23EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi23EiEvPKT0_iPKc, .Lfunc_end121-_Z28test_goto_loop_unroll_factorILi23EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi22EiEvPKT0_iPKc -.LCPI122_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi22EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi22EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi22EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi22EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi22EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi22EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi22EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi22EiEvPKT0_iPKc @@ -46824,12 +46602,14 @@ _Z28test_goto_loop_unroll_factorILi22EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB122_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI122_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI122_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -46863,12 +46643,8 @@ _Z28test_goto_loop_unroll_factorILi22EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi22EiEvPKT0_iPKc, .Lfunc_end122-_Z28test_goto_loop_unroll_factorILi22EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi21EiEvPKT0_iPKc -.LCPI123_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi21EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi21EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi21EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi21EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi21EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi21EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi21EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi21EiEvPKT0_iPKc @@ -47267,12 +47043,14 @@ _Z28test_goto_loop_unroll_factorILi21EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB123_35: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI123_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI123_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -47384,12 +47162,8 @@ _ZN15goto_loop_testsILi19EiE7do_testEPKiPKc: # @_ZN15goto_loop_testsILi19EiE7do_ .size _ZN15goto_loop_testsILi19EiE7do_testEPKiPKc, .Lfunc_end124-_ZN15goto_loop_testsILi19EiE7do_testEPKiPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi20EiEvPKT0_iPKc -.LCPI125_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi20EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi20EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi20EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi20EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi20EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi20EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi20EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi20EiEvPKT0_iPKc @@ -47800,12 +47574,14 @@ _Z28test_goto_loop_unroll_factorILi20EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB125_36: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI125_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI125_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -47839,12 +47615,8 @@ _Z28test_goto_loop_unroll_factorILi20EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi20EiEvPKT0_iPKc, .Lfunc_end125-_Z28test_goto_loop_unroll_factorILi20EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi19EiEvPKT0_iPKc -.LCPI126_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi19EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi19EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi19EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi19EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi19EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi19EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi19EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi19EiEvPKT0_iPKc @@ -48251,12 +48023,14 @@ _Z28test_goto_loop_unroll_factorILi19EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB126_36: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI126_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI126_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -48290,12 +48064,8 @@ _Z28test_goto_loop_unroll_factorILi19EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi19EiEvPKT0_iPKc, .Lfunc_end126-_Z28test_goto_loop_unroll_factorILi19EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi18EiEvPKT0_iPKc -.LCPI127_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi18EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi18EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi18EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi18EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi18EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi18EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi18EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi18EiEvPKT0_iPKc @@ -48698,12 +48468,14 @@ _Z28test_goto_loop_unroll_factorILi18EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB127_36: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI127_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI127_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -48737,12 +48509,8 @@ _Z28test_goto_loop_unroll_factorILi18EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi18EiEvPKT0_iPKc, .Lfunc_end127-_Z28test_goto_loop_unroll_factorILi18EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi17EiEvPKT0_iPKc -.LCPI128_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi17EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi17EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi17EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi17EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi17EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi17EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi17EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi17EiEvPKT0_iPKc @@ -49145,12 +48913,14 @@ _Z28test_goto_loop_unroll_factorILi17EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB128_36: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI128_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI128_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -49184,12 +48954,8 @@ _Z28test_goto_loop_unroll_factorILi17EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi17EiEvPKT0_iPKc, .Lfunc_end128-_Z28test_goto_loop_unroll_factorILi17EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi16EiEvPKT0_iPKc -.LCPI129_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi16EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi16EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi16EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi16EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi16EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi16EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi16EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi16EiEvPKT0_iPKc @@ -49526,12 +49292,14 @@ _Z28test_goto_loop_unroll_factorILi16EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB129_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI129_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI129_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -49565,12 +49333,8 @@ _Z28test_goto_loop_unroll_factorILi16EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi16EiEvPKT0_iPKc, .Lfunc_end129-_Z28test_goto_loop_unroll_factorILi16EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi15EiEvPKT0_iPKc -.LCPI130_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi15EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi15EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi15EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi15EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi15EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi15EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi15EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi15EiEvPKT0_iPKc @@ -49905,12 +49669,14 @@ _Z28test_goto_loop_unroll_factorILi15EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB130_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI130_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI130_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -49944,12 +49710,8 @@ _Z28test_goto_loop_unroll_factorILi15EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi15EiEvPKT0_iPKc, .Lfunc_end130-_Z28test_goto_loop_unroll_factorILi15EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi14EiEvPKT0_iPKc -.LCPI131_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi14EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi14EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi14EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi14EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi14EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi14EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi14EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi14EiEvPKT0_iPKc @@ -50282,12 +50044,14 @@ _Z28test_goto_loop_unroll_factorILi14EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB131_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI131_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI131_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -50321,12 +50085,8 @@ _Z28test_goto_loop_unroll_factorILi14EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi14EiEvPKT0_iPKc, .Lfunc_end131-_Z28test_goto_loop_unroll_factorILi14EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi13EiEvPKT0_iPKc -.LCPI132_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi13EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi13EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi13EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi13EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi13EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi13EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi13EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi13EiEvPKT0_iPKc @@ -50651,12 +50411,14 @@ _Z28test_goto_loop_unroll_factorILi13EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB132_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI132_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI132_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -50690,12 +50452,8 @@ _Z28test_goto_loop_unroll_factorILi13EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi13EiEvPKT0_iPKc, .Lfunc_end132-_Z28test_goto_loop_unroll_factorILi13EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi12EiEvPKT0_iPKc -.LCPI133_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi12EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi12EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi12EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi12EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi12EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi12EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi12EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi12EiEvPKT0_iPKc @@ -51022,12 +50780,14 @@ _Z28test_goto_loop_unroll_factorILi12EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB133_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI133_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI133_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -51142,12 +50902,8 @@ _ZN15goto_loop_testsILi10EiE7do_testEPKiPKc: # @_ZN15goto_loop_testsILi10EiE7do_ .size _ZN15goto_loop_testsILi10EiE7do_testEPKiPKc, .Lfunc_end134-_ZN15goto_loop_testsILi10EiE7do_testEPKiPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi11EiEvPKT0_iPKc -.LCPI135_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi11EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi11EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi11EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi11EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi11EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi11EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi11EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi11EiEvPKT0_iPKc @@ -51472,12 +51228,14 @@ _Z28test_goto_loop_unroll_factorILi11EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB135_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI135_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI135_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -51511,12 +51269,8 @@ _Z28test_goto_loop_unroll_factorILi11EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi11EiEvPKT0_iPKc, .Lfunc_end135-_Z28test_goto_loop_unroll_factorILi11EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi10EiEvPKT0_iPKc -.LCPI136_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi10EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi10EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi10EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi10EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi10EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi10EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi10EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi10EiEvPKT0_iPKc @@ -51839,12 +51593,14 @@ _Z28test_goto_loop_unroll_factorILi10EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB136_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI136_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI136_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -51878,12 +51634,8 @@ _Z28test_goto_loop_unroll_factorILi10EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi10EiEvPKT0_iPKc, .Lfunc_end136-_Z28test_goto_loop_unroll_factorILi10EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi9EiEvPKT0_iPKc -.LCPI137_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi9EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi9EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi9EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi9EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi9EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi9EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi9EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi9EiEvPKT0_iPKc @@ -52206,12 +51958,14 @@ _Z28test_goto_loop_unroll_factorILi9EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB137_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI137_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI137_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -52245,12 +51999,8 @@ _Z28test_goto_loop_unroll_factorILi9EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .size _Z28test_goto_loop_unroll_factorILi9EiEvPKT0_iPKc, .Lfunc_end137-_Z28test_goto_loop_unroll_factorILi9EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi8EiEvPKT0_iPKc -.LCPI138_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi8EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi8EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi8EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi8EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi8EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi8EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi8EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi8EiEvPKT0_iPKc @@ -52566,12 +52316,14 @@ _Z28test_goto_loop_unroll_factorILi8EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB138_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI138_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI138_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -52605,12 +52357,8 @@ _Z28test_goto_loop_unroll_factorILi8EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .size _Z28test_goto_loop_unroll_factorILi8EiEvPKT0_iPKc, .Lfunc_end138-_Z28test_goto_loop_unroll_factorILi8EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi7EiEvPKT0_iPKc -.LCPI139_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi7EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi7EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi7EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi7EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi7EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi7EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi7EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi7EiEvPKT0_iPKc @@ -52929,12 +52677,14 @@ _Z28test_goto_loop_unroll_factorILi7EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB139_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI139_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI139_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -52968,12 +52718,8 @@ _Z28test_goto_loop_unroll_factorILi7EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .size _Z28test_goto_loop_unroll_factorILi7EiEvPKT0_iPKc, .Lfunc_end139-_Z28test_goto_loop_unroll_factorILi7EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi6EiEvPKT0_iPKc -.LCPI140_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi6EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi6EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi6EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi6EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi6EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi6EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi6EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi6EiEvPKT0_iPKc @@ -53288,12 +53034,14 @@ _Z28test_goto_loop_unroll_factorILi6EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB140_30: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI140_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI140_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -53327,12 +53075,8 @@ _Z28test_goto_loop_unroll_factorILi6EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .size _Z28test_goto_loop_unroll_factorILi6EiEvPKT0_iPKc, .Lfunc_end140-_Z28test_goto_loop_unroll_factorILi6EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi5EiEvPKT0_iPKc -.LCPI141_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi5EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi5EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi5EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi5EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi5EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi5EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi5EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi5EiEvPKT0_iPKc @@ -53795,12 +53539,14 @@ _Z28test_goto_loop_unroll_factorILi5EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB141_36: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI141_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI141_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -53834,12 +53580,8 @@ _Z28test_goto_loop_unroll_factorILi5EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .size _Z28test_goto_loop_unroll_factorILi5EiEvPKT0_iPKc, .Lfunc_end141-_Z28test_goto_loop_unroll_factorILi5EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi4EiEvPKT0_iPKc -.LCPI142_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi4EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi4EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi4EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi4EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi4EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi4EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi4EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi4EiEvPKT0_iPKc @@ -54272,12 +54014,14 @@ _Z28test_goto_loop_unroll_factorILi4EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB142_36: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI142_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI142_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -54311,12 +54055,8 @@ _Z28test_goto_loop_unroll_factorILi4EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .size _Z28test_goto_loop_unroll_factorILi4EiEvPKT0_iPKc, .Lfunc_end142-_Z28test_goto_loop_unroll_factorILi4EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi3EiEvPKT0_iPKc -.LCPI143_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi3EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi3EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi3EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi3EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi3EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi3EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi3EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi3EiEvPKT0_iPKc @@ -54739,12 +54479,14 @@ _Z28test_goto_loop_unroll_factorILi3EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB143_36: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI143_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI143_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -54778,12 +54520,8 @@ _Z28test_goto_loop_unroll_factorILi3EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .size _Z28test_goto_loop_unroll_factorILi3EiEvPKT0_iPKc, .Lfunc_end143-_Z28test_goto_loop_unroll_factorILi3EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi2EiEvPKT0_iPKc -.LCPI144_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi2EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi2EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi2EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi2EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi2EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi2EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi2EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi2EiEvPKT0_iPKc @@ -55160,12 +54898,14 @@ _Z28test_goto_loop_unroll_factorILi2EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB144_34: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI144_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI144_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -55199,12 +54939,8 @@ _Z28test_goto_loop_unroll_factorILi2EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .size _Z28test_goto_loop_unroll_factorILi2EiEvPKT0_iPKc, .Lfunc_end144-_Z28test_goto_loop_unroll_factorILi2EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi1EiEvPKT0_iPKc -.LCPI145_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi1EiEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi1EiEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi1EiEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi1EiEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi1EiEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi1EiEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi1EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi1EiEvPKT0_iPKc @@ -55414,12 +55150,14 @@ _Z28test_goto_loop_unroll_factorILi1EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ ld.w $a0, $s3, %pc_lo12(current_test) .LBB145_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI145_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI145_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -55452,22 +55190,8 @@ _Z28test_goto_loop_unroll_factorILi1EiEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .size _Z28test_goto_loop_unroll_factorILi1EiEvPKT0_iPKc, .Lfunc_end145-_Z28test_goto_loop_unroll_factorILi1EiEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi32EdEvPKT0_iPKc -.LCPI146_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI146_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI146_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI146_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI146_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI146_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi32EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi32EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi32EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi32EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi32EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi32EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi32EdEvPKT0_iPKc @@ -55527,19 +55251,31 @@ _Z27test_for_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f addi.w $s2, $s0, -31 addi.d $s6, $s1, 128 addi.d $s7, $s1, 256 - pcalau12i $a0, %pc_hi20(.LCPI146_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI146_0) - pcalau12i $a0, %pc_hi20(.LCPI146_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI146_1) - pcalau12i $a0, %pc_hi20(.LCPI146_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI146_2) - pcalau12i $a0, %pc_hi20(.LCPI146_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI146_4) - pcalau12i $a0, %pc_hi20(.LCPI146_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI146_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -55558,266 +55294,266 @@ _Z27test_for_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB146_5: # Parent Loop BB146_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -128 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -120 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 120 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 32 addi.w $a1, $a1, 32 @@ -55832,12 +55568,12 @@ _Z27test_for_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Parent Loop BB146_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -55845,20 +55581,20 @@ _Z27test_for_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB146_8: # %._crit_edge.us # in Loop: Header=BB146_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB146_3 # %bb.9: # in Loop: Header=BB146_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -55870,19 +55606,31 @@ _Z27test_for_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB146_10: # %.preheader19.lr.ph.split blez $s0, .LBB146_17 # %bb.11: # %.preheader19.us29.preheader - pcalau12i $a0, %pc_hi20(.LCPI146_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI146_0) - pcalau12i $a0, %pc_hi20(.LCPI146_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI146_1) - pcalau12i $a0, %pc_hi20(.LCPI146_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI146_2) - pcalau12i $a0, %pc_hi20(.LCPI146_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI146_4) - pcalau12i $a0, %pc_hi20(.LCPI146_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI146_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -55897,17 +55645,17 @@ _Z27test_for_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Child Loop BB146_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB146_14: # Parent Loop BB146_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -55915,20 +55663,20 @@ _Z27test_for_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # %bb.15: # %._crit_edge.us37 # in Loop: Header=BB146_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB146_12 # %bb.16: # in Loop: Header=BB146_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -55941,16 +55689,25 @@ _Z27test_for_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB146_17: # %.preheader19.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI146_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI146_0) - pcalau12i $a0, %pc_hi20(.LCPI146_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI146_1) - pcalau12i $a0, %pc_hi20(.LCPI146_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI146_2) - pcalau12i $a0, %pc_hi20(.LCPI146_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI146_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -55969,9 +55726,9 @@ _Z27test_for_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB146_18 # %bb.20: # in Loop: Header=BB146_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -56010,12 +55767,14 @@ _Z27test_for_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB146_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI146_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI146_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -56055,22 +55814,8 @@ _Z27test_for_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi32EdEvPKT0_iPKc, .Lfunc_end146-_Z27test_for_loop_unroll_factorILi32EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi31EdEvPKT0_iPKc -.LCPI147_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI147_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI147_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI147_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI147_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI147_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi31EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi31EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi31EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi31EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi31EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi31EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi31EdEvPKT0_iPKc @@ -56130,19 +55875,31 @@ _Z27test_for_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f addi.w $s2, $s0, -30 addi.d $s6, $s1, 120 addi.d $s7, $s1, 248 - pcalau12i $a0, %pc_hi20(.LCPI147_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI147_0) - pcalau12i $a0, %pc_hi20(.LCPI147_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI147_1) - pcalau12i $a0, %pc_hi20(.LCPI147_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI147_2) - pcalau12i $a0, %pc_hi20(.LCPI147_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI147_4) - pcalau12i $a0, %pc_hi20(.LCPI147_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI147_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -56161,258 +55918,258 @@ _Z27test_for_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB147_5: # Parent Loop BB147_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -120 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 120 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 31 addi.w $a1, $a1, 31 @@ -56427,12 +56184,12 @@ _Z27test_for_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Parent Loop BB147_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -56440,20 +56197,20 @@ _Z27test_for_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB147_8: # %._crit_edge.us # in Loop: Header=BB147_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB147_3 # %bb.9: # in Loop: Header=BB147_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -56465,19 +56222,31 @@ _Z27test_for_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB147_10: # %.preheader19.lr.ph.split blez $s0, .LBB147_17 # %bb.11: # %.preheader19.us29.preheader - pcalau12i $a0, %pc_hi20(.LCPI147_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI147_0) - pcalau12i $a0, %pc_hi20(.LCPI147_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI147_1) - pcalau12i $a0, %pc_hi20(.LCPI147_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI147_2) - pcalau12i $a0, %pc_hi20(.LCPI147_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI147_4) - pcalau12i $a0, %pc_hi20(.LCPI147_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI147_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -56492,17 +56261,17 @@ _Z27test_for_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Child Loop BB147_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB147_14: # Parent Loop BB147_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -56510,20 +56279,20 @@ _Z27test_for_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # %bb.15: # %._crit_edge.us37 # in Loop: Header=BB147_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB147_12 # %bb.16: # in Loop: Header=BB147_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -56536,16 +56305,25 @@ _Z27test_for_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB147_17: # %.preheader19.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI147_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI147_0) - pcalau12i $a0, %pc_hi20(.LCPI147_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI147_1) - pcalau12i $a0, %pc_hi20(.LCPI147_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI147_2) - pcalau12i $a0, %pc_hi20(.LCPI147_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI147_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -56564,9 +56342,9 @@ _Z27test_for_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB147_18 # %bb.20: # in Loop: Header=BB147_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -56605,12 +56383,14 @@ _Z27test_for_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB147_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI147_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI147_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -56650,22 +56430,8 @@ _Z27test_for_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi31EdEvPKT0_iPKc, .Lfunc_end147-_Z27test_for_loop_unroll_factorILi31EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi30EdEvPKT0_iPKc -.LCPI148_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI148_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI148_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI148_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI148_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI148_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi30EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi30EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi30EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi30EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi30EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi30EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi30EdEvPKT0_iPKc @@ -56725,19 +56491,31 @@ _Z27test_for_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f addi.w $s2, $s0, -29 addi.d $s6, $s1, 120 addi.d $s7, $s1, 240 - pcalau12i $a0, %pc_hi20(.LCPI148_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI148_0) - pcalau12i $a0, %pc_hi20(.LCPI148_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI148_1) - pcalau12i $a0, %pc_hi20(.LCPI148_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI148_2) - pcalau12i $a0, %pc_hi20(.LCPI148_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI148_4) - pcalau12i $a0, %pc_hi20(.LCPI148_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI148_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -56756,250 +56534,250 @@ _Z27test_for_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB148_5: # Parent Loop BB148_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -120 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 30 addi.w $a1, $a1, 30 @@ -57014,12 +56792,12 @@ _Z27test_for_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Parent Loop BB148_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -57027,20 +56805,20 @@ _Z27test_for_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB148_8: # %._crit_edge.us # in Loop: Header=BB148_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB148_3 # %bb.9: # in Loop: Header=BB148_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -57052,19 +56830,31 @@ _Z27test_for_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB148_10: # %.preheader19.lr.ph.split blez $s0, .LBB148_17 # %bb.11: # %.preheader19.us29.preheader - pcalau12i $a0, %pc_hi20(.LCPI148_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI148_0) - pcalau12i $a0, %pc_hi20(.LCPI148_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI148_1) - pcalau12i $a0, %pc_hi20(.LCPI148_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI148_2) - pcalau12i $a0, %pc_hi20(.LCPI148_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI148_4) - pcalau12i $a0, %pc_hi20(.LCPI148_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI148_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -57079,17 +56869,17 @@ _Z27test_for_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Child Loop BB148_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB148_14: # Parent Loop BB148_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -57097,20 +56887,20 @@ _Z27test_for_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # %bb.15: # %._crit_edge.us37 # in Loop: Header=BB148_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB148_12 # %bb.16: # in Loop: Header=BB148_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -57123,16 +56913,25 @@ _Z27test_for_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB148_17: # %.preheader19.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI148_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI148_0) - pcalau12i $a0, %pc_hi20(.LCPI148_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI148_1) - pcalau12i $a0, %pc_hi20(.LCPI148_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI148_2) - pcalau12i $a0, %pc_hi20(.LCPI148_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI148_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -57151,9 +56950,9 @@ _Z27test_for_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB148_18 # %bb.20: # in Loop: Header=BB148_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -57192,12 +56991,14 @@ _Z27test_for_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB148_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI148_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI148_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -57316,22 +57117,8 @@ _ZN14for_loop_testsILi28EdE7do_testEPKdPKc: # @_ZN14for_loop_testsILi28EdE7do_te .size _ZN14for_loop_testsILi28EdE7do_testEPKdPKc, .Lfunc_end149-_ZN14for_loop_testsILi28EdE7do_testEPKdPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi29EdEvPKT0_iPKc -.LCPI150_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI150_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI150_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI150_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI150_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI150_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi29EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi29EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi29EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi29EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi29EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi29EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi29EdEvPKT0_iPKc @@ -57391,19 +57178,31 @@ _Z27test_for_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f addi.w $s2, $s0, -28 addi.d $s6, $s1, 112 addi.d $s7, $s1, 232 - pcalau12i $a0, %pc_hi20(.LCPI150_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI150_0) - pcalau12i $a0, %pc_hi20(.LCPI150_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI150_1) - pcalau12i $a0, %pc_hi20(.LCPI150_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI150_2) - pcalau12i $a0, %pc_hi20(.LCPI150_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI150_4) - pcalau12i $a0, %pc_hi20(.LCPI150_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI150_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -57422,242 +57221,242 @@ _Z27test_for_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB150_5: # Parent Loop BB150_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 29 addi.w $a1, $a1, 29 @@ -57672,12 +57471,12 @@ _Z27test_for_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Parent Loop BB150_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -57685,20 +57484,20 @@ _Z27test_for_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB150_8: # %._crit_edge.us # in Loop: Header=BB150_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB150_3 # %bb.9: # in Loop: Header=BB150_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -57710,19 +57509,31 @@ _Z27test_for_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB150_10: # %.preheader19.lr.ph.split blez $s0, .LBB150_17 # %bb.11: # %.preheader19.us29.preheader - pcalau12i $a0, %pc_hi20(.LCPI150_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI150_0) - pcalau12i $a0, %pc_hi20(.LCPI150_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI150_1) - pcalau12i $a0, %pc_hi20(.LCPI150_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI150_2) - pcalau12i $a0, %pc_hi20(.LCPI150_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI150_4) - pcalau12i $a0, %pc_hi20(.LCPI150_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI150_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -57737,17 +57548,17 @@ _Z27test_for_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Child Loop BB150_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB150_14: # Parent Loop BB150_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -57755,20 +57566,20 @@ _Z27test_for_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # %bb.15: # %._crit_edge.us37 # in Loop: Header=BB150_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB150_12 # %bb.16: # in Loop: Header=BB150_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -57781,16 +57592,25 @@ _Z27test_for_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB150_17: # %.preheader19.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI150_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI150_0) - pcalau12i $a0, %pc_hi20(.LCPI150_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI150_1) - pcalau12i $a0, %pc_hi20(.LCPI150_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI150_2) - pcalau12i $a0, %pc_hi20(.LCPI150_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI150_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -57809,9 +57629,9 @@ _Z27test_for_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB150_18 # %bb.20: # in Loop: Header=BB150_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -57850,12 +57670,14 @@ _Z27test_for_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB150_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI150_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI150_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -57895,22 +57717,8 @@ _Z27test_for_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi29EdEvPKT0_iPKc, .Lfunc_end150-_Z27test_for_loop_unroll_factorILi29EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi28EdEvPKT0_iPKc -.LCPI151_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI151_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI151_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI151_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI151_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI151_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi28EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi28EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi28EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi28EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi28EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi28EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi28EdEvPKT0_iPKc @@ -57970,19 +57778,31 @@ _Z27test_for_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f addi.w $s2, $s0, -27 addi.d $s6, $s1, 112 addi.d $s7, $s1, 224 - pcalau12i $a0, %pc_hi20(.LCPI151_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI151_0) - pcalau12i $a0, %pc_hi20(.LCPI151_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI151_1) - pcalau12i $a0, %pc_hi20(.LCPI151_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI151_2) - pcalau12i $a0, %pc_hi20(.LCPI151_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI151_4) - pcalau12i $a0, %pc_hi20(.LCPI151_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI151_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -58001,234 +57821,234 @@ _Z27test_for_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB151_5: # Parent Loop BB151_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 28 addi.w $a1, $a1, 28 @@ -58243,12 +58063,12 @@ _Z27test_for_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Parent Loop BB151_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -58256,20 +58076,20 @@ _Z27test_for_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB151_8: # %._crit_edge.us # in Loop: Header=BB151_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB151_3 # %bb.9: # in Loop: Header=BB151_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -58281,19 +58101,31 @@ _Z27test_for_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB151_10: # %.preheader19.lr.ph.split blez $s0, .LBB151_17 # %bb.11: # %.preheader19.us29.preheader - pcalau12i $a0, %pc_hi20(.LCPI151_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI151_0) - pcalau12i $a0, %pc_hi20(.LCPI151_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI151_1) - pcalau12i $a0, %pc_hi20(.LCPI151_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI151_2) - pcalau12i $a0, %pc_hi20(.LCPI151_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI151_4) - pcalau12i $a0, %pc_hi20(.LCPI151_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI151_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -58308,17 +58140,17 @@ _Z27test_for_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Child Loop BB151_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB151_14: # Parent Loop BB151_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -58326,20 +58158,20 @@ _Z27test_for_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # %bb.15: # %._crit_edge.us37 # in Loop: Header=BB151_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB151_12 # %bb.16: # in Loop: Header=BB151_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -58352,16 +58184,25 @@ _Z27test_for_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB151_17: # %.preheader19.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI151_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI151_0) - pcalau12i $a0, %pc_hi20(.LCPI151_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI151_1) - pcalau12i $a0, %pc_hi20(.LCPI151_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI151_2) - pcalau12i $a0, %pc_hi20(.LCPI151_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI151_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -58380,9 +58221,9 @@ _Z27test_for_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB151_18 # %bb.20: # in Loop: Header=BB151_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -58421,12 +58262,14 @@ _Z27test_for_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB151_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI151_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI151_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -58466,22 +58309,8 @@ _Z27test_for_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi28EdEvPKT0_iPKc, .Lfunc_end151-_Z27test_for_loop_unroll_factorILi28EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi27EdEvPKT0_iPKc -.LCPI152_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI152_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI152_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI152_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI152_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI152_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi27EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi27EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi27EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi27EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi27EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi27EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi27EdEvPKT0_iPKc @@ -58541,19 +58370,31 @@ _Z27test_for_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f addi.w $s2, $s0, -26 addi.d $s6, $s1, 104 addi.d $s7, $s1, 216 - pcalau12i $a0, %pc_hi20(.LCPI152_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI152_0) - pcalau12i $a0, %pc_hi20(.LCPI152_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI152_1) - pcalau12i $a0, %pc_hi20(.LCPI152_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI152_2) - pcalau12i $a0, %pc_hi20(.LCPI152_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI152_4) - pcalau12i $a0, %pc_hi20(.LCPI152_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI152_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -58572,226 +58413,226 @@ _Z27test_for_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB152_5: # Parent Loop BB152_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 27 addi.w $a1, $a1, 27 @@ -58806,12 +58647,12 @@ _Z27test_for_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Parent Loop BB152_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -58819,20 +58660,20 @@ _Z27test_for_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB152_8: # %._crit_edge.us # in Loop: Header=BB152_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB152_3 # %bb.9: # in Loop: Header=BB152_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -58844,19 +58685,31 @@ _Z27test_for_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB152_10: # %.preheader19.lr.ph.split blez $s0, .LBB152_17 # %bb.11: # %.preheader19.us29.preheader - pcalau12i $a0, %pc_hi20(.LCPI152_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI152_0) - pcalau12i $a0, %pc_hi20(.LCPI152_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI152_1) - pcalau12i $a0, %pc_hi20(.LCPI152_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI152_2) - pcalau12i $a0, %pc_hi20(.LCPI152_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI152_4) - pcalau12i $a0, %pc_hi20(.LCPI152_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI152_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -58871,17 +58724,17 @@ _Z27test_for_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Child Loop BB152_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB152_14: # Parent Loop BB152_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -58889,20 +58742,20 @@ _Z27test_for_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # %bb.15: # %._crit_edge.us37 # in Loop: Header=BB152_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB152_12 # %bb.16: # in Loop: Header=BB152_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -58915,16 +58768,25 @@ _Z27test_for_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB152_17: # %.preheader19.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI152_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI152_0) - pcalau12i $a0, %pc_hi20(.LCPI152_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI152_1) - pcalau12i $a0, %pc_hi20(.LCPI152_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI152_2) - pcalau12i $a0, %pc_hi20(.LCPI152_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI152_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -58943,9 +58805,9 @@ _Z27test_for_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB152_18 # %bb.20: # in Loop: Header=BB152_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -58984,12 +58846,14 @@ _Z27test_for_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB152_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI152_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI152_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -59029,22 +58893,8 @@ _Z27test_for_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi27EdEvPKT0_iPKc, .Lfunc_end152-_Z27test_for_loop_unroll_factorILi27EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi26EdEvPKT0_iPKc -.LCPI153_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI153_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI153_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI153_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI153_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI153_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi26EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi26EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi26EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi26EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi26EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi26EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi26EdEvPKT0_iPKc @@ -59104,19 +58954,31 @@ _Z27test_for_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f addi.w $s2, $s0, -25 addi.d $s6, $s1, 104 addi.d $s7, $s1, 208 - pcalau12i $a0, %pc_hi20(.LCPI153_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI153_0) - pcalau12i $a0, %pc_hi20(.LCPI153_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI153_1) - pcalau12i $a0, %pc_hi20(.LCPI153_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI153_2) - pcalau12i $a0, %pc_hi20(.LCPI153_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI153_4) - pcalau12i $a0, %pc_hi20(.LCPI153_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI153_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -59135,218 +58997,218 @@ _Z27test_for_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB153_5: # Parent Loop BB153_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 26 addi.w $a1, $a1, 26 @@ -59361,12 +59223,12 @@ _Z27test_for_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Parent Loop BB153_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -59374,20 +59236,20 @@ _Z27test_for_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB153_8: # %._crit_edge.us # in Loop: Header=BB153_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB153_3 # %bb.9: # in Loop: Header=BB153_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -59399,19 +59261,31 @@ _Z27test_for_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB153_10: # %.preheader19.lr.ph.split blez $s0, .LBB153_17 # %bb.11: # %.preheader19.us29.preheader - pcalau12i $a0, %pc_hi20(.LCPI153_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI153_0) - pcalau12i $a0, %pc_hi20(.LCPI153_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI153_1) - pcalau12i $a0, %pc_hi20(.LCPI153_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI153_2) - pcalau12i $a0, %pc_hi20(.LCPI153_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI153_4) - pcalau12i $a0, %pc_hi20(.LCPI153_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI153_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -59426,17 +59300,17 @@ _Z27test_for_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Child Loop BB153_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB153_14: # Parent Loop BB153_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -59444,20 +59318,20 @@ _Z27test_for_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # %bb.15: # %._crit_edge.us37 # in Loop: Header=BB153_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB153_12 # %bb.16: # in Loop: Header=BB153_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -59470,16 +59344,25 @@ _Z27test_for_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB153_17: # %.preheader19.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI153_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI153_0) - pcalau12i $a0, %pc_hi20(.LCPI153_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI153_1) - pcalau12i $a0, %pc_hi20(.LCPI153_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI153_2) - pcalau12i $a0, %pc_hi20(.LCPI153_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI153_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -59498,9 +59381,9 @@ _Z27test_for_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB153_18 # %bb.20: # in Loop: Header=BB153_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -59539,12 +59422,14 @@ _Z27test_for_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB153_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI153_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI153_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -59584,22 +59469,8 @@ _Z27test_for_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi26EdEvPKT0_iPKc, .Lfunc_end153-_Z27test_for_loop_unroll_factorILi26EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi25EdEvPKT0_iPKc -.LCPI154_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI154_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI154_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI154_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI154_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI154_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi25EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi25EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi25EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi25EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi25EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi25EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi25EdEvPKT0_iPKc @@ -59659,19 +59530,31 @@ _Z27test_for_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f addi.w $s2, $s0, -24 addi.d $s6, $s1, 96 addi.d $s7, $s1, 200 - pcalau12i $a0, %pc_hi20(.LCPI154_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI154_0) - pcalau12i $a0, %pc_hi20(.LCPI154_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI154_1) - pcalau12i $a0, %pc_hi20(.LCPI154_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI154_2) - pcalau12i $a0, %pc_hi20(.LCPI154_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI154_4) - pcalau12i $a0, %pc_hi20(.LCPI154_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI154_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -59690,210 +59573,210 @@ _Z27test_for_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB154_5: # Parent Loop BB154_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 25 addi.w $a1, $a1, 25 @@ -59908,12 +59791,12 @@ _Z27test_for_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Parent Loop BB154_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -59921,20 +59804,20 @@ _Z27test_for_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB154_8: # %._crit_edge.us # in Loop: Header=BB154_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB154_3 # %bb.9: # in Loop: Header=BB154_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -59946,19 +59829,31 @@ _Z27test_for_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB154_10: # %.preheader19.lr.ph.split blez $s0, .LBB154_17 # %bb.11: # %.preheader19.us29.preheader - pcalau12i $a0, %pc_hi20(.LCPI154_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI154_0) - pcalau12i $a0, %pc_hi20(.LCPI154_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI154_1) - pcalau12i $a0, %pc_hi20(.LCPI154_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI154_2) - pcalau12i $a0, %pc_hi20(.LCPI154_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI154_4) - pcalau12i $a0, %pc_hi20(.LCPI154_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI154_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -59973,17 +59868,17 @@ _Z27test_for_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Child Loop BB154_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB154_14: # Parent Loop BB154_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -59991,20 +59886,20 @@ _Z27test_for_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # %bb.15: # %._crit_edge.us37 # in Loop: Header=BB154_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB154_12 # %bb.16: # in Loop: Header=BB154_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -60017,16 +59912,25 @@ _Z27test_for_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB154_17: # %.preheader19.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI154_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI154_0) - pcalau12i $a0, %pc_hi20(.LCPI154_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI154_1) - pcalau12i $a0, %pc_hi20(.LCPI154_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI154_2) - pcalau12i $a0, %pc_hi20(.LCPI154_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI154_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -60045,9 +59949,9 @@ _Z27test_for_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB154_18 # %bb.20: # in Loop: Header=BB154_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -60086,12 +59990,14 @@ _Z27test_for_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB154_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI154_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI154_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -60131,22 +60037,8 @@ _Z27test_for_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi25EdEvPKT0_iPKc, .Lfunc_end154-_Z27test_for_loop_unroll_factorILi25EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi24EdEvPKT0_iPKc -.LCPI155_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI155_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI155_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI155_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI155_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI155_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi24EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi24EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi24EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi24EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi24EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi24EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi24EdEvPKT0_iPKc @@ -60206,19 +60098,31 @@ _Z27test_for_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f addi.w $s2, $s0, -23 addi.d $s6, $s1, 96 addi.d $s7, $s1, 192 - pcalau12i $a0, %pc_hi20(.LCPI155_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI155_0) - pcalau12i $a0, %pc_hi20(.LCPI155_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI155_1) - pcalau12i $a0, %pc_hi20(.LCPI155_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI155_2) - pcalau12i $a0, %pc_hi20(.LCPI155_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI155_4) - pcalau12i $a0, %pc_hi20(.LCPI155_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI155_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -60237,202 +60141,202 @@ _Z27test_for_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB155_5: # Parent Loop BB155_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 24 addi.w $a1, $a1, 24 @@ -60447,12 +60351,12 @@ _Z27test_for_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Parent Loop BB155_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -60460,20 +60364,20 @@ _Z27test_for_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB155_8: # %._crit_edge.us # in Loop: Header=BB155_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB155_3 # %bb.9: # in Loop: Header=BB155_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -60485,19 +60389,31 @@ _Z27test_for_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB155_10: # %.preheader19.lr.ph.split blez $s0, .LBB155_17 # %bb.11: # %.preheader19.us29.preheader - pcalau12i $a0, %pc_hi20(.LCPI155_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI155_0) - pcalau12i $a0, %pc_hi20(.LCPI155_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI155_1) - pcalau12i $a0, %pc_hi20(.LCPI155_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI155_2) - pcalau12i $a0, %pc_hi20(.LCPI155_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI155_4) - pcalau12i $a0, %pc_hi20(.LCPI155_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI155_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -60512,17 +60428,17 @@ _Z27test_for_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Child Loop BB155_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB155_14: # Parent Loop BB155_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -60530,20 +60446,20 @@ _Z27test_for_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # %bb.15: # %._crit_edge.us37 # in Loop: Header=BB155_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB155_12 # %bb.16: # in Loop: Header=BB155_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -60556,16 +60472,25 @@ _Z27test_for_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB155_17: # %.preheader19.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI155_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI155_0) - pcalau12i $a0, %pc_hi20(.LCPI155_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI155_1) - pcalau12i $a0, %pc_hi20(.LCPI155_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI155_2) - pcalau12i $a0, %pc_hi20(.LCPI155_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI155_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -60584,9 +60509,9 @@ _Z27test_for_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB155_18 # %bb.20: # in Loop: Header=BB155_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -60625,12 +60550,14 @@ _Z27test_for_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB155_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI155_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI155_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -60670,22 +60597,8 @@ _Z27test_for_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi24EdEvPKT0_iPKc, .Lfunc_end155-_Z27test_for_loop_unroll_factorILi24EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi23EdEvPKT0_iPKc -.LCPI156_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI156_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI156_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI156_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI156_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI156_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi23EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi23EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi23EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi23EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi23EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi23EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi23EdEvPKT0_iPKc @@ -60745,19 +60658,31 @@ _Z27test_for_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f addi.w $s2, $s0, -22 addi.d $s6, $s1, 88 addi.d $s7, $s1, 184 - pcalau12i $a0, %pc_hi20(.LCPI156_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI156_0) - pcalau12i $a0, %pc_hi20(.LCPI156_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI156_1) - pcalau12i $a0, %pc_hi20(.LCPI156_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI156_2) - pcalau12i $a0, %pc_hi20(.LCPI156_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI156_4) - pcalau12i $a0, %pc_hi20(.LCPI156_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI156_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -60776,194 +60701,194 @@ _Z27test_for_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB156_5: # Parent Loop BB156_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 23 addi.w $a1, $a1, 23 @@ -60978,12 +60903,12 @@ _Z27test_for_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Parent Loop BB156_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -60991,20 +60916,20 @@ _Z27test_for_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB156_8: # %._crit_edge.us # in Loop: Header=BB156_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB156_3 # %bb.9: # in Loop: Header=BB156_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -61016,19 +60941,31 @@ _Z27test_for_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB156_10: # %.preheader18.lr.ph.split blez $s0, .LBB156_17 # %bb.11: # %.preheader18.us28.preheader - pcalau12i $a0, %pc_hi20(.LCPI156_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI156_0) - pcalau12i $a0, %pc_hi20(.LCPI156_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI156_1) - pcalau12i $a0, %pc_hi20(.LCPI156_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI156_2) - pcalau12i $a0, %pc_hi20(.LCPI156_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI156_4) - pcalau12i $a0, %pc_hi20(.LCPI156_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI156_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -61043,17 +60980,17 @@ _Z27test_for_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Child Loop BB156_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB156_14: # Parent Loop BB156_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -61061,20 +60998,20 @@ _Z27test_for_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # %bb.15: # %._crit_edge.us36 # in Loop: Header=BB156_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB156_12 # %bb.16: # in Loop: Header=BB156_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -61087,16 +61024,25 @@ _Z27test_for_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB156_17: # %.preheader18.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI156_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI156_0) - pcalau12i $a0, %pc_hi20(.LCPI156_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI156_1) - pcalau12i $a0, %pc_hi20(.LCPI156_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI156_2) - pcalau12i $a0, %pc_hi20(.LCPI156_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI156_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -61115,9 +61061,9 @@ _Z27test_for_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB156_18 # %bb.20: # in Loop: Header=BB156_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -61156,12 +61102,14 @@ _Z27test_for_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB156_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI156_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI156_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -61201,22 +61149,8 @@ _Z27test_for_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi23EdEvPKT0_iPKc, .Lfunc_end156-_Z27test_for_loop_unroll_factorILi23EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi22EdEvPKT0_iPKc -.LCPI157_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI157_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI157_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI157_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI157_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI157_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi22EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi22EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi22EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi22EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi22EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi22EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi22EdEvPKT0_iPKc @@ -61276,19 +61210,31 @@ _Z27test_for_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f addi.w $s2, $s0, -21 addi.d $s6, $s1, 88 addi.d $s7, $s1, 176 - pcalau12i $a0, %pc_hi20(.LCPI157_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI157_0) - pcalau12i $a0, %pc_hi20(.LCPI157_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI157_1) - pcalau12i $a0, %pc_hi20(.LCPI157_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI157_2) - pcalau12i $a0, %pc_hi20(.LCPI157_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI157_4) - pcalau12i $a0, %pc_hi20(.LCPI157_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI157_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -61307,186 +61253,186 @@ _Z27test_for_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB157_5: # Parent Loop BB157_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 22 addi.w $a1, $a1, 22 @@ -61501,12 +61447,12 @@ _Z27test_for_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Parent Loop BB157_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -61514,20 +61460,20 @@ _Z27test_for_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB157_8: # %._crit_edge.us # in Loop: Header=BB157_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB157_3 # %bb.9: # in Loop: Header=BB157_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -61539,19 +61485,31 @@ _Z27test_for_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB157_10: # %.preheader18.lr.ph.split blez $s0, .LBB157_17 # %bb.11: # %.preheader18.us28.preheader - pcalau12i $a0, %pc_hi20(.LCPI157_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI157_0) - pcalau12i $a0, %pc_hi20(.LCPI157_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI157_1) - pcalau12i $a0, %pc_hi20(.LCPI157_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI157_2) - pcalau12i $a0, %pc_hi20(.LCPI157_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI157_4) - pcalau12i $a0, %pc_hi20(.LCPI157_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI157_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -61566,17 +61524,17 @@ _Z27test_for_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Child Loop BB157_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB157_14: # Parent Loop BB157_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -61584,20 +61542,20 @@ _Z27test_for_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # %bb.15: # %._crit_edge.us36 # in Loop: Header=BB157_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB157_12 # %bb.16: # in Loop: Header=BB157_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -61610,16 +61568,25 @@ _Z27test_for_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB157_17: # %.preheader18.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI157_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI157_0) - pcalau12i $a0, %pc_hi20(.LCPI157_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI157_1) - pcalau12i $a0, %pc_hi20(.LCPI157_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI157_2) - pcalau12i $a0, %pc_hi20(.LCPI157_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI157_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -61638,9 +61605,9 @@ _Z27test_for_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB157_18 # %bb.20: # in Loop: Header=BB157_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -61679,12 +61646,14 @@ _Z27test_for_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB157_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI157_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI157_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -61724,22 +61693,8 @@ _Z27test_for_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi22EdEvPKT0_iPKc, .Lfunc_end157-_Z27test_for_loop_unroll_factorILi22EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi21EdEvPKT0_iPKc -.LCPI158_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI158_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI158_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI158_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI158_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI158_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi21EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi21EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi21EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi21EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi21EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi21EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi21EdEvPKT0_iPKc @@ -61799,19 +61754,31 @@ _Z27test_for_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f addi.w $s2, $s0, -20 addi.d $s6, $s1, 80 addi.d $s7, $s1, 168 - pcalau12i $a0, %pc_hi20(.LCPI158_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI158_0) - pcalau12i $a0, %pc_hi20(.LCPI158_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI158_1) - pcalau12i $a0, %pc_hi20(.LCPI158_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI158_2) - pcalau12i $a0, %pc_hi20(.LCPI158_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI158_4) - pcalau12i $a0, %pc_hi20(.LCPI158_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI158_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -61830,178 +61797,178 @@ _Z27test_for_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB158_5: # Parent Loop BB158_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 21 addi.w $a1, $a1, 21 @@ -62016,12 +61983,12 @@ _Z27test_for_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Parent Loop BB158_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -62029,20 +61996,20 @@ _Z27test_for_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB158_8: # %._crit_edge.us # in Loop: Header=BB158_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB158_3 # %bb.9: # in Loop: Header=BB158_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -62054,19 +62021,31 @@ _Z27test_for_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB158_10: # %.preheader18.lr.ph.split blez $s0, .LBB158_17 # %bb.11: # %.preheader18.us28.preheader - pcalau12i $a0, %pc_hi20(.LCPI158_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI158_0) - pcalau12i $a0, %pc_hi20(.LCPI158_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI158_1) - pcalau12i $a0, %pc_hi20(.LCPI158_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI158_2) - pcalau12i $a0, %pc_hi20(.LCPI158_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI158_4) - pcalau12i $a0, %pc_hi20(.LCPI158_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI158_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -62081,17 +62060,17 @@ _Z27test_for_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Child Loop BB158_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB158_14: # Parent Loop BB158_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -62099,20 +62078,20 @@ _Z27test_for_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # %bb.15: # %._crit_edge.us36 # in Loop: Header=BB158_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB158_12 # %bb.16: # in Loop: Header=BB158_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -62125,16 +62104,25 @@ _Z27test_for_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB158_17: # %.preheader18.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI158_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI158_0) - pcalau12i $a0, %pc_hi20(.LCPI158_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI158_1) - pcalau12i $a0, %pc_hi20(.LCPI158_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI158_2) - pcalau12i $a0, %pc_hi20(.LCPI158_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI158_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -62153,9 +62141,9 @@ _Z27test_for_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB158_18 # %bb.20: # in Loop: Header=BB158_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -62194,12 +62182,14 @@ _Z27test_for_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB158_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI158_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI158_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -62317,22 +62307,8 @@ _ZN14for_loop_testsILi19EdE7do_testEPKdPKc: # @_ZN14for_loop_testsILi19EdE7do_te .size _ZN14for_loop_testsILi19EdE7do_testEPKdPKc, .Lfunc_end159-_ZN14for_loop_testsILi19EdE7do_testEPKdPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi20EdEvPKT0_iPKc -.LCPI160_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI160_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI160_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI160_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI160_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI160_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi20EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi20EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi20EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi20EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi20EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi20EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi20EdEvPKT0_iPKc @@ -62392,19 +62368,31 @@ _Z27test_for_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f addi.w $s2, $s0, -19 addi.d $s6, $s1, 80 addi.d $s7, $s1, 160 - pcalau12i $a0, %pc_hi20(.LCPI160_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI160_0) - pcalau12i $a0, %pc_hi20(.LCPI160_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI160_1) - pcalau12i $a0, %pc_hi20(.LCPI160_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI160_2) - pcalau12i $a0, %pc_hi20(.LCPI160_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI160_4) - pcalau12i $a0, %pc_hi20(.LCPI160_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI160_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -62423,170 +62411,170 @@ _Z27test_for_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB160_5: # Parent Loop BB160_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 20 addi.w $a1, $a1, 20 @@ -62601,12 +62589,12 @@ _Z27test_for_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Parent Loop BB160_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -62614,20 +62602,20 @@ _Z27test_for_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB160_8: # %._crit_edge.us # in Loop: Header=BB160_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB160_3 # %bb.9: # in Loop: Header=BB160_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -62639,19 +62627,31 @@ _Z27test_for_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB160_10: # %.preheader18.lr.ph.split blez $s0, .LBB160_17 # %bb.11: # %.preheader18.us28.preheader - pcalau12i $a0, %pc_hi20(.LCPI160_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI160_0) - pcalau12i $a0, %pc_hi20(.LCPI160_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI160_1) - pcalau12i $a0, %pc_hi20(.LCPI160_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI160_2) - pcalau12i $a0, %pc_hi20(.LCPI160_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI160_4) - pcalau12i $a0, %pc_hi20(.LCPI160_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI160_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -62666,17 +62666,17 @@ _Z27test_for_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Child Loop BB160_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB160_14: # Parent Loop BB160_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -62684,20 +62684,20 @@ _Z27test_for_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # %bb.15: # %._crit_edge.us36 # in Loop: Header=BB160_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB160_12 # %bb.16: # in Loop: Header=BB160_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -62710,16 +62710,25 @@ _Z27test_for_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB160_17: # %.preheader18.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI160_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI160_0) - pcalau12i $a0, %pc_hi20(.LCPI160_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI160_1) - pcalau12i $a0, %pc_hi20(.LCPI160_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI160_2) - pcalau12i $a0, %pc_hi20(.LCPI160_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI160_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -62738,9 +62747,9 @@ _Z27test_for_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB160_18 # %bb.20: # in Loop: Header=BB160_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -62779,12 +62788,14 @@ _Z27test_for_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB160_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI160_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI160_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -62824,22 +62835,8 @@ _Z27test_for_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi20EdEvPKT0_iPKc, .Lfunc_end160-_Z27test_for_loop_unroll_factorILi20EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi19EdEvPKT0_iPKc -.LCPI161_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI161_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI161_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI161_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI161_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI161_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi19EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi19EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi19EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi19EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi19EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi19EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi19EdEvPKT0_iPKc @@ -62899,19 +62896,31 @@ _Z27test_for_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f addi.w $s2, $s0, -18 addi.d $s6, $s1, 72 addi.d $s7, $s1, 152 - pcalau12i $a0, %pc_hi20(.LCPI161_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI161_0) - pcalau12i $a0, %pc_hi20(.LCPI161_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI161_1) - pcalau12i $a0, %pc_hi20(.LCPI161_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI161_2) - pcalau12i $a0, %pc_hi20(.LCPI161_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI161_4) - pcalau12i $a0, %pc_hi20(.LCPI161_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI161_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -62930,162 +62939,162 @@ _Z27test_for_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB161_5: # Parent Loop BB161_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 19 addi.w $a1, $a1, 19 @@ -63100,12 +63109,12 @@ _Z27test_for_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Parent Loop BB161_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -63113,20 +63122,20 @@ _Z27test_for_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB161_8: # %._crit_edge.us # in Loop: Header=BB161_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB161_3 # %bb.9: # in Loop: Header=BB161_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -63138,19 +63147,31 @@ _Z27test_for_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB161_10: # %.preheader18.lr.ph.split blez $s0, .LBB161_17 # %bb.11: # %.preheader18.us28.preheader - pcalau12i $a0, %pc_hi20(.LCPI161_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI161_0) - pcalau12i $a0, %pc_hi20(.LCPI161_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI161_1) - pcalau12i $a0, %pc_hi20(.LCPI161_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI161_2) - pcalau12i $a0, %pc_hi20(.LCPI161_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI161_4) - pcalau12i $a0, %pc_hi20(.LCPI161_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI161_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -63165,17 +63186,17 @@ _Z27test_for_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Child Loop BB161_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB161_14: # Parent Loop BB161_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -63183,20 +63204,20 @@ _Z27test_for_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # %bb.15: # %._crit_edge.us36 # in Loop: Header=BB161_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB161_12 # %bb.16: # in Loop: Header=BB161_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -63209,16 +63230,25 @@ _Z27test_for_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB161_17: # %.preheader18.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI161_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI161_0) - pcalau12i $a0, %pc_hi20(.LCPI161_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI161_1) - pcalau12i $a0, %pc_hi20(.LCPI161_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI161_2) - pcalau12i $a0, %pc_hi20(.LCPI161_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI161_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -63237,9 +63267,9 @@ _Z27test_for_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB161_18 # %bb.20: # in Loop: Header=BB161_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -63278,12 +63308,14 @@ _Z27test_for_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB161_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI161_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI161_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -63323,22 +63355,8 @@ _Z27test_for_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi19EdEvPKT0_iPKc, .Lfunc_end161-_Z27test_for_loop_unroll_factorILi19EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi18EdEvPKT0_iPKc -.LCPI162_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI162_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI162_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI162_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI162_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI162_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi18EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi18EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi18EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi18EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi18EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi18EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi18EdEvPKT0_iPKc @@ -63398,19 +63416,31 @@ _Z27test_for_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f addi.w $s2, $s0, -17 addi.d $s6, $s1, 72 addi.d $s7, $s1, 144 - pcalau12i $a0, %pc_hi20(.LCPI162_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI162_0) - pcalau12i $a0, %pc_hi20(.LCPI162_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI162_1) - pcalau12i $a0, %pc_hi20(.LCPI162_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI162_2) - pcalau12i $a0, %pc_hi20(.LCPI162_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI162_4) - pcalau12i $a0, %pc_hi20(.LCPI162_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI162_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -63429,154 +63459,154 @@ _Z27test_for_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB162_5: # Parent Loop BB162_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 18 addi.w $a1, $a1, 18 @@ -63591,12 +63621,12 @@ _Z27test_for_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Parent Loop BB162_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -63604,20 +63634,20 @@ _Z27test_for_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB162_8: # %._crit_edge.us # in Loop: Header=BB162_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB162_3 # %bb.9: # in Loop: Header=BB162_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -63629,19 +63659,31 @@ _Z27test_for_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB162_10: # %.preheader18.lr.ph.split blez $s0, .LBB162_17 # %bb.11: # %.preheader18.us28.preheader - pcalau12i $a0, %pc_hi20(.LCPI162_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI162_0) - pcalau12i $a0, %pc_hi20(.LCPI162_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI162_1) - pcalau12i $a0, %pc_hi20(.LCPI162_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI162_2) - pcalau12i $a0, %pc_hi20(.LCPI162_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI162_4) - pcalau12i $a0, %pc_hi20(.LCPI162_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI162_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -63656,17 +63698,17 @@ _Z27test_for_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Child Loop BB162_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB162_14: # Parent Loop BB162_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -63674,20 +63716,20 @@ _Z27test_for_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # %bb.15: # %._crit_edge.us36 # in Loop: Header=BB162_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB162_12 # %bb.16: # in Loop: Header=BB162_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -63700,16 +63742,25 @@ _Z27test_for_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB162_17: # %.preheader18.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI162_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI162_0) - pcalau12i $a0, %pc_hi20(.LCPI162_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI162_1) - pcalau12i $a0, %pc_hi20(.LCPI162_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI162_2) - pcalau12i $a0, %pc_hi20(.LCPI162_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI162_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -63728,9 +63779,9 @@ _Z27test_for_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB162_18 # %bb.20: # in Loop: Header=BB162_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -63769,12 +63820,14 @@ _Z27test_for_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB162_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI162_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI162_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -63814,22 +63867,8 @@ _Z27test_for_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi18EdEvPKT0_iPKc, .Lfunc_end162-_Z27test_for_loop_unroll_factorILi18EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi17EdEvPKT0_iPKc -.LCPI163_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI163_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI163_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI163_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI163_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI163_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi17EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi17EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi17EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi17EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi17EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi17EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi17EdEvPKT0_iPKc @@ -63889,19 +63928,31 @@ _Z27test_for_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f addi.w $s2, $s0, -16 addi.d $s6, $s1, 64 addi.d $s7, $s1, 136 - pcalau12i $a0, %pc_hi20(.LCPI163_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI163_0) - pcalau12i $a0, %pc_hi20(.LCPI163_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI163_1) - pcalau12i $a0, %pc_hi20(.LCPI163_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI163_2) - pcalau12i $a0, %pc_hi20(.LCPI163_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI163_4) - pcalau12i $a0, %pc_hi20(.LCPI163_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI163_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -63920,146 +63971,146 @@ _Z27test_for_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB163_5: # Parent Loop BB163_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 17 addi.w $a1, $a1, 17 @@ -64074,12 +64125,12 @@ _Z27test_for_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Parent Loop BB163_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -64087,20 +64138,20 @@ _Z27test_for_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB163_8: # %._crit_edge.us # in Loop: Header=BB163_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB163_3 # %bb.9: # in Loop: Header=BB163_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -64112,19 +64163,31 @@ _Z27test_for_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB163_10: # %.preheader18.lr.ph.split blez $s0, .LBB163_17 # %bb.11: # %.preheader18.us28.preheader - pcalau12i $a0, %pc_hi20(.LCPI163_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI163_0) - pcalau12i $a0, %pc_hi20(.LCPI163_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI163_1) - pcalau12i $a0, %pc_hi20(.LCPI163_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI163_2) - pcalau12i $a0, %pc_hi20(.LCPI163_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI163_4) - pcalau12i $a0, %pc_hi20(.LCPI163_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI163_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -64139,17 +64202,17 @@ _Z27test_for_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Child Loop BB163_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB163_14: # Parent Loop BB163_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -64157,20 +64220,20 @@ _Z27test_for_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # %bb.15: # %._crit_edge.us36 # in Loop: Header=BB163_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB163_12 # %bb.16: # in Loop: Header=BB163_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -64183,16 +64246,25 @@ _Z27test_for_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB163_17: # %.preheader18.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI163_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI163_0) - pcalau12i $a0, %pc_hi20(.LCPI163_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI163_1) - pcalau12i $a0, %pc_hi20(.LCPI163_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI163_2) - pcalau12i $a0, %pc_hi20(.LCPI163_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI163_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -64211,9 +64283,9 @@ _Z27test_for_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB163_18 # %bb.20: # in Loop: Header=BB163_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -64252,12 +64324,14 @@ _Z27test_for_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB163_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI163_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI163_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -64297,22 +64371,8 @@ _Z27test_for_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi17EdEvPKT0_iPKc, .Lfunc_end163-_Z27test_for_loop_unroll_factorILi17EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi16EdEvPKT0_iPKc -.LCPI164_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI164_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI164_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI164_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI164_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI164_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi16EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi16EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi16EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi16EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi16EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi16EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi16EdEvPKT0_iPKc @@ -64372,19 +64432,31 @@ _Z27test_for_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f addi.w $s2, $s0, -15 addi.d $s6, $s1, 64 addi.d $s7, $s1, 128 - pcalau12i $a0, %pc_hi20(.LCPI164_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI164_0) - pcalau12i $a0, %pc_hi20(.LCPI164_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI164_1) - pcalau12i $a0, %pc_hi20(.LCPI164_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI164_2) - pcalau12i $a0, %pc_hi20(.LCPI164_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI164_4) - pcalau12i $a0, %pc_hi20(.LCPI164_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI164_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -64403,138 +64475,138 @@ _Z27test_for_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB164_5: # Parent Loop BB164_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 16 addi.w $a1, $a1, 16 @@ -64549,12 +64621,12 @@ _Z27test_for_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Parent Loop BB164_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -64562,20 +64634,20 @@ _Z27test_for_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB164_8: # %._crit_edge.us # in Loop: Header=BB164_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB164_3 # %bb.9: # in Loop: Header=BB164_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -64587,19 +64659,31 @@ _Z27test_for_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB164_10: # %.preheader18.lr.ph.split blez $s0, .LBB164_17 # %bb.11: # %.preheader18.us28.preheader - pcalau12i $a0, %pc_hi20(.LCPI164_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI164_0) - pcalau12i $a0, %pc_hi20(.LCPI164_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI164_1) - pcalau12i $a0, %pc_hi20(.LCPI164_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI164_2) - pcalau12i $a0, %pc_hi20(.LCPI164_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI164_4) - pcalau12i $a0, %pc_hi20(.LCPI164_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI164_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -64614,17 +64698,17 @@ _Z27test_for_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Child Loop BB164_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB164_14: # Parent Loop BB164_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -64632,20 +64716,20 @@ _Z27test_for_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # %bb.15: # %._crit_edge.us36 # in Loop: Header=BB164_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB164_12 # %bb.16: # in Loop: Header=BB164_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -64658,16 +64742,25 @@ _Z27test_for_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB164_17: # %.preheader18.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI164_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI164_0) - pcalau12i $a0, %pc_hi20(.LCPI164_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI164_1) - pcalau12i $a0, %pc_hi20(.LCPI164_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI164_2) - pcalau12i $a0, %pc_hi20(.LCPI164_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI164_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -64686,9 +64779,9 @@ _Z27test_for_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB164_18 # %bb.20: # in Loop: Header=BB164_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -64727,12 +64820,14 @@ _Z27test_for_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB164_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI164_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI164_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -64772,22 +64867,8 @@ _Z27test_for_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi16EdEvPKT0_iPKc, .Lfunc_end164-_Z27test_for_loop_unroll_factorILi16EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi15EdEvPKT0_iPKc -.LCPI165_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI165_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI165_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI165_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI165_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI165_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi15EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi15EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi15EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi15EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi15EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi15EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi15EdEvPKT0_iPKc @@ -64847,19 +64928,31 @@ _Z27test_for_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f addi.w $s2, $s0, -14 addi.d $s6, $s1, 56 addi.d $s7, $s1, 120 - pcalau12i $a0, %pc_hi20(.LCPI165_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI165_0) - pcalau12i $a0, %pc_hi20(.LCPI165_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI165_1) - pcalau12i $a0, %pc_hi20(.LCPI165_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI165_2) - pcalau12i $a0, %pc_hi20(.LCPI165_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI165_4) - pcalau12i $a0, %pc_hi20(.LCPI165_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI165_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -64878,130 +64971,130 @@ _Z27test_for_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB165_5: # Parent Loop BB165_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 15 addi.w $a1, $a1, 15 @@ -65016,12 +65109,12 @@ _Z27test_for_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Parent Loop BB165_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -65029,20 +65122,20 @@ _Z27test_for_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB165_8: # %._crit_edge.us # in Loop: Header=BB165_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB165_3 # %bb.9: # in Loop: Header=BB165_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -65054,19 +65147,31 @@ _Z27test_for_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB165_10: # %.preheader18.lr.ph.split blez $s0, .LBB165_17 # %bb.11: # %.preheader18.us28.preheader - pcalau12i $a0, %pc_hi20(.LCPI165_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI165_0) - pcalau12i $a0, %pc_hi20(.LCPI165_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI165_1) - pcalau12i $a0, %pc_hi20(.LCPI165_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI165_2) - pcalau12i $a0, %pc_hi20(.LCPI165_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI165_4) - pcalau12i $a0, %pc_hi20(.LCPI165_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI165_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -65081,17 +65186,17 @@ _Z27test_for_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Child Loop BB165_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB165_14: # Parent Loop BB165_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -65099,20 +65204,20 @@ _Z27test_for_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # %bb.15: # %._crit_edge.us36 # in Loop: Header=BB165_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB165_12 # %bb.16: # in Loop: Header=BB165_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -65125,16 +65230,25 @@ _Z27test_for_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB165_17: # %.preheader18.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI165_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI165_0) - pcalau12i $a0, %pc_hi20(.LCPI165_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI165_1) - pcalau12i $a0, %pc_hi20(.LCPI165_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI165_2) - pcalau12i $a0, %pc_hi20(.LCPI165_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI165_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -65153,9 +65267,9 @@ _Z27test_for_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB165_18 # %bb.20: # in Loop: Header=BB165_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -65194,12 +65308,14 @@ _Z27test_for_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB165_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI165_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI165_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -65239,22 +65355,8 @@ _Z27test_for_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi15EdEvPKT0_iPKc, .Lfunc_end165-_Z27test_for_loop_unroll_factorILi15EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi14EdEvPKT0_iPKc -.LCPI166_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI166_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI166_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI166_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI166_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI166_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi14EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi14EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi14EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi14EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi14EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi14EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi14EdEvPKT0_iPKc @@ -65314,19 +65416,31 @@ _Z27test_for_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f addi.w $s2, $s0, -13 addi.d $s6, $s1, 56 addi.d $s7, $s1, 112 - pcalau12i $a0, %pc_hi20(.LCPI166_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI166_0) - pcalau12i $a0, %pc_hi20(.LCPI166_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI166_1) - pcalau12i $a0, %pc_hi20(.LCPI166_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI166_2) - pcalau12i $a0, %pc_hi20(.LCPI166_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI166_4) - pcalau12i $a0, %pc_hi20(.LCPI166_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI166_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -65345,122 +65459,122 @@ _Z27test_for_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB166_5: # Parent Loop BB166_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 14 addi.w $a1, $a1, 14 @@ -65475,12 +65589,12 @@ _Z27test_for_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Parent Loop BB166_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -65488,20 +65602,20 @@ _Z27test_for_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB166_8: # %._crit_edge.us # in Loop: Header=BB166_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB166_3 # %bb.9: # in Loop: Header=BB166_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -65513,19 +65627,31 @@ _Z27test_for_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB166_10: # %.preheader18.lr.ph.split blez $s0, .LBB166_17 # %bb.11: # %.preheader18.us28.preheader - pcalau12i $a0, %pc_hi20(.LCPI166_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI166_0) - pcalau12i $a0, %pc_hi20(.LCPI166_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI166_1) - pcalau12i $a0, %pc_hi20(.LCPI166_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI166_2) - pcalau12i $a0, %pc_hi20(.LCPI166_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI166_4) - pcalau12i $a0, %pc_hi20(.LCPI166_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI166_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -65540,17 +65666,17 @@ _Z27test_for_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Child Loop BB166_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB166_14: # Parent Loop BB166_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -65558,20 +65684,20 @@ _Z27test_for_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # %bb.15: # %._crit_edge.us36 # in Loop: Header=BB166_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB166_12 # %bb.16: # in Loop: Header=BB166_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -65584,16 +65710,25 @@ _Z27test_for_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB166_17: # %.preheader18.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI166_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI166_0) - pcalau12i $a0, %pc_hi20(.LCPI166_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI166_1) - pcalau12i $a0, %pc_hi20(.LCPI166_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI166_2) - pcalau12i $a0, %pc_hi20(.LCPI166_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI166_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -65612,9 +65747,9 @@ _Z27test_for_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB166_18 # %bb.20: # in Loop: Header=BB166_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -65653,12 +65788,14 @@ _Z27test_for_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB166_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI166_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI166_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -65698,22 +65835,8 @@ _Z27test_for_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi14EdEvPKT0_iPKc, .Lfunc_end166-_Z27test_for_loop_unroll_factorILi14EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi13EdEvPKT0_iPKc -.LCPI167_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI167_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI167_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI167_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI167_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI167_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi13EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi13EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi13EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi13EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi13EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi13EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi13EdEvPKT0_iPKc @@ -65773,19 +65896,31 @@ _Z27test_for_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f addi.w $s2, $s0, -12 addi.d $s6, $s1, 48 addi.d $s7, $s1, 104 - pcalau12i $a0, %pc_hi20(.LCPI167_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI167_0) - pcalau12i $a0, %pc_hi20(.LCPI167_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI167_1) - pcalau12i $a0, %pc_hi20(.LCPI167_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI167_2) - pcalau12i $a0, %pc_hi20(.LCPI167_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI167_4) - pcalau12i $a0, %pc_hi20(.LCPI167_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI167_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -65804,114 +65939,114 @@ _Z27test_for_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB167_5: # Parent Loop BB167_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 13 addi.w $a1, $a1, 13 @@ -65926,12 +66061,12 @@ _Z27test_for_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Parent Loop BB167_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -65939,20 +66074,20 @@ _Z27test_for_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB167_8: # %._crit_edge.us # in Loop: Header=BB167_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB167_3 # %bb.9: # in Loop: Header=BB167_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -65964,19 +66099,31 @@ _Z27test_for_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB167_10: # %.preheader18.lr.ph.split blez $s0, .LBB167_17 # %bb.11: # %.preheader18.us28.preheader - pcalau12i $a0, %pc_hi20(.LCPI167_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI167_0) - pcalau12i $a0, %pc_hi20(.LCPI167_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI167_1) - pcalau12i $a0, %pc_hi20(.LCPI167_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI167_2) - pcalau12i $a0, %pc_hi20(.LCPI167_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI167_4) - pcalau12i $a0, %pc_hi20(.LCPI167_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI167_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -65991,17 +66138,17 @@ _Z27test_for_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Child Loop BB167_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB167_14: # Parent Loop BB167_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -66009,20 +66156,20 @@ _Z27test_for_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # %bb.15: # %._crit_edge.us36 # in Loop: Header=BB167_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB167_12 # %bb.16: # in Loop: Header=BB167_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -66035,16 +66182,25 @@ _Z27test_for_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB167_17: # %.preheader18.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI167_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI167_0) - pcalau12i $a0, %pc_hi20(.LCPI167_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI167_1) - pcalau12i $a0, %pc_hi20(.LCPI167_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI167_2) - pcalau12i $a0, %pc_hi20(.LCPI167_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI167_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -66063,9 +66219,9 @@ _Z27test_for_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB167_18 # %bb.20: # in Loop: Header=BB167_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -66104,12 +66260,14 @@ _Z27test_for_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB167_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI167_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI167_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -66149,22 +66307,8 @@ _Z27test_for_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi13EdEvPKT0_iPKc, .Lfunc_end167-_Z27test_for_loop_unroll_factorILi13EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi12EdEvPKT0_iPKc -.LCPI168_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI168_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI168_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI168_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI168_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI168_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi12EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi12EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi12EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi12EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi12EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi12EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi12EdEvPKT0_iPKc @@ -66224,19 +66368,31 @@ _Z27test_for_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f addi.w $s2, $s0, -11 addi.d $s6, $s1, 48 addi.d $s7, $s1, 96 - pcalau12i $a0, %pc_hi20(.LCPI168_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI168_0) - pcalau12i $a0, %pc_hi20(.LCPI168_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI168_1) - pcalau12i $a0, %pc_hi20(.LCPI168_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI168_2) - pcalau12i $a0, %pc_hi20(.LCPI168_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI168_4) - pcalau12i $a0, %pc_hi20(.LCPI168_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI168_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -66255,106 +66411,106 @@ _Z27test_for_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB168_5: # Parent Loop BB168_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 12 addi.w $a1, $a1, 12 @@ -66369,12 +66525,12 @@ _Z27test_for_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Parent Loop BB168_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -66382,20 +66538,20 @@ _Z27test_for_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB168_8: # %._crit_edge.us # in Loop: Header=BB168_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB168_3 # %bb.9: # in Loop: Header=BB168_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -66407,19 +66563,31 @@ _Z27test_for_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB168_10: # %.preheader17.lr.ph.split blez $s0, .LBB168_17 # %bb.11: # %.preheader17.us27.preheader - pcalau12i $a0, %pc_hi20(.LCPI168_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI168_0) - pcalau12i $a0, %pc_hi20(.LCPI168_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI168_1) - pcalau12i $a0, %pc_hi20(.LCPI168_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI168_2) - pcalau12i $a0, %pc_hi20(.LCPI168_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI168_4) - pcalau12i $a0, %pc_hi20(.LCPI168_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI168_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -66434,17 +66602,17 @@ _Z27test_for_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Child Loop BB168_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB168_14: # Parent Loop BB168_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -66452,20 +66620,20 @@ _Z27test_for_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # %bb.15: # %._crit_edge.us35 # in Loop: Header=BB168_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB168_12 # %bb.16: # in Loop: Header=BB168_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -66478,16 +66646,25 @@ _Z27test_for_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB168_17: # %.preheader17.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI168_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI168_0) - pcalau12i $a0, %pc_hi20(.LCPI168_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI168_1) - pcalau12i $a0, %pc_hi20(.LCPI168_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI168_2) - pcalau12i $a0, %pc_hi20(.LCPI168_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI168_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -66506,9 +66683,9 @@ _Z27test_for_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB168_18 # %bb.20: # in Loop: Header=BB168_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -66547,12 +66724,14 @@ _Z27test_for_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB168_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI168_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI168_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -66673,22 +66852,8 @@ _ZN14for_loop_testsILi10EdE7do_testEPKdPKc: # @_ZN14for_loop_testsILi10EdE7do_te .size _ZN14for_loop_testsILi10EdE7do_testEPKdPKc, .Lfunc_end169-_ZN14for_loop_testsILi10EdE7do_testEPKdPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi11EdEvPKT0_iPKc -.LCPI170_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI170_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI170_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI170_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI170_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI170_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi11EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi11EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi11EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi11EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi11EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi11EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi11EdEvPKT0_iPKc @@ -66748,19 +66913,31 @@ _Z27test_for_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f addi.w $s2, $s0, -10 addi.d $s6, $s1, 40 addi.d $s7, $s1, 88 - pcalau12i $a0, %pc_hi20(.LCPI170_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI170_0) - pcalau12i $a0, %pc_hi20(.LCPI170_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI170_1) - pcalau12i $a0, %pc_hi20(.LCPI170_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI170_2) - pcalau12i $a0, %pc_hi20(.LCPI170_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI170_4) - pcalau12i $a0, %pc_hi20(.LCPI170_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI170_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -66779,98 +66956,98 @@ _Z27test_for_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB170_5: # Parent Loop BB170_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 11 addi.w $a1, $a1, 11 @@ -66885,12 +67062,12 @@ _Z27test_for_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Parent Loop BB170_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -66898,20 +67075,20 @@ _Z27test_for_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB170_8: # %._crit_edge.us # in Loop: Header=BB170_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB170_3 # %bb.9: # in Loop: Header=BB170_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -66923,19 +67100,31 @@ _Z27test_for_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB170_10: # %.preheader17.lr.ph.split blez $s0, .LBB170_17 # %bb.11: # %.preheader17.us27.preheader - pcalau12i $a0, %pc_hi20(.LCPI170_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI170_0) - pcalau12i $a0, %pc_hi20(.LCPI170_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI170_1) - pcalau12i $a0, %pc_hi20(.LCPI170_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI170_2) - pcalau12i $a0, %pc_hi20(.LCPI170_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI170_4) - pcalau12i $a0, %pc_hi20(.LCPI170_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI170_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -66950,17 +67139,17 @@ _Z27test_for_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Child Loop BB170_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB170_14: # Parent Loop BB170_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -66968,20 +67157,20 @@ _Z27test_for_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # %bb.15: # %._crit_edge.us35 # in Loop: Header=BB170_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB170_12 # %bb.16: # in Loop: Header=BB170_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -66994,16 +67183,25 @@ _Z27test_for_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB170_17: # %.preheader17.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI170_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI170_0) - pcalau12i $a0, %pc_hi20(.LCPI170_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI170_1) - pcalau12i $a0, %pc_hi20(.LCPI170_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI170_2) - pcalau12i $a0, %pc_hi20(.LCPI170_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI170_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -67022,9 +67220,9 @@ _Z27test_for_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB170_18 # %bb.20: # in Loop: Header=BB170_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -67063,12 +67261,14 @@ _Z27test_for_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB170_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI170_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI170_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -67108,22 +67308,8 @@ _Z27test_for_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi11EdEvPKT0_iPKc, .Lfunc_end170-_Z27test_for_loop_unroll_factorILi11EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi10EdEvPKT0_iPKc -.LCPI171_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI171_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI171_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI171_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI171_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI171_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi10EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi10EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi10EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi10EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi10EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi10EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi10EdEvPKT0_iPKc @@ -67183,19 +67369,31 @@ _Z27test_for_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f addi.w $s2, $s0, -9 addi.d $s6, $s1, 40 addi.d $s7, $s1, 80 - pcalau12i $a0, %pc_hi20(.LCPI171_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI171_0) - pcalau12i $a0, %pc_hi20(.LCPI171_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI171_1) - pcalau12i $a0, %pc_hi20(.LCPI171_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI171_2) - pcalau12i $a0, %pc_hi20(.LCPI171_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI171_4) - pcalau12i $a0, %pc_hi20(.LCPI171_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI171_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -67213,90 +67411,90 @@ _Z27test_for_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a0, $zero move $a4, $s7 move $a3, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB171_5: # Parent Loop BB171_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a3, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 10 addi.w $a2, $a2, 10 @@ -67311,12 +67509,12 @@ _Z27test_for_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Parent Loop BB171_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a5, $a5, 8 @@ -67324,20 +67522,20 @@ _Z27test_for_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB171_8: # %._crit_edge.us # in Loop: Header=BB171_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB171_3 # %bb.9: # in Loop: Header=BB171_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -67350,19 +67548,31 @@ _Z27test_for_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB171_10: # %.preheader17.lr.ph.split blez $s0, .LBB171_17 # %bb.11: # %.preheader17.us27.preheader - pcalau12i $a0, %pc_hi20(.LCPI171_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI171_0) - pcalau12i $a0, %pc_hi20(.LCPI171_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI171_1) - pcalau12i $a0, %pc_hi20(.LCPI171_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI171_2) - pcalau12i $a0, %pc_hi20(.LCPI171_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI171_4) - pcalau12i $a0, %pc_hi20(.LCPI171_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI171_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -67377,17 +67587,17 @@ _Z27test_for_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # Child Loop BB171_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB171_14: # Parent Loop BB171_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -67395,20 +67605,20 @@ _Z27test_for_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f # %bb.15: # %._crit_edge.us35 # in Loop: Header=BB171_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB171_12 # %bb.16: # in Loop: Header=BB171_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -67421,16 +67631,25 @@ _Z27test_for_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .LBB171_17: # %.preheader17.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI171_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI171_0) - pcalau12i $a0, %pc_hi20(.LCPI171_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI171_1) - pcalau12i $a0, %pc_hi20(.LCPI171_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI171_2) - pcalau12i $a0, %pc_hi20(.LCPI171_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI171_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -67449,9 +67668,9 @@ _Z27test_for_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB171_18 # %bb.20: # in Loop: Header=BB171_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -67490,12 +67709,14 @@ _Z27test_for_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB171_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI171_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI171_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -67535,22 +67756,8 @@ _Z27test_for_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_f .size _Z27test_for_loop_unroll_factorILi10EdEvPKT0_iPKc, .Lfunc_end171-_Z27test_for_loop_unroll_factorILi10EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi9EdEvPKT0_iPKc -.LCPI172_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI172_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI172_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI172_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI172_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI172_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi9EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi9EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi9EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi9EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi9EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi9EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi9EdEvPKT0_iPKc @@ -67610,19 +67817,31 @@ _Z27test_for_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa addi.w $s2, $s0, -8 addi.d $s6, $s1, 32 addi.d $s7, $s1, 72 - pcalau12i $a0, %pc_hi20(.LCPI172_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI172_0) - pcalau12i $a0, %pc_hi20(.LCPI172_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI172_1) - pcalau12i $a0, %pc_hi20(.LCPI172_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI172_2) - pcalau12i $a0, %pc_hi20(.LCPI172_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI172_4) - pcalau12i $a0, %pc_hi20(.LCPI172_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI172_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -67640,82 +67859,82 @@ _Z27test_for_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a0, $zero move $a4, $s7 move $a3, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB172_5: # Parent Loop BB172_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a3, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 9 addi.w $a2, $a2, 9 @@ -67730,12 +67949,12 @@ _Z27test_for_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa # Parent Loop BB172_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a5, $a5, 8 @@ -67743,20 +67962,20 @@ _Z27test_for_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .LBB172_8: # %._crit_edge.us # in Loop: Header=BB172_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB172_3 # %bb.9: # in Loop: Header=BB172_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -67769,19 +67988,31 @@ _Z27test_for_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .LBB172_10: # %.preheader17.lr.ph.split blez $s0, .LBB172_17 # %bb.11: # %.preheader17.us27.preheader - pcalau12i $a0, %pc_hi20(.LCPI172_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI172_0) - pcalau12i $a0, %pc_hi20(.LCPI172_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI172_1) - pcalau12i $a0, %pc_hi20(.LCPI172_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI172_2) - pcalau12i $a0, %pc_hi20(.LCPI172_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI172_4) - pcalau12i $a0, %pc_hi20(.LCPI172_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI172_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -67796,17 +68027,17 @@ _Z27test_for_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa # Child Loop BB172_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB172_14: # Parent Loop BB172_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -67814,20 +68045,20 @@ _Z27test_for_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa # %bb.15: # %._crit_edge.us35 # in Loop: Header=BB172_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB172_12 # %bb.16: # in Loop: Header=BB172_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -67840,16 +68071,25 @@ _Z27test_for_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .LBB172_17: # %.preheader17.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI172_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI172_0) - pcalau12i $a0, %pc_hi20(.LCPI172_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI172_1) - pcalau12i $a0, %pc_hi20(.LCPI172_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI172_2) - pcalau12i $a0, %pc_hi20(.LCPI172_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI172_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -67868,9 +68108,9 @@ _Z27test_for_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB172_18 # %bb.20: # in Loop: Header=BB172_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -67909,12 +68149,14 @@ _Z27test_for_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB172_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI172_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI172_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -67954,22 +68196,8 @@ _Z27test_for_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .size _Z27test_for_loop_unroll_factorILi9EdEvPKT0_iPKc, .Lfunc_end172-_Z27test_for_loop_unroll_factorILi9EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi8EdEvPKT0_iPKc -.LCPI173_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI173_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI173_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI173_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI173_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI173_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi8EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi8EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi8EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi8EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi8EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi8EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi8EdEvPKT0_iPKc @@ -68029,19 +68257,31 @@ _Z27test_for_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa addi.w $s2, $s0, -7 addi.d $s6, $s1, 32 addi.d $s7, $s1, 64 - pcalau12i $a0, %pc_hi20(.LCPI173_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI173_0) - pcalau12i $a0, %pc_hi20(.LCPI173_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI173_1) - pcalau12i $a0, %pc_hi20(.LCPI173_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI173_2) - pcalau12i $a0, %pc_hi20(.LCPI173_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI173_4) - pcalau12i $a0, %pc_hi20(.LCPI173_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI173_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -68059,74 +68299,74 @@ _Z27test_for_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a0, $zero move $a4, $s7 move $a3, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB173_5: # Parent Loop BB173_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a3, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 8 addi.w $a2, $a2, 8 @@ -68141,12 +68381,12 @@ _Z27test_for_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa # Parent Loop BB173_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a5, $a5, 8 @@ -68154,20 +68394,20 @@ _Z27test_for_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .LBB173_8: # %._crit_edge.us # in Loop: Header=BB173_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB173_3 # %bb.9: # in Loop: Header=BB173_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -68180,19 +68420,31 @@ _Z27test_for_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .LBB173_10: # %.preheader17.lr.ph.split blez $s0, .LBB173_17 # %bb.11: # %.preheader17.us27.preheader - pcalau12i $a0, %pc_hi20(.LCPI173_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI173_0) - pcalau12i $a0, %pc_hi20(.LCPI173_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI173_1) - pcalau12i $a0, %pc_hi20(.LCPI173_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI173_2) - pcalau12i $a0, %pc_hi20(.LCPI173_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI173_4) - pcalau12i $a0, %pc_hi20(.LCPI173_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI173_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -68207,17 +68459,17 @@ _Z27test_for_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa # Child Loop BB173_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB173_14: # Parent Loop BB173_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -68225,20 +68477,20 @@ _Z27test_for_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa # %bb.15: # %._crit_edge.us35 # in Loop: Header=BB173_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB173_12 # %bb.16: # in Loop: Header=BB173_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -68251,16 +68503,25 @@ _Z27test_for_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .LBB173_17: # %.preheader17.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI173_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI173_0) - pcalau12i $a0, %pc_hi20(.LCPI173_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI173_1) - pcalau12i $a0, %pc_hi20(.LCPI173_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI173_2) - pcalau12i $a0, %pc_hi20(.LCPI173_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI173_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -68279,9 +68540,9 @@ _Z27test_for_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB173_18 # %bb.20: # in Loop: Header=BB173_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -68320,12 +68581,14 @@ _Z27test_for_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB173_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI173_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI173_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -68365,22 +68628,8 @@ _Z27test_for_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .size _Z27test_for_loop_unroll_factorILi8EdEvPKT0_iPKc, .Lfunc_end173-_Z27test_for_loop_unroll_factorILi8EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi7EdEvPKT0_iPKc -.LCPI174_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI174_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI174_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI174_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI174_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI174_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi7EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi7EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi7EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi7EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi7EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi7EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi7EdEvPKT0_iPKc @@ -68440,19 +68689,31 @@ _Z27test_for_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa addi.w $s2, $s0, -6 addi.d $s6, $s1, 24 addi.d $s7, $s1, 56 - pcalau12i $a0, %pc_hi20(.LCPI174_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI174_0) - pcalau12i $a0, %pc_hi20(.LCPI174_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI174_1) - pcalau12i $a0, %pc_hi20(.LCPI174_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI174_2) - pcalau12i $a0, %pc_hi20(.LCPI174_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI174_4) - pcalau12i $a0, %pc_hi20(.LCPI174_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI174_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -68470,66 +68731,66 @@ _Z27test_for_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a0, $zero move $a4, $s7 move $a3, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB174_5: # Parent Loop BB174_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a3, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 7 addi.w $a2, $a2, 7 @@ -68544,12 +68805,12 @@ _Z27test_for_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa # Parent Loop BB174_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a5, $a5, 8 @@ -68557,20 +68818,20 @@ _Z27test_for_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .LBB174_8: # %._crit_edge.us # in Loop: Header=BB174_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB174_3 # %bb.9: # in Loop: Header=BB174_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -68583,19 +68844,31 @@ _Z27test_for_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .LBB174_10: # %.preheader17.lr.ph.split blez $s0, .LBB174_17 # %bb.11: # %.preheader17.us27.preheader - pcalau12i $a0, %pc_hi20(.LCPI174_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI174_0) - pcalau12i $a0, %pc_hi20(.LCPI174_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI174_1) - pcalau12i $a0, %pc_hi20(.LCPI174_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI174_2) - pcalau12i $a0, %pc_hi20(.LCPI174_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI174_4) - pcalau12i $a0, %pc_hi20(.LCPI174_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI174_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -68610,17 +68883,17 @@ _Z27test_for_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa # Child Loop BB174_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB174_14: # Parent Loop BB174_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -68628,20 +68901,20 @@ _Z27test_for_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa # %bb.15: # %._crit_edge.us35 # in Loop: Header=BB174_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB174_12 # %bb.16: # in Loop: Header=BB174_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -68654,16 +68927,25 @@ _Z27test_for_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .LBB174_17: # %.preheader17.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI174_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI174_0) - pcalau12i $a0, %pc_hi20(.LCPI174_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI174_1) - pcalau12i $a0, %pc_hi20(.LCPI174_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI174_2) - pcalau12i $a0, %pc_hi20(.LCPI174_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI174_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -68682,9 +68964,9 @@ _Z27test_for_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB174_18 # %bb.20: # in Loop: Header=BB174_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -68723,12 +69005,14 @@ _Z27test_for_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB174_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI174_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI174_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -68768,22 +69052,8 @@ _Z27test_for_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .size _Z27test_for_loop_unroll_factorILi7EdEvPKT0_iPKc, .Lfunc_end174-_Z27test_for_loop_unroll_factorILi7EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi6EdEvPKT0_iPKc -.LCPI175_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI175_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI175_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI175_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI175_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI175_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi6EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi6EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi6EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi6EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi6EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi6EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi6EdEvPKT0_iPKc @@ -68843,19 +69113,31 @@ _Z27test_for_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa addi.w $s2, $s0, -5 addi.d $s6, $s1, 24 addi.d $s7, $s1, 48 - pcalau12i $a0, %pc_hi20(.LCPI175_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI175_0) - pcalau12i $a0, %pc_hi20(.LCPI175_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI175_1) - pcalau12i $a0, %pc_hi20(.LCPI175_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI175_2) - pcalau12i $a0, %pc_hi20(.LCPI175_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI175_4) - pcalau12i $a0, %pc_hi20(.LCPI175_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI175_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -68873,58 +69155,58 @@ _Z27test_for_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a0, $zero move $a4, $s7 move $a3, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB175_5: # Parent Loop BB175_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a3, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 6 addi.d $a3, $a3, 48 @@ -68939,12 +69221,12 @@ _Z27test_for_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa # Parent Loop BB175_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a5, $a5, 8 @@ -68952,20 +69234,20 @@ _Z27test_for_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .LBB175_8: # %._crit_edge.us # in Loop: Header=BB175_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB175_3 # %bb.9: # in Loop: Header=BB175_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -68978,19 +69260,31 @@ _Z27test_for_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .LBB175_10: # %.preheader17.lr.ph.split blez $s0, .LBB175_17 # %bb.11: # %.preheader17.us27.preheader - pcalau12i $a0, %pc_hi20(.LCPI175_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI175_0) - pcalau12i $a0, %pc_hi20(.LCPI175_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI175_1) - pcalau12i $a0, %pc_hi20(.LCPI175_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI175_2) - pcalau12i $a0, %pc_hi20(.LCPI175_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI175_4) - pcalau12i $a0, %pc_hi20(.LCPI175_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI175_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -69005,17 +69299,17 @@ _Z27test_for_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa # Child Loop BB175_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB175_14: # Parent Loop BB175_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -69023,20 +69317,20 @@ _Z27test_for_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa # %bb.15: # %._crit_edge.us35 # in Loop: Header=BB175_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB175_12 # %bb.16: # in Loop: Header=BB175_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -69049,16 +69343,25 @@ _Z27test_for_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .LBB175_17: # %.preheader17.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI175_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI175_0) - pcalau12i $a0, %pc_hi20(.LCPI175_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI175_1) - pcalau12i $a0, %pc_hi20(.LCPI175_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI175_2) - pcalau12i $a0, %pc_hi20(.LCPI175_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI175_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -69077,9 +69380,9 @@ _Z27test_for_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB175_18 # %bb.20: # in Loop: Header=BB175_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -69118,12 +69421,14 @@ _Z27test_for_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB175_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI175_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI175_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -69163,22 +69468,8 @@ _Z27test_for_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .size _Z27test_for_loop_unroll_factorILi6EdEvPKT0_iPKc, .Lfunc_end175-_Z27test_for_loop_unroll_factorILi6EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi5EdEvPKT0_iPKc -.LCPI176_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI176_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI176_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI176_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI176_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI176_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi5EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi5EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi5EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi5EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi5EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi5EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi5EdEvPKT0_iPKc @@ -69238,19 +69529,31 @@ _Z27test_for_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa addi.w $s2, $s0, -4 addi.d $s6, $s1, 16 addi.d $s7, $s1, 40 - pcalau12i $a0, %pc_hi20(.LCPI176_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI176_0) - pcalau12i $a0, %pc_hi20(.LCPI176_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI176_1) - pcalau12i $a0, %pc_hi20(.LCPI176_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI176_2) - pcalau12i $a0, %pc_hi20(.LCPI176_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI176_4) - pcalau12i $a0, %pc_hi20(.LCPI176_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI176_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -69268,50 +69571,50 @@ _Z27test_for_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a0, $zero move $a4, $s7 move $a3, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB176_5: # Parent Loop BB176_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a3, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 5 addi.d $a3, $a3, 40 @@ -69326,12 +69629,12 @@ _Z27test_for_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa # Parent Loop BB176_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a5, $a5, 8 @@ -69339,20 +69642,20 @@ _Z27test_for_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .LBB176_8: # %._crit_edge.us # in Loop: Header=BB176_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB176_3 # %bb.9: # in Loop: Header=BB176_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -69365,19 +69668,31 @@ _Z27test_for_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .LBB176_10: # %.preheader17.lr.ph.split blez $s0, .LBB176_17 # %bb.11: # %.preheader17.us27.preheader - pcalau12i $a0, %pc_hi20(.LCPI176_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI176_0) - pcalau12i $a0, %pc_hi20(.LCPI176_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI176_1) - pcalau12i $a0, %pc_hi20(.LCPI176_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI176_2) - pcalau12i $a0, %pc_hi20(.LCPI176_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI176_4) - pcalau12i $a0, %pc_hi20(.LCPI176_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI176_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -69392,17 +69707,17 @@ _Z27test_for_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa # Child Loop BB176_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB176_14: # Parent Loop BB176_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -69410,20 +69725,20 @@ _Z27test_for_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa # %bb.15: # %._crit_edge.us35 # in Loop: Header=BB176_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB176_12 # %bb.16: # in Loop: Header=BB176_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -69436,16 +69751,25 @@ _Z27test_for_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .LBB176_17: # %.preheader17.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI176_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI176_0) - pcalau12i $a0, %pc_hi20(.LCPI176_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI176_1) - pcalau12i $a0, %pc_hi20(.LCPI176_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI176_2) - pcalau12i $a0, %pc_hi20(.LCPI176_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI176_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -69464,9 +69788,9 @@ _Z27test_for_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB176_18 # %bb.20: # in Loop: Header=BB176_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -69505,12 +69829,14 @@ _Z27test_for_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB176_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI176_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI176_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -69550,22 +69876,8 @@ _Z27test_for_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .size _Z27test_for_loop_unroll_factorILi5EdEvPKT0_iPKc, .Lfunc_end176-_Z27test_for_loop_unroll_factorILi5EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi4EdEvPKT0_iPKc -.LCPI177_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI177_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI177_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI177_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI177_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI177_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi4EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi4EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi4EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi4EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi4EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi4EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi4EdEvPKT0_iPKc @@ -69625,19 +69937,31 @@ _Z27test_for_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa addi.w $s2, $s0, -3 addi.d $s6, $s1, 16 addi.d $s7, $s1, 32 - pcalau12i $a0, %pc_hi20(.LCPI177_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI177_0) - pcalau12i $a0, %pc_hi20(.LCPI177_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI177_1) - pcalau12i $a0, %pc_hi20(.LCPI177_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI177_2) - pcalau12i $a0, %pc_hi20(.LCPI177_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI177_4) - pcalau12i $a0, %pc_hi20(.LCPI177_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI177_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -69655,42 +69979,42 @@ _Z27test_for_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a0, $zero move $a4, $s7 move $a3, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB177_5: # Parent Loop BB177_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a3, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 4 addi.d $a3, $a3, 32 @@ -69705,12 +70029,12 @@ _Z27test_for_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa # Parent Loop BB177_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a5, $a5, 8 @@ -69718,20 +70042,20 @@ _Z27test_for_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .LBB177_8: # %._crit_edge.us # in Loop: Header=BB177_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB177_3 # %bb.9: # in Loop: Header=BB177_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -69744,19 +70068,31 @@ _Z27test_for_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .LBB177_10: # %.preheader17.lr.ph.split blez $s0, .LBB177_17 # %bb.11: # %.preheader17.us27.preheader - pcalau12i $a0, %pc_hi20(.LCPI177_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI177_0) - pcalau12i $a0, %pc_hi20(.LCPI177_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI177_1) - pcalau12i $a0, %pc_hi20(.LCPI177_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI177_2) - pcalau12i $a0, %pc_hi20(.LCPI177_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI177_4) - pcalau12i $a0, %pc_hi20(.LCPI177_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI177_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -69771,17 +70107,17 @@ _Z27test_for_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa # Child Loop BB177_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB177_14: # Parent Loop BB177_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -69789,20 +70125,20 @@ _Z27test_for_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa # %bb.15: # %._crit_edge.us35 # in Loop: Header=BB177_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB177_12 # %bb.16: # in Loop: Header=BB177_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -69815,16 +70151,25 @@ _Z27test_for_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .LBB177_17: # %.preheader17.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI177_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI177_0) - pcalau12i $a0, %pc_hi20(.LCPI177_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI177_1) - pcalau12i $a0, %pc_hi20(.LCPI177_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI177_2) - pcalau12i $a0, %pc_hi20(.LCPI177_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI177_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -69843,9 +70188,9 @@ _Z27test_for_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB177_18 # %bb.20: # in Loop: Header=BB177_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -69884,12 +70229,14 @@ _Z27test_for_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB177_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI177_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI177_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -69929,22 +70276,8 @@ _Z27test_for_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .size _Z27test_for_loop_unroll_factorILi4EdEvPKT0_iPKc, .Lfunc_end177-_Z27test_for_loop_unroll_factorILi4EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi3EdEvPKT0_iPKc -.LCPI178_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI178_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI178_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI178_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI178_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI178_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi3EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi3EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi3EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi3EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi3EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi3EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi3EdEvPKT0_iPKc @@ -70004,19 +70337,31 @@ _Z27test_for_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa addi.w $s2, $s0, -2 addi.d $s6, $s1, 16 addi.d $s7, $s1, 24 - pcalau12i $a0, %pc_hi20(.LCPI178_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI178_0) - pcalau12i $a0, %pc_hi20(.LCPI178_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI178_1) - pcalau12i $a0, %pc_hi20(.LCPI178_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI178_2) - pcalau12i $a0, %pc_hi20(.LCPI178_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI178_4) - pcalau12i $a0, %pc_hi20(.LCPI178_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI178_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -70034,34 +70379,34 @@ _Z27test_for_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a0, $zero move $a5, $s7 move $a3, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB178_5: # Parent Loop BB178_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a3, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a5 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 3 addi.d $a3, $a3, 24 @@ -70076,12 +70421,12 @@ _Z27test_for_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa # Parent Loop BB178_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -70089,20 +70434,20 @@ _Z27test_for_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .LBB178_8: # %._crit_edge.us # in Loop: Header=BB178_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB178_3 # %bb.9: # in Loop: Header=BB178_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -70115,19 +70460,31 @@ _Z27test_for_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .LBB178_10: # %.preheader17.lr.ph.split blez $s0, .LBB178_17 # %bb.11: # %.preheader17.us27.preheader - pcalau12i $a0, %pc_hi20(.LCPI178_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI178_0) - pcalau12i $a0, %pc_hi20(.LCPI178_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI178_1) - pcalau12i $a0, %pc_hi20(.LCPI178_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI178_2) - pcalau12i $a0, %pc_hi20(.LCPI178_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI178_4) - pcalau12i $a0, %pc_hi20(.LCPI178_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI178_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -70142,17 +70499,17 @@ _Z27test_for_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa # Child Loop BB178_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB178_14: # Parent Loop BB178_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -70160,20 +70517,20 @@ _Z27test_for_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa # %bb.15: # %._crit_edge.us35 # in Loop: Header=BB178_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB178_12 # %bb.16: # in Loop: Header=BB178_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -70186,16 +70543,25 @@ _Z27test_for_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .LBB178_17: # %.preheader17.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI178_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI178_0) - pcalau12i $a0, %pc_hi20(.LCPI178_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI178_1) - pcalau12i $a0, %pc_hi20(.LCPI178_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI178_2) - pcalau12i $a0, %pc_hi20(.LCPI178_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI178_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -70214,9 +70580,9 @@ _Z27test_for_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB178_18 # %bb.20: # in Loop: Header=BB178_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -70255,12 +70621,14 @@ _Z27test_for_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB178_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI178_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI178_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -70300,22 +70668,8 @@ _Z27test_for_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .size _Z27test_for_loop_unroll_factorILi3EdEvPKT0_iPKc, .Lfunc_end178-_Z27test_for_loop_unroll_factorILi3EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc -.LCPI179_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI179_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI179_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI179_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI179_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI179_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc @@ -70375,19 +70729,31 @@ _Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa addi.w $s5, $s0, -1 addi.d $s6, $s1, 8 addi.d $s7, $s1, 16 - pcalau12i $a0, %pc_hi20(.LCPI179_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI179_0) - pcalau12i $a0, %pc_hi20(.LCPI179_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI179_1) - pcalau12i $a0, %pc_hi20(.LCPI179_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI179_2) - pcalau12i $a0, %pc_hi20(.LCPI179_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI179_4) - pcalau12i $a0, %pc_hi20(.LCPI179_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI179_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -70405,26 +70771,26 @@ _Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a0, $zero move $a5, $s7 move $a4, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB179_5: # Parent Loop BB179_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a3, $a5 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 2 addi.d $a4, $a4, 16 @@ -70439,12 +70805,12 @@ _Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa # Parent Loop BB179_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a3, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a3, $a3, 8 @@ -70452,20 +70818,20 @@ _Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .LBB179_8: # %._crit_edge.us # in Loop: Header=BB179_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB179_3 # %bb.9: # in Loop: Header=BB179_4 Depth=1 ld.w $a1, $s2, %pc_lo12(current_test) @@ -70489,16 +70855,24 @@ _Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa lu32i.d $a2, -393216 lu52i.d $a2, $a2, -1022 vreplgr2vr.d $vr4, $a2 - lu32i.d $a0, -268678 - pcalau12i $a2, %pc_hi20(.LCPI179_2) - fld.d $fs0, $a2, %pc_lo12(.LCPI179_2) - pcalau12i $a2, %pc_hi20(.LCPI179_4) - fld.d $fs1, $a2, %pc_lo12(.LCPI179_4) - pcalau12i $a2, %pc_hi20(.LCPI179_3) - fld.d $fs2, $a2, %pc_lo12(.LCPI179_3) - lu52i.d $a0, $a0, 1042 - vreplgr2vr.d $vr5, $a0 - movgr2fr.d $fs3, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + vreplgr2vr.d $vr5, $a2 + movgr2fr.d $fs0, $zero + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s5, $zero @@ -70523,16 +70897,16 @@ _Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa vfadd.d $vr0, $vr0, $vr3 vfmadd.d $vr0, $vr0, $vr5, $vr4 vreplvei.d $vr1, $vr0, 0 - fadd.d $fa1, $fa1, $fs3 + fadd.d $fa1, $fa1, $fs0 vreplvei.d $vr0, $vr0, 1 - fmul.d $fa0, $fa0, $fs0 + fmul.d $fa0, $fa0, $fs1 fadd.d $fa0, $fa1, $fa0 fabs.d $fa2, $fa1 fdiv.d $fa1, $fa0, $fa1 - fcmp.clt.d $fcc0, $fs1, $fa2 + fcmp.clt.d $fcc0, $fs2, $fa2 fsel $fa0, $fa0, $fa1, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs2 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB179_12 # %bb.14: # in Loop: Header=BB179_13 Depth=1 ld.w $a1, $s2, %pc_lo12(current_test) @@ -70546,16 +70920,25 @@ _Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa b .LBB179_12 .LBB179_15: # %.preheader17.preheader fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI179_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI179_0) - pcalau12i $a0, %pc_hi20(.LCPI179_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI179_1) - pcalau12i $a0, %pc_hi20(.LCPI179_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI179_2) - pcalau12i $a0, %pc_hi20(.LCPI179_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI179_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -70574,9 +70957,9 @@ _Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB179_16 # %bb.18: # in Loop: Header=BB179_17 Depth=1 ld.w $a1, $s2, %pc_lo12(current_test) @@ -70615,12 +70998,14 @@ _Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB179_23: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI179_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI179_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -70660,22 +71045,8 @@ _Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .size _Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc, .Lfunc_end179-_Z27test_for_loop_unroll_factorILi2EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z27test_for_loop_unroll_factorILi1EdEvPKT0_iPKc -.LCPI180_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI180_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI180_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI180_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI180_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI180_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z27test_for_loop_unroll_factorILi1EdEvPKT0_iPKc,"axG",@progbits,_Z27test_for_loop_unroll_factorILi1EdEvPKT0_iPKc,comdat - .weak _Z27test_for_loop_unroll_factorILi1EdEvPKT0_iPKc + .weak _Z27test_for_loop_unroll_factorILi1EdEvPKT0_iPKc # -- Begin function _Z27test_for_loop_unroll_factorILi1EdEvPKT0_iPKc .p2align 5 .type _Z27test_for_loop_unroll_factorILi1EdEvPKT0_iPKc,@function _Z27test_for_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_factorILi1EdEvPKT0_iPKc @@ -70729,19 +71100,31 @@ _Z27test_for_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa # %bb.1: # %.preheader17.lr.ph blez $s0, .LBB180_8 # %bb.2: # %.preheader17.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI180_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI180_0) - pcalau12i $a0, %pc_hi20(.LCPI180_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI180_1) - pcalau12i $a0, %pc_hi20(.LCPI180_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI180_2) - pcalau12i $a0, %pc_hi20(.LCPI180_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI180_4) - pcalau12i $a0, %pc_hi20(.LCPI180_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI180_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $s6, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s7, $zero @@ -70756,17 +71139,17 @@ _Z27test_for_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa # Child Loop BB180_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB180_5: # Parent Loop BB180_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -70774,20 +71157,20 @@ _Z27test_for_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa # %bb.6: # %._crit_edge.us # in Loop: Header=BB180_4 Depth=1 fld.d $fa1, $s6, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB180_3 # %bb.7: # in Loop: Header=BB180_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -70800,16 +71183,25 @@ _Z27test_for_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .LBB180_8: # %.preheader17.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI180_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI180_0) - pcalau12i $a0, %pc_hi20(.LCPI180_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI180_1) - pcalau12i $a0, %pc_hi20(.LCPI180_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI180_2) - pcalau12i $a0, %pc_hi20(.LCPI180_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI180_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s2, $zero @@ -70828,9 +71220,9 @@ _Z27test_for_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB180_9 # %bb.11: # in Loop: Header=BB180_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -70869,12 +71261,14 @@ _Z27test_for_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB180_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI180_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI180_5) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -70912,22 +71306,8 @@ _Z27test_for_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z27test_for_loop_unroll_fa .size _Z27test_for_loop_unroll_factorILi1EdEvPKT0_iPKc, .Lfunc_end180-_Z27test_for_loop_unroll_factorILi1EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi32EdEvPKT0_iPKc -.LCPI181_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI181_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI181_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI181_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI181_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI181_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi32EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi32EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi32EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi32EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi32EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi32EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi32EdEvPKT0_iPKc @@ -70987,19 +71367,31 @@ _Z29test_while_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z29test_while_loop_unro addi.w $s2, $s0, -31 addi.d $s6, $s1, 128 addi.d $s7, $s1, 256 - pcalau12i $a0, %pc_hi20(.LCPI181_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI181_0) - pcalau12i $a0, %pc_hi20(.LCPI181_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI181_1) - pcalau12i $a0, %pc_hi20(.LCPI181_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI181_2) - pcalau12i $a0, %pc_hi20(.LCPI181_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI181_4) - pcalau12i $a0, %pc_hi20(.LCPI181_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI181_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -71018,266 +71410,266 @@ _Z29test_while_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB181_5: # Parent Loop BB181_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -128 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -120 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 120 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 32 addi.w $a1, $a1, 32 @@ -71292,12 +71684,12 @@ _Z29test_while_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Parent Loop BB181_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -71305,20 +71697,20 @@ _Z29test_while_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB181_8: # %._crit_edge.us # in Loop: Header=BB181_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB181_3 # %bb.9: # in Loop: Header=BB181_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -71330,19 +71722,31 @@ _Z29test_while_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB181_10: # %.preheader19.lr.ph.split blez $s0, .LBB181_17 # %bb.11: # %.preheader19.us29.preheader - pcalau12i $a0, %pc_hi20(.LCPI181_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI181_0) - pcalau12i $a0, %pc_hi20(.LCPI181_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI181_1) - pcalau12i $a0, %pc_hi20(.LCPI181_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI181_2) - pcalau12i $a0, %pc_hi20(.LCPI181_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI181_4) - pcalau12i $a0, %pc_hi20(.LCPI181_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI181_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -71357,17 +71761,17 @@ _Z29test_while_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Child Loop BB181_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB181_14: # Parent Loop BB181_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -71375,20 +71779,20 @@ _Z29test_while_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # %bb.15: # %._crit_edge.us37 # in Loop: Header=BB181_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB181_12 # %bb.16: # in Loop: Header=BB181_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -71401,16 +71805,25 @@ _Z29test_while_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB181_17: # %.preheader19.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI181_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI181_0) - pcalau12i $a0, %pc_hi20(.LCPI181_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI181_1) - pcalau12i $a0, %pc_hi20(.LCPI181_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI181_2) - pcalau12i $a0, %pc_hi20(.LCPI181_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI181_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -71429,9 +71842,9 @@ _Z29test_while_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z29test_while_loop_unro fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB181_18 # %bb.20: # in Loop: Header=BB181_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -71470,12 +71883,14 @@ _Z29test_while_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB181_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI181_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI181_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -71515,22 +71930,8 @@ _Z29test_while_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi32EdEvPKT0_iPKc, .Lfunc_end181-_Z29test_while_loop_unroll_factorILi32EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi31EdEvPKT0_iPKc -.LCPI182_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI182_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI182_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI182_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI182_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI182_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi31EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi31EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi31EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi31EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi31EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi31EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi31EdEvPKT0_iPKc @@ -71590,19 +71991,31 @@ _Z29test_while_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z29test_while_loop_unro addi.w $s2, $s0, -30 addi.d $s6, $s1, 120 addi.d $s7, $s1, 248 - pcalau12i $a0, %pc_hi20(.LCPI182_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI182_0) - pcalau12i $a0, %pc_hi20(.LCPI182_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI182_1) - pcalau12i $a0, %pc_hi20(.LCPI182_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI182_2) - pcalau12i $a0, %pc_hi20(.LCPI182_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI182_4) - pcalau12i $a0, %pc_hi20(.LCPI182_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI182_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -71621,258 +72034,258 @@ _Z29test_while_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB182_5: # Parent Loop BB182_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -120 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 120 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 31 addi.w $a1, $a1, 31 @@ -71887,12 +72300,12 @@ _Z29test_while_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Parent Loop BB182_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -71900,20 +72313,20 @@ _Z29test_while_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB182_8: # %._crit_edge.us # in Loop: Header=BB182_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB182_3 # %bb.9: # in Loop: Header=BB182_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -71925,19 +72338,31 @@ _Z29test_while_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB182_10: # %.preheader19.lr.ph.split blez $s0, .LBB182_17 # %bb.11: # %.preheader19.us29.preheader - pcalau12i $a0, %pc_hi20(.LCPI182_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI182_0) - pcalau12i $a0, %pc_hi20(.LCPI182_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI182_1) - pcalau12i $a0, %pc_hi20(.LCPI182_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI182_2) - pcalau12i $a0, %pc_hi20(.LCPI182_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI182_4) - pcalau12i $a0, %pc_hi20(.LCPI182_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI182_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -71952,17 +72377,17 @@ _Z29test_while_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Child Loop BB182_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB182_14: # Parent Loop BB182_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -71970,20 +72395,20 @@ _Z29test_while_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # %bb.15: # %._crit_edge.us37 # in Loop: Header=BB182_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB182_12 # %bb.16: # in Loop: Header=BB182_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -71996,16 +72421,25 @@ _Z29test_while_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB182_17: # %.preheader19.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI182_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI182_0) - pcalau12i $a0, %pc_hi20(.LCPI182_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI182_1) - pcalau12i $a0, %pc_hi20(.LCPI182_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI182_2) - pcalau12i $a0, %pc_hi20(.LCPI182_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI182_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -72024,9 +72458,9 @@ _Z29test_while_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z29test_while_loop_unro fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB182_18 # %bb.20: # in Loop: Header=BB182_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -72065,12 +72499,14 @@ _Z29test_while_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB182_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI182_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI182_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -72110,22 +72546,8 @@ _Z29test_while_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi31EdEvPKT0_iPKc, .Lfunc_end182-_Z29test_while_loop_unroll_factorILi31EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi30EdEvPKT0_iPKc -.LCPI183_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI183_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI183_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI183_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI183_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI183_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi30EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi30EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi30EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi30EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi30EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi30EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi30EdEvPKT0_iPKc @@ -72185,19 +72607,31 @@ _Z29test_while_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z29test_while_loop_unro addi.w $s2, $s0, -29 addi.d $s6, $s1, 120 addi.d $s7, $s1, 240 - pcalau12i $a0, %pc_hi20(.LCPI183_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI183_0) - pcalau12i $a0, %pc_hi20(.LCPI183_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI183_1) - pcalau12i $a0, %pc_hi20(.LCPI183_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI183_2) - pcalau12i $a0, %pc_hi20(.LCPI183_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI183_4) - pcalau12i $a0, %pc_hi20(.LCPI183_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI183_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -72216,250 +72650,250 @@ _Z29test_while_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB183_5: # Parent Loop BB183_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -120 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 30 addi.w $a1, $a1, 30 @@ -72474,12 +72908,12 @@ _Z29test_while_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Parent Loop BB183_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -72487,20 +72921,20 @@ _Z29test_while_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB183_8: # %._crit_edge.us # in Loop: Header=BB183_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB183_3 # %bb.9: # in Loop: Header=BB183_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -72512,19 +72946,31 @@ _Z29test_while_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB183_10: # %.preheader19.lr.ph.split blez $s0, .LBB183_17 # %bb.11: # %.preheader19.us29.preheader - pcalau12i $a0, %pc_hi20(.LCPI183_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI183_0) - pcalau12i $a0, %pc_hi20(.LCPI183_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI183_1) - pcalau12i $a0, %pc_hi20(.LCPI183_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI183_2) - pcalau12i $a0, %pc_hi20(.LCPI183_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI183_4) - pcalau12i $a0, %pc_hi20(.LCPI183_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI183_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -72539,17 +72985,17 @@ _Z29test_while_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Child Loop BB183_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB183_14: # Parent Loop BB183_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -72557,20 +73003,20 @@ _Z29test_while_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # %bb.15: # %._crit_edge.us37 # in Loop: Header=BB183_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB183_12 # %bb.16: # in Loop: Header=BB183_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -72583,16 +73029,25 @@ _Z29test_while_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB183_17: # %.preheader19.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI183_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI183_0) - pcalau12i $a0, %pc_hi20(.LCPI183_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI183_1) - pcalau12i $a0, %pc_hi20(.LCPI183_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI183_2) - pcalau12i $a0, %pc_hi20(.LCPI183_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI183_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -72611,9 +73066,9 @@ _Z29test_while_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z29test_while_loop_unro fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB183_18 # %bb.20: # in Loop: Header=BB183_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -72652,12 +73107,14 @@ _Z29test_while_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB183_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI183_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI183_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -72776,22 +73233,8 @@ _ZN16while_loop_testsILi28EdE7do_testEPKdPKc: # @_ZN16while_loop_testsILi28EdE7d .size _ZN16while_loop_testsILi28EdE7do_testEPKdPKc, .Lfunc_end184-_ZN16while_loop_testsILi28EdE7do_testEPKdPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi29EdEvPKT0_iPKc -.LCPI185_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI185_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI185_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI185_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI185_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI185_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi29EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi29EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi29EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi29EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi29EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi29EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi29EdEvPKT0_iPKc @@ -72851,19 +73294,31 @@ _Z29test_while_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z29test_while_loop_unro addi.w $s2, $s0, -28 addi.d $s6, $s1, 112 addi.d $s7, $s1, 232 - pcalau12i $a0, %pc_hi20(.LCPI185_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI185_0) - pcalau12i $a0, %pc_hi20(.LCPI185_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI185_1) - pcalau12i $a0, %pc_hi20(.LCPI185_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI185_2) - pcalau12i $a0, %pc_hi20(.LCPI185_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI185_4) - pcalau12i $a0, %pc_hi20(.LCPI185_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI185_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -72882,242 +73337,242 @@ _Z29test_while_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB185_5: # Parent Loop BB185_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 29 addi.w $a1, $a1, 29 @@ -73132,12 +73587,12 @@ _Z29test_while_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Parent Loop BB185_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -73145,20 +73600,20 @@ _Z29test_while_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB185_8: # %._crit_edge.us # in Loop: Header=BB185_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB185_3 # %bb.9: # in Loop: Header=BB185_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -73170,19 +73625,31 @@ _Z29test_while_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB185_10: # %.preheader19.lr.ph.split blez $s0, .LBB185_17 # %bb.11: # %.preheader19.us29.preheader - pcalau12i $a0, %pc_hi20(.LCPI185_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI185_0) - pcalau12i $a0, %pc_hi20(.LCPI185_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI185_1) - pcalau12i $a0, %pc_hi20(.LCPI185_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI185_2) - pcalau12i $a0, %pc_hi20(.LCPI185_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI185_4) - pcalau12i $a0, %pc_hi20(.LCPI185_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI185_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -73197,17 +73664,17 @@ _Z29test_while_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Child Loop BB185_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB185_14: # Parent Loop BB185_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -73215,20 +73682,20 @@ _Z29test_while_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # %bb.15: # %._crit_edge.us37 # in Loop: Header=BB185_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB185_12 # %bb.16: # in Loop: Header=BB185_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -73241,16 +73708,25 @@ _Z29test_while_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB185_17: # %.preheader19.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI185_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI185_0) - pcalau12i $a0, %pc_hi20(.LCPI185_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI185_1) - pcalau12i $a0, %pc_hi20(.LCPI185_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI185_2) - pcalau12i $a0, %pc_hi20(.LCPI185_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI185_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -73269,9 +73745,9 @@ _Z29test_while_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z29test_while_loop_unro fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB185_18 # %bb.20: # in Loop: Header=BB185_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -73310,12 +73786,14 @@ _Z29test_while_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB185_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI185_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI185_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -73355,22 +73833,8 @@ _Z29test_while_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi29EdEvPKT0_iPKc, .Lfunc_end185-_Z29test_while_loop_unroll_factorILi29EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi28EdEvPKT0_iPKc -.LCPI186_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI186_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI186_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI186_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI186_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI186_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi28EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi28EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi28EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi28EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi28EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi28EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi28EdEvPKT0_iPKc @@ -73430,19 +73894,31 @@ _Z29test_while_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z29test_while_loop_unro addi.w $s2, $s0, -27 addi.d $s6, $s1, 112 addi.d $s7, $s1, 224 - pcalau12i $a0, %pc_hi20(.LCPI186_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI186_0) - pcalau12i $a0, %pc_hi20(.LCPI186_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI186_1) - pcalau12i $a0, %pc_hi20(.LCPI186_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI186_2) - pcalau12i $a0, %pc_hi20(.LCPI186_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI186_4) - pcalau12i $a0, %pc_hi20(.LCPI186_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI186_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -73461,234 +73937,234 @@ _Z29test_while_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB186_5: # Parent Loop BB186_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 28 addi.w $a1, $a1, 28 @@ -73703,12 +74179,12 @@ _Z29test_while_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Parent Loop BB186_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -73716,20 +74192,20 @@ _Z29test_while_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB186_8: # %._crit_edge.us # in Loop: Header=BB186_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB186_3 # %bb.9: # in Loop: Header=BB186_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -73741,19 +74217,31 @@ _Z29test_while_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB186_10: # %.preheader19.lr.ph.split blez $s0, .LBB186_17 # %bb.11: # %.preheader19.us29.preheader - pcalau12i $a0, %pc_hi20(.LCPI186_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI186_0) - pcalau12i $a0, %pc_hi20(.LCPI186_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI186_1) - pcalau12i $a0, %pc_hi20(.LCPI186_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI186_2) - pcalau12i $a0, %pc_hi20(.LCPI186_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI186_4) - pcalau12i $a0, %pc_hi20(.LCPI186_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI186_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -73768,17 +74256,17 @@ _Z29test_while_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Child Loop BB186_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB186_14: # Parent Loop BB186_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -73786,20 +74274,20 @@ _Z29test_while_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # %bb.15: # %._crit_edge.us37 # in Loop: Header=BB186_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB186_12 # %bb.16: # in Loop: Header=BB186_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -73812,16 +74300,25 @@ _Z29test_while_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB186_17: # %.preheader19.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI186_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI186_0) - pcalau12i $a0, %pc_hi20(.LCPI186_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI186_1) - pcalau12i $a0, %pc_hi20(.LCPI186_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI186_2) - pcalau12i $a0, %pc_hi20(.LCPI186_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI186_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -73840,9 +74337,9 @@ _Z29test_while_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z29test_while_loop_unro fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB186_18 # %bb.20: # in Loop: Header=BB186_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -73881,12 +74378,14 @@ _Z29test_while_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB186_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI186_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI186_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -73926,22 +74425,8 @@ _Z29test_while_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi28EdEvPKT0_iPKc, .Lfunc_end186-_Z29test_while_loop_unroll_factorILi28EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi27EdEvPKT0_iPKc -.LCPI187_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI187_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI187_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI187_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI187_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI187_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi27EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi27EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi27EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi27EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi27EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi27EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi27EdEvPKT0_iPKc @@ -74001,19 +74486,31 @@ _Z29test_while_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z29test_while_loop_unro addi.w $s2, $s0, -26 addi.d $s6, $s1, 104 addi.d $s7, $s1, 216 - pcalau12i $a0, %pc_hi20(.LCPI187_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI187_0) - pcalau12i $a0, %pc_hi20(.LCPI187_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI187_1) - pcalau12i $a0, %pc_hi20(.LCPI187_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI187_2) - pcalau12i $a0, %pc_hi20(.LCPI187_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI187_4) - pcalau12i $a0, %pc_hi20(.LCPI187_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI187_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -74032,226 +74529,226 @@ _Z29test_while_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB187_5: # Parent Loop BB187_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 27 addi.w $a1, $a1, 27 @@ -74266,12 +74763,12 @@ _Z29test_while_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Parent Loop BB187_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -74279,20 +74776,20 @@ _Z29test_while_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB187_8: # %._crit_edge.us # in Loop: Header=BB187_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB187_3 # %bb.9: # in Loop: Header=BB187_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -74304,19 +74801,31 @@ _Z29test_while_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB187_10: # %.preheader19.lr.ph.split blez $s0, .LBB187_17 # %bb.11: # %.preheader19.us29.preheader - pcalau12i $a0, %pc_hi20(.LCPI187_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI187_0) - pcalau12i $a0, %pc_hi20(.LCPI187_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI187_1) - pcalau12i $a0, %pc_hi20(.LCPI187_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI187_2) - pcalau12i $a0, %pc_hi20(.LCPI187_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI187_4) - pcalau12i $a0, %pc_hi20(.LCPI187_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI187_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -74331,17 +74840,17 @@ _Z29test_while_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Child Loop BB187_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB187_14: # Parent Loop BB187_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -74349,20 +74858,20 @@ _Z29test_while_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # %bb.15: # %._crit_edge.us37 # in Loop: Header=BB187_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB187_12 # %bb.16: # in Loop: Header=BB187_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -74375,16 +74884,25 @@ _Z29test_while_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB187_17: # %.preheader19.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI187_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI187_0) - pcalau12i $a0, %pc_hi20(.LCPI187_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI187_1) - pcalau12i $a0, %pc_hi20(.LCPI187_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI187_2) - pcalau12i $a0, %pc_hi20(.LCPI187_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI187_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -74403,9 +74921,9 @@ _Z29test_while_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z29test_while_loop_unro fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB187_18 # %bb.20: # in Loop: Header=BB187_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -74444,12 +74962,14 @@ _Z29test_while_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB187_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI187_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI187_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -74489,22 +75009,8 @@ _Z29test_while_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi27EdEvPKT0_iPKc, .Lfunc_end187-_Z29test_while_loop_unroll_factorILi27EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi26EdEvPKT0_iPKc -.LCPI188_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI188_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI188_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI188_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI188_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI188_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi26EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi26EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi26EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi26EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi26EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi26EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi26EdEvPKT0_iPKc @@ -74564,19 +75070,31 @@ _Z29test_while_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z29test_while_loop_unro addi.w $s2, $s0, -25 addi.d $s6, $s1, 104 addi.d $s7, $s1, 208 - pcalau12i $a0, %pc_hi20(.LCPI188_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI188_0) - pcalau12i $a0, %pc_hi20(.LCPI188_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI188_1) - pcalau12i $a0, %pc_hi20(.LCPI188_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI188_2) - pcalau12i $a0, %pc_hi20(.LCPI188_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI188_4) - pcalau12i $a0, %pc_hi20(.LCPI188_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI188_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -74595,218 +75113,218 @@ _Z29test_while_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB188_5: # Parent Loop BB188_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 26 addi.w $a1, $a1, 26 @@ -74821,12 +75339,12 @@ _Z29test_while_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Parent Loop BB188_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -74834,20 +75352,20 @@ _Z29test_while_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB188_8: # %._crit_edge.us # in Loop: Header=BB188_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB188_3 # %bb.9: # in Loop: Header=BB188_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -74859,19 +75377,31 @@ _Z29test_while_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB188_10: # %.preheader19.lr.ph.split blez $s0, .LBB188_17 # %bb.11: # %.preheader19.us29.preheader - pcalau12i $a0, %pc_hi20(.LCPI188_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI188_0) - pcalau12i $a0, %pc_hi20(.LCPI188_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI188_1) - pcalau12i $a0, %pc_hi20(.LCPI188_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI188_2) - pcalau12i $a0, %pc_hi20(.LCPI188_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI188_4) - pcalau12i $a0, %pc_hi20(.LCPI188_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI188_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -74886,17 +75416,17 @@ _Z29test_while_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Child Loop BB188_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB188_14: # Parent Loop BB188_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -74904,20 +75434,20 @@ _Z29test_while_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # %bb.15: # %._crit_edge.us37 # in Loop: Header=BB188_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB188_12 # %bb.16: # in Loop: Header=BB188_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -74930,16 +75460,25 @@ _Z29test_while_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB188_17: # %.preheader19.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI188_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI188_0) - pcalau12i $a0, %pc_hi20(.LCPI188_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI188_1) - pcalau12i $a0, %pc_hi20(.LCPI188_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI188_2) - pcalau12i $a0, %pc_hi20(.LCPI188_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI188_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -74958,9 +75497,9 @@ _Z29test_while_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z29test_while_loop_unro fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB188_18 # %bb.20: # in Loop: Header=BB188_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -74999,12 +75538,14 @@ _Z29test_while_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB188_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI188_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI188_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -75044,22 +75585,8 @@ _Z29test_while_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi26EdEvPKT0_iPKc, .Lfunc_end188-_Z29test_while_loop_unroll_factorILi26EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi25EdEvPKT0_iPKc -.LCPI189_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI189_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI189_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI189_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI189_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI189_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi25EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi25EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi25EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi25EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi25EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi25EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi25EdEvPKT0_iPKc @@ -75119,19 +75646,31 @@ _Z29test_while_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z29test_while_loop_unro addi.w $s2, $s0, -24 addi.d $s6, $s1, 96 addi.d $s7, $s1, 200 - pcalau12i $a0, %pc_hi20(.LCPI189_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI189_0) - pcalau12i $a0, %pc_hi20(.LCPI189_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI189_1) - pcalau12i $a0, %pc_hi20(.LCPI189_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI189_2) - pcalau12i $a0, %pc_hi20(.LCPI189_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI189_4) - pcalau12i $a0, %pc_hi20(.LCPI189_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI189_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -75150,210 +75689,210 @@ _Z29test_while_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB189_5: # Parent Loop BB189_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 25 addi.w $a1, $a1, 25 @@ -75368,12 +75907,12 @@ _Z29test_while_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Parent Loop BB189_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -75381,20 +75920,20 @@ _Z29test_while_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB189_8: # %._crit_edge.us # in Loop: Header=BB189_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB189_3 # %bb.9: # in Loop: Header=BB189_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -75406,19 +75945,31 @@ _Z29test_while_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB189_10: # %.preheader19.lr.ph.split blez $s0, .LBB189_17 # %bb.11: # %.preheader19.us29.preheader - pcalau12i $a0, %pc_hi20(.LCPI189_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI189_0) - pcalau12i $a0, %pc_hi20(.LCPI189_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI189_1) - pcalau12i $a0, %pc_hi20(.LCPI189_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI189_2) - pcalau12i $a0, %pc_hi20(.LCPI189_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI189_4) - pcalau12i $a0, %pc_hi20(.LCPI189_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI189_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -75433,17 +75984,17 @@ _Z29test_while_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Child Loop BB189_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB189_14: # Parent Loop BB189_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -75451,20 +76002,20 @@ _Z29test_while_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # %bb.15: # %._crit_edge.us37 # in Loop: Header=BB189_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB189_12 # %bb.16: # in Loop: Header=BB189_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -75477,16 +76028,25 @@ _Z29test_while_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB189_17: # %.preheader19.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI189_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI189_0) - pcalau12i $a0, %pc_hi20(.LCPI189_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI189_1) - pcalau12i $a0, %pc_hi20(.LCPI189_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI189_2) - pcalau12i $a0, %pc_hi20(.LCPI189_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI189_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -75505,9 +76065,9 @@ _Z29test_while_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z29test_while_loop_unro fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB189_18 # %bb.20: # in Loop: Header=BB189_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -75546,12 +76106,14 @@ _Z29test_while_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB189_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI189_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI189_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -75591,22 +76153,8 @@ _Z29test_while_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi25EdEvPKT0_iPKc, .Lfunc_end189-_Z29test_while_loop_unroll_factorILi25EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi24EdEvPKT0_iPKc -.LCPI190_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI190_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI190_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI190_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI190_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI190_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi24EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi24EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi24EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi24EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi24EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi24EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi24EdEvPKT0_iPKc @@ -75666,19 +76214,31 @@ _Z29test_while_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z29test_while_loop_unro addi.w $s2, $s0, -23 addi.d $s6, $s1, 96 addi.d $s7, $s1, 192 - pcalau12i $a0, %pc_hi20(.LCPI190_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI190_0) - pcalau12i $a0, %pc_hi20(.LCPI190_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI190_1) - pcalau12i $a0, %pc_hi20(.LCPI190_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI190_2) - pcalau12i $a0, %pc_hi20(.LCPI190_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI190_4) - pcalau12i $a0, %pc_hi20(.LCPI190_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI190_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -75697,202 +76257,202 @@ _Z29test_while_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB190_5: # Parent Loop BB190_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 24 addi.w $a1, $a1, 24 @@ -75907,12 +76467,12 @@ _Z29test_while_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Parent Loop BB190_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -75920,20 +76480,20 @@ _Z29test_while_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB190_8: # %._crit_edge.us # in Loop: Header=BB190_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB190_3 # %bb.9: # in Loop: Header=BB190_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -75945,19 +76505,31 @@ _Z29test_while_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB190_10: # %.preheader19.lr.ph.split blez $s0, .LBB190_17 # %bb.11: # %.preheader19.us29.preheader - pcalau12i $a0, %pc_hi20(.LCPI190_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI190_0) - pcalau12i $a0, %pc_hi20(.LCPI190_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI190_1) - pcalau12i $a0, %pc_hi20(.LCPI190_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI190_2) - pcalau12i $a0, %pc_hi20(.LCPI190_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI190_4) - pcalau12i $a0, %pc_hi20(.LCPI190_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI190_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -75972,17 +76544,17 @@ _Z29test_while_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Child Loop BB190_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB190_14: # Parent Loop BB190_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -75990,20 +76562,20 @@ _Z29test_while_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # %bb.15: # %._crit_edge.us37 # in Loop: Header=BB190_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB190_12 # %bb.16: # in Loop: Header=BB190_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -76016,16 +76588,25 @@ _Z29test_while_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB190_17: # %.preheader19.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI190_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI190_0) - pcalau12i $a0, %pc_hi20(.LCPI190_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI190_1) - pcalau12i $a0, %pc_hi20(.LCPI190_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI190_2) - pcalau12i $a0, %pc_hi20(.LCPI190_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI190_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -76044,9 +76625,9 @@ _Z29test_while_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z29test_while_loop_unro fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB190_18 # %bb.20: # in Loop: Header=BB190_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -76085,12 +76666,14 @@ _Z29test_while_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB190_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI190_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI190_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -76130,22 +76713,8 @@ _Z29test_while_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi24EdEvPKT0_iPKc, .Lfunc_end190-_Z29test_while_loop_unroll_factorILi24EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi23EdEvPKT0_iPKc -.LCPI191_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI191_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI191_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI191_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI191_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI191_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi23EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi23EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi23EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi23EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi23EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi23EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi23EdEvPKT0_iPKc @@ -76205,19 +76774,31 @@ _Z29test_while_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z29test_while_loop_unro addi.w $s2, $s0, -22 addi.d $s6, $s1, 88 addi.d $s7, $s1, 184 - pcalau12i $a0, %pc_hi20(.LCPI191_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI191_0) - pcalau12i $a0, %pc_hi20(.LCPI191_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI191_1) - pcalau12i $a0, %pc_hi20(.LCPI191_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI191_2) - pcalau12i $a0, %pc_hi20(.LCPI191_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI191_4) - pcalau12i $a0, %pc_hi20(.LCPI191_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI191_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -76236,194 +76817,194 @@ _Z29test_while_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB191_5: # Parent Loop BB191_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 23 addi.w $a1, $a1, 23 @@ -76438,12 +77019,12 @@ _Z29test_while_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Parent Loop BB191_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -76451,20 +77032,20 @@ _Z29test_while_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB191_8: # %._crit_edge.us # in Loop: Header=BB191_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB191_3 # %bb.9: # in Loop: Header=BB191_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -76476,19 +77057,31 @@ _Z29test_while_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB191_10: # %.preheader18.lr.ph.split blez $s0, .LBB191_17 # %bb.11: # %.preheader18.us28.preheader - pcalau12i $a0, %pc_hi20(.LCPI191_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI191_0) - pcalau12i $a0, %pc_hi20(.LCPI191_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI191_1) - pcalau12i $a0, %pc_hi20(.LCPI191_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI191_2) - pcalau12i $a0, %pc_hi20(.LCPI191_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI191_4) - pcalau12i $a0, %pc_hi20(.LCPI191_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI191_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -76503,17 +77096,17 @@ _Z29test_while_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Child Loop BB191_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB191_14: # Parent Loop BB191_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -76521,20 +77114,20 @@ _Z29test_while_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # %bb.15: # %._crit_edge.us36 # in Loop: Header=BB191_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB191_12 # %bb.16: # in Loop: Header=BB191_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -76547,16 +77140,25 @@ _Z29test_while_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB191_17: # %.preheader18.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI191_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI191_0) - pcalau12i $a0, %pc_hi20(.LCPI191_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI191_1) - pcalau12i $a0, %pc_hi20(.LCPI191_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI191_2) - pcalau12i $a0, %pc_hi20(.LCPI191_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI191_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -76575,9 +77177,9 @@ _Z29test_while_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z29test_while_loop_unro fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB191_18 # %bb.20: # in Loop: Header=BB191_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -76616,12 +77218,14 @@ _Z29test_while_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB191_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI191_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI191_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -76661,22 +77265,8 @@ _Z29test_while_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi23EdEvPKT0_iPKc, .Lfunc_end191-_Z29test_while_loop_unroll_factorILi23EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi22EdEvPKT0_iPKc -.LCPI192_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI192_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI192_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI192_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI192_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI192_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi22EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi22EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi22EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi22EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi22EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi22EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi22EdEvPKT0_iPKc @@ -76736,19 +77326,31 @@ _Z29test_while_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z29test_while_loop_unro addi.w $s2, $s0, -21 addi.d $s6, $s1, 88 addi.d $s7, $s1, 176 - pcalau12i $a0, %pc_hi20(.LCPI192_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI192_0) - pcalau12i $a0, %pc_hi20(.LCPI192_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI192_1) - pcalau12i $a0, %pc_hi20(.LCPI192_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI192_2) - pcalau12i $a0, %pc_hi20(.LCPI192_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI192_4) - pcalau12i $a0, %pc_hi20(.LCPI192_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI192_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -76767,186 +77369,186 @@ _Z29test_while_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB192_5: # Parent Loop BB192_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 22 addi.w $a1, $a1, 22 @@ -76961,12 +77563,12 @@ _Z29test_while_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Parent Loop BB192_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -76974,20 +77576,20 @@ _Z29test_while_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB192_8: # %._crit_edge.us # in Loop: Header=BB192_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB192_3 # %bb.9: # in Loop: Header=BB192_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -76999,19 +77601,31 @@ _Z29test_while_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB192_10: # %.preheader18.lr.ph.split blez $s0, .LBB192_17 # %bb.11: # %.preheader18.us28.preheader - pcalau12i $a0, %pc_hi20(.LCPI192_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI192_0) - pcalau12i $a0, %pc_hi20(.LCPI192_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI192_1) - pcalau12i $a0, %pc_hi20(.LCPI192_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI192_2) - pcalau12i $a0, %pc_hi20(.LCPI192_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI192_4) - pcalau12i $a0, %pc_hi20(.LCPI192_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI192_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -77026,17 +77640,17 @@ _Z29test_while_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Child Loop BB192_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB192_14: # Parent Loop BB192_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -77044,20 +77658,20 @@ _Z29test_while_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # %bb.15: # %._crit_edge.us36 # in Loop: Header=BB192_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB192_12 # %bb.16: # in Loop: Header=BB192_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -77070,16 +77684,25 @@ _Z29test_while_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB192_17: # %.preheader18.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI192_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI192_0) - pcalau12i $a0, %pc_hi20(.LCPI192_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI192_1) - pcalau12i $a0, %pc_hi20(.LCPI192_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI192_2) - pcalau12i $a0, %pc_hi20(.LCPI192_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI192_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -77098,9 +77721,9 @@ _Z29test_while_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z29test_while_loop_unro fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB192_18 # %bb.20: # in Loop: Header=BB192_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -77139,12 +77762,14 @@ _Z29test_while_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB192_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI192_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI192_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -77184,22 +77809,8 @@ _Z29test_while_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi22EdEvPKT0_iPKc, .Lfunc_end192-_Z29test_while_loop_unroll_factorILi22EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi21EdEvPKT0_iPKc -.LCPI193_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI193_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI193_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI193_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI193_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI193_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi21EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi21EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi21EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi21EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi21EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi21EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi21EdEvPKT0_iPKc @@ -77259,19 +77870,31 @@ _Z29test_while_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z29test_while_loop_unro addi.w $s2, $s0, -20 addi.d $s6, $s1, 80 addi.d $s7, $s1, 168 - pcalau12i $a0, %pc_hi20(.LCPI193_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI193_0) - pcalau12i $a0, %pc_hi20(.LCPI193_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI193_1) - pcalau12i $a0, %pc_hi20(.LCPI193_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI193_2) - pcalau12i $a0, %pc_hi20(.LCPI193_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI193_4) - pcalau12i $a0, %pc_hi20(.LCPI193_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI193_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -77290,178 +77913,178 @@ _Z29test_while_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB193_5: # Parent Loop BB193_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 21 addi.w $a1, $a1, 21 @@ -77476,12 +78099,12 @@ _Z29test_while_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Parent Loop BB193_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -77489,20 +78112,20 @@ _Z29test_while_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB193_8: # %._crit_edge.us # in Loop: Header=BB193_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB193_3 # %bb.9: # in Loop: Header=BB193_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -77514,19 +78137,31 @@ _Z29test_while_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB193_10: # %.preheader18.lr.ph.split blez $s0, .LBB193_17 # %bb.11: # %.preheader18.us28.preheader - pcalau12i $a0, %pc_hi20(.LCPI193_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI193_0) - pcalau12i $a0, %pc_hi20(.LCPI193_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI193_1) - pcalau12i $a0, %pc_hi20(.LCPI193_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI193_2) - pcalau12i $a0, %pc_hi20(.LCPI193_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI193_4) - pcalau12i $a0, %pc_hi20(.LCPI193_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI193_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -77541,17 +78176,17 @@ _Z29test_while_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Child Loop BB193_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB193_14: # Parent Loop BB193_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -77559,20 +78194,20 @@ _Z29test_while_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # %bb.15: # %._crit_edge.us36 # in Loop: Header=BB193_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB193_12 # %bb.16: # in Loop: Header=BB193_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -77585,16 +78220,25 @@ _Z29test_while_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB193_17: # %.preheader18.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI193_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI193_0) - pcalau12i $a0, %pc_hi20(.LCPI193_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI193_1) - pcalau12i $a0, %pc_hi20(.LCPI193_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI193_2) - pcalau12i $a0, %pc_hi20(.LCPI193_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI193_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -77613,9 +78257,9 @@ _Z29test_while_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z29test_while_loop_unro fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB193_18 # %bb.20: # in Loop: Header=BB193_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -77654,12 +78298,14 @@ _Z29test_while_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB193_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI193_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI193_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -77777,22 +78423,8 @@ _ZN16while_loop_testsILi19EdE7do_testEPKdPKc: # @_ZN16while_loop_testsILi19EdE7d .size _ZN16while_loop_testsILi19EdE7do_testEPKdPKc, .Lfunc_end194-_ZN16while_loop_testsILi19EdE7do_testEPKdPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi20EdEvPKT0_iPKc -.LCPI195_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI195_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI195_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI195_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI195_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI195_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi20EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi20EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi20EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi20EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi20EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi20EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi20EdEvPKT0_iPKc @@ -77852,19 +78484,31 @@ _Z29test_while_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z29test_while_loop_unro addi.w $s2, $s0, -19 addi.d $s6, $s1, 80 addi.d $s7, $s1, 160 - pcalau12i $a0, %pc_hi20(.LCPI195_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI195_0) - pcalau12i $a0, %pc_hi20(.LCPI195_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI195_1) - pcalau12i $a0, %pc_hi20(.LCPI195_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI195_2) - pcalau12i $a0, %pc_hi20(.LCPI195_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI195_4) - pcalau12i $a0, %pc_hi20(.LCPI195_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI195_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -77883,170 +78527,170 @@ _Z29test_while_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB195_5: # Parent Loop BB195_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 20 addi.w $a1, $a1, 20 @@ -78061,12 +78705,12 @@ _Z29test_while_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Parent Loop BB195_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -78074,20 +78718,20 @@ _Z29test_while_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB195_8: # %._crit_edge.us # in Loop: Header=BB195_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB195_3 # %bb.9: # in Loop: Header=BB195_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -78099,19 +78743,31 @@ _Z29test_while_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB195_10: # %.preheader18.lr.ph.split blez $s0, .LBB195_17 # %bb.11: # %.preheader18.us28.preheader - pcalau12i $a0, %pc_hi20(.LCPI195_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI195_0) - pcalau12i $a0, %pc_hi20(.LCPI195_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI195_1) - pcalau12i $a0, %pc_hi20(.LCPI195_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI195_2) - pcalau12i $a0, %pc_hi20(.LCPI195_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI195_4) - pcalau12i $a0, %pc_hi20(.LCPI195_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI195_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -78126,17 +78782,17 @@ _Z29test_while_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Child Loop BB195_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB195_14: # Parent Loop BB195_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -78144,20 +78800,20 @@ _Z29test_while_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # %bb.15: # %._crit_edge.us36 # in Loop: Header=BB195_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB195_12 # %bb.16: # in Loop: Header=BB195_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -78170,16 +78826,25 @@ _Z29test_while_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB195_17: # %.preheader18.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI195_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI195_0) - pcalau12i $a0, %pc_hi20(.LCPI195_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI195_1) - pcalau12i $a0, %pc_hi20(.LCPI195_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI195_2) - pcalau12i $a0, %pc_hi20(.LCPI195_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI195_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -78198,9 +78863,9 @@ _Z29test_while_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z29test_while_loop_unro fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB195_18 # %bb.20: # in Loop: Header=BB195_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -78239,12 +78904,14 @@ _Z29test_while_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB195_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI195_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI195_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -78284,22 +78951,8 @@ _Z29test_while_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi20EdEvPKT0_iPKc, .Lfunc_end195-_Z29test_while_loop_unroll_factorILi20EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi19EdEvPKT0_iPKc -.LCPI196_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI196_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI196_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI196_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI196_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI196_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi19EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi19EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi19EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi19EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi19EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi19EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi19EdEvPKT0_iPKc @@ -78359,19 +79012,31 @@ _Z29test_while_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z29test_while_loop_unro addi.w $s2, $s0, -18 addi.d $s6, $s1, 72 addi.d $s7, $s1, 152 - pcalau12i $a0, %pc_hi20(.LCPI196_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI196_0) - pcalau12i $a0, %pc_hi20(.LCPI196_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI196_1) - pcalau12i $a0, %pc_hi20(.LCPI196_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI196_2) - pcalau12i $a0, %pc_hi20(.LCPI196_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI196_4) - pcalau12i $a0, %pc_hi20(.LCPI196_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI196_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -78390,162 +79055,162 @@ _Z29test_while_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB196_5: # Parent Loop BB196_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 19 addi.w $a1, $a1, 19 @@ -78560,12 +79225,12 @@ _Z29test_while_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Parent Loop BB196_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -78573,20 +79238,20 @@ _Z29test_while_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB196_8: # %._crit_edge.us # in Loop: Header=BB196_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB196_3 # %bb.9: # in Loop: Header=BB196_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -78598,19 +79263,31 @@ _Z29test_while_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB196_10: # %.preheader18.lr.ph.split blez $s0, .LBB196_17 # %bb.11: # %.preheader18.us28.preheader - pcalau12i $a0, %pc_hi20(.LCPI196_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI196_0) - pcalau12i $a0, %pc_hi20(.LCPI196_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI196_1) - pcalau12i $a0, %pc_hi20(.LCPI196_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI196_2) - pcalau12i $a0, %pc_hi20(.LCPI196_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI196_4) - pcalau12i $a0, %pc_hi20(.LCPI196_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI196_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -78625,17 +79302,17 @@ _Z29test_while_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Child Loop BB196_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB196_14: # Parent Loop BB196_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -78643,20 +79320,20 @@ _Z29test_while_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # %bb.15: # %._crit_edge.us36 # in Loop: Header=BB196_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB196_12 # %bb.16: # in Loop: Header=BB196_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -78669,16 +79346,25 @@ _Z29test_while_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB196_17: # %.preheader18.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI196_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI196_0) - pcalau12i $a0, %pc_hi20(.LCPI196_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI196_1) - pcalau12i $a0, %pc_hi20(.LCPI196_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI196_2) - pcalau12i $a0, %pc_hi20(.LCPI196_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI196_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -78697,9 +79383,9 @@ _Z29test_while_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z29test_while_loop_unro fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB196_18 # %bb.20: # in Loop: Header=BB196_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -78738,12 +79424,14 @@ _Z29test_while_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB196_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI196_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI196_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -78783,22 +79471,8 @@ _Z29test_while_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi19EdEvPKT0_iPKc, .Lfunc_end196-_Z29test_while_loop_unroll_factorILi19EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi18EdEvPKT0_iPKc -.LCPI197_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI197_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI197_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI197_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI197_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI197_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi18EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi18EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi18EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi18EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi18EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi18EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi18EdEvPKT0_iPKc @@ -78858,19 +79532,31 @@ _Z29test_while_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z29test_while_loop_unro addi.w $s2, $s0, -17 addi.d $s6, $s1, 72 addi.d $s7, $s1, 144 - pcalau12i $a0, %pc_hi20(.LCPI197_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI197_0) - pcalau12i $a0, %pc_hi20(.LCPI197_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI197_1) - pcalau12i $a0, %pc_hi20(.LCPI197_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI197_2) - pcalau12i $a0, %pc_hi20(.LCPI197_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI197_4) - pcalau12i $a0, %pc_hi20(.LCPI197_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI197_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -78889,154 +79575,154 @@ _Z29test_while_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB197_5: # Parent Loop BB197_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 18 addi.w $a1, $a1, 18 @@ -79051,12 +79737,12 @@ _Z29test_while_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Parent Loop BB197_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -79064,20 +79750,20 @@ _Z29test_while_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB197_8: # %._crit_edge.us # in Loop: Header=BB197_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB197_3 # %bb.9: # in Loop: Header=BB197_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -79089,19 +79775,31 @@ _Z29test_while_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB197_10: # %.preheader18.lr.ph.split blez $s0, .LBB197_17 # %bb.11: # %.preheader18.us28.preheader - pcalau12i $a0, %pc_hi20(.LCPI197_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI197_0) - pcalau12i $a0, %pc_hi20(.LCPI197_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI197_1) - pcalau12i $a0, %pc_hi20(.LCPI197_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI197_2) - pcalau12i $a0, %pc_hi20(.LCPI197_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI197_4) - pcalau12i $a0, %pc_hi20(.LCPI197_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI197_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -79116,17 +79814,17 @@ _Z29test_while_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Child Loop BB197_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB197_14: # Parent Loop BB197_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -79134,20 +79832,20 @@ _Z29test_while_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # %bb.15: # %._crit_edge.us36 # in Loop: Header=BB197_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB197_12 # %bb.16: # in Loop: Header=BB197_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -79160,16 +79858,25 @@ _Z29test_while_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB197_17: # %.preheader18.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI197_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI197_0) - pcalau12i $a0, %pc_hi20(.LCPI197_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI197_1) - pcalau12i $a0, %pc_hi20(.LCPI197_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI197_2) - pcalau12i $a0, %pc_hi20(.LCPI197_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI197_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -79188,9 +79895,9 @@ _Z29test_while_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z29test_while_loop_unro fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB197_18 # %bb.20: # in Loop: Header=BB197_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -79229,12 +79936,14 @@ _Z29test_while_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB197_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI197_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI197_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -79274,22 +79983,8 @@ _Z29test_while_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi18EdEvPKT0_iPKc, .Lfunc_end197-_Z29test_while_loop_unroll_factorILi18EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi17EdEvPKT0_iPKc -.LCPI198_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI198_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI198_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI198_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI198_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI198_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi17EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi17EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi17EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi17EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi17EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi17EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi17EdEvPKT0_iPKc @@ -79349,19 +80044,31 @@ _Z29test_while_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z29test_while_loop_unro addi.w $s2, $s0, -16 addi.d $s6, $s1, 64 addi.d $s7, $s1, 136 - pcalau12i $a0, %pc_hi20(.LCPI198_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI198_0) - pcalau12i $a0, %pc_hi20(.LCPI198_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI198_1) - pcalau12i $a0, %pc_hi20(.LCPI198_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI198_2) - pcalau12i $a0, %pc_hi20(.LCPI198_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI198_4) - pcalau12i $a0, %pc_hi20(.LCPI198_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI198_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -79380,146 +80087,146 @@ _Z29test_while_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB198_5: # Parent Loop BB198_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 17 addi.w $a1, $a1, 17 @@ -79534,12 +80241,12 @@ _Z29test_while_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Parent Loop BB198_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -79547,20 +80254,20 @@ _Z29test_while_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB198_8: # %._crit_edge.us # in Loop: Header=BB198_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB198_3 # %bb.9: # in Loop: Header=BB198_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -79572,19 +80279,31 @@ _Z29test_while_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB198_10: # %.preheader18.lr.ph.split blez $s0, .LBB198_17 # %bb.11: # %.preheader18.us28.preheader - pcalau12i $a0, %pc_hi20(.LCPI198_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI198_0) - pcalau12i $a0, %pc_hi20(.LCPI198_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI198_1) - pcalau12i $a0, %pc_hi20(.LCPI198_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI198_2) - pcalau12i $a0, %pc_hi20(.LCPI198_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI198_4) - pcalau12i $a0, %pc_hi20(.LCPI198_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI198_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -79599,17 +80318,17 @@ _Z29test_while_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Child Loop BB198_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB198_14: # Parent Loop BB198_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -79617,20 +80336,20 @@ _Z29test_while_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # %bb.15: # %._crit_edge.us36 # in Loop: Header=BB198_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB198_12 # %bb.16: # in Loop: Header=BB198_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -79643,16 +80362,25 @@ _Z29test_while_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB198_17: # %.preheader18.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI198_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI198_0) - pcalau12i $a0, %pc_hi20(.LCPI198_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI198_1) - pcalau12i $a0, %pc_hi20(.LCPI198_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI198_2) - pcalau12i $a0, %pc_hi20(.LCPI198_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI198_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -79671,9 +80399,9 @@ _Z29test_while_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z29test_while_loop_unro fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB198_18 # %bb.20: # in Loop: Header=BB198_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -79712,12 +80440,14 @@ _Z29test_while_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB198_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI198_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI198_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -79757,22 +80487,8 @@ _Z29test_while_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi17EdEvPKT0_iPKc, .Lfunc_end198-_Z29test_while_loop_unroll_factorILi17EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi16EdEvPKT0_iPKc -.LCPI199_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI199_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI199_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI199_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI199_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI199_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi16EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi16EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi16EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi16EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi16EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi16EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi16EdEvPKT0_iPKc @@ -79832,19 +80548,31 @@ _Z29test_while_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z29test_while_loop_unro addi.w $s2, $s0, -15 addi.d $s6, $s1, 64 addi.d $s7, $s1, 128 - pcalau12i $a0, %pc_hi20(.LCPI199_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI199_0) - pcalau12i $a0, %pc_hi20(.LCPI199_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI199_1) - pcalau12i $a0, %pc_hi20(.LCPI199_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI199_2) - pcalau12i $a0, %pc_hi20(.LCPI199_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI199_4) - pcalau12i $a0, %pc_hi20(.LCPI199_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI199_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -79863,138 +80591,138 @@ _Z29test_while_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB199_5: # Parent Loop BB199_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 16 addi.w $a1, $a1, 16 @@ -80009,12 +80737,12 @@ _Z29test_while_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Parent Loop BB199_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -80022,20 +80750,20 @@ _Z29test_while_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB199_8: # %._crit_edge.us # in Loop: Header=BB199_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB199_3 # %bb.9: # in Loop: Header=BB199_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -80047,19 +80775,31 @@ _Z29test_while_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB199_10: # %.preheader18.lr.ph.split blez $s0, .LBB199_17 # %bb.11: # %.preheader18.us28.preheader - pcalau12i $a0, %pc_hi20(.LCPI199_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI199_0) - pcalau12i $a0, %pc_hi20(.LCPI199_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI199_1) - pcalau12i $a0, %pc_hi20(.LCPI199_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI199_2) - pcalau12i $a0, %pc_hi20(.LCPI199_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI199_4) - pcalau12i $a0, %pc_hi20(.LCPI199_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI199_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -80074,17 +80814,17 @@ _Z29test_while_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Child Loop BB199_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB199_14: # Parent Loop BB199_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -80092,20 +80832,20 @@ _Z29test_while_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # %bb.15: # %._crit_edge.us36 # in Loop: Header=BB199_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB199_12 # %bb.16: # in Loop: Header=BB199_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -80118,16 +80858,25 @@ _Z29test_while_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB199_17: # %.preheader18.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI199_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI199_0) - pcalau12i $a0, %pc_hi20(.LCPI199_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI199_1) - pcalau12i $a0, %pc_hi20(.LCPI199_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI199_2) - pcalau12i $a0, %pc_hi20(.LCPI199_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI199_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -80146,9 +80895,9 @@ _Z29test_while_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z29test_while_loop_unro fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB199_18 # %bb.20: # in Loop: Header=BB199_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -80187,12 +80936,14 @@ _Z29test_while_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB199_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI199_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI199_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -80232,22 +80983,8 @@ _Z29test_while_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi16EdEvPKT0_iPKc, .Lfunc_end199-_Z29test_while_loop_unroll_factorILi16EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi15EdEvPKT0_iPKc -.LCPI200_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI200_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI200_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI200_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI200_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI200_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi15EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi15EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi15EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi15EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi15EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi15EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi15EdEvPKT0_iPKc @@ -80307,19 +81044,31 @@ _Z29test_while_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z29test_while_loop_unro addi.w $s2, $s0, -14 addi.d $s6, $s1, 56 addi.d $s7, $s1, 120 - pcalau12i $a0, %pc_hi20(.LCPI200_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI200_0) - pcalau12i $a0, %pc_hi20(.LCPI200_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI200_1) - pcalau12i $a0, %pc_hi20(.LCPI200_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI200_2) - pcalau12i $a0, %pc_hi20(.LCPI200_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI200_4) - pcalau12i $a0, %pc_hi20(.LCPI200_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI200_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -80338,130 +81087,130 @@ _Z29test_while_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB200_5: # Parent Loop BB200_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 15 addi.w $a1, $a1, 15 @@ -80476,12 +81225,12 @@ _Z29test_while_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Parent Loop BB200_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -80489,20 +81238,20 @@ _Z29test_while_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB200_8: # %._crit_edge.us # in Loop: Header=BB200_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB200_3 # %bb.9: # in Loop: Header=BB200_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -80514,19 +81263,31 @@ _Z29test_while_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB200_10: # %.preheader18.lr.ph.split blez $s0, .LBB200_17 # %bb.11: # %.preheader18.us28.preheader - pcalau12i $a0, %pc_hi20(.LCPI200_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI200_0) - pcalau12i $a0, %pc_hi20(.LCPI200_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI200_1) - pcalau12i $a0, %pc_hi20(.LCPI200_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI200_2) - pcalau12i $a0, %pc_hi20(.LCPI200_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI200_4) - pcalau12i $a0, %pc_hi20(.LCPI200_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI200_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -80541,17 +81302,17 @@ _Z29test_while_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Child Loop BB200_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB200_14: # Parent Loop BB200_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -80559,20 +81320,20 @@ _Z29test_while_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # %bb.15: # %._crit_edge.us36 # in Loop: Header=BB200_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB200_12 # %bb.16: # in Loop: Header=BB200_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -80585,16 +81346,25 @@ _Z29test_while_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB200_17: # %.preheader18.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI200_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI200_0) - pcalau12i $a0, %pc_hi20(.LCPI200_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI200_1) - pcalau12i $a0, %pc_hi20(.LCPI200_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI200_2) - pcalau12i $a0, %pc_hi20(.LCPI200_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI200_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -80613,9 +81383,9 @@ _Z29test_while_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z29test_while_loop_unro fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB200_18 # %bb.20: # in Loop: Header=BB200_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -80654,12 +81424,14 @@ _Z29test_while_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB200_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI200_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI200_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -80699,22 +81471,8 @@ _Z29test_while_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi15EdEvPKT0_iPKc, .Lfunc_end200-_Z29test_while_loop_unroll_factorILi15EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi14EdEvPKT0_iPKc -.LCPI201_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI201_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI201_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI201_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI201_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI201_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi14EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi14EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi14EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi14EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi14EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi14EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi14EdEvPKT0_iPKc @@ -80774,19 +81532,31 @@ _Z29test_while_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z29test_while_loop_unro addi.w $s2, $s0, -13 addi.d $s6, $s1, 56 addi.d $s7, $s1, 112 - pcalau12i $a0, %pc_hi20(.LCPI201_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI201_0) - pcalau12i $a0, %pc_hi20(.LCPI201_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI201_1) - pcalau12i $a0, %pc_hi20(.LCPI201_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI201_2) - pcalau12i $a0, %pc_hi20(.LCPI201_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI201_4) - pcalau12i $a0, %pc_hi20(.LCPI201_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI201_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -80805,122 +81575,122 @@ _Z29test_while_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB201_5: # Parent Loop BB201_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 14 addi.w $a1, $a1, 14 @@ -80935,12 +81705,12 @@ _Z29test_while_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Parent Loop BB201_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -80948,20 +81718,20 @@ _Z29test_while_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB201_8: # %._crit_edge.us # in Loop: Header=BB201_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB201_3 # %bb.9: # in Loop: Header=BB201_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -80973,19 +81743,31 @@ _Z29test_while_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB201_10: # %.preheader18.lr.ph.split blez $s0, .LBB201_17 # %bb.11: # %.preheader18.us28.preheader - pcalau12i $a0, %pc_hi20(.LCPI201_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI201_0) - pcalau12i $a0, %pc_hi20(.LCPI201_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI201_1) - pcalau12i $a0, %pc_hi20(.LCPI201_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI201_2) - pcalau12i $a0, %pc_hi20(.LCPI201_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI201_4) - pcalau12i $a0, %pc_hi20(.LCPI201_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI201_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -81000,17 +81782,17 @@ _Z29test_while_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Child Loop BB201_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB201_14: # Parent Loop BB201_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -81018,20 +81800,20 @@ _Z29test_while_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # %bb.15: # %._crit_edge.us36 # in Loop: Header=BB201_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB201_12 # %bb.16: # in Loop: Header=BB201_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -81044,16 +81826,25 @@ _Z29test_while_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB201_17: # %.preheader18.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI201_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI201_0) - pcalau12i $a0, %pc_hi20(.LCPI201_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI201_1) - pcalau12i $a0, %pc_hi20(.LCPI201_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI201_2) - pcalau12i $a0, %pc_hi20(.LCPI201_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI201_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -81072,9 +81863,9 @@ _Z29test_while_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z29test_while_loop_unro fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB201_18 # %bb.20: # in Loop: Header=BB201_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -81113,12 +81904,14 @@ _Z29test_while_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB201_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI201_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI201_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -81158,22 +81951,8 @@ _Z29test_while_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi14EdEvPKT0_iPKc, .Lfunc_end201-_Z29test_while_loop_unroll_factorILi14EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi13EdEvPKT0_iPKc -.LCPI202_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI202_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI202_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI202_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI202_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI202_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi13EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi13EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi13EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi13EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi13EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi13EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi13EdEvPKT0_iPKc @@ -81233,19 +82012,31 @@ _Z29test_while_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z29test_while_loop_unro addi.w $s2, $s0, -12 addi.d $s6, $s1, 48 addi.d $s7, $s1, 104 - pcalau12i $a0, %pc_hi20(.LCPI202_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI202_0) - pcalau12i $a0, %pc_hi20(.LCPI202_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI202_1) - pcalau12i $a0, %pc_hi20(.LCPI202_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI202_2) - pcalau12i $a0, %pc_hi20(.LCPI202_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI202_4) - pcalau12i $a0, %pc_hi20(.LCPI202_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI202_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -81264,114 +82055,114 @@ _Z29test_while_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB202_5: # Parent Loop BB202_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 13 addi.w $a1, $a1, 13 @@ -81386,12 +82177,12 @@ _Z29test_while_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Parent Loop BB202_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -81399,20 +82190,20 @@ _Z29test_while_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB202_8: # %._crit_edge.us # in Loop: Header=BB202_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB202_3 # %bb.9: # in Loop: Header=BB202_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -81424,19 +82215,31 @@ _Z29test_while_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB202_10: # %.preheader18.lr.ph.split blez $s0, .LBB202_17 # %bb.11: # %.preheader18.us28.preheader - pcalau12i $a0, %pc_hi20(.LCPI202_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI202_0) - pcalau12i $a0, %pc_hi20(.LCPI202_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI202_1) - pcalau12i $a0, %pc_hi20(.LCPI202_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI202_2) - pcalau12i $a0, %pc_hi20(.LCPI202_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI202_4) - pcalau12i $a0, %pc_hi20(.LCPI202_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI202_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -81451,17 +82254,17 @@ _Z29test_while_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Child Loop BB202_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB202_14: # Parent Loop BB202_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -81469,20 +82272,20 @@ _Z29test_while_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # %bb.15: # %._crit_edge.us36 # in Loop: Header=BB202_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB202_12 # %bb.16: # in Loop: Header=BB202_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -81495,16 +82298,25 @@ _Z29test_while_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB202_17: # %.preheader18.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI202_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI202_0) - pcalau12i $a0, %pc_hi20(.LCPI202_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI202_1) - pcalau12i $a0, %pc_hi20(.LCPI202_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI202_2) - pcalau12i $a0, %pc_hi20(.LCPI202_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI202_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -81523,9 +82335,9 @@ _Z29test_while_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z29test_while_loop_unro fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB202_18 # %bb.20: # in Loop: Header=BB202_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -81564,12 +82376,14 @@ _Z29test_while_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB202_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI202_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI202_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -81609,22 +82423,8 @@ _Z29test_while_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi13EdEvPKT0_iPKc, .Lfunc_end202-_Z29test_while_loop_unroll_factorILi13EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi12EdEvPKT0_iPKc -.LCPI203_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI203_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI203_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI203_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI203_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI203_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi12EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi12EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi12EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi12EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi12EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi12EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi12EdEvPKT0_iPKc @@ -81684,19 +82484,31 @@ _Z29test_while_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z29test_while_loop_unro addi.w $s2, $s0, -11 addi.d $s6, $s1, 48 addi.d $s7, $s1, 96 - pcalau12i $a0, %pc_hi20(.LCPI203_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI203_0) - pcalau12i $a0, %pc_hi20(.LCPI203_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI203_1) - pcalau12i $a0, %pc_hi20(.LCPI203_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI203_2) - pcalau12i $a0, %pc_hi20(.LCPI203_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI203_4) - pcalau12i $a0, %pc_hi20(.LCPI203_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI203_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -81715,106 +82527,106 @@ _Z29test_while_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB203_5: # Parent Loop BB203_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 12 addi.w $a1, $a1, 12 @@ -81829,12 +82641,12 @@ _Z29test_while_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Parent Loop BB203_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -81842,20 +82654,20 @@ _Z29test_while_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB203_8: # %._crit_edge.us # in Loop: Header=BB203_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB203_3 # %bb.9: # in Loop: Header=BB203_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -81867,19 +82679,31 @@ _Z29test_while_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB203_10: # %.preheader17.lr.ph.split blez $s0, .LBB203_17 # %bb.11: # %.preheader17.us27.preheader - pcalau12i $a0, %pc_hi20(.LCPI203_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI203_0) - pcalau12i $a0, %pc_hi20(.LCPI203_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI203_1) - pcalau12i $a0, %pc_hi20(.LCPI203_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI203_2) - pcalau12i $a0, %pc_hi20(.LCPI203_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI203_4) - pcalau12i $a0, %pc_hi20(.LCPI203_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI203_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -81894,17 +82718,17 @@ _Z29test_while_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Child Loop BB203_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB203_14: # Parent Loop BB203_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -81912,20 +82736,20 @@ _Z29test_while_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # %bb.15: # %._crit_edge.us35 # in Loop: Header=BB203_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB203_12 # %bb.16: # in Loop: Header=BB203_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -81938,16 +82762,25 @@ _Z29test_while_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB203_17: # %.preheader17.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI203_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI203_0) - pcalau12i $a0, %pc_hi20(.LCPI203_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI203_1) - pcalau12i $a0, %pc_hi20(.LCPI203_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI203_2) - pcalau12i $a0, %pc_hi20(.LCPI203_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI203_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -81966,9 +82799,9 @@ _Z29test_while_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z29test_while_loop_unro fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB203_18 # %bb.20: # in Loop: Header=BB203_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -82007,12 +82840,14 @@ _Z29test_while_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB203_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI203_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI203_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -82133,22 +82968,8 @@ _ZN16while_loop_testsILi10EdE7do_testEPKdPKc: # @_ZN16while_loop_testsILi10EdE7d .size _ZN16while_loop_testsILi10EdE7do_testEPKdPKc, .Lfunc_end204-_ZN16while_loop_testsILi10EdE7do_testEPKdPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi11EdEvPKT0_iPKc -.LCPI205_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI205_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI205_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI205_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI205_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI205_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi11EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi11EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi11EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi11EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi11EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi11EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi11EdEvPKT0_iPKc @@ -82208,19 +83029,31 @@ _Z29test_while_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z29test_while_loop_unro addi.w $s2, $s0, -10 addi.d $s6, $s1, 40 addi.d $s7, $s1, 88 - pcalau12i $a0, %pc_hi20(.LCPI205_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI205_0) - pcalau12i $a0, %pc_hi20(.LCPI205_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI205_1) - pcalau12i $a0, %pc_hi20(.LCPI205_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI205_2) - pcalau12i $a0, %pc_hi20(.LCPI205_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI205_4) - pcalau12i $a0, %pc_hi20(.LCPI205_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI205_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -82239,98 +83072,98 @@ _Z29test_while_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a0, $zero move $a3, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB205_5: # Parent Loop BB205_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 11 addi.w $a1, $a1, 11 @@ -82345,12 +83178,12 @@ _Z29test_while_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Parent Loop BB205_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -82358,20 +83191,20 @@ _Z29test_while_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB205_8: # %._crit_edge.us # in Loop: Header=BB205_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB205_3 # %bb.9: # in Loop: Header=BB205_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -82383,19 +83216,31 @@ _Z29test_while_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB205_10: # %.preheader17.lr.ph.split blez $s0, .LBB205_17 # %bb.11: # %.preheader17.us27.preheader - pcalau12i $a0, %pc_hi20(.LCPI205_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI205_0) - pcalau12i $a0, %pc_hi20(.LCPI205_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI205_1) - pcalau12i $a0, %pc_hi20(.LCPI205_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI205_2) - pcalau12i $a0, %pc_hi20(.LCPI205_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI205_4) - pcalau12i $a0, %pc_hi20(.LCPI205_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI205_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -82410,17 +83255,17 @@ _Z29test_while_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Child Loop BB205_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB205_14: # Parent Loop BB205_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -82428,20 +83273,20 @@ _Z29test_while_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # %bb.15: # %._crit_edge.us35 # in Loop: Header=BB205_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB205_12 # %bb.16: # in Loop: Header=BB205_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -82454,16 +83299,25 @@ _Z29test_while_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB205_17: # %.preheader17.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI205_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI205_0) - pcalau12i $a0, %pc_hi20(.LCPI205_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI205_1) - pcalau12i $a0, %pc_hi20(.LCPI205_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI205_2) - pcalau12i $a0, %pc_hi20(.LCPI205_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI205_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -82482,9 +83336,9 @@ _Z29test_while_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z29test_while_loop_unro fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB205_18 # %bb.20: # in Loop: Header=BB205_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -82523,12 +83377,14 @@ _Z29test_while_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB205_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI205_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI205_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -82568,22 +83424,8 @@ _Z29test_while_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi11EdEvPKT0_iPKc, .Lfunc_end205-_Z29test_while_loop_unroll_factorILi11EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi10EdEvPKT0_iPKc -.LCPI206_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI206_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI206_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI206_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI206_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI206_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi10EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi10EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi10EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi10EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi10EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi10EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi10EdEvPKT0_iPKc @@ -82643,19 +83485,31 @@ _Z29test_while_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z29test_while_loop_unro addi.w $s2, $s0, -9 addi.d $s6, $s1, 40 addi.d $s7, $s1, 80 - pcalau12i $a0, %pc_hi20(.LCPI206_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI206_0) - pcalau12i $a0, %pc_hi20(.LCPI206_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI206_1) - pcalau12i $a0, %pc_hi20(.LCPI206_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI206_2) - pcalau12i $a0, %pc_hi20(.LCPI206_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI206_4) - pcalau12i $a0, %pc_hi20(.LCPI206_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI206_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -82673,90 +83527,90 @@ _Z29test_while_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a0, $zero move $a4, $s7 move $a3, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB206_5: # Parent Loop BB206_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a3, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 10 addi.w $a2, $a2, 10 @@ -82771,12 +83625,12 @@ _Z29test_while_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Parent Loop BB206_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a5, $a5, 8 @@ -82784,20 +83638,20 @@ _Z29test_while_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB206_8: # %._crit_edge.us # in Loop: Header=BB206_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB206_3 # %bb.9: # in Loop: Header=BB206_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -82810,19 +83664,31 @@ _Z29test_while_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB206_10: # %.preheader17.lr.ph.split blez $s0, .LBB206_17 # %bb.11: # %.preheader17.us27.preheader - pcalau12i $a0, %pc_hi20(.LCPI206_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI206_0) - pcalau12i $a0, %pc_hi20(.LCPI206_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI206_1) - pcalau12i $a0, %pc_hi20(.LCPI206_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI206_2) - pcalau12i $a0, %pc_hi20(.LCPI206_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI206_4) - pcalau12i $a0, %pc_hi20(.LCPI206_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI206_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -82837,17 +83703,17 @@ _Z29test_while_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # Child Loop BB206_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB206_14: # Parent Loop BB206_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -82855,20 +83721,20 @@ _Z29test_while_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z29test_while_loop_unro # %bb.15: # %._crit_edge.us35 # in Loop: Header=BB206_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB206_12 # %bb.16: # in Loop: Header=BB206_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -82881,16 +83747,25 @@ _Z29test_while_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .LBB206_17: # %.preheader17.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI206_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI206_0) - pcalau12i $a0, %pc_hi20(.LCPI206_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI206_1) - pcalau12i $a0, %pc_hi20(.LCPI206_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI206_2) - pcalau12i $a0, %pc_hi20(.LCPI206_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI206_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -82909,9 +83784,9 @@ _Z29test_while_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z29test_while_loop_unro fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB206_18 # %bb.20: # in Loop: Header=BB206_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -82950,12 +83825,14 @@ _Z29test_while_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z29test_while_loop_unro move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB206_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI206_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI206_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -82995,22 +83872,8 @@ _Z29test_while_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z29test_while_loop_unro .size _Z29test_while_loop_unroll_factorILi10EdEvPKT0_iPKc, .Lfunc_end206-_Z29test_while_loop_unroll_factorILi10EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi9EdEvPKT0_iPKc -.LCPI207_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI207_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI207_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI207_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI207_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI207_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi9EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi9EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi9EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi9EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi9EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi9EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi9EdEvPKT0_iPKc @@ -83070,19 +83933,31 @@ _Z29test_while_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol addi.w $s2, $s0, -8 addi.d $s6, $s1, 32 addi.d $s7, $s1, 72 - pcalau12i $a0, %pc_hi20(.LCPI207_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI207_0) - pcalau12i $a0, %pc_hi20(.LCPI207_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI207_1) - pcalau12i $a0, %pc_hi20(.LCPI207_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI207_2) - pcalau12i $a0, %pc_hi20(.LCPI207_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI207_4) - pcalau12i $a0, %pc_hi20(.LCPI207_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI207_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -83100,82 +83975,82 @@ _Z29test_while_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a0, $zero move $a4, $s7 move $a3, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB207_5: # Parent Loop BB207_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a3, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 9 addi.w $a2, $a2, 9 @@ -83190,12 +84065,12 @@ _Z29test_while_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol # Parent Loop BB207_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a5, $a5, 8 @@ -83203,20 +84078,20 @@ _Z29test_while_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .LBB207_8: # %._crit_edge.us # in Loop: Header=BB207_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB207_3 # %bb.9: # in Loop: Header=BB207_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -83229,19 +84104,31 @@ _Z29test_while_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .LBB207_10: # %.preheader17.lr.ph.split blez $s0, .LBB207_17 # %bb.11: # %.preheader17.us27.preheader - pcalau12i $a0, %pc_hi20(.LCPI207_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI207_0) - pcalau12i $a0, %pc_hi20(.LCPI207_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI207_1) - pcalau12i $a0, %pc_hi20(.LCPI207_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI207_2) - pcalau12i $a0, %pc_hi20(.LCPI207_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI207_4) - pcalau12i $a0, %pc_hi20(.LCPI207_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI207_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -83256,17 +84143,17 @@ _Z29test_while_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol # Child Loop BB207_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB207_14: # Parent Loop BB207_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -83274,20 +84161,20 @@ _Z29test_while_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol # %bb.15: # %._crit_edge.us35 # in Loop: Header=BB207_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB207_12 # %bb.16: # in Loop: Header=BB207_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -83300,16 +84187,25 @@ _Z29test_while_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .LBB207_17: # %.preheader17.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI207_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI207_0) - pcalau12i $a0, %pc_hi20(.LCPI207_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI207_1) - pcalau12i $a0, %pc_hi20(.LCPI207_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI207_2) - pcalau12i $a0, %pc_hi20(.LCPI207_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI207_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -83328,9 +84224,9 @@ _Z29test_while_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB207_18 # %bb.20: # in Loop: Header=BB207_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -83369,12 +84265,14 @@ _Z29test_while_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB207_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI207_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI207_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -83414,22 +84312,8 @@ _Z29test_while_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .size _Z29test_while_loop_unroll_factorILi9EdEvPKT0_iPKc, .Lfunc_end207-_Z29test_while_loop_unroll_factorILi9EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi8EdEvPKT0_iPKc -.LCPI208_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI208_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI208_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI208_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI208_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI208_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi8EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi8EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi8EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi8EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi8EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi8EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi8EdEvPKT0_iPKc @@ -83489,19 +84373,31 @@ _Z29test_while_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol addi.w $s2, $s0, -7 addi.d $s6, $s1, 32 addi.d $s7, $s1, 64 - pcalau12i $a0, %pc_hi20(.LCPI208_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI208_0) - pcalau12i $a0, %pc_hi20(.LCPI208_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI208_1) - pcalau12i $a0, %pc_hi20(.LCPI208_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI208_2) - pcalau12i $a0, %pc_hi20(.LCPI208_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI208_4) - pcalau12i $a0, %pc_hi20(.LCPI208_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI208_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -83519,74 +84415,74 @@ _Z29test_while_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a0, $zero move $a4, $s7 move $a3, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB208_5: # Parent Loop BB208_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a3, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 8 addi.w $a2, $a2, 8 @@ -83601,12 +84497,12 @@ _Z29test_while_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol # Parent Loop BB208_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a5, $a5, 8 @@ -83614,20 +84510,20 @@ _Z29test_while_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .LBB208_8: # %._crit_edge.us # in Loop: Header=BB208_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB208_3 # %bb.9: # in Loop: Header=BB208_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -83640,19 +84536,31 @@ _Z29test_while_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .LBB208_10: # %.preheader17.lr.ph.split blez $s0, .LBB208_17 # %bb.11: # %.preheader17.us27.preheader - pcalau12i $a0, %pc_hi20(.LCPI208_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI208_0) - pcalau12i $a0, %pc_hi20(.LCPI208_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI208_1) - pcalau12i $a0, %pc_hi20(.LCPI208_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI208_2) - pcalau12i $a0, %pc_hi20(.LCPI208_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI208_4) - pcalau12i $a0, %pc_hi20(.LCPI208_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI208_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -83667,17 +84575,17 @@ _Z29test_while_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol # Child Loop BB208_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB208_14: # Parent Loop BB208_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -83685,20 +84593,20 @@ _Z29test_while_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol # %bb.15: # %._crit_edge.us35 # in Loop: Header=BB208_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB208_12 # %bb.16: # in Loop: Header=BB208_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -83711,16 +84619,25 @@ _Z29test_while_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .LBB208_17: # %.preheader17.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI208_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI208_0) - pcalau12i $a0, %pc_hi20(.LCPI208_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI208_1) - pcalau12i $a0, %pc_hi20(.LCPI208_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI208_2) - pcalau12i $a0, %pc_hi20(.LCPI208_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI208_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -83739,9 +84656,9 @@ _Z29test_while_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB208_18 # %bb.20: # in Loop: Header=BB208_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -83780,12 +84697,14 @@ _Z29test_while_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB208_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI208_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI208_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -83825,22 +84744,8 @@ _Z29test_while_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .size _Z29test_while_loop_unroll_factorILi8EdEvPKT0_iPKc, .Lfunc_end208-_Z29test_while_loop_unroll_factorILi8EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi7EdEvPKT0_iPKc -.LCPI209_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI209_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI209_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI209_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI209_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI209_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi7EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi7EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi7EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi7EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi7EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi7EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi7EdEvPKT0_iPKc @@ -83900,19 +84805,31 @@ _Z29test_while_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol addi.w $s2, $s0, -6 addi.d $s6, $s1, 24 addi.d $s7, $s1, 56 - pcalau12i $a0, %pc_hi20(.LCPI209_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI209_0) - pcalau12i $a0, %pc_hi20(.LCPI209_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI209_1) - pcalau12i $a0, %pc_hi20(.LCPI209_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI209_2) - pcalau12i $a0, %pc_hi20(.LCPI209_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI209_4) - pcalau12i $a0, %pc_hi20(.LCPI209_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI209_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -83930,66 +84847,66 @@ _Z29test_while_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a0, $zero move $a4, $s7 move $a3, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB209_5: # Parent Loop BB209_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a3, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 7 addi.w $a2, $a2, 7 @@ -84004,12 +84921,12 @@ _Z29test_while_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol # Parent Loop BB209_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a5, $a5, 8 @@ -84017,20 +84934,20 @@ _Z29test_while_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .LBB209_8: # %._crit_edge.us # in Loop: Header=BB209_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB209_3 # %bb.9: # in Loop: Header=BB209_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -84043,19 +84960,31 @@ _Z29test_while_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .LBB209_10: # %.preheader17.lr.ph.split blez $s0, .LBB209_17 # %bb.11: # %.preheader17.us27.preheader - pcalau12i $a0, %pc_hi20(.LCPI209_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI209_0) - pcalau12i $a0, %pc_hi20(.LCPI209_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI209_1) - pcalau12i $a0, %pc_hi20(.LCPI209_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI209_2) - pcalau12i $a0, %pc_hi20(.LCPI209_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI209_4) - pcalau12i $a0, %pc_hi20(.LCPI209_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI209_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -84070,17 +84999,17 @@ _Z29test_while_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol # Child Loop BB209_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB209_14: # Parent Loop BB209_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -84088,20 +85017,20 @@ _Z29test_while_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol # %bb.15: # %._crit_edge.us35 # in Loop: Header=BB209_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB209_12 # %bb.16: # in Loop: Header=BB209_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -84114,16 +85043,25 @@ _Z29test_while_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .LBB209_17: # %.preheader17.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI209_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI209_0) - pcalau12i $a0, %pc_hi20(.LCPI209_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI209_1) - pcalau12i $a0, %pc_hi20(.LCPI209_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI209_2) - pcalau12i $a0, %pc_hi20(.LCPI209_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI209_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -84142,9 +85080,9 @@ _Z29test_while_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB209_18 # %bb.20: # in Loop: Header=BB209_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -84183,12 +85121,14 @@ _Z29test_while_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB209_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI209_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI209_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -84228,22 +85168,8 @@ _Z29test_while_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .size _Z29test_while_loop_unroll_factorILi7EdEvPKT0_iPKc, .Lfunc_end209-_Z29test_while_loop_unroll_factorILi7EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi6EdEvPKT0_iPKc -.LCPI210_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI210_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI210_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI210_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI210_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI210_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi6EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi6EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi6EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi6EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi6EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi6EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi6EdEvPKT0_iPKc @@ -84303,19 +85229,31 @@ _Z29test_while_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol addi.w $s2, $s0, -5 addi.d $s6, $s1, 24 addi.d $s7, $s1, 48 - pcalau12i $a0, %pc_hi20(.LCPI210_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI210_0) - pcalau12i $a0, %pc_hi20(.LCPI210_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI210_1) - pcalau12i $a0, %pc_hi20(.LCPI210_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI210_2) - pcalau12i $a0, %pc_hi20(.LCPI210_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI210_4) - pcalau12i $a0, %pc_hi20(.LCPI210_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI210_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -84333,58 +85271,58 @@ _Z29test_while_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a0, $zero move $a4, $s7 move $a3, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB210_5: # Parent Loop BB210_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a3, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 6 addi.d $a3, $a3, 48 @@ -84399,12 +85337,12 @@ _Z29test_while_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol # Parent Loop BB210_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a5, $a5, 8 @@ -84412,20 +85350,20 @@ _Z29test_while_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .LBB210_8: # %._crit_edge.us # in Loop: Header=BB210_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB210_3 # %bb.9: # in Loop: Header=BB210_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -84438,19 +85376,31 @@ _Z29test_while_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .LBB210_10: # %.preheader17.lr.ph.split blez $s0, .LBB210_17 # %bb.11: # %.preheader17.us27.preheader - pcalau12i $a0, %pc_hi20(.LCPI210_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI210_0) - pcalau12i $a0, %pc_hi20(.LCPI210_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI210_1) - pcalau12i $a0, %pc_hi20(.LCPI210_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI210_2) - pcalau12i $a0, %pc_hi20(.LCPI210_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI210_4) - pcalau12i $a0, %pc_hi20(.LCPI210_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI210_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -84465,17 +85415,17 @@ _Z29test_while_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol # Child Loop BB210_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB210_14: # Parent Loop BB210_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -84483,20 +85433,20 @@ _Z29test_while_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol # %bb.15: # %._crit_edge.us35 # in Loop: Header=BB210_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB210_12 # %bb.16: # in Loop: Header=BB210_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -84509,16 +85459,25 @@ _Z29test_while_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .LBB210_17: # %.preheader17.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI210_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI210_0) - pcalau12i $a0, %pc_hi20(.LCPI210_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI210_1) - pcalau12i $a0, %pc_hi20(.LCPI210_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI210_2) - pcalau12i $a0, %pc_hi20(.LCPI210_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI210_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -84537,9 +85496,9 @@ _Z29test_while_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB210_18 # %bb.20: # in Loop: Header=BB210_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -84578,12 +85537,14 @@ _Z29test_while_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB210_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI210_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI210_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -84623,22 +85584,8 @@ _Z29test_while_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .size _Z29test_while_loop_unroll_factorILi6EdEvPKT0_iPKc, .Lfunc_end210-_Z29test_while_loop_unroll_factorILi6EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi5EdEvPKT0_iPKc -.LCPI211_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI211_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI211_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI211_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI211_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI211_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi5EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi5EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi5EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi5EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi5EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi5EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi5EdEvPKT0_iPKc @@ -84698,19 +85645,31 @@ _Z29test_while_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol addi.w $s2, $s0, -4 addi.d $s6, $s1, 16 addi.d $s7, $s1, 40 - pcalau12i $a0, %pc_hi20(.LCPI211_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI211_0) - pcalau12i $a0, %pc_hi20(.LCPI211_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI211_1) - pcalau12i $a0, %pc_hi20(.LCPI211_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI211_2) - pcalau12i $a0, %pc_hi20(.LCPI211_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI211_4) - pcalau12i $a0, %pc_hi20(.LCPI211_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI211_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -84728,50 +85687,50 @@ _Z29test_while_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a0, $zero move $a4, $s7 move $a3, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB211_5: # Parent Loop BB211_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a3, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 5 addi.d $a3, $a3, 40 @@ -84786,12 +85745,12 @@ _Z29test_while_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol # Parent Loop BB211_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a5, $a5, 8 @@ -84799,20 +85758,20 @@ _Z29test_while_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .LBB211_8: # %._crit_edge.us # in Loop: Header=BB211_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB211_3 # %bb.9: # in Loop: Header=BB211_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -84825,19 +85784,31 @@ _Z29test_while_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .LBB211_10: # %.preheader17.lr.ph.split blez $s0, .LBB211_17 # %bb.11: # %.preheader17.us27.preheader - pcalau12i $a0, %pc_hi20(.LCPI211_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI211_0) - pcalau12i $a0, %pc_hi20(.LCPI211_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI211_1) - pcalau12i $a0, %pc_hi20(.LCPI211_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI211_2) - pcalau12i $a0, %pc_hi20(.LCPI211_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI211_4) - pcalau12i $a0, %pc_hi20(.LCPI211_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI211_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -84852,17 +85823,17 @@ _Z29test_while_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol # Child Loop BB211_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB211_14: # Parent Loop BB211_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -84870,20 +85841,20 @@ _Z29test_while_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol # %bb.15: # %._crit_edge.us35 # in Loop: Header=BB211_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB211_12 # %bb.16: # in Loop: Header=BB211_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -84896,16 +85867,25 @@ _Z29test_while_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .LBB211_17: # %.preheader17.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI211_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI211_0) - pcalau12i $a0, %pc_hi20(.LCPI211_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI211_1) - pcalau12i $a0, %pc_hi20(.LCPI211_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI211_2) - pcalau12i $a0, %pc_hi20(.LCPI211_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI211_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -84924,9 +85904,9 @@ _Z29test_while_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB211_18 # %bb.20: # in Loop: Header=BB211_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -84965,12 +85945,14 @@ _Z29test_while_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB211_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI211_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI211_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -85010,22 +85992,8 @@ _Z29test_while_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .size _Z29test_while_loop_unroll_factorILi5EdEvPKT0_iPKc, .Lfunc_end211-_Z29test_while_loop_unroll_factorILi5EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi4EdEvPKT0_iPKc -.LCPI212_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI212_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI212_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI212_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI212_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI212_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi4EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi4EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi4EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi4EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi4EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi4EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi4EdEvPKT0_iPKc @@ -85085,19 +86053,31 @@ _Z29test_while_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol addi.w $s2, $s0, -3 addi.d $s6, $s1, 16 addi.d $s7, $s1, 32 - pcalau12i $a0, %pc_hi20(.LCPI212_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI212_0) - pcalau12i $a0, %pc_hi20(.LCPI212_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI212_1) - pcalau12i $a0, %pc_hi20(.LCPI212_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI212_2) - pcalau12i $a0, %pc_hi20(.LCPI212_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI212_4) - pcalau12i $a0, %pc_hi20(.LCPI212_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI212_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -85115,42 +86095,42 @@ _Z29test_while_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a0, $zero move $a4, $s7 move $a3, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB212_5: # Parent Loop BB212_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a3, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 4 addi.d $a3, $a3, 32 @@ -85165,12 +86145,12 @@ _Z29test_while_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol # Parent Loop BB212_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a5, $a5, 8 @@ -85178,20 +86158,20 @@ _Z29test_while_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .LBB212_8: # %._crit_edge.us # in Loop: Header=BB212_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB212_3 # %bb.9: # in Loop: Header=BB212_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -85204,19 +86184,31 @@ _Z29test_while_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .LBB212_10: # %.preheader17.lr.ph.split blez $s0, .LBB212_17 # %bb.11: # %.preheader17.us27.preheader - pcalau12i $a0, %pc_hi20(.LCPI212_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI212_0) - pcalau12i $a0, %pc_hi20(.LCPI212_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI212_1) - pcalau12i $a0, %pc_hi20(.LCPI212_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI212_2) - pcalau12i $a0, %pc_hi20(.LCPI212_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI212_4) - pcalau12i $a0, %pc_hi20(.LCPI212_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI212_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -85231,17 +86223,17 @@ _Z29test_while_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol # Child Loop BB212_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB212_14: # Parent Loop BB212_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -85249,20 +86241,20 @@ _Z29test_while_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol # %bb.15: # %._crit_edge.us35 # in Loop: Header=BB212_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB212_12 # %bb.16: # in Loop: Header=BB212_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -85275,16 +86267,25 @@ _Z29test_while_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .LBB212_17: # %.preheader17.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI212_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI212_0) - pcalau12i $a0, %pc_hi20(.LCPI212_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI212_1) - pcalau12i $a0, %pc_hi20(.LCPI212_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI212_2) - pcalau12i $a0, %pc_hi20(.LCPI212_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI212_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -85303,9 +86304,9 @@ _Z29test_while_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB212_18 # %bb.20: # in Loop: Header=BB212_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -85344,12 +86345,14 @@ _Z29test_while_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB212_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI212_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI212_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -85389,22 +86392,8 @@ _Z29test_while_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .size _Z29test_while_loop_unroll_factorILi4EdEvPKT0_iPKc, .Lfunc_end212-_Z29test_while_loop_unroll_factorILi4EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi3EdEvPKT0_iPKc -.LCPI213_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI213_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI213_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI213_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI213_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI213_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi3EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi3EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi3EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi3EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi3EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi3EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi3EdEvPKT0_iPKc @@ -85464,19 +86453,31 @@ _Z29test_while_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol addi.w $s2, $s0, -2 addi.d $s6, $s1, 16 addi.d $s7, $s1, 24 - pcalau12i $a0, %pc_hi20(.LCPI213_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI213_0) - pcalau12i $a0, %pc_hi20(.LCPI213_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI213_1) - pcalau12i $a0, %pc_hi20(.LCPI213_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI213_2) - pcalau12i $a0, %pc_hi20(.LCPI213_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI213_4) - pcalau12i $a0, %pc_hi20(.LCPI213_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI213_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -85494,34 +86495,34 @@ _Z29test_while_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a0, $zero move $a5, $s7 move $a3, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB213_5: # Parent Loop BB213_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a3, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a4, $a5 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 3 addi.d $a3, $a3, 24 @@ -85536,12 +86537,12 @@ _Z29test_while_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol # Parent Loop BB213_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a4, $a4, 8 @@ -85549,20 +86550,20 @@ _Z29test_while_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .LBB213_8: # %._crit_edge.us # in Loop: Header=BB213_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB213_3 # %bb.9: # in Loop: Header=BB213_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -85575,19 +86576,31 @@ _Z29test_while_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .LBB213_10: # %.preheader17.lr.ph.split blez $s0, .LBB213_17 # %bb.11: # %.preheader17.us27.preheader - pcalau12i $a0, %pc_hi20(.LCPI213_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI213_0) - pcalau12i $a0, %pc_hi20(.LCPI213_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI213_1) - pcalau12i $a0, %pc_hi20(.LCPI213_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI213_2) - pcalau12i $a0, %pc_hi20(.LCPI213_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI213_4) - pcalau12i $a0, %pc_hi20(.LCPI213_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI213_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s6, $zero @@ -85602,17 +86615,17 @@ _Z29test_while_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol # Child Loop BB213_14 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB213_14: # Parent Loop BB213_13 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -85620,20 +86633,20 @@ _Z29test_while_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol # %bb.15: # %._crit_edge.us35 # in Loop: Header=BB213_13 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB213_12 # %bb.16: # in Loop: Header=BB213_13 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -85646,16 +86659,25 @@ _Z29test_while_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .LBB213_17: # %.preheader17.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI213_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI213_0) - pcalau12i $a0, %pc_hi20(.LCPI213_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI213_1) - pcalau12i $a0, %pc_hi20(.LCPI213_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI213_2) - pcalau12i $a0, %pc_hi20(.LCPI213_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI213_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -85674,9 +86696,9 @@ _Z29test_while_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB213_18 # %bb.20: # in Loop: Header=BB213_19 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -85715,12 +86737,14 @@ _Z29test_while_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB213_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI213_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI213_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -85760,22 +86784,8 @@ _Z29test_while_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .size _Z29test_while_loop_unroll_factorILi3EdEvPKT0_iPKc, .Lfunc_end213-_Z29test_while_loop_unroll_factorILi3EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc -.LCPI214_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI214_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI214_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI214_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI214_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI214_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc @@ -85835,19 +86845,31 @@ _Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol addi.w $s5, $s0, -1 addi.d $s6, $s1, 8 addi.d $s7, $s1, 16 - pcalau12i $a0, %pc_hi20(.LCPI214_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI214_0) - pcalau12i $a0, %pc_hi20(.LCPI214_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI214_1) - pcalau12i $a0, %pc_hi20(.LCPI214_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI214_2) - pcalau12i $a0, %pc_hi20(.LCPI214_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI214_4) - pcalau12i $a0, %pc_hi20(.LCPI214_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI214_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $s8, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -85865,26 +86887,26 @@ _Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a0, $zero move $a5, $s7 move $a4, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB214_5: # Parent Loop BB214_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a3, $a5 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 2 addi.d $a4, $a4, 16 @@ -85899,12 +86921,12 @@ _Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol # Parent Loop BB214_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a3, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, 1 addi.d $a3, $a3, 8 @@ -85912,20 +86934,20 @@ _Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .LBB214_8: # %._crit_edge.us # in Loop: Header=BB214_4 Depth=1 fld.d $fa1, $s8, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB214_3 # %bb.9: # in Loop: Header=BB214_4 Depth=1 ld.w $a1, $s2, %pc_lo12(current_test) @@ -85949,16 +86971,24 @@ _Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol lu32i.d $a2, -393216 lu52i.d $a2, $a2, -1022 vreplgr2vr.d $vr4, $a2 - lu32i.d $a0, -268678 - pcalau12i $a2, %pc_hi20(.LCPI214_2) - fld.d $fs0, $a2, %pc_lo12(.LCPI214_2) - pcalau12i $a2, %pc_hi20(.LCPI214_4) - fld.d $fs1, $a2, %pc_lo12(.LCPI214_4) - pcalau12i $a2, %pc_hi20(.LCPI214_3) - fld.d $fs2, $a2, %pc_lo12(.LCPI214_3) - lu52i.d $a0, $a0, 1042 - vreplgr2vr.d $vr5, $a0 - movgr2fr.d $fs3, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + vreplgr2vr.d $vr5, $a2 + movgr2fr.d $fs0, $zero + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s5, $zero @@ -85983,16 +87013,16 @@ _Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol vfadd.d $vr0, $vr0, $vr3 vfmadd.d $vr0, $vr0, $vr5, $vr4 vreplvei.d $vr1, $vr0, 0 - fadd.d $fa1, $fa1, $fs3 + fadd.d $fa1, $fa1, $fs0 vreplvei.d $vr0, $vr0, 1 - fmul.d $fa0, $fa0, $fs0 + fmul.d $fa0, $fa0, $fs1 fadd.d $fa0, $fa1, $fa0 fabs.d $fa2, $fa1 fdiv.d $fa1, $fa0, $fa1 - fcmp.clt.d $fcc0, $fs1, $fa2 + fcmp.clt.d $fcc0, $fs2, $fa2 fsel $fa0, $fa0, $fa1, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs2 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB214_12 # %bb.14: # in Loop: Header=BB214_13 Depth=1 ld.w $a1, $s2, %pc_lo12(current_test) @@ -86006,16 +87036,25 @@ _Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol b .LBB214_12 .LBB214_15: # %.preheader17.preheader fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI214_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI214_0) - pcalau12i $a0, %pc_hi20(.LCPI214_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI214_1) - pcalau12i $a0, %pc_hi20(.LCPI214_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI214_2) - pcalau12i $a0, %pc_hi20(.LCPI214_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI214_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -86034,9 +87073,9 @@ _Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB214_16 # %bb.18: # in Loop: Header=BB214_17 Depth=1 ld.w $a1, $s2, %pc_lo12(current_test) @@ -86075,12 +87114,14 @@ _Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB214_23: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI214_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI214_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -86120,22 +87161,8 @@ _Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .size _Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc, .Lfunc_end214-_Z29test_while_loop_unroll_factorILi2EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z29test_while_loop_unroll_factorILi1EdEvPKT0_iPKc -.LCPI215_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI215_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI215_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI215_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI215_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI215_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z29test_while_loop_unroll_factorILi1EdEvPKT0_iPKc,"axG",@progbits,_Z29test_while_loop_unroll_factorILi1EdEvPKT0_iPKc,comdat - .weak _Z29test_while_loop_unroll_factorILi1EdEvPKT0_iPKc + .weak _Z29test_while_loop_unroll_factorILi1EdEvPKT0_iPKc # -- Begin function _Z29test_while_loop_unroll_factorILi1EdEvPKT0_iPKc .p2align 5 .type _Z29test_while_loop_unroll_factorILi1EdEvPKT0_iPKc,@function _Z29test_while_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z29test_while_loop_unroll_factorILi1EdEvPKT0_iPKc @@ -86189,19 +87216,31 @@ _Z29test_while_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol # %bb.1: # %.preheader17.lr.ph blez $s0, .LBB215_8 # %bb.2: # %.preheader17.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI215_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI215_0) - pcalau12i $a0, %pc_hi20(.LCPI215_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI215_1) - pcalau12i $a0, %pc_hi20(.LCPI215_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI215_2) - pcalau12i $a0, %pc_hi20(.LCPI215_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI215_4) - pcalau12i $a0, %pc_hi20(.LCPI215_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI215_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $s6, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s7, $zero @@ -86216,17 +87255,17 @@ _Z29test_while_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol # Child Loop BB215_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB215_5: # Parent Loop BB215_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -86234,20 +87273,20 @@ _Z29test_while_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol # %bb.6: # %._crit_edge.us # in Loop: Header=BB215_4 Depth=1 fld.d $fa1, $s6, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB215_3 # %bb.7: # in Loop: Header=BB215_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -86260,16 +87299,25 @@ _Z29test_while_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .LBB215_8: # %.preheader17.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI215_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI215_0) - pcalau12i $a0, %pc_hi20(.LCPI215_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI215_1) - pcalau12i $a0, %pc_hi20(.LCPI215_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI215_2) - pcalau12i $a0, %pc_hi20(.LCPI215_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI215_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s2, $zero @@ -86288,9 +87336,9 @@ _Z29test_while_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB215_9 # %bb.11: # in Loop: Header=BB215_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -86329,12 +87377,14 @@ _Z29test_while_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB215_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI215_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI215_5) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -86372,22 +87422,8 @@ _Z29test_while_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z29test_while_loop_unrol .size _Z29test_while_loop_unroll_factorILi1EdEvPKT0_iPKc, .Lfunc_end215-_Z29test_while_loop_unroll_factorILi1EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi32EdEvPKT0_iPKc -.LCPI216_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI216_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI216_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI216_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI216_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI216_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi32EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi32EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi32EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi32EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi32EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi32EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi32EdEvPKT0_iPKc @@ -86449,19 +87485,31 @@ _Z26test_do_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac addi.d $s6, $s1, 128 addi.d $s7, $s1, 256 addi.d $s8, $s0, -32 - pcalau12i $a0, %pc_hi20(.LCPI216_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI216_0) - pcalau12i $a0, %pc_hi20(.LCPI216_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI216_1) - pcalau12i $a0, %pc_hi20(.LCPI216_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI216_2) - pcalau12i $a0, %pc_hi20(.LCPI216_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI216_4) - pcalau12i $a0, %pc_hi20(.LCPI216_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI216_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -86480,267 +87528,267 @@ _Z26test_do_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB216_5: # Parent Loop BB216_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -128 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -120 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 120 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 32 addi.w $a0, $a0, 32 @@ -86758,12 +87806,12 @@ _Z26test_do_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Parent Loop BB216_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -86771,20 +87819,20 @@ _Z26test_do_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB216_9: # %.loopexit.us # in Loop: Header=BB216_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB216_3 # %bb.10: # in Loop: Header=BB216_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -86796,19 +87844,31 @@ _Z26test_do_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB216_11: # %.lr.ph.split blez $s0, .LBB216_18 # %bb.12: # %.preheader.us37.preheader - pcalau12i $a0, %pc_hi20(.LCPI216_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI216_0) - pcalau12i $a0, %pc_hi20(.LCPI216_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI216_1) - pcalau12i $a0, %pc_hi20(.LCPI216_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI216_2) - pcalau12i $a0, %pc_hi20(.LCPI216_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI216_4) - pcalau12i $a0, %pc_hi20(.LCPI216_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI216_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -86823,17 +87883,17 @@ _Z26test_do_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Child Loop BB216_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB216_15: # Parent Loop BB216_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -86841,20 +87901,20 @@ _Z26test_do_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # %bb.16: # %.loopexit.us38 # in Loop: Header=BB216_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB216_13 # %bb.17: # in Loop: Header=BB216_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -86867,16 +87927,25 @@ _Z26test_do_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB216_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI216_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI216_0) - pcalau12i $a0, %pc_hi20(.LCPI216_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI216_1) - pcalau12i $a0, %pc_hi20(.LCPI216_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI216_2) - pcalau12i $a0, %pc_hi20(.LCPI216_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI216_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -86895,9 +87964,9 @@ _Z26test_do_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB216_19 # %bb.21: # in Loop: Header=BB216_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -86937,12 +88006,14 @@ _Z26test_do_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB216_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI216_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI216_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -86982,22 +88053,8 @@ _Z26test_do_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi32EdEvPKT0_iPKc, .Lfunc_end216-_Z26test_do_loop_unroll_factorILi32EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi31EdEvPKT0_iPKc -.LCPI217_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI217_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI217_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI217_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI217_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI217_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi31EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi31EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi31EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi31EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi31EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi31EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi31EdEvPKT0_iPKc @@ -87059,19 +88116,31 @@ _Z26test_do_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac addi.d $s6, $s1, 120 addi.d $s7, $s1, 248 addi.d $s8, $s0, -31 - pcalau12i $a0, %pc_hi20(.LCPI217_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI217_0) - pcalau12i $a0, %pc_hi20(.LCPI217_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI217_1) - pcalau12i $a0, %pc_hi20(.LCPI217_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI217_2) - pcalau12i $a0, %pc_hi20(.LCPI217_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI217_4) - pcalau12i $a0, %pc_hi20(.LCPI217_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI217_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -87090,259 +88159,259 @@ _Z26test_do_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB217_5: # Parent Loop BB217_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -120 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 120 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 31 addi.w $a0, $a0, 31 @@ -87360,12 +88429,12 @@ _Z26test_do_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Parent Loop BB217_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -87373,20 +88442,20 @@ _Z26test_do_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB217_9: # %.loopexit.us # in Loop: Header=BB217_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB217_3 # %bb.10: # in Loop: Header=BB217_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -87398,19 +88467,31 @@ _Z26test_do_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB217_11: # %.lr.ph.split blez $s0, .LBB217_18 # %bb.12: # %.preheader.us37.preheader - pcalau12i $a0, %pc_hi20(.LCPI217_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI217_0) - pcalau12i $a0, %pc_hi20(.LCPI217_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI217_1) - pcalau12i $a0, %pc_hi20(.LCPI217_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI217_2) - pcalau12i $a0, %pc_hi20(.LCPI217_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI217_4) - pcalau12i $a0, %pc_hi20(.LCPI217_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI217_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -87425,17 +88506,17 @@ _Z26test_do_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Child Loop BB217_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB217_15: # Parent Loop BB217_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -87443,20 +88524,20 @@ _Z26test_do_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # %bb.16: # %.loopexit.us38 # in Loop: Header=BB217_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB217_13 # %bb.17: # in Loop: Header=BB217_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -87469,16 +88550,25 @@ _Z26test_do_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB217_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI217_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI217_0) - pcalau12i $a0, %pc_hi20(.LCPI217_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI217_1) - pcalau12i $a0, %pc_hi20(.LCPI217_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI217_2) - pcalau12i $a0, %pc_hi20(.LCPI217_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI217_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -87497,9 +88587,9 @@ _Z26test_do_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB217_19 # %bb.21: # in Loop: Header=BB217_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -87539,12 +88629,14 @@ _Z26test_do_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB217_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI217_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI217_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -87584,22 +88676,8 @@ _Z26test_do_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi31EdEvPKT0_iPKc, .Lfunc_end217-_Z26test_do_loop_unroll_factorILi31EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi30EdEvPKT0_iPKc -.LCPI218_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI218_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI218_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI218_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI218_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI218_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi30EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi30EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi30EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi30EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi30EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi30EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi30EdEvPKT0_iPKc @@ -87661,19 +88739,31 @@ _Z26test_do_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac addi.d $s6, $s1, 120 addi.d $s7, $s1, 240 addi.d $s8, $s0, -30 - pcalau12i $a0, %pc_hi20(.LCPI218_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI218_0) - pcalau12i $a0, %pc_hi20(.LCPI218_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI218_1) - pcalau12i $a0, %pc_hi20(.LCPI218_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI218_2) - pcalau12i $a0, %pc_hi20(.LCPI218_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI218_4) - pcalau12i $a0, %pc_hi20(.LCPI218_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI218_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -87692,251 +88782,251 @@ _Z26test_do_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB218_5: # Parent Loop BB218_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -120 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 30 addi.w $a0, $a0, 30 @@ -87954,12 +89044,12 @@ _Z26test_do_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Parent Loop BB218_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -87967,20 +89057,20 @@ _Z26test_do_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB218_9: # %.loopexit.us # in Loop: Header=BB218_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB218_3 # %bb.10: # in Loop: Header=BB218_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -87992,19 +89082,31 @@ _Z26test_do_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB218_11: # %.lr.ph.split blez $s0, .LBB218_18 # %bb.12: # %.preheader.us37.preheader - pcalau12i $a0, %pc_hi20(.LCPI218_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI218_0) - pcalau12i $a0, %pc_hi20(.LCPI218_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI218_1) - pcalau12i $a0, %pc_hi20(.LCPI218_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI218_2) - pcalau12i $a0, %pc_hi20(.LCPI218_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI218_4) - pcalau12i $a0, %pc_hi20(.LCPI218_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI218_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -88019,17 +89121,17 @@ _Z26test_do_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Child Loop BB218_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB218_15: # Parent Loop BB218_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -88037,20 +89139,20 @@ _Z26test_do_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # %bb.16: # %.loopexit.us38 # in Loop: Header=BB218_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB218_13 # %bb.17: # in Loop: Header=BB218_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -88063,16 +89165,25 @@ _Z26test_do_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB218_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI218_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI218_0) - pcalau12i $a0, %pc_hi20(.LCPI218_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI218_1) - pcalau12i $a0, %pc_hi20(.LCPI218_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI218_2) - pcalau12i $a0, %pc_hi20(.LCPI218_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI218_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -88091,9 +89202,9 @@ _Z26test_do_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB218_19 # %bb.21: # in Loop: Header=BB218_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -88133,12 +89244,14 @@ _Z26test_do_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB218_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI218_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI218_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -88257,22 +89370,8 @@ _ZN13do_loop_testsILi28EdE7do_testEPKdPKc: # @_ZN13do_loop_testsILi28EdE7do_test .size _ZN13do_loop_testsILi28EdE7do_testEPKdPKc, .Lfunc_end219-_ZN13do_loop_testsILi28EdE7do_testEPKdPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi29EdEvPKT0_iPKc -.LCPI220_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI220_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI220_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI220_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI220_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI220_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi29EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi29EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi29EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi29EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi29EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi29EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi29EdEvPKT0_iPKc @@ -88334,19 +89433,31 @@ _Z26test_do_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac addi.d $s6, $s1, 112 addi.d $s7, $s1, 232 addi.d $s8, $s0, -29 - pcalau12i $a0, %pc_hi20(.LCPI220_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI220_0) - pcalau12i $a0, %pc_hi20(.LCPI220_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI220_1) - pcalau12i $a0, %pc_hi20(.LCPI220_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI220_2) - pcalau12i $a0, %pc_hi20(.LCPI220_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI220_4) - pcalau12i $a0, %pc_hi20(.LCPI220_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI220_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -88365,243 +89476,243 @@ _Z26test_do_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB220_5: # Parent Loop BB220_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 29 addi.w $a0, $a0, 29 @@ -88619,12 +89730,12 @@ _Z26test_do_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Parent Loop BB220_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -88632,20 +89743,20 @@ _Z26test_do_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB220_9: # %.loopexit.us # in Loop: Header=BB220_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB220_3 # %bb.10: # in Loop: Header=BB220_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -88657,19 +89768,31 @@ _Z26test_do_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB220_11: # %.lr.ph.split blez $s0, .LBB220_18 # %bb.12: # %.preheader.us37.preheader - pcalau12i $a0, %pc_hi20(.LCPI220_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI220_0) - pcalau12i $a0, %pc_hi20(.LCPI220_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI220_1) - pcalau12i $a0, %pc_hi20(.LCPI220_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI220_2) - pcalau12i $a0, %pc_hi20(.LCPI220_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI220_4) - pcalau12i $a0, %pc_hi20(.LCPI220_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI220_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -88684,17 +89807,17 @@ _Z26test_do_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Child Loop BB220_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB220_15: # Parent Loop BB220_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -88702,20 +89825,20 @@ _Z26test_do_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # %bb.16: # %.loopexit.us38 # in Loop: Header=BB220_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB220_13 # %bb.17: # in Loop: Header=BB220_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -88728,16 +89851,25 @@ _Z26test_do_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB220_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI220_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI220_0) - pcalau12i $a0, %pc_hi20(.LCPI220_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI220_1) - pcalau12i $a0, %pc_hi20(.LCPI220_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI220_2) - pcalau12i $a0, %pc_hi20(.LCPI220_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI220_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -88756,9 +89888,9 @@ _Z26test_do_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB220_19 # %bb.21: # in Loop: Header=BB220_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -88798,12 +89930,14 @@ _Z26test_do_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB220_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI220_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI220_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -88843,22 +89977,8 @@ _Z26test_do_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi29EdEvPKT0_iPKc, .Lfunc_end220-_Z26test_do_loop_unroll_factorILi29EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi28EdEvPKT0_iPKc -.LCPI221_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI221_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI221_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI221_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI221_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI221_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi28EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi28EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi28EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi28EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi28EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi28EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi28EdEvPKT0_iPKc @@ -88920,19 +90040,31 @@ _Z26test_do_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac addi.d $s6, $s1, 112 addi.d $s7, $s1, 224 addi.d $s8, $s0, -28 - pcalau12i $a0, %pc_hi20(.LCPI221_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI221_0) - pcalau12i $a0, %pc_hi20(.LCPI221_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI221_1) - pcalau12i $a0, %pc_hi20(.LCPI221_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI221_2) - pcalau12i $a0, %pc_hi20(.LCPI221_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI221_4) - pcalau12i $a0, %pc_hi20(.LCPI221_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI221_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -88951,235 +90083,235 @@ _Z26test_do_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB221_5: # Parent Loop BB221_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 28 addi.w $a0, $a0, 28 @@ -89197,12 +90329,12 @@ _Z26test_do_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Parent Loop BB221_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -89210,20 +90342,20 @@ _Z26test_do_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB221_9: # %.loopexit.us # in Loop: Header=BB221_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB221_3 # %bb.10: # in Loop: Header=BB221_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -89235,19 +90367,31 @@ _Z26test_do_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB221_11: # %.lr.ph.split blez $s0, .LBB221_18 # %bb.12: # %.preheader.us37.preheader - pcalau12i $a0, %pc_hi20(.LCPI221_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI221_0) - pcalau12i $a0, %pc_hi20(.LCPI221_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI221_1) - pcalau12i $a0, %pc_hi20(.LCPI221_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI221_2) - pcalau12i $a0, %pc_hi20(.LCPI221_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI221_4) - pcalau12i $a0, %pc_hi20(.LCPI221_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI221_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -89262,17 +90406,17 @@ _Z26test_do_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Child Loop BB221_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB221_15: # Parent Loop BB221_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -89280,20 +90424,20 @@ _Z26test_do_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # %bb.16: # %.loopexit.us38 # in Loop: Header=BB221_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB221_13 # %bb.17: # in Loop: Header=BB221_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -89306,16 +90450,25 @@ _Z26test_do_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB221_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI221_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI221_0) - pcalau12i $a0, %pc_hi20(.LCPI221_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI221_1) - pcalau12i $a0, %pc_hi20(.LCPI221_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI221_2) - pcalau12i $a0, %pc_hi20(.LCPI221_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI221_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -89334,9 +90487,9 @@ _Z26test_do_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB221_19 # %bb.21: # in Loop: Header=BB221_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -89376,12 +90529,14 @@ _Z26test_do_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB221_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI221_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI221_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -89421,22 +90576,8 @@ _Z26test_do_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi28EdEvPKT0_iPKc, .Lfunc_end221-_Z26test_do_loop_unroll_factorILi28EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi27EdEvPKT0_iPKc -.LCPI222_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI222_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI222_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI222_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI222_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI222_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi27EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi27EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi27EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi27EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi27EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi27EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi27EdEvPKT0_iPKc @@ -89498,19 +90639,31 @@ _Z26test_do_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac addi.d $s6, $s1, 104 addi.d $s7, $s1, 216 addi.d $s8, $s0, -27 - pcalau12i $a0, %pc_hi20(.LCPI222_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI222_0) - pcalau12i $a0, %pc_hi20(.LCPI222_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI222_1) - pcalau12i $a0, %pc_hi20(.LCPI222_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI222_2) - pcalau12i $a0, %pc_hi20(.LCPI222_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI222_4) - pcalau12i $a0, %pc_hi20(.LCPI222_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI222_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -89529,227 +90682,227 @@ _Z26test_do_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB222_5: # Parent Loop BB222_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 27 addi.w $a0, $a0, 27 @@ -89767,12 +90920,12 @@ _Z26test_do_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Parent Loop BB222_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -89780,20 +90933,20 @@ _Z26test_do_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB222_9: # %.loopexit.us # in Loop: Header=BB222_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB222_3 # %bb.10: # in Loop: Header=BB222_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -89805,19 +90958,31 @@ _Z26test_do_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB222_11: # %.lr.ph.split blez $s0, .LBB222_18 # %bb.12: # %.preheader.us37.preheader - pcalau12i $a0, %pc_hi20(.LCPI222_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI222_0) - pcalau12i $a0, %pc_hi20(.LCPI222_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI222_1) - pcalau12i $a0, %pc_hi20(.LCPI222_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI222_2) - pcalau12i $a0, %pc_hi20(.LCPI222_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI222_4) - pcalau12i $a0, %pc_hi20(.LCPI222_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI222_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -89832,17 +90997,17 @@ _Z26test_do_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Child Loop BB222_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB222_15: # Parent Loop BB222_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -89850,20 +91015,20 @@ _Z26test_do_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # %bb.16: # %.loopexit.us38 # in Loop: Header=BB222_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB222_13 # %bb.17: # in Loop: Header=BB222_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -89876,16 +91041,25 @@ _Z26test_do_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB222_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI222_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI222_0) - pcalau12i $a0, %pc_hi20(.LCPI222_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI222_1) - pcalau12i $a0, %pc_hi20(.LCPI222_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI222_2) - pcalau12i $a0, %pc_hi20(.LCPI222_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI222_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -89904,9 +91078,9 @@ _Z26test_do_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB222_19 # %bb.21: # in Loop: Header=BB222_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -89946,12 +91120,14 @@ _Z26test_do_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB222_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI222_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI222_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -89991,22 +91167,8 @@ _Z26test_do_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi27EdEvPKT0_iPKc, .Lfunc_end222-_Z26test_do_loop_unroll_factorILi27EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi26EdEvPKT0_iPKc -.LCPI223_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI223_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI223_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI223_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI223_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI223_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi26EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi26EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi26EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi26EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi26EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi26EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi26EdEvPKT0_iPKc @@ -90068,19 +91230,31 @@ _Z26test_do_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac addi.d $s6, $s1, 104 addi.d $s7, $s1, 208 addi.d $s8, $s0, -26 - pcalau12i $a0, %pc_hi20(.LCPI223_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI223_0) - pcalau12i $a0, %pc_hi20(.LCPI223_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI223_1) - pcalau12i $a0, %pc_hi20(.LCPI223_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI223_2) - pcalau12i $a0, %pc_hi20(.LCPI223_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI223_4) - pcalau12i $a0, %pc_hi20(.LCPI223_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI223_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -90099,219 +91273,219 @@ _Z26test_do_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB223_5: # Parent Loop BB223_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 26 addi.w $a0, $a0, 26 @@ -90329,12 +91503,12 @@ _Z26test_do_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Parent Loop BB223_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -90342,20 +91516,20 @@ _Z26test_do_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB223_9: # %.loopexit.us # in Loop: Header=BB223_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB223_3 # %bb.10: # in Loop: Header=BB223_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -90367,19 +91541,31 @@ _Z26test_do_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB223_11: # %.lr.ph.split blez $s0, .LBB223_18 # %bb.12: # %.preheader.us37.preheader - pcalau12i $a0, %pc_hi20(.LCPI223_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI223_0) - pcalau12i $a0, %pc_hi20(.LCPI223_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI223_1) - pcalau12i $a0, %pc_hi20(.LCPI223_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI223_2) - pcalau12i $a0, %pc_hi20(.LCPI223_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI223_4) - pcalau12i $a0, %pc_hi20(.LCPI223_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI223_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -90394,17 +91580,17 @@ _Z26test_do_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Child Loop BB223_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB223_15: # Parent Loop BB223_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -90412,20 +91598,20 @@ _Z26test_do_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # %bb.16: # %.loopexit.us38 # in Loop: Header=BB223_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB223_13 # %bb.17: # in Loop: Header=BB223_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -90438,16 +91624,25 @@ _Z26test_do_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB223_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI223_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI223_0) - pcalau12i $a0, %pc_hi20(.LCPI223_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI223_1) - pcalau12i $a0, %pc_hi20(.LCPI223_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI223_2) - pcalau12i $a0, %pc_hi20(.LCPI223_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI223_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -90466,9 +91661,9 @@ _Z26test_do_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB223_19 # %bb.21: # in Loop: Header=BB223_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -90508,12 +91703,14 @@ _Z26test_do_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB223_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI223_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI223_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -90553,22 +91750,8 @@ _Z26test_do_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi26EdEvPKT0_iPKc, .Lfunc_end223-_Z26test_do_loop_unroll_factorILi26EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi25EdEvPKT0_iPKc -.LCPI224_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI224_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI224_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI224_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI224_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI224_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi25EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi25EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi25EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi25EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi25EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi25EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi25EdEvPKT0_iPKc @@ -90630,19 +91813,31 @@ _Z26test_do_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac addi.d $s6, $s1, 96 addi.d $s7, $s1, 200 addi.d $s8, $s0, -25 - pcalau12i $a0, %pc_hi20(.LCPI224_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI224_0) - pcalau12i $a0, %pc_hi20(.LCPI224_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI224_1) - pcalau12i $a0, %pc_hi20(.LCPI224_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI224_2) - pcalau12i $a0, %pc_hi20(.LCPI224_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI224_4) - pcalau12i $a0, %pc_hi20(.LCPI224_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI224_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -90661,211 +91856,211 @@ _Z26test_do_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB224_5: # Parent Loop BB224_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 25 addi.w $a0, $a0, 25 @@ -90883,12 +92078,12 @@ _Z26test_do_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Parent Loop BB224_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -90896,20 +92091,20 @@ _Z26test_do_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB224_9: # %.loopexit.us # in Loop: Header=BB224_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB224_3 # %bb.10: # in Loop: Header=BB224_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -90921,19 +92116,31 @@ _Z26test_do_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB224_11: # %.lr.ph.split blez $s0, .LBB224_18 # %bb.12: # %.preheader.us37.preheader - pcalau12i $a0, %pc_hi20(.LCPI224_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI224_0) - pcalau12i $a0, %pc_hi20(.LCPI224_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI224_1) - pcalau12i $a0, %pc_hi20(.LCPI224_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI224_2) - pcalau12i $a0, %pc_hi20(.LCPI224_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI224_4) - pcalau12i $a0, %pc_hi20(.LCPI224_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI224_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -90948,17 +92155,17 @@ _Z26test_do_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Child Loop BB224_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB224_15: # Parent Loop BB224_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -90966,20 +92173,20 @@ _Z26test_do_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # %bb.16: # %.loopexit.us38 # in Loop: Header=BB224_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB224_13 # %bb.17: # in Loop: Header=BB224_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -90992,16 +92199,25 @@ _Z26test_do_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB224_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI224_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI224_0) - pcalau12i $a0, %pc_hi20(.LCPI224_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI224_1) - pcalau12i $a0, %pc_hi20(.LCPI224_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI224_2) - pcalau12i $a0, %pc_hi20(.LCPI224_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI224_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -91020,9 +92236,9 @@ _Z26test_do_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB224_19 # %bb.21: # in Loop: Header=BB224_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -91062,12 +92278,14 @@ _Z26test_do_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB224_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI224_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI224_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -91107,22 +92325,8 @@ _Z26test_do_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi25EdEvPKT0_iPKc, .Lfunc_end224-_Z26test_do_loop_unroll_factorILi25EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi24EdEvPKT0_iPKc -.LCPI225_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI225_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI225_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI225_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI225_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI225_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi24EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi24EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi24EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi24EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi24EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi24EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi24EdEvPKT0_iPKc @@ -91184,19 +92388,31 @@ _Z26test_do_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac addi.d $s6, $s1, 96 addi.d $s7, $s1, 192 addi.d $s8, $s0, -24 - pcalau12i $a0, %pc_hi20(.LCPI225_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI225_0) - pcalau12i $a0, %pc_hi20(.LCPI225_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI225_1) - pcalau12i $a0, %pc_hi20(.LCPI225_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI225_2) - pcalau12i $a0, %pc_hi20(.LCPI225_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI225_4) - pcalau12i $a0, %pc_hi20(.LCPI225_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI225_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -91215,203 +92431,203 @@ _Z26test_do_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB225_5: # Parent Loop BB225_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 24 addi.w $a0, $a0, 24 @@ -91429,12 +92645,12 @@ _Z26test_do_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Parent Loop BB225_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -91442,20 +92658,20 @@ _Z26test_do_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB225_9: # %.loopexit.us # in Loop: Header=BB225_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB225_3 # %bb.10: # in Loop: Header=BB225_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -91467,19 +92683,31 @@ _Z26test_do_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB225_11: # %.lr.ph.split blez $s0, .LBB225_18 # %bb.12: # %.preheader.us37.preheader - pcalau12i $a0, %pc_hi20(.LCPI225_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI225_0) - pcalau12i $a0, %pc_hi20(.LCPI225_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI225_1) - pcalau12i $a0, %pc_hi20(.LCPI225_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI225_2) - pcalau12i $a0, %pc_hi20(.LCPI225_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI225_4) - pcalau12i $a0, %pc_hi20(.LCPI225_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI225_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -91494,17 +92722,17 @@ _Z26test_do_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Child Loop BB225_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB225_15: # Parent Loop BB225_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -91512,20 +92740,20 @@ _Z26test_do_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # %bb.16: # %.loopexit.us38 # in Loop: Header=BB225_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB225_13 # %bb.17: # in Loop: Header=BB225_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -91538,16 +92766,25 @@ _Z26test_do_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB225_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI225_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI225_0) - pcalau12i $a0, %pc_hi20(.LCPI225_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI225_1) - pcalau12i $a0, %pc_hi20(.LCPI225_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI225_2) - pcalau12i $a0, %pc_hi20(.LCPI225_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI225_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -91566,9 +92803,9 @@ _Z26test_do_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB225_19 # %bb.21: # in Loop: Header=BB225_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -91608,12 +92845,14 @@ _Z26test_do_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB225_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI225_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI225_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -91653,22 +92892,8 @@ _Z26test_do_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi24EdEvPKT0_iPKc, .Lfunc_end225-_Z26test_do_loop_unroll_factorILi24EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi23EdEvPKT0_iPKc -.LCPI226_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI226_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI226_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI226_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI226_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI226_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi23EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi23EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi23EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi23EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi23EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi23EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi23EdEvPKT0_iPKc @@ -91730,19 +92955,31 @@ _Z26test_do_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac addi.d $s6, $s1, 88 addi.d $s7, $s1, 184 addi.d $s8, $s0, -23 - pcalau12i $a0, %pc_hi20(.LCPI226_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI226_0) - pcalau12i $a0, %pc_hi20(.LCPI226_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI226_1) - pcalau12i $a0, %pc_hi20(.LCPI226_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI226_2) - pcalau12i $a0, %pc_hi20(.LCPI226_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI226_4) - pcalau12i $a0, %pc_hi20(.LCPI226_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI226_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -91761,195 +92998,195 @@ _Z26test_do_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB226_5: # Parent Loop BB226_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 23 addi.w $a0, $a0, 23 @@ -91967,12 +93204,12 @@ _Z26test_do_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Parent Loop BB226_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -91980,20 +93217,20 @@ _Z26test_do_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB226_9: # %.loopexit.us # in Loop: Header=BB226_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB226_3 # %bb.10: # in Loop: Header=BB226_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -92005,19 +93242,31 @@ _Z26test_do_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB226_11: # %.lr.ph.split blez $s0, .LBB226_18 # %bb.12: # %.preheader.us36.preheader - pcalau12i $a0, %pc_hi20(.LCPI226_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI226_0) - pcalau12i $a0, %pc_hi20(.LCPI226_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI226_1) - pcalau12i $a0, %pc_hi20(.LCPI226_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI226_2) - pcalau12i $a0, %pc_hi20(.LCPI226_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI226_4) - pcalau12i $a0, %pc_hi20(.LCPI226_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI226_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -92032,17 +93281,17 @@ _Z26test_do_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Child Loop BB226_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB226_15: # Parent Loop BB226_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -92050,20 +93299,20 @@ _Z26test_do_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # %bb.16: # %.loopexit.us37 # in Loop: Header=BB226_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB226_13 # %bb.17: # in Loop: Header=BB226_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -92076,16 +93325,25 @@ _Z26test_do_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB226_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI226_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI226_0) - pcalau12i $a0, %pc_hi20(.LCPI226_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI226_1) - pcalau12i $a0, %pc_hi20(.LCPI226_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI226_2) - pcalau12i $a0, %pc_hi20(.LCPI226_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI226_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -92104,9 +93362,9 @@ _Z26test_do_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB226_19 # %bb.21: # in Loop: Header=BB226_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -92146,12 +93404,14 @@ _Z26test_do_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB226_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI226_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI226_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -92191,22 +93451,8 @@ _Z26test_do_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi23EdEvPKT0_iPKc, .Lfunc_end226-_Z26test_do_loop_unroll_factorILi23EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi22EdEvPKT0_iPKc -.LCPI227_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI227_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI227_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI227_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI227_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI227_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi22EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi22EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi22EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi22EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi22EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi22EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi22EdEvPKT0_iPKc @@ -92268,19 +93514,31 @@ _Z26test_do_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac addi.d $s6, $s1, 88 addi.d $s7, $s1, 176 addi.d $s8, $s0, -22 - pcalau12i $a0, %pc_hi20(.LCPI227_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI227_0) - pcalau12i $a0, %pc_hi20(.LCPI227_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI227_1) - pcalau12i $a0, %pc_hi20(.LCPI227_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI227_2) - pcalau12i $a0, %pc_hi20(.LCPI227_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI227_4) - pcalau12i $a0, %pc_hi20(.LCPI227_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI227_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -92299,187 +93557,187 @@ _Z26test_do_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB227_5: # Parent Loop BB227_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 22 addi.w $a0, $a0, 22 @@ -92497,12 +93755,12 @@ _Z26test_do_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Parent Loop BB227_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -92510,20 +93768,20 @@ _Z26test_do_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB227_9: # %.loopexit.us # in Loop: Header=BB227_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB227_3 # %bb.10: # in Loop: Header=BB227_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -92535,19 +93793,31 @@ _Z26test_do_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB227_11: # %.lr.ph.split blez $s0, .LBB227_18 # %bb.12: # %.preheader.us36.preheader - pcalau12i $a0, %pc_hi20(.LCPI227_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI227_0) - pcalau12i $a0, %pc_hi20(.LCPI227_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI227_1) - pcalau12i $a0, %pc_hi20(.LCPI227_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI227_2) - pcalau12i $a0, %pc_hi20(.LCPI227_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI227_4) - pcalau12i $a0, %pc_hi20(.LCPI227_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI227_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -92562,17 +93832,17 @@ _Z26test_do_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Child Loop BB227_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB227_15: # Parent Loop BB227_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -92580,20 +93850,20 @@ _Z26test_do_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # %bb.16: # %.loopexit.us37 # in Loop: Header=BB227_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB227_13 # %bb.17: # in Loop: Header=BB227_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -92606,16 +93876,25 @@ _Z26test_do_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB227_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI227_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI227_0) - pcalau12i $a0, %pc_hi20(.LCPI227_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI227_1) - pcalau12i $a0, %pc_hi20(.LCPI227_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI227_2) - pcalau12i $a0, %pc_hi20(.LCPI227_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI227_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -92634,9 +93913,9 @@ _Z26test_do_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB227_19 # %bb.21: # in Loop: Header=BB227_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -92676,12 +93955,14 @@ _Z26test_do_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB227_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI227_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI227_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -92721,22 +94002,8 @@ _Z26test_do_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi22EdEvPKT0_iPKc, .Lfunc_end227-_Z26test_do_loop_unroll_factorILi22EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi21EdEvPKT0_iPKc -.LCPI228_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI228_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI228_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI228_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI228_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI228_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi21EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi21EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi21EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi21EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi21EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi21EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi21EdEvPKT0_iPKc @@ -92798,19 +94065,31 @@ _Z26test_do_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac addi.d $s6, $s1, 80 addi.d $s7, $s1, 168 addi.d $s8, $s0, -21 - pcalau12i $a0, %pc_hi20(.LCPI228_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI228_0) - pcalau12i $a0, %pc_hi20(.LCPI228_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI228_1) - pcalau12i $a0, %pc_hi20(.LCPI228_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI228_2) - pcalau12i $a0, %pc_hi20(.LCPI228_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI228_4) - pcalau12i $a0, %pc_hi20(.LCPI228_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI228_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -92829,179 +94108,179 @@ _Z26test_do_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB228_5: # Parent Loop BB228_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 21 addi.w $a0, $a0, 21 @@ -93019,12 +94298,12 @@ _Z26test_do_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Parent Loop BB228_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -93032,20 +94311,20 @@ _Z26test_do_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB228_9: # %.loopexit.us # in Loop: Header=BB228_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB228_3 # %bb.10: # in Loop: Header=BB228_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -93057,19 +94336,31 @@ _Z26test_do_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB228_11: # %.lr.ph.split blez $s0, .LBB228_18 # %bb.12: # %.preheader.us36.preheader - pcalau12i $a0, %pc_hi20(.LCPI228_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI228_0) - pcalau12i $a0, %pc_hi20(.LCPI228_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI228_1) - pcalau12i $a0, %pc_hi20(.LCPI228_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI228_2) - pcalau12i $a0, %pc_hi20(.LCPI228_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI228_4) - pcalau12i $a0, %pc_hi20(.LCPI228_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI228_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -93084,17 +94375,17 @@ _Z26test_do_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Child Loop BB228_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB228_15: # Parent Loop BB228_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -93102,20 +94393,20 @@ _Z26test_do_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # %bb.16: # %.loopexit.us37 # in Loop: Header=BB228_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB228_13 # %bb.17: # in Loop: Header=BB228_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -93128,16 +94419,25 @@ _Z26test_do_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB228_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI228_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI228_0) - pcalau12i $a0, %pc_hi20(.LCPI228_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI228_1) - pcalau12i $a0, %pc_hi20(.LCPI228_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI228_2) - pcalau12i $a0, %pc_hi20(.LCPI228_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI228_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -93156,9 +94456,9 @@ _Z26test_do_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB228_19 # %bb.21: # in Loop: Header=BB228_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -93198,12 +94498,14 @@ _Z26test_do_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB228_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI228_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI228_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -93321,22 +94623,8 @@ _ZN13do_loop_testsILi19EdE7do_testEPKdPKc: # @_ZN13do_loop_testsILi19EdE7do_test .size _ZN13do_loop_testsILi19EdE7do_testEPKdPKc, .Lfunc_end229-_ZN13do_loop_testsILi19EdE7do_testEPKdPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi20EdEvPKT0_iPKc -.LCPI230_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI230_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI230_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI230_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI230_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI230_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi20EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi20EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi20EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi20EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi20EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi20EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi20EdEvPKT0_iPKc @@ -93398,19 +94686,31 @@ _Z26test_do_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac addi.d $s6, $s1, 80 addi.d $s7, $s1, 160 addi.d $s8, $s0, -20 - pcalau12i $a0, %pc_hi20(.LCPI230_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI230_0) - pcalau12i $a0, %pc_hi20(.LCPI230_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI230_1) - pcalau12i $a0, %pc_hi20(.LCPI230_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI230_2) - pcalau12i $a0, %pc_hi20(.LCPI230_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI230_4) - pcalau12i $a0, %pc_hi20(.LCPI230_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI230_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -93429,171 +94729,171 @@ _Z26test_do_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB230_5: # Parent Loop BB230_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 20 addi.w $a0, $a0, 20 @@ -93611,12 +94911,12 @@ _Z26test_do_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Parent Loop BB230_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -93624,20 +94924,20 @@ _Z26test_do_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB230_9: # %.loopexit.us # in Loop: Header=BB230_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB230_3 # %bb.10: # in Loop: Header=BB230_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -93649,19 +94949,31 @@ _Z26test_do_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB230_11: # %.lr.ph.split blez $s0, .LBB230_18 # %bb.12: # %.preheader.us36.preheader - pcalau12i $a0, %pc_hi20(.LCPI230_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI230_0) - pcalau12i $a0, %pc_hi20(.LCPI230_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI230_1) - pcalau12i $a0, %pc_hi20(.LCPI230_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI230_2) - pcalau12i $a0, %pc_hi20(.LCPI230_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI230_4) - pcalau12i $a0, %pc_hi20(.LCPI230_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI230_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -93676,17 +94988,17 @@ _Z26test_do_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Child Loop BB230_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB230_15: # Parent Loop BB230_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -93694,20 +95006,20 @@ _Z26test_do_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # %bb.16: # %.loopexit.us37 # in Loop: Header=BB230_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB230_13 # %bb.17: # in Loop: Header=BB230_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -93720,16 +95032,25 @@ _Z26test_do_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB230_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI230_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI230_0) - pcalau12i $a0, %pc_hi20(.LCPI230_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI230_1) - pcalau12i $a0, %pc_hi20(.LCPI230_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI230_2) - pcalau12i $a0, %pc_hi20(.LCPI230_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI230_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -93748,9 +95069,9 @@ _Z26test_do_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB230_19 # %bb.21: # in Loop: Header=BB230_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -93790,12 +95111,14 @@ _Z26test_do_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB230_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI230_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI230_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -93835,22 +95158,8 @@ _Z26test_do_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi20EdEvPKT0_iPKc, .Lfunc_end230-_Z26test_do_loop_unroll_factorILi20EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi19EdEvPKT0_iPKc -.LCPI231_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI231_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI231_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI231_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI231_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI231_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi19EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi19EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi19EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi19EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi19EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi19EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi19EdEvPKT0_iPKc @@ -93912,19 +95221,31 @@ _Z26test_do_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac addi.d $s6, $s1, 72 addi.d $s7, $s1, 152 addi.d $s8, $s0, -19 - pcalau12i $a0, %pc_hi20(.LCPI231_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI231_0) - pcalau12i $a0, %pc_hi20(.LCPI231_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI231_1) - pcalau12i $a0, %pc_hi20(.LCPI231_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI231_2) - pcalau12i $a0, %pc_hi20(.LCPI231_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI231_4) - pcalau12i $a0, %pc_hi20(.LCPI231_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI231_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -93943,163 +95264,163 @@ _Z26test_do_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB231_5: # Parent Loop BB231_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 19 addi.w $a0, $a0, 19 @@ -94117,12 +95438,12 @@ _Z26test_do_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Parent Loop BB231_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -94130,20 +95451,20 @@ _Z26test_do_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB231_9: # %.loopexit.us # in Loop: Header=BB231_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB231_3 # %bb.10: # in Loop: Header=BB231_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -94155,19 +95476,31 @@ _Z26test_do_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB231_11: # %.lr.ph.split blez $s0, .LBB231_18 # %bb.12: # %.preheader.us36.preheader - pcalau12i $a0, %pc_hi20(.LCPI231_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI231_0) - pcalau12i $a0, %pc_hi20(.LCPI231_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI231_1) - pcalau12i $a0, %pc_hi20(.LCPI231_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI231_2) - pcalau12i $a0, %pc_hi20(.LCPI231_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI231_4) - pcalau12i $a0, %pc_hi20(.LCPI231_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI231_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -94182,17 +95515,17 @@ _Z26test_do_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Child Loop BB231_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB231_15: # Parent Loop BB231_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -94200,20 +95533,20 @@ _Z26test_do_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # %bb.16: # %.loopexit.us37 # in Loop: Header=BB231_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB231_13 # %bb.17: # in Loop: Header=BB231_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -94226,16 +95559,25 @@ _Z26test_do_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB231_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI231_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI231_0) - pcalau12i $a0, %pc_hi20(.LCPI231_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI231_1) - pcalau12i $a0, %pc_hi20(.LCPI231_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI231_2) - pcalau12i $a0, %pc_hi20(.LCPI231_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI231_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -94254,9 +95596,9 @@ _Z26test_do_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB231_19 # %bb.21: # in Loop: Header=BB231_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -94296,12 +95638,14 @@ _Z26test_do_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB231_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI231_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI231_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -94341,22 +95685,8 @@ _Z26test_do_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi19EdEvPKT0_iPKc, .Lfunc_end231-_Z26test_do_loop_unroll_factorILi19EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi18EdEvPKT0_iPKc -.LCPI232_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI232_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI232_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI232_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI232_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI232_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi18EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi18EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi18EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi18EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi18EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi18EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi18EdEvPKT0_iPKc @@ -94418,19 +95748,31 @@ _Z26test_do_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac addi.d $s6, $s1, 72 addi.d $s7, $s1, 144 addi.d $s8, $s0, -18 - pcalau12i $a0, %pc_hi20(.LCPI232_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI232_0) - pcalau12i $a0, %pc_hi20(.LCPI232_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI232_1) - pcalau12i $a0, %pc_hi20(.LCPI232_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI232_2) - pcalau12i $a0, %pc_hi20(.LCPI232_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI232_4) - pcalau12i $a0, %pc_hi20(.LCPI232_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI232_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -94449,155 +95791,155 @@ _Z26test_do_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB232_5: # Parent Loop BB232_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 18 addi.w $a0, $a0, 18 @@ -94615,12 +95957,12 @@ _Z26test_do_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Parent Loop BB232_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -94628,20 +95970,20 @@ _Z26test_do_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB232_9: # %.loopexit.us # in Loop: Header=BB232_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB232_3 # %bb.10: # in Loop: Header=BB232_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -94653,19 +95995,31 @@ _Z26test_do_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB232_11: # %.lr.ph.split blez $s0, .LBB232_18 # %bb.12: # %.preheader.us36.preheader - pcalau12i $a0, %pc_hi20(.LCPI232_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI232_0) - pcalau12i $a0, %pc_hi20(.LCPI232_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI232_1) - pcalau12i $a0, %pc_hi20(.LCPI232_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI232_2) - pcalau12i $a0, %pc_hi20(.LCPI232_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI232_4) - pcalau12i $a0, %pc_hi20(.LCPI232_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI232_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -94680,17 +96034,17 @@ _Z26test_do_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Child Loop BB232_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB232_15: # Parent Loop BB232_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -94698,20 +96052,20 @@ _Z26test_do_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # %bb.16: # %.loopexit.us37 # in Loop: Header=BB232_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB232_13 # %bb.17: # in Loop: Header=BB232_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -94724,16 +96078,25 @@ _Z26test_do_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB232_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI232_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI232_0) - pcalau12i $a0, %pc_hi20(.LCPI232_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI232_1) - pcalau12i $a0, %pc_hi20(.LCPI232_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI232_2) - pcalau12i $a0, %pc_hi20(.LCPI232_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI232_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -94752,9 +96115,9 @@ _Z26test_do_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB232_19 # %bb.21: # in Loop: Header=BB232_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -94794,12 +96157,14 @@ _Z26test_do_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB232_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI232_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI232_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -94839,22 +96204,8 @@ _Z26test_do_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi18EdEvPKT0_iPKc, .Lfunc_end232-_Z26test_do_loop_unroll_factorILi18EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi17EdEvPKT0_iPKc -.LCPI233_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI233_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI233_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI233_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI233_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI233_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi17EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi17EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi17EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi17EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi17EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi17EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi17EdEvPKT0_iPKc @@ -94916,19 +96267,31 @@ _Z26test_do_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac addi.d $s6, $s1, 64 addi.d $s7, $s1, 136 addi.d $s8, $s0, -17 - pcalau12i $a0, %pc_hi20(.LCPI233_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI233_0) - pcalau12i $a0, %pc_hi20(.LCPI233_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI233_1) - pcalau12i $a0, %pc_hi20(.LCPI233_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI233_2) - pcalau12i $a0, %pc_hi20(.LCPI233_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI233_4) - pcalau12i $a0, %pc_hi20(.LCPI233_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI233_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -94947,147 +96310,147 @@ _Z26test_do_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB233_5: # Parent Loop BB233_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 17 addi.w $a0, $a0, 17 @@ -95105,12 +96468,12 @@ _Z26test_do_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Parent Loop BB233_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -95118,20 +96481,20 @@ _Z26test_do_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB233_9: # %.loopexit.us # in Loop: Header=BB233_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB233_3 # %bb.10: # in Loop: Header=BB233_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -95143,19 +96506,31 @@ _Z26test_do_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB233_11: # %.lr.ph.split blez $s0, .LBB233_18 # %bb.12: # %.preheader.us36.preheader - pcalau12i $a0, %pc_hi20(.LCPI233_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI233_0) - pcalau12i $a0, %pc_hi20(.LCPI233_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI233_1) - pcalau12i $a0, %pc_hi20(.LCPI233_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI233_2) - pcalau12i $a0, %pc_hi20(.LCPI233_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI233_4) - pcalau12i $a0, %pc_hi20(.LCPI233_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI233_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -95170,17 +96545,17 @@ _Z26test_do_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Child Loop BB233_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB233_15: # Parent Loop BB233_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -95188,20 +96563,20 @@ _Z26test_do_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # %bb.16: # %.loopexit.us37 # in Loop: Header=BB233_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB233_13 # %bb.17: # in Loop: Header=BB233_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -95214,16 +96589,25 @@ _Z26test_do_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB233_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI233_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI233_0) - pcalau12i $a0, %pc_hi20(.LCPI233_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI233_1) - pcalau12i $a0, %pc_hi20(.LCPI233_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI233_2) - pcalau12i $a0, %pc_hi20(.LCPI233_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI233_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -95242,9 +96626,9 @@ _Z26test_do_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB233_19 # %bb.21: # in Loop: Header=BB233_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -95284,12 +96668,14 @@ _Z26test_do_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB233_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI233_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI233_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -95329,22 +96715,8 @@ _Z26test_do_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi17EdEvPKT0_iPKc, .Lfunc_end233-_Z26test_do_loop_unroll_factorILi17EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi16EdEvPKT0_iPKc -.LCPI234_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI234_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI234_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI234_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI234_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI234_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi16EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi16EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi16EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi16EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi16EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi16EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi16EdEvPKT0_iPKc @@ -95406,19 +96778,31 @@ _Z26test_do_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac addi.d $s6, $s1, 64 addi.d $s7, $s1, 128 addi.d $s8, $s0, -16 - pcalau12i $a0, %pc_hi20(.LCPI234_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI234_0) - pcalau12i $a0, %pc_hi20(.LCPI234_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI234_1) - pcalau12i $a0, %pc_hi20(.LCPI234_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI234_2) - pcalau12i $a0, %pc_hi20(.LCPI234_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI234_4) - pcalau12i $a0, %pc_hi20(.LCPI234_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI234_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -95437,139 +96821,139 @@ _Z26test_do_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB234_5: # Parent Loop BB234_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 16 addi.w $a0, $a0, 16 @@ -95587,12 +96971,12 @@ _Z26test_do_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Parent Loop BB234_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -95600,20 +96984,20 @@ _Z26test_do_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB234_9: # %.loopexit.us # in Loop: Header=BB234_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB234_3 # %bb.10: # in Loop: Header=BB234_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -95625,19 +97009,31 @@ _Z26test_do_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB234_11: # %.lr.ph.split blez $s0, .LBB234_18 # %bb.12: # %.preheader.us36.preheader - pcalau12i $a0, %pc_hi20(.LCPI234_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI234_0) - pcalau12i $a0, %pc_hi20(.LCPI234_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI234_1) - pcalau12i $a0, %pc_hi20(.LCPI234_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI234_2) - pcalau12i $a0, %pc_hi20(.LCPI234_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI234_4) - pcalau12i $a0, %pc_hi20(.LCPI234_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI234_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -95652,17 +97048,17 @@ _Z26test_do_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Child Loop BB234_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB234_15: # Parent Loop BB234_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -95670,20 +97066,20 @@ _Z26test_do_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # %bb.16: # %.loopexit.us37 # in Loop: Header=BB234_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB234_13 # %bb.17: # in Loop: Header=BB234_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -95696,16 +97092,25 @@ _Z26test_do_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB234_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI234_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI234_0) - pcalau12i $a0, %pc_hi20(.LCPI234_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI234_1) - pcalau12i $a0, %pc_hi20(.LCPI234_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI234_2) - pcalau12i $a0, %pc_hi20(.LCPI234_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI234_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -95724,9 +97129,9 @@ _Z26test_do_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB234_19 # %bb.21: # in Loop: Header=BB234_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -95766,12 +97171,14 @@ _Z26test_do_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB234_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI234_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI234_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -95811,22 +97218,8 @@ _Z26test_do_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi16EdEvPKT0_iPKc, .Lfunc_end234-_Z26test_do_loop_unroll_factorILi16EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi15EdEvPKT0_iPKc -.LCPI235_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI235_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI235_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI235_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI235_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI235_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi15EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi15EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi15EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi15EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi15EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi15EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi15EdEvPKT0_iPKc @@ -95888,19 +97281,31 @@ _Z26test_do_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac addi.d $s6, $s1, 56 addi.d $s7, $s1, 120 addi.d $s8, $s0, -15 - pcalau12i $a0, %pc_hi20(.LCPI235_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI235_0) - pcalau12i $a0, %pc_hi20(.LCPI235_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI235_1) - pcalau12i $a0, %pc_hi20(.LCPI235_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI235_2) - pcalau12i $a0, %pc_hi20(.LCPI235_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI235_4) - pcalau12i $a0, %pc_hi20(.LCPI235_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI235_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -95919,131 +97324,131 @@ _Z26test_do_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB235_5: # Parent Loop BB235_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 15 addi.w $a0, $a0, 15 @@ -96061,12 +97466,12 @@ _Z26test_do_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Parent Loop BB235_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -96074,20 +97479,20 @@ _Z26test_do_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB235_9: # %.loopexit.us # in Loop: Header=BB235_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB235_3 # %bb.10: # in Loop: Header=BB235_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -96099,19 +97504,31 @@ _Z26test_do_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB235_11: # %.lr.ph.split blez $s0, .LBB235_18 # %bb.12: # %.preheader.us36.preheader - pcalau12i $a0, %pc_hi20(.LCPI235_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI235_0) - pcalau12i $a0, %pc_hi20(.LCPI235_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI235_1) - pcalau12i $a0, %pc_hi20(.LCPI235_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI235_2) - pcalau12i $a0, %pc_hi20(.LCPI235_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI235_4) - pcalau12i $a0, %pc_hi20(.LCPI235_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI235_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -96126,17 +97543,17 @@ _Z26test_do_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Child Loop BB235_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB235_15: # Parent Loop BB235_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -96144,20 +97561,20 @@ _Z26test_do_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # %bb.16: # %.loopexit.us37 # in Loop: Header=BB235_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB235_13 # %bb.17: # in Loop: Header=BB235_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -96170,16 +97587,25 @@ _Z26test_do_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB235_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI235_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI235_0) - pcalau12i $a0, %pc_hi20(.LCPI235_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI235_1) - pcalau12i $a0, %pc_hi20(.LCPI235_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI235_2) - pcalau12i $a0, %pc_hi20(.LCPI235_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI235_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -96198,9 +97624,9 @@ _Z26test_do_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB235_19 # %bb.21: # in Loop: Header=BB235_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -96240,12 +97666,14 @@ _Z26test_do_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB235_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI235_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI235_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -96285,22 +97713,8 @@ _Z26test_do_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi15EdEvPKT0_iPKc, .Lfunc_end235-_Z26test_do_loop_unroll_factorILi15EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi14EdEvPKT0_iPKc -.LCPI236_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI236_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI236_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI236_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI236_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI236_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi14EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi14EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi14EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi14EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi14EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi14EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi14EdEvPKT0_iPKc @@ -96362,19 +97776,31 @@ _Z26test_do_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac addi.d $s6, $s1, 56 addi.d $s7, $s1, 112 addi.d $s8, $s0, -14 - pcalau12i $a0, %pc_hi20(.LCPI236_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI236_0) - pcalau12i $a0, %pc_hi20(.LCPI236_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI236_1) - pcalau12i $a0, %pc_hi20(.LCPI236_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI236_2) - pcalau12i $a0, %pc_hi20(.LCPI236_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI236_4) - pcalau12i $a0, %pc_hi20(.LCPI236_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI236_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -96393,123 +97819,123 @@ _Z26test_do_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB236_5: # Parent Loop BB236_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 14 addi.w $a0, $a0, 14 @@ -96527,12 +97953,12 @@ _Z26test_do_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Parent Loop BB236_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -96540,20 +97966,20 @@ _Z26test_do_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB236_9: # %.loopexit.us # in Loop: Header=BB236_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB236_3 # %bb.10: # in Loop: Header=BB236_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -96565,19 +97991,31 @@ _Z26test_do_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB236_11: # %.lr.ph.split blez $s0, .LBB236_18 # %bb.12: # %.preheader.us36.preheader - pcalau12i $a0, %pc_hi20(.LCPI236_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI236_0) - pcalau12i $a0, %pc_hi20(.LCPI236_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI236_1) - pcalau12i $a0, %pc_hi20(.LCPI236_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI236_2) - pcalau12i $a0, %pc_hi20(.LCPI236_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI236_4) - pcalau12i $a0, %pc_hi20(.LCPI236_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI236_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -96592,17 +98030,17 @@ _Z26test_do_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Child Loop BB236_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB236_15: # Parent Loop BB236_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -96610,20 +98048,20 @@ _Z26test_do_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # %bb.16: # %.loopexit.us37 # in Loop: Header=BB236_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB236_13 # %bb.17: # in Loop: Header=BB236_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -96636,16 +98074,25 @@ _Z26test_do_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB236_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI236_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI236_0) - pcalau12i $a0, %pc_hi20(.LCPI236_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI236_1) - pcalau12i $a0, %pc_hi20(.LCPI236_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI236_2) - pcalau12i $a0, %pc_hi20(.LCPI236_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI236_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -96664,9 +98111,9 @@ _Z26test_do_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB236_19 # %bb.21: # in Loop: Header=BB236_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -96706,12 +98153,14 @@ _Z26test_do_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB236_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI236_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI236_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -96751,22 +98200,8 @@ _Z26test_do_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi14EdEvPKT0_iPKc, .Lfunc_end236-_Z26test_do_loop_unroll_factorILi14EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi13EdEvPKT0_iPKc -.LCPI237_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI237_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI237_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI237_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI237_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI237_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi13EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi13EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi13EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi13EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi13EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi13EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi13EdEvPKT0_iPKc @@ -96828,19 +98263,31 @@ _Z26test_do_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac addi.d $s6, $s1, 48 addi.d $s7, $s1, 104 addi.d $s8, $s0, -13 - pcalau12i $a0, %pc_hi20(.LCPI237_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI237_0) - pcalau12i $a0, %pc_hi20(.LCPI237_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI237_1) - pcalau12i $a0, %pc_hi20(.LCPI237_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI237_2) - pcalau12i $a0, %pc_hi20(.LCPI237_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI237_4) - pcalau12i $a0, %pc_hi20(.LCPI237_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI237_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -96859,115 +98306,115 @@ _Z26test_do_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB237_5: # Parent Loop BB237_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 13 addi.w $a0, $a0, 13 @@ -96985,12 +98432,12 @@ _Z26test_do_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Parent Loop BB237_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -96998,20 +98445,20 @@ _Z26test_do_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB237_9: # %.loopexit.us # in Loop: Header=BB237_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB237_3 # %bb.10: # in Loop: Header=BB237_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -97023,19 +98470,31 @@ _Z26test_do_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB237_11: # %.lr.ph.split blez $s0, .LBB237_18 # %bb.12: # %.preheader.us36.preheader - pcalau12i $a0, %pc_hi20(.LCPI237_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI237_0) - pcalau12i $a0, %pc_hi20(.LCPI237_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI237_1) - pcalau12i $a0, %pc_hi20(.LCPI237_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI237_2) - pcalau12i $a0, %pc_hi20(.LCPI237_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI237_4) - pcalau12i $a0, %pc_hi20(.LCPI237_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI237_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -97050,17 +98509,17 @@ _Z26test_do_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Child Loop BB237_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB237_15: # Parent Loop BB237_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -97068,20 +98527,20 @@ _Z26test_do_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # %bb.16: # %.loopexit.us37 # in Loop: Header=BB237_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB237_13 # %bb.17: # in Loop: Header=BB237_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -97094,16 +98553,25 @@ _Z26test_do_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB237_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI237_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI237_0) - pcalau12i $a0, %pc_hi20(.LCPI237_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI237_1) - pcalau12i $a0, %pc_hi20(.LCPI237_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI237_2) - pcalau12i $a0, %pc_hi20(.LCPI237_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI237_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -97122,9 +98590,9 @@ _Z26test_do_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB237_19 # %bb.21: # in Loop: Header=BB237_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -97164,12 +98632,14 @@ _Z26test_do_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB237_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI237_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI237_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -97209,22 +98679,8 @@ _Z26test_do_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi13EdEvPKT0_iPKc, .Lfunc_end237-_Z26test_do_loop_unroll_factorILi13EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi12EdEvPKT0_iPKc -.LCPI238_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI238_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI238_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI238_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI238_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI238_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi12EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi12EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi12EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi12EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi12EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi12EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi12EdEvPKT0_iPKc @@ -97286,19 +98742,31 @@ _Z26test_do_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac addi.d $s6, $s1, 48 addi.d $s7, $s1, 96 addi.d $s8, $s0, -12 - pcalau12i $a0, %pc_hi20(.LCPI238_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI238_0) - pcalau12i $a0, %pc_hi20(.LCPI238_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI238_1) - pcalau12i $a0, %pc_hi20(.LCPI238_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI238_2) - pcalau12i $a0, %pc_hi20(.LCPI238_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI238_4) - pcalau12i $a0, %pc_hi20(.LCPI238_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI238_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -97317,107 +98785,107 @@ _Z26test_do_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB238_5: # Parent Loop BB238_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 12 addi.w $a0, $a0, 12 @@ -97435,12 +98903,12 @@ _Z26test_do_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Parent Loop BB238_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -97448,20 +98916,20 @@ _Z26test_do_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB238_9: # %.loopexit.us # in Loop: Header=BB238_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB238_3 # %bb.10: # in Loop: Header=BB238_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -97473,19 +98941,31 @@ _Z26test_do_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB238_11: # %.lr.ph.split blez $s0, .LBB238_18 # %bb.12: # %.preheader.us35.preheader - pcalau12i $a0, %pc_hi20(.LCPI238_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI238_0) - pcalau12i $a0, %pc_hi20(.LCPI238_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI238_1) - pcalau12i $a0, %pc_hi20(.LCPI238_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI238_2) - pcalau12i $a0, %pc_hi20(.LCPI238_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI238_4) - pcalau12i $a0, %pc_hi20(.LCPI238_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI238_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -97500,17 +98980,17 @@ _Z26test_do_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Child Loop BB238_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB238_15: # Parent Loop BB238_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -97518,20 +98998,20 @@ _Z26test_do_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # %bb.16: # %.loopexit.us36 # in Loop: Header=BB238_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB238_13 # %bb.17: # in Loop: Header=BB238_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -97544,16 +99024,25 @@ _Z26test_do_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB238_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI238_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI238_0) - pcalau12i $a0, %pc_hi20(.LCPI238_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI238_1) - pcalau12i $a0, %pc_hi20(.LCPI238_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI238_2) - pcalau12i $a0, %pc_hi20(.LCPI238_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI238_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -97572,9 +99061,9 @@ _Z26test_do_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB238_19 # %bb.21: # in Loop: Header=BB238_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -97614,12 +99103,14 @@ _Z26test_do_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB238_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI238_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI238_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -97740,22 +99231,8 @@ _ZN13do_loop_testsILi10EdE7do_testEPKdPKc: # @_ZN13do_loop_testsILi10EdE7do_test .size _ZN13do_loop_testsILi10EdE7do_testEPKdPKc, .Lfunc_end239-_ZN13do_loop_testsILi10EdE7do_testEPKdPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi11EdEvPKT0_iPKc -.LCPI240_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI240_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI240_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI240_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI240_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI240_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi11EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi11EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi11EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi11EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi11EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi11EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi11EdEvPKT0_iPKc @@ -97817,19 +99294,31 @@ _Z26test_do_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac addi.d $s6, $s1, 40 addi.d $s7, $s1, 88 addi.d $s8, $s0, -11 - pcalau12i $a0, %pc_hi20(.LCPI240_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI240_0) - pcalau12i $a0, %pc_hi20(.LCPI240_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI240_1) - pcalau12i $a0, %pc_hi20(.LCPI240_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI240_2) - pcalau12i $a0, %pc_hi20(.LCPI240_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI240_4) - pcalau12i $a0, %pc_hi20(.LCPI240_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI240_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -97848,99 +99337,99 @@ _Z26test_do_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB240_5: # Parent Loop BB240_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 11 addi.w $a0, $a0, 11 @@ -97958,12 +99447,12 @@ _Z26test_do_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Parent Loop BB240_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -97971,20 +99460,20 @@ _Z26test_do_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB240_9: # %.loopexit.us # in Loop: Header=BB240_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB240_3 # %bb.10: # in Loop: Header=BB240_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -97996,19 +99485,31 @@ _Z26test_do_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB240_11: # %.lr.ph.split blez $s0, .LBB240_18 # %bb.12: # %.preheader.us35.preheader - pcalau12i $a0, %pc_hi20(.LCPI240_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI240_0) - pcalau12i $a0, %pc_hi20(.LCPI240_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI240_1) - pcalau12i $a0, %pc_hi20(.LCPI240_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI240_2) - pcalau12i $a0, %pc_hi20(.LCPI240_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI240_4) - pcalau12i $a0, %pc_hi20(.LCPI240_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI240_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -98023,17 +99524,17 @@ _Z26test_do_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Child Loop BB240_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB240_15: # Parent Loop BB240_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -98041,20 +99542,20 @@ _Z26test_do_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # %bb.16: # %.loopexit.us36 # in Loop: Header=BB240_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB240_13 # %bb.17: # in Loop: Header=BB240_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -98067,16 +99568,25 @@ _Z26test_do_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB240_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI240_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI240_0) - pcalau12i $a0, %pc_hi20(.LCPI240_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI240_1) - pcalau12i $a0, %pc_hi20(.LCPI240_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI240_2) - pcalau12i $a0, %pc_hi20(.LCPI240_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI240_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -98095,9 +99605,9 @@ _Z26test_do_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB240_19 # %bb.21: # in Loop: Header=BB240_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -98137,12 +99647,14 @@ _Z26test_do_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB240_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI240_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI240_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -98182,22 +99694,8 @@ _Z26test_do_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi11EdEvPKT0_iPKc, .Lfunc_end240-_Z26test_do_loop_unroll_factorILi11EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi10EdEvPKT0_iPKc -.LCPI241_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI241_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI241_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI241_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI241_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI241_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi10EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi10EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi10EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi10EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi10EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi10EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi10EdEvPKT0_iPKc @@ -98259,19 +99757,31 @@ _Z26test_do_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac addi.d $s6, $s1, 40 addi.d $s7, $s1, 80 addi.d $s8, $s0, -10 - pcalau12i $a0, %pc_hi20(.LCPI241_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI241_0) - pcalau12i $a0, %pc_hi20(.LCPI241_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI241_1) - pcalau12i $a0, %pc_hi20(.LCPI241_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI241_2) - pcalau12i $a0, %pc_hi20(.LCPI241_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI241_4) - pcalau12i $a0, %pc_hi20(.LCPI241_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI241_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -98289,91 +99799,91 @@ _Z26test_do_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a3, $zero move $a4, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB241_5: # Parent Loop BB241_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a6, $a3 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a3, $a3, 10 addi.w $a0, $a0, 10 @@ -98391,12 +99901,12 @@ _Z26test_do_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Parent Loop BB241_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a5, $a5, 8 @@ -98404,20 +99914,20 @@ _Z26test_do_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB241_9: # %.loopexit.us # in Loop: Header=BB241_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB241_3 # %bb.10: # in Loop: Header=BB241_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -98430,19 +99940,31 @@ _Z26test_do_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB241_11: # %.lr.ph.split blez $s0, .LBB241_18 # %bb.12: # %.preheader.us35.preheader - pcalau12i $a0, %pc_hi20(.LCPI241_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI241_0) - pcalau12i $a0, %pc_hi20(.LCPI241_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI241_1) - pcalau12i $a0, %pc_hi20(.LCPI241_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI241_2) - pcalau12i $a0, %pc_hi20(.LCPI241_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI241_4) - pcalau12i $a0, %pc_hi20(.LCPI241_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI241_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -98457,17 +99979,17 @@ _Z26test_do_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # Child Loop BB241_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB241_15: # Parent Loop BB241_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -98475,20 +99997,20 @@ _Z26test_do_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac # %bb.16: # %.loopexit.us36 # in Loop: Header=BB241_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB241_13 # %bb.17: # in Loop: Header=BB241_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -98501,16 +100023,25 @@ _Z26test_do_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .LBB241_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI241_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI241_0) - pcalau12i $a0, %pc_hi20(.LCPI241_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI241_1) - pcalau12i $a0, %pc_hi20(.LCPI241_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI241_2) - pcalau12i $a0, %pc_hi20(.LCPI241_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI241_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -98529,9 +100060,9 @@ _Z26test_do_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB241_19 # %bb.21: # in Loop: Header=BB241_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -98571,12 +100102,14 @@ _Z26test_do_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB241_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI241_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI241_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -98616,22 +100149,8 @@ _Z26test_do_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fac .size _Z26test_do_loop_unroll_factorILi10EdEvPKT0_iPKc, .Lfunc_end241-_Z26test_do_loop_unroll_factorILi10EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi9EdEvPKT0_iPKc -.LCPI242_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI242_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI242_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI242_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI242_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI242_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi9EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi9EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi9EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi9EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi9EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi9EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi9EdEvPKT0_iPKc @@ -98693,19 +100212,31 @@ _Z26test_do_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact addi.d $s6, $s1, 32 addi.d $s7, $s1, 72 addi.d $s8, $s0, -9 - pcalau12i $a0, %pc_hi20(.LCPI242_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI242_0) - pcalau12i $a0, %pc_hi20(.LCPI242_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI242_1) - pcalau12i $a0, %pc_hi20(.LCPI242_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI242_2) - pcalau12i $a0, %pc_hi20(.LCPI242_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI242_4) - pcalau12i $a0, %pc_hi20(.LCPI242_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI242_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -98723,83 +100254,83 @@ _Z26test_do_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a3, $zero move $a4, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB242_5: # Parent Loop BB242_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a6, $a3 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a3, $a3, 9 addi.w $a0, $a0, 9 @@ -98817,12 +100348,12 @@ _Z26test_do_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact # Parent Loop BB242_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a5, $a5, 8 @@ -98830,20 +100361,20 @@ _Z26test_do_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .LBB242_9: # %.loopexit.us # in Loop: Header=BB242_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB242_3 # %bb.10: # in Loop: Header=BB242_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -98856,19 +100387,31 @@ _Z26test_do_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .LBB242_11: # %.lr.ph.split blez $s0, .LBB242_18 # %bb.12: # %.preheader.us35.preheader - pcalau12i $a0, %pc_hi20(.LCPI242_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI242_0) - pcalau12i $a0, %pc_hi20(.LCPI242_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI242_1) - pcalau12i $a0, %pc_hi20(.LCPI242_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI242_2) - pcalau12i $a0, %pc_hi20(.LCPI242_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI242_4) - pcalau12i $a0, %pc_hi20(.LCPI242_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI242_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -98883,17 +100426,17 @@ _Z26test_do_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact # Child Loop BB242_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB242_15: # Parent Loop BB242_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -98901,20 +100444,20 @@ _Z26test_do_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact # %bb.16: # %.loopexit.us36 # in Loop: Header=BB242_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB242_13 # %bb.17: # in Loop: Header=BB242_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -98927,16 +100470,25 @@ _Z26test_do_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .LBB242_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI242_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI242_0) - pcalau12i $a0, %pc_hi20(.LCPI242_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI242_1) - pcalau12i $a0, %pc_hi20(.LCPI242_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI242_2) - pcalau12i $a0, %pc_hi20(.LCPI242_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI242_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -98955,9 +100507,9 @@ _Z26test_do_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB242_19 # %bb.21: # in Loop: Header=BB242_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -98997,12 +100549,14 @@ _Z26test_do_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB242_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI242_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI242_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -99042,22 +100596,8 @@ _Z26test_do_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .size _Z26test_do_loop_unroll_factorILi9EdEvPKT0_iPKc, .Lfunc_end242-_Z26test_do_loop_unroll_factorILi9EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi8EdEvPKT0_iPKc -.LCPI243_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI243_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI243_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI243_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI243_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI243_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi8EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi8EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi8EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi8EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi8EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi8EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi8EdEvPKT0_iPKc @@ -99119,19 +100659,31 @@ _Z26test_do_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact addi.d $s6, $s1, 32 addi.d $s7, $s1, 64 addi.d $s8, $s0, -8 - pcalau12i $a0, %pc_hi20(.LCPI243_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI243_0) - pcalau12i $a0, %pc_hi20(.LCPI243_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI243_1) - pcalau12i $a0, %pc_hi20(.LCPI243_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI243_2) - pcalau12i $a0, %pc_hi20(.LCPI243_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI243_4) - pcalau12i $a0, %pc_hi20(.LCPI243_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI243_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -99149,75 +100701,75 @@ _Z26test_do_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a3, $zero move $a4, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB243_5: # Parent Loop BB243_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a6, $a3 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a3, $a3, 8 addi.w $a0, $a0, 8 @@ -99235,12 +100787,12 @@ _Z26test_do_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact # Parent Loop BB243_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a5, $a5, 8 @@ -99248,20 +100800,20 @@ _Z26test_do_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .LBB243_9: # %.loopexit.us # in Loop: Header=BB243_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB243_3 # %bb.10: # in Loop: Header=BB243_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -99274,19 +100826,31 @@ _Z26test_do_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .LBB243_11: # %.lr.ph.split blez $s0, .LBB243_18 # %bb.12: # %.preheader.us35.preheader - pcalau12i $a0, %pc_hi20(.LCPI243_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI243_0) - pcalau12i $a0, %pc_hi20(.LCPI243_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI243_1) - pcalau12i $a0, %pc_hi20(.LCPI243_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI243_2) - pcalau12i $a0, %pc_hi20(.LCPI243_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI243_4) - pcalau12i $a0, %pc_hi20(.LCPI243_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI243_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -99301,17 +100865,17 @@ _Z26test_do_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact # Child Loop BB243_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB243_15: # Parent Loop BB243_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -99319,20 +100883,20 @@ _Z26test_do_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact # %bb.16: # %.loopexit.us36 # in Loop: Header=BB243_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB243_13 # %bb.17: # in Loop: Header=BB243_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -99345,16 +100909,25 @@ _Z26test_do_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .LBB243_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI243_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI243_0) - pcalau12i $a0, %pc_hi20(.LCPI243_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI243_1) - pcalau12i $a0, %pc_hi20(.LCPI243_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI243_2) - pcalau12i $a0, %pc_hi20(.LCPI243_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI243_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -99373,9 +100946,9 @@ _Z26test_do_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB243_19 # %bb.21: # in Loop: Header=BB243_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -99415,12 +100988,14 @@ _Z26test_do_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB243_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI243_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI243_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -99460,22 +101035,8 @@ _Z26test_do_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .size _Z26test_do_loop_unroll_factorILi8EdEvPKT0_iPKc, .Lfunc_end243-_Z26test_do_loop_unroll_factorILi8EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi7EdEvPKT0_iPKc -.LCPI244_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI244_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI244_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI244_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI244_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI244_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi7EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi7EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi7EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi7EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi7EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi7EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi7EdEvPKT0_iPKc @@ -99537,19 +101098,31 @@ _Z26test_do_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact addi.d $s6, $s1, 24 addi.d $s7, $s1, 56 addi.d $s8, $s0, -7 - pcalau12i $a0, %pc_hi20(.LCPI244_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI244_0) - pcalau12i $a0, %pc_hi20(.LCPI244_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI244_1) - pcalau12i $a0, %pc_hi20(.LCPI244_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI244_2) - pcalau12i $a0, %pc_hi20(.LCPI244_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI244_4) - pcalau12i $a0, %pc_hi20(.LCPI244_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI244_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -99567,67 +101140,67 @@ _Z26test_do_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a3, $zero move $a4, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB244_5: # Parent Loop BB244_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a6, $a3 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a3, $a3, 7 addi.w $a0, $a0, 7 @@ -99645,12 +101218,12 @@ _Z26test_do_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact # Parent Loop BB244_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a5, $a5, 8 @@ -99658,20 +101231,20 @@ _Z26test_do_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .LBB244_9: # %.loopexit.us # in Loop: Header=BB244_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB244_3 # %bb.10: # in Loop: Header=BB244_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -99684,19 +101257,31 @@ _Z26test_do_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .LBB244_11: # %.lr.ph.split blez $s0, .LBB244_18 # %bb.12: # %.preheader.us35.preheader - pcalau12i $a0, %pc_hi20(.LCPI244_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI244_0) - pcalau12i $a0, %pc_hi20(.LCPI244_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI244_1) - pcalau12i $a0, %pc_hi20(.LCPI244_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI244_2) - pcalau12i $a0, %pc_hi20(.LCPI244_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI244_4) - pcalau12i $a0, %pc_hi20(.LCPI244_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI244_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -99711,17 +101296,17 @@ _Z26test_do_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact # Child Loop BB244_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB244_15: # Parent Loop BB244_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -99729,20 +101314,20 @@ _Z26test_do_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact # %bb.16: # %.loopexit.us36 # in Loop: Header=BB244_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB244_13 # %bb.17: # in Loop: Header=BB244_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -99755,16 +101340,25 @@ _Z26test_do_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .LBB244_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI244_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI244_0) - pcalau12i $a0, %pc_hi20(.LCPI244_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI244_1) - pcalau12i $a0, %pc_hi20(.LCPI244_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI244_2) - pcalau12i $a0, %pc_hi20(.LCPI244_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI244_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -99783,9 +101377,9 @@ _Z26test_do_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB244_19 # %bb.21: # in Loop: Header=BB244_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -99825,12 +101419,14 @@ _Z26test_do_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB244_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI244_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI244_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -99870,22 +101466,8 @@ _Z26test_do_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .size _Z26test_do_loop_unroll_factorILi7EdEvPKT0_iPKc, .Lfunc_end244-_Z26test_do_loop_unroll_factorILi7EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi6EdEvPKT0_iPKc -.LCPI245_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI245_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI245_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI245_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI245_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI245_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi6EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi6EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi6EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi6EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi6EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi6EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi6EdEvPKT0_iPKc @@ -99947,19 +101529,31 @@ _Z26test_do_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact addi.d $s6, $s1, 24 addi.d $s7, $s1, 48 addi.d $s8, $s0, -6 - pcalau12i $a0, %pc_hi20(.LCPI245_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI245_0) - pcalau12i $a0, %pc_hi20(.LCPI245_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI245_1) - pcalau12i $a0, %pc_hi20(.LCPI245_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI245_2) - pcalau12i $a0, %pc_hi20(.LCPI245_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI245_4) - pcalau12i $a0, %pc_hi20(.LCPI245_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI245_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -99977,59 +101571,59 @@ _Z26test_do_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a3, $zero move $a4, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB245_5: # Parent Loop BB245_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a6, $a3 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a3, $a3, 6 addi.d $a2, $a2, 48 @@ -100047,12 +101641,12 @@ _Z26test_do_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact # Parent Loop BB245_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a5, $a5, 8 @@ -100060,20 +101654,20 @@ _Z26test_do_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .LBB245_9: # %.loopexit.us # in Loop: Header=BB245_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB245_3 # %bb.10: # in Loop: Header=BB245_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -100086,19 +101680,31 @@ _Z26test_do_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .LBB245_11: # %.lr.ph.split blez $s0, .LBB245_18 # %bb.12: # %.preheader.us35.preheader - pcalau12i $a0, %pc_hi20(.LCPI245_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI245_0) - pcalau12i $a0, %pc_hi20(.LCPI245_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI245_1) - pcalau12i $a0, %pc_hi20(.LCPI245_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI245_2) - pcalau12i $a0, %pc_hi20(.LCPI245_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI245_4) - pcalau12i $a0, %pc_hi20(.LCPI245_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI245_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -100113,17 +101719,17 @@ _Z26test_do_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact # Child Loop BB245_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB245_15: # Parent Loop BB245_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -100131,20 +101737,20 @@ _Z26test_do_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact # %bb.16: # %.loopexit.us36 # in Loop: Header=BB245_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB245_13 # %bb.17: # in Loop: Header=BB245_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -100157,16 +101763,25 @@ _Z26test_do_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .LBB245_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI245_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI245_0) - pcalau12i $a0, %pc_hi20(.LCPI245_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI245_1) - pcalau12i $a0, %pc_hi20(.LCPI245_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI245_2) - pcalau12i $a0, %pc_hi20(.LCPI245_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI245_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -100185,9 +101800,9 @@ _Z26test_do_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB245_19 # %bb.21: # in Loop: Header=BB245_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -100227,12 +101842,14 @@ _Z26test_do_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB245_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI245_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI245_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -100272,22 +101889,8 @@ _Z26test_do_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .size _Z26test_do_loop_unroll_factorILi6EdEvPKT0_iPKc, .Lfunc_end245-_Z26test_do_loop_unroll_factorILi6EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi5EdEvPKT0_iPKc -.LCPI246_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI246_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI246_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI246_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI246_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI246_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi5EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi5EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi5EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi5EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi5EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi5EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi5EdEvPKT0_iPKc @@ -100349,19 +101952,31 @@ _Z26test_do_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact addi.d $s6, $s1, 16 addi.d $s7, $s1, 40 addi.d $s8, $s0, -5 - pcalau12i $a0, %pc_hi20(.LCPI246_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI246_0) - pcalau12i $a0, %pc_hi20(.LCPI246_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI246_1) - pcalau12i $a0, %pc_hi20(.LCPI246_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI246_2) - pcalau12i $a0, %pc_hi20(.LCPI246_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI246_4) - pcalau12i $a0, %pc_hi20(.LCPI246_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI246_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -100379,51 +101994,51 @@ _Z26test_do_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a3, $zero move $a4, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB246_5: # Parent Loop BB246_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a6, $a3 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a3, $a3, 5 addi.d $a2, $a2, 40 @@ -100441,12 +102056,12 @@ _Z26test_do_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact # Parent Loop BB246_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a5, $a5, 8 @@ -100454,20 +102069,20 @@ _Z26test_do_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .LBB246_9: # %.loopexit.us # in Loop: Header=BB246_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB246_3 # %bb.10: # in Loop: Header=BB246_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -100480,19 +102095,31 @@ _Z26test_do_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .LBB246_11: # %.lr.ph.split blez $s0, .LBB246_18 # %bb.12: # %.preheader.us35.preheader - pcalau12i $a0, %pc_hi20(.LCPI246_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI246_0) - pcalau12i $a0, %pc_hi20(.LCPI246_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI246_1) - pcalau12i $a0, %pc_hi20(.LCPI246_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI246_2) - pcalau12i $a0, %pc_hi20(.LCPI246_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI246_4) - pcalau12i $a0, %pc_hi20(.LCPI246_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI246_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -100507,17 +102134,17 @@ _Z26test_do_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact # Child Loop BB246_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB246_15: # Parent Loop BB246_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -100525,20 +102152,20 @@ _Z26test_do_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact # %bb.16: # %.loopexit.us36 # in Loop: Header=BB246_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB246_13 # %bb.17: # in Loop: Header=BB246_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -100551,16 +102178,25 @@ _Z26test_do_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .LBB246_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI246_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI246_0) - pcalau12i $a0, %pc_hi20(.LCPI246_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI246_1) - pcalau12i $a0, %pc_hi20(.LCPI246_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI246_2) - pcalau12i $a0, %pc_hi20(.LCPI246_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI246_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -100579,9 +102215,9 @@ _Z26test_do_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB246_19 # %bb.21: # in Loop: Header=BB246_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -100621,12 +102257,14 @@ _Z26test_do_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB246_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI246_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI246_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -100666,22 +102304,8 @@ _Z26test_do_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .size _Z26test_do_loop_unroll_factorILi5EdEvPKT0_iPKc, .Lfunc_end246-_Z26test_do_loop_unroll_factorILi5EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi4EdEvPKT0_iPKc -.LCPI247_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI247_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI247_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI247_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI247_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI247_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi4EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi4EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi4EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi4EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi4EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi4EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi4EdEvPKT0_iPKc @@ -100743,19 +102367,31 @@ _Z26test_do_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact addi.d $s6, $s1, 16 addi.d $s7, $s1, 32 addi.d $s8, $s0, -4 - pcalau12i $a0, %pc_hi20(.LCPI247_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI247_0) - pcalau12i $a0, %pc_hi20(.LCPI247_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI247_1) - pcalau12i $a0, %pc_hi20(.LCPI247_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI247_2) - pcalau12i $a0, %pc_hi20(.LCPI247_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI247_4) - pcalau12i $a0, %pc_hi20(.LCPI247_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI247_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -100773,43 +102409,43 @@ _Z26test_do_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a3, $zero move $a4, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB247_5: # Parent Loop BB247_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a6, $a3 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a3, $a3, 4 addi.d $a2, $a2, 32 @@ -100827,12 +102463,12 @@ _Z26test_do_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact # Parent Loop BB247_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a5, $a5, 8 @@ -100840,20 +102476,20 @@ _Z26test_do_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .LBB247_9: # %.loopexit.us # in Loop: Header=BB247_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB247_3 # %bb.10: # in Loop: Header=BB247_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -100866,19 +102502,31 @@ _Z26test_do_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .LBB247_11: # %.lr.ph.split blez $s0, .LBB247_18 # %bb.12: # %.preheader.us35.preheader - pcalau12i $a0, %pc_hi20(.LCPI247_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI247_0) - pcalau12i $a0, %pc_hi20(.LCPI247_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI247_1) - pcalau12i $a0, %pc_hi20(.LCPI247_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI247_2) - pcalau12i $a0, %pc_hi20(.LCPI247_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI247_4) - pcalau12i $a0, %pc_hi20(.LCPI247_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI247_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -100893,17 +102541,17 @@ _Z26test_do_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact # Child Loop BB247_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB247_15: # Parent Loop BB247_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -100911,20 +102559,20 @@ _Z26test_do_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact # %bb.16: # %.loopexit.us36 # in Loop: Header=BB247_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB247_13 # %bb.17: # in Loop: Header=BB247_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -100937,16 +102585,25 @@ _Z26test_do_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .LBB247_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI247_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI247_0) - pcalau12i $a0, %pc_hi20(.LCPI247_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI247_1) - pcalau12i $a0, %pc_hi20(.LCPI247_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI247_2) - pcalau12i $a0, %pc_hi20(.LCPI247_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI247_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -100965,9 +102622,9 @@ _Z26test_do_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB247_19 # %bb.21: # in Loop: Header=BB247_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -101007,12 +102664,14 @@ _Z26test_do_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB247_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI247_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI247_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -101052,22 +102711,8 @@ _Z26test_do_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .size _Z26test_do_loop_unroll_factorILi4EdEvPKT0_iPKc, .Lfunc_end247-_Z26test_do_loop_unroll_factorILi4EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi3EdEvPKT0_iPKc -.LCPI248_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI248_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI248_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI248_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI248_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI248_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi3EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi3EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi3EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi3EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi3EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi3EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi3EdEvPKT0_iPKc @@ -101129,19 +102774,31 @@ _Z26test_do_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact addi.d $s6, $s1, 16 addi.d $s7, $s1, 24 addi.d $s8, $s0, -3 - pcalau12i $a0, %pc_hi20(.LCPI248_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI248_0) - pcalau12i $a0, %pc_hi20(.LCPI248_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI248_1) - pcalau12i $a0, %pc_hi20(.LCPI248_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI248_2) - pcalau12i $a0, %pc_hi20(.LCPI248_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI248_4) - pcalau12i $a0, %pc_hi20(.LCPI248_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI248_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -101159,35 +102816,35 @@ _Z26test_do_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a4, $zero move $a5, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB248_5: # Parent Loop BB248_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a6, $a4 move $a3, $a5 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a4, $a4, 3 addi.d $a2, $a2, 24 @@ -101205,12 +102862,12 @@ _Z26test_do_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact # Parent Loop BB248_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a3, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a3, $a3, 8 @@ -101218,20 +102875,20 @@ _Z26test_do_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .LBB248_9: # %.loopexit.us # in Loop: Header=BB248_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB248_3 # %bb.10: # in Loop: Header=BB248_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -101244,19 +102901,31 @@ _Z26test_do_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .LBB248_11: # %.lr.ph.split blez $s0, .LBB248_18 # %bb.12: # %.preheader.us35.preheader - pcalau12i $a0, %pc_hi20(.LCPI248_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI248_0) - pcalau12i $a0, %pc_hi20(.LCPI248_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI248_1) - pcalau12i $a0, %pc_hi20(.LCPI248_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI248_2) - pcalau12i $a0, %pc_hi20(.LCPI248_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI248_4) - pcalau12i $a0, %pc_hi20(.LCPI248_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI248_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -101271,17 +102940,17 @@ _Z26test_do_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact # Child Loop BB248_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB248_15: # Parent Loop BB248_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -101289,20 +102958,20 @@ _Z26test_do_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact # %bb.16: # %.loopexit.us36 # in Loop: Header=BB248_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB248_13 # %bb.17: # in Loop: Header=BB248_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -101315,16 +102984,25 @@ _Z26test_do_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .LBB248_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI248_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI248_0) - pcalau12i $a0, %pc_hi20(.LCPI248_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI248_1) - pcalau12i $a0, %pc_hi20(.LCPI248_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI248_2) - pcalau12i $a0, %pc_hi20(.LCPI248_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI248_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -101343,9 +103021,9 @@ _Z26test_do_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB248_19 # %bb.21: # in Loop: Header=BB248_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -101385,12 +103063,14 @@ _Z26test_do_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB248_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI248_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI248_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -101430,22 +103110,8 @@ _Z26test_do_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .size _Z26test_do_loop_unroll_factorILi3EdEvPKT0_iPKc, .Lfunc_end248-_Z26test_do_loop_unroll_factorILi3EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc -.LCPI249_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI249_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI249_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI249_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI249_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI249_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc @@ -101507,19 +103173,31 @@ _Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact addi.d $s6, $s1, 8 addi.d $s7, $s1, 16 addi.d $s8, $s0, -2 - pcalau12i $a0, %pc_hi20(.LCPI249_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI249_0) - pcalau12i $a0, %pc_hi20(.LCPI249_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI249_1) - pcalau12i $a0, %pc_hi20(.LCPI249_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI249_2) - pcalau12i $a0, %pc_hi20(.LCPI249_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI249_4) - pcalau12i $a0, %pc_hi20(.LCPI249_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI249_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s3, $zero @@ -101537,27 +103215,27 @@ _Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a4, $zero move $a5, $s7 move $a3, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB249_5: # Parent Loop BB249_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a3, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a6, $a4 move $a0, $a5 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a4, $a4, 2 addi.d $a3, $a3, 16 @@ -101575,12 +103253,12 @@ _Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact # Parent Loop BB249_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a0, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, -1 addi.d $a0, $a0, 8 @@ -101588,20 +103266,20 @@ _Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .LBB249_9: # %.loopexit.us # in Loop: Header=BB249_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB249_3 # %bb.10: # in Loop: Header=BB249_4 Depth=1 ld.w $a1, $s2, %pc_lo12(current_test) @@ -101626,16 +103304,24 @@ _Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact lu32i.d $a2, -393216 lu52i.d $a2, $a2, -1022 vreplgr2vr.d $vr5, $a2 - lu32i.d $a0, -268678 - pcalau12i $a2, %pc_hi20(.LCPI249_2) - fld.d $fs0, $a2, %pc_lo12(.LCPI249_2) - pcalau12i $a2, %pc_hi20(.LCPI249_4) - fld.d $fs1, $a2, %pc_lo12(.LCPI249_4) - pcalau12i $a2, %pc_hi20(.LCPI249_3) - fld.d $fs2, $a2, %pc_lo12(.LCPI249_3) - lu52i.d $a0, $a0, 1042 - vreplgr2vr.d $vr6, $a0 - movgr2fr.d $fs3, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + vreplgr2vr.d $vr6, $a2 + movgr2fr.d $fs0, $zero + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -101659,16 +103345,16 @@ _Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact vfadd.d $vr1, $vr1, $vr4 vfmadd.d $vr1, $vr1, $vr6, $vr5 vreplvei.d $vr2, $vr1, 0 - fadd.d $fa2, $fa2, $fs3 + fadd.d $fa2, $fa2, $fs0 vreplvei.d $vr1, $vr1, 1 - fmul.d $fa1, $fa1, $fs0 + fmul.d $fa1, $fa1, $fs1 fadd.d $fa1, $fa2, $fa1 fabs.d $fa3, $fa2 fdiv.d $fa2, $fa1, $fa2 - fcmp.clt.d $fcc0, $fs1, $fa3 + fcmp.clt.d $fcc0, $fs2, $fa3 fsel $fa1, $fa1, $fa2, $fcc0 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs2 + fcmp.clt.d $fcc0, $fa1, $fs3 bcnez $fcc0, .LBB249_13 # %bb.15: # in Loop: Header=BB249_14 Depth=1 ld.w $a1, $s2, %pc_lo12(current_test) @@ -101682,16 +103368,25 @@ _Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact ld.w $a1, $s4, %pc_lo12(iterations) b .LBB249_13 .LBB249_16: # %.lr.ph.split.split.preheader - pcalau12i $a0, %pc_hi20(.LCPI249_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI249_0) - pcalau12i $a0, %pc_hi20(.LCPI249_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI249_1) - pcalau12i $a0, %pc_hi20(.LCPI249_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI249_2) - pcalau12i $a0, %pc_hi20(.LCPI249_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI249_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -101710,9 +103405,9 @@ _Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB249_17 # %bb.19: # in Loop: Header=BB249_18 Depth=1 ld.w $a1, $s2, %pc_lo12(current_test) @@ -101752,12 +103447,14 @@ _Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB249_24: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI249_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI249_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -101797,22 +103494,8 @@ _Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .size _Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc, .Lfunc_end249-_Z26test_do_loop_unroll_factorILi2EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z26test_do_loop_unroll_factorILi1EdEvPKT0_iPKc -.LCPI250_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI250_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI250_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI250_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI250_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI250_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z26test_do_loop_unroll_factorILi1EdEvPKT0_iPKc,"axG",@progbits,_Z26test_do_loop_unroll_factorILi1EdEvPKT0_iPKc,comdat - .weak _Z26test_do_loop_unroll_factorILi1EdEvPKT0_iPKc + .weak _Z26test_do_loop_unroll_factorILi1EdEvPKT0_iPKc # -- Begin function _Z26test_do_loop_unroll_factorILi1EdEvPKT0_iPKc .p2align 5 .type _Z26test_do_loop_unroll_factorILi1EdEvPKT0_iPKc,@function _Z26test_do_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_factorILi1EdEvPKT0_iPKc @@ -101866,19 +103549,31 @@ _Z26test_do_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact # %bb.1: # %.lr.ph blez $s0, .LBB250_8 # %bb.2: # %.preheader23.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI250_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI250_0) - pcalau12i $a0, %pc_hi20(.LCPI250_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI250_1) - pcalau12i $a0, %pc_hi20(.LCPI250_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI250_2) - pcalau12i $a0, %pc_hi20(.LCPI250_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI250_4) - pcalau12i $a0, %pc_hi20(.LCPI250_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI250_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $s6, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s7, $zero @@ -101893,17 +103588,17 @@ _Z26test_do_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact # Child Loop BB250_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB250_5: # Parent Loop BB250_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -101911,20 +103606,20 @@ _Z26test_do_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact # %bb.6: # %.loopexit.us # in Loop: Header=BB250_4 Depth=1 fld.d $fa1, $s6, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB250_3 # %bb.7: # in Loop: Header=BB250_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -101937,16 +103632,25 @@ _Z26test_do_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .LBB250_8: # %.lr.ph.split.split.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI250_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI250_0) - pcalau12i $a0, %pc_hi20(.LCPI250_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI250_1) - pcalau12i $a0, %pc_hi20(.LCPI250_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI250_2) - pcalau12i $a0, %pc_hi20(.LCPI250_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI250_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s2, $zero @@ -101965,9 +103669,9 @@ _Z26test_do_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB250_9 # %bb.11: # in Loop: Header=BB250_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -102006,12 +103710,14 @@ _Z26test_do_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB250_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI250_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI250_5) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -102049,22 +103755,8 @@ _Z26test_do_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z26test_do_loop_unroll_fact .size _Z26test_do_loop_unroll_factorILi1EdEvPKT0_iPKc, .Lfunc_end250-_Z26test_do_loop_unroll_factorILi1EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi32EdEvPKT0_iPKc -.LCPI251_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI251_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI251_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI251_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI251_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI251_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi32EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi32EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi32EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi32EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi32EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi32EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi32EdEvPKT0_iPKc @@ -102126,19 +103818,31 @@ _Z28test_goto_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll addi.d $s6, $s1, 128 addi.d $s7, $s1, 256 addi.d $s8, $s0, -32 - pcalau12i $a0, %pc_hi20(.LCPI251_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI251_0) - pcalau12i $a0, %pc_hi20(.LCPI251_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI251_1) - pcalau12i $a0, %pc_hi20(.LCPI251_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI251_2) - pcalau12i $a0, %pc_hi20(.LCPI251_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI251_4) - pcalau12i $a0, %pc_hi20(.LCPI251_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI251_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -102157,267 +103861,267 @@ _Z28test_goto_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB251_5: # Parent Loop BB251_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -128 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -120 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 120 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 32 addi.w $a0, $a0, 32 @@ -102435,12 +104139,12 @@ _Z28test_goto_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Parent Loop BB251_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -102448,20 +104152,20 @@ _Z28test_goto_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB251_9: # %.loopexit.us # in Loop: Header=BB251_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB251_3 # %bb.10: # in Loop: Header=BB251_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -102473,19 +104177,31 @@ _Z28test_goto_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB251_11: # %.lr.ph.split blez $s0, .LBB251_18 # %bb.12: # %.preheader.us37.preheader - pcalau12i $a0, %pc_hi20(.LCPI251_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI251_0) - pcalau12i $a0, %pc_hi20(.LCPI251_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI251_1) - pcalau12i $a0, %pc_hi20(.LCPI251_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI251_2) - pcalau12i $a0, %pc_hi20(.LCPI251_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI251_4) - pcalau12i $a0, %pc_hi20(.LCPI251_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI251_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -102500,17 +104216,17 @@ _Z28test_goto_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Child Loop BB251_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB251_15: # Parent Loop BB251_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -102518,20 +104234,20 @@ _Z28test_goto_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # %bb.16: # %.loopexit.us38 # in Loop: Header=BB251_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB251_13 # %bb.17: # in Loop: Header=BB251_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -102544,16 +104260,25 @@ _Z28test_goto_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB251_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI251_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI251_0) - pcalau12i $a0, %pc_hi20(.LCPI251_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI251_1) - pcalau12i $a0, %pc_hi20(.LCPI251_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI251_2) - pcalau12i $a0, %pc_hi20(.LCPI251_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI251_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -102572,9 +104297,9 @@ _Z28test_goto_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB251_19 # %bb.21: # in Loop: Header=BB251_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -102614,12 +104339,14 @@ _Z28test_goto_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB251_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI251_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI251_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -102659,22 +104386,8 @@ _Z28test_goto_loop_unroll_factorILi32EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi32EdEvPKT0_iPKc, .Lfunc_end251-_Z28test_goto_loop_unroll_factorILi32EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi31EdEvPKT0_iPKc -.LCPI252_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI252_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI252_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI252_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI252_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI252_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi31EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi31EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi31EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi31EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi31EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi31EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi31EdEvPKT0_iPKc @@ -102736,19 +104449,31 @@ _Z28test_goto_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll addi.d $s6, $s1, 120 addi.d $s7, $s1, 248 addi.d $s8, $s0, -31 - pcalau12i $a0, %pc_hi20(.LCPI252_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI252_0) - pcalau12i $a0, %pc_hi20(.LCPI252_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI252_1) - pcalau12i $a0, %pc_hi20(.LCPI252_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI252_2) - pcalau12i $a0, %pc_hi20(.LCPI252_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI252_4) - pcalau12i $a0, %pc_hi20(.LCPI252_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI252_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -102767,259 +104492,259 @@ _Z28test_goto_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB252_5: # Parent Loop BB252_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -120 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 120 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 31 addi.w $a0, $a0, 31 @@ -103037,12 +104762,12 @@ _Z28test_goto_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Parent Loop BB252_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -103050,20 +104775,20 @@ _Z28test_goto_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB252_9: # %.loopexit.us # in Loop: Header=BB252_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB252_3 # %bb.10: # in Loop: Header=BB252_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -103075,19 +104800,31 @@ _Z28test_goto_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB252_11: # %.lr.ph.split blez $s0, .LBB252_18 # %bb.12: # %.preheader.us37.preheader - pcalau12i $a0, %pc_hi20(.LCPI252_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI252_0) - pcalau12i $a0, %pc_hi20(.LCPI252_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI252_1) - pcalau12i $a0, %pc_hi20(.LCPI252_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI252_2) - pcalau12i $a0, %pc_hi20(.LCPI252_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI252_4) - pcalau12i $a0, %pc_hi20(.LCPI252_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI252_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -103102,17 +104839,17 @@ _Z28test_goto_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Child Loop BB252_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB252_15: # Parent Loop BB252_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -103120,20 +104857,20 @@ _Z28test_goto_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # %bb.16: # %.loopexit.us38 # in Loop: Header=BB252_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB252_13 # %bb.17: # in Loop: Header=BB252_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -103146,16 +104883,25 @@ _Z28test_goto_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB252_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI252_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI252_0) - pcalau12i $a0, %pc_hi20(.LCPI252_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI252_1) - pcalau12i $a0, %pc_hi20(.LCPI252_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI252_2) - pcalau12i $a0, %pc_hi20(.LCPI252_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI252_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -103174,9 +104920,9 @@ _Z28test_goto_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB252_19 # %bb.21: # in Loop: Header=BB252_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -103216,12 +104962,14 @@ _Z28test_goto_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB252_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI252_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI252_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -103261,22 +105009,8 @@ _Z28test_goto_loop_unroll_factorILi31EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi31EdEvPKT0_iPKc, .Lfunc_end252-_Z28test_goto_loop_unroll_factorILi31EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi30EdEvPKT0_iPKc -.LCPI253_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI253_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI253_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI253_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI253_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI253_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi30EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi30EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi30EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi30EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi30EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi30EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi30EdEvPKT0_iPKc @@ -103338,19 +105072,31 @@ _Z28test_goto_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll addi.d $s6, $s1, 120 addi.d $s7, $s1, 240 addi.d $s8, $s0, -30 - pcalau12i $a0, %pc_hi20(.LCPI253_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI253_0) - pcalau12i $a0, %pc_hi20(.LCPI253_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI253_1) - pcalau12i $a0, %pc_hi20(.LCPI253_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI253_2) - pcalau12i $a0, %pc_hi20(.LCPI253_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI253_4) - pcalau12i $a0, %pc_hi20(.LCPI253_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI253_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -103369,251 +105115,251 @@ _Z28test_goto_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB253_5: # Parent Loop BB253_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -120 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 30 addi.w $a0, $a0, 30 @@ -103631,12 +105377,12 @@ _Z28test_goto_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Parent Loop BB253_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -103644,20 +105390,20 @@ _Z28test_goto_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB253_9: # %.loopexit.us # in Loop: Header=BB253_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB253_3 # %bb.10: # in Loop: Header=BB253_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -103669,19 +105415,31 @@ _Z28test_goto_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB253_11: # %.lr.ph.split blez $s0, .LBB253_18 # %bb.12: # %.preheader.us37.preheader - pcalau12i $a0, %pc_hi20(.LCPI253_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI253_0) - pcalau12i $a0, %pc_hi20(.LCPI253_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI253_1) - pcalau12i $a0, %pc_hi20(.LCPI253_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI253_2) - pcalau12i $a0, %pc_hi20(.LCPI253_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI253_4) - pcalau12i $a0, %pc_hi20(.LCPI253_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI253_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -103696,17 +105454,17 @@ _Z28test_goto_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Child Loop BB253_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB253_15: # Parent Loop BB253_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -103714,20 +105472,20 @@ _Z28test_goto_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # %bb.16: # %.loopexit.us38 # in Loop: Header=BB253_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB253_13 # %bb.17: # in Loop: Header=BB253_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -103740,16 +105498,25 @@ _Z28test_goto_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB253_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI253_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI253_0) - pcalau12i $a0, %pc_hi20(.LCPI253_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI253_1) - pcalau12i $a0, %pc_hi20(.LCPI253_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI253_2) - pcalau12i $a0, %pc_hi20(.LCPI253_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI253_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -103768,9 +105535,9 @@ _Z28test_goto_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB253_19 # %bb.21: # in Loop: Header=BB253_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -103810,12 +105577,14 @@ _Z28test_goto_loop_unroll_factorILi30EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB253_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI253_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI253_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -103934,22 +105703,8 @@ _ZN15goto_loop_testsILi28EdE7do_testEPKdPKc: # @_ZN15goto_loop_testsILi28EdE7do_ .size _ZN15goto_loop_testsILi28EdE7do_testEPKdPKc, .Lfunc_end254-_ZN15goto_loop_testsILi28EdE7do_testEPKdPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi29EdEvPKT0_iPKc -.LCPI255_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI255_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI255_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI255_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI255_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI255_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi29EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi29EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi29EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi29EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi29EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi29EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi29EdEvPKT0_iPKc @@ -104011,19 +105766,31 @@ _Z28test_goto_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll addi.d $s6, $s1, 112 addi.d $s7, $s1, 232 addi.d $s8, $s0, -29 - pcalau12i $a0, %pc_hi20(.LCPI255_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI255_0) - pcalau12i $a0, %pc_hi20(.LCPI255_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI255_1) - pcalau12i $a0, %pc_hi20(.LCPI255_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI255_2) - pcalau12i $a0, %pc_hi20(.LCPI255_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI255_4) - pcalau12i $a0, %pc_hi20(.LCPI255_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI255_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -104042,243 +105809,243 @@ _Z28test_goto_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB255_5: # Parent Loop BB255_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 29 addi.w $a0, $a0, 29 @@ -104296,12 +106063,12 @@ _Z28test_goto_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Parent Loop BB255_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -104309,20 +106076,20 @@ _Z28test_goto_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB255_9: # %.loopexit.us # in Loop: Header=BB255_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB255_3 # %bb.10: # in Loop: Header=BB255_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -104334,19 +106101,31 @@ _Z28test_goto_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB255_11: # %.lr.ph.split blez $s0, .LBB255_18 # %bb.12: # %.preheader.us37.preheader - pcalau12i $a0, %pc_hi20(.LCPI255_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI255_0) - pcalau12i $a0, %pc_hi20(.LCPI255_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI255_1) - pcalau12i $a0, %pc_hi20(.LCPI255_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI255_2) - pcalau12i $a0, %pc_hi20(.LCPI255_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI255_4) - pcalau12i $a0, %pc_hi20(.LCPI255_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI255_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -104361,17 +106140,17 @@ _Z28test_goto_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Child Loop BB255_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB255_15: # Parent Loop BB255_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -104379,20 +106158,20 @@ _Z28test_goto_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # %bb.16: # %.loopexit.us38 # in Loop: Header=BB255_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB255_13 # %bb.17: # in Loop: Header=BB255_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -104405,16 +106184,25 @@ _Z28test_goto_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB255_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI255_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI255_0) - pcalau12i $a0, %pc_hi20(.LCPI255_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI255_1) - pcalau12i $a0, %pc_hi20(.LCPI255_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI255_2) - pcalau12i $a0, %pc_hi20(.LCPI255_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI255_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -104433,9 +106221,9 @@ _Z28test_goto_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB255_19 # %bb.21: # in Loop: Header=BB255_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -104475,12 +106263,14 @@ _Z28test_goto_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB255_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI255_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI255_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -104520,22 +106310,8 @@ _Z28test_goto_loop_unroll_factorILi29EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi29EdEvPKT0_iPKc, .Lfunc_end255-_Z28test_goto_loop_unroll_factorILi29EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi28EdEvPKT0_iPKc -.LCPI256_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI256_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI256_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI256_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI256_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI256_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi28EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi28EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi28EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi28EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi28EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi28EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi28EdEvPKT0_iPKc @@ -104597,19 +106373,31 @@ _Z28test_goto_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll addi.d $s6, $s1, 112 addi.d $s7, $s1, 224 addi.d $s8, $s0, -28 - pcalau12i $a0, %pc_hi20(.LCPI256_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI256_0) - pcalau12i $a0, %pc_hi20(.LCPI256_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI256_1) - pcalau12i $a0, %pc_hi20(.LCPI256_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI256_2) - pcalau12i $a0, %pc_hi20(.LCPI256_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI256_4) - pcalau12i $a0, %pc_hi20(.LCPI256_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI256_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -104628,235 +106416,235 @@ _Z28test_goto_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB256_5: # Parent Loop BB256_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -112 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 28 addi.w $a0, $a0, 28 @@ -104874,12 +106662,12 @@ _Z28test_goto_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Parent Loop BB256_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -104887,20 +106675,20 @@ _Z28test_goto_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB256_9: # %.loopexit.us # in Loop: Header=BB256_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB256_3 # %bb.10: # in Loop: Header=BB256_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -104912,19 +106700,31 @@ _Z28test_goto_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB256_11: # %.lr.ph.split blez $s0, .LBB256_18 # %bb.12: # %.preheader.us37.preheader - pcalau12i $a0, %pc_hi20(.LCPI256_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI256_0) - pcalau12i $a0, %pc_hi20(.LCPI256_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI256_1) - pcalau12i $a0, %pc_hi20(.LCPI256_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI256_2) - pcalau12i $a0, %pc_hi20(.LCPI256_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI256_4) - pcalau12i $a0, %pc_hi20(.LCPI256_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI256_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -104939,17 +106739,17 @@ _Z28test_goto_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Child Loop BB256_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB256_15: # Parent Loop BB256_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -104957,20 +106757,20 @@ _Z28test_goto_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # %bb.16: # %.loopexit.us38 # in Loop: Header=BB256_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB256_13 # %bb.17: # in Loop: Header=BB256_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -104983,16 +106783,25 @@ _Z28test_goto_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB256_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI256_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI256_0) - pcalau12i $a0, %pc_hi20(.LCPI256_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI256_1) - pcalau12i $a0, %pc_hi20(.LCPI256_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI256_2) - pcalau12i $a0, %pc_hi20(.LCPI256_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI256_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -105011,9 +106820,9 @@ _Z28test_goto_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB256_19 # %bb.21: # in Loop: Header=BB256_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -105053,12 +106862,14 @@ _Z28test_goto_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB256_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI256_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI256_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -105098,22 +106909,8 @@ _Z28test_goto_loop_unroll_factorILi28EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi28EdEvPKT0_iPKc, .Lfunc_end256-_Z28test_goto_loop_unroll_factorILi28EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi27EdEvPKT0_iPKc -.LCPI257_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI257_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI257_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI257_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI257_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI257_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi27EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi27EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi27EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi27EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi27EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi27EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi27EdEvPKT0_iPKc @@ -105175,19 +106972,31 @@ _Z28test_goto_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll addi.d $s6, $s1, 104 addi.d $s7, $s1, 216 addi.d $s8, $s0, -27 - pcalau12i $a0, %pc_hi20(.LCPI257_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI257_0) - pcalau12i $a0, %pc_hi20(.LCPI257_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI257_1) - pcalau12i $a0, %pc_hi20(.LCPI257_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI257_2) - pcalau12i $a0, %pc_hi20(.LCPI257_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI257_4) - pcalau12i $a0, %pc_hi20(.LCPI257_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI257_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -105206,227 +107015,227 @@ _Z28test_goto_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB257_5: # Parent Loop BB257_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 27 addi.w $a0, $a0, 27 @@ -105444,12 +107253,12 @@ _Z28test_goto_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Parent Loop BB257_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -105457,20 +107266,20 @@ _Z28test_goto_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB257_9: # %.loopexit.us # in Loop: Header=BB257_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB257_3 # %bb.10: # in Loop: Header=BB257_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -105482,19 +107291,31 @@ _Z28test_goto_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB257_11: # %.lr.ph.split blez $s0, .LBB257_18 # %bb.12: # %.preheader.us37.preheader - pcalau12i $a0, %pc_hi20(.LCPI257_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI257_0) - pcalau12i $a0, %pc_hi20(.LCPI257_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI257_1) - pcalau12i $a0, %pc_hi20(.LCPI257_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI257_2) - pcalau12i $a0, %pc_hi20(.LCPI257_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI257_4) - pcalau12i $a0, %pc_hi20(.LCPI257_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI257_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -105509,17 +107330,17 @@ _Z28test_goto_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Child Loop BB257_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB257_15: # Parent Loop BB257_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -105527,20 +107348,20 @@ _Z28test_goto_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # %bb.16: # %.loopexit.us38 # in Loop: Header=BB257_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB257_13 # %bb.17: # in Loop: Header=BB257_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -105553,16 +107374,25 @@ _Z28test_goto_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB257_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI257_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI257_0) - pcalau12i $a0, %pc_hi20(.LCPI257_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI257_1) - pcalau12i $a0, %pc_hi20(.LCPI257_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI257_2) - pcalau12i $a0, %pc_hi20(.LCPI257_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI257_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -105581,9 +107411,9 @@ _Z28test_goto_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB257_19 # %bb.21: # in Loop: Header=BB257_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -105623,12 +107453,14 @@ _Z28test_goto_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB257_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI257_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI257_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -105668,22 +107500,8 @@ _Z28test_goto_loop_unroll_factorILi27EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi27EdEvPKT0_iPKc, .Lfunc_end257-_Z28test_goto_loop_unroll_factorILi27EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi26EdEvPKT0_iPKc -.LCPI258_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI258_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI258_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI258_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI258_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI258_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi26EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi26EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi26EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi26EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi26EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi26EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi26EdEvPKT0_iPKc @@ -105745,19 +107563,31 @@ _Z28test_goto_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll addi.d $s6, $s1, 104 addi.d $s7, $s1, 208 addi.d $s8, $s0, -26 - pcalau12i $a0, %pc_hi20(.LCPI258_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI258_0) - pcalau12i $a0, %pc_hi20(.LCPI258_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI258_1) - pcalau12i $a0, %pc_hi20(.LCPI258_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI258_2) - pcalau12i $a0, %pc_hi20(.LCPI258_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI258_4) - pcalau12i $a0, %pc_hi20(.LCPI258_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI258_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -105776,219 +107606,219 @@ _Z28test_goto_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB258_5: # Parent Loop BB258_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -104 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 26 addi.w $a0, $a0, 26 @@ -106006,12 +107836,12 @@ _Z28test_goto_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Parent Loop BB258_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -106019,20 +107849,20 @@ _Z28test_goto_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB258_9: # %.loopexit.us # in Loop: Header=BB258_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB258_3 # %bb.10: # in Loop: Header=BB258_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -106044,19 +107874,31 @@ _Z28test_goto_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB258_11: # %.lr.ph.split blez $s0, .LBB258_18 # %bb.12: # %.preheader.us37.preheader - pcalau12i $a0, %pc_hi20(.LCPI258_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI258_0) - pcalau12i $a0, %pc_hi20(.LCPI258_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI258_1) - pcalau12i $a0, %pc_hi20(.LCPI258_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI258_2) - pcalau12i $a0, %pc_hi20(.LCPI258_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI258_4) - pcalau12i $a0, %pc_hi20(.LCPI258_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI258_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -106071,17 +107913,17 @@ _Z28test_goto_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Child Loop BB258_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB258_15: # Parent Loop BB258_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -106089,20 +107931,20 @@ _Z28test_goto_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # %bb.16: # %.loopexit.us38 # in Loop: Header=BB258_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB258_13 # %bb.17: # in Loop: Header=BB258_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -106115,16 +107957,25 @@ _Z28test_goto_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB258_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI258_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI258_0) - pcalau12i $a0, %pc_hi20(.LCPI258_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI258_1) - pcalau12i $a0, %pc_hi20(.LCPI258_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI258_2) - pcalau12i $a0, %pc_hi20(.LCPI258_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI258_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -106143,9 +107994,9 @@ _Z28test_goto_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB258_19 # %bb.21: # in Loop: Header=BB258_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -106185,12 +108036,14 @@ _Z28test_goto_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB258_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI258_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI258_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -106230,22 +108083,8 @@ _Z28test_goto_loop_unroll_factorILi26EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi26EdEvPKT0_iPKc, .Lfunc_end258-_Z28test_goto_loop_unroll_factorILi26EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi25EdEvPKT0_iPKc -.LCPI259_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI259_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI259_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI259_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI259_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI259_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi25EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi25EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi25EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi25EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi25EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi25EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi25EdEvPKT0_iPKc @@ -106307,19 +108146,31 @@ _Z28test_goto_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll addi.d $s6, $s1, 96 addi.d $s7, $s1, 200 addi.d $s8, $s0, -25 - pcalau12i $a0, %pc_hi20(.LCPI259_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI259_0) - pcalau12i $a0, %pc_hi20(.LCPI259_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI259_1) - pcalau12i $a0, %pc_hi20(.LCPI259_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI259_2) - pcalau12i $a0, %pc_hi20(.LCPI259_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI259_4) - pcalau12i $a0, %pc_hi20(.LCPI259_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI259_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -106338,211 +108189,211 @@ _Z28test_goto_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB259_5: # Parent Loop BB259_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 25 addi.w $a0, $a0, 25 @@ -106560,12 +108411,12 @@ _Z28test_goto_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Parent Loop BB259_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -106573,20 +108424,20 @@ _Z28test_goto_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB259_9: # %.loopexit.us # in Loop: Header=BB259_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB259_3 # %bb.10: # in Loop: Header=BB259_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -106598,19 +108449,31 @@ _Z28test_goto_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB259_11: # %.lr.ph.split blez $s0, .LBB259_18 # %bb.12: # %.preheader.us37.preheader - pcalau12i $a0, %pc_hi20(.LCPI259_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI259_0) - pcalau12i $a0, %pc_hi20(.LCPI259_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI259_1) - pcalau12i $a0, %pc_hi20(.LCPI259_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI259_2) - pcalau12i $a0, %pc_hi20(.LCPI259_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI259_4) - pcalau12i $a0, %pc_hi20(.LCPI259_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI259_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -106625,17 +108488,17 @@ _Z28test_goto_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Child Loop BB259_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB259_15: # Parent Loop BB259_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -106643,20 +108506,20 @@ _Z28test_goto_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # %bb.16: # %.loopexit.us38 # in Loop: Header=BB259_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB259_13 # %bb.17: # in Loop: Header=BB259_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -106669,16 +108532,25 @@ _Z28test_goto_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB259_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI259_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI259_0) - pcalau12i $a0, %pc_hi20(.LCPI259_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI259_1) - pcalau12i $a0, %pc_hi20(.LCPI259_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI259_2) - pcalau12i $a0, %pc_hi20(.LCPI259_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI259_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -106697,9 +108569,9 @@ _Z28test_goto_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB259_19 # %bb.21: # in Loop: Header=BB259_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -106739,12 +108611,14 @@ _Z28test_goto_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB259_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI259_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI259_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -106784,22 +108658,8 @@ _Z28test_goto_loop_unroll_factorILi25EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi25EdEvPKT0_iPKc, .Lfunc_end259-_Z28test_goto_loop_unroll_factorILi25EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi24EdEvPKT0_iPKc -.LCPI260_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI260_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI260_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI260_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI260_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI260_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi24EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi24EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi24EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi24EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi24EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi24EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi24EdEvPKT0_iPKc @@ -106861,19 +108721,31 @@ _Z28test_goto_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll addi.d $s6, $s1, 96 addi.d $s7, $s1, 192 addi.d $s8, $s0, -24 - pcalau12i $a0, %pc_hi20(.LCPI260_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI260_0) - pcalau12i $a0, %pc_hi20(.LCPI260_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI260_1) - pcalau12i $a0, %pc_hi20(.LCPI260_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI260_2) - pcalau12i $a0, %pc_hi20(.LCPI260_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI260_4) - pcalau12i $a0, %pc_hi20(.LCPI260_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI260_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -106892,203 +108764,203 @@ _Z28test_goto_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB260_5: # Parent Loop BB260_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -96 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 24 addi.w $a0, $a0, 24 @@ -107106,12 +108978,12 @@ _Z28test_goto_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Parent Loop BB260_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -107119,20 +108991,20 @@ _Z28test_goto_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB260_9: # %.loopexit.us # in Loop: Header=BB260_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB260_3 # %bb.10: # in Loop: Header=BB260_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -107144,19 +109016,31 @@ _Z28test_goto_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB260_11: # %.lr.ph.split blez $s0, .LBB260_18 # %bb.12: # %.preheader.us37.preheader - pcalau12i $a0, %pc_hi20(.LCPI260_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI260_0) - pcalau12i $a0, %pc_hi20(.LCPI260_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI260_1) - pcalau12i $a0, %pc_hi20(.LCPI260_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI260_2) - pcalau12i $a0, %pc_hi20(.LCPI260_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI260_4) - pcalau12i $a0, %pc_hi20(.LCPI260_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI260_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -107171,17 +109055,17 @@ _Z28test_goto_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Child Loop BB260_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB260_15: # Parent Loop BB260_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -107189,20 +109073,20 @@ _Z28test_goto_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # %bb.16: # %.loopexit.us38 # in Loop: Header=BB260_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB260_13 # %bb.17: # in Loop: Header=BB260_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -107215,16 +109099,25 @@ _Z28test_goto_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB260_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI260_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI260_0) - pcalau12i $a0, %pc_hi20(.LCPI260_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI260_1) - pcalau12i $a0, %pc_hi20(.LCPI260_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI260_2) - pcalau12i $a0, %pc_hi20(.LCPI260_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI260_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -107243,9 +109136,9 @@ _Z28test_goto_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB260_19 # %bb.21: # in Loop: Header=BB260_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -107285,12 +109178,14 @@ _Z28test_goto_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB260_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI260_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI260_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -107330,22 +109225,8 @@ _Z28test_goto_loop_unroll_factorILi24EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi24EdEvPKT0_iPKc, .Lfunc_end260-_Z28test_goto_loop_unroll_factorILi24EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi23EdEvPKT0_iPKc -.LCPI261_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI261_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI261_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI261_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI261_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI261_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi23EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi23EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi23EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi23EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi23EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi23EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi23EdEvPKT0_iPKc @@ -107407,19 +109288,31 @@ _Z28test_goto_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll addi.d $s6, $s1, 88 addi.d $s7, $s1, 184 addi.d $s8, $s0, -23 - pcalau12i $a0, %pc_hi20(.LCPI261_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI261_0) - pcalau12i $a0, %pc_hi20(.LCPI261_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI261_1) - pcalau12i $a0, %pc_hi20(.LCPI261_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI261_2) - pcalau12i $a0, %pc_hi20(.LCPI261_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI261_4) - pcalau12i $a0, %pc_hi20(.LCPI261_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI261_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -107438,195 +109331,195 @@ _Z28test_goto_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB261_5: # Parent Loop BB261_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 23 addi.w $a0, $a0, 23 @@ -107644,12 +109537,12 @@ _Z28test_goto_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Parent Loop BB261_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -107657,20 +109550,20 @@ _Z28test_goto_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB261_9: # %.loopexit.us # in Loop: Header=BB261_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB261_3 # %bb.10: # in Loop: Header=BB261_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -107682,19 +109575,31 @@ _Z28test_goto_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB261_11: # %.lr.ph.split blez $s0, .LBB261_18 # %bb.12: # %.preheader.us36.preheader - pcalau12i $a0, %pc_hi20(.LCPI261_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI261_0) - pcalau12i $a0, %pc_hi20(.LCPI261_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI261_1) - pcalau12i $a0, %pc_hi20(.LCPI261_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI261_2) - pcalau12i $a0, %pc_hi20(.LCPI261_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI261_4) - pcalau12i $a0, %pc_hi20(.LCPI261_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI261_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -107709,17 +109614,17 @@ _Z28test_goto_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Child Loop BB261_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB261_15: # Parent Loop BB261_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -107727,20 +109632,20 @@ _Z28test_goto_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # %bb.16: # %.loopexit.us37 # in Loop: Header=BB261_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB261_13 # %bb.17: # in Loop: Header=BB261_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -107753,16 +109658,25 @@ _Z28test_goto_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB261_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI261_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI261_0) - pcalau12i $a0, %pc_hi20(.LCPI261_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI261_1) - pcalau12i $a0, %pc_hi20(.LCPI261_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI261_2) - pcalau12i $a0, %pc_hi20(.LCPI261_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI261_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -107781,9 +109695,9 @@ _Z28test_goto_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB261_19 # %bb.21: # in Loop: Header=BB261_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -107823,12 +109737,14 @@ _Z28test_goto_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB261_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI261_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI261_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -107868,22 +109784,8 @@ _Z28test_goto_loop_unroll_factorILi23EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi23EdEvPKT0_iPKc, .Lfunc_end261-_Z28test_goto_loop_unroll_factorILi23EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi22EdEvPKT0_iPKc -.LCPI262_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI262_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI262_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI262_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI262_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI262_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi22EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi22EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi22EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi22EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi22EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi22EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi22EdEvPKT0_iPKc @@ -107945,19 +109847,31 @@ _Z28test_goto_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll addi.d $s6, $s1, 88 addi.d $s7, $s1, 176 addi.d $s8, $s0, -22 - pcalau12i $a0, %pc_hi20(.LCPI262_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI262_0) - pcalau12i $a0, %pc_hi20(.LCPI262_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI262_1) - pcalau12i $a0, %pc_hi20(.LCPI262_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI262_2) - pcalau12i $a0, %pc_hi20(.LCPI262_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI262_4) - pcalau12i $a0, %pc_hi20(.LCPI262_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI262_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -107976,187 +109890,187 @@ _Z28test_goto_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB262_5: # Parent Loop BB262_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -88 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 22 addi.w $a0, $a0, 22 @@ -108174,12 +110088,12 @@ _Z28test_goto_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Parent Loop BB262_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -108187,20 +110101,20 @@ _Z28test_goto_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB262_9: # %.loopexit.us # in Loop: Header=BB262_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB262_3 # %bb.10: # in Loop: Header=BB262_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -108212,19 +110126,31 @@ _Z28test_goto_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB262_11: # %.lr.ph.split blez $s0, .LBB262_18 # %bb.12: # %.preheader.us36.preheader - pcalau12i $a0, %pc_hi20(.LCPI262_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI262_0) - pcalau12i $a0, %pc_hi20(.LCPI262_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI262_1) - pcalau12i $a0, %pc_hi20(.LCPI262_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI262_2) - pcalau12i $a0, %pc_hi20(.LCPI262_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI262_4) - pcalau12i $a0, %pc_hi20(.LCPI262_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI262_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -108239,17 +110165,17 @@ _Z28test_goto_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Child Loop BB262_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB262_15: # Parent Loop BB262_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -108257,20 +110183,20 @@ _Z28test_goto_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # %bb.16: # %.loopexit.us37 # in Loop: Header=BB262_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB262_13 # %bb.17: # in Loop: Header=BB262_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -108283,16 +110209,25 @@ _Z28test_goto_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB262_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI262_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI262_0) - pcalau12i $a0, %pc_hi20(.LCPI262_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI262_1) - pcalau12i $a0, %pc_hi20(.LCPI262_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI262_2) - pcalau12i $a0, %pc_hi20(.LCPI262_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI262_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -108311,9 +110246,9 @@ _Z28test_goto_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB262_19 # %bb.21: # in Loop: Header=BB262_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -108353,12 +110288,14 @@ _Z28test_goto_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB262_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI262_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI262_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -108398,22 +110335,8 @@ _Z28test_goto_loop_unroll_factorILi22EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi22EdEvPKT0_iPKc, .Lfunc_end262-_Z28test_goto_loop_unroll_factorILi22EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi21EdEvPKT0_iPKc -.LCPI263_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI263_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI263_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI263_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI263_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI263_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi21EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi21EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi21EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi21EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi21EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi21EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi21EdEvPKT0_iPKc @@ -108475,19 +110398,31 @@ _Z28test_goto_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll addi.d $s6, $s1, 80 addi.d $s7, $s1, 168 addi.d $s8, $s0, -21 - pcalau12i $a0, %pc_hi20(.LCPI263_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI263_0) - pcalau12i $a0, %pc_hi20(.LCPI263_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI263_1) - pcalau12i $a0, %pc_hi20(.LCPI263_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI263_2) - pcalau12i $a0, %pc_hi20(.LCPI263_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI263_4) - pcalau12i $a0, %pc_hi20(.LCPI263_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI263_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -108506,179 +110441,179 @@ _Z28test_goto_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB263_5: # Parent Loop BB263_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 21 addi.w $a0, $a0, 21 @@ -108696,12 +110631,12 @@ _Z28test_goto_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Parent Loop BB263_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -108709,20 +110644,20 @@ _Z28test_goto_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB263_9: # %.loopexit.us # in Loop: Header=BB263_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB263_3 # %bb.10: # in Loop: Header=BB263_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -108734,19 +110669,31 @@ _Z28test_goto_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB263_11: # %.lr.ph.split blez $s0, .LBB263_18 # %bb.12: # %.preheader.us36.preheader - pcalau12i $a0, %pc_hi20(.LCPI263_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI263_0) - pcalau12i $a0, %pc_hi20(.LCPI263_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI263_1) - pcalau12i $a0, %pc_hi20(.LCPI263_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI263_2) - pcalau12i $a0, %pc_hi20(.LCPI263_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI263_4) - pcalau12i $a0, %pc_hi20(.LCPI263_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI263_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -108761,17 +110708,17 @@ _Z28test_goto_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Child Loop BB263_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB263_15: # Parent Loop BB263_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -108779,20 +110726,20 @@ _Z28test_goto_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # %bb.16: # %.loopexit.us37 # in Loop: Header=BB263_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB263_13 # %bb.17: # in Loop: Header=BB263_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -108805,16 +110752,25 @@ _Z28test_goto_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB263_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI263_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI263_0) - pcalau12i $a0, %pc_hi20(.LCPI263_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI263_1) - pcalau12i $a0, %pc_hi20(.LCPI263_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI263_2) - pcalau12i $a0, %pc_hi20(.LCPI263_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI263_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -108833,9 +110789,9 @@ _Z28test_goto_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB263_19 # %bb.21: # in Loop: Header=BB263_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -108875,12 +110831,14 @@ _Z28test_goto_loop_unroll_factorILi21EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB263_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI263_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI263_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -108998,22 +110956,8 @@ _ZN15goto_loop_testsILi19EdE7do_testEPKdPKc: # @_ZN15goto_loop_testsILi19EdE7do_ .size _ZN15goto_loop_testsILi19EdE7do_testEPKdPKc, .Lfunc_end264-_ZN15goto_loop_testsILi19EdE7do_testEPKdPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi20EdEvPKT0_iPKc -.LCPI265_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI265_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI265_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI265_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI265_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI265_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi20EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi20EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi20EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi20EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi20EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi20EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi20EdEvPKT0_iPKc @@ -109075,19 +111019,31 @@ _Z28test_goto_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll addi.d $s6, $s1, 80 addi.d $s7, $s1, 160 addi.d $s8, $s0, -20 - pcalau12i $a0, %pc_hi20(.LCPI265_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI265_0) - pcalau12i $a0, %pc_hi20(.LCPI265_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI265_1) - pcalau12i $a0, %pc_hi20(.LCPI265_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI265_2) - pcalau12i $a0, %pc_hi20(.LCPI265_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI265_4) - pcalau12i $a0, %pc_hi20(.LCPI265_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI265_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -109106,171 +111062,171 @@ _Z28test_goto_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB265_5: # Parent Loop BB265_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -80 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 20 addi.w $a0, $a0, 20 @@ -109288,12 +111244,12 @@ _Z28test_goto_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Parent Loop BB265_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -109301,20 +111257,20 @@ _Z28test_goto_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB265_9: # %.loopexit.us # in Loop: Header=BB265_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB265_3 # %bb.10: # in Loop: Header=BB265_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -109326,19 +111282,31 @@ _Z28test_goto_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB265_11: # %.lr.ph.split blez $s0, .LBB265_18 # %bb.12: # %.preheader.us36.preheader - pcalau12i $a0, %pc_hi20(.LCPI265_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI265_0) - pcalau12i $a0, %pc_hi20(.LCPI265_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI265_1) - pcalau12i $a0, %pc_hi20(.LCPI265_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI265_2) - pcalau12i $a0, %pc_hi20(.LCPI265_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI265_4) - pcalau12i $a0, %pc_hi20(.LCPI265_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI265_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -109353,17 +111321,17 @@ _Z28test_goto_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Child Loop BB265_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB265_15: # Parent Loop BB265_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -109371,20 +111339,20 @@ _Z28test_goto_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # %bb.16: # %.loopexit.us37 # in Loop: Header=BB265_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB265_13 # %bb.17: # in Loop: Header=BB265_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -109397,16 +111365,25 @@ _Z28test_goto_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB265_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI265_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI265_0) - pcalau12i $a0, %pc_hi20(.LCPI265_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI265_1) - pcalau12i $a0, %pc_hi20(.LCPI265_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI265_2) - pcalau12i $a0, %pc_hi20(.LCPI265_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI265_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -109425,9 +111402,9 @@ _Z28test_goto_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB265_19 # %bb.21: # in Loop: Header=BB265_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -109467,12 +111444,14 @@ _Z28test_goto_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB265_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI265_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI265_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -109512,22 +111491,8 @@ _Z28test_goto_loop_unroll_factorILi20EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi20EdEvPKT0_iPKc, .Lfunc_end265-_Z28test_goto_loop_unroll_factorILi20EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi19EdEvPKT0_iPKc -.LCPI266_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI266_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI266_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI266_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI266_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI266_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi19EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi19EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi19EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi19EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi19EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi19EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi19EdEvPKT0_iPKc @@ -109589,19 +111554,31 @@ _Z28test_goto_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll addi.d $s6, $s1, 72 addi.d $s7, $s1, 152 addi.d $s8, $s0, -19 - pcalau12i $a0, %pc_hi20(.LCPI266_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI266_0) - pcalau12i $a0, %pc_hi20(.LCPI266_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI266_1) - pcalau12i $a0, %pc_hi20(.LCPI266_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI266_2) - pcalau12i $a0, %pc_hi20(.LCPI266_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI266_4) - pcalau12i $a0, %pc_hi20(.LCPI266_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI266_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -109620,163 +111597,163 @@ _Z28test_goto_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB266_5: # Parent Loop BB266_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 19 addi.w $a0, $a0, 19 @@ -109794,12 +111771,12 @@ _Z28test_goto_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Parent Loop BB266_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -109807,20 +111784,20 @@ _Z28test_goto_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB266_9: # %.loopexit.us # in Loop: Header=BB266_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB266_3 # %bb.10: # in Loop: Header=BB266_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -109832,19 +111809,31 @@ _Z28test_goto_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB266_11: # %.lr.ph.split blez $s0, .LBB266_18 # %bb.12: # %.preheader.us36.preheader - pcalau12i $a0, %pc_hi20(.LCPI266_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI266_0) - pcalau12i $a0, %pc_hi20(.LCPI266_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI266_1) - pcalau12i $a0, %pc_hi20(.LCPI266_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI266_2) - pcalau12i $a0, %pc_hi20(.LCPI266_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI266_4) - pcalau12i $a0, %pc_hi20(.LCPI266_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI266_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -109859,17 +111848,17 @@ _Z28test_goto_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Child Loop BB266_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB266_15: # Parent Loop BB266_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -109877,20 +111866,20 @@ _Z28test_goto_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # %bb.16: # %.loopexit.us37 # in Loop: Header=BB266_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB266_13 # %bb.17: # in Loop: Header=BB266_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -109903,16 +111892,25 @@ _Z28test_goto_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB266_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI266_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI266_0) - pcalau12i $a0, %pc_hi20(.LCPI266_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI266_1) - pcalau12i $a0, %pc_hi20(.LCPI266_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI266_2) - pcalau12i $a0, %pc_hi20(.LCPI266_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI266_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -109931,9 +111929,9 @@ _Z28test_goto_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB266_19 # %bb.21: # in Loop: Header=BB266_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -109973,12 +111971,14 @@ _Z28test_goto_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB266_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI266_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI266_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -110018,22 +112018,8 @@ _Z28test_goto_loop_unroll_factorILi19EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi19EdEvPKT0_iPKc, .Lfunc_end266-_Z28test_goto_loop_unroll_factorILi19EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi18EdEvPKT0_iPKc -.LCPI267_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI267_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI267_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI267_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI267_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI267_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi18EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi18EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi18EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi18EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi18EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi18EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi18EdEvPKT0_iPKc @@ -110095,19 +112081,31 @@ _Z28test_goto_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll addi.d $s6, $s1, 72 addi.d $s7, $s1, 144 addi.d $s8, $s0, -18 - pcalau12i $a0, %pc_hi20(.LCPI267_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI267_0) - pcalau12i $a0, %pc_hi20(.LCPI267_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI267_1) - pcalau12i $a0, %pc_hi20(.LCPI267_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI267_2) - pcalau12i $a0, %pc_hi20(.LCPI267_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI267_4) - pcalau12i $a0, %pc_hi20(.LCPI267_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI267_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -110126,155 +112124,155 @@ _Z28test_goto_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB267_5: # Parent Loop BB267_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -72 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 18 addi.w $a0, $a0, 18 @@ -110292,12 +112290,12 @@ _Z28test_goto_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Parent Loop BB267_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -110305,20 +112303,20 @@ _Z28test_goto_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB267_9: # %.loopexit.us # in Loop: Header=BB267_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB267_3 # %bb.10: # in Loop: Header=BB267_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -110330,19 +112328,31 @@ _Z28test_goto_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB267_11: # %.lr.ph.split blez $s0, .LBB267_18 # %bb.12: # %.preheader.us36.preheader - pcalau12i $a0, %pc_hi20(.LCPI267_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI267_0) - pcalau12i $a0, %pc_hi20(.LCPI267_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI267_1) - pcalau12i $a0, %pc_hi20(.LCPI267_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI267_2) - pcalau12i $a0, %pc_hi20(.LCPI267_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI267_4) - pcalau12i $a0, %pc_hi20(.LCPI267_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI267_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -110357,17 +112367,17 @@ _Z28test_goto_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Child Loop BB267_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB267_15: # Parent Loop BB267_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -110375,20 +112385,20 @@ _Z28test_goto_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # %bb.16: # %.loopexit.us37 # in Loop: Header=BB267_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB267_13 # %bb.17: # in Loop: Header=BB267_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -110401,16 +112411,25 @@ _Z28test_goto_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB267_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI267_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI267_0) - pcalau12i $a0, %pc_hi20(.LCPI267_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI267_1) - pcalau12i $a0, %pc_hi20(.LCPI267_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI267_2) - pcalau12i $a0, %pc_hi20(.LCPI267_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI267_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -110429,9 +112448,9 @@ _Z28test_goto_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB267_19 # %bb.21: # in Loop: Header=BB267_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -110471,12 +112490,14 @@ _Z28test_goto_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB267_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI267_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI267_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -110516,22 +112537,8 @@ _Z28test_goto_loop_unroll_factorILi18EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi18EdEvPKT0_iPKc, .Lfunc_end267-_Z28test_goto_loop_unroll_factorILi18EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi17EdEvPKT0_iPKc -.LCPI268_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI268_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI268_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI268_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI268_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI268_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi17EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi17EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi17EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi17EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi17EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi17EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi17EdEvPKT0_iPKc @@ -110593,19 +112600,31 @@ _Z28test_goto_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll addi.d $s6, $s1, 64 addi.d $s7, $s1, 136 addi.d $s8, $s0, -17 - pcalau12i $a0, %pc_hi20(.LCPI268_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI268_0) - pcalau12i $a0, %pc_hi20(.LCPI268_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI268_1) - pcalau12i $a0, %pc_hi20(.LCPI268_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI268_2) - pcalau12i $a0, %pc_hi20(.LCPI268_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI268_4) - pcalau12i $a0, %pc_hi20(.LCPI268_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI268_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -110624,147 +112643,147 @@ _Z28test_goto_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB268_5: # Parent Loop BB268_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 17 addi.w $a0, $a0, 17 @@ -110782,12 +112801,12 @@ _Z28test_goto_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Parent Loop BB268_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -110795,20 +112814,20 @@ _Z28test_goto_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB268_9: # %.loopexit.us # in Loop: Header=BB268_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB268_3 # %bb.10: # in Loop: Header=BB268_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -110820,19 +112839,31 @@ _Z28test_goto_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB268_11: # %.lr.ph.split blez $s0, .LBB268_18 # %bb.12: # %.preheader.us36.preheader - pcalau12i $a0, %pc_hi20(.LCPI268_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI268_0) - pcalau12i $a0, %pc_hi20(.LCPI268_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI268_1) - pcalau12i $a0, %pc_hi20(.LCPI268_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI268_2) - pcalau12i $a0, %pc_hi20(.LCPI268_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI268_4) - pcalau12i $a0, %pc_hi20(.LCPI268_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI268_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -110847,17 +112878,17 @@ _Z28test_goto_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Child Loop BB268_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB268_15: # Parent Loop BB268_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -110865,20 +112896,20 @@ _Z28test_goto_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # %bb.16: # %.loopexit.us37 # in Loop: Header=BB268_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB268_13 # %bb.17: # in Loop: Header=BB268_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -110891,16 +112922,25 @@ _Z28test_goto_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB268_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI268_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI268_0) - pcalau12i $a0, %pc_hi20(.LCPI268_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI268_1) - pcalau12i $a0, %pc_hi20(.LCPI268_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI268_2) - pcalau12i $a0, %pc_hi20(.LCPI268_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI268_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -110919,9 +112959,9 @@ _Z28test_goto_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB268_19 # %bb.21: # in Loop: Header=BB268_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -110961,12 +113001,14 @@ _Z28test_goto_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB268_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI268_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI268_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -111006,22 +113048,8 @@ _Z28test_goto_loop_unroll_factorILi17EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi17EdEvPKT0_iPKc, .Lfunc_end268-_Z28test_goto_loop_unroll_factorILi17EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi16EdEvPKT0_iPKc -.LCPI269_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI269_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI269_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI269_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI269_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI269_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi16EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi16EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi16EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi16EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi16EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi16EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi16EdEvPKT0_iPKc @@ -111083,19 +113111,31 @@ _Z28test_goto_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll addi.d $s6, $s1, 64 addi.d $s7, $s1, 128 addi.d $s8, $s0, -16 - pcalau12i $a0, %pc_hi20(.LCPI269_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI269_0) - pcalau12i $a0, %pc_hi20(.LCPI269_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI269_1) - pcalau12i $a0, %pc_hi20(.LCPI269_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI269_2) - pcalau12i $a0, %pc_hi20(.LCPI269_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI269_4) - pcalau12i $a0, %pc_hi20(.LCPI269_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI269_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -111114,139 +113154,139 @@ _Z28test_goto_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB269_5: # Parent Loop BB269_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -64 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 16 addi.w $a0, $a0, 16 @@ -111264,12 +113304,12 @@ _Z28test_goto_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Parent Loop BB269_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -111277,20 +113317,20 @@ _Z28test_goto_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB269_9: # %.loopexit.us # in Loop: Header=BB269_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB269_3 # %bb.10: # in Loop: Header=BB269_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -111302,19 +113342,31 @@ _Z28test_goto_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB269_11: # %.lr.ph.split blez $s0, .LBB269_18 # %bb.12: # %.preheader.us36.preheader - pcalau12i $a0, %pc_hi20(.LCPI269_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI269_0) - pcalau12i $a0, %pc_hi20(.LCPI269_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI269_1) - pcalau12i $a0, %pc_hi20(.LCPI269_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI269_2) - pcalau12i $a0, %pc_hi20(.LCPI269_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI269_4) - pcalau12i $a0, %pc_hi20(.LCPI269_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI269_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -111329,17 +113381,17 @@ _Z28test_goto_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Child Loop BB269_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB269_15: # Parent Loop BB269_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -111347,20 +113399,20 @@ _Z28test_goto_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # %bb.16: # %.loopexit.us37 # in Loop: Header=BB269_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB269_13 # %bb.17: # in Loop: Header=BB269_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -111373,16 +113425,25 @@ _Z28test_goto_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB269_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI269_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI269_0) - pcalau12i $a0, %pc_hi20(.LCPI269_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI269_1) - pcalau12i $a0, %pc_hi20(.LCPI269_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI269_2) - pcalau12i $a0, %pc_hi20(.LCPI269_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI269_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -111401,9 +113462,9 @@ _Z28test_goto_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB269_19 # %bb.21: # in Loop: Header=BB269_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -111443,12 +113504,14 @@ _Z28test_goto_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB269_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI269_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI269_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -111488,22 +113551,8 @@ _Z28test_goto_loop_unroll_factorILi16EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi16EdEvPKT0_iPKc, .Lfunc_end269-_Z28test_goto_loop_unroll_factorILi16EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi15EdEvPKT0_iPKc -.LCPI270_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI270_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI270_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI270_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI270_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI270_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi15EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi15EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi15EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi15EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi15EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi15EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi15EdEvPKT0_iPKc @@ -111565,19 +113614,31 @@ _Z28test_goto_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll addi.d $s6, $s1, 56 addi.d $s7, $s1, 120 addi.d $s8, $s0, -15 - pcalau12i $a0, %pc_hi20(.LCPI270_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI270_0) - pcalau12i $a0, %pc_hi20(.LCPI270_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI270_1) - pcalau12i $a0, %pc_hi20(.LCPI270_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI270_2) - pcalau12i $a0, %pc_hi20(.LCPI270_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI270_4) - pcalau12i $a0, %pc_hi20(.LCPI270_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI270_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -111596,131 +113657,131 @@ _Z28test_goto_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB270_5: # Parent Loop BB270_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 15 addi.w $a0, $a0, 15 @@ -111738,12 +113799,12 @@ _Z28test_goto_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Parent Loop BB270_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -111751,20 +113812,20 @@ _Z28test_goto_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB270_9: # %.loopexit.us # in Loop: Header=BB270_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB270_3 # %bb.10: # in Loop: Header=BB270_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -111776,19 +113837,31 @@ _Z28test_goto_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB270_11: # %.lr.ph.split blez $s0, .LBB270_18 # %bb.12: # %.preheader.us36.preheader - pcalau12i $a0, %pc_hi20(.LCPI270_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI270_0) - pcalau12i $a0, %pc_hi20(.LCPI270_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI270_1) - pcalau12i $a0, %pc_hi20(.LCPI270_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI270_2) - pcalau12i $a0, %pc_hi20(.LCPI270_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI270_4) - pcalau12i $a0, %pc_hi20(.LCPI270_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI270_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -111803,17 +113876,17 @@ _Z28test_goto_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Child Loop BB270_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB270_15: # Parent Loop BB270_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -111821,20 +113894,20 @@ _Z28test_goto_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # %bb.16: # %.loopexit.us37 # in Loop: Header=BB270_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB270_13 # %bb.17: # in Loop: Header=BB270_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -111847,16 +113920,25 @@ _Z28test_goto_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB270_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI270_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI270_0) - pcalau12i $a0, %pc_hi20(.LCPI270_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI270_1) - pcalau12i $a0, %pc_hi20(.LCPI270_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI270_2) - pcalau12i $a0, %pc_hi20(.LCPI270_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI270_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -111875,9 +113957,9 @@ _Z28test_goto_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB270_19 # %bb.21: # in Loop: Header=BB270_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -111917,12 +113999,14 @@ _Z28test_goto_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB270_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI270_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI270_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -111962,22 +114046,8 @@ _Z28test_goto_loop_unroll_factorILi15EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi15EdEvPKT0_iPKc, .Lfunc_end270-_Z28test_goto_loop_unroll_factorILi15EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi14EdEvPKT0_iPKc -.LCPI271_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI271_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI271_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI271_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI271_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI271_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi14EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi14EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi14EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi14EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi14EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi14EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi14EdEvPKT0_iPKc @@ -112039,19 +114109,31 @@ _Z28test_goto_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll addi.d $s6, $s1, 56 addi.d $s7, $s1, 112 addi.d $s8, $s0, -14 - pcalau12i $a0, %pc_hi20(.LCPI271_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI271_0) - pcalau12i $a0, %pc_hi20(.LCPI271_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI271_1) - pcalau12i $a0, %pc_hi20(.LCPI271_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI271_2) - pcalau12i $a0, %pc_hi20(.LCPI271_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI271_4) - pcalau12i $a0, %pc_hi20(.LCPI271_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI271_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -112070,123 +114152,123 @@ _Z28test_goto_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB271_5: # Parent Loop BB271_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -56 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 14 addi.w $a0, $a0, 14 @@ -112204,12 +114286,12 @@ _Z28test_goto_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Parent Loop BB271_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -112217,20 +114299,20 @@ _Z28test_goto_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB271_9: # %.loopexit.us # in Loop: Header=BB271_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB271_3 # %bb.10: # in Loop: Header=BB271_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -112242,19 +114324,31 @@ _Z28test_goto_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB271_11: # %.lr.ph.split blez $s0, .LBB271_18 # %bb.12: # %.preheader.us36.preheader - pcalau12i $a0, %pc_hi20(.LCPI271_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI271_0) - pcalau12i $a0, %pc_hi20(.LCPI271_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI271_1) - pcalau12i $a0, %pc_hi20(.LCPI271_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI271_2) - pcalau12i $a0, %pc_hi20(.LCPI271_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI271_4) - pcalau12i $a0, %pc_hi20(.LCPI271_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI271_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -112269,17 +114363,17 @@ _Z28test_goto_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Child Loop BB271_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB271_15: # Parent Loop BB271_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -112287,20 +114381,20 @@ _Z28test_goto_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # %bb.16: # %.loopexit.us37 # in Loop: Header=BB271_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB271_13 # %bb.17: # in Loop: Header=BB271_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -112313,16 +114407,25 @@ _Z28test_goto_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB271_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI271_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI271_0) - pcalau12i $a0, %pc_hi20(.LCPI271_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI271_1) - pcalau12i $a0, %pc_hi20(.LCPI271_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI271_2) - pcalau12i $a0, %pc_hi20(.LCPI271_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI271_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -112341,9 +114444,9 @@ _Z28test_goto_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB271_19 # %bb.21: # in Loop: Header=BB271_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -112383,12 +114486,14 @@ _Z28test_goto_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB271_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI271_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI271_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -112428,22 +114533,8 @@ _Z28test_goto_loop_unroll_factorILi14EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi14EdEvPKT0_iPKc, .Lfunc_end271-_Z28test_goto_loop_unroll_factorILi14EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi13EdEvPKT0_iPKc -.LCPI272_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI272_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI272_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI272_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI272_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI272_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi13EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi13EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi13EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi13EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi13EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi13EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi13EdEvPKT0_iPKc @@ -112505,19 +114596,31 @@ _Z28test_goto_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll addi.d $s6, $s1, 48 addi.d $s7, $s1, 104 addi.d $s8, $s0, -13 - pcalau12i $a0, %pc_hi20(.LCPI272_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI272_0) - pcalau12i $a0, %pc_hi20(.LCPI272_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI272_1) - pcalau12i $a0, %pc_hi20(.LCPI272_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI272_2) - pcalau12i $a0, %pc_hi20(.LCPI272_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI272_4) - pcalau12i $a0, %pc_hi20(.LCPI272_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI272_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -112536,115 +114639,115 @@ _Z28test_goto_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB272_5: # Parent Loop BB272_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 13 addi.w $a0, $a0, 13 @@ -112662,12 +114765,12 @@ _Z28test_goto_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Parent Loop BB272_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -112675,20 +114778,20 @@ _Z28test_goto_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB272_9: # %.loopexit.us # in Loop: Header=BB272_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB272_3 # %bb.10: # in Loop: Header=BB272_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -112700,19 +114803,31 @@ _Z28test_goto_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB272_11: # %.lr.ph.split blez $s0, .LBB272_18 # %bb.12: # %.preheader.us36.preheader - pcalau12i $a0, %pc_hi20(.LCPI272_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI272_0) - pcalau12i $a0, %pc_hi20(.LCPI272_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI272_1) - pcalau12i $a0, %pc_hi20(.LCPI272_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI272_2) - pcalau12i $a0, %pc_hi20(.LCPI272_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI272_4) - pcalau12i $a0, %pc_hi20(.LCPI272_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI272_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -112727,17 +114842,17 @@ _Z28test_goto_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Child Loop BB272_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB272_15: # Parent Loop BB272_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -112745,20 +114860,20 @@ _Z28test_goto_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # %bb.16: # %.loopexit.us37 # in Loop: Header=BB272_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB272_13 # %bb.17: # in Loop: Header=BB272_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -112771,16 +114886,25 @@ _Z28test_goto_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB272_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI272_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI272_0) - pcalau12i $a0, %pc_hi20(.LCPI272_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI272_1) - pcalau12i $a0, %pc_hi20(.LCPI272_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI272_2) - pcalau12i $a0, %pc_hi20(.LCPI272_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI272_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -112799,9 +114923,9 @@ _Z28test_goto_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB272_19 # %bb.21: # in Loop: Header=BB272_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -112841,12 +114965,14 @@ _Z28test_goto_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB272_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI272_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI272_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -112886,22 +115012,8 @@ _Z28test_goto_loop_unroll_factorILi13EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi13EdEvPKT0_iPKc, .Lfunc_end272-_Z28test_goto_loop_unroll_factorILi13EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi12EdEvPKT0_iPKc -.LCPI273_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI273_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI273_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI273_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI273_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI273_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi12EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi12EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi12EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi12EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi12EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi12EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi12EdEvPKT0_iPKc @@ -112963,19 +115075,31 @@ _Z28test_goto_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll addi.d $s6, $s1, 48 addi.d $s7, $s1, 96 addi.d $s8, $s0, -12 - pcalau12i $a0, %pc_hi20(.LCPI273_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI273_0) - pcalau12i $a0, %pc_hi20(.LCPI273_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI273_1) - pcalau12i $a0, %pc_hi20(.LCPI273_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI273_2) - pcalau12i $a0, %pc_hi20(.LCPI273_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI273_4) - pcalau12i $a0, %pc_hi20(.LCPI273_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI273_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -112994,107 +115118,107 @@ _Z28test_goto_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB273_5: # Parent Loop BB273_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -48 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 12 addi.w $a0, $a0, 12 @@ -113112,12 +115236,12 @@ _Z28test_goto_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Parent Loop BB273_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -113125,20 +115249,20 @@ _Z28test_goto_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB273_9: # %.loopexit.us # in Loop: Header=BB273_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB273_3 # %bb.10: # in Loop: Header=BB273_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -113150,19 +115274,31 @@ _Z28test_goto_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB273_11: # %.lr.ph.split blez $s0, .LBB273_18 # %bb.12: # %.preheader.us35.preheader - pcalau12i $a0, %pc_hi20(.LCPI273_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI273_0) - pcalau12i $a0, %pc_hi20(.LCPI273_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI273_1) - pcalau12i $a0, %pc_hi20(.LCPI273_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI273_2) - pcalau12i $a0, %pc_hi20(.LCPI273_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI273_4) - pcalau12i $a0, %pc_hi20(.LCPI273_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI273_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -113177,17 +115313,17 @@ _Z28test_goto_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Child Loop BB273_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB273_15: # Parent Loop BB273_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -113195,20 +115331,20 @@ _Z28test_goto_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # %bb.16: # %.loopexit.us36 # in Loop: Header=BB273_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB273_13 # %bb.17: # in Loop: Header=BB273_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -113221,16 +115357,25 @@ _Z28test_goto_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB273_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI273_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI273_0) - pcalau12i $a0, %pc_hi20(.LCPI273_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI273_1) - pcalau12i $a0, %pc_hi20(.LCPI273_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI273_2) - pcalau12i $a0, %pc_hi20(.LCPI273_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI273_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -113249,9 +115394,9 @@ _Z28test_goto_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB273_19 # %bb.21: # in Loop: Header=BB273_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -113291,12 +115436,14 @@ _Z28test_goto_loop_unroll_factorILi12EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB273_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI273_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI273_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -113417,22 +115564,8 @@ _ZN15goto_loop_testsILi10EdE7do_testEPKdPKc: # @_ZN15goto_loop_testsILi10EdE7do_ .size _ZN15goto_loop_testsILi10EdE7do_testEPKdPKc, .Lfunc_end274-_ZN15goto_loop_testsILi10EdE7do_testEPKdPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi11EdEvPKT0_iPKc -.LCPI275_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI275_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI275_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI275_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI275_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI275_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi11EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi11EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi11EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi11EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi11EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi11EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi11EdEvPKT0_iPKc @@ -113494,19 +115627,31 @@ _Z28test_goto_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll addi.d $s6, $s1, 40 addi.d $s7, $s1, 88 addi.d $s8, $s0, -11 - pcalau12i $a0, %pc_hi20(.LCPI275_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI275_0) - pcalau12i $a0, %pc_hi20(.LCPI275_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI275_1) - pcalau12i $a0, %pc_hi20(.LCPI275_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI275_2) - pcalau12i $a0, %pc_hi20(.LCPI275_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI275_4) - pcalau12i $a0, %pc_hi20(.LCPI275_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI275_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -516992 + lu52i.d $a1, $a1, 1036 + movgr2fr.d $fs1, $a1 vldi $vr3, -854 + ori $a1, $zero, 0 + lu32i.d $a1, -268678 + lu52i.d $a1, $a1, 1042 + movgr2fr.d $fs2, $a1 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -113525,99 +115670,99 @@ _Z28test_goto_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $zero move $a3, $s7 move $a1, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB275_5: # Parent Loop BB275_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a1, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a1, 40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a5, $a2 move $a4, $a3 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, 11 addi.w $a0, $a0, 11 @@ -113635,12 +115780,12 @@ _Z28test_goto_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Parent Loop BB275_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a4, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a4, $a4, 8 @@ -113648,20 +115793,20 @@ _Z28test_goto_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB275_9: # %.loopexit.us # in Loop: Header=BB275_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB275_3 # %bb.10: # in Loop: Header=BB275_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -113673,19 +115818,31 @@ _Z28test_goto_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB275_11: # %.lr.ph.split blez $s0, .LBB275_18 # %bb.12: # %.preheader.us35.preheader - pcalau12i $a0, %pc_hi20(.LCPI275_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI275_0) - pcalau12i $a0, %pc_hi20(.LCPI275_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI275_1) - pcalau12i $a0, %pc_hi20(.LCPI275_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI275_2) - pcalau12i $a0, %pc_hi20(.LCPI275_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI275_4) - pcalau12i $a0, %pc_hi20(.LCPI275_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI275_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -113700,17 +115857,17 @@ _Z28test_goto_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Child Loop BB275_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB275_15: # Parent Loop BB275_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -113718,20 +115875,20 @@ _Z28test_goto_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # %bb.16: # %.loopexit.us36 # in Loop: Header=BB275_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB275_13 # %bb.17: # in Loop: Header=BB275_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -113744,16 +115901,25 @@ _Z28test_goto_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB275_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI275_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI275_0) - pcalau12i $a0, %pc_hi20(.LCPI275_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI275_1) - pcalau12i $a0, %pc_hi20(.LCPI275_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI275_2) - pcalau12i $a0, %pc_hi20(.LCPI275_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI275_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -113772,9 +115938,9 @@ _Z28test_goto_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB275_19 # %bb.21: # in Loop: Header=BB275_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -113814,12 +115980,14 @@ _Z28test_goto_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB275_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI275_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI275_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -113859,22 +116027,8 @@ _Z28test_goto_loop_unroll_factorILi11EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi11EdEvPKT0_iPKc, .Lfunc_end275-_Z28test_goto_loop_unroll_factorILi11EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi10EdEvPKT0_iPKc -.LCPI276_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI276_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI276_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI276_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI276_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI276_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi10EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi10EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi10EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi10EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi10EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi10EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi10EdEvPKT0_iPKc @@ -113936,19 +116090,31 @@ _Z28test_goto_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll addi.d $s6, $s1, 40 addi.d $s7, $s1, 80 addi.d $s8, $s0, -10 - pcalau12i $a0, %pc_hi20(.LCPI276_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI276_0) - pcalau12i $a0, %pc_hi20(.LCPI276_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI276_1) - pcalau12i $a0, %pc_hi20(.LCPI276_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI276_2) - pcalau12i $a0, %pc_hi20(.LCPI276_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI276_4) - pcalau12i $a0, %pc_hi20(.LCPI276_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI276_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -113966,91 +116132,91 @@ _Z28test_goto_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a3, $zero move $a4, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB276_5: # Parent Loop BB276_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -40 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a6, $a3 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a3, $a3, 10 addi.w $a0, $a0, 10 @@ -114068,12 +116234,12 @@ _Z28test_goto_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Parent Loop BB276_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a5, $a5, 8 @@ -114081,20 +116247,20 @@ _Z28test_goto_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB276_9: # %.loopexit.us # in Loop: Header=BB276_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB276_3 # %bb.10: # in Loop: Header=BB276_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -114107,19 +116273,31 @@ _Z28test_goto_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB276_11: # %.lr.ph.split blez $s0, .LBB276_18 # %bb.12: # %.preheader.us35.preheader - pcalau12i $a0, %pc_hi20(.LCPI276_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI276_0) - pcalau12i $a0, %pc_hi20(.LCPI276_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI276_1) - pcalau12i $a0, %pc_hi20(.LCPI276_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI276_2) - pcalau12i $a0, %pc_hi20(.LCPI276_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI276_4) - pcalau12i $a0, %pc_hi20(.LCPI276_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI276_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -114134,17 +116312,17 @@ _Z28test_goto_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # Child Loop BB276_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB276_15: # Parent Loop BB276_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -114152,20 +116330,20 @@ _Z28test_goto_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll # %bb.16: # %.loopexit.us36 # in Loop: Header=BB276_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB276_13 # %bb.17: # in Loop: Header=BB276_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -114178,16 +116356,25 @@ _Z28test_goto_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .LBB276_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI276_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI276_0) - pcalau12i $a0, %pc_hi20(.LCPI276_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI276_1) - pcalau12i $a0, %pc_hi20(.LCPI276_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI276_2) - pcalau12i $a0, %pc_hi20(.LCPI276_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI276_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -114206,9 +116393,9 @@ _Z28test_goto_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB276_19 # %bb.21: # in Loop: Header=BB276_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -114248,12 +116435,14 @@ _Z28test_goto_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB276_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI276_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI276_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -114293,22 +116482,8 @@ _Z28test_goto_loop_unroll_factorILi10EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll .size _Z28test_goto_loop_unroll_factorILi10EdEvPKT0_iPKc, .Lfunc_end276-_Z28test_goto_loop_unroll_factorILi10EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi9EdEvPKT0_iPKc -.LCPI277_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI277_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI277_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI277_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI277_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI277_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi9EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi9EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi9EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi9EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi9EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi9EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi9EdEvPKT0_iPKc @@ -114370,19 +116545,31 @@ _Z28test_goto_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ addi.d $s6, $s1, 32 addi.d $s7, $s1, 72 addi.d $s8, $s0, -9 - pcalau12i $a0, %pc_hi20(.LCPI277_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI277_0) - pcalau12i $a0, %pc_hi20(.LCPI277_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI277_1) - pcalau12i $a0, %pc_hi20(.LCPI277_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI277_2) - pcalau12i $a0, %pc_hi20(.LCPI277_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI277_4) - pcalau12i $a0, %pc_hi20(.LCPI277_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI277_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -114400,83 +116587,83 @@ _Z28test_goto_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a3, $zero move $a4, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB277_5: # Parent Loop BB277_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a6, $a3 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a3, $a3, 9 addi.w $a0, $a0, 9 @@ -114494,12 +116681,12 @@ _Z28test_goto_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ # Parent Loop BB277_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a5, $a5, 8 @@ -114507,20 +116694,20 @@ _Z28test_goto_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .LBB277_9: # %.loopexit.us # in Loop: Header=BB277_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB277_3 # %bb.10: # in Loop: Header=BB277_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -114533,19 +116720,31 @@ _Z28test_goto_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .LBB277_11: # %.lr.ph.split blez $s0, .LBB277_18 # %bb.12: # %.preheader.us35.preheader - pcalau12i $a0, %pc_hi20(.LCPI277_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI277_0) - pcalau12i $a0, %pc_hi20(.LCPI277_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI277_1) - pcalau12i $a0, %pc_hi20(.LCPI277_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI277_2) - pcalau12i $a0, %pc_hi20(.LCPI277_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI277_4) - pcalau12i $a0, %pc_hi20(.LCPI277_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI277_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -114560,17 +116759,17 @@ _Z28test_goto_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ # Child Loop BB277_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB277_15: # Parent Loop BB277_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -114578,20 +116777,20 @@ _Z28test_goto_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ # %bb.16: # %.loopexit.us36 # in Loop: Header=BB277_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB277_13 # %bb.17: # in Loop: Header=BB277_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -114604,16 +116803,25 @@ _Z28test_goto_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .LBB277_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI277_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI277_0) - pcalau12i $a0, %pc_hi20(.LCPI277_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI277_1) - pcalau12i $a0, %pc_hi20(.LCPI277_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI277_2) - pcalau12i $a0, %pc_hi20(.LCPI277_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI277_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -114632,9 +116840,9 @@ _Z28test_goto_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB277_19 # %bb.21: # in Loop: Header=BB277_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -114674,12 +116882,14 @@ _Z28test_goto_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB277_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI277_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI277_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -114719,22 +116929,8 @@ _Z28test_goto_loop_unroll_factorILi9EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .size _Z28test_goto_loop_unroll_factorILi9EdEvPKT0_iPKc, .Lfunc_end277-_Z28test_goto_loop_unroll_factorILi9EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi8EdEvPKT0_iPKc -.LCPI278_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI278_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI278_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI278_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI278_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI278_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi8EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi8EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi8EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi8EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi8EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi8EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi8EdEvPKT0_iPKc @@ -114796,19 +116992,31 @@ _Z28test_goto_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ addi.d $s6, $s1, 32 addi.d $s7, $s1, 64 addi.d $s8, $s0, -8 - pcalau12i $a0, %pc_hi20(.LCPI278_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI278_0) - pcalau12i $a0, %pc_hi20(.LCPI278_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI278_1) - pcalau12i $a0, %pc_hi20(.LCPI278_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI278_2) - pcalau12i $a0, %pc_hi20(.LCPI278_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI278_4) - pcalau12i $a0, %pc_hi20(.LCPI278_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI278_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -114826,75 +117034,75 @@ _Z28test_goto_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a3, $zero move $a4, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB278_5: # Parent Loop BB278_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -32 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a6, $a3 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a3, $a3, 8 addi.w $a0, $a0, 8 @@ -114912,12 +117120,12 @@ _Z28test_goto_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ # Parent Loop BB278_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a5, $a5, 8 @@ -114925,20 +117133,20 @@ _Z28test_goto_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .LBB278_9: # %.loopexit.us # in Loop: Header=BB278_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB278_3 # %bb.10: # in Loop: Header=BB278_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -114951,19 +117159,31 @@ _Z28test_goto_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .LBB278_11: # %.lr.ph.split blez $s0, .LBB278_18 # %bb.12: # %.preheader.us35.preheader - pcalau12i $a0, %pc_hi20(.LCPI278_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI278_0) - pcalau12i $a0, %pc_hi20(.LCPI278_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI278_1) - pcalau12i $a0, %pc_hi20(.LCPI278_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI278_2) - pcalau12i $a0, %pc_hi20(.LCPI278_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI278_4) - pcalau12i $a0, %pc_hi20(.LCPI278_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI278_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -114978,17 +117198,17 @@ _Z28test_goto_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ # Child Loop BB278_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB278_15: # Parent Loop BB278_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -114996,20 +117216,20 @@ _Z28test_goto_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ # %bb.16: # %.loopexit.us36 # in Loop: Header=BB278_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB278_13 # %bb.17: # in Loop: Header=BB278_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -115022,16 +117242,25 @@ _Z28test_goto_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .LBB278_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI278_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI278_0) - pcalau12i $a0, %pc_hi20(.LCPI278_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI278_1) - pcalau12i $a0, %pc_hi20(.LCPI278_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI278_2) - pcalau12i $a0, %pc_hi20(.LCPI278_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI278_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -115050,9 +117279,9 @@ _Z28test_goto_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB278_19 # %bb.21: # in Loop: Header=BB278_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -115092,12 +117321,14 @@ _Z28test_goto_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB278_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI278_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI278_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -115137,22 +117368,8 @@ _Z28test_goto_loop_unroll_factorILi8EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .size _Z28test_goto_loop_unroll_factorILi8EdEvPKT0_iPKc, .Lfunc_end278-_Z28test_goto_loop_unroll_factorILi8EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi7EdEvPKT0_iPKc -.LCPI279_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI279_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI279_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI279_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI279_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI279_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi7EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi7EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi7EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi7EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi7EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi7EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi7EdEvPKT0_iPKc @@ -115214,19 +117431,31 @@ _Z28test_goto_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ addi.d $s6, $s1, 24 addi.d $s7, $s1, 56 addi.d $s8, $s0, -7 - pcalau12i $a0, %pc_hi20(.LCPI279_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI279_0) - pcalau12i $a0, %pc_hi20(.LCPI279_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI279_1) - pcalau12i $a0, %pc_hi20(.LCPI279_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI279_2) - pcalau12i $a0, %pc_hi20(.LCPI279_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI279_4) - pcalau12i $a0, %pc_hi20(.LCPI279_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI279_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -115244,67 +117473,67 @@ _Z28test_goto_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a3, $zero move $a4, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB279_5: # Parent Loop BB279_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a6, $a3 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a3, $a3, 7 addi.w $a0, $a0, 7 @@ -115322,12 +117551,12 @@ _Z28test_goto_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ # Parent Loop BB279_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a5, $a5, 8 @@ -115335,20 +117564,20 @@ _Z28test_goto_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .LBB279_9: # %.loopexit.us # in Loop: Header=BB279_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB279_3 # %bb.10: # in Loop: Header=BB279_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -115361,19 +117590,31 @@ _Z28test_goto_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .LBB279_11: # %.lr.ph.split blez $s0, .LBB279_18 # %bb.12: # %.preheader.us35.preheader - pcalau12i $a0, %pc_hi20(.LCPI279_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI279_0) - pcalau12i $a0, %pc_hi20(.LCPI279_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI279_1) - pcalau12i $a0, %pc_hi20(.LCPI279_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI279_2) - pcalau12i $a0, %pc_hi20(.LCPI279_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI279_4) - pcalau12i $a0, %pc_hi20(.LCPI279_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI279_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -115388,17 +117629,17 @@ _Z28test_goto_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ # Child Loop BB279_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB279_15: # Parent Loop BB279_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -115406,20 +117647,20 @@ _Z28test_goto_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ # %bb.16: # %.loopexit.us36 # in Loop: Header=BB279_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB279_13 # %bb.17: # in Loop: Header=BB279_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -115432,16 +117673,25 @@ _Z28test_goto_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .LBB279_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI279_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI279_0) - pcalau12i $a0, %pc_hi20(.LCPI279_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI279_1) - pcalau12i $a0, %pc_hi20(.LCPI279_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI279_2) - pcalau12i $a0, %pc_hi20(.LCPI279_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI279_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -115460,9 +117710,9 @@ _Z28test_goto_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB279_19 # %bb.21: # in Loop: Header=BB279_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -115502,12 +117752,14 @@ _Z28test_goto_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB279_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI279_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI279_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -115547,22 +117799,8 @@ _Z28test_goto_loop_unroll_factorILi7EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .size _Z28test_goto_loop_unroll_factorILi7EdEvPKT0_iPKc, .Lfunc_end279-_Z28test_goto_loop_unroll_factorILi7EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi6EdEvPKT0_iPKc -.LCPI280_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI280_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI280_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI280_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI280_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI280_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi6EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi6EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi6EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi6EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi6EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi6EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi6EdEvPKT0_iPKc @@ -115624,19 +117862,31 @@ _Z28test_goto_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ addi.d $s6, $s1, 24 addi.d $s7, $s1, 48 addi.d $s8, $s0, -6 - pcalau12i $a0, %pc_hi20(.LCPI280_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI280_0) - pcalau12i $a0, %pc_hi20(.LCPI280_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI280_1) - pcalau12i $a0, %pc_hi20(.LCPI280_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI280_2) - pcalau12i $a0, %pc_hi20(.LCPI280_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI280_4) - pcalau12i $a0, %pc_hi20(.LCPI280_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI280_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -115654,59 +117904,59 @@ _Z28test_goto_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a3, $zero move $a4, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB280_5: # Parent Loop BB280_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -24 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a6, $a3 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a3, $a3, 6 addi.d $a2, $a2, 48 @@ -115724,12 +117974,12 @@ _Z28test_goto_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ # Parent Loop BB280_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a5, $a5, 8 @@ -115737,20 +117987,20 @@ _Z28test_goto_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .LBB280_9: # %.loopexit.us # in Loop: Header=BB280_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB280_3 # %bb.10: # in Loop: Header=BB280_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -115763,19 +118013,31 @@ _Z28test_goto_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .LBB280_11: # %.lr.ph.split blez $s0, .LBB280_18 # %bb.12: # %.preheader.us35.preheader - pcalau12i $a0, %pc_hi20(.LCPI280_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI280_0) - pcalau12i $a0, %pc_hi20(.LCPI280_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI280_1) - pcalau12i $a0, %pc_hi20(.LCPI280_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI280_2) - pcalau12i $a0, %pc_hi20(.LCPI280_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI280_4) - pcalau12i $a0, %pc_hi20(.LCPI280_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI280_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -115790,17 +118052,17 @@ _Z28test_goto_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ # Child Loop BB280_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB280_15: # Parent Loop BB280_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -115808,20 +118070,20 @@ _Z28test_goto_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ # %bb.16: # %.loopexit.us36 # in Loop: Header=BB280_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB280_13 # %bb.17: # in Loop: Header=BB280_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -115834,16 +118096,25 @@ _Z28test_goto_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .LBB280_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI280_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI280_0) - pcalau12i $a0, %pc_hi20(.LCPI280_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI280_1) - pcalau12i $a0, %pc_hi20(.LCPI280_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI280_2) - pcalau12i $a0, %pc_hi20(.LCPI280_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI280_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -115862,9 +118133,9 @@ _Z28test_goto_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB280_19 # %bb.21: # in Loop: Header=BB280_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -115904,12 +118175,14 @@ _Z28test_goto_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB280_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI280_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI280_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -115949,22 +118222,8 @@ _Z28test_goto_loop_unroll_factorILi6EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .size _Z28test_goto_loop_unroll_factorILi6EdEvPKT0_iPKc, .Lfunc_end280-_Z28test_goto_loop_unroll_factorILi6EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi5EdEvPKT0_iPKc -.LCPI281_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI281_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI281_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI281_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI281_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI281_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi5EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi5EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi5EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi5EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi5EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi5EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi5EdEvPKT0_iPKc @@ -116026,19 +118285,31 @@ _Z28test_goto_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ addi.d $s6, $s1, 16 addi.d $s7, $s1, 40 addi.d $s8, $s0, -5 - pcalau12i $a0, %pc_hi20(.LCPI281_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI281_0) - pcalau12i $a0, %pc_hi20(.LCPI281_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI281_1) - pcalau12i $a0, %pc_hi20(.LCPI281_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI281_2) - pcalau12i $a0, %pc_hi20(.LCPI281_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI281_4) - pcalau12i $a0, %pc_hi20(.LCPI281_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI281_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -116056,51 +118327,51 @@ _Z28test_goto_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a3, $zero move $a4, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB281_5: # Parent Loop BB281_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a6, $a3 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a3, $a3, 5 addi.d $a2, $a2, 40 @@ -116118,12 +118389,12 @@ _Z28test_goto_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ # Parent Loop BB281_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a5, $a5, 8 @@ -116131,20 +118402,20 @@ _Z28test_goto_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .LBB281_9: # %.loopexit.us # in Loop: Header=BB281_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB281_3 # %bb.10: # in Loop: Header=BB281_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -116157,19 +118428,31 @@ _Z28test_goto_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .LBB281_11: # %.lr.ph.split blez $s0, .LBB281_18 # %bb.12: # %.preheader.us35.preheader - pcalau12i $a0, %pc_hi20(.LCPI281_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI281_0) - pcalau12i $a0, %pc_hi20(.LCPI281_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI281_1) - pcalau12i $a0, %pc_hi20(.LCPI281_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI281_2) - pcalau12i $a0, %pc_hi20(.LCPI281_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI281_4) - pcalau12i $a0, %pc_hi20(.LCPI281_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI281_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -116184,17 +118467,17 @@ _Z28test_goto_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ # Child Loop BB281_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB281_15: # Parent Loop BB281_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -116202,20 +118485,20 @@ _Z28test_goto_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ # %bb.16: # %.loopexit.us36 # in Loop: Header=BB281_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB281_13 # %bb.17: # in Loop: Header=BB281_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -116228,16 +118511,25 @@ _Z28test_goto_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .LBB281_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI281_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI281_0) - pcalau12i $a0, %pc_hi20(.LCPI281_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI281_1) - pcalau12i $a0, %pc_hi20(.LCPI281_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI281_2) - pcalau12i $a0, %pc_hi20(.LCPI281_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI281_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -116256,9 +118548,9 @@ _Z28test_goto_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB281_19 # %bb.21: # in Loop: Header=BB281_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -116298,12 +118590,14 @@ _Z28test_goto_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB281_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI281_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI281_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -116343,22 +118637,8 @@ _Z28test_goto_loop_unroll_factorILi5EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .size _Z28test_goto_loop_unroll_factorILi5EdEvPKT0_iPKc, .Lfunc_end281-_Z28test_goto_loop_unroll_factorILi5EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi4EdEvPKT0_iPKc -.LCPI282_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI282_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI282_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI282_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI282_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI282_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi4EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi4EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi4EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi4EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi4EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi4EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi4EdEvPKT0_iPKc @@ -116420,19 +118700,31 @@ _Z28test_goto_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ addi.d $s6, $s1, 16 addi.d $s7, $s1, 32 addi.d $s8, $s0, -4 - pcalau12i $a0, %pc_hi20(.LCPI282_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI282_0) - pcalau12i $a0, %pc_hi20(.LCPI282_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI282_1) - pcalau12i $a0, %pc_hi20(.LCPI282_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI282_2) - pcalau12i $a0, %pc_hi20(.LCPI282_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI282_4) - pcalau12i $a0, %pc_hi20(.LCPI282_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI282_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -116450,43 +118742,43 @@ _Z28test_goto_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a3, $zero move $a4, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB282_5: # Parent Loop BB282_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a6, $a3 move $a5, $a4 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a3, $a3, 4 addi.d $a2, $a2, 32 @@ -116504,12 +118796,12 @@ _Z28test_goto_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ # Parent Loop BB282_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a5, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a5, $a5, 8 @@ -116517,20 +118809,20 @@ _Z28test_goto_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .LBB282_9: # %.loopexit.us # in Loop: Header=BB282_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB282_3 # %bb.10: # in Loop: Header=BB282_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -116543,19 +118835,31 @@ _Z28test_goto_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .LBB282_11: # %.lr.ph.split blez $s0, .LBB282_18 # %bb.12: # %.preheader.us35.preheader - pcalau12i $a0, %pc_hi20(.LCPI282_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI282_0) - pcalau12i $a0, %pc_hi20(.LCPI282_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI282_1) - pcalau12i $a0, %pc_hi20(.LCPI282_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI282_2) - pcalau12i $a0, %pc_hi20(.LCPI282_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI282_4) - pcalau12i $a0, %pc_hi20(.LCPI282_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI282_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -116570,17 +118874,17 @@ _Z28test_goto_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ # Child Loop BB282_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB282_15: # Parent Loop BB282_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -116588,20 +118892,20 @@ _Z28test_goto_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ # %bb.16: # %.loopexit.us36 # in Loop: Header=BB282_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB282_13 # %bb.17: # in Loop: Header=BB282_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -116614,16 +118918,25 @@ _Z28test_goto_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .LBB282_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI282_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI282_0) - pcalau12i $a0, %pc_hi20(.LCPI282_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI282_1) - pcalau12i $a0, %pc_hi20(.LCPI282_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI282_2) - pcalau12i $a0, %pc_hi20(.LCPI282_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI282_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -116642,9 +118955,9 @@ _Z28test_goto_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB282_19 # %bb.21: # in Loop: Header=BB282_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -116684,12 +118997,14 @@ _Z28test_goto_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB282_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI282_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI282_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -116729,22 +119044,8 @@ _Z28test_goto_loop_unroll_factorILi4EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .size _Z28test_goto_loop_unroll_factorILi4EdEvPKT0_iPKc, .Lfunc_end282-_Z28test_goto_loop_unroll_factorILi4EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi3EdEvPKT0_iPKc -.LCPI283_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI283_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI283_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI283_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI283_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI283_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi3EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi3EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi3EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi3EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi3EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi3EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi3EdEvPKT0_iPKc @@ -116806,19 +119107,31 @@ _Z28test_goto_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ addi.d $s6, $s1, 16 addi.d $s7, $s1, 24 addi.d $s8, $s0, -3 - pcalau12i $a0, %pc_hi20(.LCPI283_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI283_0) - pcalau12i $a0, %pc_hi20(.LCPI283_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI283_1) - pcalau12i $a0, %pc_hi20(.LCPI283_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI283_2) - pcalau12i $a0, %pc_hi20(.LCPI283_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI283_4) - pcalau12i $a0, %pc_hi20(.LCPI283_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI283_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -116836,35 +119149,35 @@ _Z28test_goto_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a4, $zero move $a5, $s7 move $a2, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB283_5: # Parent Loop BB283_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, -16 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a6, $a4 move $a3, $a5 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a4, $a4, 3 addi.d $a2, $a2, 24 @@ -116882,12 +119195,12 @@ _Z28test_goto_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ # Parent Loop BB283_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a3, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a3, $a3, 8 @@ -116895,20 +119208,20 @@ _Z28test_goto_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .LBB283_9: # %.loopexit.us # in Loop: Header=BB283_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB283_3 # %bb.10: # in Loop: Header=BB283_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -116921,19 +119234,31 @@ _Z28test_goto_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .LBB283_11: # %.lr.ph.split blez $s0, .LBB283_18 # %bb.12: # %.preheader.us35.preheader - pcalau12i $a0, %pc_hi20(.LCPI283_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI283_0) - pcalau12i $a0, %pc_hi20(.LCPI283_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI283_1) - pcalau12i $a0, %pc_hi20(.LCPI283_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI283_2) - pcalau12i $a0, %pc_hi20(.LCPI283_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI283_4) - pcalau12i $a0, %pc_hi20(.LCPI283_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI283_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s4, $zero @@ -116948,17 +119273,17 @@ _Z28test_goto_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ # Child Loop BB283_15 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB283_15: # Parent Loop BB283_14 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -116966,20 +119291,20 @@ _Z28test_goto_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ # %bb.16: # %.loopexit.us36 # in Loop: Header=BB283_14 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB283_13 # %bb.17: # in Loop: Header=BB283_14 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -116992,16 +119317,25 @@ _Z28test_goto_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .LBB283_18: # %.lr.ph.split.split.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI283_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI283_0) - pcalau12i $a0, %pc_hi20(.LCPI283_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI283_1) - pcalau12i $a0, %pc_hi20(.LCPI283_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI283_2) - pcalau12i $a0, %pc_hi20(.LCPI283_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI283_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s1, $zero @@ -117020,9 +119354,9 @@ _Z28test_goto_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB283_19 # %bb.21: # in Loop: Header=BB283_20 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -117062,12 +119396,14 @@ _Z28test_goto_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB283_26: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI283_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI283_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -117107,22 +119443,8 @@ _Z28test_goto_loop_unroll_factorILi3EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .size _Z28test_goto_loop_unroll_factorILi3EdEvPKT0_iPKc, .Lfunc_end283-_Z28test_goto_loop_unroll_factorILi3EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc -.LCPI284_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI284_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI284_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI284_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI284_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI284_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc @@ -117184,19 +119506,31 @@ _Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ addi.d $s6, $s1, 8 addi.d $s7, $s1, 16 addi.d $s8, $s0, -2 - pcalau12i $a0, %pc_hi20(.LCPI284_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI284_0) - pcalau12i $a0, %pc_hi20(.LCPI284_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI284_1) - pcalau12i $a0, %pc_hi20(.LCPI284_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI284_2) - pcalau12i $a0, %pc_hi20(.LCPI284_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI284_4) - pcalau12i $a0, %pc_hi20(.LCPI284_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI284_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $fp, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s1, $a0, %pc_lo12(.L.str.19) move $s3, $zero @@ -117214,27 +119548,27 @@ _Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a4, $zero move $a5, $s7 move $a3, $s6 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB284_5: # Parent Loop BB284_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a3, -8 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fld.d $fa2, $a3, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 move $a6, $a4 move $a0, $a5 - fadd.d $fa1, $fa2, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa2, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a4, $a4, 2 addi.d $a3, $a3, 16 @@ -117252,12 +119586,12 @@ _Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ # Parent Loop BB284_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a0, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a2, $a2, -1 addi.d $a0, $a0, 8 @@ -117265,20 +119599,20 @@ _Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .LBB284_9: # %.loopexit.us # in Loop: Header=BB284_4 Depth=1 fld.d $fa1, $fp, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB284_3 # %bb.10: # in Loop: Header=BB284_4 Depth=1 ld.w $a1, $s2, %pc_lo12(current_test) @@ -117303,16 +119637,24 @@ _Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ lu32i.d $a2, -393216 lu52i.d $a2, $a2, -1022 vreplgr2vr.d $vr5, $a2 - lu32i.d $a0, -268678 - pcalau12i $a2, %pc_hi20(.LCPI284_2) - fld.d $fs0, $a2, %pc_lo12(.LCPI284_2) - pcalau12i $a2, %pc_hi20(.LCPI284_4) - fld.d $fs1, $a2, %pc_lo12(.LCPI284_4) - pcalau12i $a2, %pc_hi20(.LCPI284_3) - fld.d $fs2, $a2, %pc_lo12(.LCPI284_3) - lu52i.d $a0, $a0, 1042 - vreplgr2vr.d $vr6, $a0 - movgr2fr.d $fs3, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + vreplgr2vr.d $vr6, $a2 + movgr2fr.d $fs0, $zero + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -117336,16 +119678,16 @@ _Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ vfadd.d $vr1, $vr1, $vr4 vfmadd.d $vr1, $vr1, $vr6, $vr5 vreplvei.d $vr2, $vr1, 0 - fadd.d $fa2, $fa2, $fs3 + fadd.d $fa2, $fa2, $fs0 vreplvei.d $vr1, $vr1, 1 - fmul.d $fa1, $fa1, $fs0 + fmul.d $fa1, $fa1, $fs1 fadd.d $fa1, $fa2, $fa1 fabs.d $fa3, $fa2 fdiv.d $fa2, $fa1, $fa2 - fcmp.clt.d $fcc0, $fs1, $fa3 + fcmp.clt.d $fcc0, $fs2, $fa3 fsel $fa1, $fa1, $fa2, $fcc0 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs2 + fcmp.clt.d $fcc0, $fa1, $fs3 bcnez $fcc0, .LBB284_13 # %bb.15: # in Loop: Header=BB284_14 Depth=1 ld.w $a1, $s2, %pc_lo12(current_test) @@ -117359,16 +119701,25 @@ _Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ ld.w $a1, $s4, %pc_lo12(iterations) b .LBB284_13 .LBB284_16: # %.lr.ph.split.split.preheader - pcalau12i $a0, %pc_hi20(.LCPI284_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI284_0) - pcalau12i $a0, %pc_hi20(.LCPI284_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI284_1) - pcalau12i $a0, %pc_hi20(.LCPI284_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI284_2) - pcalau12i $a0, %pc_hi20(.LCPI284_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI284_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $fp, $zero @@ -117387,9 +119738,9 @@ _Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB284_17 # %bb.19: # in Loop: Header=BB284_18 Depth=1 ld.w $a1, $s2, %pc_lo12(current_test) @@ -117429,12 +119780,14 @@ _Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB284_24: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI284_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI284_5) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -117474,22 +119827,8 @@ _Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .size _Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc, .Lfunc_end284-_Z28test_goto_loop_unroll_factorILi2EdEvPKT0_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z28test_goto_loop_unroll_factorILi1EdEvPKT0_iPKc -.LCPI285_0: - .dword 0x40c81c8000000000 # double 12345 -.LCPI285_1: - .dword 0x412be67a00000000 # double 914237 -.LCPI285_2: - .dword 0xc0bf400000000000 # double -8000 -.LCPI285_3: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI285_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI285_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z28test_goto_loop_unroll_factorILi1EdEvPKT0_iPKc,"axG",@progbits,_Z28test_goto_loop_unroll_factorILi1EdEvPKT0_iPKc,comdat - .weak _Z28test_goto_loop_unroll_factorILi1EdEvPKT0_iPKc + .weak _Z28test_goto_loop_unroll_factorILi1EdEvPKT0_iPKc # -- Begin function _Z28test_goto_loop_unroll_factorILi1EdEvPKT0_iPKc .p2align 5 .type _Z28test_goto_loop_unroll_factorILi1EdEvPKT0_iPKc,@function _Z28test_goto_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_factorILi1EdEvPKT0_iPKc @@ -117543,19 +119882,31 @@ _Z28test_goto_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ # %bb.1: # %.lr.ph blez $s0, .LBB285_8 # %bb.2: # %.preheader23.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI285_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI285_0) - pcalau12i $a0, %pc_hi20(.LCPI285_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI285_1) - pcalau12i $a0, %pc_hi20(.LCPI285_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI285_2) - pcalau12i $a0, %pc_hi20(.LCPI285_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI285_4) - pcalau12i $a0, %pc_hi20(.LCPI285_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI285_3) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs1, $a2 vldi $vr3, -854 + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs2, $a2 pcalau12i $s6, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s2, $a0, %pc_lo12(.L.str.19) move $s7, $zero @@ -117570,17 +119921,17 @@ _Z28test_goto_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ # Child Loop BB285_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB285_5: # Parent Loop BB285_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -117588,20 +119939,20 @@ _Z28test_goto_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ # %bb.6: # %.loopexit.us # in Loop: Header=BB285_4 Depth=1 fld.d $fa1, $s6, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmadd.d $fa1, $fa1, $fs1, $fa3 - fmul.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fadd.d $fa1, $fa1, $fs1 + fmadd.d $fa1, $fa1, $fs2, $fa3 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB285_3 # %bb.7: # in Loop: Header=BB285_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -117614,16 +119965,25 @@ _Z28test_goto_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ .LBB285_8: # %.lr.ph.split.split.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI285_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI285_0) - pcalau12i $a0, %pc_hi20(.LCPI285_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI285_1) - pcalau12i $a0, %pc_hi20(.LCPI285_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI285_2) - pcalau12i $a0, %pc_hi20(.LCPI285_3) - fld.d $fs3, $a0, %pc_lo12(.LCPI285_3) + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -516992 + lu52i.d $a2, $a2, 1036 + movgr2fr.d $fs0, $a2 vldi $vr2, -854 - movgr2fr.d $fs4, $zero + ori $a2, $zero, 0 + lu32i.d $a2, -268678 + lu52i.d $a2, $a2, 1042 + movgr2fr.d $fs1, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $s0, $a0, %pc_lo12(.L.str.19) move $s2, $zero @@ -117642,9 +120002,9 @@ _Z28test_goto_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ fadd.d $fa1, $fa1, $fs0 fmadd.d $fa1, $fa1, $fs1, $fa2 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB285_9 # %bb.11: # in Loop: Header=BB285_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -117683,12 +120043,14 @@ _Z28test_goto_loop_unroll_factorILi1EdEvPKT0_iPKc: # @_Z28test_goto_loop_unroll_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB285_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI285_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI285_5) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 diff --git a/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/simple_types_constant_folding.dir/simple_types_constant_folding.s b/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/simple_types_constant_folding.dir/simple_types_constant_folding.s index 3b00e624..9365491a 100644 --- a/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/simple_types_constant_folding.dir/simple_types_constant_folding.s +++ b/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/simple_types_constant_folding.dir/simple_types_constant_folding.s @@ -62,12 +62,7 @@ _Z13record_resultdPKc: # @_Z13record_resultdPKc .Lfunc_end0: .size _Z13record_resultdPKc, .Lfunc_end0-_Z13record_resultdPKc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z9summarizePKciiii -.LCPI1_0: - .dword 0x412e848000000000 # double 1.0E+6 - .text - .globl _Z9summarizePKciiii + .globl _Z9summarizePKciiii # -- Begin function _Z9summarizePKciiii .p2align 5 .type _Z9summarizePKciiii,@function _Z9summarizePKciiii: # @_Z9summarizePKciiii @@ -135,14 +130,16 @@ _Z9summarizePKciiii: # @_Z9summarizePKciiii # %bb.5: # %.lr.ph45.preheader st.d $s0, $sp, 8 # 8-byte Folded Spill st.d $s5, $sp, 16 # 8-byte Folded Spill + ori $a0, $zero, 0 movgr2fr.w $fa0, $s3 movgr2fr.w $fa1, $s2 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI1_0) + lu32i.d $a0, -97152 ffint.d.w $fa0, $fa0 ffint.d.w $fa1, $fa1 + lu52i.d $a0, $a0, 1042 fmul.d $fa0, $fa0, $fa1 - fdiv.d $fs1, $fa0, $fa2 + movgr2fr.d $fa1, $a0 + fdiv.d $fs1, $fa0, $fa1 pcalau12i $a0, %pc_hi20(.L.str.4) addi.d $s2, $a0, %pc_lo12(.L.str.4) pcalau12i $a0, %pc_hi20(.L.str.5) @@ -409,12 +406,7 @@ _Z11start_timerv: # @_Z11start_timerv .Lfunc_end3: .size _Z11start_timerv, .Lfunc_end3-_Z11start_timerv # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z5timerv -.LCPI4_0: - .dword 0x412e848000000000 # double 1.0E+6 - .text - .globl _Z5timerv + .globl _Z5timerv # -- Begin function _Z5timerv .p2align 5 .type _Z5timerv,@function _Z5timerv: # @_Z5timerv @@ -426,12 +418,14 @@ _Z5timerv: # @_Z5timerv pcalau12i $a1, %pc_hi20(start_time) ld.d $a1, $a1, %pc_lo12(start_time) pcalau12i $a2, %pc_hi20(end_time) - pcalau12i $a3, %pc_hi20(.LCPI4_0) - fld.d $fa0, $a3, %pc_lo12(.LCPI4_0) sub.d $a1, $a0, $a1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 st.d $a0, $a2, %pc_lo12(end_time) ld.d $ra, $sp, 8 # 8-byte Folded Reload addi.d $sp, $sp, 16 @@ -439,12 +433,7 @@ _Z5timerv: # @_Z5timerv .Lfunc_end4: .size _Z5timerv, .Lfunc_end4-_Z5timerv # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI5_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -2018,8 +2007,8 @@ main: # @main pcaddu18i $ra, %call36(_Z13test_constantIl28custom_multiple_constant_xorIlEEvPT_iPKc) jirl $ra, $ra, 0 fld.d $fa0, $s3, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI5_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 fsub.d $fa1, $fa0, $fa1 ftintrz.l.d $fa1, $fa1 @@ -2505,12 +2494,8 @@ main: # @main .size main, .Lfunc_end5-main .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa10custom_twoIaEEvPT_iPKc -.LCPI6_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa10custom_twoIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa10custom_twoIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa10custom_twoIaEEvPT_iPKc + .weak _Z13test_constantIa10custom_twoIaEEvPT_iPKc # -- Begin function _Z13test_constantIa10custom_twoIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa10custom_twoIaEEvPT_iPKc,@function _Z13test_constantIa10custom_twoIaEEvPT_iPKc: # @_Z13test_constantIa10custom_twoIaEEvPT_iPKc @@ -2612,12 +2597,14 @@ _Z13test_constantIa10custom_twoIaEEvPT_iPKc: # @_Z13test_constantIa10custom_twoI move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB6_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI6_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI6_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -2648,12 +2635,8 @@ _Z13test_constantIa10custom_twoIaEEvPT_iPKc: # @_Z13test_constantIa10custom_twoI .size _Z13test_constantIa10custom_twoIaEEvPT_iPKc, .Lfunc_end6-_Z13test_constantIa10custom_twoIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa20custom_add_constantsIaEEvPT_iPKc -.LCPI7_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa20custom_add_constantsIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa20custom_add_constantsIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa20custom_add_constantsIaEEvPT_iPKc + .weak _Z13test_constantIa20custom_add_constantsIaEEvPT_iPKc # -- Begin function _Z13test_constantIa20custom_add_constantsIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa20custom_add_constantsIaEEvPT_iPKc,@function _Z13test_constantIa20custom_add_constantsIaEEvPT_iPKc: # @_Z13test_constantIa20custom_add_constantsIaEEvPT_iPKc @@ -2755,12 +2738,14 @@ _Z13test_constantIa20custom_add_constantsIaEEvPT_iPKc: # @_Z13test_constantIa20c move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB7_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI7_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI7_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -2791,12 +2776,8 @@ _Z13test_constantIa20custom_add_constantsIaEEvPT_iPKc: # @_Z13test_constantIa20c .size _Z13test_constantIa20custom_add_constantsIaEEvPT_iPKc, .Lfunc_end7-_Z13test_constantIa20custom_add_constantsIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa20custom_sub_constantsIaEEvPT_iPKc -.LCPI8_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa20custom_sub_constantsIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa20custom_sub_constantsIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa20custom_sub_constantsIaEEvPT_iPKc + .weak _Z13test_constantIa20custom_sub_constantsIaEEvPT_iPKc # -- Begin function _Z13test_constantIa20custom_sub_constantsIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa20custom_sub_constantsIaEEvPT_iPKc,@function _Z13test_constantIa20custom_sub_constantsIaEEvPT_iPKc: # @_Z13test_constantIa20custom_sub_constantsIaEEvPT_iPKc @@ -2898,12 +2879,14 @@ _Z13test_constantIa20custom_sub_constantsIaEEvPT_iPKc: # @_Z13test_constantIa20c move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB8_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI8_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI8_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -2934,12 +2917,8 @@ _Z13test_constantIa20custom_sub_constantsIaEEvPT_iPKc: # @_Z13test_constantIa20c .size _Z13test_constantIa20custom_sub_constantsIaEEvPT_iPKc, .Lfunc_end8-_Z13test_constantIa20custom_sub_constantsIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa25custom_multiply_constantsIaEEvPT_iPKc -.LCPI9_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa25custom_multiply_constantsIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa25custom_multiply_constantsIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa25custom_multiply_constantsIaEEvPT_iPKc + .weak _Z13test_constantIa25custom_multiply_constantsIaEEvPT_iPKc # -- Begin function _Z13test_constantIa25custom_multiply_constantsIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa25custom_multiply_constantsIaEEvPT_iPKc,@function _Z13test_constantIa25custom_multiply_constantsIaEEvPT_iPKc: # @_Z13test_constantIa25custom_multiply_constantsIaEEvPT_iPKc @@ -3043,12 +3022,14 @@ _Z13test_constantIa25custom_multiply_constantsIaEEvPT_iPKc: # @_Z13test_constant move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB9_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI9_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI9_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -3079,12 +3060,8 @@ _Z13test_constantIa25custom_multiply_constantsIaEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantIa25custom_multiply_constantsIaEEvPT_iPKc, .Lfunc_end9-_Z13test_constantIa25custom_multiply_constantsIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa23custom_divide_constantsIaEEvPT_iPKc -.LCPI10_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa23custom_divide_constantsIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa23custom_divide_constantsIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa23custom_divide_constantsIaEEvPT_iPKc + .weak _Z13test_constantIa23custom_divide_constantsIaEEvPT_iPKc # -- Begin function _Z13test_constantIa23custom_divide_constantsIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa23custom_divide_constantsIaEEvPT_iPKc,@function _Z13test_constantIa23custom_divide_constantsIaEEvPT_iPKc: # @_Z13test_constantIa23custom_divide_constantsIaEEvPT_iPKc @@ -3186,12 +3163,14 @@ _Z13test_constantIa23custom_divide_constantsIaEEvPT_iPKc: # @_Z13test_constantIa move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB10_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI10_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI10_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -3222,12 +3201,8 @@ _Z13test_constantIa23custom_divide_constantsIaEEvPT_iPKc: # @_Z13test_constantIa .size _Z13test_constantIa23custom_divide_constantsIaEEvPT_iPKc, .Lfunc_end10-_Z13test_constantIa23custom_divide_constantsIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa20custom_mod_constantsIaEEvPT_iPKc -.LCPI11_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa20custom_mod_constantsIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa20custom_mod_constantsIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa20custom_mod_constantsIaEEvPT_iPKc + .weak _Z13test_constantIa20custom_mod_constantsIaEEvPT_iPKc # -- Begin function _Z13test_constantIa20custom_mod_constantsIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa20custom_mod_constantsIaEEvPT_iPKc,@function _Z13test_constantIa20custom_mod_constantsIaEEvPT_iPKc: # @_Z13test_constantIa20custom_mod_constantsIaEEvPT_iPKc @@ -3329,12 +3304,14 @@ _Z13test_constantIa20custom_mod_constantsIaEEvPT_iPKc: # @_Z13test_constantIa20c move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB11_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI11_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI11_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -3365,12 +3342,8 @@ _Z13test_constantIa20custom_mod_constantsIaEEvPT_iPKc: # @_Z13test_constantIa20c .size _Z13test_constantIa20custom_mod_constantsIaEEvPT_iPKc, .Lfunc_end11-_Z13test_constantIa20custom_mod_constantsIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa22custom_equal_constantsIaEEvPT_iPKc -.LCPI12_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa22custom_equal_constantsIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa22custom_equal_constantsIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa22custom_equal_constantsIaEEvPT_iPKc + .weak _Z13test_constantIa22custom_equal_constantsIaEEvPT_iPKc # -- Begin function _Z13test_constantIa22custom_equal_constantsIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa22custom_equal_constantsIaEEvPT_iPKc,@function _Z13test_constantIa22custom_equal_constantsIaEEvPT_iPKc: # @_Z13test_constantIa22custom_equal_constantsIaEEvPT_iPKc @@ -3423,12 +3396,14 @@ _Z13test_constantIa22custom_equal_constantsIaEEvPT_iPKc: # @_Z13test_constantIa2 # %bb.3: # %._crit_edge.i ld.w $a1, $s1, %pc_lo12(current_test) .LBB12_4: # %_Z13record_resultdPKc.exit - pcalau12i $a2, %pc_hi20(.LCPI12_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI12_0) sub.d $a2, $s0, $s2 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a2, $a2, 1042 movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a2, $a1, $a0, 4 slli.d $a3, $a1, 4 fstx.d $fa0, $a0, $a3 @@ -3457,12 +3432,8 @@ _Z13test_constantIa22custom_equal_constantsIaEEvPT_iPKc: # @_Z13test_constantIa2 .size _Z13test_constantIa22custom_equal_constantsIaEEvPT_iPKc, .Lfunc_end12-_Z13test_constantIa22custom_equal_constantsIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa25custom_notequal_constantsIaEEvPT_iPKc -.LCPI13_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa25custom_notequal_constantsIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa25custom_notequal_constantsIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa25custom_notequal_constantsIaEEvPT_iPKc + .weak _Z13test_constantIa25custom_notequal_constantsIaEEvPT_iPKc # -- Begin function _Z13test_constantIa25custom_notequal_constantsIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa25custom_notequal_constantsIaEEvPT_iPKc,@function _Z13test_constantIa25custom_notequal_constantsIaEEvPT_iPKc: # @_Z13test_constantIa25custom_notequal_constantsIaEEvPT_iPKc @@ -3564,12 +3535,14 @@ _Z13test_constantIa25custom_notequal_constantsIaEEvPT_iPKc: # @_Z13test_constant move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB13_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI13_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI13_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -3600,12 +3573,8 @@ _Z13test_constantIa25custom_notequal_constantsIaEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantIa25custom_notequal_constantsIaEEvPT_iPKc, .Lfunc_end13-_Z13test_constantIa25custom_notequal_constantsIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa28custom_greaterthan_constantsIaEEvPT_iPKc -.LCPI14_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa28custom_greaterthan_constantsIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa28custom_greaterthan_constantsIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa28custom_greaterthan_constantsIaEEvPT_iPKc + .weak _Z13test_constantIa28custom_greaterthan_constantsIaEEvPT_iPKc # -- Begin function _Z13test_constantIa28custom_greaterthan_constantsIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa28custom_greaterthan_constantsIaEEvPT_iPKc,@function _Z13test_constantIa28custom_greaterthan_constantsIaEEvPT_iPKc: # @_Z13test_constantIa28custom_greaterthan_constantsIaEEvPT_iPKc @@ -3707,12 +3676,14 @@ _Z13test_constantIa28custom_greaterthan_constantsIaEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB14_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI14_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI14_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -3743,12 +3714,8 @@ _Z13test_constantIa28custom_greaterthan_constantsIaEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIa28custom_greaterthan_constantsIaEEvPT_iPKc, .Lfunc_end14-_Z13test_constantIa28custom_greaterthan_constantsIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa25custom_lessthan_constantsIaEEvPT_iPKc -.LCPI15_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa25custom_lessthan_constantsIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa25custom_lessthan_constantsIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa25custom_lessthan_constantsIaEEvPT_iPKc + .weak _Z13test_constantIa25custom_lessthan_constantsIaEEvPT_iPKc # -- Begin function _Z13test_constantIa25custom_lessthan_constantsIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa25custom_lessthan_constantsIaEEvPT_iPKc,@function _Z13test_constantIa25custom_lessthan_constantsIaEEvPT_iPKc: # @_Z13test_constantIa25custom_lessthan_constantsIaEEvPT_iPKc @@ -3801,12 +3768,14 @@ _Z13test_constantIa25custom_lessthan_constantsIaEEvPT_iPKc: # @_Z13test_constant # %bb.3: # %._crit_edge.i ld.w $a1, $s1, %pc_lo12(current_test) .LBB15_4: # %_Z13record_resultdPKc.exit - pcalau12i $a2, %pc_hi20(.LCPI15_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI15_0) sub.d $a2, $s0, $s2 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a2, $a2, 1042 movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a2, $a1, $a0, 4 slli.d $a3, $a1, 4 fstx.d $fa0, $a0, $a3 @@ -3835,12 +3804,8 @@ _Z13test_constantIa25custom_lessthan_constantsIaEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantIa25custom_lessthan_constantsIaEEvPT_iPKc, .Lfunc_end15-_Z13test_constantIa25custom_lessthan_constantsIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa33custom_greaterthanequal_constantsIaEEvPT_iPKc -.LCPI16_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa33custom_greaterthanequal_constantsIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa33custom_greaterthanequal_constantsIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa33custom_greaterthanequal_constantsIaEEvPT_iPKc + .weak _Z13test_constantIa33custom_greaterthanequal_constantsIaEEvPT_iPKc # -- Begin function _Z13test_constantIa33custom_greaterthanequal_constantsIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa33custom_greaterthanequal_constantsIaEEvPT_iPKc,@function _Z13test_constantIa33custom_greaterthanequal_constantsIaEEvPT_iPKc: # @_Z13test_constantIa33custom_greaterthanequal_constantsIaEEvPT_iPKc @@ -3942,12 +3907,14 @@ _Z13test_constantIa33custom_greaterthanequal_constantsIaEEvPT_iPKc: # @_Z13test_ move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB16_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI16_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI16_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -3978,12 +3945,8 @@ _Z13test_constantIa33custom_greaterthanequal_constantsIaEEvPT_iPKc: # @_Z13test_ .size _Z13test_constantIa33custom_greaterthanequal_constantsIaEEvPT_iPKc, .Lfunc_end16-_Z13test_constantIa33custom_greaterthanequal_constantsIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa30custom_lessthanequal_constantsIaEEvPT_iPKc -.LCPI17_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa30custom_lessthanequal_constantsIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa30custom_lessthanequal_constantsIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa30custom_lessthanequal_constantsIaEEvPT_iPKc + .weak _Z13test_constantIa30custom_lessthanequal_constantsIaEEvPT_iPKc # -- Begin function _Z13test_constantIa30custom_lessthanequal_constantsIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa30custom_lessthanequal_constantsIaEEvPT_iPKc,@function _Z13test_constantIa30custom_lessthanequal_constantsIaEEvPT_iPKc: # @_Z13test_constantIa30custom_lessthanequal_constantsIaEEvPT_iPKc @@ -4036,12 +3999,14 @@ _Z13test_constantIa30custom_lessthanequal_constantsIaEEvPT_iPKc: # @_Z13test_con # %bb.3: # %._crit_edge.i ld.w $a1, $s1, %pc_lo12(current_test) .LBB17_4: # %_Z13record_resultdPKc.exit - pcalau12i $a2, %pc_hi20(.LCPI17_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI17_0) sub.d $a2, $s0, $s2 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a2, $a2, 1042 movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a2, $a1, $a0, 4 slli.d $a3, $a1, 4 fstx.d $fa0, $a0, $a3 @@ -4070,12 +4035,8 @@ _Z13test_constantIa30custom_lessthanequal_constantsIaEEvPT_iPKc: # @_Z13test_con .size _Z13test_constantIa30custom_lessthanequal_constantsIaEEvPT_iPKc, .Lfunc_end17-_Z13test_constantIa30custom_lessthanequal_constantsIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa20custom_and_constantsIaEEvPT_iPKc -.LCPI18_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa20custom_and_constantsIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa20custom_and_constantsIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa20custom_and_constantsIaEEvPT_iPKc + .weak _Z13test_constantIa20custom_and_constantsIaEEvPT_iPKc # -- Begin function _Z13test_constantIa20custom_and_constantsIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa20custom_and_constantsIaEEvPT_iPKc,@function _Z13test_constantIa20custom_and_constantsIaEEvPT_iPKc: # @_Z13test_constantIa20custom_and_constantsIaEEvPT_iPKc @@ -4177,12 +4138,14 @@ _Z13test_constantIa20custom_and_constantsIaEEvPT_iPKc: # @_Z13test_constantIa20c move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB18_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI18_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI18_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -4213,12 +4176,8 @@ _Z13test_constantIa20custom_and_constantsIaEEvPT_iPKc: # @_Z13test_constantIa20c .size _Z13test_constantIa20custom_and_constantsIaEEvPT_iPKc, .Lfunc_end18-_Z13test_constantIa20custom_and_constantsIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa19custom_or_constantsIaEEvPT_iPKc -.LCPI19_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa19custom_or_constantsIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa19custom_or_constantsIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa19custom_or_constantsIaEEvPT_iPKc + .weak _Z13test_constantIa19custom_or_constantsIaEEvPT_iPKc # -- Begin function _Z13test_constantIa19custom_or_constantsIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa19custom_or_constantsIaEEvPT_iPKc,@function _Z13test_constantIa19custom_or_constantsIaEEvPT_iPKc: # @_Z13test_constantIa19custom_or_constantsIaEEvPT_iPKc @@ -4322,12 +4281,14 @@ _Z13test_constantIa19custom_or_constantsIaEEvPT_iPKc: # @_Z13test_constantIa19cu move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB19_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI19_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI19_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -4358,12 +4319,8 @@ _Z13test_constantIa19custom_or_constantsIaEEvPT_iPKc: # @_Z13test_constantIa19cu .size _Z13test_constantIa19custom_or_constantsIaEEvPT_iPKc, .Lfunc_end19-_Z13test_constantIa19custom_or_constantsIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa20custom_xor_constantsIaEEvPT_iPKc -.LCPI20_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa20custom_xor_constantsIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa20custom_xor_constantsIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa20custom_xor_constantsIaEEvPT_iPKc + .weak _Z13test_constantIa20custom_xor_constantsIaEEvPT_iPKc # -- Begin function _Z13test_constantIa20custom_xor_constantsIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa20custom_xor_constantsIaEEvPT_iPKc,@function _Z13test_constantIa20custom_xor_constantsIaEEvPT_iPKc: # @_Z13test_constantIa20custom_xor_constantsIaEEvPT_iPKc @@ -4467,12 +4424,14 @@ _Z13test_constantIa20custom_xor_constantsIaEEvPT_iPKc: # @_Z13test_constantIa20c move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB20_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI20_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI20_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -4503,12 +4462,8 @@ _Z13test_constantIa20custom_xor_constantsIaEEvPT_iPKc: # @_Z13test_constantIa20c .size _Z13test_constantIa20custom_xor_constantsIaEEvPT_iPKc, .Lfunc_end20-_Z13test_constantIa20custom_xor_constantsIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa19custom_constant_addIaEEvPT_iPKc -.LCPI21_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa19custom_constant_addIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa19custom_constant_addIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa19custom_constant_addIaEEvPT_iPKc + .weak _Z13test_constantIa19custom_constant_addIaEEvPT_iPKc # -- Begin function _Z13test_constantIa19custom_constant_addIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa19custom_constant_addIaEEvPT_iPKc,@function _Z13test_constantIa19custom_constant_addIaEEvPT_iPKc: # @_Z13test_constantIa19custom_constant_addIaEEvPT_iPKc @@ -4744,12 +4699,14 @@ _Z13test_constantIa19custom_constant_addIaEEvPT_iPKc: # @_Z13test_constantIa19cu move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB21_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI21_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI21_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -4783,12 +4740,8 @@ _Z13test_constantIa19custom_constant_addIaEEvPT_iPKc: # @_Z13test_constantIa19cu .size _Z13test_constantIa19custom_constant_addIaEEvPT_iPKc, .Lfunc_end21-_Z13test_constantIa19custom_constant_addIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa28custom_multiple_constant_addIaEEvPT_iPKc -.LCPI22_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa28custom_multiple_constant_addIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa28custom_multiple_constant_addIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa28custom_multiple_constant_addIaEEvPT_iPKc + .weak _Z13test_constantIa28custom_multiple_constant_addIaEEvPT_iPKc # -- Begin function _Z13test_constantIa28custom_multiple_constant_addIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa28custom_multiple_constant_addIaEEvPT_iPKc,@function _Z13test_constantIa28custom_multiple_constant_addIaEEvPT_iPKc: # @_Z13test_constantIa28custom_multiple_constant_addIaEEvPT_iPKc @@ -5024,12 +4977,14 @@ _Z13test_constantIa28custom_multiple_constant_addIaEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB22_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI22_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI22_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -5063,12 +5018,8 @@ _Z13test_constantIa28custom_multiple_constant_addIaEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIa28custom_multiple_constant_addIaEEvPT_iPKc, .Lfunc_end22-_Z13test_constantIa28custom_multiple_constant_addIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa19custom_constant_subIaEEvPT_iPKc -.LCPI23_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa19custom_constant_subIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa19custom_constant_subIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa19custom_constant_subIaEEvPT_iPKc + .weak _Z13test_constantIa19custom_constant_subIaEEvPT_iPKc # -- Begin function _Z13test_constantIa19custom_constant_subIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa19custom_constant_subIaEEvPT_iPKc,@function _Z13test_constantIa19custom_constant_subIaEEvPT_iPKc: # @_Z13test_constantIa19custom_constant_subIaEEvPT_iPKc @@ -5307,12 +5258,14 @@ _Z13test_constantIa19custom_constant_subIaEEvPT_iPKc: # @_Z13test_constantIa19cu move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB23_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI23_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI23_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -5346,12 +5299,8 @@ _Z13test_constantIa19custom_constant_subIaEEvPT_iPKc: # @_Z13test_constantIa19cu .size _Z13test_constantIa19custom_constant_subIaEEvPT_iPKc, .Lfunc_end23-_Z13test_constantIa19custom_constant_subIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa28custom_multiple_constant_subIaEEvPT_iPKc -.LCPI24_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa28custom_multiple_constant_subIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa28custom_multiple_constant_subIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa28custom_multiple_constant_subIaEEvPT_iPKc + .weak _Z13test_constantIa28custom_multiple_constant_subIaEEvPT_iPKc # -- Begin function _Z13test_constantIa28custom_multiple_constant_subIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa28custom_multiple_constant_subIaEEvPT_iPKc,@function _Z13test_constantIa28custom_multiple_constant_subIaEEvPT_iPKc: # @_Z13test_constantIa28custom_multiple_constant_subIaEEvPT_iPKc @@ -5590,12 +5539,14 @@ _Z13test_constantIa28custom_multiple_constant_subIaEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB24_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI24_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI24_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -5629,12 +5580,8 @@ _Z13test_constantIa28custom_multiple_constant_subIaEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIa28custom_multiple_constant_subIaEEvPT_iPKc, .Lfunc_end24-_Z13test_constantIa28custom_multiple_constant_subIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa24custom_constant_multiplyIaEEvPT_iPKc -.LCPI25_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa24custom_constant_multiplyIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa24custom_constant_multiplyIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa24custom_constant_multiplyIaEEvPT_iPKc + .weak _Z13test_constantIa24custom_constant_multiplyIaEEvPT_iPKc # -- Begin function _Z13test_constantIa24custom_constant_multiplyIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa24custom_constant_multiplyIaEEvPT_iPKc,@function _Z13test_constantIa24custom_constant_multiplyIaEEvPT_iPKc: # @_Z13test_constantIa24custom_constant_multiplyIaEEvPT_iPKc @@ -5836,12 +5783,14 @@ _Z13test_constantIa24custom_constant_multiplyIaEEvPT_iPKc: # @_Z13test_constantI move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB25_23: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI25_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI25_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -5875,12 +5824,8 @@ _Z13test_constantIa24custom_constant_multiplyIaEEvPT_iPKc: # @_Z13test_constantI .size _Z13test_constantIa24custom_constant_multiplyIaEEvPT_iPKc, .Lfunc_end25-_Z13test_constantIa24custom_constant_multiplyIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa33custom_multiple_constant_multiplyIaEEvPT_iPKc -.LCPI26_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa33custom_multiple_constant_multiplyIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa33custom_multiple_constant_multiplyIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa33custom_multiple_constant_multiplyIaEEvPT_iPKc + .weak _Z13test_constantIa33custom_multiple_constant_multiplyIaEEvPT_iPKc # -- Begin function _Z13test_constantIa33custom_multiple_constant_multiplyIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa33custom_multiple_constant_multiplyIaEEvPT_iPKc,@function _Z13test_constantIa33custom_multiple_constant_multiplyIaEEvPT_iPKc: # @_Z13test_constantIa33custom_multiple_constant_multiplyIaEEvPT_iPKc @@ -6082,12 +6027,14 @@ _Z13test_constantIa33custom_multiple_constant_multiplyIaEEvPT_iPKc: # @_Z13test_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB26_23: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI26_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI26_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -6121,12 +6068,8 @@ _Z13test_constantIa33custom_multiple_constant_multiplyIaEEvPT_iPKc: # @_Z13test_ .size _Z13test_constantIa33custom_multiple_constant_multiplyIaEEvPT_iPKc, .Lfunc_end26-_Z13test_constantIa33custom_multiple_constant_multiplyIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa34custom_multiple_constant_multiply2IaEEvPT_iPKc -.LCPI27_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa34custom_multiple_constant_multiply2IaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa34custom_multiple_constant_multiply2IaEEvPT_iPKc,comdat - .weak _Z13test_constantIa34custom_multiple_constant_multiply2IaEEvPT_iPKc + .weak _Z13test_constantIa34custom_multiple_constant_multiply2IaEEvPT_iPKc # -- Begin function _Z13test_constantIa34custom_multiple_constant_multiply2IaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa34custom_multiple_constant_multiply2IaEEvPT_iPKc,@function _Z13test_constantIa34custom_multiple_constant_multiply2IaEEvPT_iPKc: # @_Z13test_constantIa34custom_multiple_constant_multiply2IaEEvPT_iPKc @@ -6362,12 +6305,14 @@ _Z13test_constantIa34custom_multiple_constant_multiply2IaEEvPT_iPKc: # @_Z13test move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB27_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI27_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI27_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -6401,12 +6346,8 @@ _Z13test_constantIa34custom_multiple_constant_multiply2IaEEvPT_iPKc: # @_Z13test .size _Z13test_constantIa34custom_multiple_constant_multiply2IaEEvPT_iPKc, .Lfunc_end27-_Z13test_constantIa34custom_multiple_constant_multiply2IaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa22custom_constant_divideIaEEvPT_iPKc -.LCPI28_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa22custom_constant_divideIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa22custom_constant_divideIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa22custom_constant_divideIaEEvPT_iPKc + .weak _Z13test_constantIa22custom_constant_divideIaEEvPT_iPKc # -- Begin function _Z13test_constantIa22custom_constant_divideIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa22custom_constant_divideIaEEvPT_iPKc,@function _Z13test_constantIa22custom_constant_divideIaEEvPT_iPKc: # @_Z13test_constantIa22custom_constant_divideIaEEvPT_iPKc @@ -6722,12 +6663,14 @@ _Z13test_constantIa22custom_constant_divideIaEEvPT_iPKc: # @_Z13test_constantIa2 move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB28_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI28_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI28_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -6761,12 +6704,8 @@ _Z13test_constantIa22custom_constant_divideIaEEvPT_iPKc: # @_Z13test_constantIa2 .size _Z13test_constantIa22custom_constant_divideIaEEvPT_iPKc, .Lfunc_end28-_Z13test_constantIa22custom_constant_divideIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa31custom_multiple_constant_divideIaEEvPT_iPKc -.LCPI29_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa31custom_multiple_constant_divideIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa31custom_multiple_constant_divideIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa31custom_multiple_constant_divideIaEEvPT_iPKc + .weak _Z13test_constantIa31custom_multiple_constant_divideIaEEvPT_iPKc # -- Begin function _Z13test_constantIa31custom_multiple_constant_divideIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa31custom_multiple_constant_divideIaEEvPT_iPKc,@function _Z13test_constantIa31custom_multiple_constant_divideIaEEvPT_iPKc: # @_Z13test_constantIa31custom_multiple_constant_divideIaEEvPT_iPKc @@ -7094,12 +7033,14 @@ _Z13test_constantIa31custom_multiple_constant_divideIaEEvPT_iPKc: # @_Z13test_co move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB29_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI29_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI29_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -7133,12 +7074,8 @@ _Z13test_constantIa31custom_multiple_constant_divideIaEEvPT_iPKc: # @_Z13test_co .size _Z13test_constantIa31custom_multiple_constant_divideIaEEvPT_iPKc, .Lfunc_end29-_Z13test_constantIa31custom_multiple_constant_divideIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa32custom_multiple_constant_divide2IaEEvPT_iPKc -.LCPI30_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa32custom_multiple_constant_divide2IaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa32custom_multiple_constant_divide2IaEEvPT_iPKc,comdat - .weak _Z13test_constantIa32custom_multiple_constant_divide2IaEEvPT_iPKc + .weak _Z13test_constantIa32custom_multiple_constant_divide2IaEEvPT_iPKc # -- Begin function _Z13test_constantIa32custom_multiple_constant_divide2IaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa32custom_multiple_constant_divide2IaEEvPT_iPKc,@function _Z13test_constantIa32custom_multiple_constant_divide2IaEEvPT_iPKc: # @_Z13test_constantIa32custom_multiple_constant_divide2IaEEvPT_iPKc @@ -7374,12 +7311,14 @@ _Z13test_constantIa32custom_multiple_constant_divide2IaEEvPT_iPKc: # @_Z13test_c move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB30_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI30_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI30_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -7413,12 +7352,8 @@ _Z13test_constantIa32custom_multiple_constant_divide2IaEEvPT_iPKc: # @_Z13test_c .size _Z13test_constantIa32custom_multiple_constant_divide2IaEEvPT_iPKc, .Lfunc_end30-_Z13test_constantIa32custom_multiple_constant_divide2IaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa30custom_multiple_constant_mixedIaEEvPT_iPKc -.LCPI31_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa30custom_multiple_constant_mixedIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa30custom_multiple_constant_mixedIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa30custom_multiple_constant_mixedIaEEvPT_iPKc + .weak _Z13test_constantIa30custom_multiple_constant_mixedIaEEvPT_iPKc # -- Begin function _Z13test_constantIa30custom_multiple_constant_mixedIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa30custom_multiple_constant_mixedIaEEvPT_iPKc,@function _Z13test_constantIa30custom_multiple_constant_mixedIaEEvPT_iPKc: # @_Z13test_constantIa30custom_multiple_constant_mixedIaEEvPT_iPKc @@ -7647,12 +7582,14 @@ _Z13test_constantIa30custom_multiple_constant_mixedIaEEvPT_iPKc: # @_Z13test_con move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB31_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI31_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI31_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -7686,12 +7623,8 @@ _Z13test_constantIa30custom_multiple_constant_mixedIaEEvPT_iPKc: # @_Z13test_con .size _Z13test_constantIa30custom_multiple_constant_mixedIaEEvPT_iPKc, .Lfunc_end31-_Z13test_constantIa30custom_multiple_constant_mixedIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa19custom_constant_andIaEEvPT_iPKc -.LCPI32_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa19custom_constant_andIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa19custom_constant_andIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa19custom_constant_andIaEEvPT_iPKc + .weak _Z13test_constantIa19custom_constant_andIaEEvPT_iPKc # -- Begin function _Z13test_constantIa19custom_constant_andIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa19custom_constant_andIaEEvPT_iPKc,@function _Z13test_constantIa19custom_constant_andIaEEvPT_iPKc: # @_Z13test_constantIa19custom_constant_andIaEEvPT_iPKc @@ -7924,12 +7857,14 @@ _Z13test_constantIa19custom_constant_andIaEEvPT_iPKc: # @_Z13test_constantIa19cu move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB32_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI32_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI32_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -7963,12 +7898,8 @@ _Z13test_constantIa19custom_constant_andIaEEvPT_iPKc: # @_Z13test_constantIa19cu .size _Z13test_constantIa19custom_constant_andIaEEvPT_iPKc, .Lfunc_end32-_Z13test_constantIa19custom_constant_andIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa28custom_multiple_constant_andIaEEvPT_iPKc -.LCPI33_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa28custom_multiple_constant_andIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa28custom_multiple_constant_andIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa28custom_multiple_constant_andIaEEvPT_iPKc + .weak _Z13test_constantIa28custom_multiple_constant_andIaEEvPT_iPKc # -- Begin function _Z13test_constantIa28custom_multiple_constant_andIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa28custom_multiple_constant_andIaEEvPT_iPKc,@function _Z13test_constantIa28custom_multiple_constant_andIaEEvPT_iPKc: # @_Z13test_constantIa28custom_multiple_constant_andIaEEvPT_iPKc @@ -8201,12 +8132,14 @@ _Z13test_constantIa28custom_multiple_constant_andIaEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB33_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI33_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI33_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -8240,12 +8173,8 @@ _Z13test_constantIa28custom_multiple_constant_andIaEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIa28custom_multiple_constant_andIaEEvPT_iPKc, .Lfunc_end33-_Z13test_constantIa28custom_multiple_constant_andIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa18custom_constant_orIaEEvPT_iPKc -.LCPI34_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa18custom_constant_orIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa18custom_constant_orIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa18custom_constant_orIaEEvPT_iPKc + .weak _Z13test_constantIa18custom_constant_orIaEEvPT_iPKc # -- Begin function _Z13test_constantIa18custom_constant_orIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa18custom_constant_orIaEEvPT_iPKc,@function _Z13test_constantIa18custom_constant_orIaEEvPT_iPKc: # @_Z13test_constantIa18custom_constant_orIaEEvPT_iPKc @@ -8467,12 +8396,14 @@ _Z13test_constantIa18custom_constant_orIaEEvPT_iPKc: # @_Z13test_constantIa18cus move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB34_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI34_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI34_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -8506,12 +8437,8 @@ _Z13test_constantIa18custom_constant_orIaEEvPT_iPKc: # @_Z13test_constantIa18cus .size _Z13test_constantIa18custom_constant_orIaEEvPT_iPKc, .Lfunc_end34-_Z13test_constantIa18custom_constant_orIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa27custom_multiple_constant_orIaEEvPT_iPKc -.LCPI35_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa27custom_multiple_constant_orIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa27custom_multiple_constant_orIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa27custom_multiple_constant_orIaEEvPT_iPKc + .weak _Z13test_constantIa27custom_multiple_constant_orIaEEvPT_iPKc # -- Begin function _Z13test_constantIa27custom_multiple_constant_orIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa27custom_multiple_constant_orIaEEvPT_iPKc,@function _Z13test_constantIa27custom_multiple_constant_orIaEEvPT_iPKc: # @_Z13test_constantIa27custom_multiple_constant_orIaEEvPT_iPKc @@ -8727,12 +8654,14 @@ _Z13test_constantIa27custom_multiple_constant_orIaEEvPT_iPKc: # @_Z13test_consta move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB35_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI35_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI35_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -8766,12 +8695,8 @@ _Z13test_constantIa27custom_multiple_constant_orIaEEvPT_iPKc: # @_Z13test_consta .size _Z13test_constantIa27custom_multiple_constant_orIaEEvPT_iPKc, .Lfunc_end35-_Z13test_constantIa27custom_multiple_constant_orIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa19custom_constant_xorIaEEvPT_iPKc -.LCPI36_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa19custom_constant_xorIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa19custom_constant_xorIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa19custom_constant_xorIaEEvPT_iPKc + .weak _Z13test_constantIa19custom_constant_xorIaEEvPT_iPKc # -- Begin function _Z13test_constantIa19custom_constant_xorIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa19custom_constant_xorIaEEvPT_iPKc,@function _Z13test_constantIa19custom_constant_xorIaEEvPT_iPKc: # @_Z13test_constantIa19custom_constant_xorIaEEvPT_iPKc @@ -9007,12 +8932,14 @@ _Z13test_constantIa19custom_constant_xorIaEEvPT_iPKc: # @_Z13test_constantIa19cu move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB36_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI36_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI36_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -9046,12 +8973,8 @@ _Z13test_constantIa19custom_constant_xorIaEEvPT_iPKc: # @_Z13test_constantIa19cu .size _Z13test_constantIa19custom_constant_xorIaEEvPT_iPKc, .Lfunc_end36-_Z13test_constantIa19custom_constant_xorIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIa28custom_multiple_constant_xorIaEEvPT_iPKc -.LCPI37_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIa28custom_multiple_constant_xorIaEEvPT_iPKc,"axG",@progbits,_Z13test_constantIa28custom_multiple_constant_xorIaEEvPT_iPKc,comdat - .weak _Z13test_constantIa28custom_multiple_constant_xorIaEEvPT_iPKc + .weak _Z13test_constantIa28custom_multiple_constant_xorIaEEvPT_iPKc # -- Begin function _Z13test_constantIa28custom_multiple_constant_xorIaEEvPT_iPKc .p2align 5 .type _Z13test_constantIa28custom_multiple_constant_xorIaEEvPT_iPKc,@function _Z13test_constantIa28custom_multiple_constant_xorIaEEvPT_iPKc: # @_Z13test_constantIa28custom_multiple_constant_xorIaEEvPT_iPKc @@ -9287,12 +9210,14 @@ _Z13test_constantIa28custom_multiple_constant_xorIaEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB37_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI37_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI37_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -9326,12 +9251,8 @@ _Z13test_constantIa28custom_multiple_constant_xorIaEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIa28custom_multiple_constant_xorIaEEvPT_iPKc, .Lfunc_end37-_Z13test_constantIa28custom_multiple_constant_xorIaEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh10custom_twoIhEEvPT_iPKc -.LCPI38_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh10custom_twoIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh10custom_twoIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh10custom_twoIhEEvPT_iPKc + .weak _Z13test_constantIh10custom_twoIhEEvPT_iPKc # -- Begin function _Z13test_constantIh10custom_twoIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh10custom_twoIhEEvPT_iPKc,@function _Z13test_constantIh10custom_twoIhEEvPT_iPKc: # @_Z13test_constantIh10custom_twoIhEEvPT_iPKc @@ -9433,12 +9354,14 @@ _Z13test_constantIh10custom_twoIhEEvPT_iPKc: # @_Z13test_constantIh10custom_twoI move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB38_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI38_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI38_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -9469,12 +9392,8 @@ _Z13test_constantIh10custom_twoIhEEvPT_iPKc: # @_Z13test_constantIh10custom_twoI .size _Z13test_constantIh10custom_twoIhEEvPT_iPKc, .Lfunc_end38-_Z13test_constantIh10custom_twoIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh20custom_add_constantsIhEEvPT_iPKc -.LCPI39_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh20custom_add_constantsIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh20custom_add_constantsIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh20custom_add_constantsIhEEvPT_iPKc + .weak _Z13test_constantIh20custom_add_constantsIhEEvPT_iPKc # -- Begin function _Z13test_constantIh20custom_add_constantsIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh20custom_add_constantsIhEEvPT_iPKc,@function _Z13test_constantIh20custom_add_constantsIhEEvPT_iPKc: # @_Z13test_constantIh20custom_add_constantsIhEEvPT_iPKc @@ -9576,12 +9495,14 @@ _Z13test_constantIh20custom_add_constantsIhEEvPT_iPKc: # @_Z13test_constantIh20c move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB39_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI39_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI39_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -9612,12 +9533,8 @@ _Z13test_constantIh20custom_add_constantsIhEEvPT_iPKc: # @_Z13test_constantIh20c .size _Z13test_constantIh20custom_add_constantsIhEEvPT_iPKc, .Lfunc_end39-_Z13test_constantIh20custom_add_constantsIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh20custom_sub_constantsIhEEvPT_iPKc -.LCPI40_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh20custom_sub_constantsIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh20custom_sub_constantsIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh20custom_sub_constantsIhEEvPT_iPKc + .weak _Z13test_constantIh20custom_sub_constantsIhEEvPT_iPKc # -- Begin function _Z13test_constantIh20custom_sub_constantsIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh20custom_sub_constantsIhEEvPT_iPKc,@function _Z13test_constantIh20custom_sub_constantsIhEEvPT_iPKc: # @_Z13test_constantIh20custom_sub_constantsIhEEvPT_iPKc @@ -9719,12 +9636,14 @@ _Z13test_constantIh20custom_sub_constantsIhEEvPT_iPKc: # @_Z13test_constantIh20c move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB40_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI40_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI40_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -9755,12 +9674,8 @@ _Z13test_constantIh20custom_sub_constantsIhEEvPT_iPKc: # @_Z13test_constantIh20c .size _Z13test_constantIh20custom_sub_constantsIhEEvPT_iPKc, .Lfunc_end40-_Z13test_constantIh20custom_sub_constantsIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh25custom_multiply_constantsIhEEvPT_iPKc -.LCPI41_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh25custom_multiply_constantsIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh25custom_multiply_constantsIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh25custom_multiply_constantsIhEEvPT_iPKc + .weak _Z13test_constantIh25custom_multiply_constantsIhEEvPT_iPKc # -- Begin function _Z13test_constantIh25custom_multiply_constantsIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh25custom_multiply_constantsIhEEvPT_iPKc,@function _Z13test_constantIh25custom_multiply_constantsIhEEvPT_iPKc: # @_Z13test_constantIh25custom_multiply_constantsIhEEvPT_iPKc @@ -9864,12 +9779,14 @@ _Z13test_constantIh25custom_multiply_constantsIhEEvPT_iPKc: # @_Z13test_constant move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB41_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI41_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI41_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -9900,12 +9817,8 @@ _Z13test_constantIh25custom_multiply_constantsIhEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantIh25custom_multiply_constantsIhEEvPT_iPKc, .Lfunc_end41-_Z13test_constantIh25custom_multiply_constantsIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh23custom_divide_constantsIhEEvPT_iPKc -.LCPI42_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh23custom_divide_constantsIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh23custom_divide_constantsIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh23custom_divide_constantsIhEEvPT_iPKc + .weak _Z13test_constantIh23custom_divide_constantsIhEEvPT_iPKc # -- Begin function _Z13test_constantIh23custom_divide_constantsIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh23custom_divide_constantsIhEEvPT_iPKc,@function _Z13test_constantIh23custom_divide_constantsIhEEvPT_iPKc: # @_Z13test_constantIh23custom_divide_constantsIhEEvPT_iPKc @@ -10007,12 +9920,14 @@ _Z13test_constantIh23custom_divide_constantsIhEEvPT_iPKc: # @_Z13test_constantIh move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB42_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI42_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI42_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -10043,12 +9958,8 @@ _Z13test_constantIh23custom_divide_constantsIhEEvPT_iPKc: # @_Z13test_constantIh .size _Z13test_constantIh23custom_divide_constantsIhEEvPT_iPKc, .Lfunc_end42-_Z13test_constantIh23custom_divide_constantsIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh20custom_mod_constantsIhEEvPT_iPKc -.LCPI43_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh20custom_mod_constantsIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh20custom_mod_constantsIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh20custom_mod_constantsIhEEvPT_iPKc + .weak _Z13test_constantIh20custom_mod_constantsIhEEvPT_iPKc # -- Begin function _Z13test_constantIh20custom_mod_constantsIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh20custom_mod_constantsIhEEvPT_iPKc,@function _Z13test_constantIh20custom_mod_constantsIhEEvPT_iPKc: # @_Z13test_constantIh20custom_mod_constantsIhEEvPT_iPKc @@ -10150,12 +10061,14 @@ _Z13test_constantIh20custom_mod_constantsIhEEvPT_iPKc: # @_Z13test_constantIh20c move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB43_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI43_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI43_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -10186,12 +10099,8 @@ _Z13test_constantIh20custom_mod_constantsIhEEvPT_iPKc: # @_Z13test_constantIh20c .size _Z13test_constantIh20custom_mod_constantsIhEEvPT_iPKc, .Lfunc_end43-_Z13test_constantIh20custom_mod_constantsIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh22custom_equal_constantsIhEEvPT_iPKc -.LCPI44_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh22custom_equal_constantsIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh22custom_equal_constantsIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh22custom_equal_constantsIhEEvPT_iPKc + .weak _Z13test_constantIh22custom_equal_constantsIhEEvPT_iPKc # -- Begin function _Z13test_constantIh22custom_equal_constantsIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh22custom_equal_constantsIhEEvPT_iPKc,@function _Z13test_constantIh22custom_equal_constantsIhEEvPT_iPKc: # @_Z13test_constantIh22custom_equal_constantsIhEEvPT_iPKc @@ -10244,12 +10153,14 @@ _Z13test_constantIh22custom_equal_constantsIhEEvPT_iPKc: # @_Z13test_constantIh2 # %bb.3: # %._crit_edge.i ld.w $a1, $s1, %pc_lo12(current_test) .LBB44_4: # %_Z13record_resultdPKc.exit - pcalau12i $a2, %pc_hi20(.LCPI44_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI44_0) sub.d $a2, $s0, $s2 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a2, $a2, 1042 movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a2, $a1, $a0, 4 slli.d $a3, $a1, 4 fstx.d $fa0, $a0, $a3 @@ -10278,12 +10189,8 @@ _Z13test_constantIh22custom_equal_constantsIhEEvPT_iPKc: # @_Z13test_constantIh2 .size _Z13test_constantIh22custom_equal_constantsIhEEvPT_iPKc, .Lfunc_end44-_Z13test_constantIh22custom_equal_constantsIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh25custom_notequal_constantsIhEEvPT_iPKc -.LCPI45_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh25custom_notequal_constantsIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh25custom_notequal_constantsIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh25custom_notequal_constantsIhEEvPT_iPKc + .weak _Z13test_constantIh25custom_notequal_constantsIhEEvPT_iPKc # -- Begin function _Z13test_constantIh25custom_notequal_constantsIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh25custom_notequal_constantsIhEEvPT_iPKc,@function _Z13test_constantIh25custom_notequal_constantsIhEEvPT_iPKc: # @_Z13test_constantIh25custom_notequal_constantsIhEEvPT_iPKc @@ -10385,12 +10292,14 @@ _Z13test_constantIh25custom_notequal_constantsIhEEvPT_iPKc: # @_Z13test_constant move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB45_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI45_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI45_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -10421,12 +10330,8 @@ _Z13test_constantIh25custom_notequal_constantsIhEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantIh25custom_notequal_constantsIhEEvPT_iPKc, .Lfunc_end45-_Z13test_constantIh25custom_notequal_constantsIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh28custom_greaterthan_constantsIhEEvPT_iPKc -.LCPI46_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh28custom_greaterthan_constantsIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh28custom_greaterthan_constantsIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh28custom_greaterthan_constantsIhEEvPT_iPKc + .weak _Z13test_constantIh28custom_greaterthan_constantsIhEEvPT_iPKc # -- Begin function _Z13test_constantIh28custom_greaterthan_constantsIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh28custom_greaterthan_constantsIhEEvPT_iPKc,@function _Z13test_constantIh28custom_greaterthan_constantsIhEEvPT_iPKc: # @_Z13test_constantIh28custom_greaterthan_constantsIhEEvPT_iPKc @@ -10528,12 +10433,14 @@ _Z13test_constantIh28custom_greaterthan_constantsIhEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB46_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI46_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI46_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -10564,12 +10471,8 @@ _Z13test_constantIh28custom_greaterthan_constantsIhEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIh28custom_greaterthan_constantsIhEEvPT_iPKc, .Lfunc_end46-_Z13test_constantIh28custom_greaterthan_constantsIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh25custom_lessthan_constantsIhEEvPT_iPKc -.LCPI47_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh25custom_lessthan_constantsIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh25custom_lessthan_constantsIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh25custom_lessthan_constantsIhEEvPT_iPKc + .weak _Z13test_constantIh25custom_lessthan_constantsIhEEvPT_iPKc # -- Begin function _Z13test_constantIh25custom_lessthan_constantsIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh25custom_lessthan_constantsIhEEvPT_iPKc,@function _Z13test_constantIh25custom_lessthan_constantsIhEEvPT_iPKc: # @_Z13test_constantIh25custom_lessthan_constantsIhEEvPT_iPKc @@ -10622,12 +10525,14 @@ _Z13test_constantIh25custom_lessthan_constantsIhEEvPT_iPKc: # @_Z13test_constant # %bb.3: # %._crit_edge.i ld.w $a1, $s1, %pc_lo12(current_test) .LBB47_4: # %_Z13record_resultdPKc.exit - pcalau12i $a2, %pc_hi20(.LCPI47_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI47_0) sub.d $a2, $s0, $s2 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a2, $a2, 1042 movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a2, $a1, $a0, 4 slli.d $a3, $a1, 4 fstx.d $fa0, $a0, $a3 @@ -10656,12 +10561,8 @@ _Z13test_constantIh25custom_lessthan_constantsIhEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantIh25custom_lessthan_constantsIhEEvPT_iPKc, .Lfunc_end47-_Z13test_constantIh25custom_lessthan_constantsIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh33custom_greaterthanequal_constantsIhEEvPT_iPKc -.LCPI48_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh33custom_greaterthanequal_constantsIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh33custom_greaterthanequal_constantsIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh33custom_greaterthanequal_constantsIhEEvPT_iPKc + .weak _Z13test_constantIh33custom_greaterthanequal_constantsIhEEvPT_iPKc # -- Begin function _Z13test_constantIh33custom_greaterthanequal_constantsIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh33custom_greaterthanequal_constantsIhEEvPT_iPKc,@function _Z13test_constantIh33custom_greaterthanequal_constantsIhEEvPT_iPKc: # @_Z13test_constantIh33custom_greaterthanequal_constantsIhEEvPT_iPKc @@ -10763,12 +10664,14 @@ _Z13test_constantIh33custom_greaterthanequal_constantsIhEEvPT_iPKc: # @_Z13test_ move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB48_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI48_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI48_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -10799,12 +10702,8 @@ _Z13test_constantIh33custom_greaterthanequal_constantsIhEEvPT_iPKc: # @_Z13test_ .size _Z13test_constantIh33custom_greaterthanequal_constantsIhEEvPT_iPKc, .Lfunc_end48-_Z13test_constantIh33custom_greaterthanequal_constantsIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh30custom_lessthanequal_constantsIhEEvPT_iPKc -.LCPI49_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh30custom_lessthanequal_constantsIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh30custom_lessthanequal_constantsIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh30custom_lessthanequal_constantsIhEEvPT_iPKc + .weak _Z13test_constantIh30custom_lessthanequal_constantsIhEEvPT_iPKc # -- Begin function _Z13test_constantIh30custom_lessthanequal_constantsIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh30custom_lessthanequal_constantsIhEEvPT_iPKc,@function _Z13test_constantIh30custom_lessthanequal_constantsIhEEvPT_iPKc: # @_Z13test_constantIh30custom_lessthanequal_constantsIhEEvPT_iPKc @@ -10857,12 +10756,14 @@ _Z13test_constantIh30custom_lessthanequal_constantsIhEEvPT_iPKc: # @_Z13test_con # %bb.3: # %._crit_edge.i ld.w $a1, $s1, %pc_lo12(current_test) .LBB49_4: # %_Z13record_resultdPKc.exit - pcalau12i $a2, %pc_hi20(.LCPI49_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI49_0) sub.d $a2, $s0, $s2 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a2, $a2, 1042 movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a2, $a1, $a0, 4 slli.d $a3, $a1, 4 fstx.d $fa0, $a0, $a3 @@ -10891,12 +10792,8 @@ _Z13test_constantIh30custom_lessthanequal_constantsIhEEvPT_iPKc: # @_Z13test_con .size _Z13test_constantIh30custom_lessthanequal_constantsIhEEvPT_iPKc, .Lfunc_end49-_Z13test_constantIh30custom_lessthanequal_constantsIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh20custom_and_constantsIhEEvPT_iPKc -.LCPI50_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh20custom_and_constantsIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh20custom_and_constantsIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh20custom_and_constantsIhEEvPT_iPKc + .weak _Z13test_constantIh20custom_and_constantsIhEEvPT_iPKc # -- Begin function _Z13test_constantIh20custom_and_constantsIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh20custom_and_constantsIhEEvPT_iPKc,@function _Z13test_constantIh20custom_and_constantsIhEEvPT_iPKc: # @_Z13test_constantIh20custom_and_constantsIhEEvPT_iPKc @@ -10998,12 +10895,14 @@ _Z13test_constantIh20custom_and_constantsIhEEvPT_iPKc: # @_Z13test_constantIh20c move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB50_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI50_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI50_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -11034,12 +10933,8 @@ _Z13test_constantIh20custom_and_constantsIhEEvPT_iPKc: # @_Z13test_constantIh20c .size _Z13test_constantIh20custom_and_constantsIhEEvPT_iPKc, .Lfunc_end50-_Z13test_constantIh20custom_and_constantsIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh19custom_or_constantsIhEEvPT_iPKc -.LCPI51_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh19custom_or_constantsIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh19custom_or_constantsIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh19custom_or_constantsIhEEvPT_iPKc + .weak _Z13test_constantIh19custom_or_constantsIhEEvPT_iPKc # -- Begin function _Z13test_constantIh19custom_or_constantsIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh19custom_or_constantsIhEEvPT_iPKc,@function _Z13test_constantIh19custom_or_constantsIhEEvPT_iPKc: # @_Z13test_constantIh19custom_or_constantsIhEEvPT_iPKc @@ -11143,12 +11038,14 @@ _Z13test_constantIh19custom_or_constantsIhEEvPT_iPKc: # @_Z13test_constantIh19cu move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB51_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI51_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI51_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -11179,12 +11076,8 @@ _Z13test_constantIh19custom_or_constantsIhEEvPT_iPKc: # @_Z13test_constantIh19cu .size _Z13test_constantIh19custom_or_constantsIhEEvPT_iPKc, .Lfunc_end51-_Z13test_constantIh19custom_or_constantsIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh20custom_xor_constantsIhEEvPT_iPKc -.LCPI52_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh20custom_xor_constantsIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh20custom_xor_constantsIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh20custom_xor_constantsIhEEvPT_iPKc + .weak _Z13test_constantIh20custom_xor_constantsIhEEvPT_iPKc # -- Begin function _Z13test_constantIh20custom_xor_constantsIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh20custom_xor_constantsIhEEvPT_iPKc,@function _Z13test_constantIh20custom_xor_constantsIhEEvPT_iPKc: # @_Z13test_constantIh20custom_xor_constantsIhEEvPT_iPKc @@ -11288,12 +11181,14 @@ _Z13test_constantIh20custom_xor_constantsIhEEvPT_iPKc: # @_Z13test_constantIh20c move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB52_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI52_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI52_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -11324,12 +11219,8 @@ _Z13test_constantIh20custom_xor_constantsIhEEvPT_iPKc: # @_Z13test_constantIh20c .size _Z13test_constantIh20custom_xor_constantsIhEEvPT_iPKc, .Lfunc_end52-_Z13test_constantIh20custom_xor_constantsIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh19custom_constant_addIhEEvPT_iPKc -.LCPI53_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh19custom_constant_addIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh19custom_constant_addIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh19custom_constant_addIhEEvPT_iPKc + .weak _Z13test_constantIh19custom_constant_addIhEEvPT_iPKc # -- Begin function _Z13test_constantIh19custom_constant_addIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh19custom_constant_addIhEEvPT_iPKc,@function _Z13test_constantIh19custom_constant_addIhEEvPT_iPKc: # @_Z13test_constantIh19custom_constant_addIhEEvPT_iPKc @@ -11565,12 +11456,14 @@ _Z13test_constantIh19custom_constant_addIhEEvPT_iPKc: # @_Z13test_constantIh19cu move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB53_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI53_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI53_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -11604,12 +11497,8 @@ _Z13test_constantIh19custom_constant_addIhEEvPT_iPKc: # @_Z13test_constantIh19cu .size _Z13test_constantIh19custom_constant_addIhEEvPT_iPKc, .Lfunc_end53-_Z13test_constantIh19custom_constant_addIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh28custom_multiple_constant_addIhEEvPT_iPKc -.LCPI54_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh28custom_multiple_constant_addIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh28custom_multiple_constant_addIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh28custom_multiple_constant_addIhEEvPT_iPKc + .weak _Z13test_constantIh28custom_multiple_constant_addIhEEvPT_iPKc # -- Begin function _Z13test_constantIh28custom_multiple_constant_addIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh28custom_multiple_constant_addIhEEvPT_iPKc,@function _Z13test_constantIh28custom_multiple_constant_addIhEEvPT_iPKc: # @_Z13test_constantIh28custom_multiple_constant_addIhEEvPT_iPKc @@ -11845,12 +11734,14 @@ _Z13test_constantIh28custom_multiple_constant_addIhEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB54_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI54_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI54_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -11884,12 +11775,8 @@ _Z13test_constantIh28custom_multiple_constant_addIhEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIh28custom_multiple_constant_addIhEEvPT_iPKc, .Lfunc_end54-_Z13test_constantIh28custom_multiple_constant_addIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh19custom_constant_subIhEEvPT_iPKc -.LCPI55_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh19custom_constant_subIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh19custom_constant_subIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh19custom_constant_subIhEEvPT_iPKc + .weak _Z13test_constantIh19custom_constant_subIhEEvPT_iPKc # -- Begin function _Z13test_constantIh19custom_constant_subIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh19custom_constant_subIhEEvPT_iPKc,@function _Z13test_constantIh19custom_constant_subIhEEvPT_iPKc: # @_Z13test_constantIh19custom_constant_subIhEEvPT_iPKc @@ -12128,12 +12015,14 @@ _Z13test_constantIh19custom_constant_subIhEEvPT_iPKc: # @_Z13test_constantIh19cu move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB55_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI55_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI55_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -12167,12 +12056,8 @@ _Z13test_constantIh19custom_constant_subIhEEvPT_iPKc: # @_Z13test_constantIh19cu .size _Z13test_constantIh19custom_constant_subIhEEvPT_iPKc, .Lfunc_end55-_Z13test_constantIh19custom_constant_subIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh28custom_multiple_constant_subIhEEvPT_iPKc -.LCPI56_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh28custom_multiple_constant_subIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh28custom_multiple_constant_subIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh28custom_multiple_constant_subIhEEvPT_iPKc + .weak _Z13test_constantIh28custom_multiple_constant_subIhEEvPT_iPKc # -- Begin function _Z13test_constantIh28custom_multiple_constant_subIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh28custom_multiple_constant_subIhEEvPT_iPKc,@function _Z13test_constantIh28custom_multiple_constant_subIhEEvPT_iPKc: # @_Z13test_constantIh28custom_multiple_constant_subIhEEvPT_iPKc @@ -12411,12 +12296,14 @@ _Z13test_constantIh28custom_multiple_constant_subIhEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB56_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI56_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI56_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -12450,12 +12337,8 @@ _Z13test_constantIh28custom_multiple_constant_subIhEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIh28custom_multiple_constant_subIhEEvPT_iPKc, .Lfunc_end56-_Z13test_constantIh28custom_multiple_constant_subIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh24custom_constant_multiplyIhEEvPT_iPKc -.LCPI57_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh24custom_constant_multiplyIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh24custom_constant_multiplyIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh24custom_constant_multiplyIhEEvPT_iPKc + .weak _Z13test_constantIh24custom_constant_multiplyIhEEvPT_iPKc # -- Begin function _Z13test_constantIh24custom_constant_multiplyIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh24custom_constant_multiplyIhEEvPT_iPKc,@function _Z13test_constantIh24custom_constant_multiplyIhEEvPT_iPKc: # @_Z13test_constantIh24custom_constant_multiplyIhEEvPT_iPKc @@ -12657,12 +12540,14 @@ _Z13test_constantIh24custom_constant_multiplyIhEEvPT_iPKc: # @_Z13test_constantI move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB57_23: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI57_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI57_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -12696,12 +12581,8 @@ _Z13test_constantIh24custom_constant_multiplyIhEEvPT_iPKc: # @_Z13test_constantI .size _Z13test_constantIh24custom_constant_multiplyIhEEvPT_iPKc, .Lfunc_end57-_Z13test_constantIh24custom_constant_multiplyIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh33custom_multiple_constant_multiplyIhEEvPT_iPKc -.LCPI58_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh33custom_multiple_constant_multiplyIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh33custom_multiple_constant_multiplyIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh33custom_multiple_constant_multiplyIhEEvPT_iPKc + .weak _Z13test_constantIh33custom_multiple_constant_multiplyIhEEvPT_iPKc # -- Begin function _Z13test_constantIh33custom_multiple_constant_multiplyIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh33custom_multiple_constant_multiplyIhEEvPT_iPKc,@function _Z13test_constantIh33custom_multiple_constant_multiplyIhEEvPT_iPKc: # @_Z13test_constantIh33custom_multiple_constant_multiplyIhEEvPT_iPKc @@ -12903,12 +12784,14 @@ _Z13test_constantIh33custom_multiple_constant_multiplyIhEEvPT_iPKc: # @_Z13test_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB58_23: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI58_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI58_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -12942,12 +12825,8 @@ _Z13test_constantIh33custom_multiple_constant_multiplyIhEEvPT_iPKc: # @_Z13test_ .size _Z13test_constantIh33custom_multiple_constant_multiplyIhEEvPT_iPKc, .Lfunc_end58-_Z13test_constantIh33custom_multiple_constant_multiplyIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh34custom_multiple_constant_multiply2IhEEvPT_iPKc -.LCPI59_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh34custom_multiple_constant_multiply2IhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh34custom_multiple_constant_multiply2IhEEvPT_iPKc,comdat - .weak _Z13test_constantIh34custom_multiple_constant_multiply2IhEEvPT_iPKc + .weak _Z13test_constantIh34custom_multiple_constant_multiply2IhEEvPT_iPKc # -- Begin function _Z13test_constantIh34custom_multiple_constant_multiply2IhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh34custom_multiple_constant_multiply2IhEEvPT_iPKc,@function _Z13test_constantIh34custom_multiple_constant_multiply2IhEEvPT_iPKc: # @_Z13test_constantIh34custom_multiple_constant_multiply2IhEEvPT_iPKc @@ -13183,12 +13062,14 @@ _Z13test_constantIh34custom_multiple_constant_multiply2IhEEvPT_iPKc: # @_Z13test move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB59_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI59_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI59_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -13222,12 +13103,8 @@ _Z13test_constantIh34custom_multiple_constant_multiply2IhEEvPT_iPKc: # @_Z13test .size _Z13test_constantIh34custom_multiple_constant_multiply2IhEEvPT_iPKc, .Lfunc_end59-_Z13test_constantIh34custom_multiple_constant_multiply2IhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh22custom_constant_divideIhEEvPT_iPKc -.LCPI60_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh22custom_constant_divideIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh22custom_constant_divideIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh22custom_constant_divideIhEEvPT_iPKc + .weak _Z13test_constantIh22custom_constant_divideIhEEvPT_iPKc # -- Begin function _Z13test_constantIh22custom_constant_divideIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh22custom_constant_divideIhEEvPT_iPKc,@function _Z13test_constantIh22custom_constant_divideIhEEvPT_iPKc: # @_Z13test_constantIh22custom_constant_divideIhEEvPT_iPKc @@ -13507,12 +13384,14 @@ _Z13test_constantIh22custom_constant_divideIhEEvPT_iPKc: # @_Z13test_constantIh2 move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB60_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI60_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI60_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -13546,12 +13425,8 @@ _Z13test_constantIh22custom_constant_divideIhEEvPT_iPKc: # @_Z13test_constantIh2 .size _Z13test_constantIh22custom_constant_divideIhEEvPT_iPKc, .Lfunc_end60-_Z13test_constantIh22custom_constant_divideIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh31custom_multiple_constant_divideIhEEvPT_iPKc -.LCPI61_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh31custom_multiple_constant_divideIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh31custom_multiple_constant_divideIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh31custom_multiple_constant_divideIhEEvPT_iPKc + .weak _Z13test_constantIh31custom_multiple_constant_divideIhEEvPT_iPKc # -- Begin function _Z13test_constantIh31custom_multiple_constant_divideIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh31custom_multiple_constant_divideIhEEvPT_iPKc,@function _Z13test_constantIh31custom_multiple_constant_divideIhEEvPT_iPKc: # @_Z13test_constantIh31custom_multiple_constant_divideIhEEvPT_iPKc @@ -13829,12 +13704,14 @@ _Z13test_constantIh31custom_multiple_constant_divideIhEEvPT_iPKc: # @_Z13test_co move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB61_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI61_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI61_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -13868,12 +13745,8 @@ _Z13test_constantIh31custom_multiple_constant_divideIhEEvPT_iPKc: # @_Z13test_co .size _Z13test_constantIh31custom_multiple_constant_divideIhEEvPT_iPKc, .Lfunc_end61-_Z13test_constantIh31custom_multiple_constant_divideIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh32custom_multiple_constant_divide2IhEEvPT_iPKc -.LCPI62_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh32custom_multiple_constant_divide2IhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh32custom_multiple_constant_divide2IhEEvPT_iPKc,comdat - .weak _Z13test_constantIh32custom_multiple_constant_divide2IhEEvPT_iPKc + .weak _Z13test_constantIh32custom_multiple_constant_divide2IhEEvPT_iPKc # -- Begin function _Z13test_constantIh32custom_multiple_constant_divide2IhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh32custom_multiple_constant_divide2IhEEvPT_iPKc,@function _Z13test_constantIh32custom_multiple_constant_divide2IhEEvPT_iPKc: # @_Z13test_constantIh32custom_multiple_constant_divide2IhEEvPT_iPKc @@ -14109,12 +13982,14 @@ _Z13test_constantIh32custom_multiple_constant_divide2IhEEvPT_iPKc: # @_Z13test_c move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB62_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI62_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI62_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -14148,12 +14023,8 @@ _Z13test_constantIh32custom_multiple_constant_divide2IhEEvPT_iPKc: # @_Z13test_c .size _Z13test_constantIh32custom_multiple_constant_divide2IhEEvPT_iPKc, .Lfunc_end62-_Z13test_constantIh32custom_multiple_constant_divide2IhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh30custom_multiple_constant_mixedIhEEvPT_iPKc -.LCPI63_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh30custom_multiple_constant_mixedIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh30custom_multiple_constant_mixedIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh30custom_multiple_constant_mixedIhEEvPT_iPKc + .weak _Z13test_constantIh30custom_multiple_constant_mixedIhEEvPT_iPKc # -- Begin function _Z13test_constantIh30custom_multiple_constant_mixedIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh30custom_multiple_constant_mixedIhEEvPT_iPKc,@function _Z13test_constantIh30custom_multiple_constant_mixedIhEEvPT_iPKc: # @_Z13test_constantIh30custom_multiple_constant_mixedIhEEvPT_iPKc @@ -14382,12 +14253,14 @@ _Z13test_constantIh30custom_multiple_constant_mixedIhEEvPT_iPKc: # @_Z13test_con move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB63_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI63_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI63_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -14421,12 +14294,8 @@ _Z13test_constantIh30custom_multiple_constant_mixedIhEEvPT_iPKc: # @_Z13test_con .size _Z13test_constantIh30custom_multiple_constant_mixedIhEEvPT_iPKc, .Lfunc_end63-_Z13test_constantIh30custom_multiple_constant_mixedIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh19custom_constant_andIhEEvPT_iPKc -.LCPI64_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh19custom_constant_andIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh19custom_constant_andIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh19custom_constant_andIhEEvPT_iPKc + .weak _Z13test_constantIh19custom_constant_andIhEEvPT_iPKc # -- Begin function _Z13test_constantIh19custom_constant_andIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh19custom_constant_andIhEEvPT_iPKc,@function _Z13test_constantIh19custom_constant_andIhEEvPT_iPKc: # @_Z13test_constantIh19custom_constant_andIhEEvPT_iPKc @@ -14659,12 +14528,14 @@ _Z13test_constantIh19custom_constant_andIhEEvPT_iPKc: # @_Z13test_constantIh19cu move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB64_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI64_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI64_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -14698,12 +14569,8 @@ _Z13test_constantIh19custom_constant_andIhEEvPT_iPKc: # @_Z13test_constantIh19cu .size _Z13test_constantIh19custom_constant_andIhEEvPT_iPKc, .Lfunc_end64-_Z13test_constantIh19custom_constant_andIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh28custom_multiple_constant_andIhEEvPT_iPKc -.LCPI65_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh28custom_multiple_constant_andIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh28custom_multiple_constant_andIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh28custom_multiple_constant_andIhEEvPT_iPKc + .weak _Z13test_constantIh28custom_multiple_constant_andIhEEvPT_iPKc # -- Begin function _Z13test_constantIh28custom_multiple_constant_andIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh28custom_multiple_constant_andIhEEvPT_iPKc,@function _Z13test_constantIh28custom_multiple_constant_andIhEEvPT_iPKc: # @_Z13test_constantIh28custom_multiple_constant_andIhEEvPT_iPKc @@ -14936,12 +14803,14 @@ _Z13test_constantIh28custom_multiple_constant_andIhEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB65_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI65_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI65_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -14975,12 +14844,8 @@ _Z13test_constantIh28custom_multiple_constant_andIhEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIh28custom_multiple_constant_andIhEEvPT_iPKc, .Lfunc_end65-_Z13test_constantIh28custom_multiple_constant_andIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh18custom_constant_orIhEEvPT_iPKc -.LCPI66_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh18custom_constant_orIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh18custom_constant_orIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh18custom_constant_orIhEEvPT_iPKc + .weak _Z13test_constantIh18custom_constant_orIhEEvPT_iPKc # -- Begin function _Z13test_constantIh18custom_constant_orIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh18custom_constant_orIhEEvPT_iPKc,@function _Z13test_constantIh18custom_constant_orIhEEvPT_iPKc: # @_Z13test_constantIh18custom_constant_orIhEEvPT_iPKc @@ -15202,12 +15067,14 @@ _Z13test_constantIh18custom_constant_orIhEEvPT_iPKc: # @_Z13test_constantIh18cus move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB66_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI66_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI66_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -15241,12 +15108,8 @@ _Z13test_constantIh18custom_constant_orIhEEvPT_iPKc: # @_Z13test_constantIh18cus .size _Z13test_constantIh18custom_constant_orIhEEvPT_iPKc, .Lfunc_end66-_Z13test_constantIh18custom_constant_orIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh27custom_multiple_constant_orIhEEvPT_iPKc -.LCPI67_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh27custom_multiple_constant_orIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh27custom_multiple_constant_orIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh27custom_multiple_constant_orIhEEvPT_iPKc + .weak _Z13test_constantIh27custom_multiple_constant_orIhEEvPT_iPKc # -- Begin function _Z13test_constantIh27custom_multiple_constant_orIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh27custom_multiple_constant_orIhEEvPT_iPKc,@function _Z13test_constantIh27custom_multiple_constant_orIhEEvPT_iPKc: # @_Z13test_constantIh27custom_multiple_constant_orIhEEvPT_iPKc @@ -15462,12 +15325,14 @@ _Z13test_constantIh27custom_multiple_constant_orIhEEvPT_iPKc: # @_Z13test_consta move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB67_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI67_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI67_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -15501,12 +15366,8 @@ _Z13test_constantIh27custom_multiple_constant_orIhEEvPT_iPKc: # @_Z13test_consta .size _Z13test_constantIh27custom_multiple_constant_orIhEEvPT_iPKc, .Lfunc_end67-_Z13test_constantIh27custom_multiple_constant_orIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh19custom_constant_xorIhEEvPT_iPKc -.LCPI68_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh19custom_constant_xorIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh19custom_constant_xorIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh19custom_constant_xorIhEEvPT_iPKc + .weak _Z13test_constantIh19custom_constant_xorIhEEvPT_iPKc # -- Begin function _Z13test_constantIh19custom_constant_xorIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh19custom_constant_xorIhEEvPT_iPKc,@function _Z13test_constantIh19custom_constant_xorIhEEvPT_iPKc: # @_Z13test_constantIh19custom_constant_xorIhEEvPT_iPKc @@ -15742,12 +15603,14 @@ _Z13test_constantIh19custom_constant_xorIhEEvPT_iPKc: # @_Z13test_constantIh19cu move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB68_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI68_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI68_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -15781,12 +15644,8 @@ _Z13test_constantIh19custom_constant_xorIhEEvPT_iPKc: # @_Z13test_constantIh19cu .size _Z13test_constantIh19custom_constant_xorIhEEvPT_iPKc, .Lfunc_end68-_Z13test_constantIh19custom_constant_xorIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIh28custom_multiple_constant_xorIhEEvPT_iPKc -.LCPI69_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIh28custom_multiple_constant_xorIhEEvPT_iPKc,"axG",@progbits,_Z13test_constantIh28custom_multiple_constant_xorIhEEvPT_iPKc,comdat - .weak _Z13test_constantIh28custom_multiple_constant_xorIhEEvPT_iPKc + .weak _Z13test_constantIh28custom_multiple_constant_xorIhEEvPT_iPKc # -- Begin function _Z13test_constantIh28custom_multiple_constant_xorIhEEvPT_iPKc .p2align 5 .type _Z13test_constantIh28custom_multiple_constant_xorIhEEvPT_iPKc,@function _Z13test_constantIh28custom_multiple_constant_xorIhEEvPT_iPKc: # @_Z13test_constantIh28custom_multiple_constant_xorIhEEvPT_iPKc @@ -16022,12 +15881,14 @@ _Z13test_constantIh28custom_multiple_constant_xorIhEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB69_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI69_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI69_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -16061,12 +15922,8 @@ _Z13test_constantIh28custom_multiple_constant_xorIhEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIh28custom_multiple_constant_xorIhEEvPT_iPKc, .Lfunc_end69-_Z13test_constantIh28custom_multiple_constant_xorIhEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs10custom_twoIsEEvPT_iPKc -.LCPI70_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs10custom_twoIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs10custom_twoIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs10custom_twoIsEEvPT_iPKc + .weak _Z13test_constantIs10custom_twoIsEEvPT_iPKc # -- Begin function _Z13test_constantIs10custom_twoIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs10custom_twoIsEEvPT_iPKc,@function _Z13test_constantIs10custom_twoIsEEvPT_iPKc: # @_Z13test_constantIs10custom_twoIsEEvPT_iPKc @@ -16169,12 +16026,14 @@ _Z13test_constantIs10custom_twoIsEEvPT_iPKc: # @_Z13test_constantIs10custom_twoI move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB70_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI70_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI70_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -16205,12 +16064,8 @@ _Z13test_constantIs10custom_twoIsEEvPT_iPKc: # @_Z13test_constantIs10custom_twoI .size _Z13test_constantIs10custom_twoIsEEvPT_iPKc, .Lfunc_end70-_Z13test_constantIs10custom_twoIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs20custom_add_constantsIsEEvPT_iPKc -.LCPI71_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs20custom_add_constantsIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs20custom_add_constantsIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs20custom_add_constantsIsEEvPT_iPKc + .weak _Z13test_constantIs20custom_add_constantsIsEEvPT_iPKc # -- Begin function _Z13test_constantIs20custom_add_constantsIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs20custom_add_constantsIsEEvPT_iPKc,@function _Z13test_constantIs20custom_add_constantsIsEEvPT_iPKc: # @_Z13test_constantIs20custom_add_constantsIsEEvPT_iPKc @@ -16313,12 +16168,14 @@ _Z13test_constantIs20custom_add_constantsIsEEvPT_iPKc: # @_Z13test_constantIs20c move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB71_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI71_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI71_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -16349,12 +16206,8 @@ _Z13test_constantIs20custom_add_constantsIsEEvPT_iPKc: # @_Z13test_constantIs20c .size _Z13test_constantIs20custom_add_constantsIsEEvPT_iPKc, .Lfunc_end71-_Z13test_constantIs20custom_add_constantsIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs20custom_sub_constantsIsEEvPT_iPKc -.LCPI72_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs20custom_sub_constantsIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs20custom_sub_constantsIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs20custom_sub_constantsIsEEvPT_iPKc + .weak _Z13test_constantIs20custom_sub_constantsIsEEvPT_iPKc # -- Begin function _Z13test_constantIs20custom_sub_constantsIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs20custom_sub_constantsIsEEvPT_iPKc,@function _Z13test_constantIs20custom_sub_constantsIsEEvPT_iPKc: # @_Z13test_constantIs20custom_sub_constantsIsEEvPT_iPKc @@ -16457,12 +16310,14 @@ _Z13test_constantIs20custom_sub_constantsIsEEvPT_iPKc: # @_Z13test_constantIs20c move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB72_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI72_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI72_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -16493,12 +16348,8 @@ _Z13test_constantIs20custom_sub_constantsIsEEvPT_iPKc: # @_Z13test_constantIs20c .size _Z13test_constantIs20custom_sub_constantsIsEEvPT_iPKc, .Lfunc_end72-_Z13test_constantIs20custom_sub_constantsIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs25custom_multiply_constantsIsEEvPT_iPKc -.LCPI73_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs25custom_multiply_constantsIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs25custom_multiply_constantsIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs25custom_multiply_constantsIsEEvPT_iPKc + .weak _Z13test_constantIs25custom_multiply_constantsIsEEvPT_iPKc # -- Begin function _Z13test_constantIs25custom_multiply_constantsIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs25custom_multiply_constantsIsEEvPT_iPKc,@function _Z13test_constantIs25custom_multiply_constantsIsEEvPT_iPKc: # @_Z13test_constantIs25custom_multiply_constantsIsEEvPT_iPKc @@ -16603,12 +16454,14 @@ _Z13test_constantIs25custom_multiply_constantsIsEEvPT_iPKc: # @_Z13test_constant move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB73_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI73_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI73_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -16639,12 +16492,8 @@ _Z13test_constantIs25custom_multiply_constantsIsEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantIs25custom_multiply_constantsIsEEvPT_iPKc, .Lfunc_end73-_Z13test_constantIs25custom_multiply_constantsIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs23custom_divide_constantsIsEEvPT_iPKc -.LCPI74_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs23custom_divide_constantsIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs23custom_divide_constantsIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs23custom_divide_constantsIsEEvPT_iPKc + .weak _Z13test_constantIs23custom_divide_constantsIsEEvPT_iPKc # -- Begin function _Z13test_constantIs23custom_divide_constantsIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs23custom_divide_constantsIsEEvPT_iPKc,@function _Z13test_constantIs23custom_divide_constantsIsEEvPT_iPKc: # @_Z13test_constantIs23custom_divide_constantsIsEEvPT_iPKc @@ -16747,12 +16596,14 @@ _Z13test_constantIs23custom_divide_constantsIsEEvPT_iPKc: # @_Z13test_constantIs move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB74_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI74_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI74_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -16783,12 +16634,8 @@ _Z13test_constantIs23custom_divide_constantsIsEEvPT_iPKc: # @_Z13test_constantIs .size _Z13test_constantIs23custom_divide_constantsIsEEvPT_iPKc, .Lfunc_end74-_Z13test_constantIs23custom_divide_constantsIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs20custom_mod_constantsIsEEvPT_iPKc -.LCPI75_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs20custom_mod_constantsIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs20custom_mod_constantsIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs20custom_mod_constantsIsEEvPT_iPKc + .weak _Z13test_constantIs20custom_mod_constantsIsEEvPT_iPKc # -- Begin function _Z13test_constantIs20custom_mod_constantsIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs20custom_mod_constantsIsEEvPT_iPKc,@function _Z13test_constantIs20custom_mod_constantsIsEEvPT_iPKc: # @_Z13test_constantIs20custom_mod_constantsIsEEvPT_iPKc @@ -16891,12 +16738,14 @@ _Z13test_constantIs20custom_mod_constantsIsEEvPT_iPKc: # @_Z13test_constantIs20c move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB75_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI75_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI75_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -16927,12 +16776,8 @@ _Z13test_constantIs20custom_mod_constantsIsEEvPT_iPKc: # @_Z13test_constantIs20c .size _Z13test_constantIs20custom_mod_constantsIsEEvPT_iPKc, .Lfunc_end75-_Z13test_constantIs20custom_mod_constantsIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs22custom_equal_constantsIsEEvPT_iPKc -.LCPI76_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs22custom_equal_constantsIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs22custom_equal_constantsIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs22custom_equal_constantsIsEEvPT_iPKc + .weak _Z13test_constantIs22custom_equal_constantsIsEEvPT_iPKc # -- Begin function _Z13test_constantIs22custom_equal_constantsIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs22custom_equal_constantsIsEEvPT_iPKc,@function _Z13test_constantIs22custom_equal_constantsIsEEvPT_iPKc: # @_Z13test_constantIs22custom_equal_constantsIsEEvPT_iPKc @@ -16985,12 +16830,14 @@ _Z13test_constantIs22custom_equal_constantsIsEEvPT_iPKc: # @_Z13test_constantIs2 # %bb.3: # %._crit_edge.i ld.w $a1, $s1, %pc_lo12(current_test) .LBB76_4: # %_Z13record_resultdPKc.exit - pcalau12i $a2, %pc_hi20(.LCPI76_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI76_0) sub.d $a2, $s0, $s2 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a2, $a2, 1042 movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a2, $a1, $a0, 4 slli.d $a3, $a1, 4 fstx.d $fa0, $a0, $a3 @@ -17019,12 +16866,8 @@ _Z13test_constantIs22custom_equal_constantsIsEEvPT_iPKc: # @_Z13test_constantIs2 .size _Z13test_constantIs22custom_equal_constantsIsEEvPT_iPKc, .Lfunc_end76-_Z13test_constantIs22custom_equal_constantsIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs25custom_notequal_constantsIsEEvPT_iPKc -.LCPI77_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs25custom_notequal_constantsIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs25custom_notequal_constantsIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs25custom_notequal_constantsIsEEvPT_iPKc + .weak _Z13test_constantIs25custom_notequal_constantsIsEEvPT_iPKc # -- Begin function _Z13test_constantIs25custom_notequal_constantsIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs25custom_notequal_constantsIsEEvPT_iPKc,@function _Z13test_constantIs25custom_notequal_constantsIsEEvPT_iPKc: # @_Z13test_constantIs25custom_notequal_constantsIsEEvPT_iPKc @@ -17127,12 +16970,14 @@ _Z13test_constantIs25custom_notequal_constantsIsEEvPT_iPKc: # @_Z13test_constant move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB77_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI77_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI77_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -17163,12 +17008,8 @@ _Z13test_constantIs25custom_notequal_constantsIsEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantIs25custom_notequal_constantsIsEEvPT_iPKc, .Lfunc_end77-_Z13test_constantIs25custom_notequal_constantsIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs28custom_greaterthan_constantsIsEEvPT_iPKc -.LCPI78_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs28custom_greaterthan_constantsIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs28custom_greaterthan_constantsIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs28custom_greaterthan_constantsIsEEvPT_iPKc + .weak _Z13test_constantIs28custom_greaterthan_constantsIsEEvPT_iPKc # -- Begin function _Z13test_constantIs28custom_greaterthan_constantsIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs28custom_greaterthan_constantsIsEEvPT_iPKc,@function _Z13test_constantIs28custom_greaterthan_constantsIsEEvPT_iPKc: # @_Z13test_constantIs28custom_greaterthan_constantsIsEEvPT_iPKc @@ -17271,12 +17112,14 @@ _Z13test_constantIs28custom_greaterthan_constantsIsEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB78_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI78_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI78_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -17307,12 +17150,8 @@ _Z13test_constantIs28custom_greaterthan_constantsIsEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIs28custom_greaterthan_constantsIsEEvPT_iPKc, .Lfunc_end78-_Z13test_constantIs28custom_greaterthan_constantsIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs25custom_lessthan_constantsIsEEvPT_iPKc -.LCPI79_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs25custom_lessthan_constantsIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs25custom_lessthan_constantsIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs25custom_lessthan_constantsIsEEvPT_iPKc + .weak _Z13test_constantIs25custom_lessthan_constantsIsEEvPT_iPKc # -- Begin function _Z13test_constantIs25custom_lessthan_constantsIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs25custom_lessthan_constantsIsEEvPT_iPKc,@function _Z13test_constantIs25custom_lessthan_constantsIsEEvPT_iPKc: # @_Z13test_constantIs25custom_lessthan_constantsIsEEvPT_iPKc @@ -17365,12 +17204,14 @@ _Z13test_constantIs25custom_lessthan_constantsIsEEvPT_iPKc: # @_Z13test_constant # %bb.3: # %._crit_edge.i ld.w $a1, $s1, %pc_lo12(current_test) .LBB79_4: # %_Z13record_resultdPKc.exit - pcalau12i $a2, %pc_hi20(.LCPI79_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI79_0) sub.d $a2, $s0, $s2 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a2, $a2, 1042 movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a2, $a1, $a0, 4 slli.d $a3, $a1, 4 fstx.d $fa0, $a0, $a3 @@ -17399,12 +17240,8 @@ _Z13test_constantIs25custom_lessthan_constantsIsEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantIs25custom_lessthan_constantsIsEEvPT_iPKc, .Lfunc_end79-_Z13test_constantIs25custom_lessthan_constantsIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs33custom_greaterthanequal_constantsIsEEvPT_iPKc -.LCPI80_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs33custom_greaterthanequal_constantsIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs33custom_greaterthanequal_constantsIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs33custom_greaterthanequal_constantsIsEEvPT_iPKc + .weak _Z13test_constantIs33custom_greaterthanequal_constantsIsEEvPT_iPKc # -- Begin function _Z13test_constantIs33custom_greaterthanequal_constantsIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs33custom_greaterthanequal_constantsIsEEvPT_iPKc,@function _Z13test_constantIs33custom_greaterthanequal_constantsIsEEvPT_iPKc: # @_Z13test_constantIs33custom_greaterthanequal_constantsIsEEvPT_iPKc @@ -17507,12 +17344,14 @@ _Z13test_constantIs33custom_greaterthanequal_constantsIsEEvPT_iPKc: # @_Z13test_ move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB80_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI80_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI80_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -17543,12 +17382,8 @@ _Z13test_constantIs33custom_greaterthanequal_constantsIsEEvPT_iPKc: # @_Z13test_ .size _Z13test_constantIs33custom_greaterthanequal_constantsIsEEvPT_iPKc, .Lfunc_end80-_Z13test_constantIs33custom_greaterthanequal_constantsIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs30custom_lessthanequal_constantsIsEEvPT_iPKc -.LCPI81_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs30custom_lessthanequal_constantsIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs30custom_lessthanequal_constantsIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs30custom_lessthanequal_constantsIsEEvPT_iPKc + .weak _Z13test_constantIs30custom_lessthanequal_constantsIsEEvPT_iPKc # -- Begin function _Z13test_constantIs30custom_lessthanequal_constantsIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs30custom_lessthanequal_constantsIsEEvPT_iPKc,@function _Z13test_constantIs30custom_lessthanequal_constantsIsEEvPT_iPKc: # @_Z13test_constantIs30custom_lessthanequal_constantsIsEEvPT_iPKc @@ -17601,12 +17436,14 @@ _Z13test_constantIs30custom_lessthanequal_constantsIsEEvPT_iPKc: # @_Z13test_con # %bb.3: # %._crit_edge.i ld.w $a1, $s1, %pc_lo12(current_test) .LBB81_4: # %_Z13record_resultdPKc.exit - pcalau12i $a2, %pc_hi20(.LCPI81_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI81_0) sub.d $a2, $s0, $s2 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a2, $a2, 1042 movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a2, $a1, $a0, 4 slli.d $a3, $a1, 4 fstx.d $fa0, $a0, $a3 @@ -17635,12 +17472,8 @@ _Z13test_constantIs30custom_lessthanequal_constantsIsEEvPT_iPKc: # @_Z13test_con .size _Z13test_constantIs30custom_lessthanequal_constantsIsEEvPT_iPKc, .Lfunc_end81-_Z13test_constantIs30custom_lessthanequal_constantsIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs20custom_and_constantsIsEEvPT_iPKc -.LCPI82_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs20custom_and_constantsIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs20custom_and_constantsIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs20custom_and_constantsIsEEvPT_iPKc + .weak _Z13test_constantIs20custom_and_constantsIsEEvPT_iPKc # -- Begin function _Z13test_constantIs20custom_and_constantsIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs20custom_and_constantsIsEEvPT_iPKc,@function _Z13test_constantIs20custom_and_constantsIsEEvPT_iPKc: # @_Z13test_constantIs20custom_and_constantsIsEEvPT_iPKc @@ -17743,12 +17576,14 @@ _Z13test_constantIs20custom_and_constantsIsEEvPT_iPKc: # @_Z13test_constantIs20c move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB82_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI82_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI82_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -17779,12 +17614,8 @@ _Z13test_constantIs20custom_and_constantsIsEEvPT_iPKc: # @_Z13test_constantIs20c .size _Z13test_constantIs20custom_and_constantsIsEEvPT_iPKc, .Lfunc_end82-_Z13test_constantIs20custom_and_constantsIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs19custom_or_constantsIsEEvPT_iPKc -.LCPI83_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs19custom_or_constantsIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs19custom_or_constantsIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs19custom_or_constantsIsEEvPT_iPKc + .weak _Z13test_constantIs19custom_or_constantsIsEEvPT_iPKc # -- Begin function _Z13test_constantIs19custom_or_constantsIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs19custom_or_constantsIsEEvPT_iPKc,@function _Z13test_constantIs19custom_or_constantsIsEEvPT_iPKc: # @_Z13test_constantIs19custom_or_constantsIsEEvPT_iPKc @@ -17889,12 +17720,14 @@ _Z13test_constantIs19custom_or_constantsIsEEvPT_iPKc: # @_Z13test_constantIs19cu move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB83_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI83_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI83_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -17925,12 +17758,8 @@ _Z13test_constantIs19custom_or_constantsIsEEvPT_iPKc: # @_Z13test_constantIs19cu .size _Z13test_constantIs19custom_or_constantsIsEEvPT_iPKc, .Lfunc_end83-_Z13test_constantIs19custom_or_constantsIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs20custom_xor_constantsIsEEvPT_iPKc -.LCPI84_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs20custom_xor_constantsIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs20custom_xor_constantsIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs20custom_xor_constantsIsEEvPT_iPKc + .weak _Z13test_constantIs20custom_xor_constantsIsEEvPT_iPKc # -- Begin function _Z13test_constantIs20custom_xor_constantsIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs20custom_xor_constantsIsEEvPT_iPKc,@function _Z13test_constantIs20custom_xor_constantsIsEEvPT_iPKc: # @_Z13test_constantIs20custom_xor_constantsIsEEvPT_iPKc @@ -18035,12 +17864,14 @@ _Z13test_constantIs20custom_xor_constantsIsEEvPT_iPKc: # @_Z13test_constantIs20c move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB84_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI84_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI84_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -18071,12 +17902,8 @@ _Z13test_constantIs20custom_xor_constantsIsEEvPT_iPKc: # @_Z13test_constantIs20c .size _Z13test_constantIs20custom_xor_constantsIsEEvPT_iPKc, .Lfunc_end84-_Z13test_constantIs20custom_xor_constantsIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs19custom_constant_addIsEEvPT_iPKc -.LCPI85_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs19custom_constant_addIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs19custom_constant_addIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs19custom_constant_addIsEEvPT_iPKc + .weak _Z13test_constantIs19custom_constant_addIsEEvPT_iPKc # -- Begin function _Z13test_constantIs19custom_constant_addIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs19custom_constant_addIsEEvPT_iPKc,@function _Z13test_constantIs19custom_constant_addIsEEvPT_iPKc: # @_Z13test_constantIs19custom_constant_addIsEEvPT_iPKc @@ -18321,12 +18148,14 @@ _Z13test_constantIs19custom_constant_addIsEEvPT_iPKc: # @_Z13test_constantIs19cu move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB85_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI85_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI85_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -18360,12 +18189,8 @@ _Z13test_constantIs19custom_constant_addIsEEvPT_iPKc: # @_Z13test_constantIs19cu .size _Z13test_constantIs19custom_constant_addIsEEvPT_iPKc, .Lfunc_end85-_Z13test_constantIs19custom_constant_addIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs28custom_multiple_constant_addIsEEvPT_iPKc -.LCPI86_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs28custom_multiple_constant_addIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs28custom_multiple_constant_addIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs28custom_multiple_constant_addIsEEvPT_iPKc + .weak _Z13test_constantIs28custom_multiple_constant_addIsEEvPT_iPKc # -- Begin function _Z13test_constantIs28custom_multiple_constant_addIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs28custom_multiple_constant_addIsEEvPT_iPKc,@function _Z13test_constantIs28custom_multiple_constant_addIsEEvPT_iPKc: # @_Z13test_constantIs28custom_multiple_constant_addIsEEvPT_iPKc @@ -18610,12 +18435,14 @@ _Z13test_constantIs28custom_multiple_constant_addIsEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB86_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI86_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI86_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -18649,12 +18476,8 @@ _Z13test_constantIs28custom_multiple_constant_addIsEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIs28custom_multiple_constant_addIsEEvPT_iPKc, .Lfunc_end86-_Z13test_constantIs28custom_multiple_constant_addIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs19custom_constant_subIsEEvPT_iPKc -.LCPI87_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs19custom_constant_subIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs19custom_constant_subIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs19custom_constant_subIsEEvPT_iPKc + .weak _Z13test_constantIs19custom_constant_subIsEEvPT_iPKc # -- Begin function _Z13test_constantIs19custom_constant_subIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs19custom_constant_subIsEEvPT_iPKc,@function _Z13test_constantIs19custom_constant_subIsEEvPT_iPKc: # @_Z13test_constantIs19custom_constant_subIsEEvPT_iPKc @@ -18902,12 +18725,14 @@ _Z13test_constantIs19custom_constant_subIsEEvPT_iPKc: # @_Z13test_constantIs19cu move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB87_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI87_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI87_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -18941,12 +18766,8 @@ _Z13test_constantIs19custom_constant_subIsEEvPT_iPKc: # @_Z13test_constantIs19cu .size _Z13test_constantIs19custom_constant_subIsEEvPT_iPKc, .Lfunc_end87-_Z13test_constantIs19custom_constant_subIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs28custom_multiple_constant_subIsEEvPT_iPKc -.LCPI88_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs28custom_multiple_constant_subIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs28custom_multiple_constant_subIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs28custom_multiple_constant_subIsEEvPT_iPKc + .weak _Z13test_constantIs28custom_multiple_constant_subIsEEvPT_iPKc # -- Begin function _Z13test_constantIs28custom_multiple_constant_subIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs28custom_multiple_constant_subIsEEvPT_iPKc,@function _Z13test_constantIs28custom_multiple_constant_subIsEEvPT_iPKc: # @_Z13test_constantIs28custom_multiple_constant_subIsEEvPT_iPKc @@ -19194,12 +19015,14 @@ _Z13test_constantIs28custom_multiple_constant_subIsEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB88_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI88_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI88_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -19233,12 +19056,8 @@ _Z13test_constantIs28custom_multiple_constant_subIsEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIs28custom_multiple_constant_subIsEEvPT_iPKc, .Lfunc_end88-_Z13test_constantIs28custom_multiple_constant_subIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs24custom_constant_multiplyIsEEvPT_iPKc -.LCPI89_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs24custom_constant_multiplyIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs24custom_constant_multiplyIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs24custom_constant_multiplyIsEEvPT_iPKc + .weak _Z13test_constantIs24custom_constant_multiplyIsEEvPT_iPKc # -- Begin function _Z13test_constantIs24custom_constant_multiplyIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs24custom_constant_multiplyIsEEvPT_iPKc,@function _Z13test_constantIs24custom_constant_multiplyIsEEvPT_iPKc: # @_Z13test_constantIs24custom_constant_multiplyIsEEvPT_iPKc @@ -19479,12 +19298,14 @@ _Z13test_constantIs24custom_constant_multiplyIsEEvPT_iPKc: # @_Z13test_constantI move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB89_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI89_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI89_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -19518,12 +19339,8 @@ _Z13test_constantIs24custom_constant_multiplyIsEEvPT_iPKc: # @_Z13test_constantI .size _Z13test_constantIs24custom_constant_multiplyIsEEvPT_iPKc, .Lfunc_end89-_Z13test_constantIs24custom_constant_multiplyIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs33custom_multiple_constant_multiplyIsEEvPT_iPKc -.LCPI90_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs33custom_multiple_constant_multiplyIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs33custom_multiple_constant_multiplyIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs33custom_multiple_constant_multiplyIsEEvPT_iPKc + .weak _Z13test_constantIs33custom_multiple_constant_multiplyIsEEvPT_iPKc # -- Begin function _Z13test_constantIs33custom_multiple_constant_multiplyIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs33custom_multiple_constant_multiplyIsEEvPT_iPKc,@function _Z13test_constantIs33custom_multiple_constant_multiplyIsEEvPT_iPKc: # @_Z13test_constantIs33custom_multiple_constant_multiplyIsEEvPT_iPKc @@ -19764,12 +19581,14 @@ _Z13test_constantIs33custom_multiple_constant_multiplyIsEEvPT_iPKc: # @_Z13test_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB90_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI90_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI90_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -19803,12 +19622,8 @@ _Z13test_constantIs33custom_multiple_constant_multiplyIsEEvPT_iPKc: # @_Z13test_ .size _Z13test_constantIs33custom_multiple_constant_multiplyIsEEvPT_iPKc, .Lfunc_end90-_Z13test_constantIs33custom_multiple_constant_multiplyIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs34custom_multiple_constant_multiply2IsEEvPT_iPKc -.LCPI91_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs34custom_multiple_constant_multiply2IsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs34custom_multiple_constant_multiply2IsEEvPT_iPKc,comdat - .weak _Z13test_constantIs34custom_multiple_constant_multiply2IsEEvPT_iPKc + .weak _Z13test_constantIs34custom_multiple_constant_multiply2IsEEvPT_iPKc # -- Begin function _Z13test_constantIs34custom_multiple_constant_multiply2IsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs34custom_multiple_constant_multiply2IsEEvPT_iPKc,@function _Z13test_constantIs34custom_multiple_constant_multiply2IsEEvPT_iPKc: # @_Z13test_constantIs34custom_multiple_constant_multiply2IsEEvPT_iPKc @@ -20056,12 +19871,14 @@ _Z13test_constantIs34custom_multiple_constant_multiply2IsEEvPT_iPKc: # @_Z13test move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB91_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI91_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI91_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -20095,12 +19912,8 @@ _Z13test_constantIs34custom_multiple_constant_multiply2IsEEvPT_iPKc: # @_Z13test .size _Z13test_constantIs34custom_multiple_constant_multiply2IsEEvPT_iPKc, .Lfunc_end91-_Z13test_constantIs34custom_multiple_constant_multiply2IsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs22custom_constant_divideIsEEvPT_iPKc -.LCPI92_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs22custom_constant_divideIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs22custom_constant_divideIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs22custom_constant_divideIsEEvPT_iPKc + .weak _Z13test_constantIs22custom_constant_divideIsEEvPT_iPKc # -- Begin function _Z13test_constantIs22custom_constant_divideIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs22custom_constant_divideIsEEvPT_iPKc,@function _Z13test_constantIs22custom_constant_divideIsEEvPT_iPKc: # @_Z13test_constantIs22custom_constant_divideIsEEvPT_iPKc @@ -20395,12 +20208,14 @@ _Z13test_constantIs22custom_constant_divideIsEEvPT_iPKc: # @_Z13test_constantIs2 move $a2, $a0 ld.w $a0, $s8, %pc_lo12(current_test) .LBB92_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI92_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI92_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -20434,12 +20249,8 @@ _Z13test_constantIs22custom_constant_divideIsEEvPT_iPKc: # @_Z13test_constantIs2 .size _Z13test_constantIs22custom_constant_divideIsEEvPT_iPKc, .Lfunc_end92-_Z13test_constantIs22custom_constant_divideIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs31custom_multiple_constant_divideIsEEvPT_iPKc -.LCPI93_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs31custom_multiple_constant_divideIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs31custom_multiple_constant_divideIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs31custom_multiple_constant_divideIsEEvPT_iPKc + .weak _Z13test_constantIs31custom_multiple_constant_divideIsEEvPT_iPKc # -- Begin function _Z13test_constantIs31custom_multiple_constant_divideIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs31custom_multiple_constant_divideIsEEvPT_iPKc,@function _Z13test_constantIs31custom_multiple_constant_divideIsEEvPT_iPKc: # @_Z13test_constantIs31custom_multiple_constant_divideIsEEvPT_iPKc @@ -20753,12 +20564,14 @@ _Z13test_constantIs31custom_multiple_constant_divideIsEEvPT_iPKc: # @_Z13test_co move $a2, $a0 ld.w $a0, $s8, %pc_lo12(current_test) .LBB93_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI93_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI93_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -20792,12 +20605,8 @@ _Z13test_constantIs31custom_multiple_constant_divideIsEEvPT_iPKc: # @_Z13test_co .size _Z13test_constantIs31custom_multiple_constant_divideIsEEvPT_iPKc, .Lfunc_end93-_Z13test_constantIs31custom_multiple_constant_divideIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs32custom_multiple_constant_divide2IsEEvPT_iPKc -.LCPI94_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs32custom_multiple_constant_divide2IsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs32custom_multiple_constant_divide2IsEEvPT_iPKc,comdat - .weak _Z13test_constantIs32custom_multiple_constant_divide2IsEEvPT_iPKc + .weak _Z13test_constantIs32custom_multiple_constant_divide2IsEEvPT_iPKc # -- Begin function _Z13test_constantIs32custom_multiple_constant_divide2IsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs32custom_multiple_constant_divide2IsEEvPT_iPKc,@function _Z13test_constantIs32custom_multiple_constant_divide2IsEEvPT_iPKc: # @_Z13test_constantIs32custom_multiple_constant_divide2IsEEvPT_iPKc @@ -21042,12 +20851,14 @@ _Z13test_constantIs32custom_multiple_constant_divide2IsEEvPT_iPKc: # @_Z13test_c move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB94_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI94_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI94_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -21081,12 +20892,8 @@ _Z13test_constantIs32custom_multiple_constant_divide2IsEEvPT_iPKc: # @_Z13test_c .size _Z13test_constantIs32custom_multiple_constant_divide2IsEEvPT_iPKc, .Lfunc_end94-_Z13test_constantIs32custom_multiple_constant_divide2IsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs30custom_multiple_constant_mixedIsEEvPT_iPKc -.LCPI95_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs30custom_multiple_constant_mixedIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs30custom_multiple_constant_mixedIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs30custom_multiple_constant_mixedIsEEvPT_iPKc + .weak _Z13test_constantIs30custom_multiple_constant_mixedIsEEvPT_iPKc # -- Begin function _Z13test_constantIs30custom_multiple_constant_mixedIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs30custom_multiple_constant_mixedIsEEvPT_iPKc,@function _Z13test_constantIs30custom_multiple_constant_mixedIsEEvPT_iPKc: # @_Z13test_constantIs30custom_multiple_constant_mixedIsEEvPT_iPKc @@ -21321,12 +21128,14 @@ _Z13test_constantIs30custom_multiple_constant_mixedIsEEvPT_iPKc: # @_Z13test_con move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB95_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI95_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI95_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -21360,12 +21169,8 @@ _Z13test_constantIs30custom_multiple_constant_mixedIsEEvPT_iPKc: # @_Z13test_con .size _Z13test_constantIs30custom_multiple_constant_mixedIsEEvPT_iPKc, .Lfunc_end95-_Z13test_constantIs30custom_multiple_constant_mixedIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs19custom_constant_andIsEEvPT_iPKc -.LCPI96_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs19custom_constant_andIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs19custom_constant_andIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs19custom_constant_andIsEEvPT_iPKc + .weak _Z13test_constantIs19custom_constant_andIsEEvPT_iPKc # -- Begin function _Z13test_constantIs19custom_constant_andIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs19custom_constant_andIsEEvPT_iPKc,@function _Z13test_constantIs19custom_constant_andIsEEvPT_iPKc: # @_Z13test_constantIs19custom_constant_andIsEEvPT_iPKc @@ -21609,12 +21414,14 @@ _Z13test_constantIs19custom_constant_andIsEEvPT_iPKc: # @_Z13test_constantIs19cu move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB96_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI96_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI96_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -21648,12 +21455,8 @@ _Z13test_constantIs19custom_constant_andIsEEvPT_iPKc: # @_Z13test_constantIs19cu .size _Z13test_constantIs19custom_constant_andIsEEvPT_iPKc, .Lfunc_end96-_Z13test_constantIs19custom_constant_andIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs28custom_multiple_constant_andIsEEvPT_iPKc -.LCPI97_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs28custom_multiple_constant_andIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs28custom_multiple_constant_andIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs28custom_multiple_constant_andIsEEvPT_iPKc + .weak _Z13test_constantIs28custom_multiple_constant_andIsEEvPT_iPKc # -- Begin function _Z13test_constantIs28custom_multiple_constant_andIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs28custom_multiple_constant_andIsEEvPT_iPKc,@function _Z13test_constantIs28custom_multiple_constant_andIsEEvPT_iPKc: # @_Z13test_constantIs28custom_multiple_constant_andIsEEvPT_iPKc @@ -21897,12 +21700,14 @@ _Z13test_constantIs28custom_multiple_constant_andIsEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB97_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI97_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI97_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -21936,12 +21741,8 @@ _Z13test_constantIs28custom_multiple_constant_andIsEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIs28custom_multiple_constant_andIsEEvPT_iPKc, .Lfunc_end97-_Z13test_constantIs28custom_multiple_constant_andIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs18custom_constant_orIsEEvPT_iPKc -.LCPI98_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs18custom_constant_orIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs18custom_constant_orIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs18custom_constant_orIsEEvPT_iPKc + .weak _Z13test_constantIs18custom_constant_orIsEEvPT_iPKc # -- Begin function _Z13test_constantIs18custom_constant_orIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs18custom_constant_orIsEEvPT_iPKc,@function _Z13test_constantIs18custom_constant_orIsEEvPT_iPKc: # @_Z13test_constantIs18custom_constant_orIsEEvPT_iPKc @@ -22168,12 +21969,14 @@ _Z13test_constantIs18custom_constant_orIsEEvPT_iPKc: # @_Z13test_constantIs18cus move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB98_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI98_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI98_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -22207,12 +22010,8 @@ _Z13test_constantIs18custom_constant_orIsEEvPT_iPKc: # @_Z13test_constantIs18cus .size _Z13test_constantIs18custom_constant_orIsEEvPT_iPKc, .Lfunc_end98-_Z13test_constantIs18custom_constant_orIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs27custom_multiple_constant_orIsEEvPT_iPKc -.LCPI99_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs27custom_multiple_constant_orIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs27custom_multiple_constant_orIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs27custom_multiple_constant_orIsEEvPT_iPKc + .weak _Z13test_constantIs27custom_multiple_constant_orIsEEvPT_iPKc # -- Begin function _Z13test_constantIs27custom_multiple_constant_orIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs27custom_multiple_constant_orIsEEvPT_iPKc,@function _Z13test_constantIs27custom_multiple_constant_orIsEEvPT_iPKc: # @_Z13test_constantIs27custom_multiple_constant_orIsEEvPT_iPKc @@ -22441,12 +22240,14 @@ _Z13test_constantIs27custom_multiple_constant_orIsEEvPT_iPKc: # @_Z13test_consta move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB99_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI99_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI99_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -22480,12 +22281,8 @@ _Z13test_constantIs27custom_multiple_constant_orIsEEvPT_iPKc: # @_Z13test_consta .size _Z13test_constantIs27custom_multiple_constant_orIsEEvPT_iPKc, .Lfunc_end99-_Z13test_constantIs27custom_multiple_constant_orIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs19custom_constant_xorIsEEvPT_iPKc -.LCPI100_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs19custom_constant_xorIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs19custom_constant_xorIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs19custom_constant_xorIsEEvPT_iPKc + .weak _Z13test_constantIs19custom_constant_xorIsEEvPT_iPKc # -- Begin function _Z13test_constantIs19custom_constant_xorIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs19custom_constant_xorIsEEvPT_iPKc,@function _Z13test_constantIs19custom_constant_xorIsEEvPT_iPKc: # @_Z13test_constantIs19custom_constant_xorIsEEvPT_iPKc @@ -22729,12 +22526,14 @@ _Z13test_constantIs19custom_constant_xorIsEEvPT_iPKc: # @_Z13test_constantIs19cu move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB100_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI100_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI100_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -22768,12 +22567,8 @@ _Z13test_constantIs19custom_constant_xorIsEEvPT_iPKc: # @_Z13test_constantIs19cu .size _Z13test_constantIs19custom_constant_xorIsEEvPT_iPKc, .Lfunc_end100-_Z13test_constantIs19custom_constant_xorIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIs28custom_multiple_constant_xorIsEEvPT_iPKc -.LCPI101_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIs28custom_multiple_constant_xorIsEEvPT_iPKc,"axG",@progbits,_Z13test_constantIs28custom_multiple_constant_xorIsEEvPT_iPKc,comdat - .weak _Z13test_constantIs28custom_multiple_constant_xorIsEEvPT_iPKc + .weak _Z13test_constantIs28custom_multiple_constant_xorIsEEvPT_iPKc # -- Begin function _Z13test_constantIs28custom_multiple_constant_xorIsEEvPT_iPKc .p2align 5 .type _Z13test_constantIs28custom_multiple_constant_xorIsEEvPT_iPKc,@function _Z13test_constantIs28custom_multiple_constant_xorIsEEvPT_iPKc: # @_Z13test_constantIs28custom_multiple_constant_xorIsEEvPT_iPKc @@ -23017,12 +22812,14 @@ _Z13test_constantIs28custom_multiple_constant_xorIsEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB101_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI101_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI101_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -23056,12 +22853,8 @@ _Z13test_constantIs28custom_multiple_constant_xorIsEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIs28custom_multiple_constant_xorIsEEvPT_iPKc, .Lfunc_end101-_Z13test_constantIs28custom_multiple_constant_xorIsEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt10custom_twoItEEvPT_iPKc -.LCPI102_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt10custom_twoItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt10custom_twoItEEvPT_iPKc,comdat - .weak _Z13test_constantIt10custom_twoItEEvPT_iPKc + .weak _Z13test_constantIt10custom_twoItEEvPT_iPKc # -- Begin function _Z13test_constantIt10custom_twoItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt10custom_twoItEEvPT_iPKc,@function _Z13test_constantIt10custom_twoItEEvPT_iPKc: # @_Z13test_constantIt10custom_twoItEEvPT_iPKc @@ -23164,12 +22957,14 @@ _Z13test_constantIt10custom_twoItEEvPT_iPKc: # @_Z13test_constantIt10custom_twoI move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB102_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI102_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI102_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -23200,12 +22995,8 @@ _Z13test_constantIt10custom_twoItEEvPT_iPKc: # @_Z13test_constantIt10custom_twoI .size _Z13test_constantIt10custom_twoItEEvPT_iPKc, .Lfunc_end102-_Z13test_constantIt10custom_twoItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt20custom_add_constantsItEEvPT_iPKc -.LCPI103_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt20custom_add_constantsItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt20custom_add_constantsItEEvPT_iPKc,comdat - .weak _Z13test_constantIt20custom_add_constantsItEEvPT_iPKc + .weak _Z13test_constantIt20custom_add_constantsItEEvPT_iPKc # -- Begin function _Z13test_constantIt20custom_add_constantsItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt20custom_add_constantsItEEvPT_iPKc,@function _Z13test_constantIt20custom_add_constantsItEEvPT_iPKc: # @_Z13test_constantIt20custom_add_constantsItEEvPT_iPKc @@ -23308,12 +23099,14 @@ _Z13test_constantIt20custom_add_constantsItEEvPT_iPKc: # @_Z13test_constantIt20c move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB103_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI103_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI103_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -23344,12 +23137,8 @@ _Z13test_constantIt20custom_add_constantsItEEvPT_iPKc: # @_Z13test_constantIt20c .size _Z13test_constantIt20custom_add_constantsItEEvPT_iPKc, .Lfunc_end103-_Z13test_constantIt20custom_add_constantsItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt20custom_sub_constantsItEEvPT_iPKc -.LCPI104_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt20custom_sub_constantsItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt20custom_sub_constantsItEEvPT_iPKc,comdat - .weak _Z13test_constantIt20custom_sub_constantsItEEvPT_iPKc + .weak _Z13test_constantIt20custom_sub_constantsItEEvPT_iPKc # -- Begin function _Z13test_constantIt20custom_sub_constantsItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt20custom_sub_constantsItEEvPT_iPKc,@function _Z13test_constantIt20custom_sub_constantsItEEvPT_iPKc: # @_Z13test_constantIt20custom_sub_constantsItEEvPT_iPKc @@ -23452,12 +23241,14 @@ _Z13test_constantIt20custom_sub_constantsItEEvPT_iPKc: # @_Z13test_constantIt20c move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB104_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI104_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI104_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -23488,12 +23279,8 @@ _Z13test_constantIt20custom_sub_constantsItEEvPT_iPKc: # @_Z13test_constantIt20c .size _Z13test_constantIt20custom_sub_constantsItEEvPT_iPKc, .Lfunc_end104-_Z13test_constantIt20custom_sub_constantsItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt25custom_multiply_constantsItEEvPT_iPKc -.LCPI105_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt25custom_multiply_constantsItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt25custom_multiply_constantsItEEvPT_iPKc,comdat - .weak _Z13test_constantIt25custom_multiply_constantsItEEvPT_iPKc + .weak _Z13test_constantIt25custom_multiply_constantsItEEvPT_iPKc # -- Begin function _Z13test_constantIt25custom_multiply_constantsItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt25custom_multiply_constantsItEEvPT_iPKc,@function _Z13test_constantIt25custom_multiply_constantsItEEvPT_iPKc: # @_Z13test_constantIt25custom_multiply_constantsItEEvPT_iPKc @@ -23598,12 +23385,14 @@ _Z13test_constantIt25custom_multiply_constantsItEEvPT_iPKc: # @_Z13test_constant move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB105_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI105_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI105_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -23634,12 +23423,8 @@ _Z13test_constantIt25custom_multiply_constantsItEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantIt25custom_multiply_constantsItEEvPT_iPKc, .Lfunc_end105-_Z13test_constantIt25custom_multiply_constantsItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt23custom_divide_constantsItEEvPT_iPKc -.LCPI106_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt23custom_divide_constantsItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt23custom_divide_constantsItEEvPT_iPKc,comdat - .weak _Z13test_constantIt23custom_divide_constantsItEEvPT_iPKc + .weak _Z13test_constantIt23custom_divide_constantsItEEvPT_iPKc # -- Begin function _Z13test_constantIt23custom_divide_constantsItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt23custom_divide_constantsItEEvPT_iPKc,@function _Z13test_constantIt23custom_divide_constantsItEEvPT_iPKc: # @_Z13test_constantIt23custom_divide_constantsItEEvPT_iPKc @@ -23742,12 +23527,14 @@ _Z13test_constantIt23custom_divide_constantsItEEvPT_iPKc: # @_Z13test_constantIt move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB106_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI106_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI106_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -23778,12 +23565,8 @@ _Z13test_constantIt23custom_divide_constantsItEEvPT_iPKc: # @_Z13test_constantIt .size _Z13test_constantIt23custom_divide_constantsItEEvPT_iPKc, .Lfunc_end106-_Z13test_constantIt23custom_divide_constantsItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt20custom_mod_constantsItEEvPT_iPKc -.LCPI107_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt20custom_mod_constantsItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt20custom_mod_constantsItEEvPT_iPKc,comdat - .weak _Z13test_constantIt20custom_mod_constantsItEEvPT_iPKc + .weak _Z13test_constantIt20custom_mod_constantsItEEvPT_iPKc # -- Begin function _Z13test_constantIt20custom_mod_constantsItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt20custom_mod_constantsItEEvPT_iPKc,@function _Z13test_constantIt20custom_mod_constantsItEEvPT_iPKc: # @_Z13test_constantIt20custom_mod_constantsItEEvPT_iPKc @@ -23886,12 +23669,14 @@ _Z13test_constantIt20custom_mod_constantsItEEvPT_iPKc: # @_Z13test_constantIt20c move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB107_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI107_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI107_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -23922,12 +23707,8 @@ _Z13test_constantIt20custom_mod_constantsItEEvPT_iPKc: # @_Z13test_constantIt20c .size _Z13test_constantIt20custom_mod_constantsItEEvPT_iPKc, .Lfunc_end107-_Z13test_constantIt20custom_mod_constantsItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt22custom_equal_constantsItEEvPT_iPKc -.LCPI108_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt22custom_equal_constantsItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt22custom_equal_constantsItEEvPT_iPKc,comdat - .weak _Z13test_constantIt22custom_equal_constantsItEEvPT_iPKc + .weak _Z13test_constantIt22custom_equal_constantsItEEvPT_iPKc # -- Begin function _Z13test_constantIt22custom_equal_constantsItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt22custom_equal_constantsItEEvPT_iPKc,@function _Z13test_constantIt22custom_equal_constantsItEEvPT_iPKc: # @_Z13test_constantIt22custom_equal_constantsItEEvPT_iPKc @@ -23980,12 +23761,14 @@ _Z13test_constantIt22custom_equal_constantsItEEvPT_iPKc: # @_Z13test_constantIt2 # %bb.3: # %._crit_edge.i ld.w $a1, $s1, %pc_lo12(current_test) .LBB108_4: # %_Z13record_resultdPKc.exit - pcalau12i $a2, %pc_hi20(.LCPI108_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI108_0) sub.d $a2, $s0, $s2 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a2, $a2, 1042 movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a2, $a1, $a0, 4 slli.d $a3, $a1, 4 fstx.d $fa0, $a0, $a3 @@ -24014,12 +23797,8 @@ _Z13test_constantIt22custom_equal_constantsItEEvPT_iPKc: # @_Z13test_constantIt2 .size _Z13test_constantIt22custom_equal_constantsItEEvPT_iPKc, .Lfunc_end108-_Z13test_constantIt22custom_equal_constantsItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt25custom_notequal_constantsItEEvPT_iPKc -.LCPI109_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt25custom_notequal_constantsItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt25custom_notequal_constantsItEEvPT_iPKc,comdat - .weak _Z13test_constantIt25custom_notequal_constantsItEEvPT_iPKc + .weak _Z13test_constantIt25custom_notequal_constantsItEEvPT_iPKc # -- Begin function _Z13test_constantIt25custom_notequal_constantsItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt25custom_notequal_constantsItEEvPT_iPKc,@function _Z13test_constantIt25custom_notequal_constantsItEEvPT_iPKc: # @_Z13test_constantIt25custom_notequal_constantsItEEvPT_iPKc @@ -24122,12 +23901,14 @@ _Z13test_constantIt25custom_notequal_constantsItEEvPT_iPKc: # @_Z13test_constant move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB109_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI109_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI109_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -24158,12 +23939,8 @@ _Z13test_constantIt25custom_notequal_constantsItEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantIt25custom_notequal_constantsItEEvPT_iPKc, .Lfunc_end109-_Z13test_constantIt25custom_notequal_constantsItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt28custom_greaterthan_constantsItEEvPT_iPKc -.LCPI110_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt28custom_greaterthan_constantsItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt28custom_greaterthan_constantsItEEvPT_iPKc,comdat - .weak _Z13test_constantIt28custom_greaterthan_constantsItEEvPT_iPKc + .weak _Z13test_constantIt28custom_greaterthan_constantsItEEvPT_iPKc # -- Begin function _Z13test_constantIt28custom_greaterthan_constantsItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt28custom_greaterthan_constantsItEEvPT_iPKc,@function _Z13test_constantIt28custom_greaterthan_constantsItEEvPT_iPKc: # @_Z13test_constantIt28custom_greaterthan_constantsItEEvPT_iPKc @@ -24266,12 +24043,14 @@ _Z13test_constantIt28custom_greaterthan_constantsItEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB110_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI110_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI110_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -24302,12 +24081,8 @@ _Z13test_constantIt28custom_greaterthan_constantsItEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIt28custom_greaterthan_constantsItEEvPT_iPKc, .Lfunc_end110-_Z13test_constantIt28custom_greaterthan_constantsItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt25custom_lessthan_constantsItEEvPT_iPKc -.LCPI111_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt25custom_lessthan_constantsItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt25custom_lessthan_constantsItEEvPT_iPKc,comdat - .weak _Z13test_constantIt25custom_lessthan_constantsItEEvPT_iPKc + .weak _Z13test_constantIt25custom_lessthan_constantsItEEvPT_iPKc # -- Begin function _Z13test_constantIt25custom_lessthan_constantsItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt25custom_lessthan_constantsItEEvPT_iPKc,@function _Z13test_constantIt25custom_lessthan_constantsItEEvPT_iPKc: # @_Z13test_constantIt25custom_lessthan_constantsItEEvPT_iPKc @@ -24360,12 +24135,14 @@ _Z13test_constantIt25custom_lessthan_constantsItEEvPT_iPKc: # @_Z13test_constant # %bb.3: # %._crit_edge.i ld.w $a1, $s1, %pc_lo12(current_test) .LBB111_4: # %_Z13record_resultdPKc.exit - pcalau12i $a2, %pc_hi20(.LCPI111_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI111_0) sub.d $a2, $s0, $s2 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a2, $a2, 1042 movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a2, $a1, $a0, 4 slli.d $a3, $a1, 4 fstx.d $fa0, $a0, $a3 @@ -24394,12 +24171,8 @@ _Z13test_constantIt25custom_lessthan_constantsItEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantIt25custom_lessthan_constantsItEEvPT_iPKc, .Lfunc_end111-_Z13test_constantIt25custom_lessthan_constantsItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt33custom_greaterthanequal_constantsItEEvPT_iPKc -.LCPI112_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt33custom_greaterthanequal_constantsItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt33custom_greaterthanequal_constantsItEEvPT_iPKc,comdat - .weak _Z13test_constantIt33custom_greaterthanequal_constantsItEEvPT_iPKc + .weak _Z13test_constantIt33custom_greaterthanequal_constantsItEEvPT_iPKc # -- Begin function _Z13test_constantIt33custom_greaterthanequal_constantsItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt33custom_greaterthanequal_constantsItEEvPT_iPKc,@function _Z13test_constantIt33custom_greaterthanequal_constantsItEEvPT_iPKc: # @_Z13test_constantIt33custom_greaterthanequal_constantsItEEvPT_iPKc @@ -24502,12 +24275,14 @@ _Z13test_constantIt33custom_greaterthanequal_constantsItEEvPT_iPKc: # @_Z13test_ move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB112_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI112_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI112_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -24538,12 +24313,8 @@ _Z13test_constantIt33custom_greaterthanequal_constantsItEEvPT_iPKc: # @_Z13test_ .size _Z13test_constantIt33custom_greaterthanequal_constantsItEEvPT_iPKc, .Lfunc_end112-_Z13test_constantIt33custom_greaterthanequal_constantsItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt30custom_lessthanequal_constantsItEEvPT_iPKc -.LCPI113_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt30custom_lessthanequal_constantsItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt30custom_lessthanequal_constantsItEEvPT_iPKc,comdat - .weak _Z13test_constantIt30custom_lessthanequal_constantsItEEvPT_iPKc + .weak _Z13test_constantIt30custom_lessthanequal_constantsItEEvPT_iPKc # -- Begin function _Z13test_constantIt30custom_lessthanequal_constantsItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt30custom_lessthanequal_constantsItEEvPT_iPKc,@function _Z13test_constantIt30custom_lessthanequal_constantsItEEvPT_iPKc: # @_Z13test_constantIt30custom_lessthanequal_constantsItEEvPT_iPKc @@ -24596,12 +24367,14 @@ _Z13test_constantIt30custom_lessthanequal_constantsItEEvPT_iPKc: # @_Z13test_con # %bb.3: # %._crit_edge.i ld.w $a1, $s1, %pc_lo12(current_test) .LBB113_4: # %_Z13record_resultdPKc.exit - pcalau12i $a2, %pc_hi20(.LCPI113_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI113_0) sub.d $a2, $s0, $s2 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a2, $a2, 1042 movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a2, $a1, $a0, 4 slli.d $a3, $a1, 4 fstx.d $fa0, $a0, $a3 @@ -24630,12 +24403,8 @@ _Z13test_constantIt30custom_lessthanequal_constantsItEEvPT_iPKc: # @_Z13test_con .size _Z13test_constantIt30custom_lessthanequal_constantsItEEvPT_iPKc, .Lfunc_end113-_Z13test_constantIt30custom_lessthanequal_constantsItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt20custom_and_constantsItEEvPT_iPKc -.LCPI114_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt20custom_and_constantsItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt20custom_and_constantsItEEvPT_iPKc,comdat - .weak _Z13test_constantIt20custom_and_constantsItEEvPT_iPKc + .weak _Z13test_constantIt20custom_and_constantsItEEvPT_iPKc # -- Begin function _Z13test_constantIt20custom_and_constantsItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt20custom_and_constantsItEEvPT_iPKc,@function _Z13test_constantIt20custom_and_constantsItEEvPT_iPKc: # @_Z13test_constantIt20custom_and_constantsItEEvPT_iPKc @@ -24738,12 +24507,14 @@ _Z13test_constantIt20custom_and_constantsItEEvPT_iPKc: # @_Z13test_constantIt20c move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB114_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI114_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI114_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -24774,12 +24545,8 @@ _Z13test_constantIt20custom_and_constantsItEEvPT_iPKc: # @_Z13test_constantIt20c .size _Z13test_constantIt20custom_and_constantsItEEvPT_iPKc, .Lfunc_end114-_Z13test_constantIt20custom_and_constantsItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt19custom_or_constantsItEEvPT_iPKc -.LCPI115_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt19custom_or_constantsItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt19custom_or_constantsItEEvPT_iPKc,comdat - .weak _Z13test_constantIt19custom_or_constantsItEEvPT_iPKc + .weak _Z13test_constantIt19custom_or_constantsItEEvPT_iPKc # -- Begin function _Z13test_constantIt19custom_or_constantsItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt19custom_or_constantsItEEvPT_iPKc,@function _Z13test_constantIt19custom_or_constantsItEEvPT_iPKc: # @_Z13test_constantIt19custom_or_constantsItEEvPT_iPKc @@ -24884,12 +24651,14 @@ _Z13test_constantIt19custom_or_constantsItEEvPT_iPKc: # @_Z13test_constantIt19cu move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB115_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI115_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI115_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -24920,12 +24689,8 @@ _Z13test_constantIt19custom_or_constantsItEEvPT_iPKc: # @_Z13test_constantIt19cu .size _Z13test_constantIt19custom_or_constantsItEEvPT_iPKc, .Lfunc_end115-_Z13test_constantIt19custom_or_constantsItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt20custom_xor_constantsItEEvPT_iPKc -.LCPI116_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt20custom_xor_constantsItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt20custom_xor_constantsItEEvPT_iPKc,comdat - .weak _Z13test_constantIt20custom_xor_constantsItEEvPT_iPKc + .weak _Z13test_constantIt20custom_xor_constantsItEEvPT_iPKc # -- Begin function _Z13test_constantIt20custom_xor_constantsItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt20custom_xor_constantsItEEvPT_iPKc,@function _Z13test_constantIt20custom_xor_constantsItEEvPT_iPKc: # @_Z13test_constantIt20custom_xor_constantsItEEvPT_iPKc @@ -25030,12 +24795,14 @@ _Z13test_constantIt20custom_xor_constantsItEEvPT_iPKc: # @_Z13test_constantIt20c move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB116_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI116_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI116_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -25066,12 +24833,8 @@ _Z13test_constantIt20custom_xor_constantsItEEvPT_iPKc: # @_Z13test_constantIt20c .size _Z13test_constantIt20custom_xor_constantsItEEvPT_iPKc, .Lfunc_end116-_Z13test_constantIt20custom_xor_constantsItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt19custom_constant_addItEEvPT_iPKc -.LCPI117_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt19custom_constant_addItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt19custom_constant_addItEEvPT_iPKc,comdat - .weak _Z13test_constantIt19custom_constant_addItEEvPT_iPKc + .weak _Z13test_constantIt19custom_constant_addItEEvPT_iPKc # -- Begin function _Z13test_constantIt19custom_constant_addItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt19custom_constant_addItEEvPT_iPKc,@function _Z13test_constantIt19custom_constant_addItEEvPT_iPKc: # @_Z13test_constantIt19custom_constant_addItEEvPT_iPKc @@ -25316,12 +25079,14 @@ _Z13test_constantIt19custom_constant_addItEEvPT_iPKc: # @_Z13test_constantIt19cu move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB117_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI117_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI117_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -25355,12 +25120,8 @@ _Z13test_constantIt19custom_constant_addItEEvPT_iPKc: # @_Z13test_constantIt19cu .size _Z13test_constantIt19custom_constant_addItEEvPT_iPKc, .Lfunc_end117-_Z13test_constantIt19custom_constant_addItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt28custom_multiple_constant_addItEEvPT_iPKc -.LCPI118_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt28custom_multiple_constant_addItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt28custom_multiple_constant_addItEEvPT_iPKc,comdat - .weak _Z13test_constantIt28custom_multiple_constant_addItEEvPT_iPKc + .weak _Z13test_constantIt28custom_multiple_constant_addItEEvPT_iPKc # -- Begin function _Z13test_constantIt28custom_multiple_constant_addItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt28custom_multiple_constant_addItEEvPT_iPKc,@function _Z13test_constantIt28custom_multiple_constant_addItEEvPT_iPKc: # @_Z13test_constantIt28custom_multiple_constant_addItEEvPT_iPKc @@ -25605,12 +25366,14 @@ _Z13test_constantIt28custom_multiple_constant_addItEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB118_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI118_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI118_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -25644,12 +25407,8 @@ _Z13test_constantIt28custom_multiple_constant_addItEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIt28custom_multiple_constant_addItEEvPT_iPKc, .Lfunc_end118-_Z13test_constantIt28custom_multiple_constant_addItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt19custom_constant_subItEEvPT_iPKc -.LCPI119_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt19custom_constant_subItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt19custom_constant_subItEEvPT_iPKc,comdat - .weak _Z13test_constantIt19custom_constant_subItEEvPT_iPKc + .weak _Z13test_constantIt19custom_constant_subItEEvPT_iPKc # -- Begin function _Z13test_constantIt19custom_constant_subItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt19custom_constant_subItEEvPT_iPKc,@function _Z13test_constantIt19custom_constant_subItEEvPT_iPKc: # @_Z13test_constantIt19custom_constant_subItEEvPT_iPKc @@ -25897,12 +25656,14 @@ _Z13test_constantIt19custom_constant_subItEEvPT_iPKc: # @_Z13test_constantIt19cu move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB119_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI119_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI119_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -25936,12 +25697,8 @@ _Z13test_constantIt19custom_constant_subItEEvPT_iPKc: # @_Z13test_constantIt19cu .size _Z13test_constantIt19custom_constant_subItEEvPT_iPKc, .Lfunc_end119-_Z13test_constantIt19custom_constant_subItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt28custom_multiple_constant_subItEEvPT_iPKc -.LCPI120_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt28custom_multiple_constant_subItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt28custom_multiple_constant_subItEEvPT_iPKc,comdat - .weak _Z13test_constantIt28custom_multiple_constant_subItEEvPT_iPKc + .weak _Z13test_constantIt28custom_multiple_constant_subItEEvPT_iPKc # -- Begin function _Z13test_constantIt28custom_multiple_constant_subItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt28custom_multiple_constant_subItEEvPT_iPKc,@function _Z13test_constantIt28custom_multiple_constant_subItEEvPT_iPKc: # @_Z13test_constantIt28custom_multiple_constant_subItEEvPT_iPKc @@ -26189,12 +25946,14 @@ _Z13test_constantIt28custom_multiple_constant_subItEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB120_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI120_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI120_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -26228,12 +25987,8 @@ _Z13test_constantIt28custom_multiple_constant_subItEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIt28custom_multiple_constant_subItEEvPT_iPKc, .Lfunc_end120-_Z13test_constantIt28custom_multiple_constant_subItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt24custom_constant_multiplyItEEvPT_iPKc -.LCPI121_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt24custom_constant_multiplyItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt24custom_constant_multiplyItEEvPT_iPKc,comdat - .weak _Z13test_constantIt24custom_constant_multiplyItEEvPT_iPKc + .weak _Z13test_constantIt24custom_constant_multiplyItEEvPT_iPKc # -- Begin function _Z13test_constantIt24custom_constant_multiplyItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt24custom_constant_multiplyItEEvPT_iPKc,@function _Z13test_constantIt24custom_constant_multiplyItEEvPT_iPKc: # @_Z13test_constantIt24custom_constant_multiplyItEEvPT_iPKc @@ -26474,12 +26229,14 @@ _Z13test_constantIt24custom_constant_multiplyItEEvPT_iPKc: # @_Z13test_constantI move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB121_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI121_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI121_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -26513,12 +26270,8 @@ _Z13test_constantIt24custom_constant_multiplyItEEvPT_iPKc: # @_Z13test_constantI .size _Z13test_constantIt24custom_constant_multiplyItEEvPT_iPKc, .Lfunc_end121-_Z13test_constantIt24custom_constant_multiplyItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt33custom_multiple_constant_multiplyItEEvPT_iPKc -.LCPI122_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt33custom_multiple_constant_multiplyItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt33custom_multiple_constant_multiplyItEEvPT_iPKc,comdat - .weak _Z13test_constantIt33custom_multiple_constant_multiplyItEEvPT_iPKc + .weak _Z13test_constantIt33custom_multiple_constant_multiplyItEEvPT_iPKc # -- Begin function _Z13test_constantIt33custom_multiple_constant_multiplyItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt33custom_multiple_constant_multiplyItEEvPT_iPKc,@function _Z13test_constantIt33custom_multiple_constant_multiplyItEEvPT_iPKc: # @_Z13test_constantIt33custom_multiple_constant_multiplyItEEvPT_iPKc @@ -26759,12 +26512,14 @@ _Z13test_constantIt33custom_multiple_constant_multiplyItEEvPT_iPKc: # @_Z13test_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB122_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI122_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI122_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -26798,12 +26553,8 @@ _Z13test_constantIt33custom_multiple_constant_multiplyItEEvPT_iPKc: # @_Z13test_ .size _Z13test_constantIt33custom_multiple_constant_multiplyItEEvPT_iPKc, .Lfunc_end122-_Z13test_constantIt33custom_multiple_constant_multiplyItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt34custom_multiple_constant_multiply2ItEEvPT_iPKc -.LCPI123_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt34custom_multiple_constant_multiply2ItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt34custom_multiple_constant_multiply2ItEEvPT_iPKc,comdat - .weak _Z13test_constantIt34custom_multiple_constant_multiply2ItEEvPT_iPKc + .weak _Z13test_constantIt34custom_multiple_constant_multiply2ItEEvPT_iPKc # -- Begin function _Z13test_constantIt34custom_multiple_constant_multiply2ItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt34custom_multiple_constant_multiply2ItEEvPT_iPKc,@function _Z13test_constantIt34custom_multiple_constant_multiply2ItEEvPT_iPKc: # @_Z13test_constantIt34custom_multiple_constant_multiply2ItEEvPT_iPKc @@ -27051,12 +26802,14 @@ _Z13test_constantIt34custom_multiple_constant_multiply2ItEEvPT_iPKc: # @_Z13test move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB123_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI123_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI123_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -27090,12 +26843,8 @@ _Z13test_constantIt34custom_multiple_constant_multiply2ItEEvPT_iPKc: # @_Z13test .size _Z13test_constantIt34custom_multiple_constant_multiply2ItEEvPT_iPKc, .Lfunc_end123-_Z13test_constantIt34custom_multiple_constant_multiply2ItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt22custom_constant_divideItEEvPT_iPKc -.LCPI124_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt22custom_constant_divideItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt22custom_constant_divideItEEvPT_iPKc,comdat - .weak _Z13test_constantIt22custom_constant_divideItEEvPT_iPKc + .weak _Z13test_constantIt22custom_constant_divideItEEvPT_iPKc # -- Begin function _Z13test_constantIt22custom_constant_divideItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt22custom_constant_divideItEEvPT_iPKc,@function _Z13test_constantIt22custom_constant_divideItEEvPT_iPKc: # @_Z13test_constantIt22custom_constant_divideItEEvPT_iPKc @@ -27368,12 +27117,14 @@ _Z13test_constantIt22custom_constant_divideItEEvPT_iPKc: # @_Z13test_constantIt2 move $a2, $a0 ld.w $a0, $s8, %pc_lo12(current_test) .LBB124_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI124_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI124_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -27407,12 +27158,8 @@ _Z13test_constantIt22custom_constant_divideItEEvPT_iPKc: # @_Z13test_constantIt2 .size _Z13test_constantIt22custom_constant_divideItEEvPT_iPKc, .Lfunc_end124-_Z13test_constantIt22custom_constant_divideItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt31custom_multiple_constant_divideItEEvPT_iPKc -.LCPI125_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt31custom_multiple_constant_divideItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt31custom_multiple_constant_divideItEEvPT_iPKc,comdat - .weak _Z13test_constantIt31custom_multiple_constant_divideItEEvPT_iPKc + .weak _Z13test_constantIt31custom_multiple_constant_divideItEEvPT_iPKc # -- Begin function _Z13test_constantIt31custom_multiple_constant_divideItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt31custom_multiple_constant_divideItEEvPT_iPKc,@function _Z13test_constantIt31custom_multiple_constant_divideItEEvPT_iPKc: # @_Z13test_constantIt31custom_multiple_constant_divideItEEvPT_iPKc @@ -27685,12 +27432,14 @@ _Z13test_constantIt31custom_multiple_constant_divideItEEvPT_iPKc: # @_Z13test_co move $a2, $a0 ld.w $a0, $s8, %pc_lo12(current_test) .LBB125_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI125_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI125_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -27724,12 +27473,8 @@ _Z13test_constantIt31custom_multiple_constant_divideItEEvPT_iPKc: # @_Z13test_co .size _Z13test_constantIt31custom_multiple_constant_divideItEEvPT_iPKc, .Lfunc_end125-_Z13test_constantIt31custom_multiple_constant_divideItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt32custom_multiple_constant_divide2ItEEvPT_iPKc -.LCPI126_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt32custom_multiple_constant_divide2ItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt32custom_multiple_constant_divide2ItEEvPT_iPKc,comdat - .weak _Z13test_constantIt32custom_multiple_constant_divide2ItEEvPT_iPKc + .weak _Z13test_constantIt32custom_multiple_constant_divide2ItEEvPT_iPKc # -- Begin function _Z13test_constantIt32custom_multiple_constant_divide2ItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt32custom_multiple_constant_divide2ItEEvPT_iPKc,@function _Z13test_constantIt32custom_multiple_constant_divide2ItEEvPT_iPKc: # @_Z13test_constantIt32custom_multiple_constant_divide2ItEEvPT_iPKc @@ -27974,12 +27719,14 @@ _Z13test_constantIt32custom_multiple_constant_divide2ItEEvPT_iPKc: # @_Z13test_c move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB126_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI126_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI126_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -28013,12 +27760,8 @@ _Z13test_constantIt32custom_multiple_constant_divide2ItEEvPT_iPKc: # @_Z13test_c .size _Z13test_constantIt32custom_multiple_constant_divide2ItEEvPT_iPKc, .Lfunc_end126-_Z13test_constantIt32custom_multiple_constant_divide2ItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt30custom_multiple_constant_mixedItEEvPT_iPKc -.LCPI127_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt30custom_multiple_constant_mixedItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt30custom_multiple_constant_mixedItEEvPT_iPKc,comdat - .weak _Z13test_constantIt30custom_multiple_constant_mixedItEEvPT_iPKc + .weak _Z13test_constantIt30custom_multiple_constant_mixedItEEvPT_iPKc # -- Begin function _Z13test_constantIt30custom_multiple_constant_mixedItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt30custom_multiple_constant_mixedItEEvPT_iPKc,@function _Z13test_constantIt30custom_multiple_constant_mixedItEEvPT_iPKc: # @_Z13test_constantIt30custom_multiple_constant_mixedItEEvPT_iPKc @@ -28253,12 +27996,14 @@ _Z13test_constantIt30custom_multiple_constant_mixedItEEvPT_iPKc: # @_Z13test_con move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB127_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI127_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI127_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -28292,12 +28037,8 @@ _Z13test_constantIt30custom_multiple_constant_mixedItEEvPT_iPKc: # @_Z13test_con .size _Z13test_constantIt30custom_multiple_constant_mixedItEEvPT_iPKc, .Lfunc_end127-_Z13test_constantIt30custom_multiple_constant_mixedItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt19custom_constant_andItEEvPT_iPKc -.LCPI128_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt19custom_constant_andItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt19custom_constant_andItEEvPT_iPKc,comdat - .weak _Z13test_constantIt19custom_constant_andItEEvPT_iPKc + .weak _Z13test_constantIt19custom_constant_andItEEvPT_iPKc # -- Begin function _Z13test_constantIt19custom_constant_andItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt19custom_constant_andItEEvPT_iPKc,@function _Z13test_constantIt19custom_constant_andItEEvPT_iPKc: # @_Z13test_constantIt19custom_constant_andItEEvPT_iPKc @@ -28541,12 +28282,14 @@ _Z13test_constantIt19custom_constant_andItEEvPT_iPKc: # @_Z13test_constantIt19cu move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB128_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI128_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI128_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -28580,12 +28323,8 @@ _Z13test_constantIt19custom_constant_andItEEvPT_iPKc: # @_Z13test_constantIt19cu .size _Z13test_constantIt19custom_constant_andItEEvPT_iPKc, .Lfunc_end128-_Z13test_constantIt19custom_constant_andItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt28custom_multiple_constant_andItEEvPT_iPKc -.LCPI129_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt28custom_multiple_constant_andItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt28custom_multiple_constant_andItEEvPT_iPKc,comdat - .weak _Z13test_constantIt28custom_multiple_constant_andItEEvPT_iPKc + .weak _Z13test_constantIt28custom_multiple_constant_andItEEvPT_iPKc # -- Begin function _Z13test_constantIt28custom_multiple_constant_andItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt28custom_multiple_constant_andItEEvPT_iPKc,@function _Z13test_constantIt28custom_multiple_constant_andItEEvPT_iPKc: # @_Z13test_constantIt28custom_multiple_constant_andItEEvPT_iPKc @@ -28829,12 +28568,14 @@ _Z13test_constantIt28custom_multiple_constant_andItEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB129_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI129_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI129_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -28868,12 +28609,8 @@ _Z13test_constantIt28custom_multiple_constant_andItEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIt28custom_multiple_constant_andItEEvPT_iPKc, .Lfunc_end129-_Z13test_constantIt28custom_multiple_constant_andItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt18custom_constant_orItEEvPT_iPKc -.LCPI130_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt18custom_constant_orItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt18custom_constant_orItEEvPT_iPKc,comdat - .weak _Z13test_constantIt18custom_constant_orItEEvPT_iPKc + .weak _Z13test_constantIt18custom_constant_orItEEvPT_iPKc # -- Begin function _Z13test_constantIt18custom_constant_orItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt18custom_constant_orItEEvPT_iPKc,@function _Z13test_constantIt18custom_constant_orItEEvPT_iPKc: # @_Z13test_constantIt18custom_constant_orItEEvPT_iPKc @@ -29100,12 +28837,14 @@ _Z13test_constantIt18custom_constant_orItEEvPT_iPKc: # @_Z13test_constantIt18cus move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB130_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI130_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI130_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -29139,12 +28878,8 @@ _Z13test_constantIt18custom_constant_orItEEvPT_iPKc: # @_Z13test_constantIt18cus .size _Z13test_constantIt18custom_constant_orItEEvPT_iPKc, .Lfunc_end130-_Z13test_constantIt18custom_constant_orItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt27custom_multiple_constant_orItEEvPT_iPKc -.LCPI131_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt27custom_multiple_constant_orItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt27custom_multiple_constant_orItEEvPT_iPKc,comdat - .weak _Z13test_constantIt27custom_multiple_constant_orItEEvPT_iPKc + .weak _Z13test_constantIt27custom_multiple_constant_orItEEvPT_iPKc # -- Begin function _Z13test_constantIt27custom_multiple_constant_orItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt27custom_multiple_constant_orItEEvPT_iPKc,@function _Z13test_constantIt27custom_multiple_constant_orItEEvPT_iPKc: # @_Z13test_constantIt27custom_multiple_constant_orItEEvPT_iPKc @@ -29373,12 +29108,14 @@ _Z13test_constantIt27custom_multiple_constant_orItEEvPT_iPKc: # @_Z13test_consta move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB131_25: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI131_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI131_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -29412,12 +29149,8 @@ _Z13test_constantIt27custom_multiple_constant_orItEEvPT_iPKc: # @_Z13test_consta .size _Z13test_constantIt27custom_multiple_constant_orItEEvPT_iPKc, .Lfunc_end131-_Z13test_constantIt27custom_multiple_constant_orItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt19custom_constant_xorItEEvPT_iPKc -.LCPI132_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt19custom_constant_xorItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt19custom_constant_xorItEEvPT_iPKc,comdat - .weak _Z13test_constantIt19custom_constant_xorItEEvPT_iPKc + .weak _Z13test_constantIt19custom_constant_xorItEEvPT_iPKc # -- Begin function _Z13test_constantIt19custom_constant_xorItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt19custom_constant_xorItEEvPT_iPKc,@function _Z13test_constantIt19custom_constant_xorItEEvPT_iPKc: # @_Z13test_constantIt19custom_constant_xorItEEvPT_iPKc @@ -29661,12 +29394,14 @@ _Z13test_constantIt19custom_constant_xorItEEvPT_iPKc: # @_Z13test_constantIt19cu move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB132_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI132_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI132_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -29700,12 +29435,8 @@ _Z13test_constantIt19custom_constant_xorItEEvPT_iPKc: # @_Z13test_constantIt19cu .size _Z13test_constantIt19custom_constant_xorItEEvPT_iPKc, .Lfunc_end132-_Z13test_constantIt19custom_constant_xorItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIt28custom_multiple_constant_xorItEEvPT_iPKc -.LCPI133_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIt28custom_multiple_constant_xorItEEvPT_iPKc,"axG",@progbits,_Z13test_constantIt28custom_multiple_constant_xorItEEvPT_iPKc,comdat - .weak _Z13test_constantIt28custom_multiple_constant_xorItEEvPT_iPKc + .weak _Z13test_constantIt28custom_multiple_constant_xorItEEvPT_iPKc # -- Begin function _Z13test_constantIt28custom_multiple_constant_xorItEEvPT_iPKc .p2align 5 .type _Z13test_constantIt28custom_multiple_constant_xorItEEvPT_iPKc,@function _Z13test_constantIt28custom_multiple_constant_xorItEEvPT_iPKc: # @_Z13test_constantIt28custom_multiple_constant_xorItEEvPT_iPKc @@ -29949,12 +29680,14 @@ _Z13test_constantIt28custom_multiple_constant_xorItEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB133_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI133_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI133_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -29988,12 +29721,8 @@ _Z13test_constantIt28custom_multiple_constant_xorItEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIt28custom_multiple_constant_xorItEEvPT_iPKc, .Lfunc_end133-_Z13test_constantIt28custom_multiple_constant_xorItEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi10custom_twoIiEEvPT_iPKc -.LCPI134_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi10custom_twoIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi10custom_twoIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi10custom_twoIiEEvPT_iPKc + .weak _Z13test_constantIi10custom_twoIiEEvPT_iPKc # -- Begin function _Z13test_constantIi10custom_twoIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi10custom_twoIiEEvPT_iPKc,@function _Z13test_constantIi10custom_twoIiEEvPT_iPKc: # @_Z13test_constantIi10custom_twoIiEEvPT_iPKc @@ -30095,12 +29824,14 @@ _Z13test_constantIi10custom_twoIiEEvPT_iPKc: # @_Z13test_constantIi10custom_twoI move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB134_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI134_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI134_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -30131,12 +29862,8 @@ _Z13test_constantIi10custom_twoIiEEvPT_iPKc: # @_Z13test_constantIi10custom_twoI .size _Z13test_constantIi10custom_twoIiEEvPT_iPKc, .Lfunc_end134-_Z13test_constantIi10custom_twoIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi20custom_add_constantsIiEEvPT_iPKc -.LCPI135_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi20custom_add_constantsIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi20custom_add_constantsIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi20custom_add_constantsIiEEvPT_iPKc + .weak _Z13test_constantIi20custom_add_constantsIiEEvPT_iPKc # -- Begin function _Z13test_constantIi20custom_add_constantsIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi20custom_add_constantsIiEEvPT_iPKc,@function _Z13test_constantIi20custom_add_constantsIiEEvPT_iPKc: # @_Z13test_constantIi20custom_add_constantsIiEEvPT_iPKc @@ -30238,12 +29965,14 @@ _Z13test_constantIi20custom_add_constantsIiEEvPT_iPKc: # @_Z13test_constantIi20c move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB135_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI135_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI135_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -30274,12 +30003,8 @@ _Z13test_constantIi20custom_add_constantsIiEEvPT_iPKc: # @_Z13test_constantIi20c .size _Z13test_constantIi20custom_add_constantsIiEEvPT_iPKc, .Lfunc_end135-_Z13test_constantIi20custom_add_constantsIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi20custom_sub_constantsIiEEvPT_iPKc -.LCPI136_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi20custom_sub_constantsIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi20custom_sub_constantsIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi20custom_sub_constantsIiEEvPT_iPKc + .weak _Z13test_constantIi20custom_sub_constantsIiEEvPT_iPKc # -- Begin function _Z13test_constantIi20custom_sub_constantsIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi20custom_sub_constantsIiEEvPT_iPKc,@function _Z13test_constantIi20custom_sub_constantsIiEEvPT_iPKc: # @_Z13test_constantIi20custom_sub_constantsIiEEvPT_iPKc @@ -30381,12 +30106,14 @@ _Z13test_constantIi20custom_sub_constantsIiEEvPT_iPKc: # @_Z13test_constantIi20c move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB136_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI136_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI136_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -30417,12 +30144,8 @@ _Z13test_constantIi20custom_sub_constantsIiEEvPT_iPKc: # @_Z13test_constantIi20c .size _Z13test_constantIi20custom_sub_constantsIiEEvPT_iPKc, .Lfunc_end136-_Z13test_constantIi20custom_sub_constantsIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi25custom_multiply_constantsIiEEvPT_iPKc -.LCPI137_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi25custom_multiply_constantsIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi25custom_multiply_constantsIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi25custom_multiply_constantsIiEEvPT_iPKc + .weak _Z13test_constantIi25custom_multiply_constantsIiEEvPT_iPKc # -- Begin function _Z13test_constantIi25custom_multiply_constantsIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi25custom_multiply_constantsIiEEvPT_iPKc,@function _Z13test_constantIi25custom_multiply_constantsIiEEvPT_iPKc: # @_Z13test_constantIi25custom_multiply_constantsIiEEvPT_iPKc @@ -30526,12 +30249,14 @@ _Z13test_constantIi25custom_multiply_constantsIiEEvPT_iPKc: # @_Z13test_constant move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB137_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI137_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI137_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -30562,12 +30287,8 @@ _Z13test_constantIi25custom_multiply_constantsIiEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantIi25custom_multiply_constantsIiEEvPT_iPKc, .Lfunc_end137-_Z13test_constantIi25custom_multiply_constantsIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi23custom_divide_constantsIiEEvPT_iPKc -.LCPI138_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi23custom_divide_constantsIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi23custom_divide_constantsIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi23custom_divide_constantsIiEEvPT_iPKc + .weak _Z13test_constantIi23custom_divide_constantsIiEEvPT_iPKc # -- Begin function _Z13test_constantIi23custom_divide_constantsIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi23custom_divide_constantsIiEEvPT_iPKc,@function _Z13test_constantIi23custom_divide_constantsIiEEvPT_iPKc: # @_Z13test_constantIi23custom_divide_constantsIiEEvPT_iPKc @@ -30669,12 +30390,14 @@ _Z13test_constantIi23custom_divide_constantsIiEEvPT_iPKc: # @_Z13test_constantIi move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB138_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI138_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI138_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -30705,12 +30428,8 @@ _Z13test_constantIi23custom_divide_constantsIiEEvPT_iPKc: # @_Z13test_constantIi .size _Z13test_constantIi23custom_divide_constantsIiEEvPT_iPKc, .Lfunc_end138-_Z13test_constantIi23custom_divide_constantsIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi20custom_mod_constantsIiEEvPT_iPKc -.LCPI139_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi20custom_mod_constantsIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi20custom_mod_constantsIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi20custom_mod_constantsIiEEvPT_iPKc + .weak _Z13test_constantIi20custom_mod_constantsIiEEvPT_iPKc # -- Begin function _Z13test_constantIi20custom_mod_constantsIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi20custom_mod_constantsIiEEvPT_iPKc,@function _Z13test_constantIi20custom_mod_constantsIiEEvPT_iPKc: # @_Z13test_constantIi20custom_mod_constantsIiEEvPT_iPKc @@ -30812,12 +30531,14 @@ _Z13test_constantIi20custom_mod_constantsIiEEvPT_iPKc: # @_Z13test_constantIi20c move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB139_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI139_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI139_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -30848,12 +30569,8 @@ _Z13test_constantIi20custom_mod_constantsIiEEvPT_iPKc: # @_Z13test_constantIi20c .size _Z13test_constantIi20custom_mod_constantsIiEEvPT_iPKc, .Lfunc_end139-_Z13test_constantIi20custom_mod_constantsIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi22custom_equal_constantsIiEEvPT_iPKc -.LCPI140_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi22custom_equal_constantsIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi22custom_equal_constantsIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi22custom_equal_constantsIiEEvPT_iPKc + .weak _Z13test_constantIi22custom_equal_constantsIiEEvPT_iPKc # -- Begin function _Z13test_constantIi22custom_equal_constantsIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi22custom_equal_constantsIiEEvPT_iPKc,@function _Z13test_constantIi22custom_equal_constantsIiEEvPT_iPKc: # @_Z13test_constantIi22custom_equal_constantsIiEEvPT_iPKc @@ -30906,12 +30623,14 @@ _Z13test_constantIi22custom_equal_constantsIiEEvPT_iPKc: # @_Z13test_constantIi2 # %bb.3: # %._crit_edge.i ld.w $a1, $s1, %pc_lo12(current_test) .LBB140_4: # %_Z13record_resultdPKc.exit - pcalau12i $a2, %pc_hi20(.LCPI140_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI140_0) sub.d $a2, $s0, $s2 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a2, $a2, 1042 movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a2, $a1, $a0, 4 slli.d $a3, $a1, 4 fstx.d $fa0, $a0, $a3 @@ -30940,12 +30659,8 @@ _Z13test_constantIi22custom_equal_constantsIiEEvPT_iPKc: # @_Z13test_constantIi2 .size _Z13test_constantIi22custom_equal_constantsIiEEvPT_iPKc, .Lfunc_end140-_Z13test_constantIi22custom_equal_constantsIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi25custom_notequal_constantsIiEEvPT_iPKc -.LCPI141_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi25custom_notequal_constantsIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi25custom_notequal_constantsIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi25custom_notequal_constantsIiEEvPT_iPKc + .weak _Z13test_constantIi25custom_notequal_constantsIiEEvPT_iPKc # -- Begin function _Z13test_constantIi25custom_notequal_constantsIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi25custom_notequal_constantsIiEEvPT_iPKc,@function _Z13test_constantIi25custom_notequal_constantsIiEEvPT_iPKc: # @_Z13test_constantIi25custom_notequal_constantsIiEEvPT_iPKc @@ -31047,12 +30762,14 @@ _Z13test_constantIi25custom_notequal_constantsIiEEvPT_iPKc: # @_Z13test_constant move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB141_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI141_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI141_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -31083,12 +30800,8 @@ _Z13test_constantIi25custom_notequal_constantsIiEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantIi25custom_notequal_constantsIiEEvPT_iPKc, .Lfunc_end141-_Z13test_constantIi25custom_notequal_constantsIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi28custom_greaterthan_constantsIiEEvPT_iPKc -.LCPI142_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi28custom_greaterthan_constantsIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi28custom_greaterthan_constantsIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi28custom_greaterthan_constantsIiEEvPT_iPKc + .weak _Z13test_constantIi28custom_greaterthan_constantsIiEEvPT_iPKc # -- Begin function _Z13test_constantIi28custom_greaterthan_constantsIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi28custom_greaterthan_constantsIiEEvPT_iPKc,@function _Z13test_constantIi28custom_greaterthan_constantsIiEEvPT_iPKc: # @_Z13test_constantIi28custom_greaterthan_constantsIiEEvPT_iPKc @@ -31190,12 +30903,14 @@ _Z13test_constantIi28custom_greaterthan_constantsIiEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB142_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI142_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI142_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -31226,12 +30941,8 @@ _Z13test_constantIi28custom_greaterthan_constantsIiEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIi28custom_greaterthan_constantsIiEEvPT_iPKc, .Lfunc_end142-_Z13test_constantIi28custom_greaterthan_constantsIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi25custom_lessthan_constantsIiEEvPT_iPKc -.LCPI143_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi25custom_lessthan_constantsIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi25custom_lessthan_constantsIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi25custom_lessthan_constantsIiEEvPT_iPKc + .weak _Z13test_constantIi25custom_lessthan_constantsIiEEvPT_iPKc # -- Begin function _Z13test_constantIi25custom_lessthan_constantsIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi25custom_lessthan_constantsIiEEvPT_iPKc,@function _Z13test_constantIi25custom_lessthan_constantsIiEEvPT_iPKc: # @_Z13test_constantIi25custom_lessthan_constantsIiEEvPT_iPKc @@ -31284,12 +30995,14 @@ _Z13test_constantIi25custom_lessthan_constantsIiEEvPT_iPKc: # @_Z13test_constant # %bb.3: # %._crit_edge.i ld.w $a1, $s1, %pc_lo12(current_test) .LBB143_4: # %_Z13record_resultdPKc.exit - pcalau12i $a2, %pc_hi20(.LCPI143_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI143_0) sub.d $a2, $s0, $s2 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a2, $a2, 1042 movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a2, $a1, $a0, 4 slli.d $a3, $a1, 4 fstx.d $fa0, $a0, $a3 @@ -31318,12 +31031,8 @@ _Z13test_constantIi25custom_lessthan_constantsIiEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantIi25custom_lessthan_constantsIiEEvPT_iPKc, .Lfunc_end143-_Z13test_constantIi25custom_lessthan_constantsIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi33custom_greaterthanequal_constantsIiEEvPT_iPKc -.LCPI144_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi33custom_greaterthanequal_constantsIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi33custom_greaterthanequal_constantsIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi33custom_greaterthanequal_constantsIiEEvPT_iPKc + .weak _Z13test_constantIi33custom_greaterthanequal_constantsIiEEvPT_iPKc # -- Begin function _Z13test_constantIi33custom_greaterthanequal_constantsIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi33custom_greaterthanequal_constantsIiEEvPT_iPKc,@function _Z13test_constantIi33custom_greaterthanequal_constantsIiEEvPT_iPKc: # @_Z13test_constantIi33custom_greaterthanequal_constantsIiEEvPT_iPKc @@ -31425,12 +31134,14 @@ _Z13test_constantIi33custom_greaterthanequal_constantsIiEEvPT_iPKc: # @_Z13test_ move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB144_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI144_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI144_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -31461,12 +31172,8 @@ _Z13test_constantIi33custom_greaterthanequal_constantsIiEEvPT_iPKc: # @_Z13test_ .size _Z13test_constantIi33custom_greaterthanequal_constantsIiEEvPT_iPKc, .Lfunc_end144-_Z13test_constantIi33custom_greaterthanequal_constantsIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi30custom_lessthanequal_constantsIiEEvPT_iPKc -.LCPI145_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi30custom_lessthanequal_constantsIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi30custom_lessthanequal_constantsIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi30custom_lessthanequal_constantsIiEEvPT_iPKc + .weak _Z13test_constantIi30custom_lessthanequal_constantsIiEEvPT_iPKc # -- Begin function _Z13test_constantIi30custom_lessthanequal_constantsIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi30custom_lessthanequal_constantsIiEEvPT_iPKc,@function _Z13test_constantIi30custom_lessthanequal_constantsIiEEvPT_iPKc: # @_Z13test_constantIi30custom_lessthanequal_constantsIiEEvPT_iPKc @@ -31519,12 +31226,14 @@ _Z13test_constantIi30custom_lessthanequal_constantsIiEEvPT_iPKc: # @_Z13test_con # %bb.3: # %._crit_edge.i ld.w $a1, $s1, %pc_lo12(current_test) .LBB145_4: # %_Z13record_resultdPKc.exit - pcalau12i $a2, %pc_hi20(.LCPI145_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI145_0) sub.d $a2, $s0, $s2 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a2, $a2, 1042 movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a2, $a1, $a0, 4 slli.d $a3, $a1, 4 fstx.d $fa0, $a0, $a3 @@ -31553,12 +31262,8 @@ _Z13test_constantIi30custom_lessthanequal_constantsIiEEvPT_iPKc: # @_Z13test_con .size _Z13test_constantIi30custom_lessthanequal_constantsIiEEvPT_iPKc, .Lfunc_end145-_Z13test_constantIi30custom_lessthanequal_constantsIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi20custom_and_constantsIiEEvPT_iPKc -.LCPI146_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi20custom_and_constantsIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi20custom_and_constantsIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi20custom_and_constantsIiEEvPT_iPKc + .weak _Z13test_constantIi20custom_and_constantsIiEEvPT_iPKc # -- Begin function _Z13test_constantIi20custom_and_constantsIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi20custom_and_constantsIiEEvPT_iPKc,@function _Z13test_constantIi20custom_and_constantsIiEEvPT_iPKc: # @_Z13test_constantIi20custom_and_constantsIiEEvPT_iPKc @@ -31660,12 +31365,14 @@ _Z13test_constantIi20custom_and_constantsIiEEvPT_iPKc: # @_Z13test_constantIi20c move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB146_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI146_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI146_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -31696,12 +31403,8 @@ _Z13test_constantIi20custom_and_constantsIiEEvPT_iPKc: # @_Z13test_constantIi20c .size _Z13test_constantIi20custom_and_constantsIiEEvPT_iPKc, .Lfunc_end146-_Z13test_constantIi20custom_and_constantsIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi19custom_or_constantsIiEEvPT_iPKc -.LCPI147_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi19custom_or_constantsIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi19custom_or_constantsIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi19custom_or_constantsIiEEvPT_iPKc + .weak _Z13test_constantIi19custom_or_constantsIiEEvPT_iPKc # -- Begin function _Z13test_constantIi19custom_or_constantsIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi19custom_or_constantsIiEEvPT_iPKc,@function _Z13test_constantIi19custom_or_constantsIiEEvPT_iPKc: # @_Z13test_constantIi19custom_or_constantsIiEEvPT_iPKc @@ -31803,12 +31506,14 @@ _Z13test_constantIi19custom_or_constantsIiEEvPT_iPKc: # @_Z13test_constantIi19cu move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB147_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI147_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI147_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -31839,12 +31544,8 @@ _Z13test_constantIi19custom_or_constantsIiEEvPT_iPKc: # @_Z13test_constantIi19cu .size _Z13test_constantIi19custom_or_constantsIiEEvPT_iPKc, .Lfunc_end147-_Z13test_constantIi19custom_or_constantsIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi20custom_xor_constantsIiEEvPT_iPKc -.LCPI148_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi20custom_xor_constantsIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi20custom_xor_constantsIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi20custom_xor_constantsIiEEvPT_iPKc + .weak _Z13test_constantIi20custom_xor_constantsIiEEvPT_iPKc # -- Begin function _Z13test_constantIi20custom_xor_constantsIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi20custom_xor_constantsIiEEvPT_iPKc,@function _Z13test_constantIi20custom_xor_constantsIiEEvPT_iPKc: # @_Z13test_constantIi20custom_xor_constantsIiEEvPT_iPKc @@ -31946,12 +31647,14 @@ _Z13test_constantIi20custom_xor_constantsIiEEvPT_iPKc: # @_Z13test_constantIi20c move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB148_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI148_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI148_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -31982,12 +31685,8 @@ _Z13test_constantIi20custom_xor_constantsIiEEvPT_iPKc: # @_Z13test_constantIi20c .size _Z13test_constantIi20custom_xor_constantsIiEEvPT_iPKc, .Lfunc_end148-_Z13test_constantIi20custom_xor_constantsIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi19custom_constant_addIiEEvPT_iPKc -.LCPI149_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi19custom_constant_addIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi19custom_constant_addIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi19custom_constant_addIiEEvPT_iPKc + .weak _Z13test_constantIi19custom_constant_addIiEEvPT_iPKc # -- Begin function _Z13test_constantIi19custom_constant_addIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi19custom_constant_addIiEEvPT_iPKc,@function _Z13test_constantIi19custom_constant_addIiEEvPT_iPKc: # @_Z13test_constantIi19custom_constant_addIiEEvPT_iPKc @@ -32181,12 +31880,14 @@ _Z13test_constantIi19custom_constant_addIiEEvPT_iPKc: # @_Z13test_constantIi19cu ld.w $a0, $s3, %pc_lo12(current_test) .LBB149_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI149_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI149_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -32219,12 +31920,8 @@ _Z13test_constantIi19custom_constant_addIiEEvPT_iPKc: # @_Z13test_constantIi19cu .size _Z13test_constantIi19custom_constant_addIiEEvPT_iPKc, .Lfunc_end149-_Z13test_constantIi19custom_constant_addIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi28custom_multiple_constant_addIiEEvPT_iPKc -.LCPI150_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi28custom_multiple_constant_addIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi28custom_multiple_constant_addIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi28custom_multiple_constant_addIiEEvPT_iPKc + .weak _Z13test_constantIi28custom_multiple_constant_addIiEEvPT_iPKc # -- Begin function _Z13test_constantIi28custom_multiple_constant_addIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi28custom_multiple_constant_addIiEEvPT_iPKc,@function _Z13test_constantIi28custom_multiple_constant_addIiEEvPT_iPKc: # @_Z13test_constantIi28custom_multiple_constant_addIiEEvPT_iPKc @@ -32418,12 +32115,14 @@ _Z13test_constantIi28custom_multiple_constant_addIiEEvPT_iPKc: # @_Z13test_const ld.w $a0, $s3, %pc_lo12(current_test) .LBB150_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI150_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI150_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -32456,12 +32155,8 @@ _Z13test_constantIi28custom_multiple_constant_addIiEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIi28custom_multiple_constant_addIiEEvPT_iPKc, .Lfunc_end150-_Z13test_constantIi28custom_multiple_constant_addIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi19custom_constant_subIiEEvPT_iPKc -.LCPI151_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi19custom_constant_subIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi19custom_constant_subIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi19custom_constant_subIiEEvPT_iPKc + .weak _Z13test_constantIi19custom_constant_subIiEEvPT_iPKc # -- Begin function _Z13test_constantIi19custom_constant_subIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi19custom_constant_subIiEEvPT_iPKc,@function _Z13test_constantIi19custom_constant_subIiEEvPT_iPKc: # @_Z13test_constantIi19custom_constant_subIiEEvPT_iPKc @@ -32658,12 +32353,14 @@ _Z13test_constantIi19custom_constant_subIiEEvPT_iPKc: # @_Z13test_constantIi19cu ld.w $a0, $s3, %pc_lo12(current_test) .LBB151_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI151_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI151_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -32696,12 +32393,8 @@ _Z13test_constantIi19custom_constant_subIiEEvPT_iPKc: # @_Z13test_constantIi19cu .size _Z13test_constantIi19custom_constant_subIiEEvPT_iPKc, .Lfunc_end151-_Z13test_constantIi19custom_constant_subIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi28custom_multiple_constant_subIiEEvPT_iPKc -.LCPI152_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi28custom_multiple_constant_subIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi28custom_multiple_constant_subIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi28custom_multiple_constant_subIiEEvPT_iPKc + .weak _Z13test_constantIi28custom_multiple_constant_subIiEEvPT_iPKc # -- Begin function _Z13test_constantIi28custom_multiple_constant_subIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi28custom_multiple_constant_subIiEEvPT_iPKc,@function _Z13test_constantIi28custom_multiple_constant_subIiEEvPT_iPKc: # @_Z13test_constantIi28custom_multiple_constant_subIiEEvPT_iPKc @@ -32898,12 +32591,14 @@ _Z13test_constantIi28custom_multiple_constant_subIiEEvPT_iPKc: # @_Z13test_const ld.w $a0, $s3, %pc_lo12(current_test) .LBB152_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI152_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI152_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -32936,12 +32631,8 @@ _Z13test_constantIi28custom_multiple_constant_subIiEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIi28custom_multiple_constant_subIiEEvPT_iPKc, .Lfunc_end152-_Z13test_constantIi28custom_multiple_constant_subIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi24custom_constant_multiplyIiEEvPT_iPKc -.LCPI153_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi24custom_constant_multiplyIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi24custom_constant_multiplyIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi24custom_constant_multiplyIiEEvPT_iPKc + .weak _Z13test_constantIi24custom_constant_multiplyIiEEvPT_iPKc # -- Begin function _Z13test_constantIi24custom_constant_multiplyIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi24custom_constant_multiplyIiEEvPT_iPKc,@function _Z13test_constantIi24custom_constant_multiplyIiEEvPT_iPKc: # @_Z13test_constantIi24custom_constant_multiplyIiEEvPT_iPKc @@ -33129,12 +32820,14 @@ _Z13test_constantIi24custom_constant_multiplyIiEEvPT_iPKc: # @_Z13test_constantI ld.w $a0, $s3, %pc_lo12(current_test) .LBB153_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI153_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI153_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -33167,12 +32860,8 @@ _Z13test_constantIi24custom_constant_multiplyIiEEvPT_iPKc: # @_Z13test_constantI .size _Z13test_constantIi24custom_constant_multiplyIiEEvPT_iPKc, .Lfunc_end153-_Z13test_constantIi24custom_constant_multiplyIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi33custom_multiple_constant_multiplyIiEEvPT_iPKc -.LCPI154_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi33custom_multiple_constant_multiplyIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi33custom_multiple_constant_multiplyIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi33custom_multiple_constant_multiplyIiEEvPT_iPKc + .weak _Z13test_constantIi33custom_multiple_constant_multiplyIiEEvPT_iPKc # -- Begin function _Z13test_constantIi33custom_multiple_constant_multiplyIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi33custom_multiple_constant_multiplyIiEEvPT_iPKc,@function _Z13test_constantIi33custom_multiple_constant_multiplyIiEEvPT_iPKc: # @_Z13test_constantIi33custom_multiple_constant_multiplyIiEEvPT_iPKc @@ -33360,12 +33049,14 @@ _Z13test_constantIi33custom_multiple_constant_multiplyIiEEvPT_iPKc: # @_Z13test_ ld.w $a0, $s3, %pc_lo12(current_test) .LBB154_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI154_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI154_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -33398,12 +33089,8 @@ _Z13test_constantIi33custom_multiple_constant_multiplyIiEEvPT_iPKc: # @_Z13test_ .size _Z13test_constantIi33custom_multiple_constant_multiplyIiEEvPT_iPKc, .Lfunc_end154-_Z13test_constantIi33custom_multiple_constant_multiplyIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi34custom_multiple_constant_multiply2IiEEvPT_iPKc -.LCPI155_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi34custom_multiple_constant_multiply2IiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi34custom_multiple_constant_multiply2IiEEvPT_iPKc,comdat - .weak _Z13test_constantIi34custom_multiple_constant_multiply2IiEEvPT_iPKc + .weak _Z13test_constantIi34custom_multiple_constant_multiply2IiEEvPT_iPKc # -- Begin function _Z13test_constantIi34custom_multiple_constant_multiply2IiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi34custom_multiple_constant_multiply2IiEEvPT_iPKc,@function _Z13test_constantIi34custom_multiple_constant_multiply2IiEEvPT_iPKc: # @_Z13test_constantIi34custom_multiple_constant_multiply2IiEEvPT_iPKc @@ -33600,12 +33287,14 @@ _Z13test_constantIi34custom_multiple_constant_multiply2IiEEvPT_iPKc: # @_Z13test ld.w $a0, $s3, %pc_lo12(current_test) .LBB155_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI155_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI155_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -33638,12 +33327,8 @@ _Z13test_constantIi34custom_multiple_constant_multiply2IiEEvPT_iPKc: # @_Z13test .size _Z13test_constantIi34custom_multiple_constant_multiply2IiEEvPT_iPKc, .Lfunc_end155-_Z13test_constantIi34custom_multiple_constant_multiply2IiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi22custom_constant_divideIiEEvPT_iPKc -.LCPI156_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi22custom_constant_divideIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi22custom_constant_divideIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi22custom_constant_divideIiEEvPT_iPKc + .weak _Z13test_constantIi22custom_constant_divideIiEEvPT_iPKc # -- Begin function _Z13test_constantIi22custom_constant_divideIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi22custom_constant_divideIiEEvPT_iPKc,@function _Z13test_constantIi22custom_constant_divideIiEEvPT_iPKc: # @_Z13test_constantIi22custom_constant_divideIiEEvPT_iPKc @@ -33849,12 +33534,14 @@ _Z13test_constantIi22custom_constant_divideIiEEvPT_iPKc: # @_Z13test_constantIi2 ld.w $a0, $s3, %pc_lo12(current_test) .LBB156_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI156_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI156_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -33887,12 +33574,8 @@ _Z13test_constantIi22custom_constant_divideIiEEvPT_iPKc: # @_Z13test_constantIi2 .size _Z13test_constantIi22custom_constant_divideIiEEvPT_iPKc, .Lfunc_end156-_Z13test_constantIi22custom_constant_divideIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi31custom_multiple_constant_divideIiEEvPT_iPKc -.LCPI157_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi31custom_multiple_constant_divideIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi31custom_multiple_constant_divideIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi31custom_multiple_constant_divideIiEEvPT_iPKc + .weak _Z13test_constantIi31custom_multiple_constant_divideIiEEvPT_iPKc # -- Begin function _Z13test_constantIi31custom_multiple_constant_divideIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi31custom_multiple_constant_divideIiEEvPT_iPKc,@function _Z13test_constantIi31custom_multiple_constant_divideIiEEvPT_iPKc: # @_Z13test_constantIi31custom_multiple_constant_divideIiEEvPT_iPKc @@ -34104,12 +33787,14 @@ _Z13test_constantIi31custom_multiple_constant_divideIiEEvPT_iPKc: # @_Z13test_co ld.w $a0, $s3, %pc_lo12(current_test) .LBB157_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI157_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI157_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -34142,12 +33827,8 @@ _Z13test_constantIi31custom_multiple_constant_divideIiEEvPT_iPKc: # @_Z13test_co .size _Z13test_constantIi31custom_multiple_constant_divideIiEEvPT_iPKc, .Lfunc_end157-_Z13test_constantIi31custom_multiple_constant_divideIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi32custom_multiple_constant_divide2IiEEvPT_iPKc -.LCPI158_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi32custom_multiple_constant_divide2IiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi32custom_multiple_constant_divide2IiEEvPT_iPKc,comdat - .weak _Z13test_constantIi32custom_multiple_constant_divide2IiEEvPT_iPKc + .weak _Z13test_constantIi32custom_multiple_constant_divide2IiEEvPT_iPKc # -- Begin function _Z13test_constantIi32custom_multiple_constant_divide2IiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi32custom_multiple_constant_divide2IiEEvPT_iPKc,@function _Z13test_constantIi32custom_multiple_constant_divide2IiEEvPT_iPKc: # @_Z13test_constantIi32custom_multiple_constant_divide2IiEEvPT_iPKc @@ -34341,12 +34022,14 @@ _Z13test_constantIi32custom_multiple_constant_divide2IiEEvPT_iPKc: # @_Z13test_c ld.w $a0, $s3, %pc_lo12(current_test) .LBB158_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI158_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI158_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -34379,12 +34062,8 @@ _Z13test_constantIi32custom_multiple_constant_divide2IiEEvPT_iPKc: # @_Z13test_c .size _Z13test_constantIi32custom_multiple_constant_divide2IiEEvPT_iPKc, .Lfunc_end158-_Z13test_constantIi32custom_multiple_constant_divide2IiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi30custom_multiple_constant_mixedIiEEvPT_iPKc -.LCPI159_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi30custom_multiple_constant_mixedIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi30custom_multiple_constant_mixedIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi30custom_multiple_constant_mixedIiEEvPT_iPKc + .weak _Z13test_constantIi30custom_multiple_constant_mixedIiEEvPT_iPKc # -- Begin function _Z13test_constantIi30custom_multiple_constant_mixedIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi30custom_multiple_constant_mixedIiEEvPT_iPKc,@function _Z13test_constantIi30custom_multiple_constant_mixedIiEEvPT_iPKc: # @_Z13test_constantIi30custom_multiple_constant_mixedIiEEvPT_iPKc @@ -34566,12 +34245,14 @@ _Z13test_constantIi30custom_multiple_constant_mixedIiEEvPT_iPKc: # @_Z13test_con ld.w $a0, $s3, %pc_lo12(current_test) .LBB159_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI159_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI159_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -34604,12 +34285,8 @@ _Z13test_constantIi30custom_multiple_constant_mixedIiEEvPT_iPKc: # @_Z13test_con .size _Z13test_constantIi30custom_multiple_constant_mixedIiEEvPT_iPKc, .Lfunc_end159-_Z13test_constantIi30custom_multiple_constant_mixedIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi19custom_constant_andIiEEvPT_iPKc -.LCPI160_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi19custom_constant_andIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi19custom_constant_andIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi19custom_constant_andIiEEvPT_iPKc + .weak _Z13test_constantIi19custom_constant_andIiEEvPT_iPKc # -- Begin function _Z13test_constantIi19custom_constant_andIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi19custom_constant_andIiEEvPT_iPKc,@function _Z13test_constantIi19custom_constant_andIiEEvPT_iPKc: # @_Z13test_constantIi19custom_constant_andIiEEvPT_iPKc @@ -34799,12 +34476,14 @@ _Z13test_constantIi19custom_constant_andIiEEvPT_iPKc: # @_Z13test_constantIi19cu ld.w $a0, $s3, %pc_lo12(current_test) .LBB160_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI160_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI160_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -34837,12 +34516,8 @@ _Z13test_constantIi19custom_constant_andIiEEvPT_iPKc: # @_Z13test_constantIi19cu .size _Z13test_constantIi19custom_constant_andIiEEvPT_iPKc, .Lfunc_end160-_Z13test_constantIi19custom_constant_andIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi28custom_multiple_constant_andIiEEvPT_iPKc -.LCPI161_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi28custom_multiple_constant_andIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi28custom_multiple_constant_andIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi28custom_multiple_constant_andIiEEvPT_iPKc + .weak _Z13test_constantIi28custom_multiple_constant_andIiEEvPT_iPKc # -- Begin function _Z13test_constantIi28custom_multiple_constant_andIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi28custom_multiple_constant_andIiEEvPT_iPKc,@function _Z13test_constantIi28custom_multiple_constant_andIiEEvPT_iPKc: # @_Z13test_constantIi28custom_multiple_constant_andIiEEvPT_iPKc @@ -35032,12 +34707,14 @@ _Z13test_constantIi28custom_multiple_constant_andIiEEvPT_iPKc: # @_Z13test_const ld.w $a0, $s3, %pc_lo12(current_test) .LBB161_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI161_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI161_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -35070,12 +34747,8 @@ _Z13test_constantIi28custom_multiple_constant_andIiEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIi28custom_multiple_constant_andIiEEvPT_iPKc, .Lfunc_end161-_Z13test_constantIi28custom_multiple_constant_andIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi18custom_constant_orIiEEvPT_iPKc -.LCPI162_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi18custom_constant_orIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi18custom_constant_orIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi18custom_constant_orIiEEvPT_iPKc + .weak _Z13test_constantIi18custom_constant_orIiEEvPT_iPKc # -- Begin function _Z13test_constantIi18custom_constant_orIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi18custom_constant_orIiEEvPT_iPKc,@function _Z13test_constantIi18custom_constant_orIiEEvPT_iPKc: # @_Z13test_constantIi18custom_constant_orIiEEvPT_iPKc @@ -35253,12 +34926,14 @@ _Z13test_constantIi18custom_constant_orIiEEvPT_iPKc: # @_Z13test_constantIi18cus ld.w $a0, $s3, %pc_lo12(current_test) .LBB162_19: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI162_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI162_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -35291,12 +34966,8 @@ _Z13test_constantIi18custom_constant_orIiEEvPT_iPKc: # @_Z13test_constantIi18cus .size _Z13test_constantIi18custom_constant_orIiEEvPT_iPKc, .Lfunc_end162-_Z13test_constantIi18custom_constant_orIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi27custom_multiple_constant_orIiEEvPT_iPKc -.LCPI163_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi27custom_multiple_constant_orIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi27custom_multiple_constant_orIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi27custom_multiple_constant_orIiEEvPT_iPKc + .weak _Z13test_constantIi27custom_multiple_constant_orIiEEvPT_iPKc # -- Begin function _Z13test_constantIi27custom_multiple_constant_orIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi27custom_multiple_constant_orIiEEvPT_iPKc,@function _Z13test_constantIi27custom_multiple_constant_orIiEEvPT_iPKc: # @_Z13test_constantIi27custom_multiple_constant_orIiEEvPT_iPKc @@ -35474,12 +35145,14 @@ _Z13test_constantIi27custom_multiple_constant_orIiEEvPT_iPKc: # @_Z13test_consta ld.w $a0, $s3, %pc_lo12(current_test) .LBB163_19: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI163_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI163_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -35512,12 +35185,8 @@ _Z13test_constantIi27custom_multiple_constant_orIiEEvPT_iPKc: # @_Z13test_consta .size _Z13test_constantIi27custom_multiple_constant_orIiEEvPT_iPKc, .Lfunc_end163-_Z13test_constantIi27custom_multiple_constant_orIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi19custom_constant_xorIiEEvPT_iPKc -.LCPI164_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi19custom_constant_xorIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi19custom_constant_xorIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi19custom_constant_xorIiEEvPT_iPKc + .weak _Z13test_constantIi19custom_constant_xorIiEEvPT_iPKc # -- Begin function _Z13test_constantIi19custom_constant_xorIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi19custom_constant_xorIiEEvPT_iPKc,@function _Z13test_constantIi19custom_constant_xorIiEEvPT_iPKc: # @_Z13test_constantIi19custom_constant_xorIiEEvPT_iPKc @@ -35707,12 +35376,14 @@ _Z13test_constantIi19custom_constant_xorIiEEvPT_iPKc: # @_Z13test_constantIi19cu ld.w $a0, $s3, %pc_lo12(current_test) .LBB164_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI164_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI164_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -35745,12 +35416,8 @@ _Z13test_constantIi19custom_constant_xorIiEEvPT_iPKc: # @_Z13test_constantIi19cu .size _Z13test_constantIi19custom_constant_xorIiEEvPT_iPKc, .Lfunc_end164-_Z13test_constantIi19custom_constant_xorIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIi28custom_multiple_constant_xorIiEEvPT_iPKc -.LCPI165_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIi28custom_multiple_constant_xorIiEEvPT_iPKc,"axG",@progbits,_Z13test_constantIi28custom_multiple_constant_xorIiEEvPT_iPKc,comdat - .weak _Z13test_constantIi28custom_multiple_constant_xorIiEEvPT_iPKc + .weak _Z13test_constantIi28custom_multiple_constant_xorIiEEvPT_iPKc # -- Begin function _Z13test_constantIi28custom_multiple_constant_xorIiEEvPT_iPKc .p2align 5 .type _Z13test_constantIi28custom_multiple_constant_xorIiEEvPT_iPKc,@function _Z13test_constantIi28custom_multiple_constant_xorIiEEvPT_iPKc: # @_Z13test_constantIi28custom_multiple_constant_xorIiEEvPT_iPKc @@ -35940,12 +35607,14 @@ _Z13test_constantIi28custom_multiple_constant_xorIiEEvPT_iPKc: # @_Z13test_const ld.w $a0, $s3, %pc_lo12(current_test) .LBB165_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI165_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI165_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -35978,12 +35647,8 @@ _Z13test_constantIi28custom_multiple_constant_xorIiEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIi28custom_multiple_constant_xorIiEEvPT_iPKc, .Lfunc_end165-_Z13test_constantIi28custom_multiple_constant_xorIiEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj10custom_twoIjEEvPT_iPKc -.LCPI166_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj10custom_twoIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj10custom_twoIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj10custom_twoIjEEvPT_iPKc + .weak _Z13test_constantIj10custom_twoIjEEvPT_iPKc # -- Begin function _Z13test_constantIj10custom_twoIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj10custom_twoIjEEvPT_iPKc,@function _Z13test_constantIj10custom_twoIjEEvPT_iPKc: # @_Z13test_constantIj10custom_twoIjEEvPT_iPKc @@ -36085,12 +35750,14 @@ _Z13test_constantIj10custom_twoIjEEvPT_iPKc: # @_Z13test_constantIj10custom_twoI move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB166_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI166_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI166_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -36121,12 +35788,8 @@ _Z13test_constantIj10custom_twoIjEEvPT_iPKc: # @_Z13test_constantIj10custom_twoI .size _Z13test_constantIj10custom_twoIjEEvPT_iPKc, .Lfunc_end166-_Z13test_constantIj10custom_twoIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj20custom_add_constantsIjEEvPT_iPKc -.LCPI167_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj20custom_add_constantsIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj20custom_add_constantsIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj20custom_add_constantsIjEEvPT_iPKc + .weak _Z13test_constantIj20custom_add_constantsIjEEvPT_iPKc # -- Begin function _Z13test_constantIj20custom_add_constantsIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj20custom_add_constantsIjEEvPT_iPKc,@function _Z13test_constantIj20custom_add_constantsIjEEvPT_iPKc: # @_Z13test_constantIj20custom_add_constantsIjEEvPT_iPKc @@ -36228,12 +35891,14 @@ _Z13test_constantIj20custom_add_constantsIjEEvPT_iPKc: # @_Z13test_constantIj20c move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB167_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI167_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI167_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -36264,12 +35929,8 @@ _Z13test_constantIj20custom_add_constantsIjEEvPT_iPKc: # @_Z13test_constantIj20c .size _Z13test_constantIj20custom_add_constantsIjEEvPT_iPKc, .Lfunc_end167-_Z13test_constantIj20custom_add_constantsIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj20custom_sub_constantsIjEEvPT_iPKc -.LCPI168_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj20custom_sub_constantsIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj20custom_sub_constantsIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj20custom_sub_constantsIjEEvPT_iPKc + .weak _Z13test_constantIj20custom_sub_constantsIjEEvPT_iPKc # -- Begin function _Z13test_constantIj20custom_sub_constantsIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj20custom_sub_constantsIjEEvPT_iPKc,@function _Z13test_constantIj20custom_sub_constantsIjEEvPT_iPKc: # @_Z13test_constantIj20custom_sub_constantsIjEEvPT_iPKc @@ -36371,12 +36032,14 @@ _Z13test_constantIj20custom_sub_constantsIjEEvPT_iPKc: # @_Z13test_constantIj20c move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB168_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI168_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI168_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -36407,12 +36070,8 @@ _Z13test_constantIj20custom_sub_constantsIjEEvPT_iPKc: # @_Z13test_constantIj20c .size _Z13test_constantIj20custom_sub_constantsIjEEvPT_iPKc, .Lfunc_end168-_Z13test_constantIj20custom_sub_constantsIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj25custom_multiply_constantsIjEEvPT_iPKc -.LCPI169_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj25custom_multiply_constantsIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj25custom_multiply_constantsIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj25custom_multiply_constantsIjEEvPT_iPKc + .weak _Z13test_constantIj25custom_multiply_constantsIjEEvPT_iPKc # -- Begin function _Z13test_constantIj25custom_multiply_constantsIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj25custom_multiply_constantsIjEEvPT_iPKc,@function _Z13test_constantIj25custom_multiply_constantsIjEEvPT_iPKc: # @_Z13test_constantIj25custom_multiply_constantsIjEEvPT_iPKc @@ -36516,12 +36175,14 @@ _Z13test_constantIj25custom_multiply_constantsIjEEvPT_iPKc: # @_Z13test_constant move $a2, $a0 ld.w $a0, $s1, %pc_lo12(current_test) .LBB169_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI169_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI169_0) sub.d $a1, $s0, $s2 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -36552,12 +36213,8 @@ _Z13test_constantIj25custom_multiply_constantsIjEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantIj25custom_multiply_constantsIjEEvPT_iPKc, .Lfunc_end169-_Z13test_constantIj25custom_multiply_constantsIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj23custom_divide_constantsIjEEvPT_iPKc -.LCPI170_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj23custom_divide_constantsIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj23custom_divide_constantsIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj23custom_divide_constantsIjEEvPT_iPKc + .weak _Z13test_constantIj23custom_divide_constantsIjEEvPT_iPKc # -- Begin function _Z13test_constantIj23custom_divide_constantsIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj23custom_divide_constantsIjEEvPT_iPKc,@function _Z13test_constantIj23custom_divide_constantsIjEEvPT_iPKc: # @_Z13test_constantIj23custom_divide_constantsIjEEvPT_iPKc @@ -36659,12 +36316,14 @@ _Z13test_constantIj23custom_divide_constantsIjEEvPT_iPKc: # @_Z13test_constantIj move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB170_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI170_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI170_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -36695,12 +36354,8 @@ _Z13test_constantIj23custom_divide_constantsIjEEvPT_iPKc: # @_Z13test_constantIj .size _Z13test_constantIj23custom_divide_constantsIjEEvPT_iPKc, .Lfunc_end170-_Z13test_constantIj23custom_divide_constantsIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj20custom_mod_constantsIjEEvPT_iPKc -.LCPI171_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj20custom_mod_constantsIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj20custom_mod_constantsIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj20custom_mod_constantsIjEEvPT_iPKc + .weak _Z13test_constantIj20custom_mod_constantsIjEEvPT_iPKc # -- Begin function _Z13test_constantIj20custom_mod_constantsIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj20custom_mod_constantsIjEEvPT_iPKc,@function _Z13test_constantIj20custom_mod_constantsIjEEvPT_iPKc: # @_Z13test_constantIj20custom_mod_constantsIjEEvPT_iPKc @@ -36802,12 +36457,14 @@ _Z13test_constantIj20custom_mod_constantsIjEEvPT_iPKc: # @_Z13test_constantIj20c move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB171_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI171_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI171_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -36838,12 +36495,8 @@ _Z13test_constantIj20custom_mod_constantsIjEEvPT_iPKc: # @_Z13test_constantIj20c .size _Z13test_constantIj20custom_mod_constantsIjEEvPT_iPKc, .Lfunc_end171-_Z13test_constantIj20custom_mod_constantsIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj22custom_equal_constantsIjEEvPT_iPKc -.LCPI172_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj22custom_equal_constantsIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj22custom_equal_constantsIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj22custom_equal_constantsIjEEvPT_iPKc + .weak _Z13test_constantIj22custom_equal_constantsIjEEvPT_iPKc # -- Begin function _Z13test_constantIj22custom_equal_constantsIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj22custom_equal_constantsIjEEvPT_iPKc,@function _Z13test_constantIj22custom_equal_constantsIjEEvPT_iPKc: # @_Z13test_constantIj22custom_equal_constantsIjEEvPT_iPKc @@ -36896,12 +36549,14 @@ _Z13test_constantIj22custom_equal_constantsIjEEvPT_iPKc: # @_Z13test_constantIj2 # %bb.3: # %._crit_edge.i ld.w $a1, $s1, %pc_lo12(current_test) .LBB172_4: # %_Z13record_resultdPKc.exit - pcalau12i $a2, %pc_hi20(.LCPI172_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI172_0) sub.d $a2, $s0, $s2 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a2, $a2, 1042 movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a2, $a1, $a0, 4 slli.d $a3, $a1, 4 fstx.d $fa0, $a0, $a3 @@ -36930,12 +36585,8 @@ _Z13test_constantIj22custom_equal_constantsIjEEvPT_iPKc: # @_Z13test_constantIj2 .size _Z13test_constantIj22custom_equal_constantsIjEEvPT_iPKc, .Lfunc_end172-_Z13test_constantIj22custom_equal_constantsIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj25custom_notequal_constantsIjEEvPT_iPKc -.LCPI173_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj25custom_notequal_constantsIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj25custom_notequal_constantsIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj25custom_notequal_constantsIjEEvPT_iPKc + .weak _Z13test_constantIj25custom_notequal_constantsIjEEvPT_iPKc # -- Begin function _Z13test_constantIj25custom_notequal_constantsIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj25custom_notequal_constantsIjEEvPT_iPKc,@function _Z13test_constantIj25custom_notequal_constantsIjEEvPT_iPKc: # @_Z13test_constantIj25custom_notequal_constantsIjEEvPT_iPKc @@ -37037,12 +36688,14 @@ _Z13test_constantIj25custom_notequal_constantsIjEEvPT_iPKc: # @_Z13test_constant move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB173_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI173_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI173_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -37073,12 +36726,8 @@ _Z13test_constantIj25custom_notequal_constantsIjEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantIj25custom_notequal_constantsIjEEvPT_iPKc, .Lfunc_end173-_Z13test_constantIj25custom_notequal_constantsIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj28custom_greaterthan_constantsIjEEvPT_iPKc -.LCPI174_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj28custom_greaterthan_constantsIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj28custom_greaterthan_constantsIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj28custom_greaterthan_constantsIjEEvPT_iPKc + .weak _Z13test_constantIj28custom_greaterthan_constantsIjEEvPT_iPKc # -- Begin function _Z13test_constantIj28custom_greaterthan_constantsIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj28custom_greaterthan_constantsIjEEvPT_iPKc,@function _Z13test_constantIj28custom_greaterthan_constantsIjEEvPT_iPKc: # @_Z13test_constantIj28custom_greaterthan_constantsIjEEvPT_iPKc @@ -37180,12 +36829,14 @@ _Z13test_constantIj28custom_greaterthan_constantsIjEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB174_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI174_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI174_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -37216,12 +36867,8 @@ _Z13test_constantIj28custom_greaterthan_constantsIjEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIj28custom_greaterthan_constantsIjEEvPT_iPKc, .Lfunc_end174-_Z13test_constantIj28custom_greaterthan_constantsIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj25custom_lessthan_constantsIjEEvPT_iPKc -.LCPI175_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj25custom_lessthan_constantsIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj25custom_lessthan_constantsIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj25custom_lessthan_constantsIjEEvPT_iPKc + .weak _Z13test_constantIj25custom_lessthan_constantsIjEEvPT_iPKc # -- Begin function _Z13test_constantIj25custom_lessthan_constantsIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj25custom_lessthan_constantsIjEEvPT_iPKc,@function _Z13test_constantIj25custom_lessthan_constantsIjEEvPT_iPKc: # @_Z13test_constantIj25custom_lessthan_constantsIjEEvPT_iPKc @@ -37274,12 +36921,14 @@ _Z13test_constantIj25custom_lessthan_constantsIjEEvPT_iPKc: # @_Z13test_constant # %bb.3: # %._crit_edge.i ld.w $a1, $s1, %pc_lo12(current_test) .LBB175_4: # %_Z13record_resultdPKc.exit - pcalau12i $a2, %pc_hi20(.LCPI175_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI175_0) sub.d $a2, $s0, $s2 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a2, $a2, 1042 movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a2, $a1, $a0, 4 slli.d $a3, $a1, 4 fstx.d $fa0, $a0, $a3 @@ -37308,12 +36957,8 @@ _Z13test_constantIj25custom_lessthan_constantsIjEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantIj25custom_lessthan_constantsIjEEvPT_iPKc, .Lfunc_end175-_Z13test_constantIj25custom_lessthan_constantsIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj33custom_greaterthanequal_constantsIjEEvPT_iPKc -.LCPI176_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj33custom_greaterthanequal_constantsIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj33custom_greaterthanequal_constantsIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj33custom_greaterthanequal_constantsIjEEvPT_iPKc + .weak _Z13test_constantIj33custom_greaterthanequal_constantsIjEEvPT_iPKc # -- Begin function _Z13test_constantIj33custom_greaterthanequal_constantsIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj33custom_greaterthanequal_constantsIjEEvPT_iPKc,@function _Z13test_constantIj33custom_greaterthanequal_constantsIjEEvPT_iPKc: # @_Z13test_constantIj33custom_greaterthanequal_constantsIjEEvPT_iPKc @@ -37415,12 +37060,14 @@ _Z13test_constantIj33custom_greaterthanequal_constantsIjEEvPT_iPKc: # @_Z13test_ move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB176_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI176_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI176_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -37451,12 +37098,8 @@ _Z13test_constantIj33custom_greaterthanequal_constantsIjEEvPT_iPKc: # @_Z13test_ .size _Z13test_constantIj33custom_greaterthanequal_constantsIjEEvPT_iPKc, .Lfunc_end176-_Z13test_constantIj33custom_greaterthanequal_constantsIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj30custom_lessthanequal_constantsIjEEvPT_iPKc -.LCPI177_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj30custom_lessthanequal_constantsIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj30custom_lessthanequal_constantsIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj30custom_lessthanequal_constantsIjEEvPT_iPKc + .weak _Z13test_constantIj30custom_lessthanequal_constantsIjEEvPT_iPKc # -- Begin function _Z13test_constantIj30custom_lessthanequal_constantsIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj30custom_lessthanequal_constantsIjEEvPT_iPKc,@function _Z13test_constantIj30custom_lessthanequal_constantsIjEEvPT_iPKc: # @_Z13test_constantIj30custom_lessthanequal_constantsIjEEvPT_iPKc @@ -37509,12 +37152,14 @@ _Z13test_constantIj30custom_lessthanequal_constantsIjEEvPT_iPKc: # @_Z13test_con # %bb.3: # %._crit_edge.i ld.w $a1, $s1, %pc_lo12(current_test) .LBB177_4: # %_Z13record_resultdPKc.exit - pcalau12i $a2, %pc_hi20(.LCPI177_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI177_0) sub.d $a2, $s0, $s2 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a2, $a2, 1042 movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a2, $a1, $a0, 4 slli.d $a3, $a1, 4 fstx.d $fa0, $a0, $a3 @@ -37543,12 +37188,8 @@ _Z13test_constantIj30custom_lessthanequal_constantsIjEEvPT_iPKc: # @_Z13test_con .size _Z13test_constantIj30custom_lessthanequal_constantsIjEEvPT_iPKc, .Lfunc_end177-_Z13test_constantIj30custom_lessthanequal_constantsIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj20custom_and_constantsIjEEvPT_iPKc -.LCPI178_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj20custom_and_constantsIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj20custom_and_constantsIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj20custom_and_constantsIjEEvPT_iPKc + .weak _Z13test_constantIj20custom_and_constantsIjEEvPT_iPKc # -- Begin function _Z13test_constantIj20custom_and_constantsIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj20custom_and_constantsIjEEvPT_iPKc,@function _Z13test_constantIj20custom_and_constantsIjEEvPT_iPKc: # @_Z13test_constantIj20custom_and_constantsIjEEvPT_iPKc @@ -37650,12 +37291,14 @@ _Z13test_constantIj20custom_and_constantsIjEEvPT_iPKc: # @_Z13test_constantIj20c move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB178_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI178_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI178_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -37686,12 +37329,8 @@ _Z13test_constantIj20custom_and_constantsIjEEvPT_iPKc: # @_Z13test_constantIj20c .size _Z13test_constantIj20custom_and_constantsIjEEvPT_iPKc, .Lfunc_end178-_Z13test_constantIj20custom_and_constantsIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj19custom_or_constantsIjEEvPT_iPKc -.LCPI179_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj19custom_or_constantsIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj19custom_or_constantsIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj19custom_or_constantsIjEEvPT_iPKc + .weak _Z13test_constantIj19custom_or_constantsIjEEvPT_iPKc # -- Begin function _Z13test_constantIj19custom_or_constantsIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj19custom_or_constantsIjEEvPT_iPKc,@function _Z13test_constantIj19custom_or_constantsIjEEvPT_iPKc: # @_Z13test_constantIj19custom_or_constantsIjEEvPT_iPKc @@ -37793,12 +37432,14 @@ _Z13test_constantIj19custom_or_constantsIjEEvPT_iPKc: # @_Z13test_constantIj19cu move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB179_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI179_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI179_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -37829,12 +37470,8 @@ _Z13test_constantIj19custom_or_constantsIjEEvPT_iPKc: # @_Z13test_constantIj19cu .size _Z13test_constantIj19custom_or_constantsIjEEvPT_iPKc, .Lfunc_end179-_Z13test_constantIj19custom_or_constantsIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj20custom_xor_constantsIjEEvPT_iPKc -.LCPI180_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj20custom_xor_constantsIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj20custom_xor_constantsIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj20custom_xor_constantsIjEEvPT_iPKc + .weak _Z13test_constantIj20custom_xor_constantsIjEEvPT_iPKc # -- Begin function _Z13test_constantIj20custom_xor_constantsIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj20custom_xor_constantsIjEEvPT_iPKc,@function _Z13test_constantIj20custom_xor_constantsIjEEvPT_iPKc: # @_Z13test_constantIj20custom_xor_constantsIjEEvPT_iPKc @@ -37936,12 +37573,14 @@ _Z13test_constantIj20custom_xor_constantsIjEEvPT_iPKc: # @_Z13test_constantIj20c move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB180_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI180_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI180_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -37972,12 +37611,8 @@ _Z13test_constantIj20custom_xor_constantsIjEEvPT_iPKc: # @_Z13test_constantIj20c .size _Z13test_constantIj20custom_xor_constantsIjEEvPT_iPKc, .Lfunc_end180-_Z13test_constantIj20custom_xor_constantsIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj19custom_constant_addIjEEvPT_iPKc -.LCPI181_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj19custom_constant_addIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj19custom_constant_addIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj19custom_constant_addIjEEvPT_iPKc + .weak _Z13test_constantIj19custom_constant_addIjEEvPT_iPKc # -- Begin function _Z13test_constantIj19custom_constant_addIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj19custom_constant_addIjEEvPT_iPKc,@function _Z13test_constantIj19custom_constant_addIjEEvPT_iPKc: # @_Z13test_constantIj19custom_constant_addIjEEvPT_iPKc @@ -38171,12 +37806,14 @@ _Z13test_constantIj19custom_constant_addIjEEvPT_iPKc: # @_Z13test_constantIj19cu ld.w $a0, $s3, %pc_lo12(current_test) .LBB181_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI181_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI181_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -38209,12 +37846,8 @@ _Z13test_constantIj19custom_constant_addIjEEvPT_iPKc: # @_Z13test_constantIj19cu .size _Z13test_constantIj19custom_constant_addIjEEvPT_iPKc, .Lfunc_end181-_Z13test_constantIj19custom_constant_addIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj28custom_multiple_constant_addIjEEvPT_iPKc -.LCPI182_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj28custom_multiple_constant_addIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj28custom_multiple_constant_addIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj28custom_multiple_constant_addIjEEvPT_iPKc + .weak _Z13test_constantIj28custom_multiple_constant_addIjEEvPT_iPKc # -- Begin function _Z13test_constantIj28custom_multiple_constant_addIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj28custom_multiple_constant_addIjEEvPT_iPKc,@function _Z13test_constantIj28custom_multiple_constant_addIjEEvPT_iPKc: # @_Z13test_constantIj28custom_multiple_constant_addIjEEvPT_iPKc @@ -38408,12 +38041,14 @@ _Z13test_constantIj28custom_multiple_constant_addIjEEvPT_iPKc: # @_Z13test_const ld.w $a0, $s3, %pc_lo12(current_test) .LBB182_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI182_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI182_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -38446,12 +38081,8 @@ _Z13test_constantIj28custom_multiple_constant_addIjEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIj28custom_multiple_constant_addIjEEvPT_iPKc, .Lfunc_end182-_Z13test_constantIj28custom_multiple_constant_addIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj19custom_constant_subIjEEvPT_iPKc -.LCPI183_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj19custom_constant_subIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj19custom_constant_subIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj19custom_constant_subIjEEvPT_iPKc + .weak _Z13test_constantIj19custom_constant_subIjEEvPT_iPKc # -- Begin function _Z13test_constantIj19custom_constant_subIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj19custom_constant_subIjEEvPT_iPKc,@function _Z13test_constantIj19custom_constant_subIjEEvPT_iPKc: # @_Z13test_constantIj19custom_constant_subIjEEvPT_iPKc @@ -38648,12 +38279,14 @@ _Z13test_constantIj19custom_constant_subIjEEvPT_iPKc: # @_Z13test_constantIj19cu ld.w $a0, $s3, %pc_lo12(current_test) .LBB183_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI183_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI183_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -38686,12 +38319,8 @@ _Z13test_constantIj19custom_constant_subIjEEvPT_iPKc: # @_Z13test_constantIj19cu .size _Z13test_constantIj19custom_constant_subIjEEvPT_iPKc, .Lfunc_end183-_Z13test_constantIj19custom_constant_subIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj28custom_multiple_constant_subIjEEvPT_iPKc -.LCPI184_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj28custom_multiple_constant_subIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj28custom_multiple_constant_subIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj28custom_multiple_constant_subIjEEvPT_iPKc + .weak _Z13test_constantIj28custom_multiple_constant_subIjEEvPT_iPKc # -- Begin function _Z13test_constantIj28custom_multiple_constant_subIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj28custom_multiple_constant_subIjEEvPT_iPKc,@function _Z13test_constantIj28custom_multiple_constant_subIjEEvPT_iPKc: # @_Z13test_constantIj28custom_multiple_constant_subIjEEvPT_iPKc @@ -38888,12 +38517,14 @@ _Z13test_constantIj28custom_multiple_constant_subIjEEvPT_iPKc: # @_Z13test_const ld.w $a0, $s3, %pc_lo12(current_test) .LBB184_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI184_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI184_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -38926,12 +38557,8 @@ _Z13test_constantIj28custom_multiple_constant_subIjEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIj28custom_multiple_constant_subIjEEvPT_iPKc, .Lfunc_end184-_Z13test_constantIj28custom_multiple_constant_subIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj24custom_constant_multiplyIjEEvPT_iPKc -.LCPI185_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj24custom_constant_multiplyIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj24custom_constant_multiplyIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj24custom_constant_multiplyIjEEvPT_iPKc + .weak _Z13test_constantIj24custom_constant_multiplyIjEEvPT_iPKc # -- Begin function _Z13test_constantIj24custom_constant_multiplyIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj24custom_constant_multiplyIjEEvPT_iPKc,@function _Z13test_constantIj24custom_constant_multiplyIjEEvPT_iPKc: # @_Z13test_constantIj24custom_constant_multiplyIjEEvPT_iPKc @@ -39122,12 +38749,14 @@ _Z13test_constantIj24custom_constant_multiplyIjEEvPT_iPKc: # @_Z13test_constantI ld.w $a0, $s3, %pc_lo12(current_test) .LBB185_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI185_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI185_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -39160,12 +38789,8 @@ _Z13test_constantIj24custom_constant_multiplyIjEEvPT_iPKc: # @_Z13test_constantI .size _Z13test_constantIj24custom_constant_multiplyIjEEvPT_iPKc, .Lfunc_end185-_Z13test_constantIj24custom_constant_multiplyIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj33custom_multiple_constant_multiplyIjEEvPT_iPKc -.LCPI186_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj33custom_multiple_constant_multiplyIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj33custom_multiple_constant_multiplyIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj33custom_multiple_constant_multiplyIjEEvPT_iPKc + .weak _Z13test_constantIj33custom_multiple_constant_multiplyIjEEvPT_iPKc # -- Begin function _Z13test_constantIj33custom_multiple_constant_multiplyIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj33custom_multiple_constant_multiplyIjEEvPT_iPKc,@function _Z13test_constantIj33custom_multiple_constant_multiplyIjEEvPT_iPKc: # @_Z13test_constantIj33custom_multiple_constant_multiplyIjEEvPT_iPKc @@ -39356,12 +38981,14 @@ _Z13test_constantIj33custom_multiple_constant_multiplyIjEEvPT_iPKc: # @_Z13test_ ld.w $a0, $s3, %pc_lo12(current_test) .LBB186_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI186_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI186_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -39394,12 +39021,8 @@ _Z13test_constantIj33custom_multiple_constant_multiplyIjEEvPT_iPKc: # @_Z13test_ .size _Z13test_constantIj33custom_multiple_constant_multiplyIjEEvPT_iPKc, .Lfunc_end186-_Z13test_constantIj33custom_multiple_constant_multiplyIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj34custom_multiple_constant_multiply2IjEEvPT_iPKc -.LCPI187_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj34custom_multiple_constant_multiply2IjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj34custom_multiple_constant_multiply2IjEEvPT_iPKc,comdat - .weak _Z13test_constantIj34custom_multiple_constant_multiply2IjEEvPT_iPKc + .weak _Z13test_constantIj34custom_multiple_constant_multiply2IjEEvPT_iPKc # -- Begin function _Z13test_constantIj34custom_multiple_constant_multiply2IjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj34custom_multiple_constant_multiply2IjEEvPT_iPKc,@function _Z13test_constantIj34custom_multiple_constant_multiply2IjEEvPT_iPKc: # @_Z13test_constantIj34custom_multiple_constant_multiply2IjEEvPT_iPKc @@ -39596,12 +39219,14 @@ _Z13test_constantIj34custom_multiple_constant_multiply2IjEEvPT_iPKc: # @_Z13test ld.w $a0, $s3, %pc_lo12(current_test) .LBB187_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI187_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI187_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -39634,12 +39259,8 @@ _Z13test_constantIj34custom_multiple_constant_multiply2IjEEvPT_iPKc: # @_Z13test .size _Z13test_constantIj34custom_multiple_constant_multiply2IjEEvPT_iPKc, .Lfunc_end187-_Z13test_constantIj34custom_multiple_constant_multiply2IjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj22custom_constant_divideIjEEvPT_iPKc -.LCPI188_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj22custom_constant_divideIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj22custom_constant_divideIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj22custom_constant_divideIjEEvPT_iPKc + .weak _Z13test_constantIj22custom_constant_divideIjEEvPT_iPKc # -- Begin function _Z13test_constantIj22custom_constant_divideIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj22custom_constant_divideIjEEvPT_iPKc,@function _Z13test_constantIj22custom_constant_divideIjEEvPT_iPKc: # @_Z13test_constantIj22custom_constant_divideIjEEvPT_iPKc @@ -39844,12 +39465,14 @@ _Z13test_constantIj22custom_constant_divideIjEEvPT_iPKc: # @_Z13test_constantIj2 ld.w $a0, $s3, %pc_lo12(current_test) .LBB188_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI188_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI188_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -39882,12 +39505,8 @@ _Z13test_constantIj22custom_constant_divideIjEEvPT_iPKc: # @_Z13test_constantIj2 .size _Z13test_constantIj22custom_constant_divideIjEEvPT_iPKc, .Lfunc_end188-_Z13test_constantIj22custom_constant_divideIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj31custom_multiple_constant_divideIjEEvPT_iPKc -.LCPI189_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj31custom_multiple_constant_divideIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj31custom_multiple_constant_divideIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj31custom_multiple_constant_divideIjEEvPT_iPKc + .weak _Z13test_constantIj31custom_multiple_constant_divideIjEEvPT_iPKc # -- Begin function _Z13test_constantIj31custom_multiple_constant_divideIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj31custom_multiple_constant_divideIjEEvPT_iPKc,@function _Z13test_constantIj31custom_multiple_constant_divideIjEEvPT_iPKc: # @_Z13test_constantIj31custom_multiple_constant_divideIjEEvPT_iPKc @@ -40092,12 +39711,14 @@ _Z13test_constantIj31custom_multiple_constant_divideIjEEvPT_iPKc: # @_Z13test_co ld.w $a0, $s3, %pc_lo12(current_test) .LBB189_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI189_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI189_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -40130,12 +39751,8 @@ _Z13test_constantIj31custom_multiple_constant_divideIjEEvPT_iPKc: # @_Z13test_co .size _Z13test_constantIj31custom_multiple_constant_divideIjEEvPT_iPKc, .Lfunc_end189-_Z13test_constantIj31custom_multiple_constant_divideIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj32custom_multiple_constant_divide2IjEEvPT_iPKc -.LCPI190_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj32custom_multiple_constant_divide2IjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj32custom_multiple_constant_divide2IjEEvPT_iPKc,comdat - .weak _Z13test_constantIj32custom_multiple_constant_divide2IjEEvPT_iPKc + .weak _Z13test_constantIj32custom_multiple_constant_divide2IjEEvPT_iPKc # -- Begin function _Z13test_constantIj32custom_multiple_constant_divide2IjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj32custom_multiple_constant_divide2IjEEvPT_iPKc,@function _Z13test_constantIj32custom_multiple_constant_divide2IjEEvPT_iPKc: # @_Z13test_constantIj32custom_multiple_constant_divide2IjEEvPT_iPKc @@ -40329,12 +39946,14 @@ _Z13test_constantIj32custom_multiple_constant_divide2IjEEvPT_iPKc: # @_Z13test_c ld.w $a0, $s3, %pc_lo12(current_test) .LBB190_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI190_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI190_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -40367,12 +39986,8 @@ _Z13test_constantIj32custom_multiple_constant_divide2IjEEvPT_iPKc: # @_Z13test_c .size _Z13test_constantIj32custom_multiple_constant_divide2IjEEvPT_iPKc, .Lfunc_end190-_Z13test_constantIj32custom_multiple_constant_divide2IjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj30custom_multiple_constant_mixedIjEEvPT_iPKc -.LCPI191_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj30custom_multiple_constant_mixedIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj30custom_multiple_constant_mixedIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj30custom_multiple_constant_mixedIjEEvPT_iPKc + .weak _Z13test_constantIj30custom_multiple_constant_mixedIjEEvPT_iPKc # -- Begin function _Z13test_constantIj30custom_multiple_constant_mixedIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj30custom_multiple_constant_mixedIjEEvPT_iPKc,@function _Z13test_constantIj30custom_multiple_constant_mixedIjEEvPT_iPKc: # @_Z13test_constantIj30custom_multiple_constant_mixedIjEEvPT_iPKc @@ -40557,12 +40172,14 @@ _Z13test_constantIj30custom_multiple_constant_mixedIjEEvPT_iPKc: # @_Z13test_con ld.w $a0, $s3, %pc_lo12(current_test) .LBB191_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI191_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI191_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -40595,12 +40212,8 @@ _Z13test_constantIj30custom_multiple_constant_mixedIjEEvPT_iPKc: # @_Z13test_con .size _Z13test_constantIj30custom_multiple_constant_mixedIjEEvPT_iPKc, .Lfunc_end191-_Z13test_constantIj30custom_multiple_constant_mixedIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj19custom_constant_andIjEEvPT_iPKc -.LCPI192_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj19custom_constant_andIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj19custom_constant_andIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj19custom_constant_andIjEEvPT_iPKc + .weak _Z13test_constantIj19custom_constant_andIjEEvPT_iPKc # -- Begin function _Z13test_constantIj19custom_constant_andIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj19custom_constant_andIjEEvPT_iPKc,@function _Z13test_constantIj19custom_constant_andIjEEvPT_iPKc: # @_Z13test_constantIj19custom_constant_andIjEEvPT_iPKc @@ -40790,12 +40403,14 @@ _Z13test_constantIj19custom_constant_andIjEEvPT_iPKc: # @_Z13test_constantIj19cu ld.w $a0, $s3, %pc_lo12(current_test) .LBB192_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI192_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI192_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -40828,12 +40443,8 @@ _Z13test_constantIj19custom_constant_andIjEEvPT_iPKc: # @_Z13test_constantIj19cu .size _Z13test_constantIj19custom_constant_andIjEEvPT_iPKc, .Lfunc_end192-_Z13test_constantIj19custom_constant_andIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj28custom_multiple_constant_andIjEEvPT_iPKc -.LCPI193_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj28custom_multiple_constant_andIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj28custom_multiple_constant_andIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj28custom_multiple_constant_andIjEEvPT_iPKc + .weak _Z13test_constantIj28custom_multiple_constant_andIjEEvPT_iPKc # -- Begin function _Z13test_constantIj28custom_multiple_constant_andIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj28custom_multiple_constant_andIjEEvPT_iPKc,@function _Z13test_constantIj28custom_multiple_constant_andIjEEvPT_iPKc: # @_Z13test_constantIj28custom_multiple_constant_andIjEEvPT_iPKc @@ -41023,12 +40634,14 @@ _Z13test_constantIj28custom_multiple_constant_andIjEEvPT_iPKc: # @_Z13test_const ld.w $a0, $s3, %pc_lo12(current_test) .LBB193_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI193_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI193_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -41061,12 +40674,8 @@ _Z13test_constantIj28custom_multiple_constant_andIjEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIj28custom_multiple_constant_andIjEEvPT_iPKc, .Lfunc_end193-_Z13test_constantIj28custom_multiple_constant_andIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj18custom_constant_orIjEEvPT_iPKc -.LCPI194_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj18custom_constant_orIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj18custom_constant_orIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj18custom_constant_orIjEEvPT_iPKc + .weak _Z13test_constantIj18custom_constant_orIjEEvPT_iPKc # -- Begin function _Z13test_constantIj18custom_constant_orIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj18custom_constant_orIjEEvPT_iPKc,@function _Z13test_constantIj18custom_constant_orIjEEvPT_iPKc: # @_Z13test_constantIj18custom_constant_orIjEEvPT_iPKc @@ -41244,12 +40853,14 @@ _Z13test_constantIj18custom_constant_orIjEEvPT_iPKc: # @_Z13test_constantIj18cus ld.w $a0, $s3, %pc_lo12(current_test) .LBB194_19: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI194_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI194_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -41282,12 +40893,8 @@ _Z13test_constantIj18custom_constant_orIjEEvPT_iPKc: # @_Z13test_constantIj18cus .size _Z13test_constantIj18custom_constant_orIjEEvPT_iPKc, .Lfunc_end194-_Z13test_constantIj18custom_constant_orIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj27custom_multiple_constant_orIjEEvPT_iPKc -.LCPI195_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj27custom_multiple_constant_orIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj27custom_multiple_constant_orIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj27custom_multiple_constant_orIjEEvPT_iPKc + .weak _Z13test_constantIj27custom_multiple_constant_orIjEEvPT_iPKc # -- Begin function _Z13test_constantIj27custom_multiple_constant_orIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj27custom_multiple_constant_orIjEEvPT_iPKc,@function _Z13test_constantIj27custom_multiple_constant_orIjEEvPT_iPKc: # @_Z13test_constantIj27custom_multiple_constant_orIjEEvPT_iPKc @@ -41465,12 +41072,14 @@ _Z13test_constantIj27custom_multiple_constant_orIjEEvPT_iPKc: # @_Z13test_consta ld.w $a0, $s3, %pc_lo12(current_test) .LBB195_19: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI195_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI195_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -41503,12 +41112,8 @@ _Z13test_constantIj27custom_multiple_constant_orIjEEvPT_iPKc: # @_Z13test_consta .size _Z13test_constantIj27custom_multiple_constant_orIjEEvPT_iPKc, .Lfunc_end195-_Z13test_constantIj27custom_multiple_constant_orIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj19custom_constant_xorIjEEvPT_iPKc -.LCPI196_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj19custom_constant_xorIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj19custom_constant_xorIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj19custom_constant_xorIjEEvPT_iPKc + .weak _Z13test_constantIj19custom_constant_xorIjEEvPT_iPKc # -- Begin function _Z13test_constantIj19custom_constant_xorIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj19custom_constant_xorIjEEvPT_iPKc,@function _Z13test_constantIj19custom_constant_xorIjEEvPT_iPKc: # @_Z13test_constantIj19custom_constant_xorIjEEvPT_iPKc @@ -41701,12 +41306,14 @@ _Z13test_constantIj19custom_constant_xorIjEEvPT_iPKc: # @_Z13test_constantIj19cu ld.w $a0, $s3, %pc_lo12(current_test) .LBB196_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI196_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI196_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -41739,12 +41346,8 @@ _Z13test_constantIj19custom_constant_xorIjEEvPT_iPKc: # @_Z13test_constantIj19cu .size _Z13test_constantIj19custom_constant_xorIjEEvPT_iPKc, .Lfunc_end196-_Z13test_constantIj19custom_constant_xorIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIj28custom_multiple_constant_xorIjEEvPT_iPKc -.LCPI197_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIj28custom_multiple_constant_xorIjEEvPT_iPKc,"axG",@progbits,_Z13test_constantIj28custom_multiple_constant_xorIjEEvPT_iPKc,comdat - .weak _Z13test_constantIj28custom_multiple_constant_xorIjEEvPT_iPKc + .weak _Z13test_constantIj28custom_multiple_constant_xorIjEEvPT_iPKc # -- Begin function _Z13test_constantIj28custom_multiple_constant_xorIjEEvPT_iPKc .p2align 5 .type _Z13test_constantIj28custom_multiple_constant_xorIjEEvPT_iPKc,@function _Z13test_constantIj28custom_multiple_constant_xorIjEEvPT_iPKc: # @_Z13test_constantIj28custom_multiple_constant_xorIjEEvPT_iPKc @@ -41937,12 +41540,14 @@ _Z13test_constantIj28custom_multiple_constant_xorIjEEvPT_iPKc: # @_Z13test_const ld.w $a0, $s3, %pc_lo12(current_test) .LBB197_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI197_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI197_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -41975,12 +41580,8 @@ _Z13test_constantIj28custom_multiple_constant_xorIjEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIj28custom_multiple_constant_xorIjEEvPT_iPKc, .Lfunc_end197-_Z13test_constantIj28custom_multiple_constant_xorIjEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl10custom_twoIlEEvPT_iPKc -.LCPI198_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl10custom_twoIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl10custom_twoIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl10custom_twoIlEEvPT_iPKc + .weak _Z13test_constantIl10custom_twoIlEEvPT_iPKc # -- Begin function _Z13test_constantIl10custom_twoIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl10custom_twoIlEEvPT_iPKc,@function _Z13test_constantIl10custom_twoIlEEvPT_iPKc: # @_Z13test_constantIl10custom_twoIlEEvPT_iPKc @@ -42082,12 +41683,14 @@ _Z13test_constantIl10custom_twoIlEEvPT_iPKc: # @_Z13test_constantIl10custom_twoI move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB198_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI198_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI198_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -42118,12 +41721,8 @@ _Z13test_constantIl10custom_twoIlEEvPT_iPKc: # @_Z13test_constantIl10custom_twoI .size _Z13test_constantIl10custom_twoIlEEvPT_iPKc, .Lfunc_end198-_Z13test_constantIl10custom_twoIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl20custom_add_constantsIlEEvPT_iPKc -.LCPI199_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl20custom_add_constantsIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl20custom_add_constantsIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl20custom_add_constantsIlEEvPT_iPKc + .weak _Z13test_constantIl20custom_add_constantsIlEEvPT_iPKc # -- Begin function _Z13test_constantIl20custom_add_constantsIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl20custom_add_constantsIlEEvPT_iPKc,@function _Z13test_constantIl20custom_add_constantsIlEEvPT_iPKc: # @_Z13test_constantIl20custom_add_constantsIlEEvPT_iPKc @@ -42225,12 +41824,14 @@ _Z13test_constantIl20custom_add_constantsIlEEvPT_iPKc: # @_Z13test_constantIl20c move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB199_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI199_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI199_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -42261,12 +41862,8 @@ _Z13test_constantIl20custom_add_constantsIlEEvPT_iPKc: # @_Z13test_constantIl20c .size _Z13test_constantIl20custom_add_constantsIlEEvPT_iPKc, .Lfunc_end199-_Z13test_constantIl20custom_add_constantsIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl20custom_sub_constantsIlEEvPT_iPKc -.LCPI200_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl20custom_sub_constantsIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl20custom_sub_constantsIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl20custom_sub_constantsIlEEvPT_iPKc + .weak _Z13test_constantIl20custom_sub_constantsIlEEvPT_iPKc # -- Begin function _Z13test_constantIl20custom_sub_constantsIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl20custom_sub_constantsIlEEvPT_iPKc,@function _Z13test_constantIl20custom_sub_constantsIlEEvPT_iPKc: # @_Z13test_constantIl20custom_sub_constantsIlEEvPT_iPKc @@ -42368,12 +41965,14 @@ _Z13test_constantIl20custom_sub_constantsIlEEvPT_iPKc: # @_Z13test_constantIl20c move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB200_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI200_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI200_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -42404,12 +42003,8 @@ _Z13test_constantIl20custom_sub_constantsIlEEvPT_iPKc: # @_Z13test_constantIl20c .size _Z13test_constantIl20custom_sub_constantsIlEEvPT_iPKc, .Lfunc_end200-_Z13test_constantIl20custom_sub_constantsIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl25custom_multiply_constantsIlEEvPT_iPKc -.LCPI201_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl25custom_multiply_constantsIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl25custom_multiply_constantsIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl25custom_multiply_constantsIlEEvPT_iPKc + .weak _Z13test_constantIl25custom_multiply_constantsIlEEvPT_iPKc # -- Begin function _Z13test_constantIl25custom_multiply_constantsIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl25custom_multiply_constantsIlEEvPT_iPKc,@function _Z13test_constantIl25custom_multiply_constantsIlEEvPT_iPKc: # @_Z13test_constantIl25custom_multiply_constantsIlEEvPT_iPKc @@ -42511,12 +42106,14 @@ _Z13test_constantIl25custom_multiply_constantsIlEEvPT_iPKc: # @_Z13test_constant move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB201_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI201_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI201_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -42547,12 +42144,8 @@ _Z13test_constantIl25custom_multiply_constantsIlEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantIl25custom_multiply_constantsIlEEvPT_iPKc, .Lfunc_end201-_Z13test_constantIl25custom_multiply_constantsIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl23custom_divide_constantsIlEEvPT_iPKc -.LCPI202_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl23custom_divide_constantsIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl23custom_divide_constantsIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl23custom_divide_constantsIlEEvPT_iPKc + .weak _Z13test_constantIl23custom_divide_constantsIlEEvPT_iPKc # -- Begin function _Z13test_constantIl23custom_divide_constantsIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl23custom_divide_constantsIlEEvPT_iPKc,@function _Z13test_constantIl23custom_divide_constantsIlEEvPT_iPKc: # @_Z13test_constantIl23custom_divide_constantsIlEEvPT_iPKc @@ -42654,12 +42247,14 @@ _Z13test_constantIl23custom_divide_constantsIlEEvPT_iPKc: # @_Z13test_constantIl move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB202_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI202_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI202_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -42690,12 +42285,8 @@ _Z13test_constantIl23custom_divide_constantsIlEEvPT_iPKc: # @_Z13test_constantIl .size _Z13test_constantIl23custom_divide_constantsIlEEvPT_iPKc, .Lfunc_end202-_Z13test_constantIl23custom_divide_constantsIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl20custom_mod_constantsIlEEvPT_iPKc -.LCPI203_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl20custom_mod_constantsIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl20custom_mod_constantsIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl20custom_mod_constantsIlEEvPT_iPKc + .weak _Z13test_constantIl20custom_mod_constantsIlEEvPT_iPKc # -- Begin function _Z13test_constantIl20custom_mod_constantsIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl20custom_mod_constantsIlEEvPT_iPKc,@function _Z13test_constantIl20custom_mod_constantsIlEEvPT_iPKc: # @_Z13test_constantIl20custom_mod_constantsIlEEvPT_iPKc @@ -42797,12 +42388,14 @@ _Z13test_constantIl20custom_mod_constantsIlEEvPT_iPKc: # @_Z13test_constantIl20c move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB203_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI203_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI203_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -42833,12 +42426,8 @@ _Z13test_constantIl20custom_mod_constantsIlEEvPT_iPKc: # @_Z13test_constantIl20c .size _Z13test_constantIl20custom_mod_constantsIlEEvPT_iPKc, .Lfunc_end203-_Z13test_constantIl20custom_mod_constantsIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl22custom_equal_constantsIlEEvPT_iPKc -.LCPI204_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl22custom_equal_constantsIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl22custom_equal_constantsIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl22custom_equal_constantsIlEEvPT_iPKc + .weak _Z13test_constantIl22custom_equal_constantsIlEEvPT_iPKc # -- Begin function _Z13test_constantIl22custom_equal_constantsIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl22custom_equal_constantsIlEEvPT_iPKc,@function _Z13test_constantIl22custom_equal_constantsIlEEvPT_iPKc: # @_Z13test_constantIl22custom_equal_constantsIlEEvPT_iPKc @@ -42891,12 +42480,14 @@ _Z13test_constantIl22custom_equal_constantsIlEEvPT_iPKc: # @_Z13test_constantIl2 # %bb.3: # %._crit_edge.i ld.w $a1, $s1, %pc_lo12(current_test) .LBB204_4: # %_Z13record_resultdPKc.exit - pcalau12i $a2, %pc_hi20(.LCPI204_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI204_0) sub.d $a2, $s0, $s2 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a2, $a2, 1042 movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a2, $a1, $a0, 4 slli.d $a3, $a1, 4 fstx.d $fa0, $a0, $a3 @@ -42925,12 +42516,8 @@ _Z13test_constantIl22custom_equal_constantsIlEEvPT_iPKc: # @_Z13test_constantIl2 .size _Z13test_constantIl22custom_equal_constantsIlEEvPT_iPKc, .Lfunc_end204-_Z13test_constantIl22custom_equal_constantsIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl25custom_notequal_constantsIlEEvPT_iPKc -.LCPI205_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl25custom_notequal_constantsIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl25custom_notequal_constantsIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl25custom_notequal_constantsIlEEvPT_iPKc + .weak _Z13test_constantIl25custom_notequal_constantsIlEEvPT_iPKc # -- Begin function _Z13test_constantIl25custom_notequal_constantsIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl25custom_notequal_constantsIlEEvPT_iPKc,@function _Z13test_constantIl25custom_notequal_constantsIlEEvPT_iPKc: # @_Z13test_constantIl25custom_notequal_constantsIlEEvPT_iPKc @@ -43032,12 +42619,14 @@ _Z13test_constantIl25custom_notequal_constantsIlEEvPT_iPKc: # @_Z13test_constant move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB205_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI205_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI205_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -43068,12 +42657,8 @@ _Z13test_constantIl25custom_notequal_constantsIlEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantIl25custom_notequal_constantsIlEEvPT_iPKc, .Lfunc_end205-_Z13test_constantIl25custom_notequal_constantsIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl28custom_greaterthan_constantsIlEEvPT_iPKc -.LCPI206_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl28custom_greaterthan_constantsIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl28custom_greaterthan_constantsIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl28custom_greaterthan_constantsIlEEvPT_iPKc + .weak _Z13test_constantIl28custom_greaterthan_constantsIlEEvPT_iPKc # -- Begin function _Z13test_constantIl28custom_greaterthan_constantsIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl28custom_greaterthan_constantsIlEEvPT_iPKc,@function _Z13test_constantIl28custom_greaterthan_constantsIlEEvPT_iPKc: # @_Z13test_constantIl28custom_greaterthan_constantsIlEEvPT_iPKc @@ -43175,12 +42760,14 @@ _Z13test_constantIl28custom_greaterthan_constantsIlEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB206_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI206_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI206_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -43211,12 +42798,8 @@ _Z13test_constantIl28custom_greaterthan_constantsIlEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIl28custom_greaterthan_constantsIlEEvPT_iPKc, .Lfunc_end206-_Z13test_constantIl28custom_greaterthan_constantsIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl25custom_lessthan_constantsIlEEvPT_iPKc -.LCPI207_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl25custom_lessthan_constantsIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl25custom_lessthan_constantsIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl25custom_lessthan_constantsIlEEvPT_iPKc + .weak _Z13test_constantIl25custom_lessthan_constantsIlEEvPT_iPKc # -- Begin function _Z13test_constantIl25custom_lessthan_constantsIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl25custom_lessthan_constantsIlEEvPT_iPKc,@function _Z13test_constantIl25custom_lessthan_constantsIlEEvPT_iPKc: # @_Z13test_constantIl25custom_lessthan_constantsIlEEvPT_iPKc @@ -43269,12 +42852,14 @@ _Z13test_constantIl25custom_lessthan_constantsIlEEvPT_iPKc: # @_Z13test_constant # %bb.3: # %._crit_edge.i ld.w $a1, $s1, %pc_lo12(current_test) .LBB207_4: # %_Z13record_resultdPKc.exit - pcalau12i $a2, %pc_hi20(.LCPI207_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI207_0) sub.d $a2, $s0, $s2 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a2, $a2, 1042 movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a2, $a1, $a0, 4 slli.d $a3, $a1, 4 fstx.d $fa0, $a0, $a3 @@ -43303,12 +42888,8 @@ _Z13test_constantIl25custom_lessthan_constantsIlEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantIl25custom_lessthan_constantsIlEEvPT_iPKc, .Lfunc_end207-_Z13test_constantIl25custom_lessthan_constantsIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl33custom_greaterthanequal_constantsIlEEvPT_iPKc -.LCPI208_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl33custom_greaterthanequal_constantsIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl33custom_greaterthanequal_constantsIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl33custom_greaterthanequal_constantsIlEEvPT_iPKc + .weak _Z13test_constantIl33custom_greaterthanequal_constantsIlEEvPT_iPKc # -- Begin function _Z13test_constantIl33custom_greaterthanequal_constantsIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl33custom_greaterthanequal_constantsIlEEvPT_iPKc,@function _Z13test_constantIl33custom_greaterthanequal_constantsIlEEvPT_iPKc: # @_Z13test_constantIl33custom_greaterthanequal_constantsIlEEvPT_iPKc @@ -43410,12 +42991,14 @@ _Z13test_constantIl33custom_greaterthanequal_constantsIlEEvPT_iPKc: # @_Z13test_ move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB208_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI208_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI208_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -43446,12 +43029,8 @@ _Z13test_constantIl33custom_greaterthanequal_constantsIlEEvPT_iPKc: # @_Z13test_ .size _Z13test_constantIl33custom_greaterthanequal_constantsIlEEvPT_iPKc, .Lfunc_end208-_Z13test_constantIl33custom_greaterthanequal_constantsIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl30custom_lessthanequal_constantsIlEEvPT_iPKc -.LCPI209_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl30custom_lessthanequal_constantsIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl30custom_lessthanequal_constantsIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl30custom_lessthanequal_constantsIlEEvPT_iPKc + .weak _Z13test_constantIl30custom_lessthanequal_constantsIlEEvPT_iPKc # -- Begin function _Z13test_constantIl30custom_lessthanequal_constantsIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl30custom_lessthanequal_constantsIlEEvPT_iPKc,@function _Z13test_constantIl30custom_lessthanequal_constantsIlEEvPT_iPKc: # @_Z13test_constantIl30custom_lessthanequal_constantsIlEEvPT_iPKc @@ -43504,12 +43083,14 @@ _Z13test_constantIl30custom_lessthanequal_constantsIlEEvPT_iPKc: # @_Z13test_con # %bb.3: # %._crit_edge.i ld.w $a1, $s1, %pc_lo12(current_test) .LBB209_4: # %_Z13record_resultdPKc.exit - pcalau12i $a2, %pc_hi20(.LCPI209_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI209_0) sub.d $a2, $s0, $s2 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a2, $a2, 1042 movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a2, $a1, $a0, 4 slli.d $a3, $a1, 4 fstx.d $fa0, $a0, $a3 @@ -43538,12 +43119,8 @@ _Z13test_constantIl30custom_lessthanequal_constantsIlEEvPT_iPKc: # @_Z13test_con .size _Z13test_constantIl30custom_lessthanequal_constantsIlEEvPT_iPKc, .Lfunc_end209-_Z13test_constantIl30custom_lessthanequal_constantsIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl20custom_and_constantsIlEEvPT_iPKc -.LCPI210_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl20custom_and_constantsIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl20custom_and_constantsIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl20custom_and_constantsIlEEvPT_iPKc + .weak _Z13test_constantIl20custom_and_constantsIlEEvPT_iPKc # -- Begin function _Z13test_constantIl20custom_and_constantsIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl20custom_and_constantsIlEEvPT_iPKc,@function _Z13test_constantIl20custom_and_constantsIlEEvPT_iPKc: # @_Z13test_constantIl20custom_and_constantsIlEEvPT_iPKc @@ -43645,12 +43222,14 @@ _Z13test_constantIl20custom_and_constantsIlEEvPT_iPKc: # @_Z13test_constantIl20c move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB210_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI210_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI210_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -43681,12 +43260,8 @@ _Z13test_constantIl20custom_and_constantsIlEEvPT_iPKc: # @_Z13test_constantIl20c .size _Z13test_constantIl20custom_and_constantsIlEEvPT_iPKc, .Lfunc_end210-_Z13test_constantIl20custom_and_constantsIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl19custom_or_constantsIlEEvPT_iPKc -.LCPI211_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl19custom_or_constantsIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl19custom_or_constantsIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl19custom_or_constantsIlEEvPT_iPKc + .weak _Z13test_constantIl19custom_or_constantsIlEEvPT_iPKc # -- Begin function _Z13test_constantIl19custom_or_constantsIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl19custom_or_constantsIlEEvPT_iPKc,@function _Z13test_constantIl19custom_or_constantsIlEEvPT_iPKc: # @_Z13test_constantIl19custom_or_constantsIlEEvPT_iPKc @@ -43788,12 +43363,14 @@ _Z13test_constantIl19custom_or_constantsIlEEvPT_iPKc: # @_Z13test_constantIl19cu move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB211_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI211_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI211_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -43824,12 +43401,8 @@ _Z13test_constantIl19custom_or_constantsIlEEvPT_iPKc: # @_Z13test_constantIl19cu .size _Z13test_constantIl19custom_or_constantsIlEEvPT_iPKc, .Lfunc_end211-_Z13test_constantIl19custom_or_constantsIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl20custom_xor_constantsIlEEvPT_iPKc -.LCPI212_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl20custom_xor_constantsIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl20custom_xor_constantsIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl20custom_xor_constantsIlEEvPT_iPKc + .weak _Z13test_constantIl20custom_xor_constantsIlEEvPT_iPKc # -- Begin function _Z13test_constantIl20custom_xor_constantsIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl20custom_xor_constantsIlEEvPT_iPKc,@function _Z13test_constantIl20custom_xor_constantsIlEEvPT_iPKc: # @_Z13test_constantIl20custom_xor_constantsIlEEvPT_iPKc @@ -43931,12 +43504,14 @@ _Z13test_constantIl20custom_xor_constantsIlEEvPT_iPKc: # @_Z13test_constantIl20c move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB212_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI212_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI212_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -43967,12 +43542,8 @@ _Z13test_constantIl20custom_xor_constantsIlEEvPT_iPKc: # @_Z13test_constantIl20c .size _Z13test_constantIl20custom_xor_constantsIlEEvPT_iPKc, .Lfunc_end212-_Z13test_constantIl20custom_xor_constantsIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl19custom_constant_addIlEEvPT_iPKc -.LCPI213_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl19custom_constant_addIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl19custom_constant_addIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl19custom_constant_addIlEEvPT_iPKc + .weak _Z13test_constantIl19custom_constant_addIlEEvPT_iPKc # -- Begin function _Z13test_constantIl19custom_constant_addIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl19custom_constant_addIlEEvPT_iPKc,@function _Z13test_constantIl19custom_constant_addIlEEvPT_iPKc: # @_Z13test_constantIl19custom_constant_addIlEEvPT_iPKc @@ -44164,12 +43735,14 @@ _Z13test_constantIl19custom_constant_addIlEEvPT_iPKc: # @_Z13test_constantIl19cu ld.w $a0, $s3, %pc_lo12(current_test) .LBB213_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI213_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI213_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -44202,12 +43775,8 @@ _Z13test_constantIl19custom_constant_addIlEEvPT_iPKc: # @_Z13test_constantIl19cu .size _Z13test_constantIl19custom_constant_addIlEEvPT_iPKc, .Lfunc_end213-_Z13test_constantIl19custom_constant_addIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl28custom_multiple_constant_addIlEEvPT_iPKc -.LCPI214_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl28custom_multiple_constant_addIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl28custom_multiple_constant_addIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl28custom_multiple_constant_addIlEEvPT_iPKc + .weak _Z13test_constantIl28custom_multiple_constant_addIlEEvPT_iPKc # -- Begin function _Z13test_constantIl28custom_multiple_constant_addIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl28custom_multiple_constant_addIlEEvPT_iPKc,@function _Z13test_constantIl28custom_multiple_constant_addIlEEvPT_iPKc: # @_Z13test_constantIl28custom_multiple_constant_addIlEEvPT_iPKc @@ -44399,12 +43968,14 @@ _Z13test_constantIl28custom_multiple_constant_addIlEEvPT_iPKc: # @_Z13test_const ld.w $a0, $s3, %pc_lo12(current_test) .LBB214_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI214_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI214_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -44437,12 +44008,8 @@ _Z13test_constantIl28custom_multiple_constant_addIlEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIl28custom_multiple_constant_addIlEEvPT_iPKc, .Lfunc_end214-_Z13test_constantIl28custom_multiple_constant_addIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl19custom_constant_subIlEEvPT_iPKc -.LCPI215_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl19custom_constant_subIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl19custom_constant_subIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl19custom_constant_subIlEEvPT_iPKc + .weak _Z13test_constantIl19custom_constant_subIlEEvPT_iPKc # -- Begin function _Z13test_constantIl19custom_constant_subIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl19custom_constant_subIlEEvPT_iPKc,@function _Z13test_constantIl19custom_constant_subIlEEvPT_iPKc: # @_Z13test_constantIl19custom_constant_subIlEEvPT_iPKc @@ -44637,12 +44204,14 @@ _Z13test_constantIl19custom_constant_subIlEEvPT_iPKc: # @_Z13test_constantIl19cu ld.w $a0, $s3, %pc_lo12(current_test) .LBB215_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI215_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI215_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -44675,12 +44244,8 @@ _Z13test_constantIl19custom_constant_subIlEEvPT_iPKc: # @_Z13test_constantIl19cu .size _Z13test_constantIl19custom_constant_subIlEEvPT_iPKc, .Lfunc_end215-_Z13test_constantIl19custom_constant_subIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl28custom_multiple_constant_subIlEEvPT_iPKc -.LCPI216_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl28custom_multiple_constant_subIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl28custom_multiple_constant_subIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl28custom_multiple_constant_subIlEEvPT_iPKc + .weak _Z13test_constantIl28custom_multiple_constant_subIlEEvPT_iPKc # -- Begin function _Z13test_constantIl28custom_multiple_constant_subIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl28custom_multiple_constant_subIlEEvPT_iPKc,@function _Z13test_constantIl28custom_multiple_constant_subIlEEvPT_iPKc: # @_Z13test_constantIl28custom_multiple_constant_subIlEEvPT_iPKc @@ -44875,12 +44440,14 @@ _Z13test_constantIl28custom_multiple_constant_subIlEEvPT_iPKc: # @_Z13test_const ld.w $a0, $s3, %pc_lo12(current_test) .LBB216_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI216_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI216_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -44913,12 +44480,8 @@ _Z13test_constantIl28custom_multiple_constant_subIlEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIl28custom_multiple_constant_subIlEEvPT_iPKc, .Lfunc_end216-_Z13test_constantIl28custom_multiple_constant_subIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl24custom_constant_multiplyIlEEvPT_iPKc -.LCPI217_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl24custom_constant_multiplyIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl24custom_constant_multiplyIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl24custom_constant_multiplyIlEEvPT_iPKc + .weak _Z13test_constantIl24custom_constant_multiplyIlEEvPT_iPKc # -- Begin function _Z13test_constantIl24custom_constant_multiplyIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl24custom_constant_multiplyIlEEvPT_iPKc,@function _Z13test_constantIl24custom_constant_multiplyIlEEvPT_iPKc: # @_Z13test_constantIl24custom_constant_multiplyIlEEvPT_iPKc @@ -45104,12 +44667,14 @@ _Z13test_constantIl24custom_constant_multiplyIlEEvPT_iPKc: # @_Z13test_constantI ld.w $a0, $s3, %pc_lo12(current_test) .LBB217_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI217_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI217_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -45142,12 +44707,8 @@ _Z13test_constantIl24custom_constant_multiplyIlEEvPT_iPKc: # @_Z13test_constantI .size _Z13test_constantIl24custom_constant_multiplyIlEEvPT_iPKc, .Lfunc_end217-_Z13test_constantIl24custom_constant_multiplyIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl33custom_multiple_constant_multiplyIlEEvPT_iPKc -.LCPI218_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl33custom_multiple_constant_multiplyIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl33custom_multiple_constant_multiplyIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl33custom_multiple_constant_multiplyIlEEvPT_iPKc + .weak _Z13test_constantIl33custom_multiple_constant_multiplyIlEEvPT_iPKc # -- Begin function _Z13test_constantIl33custom_multiple_constant_multiplyIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl33custom_multiple_constant_multiplyIlEEvPT_iPKc,@function _Z13test_constantIl33custom_multiple_constant_multiplyIlEEvPT_iPKc: # @_Z13test_constantIl33custom_multiple_constant_multiplyIlEEvPT_iPKc @@ -45333,12 +44894,14 @@ _Z13test_constantIl33custom_multiple_constant_multiplyIlEEvPT_iPKc: # @_Z13test_ ld.w $a0, $s3, %pc_lo12(current_test) .LBB218_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI218_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI218_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -45371,12 +44934,8 @@ _Z13test_constantIl33custom_multiple_constant_multiplyIlEEvPT_iPKc: # @_Z13test_ .size _Z13test_constantIl33custom_multiple_constant_multiplyIlEEvPT_iPKc, .Lfunc_end218-_Z13test_constantIl33custom_multiple_constant_multiplyIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl34custom_multiple_constant_multiply2IlEEvPT_iPKc -.LCPI219_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl34custom_multiple_constant_multiply2IlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl34custom_multiple_constant_multiply2IlEEvPT_iPKc,comdat - .weak _Z13test_constantIl34custom_multiple_constant_multiply2IlEEvPT_iPKc + .weak _Z13test_constantIl34custom_multiple_constant_multiply2IlEEvPT_iPKc # -- Begin function _Z13test_constantIl34custom_multiple_constant_multiply2IlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl34custom_multiple_constant_multiply2IlEEvPT_iPKc,@function _Z13test_constantIl34custom_multiple_constant_multiply2IlEEvPT_iPKc: # @_Z13test_constantIl34custom_multiple_constant_multiply2IlEEvPT_iPKc @@ -45571,12 +45130,14 @@ _Z13test_constantIl34custom_multiple_constant_multiply2IlEEvPT_iPKc: # @_Z13test ld.w $a0, $s3, %pc_lo12(current_test) .LBB219_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI219_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI219_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -45609,12 +45170,8 @@ _Z13test_constantIl34custom_multiple_constant_multiply2IlEEvPT_iPKc: # @_Z13test .size _Z13test_constantIl34custom_multiple_constant_multiply2IlEEvPT_iPKc, .Lfunc_end219-_Z13test_constantIl34custom_multiple_constant_multiply2IlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl22custom_constant_divideIlEEvPT_iPKc -.LCPI220_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl22custom_constant_divideIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl22custom_constant_divideIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl22custom_constant_divideIlEEvPT_iPKc + .weak _Z13test_constantIl22custom_constant_divideIlEEvPT_iPKc # -- Begin function _Z13test_constantIl22custom_constant_divideIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl22custom_constant_divideIlEEvPT_iPKc,@function _Z13test_constantIl22custom_constant_divideIlEEvPT_iPKc: # @_Z13test_constantIl22custom_constant_divideIlEEvPT_iPKc @@ -45820,12 +45377,14 @@ _Z13test_constantIl22custom_constant_divideIlEEvPT_iPKc: # @_Z13test_constantIl2 ld.w $a0, $s3, %pc_lo12(current_test) .LBB220_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI220_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI220_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -45858,12 +45417,8 @@ _Z13test_constantIl22custom_constant_divideIlEEvPT_iPKc: # @_Z13test_constantIl2 .size _Z13test_constantIl22custom_constant_divideIlEEvPT_iPKc, .Lfunc_end220-_Z13test_constantIl22custom_constant_divideIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl31custom_multiple_constant_divideIlEEvPT_iPKc -.LCPI221_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl31custom_multiple_constant_divideIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl31custom_multiple_constant_divideIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl31custom_multiple_constant_divideIlEEvPT_iPKc + .weak _Z13test_constantIl31custom_multiple_constant_divideIlEEvPT_iPKc # -- Begin function _Z13test_constantIl31custom_multiple_constant_divideIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl31custom_multiple_constant_divideIlEEvPT_iPKc,@function _Z13test_constantIl31custom_multiple_constant_divideIlEEvPT_iPKc: # @_Z13test_constantIl31custom_multiple_constant_divideIlEEvPT_iPKc @@ -46073,12 +45628,14 @@ _Z13test_constantIl31custom_multiple_constant_divideIlEEvPT_iPKc: # @_Z13test_co ld.w $a0, $s3, %pc_lo12(current_test) .LBB221_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI221_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI221_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -46111,12 +45668,8 @@ _Z13test_constantIl31custom_multiple_constant_divideIlEEvPT_iPKc: # @_Z13test_co .size _Z13test_constantIl31custom_multiple_constant_divideIlEEvPT_iPKc, .Lfunc_end221-_Z13test_constantIl31custom_multiple_constant_divideIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl32custom_multiple_constant_divide2IlEEvPT_iPKc -.LCPI222_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl32custom_multiple_constant_divide2IlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl32custom_multiple_constant_divide2IlEEvPT_iPKc,comdat - .weak _Z13test_constantIl32custom_multiple_constant_divide2IlEEvPT_iPKc + .weak _Z13test_constantIl32custom_multiple_constant_divide2IlEEvPT_iPKc # -- Begin function _Z13test_constantIl32custom_multiple_constant_divide2IlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl32custom_multiple_constant_divide2IlEEvPT_iPKc,@function _Z13test_constantIl32custom_multiple_constant_divide2IlEEvPT_iPKc: # @_Z13test_constantIl32custom_multiple_constant_divide2IlEEvPT_iPKc @@ -46308,12 +45861,14 @@ _Z13test_constantIl32custom_multiple_constant_divide2IlEEvPT_iPKc: # @_Z13test_c ld.w $a0, $s3, %pc_lo12(current_test) .LBB222_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI222_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI222_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -46346,12 +45901,8 @@ _Z13test_constantIl32custom_multiple_constant_divide2IlEEvPT_iPKc: # @_Z13test_c .size _Z13test_constantIl32custom_multiple_constant_divide2IlEEvPT_iPKc, .Lfunc_end222-_Z13test_constantIl32custom_multiple_constant_divide2IlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl30custom_multiple_constant_mixedIlEEvPT_iPKc -.LCPI223_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl30custom_multiple_constant_mixedIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl30custom_multiple_constant_mixedIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl30custom_multiple_constant_mixedIlEEvPT_iPKc + .weak _Z13test_constantIl30custom_multiple_constant_mixedIlEEvPT_iPKc # -- Begin function _Z13test_constantIl30custom_multiple_constant_mixedIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl30custom_multiple_constant_mixedIlEEvPT_iPKc,@function _Z13test_constantIl30custom_multiple_constant_mixedIlEEvPT_iPKc: # @_Z13test_constantIl30custom_multiple_constant_mixedIlEEvPT_iPKc @@ -46531,12 +46082,14 @@ _Z13test_constantIl30custom_multiple_constant_mixedIlEEvPT_iPKc: # @_Z13test_con ld.w $a0, $s3, %pc_lo12(current_test) .LBB223_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI223_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI223_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -46569,12 +46122,8 @@ _Z13test_constantIl30custom_multiple_constant_mixedIlEEvPT_iPKc: # @_Z13test_con .size _Z13test_constantIl30custom_multiple_constant_mixedIlEEvPT_iPKc, .Lfunc_end223-_Z13test_constantIl30custom_multiple_constant_mixedIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl19custom_constant_andIlEEvPT_iPKc -.LCPI224_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl19custom_constant_andIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl19custom_constant_andIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl19custom_constant_andIlEEvPT_iPKc + .weak _Z13test_constantIl19custom_constant_andIlEEvPT_iPKc # -- Begin function _Z13test_constantIl19custom_constant_andIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl19custom_constant_andIlEEvPT_iPKc,@function _Z13test_constantIl19custom_constant_andIlEEvPT_iPKc: # @_Z13test_constantIl19custom_constant_andIlEEvPT_iPKc @@ -46762,12 +46311,14 @@ _Z13test_constantIl19custom_constant_andIlEEvPT_iPKc: # @_Z13test_constantIl19cu ld.w $a0, $s3, %pc_lo12(current_test) .LBB224_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI224_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI224_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -46800,12 +46351,8 @@ _Z13test_constantIl19custom_constant_andIlEEvPT_iPKc: # @_Z13test_constantIl19cu .size _Z13test_constantIl19custom_constant_andIlEEvPT_iPKc, .Lfunc_end224-_Z13test_constantIl19custom_constant_andIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl28custom_multiple_constant_andIlEEvPT_iPKc -.LCPI225_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl28custom_multiple_constant_andIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl28custom_multiple_constant_andIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl28custom_multiple_constant_andIlEEvPT_iPKc + .weak _Z13test_constantIl28custom_multiple_constant_andIlEEvPT_iPKc # -- Begin function _Z13test_constantIl28custom_multiple_constant_andIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl28custom_multiple_constant_andIlEEvPT_iPKc,@function _Z13test_constantIl28custom_multiple_constant_andIlEEvPT_iPKc: # @_Z13test_constantIl28custom_multiple_constant_andIlEEvPT_iPKc @@ -46993,12 +46540,14 @@ _Z13test_constantIl28custom_multiple_constant_andIlEEvPT_iPKc: # @_Z13test_const ld.w $a0, $s3, %pc_lo12(current_test) .LBB225_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI225_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI225_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -47031,12 +46580,8 @@ _Z13test_constantIl28custom_multiple_constant_andIlEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIl28custom_multiple_constant_andIlEEvPT_iPKc, .Lfunc_end225-_Z13test_constantIl28custom_multiple_constant_andIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl18custom_constant_orIlEEvPT_iPKc -.LCPI226_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl18custom_constant_orIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl18custom_constant_orIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl18custom_constant_orIlEEvPT_iPKc + .weak _Z13test_constantIl18custom_constant_orIlEEvPT_iPKc # -- Begin function _Z13test_constantIl18custom_constant_orIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl18custom_constant_orIlEEvPT_iPKc,@function _Z13test_constantIl18custom_constant_orIlEEvPT_iPKc: # @_Z13test_constantIl18custom_constant_orIlEEvPT_iPKc @@ -47212,12 +46757,14 @@ _Z13test_constantIl18custom_constant_orIlEEvPT_iPKc: # @_Z13test_constantIl18cus ld.w $a0, $s3, %pc_lo12(current_test) .LBB226_19: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI226_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI226_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -47250,12 +46797,8 @@ _Z13test_constantIl18custom_constant_orIlEEvPT_iPKc: # @_Z13test_constantIl18cus .size _Z13test_constantIl18custom_constant_orIlEEvPT_iPKc, .Lfunc_end226-_Z13test_constantIl18custom_constant_orIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl27custom_multiple_constant_orIlEEvPT_iPKc -.LCPI227_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl27custom_multiple_constant_orIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl27custom_multiple_constant_orIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl27custom_multiple_constant_orIlEEvPT_iPKc + .weak _Z13test_constantIl27custom_multiple_constant_orIlEEvPT_iPKc # -- Begin function _Z13test_constantIl27custom_multiple_constant_orIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl27custom_multiple_constant_orIlEEvPT_iPKc,@function _Z13test_constantIl27custom_multiple_constant_orIlEEvPT_iPKc: # @_Z13test_constantIl27custom_multiple_constant_orIlEEvPT_iPKc @@ -47431,12 +46974,14 @@ _Z13test_constantIl27custom_multiple_constant_orIlEEvPT_iPKc: # @_Z13test_consta ld.w $a0, $s3, %pc_lo12(current_test) .LBB227_19: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI227_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI227_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -47469,12 +47014,8 @@ _Z13test_constantIl27custom_multiple_constant_orIlEEvPT_iPKc: # @_Z13test_consta .size _Z13test_constantIl27custom_multiple_constant_orIlEEvPT_iPKc, .Lfunc_end227-_Z13test_constantIl27custom_multiple_constant_orIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl19custom_constant_xorIlEEvPT_iPKc -.LCPI228_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl19custom_constant_xorIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl19custom_constant_xorIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl19custom_constant_xorIlEEvPT_iPKc + .weak _Z13test_constantIl19custom_constant_xorIlEEvPT_iPKc # -- Begin function _Z13test_constantIl19custom_constant_xorIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl19custom_constant_xorIlEEvPT_iPKc,@function _Z13test_constantIl19custom_constant_xorIlEEvPT_iPKc: # @_Z13test_constantIl19custom_constant_xorIlEEvPT_iPKc @@ -47662,12 +47203,14 @@ _Z13test_constantIl19custom_constant_xorIlEEvPT_iPKc: # @_Z13test_constantIl19cu ld.w $a0, $s3, %pc_lo12(current_test) .LBB228_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI228_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI228_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -47700,12 +47243,8 @@ _Z13test_constantIl19custom_constant_xorIlEEvPT_iPKc: # @_Z13test_constantIl19cu .size _Z13test_constantIl19custom_constant_xorIlEEvPT_iPKc, .Lfunc_end228-_Z13test_constantIl19custom_constant_xorIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIl28custom_multiple_constant_xorIlEEvPT_iPKc -.LCPI229_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIl28custom_multiple_constant_xorIlEEvPT_iPKc,"axG",@progbits,_Z13test_constantIl28custom_multiple_constant_xorIlEEvPT_iPKc,comdat - .weak _Z13test_constantIl28custom_multiple_constant_xorIlEEvPT_iPKc + .weak _Z13test_constantIl28custom_multiple_constant_xorIlEEvPT_iPKc # -- Begin function _Z13test_constantIl28custom_multiple_constant_xorIlEEvPT_iPKc .p2align 5 .type _Z13test_constantIl28custom_multiple_constant_xorIlEEvPT_iPKc,@function _Z13test_constantIl28custom_multiple_constant_xorIlEEvPT_iPKc: # @_Z13test_constantIl28custom_multiple_constant_xorIlEEvPT_iPKc @@ -47893,12 +47432,14 @@ _Z13test_constantIl28custom_multiple_constant_xorIlEEvPT_iPKc: # @_Z13test_const ld.w $a0, $s3, %pc_lo12(current_test) .LBB229_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI229_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI229_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -47931,12 +47472,8 @@ _Z13test_constantIl28custom_multiple_constant_xorIlEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIl28custom_multiple_constant_xorIlEEvPT_iPKc, .Lfunc_end229-_Z13test_constantIl28custom_multiple_constant_xorIlEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm10custom_twoImEEvPT_iPKc -.LCPI230_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm10custom_twoImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm10custom_twoImEEvPT_iPKc,comdat - .weak _Z13test_constantIm10custom_twoImEEvPT_iPKc + .weak _Z13test_constantIm10custom_twoImEEvPT_iPKc # -- Begin function _Z13test_constantIm10custom_twoImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm10custom_twoImEEvPT_iPKc,@function _Z13test_constantIm10custom_twoImEEvPT_iPKc: # @_Z13test_constantIm10custom_twoImEEvPT_iPKc @@ -48038,12 +47575,14 @@ _Z13test_constantIm10custom_twoImEEvPT_iPKc: # @_Z13test_constantIm10custom_twoI move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB230_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI230_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI230_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -48074,12 +47613,8 @@ _Z13test_constantIm10custom_twoImEEvPT_iPKc: # @_Z13test_constantIm10custom_twoI .size _Z13test_constantIm10custom_twoImEEvPT_iPKc, .Lfunc_end230-_Z13test_constantIm10custom_twoImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm20custom_add_constantsImEEvPT_iPKc -.LCPI231_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm20custom_add_constantsImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm20custom_add_constantsImEEvPT_iPKc,comdat - .weak _Z13test_constantIm20custom_add_constantsImEEvPT_iPKc + .weak _Z13test_constantIm20custom_add_constantsImEEvPT_iPKc # -- Begin function _Z13test_constantIm20custom_add_constantsImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm20custom_add_constantsImEEvPT_iPKc,@function _Z13test_constantIm20custom_add_constantsImEEvPT_iPKc: # @_Z13test_constantIm20custom_add_constantsImEEvPT_iPKc @@ -48181,12 +47716,14 @@ _Z13test_constantIm20custom_add_constantsImEEvPT_iPKc: # @_Z13test_constantIm20c move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB231_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI231_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI231_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -48217,12 +47754,8 @@ _Z13test_constantIm20custom_add_constantsImEEvPT_iPKc: # @_Z13test_constantIm20c .size _Z13test_constantIm20custom_add_constantsImEEvPT_iPKc, .Lfunc_end231-_Z13test_constantIm20custom_add_constantsImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm20custom_sub_constantsImEEvPT_iPKc -.LCPI232_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm20custom_sub_constantsImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm20custom_sub_constantsImEEvPT_iPKc,comdat - .weak _Z13test_constantIm20custom_sub_constantsImEEvPT_iPKc + .weak _Z13test_constantIm20custom_sub_constantsImEEvPT_iPKc # -- Begin function _Z13test_constantIm20custom_sub_constantsImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm20custom_sub_constantsImEEvPT_iPKc,@function _Z13test_constantIm20custom_sub_constantsImEEvPT_iPKc: # @_Z13test_constantIm20custom_sub_constantsImEEvPT_iPKc @@ -48324,12 +47857,14 @@ _Z13test_constantIm20custom_sub_constantsImEEvPT_iPKc: # @_Z13test_constantIm20c move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB232_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI232_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI232_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -48360,12 +47895,8 @@ _Z13test_constantIm20custom_sub_constantsImEEvPT_iPKc: # @_Z13test_constantIm20c .size _Z13test_constantIm20custom_sub_constantsImEEvPT_iPKc, .Lfunc_end232-_Z13test_constantIm20custom_sub_constantsImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm25custom_multiply_constantsImEEvPT_iPKc -.LCPI233_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm25custom_multiply_constantsImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm25custom_multiply_constantsImEEvPT_iPKc,comdat - .weak _Z13test_constantIm25custom_multiply_constantsImEEvPT_iPKc + .weak _Z13test_constantIm25custom_multiply_constantsImEEvPT_iPKc # -- Begin function _Z13test_constantIm25custom_multiply_constantsImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm25custom_multiply_constantsImEEvPT_iPKc,@function _Z13test_constantIm25custom_multiply_constantsImEEvPT_iPKc: # @_Z13test_constantIm25custom_multiply_constantsImEEvPT_iPKc @@ -48467,12 +47998,14 @@ _Z13test_constantIm25custom_multiply_constantsImEEvPT_iPKc: # @_Z13test_constant move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB233_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI233_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI233_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -48503,12 +48036,8 @@ _Z13test_constantIm25custom_multiply_constantsImEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantIm25custom_multiply_constantsImEEvPT_iPKc, .Lfunc_end233-_Z13test_constantIm25custom_multiply_constantsImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm23custom_divide_constantsImEEvPT_iPKc -.LCPI234_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm23custom_divide_constantsImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm23custom_divide_constantsImEEvPT_iPKc,comdat - .weak _Z13test_constantIm23custom_divide_constantsImEEvPT_iPKc + .weak _Z13test_constantIm23custom_divide_constantsImEEvPT_iPKc # -- Begin function _Z13test_constantIm23custom_divide_constantsImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm23custom_divide_constantsImEEvPT_iPKc,@function _Z13test_constantIm23custom_divide_constantsImEEvPT_iPKc: # @_Z13test_constantIm23custom_divide_constantsImEEvPT_iPKc @@ -48610,12 +48139,14 @@ _Z13test_constantIm23custom_divide_constantsImEEvPT_iPKc: # @_Z13test_constantIm move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB234_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI234_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI234_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -48646,12 +48177,8 @@ _Z13test_constantIm23custom_divide_constantsImEEvPT_iPKc: # @_Z13test_constantIm .size _Z13test_constantIm23custom_divide_constantsImEEvPT_iPKc, .Lfunc_end234-_Z13test_constantIm23custom_divide_constantsImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm20custom_mod_constantsImEEvPT_iPKc -.LCPI235_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm20custom_mod_constantsImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm20custom_mod_constantsImEEvPT_iPKc,comdat - .weak _Z13test_constantIm20custom_mod_constantsImEEvPT_iPKc + .weak _Z13test_constantIm20custom_mod_constantsImEEvPT_iPKc # -- Begin function _Z13test_constantIm20custom_mod_constantsImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm20custom_mod_constantsImEEvPT_iPKc,@function _Z13test_constantIm20custom_mod_constantsImEEvPT_iPKc: # @_Z13test_constantIm20custom_mod_constantsImEEvPT_iPKc @@ -48753,12 +48280,14 @@ _Z13test_constantIm20custom_mod_constantsImEEvPT_iPKc: # @_Z13test_constantIm20c move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB235_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI235_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI235_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -48789,12 +48318,8 @@ _Z13test_constantIm20custom_mod_constantsImEEvPT_iPKc: # @_Z13test_constantIm20c .size _Z13test_constantIm20custom_mod_constantsImEEvPT_iPKc, .Lfunc_end235-_Z13test_constantIm20custom_mod_constantsImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm22custom_equal_constantsImEEvPT_iPKc -.LCPI236_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm22custom_equal_constantsImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm22custom_equal_constantsImEEvPT_iPKc,comdat - .weak _Z13test_constantIm22custom_equal_constantsImEEvPT_iPKc + .weak _Z13test_constantIm22custom_equal_constantsImEEvPT_iPKc # -- Begin function _Z13test_constantIm22custom_equal_constantsImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm22custom_equal_constantsImEEvPT_iPKc,@function _Z13test_constantIm22custom_equal_constantsImEEvPT_iPKc: # @_Z13test_constantIm22custom_equal_constantsImEEvPT_iPKc @@ -48847,12 +48372,14 @@ _Z13test_constantIm22custom_equal_constantsImEEvPT_iPKc: # @_Z13test_constantIm2 # %bb.3: # %._crit_edge.i ld.w $a1, $s1, %pc_lo12(current_test) .LBB236_4: # %_Z13record_resultdPKc.exit - pcalau12i $a2, %pc_hi20(.LCPI236_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI236_0) sub.d $a2, $s0, $s2 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a2, $a2, 1042 movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a2, $a1, $a0, 4 slli.d $a3, $a1, 4 fstx.d $fa0, $a0, $a3 @@ -48881,12 +48408,8 @@ _Z13test_constantIm22custom_equal_constantsImEEvPT_iPKc: # @_Z13test_constantIm2 .size _Z13test_constantIm22custom_equal_constantsImEEvPT_iPKc, .Lfunc_end236-_Z13test_constantIm22custom_equal_constantsImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm25custom_notequal_constantsImEEvPT_iPKc -.LCPI237_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm25custom_notequal_constantsImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm25custom_notequal_constantsImEEvPT_iPKc,comdat - .weak _Z13test_constantIm25custom_notequal_constantsImEEvPT_iPKc + .weak _Z13test_constantIm25custom_notequal_constantsImEEvPT_iPKc # -- Begin function _Z13test_constantIm25custom_notequal_constantsImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm25custom_notequal_constantsImEEvPT_iPKc,@function _Z13test_constantIm25custom_notequal_constantsImEEvPT_iPKc: # @_Z13test_constantIm25custom_notequal_constantsImEEvPT_iPKc @@ -48988,12 +48511,14 @@ _Z13test_constantIm25custom_notequal_constantsImEEvPT_iPKc: # @_Z13test_constant move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB237_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI237_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI237_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -49024,12 +48549,8 @@ _Z13test_constantIm25custom_notequal_constantsImEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantIm25custom_notequal_constantsImEEvPT_iPKc, .Lfunc_end237-_Z13test_constantIm25custom_notequal_constantsImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm28custom_greaterthan_constantsImEEvPT_iPKc -.LCPI238_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm28custom_greaterthan_constantsImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm28custom_greaterthan_constantsImEEvPT_iPKc,comdat - .weak _Z13test_constantIm28custom_greaterthan_constantsImEEvPT_iPKc + .weak _Z13test_constantIm28custom_greaterthan_constantsImEEvPT_iPKc # -- Begin function _Z13test_constantIm28custom_greaterthan_constantsImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm28custom_greaterthan_constantsImEEvPT_iPKc,@function _Z13test_constantIm28custom_greaterthan_constantsImEEvPT_iPKc: # @_Z13test_constantIm28custom_greaterthan_constantsImEEvPT_iPKc @@ -49131,12 +48652,14 @@ _Z13test_constantIm28custom_greaterthan_constantsImEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB238_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI238_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI238_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -49167,12 +48690,8 @@ _Z13test_constantIm28custom_greaterthan_constantsImEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIm28custom_greaterthan_constantsImEEvPT_iPKc, .Lfunc_end238-_Z13test_constantIm28custom_greaterthan_constantsImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm25custom_lessthan_constantsImEEvPT_iPKc -.LCPI239_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm25custom_lessthan_constantsImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm25custom_lessthan_constantsImEEvPT_iPKc,comdat - .weak _Z13test_constantIm25custom_lessthan_constantsImEEvPT_iPKc + .weak _Z13test_constantIm25custom_lessthan_constantsImEEvPT_iPKc # -- Begin function _Z13test_constantIm25custom_lessthan_constantsImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm25custom_lessthan_constantsImEEvPT_iPKc,@function _Z13test_constantIm25custom_lessthan_constantsImEEvPT_iPKc: # @_Z13test_constantIm25custom_lessthan_constantsImEEvPT_iPKc @@ -49225,12 +48744,14 @@ _Z13test_constantIm25custom_lessthan_constantsImEEvPT_iPKc: # @_Z13test_constant # %bb.3: # %._crit_edge.i ld.w $a1, $s1, %pc_lo12(current_test) .LBB239_4: # %_Z13record_resultdPKc.exit - pcalau12i $a2, %pc_hi20(.LCPI239_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI239_0) sub.d $a2, $s0, $s2 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a2, $a2, 1042 movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a2, $a1, $a0, 4 slli.d $a3, $a1, 4 fstx.d $fa0, $a0, $a3 @@ -49259,12 +48780,8 @@ _Z13test_constantIm25custom_lessthan_constantsImEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantIm25custom_lessthan_constantsImEEvPT_iPKc, .Lfunc_end239-_Z13test_constantIm25custom_lessthan_constantsImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm33custom_greaterthanequal_constantsImEEvPT_iPKc -.LCPI240_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm33custom_greaterthanequal_constantsImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm33custom_greaterthanequal_constantsImEEvPT_iPKc,comdat - .weak _Z13test_constantIm33custom_greaterthanequal_constantsImEEvPT_iPKc + .weak _Z13test_constantIm33custom_greaterthanequal_constantsImEEvPT_iPKc # -- Begin function _Z13test_constantIm33custom_greaterthanequal_constantsImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm33custom_greaterthanequal_constantsImEEvPT_iPKc,@function _Z13test_constantIm33custom_greaterthanequal_constantsImEEvPT_iPKc: # @_Z13test_constantIm33custom_greaterthanequal_constantsImEEvPT_iPKc @@ -49366,12 +48883,14 @@ _Z13test_constantIm33custom_greaterthanequal_constantsImEEvPT_iPKc: # @_Z13test_ move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB240_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI240_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI240_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -49402,12 +48921,8 @@ _Z13test_constantIm33custom_greaterthanequal_constantsImEEvPT_iPKc: # @_Z13test_ .size _Z13test_constantIm33custom_greaterthanequal_constantsImEEvPT_iPKc, .Lfunc_end240-_Z13test_constantIm33custom_greaterthanequal_constantsImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm30custom_lessthanequal_constantsImEEvPT_iPKc -.LCPI241_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm30custom_lessthanequal_constantsImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm30custom_lessthanequal_constantsImEEvPT_iPKc,comdat - .weak _Z13test_constantIm30custom_lessthanequal_constantsImEEvPT_iPKc + .weak _Z13test_constantIm30custom_lessthanequal_constantsImEEvPT_iPKc # -- Begin function _Z13test_constantIm30custom_lessthanequal_constantsImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm30custom_lessthanequal_constantsImEEvPT_iPKc,@function _Z13test_constantIm30custom_lessthanequal_constantsImEEvPT_iPKc: # @_Z13test_constantIm30custom_lessthanequal_constantsImEEvPT_iPKc @@ -49460,12 +48975,14 @@ _Z13test_constantIm30custom_lessthanequal_constantsImEEvPT_iPKc: # @_Z13test_con # %bb.3: # %._crit_edge.i ld.w $a1, $s1, %pc_lo12(current_test) .LBB241_4: # %_Z13record_resultdPKc.exit - pcalau12i $a2, %pc_hi20(.LCPI241_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI241_0) sub.d $a2, $s0, $s2 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, -97152 + lu52i.d $a2, $a2, 1042 movgr2fr.d $fa1, $a2 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a2, $a1, $a0, 4 slli.d $a3, $a1, 4 fstx.d $fa0, $a0, $a3 @@ -49494,12 +49011,8 @@ _Z13test_constantIm30custom_lessthanequal_constantsImEEvPT_iPKc: # @_Z13test_con .size _Z13test_constantIm30custom_lessthanequal_constantsImEEvPT_iPKc, .Lfunc_end241-_Z13test_constantIm30custom_lessthanequal_constantsImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm20custom_and_constantsImEEvPT_iPKc -.LCPI242_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm20custom_and_constantsImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm20custom_and_constantsImEEvPT_iPKc,comdat - .weak _Z13test_constantIm20custom_and_constantsImEEvPT_iPKc + .weak _Z13test_constantIm20custom_and_constantsImEEvPT_iPKc # -- Begin function _Z13test_constantIm20custom_and_constantsImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm20custom_and_constantsImEEvPT_iPKc,@function _Z13test_constantIm20custom_and_constantsImEEvPT_iPKc: # @_Z13test_constantIm20custom_and_constantsImEEvPT_iPKc @@ -49601,12 +49114,14 @@ _Z13test_constantIm20custom_and_constantsImEEvPT_iPKc: # @_Z13test_constantIm20c move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB242_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI242_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI242_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -49637,12 +49152,8 @@ _Z13test_constantIm20custom_and_constantsImEEvPT_iPKc: # @_Z13test_constantIm20c .size _Z13test_constantIm20custom_and_constantsImEEvPT_iPKc, .Lfunc_end242-_Z13test_constantIm20custom_and_constantsImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm19custom_or_constantsImEEvPT_iPKc -.LCPI243_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm19custom_or_constantsImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm19custom_or_constantsImEEvPT_iPKc,comdat - .weak _Z13test_constantIm19custom_or_constantsImEEvPT_iPKc + .weak _Z13test_constantIm19custom_or_constantsImEEvPT_iPKc # -- Begin function _Z13test_constantIm19custom_or_constantsImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm19custom_or_constantsImEEvPT_iPKc,@function _Z13test_constantIm19custom_or_constantsImEEvPT_iPKc: # @_Z13test_constantIm19custom_or_constantsImEEvPT_iPKc @@ -49744,12 +49255,14 @@ _Z13test_constantIm19custom_or_constantsImEEvPT_iPKc: # @_Z13test_constantIm19cu move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB243_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI243_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI243_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -49780,12 +49293,8 @@ _Z13test_constantIm19custom_or_constantsImEEvPT_iPKc: # @_Z13test_constantIm19cu .size _Z13test_constantIm19custom_or_constantsImEEvPT_iPKc, .Lfunc_end243-_Z13test_constantIm19custom_or_constantsImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm20custom_xor_constantsImEEvPT_iPKc -.LCPI244_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm20custom_xor_constantsImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm20custom_xor_constantsImEEvPT_iPKc,comdat - .weak _Z13test_constantIm20custom_xor_constantsImEEvPT_iPKc + .weak _Z13test_constantIm20custom_xor_constantsImEEvPT_iPKc # -- Begin function _Z13test_constantIm20custom_xor_constantsImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm20custom_xor_constantsImEEvPT_iPKc,@function _Z13test_constantIm20custom_xor_constantsImEEvPT_iPKc: # @_Z13test_constantIm20custom_xor_constantsImEEvPT_iPKc @@ -49887,12 +49396,14 @@ _Z13test_constantIm20custom_xor_constantsImEEvPT_iPKc: # @_Z13test_constantIm20c move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB244_12: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI244_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI244_0) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -49923,14 +49434,8 @@ _Z13test_constantIm20custom_xor_constantsImEEvPT_iPKc: # @_Z13test_constantIm20c .size _Z13test_constantIm20custom_xor_constantsImEEvPT_iPKc, .Lfunc_end244-_Z13test_constantIm20custom_xor_constantsImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm19custom_constant_addImEEvPT_iPKc -.LCPI245_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI245_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm19custom_constant_addImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm19custom_constant_addImEEvPT_iPKc,comdat - .weak _Z13test_constantIm19custom_constant_addImEEvPT_iPKc + .weak _Z13test_constantIm19custom_constant_addImEEvPT_iPKc # -- Begin function _Z13test_constantIm19custom_constant_addImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm19custom_constant_addImEEvPT_iPKc,@function _Z13test_constantIm19custom_constant_addImEEvPT_iPKc: # @_Z13test_constantIm19custom_constant_addImEEvPT_iPKc @@ -49983,8 +49488,8 @@ _Z13test_constantIm19custom_constant_addImEEvPT_iPKc: # @_Z13test_constantIm19cu st.d $a0, $sp, 56 # 8-byte Folded Spill ori $a5, $zero, 4 pcalau12i $fp, %pc_hi20(init_value) - pcalau12i $a0, %pc_hi20(.LCPI245_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI245_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s4, $a0, 3904 lu12i.w $a0, 19 @@ -50077,8 +49582,8 @@ _Z13test_constantIm19custom_constant_addImEEvPT_iPKc: # @_Z13test_constantIm19cu .LBB245_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI245_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI245_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s1, $a0, 3904 lu12i.w $a0, -20 @@ -50144,12 +49649,14 @@ _Z13test_constantIm19custom_constant_addImEEvPT_iPKc: # @_Z13test_constantIm19cu ld.w $a0, $s3, %pc_lo12(current_test) .LBB245_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI245_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI245_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -50183,14 +49690,8 @@ _Z13test_constantIm19custom_constant_addImEEvPT_iPKc: # @_Z13test_constantIm19cu .size _Z13test_constantIm19custom_constant_addImEEvPT_iPKc, .Lfunc_end245-_Z13test_constantIm19custom_constant_addImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm28custom_multiple_constant_addImEEvPT_iPKc -.LCPI246_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI246_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm28custom_multiple_constant_addImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm28custom_multiple_constant_addImEEvPT_iPKc,comdat - .weak _Z13test_constantIm28custom_multiple_constant_addImEEvPT_iPKc + .weak _Z13test_constantIm28custom_multiple_constant_addImEEvPT_iPKc # -- Begin function _Z13test_constantIm28custom_multiple_constant_addImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm28custom_multiple_constant_addImEEvPT_iPKc,@function _Z13test_constantIm28custom_multiple_constant_addImEEvPT_iPKc: # @_Z13test_constantIm28custom_multiple_constant_addImEEvPT_iPKc @@ -50243,8 +49744,8 @@ _Z13test_constantIm28custom_multiple_constant_addImEEvPT_iPKc: # @_Z13test_const st.d $a0, $sp, 56 # 8-byte Folded Spill ori $a5, $zero, 4 pcalau12i $fp, %pc_hi20(init_value) - pcalau12i $a0, %pc_hi20(.LCPI246_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI246_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s4, $a0, 3904 lu12i.w $a0, 19 @@ -50337,8 +49838,8 @@ _Z13test_constantIm28custom_multiple_constant_addImEEvPT_iPKc: # @_Z13test_const .LBB246_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI246_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI246_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s1, $a0, 3904 lu12i.w $a0, -20 @@ -50404,12 +49905,14 @@ _Z13test_constantIm28custom_multiple_constant_addImEEvPT_iPKc: # @_Z13test_const ld.w $a0, $s3, %pc_lo12(current_test) .LBB246_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI246_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI246_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -50443,14 +49946,8 @@ _Z13test_constantIm28custom_multiple_constant_addImEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIm28custom_multiple_constant_addImEEvPT_iPKc, .Lfunc_end246-_Z13test_constantIm28custom_multiple_constant_addImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm19custom_constant_subImEEvPT_iPKc -.LCPI247_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI247_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm19custom_constant_subImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm19custom_constant_subImEEvPT_iPKc,comdat - .weak _Z13test_constantIm19custom_constant_subImEEvPT_iPKc + .weak _Z13test_constantIm19custom_constant_subImEEvPT_iPKc # -- Begin function _Z13test_constantIm19custom_constant_subImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm19custom_constant_subImEEvPT_iPKc,@function _Z13test_constantIm19custom_constant_subImEEvPT_iPKc: # @_Z13test_constantIm19custom_constant_subImEEvPT_iPKc @@ -50503,8 +50000,8 @@ _Z13test_constantIm19custom_constant_subImEEvPT_iPKc: # @_Z13test_constantIm19cu st.d $a0, $sp, 72 # 8-byte Folded Spill ori $a5, $zero, 4 pcalau12i $fp, %pc_hi20(init_value) - pcalau12i $a0, %pc_hi20(.LCPI247_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI247_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s4, $a0, 3904 lu12i.w $a0, -20 @@ -50600,8 +50097,8 @@ _Z13test_constantIm19custom_constant_subImEEvPT_iPKc: # @_Z13test_constantIm19cu .LBB247_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI247_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI247_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s1, $a0, 3904 lu12i.w $a0, 19 @@ -50667,12 +50164,14 @@ _Z13test_constantIm19custom_constant_subImEEvPT_iPKc: # @_Z13test_constantIm19cu ld.w $a0, $s3, %pc_lo12(current_test) .LBB247_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI247_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI247_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -50706,14 +50205,8 @@ _Z13test_constantIm19custom_constant_subImEEvPT_iPKc: # @_Z13test_constantIm19cu .size _Z13test_constantIm19custom_constant_subImEEvPT_iPKc, .Lfunc_end247-_Z13test_constantIm19custom_constant_subImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm28custom_multiple_constant_subImEEvPT_iPKc -.LCPI248_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI248_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm28custom_multiple_constant_subImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm28custom_multiple_constant_subImEEvPT_iPKc,comdat - .weak _Z13test_constantIm28custom_multiple_constant_subImEEvPT_iPKc + .weak _Z13test_constantIm28custom_multiple_constant_subImEEvPT_iPKc # -- Begin function _Z13test_constantIm28custom_multiple_constant_subImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm28custom_multiple_constant_subImEEvPT_iPKc,@function _Z13test_constantIm28custom_multiple_constant_subImEEvPT_iPKc: # @_Z13test_constantIm28custom_multiple_constant_subImEEvPT_iPKc @@ -50766,8 +50259,8 @@ _Z13test_constantIm28custom_multiple_constant_subImEEvPT_iPKc: # @_Z13test_const st.d $a0, $sp, 72 # 8-byte Folded Spill ori $a5, $zero, 4 pcalau12i $fp, %pc_hi20(init_value) - pcalau12i $a0, %pc_hi20(.LCPI248_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI248_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s4, $a0, 3904 lu12i.w $a0, -20 @@ -50863,8 +50356,8 @@ _Z13test_constantIm28custom_multiple_constant_subImEEvPT_iPKc: # @_Z13test_const .LBB248_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI248_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI248_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s1, $a0, 3904 lu12i.w $a0, 19 @@ -50930,12 +50423,14 @@ _Z13test_constantIm28custom_multiple_constant_subImEEvPT_iPKc: # @_Z13test_const ld.w $a0, $s3, %pc_lo12(current_test) .LBB248_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI248_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI248_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -50969,14 +50464,8 @@ _Z13test_constantIm28custom_multiple_constant_subImEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIm28custom_multiple_constant_subImEEvPT_iPKc, .Lfunc_end248-_Z13test_constantIm28custom_multiple_constant_subImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm24custom_constant_multiplyImEEvPT_iPKc -.LCPI249_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI249_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm24custom_constant_multiplyImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm24custom_constant_multiplyImEEvPT_iPKc,comdat - .weak _Z13test_constantIm24custom_constant_multiplyImEEvPT_iPKc + .weak _Z13test_constantIm24custom_constant_multiplyImEEvPT_iPKc # -- Begin function _Z13test_constantIm24custom_constant_multiplyImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm24custom_constant_multiplyImEEvPT_iPKc,@function _Z13test_constantIm24custom_constant_multiplyImEEvPT_iPKc: # @_Z13test_constantIm24custom_constant_multiplyImEEvPT_iPKc @@ -51029,9 +50518,9 @@ _Z13test_constantIm24custom_constant_multiplyImEEvPT_iPKc: # @_Z13test_constantI st.d $a0, $sp, 72 # 8-byte Folded Spill ori $a5, $zero, 4 ori $fp, $zero, 120 - pcalau12i $a0, %pc_hi20(.LCPI249_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI249_0) pcalau12i $s4, %pc_hi20(init_value) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 234 ori $s8, $a0, 1536 pcalau12i $a0, %pc_hi20(.L.str.299) @@ -51122,8 +50611,8 @@ _Z13test_constantIm24custom_constant_multiplyImEEvPT_iPKc: # @_Z13test_constantI .LBB249_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI249_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI249_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 234 ori $s1, $a0, 1536 pcalau12i $a0, %pc_hi20(.L.str.299) @@ -51187,12 +50676,14 @@ _Z13test_constantIm24custom_constant_multiplyImEEvPT_iPKc: # @_Z13test_constantI ld.w $a0, $s3, %pc_lo12(current_test) .LBB249_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI249_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI249_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -51226,14 +50717,8 @@ _Z13test_constantIm24custom_constant_multiplyImEEvPT_iPKc: # @_Z13test_constantI .size _Z13test_constantIm24custom_constant_multiplyImEEvPT_iPKc, .Lfunc_end249-_Z13test_constantIm24custom_constant_multiplyImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm33custom_multiple_constant_multiplyImEEvPT_iPKc -.LCPI250_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI250_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm33custom_multiple_constant_multiplyImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm33custom_multiple_constant_multiplyImEEvPT_iPKc,comdat - .weak _Z13test_constantIm33custom_multiple_constant_multiplyImEEvPT_iPKc + .weak _Z13test_constantIm33custom_multiple_constant_multiplyImEEvPT_iPKc # -- Begin function _Z13test_constantIm33custom_multiple_constant_multiplyImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm33custom_multiple_constant_multiplyImEEvPT_iPKc,@function _Z13test_constantIm33custom_multiple_constant_multiplyImEEvPT_iPKc: # @_Z13test_constantIm33custom_multiple_constant_multiplyImEEvPT_iPKc @@ -51286,9 +50771,9 @@ _Z13test_constantIm33custom_multiple_constant_multiplyImEEvPT_iPKc: # @_Z13test_ st.d $a0, $sp, 72 # 8-byte Folded Spill ori $a5, $zero, 4 ori $fp, $zero, 120 - pcalau12i $a0, %pc_hi20(.LCPI250_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI250_0) pcalau12i $s4, %pc_hi20(init_value) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 234 ori $s8, $a0, 1536 pcalau12i $a0, %pc_hi20(.L.str.299) @@ -51379,8 +50864,8 @@ _Z13test_constantIm33custom_multiple_constant_multiplyImEEvPT_iPKc: # @_Z13test_ .LBB250_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI250_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI250_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 234 ori $s1, $a0, 1536 pcalau12i $a0, %pc_hi20(.L.str.299) @@ -51444,12 +50929,14 @@ _Z13test_constantIm33custom_multiple_constant_multiplyImEEvPT_iPKc: # @_Z13test_ ld.w $a0, $s3, %pc_lo12(current_test) .LBB250_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI250_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI250_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -51483,14 +50970,8 @@ _Z13test_constantIm33custom_multiple_constant_multiplyImEEvPT_iPKc: # @_Z13test_ .size _Z13test_constantIm33custom_multiple_constant_multiplyImEEvPT_iPKc, .Lfunc_end250-_Z13test_constantIm33custom_multiple_constant_multiplyImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm34custom_multiple_constant_multiply2ImEEvPT_iPKc -.LCPI251_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI251_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm34custom_multiple_constant_multiply2ImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm34custom_multiple_constant_multiply2ImEEvPT_iPKc,comdat - .weak _Z13test_constantIm34custom_multiple_constant_multiply2ImEEvPT_iPKc + .weak _Z13test_constantIm34custom_multiple_constant_multiply2ImEEvPT_iPKc # -- Begin function _Z13test_constantIm34custom_multiple_constant_multiply2ImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm34custom_multiple_constant_multiply2ImEEvPT_iPKc,@function _Z13test_constantIm34custom_multiple_constant_multiply2ImEEvPT_iPKc: # @_Z13test_constantIm34custom_multiple_constant_multiply2ImEEvPT_iPKc @@ -51543,8 +51024,8 @@ _Z13test_constantIm34custom_multiple_constant_multiply2ImEEvPT_iPKc: # @_Z13test st.d $a0, $sp, 72 # 8-byte Folded Spill ori $a5, $zero, 4 pcalau12i $fp, %pc_hi20(init_value) - pcalau12i $a0, %pc_hi20(.LCPI251_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI251_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s4, $a0, 3904 lu12i.w $a0, 234 @@ -51640,8 +51121,8 @@ _Z13test_constantIm34custom_multiple_constant_multiply2ImEEvPT_iPKc: # @_Z13test .LBB251_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI251_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI251_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s1, $a0, 3904 lu12i.w $a0, -235 @@ -51707,12 +51188,14 @@ _Z13test_constantIm34custom_multiple_constant_multiply2ImEEvPT_iPKc: # @_Z13test ld.w $a0, $s3, %pc_lo12(current_test) .LBB251_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI251_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI251_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -51746,14 +51229,8 @@ _Z13test_constantIm34custom_multiple_constant_multiply2ImEEvPT_iPKc: # @_Z13test .size _Z13test_constantIm34custom_multiple_constant_multiply2ImEEvPT_iPKc, .Lfunc_end251-_Z13test_constantIm34custom_multiple_constant_multiply2ImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm22custom_constant_divideImEEvPT_iPKc -.LCPI252_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI252_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm22custom_constant_divideImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm22custom_constant_divideImEEvPT_iPKc,comdat - .weak _Z13test_constantIm22custom_constant_divideImEEvPT_iPKc + .weak _Z13test_constantIm22custom_constant_divideImEEvPT_iPKc # -- Begin function _Z13test_constantIm22custom_constant_divideImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm22custom_constant_divideImEEvPT_iPKc,@function _Z13test_constantIm22custom_constant_divideImEEvPT_iPKc: # @_Z13test_constantIm22custom_constant_divideImEEvPT_iPKc @@ -51810,8 +51287,8 @@ _Z13test_constantIm22custom_constant_divideImEEvPT_iPKc: # @_Z13test_constantIm2 lu32i.d $a0, -209716 lu52i.d $fp, $a0, -820 pcalau12i $s4, %pc_hi20(init_value) - pcalau12i $a0, %pc_hi20(.LCPI252_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI252_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu52i.d $s2, $zero, -2048 lu12i.w $a0, 1 ori $s7, $a0, 3904 @@ -51912,8 +51389,8 @@ _Z13test_constantIm22custom_constant_divideImEEvPT_iPKc: # @_Z13test_constantIm2 .LBB252_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI252_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI252_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu52i.d $s1, $zero, -2048 lu12i.w $a0, -209716 ori $a0, $a0, 3277 @@ -51985,12 +51462,14 @@ _Z13test_constantIm22custom_constant_divideImEEvPT_iPKc: # @_Z13test_constantIm2 ld.w $a0, $s3, %pc_lo12(current_test) .LBB252_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI252_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI252_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -52024,14 +51503,8 @@ _Z13test_constantIm22custom_constant_divideImEEvPT_iPKc: # @_Z13test_constantIm2 .size _Z13test_constantIm22custom_constant_divideImEEvPT_iPKc, .Lfunc_end252-_Z13test_constantIm22custom_constant_divideImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm31custom_multiple_constant_divideImEEvPT_iPKc -.LCPI253_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI253_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm31custom_multiple_constant_divideImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm31custom_multiple_constant_divideImEEvPT_iPKc,comdat - .weak _Z13test_constantIm31custom_multiple_constant_divideImEEvPT_iPKc + .weak _Z13test_constantIm31custom_multiple_constant_divideImEEvPT_iPKc # -- Begin function _Z13test_constantIm31custom_multiple_constant_divideImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm31custom_multiple_constant_divideImEEvPT_iPKc,@function _Z13test_constantIm31custom_multiple_constant_divideImEEvPT_iPKc: # @_Z13test_constantIm31custom_multiple_constant_divideImEEvPT_iPKc @@ -52088,8 +51561,8 @@ _Z13test_constantIm31custom_multiple_constant_divideImEEvPT_iPKc: # @_Z13test_co lu32i.d $a0, -489336 lu52i.d $fp, $a0, -1912 pcalau12i $s4, %pc_hi20(init_value) - pcalau12i $a0, %pc_hi20(.LCPI253_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI253_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu52i.d $s2, $zero, -2048 lu12i.w $a0, 1 ori $s7, $a0, 3904 @@ -52190,8 +51663,8 @@ _Z13test_constantIm31custom_multiple_constant_divideImEEvPT_iPKc: # @_Z13test_co .LBB253_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI253_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI253_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu52i.d $s1, $zero, -2048 lu12i.w $a0, -489336 ori $a0, $a0, 2185 @@ -52263,12 +51736,14 @@ _Z13test_constantIm31custom_multiple_constant_divideImEEvPT_iPKc: # @_Z13test_co ld.w $a0, $s3, %pc_lo12(current_test) .LBB253_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI253_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI253_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -52302,14 +51777,8 @@ _Z13test_constantIm31custom_multiple_constant_divideImEEvPT_iPKc: # @_Z13test_co .size _Z13test_constantIm31custom_multiple_constant_divideImEEvPT_iPKc, .Lfunc_end253-_Z13test_constantIm31custom_multiple_constant_divideImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm32custom_multiple_constant_divide2ImEEvPT_iPKc -.LCPI254_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI254_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm32custom_multiple_constant_divide2ImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm32custom_multiple_constant_divide2ImEEvPT_iPKc,comdat - .weak _Z13test_constantIm32custom_multiple_constant_divide2ImEEvPT_iPKc + .weak _Z13test_constantIm32custom_multiple_constant_divide2ImEEvPT_iPKc # -- Begin function _Z13test_constantIm32custom_multiple_constant_divide2ImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm32custom_multiple_constant_divide2ImEEvPT_iPKc,@function _Z13test_constantIm32custom_multiple_constant_divide2ImEEvPT_iPKc: # @_Z13test_constantIm32custom_multiple_constant_divide2ImEEvPT_iPKc @@ -52362,8 +51831,8 @@ _Z13test_constantIm32custom_multiple_constant_divide2ImEEvPT_iPKc: # @_Z13test_c st.d $a0, $sp, 56 # 8-byte Folded Spill ori $a5, $zero, 4 pcalau12i $fp, %pc_hi20(init_value) - pcalau12i $a0, %pc_hi20(.LCPI254_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI254_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s4, $a0, 3904 lu12i.w $a0, 3 @@ -52456,8 +51925,8 @@ _Z13test_constantIm32custom_multiple_constant_divide2ImEEvPT_iPKc: # @_Z13test_c .LBB254_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI254_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI254_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s1, $a0, 3904 lu12i.w $a0, -4 @@ -52523,12 +51992,14 @@ _Z13test_constantIm32custom_multiple_constant_divide2ImEEvPT_iPKc: # @_Z13test_c ld.w $a0, $s3, %pc_lo12(current_test) .LBB254_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI254_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI254_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -52562,14 +52033,8 @@ _Z13test_constantIm32custom_multiple_constant_divide2ImEEvPT_iPKc: # @_Z13test_c .size _Z13test_constantIm32custom_multiple_constant_divide2ImEEvPT_iPKc, .Lfunc_end254-_Z13test_constantIm32custom_multiple_constant_divide2ImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm30custom_multiple_constant_mixedImEEvPT_iPKc -.LCPI255_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI255_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm30custom_multiple_constant_mixedImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm30custom_multiple_constant_mixedImEEvPT_iPKc,comdat - .weak _Z13test_constantIm30custom_multiple_constant_mixedImEEvPT_iPKc + .weak _Z13test_constantIm30custom_multiple_constant_mixedImEEvPT_iPKc # -- Begin function _Z13test_constantIm30custom_multiple_constant_mixedImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm30custom_multiple_constant_mixedImEEvPT_iPKc,@function _Z13test_constantIm30custom_multiple_constant_mixedImEEvPT_iPKc: # @_Z13test_constantIm30custom_multiple_constant_mixedImEEvPT_iPKc @@ -52620,9 +52085,9 @@ _Z13test_constantIm30custom_multiple_constant_mixedImEEvPT_iPKc: # @_Z13test_con slli.d $s6, $a0, 2 addi.d $s7, $s1, 16 ori $a5, $zero, 4 - pcalau12i $a0, %pc_hi20(.LCPI255_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI255_0) pcalau12i $fp, %pc_hi20(init_value) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s4, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.299) @@ -52709,8 +52174,8 @@ _Z13test_constantIm30custom_multiple_constant_mixedImEEvPT_iPKc: # @_Z13test_con .LBB255_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI255_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI255_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s1, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.299) @@ -52774,12 +52239,14 @@ _Z13test_constantIm30custom_multiple_constant_mixedImEEvPT_iPKc: # @_Z13test_con ld.w $a0, $s3, %pc_lo12(current_test) .LBB255_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI255_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI255_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -52813,14 +52280,8 @@ _Z13test_constantIm30custom_multiple_constant_mixedImEEvPT_iPKc: # @_Z13test_con .size _Z13test_constantIm30custom_multiple_constant_mixedImEEvPT_iPKc, .Lfunc_end255-_Z13test_constantIm30custom_multiple_constant_mixedImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm19custom_constant_andImEEvPT_iPKc -.LCPI256_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI256_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm19custom_constant_andImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm19custom_constant_andImEEvPT_iPKc,comdat - .weak _Z13test_constantIm19custom_constant_andImEEvPT_iPKc + .weak _Z13test_constantIm19custom_constant_andImEEvPT_iPKc # -- Begin function _Z13test_constantIm19custom_constant_andImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm19custom_constant_andImEEvPT_iPKc,@function _Z13test_constantIm19custom_constant_andImEEvPT_iPKc: # @_Z13test_constantIm19custom_constant_andImEEvPT_iPKc @@ -52871,9 +52332,9 @@ _Z13test_constantIm19custom_constant_andImEEvPT_iPKc: # @_Z13test_constantIm19cu slli.d $s6, $a0, 2 addi.d $s7, $s1, 16 ori $a5, $zero, 4 - pcalau12i $a0, %pc_hi20(.LCPI256_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI256_0) pcalau12i $fp, %pc_hi20(init_value) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s4, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.299) @@ -52967,8 +52428,8 @@ _Z13test_constantIm19custom_constant_andImEEvPT_iPKc: # @_Z13test_constantIm19cu .LBB256_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI256_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI256_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s0, $a0, %pc_lo12(.L.str.299) move $s1, $zero @@ -53030,12 +52491,14 @@ _Z13test_constantIm19custom_constant_andImEEvPT_iPKc: # @_Z13test_constantIm19cu ld.w $a0, $s3, %pc_lo12(current_test) .LBB256_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI256_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI256_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -53069,14 +52532,8 @@ _Z13test_constantIm19custom_constant_andImEEvPT_iPKc: # @_Z13test_constantIm19cu .size _Z13test_constantIm19custom_constant_andImEEvPT_iPKc, .Lfunc_end256-_Z13test_constantIm19custom_constant_andImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm28custom_multiple_constant_andImEEvPT_iPKc -.LCPI257_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI257_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm28custom_multiple_constant_andImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm28custom_multiple_constant_andImEEvPT_iPKc,comdat - .weak _Z13test_constantIm28custom_multiple_constant_andImEEvPT_iPKc + .weak _Z13test_constantIm28custom_multiple_constant_andImEEvPT_iPKc # -- Begin function _Z13test_constantIm28custom_multiple_constant_andImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm28custom_multiple_constant_andImEEvPT_iPKc,@function _Z13test_constantIm28custom_multiple_constant_andImEEvPT_iPKc: # @_Z13test_constantIm28custom_multiple_constant_andImEEvPT_iPKc @@ -53127,9 +52584,9 @@ _Z13test_constantIm28custom_multiple_constant_andImEEvPT_iPKc: # @_Z13test_const slli.d $s6, $a0, 2 addi.d $s7, $s1, 16 ori $a5, $zero, 4 - pcalau12i $a0, %pc_hi20(.LCPI257_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI257_0) pcalau12i $fp, %pc_hi20(init_value) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s4, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.299) @@ -53223,8 +52680,8 @@ _Z13test_constantIm28custom_multiple_constant_andImEEvPT_iPKc: # @_Z13test_const .LBB257_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI257_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI257_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s0, $a0, %pc_lo12(.L.str.299) move $s1, $zero @@ -53286,12 +52743,14 @@ _Z13test_constantIm28custom_multiple_constant_andImEEvPT_iPKc: # @_Z13test_const ld.w $a0, $s3, %pc_lo12(current_test) .LBB257_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI257_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI257_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -53325,14 +52784,8 @@ _Z13test_constantIm28custom_multiple_constant_andImEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIm28custom_multiple_constant_andImEEvPT_iPKc, .Lfunc_end257-_Z13test_constantIm28custom_multiple_constant_andImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm18custom_constant_orImEEvPT_iPKc -.LCPI258_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI258_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm18custom_constant_orImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm18custom_constant_orImEEvPT_iPKc,comdat - .weak _Z13test_constantIm18custom_constant_orImEEvPT_iPKc + .weak _Z13test_constantIm18custom_constant_orImEEvPT_iPKc # -- Begin function _Z13test_constantIm18custom_constant_orImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm18custom_constant_orImEEvPT_iPKc,@function _Z13test_constantIm18custom_constant_orImEEvPT_iPKc: # @_Z13test_constantIm18custom_constant_orImEEvPT_iPKc @@ -53383,9 +52836,9 @@ _Z13test_constantIm18custom_constant_orImEEvPT_iPKc: # @_Z13test_constantIm18cus slli.d $s6, $a0, 2 addi.d $s7, $s1, 16 ori $a5, $zero, 4 - pcalau12i $a0, %pc_hi20(.LCPI258_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI258_0) pcalau12i $fp, %pc_hi20(init_value) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s4, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.299) @@ -53520,12 +52973,14 @@ _Z13test_constantIm18custom_constant_orImEEvPT_iPKc: # @_Z13test_constantIm18cus ld.w $a0, $s3, %pc_lo12(current_test) .LBB258_19: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI258_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI258_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -53559,14 +53014,8 @@ _Z13test_constantIm18custom_constant_orImEEvPT_iPKc: # @_Z13test_constantIm18cus .size _Z13test_constantIm18custom_constant_orImEEvPT_iPKc, .Lfunc_end258-_Z13test_constantIm18custom_constant_orImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm27custom_multiple_constant_orImEEvPT_iPKc -.LCPI259_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI259_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm27custom_multiple_constant_orImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm27custom_multiple_constant_orImEEvPT_iPKc,comdat - .weak _Z13test_constantIm27custom_multiple_constant_orImEEvPT_iPKc + .weak _Z13test_constantIm27custom_multiple_constant_orImEEvPT_iPKc # -- Begin function _Z13test_constantIm27custom_multiple_constant_orImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm27custom_multiple_constant_orImEEvPT_iPKc,@function _Z13test_constantIm27custom_multiple_constant_orImEEvPT_iPKc: # @_Z13test_constantIm27custom_multiple_constant_orImEEvPT_iPKc @@ -53617,9 +53066,9 @@ _Z13test_constantIm27custom_multiple_constant_orImEEvPT_iPKc: # @_Z13test_consta slli.d $s6, $a0, 2 addi.d $s7, $s1, 16 ori $a5, $zero, 4 - pcalau12i $a0, %pc_hi20(.LCPI259_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI259_0) pcalau12i $fp, %pc_hi20(init_value) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s4, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.299) @@ -53754,12 +53203,14 @@ _Z13test_constantIm27custom_multiple_constant_orImEEvPT_iPKc: # @_Z13test_consta ld.w $a0, $s3, %pc_lo12(current_test) .LBB259_19: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI259_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI259_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -53793,14 +53244,8 @@ _Z13test_constantIm27custom_multiple_constant_orImEEvPT_iPKc: # @_Z13test_consta .size _Z13test_constantIm27custom_multiple_constant_orImEEvPT_iPKc, .Lfunc_end259-_Z13test_constantIm27custom_multiple_constant_orImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm19custom_constant_xorImEEvPT_iPKc -.LCPI260_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI260_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm19custom_constant_xorImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm19custom_constant_xorImEEvPT_iPKc,comdat - .weak _Z13test_constantIm19custom_constant_xorImEEvPT_iPKc + .weak _Z13test_constantIm19custom_constant_xorImEEvPT_iPKc # -- Begin function _Z13test_constantIm19custom_constant_xorImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm19custom_constant_xorImEEvPT_iPKc,@function _Z13test_constantIm19custom_constant_xorImEEvPT_iPKc: # @_Z13test_constantIm19custom_constant_xorImEEvPT_iPKc @@ -53851,9 +53296,9 @@ _Z13test_constantIm19custom_constant_xorImEEvPT_iPKc: # @_Z13test_constantIm19cu slli.d $s6, $a0, 2 addi.d $s7, $s1, 16 ori $a5, $zero, 4 - pcalau12i $a0, %pc_hi20(.LCPI260_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI260_0) pcalau12i $fp, %pc_hi20(init_value) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s4, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.299) @@ -53947,8 +53392,8 @@ _Z13test_constantIm19custom_constant_xorImEEvPT_iPKc: # @_Z13test_constantIm19cu .LBB260_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI260_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI260_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s1, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.299) @@ -54013,12 +53458,14 @@ _Z13test_constantIm19custom_constant_xorImEEvPT_iPKc: # @_Z13test_constantIm19cu ld.w $a0, $s3, %pc_lo12(current_test) .LBB260_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI260_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI260_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -54052,14 +53499,8 @@ _Z13test_constantIm19custom_constant_xorImEEvPT_iPKc: # @_Z13test_constantIm19cu .size _Z13test_constantIm19custom_constant_xorImEEvPT_iPKc, .Lfunc_end260-_Z13test_constantIm19custom_constant_xorImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantIm28custom_multiple_constant_xorImEEvPT_iPKc -.LCPI261_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI261_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIm28custom_multiple_constant_xorImEEvPT_iPKc,"axG",@progbits,_Z13test_constantIm28custom_multiple_constant_xorImEEvPT_iPKc,comdat - .weak _Z13test_constantIm28custom_multiple_constant_xorImEEvPT_iPKc + .weak _Z13test_constantIm28custom_multiple_constant_xorImEEvPT_iPKc # -- Begin function _Z13test_constantIm28custom_multiple_constant_xorImEEvPT_iPKc .p2align 5 .type _Z13test_constantIm28custom_multiple_constant_xorImEEvPT_iPKc,@function _Z13test_constantIm28custom_multiple_constant_xorImEEvPT_iPKc: # @_Z13test_constantIm28custom_multiple_constant_xorImEEvPT_iPKc @@ -54110,9 +53551,9 @@ _Z13test_constantIm28custom_multiple_constant_xorImEEvPT_iPKc: # @_Z13test_const slli.d $s6, $a0, 2 addi.d $s7, $s1, 16 ori $a5, $zero, 4 - pcalau12i $a0, %pc_hi20(.LCPI261_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI261_0) pcalau12i $fp, %pc_hi20(init_value) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s4, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.299) @@ -54206,8 +53647,8 @@ _Z13test_constantIm28custom_multiple_constant_xorImEEvPT_iPKc: # @_Z13test_const .LBB261_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI261_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI261_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s1, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.299) @@ -54272,12 +53713,14 @@ _Z13test_constantIm28custom_multiple_constant_xorImEEvPT_iPKc: # @_Z13test_const ld.w $a0, $s3, %pc_lo12(current_test) .LBB261_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI261_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI261_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -54311,20 +53754,8 @@ _Z13test_constantIm28custom_multiple_constant_xorImEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIm28custom_multiple_constant_xorImEEvPT_iPKc, .Lfunc_end261-_Z13test_constantIm28custom_multiple_constant_xorImEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z13test_constantIf10custom_twoIfEEvPT_iPKc -.LCPI262_0: - .word 0xc67a0000 # float -16000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI262_1: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI262_2: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI262_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIf10custom_twoIfEEvPT_iPKc,"axG",@progbits,_Z13test_constantIf10custom_twoIfEEvPT_iPKc,comdat - .weak _Z13test_constantIf10custom_twoIfEEvPT_iPKc + .weak _Z13test_constantIf10custom_twoIfEEvPT_iPKc # -- Begin function _Z13test_constantIf10custom_twoIfEEvPT_iPKc .p2align 5 .type _Z13test_constantIf10custom_twoIfEEvPT_iPKc,@function _Z13test_constantIf10custom_twoIfEEvPT_iPKc: # @_Z13test_constantIf10custom_twoIfEEvPT_iPKc @@ -54369,14 +53800,21 @@ _Z13test_constantIf10custom_twoIfEEvPT_iPKc: # @_Z13test_constantIf10custom_twoI # %bb.1: # %.preheader.lr.ph blez $s0, .LBB262_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI262_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI262_0) - pcalau12i $a0, %pc_hi20(.LCPI262_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI262_1) - pcalau12i $a0, %pc_hi20(.LCPI262_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI262_2) - movgr2fr.w $fs3, $zero + movgr2fr.w $fs0, $zero vldi $vr3, -1280 + lu12i.w $a0, -235616 + lu32i.d $a0, 0 + movgr2fr.w $fs1, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s1, $a0, %pc_lo12(.L.str.299) move $s5, $zero @@ -54390,7 +53828,7 @@ _Z13test_constantIf10custom_twoIfEEvPT_iPKc: # @_Z13test_constantIf10custom_twoI # =>This Loop Header: Depth=1 # Child Loop BB262_5 Depth 2 move $a0, $s0 - fmov.s $fa0, $fs3 + fmov.s $fa0, $fs0 .p2align 4, , 16 .LBB262_5: # Parent Loop BB262_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -54399,15 +53837,15 @@ _Z13test_constantIf10custom_twoIfEEvPT_iPKc: # @_Z13test_constantIf10custom_twoI bnez $a0, .LBB262_5 # %bb.6: # %._crit_edge.us # in Loop: Header=BB262_4 Depth=1 - fadd.s $fa1, $fa0, $fs0 + fadd.s $fa1, $fa0, $fs1 fabs.s $fa2, $fa0 fcvt.d.s $fa2, $fa2 - fcmp.clt.d $fcc0, $fs1, $fa2 + fcmp.clt.d $fcc0, $fs2, $fa2 fdiv.s $fa0, $fa1, $fa0 fsel $fa0, $fa1, $fa0, $fcc0 fabs.s $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs2 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB262_3 # %bb.7: # in Loop: Header=BB262_4 Depth=1 ld.w $a1, $s2, %pc_lo12(current_test) @@ -54459,12 +53897,14 @@ _Z13test_constantIf10custom_twoIfEEvPT_iPKc: # @_Z13test_constantIf10custom_twoI move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB262_14: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI262_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI262_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -54498,20 +53938,8 @@ _Z13test_constantIf10custom_twoIfEEvPT_iPKc: # @_Z13test_constantIf10custom_twoI .size _Z13test_constantIf10custom_twoIfEEvPT_iPKc, .Lfunc_end262-_Z13test_constantIf10custom_twoIfEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z13test_constantIf20custom_add_constantsIfEEvPT_iPKc -.LCPI263_0: - .word 0xc6bb8000 # float -24000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI263_1: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI263_2: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI263_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIf20custom_add_constantsIfEEvPT_iPKc,"axG",@progbits,_Z13test_constantIf20custom_add_constantsIfEEvPT_iPKc,comdat - .weak _Z13test_constantIf20custom_add_constantsIfEEvPT_iPKc + .weak _Z13test_constantIf20custom_add_constantsIfEEvPT_iPKc # -- Begin function _Z13test_constantIf20custom_add_constantsIfEEvPT_iPKc .p2align 5 .type _Z13test_constantIf20custom_add_constantsIfEEvPT_iPKc,@function _Z13test_constantIf20custom_add_constantsIfEEvPT_iPKc: # @_Z13test_constantIf20custom_add_constantsIfEEvPT_iPKc @@ -54556,14 +53984,21 @@ _Z13test_constantIf20custom_add_constantsIfEEvPT_iPKc: # @_Z13test_constantIf20c # %bb.1: # %.preheader.lr.ph blez $s0, .LBB263_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI263_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI263_0) - pcalau12i $a0, %pc_hi20(.LCPI263_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI263_1) - pcalau12i $a0, %pc_hi20(.LCPI263_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI263_2) - movgr2fr.w $fs3, $zero + movgr2fr.w $fs0, $zero vldi $vr3, -1272 + lu12i.w $a0, -234568 + lu32i.d $a0, 0 + movgr2fr.w $fs1, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s1, $a0, %pc_lo12(.L.str.299) move $s5, $zero @@ -54577,7 +54012,7 @@ _Z13test_constantIf20custom_add_constantsIfEEvPT_iPKc: # @_Z13test_constantIf20c # =>This Loop Header: Depth=1 # Child Loop BB263_5 Depth 2 move $a0, $s0 - fmov.s $fa0, $fs3 + fmov.s $fa0, $fs0 .p2align 4, , 16 .LBB263_5: # Parent Loop BB263_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -54586,15 +54021,15 @@ _Z13test_constantIf20custom_add_constantsIfEEvPT_iPKc: # @_Z13test_constantIf20c bnez $a0, .LBB263_5 # %bb.6: # %._crit_edge.us # in Loop: Header=BB263_4 Depth=1 - fadd.s $fa1, $fa0, $fs0 + fadd.s $fa1, $fa0, $fs1 fabs.s $fa2, $fa0 fcvt.d.s $fa2, $fa2 - fcmp.clt.d $fcc0, $fs1, $fa2 + fcmp.clt.d $fcc0, $fs2, $fa2 fdiv.s $fa0, $fa1, $fa0 fsel $fa0, $fa1, $fa0, $fcc0 fabs.s $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs2 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB263_3 # %bb.7: # in Loop: Header=BB263_4 Depth=1 ld.w $a1, $s2, %pc_lo12(current_test) @@ -54646,12 +54081,14 @@ _Z13test_constantIf20custom_add_constantsIfEEvPT_iPKc: # @_Z13test_constantIf20c move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB263_14: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI263_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI263_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -54685,20 +54122,8 @@ _Z13test_constantIf20custom_add_constantsIfEEvPT_iPKc: # @_Z13test_constantIf20c .size _Z13test_constantIf20custom_add_constantsIfEEvPT_iPKc, .Lfunc_end263-_Z13test_constantIf20custom_add_constantsIfEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z13test_constantIf20custom_sub_constantsIfEEvPT_iPKc -.LCPI264_0: - .word 0xc5fa0000 # float -8000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI264_1: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI264_2: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI264_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIf20custom_sub_constantsIfEEvPT_iPKc,"axG",@progbits,_Z13test_constantIf20custom_sub_constantsIfEEvPT_iPKc,comdat - .weak _Z13test_constantIf20custom_sub_constantsIfEEvPT_iPKc + .weak _Z13test_constantIf20custom_sub_constantsIfEEvPT_iPKc # -- Begin function _Z13test_constantIf20custom_sub_constantsIfEEvPT_iPKc .p2align 5 .type _Z13test_constantIf20custom_sub_constantsIfEEvPT_iPKc,@function _Z13test_constantIf20custom_sub_constantsIfEEvPT_iPKc: # @_Z13test_constantIf20custom_sub_constantsIfEEvPT_iPKc @@ -54743,14 +54168,21 @@ _Z13test_constantIf20custom_sub_constantsIfEEvPT_iPKc: # @_Z13test_constantIf20c # %bb.1: # %.preheader.lr.ph blez $s0, .LBB264_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI264_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI264_0) - pcalau12i $a0, %pc_hi20(.LCPI264_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI264_1) - pcalau12i $a0, %pc_hi20(.LCPI264_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI264_2) - movgr2fr.w $fs3, $zero + movgr2fr.w $fs0, $zero vldi $vr3, -1168 + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs1, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s1, $a0, %pc_lo12(.L.str.299) move $s5, $zero @@ -54764,7 +54196,7 @@ _Z13test_constantIf20custom_sub_constantsIfEEvPT_iPKc: # @_Z13test_constantIf20c # =>This Loop Header: Depth=1 # Child Loop BB264_5 Depth 2 move $a0, $s0 - fmov.s $fa0, $fs3 + fmov.s $fa0, $fs0 .p2align 4, , 16 .LBB264_5: # Parent Loop BB264_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -54773,15 +54205,15 @@ _Z13test_constantIf20custom_sub_constantsIfEEvPT_iPKc: # @_Z13test_constantIf20c bnez $a0, .LBB264_5 # %bb.6: # %._crit_edge.us # in Loop: Header=BB264_4 Depth=1 - fadd.s $fa1, $fa0, $fs0 + fadd.s $fa1, $fa0, $fs1 fabs.s $fa2, $fa0 fcvt.d.s $fa2, $fa2 - fcmp.clt.d $fcc0, $fs1, $fa2 + fcmp.clt.d $fcc0, $fs2, $fa2 fdiv.s $fa0, $fa1, $fa0 fsel $fa0, $fa1, $fa0, $fcc0 fabs.s $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs2 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB264_3 # %bb.7: # in Loop: Header=BB264_4 Depth=1 ld.w $a1, $s2, %pc_lo12(current_test) @@ -54833,12 +54265,14 @@ _Z13test_constantIf20custom_sub_constantsIfEEvPT_iPKc: # @_Z13test_constantIf20c move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB264_14: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI264_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI264_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -54872,20 +54306,8 @@ _Z13test_constantIf20custom_sub_constantsIfEEvPT_iPKc: # @_Z13test_constantIf20c .size _Z13test_constantIf20custom_sub_constantsIfEEvPT_iPKc, .Lfunc_end264-_Z13test_constantIf20custom_sub_constantsIfEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z13test_constantIf25custom_multiply_constantsIfEEvPT_iPKc -.LCPI265_0: - .word 0xc73b8000 # float -48000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI265_1: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI265_2: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI265_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIf25custom_multiply_constantsIfEEvPT_iPKc,"axG",@progbits,_Z13test_constantIf25custom_multiply_constantsIfEEvPT_iPKc,comdat - .weak _Z13test_constantIf25custom_multiply_constantsIfEEvPT_iPKc + .weak _Z13test_constantIf25custom_multiply_constantsIfEEvPT_iPKc # -- Begin function _Z13test_constantIf25custom_multiply_constantsIfEEvPT_iPKc .p2align 5 .type _Z13test_constantIf25custom_multiply_constantsIfEEvPT_iPKc,@function _Z13test_constantIf25custom_multiply_constantsIfEEvPT_iPKc: # @_Z13test_constantIf25custom_multiply_constantsIfEEvPT_iPKc @@ -54930,14 +54352,21 @@ _Z13test_constantIf25custom_multiply_constantsIfEEvPT_iPKc: # @_Z13test_constant # %bb.1: # %.preheader.lr.ph blez $s0, .LBB265_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI265_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI265_0) - pcalau12i $a0, %pc_hi20(.LCPI265_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI265_1) - pcalau12i $a0, %pc_hi20(.LCPI265_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI265_2) - movgr2fr.w $fs3, $zero + movgr2fr.w $fs0, $zero vldi $vr3, -1256 + lu12i.w $a0, -232520 + lu32i.d $a0, 0 + movgr2fr.w $fs1, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s1, $a0, %pc_lo12(.L.str.299) move $s5, $zero @@ -54951,7 +54380,7 @@ _Z13test_constantIf25custom_multiply_constantsIfEEvPT_iPKc: # @_Z13test_constant # =>This Loop Header: Depth=1 # Child Loop BB265_5 Depth 2 move $a0, $s0 - fmov.s $fa0, $fs3 + fmov.s $fa0, $fs0 .p2align 4, , 16 .LBB265_5: # Parent Loop BB265_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -54960,15 +54389,15 @@ _Z13test_constantIf25custom_multiply_constantsIfEEvPT_iPKc: # @_Z13test_constant bnez $a0, .LBB265_5 # %bb.6: # %._crit_edge.us # in Loop: Header=BB265_4 Depth=1 - fadd.s $fa1, $fa0, $fs0 + fadd.s $fa1, $fa0, $fs1 fabs.s $fa2, $fa0 fcvt.d.s $fa2, $fa2 - fcmp.clt.d $fcc0, $fs1, $fa2 + fcmp.clt.d $fcc0, $fs2, $fa2 fdiv.s $fa0, $fa1, $fa0 fsel $fa0, $fa1, $fa0, $fcc0 fabs.s $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs2 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB265_3 # %bb.7: # in Loop: Header=BB265_4 Depth=1 ld.w $a1, $s2, %pc_lo12(current_test) @@ -55020,12 +54449,14 @@ _Z13test_constantIf25custom_multiply_constantsIfEEvPT_iPKc: # @_Z13test_constant move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB265_14: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI265_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI265_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -55059,20 +54490,8 @@ _Z13test_constantIf25custom_multiply_constantsIfEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantIf25custom_multiply_constantsIfEEvPT_iPKc, .Lfunc_end265-_Z13test_constantIf25custom_multiply_constantsIfEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z13test_constantIf23custom_divide_constantsIfEEvPT_iPKc -.LCPI266_0: - .word 0xc67a0000 # float -16000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI266_1: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI266_2: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI266_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIf23custom_divide_constantsIfEEvPT_iPKc,"axG",@progbits,_Z13test_constantIf23custom_divide_constantsIfEEvPT_iPKc,comdat - .weak _Z13test_constantIf23custom_divide_constantsIfEEvPT_iPKc + .weak _Z13test_constantIf23custom_divide_constantsIfEEvPT_iPKc # -- Begin function _Z13test_constantIf23custom_divide_constantsIfEEvPT_iPKc .p2align 5 .type _Z13test_constantIf23custom_divide_constantsIfEEvPT_iPKc,@function _Z13test_constantIf23custom_divide_constantsIfEEvPT_iPKc: # @_Z13test_constantIf23custom_divide_constantsIfEEvPT_iPKc @@ -55117,14 +54536,21 @@ _Z13test_constantIf23custom_divide_constantsIfEEvPT_iPKc: # @_Z13test_constantIf # %bb.1: # %.preheader.lr.ph blez $s0, .LBB266_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI266_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI266_0) - pcalau12i $a0, %pc_hi20(.LCPI266_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI266_1) - pcalau12i $a0, %pc_hi20(.LCPI266_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI266_2) - movgr2fr.w $fs3, $zero + movgr2fr.w $fs0, $zero vldi $vr3, -1280 + lu12i.w $a0, -235616 + lu32i.d $a0, 0 + movgr2fr.w $fs1, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s1, $a0, %pc_lo12(.L.str.299) move $s5, $zero @@ -55138,7 +54564,7 @@ _Z13test_constantIf23custom_divide_constantsIfEEvPT_iPKc: # @_Z13test_constantIf # =>This Loop Header: Depth=1 # Child Loop BB266_5 Depth 2 move $a0, $s0 - fmov.s $fa0, $fs3 + fmov.s $fa0, $fs0 .p2align 4, , 16 .LBB266_5: # Parent Loop BB266_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -55147,15 +54573,15 @@ _Z13test_constantIf23custom_divide_constantsIfEEvPT_iPKc: # @_Z13test_constantIf bnez $a0, .LBB266_5 # %bb.6: # %._crit_edge.us # in Loop: Header=BB266_4 Depth=1 - fadd.s $fa1, $fa0, $fs0 + fadd.s $fa1, $fa0, $fs1 fabs.s $fa2, $fa0 fcvt.d.s $fa2, $fa2 - fcmp.clt.d $fcc0, $fs1, $fa2 + fcmp.clt.d $fcc0, $fs2, $fa2 fdiv.s $fa0, $fa1, $fa0 fsel $fa0, $fa1, $fa0, $fcc0 fabs.s $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs2 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB266_3 # %bb.7: # in Loop: Header=BB266_4 Depth=1 ld.w $a1, $s2, %pc_lo12(current_test) @@ -55207,12 +54633,14 @@ _Z13test_constantIf23custom_divide_constantsIfEEvPT_iPKc: # @_Z13test_constantIf move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB266_14: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI266_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI266_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -55246,20 +54674,8 @@ _Z13test_constantIf23custom_divide_constantsIfEEvPT_iPKc: # @_Z13test_constantIf .size _Z13test_constantIf23custom_divide_constantsIfEEvPT_iPKc, .Lfunc_end266-_Z13test_constantIf23custom_divide_constantsIfEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z13test_constantIf19custom_constant_addIfEEvPT_iPKc -.LCPI267_0: - .word 0xc5fa0000 # float -8000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI267_1: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI267_2: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI267_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIf19custom_constant_addIfEEvPT_iPKc,"axG",@progbits,_Z13test_constantIf19custom_constant_addIfEEvPT_iPKc,comdat - .weak _Z13test_constantIf19custom_constant_addIfEEvPT_iPKc + .weak _Z13test_constantIf19custom_constant_addIfEEvPT_iPKc # -- Begin function _Z13test_constantIf19custom_constant_addIfEEvPT_iPKc .p2align 5 .type _Z13test_constantIf19custom_constant_addIfEEvPT_iPKc,@function _Z13test_constantIf19custom_constant_addIfEEvPT_iPKc: # @_Z13test_constantIf19custom_constant_addIfEEvPT_iPKc @@ -55309,15 +54725,22 @@ _Z13test_constantIf19custom_constant_addIfEEvPT_iPKc: # @_Z13test_constantIf19cu # %bb.1: # %.preheader.lr.ph blez $s0, .LBB267_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI267_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI267_0) - pcalau12i $a0, %pc_hi20(.LCPI267_2) - fld.d $fs1, $a0, %pc_lo12(.LCPI267_2) - pcalau12i $a0, %pc_hi20(.LCPI267_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI267_1) - movgr2fr.w $fs3, $zero + movgr2fr.w $fs0, $zero vldi $vr3, -1244 pcalau12i $s6, %pc_hi20(init_value) + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs1, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s2, $a0, %pc_lo12(.L.str.299) move $s7, $zero @@ -55332,7 +54755,7 @@ _Z13test_constantIf19custom_constant_addIfEEvPT_iPKc: # @_Z13test_constantIf19cu # Child Loop BB267_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.s $fa0, $fs3 + fmov.s $fa0, $fs0 .p2align 4, , 16 .LBB267_5: # Parent Loop BB267_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -55347,16 +54770,16 @@ _Z13test_constantIf19custom_constant_addIfEEvPT_iPKc: # @_Z13test_constantIf19cu fld.d $fa1, $s6, %pc_lo12(init_value) fcvt.s.d $fa1, $fa1 fadd.s $fa1, $fa1, $fa3 - fmul.s $fa1, $fa1, $fs0 + fmul.s $fa1, $fa1, $fs1 fadd.s $fa1, $fa0, $fa1 fabs.s $fa2, $fa0 fcvt.d.s $fa2, $fa2 - fcmp.clt.d $fcc0, $fs1, $fa2 + fcmp.clt.d $fcc0, $fs2, $fa2 fdiv.s $fa0, $fa1, $fa0 fsel $fa0, $fa1, $fa0, $fcc0 fabs.s $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs2 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB267_3 # %bb.7: # in Loop: Header=BB267_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -55369,12 +54792,16 @@ _Z13test_constantIf19custom_constant_addIfEEvPT_iPKc: # @_Z13test_constantIf19cu .LBB267_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI267_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI267_0) - pcalau12i $a0, %pc_hi20(.LCPI267_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI267_1) vldi $vr2, -1244 - movgr2fr.w $fs2, $zero + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs0, $a0 + movgr2fr.w $fs1, $zero + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs2, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s0, $a0, %pc_lo12(.L.str.299) move $s2, $zero @@ -55389,10 +54816,10 @@ _Z13test_constantIf19custom_constant_addIfEEvPT_iPKc: # @_Z13test_constantIf19cu fcvt.s.d $fa1, $fa0 fadd.s $fa1, $fa1, $fa2 fmul.s $fa1, $fa1, $fs0 - fadd.s $fa1, $fa1, $fs2 + fadd.s $fa1, $fa1, $fs1 fabs.s $fa1, $fa1 fcvt.d.s $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs1 + fcmp.clt.d $fcc0, $fa1, $fs2 bcnez $fcc0, .LBB267_9 # %bb.11: # in Loop: Header=BB267_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -55431,12 +54858,14 @@ _Z13test_constantIf19custom_constant_addIfEEvPT_iPKc: # @_Z13test_constantIf19cu move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB267_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI267_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI267_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -55472,20 +54901,8 @@ _Z13test_constantIf19custom_constant_addIfEEvPT_iPKc: # @_Z13test_constantIf19cu .size _Z13test_constantIf19custom_constant_addIfEEvPT_iPKc, .Lfunc_end267-_Z13test_constantIf19custom_constant_addIfEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z13test_constantIf28custom_multiple_constant_addIfEEvPT_iPKc -.LCPI268_0: - .word 0xc5fa0000 # float -8000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI268_1: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI268_2: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI268_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIf28custom_multiple_constant_addIfEEvPT_iPKc,"axG",@progbits,_Z13test_constantIf28custom_multiple_constant_addIfEEvPT_iPKc,comdat - .weak _Z13test_constantIf28custom_multiple_constant_addIfEEvPT_iPKc + .weak _Z13test_constantIf28custom_multiple_constant_addIfEEvPT_iPKc # -- Begin function _Z13test_constantIf28custom_multiple_constant_addIfEEvPT_iPKc .p2align 5 .type _Z13test_constantIf28custom_multiple_constant_addIfEEvPT_iPKc,@function _Z13test_constantIf28custom_multiple_constant_addIfEEvPT_iPKc: # @_Z13test_constantIf28custom_multiple_constant_addIfEEvPT_iPKc @@ -55538,15 +54955,22 @@ _Z13test_constantIf28custom_multiple_constant_addIfEEvPT_iPKc: # @_Z13test_const movgr2fr.w $fs0, $zero vldi $vr3, -1168 vldi $vr4, -1280 - pcalau12i $a0, %pc_hi20(.LCPI268_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI268_0) - pcalau12i $a0, %pc_hi20(.LCPI268_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI268_2) - pcalau12i $a0, %pc_hi20(.LCPI268_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI268_1) vldi $vr5, -1272 vldi $vr6, -1264 pcalau12i $s6, %pc_hi20(init_value) + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs1, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s2, $a0, %pc_lo12(.L.str.299) move $s7, $zero @@ -55609,13 +55033,17 @@ _Z13test_constantIf28custom_multiple_constant_addIfEEvPT_iPKc: # @_Z13test_const fld.d $fa0, $s1, %pc_lo12(init_value) vldi $vr2, -1168 vldi $vr3, -1280 - pcalau12i $a0, %pc_hi20(.LCPI268_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI268_0) - pcalau12i $a0, %pc_hi20(.LCPI268_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI268_1) vldi $vr4, -1272 vldi $vr5, -1264 - movgr2fr.w $fs2, $zero + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs0, $a0 + movgr2fr.w $fs1, $zero + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs2, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s0, $a0, %pc_lo12(.L.str.299) move $s2, $zero @@ -55633,10 +55061,10 @@ _Z13test_constantIf28custom_multiple_constant_addIfEEvPT_iPKc: # @_Z13test_const fadd.s $fa1, $fa1, $fa4 fadd.s $fa1, $fa1, $fa5 fmul.s $fa1, $fa1, $fs0 - fadd.s $fa1, $fa1, $fs2 + fadd.s $fa1, $fa1, $fs1 fabs.s $fa1, $fa1 fcvt.d.s $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs1 + fcmp.clt.d $fcc0, $fa1, $fs2 bcnez $fcc0, .LBB268_9 # %bb.11: # in Loop: Header=BB268_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -55678,12 +55106,14 @@ _Z13test_constantIf28custom_multiple_constant_addIfEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB268_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI268_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI268_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -55719,20 +55149,8 @@ _Z13test_constantIf28custom_multiple_constant_addIfEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIf28custom_multiple_constant_addIfEEvPT_iPKc, .Lfunc_end268-_Z13test_constantIf28custom_multiple_constant_addIfEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z13test_constantIf19custom_constant_subIfEEvPT_iPKc -.LCPI269_0: - .word 0xc5fa0000 # float -8000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI269_1: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI269_2: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI269_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIf19custom_constant_subIfEEvPT_iPKc,"axG",@progbits,_Z13test_constantIf19custom_constant_subIfEEvPT_iPKc,comdat - .weak _Z13test_constantIf19custom_constant_subIfEEvPT_iPKc + .weak _Z13test_constantIf19custom_constant_subIfEEvPT_iPKc # -- Begin function _Z13test_constantIf19custom_constant_subIfEEvPT_iPKc .p2align 5 .type _Z13test_constantIf19custom_constant_subIfEEvPT_iPKc,@function _Z13test_constantIf19custom_constant_subIfEEvPT_iPKc: # @_Z13test_constantIf19custom_constant_subIfEEvPT_iPKc @@ -55782,15 +55200,22 @@ _Z13test_constantIf19custom_constant_subIfEEvPT_iPKc: # @_Z13test_constantIf19cu # %bb.1: # %.preheader.lr.ph blez $s0, .LBB269_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI269_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI269_0) - pcalau12i $a0, %pc_hi20(.LCPI269_2) - fld.d $fs1, $a0, %pc_lo12(.LCPI269_2) - pcalau12i $a0, %pc_hi20(.LCPI269_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI269_1) - movgr2fr.w $fs3, $zero + movgr2fr.w $fs0, $zero vldi $vr3, -1116 pcalau12i $s6, %pc_hi20(init_value) + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs1, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s2, $a0, %pc_lo12(.L.str.299) move $s7, $zero @@ -55805,7 +55230,7 @@ _Z13test_constantIf19custom_constant_subIfEEvPT_iPKc: # @_Z13test_constantIf19cu # Child Loop BB269_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.s $fa0, $fs3 + fmov.s $fa0, $fs0 .p2align 4, , 16 .LBB269_5: # Parent Loop BB269_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -55820,16 +55245,16 @@ _Z13test_constantIf19custom_constant_subIfEEvPT_iPKc: # @_Z13test_constantIf19cu fld.d $fa1, $s6, %pc_lo12(init_value) fcvt.s.d $fa1, $fa1 fadd.s $fa1, $fa1, $fa3 - fmul.s $fa1, $fa1, $fs0 + fmul.s $fa1, $fa1, $fs1 fadd.s $fa1, $fa0, $fa1 fabs.s $fa2, $fa0 fcvt.d.s $fa2, $fa2 - fcmp.clt.d $fcc0, $fs1, $fa2 + fcmp.clt.d $fcc0, $fs2, $fa2 fdiv.s $fa0, $fa1, $fa0 fsel $fa0, $fa1, $fa0, $fcc0 fabs.s $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs2 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB269_3 # %bb.7: # in Loop: Header=BB269_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -55842,12 +55267,16 @@ _Z13test_constantIf19custom_constant_subIfEEvPT_iPKc: # @_Z13test_constantIf19cu .LBB269_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI269_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI269_0) - pcalau12i $a0, %pc_hi20(.LCPI269_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI269_1) vldi $vr2, -1116 - movgr2fr.w $fs2, $zero + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs0, $a0 + movgr2fr.w $fs1, $zero + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs2, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s0, $a0, %pc_lo12(.L.str.299) move $s2, $zero @@ -55862,10 +55291,10 @@ _Z13test_constantIf19custom_constant_subIfEEvPT_iPKc: # @_Z13test_constantIf19cu fcvt.s.d $fa1, $fa0 fadd.s $fa1, $fa1, $fa2 fmul.s $fa1, $fa1, $fs0 - fadd.s $fa1, $fa1, $fs2 + fadd.s $fa1, $fa1, $fs1 fabs.s $fa1, $fa1 fcvt.d.s $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs1 + fcmp.clt.d $fcc0, $fa1, $fs2 bcnez $fcc0, .LBB269_9 # %bb.11: # in Loop: Header=BB269_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -55904,12 +55333,14 @@ _Z13test_constantIf19custom_constant_subIfEEvPT_iPKc: # @_Z13test_constantIf19cu move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB269_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI269_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI269_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -55945,20 +55376,8 @@ _Z13test_constantIf19custom_constant_subIfEEvPT_iPKc: # @_Z13test_constantIf19cu .size _Z13test_constantIf19custom_constant_subIfEEvPT_iPKc, .Lfunc_end269-_Z13test_constantIf19custom_constant_subIfEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z13test_constantIf28custom_multiple_constant_subIfEEvPT_iPKc -.LCPI270_0: - .word 0xc5fa0000 # float -8000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI270_1: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI270_2: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI270_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIf28custom_multiple_constant_subIfEEvPT_iPKc,"axG",@progbits,_Z13test_constantIf28custom_multiple_constant_subIfEEvPT_iPKc,comdat - .weak _Z13test_constantIf28custom_multiple_constant_subIfEEvPT_iPKc + .weak _Z13test_constantIf28custom_multiple_constant_subIfEEvPT_iPKc # -- Begin function _Z13test_constantIf28custom_multiple_constant_subIfEEvPT_iPKc .p2align 5 .type _Z13test_constantIf28custom_multiple_constant_subIfEEvPT_iPKc,@function _Z13test_constantIf28custom_multiple_constant_subIfEEvPT_iPKc: # @_Z13test_constantIf28custom_multiple_constant_subIfEEvPT_iPKc @@ -56011,15 +55430,22 @@ _Z13test_constantIf28custom_multiple_constant_subIfEEvPT_iPKc: # @_Z13test_const movgr2fr.w $fs0, $zero vldi $vr3, -1040 vldi $vr4, -1152 - pcalau12i $a0, %pc_hi20(.LCPI270_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI270_0) - pcalau12i $a0, %pc_hi20(.LCPI270_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI270_2) - pcalau12i $a0, %pc_hi20(.LCPI270_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI270_1) vldi $vr5, -1144 vldi $vr6, -1136 pcalau12i $s6, %pc_hi20(init_value) + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs1, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s2, $a0, %pc_lo12(.L.str.299) move $s7, $zero @@ -56082,13 +55508,17 @@ _Z13test_constantIf28custom_multiple_constant_subIfEEvPT_iPKc: # @_Z13test_const fld.d $fa0, $s1, %pc_lo12(init_value) vldi $vr2, -1040 vldi $vr3, -1152 - pcalau12i $a0, %pc_hi20(.LCPI270_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI270_0) - pcalau12i $a0, %pc_hi20(.LCPI270_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI270_1) vldi $vr4, -1144 vldi $vr5, -1136 - movgr2fr.w $fs2, $zero + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs0, $a0 + movgr2fr.w $fs1, $zero + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs2, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s0, $a0, %pc_lo12(.L.str.299) move $s2, $zero @@ -56106,10 +55536,10 @@ _Z13test_constantIf28custom_multiple_constant_subIfEEvPT_iPKc: # @_Z13test_const fadd.s $fa1, $fa1, $fa4 fadd.s $fa1, $fa1, $fa5 fmul.s $fa1, $fa1, $fs0 - fadd.s $fa1, $fa1, $fs2 + fadd.s $fa1, $fa1, $fs1 fabs.s $fa1, $fa1 fcvt.d.s $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs1 + fcmp.clt.d $fcc0, $fa1, $fs2 bcnez $fcc0, .LBB270_9 # %bb.11: # in Loop: Header=BB270_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -56151,12 +55581,14 @@ _Z13test_constantIf28custom_multiple_constant_subIfEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB270_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI270_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI270_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -56192,24 +55624,8 @@ _Z13test_constantIf28custom_multiple_constant_subIfEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantIf28custom_multiple_constant_subIfEEvPT_iPKc, .Lfunc_end270-_Z13test_constantIf28custom_multiple_constant_subIfEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z13test_constantIf24custom_constant_multiplyIfEEvPT_iPKc -.LCPI271_0: - .word 0xc2f00000 # float -120 -.LCPI271_1: - .word 0x45fa0000 # float 8000 -.LCPI271_3: - .word 0x42f00000 # float 120 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI271_2: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI271_4: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI271_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIf24custom_constant_multiplyIfEEvPT_iPKc,"axG",@progbits,_Z13test_constantIf24custom_constant_multiplyIfEEvPT_iPKc,comdat - .weak _Z13test_constantIf24custom_constant_multiplyIfEEvPT_iPKc + .weak _Z13test_constantIf24custom_constant_multiplyIfEEvPT_iPKc # -- Begin function _Z13test_constantIf24custom_constant_multiplyIfEEvPT_iPKc .p2align 5 .type _Z13test_constantIf24custom_constant_multiplyIfEEvPT_iPKc,@function _Z13test_constantIf24custom_constant_multiplyIfEEvPT_iPKc: # @_Z13test_constantIf24custom_constant_multiplyIfEEvPT_iPKc @@ -56263,18 +55679,25 @@ _Z13test_constantIf24custom_constant_multiplyIfEEvPT_iPKc: # @_Z13test_constantI # %bb.1: # %.preheader.lr.ph blez $s0, .LBB271_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI271_3) - fld.s $fs0, $a0, %pc_lo12(.LCPI271_3) - pcalau12i $a0, %pc_hi20(.LCPI271_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI271_0) - pcalau12i $a0, %pc_hi20(.LCPI271_1) - fld.s $fs2, $a0, %pc_lo12(.LCPI271_1) - pcalau12i $a0, %pc_hi20(.LCPI271_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI271_4) - pcalau12i $a0, %pc_hi20(.LCPI271_2) - fld.d $fs4, $a0, %pc_lo12(.LCPI271_2) - movgr2fr.w $fs5, $zero + movgr2fr.w $fs0, $zero + lu12i.w $a0, 274176 + movgr2fr.w $fs1, $a0 pcalau12i $s6, %pc_hi20(init_value) + lu12i.w $a0, -250112 + lu32i.d $a0, 0 + movgr2fr.w $fs2, $a0 + lu12i.w $a0, 286624 + movgr2fr.w $fs3, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s2, $a0, %pc_lo12(.L.str.299) move $s7, $zero @@ -56289,12 +55712,12 @@ _Z13test_constantIf24custom_constant_multiplyIfEEvPT_iPKc: # @_Z13test_constantI # Child Loop BB271_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.s $fa0, $fs5 + fmov.s $fa0, $fs0 .p2align 4, , 16 .LBB271_5: # Parent Loop BB271_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.s $fa1, $a2, 0 - fmul.s $fa1, $fa1, $fs0 + fmul.s $fa1, $fa1, $fs1 fadd.s $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 4 @@ -56303,17 +55726,17 @@ _Z13test_constantIf24custom_constant_multiplyIfEEvPT_iPKc: # @_Z13test_constantI # in Loop: Header=BB271_4 Depth=1 fld.d $fa1, $s6, %pc_lo12(init_value) fcvt.s.d $fa1, $fa1 - fmul.s $fa1, $fa1, $fs1 fmul.s $fa1, $fa1, $fs2 + fmul.s $fa1, $fa1, $fs3 fadd.s $fa1, $fa0, $fa1 fabs.s $fa2, $fa0 fcvt.d.s $fa2, $fa2 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fdiv.s $fa0, $fa1, $fa0 fsel $fa0, $fa1, $fa0, $fcc0 fabs.s $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB271_3 # %bb.7: # in Loop: Header=BB271_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -56325,13 +55748,17 @@ _Z13test_constantIf24custom_constant_multiplyIfEEvPT_iPKc: # @_Z13test_constantI .LBB271_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI271_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI271_0) - pcalau12i $a0, %pc_hi20(.LCPI271_1) - fld.s $fs1, $a0, %pc_lo12(.LCPI271_1) - pcalau12i $a0, %pc_hi20(.LCPI271_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI271_2) - movgr2fr.w $fs3, $zero + lu12i.w $a0, -250112 + lu32i.d $a0, 0 + movgr2fr.w $fs0, $a0 + lu12i.w $a0, 286624 + movgr2fr.w $fs1, $a0 + movgr2fr.w $fs2, $zero + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s0, $a0, %pc_lo12(.L.str.299) move $s2, $zero @@ -56346,10 +55773,10 @@ _Z13test_constantIf24custom_constant_multiplyIfEEvPT_iPKc: # @_Z13test_constantI fcvt.s.d $fa1, $fa0 fmul.s $fa1, $fa1, $fs0 fmul.s $fa1, $fa1, $fs1 - fadd.s $fa1, $fa1, $fs3 + fadd.s $fa1, $fa1, $fs2 fabs.s $fa1, $fa1 fcvt.d.s $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs2 + fcmp.clt.d $fcc0, $fa1, $fs3 bcnez $fcc0, .LBB271_9 # %bb.11: # in Loop: Header=BB271_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -56387,12 +55814,14 @@ _Z13test_constantIf24custom_constant_multiplyIfEEvPT_iPKc: # @_Z13test_constantI move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB271_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI271_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI271_5) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -56430,20 +55859,8 @@ _Z13test_constantIf24custom_constant_multiplyIfEEvPT_iPKc: # @_Z13test_constantI .size _Z13test_constantIf24custom_constant_multiplyIfEEvPT_iPKc, .Lfunc_end271-_Z13test_constantIf24custom_constant_multiplyIfEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z13test_constantIf33custom_multiple_constant_multiplyIfEEvPT_iPKc -.LCPI272_0: - .word 0x45fa0000 # float 8000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI272_1: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI272_2: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI272_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIf33custom_multiple_constant_multiplyIfEEvPT_iPKc,"axG",@progbits,_Z13test_constantIf33custom_multiple_constant_multiplyIfEEvPT_iPKc,comdat - .weak _Z13test_constantIf33custom_multiple_constant_multiplyIfEEvPT_iPKc + .weak _Z13test_constantIf33custom_multiple_constant_multiplyIfEEvPT_iPKc # -- Begin function _Z13test_constantIf33custom_multiple_constant_multiplyIfEEvPT_iPKc .p2align 5 .type _Z13test_constantIf33custom_multiple_constant_multiplyIfEEvPT_iPKc,@function _Z13test_constantIf33custom_multiple_constant_multiplyIfEEvPT_iPKc: # @_Z13test_constantIf33custom_multiple_constant_multiplyIfEEvPT_iPKc @@ -56496,15 +55913,21 @@ _Z13test_constantIf33custom_multiple_constant_multiplyIfEEvPT_iPKc: # @_Z13test_ movgr2fr.w $fs0, $zero vldi $vr3, -1272 vldi $vr4, -1264 - pcalau12i $a0, %pc_hi20(.LCPI272_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI272_0) - pcalau12i $a0, %pc_hi20(.LCPI272_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI272_2) - pcalau12i $a0, %pc_hi20(.LCPI272_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI272_1) vldi $vr5, -1260 pcalau12i $s6, %pc_hi20(init_value) vldi $vr6, -1144 + lu12i.w $a0, 286624 + movgr2fr.w $fs1, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s2, $a0, %pc_lo12(.L.str.299) move $s7, $zero @@ -56566,13 +55989,16 @@ _Z13test_constantIf33custom_multiple_constant_multiplyIfEEvPT_iPKc: # @_Z13test_ pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) vldi $vr2, -1144 - pcalau12i $a0, %pc_hi20(.LCPI272_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI272_0) - pcalau12i $a0, %pc_hi20(.LCPI272_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI272_1) vldi $vr3, -1264 vldi $vr4, -1260 - movgr2fr.w $fs2, $zero + lu12i.w $a0, 286624 + movgr2fr.w $fs0, $a0 + movgr2fr.w $fs1, $zero + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs2, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s0, $a0, %pc_lo12(.L.str.299) move $s2, $zero @@ -56590,10 +56016,10 @@ _Z13test_constantIf33custom_multiple_constant_multiplyIfEEvPT_iPKc: # @_Z13test_ fmul.s $fa1, $fa1, $fa3 fmul.s $fa1, $fa1, $fa4 fmul.s $fa1, $fa1, $fs0 - fadd.s $fa1, $fa1, $fs2 + fadd.s $fa1, $fa1, $fs1 fabs.s $fa1, $fa1 fcvt.d.s $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs1 + fcmp.clt.d $fcc0, $fa1, $fs2 bcnez $fcc0, .LBB272_9 # %bb.11: # in Loop: Header=BB272_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -56634,12 +56060,14 @@ _Z13test_constantIf33custom_multiple_constant_multiplyIfEEvPT_iPKc: # @_Z13test_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB272_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI272_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI272_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -56675,22 +56103,8 @@ _Z13test_constantIf33custom_multiple_constant_multiplyIfEEvPT_iPKc: # @_Z13test_ .size _Z13test_constantIf33custom_multiple_constant_multiplyIfEEvPT_iPKc, .Lfunc_end272-_Z13test_constantIf33custom_multiple_constant_multiplyIfEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z13test_constantIf34custom_multiple_constant_multiply2IfEEvPT_iPKc -.LCPI273_0: - .word 0x42f00000 # float 120 -.LCPI273_1: - .word 0xc5fa0000 # float -8000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI273_2: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI273_3: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI273_4: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIf34custom_multiple_constant_multiply2IfEEvPT_iPKc,"axG",@progbits,_Z13test_constantIf34custom_multiple_constant_multiply2IfEEvPT_iPKc,comdat - .weak _Z13test_constantIf34custom_multiple_constant_multiply2IfEEvPT_iPKc + .weak _Z13test_constantIf34custom_multiple_constant_multiply2IfEEvPT_iPKc # -- Begin function _Z13test_constantIf34custom_multiple_constant_multiply2IfEEvPT_iPKc .p2align 5 .type _Z13test_constantIf34custom_multiple_constant_multiply2IfEEvPT_iPKc,@function _Z13test_constantIf34custom_multiple_constant_multiply2IfEEvPT_iPKc: # @_Z13test_constantIf34custom_multiple_constant_multiply2IfEEvPT_iPKc @@ -56742,16 +56156,23 @@ _Z13test_constantIf34custom_multiple_constant_multiply2IfEEvPT_iPKc: # @_Z13test # %bb.1: # %.preheader.lr.ph blez $s0, .LBB273_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI273_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI273_0) - pcalau12i $a0, %pc_hi20(.LCPI273_1) - fld.s $fs1, $a0, %pc_lo12(.LCPI273_1) - pcalau12i $a0, %pc_hi20(.LCPI273_3) - fld.d $fs2, $a0, %pc_lo12(.LCPI273_3) - pcalau12i $a0, %pc_hi20(.LCPI273_2) - fld.d $fs3, $a0, %pc_lo12(.LCPI273_2) - movgr2fr.w $fs4, $zero + movgr2fr.w $fs0, $zero + lu12i.w $a0, 274176 + movgr2fr.w $fs1, $a0 pcalau12i $s6, %pc_hi20(init_value) + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs2, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s2, $a0, %pc_lo12(.L.str.299) move $s7, $zero @@ -56766,12 +56187,12 @@ _Z13test_constantIf34custom_multiple_constant_multiply2IfEEvPT_iPKc: # @_Z13test # Child Loop BB273_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.s $fa0, $fs4 + fmov.s $fa0, $fs0 .p2align 4, , 16 .LBB273_5: # Parent Loop BB273_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.s $fa1, $a2, 0 - fadd.s $fa1, $fa1, $fs0 + fadd.s $fa1, $fa1, $fs1 fadd.s $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 4 @@ -56780,17 +56201,17 @@ _Z13test_constantIf34custom_multiple_constant_multiply2IfEEvPT_iPKc: # @_Z13test # in Loop: Header=BB273_4 Depth=1 fld.d $fa1, $s6, %pc_lo12(init_value) fcvt.s.d $fa1, $fa1 - fadd.s $fa1, $fa1, $fs0 - fmul.s $fa1, $fa1, $fs1 + fadd.s $fa1, $fa1, $fs1 + fmul.s $fa1, $fa1, $fs2 fadd.s $fa1, $fa0, $fa1 fabs.s $fa2, $fa0 fcvt.d.s $fa2, $fa2 - fcmp.clt.d $fcc0, $fs2, $fa2 + fcmp.clt.d $fcc0, $fs3, $fa2 fdiv.s $fa0, $fa1, $fa0 fsel $fa0, $fa1, $fa0, $fcc0 fabs.s $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs3 + fcmp.clt.d $fcc0, $fa0, $fs4 bcnez $fcc0, .LBB273_3 # %bb.7: # in Loop: Header=BB273_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -56802,13 +56223,17 @@ _Z13test_constantIf34custom_multiple_constant_multiply2IfEEvPT_iPKc: # @_Z13test .LBB273_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI273_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI273_0) - pcalau12i $a0, %pc_hi20(.LCPI273_1) - fld.s $fs1, $a0, %pc_lo12(.LCPI273_1) - pcalau12i $a0, %pc_hi20(.LCPI273_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI273_2) - movgr2fr.w $fs3, $zero + lu12i.w $a0, 274176 + movgr2fr.w $fs0, $a0 + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs1, $a0 + movgr2fr.w $fs2, $zero + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s0, $a0, %pc_lo12(.L.str.299) move $s2, $zero @@ -56823,10 +56248,10 @@ _Z13test_constantIf34custom_multiple_constant_multiply2IfEEvPT_iPKc: # @_Z13test fcvt.s.d $fa1, $fa0 fadd.s $fa1, $fa1, $fs0 fmul.s $fa1, $fa1, $fs1 - fadd.s $fa1, $fa1, $fs3 + fadd.s $fa1, $fa1, $fs2 fabs.s $fa1, $fa1 fcvt.d.s $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs2 + fcmp.clt.d $fcc0, $fa1, $fs3 bcnez $fcc0, .LBB273_9 # %bb.11: # in Loop: Header=BB273_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -56864,12 +56289,14 @@ _Z13test_constantIf34custom_multiple_constant_multiply2IfEEvPT_iPKc: # @_Z13test move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB273_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI273_4) - fld.d $fa0, $a1, %pc_lo12(.LCPI273_4) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -56906,20 +56333,8 @@ _Z13test_constantIf34custom_multiple_constant_multiply2IfEEvPT_iPKc: # @_Z13test .size _Z13test_constantIf34custom_multiple_constant_multiply2IfEEvPT_iPKc, .Lfunc_end273-_Z13test_constantIf34custom_multiple_constant_multiply2IfEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z13test_constantIf22custom_constant_divideIfEEvPT_iPKc -.LCPI274_0: - .word 0x45fa0000 # float 8000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI274_1: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI274_2: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI274_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIf22custom_constant_divideIfEEvPT_iPKc,"axG",@progbits,_Z13test_constantIf22custom_constant_divideIfEEvPT_iPKc,comdat - .weak _Z13test_constantIf22custom_constant_divideIfEEvPT_iPKc + .weak _Z13test_constantIf22custom_constant_divideIfEEvPT_iPKc # -- Begin function _Z13test_constantIf22custom_constant_divideIfEEvPT_iPKc .p2align 5 .type _Z13test_constantIf22custom_constant_divideIfEEvPT_iPKc,@function _Z13test_constantIf22custom_constant_divideIfEEvPT_iPKc: # @_Z13test_constantIf22custom_constant_divideIfEEvPT_iPKc @@ -56970,15 +56385,21 @@ _Z13test_constantIf22custom_constant_divideIfEEvPT_iPKc: # @_Z13test_constantIf2 blez $s0, .LBB274_8 # %bb.2: # %.preheader.us.preheader movgr2fr.w $fs0, $zero - pcalau12i $a0, %pc_hi20(.LCPI274_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI274_0) - pcalau12i $a0, %pc_hi20(.LCPI274_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI274_2) - pcalau12i $a0, %pc_hi20(.LCPI274_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI274_1) vldi $vr3, -1260 pcalau12i $s6, %pc_hi20(init_value) vldi $vr4, -1132 + lu12i.w $a0, 286624 + movgr2fr.w $fs1, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s2, $a0, %pc_lo12(.L.str.299) move $s7, $zero @@ -57031,12 +56452,15 @@ _Z13test_constantIf22custom_constant_divideIfEEvPT_iPKc: # @_Z13test_constantIf2 .LBB274_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI274_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI274_0) - pcalau12i $a0, %pc_hi20(.LCPI274_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI274_1) vldi $vr2, -1132 - movgr2fr.w $fs2, $zero + lu12i.w $a0, 286624 + movgr2fr.w $fs0, $a0 + movgr2fr.w $fs1, $zero + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs2, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s0, $a0, %pc_lo12(.L.str.299) move $s2, $zero @@ -57051,10 +56475,10 @@ _Z13test_constantIf22custom_constant_divideIfEEvPT_iPKc: # @_Z13test_constantIf2 fcvt.s.d $fa1, $fa0 fdiv.s $fa1, $fa1, $fa2 fmul.s $fa1, $fa1, $fs0 - fadd.s $fa1, $fa1, $fs2 + fadd.s $fa1, $fa1, $fs1 fabs.s $fa1, $fa1 fcvt.d.s $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs1 + fcmp.clt.d $fcc0, $fa1, $fs2 bcnez $fcc0, .LBB274_9 # %bb.11: # in Loop: Header=BB274_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -57093,12 +56517,14 @@ _Z13test_constantIf22custom_constant_divideIfEEvPT_iPKc: # @_Z13test_constantIf2 move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB274_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI274_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI274_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -57134,20 +56560,8 @@ _Z13test_constantIf22custom_constant_divideIfEEvPT_iPKc: # @_Z13test_constantIf2 .size _Z13test_constantIf22custom_constant_divideIfEEvPT_iPKc, .Lfunc_end274-_Z13test_constantIf22custom_constant_divideIfEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z13test_constantIf31custom_multiple_constant_divideIfEEvPT_iPKc -.LCPI275_0: - .word 0x45fa0000 # float 8000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI275_1: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI275_2: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI275_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIf31custom_multiple_constant_divideIfEEvPT_iPKc,"axG",@progbits,_Z13test_constantIf31custom_multiple_constant_divideIfEEvPT_iPKc,comdat - .weak _Z13test_constantIf31custom_multiple_constant_divideIfEEvPT_iPKc + .weak _Z13test_constantIf31custom_multiple_constant_divideIfEEvPT_iPKc # -- Begin function _Z13test_constantIf31custom_multiple_constant_divideIfEEvPT_iPKc .p2align 5 .type _Z13test_constantIf31custom_multiple_constant_divideIfEEvPT_iPKc,@function _Z13test_constantIf31custom_multiple_constant_divideIfEEvPT_iPKc: # @_Z13test_constantIf31custom_multiple_constant_divideIfEEvPT_iPKc @@ -57201,15 +56615,21 @@ _Z13test_constantIf31custom_multiple_constant_divideIfEEvPT_iPKc: # @_Z13test_co vldi $vr3, -1184 vldi $vr4, -1272 vldi $vr5, -1200 - pcalau12i $a0, %pc_hi20(.LCPI275_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI275_0) - pcalau12i $a0, %pc_hi20(.LCPI275_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI275_2) - pcalau12i $a0, %pc_hi20(.LCPI275_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI275_1) vldi $vr6, -1260 pcalau12i $s6, %pc_hi20(init_value) vldi $vr7, -1056 + lu12i.w $a0, 286624 + movgr2fr.w $fs1, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s2, $a0, %pc_lo12(.L.str.299) move $s7, $zero @@ -57273,13 +56693,16 @@ _Z13test_constantIf31custom_multiple_constant_divideIfEEvPT_iPKc: # @_Z13test_co fld.d $fa0, $s1, %pc_lo12(init_value) vldi $vr2, -1056 vldi $vr3, -1272 - pcalau12i $a0, %pc_hi20(.LCPI275_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI275_0) - pcalau12i $a0, %pc_hi20(.LCPI275_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI275_1) vldi $vr4, -1200 vldi $vr5, -1260 - movgr2fr.w $fs2, $zero + lu12i.w $a0, 286624 + movgr2fr.w $fs0, $a0 + movgr2fr.w $fs1, $zero + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs2, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s0, $a0, %pc_lo12(.L.str.299) move $s2, $zero @@ -57297,10 +56720,10 @@ _Z13test_constantIf31custom_multiple_constant_divideIfEEvPT_iPKc: # @_Z13test_co fmul.s $fa1, $fa1, $fa4 fdiv.s $fa1, $fa1, $fa5 fmul.s $fa1, $fa1, $fs0 - fadd.s $fa1, $fa1, $fs2 + fadd.s $fa1, $fa1, $fs1 fabs.s $fa1, $fa1 fcvt.d.s $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs1 + fcmp.clt.d $fcc0, $fa1, $fs2 bcnez $fcc0, .LBB275_9 # %bb.11: # in Loop: Header=BB275_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -57342,12 +56765,14 @@ _Z13test_constantIf31custom_multiple_constant_divideIfEEvPT_iPKc: # @_Z13test_co move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB275_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI275_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI275_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -57383,20 +56808,8 @@ _Z13test_constantIf31custom_multiple_constant_divideIfEEvPT_iPKc: # @_Z13test_co .size _Z13test_constantIf31custom_multiple_constant_divideIfEEvPT_iPKc, .Lfunc_end275-_Z13test_constantIf31custom_multiple_constant_divideIfEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z13test_constantIf32custom_multiple_constant_divide2IfEEvPT_iPKc -.LCPI276_0: - .word 0xc5fa0000 # float -8000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI276_1: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI276_2: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI276_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIf32custom_multiple_constant_divide2IfEEvPT_iPKc,"axG",@progbits,_Z13test_constantIf32custom_multiple_constant_divide2IfEEvPT_iPKc,comdat - .weak _Z13test_constantIf32custom_multiple_constant_divide2IfEEvPT_iPKc + .weak _Z13test_constantIf32custom_multiple_constant_divide2IfEEvPT_iPKc # -- Begin function _Z13test_constantIf32custom_multiple_constant_divide2IfEEvPT_iPKc .p2align 5 .type _Z13test_constantIf32custom_multiple_constant_divide2IfEEvPT_iPKc,@function _Z13test_constantIf32custom_multiple_constant_divide2IfEEvPT_iPKc: # @_Z13test_constantIf32custom_multiple_constant_divide2IfEEvPT_iPKc @@ -57446,15 +56859,22 @@ _Z13test_constantIf32custom_multiple_constant_divide2IfEEvPT_iPKc: # @_Z13test_c # %bb.1: # %.preheader.lr.ph blez $s0, .LBB276_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI276_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI276_0) - pcalau12i $a0, %pc_hi20(.LCPI276_2) - fld.d $fs1, $a0, %pc_lo12(.LCPI276_2) - pcalau12i $a0, %pc_hi20(.LCPI276_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI276_1) - movgr2fr.w $fs3, $zero + movgr2fr.w $fs0, $zero vldi $vr3, -1280 pcalau12i $s6, %pc_hi20(init_value) + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs1, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s2, $a0, %pc_lo12(.L.str.299) move $s7, $zero @@ -57469,7 +56889,7 @@ _Z13test_constantIf32custom_multiple_constant_divide2IfEEvPT_iPKc: # @_Z13test_c # Child Loop BB276_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.s $fa0, $fs3 + fmov.s $fa0, $fs0 .p2align 4, , 16 .LBB276_5: # Parent Loop BB276_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -57484,16 +56904,16 @@ _Z13test_constantIf32custom_multiple_constant_divide2IfEEvPT_iPKc: # @_Z13test_c fld.d $fa1, $s6, %pc_lo12(init_value) fcvt.s.d $fa1, $fa1 fadd.s $fa1, $fa1, $fa3 - fmul.s $fa1, $fa1, $fs0 + fmul.s $fa1, $fa1, $fs1 fadd.s $fa1, $fa0, $fa1 fabs.s $fa2, $fa0 fcvt.d.s $fa2, $fa2 - fcmp.clt.d $fcc0, $fs1, $fa2 + fcmp.clt.d $fcc0, $fs2, $fa2 fdiv.s $fa0, $fa1, $fa0 fsel $fa0, $fa1, $fa0, $fcc0 fabs.s $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs2 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB276_3 # %bb.7: # in Loop: Header=BB276_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -57506,12 +56926,16 @@ _Z13test_constantIf32custom_multiple_constant_divide2IfEEvPT_iPKc: # @_Z13test_c .LBB276_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI276_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI276_0) - pcalau12i $a0, %pc_hi20(.LCPI276_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI276_1) vldi $vr2, -1280 - movgr2fr.w $fs2, $zero + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs0, $a0 + movgr2fr.w $fs1, $zero + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs2, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s0, $a0, %pc_lo12(.L.str.299) move $s2, $zero @@ -57526,10 +56950,10 @@ _Z13test_constantIf32custom_multiple_constant_divide2IfEEvPT_iPKc: # @_Z13test_c fcvt.s.d $fa1, $fa0 fadd.s $fa1, $fa1, $fa2 fmul.s $fa1, $fa1, $fs0 - fadd.s $fa1, $fa1, $fs2 + fadd.s $fa1, $fa1, $fs1 fabs.s $fa1, $fa1 fcvt.d.s $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs1 + fcmp.clt.d $fcc0, $fa1, $fs2 bcnez $fcc0, .LBB276_9 # %bb.11: # in Loop: Header=BB276_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -57568,12 +56992,14 @@ _Z13test_constantIf32custom_multiple_constant_divide2IfEEvPT_iPKc: # @_Z13test_c move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB276_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI276_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI276_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -57609,22 +57035,8 @@ _Z13test_constantIf32custom_multiple_constant_divide2IfEEvPT_iPKc: # @_Z13test_c .size _Z13test_constantIf32custom_multiple_constant_divide2IfEEvPT_iPKc, .Lfunc_end276-_Z13test_constantIf32custom_multiple_constant_divide2IfEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z13test_constantIf30custom_multiple_constant_mixedIfEEvPT_iPKc -.LCPI277_0: - .word 0xc019999a # float -2.4000001 -.LCPI277_1: - .word 0xc5fa0000 # float -8000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI277_2: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI277_3: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI277_4: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantIf30custom_multiple_constant_mixedIfEEvPT_iPKc,"axG",@progbits,_Z13test_constantIf30custom_multiple_constant_mixedIfEEvPT_iPKc,comdat - .weak _Z13test_constantIf30custom_multiple_constant_mixedIfEEvPT_iPKc + .weak _Z13test_constantIf30custom_multiple_constant_mixedIfEEvPT_iPKc # -- Begin function _Z13test_constantIf30custom_multiple_constant_mixedIfEEvPT_iPKc .p2align 5 .type _Z13test_constantIf30custom_multiple_constant_mixedIfEEvPT_iPKc,@function _Z13test_constantIf30custom_multiple_constant_mixedIfEEvPT_iPKc: # @_Z13test_constantIf30custom_multiple_constant_mixedIfEEvPT_iPKc @@ -57676,17 +57088,26 @@ _Z13test_constantIf30custom_multiple_constant_mixedIfEEvPT_iPKc: # @_Z13test_con # %bb.1: # %.preheader.lr.ph blez $s0, .LBB277_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI277_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI277_0) - pcalau12i $a0, %pc_hi20(.LCPI277_1) - fld.s $fs1, $a0, %pc_lo12(.LCPI277_1) - pcalau12i $a0, %pc_hi20(.LCPI277_3) - fld.d $fs2, $a0, %pc_lo12(.LCPI277_3) - pcalau12i $a0, %pc_hi20(.LCPI277_2) - fld.d $fs3, $a0, %pc_lo12(.LCPI277_2) - movgr2fr.w $fs4, $zero + movgr2fr.w $fs0, $zero vldi $vr3, -1280 + lu12i.w $a0, -261735 + ori $a0, $a0, 2458 + lu32i.d $a0, 0 + movgr2fr.w $fs1, $a0 pcalau12i $s6, %pc_hi20(init_value) + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs2, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s2, $a0, %pc_lo12(.L.str.299) move $s7, $zero @@ -57701,13 +57122,13 @@ _Z13test_constantIf30custom_multiple_constant_mixedIfEEvPT_iPKc: # @_Z13test_con # Child Loop BB277_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.s $fa0, $fs4 + fmov.s $fa0, $fs0 .p2align 4, , 16 .LBB277_5: # Parent Loop BB277_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.s $fa1, $a2, 0 fadd.s $fa1, $fa1, $fa3 - fadd.s $fa1, $fa1, $fs0 + fadd.s $fa1, $fa1, $fs1 fadd.s $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 4 @@ -57717,17 +57138,17 @@ _Z13test_constantIf30custom_multiple_constant_mixedIfEEvPT_iPKc: # @_Z13test_con fld.d $fa1, $s6, %pc_lo12(init_value) fcvt.s.d $fa1, $fa1 fadd.s $fa1, $fa1, $fa3 - fadd.s $fa1, $fa1, $fs0 - fmul.s $fa1, $fa1, $fs1 + fadd.s $fa1, $fa1, $fs1 + fmul.s $fa1, $fa1, $fs2 fadd.s $fa1, $fa0, $fa1 fabs.s $fa2, $fa0 fcvt.d.s $fa2, $fa2 - fcmp.clt.d $fcc0, $fs2, $fa2 + fcmp.clt.d $fcc0, $fs3, $fa2 fdiv.s $fa0, $fa1, $fa0 fsel $fa0, $fa1, $fa0, $fcc0 fabs.s $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs3 + fcmp.clt.d $fcc0, $fa0, $fs4 bcnez $fcc0, .LBB277_3 # %bb.7: # in Loop: Header=BB277_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -57740,14 +57161,20 @@ _Z13test_constantIf30custom_multiple_constant_mixedIfEEvPT_iPKc: # @_Z13test_con .LBB277_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI277_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI277_0) - pcalau12i $a0, %pc_hi20(.LCPI277_1) - fld.s $fs1, $a0, %pc_lo12(.LCPI277_1) - pcalau12i $a0, %pc_hi20(.LCPI277_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI277_2) vldi $vr2, -1280 - movgr2fr.w $fs3, $zero + lu12i.w $a0, -261735 + ori $a0, $a0, 2458 + lu32i.d $a0, 0 + movgr2fr.w $fs0, $a0 + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs1, $a0 + movgr2fr.w $fs2, $zero + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s0, $a0, %pc_lo12(.L.str.299) move $s2, $zero @@ -57763,10 +57190,10 @@ _Z13test_constantIf30custom_multiple_constant_mixedIfEEvPT_iPKc: # @_Z13test_con fadd.s $fa1, $fa1, $fa2 fadd.s $fa1, $fa1, $fs0 fmul.s $fa1, $fa1, $fs1 - fadd.s $fa1, $fa1, $fs3 + fadd.s $fa1, $fa1, $fs2 fabs.s $fa1, $fa1 fcvt.d.s $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs2 + fcmp.clt.d $fcc0, $fa1, $fs3 bcnez $fcc0, .LBB277_9 # %bb.11: # in Loop: Header=BB277_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -57805,12 +57232,14 @@ _Z13test_constantIf30custom_multiple_constant_mixedIfEEvPT_iPKc: # @_Z13test_con move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB277_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI277_4) - fld.d $fa0, $a1, %pc_lo12(.LCPI277_4) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -57847,18 +57276,8 @@ _Z13test_constantIf30custom_multiple_constant_mixedIfEEvPT_iPKc: # @_Z13test_con .size _Z13test_constantIf30custom_multiple_constant_mixedIfEEvPT_iPKc, .Lfunc_end277-_Z13test_constantIf30custom_multiple_constant_mixedIfEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantId10custom_twoIdEEvPT_iPKc -.LCPI278_0: - .dword 0xc0cf400000000000 # double -16000 -.LCPI278_1: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI278_2: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI278_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantId10custom_twoIdEEvPT_iPKc,"axG",@progbits,_Z13test_constantId10custom_twoIdEEvPT_iPKc,comdat - .weak _Z13test_constantId10custom_twoIdEEvPT_iPKc + .weak _Z13test_constantId10custom_twoIdEEvPT_iPKc # -- Begin function _Z13test_constantId10custom_twoIdEEvPT_iPKc .p2align 5 .type _Z13test_constantId10custom_twoIdEEvPT_iPKc,@function _Z13test_constantId10custom_twoIdEEvPT_iPKc: # @_Z13test_constantId10custom_twoIdEEvPT_iPKc @@ -57903,14 +57322,22 @@ _Z13test_constantId10custom_twoIdEEvPT_iPKc: # @_Z13test_constantId10custom_twoI # %bb.1: # %.preheader.lr.ph blez $s0, .LBB278_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI278_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI278_0) - pcalau12i $a0, %pc_hi20(.LCPI278_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI278_1) - pcalau12i $a0, %pc_hi20(.LCPI278_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI278_2) - movgr2fr.d $fs3, $zero + movgr2fr.d $fs0, $zero vldi $vr3, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1012 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s1, $a0, %pc_lo12(.L.str.299) move $s5, $zero @@ -57924,7 +57351,7 @@ _Z13test_constantId10custom_twoIdEEvPT_iPKc: # @_Z13test_constantId10custom_twoI # =>This Loop Header: Depth=1 # Child Loop BB278_5 Depth 2 move $a0, $s0 - fmov.d $fa0, $fs3 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB278_5: # Parent Loop BB278_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -57933,13 +57360,13 @@ _Z13test_constantId10custom_twoIdEEvPT_iPKc: # @_Z13test_constantId10custom_twoI bnez $a0, .LBB278_5 # %bb.6: # %._crit_edge.us # in Loop: Header=BB278_4 Depth=1 - fadd.d $fa1, $fa0, $fs0 + fadd.d $fa1, $fa0, $fs1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs1, $fa2 + fcmp.clt.d $fcc0, $fs2, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs2 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB278_3 # %bb.7: # in Loop: Header=BB278_4 Depth=1 ld.w $a1, $s2, %pc_lo12(current_test) @@ -57991,12 +57418,14 @@ _Z13test_constantId10custom_twoIdEEvPT_iPKc: # @_Z13test_constantId10custom_twoI move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB278_14: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI278_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI278_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -58030,18 +57459,8 @@ _Z13test_constantId10custom_twoIdEEvPT_iPKc: # @_Z13test_constantId10custom_twoI .size _Z13test_constantId10custom_twoIdEEvPT_iPKc, .Lfunc_end278-_Z13test_constantId10custom_twoIdEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantId20custom_add_constantsIdEEvPT_iPKc -.LCPI279_0: - .dword 0xc0d7700000000000 # double -24000 -.LCPI279_1: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI279_2: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI279_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantId20custom_add_constantsIdEEvPT_iPKc,"axG",@progbits,_Z13test_constantId20custom_add_constantsIdEEvPT_iPKc,comdat - .weak _Z13test_constantId20custom_add_constantsIdEEvPT_iPKc + .weak _Z13test_constantId20custom_add_constantsIdEEvPT_iPKc # -- Begin function _Z13test_constantId20custom_add_constantsIdEEvPT_iPKc .p2align 5 .type _Z13test_constantId20custom_add_constantsIdEEvPT_iPKc,@function _Z13test_constantId20custom_add_constantsIdEEvPT_iPKc: # @_Z13test_constantId20custom_add_constantsIdEEvPT_iPKc @@ -58086,14 +57505,22 @@ _Z13test_constantId20custom_add_constantsIdEEvPT_iPKc: # @_Z13test_constantId20c # %bb.1: # %.preheader.lr.ph blez $s0, .LBB279_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI279_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI279_0) - pcalau12i $a0, %pc_hi20(.LCPI279_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI279_1) - pcalau12i $a0, %pc_hi20(.LCPI279_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI279_2) - movgr2fr.d $fs3, $zero + movgr2fr.d $fs0, $zero vldi $vr3, -1016 + ori $a0, $zero, 0 + lu32i.d $a0, 487424 + lu52i.d $a0, $a0, -1011 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s1, $a0, %pc_lo12(.L.str.299) move $s5, $zero @@ -58107,7 +57534,7 @@ _Z13test_constantId20custom_add_constantsIdEEvPT_iPKc: # @_Z13test_constantId20c # =>This Loop Header: Depth=1 # Child Loop BB279_5 Depth 2 move $a0, $s0 - fmov.d $fa0, $fs3 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB279_5: # Parent Loop BB279_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -58116,13 +57543,13 @@ _Z13test_constantId20custom_add_constantsIdEEvPT_iPKc: # @_Z13test_constantId20c bnez $a0, .LBB279_5 # %bb.6: # %._crit_edge.us # in Loop: Header=BB279_4 Depth=1 - fadd.d $fa1, $fa0, $fs0 + fadd.d $fa1, $fa0, $fs1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs1, $fa2 + fcmp.clt.d $fcc0, $fs2, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs2 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB279_3 # %bb.7: # in Loop: Header=BB279_4 Depth=1 ld.w $a1, $s2, %pc_lo12(current_test) @@ -58174,12 +57601,14 @@ _Z13test_constantId20custom_add_constantsIdEEvPT_iPKc: # @_Z13test_constantId20c move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB279_14: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI279_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI279_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -58213,18 +57642,8 @@ _Z13test_constantId20custom_add_constantsIdEEvPT_iPKc: # @_Z13test_constantId20c .size _Z13test_constantId20custom_add_constantsIdEEvPT_iPKc, .Lfunc_end279-_Z13test_constantId20custom_add_constantsIdEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantId20custom_sub_constantsIdEEvPT_iPKc -.LCPI280_0: - .dword 0xc0bf400000000000 # double -8000 -.LCPI280_1: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI280_2: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI280_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantId20custom_sub_constantsIdEEvPT_iPKc,"axG",@progbits,_Z13test_constantId20custom_sub_constantsIdEEvPT_iPKc,comdat - .weak _Z13test_constantId20custom_sub_constantsIdEEvPT_iPKc + .weak _Z13test_constantId20custom_sub_constantsIdEEvPT_iPKc # -- Begin function _Z13test_constantId20custom_sub_constantsIdEEvPT_iPKc .p2align 5 .type _Z13test_constantId20custom_sub_constantsIdEEvPT_iPKc,@function _Z13test_constantId20custom_sub_constantsIdEEvPT_iPKc: # @_Z13test_constantId20custom_sub_constantsIdEEvPT_iPKc @@ -58269,14 +57688,22 @@ _Z13test_constantId20custom_sub_constantsIdEEvPT_iPKc: # @_Z13test_constantId20c # %bb.1: # %.preheader.lr.ph blez $s0, .LBB280_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI280_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI280_0) - pcalau12i $a0, %pc_hi20(.LCPI280_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI280_1) - pcalau12i $a0, %pc_hi20(.LCPI280_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI280_2) - movgr2fr.d $fs3, $zero + movgr2fr.d $fs0, $zero vldi $vr3, -912 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s1, $a0, %pc_lo12(.L.str.299) move $s5, $zero @@ -58290,7 +57717,7 @@ _Z13test_constantId20custom_sub_constantsIdEEvPT_iPKc: # @_Z13test_constantId20c # =>This Loop Header: Depth=1 # Child Loop BB280_5 Depth 2 move $a0, $s0 - fmov.d $fa0, $fs3 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB280_5: # Parent Loop BB280_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -58299,13 +57726,13 @@ _Z13test_constantId20custom_sub_constantsIdEEvPT_iPKc: # @_Z13test_constantId20c bnez $a0, .LBB280_5 # %bb.6: # %._crit_edge.us # in Loop: Header=BB280_4 Depth=1 - fadd.d $fa1, $fa0, $fs0 + fadd.d $fa1, $fa0, $fs1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs1, $fa2 + fcmp.clt.d $fcc0, $fs2, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs2 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB280_3 # %bb.7: # in Loop: Header=BB280_4 Depth=1 ld.w $a1, $s2, %pc_lo12(current_test) @@ -58357,12 +57784,14 @@ _Z13test_constantId20custom_sub_constantsIdEEvPT_iPKc: # @_Z13test_constantId20c move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB280_14: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI280_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI280_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -58396,18 +57825,8 @@ _Z13test_constantId20custom_sub_constantsIdEEvPT_iPKc: # @_Z13test_constantId20c .size _Z13test_constantId20custom_sub_constantsIdEEvPT_iPKc, .Lfunc_end280-_Z13test_constantId20custom_sub_constantsIdEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantId25custom_multiply_constantsIdEEvPT_iPKc -.LCPI281_0: - .dword 0xc0e7700000000000 # double -48000 -.LCPI281_1: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI281_2: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI281_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantId25custom_multiply_constantsIdEEvPT_iPKc,"axG",@progbits,_Z13test_constantId25custom_multiply_constantsIdEEvPT_iPKc,comdat - .weak _Z13test_constantId25custom_multiply_constantsIdEEvPT_iPKc + .weak _Z13test_constantId25custom_multiply_constantsIdEEvPT_iPKc # -- Begin function _Z13test_constantId25custom_multiply_constantsIdEEvPT_iPKc .p2align 5 .type _Z13test_constantId25custom_multiply_constantsIdEEvPT_iPKc,@function _Z13test_constantId25custom_multiply_constantsIdEEvPT_iPKc: # @_Z13test_constantId25custom_multiply_constantsIdEEvPT_iPKc @@ -58452,14 +57871,22 @@ _Z13test_constantId25custom_multiply_constantsIdEEvPT_iPKc: # @_Z13test_constant # %bb.1: # %.preheader.lr.ph blez $s0, .LBB281_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI281_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI281_0) - pcalau12i $a0, %pc_hi20(.LCPI281_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI281_1) - pcalau12i $a0, %pc_hi20(.LCPI281_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI281_2) - movgr2fr.d $fs3, $zero + movgr2fr.d $fs0, $zero vldi $vr3, -1000 + ori $a0, $zero, 0 + lu32i.d $a0, 487424 + lu52i.d $a0, $a0, -1010 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s1, $a0, %pc_lo12(.L.str.299) move $s5, $zero @@ -58473,7 +57900,7 @@ _Z13test_constantId25custom_multiply_constantsIdEEvPT_iPKc: # @_Z13test_constant # =>This Loop Header: Depth=1 # Child Loop BB281_5 Depth 2 move $a0, $s0 - fmov.d $fa0, $fs3 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB281_5: # Parent Loop BB281_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -58482,13 +57909,13 @@ _Z13test_constantId25custom_multiply_constantsIdEEvPT_iPKc: # @_Z13test_constant bnez $a0, .LBB281_5 # %bb.6: # %._crit_edge.us # in Loop: Header=BB281_4 Depth=1 - fadd.d $fa1, $fa0, $fs0 + fadd.d $fa1, $fa0, $fs1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs1, $fa2 + fcmp.clt.d $fcc0, $fs2, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs2 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB281_3 # %bb.7: # in Loop: Header=BB281_4 Depth=1 ld.w $a1, $s2, %pc_lo12(current_test) @@ -58540,12 +57967,14 @@ _Z13test_constantId25custom_multiply_constantsIdEEvPT_iPKc: # @_Z13test_constant move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB281_14: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI281_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI281_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -58579,18 +58008,8 @@ _Z13test_constantId25custom_multiply_constantsIdEEvPT_iPKc: # @_Z13test_constant .size _Z13test_constantId25custom_multiply_constantsIdEEvPT_iPKc, .Lfunc_end281-_Z13test_constantId25custom_multiply_constantsIdEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantId23custom_divide_constantsIdEEvPT_iPKc -.LCPI282_0: - .dword 0xc0cf400000000000 # double -16000 -.LCPI282_1: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI282_2: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI282_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantId23custom_divide_constantsIdEEvPT_iPKc,"axG",@progbits,_Z13test_constantId23custom_divide_constantsIdEEvPT_iPKc,comdat - .weak _Z13test_constantId23custom_divide_constantsIdEEvPT_iPKc + .weak _Z13test_constantId23custom_divide_constantsIdEEvPT_iPKc # -- Begin function _Z13test_constantId23custom_divide_constantsIdEEvPT_iPKc .p2align 5 .type _Z13test_constantId23custom_divide_constantsIdEEvPT_iPKc,@function _Z13test_constantId23custom_divide_constantsIdEEvPT_iPKc: # @_Z13test_constantId23custom_divide_constantsIdEEvPT_iPKc @@ -58635,14 +58054,22 @@ _Z13test_constantId23custom_divide_constantsIdEEvPT_iPKc: # @_Z13test_constantId # %bb.1: # %.preheader.lr.ph blez $s0, .LBB282_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI282_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI282_0) - pcalau12i $a0, %pc_hi20(.LCPI282_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI282_1) - pcalau12i $a0, %pc_hi20(.LCPI282_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI282_2) - movgr2fr.d $fs3, $zero + movgr2fr.d $fs0, $zero vldi $vr3, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1012 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s1, $a0, %pc_lo12(.L.str.299) move $s5, $zero @@ -58656,7 +58083,7 @@ _Z13test_constantId23custom_divide_constantsIdEEvPT_iPKc: # @_Z13test_constantId # =>This Loop Header: Depth=1 # Child Loop BB282_5 Depth 2 move $a0, $s0 - fmov.d $fa0, $fs3 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB282_5: # Parent Loop BB282_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -58665,13 +58092,13 @@ _Z13test_constantId23custom_divide_constantsIdEEvPT_iPKc: # @_Z13test_constantId bnez $a0, .LBB282_5 # %bb.6: # %._crit_edge.us # in Loop: Header=BB282_4 Depth=1 - fadd.d $fa1, $fa0, $fs0 + fadd.d $fa1, $fa0, $fs1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs1, $fa2 + fcmp.clt.d $fcc0, $fs2, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs2 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB282_3 # %bb.7: # in Loop: Header=BB282_4 Depth=1 ld.w $a1, $s2, %pc_lo12(current_test) @@ -58723,12 +58150,14 @@ _Z13test_constantId23custom_divide_constantsIdEEvPT_iPKc: # @_Z13test_constantId move $a2, $a0 ld.w $a0, $s2, %pc_lo12(current_test) .LBB282_14: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI282_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI282_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -58762,18 +58191,8 @@ _Z13test_constantId23custom_divide_constantsIdEEvPT_iPKc: # @_Z13test_constantId .size _Z13test_constantId23custom_divide_constantsIdEEvPT_iPKc, .Lfunc_end282-_Z13test_constantId23custom_divide_constantsIdEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantId19custom_constant_addIdEEvPT_iPKc -.LCPI283_0: - .dword 0xc0bf400000000000 # double -8000 -.LCPI283_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI283_2: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI283_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantId19custom_constant_addIdEEvPT_iPKc,"axG",@progbits,_Z13test_constantId19custom_constant_addIdEEvPT_iPKc,comdat - .weak _Z13test_constantId19custom_constant_addIdEEvPT_iPKc + .weak _Z13test_constantId19custom_constant_addIdEEvPT_iPKc # -- Begin function _Z13test_constantId19custom_constant_addIdEEvPT_iPKc .p2align 5 .type _Z13test_constantId19custom_constant_addIdEEvPT_iPKc,@function _Z13test_constantId19custom_constant_addIdEEvPT_iPKc: # @_Z13test_constantId19custom_constant_addIdEEvPT_iPKc @@ -58823,15 +58242,23 @@ _Z13test_constantId19custom_constant_addIdEEvPT_iPKc: # @_Z13test_constantId19cu # %bb.1: # %.preheader.lr.ph blez $s0, .LBB283_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI283_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI283_0) - pcalau12i $a0, %pc_hi20(.LCPI283_2) - fld.d $fs1, $a0, %pc_lo12(.LCPI283_2) - pcalau12i $a0, %pc_hi20(.LCPI283_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI283_1) - movgr2fr.d $fs3, $zero + movgr2fr.d $fs0, $zero vldi $vr3, -988 pcalau12i $s6, %pc_hi20(init_value) + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s2, $a0, %pc_lo12(.L.str.299) move $s7, $zero @@ -58846,7 +58273,7 @@ _Z13test_constantId19custom_constant_addIdEEvPT_iPKc: # @_Z13test_constantId19cu # Child Loop BB283_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs3 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB283_5: # Parent Loop BB283_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -58860,14 +58287,14 @@ _Z13test_constantId19custom_constant_addIdEEvPT_iPKc: # @_Z13test_constantId19cu # in Loop: Header=BB283_4 Depth=1 fld.d $fa1, $s6, %pc_lo12(init_value) fadd.d $fa1, $fa1, $fa3 - fmul.d $fa1, $fa1, $fs0 + fmul.d $fa1, $fa1, $fs1 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs1, $fa2 + fcmp.clt.d $fcc0, $fs2, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs2 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB283_3 # %bb.7: # in Loop: Header=BB283_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -58880,12 +58307,17 @@ _Z13test_constantId19custom_constant_addIdEEvPT_iPKc: # @_Z13test_constantId19cu .LBB283_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI283_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI283_0) - pcalau12i $a0, %pc_hi20(.LCPI283_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI283_1) vldi $vr2, -988 - movgr2fr.d $fs2, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs0, $a0 + movgr2fr.d $fs1, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs2, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s0, $a0, %pc_lo12(.L.str.299) move $s2, $zero @@ -58899,9 +58331,9 @@ _Z13test_constantId19custom_constant_addIdEEvPT_iPKc: # @_Z13test_constantId19cu # =>This Inner Loop Header: Depth=1 fadd.d $fa1, $fa0, $fa2 fmul.d $fa1, $fa1, $fs0 - fadd.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs1 + fcmp.clt.d $fcc0, $fa1, $fs2 bcnez $fcc0, .LBB283_9 # %bb.11: # in Loop: Header=BB283_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -58940,12 +58372,14 @@ _Z13test_constantId19custom_constant_addIdEEvPT_iPKc: # @_Z13test_constantId19cu move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB283_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI283_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI283_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -58981,18 +58415,8 @@ _Z13test_constantId19custom_constant_addIdEEvPT_iPKc: # @_Z13test_constantId19cu .size _Z13test_constantId19custom_constant_addIdEEvPT_iPKc, .Lfunc_end283-_Z13test_constantId19custom_constant_addIdEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantId28custom_multiple_constant_addIdEEvPT_iPKc -.LCPI284_0: - .dword 0xc0bf400000000000 # double -8000 -.LCPI284_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI284_2: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI284_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantId28custom_multiple_constant_addIdEEvPT_iPKc,"axG",@progbits,_Z13test_constantId28custom_multiple_constant_addIdEEvPT_iPKc,comdat - .weak _Z13test_constantId28custom_multiple_constant_addIdEEvPT_iPKc + .weak _Z13test_constantId28custom_multiple_constant_addIdEEvPT_iPKc # -- Begin function _Z13test_constantId28custom_multiple_constant_addIdEEvPT_iPKc .p2align 5 .type _Z13test_constantId28custom_multiple_constant_addIdEEvPT_iPKc,@function _Z13test_constantId28custom_multiple_constant_addIdEEvPT_iPKc: # @_Z13test_constantId28custom_multiple_constant_addIdEEvPT_iPKc @@ -59045,15 +58469,23 @@ _Z13test_constantId28custom_multiple_constant_addIdEEvPT_iPKc: # @_Z13test_const movgr2fr.d $fs0, $zero vldi $vr3, -912 vldi $vr4, -1024 - pcalau12i $a0, %pc_hi20(.LCPI284_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI284_0) - pcalau12i $a0, %pc_hi20(.LCPI284_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI284_2) - pcalau12i $a0, %pc_hi20(.LCPI284_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI284_1) vldi $vr5, -1016 vldi $vr6, -1008 pcalau12i $s6, %pc_hi20(init_value) + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s2, $a0, %pc_lo12(.L.str.299) move $s7, $zero @@ -59113,13 +58545,18 @@ _Z13test_constantId28custom_multiple_constant_addIdEEvPT_iPKc: # @_Z13test_const fld.d $fa0, $s1, %pc_lo12(init_value) vldi $vr2, -912 vldi $vr3, -1024 - pcalau12i $a0, %pc_hi20(.LCPI284_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI284_0) - pcalau12i $a0, %pc_hi20(.LCPI284_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI284_1) vldi $vr4, -1016 vldi $vr5, -1008 - movgr2fr.d $fs2, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs0, $a0 + movgr2fr.d $fs1, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs2, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s0, $a0, %pc_lo12(.L.str.299) move $s2, $zero @@ -59136,9 +58573,9 @@ _Z13test_constantId28custom_multiple_constant_addIdEEvPT_iPKc: # @_Z13test_const fadd.d $fa1, $fa1, $fa4 fadd.d $fa1, $fa1, $fa5 fmul.d $fa1, $fa1, $fs0 - fadd.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs1 + fcmp.clt.d $fcc0, $fa1, $fs2 bcnez $fcc0, .LBB284_9 # %bb.11: # in Loop: Header=BB284_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -59180,12 +58617,14 @@ _Z13test_constantId28custom_multiple_constant_addIdEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB284_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI284_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI284_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -59221,18 +58660,8 @@ _Z13test_constantId28custom_multiple_constant_addIdEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantId28custom_multiple_constant_addIdEEvPT_iPKc, .Lfunc_end284-_Z13test_constantId28custom_multiple_constant_addIdEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantId19custom_constant_subIdEEvPT_iPKc -.LCPI285_0: - .dword 0xc0bf400000000000 # double -8000 -.LCPI285_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI285_2: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI285_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantId19custom_constant_subIdEEvPT_iPKc,"axG",@progbits,_Z13test_constantId19custom_constant_subIdEEvPT_iPKc,comdat - .weak _Z13test_constantId19custom_constant_subIdEEvPT_iPKc + .weak _Z13test_constantId19custom_constant_subIdEEvPT_iPKc # -- Begin function _Z13test_constantId19custom_constant_subIdEEvPT_iPKc .p2align 5 .type _Z13test_constantId19custom_constant_subIdEEvPT_iPKc,@function _Z13test_constantId19custom_constant_subIdEEvPT_iPKc: # @_Z13test_constantId19custom_constant_subIdEEvPT_iPKc @@ -59282,15 +58711,23 @@ _Z13test_constantId19custom_constant_subIdEEvPT_iPKc: # @_Z13test_constantId19cu # %bb.1: # %.preheader.lr.ph blez $s0, .LBB285_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI285_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI285_0) - pcalau12i $a0, %pc_hi20(.LCPI285_2) - fld.d $fs1, $a0, %pc_lo12(.LCPI285_2) - pcalau12i $a0, %pc_hi20(.LCPI285_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI285_1) - movgr2fr.d $fs3, $zero + movgr2fr.d $fs0, $zero vldi $vr3, -860 pcalau12i $s6, %pc_hi20(init_value) + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s2, $a0, %pc_lo12(.L.str.299) move $s7, $zero @@ -59305,7 +58742,7 @@ _Z13test_constantId19custom_constant_subIdEEvPT_iPKc: # @_Z13test_constantId19cu # Child Loop BB285_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs3 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB285_5: # Parent Loop BB285_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -59319,14 +58756,14 @@ _Z13test_constantId19custom_constant_subIdEEvPT_iPKc: # @_Z13test_constantId19cu # in Loop: Header=BB285_4 Depth=1 fld.d $fa1, $s6, %pc_lo12(init_value) fadd.d $fa1, $fa1, $fa3 - fmul.d $fa1, $fa1, $fs0 + fmul.d $fa1, $fa1, $fs1 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs1, $fa2 + fcmp.clt.d $fcc0, $fs2, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs2 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB285_3 # %bb.7: # in Loop: Header=BB285_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -59339,12 +58776,17 @@ _Z13test_constantId19custom_constant_subIdEEvPT_iPKc: # @_Z13test_constantId19cu .LBB285_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI285_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI285_0) - pcalau12i $a0, %pc_hi20(.LCPI285_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI285_1) vldi $vr2, -860 - movgr2fr.d $fs2, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs0, $a0 + movgr2fr.d $fs1, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs2, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s0, $a0, %pc_lo12(.L.str.299) move $s2, $zero @@ -59358,9 +58800,9 @@ _Z13test_constantId19custom_constant_subIdEEvPT_iPKc: # @_Z13test_constantId19cu # =>This Inner Loop Header: Depth=1 fadd.d $fa1, $fa0, $fa2 fmul.d $fa1, $fa1, $fs0 - fadd.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs1 + fcmp.clt.d $fcc0, $fa1, $fs2 bcnez $fcc0, .LBB285_9 # %bb.11: # in Loop: Header=BB285_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -59399,12 +58841,14 @@ _Z13test_constantId19custom_constant_subIdEEvPT_iPKc: # @_Z13test_constantId19cu move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB285_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI285_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI285_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -59440,18 +58884,8 @@ _Z13test_constantId19custom_constant_subIdEEvPT_iPKc: # @_Z13test_constantId19cu .size _Z13test_constantId19custom_constant_subIdEEvPT_iPKc, .Lfunc_end285-_Z13test_constantId19custom_constant_subIdEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantId28custom_multiple_constant_subIdEEvPT_iPKc -.LCPI286_0: - .dword 0xc0bf400000000000 # double -8000 -.LCPI286_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI286_2: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI286_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantId28custom_multiple_constant_subIdEEvPT_iPKc,"axG",@progbits,_Z13test_constantId28custom_multiple_constant_subIdEEvPT_iPKc,comdat - .weak _Z13test_constantId28custom_multiple_constant_subIdEEvPT_iPKc + .weak _Z13test_constantId28custom_multiple_constant_subIdEEvPT_iPKc # -- Begin function _Z13test_constantId28custom_multiple_constant_subIdEEvPT_iPKc .p2align 5 .type _Z13test_constantId28custom_multiple_constant_subIdEEvPT_iPKc,@function _Z13test_constantId28custom_multiple_constant_subIdEEvPT_iPKc: # @_Z13test_constantId28custom_multiple_constant_subIdEEvPT_iPKc @@ -59504,15 +58938,23 @@ _Z13test_constantId28custom_multiple_constant_subIdEEvPT_iPKc: # @_Z13test_const movgr2fr.d $fs0, $zero vldi $vr3, -784 vldi $vr4, -896 - pcalau12i $a0, %pc_hi20(.LCPI286_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI286_0) - pcalau12i $a0, %pc_hi20(.LCPI286_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI286_2) - pcalau12i $a0, %pc_hi20(.LCPI286_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI286_1) vldi $vr5, -888 vldi $vr6, -880 pcalau12i $s6, %pc_hi20(init_value) + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s2, $a0, %pc_lo12(.L.str.299) move $s7, $zero @@ -59572,13 +59014,18 @@ _Z13test_constantId28custom_multiple_constant_subIdEEvPT_iPKc: # @_Z13test_const fld.d $fa0, $s1, %pc_lo12(init_value) vldi $vr2, -784 vldi $vr3, -896 - pcalau12i $a0, %pc_hi20(.LCPI286_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI286_0) - pcalau12i $a0, %pc_hi20(.LCPI286_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI286_1) vldi $vr4, -888 vldi $vr5, -880 - movgr2fr.d $fs2, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs0, $a0 + movgr2fr.d $fs1, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs2, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s0, $a0, %pc_lo12(.L.str.299) move $s2, $zero @@ -59595,9 +59042,9 @@ _Z13test_constantId28custom_multiple_constant_subIdEEvPT_iPKc: # @_Z13test_const fadd.d $fa1, $fa1, $fa4 fadd.d $fa1, $fa1, $fa5 fmul.d $fa1, $fa1, $fs0 - fadd.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs1 + fcmp.clt.d $fcc0, $fa1, $fs2 bcnez $fcc0, .LBB286_9 # %bb.11: # in Loop: Header=BB286_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -59639,12 +59086,14 @@ _Z13test_constantId28custom_multiple_constant_subIdEEvPT_iPKc: # @_Z13test_const move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB286_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI286_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI286_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -59680,22 +59129,8 @@ _Z13test_constantId28custom_multiple_constant_subIdEEvPT_iPKc: # @_Z13test_const .size _Z13test_constantId28custom_multiple_constant_subIdEEvPT_iPKc, .Lfunc_end286-_Z13test_constantId28custom_multiple_constant_subIdEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantId24custom_constant_multiplyIdEEvPT_iPKc -.LCPI287_0: - .dword 0xc05e000000000000 # double -120 -.LCPI287_1: - .dword 0x40bf400000000000 # double 8000 -.LCPI287_2: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI287_3: - .dword 0x405e000000000000 # double 120 -.LCPI287_4: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI287_5: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantId24custom_constant_multiplyIdEEvPT_iPKc,"axG",@progbits,_Z13test_constantId24custom_constant_multiplyIdEEvPT_iPKc,comdat - .weak _Z13test_constantId24custom_constant_multiplyIdEEvPT_iPKc + .weak _Z13test_constantId24custom_constant_multiplyIdEEvPT_iPKc # -- Begin function _Z13test_constantId24custom_constant_multiplyIdEEvPT_iPKc .p2align 5 .type _Z13test_constantId24custom_constant_multiplyIdEEvPT_iPKc,@function _Z13test_constantId24custom_constant_multiplyIdEEvPT_iPKc: # @_Z13test_constantId24custom_constant_multiplyIdEEvPT_iPKc @@ -59749,18 +59184,28 @@ _Z13test_constantId24custom_constant_multiplyIdEEvPT_iPKc: # @_Z13test_constantI # %bb.1: # %.preheader.lr.ph blez $s0, .LBB287_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI287_3) - fld.d $fs0, $a0, %pc_lo12(.LCPI287_3) - pcalau12i $a0, %pc_hi20(.LCPI287_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI287_0) - pcalau12i $a0, %pc_hi20(.LCPI287_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI287_1) - pcalau12i $a0, %pc_hi20(.LCPI287_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI287_4) - pcalau12i $a0, %pc_hi20(.LCPI287_2) - fld.d $fs4, $a0, %pc_lo12(.LCPI287_2) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -131072 + lu52i.d $a3, $a2, 1029 + movgr2fr.d $fs1, $a3 pcalau12i $s6, %pc_hi20(init_value) + lu52i.d $a2, $a2, -1019 + movgr2fr.d $fs2, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1035 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s2, $a0, %pc_lo12(.L.str.299) move $s7, $zero @@ -59775,12 +59220,12 @@ _Z13test_constantId24custom_constant_multiplyIdEEvPT_iPKc: # @_Z13test_constantI # Child Loop BB287_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB287_5: # Parent Loop BB287_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fmul.d $fa1, $fa1, $fs0 + fmul.d $fa1, $fa1, $fs1 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -59788,15 +59233,15 @@ _Z13test_constantId24custom_constant_multiplyIdEEvPT_iPKc: # @_Z13test_constantI # %bb.6: # %._crit_edge.us # in Loop: Header=BB287_4 Depth=1 fld.d $fa1, $s6, %pc_lo12(init_value) - fmul.d $fa1, $fa1, $fs1 fmul.d $fa1, $fa1, $fs2 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB287_3 # %bb.7: # in Loop: Header=BB287_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -59808,13 +59253,20 @@ _Z13test_constantId24custom_constant_multiplyIdEEvPT_iPKc: # @_Z13test_constantI .LBB287_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI287_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI287_0) - pcalau12i $a0, %pc_hi20(.LCPI287_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI287_1) - pcalau12i $a0, %pc_hi20(.LCPI287_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI287_2) - movgr2fr.d $fs3, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -131072 + lu52i.d $a2, $a2, -1019 + movgr2fr.d $fs0, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1035 + movgr2fr.d $fs1, $a0 + movgr2fr.d $fs2, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s0, $a0, %pc_lo12(.L.str.299) move $s2, $zero @@ -59828,9 +59280,9 @@ _Z13test_constantId24custom_constant_multiplyIdEEvPT_iPKc: # @_Z13test_constantI # =>This Inner Loop Header: Depth=1 fmul.d $fa1, $fa0, $fs0 fmul.d $fa1, $fa1, $fs1 - fadd.d $fa1, $fa1, $fs3 + fadd.d $fa1, $fa1, $fs2 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs2 + fcmp.clt.d $fcc0, $fa1, $fs3 bcnez $fcc0, .LBB287_9 # %bb.11: # in Loop: Header=BB287_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -59868,12 +59320,14 @@ _Z13test_constantId24custom_constant_multiplyIdEEvPT_iPKc: # @_Z13test_constantI move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB287_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI287_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI287_5) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -59911,18 +59365,8 @@ _Z13test_constantId24custom_constant_multiplyIdEEvPT_iPKc: # @_Z13test_constantI .size _Z13test_constantId24custom_constant_multiplyIdEEvPT_iPKc, .Lfunc_end287-_Z13test_constantId24custom_constant_multiplyIdEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantId33custom_multiple_constant_multiplyIdEEvPT_iPKc -.LCPI288_0: - .dword 0x40bf400000000000 # double 8000 -.LCPI288_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI288_2: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI288_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantId33custom_multiple_constant_multiplyIdEEvPT_iPKc,"axG",@progbits,_Z13test_constantId33custom_multiple_constant_multiplyIdEEvPT_iPKc,comdat - .weak _Z13test_constantId33custom_multiple_constant_multiplyIdEEvPT_iPKc + .weak _Z13test_constantId33custom_multiple_constant_multiplyIdEEvPT_iPKc # -- Begin function _Z13test_constantId33custom_multiple_constant_multiplyIdEEvPT_iPKc .p2align 5 .type _Z13test_constantId33custom_multiple_constant_multiplyIdEEvPT_iPKc,@function _Z13test_constantId33custom_multiple_constant_multiplyIdEEvPT_iPKc: # @_Z13test_constantId33custom_multiple_constant_multiplyIdEEvPT_iPKc @@ -59975,15 +59419,23 @@ _Z13test_constantId33custom_multiple_constant_multiplyIdEEvPT_iPKc: # @_Z13test_ movgr2fr.d $fs0, $zero vldi $vr3, -1016 vldi $vr4, -1008 - pcalau12i $a0, %pc_hi20(.LCPI288_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI288_0) - pcalau12i $a0, %pc_hi20(.LCPI288_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI288_2) - pcalau12i $a0, %pc_hi20(.LCPI288_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI288_1) vldi $vr5, -1004 pcalau12i $s6, %pc_hi20(init_value) vldi $vr6, -888 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1035 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s2, $a0, %pc_lo12(.L.str.299) move $s7, $zero @@ -60042,13 +59494,18 @@ _Z13test_constantId33custom_multiple_constant_multiplyIdEEvPT_iPKc: # @_Z13test_ pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) vldi $vr2, -888 - pcalau12i $a0, %pc_hi20(.LCPI288_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI288_0) - pcalau12i $a0, %pc_hi20(.LCPI288_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI288_1) vldi $vr3, -1008 vldi $vr4, -1004 - movgr2fr.d $fs2, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1035 + movgr2fr.d $fs0, $a0 + movgr2fr.d $fs1, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs2, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s0, $a0, %pc_lo12(.L.str.299) move $s2, $zero @@ -60065,9 +59522,9 @@ _Z13test_constantId33custom_multiple_constant_multiplyIdEEvPT_iPKc: # @_Z13test_ fmul.d $fa1, $fa1, $fa3 fmul.d $fa1, $fa1, $fa4 fmul.d $fa1, $fa1, $fs0 - fadd.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs1 + fcmp.clt.d $fcc0, $fa1, $fs2 bcnez $fcc0, .LBB288_9 # %bb.11: # in Loop: Header=BB288_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -60108,12 +59565,14 @@ _Z13test_constantId33custom_multiple_constant_multiplyIdEEvPT_iPKc: # @_Z13test_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB288_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI288_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI288_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -60149,20 +59608,8 @@ _Z13test_constantId33custom_multiple_constant_multiplyIdEEvPT_iPKc: # @_Z13test_ .size _Z13test_constantId33custom_multiple_constant_multiplyIdEEvPT_iPKc, .Lfunc_end288-_Z13test_constantId33custom_multiple_constant_multiplyIdEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantId34custom_multiple_constant_multiply2IdEEvPT_iPKc -.LCPI289_0: - .dword 0x405e000000000000 # double 120 -.LCPI289_1: - .dword 0xc0bf400000000000 # double -8000 -.LCPI289_2: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI289_3: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI289_4: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantId34custom_multiple_constant_multiply2IdEEvPT_iPKc,"axG",@progbits,_Z13test_constantId34custom_multiple_constant_multiply2IdEEvPT_iPKc,comdat - .weak _Z13test_constantId34custom_multiple_constant_multiply2IdEEvPT_iPKc + .weak _Z13test_constantId34custom_multiple_constant_multiply2IdEEvPT_iPKc # -- Begin function _Z13test_constantId34custom_multiple_constant_multiply2IdEEvPT_iPKc .p2align 5 .type _Z13test_constantId34custom_multiple_constant_multiply2IdEEvPT_iPKc,@function _Z13test_constantId34custom_multiple_constant_multiply2IdEEvPT_iPKc: # @_Z13test_constantId34custom_multiple_constant_multiply2IdEEvPT_iPKc @@ -60214,16 +59661,26 @@ _Z13test_constantId34custom_multiple_constant_multiply2IdEEvPT_iPKc: # @_Z13test # %bb.1: # %.preheader.lr.ph blez $s0, .LBB289_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI289_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI289_0) - pcalau12i $a0, %pc_hi20(.LCPI289_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI289_1) - pcalau12i $a0, %pc_hi20(.LCPI289_3) - fld.d $fs2, $a0, %pc_lo12(.LCPI289_3) - pcalau12i $a0, %pc_hi20(.LCPI289_2) - fld.d $fs3, $a0, %pc_lo12(.LCPI289_2) - movgr2fr.d $fs4, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -131072 + lu52i.d $a2, $a2, 1029 + movgr2fr.d $fs1, $a2 pcalau12i $s6, %pc_hi20(init_value) + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s2, $a0, %pc_lo12(.L.str.299) move $s7, $zero @@ -60238,12 +59695,12 @@ _Z13test_constantId34custom_multiple_constant_multiply2IdEEvPT_iPKc: # @_Z13test # Child Loop BB289_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs4 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB289_5: # Parent Loop BB289_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 - fadd.d $fa1, $fa1, $fs0 + fadd.d $fa1, $fa1, $fs1 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -60251,15 +59708,15 @@ _Z13test_constantId34custom_multiple_constant_multiply2IdEEvPT_iPKc: # @_Z13test # %bb.6: # %._crit_edge.us # in Loop: Header=BB289_4 Depth=1 fld.d $fa1, $s6, %pc_lo12(init_value) - fadd.d $fa1, $fa1, $fs0 - fmul.d $fa1, $fa1, $fs1 + fadd.d $fa1, $fa1, $fs1 + fmul.d $fa1, $fa1, $fs2 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs2, $fa2 + fcmp.clt.d $fcc0, $fs3, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs3 + fcmp.clt.d $fcc0, $fa0, $fs4 bcnez $fcc0, .LBB289_3 # %bb.7: # in Loop: Header=BB289_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -60271,13 +59728,20 @@ _Z13test_constantId34custom_multiple_constant_multiply2IdEEvPT_iPKc: # @_Z13test .LBB289_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI289_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI289_0) - pcalau12i $a0, %pc_hi20(.LCPI289_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI289_1) - pcalau12i $a0, %pc_hi20(.LCPI289_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI289_2) - movgr2fr.d $fs3, $zero + ori $a0, $zero, 0 + ori $a2, $zero, 0 + lu32i.d $a2, -131072 + lu52i.d $a2, $a2, 1029 + movgr2fr.d $fs0, $a2 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs1, $a0 + movgr2fr.d $fs2, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s0, $a0, %pc_lo12(.L.str.299) move $s2, $zero @@ -60291,9 +59755,9 @@ _Z13test_constantId34custom_multiple_constant_multiply2IdEEvPT_iPKc: # @_Z13test # =>This Inner Loop Header: Depth=1 fadd.d $fa1, $fa0, $fs0 fmul.d $fa1, $fa1, $fs1 - fadd.d $fa1, $fa1, $fs3 + fadd.d $fa1, $fa1, $fs2 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs2 + fcmp.clt.d $fcc0, $fa1, $fs3 bcnez $fcc0, .LBB289_9 # %bb.11: # in Loop: Header=BB289_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -60331,12 +59795,14 @@ _Z13test_constantId34custom_multiple_constant_multiply2IdEEvPT_iPKc: # @_Z13test move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB289_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI289_4) - fld.d $fa0, $a1, %pc_lo12(.LCPI289_4) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -60373,18 +59839,8 @@ _Z13test_constantId34custom_multiple_constant_multiply2IdEEvPT_iPKc: # @_Z13test .size _Z13test_constantId34custom_multiple_constant_multiply2IdEEvPT_iPKc, .Lfunc_end289-_Z13test_constantId34custom_multiple_constant_multiply2IdEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantId22custom_constant_divideIdEEvPT_iPKc -.LCPI290_0: - .dword 0x40bf400000000000 # double 8000 -.LCPI290_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI290_2: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI290_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantId22custom_constant_divideIdEEvPT_iPKc,"axG",@progbits,_Z13test_constantId22custom_constant_divideIdEEvPT_iPKc,comdat - .weak _Z13test_constantId22custom_constant_divideIdEEvPT_iPKc + .weak _Z13test_constantId22custom_constant_divideIdEEvPT_iPKc # -- Begin function _Z13test_constantId22custom_constant_divideIdEEvPT_iPKc .p2align 5 .type _Z13test_constantId22custom_constant_divideIdEEvPT_iPKc,@function _Z13test_constantId22custom_constant_divideIdEEvPT_iPKc: # @_Z13test_constantId22custom_constant_divideIdEEvPT_iPKc @@ -60435,15 +59891,23 @@ _Z13test_constantId22custom_constant_divideIdEEvPT_iPKc: # @_Z13test_constantId2 blez $s0, .LBB290_8 # %bb.2: # %.preheader.us.preheader movgr2fr.d $fs0, $zero - pcalau12i $a0, %pc_hi20(.LCPI290_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI290_0) - pcalau12i $a0, %pc_hi20(.LCPI290_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI290_2) - pcalau12i $a0, %pc_hi20(.LCPI290_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI290_1) vldi $vr3, -1004 pcalau12i $s6, %pc_hi20(init_value) vldi $vr4, -876 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1035 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s2, $a0, %pc_lo12(.L.str.299) move $s7, $zero @@ -60493,12 +59957,17 @@ _Z13test_constantId22custom_constant_divideIdEEvPT_iPKc: # @_Z13test_constantId2 .LBB290_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI290_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI290_0) - pcalau12i $a0, %pc_hi20(.LCPI290_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI290_1) vldi $vr2, -876 - movgr2fr.d $fs2, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1035 + movgr2fr.d $fs0, $a0 + movgr2fr.d $fs1, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs2, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s0, $a0, %pc_lo12(.L.str.299) move $s2, $zero @@ -60512,9 +59981,9 @@ _Z13test_constantId22custom_constant_divideIdEEvPT_iPKc: # @_Z13test_constantId2 # =>This Inner Loop Header: Depth=1 fdiv.d $fa1, $fa0, $fa2 fmul.d $fa1, $fa1, $fs0 - fadd.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs1 + fcmp.clt.d $fcc0, $fa1, $fs2 bcnez $fcc0, .LBB290_9 # %bb.11: # in Loop: Header=BB290_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -60553,12 +60022,14 @@ _Z13test_constantId22custom_constant_divideIdEEvPT_iPKc: # @_Z13test_constantId2 move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB290_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI290_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI290_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -60594,18 +60065,8 @@ _Z13test_constantId22custom_constant_divideIdEEvPT_iPKc: # @_Z13test_constantId2 .size _Z13test_constantId22custom_constant_divideIdEEvPT_iPKc, .Lfunc_end290-_Z13test_constantId22custom_constant_divideIdEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantId31custom_multiple_constant_divideIdEEvPT_iPKc -.LCPI291_0: - .dword 0x40bf400000000000 # double 8000 -.LCPI291_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI291_2: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI291_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantId31custom_multiple_constant_divideIdEEvPT_iPKc,"axG",@progbits,_Z13test_constantId31custom_multiple_constant_divideIdEEvPT_iPKc,comdat - .weak _Z13test_constantId31custom_multiple_constant_divideIdEEvPT_iPKc + .weak _Z13test_constantId31custom_multiple_constant_divideIdEEvPT_iPKc # -- Begin function _Z13test_constantId31custom_multiple_constant_divideIdEEvPT_iPKc .p2align 5 .type _Z13test_constantId31custom_multiple_constant_divideIdEEvPT_iPKc,@function _Z13test_constantId31custom_multiple_constant_divideIdEEvPT_iPKc: # @_Z13test_constantId31custom_multiple_constant_divideIdEEvPT_iPKc @@ -60659,15 +60120,23 @@ _Z13test_constantId31custom_multiple_constant_divideIdEEvPT_iPKc: # @_Z13test_co vldi $vr3, -928 vldi $vr4, -1016 vldi $vr5, -944 - pcalau12i $a0, %pc_hi20(.LCPI291_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI291_0) - pcalau12i $a0, %pc_hi20(.LCPI291_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI291_2) - pcalau12i $a0, %pc_hi20(.LCPI291_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI291_1) vldi $vr6, -1004 pcalau12i $s6, %pc_hi20(init_value) vldi $vr7, -800 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1035 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s2, $a0, %pc_lo12(.L.str.299) move $s7, $zero @@ -60728,13 +60197,18 @@ _Z13test_constantId31custom_multiple_constant_divideIdEEvPT_iPKc: # @_Z13test_co fld.d $fa0, $s1, %pc_lo12(init_value) vldi $vr2, -800 vldi $vr3, -1016 - pcalau12i $a0, %pc_hi20(.LCPI291_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI291_0) - pcalau12i $a0, %pc_hi20(.LCPI291_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI291_1) vldi $vr4, -944 vldi $vr5, -1004 - movgr2fr.d $fs2, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1035 + movgr2fr.d $fs0, $a0 + movgr2fr.d $fs1, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs2, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s0, $a0, %pc_lo12(.L.str.299) move $s2, $zero @@ -60751,9 +60225,9 @@ _Z13test_constantId31custom_multiple_constant_divideIdEEvPT_iPKc: # @_Z13test_co fmul.d $fa1, $fa1, $fa4 fdiv.d $fa1, $fa1, $fa5 fmul.d $fa1, $fa1, $fs0 - fadd.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs1 + fcmp.clt.d $fcc0, $fa1, $fs2 bcnez $fcc0, .LBB291_9 # %bb.11: # in Loop: Header=BB291_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -60795,12 +60269,14 @@ _Z13test_constantId31custom_multiple_constant_divideIdEEvPT_iPKc: # @_Z13test_co move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB291_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI291_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI291_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -60836,18 +60312,8 @@ _Z13test_constantId31custom_multiple_constant_divideIdEEvPT_iPKc: # @_Z13test_co .size _Z13test_constantId31custom_multiple_constant_divideIdEEvPT_iPKc, .Lfunc_end291-_Z13test_constantId31custom_multiple_constant_divideIdEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantId32custom_multiple_constant_divide2IdEEvPT_iPKc -.LCPI292_0: - .dword 0xc0bf400000000000 # double -8000 -.LCPI292_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI292_2: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI292_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantId32custom_multiple_constant_divide2IdEEvPT_iPKc,"axG",@progbits,_Z13test_constantId32custom_multiple_constant_divide2IdEEvPT_iPKc,comdat - .weak _Z13test_constantId32custom_multiple_constant_divide2IdEEvPT_iPKc + .weak _Z13test_constantId32custom_multiple_constant_divide2IdEEvPT_iPKc # -- Begin function _Z13test_constantId32custom_multiple_constant_divide2IdEEvPT_iPKc .p2align 5 .type _Z13test_constantId32custom_multiple_constant_divide2IdEEvPT_iPKc,@function _Z13test_constantId32custom_multiple_constant_divide2IdEEvPT_iPKc: # @_Z13test_constantId32custom_multiple_constant_divide2IdEEvPT_iPKc @@ -60897,15 +60363,23 @@ _Z13test_constantId32custom_multiple_constant_divide2IdEEvPT_iPKc: # @_Z13test_c # %bb.1: # %.preheader.lr.ph blez $s0, .LBB292_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI292_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI292_0) - pcalau12i $a0, %pc_hi20(.LCPI292_2) - fld.d $fs1, $a0, %pc_lo12(.LCPI292_2) - pcalau12i $a0, %pc_hi20(.LCPI292_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI292_1) - movgr2fr.d $fs3, $zero + movgr2fr.d $fs0, $zero vldi $vr3, -1024 pcalau12i $s6, %pc_hi20(init_value) + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s2, $a0, %pc_lo12(.L.str.299) move $s7, $zero @@ -60920,7 +60394,7 @@ _Z13test_constantId32custom_multiple_constant_divide2IdEEvPT_iPKc: # @_Z13test_c # Child Loop BB292_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs3 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB292_5: # Parent Loop BB292_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -60934,14 +60408,14 @@ _Z13test_constantId32custom_multiple_constant_divide2IdEEvPT_iPKc: # @_Z13test_c # in Loop: Header=BB292_4 Depth=1 fld.d $fa1, $s6, %pc_lo12(init_value) fadd.d $fa1, $fa1, $fa3 - fmul.d $fa1, $fa1, $fs0 + fmul.d $fa1, $fa1, $fs1 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs1, $fa2 + fcmp.clt.d $fcc0, $fs2, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs2 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB292_3 # %bb.7: # in Loop: Header=BB292_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -60954,12 +60428,17 @@ _Z13test_constantId32custom_multiple_constant_divide2IdEEvPT_iPKc: # @_Z13test_c .LBB292_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI292_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI292_0) - pcalau12i $a0, %pc_hi20(.LCPI292_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI292_1) vldi $vr2, -1024 - movgr2fr.d $fs2, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs0, $a0 + movgr2fr.d $fs1, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs2, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s0, $a0, %pc_lo12(.L.str.299) move $s2, $zero @@ -60973,9 +60452,9 @@ _Z13test_constantId32custom_multiple_constant_divide2IdEEvPT_iPKc: # @_Z13test_c # =>This Inner Loop Header: Depth=1 fadd.d $fa1, $fa0, $fa2 fmul.d $fa1, $fa1, $fs0 - fadd.d $fa1, $fa1, $fs2 + fadd.d $fa1, $fa1, $fs1 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs1 + fcmp.clt.d $fcc0, $fa1, $fs2 bcnez $fcc0, .LBB292_9 # %bb.11: # in Loop: Header=BB292_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -61014,12 +60493,14 @@ _Z13test_constantId32custom_multiple_constant_divide2IdEEvPT_iPKc: # @_Z13test_c move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB292_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI292_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI292_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -61055,20 +60536,8 @@ _Z13test_constantId32custom_multiple_constant_divide2IdEEvPT_iPKc: # @_Z13test_c .size _Z13test_constantId32custom_multiple_constant_divide2IdEEvPT_iPKc, .Lfunc_end292-_Z13test_constantId32custom_multiple_constant_divide2IdEEvPT_iPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13test_constantId30custom_multiple_constant_mixedIdEEvPT_iPKc -.LCPI293_0: - .dword 0xc003333333333333 # double -2.3999999999999999 -.LCPI293_1: - .dword 0xc0bf400000000000 # double -8000 -.LCPI293_2: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI293_3: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI293_4: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z13test_constantId30custom_multiple_constant_mixedIdEEvPT_iPKc,"axG",@progbits,_Z13test_constantId30custom_multiple_constant_mixedIdEEvPT_iPKc,comdat - .weak _Z13test_constantId30custom_multiple_constant_mixedIdEEvPT_iPKc + .weak _Z13test_constantId30custom_multiple_constant_mixedIdEEvPT_iPKc # -- Begin function _Z13test_constantId30custom_multiple_constant_mixedIdEEvPT_iPKc .p2align 5 .type _Z13test_constantId30custom_multiple_constant_mixedIdEEvPT_iPKc,@function _Z13test_constantId30custom_multiple_constant_mixedIdEEvPT_iPKc: # @_Z13test_constantId30custom_multiple_constant_mixedIdEEvPT_iPKc @@ -61120,17 +60589,28 @@ _Z13test_constantId30custom_multiple_constant_mixedIdEEvPT_iPKc: # @_Z13test_con # %bb.1: # %.preheader.lr.ph blez $s0, .LBB293_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI293_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI293_0) - pcalau12i $a0, %pc_hi20(.LCPI293_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI293_1) - pcalau12i $a0, %pc_hi20(.LCPI293_3) - fld.d $fs2, $a0, %pc_lo12(.LCPI293_3) - pcalau12i $a0, %pc_hi20(.LCPI293_2) - fld.d $fs3, $a0, %pc_lo12(.LCPI293_2) - movgr2fr.d $fs4, $zero + movgr2fr.d $fs0, $zero vldi $vr3, -1024 + lu12i.w $a0, 209715 + ori $a0, $a0, 819 + lu32i.d $a0, 209715 + lu52i.d $a0, $a0, -1024 + movgr2fr.d $fs1, $a0 pcalau12i $s6, %pc_hi20(init_value) + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s2, $a0, %pc_lo12(.L.str.299) move $s7, $zero @@ -61145,13 +60625,13 @@ _Z13test_constantId30custom_multiple_constant_mixedIdEEvPT_iPKc: # @_Z13test_con # Child Loop BB293_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs4 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB293_5: # Parent Loop BB293_4 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa1, $a2, 0 fadd.d $fa1, $fa1, $fa3 - fadd.d $fa1, $fa1, $fs0 + fadd.d $fa1, $fa1, $fs1 fadd.d $fa0, $fa0, $fa1 addi.d $a0, $a0, -1 addi.d $a2, $a2, 8 @@ -61160,15 +60640,15 @@ _Z13test_constantId30custom_multiple_constant_mixedIdEEvPT_iPKc: # @_Z13test_con # in Loop: Header=BB293_4 Depth=1 fld.d $fa1, $s6, %pc_lo12(init_value) fadd.d $fa1, $fa1, $fa3 - fadd.d $fa1, $fa1, $fs0 - fmul.d $fa1, $fa1, $fs1 + fadd.d $fa1, $fa1, $fs1 + fmul.d $fa1, $fa1, $fs2 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs2, $fa2 + fcmp.clt.d $fcc0, $fs3, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs3 + fcmp.clt.d $fcc0, $fa0, $fs4 bcnez $fcc0, .LBB293_3 # %bb.7: # in Loop: Header=BB293_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -61181,14 +60661,22 @@ _Z13test_constantId30custom_multiple_constant_mixedIdEEvPT_iPKc: # @_Z13test_con .LBB293_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI293_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI293_0) - pcalau12i $a0, %pc_hi20(.LCPI293_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI293_1) - pcalau12i $a0, %pc_hi20(.LCPI293_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI293_2) vldi $vr2, -1024 - movgr2fr.d $fs3, $zero + lu12i.w $a0, 209715 + ori $a0, $a0, 819 + lu32i.d $a0, 209715 + lu52i.d $a0, $a0, -1024 + movgr2fr.d $fs0, $a0 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs1, $a0 + movgr2fr.d $fs2, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.299) addi.d $s0, $a0, %pc_lo12(.L.str.299) move $s2, $zero @@ -61203,9 +60691,9 @@ _Z13test_constantId30custom_multiple_constant_mixedIdEEvPT_iPKc: # @_Z13test_con fadd.d $fa1, $fa0, $fa2 fadd.d $fa1, $fa1, $fs0 fmul.d $fa1, $fa1, $fs1 - fadd.d $fa1, $fa1, $fs3 + fadd.d $fa1, $fa1, $fs2 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs2 + fcmp.clt.d $fcc0, $fa1, $fs3 bcnez $fcc0, .LBB293_9 # %bb.11: # in Loop: Header=BB293_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -61244,12 +60732,14 @@ _Z13test_constantId30custom_multiple_constant_mixedIdEEvPT_iPKc: # @_Z13test_con move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB293_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI293_4) - fld.d $fa0, $a1, %pc_lo12(.LCPI293_4) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 diff --git a/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/simple_types_loop_invariant.dir/simple_types_loop_invariant.s b/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/simple_types_loop_invariant.dir/simple_types_loop_invariant.s index 713fd1b2..05dbb677 100644 --- a/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/simple_types_loop_invariant.dir/simple_types_loop_invariant.s +++ b/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/simple_types_loop_invariant.dir/simple_types_loop_invariant.s @@ -62,12 +62,7 @@ _Z13record_resultdPKc: # @_Z13record_resultdPKc .Lfunc_end0: .size _Z13record_resultdPKc, .Lfunc_end0-_Z13record_resultdPKc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z9summarizePKciiii -.LCPI1_0: - .dword 0x412e848000000000 # double 1.0E+6 - .text - .globl _Z9summarizePKciiii + .globl _Z9summarizePKciiii # -- Begin function _Z9summarizePKciiii .p2align 5 .type _Z9summarizePKciiii,@function _Z9summarizePKciiii: # @_Z9summarizePKciiii @@ -135,14 +130,16 @@ _Z9summarizePKciiii: # @_Z9summarizePKciiii # %bb.5: # %.lr.ph45.preheader st.d $s0, $sp, 8 # 8-byte Folded Spill st.d $s5, $sp, 16 # 8-byte Folded Spill + ori $a0, $zero, 0 movgr2fr.w $fa0, $s3 movgr2fr.w $fa1, $s2 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI1_0) + lu32i.d $a0, -97152 ffint.d.w $fa0, $fa0 ffint.d.w $fa1, $fa1 + lu52i.d $a0, $a0, 1042 fmul.d $fa0, $fa0, $fa1 - fdiv.d $fs1, $fa0, $fa2 + movgr2fr.d $fa1, $a0 + fdiv.d $fs1, $fa0, $fa1 pcalau12i $a0, %pc_hi20(.L.str.4) addi.d $s2, $a0, %pc_lo12(.L.str.4) pcalau12i $a0, %pc_hi20(.L.str.5) @@ -409,12 +406,7 @@ _Z11start_timerv: # @_Z11start_timerv .Lfunc_end3: .size _Z11start_timerv, .Lfunc_end3-_Z11start_timerv # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z5timerv -.LCPI4_0: - .dword 0x412e848000000000 # double 1.0E+6 - .text - .globl _Z5timerv + .globl _Z5timerv # -- Begin function _Z5timerv .p2align 5 .type _Z5timerv,@function _Z5timerv: # @_Z5timerv @@ -426,12 +418,14 @@ _Z5timerv: # @_Z5timerv pcalau12i $a1, %pc_hi20(start_time) ld.d $a1, $a1, %pc_lo12(start_time) pcalau12i $a2, %pc_hi20(end_time) - pcalau12i $a3, %pc_hi20(.LCPI4_0) - fld.d $fa0, $a3, %pc_lo12(.LCPI4_0) sub.d $a1, $a0, $a1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 st.d $a0, $a2, %pc_lo12(end_time) ld.d $ra, $sp, 8 # 8-byte Folded Reload addi.d $sp, $sp, 16 @@ -439,12 +433,7 @@ _Z5timerv: # @_Z5timerv .Lfunc_end4: .size _Z5timerv, .Lfunc_end4-_Z5timerv # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI5_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -1780,8 +1769,8 @@ main: # @main pcaddu18i $ra, %call36(_Z14test_variable4Il28custom_multiple_variable_xorIlEEvPT_iS2_S2_S2_S2_PKc) jirl $ra, $ra, 0 fld.d $fa1, $s6, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI5_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fa0, $a0 fcmp.clt.d $fcc0, $fa1, $fa0 fsub.d $fa2, $fa1, $fa0 ftintrz.l.d $fa2, $fa2 @@ -2269,12 +2258,8 @@ main: # @main .size main, .Lfunc_end5-main .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc -.LCPI6_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc + .weak _Z14test_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc,@function _Z14test_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc: # @_Z14test_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc @@ -2524,12 +2509,14 @@ _Z14test_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc: # @_Z14test_variable1I move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB6_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI6_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI6_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -2563,12 +2550,8 @@ _Z14test_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc, .Lfunc_end6-_Z14test_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z22test_hoisted_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc -.LCPI7_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z22test_hoisted_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc,"axG",@progbits,_Z22test_hoisted_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc,comdat - .weak _Z22test_hoisted_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc + .weak _Z22test_hoisted_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc # -- Begin function _Z22test_hoisted_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc .p2align 5 .type _Z22test_hoisted_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc,@function _Z22test_hoisted_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc: # @_Z22test_hoisted_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc @@ -2806,12 +2789,14 @@ _Z22test_hoisted_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc: # @_Z22test_ho move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB7_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI7_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI7_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -2845,12 +2830,8 @@ _Z22test_hoisted_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc: # @_Z22test_ho .size _Z22test_hoisted_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc, .Lfunc_end7-_Z22test_hoisted_variable1Ia19custom_add_variableIaEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ia28custom_add_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc -.LCPI8_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ia28custom_add_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ia28custom_add_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ia28custom_add_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ia28custom_add_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ia28custom_add_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ia28custom_add_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ia28custom_add_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ia28custom_add_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc @@ -3106,12 +3087,14 @@ _Z14test_variable4Ia28custom_add_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc: # @_ move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB8_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI8_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI8_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -3145,12 +3128,8 @@ _Z14test_variable4Ia28custom_add_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Ia28custom_add_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end8-_Z14test_variable4Ia28custom_add_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ia19custom_sub_variableIaEEvPT_iS2_PKc -.LCPI9_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ia19custom_sub_variableIaEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ia19custom_sub_variableIaEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ia19custom_sub_variableIaEEvPT_iS2_PKc + .weak _Z14test_variable1Ia19custom_sub_variableIaEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ia19custom_sub_variableIaEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ia19custom_sub_variableIaEEvPT_iS2_PKc,@function _Z14test_variable1Ia19custom_sub_variableIaEEvPT_iS2_PKc: # @_Z14test_variable1Ia19custom_sub_variableIaEEvPT_iS2_PKc @@ -3400,12 +3379,14 @@ _Z14test_variable1Ia19custom_sub_variableIaEEvPT_iS2_PKc: # @_Z14test_variable1I move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB9_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI9_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI9_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -3439,12 +3420,8 @@ _Z14test_variable1Ia19custom_sub_variableIaEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Ia19custom_sub_variableIaEEvPT_iS2_PKc, .Lfunc_end9-_Z14test_variable1Ia19custom_sub_variableIaEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ia28custom_sub_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc -.LCPI10_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ia28custom_sub_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ia28custom_sub_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ia28custom_sub_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ia28custom_sub_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ia28custom_sub_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ia28custom_sub_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ia28custom_sub_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ia28custom_sub_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc @@ -3700,12 +3677,14 @@ _Z14test_variable4Ia28custom_sub_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc: # @_ move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB10_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI10_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI10_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -3739,12 +3718,8 @@ _Z14test_variable4Ia28custom_sub_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Ia28custom_sub_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end10-_Z14test_variable4Ia28custom_sub_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ia24custom_multiply_variableIaEEvPT_iS2_PKc -.LCPI11_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ia24custom_multiply_variableIaEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ia24custom_multiply_variableIaEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ia24custom_multiply_variableIaEEvPT_iS2_PKc + .weak _Z14test_variable1Ia24custom_multiply_variableIaEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ia24custom_multiply_variableIaEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ia24custom_multiply_variableIaEEvPT_iS2_PKc,@function _Z14test_variable1Ia24custom_multiply_variableIaEEvPT_iS2_PKc: # @_Z14test_variable1Ia24custom_multiply_variableIaEEvPT_iS2_PKc @@ -3992,12 +3967,14 @@ _Z14test_variable1Ia24custom_multiply_variableIaEEvPT_iS2_PKc: # @_Z14test_varia move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB11_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI11_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI11_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -4031,12 +4008,8 @@ _Z14test_variable1Ia24custom_multiply_variableIaEEvPT_iS2_PKc: # @_Z14test_varia .size _Z14test_variable1Ia24custom_multiply_variableIaEEvPT_iS2_PKc, .Lfunc_end11-_Z14test_variable1Ia24custom_multiply_variableIaEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ia33custom_multiply_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc -.LCPI12_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ia33custom_multiply_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ia33custom_multiply_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ia33custom_multiply_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ia33custom_multiply_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ia33custom_multiply_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ia33custom_multiply_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ia33custom_multiply_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ia33custom_multiply_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc @@ -4292,12 +4265,14 @@ _Z14test_variable4Ia33custom_multiply_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc: move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB12_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI12_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI12_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -4331,12 +4306,8 @@ _Z14test_variable4Ia33custom_multiply_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc: .size _Z14test_variable4Ia33custom_multiply_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end12-_Z14test_variable4Ia33custom_multiply_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ia34custom_multiply_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc -.LCPI13_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ia34custom_multiply_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ia34custom_multiply_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ia34custom_multiply_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ia34custom_multiply_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ia34custom_multiply_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ia34custom_multiply_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ia34custom_multiply_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ia34custom_multiply_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc @@ -4595,12 +4566,14 @@ _Z14test_variable4Ia34custom_multiply_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB13_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI13_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI13_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -4634,12 +4607,8 @@ _Z14test_variable4Ia34custom_multiply_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc .size _Z14test_variable4Ia34custom_multiply_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end13-_Z14test_variable4Ia34custom_multiply_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ia22custom_divide_variableIaEEvPT_iS2_PKc -.LCPI14_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ia22custom_divide_variableIaEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ia22custom_divide_variableIaEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ia22custom_divide_variableIaEEvPT_iS2_PKc + .weak _Z14test_variable1Ia22custom_divide_variableIaEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ia22custom_divide_variableIaEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ia22custom_divide_variableIaEEvPT_iS2_PKc,@function _Z14test_variable1Ia22custom_divide_variableIaEEvPT_iS2_PKc: # @_Z14test_variable1Ia22custom_divide_variableIaEEvPT_iS2_PKc @@ -4899,12 +4868,14 @@ _Z14test_variable1Ia22custom_divide_variableIaEEvPT_iS2_PKc: # @_Z14test_variabl move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB14_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI14_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI14_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -4938,12 +4909,8 @@ _Z14test_variable1Ia22custom_divide_variableIaEEvPT_iS2_PKc: # @_Z14test_variabl .size _Z14test_variable1Ia22custom_divide_variableIaEEvPT_iS2_PKc, .Lfunc_end14-_Z14test_variable1Ia22custom_divide_variableIaEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ia31custom_divide_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc -.LCPI15_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ia31custom_divide_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ia31custom_divide_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ia31custom_divide_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ia31custom_divide_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ia31custom_divide_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ia31custom_divide_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ia31custom_divide_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ia31custom_divide_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc @@ -5255,12 +5222,14 @@ _Z14test_variable4Ia31custom_divide_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc: # move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB15_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI15_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI15_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -5294,12 +5263,8 @@ _Z14test_variable4Ia31custom_divide_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc: # .size _Z14test_variable4Ia31custom_divide_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end15-_Z14test_variable4Ia31custom_divide_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ia32custom_divide_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc -.LCPI16_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ia32custom_divide_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ia32custom_divide_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ia32custom_divide_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ia32custom_divide_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ia32custom_divide_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ia32custom_divide_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ia32custom_divide_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ia32custom_divide_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc @@ -5557,12 +5522,14 @@ _Z14test_variable4Ia32custom_divide_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc: move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB16_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI16_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI16_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -5596,12 +5563,8 @@ _Z14test_variable4Ia32custom_divide_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc: .size _Z14test_variable4Ia32custom_divide_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end16-_Z14test_variable4Ia32custom_divide_multiple_variable2IaEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ia30custom_mixed_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc -.LCPI17_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ia30custom_mixed_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ia30custom_mixed_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ia30custom_mixed_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ia30custom_mixed_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ia30custom_mixed_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ia30custom_mixed_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ia30custom_mixed_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ia30custom_mixed_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc @@ -5877,12 +5840,14 @@ _Z14test_variable4Ia30custom_mixed_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc: # move $a2, $a0 ld.w $a0, $s7, %pc_lo12(current_test) .LBB17_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI17_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI17_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -5916,12 +5881,8 @@ _Z14test_variable4Ia30custom_mixed_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc: # .size _Z14test_variable4Ia30custom_mixed_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end17-_Z14test_variable4Ia30custom_mixed_multiple_variableIaEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ia19custom_variable_andIaEEvPT_iS2_PKc -.LCPI18_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ia19custom_variable_andIaEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ia19custom_variable_andIaEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ia19custom_variable_andIaEEvPT_iS2_PKc + .weak _Z14test_variable1Ia19custom_variable_andIaEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ia19custom_variable_andIaEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ia19custom_variable_andIaEEvPT_iS2_PKc,@function _Z14test_variable1Ia19custom_variable_andIaEEvPT_iS2_PKc: # @_Z14test_variable1Ia19custom_variable_andIaEEvPT_iS2_PKc @@ -6171,12 +6132,14 @@ _Z14test_variable1Ia19custom_variable_andIaEEvPT_iS2_PKc: # @_Z14test_variable1I move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB18_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI18_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI18_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -6210,12 +6173,8 @@ _Z14test_variable1Ia19custom_variable_andIaEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Ia19custom_variable_andIaEEvPT_iS2_PKc, .Lfunc_end18-_Z14test_variable1Ia19custom_variable_andIaEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ia28custom_multiple_variable_andIaEEvPT_iS2_S2_S2_S2_PKc -.LCPI19_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ia28custom_multiple_variable_andIaEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ia28custom_multiple_variable_andIaEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ia28custom_multiple_variable_andIaEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ia28custom_multiple_variable_andIaEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ia28custom_multiple_variable_andIaEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ia28custom_multiple_variable_andIaEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ia28custom_multiple_variable_andIaEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ia28custom_multiple_variable_andIaEEvPT_iS2_S2_S2_S2_PKc @@ -6482,12 +6441,14 @@ _Z14test_variable4Ia28custom_multiple_variable_andIaEEvPT_iS2_S2_S2_S2_PKc: # @_ move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB19_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI19_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI19_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -6521,12 +6482,8 @@ _Z14test_variable4Ia28custom_multiple_variable_andIaEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Ia28custom_multiple_variable_andIaEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end19-_Z14test_variable4Ia28custom_multiple_variable_andIaEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ia18custom_variable_orIaEEvPT_iS2_PKc -.LCPI20_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ia18custom_variable_orIaEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ia18custom_variable_orIaEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ia18custom_variable_orIaEEvPT_iS2_PKc + .weak _Z14test_variable1Ia18custom_variable_orIaEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ia18custom_variable_orIaEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ia18custom_variable_orIaEEvPT_iS2_PKc,@function _Z14test_variable1Ia18custom_variable_orIaEEvPT_iS2_PKc: # @_Z14test_variable1Ia18custom_variable_orIaEEvPT_iS2_PKc @@ -6776,12 +6733,14 @@ _Z14test_variable1Ia18custom_variable_orIaEEvPT_iS2_PKc: # @_Z14test_variable1Ia move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB20_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI20_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI20_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -6815,12 +6774,8 @@ _Z14test_variable1Ia18custom_variable_orIaEEvPT_iS2_PKc: # @_Z14test_variable1Ia .size _Z14test_variable1Ia18custom_variable_orIaEEvPT_iS2_PKc, .Lfunc_end20-_Z14test_variable1Ia18custom_variable_orIaEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ia27custom_multiple_variable_orIaEEvPT_iS2_S2_S2_S2_PKc -.LCPI21_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ia27custom_multiple_variable_orIaEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ia27custom_multiple_variable_orIaEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ia27custom_multiple_variable_orIaEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ia27custom_multiple_variable_orIaEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ia27custom_multiple_variable_orIaEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ia27custom_multiple_variable_orIaEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ia27custom_multiple_variable_orIaEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ia27custom_multiple_variable_orIaEEvPT_iS2_S2_S2_S2_PKc @@ -7087,12 +7042,14 @@ _Z14test_variable4Ia27custom_multiple_variable_orIaEEvPT_iS2_S2_S2_S2_PKc: # @_Z move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB21_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI21_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI21_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -7126,12 +7083,8 @@ _Z14test_variable4Ia27custom_multiple_variable_orIaEEvPT_iS2_S2_S2_S2_PKc: # @_Z .size _Z14test_variable4Ia27custom_multiple_variable_orIaEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end21-_Z14test_variable4Ia27custom_multiple_variable_orIaEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ia19custom_variable_xorIaEEvPT_iS2_PKc -.LCPI22_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ia19custom_variable_xorIaEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ia19custom_variable_xorIaEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ia19custom_variable_xorIaEEvPT_iS2_PKc + .weak _Z14test_variable1Ia19custom_variable_xorIaEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ia19custom_variable_xorIaEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ia19custom_variable_xorIaEEvPT_iS2_PKc,@function _Z14test_variable1Ia19custom_variable_xorIaEEvPT_iS2_PKc: # @_Z14test_variable1Ia19custom_variable_xorIaEEvPT_iS2_PKc @@ -7381,12 +7334,14 @@ _Z14test_variable1Ia19custom_variable_xorIaEEvPT_iS2_PKc: # @_Z14test_variable1I move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB22_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI22_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI22_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -7420,12 +7375,8 @@ _Z14test_variable1Ia19custom_variable_xorIaEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Ia19custom_variable_xorIaEEvPT_iS2_PKc, .Lfunc_end22-_Z14test_variable1Ia19custom_variable_xorIaEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ia28custom_multiple_variable_xorIaEEvPT_iS2_S2_S2_S2_PKc -.LCPI23_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ia28custom_multiple_variable_xorIaEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ia28custom_multiple_variable_xorIaEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ia28custom_multiple_variable_xorIaEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ia28custom_multiple_variable_xorIaEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ia28custom_multiple_variable_xorIaEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ia28custom_multiple_variable_xorIaEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ia28custom_multiple_variable_xorIaEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ia28custom_multiple_variable_xorIaEEvPT_iS2_S2_S2_S2_PKc @@ -7692,12 +7643,14 @@ _Z14test_variable4Ia28custom_multiple_variable_xorIaEEvPT_iS2_S2_S2_S2_PKc: # @_ move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB23_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI23_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI23_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -7731,12 +7684,8 @@ _Z14test_variable4Ia28custom_multiple_variable_xorIaEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Ia28custom_multiple_variable_xorIaEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end23-_Z14test_variable4Ia28custom_multiple_variable_xorIaEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc -.LCPI24_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc + .weak _Z14test_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc,@function _Z14test_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc: # @_Z14test_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc @@ -7986,12 +7935,14 @@ _Z14test_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc: # @_Z14test_variable1I move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB24_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI24_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI24_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -8025,12 +7976,8 @@ _Z14test_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc, .Lfunc_end24-_Z14test_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z22test_hoisted_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc -.LCPI25_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z22test_hoisted_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc,"axG",@progbits,_Z22test_hoisted_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc,comdat - .weak _Z22test_hoisted_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc + .weak _Z22test_hoisted_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc # -- Begin function _Z22test_hoisted_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc .p2align 5 .type _Z22test_hoisted_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc,@function _Z22test_hoisted_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc: # @_Z22test_hoisted_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc @@ -8268,12 +8215,14 @@ _Z22test_hoisted_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc: # @_Z22test_ho move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB25_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI25_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI25_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -8307,12 +8256,8 @@ _Z22test_hoisted_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc: # @_Z22test_ho .size _Z22test_hoisted_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc, .Lfunc_end25-_Z22test_hoisted_variable1Ih19custom_add_variableIhEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ih28custom_add_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc -.LCPI26_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ih28custom_add_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ih28custom_add_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ih28custom_add_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ih28custom_add_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ih28custom_add_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ih28custom_add_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ih28custom_add_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ih28custom_add_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc @@ -8568,12 +8513,14 @@ _Z14test_variable4Ih28custom_add_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc: # @_ move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB26_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI26_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI26_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -8607,12 +8554,8 @@ _Z14test_variable4Ih28custom_add_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Ih28custom_add_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end26-_Z14test_variable4Ih28custom_add_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ih19custom_sub_variableIhEEvPT_iS2_PKc -.LCPI27_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ih19custom_sub_variableIhEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ih19custom_sub_variableIhEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ih19custom_sub_variableIhEEvPT_iS2_PKc + .weak _Z14test_variable1Ih19custom_sub_variableIhEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ih19custom_sub_variableIhEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ih19custom_sub_variableIhEEvPT_iS2_PKc,@function _Z14test_variable1Ih19custom_sub_variableIhEEvPT_iS2_PKc: # @_Z14test_variable1Ih19custom_sub_variableIhEEvPT_iS2_PKc @@ -8862,12 +8805,14 @@ _Z14test_variable1Ih19custom_sub_variableIhEEvPT_iS2_PKc: # @_Z14test_variable1I move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB27_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI27_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI27_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -8901,12 +8846,8 @@ _Z14test_variable1Ih19custom_sub_variableIhEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Ih19custom_sub_variableIhEEvPT_iS2_PKc, .Lfunc_end27-_Z14test_variable1Ih19custom_sub_variableIhEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ih28custom_sub_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc -.LCPI28_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ih28custom_sub_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ih28custom_sub_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ih28custom_sub_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ih28custom_sub_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ih28custom_sub_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ih28custom_sub_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ih28custom_sub_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ih28custom_sub_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc @@ -9162,12 +9103,14 @@ _Z14test_variable4Ih28custom_sub_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc: # @_ move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB28_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI28_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI28_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -9201,12 +9144,8 @@ _Z14test_variable4Ih28custom_sub_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Ih28custom_sub_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end28-_Z14test_variable4Ih28custom_sub_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ih24custom_multiply_variableIhEEvPT_iS2_PKc -.LCPI29_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ih24custom_multiply_variableIhEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ih24custom_multiply_variableIhEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ih24custom_multiply_variableIhEEvPT_iS2_PKc + .weak _Z14test_variable1Ih24custom_multiply_variableIhEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ih24custom_multiply_variableIhEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ih24custom_multiply_variableIhEEvPT_iS2_PKc,@function _Z14test_variable1Ih24custom_multiply_variableIhEEvPT_iS2_PKc: # @_Z14test_variable1Ih24custom_multiply_variableIhEEvPT_iS2_PKc @@ -9454,12 +9393,14 @@ _Z14test_variable1Ih24custom_multiply_variableIhEEvPT_iS2_PKc: # @_Z14test_varia move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB29_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI29_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI29_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -9493,12 +9434,8 @@ _Z14test_variable1Ih24custom_multiply_variableIhEEvPT_iS2_PKc: # @_Z14test_varia .size _Z14test_variable1Ih24custom_multiply_variableIhEEvPT_iS2_PKc, .Lfunc_end29-_Z14test_variable1Ih24custom_multiply_variableIhEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ih33custom_multiply_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc -.LCPI30_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ih33custom_multiply_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ih33custom_multiply_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ih33custom_multiply_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ih33custom_multiply_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ih33custom_multiply_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ih33custom_multiply_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ih33custom_multiply_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ih33custom_multiply_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc @@ -9754,12 +9691,14 @@ _Z14test_variable4Ih33custom_multiply_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc: move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB30_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI30_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI30_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -9793,12 +9732,8 @@ _Z14test_variable4Ih33custom_multiply_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc: .size _Z14test_variable4Ih33custom_multiply_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end30-_Z14test_variable4Ih33custom_multiply_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ih34custom_multiply_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc -.LCPI31_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ih34custom_multiply_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ih34custom_multiply_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ih34custom_multiply_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ih34custom_multiply_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ih34custom_multiply_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ih34custom_multiply_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ih34custom_multiply_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ih34custom_multiply_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc @@ -10057,12 +9992,14 @@ _Z14test_variable4Ih34custom_multiply_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB31_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI31_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI31_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -10096,12 +10033,8 @@ _Z14test_variable4Ih34custom_multiply_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc .size _Z14test_variable4Ih34custom_multiply_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end31-_Z14test_variable4Ih34custom_multiply_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ih22custom_divide_variableIhEEvPT_iS2_PKc -.LCPI32_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ih22custom_divide_variableIhEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ih22custom_divide_variableIhEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ih22custom_divide_variableIhEEvPT_iS2_PKc + .weak _Z14test_variable1Ih22custom_divide_variableIhEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ih22custom_divide_variableIhEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ih22custom_divide_variableIhEEvPT_iS2_PKc,@function _Z14test_variable1Ih22custom_divide_variableIhEEvPT_iS2_PKc: # @_Z14test_variable1Ih22custom_divide_variableIhEEvPT_iS2_PKc @@ -10398,12 +10331,14 @@ _Z14test_variable1Ih22custom_divide_variableIhEEvPT_iS2_PKc: # @_Z14test_variabl move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB32_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI32_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI32_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -10437,12 +10372,8 @@ _Z14test_variable1Ih22custom_divide_variableIhEEvPT_iS2_PKc: # @_Z14test_variabl .size _Z14test_variable1Ih22custom_divide_variableIhEEvPT_iS2_PKc, .Lfunc_end32-_Z14test_variable1Ih22custom_divide_variableIhEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ih31custom_divide_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc -.LCPI33_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ih31custom_divide_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ih31custom_divide_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ih31custom_divide_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ih31custom_divide_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ih31custom_divide_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ih31custom_divide_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ih31custom_divide_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ih31custom_divide_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc @@ -10878,12 +10809,14 @@ _Z14test_variable4Ih31custom_divide_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc: # move $a2, $a0 ld.w $a0, $s7, %pc_lo12(current_test) .LBB33_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI33_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI33_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -10917,12 +10850,8 @@ _Z14test_variable4Ih31custom_divide_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc: # .size _Z14test_variable4Ih31custom_divide_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end33-_Z14test_variable4Ih31custom_divide_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ih32custom_divide_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc -.LCPI34_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ih32custom_divide_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ih32custom_divide_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ih32custom_divide_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ih32custom_divide_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ih32custom_divide_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ih32custom_divide_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ih32custom_divide_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ih32custom_divide_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc @@ -11178,12 +11107,14 @@ _Z14test_variable4Ih32custom_divide_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc: move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB34_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI34_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI34_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -11217,12 +11148,8 @@ _Z14test_variable4Ih32custom_divide_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc: .size _Z14test_variable4Ih32custom_divide_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end34-_Z14test_variable4Ih32custom_divide_multiple_variable2IhEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ih30custom_mixed_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc -.LCPI35_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ih30custom_mixed_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ih30custom_mixed_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ih30custom_mixed_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ih30custom_mixed_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ih30custom_mixed_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ih30custom_mixed_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ih30custom_mixed_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ih30custom_mixed_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc @@ -11478,12 +11405,14 @@ _Z14test_variable4Ih30custom_mixed_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc: # move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB35_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI35_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI35_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -11517,12 +11446,8 @@ _Z14test_variable4Ih30custom_mixed_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc: # .size _Z14test_variable4Ih30custom_mixed_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end35-_Z14test_variable4Ih30custom_mixed_multiple_variableIhEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ih19custom_variable_andIhEEvPT_iS2_PKc -.LCPI36_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ih19custom_variable_andIhEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ih19custom_variable_andIhEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ih19custom_variable_andIhEEvPT_iS2_PKc + .weak _Z14test_variable1Ih19custom_variable_andIhEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ih19custom_variable_andIhEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ih19custom_variable_andIhEEvPT_iS2_PKc,@function _Z14test_variable1Ih19custom_variable_andIhEEvPT_iS2_PKc: # @_Z14test_variable1Ih19custom_variable_andIhEEvPT_iS2_PKc @@ -11772,12 +11697,14 @@ _Z14test_variable1Ih19custom_variable_andIhEEvPT_iS2_PKc: # @_Z14test_variable1I move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB36_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI36_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI36_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -11811,12 +11738,8 @@ _Z14test_variable1Ih19custom_variable_andIhEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Ih19custom_variable_andIhEEvPT_iS2_PKc, .Lfunc_end36-_Z14test_variable1Ih19custom_variable_andIhEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ih28custom_multiple_variable_andIhEEvPT_iS2_S2_S2_S2_PKc -.LCPI37_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ih28custom_multiple_variable_andIhEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ih28custom_multiple_variable_andIhEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ih28custom_multiple_variable_andIhEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ih28custom_multiple_variable_andIhEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ih28custom_multiple_variable_andIhEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ih28custom_multiple_variable_andIhEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ih28custom_multiple_variable_andIhEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ih28custom_multiple_variable_andIhEEvPT_iS2_S2_S2_S2_PKc @@ -12083,12 +12006,14 @@ _Z14test_variable4Ih28custom_multiple_variable_andIhEEvPT_iS2_S2_S2_S2_PKc: # @_ move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB37_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI37_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI37_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -12122,12 +12047,8 @@ _Z14test_variable4Ih28custom_multiple_variable_andIhEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Ih28custom_multiple_variable_andIhEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end37-_Z14test_variable4Ih28custom_multiple_variable_andIhEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ih18custom_variable_orIhEEvPT_iS2_PKc -.LCPI38_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ih18custom_variable_orIhEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ih18custom_variable_orIhEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ih18custom_variable_orIhEEvPT_iS2_PKc + .weak _Z14test_variable1Ih18custom_variable_orIhEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ih18custom_variable_orIhEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ih18custom_variable_orIhEEvPT_iS2_PKc,@function _Z14test_variable1Ih18custom_variable_orIhEEvPT_iS2_PKc: # @_Z14test_variable1Ih18custom_variable_orIhEEvPT_iS2_PKc @@ -12377,12 +12298,14 @@ _Z14test_variable1Ih18custom_variable_orIhEEvPT_iS2_PKc: # @_Z14test_variable1Ih move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB38_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI38_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI38_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -12416,12 +12339,8 @@ _Z14test_variable1Ih18custom_variable_orIhEEvPT_iS2_PKc: # @_Z14test_variable1Ih .size _Z14test_variable1Ih18custom_variable_orIhEEvPT_iS2_PKc, .Lfunc_end38-_Z14test_variable1Ih18custom_variable_orIhEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ih27custom_multiple_variable_orIhEEvPT_iS2_S2_S2_S2_PKc -.LCPI39_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ih27custom_multiple_variable_orIhEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ih27custom_multiple_variable_orIhEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ih27custom_multiple_variable_orIhEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ih27custom_multiple_variable_orIhEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ih27custom_multiple_variable_orIhEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ih27custom_multiple_variable_orIhEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ih27custom_multiple_variable_orIhEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ih27custom_multiple_variable_orIhEEvPT_iS2_S2_S2_S2_PKc @@ -12688,12 +12607,14 @@ _Z14test_variable4Ih27custom_multiple_variable_orIhEEvPT_iS2_S2_S2_S2_PKc: # @_Z move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB39_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI39_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI39_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -12727,12 +12648,8 @@ _Z14test_variable4Ih27custom_multiple_variable_orIhEEvPT_iS2_S2_S2_S2_PKc: # @_Z .size _Z14test_variable4Ih27custom_multiple_variable_orIhEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end39-_Z14test_variable4Ih27custom_multiple_variable_orIhEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ih19custom_variable_xorIhEEvPT_iS2_PKc -.LCPI40_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ih19custom_variable_xorIhEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ih19custom_variable_xorIhEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ih19custom_variable_xorIhEEvPT_iS2_PKc + .weak _Z14test_variable1Ih19custom_variable_xorIhEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ih19custom_variable_xorIhEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ih19custom_variable_xorIhEEvPT_iS2_PKc,@function _Z14test_variable1Ih19custom_variable_xorIhEEvPT_iS2_PKc: # @_Z14test_variable1Ih19custom_variable_xorIhEEvPT_iS2_PKc @@ -12982,12 +12899,14 @@ _Z14test_variable1Ih19custom_variable_xorIhEEvPT_iS2_PKc: # @_Z14test_variable1I move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB40_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI40_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI40_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -13021,12 +12940,8 @@ _Z14test_variable1Ih19custom_variable_xorIhEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Ih19custom_variable_xorIhEEvPT_iS2_PKc, .Lfunc_end40-_Z14test_variable1Ih19custom_variable_xorIhEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ih28custom_multiple_variable_xorIhEEvPT_iS2_S2_S2_S2_PKc -.LCPI41_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ih28custom_multiple_variable_xorIhEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ih28custom_multiple_variable_xorIhEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ih28custom_multiple_variable_xorIhEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ih28custom_multiple_variable_xorIhEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ih28custom_multiple_variable_xorIhEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ih28custom_multiple_variable_xorIhEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ih28custom_multiple_variable_xorIhEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ih28custom_multiple_variable_xorIhEEvPT_iS2_S2_S2_S2_PKc @@ -13293,12 +13208,14 @@ _Z14test_variable4Ih28custom_multiple_variable_xorIhEEvPT_iS2_S2_S2_S2_PKc: # @_ move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB41_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI41_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI41_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -13332,12 +13249,8 @@ _Z14test_variable4Ih28custom_multiple_variable_xorIhEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Ih28custom_multiple_variable_xorIhEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end41-_Z14test_variable4Ih28custom_multiple_variable_xorIhEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Is19custom_add_variableIsEEvPT_iS2_PKc -.LCPI42_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Is19custom_add_variableIsEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Is19custom_add_variableIsEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Is19custom_add_variableIsEEvPT_iS2_PKc + .weak _Z14test_variable1Is19custom_add_variableIsEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Is19custom_add_variableIsEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Is19custom_add_variableIsEEvPT_iS2_PKc,@function _Z14test_variable1Is19custom_add_variableIsEEvPT_iS2_PKc: # @_Z14test_variable1Is19custom_add_variableIsEEvPT_iS2_PKc @@ -13589,12 +13502,14 @@ _Z14test_variable1Is19custom_add_variableIsEEvPT_iS2_PKc: # @_Z14test_variable1I move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB42_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI42_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI42_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -13628,12 +13543,8 @@ _Z14test_variable1Is19custom_add_variableIsEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Is19custom_add_variableIsEEvPT_iS2_PKc, .Lfunc_end42-_Z14test_variable1Is19custom_add_variableIsEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z22test_hoisted_variable1Is19custom_add_variableIsEEvPT_iS2_PKc -.LCPI43_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z22test_hoisted_variable1Is19custom_add_variableIsEEvPT_iS2_PKc,"axG",@progbits,_Z22test_hoisted_variable1Is19custom_add_variableIsEEvPT_iS2_PKc,comdat - .weak _Z22test_hoisted_variable1Is19custom_add_variableIsEEvPT_iS2_PKc + .weak _Z22test_hoisted_variable1Is19custom_add_variableIsEEvPT_iS2_PKc # -- Begin function _Z22test_hoisted_variable1Is19custom_add_variableIsEEvPT_iS2_PKc .p2align 5 .type _Z22test_hoisted_variable1Is19custom_add_variableIsEEvPT_iS2_PKc,@function _Z22test_hoisted_variable1Is19custom_add_variableIsEEvPT_iS2_PKc: # @_Z22test_hoisted_variable1Is19custom_add_variableIsEEvPT_iS2_PKc @@ -13879,12 +13790,14 @@ _Z22test_hoisted_variable1Is19custom_add_variableIsEEvPT_iS2_PKc: # @_Z22test_ho move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB43_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI43_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI43_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -13918,12 +13831,8 @@ _Z22test_hoisted_variable1Is19custom_add_variableIsEEvPT_iS2_PKc: # @_Z22test_ho .size _Z22test_hoisted_variable1Is19custom_add_variableIsEEvPT_iS2_PKc, .Lfunc_end43-_Z22test_hoisted_variable1Is19custom_add_variableIsEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Is28custom_add_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc -.LCPI44_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Is28custom_add_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Is28custom_add_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Is28custom_add_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Is28custom_add_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Is28custom_add_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Is28custom_add_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Is28custom_add_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Is28custom_add_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc @@ -14181,12 +14090,14 @@ _Z14test_variable4Is28custom_add_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc: # @_ move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB44_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI44_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI44_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -14220,12 +14131,8 @@ _Z14test_variable4Is28custom_add_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Is28custom_add_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end44-_Z14test_variable4Is28custom_add_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Is19custom_sub_variableIsEEvPT_iS2_PKc -.LCPI45_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Is19custom_sub_variableIsEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Is19custom_sub_variableIsEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Is19custom_sub_variableIsEEvPT_iS2_PKc + .weak _Z14test_variable1Is19custom_sub_variableIsEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Is19custom_sub_variableIsEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Is19custom_sub_variableIsEEvPT_iS2_PKc,@function _Z14test_variable1Is19custom_sub_variableIsEEvPT_iS2_PKc: # @_Z14test_variable1Is19custom_sub_variableIsEEvPT_iS2_PKc @@ -14477,12 +14384,14 @@ _Z14test_variable1Is19custom_sub_variableIsEEvPT_iS2_PKc: # @_Z14test_variable1I move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB45_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI45_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI45_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -14516,12 +14425,8 @@ _Z14test_variable1Is19custom_sub_variableIsEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Is19custom_sub_variableIsEEvPT_iS2_PKc, .Lfunc_end45-_Z14test_variable1Is19custom_sub_variableIsEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Is28custom_sub_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc -.LCPI46_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Is28custom_sub_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Is28custom_sub_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Is28custom_sub_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Is28custom_sub_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Is28custom_sub_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Is28custom_sub_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Is28custom_sub_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Is28custom_sub_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc @@ -14779,12 +14684,14 @@ _Z14test_variable4Is28custom_sub_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc: # @_ move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB46_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI46_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI46_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -14818,12 +14725,8 @@ _Z14test_variable4Is28custom_sub_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Is28custom_sub_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end46-_Z14test_variable4Is28custom_sub_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Is24custom_multiply_variableIsEEvPT_iS2_PKc -.LCPI47_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Is24custom_multiply_variableIsEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Is24custom_multiply_variableIsEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Is24custom_multiply_variableIsEEvPT_iS2_PKc + .weak _Z14test_variable1Is24custom_multiply_variableIsEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Is24custom_multiply_variableIsEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Is24custom_multiply_variableIsEEvPT_iS2_PKc,@function _Z14test_variable1Is24custom_multiply_variableIsEEvPT_iS2_PKc: # @_Z14test_variable1Is24custom_multiply_variableIsEEvPT_iS2_PKc @@ -15067,12 +14970,14 @@ _Z14test_variable1Is24custom_multiply_variableIsEEvPT_iS2_PKc: # @_Z14test_varia move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB47_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI47_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI47_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -15106,12 +15011,8 @@ _Z14test_variable1Is24custom_multiply_variableIsEEvPT_iS2_PKc: # @_Z14test_varia .size _Z14test_variable1Is24custom_multiply_variableIsEEvPT_iS2_PKc, .Lfunc_end47-_Z14test_variable1Is24custom_multiply_variableIsEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Is33custom_multiply_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc -.LCPI48_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Is33custom_multiply_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Is33custom_multiply_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Is33custom_multiply_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Is33custom_multiply_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Is33custom_multiply_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Is33custom_multiply_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Is33custom_multiply_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Is33custom_multiply_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc @@ -15363,12 +15264,14 @@ _Z14test_variable4Is33custom_multiply_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc: move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB48_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI48_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI48_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -15402,12 +15305,8 @@ _Z14test_variable4Is33custom_multiply_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc: .size _Z14test_variable4Is33custom_multiply_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end48-_Z14test_variable4Is33custom_multiply_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Is34custom_multiply_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc -.LCPI49_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Is34custom_multiply_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Is34custom_multiply_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Is34custom_multiply_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Is34custom_multiply_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Is34custom_multiply_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Is34custom_multiply_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Is34custom_multiply_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Is34custom_multiply_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc @@ -15669,12 +15568,14 @@ _Z14test_variable4Is34custom_multiply_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB49_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI49_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI49_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -15708,12 +15609,8 @@ _Z14test_variable4Is34custom_multiply_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc .size _Z14test_variable4Is34custom_multiply_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end49-_Z14test_variable4Is34custom_multiply_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Is22custom_divide_variableIsEEvPT_iS2_PKc -.LCPI50_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Is22custom_divide_variableIsEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Is22custom_divide_variableIsEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Is22custom_divide_variableIsEEvPT_iS2_PKc + .weak _Z14test_variable1Is22custom_divide_variableIsEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Is22custom_divide_variableIsEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Is22custom_divide_variableIsEEvPT_iS2_PKc,@function _Z14test_variable1Is22custom_divide_variableIsEEvPT_iS2_PKc: # @_Z14test_variable1Is22custom_divide_variableIsEEvPT_iS2_PKc @@ -15979,12 +15876,14 @@ _Z14test_variable1Is22custom_divide_variableIsEEvPT_iS2_PKc: # @_Z14test_variabl move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB50_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI50_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI50_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -16018,12 +15917,8 @@ _Z14test_variable1Is22custom_divide_variableIsEEvPT_iS2_PKc: # @_Z14test_variabl .size _Z14test_variable1Is22custom_divide_variableIsEEvPT_iS2_PKc, .Lfunc_end50-_Z14test_variable1Is22custom_divide_variableIsEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Is31custom_divide_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc -.LCPI51_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Is31custom_divide_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Is31custom_divide_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Is31custom_divide_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Is31custom_divide_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Is31custom_divide_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Is31custom_divide_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Is31custom_divide_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Is31custom_divide_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc @@ -16334,12 +16229,14 @@ _Z14test_variable4Is31custom_divide_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc: # move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB51_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI51_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI51_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -16373,12 +16270,8 @@ _Z14test_variable4Is31custom_divide_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc: # .size _Z14test_variable4Is31custom_divide_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end51-_Z14test_variable4Is31custom_divide_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Is32custom_divide_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc -.LCPI52_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Is32custom_divide_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Is32custom_divide_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Is32custom_divide_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Is32custom_divide_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Is32custom_divide_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Is32custom_divide_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Is32custom_divide_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Is32custom_divide_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc @@ -16636,12 +16529,14 @@ _Z14test_variable4Is32custom_divide_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc: move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB52_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI52_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI52_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -16675,12 +16570,8 @@ _Z14test_variable4Is32custom_divide_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc: .size _Z14test_variable4Is32custom_divide_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end52-_Z14test_variable4Is32custom_divide_multiple_variable2IsEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Is30custom_mixed_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc -.LCPI53_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Is30custom_mixed_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Is30custom_mixed_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Is30custom_mixed_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Is30custom_mixed_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Is30custom_mixed_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Is30custom_mixed_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Is30custom_mixed_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Is30custom_mixed_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc @@ -16938,12 +16829,14 @@ _Z14test_variable4Is30custom_mixed_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc: # move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB53_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI53_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI53_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -16977,12 +16870,8 @@ _Z14test_variable4Is30custom_mixed_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc: # .size _Z14test_variable4Is30custom_mixed_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end53-_Z14test_variable4Is30custom_mixed_multiple_variableIsEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Is19custom_variable_andIsEEvPT_iS2_PKc -.LCPI54_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Is19custom_variable_andIsEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Is19custom_variable_andIsEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Is19custom_variable_andIsEEvPT_iS2_PKc + .weak _Z14test_variable1Is19custom_variable_andIsEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Is19custom_variable_andIsEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Is19custom_variable_andIsEEvPT_iS2_PKc,@function _Z14test_variable1Is19custom_variable_andIsEEvPT_iS2_PKc: # @_Z14test_variable1Is19custom_variable_andIsEEvPT_iS2_PKc @@ -17234,12 +17123,14 @@ _Z14test_variable1Is19custom_variable_andIsEEvPT_iS2_PKc: # @_Z14test_variable1I move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB54_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI54_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI54_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -17273,12 +17164,8 @@ _Z14test_variable1Is19custom_variable_andIsEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Is19custom_variable_andIsEEvPT_iS2_PKc, .Lfunc_end54-_Z14test_variable1Is19custom_variable_andIsEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Is28custom_multiple_variable_andIsEEvPT_iS2_S2_S2_S2_PKc -.LCPI55_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Is28custom_multiple_variable_andIsEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Is28custom_multiple_variable_andIsEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Is28custom_multiple_variable_andIsEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Is28custom_multiple_variable_andIsEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Is28custom_multiple_variable_andIsEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Is28custom_multiple_variable_andIsEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Is28custom_multiple_variable_andIsEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Is28custom_multiple_variable_andIsEEvPT_iS2_S2_S2_S2_PKc @@ -17549,12 +17436,14 @@ _Z14test_variable4Is28custom_multiple_variable_andIsEEvPT_iS2_S2_S2_S2_PKc: # @_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB55_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI55_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI55_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -17588,12 +17477,8 @@ _Z14test_variable4Is28custom_multiple_variable_andIsEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Is28custom_multiple_variable_andIsEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end55-_Z14test_variable4Is28custom_multiple_variable_andIsEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Is18custom_variable_orIsEEvPT_iS2_PKc -.LCPI56_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Is18custom_variable_orIsEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Is18custom_variable_orIsEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Is18custom_variable_orIsEEvPT_iS2_PKc + .weak _Z14test_variable1Is18custom_variable_orIsEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Is18custom_variable_orIsEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Is18custom_variable_orIsEEvPT_iS2_PKc,@function _Z14test_variable1Is18custom_variable_orIsEEvPT_iS2_PKc: # @_Z14test_variable1Is18custom_variable_orIsEEvPT_iS2_PKc @@ -17845,12 +17730,14 @@ _Z14test_variable1Is18custom_variable_orIsEEvPT_iS2_PKc: # @_Z14test_variable1Is move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB56_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI56_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI56_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -17884,12 +17771,8 @@ _Z14test_variable1Is18custom_variable_orIsEEvPT_iS2_PKc: # @_Z14test_variable1Is .size _Z14test_variable1Is18custom_variable_orIsEEvPT_iS2_PKc, .Lfunc_end56-_Z14test_variable1Is18custom_variable_orIsEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Is27custom_multiple_variable_orIsEEvPT_iS2_S2_S2_S2_PKc -.LCPI57_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Is27custom_multiple_variable_orIsEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Is27custom_multiple_variable_orIsEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Is27custom_multiple_variable_orIsEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Is27custom_multiple_variable_orIsEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Is27custom_multiple_variable_orIsEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Is27custom_multiple_variable_orIsEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Is27custom_multiple_variable_orIsEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Is27custom_multiple_variable_orIsEEvPT_iS2_S2_S2_S2_PKc @@ -18160,12 +18043,14 @@ _Z14test_variable4Is27custom_multiple_variable_orIsEEvPT_iS2_S2_S2_S2_PKc: # @_Z move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB57_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI57_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI57_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -18199,12 +18084,8 @@ _Z14test_variable4Is27custom_multiple_variable_orIsEEvPT_iS2_S2_S2_S2_PKc: # @_Z .size _Z14test_variable4Is27custom_multiple_variable_orIsEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end57-_Z14test_variable4Is27custom_multiple_variable_orIsEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Is19custom_variable_xorIsEEvPT_iS2_PKc -.LCPI58_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Is19custom_variable_xorIsEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Is19custom_variable_xorIsEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Is19custom_variable_xorIsEEvPT_iS2_PKc + .weak _Z14test_variable1Is19custom_variable_xorIsEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Is19custom_variable_xorIsEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Is19custom_variable_xorIsEEvPT_iS2_PKc,@function _Z14test_variable1Is19custom_variable_xorIsEEvPT_iS2_PKc: # @_Z14test_variable1Is19custom_variable_xorIsEEvPT_iS2_PKc @@ -18456,12 +18337,14 @@ _Z14test_variable1Is19custom_variable_xorIsEEvPT_iS2_PKc: # @_Z14test_variable1I move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB58_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI58_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI58_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -18495,12 +18378,8 @@ _Z14test_variable1Is19custom_variable_xorIsEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Is19custom_variable_xorIsEEvPT_iS2_PKc, .Lfunc_end58-_Z14test_variable1Is19custom_variable_xorIsEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Is28custom_multiple_variable_xorIsEEvPT_iS2_S2_S2_S2_PKc -.LCPI59_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Is28custom_multiple_variable_xorIsEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Is28custom_multiple_variable_xorIsEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Is28custom_multiple_variable_xorIsEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Is28custom_multiple_variable_xorIsEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Is28custom_multiple_variable_xorIsEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Is28custom_multiple_variable_xorIsEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Is28custom_multiple_variable_xorIsEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Is28custom_multiple_variable_xorIsEEvPT_iS2_S2_S2_S2_PKc @@ -18771,12 +18650,14 @@ _Z14test_variable4Is28custom_multiple_variable_xorIsEEvPT_iS2_S2_S2_S2_PKc: # @_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB59_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI59_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI59_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -18810,12 +18691,8 @@ _Z14test_variable4Is28custom_multiple_variable_xorIsEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Is28custom_multiple_variable_xorIsEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end59-_Z14test_variable4Is28custom_multiple_variable_xorIsEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1It19custom_add_variableItEEvPT_iS2_PKc -.LCPI60_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1It19custom_add_variableItEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1It19custom_add_variableItEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1It19custom_add_variableItEEvPT_iS2_PKc + .weak _Z14test_variable1It19custom_add_variableItEEvPT_iS2_PKc # -- Begin function _Z14test_variable1It19custom_add_variableItEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1It19custom_add_variableItEEvPT_iS2_PKc,@function _Z14test_variable1It19custom_add_variableItEEvPT_iS2_PKc: # @_Z14test_variable1It19custom_add_variableItEEvPT_iS2_PKc @@ -19067,12 +18944,14 @@ _Z14test_variable1It19custom_add_variableItEEvPT_iS2_PKc: # @_Z14test_variable1I move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB60_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI60_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI60_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -19106,12 +18985,8 @@ _Z14test_variable1It19custom_add_variableItEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1It19custom_add_variableItEEvPT_iS2_PKc, .Lfunc_end60-_Z14test_variable1It19custom_add_variableItEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z22test_hoisted_variable1It19custom_add_variableItEEvPT_iS2_PKc -.LCPI61_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z22test_hoisted_variable1It19custom_add_variableItEEvPT_iS2_PKc,"axG",@progbits,_Z22test_hoisted_variable1It19custom_add_variableItEEvPT_iS2_PKc,comdat - .weak _Z22test_hoisted_variable1It19custom_add_variableItEEvPT_iS2_PKc + .weak _Z22test_hoisted_variable1It19custom_add_variableItEEvPT_iS2_PKc # -- Begin function _Z22test_hoisted_variable1It19custom_add_variableItEEvPT_iS2_PKc .p2align 5 .type _Z22test_hoisted_variable1It19custom_add_variableItEEvPT_iS2_PKc,@function _Z22test_hoisted_variable1It19custom_add_variableItEEvPT_iS2_PKc: # @_Z22test_hoisted_variable1It19custom_add_variableItEEvPT_iS2_PKc @@ -19357,12 +19232,14 @@ _Z22test_hoisted_variable1It19custom_add_variableItEEvPT_iS2_PKc: # @_Z22test_ho move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB61_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI61_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI61_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -19396,12 +19273,8 @@ _Z22test_hoisted_variable1It19custom_add_variableItEEvPT_iS2_PKc: # @_Z22test_ho .size _Z22test_hoisted_variable1It19custom_add_variableItEEvPT_iS2_PKc, .Lfunc_end61-_Z22test_hoisted_variable1It19custom_add_variableItEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4It28custom_add_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc -.LCPI62_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4It28custom_add_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4It28custom_add_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4It28custom_add_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4It28custom_add_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4It28custom_add_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4It28custom_add_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4It28custom_add_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4It28custom_add_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc @@ -19659,12 +19532,14 @@ _Z14test_variable4It28custom_add_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc: # @_ move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB62_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI62_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI62_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -19698,12 +19573,8 @@ _Z14test_variable4It28custom_add_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4It28custom_add_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end62-_Z14test_variable4It28custom_add_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1It19custom_sub_variableItEEvPT_iS2_PKc -.LCPI63_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1It19custom_sub_variableItEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1It19custom_sub_variableItEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1It19custom_sub_variableItEEvPT_iS2_PKc + .weak _Z14test_variable1It19custom_sub_variableItEEvPT_iS2_PKc # -- Begin function _Z14test_variable1It19custom_sub_variableItEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1It19custom_sub_variableItEEvPT_iS2_PKc,@function _Z14test_variable1It19custom_sub_variableItEEvPT_iS2_PKc: # @_Z14test_variable1It19custom_sub_variableItEEvPT_iS2_PKc @@ -19955,12 +19826,14 @@ _Z14test_variable1It19custom_sub_variableItEEvPT_iS2_PKc: # @_Z14test_variable1I move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB63_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI63_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI63_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -19994,12 +19867,8 @@ _Z14test_variable1It19custom_sub_variableItEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1It19custom_sub_variableItEEvPT_iS2_PKc, .Lfunc_end63-_Z14test_variable1It19custom_sub_variableItEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4It28custom_sub_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc -.LCPI64_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4It28custom_sub_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4It28custom_sub_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4It28custom_sub_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4It28custom_sub_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4It28custom_sub_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4It28custom_sub_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4It28custom_sub_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4It28custom_sub_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc @@ -20257,12 +20126,14 @@ _Z14test_variable4It28custom_sub_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc: # @_ move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB64_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI64_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI64_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -20296,12 +20167,8 @@ _Z14test_variable4It28custom_sub_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4It28custom_sub_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end64-_Z14test_variable4It28custom_sub_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1It24custom_multiply_variableItEEvPT_iS2_PKc -.LCPI65_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1It24custom_multiply_variableItEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1It24custom_multiply_variableItEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1It24custom_multiply_variableItEEvPT_iS2_PKc + .weak _Z14test_variable1It24custom_multiply_variableItEEvPT_iS2_PKc # -- Begin function _Z14test_variable1It24custom_multiply_variableItEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1It24custom_multiply_variableItEEvPT_iS2_PKc,@function _Z14test_variable1It24custom_multiply_variableItEEvPT_iS2_PKc: # @_Z14test_variable1It24custom_multiply_variableItEEvPT_iS2_PKc @@ -20545,12 +20412,14 @@ _Z14test_variable1It24custom_multiply_variableItEEvPT_iS2_PKc: # @_Z14test_varia move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB65_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI65_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI65_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -20584,12 +20453,8 @@ _Z14test_variable1It24custom_multiply_variableItEEvPT_iS2_PKc: # @_Z14test_varia .size _Z14test_variable1It24custom_multiply_variableItEEvPT_iS2_PKc, .Lfunc_end65-_Z14test_variable1It24custom_multiply_variableItEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4It33custom_multiply_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc -.LCPI66_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4It33custom_multiply_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4It33custom_multiply_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4It33custom_multiply_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4It33custom_multiply_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4It33custom_multiply_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4It33custom_multiply_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4It33custom_multiply_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4It33custom_multiply_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc @@ -20841,12 +20706,14 @@ _Z14test_variable4It33custom_multiply_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc: move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB66_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI66_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI66_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -20880,12 +20747,8 @@ _Z14test_variable4It33custom_multiply_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc: .size _Z14test_variable4It33custom_multiply_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end66-_Z14test_variable4It33custom_multiply_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4It34custom_multiply_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc -.LCPI67_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4It34custom_multiply_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4It34custom_multiply_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4It34custom_multiply_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4It34custom_multiply_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4It34custom_multiply_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4It34custom_multiply_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4It34custom_multiply_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4It34custom_multiply_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc @@ -21147,12 +21010,14 @@ _Z14test_variable4It34custom_multiply_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB67_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI67_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI67_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -21186,12 +21051,8 @@ _Z14test_variable4It34custom_multiply_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc .size _Z14test_variable4It34custom_multiply_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end67-_Z14test_variable4It34custom_multiply_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1It22custom_divide_variableItEEvPT_iS2_PKc -.LCPI68_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1It22custom_divide_variableItEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1It22custom_divide_variableItEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1It22custom_divide_variableItEEvPT_iS2_PKc + .weak _Z14test_variable1It22custom_divide_variableItEEvPT_iS2_PKc # -- Begin function _Z14test_variable1It22custom_divide_variableItEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1It22custom_divide_variableItEEvPT_iS2_PKc,@function _Z14test_variable1It22custom_divide_variableItEEvPT_iS2_PKc: # @_Z14test_variable1It22custom_divide_variableItEEvPT_iS2_PKc @@ -21466,12 +21327,14 @@ _Z14test_variable1It22custom_divide_variableItEEvPT_iS2_PKc: # @_Z14test_variabl move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB68_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI68_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI68_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -21505,12 +21368,8 @@ _Z14test_variable1It22custom_divide_variableItEEvPT_iS2_PKc: # @_Z14test_variabl .size _Z14test_variable1It22custom_divide_variableItEEvPT_iS2_PKc, .Lfunc_end68-_Z14test_variable1It22custom_divide_variableItEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4It31custom_divide_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc -.LCPI69_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4It31custom_divide_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4It31custom_divide_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4It31custom_divide_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4It31custom_divide_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4It31custom_divide_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4It31custom_divide_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4It31custom_divide_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4It31custom_divide_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc @@ -21875,12 +21734,14 @@ _Z14test_variable4It31custom_divide_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc: # move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB69_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI69_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI69_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -21914,12 +21775,8 @@ _Z14test_variable4It31custom_divide_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc: # .size _Z14test_variable4It31custom_divide_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end69-_Z14test_variable4It31custom_divide_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4It32custom_divide_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc -.LCPI70_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4It32custom_divide_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4It32custom_divide_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4It32custom_divide_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4It32custom_divide_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4It32custom_divide_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4It32custom_divide_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4It32custom_divide_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4It32custom_divide_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc @@ -22177,12 +22034,14 @@ _Z14test_variable4It32custom_divide_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc: move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB70_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI70_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI70_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -22216,12 +22075,8 @@ _Z14test_variable4It32custom_divide_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc: .size _Z14test_variable4It32custom_divide_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end70-_Z14test_variable4It32custom_divide_multiple_variable2ItEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4It30custom_mixed_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc -.LCPI71_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4It30custom_mixed_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4It30custom_mixed_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4It30custom_mixed_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4It30custom_mixed_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4It30custom_mixed_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4It30custom_mixed_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4It30custom_mixed_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4It30custom_mixed_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc @@ -22479,12 +22334,14 @@ _Z14test_variable4It30custom_mixed_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc: # move $a2, $a0 ld.w $a0, $s6, %pc_lo12(current_test) .LBB71_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI71_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI71_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -22518,12 +22375,8 @@ _Z14test_variable4It30custom_mixed_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc: # .size _Z14test_variable4It30custom_mixed_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end71-_Z14test_variable4It30custom_mixed_multiple_variableItEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1It19custom_variable_andItEEvPT_iS2_PKc -.LCPI72_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1It19custom_variable_andItEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1It19custom_variable_andItEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1It19custom_variable_andItEEvPT_iS2_PKc + .weak _Z14test_variable1It19custom_variable_andItEEvPT_iS2_PKc # -- Begin function _Z14test_variable1It19custom_variable_andItEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1It19custom_variable_andItEEvPT_iS2_PKc,@function _Z14test_variable1It19custom_variable_andItEEvPT_iS2_PKc: # @_Z14test_variable1It19custom_variable_andItEEvPT_iS2_PKc @@ -22775,12 +22628,14 @@ _Z14test_variable1It19custom_variable_andItEEvPT_iS2_PKc: # @_Z14test_variable1I move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB72_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI72_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI72_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -22814,12 +22669,8 @@ _Z14test_variable1It19custom_variable_andItEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1It19custom_variable_andItEEvPT_iS2_PKc, .Lfunc_end72-_Z14test_variable1It19custom_variable_andItEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4It28custom_multiple_variable_andItEEvPT_iS2_S2_S2_S2_PKc -.LCPI73_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4It28custom_multiple_variable_andItEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4It28custom_multiple_variable_andItEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4It28custom_multiple_variable_andItEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4It28custom_multiple_variable_andItEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4It28custom_multiple_variable_andItEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4It28custom_multiple_variable_andItEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4It28custom_multiple_variable_andItEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4It28custom_multiple_variable_andItEEvPT_iS2_S2_S2_S2_PKc @@ -23090,12 +22941,14 @@ _Z14test_variable4It28custom_multiple_variable_andItEEvPT_iS2_S2_S2_S2_PKc: # @_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB73_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI73_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI73_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -23129,12 +22982,8 @@ _Z14test_variable4It28custom_multiple_variable_andItEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4It28custom_multiple_variable_andItEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end73-_Z14test_variable4It28custom_multiple_variable_andItEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1It18custom_variable_orItEEvPT_iS2_PKc -.LCPI74_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1It18custom_variable_orItEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1It18custom_variable_orItEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1It18custom_variable_orItEEvPT_iS2_PKc + .weak _Z14test_variable1It18custom_variable_orItEEvPT_iS2_PKc # -- Begin function _Z14test_variable1It18custom_variable_orItEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1It18custom_variable_orItEEvPT_iS2_PKc,@function _Z14test_variable1It18custom_variable_orItEEvPT_iS2_PKc: # @_Z14test_variable1It18custom_variable_orItEEvPT_iS2_PKc @@ -23386,12 +23235,14 @@ _Z14test_variable1It18custom_variable_orItEEvPT_iS2_PKc: # @_Z14test_variable1It move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB74_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI74_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI74_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -23425,12 +23276,8 @@ _Z14test_variable1It18custom_variable_orItEEvPT_iS2_PKc: # @_Z14test_variable1It .size _Z14test_variable1It18custom_variable_orItEEvPT_iS2_PKc, .Lfunc_end74-_Z14test_variable1It18custom_variable_orItEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4It27custom_multiple_variable_orItEEvPT_iS2_S2_S2_S2_PKc -.LCPI75_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4It27custom_multiple_variable_orItEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4It27custom_multiple_variable_orItEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4It27custom_multiple_variable_orItEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4It27custom_multiple_variable_orItEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4It27custom_multiple_variable_orItEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4It27custom_multiple_variable_orItEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4It27custom_multiple_variable_orItEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4It27custom_multiple_variable_orItEEvPT_iS2_S2_S2_S2_PKc @@ -23701,12 +23548,14 @@ _Z14test_variable4It27custom_multiple_variable_orItEEvPT_iS2_S2_S2_S2_PKc: # @_Z move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB75_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI75_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI75_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -23740,12 +23589,8 @@ _Z14test_variable4It27custom_multiple_variable_orItEEvPT_iS2_S2_S2_S2_PKc: # @_Z .size _Z14test_variable4It27custom_multiple_variable_orItEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end75-_Z14test_variable4It27custom_multiple_variable_orItEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1It19custom_variable_xorItEEvPT_iS2_PKc -.LCPI76_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1It19custom_variable_xorItEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1It19custom_variable_xorItEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1It19custom_variable_xorItEEvPT_iS2_PKc + .weak _Z14test_variable1It19custom_variable_xorItEEvPT_iS2_PKc # -- Begin function _Z14test_variable1It19custom_variable_xorItEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1It19custom_variable_xorItEEvPT_iS2_PKc,@function _Z14test_variable1It19custom_variable_xorItEEvPT_iS2_PKc: # @_Z14test_variable1It19custom_variable_xorItEEvPT_iS2_PKc @@ -23997,12 +23842,14 @@ _Z14test_variable1It19custom_variable_xorItEEvPT_iS2_PKc: # @_Z14test_variable1I move $a2, $a0 ld.w $a0, $s4, %pc_lo12(current_test) .LBB76_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI76_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI76_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -24036,12 +23883,8 @@ _Z14test_variable1It19custom_variable_xorItEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1It19custom_variable_xorItEEvPT_iS2_PKc, .Lfunc_end76-_Z14test_variable1It19custom_variable_xorItEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4It28custom_multiple_variable_xorItEEvPT_iS2_S2_S2_S2_PKc -.LCPI77_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4It28custom_multiple_variable_xorItEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4It28custom_multiple_variable_xorItEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4It28custom_multiple_variable_xorItEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4It28custom_multiple_variable_xorItEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4It28custom_multiple_variable_xorItEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4It28custom_multiple_variable_xorItEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4It28custom_multiple_variable_xorItEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4It28custom_multiple_variable_xorItEEvPT_iS2_S2_S2_S2_PKc @@ -24312,12 +24155,14 @@ _Z14test_variable4It28custom_multiple_variable_xorItEEvPT_iS2_S2_S2_S2_PKc: # @_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB77_27: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI77_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI77_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -24351,12 +24196,8 @@ _Z14test_variable4It28custom_multiple_variable_xorItEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4It28custom_multiple_variable_xorItEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end77-_Z14test_variable4It28custom_multiple_variable_xorItEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc -.LCPI78_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc + .weak _Z14test_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc,@function _Z14test_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc: # @_Z14test_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc @@ -24548,12 +24389,14 @@ _Z14test_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc: # @_Z14test_variable1I ld.w $a0, $s4, %pc_lo12(current_test) .LBB78_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI78_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI78_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -24586,12 +24429,8 @@ _Z14test_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc, .Lfunc_end78-_Z14test_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z22test_hoisted_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc -.LCPI79_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z22test_hoisted_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc,"axG",@progbits,_Z22test_hoisted_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc,comdat - .weak _Z22test_hoisted_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc + .weak _Z22test_hoisted_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc # -- Begin function _Z22test_hoisted_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc .p2align 5 .type _Z22test_hoisted_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc,@function _Z22test_hoisted_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc: # @_Z22test_hoisted_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc @@ -24785,12 +24624,14 @@ _Z22test_hoisted_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc: # @_Z22test_ho ld.w $a0, $s3, %pc_lo12(current_test) .LBB79_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI79_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI79_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -24823,12 +24664,8 @@ _Z22test_hoisted_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc: # @_Z22test_ho .size _Z22test_hoisted_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc, .Lfunc_end79-_Z22test_hoisted_variable1Ii19custom_add_variableIiEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ii28custom_add_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc -.LCPI80_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ii28custom_add_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ii28custom_add_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ii28custom_add_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ii28custom_add_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ii28custom_add_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ii28custom_add_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ii28custom_add_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ii28custom_add_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc @@ -25026,12 +24863,14 @@ _Z14test_variable4Ii28custom_add_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc: # @_ ld.w $a0, $s6, %pc_lo12(current_test) .LBB80_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI80_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI80_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -25064,12 +24903,8 @@ _Z14test_variable4Ii28custom_add_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Ii28custom_add_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end80-_Z14test_variable4Ii28custom_add_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ii19custom_sub_variableIiEEvPT_iS2_PKc -.LCPI81_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ii19custom_sub_variableIiEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ii19custom_sub_variableIiEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ii19custom_sub_variableIiEEvPT_iS2_PKc + .weak _Z14test_variable1Ii19custom_sub_variableIiEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ii19custom_sub_variableIiEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ii19custom_sub_variableIiEEvPT_iS2_PKc,@function _Z14test_variable1Ii19custom_sub_variableIiEEvPT_iS2_PKc: # @_Z14test_variable1Ii19custom_sub_variableIiEEvPT_iS2_PKc @@ -25260,12 +25095,14 @@ _Z14test_variable1Ii19custom_sub_variableIiEEvPT_iS2_PKc: # @_Z14test_variable1I ld.w $a0, $s4, %pc_lo12(current_test) .LBB81_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI81_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI81_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -25298,12 +25135,8 @@ _Z14test_variable1Ii19custom_sub_variableIiEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Ii19custom_sub_variableIiEEvPT_iS2_PKc, .Lfunc_end81-_Z14test_variable1Ii19custom_sub_variableIiEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ii28custom_sub_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc -.LCPI82_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ii28custom_sub_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ii28custom_sub_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ii28custom_sub_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ii28custom_sub_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ii28custom_sub_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ii28custom_sub_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ii28custom_sub_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ii28custom_sub_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc @@ -25500,12 +25333,14 @@ _Z14test_variable4Ii28custom_sub_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc: # @_ ld.w $a0, $s6, %pc_lo12(current_test) .LBB82_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI82_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI82_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -25538,12 +25373,8 @@ _Z14test_variable4Ii28custom_sub_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Ii28custom_sub_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end82-_Z14test_variable4Ii28custom_sub_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ii24custom_multiply_variableIiEEvPT_iS2_PKc -.LCPI83_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ii24custom_multiply_variableIiEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ii24custom_multiply_variableIiEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ii24custom_multiply_variableIiEEvPT_iS2_PKc + .weak _Z14test_variable1Ii24custom_multiply_variableIiEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ii24custom_multiply_variableIiEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ii24custom_multiply_variableIiEEvPT_iS2_PKc,@function _Z14test_variable1Ii24custom_multiply_variableIiEEvPT_iS2_PKc: # @_Z14test_variable1Ii24custom_multiply_variableIiEEvPT_iS2_PKc @@ -25733,12 +25564,14 @@ _Z14test_variable1Ii24custom_multiply_variableIiEEvPT_iS2_PKc: # @_Z14test_varia ld.w $a0, $s4, %pc_lo12(current_test) .LBB83_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI83_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI83_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -25771,12 +25604,8 @@ _Z14test_variable1Ii24custom_multiply_variableIiEEvPT_iS2_PKc: # @_Z14test_varia .size _Z14test_variable1Ii24custom_multiply_variableIiEEvPT_iS2_PKc, .Lfunc_end83-_Z14test_variable1Ii24custom_multiply_variableIiEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ii33custom_multiply_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc -.LCPI84_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ii33custom_multiply_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ii33custom_multiply_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ii33custom_multiply_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ii33custom_multiply_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ii33custom_multiply_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ii33custom_multiply_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ii33custom_multiply_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ii33custom_multiply_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc @@ -25974,12 +25803,14 @@ _Z14test_variable4Ii33custom_multiply_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc: ld.w $a0, $s6, %pc_lo12(current_test) .LBB84_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI84_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI84_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -26012,12 +25843,8 @@ _Z14test_variable4Ii33custom_multiply_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc: .size _Z14test_variable4Ii33custom_multiply_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end84-_Z14test_variable4Ii33custom_multiply_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ii34custom_multiply_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc -.LCPI85_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ii34custom_multiply_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ii34custom_multiply_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ii34custom_multiply_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ii34custom_multiply_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ii34custom_multiply_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ii34custom_multiply_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ii34custom_multiply_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ii34custom_multiply_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc @@ -26218,12 +26045,14 @@ _Z14test_variable4Ii34custom_multiply_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc ld.w $a0, $s2, %pc_lo12(current_test) .LBB85_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI85_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI85_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -26256,12 +26085,8 @@ _Z14test_variable4Ii34custom_multiply_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc .size _Z14test_variable4Ii34custom_multiply_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end85-_Z14test_variable4Ii34custom_multiply_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ii22custom_divide_variableIiEEvPT_iS2_PKc -.LCPI86_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ii22custom_divide_variableIiEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ii22custom_divide_variableIiEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ii22custom_divide_variableIiEEvPT_iS2_PKc + .weak _Z14test_variable1Ii22custom_divide_variableIiEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ii22custom_divide_variableIiEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ii22custom_divide_variableIiEEvPT_iS2_PKc,@function _Z14test_variable1Ii22custom_divide_variableIiEEvPT_iS2_PKc: # @_Z14test_variable1Ii22custom_divide_variableIiEEvPT_iS2_PKc @@ -26453,12 +26278,14 @@ _Z14test_variable1Ii22custom_divide_variableIiEEvPT_iS2_PKc: # @_Z14test_variabl ld.w $a0, $s4, %pc_lo12(current_test) .LBB86_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI86_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI86_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -26491,12 +26318,8 @@ _Z14test_variable1Ii22custom_divide_variableIiEEvPT_iS2_PKc: # @_Z14test_variabl .size _Z14test_variable1Ii22custom_divide_variableIiEEvPT_iS2_PKc, .Lfunc_end86-_Z14test_variable1Ii22custom_divide_variableIiEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ii31custom_divide_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc -.LCPI87_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ii31custom_divide_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ii31custom_divide_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ii31custom_divide_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ii31custom_divide_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ii31custom_divide_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ii31custom_divide_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ii31custom_divide_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ii31custom_divide_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc @@ -26723,12 +26546,14 @@ _Z14test_variable4Ii31custom_divide_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc: # ld.w $a0, $s3, %pc_lo12(current_test) .LBB87_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI87_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI87_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -26761,12 +26586,8 @@ _Z14test_variable4Ii31custom_divide_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc: # .size _Z14test_variable4Ii31custom_divide_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end87-_Z14test_variable4Ii31custom_divide_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ii32custom_divide_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc -.LCPI88_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ii32custom_divide_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ii32custom_divide_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ii32custom_divide_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ii32custom_divide_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ii32custom_divide_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ii32custom_divide_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ii32custom_divide_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ii32custom_divide_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc @@ -26964,12 +26785,14 @@ _Z14test_variable4Ii32custom_divide_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc: ld.w $a0, $s6, %pc_lo12(current_test) .LBB88_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI88_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI88_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -27002,12 +26825,8 @@ _Z14test_variable4Ii32custom_divide_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc: .size _Z14test_variable4Ii32custom_divide_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end88-_Z14test_variable4Ii32custom_divide_multiple_variable2IiEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ii30custom_mixed_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc -.LCPI89_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ii30custom_mixed_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ii30custom_mixed_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ii30custom_mixed_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ii30custom_mixed_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ii30custom_mixed_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ii30custom_mixed_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ii30custom_mixed_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ii30custom_mixed_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc @@ -27216,12 +27035,14 @@ _Z14test_variable4Ii30custom_mixed_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc: # ld.w $a0, $s3, %pc_lo12(current_test) .LBB89_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI89_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI89_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -27254,12 +27075,8 @@ _Z14test_variable4Ii30custom_mixed_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc: # .size _Z14test_variable4Ii30custom_mixed_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end89-_Z14test_variable4Ii30custom_mixed_multiple_variableIiEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ii19custom_variable_andIiEEvPT_iS2_PKc -.LCPI90_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ii19custom_variable_andIiEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ii19custom_variable_andIiEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ii19custom_variable_andIiEEvPT_iS2_PKc + .weak _Z14test_variable1Ii19custom_variable_andIiEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ii19custom_variable_andIiEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ii19custom_variable_andIiEEvPT_iS2_PKc,@function _Z14test_variable1Ii19custom_variable_andIiEEvPT_iS2_PKc: # @_Z14test_variable1Ii19custom_variable_andIiEEvPT_iS2_PKc @@ -27451,12 +27268,14 @@ _Z14test_variable1Ii19custom_variable_andIiEEvPT_iS2_PKc: # @_Z14test_variable1I ld.w $a0, $s4, %pc_lo12(current_test) .LBB90_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI90_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI90_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -27489,12 +27308,8 @@ _Z14test_variable1Ii19custom_variable_andIiEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Ii19custom_variable_andIiEEvPT_iS2_PKc, .Lfunc_end90-_Z14test_variable1Ii19custom_variable_andIiEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ii28custom_multiple_variable_andIiEEvPT_iS2_S2_S2_S2_PKc -.LCPI91_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ii28custom_multiple_variable_andIiEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ii28custom_multiple_variable_andIiEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ii28custom_multiple_variable_andIiEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ii28custom_multiple_variable_andIiEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ii28custom_multiple_variable_andIiEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ii28custom_multiple_variable_andIiEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ii28custom_multiple_variable_andIiEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ii28custom_multiple_variable_andIiEEvPT_iS2_S2_S2_S2_PKc @@ -27702,12 +27517,14 @@ _Z14test_variable4Ii28custom_multiple_variable_andIiEEvPT_iS2_S2_S2_S2_PKc: # @_ ld.w $a0, $s6, %pc_lo12(current_test) .LBB91_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI91_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI91_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -27740,12 +27557,8 @@ _Z14test_variable4Ii28custom_multiple_variable_andIiEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Ii28custom_multiple_variable_andIiEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end91-_Z14test_variable4Ii28custom_multiple_variable_andIiEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ii18custom_variable_orIiEEvPT_iS2_PKc -.LCPI92_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ii18custom_variable_orIiEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ii18custom_variable_orIiEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ii18custom_variable_orIiEEvPT_iS2_PKc + .weak _Z14test_variable1Ii18custom_variable_orIiEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ii18custom_variable_orIiEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ii18custom_variable_orIiEEvPT_iS2_PKc,@function _Z14test_variable1Ii18custom_variable_orIiEEvPT_iS2_PKc: # @_Z14test_variable1Ii18custom_variable_orIiEEvPT_iS2_PKc @@ -27937,12 +27750,14 @@ _Z14test_variable1Ii18custom_variable_orIiEEvPT_iS2_PKc: # @_Z14test_variable1Ii ld.w $a0, $s4, %pc_lo12(current_test) .LBB92_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI92_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI92_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -27975,12 +27790,8 @@ _Z14test_variable1Ii18custom_variable_orIiEEvPT_iS2_PKc: # @_Z14test_variable1Ii .size _Z14test_variable1Ii18custom_variable_orIiEEvPT_iS2_PKc, .Lfunc_end92-_Z14test_variable1Ii18custom_variable_orIiEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ii27custom_multiple_variable_orIiEEvPT_iS2_S2_S2_S2_PKc -.LCPI93_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ii27custom_multiple_variable_orIiEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ii27custom_multiple_variable_orIiEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ii27custom_multiple_variable_orIiEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ii27custom_multiple_variable_orIiEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ii27custom_multiple_variable_orIiEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ii27custom_multiple_variable_orIiEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ii27custom_multiple_variable_orIiEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ii27custom_multiple_variable_orIiEEvPT_iS2_S2_S2_S2_PKc @@ -28188,12 +27999,14 @@ _Z14test_variable4Ii27custom_multiple_variable_orIiEEvPT_iS2_S2_S2_S2_PKc: # @_Z ld.w $a0, $s6, %pc_lo12(current_test) .LBB93_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI93_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI93_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -28226,12 +28039,8 @@ _Z14test_variable4Ii27custom_multiple_variable_orIiEEvPT_iS2_S2_S2_S2_PKc: # @_Z .size _Z14test_variable4Ii27custom_multiple_variable_orIiEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end93-_Z14test_variable4Ii27custom_multiple_variable_orIiEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ii19custom_variable_xorIiEEvPT_iS2_PKc -.LCPI94_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ii19custom_variable_xorIiEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ii19custom_variable_xorIiEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ii19custom_variable_xorIiEEvPT_iS2_PKc + .weak _Z14test_variable1Ii19custom_variable_xorIiEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ii19custom_variable_xorIiEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ii19custom_variable_xorIiEEvPT_iS2_PKc,@function _Z14test_variable1Ii19custom_variable_xorIiEEvPT_iS2_PKc: # @_Z14test_variable1Ii19custom_variable_xorIiEEvPT_iS2_PKc @@ -28422,12 +28231,14 @@ _Z14test_variable1Ii19custom_variable_xorIiEEvPT_iS2_PKc: # @_Z14test_variable1I ld.w $a0, $s4, %pc_lo12(current_test) .LBB94_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI94_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI94_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -28460,12 +28271,8 @@ _Z14test_variable1Ii19custom_variable_xorIiEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Ii19custom_variable_xorIiEEvPT_iS2_PKc, .Lfunc_end94-_Z14test_variable1Ii19custom_variable_xorIiEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ii28custom_multiple_variable_xorIiEEvPT_iS2_S2_S2_S2_PKc -.LCPI95_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ii28custom_multiple_variable_xorIiEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ii28custom_multiple_variable_xorIiEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ii28custom_multiple_variable_xorIiEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ii28custom_multiple_variable_xorIiEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ii28custom_multiple_variable_xorIiEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ii28custom_multiple_variable_xorIiEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ii28custom_multiple_variable_xorIiEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ii28custom_multiple_variable_xorIiEEvPT_iS2_S2_S2_S2_PKc @@ -28675,12 +28482,14 @@ _Z14test_variable4Ii28custom_multiple_variable_xorIiEEvPT_iS2_S2_S2_S2_PKc: # @_ ld.w $a0, $s3, %pc_lo12(current_test) .LBB95_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI95_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI95_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -28713,12 +28522,8 @@ _Z14test_variable4Ii28custom_multiple_variable_xorIiEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Ii28custom_multiple_variable_xorIiEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end95-_Z14test_variable4Ii28custom_multiple_variable_xorIiEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc -.LCPI96_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc + .weak _Z14test_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc,@function _Z14test_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc: # @_Z14test_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc @@ -28913,12 +28718,14 @@ _Z14test_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc: # @_Z14test_variable1I ld.w $a0, $s4, %pc_lo12(current_test) .LBB96_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI96_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI96_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -28951,12 +28758,8 @@ _Z14test_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc, .Lfunc_end96-_Z14test_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z22test_hoisted_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc -.LCPI97_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z22test_hoisted_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc,"axG",@progbits,_Z22test_hoisted_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc,comdat - .weak _Z22test_hoisted_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc + .weak _Z22test_hoisted_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc # -- Begin function _Z22test_hoisted_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc .p2align 5 .type _Z22test_hoisted_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc,@function _Z22test_hoisted_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc: # @_Z22test_hoisted_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc @@ -29150,12 +28953,14 @@ _Z22test_hoisted_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc: # @_Z22test_ho ld.w $a0, $s3, %pc_lo12(current_test) .LBB97_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI97_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI97_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -29188,12 +28993,8 @@ _Z22test_hoisted_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc: # @_Z22test_ho .size _Z22test_hoisted_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc, .Lfunc_end97-_Z22test_hoisted_variable1Ij19custom_add_variableIjEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ij28custom_add_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc -.LCPI98_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ij28custom_add_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ij28custom_add_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ij28custom_add_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ij28custom_add_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ij28custom_add_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ij28custom_add_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ij28custom_add_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ij28custom_add_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc @@ -29394,12 +29195,14 @@ _Z14test_variable4Ij28custom_add_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc: # @_ ld.w $a0, $s6, %pc_lo12(current_test) .LBB98_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI98_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI98_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -29432,12 +29235,8 @@ _Z14test_variable4Ij28custom_add_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Ij28custom_add_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end98-_Z14test_variable4Ij28custom_add_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ij19custom_sub_variableIjEEvPT_iS2_PKc -.LCPI99_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ij19custom_sub_variableIjEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ij19custom_sub_variableIjEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ij19custom_sub_variableIjEEvPT_iS2_PKc + .weak _Z14test_variable1Ij19custom_sub_variableIjEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ij19custom_sub_variableIjEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ij19custom_sub_variableIjEEvPT_iS2_PKc,@function _Z14test_variable1Ij19custom_sub_variableIjEEvPT_iS2_PKc: # @_Z14test_variable1Ij19custom_sub_variableIjEEvPT_iS2_PKc @@ -29632,12 +29431,14 @@ _Z14test_variable1Ij19custom_sub_variableIjEEvPT_iS2_PKc: # @_Z14test_variable1I ld.w $a0, $s4, %pc_lo12(current_test) .LBB99_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI99_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI99_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -29670,12 +29471,8 @@ _Z14test_variable1Ij19custom_sub_variableIjEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Ij19custom_sub_variableIjEEvPT_iS2_PKc, .Lfunc_end99-_Z14test_variable1Ij19custom_sub_variableIjEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ij28custom_sub_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc -.LCPI100_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ij28custom_sub_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ij28custom_sub_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ij28custom_sub_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ij28custom_sub_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ij28custom_sub_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ij28custom_sub_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ij28custom_sub_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ij28custom_sub_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc @@ -29876,12 +29673,14 @@ _Z14test_variable4Ij28custom_sub_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc: # @_ ld.w $a0, $s6, %pc_lo12(current_test) .LBB100_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI100_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI100_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -29914,12 +29713,8 @@ _Z14test_variable4Ij28custom_sub_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Ij28custom_sub_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end100-_Z14test_variable4Ij28custom_sub_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ij24custom_multiply_variableIjEEvPT_iS2_PKc -.LCPI101_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ij24custom_multiply_variableIjEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ij24custom_multiply_variableIjEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ij24custom_multiply_variableIjEEvPT_iS2_PKc + .weak _Z14test_variable1Ij24custom_multiply_variableIjEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ij24custom_multiply_variableIjEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ij24custom_multiply_variableIjEEvPT_iS2_PKc,@function _Z14test_variable1Ij24custom_multiply_variableIjEEvPT_iS2_PKc: # @_Z14test_variable1Ij24custom_multiply_variableIjEEvPT_iS2_PKc @@ -30109,12 +29904,14 @@ _Z14test_variable1Ij24custom_multiply_variableIjEEvPT_iS2_PKc: # @_Z14test_varia ld.w $a0, $s4, %pc_lo12(current_test) .LBB101_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI101_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI101_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -30147,12 +29944,8 @@ _Z14test_variable1Ij24custom_multiply_variableIjEEvPT_iS2_PKc: # @_Z14test_varia .size _Z14test_variable1Ij24custom_multiply_variableIjEEvPT_iS2_PKc, .Lfunc_end101-_Z14test_variable1Ij24custom_multiply_variableIjEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ij33custom_multiply_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc -.LCPI102_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ij33custom_multiply_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ij33custom_multiply_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ij33custom_multiply_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ij33custom_multiply_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ij33custom_multiply_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ij33custom_multiply_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ij33custom_multiply_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ij33custom_multiply_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc @@ -30350,12 +30143,14 @@ _Z14test_variable4Ij33custom_multiply_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc: ld.w $a0, $s6, %pc_lo12(current_test) .LBB102_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI102_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI102_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -30388,12 +30183,8 @@ _Z14test_variable4Ij33custom_multiply_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc: .size _Z14test_variable4Ij33custom_multiply_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end102-_Z14test_variable4Ij33custom_multiply_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ij34custom_multiply_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc -.LCPI103_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ij34custom_multiply_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ij34custom_multiply_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ij34custom_multiply_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ij34custom_multiply_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ij34custom_multiply_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ij34custom_multiply_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ij34custom_multiply_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ij34custom_multiply_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc @@ -30597,12 +30388,14 @@ _Z14test_variable4Ij34custom_multiply_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc ld.w $a0, $s6, %pc_lo12(current_test) .LBB103_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI103_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI103_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -30635,12 +30428,8 @@ _Z14test_variable4Ij34custom_multiply_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc .size _Z14test_variable4Ij34custom_multiply_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end103-_Z14test_variable4Ij34custom_multiply_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ij22custom_divide_variableIjEEvPT_iS2_PKc -.LCPI104_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ij22custom_divide_variableIjEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ij22custom_divide_variableIjEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ij22custom_divide_variableIjEEvPT_iS2_PKc + .weak _Z14test_variable1Ij22custom_divide_variableIjEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ij22custom_divide_variableIjEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ij22custom_divide_variableIjEEvPT_iS2_PKc,@function _Z14test_variable1Ij22custom_divide_variableIjEEvPT_iS2_PKc: # @_Z14test_variable1Ij22custom_divide_variableIjEEvPT_iS2_PKc @@ -30837,12 +30626,14 @@ _Z14test_variable1Ij22custom_divide_variableIjEEvPT_iS2_PKc: # @_Z14test_variabl ld.w $a0, $s4, %pc_lo12(current_test) .LBB104_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI104_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI104_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -30875,12 +30666,8 @@ _Z14test_variable1Ij22custom_divide_variableIjEEvPT_iS2_PKc: # @_Z14test_variabl .size _Z14test_variable1Ij22custom_divide_variableIjEEvPT_iS2_PKc, .Lfunc_end104-_Z14test_variable1Ij22custom_divide_variableIjEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ij31custom_divide_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc -.LCPI105_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ij31custom_divide_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ij31custom_divide_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ij31custom_divide_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ij31custom_divide_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ij31custom_divide_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ij31custom_divide_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ij31custom_divide_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ij31custom_divide_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc @@ -31112,12 +30899,14 @@ _Z14test_variable4Ij31custom_divide_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc: # ld.w $a0, $s3, %pc_lo12(current_test) .LBB105_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI105_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI105_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -31150,12 +30939,8 @@ _Z14test_variable4Ij31custom_divide_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc: # .size _Z14test_variable4Ij31custom_divide_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end105-_Z14test_variable4Ij31custom_divide_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ij32custom_divide_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc -.LCPI106_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ij32custom_divide_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ij32custom_divide_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ij32custom_divide_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ij32custom_divide_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ij32custom_divide_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ij32custom_divide_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ij32custom_divide_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ij32custom_divide_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc @@ -31356,12 +31141,14 @@ _Z14test_variable4Ij32custom_divide_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc: ld.w $a0, $s6, %pc_lo12(current_test) .LBB106_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI106_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI106_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -31394,12 +31181,8 @@ _Z14test_variable4Ij32custom_divide_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc: .size _Z14test_variable4Ij32custom_divide_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end106-_Z14test_variable4Ij32custom_divide_multiple_variable2IjEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ij30custom_mixed_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc -.LCPI107_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ij30custom_mixed_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ij30custom_mixed_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ij30custom_mixed_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ij30custom_mixed_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ij30custom_mixed_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ij30custom_mixed_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ij30custom_mixed_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ij30custom_mixed_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc @@ -31611,12 +31394,14 @@ _Z14test_variable4Ij30custom_mixed_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc: # ld.w $a0, $s3, %pc_lo12(current_test) .LBB107_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI107_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI107_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -31649,12 +31434,8 @@ _Z14test_variable4Ij30custom_mixed_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc: # .size _Z14test_variable4Ij30custom_mixed_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end107-_Z14test_variable4Ij30custom_mixed_multiple_variableIjEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ij19custom_variable_andIjEEvPT_iS2_PKc -.LCPI108_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ij19custom_variable_andIjEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ij19custom_variable_andIjEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ij19custom_variable_andIjEEvPT_iS2_PKc + .weak _Z14test_variable1Ij19custom_variable_andIjEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ij19custom_variable_andIjEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ij19custom_variable_andIjEEvPT_iS2_PKc,@function _Z14test_variable1Ij19custom_variable_andIjEEvPT_iS2_PKc: # @_Z14test_variable1Ij19custom_variable_andIjEEvPT_iS2_PKc @@ -31849,12 +31630,14 @@ _Z14test_variable1Ij19custom_variable_andIjEEvPT_iS2_PKc: # @_Z14test_variable1I ld.w $a0, $s4, %pc_lo12(current_test) .LBB108_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI108_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI108_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -31887,12 +31670,8 @@ _Z14test_variable1Ij19custom_variable_andIjEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Ij19custom_variable_andIjEEvPT_iS2_PKc, .Lfunc_end108-_Z14test_variable1Ij19custom_variable_andIjEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ij28custom_multiple_variable_andIjEEvPT_iS2_S2_S2_S2_PKc -.LCPI109_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ij28custom_multiple_variable_andIjEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ij28custom_multiple_variable_andIjEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ij28custom_multiple_variable_andIjEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ij28custom_multiple_variable_andIjEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ij28custom_multiple_variable_andIjEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ij28custom_multiple_variable_andIjEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ij28custom_multiple_variable_andIjEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ij28custom_multiple_variable_andIjEEvPT_iS2_S2_S2_S2_PKc @@ -32103,12 +31882,14 @@ _Z14test_variable4Ij28custom_multiple_variable_andIjEEvPT_iS2_S2_S2_S2_PKc: # @_ ld.w $a0, $s6, %pc_lo12(current_test) .LBB109_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI109_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI109_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -32141,12 +31922,8 @@ _Z14test_variable4Ij28custom_multiple_variable_andIjEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Ij28custom_multiple_variable_andIjEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end109-_Z14test_variable4Ij28custom_multiple_variable_andIjEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ij18custom_variable_orIjEEvPT_iS2_PKc -.LCPI110_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ij18custom_variable_orIjEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ij18custom_variable_orIjEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ij18custom_variable_orIjEEvPT_iS2_PKc + .weak _Z14test_variable1Ij18custom_variable_orIjEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ij18custom_variable_orIjEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ij18custom_variable_orIjEEvPT_iS2_PKc,@function _Z14test_variable1Ij18custom_variable_orIjEEvPT_iS2_PKc: # @_Z14test_variable1Ij18custom_variable_orIjEEvPT_iS2_PKc @@ -32341,12 +32118,14 @@ _Z14test_variable1Ij18custom_variable_orIjEEvPT_iS2_PKc: # @_Z14test_variable1Ij ld.w $a0, $s4, %pc_lo12(current_test) .LBB110_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI110_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI110_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -32379,12 +32158,8 @@ _Z14test_variable1Ij18custom_variable_orIjEEvPT_iS2_PKc: # @_Z14test_variable1Ij .size _Z14test_variable1Ij18custom_variable_orIjEEvPT_iS2_PKc, .Lfunc_end110-_Z14test_variable1Ij18custom_variable_orIjEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ij27custom_multiple_variable_orIjEEvPT_iS2_S2_S2_S2_PKc -.LCPI111_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ij27custom_multiple_variable_orIjEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ij27custom_multiple_variable_orIjEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ij27custom_multiple_variable_orIjEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ij27custom_multiple_variable_orIjEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ij27custom_multiple_variable_orIjEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ij27custom_multiple_variable_orIjEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ij27custom_multiple_variable_orIjEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ij27custom_multiple_variable_orIjEEvPT_iS2_S2_S2_S2_PKc @@ -32595,12 +32370,14 @@ _Z14test_variable4Ij27custom_multiple_variable_orIjEEvPT_iS2_S2_S2_S2_PKc: # @_Z ld.w $a0, $s6, %pc_lo12(current_test) .LBB111_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI111_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI111_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -32633,12 +32410,8 @@ _Z14test_variable4Ij27custom_multiple_variable_orIjEEvPT_iS2_S2_S2_S2_PKc: # @_Z .size _Z14test_variable4Ij27custom_multiple_variable_orIjEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end111-_Z14test_variable4Ij27custom_multiple_variable_orIjEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Ij19custom_variable_xorIjEEvPT_iS2_PKc -.LCPI112_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Ij19custom_variable_xorIjEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Ij19custom_variable_xorIjEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Ij19custom_variable_xorIjEEvPT_iS2_PKc + .weak _Z14test_variable1Ij19custom_variable_xorIjEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Ij19custom_variable_xorIjEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Ij19custom_variable_xorIjEEvPT_iS2_PKc,@function _Z14test_variable1Ij19custom_variable_xorIjEEvPT_iS2_PKc: # @_Z14test_variable1Ij19custom_variable_xorIjEEvPT_iS2_PKc @@ -32833,12 +32606,14 @@ _Z14test_variable1Ij19custom_variable_xorIjEEvPT_iS2_PKc: # @_Z14test_variable1I ld.w $a0, $s4, %pc_lo12(current_test) .LBB112_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI112_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI112_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -32871,12 +32646,8 @@ _Z14test_variable1Ij19custom_variable_xorIjEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Ij19custom_variable_xorIjEEvPT_iS2_PKc, .Lfunc_end112-_Z14test_variable1Ij19custom_variable_xorIjEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Ij28custom_multiple_variable_xorIjEEvPT_iS2_S2_S2_S2_PKc -.LCPI113_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Ij28custom_multiple_variable_xorIjEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Ij28custom_multiple_variable_xorIjEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Ij28custom_multiple_variable_xorIjEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Ij28custom_multiple_variable_xorIjEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Ij28custom_multiple_variable_xorIjEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Ij28custom_multiple_variable_xorIjEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Ij28custom_multiple_variable_xorIjEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Ij28custom_multiple_variable_xorIjEEvPT_iS2_S2_S2_S2_PKc @@ -33087,12 +32858,14 @@ _Z14test_variable4Ij28custom_multiple_variable_xorIjEEvPT_iS2_S2_S2_S2_PKc: # @_ ld.w $a0, $s6, %pc_lo12(current_test) .LBB113_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI113_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI113_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -33125,12 +32898,8 @@ _Z14test_variable4Ij28custom_multiple_variable_xorIjEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Ij28custom_multiple_variable_xorIjEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end113-_Z14test_variable4Ij28custom_multiple_variable_xorIjEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Il19custom_add_variableIlEEvPT_iS2_PKc -.LCPI114_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Il19custom_add_variableIlEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Il19custom_add_variableIlEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Il19custom_add_variableIlEEvPT_iS2_PKc + .weak _Z14test_variable1Il19custom_add_variableIlEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Il19custom_add_variableIlEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Il19custom_add_variableIlEEvPT_iS2_PKc,@function _Z14test_variable1Il19custom_add_variableIlEEvPT_iS2_PKc: # @_Z14test_variable1Il19custom_add_variableIlEEvPT_iS2_PKc @@ -33320,12 +33089,14 @@ _Z14test_variable1Il19custom_add_variableIlEEvPT_iS2_PKc: # @_Z14test_variable1I ld.w $a0, $s4, %pc_lo12(current_test) .LBB114_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI114_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI114_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -33358,12 +33129,8 @@ _Z14test_variable1Il19custom_add_variableIlEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Il19custom_add_variableIlEEvPT_iS2_PKc, .Lfunc_end114-_Z14test_variable1Il19custom_add_variableIlEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z22test_hoisted_variable1Il19custom_add_variableIlEEvPT_iS2_PKc -.LCPI115_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z22test_hoisted_variable1Il19custom_add_variableIlEEvPT_iS2_PKc,"axG",@progbits,_Z22test_hoisted_variable1Il19custom_add_variableIlEEvPT_iS2_PKc,comdat - .weak _Z22test_hoisted_variable1Il19custom_add_variableIlEEvPT_iS2_PKc + .weak _Z22test_hoisted_variable1Il19custom_add_variableIlEEvPT_iS2_PKc # -- Begin function _Z22test_hoisted_variable1Il19custom_add_variableIlEEvPT_iS2_PKc .p2align 5 .type _Z22test_hoisted_variable1Il19custom_add_variableIlEEvPT_iS2_PKc,@function _Z22test_hoisted_variable1Il19custom_add_variableIlEEvPT_iS2_PKc: # @_Z22test_hoisted_variable1Il19custom_add_variableIlEEvPT_iS2_PKc @@ -33556,12 +33323,14 @@ _Z22test_hoisted_variable1Il19custom_add_variableIlEEvPT_iS2_PKc: # @_Z22test_ho ld.w $a0, $s3, %pc_lo12(current_test) .LBB115_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI115_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI115_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -33594,12 +33363,8 @@ _Z22test_hoisted_variable1Il19custom_add_variableIlEEvPT_iS2_PKc: # @_Z22test_ho .size _Z22test_hoisted_variable1Il19custom_add_variableIlEEvPT_iS2_PKc, .Lfunc_end115-_Z22test_hoisted_variable1Il19custom_add_variableIlEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Il28custom_add_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc -.LCPI116_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Il28custom_add_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Il28custom_add_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Il28custom_add_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Il28custom_add_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Il28custom_add_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Il28custom_add_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Il28custom_add_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Il28custom_add_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc @@ -33795,12 +33560,14 @@ _Z14test_variable4Il28custom_add_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc: # @_ ld.w $a0, $s6, %pc_lo12(current_test) .LBB116_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI116_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI116_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -33833,12 +33600,8 @@ _Z14test_variable4Il28custom_add_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Il28custom_add_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end116-_Z14test_variable4Il28custom_add_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Il19custom_sub_variableIlEEvPT_iS2_PKc -.LCPI117_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Il19custom_sub_variableIlEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Il19custom_sub_variableIlEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Il19custom_sub_variableIlEEvPT_iS2_PKc + .weak _Z14test_variable1Il19custom_sub_variableIlEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Il19custom_sub_variableIlEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Il19custom_sub_variableIlEEvPT_iS2_PKc,@function _Z14test_variable1Il19custom_sub_variableIlEEvPT_iS2_PKc: # @_Z14test_variable1Il19custom_sub_variableIlEEvPT_iS2_PKc @@ -34027,12 +33790,14 @@ _Z14test_variable1Il19custom_sub_variableIlEEvPT_iS2_PKc: # @_Z14test_variable1I ld.w $a0, $s4, %pc_lo12(current_test) .LBB117_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI117_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI117_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -34065,12 +33830,8 @@ _Z14test_variable1Il19custom_sub_variableIlEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Il19custom_sub_variableIlEEvPT_iS2_PKc, .Lfunc_end117-_Z14test_variable1Il19custom_sub_variableIlEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Il28custom_sub_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc -.LCPI118_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Il28custom_sub_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Il28custom_sub_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Il28custom_sub_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Il28custom_sub_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Il28custom_sub_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Il28custom_sub_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Il28custom_sub_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Il28custom_sub_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc @@ -34265,12 +34026,14 @@ _Z14test_variable4Il28custom_sub_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc: # @_ ld.w $a0, $s6, %pc_lo12(current_test) .LBB118_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI118_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI118_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -34303,12 +34066,8 @@ _Z14test_variable4Il28custom_sub_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Il28custom_sub_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end118-_Z14test_variable4Il28custom_sub_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Il24custom_multiply_variableIlEEvPT_iS2_PKc -.LCPI119_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Il24custom_multiply_variableIlEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Il24custom_multiply_variableIlEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Il24custom_multiply_variableIlEEvPT_iS2_PKc + .weak _Z14test_variable1Il24custom_multiply_variableIlEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Il24custom_multiply_variableIlEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Il24custom_multiply_variableIlEEvPT_iS2_PKc,@function _Z14test_variable1Il24custom_multiply_variableIlEEvPT_iS2_PKc: # @_Z14test_variable1Il24custom_multiply_variableIlEEvPT_iS2_PKc @@ -34496,12 +34255,14 @@ _Z14test_variable1Il24custom_multiply_variableIlEEvPT_iS2_PKc: # @_Z14test_varia ld.w $a0, $s4, %pc_lo12(current_test) .LBB119_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI119_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI119_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -34534,12 +34295,8 @@ _Z14test_variable1Il24custom_multiply_variableIlEEvPT_iS2_PKc: # @_Z14test_varia .size _Z14test_variable1Il24custom_multiply_variableIlEEvPT_iS2_PKc, .Lfunc_end119-_Z14test_variable1Il24custom_multiply_variableIlEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Il33custom_multiply_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc -.LCPI120_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Il33custom_multiply_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Il33custom_multiply_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Il33custom_multiply_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Il33custom_multiply_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Il33custom_multiply_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Il33custom_multiply_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Il33custom_multiply_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Il33custom_multiply_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc @@ -34735,12 +34492,14 @@ _Z14test_variable4Il33custom_multiply_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc: ld.w $a0, $s6, %pc_lo12(current_test) .LBB120_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI120_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI120_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -34773,12 +34532,8 @@ _Z14test_variable4Il33custom_multiply_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc: .size _Z14test_variable4Il33custom_multiply_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end120-_Z14test_variable4Il33custom_multiply_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Il34custom_multiply_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc -.LCPI121_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Il34custom_multiply_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Il34custom_multiply_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Il34custom_multiply_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Il34custom_multiply_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Il34custom_multiply_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Il34custom_multiply_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Il34custom_multiply_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Il34custom_multiply_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc @@ -34977,12 +34732,14 @@ _Z14test_variable4Il34custom_multiply_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc ld.w $a0, $s2, %pc_lo12(current_test) .LBB121_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI121_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI121_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -35015,12 +34772,8 @@ _Z14test_variable4Il34custom_multiply_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc .size _Z14test_variable4Il34custom_multiply_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end121-_Z14test_variable4Il34custom_multiply_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Il22custom_divide_variableIlEEvPT_iS2_PKc -.LCPI122_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Il22custom_divide_variableIlEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Il22custom_divide_variableIlEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Il22custom_divide_variableIlEEvPT_iS2_PKc + .weak _Z14test_variable1Il22custom_divide_variableIlEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Il22custom_divide_variableIlEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Il22custom_divide_variableIlEEvPT_iS2_PKc,@function _Z14test_variable1Il22custom_divide_variableIlEEvPT_iS2_PKc: # @_Z14test_variable1Il22custom_divide_variableIlEEvPT_iS2_PKc @@ -35210,12 +34963,14 @@ _Z14test_variable1Il22custom_divide_variableIlEEvPT_iS2_PKc: # @_Z14test_variabl ld.w $a0, $s4, %pc_lo12(current_test) .LBB122_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI122_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI122_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -35248,12 +35003,8 @@ _Z14test_variable1Il22custom_divide_variableIlEEvPT_iS2_PKc: # @_Z14test_variabl .size _Z14test_variable1Il22custom_divide_variableIlEEvPT_iS2_PKc, .Lfunc_end122-_Z14test_variable1Il22custom_divide_variableIlEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Il31custom_divide_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc -.LCPI123_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Il31custom_divide_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Il31custom_divide_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Il31custom_divide_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Il31custom_divide_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Il31custom_divide_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Il31custom_divide_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Il31custom_divide_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Il31custom_divide_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc @@ -35478,12 +35229,14 @@ _Z14test_variable4Il31custom_divide_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc: # ld.w $a0, $s3, %pc_lo12(current_test) .LBB123_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI123_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI123_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -35516,12 +35269,8 @@ _Z14test_variable4Il31custom_divide_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc: # .size _Z14test_variable4Il31custom_divide_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end123-_Z14test_variable4Il31custom_divide_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Il32custom_divide_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc -.LCPI124_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Il32custom_divide_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Il32custom_divide_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Il32custom_divide_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Il32custom_divide_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Il32custom_divide_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Il32custom_divide_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Il32custom_divide_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Il32custom_divide_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc @@ -35717,12 +35466,14 @@ _Z14test_variable4Il32custom_divide_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc: ld.w $a0, $s6, %pc_lo12(current_test) .LBB124_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI124_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI124_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -35755,12 +35506,8 @@ _Z14test_variable4Il32custom_divide_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc: .size _Z14test_variable4Il32custom_divide_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end124-_Z14test_variable4Il32custom_divide_multiple_variable2IlEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Il30custom_mixed_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc -.LCPI125_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Il30custom_mixed_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Il30custom_mixed_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Il30custom_mixed_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Il30custom_mixed_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Il30custom_mixed_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Il30custom_mixed_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Il30custom_mixed_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Il30custom_mixed_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc @@ -35967,12 +35714,14 @@ _Z14test_variable4Il30custom_mixed_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc: # ld.w $a0, $s3, %pc_lo12(current_test) .LBB125_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI125_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI125_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -36005,12 +35754,8 @@ _Z14test_variable4Il30custom_mixed_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc: # .size _Z14test_variable4Il30custom_mixed_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end125-_Z14test_variable4Il30custom_mixed_multiple_variableIlEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Il19custom_variable_andIlEEvPT_iS2_PKc -.LCPI126_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Il19custom_variable_andIlEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Il19custom_variable_andIlEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Il19custom_variable_andIlEEvPT_iS2_PKc + .weak _Z14test_variable1Il19custom_variable_andIlEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Il19custom_variable_andIlEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Il19custom_variable_andIlEEvPT_iS2_PKc,@function _Z14test_variable1Il19custom_variable_andIlEEvPT_iS2_PKc: # @_Z14test_variable1Il19custom_variable_andIlEEvPT_iS2_PKc @@ -36200,12 +35945,14 @@ _Z14test_variable1Il19custom_variable_andIlEEvPT_iS2_PKc: # @_Z14test_variable1I ld.w $a0, $s4, %pc_lo12(current_test) .LBB126_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI126_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI126_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -36238,12 +35985,8 @@ _Z14test_variable1Il19custom_variable_andIlEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Il19custom_variable_andIlEEvPT_iS2_PKc, .Lfunc_end126-_Z14test_variable1Il19custom_variable_andIlEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Il28custom_multiple_variable_andIlEEvPT_iS2_S2_S2_S2_PKc -.LCPI127_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Il28custom_multiple_variable_andIlEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Il28custom_multiple_variable_andIlEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Il28custom_multiple_variable_andIlEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Il28custom_multiple_variable_andIlEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Il28custom_multiple_variable_andIlEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Il28custom_multiple_variable_andIlEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Il28custom_multiple_variable_andIlEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Il28custom_multiple_variable_andIlEEvPT_iS2_S2_S2_S2_PKc @@ -36449,12 +36192,14 @@ _Z14test_variable4Il28custom_multiple_variable_andIlEEvPT_iS2_S2_S2_S2_PKc: # @_ ld.w $a0, $s6, %pc_lo12(current_test) .LBB127_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI127_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI127_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -36487,12 +36232,8 @@ _Z14test_variable4Il28custom_multiple_variable_andIlEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Il28custom_multiple_variable_andIlEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end127-_Z14test_variable4Il28custom_multiple_variable_andIlEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Il18custom_variable_orIlEEvPT_iS2_PKc -.LCPI128_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Il18custom_variable_orIlEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Il18custom_variable_orIlEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Il18custom_variable_orIlEEvPT_iS2_PKc + .weak _Z14test_variable1Il18custom_variable_orIlEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Il18custom_variable_orIlEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Il18custom_variable_orIlEEvPT_iS2_PKc,@function _Z14test_variable1Il18custom_variable_orIlEEvPT_iS2_PKc: # @_Z14test_variable1Il18custom_variable_orIlEEvPT_iS2_PKc @@ -36682,12 +36423,14 @@ _Z14test_variable1Il18custom_variable_orIlEEvPT_iS2_PKc: # @_Z14test_variable1Il ld.w $a0, $s4, %pc_lo12(current_test) .LBB128_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI128_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI128_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -36720,12 +36463,8 @@ _Z14test_variable1Il18custom_variable_orIlEEvPT_iS2_PKc: # @_Z14test_variable1Il .size _Z14test_variable1Il18custom_variable_orIlEEvPT_iS2_PKc, .Lfunc_end128-_Z14test_variable1Il18custom_variable_orIlEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Il27custom_multiple_variable_orIlEEvPT_iS2_S2_S2_S2_PKc -.LCPI129_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Il27custom_multiple_variable_orIlEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Il27custom_multiple_variable_orIlEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Il27custom_multiple_variable_orIlEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Il27custom_multiple_variable_orIlEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Il27custom_multiple_variable_orIlEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Il27custom_multiple_variable_orIlEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Il27custom_multiple_variable_orIlEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Il27custom_multiple_variable_orIlEEvPT_iS2_S2_S2_S2_PKc @@ -36931,12 +36670,14 @@ _Z14test_variable4Il27custom_multiple_variable_orIlEEvPT_iS2_S2_S2_S2_PKc: # @_Z ld.w $a0, $s6, %pc_lo12(current_test) .LBB129_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI129_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI129_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -36969,12 +36710,8 @@ _Z14test_variable4Il27custom_multiple_variable_orIlEEvPT_iS2_S2_S2_S2_PKc: # @_Z .size _Z14test_variable4Il27custom_multiple_variable_orIlEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end129-_Z14test_variable4Il27custom_multiple_variable_orIlEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Il19custom_variable_xorIlEEvPT_iS2_PKc -.LCPI130_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Il19custom_variable_xorIlEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Il19custom_variable_xorIlEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Il19custom_variable_xorIlEEvPT_iS2_PKc + .weak _Z14test_variable1Il19custom_variable_xorIlEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Il19custom_variable_xorIlEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Il19custom_variable_xorIlEEvPT_iS2_PKc,@function _Z14test_variable1Il19custom_variable_xorIlEEvPT_iS2_PKc: # @_Z14test_variable1Il19custom_variable_xorIlEEvPT_iS2_PKc @@ -37163,12 +36900,14 @@ _Z14test_variable1Il19custom_variable_xorIlEEvPT_iS2_PKc: # @_Z14test_variable1I ld.w $a0, $s4, %pc_lo12(current_test) .LBB130_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI130_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI130_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -37201,12 +36940,8 @@ _Z14test_variable1Il19custom_variable_xorIlEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Il19custom_variable_xorIlEEvPT_iS2_PKc, .Lfunc_end130-_Z14test_variable1Il19custom_variable_xorIlEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Il28custom_multiple_variable_xorIlEEvPT_iS2_S2_S2_S2_PKc -.LCPI131_0: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Il28custom_multiple_variable_xorIlEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Il28custom_multiple_variable_xorIlEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Il28custom_multiple_variable_xorIlEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Il28custom_multiple_variable_xorIlEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Il28custom_multiple_variable_xorIlEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Il28custom_multiple_variable_xorIlEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Il28custom_multiple_variable_xorIlEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Il28custom_multiple_variable_xorIlEEvPT_iS2_S2_S2_S2_PKc @@ -37414,12 +37149,14 @@ _Z14test_variable4Il28custom_multiple_variable_xorIlEEvPT_iS2_S2_S2_S2_PKc: # @_ ld.w $a0, $s3, %pc_lo12(current_test) .LBB131_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI131_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI131_0) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -37452,14 +37189,8 @@ _Z14test_variable4Il28custom_multiple_variable_xorIlEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Il28custom_multiple_variable_xorIlEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end131-_Z14test_variable4Il28custom_multiple_variable_xorIlEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Im19custom_add_variableImEEvPT_iS2_PKc -.LCPI132_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI132_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Im19custom_add_variableImEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Im19custom_add_variableImEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Im19custom_add_variableImEEvPT_iS2_PKc + .weak _Z14test_variable1Im19custom_add_variableImEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Im19custom_add_variableImEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Im19custom_add_variableImEEvPT_iS2_PKc,@function _Z14test_variable1Im19custom_add_variableImEEvPT_iS2_PKc: # @_Z14test_variable1Im19custom_add_variableImEEvPT_iS2_PKc @@ -37513,9 +37244,9 @@ _Z14test_variable1Im19custom_add_variableImEEvPT_iS2_PKc: # @_Z14test_variable1I addi.d $a0, $s2, 16 st.d $a0, $sp, 56 # 8-byte Folded Spill ori $a5, $zero, 4 - pcalau12i $a0, %pc_hi20(.LCPI132_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI132_0) pcalau12i $s5, %pc_hi20(init_value) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $fp, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -37608,8 +37339,8 @@ _Z14test_variable1Im19custom_add_variableImEEvPT_iS2_PKc: # @_Z14test_variable1I .LBB132_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI132_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI132_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s2, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -37674,12 +37405,14 @@ _Z14test_variable1Im19custom_add_variableImEEvPT_iS2_PKc: # @_Z14test_variable1I ld.w $a0, $s4, %pc_lo12(current_test) .LBB132_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI132_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI132_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -37713,14 +37446,8 @@ _Z14test_variable1Im19custom_add_variableImEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Im19custom_add_variableImEEvPT_iS2_PKc, .Lfunc_end132-_Z14test_variable1Im19custom_add_variableImEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z22test_hoisted_variable1Im19custom_add_variableImEEvPT_iS2_PKc -.LCPI133_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI133_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z22test_hoisted_variable1Im19custom_add_variableImEEvPT_iS2_PKc,"axG",@progbits,_Z22test_hoisted_variable1Im19custom_add_variableImEEvPT_iS2_PKc,comdat - .weak _Z22test_hoisted_variable1Im19custom_add_variableImEEvPT_iS2_PKc + .weak _Z22test_hoisted_variable1Im19custom_add_variableImEEvPT_iS2_PKc # -- Begin function _Z22test_hoisted_variable1Im19custom_add_variableImEEvPT_iS2_PKc .p2align 5 .type _Z22test_hoisted_variable1Im19custom_add_variableImEEvPT_iS2_PKc,@function _Z22test_hoisted_variable1Im19custom_add_variableImEEvPT_iS2_PKc: # @_Z22test_hoisted_variable1Im19custom_add_variableImEEvPT_iS2_PKc @@ -37775,9 +37502,9 @@ _Z22test_hoisted_variable1Im19custom_add_variableImEEvPT_iS2_PKc: # @_Z22test_ho addi.d $a0, $s2, 16 st.d $a0, $sp, 48 # 8-byte Folded Spill ori $a5, $zero, 4 - pcalau12i $a0, %pc_hi20(.LCPI133_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI133_0) pcalau12i $s5, %pc_hi20(init_value) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $fp, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -37867,8 +37594,8 @@ _Z22test_hoisted_variable1Im19custom_add_variableImEEvPT_iS2_PKc: # @_Z22test_ho .LBB133_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI133_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI133_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s2, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -37935,12 +37662,14 @@ _Z22test_hoisted_variable1Im19custom_add_variableImEEvPT_iS2_PKc: # @_Z22test_ho ld.w $a0, $s3, %pc_lo12(current_test) .LBB133_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI133_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI133_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -37974,14 +37703,8 @@ _Z22test_hoisted_variable1Im19custom_add_variableImEEvPT_iS2_PKc: # @_Z22test_ho .size _Z22test_hoisted_variable1Im19custom_add_variableImEEvPT_iS2_PKc, .Lfunc_end133-_Z22test_hoisted_variable1Im19custom_add_variableImEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Im28custom_add_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc -.LCPI134_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI134_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Im28custom_add_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Im28custom_add_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Im28custom_add_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Im28custom_add_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Im28custom_add_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Im28custom_add_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Im28custom_add_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Im28custom_add_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc @@ -38041,9 +37764,9 @@ _Z14test_variable4Im28custom_add_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: # @_ addi.d $a0, $s1, 16 st.d $a0, $sp, 56 # 8-byte Folded Spill ori $a5, $zero, 4 - pcalau12i $a0, %pc_hi20(.LCPI134_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI134_0) pcalau12i $s7, %pc_hi20(init_value) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $fp, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -38136,8 +37859,8 @@ _Z14test_variable4Im28custom_add_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: # @_ .LBB134_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI134_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI134_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s1, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -38202,12 +37925,14 @@ _Z14test_variable4Im28custom_add_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: # @_ ld.w $a0, $s6, %pc_lo12(current_test) .LBB134_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI134_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI134_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -38241,14 +37966,8 @@ _Z14test_variable4Im28custom_add_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Im28custom_add_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end134-_Z14test_variable4Im28custom_add_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Im19custom_sub_variableImEEvPT_iS2_PKc -.LCPI135_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI135_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Im19custom_sub_variableImEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Im19custom_sub_variableImEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Im19custom_sub_variableImEEvPT_iS2_PKc + .weak _Z14test_variable1Im19custom_sub_variableImEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Im19custom_sub_variableImEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Im19custom_sub_variableImEEvPT_iS2_PKc,@function _Z14test_variable1Im19custom_sub_variableImEEvPT_iS2_PKc: # @_Z14test_variable1Im19custom_sub_variableImEEvPT_iS2_PKc @@ -38302,9 +38021,9 @@ _Z14test_variable1Im19custom_sub_variableImEEvPT_iS2_PKc: # @_Z14test_variable1I addi.d $a0, $s2, 16 st.d $a0, $sp, 56 # 8-byte Folded Spill ori $a5, $zero, 4 - pcalau12i $a0, %pc_hi20(.LCPI135_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI135_0) pcalau12i $s5, %pc_hi20(init_value) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $fp, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -38397,8 +38116,8 @@ _Z14test_variable1Im19custom_sub_variableImEEvPT_iS2_PKc: # @_Z14test_variable1I .LBB135_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI135_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI135_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s2, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -38463,12 +38182,14 @@ _Z14test_variable1Im19custom_sub_variableImEEvPT_iS2_PKc: # @_Z14test_variable1I ld.w $a0, $s4, %pc_lo12(current_test) .LBB135_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI135_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI135_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -38502,14 +38223,8 @@ _Z14test_variable1Im19custom_sub_variableImEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Im19custom_sub_variableImEEvPT_iS2_PKc, .Lfunc_end135-_Z14test_variable1Im19custom_sub_variableImEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Im28custom_sub_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc -.LCPI136_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI136_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Im28custom_sub_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Im28custom_sub_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Im28custom_sub_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Im28custom_sub_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Im28custom_sub_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Im28custom_sub_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Im28custom_sub_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Im28custom_sub_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc @@ -38569,9 +38284,9 @@ _Z14test_variable4Im28custom_sub_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: # @_ addi.d $a0, $s1, 16 st.d $a0, $sp, 56 # 8-byte Folded Spill ori $a5, $zero, 4 - pcalau12i $a0, %pc_hi20(.LCPI136_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI136_0) pcalau12i $s7, %pc_hi20(init_value) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $fp, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -38664,8 +38379,8 @@ _Z14test_variable4Im28custom_sub_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: # @_ .LBB136_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI136_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI136_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s1, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -38730,12 +38445,14 @@ _Z14test_variable4Im28custom_sub_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: # @_ ld.w $a0, $s6, %pc_lo12(current_test) .LBB136_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI136_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI136_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -38769,14 +38486,8 @@ _Z14test_variable4Im28custom_sub_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Im28custom_sub_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end136-_Z14test_variable4Im28custom_sub_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Im24custom_multiply_variableImEEvPT_iS2_PKc -.LCPI137_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI137_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Im24custom_multiply_variableImEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Im24custom_multiply_variableImEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Im24custom_multiply_variableImEEvPT_iS2_PKc + .weak _Z14test_variable1Im24custom_multiply_variableImEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Im24custom_multiply_variableImEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Im24custom_multiply_variableImEEvPT_iS2_PKc,@function _Z14test_variable1Im24custom_multiply_variableImEEvPT_iS2_PKc: # @_Z14test_variable1Im24custom_multiply_variableImEEvPT_iS2_PKc @@ -38832,15 +38543,15 @@ _Z14test_variable1Im24custom_multiply_variableImEEvPT_iS2_PKc: # @_Z14test_varia vreplgr2vr.d $vr4, $s0 addi.d $a0, $s2, 16 st.d $a0, $sp, 56 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI137_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI137_0) ori $a5, $zero, 4 - pcalau12i $fp, %pc_hi20(init_value) + pcalau12i $s5, %pc_hi20(init_value) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu52i.d $s3, $zero, -2048 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $a0, $a0, %pc_lo12(.L.str.179) st.d $a0, $sp, 48 # 8-byte Folded Spill - move $s5, $zero + move $fp, $zero vrepli.b $vr0, 0 vst $vr0, $sp, 32 # 16-byte Folded Spill vst $vr4, $sp, 64 # 16-byte Folded Spill @@ -38848,8 +38559,8 @@ _Z14test_variable1Im24custom_multiply_variableImEEvPT_iS2_PKc: # @_Z14test_varia .p2align 4, , 16 .LBB137_3: # %_Z26check_shifted_variable_sumIm24custom_multiply_variableImEEvT_S2_.exit.us # in Loop: Header=BB137_4 Depth=1 - addi.w $s5, $s5, 1 - bge $s5, $a1, .LBB137_17 + addi.w $fp, $fp, 1 + bge $fp, $a1, .LBB137_17 .LBB137_4: # %.preheader.us # =>This Loop Header: Depth=1 # Child Loop BB137_7 Depth 2 @@ -38900,7 +38611,7 @@ _Z14test_variable1Im24custom_multiply_variableImEEvPT_iS2_PKc: # @_Z14test_varia bnez $a3, .LBB137_10 .LBB137_11: # %._crit_edge.us # in Loop: Header=BB137_4 Depth=1 - fld.d $fa0, $fp, %pc_lo12(init_value) + fld.d $fa0, $s5, %pc_lo12(init_value) fcmp.clt.d $fcc0, $fa0, $fs0 fsub.d $fa1, $fa0, $fs0 ftintrz.l.d $fa1, $fa1 @@ -38926,8 +38637,8 @@ _Z14test_variable1Im24custom_multiply_variableImEEvPT_iS2_PKc: # @_Z14test_varia .LBB137_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI137_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI137_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu52i.d $s1, $zero, -2048 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) @@ -38991,12 +38702,14 @@ _Z14test_variable1Im24custom_multiply_variableImEEvPT_iS2_PKc: # @_Z14test_varia ld.w $a0, $s4, %pc_lo12(current_test) .LBB137_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI137_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI137_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -39030,14 +38743,8 @@ _Z14test_variable1Im24custom_multiply_variableImEEvPT_iS2_PKc: # @_Z14test_varia .size _Z14test_variable1Im24custom_multiply_variableImEEvPT_iS2_PKc, .Lfunc_end137-_Z14test_variable1Im24custom_multiply_variableImEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Im33custom_multiply_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc -.LCPI138_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI138_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Im33custom_multiply_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Im33custom_multiply_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Im33custom_multiply_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Im33custom_multiply_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Im33custom_multiply_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Im33custom_multiply_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Im33custom_multiply_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Im33custom_multiply_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc @@ -39101,15 +38808,15 @@ _Z14test_variable4Im33custom_multiply_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: vreplgr2vr.d $vr4, $s3 addi.d $a0, $s1, 16 st.d $a0, $sp, 56 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI138_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI138_0) ori $a5, $zero, 4 - pcalau12i $fp, %pc_hi20(init_value) + pcalau12i $s7, %pc_hi20(init_value) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu52i.d $s2, $zero, -2048 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $a0, $a0, %pc_lo12(.L.str.179) st.d $a0, $sp, 48 # 8-byte Folded Spill - move $s7, $zero + move $fp, $zero vrepli.b $vr0, 0 vst $vr0, $sp, 32 # 16-byte Folded Spill vst $vr4, $sp, 64 # 16-byte Folded Spill @@ -39117,8 +38824,8 @@ _Z14test_variable4Im33custom_multiply_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: .p2align 4, , 16 .LBB138_3: # %_Z26check_shifted_variable_sumIm33custom_multiply_multiple_variableImEEvT_S2_S2_S2_S2_.exit.us # in Loop: Header=BB138_4 Depth=1 - addi.w $s7, $s7, 1 - bge $s7, $a1, .LBB138_17 + addi.w $fp, $fp, 1 + bge $fp, $a1, .LBB138_17 .LBB138_4: # %.preheader.us # =>This Loop Header: Depth=1 # Child Loop BB138_7 Depth 2 @@ -39169,7 +38876,7 @@ _Z14test_variable4Im33custom_multiply_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: bnez $a3, .LBB138_10 .LBB138_11: # %._crit_edge.us # in Loop: Header=BB138_4 Depth=1 - fld.d $fa0, $fp, %pc_lo12(init_value) + fld.d $fa0, $s7, %pc_lo12(init_value) fcmp.clt.d $fcc0, $fa0, $fs0 fsub.d $fa1, $fa0, $fs0 ftintrz.l.d $fa1, $fa1 @@ -39195,8 +38902,8 @@ _Z14test_variable4Im33custom_multiply_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: .LBB138_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI138_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI138_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu52i.d $s1, $zero, -2048 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) @@ -39260,12 +38967,14 @@ _Z14test_variable4Im33custom_multiply_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: ld.w $a0, $s6, %pc_lo12(current_test) .LBB138_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI138_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI138_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -39299,14 +39008,8 @@ _Z14test_variable4Im33custom_multiply_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: .size _Z14test_variable4Im33custom_multiply_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end138-_Z14test_variable4Im33custom_multiply_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Im34custom_multiply_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc -.LCPI139_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI139_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Im34custom_multiply_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Im34custom_multiply_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Im34custom_multiply_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Im34custom_multiply_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Im34custom_multiply_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Im34custom_multiply_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Im34custom_multiply_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Im34custom_multiply_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc @@ -39368,9 +39071,9 @@ _Z14test_variable4Im34custom_multiply_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc addi.d $a0, $s1, 16 st.d $a0, $sp, 56 # 8-byte Folded Spill ori $a5, $zero, 4 - pcalau12i $a0, %pc_hi20(.LCPI139_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI139_0) pcalau12i $s7, %pc_hi20(init_value) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $fp, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -39464,8 +39167,8 @@ _Z14test_variable4Im34custom_multiply_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc .LBB139_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI139_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI139_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s1, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -39530,12 +39233,14 @@ _Z14test_variable4Im34custom_multiply_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc ld.w $a0, $s6, %pc_lo12(current_test) .LBB139_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI139_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI139_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -39569,14 +39274,8 @@ _Z14test_variable4Im34custom_multiply_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc .size _Z14test_variable4Im34custom_multiply_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end139-_Z14test_variable4Im34custom_multiply_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Im22custom_divide_variableImEEvPT_iS2_PKc -.LCPI140_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI140_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Im22custom_divide_variableImEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Im22custom_divide_variableImEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Im22custom_divide_variableImEEvPT_iS2_PKc + .weak _Z14test_variable1Im22custom_divide_variableImEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Im22custom_divide_variableImEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Im22custom_divide_variableImEEvPT_iS2_PKc,@function _Z14test_variable1Im22custom_divide_variableImEEvPT_iS2_PKc: # @_Z14test_variable1Im22custom_divide_variableImEEvPT_iS2_PKc @@ -39631,8 +39330,8 @@ _Z14test_variable1Im22custom_divide_variableImEEvPT_iS2_PKc: # @_Z14test_variabl st.d $a0, $sp, 56 # 8-byte Folded Spill ori $a5, $zero, 4 pcalau12i $s5, %pc_hi20(init_value) - pcalau12i $a0, %pc_hi20(.LCPI140_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI140_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu52i.d $s3, $zero, -2048 lu12i.w $a0, 1 ori $s8, $a0, 3904 @@ -39728,8 +39427,8 @@ _Z14test_variable1Im22custom_divide_variableImEEvPT_iS2_PKc: # @_Z14test_variabl .LBB140_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI140_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI140_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu52i.d $s2, $zero, -2048 lu12i.w $a0, 1 ori $s3, $a0, 3904 @@ -39796,12 +39495,14 @@ _Z14test_variable1Im22custom_divide_variableImEEvPT_iS2_PKc: # @_Z14test_variabl ld.w $a0, $s4, %pc_lo12(current_test) .LBB140_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI140_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI140_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -39835,14 +39536,8 @@ _Z14test_variable1Im22custom_divide_variableImEEvPT_iS2_PKc: # @_Z14test_variabl .size _Z14test_variable1Im22custom_divide_variableImEEvPT_iS2_PKc, .Lfunc_end140-_Z14test_variable1Im22custom_divide_variableImEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Im31custom_divide_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc -.LCPI141_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI141_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Im31custom_divide_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Im31custom_divide_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Im31custom_divide_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Im31custom_divide_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Im31custom_divide_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Im31custom_divide_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Im31custom_divide_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Im31custom_divide_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc @@ -39906,8 +39601,8 @@ _Z14test_variable4Im31custom_divide_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: # st.d $a0, $sp, 56 # 8-byte Folded Spill ori $a5, $zero, 4 pcalau12i $s6, %pc_hi20(init_value) - pcalau12i $a0, %pc_hi20(.LCPI141_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI141_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu52i.d $s5, $zero, -2048 lu12i.w $a0, 1 ori $s7, $a0, 3904 @@ -40024,8 +39719,8 @@ _Z14test_variable4Im31custom_divide_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: # .LBB141_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI141_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI141_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu52i.d $s5, $zero, -2048 lu12i.w $a0, 1 ori $s6, $a0, 3904 @@ -40098,12 +39793,14 @@ _Z14test_variable4Im31custom_divide_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: # ld.w $a0, $s3, %pc_lo12(current_test) .LBB141_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI141_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI141_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -40137,14 +39834,8 @@ _Z14test_variable4Im31custom_divide_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: # .size _Z14test_variable4Im31custom_divide_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end141-_Z14test_variable4Im31custom_divide_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Im32custom_divide_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc -.LCPI142_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI142_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Im32custom_divide_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Im32custom_divide_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Im32custom_divide_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Im32custom_divide_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Im32custom_divide_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Im32custom_divide_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Im32custom_divide_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Im32custom_divide_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc @@ -40204,9 +39895,9 @@ _Z14test_variable4Im32custom_divide_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc: addi.d $a0, $s1, 16 st.d $a0, $sp, 56 # 8-byte Folded Spill ori $a5, $zero, 4 - pcalau12i $a0, %pc_hi20(.LCPI142_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI142_0) pcalau12i $s7, %pc_hi20(init_value) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $fp, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -40299,8 +39990,8 @@ _Z14test_variable4Im32custom_divide_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc: .LBB142_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI142_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI142_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s1, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -40365,12 +40056,14 @@ _Z14test_variable4Im32custom_divide_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc: ld.w $a0, $s6, %pc_lo12(current_test) .LBB142_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI142_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI142_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -40404,14 +40097,8 @@ _Z14test_variable4Im32custom_divide_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc: .size _Z14test_variable4Im32custom_divide_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end142-_Z14test_variable4Im32custom_divide_multiple_variable2ImEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Im30custom_mixed_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc -.LCPI143_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI143_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Im30custom_mixed_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Im30custom_mixed_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Im30custom_mixed_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Im30custom_mixed_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Im30custom_mixed_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Im30custom_mixed_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Im30custom_mixed_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Im30custom_mixed_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc @@ -40473,9 +40160,9 @@ _Z14test_variable4Im30custom_mixed_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: # addi.d $a0, $s2, 16 st.d $a0, $sp, 56 # 8-byte Folded Spill ori $a5, $zero, 4 - pcalau12i $a0, %pc_hi20(.LCPI143_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI143_0) pcalau12i $s7, %pc_hi20(init_value) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s3, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -40575,8 +40262,8 @@ _Z14test_variable4Im30custom_mixed_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: # .LBB143_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI143_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI143_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s1, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -40643,12 +40330,14 @@ _Z14test_variable4Im30custom_mixed_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: # ld.w $a0, $s3, %pc_lo12(current_test) .LBB143_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI143_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI143_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -40682,14 +40371,8 @@ _Z14test_variable4Im30custom_mixed_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc: # .size _Z14test_variable4Im30custom_mixed_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end143-_Z14test_variable4Im30custom_mixed_multiple_variableImEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Im19custom_variable_andImEEvPT_iS2_PKc -.LCPI144_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI144_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Im19custom_variable_andImEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Im19custom_variable_andImEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Im19custom_variable_andImEEvPT_iS2_PKc + .weak _Z14test_variable1Im19custom_variable_andImEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Im19custom_variable_andImEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Im19custom_variable_andImEEvPT_iS2_PKc,@function _Z14test_variable1Im19custom_variable_andImEEvPT_iS2_PKc: # @_Z14test_variable1Im19custom_variable_andImEEvPT_iS2_PKc @@ -40743,9 +40426,9 @@ _Z14test_variable1Im19custom_variable_andImEEvPT_iS2_PKc: # @_Z14test_variable1I addi.d $a0, $s2, 16 st.d $a0, $sp, 56 # 8-byte Folded Spill ori $a5, $zero, 4 - pcalau12i $a0, %pc_hi20(.LCPI144_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI144_0) pcalau12i $s5, %pc_hi20(init_value) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $fp, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -40838,8 +40521,8 @@ _Z14test_variable1Im19custom_variable_andImEEvPT_iS2_PKc: # @_Z14test_variable1I .LBB144_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI144_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI144_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s2, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -40904,12 +40587,14 @@ _Z14test_variable1Im19custom_variable_andImEEvPT_iS2_PKc: # @_Z14test_variable1I ld.w $a0, $s4, %pc_lo12(current_test) .LBB144_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI144_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI144_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -40943,14 +40628,8 @@ _Z14test_variable1Im19custom_variable_andImEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Im19custom_variable_andImEEvPT_iS2_PKc, .Lfunc_end144-_Z14test_variable1Im19custom_variable_andImEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Im28custom_multiple_variable_andImEEvPT_iS2_S2_S2_S2_PKc -.LCPI145_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI145_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Im28custom_multiple_variable_andImEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Im28custom_multiple_variable_andImEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Im28custom_multiple_variable_andImEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Im28custom_multiple_variable_andImEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Im28custom_multiple_variable_andImEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Im28custom_multiple_variable_andImEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Im28custom_multiple_variable_andImEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Im28custom_multiple_variable_andImEEvPT_iS2_S2_S2_S2_PKc @@ -41020,9 +40699,9 @@ _Z14test_variable4Im28custom_multiple_variable_andImEEvPT_iS2_S2_S2_S2_PKc: # @_ addi.d $a0, $s1, 16 st.d $a0, $sp, 56 # 8-byte Folded Spill ori $a5, $zero, 4 - pcalau12i $a0, %pc_hi20(.LCPI145_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI145_0) pcalau12i $s5, %pc_hi20(init_value) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s4, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -41116,8 +40795,8 @@ _Z14test_variable4Im28custom_multiple_variable_andImEEvPT_iS2_S2_S2_S2_PKc: # @_ .LBB145_13: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI145_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI145_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s2, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -41184,12 +40863,14 @@ _Z14test_variable4Im28custom_multiple_variable_andImEEvPT_iS2_S2_S2_S2_PKc: # @_ ld.w $a0, $s3, %pc_lo12(current_test) .LBB145_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI145_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI145_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -41223,14 +40904,8 @@ _Z14test_variable4Im28custom_multiple_variable_andImEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Im28custom_multiple_variable_andImEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end145-_Z14test_variable4Im28custom_multiple_variable_andImEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Im18custom_variable_orImEEvPT_iS2_PKc -.LCPI146_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI146_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Im18custom_variable_orImEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Im18custom_variable_orImEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Im18custom_variable_orImEEvPT_iS2_PKc + .weak _Z14test_variable1Im18custom_variable_orImEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Im18custom_variable_orImEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Im18custom_variable_orImEEvPT_iS2_PKc,@function _Z14test_variable1Im18custom_variable_orImEEvPT_iS2_PKc: # @_Z14test_variable1Im18custom_variable_orImEEvPT_iS2_PKc @@ -41284,9 +40959,9 @@ _Z14test_variable1Im18custom_variable_orImEEvPT_iS2_PKc: # @_Z14test_variable1Im addi.d $a0, $s2, 16 st.d $a0, $sp, 56 # 8-byte Folded Spill ori $a5, $zero, 4 - pcalau12i $a0, %pc_hi20(.LCPI146_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI146_0) pcalau12i $s5, %pc_hi20(init_value) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $fp, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -41379,8 +41054,8 @@ _Z14test_variable1Im18custom_variable_orImEEvPT_iS2_PKc: # @_Z14test_variable1Im .LBB146_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI146_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI146_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s2, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -41445,12 +41120,14 @@ _Z14test_variable1Im18custom_variable_orImEEvPT_iS2_PKc: # @_Z14test_variable1Im ld.w $a0, $s4, %pc_lo12(current_test) .LBB146_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI146_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI146_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -41484,14 +41161,8 @@ _Z14test_variable1Im18custom_variable_orImEEvPT_iS2_PKc: # @_Z14test_variable1Im .size _Z14test_variable1Im18custom_variable_orImEEvPT_iS2_PKc, .Lfunc_end146-_Z14test_variable1Im18custom_variable_orImEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Im27custom_multiple_variable_orImEEvPT_iS2_S2_S2_S2_PKc -.LCPI147_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI147_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Im27custom_multiple_variable_orImEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Im27custom_multiple_variable_orImEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Im27custom_multiple_variable_orImEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Im27custom_multiple_variable_orImEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Im27custom_multiple_variable_orImEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Im27custom_multiple_variable_orImEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Im27custom_multiple_variable_orImEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Im27custom_multiple_variable_orImEEvPT_iS2_S2_S2_S2_PKc @@ -41561,9 +41232,9 @@ _Z14test_variable4Im27custom_multiple_variable_orImEEvPT_iS2_S2_S2_S2_PKc: # @_Z addi.d $a0, $s1, 16 st.d $a0, $sp, 56 # 8-byte Folded Spill ori $a5, $zero, 4 - pcalau12i $a0, %pc_hi20(.LCPI147_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI147_0) pcalau12i $s5, %pc_hi20(init_value) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s4, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -41657,8 +41328,8 @@ _Z14test_variable4Im27custom_multiple_variable_orImEEvPT_iS2_S2_S2_S2_PKc: # @_Z .LBB147_13: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI147_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI147_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s2, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -41725,12 +41396,14 @@ _Z14test_variable4Im27custom_multiple_variable_orImEEvPT_iS2_S2_S2_S2_PKc: # @_Z ld.w $a0, $s3, %pc_lo12(current_test) .LBB147_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI147_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI147_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -41764,14 +41437,8 @@ _Z14test_variable4Im27custom_multiple_variable_orImEEvPT_iS2_S2_S2_S2_PKc: # @_Z .size _Z14test_variable4Im27custom_multiple_variable_orImEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end147-_Z14test_variable4Im27custom_multiple_variable_orImEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Im19custom_variable_xorImEEvPT_iS2_PKc -.LCPI148_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI148_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Im19custom_variable_xorImEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Im19custom_variable_xorImEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Im19custom_variable_xorImEEvPT_iS2_PKc + .weak _Z14test_variable1Im19custom_variable_xorImEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Im19custom_variable_xorImEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Im19custom_variable_xorImEEvPT_iS2_PKc,@function _Z14test_variable1Im19custom_variable_xorImEEvPT_iS2_PKc: # @_Z14test_variable1Im19custom_variable_xorImEEvPT_iS2_PKc @@ -41825,9 +41492,9 @@ _Z14test_variable1Im19custom_variable_xorImEEvPT_iS2_PKc: # @_Z14test_variable1I addi.d $a0, $s2, 16 st.d $a0, $sp, 56 # 8-byte Folded Spill ori $a5, $zero, 4 - pcalau12i $a0, %pc_hi20(.LCPI148_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI148_0) pcalau12i $s5, %pc_hi20(init_value) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $fp, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -41920,8 +41587,8 @@ _Z14test_variable1Im19custom_variable_xorImEEvPT_iS2_PKc: # @_Z14test_variable1I .LBB148_13: # %.preheader.preheader pcalau12i $fp, %pc_hi20(init_value) fld.d $fa0, $fp, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI148_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI148_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s2, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -41986,12 +41653,14 @@ _Z14test_variable1Im19custom_variable_xorImEEvPT_iS2_PKc: # @_Z14test_variable1I ld.w $a0, $s4, %pc_lo12(current_test) .LBB148_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI148_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI148_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -42025,14 +41694,8 @@ _Z14test_variable1Im19custom_variable_xorImEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Im19custom_variable_xorImEEvPT_iS2_PKc, .Lfunc_end148-_Z14test_variable1Im19custom_variable_xorImEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Im28custom_multiple_variable_xorImEEvPT_iS2_S2_S2_S2_PKc -.LCPI149_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 -.LCPI149_1: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Im28custom_multiple_variable_xorImEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Im28custom_multiple_variable_xorImEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Im28custom_multiple_variable_xorImEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Im28custom_multiple_variable_xorImEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Im28custom_multiple_variable_xorImEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Im28custom_multiple_variable_xorImEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Im28custom_multiple_variable_xorImEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Im28custom_multiple_variable_xorImEEvPT_iS2_S2_S2_S2_PKc @@ -42102,9 +41765,9 @@ _Z14test_variable4Im28custom_multiple_variable_xorImEEvPT_iS2_S2_S2_S2_PKc: # @_ addi.d $a0, $s1, 16 st.d $a0, $sp, 56 # 8-byte Folded Spill ori $a5, $zero, 4 - pcalau12i $a0, %pc_hi20(.LCPI149_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI149_0) pcalau12i $s5, %pc_hi20(init_value) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s4, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -42198,8 +41861,8 @@ _Z14test_variable4Im28custom_multiple_variable_xorImEEvPT_iS2_S2_S2_S2_PKc: # @_ .LBB149_13: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI149_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI149_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 1 ori $s2, $a0, 3904 pcalau12i $a0, %pc_hi20(.L.str.179) @@ -42266,12 +41929,14 @@ _Z14test_variable4Im28custom_multiple_variable_xorImEEvPT_iS2_S2_S2_S2_PKc: # @_ ld.w $a0, $s3, %pc_lo12(current_test) .LBB149_21: # %_Z13record_resultdPKc.exit ld.d $a4, $sp, 16 # 8-byte Folded Reload - pcalau12i $a1, %pc_hi20(.LCPI149_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI149_1) sub.d $a1, $s0, $fp + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -42305,20 +41970,8 @@ _Z14test_variable4Im28custom_multiple_variable_xorImEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Im28custom_multiple_variable_xorImEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end149-_Z14test_variable4Im28custom_multiple_variable_xorImEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z14test_variable1If19custom_add_variableIfEEvPT_iS2_PKc -.LCPI150_0: - .word 0xc5fa0000 # float -8000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI150_1: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI150_2: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI150_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1If19custom_add_variableIfEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1If19custom_add_variableIfEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1If19custom_add_variableIfEEvPT_iS2_PKc + .weak _Z14test_variable1If19custom_add_variableIfEEvPT_iS2_PKc # -- Begin function _Z14test_variable1If19custom_add_variableIfEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1If19custom_add_variableIfEEvPT_iS2_PKc,@function _Z14test_variable1If19custom_add_variableIfEEvPT_iS2_PKc: # @_Z14test_variable1If19custom_add_variableIfEEvPT_iS2_PKc @@ -42371,14 +42024,21 @@ _Z14test_variable1If19custom_add_variableIfEEvPT_iS2_PKc: # @_Z14test_variable1I # %bb.1: # %.preheader.lr.ph blez $s0, .LBB150_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI150_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI150_0) - pcalau12i $a0, %pc_hi20(.LCPI150_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI150_2) - pcalau12i $a0, %pc_hi20(.LCPI150_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI150_1) - movgr2fr.w $fs4, $zero + movgr2fr.w $fs1, $zero pcalau12i $s6, %pc_hi20(init_value) + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs2, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s2, $a0, %pc_lo12(.L.str.179) move $s7, $zero @@ -42393,7 +42053,7 @@ _Z14test_variable1If19custom_add_variableIfEEvPT_iS2_PKc: # @_Z14test_variable1I # Child Loop BB150_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.s $fa0, $fs4 + fmov.s $fa0, $fs1 .p2align 4, , 16 .LBB150_5: # Parent Loop BB150_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -42408,16 +42068,16 @@ _Z14test_variable1If19custom_add_variableIfEEvPT_iS2_PKc: # @_Z14test_variable1I fld.d $fa1, $s6, %pc_lo12(init_value) fcvt.s.d $fa1, $fa1 fadd.s $fa1, $fs0, $fa1 - fmul.s $fa1, $fa1, $fs1 + fmul.s $fa1, $fa1, $fs2 fadd.s $fa1, $fa0, $fa1 fabs.s $fa2, $fa0 fcvt.d.s $fa2, $fa2 - fcmp.clt.d $fcc0, $fs2, $fa2 + fcmp.clt.d $fcc0, $fs3, $fa2 fdiv.s $fa0, $fa1, $fa0 fsel $fa0, $fa1, $fa0, $fcc0 fabs.s $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs3 + fcmp.clt.d $fcc0, $fa0, $fs4 bcnez $fcc0, .LBB150_3 # %bb.7: # in Loop: Header=BB150_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -42429,11 +42089,15 @@ _Z14test_variable1If19custom_add_variableIfEEvPT_iS2_PKc: # @_Z14test_variable1I .LBB150_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI150_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI150_0) - pcalau12i $a0, %pc_hi20(.LCPI150_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI150_1) - movgr2fr.w $fs3, $zero + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs1, $a0 + movgr2fr.w $fs2, $zero + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) move $s2, $zero @@ -42448,10 +42112,10 @@ _Z14test_variable1If19custom_add_variableIfEEvPT_iS2_PKc: # @_Z14test_variable1I fcvt.s.d $fa1, $fa0 fadd.s $fa1, $fs0, $fa1 fmul.s $fa1, $fa1, $fs1 - fadd.s $fa1, $fa1, $fs3 + fadd.s $fa1, $fa1, $fs2 fabs.s $fa1, $fa1 fcvt.d.s $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs2 + fcmp.clt.d $fcc0, $fa1, $fs3 bcnez $fcc0, .LBB150_9 # %bb.11: # in Loop: Header=BB150_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -42489,12 +42153,14 @@ _Z14test_variable1If19custom_add_variableIfEEvPT_iS2_PKc: # @_Z14test_variable1I move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB150_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI150_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI150_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -42531,20 +42197,8 @@ _Z14test_variable1If19custom_add_variableIfEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1If19custom_add_variableIfEEvPT_iS2_PKc, .Lfunc_end150-_Z14test_variable1If19custom_add_variableIfEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z22test_hoisted_variable1If19custom_add_variableIfEEvPT_iS2_PKc -.LCPI151_0: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI151_2: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI151_3: - .dword 0x412e848000000000 # double 1.0E+6 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI151_1: - .word 0xc5fa0000 # float -8000 .section .text._Z22test_hoisted_variable1If19custom_add_variableIfEEvPT_iS2_PKc,"axG",@progbits,_Z22test_hoisted_variable1If19custom_add_variableIfEEvPT_iS2_PKc,comdat - .weak _Z22test_hoisted_variable1If19custom_add_variableIfEEvPT_iS2_PKc + .weak _Z22test_hoisted_variable1If19custom_add_variableIfEEvPT_iS2_PKc # -- Begin function _Z22test_hoisted_variable1If19custom_add_variableIfEEvPT_iS2_PKc .p2align 5 .type _Z22test_hoisted_variable1If19custom_add_variableIfEEvPT_iS2_PKc,@function _Z22test_hoisted_variable1If19custom_add_variableIfEEvPT_iS2_PKc: # @_Z22test_hoisted_variable1If19custom_add_variableIfEEvPT_iS2_PKc @@ -42601,14 +42255,21 @@ _Z22test_hoisted_variable1If19custom_add_variableIfEEvPT_iS2_PKc: # @_Z22test_ho ffint.s.w $fs1, $fa0 blez $s0, .LBB151_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI151_1) - fld.s $fs2, $a0, %pc_lo12(.LCPI151_1) - pcalau12i $a0, %pc_hi20(.LCPI151_0) - fld.d $fs3, $a0, %pc_lo12(.LCPI151_0) - pcalau12i $a0, %pc_hi20(.LCPI151_2) - fld.d $fs4, $a0, %pc_lo12(.LCPI151_2) - movgr2fr.w $fs5, $zero + movgr2fr.w $fs2, $zero pcalau12i $s6, %pc_hi20(init_value) + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs3, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s2, $a0, %pc_lo12(.L.str.179) move $s7, $zero @@ -42623,7 +42284,7 @@ _Z22test_hoisted_variable1If19custom_add_variableIfEEvPT_iS2_PKc: # @_Z22test_ho # Child Loop BB151_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.s $fa0, $fs5 + fmov.s $fa0, $fs2 .p2align 4, , 16 .LBB151_5: # Parent Loop BB151_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -42638,16 +42299,16 @@ _Z22test_hoisted_variable1If19custom_add_variableIfEEvPT_iS2_PKc: # @_Z22test_ho fmadd.s $fa0, $fs1, $fs0, $fa0 fcvt.s.d $fa1, $fa1 fadd.s $fa1, $fs0, $fa1 - fmul.s $fa1, $fa1, $fs2 + fmul.s $fa1, $fa1, $fs3 fadd.s $fa1, $fa0, $fa1 fabs.s $fa2, $fa0 fcvt.d.s $fa2, $fa2 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fdiv.s $fa0, $fa1, $fa0 fsel $fa0, $fa1, $fa0, $fcc0 fabs.s $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB151_3 # %bb.7: # in Loop: Header=BB151_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -42659,16 +42320,23 @@ _Z22test_hoisted_variable1If19custom_add_variableIfEEvPT_iS2_PKc: # @_Z22test_ho .LBB151_8: # %.preheader.lr.ph.split movgr2fr.w $fa0, $zero fmadd.s $fs1, $fs1, $fs0, $fa0 - fabs.s $fa1, $fs1 - pcalau12i $a0, %pc_hi20(.LCPI151_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI151_0) + fabs.s $fa0, $fs1 + fcvt.d.s $fa1, $fa0 pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - fcvt.d.s $fa1, $fa1 - pcalau12i $a0, %pc_hi20(.LCPI151_1) - fld.s $fs2, $a0, %pc_lo12(.LCPI151_1) - pcalau12i $a0, %pc_hi20(.LCPI151_2) - fld.d $fs3, $a0, %pc_lo12(.LCPI151_2) + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa2, $a0 + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs2, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) move $s2, $zero @@ -42750,12 +42418,14 @@ _Z22test_hoisted_variable1If19custom_add_variableIfEEvPT_iS2_PKc: # @_Z22test_ho move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB151_19: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI151_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI151_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -42793,20 +42463,8 @@ _Z22test_hoisted_variable1If19custom_add_variableIfEEvPT_iS2_PKc: # @_Z22test_ho .size _Z22test_hoisted_variable1If19custom_add_variableIfEEvPT_iS2_PKc, .Lfunc_end151-_Z22test_hoisted_variable1If19custom_add_variableIfEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z14test_variable4If28custom_add_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc -.LCPI152_0: - .word 0xc5fa0000 # float -8000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI152_1: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI152_2: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI152_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4If28custom_add_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4If28custom_add_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4If28custom_add_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4If28custom_add_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4If28custom_add_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4If28custom_add_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4If28custom_add_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4If28custom_add_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc @@ -42868,14 +42526,21 @@ _Z14test_variable4If28custom_add_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # @_ # %bb.1: # %.preheader.lr.ph blez $s0, .LBB152_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI152_0) - fld.s $fs4, $a0, %pc_lo12(.LCPI152_0) - pcalau12i $a0, %pc_hi20(.LCPI152_2) - fld.d $fs5, $a0, %pc_lo12(.LCPI152_2) - pcalau12i $a0, %pc_hi20(.LCPI152_1) - fld.d $fs6, $a0, %pc_lo12(.LCPI152_1) - movgr2fr.w $fs7, $zero + movgr2fr.w $fs4, $zero pcalau12i $s6, %pc_hi20(init_value) + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs5, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs6, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs7, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s2, $a0, %pc_lo12(.L.str.179) move $s7, $zero @@ -42890,7 +42555,7 @@ _Z14test_variable4If28custom_add_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # @_ # Child Loop BB152_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.s $fa0, $fs7 + fmov.s $fa0, $fs4 .p2align 4, , 16 .LBB152_5: # Parent Loop BB152_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -42911,16 +42576,16 @@ _Z14test_variable4If28custom_add_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # @_ fadd.s $fa1, $fs2, $fa1 fadd.s $fa1, $fs1, $fa1 fadd.s $fa1, $fs0, $fa1 - fmul.s $fa1, $fa1, $fs4 + fmul.s $fa1, $fa1, $fs5 fadd.s $fa1, $fa0, $fa1 fabs.s $fa2, $fa0 fcvt.d.s $fa2, $fa2 - fcmp.clt.d $fcc0, $fs5, $fa2 + fcmp.clt.d $fcc0, $fs6, $fa2 fdiv.s $fa0, $fa1, $fa0 fsel $fa0, $fa1, $fa0, $fcc0 fabs.s $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs6 + fcmp.clt.d $fcc0, $fa0, $fs7 bcnez $fcc0, .LBB152_3 # %bb.7: # in Loop: Header=BB152_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -42932,11 +42597,15 @@ _Z14test_variable4If28custom_add_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # @_ .LBB152_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI152_0) - fld.s $fs4, $a0, %pc_lo12(.LCPI152_0) - pcalau12i $a0, %pc_hi20(.LCPI152_1) - fld.d $fs5, $a0, %pc_lo12(.LCPI152_1) - movgr2fr.w $fs6, $zero + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs4, $a0 + movgr2fr.w $fs5, $zero + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs6, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) move $s2, $zero @@ -42954,10 +42623,10 @@ _Z14test_variable4If28custom_add_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # @_ fadd.s $fa1, $fs1, $fa1 fadd.s $fa1, $fs0, $fa1 fmul.s $fa1, $fa1, $fs4 - fadd.s $fa1, $fa1, $fs6 + fadd.s $fa1, $fa1, $fs5 fabs.s $fa1, $fa1 fcvt.d.s $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs5 + fcmp.clt.d $fcc0, $fa1, $fs6 bcnez $fcc0, .LBB152_9 # %bb.11: # in Loop: Header=BB152_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -42995,12 +42664,14 @@ _Z14test_variable4If28custom_add_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # @_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB152_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI152_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI152_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -43040,20 +42711,8 @@ _Z14test_variable4If28custom_add_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4If28custom_add_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end152-_Z14test_variable4If28custom_add_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z14test_variable1If19custom_sub_variableIfEEvPT_iS2_PKc -.LCPI153_0: - .word 0xc5fa0000 # float -8000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI153_1: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI153_2: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI153_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1If19custom_sub_variableIfEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1If19custom_sub_variableIfEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1If19custom_sub_variableIfEEvPT_iS2_PKc + .weak _Z14test_variable1If19custom_sub_variableIfEEvPT_iS2_PKc # -- Begin function _Z14test_variable1If19custom_sub_variableIfEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1If19custom_sub_variableIfEEvPT_iS2_PKc,@function _Z14test_variable1If19custom_sub_variableIfEEvPT_iS2_PKc: # @_Z14test_variable1If19custom_sub_variableIfEEvPT_iS2_PKc @@ -43106,14 +42765,21 @@ _Z14test_variable1If19custom_sub_variableIfEEvPT_iS2_PKc: # @_Z14test_variable1I # %bb.1: # %.preheader.lr.ph blez $s0, .LBB153_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI153_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI153_0) - pcalau12i $a0, %pc_hi20(.LCPI153_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI153_2) - pcalau12i $a0, %pc_hi20(.LCPI153_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI153_1) - movgr2fr.w $fs4, $zero + movgr2fr.w $fs1, $zero pcalau12i $s6, %pc_hi20(init_value) + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs2, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s2, $a0, %pc_lo12(.L.str.179) move $s7, $zero @@ -43128,7 +42794,7 @@ _Z14test_variable1If19custom_sub_variableIfEEvPT_iS2_PKc: # @_Z14test_variable1I # Child Loop BB153_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.s $fa0, $fs4 + fmov.s $fa0, $fs1 .p2align 4, , 16 .LBB153_5: # Parent Loop BB153_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -43143,16 +42809,16 @@ _Z14test_variable1If19custom_sub_variableIfEEvPT_iS2_PKc: # @_Z14test_variable1I fld.d $fa1, $s6, %pc_lo12(init_value) fcvt.s.d $fa1, $fa1 fsub.s $fa1, $fa1, $fs0 - fmul.s $fa1, $fa1, $fs1 + fmul.s $fa1, $fa1, $fs2 fadd.s $fa1, $fa0, $fa1 fabs.s $fa2, $fa0 fcvt.d.s $fa2, $fa2 - fcmp.clt.d $fcc0, $fs2, $fa2 + fcmp.clt.d $fcc0, $fs3, $fa2 fdiv.s $fa0, $fa1, $fa0 fsel $fa0, $fa1, $fa0, $fcc0 fabs.s $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs3 + fcmp.clt.d $fcc0, $fa0, $fs4 bcnez $fcc0, .LBB153_3 # %bb.7: # in Loop: Header=BB153_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -43164,11 +42830,15 @@ _Z14test_variable1If19custom_sub_variableIfEEvPT_iS2_PKc: # @_Z14test_variable1I .LBB153_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI153_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI153_0) - pcalau12i $a0, %pc_hi20(.LCPI153_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI153_1) - movgr2fr.w $fs3, $zero + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs1, $a0 + movgr2fr.w $fs2, $zero + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) move $s2, $zero @@ -43183,10 +42853,10 @@ _Z14test_variable1If19custom_sub_variableIfEEvPT_iS2_PKc: # @_Z14test_variable1I fcvt.s.d $fa1, $fa0 fsub.s $fa1, $fa1, $fs0 fmul.s $fa1, $fa1, $fs1 - fadd.s $fa1, $fa1, $fs3 + fadd.s $fa1, $fa1, $fs2 fabs.s $fa1, $fa1 fcvt.d.s $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs2 + fcmp.clt.d $fcc0, $fa1, $fs3 bcnez $fcc0, .LBB153_9 # %bb.11: # in Loop: Header=BB153_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -43224,12 +42894,14 @@ _Z14test_variable1If19custom_sub_variableIfEEvPT_iS2_PKc: # @_Z14test_variable1I move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB153_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI153_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI153_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -43266,20 +42938,8 @@ _Z14test_variable1If19custom_sub_variableIfEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1If19custom_sub_variableIfEEvPT_iS2_PKc, .Lfunc_end153-_Z14test_variable1If19custom_sub_variableIfEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z14test_variable4If28custom_sub_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc -.LCPI154_0: - .word 0xc5fa0000 # float -8000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI154_1: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI154_2: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI154_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4If28custom_sub_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4If28custom_sub_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4If28custom_sub_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4If28custom_sub_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4If28custom_sub_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4If28custom_sub_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4If28custom_sub_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4If28custom_sub_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc @@ -43341,14 +43001,21 @@ _Z14test_variable4If28custom_sub_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # @_ # %bb.1: # %.preheader.lr.ph blez $s0, .LBB154_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI154_0) - fld.s $fs4, $a0, %pc_lo12(.LCPI154_0) - pcalau12i $a0, %pc_hi20(.LCPI154_2) - fld.d $fs5, $a0, %pc_lo12(.LCPI154_2) - pcalau12i $a0, %pc_hi20(.LCPI154_1) - fld.d $fs6, $a0, %pc_lo12(.LCPI154_1) - movgr2fr.w $fs7, $zero + movgr2fr.w $fs4, $zero pcalau12i $s6, %pc_hi20(init_value) + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs5, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs6, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs7, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s2, $a0, %pc_lo12(.L.str.179) move $s7, $zero @@ -43363,7 +43030,7 @@ _Z14test_variable4If28custom_sub_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # @_ # Child Loop BB154_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.s $fa0, $fs7 + fmov.s $fa0, $fs4 .p2align 4, , 16 .LBB154_5: # Parent Loop BB154_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -43384,16 +43051,16 @@ _Z14test_variable4If28custom_sub_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # @_ fsub.s $fa1, $fa1, $fs2 fsub.s $fa1, $fa1, $fs1 fsub.s $fa1, $fa1, $fs0 - fmul.s $fa1, $fa1, $fs4 + fmul.s $fa1, $fa1, $fs5 fadd.s $fa1, $fa0, $fa1 fabs.s $fa2, $fa0 fcvt.d.s $fa2, $fa2 - fcmp.clt.d $fcc0, $fs5, $fa2 + fcmp.clt.d $fcc0, $fs6, $fa2 fdiv.s $fa0, $fa1, $fa0 fsel $fa0, $fa1, $fa0, $fcc0 fabs.s $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs6 + fcmp.clt.d $fcc0, $fa0, $fs7 bcnez $fcc0, .LBB154_3 # %bb.7: # in Loop: Header=BB154_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -43405,11 +43072,15 @@ _Z14test_variable4If28custom_sub_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # @_ .LBB154_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI154_0) - fld.s $fs4, $a0, %pc_lo12(.LCPI154_0) - pcalau12i $a0, %pc_hi20(.LCPI154_1) - fld.d $fs5, $a0, %pc_lo12(.LCPI154_1) - movgr2fr.w $fs6, $zero + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs4, $a0 + movgr2fr.w $fs5, $zero + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs6, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) move $s2, $zero @@ -43427,10 +43098,10 @@ _Z14test_variable4If28custom_sub_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # @_ fsub.s $fa1, $fa1, $fs1 fsub.s $fa1, $fa1, $fs0 fmul.s $fa1, $fa1, $fs4 - fadd.s $fa1, $fa1, $fs6 + fadd.s $fa1, $fa1, $fs5 fabs.s $fa1, $fa1 fcvt.d.s $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs5 + fcmp.clt.d $fcc0, $fa1, $fs6 bcnez $fcc0, .LBB154_9 # %bb.11: # in Loop: Header=BB154_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -43468,12 +43139,14 @@ _Z14test_variable4If28custom_sub_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # @_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB154_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI154_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI154_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -43513,20 +43186,8 @@ _Z14test_variable4If28custom_sub_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4If28custom_sub_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end154-_Z14test_variable4If28custom_sub_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z14test_variable1If24custom_multiply_variableIfEEvPT_iS2_PKc -.LCPI155_0: - .word 0xc5fa0000 # float -8000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI155_1: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI155_2: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI155_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1If24custom_multiply_variableIfEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1If24custom_multiply_variableIfEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1If24custom_multiply_variableIfEEvPT_iS2_PKc + .weak _Z14test_variable1If24custom_multiply_variableIfEEvPT_iS2_PKc # -- Begin function _Z14test_variable1If24custom_multiply_variableIfEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1If24custom_multiply_variableIfEEvPT_iS2_PKc,@function _Z14test_variable1If24custom_multiply_variableIfEEvPT_iS2_PKc: # @_Z14test_variable1If24custom_multiply_variableIfEEvPT_iS2_PKc @@ -43579,14 +43240,21 @@ _Z14test_variable1If24custom_multiply_variableIfEEvPT_iS2_PKc: # @_Z14test_varia # %bb.1: # %.preheader.lr.ph blez $s0, .LBB155_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI155_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI155_0) - pcalau12i $a0, %pc_hi20(.LCPI155_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI155_2) - pcalau12i $a0, %pc_hi20(.LCPI155_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI155_1) - movgr2fr.w $fs4, $zero + movgr2fr.w $fs1, $zero pcalau12i $s6, %pc_hi20(init_value) + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs2, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s2, $a0, %pc_lo12(.L.str.179) move $s7, $zero @@ -43601,7 +43269,7 @@ _Z14test_variable1If24custom_multiply_variableIfEEvPT_iS2_PKc: # @_Z14test_varia # Child Loop BB155_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.s $fa0, $fs4 + fmov.s $fa0, $fs1 .p2align 4, , 16 .LBB155_5: # Parent Loop BB155_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -43616,16 +43284,16 @@ _Z14test_variable1If24custom_multiply_variableIfEEvPT_iS2_PKc: # @_Z14test_varia fld.d $fa1, $s6, %pc_lo12(init_value) fcvt.s.d $fa1, $fa1 fmul.s $fa1, $fs0, $fa1 - fmul.s $fa1, $fa1, $fs1 + fmul.s $fa1, $fa1, $fs2 fadd.s $fa1, $fa0, $fa1 fabs.s $fa2, $fa0 fcvt.d.s $fa2, $fa2 - fcmp.clt.d $fcc0, $fs2, $fa2 + fcmp.clt.d $fcc0, $fs3, $fa2 fdiv.s $fa0, $fa1, $fa0 fsel $fa0, $fa1, $fa0, $fcc0 fabs.s $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs3 + fcmp.clt.d $fcc0, $fa0, $fs4 bcnez $fcc0, .LBB155_3 # %bb.7: # in Loop: Header=BB155_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -43637,11 +43305,15 @@ _Z14test_variable1If24custom_multiply_variableIfEEvPT_iS2_PKc: # @_Z14test_varia .LBB155_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI155_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI155_0) - pcalau12i $a0, %pc_hi20(.LCPI155_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI155_1) - movgr2fr.w $fs3, $zero + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs1, $a0 + movgr2fr.w $fs2, $zero + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) move $s2, $zero @@ -43656,10 +43328,10 @@ _Z14test_variable1If24custom_multiply_variableIfEEvPT_iS2_PKc: # @_Z14test_varia fcvt.s.d $fa1, $fa0 fmul.s $fa1, $fs0, $fa1 fmul.s $fa1, $fa1, $fs1 - fadd.s $fa1, $fa1, $fs3 + fadd.s $fa1, $fa1, $fs2 fabs.s $fa1, $fa1 fcvt.d.s $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs2 + fcmp.clt.d $fcc0, $fa1, $fs3 bcnez $fcc0, .LBB155_9 # %bb.11: # in Loop: Header=BB155_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -43697,12 +43369,14 @@ _Z14test_variable1If24custom_multiply_variableIfEEvPT_iS2_PKc: # @_Z14test_varia move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB155_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI155_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI155_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -43739,20 +43413,8 @@ _Z14test_variable1If24custom_multiply_variableIfEEvPT_iS2_PKc: # @_Z14test_varia .size _Z14test_variable1If24custom_multiply_variableIfEEvPT_iS2_PKc, .Lfunc_end155-_Z14test_variable1If24custom_multiply_variableIfEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z14test_variable4If33custom_multiply_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc -.LCPI156_0: - .word 0xc5fa0000 # float -8000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI156_1: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI156_2: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI156_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4If33custom_multiply_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4If33custom_multiply_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4If33custom_multiply_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4If33custom_multiply_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4If33custom_multiply_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4If33custom_multiply_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4If33custom_multiply_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4If33custom_multiply_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc @@ -43814,14 +43476,21 @@ _Z14test_variable4If33custom_multiply_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # %bb.1: # %.preheader.lr.ph blez $s0, .LBB156_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI156_0) - fld.s $fs4, $a0, %pc_lo12(.LCPI156_0) - pcalau12i $a0, %pc_hi20(.LCPI156_2) - fld.d $fs5, $a0, %pc_lo12(.LCPI156_2) - pcalau12i $a0, %pc_hi20(.LCPI156_1) - fld.d $fs6, $a0, %pc_lo12(.LCPI156_1) - movgr2fr.w $fs7, $zero + movgr2fr.w $fs4, $zero pcalau12i $s6, %pc_hi20(init_value) + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs5, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs6, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs7, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s2, $a0, %pc_lo12(.L.str.179) move $s7, $zero @@ -43836,7 +43505,7 @@ _Z14test_variable4If33custom_multiply_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # Child Loop BB156_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.s $fa0, $fs7 + fmov.s $fa0, $fs4 .p2align 4, , 16 .LBB156_5: # Parent Loop BB156_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -43857,16 +43526,16 @@ _Z14test_variable4If33custom_multiply_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: fmul.s $fa1, $fs2, $fa1 fmul.s $fa1, $fs1, $fa1 fmul.s $fa1, $fs0, $fa1 - fmul.s $fa1, $fa1, $fs4 + fmul.s $fa1, $fa1, $fs5 fadd.s $fa1, $fa0, $fa1 fabs.s $fa2, $fa0 fcvt.d.s $fa2, $fa2 - fcmp.clt.d $fcc0, $fs5, $fa2 + fcmp.clt.d $fcc0, $fs6, $fa2 fdiv.s $fa0, $fa1, $fa0 fsel $fa0, $fa1, $fa0, $fcc0 fabs.s $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs6 + fcmp.clt.d $fcc0, $fa0, $fs7 bcnez $fcc0, .LBB156_3 # %bb.7: # in Loop: Header=BB156_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -43878,11 +43547,15 @@ _Z14test_variable4If33custom_multiply_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: .LBB156_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI156_0) - fld.s $fs4, $a0, %pc_lo12(.LCPI156_0) - pcalau12i $a0, %pc_hi20(.LCPI156_1) - fld.d $fs5, $a0, %pc_lo12(.LCPI156_1) - movgr2fr.w $fs6, $zero + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs4, $a0 + movgr2fr.w $fs5, $zero + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs6, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) move $s2, $zero @@ -43900,10 +43573,10 @@ _Z14test_variable4If33custom_multiply_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: fmul.s $fa1, $fs1, $fa1 fmul.s $fa1, $fs0, $fa1 fmul.s $fa1, $fa1, $fs4 - fadd.s $fa1, $fa1, $fs6 + fadd.s $fa1, $fa1, $fs5 fabs.s $fa1, $fa1 fcvt.d.s $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs5 + fcmp.clt.d $fcc0, $fa1, $fs6 bcnez $fcc0, .LBB156_9 # %bb.11: # in Loop: Header=BB156_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -43941,12 +43614,14 @@ _Z14test_variable4If33custom_multiply_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB156_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI156_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI156_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -43986,20 +43661,8 @@ _Z14test_variable4If33custom_multiply_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: .size _Z14test_variable4If33custom_multiply_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end156-_Z14test_variable4If33custom_multiply_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z14test_variable4If34custom_multiply_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc -.LCPI157_0: - .word 0xc5fa0000 # float -8000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI157_1: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI157_2: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI157_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4If34custom_multiply_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4If34custom_multiply_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4If34custom_multiply_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4If34custom_multiply_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4If34custom_multiply_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4If34custom_multiply_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4If34custom_multiply_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4If34custom_multiply_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc @@ -44059,14 +43722,21 @@ _Z14test_variable4If34custom_multiply_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc fmul.s $fs1, $fa0, $fs1 blez $s0, .LBB157_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI157_0) - fld.s $fs2, $a0, %pc_lo12(.LCPI157_0) - pcalau12i $a0, %pc_hi20(.LCPI157_2) - fld.d $fs3, $a0, %pc_lo12(.LCPI157_2) - pcalau12i $a0, %pc_hi20(.LCPI157_1) - fld.d $fs4, $a0, %pc_lo12(.LCPI157_1) - movgr2fr.w $fs5, $zero + movgr2fr.w $fs2, $zero pcalau12i $s6, %pc_hi20(init_value) + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs3, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s2, $a0, %pc_lo12(.L.str.179) move $s7, $zero @@ -44081,7 +43751,7 @@ _Z14test_variable4If34custom_multiply_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc # Child Loop BB157_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.s $fa0, $fs5 + fmov.s $fa0, $fs2 .p2align 4, , 16 .LBB157_5: # Parent Loop BB157_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -44096,16 +43766,16 @@ _Z14test_variable4If34custom_multiply_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc fld.d $fa1, $s6, %pc_lo12(init_value) fcvt.s.d $fa1, $fa1 fmadd.s $fa1, $fs1, $fs0, $fa1 - fmul.s $fa1, $fa1, $fs2 + fmul.s $fa1, $fa1, $fs3 fadd.s $fa1, $fa0, $fa1 fabs.s $fa2, $fa0 fcvt.d.s $fa2, $fa2 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fdiv.s $fa0, $fa1, $fa0 fsel $fa0, $fa1, $fa0, $fcc0 fabs.s $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB157_3 # %bb.7: # in Loop: Header=BB157_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -44117,11 +43787,15 @@ _Z14test_variable4If34custom_multiply_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc .LBB157_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI157_0) - fld.s $fs2, $a0, %pc_lo12(.LCPI157_0) - pcalau12i $a0, %pc_hi20(.LCPI157_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI157_1) - movgr2fr.w $fs4, $zero + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs2, $a0 + movgr2fr.w $fs3, $zero + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) move $s2, $zero @@ -44136,10 +43810,10 @@ _Z14test_variable4If34custom_multiply_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc fcvt.s.d $fa1, $fa0 fmadd.s $fa1, $fs1, $fs0, $fa1 fmul.s $fa1, $fa1, $fs2 - fadd.s $fa1, $fa1, $fs4 + fadd.s $fa1, $fa1, $fs3 fabs.s $fa1, $fa1 fcvt.d.s $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB157_9 # %bb.11: # in Loop: Header=BB157_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -44177,12 +43851,14 @@ _Z14test_variable4If34custom_multiply_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB157_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI157_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI157_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -44220,20 +43896,8 @@ _Z14test_variable4If34custom_multiply_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc .size _Z14test_variable4If34custom_multiply_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end157-_Z14test_variable4If34custom_multiply_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z14test_variable1If22custom_divide_variableIfEEvPT_iS2_PKc -.LCPI158_0: - .word 0xc5fa0000 # float -8000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI158_1: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI158_2: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI158_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1If22custom_divide_variableIfEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1If22custom_divide_variableIfEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1If22custom_divide_variableIfEEvPT_iS2_PKc + .weak _Z14test_variable1If22custom_divide_variableIfEEvPT_iS2_PKc # -- Begin function _Z14test_variable1If22custom_divide_variableIfEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1If22custom_divide_variableIfEEvPT_iS2_PKc,@function _Z14test_variable1If22custom_divide_variableIfEEvPT_iS2_PKc: # @_Z14test_variable1If22custom_divide_variableIfEEvPT_iS2_PKc @@ -44286,14 +43950,21 @@ _Z14test_variable1If22custom_divide_variableIfEEvPT_iS2_PKc: # @_Z14test_variabl # %bb.1: # %.preheader.lr.ph blez $s0, .LBB158_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI158_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI158_0) - pcalau12i $a0, %pc_hi20(.LCPI158_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI158_2) - pcalau12i $a0, %pc_hi20(.LCPI158_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI158_1) - movgr2fr.w $fs4, $zero + movgr2fr.w $fs1, $zero pcalau12i $s6, %pc_hi20(init_value) + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs2, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s2, $a0, %pc_lo12(.L.str.179) move $s7, $zero @@ -44308,7 +43979,7 @@ _Z14test_variable1If22custom_divide_variableIfEEvPT_iS2_PKc: # @_Z14test_variabl # Child Loop BB158_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.s $fa0, $fs4 + fmov.s $fa0, $fs1 .p2align 4, , 16 .LBB158_5: # Parent Loop BB158_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -44323,16 +43994,16 @@ _Z14test_variable1If22custom_divide_variableIfEEvPT_iS2_PKc: # @_Z14test_variabl fld.d $fa1, $s6, %pc_lo12(init_value) fcvt.s.d $fa1, $fa1 fdiv.s $fa1, $fa1, $fs0 - fmul.s $fa1, $fa1, $fs1 + fmul.s $fa1, $fa1, $fs2 fadd.s $fa1, $fa0, $fa1 fabs.s $fa2, $fa0 fcvt.d.s $fa2, $fa2 - fcmp.clt.d $fcc0, $fs2, $fa2 + fcmp.clt.d $fcc0, $fs3, $fa2 fdiv.s $fa0, $fa1, $fa0 fsel $fa0, $fa1, $fa0, $fcc0 fabs.s $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs3 + fcmp.clt.d $fcc0, $fa0, $fs4 bcnez $fcc0, .LBB158_3 # %bb.7: # in Loop: Header=BB158_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -44344,11 +44015,15 @@ _Z14test_variable1If22custom_divide_variableIfEEvPT_iS2_PKc: # @_Z14test_variabl .LBB158_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI158_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI158_0) - pcalau12i $a0, %pc_hi20(.LCPI158_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI158_1) - movgr2fr.w $fs3, $zero + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs1, $a0 + movgr2fr.w $fs2, $zero + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) move $s2, $zero @@ -44363,10 +44038,10 @@ _Z14test_variable1If22custom_divide_variableIfEEvPT_iS2_PKc: # @_Z14test_variabl fcvt.s.d $fa1, $fa0 fdiv.s $fa1, $fa1, $fs0 fmul.s $fa1, $fa1, $fs1 - fadd.s $fa1, $fa1, $fs3 + fadd.s $fa1, $fa1, $fs2 fabs.s $fa1, $fa1 fcvt.d.s $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs2 + fcmp.clt.d $fcc0, $fa1, $fs3 bcnez $fcc0, .LBB158_9 # %bb.11: # in Loop: Header=BB158_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -44404,12 +44079,14 @@ _Z14test_variable1If22custom_divide_variableIfEEvPT_iS2_PKc: # @_Z14test_variabl move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB158_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI158_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI158_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -44446,20 +44123,8 @@ _Z14test_variable1If22custom_divide_variableIfEEvPT_iS2_PKc: # @_Z14test_variabl .size _Z14test_variable1If22custom_divide_variableIfEEvPT_iS2_PKc, .Lfunc_end158-_Z14test_variable1If22custom_divide_variableIfEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z14test_variable4If31custom_divide_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc -.LCPI159_0: - .word 0xc5fa0000 # float -8000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI159_1: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI159_2: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI159_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4If31custom_divide_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4If31custom_divide_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4If31custom_divide_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4If31custom_divide_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4If31custom_divide_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4If31custom_divide_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4If31custom_divide_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4If31custom_divide_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc @@ -44521,14 +44186,21 @@ _Z14test_variable4If31custom_divide_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # # %bb.1: # %.preheader.lr.ph blez $s0, .LBB159_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI159_0) - fld.s $fs4, $a0, %pc_lo12(.LCPI159_0) - pcalau12i $a0, %pc_hi20(.LCPI159_2) - fld.d $fs5, $a0, %pc_lo12(.LCPI159_2) - pcalau12i $a0, %pc_hi20(.LCPI159_1) - fld.d $fs6, $a0, %pc_lo12(.LCPI159_1) - movgr2fr.w $fs7, $zero + movgr2fr.w $fs4, $zero pcalau12i $s6, %pc_hi20(init_value) + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs5, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs6, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs7, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s2, $a0, %pc_lo12(.L.str.179) move $s7, $zero @@ -44543,7 +44215,7 @@ _Z14test_variable4If31custom_divide_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # # Child Loop BB159_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.s $fa0, $fs7 + fmov.s $fa0, $fs4 .p2align 4, , 16 .LBB159_5: # Parent Loop BB159_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -44564,16 +44236,16 @@ _Z14test_variable4If31custom_divide_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # fdiv.s $fa1, $fa1, $fs2 fdiv.s $fa1, $fa1, $fs1 fdiv.s $fa1, $fa1, $fs0 - fmul.s $fa1, $fa1, $fs4 + fmul.s $fa1, $fa1, $fs5 fadd.s $fa1, $fa0, $fa1 fabs.s $fa2, $fa0 fcvt.d.s $fa2, $fa2 - fcmp.clt.d $fcc0, $fs5, $fa2 + fcmp.clt.d $fcc0, $fs6, $fa2 fdiv.s $fa0, $fa1, $fa0 fsel $fa0, $fa1, $fa0, $fcc0 fabs.s $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs6 + fcmp.clt.d $fcc0, $fa0, $fs7 bcnez $fcc0, .LBB159_3 # %bb.7: # in Loop: Header=BB159_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -44585,11 +44257,15 @@ _Z14test_variable4If31custom_divide_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # .LBB159_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI159_0) - fld.s $fs4, $a0, %pc_lo12(.LCPI159_0) - pcalau12i $a0, %pc_hi20(.LCPI159_1) - fld.d $fs5, $a0, %pc_lo12(.LCPI159_1) - movgr2fr.w $fs6, $zero + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs4, $a0 + movgr2fr.w $fs5, $zero + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs6, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) move $s2, $zero @@ -44607,10 +44283,10 @@ _Z14test_variable4If31custom_divide_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # fdiv.s $fa1, $fa1, $fs1 fdiv.s $fa1, $fa1, $fs0 fmul.s $fa1, $fa1, $fs4 - fadd.s $fa1, $fa1, $fs6 + fadd.s $fa1, $fa1, $fs5 fabs.s $fa1, $fa1 fcvt.d.s $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs5 + fcmp.clt.d $fcc0, $fa1, $fs6 bcnez $fcc0, .LBB159_9 # %bb.11: # in Loop: Header=BB159_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -44648,12 +44324,14 @@ _Z14test_variable4If31custom_divide_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB159_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI159_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI159_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -44693,20 +44371,8 @@ _Z14test_variable4If31custom_divide_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # .size _Z14test_variable4If31custom_divide_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end159-_Z14test_variable4If31custom_divide_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z14test_variable4If32custom_divide_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc -.LCPI160_0: - .word 0xc5fa0000 # float -8000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI160_1: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI160_2: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI160_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4If32custom_divide_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4If32custom_divide_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4If32custom_divide_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4If32custom_divide_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4If32custom_divide_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4If32custom_divide_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4If32custom_divide_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4If32custom_divide_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc @@ -44765,14 +44431,21 @@ _Z14test_variable4If32custom_divide_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc: fdiv.s $fs0, $fa0, $fs0 blez $s0, .LBB160_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI160_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI160_0) - pcalau12i $a0, %pc_hi20(.LCPI160_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI160_2) - pcalau12i $a0, %pc_hi20(.LCPI160_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI160_1) - movgr2fr.w $fs4, $zero + movgr2fr.w $fs1, $zero pcalau12i $s6, %pc_hi20(init_value) + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs2, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s2, $a0, %pc_lo12(.L.str.179) move $s7, $zero @@ -44787,7 +44460,7 @@ _Z14test_variable4If32custom_divide_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc: # Child Loop BB160_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.s $fa0, $fs4 + fmov.s $fa0, $fs1 .p2align 4, , 16 .LBB160_5: # Parent Loop BB160_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -44802,16 +44475,16 @@ _Z14test_variable4If32custom_divide_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc: fld.d $fa1, $s6, %pc_lo12(init_value) fcvt.s.d $fa1, $fa1 fadd.s $fa1, $fs0, $fa1 - fmul.s $fa1, $fa1, $fs1 + fmul.s $fa1, $fa1, $fs2 fadd.s $fa1, $fa0, $fa1 fabs.s $fa2, $fa0 fcvt.d.s $fa2, $fa2 - fcmp.clt.d $fcc0, $fs2, $fa2 + fcmp.clt.d $fcc0, $fs3, $fa2 fdiv.s $fa0, $fa1, $fa0 fsel $fa0, $fa1, $fa0, $fcc0 fabs.s $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs3 + fcmp.clt.d $fcc0, $fa0, $fs4 bcnez $fcc0, .LBB160_3 # %bb.7: # in Loop: Header=BB160_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -44823,11 +44496,15 @@ _Z14test_variable4If32custom_divide_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc: .LBB160_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI160_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI160_0) - pcalau12i $a0, %pc_hi20(.LCPI160_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI160_1) - movgr2fr.w $fs3, $zero + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs1, $a0 + movgr2fr.w $fs2, $zero + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) move $s2, $zero @@ -44842,10 +44519,10 @@ _Z14test_variable4If32custom_divide_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc: fcvt.s.d $fa1, $fa0 fadd.s $fa1, $fs0, $fa1 fmul.s $fa1, $fa1, $fs1 - fadd.s $fa1, $fa1, $fs3 + fadd.s $fa1, $fa1, $fs2 fabs.s $fa1, $fa1 fcvt.d.s $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs2 + fcmp.clt.d $fcc0, $fa1, $fs3 bcnez $fcc0, .LBB160_9 # %bb.11: # in Loop: Header=BB160_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -44883,12 +44560,14 @@ _Z14test_variable4If32custom_divide_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc: move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB160_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI160_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI160_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -44925,20 +44604,8 @@ _Z14test_variable4If32custom_divide_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc: .size _Z14test_variable4If32custom_divide_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end160-_Z14test_variable4If32custom_divide_multiple_variable2IfEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _Z14test_variable4If30custom_mixed_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc -.LCPI161_0: - .word 0xc5fa0000 # float -8000 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI161_1: - .dword 0x3f50624dd2f1a9fc # double 0.001 -.LCPI161_2: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI161_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4If30custom_mixed_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4If30custom_mixed_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4If30custom_mixed_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4If30custom_mixed_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4If30custom_mixed_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4If30custom_mixed_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4If30custom_mixed_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4If30custom_mixed_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc @@ -44998,14 +44665,21 @@ _Z14test_variable4If30custom_mixed_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # fdiv.s $fs1, $fa0, $fs1 blez $s0, .LBB161_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI161_0) - fld.s $fs2, $a0, %pc_lo12(.LCPI161_0) - pcalau12i $a0, %pc_hi20(.LCPI161_2) - fld.d $fs3, $a0, %pc_lo12(.LCPI161_2) - pcalau12i $a0, %pc_hi20(.LCPI161_1) - fld.d $fs4, $a0, %pc_lo12(.LCPI161_1) - movgr2fr.w $fs5, $zero + movgr2fr.w $fs2, $zero pcalau12i $s6, %pc_hi20(init_value) + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs3, $a0 + lu12i.w $a0, -85564 + ori $a0, $a0, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s2, $a0, %pc_lo12(.L.str.179) move $s7, $zero @@ -45020,7 +44694,7 @@ _Z14test_variable4If30custom_mixed_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # # Child Loop BB161_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.s $fa0, $fs5 + fmov.s $fa0, $fs2 .p2align 4, , 16 .LBB161_5: # Parent Loop BB161_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -45037,16 +44711,16 @@ _Z14test_variable4If30custom_mixed_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # fcvt.s.d $fa1, $fa1 fadd.s $fa1, $fs0, $fa1 fsub.s $fa1, $fa1, $fs1 - fmul.s $fa1, $fa1, $fs2 + fmul.s $fa1, $fa1, $fs3 fadd.s $fa1, $fa0, $fa1 fabs.s $fa2, $fa0 fcvt.d.s $fa2, $fa2 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fdiv.s $fa0, $fa1, $fa0 fsel $fa0, $fa1, $fa0, $fcc0 fabs.s $fa0, $fa0 fcvt.d.s $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB161_3 # %bb.7: # in Loop: Header=BB161_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -45058,11 +44732,15 @@ _Z14test_variable4If30custom_mixed_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # .LBB161_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI161_0) - fld.s $fs2, $a0, %pc_lo12(.LCPI161_0) - pcalau12i $a0, %pc_hi20(.LCPI161_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI161_1) - movgr2fr.w $fs4, $zero + lu12i.w $a0, -237664 + lu32i.d $a0, 0 + movgr2fr.w $fs2, $a0 + movgr2fr.w $fs3, $zero + lu12i.w $a0, -184550 + ori $a0, $a0, 2556 + lu32i.d $a0, 25165 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) move $s2, $zero @@ -45078,10 +44756,10 @@ _Z14test_variable4If30custom_mixed_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # fadd.s $fa1, $fs0, $fa1 fsub.s $fa1, $fa1, $fs1 fmul.s $fa1, $fa1, $fs2 - fadd.s $fa1, $fa1, $fs4 + fadd.s $fa1, $fa1, $fs3 fabs.s $fa1, $fa1 fcvt.d.s $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB161_9 # %bb.11: # in Loop: Header=BB161_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -45119,12 +44797,14 @@ _Z14test_variable4If30custom_mixed_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB161_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI161_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI161_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -45162,18 +44842,8 @@ _Z14test_variable4If30custom_mixed_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc: # .size _Z14test_variable4If30custom_mixed_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end161-_Z14test_variable4If30custom_mixed_multiple_variableIfEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Id19custom_add_variableIdEEvPT_iS2_PKc -.LCPI162_0: - .dword 0xc0bf400000000000 # double -8000 -.LCPI162_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI162_2: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI162_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Id19custom_add_variableIdEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Id19custom_add_variableIdEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Id19custom_add_variableIdEEvPT_iS2_PKc + .weak _Z14test_variable1Id19custom_add_variableIdEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Id19custom_add_variableIdEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Id19custom_add_variableIdEEvPT_iS2_PKc,@function _Z14test_variable1Id19custom_add_variableIdEEvPT_iS2_PKc: # @_Z14test_variable1Id19custom_add_variableIdEEvPT_iS2_PKc @@ -45226,14 +44896,22 @@ _Z14test_variable1Id19custom_add_variableIdEEvPT_iS2_PKc: # @_Z14test_variable1I # %bb.1: # %.preheader.lr.ph blez $s0, .LBB162_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI162_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI162_0) - pcalau12i $a0, %pc_hi20(.LCPI162_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI162_2) - pcalau12i $a0, %pc_hi20(.LCPI162_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI162_1) - movgr2fr.d $fs4, $zero + movgr2fr.d $fs1, $zero pcalau12i $s6, %pc_hi20(init_value) + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s2, $a0, %pc_lo12(.L.str.179) move $s7, $zero @@ -45248,7 +44926,7 @@ _Z14test_variable1Id19custom_add_variableIdEEvPT_iS2_PKc: # @_Z14test_variable1I # Child Loop BB162_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs4 + fmov.d $fa0, $fs1 .p2align 4, , 16 .LBB162_5: # Parent Loop BB162_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -45262,14 +44940,14 @@ _Z14test_variable1Id19custom_add_variableIdEEvPT_iS2_PKc: # @_Z14test_variable1I # in Loop: Header=BB162_4 Depth=1 fld.d $fa1, $s6, %pc_lo12(init_value) fadd.d $fa1, $fs0, $fa1 - fmul.d $fa1, $fa1, $fs1 + fmul.d $fa1, $fa1, $fs2 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs2, $fa2 + fcmp.clt.d $fcc0, $fs3, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs3 + fcmp.clt.d $fcc0, $fa0, $fs4 bcnez $fcc0, .LBB162_3 # %bb.7: # in Loop: Header=BB162_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -45281,11 +44959,16 @@ _Z14test_variable1Id19custom_add_variableIdEEvPT_iS2_PKc: # @_Z14test_variable1I .LBB162_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI162_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI162_0) - pcalau12i $a0, %pc_hi20(.LCPI162_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI162_1) - movgr2fr.d $fs3, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs1, $a0 + movgr2fr.d $fs2, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) move $s2, $zero @@ -45299,9 +44982,9 @@ _Z14test_variable1Id19custom_add_variableIdEEvPT_iS2_PKc: # @_Z14test_variable1I # =>This Inner Loop Header: Depth=1 fadd.d $fa1, $fs0, $fa0 fmul.d $fa1, $fa1, $fs1 - fadd.d $fa1, $fa1, $fs3 + fadd.d $fa1, $fa1, $fs2 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs2 + fcmp.clt.d $fcc0, $fa1, $fs3 bcnez $fcc0, .LBB162_9 # %bb.11: # in Loop: Header=BB162_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -45339,12 +45022,14 @@ _Z14test_variable1Id19custom_add_variableIdEEvPT_iS2_PKc: # @_Z14test_variable1I move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB162_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI162_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI162_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -45381,18 +45066,8 @@ _Z14test_variable1Id19custom_add_variableIdEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Id19custom_add_variableIdEEvPT_iS2_PKc, .Lfunc_end162-_Z14test_variable1Id19custom_add_variableIdEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z22test_hoisted_variable1Id19custom_add_variableIdEEvPT_iS2_PKc -.LCPI163_0: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI163_1: - .dword 0xc0bf400000000000 # double -8000 -.LCPI163_2: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI163_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z22test_hoisted_variable1Id19custom_add_variableIdEEvPT_iS2_PKc,"axG",@progbits,_Z22test_hoisted_variable1Id19custom_add_variableIdEEvPT_iS2_PKc,comdat - .weak _Z22test_hoisted_variable1Id19custom_add_variableIdEEvPT_iS2_PKc + .weak _Z22test_hoisted_variable1Id19custom_add_variableIdEEvPT_iS2_PKc # -- Begin function _Z22test_hoisted_variable1Id19custom_add_variableIdEEvPT_iS2_PKc .p2align 5 .type _Z22test_hoisted_variable1Id19custom_add_variableIdEEvPT_iS2_PKc,@function _Z22test_hoisted_variable1Id19custom_add_variableIdEEvPT_iS2_PKc: # @_Z22test_hoisted_variable1Id19custom_add_variableIdEEvPT_iS2_PKc @@ -45449,14 +45124,22 @@ _Z22test_hoisted_variable1Id19custom_add_variableIdEEvPT_iS2_PKc: # @_Z22test_ho ffint.d.w $fs1, $fa0 blez $s0, .LBB163_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI163_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI163_1) - pcalau12i $a0, %pc_hi20(.LCPI163_0) - fld.d $fs3, $a0, %pc_lo12(.LCPI163_0) - pcalau12i $a0, %pc_hi20(.LCPI163_2) - fld.d $fs4, $a0, %pc_lo12(.LCPI163_2) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs2, $zero pcalau12i $s6, %pc_hi20(init_value) + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s2, $a0, %pc_lo12(.L.str.179) move $s7, $zero @@ -45471,7 +45154,7 @@ _Z22test_hoisted_variable1Id19custom_add_variableIdEEvPT_iS2_PKc: # @_Z22test_ho # Child Loop BB163_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs2 .p2align 4, , 16 .LBB163_5: # Parent Loop BB163_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -45485,14 +45168,14 @@ _Z22test_hoisted_variable1Id19custom_add_variableIdEEvPT_iS2_PKc: # @_Z22test_ho fld.d $fa1, $s6, %pc_lo12(init_value) fmadd.d $fa0, $fs1, $fs0, $fa0 fadd.d $fa1, $fs0, $fa1 - fmul.d $fa1, $fa1, $fs2 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB163_3 # %bb.7: # in Loop: Header=BB163_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -45504,19 +45187,27 @@ _Z22test_hoisted_variable1Id19custom_add_variableIdEEvPT_iS2_PKc: # @_Z22test_ho .LBB163_8: # %.preheader.lr.ph.split movgr2fr.d $fa0, $zero fmadd.d $fs1, $fs1, $fs0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI163_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI163_0) + fabs.d $fa1, $fs1 pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - fabs.d $fa2, $fs1 - pcalau12i $a0, %pc_hi20(.LCPI163_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI163_1) - pcalau12i $a0, %pc_hi20(.LCPI163_2) - fld.d $fs3, $a0, %pc_lo12(.LCPI163_2) + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fa2, $a0 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) move $s2, $zero - fcmp.clt.d $fcc0, $fa1, $fa2 + fcmp.clt.d $fcc0, $fa2, $fa1 bcnez $fcc0, .LBB163_10 b .LBB163_13 .p2align 4, , 16 @@ -45590,12 +45281,14 @@ _Z22test_hoisted_variable1Id19custom_add_variableIdEEvPT_iS2_PKc: # @_Z22test_ho move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB163_19: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI163_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI163_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -45633,18 +45326,8 @@ _Z22test_hoisted_variable1Id19custom_add_variableIdEEvPT_iS2_PKc: # @_Z22test_ho .size _Z22test_hoisted_variable1Id19custom_add_variableIdEEvPT_iS2_PKc, .Lfunc_end163-_Z22test_hoisted_variable1Id19custom_add_variableIdEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Id28custom_add_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc -.LCPI164_0: - .dword 0xc0bf400000000000 # double -8000 -.LCPI164_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI164_2: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI164_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Id28custom_add_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Id28custom_add_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Id28custom_add_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Id28custom_add_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Id28custom_add_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Id28custom_add_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Id28custom_add_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Id28custom_add_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc @@ -45706,14 +45389,22 @@ _Z14test_variable4Id28custom_add_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # @_ # %bb.1: # %.preheader.lr.ph blez $s0, .LBB164_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI164_0) - fld.d $fs4, $a0, %pc_lo12(.LCPI164_0) - pcalau12i $a0, %pc_hi20(.LCPI164_2) - fld.d $fs5, $a0, %pc_lo12(.LCPI164_2) - pcalau12i $a0, %pc_hi20(.LCPI164_1) - fld.d $fs6, $a0, %pc_lo12(.LCPI164_1) - movgr2fr.d $fs7, $zero + movgr2fr.d $fs4, $zero pcalau12i $s6, %pc_hi20(init_value) + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs5, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs6, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs7, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s2, $a0, %pc_lo12(.L.str.179) move $s7, $zero @@ -45728,7 +45419,7 @@ _Z14test_variable4Id28custom_add_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # @_ # Child Loop BB164_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs7 + fmov.d $fa0, $fs4 .p2align 4, , 16 .LBB164_5: # Parent Loop BB164_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -45748,14 +45439,14 @@ _Z14test_variable4Id28custom_add_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # @_ fadd.d $fa1, $fs2, $fa1 fadd.d $fa1, $fs1, $fa1 fadd.d $fa1, $fs0, $fa1 - fmul.d $fa1, $fa1, $fs4 + fmul.d $fa1, $fa1, $fs5 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs5, $fa2 + fcmp.clt.d $fcc0, $fs6, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs6 + fcmp.clt.d $fcc0, $fa0, $fs7 bcnez $fcc0, .LBB164_3 # %bb.7: # in Loop: Header=BB164_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -45767,11 +45458,16 @@ _Z14test_variable4Id28custom_add_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # @_ .LBB164_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI164_0) - fld.d $fs4, $a0, %pc_lo12(.LCPI164_0) - pcalau12i $a0, %pc_hi20(.LCPI164_1) - fld.d $fs5, $a0, %pc_lo12(.LCPI164_1) - movgr2fr.d $fs6, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs4, $a0 + movgr2fr.d $fs5, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs6, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) move $s2, $zero @@ -45788,9 +45484,9 @@ _Z14test_variable4Id28custom_add_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # @_ fadd.d $fa1, $fs1, $fa1 fadd.d $fa1, $fs0, $fa1 fmul.d $fa1, $fa1, $fs4 - fadd.d $fa1, $fa1, $fs6 + fadd.d $fa1, $fa1, $fs5 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs5 + fcmp.clt.d $fcc0, $fa1, $fs6 bcnez $fcc0, .LBB164_9 # %bb.11: # in Loop: Header=BB164_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -45828,12 +45524,14 @@ _Z14test_variable4Id28custom_add_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # @_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB164_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI164_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI164_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -45873,18 +45571,8 @@ _Z14test_variable4Id28custom_add_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Id28custom_add_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end164-_Z14test_variable4Id28custom_add_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Id19custom_sub_variableIdEEvPT_iS2_PKc -.LCPI165_0: - .dword 0xc0bf400000000000 # double -8000 -.LCPI165_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI165_2: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI165_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Id19custom_sub_variableIdEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Id19custom_sub_variableIdEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Id19custom_sub_variableIdEEvPT_iS2_PKc + .weak _Z14test_variable1Id19custom_sub_variableIdEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Id19custom_sub_variableIdEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Id19custom_sub_variableIdEEvPT_iS2_PKc,@function _Z14test_variable1Id19custom_sub_variableIdEEvPT_iS2_PKc: # @_Z14test_variable1Id19custom_sub_variableIdEEvPT_iS2_PKc @@ -45937,14 +45625,22 @@ _Z14test_variable1Id19custom_sub_variableIdEEvPT_iS2_PKc: # @_Z14test_variable1I # %bb.1: # %.preheader.lr.ph blez $s0, .LBB165_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI165_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI165_0) - pcalau12i $a0, %pc_hi20(.LCPI165_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI165_2) - pcalau12i $a0, %pc_hi20(.LCPI165_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI165_1) - movgr2fr.d $fs4, $zero + movgr2fr.d $fs1, $zero pcalau12i $s6, %pc_hi20(init_value) + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s2, $a0, %pc_lo12(.L.str.179) move $s7, $zero @@ -45959,7 +45655,7 @@ _Z14test_variable1Id19custom_sub_variableIdEEvPT_iS2_PKc: # @_Z14test_variable1I # Child Loop BB165_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs4 + fmov.d $fa0, $fs1 .p2align 4, , 16 .LBB165_5: # Parent Loop BB165_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -45973,14 +45669,14 @@ _Z14test_variable1Id19custom_sub_variableIdEEvPT_iS2_PKc: # @_Z14test_variable1I # in Loop: Header=BB165_4 Depth=1 fld.d $fa1, $s6, %pc_lo12(init_value) fsub.d $fa1, $fa1, $fs0 - fmul.d $fa1, $fa1, $fs1 + fmul.d $fa1, $fa1, $fs2 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs2, $fa2 + fcmp.clt.d $fcc0, $fs3, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs3 + fcmp.clt.d $fcc0, $fa0, $fs4 bcnez $fcc0, .LBB165_3 # %bb.7: # in Loop: Header=BB165_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -45992,11 +45688,16 @@ _Z14test_variable1Id19custom_sub_variableIdEEvPT_iS2_PKc: # @_Z14test_variable1I .LBB165_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI165_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI165_0) - pcalau12i $a0, %pc_hi20(.LCPI165_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI165_1) - movgr2fr.d $fs3, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs1, $a0 + movgr2fr.d $fs2, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) move $s2, $zero @@ -46010,9 +45711,9 @@ _Z14test_variable1Id19custom_sub_variableIdEEvPT_iS2_PKc: # @_Z14test_variable1I # =>This Inner Loop Header: Depth=1 fsub.d $fa1, $fa0, $fs0 fmul.d $fa1, $fa1, $fs1 - fadd.d $fa1, $fa1, $fs3 + fadd.d $fa1, $fa1, $fs2 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs2 + fcmp.clt.d $fcc0, $fa1, $fs3 bcnez $fcc0, .LBB165_9 # %bb.11: # in Loop: Header=BB165_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -46050,12 +45751,14 @@ _Z14test_variable1Id19custom_sub_variableIdEEvPT_iS2_PKc: # @_Z14test_variable1I move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB165_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI165_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI165_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -46092,18 +45795,8 @@ _Z14test_variable1Id19custom_sub_variableIdEEvPT_iS2_PKc: # @_Z14test_variable1I .size _Z14test_variable1Id19custom_sub_variableIdEEvPT_iS2_PKc, .Lfunc_end165-_Z14test_variable1Id19custom_sub_variableIdEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Id28custom_sub_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc -.LCPI166_0: - .dword 0xc0bf400000000000 # double -8000 -.LCPI166_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI166_2: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI166_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Id28custom_sub_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Id28custom_sub_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Id28custom_sub_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Id28custom_sub_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Id28custom_sub_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Id28custom_sub_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Id28custom_sub_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Id28custom_sub_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc @@ -46165,14 +45858,22 @@ _Z14test_variable4Id28custom_sub_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # @_ # %bb.1: # %.preheader.lr.ph blez $s0, .LBB166_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI166_0) - fld.d $fs4, $a0, %pc_lo12(.LCPI166_0) - pcalau12i $a0, %pc_hi20(.LCPI166_2) - fld.d $fs5, $a0, %pc_lo12(.LCPI166_2) - pcalau12i $a0, %pc_hi20(.LCPI166_1) - fld.d $fs6, $a0, %pc_lo12(.LCPI166_1) - movgr2fr.d $fs7, $zero + movgr2fr.d $fs4, $zero pcalau12i $s6, %pc_hi20(init_value) + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs5, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs6, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs7, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s2, $a0, %pc_lo12(.L.str.179) move $s7, $zero @@ -46187,7 +45888,7 @@ _Z14test_variable4Id28custom_sub_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # @_ # Child Loop BB166_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs7 + fmov.d $fa0, $fs4 .p2align 4, , 16 .LBB166_5: # Parent Loop BB166_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -46207,14 +45908,14 @@ _Z14test_variable4Id28custom_sub_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # @_ fsub.d $fa1, $fa1, $fs2 fsub.d $fa1, $fa1, $fs1 fsub.d $fa1, $fa1, $fs0 - fmul.d $fa1, $fa1, $fs4 + fmul.d $fa1, $fa1, $fs5 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs5, $fa2 + fcmp.clt.d $fcc0, $fs6, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs6 + fcmp.clt.d $fcc0, $fa0, $fs7 bcnez $fcc0, .LBB166_3 # %bb.7: # in Loop: Header=BB166_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -46226,11 +45927,16 @@ _Z14test_variable4Id28custom_sub_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # @_ .LBB166_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI166_0) - fld.d $fs4, $a0, %pc_lo12(.LCPI166_0) - pcalau12i $a0, %pc_hi20(.LCPI166_1) - fld.d $fs5, $a0, %pc_lo12(.LCPI166_1) - movgr2fr.d $fs6, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs4, $a0 + movgr2fr.d $fs5, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs6, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) move $s2, $zero @@ -46247,9 +45953,9 @@ _Z14test_variable4Id28custom_sub_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # @_ fsub.d $fa1, $fa1, $fs1 fsub.d $fa1, $fa1, $fs0 fmul.d $fa1, $fa1, $fs4 - fadd.d $fa1, $fa1, $fs6 + fadd.d $fa1, $fa1, $fs5 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs5 + fcmp.clt.d $fcc0, $fa1, $fs6 bcnez $fcc0, .LBB166_9 # %bb.11: # in Loop: Header=BB166_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -46287,12 +45993,14 @@ _Z14test_variable4Id28custom_sub_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # @_ move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB166_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI166_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI166_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -46332,18 +46040,8 @@ _Z14test_variable4Id28custom_sub_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # @_ .size _Z14test_variable4Id28custom_sub_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end166-_Z14test_variable4Id28custom_sub_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Id24custom_multiply_variableIdEEvPT_iS2_PKc -.LCPI167_0: - .dword 0xc0bf400000000000 # double -8000 -.LCPI167_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI167_2: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI167_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Id24custom_multiply_variableIdEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Id24custom_multiply_variableIdEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Id24custom_multiply_variableIdEEvPT_iS2_PKc + .weak _Z14test_variable1Id24custom_multiply_variableIdEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Id24custom_multiply_variableIdEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Id24custom_multiply_variableIdEEvPT_iS2_PKc,@function _Z14test_variable1Id24custom_multiply_variableIdEEvPT_iS2_PKc: # @_Z14test_variable1Id24custom_multiply_variableIdEEvPT_iS2_PKc @@ -46396,14 +46094,22 @@ _Z14test_variable1Id24custom_multiply_variableIdEEvPT_iS2_PKc: # @_Z14test_varia # %bb.1: # %.preheader.lr.ph blez $s0, .LBB167_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI167_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI167_0) - pcalau12i $a0, %pc_hi20(.LCPI167_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI167_2) - pcalau12i $a0, %pc_hi20(.LCPI167_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI167_1) - movgr2fr.d $fs4, $zero + movgr2fr.d $fs1, $zero pcalau12i $s6, %pc_hi20(init_value) + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s2, $a0, %pc_lo12(.L.str.179) move $s7, $zero @@ -46418,7 +46124,7 @@ _Z14test_variable1Id24custom_multiply_variableIdEEvPT_iS2_PKc: # @_Z14test_varia # Child Loop BB167_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs4 + fmov.d $fa0, $fs1 .p2align 4, , 16 .LBB167_5: # Parent Loop BB167_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -46432,14 +46138,14 @@ _Z14test_variable1Id24custom_multiply_variableIdEEvPT_iS2_PKc: # @_Z14test_varia # in Loop: Header=BB167_4 Depth=1 fld.d $fa1, $s6, %pc_lo12(init_value) fmul.d $fa1, $fs0, $fa1 - fmul.d $fa1, $fa1, $fs1 + fmul.d $fa1, $fa1, $fs2 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs2, $fa2 + fcmp.clt.d $fcc0, $fs3, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs3 + fcmp.clt.d $fcc0, $fa0, $fs4 bcnez $fcc0, .LBB167_3 # %bb.7: # in Loop: Header=BB167_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -46451,11 +46157,16 @@ _Z14test_variable1Id24custom_multiply_variableIdEEvPT_iS2_PKc: # @_Z14test_varia .LBB167_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI167_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI167_0) - pcalau12i $a0, %pc_hi20(.LCPI167_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI167_1) - movgr2fr.d $fs3, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs1, $a0 + movgr2fr.d $fs2, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) move $s2, $zero @@ -46469,9 +46180,9 @@ _Z14test_variable1Id24custom_multiply_variableIdEEvPT_iS2_PKc: # @_Z14test_varia # =>This Inner Loop Header: Depth=1 fmul.d $fa1, $fs0, $fa0 fmul.d $fa1, $fa1, $fs1 - fadd.d $fa1, $fa1, $fs3 + fadd.d $fa1, $fa1, $fs2 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs2 + fcmp.clt.d $fcc0, $fa1, $fs3 bcnez $fcc0, .LBB167_9 # %bb.11: # in Loop: Header=BB167_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -46509,12 +46220,14 @@ _Z14test_variable1Id24custom_multiply_variableIdEEvPT_iS2_PKc: # @_Z14test_varia move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB167_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI167_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI167_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -46551,18 +46264,8 @@ _Z14test_variable1Id24custom_multiply_variableIdEEvPT_iS2_PKc: # @_Z14test_varia .size _Z14test_variable1Id24custom_multiply_variableIdEEvPT_iS2_PKc, .Lfunc_end167-_Z14test_variable1Id24custom_multiply_variableIdEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Id33custom_multiply_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc -.LCPI168_0: - .dword 0xc0bf400000000000 # double -8000 -.LCPI168_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI168_2: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI168_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Id33custom_multiply_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Id33custom_multiply_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Id33custom_multiply_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Id33custom_multiply_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Id33custom_multiply_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Id33custom_multiply_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Id33custom_multiply_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Id33custom_multiply_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc @@ -46624,14 +46327,22 @@ _Z14test_variable4Id33custom_multiply_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # %bb.1: # %.preheader.lr.ph blez $s0, .LBB168_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI168_0) - fld.d $fs4, $a0, %pc_lo12(.LCPI168_0) - pcalau12i $a0, %pc_hi20(.LCPI168_2) - fld.d $fs5, $a0, %pc_lo12(.LCPI168_2) - pcalau12i $a0, %pc_hi20(.LCPI168_1) - fld.d $fs6, $a0, %pc_lo12(.LCPI168_1) - movgr2fr.d $fs7, $zero + movgr2fr.d $fs4, $zero pcalau12i $s6, %pc_hi20(init_value) + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs5, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs6, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs7, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s2, $a0, %pc_lo12(.L.str.179) move $s7, $zero @@ -46646,7 +46357,7 @@ _Z14test_variable4Id33custom_multiply_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # Child Loop BB168_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs7 + fmov.d $fa0, $fs4 .p2align 4, , 16 .LBB168_5: # Parent Loop BB168_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -46666,14 +46377,14 @@ _Z14test_variable4Id33custom_multiply_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: fmul.d $fa1, $fs2, $fa1 fmul.d $fa1, $fs1, $fa1 fmul.d $fa1, $fs0, $fa1 - fmul.d $fa1, $fa1, $fs4 + fmul.d $fa1, $fa1, $fs5 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs5, $fa2 + fcmp.clt.d $fcc0, $fs6, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs6 + fcmp.clt.d $fcc0, $fa0, $fs7 bcnez $fcc0, .LBB168_3 # %bb.7: # in Loop: Header=BB168_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -46685,11 +46396,16 @@ _Z14test_variable4Id33custom_multiply_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: .LBB168_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI168_0) - fld.d $fs4, $a0, %pc_lo12(.LCPI168_0) - pcalau12i $a0, %pc_hi20(.LCPI168_1) - fld.d $fs5, $a0, %pc_lo12(.LCPI168_1) - movgr2fr.d $fs6, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs4, $a0 + movgr2fr.d $fs5, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs6, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) move $s2, $zero @@ -46706,9 +46422,9 @@ _Z14test_variable4Id33custom_multiply_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: fmul.d $fa1, $fs1, $fa1 fmul.d $fa1, $fs0, $fa1 fmul.d $fa1, $fa1, $fs4 - fadd.d $fa1, $fa1, $fs6 + fadd.d $fa1, $fa1, $fs5 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs5 + fcmp.clt.d $fcc0, $fa1, $fs6 bcnez $fcc0, .LBB168_9 # %bb.11: # in Loop: Header=BB168_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -46746,12 +46462,14 @@ _Z14test_variable4Id33custom_multiply_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB168_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI168_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI168_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -46791,18 +46509,8 @@ _Z14test_variable4Id33custom_multiply_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: .size _Z14test_variable4Id33custom_multiply_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end168-_Z14test_variable4Id33custom_multiply_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Id34custom_multiply_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc -.LCPI169_0: - .dword 0xc0bf400000000000 # double -8000 -.LCPI169_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI169_2: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI169_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Id34custom_multiply_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Id34custom_multiply_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Id34custom_multiply_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Id34custom_multiply_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Id34custom_multiply_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Id34custom_multiply_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Id34custom_multiply_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Id34custom_multiply_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc @@ -46862,14 +46570,22 @@ _Z14test_variable4Id34custom_multiply_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc fmul.d $fs1, $fa0, $fs1 blez $s0, .LBB169_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI169_0) - fld.d $fs2, $a0, %pc_lo12(.LCPI169_0) - pcalau12i $a0, %pc_hi20(.LCPI169_2) - fld.d $fs3, $a0, %pc_lo12(.LCPI169_2) - pcalau12i $a0, %pc_hi20(.LCPI169_1) - fld.d $fs4, $a0, %pc_lo12(.LCPI169_1) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs2, $zero pcalau12i $s6, %pc_hi20(init_value) + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s2, $a0, %pc_lo12(.L.str.179) move $s7, $zero @@ -46884,7 +46600,7 @@ _Z14test_variable4Id34custom_multiply_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc # Child Loop BB169_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs2 .p2align 4, , 16 .LBB169_5: # Parent Loop BB169_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -46898,14 +46614,14 @@ _Z14test_variable4Id34custom_multiply_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc # in Loop: Header=BB169_4 Depth=1 fld.d $fa1, $s6, %pc_lo12(init_value) fmadd.d $fa1, $fs1, $fs0, $fa1 - fmul.d $fa1, $fa1, $fs2 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB169_3 # %bb.7: # in Loop: Header=BB169_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -46917,11 +46633,16 @@ _Z14test_variable4Id34custom_multiply_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc .LBB169_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI169_0) - fld.d $fs2, $a0, %pc_lo12(.LCPI169_0) - pcalau12i $a0, %pc_hi20(.LCPI169_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI169_1) - movgr2fr.d $fs4, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) move $s2, $zero @@ -46935,9 +46656,9 @@ _Z14test_variable4Id34custom_multiply_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc # =>This Inner Loop Header: Depth=1 fmadd.d $fa1, $fs1, $fs0, $fa0 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB169_9 # %bb.11: # in Loop: Header=BB169_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -46975,12 +46696,14 @@ _Z14test_variable4Id34custom_multiply_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB169_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI169_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI169_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -47018,18 +46741,8 @@ _Z14test_variable4Id34custom_multiply_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc .size _Z14test_variable4Id34custom_multiply_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end169-_Z14test_variable4Id34custom_multiply_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable1Id22custom_divide_variableIdEEvPT_iS2_PKc -.LCPI170_0: - .dword 0xc0bf400000000000 # double -8000 -.LCPI170_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI170_2: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI170_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable1Id22custom_divide_variableIdEEvPT_iS2_PKc,"axG",@progbits,_Z14test_variable1Id22custom_divide_variableIdEEvPT_iS2_PKc,comdat - .weak _Z14test_variable1Id22custom_divide_variableIdEEvPT_iS2_PKc + .weak _Z14test_variable1Id22custom_divide_variableIdEEvPT_iS2_PKc # -- Begin function _Z14test_variable1Id22custom_divide_variableIdEEvPT_iS2_PKc .p2align 5 .type _Z14test_variable1Id22custom_divide_variableIdEEvPT_iS2_PKc,@function _Z14test_variable1Id22custom_divide_variableIdEEvPT_iS2_PKc: # @_Z14test_variable1Id22custom_divide_variableIdEEvPT_iS2_PKc @@ -47082,14 +46795,22 @@ _Z14test_variable1Id22custom_divide_variableIdEEvPT_iS2_PKc: # @_Z14test_variabl # %bb.1: # %.preheader.lr.ph blez $s0, .LBB170_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI170_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI170_0) - pcalau12i $a0, %pc_hi20(.LCPI170_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI170_2) - pcalau12i $a0, %pc_hi20(.LCPI170_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI170_1) - movgr2fr.d $fs4, $zero + movgr2fr.d $fs1, $zero pcalau12i $s6, %pc_hi20(init_value) + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s2, $a0, %pc_lo12(.L.str.179) move $s7, $zero @@ -47104,7 +46825,7 @@ _Z14test_variable1Id22custom_divide_variableIdEEvPT_iS2_PKc: # @_Z14test_variabl # Child Loop BB170_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs4 + fmov.d $fa0, $fs1 .p2align 4, , 16 .LBB170_5: # Parent Loop BB170_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -47118,14 +46839,14 @@ _Z14test_variable1Id22custom_divide_variableIdEEvPT_iS2_PKc: # @_Z14test_variabl # in Loop: Header=BB170_4 Depth=1 fld.d $fa1, $s6, %pc_lo12(init_value) fdiv.d $fa1, $fa1, $fs0 - fmul.d $fa1, $fa1, $fs1 + fmul.d $fa1, $fa1, $fs2 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs2, $fa2 + fcmp.clt.d $fcc0, $fs3, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs3 + fcmp.clt.d $fcc0, $fa0, $fs4 bcnez $fcc0, .LBB170_3 # %bb.7: # in Loop: Header=BB170_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -47137,11 +46858,16 @@ _Z14test_variable1Id22custom_divide_variableIdEEvPT_iS2_PKc: # @_Z14test_variabl .LBB170_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI170_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI170_0) - pcalau12i $a0, %pc_hi20(.LCPI170_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI170_1) - movgr2fr.d $fs3, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs1, $a0 + movgr2fr.d $fs2, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) move $s2, $zero @@ -47155,9 +46881,9 @@ _Z14test_variable1Id22custom_divide_variableIdEEvPT_iS2_PKc: # @_Z14test_variabl # =>This Inner Loop Header: Depth=1 fdiv.d $fa1, $fa0, $fs0 fmul.d $fa1, $fa1, $fs1 - fadd.d $fa1, $fa1, $fs3 + fadd.d $fa1, $fa1, $fs2 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs2 + fcmp.clt.d $fcc0, $fa1, $fs3 bcnez $fcc0, .LBB170_9 # %bb.11: # in Loop: Header=BB170_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -47195,12 +46921,14 @@ _Z14test_variable1Id22custom_divide_variableIdEEvPT_iS2_PKc: # @_Z14test_variabl move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB170_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI170_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI170_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -47237,18 +46965,8 @@ _Z14test_variable1Id22custom_divide_variableIdEEvPT_iS2_PKc: # @_Z14test_variabl .size _Z14test_variable1Id22custom_divide_variableIdEEvPT_iS2_PKc, .Lfunc_end170-_Z14test_variable1Id22custom_divide_variableIdEEvPT_iS2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Id31custom_divide_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc -.LCPI171_0: - .dword 0xc0bf400000000000 # double -8000 -.LCPI171_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI171_2: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI171_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Id31custom_divide_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Id31custom_divide_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Id31custom_divide_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Id31custom_divide_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Id31custom_divide_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Id31custom_divide_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Id31custom_divide_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Id31custom_divide_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc @@ -47310,14 +47028,22 @@ _Z14test_variable4Id31custom_divide_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # # %bb.1: # %.preheader.lr.ph blez $s0, .LBB171_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI171_0) - fld.d $fs4, $a0, %pc_lo12(.LCPI171_0) - pcalau12i $a0, %pc_hi20(.LCPI171_2) - fld.d $fs5, $a0, %pc_lo12(.LCPI171_2) - pcalau12i $a0, %pc_hi20(.LCPI171_1) - fld.d $fs6, $a0, %pc_lo12(.LCPI171_1) - movgr2fr.d $fs7, $zero + movgr2fr.d $fs4, $zero pcalau12i $s6, %pc_hi20(init_value) + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs5, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs6, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs7, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s2, $a0, %pc_lo12(.L.str.179) move $s7, $zero @@ -47332,7 +47058,7 @@ _Z14test_variable4Id31custom_divide_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # # Child Loop BB171_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs7 + fmov.d $fa0, $fs4 .p2align 4, , 16 .LBB171_5: # Parent Loop BB171_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -47352,14 +47078,14 @@ _Z14test_variable4Id31custom_divide_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # fdiv.d $fa1, $fa1, $fs2 fdiv.d $fa1, $fa1, $fs1 fdiv.d $fa1, $fa1, $fs0 - fmul.d $fa1, $fa1, $fs4 + fmul.d $fa1, $fa1, $fs5 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs5, $fa2 + fcmp.clt.d $fcc0, $fs6, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs6 + fcmp.clt.d $fcc0, $fa0, $fs7 bcnez $fcc0, .LBB171_3 # %bb.7: # in Loop: Header=BB171_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -47371,11 +47097,16 @@ _Z14test_variable4Id31custom_divide_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # .LBB171_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI171_0) - fld.d $fs4, $a0, %pc_lo12(.LCPI171_0) - pcalau12i $a0, %pc_hi20(.LCPI171_1) - fld.d $fs5, $a0, %pc_lo12(.LCPI171_1) - movgr2fr.d $fs6, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs4, $a0 + movgr2fr.d $fs5, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs6, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) move $s2, $zero @@ -47392,9 +47123,9 @@ _Z14test_variable4Id31custom_divide_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # fdiv.d $fa1, $fa1, $fs1 fdiv.d $fa1, $fa1, $fs0 fmul.d $fa1, $fa1, $fs4 - fadd.d $fa1, $fa1, $fs6 + fadd.d $fa1, $fa1, $fs5 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs5 + fcmp.clt.d $fcc0, $fa1, $fs6 bcnez $fcc0, .LBB171_9 # %bb.11: # in Loop: Header=BB171_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -47432,12 +47163,14 @@ _Z14test_variable4Id31custom_divide_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB171_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI171_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI171_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -47477,18 +47210,8 @@ _Z14test_variable4Id31custom_divide_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # .size _Z14test_variable4Id31custom_divide_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end171-_Z14test_variable4Id31custom_divide_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Id32custom_divide_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc -.LCPI172_0: - .dword 0xc0bf400000000000 # double -8000 -.LCPI172_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI172_2: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI172_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Id32custom_divide_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Id32custom_divide_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Id32custom_divide_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Id32custom_divide_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Id32custom_divide_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Id32custom_divide_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Id32custom_divide_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Id32custom_divide_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc @@ -47547,14 +47270,22 @@ _Z14test_variable4Id32custom_divide_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc: fdiv.d $fs0, $fa0, $fs0 blez $s0, .LBB172_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI172_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI172_0) - pcalau12i $a0, %pc_hi20(.LCPI172_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI172_2) - pcalau12i $a0, %pc_hi20(.LCPI172_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI172_1) - movgr2fr.d $fs4, $zero + movgr2fr.d $fs1, $zero pcalau12i $s6, %pc_hi20(init_value) + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s2, $a0, %pc_lo12(.L.str.179) move $s7, $zero @@ -47569,7 +47300,7 @@ _Z14test_variable4Id32custom_divide_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc: # Child Loop BB172_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs4 + fmov.d $fa0, $fs1 .p2align 4, , 16 .LBB172_5: # Parent Loop BB172_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -47583,14 +47314,14 @@ _Z14test_variable4Id32custom_divide_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc: # in Loop: Header=BB172_4 Depth=1 fld.d $fa1, $s6, %pc_lo12(init_value) fadd.d $fa1, $fs0, $fa1 - fmul.d $fa1, $fa1, $fs1 + fmul.d $fa1, $fa1, $fs2 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs2, $fa2 + fcmp.clt.d $fcc0, $fs3, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs3 + fcmp.clt.d $fcc0, $fa0, $fs4 bcnez $fcc0, .LBB172_3 # %bb.7: # in Loop: Header=BB172_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -47602,11 +47333,16 @@ _Z14test_variable4Id32custom_divide_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc: .LBB172_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI172_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI172_0) - pcalau12i $a0, %pc_hi20(.LCPI172_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI172_1) - movgr2fr.d $fs3, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs1, $a0 + movgr2fr.d $fs2, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs3, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) move $s2, $zero @@ -47620,9 +47356,9 @@ _Z14test_variable4Id32custom_divide_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc: # =>This Inner Loop Header: Depth=1 fadd.d $fa1, $fs0, $fa0 fmul.d $fa1, $fa1, $fs1 - fadd.d $fa1, $fa1, $fs3 + fadd.d $fa1, $fa1, $fs2 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs2 + fcmp.clt.d $fcc0, $fa1, $fs3 bcnez $fcc0, .LBB172_9 # %bb.11: # in Loop: Header=BB172_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -47660,12 +47396,14 @@ _Z14test_variable4Id32custom_divide_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc: move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB172_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI172_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI172_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 @@ -47702,18 +47440,8 @@ _Z14test_variable4Id32custom_divide_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc: .size _Z14test_variable4Id32custom_divide_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc, .Lfunc_end172-_Z14test_variable4Id32custom_divide_multiple_variable2IdEEvPT_iS2_S2_S2_S2_PKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14test_variable4Id30custom_mixed_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc -.LCPI173_0: - .dword 0xc0bf400000000000 # double -8000 -.LCPI173_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI173_2: - .dword 0x3e45798ee2308c3a # double 1.0E-8 -.LCPI173_3: - .dword 0x412e848000000000 # double 1.0E+6 .section .text._Z14test_variable4Id30custom_mixed_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc,"axG",@progbits,_Z14test_variable4Id30custom_mixed_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc,comdat - .weak _Z14test_variable4Id30custom_mixed_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc + .weak _Z14test_variable4Id30custom_mixed_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc # -- Begin function _Z14test_variable4Id30custom_mixed_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc .p2align 5 .type _Z14test_variable4Id30custom_mixed_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc,@function _Z14test_variable4Id30custom_mixed_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # @_Z14test_variable4Id30custom_mixed_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc @@ -47773,14 +47501,22 @@ _Z14test_variable4Id30custom_mixed_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # fdiv.d $fs1, $fa0, $fs1 blez $s0, .LBB173_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI173_0) - fld.d $fs2, $a0, %pc_lo12(.LCPI173_0) - pcalau12i $a0, %pc_hi20(.LCPI173_2) - fld.d $fs3, $a0, %pc_lo12(.LCPI173_2) - pcalau12i $a0, %pc_hi20(.LCPI173_1) - fld.d $fs4, $a0, %pc_lo12(.LCPI173_1) - movgr2fr.d $fs5, $zero + movgr2fr.d $fs2, $zero pcalau12i $s6, %pc_hi20(init_value) + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 + lu12i.w $a0, -122104 + ori $a0, $a0, 3130 + lu32i.d $a0, 358798 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fs4, $a0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs5, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s2, $a0, %pc_lo12(.L.str.179) move $s7, $zero @@ -47795,7 +47531,7 @@ _Z14test_variable4Id30custom_mixed_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # # Child Loop BB173_5 Depth 2 move $a0, $s0 move $a2, $s1 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs2 .p2align 4, , 16 .LBB173_5: # Parent Loop BB173_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -47811,14 +47547,14 @@ _Z14test_variable4Id30custom_mixed_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # fld.d $fa1, $s6, %pc_lo12(init_value) fadd.d $fa1, $fs0, $fa1 fsub.d $fa1, $fa1, $fs1 - fmul.d $fa1, $fa1, $fs2 + fmul.d $fa1, $fa1, $fs3 fadd.d $fa1, $fa0, $fa1 fabs.d $fa2, $fa0 fdiv.d $fa0, $fa1, $fa0 - fcmp.clt.d $fcc0, $fs3, $fa2 + fcmp.clt.d $fcc0, $fs4, $fa2 fsel $fa0, $fa1, $fa0, $fcc0 fabs.d $fa0, $fa0 - fcmp.clt.d $fcc0, $fa0, $fs4 + fcmp.clt.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB173_3 # %bb.7: # in Loop: Header=BB173_4 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -47830,11 +47566,16 @@ _Z14test_variable4Id30custom_mixed_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # .LBB173_8: # %.preheader.preheader pcalau12i $s1, %pc_hi20(init_value) fld.d $fa0, $s1, %pc_lo12(init_value) - pcalau12i $a0, %pc_hi20(.LCPI173_0) - fld.d $fs2, $a0, %pc_lo12(.LCPI173_0) - pcalau12i $a0, %pc_hi20(.LCPI173_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI173_1) - movgr2fr.d $fs4, $zero + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs2, $a0 + movgr2fr.d $fs3, $zero + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.179) addi.d $s0, $a0, %pc_lo12(.L.str.179) move $s2, $zero @@ -47849,9 +47590,9 @@ _Z14test_variable4Id30custom_mixed_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # fadd.d $fa1, $fs0, $fa0 fsub.d $fa1, $fa1, $fs1 fmul.d $fa1, $fa1, $fs2 - fadd.d $fa1, $fa1, $fs4 + fadd.d $fa1, $fa1, $fs3 fabs.d $fa1, $fa1 - fcmp.clt.d $fcc0, $fa1, $fs3 + fcmp.clt.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB173_9 # %bb.11: # in Loop: Header=BB173_10 Depth=1 ld.w $a1, $s3, %pc_lo12(current_test) @@ -47889,12 +47630,14 @@ _Z14test_variable4Id30custom_mixed_multiple_variableIdEEvPT_iS2_S2_S2_S2_PKc: # move $a2, $a0 ld.w $a0, $s3, %pc_lo12(current_test) .LBB173_16: # %_Z13record_resultdPKc.exit - pcalau12i $a1, %pc_hi20(.LCPI173_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI173_3) sub.d $a1, $s0, $s1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 alsl.d $a1, $a0, $a2, 4 slli.d $a3, $a0, 4 fstx.d $fa0, $a2, $a3 diff --git a/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/stepanov_abstraction.dir/stepanov_abstraction.s b/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/stepanov_abstraction.dir/stepanov_abstraction.s index 199c9eca..e9915d55 100644 --- a/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/stepanov_abstraction.dir/stepanov_abstraction.s +++ b/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/stepanov_abstraction.dir/stepanov_abstraction.s @@ -62,12 +62,7 @@ _Z13record_resultdPKc: # @_Z13record_resultdPKc .Lfunc_end0: .size _Z13record_resultdPKc, .Lfunc_end0-_Z13record_resultdPKc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z9summarizePKciiii -.LCPI1_0: - .dword 0x412e848000000000 # double 1.0E+6 - .text - .globl _Z9summarizePKciiii + .globl _Z9summarizePKciiii # -- Begin function _Z9summarizePKciiii .p2align 5 .type _Z9summarizePKciiii,@function _Z9summarizePKciiii: # @_Z9summarizePKciiii @@ -135,14 +130,16 @@ _Z9summarizePKciiii: # @_Z9summarizePKciiii # %bb.5: # %.lr.ph45.preheader st.d $s0, $sp, 8 # 8-byte Folded Spill st.d $s5, $sp, 16 # 8-byte Folded Spill + ori $a0, $zero, 0 movgr2fr.w $fa0, $s3 movgr2fr.w $fa1, $s2 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI1_0) + lu32i.d $a0, -97152 ffint.d.w $fa0, $fa0 ffint.d.w $fa1, $fa1 + lu52i.d $a0, $a0, 1042 fmul.d $fa0, $fa0, $fa1 - fdiv.d $fs1, $fa0, $fa2 + movgr2fr.d $fa1, $a0 + fdiv.d $fs1, $fa0, $fa1 pcalau12i $a0, %pc_hi20(.L.str.4) addi.d $s2, $a0, %pc_lo12(.L.str.4) pcalau12i $a0, %pc_hi20(.L.str.5) @@ -409,12 +406,7 @@ _Z11start_timerv: # @_Z11start_timerv .Lfunc_end3: .size _Z11start_timerv, .Lfunc_end3-_Z11start_timerv # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z5timerv -.LCPI4_0: - .dword 0x412e848000000000 # double 1.0E+6 - .text - .globl _Z5timerv + .globl _Z5timerv # -- Begin function _Z5timerv .p2align 5 .type _Z5timerv,@function _Z5timerv: # @_Z5timerv @@ -426,12 +418,14 @@ _Z5timerv: # @_Z5timerv pcalau12i $a1, %pc_hi20(start_time) ld.d $a1, $a1, %pc_lo12(start_time) pcalau12i $a2, %pc_hi20(end_time) - pcalau12i $a3, %pc_hi20(.LCPI4_0) - fld.d $fa0, $a3, %pc_lo12(.LCPI4_0) sub.d $a1, $a0, $a1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 st.d $a0, $a2, %pc_lo12(end_time) ld.d $ra, $sp, 8 # 8-byte Folded Reload addi.d $sp, $sp, 16 @@ -439,12 +433,7 @@ _Z5timerv: # @_Z5timerv .Lfunc_end4: .size _Z5timerv, .Lfunc_end4-_Z5timerv # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI5_0: - .dword 0x409f400000000000 # double 2000 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -650,12 +639,14 @@ main: # @main st.d $a1, $sp, 136 # 8-byte Folded Spill blez $a0, .LBB5_97 # %bb.26: # %.lr.ph.i74 - pcalau12i $s4, %pc_hi20(.LCPI5_0) pcalau12i $s0, %pc_hi20(current_test) beq $s1, $s6, .LBB5_33 # %bb.27: # %.lr.ph.i.preheader.i.preheader - fld.d $fs0, $s4, %pc_lo12(.LCPI5_0) - movgr2fr.d $fs1, $zero + movgr2fr.d $fs0, $zero + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fs1, $a1 pcalau12i $a1, %pc_hi20(.L.str.50) addi.d $fp, $a1, %pc_lo12(.L.str.50) move $s3, $zero @@ -668,7 +659,7 @@ main: # @main .LBB5_29: # %.lr.ph.i.preheader.i # =>This Loop Header: Depth=1 # Child Loop BB5_30 Depth 2 - fmov.d $fa0, $fs1 + fmov.d $fa0, $fs0 move $a1, $s1 .p2align 4, , 16 .LBB5_30: # %.lr.ph.i.i @@ -682,7 +673,7 @@ main: # @main # %bb.31: # %_ZN9benchmark10accumulateIPddEET0_T_S3_S2_.exit.loopexit.i # in Loop: Header=BB5_29 Depth=1 fld.d $fa1, $s2, %pc_lo12(init_value) - fmul.d $fa1, $fa1, $fs0 + fmul.d $fa1, $fa1, $fs1 fcmp.ceq.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB5_28 # %bb.32: # in Loop: Header=BB5_29 Depth=1 @@ -694,7 +685,10 @@ main: # @main b .LBB5_28 .LBB5_33: # %_ZN9benchmark10accumulateIPddEET0_T_S3_S2_.exit.us.preheader.i fld.d $fa0, $s2, %pc_lo12(init_value) - fld.d $fs0, $s4, %pc_lo12(.LCPI5_0) + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fs0, $a1 movgr2fr.d $fs1, $zero pcalau12i $a1, %pc_hi20(.L.str.50) addi.d $fp, $a1, %pc_lo12(.L.str.50) @@ -727,9 +721,11 @@ main: # @main ld.d $s3, $a1, %pc_lo12(dPe) beq $s1, $s3, .LBB5_45 # %bb.39: # %.lr.ph.preheader.i.i.preheader - move $s6, $s4 - fld.d $fs0, $s4, %pc_lo12(.LCPI5_0) - movgr2fr.d $fs1, $zero + movgr2fr.d $fs0, $zero + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fs1, $a1 pcalau12i $a1, %pc_hi20(.L.str.50) addi.d $fp, $a1, %pc_lo12(.L.str.50) move $s4, $zero @@ -742,7 +738,7 @@ main: # @main .LBB5_41: # %.lr.ph.preheader.i.i # =>This Loop Header: Depth=1 # Child Loop BB5_42 Depth 2 - fmov.d $fa0, $fs1 + fmov.d $fa0, $fs0 move $a1, $s1 .p2align 4, , 16 .LBB5_42: # %.lr.ph.i.i78 @@ -756,7 +752,7 @@ main: # @main # %bb.43: # %_ZN9benchmark10accumulateI14PointerWrapperIdEdEET0_T_S4_S3_.exit.loopexit.i # in Loop: Header=BB5_41 Depth=1 fld.d $fa1, $s2, %pc_lo12(init_value) - fmul.d $fa1, $fa1, $fs0 + fmul.d $fa1, $fa1, $fs1 fcmp.ceq.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB5_40 # %bb.44: # in Loop: Header=BB5_41 Depth=1 @@ -768,8 +764,10 @@ main: # @main b .LBB5_40 .LBB5_45: # %_ZN9benchmark10accumulateI14PointerWrapperIdEdEET0_T_S4_S3_.exit.us.preheader.i fld.d $fa0, $s2, %pc_lo12(init_value) - move $s6, $s4 - fld.d $fs0, $s4, %pc_lo12(.LCPI5_0) + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fs0, $a1 movgr2fr.d $fs1, $zero pcalau12i $a1, %pc_hi20(.L.str.50) addi.d $fp, $a1, %pc_lo12(.L.str.50) @@ -802,8 +800,11 @@ main: # @main ld.d $s3, $a1, %pc_lo12(DVpe) beq $s1, $s3, .LBB5_57 # %bb.51: # %.lr.ph.i.preheader.i87.preheader - fld.d $fs0, $s6, %pc_lo12(.LCPI5_0) - movgr2fr.d $fs1, $zero + movgr2fr.d $fs0, $zero + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fs1, $a1 pcalau12i $a1, %pc_hi20(.L.str.50) addi.d $fp, $a1, %pc_lo12(.L.str.50) move $s4, $zero @@ -817,7 +818,7 @@ main: # @main # =>This Loop Header: Depth=1 # Child Loop BB5_54 Depth 2 move $a1, $s1 - fmov.d $fa0, $fs1 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB5_54: # %.lr.ph.i.i89 # Parent Loop BB5_53 Depth=1 @@ -830,7 +831,7 @@ main: # @main # %bb.55: # %_ZN9benchmark10accumulateIP12ValueWrapperIdES2_EET0_T_S5_S4_.exit.loopexit.i # in Loop: Header=BB5_53 Depth=1 fld.d $fa1, $s2, %pc_lo12(init_value) - fmul.d $fa1, $fa1, $fs0 + fmul.d $fa1, $fa1, $fs1 fcmp.ceq.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB5_52 # %bb.56: # in Loop: Header=BB5_53 Depth=1 @@ -842,7 +843,10 @@ main: # @main b .LBB5_52 .LBB5_57: # %_ZN9benchmark10accumulateIP12ValueWrapperIdES2_EET0_T_S5_S4_.exit.us.preheader.i fld.d $fa0, $s2, %pc_lo12(init_value) - fld.d $fs0, $s6, %pc_lo12(.LCPI5_0) + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fs0, $a1 movgr2fr.d $fs1, $zero pcalau12i $a1, %pc_hi20(.L.str.50) addi.d $fp, $a1, %pc_lo12(.L.str.50) @@ -875,8 +879,11 @@ main: # @main ld.d $s3, $a1, %pc_lo12(DVPe) beq $s1, $s3, .LBB5_69 # %bb.63: # %.lr.ph.preheader.i.i100.preheader - fld.d $fs0, $s6, %pc_lo12(.LCPI5_0) - movgr2fr.d $fs1, $zero + movgr2fr.d $fs0, $zero + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fs1, $a1 pcalau12i $a1, %pc_hi20(.L.str.50) addi.d $fp, $a1, %pc_lo12(.L.str.50) move $s4, $zero @@ -890,7 +897,7 @@ main: # @main # =>This Loop Header: Depth=1 # Child Loop BB5_66 Depth 2 move $a1, $s1 - fmov.d $fa0, $fs1 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB5_66: # %.lr.ph.i.i101 # Parent Loop BB5_65 Depth=1 @@ -903,7 +910,7 @@ main: # @main # %bb.67: # %_ZN9benchmark10accumulateI14PointerWrapperI12ValueWrapperIdEES3_EET0_T_S6_S5_.exit.loopexit.i # in Loop: Header=BB5_65 Depth=1 fld.d $fa1, $s2, %pc_lo12(init_value) - fmul.d $fa1, $fa1, $fs0 + fmul.d $fa1, $fa1, $fs1 fcmp.ceq.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB5_64 # %bb.68: # in Loop: Header=BB5_65 Depth=1 @@ -915,7 +922,10 @@ main: # @main b .LBB5_64 .LBB5_69: # %_ZN9benchmark10accumulateI14PointerWrapperI12ValueWrapperIdEES3_EET0_T_S6_S5_.exit.us.preheader.i fld.d $fa0, $s2, %pc_lo12(init_value) - fld.d $fs0, $s6, %pc_lo12(.LCPI5_0) + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fs0, $a1 movgr2fr.d $fs1, $zero pcalau12i $a1, %pc_hi20(.L.str.50) addi.d $fp, $a1, %pc_lo12(.L.str.50) @@ -948,8 +958,11 @@ main: # @main ld.d $s3, $a1, %pc_lo12(DV10pe) beq $s1, $s3, .LBB5_81 # %bb.75: # %.lr.ph.i.preheader.i109.preheader - fld.d $fs0, $s6, %pc_lo12(.LCPI5_0) - movgr2fr.d $fs1, $zero + movgr2fr.d $fs0, $zero + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fs1, $a1 pcalau12i $a1, %pc_hi20(.L.str.50) addi.d $fp, $a1, %pc_lo12(.L.str.50) move $s4, $zero @@ -963,7 +976,7 @@ main: # @main # =>This Loop Header: Depth=1 # Child Loop BB5_78 Depth 2 move $a1, $s1 - fmov.d $fa0, $fs1 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB5_78: # %.lr.ph.i.i111 # Parent Loop BB5_77 Depth=1 @@ -976,7 +989,7 @@ main: # @main # %bb.79: # %_ZN9benchmark10accumulateIP12ValueWrapperIS1_IS1_IS1_IS1_IS1_IS1_IS1_IS1_IS1_IdEEEEEEEEEESB_EET0_T_SE_SD_.exit.loopexit.i # in Loop: Header=BB5_77 Depth=1 fld.d $fa1, $s2, %pc_lo12(init_value) - fmul.d $fa1, $fa1, $fs0 + fmul.d $fa1, $fa1, $fs1 fcmp.ceq.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB5_76 # %bb.80: # in Loop: Header=BB5_77 Depth=1 @@ -988,7 +1001,10 @@ main: # @main b .LBB5_76 .LBB5_81: # %_ZN9benchmark10accumulateIP12ValueWrapperIS1_IS1_IS1_IS1_IS1_IS1_IS1_IS1_IS1_IdEEEEEEEEEESB_EET0_T_SE_SD_.exit.us.preheader.i fld.d $fa0, $s2, %pc_lo12(init_value) - fld.d $fs0, $s6, %pc_lo12(.LCPI5_0) + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fs0, $a1 movgr2fr.d $fs1, $zero pcalau12i $a1, %pc_hi20(.L.str.50) addi.d $fp, $a1, %pc_lo12(.L.str.50) @@ -1021,8 +1037,11 @@ main: # @main ld.d $s3, $a1, %pc_lo12(DV10Pe) beq $s1, $s3, .LBB5_93 # %bb.87: # %.lr.ph.preheader.i.i124.preheader - fld.d $fs0, $s6, %pc_lo12(.LCPI5_0) - movgr2fr.d $fs1, $zero + movgr2fr.d $fs0, $zero + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fs1, $a1 pcalau12i $a1, %pc_hi20(.L.str.50) addi.d $fp, $a1, %pc_lo12(.L.str.50) move $s4, $zero @@ -1036,7 +1055,7 @@ main: # @main # =>This Loop Header: Depth=1 # Child Loop BB5_90 Depth 2 move $a1, $s1 - fmov.d $fa0, $fs1 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB5_90: # %.lr.ph.i.i126 # Parent Loop BB5_89 Depth=1 @@ -1049,7 +1068,7 @@ main: # @main # %bb.91: # %_ZN9benchmark10accumulateI14PointerWrapperI12ValueWrapperIS2_IS2_IS2_IS2_IS2_IS2_IS2_IS2_IS2_IdEEEEEEEEEEESC_EET0_T_SF_SE_.exit.loopexit.i # in Loop: Header=BB5_89 Depth=1 fld.d $fa1, $s2, %pc_lo12(init_value) - fmul.d $fa1, $fa1, $fs0 + fmul.d $fa1, $fa1, $fs1 fcmp.ceq.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB5_88 # %bb.92: # in Loop: Header=BB5_89 Depth=1 @@ -1061,7 +1080,10 @@ main: # @main b .LBB5_88 .LBB5_93: # %_ZN9benchmark10accumulateI14PointerWrapperI12ValueWrapperIS2_IS2_IS2_IS2_IS2_IS2_IS2_IS2_IS2_IdEEEEEEEEEEESC_EET0_T_SF_SE_.exit.us.preheader.i fld.d $fa0, $s2, %pc_lo12(init_value) - fld.d $fs0, $s6, %pc_lo12(.LCPI5_0) + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fs0, $a1 movgr2fr.d $fs1, $zero pcalau12i $a1, %pc_hi20(.L.str.50) addi.d $fp, $a1, %pc_lo12(.L.str.50) diff --git a/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/stepanov_vector.dir/stepanov_vector.s b/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/stepanov_vector.dir/stepanov_vector.s index e56ffbae..3e00eaf0 100644 --- a/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/stepanov_vector.dir/stepanov_vector.s +++ b/results/SingleSource/Benchmarks/Adobe-C++/CMakeFiles/stepanov_vector.dir/stepanov_vector.s @@ -62,12 +62,7 @@ _Z13record_resultdPKc: # @_Z13record_resultdPKc .Lfunc_end0: .size _Z13record_resultdPKc, .Lfunc_end0-_Z13record_resultdPKc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z9summarizePKciiii -.LCPI1_0: - .dword 0x412e848000000000 # double 1.0E+6 - .text - .globl _Z9summarizePKciiii + .globl _Z9summarizePKciiii # -- Begin function _Z9summarizePKciiii .p2align 5 .type _Z9summarizePKciiii,@function _Z9summarizePKciiii: # @_Z9summarizePKciiii @@ -135,14 +130,16 @@ _Z9summarizePKciiii: # @_Z9summarizePKciiii # %bb.5: # %.lr.ph45.preheader st.d $s0, $sp, 8 # 8-byte Folded Spill st.d $s5, $sp, 16 # 8-byte Folded Spill + ori $a0, $zero, 0 movgr2fr.w $fa0, $s3 movgr2fr.w $fa1, $s2 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI1_0) + lu32i.d $a0, -97152 ffint.d.w $fa0, $fa0 ffint.d.w $fa1, $fa1 + lu52i.d $a0, $a0, 1042 fmul.d $fa0, $fa0, $fa1 - fdiv.d $fs1, $fa0, $fa2 + movgr2fr.d $fa1, $a0 + fdiv.d $fs1, $fa0, $fa1 pcalau12i $a0, %pc_hi20(.L.str.4) addi.d $s2, $a0, %pc_lo12(.L.str.4) pcalau12i $a0, %pc_hi20(.L.str.5) @@ -409,12 +406,7 @@ _Z11start_timerv: # @_Z11start_timerv .Lfunc_end3: .size _Z11start_timerv, .Lfunc_end3-_Z11start_timerv # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z5timerv -.LCPI4_0: - .dword 0x412e848000000000 # double 1.0E+6 - .text - .globl _Z5timerv + .globl _Z5timerv # -- Begin function _Z5timerv .p2align 5 .type _Z5timerv,@function _Z5timerv: # @_Z5timerv @@ -426,12 +418,14 @@ _Z5timerv: # @_Z5timerv pcalau12i $a1, %pc_hi20(start_time) ld.d $a1, $a1, %pc_lo12(start_time) pcalau12i $a2, %pc_hi20(end_time) - pcalau12i $a3, %pc_hi20(.LCPI4_0) - fld.d $fa0, $a3, %pc_lo12(.LCPI4_0) sub.d $a1, $a0, $a1 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, -97152 + lu52i.d $a1, $a1, 1042 movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 st.d $a0, $a2, %pc_lo12(end_time) ld.d $ra, $sp, 8 # 8-byte Folded Reload addi.d $sp, $sp, 16 @@ -439,12 +433,7 @@ _Z5timerv: # @_Z5timerv .Lfunc_end4: .size _Z5timerv, .Lfunc_end4-_Z5timerv # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI5_0: - .dword 0x409f400000000000 # double 2000 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -482,7 +471,7 @@ main: # @main .cfi_offset 56, -96 .cfi_offset 57, -104 ori $s1, $zero, 2 - pcalau12i $s7, %pc_hi20(iterations) + pcalau12i $s6, %pc_hi20(iterations) pcalau12i $s2, %pc_hi20(init_value) blt $a0, $s1, .LBB5_3 # %bb.1: @@ -493,7 +482,7 @@ main: # @main move $a1, $zero pcaddu18i $ra, %call36(__isoc23_strtol) jirl $ra, $ra, 0 - st.w $a0, $s7, %pc_lo12(iterations) + st.w $a0, $s6, %pc_lo12(iterations) beq $s0, $s1, .LBB5_3 # %bb.2: ld.d $a0, $fp, 16 @@ -508,11 +497,10 @@ main: # @main addi.w $a0, $a0, 123 pcaddu18i $ra, %call36(srand) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(dpb) - st.d $a0, $sp, 136 # 8-byte Folded Spill - ld.d $s3, $a0, %pc_lo12(dpb) + pcalau12i $s8, %pc_hi20(dpb) + ld.d $s3, $s8, %pc_lo12(dpb) pcalau12i $a0, %pc_hi20(dpe) - st.d $a0, $sp, 128 # 8-byte Folded Spill + st.d $a0, $sp, 136 # 8-byte Folded Spill ld.d $s4, $a0, %pc_lo12(dpe) fld.d $fa1, $s2, %pc_lo12(init_value) beq $s3, $s4, .LBB5_10 @@ -580,32 +568,34 @@ main: # @main vstx $vr0, $a2, $a1 bnez $a0, .LBB5_12 # %bb.13: # %_ZN9benchmark4fillIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEdEEvT_S8_T0_.exit - ld.w $a0, $s7, %pc_lo12(iterations) - pcalau12i $s8, %pc_hi20(current_test) + ld.w $a0, $s6, %pc_lo12(iterations) + pcalau12i $s7, %pc_hi20(current_test) pcalau12i $a1, %pc_hi20(rdpb) - st.d $a1, $sp, 104 # 8-byte Folded Spill + st.d $a1, $sp, 112 # 8-byte Folded Spill pcalau12i $a1, %pc_hi20(rdpe) - st.d $a1, $sp, 96 # 8-byte Folded Spill + st.d $a1, $sp, 104 # 8-byte Folded Spill blez $a0, .LBB5_76 # %bb.14: # %.lr.ph.i67 - pcalau12i $s5, %pc_hi20(.LCPI5_0) - fld.d $fs0, $s5, %pc_lo12(.LCPI5_0) - movgr2fr.d $fs1, $zero - pcalau12i $a1, %pc_hi20(.L.str.51) - addi.d $s1, $a1, %pc_lo12(.L.str.51) beq $s3, $s4, .LBB5_21 # %bb.15: # %.lr.ph.i.preheader.i.preheader - move $s6, $zero + movgr2fr.d $fs0, $zero + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fs1, $a1 + pcalau12i $a1, %pc_hi20(.L.str.51) + addi.d $s1, $a1, %pc_lo12(.L.str.51) + move $s5, $zero b .LBB5_17 .p2align 4, , 16 .LBB5_16: # %_Z9check_sumd.exit.i # in Loop: Header=BB5_17 Depth=1 - addi.w $s6, $s6, 1 - bge $s6, $a0, .LBB5_25 + addi.w $s5, $s5, 1 + bge $s5, $a0, .LBB5_25 .LBB5_17: # %.lr.ph.i.preheader.i # =>This Loop Header: Depth=1 # Child Loop BB5_18 Depth 2 - fmov.d $fa0, $fs1 + fmov.d $fa0, $fs0 move $a1, $s3 .p2align 4, , 16 .LBB5_18: # %.lr.ph.i.i @@ -619,17 +609,24 @@ main: # @main # %bb.19: # %_ZN9benchmark10accumulateIPddEET0_T_S3_S2_.exit.loopexit.i # in Loop: Header=BB5_17 Depth=1 fld.d $fa1, $s2, %pc_lo12(init_value) - fmul.d $fa1, $fa1, $fs0 + fmul.d $fa1, $fa1, $fs1 fcmp.ceq.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB5_16 # %bb.20: # in Loop: Header=BB5_17 Depth=1 - ld.w $a1, $s8, %pc_lo12(current_test) + ld.w $a1, $s7, %pc_lo12(current_test) move $a0, $s1 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $s7, %pc_lo12(iterations) + ld.w $a0, $s6, %pc_lo12(iterations) b .LBB5_16 .LBB5_21: # %_ZN9benchmark10accumulateIPddEET0_T_S3_S2_.exit.us.i.preheader + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fs0, $a1 + movgr2fr.d $fs1, $zero + pcalau12i $a1, %pc_hi20(.L.str.51) + addi.d $s1, $a1, %pc_lo12(.L.str.51) move $s3, $zero b .LBB5_23 .p2align 4, , 16 @@ -643,20 +640,23 @@ main: # @main fcmp.ceq.d $fcc0, $fa0, $fs1 bcnez $fcc0, .LBB5_22 # %bb.24: # in Loop: Header=BB5_23 Depth=1 - ld.w $a1, $s8, %pc_lo12(current_test) + ld.w $a1, $s7, %pc_lo12(current_test) move $a0, $s1 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 fld.d $fa1, $s2, %pc_lo12(init_value) - ld.w $a0, $s7, %pc_lo12(iterations) + ld.w $a0, $s6, %pc_lo12(iterations) b .LBB5_22 .LBB5_25: # %_Z15test_accumulateIPddEvT_S1_T0_PKc.exit blez $a0, .LBB5_76 # %bb.26: # %.lr.ph.preheader.i.i.preheader - fld.d $fs0, $s5, %pc_lo12(.LCPI5_0) - movgr2fr.d $fs1, $zero + movgr2fr.d $fs0, $zero lu12i.w $a1, -4 ori $s3, $a1, 384 + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fs1, $a1 pcalau12i $a1, %pc_hi20(.L.str.51) addi.d $s1, $a1, %pc_lo12(.L.str.51) move $s4, $zero @@ -670,7 +670,7 @@ main: # @main # =>This Loop Header: Depth=1 # Child Loop BB5_29 Depth 2 move $a1, $s3 - fmov.d $fa0, $fs1 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB5_29: # %.lr.ph.i.i71 # Parent Loop BB5_28 Depth=1 @@ -683,24 +683,28 @@ main: # @main # %bb.30: # %_ZN9benchmark10accumulateIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEdEET0_T_S9_S8_.exit.loopexit.i # in Loop: Header=BB5_28 Depth=1 fld.d $fa1, $s2, %pc_lo12(init_value) - fmul.d $fa1, $fa1, $fs0 + fmul.d $fa1, $fa1, $fs1 fcmp.ceq.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB5_27 # %bb.31: # in Loop: Header=BB5_28 Depth=1 - ld.w $a1, $s8, %pc_lo12(current_test) + ld.w $a1, $s7, %pc_lo12(current_test) move $a0, $s1 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $s7, %pc_lo12(iterations) + ld.w $a0, $s6, %pc_lo12(iterations) b .LBB5_27 .LBB5_32: # %_Z15test_accumulateIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEdEvT_S7_T0_PKc.exit blez $a0, .LBB5_76 # %bb.33: # %.lr.ph.i78.preheader - ld.d $a1, $sp, 104 # 8-byte Folded Reload + ld.d $a1, $sp, 112 # 8-byte Folded Reload ld.d $s3, $a1, %pc_lo12(rdpb) - ld.d $a1, $sp, 96 # 8-byte Folded Reload + ld.d $a1, $sp, 104 # 8-byte Folded Reload ld.d $s4, $a1, %pc_lo12(rdpe) - movgr2fr.d $fs1, $zero + movgr2fr.d $fs0, $zero + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fs1, $a1 pcalau12i $a1, %pc_hi20(.L.str.51) addi.d $s1, $a1, %pc_lo12(.L.str.51) move $s5, $zero @@ -713,12 +717,12 @@ main: # @main .LBB5_35: # %.lr.ph.i78 # =>This Loop Header: Depth=1 # Child Loop BB5_37 Depth 2 - fmov.d $fa0, $fs1 + fmov.d $fa0, $fs0 beq $s3, $s4, .LBB5_38 # %bb.36: # %.lr.ph.i.i80.preheader # in Loop: Header=BB5_35 Depth=1 move $a1, $s3 - fmov.d $fa0, $fs1 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB5_37: # %.lr.ph.i.i80 # Parent Loop BB5_35 Depth=1 @@ -731,20 +735,24 @@ main: # @main .LBB5_38: # %_ZN9benchmark10accumulateISt16reverse_iteratorIPdEdEET0_T_S5_S4_.exit.i # in Loop: Header=BB5_35 Depth=1 fld.d $fa1, $s2, %pc_lo12(init_value) - fmul.d $fa1, $fa1, $fs0 + fmul.d $fa1, $fa1, $fs1 fcmp.ceq.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB5_34 # %bb.39: # in Loop: Header=BB5_35 Depth=1 - ld.w $a1, $s8, %pc_lo12(current_test) + ld.w $a1, $s7, %pc_lo12(current_test) move $a0, $s1 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $s7, %pc_lo12(iterations) + ld.w $a0, $s6, %pc_lo12(iterations) b .LBB5_34 .LBB5_40: # %_Z15test_accumulateISt16reverse_iteratorIPdEdEvT_S3_T0_PKc.exit blez $a0, .LBB5_76 # %bb.41: # %.lr.ph.i84.preheader - movgr2fr.d $fs1, $zero + movgr2fr.d $fs0, $zero + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fs1, $a1 pcalau12i $a1, %pc_hi20(.L.str.51) addi.d $s1, $a1, %pc_lo12(.L.str.51) move $s3, $zero @@ -758,7 +766,7 @@ main: # @main # =>This Loop Header: Depth=1 # Child Loop BB5_44 Depth 2 move $a1, $fp - fmov.d $fa0, $fs1 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB5_44: # %.lr.ph.i.i86 # Parent Loop BB5_43 Depth=1 @@ -771,22 +779,26 @@ main: # @main # %bb.45: # %_ZN9benchmark10accumulateISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEdEET0_T_SB_SA_.exit.i # in Loop: Header=BB5_43 Depth=1 fld.d $fa1, $s2, %pc_lo12(init_value) - fmul.d $fa1, $fa1, $fs0 + fmul.d $fa1, $fa1, $fs1 fcmp.ceq.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB5_42 # %bb.46: # in Loop: Header=BB5_43 Depth=1 - ld.w $a1, $s8, %pc_lo12(current_test) + ld.w $a1, $s7, %pc_lo12(current_test) move $a0, $s1 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $s7, %pc_lo12(iterations) + ld.w $a0, $s6, %pc_lo12(iterations) b .LBB5_42 .LBB5_47: # %_Z15test_accumulateISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEdEvT_S9_T0_PKc.exit blez $a0, .LBB5_76 # %bb.48: # %.lr.ph.i91.preheader - movgr2fr.d $fs1, $zero + movgr2fr.d $fs0, $zero lu12i.w $a1, 3 ori $s3, $a1, 3712 + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fs1, $a1 pcalau12i $a1, %pc_hi20(.L.str.51) addi.d $s1, $a1, %pc_lo12(.L.str.51) move $s4, $zero @@ -800,7 +812,7 @@ main: # @main # =>This Loop Header: Depth=1 # Child Loop BB5_51 Depth 2 move $a1, $s3 - fmov.d $fa0, $fs1 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB5_51: # %.lr.ph.i.i95 # Parent Loop BB5_50 Depth=1 @@ -813,15 +825,15 @@ main: # @main # %bb.52: # %_ZN9benchmark10accumulateISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEdEET0_T_SB_SA_.exit.i99 # in Loop: Header=BB5_50 Depth=1 fld.d $fa1, $s2, %pc_lo12(init_value) - fmul.d $fa1, $fa1, $fs0 + fmul.d $fa1, $fa1, $fs1 fcmp.ceq.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB5_49 # %bb.53: # in Loop: Header=BB5_50 Depth=1 - ld.w $a1, $s8, %pc_lo12(current_test) + ld.w $a1, $s7, %pc_lo12(current_test) move $a0, $s1 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $s7, %pc_lo12(iterations) + ld.w $a0, $s6, %pc_lo12(iterations) b .LBB5_49 .LBB5_54: # %_Z15test_accumulateISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEdEvT_S9_T0_PKc.exit103 blez $a0, .LBB5_76 @@ -830,7 +842,11 @@ main: # @main ld.d $s3, $a1, %pc_lo12(rrdpb+8) pcalau12i $a1, %pc_hi20(rrdpe+8) ld.d $s4, $a1, %pc_lo12(rrdpe+8) - movgr2fr.d $fs1, $zero + movgr2fr.d $fs0, $zero + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fs1, $a1 pcalau12i $a1, %pc_hi20(.L.str.51) addi.d $s1, $a1, %pc_lo12(.L.str.51) move $s5, $zero @@ -843,12 +859,12 @@ main: # @main .LBB5_57: # %.lr.ph.i104 # =>This Loop Header: Depth=1 # Child Loop BB5_59 Depth 2 - fmov.d $fa0, $fs1 + fmov.d $fa0, $fs0 beq $s3, $s4, .LBB5_60 # %bb.58: # %.lr.ph.i.i107.preheader # in Loop: Header=BB5_57 Depth=1 move $a1, $s3 - fmov.d $fa0, $fs1 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB5_59: # %.lr.ph.i.i107 # Parent Loop BB5_57 Depth=1 @@ -861,24 +877,28 @@ main: # @main .LBB5_60: # %_ZN9benchmark10accumulateISt16reverse_iteratorIS1_IPdEEdEET0_T_S6_S5_.exit.i # in Loop: Header=BB5_57 Depth=1 fld.d $fa1, $s2, %pc_lo12(init_value) - fmul.d $fa1, $fa1, $fs0 + fmul.d $fa1, $fa1, $fs1 fcmp.ceq.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB5_56 # %bb.61: # in Loop: Header=BB5_57 Depth=1 - ld.w $a1, $s8, %pc_lo12(current_test) + ld.w $a1, $s7, %pc_lo12(current_test) move $a0, $s1 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $s7, %pc_lo12(iterations) + ld.w $a0, $s6, %pc_lo12(iterations) b .LBB5_56 .LBB5_62: # %_Z15test_accumulateISt16reverse_iteratorIS0_IPdEEdEvT_S4_T0_PKc.exit blez $a0, .LBB5_76 # %bb.63: # %.lr.ph.i113.preheader - movgr2fr.d $fs1, $zero + movgr2fr.d $fs0, $zero lu12i.w $a1, -4 ori $s4, $a1, 384 lu12i.w $a1, 3 ori $s3, $a1, 3712 + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fs1, $a1 pcalau12i $a1, %pc_hi20(.L.str.51) addi.d $s1, $a1, %pc_lo12(.L.str.51) move $s5, $zero @@ -892,7 +912,7 @@ main: # @main # =>This Loop Header: Depth=1 # Child Loop BB5_66 Depth 2 move $a1, $s4 - fmov.d $fa0, $fs1 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB5_66: # %.lr.ph.i.i117 # Parent Loop BB5_65 Depth=1 @@ -905,22 +925,26 @@ main: # @main # %bb.67: # %_ZN9benchmark10accumulateISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEdEET0_T_SC_SB_.exit.i # in Loop: Header=BB5_65 Depth=1 fld.d $fa1, $s2, %pc_lo12(init_value) - fmul.d $fa1, $fa1, $fs0 + fmul.d $fa1, $fa1, $fs1 fcmp.ceq.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB5_64 # %bb.68: # in Loop: Header=BB5_65 Depth=1 - ld.w $a1, $s8, %pc_lo12(current_test) + ld.w $a1, $s7, %pc_lo12(current_test) move $a0, $s1 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $s7, %pc_lo12(iterations) + ld.w $a0, $s6, %pc_lo12(iterations) b .LBB5_64 .LBB5_69: # %_Z15test_accumulateISt16reverse_iteratorIS0_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEdEvT_SA_T0_PKc.exit blez $a0, .LBB5_76 # %bb.70: # %.lr.ph.i123.preheader - movgr2fr.d $fs1, $zero + movgr2fr.d $fs0, $zero lu12i.w $a1, -4 ori $s4, $a1, 384 + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fs1, $a1 pcalau12i $a1, %pc_hi20(.L.str.51) addi.d $s1, $a1, %pc_lo12(.L.str.51) move $s5, $zero @@ -934,7 +958,7 @@ main: # @main # =>This Loop Header: Depth=1 # Child Loop BB5_73 Depth 2 move $a1, $s4 - fmov.d $fa0, $fs1 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB5_73: # %.lr.ph.i.i127 # Parent Loop BB5_72 Depth=1 @@ -947,15 +971,15 @@ main: # @main # %bb.74: # %_ZN9benchmark10accumulateISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEdEET0_T_SC_SB_.exit.i132 # in Loop: Header=BB5_72 Depth=1 fld.d $fa1, $s2, %pc_lo12(init_value) - fmul.d $fa1, $fa1, $fs0 + fmul.d $fa1, $fa1, $fs1 fcmp.ceq.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB5_71 # %bb.75: # in Loop: Header=BB5_72 Depth=1 - ld.w $a1, $s8, %pc_lo12(current_test) + ld.w $a1, $s7, %pc_lo12(current_test) move $a0, $s1 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.w $a0, $s7, %pc_lo12(iterations) + ld.w $a0, $s6, %pc_lo12(iterations) b .LBB5_71 .LBB5_76: # %_ZNKSt6vectorIdSaIdEE12_M_check_lenEmPKc.exit.i431 lu12i.w $a1, 67108 @@ -964,7 +988,7 @@ main: # @main srli.d $a1, $a0, 63 srai.d $a0, $a0, 38 add.d $a0, $a0, $a1 - st.w $a0, $s7, %pc_lo12(iterations) + st.w $a0, $s6, %pc_lo12(iterations) .Ltmp0: # EH_LABEL move $a0, $fp pcaddu18i $ra, %call36(_Znwm) @@ -976,8 +1000,9 @@ main: # @main move $a2, $fp pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 - pcalau12i $s6, %pc_hi20(dMpb) - ld.d $s2, $s6, %pc_lo12(dMpb) + pcalau12i $a0, %pc_hi20(dMpb) + st.d $a0, $sp, 128 # 8-byte Folded Spill + ld.d $s2, $a0, %pc_lo12(dMpb) pcalau12i $a0, %pc_hi20(dMpe) st.d $a0, $sp, 120 # 8-byte Folded Spill ld.d $s3, $a0, %pc_lo12(dMpe) @@ -994,7 +1019,8 @@ main: # @main move $s2, $a0 bne $a0, $s3, .LBB5_78 # %bb.79: # %_ZN9benchmark11fill_randomIPddEEvT_S2_.exit.loopexit - ld.d $s2, $s6, %pc_lo12(dMpb) + ld.d $a0, $sp, 128 # 8-byte Folded Reload + ld.d $s2, $a0, %pc_lo12(dMpb) ld.d $a0, $sp, 120 # 8-byte Folded Reload ld.d $s3, $a0, %pc_lo12(dMpe) .LBB5_80: # %_ZN9benchmark11fill_randomIPddEEvT_S2_.exit @@ -1048,9 +1074,8 @@ main: # @main move $a1, $a3 bne $a2, $s3, .LBB5_86 .LBB5_87: # %_ZN9benchmark4copyIPdN9__gnu_cxx17__normal_iteratorIS1_St6vectorIdSaIdEEEEEEvT_S8_T0_.exit + ld.d $a2, $s8, %pc_lo12(dpb) ld.d $a0, $sp, 136 # 8-byte Folded Reload - ld.d $a2, $a0, %pc_lo12(dpb) - ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $a3, $a0, %pc_lo12(dpe) .Ltmp3: # EH_LABEL pcalau12i $a0, %pc_hi20(.L.str.26) @@ -1076,21 +1101,21 @@ main: # @main jirl $ra, $ra, 0 .Ltmp6: # EH_LABEL # %bb.89: - st.d $s8, $sp, 160 # 8-byte Folded Spill - st.d $s7, $sp, 176 # 8-byte Folded Spill - ld.w $s3, $s7, %pc_lo12(iterations) + st.d $s8, $sp, 96 # 8-byte Folded Spill + st.d $s7, $sp, 160 # 8-byte Folded Spill + st.d $s6, $sp, 176 # 8-byte Folded Spill + ld.w $s3, $s6, %pc_lo12(iterations) pcalau12i $a0, %pc_hi20(rdMpb) pcalau12i $a1, %pc_hi20(rdMpe) - st.d $s6, $sp, 80 # 8-byte Folded Spill - st.d $a0, $sp, 64 # 8-byte Folded Spill - st.d $a1, $sp, 56 # 8-byte Folded Spill - blez $s3, .LBB5_153 + st.d $a0, $sp, 72 # 8-byte Folded Spill + st.d $a1, $sp, 64 # 8-byte Folded Spill + blez $s3, .LBB5_178 # %bb.90: # %.lr.ph.i148.preheader ld.d $s1, $a0, %pc_lo12(rdMpb) ld.d $s5, $a1, %pc_lo12(rdMpe) - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 112 # 8-byte Folded Reload ld.d $s6, $a0, %pc_lo12(rdpb) - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload ld.d $s7, $a0, %pc_lo12(rdpe) addi.d $s8, $s6, -8 pcalau12i $a0, %pc_hi20(.L.str.52) @@ -1190,18 +1215,18 @@ main: # @main bcnez $fcc0, .LBB5_104 b .LBB5_91 .LBB5_106: # %_Z19test_insertion_sortISt16reverse_iteratorIPdEdEvT_S3_S3_S3_T0_PKc.exit - blez $s3, .LBB5_152 + blez $s3, .LBB5_178 # %bb.107: # %.lr.ph.i153.preheader lu12i.w $a0, 3 - ori $s7, $a0, 3704 - add.d $s1, $s0, $s7 + ori $a1, $a0, 3704 + st.d $a1, $sp, 16 # 8-byte Folded Spill + add.d $s1, $s0, $a1 ori $s8, $a0, 3696 ori $s5, $zero, 8 lu12i.w $a0, -4 ori $s6, $a0, 392 pcalau12i $a0, %pc_hi20(.L.str.52) - addi.d $a0, $a0, %pc_lo12(.L.str.52) - st.d $a0, $sp, 152 # 8-byte Folded Spill + addi.d $s7, $a0, %pc_lo12(.L.str.52) move $s4, $zero b .LBB5_110 .p2align 4, , 16 @@ -1209,7 +1234,7 @@ main: # @main # in Loop: Header=BB5_110 Depth=1 ld.d $a0, $sp, 160 # 8-byte Folded Reload ld.w $a1, $a0, %pc_lo12(current_test) - ld.d $a0, $sp, 152 # 8-byte Folded Reload + move $a0, $s7 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 ld.d $a0, $sp, 176 # 8-byte Folded Reload @@ -1283,15 +1308,15 @@ main: # @main bcnez $fcc0, .LBB5_117 b .LBB5_108 .LBB5_119: # %_Z19test_insertion_sortISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEdEvT_S9_S9_S9_T0_PKc.exit - blez $s3, .LBB5_152 + blez $s3, .LBB5_178 # %bb.120: # %.lr.ph.i163.preheader - add.d $s1, $s0, $s7 + ld.d $a0, $sp, 16 # 8-byte Folded Reload + add.d $s1, $s0, $a0 ori $s5, $zero, 8 lu12i.w $a0, -4 ori $s6, $a0, 392 pcalau12i $a0, %pc_hi20(.L.str.52) - addi.d $a0, $a0, %pc_lo12(.L.str.52) - st.d $a0, $sp, 152 # 8-byte Folded Spill + addi.d $s7, $a0, %pc_lo12(.L.str.52) move $s4, $zero b .LBB5_123 .p2align 4, , 16 @@ -1299,7 +1324,7 @@ main: # @main # in Loop: Header=BB5_123 Depth=1 ld.d $a0, $sp, 160 # 8-byte Folded Reload ld.w $a1, $a0, %pc_lo12(current_test) - ld.d $a0, $sp, 152 # 8-byte Folded Reload + move $a0, $s7 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 ld.d $a0, $sp, 176 # 8-byte Folded Reload @@ -1373,7 +1398,7 @@ main: # @main bcnez $fcc0, .LBB5_130 b .LBB5_121 .LBB5_132: # %_Z19test_insertion_sortISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEdEvT_S9_S9_S9_T0_PKc.exit188 - blez $s3, .LBB5_152 + blez $s3, .LBB5_178 # %bb.133: # %.lr.ph.i189.preheader pcalau12i $a0, %pc_hi20(rrdMpb+8) ld.d $a4, $a0, %pc_lo12(rrdMpb+8) @@ -1383,7 +1408,7 @@ main: # @main ld.d $s6, $a0, %pc_lo12(rrdMpe+8) pcalau12i $a0, %pc_hi20(rrdpe+8) ld.d $s4, $a0, %pc_lo12(rrdpe+8) - addi.d $a5, $s1, 8 + addi.d $s7, $s1, 8 sub.d $a0, $s1, $a4 sub.d $a1, $s6, $a4 addi.d $a1, $a1, -8 @@ -1393,45 +1418,41 @@ main: # @main sltui $a0, $a0, 32 or $a0, $a1, $a0 st.d $a0, $sp, 152 # 8-byte Folded Spill - st.d $a2, $sp, 48 # 8-byte Folded Spill + st.d $a2, $sp, 56 # 8-byte Folded Spill bstrpick.d $a0, $a2, 61, 2 slli.d $a1, $a0, 2 - st.d $a1, $sp, 112 # 8-byte Folded Spill + st.d $a1, $sp, 144 # 8-byte Folded Spill slli.d $a0, $a0, 5 add.d $a1, $s1, $a0 - st.d $a1, $sp, 40 # 8-byte Folded Spill + st.d $a1, $sp, 48 # 8-byte Folded Spill add.d $a0, $a4, $a0 - st.d $a0, $sp, 32 # 8-byte Folded Spill + st.d $a0, $sp, 40 # 8-byte Folded Spill addi.d $a0, $a4, 16 - st.d $a0, $sp, 24 # 8-byte Folded Spill + st.d $a0, $sp, 32 # 8-byte Folded Spill addi.d $a0, $s1, 16 - st.d $a0, $sp, 16 # 8-byte Folded Spill + st.d $a0, $sp, 24 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(.L.str.52) addi.d $a0, $a0, %pc_lo12(.L.str.52) - st.d $a0, $sp, 72 # 8-byte Folded Spill + st.d $a0, $sp, 80 # 8-byte Folded Spill move $s5, $zero addi.w $s8, $zero, -8 - st.d $s7, $sp, 144 # 8-byte Folded Spill b .LBB5_136 .p2align 4, , 16 .LBB5_134: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIS1_IPdEEEEbT_S5_.exit.i.i # in Loop: Header=BB5_136 Depth=1 ld.d $a0, $sp, 160 # 8-byte Folded Reload ld.w $a1, $a0, %pc_lo12(current_test) - ld.d $a0, $sp, 72 # 8-byte Folded Reload + ld.d $a0, $sp, 80 # 8-byte Folded Reload move $s3, $a4 - move $s7, $a5 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - move $a5, $s7 move $a4, $s3 ld.d $a0, $sp, 176 # 8-byte Folded Reload ld.w $s3, $a0, %pc_lo12(iterations) .LBB5_135: # %_Z13verify_sortedISt16reverse_iteratorIS0_IPdEEEvT_S4_.exit.i # in Loop: Header=BB5_136 Depth=1 addi.w $s5, $s5, 1 - ld.d $s7, $sp, 144 # 8-byte Folded Reload - bge $s5, $s3, .LBB5_196 + bge $s5, $s3, .LBB5_152 .LBB5_136: # %.lr.ph.i189 # =>This Loop Header: Depth=1 # Child Loop BB5_139 Depth 2 @@ -1448,9 +1469,9 @@ main: # @main bnez $a2, .LBB5_141 # %bb.138: # %vector.body986.preheader # in Loop: Header=BB5_136 Depth=1 - ld.d $a0, $sp, 112 # 8-byte Folded Reload - ld.d $a1, $sp, 16 # 8-byte Folded Reload - ld.d $a2, $sp, 24 # 8-byte Folded Reload + ld.d $a0, $sp, 144 # 8-byte Folded Reload + ld.d $a1, $sp, 24 # 8-byte Folded Reload + ld.d $a2, $sp, 32 # 8-byte Folded Reload .p2align 4, , 16 .LBB5_139: # %vector.body986 # Parent Loop BB5_136 Depth=1 @@ -1465,10 +1486,10 @@ main: # @main bnez $a0, .LBB5_139 # %bb.140: # %middle.block995 # in Loop: Header=BB5_136 Depth=1 - ld.d $a0, $sp, 40 # 8-byte Folded Reload - ld.d $a1, $sp, 32 # 8-byte Folded Reload - ld.d $a2, $sp, 48 # 8-byte Folded Reload - ld.d $a3, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 48 # 8-byte Folded Reload + ld.d $a1, $sp, 40 # 8-byte Folded Reload + ld.d $a2, $sp, 56 # 8-byte Folded Reload + ld.d $a3, $sp, 144 # 8-byte Folded Reload beq $a2, $a3, .LBB5_142 .p2align 4, , 16 .LBB5_141: # %.lr.ph.i.i191 @@ -1483,11 +1504,11 @@ main: # @main bne $a2, $s6, .LBB5_141 .LBB5_142: # %_ZN9benchmark4copyISt16reverse_iteratorIS1_IPdEES4_EEvT_S5_T0_.exit.i # in Loop: Header=BB5_136 Depth=1 - beq $a5, $s4, .LBB5_149 + beq $s7, $s4, .LBB5_149 # %bb.143: # %.lr.ph.i2.i194.preheader # in Loop: Header=BB5_136 Depth=1 move $a0, $zero - move $a1, $a5 + move $a1, $s7 b .LBB5_145 .p2align 4, , 16 .LBB5_144: # %..critedge.i.i195_crit_edge @@ -1523,7 +1544,7 @@ main: # @main bne $a1, $s4, .LBB5_145 .LBB5_149: # %_ZN9benchmark13insertionSortISt16reverse_iteratorIS1_IPdEEdEEvT_S5_.exit.i.preheader # in Loop: Header=BB5_136 Depth=1 - move $a0, $a5 + move $a0, $s7 .p2align 4, , 16 .LBB5_150: # %_ZN9benchmark13insertionSortISt16reverse_iteratorIS1_IPdEEdEEvT_S5_.exit.i # Parent Loop BB5_136 Depth=1 @@ -1536,15 +1557,185 @@ main: # @main addi.d $a0, $a0, 8 bcnez $fcc0, .LBB5_150 b .LBB5_134 -.LBB5_152: - ld.d $s6, $sp, 80 # 8-byte Folded Reload -.LBB5_153: # %_Z19test_insertion_sortISt16reverse_iteratorIS0_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEdEvT_SA_SA_SA_T0_PKc.exit244 - ld.d $a0, $s6, %pc_lo12(dMpb) +.LBB5_152: # %_Z19test_insertion_sortISt16reverse_iteratorIS0_IPdEEdEvT_S4_S4_S4_T0_PKc.exit + blez $s3, .LBB5_178 +# %bb.153: # %.lr.ph.i199.preheader + lu12i.w $a0, -4 + ori $s1, $a0, 392 + pcalau12i $a0, %pc_hi20(.L.str.52) + addi.d $s4, $a0, %pc_lo12(.L.str.52) + move $s5, $zero + ld.d $s6, $sp, 16 # 8-byte Folded Reload + b .LBB5_156 + .p2align 4, , 16 +.LBB5_154: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEEbT_SB_.exit.i.i + # in Loop: Header=BB5_156 Depth=1 + ld.d $a0, $sp, 160 # 8-byte Folded Reload + ld.w $a1, $a0, %pc_lo12(current_test) + move $a0, $s4 + pcaddu18i $ra, %call36(printf) + jirl $ra, $ra, 0 + ld.d $a0, $sp, 176 # 8-byte Folded Reload + ld.w $s3, $a0, %pc_lo12(iterations) +.LBB5_155: # %_Z13verify_sortedISt16reverse_iteratorIS0_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEvT_SA_.exit.i + # in Loop: Header=BB5_156 Depth=1 + addi.w $s5, $s5, 1 + bge $s5, $s3, .LBB5_165 +.LBB5_156: # %.lr.ph.i199 + # =>This Loop Header: Depth=1 + # Child Loop BB5_158 Depth 2 + # Child Loop BB5_159 Depth 3 + # Child Loop BB5_163 Depth 2 + move $a0, $s0 + ld.d $a1, $sp, 168 # 8-byte Folded Reload + move $a2, $fp + pcaddu18i $ra, %call36(memcpy) + jirl $ra, $ra, 0 + move $a0, $zero + ori $a1, $zero, 8 + b .LBB5_158 + .p2align 4, , 16 +.LBB5_157: # %.lr.ph.i2.i207..critedge.i.i209_crit_edge + # in Loop: Header=BB5_158 Depth=2 + addi.d $a2, $a3, 8 + fst.d $fa0, $a2, 0 + addi.d $a1, $a1, 8 + addi.d $a0, $a0, 8 + beq $a1, $fp, .LBB5_162 +.LBB5_158: # %.lr.ph28.i.i + # Parent Loop BB5_156 Depth=1 + # => This Loop Header: Depth=2 + # Child Loop BB5_159 Depth 3 + fldx.d $fa0, $s0, $a1 + move $a2, $a0 + .p2align 4, , 16 +.LBB5_159: # %.lr.ph.i2.i207 + # Parent Loop BB5_156 Depth=1 + # Parent Loop BB5_158 Depth=2 + # => This Inner Loop Header: Depth=3 + fldx.d $fa1, $s0, $a2 + fcmp.cule.d $fcc0, $fa1, $fa0 + add.d $a3, $s0, $a2 + bcnez $fcc0, .LBB5_157 +# %bb.160: # in Loop: Header=BB5_159 Depth=3 + addi.d $a2, $a2, -8 + fst.d $fa1, $a3, 8 + bne $a2, $s8, .LBB5_159 +# %bb.161: # in Loop: Header=BB5_158 Depth=2 + move $a2, $s0 + fst.d $fa0, $a2, 0 + addi.d $a1, $a1, 8 + addi.d $a0, $a0, 8 + bne $a1, $fp, .LBB5_158 +.LBB5_162: # %_ZN9benchmark13insertionSortISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEdEEvT_SB_.exit.i.preheader + # in Loop: Header=BB5_156 Depth=1 + move $a0, $s1 + .p2align 4, , 16 +.LBB5_163: # %_ZN9benchmark13insertionSortISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEdEEvT_SB_.exit.i + # Parent Loop BB5_156 Depth=1 + # => This Inner Loop Header: Depth=2 + beqz $a0, .LBB5_155 +# %bb.164: # in Loop: Header=BB5_163 Depth=2 + add.d $a1, $s0, $a0 + fldx.d $fa0, $a1, $fp + fldx.d $fa1, $a1, $s6 + fcmp.cule.d $fcc0, $fa1, $fa0 + addi.d $a0, $a0, 8 + bcnez $fcc0, .LBB5_163 + b .LBB5_154 +.LBB5_165: # %_Z19test_insertion_sortISt16reverse_iteratorIS0_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEdEvT_SA_SA_SA_T0_PKc.exit + blez $s3, .LBB5_178 +# %bb.166: # %.lr.ph.i216.preheader + lu12i.w $a0, -4 + ori $s1, $a0, 392 + pcalau12i $a0, %pc_hi20(.L.str.52) + addi.d $s4, $a0, %pc_lo12(.L.str.52) + move $s5, $zero + b .LBB5_169 + .p2align 4, , 16 +.LBB5_167: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEEbT_SB_.exit.i.i240 + # in Loop: Header=BB5_169 Depth=1 + ld.d $a0, $sp, 160 # 8-byte Folded Reload + ld.w $a1, $a0, %pc_lo12(current_test) + move $a0, $s4 + pcaddu18i $ra, %call36(printf) + jirl $ra, $ra, 0 + ld.d $a0, $sp, 176 # 8-byte Folded Reload + ld.w $s3, $a0, %pc_lo12(iterations) +.LBB5_168: # %_Z13verify_sortedISt16reverse_iteratorIS0_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEvT_SA_.exit.i242 + # in Loop: Header=BB5_169 Depth=1 + addi.w $s5, $s5, 1 + bge $s5, $s3, .LBB5_178 +.LBB5_169: # %.lr.ph.i216 + # =>This Loop Header: Depth=1 + # Child Loop BB5_171 Depth 2 + # Child Loop BB5_172 Depth 3 + # Child Loop BB5_176 Depth 2 + move $a0, $s0 + ld.d $a1, $sp, 168 # 8-byte Folded Reload + move $a2, $fp + pcaddu18i $ra, %call36(memcpy) + jirl $ra, $ra, 0 + move $a0, $zero + ori $a1, $zero, 8 + b .LBB5_171 + .p2align 4, , 16 +.LBB5_170: # %.lr.ph.i2.i230..critedge.i.i232_crit_edge + # in Loop: Header=BB5_171 Depth=2 + addi.d $a2, $a3, 8 + fst.d $fa0, $a2, 0 + addi.d $a1, $a1, 8 + addi.d $a0, $a0, 8 + beq $a1, $fp, .LBB5_175 +.LBB5_171: # %.lr.ph28.i.i227 + # Parent Loop BB5_169 Depth=1 + # => This Loop Header: Depth=2 + # Child Loop BB5_172 Depth 3 + fldx.d $fa0, $s0, $a1 + move $a2, $a0 + .p2align 4, , 16 +.LBB5_172: # %.lr.ph.i2.i230 + # Parent Loop BB5_169 Depth=1 + # Parent Loop BB5_171 Depth=2 + # => This Inner Loop Header: Depth=3 + fldx.d $fa1, $s0, $a2 + fcmp.cule.d $fcc0, $fa1, $fa0 + add.d $a3, $s0, $a2 + bcnez $fcc0, .LBB5_170 +# %bb.173: # in Loop: Header=BB5_172 Depth=3 + addi.d $a2, $a2, -8 + fst.d $fa1, $a3, 8 + bne $a2, $s8, .LBB5_172 +# %bb.174: # in Loop: Header=BB5_171 Depth=2 + move $a2, $s0 + fst.d $fa0, $a2, 0 + addi.d $a1, $a1, 8 + addi.d $a0, $a0, 8 + bne $a1, $fp, .LBB5_171 +.LBB5_175: # %_ZN9benchmark13insertionSortISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEdEEvT_SB_.exit.i236.preheader + # in Loop: Header=BB5_169 Depth=1 + move $a0, $s1 + .p2align 4, , 16 +.LBB5_176: # %_ZN9benchmark13insertionSortISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEdEEvT_SB_.exit.i236 + # Parent Loop BB5_169 Depth=1 + # => This Inner Loop Header: Depth=2 + beqz $a0, .LBB5_168 +# %bb.177: # in Loop: Header=BB5_176 Depth=2 + add.d $a1, $s0, $a0 + fldx.d $fa0, $a1, $fp + fldx.d $fa1, $a1, $s6 + fcmp.cule.d $fcc0, $fa1, $fa0 + addi.d $a0, $a0, 8 + bcnez $fcc0, .LBB5_176 + b .LBB5_167 +.LBB5_178: # %_Z19test_insertion_sortISt16reverse_iteratorIS0_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEdEvT_SA_SA_SA_T0_PKc.exit244 + ld.d $a0, $sp, 128 # 8-byte Folded Reload + ld.d $a0, $a0, %pc_lo12(dMpb) ld.d $a1, $sp, 120 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(dMpe) - ld.d $a2, $sp, 136 # 8-byte Folded Reload + ld.d $a2, $sp, 96 # 8-byte Folded Reload ld.d $a2, $a2, %pc_lo12(dpb) - ld.d $a3, $sp, 128 # 8-byte Folded Reload + ld.d $a3, $sp, 136 # 8-byte Folded Reload ld.d $a3, $a3, %pc_lo12(dpe) slli.d $a4, $s3, 3 ld.d $s7, $sp, 176 # 8-byte Folded Reload @@ -1556,7 +1747,7 @@ main: # @main pcaddu18i $ra, %call36(_Z14test_quicksortIPddEvT_S1_S1_S1_T0_PKc) jirl $ra, $ra, 0 .Ltmp8: # EH_LABEL -# %bb.154: +# %bb.179: .Ltmp9: # EH_LABEL pcalau12i $a0, %pc_hi20(.L.str.35) addi.d $a4, $a0, %pc_lo12(.L.str.35) @@ -1568,58 +1759,58 @@ main: # @main pcaddu18i $ra, %call36(_Z14test_quicksortIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEdEvT_S7_S7_S7_T0_PKc) jirl $ra, $ra, 0 .Ltmp10: # EH_LABEL -# %bb.155: +# %bb.180: ld.w $a0, $s7, %pc_lo12(iterations) - blez $a0, .LBB5_238 -# %bb.156: # %.lr.ph.i245.preheader - ld.d $a0, $sp, 64 # 8-byte Folded Reload + blez $a0, .LBB5_229 +# %bb.181: # %.lr.ph.i245.preheader + ld.d $a0, $sp, 72 # 8-byte Folded Reload ld.d $s1, $a0, %pc_lo12(rdMpb) - ld.d $a0, $sp, 56 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload ld.d $s3, $a0, %pc_lo12(rdMpe) - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 112 # 8-byte Folded Reload ld.d $s5, $a0, %pc_lo12(rdpb) - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload ld.d $s6, $a0, %pc_lo12(rdpe) addi.d $s7, $s5, -8 pcalau12i $a0, %pc_hi20(.L.str.52) addi.d $s4, $a0, %pc_lo12(.L.str.52) move $s8, $zero - b .LBB5_159 + b .LBB5_184 .p2align 4, , 16 -.LBB5_157: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIPdEEEbT_S4_.exit.i.i252 - # in Loop: Header=BB5_159 Depth=1 +.LBB5_182: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIPdEEEbT_S4_.exit.i.i252 + # in Loop: Header=BB5_184 Depth=1 ld.d $a0, $sp, 160 # 8-byte Folded Reload ld.w $a1, $a0, %pc_lo12(current_test) move $a0, $s4 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 -.LBB5_158: # %_Z13verify_sortedISt16reverse_iteratorIPdEEvT_S3_.exit.i253 - # in Loop: Header=BB5_159 Depth=1 +.LBB5_183: # %_Z13verify_sortedISt16reverse_iteratorIPdEEvT_S3_.exit.i253 + # in Loop: Header=BB5_184 Depth=1 ld.d $a0, $sp, 176 # 8-byte Folded Reload ld.w $a0, $a0, %pc_lo12(iterations) addi.w $s8, $s8, 1 - bge $s8, $a0, .LBB5_166 -.LBB5_159: # %.lr.ph.i245 + bge $s8, $a0, .LBB5_191 +.LBB5_184: # %.lr.ph.i245 # =>This Loop Header: Depth=1 - # Child Loop BB5_161 Depth 2 - # Child Loop BB5_164 Depth 2 - beq $s1, $s3, .LBB5_162 -# %bb.160: # %.lr.ph.i.i247.preheader - # in Loop: Header=BB5_159 Depth=1 + # Child Loop BB5_186 Depth 2 + # Child Loop BB5_189 Depth 2 + beq $s1, $s3, .LBB5_187 +# %bb.185: # %.lr.ph.i.i247.preheader + # in Loop: Header=BB5_184 Depth=1 move $a0, $s7 move $a1, $s1 .p2align 4, , 16 -.LBB5_161: # %.lr.ph.i.i247 - # Parent Loop BB5_159 Depth=1 +.LBB5_186: # %.lr.ph.i.i247 + # Parent Loop BB5_184 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa0, $a1, -8 addi.d $a2, $a1, -8 fst.d $fa0, $a0, 0 addi.d $a0, $a0, -8 move $a1, $a2 - bne $a2, $s3, .LBB5_161 -.LBB5_162: # %_ZN9benchmark4copyISt16reverse_iteratorIPdES3_EEvT_S4_T0_.exit.i249 - # in Loop: Header=BB5_159 Depth=1 + bne $a2, $s3, .LBB5_186 +.LBB5_187: # %_ZN9benchmark4copyISt16reverse_iteratorIPdES3_EEvT_S4_T0_.exit.i249 + # in Loop: Header=BB5_184 Depth=1 st.d $s5, $sp, 216 st.d $s6, $sp, 200 .Ltmp11: # EH_LABEL @@ -1628,26 +1819,26 @@ main: # @main pcaddu18i $ra, %call36(_ZN9benchmark9quicksortISt16reverse_iteratorIPdEdEEvT_S4_) jirl $ra, $ra, 0 .Ltmp12: # EH_LABEL -# %bb.163: # %.noexc254.preheader - # in Loop: Header=BB5_159 Depth=1 +# %bb.188: # %.noexc254.preheader + # in Loop: Header=BB5_184 Depth=1 move $a0, $s7 .p2align 4, , 16 -.LBB5_164: # %.noexc254 - # Parent Loop BB5_159 Depth=1 +.LBB5_189: # %.noexc254 + # Parent Loop BB5_184 Depth=1 # => This Inner Loop Header: Depth=2 - beq $a0, $s6, .LBB5_158 -# %bb.165: # in Loop: Header=BB5_164 Depth=2 + beq $a0, $s6, .LBB5_183 +# %bb.190: # in Loop: Header=BB5_189 Depth=2 fld.d $fa0, $a0, -8 fld.d $fa1, $a0, 0 addi.d $a0, $a0, -8 fcmp.cule.d $fcc0, $fa1, $fa0 - bcnez $fcc0, .LBB5_164 - b .LBB5_157 -.LBB5_166: # %.loopexit734 + bcnez $fcc0, .LBB5_189 + b .LBB5_182 +.LBB5_191: # %.loopexit734 ld.d $s7, $sp, 176 # 8-byte Folded Reload ld.d $s8, $sp, 160 # 8-byte Folded Reload - blez $a0, .LBB5_238 -# %bb.167: # %.lr.ph.i255.preheader + blez $a0, .LBB5_229 +# %bb.192: # %.lr.ph.i255.preheader lu12i.w $a0, 3 ori $s3, $a0, 3704 add.d $s1, $s0, $s3 @@ -1656,22 +1847,22 @@ main: # @main pcalau12i $a0, %pc_hi20(.L.str.52) addi.d $s4, $a0, %pc_lo12(.L.str.52) move $s6, $zero - b .LBB5_170 + b .LBB5_195 .p2align 4, , 16 -.LBB5_168: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEbT_SA_.exit.i.i267 - # in Loop: Header=BB5_170 Depth=1 +.LBB5_193: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEbT_SA_.exit.i.i267 + # in Loop: Header=BB5_195 Depth=1 ld.w $a1, $s8, %pc_lo12(current_test) move $a0, $s4 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 -.LBB5_169: # %_Z13verify_sortedISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEvT_S9_.exit.i268 - # in Loop: Header=BB5_170 Depth=1 +.LBB5_194: # %_Z13verify_sortedISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEvT_S9_.exit.i268 + # in Loop: Header=BB5_195 Depth=1 ld.w $a0, $s7, %pc_lo12(iterations) addi.w $s6, $s6, 1 - bge $s6, $a0, .LBB5_174 -.LBB5_170: # %.lr.ph.i255 + bge $s6, $a0, .LBB5_199 +.LBB5_195: # %.lr.ph.i255 # =>This Loop Header: Depth=1 - # Child Loop BB5_172 Depth 2 + # Child Loop BB5_197 Depth 2 move $a0, $s0 ld.d $a1, $sp, 168 # 8-byte Folded Reload move $a2, $fp @@ -1685,49 +1876,49 @@ main: # @main pcaddu18i $ra, %call36(_ZN9benchmark9quicksortISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEdEEvT_SA_) jirl $ra, $ra, 0 .Ltmp15: # EH_LABEL -# %bb.171: # %.noexc269.preheader - # in Loop: Header=BB5_170 Depth=1 +# %bb.196: # %.noexc269.preheader + # in Loop: Header=BB5_195 Depth=1 move $a0, $s5 move $a1, $s1 move $a2, $s2 .p2align 4, , 16 -.LBB5_172: # %.noexc269 - # Parent Loop BB5_170 Depth=1 +.LBB5_197: # %.noexc269 + # Parent Loop BB5_195 Depth=1 # => This Inner Loop Header: Depth=2 - beqz $a0, .LBB5_169 -# %bb.173: # in Loop: Header=BB5_172 Depth=2 + beqz $a0, .LBB5_194 +# %bb.198: # in Loop: Header=BB5_197 Depth=2 fld.d $fa0, $a1, -8 fld.d $fa1, $a2, -8 addi.d $a1, $a1, -8 addi.d $a2, $a2, -8 fcmp.cule.d $fcc0, $fa1, $fa0 addi.d $a0, $a0, 8 - bcnez $fcc0, .LBB5_172 - b .LBB5_168 -.LBB5_174: - blez $a0, .LBB5_238 -# %bb.175: # %.lr.ph.i270.preheader + bcnez $fcc0, .LBB5_197 + b .LBB5_193 +.LBB5_199: + blez $a0, .LBB5_229 +# %bb.200: # %.lr.ph.i270.preheader lu12i.w $a0, -4 ori $s5, $a0, 392 pcalau12i $a0, %pc_hi20(.L.str.52) addi.d $s4, $a0, %pc_lo12(.L.str.52) move $s6, $zero - b .LBB5_178 + b .LBB5_203 .p2align 4, , 16 -.LBB5_176: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEbT_SA_.exit.i.i282 - # in Loop: Header=BB5_178 Depth=1 +.LBB5_201: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEbT_SA_.exit.i.i282 + # in Loop: Header=BB5_203 Depth=1 ld.w $a1, $s8, %pc_lo12(current_test) move $a0, $s4 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 -.LBB5_177: # %_Z13verify_sortedISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEvT_S9_.exit.i283 - # in Loop: Header=BB5_178 Depth=1 +.LBB5_202: # %_Z13verify_sortedISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEvT_S9_.exit.i283 + # in Loop: Header=BB5_203 Depth=1 ld.w $a0, $s7, %pc_lo12(iterations) addi.w $s6, $s6, 1 - bge $s6, $a0, .LBB5_182 -.LBB5_178: # %.lr.ph.i270 + bge $s6, $a0, .LBB5_207 +.LBB5_203: # %.lr.ph.i270 # =>This Loop Header: Depth=1 - # Child Loop BB5_180 Depth 2 + # Child Loop BB5_205 Depth 2 move $a0, $s0 ld.d $a1, $sp, 168 # 8-byte Folded Reload move $a2, $fp @@ -1741,28 +1932,28 @@ main: # @main pcaddu18i $ra, %call36(_ZN9benchmark9quicksortISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEdEEvT_SA_) jirl $ra, $ra, 0 .Ltmp18: # EH_LABEL -# %bb.179: # %.noexc284.preheader - # in Loop: Header=BB5_178 Depth=1 +# %bb.204: # %.noexc284.preheader + # in Loop: Header=BB5_203 Depth=1 move $a0, $s5 move $a1, $s1 move $a2, $s2 .p2align 4, , 16 -.LBB5_180: # %.noexc284 - # Parent Loop BB5_178 Depth=1 +.LBB5_205: # %.noexc284 + # Parent Loop BB5_203 Depth=1 # => This Inner Loop Header: Depth=2 - beqz $a0, .LBB5_177 -# %bb.181: # in Loop: Header=BB5_180 Depth=2 + beqz $a0, .LBB5_202 +# %bb.206: # in Loop: Header=BB5_205 Depth=2 fld.d $fa0, $a1, -8 fld.d $fa1, $a2, -8 addi.d $a1, $a1, -8 addi.d $a2, $a2, -8 fcmp.cule.d $fcc0, $fa1, $fa0 addi.d $a0, $a0, 8 - bcnez $fcc0, .LBB5_180 - b .LBB5_176 -.LBB5_182: # %.loopexit728 - blez $a0, .LBB5_238 -# %bb.183: # %.lr.ph.i286 + bcnez $fcc0, .LBB5_205 + b .LBB5_201 +.LBB5_207: # %.loopexit728 + blez $a0, .LBB5_229 +# %bb.208: # %.lr.ph.i286 pcalau12i $a0, %pc_hi20(rrdMpb+8) ld.d $s1, $a0, %pc_lo12(rrdMpb+8) pcalau12i $a0, %pc_hi20(rrdMpe+8) @@ -1780,7 +1971,7 @@ main: # @main sltui $a0, $a0, 32 or $a0, $a1, $a0 st.d $a0, $sp, 144 # 8-byte Folded Spill - st.d $a2, $sp, 72 # 8-byte Folded Spill + st.d $a2, $sp, 56 # 8-byte Folded Spill bstrpick.d $a0, $a2, 61, 2 slli.d $a1, $a0, 2 st.d $a1, $sp, 152 # 8-byte Folded Spill @@ -1796,42 +1987,42 @@ main: # @main addi.d $s8, $s5, 8 pcalau12i $a0, %pc_hi20(.L.str.52) addi.d $a0, $a0, %pc_lo12(.L.str.52) - st.d $a0, $sp, 112 # 8-byte Folded Spill + st.d $a0, $sp, 80 # 8-byte Folded Spill move $s4, $zero - b .LBB5_186 + b .LBB5_211 .p2align 4, , 16 -.LBB5_184: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIS1_IPdEEEEbT_S5_.exit.i.i295 - # in Loop: Header=BB5_186 Depth=1 +.LBB5_209: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIS1_IPdEEEEbT_S5_.exit.i.i295 + # in Loop: Header=BB5_211 Depth=1 ld.d $a0, $sp, 160 # 8-byte Folded Reload ld.w $a1, $a0, %pc_lo12(current_test) - ld.d $a0, $sp, 112 # 8-byte Folded Reload + ld.d $a0, $sp, 80 # 8-byte Folded Reload pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 -.LBB5_185: # %_Z13verify_sortedISt16reverse_iteratorIS0_IPdEEEvT_S4_.exit.i296 - # in Loop: Header=BB5_186 Depth=1 +.LBB5_210: # %_Z13verify_sortedISt16reverse_iteratorIS0_IPdEEEvT_S4_.exit.i296 + # in Loop: Header=BB5_211 Depth=1 ld.d $a0, $sp, 176 # 8-byte Folded Reload ld.w $a0, $a0, %pc_lo12(iterations) addi.w $s4, $s4, 1 - bge $s4, $a0, .LBB5_209 -.LBB5_186: # =>This Loop Header: Depth=1 - # Child Loop BB5_189 Depth 2 - # Child Loop BB5_191 Depth 2 - # Child Loop BB5_194 Depth 2 - beq $s1, $s6, .LBB5_192 -# %bb.187: # %.lr.ph.i.i289.preheader - # in Loop: Header=BB5_186 Depth=1 + bge $s4, $a0, .LBB5_221 +.LBB5_211: # =>This Loop Header: Depth=1 + # Child Loop BB5_214 Depth 2 + # Child Loop BB5_216 Depth 2 + # Child Loop BB5_219 Depth 2 + beq $s1, $s6, .LBB5_217 +# %bb.212: # %.lr.ph.i.i289.preheader + # in Loop: Header=BB5_211 Depth=1 move $a0, $s5 move $a1, $s1 ld.d $a2, $sp, 144 # 8-byte Folded Reload - bnez $a2, .LBB5_191 -# %bb.188: # %vector.body1006.preheader - # in Loop: Header=BB5_186 Depth=1 + bnez $a2, .LBB5_216 +# %bb.213: # %vector.body1006.preheader + # in Loop: Header=BB5_211 Depth=1 ld.d $a0, $sp, 152 # 8-byte Folded Reload ld.d $a1, $sp, 24 # 8-byte Folded Reload ld.d $a2, $sp, 32 # 8-byte Folded Reload .p2align 4, , 16 -.LBB5_189: # %vector.body1006 - # Parent Loop BB5_186 Depth=1 +.LBB5_214: # %vector.body1006 + # Parent Loop BB5_211 Depth=1 # => This Inner Loop Header: Depth=2 vld $vr0, $a2, -16 vld $vr1, $a2, 0 @@ -1840,17 +2031,17 @@ main: # @main addi.d $a2, $a2, 32 addi.d $a0, $a0, -4 addi.d $a1, $a1, 32 - bnez $a0, .LBB5_189 -# %bb.190: # %middle.block1015 - # in Loop: Header=BB5_186 Depth=1 + bnez $a0, .LBB5_214 +# %bb.215: # %middle.block1015 + # in Loop: Header=BB5_211 Depth=1 ld.d $a0, $sp, 48 # 8-byte Folded Reload ld.d $a1, $sp, 40 # 8-byte Folded Reload - ld.d $a2, $sp, 72 # 8-byte Folded Reload + ld.d $a2, $sp, 56 # 8-byte Folded Reload ld.d $a3, $sp, 152 # 8-byte Folded Reload - beq $a2, $a3, .LBB5_192 + beq $a2, $a3, .LBB5_217 .p2align 4, , 16 -.LBB5_191: # %.lr.ph.i.i289 - # Parent Loop BB5_186 Depth=1 +.LBB5_216: # %.lr.ph.i.i289 + # Parent Loop BB5_211 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa0, $a1, 0 addi.d $a2, $a1, 8 @@ -1858,9 +2049,9 @@ main: # @main fst.d $fa0, $a0, 0 move $a0, $a1 move $a1, $a2 - bne $a2, $s6, .LBB5_191 -.LBB5_192: # %_ZN9benchmark4copyISt16reverse_iteratorIS1_IPdEES4_EEvT_S5_T0_.exit.i292 - # in Loop: Header=BB5_186 Depth=1 + bne $a2, $s6, .LBB5_216 +.LBB5_217: # %_ZN9benchmark4copyISt16reverse_iteratorIS1_IPdEES4_EEvT_S5_T0_.exit.i292 + # in Loop: Header=BB5_211 Depth=1 st.d $s5, $sp, 224 st.d $s7, $sp, 208 .Ltmp20: # EH_LABEL @@ -1869,136 +2060,50 @@ main: # @main pcaddu18i $ra, %call36(_ZN9benchmark9quicksortISt16reverse_iteratorIS1_IPdEEdEEvT_S5_) jirl $ra, $ra, 0 .Ltmp21: # EH_LABEL -# %bb.193: # %.noexc297.preheader - # in Loop: Header=BB5_186 Depth=1 +# %bb.218: # %.noexc297.preheader + # in Loop: Header=BB5_211 Depth=1 move $a0, $s8 .p2align 4, , 16 -.LBB5_194: # %.noexc297 - # Parent Loop BB5_186 Depth=1 +.LBB5_219: # %.noexc297 + # Parent Loop BB5_211 Depth=1 # => This Inner Loop Header: Depth=2 - beq $a0, $s7, .LBB5_185 -# %bb.195: # in Loop: Header=BB5_194 Depth=2 + beq $a0, $s7, .LBB5_210 +# %bb.220: # in Loop: Header=BB5_219 Depth=2 fld.d $fa0, $a0, 0 fld.d $fa1, $a0, -8 fcmp.cule.d $fcc0, $fa1, $fa0 addi.d $a0, $a0, 8 - bcnez $fcc0, .LBB5_194 - b .LBB5_184 -.LBB5_196: # %_Z19test_insertion_sortISt16reverse_iteratorIS0_IPdEEdEvT_S4_S4_S4_T0_PKc.exit - ld.d $s6, $sp, 80 # 8-byte Folded Reload - blez $s3, .LBB5_153 -# %bb.197: # %.lr.ph.i199.preheader - lu12i.w $a0, -4 - ori $s1, $a0, 392 - pcalau12i $a0, %pc_hi20(.L.str.52) - addi.d $s4, $a0, %pc_lo12(.L.str.52) - move $s5, $zero - b .LBB5_200 - .p2align 4, , 16 -.LBB5_198: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEEbT_SB_.exit.i.i - # in Loop: Header=BB5_200 Depth=1 - ld.d $a0, $sp, 160 # 8-byte Folded Reload - ld.w $a1, $a0, %pc_lo12(current_test) - move $a0, $s4 - pcaddu18i $ra, %call36(printf) - jirl $ra, $ra, 0 - ld.d $a0, $sp, 176 # 8-byte Folded Reload - ld.w $s3, $a0, %pc_lo12(iterations) -.LBB5_199: # %_Z13verify_sortedISt16reverse_iteratorIS0_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEvT_SA_.exit.i - # in Loop: Header=BB5_200 Depth=1 - addi.w $s5, $s5, 1 - bge $s5, $s3, .LBB5_217 -.LBB5_200: # %.lr.ph.i199 - # =>This Loop Header: Depth=1 - # Child Loop BB5_202 Depth 2 - # Child Loop BB5_203 Depth 3 - # Child Loop BB5_207 Depth 2 - move $a0, $s0 - ld.d $a1, $sp, 168 # 8-byte Folded Reload - move $a2, $fp - pcaddu18i $ra, %call36(memcpy) - jirl $ra, $ra, 0 - move $a0, $zero - ori $a1, $zero, 8 - b .LBB5_202 - .p2align 4, , 16 -.LBB5_201: # %.lr.ph.i2.i207..critedge.i.i209_crit_edge - # in Loop: Header=BB5_202 Depth=2 - addi.d $a2, $a3, 8 - fst.d $fa0, $a2, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, 8 - beq $a1, $fp, .LBB5_206 -.LBB5_202: # %.lr.ph28.i.i - # Parent Loop BB5_200 Depth=1 - # => This Loop Header: Depth=2 - # Child Loop BB5_203 Depth 3 - fldx.d $fa0, $s0, $a1 - move $a2, $a0 - .p2align 4, , 16 -.LBB5_203: # %.lr.ph.i2.i207 - # Parent Loop BB5_200 Depth=1 - # Parent Loop BB5_202 Depth=2 - # => This Inner Loop Header: Depth=3 - fldx.d $fa1, $s0, $a2 - fcmp.cule.d $fcc0, $fa1, $fa0 - add.d $a3, $s0, $a2 - bcnez $fcc0, .LBB5_201 -# %bb.204: # in Loop: Header=BB5_203 Depth=3 - addi.d $a2, $a2, -8 - fst.d $fa1, $a3, 8 - bne $a2, $s8, .LBB5_203 -# %bb.205: # in Loop: Header=BB5_202 Depth=2 - move $a2, $s0 - fst.d $fa0, $a2, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, 8 - bne $a1, $fp, .LBB5_202 -.LBB5_206: # %_ZN9benchmark13insertionSortISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEdEEvT_SB_.exit.i.preheader - # in Loop: Header=BB5_200 Depth=1 - move $a0, $s1 - .p2align 4, , 16 -.LBB5_207: # %_ZN9benchmark13insertionSortISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEdEEvT_SB_.exit.i - # Parent Loop BB5_200 Depth=1 - # => This Inner Loop Header: Depth=2 - beqz $a0, .LBB5_199 -# %bb.208: # in Loop: Header=BB5_207 Depth=2 - add.d $a1, $s0, $a0 - fldx.d $fa0, $a1, $fp - fldx.d $fa1, $a1, $s7 - fcmp.cule.d $fcc0, $fa1, $fa0 - addi.d $a0, $a0, 8 - bcnez $fcc0, .LBB5_207 - b .LBB5_198 -.LBB5_209: + bcnez $fcc0, .LBB5_219 + b .LBB5_209 +.LBB5_221: ld.d $s7, $sp, 176 # 8-byte Folded Reload ld.d $s6, $sp, 160 # 8-byte Folded Reload - blez $a0, .LBB5_238 -# %bb.210: # %.lr.ph.i298 + blez $a0, .LBB5_229 +# %bb.222: # %.lr.ph.i298 lu12i.w $a0, -4 ori $s1, $a0, 392 pcalau12i $a0, %pc_hi20(.L.str.52) addi.d $s4, $a0, %pc_lo12(.L.str.52) move $s5, $zero - ld.d $s8, $sp, 168 # 8-byte Folded Reload - b .LBB5_213 + ld.d $s8, $sp, 96 # 8-byte Folded Reload + b .LBB5_225 .p2align 4, , 16 -.LBB5_211: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEEbT_SB_.exit.i.i310 - # in Loop: Header=BB5_213 Depth=1 +.LBB5_223: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEEbT_SB_.exit.i.i310 + # in Loop: Header=BB5_225 Depth=1 ld.w $a1, $s6, %pc_lo12(current_test) move $a0, $s4 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 -.LBB5_212: # %_Z13verify_sortedISt16reverse_iteratorIS0_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEvT_SA_.exit.i311 - # in Loop: Header=BB5_213 Depth=1 +.LBB5_224: # %_Z13verify_sortedISt16reverse_iteratorIS0_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEvT_SA_.exit.i311 + # in Loop: Header=BB5_225 Depth=1 ld.w $a0, $s7, %pc_lo12(iterations) addi.w $s5, $s5, 1 - bge $s5, $a0, .LBB5_230 -.LBB5_213: # %_ZN9benchmark4copyISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEESA_EEvT_SB_T0_.exit.i306 + bge $s5, $a0, .LBB5_283 +.LBB5_225: # %_ZN9benchmark4copyISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEESA_EEvT_SB_T0_.exit.i306 # =>This Loop Header: Depth=1 - # Child Loop BB5_215 Depth 2 + # Child Loop BB5_227 Depth 2 move $a0, $s0 - move $a1, $s8 + ld.d $a1, $sp, 168 # 8-byte Folded Reload move $a2, $fp pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 @@ -2010,168 +2115,31 @@ main: # @main pcaddu18i $ra, %call36(_ZN9benchmark9quicksortISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEdEEvT_SB_) jirl $ra, $ra, 0 .Ltmp24: # EH_LABEL -# %bb.214: # %.noexc312.preheader - # in Loop: Header=BB5_213 Depth=1 +# %bb.226: # %.noexc312.preheader + # in Loop: Header=BB5_225 Depth=1 move $a0, $s1 .p2align 4, , 16 -.LBB5_215: # %.noexc312 - # Parent Loop BB5_213 Depth=1 +.LBB5_227: # %.noexc312 + # Parent Loop BB5_225 Depth=1 # => This Inner Loop Header: Depth=2 - beqz $a0, .LBB5_212 -# %bb.216: # in Loop: Header=BB5_215 Depth=2 + beqz $a0, .LBB5_224 +# %bb.228: # in Loop: Header=BB5_227 Depth=2 add.d $a1, $s0, $a0 fldx.d $fa0, $a1, $fp fldx.d $fa1, $a1, $s3 fcmp.cule.d $fcc0, $fa1, $fa0 addi.d $a0, $a0, 8 - bcnez $fcc0, .LBB5_215 - b .LBB5_211 -.LBB5_217: # %_Z19test_insertion_sortISt16reverse_iteratorIS0_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEdEvT_SA_SA_SA_T0_PKc.exit - blez $s3, .LBB5_153 -# %bb.218: # %.lr.ph.i216.preheader - lu12i.w $a0, -4 - ori $s1, $a0, 392 - pcalau12i $a0, %pc_hi20(.L.str.52) - addi.d $s4, $a0, %pc_lo12(.L.str.52) - move $s5, $zero - b .LBB5_221 - .p2align 4, , 16 -.LBB5_219: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEEbT_SB_.exit.i.i240 - # in Loop: Header=BB5_221 Depth=1 - ld.d $a0, $sp, 160 # 8-byte Folded Reload - ld.w $a1, $a0, %pc_lo12(current_test) - move $a0, $s4 - pcaddu18i $ra, %call36(printf) - jirl $ra, $ra, 0 - ld.d $a0, $sp, 176 # 8-byte Folded Reload - ld.w $s3, $a0, %pc_lo12(iterations) -.LBB5_220: # %_Z13verify_sortedISt16reverse_iteratorIS0_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEvT_SA_.exit.i242 - # in Loop: Header=BB5_221 Depth=1 - addi.w $s5, $s5, 1 - bge $s5, $s3, .LBB5_153 -.LBB5_221: # %.lr.ph.i216 - # =>This Loop Header: Depth=1 - # Child Loop BB5_223 Depth 2 - # Child Loop BB5_224 Depth 3 - # Child Loop BB5_228 Depth 2 - move $a0, $s0 - ld.d $a1, $sp, 168 # 8-byte Folded Reload - move $a2, $fp - pcaddu18i $ra, %call36(memcpy) - jirl $ra, $ra, 0 - move $a0, $zero - ori $a1, $zero, 8 + bcnez $fcc0, .LBB5_227 b .LBB5_223 - .p2align 4, , 16 -.LBB5_222: # %.lr.ph.i2.i230..critedge.i.i232_crit_edge - # in Loop: Header=BB5_223 Depth=2 - addi.d $a2, $a3, 8 - fst.d $fa0, $a2, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, 8 - beq $a1, $fp, .LBB5_227 -.LBB5_223: # %.lr.ph28.i.i227 - # Parent Loop BB5_221 Depth=1 - # => This Loop Header: Depth=2 - # Child Loop BB5_224 Depth 3 - fldx.d $fa0, $s0, $a1 - move $a2, $a0 - .p2align 4, , 16 -.LBB5_224: # %.lr.ph.i2.i230 - # Parent Loop BB5_221 Depth=1 - # Parent Loop BB5_223 Depth=2 - # => This Inner Loop Header: Depth=3 - fldx.d $fa1, $s0, $a2 - fcmp.cule.d $fcc0, $fa1, $fa0 - add.d $a3, $s0, $a2 - bcnez $fcc0, .LBB5_222 -# %bb.225: # in Loop: Header=BB5_224 Depth=3 - addi.d $a2, $a2, -8 - fst.d $fa1, $a3, 8 - bne $a2, $s8, .LBB5_224 -# %bb.226: # in Loop: Header=BB5_223 Depth=2 - move $a2, $s0 - fst.d $fa0, $a2, 0 - addi.d $a1, $a1, 8 - addi.d $a0, $a0, 8 - bne $a1, $fp, .LBB5_223 -.LBB5_227: # %_ZN9benchmark13insertionSortISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEdEEvT_SB_.exit.i236.preheader - # in Loop: Header=BB5_221 Depth=1 - move $a0, $s1 - .p2align 4, , 16 -.LBB5_228: # %_ZN9benchmark13insertionSortISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEdEEvT_SB_.exit.i236 - # Parent Loop BB5_221 Depth=1 - # => This Inner Loop Header: Depth=2 - beqz $a0, .LBB5_220 -# %bb.229: # in Loop: Header=BB5_228 Depth=2 - add.d $a1, $s0, $a0 - fldx.d $fa0, $a1, $fp - fldx.d $fa1, $a1, $s7 - fcmp.cule.d $fcc0, $fa1, $fa0 - addi.d $a0, $a0, 8 - bcnez $fcc0, .LBB5_228 - b .LBB5_219 -.LBB5_230: # %.loopexit722 - blez $a0, .LBB5_238 -# %bb.231: # %.lr.ph.i313 - lu12i.w $a0, -4 - ori $s1, $a0, 392 - pcalau12i $a0, %pc_hi20(.L.str.52) - addi.d $s4, $a0, %pc_lo12(.L.str.52) - move $s5, $zero - b .LBB5_234 - .p2align 4, , 16 -.LBB5_232: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEEbT_SB_.exit.i.i325 - # in Loop: Header=BB5_234 Depth=1 - ld.w $a1, $s6, %pc_lo12(current_test) - move $a0, $s4 - pcaddu18i $ra, %call36(printf) - jirl $ra, $ra, 0 -.LBB5_233: # %_Z13verify_sortedISt16reverse_iteratorIS0_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEvT_SA_.exit.i326 - # in Loop: Header=BB5_234 Depth=1 - ld.w $a0, $s7, %pc_lo12(iterations) - addi.w $s5, $s5, 1 - bge $s5, $a0, .LBB5_238 -.LBB5_234: # %_ZN9benchmark4copyISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEESA_EEvT_SB_T0_.exit.i321 - # =>This Loop Header: Depth=1 - # Child Loop BB5_236 Depth 2 - move $a0, $s0 - move $a1, $s8 - move $a2, $fp - pcaddu18i $ra, %call36(memcpy) - jirl $ra, $ra, 0 - st.d $s0, $sp, 224 - st.d $s2, $sp, 208 -.Ltmp26: # EH_LABEL - addi.d $a0, $sp, 216 - addi.d $a1, $sp, 200 - pcaddu18i $ra, %call36(_ZN9benchmark9quicksortISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEdEEvT_SB_) - jirl $ra, $ra, 0 -.Ltmp27: # EH_LABEL -# %bb.235: # %.noexc327.preheader - # in Loop: Header=BB5_234 Depth=1 - move $a0, $s1 - .p2align 4, , 16 -.LBB5_236: # %.noexc327 - # Parent Loop BB5_234 Depth=1 - # => This Inner Loop Header: Depth=2 - beqz $a0, .LBB5_233 -# %bb.237: # in Loop: Header=BB5_236 Depth=2 - add.d $a1, $s0, $a0 - fldx.d $fa0, $a1, $fp - fldx.d $fa1, $a1, $s3 - fcmp.cule.d $fcc0, $fa1, $fa0 - addi.d $a0, $a0, 8 - bcnez $fcc0, .LBB5_236 - b .LBB5_232 -.LBB5_238: # %.loopexit718 - ld.d $a0, $sp, 80 # 8-byte Folded Reload +.LBB5_229: # %.loopexit722.thread + ld.d $s8, $sp, 96 # 8-byte Folded Reload +.LBB5_230: # %.loopexit718 + ld.d $a0, $sp, 128 # 8-byte Folded Reload ld.d $a0, $a0, %pc_lo12(dMpb) ld.d $a1, $sp, 120 # 8-byte Folded Reload ld.d $a1, $a1, %pc_lo12(dMpe) - ld.d $a2, $sp, 136 # 8-byte Folded Reload - ld.d $a2, $a2, %pc_lo12(dpb) - ld.d $a3, $sp, 128 # 8-byte Folded Reload + ld.d $a2, $s8, %pc_lo12(dpb) + ld.d $a3, $sp, 136 # 8-byte Folded Reload ld.d $a3, $a3, %pc_lo12(dpe) .Ltmp29: # EH_LABEL pcalau12i $a4, %pc_hi20(.L.str.42) @@ -2180,7 +2148,7 @@ main: # @main pcaddu18i $ra, %call36(_Z14test_heap_sortIPddEvT_S1_S1_S1_T0_PKc) jirl $ra, $ra, 0 .Ltmp30: # EH_LABEL -# %bb.239: +# %bb.231: .Ltmp31: # EH_LABEL pcalau12i $a0, %pc_hi20(.L.str.43) addi.d $a4, $a0, %pc_lo12(.L.str.43) @@ -2192,58 +2160,58 @@ main: # @main pcaddu18i $ra, %call36(_Z14test_heap_sortIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEdEvT_S7_S7_S7_T0_PKc) jirl $ra, $ra, 0 .Ltmp32: # EH_LABEL -# %bb.240: +# %bb.232: ld.w $a0, $s7, %pc_lo12(iterations) - blez $a0, .LBB5_289 -# %bb.241: # %.lr.ph.i329.preheader - ld.d $a0, $sp, 64 # 8-byte Folded Reload + blez $a0, .LBB5_281 +# %bb.233: # %.lr.ph.i329.preheader + ld.d $a0, $sp, 72 # 8-byte Folded Reload ld.d $s1, $a0, %pc_lo12(rdMpb) - ld.d $a0, $sp, 56 # 8-byte Folded Reload + ld.d $a0, $sp, 64 # 8-byte Folded Reload ld.d $s4, $a0, %pc_lo12(rdMpe) - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 112 # 8-byte Folded Reload ld.d $s5, $a0, %pc_lo12(rdpb) - ld.d $a0, $sp, 96 # 8-byte Folded Reload + ld.d $a0, $sp, 104 # 8-byte Folded Reload ld.d $s6, $a0, %pc_lo12(rdpe) addi.d $s7, $s5, -8 pcalau12i $a0, %pc_hi20(.L.str.52) addi.d $s3, $a0, %pc_lo12(.L.str.52) move $s8, $zero - b .LBB5_244 + b .LBB5_236 .p2align 4, , 16 -.LBB5_242: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIPdEEEbT_S4_.exit.i.i338 - # in Loop: Header=BB5_244 Depth=1 +.LBB5_234: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIPdEEEbT_S4_.exit.i.i338 + # in Loop: Header=BB5_236 Depth=1 ld.d $a0, $sp, 160 # 8-byte Folded Reload ld.w $a1, $a0, %pc_lo12(current_test) move $a0, $s3 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 -.LBB5_243: # %_Z13verify_sortedISt16reverse_iteratorIPdEEvT_S3_.exit.i339 - # in Loop: Header=BB5_244 Depth=1 +.LBB5_235: # %_Z13verify_sortedISt16reverse_iteratorIPdEEvT_S3_.exit.i339 + # in Loop: Header=BB5_236 Depth=1 ld.d $a0, $sp, 176 # 8-byte Folded Reload ld.w $a0, $a0, %pc_lo12(iterations) addi.w $s8, $s8, 1 - bge $s8, $a0, .LBB5_251 -.LBB5_244: # %.lr.ph.i329 + bge $s8, $a0, .LBB5_243 +.LBB5_236: # %.lr.ph.i329 # =>This Loop Header: Depth=1 - # Child Loop BB5_246 Depth 2 - # Child Loop BB5_249 Depth 2 - beq $s1, $s4, .LBB5_247 -# %bb.245: # %.lr.ph.i.i332.preheader - # in Loop: Header=BB5_244 Depth=1 + # Child Loop BB5_238 Depth 2 + # Child Loop BB5_241 Depth 2 + beq $s1, $s4, .LBB5_239 +# %bb.237: # %.lr.ph.i.i332.preheader + # in Loop: Header=BB5_236 Depth=1 move $a0, $s7 move $a1, $s1 .p2align 4, , 16 -.LBB5_246: # %.lr.ph.i.i332 - # Parent Loop BB5_244 Depth=1 +.LBB5_238: # %.lr.ph.i.i332 + # Parent Loop BB5_236 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa0, $a1, -8 addi.d $a2, $a1, -8 fst.d $fa0, $a0, 0 addi.d $a0, $a0, -8 move $a1, $a2 - bne $a2, $s4, .LBB5_246 -.LBB5_247: # %_ZN9benchmark4copyISt16reverse_iteratorIPdES3_EEvT_S4_T0_.exit.i335 - # in Loop: Header=BB5_244 Depth=1 + bne $a2, $s4, .LBB5_238 +.LBB5_239: # %_ZN9benchmark4copyISt16reverse_iteratorIPdES3_EEvT_S4_T0_.exit.i335 + # in Loop: Header=BB5_236 Depth=1 st.d $s5, $sp, 216 st.d $s6, $sp, 200 .Ltmp34: # EH_LABEL @@ -2252,24 +2220,24 @@ main: # @main pcaddu18i $ra, %call36(_ZN9benchmark8heapsortISt16reverse_iteratorIPdEdEEvT_S4_) jirl $ra, $ra, 0 .Ltmp35: # EH_LABEL -# %bb.248: # %.noexc340.preheader - # in Loop: Header=BB5_244 Depth=1 +# %bb.240: # %.noexc340.preheader + # in Loop: Header=BB5_236 Depth=1 move $a0, $s7 .p2align 4, , 16 -.LBB5_249: # %.noexc340 - # Parent Loop BB5_244 Depth=1 +.LBB5_241: # %.noexc340 + # Parent Loop BB5_236 Depth=1 # => This Inner Loop Header: Depth=2 - beq $a0, $s6, .LBB5_243 -# %bb.250: # in Loop: Header=BB5_249 Depth=2 + beq $a0, $s6, .LBB5_235 +# %bb.242: # in Loop: Header=BB5_241 Depth=2 fld.d $fa0, $a0, -8 fld.d $fa1, $a0, 0 addi.d $a0, $a0, -8 fcmp.cule.d $fcc0, $fa1, $fa0 - bcnez $fcc0, .LBB5_249 - b .LBB5_242 -.LBB5_251: # %.loopexit715 - blez $a0, .LBB5_289 -# %bb.252: # %.lr.ph.i341.preheader + bcnez $fcc0, .LBB5_241 + b .LBB5_234 +.LBB5_243: # %.loopexit715 + blez $a0, .LBB5_281 +# %bb.244: # %.lr.ph.i341.preheader lu12i.w $a0, 3 ori $s4, $a0, 3704 add.d $s1, $s0, $s4 @@ -2280,22 +2248,22 @@ main: # @main move $s6, $zero ld.d $s7, $sp, 176 # 8-byte Folded Reload ld.d $s8, $sp, 160 # 8-byte Folded Reload - b .LBB5_255 + b .LBB5_247 .p2align 4, , 16 -.LBB5_253: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEbT_SA_.exit.i.i353 - # in Loop: Header=BB5_255 Depth=1 +.LBB5_245: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEbT_SA_.exit.i.i353 + # in Loop: Header=BB5_247 Depth=1 ld.w $a1, $s8, %pc_lo12(current_test) move $a0, $s3 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 -.LBB5_254: # %_Z13verify_sortedISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEvT_S9_.exit.i354 - # in Loop: Header=BB5_255 Depth=1 +.LBB5_246: # %_Z13verify_sortedISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEvT_S9_.exit.i354 + # in Loop: Header=BB5_247 Depth=1 ld.w $a0, $s7, %pc_lo12(iterations) addi.w $s6, $s6, 1 - bge $s6, $a0, .LBB5_259 -.LBB5_255: # %.lr.ph.i341 + bge $s6, $a0, .LBB5_251 +.LBB5_247: # %.lr.ph.i341 # =>This Loop Header: Depth=1 - # Child Loop BB5_257 Depth 2 + # Child Loop BB5_249 Depth 2 move $a0, $s0 ld.d $a1, $sp, 168 # 8-byte Folded Reload move $a2, $fp @@ -2309,49 +2277,49 @@ main: # @main pcaddu18i $ra, %call36(_ZN9benchmark8heapsortISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEdEEvT_SA_) jirl $ra, $ra, 0 .Ltmp38: # EH_LABEL -# %bb.256: # %.noexc355.preheader - # in Loop: Header=BB5_255 Depth=1 +# %bb.248: # %.noexc355.preheader + # in Loop: Header=BB5_247 Depth=1 move $a0, $s5 move $a1, $s1 move $a2, $s2 .p2align 4, , 16 -.LBB5_257: # %.noexc355 - # Parent Loop BB5_255 Depth=1 +.LBB5_249: # %.noexc355 + # Parent Loop BB5_247 Depth=1 # => This Inner Loop Header: Depth=2 - beqz $a0, .LBB5_254 -# %bb.258: # in Loop: Header=BB5_257 Depth=2 + beqz $a0, .LBB5_246 +# %bb.250: # in Loop: Header=BB5_249 Depth=2 fld.d $fa0, $a1, -8 fld.d $fa1, $a2, -8 addi.d $a1, $a1, -8 addi.d $a2, $a2, -8 fcmp.cule.d $fcc0, $fa1, $fa0 addi.d $a0, $a0, 8 - bcnez $fcc0, .LBB5_257 - b .LBB5_253 -.LBB5_259: - blez $a0, .LBB5_289 -# %bb.260: # %.lr.ph.i356.preheader + bcnez $fcc0, .LBB5_249 + b .LBB5_245 +.LBB5_251: + blez $a0, .LBB5_281 +# %bb.252: # %.lr.ph.i356.preheader lu12i.w $a0, -4 ori $s5, $a0, 392 pcalau12i $a0, %pc_hi20(.L.str.52) addi.d $s3, $a0, %pc_lo12(.L.str.52) move $s6, $zero - b .LBB5_263 + b .LBB5_255 .p2align 4, , 16 -.LBB5_261: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEbT_SA_.exit.i.i368 - # in Loop: Header=BB5_263 Depth=1 +.LBB5_253: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEbT_SA_.exit.i.i368 + # in Loop: Header=BB5_255 Depth=1 ld.w $a1, $s8, %pc_lo12(current_test) move $a0, $s3 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 -.LBB5_262: # %_Z13verify_sortedISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEvT_S9_.exit.i369 - # in Loop: Header=BB5_263 Depth=1 +.LBB5_254: # %_Z13verify_sortedISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEvT_S9_.exit.i369 + # in Loop: Header=BB5_255 Depth=1 ld.w $a0, $s7, %pc_lo12(iterations) addi.w $s6, $s6, 1 - bge $s6, $a0, .LBB5_267 -.LBB5_263: # %.lr.ph.i356 + bge $s6, $a0, .LBB5_259 +.LBB5_255: # %.lr.ph.i356 # =>This Loop Header: Depth=1 - # Child Loop BB5_265 Depth 2 + # Child Loop BB5_257 Depth 2 move $a0, $s0 ld.d $a1, $sp, 168 # 8-byte Folded Reload move $a2, $fp @@ -2365,28 +2333,28 @@ main: # @main pcaddu18i $ra, %call36(_ZN9benchmark8heapsortISt16reverse_iteratorIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEdEEvT_SA_) jirl $ra, $ra, 0 .Ltmp41: # EH_LABEL -# %bb.264: # %.noexc370.preheader - # in Loop: Header=BB5_263 Depth=1 +# %bb.256: # %.noexc370.preheader + # in Loop: Header=BB5_255 Depth=1 move $a0, $s5 move $a1, $s1 move $a2, $s2 .p2align 4, , 16 -.LBB5_265: # %.noexc370 - # Parent Loop BB5_263 Depth=1 +.LBB5_257: # %.noexc370 + # Parent Loop BB5_255 Depth=1 # => This Inner Loop Header: Depth=2 - beqz $a0, .LBB5_262 -# %bb.266: # in Loop: Header=BB5_265 Depth=2 + beqz $a0, .LBB5_254 +# %bb.258: # in Loop: Header=BB5_257 Depth=2 fld.d $fa0, $a1, -8 fld.d $fa1, $a2, -8 addi.d $a1, $a1, -8 addi.d $a2, $a2, -8 fcmp.cule.d $fcc0, $fa1, $fa0 addi.d $a0, $a0, 8 - bcnez $fcc0, .LBB5_265 - b .LBB5_261 -.LBB5_267: # %.loopexit709 - blez $a0, .LBB5_289 -# %bb.268: # %.lr.ph.i372 + bcnez $fcc0, .LBB5_257 + b .LBB5_253 +.LBB5_259: # %.loopexit709 + blez $a0, .LBB5_281 +# %bb.260: # %.lr.ph.i372 pcalau12i $a0, %pc_hi20(rrdMpb+8) ld.d $s1, $a0, %pc_lo12(rrdMpb+8) pcalau12i $a0, %pc_hi20(rrdMpe+8) @@ -2422,40 +2390,40 @@ main: # @main addi.d $a0, $a0, %pc_lo12(.L.str.52) st.d $a0, $sp, 136 # 8-byte Folded Spill move $s3, $zero - b .LBB5_271 + b .LBB5_263 .p2align 4, , 16 -.LBB5_269: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIS1_IPdEEEEbT_S5_.exit.i.i381 - # in Loop: Header=BB5_271 Depth=1 +.LBB5_261: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIS1_IPdEEEEbT_S5_.exit.i.i381 + # in Loop: Header=BB5_263 Depth=1 ld.d $a0, $sp, 160 # 8-byte Folded Reload ld.w $a1, $a0, %pc_lo12(current_test) ld.d $a0, $sp, 136 # 8-byte Folded Reload pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 -.LBB5_270: # %_Z13verify_sortedISt16reverse_iteratorIS0_IPdEEEvT_S4_.exit.i382 - # in Loop: Header=BB5_271 Depth=1 +.LBB5_262: # %_Z13verify_sortedISt16reverse_iteratorIS0_IPdEEEvT_S4_.exit.i382 + # in Loop: Header=BB5_263 Depth=1 ld.d $a0, $sp, 176 # 8-byte Folded Reload ld.w $a0, $a0, %pc_lo12(iterations) addi.w $s3, $s3, 1 - bge $s3, $a0, .LBB5_281 -.LBB5_271: # =>This Loop Header: Depth=1 - # Child Loop BB5_274 Depth 2 - # Child Loop BB5_276 Depth 2 - # Child Loop BB5_279 Depth 2 - beq $s1, $s6, .LBB5_277 -# %bb.272: # %.lr.ph.i.i375.preheader - # in Loop: Header=BB5_271 Depth=1 + bge $s3, $a0, .LBB5_273 +.LBB5_263: # =>This Loop Header: Depth=1 + # Child Loop BB5_266 Depth 2 + # Child Loop BB5_268 Depth 2 + # Child Loop BB5_271 Depth 2 + beq $s1, $s6, .LBB5_269 +# %bb.264: # %.lr.ph.i.i375.preheader + # in Loop: Header=BB5_263 Depth=1 move $a0, $s5 move $a1, $s1 ld.d $a2, $sp, 152 # 8-byte Folded Reload - bnez $a2, .LBB5_276 -# %bb.273: # %vector.body1026.preheader - # in Loop: Header=BB5_271 Depth=1 + bnez $a2, .LBB5_268 +# %bb.265: # %vector.body1026.preheader + # in Loop: Header=BB5_263 Depth=1 ld.d $a0, $sp, 144 # 8-byte Folded Reload ld.d $a1, $sp, 96 # 8-byte Folded Reload ld.d $a2, $sp, 104 # 8-byte Folded Reload .p2align 4, , 16 -.LBB5_274: # %vector.body1026 - # Parent Loop BB5_271 Depth=1 +.LBB5_266: # %vector.body1026 + # Parent Loop BB5_263 Depth=1 # => This Inner Loop Header: Depth=2 vld $vr0, $a2, -16 vld $vr1, $a2, 0 @@ -2464,17 +2432,17 @@ main: # @main addi.d $a2, $a2, 32 addi.d $a0, $a0, -4 addi.d $a1, $a1, 32 - bnez $a0, .LBB5_274 -# %bb.275: # %middle.block1035 - # in Loop: Header=BB5_271 Depth=1 + bnez $a0, .LBB5_266 +# %bb.267: # %middle.block1035 + # in Loop: Header=BB5_263 Depth=1 ld.d $a0, $sp, 120 # 8-byte Folded Reload ld.d $a1, $sp, 112 # 8-byte Folded Reload ld.d $a2, $sp, 128 # 8-byte Folded Reload ld.d $a3, $sp, 144 # 8-byte Folded Reload - beq $a2, $a3, .LBB5_277 + beq $a2, $a3, .LBB5_269 .p2align 4, , 16 -.LBB5_276: # %.lr.ph.i.i375 - # Parent Loop BB5_271 Depth=1 +.LBB5_268: # %.lr.ph.i.i375 + # Parent Loop BB5_263 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa0, $a1, 0 addi.d $a2, $a1, 8 @@ -2482,9 +2450,9 @@ main: # @main fst.d $fa0, $a0, 0 move $a0, $a1 move $a1, $a2 - bne $a2, $s6, .LBB5_276 -.LBB5_277: # %_ZN9benchmark4copyISt16reverse_iteratorIS1_IPdEES4_EEvT_S5_T0_.exit.i378 - # in Loop: Header=BB5_271 Depth=1 + bne $a2, $s6, .LBB5_268 +.LBB5_269: # %_ZN9benchmark4copyISt16reverse_iteratorIS1_IPdEES4_EEvT_S5_T0_.exit.i378 + # in Loop: Header=BB5_263 Depth=1 st.d $s5, $sp, 224 st.d $s7, $sp, 208 .Ltmp43: # EH_LABEL @@ -2493,25 +2461,25 @@ main: # @main pcaddu18i $ra, %call36(_ZN9benchmark8heapsortISt16reverse_iteratorIS1_IPdEEdEEvT_S5_) jirl $ra, $ra, 0 .Ltmp44: # EH_LABEL -# %bb.278: # %.noexc383.preheader - # in Loop: Header=BB5_271 Depth=1 +# %bb.270: # %.noexc383.preheader + # in Loop: Header=BB5_263 Depth=1 move $a0, $s8 .p2align 4, , 16 -.LBB5_279: # %.noexc383 - # Parent Loop BB5_271 Depth=1 +.LBB5_271: # %.noexc383 + # Parent Loop BB5_263 Depth=1 # => This Inner Loop Header: Depth=2 - beq $a0, $s7, .LBB5_270 -# %bb.280: # in Loop: Header=BB5_279 Depth=2 + beq $a0, $s7, .LBB5_262 +# %bb.272: # in Loop: Header=BB5_271 Depth=2 fld.d $fa0, $a0, 0 fld.d $fa1, $a0, -8 fcmp.cule.d $fcc0, $fa1, $fa0 addi.d $a0, $a0, 8 - bcnez $fcc0, .LBB5_279 - b .LBB5_269 -.LBB5_281: + bcnez $fcc0, .LBB5_271 + b .LBB5_261 +.LBB5_273: ld.d $s7, $sp, 160 # 8-byte Folded Reload - blez $a0, .LBB5_289 -# %bb.282: # %.lr.ph.i384 + blez $a0, .LBB5_281 +# %bb.274: # %.lr.ph.i384 lu12i.w $a0, -4 ori $s1, $a0, 392 pcalau12i $a0, %pc_hi20(.L.str.52) @@ -2519,22 +2487,22 @@ main: # @main move $s5, $zero ld.d $s6, $sp, 176 # 8-byte Folded Reload ld.d $s8, $sp, 168 # 8-byte Folded Reload - b .LBB5_285 + b .LBB5_277 .p2align 4, , 16 -.LBB5_283: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEEbT_SB_.exit.i.i396 - # in Loop: Header=BB5_285 Depth=1 +.LBB5_275: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEEbT_SB_.exit.i.i396 + # in Loop: Header=BB5_277 Depth=1 ld.w $a1, $s7, %pc_lo12(current_test) move $a0, $s3 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 -.LBB5_284: # %_Z13verify_sortedISt16reverse_iteratorIS0_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEvT_SA_.exit.i397 - # in Loop: Header=BB5_285 Depth=1 +.LBB5_276: # %_Z13verify_sortedISt16reverse_iteratorIS0_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEvT_SA_.exit.i397 + # in Loop: Header=BB5_277 Depth=1 ld.w $a0, $s6, %pc_lo12(iterations) addi.w $s5, $s5, 1 bge $s5, $a0, .LBB5_291 -.LBB5_285: # %_ZN9benchmark4copyISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEESA_EEvT_SB_T0_.exit.i392 +.LBB5_277: # %_ZN9benchmark4copyISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEESA_EEvT_SB_T0_.exit.i392 # =>This Loop Header: Depth=1 - # Child Loop BB5_287 Depth 2 + # Child Loop BB5_279 Depth 2 move $a0, $s0 move $a1, $s8 move $a2, $fp @@ -2548,25 +2516,25 @@ main: # @main pcaddu18i $ra, %call36(_ZN9benchmark8heapsortISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEdEEvT_SB_) jirl $ra, $ra, 0 .Ltmp47: # EH_LABEL -# %bb.286: # %.noexc398.preheader - # in Loop: Header=BB5_285 Depth=1 +# %bb.278: # %.noexc398.preheader + # in Loop: Header=BB5_277 Depth=1 move $a0, $s1 .p2align 4, , 16 -.LBB5_287: # %.noexc398 - # Parent Loop BB5_285 Depth=1 +.LBB5_279: # %.noexc398 + # Parent Loop BB5_277 Depth=1 # => This Inner Loop Header: Depth=2 - beqz $a0, .LBB5_284 -# %bb.288: # in Loop: Header=BB5_287 Depth=2 + beqz $a0, .LBB5_276 +# %bb.280: # in Loop: Header=BB5_279 Depth=2 add.d $a1, $s0, $a0 fldx.d $fa0, $a1, $fp fldx.d $fa1, $a1, $s4 fcmp.cule.d $fcc0, $fa1, $fa0 addi.d $a0, $a0, 8 - bcnez $fcc0, .LBB5_287 - b .LBB5_283 -.LBB5_289: # %.loopexit703.thread + bcnez $fcc0, .LBB5_279 + b .LBB5_275 +.LBB5_281: # %.loopexit703.thread ld.d $s8, $sp, 168 # 8-byte Folded Reload -.LBB5_290: # %_ZNSt6vectorIdSaIdEED2Ev.exit417 +.LBB5_282: # %_ZNSt6vectorIdSaIdEED2Ev.exit417 move $a0, $s8 move $a1, $fp pcaddu18i $ra, %call36(_ZdlPvm) @@ -2591,8 +2559,61 @@ main: # @main ld.d $ra, $sp, 328 # 8-byte Folded Reload addi.d $sp, $sp, 336 ret +.LBB5_283: # %.loopexit722 + blez $a0, .LBB5_230 +# %bb.284: # %.lr.ph.i313 + lu12i.w $a0, -4 + ori $s1, $a0, 392 + pcalau12i $a0, %pc_hi20(.L.str.52) + addi.d $s4, $a0, %pc_lo12(.L.str.52) + move $s5, $zero + b .LBB5_287 + .p2align 4, , 16 +.LBB5_285: # %_ZN9benchmark9is_sortedISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEEbT_SB_.exit.i.i325 + # in Loop: Header=BB5_287 Depth=1 + ld.w $a1, $s6, %pc_lo12(current_test) + move $a0, $s4 + pcaddu18i $ra, %call36(printf) + jirl $ra, $ra, 0 +.LBB5_286: # %_Z13verify_sortedISt16reverse_iteratorIS0_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEEvT_SA_.exit.i326 + # in Loop: Header=BB5_287 Depth=1 + ld.w $a0, $s7, %pc_lo12(iterations) + addi.w $s5, $s5, 1 + bge $s5, $a0, .LBB5_230 +.LBB5_287: # %_ZN9benchmark4copyISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEESA_EEvT_SB_T0_.exit.i321 + # =>This Loop Header: Depth=1 + # Child Loop BB5_289 Depth 2 + move $a0, $s0 + ld.d $a1, $sp, 168 # 8-byte Folded Reload + move $a2, $fp + pcaddu18i $ra, %call36(memcpy) + jirl $ra, $ra, 0 + st.d $s0, $sp, 224 + st.d $s2, $sp, 208 +.Ltmp26: # EH_LABEL + addi.d $a0, $sp, 216 + addi.d $a1, $sp, 200 + pcaddu18i $ra, %call36(_ZN9benchmark9quicksortISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEEdEEvT_SB_) + jirl $ra, $ra, 0 +.Ltmp27: # EH_LABEL +# %bb.288: # %.noexc327.preheader + # in Loop: Header=BB5_287 Depth=1 + move $a0, $s1 + .p2align 4, , 16 +.LBB5_289: # %.noexc327 + # Parent Loop BB5_287 Depth=1 + # => This Inner Loop Header: Depth=2 + beqz $a0, .LBB5_286 +# %bb.290: # in Loop: Header=BB5_289 Depth=2 + add.d $a1, $s0, $a0 + fldx.d $fa0, $a1, $fp + fldx.d $fa1, $a1, $s3 + fcmp.cule.d $fcc0, $fa1, $fa0 + addi.d $a0, $a0, 8 + bcnez $fcc0, .LBB5_289 + b .LBB5_285 .LBB5_291: # %.loopexit703 - blez $a0, .LBB5_290 + blez $a0, .LBB5_282 # %bb.292: # %.lr.ph.i399 lu12i.w $a0, -4 ori $s1, $a0, 392 @@ -2611,7 +2632,7 @@ main: # @main # in Loop: Header=BB5_295 Depth=1 ld.w $a0, $s6, %pc_lo12(iterations) addi.w $s5, $s5, 1 - bge $s5, $a0, .LBB5_290 + bge $s5, $a0, .LBB5_282 .LBB5_295: # %_ZN9benchmark4copyISt16reverse_iteratorIS1_IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEESA_EEvT_SB_T0_.exit.i407 # =>This Loop Header: Depth=1 # Child Loop BB5_297 Depth 2 @@ -2776,60 +2797,60 @@ GCC_except_table5: .uleb128 .Ltmp24-.Ltmp23 # Call between .Ltmp23 and .Ltmp24 .uleb128 .Ltmp25-.Lfunc_begin0 # jumps to .Ltmp25 .byte 0 # On action: cleanup - .uleb128 .Ltmp24-.Lfunc_begin0 # >> Call Site 17 << - .uleb128 .Ltmp26-.Ltmp24 # Call between .Ltmp24 and .Ltmp26 - .byte 0 # has no landing pad - .byte 0 # On action: cleanup - .uleb128 .Ltmp26-.Lfunc_begin0 # >> Call Site 18 << - .uleb128 .Ltmp27-.Ltmp26 # Call between .Ltmp26 and .Ltmp27 - .uleb128 .Ltmp28-.Lfunc_begin0 # jumps to .Ltmp28 - .byte 0 # On action: cleanup - .uleb128 .Ltmp29-.Lfunc_begin0 # >> Call Site 19 << + .uleb128 .Ltmp29-.Lfunc_begin0 # >> Call Site 17 << .uleb128 .Ltmp32-.Ltmp29 # Call between .Ltmp29 and .Ltmp32 .uleb128 .Ltmp33-.Lfunc_begin0 # jumps to .Ltmp33 .byte 0 # On action: cleanup - .uleb128 .Ltmp32-.Lfunc_begin0 # >> Call Site 20 << + .uleb128 .Ltmp32-.Lfunc_begin0 # >> Call Site 18 << .uleb128 .Ltmp34-.Ltmp32 # Call between .Ltmp32 and .Ltmp34 .byte 0 # has no landing pad .byte 0 # On action: cleanup - .uleb128 .Ltmp34-.Lfunc_begin0 # >> Call Site 21 << + .uleb128 .Ltmp34-.Lfunc_begin0 # >> Call Site 19 << .uleb128 .Ltmp35-.Ltmp34 # Call between .Ltmp34 and .Ltmp35 .uleb128 .Ltmp36-.Lfunc_begin0 # jumps to .Ltmp36 .byte 0 # On action: cleanup - .uleb128 .Ltmp35-.Lfunc_begin0 # >> Call Site 22 << + .uleb128 .Ltmp35-.Lfunc_begin0 # >> Call Site 20 << .uleb128 .Ltmp37-.Ltmp35 # Call between .Ltmp35 and .Ltmp37 .byte 0 # has no landing pad .byte 0 # On action: cleanup - .uleb128 .Ltmp37-.Lfunc_begin0 # >> Call Site 23 << + .uleb128 .Ltmp37-.Lfunc_begin0 # >> Call Site 21 << .uleb128 .Ltmp38-.Ltmp37 # Call between .Ltmp37 and .Ltmp38 .uleb128 .Ltmp39-.Lfunc_begin0 # jumps to .Ltmp39 .byte 0 # On action: cleanup - .uleb128 .Ltmp38-.Lfunc_begin0 # >> Call Site 24 << + .uleb128 .Ltmp38-.Lfunc_begin0 # >> Call Site 22 << .uleb128 .Ltmp40-.Ltmp38 # Call between .Ltmp38 and .Ltmp40 .byte 0 # has no landing pad .byte 0 # On action: cleanup - .uleb128 .Ltmp40-.Lfunc_begin0 # >> Call Site 25 << + .uleb128 .Ltmp40-.Lfunc_begin0 # >> Call Site 23 << .uleb128 .Ltmp41-.Ltmp40 # Call between .Ltmp40 and .Ltmp41 .uleb128 .Ltmp42-.Lfunc_begin0 # jumps to .Ltmp42 .byte 0 # On action: cleanup - .uleb128 .Ltmp41-.Lfunc_begin0 # >> Call Site 26 << + .uleb128 .Ltmp41-.Lfunc_begin0 # >> Call Site 24 << .uleb128 .Ltmp43-.Ltmp41 # Call between .Ltmp41 and .Ltmp43 .byte 0 # has no landing pad .byte 0 # On action: cleanup - .uleb128 .Ltmp43-.Lfunc_begin0 # >> Call Site 27 << + .uleb128 .Ltmp43-.Lfunc_begin0 # >> Call Site 25 << .uleb128 .Ltmp44-.Ltmp43 # Call between .Ltmp43 and .Ltmp44 .uleb128 .Ltmp45-.Lfunc_begin0 # jumps to .Ltmp45 .byte 0 # On action: cleanup - .uleb128 .Ltmp44-.Lfunc_begin0 # >> Call Site 28 << + .uleb128 .Ltmp44-.Lfunc_begin0 # >> Call Site 26 << .uleb128 .Ltmp46-.Ltmp44 # Call between .Ltmp44 and .Ltmp46 .byte 0 # has no landing pad .byte 0 # On action: cleanup - .uleb128 .Ltmp46-.Lfunc_begin0 # >> Call Site 29 << + .uleb128 .Ltmp46-.Lfunc_begin0 # >> Call Site 27 << .uleb128 .Ltmp47-.Ltmp46 # Call between .Ltmp46 and .Ltmp47 .uleb128 .Ltmp48-.Lfunc_begin0 # jumps to .Ltmp48 .byte 0 # On action: cleanup - .uleb128 .Ltmp47-.Lfunc_begin0 # >> Call Site 30 << - .uleb128 .Ltmp49-.Ltmp47 # Call between .Ltmp47 and .Ltmp49 + .uleb128 .Ltmp47-.Lfunc_begin0 # >> Call Site 28 << + .uleb128 .Ltmp26-.Ltmp47 # Call between .Ltmp47 and .Ltmp26 + .byte 0 # has no landing pad + .byte 0 # On action: cleanup + .uleb128 .Ltmp26-.Lfunc_begin0 # >> Call Site 29 << + .uleb128 .Ltmp27-.Ltmp26 # Call between .Ltmp26 and .Ltmp27 + .uleb128 .Ltmp28-.Lfunc_begin0 # jumps to .Ltmp28 + .byte 0 # On action: cleanup + .uleb128 .Ltmp27-.Lfunc_begin0 # >> Call Site 30 << + .uleb128 .Ltmp49-.Ltmp27 # Call between .Ltmp27 and .Ltmp49 .byte 0 # has no landing pad .byte 0 # On action: cleanup .uleb128 .Ltmp49-.Lfunc_begin0 # >> Call Site 31 << diff --git a/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/n-body.dir/n-body.s b/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/n-body.dir/n-body.s index 408053ea..517f7067 100644 --- a/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/n-body.dir/n-body.s +++ b/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/n-body.dir/n-body.s @@ -176,12 +176,7 @@ energy: # @energy .Lfunc_end1: .size energy, .Lfunc_end1-energy # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function offset_momentum -.LCPI2_0: - .dword 0xc043bd3cc9be45de # double -39.478417604357432 - .text - .globl offset_momentum + .globl offset_momentum # -- Begin function offset_momentum .p2align 5 .type offset_momentum,@function offset_momentum: # @offset_momentum @@ -212,25 +207,17 @@ offset_momentum: # @offset_momentum ori $a0, $a0, 1502 lu32i.d $a0, 245052 lu52i.d $a0, $a0, -1020 - pcalau12i $a2, %pc_hi20(.LCPI2_0) - fld.d $fa2, $a2, %pc_lo12(.LCPI2_0) - vreplgr2vr.d $vr3, $a0 - vfdiv.d $vr1, $vr1, $vr3 + vreplgr2vr.d $vr2, $a0 + vfdiv.d $vr1, $vr1, $vr2 vst $vr1, $a1, 24 - fdiv.d $fa0, $fa0, $fa2 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 fst.d $fa0, $a1, 40 ret .Lfunc_end2: .size offset_momentum, .Lfunc_end2-offset_momentum # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI3_0: - .dword 0xc043bd3cc9be45de # double -39.478417604357432 -.LCPI3_1: - .dword 0x3f847ae147ae147b # double 0.01 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -239,65 +226,69 @@ main: # @main st.d $ra, $sp, 88 # 8-byte Folded Spill st.d $fp, $sp, 80 # 8-byte Folded Spill st.d $s0, $sp, 72 # 8-byte Folded Spill - fst.d $fs0, $sp, 64 # 8-byte Folded Spill - fst.d $fs1, $sp, 56 # 8-byte Folded Spill - fst.d $fs2, $sp, 48 # 8-byte Folded Spill - fst.d $fs3, $sp, 40 # 8-byte Folded Spill - fst.d $fs4, $sp, 32 # 8-byte Folded Spill - fst.d $fs5, $sp, 24 # 8-byte Folded Spill - fst.d $fs6, $sp, 16 # 8-byte Folded Spill + st.d $s1, $sp, 64 # 8-byte Folded Spill + fst.d $fs0, $sp, 56 # 8-byte Folded Spill + fst.d $fs1, $sp, 48 # 8-byte Folded Spill + fst.d $fs2, $sp, 40 # 8-byte Folded Spill + fst.d $fs3, $sp, 32 # 8-byte Folded Spill + fst.d $fs4, $sp, 24 # 8-byte Folded Spill + fst.d $fs5, $sp, 16 # 8-byte Folded Spill + fst.d $fs6, $sp, 8 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(bodies) addi.d $fp, $a0, %pc_lo12(bodies) fld.d $fa0, $fp, 24 - fld.d $fa7, $fp, 48 - movgr2fr.d $fs1, $zero + fld.d $fa6, $fp, 48 fld.d $fa1, $fp, 32 fld.d $fa2, $fp, 40 + movgr2fr.d $fs0, $zero + fmadd.d $fa0, $fa0, $fa6, $fs0 + fmadd.d $fa4, $fa1, $fa6, $fs0 + fmadd.d $fa2, $fa2, $fa6, $fs0 fld.d $ft3, $fp, 80 fld.d $fa3, $fp, 104 - fmadd.d $fa0, $fa0, $fa7, $fs1 - fmadd.d $fa4, $fa1, $fa7, $fs1 - fmadd.d $fa2, $fa2, $fa7, $fs1 - fmadd.d $fa0, $ft3, $fa3, $fa0 fld.d $ft4, $fp, 88 fld.d $ft5, $fp, 96 fld.d $ft2, $fp, 136 fld.d $fa1, $fp, 160 - fld.d $ft6, $fp, 144 + fmadd.d $fa0, $ft3, $fa3, $fa0 fmadd.d $fa4, $ft4, $fa3, $fa4 fmadd.d $fa2, $ft5, $fa3, $fa2 fmadd.d $fa5, $ft2, $fa1, $fa0 - fmadd.d $fa4, $ft6, $fa1, $fa4 + fld.d $ft6, $fp, 144 fld.d $ft7, $fp, 152 fld.d $ft0, $fp, 192 fld.d $fa0, $fp, 216 fld.d $ft1, $fp, 200 - fld.d $ft8, $fp, 208 - fmadd.d $fa2, $ft7, $fa1, $fa2 + fmadd.d $fa4, $ft6, $fa1, $fa4 + fmadd.d $fa7, $ft7, $fa1, $fa2 fmadd.d $fa5, $ft0, $fa0, $fa5 fmadd.d $fa4, $ft1, $fa0, $fa4 - fmadd.d $fa6, $ft8, $fa0, $fa2 + fld.d $ft8, $fp, 208 fld.d $ft9, $fp, 248 fld.d $fa2, $fp, 272 fld.d $ft10, $fp, 256 fld.d $ft11, $fp, 264 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $ft12, $a0, %pc_lo12(.LCPI3_0) + fmadd.d $fa7, $ft8, $fa0, $fa7 fmadd.d $fa5, $ft9, $fa2, $fa5 fmadd.d $fa4, $ft10, $fa2, $fa4 - fmadd.d $fa6, $ft11, $fa2, $fa6 + fmadd.d $fa7, $ft11, $fa2, $fa7 + lu12i.w $a0, -222236 + ori $a0, $a0, 1502 + lu32i.d $a0, 245052 + lu52i.d $a0, $a0, -1020 + movgr2fr.d $ft12, $a0 fdiv.d $ft14, $fa5, $ft12 fst.d $ft14, $fp, 24 fdiv.d $ft15, $fa4, $ft12 fst.d $ft15, $fp, 32 - fdiv.d $fs0, $fa6, $ft12 - fst.d $fs0, $fp, 40 + fdiv.d $fs1, $fa7, $ft12 + fst.d $fs1, $fp, 40 vldi $vr26, -928 fmul.d $fa4, $fa2, $fs2 fmul.d $fa5, $ft10, $ft10 fmadd.d $fa5, $ft9, $ft9, $fa5 fmadd.d $fa5, $ft11, $ft11, $fa5 - fmul.d $fa6, $fa0, $fs2 + fmul.d $fa7, $fa0, $fs2 fmul.d $ft1, $ft1, $ft1 fmadd.d $ft0, $ft0, $ft0, $ft1 fmadd.d $ft0, $ft8, $ft8, $ft0 @@ -309,18 +300,18 @@ main: # @main fmul.d $ft4, $ft4, $ft4 fmadd.d $ft3, $ft3, $ft3, $ft4 fmadd.d $ft13, $ft5, $ft5, $ft3 - fmul.d $ft3, $fa7, $fs2 + fmul.d $ft3, $fa6, $fs2 fmul.d $ft4, $ft15, $ft15 fmadd.d $ft4, $ft14, $ft14, $ft4 - fmadd.d $ft4, $fs0, $fs0, $ft4 - fmadd.d $ft3, $ft3, $ft4, $fs1 + fmadd.d $ft4, $fs1, $fs1, $ft4 + fmadd.d $ft3, $ft3, $ft4, $fs0 fld.d $fs2, $fp, 16 fld.d $ft6, $fp, 72 fld.d $fs3, $fp, 0 fld.d $ft7, $fp, 56 fld.d $fs4, $fp, 8 fld.d $ft10, $fp, 64 - fmul.d $ft4, $fa7, $fa3 + fmul.d $ft4, $fa6, $fa3 fsub.d $ft5, $fs2, $ft6 fsub.d $ft8, $fs3, $ft7 fsub.d $ft9, $fs4, $ft10 @@ -333,7 +324,7 @@ main: # @main fld.d $ft3, $fp, 128 fld.d $ft4, $fp, 112 fld.d $ft5, $fp, 120 - fmul.d $ft9, $fa7, $fa1 + fmul.d $ft9, $fa6, $fa1 fsub.d $ft11, $fs2, $ft3 fsub.d $ft14, $fs3, $ft4 fsub.d $ft15, $fs4, $ft5 @@ -346,30 +337,30 @@ main: # @main fld.d $ft8, $fp, 184 fld.d $ft9, $fp, 168 fld.d $ft11, $fp, 176 - fmul.d $ft15, $fa7, $fa0 - fsub.d $fs0, $fs2, $ft8 + fmul.d $ft15, $fa6, $fa0 + fsub.d $fs1, $fs2, $ft8 fsub.d $fs5, $fs3, $ft9 fsub.d $fs6, $fs4, $ft11 fmul.d $fs6, $fs6, $fs6 fmadd.d $fs5, $fs5, $fs5, $fs6 - fmadd.d $fs0, $fs0, $fs0, $fs5 - fsqrt.d $fs0, $fs0 - fdiv.d $ft15, $ft15, $fs0 + fmadd.d $fs1, $fs1, $fs1, $fs5 + fsqrt.d $fs1, $fs1 + fdiv.d $ft15, $ft15, $fs1 fsub.d $fs5, $ft14, $ft15 fld.d $ft14, $fp, 240 fld.d $ft15, $fp, 224 - fld.d $fs0, $fp, 232 - fmul.d $fa7, $fa7, $fa2 + fld.d $fs1, $fp, 232 + fmul.d $fa6, $fa6, $fa2 fsub.d $fs2, $fs2, $ft14 fsub.d $fs3, $fs3, $ft15 - fsub.d $fs4, $fs4, $fs0 + fsub.d $fs4, $fs4, $fs1 fmul.d $fs4, $fs4, $fs4 fmadd.d $fs3, $fs3, $fs3, $fs4 fmadd.d $fs2, $fs2, $fs2, $fs3 fsqrt.d $fs2, $fs2 - fdiv.d $fa7, $fa7, $fs2 - fsub.d $fa7, $fs5, $fa7 - fmadd.d $fa7, $ft12, $ft13, $fa7 + fdiv.d $fa6, $fa6, $fs2 + fsub.d $fa6, $fs5, $fa6 + fmadd.d $fa6, $ft12, $ft13, $fa6 fmul.d $ft12, $fa3, $fa1 fsub.d $ft13, $ft6, $ft3 fsub.d $fs2, $ft7, $ft4 @@ -379,7 +370,7 @@ main: # @main fmadd.d $ft13, $ft13, $ft13, $fs2 fsqrt.d $ft13, $ft13 fdiv.d $ft12, $ft12, $ft13 - fsub.d $fa7, $fa7, $ft12 + fsub.d $fa6, $fa6, $ft12 fmul.d $ft12, $fa3, $fa0 fsub.d $ft13, $ft6, $ft8 fsub.d $fs2, $ft7, $ft9 @@ -389,19 +380,19 @@ main: # @main fmadd.d $ft13, $ft13, $ft13, $fs2 fsqrt.d $ft13, $ft13 fdiv.d $ft12, $ft12, $ft13 - fsub.d $fa7, $fa7, $ft12 + fsub.d $fa6, $fa6, $ft12 fmul.d $fa3, $fa3, $fa2 fsub.d $ft6, $ft6, $ft14 fsub.d $ft7, $ft7, $ft15 - fsub.d $ft10, $ft10, $fs0 + fsub.d $ft10, $ft10, $fs1 fmul.d $ft10, $ft10, $ft10 fmadd.d $ft7, $ft7, $ft7, $ft10 fmadd.d $ft6, $ft6, $ft6, $ft7 fsqrt.d $ft6, $ft6 fdiv.d $fa3, $fa3, $ft6 - fsub.d $fa3, $fa7, $fa3 + fsub.d $fa3, $fa6, $fa3 fmadd.d $fa3, $ft1, $ft2, $fa3 - fmul.d $fa7, $fa1, $fa0 + fmul.d $fa6, $fa1, $fa0 fsub.d $ft1, $ft3, $ft8 fsub.d $ft2, $ft4, $ft9 fsub.d $ft6, $ft5, $ft11 @@ -409,23 +400,23 @@ main: # @main fmadd.d $ft2, $ft2, $ft2, $ft6 fmadd.d $ft1, $ft1, $ft1, $ft2 fsqrt.d $ft1, $ft1 - fdiv.d $fa7, $fa7, $ft1 - fsub.d $fa3, $fa3, $fa7 + fdiv.d $fa6, $fa6, $ft1 + fsub.d $fa3, $fa3, $fa6 fmul.d $fa1, $fa1, $fa2 - fsub.d $fa7, $ft3, $ft14 + fsub.d $fa6, $ft3, $ft14 fsub.d $ft1, $ft4, $ft15 - fsub.d $ft2, $ft5, $fs0 + fsub.d $ft2, $ft5, $fs1 fmul.d $ft2, $ft2, $ft2 fmadd.d $ft1, $ft1, $ft1, $ft2 - fmadd.d $fa7, $fa7, $fa7, $ft1 - fsqrt.d $fa7, $fa7 - fdiv.d $fa1, $fa1, $fa7 + fmadd.d $fa6, $fa6, $fa6, $ft1 + fsqrt.d $fa6, $fa6 + fdiv.d $fa1, $fa1, $fa6 fsub.d $fa1, $fa3, $fa1 - fmadd.d $fa1, $fa6, $ft0, $fa1 + fmadd.d $fa1, $fa7, $ft0, $fa1 fmul.d $fa0, $fa0, $fa2 fsub.d $fa2, $ft8, $ft14 fsub.d $fa3, $ft9, $ft15 - fsub.d $fa6, $ft11, $fs0 + fsub.d $fa6, $ft11, $fs1 fmul.d $fa6, $fa6, $fa6 fmadd.d $fa3, $fa3, $fa3, $fa6 fmadd.d $fa2, $fa2, $fa2, $fa3 @@ -438,15 +429,17 @@ main: # @main addi.d $a0, $a0, %pc_lo12(.L.str) pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI3_1) - fld.d $fs0, $a0, %pc_lo12(.LCPI3_1) lu12i.w $a0, 1220 ori $s0, $a0, 2880 + lu12i.w $a0, 293601 + ori $a0, $a0, 1147 + lu32i.d $a0, 293601 + lu52i.d $s1, $a0, 1016 .p2align 4, , 16 .LBB3_1: # =>This Inner Loop Header: Depth=1 + movgr2fr.d $fa0, $s1 ori $a0, $zero, 5 move $a1, $fp - fmov.d $fa0, $fs0 pcaddu18i $ra, %call36(advance) jirl $ra, $ra, 0 addi.w $s0, $s0, -1 @@ -493,7 +486,7 @@ main: # @main fmul.d $ft3, $ft3, $ft3 fmadd.d $ft3, $ft4, $ft4, $ft3 fmadd.d $ft3, $ft5, $ft5, $ft3 - fmadd.d $ft2, $ft2, $ft3, $fs1 + fmadd.d $ft2, $ft2, $ft3, $fs0 fld.d $fs1, $fp, 16 fld.d $ft5, $fp, 72 fld.d $fs2, $fp, 0 @@ -619,13 +612,14 @@ main: # @main pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 move $a0, $zero - fld.d $fs6, $sp, 16 # 8-byte Folded Reload - fld.d $fs5, $sp, 24 # 8-byte Folded Reload - fld.d $fs4, $sp, 32 # 8-byte Folded Reload - fld.d $fs3, $sp, 40 # 8-byte Folded Reload - fld.d $fs2, $sp, 48 # 8-byte Folded Reload - fld.d $fs1, $sp, 56 # 8-byte Folded Reload - fld.d $fs0, $sp, 64 # 8-byte Folded Reload + fld.d $fs6, $sp, 8 # 8-byte Folded Reload + fld.d $fs5, $sp, 16 # 8-byte Folded Reload + fld.d $fs4, $sp, 24 # 8-byte Folded Reload + fld.d $fs3, $sp, 32 # 8-byte Folded Reload + fld.d $fs2, $sp, 40 # 8-byte Folded Reload + fld.d $fs1, $sp, 48 # 8-byte Folded Reload + fld.d $fs0, $sp, 56 # 8-byte Folded Reload + ld.d $s1, $sp, 64 # 8-byte Folded Reload ld.d $s0, $sp, 72 # 8-byte Folded Reload ld.d $fp, $sp, 80 # 8-byte Folded Reload ld.d $ra, $sp, 88 # 8-byte Folded Reload diff --git a/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/partialsums.dir/partialsums.s b/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/partialsums.dir/partialsums.s index 4132a43f..4623fdd0 100644 --- a/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/partialsums.dir/partialsums.s +++ b/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/partialsums.dir/partialsums.s @@ -28,18 +28,12 @@ sum_vec: # @sum_vec .Lfunc_end1: .size sum_vec, .Lfunc_end1-sum_vec # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI2_0: - .dword 0x3fe5555555555555 # double 0.66666666666666663 -.LCPI2_3: - .dword 0x414312d000000000 # double 2.5E+6 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI2_1: + .p2align 4, 0x0 # -- Begin function main +.LCPI2_0: .dword 0x3ff0000000000000 # double 1 .dword 0x4000000000000000 # double 2 -.LCPI2_2: +.LCPI2_1: .dword 0x3ff0000000000000 # double 1 .dword 0xbff0000000000000 # double -1 .text @@ -61,8 +55,11 @@ main: # @main fst.d $fs6, $sp, 96 # 8-byte Folded Spill move $fp, $zero movgr2fr.d $fs5, $zero - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI2_0) + lu12i.w $a0, 349525 + ori $a0, $a0, 1365 + lu32i.d $a0, 349525 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fs0, $a0 lu12i.w $a0, 610 ori $s0, $a0, 1440 fmov.d $fs4, $fs5 @@ -124,19 +121,21 @@ main: # @main fmov.d $fa0, $fs1 b .LBB2_2 .LBB2_4: # %.preheader.preheader - pcalau12i $a0, %pc_hi20(.LCPI2_1) - vld $vr0, $a0, %pc_lo12(.LCPI2_1) + pcalau12i $a0, %pc_hi20(.LCPI2_0) + vld $vr0, $a0, %pc_lo12(.LCPI2_0) vrepli.b $vr10, 0 lu52i.d $a0, $zero, 1023 vreplgr2vr.d $vr1, $a0 - pcalau12i $a0, %pc_hi20(.LCPI2_2) - vld $vr2, $a0, %pc_lo12(.LCPI2_2) - pcalau12i $a0, %pc_hi20(.LCPI2_3) - fld.d $fa3, $a0, %pc_lo12(.LCPI2_3) + pcalau12i $a0, %pc_hi20(.LCPI2_1) + vld $vr2, $a0, %pc_lo12(.LCPI2_1) lu52i.d $a0, $zero, -1025 - vreplgr2vr.d $vr4, $a0 + vreplgr2vr.d $vr3, $a0 lu52i.d $a0, $zero, 1024 - vreplgr2vr.d $vr5, $a0 + vreplgr2vr.d $vr4, $a0 + ori $a0, $zero, 0 + lu32i.d $a0, 201424 + lu52i.d $a0, $a0, 1044 + movgr2fr.d $fa5, $a0 vori.b $vr11, $vr10, 0 vori.b $vr12, $vr10, 0 vori.b $vr9, $vr10, 0 @@ -155,10 +154,10 @@ main: # @main vfadd.d $vr11, $vr6, $vr11 vfdiv.d $vr6, $vr2, $vr0 vfadd.d $vr9, $vr6, $vr9 - vfmadd.d $vr6, $vr0, $vr5, $vr4 - vfadd.d $vr0, $vr0, $vr5 + vfmadd.d $vr6, $vr0, $vr4, $vr3 + vfadd.d $vr0, $vr0, $vr4 vreplvei.d $vr7, $vr0, 0 - fcmp.cle.d $fcc0, $fa7, $fa3 + fcmp.cle.d $fcc0, $fa7, $fa5 vfdiv.d $vr6, $vr2, $vr6 vfadd.d $vr8, $vr8, $vr6 bcnez $fcc0, .LBB2_5 diff --git a/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/puzzle.dir/puzzle.s b/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/puzzle.dir/puzzle.s index 6d5dbfa0..0f6dbba5 100644 --- a/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/puzzle.dir/puzzle.s +++ b/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/puzzle.dir/puzzle.s @@ -44,12 +44,7 @@ srand: # @srand .Lfunc_end1: .size srand, .Lfunc_end1-srand # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function randInt -.LCPI2_0: - .dword 0x3f00000000000000 # double 3.0517578125E-5 - .text - .globl randInt + .globl randInt # -- Begin function randInt .p2align 5 .type randInt,@function randInt: # @randInt @@ -75,19 +70,19 @@ randInt: # @randInt slli.d $a6, $a5, 15 sub.d $a5, $a5, $a6 add.d $a4, $a4, $a5 - pcalau12i $a5, %pc_hi20(.LCPI2_0) - fld.d $fa0, $a5, %pc_lo12(.LCPI2_0) sub.d $a1, $a1, $a0 addi.d $a1, $a1, 1 - movgr2fr.w $fa1, $a1 + movgr2fr.w $fa0, $a1 addi.w $a1, $a1, 0 - ffint.d.w $fa1, $fa1 + ffint.d.w $fa0, $fa0 addi.d $a4, $a4, 1 bstrpick.d $a4, $a4, 31, 0 + movgr2fr.d $fa1, $a4 + ffint.d.l $fa1, $fa1 + lu52i.d $a4, $zero, 1008 movgr2fr.d $fa2, $a4 - ffint.d.l $fa2, $fa2 - fmul.d $fa0, $fa2, $fa0 - fmul.d $fa0, $fa0, $fa1 + fmul.d $fa1, $fa1, $fa2 + fmul.d $fa0, $fa1, $fa0 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a4, $fa0 xor $a1, $a1, $a4 @@ -99,14 +94,7 @@ randInt: # @randInt .Lfunc_end2: .size randInt, .Lfunc_end2-randInt # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function shuffle -.LCPI3_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI3_1: - .dword 0x3f00000000000000 # double 3.0517578125E-5 - .text - .globl shuffle + .globl shuffle # -- Begin function shuffle .p2align 5 .type shuffle,@function shuffle: # @shuffle @@ -119,17 +107,18 @@ shuffle: # @shuffle alsl.d $a5, $a1, $a0, 2 addi.d $a5, $a5, -4 lu52i.d $a6, $zero, 1107 - pcalau12i $a7, %pc_hi20(.LCPI3_0) - fld.d $fa0, $a7, %pc_lo12(.LCPI3_0) + lu12i.w $a7, 256 + lu52i.d $a7, $a7, 1107 + movgr2fr.d $fa0, $a7 lu12i.w $a7, 275200 lu12i.w $t0, 269412 ori $t0, $t0, 3693 lu12i.w $t1, 3 - pcalau12i $t2, %pc_hi20(.LCPI3_1) - fld.d $fa1, $t2, %pc_lo12(.LCPI3_1) ori $t1, $t1, 57 lu12i.w $t2, 32 ori $t2, $t2, 5 + lu52i.d $t3, $zero, 1008 + movgr2fr.d $fa1, $t3 .p2align 4, , 16 .LBB3_2: # %.lr.ph # =>This Inner Loop Header: Depth=1 @@ -188,12 +177,6 @@ shuffle: # @shuffle .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI4_1: - .dword 0x3f00000000000000 # double 3.0517578125E-5 -.LCPI4_2: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 .text .globl createRandomArray .p2align 5 @@ -275,11 +258,11 @@ createRandomArray: # @createRandomArray sub.d $a7, $a7, $t0 add.d $a6, $a6, $a7 addi.d $a6, $a6, 1 - pcalau12i $a7, %pc_hi20(.LCPI4_1) - fld.d $fa0, $a7, %pc_lo12(.LCPI4_1) bstrpick.d $a6, $a6, 31, 0 - movgr2fr.d $fa2, $a6 - ffint.d.l $fa2, $fa2 + movgr2fr.d $fa0, $a6 + ffint.d.l $fa2, $fa0 + lu52i.d $a6, $zero, 1008 + movgr2fr.d $fa0, $a6 fmul.d $fa2, $fa2, $fa0 fmul.d $fa1, $fa2, $fa1 ftintrz.w.d $fa1, $fa1 @@ -292,9 +275,10 @@ createRandomArray: # @createRandomArray beqz $fp, .LBB4_12 # %bb.9: # %.lr.ph.preheader.i alsl.d $a6, $fp, $a0, 2 - pcalau12i $a7, %pc_hi20(.LCPI4_2) - fld.d $fa1, $a7, %pc_lo12(.LCPI4_2) lu52i.d $a7, $zero, 1107 + lu12i.w $t0, 256 + lu52i.d $t0, $t0, 1107 + movgr2fr.d $fa1, $t0 lu12i.w $t0, 275200 ori $t1, $zero, 1 .p2align 4, , 16 diff --git a/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/recursive.dir/recursive.s b/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/recursive.dir/recursive.s index 2043ee23..2f193b76 100644 --- a/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/recursive.dir/recursive.s +++ b/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/recursive.dir/recursive.s @@ -227,18 +227,14 @@ takFP: # @takFP .Lfunc_end4: .size takFP, .Lfunc_end4-takFP # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI5_0: - .dword 0x4043000000000000 # double 38 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main # %bb.0: addi.d $sp, $sp, -16 st.d $ra, $sp, 8 # 8-byte Folded Spill + st.d $fp, $sp, 0 # 8-byte Folded Spill ori $a0, $zero, 3 ori $a1, $zero, 11 pcaddu18i $ra, %call36(ack) @@ -249,16 +245,16 @@ main: # @main ori $a1, $zero, 11 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI5_0) + ori $a0, $zero, 0 + lu32i.d $a0, 196608 + lu52i.d $fp, $a0, 1028 + movgr2fr.d $fa0, $fp pcaddu18i $ra, %call36(fibFP) jirl $ra, $ra, 0 movfr2gr.d $a2, $fa0 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $a0, $a0, %pc_lo12(.L.str.1) - ori $a1, $zero, 0 - lu32i.d $a1, 196608 - lu52i.d $a1, $a1, 1028 + move $a1, $fp pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 ori $a0, $zero, 30 @@ -293,6 +289,7 @@ main: # @main pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 move $a0, $zero + ld.d $fp, $sp, 0 # 8-byte Folded Reload ld.d $ra, $sp, 8 # 8-byte Folded Reload addi.d $sp, $sp, 16 ret diff --git a/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/spectral-norm.dir/spectral-norm.s b/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/spectral-norm.dir/spectral-norm.s index e0d2831f..70f438fd 100644 --- a/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/spectral-norm.dir/spectral-norm.s +++ b/results/SingleSource/Benchmarks/BenchmarkGame/CMakeFiles/spectral-norm.dir/spectral-norm.s @@ -223,12 +223,7 @@ eval_AtA_times_u: # @eval_AtA_times_u .Lfunc_end3: .size eval_AtA_times_u, .Lfunc_end3-eval_AtA_times_u # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI4_0: - .dword 0x7ff8000000000000 # double NaN - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -432,8 +427,10 @@ main: # @main fdiv.d $fa0, $fa1, $fa0 b .LBB4_17 .LBB4_16: - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI4_0) + ori $a0, $zero, 0 + lu32i.d $a0, -524288 + lu52i.d $a0, $a0, 2047 + movgr2fr.d $fa0, $a0 .LBB4_17: # %._crit_edge fsqrt.d $fa1, $fa0 fcmp.cor.d $fcc0, $fa1, $fa1 diff --git a/results/SingleSource/Benchmarks/BenchmarkGame/Large/CMakeFiles/fasta.dir/fasta.s b/results/SingleSource/Benchmarks/BenchmarkGame/Large/CMakeFiles/fasta.dir/fasta.s index 0764e227..e1402b5a 100644 --- a/results/SingleSource/Benchmarks/BenchmarkGame/Large/CMakeFiles/fasta.dir/fasta.s +++ b/results/SingleSource/Benchmarks/BenchmarkGame/Large/CMakeFiles/fasta.dir/fasta.s @@ -1,10 +1,6 @@ .file "fasta.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function main -.LCPI0_0: - .word 0x4808b000 # float 139968 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -166,9 +162,9 @@ main: # @main lu32i.d $a0, -86783 lu52i.d $s4, $a0, 958 lu12i.w $a0, 34 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.s $fs0, $a1, %pc_lo12(.LCPI0_0) ori $s5, $a0, 704 + lu12i.w $a0, 295051 + movgr2fr.w $fs0, $a0 addi.d $s7, $sp, 19 .p2align 4, , 16 .LBB0_3: # =>This Loop Header: Depth=1 diff --git a/results/SingleSource/Benchmarks/CoyoteBench/CMakeFiles/almabench.dir/almabench.s b/results/SingleSource/Benchmarks/CoyoteBench/CMakeFiles/almabench.dir/almabench.s index a9d9ac06..1bffc986 100644 --- a/results/SingleSource/Benchmarks/CoyoteBench/CMakeFiles/almabench.dir/almabench.s +++ b/results/SingleSource/Benchmarks/CoyoteBench/CMakeFiles/almabench.dir/almabench.s @@ -1,14 +1,8 @@ .file "almabench.c" - .section .rodata.cst8,"aM",@progbits,8 + .section .rodata.cst16,"aM",@progbits,16 .p2align 3, 0x0 # -- Begin function anpm .LCPI0_0: .dword 0x401921fb54442d18 # double 6.2831853071795862 -.LCPI0_2: - .dword 0x400921fb54442d18 # double 3.1415926535897931 - .section .rodata.cst16,"aM",@progbits,16 - .p2align 3, 0x0 -.LCPI0_1: - .dword 0x401921fb54442d18 # double 6.2831853071795862 .dword 0xc01921fb54442d18 # double -6.2831853071795862 .text .globl anpm @@ -18,64 +12,41 @@ anpm: # @anpm # %bb.0: addi.d $sp, $sp, -32 st.d $ra, $sp, 24 # 8-byte Folded Spill + st.d $fp, $sp, 16 # 8-byte Folded Spill movgr2fr.d $fa1, $zero fcmp.clt.d $fcc0, $fa0, $fa1 movcf2gr $a0, $fcc0 - st.d $a0, $sp, 16 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) + st.d $a0, $sp, 8 + lu12i.w $a0, 345154 + ori $fp, $a0, 3352 + lu32i.d $fp, -450053 + lu52i.d $a0, $fp, 1025 + movgr2fr.d $fa1, $a0 pcaddu18i $ra, %call36(fmod) jirl $ra, $ra, 0 fabs.d $fa1, $fa0 - ld.d $a0, $sp, 16 + ld.d $a0, $sp, 8 movgr2cf $fcc0, $a0 movcf2gr $a0, $fcc0 slli.d $a0, $a0, 3 - pcalau12i $a1, %pc_hi20(.LCPI0_1) - addi.d $a1, $a1, %pc_lo12(.LCPI0_1) + pcalau12i $a1, %pc_hi20(.LCPI0_0) + addi.d $a1, $a1, %pc_lo12(.LCPI0_0) fldx.d $fa2, $a1, $a0 - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_2) fsub.d $fa2, $fa0, $fa2 + lu52i.d $a0, $fp, 1024 + movgr2fr.d $fa3, $a0 fcmp.cult.d $fcc0, $fa1, $fa3 fsel $fa0, $fa2, $fa0, $fcc0 + ld.d $fp, $sp, 16 # 8-byte Folded Reload ld.d $ra, $sp, 24 # 8-byte Folded Reload addi.d $sp, $sp, 32 ret .Lfunc_end0: .size anpm, .Lfunc_end0-anpm # -- End function - .section .rodata.cst8,"aM",@progbits,8 + .section .rodata.cst16,"aM",@progbits,16 .p2align 3, 0x0 # -- Begin function planetpv .LCPI1_0: - .dword 0xc142b42c80000000 # double -2451545 -.LCPI1_1: - .dword 0x41164b0800000000 # double 365250 -.LCPI1_2: - .dword 0x40ac200000000000 # double 3600 -.LCPI1_3: - .dword 0x3ed455a5b2ff8f9d # double 4.8481368110953598E-6 -.LCPI1_4: - .dword 0x401921fb54442d18 # double 6.2831853071795862 -.LCPI1_5: - .dword 0x3fd702a41f2e9970 # double 0.35953619999999997 -.LCPI1_6: - .dword 0x3e7ad7f29abcaf48 # double 9.9999999999999995E-8 -.LCPI1_8: - .dword 0x400921fb54442d18 # double 3.1415926535897931 -.LCPI1_9: - .dword 0x3d719799812dea11 # double 9.9999999999999998E-13 -.LCPI1_10: - .dword 0x3f919d6d51a6b69a # double 0.017202098950000001 -.LCPI1_11: - .dword 0xbfd9752e50f4b399 # double -0.39777715593191371 -.LCPI1_12: - .dword 0x3fed5c0357681ef3 # double 0.91748206206918181 -.LCPI1_13: - .dword 0x3fd9752e50f4b399 # double 0.39777715593191371 - .section .rodata.cst16,"aM",@progbits,16 - .p2align 3, 0x0 -.LCPI1_7: .dword 0x401921fb54442d18 # double 6.2831853071795862 .dword 0xc01921fb54442d18 # double -6.2831853071795862 .text @@ -84,687 +55,709 @@ anpm: # @anpm .type planetpv,@function planetpv: # @planetpv # %bb.0: - addi.d $sp, $sp, -608 - st.d $ra, $sp, 600 # 8-byte Folded Spill - st.d $fp, $sp, 592 # 8-byte Folded Spill - st.d $s0, $sp, 584 # 8-byte Folded Spill - st.d $s1, $sp, 576 # 8-byte Folded Spill - st.d $s2, $sp, 568 # 8-byte Folded Spill - st.d $s3, $sp, 560 # 8-byte Folded Spill - st.d $s4, $sp, 552 # 8-byte Folded Spill - st.d $s5, $sp, 544 # 8-byte Folded Spill - st.d $s6, $sp, 536 # 8-byte Folded Spill - st.d $s7, $sp, 528 # 8-byte Folded Spill - st.d $s8, $sp, 520 # 8-byte Folded Spill - fst.d $fs0, $sp, 512 # 8-byte Folded Spill - fst.d $fs1, $sp, 504 # 8-byte Folded Spill - fst.d $fs2, $sp, 496 # 8-byte Folded Spill - fst.d $fs3, $sp, 488 # 8-byte Folded Spill - fst.d $fs4, $sp, 480 # 8-byte Folded Spill - fst.d $fs5, $sp, 472 # 8-byte Folded Spill - fst.d $fs6, $sp, 464 # 8-byte Folded Spill - fst.d $fs7, $sp, 456 # 8-byte Folded Spill - fld.d $fa0, $a0, 0 - pcalau12i $a3, %pc_hi20(.LCPI1_0) - fld.d $fa1, $a3, %pc_lo12(.LCPI1_0) - fld.d $fa2, $a0, 8 - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fa3, $a0, %pc_lo12(.LCPI1_1) - st.d $a2, $sp, 432 # 8-byte Folded Spill + addi.d $sp, $sp, -624 + st.d $ra, $sp, 616 # 8-byte Folded Spill + st.d $fp, $sp, 608 # 8-byte Folded Spill + st.d $s0, $sp, 600 # 8-byte Folded Spill + st.d $s1, $sp, 592 # 8-byte Folded Spill + st.d $s2, $sp, 584 # 8-byte Folded Spill + st.d $s3, $sp, 576 # 8-byte Folded Spill + st.d $s4, $sp, 568 # 8-byte Folded Spill + st.d $s5, $sp, 560 # 8-byte Folded Spill + st.d $s6, $sp, 552 # 8-byte Folded Spill + st.d $s7, $sp, 544 # 8-byte Folded Spill + st.d $s8, $sp, 536 # 8-byte Folded Spill + fst.d $fs0, $sp, 528 # 8-byte Folded Spill + fst.d $fs1, $sp, 520 # 8-byte Folded Spill + fst.d $fs2, $sp, 512 # 8-byte Folded Spill + fst.d $fs3, $sp, 504 # 8-byte Folded Spill + fst.d $fs4, $sp, 496 # 8-byte Folded Spill + fst.d $fs5, $sp, 488 # 8-byte Folded Spill + fst.d $fs6, $sp, 480 # 8-byte Folded Spill + fst.d $fs7, $sp, 472 # 8-byte Folded Spill + st.d $a2, $sp, 448 # 8-byte Folded Spill move $fp, $a1 - fadd.d $fa0, $fa0, $fa1 + fld.d $fa0, $a0, 0 + lu12i.w $a1, -524288 + lu32i.d $a1, 177196 + fld.d $fa1, $a0, 8 + lu52i.d $a0, $a1, -1004 + movgr2fr.d $fa2, $a0 fadd.d $fa0, $fa0, $fa2 - fdiv.d $fs0, $fa0, $fa3 - alsl.d $a0, $a1, $a1, 1 - slli.d $s1, $a0, 3 - pcalau12i $a0, %pc_hi20(a) - addi.d $a0, $a0, %pc_lo12(a) - add.d $a1, $a0, $s1 - fldx.d $fa0, $a0, $s1 + fadd.d $fa0, $fa0, $fa1 + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, 412424 + lu52i.d $a1, $a1, 1041 + movgr2fr.d $fa1, $a1 + fdiv.d $fs0, $fa0, $fa1 + alsl.d $a1, $fp, $fp, 1 + slli.d $s2, $a1, 3 + pcalau12i $a1, %pc_hi20(a) + addi.d $a1, $a1, %pc_lo12(a) + add.d $a2, $a1, $s2 + fldx.d $fa0, $a1, $s2 + fst.d $fa0, $sp, 416 # 8-byte Folded Spill + fld.d $fa0, $a2, 8 + fst.d $fa0, $sp, 408 # 8-byte Folded Spill + fld.d $fa0, $a2, 16 fst.d $fa0, $sp, 400 # 8-byte Folded Spill - fld.d $fa0, $a1, 8 - fst.d $fa0, $sp, 392 # 8-byte Folded Spill - fld.d $fa0, $a1, 16 - fst.d $fa0, $sp, 384 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(dlm) - addi.d $a0, $a0, %pc_lo12(dlm) - add.d $a1, $a0, $s1 - fldx.d $fa0, $a0, $s1 - fld.d $fa1, $a1, 8 - fld.d $fa2, $a1, 16 - pcalau12i $a0, %pc_hi20(.LCPI1_2) - fld.d $fs2, $a0, %pc_lo12(.LCPI1_2) - pcalau12i $a0, %pc_hi20(.LCPI1_3) - fld.d $fs1, $a0, %pc_lo12(.LCPI1_3) - fmadd.d $fa1, $fa2, $fs0, $fa1 - fmul.d $fa1, $fs0, $fa1 - fmadd.d $fa0, $fa0, $fs2, $fa1 + pcalau12i $a1, %pc_hi20(dlm) + addi.d $a1, $a1, %pc_lo12(dlm) + add.d $a2, $a1, $s2 + fld.d $fa0, $a2, 8 + fld.d $fa1, $a2, 16 + fldx.d $fa2, $a1, $s2 + fmadd.d $fa0, $fa1, $fs0, $fa0 + fmul.d $fa0, $fs0, $fa0 + lu32i.d $a0, -253952 + lu52i.d $a0, $a0, 1034 + movgr2fr.d $fs2, $a0 + fmadd.d $fa0, $fa2, $fs2, $fa0 + lu12i.w $a0, -315400 + ori $a0, $a0, 3997 + lu32i.d $a0, 284069 + lu52i.d $a0, $a0, 1005 + movgr2fr.d $fs1, $a0 fmul.d $fs4, $fa0, $fs1 pcalau12i $a0, %pc_hi20(e) addi.d $a0, $a0, %pc_lo12(e) - add.d $a1, $a0, $s1 - fldx.d $fa0, $a0, $s1 - fst.d $fa0, $sp, 136 # 8-byte Folded Spill + add.d $a1, $a0, $s2 + fldx.d $fa0, $a0, $s2 + fst.d $fa0, $sp, 176 # 8-byte Folded Spill fld.d $fa0, $a1, 8 - fst.d $fa0, $sp, 448 # 8-byte Folded Spill + fst.d $fa0, $sp, 464 # 8-byte Folded Spill fld.d $fa0, $a1, 16 - fst.d $fa0, $sp, 176 # 8-byte Folded Spill + fst.d $fa0, $sp, 200 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(pi) addi.d $a0, $a0, %pc_lo12(pi) - add.d $a1, $a0, $s1 + add.d $a1, $a0, $s2 fld.d $fa0, $a1, 8 fld.d $fa1, $a1, 16 - fldx.d $fa2, $a0, $s1 + fldx.d $fa2, $a0, $s2 fmadd.d $fa0, $fa1, $fs0, $fa0 fmul.d $fa0, $fs0, $fa0 fmadd.d $fa0, $fa2, $fs2, $fa0 fmul.d $fa0, $fa0, $fs1 movgr2fr.d $fa1, $zero - fst.d $fa1, $sp, 352 # 8-byte Folded Spill + fst.d $fa1, $sp, 368 # 8-byte Folded Spill fcmp.clt.d $fcc0, $fa0, $fa1 movcf2gr $a0, $fcc0 - st.d $a0, $sp, 128 - pcalau12i $a0, %pc_hi20(.LCPI1_4) - fld.d $fs3, $a0, %pc_lo12(.LCPI1_4) + st.d $a0, $sp, 144 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + st.d $a0, $sp, 136 # 8-byte Folded Spill + lu52i.d $a0, $a0, 1025 + movgr2fr.d $fs3, $a0 fmov.d $fa1, $fs3 - fst.d $fs3, $sp, 104 # 8-byte Folded Spill + fst.d $fs3, $sp, 112 # 8-byte Folded Spill pcaddu18i $ra, %call36(fmod) jirl $ra, $ra, 0 - fst.d $fa0, $sp, 112 # 8-byte Folded Spill + fst.d $fa0, $sp, 128 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(dinc) addi.d $a0, $a0, %pc_lo12(dinc) - add.d $a1, $a0, $s1 - fldx.d $fa0, $a0, $s1 - fst.d $fa0, $sp, 376 # 8-byte Folded Spill + add.d $a1, $a0, $s2 + fldx.d $fa0, $a0, $s2 + fst.d $fa0, $sp, 392 # 8-byte Folded Spill fld.d $fa0, $a1, 8 - fst.d $fa0, $sp, 368 # 8-byte Folded Spill + fst.d $fa0, $sp, 384 # 8-byte Folded Spill fld.d $fa0, $a1, 16 - fst.d $fa0, $sp, 360 # 8-byte Folded Spill + fst.d $fa0, $sp, 376 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(omega) addi.d $a0, $a0, %pc_lo12(omega) - add.d $a1, $a0, $s1 + add.d $a1, $a0, $s2 fld.d $fa0, $a1, 8 fld.d $fa1, $a1, 16 - fldx.d $fa2, $a0, $s1 + fldx.d $fa2, $a0, $s2 fmadd.d $fa0, $fa1, $fs0, $fa0 fmul.d $fa0, $fs0, $fa0 - fst.d $fs2, $sp, 408 # 8-byte Folded Spill + fst.d $fs2, $sp, 424 # 8-byte Folded Spill fmadd.d $fa0, $fa2, $fs2, $fa0 - fst.d $fs1, $sp, 416 # 8-byte Folded Spill + fst.d $fs1, $sp, 432 # 8-byte Folded Spill fmul.d $fa0, $fa0, $fs1 - fst.d $fa0, $sp, 344 # 8-byte Folded Spill + fst.d $fa0, $sp, 360 # 8-byte Folded Spill fmov.d $fa1, $fs3 pcaddu18i $ra, %call36(fmod) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI1_5) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_5) - fst.d $fa0, $sp, 440 # 8-byte Folded Spill - fmul.d $fs3, $fs0, $fa1 + fst.d $fa0, $sp, 456 # 8-byte Folded Spill + lu12i.w $a0, 127721 + ori $a0, $a0, 2416 + lu32i.d $a0, 459428 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa0, $a0 + fmul.d $fs2, $fs0, $fa0 alsl.d $a0, $fp, $fp, 3 - slli.d $s7, $a0, 3 + slli.d $s8, $a0, 3 pcalau12i $a0, %pc_hi20(kp) addi.d $a0, $a0, %pc_lo12(kp) - add.d $a1, $a0, $s7 - st.d $a1, $sp, 64 # 8-byte Folded Spill + add.d $a1, $a0, $s8 + st.d $a1, $sp, 72 # 8-byte Folded Spill slli.d $a1, $fp, 6 - st.d $fp, $sp, 424 # 8-byte Folded Spill - alsl.d $s8, $fp, $a1, 4 + st.d $fp, $sp, 440 # 8-byte Folded Spill + alsl.d $fp, $fp, $a1, 4 pcalau12i $a1, %pc_hi20(kq) addi.d $a1, $a1, %pc_lo12(kq) - add.d $s1, $a1, $s8 + add.d $s2, $a1, $fp pcalau12i $a2, %pc_hi20(ca) addi.d $a2, $a2, %pc_lo12(ca) - add.d $s6, $a2, $s7 + add.d $s7, $a2, $s8 pcalau12i $a3, %pc_hi20(sa) - addi.d $fp, $a3, %pc_lo12(sa) - add.d $s5, $fp, $s7 + addi.d $s0, $a3, %pc_lo12(sa) + add.d $s6, $s0, $s8 pcalau12i $a3, %pc_hi20(cl) - addi.d $s0, $a3, %pc_lo12(cl) - add.d $s2, $s0, $s8 + addi.d $s1, $a3, %pc_lo12(cl) + add.d $s3, $s1, $fp pcalau12i $a3, %pc_hi20(sl) - addi.d $s4, $a3, %pc_lo12(sl) - fldx.d $fa0, $a0, $s7 - fldx.d $fa1, $a1, $s8 - fldx.d $fa2, $a2, $s7 - fst.d $fa2, $sp, 336 # 8-byte Folded Spill - add.d $s3, $s4, $s8 - fmul.d $fs1, $fs3, $fa0 - fmul.d $fs2, $fs3, $fa1 + addi.d $s5, $a3, %pc_lo12(sl) + fldx.d $fa0, $a0, $s8 + fldx.d $fa1, $a1, $fp + fldx.d $fa2, $a2, $s8 + fst.d $fa2, $sp, 352 # 8-byte Folded Spill + add.d $s4, $s5, $fp + fmul.d $fs1, $fs2, $fa0 + fmul.d $fs3, $fs2, $fa1 fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 - fldx.d $fa1, $fp, $s7 - fst.d $fa1, $sp, 328 # 8-byte Folded Spill - fst.d $fa0, $sp, 320 # 8-byte Folded Spill + fldx.d $fa1, $s0, $s8 + fst.d $fa1, $sp, 344 # 8-byte Folded Spill + fst.d $fa0, $sp, 336 # 8-byte Folded Spill fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fldx.d $fs5, $s0, $s8 - fst.d $fa0, $sp, 312 # 8-byte Folded Spill - fmov.d $fa0, $fs2 + fldx.d $fs6, $s1, $fp + fst.d $fa0, $sp, 328 # 8-byte Folded Spill + fmov.d $fa0, $fs3 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 - fldx.d $fs7, $s4, $s8 + fldx.d $fs7, $s5, $fp fmov.d $fs1, $fa0 - fmov.d $fa0, $fs2 + fmov.d $fa0, $fs3 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 fmul.d $fa0, $fs7, $fa0 - fmadd.d $fa0, $fs5, $fs1, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI1_6) - fld.d $fs7, $a0, %pc_lo12(.LCPI1_6) - ld.d $fp, $sp, 64 # 8-byte Folded Reload + fmadd.d $fa0, $fs6, $fs1, $fa0 + lu12i.w $a0, -414774 + ori $a0, $a0, 3912 + lu32i.d $a0, -337934 + lu52i.d $a0, $a0, 999 + movgr2fr.d $fs7, $a0 + ld.d $fp, $sp, 72 # 8-byte Folded Reload fld.d $fa1, $fp, 8 - fld.d $fa2, $s1, 8 - fld.d $fa3, $s6, 8 - fst.d $fa3, $sp, 304 # 8-byte Folded Spill + fld.d $fa2, $s2, 8 + fld.d $fa3, $s7, 8 + fst.d $fa3, $sp, 320 # 8-byte Folded Spill fmadd.d $fs4, $fa0, $fs7, $fs4 - fmul.d $fs1, $fs3, $fa1 - fmul.d $fs2, $fs3, $fa2 + fmul.d $fs1, $fs2, $fa1 + fmul.d $fs3, $fs2, $fa2 fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 - fld.d $fa1, $s5, 8 - fst.d $fa1, $sp, 296 # 8-byte Folded Spill - fst.d $fa0, $sp, 288 # 8-byte Folded Spill + fld.d $fa1, $s6, 8 + fst.d $fa1, $sp, 312 # 8-byte Folded Spill + fst.d $fa0, $sp, 304 # 8-byte Folded Spill fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fld.d $fs5, $s2, 8 - fst.d $fa0, $sp, 280 # 8-byte Folded Spill - fmov.d $fa0, $fs2 + fld.d $fs6, $s3, 8 + fst.d $fa0, $sp, 296 # 8-byte Folded Spill + fmov.d $fa0, $fs3 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 - fld.d $fs6, $s3, 8 + fld.d $fs5, $s4, 8 fmov.d $fs1, $fa0 - fmov.d $fa0, $fs2 + fmov.d $fa0, $fs3 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fmul.d $fa0, $fs6, $fa0 - fmadd.d $fa0, $fs5, $fs1, $fa0 + fmul.d $fa0, $fs5, $fa0 + fmadd.d $fa0, $fs6, $fs1, $fa0 fld.d $fa1, $fp, 16 - fld.d $fa2, $s1, 16 - fld.d $fa3, $s6, 16 - fst.d $fa3, $sp, 272 # 8-byte Folded Spill + fld.d $fa2, $s2, 16 + fld.d $fa3, $s7, 16 + fst.d $fa3, $sp, 288 # 8-byte Folded Spill fmadd.d $fs4, $fa0, $fs7, $fs4 - fmul.d $fs1, $fs3, $fa1 - fmul.d $fs2, $fs3, $fa2 + fmul.d $fs1, $fs2, $fa1 + fmul.d $fs3, $fs2, $fa2 fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 - fld.d $fa1, $s5, 16 - fst.d $fa1, $sp, 264 # 8-byte Folded Spill - fst.d $fa0, $sp, 256 # 8-byte Folded Spill + fld.d $fa1, $s6, 16 + fst.d $fa1, $sp, 280 # 8-byte Folded Spill + fst.d $fa0, $sp, 272 # 8-byte Folded Spill fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fld.d $fs5, $s2, 16 - fst.d $fa0, $sp, 248 # 8-byte Folded Spill - fmov.d $fa0, $fs2 + fld.d $fs5, $s3, 16 + fst.d $fa0, $sp, 264 # 8-byte Folded Spill + fmov.d $fa0, $fs3 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 - fld.d $fs6, $s3, 16 + fld.d $fs6, $s4, 16 fmov.d $fs1, $fa0 - fmov.d $fa0, $fs2 + fmov.d $fa0, $fs3 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 fmul.d $fa0, $fs6, $fa0 fmadd.d $fa0, $fs5, $fs1, $fa0 fld.d $fa1, $fp, 24 - fld.d $fa2, $s1, 24 - fld.d $fa3, $s6, 24 - fst.d $fa3, $sp, 240 # 8-byte Folded Spill + fld.d $fa2, $s2, 24 + fld.d $fa3, $s7, 24 + fst.d $fa3, $sp, 256 # 8-byte Folded Spill fmadd.d $fs4, $fa0, $fs7, $fs4 - fmul.d $fs1, $fs3, $fa1 - fmul.d $fs2, $fs3, $fa2 + fmul.d $fs1, $fs2, $fa1 + fmul.d $fs3, $fs2, $fa2 fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 - fld.d $fa1, $s5, 24 - fst.d $fa1, $sp, 232 # 8-byte Folded Spill - fst.d $fa0, $sp, 224 # 8-byte Folded Spill + fld.d $fa1, $s6, 24 + fst.d $fa1, $sp, 248 # 8-byte Folded Spill + fst.d $fa0, $sp, 240 # 8-byte Folded Spill fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fld.d $fs5, $s2, 24 - fst.d $fa0, $sp, 216 # 8-byte Folded Spill - fmov.d $fa0, $fs2 + fld.d $fs5, $s3, 24 + fst.d $fa0, $sp, 232 # 8-byte Folded Spill + fmov.d $fa0, $fs3 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 - fld.d $fs6, $s3, 24 + fld.d $fs6, $s4, 24 fmov.d $fs1, $fa0 - fmov.d $fa0, $fs2 + fmov.d $fa0, $fs3 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 fmul.d $fa0, $fs6, $fa0 fmadd.d $fa0, $fs5, $fs1, $fa0 fld.d $fa1, $fp, 32 - fld.d $fa2, $s1, 32 - fld.d $fa3, $s6, 32 - fst.d $fa3, $sp, 208 # 8-byte Folded Spill + fld.d $fa2, $s2, 32 + fld.d $fa3, $s7, 32 + fst.d $fa3, $sp, 224 # 8-byte Folded Spill fmadd.d $fs4, $fa0, $fs7, $fs4 - fmul.d $fs1, $fs3, $fa1 - fmul.d $fs2, $fs3, $fa2 + fmul.d $fs1, $fs2, $fa1 + fmul.d $fs3, $fs2, $fa2 fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 - fld.d $fa1, $s5, 32 - fst.d $fa1, $sp, 200 # 8-byte Folded Spill - fst.d $fa0, $sp, 192 # 8-byte Folded Spill + fld.d $fa1, $s6, 32 + fst.d $fa1, $sp, 216 # 8-byte Folded Spill + fst.d $fa0, $sp, 208 # 8-byte Folded Spill fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fld.d $fs5, $s2, 32 - fst.d $fa0, $sp, 184 # 8-byte Folded Spill - fmov.d $fa0, $fs2 + fld.d $fs5, $s3, 32 + fst.d $fa0, $sp, 192 # 8-byte Folded Spill + fmov.d $fa0, $fs3 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 - fld.d $fs6, $s3, 32 + fld.d $fs6, $s4, 32 fmov.d $fs1, $fa0 - fmov.d $fa0, $fs2 + fmov.d $fa0, $fs3 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 fmul.d $fa0, $fs6, $fa0 fmadd.d $fa0, $fs5, $fs1, $fa0 fld.d $fa1, $fp, 40 - fld.d $fa2, $s1, 40 - fld.d $fa3, $s6, 40 - fst.d $fa3, $sp, 168 # 8-byte Folded Spill + fld.d $fa2, $s2, 40 + fld.d $fa3, $s7, 40 + fst.d $fa3, $sp, 184 # 8-byte Folded Spill fmadd.d $fs4, $fa0, $fs7, $fs4 - fmul.d $fs1, $fs3, $fa1 - fmul.d $fs2, $fs3, $fa2 + fmul.d $fs1, $fs2, $fa1 + fmul.d $fs3, $fs2, $fa2 fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 - fld.d $fa1, $s5, 40 - fst.d $fa1, $sp, 160 # 8-byte Folded Spill - fst.d $fa0, $sp, 152 # 8-byte Folded Spill + fld.d $fa1, $s6, 40 + fst.d $fa1, $sp, 168 # 8-byte Folded Spill + fst.d $fa0, $sp, 160 # 8-byte Folded Spill fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fld.d $fs5, $s2, 40 - fst.d $fa0, $sp, 144 # 8-byte Folded Spill - fmov.d $fa0, $fs2 + fld.d $fs5, $s3, 40 + fst.d $fa0, $sp, 152 # 8-byte Folded Spill + fmov.d $fa0, $fs3 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 - fld.d $fs6, $s3, 40 + fld.d $fs6, $s4, 40 fmov.d $fs1, $fa0 - fmov.d $fa0, $fs2 + fmov.d $fa0, $fs3 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 fmul.d $fa0, $fs6, $fa0 fmadd.d $fa0, $fs5, $fs1, $fa0 fld.d $fa1, $fp, 48 - fld.d $fa2, $s1, 48 - fld.d $fa3, $s6, 48 + fld.d $fa2, $s2, 48 + fld.d $fa3, $s7, 48 fst.d $fa3, $sp, 120 # 8-byte Folded Spill fmadd.d $fs4, $fa0, $fs7, $fs4 - fmul.d $fs1, $fs3, $fa1 - fmul.d $fs2, $fs3, $fa2 + fmul.d $fs1, $fs2, $fa1 + fmul.d $fs3, $fs2, $fa2 fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 - fld.d $fa1, $s5, 48 - fst.d $fa1, $sp, 96 # 8-byte Folded Spill - fst.d $fa0, $sp, 88 # 8-byte Folded Spill + fld.d $fa1, $s6, 48 + fst.d $fa1, $sp, 104 # 8-byte Folded Spill + fst.d $fa0, $sp, 96 # 8-byte Folded Spill fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fld.d $fs5, $s2, 48 - fst.d $fa0, $sp, 80 # 8-byte Folded Spill - fmov.d $fa0, $fs2 + fld.d $fs5, $s3, 48 + fst.d $fa0, $sp, 88 # 8-byte Folded Spill + fmov.d $fa0, $fs3 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 - fld.d $fs6, $s3, 48 + fld.d $fs6, $s4, 48 fmov.d $fs1, $fa0 - fmov.d $fa0, $fs2 + fmov.d $fa0, $fs3 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 fmul.d $fa0, $fs6, $fa0 fmadd.d $fa0, $fs5, $fs1, $fa0 fld.d $fa1, $fp, 56 - fld.d $fa2, $s1, 56 - fld.d $fa3, $s6, 56 - fst.d $fa3, $sp, 72 # 8-byte Folded Spill + fld.d $fa2, $s2, 56 + fld.d $fa3, $s7, 56 + fst.d $fa3, $sp, 80 # 8-byte Folded Spill fmadd.d $fs1, $fa0, $fs7, $fs4 - fmul.d $fs2, $fs3, $fa1 - fmul.d $fs4, $fs3, $fa2 - fmov.d $fa0, $fs2 + fmul.d $fs3, $fs2, $fa1 + fmul.d $fs4, $fs2, $fa2 + fmov.d $fa0, $fs3 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 - fld.d $fa1, $s5, 56 - fst.d $fa1, $sp, 56 # 8-byte Folded Spill - fst.d $fa0, $sp, 48 # 8-byte Folded Spill - fmov.d $fa0, $fs2 + fld.d $fa1, $s6, 56 + fst.d $fa1, $sp, 64 # 8-byte Folded Spill + fst.d $fa0, $sp, 56 # 8-byte Folded Spill + fmov.d $fa0, $fs3 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fld.d $fs2, $s2, 56 - fst.d $fa0, $sp, 24 # 8-byte Folded Spill + fld.d $fs3, $s3, 56 + fst.d $fa0, $sp, 32 # 8-byte Folded Spill fmov.d $fa0, $fs4 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 - fld.d $fs6, $s3, 56 - fmov.d $fs5, $fa0 + fld.d $fs5, $s4, 56 + fmov.d $fs6, $fa0 fmov.d $fa0, $fs4 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fmul.d $fa0, $fs6, $fa0 + fmul.d $fa0, $fs5, $fa0 fld.d $fa1, $fp, 64 - fld.d $fa2, $s6, 64 - fst.d $fa2, $sp, 64 # 8-byte Folded Spill - fmadd.d $fa0, $fs2, $fs5, $fa0 - fmadd.d $fs2, $fa0, $fs7, $fs1 - fmul.d $fs4, $fs3, $fa1 + fld.d $fa2, $s7, 64 + fst.d $fa2, $sp, 72 # 8-byte Folded Spill + fmadd.d $fa0, $fs3, $fs6, $fa0 + fmadd.d $fs3, $fa0, $fs7, $fs1 + fmul.d $fs4, $fs2, $fa1 fmov.d $fa0, $fs4 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 - fld.d $fa1, $s5, 64 - fst.d $fa1, $sp, 40 # 8-byte Folded Spill - fst.d $fa0, $sp, 32 # 8-byte Folded Spill + fld.d $fa1, $s6, 64 + fst.d $fa1, $sp, 48 # 8-byte Folded Spill + fst.d $fa0, $sp, 40 # 8-byte Folded Spill fmov.d $fa0, $fs4 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fld.d $fa1, $s1, 64 - fld.d $fs4, $s2, 64 - fst.d $fa0, $sp, 16 # 8-byte Folded Spill - fmul.d $fs5, $fs3, $fa1 - fmov.d $fa0, $fs5 + fld.d $fa1, $s2, 64 + fld.d $fs4, $s3, 64 + fst.d $fa0, $sp, 24 # 8-byte Folded Spill + fmul.d $fs6, $fs2, $fa1 + fmov.d $fa0, $fs6 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 - fld.d $fs6, $s3, 64 + fld.d $fs5, $s4, 64 fmov.d $fs1, $fa0 - fmov.d $fa0, $fs5 + fmov.d $fa0, $fs6 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fmul.d $fa0, $fs6, $fa0 + fmul.d $fa0, $fs5, $fa0 fmadd.d $fa0, $fs4, $fs1, $fa0 - fld.d $fa1, $s1, 72 - fld.d $fs4, $s2, 72 + fld.d $fa1, $s2, 72 + fld.d $fs4, $s3, 72 fmul.d $fa0, $fs0, $fa0 - fmadd.d $fs2, $fa0, $fs7, $fs2 - fmul.d $fs1, $fs3, $fa1 + fmadd.d $fs3, $fa0, $fs7, $fs3 + fmul.d $fs1, $fs2, $fa1 fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 - fld.d $fs3, $s3, 72 - fmov.d $fs5, $fa0 + fld.d $fs2, $s4, 72 + fmov.d $fs6, $fa0 fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fmul.d $fa0, $fs3, $fa0 - fmadd.d $fa0, $fs4, $fs5, $fa0 + fmul.d $fa0, $fs2, $fa0 + fmadd.d $fa0, $fs4, $fs6, $fa0 fmul.d $fa0, $fs0, $fa0 - fmadd.d $fa0, $fa0, $fs7, $fs2 - fld.d $fa1, $sp, 448 # 8-byte Folded Reload - fld.d $fa2, $sp, 176 # 8-byte Folded Reload + fmadd.d $fa0, $fa0, $fs7, $fs3 + fld.d $fa1, $sp, 464 # 8-byte Folded Reload + fld.d $fa2, $sp, 200 # 8-byte Folded Reload fmadd.d $fs2, $fa2, $fs0, $fa1 - fld.d $fs3, $sp, 136 # 8-byte Folded Reload - fmadd.d $fs5, $fs2, $fs0, $fs3 - fmov.d $fs6, $fs0 - fld.d $fa3, $sp, 112 # 8-byte Folded Reload + fld.d $fs3, $sp, 176 # 8-byte Folded Reload + fmadd.d $fs6, $fs2, $fs0, $fs3 + fmov.d $fs5, $fs0 + fld.d $fa3, $sp, 128 # 8-byte Folded Reload fabs.d $fa1, $fa3 - ld.d $a0, $sp, 128 + ld.d $a0, $sp, 144 movgr2cf $fcc0, $a0 movcf2gr $a0, $fcc0 slli.d $a0, $a0, 3 - pcalau12i $a1, %pc_hi20(.LCPI1_7) - addi.d $s1, $a1, %pc_lo12(.LCPI1_7) - fldx.d $fa2, $s1, $a0 - pcalau12i $a0, %pc_hi20(.LCPI1_8) - fld.d $fa4, $a0, %pc_lo12(.LCPI1_8) + pcalau12i $a1, %pc_hi20(.LCPI1_0) + addi.d $s2, $a1, %pc_lo12(.LCPI1_0) + fldx.d $fa2, $s2, $a0 fsub.d $fa2, $fa3, $fa2 - fst.d $fa4, $sp, 128 # 8-byte Folded Spill + ld.d $a0, $sp, 136 # 8-byte Folded Reload + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fa4, $a0 + fst.d $fa4, $sp, 144 # 8-byte Folded Spill fcmp.cult.d $fcc0, $fa1, $fa4 fsel $fs4, $fa2, $fa3, $fcc0 - fld.d $fa1, $sp, 104 # 8-byte Folded Reload + fld.d $fa1, $sp, 112 # 8-byte Folded Reload pcaddu18i $ra, %call36(fmod) jirl $ra, $ra, 0 fsub.d $fs0, $fa0, $fs4 fmov.d $fa0, $fs0 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fmadd.d $fs1, $fs5, $fa0, $fs0 - fst.d $fs6, $sp, 112 # 8-byte Folded Spill - fnmadd.d $fs6, $fs2, $fs6, $fs3 - fst.d $fs0, $sp, 104 # 8-byte Folded Spill + fmadd.d $fs1, $fs6, $fa0, $fs0 + fst.d $fs5, $sp, 136 # 8-byte Folded Spill + fnmadd.d $fs5, $fs2, $fs5, $fs3 + fmov.d $fs3, $fs0 fsub.d $fs2, $fs0, $fs1 fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fmadd.d $fs3, $fs5, $fa0, $fs2 + fmadd.d $fs2, $fs6, $fa0, $fs2 fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 vldi $vr1, -912 - pcalau12i $a0, %pc_hi20(.LCPI1_9) - fld.d $fs2, $a0, %pc_lo12(.LCPI1_9) - fmadd.d $fa0, $fs6, $fa0, $fa1 - fdiv.d $fa0, $fs3, $fa0 + fmadd.d $fa0, $fs5, $fa0, $fa1 + fdiv.d $fa0, $fs2, $fa0 fabs.d $fa1, $fa0 - fcmp.clt.d $fcc0, $fa1, $fs2 - fadd.d $fs3, $fs1, $fa0 - fst.d $fs5, $sp, 448 # 8-byte Folded Spill - fst.d $fs4, $sp, 176 # 8-byte Folded Spill - fst.d $fs6, $sp, 136 # 8-byte Folded Spill + lu12i.w $a0, -519458 + ori $a0, $a0, 2577 + lu32i.d $a0, 104345 + lu52i.d $a0, $a0, 983 + movgr2fr.d $fs0, $a0 + fcmp.clt.d $fcc0, $fa1, $fs0 + fadd.d $fs2, $fs1, $fa0 + fst.d $fs6, $sp, 464 # 8-byte Folded Spill + fst.d $fs4, $sp, 200 # 8-byte Folded Spill + fst.d $fs5, $sp, 176 # 8-byte Folded Spill bcnez $fcc0, .LBB1_10 # %bb.1: - fld.d $fs0, $sp, 104 # 8-byte Folded Reload - fsub.d $fs1, $fs0, $fs3 - fmov.d $fa0, $fs3 + fsub.d $fs1, $fs3, $fs2 + fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fld.d $fa1, $sp, 448 # 8-byte Folded Reload + fld.d $fa1, $sp, 464 # 8-byte Folded Reload fmadd.d $fs1, $fa1, $fa0, $fs1 - fmov.d $fa0, $fs3 + fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 vldi $vr1, -912 - fmadd.d $fa0, $fs6, $fa0, $fa1 + fmadd.d $fa0, $fs5, $fa0, $fa1 fdiv.d $fa0, $fs1, $fa0 fabs.d $fa1, $fa0 - fcmp.clt.d $fcc0, $fa1, $fs2 - fadd.d $fs3, $fs3, $fa0 + fcmp.clt.d $fcc0, $fa1, $fs0 + fadd.d $fs2, $fs2, $fa0 bcnez $fcc0, .LBB1_10 # %bb.2: - fsub.d $fs1, $fs0, $fs3 - fmov.d $fa0, $fs3 + fsub.d $fs1, $fs3, $fs2 + fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fld.d $fa1, $sp, 448 # 8-byte Folded Reload + fld.d $fa1, $sp, 464 # 8-byte Folded Reload fmadd.d $fs1, $fa1, $fa0, $fs1 - fmov.d $fa0, $fs3 + fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 vldi $vr1, -912 - fmadd.d $fa0, $fs6, $fa0, $fa1 + fmadd.d $fa0, $fs5, $fa0, $fa1 fdiv.d $fa0, $fs1, $fa0 fabs.d $fa1, $fa0 - fcmp.clt.d $fcc0, $fa1, $fs2 - fadd.d $fs3, $fs3, $fa0 + fcmp.clt.d $fcc0, $fa1, $fs0 + fadd.d $fs2, $fs2, $fa0 bcnez $fcc0, .LBB1_10 # %bb.3: - fsub.d $fs1, $fs0, $fs3 - fmov.d $fa0, $fs3 + fsub.d $fs1, $fs3, $fs2 + fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fld.d $fa1, $sp, 448 # 8-byte Folded Reload + fld.d $fa1, $sp, 464 # 8-byte Folded Reload fmadd.d $fs1, $fa1, $fa0, $fs1 - fmov.d $fa0, $fs3 + fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 vldi $vr1, -912 - fmadd.d $fa0, $fs6, $fa0, $fa1 + fmadd.d $fa0, $fs5, $fa0, $fa1 fdiv.d $fa0, $fs1, $fa0 fabs.d $fa1, $fa0 - fcmp.clt.d $fcc0, $fa1, $fs2 - fadd.d $fs3, $fs3, $fa0 + fcmp.clt.d $fcc0, $fa1, $fs0 + fadd.d $fs2, $fs2, $fa0 bcnez $fcc0, .LBB1_10 # %bb.4: - fsub.d $fs1, $fs0, $fs3 - fmov.d $fa0, $fs3 + fsub.d $fs1, $fs3, $fs2 + fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fld.d $fa1, $sp, 448 # 8-byte Folded Reload + fld.d $fa1, $sp, 464 # 8-byte Folded Reload fmadd.d $fs1, $fa1, $fa0, $fs1 - fmov.d $fa0, $fs3 + fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 vldi $vr1, -912 - fmadd.d $fa0, $fs6, $fa0, $fa1 + fmadd.d $fa0, $fs5, $fa0, $fa1 fdiv.d $fa0, $fs1, $fa0 fabs.d $fa1, $fa0 - fcmp.clt.d $fcc0, $fa1, $fs2 - fadd.d $fs3, $fs3, $fa0 + fcmp.clt.d $fcc0, $fa1, $fs0 + fadd.d $fs2, $fs2, $fa0 bcnez $fcc0, .LBB1_10 # %bb.5: - fsub.d $fs1, $fs0, $fs3 - fmov.d $fa0, $fs3 + fsub.d $fs1, $fs3, $fs2 + fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fld.d $fa1, $sp, 448 # 8-byte Folded Reload + fld.d $fa1, $sp, 464 # 8-byte Folded Reload fmadd.d $fs1, $fa1, $fa0, $fs1 - fmov.d $fa0, $fs3 + fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 vldi $vr1, -912 - fmadd.d $fa0, $fs6, $fa0, $fa1 + fmadd.d $fa0, $fs5, $fa0, $fa1 fdiv.d $fa0, $fs1, $fa0 fabs.d $fa1, $fa0 - fcmp.clt.d $fcc0, $fa1, $fs2 - fadd.d $fs3, $fs3, $fa0 + fcmp.clt.d $fcc0, $fa1, $fs0 + fadd.d $fs2, $fs2, $fa0 bcnez $fcc0, .LBB1_10 # %bb.6: - fsub.d $fs1, $fs0, $fs3 - fmov.d $fa0, $fs3 + fsub.d $fs1, $fs3, $fs2 + fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fld.d $fa1, $sp, 448 # 8-byte Folded Reload + fld.d $fa1, $sp, 464 # 8-byte Folded Reload fmadd.d $fs1, $fa1, $fa0, $fs1 - fmov.d $fa0, $fs3 + fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 vldi $vr1, -912 - fmadd.d $fa0, $fs6, $fa0, $fa1 + fmadd.d $fa0, $fs5, $fa0, $fa1 fdiv.d $fa0, $fs1, $fa0 fabs.d $fa1, $fa0 - fcmp.clt.d $fcc0, $fa1, $fs2 - fadd.d $fs3, $fs3, $fa0 + fcmp.clt.d $fcc0, $fa1, $fs0 + fadd.d $fs2, $fs2, $fa0 bcnez $fcc0, .LBB1_10 # %bb.7: - fsub.d $fs1, $fs0, $fs3 - fmov.d $fa0, $fs3 + fsub.d $fs1, $fs3, $fs2 + fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fld.d $fa1, $sp, 448 # 8-byte Folded Reload + fld.d $fa1, $sp, 464 # 8-byte Folded Reload fmadd.d $fs1, $fa1, $fa0, $fs1 - fmov.d $fa0, $fs3 + fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 vldi $vr1, -912 - fmadd.d $fa0, $fs6, $fa0, $fa1 + fmadd.d $fa0, $fs5, $fa0, $fa1 fdiv.d $fa0, $fs1, $fa0 fabs.d $fa1, $fa0 - fcmp.clt.d $fcc0, $fa1, $fs2 - fadd.d $fs3, $fs3, $fa0 + fcmp.clt.d $fcc0, $fa1, $fs0 + fadd.d $fs2, $fs2, $fa0 bcnez $fcc0, .LBB1_10 # %bb.8: - fsub.d $fs1, $fs0, $fs3 - fmov.d $fa0, $fs3 + fsub.d $fs1, $fs3, $fs2 + fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fld.d $fa1, $sp, 448 # 8-byte Folded Reload + fld.d $fa1, $sp, 464 # 8-byte Folded Reload fmadd.d $fs1, $fa1, $fa0, $fs1 - fmov.d $fa0, $fs3 + fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 vldi $vr1, -912 - fmadd.d $fa0, $fs6, $fa0, $fa1 + fmadd.d $fa0, $fs5, $fa0, $fa1 fdiv.d $fa0, $fs1, $fa0 fabs.d $fa1, $fa0 - fcmp.clt.d $fcc0, $fa1, $fs2 - fadd.d $fs3, $fs3, $fa0 + fcmp.clt.d $fcc0, $fa1, $fs0 + fadd.d $fs2, $fs2, $fa0 bcnez $fcc0, .LBB1_10 # %bb.9: - fsub.d $fs1, $fs0, $fs3 - fmov.d $fa0, $fs3 + fsub.d $fs1, $fs3, $fs2 + fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fld.d $fa1, $sp, 448 # 8-byte Folded Reload + fld.d $fa1, $sp, 464 # 8-byte Folded Reload fmadd.d $fs1, $fa1, $fa0, $fs1 - fmov.d $fa0, $fs3 + fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 vldi $vr1, -912 - fmadd.d $fa0, $fs6, $fa0, $fa1 + fmadd.d $fa0, $fs5, $fa0, $fa1 fdiv.d $fa0, $fs1, $fa0 - fadd.d $fs3, $fs3, $fa0 + fadd.d $fs2, $fs2, $fa0 .LBB1_10: - fld.d $fa0, $sp, 56 # 8-byte Folded Reload - fld.d $fa1, $sp, 24 # 8-byte Folded Reload + fld.d $fa0, $sp, 64 # 8-byte Folded Reload + fld.d $fa1, $sp, 32 # 8-byte Folded Reload fmul.d $fa0, $fa0, $fa1 - fld.d $fa1, $sp, 72 # 8-byte Folded Reload - fld.d $fa2, $sp, 48 # 8-byte Folded Reload + fld.d $fa1, $sp, 80 # 8-byte Folded Reload + fld.d $fa2, $sp, 56 # 8-byte Folded Reload fmadd.d $fa0, $fa1, $fa2, $fa0 - fld.d $fa1, $sp, 96 # 8-byte Folded Reload - fld.d $fa2, $sp, 80 # 8-byte Folded Reload + fld.d $fa1, $sp, 104 # 8-byte Folded Reload + fld.d $fa2, $sp, 88 # 8-byte Folded Reload fmul.d $fa1, $fa1, $fa2 fld.d $fa2, $sp, 120 # 8-byte Folded Reload - fld.d $fa3, $sp, 88 # 8-byte Folded Reload + fld.d $fa3, $sp, 96 # 8-byte Folded Reload fmadd.d $fa1, $fa2, $fa3, $fa1 - fld.d $fa2, $sp, 160 # 8-byte Folded Reload - fld.d $fa3, $sp, 144 # 8-byte Folded Reload + fld.d $fa2, $sp, 168 # 8-byte Folded Reload + fld.d $fa3, $sp, 152 # 8-byte Folded Reload fmul.d $fa2, $fa2, $fa3 - fld.d $fa3, $sp, 168 # 8-byte Folded Reload - fld.d $fa4, $sp, 152 # 8-byte Folded Reload + fld.d $fa3, $sp, 184 # 8-byte Folded Reload + fld.d $fa4, $sp, 160 # 8-byte Folded Reload fmadd.d $fa2, $fa3, $fa4, $fa2 - fld.d $fa3, $sp, 200 # 8-byte Folded Reload - fld.d $fa4, $sp, 184 # 8-byte Folded Reload + fld.d $fa3, $sp, 216 # 8-byte Folded Reload + fld.d $fa4, $sp, 192 # 8-byte Folded Reload fmul.d $fa3, $fa3, $fa4 - fld.d $fa4, $sp, 208 # 8-byte Folded Reload - fld.d $fa5, $sp, 192 # 8-byte Folded Reload + fld.d $fa4, $sp, 224 # 8-byte Folded Reload + fld.d $fa5, $sp, 208 # 8-byte Folded Reload fmadd.d $fa3, $fa4, $fa5, $fa3 - fld.d $fa4, $sp, 232 # 8-byte Folded Reload - fld.d $fa5, $sp, 216 # 8-byte Folded Reload + fld.d $fa4, $sp, 248 # 8-byte Folded Reload + fld.d $fa5, $sp, 232 # 8-byte Folded Reload fmul.d $fa4, $fa4, $fa5 - fld.d $fa5, $sp, 240 # 8-byte Folded Reload - fld.d $fa6, $sp, 224 # 8-byte Folded Reload + fld.d $fa5, $sp, 256 # 8-byte Folded Reload + fld.d $fa6, $sp, 240 # 8-byte Folded Reload fmadd.d $fa4, $fa5, $fa6, $fa4 - fld.d $fa5, $sp, 264 # 8-byte Folded Reload - fld.d $fa6, $sp, 248 # 8-byte Folded Reload + fld.d $fa5, $sp, 280 # 8-byte Folded Reload + fld.d $fa6, $sp, 264 # 8-byte Folded Reload fmul.d $fa5, $fa5, $fa6 - fld.d $fa6, $sp, 272 # 8-byte Folded Reload - fld.d $fa7, $sp, 256 # 8-byte Folded Reload + fld.d $fa6, $sp, 288 # 8-byte Folded Reload + fld.d $fa7, $sp, 272 # 8-byte Folded Reload fmadd.d $fa5, $fa6, $fa7, $fa5 - fld.d $fa6, $sp, 296 # 8-byte Folded Reload - fld.d $fa7, $sp, 280 # 8-byte Folded Reload + fld.d $fa6, $sp, 312 # 8-byte Folded Reload + fld.d $fa7, $sp, 296 # 8-byte Folded Reload fmul.d $fa6, $fa6, $fa7 - fld.d $fa7, $sp, 304 # 8-byte Folded Reload - fld.d $ft0, $sp, 288 # 8-byte Folded Reload + fld.d $fa7, $sp, 320 # 8-byte Folded Reload + fld.d $ft0, $sp, 304 # 8-byte Folded Reload fmadd.d $fa6, $fa7, $ft0, $fa6 - fld.d $fa7, $sp, 328 # 8-byte Folded Reload - fld.d $ft0, $sp, 312 # 8-byte Folded Reload + fld.d $fa7, $sp, 344 # 8-byte Folded Reload + fld.d $ft0, $sp, 328 # 8-byte Folded Reload fmul.d $fa7, $fa7, $ft0 - fld.d $ft0, $sp, 336 # 8-byte Folded Reload - fld.d $ft1, $sp, 320 # 8-byte Folded Reload - fmadd.d $fa7, $ft0, $ft1, $fa7 fld.d $ft0, $sp, 352 # 8-byte Folded Reload - fld.d $ft1, $sp, 344 # 8-byte Folded Reload + fld.d $ft1, $sp, 336 # 8-byte Folded Reload + fmadd.d $fa7, $ft0, $ft1, $fa7 + fld.d $ft0, $sp, 368 # 8-byte Folded Reload + fld.d $ft1, $sp, 360 # 8-byte Folded Reload fcmp.clt.d $fcc0, $ft1, $ft0 - fld.d $ft3, $sp, 440 # 8-byte Folded Reload + fld.d $ft3, $sp, 456 # 8-byte Folded Reload fabs.d $ft0, $ft3 - fld.d $ft1, $sp, 128 # 8-byte Folded Reload + fld.d $ft1, $sp, 144 # 8-byte Folded Reload fcmp.cult.d $fcc1, $ft0, $ft1 - fld.d $ft0, $sp, 40 # 8-byte Folded Reload - fld.d $ft1, $sp, 16 # 8-byte Folded Reload + fld.d $ft0, $sp, 48 # 8-byte Folded Reload + fld.d $ft1, $sp, 24 # 8-byte Folded Reload fmul.d $ft0, $ft0, $ft1 - fld.d $ft1, $sp, 64 # 8-byte Folded Reload - fld.d $ft2, $sp, 32 # 8-byte Folded Reload + fld.d $ft1, $sp, 72 # 8-byte Folded Reload + fld.d $ft2, $sp, 40 # 8-byte Folded Reload fmadd.d $ft0, $ft1, $ft2, $ft0 - fld.d $ft1, $sp, 392 # 8-byte Folded Reload - fld.d $ft2, $sp, 384 # 8-byte Folded Reload - fld.d $ft4, $sp, 112 # 8-byte Folded Reload - fmadd.d $ft1, $ft2, $ft4, $ft1 + fld.d $ft1, $sp, 408 # 8-byte Folded Reload fld.d $ft2, $sp, 400 # 8-byte Folded Reload + fld.d $ft4, $sp, 136 # 8-byte Folded Reload + fmadd.d $ft1, $ft2, $ft4, $ft1 + fld.d $ft2, $sp, 416 # 8-byte Folded Reload fmadd.d $ft1, $ft1, $ft4, $ft2 fmadd.d $fa7, $fa7, $fs7, $ft1 fmadd.d $fa6, $fa6, $fs7, $fa7 @@ -776,50 +769,50 @@ planetpv: # @planetpv fmadd.d $fa0, $fa0, $fs7, $fa1 movcf2gr $a0, $fcc0 slli.d $a0, $a0, 3 - fldx.d $fa1, $s1, $a0 + fldx.d $fa1, $s2, $a0 fmul.d $fa2, $ft4, $ft0 fmadd.d $fs7, $fa2, $fs7, $fa0 - fld.d $fa0, $sp, 368 # 8-byte Folded Reload - fld.d $fa2, $sp, 360 # 8-byte Folded Reload + fld.d $fa0, $sp, 384 # 8-byte Folded Reload + fld.d $fa2, $sp, 376 # 8-byte Folded Reload fmadd.d $fa0, $fa2, $ft4, $fa0 fsub.d $fs4, $ft3, $fa1 fmul.d $fa0, $ft4, $fa0 - fld.d $fa1, $sp, 408 # 8-byte Folded Reload - fld.d $fa2, $sp, 376 # 8-byte Folded Reload - fmadd.d $fs6, $fa2, $fa1, $fa0 + fld.d $fa1, $sp, 424 # 8-byte Folded Reload + fld.d $fa2, $sp, 392 # 8-byte Folded Reload + fmadd.d $fs5, $fa2, $fa1, $fa0 vldi $vr1, -928 vldi $vr0, -912 - fld.d $fa3, $sp, 448 # 8-byte Folded Reload + fld.d $fa3, $sp, 464 # 8-byte Folded Reload fadd.d $fa2, $fa3, $fa0 fsub.d $fa0, $fa0, $fa3 fdiv.d $fa0, $fa2, $fa0 - fsqrt.d $fs2, $fa0 - fcmp.cor.d $fcc0, $fs2, $fs2 - fmul.d $fs1, $fs3, $fa1 + fsqrt.d $fs3, $fa0 + fcmp.cor.d $fcc0, $fs3, $fs3 + fmul.d $fs1, $fs2, $fa1 bceqz $fcc0, .LBB1_14 .LBB1_11: # %.split fsel $fs0, $fs4, $ft3, $fcc1 - fld.d $fa0, $sp, 416 # 8-byte Folded Reload - fmul.d $fs6, $fs6, $fa0 + fld.d $fa0, $sp, 432 # 8-byte Folded Reload + fmul.d $fs5, $fs5, $fa0 fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fmul.d $fs2, $fs2, $fa0 + fmul.d $fs3, $fs3, $fa0 fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 fmov.d $fa1, $fa0 - fmov.d $fa0, $fs2 + fmov.d $fa0, $fs3 pcaddu18i $ra, %call36(atan2) jirl $ra, $ra, 0 fadd.d $fs4, $fa0, $fa0 - fmov.d $fa0, $fs3 + fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 vldi $vr1, -912 - fld.d $fs5, $sp, 136 # 8-byte Folded Reload - fmadd.d $fs1, $fs5, $fa0, $fa1 - ld.d $a0, $sp, 424 # 8-byte Folded Reload + fld.d $fs6, $sp, 176 # 8-byte Folded Reload + fmadd.d $fs1, $fs6, $fa0, $fa1 + ld.d $a0, $sp, 440 # 8-byte Folded Reload slli.d $a0, $a0, 3 pcalau12i $a1, %pc_hi20(amas) addi.d $a1, $a1, %pc_lo12(amas) @@ -833,14 +826,17 @@ planetpv: # @planetpv fcmp.cor.d $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB1_15 .LBB1_12: # %.split.split - pcalau12i $a0, %pc_hi20(.LCPI1_10) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_10) - fmul.d $fa2, $fs7, $fs1 - fst.d $fa2, $sp, 424 # 8-byte Folded Spill + fmul.d $fa1, $fs7, $fs1 + fst.d $fa1, $sp, 440 # 8-byte Folded Spill + lu12i.w $a0, 334443 + ori $a0, $a0, 1690 + lu32i.d $a0, 105837 + lu52i.d $a0, $a0, 1017 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 - fst.d $fa0, $sp, 440 # 8-byte Folded Spill + fst.d $fa0, $sp, 456 # 8-byte Folded Spill vldi $vr0, -928 - fmul.d $fs1, $fs6, $fa0 + fmul.d $fs1, $fs5, $fa0 fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 @@ -848,13 +844,13 @@ planetpv: # @planetpv fmov.d $fa0, $fs0 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 - fst.d $fs7, $sp, 416 # 8-byte Folded Spill + fst.d $fs7, $sp, 432 # 8-byte Folded Spill fmul.d $fs7, $fs2, $fa0 fmov.d $fa0, $fs0 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 - fmul.d $fs6, $fs2, $fa0 - fld.d $fs3, $sp, 176 # 8-byte Folded Reload + fmul.d $fs5, $fs2, $fa0 + fld.d $fs3, $sp, 200 # 8-byte Folded Reload fadd.d $fs2, $fs3, $fs4 fmov.d $fa0, $fs2 pcaddu18i $ra, %call36(sin) @@ -866,114 +862,120 @@ planetpv: # @planetpv fmov.d $fs2, $fa0 fneg.d $fa0, $fs0 fmul.d $fa0, $fs7, $fa0 - fmadd.d $fa2, $fs6, $fs2, $fa0 + fmadd.d $fa2, $fs5, $fs2, $fa0 vldi $vr0, -912 - fld.d $fs4, $sp, 448 # 8-byte Folded Reload - fmadd.d $fa1, $fs5, $fs4, $fa0 + fld.d $fs4, $sp, 464 # 8-byte Folded Reload + fmadd.d $fa1, $fs6, $fs4, $fa0 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 fadd.d $fa2, $fa2, $fa2 - fst.d $fa2, $sp, 408 # 8-byte Folded Spill + fst.d $fa2, $sp, 424 # 8-byte Folded Spill bceqz $fcc0, .LBB1_16 .LBB1_13: # %.split.split.split - fld.d $fa1, $sp, 416 # 8-byte Folded Reload - fdiv.d $fs5, $fa1, $fa0 + fld.d $fa1, $sp, 432 # 8-byte Folded Reload + fdiv.d $fs6, $fa1, $fa0 fmov.d $fa0, $fs1 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 - fst.d $fa0, $sp, 416 # 8-byte Folded Spill + fst.d $fa0, $sp, 432 # 8-byte Folded Spill fmov.d $fa0, $fs3 pcaddu18i $ra, %call36(sin) jirl $ra, $ra, 0 fmadd.d $fa0, $fs4, $fa0, $fs0 - fmul.d $fs1, $fs5, $fa0 + fmul.d $fs1, $fs6, $fa0 fmov.d $fa0, $fs3 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 fmadd.d $fa0, $fs4, $fa0, $fs2 - fmul.d $fa0, $fs5, $fa0 - fadd.d $fa1, $fs6, $fs6 - fld.d $fa4, $sp, 408 # 8-byte Folded Reload - fneg.d $fa2, $fa4 - fmadd.d $fa3, $fa2, $fs6, $fs2 - fld.d $fa7, $sp, 424 # 8-byte Folded Reload - fmul.d $fa3, $fa7, $fa3 - fmadd.d $fa4, $fa4, $fs7, $fs0 - fmul.d $fa4, $fa7, $fa4 - fld.d $ft1, $sp, 416 # 8-byte Folded Reload - fmul.d $fa2, $ft1, $fa2 - pcalau12i $a0, %pc_hi20(.LCPI1_11) - fld.d $fa5, $a0, %pc_lo12(.LCPI1_11) - pcalau12i $a0, %pc_hi20(.LCPI1_12) - fld.d $fa6, $a0, %pc_lo12(.LCPI1_12) - fmul.d $fa2, $fa7, $fa2 - ld.d $a1, $sp, 432 # 8-byte Folded Reload - fst.d $fa3, $a1, 0 - fmul.d $fa3, $fa2, $fa5 - fmadd.d $fa3, $fa4, $fa6, $fa3 - pcalau12i $a0, %pc_hi20(.LCPI1_13) - fld.d $fa7, $a0, %pc_lo12(.LCPI1_13) - fst.d $fa3, $a1, 8 - fmul.d $fa3, $fs7, $fa1 - fmul.d $fa2, $fa2, $fa6 - fmadd.d $fa2, $fa4, $fa7, $fa2 - fst.d $fa2, $a1, 16 - vldi $vr2, -784 - fmadd.d $fa1, $fa1, $fs6, $fa2 - fmul.d $fa2, $fa3, $fa0 - fmadd.d $fa1, $fa1, $fs1, $fa2 - fld.d $ft0, $sp, 440 # 8-byte Folded Reload + fmul.d $fa0, $fs6, $fa0 + fadd.d $fa1, $fs5, $fs5 + fmul.d $fa2, $fs7, $fa1 + fld.d $fa5, $sp, 424 # 8-byte Folded Reload + fneg.d $fa3, $fa5 + fmadd.d $fa4, $fa3, $fs5, $fs2 + fld.d $fa6, $sp, 440 # 8-byte Folded Reload + fmul.d $fa4, $fa6, $fa4 + fmadd.d $fa5, $fa5, $fs7, $fs0 + fmul.d $fa5, $fa6, $fa5 + fld.d $ft1, $sp, 432 # 8-byte Folded Reload + fmul.d $fa3, $ft1, $fa3 + fmul.d $fa3, $fa6, $fa3 + ld.d $a2, $sp, 448 # 8-byte Folded Reload + fst.d $fa4, $a2, 0 + lu12i.w $a0, 331595 + ori $a0, $a0, 921 + lu32i.d $a0, -428754 + lu52i.d $a1, $a0, -1027 + movgr2fr.d $fa4, $a1 + fmul.d $fa6, $fa3, $fa4 + lu12i.w $a1, 358017 + ori $a1, $a1, 3827 + lu32i.d $a1, -173053 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fa7, $a1 + fmadd.d $fa6, $fa5, $fa7, $fa6 + fst.d $fa6, $a2, 8 + fmul.d $fa3, $fa3, $fa7 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa6, $a0 + fmadd.d $fa3, $fa5, $fa6, $fa3 + fst.d $fa3, $a2, 16 + vldi $vr3, -784 + fmadd.d $fa1, $fa1, $fs5, $fa3 + fmul.d $fa3, $fa2, $fa0 + fmadd.d $fa1, $fa1, $fs1, $fa3 + fld.d $ft0, $sp, 456 # 8-byte Folded Reload fmul.d $fa1, $ft0, $fa1 - vldi $vr2, -896 - fmul.d $fa2, $fs7, $fa2 - vldi $vr4, -912 - fmadd.d $fa2, $fa2, $fs7, $fa4 - fneg.d $fa4, $fs1 - fmul.d $fa3, $fa3, $fa4 - fmadd.d $fa2, $fa2, $fa0, $fa3 + vldi $vr3, -896 + fmul.d $fa3, $fs7, $fa3 + vldi $vr5, -912 + fmadd.d $fa3, $fa3, $fs7, $fa5 + fneg.d $fa5, $fs1 + fmul.d $fa2, $fa2, $fa5 + fmadd.d $fa2, $fa3, $fa0, $fa2 fmul.d $fa2, $ft0, $fa2 fadd.d $fa3, $ft1, $ft1 fmul.d $fa0, $fs7, $fa0 - fmadd.d $fa0, $fs6, $fs1, $fa0 + fmadd.d $fa0, $fs5, $fs1, $fa0 fmul.d $fa0, $fa3, $fa0 fmul.d $fa0, $ft0, $fa0 - fst.d $fa1, $a1, 24 - fmul.d $fa1, $fa0, $fa5 - fmadd.d $fa1, $fa2, $fa6, $fa1 - fst.d $fa1, $a1, 32 - fmul.d $fa0, $fa0, $fa6 - fmadd.d $fa0, $fa2, $fa7, $fa0 - fst.d $fa0, $a1, 40 - fld.d $fs7, $sp, 456 # 8-byte Folded Reload - fld.d $fs6, $sp, 464 # 8-byte Folded Reload - fld.d $fs5, $sp, 472 # 8-byte Folded Reload - fld.d $fs4, $sp, 480 # 8-byte Folded Reload - fld.d $fs3, $sp, 488 # 8-byte Folded Reload - fld.d $fs2, $sp, 496 # 8-byte Folded Reload - fld.d $fs1, $sp, 504 # 8-byte Folded Reload - fld.d $fs0, $sp, 512 # 8-byte Folded Reload - ld.d $s8, $sp, 520 # 8-byte Folded Reload - ld.d $s7, $sp, 528 # 8-byte Folded Reload - ld.d $s6, $sp, 536 # 8-byte Folded Reload - ld.d $s5, $sp, 544 # 8-byte Folded Reload - ld.d $s4, $sp, 552 # 8-byte Folded Reload - ld.d $s3, $sp, 560 # 8-byte Folded Reload - ld.d $s2, $sp, 568 # 8-byte Folded Reload - ld.d $s1, $sp, 576 # 8-byte Folded Reload - ld.d $s0, $sp, 584 # 8-byte Folded Reload - ld.d $fp, $sp, 592 # 8-byte Folded Reload - ld.d $ra, $sp, 600 # 8-byte Folded Reload - addi.d $sp, $sp, 608 + fst.d $fa1, $a2, 24 + fmul.d $fa1, $fa0, $fa4 + fmadd.d $fa1, $fa2, $fa7, $fa1 + fst.d $fa1, $a2, 32 + fmul.d $fa0, $fa0, $fa7 + fmadd.d $fa0, $fa2, $fa6, $fa0 + fst.d $fa0, $a2, 40 + fld.d $fs7, $sp, 472 # 8-byte Folded Reload + fld.d $fs6, $sp, 480 # 8-byte Folded Reload + fld.d $fs5, $sp, 488 # 8-byte Folded Reload + fld.d $fs4, $sp, 496 # 8-byte Folded Reload + fld.d $fs3, $sp, 504 # 8-byte Folded Reload + fld.d $fs2, $sp, 512 # 8-byte Folded Reload + fld.d $fs1, $sp, 520 # 8-byte Folded Reload + fld.d $fs0, $sp, 528 # 8-byte Folded Reload + ld.d $s8, $sp, 536 # 8-byte Folded Reload + ld.d $s7, $sp, 544 # 8-byte Folded Reload + ld.d $s6, $sp, 552 # 8-byte Folded Reload + ld.d $s5, $sp, 560 # 8-byte Folded Reload + ld.d $s4, $sp, 568 # 8-byte Folded Reload + ld.d $s3, $sp, 576 # 8-byte Folded Reload + ld.d $s2, $sp, 584 # 8-byte Folded Reload + ld.d $s1, $sp, 592 # 8-byte Folded Reload + ld.d $s0, $sp, 600 # 8-byte Folded Reload + ld.d $fp, $sp, 608 # 8-byte Folded Reload + ld.d $ra, $sp, 616 # 8-byte Folded Reload + addi.d $sp, $sp, 624 ret .LBB1_14: # %call.sqrt movcf2gr $a0, $fcc1 - st.d $a0, $sp, 408 + st.d $a0, $sp, 424 pcaddu18i $ra, %call36(sqrt) jirl $ra, $ra, 0 - ld.d $a0, $sp, 408 + ld.d $a0, $sp, 424 movgr2cf $fcc1, $a0 - fld.d $ft3, $sp, 440 # 8-byte Folded Reload - fmov.d $fs2, $fa0 + fld.d $ft3, $sp, 456 # 8-byte Folded Reload + fmov.d $fs3, $fa0 b .LBB1_11 .LBB1_15: # %call.sqrt191 fmov.d $fa0, $fa1 @@ -988,14 +990,7 @@ planetpv: # @planetpv .Lfunc_end1: .size planetpv, .Lfunc_end1-planetpv # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function radecdist -.LCPI2_0: - .dword 0x400e8ec8a4aeacc4 # double 3.8197186342054881 -.LCPI2_1: - .dword 0x404ca5dc1a63c1f8 # double 57.295779513082323 - .text - .globl radecdist + .globl radecdist # -- Begin function radecdist .p2align 5 .type radecdist,@function radecdist: # @radecdist @@ -1019,8 +1014,11 @@ radecdist: # @radecdist fld.d $fa1, $a0, 0 pcaddu18i $ra, %call36(atan2) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI2_0) + lu12i.w $a0, -374038 + ori $a0, $a0, 3268 + lu32i.d $a0, -94520 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 vldi $vr1, -968 fadd.d $fa1, $fa0, $fa1 @@ -1032,8 +1030,11 @@ radecdist: # @radecdist fdiv.d $fa0, $fa0, $fs0 pcaddu18i $ra, %call36(asin) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI2_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI2_1) + lu12i.w $a0, 108092 + ori $a0, $a0, 504 + lu32i.d $a0, -219684 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 fst.d $fa0, $s0, 8 fld.d $fs0, $sp, 0 # 8-byte Folded Reload @@ -1045,16 +1046,7 @@ radecdist: # @radecdist .Lfunc_end2: .size radecdist, .Lfunc_end2-radecdist # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI3_0: - .dword 0x4142b42c80000000 # double 2451545 -.LCPI3_1: - .dword 0x404ca5dc1a63c1f8 # double 57.295779513082323 -.LCPI3_2: - .dword 0x400e8ec8a4aeacc4 # double 3.8197186342054881 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -1074,11 +1066,13 @@ main: # @main fst.d $fs6, $sp, 272 # 8-byte Folded Spill fst.d $fs7, $sp, 264 # 8-byte Folded Spill move $fp, $zero - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI3_0) - fst.d $fa0, $sp, 56 # 8-byte Folded Spill lu12i.w $a0, 8 ori $s0, $a0, 3757 + lu12i.w $a0, -524288 + lu32i.d $a0, 177196 + lu52i.d $a0, $a0, 1044 + movgr2fr.d $fa0, $a0 + fst.d $fa0, $sp, 56 # 8-byte Folded Spill vldi $vr1, -912 ori $s1, $zero, 20 .p2align 4, , 16 @@ -1167,12 +1161,12 @@ main: # @main fsqrt.d $fs1, $fa2 pcaddu18i $ra, %call36(atan2) jirl $ra, $ra, 0 - fst.d $fa0, $sp, 112 # 8-byte Folded Spill - fst.d $fs1, $sp, 64 # 8-byte Folded Spill + fst.d $fa0, $sp, 104 # 8-byte Folded Spill + fst.d $fs1, $sp, 112 # 8-byte Folded Spill fdiv.d $fa0, $fs0, $fs1 pcaddu18i $ra, %call36(asin) jirl $ra, $ra, 0 - fst.d $fa0, $sp, 104 # 8-byte Folded Spill + fst.d $fa0, $sp, 96 # 8-byte Folded Spill addi.d $a0, $sp, 248 ori $a1, $zero, 4 addi.d $a2, $sp, 200 @@ -1187,8 +1181,8 @@ main: # @main fsqrt.d $fs1, $fa2 pcaddu18i $ra, %call36(atan2) jirl $ra, $ra, 0 - fst.d $fa0, $sp, 96 # 8-byte Folded Spill - fst.d $fs1, $sp, 88 # 8-byte Folded Spill + fst.d $fa0, $sp, 88 # 8-byte Folded Spill + fst.d $fs1, $sp, 64 # 8-byte Folded Spill fdiv.d $fa0, $fs0, $fs1 pcaddu18i $ra, %call36(asin) jirl $ra, $ra, 0 @@ -1204,11 +1198,11 @@ main: # @main fmul.d $fa2, $fa0, $fa0 fmadd.d $fa2, $fa1, $fa1, $fa2 fmadd.d $fa2, $fs0, $fs0, $fa2 - fsqrt.d $fs2, $fa2 + fsqrt.d $fs7, $fa2 pcaddu18i $ra, %call36(atan2) jirl $ra, $ra, 0 fst.d $fa0, $sp, 72 # 8-byte Folded Spill - fdiv.d $fa0, $fs0, $fs2 + fdiv.d $fa0, $fs0, $fs7 pcaddu18i $ra, %call36(asin) jirl $ra, $ra, 0 fmov.d $fs3, $fa0 @@ -1223,11 +1217,11 @@ main: # @main fmul.d $fa2, $fa0, $fa0 fmadd.d $fa2, $fa1, $fa1, $fa2 fmadd.d $fa2, $fs1, $fs1, $fa2 - fsqrt.d $fs7, $fa2 + fsqrt.d $fs2, $fa2 pcaddu18i $ra, %call36(atan2) jirl $ra, $ra, 0 fmov.d $fs4, $fa0 - fdiv.d $fa0, $fs1, $fs7 + fdiv.d $fa0, $fs1, $fs2 pcaddu18i $ra, %call36(asin) jirl $ra, $ra, 0 fmov.d $fs5, $fa0 @@ -1254,18 +1248,18 @@ main: # @main addi.w $s2, $s2, -1 bnez $s2, .LBB3_2 # %bb.3: # in Loop: Header=BB3_1 Depth=1 - fmov.d $ft10, $fs3 - fld.d $ft9, $sp, 72 # 8-byte Folded Reload + fmov.d $ft9, $fs3 + fld.d $fs3, $sp, 72 # 8-byte Folded Reload fld.d $ft8, $sp, 80 # 8-byte Folded Reload - fld.d $ft7, $sp, 96 # 8-byte Folded Reload - fld.d $ft6, $sp, 104 # 8-byte Folded Reload - fld.d $ft5, $sp, 112 # 8-byte Folded Reload - fst.d $fs7, $sp, 48 # 8-byte Folded Spill - fst.d $fs2, $sp, 192 # 8-byte Folded Spill + fld.d $ft7, $sp, 88 # 8-byte Folded Reload + fld.d $ft6, $sp, 96 # 8-byte Folded Reload + fld.d $ft5, $sp, 104 # 8-byte Folded Reload + fst.d $fs2, $sp, 48 # 8-byte Folded Spill + fst.d $fs7, $sp, 192 # 8-byte Folded Spill fld.d $ft4, $sp, 120 # 8-byte Folded Reload fld.d $ft3, $sp, 128 # 8-byte Folded Reload - fld.d $fs3, $sp, 88 # 8-byte Folded Reload fld.d $ft2, $sp, 144 # 8-byte Folded Reload + fld.d $fs2, $sp, 112 # 8-byte Folded Reload fld.d $ft1, $sp, 152 # 8-byte Folded Reload fld.d $fs7, $sp, 136 # 8-byte Folded Reload fld.d $ft0, $sp, 168 # 8-byte Folded Reload @@ -1275,64 +1269,70 @@ main: # @main addi.w $fp, $fp, 1 bne $fp, $s1, .LBB3_1 # %bb.4: # %.preheader.preheader - pcalau12i $a0, %pc_hi20(.LCPI3_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI3_1) - pcalau12i $a0, %pc_hi20(.LCPI3_2) - fld.d $fa2, $a0, %pc_lo12(.LCPI3_2) + lu12i.w $a0, 108092 + ori $a0, $a0, 504 + lu32i.d $a0, -219684 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 fst.d $fa0, $sp, 56 # 8-byte Folded Spill - fmul.d $fa0, $fs6, $fa2 + lu12i.w $a0, -374038 + ori $a0, $a0, 3268 + lu32i.d $a0, -94520 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fa0, $a0 + fmul.d $fa2, $fs6, $fa0 vldi $vr3, -968 - fadd.d $fa4, $fa0, $fa3 + fadd.d $fa4, $fa2, $fa3 movgr2fr.d $fa5, $zero - fcmp.clt.d $fcc0, $fa0, $fa5 - fsel $fa0, $fa0, $fa4, $fcc0 - fst.d $fa0, $sp, 40 # 8-byte Folded Spill - fmul.d $fa0, $fs5, $fa1 - fst.d $fa0, $sp, 32 # 8-byte Folded Spill - fmul.d $fa0, $fs4, $fa2 - fadd.d $fa4, $fa0, $fa3 - fcmp.clt.d $fcc0, $fa0, $fa5 - fsel $fa0, $fa0, $fa4, $fcc0 - fst.d $fa0, $sp, 24 # 8-byte Folded Spill - fmul.d $fa0, $ft10, $fa1 - fst.d $fa0, $sp, 16 # 8-byte Folded Spill - fmul.d $fa0, $ft9, $fa2 - fadd.d $fa4, $fa0, $fa3 - fcmp.clt.d $fcc0, $fa0, $fa5 - fsel $fa0, $fa0, $fa4, $fcc0 - fst.d $fa0, $sp, 72 # 8-byte Folded Spill - fmul.d $fa0, $ft8, $fa1 - fst.d $fa0, $sp, 80 # 8-byte Folded Spill - fmul.d $fa0, $ft7, $fa2 - fadd.d $fa4, $fa0, $fa3 - fcmp.clt.d $fcc0, $fa0, $fa5 - fsel $fa0, $fa0, $fa4, $fcc0 - fst.d $fa0, $sp, 96 # 8-byte Folded Spill - fmul.d $fa0, $ft6, $fa1 - fst.d $fa0, $sp, 104 # 8-byte Folded Spill - fmul.d $fa0, $ft5, $fa2 - fadd.d $fa4, $fa0, $fa3 - fcmp.clt.d $fcc0, $fa0, $fa5 - fsel $fa0, $fa0, $fa4, $fcc0 - fst.d $fa0, $sp, 112 # 8-byte Folded Spill + fcmp.clt.d $fcc0, $fa2, $fa5 + fsel $fa2, $fa2, $fa4, $fcc0 + fst.d $fa2, $sp, 40 # 8-byte Folded Spill + fmul.d $fa2, $fs5, $fa1 + fst.d $fa2, $sp, 32 # 8-byte Folded Spill + fmul.d $fa2, $fs4, $fa0 + fadd.d $fa4, $fa2, $fa3 + fcmp.clt.d $fcc0, $fa2, $fa5 + fsel $fa2, $fa2, $fa4, $fcc0 + fst.d $fa2, $sp, 24 # 8-byte Folded Spill + fmul.d $fa2, $ft9, $fa1 + fst.d $fa2, $sp, 16 # 8-byte Folded Spill + fmul.d $fa2, $fs3, $fa0 + fadd.d $fa4, $fa2, $fa3 + fcmp.clt.d $fcc0, $fa2, $fa5 + fsel $fa2, $fa2, $fa4, $fcc0 + fst.d $fa2, $sp, 72 # 8-byte Folded Spill + fmul.d $fa2, $ft8, $fa1 + fst.d $fa2, $sp, 80 # 8-byte Folded Spill + fmul.d $fa2, $ft7, $fa0 + fadd.d $fa4, $fa2, $fa3 + fcmp.clt.d $fcc0, $fa2, $fa5 + fsel $fa2, $fa2, $fa4, $fcc0 + fst.d $fa2, $sp, 88 # 8-byte Folded Spill + fmul.d $fa2, $ft6, $fa1 + fst.d $fa2, $sp, 96 # 8-byte Folded Spill + fmul.d $fa2, $ft5, $fa0 + fadd.d $fa4, $fa2, $fa3 + fcmp.clt.d $fcc0, $fa2, $fa5 + fsel $fa2, $fa2, $fa4, $fcc0 + fst.d $fa2, $sp, 104 # 8-byte Folded Spill fmul.d $fs4, $ft4, $fa1 - fmul.d $fa0, $ft3, $fa2 - fadd.d $fa4, $fa0, $fa3 - fcmp.clt.d $fcc0, $fa0, $fa5 - fsel $fs2, $fa0, $fa4, $fcc0 + fmul.d $fa2, $ft3, $fa0 + fadd.d $fa4, $fa2, $fa3 + fcmp.clt.d $fcc0, $fa2, $fa5 + fsel $fs3, $fa2, $fa4, $fcc0 fmul.d $fs5, $ft2, $fa1 - fmul.d $fa0, $ft1, $fa2 - fadd.d $fa4, $fa0, $fa3 + fmul.d $fa2, $ft1, $fa0 + fadd.d $fa4, $fa2, $fa3 + fcmp.clt.d $fcc0, $fa2, $fa5 + fsel $fs6, $fa2, $fa4, $fcc0 + fmul.d $fa1, $ft0, $fa1 + fmul.d $fa0, $fa7, $fa0 + fadd.d $fa2, $fa0, $fa3 fcmp.clt.d $fcc0, $fa0, $fa5 - fsel $fs6, $fa0, $fa4, $fcc0 - fmul.d $fa0, $ft0, $fa1 - fmul.d $fa1, $fa7, $fa2 - fadd.d $fa2, $fa1, $fa3 - fcmp.clt.d $fcc0, $fa1, $fa5 - fsel $fa1, $fa1, $fa2, $fcc0 - movfr2gr.d $a1, $fa1 - movfr2gr.d $a2, $fa0 + fsel $fa0, $fa0, $fa2, $fcc0 + movfr2gr.d $a1, $fa0 + movfr2gr.d $a2, $fa1 movfr2gr.d $a3, $fa6 pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $fp, $a0, %pc_lo12(.L.str.1) @@ -1345,26 +1345,26 @@ main: # @main move $a0, $fp pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - movfr2gr.d $a1, $fs2 + movfr2gr.d $a1, $fs3 movfr2gr.d $a2, $fs4 movfr2gr.d $a3, $fs7 move $a0, $fp pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - fld.d $fa0, $sp, 112 # 8-byte Folded Reload - movfr2gr.d $a1, $fa0 fld.d $fa0, $sp, 104 # 8-byte Folded Reload + movfr2gr.d $a1, $fa0 + fld.d $fa0, $sp, 96 # 8-byte Folded Reload movfr2gr.d $a2, $fa0 - fld.d $fa0, $sp, 64 # 8-byte Folded Reload - movfr2gr.d $a3, $fa0 + movfr2gr.d $a3, $fs2 move $a0, $fp pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - fld.d $fa0, $sp, 96 # 8-byte Folded Reload + fld.d $fa0, $sp, 88 # 8-byte Folded Reload movfr2gr.d $a1, $fa0 fld.d $fa0, $sp, 80 # 8-byte Folded Reload movfr2gr.d $a2, $fa0 - movfr2gr.d $a3, $fs3 + fld.d $fa0, $sp, 64 # 8-byte Folded Reload + movfr2gr.d $a3, $fa0 move $a0, $fp pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 diff --git a/results/SingleSource/Benchmarks/CoyoteBench/CMakeFiles/fftbench.dir/fftbench.s b/results/SingleSource/Benchmarks/CoyoteBench/CMakeFiles/fftbench.dir/fftbench.s index 16fbb020..baeb4990 100644 --- a/results/SingleSource/Benchmarks/CoyoteBench/CMakeFiles/fftbench.dir/fftbench.s +++ b/results/SingleSource/Benchmarks/CoyoteBench/CMakeFiles/fftbench.dir/fftbench.s @@ -3,12 +3,8 @@ .globl _ZSt21ios_base_library_initv # End of file scope inline assembly - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x3e340000002813d9 # double 4.6566128752499998E-9 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -89,11 +85,14 @@ main: # @main lu12i.w $a4, 4 ori $a4, $a4, 423 lu12i.w $a5, -1 - pcalau12i $a6, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a6, %pc_lo12(.LCPI0_0) ori $a5, $a5, 1260 lu12i.w $a6, 524287 ori $a6, $a6, 4095 + lu12i.w $t0, 641 + ori $t0, $t0, 985 + lu32i.d $t0, 262144 + lu52i.d $t0, $t0, 995 + movgr2fr.d $fa0, $t0 .p2align 4, , 16 .LBB0_6: # %_ZN10polynomialIdEC2Em.exit28 # =>This Inner Loop Header: Depth=1 @@ -1671,14 +1670,8 @@ GCC_except_table4: .Lcst_end2: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN10polynomialIdE3fftERKS0_ -.LCPI5_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI5_1: - .dword 0x401921fb54442d18 # double 6.2831853071795862 .section .text._ZN10polynomialIdE3fftERKS0_,"axG",@progbits,_ZN10polynomialIdE3fftERKS0_,comdat - .weak _ZN10polynomialIdE3fftERKS0_ + .weak _ZN10polynomialIdE3fftERKS0_ # -- Begin function _ZN10polynomialIdE3fftERKS0_ .p2align 5 .type _ZN10polynomialIdE3fftERKS0_,@function _ZN10polynomialIdE3fftERKS0_: # @_ZN10polynomialIdE3fftERKS0_ @@ -1799,10 +1792,14 @@ _ZN10polynomialIdE3fftERKS0_: # @_ZN10polynomialIdE3fftERKS0_ move $a1, $zero ori $s1, $zero, 1 ori $s4, $zero, 2 - pcalau12i $a0, %pc_hi20(.LCPI5_0) - fld.d $fs5, $a0, %pc_lo12(.LCPI5_0) - pcalau12i $a0, %pc_hi20(.LCPI5_1) - fld.d $fs0, $a0, %pc_lo12(.LCPI5_1) + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 + movgr2fr.d $fs5, $a0 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1025 + movgr2fr.d $fs0, $a0 movgr2fr.d $fs1, $zero st.d $s0, $sp, 40 # 8-byte Folded Spill b .LBB5_9 @@ -1967,39 +1964,33 @@ _ZN10polynomialIdE3fftERKS0_: # @_ZN10polynomialIdE3fftERKS0_ .size _ZN10polynomialIdE3fftERKS0_, .Lfunc_end5-_ZN10polynomialIdE3fftERKS0_ .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN10polynomialIdE11inverse_fftERKS_ISt7complexIdEE -.LCPI6_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI6_1: - .dword 0xc01921fb54442d18 # double -6.2831853071795862 .section .text._ZN10polynomialIdE11inverse_fftERKS_ISt7complexIdEE,"axG",@progbits,_ZN10polynomialIdE11inverse_fftERKS_ISt7complexIdEE,comdat - .weak _ZN10polynomialIdE11inverse_fftERKS_ISt7complexIdEE + .weak _ZN10polynomialIdE11inverse_fftERKS_ISt7complexIdEE # -- Begin function _ZN10polynomialIdE11inverse_fftERKS_ISt7complexIdEE .p2align 5 .type _ZN10polynomialIdE11inverse_fftERKS_ISt7complexIdEE,@function _ZN10polynomialIdE11inverse_fftERKS_ISt7complexIdEE: # @_ZN10polynomialIdE11inverse_fftERKS_ISt7complexIdEE .cfi_startproc # %bb.0: - addi.d $sp, $sp, -224 - .cfi_def_cfa_offset 224 - st.d $ra, $sp, 216 # 8-byte Folded Spill - st.d $fp, $sp, 208 # 8-byte Folded Spill - st.d $s0, $sp, 200 # 8-byte Folded Spill - st.d $s1, $sp, 192 # 8-byte Folded Spill - st.d $s2, $sp, 184 # 8-byte Folded Spill - st.d $s3, $sp, 176 # 8-byte Folded Spill - st.d $s4, $sp, 168 # 8-byte Folded Spill - st.d $s5, $sp, 160 # 8-byte Folded Spill - st.d $s6, $sp, 152 # 8-byte Folded Spill - st.d $s7, $sp, 144 # 8-byte Folded Spill - st.d $s8, $sp, 136 # 8-byte Folded Spill - fst.d $fs0, $sp, 128 # 8-byte Folded Spill - fst.d $fs1, $sp, 120 # 8-byte Folded Spill - fst.d $fs2, $sp, 112 # 8-byte Folded Spill - fst.d $fs3, $sp, 104 # 8-byte Folded Spill - fst.d $fs4, $sp, 96 # 8-byte Folded Spill - fst.d $fs5, $sp, 88 # 8-byte Folded Spill - fst.d $fs6, $sp, 80 # 8-byte Folded Spill + addi.d $sp, $sp, -208 + .cfi_def_cfa_offset 208 + st.d $ra, $sp, 200 # 8-byte Folded Spill + st.d $fp, $sp, 192 # 8-byte Folded Spill + st.d $s0, $sp, 184 # 8-byte Folded Spill + st.d $s1, $sp, 176 # 8-byte Folded Spill + st.d $s2, $sp, 168 # 8-byte Folded Spill + st.d $s3, $sp, 160 # 8-byte Folded Spill + st.d $s4, $sp, 152 # 8-byte Folded Spill + st.d $s5, $sp, 144 # 8-byte Folded Spill + st.d $s6, $sp, 136 # 8-byte Folded Spill + st.d $s7, $sp, 128 # 8-byte Folded Spill + st.d $s8, $sp, 120 # 8-byte Folded Spill + fst.d $fs0, $sp, 112 # 8-byte Folded Spill + fst.d $fs1, $sp, 104 # 8-byte Folded Spill + fst.d $fs2, $sp, 96 # 8-byte Folded Spill + fst.d $fs3, $sp, 88 # 8-byte Folded Spill + fst.d $fs4, $sp, 80 # 8-byte Folded Spill + fst.d $fs5, $sp, 72 # 8-byte Folded Spill + fst.d $fs6, $sp, 64 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -2018,18 +2009,18 @@ _ZN10polynomialIdE11inverse_fftERKS_ISt7complexIdEE: # @_ZN10polynomialIdE11inve .cfi_offset 60, -128 .cfi_offset 61, -136 .cfi_offset 62, -144 - move $fp, $a1 + move $s0, $a1 ld.d $a1, $a1, 16 move $s2, $a0 move $a0, $a1 pcaddu18i $ra, %call36(_ZN10polynomialIdE4log2Em) jirl $ra, $ra, 0 - ld.d $a1, $fp, 16 - st.d $a0, $sp, 56 # 8-byte Folded Spill + ld.d $a1, $s0, 16 + st.d $a0, $sp, 40 # 8-byte Folded Spill move $a0, $a1 pcaddu18i $ra, %call36(_ZN10polynomialIdE4log2Em) jirl $ra, $ra, 0 - ld.d $s5, $fp, 16 + ld.d $s5, $s0, 16 move $s3, $a0 pcalau12i $a0, %pc_hi20(_ZTV10polynomialISt7complexIdEE+16) addi.d $a0, $a0, %pc_lo12(_ZTV10polynomialISt7complexIdEE+16) @@ -2044,17 +2035,17 @@ _ZN10polynomialIdE11inverse_fftERKS_ISt7complexIdEE: # @_ZN10polynomialIdE11inve or $a0, $a0, $a1 pcaddu18i $ra, %call36(_Znam) jirl $ra, $ra, 0 - move $s0, $a0 + move $fp, $a0 beqz $s5, .LBB6_7 # %bb.1: # %.lr.ph.i - move $a0, $s0 + move $a0, $fp move $a1, $zero move $a2, $s4 pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 move $a0, $zero - st.d $s0, $s2, 8 - ld.d $a1, $fp, 8 + st.d $fp, $s2, 8 + ld.d $a1, $s0, 8 addi.d $a3, $s3, -1 ori $a2, $zero, 1 sll.w $a3, $a2, $a3 @@ -2081,47 +2072,50 @@ _ZN10polynomialIdE11inverse_fftERKS_ISt7complexIdEE: # @_ZN10polynomialIdE11inve # in Loop: Header=BB6_2 Depth=1 slli.d $a4, $a4, 4 addi.d $a0, $a0, 1 - vstx $vr0, $s0, $a4 + vstx $vr0, $fp, $a4 bne $a0, $s5, .LBB6_2 # %bb.5: # %_ZN10polynomialIdE11bit_reverseERKS_ISt7complexIdEE.exit - pcalau12i $a2, %pc_hi20(.LCPI6_0) - ld.d $a0, $sp, 56 # 8-byte Folded Reload + lu12i.w $a3, 256 + ld.d $a0, $sp, 40 # 8-byte Folded Reload bnez $a0, .LBB6_8 .LBB6_6: # %.preheader bnez $s5, .LBB6_22 b .LBB6_24 .LBB6_7: # %_ZN10polynomialISt7complexIdEEC2Em.exit.thread.i - st.d $s0, $s2, 8 - pcalau12i $a2, %pc_hi20(.LCPI6_0) - ld.d $a0, $sp, 56 # 8-byte Folded Reload + st.d $fp, $s2, 8 + lu12i.w $a3, 256 + ld.d $a0, $sp, 40 # 8-byte Folded Reload beqz $a0, .LBB6_6 .LBB6_8: # %.lr.ph77.preheader move $a1, $zero ori $s1, $zero, 1 ori $s6, $zero, 2 - st.d $a2, $sp, 40 # 8-byte Folded Spill - fld.d $fs6, $a2, %pc_lo12(.LCPI6_0) - pcalau12i $a0, %pc_hi20(.LCPI6_1) - fld.d $fs0, $a0, %pc_lo12(.LCPI6_1) - movgr2fr.d $fs1, $zero - fneg.d $fs2, $fs1 - st.d $s0, $sp, 48 # 8-byte Folded Spill + lu52i.d $a0, $a3, 1107 + movgr2fr.d $fs6, $a0 + movgr2fr.d $fs0, $zero + fneg.d $fs1, $fs0 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, -1023 + movgr2fr.d $fs2, $a0 + st.d $fp, $sp, 32 # 8-byte Folded Spill b .LBB6_10 .p2align 4, , 16 .LBB6_9: # in Loop: Header=BB6_10 Depth=1 slli.d $s6, $s6, 1 - ld.d $a1, $sp, 72 # 8-byte Folded Reload + ld.d $a1, $sp, 56 # 8-byte Folded Reload addi.d $a1, $a1, 1 - ld.d $s1, $sp, 64 # 8-byte Folded Reload + ld.d $s1, $sp, 48 # 8-byte Folded Reload slli.d $s1, $s1, 1 - ld.d $s0, $sp, 48 # 8-byte Folded Reload - ld.d $a0, $sp, 56 # 8-byte Folded Reload + ld.d $fp, $sp, 32 # 8-byte Folded Reload + ld.d $a0, $sp, 40 # 8-byte Folded Reload beq $a1, $a0, .LBB6_21 .LBB6_10: # %.lr.ph77 # =>This Loop Header: Depth=1 # Child Loop BB6_11 Depth 2 # Child Loop BB6_15 Depth 3 - st.d $a1, $sp, 72 # 8-byte Folded Spill + st.d $a1, $sp, 56 # 8-byte Folded Spill srli.d $a0, $s6, 32 lu52i.d $a1, $zero, 1107 or $a0, $a0, $a1 @@ -2132,27 +2126,27 @@ _ZN10polynomialIdE11inverse_fftERKS_ISt7complexIdEE: # @_ZN10polynomialIdE11inve bstrins.d $a0, $a1, 63, 32 movgr2fr.d $fa1, $a0 fadd.d $fa2, $fa1, $fa0 - fmov.d $fa0, $fs2 - fmov.d $fa1, $fs0 - fmov.d $fa3, $fs1 + fmov.d $fa0, $fs1 + fmov.d $fa1, $fs2 + fmov.d $fa3, $fs0 pcaddu18i $ra, %call36(__divdc3) jirl $ra, $ra, 0 pcaddu18i $ra, %call36(cexp) jirl $ra, $ra, 0 move $s4, $zero addi.d $s3, $s1, -1 - st.d $s1, $sp, 64 # 8-byte Folded Spill + st.d $s1, $sp, 48 # 8-byte Folded Spill slli.d $s2, $s1, 4 slli.d $s7, $s6, 4 vldi $vr8, -912 - move $s1, $s0 - fmov.d $fs3, $fs1 + move $s1, $fp + fmov.d $fs3, $fs0 .p2align 4, , 16 .LBB6_11: # %.preheader68 # Parent Loop BB6_10 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB6_15 Depth 3 - ld.d $a0, $fp, 16 + ld.d $a0, $s0, 16 addi.d $a1, $a0, -1 bgeu $a1, $s4, .LBB6_14 .LBB6_12: # %._crit_edge @@ -2176,15 +2170,15 @@ _ZN10polynomialIdE11inverse_fftERKS_ISt7complexIdEE: # @_ZN10polynomialIdE11inve .p2align 4, , 16 .LBB6_14: # %.lr.ph.preheader # in Loop: Header=BB6_11 Depth=2 - move $s0, $s1 + move $fp, $s1 move $s8, $s4 .p2align 4, , 16 .LBB6_15: # %.lr.ph # Parent Loop BB6_10 Depth=1 # Parent Loop BB6_11 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $s5, $s0, $s2 - fldx.d $fa2, $s0, $s2 + add.d $s5, $fp, $s2 + fldx.d $fa2, $fp, $s2 fld.d $fa3, $s5, 8 fmul.d $fa4, $ft0, $fa2 fmul.d $fa5, $fs3, $fa3 @@ -2195,19 +2189,19 @@ _ZN10polynomialIdE11inverse_fftERKS_ISt7complexIdEE: # @_ZN10polynomialIdE11inve fadd.d $fa5, $fa7, $fa6 bceqz $fcc0, .LBB6_17 .LBB6_16: # in Loop: Header=BB6_15 Depth=3 - fld.d $fa2, $s0, 0 - fld.d $fa3, $s0, 8 + fld.d $fa2, $fp, 0 + fld.d $fa3, $fp, 8 fadd.d $fa6, $fa4, $fa2 fadd.d $fa7, $fa5, $fa3 - fst.d $fa6, $s0, 0 - fst.d $fa7, $s0, 8 + fst.d $fa6, $fp, 0 + fst.d $fa7, $fp, 8 fsub.d $fa2, $fa2, $fa4 fsub.d $fa3, $fa3, $fa5 fst.d $fa2, $s5, 0 fst.d $fa3, $s5, 8 add.d $s8, $s8, $s6 addi.d $a1, $a0, -1 - add.d $s0, $s0, $s7 + add.d $fp, $fp, $s7 bgeu $a1, $s8, .LBB6_15 b .LBB6_12 .LBB6_17: # in Loop: Header=BB6_15 Depth=3 @@ -2222,7 +2216,7 @@ _ZN10polynomialIdE11inverse_fftERKS_ISt7complexIdEE: # @_ZN10polynomialIdE11inve pcaddu18i $ra, %call36(__muldc3) jirl $ra, $ra, 0 vld $vr8, $sp, 16 # 16-byte Folded Reload - ld.d $a0, $fp, 16 + ld.d $a0, $s0, 16 fmov.d $fa4, $fa0 fmov.d $fa0, $fs5 fmov.d $fa5, $fa1 @@ -2246,23 +2240,24 @@ _ZN10polynomialIdE11inverse_fftERKS_ISt7complexIdEE: # @_ZN10polynomialIdE11inve fmov.d $fa1, $fs4 b .LBB6_13 .LBB6_21: # %.preheader.loopexit - ld.d $s5, $fp, 16 - ld.d $a2, $sp, 40 # 8-byte Folded Reload + ld.d $s5, $s0, 16 + lu12i.w $a3, 256 beqz $s5, .LBB6_24 .LBB6_22: # %.lr.ph79 move $a0, $zero srli.d $a1, $s5, 32 - fld.d $fa0, $a2, %pc_lo12(.LCPI6_0) lu52i.d $a2, $zero, 1107 or $a1, $a1, $a2 + movgr2fr.d $fa0, $a1 + lu52i.d $a1, $a3, 1107 movgr2fr.d $fa1, $a1 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 move $a1, $s5 lu12i.w $a2, 275200 bstrins.d $a1, $a2, 63, 32 movgr2fr.d $fa1, $a1 fadd.d $fa0, $fa1, $fa0 - addi.d $a1, $s0, 8 + addi.d $a1, $fp, 8 .p2align 4, , 16 .LBB6_23: # =>This Inner Loop Header: Depth=1 fld.d $fa1, $a1, -8 @@ -2275,25 +2270,25 @@ _ZN10polynomialIdE11inverse_fftERKS_ISt7complexIdEE: # @_ZN10polynomialIdE11inve addi.d $a1, $a1, 16 bltu $a0, $s5, .LBB6_23 .LBB6_24: # %._crit_edge80 - fld.d $fs6, $sp, 80 # 8-byte Folded Reload - fld.d $fs5, $sp, 88 # 8-byte Folded Reload - fld.d $fs4, $sp, 96 # 8-byte Folded Reload - fld.d $fs3, $sp, 104 # 8-byte Folded Reload - fld.d $fs2, $sp, 112 # 8-byte Folded Reload - fld.d $fs1, $sp, 120 # 8-byte Folded Reload - fld.d $fs0, $sp, 128 # 8-byte Folded Reload - ld.d $s8, $sp, 136 # 8-byte Folded Reload - ld.d $s7, $sp, 144 # 8-byte Folded Reload - ld.d $s6, $sp, 152 # 8-byte Folded Reload - ld.d $s5, $sp, 160 # 8-byte Folded Reload - ld.d $s4, $sp, 168 # 8-byte Folded Reload - ld.d $s3, $sp, 176 # 8-byte Folded Reload - ld.d $s2, $sp, 184 # 8-byte Folded Reload - ld.d $s1, $sp, 192 # 8-byte Folded Reload - ld.d $s0, $sp, 200 # 8-byte Folded Reload - ld.d $fp, $sp, 208 # 8-byte Folded Reload - ld.d $ra, $sp, 216 # 8-byte Folded Reload - addi.d $sp, $sp, 224 + fld.d $fs6, $sp, 64 # 8-byte Folded Reload + fld.d $fs5, $sp, 72 # 8-byte Folded Reload + fld.d $fs4, $sp, 80 # 8-byte Folded Reload + fld.d $fs3, $sp, 88 # 8-byte Folded Reload + fld.d $fs2, $sp, 96 # 8-byte Folded Reload + fld.d $fs1, $sp, 104 # 8-byte Folded Reload + fld.d $fs0, $sp, 112 # 8-byte Folded Reload + ld.d $s8, $sp, 120 # 8-byte Folded Reload + ld.d $s7, $sp, 128 # 8-byte Folded Reload + ld.d $s6, $sp, 136 # 8-byte Folded Reload + ld.d $s5, $sp, 144 # 8-byte Folded Reload + ld.d $s4, $sp, 152 # 8-byte Folded Reload + ld.d $s3, $sp, 160 # 8-byte Folded Reload + ld.d $s2, $sp, 168 # 8-byte Folded Reload + ld.d $s1, $sp, 176 # 8-byte Folded Reload + ld.d $s0, $sp, 184 # 8-byte Folded Reload + ld.d $fp, $sp, 192 # 8-byte Folded Reload + ld.d $ra, $sp, 200 # 8-byte Folded Reload + addi.d $sp, $sp, 208 ret .Lfunc_end6: .size _ZN10polynomialIdE11inverse_fftERKS_ISt7complexIdEE, .Lfunc_end6-_ZN10polynomialIdE11inverse_fftERKS_ISt7complexIdEE diff --git a/results/SingleSource/Benchmarks/CoyoteBench/CMakeFiles/lpbench.dir/lpbench.s b/results/SingleSource/Benchmarks/CoyoteBench/CMakeFiles/lpbench.dir/lpbench.s index 527bad08..cffa6ab5 100644 --- a/results/SingleSource/Benchmarks/CoyoteBench/CMakeFiles/lpbench.dir/lpbench.s +++ b/results/SingleSource/Benchmarks/CoyoteBench/CMakeFiles/lpbench.dir/lpbench.s @@ -1,10 +1,6 @@ .file "lpbench.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function matgen -.LCPI0_0: - .dword 0x3e00000000200fe1 # double 4.65661287525E-10 .text - .globl matgen + .globl matgen # -- Begin function matgen .p2align 5 .type matgen,@function matgen: # @matgen @@ -36,10 +32,12 @@ matgen: # @matgen ori $a7, $a7, 423 lu12i.w $t0, -1 ori $t0, $t0, 1260 - pcalau12i $t1, %pc_hi20(.LCPI0_0) - fld.d $fa0, $t1, %pc_lo12(.LCPI0_0) lu12i.w $t1, 524287 ori $t1, $t1, 4095 + lu12i.w $t2, 512 + ori $t2, $t2, 4065 + lu52i.d $t2, $t2, 992 + movgr2fr.d $fa0, $t2 ori $t2, $zero, 2000 .p2align 4, , 16 .LBB0_1: # %.preheader23 @@ -936,12 +934,7 @@ dgesl: # @dgesl .Lfunc_end5: .size dgesl, .Lfunc_end5-dgesl # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI6_0: - .dword 0x3e00000000200fe1 # double 4.65661287525E-10 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -1021,10 +1014,12 @@ main: # @main ori $a7, $a7, 423 lu12i.w $t0, -1 ori $t0, $t0, 1260 - pcalau12i $t1, %pc_hi20(.LCPI6_0) - fld.d $fa0, $t1, %pc_lo12(.LCPI6_0) lu12i.w $t1, 524287 ori $t1, $t1, 4095 + lu12i.w $t2, 512 + ori $t2, $t2, 4065 + lu52i.d $t2, $t2, 992 + movgr2fr.d $fa0, $t2 ori $t2, $zero, 2000 .p2align 4, , 16 .LBB6_6: # %.preheader23.i diff --git a/results/SingleSource/Benchmarks/Dhrystone/CMakeFiles/fldry.dir/fldry.s b/results/SingleSource/Benchmarks/Dhrystone/CMakeFiles/fldry.dir/fldry.s index 2524897c..8bdce37b 100644 --- a/results/SingleSource/Benchmarks/Dhrystone/CMakeFiles/fldry.dir/fldry.s +++ b/results/SingleSource/Benchmarks/Dhrystone/CMakeFiles/fldry.dir/fldry.s @@ -214,12 +214,7 @@ Proc5: # @Proc5 .Lfunc_end6: .size Proc5, .Lfunc_end6-Proc5 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function Proc6 -.LCPI7_0: - .dword 0x4059000000000000 # double 100 - .text - .globl Proc6 + .globl Proc6 # -- Begin function Proc6 .p2align 5 .type Proc6,@function Proc6: # @Proc6 @@ -241,8 +236,10 @@ Proc6: # @Proc6 # %bb.3: pcalau12i $a0, %pc_hi20(IntGlob) fld.d $fa0, $a0, %pc_lo12(IntGlob) - pcalau12i $a0, %pc_hi20(.LCPI7_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI7_0) + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa1, $fa0 movcf2gr $a0, $fcc0 masknez $a5, $a2, $a0 diff --git a/results/SingleSource/Benchmarks/Linpack/CMakeFiles/linpack-pc.dir/linpack-pc.s b/results/SingleSource/Benchmarks/Linpack/CMakeFiles/linpack-pc.dir/linpack-pc.s index 928a64b7..a15fd2a5 100644 --- a/results/SingleSource/Benchmarks/Linpack/CMakeFiles/linpack-pc.dir/linpack-pc.s +++ b/results/SingleSource/Benchmarks/Linpack/CMakeFiles/linpack-pc.dir/linpack-pc.s @@ -1,10 +1,6 @@ .file "linpack-pc.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function second -.LCPI0_0: - .word 0x49742400 # float 1.0E+6 .text - .globl second + .globl second # -- Begin function second .p2align 5 .type second,@function second: # @second @@ -13,11 +9,12 @@ second: # @second st.d $ra, $sp, 8 # 8-byte Folded Spill pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI0_0) - movgr2fr.d $fa1, $a0 - ffint.s.l $fa1, $fa1 - fdiv.s $fa0, $fa1, $fa0 + movgr2fr.d $fa0, $a0 + ffint.s.l $fa0, $fa0 + lu12i.w $a0, 300866 + ori $a0, $a0, 1024 + movgr2fr.w $fa1, $a0 + fdiv.s $fa0, $fa0, $fa1 ld.d $ra, $sp, 8 # 8-byte Folded Reload addi.d $sp, $sp, 16 ret @@ -33,28 +30,8 @@ what_date: # @what_date .Lfunc_end1: .size what_date, .Lfunc_end1-what_date # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI2_0: - .dword 0x3f10000000000000 # double 6.103515625E-5 -.LCPI2_4: - .dword 0x412e848000000000 # double 1.0E+6 -.LCPI2_5: - .dword 0x4124f49560000000 # double 686666.6875 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI2_1: - .word 0x49742400 # float 1.0E+6 -.LCPI2_2: - .word 0x42c80000 # float 100 -.LCPI2_3: - .word 0x34000000 # float 1.1920929E-7 -.LCPI2_6: - .word 0x3d656042 # float 0.0560000017 -.LCPI2_7: - .word 0x43c80000 # float 400 .section .text.unlikely.,"ax",@progbits - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -122,9 +99,9 @@ main: # @main addi.d $a1, $a1, %pc_lo12(main.a) move $a2, $zero ori $a3, $zero, 3125 - pcalau12i $a4, %pc_hi20(.LCPI2_0) - fld.d $fs0, $a4, %pc_lo12(.LCPI2_0) lu12i.w $s5, -8 + lu52i.d $a4, $zero, 1009 + movgr2fr.d $fs0, $a4 ori $a4, $zero, 400 ori $a5, $zero, 100 .p2align 4, , 16 @@ -285,10 +262,11 @@ main: # @main fst.s $fs4, $s1, 396 pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(.LCPI2_1) - fld.s $fs1, $a1, %pc_lo12(.LCPI2_1) movgr2fr.d $fa0, $a0 ffint.s.l $fa0, $fa0 + lu12i.w $a0, 300866 + ori $a0, $a0, 1024 + movgr2fr.w $fs1, $a0 fdiv.s $fs2, $fa0, $fs1 pcalau12i $a0, %pc_hi20(main.ipvt) addi.d $a3, $a0, %pc_lo12(main.ipvt) @@ -504,7 +482,7 @@ main: # @main bne $t1, $t2, .LBB2_28 b .LBB2_19 .LBB2_29: # %dgesl.exit - st.d $t7, $sp, 40 # 8-byte Folded Spill + st.d $t7, $sp, 56 # 8-byte Folded Spill movgr2fr.d $fa0, $a0 ffint.s.l $fa0, $fa0 fld.s $fs1, $sp, 100 # 4-byte Folded Reload @@ -805,16 +783,16 @@ main: # @main fsel $fa0, $fa1, $fa0, $fcc0 bne $a0, $a1, .LBB2_36 # %bb.37: - pcalau12i $a0, %pc_hi20(.LCPI2_2) - fld.s $fa1, $a0, %pc_lo12(.LCPI2_2) - pcalau12i $a0, %pc_hi20(.LCPI2_3) - fld.s $fa2, $a0, %pc_lo12(.LCPI2_3) - fld.s $fa3, $sp, 192 # 4-byte Folded Reload - fld.s $fa4, $sp, 184 # 4-byte Folded Reload - fadd.s $fs3, $fa3, $fa4 + fld.s $fa1, $sp, 192 # 4-byte Folded Reload + fld.s $fa2, $sp, 184 # 4-byte Folded Reload + fadd.s $fs3, $fa1, $fa2 + lu12i.w $a0, 273536 + movgr2fr.w $fa1, $a0 fmul.s $fa1, $fs2, $fa1 fmul.s $fa0, $fa1, $fa0 - fmul.s $fa0, $fa0, $fa2 + lu12i.w $a0, 212992 + movgr2fr.w $fa1, $a0 + fmul.s $fa0, $fa0, $fa1 fdiv.s $fs1, $fs5, $fa0 fld.s $fa0, $s3, 0 fld.s $fa1, $s3, 396 @@ -881,34 +859,36 @@ main: # @main fst.s $fs3, $a0, 120 movgr2fr.w $fa0, $zero fcmp.cule.s $fcc0, $fs3, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI2_4) - st.d $a0, $sp, 56 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI2_5) - st.d $a0, $sp, 48 # 8-byte Folded Spill + lu12i.w $a1, 393216 fmov.s $fa1, $fa0 bcnez $fcc0, .LBB2_39 # %bb.38: - ld.d $a0, $sp, 56 # 8-byte Folded Reload - fld.d $fa0, $a0, %pc_lo12(.LCPI2_4) - ld.d $a0, $sp, 48 # 8-byte Folded Reload - fld.d $fa1, $a0, %pc_lo12(.LCPI2_5) - fcvt.d.s $fa2, $fs3 - fmul.d $fa0, $fa2, $fa0 + fcvt.d.s $fa0, $fs3 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 + move $a0, $a1 + lu32i.d $a0, 324757 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa1, $a0 fdiv.d $fa0, $fa1, $fa0 fcvt.s.d $fa0, $fa0 vldi $vr1, -1280 fdiv.s $fa1, $fa1, $fa0 .LBB2_39: # %.preheader52.us.i113.preheader.preheader - pcalau12i $a0, %pc_hi20(.LCPI2_6) - fld.s $fa2, $a0, %pc_lo12(.LCPI2_6) - ld.d $a1, $sp, 104 # 8-byte Folded Reload - fst.s $fa0, $a1, 180 - ld.d $a0, $sp, 16 # 8-byte Folded Reload - ld.d $a3, $a0, 0 - fst.s $fa1, $a1, 240 - fst.s $fa2, $sp, 84 # 4-byte Folded Spill - fdiv.s $fa0, $fs3, $fa2 - fst.s $fa0, $a1, 300 + ld.d $a2, $sp, 104 # 8-byte Folded Reload + fst.s $fa0, $a2, 180 + fst.s $fa1, $a2, 240 + lu12i.w $a0, 251478 + ori $a0, $a0, 66 + ld.d $a1, $sp, 16 # 8-byte Folded Reload + ld.d $a3, $a1, 0 + movgr2fr.w $fa0, $a0 + fst.s $fa0, $sp, 84 # 4-byte Folded Spill + fdiv.s $fa0, $fs3, $fa0 + fst.s $fa0, $a2, 300 pcalau12i $a0, %pc_hi20(.L.str.14) addi.d $a0, $a0, %pc_lo12(.L.str.14) ori $a1, $zero, 29 @@ -2011,14 +1991,14 @@ main: # @main ld.d $a0, $sp, 136 # 8-byte Folded Reload movgr2fr.d $fa1, $a0 ffint.s.l $fa1, $fa1 - pcalau12i $a0, %pc_hi20(.LCPI2_7) - fld.s $fa3, $a0, %pc_lo12(.LCPI2_7) fdiv.s $fa1, $fa1, $fa2 fsub.s $fa0, $fa1, $fa0 + lu12i.w $a0, 277632 + movgr2fr.w $fa1, $a0 ld.d $s0, $sp, 16 # 8-byte Folded Reload ld.d $a0, $s0, 0 - fst.s $fa3, $sp, 12 # 4-byte Folded Spill - fdiv.s $fs1, $fa0, $fa3 + fst.s $fa1, $sp, 12 # 4-byte Folded Spill + fdiv.s $fs1, $fa0, $fa1 ori $a1, $zero, 1000 st.w $a1, $s1, %pc_lo12(main.ntimes) pcalau12i $a1, %pc_hi20(.L.str.18) @@ -2052,7 +2032,7 @@ main: # @main movgr2fr.w $fa0, $a0 ffint.s.w $fa0, $fa0 fmul.s $fa0, $fs1, $fa0 - fst.s $fa0, $sp, 36 # 4-byte Folded Spill + fst.s $fa0, $sp, 52 # 4-byte Folded Spill ld.d $a0, $sp, 104 # 8-byte Folded Reload st.w $zero, $a0, 204 pcalau12i $a0, %pc_hi20(main.j) @@ -2077,6 +2057,14 @@ main: # @main addi.w $a0, $zero, -396 st.d $a0, $sp, 120 # 8-byte Folded Spill ori $s0, $zero, 8 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + st.d $a0, $sp, 40 # 8-byte Folded Spill + lu12i.w $a0, 393216 + lu32i.d $a0, 324757 + lu52i.d $a0, $a0, 1042 + st.d $a0, $sp, 32 # 8-byte Folded Spill fst.d $fs0, $sp, 200 # 8-byte Folded Spill b .LBB2_106 .p2align 4, , 16 @@ -2093,8 +2081,8 @@ main: # @main ld.d $s1, $sp, 112 # 8-byte Folded Reload ld.w $a0, $s1, %pc_lo12(main.ntimes) fdiv.s $fa0, $fa0, $fs1 - ld.d $a4, $sp, 88 # 8-byte Folded Reload - ld.w $a1, $a4, %pc_lo12(main.j) + ld.d $a3, $sp, 88 # 8-byte Folded Reload + ld.w $a1, $a3, %pc_lo12(main.j) fsub.s $fa0, $fa0, $fs0 movgr2fr.w $fa1, $a0 ffint.s.w $fa1, $fa1 @@ -2105,14 +2093,14 @@ main: # @main alsl.d $a0, $a1, $a2, 2 fst.s $fa0, $a0, 60 fadd.s $fa0, $fa2, $fa0 - ld.d $a3, $sp, 56 # 8-byte Folded Reload - fld.d $fa3, $a3, %pc_lo12(.LCPI2_4) - ld.d $a3, $sp, 48 # 8-byte Folded Reload - fld.d $fa4, $a3, %pc_lo12(.LCPI2_5) fst.s $fa0, $a0, 120 fcvt.d.s $fa1, $fa0 - fmul.d $fa1, $fa1, $fa3 - fdiv.d $fa1, $fa4, $fa1 + ld.d $a4, $sp, 40 # 8-byte Folded Reload + movgr2fr.d $fa2, $a4 + fmul.d $fa1, $fa1, $fa2 + ld.d $a4, $sp, 32 # 8-byte Folded Reload + movgr2fr.d $fa3, $a4 + fdiv.d $fa1, $fa3, $fa1 fcvt.s.d $fa1, $fa1 fst.s $fa1, $a0, 180 vldi $vr2, -1280 @@ -2125,7 +2113,7 @@ main: # @main fadd.s $fa0, $fa0, $fa1 fst.s $fa0, $a2, 204 addi.d $a0, $a1, 1 - st.w $a0, $a4, %pc_lo12(main.j) + st.w $a0, $a3, %pc_lo12(main.j) fld.d $fs0, $sp, 200 # 8-byte Folded Reload ori $a0, $zero, 5 bge $a1, $a0, .LBB2_169 @@ -2221,9 +2209,9 @@ main: # @main pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ld.d $a2, $sp, 208 # 8-byte Folded Reload - vld $vr29, $a2, 0 + vld $vr28, $a2, 0 vld $vr31, $a2, 16 - vld $vr28, $a2, 32 + vld $vr30, $a2, 32 vld $vr27, $a2, 48 vld $vr23, $a2, 64 vld $vr22, $a2, 80 @@ -2258,60 +2246,60 @@ main: # @main # => This Inner Loop Header: Depth=3 vld $vr25, $a1, -208 vld $vr26, $a1, -192 - vld $vr30, $a1, -176 + vld $vr29, $a1, -176 vld $vr24, $a1, -160 - vfadd.s $vr29, $vr29, $vr25 + vfadd.s $vr28, $vr28, $vr25 vfadd.s $vr31, $vr31, $vr26 - vfadd.s $vr28, $vr28, $vr30 + vfadd.s $vr30, $vr30, $vr29 vfadd.s $vr27, $vr27, $vr24 vld $vr24, $a1, -144 vld $vr25, $a1, -128 vld $vr26, $a1, -112 - vld $vr30, $a1, -96 + vld $vr29, $a1, -96 vfadd.s $vr23, $vr23, $vr24 vfadd.s $vr22, $vr22, $vr25 vfadd.s $vr21, $vr21, $vr26 - vfadd.s $vr20, $vr20, $vr30 + vfadd.s $vr20, $vr20, $vr29 vld $vr24, $a1, -80 vld $vr25, $a1, -64 vld $vr26, $a1, -48 - vld $vr30, $a1, -32 + vld $vr29, $a1, -32 vfadd.s $vr19, $vr19, $vr24 vfadd.s $vr18, $vr18, $vr25 vfadd.s $vr17, $vr17, $vr26 - vfadd.s $vr16, $vr16, $vr30 + vfadd.s $vr16, $vr16, $vr29 vld $vr24, $a1, -16 vld $vr25, $a1, 0 vld $vr26, $a1, 16 - vld $vr30, $a1, 32 + vld $vr29, $a1, 32 vfadd.s $vr15, $vr15, $vr24 vfadd.s $vr14, $vr14, $vr25 vfadd.s $vr13, $vr13, $vr26 - vfadd.s $vr12, $vr12, $vr30 + vfadd.s $vr12, $vr12, $vr29 vld $vr24, $a1, 48 vld $vr25, $a1, 64 vld $vr26, $a1, 80 - vld $vr30, $a1, 96 + vld $vr29, $a1, 96 vfadd.s $vr11, $vr11, $vr24 vfadd.s $vr10, $vr10, $vr25 vfadd.s $vr9, $vr9, $vr26 - vfadd.s $vr8, $vr8, $vr30 + vfadd.s $vr8, $vr8, $vr29 vld $vr24, $a1, 112 vld $vr25, $a1, 128 vld $vr26, $a1, 144 - vld $vr30, $a1, 160 + vld $vr29, $a1, 160 vfadd.s $vr7, $vr7, $vr24 vfadd.s $vr6, $vr6, $vr25 vfadd.s $vr5, $vr5, $vr26 - vfadd.s $vr4, $vr4, $vr30 + vfadd.s $vr4, $vr4, $vr29 fld.s $fs0, $a1, 176 fld.s $fs1, $a1, 180 fld.s $fs2, $a1, 184 - fld.s $fs6, $a1, 188 + fld.s $fs5, $a1, 188 fadd.s $fa3, $fa3, $fs0 fadd.s $fa2, $fa2, $fs1 fadd.s $fa1, $fa1, $fs2 - fadd.s $fa0, $fa0, $fs6 + fadd.s $fa0, $fa0, $fs5 addi.d $a0, $a0, -1 addi.d $a1, $a1, 804 bnez $a0, .LBB2_114 @@ -2319,9 +2307,9 @@ main: # @main # in Loop: Header=BB2_109 Depth=2 st.d $zero, $sp, 216 # 8-byte Folded Spill move $a1, $zero - vst $vr29, $a2, 0 + vst $vr28, $a2, 0 vst $vr31, $a2, 16 - vst $vr28, $a2, 32 + vst $vr30, $a2, 32 vst $vr27, $a2, 48 vst $vr23, $a2, 64 vst $vr22, $a2, 80 @@ -2603,7 +2591,7 @@ main: # @main ld.d $fp, $sp, 112 # 8-byte Folded Reload ld.w $a0, $fp, %pc_lo12(main.ntimes) fsub.s $fa0, $fa0, $fs0 - fld.s $fa1, $sp, 36 # 4-byte Folded Reload + fld.s $fa1, $sp, 52 # 4-byte Folded Reload fsub.s $fa0, $fa0, $fa1 ld.d $a1, $sp, 88 # 8-byte Folded Reload ld.w $a1, $a1, %pc_lo12(main.j) @@ -2617,7 +2605,7 @@ main: # @main jirl $ra, $ra, 0 ld.w $a1, $fp, %pc_lo12(main.ntimes) ld.d $t6, $sp, 208 # 8-byte Folded Reload - ld.d $t7, $sp, 40 # 8-byte Folded Reload + ld.d $t7, $sp, 56 # 8-byte Folded Reload ori $t8, $zero, 98 blez $a1, .LBB2_105 # %bb.144: # %.preheader388.preheader @@ -2836,7 +2824,6 @@ main: # @main bne $a7, $t0, .LBB2_168 b .LBB2_159 .LBB2_169: # %.preheader52.us.i260.preheader.preheader - fst.d $fa4, $sp, 48 # 8-byte Folded Spill fst.d $fa3, $sp, 56 # 8-byte Folded Spill ld.d $fp, $sp, 16 # 8-byte Folded Reload ld.d $a0, $fp, 0 @@ -2913,9 +2900,9 @@ main: # @main pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ld.d $a0, $sp, 208 # 8-byte Folded Reload - vld $vr30, $a0, 0 - vld $vr31, $a0, 16 - vld $vr29, $a0, 32 + vld $vr31, $a0, 0 + vld $vr30, $a0, 16 + vld $vr28, $a0, 32 vld $vr26, $a0, 48 vld $vr23, $a0, 64 vld $vr22, $a0, 80 @@ -2949,68 +2936,68 @@ main: # @main # => This Inner Loop Header: Depth=2 vld $vr24, $a1, -208 vld $vr25, $a1, -192 - vld $vr27, $a1, -176 - vld $vr28, $a1, -160 - vfadd.s $vr30, $vr30, $vr24 - vfadd.s $vr31, $vr31, $vr25 - vfadd.s $vr29, $vr29, $vr27 - vfadd.s $vr26, $vr26, $vr28 + vld $vr29, $a1, -176 + vld $vr27, $a1, -160 + vfadd.s $vr31, $vr31, $vr24 + vfadd.s $vr30, $vr30, $vr25 + vfadd.s $vr28, $vr28, $vr29 + vfadd.s $vr26, $vr26, $vr27 vld $vr24, $a1, -144 vld $vr25, $a1, -128 vld $vr27, $a1, -112 - vld $vr28, $a1, -96 + vld $vr29, $a1, -96 vfadd.s $vr23, $vr23, $vr24 vfadd.s $vr22, $vr22, $vr25 vfadd.s $vr21, $vr21, $vr27 - vfadd.s $vr20, $vr20, $vr28 + vfadd.s $vr20, $vr20, $vr29 vld $vr24, $a1, -80 vld $vr25, $a1, -64 vld $vr27, $a1, -48 - vld $vr28, $a1, -32 + vld $vr29, $a1, -32 vfadd.s $vr19, $vr19, $vr24 vfadd.s $vr18, $vr18, $vr25 vfadd.s $vr17, $vr17, $vr27 - vfadd.s $vr16, $vr16, $vr28 + vfadd.s $vr16, $vr16, $vr29 vld $vr24, $a1, -16 vld $vr25, $a1, 0 vld $vr27, $a1, 16 - vld $vr28, $a1, 32 + vld $vr29, $a1, 32 vfadd.s $vr15, $vr15, $vr24 vfadd.s $vr14, $vr14, $vr25 vfadd.s $vr13, $vr13, $vr27 - vfadd.s $vr12, $vr12, $vr28 + vfadd.s $vr12, $vr12, $vr29 vld $vr24, $a1, 48 vld $vr25, $a1, 64 vld $vr27, $a1, 80 - vld $vr28, $a1, 96 + vld $vr29, $a1, 96 vfadd.s $vr11, $vr11, $vr24 vfadd.s $vr10, $vr10, $vr25 vfadd.s $vr9, $vr9, $vr27 - vfadd.s $vr8, $vr8, $vr28 + vfadd.s $vr8, $vr8, $vr29 vld $vr24, $a1, 112 vld $vr25, $a1, 128 vld $vr27, $a1, 144 - vld $vr28, $a1, 160 + vld $vr29, $a1, 160 vfadd.s $vr7, $vr7, $vr24 vfadd.s $vr6, $vr6, $vr25 vfadd.s $vr5, $vr5, $vr27 - vfadd.s $vr4, $vr4, $vr28 + vfadd.s $vr4, $vr4, $vr29 fld.s $fs0, $a1, 176 fld.s $fs1, $a1, 180 fld.s $fs3, $a1, 184 - fld.s $fs4, $a1, 188 + fld.s $fs5, $a1, 188 fadd.s $fa3, $fa3, $fs0 fadd.s $fa2, $fa2, $fs1 fadd.s $fa1, $fa1, $fs3 - fadd.s $fa0, $fa0, $fs4 + fadd.s $fa0, $fa0, $fs5 addi.d $a2, $a2, -1 addi.d $a1, $a1, 800 bnez $a2, .LBB2_175 # %bb.176: # %matgen.exit284 # in Loop: Header=BB2_170 Depth=1 - vst $vr30, $a0, 0 - vst $vr31, $a0, 16 - vst $vr29, $a0, 32 + vst $vr31, $a0, 0 + vst $vr30, $a0, 16 + vst $vr28, $a0, 32 vst $vr26, $a0, 48 vst $vr23, $a0, 64 vst $vr22, $a0, 80 @@ -3082,7 +3069,7 @@ main: # @main movgr2fr.w $fa0, $a0 ffint.s.w $fa0, $fa0 fmul.s $fa0, $fs0, $fa0 - fst.s $fa0, $sp, 40 # 4-byte Folded Spill + fst.s $fa0, $sp, 52 # 4-byte Folded Spill ld.d $a0, $sp, 104 # 8-byte Folded Reload st.w $zero, $a0, 228 ori $a0, $zero, 7 @@ -3108,6 +3095,10 @@ main: # @main st.d $a0, $sp, 120 # 8-byte Folded Spill ori $s1, $zero, 99 ori $s0, $zero, 8 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + st.d $a0, $sp, 40 # 8-byte Folded Spill b .LBB2_179 .p2align 4, , 16 .LBB2_178: # %._crit_edge430 @@ -3137,9 +3128,10 @@ main: # @main fadd.s $fa0, $fa2, $fa0 fst.s $fa0, $a0, 120 fcvt.d.s $fa1, $fa0 - fld.d $fa2, $sp, 56 # 8-byte Folded Reload + ld.d $a4, $sp, 40 # 8-byte Folded Reload + movgr2fr.d $fa2, $a4 fmul.d $fa1, $fa1, $fa2 - fld.d $fa2, $sp, 48 # 8-byte Folded Reload + fld.d $fa2, $sp, 56 # 8-byte Folded Reload fdiv.d $fa1, $fa2, $fa1 fcvt.s.d $fa1, $fa1 fst.s $fa1, $a0, 180 @@ -3248,10 +3240,10 @@ main: # @main pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ld.d $a2, $sp, 208 # 8-byte Folded Reload - vld $vr29, $a2, 0 - vld $vr26, $a2, 16 - vld $vr31, $a2, 32 - vld $vr30, $a2, 48 + vld $vr26, $a2, 0 + vld $vr31, $a2, 16 + vld $vr30, $a2, 32 + vld $vr29, $a2, 48 vld $vr23, $a2, 64 vld $vr22, $a2, 80 vld $vr21, $a2, 96 @@ -3287,10 +3279,10 @@ main: # @main vld $vr25, $a1, -192 vld $vr27, $a1, -176 vld $vr28, $a1, -160 - vfadd.s $vr29, $vr29, $vr24 - vfadd.s $vr26, $vr26, $vr25 - vfadd.s $vr31, $vr31, $vr27 - vfadd.s $vr30, $vr30, $vr28 + vfadd.s $vr26, $vr26, $vr24 + vfadd.s $vr31, $vr31, $vr25 + vfadd.s $vr30, $vr30, $vr27 + vfadd.s $vr29, $vr29, $vr28 vld $vr24, $a1, -144 vld $vr25, $a1, -128 vld $vr27, $a1, -112 @@ -3346,10 +3338,10 @@ main: # @main # in Loop: Header=BB2_182 Depth=2 st.d $zero, $sp, 216 # 8-byte Folded Spill move $a1, $zero - vst $vr29, $a2, 0 - vst $vr26, $a2, 16 - vst $vr31, $a2, 32 - vst $vr30, $a2, 48 + vst $vr26, $a2, 0 + vst $vr31, $a2, 16 + vst $vr30, $a2, 32 + vst $vr29, $a2, 48 vst $vr23, $a2, 64 vst $vr22, $a2, 80 vst $vr21, $a2, 96 @@ -3630,7 +3622,7 @@ main: # @main ld.d $fp, $sp, 112 # 8-byte Folded Reload ld.w $a0, $fp, %pc_lo12(main.ntimes) fsub.s $fa0, $fa0, $fs0 - fld.s $fa1, $sp, 40 # 4-byte Folded Reload + fld.s $fa1, $sp, 52 # 4-byte Folded Reload fsub.s $fa0, $fa0, $fa1 ld.d $a1, $sp, 88 # 8-byte Folded Reload ld.w $a1, $a1, %pc_lo12(main.j) @@ -3900,12 +3892,8 @@ main: # @main .Lfunc_end2: .size main, .Lfunc_end2-main # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function matgen -.LCPI3_0: - .dword 0x3f10000000000000 # double 6.103515625E-5 .text - .globl matgen + .globl matgen # -- Begin function matgen .p2align 5 .type matgen,@function matgen: # @matgen @@ -3926,11 +3914,11 @@ matgen: # @matgen move $s1, $a0 move $a0, $zero slli.d $s3, $a1, 2 - pcalau12i $a1, %pc_hi20(.LCPI3_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI3_0) ori $a2, $zero, 1325 ori $a1, $zero, 3125 lu12i.w $a3, -8 + lu52i.d $a5, $zero, 1009 + movgr2fr.d $fa0, $a5 move $a5, $s1 .p2align 4, , 16 .LBB3_2: # %.preheader52.us @@ -6076,19 +6064,14 @@ dmxpy: # @dmxpy .Lfunc_end6: .size dmxpy, .Lfunc_end6-dmxpy # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function epslon -.LCPI7_0: - .word 0x34000000 # float 1.1920929E-7 - .text - .globl epslon + .globl epslon # -- Begin function epslon .p2align 5 .type epslon,@function epslon: # @epslon # %bb.0: - pcalau12i $a0, %pc_hi20(.LCPI7_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI7_0) fabs.s $fa0, $fa0 + lu12i.w $a0, 212992 + movgr2fr.w $fa1, $a0 fmul.s $fa0, $fa0, $fa1 ret .Lfunc_end7: diff --git a/results/SingleSource/Benchmarks/McGill/CMakeFiles/misr.dir/misr.s b/results/SingleSource/Benchmarks/McGill/CMakeFiles/misr.dir/misr.s index d43e849a..c4e498f1 100644 --- a/results/SingleSource/Benchmarks/McGill/CMakeFiles/misr.dir/misr.s +++ b/results/SingleSource/Benchmarks/McGill/CMakeFiles/misr.dir/misr.s @@ -1,10 +1,6 @@ .file "misr.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x40f86a0000000000 # double 1.0E+5 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -166,12 +162,14 @@ main: # @main ld.hu $a6, $sp, 30 ld.hu $a7, $sp, 32 ld.hu $a0, $sp, 34 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI0_0) sub.d $a2, $fp, $s0 - movgr2fr.w $fa1, $a2 - ffint.d.w $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a2 + ffint.d.w $fa0, $fa0 + ori $a2, $zero, 0 + lu32i.d $a2, -497152 + lu52i.d $a2, $a2, 1039 + movgr2fr.d $fa1, $a2 + fdiv.d $fa0, $fa0, $fa1 st.d $a0, $sp, 0 fst.d $fa0, $sp, 8 pcalau12i $a0, %pc_hi20(.L.str.5) @@ -258,14 +256,7 @@ init: # @init .Lfunc_end2: .size init, .Lfunc_end2-init # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function simulate -.LCPI3_0: - .dword 0x408f400000000000 # double 1000 -.LCPI3_1: - .dword 0x40c3880000000000 # double 1.0E+4 - .text - .globl simulate + .globl simulate # -- Begin function simulate .p2align 5 .type simulate,@function simulate: # @simulate @@ -283,14 +274,15 @@ simulate: # @simulate st.d $s7, $sp, 128 # 8-byte Folded Spill st.d $s8, $sp, 120 # 8-byte Folded Spill fst.d $fs0, $sp, 112 # 8-byte Folded Spill + fst.d $fs1, $sp, 104 # 8-byte Folded Spill pcalau12i $a3, %pc_hi20(reg_len) - st.d $a3, $sp, 88 # 8-byte Folded Spill + st.d $a3, $sp, 80 # 8-byte Folded Spill ld.w $a4, $a3, %pc_lo12(reg_len) - move $s6, $a1 - st.d $a0, $sp, 80 # 8-byte Folded Spill + move $s5, $a1 + st.d $a0, $sp, 72 # 8-byte Folded Spill blez $a0, .LBB3_20 # %bb.1: # %.preheader92.lr.ph - move $s4, $a2 + move $s6, $a2 fmov.d $fs0, $fa0 move $a5, $zero addi.w $a0, $a4, -1 @@ -310,81 +302,88 @@ simulate: # @simulate maskeqz $a1, $a1, $a3 masknez $a0, $a0, $a3 or $a0, $a1, $a0 - st.d $a0, $sp, 104 # 8-byte Folded Spill - st.d $a6, $sp, 48 # 8-byte Folded Spill + st.d $a0, $sp, 96 # 8-byte Folded Spill + st.d $a6, $sp, 40 # 8-byte Folded Spill bstrpick.d $a0, $a6, 31, 0 - st.d $a0, $sp, 24 # 8-byte Folded Spill - add.d $a0, $s4, $a2 st.d $a0, $sp, 16 # 8-byte Folded Spill + add.d $a0, $s6, $a2 + st.d $a0, $sp, 8 # 8-byte Folded Spill lu12i.w $a0, -117441 ori $a0, $a0, 1999 lu32i.d $a0, 301989 lu52i.d $s1, $a0, 524 + ori $s4, $zero, 1000 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs1, $a0 lu12i.w $a0, 231525 ori $a0, $a0, 2379 lu32i.d $a0, -145962 lu52i.d $a0, $a0, 838 - st.d $a0, $sp, 40 # 8-byte Folded Spill + st.d $a0, $sp, 32 # 8-byte Folded Spill lu12i.w $a0, 2 ori $a0, $a0, 1808 - st.d $a0, $sp, 32 # 8-byte Folded Spill - st.d $s6, $sp, 64 # 8-byte Folded Spill - st.d $s4, $sp, 56 # 8-byte Folded Spill - st.d $a4, $sp, 72 # 8-byte Folded Spill + st.d $a0, $sp, 24 # 8-byte Folded Spill + st.d $s5, $sp, 56 # 8-byte Folded Spill + st.d $s6, $sp, 48 # 8-byte Folded Spill + st.d $a4, $sp, 64 # 8-byte Folded Spill b .LBB3_3 .p2align 4, , 16 .LBB3_2: # in Loop: Header=BB3_3 Depth=1 - ld.d $s6, $sp, 64 # 8-byte Folded Reload - ld.d $s5, $sp, 96 # 8-byte Folded Reload + ld.d $s5, $sp, 56 # 8-byte Folded Reload + ld.d $s7, $sp, 88 # 8-byte Folded Reload add.d $a0, $s3, $s2 andi $a0, $a0, 1 st.w $a0, $fp, 0 pcaddu18i $ra, %call36(lrand48) jirl $ra, $ra, 0 - ld.d $a1, $sp, 40 # 8-byte Folded Reload + ld.d $a1, $sp, 32 # 8-byte Folded Reload mulh.d $a1, $a0, $a1 srli.d $a2, $a1, 63 srai.d $a1, $a1, 11 add.d $a1, $a1, $a2 - ld.d $a2, $sp, 32 # 8-byte Folded Reload + ld.d $a2, $sp, 24 # 8-byte Folded Reload mul.d $a1, $a1, $a2 - pcalau12i $a2, %pc_hi20(.LCPI3_1) - fld.d $fa0, $a2, %pc_lo12(.LCPI3_1) sub.d $a0, $a0, $a1 + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, 231424 + lu52i.d $a0, $a0, 1036 movgr2fr.d $fa1, $a0 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 fcmp.clt.d $fcc0, $fa0, $fs0 movcf2gr $a0, $fcc0 xor $a0, $s2, $a0 add.d $a0, $s0, $a0 andi $a0, $a0, 1 - addi.w $a5, $s5, 1 + addi.w $a5, $s7, 1 st.w $a0, $fp, 4 - ld.d $a4, $sp, 72 # 8-byte Folded Reload - ld.d $a0, $sp, 80 # 8-byte Folded Reload + ld.d $a4, $sp, 64 # 8-byte Folded Reload + ld.d $a0, $sp, 72 # 8-byte Folded Reload beq $a5, $a0, .LBB3_19 .LBB3_3: # %.preheader92 # =>This Loop Header: Depth=1 # Child Loop BB3_6 Depth 2 # Child Loop BB3_8 Depth 3 # Child Loop BB3_14 Depth 2 - st.d $a5, $sp, 96 # 8-byte Folded Spill - pcalau12i $s5, %pc_hi20(.LCPI3_0) + st.d $a5, $sp, 88 # 8-byte Folded Spill ori $a0, $zero, 32 blt $a4, $a0, .LBB3_10 # %bb.4: # %.lr.ph.preheader # in Loop: Header=BB3_3 Depth=1 - move $s7, $zero + move $s8, $zero move $s0, $zero move $s3, $zero + move $s7, $s6 b .LBB3_6 .p2align 4, , 16 .LBB3_5: # in Loop: Header=BB3_6 Depth=2 - addi.d $s7, $s7, 1 - addi.d $s4, $s4, 31 - ld.d $a0, $sp, 104 # 8-byte Folded Reload - beq $s7, $a0, .LBB3_11 + addi.d $s8, $s8, 1 + addi.d $s7, $s7, 31 + ld.d $a0, $sp, 96 # 8-byte Folded Reload + beq $s8, $a0, .LBB3_11 .LBB3_6: # %.lr.ph # Parent Loop BB3_3 Depth=1 # => This Loop Header: Depth=2 @@ -392,8 +391,8 @@ simulate: # @simulate pcaddu18i $ra, %call36(lrand48) jirl $ra, $ra, 0 move $s2, $a0 - move $s8, $zero - move $fp, $s6 + move $s6, $zero + move $fp, $s5 b .LBB3_8 .p2align 4, , 16 .LBB3_7: # in Loop: Header=BB3_8 Depth=3 @@ -408,30 +407,28 @@ simulate: # @simulate srli.d $a2, $a1, 63 srai.d $a1, $a1, 7 add.d $a1, $a1, $a2 - ori $a2, $zero, 1000 - mul.d $a1, $a1, $a2 + mul.d $a1, $a1, $s4 sub.d $a0, $a0, $a1 - fld.d $fa0, $s5, %pc_lo12(.LCPI3_0) - ld.d $s6, $fp, 8 - movgr2fr.d $fa1, $a0 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 - ld.w $a0, $s6, 4 + ld.d $s5, $fp, 8 + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + fdiv.d $fa0, $fa0, $fs1 + ld.w $a0, $s5, 4 fcmp.clt.d $fcc0, $fa0, $fs0 movcf2gr $a1, $fcc0 xor $a1, $s2, $a1 add.d $a0, $a0, $a1 andi $a0, $a0, 1 st.w $a0, $fp, 4 - addi.d $s8, $s8, 1 + addi.d $s6, $s6, 1 srai.d $s2, $s2, 1 - move $fp, $s6 + move $fp, $s5 ori $a0, $zero, 31 - beq $s8, $a0, .LBB3_5 + beq $s6, $a0, .LBB3_5 .LBB3_8: # Parent Loop BB3_3 Depth=1 # Parent Loop BB3_6 Depth=2 # => This Inner Loop Header: Depth=3 - ldx.bu $a0, $s4, $s8 + ldx.bu $a0, $s7, $s6 ori $a1, $zero, 49 bne $a0, $a1, .LBB3_7 # %bb.9: # in Loop: Header=BB3_8 Depth=3 @@ -449,70 +446,67 @@ simulate: # @simulate pcaddu18i $ra, %call36(lrand48) jirl $ra, $ra, 0 move $s2, $a0 - ld.d $a0, $sp, 48 # 8-byte Folded Reload + ld.d $a0, $sp, 40 # 8-byte Folded Reload blez $a0, .LBB3_16 # %bb.12: # %.lr.ph110.preheader # in Loop: Header=BB3_3 Depth=1 - ld.d $s7, $sp, 24 # 8-byte Folded Reload - ld.d $s8, $sp, 16 # 8-byte Folded Reload - ld.d $s4, $sp, 56 # 8-byte Folded Reload + ld.d $s7, $sp, 16 # 8-byte Folded Reload + ld.d $s6, $sp, 8 # 8-byte Folded Reload b .LBB3_14 .p2align 4, , 16 .LBB3_13: # in Loop: Header=BB3_14 Depth=2 - ld.d $a0, $s6, 8 + ld.d $a0, $s5, 8 ld.w $a0, $a0, 0 add.d $a0, $a0, $s2 andi $a0, $a0, 1 - st.w $a0, $s6, 0 + st.w $a0, $s5, 0 pcaddu18i $ra, %call36(lrand48) jirl $ra, $ra, 0 mulh.d $a1, $a0, $s1 srli.d $a2, $a1, 63 srai.d $a1, $a1, 7 add.d $a1, $a1, $a2 - ori $a2, $zero, 1000 - mul.d $a1, $a1, $a2 + mul.d $a1, $a1, $s4 sub.d $a0, $a0, $a1 - fld.d $fa0, $s5, %pc_lo12(.LCPI3_0) - ld.d $fp, $s6, 8 - movgr2fr.d $fa1, $a0 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + ld.d $fp, $s5, 8 + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + fdiv.d $fa0, $fa0, $fs1 ld.w $a0, $fp, 4 fcmp.clt.d $fcc0, $fa0, $fs0 movcf2gr $a1, $fcc0 xor $a1, $s2, $a1 add.d $a0, $a0, $a1 andi $a0, $a0, 1 - st.w $a0, $s6, 4 + st.w $a0, $s5, 4 srai.d $s2, $s2, 1 addi.d $s7, $s7, -1 - addi.d $s8, $s8, 1 - move $s6, $fp + addi.d $s6, $s6, 1 + move $s5, $fp beqz $s7, .LBB3_17 .LBB3_14: # %.lr.ph110 # Parent Loop BB3_3 Depth=1 # => This Inner Loop Header: Depth=2 - ld.bu $a0, $s8, 0 + ld.bu $a0, $s6, 0 ori $a1, $zero, 49 bne $a0, $a1, .LBB3_13 # %bb.15: # in Loop: Header=BB3_14 Depth=2 - ld.w $a0, $s6, 0 - ld.w $a1, $s6, 4 + ld.w $a0, $s5, 0 + ld.w $a1, $s5, 4 add.d $s3, $a0, $s3 add.d $s0, $a1, $s0 b .LBB3_13 .p2align 4, , 16 .LBB3_16: # in Loop: Header=BB3_3 Depth=1 - move $fp, $s6 - ld.d $s4, $sp, 56 # 8-byte Folded Reload + move $fp, $s5 .LBB3_17: # %._crit_edge111 # in Loop: Header=BB3_3 Depth=1 pcaddu18i $ra, %call36(lrand48) jirl $ra, $ra, 0 - ld.d $a1, $sp, 88 # 8-byte Folded Reload + ld.d $a1, $sp, 80 # 8-byte Folded Reload ld.w $a1, $a1, %pc_lo12(reg_len) - add.d $a1, $s4, $a1 + ld.d $s6, $sp, 48 # 8-byte Folded Reload + add.d $a1, $s6, $a1 ld.bu $a1, $a1, -1 move $s2, $a0 ori $a0, $zero, 49 @@ -524,7 +518,7 @@ simulate: # @simulate add.d $s0, $a1, $s0 b .LBB3_2 .LBB3_19: # %.preheader.loopexit - ld.d $a0, $sp, 88 # 8-byte Folded Reload + ld.d $a0, $sp, 80 # 8-byte Folded Reload ld.w $a4, $a0, %pc_lo12(reg_len) .LBB3_20: # %.preheader move $a0, $zero @@ -534,17 +528,18 @@ simulate: # @simulate .p2align 4, , 16 .LBB3_22: # %.lr.ph119 # =>This Inner Loop Header: Depth=1 - ld.w $a2, $s6, 0 - ld.w $a3, $s6, 4 + ld.w $a2, $s5, 0 + ld.w $a3, $s5, 4 xor $a2, $a2, $a3 sltui $a2, $a2, 1 masknez $a3, $a1, $a2 - ld.d $s6, $s6, 8 + ld.d $s5, $s5, 8 maskeqz $a0, $a0, $a2 addi.w $a4, $a4, -1 or $a0, $a0, $a3 bnez $a4, .LBB3_22 .LBB3_23: # %._crit_edge120 + fld.d $fs1, $sp, 104 # 8-byte Folded Reload fld.d $fs0, $sp, 112 # 8-byte Folded Reload ld.d $s8, $sp, 120 # 8-byte Folded Reload ld.d $s7, $sp, 128 # 8-byte Folded Reload diff --git a/results/SingleSource/Benchmarks/Misc-C++/CMakeFiles/mandel-text.dir/mandel-text.s b/results/SingleSource/Benchmarks/Misc-C++/CMakeFiles/mandel-text.dir/mandel-text.s index 3873dffa..2304d762 100644 --- a/results/SingleSource/Benchmarks/Misc-C++/CMakeFiles/mandel-text.dir/mandel-text.s +++ b/results/SingleSource/Benchmarks/Misc-C++/CMakeFiles/mandel-text.dir/mandel-text.s @@ -1,12 +1,6 @@ .file "mandel-text.cpp" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x3fa999999999999a # double 0.050000000000000003 -.LCPI0_1: - .dword 0xc002666666666666 # double -2.2999999999999998 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -30,10 +24,16 @@ main: # @main fst.d $fs4, $sp, 16 # 8-byte Folded Spill fst.d $fs5, $sp, 8 # 8-byte Folded Spill movgr2fr.d $fs0, $zero - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI0_0) - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI0_1) + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1018 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, 419430 + ori $a0, $a0, 1638 + lu32i.d $a0, 157286 + lu52i.d $a0, $a0, -1024 + movgr2fr.d $fs2, $a0 ori $fp, $zero, 255 ori $s0, $zero, 2000 pcalau12i $a0, %got_pc_hi20(stdout) diff --git a/results/SingleSource/Benchmarks/Misc-C++/CMakeFiles/oopack_v1p8.dir/oopack_v1p8.s b/results/SingleSource/Benchmarks/Misc-C++/CMakeFiles/oopack_v1p8.dir/oopack_v1p8.s index 76e2f9bd..528923d4 100644 --- a/results/SingleSource/Benchmarks/Misc-C++/CMakeFiles/oopack_v1p8.dir/oopack_v1p8.s +++ b/results/SingleSource/Benchmarks/Misc-C++/CMakeFiles/oopack_v1p8.dir/oopack_v1p8.s @@ -108,21 +108,18 @@ _ZNK12MaxBenchmark4initEv: # @_ZNK12MaxBenchmark4initEv .Lfunc_end2: .size _ZNK12MaxBenchmark4initEv, .Lfunc_end2-_ZNK12MaxBenchmark4initEv # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZNK12MaxBenchmark5checkEiRdS0_ -.LCPI3_0: - .dword 0x408f400000000000 # double 1000 - .text - .globl _ZNK12MaxBenchmark5checkEiRdS0_ + .globl _ZNK12MaxBenchmark5checkEiRdS0_ # -- Begin function _ZNK12MaxBenchmark5checkEiRdS0_ .p2align 5 .type _ZNK12MaxBenchmark5checkEiRdS0_,@function _ZNK12MaxBenchmark5checkEiRdS0_: # @_ZNK12MaxBenchmark5checkEiRdS0_ # %bb.0: - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI3_0) - movgr2fr.w $fa1, $a1 - ffint.d.w $fa1, $fa1 - fmul.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a1 + ffint.d.w $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 fst.d $fa0, $a2, 0 pcalau12i $a0, %pc_hi20(MaxResult) fld.d $fa0, $a0, %pc_lo12(MaxResult) @@ -307,12 +304,7 @@ _ZNK15MatrixBenchmark4initEv: # @_ZNK15MatrixBenchmark4initEv .Lfunc_end6: .size _ZNK15MatrixBenchmark4initEv, .Lfunc_end6-_ZNK15MatrixBenchmark4initEv # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZNK15MatrixBenchmark5checkEiRdS0_ -.LCPI7_0: - .dword 0x410e848000000000 # double 2.5E+5 - .text - .globl _ZNK15MatrixBenchmark5checkEiRdS0_ + .globl _ZNK15MatrixBenchmark5checkEiRdS0_ # -- Begin function _ZNK15MatrixBenchmark5checkEiRdS0_ .p2align 5 .type _ZNK15MatrixBenchmark5checkEiRdS0_,@function _ZNK15MatrixBenchmark5checkEiRdS0_: # @_ZNK15MatrixBenchmark5checkEiRdS0_ @@ -332,11 +324,13 @@ _ZNK15MatrixBenchmark5checkEiRdS0_: # @_ZNK15MatrixBenchmark5checkEiRdS0_ fadd.d $fa0, $fa0, $fa1 bnez $a0, .LBB7_1 # %bb.2: - pcalau12i $a0, %pc_hi20(.LCPI7_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI7_0) fst.d $fa0, $a3, 0 movgr2fr.w $fa0, $a1 ffint.d.w $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1040 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 fst.d $fa0, $a2, 0 ret @@ -482,14 +476,7 @@ _ZNK17IteratorBenchmark5checkEiRdS0_: # @_ZNK17IteratorBenchmark5checkEiRdS0_ .Lfunc_end11: .size _ZNK17IteratorBenchmark5checkEiRdS0_, .Lfunc_end11-_ZNK17IteratorBenchmark5checkEiRdS0_ # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZNK16ComplexBenchmark7c_styleEv -.LCPI12_0: - .dword 0xbfebb67ae8584caa # double -0.8660254037844386 -.LCPI12_1: - .dword 0x3febb67ae8584caa # double 0.8660254037844386 - .text - .globl _ZNK16ComplexBenchmark7c_styleEv + .globl _ZNK16ComplexBenchmark7c_styleEv # -- Begin function _ZNK16ComplexBenchmark7c_styleEv .p2align 5 .type _ZNK16ComplexBenchmark7c_styleEv,@function _ZNK16ComplexBenchmark7c_styleEv: # @_ZNK16ComplexBenchmark7c_styleEv @@ -502,12 +489,15 @@ _ZNK16ComplexBenchmark7c_styleEv: # @_ZNK16ComplexBenchmark7c_styleEv ori $a2, $a4, 3712 pcalau12i $a3, %pc_hi20(X) addi.d $a3, $a3, %pc_lo12(X) - pcalau12i $a5, %pc_hi20(.LCPI12_0) - fld.d $fa0, $a5, %pc_lo12(.LCPI12_0) - pcalau12i $a5, %pc_hi20(.LCPI12_1) - fld.d $fa1, $a5, %pc_lo12(.LCPI12_1) - vldi $vr2, -928 + vldi $vr0, -928 ori $a4, $a4, 3720 + lu12i.w $a5, -96892 + ori $a5, $a5, 3242 + lu32i.d $a5, -280966 + lu52i.d $a6, $a5, -1026 + movgr2fr.d $fa1, $a6 + lu52i.d $a5, $a5, 1022 + movgr2fr.d $fa2, $a5 .p2align 4, , 16 .LBB12_1: # =>This Inner Loop Header: Depth=1 add.d $a5, $a1, $a0 @@ -516,11 +506,11 @@ _ZNK16ComplexBenchmark7c_styleEv: # @_ZNK16ComplexBenchmark7c_styleEv fldx.d $fa4, $a6, $a2 fldx.d $fa5, $a6, $a4 fldx.d $fa6, $a5, $a4 - fmadd.d $fa3, $fa4, $fa2, $fa3 - fmadd.d $fa3, $fa5, $fa0, $fa3 + fmadd.d $fa3, $fa4, $fa0, $fa3 + fmadd.d $fa3, $fa5, $fa1, $fa3 fstx.d $fa3, $a5, $a2 - fmadd.d $fa3, $fa5, $fa2, $fa6 - fmadd.d $fa3, $fa4, $fa1, $fa3 + fmadd.d $fa3, $fa5, $fa0, $fa6 + fmadd.d $fa3, $fa4, $fa2, $fa3 addi.d $a0, $a0, 16 fstx.d $fa3, $a5, $a4 bnez $a0, .LBB12_1 @@ -658,14 +648,7 @@ _ZNK16ComplexBenchmark5checkEiRdS0_: # @_ZNK16ComplexBenchmark5checkEiRdS0_ .Lfunc_end15: .size _ZNK16ComplexBenchmark5checkEiRdS0_, .Lfunc_end15-_ZNK16ComplexBenchmark5checkEiRdS0_ # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZNK9Benchmark8time_oneEMS_KFvvEiRdS2_S2_ -.LCPI16_0: - .dword 0x412e848000000000 # double 1.0E+6 -.LCPI16_1: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 - .text - .globl _ZNK9Benchmark8time_oneEMS_KFvvEiRdS2_S2_ + .globl _ZNK9Benchmark8time_oneEMS_KFvvEiRdS2_S2_ # -- Begin function _ZNK9Benchmark8time_oneEMS_KFvvEiRdS2_S2_ .p2align 5 .type _ZNK9Benchmark8time_oneEMS_KFvvEiRdS2_S2_,@function _ZNK9Benchmark8time_oneEMS_KFvvEiRdS2_S2_: # @_ZNK9Benchmark8time_oneEMS_KFvvEiRdS2_S2_ @@ -763,15 +746,20 @@ _ZNK9Benchmark8time_oneEMS_KFvvEiRdS2_S2_: # @_ZNK9Benchmark8time_oneEMS_KFvvEiR sub.d $a0, $s4, $s6 movgr2fr.d $fa0, $a0 ffint.d.l $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI16_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI16_0) - fld.d $fa2, $sp, 8 - pcalau12i $a0, %pc_hi20(.LCPI16_1) - fld.d $fa3, $a0, %pc_lo12(.LCPI16_1) - fdiv.d $fa0, $fa0, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + fld.d $fa1, $sp, 8 + movgr2fr.d $fa2, $a0 + fdiv.d $fa0, $fa0, $fa2 fst.d $fa0, $s0, 0 - fdiv.d $fa0, $fa2, $fa0 - fmul.d $fa0, $fa0, $fa3 + fdiv.d $fa0, $fa1, $fa0 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 fst.d $fa0, $fp, 0 ld.d $s7, $sp, 16 # 8-byte Folded Reload ld.d $s6, $sp, 24 # 8-byte Folded Reload @@ -849,12 +837,7 @@ _ZN9Benchmark4findEPKc: # @_ZN9Benchmark4findEPKc .size _ZN9Benchmark4findEPKc, .Lfunc_end17-_ZN9Benchmark4findEPKc .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZNK9Benchmark9time_bothEi -.LCPI18_0: - .dword 0x3d10000000000000 # double 1.4210854715202004E-14 - .text - .globl _ZNK9Benchmark9time_bothEi + .globl _ZNK9Benchmark9time_bothEi # -- Begin function _ZNK9Benchmark9time_bothEi .p2align 5 .type _ZNK9Benchmark9time_bothEi,@function _ZNK9Benchmark9time_bothEi: # @_ZNK9Benchmark9time_bothEi @@ -950,12 +933,12 @@ _ZNK9Benchmark9time_bothEi: # @_ZNK9Benchmark9time_bothEi fld.d $fa1, $sp, 16 fsub.d $fa2, $fa0, $fa1 fcmp.clt.d $fcc0, $fa0, $fa1 - pcalau12i $a0, %pc_hi20(.LCPI18_0) - fld.d $fa3, $a0, %pc_lo12(.LCPI18_0) fsel $fa0, $fa1, $fa0, $fcc0 fdiv.d $fs0, $fa2, $fa0 fabs.d $fa0, $fs0 - fcmp.cule.d $fcc0, $fa0, $fa3 + lu52i.d $a0, $zero, 977 + movgr2fr.d $fa1, $a0 + fcmp.cule.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB18_8 # %bb.7: ld.d $a0, $s0, 0 diff --git a/results/SingleSource/Benchmarks/Misc-C++/CMakeFiles/stepanov_container.dir/stepanov_container.s b/results/SingleSource/Benchmarks/Misc-C++/CMakeFiles/stepanov_container.dir/stepanov_container.s index 96f9c1ce..f350c860 100644 --- a/results/SingleSource/Benchmarks/Misc-C++/CMakeFiles/stepanov_container.dir/stepanov_container.s +++ b/results/SingleSource/Benchmarks/Misc-C++/CMakeFiles/stepanov_container.dir/stepanov_container.s @@ -1896,12 +1896,7 @@ _Z10initializePdS_: # @_Z10initializePdS_ .Lfunc_end11: .size _Z10initializePdS_, .Lfunc_end11-_Z10initializePdS_ # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z6logtwod -.LCPI12_0: - .dword 0x3fe62e42fefa39ef # double 0.69314718055994529 - .text - .globl _Z6logtwod + .globl _Z6logtwod # -- Begin function _Z6logtwod .p2align 5 .type _Z6logtwod,@function _Z6logtwod: # @_Z6logtwod @@ -1910,8 +1905,11 @@ _Z6logtwod: # @_Z6logtwod st.d $ra, $sp, 8 # 8-byte Folded Spill pcaddu18i $ra, %call36(log) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI12_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI12_0) + lu12i.w $a0, -4189 + ori $a0, $a0, 2543 + lu32i.d $a0, 405058 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fdiv.d $fa0, $fa0, $fa1 ld.d $ra, $sp, 8 # 8-byte Folded Reload addi.d $sp, $sp, 16 @@ -1919,14 +1917,7 @@ _Z6logtwod: # @_Z6logtwod .Lfunc_end12: .size _Z6logtwod, .Lfunc_end12-_Z6logtwod # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z15number_of_testsi -.LCPI13_0: - .dword 0x3fe62e42fefa39ef # double 0.69314718055994529 -.LCPI13_1: - .dword 0x4173021b091bf3aa # double 19931568.569324173 - .text - .globl _Z15number_of_testsi + .globl _Z15number_of_testsi # -- Begin function _Z15number_of_testsi .p2align 5 .type _Z15number_of_testsi,@function _Z15number_of_testsi: # @_Z15number_of_testsi @@ -1939,13 +1930,19 @@ _Z15number_of_testsi: # @_Z15number_of_testsi fmov.d $fa0, $fs0 pcaddu18i $ra, %call36(log) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI13_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI13_0) - pcalau12i $a0, %pc_hi20(.LCPI13_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI13_1) + lu12i.w $a0, -4189 + ori $a0, $a0, 2543 + lu32i.d $a0, 405058 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fdiv.d $fa0, $fa0, $fa1 fmul.d $fa0, $fa0, $fs0 - fdiv.d $fa0, $fa2, $fa0 + lu12i.w $a0, 37311 + ori $a0, $a0, 938 + lu32i.d $a0, 197147 + lu52i.d $a0, $a0, 1047 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa1, $fa0 vreplvei.d $vr0, $vr0, 0 vfrintrm.d $vr0, $vr0 ftintrz.w.d $fa0, $fa0 @@ -1957,14 +1954,7 @@ _Z15number_of_testsi: # @_Z15number_of_testsi .Lfunc_end13: .size _Z15number_of_testsi, .Lfunc_end13-_Z15number_of_testsi # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z9run_testsi -.LCPI14_0: - .dword 0x3fe62e42fefa39ef # double 0.69314718055994529 -.LCPI14_1: - .dword 0x4173021b091bf3aa # double 19931568.569324173 - .text - .globl _Z9run_testsi + .globl _Z9run_testsi # -- Begin function _Z9run_testsi .p2align 5 .type _Z9run_testsi,@function _Z9run_testsi: # @_Z9run_testsi @@ -2007,13 +1997,19 @@ _Z9run_testsi: # @_Z9run_testsi .LBB14_2: # %_ZNSt6vectorIdSaIdEE5clearEv.exit bltz $s2, .LBB14_39 # %bb.3: # %_ZNSt6vectorIdSaIdEE17_S_check_init_lenEmRKS0_.exit.i - pcalau12i $a0, %pc_hi20(.LCPI14_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI14_0) - pcalau12i $a0, %pc_hi20(.LCPI14_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI14_1) + lu12i.w $a0, -4189 + ori $a0, $a0, 2543 + lu32i.d $a0, 405058 + lu12i.w $a1, 37311 + lu52i.d $a0, $a0, 1022 + ori $a1, $a1, 938 + movgr2fr.d $fa1, $a0 + lu32i.d $a1, 197147 fdiv.d $fa0, $fa0, $fa1 + lu52i.d $a0, $a1, 1047 fmul.d $fa0, $fa0, $fs0 - fdiv.d $fa0, $fa2, $fa0 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa1, $fa0 vreplvei.d $vr0, $vr0, 0 vfrintrm.d $vr0, $vr0 ftintrz.w.d $fa0, $fa0 diff --git a/results/SingleSource/Benchmarks/Misc-C++/CMakeFiles/stepanov_v1p2.dir/stepanov_v1p2.s b/results/SingleSource/Benchmarks/Misc-C++/CMakeFiles/stepanov_v1p2.dir/stepanov_v1p2.s index 74acda0e..a559198c 100644 --- a/results/SingleSource/Benchmarks/Misc-C++/CMakeFiles/stepanov_v1p2.dir/stepanov_v1p2.s +++ b/results/SingleSource/Benchmarks/Misc-C++/CMakeFiles/stepanov_v1p2.dir/stepanov_v1p2.s @@ -1,14 +1,6 @@ .file "stepanov_v1p2.cpp" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z9summarizev -.LCPI0_0: - .dword 0x409f400000000000 # double 2000 -.LCPI0_1: - .dword 0x412e848000000000 # double 1.0E+6 -.LCPI0_2: - .dword 0x3e7ad7f29abcaf48 # double 9.9999999999999995E-8 .text - .globl _Z9summarizev + .globl _Z9summarizev # -- Begin function _Z9summarizev .p2align 5 .type _Z9summarizev,@function _Z9summarizev: # @_Z9summarizev @@ -41,22 +33,29 @@ _Z9summarizev: # @_Z9summarizev pcalau12i $s1, %pc_hi20(current_test) ld.w $a0, $s1, %pc_lo12(current_test) movgr2fr.d $fs1, $zero - pcalau12i $s2, %pc_hi20(.LCPI0_2) + lu12i.w $s2, -414774 blez $a0, .LBB0_7 # %bb.1: # %.lr.ph.preheader pcalau12i $a0, %pc_hi20(iterations) ld.w $a0, $a0, %pc_lo12(iterations) - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI0_0) - pcalau12i $a1, %pc_hi20(.LCPI0_1) - fld.d $fa1, $a1, %pc_lo12(.LCPI0_1) - movgr2fr.w $fa2, $a0 - ffint.d.w $fa2, $fa2 - fmul.d $fa0, $fa2, $fa0 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa1, $a0 fdiv.d $fs3, $fa0, $fa1 pcalau12i $a0, %pc_hi20(result_times) addi.d $s3, $a0, %pc_lo12(result_times) - fld.d $fs4, $s2, %pc_lo12(.LCPI0_2) + ori $a0, $s2, 3912 + lu32i.d $a0, -337934 + lu52i.d $a0, $a0, 999 + movgr2fr.d $fs4, $a0 pcalau12i $a0, %pc_hi20(.L.str.2) addi.d $fp, $a0, %pc_lo12(.L.str.2) move $s0, $zero @@ -131,10 +130,13 @@ _Z9summarizev: # @_Z9summarizev fdiv.d $fa0, $fs1, $fa0 pcaddu18i $ra, %call36(exp) jirl $ra, $ra, 0 - fld.d $fs1, $s2, %pc_lo12(.LCPI0_2) - ld.w $a0, $s1, %pc_lo12(current_test) + ori $a0, $s2, 3912 + lu32i.d $a0, -337934 + ld.w $a1, $s1, %pc_lo12(current_test) + lu52i.d $a0, $a0, 999 + movgr2fr.d $fs1, $a0 fmul.d $fs3, $fa0, $fs1 - movgr2fr.w $fa0, $a0 + movgr2fr.w $fa0, $a1 ffint.d.w $fa0, $fa0 fdiv.d $fa0, $fs5, $fa0 pcaddu18i $ra, %call36(exp) @@ -191,16 +193,7 @@ _Z9summarizev: # @_Z9summarizev .Lfunc_end0: .size _Z9summarizev, .Lfunc_end0-_Z9summarizev # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z5test0PdS_ -.LCPI1_0: - .dword 0x40b7700000000000 # double 6000 -.LCPI1_1: - .dword 0x412e848000000000 # double 1.0E+6 -.LCPI1_2: - .dword 0x3e80000000000000 # double 1.1920928955078125E-7 - .text - .globl _Z5test0PdS_ + .globl _Z5test0PdS_ # -- Begin function _Z5test0PdS_ .p2align 5 .type _Z5test0PdS_,@function _Z5test0PdS_: # @_Z5test0PdS_ @@ -231,9 +224,11 @@ _Z5test0PdS_: # @_Z5test0PdS_ srai.d $s4, $a0, 3 blez $s4, .LBB1_8 # %bb.2: # %.preheader.us.preheader - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI1_0) - movgr2fr.d $fs1, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + lu32i.d $a0, 487424 + lu52i.d $a0, $a0, 1035 + movgr2fr.d $fs1, $a0 pcalau12i $a0, %pc_hi20(.L.str.27) addi.d $s0, $a0, %pc_lo12(.L.str.27) move $s5, $zero @@ -248,7 +243,7 @@ _Z5test0PdS_: # @_Z5test0PdS_ # Child Loop BB1_5 Depth 2 move $a0, $s4 move $a2, $fp - fmov.d $fa0, $fs1 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB1_5: # Parent Loop BB1_4 Depth=1 # => This Inner Loop Header: Depth=2 @@ -259,7 +254,7 @@ _Z5test0PdS_: # @_Z5test0PdS_ bnez $a0, .LBB1_5 # %bb.6: # %._crit_edge.us # in Loop: Header=BB1_4 Depth=1 - fcmp.cune.d $fcc0, $fa0, $fs0 + fcmp.cune.d $fcc0, $fa0, $fs1 bceqz $fcc0, .LBB1_3 # %bb.7: # in Loop: Header=BB1_4 Depth=1 ld.w $a1, $s1, %pc_lo12(current_test) @@ -286,21 +281,23 @@ _Z5test0PdS_: # @_Z5test0PdS_ pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 ld.d $a1, $s2, %pc_lo12(start_time) - pcalau12i $a2, %pc_hi20(.LCPI1_1) - fld.d $fa0, $a2, %pc_lo12(.LCPI1_1) - sub.d $a1, $a0, $a1 - movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI1_2) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_2) - ld.w $a1, $s1, %pc_lo12(current_test) pcalau12i $a2, %pc_hi20(end_time) st.d $a0, $a2, %pc_lo12(end_time) + sub.d $a0, $a0, $a1 + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 + ld.w $a0, $s1, %pc_lo12(current_test) + lu52i.d $a1, $zero, 1000 + movgr2fr.d $fa1, $a1 fadd.d $fa0, $fa0, $fa1 - addi.d $a0, $a1, 1 - st.w $a0, $s1, %pc_lo12(current_test) - slli.d $a0, $a1, 3 + addi.d $a1, $a0, 1 + st.w $a1, $s1, %pc_lo12(current_test) + slli.d $a0, $a0, 3 pcalau12i $a1, %pc_hi20(result_times) addi.d $a1, $a1, %pc_lo12(result_times) fstx.d $fa0, $a1, $a0 @@ -540,16 +537,8 @@ main: # @main .size main, .Lfunc_end2-main .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z4testIPddEvT_S1_T0_ -.LCPI3_0: - .dword 0x40b7700000000000 # double 6000 -.LCPI3_1: - .dword 0x412e848000000000 # double 1.0E+6 -.LCPI3_2: - .dword 0x3e80000000000000 # double 1.1920928955078125E-7 .section .text._Z4testIPddEvT_S1_T0_,"axG",@progbits,_Z4testIPddEvT_S1_T0_,comdat - .weak _Z4testIPddEvT_S1_T0_ + .weak _Z4testIPddEvT_S1_T0_ # -- Begin function _Z4testIPddEvT_S1_T0_ .p2align 5 .type _Z4testIPddEvT_S1_T0_,@function _Z4testIPddEvT_S1_T0_: # @_Z4testIPddEvT_S1_T0_ @@ -589,10 +578,12 @@ _Z4testIPddEvT_S1_T0_: # @_Z4testIPddEvT_S1_T0_ pcalau12i $s2, %pc_hi20(current_test) blez $a1, .LBB3_11 # %bb.1: # %.lr.ph - pcalau12i $a0, %pc_hi20(.LCPI3_0) + ori $a0, $zero, 0 + lu32i.d $a0, 487424 + lu52i.d $a0, $a0, 1035 beq $s0, $fp, .LBB3_8 # %bb.2: # %.lr.ph.i.preheader.preheader - fld.d $fs1, $a0, %pc_lo12(.LCPI3_0) + movgr2fr.d $fs1, $a0 pcalau12i $a0, %pc_hi20(.L.str.27) addi.d $s1, $a0, %pc_lo12(.L.str.27) move $s5, $zero @@ -628,7 +619,7 @@ _Z4testIPddEvT_S1_T0_: # @_Z4testIPddEvT_S1_T0_ ld.w $a1, $s4, %pc_lo12(iterations) b .LBB3_3 .LBB3_8: # %.lr.ph.split.us - fld.d $fa0, $a0, %pc_lo12(.LCPI3_0) + movgr2fr.d $fa0, $a0 fcmp.ceq.d $fcc0, $fs0, $fa0 bcnez $fcc0, .LBB3_11 # %bb.9: # %_Z10accumulateIPddET0_T_S2_S1_.exit.us.us.preheader @@ -649,21 +640,23 @@ _Z4testIPddEvT_S1_T0_: # @_Z4testIPddEvT_S1_T0_ pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 ld.d $a1, $s3, %pc_lo12(start_time) - pcalau12i $a2, %pc_hi20(.LCPI3_1) - fld.d $fa0, $a2, %pc_lo12(.LCPI3_1) - sub.d $a1, $a0, $a1 - movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI3_2) - fld.d $fa1, $a1, %pc_lo12(.LCPI3_2) - ld.w $a1, $s2, %pc_lo12(current_test) pcalau12i $a2, %pc_hi20(end_time) st.d $a0, $a2, %pc_lo12(end_time) + sub.d $a0, $a0, $a1 + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 + ld.w $a0, $s2, %pc_lo12(current_test) + lu52i.d $a1, $zero, 1000 + movgr2fr.d $fa1, $a1 fadd.d $fa0, $fa0, $fa1 - addi.d $a0, $a1, 1 - st.w $a0, $s2, %pc_lo12(current_test) - slli.d $a0, $a1, 3 + addi.d $a1, $a0, 1 + st.w $a1, $s2, %pc_lo12(current_test) + slli.d $a0, $a0, 3 pcalau12i $a1, %pc_hi20(result_times) addi.d $a1, $a1, %pc_lo12(result_times) fstx.d $fa0, $a1, $a0 @@ -683,16 +676,8 @@ _Z4testIPddEvT_S1_T0_: # @_Z4testIPddEvT_S1_T0_ .size _Z4testIPddEvT_S1_T0_, .Lfunc_end3-_Z4testIPddEvT_S1_T0_ .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z4testIP6DoubleS0_EvT_S2_T0_ -.LCPI4_0: - .dword 0x40b7700000000000 # double 6000 -.LCPI4_1: - .dword 0x412e848000000000 # double 1.0E+6 -.LCPI4_2: - .dword 0x3e80000000000000 # double 1.1920928955078125E-7 .section .text._Z4testIP6DoubleS0_EvT_S2_T0_,"axG",@progbits,_Z4testIP6DoubleS0_EvT_S2_T0_,comdat - .weak _Z4testIP6DoubleS0_EvT_S2_T0_ + .weak _Z4testIP6DoubleS0_EvT_S2_T0_ # -- Begin function _Z4testIP6DoubleS0_EvT_S2_T0_ .p2align 5 .type _Z4testIP6DoubleS0_EvT_S2_T0_,@function _Z4testIP6DoubleS0_EvT_S2_T0_: # @_Z4testIP6DoubleS0_EvT_S2_T0_ @@ -732,10 +717,12 @@ _Z4testIP6DoubleS0_EvT_S2_T0_: # @_Z4testIP6DoubleS0_EvT_S2_T0_ pcalau12i $s2, %pc_hi20(current_test) blez $a1, .LBB4_11 # %bb.1: # %.lr.ph - pcalau12i $a0, %pc_hi20(.LCPI4_0) + ori $a0, $zero, 0 + lu32i.d $a0, 487424 + lu52i.d $a0, $a0, 1035 beq $s0, $fp, .LBB4_8 # %bb.2: # %.lr.ph.i.preheader.preheader - fld.d $fs1, $a0, %pc_lo12(.LCPI4_0) + movgr2fr.d $fs1, $a0 pcalau12i $a0, %pc_hi20(.L.str.27) addi.d $s1, $a0, %pc_lo12(.L.str.27) move $s5, $zero @@ -771,7 +758,7 @@ _Z4testIP6DoubleS0_EvT_S2_T0_: # @_Z4testIP6DoubleS0_EvT_S2_T0_ ld.w $a1, $s4, %pc_lo12(iterations) b .LBB4_3 .LBB4_8: # %.lr.ph.split.us - fld.d $fa0, $a0, %pc_lo12(.LCPI4_0) + movgr2fr.d $fa0, $a0 fcmp.ceq.d $fcc0, $fs0, $fa0 bcnez $fcc0, .LBB4_11 # %bb.9: # %_Z10accumulateIP6DoubleS0_ET0_T_S3_S2_.exit.us.us.preheader @@ -792,21 +779,23 @@ _Z4testIP6DoubleS0_EvT_S2_T0_: # @_Z4testIP6DoubleS0_EvT_S2_T0_ pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 ld.d $a1, $s3, %pc_lo12(start_time) - pcalau12i $a2, %pc_hi20(.LCPI4_1) - fld.d $fa0, $a2, %pc_lo12(.LCPI4_1) - sub.d $a1, $a0, $a1 - movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI4_2) - fld.d $fa1, $a1, %pc_lo12(.LCPI4_2) - ld.w $a1, $s2, %pc_lo12(current_test) pcalau12i $a2, %pc_hi20(end_time) st.d $a0, $a2, %pc_lo12(end_time) + sub.d $a0, $a0, $a1 + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 + ld.w $a0, $s2, %pc_lo12(current_test) + lu52i.d $a1, $zero, 1000 + movgr2fr.d $fa1, $a1 fadd.d $fa0, $fa0, $fa1 - addi.d $a0, $a1, 1 - st.w $a0, $s2, %pc_lo12(current_test) - slli.d $a0, $a1, 3 + addi.d $a1, $a0, 1 + st.w $a1, $s2, %pc_lo12(current_test) + slli.d $a0, $a0, 3 pcalau12i $a1, %pc_hi20(result_times) addi.d $a1, $a1, %pc_lo12(result_times) fstx.d $fa0, $a1, $a0 @@ -826,16 +815,8 @@ _Z4testIP6DoubleS0_EvT_S2_T0_: # @_Z4testIP6DoubleS0_EvT_S2_T0_ .size _Z4testIP6DoubleS0_EvT_S2_T0_, .Lfunc_end4-_Z4testIP6DoubleS0_EvT_S2_T0_ .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z4testI14double_pointerdEvT_S1_T0_ -.LCPI5_0: - .dword 0x40b7700000000000 # double 6000 -.LCPI5_1: - .dword 0x412e848000000000 # double 1.0E+6 -.LCPI5_2: - .dword 0x3e80000000000000 # double 1.1920928955078125E-7 .section .text._Z4testI14double_pointerdEvT_S1_T0_,"axG",@progbits,_Z4testI14double_pointerdEvT_S1_T0_,comdat - .weak _Z4testI14double_pointerdEvT_S1_T0_ + .weak _Z4testI14double_pointerdEvT_S1_T0_ # -- Begin function _Z4testI14double_pointerdEvT_S1_T0_ .p2align 5 .type _Z4testI14double_pointerdEvT_S1_T0_,@function _Z4testI14double_pointerdEvT_S1_T0_: # @_Z4testI14double_pointerdEvT_S1_T0_ @@ -875,10 +856,12 @@ _Z4testI14double_pointerdEvT_S1_T0_: # @_Z4testI14double_pointerdEvT_S1_T0_ pcalau12i $s2, %pc_hi20(current_test) blez $a1, .LBB5_5 # %bb.1: # %.lr.ph - pcalau12i $a0, %pc_hi20(.LCPI5_0) + ori $a0, $zero, 0 + lu32i.d $a0, 487424 + lu52i.d $a0, $a0, 1035 bne $s0, $fp, .LBB5_6 # %bb.2: # %.lr.ph.split.us - fld.d $fa0, $a0, %pc_lo12(.LCPI5_0) + movgr2fr.d $fa0, $a0 fcmp.ceq.d $fcc0, $fs0, $fa0 bcnez $fcc0, .LBB5_5 # %bb.3: # %_Z10accumulateI14double_pointerdET0_T_S2_S1_.exit.us.us.preheader @@ -899,21 +882,23 @@ _Z4testI14double_pointerdEvT_S1_T0_: # @_Z4testI14double_pointerdEvT_S1_T0_ pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 ld.d $a1, $s3, %pc_lo12(start_time) - pcalau12i $a2, %pc_hi20(.LCPI5_1) - fld.d $fa0, $a2, %pc_lo12(.LCPI5_1) - sub.d $a1, $a0, $a1 - movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI5_2) - fld.d $fa1, $a1, %pc_lo12(.LCPI5_2) - ld.w $a1, $s2, %pc_lo12(current_test) pcalau12i $a2, %pc_hi20(end_time) st.d $a0, $a2, %pc_lo12(end_time) + sub.d $a0, $a0, $a1 + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 + ld.w $a0, $s2, %pc_lo12(current_test) + lu52i.d $a1, $zero, 1000 + movgr2fr.d $fa1, $a1 fadd.d $fa0, $fa0, $fa1 - addi.d $a0, $a1, 1 - st.w $a0, $s2, %pc_lo12(current_test) - slli.d $a0, $a1, 3 + addi.d $a1, $a0, 1 + st.w $a1, $s2, %pc_lo12(current_test) + slli.d $a0, $a0, 3 pcalau12i $a1, %pc_hi20(result_times) addi.d $a1, $a1, %pc_lo12(result_times) fstx.d $fa0, $a1, $a0 @@ -930,7 +915,7 @@ _Z4testI14double_pointerdEvT_S1_T0_: # @_Z4testI14double_pointerdEvT_S1_T0_ addi.d $sp, $sp, 96 ret .LBB5_6: # %.lr.ph.preheader.i.preheader - fld.d $fs1, $a0, %pc_lo12(.LCPI5_0) + movgr2fr.d $fs1, $a0 pcalau12i $a0, %pc_hi20(.L.str.27) addi.d $s1, $a0, %pc_lo12(.L.str.27) move $s5, $zero @@ -969,16 +954,8 @@ _Z4testI14double_pointerdEvT_S1_T0_: # @_Z4testI14double_pointerdEvT_S1_T0_ .size _Z4testI14double_pointerdEvT_S1_T0_, .Lfunc_end5-_Z4testI14double_pointerdEvT_S1_T0_ .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z4testI14Double_pointer6DoubleEvT_S2_T0_ -.LCPI6_0: - .dword 0x40b7700000000000 # double 6000 -.LCPI6_1: - .dword 0x412e848000000000 # double 1.0E+6 -.LCPI6_2: - .dword 0x3e80000000000000 # double 1.1920928955078125E-7 .section .text._Z4testI14Double_pointer6DoubleEvT_S2_T0_,"axG",@progbits,_Z4testI14Double_pointer6DoubleEvT_S2_T0_,comdat - .weak _Z4testI14Double_pointer6DoubleEvT_S2_T0_ + .weak _Z4testI14Double_pointer6DoubleEvT_S2_T0_ # -- Begin function _Z4testI14Double_pointer6DoubleEvT_S2_T0_ .p2align 5 .type _Z4testI14Double_pointer6DoubleEvT_S2_T0_,@function _Z4testI14Double_pointer6DoubleEvT_S2_T0_: # @_Z4testI14Double_pointer6DoubleEvT_S2_T0_ @@ -1018,10 +995,12 @@ _Z4testI14Double_pointer6DoubleEvT_S2_T0_: # @_Z4testI14Double_pointer6DoubleEvT pcalau12i $s2, %pc_hi20(current_test) blez $a1, .LBB6_5 # %bb.1: # %.lr.ph - pcalau12i $a0, %pc_hi20(.LCPI6_0) + ori $a0, $zero, 0 + lu32i.d $a0, 487424 + lu52i.d $a0, $a0, 1035 bne $s0, $fp, .LBB6_6 # %bb.2: # %.lr.ph.split.us - fld.d $fa0, $a0, %pc_lo12(.LCPI6_0) + movgr2fr.d $fa0, $a0 fcmp.ceq.d $fcc0, $fs0, $fa0 bcnez $fcc0, .LBB6_5 # %bb.3: # %_Z10accumulateI14Double_pointer6DoubleET0_T_S3_S2_.exit.us.us.preheader @@ -1042,21 +1021,23 @@ _Z4testI14Double_pointer6DoubleEvT_S2_T0_: # @_Z4testI14Double_pointer6DoubleEvT pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 ld.d $a1, $s3, %pc_lo12(start_time) - pcalau12i $a2, %pc_hi20(.LCPI6_1) - fld.d $fa0, $a2, %pc_lo12(.LCPI6_1) - sub.d $a1, $a0, $a1 - movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI6_2) - fld.d $fa1, $a1, %pc_lo12(.LCPI6_2) - ld.w $a1, $s2, %pc_lo12(current_test) pcalau12i $a2, %pc_hi20(end_time) st.d $a0, $a2, %pc_lo12(end_time) + sub.d $a0, $a0, $a1 + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 + ld.w $a0, $s2, %pc_lo12(current_test) + lu52i.d $a1, $zero, 1000 + movgr2fr.d $fa1, $a1 fadd.d $fa0, $fa0, $fa1 - addi.d $a0, $a1, 1 - st.w $a0, $s2, %pc_lo12(current_test) - slli.d $a0, $a1, 3 + addi.d $a1, $a0, 1 + st.w $a1, $s2, %pc_lo12(current_test) + slli.d $a0, $a0, 3 pcalau12i $a1, %pc_hi20(result_times) addi.d $a1, $a1, %pc_lo12(result_times) fstx.d $fa0, $a1, $a0 @@ -1073,7 +1054,7 @@ _Z4testI14Double_pointer6DoubleEvT_S2_T0_: # @_Z4testI14Double_pointer6DoubleEvT addi.d $sp, $sp, 96 ret .LBB6_6: # %.lr.ph.preheader.i.preheader - fld.d $fs1, $a0, %pc_lo12(.LCPI6_0) + movgr2fr.d $fs1, $a0 pcalau12i $a0, %pc_hi20(.L.str.27) addi.d $s1, $a0, %pc_lo12(.L.str.27) move $s5, $zero @@ -1112,16 +1093,8 @@ _Z4testI14Double_pointer6DoubleEvT_S2_T0_: # @_Z4testI14Double_pointer6DoubleEvT .size _Z4testI14Double_pointer6DoubleEvT_S2_T0_, .Lfunc_end6-_Z4testI14Double_pointer6DoubleEvT_S2_T0_ .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z4testI16reverse_iteratorIPddEdEvT_S3_T0_ -.LCPI7_0: - .dword 0x40b7700000000000 # double 6000 -.LCPI7_1: - .dword 0x412e848000000000 # double 1.0E+6 -.LCPI7_2: - .dword 0x3e80000000000000 # double 1.1920928955078125E-7 .section .text._Z4testI16reverse_iteratorIPddEdEvT_S3_T0_,"axG",@progbits,_Z4testI16reverse_iteratorIPddEdEvT_S3_T0_,comdat - .weak _Z4testI16reverse_iteratorIPddEdEvT_S3_T0_ + .weak _Z4testI16reverse_iteratorIPddEdEvT_S3_T0_ # -- Begin function _Z4testI16reverse_iteratorIPddEdEvT_S3_T0_ .p2align 5 .type _Z4testI16reverse_iteratorIPddEdEvT_S3_T0_,@function _Z4testI16reverse_iteratorIPddEdEvT_S3_T0_: # @_Z4testI16reverse_iteratorIPddEdEvT_S3_T0_ @@ -1161,10 +1134,12 @@ _Z4testI16reverse_iteratorIPddEdEvT_S3_T0_: # @_Z4testI16reverse_iteratorIPddEdE pcalau12i $s2, %pc_hi20(current_test) blez $a1, .LBB7_5 # %bb.1: # %.lr.ph - pcalau12i $a0, %pc_hi20(.LCPI7_0) + ori $a0, $zero, 0 + lu32i.d $a0, 487424 + lu52i.d $a0, $a0, 1035 bne $s0, $fp, .LBB7_6 # %bb.2: # %.lr.ph.split.us - fld.d $fa0, $a0, %pc_lo12(.LCPI7_0) + movgr2fr.d $fa0, $a0 fcmp.ceq.d $fcc0, $fs0, $fa0 bcnez $fcc0, .LBB7_5 # %bb.3: # %_Z10accumulateI16reverse_iteratorIPddEdET0_T_S4_S3_.exit.us.us.preheader @@ -1185,21 +1160,23 @@ _Z4testI16reverse_iteratorIPddEdEvT_S3_T0_: # @_Z4testI16reverse_iteratorIPddEdE pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 ld.d $a1, $s3, %pc_lo12(start_time) - pcalau12i $a2, %pc_hi20(.LCPI7_1) - fld.d $fa0, $a2, %pc_lo12(.LCPI7_1) - sub.d $a1, $a0, $a1 - movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI7_2) - fld.d $fa1, $a1, %pc_lo12(.LCPI7_2) - ld.w $a1, $s2, %pc_lo12(current_test) pcalau12i $a2, %pc_hi20(end_time) st.d $a0, $a2, %pc_lo12(end_time) + sub.d $a0, $a0, $a1 + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 + ld.w $a0, $s2, %pc_lo12(current_test) + lu52i.d $a1, $zero, 1000 + movgr2fr.d $fa1, $a1 fadd.d $fa0, $fa0, $fa1 - addi.d $a0, $a1, 1 - st.w $a0, $s2, %pc_lo12(current_test) - slli.d $a0, $a1, 3 + addi.d $a1, $a0, 1 + st.w $a1, $s2, %pc_lo12(current_test) + slli.d $a0, $a0, 3 pcalau12i $a1, %pc_hi20(result_times) addi.d $a1, $a1, %pc_lo12(result_times) fstx.d $fa0, $a1, $a0 @@ -1216,7 +1193,7 @@ _Z4testI16reverse_iteratorIPddEdEvT_S3_T0_: # @_Z4testI16reverse_iteratorIPddEdE addi.d $sp, $sp, 96 ret .LBB7_6: # %.lr.ph.preheader.i.preheader - fld.d $fs1, $a0, %pc_lo12(.LCPI7_0) + movgr2fr.d $fs1, $a0 pcalau12i $a0, %pc_hi20(.L.str.27) addi.d $s1, $a0, %pc_lo12(.L.str.27) move $s5, $zero @@ -1255,16 +1232,8 @@ _Z4testI16reverse_iteratorIPddEdEvT_S3_T0_: # @_Z4testI16reverse_iteratorIPddEdE .size _Z4testI16reverse_iteratorIPddEdEvT_S3_T0_, .Lfunc_end7-_Z4testI16reverse_iteratorIPddEdEvT_S3_T0_ .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z4testI16reverse_iteratorIP6DoubleS1_ES1_EvT_S4_T0_ -.LCPI8_0: - .dword 0x40b7700000000000 # double 6000 -.LCPI8_1: - .dword 0x412e848000000000 # double 1.0E+6 -.LCPI8_2: - .dword 0x3e80000000000000 # double 1.1920928955078125E-7 .section .text._Z4testI16reverse_iteratorIP6DoubleS1_ES1_EvT_S4_T0_,"axG",@progbits,_Z4testI16reverse_iteratorIP6DoubleS1_ES1_EvT_S4_T0_,comdat - .weak _Z4testI16reverse_iteratorIP6DoubleS1_ES1_EvT_S4_T0_ + .weak _Z4testI16reverse_iteratorIP6DoubleS1_ES1_EvT_S4_T0_ # -- Begin function _Z4testI16reverse_iteratorIP6DoubleS1_ES1_EvT_S4_T0_ .p2align 5 .type _Z4testI16reverse_iteratorIP6DoubleS1_ES1_EvT_S4_T0_,@function _Z4testI16reverse_iteratorIP6DoubleS1_ES1_EvT_S4_T0_: # @_Z4testI16reverse_iteratorIP6DoubleS1_ES1_EvT_S4_T0_ @@ -1304,10 +1273,12 @@ _Z4testI16reverse_iteratorIP6DoubleS1_ES1_EvT_S4_T0_: # @_Z4testI16reverse_itera pcalau12i $s2, %pc_hi20(current_test) blez $a1, .LBB8_5 # %bb.1: # %.lr.ph - pcalau12i $a0, %pc_hi20(.LCPI8_0) + ori $a0, $zero, 0 + lu32i.d $a0, 487424 + lu52i.d $a0, $a0, 1035 bne $s0, $fp, .LBB8_6 # %bb.2: # %.lr.ph.split.us - fld.d $fa0, $a0, %pc_lo12(.LCPI8_0) + movgr2fr.d $fa0, $a0 fcmp.ceq.d $fcc0, $fs0, $fa0 bcnez $fcc0, .LBB8_5 # %bb.3: # %_Z10accumulateI16reverse_iteratorIP6DoubleS1_ES1_ET0_T_S5_S4_.exit.us.us.preheader @@ -1328,21 +1299,23 @@ _Z4testI16reverse_iteratorIP6DoubleS1_ES1_EvT_S4_T0_: # @_Z4testI16reverse_itera pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 ld.d $a1, $s3, %pc_lo12(start_time) - pcalau12i $a2, %pc_hi20(.LCPI8_1) - fld.d $fa0, $a2, %pc_lo12(.LCPI8_1) - sub.d $a1, $a0, $a1 - movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI8_2) - fld.d $fa1, $a1, %pc_lo12(.LCPI8_2) - ld.w $a1, $s2, %pc_lo12(current_test) pcalau12i $a2, %pc_hi20(end_time) st.d $a0, $a2, %pc_lo12(end_time) + sub.d $a0, $a0, $a1 + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 + ld.w $a0, $s2, %pc_lo12(current_test) + lu52i.d $a1, $zero, 1000 + movgr2fr.d $fa1, $a1 fadd.d $fa0, $fa0, $fa1 - addi.d $a0, $a1, 1 - st.w $a0, $s2, %pc_lo12(current_test) - slli.d $a0, $a1, 3 + addi.d $a1, $a0, 1 + st.w $a1, $s2, %pc_lo12(current_test) + slli.d $a0, $a0, 3 pcalau12i $a1, %pc_hi20(result_times) addi.d $a1, $a1, %pc_lo12(result_times) fstx.d $fa0, $a1, $a0 @@ -1359,7 +1332,7 @@ _Z4testI16reverse_iteratorIP6DoubleS1_ES1_EvT_S4_T0_: # @_Z4testI16reverse_itera addi.d $sp, $sp, 96 ret .LBB8_6: # %.lr.ph.preheader.i.preheader - fld.d $fs1, $a0, %pc_lo12(.LCPI8_0) + movgr2fr.d $fs1, $a0 pcalau12i $a0, %pc_hi20(.L.str.27) addi.d $s1, $a0, %pc_lo12(.L.str.27) move $s5, $zero @@ -1398,16 +1371,8 @@ _Z4testI16reverse_iteratorIP6DoubleS1_ES1_EvT_S4_T0_: # @_Z4testI16reverse_itera .size _Z4testI16reverse_iteratorIP6DoubleS1_ES1_EvT_S4_T0_, .Lfunc_end8-_Z4testI16reverse_iteratorIP6DoubleS1_ES1_EvT_S4_T0_ .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z4testI16reverse_iteratorI14double_pointerdEdEvT_S3_T0_ -.LCPI9_0: - .dword 0x40b7700000000000 # double 6000 -.LCPI9_1: - .dword 0x412e848000000000 # double 1.0E+6 -.LCPI9_2: - .dword 0x3e80000000000000 # double 1.1920928955078125E-7 .section .text._Z4testI16reverse_iteratorI14double_pointerdEdEvT_S3_T0_,"axG",@progbits,_Z4testI16reverse_iteratorI14double_pointerdEdEvT_S3_T0_,comdat - .weak _Z4testI16reverse_iteratorI14double_pointerdEdEvT_S3_T0_ + .weak _Z4testI16reverse_iteratorI14double_pointerdEdEvT_S3_T0_ # -- Begin function _Z4testI16reverse_iteratorI14double_pointerdEdEvT_S3_T0_ .p2align 5 .type _Z4testI16reverse_iteratorI14double_pointerdEdEvT_S3_T0_,@function _Z4testI16reverse_iteratorI14double_pointerdEdEvT_S3_T0_: # @_Z4testI16reverse_iteratorI14double_pointerdEdEvT_S3_T0_ @@ -1447,10 +1412,12 @@ _Z4testI16reverse_iteratorI14double_pointerdEdEvT_S3_T0_: # @_Z4testI16reverse_i pcalau12i $s2, %pc_hi20(current_test) blez $a1, .LBB9_5 # %bb.1: # %.lr.ph - pcalau12i $a0, %pc_hi20(.LCPI9_0) + ori $a0, $zero, 0 + lu32i.d $a0, 487424 + lu52i.d $a0, $a0, 1035 bne $s0, $fp, .LBB9_6 # %bb.2: # %.lr.ph.split.us - fld.d $fa0, $a0, %pc_lo12(.LCPI9_0) + movgr2fr.d $fa0, $a0 fcmp.ceq.d $fcc0, $fs0, $fa0 bcnez $fcc0, .LBB9_5 # %bb.3: # %_Z10accumulateI16reverse_iteratorI14double_pointerdEdET0_T_S4_S3_.exit.us.us.preheader @@ -1471,21 +1438,23 @@ _Z4testI16reverse_iteratorI14double_pointerdEdEvT_S3_T0_: # @_Z4testI16reverse_i pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 ld.d $a1, $s3, %pc_lo12(start_time) - pcalau12i $a2, %pc_hi20(.LCPI9_1) - fld.d $fa0, $a2, %pc_lo12(.LCPI9_1) - sub.d $a1, $a0, $a1 - movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI9_2) - fld.d $fa1, $a1, %pc_lo12(.LCPI9_2) - ld.w $a1, $s2, %pc_lo12(current_test) pcalau12i $a2, %pc_hi20(end_time) st.d $a0, $a2, %pc_lo12(end_time) + sub.d $a0, $a0, $a1 + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 + ld.w $a0, $s2, %pc_lo12(current_test) + lu52i.d $a1, $zero, 1000 + movgr2fr.d $fa1, $a1 fadd.d $fa0, $fa0, $fa1 - addi.d $a0, $a1, 1 - st.w $a0, $s2, %pc_lo12(current_test) - slli.d $a0, $a1, 3 + addi.d $a1, $a0, 1 + st.w $a1, $s2, %pc_lo12(current_test) + slli.d $a0, $a0, 3 pcalau12i $a1, %pc_hi20(result_times) addi.d $a1, $a1, %pc_lo12(result_times) fstx.d $fa0, $a1, $a0 @@ -1502,7 +1471,7 @@ _Z4testI16reverse_iteratorI14double_pointerdEdEvT_S3_T0_: # @_Z4testI16reverse_i addi.d $sp, $sp, 96 ret .LBB9_6: # %.lr.ph.preheader.i.preheader - fld.d $fs1, $a0, %pc_lo12(.LCPI9_0) + movgr2fr.d $fs1, $a0 pcalau12i $a0, %pc_hi20(.L.str.27) addi.d $s1, $a0, %pc_lo12(.L.str.27) move $s5, $zero @@ -1541,16 +1510,8 @@ _Z4testI16reverse_iteratorI14double_pointerdEdEvT_S3_T0_: # @_Z4testI16reverse_i .size _Z4testI16reverse_iteratorI14double_pointerdEdEvT_S3_T0_, .Lfunc_end9-_Z4testI16reverse_iteratorI14double_pointerdEdEvT_S3_T0_ .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z4testI16reverse_iteratorI14Double_pointer6DoubleES2_EvT_S4_T0_ -.LCPI10_0: - .dword 0x40b7700000000000 # double 6000 -.LCPI10_1: - .dword 0x412e848000000000 # double 1.0E+6 -.LCPI10_2: - .dword 0x3e80000000000000 # double 1.1920928955078125E-7 .section .text._Z4testI16reverse_iteratorI14Double_pointer6DoubleES2_EvT_S4_T0_,"axG",@progbits,_Z4testI16reverse_iteratorI14Double_pointer6DoubleES2_EvT_S4_T0_,comdat - .weak _Z4testI16reverse_iteratorI14Double_pointer6DoubleES2_EvT_S4_T0_ + .weak _Z4testI16reverse_iteratorI14Double_pointer6DoubleES2_EvT_S4_T0_ # -- Begin function _Z4testI16reverse_iteratorI14Double_pointer6DoubleES2_EvT_S4_T0_ .p2align 5 .type _Z4testI16reverse_iteratorI14Double_pointer6DoubleES2_EvT_S4_T0_,@function _Z4testI16reverse_iteratorI14Double_pointer6DoubleES2_EvT_S4_T0_: # @_Z4testI16reverse_iteratorI14Double_pointer6DoubleES2_EvT_S4_T0_ @@ -1590,10 +1551,12 @@ _Z4testI16reverse_iteratorI14Double_pointer6DoubleES2_EvT_S4_T0_: # @_Z4testI16r pcalau12i $s2, %pc_hi20(current_test) blez $a1, .LBB10_5 # %bb.1: # %.lr.ph - pcalau12i $a0, %pc_hi20(.LCPI10_0) + ori $a0, $zero, 0 + lu32i.d $a0, 487424 + lu52i.d $a0, $a0, 1035 bne $s0, $fp, .LBB10_6 # %bb.2: # %.lr.ph.split.us - fld.d $fa0, $a0, %pc_lo12(.LCPI10_0) + movgr2fr.d $fa0, $a0 fcmp.ceq.d $fcc0, $fs0, $fa0 bcnez $fcc0, .LBB10_5 # %bb.3: # %_Z10accumulateI16reverse_iteratorI14Double_pointer6DoubleES2_ET0_T_S5_S4_.exit.us.us.preheader @@ -1614,21 +1577,23 @@ _Z4testI16reverse_iteratorI14Double_pointer6DoubleES2_EvT_S4_T0_: # @_Z4testI16r pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 ld.d $a1, $s3, %pc_lo12(start_time) - pcalau12i $a2, %pc_hi20(.LCPI10_1) - fld.d $fa0, $a2, %pc_lo12(.LCPI10_1) - sub.d $a1, $a0, $a1 - movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI10_2) - fld.d $fa1, $a1, %pc_lo12(.LCPI10_2) - ld.w $a1, $s2, %pc_lo12(current_test) pcalau12i $a2, %pc_hi20(end_time) st.d $a0, $a2, %pc_lo12(end_time) + sub.d $a0, $a0, $a1 + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 + ld.w $a0, $s2, %pc_lo12(current_test) + lu52i.d $a1, $zero, 1000 + movgr2fr.d $fa1, $a1 fadd.d $fa0, $fa0, $fa1 - addi.d $a0, $a1, 1 - st.w $a0, $s2, %pc_lo12(current_test) - slli.d $a0, $a1, 3 + addi.d $a1, $a0, 1 + st.w $a1, $s2, %pc_lo12(current_test) + slli.d $a0, $a0, 3 pcalau12i $a1, %pc_hi20(result_times) addi.d $a1, $a1, %pc_lo12(result_times) fstx.d $fa0, $a1, $a0 @@ -1645,7 +1610,7 @@ _Z4testI16reverse_iteratorI14Double_pointer6DoubleES2_EvT_S4_T0_: # @_Z4testI16r addi.d $sp, $sp, 96 ret .LBB10_6: # %.lr.ph.preheader.i.preheader - fld.d $fs1, $a0, %pc_lo12(.LCPI10_0) + movgr2fr.d $fs1, $a0 pcalau12i $a0, %pc_hi20(.L.str.27) addi.d $s1, $a0, %pc_lo12(.L.str.27) move $s5, $zero @@ -1684,16 +1649,8 @@ _Z4testI16reverse_iteratorI14Double_pointer6DoubleES2_EvT_S4_T0_: # @_Z4testI16r .size _Z4testI16reverse_iteratorI14Double_pointer6DoubleES2_EvT_S4_T0_, .Lfunc_end10-_Z4testI16reverse_iteratorI14Double_pointer6DoubleES2_EvT_S4_T0_ .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z4testI16reverse_iteratorIS0_IPddEdEdEvT_S4_T0_ -.LCPI11_0: - .dword 0x40b7700000000000 # double 6000 -.LCPI11_1: - .dword 0x412e848000000000 # double 1.0E+6 -.LCPI11_2: - .dword 0x3e80000000000000 # double 1.1920928955078125E-7 .section .text._Z4testI16reverse_iteratorIS0_IPddEdEdEvT_S4_T0_,"axG",@progbits,_Z4testI16reverse_iteratorIS0_IPddEdEdEvT_S4_T0_,comdat - .weak _Z4testI16reverse_iteratorIS0_IPddEdEdEvT_S4_T0_ + .weak _Z4testI16reverse_iteratorIS0_IPddEdEdEvT_S4_T0_ # -- Begin function _Z4testI16reverse_iteratorIS0_IPddEdEdEvT_S4_T0_ .p2align 5 .type _Z4testI16reverse_iteratorIS0_IPddEdEdEvT_S4_T0_,@function _Z4testI16reverse_iteratorIS0_IPddEdEdEvT_S4_T0_: # @_Z4testI16reverse_iteratorIS0_IPddEdEdEvT_S4_T0_ @@ -1733,10 +1690,12 @@ _Z4testI16reverse_iteratorIS0_IPddEdEdEvT_S4_T0_: # @_Z4testI16reverse_iteratorI pcalau12i $s2, %pc_hi20(current_test) blez $a1, .LBB11_5 # %bb.1: # %.lr.ph - pcalau12i $a0, %pc_hi20(.LCPI11_0) + ori $a0, $zero, 0 + lu32i.d $a0, 487424 + lu52i.d $a0, $a0, 1035 bne $s0, $fp, .LBB11_6 # %bb.2: # %.lr.ph.split.us - fld.d $fa0, $a0, %pc_lo12(.LCPI11_0) + movgr2fr.d $fa0, $a0 fcmp.ceq.d $fcc0, $fs0, $fa0 bcnez $fcc0, .LBB11_5 # %bb.3: # %_Z10accumulateI16reverse_iteratorIS0_IPddEdEdET0_T_S5_S4_.exit.us.us.preheader @@ -1757,21 +1716,23 @@ _Z4testI16reverse_iteratorIS0_IPddEdEdEvT_S4_T0_: # @_Z4testI16reverse_iteratorI pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 ld.d $a1, $s3, %pc_lo12(start_time) - pcalau12i $a2, %pc_hi20(.LCPI11_1) - fld.d $fa0, $a2, %pc_lo12(.LCPI11_1) - sub.d $a1, $a0, $a1 - movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI11_2) - fld.d $fa1, $a1, %pc_lo12(.LCPI11_2) - ld.w $a1, $s2, %pc_lo12(current_test) pcalau12i $a2, %pc_hi20(end_time) st.d $a0, $a2, %pc_lo12(end_time) + sub.d $a0, $a0, $a1 + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 + ld.w $a0, $s2, %pc_lo12(current_test) + lu52i.d $a1, $zero, 1000 + movgr2fr.d $fa1, $a1 fadd.d $fa0, $fa0, $fa1 - addi.d $a0, $a1, 1 - st.w $a0, $s2, %pc_lo12(current_test) - slli.d $a0, $a1, 3 + addi.d $a1, $a0, 1 + st.w $a1, $s2, %pc_lo12(current_test) + slli.d $a0, $a0, 3 pcalau12i $a1, %pc_hi20(result_times) addi.d $a1, $a1, %pc_lo12(result_times) fstx.d $fa0, $a1, $a0 @@ -1788,7 +1749,7 @@ _Z4testI16reverse_iteratorIS0_IPddEdEdEvT_S4_T0_: # @_Z4testI16reverse_iteratorI addi.d $sp, $sp, 96 ret .LBB11_6: # %.lr.ph.preheader.i.preheader - fld.d $fs1, $a0, %pc_lo12(.LCPI11_0) + movgr2fr.d $fs1, $a0 pcalau12i $a0, %pc_hi20(.L.str.27) addi.d $s1, $a0, %pc_lo12(.L.str.27) move $s5, $zero @@ -1827,16 +1788,8 @@ _Z4testI16reverse_iteratorIS0_IPddEdEdEvT_S4_T0_: # @_Z4testI16reverse_iteratorI .size _Z4testI16reverse_iteratorIS0_IPddEdEdEvT_S4_T0_, .Lfunc_end11-_Z4testI16reverse_iteratorIS0_IPddEdEdEvT_S4_T0_ .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z4testI16reverse_iteratorIS0_IP6DoubleS1_ES1_ES1_EvT_S5_T0_ -.LCPI12_0: - .dword 0x40b7700000000000 # double 6000 -.LCPI12_1: - .dword 0x412e848000000000 # double 1.0E+6 -.LCPI12_2: - .dword 0x3e80000000000000 # double 1.1920928955078125E-7 .section .text._Z4testI16reverse_iteratorIS0_IP6DoubleS1_ES1_ES1_EvT_S5_T0_,"axG",@progbits,_Z4testI16reverse_iteratorIS0_IP6DoubleS1_ES1_ES1_EvT_S5_T0_,comdat - .weak _Z4testI16reverse_iteratorIS0_IP6DoubleS1_ES1_ES1_EvT_S5_T0_ + .weak _Z4testI16reverse_iteratorIS0_IP6DoubleS1_ES1_ES1_EvT_S5_T0_ # -- Begin function _Z4testI16reverse_iteratorIS0_IP6DoubleS1_ES1_ES1_EvT_S5_T0_ .p2align 5 .type _Z4testI16reverse_iteratorIS0_IP6DoubleS1_ES1_ES1_EvT_S5_T0_,@function _Z4testI16reverse_iteratorIS0_IP6DoubleS1_ES1_ES1_EvT_S5_T0_: # @_Z4testI16reverse_iteratorIS0_IP6DoubleS1_ES1_ES1_EvT_S5_T0_ @@ -1876,10 +1829,12 @@ _Z4testI16reverse_iteratorIS0_IP6DoubleS1_ES1_ES1_EvT_S5_T0_: # @_Z4testI16rever pcalau12i $s2, %pc_hi20(current_test) blez $a1, .LBB12_5 # %bb.1: # %.lr.ph - pcalau12i $a0, %pc_hi20(.LCPI12_0) + ori $a0, $zero, 0 + lu32i.d $a0, 487424 + lu52i.d $a0, $a0, 1035 bne $s0, $fp, .LBB12_6 # %bb.2: # %.lr.ph.split.us - fld.d $fa0, $a0, %pc_lo12(.LCPI12_0) + movgr2fr.d $fa0, $a0 fcmp.ceq.d $fcc0, $fs0, $fa0 bcnez $fcc0, .LBB12_5 # %bb.3: # %_Z10accumulateI16reverse_iteratorIS0_IP6DoubleS1_ES1_ES1_ET0_T_S6_S5_.exit.us.us.preheader @@ -1900,21 +1855,23 @@ _Z4testI16reverse_iteratorIS0_IP6DoubleS1_ES1_ES1_EvT_S5_T0_: # @_Z4testI16rever pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 ld.d $a1, $s3, %pc_lo12(start_time) - pcalau12i $a2, %pc_hi20(.LCPI12_1) - fld.d $fa0, $a2, %pc_lo12(.LCPI12_1) - sub.d $a1, $a0, $a1 - movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI12_2) - fld.d $fa1, $a1, %pc_lo12(.LCPI12_2) - ld.w $a1, $s2, %pc_lo12(current_test) pcalau12i $a2, %pc_hi20(end_time) st.d $a0, $a2, %pc_lo12(end_time) + sub.d $a0, $a0, $a1 + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 + ld.w $a0, $s2, %pc_lo12(current_test) + lu52i.d $a1, $zero, 1000 + movgr2fr.d $fa1, $a1 fadd.d $fa0, $fa0, $fa1 - addi.d $a0, $a1, 1 - st.w $a0, $s2, %pc_lo12(current_test) - slli.d $a0, $a1, 3 + addi.d $a1, $a0, 1 + st.w $a1, $s2, %pc_lo12(current_test) + slli.d $a0, $a0, 3 pcalau12i $a1, %pc_hi20(result_times) addi.d $a1, $a1, %pc_lo12(result_times) fstx.d $fa0, $a1, $a0 @@ -1931,7 +1888,7 @@ _Z4testI16reverse_iteratorIS0_IP6DoubleS1_ES1_ES1_EvT_S5_T0_: # @_Z4testI16rever addi.d $sp, $sp, 96 ret .LBB12_6: # %.lr.ph.preheader.i.preheader - fld.d $fs1, $a0, %pc_lo12(.LCPI12_0) + movgr2fr.d $fs1, $a0 pcalau12i $a0, %pc_hi20(.L.str.27) addi.d $s1, $a0, %pc_lo12(.L.str.27) move $s5, $zero @@ -1970,16 +1927,8 @@ _Z4testI16reverse_iteratorIS0_IP6DoubleS1_ES1_ES1_EvT_S5_T0_: # @_Z4testI16rever .size _Z4testI16reverse_iteratorIS0_IP6DoubleS1_ES1_ES1_EvT_S5_T0_, .Lfunc_end12-_Z4testI16reverse_iteratorIS0_IP6DoubleS1_ES1_ES1_EvT_S5_T0_ .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z4testI16reverse_iteratorIS0_I14double_pointerdEdEdEvT_S4_T0_ -.LCPI13_0: - .dword 0x40b7700000000000 # double 6000 -.LCPI13_1: - .dword 0x412e848000000000 # double 1.0E+6 -.LCPI13_2: - .dword 0x3e80000000000000 # double 1.1920928955078125E-7 .section .text._Z4testI16reverse_iteratorIS0_I14double_pointerdEdEdEvT_S4_T0_,"axG",@progbits,_Z4testI16reverse_iteratorIS0_I14double_pointerdEdEdEvT_S4_T0_,comdat - .weak _Z4testI16reverse_iteratorIS0_I14double_pointerdEdEdEvT_S4_T0_ + .weak _Z4testI16reverse_iteratorIS0_I14double_pointerdEdEdEvT_S4_T0_ # -- Begin function _Z4testI16reverse_iteratorIS0_I14double_pointerdEdEdEvT_S4_T0_ .p2align 5 .type _Z4testI16reverse_iteratorIS0_I14double_pointerdEdEdEvT_S4_T0_,@function _Z4testI16reverse_iteratorIS0_I14double_pointerdEdEdEvT_S4_T0_: # @_Z4testI16reverse_iteratorIS0_I14double_pointerdEdEdEvT_S4_T0_ @@ -2019,10 +1968,12 @@ _Z4testI16reverse_iteratorIS0_I14double_pointerdEdEdEvT_S4_T0_: # @_Z4testI16rev pcalau12i $s2, %pc_hi20(current_test) blez $a1, .LBB13_5 # %bb.1: # %.lr.ph - pcalau12i $a0, %pc_hi20(.LCPI13_0) + ori $a0, $zero, 0 + lu32i.d $a0, 487424 + lu52i.d $a0, $a0, 1035 bne $s0, $fp, .LBB13_6 # %bb.2: # %.lr.ph.split.us - fld.d $fa0, $a0, %pc_lo12(.LCPI13_0) + movgr2fr.d $fa0, $a0 fcmp.ceq.d $fcc0, $fs0, $fa0 bcnez $fcc0, .LBB13_5 # %bb.3: # %_Z10accumulateI16reverse_iteratorIS0_I14double_pointerdEdEdET0_T_S5_S4_.exit.us.us.preheader @@ -2043,21 +1994,23 @@ _Z4testI16reverse_iteratorIS0_I14double_pointerdEdEdEvT_S4_T0_: # @_Z4testI16rev pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 ld.d $a1, $s3, %pc_lo12(start_time) - pcalau12i $a2, %pc_hi20(.LCPI13_1) - fld.d $fa0, $a2, %pc_lo12(.LCPI13_1) - sub.d $a1, $a0, $a1 - movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI13_2) - fld.d $fa1, $a1, %pc_lo12(.LCPI13_2) - ld.w $a1, $s2, %pc_lo12(current_test) pcalau12i $a2, %pc_hi20(end_time) st.d $a0, $a2, %pc_lo12(end_time) + sub.d $a0, $a0, $a1 + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 + ld.w $a0, $s2, %pc_lo12(current_test) + lu52i.d $a1, $zero, 1000 + movgr2fr.d $fa1, $a1 fadd.d $fa0, $fa0, $fa1 - addi.d $a0, $a1, 1 - st.w $a0, $s2, %pc_lo12(current_test) - slli.d $a0, $a1, 3 + addi.d $a1, $a0, 1 + st.w $a1, $s2, %pc_lo12(current_test) + slli.d $a0, $a0, 3 pcalau12i $a1, %pc_hi20(result_times) addi.d $a1, $a1, %pc_lo12(result_times) fstx.d $fa0, $a1, $a0 @@ -2074,7 +2027,7 @@ _Z4testI16reverse_iteratorIS0_I14double_pointerdEdEdEvT_S4_T0_: # @_Z4testI16rev addi.d $sp, $sp, 96 ret .LBB13_6: # %.lr.ph.preheader.i.preheader - fld.d $fs1, $a0, %pc_lo12(.LCPI13_0) + movgr2fr.d $fs1, $a0 pcalau12i $a0, %pc_hi20(.L.str.27) addi.d $s1, $a0, %pc_lo12(.L.str.27) move $s5, $zero @@ -2113,16 +2066,8 @@ _Z4testI16reverse_iteratorIS0_I14double_pointerdEdEdEvT_S4_T0_: # @_Z4testI16rev .size _Z4testI16reverse_iteratorIS0_I14double_pointerdEdEdEvT_S4_T0_, .Lfunc_end13-_Z4testI16reverse_iteratorIS0_I14double_pointerdEdEdEvT_S4_T0_ .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z4testI16reverse_iteratorIS0_I14Double_pointer6DoubleES2_ES2_EvT_S5_T0_ -.LCPI14_0: - .dword 0x40b7700000000000 # double 6000 -.LCPI14_1: - .dword 0x412e848000000000 # double 1.0E+6 -.LCPI14_2: - .dword 0x3e80000000000000 # double 1.1920928955078125E-7 .section .text._Z4testI16reverse_iteratorIS0_I14Double_pointer6DoubleES2_ES2_EvT_S5_T0_,"axG",@progbits,_Z4testI16reverse_iteratorIS0_I14Double_pointer6DoubleES2_ES2_EvT_S5_T0_,comdat - .weak _Z4testI16reverse_iteratorIS0_I14Double_pointer6DoubleES2_ES2_EvT_S5_T0_ + .weak _Z4testI16reverse_iteratorIS0_I14Double_pointer6DoubleES2_ES2_EvT_S5_T0_ # -- Begin function _Z4testI16reverse_iteratorIS0_I14Double_pointer6DoubleES2_ES2_EvT_S5_T0_ .p2align 5 .type _Z4testI16reverse_iteratorIS0_I14Double_pointer6DoubleES2_ES2_EvT_S5_T0_,@function _Z4testI16reverse_iteratorIS0_I14Double_pointer6DoubleES2_ES2_EvT_S5_T0_: # @_Z4testI16reverse_iteratorIS0_I14Double_pointer6DoubleES2_ES2_EvT_S5_T0_ @@ -2162,10 +2107,12 @@ _Z4testI16reverse_iteratorIS0_I14Double_pointer6DoubleES2_ES2_EvT_S5_T0_: # @_Z4 pcalau12i $s2, %pc_hi20(current_test) blez $a1, .LBB14_5 # %bb.1: # %.lr.ph - pcalau12i $a0, %pc_hi20(.LCPI14_0) + ori $a0, $zero, 0 + lu32i.d $a0, 487424 + lu52i.d $a0, $a0, 1035 bne $s0, $fp, .LBB14_6 # %bb.2: # %.lr.ph.split.us - fld.d $fa0, $a0, %pc_lo12(.LCPI14_0) + movgr2fr.d $fa0, $a0 fcmp.ceq.d $fcc0, $fs0, $fa0 bcnez $fcc0, .LBB14_5 # %bb.3: # %_Z10accumulateI16reverse_iteratorIS0_I14Double_pointer6DoubleES2_ES2_ET0_T_S6_S5_.exit.us.us.preheader @@ -2186,21 +2133,23 @@ _Z4testI16reverse_iteratorIS0_I14Double_pointer6DoubleES2_ES2_EvT_S5_T0_: # @_Z4 pcaddu18i $ra, %call36(clock) jirl $ra, $ra, 0 ld.d $a1, $s3, %pc_lo12(start_time) - pcalau12i $a2, %pc_hi20(.LCPI14_1) - fld.d $fa0, $a2, %pc_lo12(.LCPI14_1) - sub.d $a1, $a0, $a1 - movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI14_2) - fld.d $fa1, $a1, %pc_lo12(.LCPI14_2) - ld.w $a1, $s2, %pc_lo12(current_test) pcalau12i $a2, %pc_hi20(end_time) st.d $a0, $a2, %pc_lo12(end_time) + sub.d $a0, $a0, $a1 + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 + ld.w $a0, $s2, %pc_lo12(current_test) + lu52i.d $a1, $zero, 1000 + movgr2fr.d $fa1, $a1 fadd.d $fa0, $fa0, $fa1 - addi.d $a0, $a1, 1 - st.w $a0, $s2, %pc_lo12(current_test) - slli.d $a0, $a1, 3 + addi.d $a1, $a0, 1 + st.w $a1, $s2, %pc_lo12(current_test) + slli.d $a0, $a0, 3 pcalau12i $a1, %pc_hi20(result_times) addi.d $a1, $a1, %pc_lo12(result_times) fstx.d $fa0, $a1, $a0 @@ -2217,7 +2166,7 @@ _Z4testI16reverse_iteratorIS0_I14Double_pointer6DoubleES2_ES2_EvT_S5_T0_: # @_Z4 addi.d $sp, $sp, 96 ret .LBB14_6: # %.lr.ph.preheader.i.preheader - fld.d $fs1, $a0, %pc_lo12(.LCPI14_0) + movgr2fr.d $fs1, $a0 pcalau12i $a0, %pc_hi20(.L.str.27) addi.d $s1, $a0, %pc_lo12(.L.str.27) move $s5, $zero diff --git a/results/SingleSource/Benchmarks/Misc-C++/Large/CMakeFiles/ray.dir/ray.s b/results/SingleSource/Benchmarks/Misc-C++/Large/CMakeFiles/ray.dir/ray.s index 066393cc..9984121e 100644 --- a/results/SingleSource/Benchmarks/Misc-C++/Large/CMakeFiles/ray.dir/ray.s +++ b/results/SingleSource/Benchmarks/Misc-C++/Large/CMakeFiles/ray.dir/ray.s @@ -241,12 +241,7 @@ _Z9ray_traceRK3VecRK3RayRK5Scene: # @_Z9ray_traceRK3VecRK3RayRK5Scene .size _Z9ray_traceRK3VecRK3RayRK5Scene, .Lfunc_end6-_Z9ray_traceRK3VecRK3RayRK5Scene .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z6createiRK3Vecd -.LCPI7_0: - .dword 0x400bb67ae8584caa # double 3.4641016151377544 - .text - .globl _Z6createiRK3Vecd + .globl _Z6createiRK3Vecd # -- Begin function _Z6createiRK3Vecd .p2align 5 .type _Z6createiRK3Vecd,@function _Z6createiRK3Vecd: # @_Z6createiRK3Vecd @@ -310,12 +305,15 @@ _Z6createiRK3Vecd: # @_Z6createiRK3Vecd pcaddu18i $ra, %call36(_ZNSt8__detail15_List_node_base7_M_hookEPS0_) jirl $ra, $ra, 0 ld.d $a0, $sp, 104 - pcalau12i $a1, %pc_hi20(.LCPI7_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI7_0) addi.d $a0, $a0, 1 st.d $a0, $sp, 104 - vldi $vr1, -1016 - fmul.d $fs1, $fs0, $fa1 + vldi $vr0, -1016 + fmul.d $fs1, $fs0, $fa0 + lu12i.w $a0, -96892 + ori $a0, $a0, 3242 + lu32i.d $a0, -280966 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fa0, $a0 fdiv.d $fs2, $fs1, $fa0 vldi $vr0, -928 fmul.d $fs0, $fs0, $fa0 @@ -741,26 +739,12 @@ _ZN5SceneD2Ev: # @_ZN5SceneD2Ev .LCPI9_0: .dword 0x0000000000000000 # double 0 .dword 0xbff0000000000000 # double -1 -.LCPI9_4: +.LCPI9_1: .dword 0xbfd11acee560242a # double -0.2672612419124244 .dword 0x3fe9a8365810363f # double 0.80178372573727319 -.LCPI9_6: +.LCPI9_2: .dword 0x3fd11acee560242a # double 0.2672612419124244 .dword 0x3fe9a8365810363f # double 0.80178372573727319 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI9_1: - .dword 0xc070000000000000 # double -256 -.LCPI9_2: - .dword 0x4110000000000000 # double 262144 -.LCPI9_3: - .dword 0x4080000000000000 # double 512 -.LCPI9_5: - .dword 0x3fe11acee560242a # double 0.53452248382484879 -.LCPI9_7: - .dword 0x406fe00000000000 # double 255 -.LCPI9_8: - .dword 0x3fb0000000000000 # double 0.0625 .text .globl main .p2align 5 @@ -862,35 +846,37 @@ main: # @main addi.d $s3, $sp, 320 movgr2fr.d $fs2, $zero pcalau12i $s4, %pc_hi20(infinity) - pcalau12i $a1, %pc_hi20(.LCPI9_1) - fld.d $fs1, $a1, %pc_lo12(.LCPI9_1) - pcalau12i $a1, %pc_hi20(.LCPI9_2) - fld.d $fs3, $a1, %pc_lo12(.LCPI9_2) - pcalau12i $a1, %pc_hi20(.LCPI9_3) - fld.d $fs7, $a1, %pc_lo12(.LCPI9_3) + lu52i.d $a1, $zero, -1017 + movgr2fr.d $fs1, $a1 + lu52i.d $a1, $zero, 1041 + movgr2fr.d $fs3, $a1 + lu52i.d $a1, $zero, 1032 + movgr2fr.d $fs7, $a1 vrepli.b $vr0, 0 vst $vr0, $sp, 128 # 16-byte Folded Spill lu52i.d $s5, $zero, -1023 ori $s6, $zero, 4 - pcalau12i $a1, %pc_hi20(.LCPI9_7) - fld.d $fa0, $a1, %pc_lo12(.LCPI9_7) + ori $a1, $zero, 0 + lu32i.d $a1, -8192 + lu52i.d $a1, $a1, 1030 + movgr2fr.d $fa0, $a1 fst.d $fa0, $sp, 24 # 8-byte Folded Spill - pcalau12i $a1, %pc_hi20(.LCPI9_8) - fld.d $fa0, $a1, %pc_lo12(.LCPI9_8) + lu52i.d $a1, $zero, 1019 + movgr2fr.d $fa0, $a1 fst.d $fa0, $sp, 16 # 8-byte Folded Spill - pcalau12i $a1, %pc_hi20(.LCPI9_4) - vld $vr0, $a1, %pc_lo12(.LCPI9_4) + pcalau12i $a1, %pc_hi20(.LCPI9_1) + vld $vr0, $a1, %pc_lo12(.LCPI9_1) vst $vr0, $sp, 96 # 16-byte Folded Spill - pcalau12i $a1, %pc_hi20(.LCPI9_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI9_5) - fst.d $fa0, $sp, 88 # 8-byte Folded Spill - pcalau12i $s7, %pc_hi20(delta) - pcalau12i $a1, %pc_hi20(.LCPI9_6) - vld $vr0, $a1, %pc_lo12(.LCPI9_6) - vst $vr0, $sp, 64 # 16-byte Folded Spill lu12i.w $a1, -109054 ori $a1, $a1, 1066 lu32i.d $a1, 72398 + lu52i.d $a2, $a1, 1022 + pcalau12i $a3, %pc_hi20(.LCPI9_2) + vld $vr0, $a3, %pc_lo12(.LCPI9_2) + vst $vr0, $sp, 64 # 16-byte Folded Spill + movgr2fr.d $fa0, $a2 + fst.d $fa0, $sp, 88 # 8-byte Folded Spill + pcalau12i $s7, %pc_hi20(delta) lu52i.d $s8, $a1, -1026 st.d $s0, $sp, 32 # 8-byte Folded Spill b .LBB9_4 diff --git a/results/SingleSource/Benchmarks/Misc-C++/Large/CMakeFiles/sphereflake.dir/sphereflake.s b/results/SingleSource/Benchmarks/Misc-C++/Large/CMakeFiles/sphereflake.dir/sphereflake.s index 6f949948..9ef8d5c9 100644 --- a/results/SingleSource/Benchmarks/Misc-C++/Large/CMakeFiles/sphereflake.dir/sphereflake.s +++ b/results/SingleSource/Benchmarks/Misc-C++/Large/CMakeFiles/sphereflake.dir/sphereflake.s @@ -3,36 +3,24 @@ .globl _ZSt21ios_base_library_initv # End of file scope inline assembly - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x3d719799812dea11 # double 9.9999999999999998E-13 -.LCPI0_7: - .dword 0x408ff80000000000 # double 1023 -.LCPI0_8: - .dword 0x4090000000000000 # double 1024 -.LCPI0_9: - .dword 0x7ff0000000000000 # double +Inf -.LCPI0_10: - .dword 0x4050000000000000 # double 64 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI0_1: + .p2align 4, 0x0 # -- Begin function main +.LCPI0_0: .dword 0xc080040000000000 # double -512.5 .dword 0xc080015555555555 # double -512.16666666666663 -.LCPI0_2: +.LCPI0_1: .dword 0x0000000000000000 # double 0 .dword 0xc07ffd5555555555 # double -511.83333333333331 -.LCPI0_3: +.LCPI0_2: .dword 0xc080040000000000 # double -512.5 .dword 0x0000000000000000 # double 0 -.LCPI0_4: +.LCPI0_3: .dword 0xc080015555555555 # double -512.16666666666663 .dword 0xc07ff80000000000 # double -511.5 -.LCPI0_5: +.LCPI0_4: .dword 0x0000000000000000 # double 0 .dword 0xc07ff80000000000 # double -511.5 -.LCPI0_6: +.LCPI0_5: .dword 0xc07ffd5555555555 # double -511.83333333333331 .dword 0x0000000000000000 # double 0 .text @@ -62,6 +50,7 @@ main: # @main fst.d $fs4, $sp, 192 # 8-byte Folded Spill fst.d $fs5, $sp, 184 # 8-byte Folded Spill fst.d $fs6, $sp, 176 # 8-byte Folded Spill + fst.d $fs7, $sp, 168 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -80,6 +69,7 @@ main: # @main .cfi_offset 60, -128 .cfi_offset 61, -136 .cfi_offset 62, -144 + .cfi_offset 63, -152 ori $s0, $zero, 2 ori $s1, $zero, 6 bne $a0, $s0, .LBB0_3 @@ -129,23 +119,26 @@ main: # @main jirl $ra, $ra, 0 move $s0, $a0 add.d $s3, $a0, $s2 - st.d $zero, $sp, 56 + st.d $zero, $sp, 40 vrepli.b $vr0, 0 - vst $vr0, $sp, 64 + vst $vr0, $sp, 48 addi.w $a0, $zero, -99 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fs0, $a1, %pc_lo12(.LCPI0_0) - vldi $vr2, -912 + vldi $vr1, -912 vldi $vr0, -907 - vldi $vr1, -928 + vldi $vr2, -928 + lu12i.w $a1, -519458 + ori $a1, $a1, 2577 + lu32i.d $a1, 104345 + lu52i.d $a1, $a1, 983 + movgr2fr.d $fs0, $a1 .p2align 4, , 16 .LBB0_8: # %.preheader.i.i # =>This Inner Loop Header: Depth=1 - fmov.d $fa3, $fa2 - fdiv.d $fa2, $fa0, $fa2 - fadd.d $fa2, $fa3, $fa2 - fmul.d $fa2, $fa2, $fa1 - fsub.d $fa3, $fa2, $fa3 + fmov.d $fa3, $fa1 + fdiv.d $fa1, $fa0, $fa1 + fadd.d $fa1, $fa3, $fa1 + fmul.d $fa1, $fa1, $fa2 + fsub.d $fa3, $fa1, $fa3 fabs.d $fa3, $fa3 fcmp.cule.d $fcc0, $fa3, $fs0 bcnez $fcc0, .LBB0_10 @@ -155,16 +148,16 @@ main: # @main addi.w $a0, $a0, 1 bnez $a1, .LBB0_8 .LBB0_10: # %_ZNK3v_t4normEv.exit - frecip.d $fa0, $fa2 + frecip.d $fa0, $fa1 vldi $vr1, -944 fmul.d $fa1, $fa0, $fa1 vldi $vr2, -800 fmul.d $fa2, $fa0, $fa2 - fst.d $fa1, $sp, 32 - fst.d $fa0, $sp, 40 - fst.d $fa2, $sp, 48 - addi.d $a3, $sp, 56 - addi.d $a4, $sp, 32 + fst.d $fa1, $sp, 16 + fst.d $fa0, $sp, 24 + fst.d $fa2, $sp, 32 + addi.d $a3, $sp, 40 + addi.d $a4, $sp, 16 vldi $vr0, -912 move $a0, $s0 move $a1, $s1 @@ -200,68 +193,68 @@ main: # @main ori $a2, $zero, 5 pcaddu18i $ra, %call36(_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l) jirl $ra, $ra, 0 + pcalau12i $a0, %pc_hi20(.LCPI0_0) + vld $vr0, $a0, %pc_lo12(.LCPI0_0) pcalau12i $a0, %pc_hi20(.LCPI0_1) - vld $vr0, $a0, %pc_lo12(.LCPI0_1) + vld $vr1, $a0, %pc_lo12(.LCPI0_1) + vst $vr0, $sp, 64 + vst $vr1, $sp, 80 pcalau12i $a0, %pc_hi20(.LCPI0_2) - vld $vr1, $a0, %pc_lo12(.LCPI0_2) + vld $vr0, $a0, %pc_lo12(.LCPI0_2) pcalau12i $a0, %pc_hi20(.LCPI0_3) - vld $vr2, $a0, %pc_lo12(.LCPI0_3) + vld $vr1, $a0, %pc_lo12(.LCPI0_3) pcalau12i $a0, %pc_hi20(.LCPI0_4) - vld $vr3, $a0, %pc_lo12(.LCPI0_4) - vst $vr0, $sp, 80 - vst $vr1, $sp, 96 - vst $vr2, $sp, 112 - vst $vr3, $sp, 128 + vld $vr2, $a0, %pc_lo12(.LCPI0_4) pcalau12i $a0, %pc_hi20(.LCPI0_5) - vld $vr0, $a0, %pc_lo12(.LCPI0_5) - pcalau12i $a0, %pc_hi20(.LCPI0_6) - vld $vr1, $a0, %pc_lo12(.LCPI0_6) - pcalau12i $a0, %pc_hi20(.LCPI0_7) - fld.d $fs1, $a0, %pc_lo12(.LCPI0_7) - vst $vr0, $sp, 144 - vst $vr1, $sp, 160 + vld $vr3, $a0, %pc_lo12(.LCPI0_5) + vst $vr0, $sp, 96 + vst $vr1, $sp, 112 + vst $vr2, $sp, 128 + vst $vr3, $sp, 144 + ori $a0, $zero, 0 + lu32i.d $a0, -2048 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fs1, $a0 movgr2fr.d $fs2, $zero pcalau12i $a0, %pc_hi20(_ZL5light) addi.d $s2, $a0, %pc_lo12(_ZL5light) - addi.d $s5, $sp, 80 - pcalau12i $a0, %pc_hi20(.LCPI0_8) - fld.d $fs3, $a0, %pc_lo12(.LCPI0_8) - pcalau12i $s6, %pc_hi20(.LCPI0_9) - fld.d $fs4, $s6, %pc_lo12(.LCPI0_9) - vldi $vr22, -1006 + addi.d $s5, $sp, 64 + lu52i.d $a0, $zero, 1033 + movgr2fr.d $fs3, $a0 + lu52i.d $s6, $zero, 2047 + movgr2fr.d $fs4, $s6 + vldi $vr23, -1006 addi.w $s7, $zero, -99 - vldi $vr30, -928 + vldi $vr31, -928 ori $s8, $zero, 4 - pcalau12i $a0, %pc_hi20(.LCPI0_10) - st.d $a0, $sp, 24 # 8-byte Folded Spill b .LBB0_12 .p2align 4, , 16 .LBB0_11: # in Loop: Header=BB0_12 Depth=1 - ld.d $s4, $sp, 16 # 8-byte Folded Reload + ld.d $s4, $sp, 8 # 8-byte Folded Reload addi.w $s4, $s4, -1 vldi $vr0, -784 fadd.d $fs1, $fs1, $fa0 - beqz $s4, .LBB0_65 + beqz $s4, .LBB0_64 .LBB0_12: # %.preheader80.i # =>This Loop Header: Depth=1 # Child Loop BB0_15 Depth 2 - # Child Loop BB0_20 Depth 3 - # Child Loop BB0_22 Depth 4 - # Child Loop BB0_26 Depth 4 - # Child Loop BB0_29 Depth 5 - # Child Loop BB0_38 Depth 5 - # Child Loop BB0_45 Depth 4 - # Child Loop BB0_49 Depth 5 - # Child Loop BB0_57 Depth 5 - st.d $s4, $sp, 16 # 8-byte Folded Spill + # Child Loop BB0_19 Depth 3 + # Child Loop BB0_21 Depth 4 + # Child Loop BB0_25 Depth 4 + # Child Loop BB0_28 Depth 5 + # Child Loop BB0_35 Depth 5 + # Child Loop BB0_44 Depth 4 + # Child Loop BB0_48 Depth 5 + # Child Loop BB0_56 Depth 5 + st.d $s4, $sp, 8 # 8-byte Folded Spill ori $s4, $zero, 1024 fmov.d $fs5, $fs2 b .LBB0_15 .p2align 4, , 16 .LBB0_13: # %.split.us.loopexit.i # in Loop: Header=BB0_15 Depth=2 - ld.d $a0, $sp, 24 # 8-byte Folded Reload - fld.d $fa0, $a0, %pc_lo12(.LCPI0_10) + lu52i.d $a0, $zero, 1029 + movgr2fr.d $fa0, $a0 fmul.d $fa0, $fa3, $fa0 ftintrz.w.d $fa0, $fa0 movfr2gr.s $a1, $fa0 @@ -278,21 +271,21 @@ main: # @main addi.w $s4, $s4, -1 vldi $vr0, -912 fadd.d $fs5, $fs5, $fa0 - vldi $vr22, -1006 - vldi $vr30, -928 + vldi $vr23, -1006 + vldi $vr31, -928 beqz $s4, .LBB0_11 .LBB0_15: # %.preheader.i # Parent Loop BB0_12 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_20 Depth 3 - # Child Loop BB0_22 Depth 4 - # Child Loop BB0_26 Depth 4 - # Child Loop BB0_29 Depth 5 - # Child Loop BB0_38 Depth 5 - # Child Loop BB0_45 Depth 4 - # Child Loop BB0_49 Depth 5 - # Child Loop BB0_57 Depth 5 - blez $fp, .LBB0_64 + # Child Loop BB0_19 Depth 3 + # Child Loop BB0_21 Depth 4 + # Child Loop BB0_25 Depth 4 + # Child Loop BB0_28 Depth 5 + # Child Loop BB0_35 Depth 5 + # Child Loop BB0_44 Depth 4 + # Child Loop BB0_48 Depth 5 + # Child Loop BB0_56 Depth 5 + blez $fp, .LBB0_63 # %bb.16: # %.preheader.split.us.i.preheader # in Loop: Header=BB0_15 Depth=2 fld.d $fa0, $s2, 0 @@ -301,35 +294,27 @@ main: # @main move $a0, $zero fneg.d $fa4, $fa1 fmov.d $fa3, $fs2 - vldi $vr23, -878 - b .LBB0_20 + vldi $vr30, -878 + b .LBB0_19 .p2align 4, , 16 -.LBB0_17: # %_ZN6node_t9intersectILb0EEEvRK5ray_tR5hit_t.exit.us.i - # in Loop: Header=BB0_20 Depth=3 - fclass.d $ft4, $ft3 - movfr2gr.d $a1, $ft4 - andi $a1, $a1, 64 - sltu $a1, $zero, $a1 - andi $a1, $a1, 1 - beqz $a1, .LBB0_42 -# %bb.18: # in Loop: Header=BB0_20 Depth=3 +.LBB0_17: # in Loop: Header=BB0_19 Depth=3 movgr2fr.d $ft4, $zero -.LBB0_19: # %_ZL9ray_tracePK6node_tRK5ray_t.exit.us.i - # in Loop: Header=BB0_20 Depth=3 +.LBB0_18: # %_ZL9ray_tracePK6node_tRK5ray_t.exit.us.i + # in Loop: Header=BB0_19 Depth=3 addi.d $a0, $a0, 1 fadd.d $fa3, $fa3, $ft4 beq $a0, $s8, .LBB0_13 -.LBB0_20: # %.preheader.split.us.i +.LBB0_19: # %.preheader.split.us.i # Parent Loop BB0_12 Depth=1 # Parent Loop BB0_15 Depth=2 # => This Loop Header: Depth=3 - # Child Loop BB0_22 Depth 4 - # Child Loop BB0_26 Depth 4 - # Child Loop BB0_29 Depth 5 - # Child Loop BB0_38 Depth 5 - # Child Loop BB0_45 Depth 4 - # Child Loop BB0_49 Depth 5 - # Child Loop BB0_57 Depth 5 + # Child Loop BB0_21 Depth 4 + # Child Loop BB0_25 Depth 4 + # Child Loop BB0_28 Depth 5 + # Child Loop BB0_35 Depth 5 + # Child Loop BB0_44 Depth 4 + # Child Loop BB0_48 Depth 5 + # Child Loop BB0_56 Depth 5 slli.d $a1, $a0, 4 alsl.d $a1, $a0, $a1, 3 add.d $a2, $s5, $a1 @@ -350,33 +335,33 @@ main: # @main sltu $a1, $zero, $a1 andi $a1, $a1, 1 fmov.d $ft1, $fs4 - bnez $a1, .LBB0_24 -# %bb.21: # %.preheader.i.i.us.i.preheader - # in Loop: Header=BB0_20 Depth=3 + bnez $a1, .LBB0_23 +# %bb.20: # %.preheader.i.i.us.i.preheader + # in Loop: Header=BB0_19 Depth=3 addi.w $a1, $zero, -99 vldi $vr9, -912 .p2align 4, , 16 -.LBB0_22: # %.preheader.i.i.us.i +.LBB0_21: # %.preheader.i.i.us.i # Parent Loop BB0_12 Depth=1 # Parent Loop BB0_15 Depth=2 - # Parent Loop BB0_20 Depth=3 + # Parent Loop BB0_19 Depth=3 # => This Inner Loop Header: Depth=4 fmov.d $ft2, $ft1 fdiv.d $ft1, $ft0, $ft1 fadd.d $ft1, $ft2, $ft1 - fmul.d $ft1, $ft1, $fs6 + fmul.d $ft1, $ft1, $fs7 fsub.d $ft2, $ft1, $ft2 fabs.d $ft2, $ft2 fcmp.cule.d $fcc0, $ft2, $fs0 - bcnez $fcc0, .LBB0_24 -# %bb.23: # %.preheader.i.i.us.i - # in Loop: Header=BB0_22 Depth=4 + bcnez $fcc0, .LBB0_23 +# %bb.22: # %.preheader.i.i.us.i + # in Loop: Header=BB0_21 Depth=4 move $a2, $a1 addi.w $a1, $a1, 1 - bnez $a2, .LBB0_22 + bnez $a2, .LBB0_21 .p2align 4, , 16 -.LBB0_24: # %_ZNK3v_t4normEv.exit.us.i - # in Loop: Header=BB0_20 Depth=3 +.LBB0_23: # %_ZNK3v_t4normEv.exit.us.i + # in Loop: Header=BB0_19 Depth=3 frecip.d $ft0, $ft1 fmul.d $fa5, $fa5, $ft0 fmul.d $fa6, $fa6, $ft0 @@ -387,23 +372,23 @@ main: # @main fmov.d $ft2, $fs2 fmov.d $ft4, $fs4 move $a1, $s0 - b .LBB0_26 + b .LBB0_25 .p2align 4, , 16 -.LBB0_25: # %_ZNK8sphere_t9intersectERK5ray_t.exit28.thread.i.us.i - # in Loop: Header=BB0_26 Depth=4 +.LBB0_24: # %_ZNK8sphere_t9intersectERK5ray_t.exit28.thread.i.us.i + # in Loop: Header=BB0_25 Depth=4 addi.d $a1, $a1, 72 - bgeu $a1, $s3, .LBB0_17 -.LBB0_26: # %.lr.ph.i25.us.i + bgeu $a1, $s3, .LBB0_40 +.LBB0_25: # %.lr.ph.i25.us.i # Parent Loop BB0_12 Depth=1 # Parent Loop BB0_15 Depth=2 - # Parent Loop BB0_20 Depth=3 + # Parent Loop BB0_19 Depth=3 # => This Loop Header: Depth=4 - # Child Loop BB0_29 Depth 5 - # Child Loop BB0_38 Depth 5 + # Child Loop BB0_28 Depth 5 + # Child Loop BB0_35 Depth 5 fld.d $ft5, $a1, 16 fld.d $ft6, $a1, 0 fld.d $ft7, $a1, 8 - fadd.d $ft8, $ft5, $ft14 + fadd.d $ft8, $ft5, $ft15 fmul.d $ft5, $fa5, $ft6 fmul.d $ft9, $fa6, $ft7 fadd.d $ft5, $ft5, $ft9 @@ -421,43 +406,43 @@ main: # @main fadd.d $ft7, $ft7, $ft6 fcmp.clt.d $fcc0, $ft7, $fs2 fmov.d $ft6, $fs4 - bcnez $fcc0, .LBB0_32 -# %bb.27: # in Loop: Header=BB0_26 Depth=4 + bcnez $fcc0, .LBB0_31 +# %bb.26: # in Loop: Header=BB0_25 Depth=4 fclass.d $ft6, $ft7 movfr2gr.d $a2, $ft6 - fld.d $ft6, $s6, %pc_lo12(.LCPI0_9) andi $a2, $a2, 64 sltu $a2, $zero, $a2 andi $a2, $a2, 1 + movgr2fr.d $ft6, $s6 fmov.d $ft8, $ft6 - bnez $a2, .LBB0_31 -# %bb.28: # %.preheader.i.i.i27.us.i.preheader - # in Loop: Header=BB0_26 Depth=4 + bnez $a2, .LBB0_30 +# %bb.27: # %.preheader.i.i.i27.us.i.preheader + # in Loop: Header=BB0_25 Depth=4 addi.w $a2, $zero, -99 vldi $vr16, -912 .p2align 4, , 16 -.LBB0_29: # %.preheader.i.i.i27.us.i +.LBB0_28: # %.preheader.i.i.i27.us.i # Parent Loop BB0_12 Depth=1 # Parent Loop BB0_15 Depth=2 - # Parent Loop BB0_20 Depth=3 - # Parent Loop BB0_26 Depth=4 + # Parent Loop BB0_19 Depth=3 + # Parent Loop BB0_25 Depth=4 # => This Inner Loop Header: Depth=5 fmov.d $ft9, $ft8 fdiv.d $ft8, $ft7, $ft8 fadd.d $ft8, $ft9, $ft8 - fmul.d $ft8, $ft8, $fs6 + fmul.d $ft8, $ft8, $fs7 fsub.d $ft9, $ft8, $ft9 fabs.d $ft9, $ft9 fcmp.cule.d $fcc0, $ft9, $fs0 - bcnez $fcc0, .LBB0_31 -# %bb.30: # %.preheader.i.i.i27.us.i - # in Loop: Header=BB0_29 Depth=5 + bcnez $fcc0, .LBB0_30 +# %bb.29: # %.preheader.i.i.i27.us.i + # in Loop: Header=BB0_28 Depth=5 move $a3, $a2 addi.w $a2, $a2, 1 - bnez $a3, .LBB0_29 + bnez $a3, .LBB0_28 .p2align 4, , 16 -.LBB0_31: # %_ZL8LLVMsqrtd.exit.i.i32.us.i - # in Loop: Header=BB0_26 Depth=4 +.LBB0_30: # %_ZL8LLVMsqrtd.exit.i.i32.us.i + # in Loop: Header=BB0_25 Depth=4 fadd.d $ft7, $ft5, $ft8 fsub.d $ft5, $ft5, $ft8 movgr2fr.d $ft8, $zero @@ -465,15 +450,15 @@ main: # @main fsel $ft5, $ft7, $ft5, $fcc0 fcmp.clt.d $fcc0, $ft7, $ft8 fsel $ft6, $ft5, $ft6, $fcc0 -.LBB0_32: # %_ZNK8sphere_t9intersectERK5ray_t.exit.i35.us.i - # in Loop: Header=BB0_26 Depth=4 +.LBB0_31: # %_ZNK8sphere_t9intersectERK5ray_t.exit.i35.us.i + # in Loop: Header=BB0_25 Depth=4 fcmp.cult.d $fcc0, $ft6, $ft4 - bceqz $fcc0, .LBB0_36 -# %bb.33: # in Loop: Header=BB0_26 Depth=4 + bceqz $fcc0, .LBB0_39 +# %bb.32: # in Loop: Header=BB0_25 Depth=4 fld.d $ft5, $a1, 48 fld.d $ft6, $a1, 32 fld.d $ft7, $a1, 40 - fadd.d $ft9, $ft5, $ft14 + fadd.d $ft9, $ft5, $ft15 fmul.d $ft8, $fa5, $ft6 fmul.d $ft10, $fa6, $ft7 fadd.d $ft8, $ft8, $ft10 @@ -488,70 +473,61 @@ main: # @main fadd.d $ft9, $ft12, $ft9 fsub.d $ft9, $ft11, $ft9 fmul.d $ft11, $ft8, $ft8 - fadd.d $ft11, $ft11, $ft9 + fadd.d $ft12, $ft11, $ft9 movgr2fr.d $ft9, $zero - fcmp.clt.d $fcc0, $ft11, $ft9 - bcnez $fcc0, .LBB0_25 -# %bb.34: # in Loop: Header=BB0_26 Depth=4 - fclass.d $ft12, $ft11 - movfr2gr.d $a2, $ft12 + fcmp.clt.d $fcc0, $ft12, $ft9 + bcnez $fcc0, .LBB0_24 +# %bb.33: # in Loop: Header=BB0_25 Depth=4 + fclass.d $ft11, $ft12 + movfr2gr.d $a2, $ft11 andi $a2, $a2, 64 sltu $a2, $zero, $a2 andi $a2, $a2, 1 - beqz $a2, .LBB0_37 -# %bb.35: # in Loop: Header=BB0_26 Depth=4 - fld.d $ft12, $s6, %pc_lo12(.LCPI0_9) - b .LBB0_40 - .p2align 4, , 16 -.LBB0_36: # in Loop: Header=BB0_26 Depth=4 - ld.d $a2, $a1, 64 - slli.d $a3, $a2, 6 - alsl.d $a2, $a2, $a3, 3 - add.d $a1, $a1, $a2 - bltu $a1, $s3, .LBB0_26 - b .LBB0_17 -.LBB0_37: # %.preheader.i.i19.i.us.i.preheader - # in Loop: Header=BB0_26 Depth=4 - vldi $vr20, -912 + movgr2fr.d $ft11, $s6 + fmov.d $ft13, $ft11 + bnez $a2, .LBB0_37 +# %bb.34: # %.preheader.i.i19.i.us.i.preheader + # in Loop: Header=BB0_25 Depth=4 + vldi $vr21, -912 move $a2, $s7 .p2align 4, , 16 -.LBB0_38: # %.preheader.i.i19.i.us.i +.LBB0_35: # %.preheader.i.i19.i.us.i # Parent Loop BB0_12 Depth=1 # Parent Loop BB0_15 Depth=2 - # Parent Loop BB0_20 Depth=3 - # Parent Loop BB0_26 Depth=4 + # Parent Loop BB0_19 Depth=3 + # Parent Loop BB0_25 Depth=4 # => This Inner Loop Header: Depth=5 - fmov.d $ft13, $ft12 - fdiv.d $ft12, $ft11, $ft12 - fadd.d $ft12, $ft13, $ft12 - fmul.d $ft12, $ft12, $fs6 - fsub.d $ft13, $ft12, $ft13 - fabs.d $ft13, $ft13 - fcmp.cule.d $fcc0, $ft13, $fs0 - bcnez $fcc0, .LBB0_40 -# %bb.39: # %.preheader.i.i19.i.us.i - # in Loop: Header=BB0_38 Depth=5 + fmov.d $ft14, $ft13 + fdiv.d $ft13, $ft12, $ft13 + fadd.d $ft13, $ft14, $ft13 + fmul.d $ft13, $ft13, $fs7 + fsub.d $ft14, $ft13, $ft14 + fabs.d $ft14, $ft14 + fcmp.cule.d $fcc0, $ft14, $fs0 + bcnez $fcc0, .LBB0_37 +# %bb.36: # %.preheader.i.i19.i.us.i + # in Loop: Header=BB0_35 Depth=5 move $a3, $a2 addi.w $a2, $a2, 1 - bnez $a3, .LBB0_38 + bnez $a3, .LBB0_35 .p2align 4, , 16 -.LBB0_40: # %_ZNK8sphere_t9intersectERK5ray_t.exit28.i.us.i - # in Loop: Header=BB0_26 Depth=4 - fadd.d $ft11, $ft10, $ft12 - fsub.d $ft10, $ft10, $ft12 +.LBB0_37: # %_ZNK8sphere_t9intersectERK5ray_t.exit28.i.us.i + # in Loop: Header=BB0_25 Depth=4 + fadd.d $ft12, $ft10, $ft13 + fsub.d $ft10, $ft10, $ft13 fcmp.clt.d $fcc0, $ft9, $ft10 - fsel $ft10, $ft11, $ft10, $fcc0 - fcmp.clt.d $fcc0, $ft11, $ft9 - fsel $ft10, $ft10, $fs4, $fcc0 + fsel $ft10, $ft12, $ft10, $fcc0 + fcmp.clt.d $fcc0, $ft12, $ft9 + fsel $ft10, $ft10, $ft11, $fcc0 fcmp.cule.d $fcc0, $ft4, $ft10 - bcnez $fcc0, .LBB0_25 -# %bb.41: # in Loop: Header=BB0_26 Depth=4 + bcnez $fcc0, .LBB0_24 +# %bb.38: # in Loop: Header=BB0_25 Depth=4 fmul.d $ft0, $fa5, $ft10 fmul.d $ft1, $fa6, $ft10 fmul.d $ft2, $fa7, $ft10 fadd.d $ft0, $ft0, $ft9 fadd.d $ft1, $ft1, $ft9 - fadd.d $ft2, $ft2, $ft15 + fadd.d $ft2, $ft2, $fs6 fsub.d $ft0, $ft0, $ft6 fsub.d $ft1, $ft1, $ft7 fsub.d $ft3, $ft2, $ft5 @@ -561,9 +537,23 @@ main: # @main fmul.d $ft0, $ft4, $ft3 fmov.d $ft3, $ft10 fmov.d $ft4, $ft10 - b .LBB0_25 + b .LBB0_24 .p2align 4, , 16 -.LBB0_42: # in Loop: Header=BB0_20 Depth=3 +.LBB0_39: # in Loop: Header=BB0_25 Depth=4 + ld.d $a2, $a1, 64 + slli.d $a3, $a2, 6 + alsl.d $a2, $a2, $a3, 3 + add.d $a1, $a1, $a2 + bltu $a1, $s3, .LBB0_25 +.LBB0_40: # %_ZN6node_t9intersectILb0EEEvRK5ray_tR5hit_t.exit.us.i + # in Loop: Header=BB0_19 Depth=3 + fclass.d $ft4, $ft3 + movfr2gr.d $a1, $ft4 + andi $a1, $a1, 64 + sltu $a1, $zero, $a1 + andi $a1, $a1, 1 + bnez $a1, .LBB0_17 +# %bb.41: # in Loop: Header=BB0_19 Depth=3 fmul.d $ft4, $fa0, $ft2 fmul.d $ft5, $fa1, $ft1 fadd.d $ft4, $ft5, $ft4 @@ -571,8 +561,8 @@ main: # @main fadd.d $ft5, $ft5, $ft4 movgr2fr.d $ft4, $zero fcmp.cle.d $fcc0, $ft4, $ft5 - bcnez $fcc0, .LBB0_19 -# %bb.43: # in Loop: Header=BB0_20 Depth=3 + bcnez $fcc0, .LBB0_18 +# %bb.42: # in Loop: Header=BB0_19 Depth=3 fneg.d $ft4, $ft5 fmul.d $ft5, $fa5, $ft3 fmul.d $fa6, $fa6, $ft3 @@ -580,7 +570,7 @@ main: # @main movgr2fr.d $fa5, $zero fadd.d $ft3, $ft5, $fa5 fadd.d $ft5, $fa6, $fa5 - fadd.d $ft6, $fa7, $ft15 + fadd.d $ft6, $fa7, $fs6 fmul.d $fa6, $ft2, $fs0 fmul.d $fa7, $ft1, $fs0 fmul.d $ft0, $ft0, $fs0 @@ -588,22 +578,22 @@ main: # @main fadd.d $fa7, $ft5, $fa7 fadd.d $ft0, $ft0, $ft6 move $a1, $s0 - b .LBB0_45 + b .LBB0_44 .p2align 4, , 16 -.LBB0_44: # %_ZNK8sphere_t9intersectERK5ray_t.exit.i.thread.us.i - # in Loop: Header=BB0_45 Depth=4 +.LBB0_43: # %_ZNK8sphere_t9intersectERK5ray_t.exit.i.thread.us.i + # in Loop: Header=BB0_44 Depth=4 ld.d $a2, $a1, 64 slli.d $a3, $a2, 6 alsl.d $a2, $a2, $a3, 3 add.d $a1, $a1, $a2 - bgeu $a1, $s3, .LBB0_62 -.LBB0_45: # %.lr.ph.i.us.i + bgeu $a1, $s3, .LBB0_61 +.LBB0_44: # %.lr.ph.i.us.i # Parent Loop BB0_12 Depth=1 # Parent Loop BB0_15 Depth=2 - # Parent Loop BB0_20 Depth=3 + # Parent Loop BB0_19 Depth=3 # => This Loop Header: Depth=4 - # Child Loop BB0_49 Depth 5 - # Child Loop BB0_57 Depth 5 + # Child Loop BB0_48 Depth 5 + # Child Loop BB0_56 Depth 5 fld.d $ft1, $a1, 0 fld.d $ft2, $a1, 8 fld.d $ft3, $a1, 16 @@ -626,49 +616,49 @@ main: # @main fmul.d $ft3, $ft5, $ft5 fadd.d $ft2, $ft3, $ft2 fcmp.clt.d $fcc0, $ft2, $fa5 - bcnez $fcc0, .LBB0_44 -# %bb.46: # in Loop: Header=BB0_45 Depth=4 + bcnez $fcc0, .LBB0_43 +# %bb.45: # in Loop: Header=BB0_44 Depth=4 fclass.d $ft3, $ft2 movfr2gr.d $a2, $ft3 andi $a2, $a2, 64 sltu $a2, $zero, $a2 andi $a2, $a2, 1 - beqz $a2, .LBB0_48 -# %bb.47: # in Loop: Header=BB0_45 Depth=4 - fld.d $ft3, $s6, %pc_lo12(.LCPI0_9) - b .LBB0_51 -.LBB0_48: # %.preheader.i.i.i.us.i.preheader - # in Loop: Header=BB0_45 Depth=4 + beqz $a2, .LBB0_47 +# %bb.46: # in Loop: Header=BB0_44 Depth=4 + movgr2fr.d $ft3, $s6 + b .LBB0_50 +.LBB0_47: # %.preheader.i.i.i.us.i.preheader + # in Loop: Header=BB0_44 Depth=4 addi.w $a2, $zero, -99 vldi $vr11, -912 .p2align 4, , 16 -.LBB0_49: # %.preheader.i.i.i.us.i +.LBB0_48: # %.preheader.i.i.i.us.i # Parent Loop BB0_12 Depth=1 # Parent Loop BB0_15 Depth=2 - # Parent Loop BB0_20 Depth=3 - # Parent Loop BB0_45 Depth=4 + # Parent Loop BB0_19 Depth=3 + # Parent Loop BB0_44 Depth=4 # => This Inner Loop Header: Depth=5 fmov.d $ft5, $ft3 fdiv.d $ft3, $ft2, $ft3 fadd.d $ft3, $ft5, $ft3 - fmul.d $ft3, $ft3, $fs6 + fmul.d $ft3, $ft3, $fs7 fsub.d $ft5, $ft3, $ft5 fabs.d $ft5, $ft5 fcmp.cule.d $fcc0, $ft5, $fs0 - bcnez $fcc0, .LBB0_51 -# %bb.50: # %.preheader.i.i.i.us.i - # in Loop: Header=BB0_49 Depth=5 + bcnez $fcc0, .LBB0_50 +# %bb.49: # %.preheader.i.i.i.us.i + # in Loop: Header=BB0_48 Depth=5 move $a3, $a2 addi.w $a2, $a2, 1 - bnez $a3, .LBB0_49 + bnez $a3, .LBB0_48 .p2align 4, , 16 -.LBB0_51: # %_ZL8LLVMsqrtd.exit.i.i.us.i - # in Loop: Header=BB0_45 Depth=4 +.LBB0_50: # %_ZL8LLVMsqrtd.exit.i.i.us.i + # in Loop: Header=BB0_44 Depth=4 fadd.d $ft2, $ft1, $ft3 fcmp.clt.d $fcc0, $ft2, $fa5 - bcnez $fcc0, .LBB0_44 -# %bb.52: # %_ZL8LLVMsqrtd.exit.i.i.us.i - # in Loop: Header=BB0_45 Depth=4 + bcnez $fcc0, .LBB0_43 +# %bb.51: # %_ZL8LLVMsqrtd.exit.i.i.us.i + # in Loop: Header=BB0_44 Depth=4 fsub.d $ft1, $ft1, $ft3 fcmp.clt.d $fcc0, $fa5, $ft1 fsel $ft1, $ft2, $ft1, $fcc0 @@ -677,8 +667,8 @@ main: # @main andi $a2, $a2, 64 sltu $a2, $zero, $a2 andi $a2, $a2, 1 - bnez $a2, .LBB0_44 -# %bb.53: # in Loop: Header=BB0_45 Depth=4 + bnez $a2, .LBB0_43 +# %bb.52: # in Loop: Header=BB0_44 Depth=4 fld.d $ft1, $a1, 32 fld.d $ft2, $a1, 40 fld.d $ft3, $a1, 48 @@ -701,61 +691,62 @@ main: # @main fmul.d $ft3, $ft5, $ft5 fadd.d $ft2, $ft3, $ft2 fcmp.clt.d $fcc0, $ft2, $fa5 - bcnez $fcc0, .LBB0_61 -# %bb.54: # in Loop: Header=BB0_45 Depth=4 + bcnez $fcc0, .LBB0_60 +# %bb.53: # in Loop: Header=BB0_44 Depth=4 fclass.d $ft3, $ft2 movfr2gr.d $a2, $ft3 andi $a2, $a2, 64 sltu $a2, $zero, $a2 andi $a2, $a2, 1 - beqz $a2, .LBB0_56 -# %bb.55: # in Loop: Header=BB0_45 Depth=4 - fld.d $ft3, $s6, %pc_lo12(.LCPI0_9) - b .LBB0_59 -.LBB0_56: # %.preheader.i.i15.i.us.i.preheader - # in Loop: Header=BB0_45 Depth=4 + beqz $a2, .LBB0_55 +# %bb.54: # in Loop: Header=BB0_44 Depth=4 + movgr2fr.d $ft3, $s6 + b .LBB0_58 +.LBB0_55: # %.preheader.i.i15.i.us.i.preheader + # in Loop: Header=BB0_44 Depth=4 vldi $vr11, -912 move $a2, $s7 .p2align 4, , 16 -.LBB0_57: # %.preheader.i.i15.i.us.i +.LBB0_56: # %.preheader.i.i15.i.us.i # Parent Loop BB0_12 Depth=1 # Parent Loop BB0_15 Depth=2 - # Parent Loop BB0_20 Depth=3 - # Parent Loop BB0_45 Depth=4 + # Parent Loop BB0_19 Depth=3 + # Parent Loop BB0_44 Depth=4 # => This Inner Loop Header: Depth=5 fmov.d $ft5, $ft3 fdiv.d $ft3, $ft2, $ft3 fadd.d $ft3, $ft5, $ft3 - fmul.d $ft3, $ft3, $fs6 + fmul.d $ft3, $ft3, $fs7 fsub.d $ft5, $ft3, $ft5 fabs.d $ft5, $ft5 fcmp.cule.d $fcc0, $ft5, $fs0 - bcnez $fcc0, .LBB0_59 -# %bb.58: # %.preheader.i.i15.i.us.i - # in Loop: Header=BB0_57 Depth=5 + bcnez $fcc0, .LBB0_58 +# %bb.57: # %.preheader.i.i15.i.us.i + # in Loop: Header=BB0_56 Depth=5 move $a3, $a2 addi.w $a2, $a2, 1 - bnez $a3, .LBB0_57 -.LBB0_59: # %_ZL8LLVMsqrtd.exit.i20.i.us.i - # in Loop: Header=BB0_45 Depth=4 + bnez $a3, .LBB0_56 +.LBB0_58: # %_ZL8LLVMsqrtd.exit.i20.i.us.i + # in Loop: Header=BB0_44 Depth=4 fadd.d $ft2, $ft1, $ft3 fcmp.clt.d $fcc0, $ft2, $fa5 - bcnez $fcc0, .LBB0_61 -# %bb.60: # %_ZL8LLVMsqrtd.exit.i20.i.us.i - # in Loop: Header=BB0_45 Depth=4 + bcnez $fcc0, .LBB0_60 +# %bb.59: # %_ZL8LLVMsqrtd.exit.i20.i.us.i + # in Loop: Header=BB0_44 Depth=4 fsub.d $ft1, $ft1, $ft3 fcmp.clt.d $fcc0, $fa5, $ft1 fsel $ft1, $ft2, $ft1, $fcc0 - fcmp.cne.d $fcc0, $ft1, $fs4 - bcnez $fcc0, .LBB0_63 -.LBB0_61: # %.thread.i.us.i - # in Loop: Header=BB0_45 Depth=4 + movgr2fr.d $ft2, $s6 + fcmp.cne.d $fcc0, $ft1, $ft2 + bcnez $fcc0, .LBB0_62 +.LBB0_60: # %.thread.i.us.i + # in Loop: Header=BB0_44 Depth=4 addi.d $a1, $a1, 72 - bltu $a1, $s3, .LBB0_45 -.LBB0_62: # in Loop: Header=BB0_20 Depth=3 - fld.d $ft1, $s6, %pc_lo12(.LCPI0_9) -.LBB0_63: # %_ZN6node_t9intersectILb1EEEvRK5ray_tR5hit_t.exit.us.i - # in Loop: Header=BB0_20 Depth=3 + bltu $a1, $s3, .LBB0_44 +.LBB0_61: # in Loop: Header=BB0_19 Depth=3 + movgr2fr.d $ft1, $s6 +.LBB0_62: # %_ZN6node_t9intersectILb1EEEvRK5ray_tR5hit_t.exit.us.i + # in Loop: Header=BB0_19 Depth=3 fclass.d $fa6, $ft1 movfr2gr.d $a1, $fa6 andi $a1, $a1, 64 @@ -763,24 +754,24 @@ main: # @main andi $a1, $a1, 1 movgr2cf $fcc0, $a1 fsel $ft4, $fa5, $ft4, $fcc0 - b .LBB0_19 + b .LBB0_18 .p2align 4, , 16 -.LBB0_64: # in Loop: Header=BB0_15 Depth=2 +.LBB0_63: # in Loop: Header=BB0_15 Depth=2 move $a1, $zero b .LBB0_14 -.LBB0_65: +.LBB0_64: ld.d $a0, $s1, 0 ld.d $a0, $a0, -24 add.d $a0, $s1, $a0 ld.d $fp, $a0, 240 - beqz $fp, .LBB0_70 -# %bb.66: # %_ZSt13__check_facetISt5ctypeIcEERKT_PS3_.exit.i.i.i + beqz $fp, .LBB0_69 +# %bb.65: # %_ZSt13__check_facetISt5ctypeIcEERKT_PS3_.exit.i.i.i ld.bu $a0, $fp, 56 - beqz $a0, .LBB0_68 -# %bb.67: + beqz $a0, .LBB0_67 +# %bb.66: ld.bu $a0, $fp, 67 - b .LBB0_69 -.LBB0_68: + b .LBB0_68 +.LBB0_67: move $a0, $fp pcaddu18i $ra, %call36(_ZNKSt5ctypeIcE13_M_widen_initEv) jirl $ra, $ra, 0 @@ -789,7 +780,7 @@ main: # @main ori $a1, $zero, 10 move $a0, $fp jirl $ra, $a2, 0 -.LBB0_69: # %_ZL10trace_rgssii.exit +.LBB0_68: # %_ZL10trace_rgssii.exit ext.w.b $a1, $a0 pcalau12i $a0, %got_pc_hi20(_ZSt4cout) ld.d $a0, $a0, %got_pc_lo12(_ZSt4cout) @@ -798,6 +789,7 @@ main: # @main pcaddu18i $ra, %call36(_ZNSo5flushEv) jirl $ra, $ra, 0 move $a0, $zero + fld.d $fs7, $sp, 168 # 8-byte Folded Reload fld.d $fs6, $sp, 176 # 8-byte Folded Reload fld.d $fs5, $sp, 184 # 8-byte Folded Reload fld.d $fs4, $sp, 192 # 8-byte Folded Reload @@ -818,70 +810,39 @@ main: # @main ld.d $ra, $sp, 312 # 8-byte Folded Reload addi.d $sp, $sp, 320 ret -.LBB0_70: +.LBB0_69: pcaddu18i $ra, %call36(_ZSt16__throw_bad_castv) jirl $ra, $ra, 0 .Lfunc_end0: .size main, .Lfunc_end0-main .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZL6createP6node_tii3v_tS1_d -.LCPI1_0: - .dword 0x3fc999999999999a # double 0.20000000000000001 -.LCPI1_1: - .dword 0x401921fb54411744 # double 6.2831853070000001 -.LCPI1_2: - .dword 0xc01921fb54411744 # double -6.2831853070000001 -.LCPI1_3: - .dword 0x4012d97c7f713e20 # double 4.7123889840000004 -.LCPI1_4: - .dword 0x400921fb5496fd7f # double 3.1415926559999998 -.LCPI1_5: - .dword 0x3ff921fb54524550 # double 1.570796327 -.LCPI1_6: - .dword 0xc00921fb5496fd7f # double -3.1415926559999998 -.LCPI1_7: - .dword 0x405e000000000000 # double 120 -.LCPI1_8: - .dword 0x7ff0000000000000 # double +Inf -.LCPI1_9: - .dword 0x3d719799812dea11 # double 9.9999999999999998E-13 -.LCPI1_10: - .dword 0x3ff0c152382d7365 # double 1.0471975511965976 -.LCPI1_11: - .dword 0xbfd657184ae74487 # double -0.3490658503988659 -.LCPI1_12: - .dword 0x3fe3333333333333 # double 0.59999999999999998 -.LCPI1_13: - .dword 0x4000c152382d7365 # double 2.0943951023931953 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZL6createP6node_tii3v_tS1_d .type _ZL6createP6node_tii3v_tS1_d,@function _ZL6createP6node_tii3v_tS1_d: # @_ZL6createP6node_tii3v_tS1_d .cfi_startproc # %bb.0: - addi.d $sp, $sp, -464 - .cfi_def_cfa_offset 464 - st.d $ra, $sp, 456 # 8-byte Folded Spill - st.d $fp, $sp, 448 # 8-byte Folded Spill - st.d $s0, $sp, 440 # 8-byte Folded Spill - st.d $s1, $sp, 432 # 8-byte Folded Spill - st.d $s2, $sp, 424 # 8-byte Folded Spill - st.d $s3, $sp, 416 # 8-byte Folded Spill - st.d $s4, $sp, 408 # 8-byte Folded Spill - st.d $s5, $sp, 400 # 8-byte Folded Spill - st.d $s6, $sp, 392 # 8-byte Folded Spill - st.d $s7, $sp, 384 # 8-byte Folded Spill - st.d $s8, $sp, 376 # 8-byte Folded Spill - fst.d $fs0, $sp, 368 # 8-byte Folded Spill - fst.d $fs1, $sp, 360 # 8-byte Folded Spill - fst.d $fs2, $sp, 352 # 8-byte Folded Spill - fst.d $fs3, $sp, 344 # 8-byte Folded Spill - fst.d $fs4, $sp, 336 # 8-byte Folded Spill - fst.d $fs5, $sp, 328 # 8-byte Folded Spill - fst.d $fs6, $sp, 320 # 8-byte Folded Spill - fst.d $fs7, $sp, 312 # 8-byte Folded Spill + addi.d $sp, $sp, -480 + .cfi_def_cfa_offset 480 + st.d $ra, $sp, 472 # 8-byte Folded Spill + st.d $fp, $sp, 464 # 8-byte Folded Spill + st.d $s0, $sp, 456 # 8-byte Folded Spill + st.d $s1, $sp, 448 # 8-byte Folded Spill + st.d $s2, $sp, 440 # 8-byte Folded Spill + st.d $s3, $sp, 432 # 8-byte Folded Spill + st.d $s4, $sp, 424 # 8-byte Folded Spill + st.d $s5, $sp, 416 # 8-byte Folded Spill + st.d $s6, $sp, 408 # 8-byte Folded Spill + st.d $s7, $sp, 400 # 8-byte Folded Spill + st.d $s8, $sp, 392 # 8-byte Folded Spill + fst.d $fs0, $sp, 384 # 8-byte Folded Spill + fst.d $fs1, $sp, 376 # 8-byte Folded Spill + fst.d $fs2, $sp, 368 # 8-byte Folded Spill + fst.d $fs3, $sp, 360 # 8-byte Folded Spill + fst.d $fs4, $sp, 352 # 8-byte Folded Spill + fst.d $fs5, $sp, 344 # 8-byte Folded Spill + fst.d $fs6, $sp, 336 # 8-byte Folded Spill + fst.d $fs7, $sp, 328 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -924,26 +885,26 @@ _ZL6createP6node_tii3v_tS1_d: # @_ZL6createP6node_tii3v_tS1_d addi.d $a0, $a0, 72 bge $s2, $a3, .LBB1_2 .LBB1_1: - fld.d $fs7, $sp, 312 # 8-byte Folded Reload - fld.d $fs6, $sp, 320 # 8-byte Folded Reload - fld.d $fs5, $sp, 328 # 8-byte Folded Reload - fld.d $fs4, $sp, 336 # 8-byte Folded Reload - fld.d $fs3, $sp, 344 # 8-byte Folded Reload - fld.d $fs2, $sp, 352 # 8-byte Folded Reload - fld.d $fs1, $sp, 360 # 8-byte Folded Reload - fld.d $fs0, $sp, 368 # 8-byte Folded Reload - ld.d $s8, $sp, 376 # 8-byte Folded Reload - ld.d $s7, $sp, 384 # 8-byte Folded Reload - ld.d $s6, $sp, 392 # 8-byte Folded Reload - ld.d $s5, $sp, 400 # 8-byte Folded Reload - ld.d $s4, $sp, 408 # 8-byte Folded Reload - ld.d $s3, $sp, 416 # 8-byte Folded Reload - ld.d $s2, $sp, 424 # 8-byte Folded Reload - ld.d $s1, $sp, 432 # 8-byte Folded Reload - ld.d $s0, $sp, 440 # 8-byte Folded Reload - ld.d $fp, $sp, 448 # 8-byte Folded Reload - ld.d $ra, $sp, 456 # 8-byte Folded Reload - addi.d $sp, $sp, 464 + fld.d $fs7, $sp, 328 # 8-byte Folded Reload + fld.d $fs6, $sp, 336 # 8-byte Folded Reload + fld.d $fs5, $sp, 344 # 8-byte Folded Reload + fld.d $fs4, $sp, 352 # 8-byte Folded Reload + fld.d $fs3, $sp, 360 # 8-byte Folded Reload + fld.d $fs2, $sp, 368 # 8-byte Folded Reload + fld.d $fs1, $sp, 376 # 8-byte Folded Reload + fld.d $fs0, $sp, 384 # 8-byte Folded Reload + ld.d $s8, $sp, 392 # 8-byte Folded Reload + ld.d $s7, $sp, 400 # 8-byte Folded Reload + ld.d $s6, $sp, 408 # 8-byte Folded Reload + ld.d $s5, $sp, 416 # 8-byte Folded Reload + ld.d $s4, $sp, 424 # 8-byte Folded Reload + ld.d $s3, $sp, 432 # 8-byte Folded Reload + ld.d $s2, $sp, 440 # 8-byte Folded Reload + ld.d $s1, $sp, 448 # 8-byte Folded Reload + ld.d $s0, $sp, 456 # 8-byte Folded Reload + ld.d $fp, $sp, 464 # 8-byte Folded Reload + ld.d $ra, $sp, 472 # 8-byte Folded Reload + addi.d $sp, $sp, 480 ret .LBB1_2: addi.w $a2, $a2, -9 @@ -958,7 +919,7 @@ _ZL6createP6node_tii3v_tS1_d: # @_ZL6createP6node_tii3v_tS1_d masknez $a1, $a1, $a3 or $s1, $a2, $a1 move $s3, $a0 - addi.d $a0, $sp, 216 + addi.d $a0, $sp, 232 move $a1, $fp pcaddu18i $ra, %call36(_ZN7basis_tC2ERK3v_t) jirl $ra, $ra, 0 @@ -967,44 +928,63 @@ _ZL6createP6node_tii3v_tS1_d: # @_ZL6createP6node_tii3v_tS1_d vldi $vr0, -1016 fdiv.d $fa0, $fs1, $fa0 addi.w $s2, $s2, -1 - fst.d $fa0, $sp, 88 # 8-byte Folded Spill + fst.d $fa0, $sp, 104 # 8-byte Folded Spill fadd.d $fa0, $fs1, $fa0 - vst $vr0, $sp, 64 # 16-byte Folded Spill + vst $vr0, $sp, 80 # 16-byte Folded Spill vreplvei.d $vr0, $vr0, 0 - vst $vr0, $sp, 48 # 16-byte Folded Spill + vst $vr0, $sp, 64 # 16-byte Folded Spill movgr2fr.d $fs6, $zero lu12i.w $a1, -419431 ori $a1, $a1, 2458 lu32i.d $a1, -419431 lu52i.d $a1, $a1, 1020 vreplgr2vr.d $vr0, $a1 - vst $vr0, $sp, 32 # 16-byte Folded Spill - pcalau12i $a1, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI1_0) - fst.d $fa0, $sp, 24 # 8-byte Folded Spill - pcalau12i $a1, %pc_hi20(.LCPI1_1) - fld.d $fs1, $a1, %pc_lo12(.LCPI1_1) - pcalau12i $a1, %pc_hi20(.LCPI1_2) - fld.d $fs2, $a1, %pc_lo12(.LCPI1_2) - pcalau12i $a1, %pc_hi20(.LCPI1_3) - fld.d $fs3, $a1, %pc_lo12(.LCPI1_3) - pcalau12i $a1, %pc_hi20(.LCPI1_7) - fld.d $fs4, $a1, %pc_lo12(.LCPI1_7) - pcalau12i $a1, %pc_hi20(.LCPI1_5) - fld.d $fs5, $a1, %pc_lo12(.LCPI1_5) - pcalau12i $a1, %pc_hi20(.LCPI1_8) - st.d $a1, $sp, 8 # 8-byte Folded Spill - fld.d $fa0, $a1, %pc_lo12(.LCPI1_8) - fst.d $fa0, $sp, 16 # 8-byte Folded Spill - pcalau12i $a1, %pc_hi20(.LCPI1_10) - fld.d $fs0, $a1, %pc_lo12(.LCPI1_10) - ori $s3, $zero, 6 - addi.w $s8, $zero, -99 + vst $vr0, $sp, 48 # 16-byte Folded Spill + movgr2fr.d $fa0, $a1 + fst.d $fa0, $sp, 40 # 8-byte Folded Spill + lu12i.w $a1, 345105 + ori $a1, $a1, 1860 + lu32i.d $a1, -450053 + lu52i.d $a2, $a1, 1025 + movgr2fr.d $fs1, $a2 + lu52i.d $a1, $a1, -1023 + movgr2fr.d $fs2, $a1 + lu12i.w $a1, 522003 + ori $a1, $a1, 3616 + lu32i.d $a1, 186748 + lu52i.d $a1, $a1, 1025 + movgr2fr.d $fs3, $a1 + ori $a1, $zero, 0 + lu32i.d $a1, -131072 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fs7, $a1 + lu12i.w $a1, 345380 + ori $a1, $a1, 1360 + lu32i.d $a1, -450053 + lu52i.d $a1, $a1, 1023 + movgr2fr.d $fs4, $a1 + lu52i.d $a1, $zero, 2047 + movgr2fr.d $fa0, $a1 + fst.d $fa0, $sp, 32 # 8-byte Folded Spill + lu12i.w $a1, 230103 + ori $a1, $a1, 869 + lu32i.d $a1, 49490 + st.d $a1, $sp, 16 # 8-byte Folded Spill + lu52i.d $s8, $a1, 1023 + ori $s6, $zero, 6 + addi.w $a1, $zero, -99 + st.d $a1, $sp, 24 # 8-byte Folded Spill vldi $vr11, -928 - pcalau12i $s4, %pc_hi20(.LCPI1_9) - pcalau12i $s5, %pc_hi20(.LCPI1_4) - pcalau12i $s6, %pc_hi20(.LCPI1_6) - fmov.d $fs7, $fs6 + lu12i.w $a1, -519458 + ori $a1, $a1, 2577 + lu32i.d $a1, 104345 + lu52i.d $s4, $a1, 983 + lu12i.w $a1, 346479 + ori $a1, $a1, 3455 + lu32i.d $a1, -450053 + lu52i.d $s3, $a1, 1024 + lu52i.d $s5, $a1, -1024 + fmov.d $fs0, $fs6 b .LBB1_4 .p2align 4, , 16 .LBB1_3: # %_ZNK3v_t4normEv.exit @@ -1012,38 +992,39 @@ _ZL6createP6node_tii3v_tS1_d: # @_ZL6createP6node_tii3v_tS1_d fld.d $fa2, $s0, 16 frecip.d $fa3, $fa3 fmul.d $fa1, $fa1, $fa3 - vld $vr4, $sp, 64 # 16-byte Folded Reload + vld $vr4, $sp, 80 # 16-byte Folded Reload fmul.d $fa4, $fa4, $fa1 fadd.d $fa2, $fa4, $fa2 vld $vr4, $s0, 0 vreplvei.d $vr3, $vr3, 0 vfmul.d $vr0, $vr0, $vr3 - vld $vr3, $sp, 48 # 16-byte Folded Reload + vld $vr3, $sp, 64 # 16-byte Folded Reload vfmul.d $vr3, $vr3, $vr0 vfadd.d $vr3, $vr4, $vr3 - vst $vr3, $sp, 192 - fst.d $fa2, $sp, 208 - vst $vr0, $sp, 160 - fst.d $fa1, $sp, 176 - addi.d $a3, $sp, 192 - addi.d $a4, $sp, 160 + vst $vr3, $sp, 208 + fst.d $fa2, $sp, 224 + vst $vr0, $sp, 176 + fst.d $fa1, $sp, 192 + addi.d $a3, $sp, 208 + addi.d $a4, $sp, 176 move $a1, $s2 move $a2, $s1 - fld.d $fa0, $sp, 88 # 8-byte Folded Reload + fld.d $fa0, $sp, 104 # 8-byte Folded Reload pcaddu18i $ra, %call36(_ZL6createP6node_tii3v_tS1_d) jirl $ra, $ra, 0 vldi $vr11, -928 + movgr2fr.d $fa0, $s8 addi.w $s7, $s7, 1 - fadd.d $fs7, $fs7, $fs0 - beq $s7, $s3, .LBB1_29 + fadd.d $fs0, $fs0, $fa0 + beq $s7, $s6, .LBB1_29 .LBB1_4: # =>This Loop Header: Depth=1 # Child Loop BB1_5 Depth 2 # Child Loop BB1_7 Depth 2 # Child Loop BB1_15 Depth 2 # Child Loop BB1_17 Depth 2 # Child Loop BB1_27 Depth 2 - fcmp.cule.d $fcc0, $fs6, $fs7 - fmov.d $fa0, $fs7 + fcmp.cule.d $fcc0, $fs6, $fs0 + fmov.d $fa0, $fs0 bcnez $fcc0, .LBB1_6 .p2align 4, , 16 .LBB1_5: # %.lr.ph.i @@ -1066,37 +1047,37 @@ _ZL6createP6node_tii3v_tS1_d: # @_ZL6createP6node_tii3v_tS1_d .LBB1_8: # %._crit_edge.i # in Loop: Header=BB1_4 Depth=1 fcmp.cule.d $fcc0, $fa0, $fs3 + movgr2fr.d $fs5, $s3 bcnez $fcc0, .LBB1_10 # %bb.9: # in Loop: Header=BB1_4 Depth=1 fsub.d $fa0, $fs1, $fa0 vldi $vr2, -784 - fadd.d $fa1, $fs7, $fs5 + fadd.d $fa1, $fs0, $fs4 fcmp.cule.d $fcc0, $fs6, $fa1 bceqz $fcc0, .LBB1_15 b .LBB1_16 .p2align 4, , 16 .LBB1_10: # in Loop: Header=BB1_4 Depth=1 - fld.d $fa1, $s5, %pc_lo12(.LCPI1_4) - fcmp.cule.d $fcc0, $fa0, $fa1 + fcmp.cule.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB1_12 # %bb.11: # in Loop: Header=BB1_4 Depth=1 - fld.d $fa1, $s6, %pc_lo12(.LCPI1_6) + movgr2fr.d $fa1, $s5 fadd.d $fa0, $fa0, $fa1 vldi $vr2, -784 - fadd.d $fa1, $fs7, $fs5 + fadd.d $fa1, $fs0, $fs4 fcmp.cule.d $fcc0, $fs6, $fa1 bceqz $fcc0, .LBB1_15 b .LBB1_16 .p2align 4, , 16 .LBB1_12: # in Loop: Header=BB1_4 Depth=1 - fcmp.cule.d $fcc0, $fa0, $fs5 + fcmp.cule.d $fcc0, $fa0, $fs4 bcnez $fcc0, .LBB1_14 # %bb.13: # in Loop: Header=BB1_4 Depth=1 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fs5, $fa0 .LBB1_14: # %_ZL7LLVMsind.exit # in Loop: Header=BB1_4 Depth=1 vldi $vr2, -912 - fadd.d $fa1, $fs7, $fs5 + fadd.d $fa1, $fs0, $fs4 fcmp.cule.d $fcc0, $fs6, $fa1 bcnez $fcc0, .LBB1_16 .p2align 4, , 16 @@ -1127,20 +1108,19 @@ _ZL6createP6node_tii3v_tS1_d: # @_ZL6createP6node_tii3v_tS1_d b .LBB1_25 .p2align 4, , 16 .LBB1_20: # in Loop: Header=BB1_4 Depth=1 - fld.d $fa3, $s5, %pc_lo12(.LCPI1_4) - fcmp.cule.d $fcc0, $fa1, $fa3 + fcmp.cule.d $fcc0, $fa1, $fs5 bcnez $fcc0, .LBB1_22 # %bb.21: # in Loop: Header=BB1_4 Depth=1 - fld.d $fa3, $s6, %pc_lo12(.LCPI1_6) + movgr2fr.d $fa3, $s5 fadd.d $fa1, $fa1, $fa3 vldi $vr3, -784 b .LBB1_25 .p2align 4, , 16 .LBB1_22: # in Loop: Header=BB1_4 Depth=1 - fcmp.cule.d $fcc0, $fa1, $fs5 + fcmp.cule.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB1_24 # %bb.23: # in Loop: Header=BB1_4 Depth=1 - fsub.d $fa1, $fa3, $fa1 + fsub.d $fa1, $fs5, $fa1 .LBB1_24: # %_ZL7LLVMcosd.exit # in Loop: Header=BB1_4 Depth=1 vldi $vr3, -912 @@ -1148,9 +1128,9 @@ _ZL6createP6node_tii3v_tS1_d: # @_ZL6createP6node_tii3v_tS1_d # in Loop: Header=BB1_4 Depth=1 vld $vr4, $fp, 0 fld.d $fa5, $fp, 16 - vld $vr6, $sp, 32 # 16-byte Folded Reload + vld $vr6, $sp, 48 # 16-byte Folded Reload vfmul.d $vr4, $vr4, $vr6 - fld.d $fa6, $sp, 24 # 8-byte Folded Reload + fld.d $fa6, $sp, 40 # 8-byte Folded Reload fmul.d $fa5, $fa5, $fa6 fmul.d $fa6, $fa0, $fa0 fmul.d $fa6, $fa0, $fa6 @@ -1158,7 +1138,7 @@ _ZL6createP6node_tii3v_tS1_d: # @_ZL6createP6node_tii3v_tS1_d fmul.d $fa7, $fa0, $fa7 vldi $vr8, -872 fdiv.d $fa6, $fa6, $ft0 - fdiv.d $fa7, $fa7, $fs4 + fdiv.d $fa7, $fa7, $fs7 fadd.d $fa0, $fa0, $fa6 fadd.d $fa0, $fa0, $fa7 fmul.d $fa0, $fa2, $fa0 @@ -1167,8 +1147,8 @@ _ZL6createP6node_tii3v_tS1_d: # @_ZL6createP6node_tii3v_tS1_d fsel $fa0, $fa0, $ft1, $fcc0 vldi $vr10, -784 fcmp.clt.d $fcc0, $fa0, $ft2 - vld $vr2, $sp, 240 - fld.d $fa6, $sp, 256 + vld $vr2, $sp, 256 + fld.d $fa6, $sp, 272 fsel $fa0, $fa0, $ft2, $fcc0 vreplvei.d $vr7, $vr0, 0 vfmul.d $vr2, $vr2, $vr7 @@ -1180,15 +1160,15 @@ _ZL6createP6node_tii3v_tS1_d: # @_ZL6createP6node_tii3v_tS1_d fmul.d $fa5, $fa1, $fa0 fmul.d $fa5, $fa1, $fa5 fdiv.d $fa0, $fa0, $ft0 - fdiv.d $fa5, $fa5, $fs4 + fdiv.d $fa5, $fa5, $fs7 fadd.d $fa0, $fa1, $fa0 fadd.d $fa0, $fa0, $fa5 fmul.d $fa0, $fa3, $fa0 fcmp.clt.d $fcc0, $ft1, $fa0 fsel $fa0, $fa0, $ft1, $fcc0 - fld.d $fa1, $sp, 280 + fld.d $fa1, $sp, 296 fcmp.clt.d $fcc0, $fa0, $ft2 - vld $vr3, $sp, 264 + vld $vr3, $sp, 280 fsel $fa0, $fa0, $ft2, $fcc0 fmul.d $fa1, $fa1, $fa0 vreplvei.d $vr0, $vr0, 0 @@ -1206,13 +1186,13 @@ _ZL6createP6node_tii3v_tS1_d: # @_ZL6createP6node_tii3v_tS1_d andi $a1, $a1, 64 sltu $a1, $zero, $a1 andi $a1, $a1, 1 - fld.d $fa3, $sp, 16 # 8-byte Folded Reload + fld.d $fa3, $sp, 32 # 8-byte Folded Reload # kill: def $f3_64 killed $f3_64 def $vr3 bnez $a1, .LBB1_3 # %bb.26: # %.preheader.i.i39.preheader # in Loop: Header=BB1_4 Depth=1 vldi $vr3, -912 - move $a1, $s8 + ld.d $a1, $sp, 24 # 8-byte Folded Reload .p2align 4, , 16 .LBB1_27: # %.preheader.i.i39 # Parent Loop BB1_4 Depth=1 @@ -1220,10 +1200,10 @@ _ZL6createP6node_tii3v_tS1_d: # @_ZL6createP6node_tii3v_tS1_d fmov.d $fa4, $fa3 fdiv.d $fa3, $fa2, $fa3 fadd.d $fa3, $fa4, $fa3 - fld.d $fa5, $s4, %pc_lo12(.LCPI1_9) fmul.d $fa3, $fa3, $ft3 fsub.d $fa4, $fa3, $fa4 fabs.d $fa4, $fa4 + movgr2fr.d $fa5, $s4 fcmp.cule.d $fcc0, $fa4, $fa5 bcnez $fcc0, .LBB1_3 # %bb.28: # %.preheader.i.i39 @@ -1233,58 +1213,65 @@ _ZL6createP6node_tii3v_tS1_d: # @_ZL6createP6node_tii3v_tS1_d bnez $a2, .LBB1_27 b .LBB1_3 .LBB1_29: - pcalau12i $a1, %pc_hi20(.LCPI1_11) - fld.d $fa0, $a1, %pc_lo12(.LCPI1_11) move $s7, $zero - fadd.d $fs0, $fs7, $fa0 + lu12i.w $a1, 306804 + ori $a1, $a1, 1159 + lu32i.d $a1, 415512 + lu52i.d $a1, $a1, -1027 + movgr2fr.d $fa0, $a1 + fadd.d $fs0, $fs0, $fa0 lu12i.w $a1, 209715 ori $a1, $a1, 819 lu32i.d $a1, 209715 lu52i.d $a1, $a1, 1022 vreplgr2vr.d $vr0, $a1 - vst $vr0, $sp, 32 # 16-byte Folded Spill - pcalau12i $a1, %pc_hi20(.LCPI1_12) - fld.d $fa0, $a1, %pc_lo12(.LCPI1_12) - fst.d $fa0, $sp, 24 # 8-byte Folded Spill + vst $vr0, $sp, 48 # 16-byte Folded Spill + movgr2fr.d $fa0, $a1 + fst.d $fa0, $sp, 40 # 8-byte Folded Spill movgr2fr.d $fs7, $zero - pcalau12i $a1, %pc_hi20(.LCPI1_13) - fld.d $fs6, $a1, %pc_lo12(.LCPI1_13) - ori $s8, $zero, 3 - addi.w $s3, $zero, -99 + ori $a1, $zero, 0 + lu32i.d $a1, -131072 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fs6, $a1 + ld.d $a1, $sp, 16 # 8-byte Folded Reload + lu52i.d $s3, $a1, 1024 + ori $s6, $zero, 3 + addi.w $s8, $zero, -99 b .LBB1_32 .p2align 4, , 16 .LBB1_30: # in Loop: Header=BB1_32 Depth=1 - ld.d $a1, $sp, 8 # 8-byte Folded Reload - fld.d $fa3, $a1, %pc_lo12(.LCPI1_8) + lu52i.d $a1, $zero, 2047 + movgr2fr.d $fa3, $a1 .LBB1_31: # %_ZNK3v_t4normEv.exit72 # in Loop: Header=BB1_32 Depth=1 fld.d $fa2, $s0, 16 frecip.d $fa3, $fa3 fmul.d $fa1, $fa1, $fa3 - vld $vr4, $sp, 64 # 16-byte Folded Reload + vld $vr4, $sp, 80 # 16-byte Folded Reload fmul.d $fa4, $fa4, $fa1 fadd.d $fa2, $fa4, $fa2 vld $vr4, $s0, 0 vreplvei.d $vr3, $vr3, 0 vfmul.d $vr0, $vr0, $vr3 - vld $vr3, $sp, 48 # 16-byte Folded Reload + vld $vr3, $sp, 64 # 16-byte Folded Reload vfmul.d $vr3, $vr3, $vr0 vfadd.d $vr3, $vr4, $vr3 - vst $vr3, $sp, 128 - fst.d $fa2, $sp, 144 - vst $vr0, $sp, 96 - fst.d $fa1, $sp, 112 - addi.d $a3, $sp, 128 - addi.d $a4, $sp, 96 + vst $vr3, $sp, 144 + fst.d $fa2, $sp, 160 + vst $vr0, $sp, 112 + fst.d $fa1, $sp, 128 + addi.d $a3, $sp, 144 + addi.d $a4, $sp, 112 move $a1, $s2 move $a2, $s1 - fld.d $fa0, $sp, 88 # 8-byte Folded Reload + fld.d $fa0, $sp, 104 # 8-byte Folded Reload pcaddu18i $ra, %call36(_ZL6createP6node_tii3v_tS1_d) jirl $ra, $ra, 0 vldi $vr11, -928 + movgr2fr.d $fa0, $s3 addi.w $s7, $s7, 1 - fadd.d $fs0, $fs0, $fs6 - beq $s7, $s8, .LBB1_1 + fadd.d $fs0, $fs0, $fa0 + beq $s7, $s6, .LBB1_1 .LBB1_32: # =>This Loop Header: Depth=1 # Child Loop BB1_33 Depth 2 # Child Loop BB1_35 Depth 2 @@ -1319,33 +1306,32 @@ _ZL6createP6node_tii3v_tS1_d: # @_ZL6createP6node_tii3v_tS1_d # %bb.37: # in Loop: Header=BB1_32 Depth=1 fsub.d $fa0, $fs1, $fa0 vldi $vr2, -784 - fadd.d $fa1, $fs0, $fs5 + fadd.d $fa1, $fs0, $fs4 fcmp.cule.d $fcc0, $fs7, $fa1 bceqz $fcc0, .LBB1_43 b .LBB1_44 .p2align 4, , 16 .LBB1_38: # in Loop: Header=BB1_32 Depth=1 - fld.d $fa1, $s5, %pc_lo12(.LCPI1_4) - fcmp.cule.d $fcc0, $fa0, $fa1 + fcmp.cule.d $fcc0, $fa0, $fs5 bcnez $fcc0, .LBB1_40 # %bb.39: # in Loop: Header=BB1_32 Depth=1 - fld.d $fa1, $s6, %pc_lo12(.LCPI1_6) + movgr2fr.d $fa1, $s5 fadd.d $fa0, $fa0, $fa1 vldi $vr2, -784 - fadd.d $fa1, $fs0, $fs5 + fadd.d $fa1, $fs0, $fs4 fcmp.cule.d $fcc0, $fs7, $fa1 bceqz $fcc0, .LBB1_43 b .LBB1_44 .p2align 4, , 16 .LBB1_40: # in Loop: Header=BB1_32 Depth=1 - fcmp.cule.d $fcc0, $fa0, $fs5 + fcmp.cule.d $fcc0, $fa0, $fs4 bcnez $fcc0, .LBB1_42 # %bb.41: # in Loop: Header=BB1_32 Depth=1 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fs5, $fa0 .LBB1_42: # %_ZL7LLVMsind.exit52 # in Loop: Header=BB1_32 Depth=1 vldi $vr2, -912 - fadd.d $fa1, $fs0, $fs5 + fadd.d $fa1, $fs0, $fs4 fcmp.cule.d $fcc0, $fs7, $fa1 bcnez $fcc0, .LBB1_44 .p2align 4, , 16 @@ -1376,20 +1362,19 @@ _ZL6createP6node_tii3v_tS1_d: # @_ZL6createP6node_tii3v_tS1_d b .LBB1_53 .p2align 4, , 16 .LBB1_48: # in Loop: Header=BB1_32 Depth=1 - fld.d $fa3, $s5, %pc_lo12(.LCPI1_4) - fcmp.cule.d $fcc0, $fa1, $fa3 + fcmp.cule.d $fcc0, $fa1, $fs5 bcnez $fcc0, .LBB1_50 # %bb.49: # in Loop: Header=BB1_32 Depth=1 - fld.d $fa3, $s6, %pc_lo12(.LCPI1_6) + movgr2fr.d $fa3, $s5 fadd.d $fa1, $fa1, $fa3 vldi $vr3, -784 b .LBB1_53 .p2align 4, , 16 .LBB1_50: # in Loop: Header=BB1_32 Depth=1 - fcmp.cule.d $fcc0, $fa1, $fs5 + fcmp.cule.d $fcc0, $fa1, $fs4 bcnez $fcc0, .LBB1_52 # %bb.51: # in Loop: Header=BB1_32 Depth=1 - fsub.d $fa1, $fa3, $fa1 + fsub.d $fa1, $fs5, $fa1 .LBB1_52: # %_ZL7LLVMcosd.exit65 # in Loop: Header=BB1_32 Depth=1 vldi $vr3, -912 @@ -1397,9 +1382,9 @@ _ZL6createP6node_tii3v_tS1_d: # @_ZL6createP6node_tii3v_tS1_d # in Loop: Header=BB1_32 Depth=1 vld $vr4, $fp, 0 fld.d $fa5, $fp, 16 - vld $vr6, $sp, 32 # 16-byte Folded Reload + vld $vr6, $sp, 48 # 16-byte Folded Reload vfmul.d $vr4, $vr4, $vr6 - fld.d $fa6, $sp, 24 # 8-byte Folded Reload + fld.d $fa6, $sp, 40 # 8-byte Folded Reload fmul.d $fa5, $fa5, $fa6 fmul.d $fa6, $fa0, $fa0 fmul.d $fa6, $fa0, $fa6 @@ -1407,7 +1392,7 @@ _ZL6createP6node_tii3v_tS1_d: # @_ZL6createP6node_tii3v_tS1_d fmul.d $fa7, $fa0, $fa7 vldi $vr8, -872 fdiv.d $fa6, $fa6, $ft0 - fdiv.d $fa7, $fa7, $fs4 + fdiv.d $fa7, $fa7, $fs6 fadd.d $fa0, $fa0, $fa6 fadd.d $fa0, $fa0, $fa7 fmul.d $fa0, $fa2, $fa0 @@ -1416,8 +1401,8 @@ _ZL6createP6node_tii3v_tS1_d: # @_ZL6createP6node_tii3v_tS1_d fsel $fa0, $fa0, $ft1, $fcc0 vldi $vr10, -784 fcmp.clt.d $fcc0, $fa0, $ft2 - vld $vr2, $sp, 240 - fld.d $fa6, $sp, 256 + vld $vr2, $sp, 256 + fld.d $fa6, $sp, 272 fsel $fa0, $fa0, $ft2, $fcc0 vreplvei.d $vr7, $vr0, 0 vfmul.d $vr2, $vr2, $vr7 @@ -1429,15 +1414,15 @@ _ZL6createP6node_tii3v_tS1_d: # @_ZL6createP6node_tii3v_tS1_d fmul.d $fa5, $fa1, $fa0 fmul.d $fa5, $fa1, $fa5 fdiv.d $fa0, $fa0, $ft0 - fdiv.d $fa5, $fa5, $fs4 + fdiv.d $fa5, $fa5, $fs6 fadd.d $fa0, $fa1, $fa0 fadd.d $fa0, $fa0, $fa5 fmul.d $fa0, $fa3, $fa0 fcmp.clt.d $fcc0, $ft1, $fa0 fsel $fa0, $fa0, $ft1, $fcc0 - fld.d $fa1, $sp, 280 + fld.d $fa1, $sp, 296 fcmp.clt.d $fcc0, $fa0, $ft2 - vld $vr3, $sp, 264 + vld $vr3, $sp, 280 fsel $fa0, $fa0, $ft2, $fcc0 fmul.d $fa1, $fa1, $fa0 vreplvei.d $vr0, $vr0, 0 @@ -1459,7 +1444,7 @@ _ZL6createP6node_tii3v_tS1_d: # @_ZL6createP6node_tii3v_tS1_d # %bb.54: # %.preheader.i.i66.preheader # in Loop: Header=BB1_32 Depth=1 vldi $vr3, -912 - move $a1, $s3 + move $a1, $s8 .p2align 4, , 16 .LBB1_55: # %.preheader.i.i66 # Parent Loop BB1_32 Depth=1 @@ -1467,10 +1452,10 @@ _ZL6createP6node_tii3v_tS1_d: # @_ZL6createP6node_tii3v_tS1_d fmov.d $fa4, $fa3 fdiv.d $fa3, $fa2, $fa3 fadd.d $fa3, $fa4, $fa3 - fld.d $fa5, $s4, %pc_lo12(.LCPI1_9) fmul.d $fa3, $fa3, $ft3 fsub.d $fa4, $fa3, $fa4 fabs.d $fa4, $fa4 + movgr2fr.d $fa5, $s4 fcmp.cule.d $fcc0, $fa4, $fa5 bcnez $fcc0, .LBB1_31 # %bb.56: # %.preheader.i.i66 @@ -1483,14 +1468,8 @@ _ZL6createP6node_tii3v_tS1_d: # @_ZL6createP6node_tii3v_tS1_d .size _ZL6createP6node_tii3v_tS1_d, .Lfunc_end1-_ZL6createP6node_tii3v_tS1_d .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN7basis_tC2ERK3v_t -.LCPI2_0: - .dword 0x7ff0000000000000 # double +Inf -.LCPI2_1: - .dword 0x3d719799812dea11 # double 9.9999999999999998E-13 .section .text._ZN7basis_tC2ERK3v_t,"axG",@progbits,_ZN7basis_tC2ERK3v_t,comdat - .weak _ZN7basis_tC2ERK3v_t + .weak _ZN7basis_tC2ERK3v_t # -- Begin function _ZN7basis_tC2ERK3v_t .p2align 5 .type _ZN7basis_tC2ERK3v_t,@function _ZN7basis_tC2ERK3v_t: # @_ZN7basis_tC2ERK3v_t @@ -1511,25 +1490,28 @@ _ZN7basis_tC2ERK3v_t: # @_ZN7basis_tC2ERK3v_t andi $a1, $a1, 1 beqz $a1, .LBB2_2 # %bb.1: - pcalau12i $a1, %pc_hi20(.LCPI2_0) - fld.d $fa4, $a1, %pc_lo12(.LCPI2_0) + lu52i.d $a1, $zero, 2047 + movgr2fr.d $fa4, $a1 b .LBB2_5 .LBB2_2: # %.preheader.i.i.preheader - pcalau12i $a1, %pc_hi20(.LCPI2_1) - fld.d $fa5, $a1, %pc_lo12(.LCPI2_1) addi.w $a1, $zero, -99 vldi $vr4, -912 - vldi $vr6, -928 + vldi $vr5, -928 + lu12i.w $a2, -519458 + ori $a2, $a2, 2577 + lu32i.d $a2, 104345 + lu52i.d $a2, $a2, 983 + movgr2fr.d $fa6, $a2 .p2align 4, , 16 .LBB2_3: # %.preheader.i.i # =>This Inner Loop Header: Depth=1 fmov.d $fa7, $fa4 fdiv.d $fa4, $fa3, $fa4 fadd.d $fa4, $fa7, $fa4 - fmul.d $fa4, $fa4, $fa6 + fmul.d $fa4, $fa4, $fa5 fsub.d $fa7, $fa4, $fa7 fabs.d $fa7, $fa7 - fcmp.cule.d $fcc0, $fa7, $fa5 + fcmp.cule.d $fcc0, $fa7, $fa6 bcnez $fcc0, .LBB2_5 # %bb.4: # %.preheader.i.i # in Loop: Header=BB2_3 Depth=1 @@ -1620,38 +1602,34 @@ _ZN7basis_tC2ERK3v_t: # @_ZN7basis_tC2ERK3v_t .size _ZN7basis_tC2ERK3v_t, .Lfunc_end2-_ZN7basis_tC2ERK3v_t .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _GLOBAL__sub_I_sphereflake.cpp -.LCPI3_0: - .dword 0x3ff7b851eb851eb9 # double 1.4825000000000002 -.LCPI3_1: - .dword 0x3d719799812dea11 # double 9.9999999999999998E-13 -.LCPI3_2: - .dword 0xbfe4cccccccccccd # double -0.65000000000000002 -.LCPI3_3: - .dword 0x3feccccccccccccd # double 0.90000000000000002 .section .text.startup,"ax",@progbits - .p2align 5 + .p2align 5 # -- Begin function _GLOBAL__sub_I_sphereflake.cpp .type _GLOBAL__sub_I_sphereflake.cpp,@function _GLOBAL__sub_I_sphereflake.cpp: # @_GLOBAL__sub_I_sphereflake.cpp # %bb.0: - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI3_0) - pcalau12i $a0, %pc_hi20(.LCPI3_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI3_1) addi.w $a0, $zero, -99 - vldi $vr3, -912 + vldi $vr0, -912 + lu12i.w $a1, -83887 + ori $a1, $a1, 3769 + lu32i.d $a1, 505937 + lu52i.d $a1, $a1, 1023 + movgr2fr.d $fa1, $a1 vldi $vr2, -928 + lu12i.w $a1, -519458 + ori $a1, $a1, 2577 + lu32i.d $a1, 104345 + lu52i.d $a1, $a1, 983 + movgr2fr.d $fa3, $a1 .p2align 4, , 16 .LBB3_1: # %.preheader.i.i.i # =>This Inner Loop Header: Depth=1 - fmov.d $fa4, $fa3 - fdiv.d $fa3, $fa0, $fa3 - fadd.d $fa3, $fa4, $fa3 - fmul.d $fa3, $fa3, $fa2 - fsub.d $fa4, $fa3, $fa4 + fmov.d $fa4, $fa0 + fdiv.d $fa0, $fa1, $fa0 + fadd.d $fa0, $fa4, $fa0 + fmul.d $fa0, $fa0, $fa2 + fsub.d $fa4, $fa0, $fa4 fabs.d $fa4, $fa4 - fcmp.cule.d $fcc0, $fa4, $fa1 + fcmp.cule.d $fcc0, $fa4, $fa3 bcnez $fcc0, .LBB3_3 # %bb.2: # %.preheader.i.i.i # in Loop: Header=BB3_1 Depth=1 @@ -1659,19 +1637,24 @@ _GLOBAL__sub_I_sphereflake.cpp: # @_GLOBAL__sub_I_sphereflake.cpp addi.w $a0, $a0, 1 bnez $a1, .LBB3_1 .LBB3_3: # %__cxx_global_var_init.exit - frecip.d $fa0, $fa3 - pcalau12i $a0, %pc_hi20(.LCPI3_2) - fld.d $fa1, $a0, %pc_lo12(.LCPI3_2) - pcalau12i $a0, %pc_hi20(.LCPI3_3) - fld.d $fa2, $a0, %pc_lo12(.LCPI3_3) - vldi $vr3, -800 - fmul.d $fa3, $fa0, $fa3 + frecip.d $fa0, $fa0 + vldi $vr1, -800 + lu12i.w $a0, -209716 + ori $a0, $a0, 3277 fmul.d $fa1, $fa0, $fa1 - fmul.d $fa0, $fa0, $fa2 + move $a1, $a0 + lu32i.d $a1, 314572 + lu52i.d $a1, $a1, -1026 + movgr2fr.d $fa2, $a1 + fmul.d $fa2, $fa0, $fa2 + lu32i.d $a0, -209716 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa3, $a0 + fmul.d $fa0, $fa0, $fa3 pcalau12i $a0, %pc_hi20(_ZL5light) addi.d $a0, $a0, %pc_lo12(_ZL5light) - fst.d $fa3, $a0, 0 - fst.d $fa1, $a0, 8 + fst.d $fa1, $a0, 0 + fst.d $fa2, $a0, 8 fst.d $fa0, $a0, 16 ret .Lfunc_end3: diff --git a/results/SingleSource/Benchmarks/Misc/CMakeFiles/dt.dir/dt.s b/results/SingleSource/Benchmarks/Misc/CMakeFiles/dt.dir/dt.s index 18c931e0..bda7c356 100644 --- a/results/SingleSource/Benchmarks/Misc/CMakeFiles/dt.dir/dt.s +++ b/results/SingleSource/Benchmarks/Misc/CMakeFiles/dt.dir/dt.s @@ -1,12 +1,6 @@ .file "dt.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x3ff000001ad7f29b # double 1.0000001000000001 -.LCPI0_1: - .dword 0x3ddb7cdfd9d7bdbb # double 1.0E-10 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -45,14 +39,19 @@ main: # @main ori $s3, $zero, 2048 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - ld.d $s1, $sp, 24 - ld.d $s2, $sp, 16 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_0) - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI0_1) move $s4, $zero move $s5, $zero + ld.d $s1, $sp, 24 + ld.d $s2, $sp, 16 + lu12i.w $a0, 109951 + ori $a0, $a0, 667 + lu52i.d $a0, $a0, 1023 + movgr2fr.d $fs0, $a0 + lu12i.w $a0, -156293 + ori $a0, $a0, 3515 + lu32i.d $a0, -295713 + lu52i.d $a0, $a0, 989 + movgr2fr.d $fs1, $a0 .p2align 4, , 16 .LBB0_1: # =>This Inner Loop Header: Depth=1 movgr2fr.d $fa0, $s3 diff --git a/results/SingleSource/Benchmarks/Misc/CMakeFiles/fbench.dir/fbench.s b/results/SingleSource/Benchmarks/Misc/CMakeFiles/fbench.dir/fbench.s index 210f9ab2..a37be099 100644 --- a/results/SingleSource/Benchmarks/Misc/CMakeFiles/fbench.dir/fbench.s +++ b/results/SingleSource/Benchmarks/Misc/CMakeFiles/fbench.dir/fbench.s @@ -13,12 +13,6 @@ .LCPI0_3: .dword 0x40b0f47a1cac0831 # double 4340.4769999999999 .dword 0x40af00fced916873 # double 3968.4940000000001 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI0_4: - .dword 0x408f400000000000 # double 1000 -.LCPI0_5: - .dword 0x3f184647e2eda082 # double 9.2600000000000001E-5 .text .globl main .p2align 5 @@ -94,11 +88,13 @@ main: # @main pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 ld.w $a0, $fp, %pc_lo12(niter) - pcalau12i $a1, %pc_hi20(.LCPI0_4) - fld.d $fa0, $a1, %pc_lo12(.LCPI0_4) - movgr2fr.w $fa1, $a0 - ffint.d.w $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a0 + ffint.d.w $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str.2) addi.d $a0, $a0, %pc_lo12(.L.str.2) @@ -130,8 +126,11 @@ main: # @main st.d $a0, $sp, 16 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(aberr_lchrom) st.d $a0, $sp, 8 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI0_5) - fld.d $fs1, $a0, %pc_lo12(.LCPI0_5) + lu12i.w $a0, -119078 + ori $a0, $a0, 130 + lu32i.d $a0, -506297 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fs1, $a0 pcalau12i $fp, %pc_hi20(max_lspher) pcalau12i $s6, %pc_hi20(max_osc) lu12i.w $a0, 293601 diff --git a/results/SingleSource/Benchmarks/Misc/CMakeFiles/ffbench.dir/ffbench.s b/results/SingleSource/Benchmarks/Misc/CMakeFiles/ffbench.dir/ffbench.s index 28990909..73d4b899 100644 --- a/results/SingleSource/Benchmarks/Misc/CMakeFiles/ffbench.dir/ffbench.s +++ b/results/SingleSource/Benchmarks/Misc/CMakeFiles/ffbench.dir/ffbench.s @@ -4,14 +4,6 @@ .LCPI0_0: .dword 0 # 0x0 .dword 1 # 0x1 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI0_1: - .dword 0x4202a05f20000000 # double 1.0E+10 -.LCPI0_2: - .dword 0xc202a05f20000000 # double -1.0E+10 -.LCPI0_3: - .dword 0x406fe00000000000 # double 255 .text .globl main .p2align 5 @@ -176,11 +168,13 @@ main: # @main addi.w $s1, $s1, -1 bnez $s1, .LBB0_17 # %bb.18: # %.preheader95.preheader - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_1) - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_2) addi.d $a0, $fp, 8 + lu12i.w $a1, 131072 + lu32i.d $a1, 172127 + lu52i.d $a2, $a1, 1056 + movgr2fr.d $fs0, $a2 + lu52i.d $a1, $a1, -992 + movgr2fr.d $fa0, $a1 lu12i.w $a1, 15 ori $a1, $a1, 4095 .p2align 4, , 16 @@ -195,9 +189,11 @@ main: # @main addi.d $a0, $a0, 16 bltu $s0, $a1, .LBB0_19 # %bb.20: - pcalau12i $a0, %pc_hi20(.LCPI0_3) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_3) fsub.d $fa0, $fa0, $fs0 + ori $a0, $zero, 0 + lu32i.d $a0, -8192 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa1, $a0 fdiv.d $fs1, $fa1, $fa0 addi.d $s3, $fp, 8 lu12i.w $s6, 1 @@ -330,12 +326,7 @@ main: # @main .Lfunc_end0: .size main, .Lfunc_end0-main # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function fourn -.LCPI1_0: - .dword 0x401921fb54442d1c # double 6.2831853071795898 - .text - .p2align 5 + .p2align 5 # -- Begin function fourn .type fourn,@function fourn: # @fourn # %bb.0: # %.preheader6 @@ -355,20 +346,23 @@ fourn: # @fourn fst.d $fs1, $sp, 24 # 8-byte Folded Spill fst.d $fs2, $sp, 16 # 8-byte Folded Spill fst.d $fs3, $sp, 8 # 8-byte Folded Spill - pcalau12i $s2, %pc_hi20(main.nsize.0) - ld.bu $a3, $s2, %pc_lo12(main.nsize.0) + pcalau12i $s3, %pc_hi20(main.nsize.0) + ld.bu $a3, $s3, %pc_lo12(main.nsize.0) pcalau12i $a2, %pc_hi20(main.nsize.1) ld.bu $a2, $a2, %pc_lo12(main.nsize.1) move $fp, $a0 slli.d $a0, $a3, 8 slli.d $s1, $a2, 8 - pcalau12i $a3, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a3, %pc_lo12(.LCPI1_0) mul.d $a3, $a2, $a0 - slli.d $s3, $a3, 8 - movgr2fr.w $fa1, $a1 - ffint.d.w $fa1, $fa1 - fmul.d $fs2, $fa1, $fa0 + slli.d $s2, $a3, 8 + movgr2fr.w $fa0, $a1 + ffint.d.w $fa0, $fa0 + lu12i.w $a1, 345154 + ori $a1, $a1, 3356 + lu32i.d $a1, -450053 + lu52i.d $a1, $a1, 1025 + movgr2fr.d $fa1, $a1 + fmul.d $fs2, $fa0, $fa1 slli.d $s0, $s1, 4 beqz $a2, .LBB1_10 # %bb.1: # %.lr.ph19 @@ -435,7 +429,7 @@ fourn: # @fourn blt $a7, $a6, .LBB1_8 b .LBB1_2 .LBB1_10: - move $a1, $s3 + move $a1, $s2 b .LBB1_20 .LBB1_11: # %.lr.ph32.preheader addi.d $s6, $fp, 8 @@ -531,12 +525,12 @@ fourn: # @fourn bge $s5, $a6, .LBB1_18 b .LBB1_15 .LBB1_19: # %._crit_edge33.loopexit - ld.bu $a1, $s2, %pc_lo12(main.nsize.0) + ld.bu $a1, $s3, %pc_lo12(main.nsize.0) slli.d $a0, $a1, 8 mul.d $a1, $a1, $s1 slli.d $a1, $a1, 8 .LBB1_20: # %._crit_edge33 - div.wu $a1, $s3, $a1 + div.wu $a1, $s2, $a1 slli.d $s1, $s1, 1 mul.d $s2, $a0, $s1 mul.w $s3, $s2, $a1 diff --git a/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-1.dir/flops-1.s b/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-1.dir/flops-1.s index 482061d1..ba90492d 100644 --- a/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-1.dir/flops-1.s +++ b/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-1.dir/flops-1.s @@ -1,14 +1,6 @@ .file "flops-1.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x41b2a05f20000000 # double 3.125E+8 -.LCPI0_1: - .dword 0xc039333333333333 # double -25.199999999999999 -.LCPI0_2: - .dword 0x39b4484bfeebc2a0 # double 1.0000000000000001E-30 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -65,8 +57,10 @@ main: # @main pcaddu18i $ra, %call36(puts) jirl $ra, $ra, 0 fld.d $fa0, $s0, %pc_lo12(one) - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) + lu12i.w $a0, 131072 + lu32i.d $a0, 172127 + lu52i.d $a0, $a0, 1051 + movgr2fr.d $fa1, $a0 fdiv.d $fa1, $fa0, $fa1 pcalau12i $a0, %pc_hi20(D1) fld.d $fa2, $a0, %pc_lo12(D1) @@ -110,16 +104,22 @@ main: # @main pcalau12i $a0, %pc_hi20(sa) fst.d $fa1, $a0, %pc_lo12(sa) fdiv.d $fa0, $fa0, $fa1 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_1) pcalau12i $a0, %pc_hi20(sb) fst.d $fa0, $a0, %pc_lo12(sb) - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_2) + lu12i.w $a0, 209715 + ori $a0, $a0, 819 + lu32i.d $a0, -445645 + lu52i.d $a0, $a0, -1021 + movgr2fr.d $fa1, $a0 fadd.d $fa0, $fa0, $fa1 pcalau12i $a0, %pc_hi20(sc) fst.d $fa0, $a0, %pc_lo12(sc) - fmul.d $fa0, $fa0, $fa2 + lu12i.w $a0, -4420 + ori $a0, $a0, 672 + lu32i.d $a0, 280651 + lu52i.d $a0, $a0, 923 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str.4) addi.d $a0, $a0, %pc_lo12(.L.str.4) diff --git a/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-2.dir/flops-2.s b/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-2.dir/flops-2.s index 8b806cbf..5f58d4be 100644 --- a/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-2.dir/flops-2.s +++ b/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-2.dir/flops-2.s @@ -1,12 +1,6 @@ .file "flops-2.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x403f400000000000 # double 31.25 -.LCPI0_1: - .dword 0x39b4484bfeebc2a0 # double 1.0000000000000001E-30 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -65,59 +59,61 @@ main: # @main fld.d $fa0, $s3, %pc_lo12(five) fld.d $fa1, $s2, %pc_lo12(one) fneg.d $fa3, $fa0 - fneg.d $fa2, $fa1 + fneg.d $fa1, $fa1 lu12i.w $a0, 38146 ori $a1, $a0, 3984 .p2align 4, , 16 .LBB0_1: # =>This Inner Loop Header: Depth=1 - fsub.d $fa2, $fa2, $fa3 + fsub.d $fa1, $fa1, $fa3 addi.d $a1, $a1, -1 fneg.d $fa3, $fa3 bnez $a1, .LBB0_1 # %bb.2: fld.d $fa4, $s1, %pc_lo12(two) - movgr2fr.d $fa1, $zero + movgr2fr.d $fa2, $zero ori $a0, $a0, 3984 - fmov.d $fa5, $fa1 + fmov.d $fa5, $fa2 .p2align 4, , 16 .LBB0_3: # =>This Inner Loop Header: Depth=1 - fadd.d $fa2, $fa2, $fa4 - fmadd.d $fa1, $fa3, $fa2, $fa1 - fdiv.d $fa6, $fa3, $fa2 + fadd.d $fa1, $fa1, $fa4 + fmadd.d $fa2, $fa3, $fa1, $fa2 + fdiv.d $fa6, $fa3, $fa1 fneg.d $fa3, $fa3 addi.d $a0, $a0, -1 fsub.d $fa5, $fa5, $fa6 bnez $a0, .LBB0_3 # %bb.4: - fld.d $fa2, $s0, %pc_lo12(four) - fmul.d $fa2, $fa5, $fa2 - fdiv.d $fa2, $fa2, $fa0 + fld.d $fa1, $s0, %pc_lo12(four) + fmul.d $fa1, $fa5, $fa1 + fdiv.d $fa1, $fa1, $fa0 pcalau12i $a0, %pc_hi20(sa) - fst.d $fa2, $a0, %pc_lo12(sa) - fdiv.d $fa0, $fa0, $fa1 - fadd.d $fa0, $fa0, $fa2 + fst.d $fa1, $a0, %pc_lo12(sa) + fdiv.d $fa0, $fa0, $fa2 + fadd.d $fa0, $fa0, $fa1 pcalau12i $a0, %pc_hi20(sb) fst.d $fa0, $a0, %pc_lo12(sb) pcalau12i $a0, %pc_hi20(sc) - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fa2, $a1, %pc_lo12(.LCPI0_0) ori $a1, $zero, 0 lu32i.d $a1, -49152 lu52i.d $a1, $a1, 1027 st.d $a1, $a0, %pc_lo12(sc) - fmul.d $fa3, $fa1, $fa1 - fmul.d $fa1, $fa1, $fa3 + fmul.d $fa1, $fa2, $fa2 + fmul.d $fa1, $fa2, $fa1 + movgr2fr.d $fa2, $a1 fdiv.d $fa1, $fa2, $fa1 + fld.d $fa2, $fp, %pc_lo12(piref) fsub.d $fa0, $fa0, $fa1 pcalau12i $a0, %pc_hi20(piprg) - fld.d $fa1, $fp, %pc_lo12(piref) fst.d $fa0, $a0, %pc_lo12(piprg) - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_1) - fsub.d $fa0, $fa0, $fa1 + fsub.d $fa0, $fa0, $fa2 pcalau12i $a0, %pc_hi20(pierr) fst.d $fa0, $a0, %pc_lo12(pierr) - fmul.d $fa0, $fa0, $fa2 + lu12i.w $a0, -4420 + ori $a0, $a0, 672 + lu32i.d $a0, 280651 + lu52i.d $a0, $a0, 923 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str.4) addi.d $a0, $a0, %pc_lo12(.L.str.4) diff --git a/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-3.dir/flops-3.s b/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-3.dir/flops-3.s index 69f21716..4c8fbe98 100644 --- a/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-3.dir/flops-3.s +++ b/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-3.dir/flops-3.s @@ -1,12 +1,6 @@ .file "flops-3.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x41b2a05f20000000 # double 3.125E+8 -.LCPI0_1: - .dword 0x39b4484bfeebc2a0 # double 1.0000000000000001E-30 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -61,12 +55,14 @@ main: # @main addi.d $a0, $a0, %pc_lo12(.Lstr.2) pcaddu18i $ra, %call36(puts) jirl $ra, $ra, 0 - fld.d $fa2, $s2, %pc_lo12(three) - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_0) - fld.d $fa3, $s0, %pc_lo12(piref) - fmul.d $fa0, $fa2, $fa0 - fdiv.d $fa0, $fa3, $fa0 + fld.d $fa2, $s0, %pc_lo12(piref) + fld.d $fa3, $s2, %pc_lo12(three) + lu12i.w $a0, 131072 + lu32i.d $a0, 172127 + lu52i.d $a0, $a0, 1051 + movgr2fr.d $fa0, $a0 + fmul.d $fa0, $fa3, $fa0 + fdiv.d $fa0, $fa2, $fa0 fld.d $fa1, $s1, %pc_lo12(one) pcalau12i $a0, %pc_hi20(A6) pcalau12i $a1, %pc_hi20(A5) @@ -101,7 +97,7 @@ main: # @main fmadd.d $fa5, $ft4, $ft5, $fa5 bnez $a0, .LBB0_1 # %bb.2: - fdiv.d $fa2, $fa3, $fa2 + fdiv.d $fa2, $fa2, $fa3 fmul.d $fa3, $fa2, $fa2 fmadd.d $fa4, $fa4, $fa3, $fa7 fmadd.d $fa4, $fa4, $fa3, $ft0 @@ -120,12 +116,15 @@ main: # @main lu52i.d $a1, $zero, 1022 st.d $a1, $a0, %pc_lo12(sb) vldi $vr1, -800 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_1) fadd.d $fa0, $fa0, $fa1 pcalau12i $a0, %pc_hi20(sc) fst.d $fa0, $a0, %pc_lo12(sc) - fmul.d $fa0, $fa0, $fa2 + lu12i.w $a0, -4420 + ori $a0, $a0, 672 + lu32i.d $a0, 280651 + lu52i.d $a0, $a0, 923 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str.4) addi.d $a0, $a0, %pc_lo12(.L.str.4) diff --git a/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-4.dir/flops-4.s b/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-4.dir/flops-4.s index 59b76003..48f58ba8 100644 --- a/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-4.dir/flops-4.s +++ b/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-4.dir/flops-4.s @@ -1,12 +1,6 @@ .file "flops-4.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x41a2a05f20000000 # double 1.5625E+8 -.LCPI0_1: - .dword 0x39b4484bfeebc2a0 # double 1.0000000000000001E-30 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -63,19 +57,21 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a1, %pc_hi20(A3) fld.d $fa0, $a1, %pc_lo12(A3) + pcalau12i $a2, %pc_hi20(A5) + fld.d $fa1, $a2, %pc_lo12(A5) move $a0, $zero fneg.d $fa0, $fa0 fst.d $fa0, $a1, %pc_lo12(A3) - pcalau12i $a1, %pc_hi20(A5) - fld.d $fa1, $a1, %pc_lo12(A5) - fld.d $fa3, $s2, %pc_lo12(three) - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.d $fa2, $a2, %pc_lo12(.LCPI0_0) - fld.d $fa4, $s1, %pc_lo12(piref) fneg.d $fa1, $fa1 - fst.d $fa1, $a1, %pc_lo12(A5) - fmul.d $fa2, $fa3, $fa2 - fdiv.d $fa2, $fa4, $fa2 + fst.d $fa1, $a2, %pc_lo12(A5) + fld.d $fa3, $s1, %pc_lo12(piref) + fld.d $fa4, $s2, %pc_lo12(three) + lu12i.w $a1, 131072 + lu32i.d $a1, 172127 + lu52i.d $a1, $a1, 1050 + movgr2fr.d $fa2, $a1 + fmul.d $fa2, $fa4, $fa2 + fdiv.d $fa2, $fa3, $fa2 pcalau12i $a1, %pc_hi20(B6) fld.d $fa5, $a1, %pc_lo12(B6) pcalau12i $a1, %pc_hi20(B5) @@ -108,7 +104,7 @@ main: # @main fadd.d $ft1, $fa6, $ft1 bne $a0, $a1, .LBB0_1 # %bb.2: - fdiv.d $fa3, $fa4, $fa3 + fdiv.d $fa3, $fa3, $fa4 fmul.d $fa4, $fa3, $fa3 fmadd.d $fa5, $fa5, $fa4, $fa7 fmadd.d $fa5, $fa4, $fa5, $ft2 @@ -142,11 +138,14 @@ main: # @main fmul.d $fa0, $fa3, $fa0 pcalau12i $a0, %pc_hi20(sb) fst.d $fa0, $a0, %pc_lo12(sb) - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_1) fsub.d $fa0, $fa2, $fa0 pcalau12i $a0, %pc_hi20(sc) fst.d $fa0, $a0, %pc_lo12(sc) + lu12i.w $a0, -4420 + ori $a0, $a0, 672 + lu32i.d $a0, 280651 + lu52i.d $a0, $a0, 923 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str.4) diff --git a/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-5.dir/flops-5.s b/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-5.dir/flops-5.s index d1d4f0eb..f87d25cf 100644 --- a/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-5.dir/flops-5.s +++ b/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-5.dir/flops-5.s @@ -1,14 +1,6 @@ .file "flops-5.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x41a2a05f20000000 # double 1.5625E+8 -.LCPI0_1: - .dword 0xbfe62e42fefa39ef # double -0.69314718055994529 -.LCPI0_2: - .dword 0x39b4484bfeebc2a0 # double 1.0000000000000001E-30 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -63,13 +55,15 @@ main: # @main addi.d $a0, $a0, %pc_lo12(.Lstr.2) pcaddu18i $ra, %call36(puts) jirl $ra, $ra, 0 - fld.d $fa0, $s2, %pc_lo12(three) - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_0) - fld.d $fa1, $s1, %pc_lo12(piref) move $a0, $zero - fmul.d $fa2, $fa0, $fa2 - fdiv.d $fa2, $fa1, $fa2 + fld.d $fa1, $s1, %pc_lo12(piref) + fld.d $fa2, $s2, %pc_lo12(three) + lu12i.w $a1, 131072 + lu32i.d $a1, 172127 + lu52i.d $a1, $a1, 1050 + movgr2fr.d $fa0, $a1 + fmul.d $fa0, $fa2, $fa0 + fdiv.d $fa0, $fa1, $fa0 pcalau12i $a1, %pc_hi20(A6) fld.d $fa4, $a1, %pc_lo12(A6) pcalau12i $a1, %pc_hi20(A5) @@ -103,7 +97,7 @@ main: # @main addi.d $a0, $a0, 1 movgr2fr.d $ft9, $a0 ffint.d.l $ft9, $ft9 - fmul.d $ft9, $fa2, $ft9 + fmul.d $ft9, $fa0, $ft9 fmul.d $ft10, $ft9, $ft9 fmadd.d $ft11, $fa4, $ft10, $fa5 fmadd.d $ft11, $ft11, $ft10, $fa6 @@ -122,42 +116,45 @@ main: # @main fadd.d $ft3, $ft3, $ft9 bne $a0, $a1, .LBB0_1 # %bb.2: - fdiv.d $fa0, $fa1, $fa0 - fmul.d $fa1, $fa0, $fa0 - fmadd.d $fa4, $fa4, $fa1, $fa5 - fmadd.d $fa4, $fa4, $fa1, $fa6 - fmadd.d $fa4, $fa4, $fa1, $fa7 - fmadd.d $fa4, $fa4, $fa1, $ft0 - fmadd.d $fa4, $fa4, $fa1, $ft1 - fmadd.d $fa4, $fa4, $fa1, $fa3 - fmul.d $fa0, $fa0, $fa4 - fmadd.d $fa4, $ft2, $fa1, $ft4 - fmadd.d $fa4, $fa1, $fa4, $ft6 - fmadd.d $fa4, $fa1, $fa4, $ft7 - fmadd.d $fa4, $fa1, $fa4, $ft8 + fdiv.d $fa1, $fa1, $fa2 + fmul.d $fa2, $fa1, $fa1 + fmadd.d $fa4, $fa4, $fa2, $fa5 + fmadd.d $fa4, $fa4, $fa2, $fa6 + fmadd.d $fa4, $fa4, $fa2, $fa7 + fmadd.d $fa4, $fa4, $fa2, $ft0 + fmadd.d $fa4, $fa4, $fa2, $ft1 + fmadd.d $fa4, $fa4, $fa2, $fa3 + fmul.d $fa1, $fa1, $fa4 + fmadd.d $fa4, $ft2, $fa2, $ft4 + fmadd.d $fa4, $fa2, $fa4, $ft6 + fmadd.d $fa4, $fa2, $fa4, $ft7 + fmadd.d $fa4, $fa2, $fa4, $ft8 fld.d $fa5, $fp, %pc_lo12(two) - fmadd.d $fa4, $fa1, $fa4, $ft5 - fmadd.d $fa1, $fa1, $fa4, $fa3 - fdiv.d $fa0, $fa0, $fa1 - fmadd.d $fa0, $fa5, $ft3, $fa0 - fmul.d $fa0, $fa2, $fa0 + fmadd.d $fa4, $fa2, $fa4, $ft5 + fmadd.d $fa2, $fa2, $fa4, $fa3 + fdiv.d $fa1, $fa1, $fa2 + fmadd.d $fa1, $fa5, $ft3, $fa1 + fmul.d $fa0, $fa0, $fa1 fdiv.d $fa0, $fa0, $fa5 pcalau12i $a0, %pc_hi20(sa) fst.d $fa0, $a0, %pc_lo12(sa) pcalau12i $a0, %pc_hi20(sb) - pcalau12i $a1, %pc_hi20(.LCPI0_1) - fld.d $fa1, $a1, %pc_lo12(.LCPI0_1) lu12i.w $a1, -4189 ori $a1, $a1, 2543 lu32i.d $a1, 405058 - lu52i.d $a1, $a1, 1022 - st.d $a1, $a0, %pc_lo12(sb) - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_2) + lu52i.d $a2, $a1, 1022 + st.d $a2, $a0, %pc_lo12(sb) + lu52i.d $a0, $a1, -1026 + movgr2fr.d $fa1, $a0 fadd.d $fa0, $fa0, $fa1 pcalau12i $a0, %pc_hi20(sc) fst.d $fa0, $a0, %pc_lo12(sc) - fmul.d $fa0, $fa0, $fa2 + lu12i.w $a0, -4420 + ori $a0, $a0, 672 + lu32i.d $a0, 280651 + lu52i.d $a0, $a0, 923 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str.4) addi.d $a0, $a0, %pc_lo12(.L.str.4) diff --git a/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-6.dir/flops-6.s b/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-6.dir/flops-6.s index e2de1ed1..fc72fe91 100644 --- a/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-6.dir/flops-6.s +++ b/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-6.dir/flops-6.s @@ -1,12 +1,6 @@ .file "flops-6.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x41a2a05f20000000 # double 1.5625E+8 -.LCPI0_1: - .dword 0x39b4484bfeebc2a0 # double 1.0000000000000001E-30 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -61,13 +55,15 @@ main: # @main addi.d $a0, $a0, %pc_lo12(.Lstr.2) pcaddu18i $ra, %call36(puts) jirl $ra, $ra, 0 - fld.d $fa0, $s2, %pc_lo12(four) - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_0) - fld.d $fa1, $s1, %pc_lo12(piref) move $a0, $zero - fmul.d $fa2, $fa0, $fa2 - fdiv.d $fa2, $fa1, $fa2 + fld.d $fa1, $s1, %pc_lo12(piref) + fld.d $fa2, $s2, %pc_lo12(four) + lu12i.w $a1, 131072 + lu32i.d $a1, 172127 + lu52i.d $a1, $a1, 1050 + movgr2fr.d $fa0, $a1 + fmul.d $fa0, $fa2, $fa0 + fdiv.d $fa0, $fa1, $fa0 pcalau12i $a1, %pc_hi20(A6) fld.d $fa4, $a1, %pc_lo12(A6) pcalau12i $a1, %pc_hi20(A5) @@ -101,7 +97,7 @@ main: # @main addi.d $a0, $a0, 1 movgr2fr.d $ft9, $a0 ffint.d.l $ft9, $ft9 - fmul.d $ft9, $fa2, $ft9 + fmul.d $ft9, $fa0, $ft9 fmul.d $ft10, $ft9, $ft9 fmadd.d $ft11, $fa4, $ft10, $fa5 fmadd.d $ft11, $ft11, $ft10, $fa6 @@ -119,25 +115,25 @@ main: # @main fmadd.d $ft3, $ft9, $ft10, $ft3 bne $a0, $a1, .LBB0_1 # %bb.2: - fdiv.d $fa0, $fa1, $fa0 - fmul.d $fa1, $fa0, $fa0 - fmadd.d $fa4, $fa4, $fa1, $fa5 - fmadd.d $fa4, $fa4, $fa1, $fa6 - fmadd.d $fa4, $fa4, $fa1, $fa7 - fmadd.d $fa4, $fa4, $fa1, $ft0 - fmadd.d $fa4, $fa4, $fa1, $ft1 - fmadd.d $fa4, $fa4, $fa1, $fa3 - fmul.d $fa0, $fa0, $fa4 - fmadd.d $fa4, $ft2, $fa1, $ft4 - fmadd.d $fa4, $fa1, $fa4, $ft6 - fmadd.d $fa4, $fa1, $fa4, $ft7 - fmadd.d $fa4, $fa1, $fa4, $ft8 + fdiv.d $fa1, $fa1, $fa2 + fmul.d $fa2, $fa1, $fa1 + fmadd.d $fa4, $fa4, $fa2, $fa5 + fmadd.d $fa4, $fa4, $fa2, $fa6 + fmadd.d $fa4, $fa4, $fa2, $fa7 + fmadd.d $fa4, $fa4, $fa2, $ft0 + fmadd.d $fa4, $fa4, $fa2, $ft1 + fmadd.d $fa4, $fa4, $fa2, $fa3 + fmul.d $fa1, $fa1, $fa4 + fmadd.d $fa4, $ft2, $fa2, $ft4 + fmadd.d $fa4, $fa2, $fa4, $ft6 + fmadd.d $fa4, $fa2, $fa4, $ft7 + fmadd.d $fa4, $fa2, $fa4, $ft8 fld.d $fa5, $fp, %pc_lo12(two) - fmadd.d $fa4, $fa1, $fa4, $ft5 - fmadd.d $fa1, $fa1, $fa4, $fa3 + fmadd.d $fa4, $fa2, $fa4, $ft5 + fmadd.d $fa2, $fa2, $fa4, $fa3 + fmul.d $fa1, $fa1, $fa2 + fmadd.d $fa1, $fa5, $ft3, $fa1 fmul.d $fa0, $fa0, $fa1 - fmadd.d $fa0, $fa5, $ft3, $fa0 - fmul.d $fa0, $fa2, $fa0 fdiv.d $fa0, $fa0, $fa5 pcalau12i $a0, %pc_hi20(sa) fst.d $fa0, $a0, %pc_lo12(sa) @@ -145,12 +141,15 @@ main: # @main lu52i.d $a1, $zero, 1021 st.d $a1, $a0, %pc_lo12(sb) vldi $vr1, -816 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_1) fadd.d $fa0, $fa0, $fa1 pcalau12i $a0, %pc_hi20(sc) fst.d $fa0, $a0, %pc_lo12(sc) - fmul.d $fa0, $fa0, $fa2 + lu12i.w $a0, -4420 + ori $a0, $a0, 672 + lu32i.d $a0, 280651 + lu52i.d $a0, $a0, 923 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str.4) addi.d $a0, $a0, %pc_lo12(.L.str.4) diff --git a/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-7.dir/flops-7.s b/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-7.dir/flops-7.s index 1cf92f3e..8bee814c 100644 --- a/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-7.dir/flops-7.s +++ b/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-7.dir/flops-7.s @@ -1,22 +1,6 @@ .file "flops-7.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x3ea5f9c4ff20f589 # double 6.5492576895697597E-7 -.LCPI0_1: - .dword 0x40599541f7f192a4 # double 102.3321513995275 -.LCPI0_2: - .dword 0x40c473ef42466d8c # double 10471.869210055818 -.LCPI0_3: - .dword 0x413059f8e7cae1c6 # double 1071608.9054394825 -.LCPI0_4: - .dword 0x3ee8b8fd9f05143a # double 1.1788663841225567E-5 -.LCPI0_5: - .dword 0x407f433333333333 # double 500.19999999999999 -.LCPI0_6: - .dword 0x39b4484bfeebc2a0 # double 1.0000000000000001E-30 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -71,9 +55,12 @@ main: # @main jirl $ra, $ra, 0 move $a0, $zero fld.d $fa0, $s0, %pc_lo12(one) - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fa2, $a1, %pc_lo12(.LCPI0_0) movgr2fr.d $fa1, $zero + lu12i.w $a1, -3569 + ori $a1, $a1, 1417 + lu32i.d $a1, 391620 + lu52i.d $a1, $a1, 1002 + movgr2fr.d $fa2, $a1 lu12i.w $a1, 38146 ori $a1, $a1, 3983 .p2align 4, , 16 @@ -94,37 +81,55 @@ main: # @main fsub.d $fa1, $fa1, $fa3 bne $a0, $a1, .LBB0_1 # %bb.2: - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_1) - fneg.d $fa3, $fa0 - fadd.d $fa4, $fa0, $fa2 + fneg.d $fa2, $fa0 + lu12i.w $a0, -32999 + ori $a0, $a0, 676 + lu32i.d $a0, -420543 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa3, $a0 + fadd.d $fa4, $fa0, $fa3 fdiv.d $fa4, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_2) - fsub.d $fa3, $fa3, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_3) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_3) - fadd.d $fa6, $fa0, $fa5 - fdiv.d $fa2, $fa2, $fa6 - fsub.d $fa2, $fa3, $fa2 - fadd.d $fa0, $fa0, $fa4 - fld.d $fa3, $fp, %pc_lo12(two) - pcalau12i $a0, %pc_hi20(.LCPI0_4) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_4) - fdiv.d $fa0, $fa5, $fa0 + fsub.d $fa2, $fa2, $fa4 + lu12i.w $a0, 271462 + ori $a0, $a0, 3468 + lu32i.d $a0, 291823 + lu52i.d $a0, $a0, 1036 + movgr2fr.d $fa4, $a0 + fadd.d $fa5, $fa0, $fa4 + fdiv.d $fa3, $fa3, $fa5 + fsub.d $fa2, $fa2, $fa3 + lu12i.w $a0, -99154 + ori $a0, $a0, 454 + lu32i.d $a0, 23032 + lu52i.d $a0, $a0, 1043 + movgr2fr.d $fa3, $a0 + fld.d $fa5, $fp, %pc_lo12(two) + fadd.d $fa0, $fa0, $fa3 + fdiv.d $fa0, $fa4, $fa0 fsub.d $fa0, $fa2, $fa0 - fmadd.d $fa0, $fa3, $fa1, $fa0 - fmul.d $fa0, $fa0, $fa4 + fmadd.d $fa0, $fa5, $fa1, $fa0 + lu12i.w $a0, -397231 + ori $a0, $a0, 1082 + lu32i.d $a0, -476931 + lu52i.d $a0, $a0, 1006 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 pcalau12i $a0, %pc_hi20(sa) - pcalau12i $a1, %pc_hi20(.LCPI0_5) - fld.d $fa1, $a1, %pc_lo12(.LCPI0_5) fst.d $fa0, $a0, %pc_lo12(sa) - pcalau12i $a0, %pc_hi20(.LCPI0_6) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_6) + lu12i.w $a0, 209715 + ori $a0, $a0, 819 + lu32i.d $a0, -48333 + lu52i.d $a0, $a0, 1031 + movgr2fr.d $fa1, $a0 fadd.d $fa0, $fa0, $fa1 pcalau12i $a0, %pc_hi20(sc) fst.d $fa0, $a0, %pc_lo12(sc) - fmul.d $fa0, $fa0, $fa2 + lu12i.w $a0, -4420 + ori $a0, $a0, 672 + lu32i.d $a0, 280651 + lu52i.d $a0, $a0, 923 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str.4) addi.d $a0, $a0, %pc_lo12(.L.str.4) diff --git a/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-8.dir/flops-8.s b/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-8.dir/flops-8.s index f701ce70..2bf32dc9 100644 --- a/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-8.dir/flops-8.s +++ b/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops-8.dir/flops-8.s @@ -1,14 +1,6 @@ .file "flops-8.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x41a2a05f20000000 # double 1.5625E+8 -.LCPI0_1: - .dword 0xbfd2aaaaaaaaaaab # double -0.29166666666666669 -.LCPI0_2: - .dword 0x39b4484bfeebc2a0 # double 1.0000000000000001E-30 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -63,15 +55,17 @@ main: # @main addi.d $a0, $a0, %pc_lo12(.Lstr.2) pcaddu18i $ra, %call36(puts) jirl $ra, $ra, 0 - fld.d $fa0, $s2, %pc_lo12(three) - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_0) - fld.d $fa1, $s1, %pc_lo12(piref) move $a0, $zero - fmul.d $fa2, $fa0, $fa2 - fdiv.d $fa2, $fa1, $fa2 + fld.d $fa1, $s1, %pc_lo12(piref) + fld.d $fa3, $s2, %pc_lo12(three) + lu12i.w $a1, 131072 + lu32i.d $a1, 172127 + lu52i.d $a1, $a1, 1050 + movgr2fr.d $fa0, $a1 + fmul.d $fa0, $fa3, $fa0 + fdiv.d $fa0, $fa1, $fa0 pcalau12i $a1, %pc_hi20(B6) - fld.d $fa3, $a1, %pc_lo12(B6) + fld.d $fa2, $a1, %pc_lo12(B6) pcalau12i $a1, %pc_hi20(B5) fld.d $fa4, $a1, %pc_lo12(B5) pcalau12i $a1, %pc_hi20(B4) @@ -103,9 +97,9 @@ main: # @main addi.d $a0, $a0, 1 movgr2fr.d $ft9, $a0 ffint.d.l $ft9, $ft9 - fmul.d $ft9, $fa2, $ft9 + fmul.d $ft9, $fa0, $ft9 fmul.d $ft10, $ft9, $ft9 - fmadd.d $ft11, $fa3, $ft10, $fa4 + fmadd.d $ft11, $fa2, $ft10, $fa4 fmadd.d $ft11, $ft10, $ft11, $fa6 fmadd.d $ft11, $ft10, $ft11, $fa7 fmadd.d $ft11, $ft10, $ft11, $ft0 @@ -122,43 +116,46 @@ main: # @main fmadd.d $ft2, $ft9, $ft10, $ft2 bne $a0, $a1, .LBB0_1 # %bb.2: - fdiv.d $fa0, $fa1, $fa0 - fmul.d $fa1, $fa0, $fa0 - fmadd.d $ft3, $ft3, $fa1, $ft4 - fmadd.d $ft3, $ft3, $fa1, $ft5 - fmadd.d $ft3, $ft3, $fa1, $ft6 - fmadd.d $ft3, $ft3, $fa1, $ft7 - fmadd.d $ft3, $ft3, $fa1, $ft8 - fmadd.d $ft3, $ft3, $fa1, $fa5 - fmul.d $fa0, $fa0, $ft3 - fmadd.d $fa3, $fa3, $fa1, $fa4 - fmadd.d $fa3, $fa1, $fa3, $fa6 - fmadd.d $fa3, $fa1, $fa3, $fa7 - fmadd.d $fa3, $fa1, $fa3, $ft0 - fmadd.d $fa3, $fa1, $fa3, $ft1 + fdiv.d $fa1, $fa1, $fa3 + fmul.d $fa3, $fa1, $fa1 + fmadd.d $ft3, $ft3, $fa3, $ft4 + fmadd.d $ft3, $ft3, $fa3, $ft5 + fmadd.d $ft3, $ft3, $fa3, $ft6 + fmadd.d $ft3, $ft3, $fa3, $ft7 + fmadd.d $ft3, $ft3, $fa3, $ft8 + fmadd.d $ft3, $ft3, $fa3, $fa5 + fmul.d $fa1, $fa1, $ft3 + fmadd.d $fa2, $fa2, $fa3, $fa4 + fmadd.d $fa2, $fa3, $fa2, $fa6 + fmadd.d $fa2, $fa3, $fa2, $fa7 + fmadd.d $fa2, $fa3, $fa2, $ft0 + fmadd.d $fa2, $fa3, $fa2, $ft1 fld.d $fa4, $fp, %pc_lo12(two) - fmadd.d $fa1, $fa1, $fa3, $fa5 + fmadd.d $fa2, $fa3, $fa2, $fa5 + fmul.d $fa1, $fa1, $fa2 + fmul.d $fa1, $fa2, $fa1 + fmadd.d $fa1, $fa4, $ft2, $fa1 fmul.d $fa0, $fa0, $fa1 - fmul.d $fa0, $fa1, $fa0 - fmadd.d $fa0, $fa4, $ft2, $fa0 - fmul.d $fa0, $fa2, $fa0 fdiv.d $fa0, $fa0, $fa4 pcalau12i $a0, %pc_hi20(sa) fst.d $fa0, $a0, %pc_lo12(sa) pcalau12i $a0, %pc_hi20(sb) - pcalau12i $a1, %pc_hi20(.LCPI0_1) - fld.d $fa1, $a1, %pc_lo12(.LCPI0_1) lu12i.w $a1, -349526 ori $a1, $a1, 2731 lu32i.d $a1, 174762 - lu52i.d $a1, $a1, 1021 - st.d $a1, $a0, %pc_lo12(sb) - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_2) + lu52i.d $a2, $a1, 1021 + st.d $a2, $a0, %pc_lo12(sb) + lu52i.d $a0, $a1, -1027 + movgr2fr.d $fa1, $a0 fadd.d $fa0, $fa0, $fa1 pcalau12i $a0, %pc_hi20(sc) fst.d $fa0, $a0, %pc_lo12(sc) - fmul.d $fa0, $fa0, $fa2 + lu12i.w $a0, -4420 + ori $a0, $a0, 672 + lu32i.d $a0, 280651 + lu52i.d $a0, $a0, 923 + movgr2fr.d $fa1, $a0 + fmul.d $fa0, $fa0, $fa1 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str.4) addi.d $a0, $a0, %pc_lo12(.L.str.4) diff --git a/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops.dir/flops.s b/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops.dir/flops.s index e4a236e4..6902c26c 100644 --- a/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops.dir/flops.s +++ b/results/SingleSource/Benchmarks/Misc/CMakeFiles/flops.dir/flops.s @@ -1,32 +1,6 @@ .file "flops.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI0_1: - .dword 0xc039333333333333 # double -25.199999999999999 -.LCPI0_2: - .dword 0x39b4484bfeebc2a0 # double 1.0000000000000001E-30 -.LCPI0_3: - .dword 0xc03f400000000000 # double -31.25 -.LCPI0_4: - .dword 0xbfe62e42fefa39ef # double -0.69314718055994529 -.LCPI0_5: - .dword 0x40599541f7f192a4 # double 102.3321513995275 -.LCPI0_6: - .dword 0x407f433333333333 # double 500.19999999999999 -.LCPI0_7: - .dword 0xbfd2aaaaaaaaaaab # double -0.29166666666666669 -.LCPI0_8: - .dword 0x404a000000000000 # double 52 -.LCPI0_9: - .dword 0x4063000000000000 # double 152 -.LCPI0_10: - .dword 0x4062400000000000 # double 146 -.LCPI0_11: - .dword 0x4056c00000000000 # double 91 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -117,13 +91,16 @@ main: # @main pcaddu18i $ra, %call36(gettimeofday) jirl $ra, $ra, 0 ld.d $a0, $fp, 0 + ld.d $a1, $fp, 8 movgr2fr.d $fa0, $a0 - ld.d $a0, $fp, 8 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fs1, $a1, %pc_lo12(.LCPI0_0) ffint.d.l $fa0, $fa0 - movgr2fr.d $fa1, $a0 + movgr2fr.d $fa1, $a1 ffint.d.l $fa1, $fa1 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fs1, $a0 fmadd.d $fs2, $fa1, $fs1, $fa0 fst.d $fs2, $s4, 16 fsub.d $fa0, $fs2, $fs0 @@ -321,23 +298,30 @@ main: # @main movfr2gr.d $a0, $fa3 lu12i.w $a1, 9 ori $a1, $a1, 3136 + fld.d $fa3, $s0, %pc_lo12(scale) mul.d $a0, $a0, $a1 - movgr2fr.d $fa3, $a0 - fld.d $fa5, $s0, %pc_lo12(scale) - ffint.d.l $fa3, $fa3 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa6, $a0, %pc_lo12(.LCPI0_1) - fdiv.d $fa3, $fa3, $fa5 + movgr2fr.d $fa5, $a0 + ffint.d.l $fa5, $fa5 + fdiv.d $fa3, $fa5, $fa3 ftintrz.l.d $fa3, $fa3 + lu12i.w $a0, 209715 + ori $a0, $a0, 819 movfr2gr.d $s0, $fa3 - fadd.d $fa1, $fa1, $fa6 - pcalau12i $a1, %pc_hi20(sc) - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fs5, $a0, %pc_lo12(.LCPI0_2) - st.d $a1, $sp, 160 # 8-byte Folded Spill - fst.d $fa1, $a1, %pc_lo12(sc) + st.d $a0, $sp, 8 # 8-byte Folded Spill + lu32i.d $a0, -445645 + lu52i.d $a0, $a0, -1021 + movgr2fr.d $fa3, $a0 + fadd.d $fa1, $fa1, $fa3 + pcalau12i $a0, %pc_hi20(sc) + st.d $a0, $sp, 160 # 8-byte Folded Spill + fst.d $fa1, $a0, %pc_lo12(sc) fdiv.d $fa2, $fa4, $fa2 fst.d $fa2, $s1, 32 + lu12i.w $a0, -4420 + ori $a0, $a0, 672 + lu32i.d $a0, 280651 + lu52i.d $a0, $a0, 923 + movgr2fr.d $fs5, $a0 fmul.d $fa1, $fa1, $fs5 fmul.d $fa0, $fa0, $fs5 fmul.d $fa2, $fa2, $fs5 @@ -494,13 +478,13 @@ main: # @main fst.d $fa1, $a0, %pc_lo12(sb) ori $a0, $zero, 0 lu32i.d $a0, -49152 - lu52i.d $a0, $a0, 1027 - pcalau12i $a1, %pc_hi20(.LCPI0_3) - fld.d $fa3, $a1, %pc_lo12(.LCPI0_3) - st.d $a0, $a2, %pc_lo12(sc) - fmul.d $fa4, $fs4, $fs4 - fmul.d $fa4, $fs4, $fa4 - fdiv.d $fa3, $fa3, $fa4 + lu52i.d $a1, $a0, 1027 + st.d $a1, $a2, %pc_lo12(sc) + fmul.d $fa3, $fs4, $fs4 + fmul.d $fa3, $fs4, $fa3 + lu52i.d $a0, $a0, -1021 + movgr2fr.d $fa4, $a0 + fdiv.d $fa3, $fa4, $fa3 fadd.d $fa1, $fa1, $fa3 fld.d $fa3, $s3, %pc_lo12(piref) pcalau12i $a0, %pc_hi20(piprg) @@ -548,7 +532,7 @@ main: # @main ori $a1, $zero, 2 pcalau12i $a0, %pc_hi20(A6) st.d $a0, $sp, 120 # 8-byte Folded Spill - pcalau12i $s5, %pc_hi20(A5) + pcalau12i $s7, %pc_hi20(A5) pcalau12i $a0, %pc_hi20(A4) st.d $a0, $sp, 128 # 8-byte Folded Spill pcalau12i $s2, %pc_hi20(A3) @@ -559,7 +543,7 @@ main: # @main fmov.d $fs5, $fs0 blt $s0, $a1, .LBB0_19 # %bb.17: # %.lr.ph349 - fld.d $fa2, $s5, %pc_lo12(A5) + fld.d $fa2, $s7, %pc_lo12(A5) ld.d $a0, $sp, 176 # 8-byte Folded Reload fld.d $fa0, $a0, %pc_lo12(one) ld.d $a0, $sp, 120 # 8-byte Folded Reload @@ -609,12 +593,12 @@ main: # @main fld.d $fa2, $a0, %pc_lo12(nulltime) fst.d $fa0, $s4, 8 fld.d $fa3, $s3, %pc_lo12(piref) - ld.d $s6, $sp, 112 # 8-byte Folded Reload - fld.d $fa4, $s6, %pc_lo12(three) + ld.d $s5, $sp, 112 # 8-byte Folded Reload + fld.d $fa4, $s5, %pc_lo12(three) fmsub.d $fa0, $fa1, $fa0, $fa2 ld.d $a0, $sp, 120 # 8-byte Folded Reload fld.d $fa1, $a0, %pc_lo12(A6) - fld.d $fa2, $s5, %pc_lo12(A5) + fld.d $fa2, $s7, %pc_lo12(A5) fdiv.d $fa3, $fa3, $fa4 ld.d $a0, $sp, 128 # 8-byte Folded Reload fld.d $fa4, $a0, %pc_lo12(A4) @@ -663,14 +647,14 @@ main: # @main pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 fld.d $fa0, $s2, %pc_lo12(A3) - fld.d $fa1, $s5, %pc_lo12(A5) + fld.d $fa1, $s7, %pc_lo12(A5) fneg.d $fa0, $fa0 fst.d $fa0, $s2, %pc_lo12(A3) fneg.d $fa0, $fa1 - fld.d $fa1, $s6, %pc_lo12(three) + fld.d $fa1, $s5, %pc_lo12(three) fld.d $fa2, $s3, %pc_lo12(piref) fld.d $fs2, $s4, 16 - fst.d $fa0, $s5, %pc_lo12(A5) + fst.d $fa0, $s7, %pc_lo12(A5) fmul.d $fa0, $fa1, $fs3 fdiv.d $fs4, $fa2, $fa0 move $a0, $fp @@ -688,7 +672,7 @@ main: # @main fsub.d $fa0, $fs6, $fs2 fst.d $fa0, $s4, 8 pcalau12i $s6, %pc_hi20(B6) - pcalau12i $s7, %pc_hi20(B5) + pcalau12i $s5, %pc_hi20(B5) pcalau12i $a0, %pc_hi20(B4) st.d $a0, $sp, 64 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(B3) @@ -702,7 +686,7 @@ main: # @main blt $s0, $a0, .LBB0_22 # %bb.20: # %.lr.ph356 fld.d $fa0, $s6, %pc_lo12(B6) - fld.d $fa1, $s7, %pc_lo12(B5) + fld.d $fa1, $s5, %pc_lo12(B5) ld.d $a0, $sp, 64 # 8-byte Folded Reload fld.d $fa2, $a0, %pc_lo12(B4) ld.d $a0, $sp, 72 # 8-byte Folded Reload @@ -754,7 +738,7 @@ main: # @main fmsub.d $fa0, $fa1, $fa0, $fa2 fdiv.d $fa1, $fa3, $fa4 fld.d $fa2, $s6, %pc_lo12(B6) - fld.d $fa3, $s7, %pc_lo12(B5) + fld.d $fa3, $s5, %pc_lo12(B5) ld.d $a0, $sp, 64 # 8-byte Folded Reload fld.d $fa4, $a0, %pc_lo12(B4) fmul.d $fa5, $fa1, $fa1 @@ -779,7 +763,7 @@ main: # @main fmul.d $fa2, $fs4, $fa2 ld.d $a0, $sp, 120 # 8-byte Folded Reload fld.d $fa4, $a0, %pc_lo12(A6) - fld.d $fa7, $s5, %pc_lo12(A5) + fld.d $fa7, $s7, %pc_lo12(A5) ld.d $a0, $sp, 128 # 8-byte Folded Reload fld.d $ft0, $a0, %pc_lo12(A4) fst.d $fa0, $s1, 96 @@ -845,7 +829,7 @@ main: # @main # %bb.23: # %.lr.ph363 ld.d $a0, $sp, 120 # 8-byte Folded Reload fld.d $fa0, $a0, %pc_lo12(A6) - fld.d $fa1, $s5, %pc_lo12(A5) + fld.d $fa1, $s7, %pc_lo12(A5) ld.d $a0, $sp, 128 # 8-byte Folded Reload fld.d $fa2, $a0, %pc_lo12(A4) fld.d $fa3, $s2, %pc_lo12(A3) @@ -856,7 +840,7 @@ main: # @main ld.d $a0, $sp, 176 # 8-byte Folded Reload fld.d $fa6, $a0, %pc_lo12(one) fld.d $fa7, $s6, %pc_lo12(B6) - fld.d $ft0, $s7, %pc_lo12(B5) + fld.d $ft0, $s5, %pc_lo12(B5) ld.d $a0, $sp, 64 # 8-byte Folded Reload fld.d $ft1, $a0, %pc_lo12(B4) ld.d $a0, $sp, 72 # 8-byte Folded Reload @@ -914,7 +898,7 @@ main: # @main fmsub.d $fa0, $fa1, $fa0, $fa2 ld.d $a0, $sp, 120 # 8-byte Folded Reload fld.d $fa1, $a0, %pc_lo12(A6) - fld.d $fa2, $s5, %pc_lo12(A5) + fld.d $fa2, $s7, %pc_lo12(A5) fdiv.d $fa3, $fa3, $fa4 fmul.d $fa4, $fa3, $fa3 ld.d $a0, $sp, 128 # 8-byte Folded Reload @@ -932,7 +916,7 @@ main: # @main fld.d $fa2, $a0, %pc_lo12(one) fmadd.d $fa1, $fa1, $fa4, $fa5 fld.d $fa5, $s6, %pc_lo12(B6) - fld.d $fa6, $s7, %pc_lo12(B5) + fld.d $fa6, $s5, %pc_lo12(B5) fmadd.d $fa1, $fa1, $fa4, $fa2 fmul.d $fa1, $fa3, $fa1 ld.d $a0, $sp, 64 # 8-byte Folded Reload @@ -963,12 +947,12 @@ main: # @main fst.d $fa1, $s8, %pc_lo12(sa) lu12i.w $a0, -4189 ori $a0, $a0, 2543 - pcalau12i $a1, %pc_hi20(.LCPI0_4) - fld.d $fa3, $a1, %pc_lo12(.LCPI0_4) lu32i.d $a0, 405058 - lu52i.d $a0, $a0, 1022 - ld.d $a1, $sp, 96 # 8-byte Folded Reload - st.d $a0, $a1, %pc_lo12(sb) + lu52i.d $a1, $a0, 1022 + ld.d $a2, $sp, 96 # 8-byte Folded Reload + st.d $a1, $a2, %pc_lo12(sb) + lu52i.d $a0, $a0, -1026 + movgr2fr.d $fa3, $a0 fadd.d $fa1, $fa1, $fa3 ld.d $a0, $sp, 160 # 8-byte Folded Reload fst.d $fa1, $a0, %pc_lo12(sc) @@ -1010,7 +994,7 @@ main: # @main # %bb.26: # %.lr.ph370 ld.d $a0, $sp, 120 # 8-byte Folded Reload fld.d $fa0, $a0, %pc_lo12(A6) - fld.d $fa1, $s5, %pc_lo12(A5) + fld.d $fa1, $s7, %pc_lo12(A5) ld.d $a0, $sp, 128 # 8-byte Folded Reload fld.d $fa2, $a0, %pc_lo12(A4) fld.d $fa3, $s2, %pc_lo12(A3) @@ -1021,7 +1005,7 @@ main: # @main ld.d $a0, $sp, 176 # 8-byte Folded Reload fld.d $fa6, $a0, %pc_lo12(one) fld.d $fa7, $s6, %pc_lo12(B6) - fld.d $ft0, $s7, %pc_lo12(B5) + fld.d $ft0, $s5, %pc_lo12(B5) ld.d $a0, $sp, 64 # 8-byte Folded Reload fld.d $ft1, $a0, %pc_lo12(B4) ld.d $a0, $sp, 72 # 8-byte Folded Reload @@ -1078,7 +1062,7 @@ main: # @main fmsub.d $fa0, $fa1, $fa0, $fa2 ld.d $a0, $sp, 120 # 8-byte Folded Reload fld.d $fa1, $a0, %pc_lo12(A6) - fld.d $fa2, $s5, %pc_lo12(A5) + fld.d $fa2, $s7, %pc_lo12(A5) fdiv.d $fa3, $fa3, $fa4 fmul.d $fa4, $fa3, $fa3 ld.d $a0, $sp, 128 # 8-byte Folded Reload @@ -1096,7 +1080,7 @@ main: # @main fld.d $fa2, $s3, %pc_lo12(one) fmadd.d $fa1, $fa1, $fa4, $fa5 fld.d $fa5, $s6, %pc_lo12(B6) - fld.d $fa6, $s7, %pc_lo12(B5) + fld.d $fa6, $s5, %pc_lo12(B5) fmadd.d $fa1, $fa1, $fa4, $fa2 fmul.d $fa1, $fa3, $fa1 ld.d $a0, $sp, 64 # 8-byte Folded Reload @@ -1147,11 +1131,10 @@ main: # @main lu12i.w $a0, -32999 ori $a0, $a0, 676 lu32i.d $a0, -420543 - pcalau12i $a1, %pc_hi20(.LCPI0_5) - fld.d $fa0, $a1, %pc_lo12(.LCPI0_5) - fld.d $fs5, $s4, 16 lu52i.d $a0, $a0, 1029 + fld.d $fs5, $s4, 16 st.d $a0, $s8, %pc_lo12(sa) + movgr2fr.d $fa0, $a0 fdiv.d $fs3, $fa0, $fs3 move $a0, $fp move $a1, $zero @@ -1242,16 +1225,18 @@ main: # @main movgr2fr.d $fa4, $a0 ffint.d.l $fa4, $fa4 fdiv.d $fa3, $fa4, $fa3 - pcalau12i $a0, %pc_hi20(.LCPI0_6) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_6) ftintrz.l.d $fa3, $fa3 - ld.d $a0, $sp, 176 # 8-byte Folded Reload - fld.d $fa5, $a0, %pc_lo12(one) movfr2gr.d $s0, $fa3 + ld.d $a0, $sp, 8 # 8-byte Folded Reload + lu32i.d $a0, -48333 + lu52i.d $a0, $a0, 1031 + ld.d $a1, $sp, 176 # 8-byte Folded Reload + fld.d $fa3, $a1, %pc_lo12(one) + movgr2fr.d $fa4, $a0 fadd.d $fa1, $fa1, $fa4 ld.d $a0, $sp, 160 # 8-byte Folded Reload fst.d $fa1, $a0, %pc_lo12(sc) - fdiv.d $fa2, $fa5, $fa2 + fdiv.d $fa2, $fa3, $fa2 fst.d $fa2, $s1, 184 fmul.d $fa1, $fa1, $fs7 fmul.d $fa0, $fa0, $fs7 @@ -1289,7 +1274,7 @@ main: # @main blt $s0, $s3, .LBB0_34 # %bb.32: # %.lr.ph384 fld.d $fa0, $s6, %pc_lo12(B6) - fld.d $fa1, $s7, %pc_lo12(B5) + fld.d $fa1, $s5, %pc_lo12(B5) ld.d $a0, $sp, 64 # 8-byte Folded Reload fld.d $fa2, $a0, %pc_lo12(B4) ld.d $a0, $sp, 72 # 8-byte Folded Reload @@ -1302,7 +1287,7 @@ main: # @main fld.d $fa6, $a0, %pc_lo12(one) ld.d $a0, $sp, 120 # 8-byte Folded Reload fld.d $fa7, $a0, %pc_lo12(A6) - fld.d $ft0, $s5, %pc_lo12(A5) + fld.d $ft0, $s7, %pc_lo12(A5) ld.d $a0, $sp, 128 # 8-byte Folded Reload fld.d $ft1, $a0, %pc_lo12(A4) fld.d $ft2, $s2, %pc_lo12(A3) @@ -1360,7 +1345,7 @@ main: # @main fmsub.d $fa0, $fa1, $fa0, $fa2 ld.d $a0, $sp, 120 # 8-byte Folded Reload fld.d $fa1, $a0, %pc_lo12(A6) - fld.d $fa2, $s5, %pc_lo12(A5) + fld.d $fa2, $s7, %pc_lo12(A5) fdiv.d $fa3, $fa3, $fa4 fmul.d $fa4, $fa3, $fa3 ld.d $a0, $sp, 128 # 8-byte Folded Reload @@ -1378,7 +1363,7 @@ main: # @main fld.d $fa2, $fp, %pc_lo12(one) fmadd.d $fa1, $fa1, $fa4, $fa5 fld.d $fa5, $s6, %pc_lo12(B6) - fld.d $fa6, $s7, %pc_lo12(B5) + fld.d $fa6, $s5, %pc_lo12(B5) fmadd.d $fa1, $fa1, $fa4, $fa2 fmul.d $fa1, $fa3, $fa1 ld.d $a0, $sp, 64 # 8-byte Folded Reload @@ -1409,12 +1394,12 @@ main: # @main fst.d $fa1, $s8, %pc_lo12(sa) lu12i.w $a0, -349526 ori $a0, $a0, 2731 - pcalau12i $a1, %pc_hi20(.LCPI0_7) - fld.d $fa3, $a1, %pc_lo12(.LCPI0_7) lu32i.d $a0, 174762 - lu52i.d $a0, $a0, 1021 - ld.d $a1, $sp, 96 # 8-byte Folded Reload - st.d $a0, $a1, %pc_lo12(sb) + lu52i.d $a1, $a0, 1021 + ld.d $a2, $sp, 96 # 8-byte Folded Reload + st.d $a1, $a2, %pc_lo12(sb) + lu52i.d $a0, $a0, -1027 + movgr2fr.d $fa3, $a0 fadd.d $fa1, $fa1, $fa3 ld.d $a0, $sp, 160 # 8-byte Folded Reload fst.d $fa1, $a0, %pc_lo12(sc) @@ -1435,50 +1420,58 @@ main: # @main fld.d $fa1, $s1, 48 fld.d $fa2, $s1, 40 fld.d $fa3, $s1, 72 - pcalau12i $a0, %pc_hi20(.LCPI0_8) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_8) fsub.d $fa1, $fa1, $fa2 fmadd.d $fa0, $fa0, $fa1, $fa3 - fdiv.d $fa0, $fa0, $fa4 - fld.d $fa1, $fp, %pc_lo12(one) + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -393216 + lu52i.d $a1, $a1, 1028 + movgr2fr.d $fa1, $a1 + fld.d $fa2, $fp, %pc_lo12(one) + fdiv.d $fa0, $fa0, $fa1 + fld.d $fa1, $s1, 16 fst.d $fa0, $s1, 216 - fld.d $fa2, $s1, 16 - fld.d $fa4, $s1, 96 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa2, $fa0 fst.d $fa0, $s1, 224 - fadd.d $fa0, $fa3, $fa2 - fadd.d $fa0, $fa0, $fa4 - fld.d $fa2, $s1, 120 + fadd.d $fa0, $fa3, $fa1 + fld.d $fa1, $s1, 96 + fld.d $fa4, $s1, 120 fld.d $fa5, $s1, 144 - ld.d $a0, $sp, 56 # 8-byte Folded Reload - fld.d $fa6, $a0, %pc_lo12(four) + ld.d $a1, $sp, 56 # 8-byte Folded Reload + fld.d $fa6, $a1, %pc_lo12(four) fld.d $fa7, $s1, 168 - pcalau12i $a0, %pc_hi20(.LCPI0_9) - fld.d $ft0, $a0, %pc_lo12(.LCPI0_9) - fadd.d $fa0, $fa0, $fa2 + fadd.d $fa0, $fa0, $fa1 + fadd.d $fa0, $fa0, $fa4 fadd.d $fa0, $fa0, $fa5 - fmadd.d $fa2, $fa6, $fa7, $fa0 - fdiv.d $fa2, $fa2, $ft0 - fst.d $fa2, $s1, 232 - fdiv.d $fa2, $fa1, $fa2 + fmadd.d $fa4, $fa6, $fa7, $fa0 + ori $a1, $zero, 0 + lu32i.d $a1, 196608 + lu52i.d $a1, $a1, 1030 + movgr2fr.d $fa6, $a1 + fdiv.d $fa4, $fa4, $fa6 + fst.d $fa4, $s1, 232 fld.d $fa6, $s1, 192 - pcalau12i $a0, %pc_hi20(.LCPI0_10) - fld.d $ft0, $a0, %pc_lo12(.LCPI0_10) - fst.d $fa2, $s1, 240 + fdiv.d $fa4, $fa2, $fa4 + fst.d $fa4, $s1, 240 fadd.d $fa0, $fa0, $fa7 fadd.d $fa0, $fa0, $fa6 - fdiv.d $fa0, $fa0, $ft0 + ori $a1, $zero, 0 + lu32i.d $a1, 147456 + lu52i.d $a1, $a1, 1030 + movgr2fr.d $fa4, $a1 + fdiv.d $fa0, $fa0, $fa4 fst.d $fa0, $s1, 248 - fdiv.d $fa0, $fa1, $fa0 - fst.d $fa0, $s1, 256 - pcalau12i $a0, %pc_hi20(.LCPI0_11) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_11) - fadd.d $fa2, $fa3, $fa4 - fadd.d $fa2, $fa2, $fa5 - fadd.d $fa2, $fa2, $fa6 fdiv.d $fa0, $fa2, $fa0 + fst.d $fa0, $s1, 256 + fadd.d $fa0, $fa3, $fa1 + fadd.d $fa0, $fa0, $fa5 + fadd.d $fa0, $fa0, $fa6 + lu32i.d $a0, 442368 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa1, $a0 + fdiv.d $fa0, $fa0, $fa1 fst.d $fa0, $s1, 264 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa2, $fa0 fst.d $fa0, $s1, 272 ori $a0, $zero, 10 pcaddu18i $ra, %call36(putchar) @@ -1548,12 +1541,7 @@ main: # @main .Lfunc_end0: .size main, .Lfunc_end0-main # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function dtime -.LCPI1_0: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 - .text - .globl dtime + .globl dtime # -- Begin function dtime .p2align 5 .type dtime,@function dtime: # @dtime @@ -1572,14 +1560,17 @@ dtime: # @dtime pcaddu18i $ra, %call36(gettimeofday) jirl $ra, $ra, 0 ld.d $a0, $s0, 0 + ld.d $a1, $s0, 8 movgr2fr.d $fa0, $a0 - ld.d $a0, $s0, 8 - pcalau12i $a1, %pc_hi20(.LCPI1_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_0) ffint.d.l $fa0, $fa0 + movgr2fr.d $fa1, $a1 + ffint.d.l $fa1, $fa1 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 movgr2fr.d $fa2, $a0 - ffint.d.l $fa2, $fa2 - fmadd.d $fa0, $fa2, $fa1, $fa0 + fmadd.d $fa0, $fa1, $fa2, $fa0 fst.d $fa0, $fp, 16 fsub.d $fa0, $fa0, $fs0 fst.d $fa0, $fp, 8 diff --git a/results/SingleSource/Benchmarks/Misc/CMakeFiles/fp-convert.dir/fp-convert.s b/results/SingleSource/Benchmarks/Misc/CMakeFiles/fp-convert.dir/fp-convert.s index 2f818ed9..ab8f1a76 100644 --- a/results/SingleSource/Benchmarks/Misc/CMakeFiles/fp-convert.dir/fp-convert.s +++ b/results/SingleSource/Benchmarks/Misc/CMakeFiles/fp-convert.dir/fp-convert.s @@ -24,15 +24,9 @@ loop: # @loop .Lfunc_end0: .size loop, .Lfunc_end0-loop # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function main -.LCPI1_0: - .word 0x3dcccccd # float 0.100000001 -.LCPI1_1: - .word 0x3e4ccccd # float 0.200000003 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI1_2: + .p2align 4, 0x0 # -- Begin function main +.LCPI1_0: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -56,12 +50,14 @@ main: # @main ori $a1, $a1, 3277 lu12i.w $a2, 104857 ori $a2, $a2, 2458 + lu12i.w $a3, 253132 + ori $a3, $a3, 3277 + movgr2fr.w $fa3, $a3 + lu12i.w $a3, 255180 + ori $a3, $a3, 3277 + movgr2fr.w $fa4, $a3 pcalau12i $a3, %pc_hi20(.LCPI1_0) - fld.s $fa3, $a3, %pc_lo12(.LCPI1_0) - pcalau12i $a3, %pc_hi20(.LCPI1_1) - fld.s $fa4, $a3, %pc_lo12(.LCPI1_1) - pcalau12i $a3, %pc_hi20(.LCPI1_2) - vld $vr5, $a3, %pc_lo12(.LCPI1_2) + vld $vr5, $a3, %pc_lo12(.LCPI1_0) lu12i.w $a3, -2 lu12i.w $a4, 2 ori $a4, $a4, 8 diff --git a/results/SingleSource/Benchmarks/Misc/CMakeFiles/himenobmtxpa.dir/himenobmtxpa.s b/results/SingleSource/Benchmarks/Misc/CMakeFiles/himenobmtxpa.dir/himenobmtxpa.s index 3ae1f31d..ce1f153f 100644 --- a/results/SingleSource/Benchmarks/Misc/CMakeFiles/himenobmtxpa.dir/himenobmtxpa.s +++ b/results/SingleSource/Benchmarks/Misc/CMakeFiles/himenobmtxpa.dir/himenobmtxpa.s @@ -16,10 +16,6 @@ .word 64 # 0x40 .word 64 # 0x40 .word 128 # 0x80 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI0_3: - .word 0x45781000 # float 3969 .text .globl main .p2align 5 @@ -137,56 +133,56 @@ main: # @main addi.d $s8, $a1, 256 st.d $s1, $s5, 0 addi.d $a1, $s6, 256 + lu12i.w $a2, 284545 + movgr2fr.w $fa0, $a2 lu12i.w $s6, 8 .p2align 4, , 16 .LBB0_1: # %.preheader25.us.us.i # =>This Loop Header: Depth=1 # Child Loop BB0_2 Depth 2 - pcalau12i $a2, %pc_hi20(.LCPI0_3) - fld.s $fa0, $a2, %pc_lo12(.LCPI0_3) mul.d $a2, $a0, $a0 movgr2fr.d $fa1, $a2 ffint.s.l $fa1, $fa1 - fdiv.s $fa0, $fa1, $fa0 - vreplvei.w $vr0, $vr0, 0 + fdiv.s $fa1, $fa1, $fa0 + vreplvei.w $vr1, $vr1, 0 move $a2, $a1 ori $a3, $zero, 64 .p2align 4, , 16 .LBB0_2: # %.preheader.us.us.us.i # Parent Loop BB0_1 Depth=1 # => This Inner Loop Header: Depth=2 - vst $vr0, $a2, -256 - vst $vr0, $a2, -240 - vst $vr0, $a2, -224 - vst $vr0, $a2, -208 - vst $vr0, $a2, -192 - vst $vr0, $a2, -176 - vst $vr0, $a2, -160 - vst $vr0, $a2, -144 - vst $vr0, $a2, -128 - vst $vr0, $a2, -112 - vst $vr0, $a2, -96 - vst $vr0, $a2, -80 - vst $vr0, $a2, -64 - vst $vr0, $a2, -48 - vst $vr0, $a2, -32 - vst $vr0, $a2, -16 - vst $vr0, $a2, 0 - vst $vr0, $a2, 16 - vst $vr0, $a2, 32 - vst $vr0, $a2, 48 - vst $vr0, $a2, 64 - vst $vr0, $a2, 80 - vst $vr0, $a2, 96 - vst $vr0, $a2, 112 - vst $vr0, $a2, 128 - vst $vr0, $a2, 144 - vst $vr0, $a2, 160 - vst $vr0, $a2, 176 - vst $vr0, $a2, 192 - vst $vr0, $a2, 208 - vst $vr0, $a2, 224 - vst $vr0, $a2, 240 + vst $vr1, $a2, -256 + vst $vr1, $a2, -240 + vst $vr1, $a2, -224 + vst $vr1, $a2, -208 + vst $vr1, $a2, -192 + vst $vr1, $a2, -176 + vst $vr1, $a2, -160 + vst $vr1, $a2, -144 + vst $vr1, $a2, -128 + vst $vr1, $a2, -112 + vst $vr1, $a2, -96 + vst $vr1, $a2, -80 + vst $vr1, $a2, -64 + vst $vr1, $a2, -48 + vst $vr1, $a2, -32 + vst $vr1, $a2, -16 + vst $vr1, $a2, 0 + vst $vr1, $a2, 16 + vst $vr1, $a2, 32 + vst $vr1, $a2, 48 + vst $vr1, $a2, 64 + vst $vr1, $a2, 80 + vst $vr1, $a2, 96 + vst $vr1, $a2, 112 + vst $vr1, $a2, 128 + vst $vr1, $a2, 144 + vst $vr1, $a2, 160 + vst $vr1, $a2, 176 + vst $vr1, $a2, 192 + vst $vr1, $a2, 208 + vst $vr1, $a2, 224 + vst $vr1, $a2, 240 addi.d $a3, $a3, -1 addi.d $a2, $a2, 512 bnez $a3, .LBB0_2 @@ -1628,12 +1624,7 @@ clearMat: # @clearMat .Lfunc_end5: .size clearMat, .Lfunc_end5-clearMat # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function fflop -.LCPI6_0: - .dword 0x4041000000000000 # double 34 - .text - .globl fflop + .globl fflop # -- Begin function fflop .p2align 5 .type fflop,@function fflop: # @fflop @@ -1646,30 +1637,30 @@ fflop: # @fflop ffint.d.w $fa1, $fa1 fmul.d $fa0, $fa1, $fa0 addi.d $a0, $a0, -2 - pcalau12i $a1, %pc_hi20(.LCPI6_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI6_0) - movgr2fr.w $fa2, $a0 - ffint.d.w $fa2, $fa2 - fmul.d $fa0, $fa0, $fa2 + movgr2fr.w $fa1, $a0 + ffint.d.w $fa1, $fa1 + fmul.d $fa0, $fa0, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, 65536 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 ret .Lfunc_end6: .size fflop, .Lfunc_end6-fflop # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function mflops -.LCPI7_0: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 - .text - .globl mflops + .globl mflops # -- Begin function mflops .p2align 5 .type mflops,@function mflops: # @mflops # %bb.0: - pcalau12i $a1, %pc_hi20(.LCPI7_0) - fld.d $fa2, $a1, %pc_lo12(.LCPI7_0) fdiv.d $fa0, $fa1, $fa0 - fmul.d $fa0, $fa0, $fa2 + lu12i.w $a1, -390306 + ori $a1, $a1, 3469 + lu32i.d $a1, 50935 + lu52i.d $a1, $a1, 1003 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 movgr2fr.w $fa1, $a0 ffint.d.w $fa1, $fa1 fmul.d $fa0, $fa0, $fa1 @@ -1824,12 +1815,8 @@ set_param: # @set_param .word .LBB8_17-.LJTI8_0 .word .LBB8_8-.LJTI8_0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function second -.LCPI9_0: - .dword 0x412e848000000000 # double 1.0E+6 .text - .globl second + .globl second # -- Begin function second .p2align 5 .type second,@function second: # @second @@ -1840,32 +1827,34 @@ second: # @second move $a1, $zero pcaddu18i $ra, %call36(gettimeofday) jirl $ra, $ra, 0 - pcalau12i $a1, %pc_hi20(second.base_sec) - ld.w $a3, $a1, %pc_lo12(second.base_sec) - pcalau12i $a2, %pc_hi20(second.base_usec) - ld.w $a0, $a2, %pc_lo12(second.base_usec) - ld.d $a4, $sp, 8 - or $a5, $a3, $a0 + pcalau12i $a3, %pc_hi20(second.base_sec) + ld.w $a1, $a3, %pc_lo12(second.base_sec) + pcalau12i $a4, %pc_hi20(second.base_usec) + ld.w $a0, $a4, %pc_lo12(second.base_usec) + ld.d $a2, $sp, 8 + or $a5, $a1, $a0 bnez $a5, .LBB9_2 # %bb.1: ld.d $a0, $sp, 16 - st.w $a4, $a1, %pc_lo12(second.base_sec) - st.w $a0, $a2, %pc_lo12(second.base_usec) + st.w $a2, $a3, %pc_lo12(second.base_sec) + st.w $a0, $a4, %pc_lo12(second.base_usec) movgr2fr.d $fa0, $zero ld.d $ra, $sp, 24 # 8-byte Folded Reload addi.d $sp, $sp, 32 ret .LBB9_2: - sub.d $a1, $a4, $a3 + ld.d $a3, $sp, 16 + sub.d $a1, $a2, $a1 movgr2fr.d $fa0, $a1 - ld.d $a1, $sp, 16 ffint.d.l $fa0, $fa0 - pcalau12i $a2, %pc_hi20(.LCPI9_0) - fld.d $fa1, $a2, %pc_lo12(.LCPI9_0) - sub.d $a0, $a1, $a0 + sub.d $a0, $a3, $a0 + movgr2fr.d $fa1, $a0 + ffint.d.l $fa1, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 movgr2fr.d $fa2, $a0 - ffint.d.l $fa2, $fa2 - fdiv.d $fa1, $fa2, $fa1 + fdiv.d $fa1, $fa1, $fa2 fadd.d $fa0, $fa1, $fa0 ld.d $ra, $sp, 24 # 8-byte Folded Reload addi.d $sp, $sp, 32 diff --git a/results/SingleSource/Benchmarks/Misc/CMakeFiles/mandel-2.dir/mandel-2.s b/results/SingleSource/Benchmarks/Misc/CMakeFiles/mandel-2.dir/mandel-2.s index 53dae09d..414fe5df 100644 --- a/results/SingleSource/Benchmarks/Misc/CMakeFiles/mandel-2.dir/mandel-2.s +++ b/results/SingleSource/Benchmarks/Misc/CMakeFiles/mandel-2.dir/mandel-2.s @@ -100,14 +100,7 @@ loop: # @loop .Lfunc_end2: .size loop, .Lfunc_end2-loop # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI3_0: - .dword 0xc043800000000000 # double -39 -.LCPI3_1: - .dword 0x4044000000000000 # double 40 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -130,10 +123,14 @@ main: # @main fst.d $fs6, $sp, 24 # 8-byte Folded Spill fst.d $fs7, $sp, 16 # 8-byte Folded Spill addi.d $fp, $zero, -39 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI3_0) - pcalau12i $a0, %pc_hi20(.LCPI3_1) - fld.d $fs1, $a0, %pc_lo12(.LCPI3_1) + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, 229376 + lu52i.d $a1, $a1, -1020 + movgr2fr.d $fs0, $a1 + lu32i.d $a0, 262144 + lu52i.d $a0, $a0, 1028 + movgr2fr.d $fs1, $a0 movgr2fr.d $fs2, $zero pcalau12i $s0, %pc_hi20(max_i) pcalau12i $a0, %pc_hi20(.L.str.1) diff --git a/results/SingleSource/Benchmarks/Misc/CMakeFiles/mandel.dir/mandel.s b/results/SingleSource/Benchmarks/Misc/CMakeFiles/mandel.dir/mandel.s index f909b4f6..72dfe53c 100644 --- a/results/SingleSource/Benchmarks/Misc/CMakeFiles/mandel.dir/mandel.s +++ b/results/SingleSource/Benchmarks/Misc/CMakeFiles/mandel.dir/mandel.s @@ -17,14 +17,7 @@ emit: # @emit .Lfunc_end0: .size emit, .Lfunc_end0-emit # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function mandel -.LCPI1_0: - .dword 0x40b3880000000000 # double 5000 -.LCPI1_1: - .dword 0xc0b3880000000000 # double -5000 - .text - .globl mandel + .globl mandel # -- Begin function mandel .p2align 5 .type mandel,@function mandel: # @mandel @@ -45,10 +38,12 @@ mandel: # @mandel fst.d $fs5, $sp, 24 # 8-byte Folded Spill fst.d $fs6, $sp, 16 # 8-byte Folded Spill fst.d $fs7, $sp, 8 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fs2, $a0, %pc_lo12(.LCPI1_0) - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fs3, $a0, %pc_lo12(.LCPI1_1) + ori $a0, $zero, 0 + lu32i.d $a0, 231424 + lu52i.d $a1, $a0, 1035 + movgr2fr.d $fs2, $a1 + lu52i.d $a0, $a0, -1013 + movgr2fr.d $fs3, $a0 movgr2fr.d $fs4, $zero ori $fp, $zero, 9 pcalau12i $a0, %pc_hi20(accum) diff --git a/results/SingleSource/Benchmarks/Misc/CMakeFiles/oourafft.dir/oourafft.s b/results/SingleSource/Benchmarks/Misc/CMakeFiles/oourafft.dir/oourafft.s index 23a3f3e7..2b2e7761 100644 --- a/results/SingleSource/Benchmarks/Misc/CMakeFiles/oourafft.dir/oourafft.s +++ b/results/SingleSource/Benchmarks/Misc/CMakeFiles/oourafft.dir/oourafft.s @@ -4,18 +4,6 @@ .LCPI0_0: .dword 0x3ff0000000000000 # double 1 .dword 0x0000000000000000 # double 0 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI0_1: - .dword 0x3f6921fb54442d18 # double 0.0030679615757712823 -.LCPI0_2: - .dword 0x3ed02e85c0898b71 # double 3.8580246913580248E-6 -.LCPI0_3: - .dword 0xbf50000000000000 # double -9.765625E-4 -.LCPI0_4: - .dword 0x3ddb7cdfd9d7bdbb # double 1.0E-10 -.LCPI0_5: - .dword 0x3e112e0be826d695 # double 1.0000000000000001E-9 .text .globl main .p2align 5 @@ -68,10 +56,13 @@ main: # @main vstx $vr0, $s0, $a0 addi.d $a0, $s0, 2047 addi.d $s2, $a0, 2041 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI0_1) addi.d $s3, $s0, 24 ori $s4, $zero, 2 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1014 + movgr2fr.d $fs2, $a0 ori $s5, $zero, 254 .p2align 4, , 16 .LBB0_1: # %.lr.ph.i @@ -126,10 +117,13 @@ main: # @main ori $s6, $a2, 1525 lu12i.w $a2, 8285 ori $s7, $a2, 185 - pcalau12i $a2, %pc_hi20(.LCPI0_2) - fld.d $fs0, $a2, %pc_lo12(.LCPI0_2) lu12i.w $a2, 63 ori $s8, $a2, 1152 + lu12i.w $a2, -259944 + ori $a2, $a2, 2929 + lu32i.d $a2, 11909 + lu52i.d $a2, $a2, 1005 + movgr2fr.d $fs0, $a2 ori $a2, $zero, 2048 move $a3, $s3 .p2align 4, , 16 @@ -170,11 +164,11 @@ main: # @main move $a4, $s0 pcaddu18i $ra, %call36(cdft) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_3) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_3) move $a2, $zero movgr2fr.d $fa0, $zero ori $a0, $zero, 1 + lu52i.d $a1, $zero, -1035 + movgr2fr.d $fa1, $a1 move $a1, $s3 .p2align 4, , 16 .LBB0_5: # =>This Inner Loop Header: Depth=1 @@ -199,8 +193,11 @@ main: # @main addi.d $a1, $a1, 8 bne $a3, $s4, .LBB0_5 # %bb.6: # %errorcheck.exit - pcalau12i $a0, %pc_hi20(.LCPI0_4) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_4) + lu12i.w $a0, -156293 + ori $a0, $a0, 3515 + lu32i.d $a0, -295713 + lu52i.d $a0, $a0, 989 + movgr2fr.d $fa1, $a0 fcmp.cule.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB0_20 # %bb.7: @@ -352,9 +349,12 @@ main: # @main move $a1, $zero pcaddu18i $ra, %call36(gettimeofday) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_5) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_5) lu12i.w $s2, -4 + lu12i.w $a0, -97683 + ori $a0, $a0, 1685 + lu32i.d $a0, 77323 + lu52i.d $a0, $a0, 993 + movgr2fr.d $fs0, $a0 movgr2fr.d $fs1, $zero pcalau12i $a0, %pc_hi20(.L.str.1) addi.d $s5, $a0, %pc_lo12(.L.str.1) @@ -420,12 +420,7 @@ main: # @main .Lfunc_end0: .size main, .Lfunc_end0-main # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function get_time -.LCPI1_0: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 - .text - .globl get_time + .globl get_time # -- Begin function get_time .p2align 5 .type get_time,@function get_time: # @get_time @@ -437,27 +432,26 @@ get_time: # @get_time pcaddu18i $ra, %call36(gettimeofday) jirl $ra, $ra, 0 ld.d $a0, $sp, 8 + ld.d $a1, $sp, 16 movgr2fr.d $fa0, $a0 - ld.d $a0, $sp, 16 - pcalau12i $a1, %pc_hi20(.LCPI1_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_0) ffint.d.l $fa0, $fa0 + movgr2fr.d $fa1, $a1 + ffint.d.l $fa1, $fa1 + lu12i.w $a0, -390306 + ori $a0, $a0, 3469 + lu32i.d $a0, 50935 + lu52i.d $a0, $a0, 1003 movgr2fr.d $fa2, $a0 - ffint.d.l $fa2, $fa2 - fmadd.d $fa0, $fa2, $fa1, $fa0 + fmadd.d $fa0, $fa1, $fa2, $fa0 ld.d $ra, $sp, 24 # 8-byte Folded Reload addi.d $sp, $sp, 32 ret .Lfunc_end1: .size get_time, .Lfunc_end1-get_time # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function makewt -.LCPI2_0: - .dword 0x3fe921fb54442d18 # double 0.78539816339744828 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI2_1: + .p2align 4, 0x0 # -- Begin function makewt +.LCPI2_0: .dword 0x3ff0000000000000 # double 1 .dword 0x0000000000000000 # double 0 .text @@ -486,15 +480,18 @@ makewt: # @makewt move $s0, $a1 move $s1, $a0 bstrpick.d $s2, $a0, 31, 1 - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI2_0) - pcalau12i $a0, %pc_hi20(.LCPI2_1) - vld $vr1, $a0, %pc_lo12(.LCPI2_1) - movgr2fr.d $fa2, $s2 - ffint.d.l $fa2, $fa2 - fdiv.d $fs2, $fa0, $fa2 + movgr2fr.d $fa0, $s2 + ffint.d.l $fa0, $fa0 + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + pcalau12i $a1, %pc_hi20(.LCPI2_0) + vld $vr1, $a1, %pc_lo12(.LCPI2_0) + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa2, $a0 + fdiv.d $fs2, $fa2, $fa0 vst $vr1, $a2, 0 - fmul.d $fa0, $fs2, $fa2 + fmul.d $fa0, $fs2, $fa0 pcaddu18i $ra, %call36(cos) jirl $ra, $ra, 0 alsl.d $a0, $s2, $fp, 3 @@ -570,12 +567,7 @@ makewt: # @makewt .Lfunc_end2: .size makewt, .Lfunc_end2-makewt # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function putdata -.LCPI3_0: - .dword 0x3ed02e85c0898b71 # double 3.8580246913580248E-6 - .text - .globl putdata + .globl putdata # -- Begin function putdata .p2align 5 .type putdata,@function putdata: # @putdata @@ -591,9 +583,12 @@ putdata: # @putdata lu12i.w $a4, 13 ori $a4, $a4, 1525 lu12i.w $a5, 8285 - pcalau12i $a6, %pc_hi20(.LCPI3_0) - fld.d $fa0, $a6, %pc_lo12(.LCPI3_0) ori $a5, $a5, 185 + lu12i.w $a6, -259944 + ori $a6, $a6, 2929 + lu32i.d $a6, 11909 + lu52i.d $a6, $a6, 1005 + movgr2fr.d $fa0, $a6 lu12i.w $a6, 63 ori $a6, $a6, 1152 .p2align 4, , 16 @@ -1048,12 +1043,7 @@ cdft: # @cdft .Lfunc_end4: .size cdft, .Lfunc_end4-cdft # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function errorcheck -.LCPI5_0: - .dword 0x3ed02e85c0898b71 # double 3.8580246913580248E-6 - .text - .globl errorcheck + .globl errorcheck # -- Begin function errorcheck .p2align 5 .type errorcheck,@function errorcheck: # @errorcheck @@ -1074,9 +1064,12 @@ errorcheck: # @errorcheck lu12i.w $a4, 13 ori $a4, $a4, 1525 lu12i.w $a5, 8285 - pcalau12i $a6, %pc_hi20(.LCPI5_0) - fld.d $fa2, $a6, %pc_lo12(.LCPI5_0) ori $a5, $a5, 185 + lu12i.w $a6, -259944 + ori $a6, $a6, 2929 + lu32i.d $a6, 11909 + lu52i.d $a6, $a6, 1005 + movgr2fr.d $fa2, $a6 lu12i.w $a6, 63 ori $a6, $a6, 1152 .p2align 4, , 16 diff --git a/results/SingleSource/Benchmarks/Misc/CMakeFiles/perlin.dir/perlin.s b/results/SingleSource/Benchmarks/Misc/CMakeFiles/perlin.dir/perlin.s index 6e5f2174..3bac0f98 100644 --- a/results/SingleSource/Benchmarks/Misc/CMakeFiles/perlin.dir/perlin.s +++ b/results/SingleSource/Benchmarks/Misc/CMakeFiles/perlin.dir/perlin.s @@ -1,27 +1,7 @@ .file "perlin.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0xc0c62c48f5c28f5c # double -11352.57 -.LCPI0_1: - .dword 0xc075a1f9db22d0e5 # double -346.12349999999998 -.LCPI0_2: - .dword 0x40373c083126e979 # double 23.234500000000001 -.LCPI0_4: - .dword 0x3fed2c62745753ff # double 0.91166804049373684 -.LCPI0_5: - .dword 0x400399999999999a # double 2.4500000000000002 -.LCPI0_6: - .dword 0x3ff6eb851eb851ec # double 1.4325000000000001 -.LCPI0_7: - .dword 0x405f07ef9db22d0e # double 124.124 -.LCPI0_8: - .dword 0x3fbf9db22d0e5604 # double 0.1235 -.LCPI0_9: - .dword 0x40d702647ae147ae # double 23561.57 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI0_3: + .p2align 4, 0x0 # -- Begin function main +.LCPI0_0: .dword 0x3fe87ae147ae1400 # double 0.76499999999998636 .dword 0xbfce147ae147b000 # double -0.23500000000001364 .text @@ -30,17 +10,13 @@ .type main,@function main: # @main # %bb.0: - addi.d $sp, $sp, -96 - st.d $ra, $sp, 88 # 8-byte Folded Spill - st.d $fp, $sp, 80 # 8-byte Folded Spill - st.d $s0, $sp, 72 # 8-byte Folded Spill - fst.d $fs0, $sp, 64 # 8-byte Folded Spill - fst.d $fs1, $sp, 56 # 8-byte Folded Spill - fst.d $fs2, $sp, 48 # 8-byte Folded Spill - fst.d $fs3, $sp, 40 # 8-byte Folded Spill - fst.d $fs4, $sp, 32 # 8-byte Folded Spill - fst.d $fs5, $sp, 24 # 8-byte Folded Spill - fst.d $fs6, $sp, 16 # 8-byte Folded Spill + addi.d $sp, $sp, -48 + st.d $ra, $sp, 40 # 8-byte Folded Spill + st.d $fp, $sp, 32 # 8-byte Folded Spill + st.d $s0, $sp, 24 # 8-byte Folded Spill + fst.d $fs0, $sp, 16 # 8-byte Folded Spill + fst.d $fs1, $sp, 8 # 8-byte Folded Spill + fst.d $fs2, $sp, 0 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(p) addi.d $fp, $a0, %pc_lo12(p) pcalau12i $a0, %pc_hi20(permutation) @@ -56,260 +32,287 @@ main: # @main pcaddu18i $ra, %call36(memcpy) jirl $ra, $ra, 0 movgr2fr.d $fa0, $zero - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) + lu12i.w $a0, -41944 + ori $a0, $a0, 3932 + lu32i.d $a0, 404552 + lu52i.d $a0, $a0, -1012 + movgr2fr.d $fa1, $a0 vldi $vr2, -850 vldi $vr3, -1000 vldi $vr4, -988 vldi $vr5, -784 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa6, $a0, %pc_lo12(.LCPI0_1) - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fa7, $a0, %pc_lo12(.LCPI0_2) + lu12i.w $a0, -150995 + ori $a0, $a0, 229 + lu32i.d $a0, 369145 + lu52i.d $a0, $a0, -1017 + movgr2fr.d $fa6, $a0 + lu12i.w $a0, 201326 + ori $a0, $a0, 2425 + lu32i.d $a0, 474120 + lu52i.d $a0, $a0, 1027 + movgr2fr.d $fa7, $a0 vrepli.w $vr8, 15 - pcalau12i $a0, %pc_hi20(.LCPI0_3) - vld $vr9, $a0, %pc_lo12(.LCPI0_3) - pcalau12i $a0, %pc_hi20(.LCPI0_4) - fld.d $ft2, $a0, %pc_lo12(.LCPI0_4) - pcalau12i $a0, %pc_hi20(.LCPI0_5) - fld.d $ft3, $a0, %pc_lo12(.LCPI0_5) - pcalau12i $a0, %pc_hi20(.LCPI0_6) - fld.d $ft4, $a0, %pc_lo12(.LCPI0_6) - pcalau12i $a0, %pc_hi20(.LCPI0_7) - fld.d $ft5, $a0, %pc_lo12(.LCPI0_7) - pcalau12i $a0, %pc_hi20(.LCPI0_8) - fld.d $ft6, $a0, %pc_lo12(.LCPI0_8) - pcalau12i $a0, %pc_hi20(.LCPI0_9) - fld.d $ft7, $a0, %pc_lo12(.LCPI0_9) - vrepli.w $vr16, 13 - vrepli.w $vr17, 1 - vrepli.w $vr18, 2 + vrepli.w $vr9, 13 + pcalau12i $a0, %pc_hi20(.LCPI0_0) + vld $vr10, $a0, %pc_lo12(.LCPI0_0) + vrepli.w $vr11, 1 + vrepli.w $vr12, 2 + lu12i.w $a0, 476533 + ori $a0, $a0, 1023 + lu32i.d $a0, -185246 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $ft5, $a0 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, 235929 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $ft6, $a0 + lu12i.w $a0, 125829 + ori $a0, $a0, 492 + lu32i.d $a0, 453509 + lu52i.d $a0, $a0, 1023 + lu12i.w $a1, -402654 + ori $a1, $a1, 3342 + lu32i.d $a1, -63505 + lu52i.d $a1, $a1, 1029 + lu12i.w $a2, 184549 + ori $a2, $a2, 1540 + lu32i.d $a2, -25166 + lu52i.d $a2, $a2, 1019 + lu12i.w $a3, 503316 + ori $a3, $a3, 1966 + lu32i.d $a3, 459364 + lu52i.d $a3, $a3, 1037 b .LBB0_2 .p2align 4, , 16 .LBB0_1: # in Loop: Header=BB0_2 Depth=1 - fadd.d $fa1, $fa1, $ft6 + movgr2fr.d $ft7, $a2 + fadd.d $fa1, $fa1, $ft7 + movgr2fr.d $ft7, $a3 fcmp.clt.d $fcc0, $fa1, $ft7 bceqz $fcc0, .LBB0_7 .LBB0_2: # %.preheader14 # =>This Loop Header: Depth=1 # Child Loop BB0_4 Depth 2 # Child Loop BB0_6 Depth 3 - vreplvei.d $vr19, $vr1, 0 - vfrintrm.d $vr19, $vr19 - ftintrz.w.d $ft12, $ft11 - movfr2gr.s $a0, $ft12 - andi $a0, $a0, 255 - fsub.d $ft13, $fa1, $ft11 - fmul.d $ft11, $ft13, $ft13 - fmul.d $ft11, $ft13, $ft11 - fmadd.d $ft12, $ft13, $fa3, $fa2 - fmadd.d $ft12, $ft13, $ft12, $fa4 - fmul.d $ft11, $ft11, $ft12 - alsl.d $a0, $a0, $fp, 2 - fadd.d $ft12, $ft13, $fa5 - vreplvei.d $vr19, $vr19, 0 - vreplvei.d $vr20, $vr20, 0 - vreplvei.d $vr21, $vr21, 0 - fmov.d $ft14, $fa6 + vreplvei.d $vr15, $vr1, 0 + vfrintrm.d $vr15, $vr15 + ftintrz.w.d $ft8, $ft7 + movfr2gr.s $a4, $ft8 + andi $a4, $a4, 255 + fsub.d $ft9, $fa1, $ft7 + fmul.d $ft7, $ft9, $ft9 + fmul.d $ft7, $ft9, $ft7 + fmadd.d $ft8, $ft9, $fa3, $fa2 + fmadd.d $ft8, $ft9, $ft8, $fa4 + fmul.d $ft7, $ft7, $ft8 + alsl.d $a4, $a4, $fp, 2 + fadd.d $ft8, $ft9, $fa5 + vreplvei.d $vr15, $vr15, 0 + vreplvei.d $vr16, $vr16, 0 + vreplvei.d $vr17, $vr17, 0 + fmov.d $ft10, $fa6 b .LBB0_4 .p2align 4, , 16 .LBB0_3: # %._crit_edge # in Loop: Header=BB0_4 Depth=2 - fadd.d $ft14, $ft14, $ft4 - fcmp.clt.d $fcc0, $ft14, $ft5 + movgr2fr.d $ft11, $a0 + fadd.d $ft10, $ft10, $ft11 + movgr2fr.d $ft11, $a1 + fcmp.clt.d $fcc0, $ft10, $ft11 bceqz $fcc0, .LBB0_1 .LBB0_4: # %.preheader # Parent Loop BB0_2 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB0_6 Depth 3 - fcmp.cule.d $fcc0, $fa7, $ft14 + fcmp.cule.d $fcc0, $fa7, $ft10 bcnez $fcc0, .LBB0_3 # %bb.5: # %.lr.ph # in Loop: Header=BB0_4 Depth=2 - ld.w $a1, $a0, 0 - ld.w $a2, $a0, 4 + ld.w $a5, $a4, 0 + ld.w $a6, $a4, 4 .p2align 4, , 16 .LBB0_6: # Parent Loop BB0_2 Depth=1 # Parent Loop BB0_4 Depth=2 # => This Inner Loop Header: Depth=3 - vreplvei.d $vr23, $vr22, 0 - vfrintrm.d $vr23, $vr23 - ftintrz.w.d $fs0, $ft15 - movfr2gr.s $a3, $fs0 - andi $a3, $a3, 255 - fsub.d $fs1, $ft14, $ft15 - fmul.d $ft15, $fs1, $fs1 - fmul.d $ft15, $fs1, $ft15 - fmadd.d $fs0, $fs1, $fa3, $fa2 - fmadd.d $fs0, $fs1, $fs0, $fa4 - fmul.d $ft15, $ft15, $fs0 - add.w $a4, $a1, $a3 - alsl.d $a5, $a4, $fp, 2 - slli.d $a4, $a4, 2 - ldx.w $a4, $fp, $a4 - ld.w $a5, $a5, 4 - add.w $a3, $a2, $a3 - slli.d $a6, $a3, 2 - ldx.w $a6, $fp, $a6 - alsl.d $a3, $a3, $fp, 2 - ld.w $a3, $a3, 4 - alsl.d $a4, $a4, $fp, 2 - alsl.d $a6, $a6, $fp, 2 - ld.d $a7, $a4, 396 - alsl.d $a4, $a5, $fp, 2 - fadd.d $fs0, $fs1, $fa5 - alsl.d $a3, $a3, $fp, 2 - vinsgr2vr.d $vr26, $a7, 0 - vand.v $vr27, $vr26, $vr8 - vslti.wu $vr28, $vr27, 4 - vshuf4i.w $vr28, $vr28, 16 - vslli.d $vr28, $vr28, 32 - vsrai.d $vr28, $vr28, 32 - vand.v $vr29, $vr26, $vr16 - vseqi.w $vr29, $vr29, 12 - vshuf4i.w $vr29, $vr29, 16 - vslli.d $vr29, $vr29, 32 - vsrai.d $vr29, $vr29, 32 - vbitsel.v $vr29, $vr9, $vr21, $vr29 - vreplvei.d $vr25, $vr25, 0 - vbitsel.v $vr28, $vr29, $vr25, $vr28 - vslti.wu $vr27, $vr27, 8 - vshuf4i.w $vr27, $vr27, 16 - vslli.d $vr27, $vr27, 32 - vsrai.d $vr27, $vr27, 32 - vbitsel.v $vr27, $vr25, $vr21, $vr27 - vand.v $vr29, $vr26, $vr17 - vseqi.w $vr29, $vr29, 0 - vshuf4i.w $vr29, $vr29, 16 - vslli.d $vr29, $vr29, 32 - vsrai.d $vr29, $vr29, 32 - vbitrevi.d $vr30, $vr27, 63 - vbitsel.v $vr27, $vr30, $vr27, $vr29 - vand.v $vr26, $vr26, $vr18 - vseqi.w $vr26, $vr26, 0 + vreplvei.d $vr19, $vr18, 0 + vfrintrm.d $vr19, $vr19 + ftintrz.w.d $ft12, $ft11 + movfr2gr.s $a7, $ft12 + andi $a7, $a7, 255 + fsub.d $ft13, $ft10, $ft11 + fmul.d $ft11, $ft13, $ft13 + fmul.d $ft11, $ft13, $ft11 + fmadd.d $ft12, $ft13, $fa3, $fa2 + fmadd.d $ft12, $ft13, $ft12, $fa4 + fmul.d $ft11, $ft11, $ft12 + add.w $t0, $a5, $a7 + alsl.d $t1, $t0, $fp, 2 + slli.d $t0, $t0, 2 + ldx.w $t0, $fp, $t0 + ld.w $t1, $t1, 4 + add.w $a7, $a6, $a7 + slli.d $t2, $a7, 2 + ldx.w $t2, $fp, $t2 + alsl.d $a7, $a7, $fp, 2 + ld.w $a7, $a7, 4 + alsl.d $t0, $t0, $fp, 2 + alsl.d $t2, $t2, $fp, 2 + ld.d $t3, $t0, 396 + alsl.d $t0, $t1, $fp, 2 + fadd.d $ft12, $ft13, $fa5 + alsl.d $a7, $a7, $fp, 2 + vinsgr2vr.d $vr22, $t3, 0 + vand.v $vr23, $vr22, $vr8 + vslti.wu $vr24, $vr23, 4 + vshuf4i.w $vr24, $vr24, 16 + vslli.d $vr24, $vr24, 32 + vsrai.d $vr24, $vr24, 32 + vand.v $vr25, $vr22, $vr9 + vseqi.w $vr25, $vr25, 12 + vshuf4i.w $vr25, $vr25, 16 + vslli.d $vr25, $vr25, 32 + vsrai.d $vr25, $vr25, 32 + vbitsel.v $vr25, $vr10, $vr17, $vr25 + vreplvei.d $vr21, $vr21, 0 + vbitsel.v $vr24, $vr25, $vr21, $vr24 + vslti.wu $vr23, $vr23, 8 + vshuf4i.w $vr23, $vr23, 16 + vslli.d $vr23, $vr23, 32 + vsrai.d $vr23, $vr23, 32 + vbitsel.v $vr23, $vr21, $vr17, $vr23 + vand.v $vr25, $vr22, $vr11 + vseqi.w $vr25, $vr25, 0 + vshuf4i.w $vr25, $vr25, 16 + vslli.d $vr25, $vr25, 32 + vsrai.d $vr25, $vr25, 32 + vbitrevi.d $vr26, $vr23, 63 + vbitsel.v $vr23, $vr26, $vr23, $vr25 + vand.v $vr22, $vr22, $vr12 + vseqi.w $vr22, $vr22, 0 + vshuf4i.w $vr22, $vr22, 16 + vslli.d $vr22, $vr22, 32 + vsrai.d $vr22, $vr22, 32 + ld.d $t1, $t2, 396 + vbitrevi.d $vr25, $vr24, 63 + vbitsel.v $vr22, $vr25, $vr24, $vr22 + vfadd.d $vr22, $vr23, $vr22 + vinsgr2vr.d $vr23, $t1, 0 + vand.v $vr24, $vr23, $vr8 + vslti.wu $vr25, $vr24, 4 + vshuf4i.w $vr25, $vr25, 16 + vslli.d $vr25, $vr25, 32 + vsrai.d $vr25, $vr25, 32 + vand.v $vr26, $vr23, $vr9 + vseqi.w $vr26, $vr26, 12 vshuf4i.w $vr26, $vr26, 16 vslli.d $vr26, $vr26, 32 vsrai.d $vr26, $vr26, 32 - ld.d $a5, $a6, 396 - vbitrevi.d $vr29, $vr28, 63 - vbitsel.v $vr26, $vr29, $vr28, $vr26 - vfadd.d $vr26, $vr27, $vr26 - vinsgr2vr.d $vr27, $a5, 0 - vand.v $vr28, $vr27, $vr8 - vslti.wu $vr29, $vr28, 4 - vshuf4i.w $vr29, $vr29, 16 - vslli.d $vr29, $vr29, 32 - vsrai.d $vr29, $vr29, 32 - vand.v $vr30, $vr27, $vr16 - vseqi.w $vr30, $vr30, 12 - vshuf4i.w $vr30, $vr30, 16 - vslli.d $vr30, $vr30, 32 - vsrai.d $vr30, $vr30, 32 - vbitsel.v $vr30, $vr9, $vr20, $vr30 - vbitsel.v $vr29, $vr30, $vr25, $vr29 - vslti.wu $vr28, $vr28, 8 - vshuf4i.w $vr28, $vr28, 16 - vslli.d $vr28, $vr28, 32 - vsrai.d $vr28, $vr28, 32 - vbitsel.v $vr25, $vr25, $vr20, $vr28 - vand.v $vr28, $vr27, $vr17 - vseqi.w $vr28, $vr28, 0 - vshuf4i.w $vr28, $vr28, 16 - vslli.d $vr28, $vr28, 32 - vsrai.d $vr28, $vr28, 32 - vbitrevi.d $vr30, $vr25, 63 - vbitsel.v $vr25, $vr30, $vr25, $vr28 - vand.v $vr27, $vr27, $vr18 - vseqi.w $vr27, $vr27, 0 - vshuf4i.w $vr27, $vr27, 16 - vslli.d $vr27, $vr27, 32 - vsrai.d $vr27, $vr27, 32 - vbitrevi.d $vr28, $vr29, 63 - vbitsel.v $vr27, $vr28, $vr29, $vr27 - ld.d $a4, $a4, 396 - vfadd.d $vr25, $vr25, $vr27 - vfsub.d $vr25, $vr25, $vr26 - vfmadd.d $vr25, $vr19, $vr25, $vr26 - vinsgr2vr.d $vr26, $a4, 0 - vand.v $vr27, $vr26, $vr8 - vslti.wu $vr28, $vr27, 4 - vshuf4i.w $vr28, $vr28, 16 - vslli.d $vr28, $vr28, 32 - vsrai.d $vr28, $vr28, 32 - vand.v $vr29, $vr26, $vr16 - vseqi.w $vr29, $vr29, 12 - vshuf4i.w $vr29, $vr29, 16 - vslli.d $vr29, $vr29, 32 - vsrai.d $vr29, $vr29, 32 - vbitsel.v $vr29, $vr9, $vr21, $vr29 - vreplvei.d $vr24, $vr24, 0 - vbitsel.v $vr28, $vr29, $vr24, $vr28 - vslti.wu $vr27, $vr27, 8 - vshuf4i.w $vr27, $vr27, 16 - vslli.d $vr27, $vr27, 32 - vsrai.d $vr27, $vr27, 32 - vbitsel.v $vr27, $vr24, $vr21, $vr27 - vand.v $vr29, $vr26, $vr17 - vseqi.w $vr29, $vr29, 0 - vshuf4i.w $vr29, $vr29, 16 - vslli.d $vr29, $vr29, 32 - vsrai.d $vr29, $vr29, 32 - vbitrevi.d $vr30, $vr27, 63 - vbitsel.v $vr27, $vr30, $vr27, $vr29 - vand.v $vr26, $vr26, $vr18 - vseqi.w $vr26, $vr26, 0 + vbitsel.v $vr26, $vr10, $vr16, $vr26 + vbitsel.v $vr25, $vr26, $vr21, $vr25 + vslti.wu $vr24, $vr24, 8 + vshuf4i.w $vr24, $vr24, 16 + vslli.d $vr24, $vr24, 32 + vsrai.d $vr24, $vr24, 32 + vbitsel.v $vr21, $vr21, $vr16, $vr24 + vand.v $vr24, $vr23, $vr11 + vseqi.w $vr24, $vr24, 0 + vshuf4i.w $vr24, $vr24, 16 + vslli.d $vr24, $vr24, 32 + vsrai.d $vr24, $vr24, 32 + vbitrevi.d $vr26, $vr21, 63 + vbitsel.v $vr21, $vr26, $vr21, $vr24 + vand.v $vr23, $vr23, $vr12 + vseqi.w $vr23, $vr23, 0 + vshuf4i.w $vr23, $vr23, 16 + vslli.d $vr23, $vr23, 32 + vsrai.d $vr23, $vr23, 32 + vbitrevi.d $vr24, $vr25, 63 + vbitsel.v $vr23, $vr24, $vr25, $vr23 + ld.d $t0, $t0, 396 + vfadd.d $vr21, $vr21, $vr23 + vfsub.d $vr21, $vr21, $vr22 + vfmadd.d $vr21, $vr15, $vr21, $vr22 + vinsgr2vr.d $vr22, $t0, 0 + vand.v $vr23, $vr22, $vr8 + vslti.wu $vr24, $vr23, 4 + vshuf4i.w $vr24, $vr24, 16 + vslli.d $vr24, $vr24, 32 + vsrai.d $vr24, $vr24, 32 + vand.v $vr25, $vr22, $vr9 + vseqi.w $vr25, $vr25, 12 + vshuf4i.w $vr25, $vr25, 16 + vslli.d $vr25, $vr25, 32 + vsrai.d $vr25, $vr25, 32 + vbitsel.v $vr25, $vr10, $vr17, $vr25 + vreplvei.d $vr20, $vr20, 0 + vbitsel.v $vr24, $vr25, $vr20, $vr24 + vslti.wu $vr23, $vr23, 8 + vshuf4i.w $vr23, $vr23, 16 + vslli.d $vr23, $vr23, 32 + vsrai.d $vr23, $vr23, 32 + vbitsel.v $vr23, $vr20, $vr17, $vr23 + vand.v $vr25, $vr22, $vr11 + vseqi.w $vr25, $vr25, 0 + vshuf4i.w $vr25, $vr25, 16 + vslli.d $vr25, $vr25, 32 + vsrai.d $vr25, $vr25, 32 + vbitrevi.d $vr26, $vr23, 63 + vbitsel.v $vr23, $vr26, $vr23, $vr25 + vand.v $vr22, $vr22, $vr12 + vseqi.w $vr22, $vr22, 0 + vshuf4i.w $vr22, $vr22, 16 + vslli.d $vr22, $vr22, 32 + vsrai.d $vr22, $vr22, 32 + ld.d $a7, $a7, 396 + vbitrevi.d $vr25, $vr24, 63 + vbitsel.v $vr22, $vr25, $vr24, $vr22 + vfadd.d $vr22, $vr23, $vr22 + vinsgr2vr.d $vr23, $a7, 0 + vand.v $vr24, $vr23, $vr8 + vslti.wu $vr25, $vr24, 4 + vshuf4i.w $vr25, $vr25, 16 + vslli.d $vr25, $vr25, 32 + vsrai.d $vr25, $vr25, 32 + vand.v $vr26, $vr23, $vr9 + vseqi.w $vr26, $vr26, 12 vshuf4i.w $vr26, $vr26, 16 vslli.d $vr26, $vr26, 32 vsrai.d $vr26, $vr26, 32 - ld.d $a3, $a3, 396 - vbitrevi.d $vr29, $vr28, 63 - vbitsel.v $vr26, $vr29, $vr28, $vr26 - vfadd.d $vr26, $vr27, $vr26 - vinsgr2vr.d $vr27, $a3, 0 - vand.v $vr28, $vr27, $vr8 - vslti.wu $vr29, $vr28, 4 - vshuf4i.w $vr29, $vr29, 16 - vslli.d $vr29, $vr29, 32 - vsrai.d $vr29, $vr29, 32 - vand.v $vr30, $vr27, $vr16 - vseqi.w $vr30, $vr30, 12 - vshuf4i.w $vr30, $vr30, 16 - vslli.d $vr30, $vr30, 32 - vsrai.d $vr30, $vr30, 32 - vbitsel.v $vr30, $vr9, $vr20, $vr30 - vbitsel.v $vr29, $vr30, $vr24, $vr29 - vslti.wu $vr28, $vr28, 8 - vshuf4i.w $vr28, $vr28, 16 - vslli.d $vr28, $vr28, 32 - vsrai.d $vr28, $vr28, 32 - vbitsel.v $vr24, $vr24, $vr20, $vr28 - vand.v $vr28, $vr27, $vr17 - vseqi.w $vr28, $vr28, 0 - vshuf4i.w $vr28, $vr28, 16 - vslli.d $vr28, $vr28, 32 - vsrai.d $vr28, $vr28, 32 - vbitrevi.d $vr30, $vr24, 63 - vbitsel.v $vr24, $vr30, $vr24, $vr28 - vand.v $vr27, $vr27, $vr18 - vseqi.w $vr27, $vr27, 0 - vshuf4i.w $vr27, $vr27, 16 - vslli.d $vr27, $vr27, 32 - vsrai.d $vr27, $vr27, 32 - vbitrevi.d $vr28, $vr29, 63 - vbitsel.v $vr27, $vr28, $vr29, $vr27 - vfadd.d $vr24, $vr24, $vr27 - vfsub.d $vr24, $vr24, $vr26 - vfmadd.d $vr24, $vr19, $vr24, $vr26 - vfsub.d $vr24, $vr24, $vr25 - vreplvei.d $vr23, $vr23, 0 - vfmadd.d $vr23, $vr23, $vr24, $vr25 - vreplvei.d $vr24, $vr23, 0 - vreplvei.d $vr23, $vr23, 1 - fsub.d $ft15, $ft15, $fs0 - fmadd.d $ft15, $ft15, $ft2, $fs0 - fadd.d $ft14, $ft14, $ft3 - fcmp.clt.d $fcc0, $ft14, $fa7 - fadd.d $fa0, $fa0, $ft15 + vbitsel.v $vr26, $vr10, $vr16, $vr26 + vbitsel.v $vr25, $vr26, $vr20, $vr25 + vslti.wu $vr24, $vr24, 8 + vshuf4i.w $vr24, $vr24, 16 + vslli.d $vr24, $vr24, 32 + vsrai.d $vr24, $vr24, 32 + vbitsel.v $vr20, $vr20, $vr16, $vr24 + vand.v $vr24, $vr23, $vr11 + vseqi.w $vr24, $vr24, 0 + vshuf4i.w $vr24, $vr24, 16 + vslli.d $vr24, $vr24, 32 + vsrai.d $vr24, $vr24, 32 + vbitrevi.d $vr26, $vr20, 63 + vbitsel.v $vr20, $vr26, $vr20, $vr24 + vand.v $vr23, $vr23, $vr12 + vseqi.w $vr23, $vr23, 0 + vshuf4i.w $vr23, $vr23, 16 + vslli.d $vr23, $vr23, 32 + vsrai.d $vr23, $vr23, 32 + vbitrevi.d $vr24, $vr25, 63 + vbitsel.v $vr23, $vr24, $vr25, $vr23 + vfadd.d $vr20, $vr20, $vr23 + vfsub.d $vr20, $vr20, $vr22 + vfmadd.d $vr20, $vr15, $vr20, $vr22 + vfsub.d $vr20, $vr20, $vr21 + vreplvei.d $vr19, $vr19, 0 + vfmadd.d $vr19, $vr19, $vr20, $vr21 + vreplvei.d $vr20, $vr19, 0 + vreplvei.d $vr19, $vr19, 1 + fsub.d $ft11, $ft11, $ft12 + fmadd.d $ft11, $ft11, $ft5, $ft12 + fadd.d $ft10, $ft10, $ft6 + fcmp.clt.d $fcc0, $ft10, $fa7 + fadd.d $fa0, $fa0, $ft11 bcnez $fcc0, .LBB0_6 b .LBB0_3 .LBB0_7: @@ -319,17 +322,13 @@ main: # @main pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 move $a0, $zero - fld.d $fs6, $sp, 16 # 8-byte Folded Reload - fld.d $fs5, $sp, 24 # 8-byte Folded Reload - fld.d $fs4, $sp, 32 # 8-byte Folded Reload - fld.d $fs3, $sp, 40 # 8-byte Folded Reload - fld.d $fs2, $sp, 48 # 8-byte Folded Reload - fld.d $fs1, $sp, 56 # 8-byte Folded Reload - fld.d $fs0, $sp, 64 # 8-byte Folded Reload - ld.d $s0, $sp, 72 # 8-byte Folded Reload - ld.d $fp, $sp, 80 # 8-byte Folded Reload - ld.d $ra, $sp, 88 # 8-byte Folded Reload - addi.d $sp, $sp, 96 + fld.d $fs2, $sp, 0 # 8-byte Folded Reload + fld.d $fs1, $sp, 8 # 8-byte Folded Reload + fld.d $fs0, $sp, 16 # 8-byte Folded Reload + ld.d $s0, $sp, 24 # 8-byte Folded Reload + ld.d $fp, $sp, 32 # 8-byte Folded Reload + ld.d $ra, $sp, 40 # 8-byte Folded Reload + addi.d $sp, $sp, 48 ret .Lfunc_end0: .size main, .Lfunc_end0-main diff --git a/results/SingleSource/Benchmarks/Misc/CMakeFiles/pi.dir/pi.s b/results/SingleSource/Benchmarks/Misc/CMakeFiles/pi.dir/pi.s index 438521c5..71daaf96 100644 --- a/results/SingleSource/Benchmarks/Misc/CMakeFiles/pi.dir/pi.s +++ b/results/SingleSource/Benchmarks/Misc/CMakeFiles/pi.dir/pi.s @@ -13,24 +13,7 @@ myadd: # @myadd .Lfunc_end0: .size myadd, .Lfunc_end0-myadd # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function main -.LCPI1_0: - .word 0x45b5a800 # float 5813 -.LCPI1_1: - .word 0x47914780 # float 74383 -.LCPI1_2: - .word 0x44a36000 # float 1307 -.LCPI1_3: - .word 0x45aaf800 # float 5471 -.LCPI1_4: - .word 0xc5aaf800 # float -5471 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI1_5: - .dword 0x418312d000000000 # double 4.0E+7 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -47,26 +30,31 @@ main: # @main movgr2fr.w $fs0, $zero ori $s0, $zero, 1 ori $a1, $zero, 1907 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI1_0) lu12i.w $fp, 9765 ori $a0, $fp, 2560 + lu12i.w $a2, 285530 + ori $a2, $a2, 2048 + movgr2fr.w $fa0, $a2 lu12i.w $a2, 6 ori $a2, $a2, 3035 lu12i.w $a3, -277387 ori $a3, $a3, 2603 lu32i.d $a3, 434130 lu52i.d $a3, $a3, 1804 - pcalau12i $a4, %pc_hi20(.LCPI1_1) - fld.s $fa1, $a4, %pc_lo12(.LCPI1_1) - pcalau12i $a4, %pc_hi20(.LCPI1_2) - fld.s $fa2, $a4, %pc_lo12(.LCPI1_2) - pcalau12i $a4, %pc_hi20(.LCPI1_3) - fld.s $fa3, $a4, %pc_lo12(.LCPI1_3) - pcalau12i $a4, %pc_hi20(.LCPI1_4) - fld.s $fa4, $a4, %pc_lo12(.LCPI1_4) lu12i.w $a4, 18 ori $a4, $a4, 655 + lu12i.w $a5, 293140 + ori $a5, $a5, 1920 + movgr2fr.w $fa1, $a5 + lu12i.w $a5, 281142 + movgr2fr.w $fa2, $a5 + lu12i.w $a5, 285359 + ori $a5, $a5, 2048 + movgr2fr.w $fa3, $a5 + lu12i.w $a5, -238929 + ori $a5, $a5, 2048 + lu32i.d $a5, 0 + movgr2fr.w $fa4, $a5 vldi $vr5, -1168 .p2align 4, , 16 .LBB1_1: # =>This Inner Loop Header: Depth=1 @@ -107,11 +95,13 @@ main: # @main jirl $ra, $ra, 0 movgr2fr.d $fa0, $s0 ffint.s.l $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI1_5) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_5) fcvt.d.s $fa0, $fa0 - vldi $vr2, -1008 - fmul.d $fa0, $fa0, $fa2 + vldi $vr1, -1008 + fmul.d $fa0, $fa0, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, 201424 + lu52i.d $a0, $a0, 1048 + movgr2fr.d $fa1, $a0 fdiv.d $fa0, $fa0, $fa1 fcvt.s.d $fa0, $fa0 fcvt.d.s $fa0, $fa0 diff --git a/results/SingleSource/Benchmarks/Polybench/datamining/correlation/CMakeFiles/correlation.dir/correlation.s b/results/SingleSource/Benchmarks/Polybench/datamining/correlation/CMakeFiles/correlation.dir/correlation.s index 7d24901c..956eab0c 100644 --- a/results/SingleSource/Benchmarks/Polybench/datamining/correlation/CMakeFiles/correlation.dir/correlation.s +++ b/results/SingleSource/Benchmarks/Polybench/datamining/correlation/CMakeFiles/correlation.dir/correlation.s @@ -106,19 +106,9 @@ polybench_alloc_data: # @polybench_alloc_data .Lfunc_end6: .size polybench_alloc_data, .Lfunc_end6-polybench_alloc_data # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI7_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI7_2: - .dword 0x4095e00000000000 # double 1400 -.LCPI7_3: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI7_4: - .dword 0x4042b5524ae1278e # double 37.416573867739416 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI7_1: + .p2align 4, 0x0 # -- Begin function main +.LCPI7_0: .dword 0 # 0x0 .dword 1 # 0x1 .text @@ -193,11 +183,12 @@ main: # @main # %bb.8: # %polybench_alloc_data.exit19 move $a0, $zero lu52i.d $a1, $zero, 1107 - pcalau12i $a2, %pc_hi20(.LCPI7_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI7_0) + lu12i.w $a2, 256 + lu52i.d $a2, $a2, 1107 + movgr2fr.d $fa0, $a2 lu12i.w $a2, 275200 - pcalau12i $a3, %pc_hi20(.LCPI7_1) - vld $vr1, $a3, %pc_lo12(.LCPI7_1) + pcalau12i $a3, %pc_hi20(.LCPI7_0) + vld $vr1, $a3, %pc_lo12(.LCPI7_0) lu12i.w $a3, -3 ori $a3, $a3, 2688 ori $a4, $zero, 0 @@ -248,10 +239,12 @@ main: # @main add.d $a6, $a6, $s2 bne $a0, $a5, .LBB7_9 # %bb.12: # %init_array.exit.preheader - pcalau12i $a0, %pc_hi20(.LCPI7_2) - fld.d $fs0, $a0, %pc_lo12(.LCPI7_2) move $a0, $zero movgr2fr.d $fa0, $zero + ori $a1, $zero, 0 + lu32i.d $a1, 385024 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fa1, $a1 ori $a1, $zero, 1200 move $a2, $s5 .p2align 4, , 16 @@ -263,27 +256,34 @@ main: # @main stx.d $zero, $s1, $a4 ori $a4, $zero, 1400 move $a5, $a2 - fmov.d $fa1, $fa0 + fmov.d $fa2, $fa0 .p2align 4, , 16 .LBB7_14: # Parent Loop BB7_13 Depth=1 # => This Inner Loop Header: Depth=2 - fld.d $fa2, $a5, 0 - fadd.d $fa1, $fa1, $fa2 - fst.d $fa1, $a3, 0 + fld.d $fa3, $a5, 0 + fadd.d $fa2, $fa2, $fa3 + fst.d $fa2, $a3, 0 addi.d $a4, $a4, -1 add.d $a5, $a5, $s2 bnez $a4, .LBB7_14 # %bb.15: # in Loop: Header=BB7_13 Depth=1 - fdiv.d $fa1, $fa1, $fs0 - fst.d $fa1, $a3, 0 + fdiv.d $fa2, $fa2, $fa1 + fst.d $fa2, $a3, 0 addi.d $a0, $a0, 1 addi.d $a2, $a2, 8 bne $a0, $a1, .LBB7_13 # %bb.16: # %.preheader111.i.preheader move $fp, $zero - pcalau12i $a0, %pc_hi20(.LCPI7_3) - fld.d $fs1, $a0, %pc_lo12(.LCPI7_3) - movgr2fr.d $fs2, $zero + movgr2fr.d $fs0, $zero + ori $a0, $zero, 0 + lu32i.d $a0, 385024 + lu52i.d $a0, $a0, 1033 + movgr2fr.d $fs1, $a0 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1019 + movgr2fr.d $fs2, $a0 vldi $vr3, -912 ori $s7, $zero, 1200 st.d $s5, $sp, 8 # 8-byte Folded Spill @@ -298,7 +298,7 @@ main: # @main alsl.d $a0, $fp, $s1, 3 ori $a1, $zero, 1400 move $a2, $s8 - fmov.d $fa0, $fs2 + fmov.d $fa0, $fs0 .p2align 4, , 16 .LBB7_18: # Parent Loop BB7_17 Depth=1 # => This Inner Loop Header: Depth=2 @@ -312,13 +312,13 @@ main: # @main add.d $a2, $a2, $s2 bnez $a1, .LBB7_18 # %bb.19: # in Loop: Header=BB7_17 Depth=1 - fdiv.d $fa1, $fa0, $fs0 + fdiv.d $fa1, $fa0, $fs1 fsqrt.d $fa0, $fa1 fcmp.cor.d $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB7_21 .LBB7_20: # %.split # in Loop: Header=BB7_17 Depth=1 - fcmp.cult.d $fcc0, $fs1, $fa0 + fcmp.cult.d $fcc0, $fs2, $fa0 fsel $fa0, $fa3, $fa0, $fcc0 fst.d $fa0, $s5, 0 addi.d $fp, $fp, 1 @@ -347,13 +347,12 @@ main: # @main or $a1, $a2, $a1 lu12i.w $s5, -3 ori $a2, $s5, 2688 - pcalau12i $a3, %pc_hi20(.LCPI7_4) - fld.d $fa0, $a3, %pc_lo12(.LCPI7_4) + lu12i.w $a3, 306706 + ori $a3, $a3, 1934 + lu32i.d $a3, 177490 + lu52i.d $a4, $a3, 1028 + movgr2fr.d $fa0, $a4 ori $a3, $zero, 1400 - lu12i.w $a4, 306706 - ori $a4, $a4, 1934 - lu32i.d $a4, 177490 - lu52i.d $a4, $a4, 1028 vreplgr2vr.d $vr1, $a4 move $a4, $t0 b .LBB7_24 diff --git a/results/SingleSource/Benchmarks/Polybench/datamining/covariance/CMakeFiles/covariance.dir/covariance.s b/results/SingleSource/Benchmarks/Polybench/datamining/covariance/CMakeFiles/covariance.dir/covariance.s index 34ffd18c..da945975 100644 --- a/results/SingleSource/Benchmarks/Polybench/datamining/covariance/CMakeFiles/covariance.dir/covariance.s +++ b/results/SingleSource/Benchmarks/Polybench/datamining/covariance/CMakeFiles/covariance.dir/covariance.s @@ -111,12 +111,6 @@ polybench_alloc_data: # @polybench_alloc_data .LCPI7_0: .dword 0 # 0x0 .dword 1 # 0x1 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI7_1: - .dword 0x4059000000000000 # double 100 -.LCPI7_2: - .dword 0x4058c00000000000 # double 99 .text .globl main .p2align 5 @@ -210,10 +204,12 @@ main: # @main addi.d $a3, $a3, 640 bne $a0, $a2, .LBB7_7 # %bb.10: # %init_array.exit.preheader - pcalau12i $a0, %pc_hi20(.LCPI7_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI7_1) move $a0, $zero - movgr2fr.d $fa1, $zero + movgr2fr.d $fa0, $zero + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 ori $a1, $zero, 80 move $a2, $fp .p2align 4, , 16 @@ -225,7 +221,7 @@ main: # @main stx.d $zero, $s1, $a4 ori $a4, $zero, 100 move $a5, $a2 - fmov.d $fa2, $fa1 + fmov.d $fa2, $fa0 .p2align 4, , 16 .LBB7_12: # Parent Loop BB7_11 Depth=1 # => This Inner Loop Header: Depth=2 @@ -236,7 +232,7 @@ main: # @main addi.d $a5, $a5, 640 bnez $a4, .LBB7_12 # %bb.13: # in Loop: Header=BB7_11 Depth=1 - fdiv.d $fa2, $fa2, $fa0 + fdiv.d $fa2, $fa2, $fa1 fst.d $fa2, $a3, 0 addi.d $a0, $a0, 1 addi.d $a2, $a2, 8 @@ -443,10 +439,12 @@ main: # @main bne $a5, $a2, .LBB7_19 b .LBB7_16 .LBB7_20: # %.preheader.i19.preheader - pcalau12i $a0, %pc_hi20(.LCPI7_2) - fld.d $fa0, $a0, %pc_lo12(.LCPI7_2) move $a0, $zero - movgr2fr.d $fa1, $zero + movgr2fr.d $fa0, $zero + ori $a1, $zero, 0 + lu32i.d $a1, -475136 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 ori $a1, $zero, 80 move $a2, $fp .p2align 4, , 16 @@ -470,7 +468,7 @@ main: # @main ori $t0, $zero, 100 move $t1, $a2 move $t2, $a5 - fmov.d $fa2, $fa1 + fmov.d $fa2, $fa0 .p2align 4, , 16 .LBB7_23: # Parent Loop BB7_21 Depth=1 # Parent Loop BB7_22 Depth=2 @@ -485,7 +483,7 @@ main: # @main addi.d $t1, $t1, 640 bnez $t0, .LBB7_23 # %bb.24: # in Loop: Header=BB7_22 Depth=2 - fdiv.d $fa2, $fa2, $fa0 + fdiv.d $fa2, $fa2, $fa1 fst.d $fa2, $a7, 0 alsl.d $a7, $a6, $a6, 2 slli.d $a7, $a7, 7 diff --git a/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/gemm/CMakeFiles/gemm.dir/gemm.s b/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/gemm/CMakeFiles/gemm.dir/gemm.s index 0561aaf0..3f575d55 100644 --- a/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/gemm/CMakeFiles/gemm.dir/gemm.s +++ b/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/gemm/CMakeFiles/gemm.dir/gemm.s @@ -111,12 +111,6 @@ polybench_alloc_data: # @polybench_alloc_data .LCPI7_0: .dword 0 # 0x0 .dword 1 # 0x1 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI7_1: - .dword 0x408f400000000000 # double 1000 -.LCPI7_2: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 .text .globl main .p2align 5 @@ -193,15 +187,14 @@ main: # @main lu12i.w $a0, 67108 ori $t0, $a0, 3539 ori $t1, $zero, 1000 - pcalau12i $a0, %pc_hi20(.LCPI7_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI7_1) - lu12i.w $a3, 2 - ori $s4, $a3, 608 - pcalau12i $a2, %pc_hi20(.LCPI7_0) - vld $vr1, $a2, %pc_lo12(.LCPI7_0) ori $a0, $zero, 0 lu32i.d $a0, -49152 lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa0, $a0 + lu12i.w $a3, 2 + pcalau12i $a2, %pc_hi20(.LCPI7_0) + vld $vr1, $a2, %pc_lo12(.LCPI7_0) + ori $s4, $a3, 608 vreplgr2vr.d $vr2, $a0 ori $a0, $a3, 624 move $t2, $fp @@ -645,40 +638,43 @@ main: # @main bnez $t4, .LBB7_44 b .LBB7_39 .LBB7_45: # %.preheader.i55.preheader - pcalau12i $a0, %pc_hi20(.LCPI7_2) - fld.d $fa0, $a0, %pc_lo12(.LCPI7_2) move $a0, $zero move $a2, $zero - ori $a1, $s5, 3488 - ori $a4, $zero, 1000 + ori $a4, $s5, 3488 + lu12i.w $a1, -487882 + ori $a1, $a1, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa0, $a1 + ori $a5, $zero, 1000 .p2align 4, , 16 .LBB7_46: # %.preheader.i55 # =>This Loop Header: Depth=1 # Child Loop BB7_47 Depth 2 move $a3, $zero - add.d $a5, $s0, $a0 - add.d $a6, $fp, $a0 - move $a7, $a1 + add.d $a6, $s0, $a0 + add.d $a7, $fp, $a0 + move $t0, $a4 .p2align 4, , 16 .LBB7_47: # Parent Loop BB7_46 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $t0, $a6, $a7 - fldx.d $fa1, $t0, $s4 - add.d $t0, $a5, $a7 - fldx.d $fa2, $t0, $s4 + add.d $t1, $a7, $t0 + fldx.d $fa1, $t1, $s4 + add.d $t1, $a6, $t0 + fldx.d $fa2, $t1, $s4 fsub.d $fa3, $fa1, $fa2 fabs.d $fa3, $fa3 fcmp.cule.d $fcc0, $fa3, $fa0 bceqz $fcc0, .LBB7_55 # %bb.48: # %.critedge.i # in Loop: Header=BB7_47 Depth=2 - addi.d $a7, $a7, 8 + addi.d $t0, $t0, 8 addi.w $a3, $a3, 1 - bnez $a7, .LBB7_47 + bnez $t0, .LBB7_47 # %bb.49: # in Loop: Header=BB7_46 Depth=1 addi.d $a2, $a2, 1 add.d $a0, $a0, $s4 - bne $a2, $a4, .LBB7_46 + bne $a2, $a5, .LBB7_46 # %bb.50: # %check_FP.exit lu12i.w $s6, 4 ori $a0, $s6, 1217 @@ -772,10 +768,6 @@ main: # @main pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) ld.d $a0, $a0, 0 - lu12i.w $a1, -487882 - ori $a1, $a1, 2289 - lu32i.d $a1, 325813 - lu52i.d $a1, $a1, 1006 st.d $a1, $sp, 0 movfr2gr.d $a4, $fa1 movfr2gr.d $a7, $fa2 diff --git a/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/gemver/CMakeFiles/gemver.dir/gemver.s b/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/gemver/CMakeFiles/gemver.dir/gemver.s index d50928ae..12365f3b 100644 --- a/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/gemver/CMakeFiles/gemver.dir/gemver.s +++ b/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/gemver/CMakeFiles/gemver.dir/gemver.s @@ -106,17 +106,9 @@ polybench_alloc_data: # @polybench_alloc_data .Lfunc_end6: .size polybench_alloc_data, .Lfunc_end6-polybench_alloc_data # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI7_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI7_1: - .dword 0x409f400000000000 # double 2000 -.LCPI7_3: - .dword 0x3ff3333333333333 # double 1.2 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI7_2: + .p2align 4, 0x0 # -- Begin function main +.LCPI7_0: .dword 0 # 0x0 .dword 1 # 0x1 .text @@ -243,24 +235,24 @@ main: # @main # %bb.18: # %polybench_alloc_data.exit45 move $a7, $zero lu52i.d $a2, $zero, 1107 - pcalau12i $a0, %pc_hi20(.LCPI7_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI7_0) + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 + movgr2fr.d $fa0, $a0 lu12i.w $a3, 275200 - pcalau12i $a0, %pc_hi20(.LCPI7_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI7_1) + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a1, $a0, 1033 + movgr2fr.d $fa1, $a1 vldi $vr2, -928 vldi $vr3, -944 vldi $vr4, -1000 vldi $vr5, -960 vldi $vr6, -990 - pcalau12i $a0, %pc_hi20(.LCPI7_2) - vld $vr7, $a0, %pc_lo12(.LCPI7_2) + pcalau12i $a0, %pc_hi20(.LCPI7_0) + vld $vr7, $a0, %pc_lo12(.LCPI7_0) lu12i.w $a0, -4 ori $a4, $a0, 384 ori $a5, $zero, 2000 - ori $a1, $zero, 0 - lu32i.d $a1, -49152 - lu52i.d $a1, $a1, 1033 vreplgr2vr.d $vr8, $a1 lu12i.w $t5, 3 ori $a1, $t5, 3728 @@ -444,10 +436,13 @@ main: # @main bnez $t1, .LBB7_28 b .LBB7_23 .LBB7_29: # %.preheader67.i.preheader - pcalau12i $a2, %pc_hi20(.LCPI7_3) - fld.d $fa0, $a2, %pc_lo12(.LCPI7_3) move $a2, $zero ori $a3, $a0, 384 + lu12i.w $a4, 209715 + ori $a4, $a4, 819 + lu32i.d $a4, 209715 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa0, $a4 ori $a4, $zero, 2000 move $a5, $fp .p2align 4, , 16 diff --git a/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/gesummv/CMakeFiles/gesummv.dir/gesummv.s b/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/gesummv/CMakeFiles/gesummv.dir/gesummv.s index 5a01cc7f..8e142537 100644 --- a/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/gesummv/CMakeFiles/gesummv.dir/gesummv.s +++ b/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/gesummv/CMakeFiles/gesummv.dir/gesummv.s @@ -106,17 +106,9 @@ polybench_alloc_data: # @polybench_alloc_data .Lfunc_end6: .size polybench_alloc_data, .Lfunc_end6-polybench_alloc_data # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI7_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI7_1: - .dword 0x4056800000000000 # double 90 -.LCPI7_3: - .dword 0x3ff3333333333333 # double 1.2 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI7_2: + .p2align 4, 0x0 # -- Begin function main +.LCPI7_0: .dword 0 # 0x0 .dword 1 # 0x1 .text @@ -194,22 +186,22 @@ main: # @main move $a1, $zero sub.d $a2, $s0, $fp lu52i.d $a3, $zero, 1107 - pcalau12i $a4, %pc_hi20(.LCPI7_0) - fld.d $fa0, $a4, %pc_lo12(.LCPI7_0) + lu12i.w $a4, 256 + lu52i.d $a4, $a4, 1107 + movgr2fr.d $fa0, $a4 lu12i.w $a4, 275200 - pcalau12i $a5, %pc_hi20(.LCPI7_1) - fld.d $fa1, $a5, %pc_lo12(.LCPI7_1) + ori $a5, $zero, 0 + lu32i.d $a5, 425984 + lu52i.d $t1, $a5, 1029 + movgr2fr.d $fa1, $t1 ori $a5, $zero, 15 lu12i.w $a6, -302922 ori $a6, $a6, 183 lu32i.d $a6, 0 + pcalau12i $a7, %pc_hi20(.LCPI7_0) + vld $vr2, $a7, %pc_lo12(.LCPI7_0) ori $a7, $zero, 90 ori $t0, $zero, 720 - pcalau12i $t1, %pc_hi20(.LCPI7_2) - vld $vr2, $t1, %pc_lo12(.LCPI7_2) - ori $t1, $zero, 0 - lu32i.d $t1, 425984 - lu52i.d $t1, $t1, 1029 vreplgr2vr.d $vr3, $t1 move $t1, $fp move $t2, $s0 @@ -318,10 +310,13 @@ main: # @main .LBB7_17: # %init_array.exit.preheader move $a0, $zero move $a1, $zero - pcalau12i $a2, %pc_hi20(.LCPI7_3) - fld.d $fa0, $a2, %pc_lo12(.LCPI7_3) ori $a2, $zero, 720 - vldi $vr1, -904 + vldi $vr0, -904 + lu12i.w $a3, 209715 + ori $a3, $a3, 819 + lu32i.d $a3, 209715 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa1, $a3 ori $a3, $zero, 90 .p2align 4, , 16 .LBB7_18: # %init_array.exit @@ -354,8 +349,8 @@ main: # @main bne $a6, $a2, .LBB7_19 # %bb.20: # in Loop: Header=BB7_18 Depth=1 fld.d $fa3, $a5, 0 - fmul.d $fa3, $fa3, $fa1 - fmul.d $fa2, $fa2, $fa0 + fmul.d $fa3, $fa3, $fa0 + fmul.d $fa2, $fa2, $fa1 fadd.d $fa2, $fa2, $fa3 fst.d $fa2, $a4, 0 addi.d $a1, $a1, 1 diff --git a/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/symm/CMakeFiles/symm.dir/symm.s b/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/symm/CMakeFiles/symm.dir/symm.s index 2dbd26eb..b7c3443f 100644 --- a/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/symm/CMakeFiles/symm.dir/symm.s +++ b/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/symm/CMakeFiles/symm.dir/symm.s @@ -111,12 +111,6 @@ polybench_alloc_data: # @polybench_alloc_data .LCPI7_0: .dword 0 # 0x0 .dword 1 # 0x1 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI7_1: - .dword 0x404e000000000000 # double 60 -.LCPI7_2: - .dword 0x3ff3333333333333 # double 1.2 .text .globl main .p2align 5 @@ -180,15 +174,14 @@ main: # @main lu12i.w $a0, 335544 ori $a0, $a0, 1311 ori $a7, $zero, 100 - pcalau12i $a1, %pc_hi20(.LCPI7_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI7_1) - ori $t0, $zero, 640 - ori $t1, $zero, 60 + ori $a1, $zero, 0 + lu32i.d $a1, -131072 + lu52i.d $t2, $a1, 1028 + movgr2fr.d $fa0, $t2 pcalau12i $a1, %pc_hi20(.LCPI7_0) vld $vr1, $a1, %pc_lo12(.LCPI7_0) - ori $t2, $zero, 0 - lu32i.d $t2, -131072 - lu52i.d $t2, $t2, 1028 + ori $t0, $zero, 640 + ori $t1, $zero, 60 vreplgr2vr.d $vr2, $t2 move $t2, $fp move $t3, $s1 @@ -295,10 +288,11 @@ main: # @main addi.w $t0, $zero, -4 ori $t1, $zero, 4 ori $t2, $zero, 100 - ori $t3, $zero, 58 - ori $t4, $zero, 0 - lu32i.d $t4, -51200 - lu52i.d $t4, $t4, -1016 + ori $t3, $zero, 0 + lu32i.d $t3, -131072 + lu52i.d $t3, $t3, 1028 + movgr2fr.d $fa0, $t3 + ori $t4, $zero, 58 ori $t5, $zero, 480 ori $t6, $zero, 60 ori $t7, $zero, 59 @@ -363,10 +357,7 @@ main: # @main movgr2fr.d $fa6, $ra ffint.d.l $fa6, $fa6 vextrins.d $vr6, $vr4, 16 - ori $s3, $zero, 0 - lu32i.d $s3, -131072 - lu52i.d $s3, $s3, 1028 - vreplgr2vr.d $vr4, $s3 + vreplgr2vr.d $vr4, $t3 vfdiv.d $vr5, $vr5, $vr4 vfdiv.d $vr4, $vr6, $vr4 vst $vr5, $s6, -16 @@ -402,42 +393,46 @@ main: # @main bne $a6, $s4, .LBB7_21 .LBB7_22: # %.loopexit # in Loop: Header=BB7_15 Depth=1 - bltu $t3, $a3, .LBB7_14 + bltu $t4, $a3, .LBB7_14 # %bb.23: # %.lr.ph.i.preheader # in Loop: Header=BB7_15 Depth=1 - sub.d $s4, $a7, $a3 - move $s5, $a6 - bltu $s4, $t1, .LBB7_27 + sub.d $s5, $a7, $a3 + ori $s4, $zero, 0 + lu32i.d $s4, -51200 + move $s6, $a6 + bltu $s5, $t1, .LBB7_27 # %bb.24: # %vector.ph37 # in Loop: Header=BB7_15 Depth=1 - and $s7, $t7, $t0 - move $s6, $s4 - bstrins.d $s6, $zero, 1, 0 - add.d $s5, $a6, $s6 - move $s8, $a5 + and $s8, $t7, $t0 + move $s7, $s5 + bstrins.d $s7, $zero, 1, 0 + add.d $s6, $a6, $s7 + move $ra, $a5 .p2align 4, , 16 .LBB7_25: # %vector.body38 # Parent Loop BB7_15 Depth=1 # => This Inner Loop Header: Depth=2 - vreplgr2vr.d $vr1, $t4 - vst $vr1, $s8, -16 - vst $vr1, $s8, 0 - addi.d $s7, $s7, -4 - addi.d $s8, $s8, 32 - bnez $s7, .LBB7_25 + lu52i.d $s3, $s4, -1016 + vreplgr2vr.d $vr1, $s3 + vst $vr1, $ra, -16 + vst $vr1, $ra, 0 + addi.d $s8, $s8, -4 + addi.d $ra, $ra, 32 + bnez $s8, .LBB7_25 # %bb.26: # %middle.block41 # in Loop: Header=BB7_15 Depth=1 - beq $s4, $s6, .LBB7_14 + beq $s5, $s7, .LBB7_14 .LBB7_27: # %.lr.ph.i.preheader79 # in Loop: Header=BB7_15 Depth=1 - slli.d $s4, $s5, 3 + slli.d $s5, $s6, 3 .p2align 4, , 16 .LBB7_28: # %.lr.ph.i # Parent Loop BB7_15 Depth=1 # => This Inner Loop Header: Depth=2 - stx.d $t4, $t8, $s4 - addi.d $s4, $s4, 8 - bne $s4, $t5, .LBB7_28 + lu52i.d $s3, $s4, -1016 + stx.d $s3, $t8, $s5 + addi.d $s5, $s5, 8 + bne $s5, $t5, .LBB7_28 b .LBB7_14 .LBB7_29: # %.preheader44.i.preheader move $a0, $zero @@ -454,103 +449,101 @@ main: # @main or $a1, $a2, $a1 addi.d $a2, $fp, 16 ori $a3, $zero, 480 - pcalau12i $a4, %pc_hi20(.LCPI7_2) - fld.d $fa0, $a4, %pc_lo12(.LCPI7_2) + lu12i.w $a4, 209715 + ori $a4, $a4, 819 + lu32i.d $a4, 209715 + lu52i.d $a4, $a4, 1023 + movgr2fr.d $fa0, $a4 vldi $vr1, -904 movgr2fr.d $fa2, $zero - ori $a4, $zero, 640 - lu12i.w $a5, 209715 - ori $a5, $a5, 819 - lu32i.d $a5, 209715 - lu52i.d $a5, $a5, 1023 - vreplgr2vr.d $vr3, $a5 - ori $a5, $zero, 0 - lu32i.d $a5, -524288 - lu52i.d $a5, $a5, 1023 - vreplgr2vr.d $vr4, $a5 - vrepli.b $vr5, 0 - ori $a5, $zero, 60 - ori $a6, $zero, 80 - move $a7, $fp - move $t0, $s1 - move $t1, $s0 + ori $a5, $zero, 640 + ori $a6, $zero, 0 + lu32i.d $a6, -524288 + lu52i.d $a6, $a6, 1023 + vreplgr2vr.d $vr3, $a6 + vrepli.b $vr4, 0 + ori $a6, $zero, 60 + ori $a7, $zero, 80 + move $t0, $fp + move $t1, $s1 + move $t2, $s0 b .LBB7_31 .p2align 4, , 16 .LBB7_30: # %.split.us.i # in Loop: Header=BB7_31 Depth=1 addi.d $a0, $a0, 1 - addi.d $t1, $t1, 480 + addi.d $t2, $t2, 480 addi.d $a2, $a2, 640 + addi.d $t1, $t1, 640 addi.d $t0, $t0, 640 - addi.d $a7, $a7, 640 - beq $a0, $a5, .LBB7_41 + beq $a0, $a6, .LBB7_41 .LBB7_31: # %.preheader44.i # =>This Loop Header: Depth=1 # Child Loop BB7_33 Depth 2 # Child Loop BB7_34 Depth 3 # Child Loop BB7_40 Depth 2 # Child Loop BB7_38 Depth 2 - mul.d $t2, $a0, $a3 - add.d $t2, $s0, $t2 - alsl.d $t2, $a0, $t2, 3 + mul.d $t3, $a0, $a3 + add.d $t3, $s0, $t3 + alsl.d $t3, $a0, $t3, 3 beqz $a0, .LBB7_36 # %bb.32: # %.preheader.us.i.preheader # in Loop: Header=BB7_31 Depth=1 - move $t3, $zero - alsl.d $t4, $a0, $a0, 2 - slli.d $t5, $t4, 7 - add.d $t4, $fp, $t5 - add.d $t5, $s1, $t5 - move $t6, $fp - move $t7, $s1 + move $t4, $zero + alsl.d $t5, $a0, $a0, 2 + slli.d $t6, $t5, 7 + add.d $t5, $fp, $t6 + add.d $t6, $s1, $t6 + move $t7, $fp + move $t8, $s1 .p2align 4, , 16 .LBB7_33: # %.preheader.us.i # Parent Loop BB7_31 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB7_34 Depth 3 - alsl.d $t8, $t3, $t5, 3 - move $s2, $t1 - move $s3, $t6 + alsl.d $s2, $t4, $t6, 3 + move $s3, $t2 move $s4, $t7 - move $s5, $a0 - fmov.d $fa6, $fa2 + move $s5, $t8 + move $s6, $a0 + fmov.d $fa5, $fa2 .p2align 4, , 16 .LBB7_34: # Parent Loop BB7_31 Depth=1 # Parent Loop BB7_33 Depth=2 # => This Inner Loop Header: Depth=3 - fld.d $fa7, $t8, 0 - fld.d $ft0, $s2, 0 - fld.d $ft1, $s3, 0 - fmul.d $fa7, $fa7, $fa1 - fmul.d $fa7, $fa7, $ft0 - fadd.d $fa7, $ft1, $fa7 - fst.d $fa7, $s3, 0 - fld.d $fa7, $s4, 0 - fld.d $ft0, $s2, 0 - fmul.d $fa7, $fa7, $ft0 - fadd.d $fa6, $fa6, $fa7 - addi.d $s5, $s5, -1 + fld.d $fa6, $s2, 0 + fld.d $fa7, $s3, 0 + fld.d $ft0, $s4, 0 + fmul.d $fa6, $fa6, $fa1 + fmul.d $fa6, $fa6, $fa7 + fadd.d $fa6, $ft0, $fa6 + fst.d $fa6, $s4, 0 + fld.d $fa6, $s5, 0 + fld.d $fa7, $s3, 0 + fmul.d $fa6, $fa6, $fa7 + fadd.d $fa5, $fa5, $fa6 + addi.d $s6, $s6, -1 + addi.d $s5, $s5, 640 addi.d $s4, $s4, 640 - addi.d $s3, $s3, 640 - addi.d $s2, $s2, 8 - bnez $s5, .LBB7_34 + addi.d $s3, $s3, 8 + bnez $s6, .LBB7_34 # %bb.35: # %._crit_edge.us.i # in Loop: Header=BB7_33 Depth=2 - slli.d $s2, $t3, 3 - fldx.d $fa7, $t4, $s2 - fld.d $ft0, $t8, 0 - fld.d $ft1, $t2, 0 - fmul.d $fa7, $fa7, $fa0 - fmul.d $ft0, $ft0, $fa1 - fmul.d $ft0, $ft0, $ft1 - fadd.d $fa7, $fa7, $ft0 - fmul.d $fa6, $fa6, $fa1 + slli.d $s3, $t4, 3 + fldx.d $fa6, $t5, $s3 + fld.d $fa7, $s2, 0 + fld.d $ft0, $t3, 0 + fmul.d $fa6, $fa6, $fa0 + fmul.d $fa7, $fa7, $fa1 + fmul.d $fa7, $fa7, $ft0 fadd.d $fa6, $fa6, $fa7 - fstx.d $fa6, $t4, $s2 - addi.d $t3, $t3, 1 + fmul.d $fa5, $fa5, $fa1 + fadd.d $fa5, $fa5, $fa6 + fstx.d $fa5, $t5, $s3 + addi.d $t4, $t4, 1 + addi.d $t8, $t8, 8 addi.d $t7, $t7, 8 - addi.d $t6, $t6, 8 - bne $t3, $a6, .LBB7_33 + bne $t4, $a7, .LBB7_33 b .LBB7_30 .p2align 4, , 16 .LBB7_36: # %vector.memcheck58 @@ -558,51 +551,52 @@ main: # @main beqz $a1, .LBB7_39 # %bb.37: # %.preheader.i22.preheader # in Loop: Header=BB7_31 Depth=1 - move $t3, $zero + move $t4, $zero .p2align 4, , 16 .LBB7_38: # %.preheader.i22 # Parent Loop BB7_31 Depth=1 # => This Inner Loop Header: Depth=2 - fldx.d $fa6, $a7, $t3 - fldx.d $fa7, $t0, $t3 - fld.d $ft0, $t2, 0 - fmul.d $fa6, $fa6, $fa0 - fmul.d $fa7, $fa7, $fa1 - fmul.d $fa7, $fa7, $ft0 - fadd.d $fa6, $fa6, $fa7 - fadd.d $fa6, $fa6, $fa2 - fstx.d $fa6, $a7, $t3 - addi.d $t3, $t3, 8 - bne $t3, $a4, .LBB7_38 + fldx.d $fa5, $t0, $t4 + fldx.d $fa6, $t1, $t4 + fld.d $fa7, $t3, 0 + fmul.d $fa5, $fa5, $fa0 + fmul.d $fa6, $fa6, $fa1 + fmul.d $fa6, $fa6, $fa7 + fadd.d $fa5, $fa5, $fa6 + fadd.d $fa5, $fa5, $fa2 + fstx.d $fa5, $t0, $t4 + addi.d $t4, $t4, 8 + bne $t4, $a5, .LBB7_38 b .LBB7_30 .LBB7_39: # %vector.body66.preheader # in Loop: Header=BB7_31 Depth=1 - vldrepl.d $vr6, $t2, 0 - move $t2, $zero + vldrepl.d $vr5, $t3, 0 + move $t3, $zero .p2align 4, , 16 .LBB7_40: # %vector.body66 # Parent Loop BB7_31 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $t3, $a2, $t2 - vld $vr7, $t3, -16 - vldx $vr8, $a2, $t2 - add.d $t4, $t0, $t2 - vldx $vr9, $t0, $t2 - vld $vr10, $t4, 16 - vfmul.d $vr7, $vr7, $vr3 - vfmul.d $vr8, $vr8, $vr3 - vfmul.d $vr9, $vr9, $vr4 - vfmul.d $vr10, $vr10, $vr4 - vfmul.d $vr9, $vr9, $vr6 - vfmul.d $vr10, $vr10, $vr6 + add.d $t4, $a2, $t3 + vld $vr6, $t4, -16 + vldx $vr7, $a2, $t3 + vreplgr2vr.d $vr8, $a4 + add.d $t5, $t1, $t3 + vldx $vr9, $t1, $t3 + vld $vr10, $t5, 16 + vfmul.d $vr6, $vr6, $vr8 + vfmul.d $vr7, $vr7, $vr8 + vfmul.d $vr8, $vr9, $vr3 + vfmul.d $vr9, $vr10, $vr3 + vfmul.d $vr8, $vr8, $vr5 + vfmul.d $vr9, $vr9, $vr5 + vfadd.d $vr6, $vr6, $vr8 vfadd.d $vr7, $vr7, $vr9 - vfadd.d $vr8, $vr8, $vr10 - vfadd.d $vr7, $vr7, $vr5 - vfadd.d $vr8, $vr8, $vr5 - vstx $vr8, $a2, $t2 - addi.d $t2, $t2, 32 - vst $vr7, $t3, -16 - bne $t2, $a4, .LBB7_40 + vfadd.d $vr6, $vr6, $vr4 + vfadd.d $vr7, $vr7, $vr4 + vstx $vr7, $a2, $t3 + addi.d $t3, $t3, 32 + vst $vr6, $t4, -16 + bne $t3, $a5, .LBB7_40 b .LBB7_30 .LBB7_41: # %kernel_symm.exit ori $a0, $zero, 1281 diff --git a/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/syr2k/CMakeFiles/syr2k.dir/syr2k.s b/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/syr2k/CMakeFiles/syr2k.dir/syr2k.s index 2ca87216..be4666d4 100644 --- a/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/syr2k/CMakeFiles/syr2k.dir/syr2k.s +++ b/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/syr2k/CMakeFiles/syr2k.dir/syr2k.s @@ -111,16 +111,6 @@ polybench_alloc_data: # @polybench_alloc_data .LCPI7_0: .dword 0 # 0x0 .dword 1 # 0x1 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI7_1: - .dword 0x4092c00000000000 # double 1200 -.LCPI7_2: - .dword 0x408f400000000000 # double 1000 -.LCPI7_3: - .dword 0x3ff3333333333333 # double 1.2 -.LCPI7_4: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 .text .globl main .p2align 5 @@ -204,20 +194,18 @@ main: # @main lu12i.w $a0, 111848 ori $a2, $a0, 437 ori $t3, $zero, 1200 - pcalau12i $a0, %pc_hi20(.LCPI7_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI7_1) - ori $a1, $s3, 3904 - pcalau12i $a0, %pc_hi20(.LCPI7_2) - fld.d $fa0, $a0, %pc_lo12(.LCPI7_2) - pcalau12i $a3, %pc_hi20(.LCPI7_0) - vld $vr2, $a3, %pc_lo12(.LCPI7_0) ori $a0, $zero, 0 - ori $t4, $zero, 0 - lu32i.d $t4, 180224 - lu52i.d $t4, $t4, 1033 - vreplgr2vr.d $vr3, $t4 + ori $a1, $zero, 0 + lu32i.d $a1, 180224 + lu52i.d $t4, $a1, 1033 + movgr2fr.d $fa0, $t4 + ori $a1, $s3, 3904 lu32i.d $a0, -49152 lu52i.d $a0, $a0, 1032 + pcalau12i $a3, %pc_hi20(.LCPI7_0) + vld $vr1, $a3, %pc_lo12(.LCPI7_0) + movgr2fr.d $fa2, $a0 + vreplgr2vr.d $vr3, $t4 vreplgr2vr.d $vr4, $a0 move $t4, $t5 move $t5, $fp @@ -258,12 +246,12 @@ main: # @main bstrpick.d $s0, $s0, 31, 0 movgr2fr.d $fa5, $s0 ffint.d.l $fa5, $fa5 - fdiv.d $fa5, $fa5, $fa1 + fdiv.d $fa5, $fa5, $fa0 add.d $s0, $t4, $t7 fstx.d $fa5, $s0, $a1 movgr2fr.d $fa5, $t8 ffint.d.l $fa5, $fa5 - fdiv.d $fa5, $fa5, $fa0 + fdiv.d $fa5, $fa5, $fa2 add.d $t8, $t5, $t7 fstx.d $fa5, $t8, $a1 addi.d $t7, $t7, 8 @@ -276,7 +264,7 @@ main: # @main # in Loop: Header=BB7_10 Depth=1 vreplgr2vr.d $vr5, $a5 move $t6, $t0 - vori.b $vr6, $vr2, 0 + vori.b $vr6, $vr1, 0 .p2align 4, , 16 .LBB7_14: # %vector.body # Parent Loop BB7_10 Depth=1 @@ -327,12 +315,13 @@ main: # @main lu12i.w $a0, -3 ori $t1, $a0, 2688 ori $t2, $zero, 1200 - lu12i.w $a4, 2 - ori $s4, $a4, 1408 - vld $vr1, $a3, %pc_lo12(.LCPI7_0) ori $a0, $zero, 0 lu32i.d $a0, -49152 lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa0, $a0 + lu12i.w $a4, 2 + vld $vr1, $a3, %pc_lo12(.LCPI7_0) + ori $s4, $a4, 1408 vreplgr2vr.d $vr2, $a0 ori $a3, $a4, 1424 b .LBB7_17 @@ -434,6 +423,11 @@ main: # @main ori $t1, $a0, 192 ori $a2, $a4, 1416 ori $t2, $zero, 4 + lu12i.w $a0, 209715 + ori $a0, $a0, 819 + lu32i.d $a0, 209715 + lu52i.d $a3, $a0, 1023 + movgr2fr.d $fa0, $a3 vldi $vr1, -904 ori $t3, $zero, 1000 ori $t4, $zero, 1200 @@ -442,13 +436,7 @@ main: # @main lu52i.d $a0, $a0, 1023 vreplgr2vr.d $vr2, $a0 lu12i.w $a0, 3 - ori $a3, $a0, 3712 - lu12i.w $a0, 209715 - ori $a0, $a0, 819 - lu32i.d $a0, 209715 - lu52i.d $a0, $a0, 1023 - vreplgr2vr.d $vr0, $a0 - pcalau12i $a4, %pc_hi20(.LCPI7_3) + ori $a4, $a0, 3712 b .LBB7_24 .p2align 4, , 16 .LBB7_23: # in Loop: Header=BB7_24 Depth=1 @@ -485,8 +473,9 @@ main: # @main # => This Inner Loop Header: Depth=2 vld $vr3, $t6, -16 vld $vr4, $t6, 0 - vfmul.d $vr3, $vr3, $vr0 - vfmul.d $vr4, $vr4, $vr0 + vreplgr2vr.d $vr5, $a3 + vfmul.d $vr3, $vr3, $vr5 + vfmul.d $vr4, $vr4, $vr5 vst $vr3, $t6, -16 vst $vr4, $t6, 0 addi.d $a5, $a5, -4 @@ -503,8 +492,7 @@ main: # @main # Parent Loop BB7_24 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa3, $a5, 0 - fld.d $fa4, $a4, %pc_lo12(.LCPI7_3) - fmul.d $fa3, $fa3, $fa4 + fmul.d $fa3, $fa3, $fa0 fst.d $fa3, $a5, 0 addi.d $a0, $a0, 1 addi.d $a5, $a5, 8 @@ -591,8 +579,8 @@ main: # @main vfmadd.d $vr5, $vr5, $vr3, $vr7 vfadd.d $vr5, $vr6, $vr5 vst $vr5, $s0, 0 - add.d $s2, $s2, $a3 - add.d $s1, $s1, $a3 + add.d $s2, $s2, $a4 + add.d $s1, $s1, $a4 addi.d $a0, $a0, -2 addi.d $s0, $s0, 16 bnez $a0, .LBB7_36 @@ -674,8 +662,9 @@ main: # @main # => This Inner Loop Header: Depth=2 vld $vr3, $t5, -16 vld $vr4, $t5, 0 - vfmul.d $vr3, $vr3, $vr0 - vfmul.d $vr4, $vr4, $vr0 + vreplgr2vr.d $vr5, $a3 + vfmul.d $vr3, $vr3, $vr5 + vfmul.d $vr4, $vr4, $vr5 vst $vr3, $t5, -16 vst $vr4, $t5, 0 addi.d $t4, $t4, -4 @@ -692,8 +681,7 @@ main: # @main # Parent Loop BB7_42 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa3, $t4, 0 - fld.d $fa4, $a4, %pc_lo12(.LCPI7_3) - fmul.d $fa3, $fa3, $fa4 + fmul.d $fa3, $fa3, $fa0 fst.d $fa3, $t4, 0 addi.d $a0, $a0, 1 addi.d $t4, $t4, 8 @@ -782,8 +770,8 @@ main: # @main vfadd.d $vr5, $vr5, $vr7 vfadd.d $vr5, $vr6, $vr5 vst $vr5, $t4, 0 - add.d $s8, $s8, $a3 - add.d $s7, $s7, $a3 + add.d $s8, $s8, $a4 + add.d $s7, $s7, $a4 addi.d $t6, $t6, -2 addi.d $t4, $t4, 16 bnez $t6, .LBB7_54 @@ -818,43 +806,46 @@ main: # @main bnez $s7, .LBB7_57 b .LBB7_50 .LBB7_58: # %.preheader.i59.preheader - pcalau12i $a0, %pc_hi20(.LCPI7_4) - fld.d $fa0, $a0, %pc_lo12(.LCPI7_4) move $a0, $zero move $a2, $zero lu12i.w $a1, -3 - ori $a1, $a1, 2688 - ori $a4, $zero, 1200 + ori $a4, $a1, 2688 + lu12i.w $a1, -487882 + ori $a1, $a1, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa0, $a1 + ori $a5, $zero, 1200 .p2align 4, , 16 .LBB7_59: # %.preheader.i59 # =>This Loop Header: Depth=1 # Child Loop BB7_60 Depth 2 move $a3, $zero - ld.d $a5, $sp, 32 # 8-byte Folded Reload - add.d $a5, $a5, $a0 - ld.d $a6, $sp, 24 # 8-byte Folded Reload + ld.d $a6, $sp, 32 # 8-byte Folded Reload add.d $a6, $a6, $a0 - move $a7, $a1 + ld.d $a7, $sp, 24 # 8-byte Folded Reload + add.d $a7, $a7, $a0 + move $t0, $a4 .p2align 4, , 16 .LBB7_60: # Parent Loop BB7_59 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $t0, $a6, $a7 - fldx.d $fa1, $t0, $s4 - add.d $t0, $a5, $a7 - fldx.d $fa2, $t0, $s4 + add.d $t1, $a7, $t0 + fldx.d $fa1, $t1, $s4 + add.d $t1, $a6, $t0 + fldx.d $fa2, $t1, $s4 fsub.d $fa3, $fa1, $fa2 fabs.d $fa3, $fa3 fcmp.cule.d $fcc0, $fa3, $fa0 bceqz $fcc0, .LBB7_68 # %bb.61: # %.critedge.i # in Loop: Header=BB7_60 Depth=2 - addi.d $a7, $a7, 8 + addi.d $t0, $t0, 8 addi.w $a3, $a3, 1 - bnez $a7, .LBB7_60 + bnez $t0, .LBB7_60 # %bb.62: # in Loop: Header=BB7_59 Depth=1 addi.d $a2, $a2, 1 add.d $a0, $a0, $s4 - bne $a2, $a4, .LBB7_59 + bne $a2, $a5, .LBB7_59 # %bb.63: # %check_FP.exit lu12i.w $s0, 4 ori $a0, $s0, 2817 @@ -948,10 +939,6 @@ main: # @main pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) ld.d $a0, $a0, 0 - lu12i.w $a1, -487882 - ori $a1, $a1, 2289 - lu32i.d $a1, 325813 - lu52i.d $a1, $a1, 1006 st.d $a1, $sp, 0 movfr2gr.d $a4, $fa1 movfr2gr.d $a7, $fa2 diff --git a/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/syrk/CMakeFiles/syrk.dir/syrk.s b/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/syrk/CMakeFiles/syrk.dir/syrk.s index 326283c7..87195aa9 100644 --- a/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/syrk/CMakeFiles/syrk.dir/syrk.s +++ b/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/syrk/CMakeFiles/syrk.dir/syrk.s @@ -111,14 +111,6 @@ polybench_alloc_data: # @polybench_alloc_data .LCPI7_0: .dword 0 # 0x0 .dword 1 # 0x1 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI7_1: - .dword 0x408f400000000000 # double 1000 -.LCPI7_2: - .dword 0x3ff3333333333333 # double 1.2 -.LCPI7_3: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 .text .globl main .p2align 5 @@ -177,9 +169,9 @@ main: # @main move $a1, $zero pcalau12i $t2, %pc_hi20(.LCPI7_0) vld $vr0, $t2, %pc_lo12(.LCPI7_0) - lu12i.w $a3, -2 - ori $a2, $a3, 192 - ori $a4, $zero, 1200 + lu12i.w $a4, -2 + ori $a2, $a4, 192 + ori $a3, $zero, 1200 ori $a0, $zero, 0 lu32i.d $a0, 180224 lu52i.d $a0, $a0, 1033 @@ -206,16 +198,16 @@ main: # @main vaddi.wu $vr5, $vr5, 1 vaddi.wu $vr4, $vr4, 1 vpickve2gr.w $t0, $vr5, 0 - mod.wu $t0, $t0, $a4 + mod.wu $t0, $t0, $a3 bstrpick.d $t0, $t0, 31, 0 vpickve2gr.w $t1, $vr5, 1 - mod.wu $t1, $t1, $a4 + mod.wu $t1, $t1, $a3 bstrpick.d $t1, $t1, 31, 0 vpickve2gr.w $t3, $vr4, 0 - mod.wu $t3, $t3, $a4 + mod.wu $t3, $t3, $a3 bstrpick.d $t3, $t3, 31, 0 vpickve2gr.w $t4, $vr4, 1 - mod.wu $t4, $t4, $a4 + mod.wu $t4, $t4, $a3 bstrpick.d $t4, $t4, 31, 0 movgr2fr.d $fa4, $t1 ffint.d.l $fa4, $fa4 @@ -239,10 +231,10 @@ main: # @main # in Loop: Header=BB7_7 Depth=1 addi.d $a1, $a1, 1 add.d $a6, $a6, $a0 - bne $a1, $a4, .LBB7_7 + bne $a1, $a3, .LBB7_7 # %bb.10: # %.preheader.i.preheader move $a2, $zero - move $a4, $zero + move $a3, $zero sub.d $a5, $s0, $fp ori $a6, $zero, 31 lu12i.w $a1, -3 @@ -250,28 +242,27 @@ main: # @main lu12i.w $a1, 67108 ori $t0, $a1, 3539 ori $t1, $zero, 1000 - pcalau12i $a1, %pc_hi20(.LCPI7_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI7_1) + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $t3, $a1, 1032 + movgr2fr.d $fa0, $t3 lu12i.w $a1, 2 ori $s3, $a1, 1408 - ori $t3, $zero, 1200 vld $vr1, $t2, %pc_lo12(.LCPI7_0) - ori $t2, $zero, 0 - lu32i.d $t2, -49152 - lu52i.d $t2, $t2, 1032 - vreplgr2vr.d $vr2, $t2 - ori $t2, $a1, 1424 + ori $t2, $zero, 1200 + vreplgr2vr.d $vr2, $t3 + ori $t3, $a1, 1424 move $t4, $fp move $t5, $s0 b .LBB7_12 .p2align 4, , 16 .LBB7_11: # %middle.block99 # in Loop: Header=BB7_12 Depth=1 - addi.d $a4, $a4, 1 + addi.d $a3, $a3, 1 add.d $t5, $t5, $s3 add.d $t4, $t4, $s3 addi.d $a2, $a2, 1 - beq $a4, $t3, .LBB7_17 + beq $a3, $t2, .LBB7_17 .LBB7_12: # %.preheader.i # =>This Loop Header: Depth=1 # Child Loop BB7_16 Depth 2 @@ -305,7 +296,7 @@ main: # @main .p2align 4, , 16 .LBB7_15: # %vector.ph90 # in Loop: Header=BB7_12 Depth=1 - vreplgr2vr.d $vr3, $a4 + vreplgr2vr.d $vr3, $a3 move $t6, $a7 vori.b $vr4, $vr1, 0 .p2align 4, , 16 @@ -345,10 +336,10 @@ main: # @main vfdiv.d $vr6, $vr7, $vr2 add.d $t7, $t4, $t6 vstx $vr5, $t7, $s3 - vstx $vr6, $t7, $t2 + vstx $vr6, $t7, $t3 add.d $t7, $t5, $t6 vstx $vr5, $t7, $s3 - vstx $vr6, $t7, $t2 + vstx $vr6, $t7, $t3 addi.d $t6, $t6, 32 vaddi.du $vr4, $vr4, 4 bnez $t6, .LBB7_16 @@ -363,20 +354,19 @@ main: # @main ori $t2, $zero, 1 addi.w $t3, $zero, -8 ori $a1, $a1, 1416 + lu12i.w $a2, 209715 + ori $a2, $a2, 819 + lu32i.d $a2, 209715 + lu52i.d $a2, $a2, 1023 + movgr2fr.d $fa0, $a2 vldi $vr1, -904 ori $t5, $zero, 1000 - lu12i.w $a2, -4 - ori $a2, $a2, 384 - ori $a3, $a3, 192 - lu12i.w $a4, 7 - ori $a4, $a4, 3328 - lu12i.w $a5, 209715 - ori $a5, $a5, 819 - lu32i.d $a5, 209715 - lu52i.d $a5, $a5, 1023 - vreplgr2vr.d $vr0, $a5 + lu12i.w $a3, -4 + ori $a3, $a3, 384 + ori $a4, $a4, 192 + lu12i.w $a5, 7 + ori $a5, $a5, 3328 move $t7, $fp - pcalau12i $a5, %pc_hi20(.LCPI7_2) b .LBB7_19 .p2align 4, , 16 .LBB7_18: # in Loop: Header=BB7_19 Depth=1 @@ -414,8 +404,9 @@ main: # @main # => This Inner Loop Header: Depth=2 vld $vr2, $t0, -16 vld $vr3, $t0, 0 - vfmul.d $vr2, $vr2, $vr0 - vfmul.d $vr3, $vr3, $vr0 + vreplgr2vr.d $vr4, $a2 + vfmul.d $vr2, $vr2, $vr4 + vfmul.d $vr3, $vr3, $vr4 vst $vr2, $t0, -16 vst $vr3, $t0, 0 addi.d $t1, $t1, -4 @@ -432,8 +423,7 @@ main: # @main # Parent Loop BB7_19 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa2, $t0, 0 - fld.d $fa3, $a5, %pc_lo12(.LCPI7_2) - fmul.d $fa2, $fa2, $fa3 + fmul.d $fa2, $fa2, $fa0 fst.d $fa2, $t0, 0 addi.d $s2, $s2, 1 addi.d $t0, $t0, 8 @@ -492,8 +482,8 @@ main: # @main # Parent Loop BB7_19 Depth=1 # Parent Loop BB7_28 Depth=2 # => This Inner Loop Header: Depth=3 - fldx.d $fa3, $ra, $a2 - fldx.d $fa4, $ra, $a3 + fldx.d $fa3, $ra, $a3 + fldx.d $fa4, $ra, $a4 fld.d $fa5, $ra, 0 fldx.d $fa6, $ra, $a0 vld $vr7, $t0, -16 @@ -506,7 +496,7 @@ main: # @main vst $vr4, $t0, 0 addi.d $t8, $t8, -4 addi.d $t0, $t0, 32 - add.d $ra, $ra, $a4 + add.d $ra, $ra, $a5 bnez $t8, .LBB7_31 # %bb.32: # %middle.block115 # in Loop: Header=BB7_28 Depth=2 @@ -580,8 +570,9 @@ main: # @main # => This Inner Loop Header: Depth=2 vld $vr2, $t0, -16 vld $vr3, $t0, 0 - vfmul.d $vr2, $vr2, $vr0 - vfmul.d $vr3, $vr3, $vr0 + vreplgr2vr.d $vr4, $a2 + vfmul.d $vr2, $vr2, $vr4 + vfmul.d $vr3, $vr3, $vr4 vst $vr2, $t0, -16 vst $vr3, $t0, 0 addi.d $t7, $t7, -4 @@ -598,8 +589,7 @@ main: # @main # Parent Loop BB7_37 Depth=1 # => This Inner Loop Header: Depth=2 fld.d $fa2, $t0, 0 - fld.d $fa3, $a5, %pc_lo12(.LCPI7_2) - fmul.d $fa2, $fa2, $fa3 + fmul.d $fa2, $fa2, $fa0 fst.d $fa2, $t0, 0 addi.d $t8, $t8, 1 addi.d $t0, $t0, 8 @@ -658,8 +648,8 @@ main: # @main # Parent Loop BB7_37 Depth=1 # Parent Loop BB7_46 Depth=2 # => This Inner Loop Header: Depth=3 - fldx.d $fa3, $s4, $a2 - fldx.d $fa4, $s4, $a3 + fldx.d $fa3, $s4, $a3 + fldx.d $fa4, $s4, $a4 fld.d $fa5, $s4, 0 fldx.d $fa6, $s4, $a0 vextrins.d $vr3, $vr4, 16 @@ -674,7 +664,7 @@ main: # @main vst $vr4, $t0, 0 addi.d $t7, $t7, -4 addi.d $t0, $t0, 32 - add.d $s4, $s4, $a4 + add.d $s4, $s4, $a5 bnez $t7, .LBB7_49 # %bb.50: # %middle.block154 # in Loop: Header=BB7_46 Depth=2 @@ -703,41 +693,44 @@ main: # @main bnez $s4, .LBB7_52 b .LBB7_45 .LBB7_53: # %.preheader.i53.preheader - pcalau12i $a0, %pc_hi20(.LCPI7_3) - fld.d $fa0, $a0, %pc_lo12(.LCPI7_3) move $a0, $zero move $a2, $zero lu12i.w $s5, -3 - ori $a1, $s5, 2688 - ori $a4, $zero, 1200 + ori $a4, $s5, 2688 + lu12i.w $a1, -487882 + ori $a1, $a1, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa0, $a1 + ori $a5, $zero, 1200 .p2align 4, , 16 .LBB7_54: # %.preheader.i53 # =>This Loop Header: Depth=1 # Child Loop BB7_55 Depth 2 move $a3, $zero - add.d $a5, $s0, $a0 - add.d $a6, $fp, $a0 - move $a7, $a1 + add.d $a6, $s0, $a0 + add.d $a7, $fp, $a0 + move $t0, $a4 .p2align 4, , 16 .LBB7_55: # Parent Loop BB7_54 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $t0, $a6, $a7 - fldx.d $fa1, $t0, $s3 - add.d $t0, $a5, $a7 - fldx.d $fa2, $t0, $s3 + add.d $t1, $a7, $t0 + fldx.d $fa1, $t1, $s3 + add.d $t1, $a6, $t0 + fldx.d $fa2, $t1, $s3 fsub.d $fa3, $fa1, $fa2 fabs.d $fa3, $fa3 fcmp.cule.d $fcc0, $fa3, $fa0 bceqz $fcc0, .LBB7_63 # %bb.56: # %.critedge.i # in Loop: Header=BB7_55 Depth=2 - addi.d $a7, $a7, 8 + addi.d $t0, $t0, 8 addi.w $a3, $a3, 1 - bnez $a7, .LBB7_55 + bnez $t0, .LBB7_55 # %bb.57: # in Loop: Header=BB7_54 Depth=1 addi.d $a2, $a2, 1 add.d $a0, $a0, $s3 - bne $a2, $a4, .LBB7_54 + bne $a2, $a5, .LBB7_54 # %bb.58: # %check_FP.exit lu12i.w $s4, 4 ori $a0, $s4, 2817 @@ -827,10 +820,6 @@ main: # @main pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) ld.d $a0, $a0, 0 - lu12i.w $a1, -487882 - ori $a1, $a1, 2289 - lu32i.d $a1, 325813 - lu52i.d $a1, $a1, 1006 st.d $a1, $sp, 0 movfr2gr.d $a4, $fa1 movfr2gr.d $a7, $fa2 diff --git a/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/trmm/CMakeFiles/trmm.dir/trmm.s b/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/trmm/CMakeFiles/trmm.dir/trmm.s index 71ab1b89..91b7950a 100644 --- a/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/trmm/CMakeFiles/trmm.dir/trmm.s +++ b/results/SingleSource/Benchmarks/Polybench/linear-algebra/blas/trmm/CMakeFiles/trmm.dir/trmm.s @@ -111,10 +111,6 @@ polybench_alloc_data: # @polybench_alloc_data .LCPI7_0: .dword 0 # 0x0 .dword 1 # 0x1 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI7_1: - .dword 0x408f400000000000 # double 1000 .text .globl main .p2align 5 @@ -168,6 +164,7 @@ main: # @main lu12i.w $s3, -3 ori $a7, $s3, 2688 ori $t0, $zero, 1200 + ori $t4, $zero, 0 ori $a1, $zero, 0 lu32i.d $a1, 180224 lu52i.d $a1, $a1, 1033 @@ -176,13 +173,14 @@ main: # @main ori $s2, $a1, 1408 ori $a1, $a1, 1424 ori $t1, $zero, 1000 - pcalau12i $t2, %pc_hi20(.LCPI7_1) - fld.d $fa2, $t2, %pc_lo12(.LCPI7_1) ori $t2, $zero, 4 lu12i.w $t3, 67108 ori $t3, $t3, 3539 - move $t4, $s0 - move $t5, $fp + lu32i.d $t4, -49152 + lu52i.d $t4, $t4, 1032 + movgr2fr.d $fa2, $t4 + move $t5, $s0 + move $t6, $fp .p2align 4, , 16 .LBB7_5: # %.preheader.i # =>This Loop Header: Depth=1 @@ -194,93 +192,90 @@ main: # @main # in Loop: Header=BB7_5 Depth=1 bgeu $a3, $t2, .LBB7_8 # %bb.7: # in Loop: Header=BB7_5 Depth=1 - move $t6, $zero + move $t7, $zero b .LBB7_11 .p2align 4, , 16 .LBB7_8: # %vector.ph28 # in Loop: Header=BB7_5 Depth=1 - move $t7, $a3 - bstrins.d $t7, $zero, 1, 0 - bstrpick.d $t6, $a3, 62, 2 + move $t8, $a3 + bstrins.d $t8, $zero, 1, 0 + bstrpick.d $t7, $a3, 62, 2 vld $vr3, $a6, %pc_lo12(.LCPI7_0) - slli.d $t6, $t6, 2 + slli.d $t7, $t7, 2 vreplgr2vr.d $vr4, $a3 vaddi.du $vr5, $vr4, 2 - move $t8, $a4 + move $s4, $a4 .p2align 4, , 16 .LBB7_9: # %vector.body31 # Parent Loop BB7_5 Depth=1 # => This Inner Loop Header: Depth=2 vadd.d $vr6, $vr3, $vr4 vadd.d $vr7, $vr3, $vr5 - vpickve2gr.w $s4, $vr6, 0 - mod.wu $s4, $s4, $t1 - bstrpick.d $s4, $s4, 31, 0 - vpickve2gr.w $s5, $vr6, 2 + vpickve2gr.w $s5, $vr6, 0 mod.wu $s5, $s5, $t1 bstrpick.d $s5, $s5, 31, 0 - vpickve2gr.w $s6, $vr7, 0 + vpickve2gr.w $s6, $vr6, 2 mod.wu $s6, $s6, $t1 bstrpick.d $s6, $s6, 31, 0 - vpickve2gr.w $s7, $vr7, 2 + vpickve2gr.w $s7, $vr7, 0 mod.wu $s7, $s7, $t1 bstrpick.d $s7, $s7, 31, 0 - movgr2fr.d $fa6, $s5 + vpickve2gr.w $s8, $vr7, 2 + mod.wu $s8, $s8, $t1 + bstrpick.d $s8, $s8, 31, 0 + movgr2fr.d $fa6, $s6 ffint.d.l $fa6, $fa6 - movgr2fr.d $fa7, $s4 + movgr2fr.d $fa7, $s5 ffint.d.l $fa7, $fa7 vextrins.d $vr7, $vr6, 16 - movgr2fr.d $fa6, $s7 + movgr2fr.d $fa6, $s8 ffint.d.l $fa6, $fa6 - movgr2fr.d $ft0, $s6 + movgr2fr.d $ft0, $s7 ffint.d.l $ft0, $ft0 vextrins.d $vr8, $vr6, 16 - ori $s4, $zero, 0 - lu32i.d $s4, -49152 - lu52i.d $s4, $s4, 1032 - vreplgr2vr.d $vr6, $s4 + vreplgr2vr.d $vr6, $t4 vfdiv.d $vr7, $vr7, $vr6 vfdiv.d $vr6, $vr8, $vr6 - vst $vr7, $t8, -16 - vst $vr6, $t8, 0 + vst $vr7, $s4, -16 + vst $vr6, $s4, 0 vaddi.du $vr3, $vr3, 4 - addi.d $t7, $t7, -4 - addi.d $t8, $t8, 32 - bnez $t7, .LBB7_9 + addi.d $t8, $t8, -4 + addi.d $s4, $s4, 32 + bnez $t8, .LBB7_9 # %bb.10: # %middle.block37 # in Loop: Header=BB7_5 Depth=1 - beq $a3, $t6, .LBB7_13 + beq $a3, $t7, .LBB7_13 .LBB7_11: # %scalar.ph27.preheader # in Loop: Header=BB7_5 Depth=1 - alsl.d $t7, $t6, $t5, 3 - add.w $t8, $a2, $t6 + alsl.d $t8, $t7, $t6, 3 + add.w $s4, $a2, $t7 .p2align 4, , 16 .LBB7_12: # %scalar.ph27 # Parent Loop BB7_5 Depth=1 # => This Inner Loop Header: Depth=2 - bstrpick.d $s4, $t8, 31, 0 - mul.d $s4, $s4, $t3 - srli.d $s4, $s4, 38 - mul.d $s4, $s4, $t1 - sub.d $s4, $t8, $s4 - bstrpick.d $s4, $s4, 31, 0 - movgr2fr.d $fa3, $s4 + bstrpick.d $s5, $s4, 31, 0 + mul.d $s5, $s5, $t3 + srli.d $s5, $s5, 38 + mul.d $s5, $s5, $t1 + sub.d $s5, $s4, $s5 + bstrpick.d $s5, $s5, 31, 0 + movgr2fr.d $fa3, $s5 ffint.d.l $fa3, $fa3 fdiv.d $fa3, $fa3, $fa2 - fst.d $fa3, $t7, 0 - addi.d $t6, $t6, 1 - addi.d $t7, $t7, 8 - addi.w $t8, $t8, 1 - bne $a3, $t6, .LBB7_12 + fst.d $fa3, $t8, 0 + addi.d $t7, $t7, 1 + addi.d $t8, $t8, 8 + addi.w $s4, $s4, 1 + bne $a3, $t7, .LBB7_12 .LBB7_13: # %._crit_edge.i # in Loop: Header=BB7_5 Depth=1 - mul.d $t6, $a3, $a0 - add.d $t6, $fp, $t6 - slli.d $t7, $a3, 3 - stx.d $a5, $t6, $t7 - addi.d $t6, $a3, 1200 - vreplgr2vr.d $vr3, $t6 - move $t6, $a7 + mul.d $t7, $a3, $a0 + add.d $t7, $fp, $t7 + slli.d $t8, $a3, 3 + stx.d $a5, $t7, $t8 + addi.d $t7, $a3, 1200 + vreplgr2vr.d $vr3, $t7 + move $t7, $a7 vori.b $vr4, $vr0, 0 .p2align 4, , 16 .LBB7_14: # %vector.body @@ -288,43 +283,43 @@ main: # @main # => This Inner Loop Header: Depth=2 vsub.d $vr5, $vr3, $vr4 vsubi.du $vr6, $vr5, 2 - vpickve2gr.w $t7, $vr5, 0 - mod.wu $t7, $t7, $t0 - bstrpick.d $t7, $t7, 31, 0 - vpickve2gr.w $t8, $vr5, 2 + vpickve2gr.w $t8, $vr5, 0 mod.wu $t8, $t8, $t0 bstrpick.d $t8, $t8, 31, 0 - vpickve2gr.w $s4, $vr6, 0 + vpickve2gr.w $s4, $vr5, 2 mod.wu $s4, $s4, $t0 bstrpick.d $s4, $s4, 31, 0 - vpickve2gr.w $s5, $vr6, 2 + vpickve2gr.w $s5, $vr6, 0 mod.wu $s5, $s5, $t0 bstrpick.d $s5, $s5, 31, 0 - movgr2fr.d $fa5, $t8 + vpickve2gr.w $s6, $vr6, 2 + mod.wu $s6, $s6, $t0 + bstrpick.d $s6, $s6, 31, 0 + movgr2fr.d $fa5, $s4 ffint.d.l $fa5, $fa5 - movgr2fr.d $fa6, $t7 + movgr2fr.d $fa6, $t8 ffint.d.l $fa6, $fa6 vextrins.d $vr6, $vr5, 16 - movgr2fr.d $fa5, $s5 + movgr2fr.d $fa5, $s6 ffint.d.l $fa5, $fa5 - movgr2fr.d $fa7, $s4 + movgr2fr.d $fa7, $s5 ffint.d.l $fa7, $fa7 vextrins.d $vr7, $vr5, 16 vfdiv.d $vr5, $vr6, $vr1 vfdiv.d $vr6, $vr7, $vr1 - add.d $t7, $t4, $t6 - vstx $vr5, $t7, $s2 - vstx $vr6, $t7, $a1 - addi.d $t6, $t6, 32 + add.d $t8, $t5, $t7 + vstx $vr5, $t8, $s2 + vstx $vr6, $t8, $a1 + addi.d $t7, $t7, 32 vaddi.du $vr4, $vr4, 4 - bnez $t6, .LBB7_14 + bnez $t7, .LBB7_14 # %bb.15: # %middle.block # in Loop: Header=BB7_5 Depth=1 addi.d $a3, $a3, 1 add.d $a4, $a4, $a0 - add.d $t5, $t5, $a0 + add.d $t6, $t6, $a0 addi.d $a2, $a2, 1 - add.d $t4, $t4, $s2 + add.d $t5, $t5, $s2 bne $a3, $t1, .LBB7_5 # %bb.16: # %.preheader28.i.preheader move $a2, $zero diff --git a/results/SingleSource/Benchmarks/Polybench/linear-algebra/kernels/2mm/CMakeFiles/2mm.dir/2mm.s b/results/SingleSource/Benchmarks/Polybench/linear-algebra/kernels/2mm/CMakeFiles/2mm.dir/2mm.s index 6cb614c4..44f93fc5 100644 --- a/results/SingleSource/Benchmarks/Polybench/linear-algebra/kernels/2mm/CMakeFiles/2mm.dir/2mm.s +++ b/results/SingleSource/Benchmarks/Polybench/linear-algebra/kernels/2mm/CMakeFiles/2mm.dir/2mm.s @@ -579,14 +579,6 @@ init_array: # @init_array .LCPI8_0: .dword 0 # 0x0 .dword 1 # 0x1 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI8_1: - .dword 0x4044000000000000 # double 40 -.LCPI8_2: - .dword 0x4049000000000000 # double 50 -.LCPI8_3: - .dword 0x3ff3333333333333 # double 1.2 .text .globl main .p2align 5 @@ -671,16 +663,15 @@ main: # @main ori $a3, $zero, 40 ori $a4, $zero, 0 lu32i.d $a4, 262144 - lu52i.d $a4, $a4, 1028 - vreplgr2vr.d $vr1, $a4 + lu52i.d $a7, $a4, 1028 + vreplgr2vr.d $vr1, $a7 ori $a4, $zero, 544 addi.w $a5, $zero, -3 lu32i.d $a5, 0 - pcalau12i $a6, %pc_hi20(.LCPI8_1) - fld.d $fa2, $a6, %pc_lo12(.LCPI8_1) lu12i.w $a6, -209716 ori $a6, $a6, 3277 lu32i.d $a6, 0 + movgr2fr.d $fa2, $a7 move $a7, $s0 .p2align 4, , 16 .LBB8_11: # %.preheader67.us.i @@ -770,13 +761,12 @@ main: # @main ori $a3, $zero, 50 ori $a4, $zero, 0 lu32i.d $a4, -458752 - lu52i.d $a4, $a4, 1028 - vreplgr2vr.d $vr1, $a4 + lu52i.d $a6, $a4, 1028 + vreplgr2vr.d $vr1, $a6 ori $a4, $zero, 384 lu12i.w $a5, 335544 ori $a5, $a5, 1311 - pcalau12i $a6, %pc_hi20(.LCPI8_2) - fld.d $fa2, $a6, %pc_lo12(.LCPI8_2) + movgr2fr.d $fa2, $a6 lu12i.w $a6, -377488 ori $a6, $a6, 2622 lu32i.d $a6, 0 @@ -1034,8 +1024,11 @@ main: # @main bne $a0, $a4, .LBB8_27 # %bb.32: # %.preheader.i.preheader move $a0, $zero - pcalau12i $a1, %pc_hi20(.LCPI8_3) - fld.d $fa0, $a1, %pc_lo12(.LCPI8_3) + lu12i.w $a1, 209715 + ori $a1, $a1, 819 + lu32i.d $a1, 209715 + lu52i.d $a1, $a1, 1023 + movgr2fr.d $fa0, $a1 ori $a1, $zero, 400 ori $a2, $zero, 80 ori $a3, $zero, 40 diff --git a/results/SingleSource/Benchmarks/Polybench/linear-algebra/kernels/bicg/CMakeFiles/bicg.dir/bicg.s b/results/SingleSource/Benchmarks/Polybench/linear-algebra/kernels/bicg/CMakeFiles/bicg.dir/bicg.s index 8bded37e..3b1b34f3 100644 --- a/results/SingleSource/Benchmarks/Polybench/linear-algebra/kernels/bicg/CMakeFiles/bicg.dir/bicg.s +++ b/results/SingleSource/Benchmarks/Polybench/linear-algebra/kernels/bicg/CMakeFiles/bicg.dir/bicg.s @@ -106,15 +106,9 @@ polybench_alloc_data: # @polybench_alloc_data .Lfunc_end6: .size polybench_alloc_data, .Lfunc_end6-polybench_alloc_data # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI7_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI7_1: - .dword 0x40a0680000000000 # double 2100 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI7_2: + .p2align 4, 0x0 # -- Begin function main +.LCPI7_0: .dword 0 # 0x0 .dword 1 # 0x1 .text @@ -236,18 +230,18 @@ main: # @main # %bb.12: # %.preheader.i.preheader move $a1, $zero lu52i.d $a2, $zero, 1107 - pcalau12i $a3, %pc_hi20(.LCPI7_0) - fld.d $fa0, $a3, %pc_lo12(.LCPI7_0) + lu12i.w $a3, 256 + lu52i.d $a3, $a3, 1107 + movgr2fr.d $fa0, $a3 lu12i.w $a3, 275200 - pcalau12i $a4, %pc_hi20(.LCPI7_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI7_1) - pcalau12i $a4, %pc_hi20(.LCPI7_2) - vld $vr2, $a4, %pc_lo12(.LCPI7_2) + ori $a4, $zero, 0 + lu32i.d $a4, 26624 + lu52i.d $a6, $a4, 1034 + movgr2fr.d $fa1, $a6 + pcalau12i $a4, %pc_hi20(.LCPI7_0) + vld $vr2, $a4, %pc_lo12(.LCPI7_0) ori $a4, $s5, 1184 ori $a5, $zero, 2100 - ori $a6, $zero, 0 - lu32i.d $a6, 26624 - lu52i.d $a6, $a6, 1034 vreplgr2vr.d $vr3, $a6 move $a6, $fp .p2align 4, , 16 diff --git a/results/SingleSource/Benchmarks/Polybench/linear-algebra/kernels/doitgen/CMakeFiles/doitgen.dir/doitgen.s b/results/SingleSource/Benchmarks/Polybench/linear-algebra/kernels/doitgen/CMakeFiles/doitgen.dir/doitgen.s index b3134248..29b75706 100644 --- a/results/SingleSource/Benchmarks/Polybench/linear-algebra/kernels/doitgen/CMakeFiles/doitgen.dir/doitgen.s +++ b/results/SingleSource/Benchmarks/Polybench/linear-algebra/kernels/doitgen/CMakeFiles/doitgen.dir/doitgen.s @@ -418,12 +418,6 @@ kernel_doitgen_StrictFP: # @kernel_doitgen_StrictFP .LCPI9_0: .dword 0 # 0x0 .dword 1 # 0x1 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI9_1: - .dword 0x4064000000000000 # double 160 -.LCPI9_2: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 .text .globl main .p2align 5 @@ -496,128 +490,127 @@ main: # @main lu12i.w $a5, -209716 ori $a5, $a5, 3277 lu32i.d $a5, 0 - pcalau12i $a6, %pc_hi20(.LCPI9_1) - fld.d $fa0, $a6, %pc_lo12(.LCPI9_1) - ori $a6, $zero, 1280 - ori $a7, $zero, 140 - lu12i.w $t0, 43 - ori $s4, $t0, 3072 - ori $t0, $zero, 150 - ori $t1, $zero, 160 - ori $t2, $zero, 0 - lu32i.d $t2, 262144 - lu52i.d $t2, $t2, 1030 - vreplgr2vr.d $vr1, $t2 - move $t2, $fp - move $t3, $s0 + ori $a6, $zero, 0 + lu32i.d $a6, 262144 + lu52i.d $a6, $a6, 1030 + movgr2fr.d $fa0, $a6 + ori $a7, $zero, 1280 + ori $t0, $zero, 140 + lu12i.w $t1, 43 + ori $s4, $t1, 3072 + ori $t1, $zero, 150 + ori $t2, $zero, 160 + move $t3, $fp + move $t4, $s0 b .LBB9_10 .p2align 4, , 16 .LBB9_9: # in Loop: Header=BB9_10 Depth=1 addi.d $a1, $a1, 1 add.d $a3, $a3, $s4 + add.d $t4, $t4, $s4 add.d $t3, $t3, $s4 - add.d $t2, $t2, $s4 addi.d $a0, $a0, 1 - beq $a1, $t0, .LBB9_17 + beq $a1, $t1, .LBB9_17 .LBB9_10: # %.preheader41.i # =>This Loop Header: Depth=1 # Child Loop BB9_12 Depth 2 # Child Loop BB9_16 Depth 3 # Child Loop BB9_14 Depth 3 - move $t4, $zero move $t5, $zero - move $t6, $t2 + move $t6, $zero move $t7, $t3 - move $t8, $a3 + move $t8, $t4 + move $s3, $a3 b .LBB9_12 .p2align 4, , 16 .LBB9_11: # %middle.block # in Loop: Header=BB9_12 Depth=2 - addi.d $t5, $t5, 1 + addi.d $t6, $t6, 1 + addi.d $s3, $s3, 1280 addi.d $t8, $t8, 1280 addi.d $t7, $t7, 1280 - addi.d $t6, $t6, 1280 - add.w $t4, $t4, $a0 - beq $t5, $a7, .LBB9_9 + add.w $t5, $t5, $a0 + beq $t6, $t0, .LBB9_9 .LBB9_12: # %.preheader40.i # Parent Loop BB9_10 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB9_16 Depth 3 # Child Loop BB9_14 Depth 3 - move $s3, $zero + move $s5, $zero bltu $a4, $a2, .LBB9_15 # %bb.13: # %scalar.ph.preheader # in Loop: Header=BB9_12 Depth=2 - move $s5, $t4 + move $s6, $t5 .p2align 4, , 16 .LBB9_14: # %scalar.ph # Parent Loop BB9_10 Depth=1 # Parent Loop BB9_12 Depth=2 # => This Inner Loop Header: Depth=3 - bstrpick.d $s6, $s5, 31, 0 - mul.d $s6, $s6, $a5 - srli.d $s6, $s6, 39 - alsl.d $s6, $s6, $s6, 2 - slli.d $s6, $s6, 5 - sub.d $s6, $s5, $s6 - bstrpick.d $s6, $s6, 31, 0 - movgr2fr.d $fa2, $s6 - ffint.d.l $fa2, $fa2 - fdiv.d $fa2, $fa2, $fa0 - fstx.d $fa2, $t6, $s3 - fstx.d $fa2, $t7, $s3 - addi.d $s3, $s3, 8 - addi.w $s5, $s5, 1 - bne $s3, $a6, .LBB9_14 + bstrpick.d $s7, $s6, 31, 0 + mul.d $s7, $s7, $a5 + srli.d $s7, $s7, 39 + alsl.d $s7, $s7, $s7, 2 + slli.d $s7, $s7, 5 + sub.d $s7, $s6, $s7 + bstrpick.d $s7, $s7, 31, 0 + movgr2fr.d $fa1, $s7 + ffint.d.l $fa1, $fa1 + fdiv.d $fa1, $fa1, $fa0 + fstx.d $fa1, $t7, $s5 + fstx.d $fa1, $t8, $s5 + addi.d $s5, $s5, 8 + addi.w $s6, $s6, 1 + bne $s5, $a7, .LBB9_14 b .LBB9_11 .p2align 4, , 16 .LBB9_15: # %vector.ph # in Loop: Header=BB9_12 Depth=2 - pcalau12i $s5, %pc_hi20(.LCPI9_0) - vld $vr2, $s5, %pc_lo12(.LCPI9_0) - mul.d $s5, $t5, $a1 - vreplgr2vr.d $vr3, $s5 - vaddi.du $vr4, $vr3, 2 + pcalau12i $s6, %pc_hi20(.LCPI9_0) + vld $vr1, $s6, %pc_lo12(.LCPI9_0) + mul.d $s6, $t6, $a1 + vreplgr2vr.d $vr2, $s6 + vaddi.du $vr3, $vr2, 2 .p2align 4, , 16 .LBB9_16: # %vector.body # Parent Loop BB9_10 Depth=1 # Parent Loop BB9_12 Depth=2 # => This Inner Loop Header: Depth=3 - vadd.d $vr5, $vr2, $vr3 - vadd.d $vr6, $vr2, $vr4 - vpickve2gr.w $s5, $vr5, 0 - mod.wu $s5, $s5, $t1 - bstrpick.d $s5, $s5, 31, 0 - vpickve2gr.w $s6, $vr5, 2 - mod.wu $s6, $s6, $t1 + vadd.d $vr4, $vr1, $vr2 + vadd.d $vr5, $vr1, $vr3 + vpickve2gr.w $s6, $vr4, 0 + mod.wu $s6, $s6, $t2 bstrpick.d $s6, $s6, 31, 0 - vpickve2gr.w $s7, $vr6, 0 - mod.wu $s7, $s7, $t1 + vpickve2gr.w $s7, $vr4, 2 + mod.wu $s7, $s7, $t2 bstrpick.d $s7, $s7, 31, 0 - vpickve2gr.w $s8, $vr6, 2 - mod.wu $s8, $s8, $t1 + vpickve2gr.w $s8, $vr5, 0 + mod.wu $s8, $s8, $t2 bstrpick.d $s8, $s8, 31, 0 + vpickve2gr.w $ra, $vr5, 2 + mod.wu $ra, $ra, $t2 + bstrpick.d $ra, $ra, 31, 0 + movgr2fr.d $fa4, $s7 + ffint.d.l $fa4, $fa4 movgr2fr.d $fa5, $s6 ffint.d.l $fa5, $fa5 - movgr2fr.d $fa6, $s5 + vextrins.d $vr5, $vr4, 16 + movgr2fr.d $fa4, $ra + ffint.d.l $fa4, $fa4 + movgr2fr.d $fa6, $s8 ffint.d.l $fa6, $fa6 - vextrins.d $vr6, $vr5, 16 - movgr2fr.d $fa5, $s8 - ffint.d.l $fa5, $fa5 - movgr2fr.d $fa7, $s7 - ffint.d.l $fa7, $fa7 - vextrins.d $vr7, $vr5, 16 - vfdiv.d $vr5, $vr6, $vr1 - vfdiv.d $vr6, $vr7, $vr1 - add.d $s5, $t8, $s3 - vst $vr5, $s5, -16 - vstx $vr6, $t8, $s3 - add.d $s5, $t7, $s3 - vstx $vr5, $t7, $s3 - vst $vr6, $s5, 16 - addi.d $s3, $s3, 32 - vaddi.du $vr2, $vr2, 4 - bne $s3, $a6, .LBB9_16 + vextrins.d $vr6, $vr4, 16 + vreplgr2vr.d $vr4, $a6 + vfdiv.d $vr5, $vr5, $vr4 + vfdiv.d $vr4, $vr6, $vr4 + add.d $s6, $s3, $s5 + vst $vr5, $s6, -16 + vstx $vr4, $s3, $s5 + add.d $s6, $t8, $s5 + vstx $vr5, $t8, $s5 + vst $vr4, $s6, 16 + addi.d $s5, $s5, 32 + vaddi.du $vr1, $vr1, 4 + bne $s5, $a7, .LBB9_16 b .LBB9_11 .LBB9_17: # %.preheader.i.preheader move $a0, $zero @@ -1191,49 +1184,52 @@ main: # @main .LBB9_45: # %.preheader35.i.preheader move $a0, $zero move $a2, $zero - pcalau12i $a1, %pc_hi20(.LCPI9_2) - fld.d $fa0, $a1, %pc_lo12(.LCPI9_2) - ori $a1, $zero, 1280 - ori $a5, $zero, 140 - ori $a6, $zero, 150 + lu12i.w $a1, -487882 + ori $a1, $a1, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa0, $a1 + ori $a5, $zero, 1280 + ori $a6, $zero, 140 + ori $a7, $zero, 150 .LBB9_46: # %.preheader35.i # =>This Loop Header: Depth=1 # Child Loop BB9_47 Depth 2 # Child Loop BB9_48 Depth 3 move $a3, $zero - move $a7, $a0 + move $t0, $a0 .p2align 4, , 16 .LBB9_47: # %.preheader.i64 # Parent Loop BB9_46 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB9_48 Depth 3 - move $t0, $zero + move $t1, $zero move $a4, $zero - add.d $t1, $s0, $a7 - add.d $t2, $fp, $a7 + add.d $t2, $s0, $t0 + add.d $t3, $fp, $t0 .p2align 4, , 16 .LBB9_48: # Parent Loop BB9_46 Depth=1 # Parent Loop BB9_47 Depth=2 # => This Inner Loop Header: Depth=3 - fldx.d $fa1, $t2, $t0 - fldx.d $fa2, $t1, $t0 + fldx.d $fa1, $t3, $t1 + fldx.d $fa2, $t2, $t1 fsub.d $fa3, $fa1, $fa2 fabs.d $fa3, $fa3 fcmp.cule.d $fcc0, $fa3, $fa0 bceqz $fcc0, .LBB9_59 # %bb.49: # %.critedge.i # in Loop: Header=BB9_48 Depth=3 - addi.d $t0, $t0, 8 + addi.d $t1, $t1, 8 addi.w $a4, $a4, 1 - bne $t0, $a1, .LBB9_48 + bne $t1, $a5, .LBB9_48 # %bb.50: # in Loop: Header=BB9_47 Depth=2 addi.d $a3, $a3, 1 - addi.d $a7, $a7, 1280 - bne $a3, $a5, .LBB9_47 + addi.d $t0, $t0, 1280 + bne $a3, $a6, .LBB9_47 # %bb.51: # in Loop: Header=BB9_46 Depth=1 addi.d $a2, $a2, 1 add.d $a0, $a0, $s4 - bne $a2, $a6, .LBB9_46 + bne $a2, $a7, .LBB9_46 # %bb.52: # %check_FP.exit ori $a0, $zero, 2561 pcaddu18i $ra, %call36(malloc) @@ -1341,10 +1337,6 @@ main: # @main pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) ld.d $a0, $a0, 0 - lu12i.w $a1, -487882 - ori $a1, $a1, 2289 - lu32i.d $a1, 325813 - lu52i.d $a1, $a1, 1006 st.d $a1, $sp, 16 fst.d $fa2, $sp, 8 st.d $a4, $sp, 0 diff --git a/results/SingleSource/Benchmarks/Polybench/linear-algebra/kernels/mvt/CMakeFiles/mvt.dir/mvt.s b/results/SingleSource/Benchmarks/Polybench/linear-algebra/kernels/mvt/CMakeFiles/mvt.dir/mvt.s index 55e4b9d3..ee0a642b 100644 --- a/results/SingleSource/Benchmarks/Polybench/linear-algebra/kernels/mvt/CMakeFiles/mvt.dir/mvt.s +++ b/results/SingleSource/Benchmarks/Polybench/linear-algebra/kernels/mvt/CMakeFiles/mvt.dir/mvt.s @@ -106,17 +106,9 @@ polybench_alloc_data: # @polybench_alloc_data .Lfunc_end6: .size polybench_alloc_data, .Lfunc_end6-polybench_alloc_data # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI7_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI7_1: - .dword 0x409f400000000000 # double 2000 -.LCPI7_3: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI7_2: + .p2align 4, 0x0 # -- Begin function main +.LCPI7_0: .dword 0 # 0x0 .dword 1 # 0x1 .text @@ -219,24 +211,24 @@ main: # @main # %bb.14: # %polybench_alloc_data.exit48 move $t2, $zero lu52i.d $a0, $zero, 1107 - pcalau12i $a1, %pc_hi20(.LCPI7_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI7_0) + lu12i.w $a1, 256 + lu52i.d $a1, $a1, 1107 + movgr2fr.d $fa0, $a1 lu12i.w $a1, 275200 - pcalau12i $a2, %pc_hi20(.LCPI7_1) - fld.d $fa1, $a2, %pc_lo12(.LCPI7_1) + ori $a2, $zero, 0 + lu32i.d $a2, -49152 + lu52i.d $t0, $a2, 1033 + movgr2fr.d $fa1, $t0 movgr2fr.d $fa2, $zero addi.w $a2, $zero, -1997 ori $a3, $zero, 3 addi.w $a4, $zero, -1996 ori $a5, $zero, 4 - pcalau12i $a6, %pc_hi20(.LCPI7_2) - vld $vr3, $a6, %pc_lo12(.LCPI7_2) + pcalau12i $a6, %pc_hi20(.LCPI7_0) + vld $vr3, $a6, %pc_lo12(.LCPI7_0) lu12i.w $s7, -4 ori $a6, $s7, 384 ori $a7, $zero, 2000 - ori $t0, $zero, 0 - lu32i.d $t0, -49152 - lu52i.d $t0, $t0, 1033 vreplgr2vr.d $vr4, $t0 ori $t0, $s8, 3728 move $t1, $fp @@ -453,10 +445,13 @@ main: # @main addi.d $a3, $a3, 8 bne $a0, $a2, .LBB7_31 # %bb.34: # %kernel_mvt_StrictFP.exit.preheader - pcalau12i $a0, %pc_hi20(.LCPI7_3) - fld.d $fa0, $a0, %pc_lo12(.LCPI7_3) move $a2, $zero ori $a0, $s7, 384 + lu12i.w $a1, -487882 + ori $a1, $a1, 2289 + lu32i.d $a1, 325813 + lu52i.d $a6, $a1, 1006 + movgr2fr.d $fa0, $a6 .p2align 4, , 16 .LBB7_35: # %kernel_mvt_StrictFP.exit # =>This Inner Loop Header: Depth=1 @@ -628,10 +623,6 @@ main: # @main movfr2gr.d $a5, $fa2 pcalau12i $a1, %pc_hi20(.L.str.2) addi.d $a1, $a1, %pc_lo12(.L.str.2) - lu12i.w $a4, -487882 - ori $a4, $a4, 2289 - lu32i.d $a4, 325813 - lu52i.d $a6, $a4, 1006 move $a4, $a2 pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 diff --git a/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/cholesky/CMakeFiles/cholesky.dir/cholesky.s b/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/cholesky/CMakeFiles/cholesky.dir/cholesky.s index eb23755d..065d4c1d 100644 --- a/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/cholesky/CMakeFiles/cholesky.dir/cholesky.s +++ b/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/cholesky/CMakeFiles/cholesky.dir/cholesky.s @@ -106,59 +106,53 @@ polybench_alloc_data: # @polybench_alloc_data .Lfunc_end6: .size polybench_alloc_data, .Lfunc_end6-polybench_alloc_data # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI7_0: - .dword 0x409f400000000000 # double 2000 -.LCPI7_1: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main # %bb.0: - addi.d $sp, $sp, -112 - st.d $ra, $sp, 104 # 8-byte Folded Spill - st.d $fp, $sp, 96 # 8-byte Folded Spill - st.d $s0, $sp, 88 # 8-byte Folded Spill - st.d $s1, $sp, 80 # 8-byte Folded Spill - st.d $s2, $sp, 72 # 8-byte Folded Spill - st.d $s3, $sp, 64 # 8-byte Folded Spill - st.d $s4, $sp, 56 # 8-byte Folded Spill - st.d $s5, $sp, 48 # 8-byte Folded Spill - st.d $s6, $sp, 40 # 8-byte Folded Spill - st.d $s7, $sp, 32 # 8-byte Folded Spill - st.d $s8, $sp, 24 # 8-byte Folded Spill - st.d $zero, $sp, 16 + addi.d $sp, $sp, -128 + st.d $ra, $sp, 120 # 8-byte Folded Spill + st.d $fp, $sp, 112 # 8-byte Folded Spill + st.d $s0, $sp, 104 # 8-byte Folded Spill + st.d $s1, $sp, 96 # 8-byte Folded Spill + st.d $s2, $sp, 88 # 8-byte Folded Spill + st.d $s3, $sp, 80 # 8-byte Folded Spill + st.d $s4, $sp, 72 # 8-byte Folded Spill + st.d $s5, $sp, 64 # 8-byte Folded Spill + st.d $s6, $sp, 56 # 8-byte Folded Spill + st.d $s7, $sp, 48 # 8-byte Folded Spill + st.d $s8, $sp, 40 # 8-byte Folded Spill + st.d $zero, $sp, 32 lu12i.w $a0, 7812 - ori $s1, $a0, 2048 + ori $s0, $a0, 2048 lu12i.w $a1, 1 - addi.d $a0, $sp, 16 - move $a2, $s1 + addi.d $a0, $sp, 32 + move $a2, $s0 pcaddu18i $ra, %call36(posix_memalign) jirl $ra, $ra, 0 - ld.d $fp, $sp, 16 + ld.d $fp, $sp, 32 beqz $fp, .LBB7_77 # %bb.1: bnez $a0, .LBB7_77 # %bb.2: # %polybench_alloc_data.exit - st.d $zero, $sp, 16 + st.d $zero, $sp, 32 lu12i.w $a1, 1 - addi.d $a0, $sp, 16 - move $a2, $s1 + addi.d $a0, $sp, 32 + st.d $s0, $sp, 24 # 8-byte Folded Spill + move $a2, $s0 pcaddu18i $ra, %call36(posix_memalign) jirl $ra, $ra, 0 - ld.d $s0, $sp, 16 + ld.d $s0, $sp, 32 beqz $s0, .LBB7_77 # %bb.3: # %polybench_alloc_data.exit bnez $a0, .LBB7_77 # %bb.4: # %polybench_alloc_data.exit15 - move $t4, $zero + move $t3, $zero sub.d $s6, $s0, $fp addi.d $a0, $s0, 16 addi.d $a1, $fp, 16 - addi.d $a2, $s0, 24 + addi.d $s1, $s0, 24 addi.d $a3, $fp, 24 ori $a4, $zero, 1 ori $a5, $zero, 1999 @@ -166,180 +160,179 @@ main: # @main lu12i.w $s7, 3 ori $s3, $s7, 3712 ori $a7, $zero, 4 - vldi $vr0, -912 - ori $t0, $zero, 1998 - ori $t1, $zero, 32 - vrepli.b $vr1, 0 - lu52i.d $t2, $zero, 1023 - ori $t3, $zero, 2000 - ori $t5, $zero, 0 - lu32i.d $t5, 1 - vreplgr2vr.d $vr2, $t5 + ori $a2, $zero, 0 + lu32i.d $a2, -49152 + lu52i.d $t0, $a2, 1033 + movgr2fr.d $fa0, $t0 + vldi $vr1, -912 + ori $t1, $zero, 1998 + ori $t2, $zero, 32 + vrepli.b $vr2, 0 + lu52i.d $t4, $zero, 1023 + ori $t5, $zero, 2000 vrepli.w $vr3, -2 - vreplgr2vr.d $vr4, $t2 - ori $t5, $zero, 1999 - move $t6, $fp - move $t7, $s0 + vreplgr2vr.d $vr4, $t4 + ori $t6, $zero, 1999 + move $t7, $fp + move $t8, $s0 b .LBB7_6 .p2align 4, , 16 .LBB7_5: # %._crit_edge.i # in Loop: Header=BB7_6 Depth=1 - mul.d $t8, $t4, $s3 - add.d $s2, $fp, $t8 - add.d $t8, $s0, $t8 - addi.d $s4, $t4, 1 - slli.d $t4, $t4, 3 - stx.d $t2, $s2, $t4 - stx.d $t2, $t8, $t4 + mul.d $a2, $t3, $s3 + add.d $s2, $fp, $a2 + add.d $a2, $s0, $a2 + addi.d $s4, $t3, 1 + slli.d $t3, $t3, 3 + stx.d $t4, $s2, $t3 + stx.d $t4, $a2, $t3 addi.d $a4, $a4, 1 add.d $a0, $a0, $s3 add.d $a1, $a1, $s3 + add.d $t8, $t8, $s3 add.d $t7, $t7, $s3 - add.d $t6, $t6, $s3 - addi.d $t5, $t5, -1 - ori $t4, $s7, 3720 - add.d $a2, $a2, $t4 - add.d $a3, $a3, $t4 - move $t4, $s4 - beq $s4, $t3, .LBB7_21 + addi.d $t6, $t6, -1 + ori $a2, $s7, 3720 + add.d $s1, $s1, $a2 + add.d $a3, $a3, $a2 + move $t3, $s4 + beq $s4, $t5, .LBB7_21 .LBB7_6: # %.preheader87.i # =>This Loop Header: Depth=1 # Child Loop BB7_9 Depth 2 # Child Loop BB7_12 Depth 2 # Child Loop BB7_17 Depth 2 # Child Loop BB7_20 Depth 2 - move $t8, $zero + move $s2, $zero bltu $a4, $a7, .LBB7_11 # %bb.7: # %.preheader87.i # in Loop: Header=BB7_6 Depth=1 - bltu $s6, $t1, .LBB7_11 + bltu $s6, $t2, .LBB7_11 # %bb.8: # %vector.ph102 # in Loop: Header=BB7_6 Depth=1 - and $s2, $a4, $a6 - bstrpick.d $t8, $a4, 62, 2 - slli.d $t8, $t8, 2 - move $s4, $a1 - move $s5, $a0 - vori.b $vr5, $vr2, 0 + and $s4, $a4, $a6 + ori $a2, $zero, 0 + lu32i.d $a2, 1 + vreplgr2vr.d $vr5, $a2 + bstrpick.d $a2, $a4, 62, 2 + slli.d $s2, $a2, 2 + move $s5, $a1 + move $s8, $a0 .p2align 4, , 16 .LBB7_9: # %vector.body105 # Parent Loop BB7_6 Depth=1 # => This Inner Loop Header: Depth=2 vneg.w $vr6, $vr5 vsub.w $vr7, $vr3, $vr5 - vpickve2gr.w $s8, $vr6, 1 - movgr2fr.w $ft0, $s8 + vpickve2gr.w $a2, $vr6, 1 + movgr2fr.w $ft0, $a2 ffint.d.w $ft0, $ft0 - vpickve2gr.w $s8, $vr6, 0 - movgr2fr.w $fa6, $s8 + vpickve2gr.w $a2, $vr6, 0 + movgr2fr.w $fa6, $a2 ffint.d.w $fa6, $fa6 vextrins.d $vr6, $vr8, 16 - vpickve2gr.w $s8, $vr7, 1 - movgr2fr.w $ft0, $s8 + vpickve2gr.w $a2, $vr7, 1 + movgr2fr.w $ft0, $a2 ffint.d.w $ft0, $ft0 - vpickve2gr.w $s8, $vr7, 0 - movgr2fr.w $fa7, $s8 + vpickve2gr.w $a2, $vr7, 0 + movgr2fr.w $fa7, $a2 ffint.d.w $fa7, $fa7 vextrins.d $vr7, $vr8, 16 - ori $s8, $zero, 0 - lu32i.d $s8, -49152 - lu52i.d $s8, $s8, 1033 - vreplgr2vr.d $vr8, $s8 + vreplgr2vr.d $vr8, $t0 vfdiv.d $vr6, $vr6, $vr8 vfdiv.d $vr7, $vr7, $vr8 vfadd.d $vr6, $vr6, $vr4 vfadd.d $vr7, $vr7, $vr4 - vst $vr6, $s4, -16 - vst $vr7, $s4, 0 vst $vr6, $s5, -16 vst $vr7, $s5, 0 + vst $vr6, $s8, -16 + vst $vr7, $s8, 0 vaddi.wu $vr5, $vr5, 4 - addi.d $s2, $s2, -4 + addi.d $s4, $s4, -4 + addi.d $s8, $s8, 32 addi.d $s5, $s5, 32 - addi.d $s4, $s4, 32 - bnez $s2, .LBB7_9 + bnez $s4, .LBB7_9 # %bb.10: # %middle.block108 # in Loop: Header=BB7_6 Depth=1 - beq $a4, $t8, .LBB7_13 + beq $a4, $s2, .LBB7_13 .LBB7_11: # %scalar.ph100.preheader # in Loop: Header=BB7_6 Depth=1 - alsl.d $s2, $t8, $t7, 3 - alsl.d $s4, $t8, $t6, 3 - sub.w $s5, $zero, $t8 + alsl.d $s4, $s2, $t8, 3 + alsl.d $s5, $s2, $t7, 3 + sub.w $s8, $zero, $s2 .p2align 4, , 16 .LBB7_12: # %scalar.ph100 # Parent Loop BB7_6 Depth=1 # => This Inner Loop Header: Depth=2 - pcalau12i $s8, %pc_hi20(.LCPI7_0) - fld.d $fa5, $s8, %pc_lo12(.LCPI7_0) - movgr2fr.w $fa6, $s5 - ffint.d.w $fa6, $fa6 - fdiv.d $fa5, $fa6, $fa5 - fadd.d $fa5, $fa5, $fa0 + movgr2fr.w $fa5, $s8 + ffint.d.w $fa5, $fa5 + fdiv.d $fa5, $fa5, $fa0 + fadd.d $fa5, $fa5, $fa1 + fst.d $fa5, $s5, 0 fst.d $fa5, $s4, 0 - fst.d $fa5, $s2, 0 - addi.d $t8, $t8, 1 - addi.d $s2, $s2, 8 + addi.d $s2, $s2, 1 addi.d $s4, $s4, 8 - addi.w $s5, $s5, -1 - bne $a4, $t8, .LBB7_12 + addi.d $s5, $s5, 8 + addi.w $s8, $s8, -1 + bne $a4, $s2, .LBB7_12 .LBB7_13: # %.loopexit140 # in Loop: Header=BB7_6 Depth=1 - bltu $t0, $t4, .LBB7_5 + bltu $t1, $t3, .LBB7_5 # %bb.14: # %.lr.ph.i.preheader # in Loop: Header=BB7_6 Depth=1 - sub.d $t8, $a5, $t4 - move $s2, $a4 - bltu $t8, $a7, .LBB7_19 + sub.d $s2, $a5, $t3 + move $s4, $a4 + bltu $s2, $a7, .LBB7_19 # %bb.15: # %.lr.ph.i.preheader # in Loop: Header=BB7_6 Depth=1 - move $s2, $a4 - bltu $s6, $t1, .LBB7_19 + move $s4, $a4 + bltu $s6, $t2, .LBB7_19 # %bb.16: # %vector.ph # in Loop: Header=BB7_6 Depth=1 - and $s5, $t5, $a6 - move $s4, $t8 - bstrins.d $s4, $zero, 1, 0 - add.d $s2, $a4, $s4 - move $s8, $a3 - move $ra, $a2 + and $s8, $t6, $a6 + move $s5, $s2 + bstrins.d $s5, $zero, 1, 0 + add.d $s4, $a4, $s5 + move $ra, $a3 + move $a2, $s1 .p2align 4, , 16 .LBB7_17: # %vector.body # Parent Loop BB7_6 Depth=1 # => This Inner Loop Header: Depth=2 - vst $vr1, $s8, -16 - vst $vr1, $s8, 0 - vst $vr1, $ra, -16 - vst $vr1, $ra, 0 - addi.d $s5, $s5, -4 + vst $vr2, $ra, -16 + vst $vr2, $ra, 0 + vst $vr2, $a2, -16 + vst $vr2, $a2, 0 + addi.d $s8, $s8, -4 + addi.d $a2, $a2, 32 addi.d $ra, $ra, 32 - addi.d $s8, $s8, 32 - bnez $s5, .LBB7_17 + bnez $s8, .LBB7_17 # %bb.18: # %middle.block # in Loop: Header=BB7_6 Depth=1 - beq $t8, $s4, .LBB7_5 + beq $s2, $s5, .LBB7_5 .LBB7_19: # %.lr.ph.i.preheader153 # in Loop: Header=BB7_6 Depth=1 - addi.d $t8, $s2, -2000 - slli.d $s2, $s2, 3 + addi.d $a2, $s4, -2000 + slli.d $s2, $s4, 3 .p2align 4, , 16 .LBB7_20: # %.lr.ph.i # Parent Loop BB7_6 Depth=1 # => This Inner Loop Header: Depth=2 - stx.d $zero, $t6, $s2 stx.d $zero, $t7, $s2 - addi.d $t8, $t8, 1 + stx.d $zero, $t8, $s2 + addi.d $a2, $a2, 1 addi.d $s2, $s2, 8 - bnez $t8, .LBB7_20 + bnez $a2, .LBB7_20 b .LBB7_5 .LBB7_21: - st.d $zero, $sp, 16 + st.d $zero, $sp, 32 lu12i.w $a1, 1 - addi.d $a0, $sp, 16 + addi.d $a0, $sp, 32 + ld.d $s1, $sp, 24 # 8-byte Folded Reload move $a2, $s1 pcaddu18i $ra, %call36(posix_memalign) jirl $ra, $ra, 0 - ld.d $s2, $sp, 16 + ld.d $s2, $sp, 32 beqz $s2, .LBB7_77 # %bb.22: bnez $a0, .LBB7_77 @@ -724,40 +717,43 @@ main: # @main fmov.d $fa1, $fa0 b .LBB7_62 .LBB7_65: # %.preheader.i49.preheader - pcalau12i $a0, %pc_hi20(.LCPI7_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI7_1) move $a0, $zero move $a2, $zero - ori $a1, $s5, 384 - ori $a4, $zero, 2000 + ori $a4, $s5, 384 + lu12i.w $a1, -487882 + ori $a1, $a1, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa0, $a1 + ori $a5, $zero, 2000 .p2align 4, , 16 .LBB7_66: # %.preheader.i49 # =>This Loop Header: Depth=1 # Child Loop BB7_67 Depth 2 move $a3, $zero - add.d $a5, $s0, $a0 - add.d $a6, $fp, $a0 - move $a7, $a1 + add.d $a6, $s0, $a0 + add.d $a7, $fp, $a0 + move $t0, $a4 .p2align 4, , 16 .LBB7_67: # Parent Loop BB7_66 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $t0, $a6, $a7 - fldx.d $fa1, $t0, $s3 - add.d $t0, $a5, $a7 - fldx.d $fa2, $t0, $s3 + add.d $t1, $a7, $t0 + fldx.d $fa1, $t1, $s3 + add.d $t1, $a6, $t0 + fldx.d $fa2, $t1, $s3 fsub.d $fa3, $fa1, $fa2 fabs.d $fa3, $fa3 fcmp.cule.d $fcc0, $fa3, $fa0 bceqz $fcc0, .LBB7_75 # %bb.68: # %.critedge.i # in Loop: Header=BB7_67 Depth=2 - addi.d $a7, $a7, 8 + addi.d $t0, $t0, 8 addi.w $a3, $a3, 1 - bnez $a7, .LBB7_67 + bnez $t0, .LBB7_67 # %bb.69: # in Loop: Header=BB7_66 Depth=1 addi.d $a2, $a2, 1 add.d $a0, $a0, $s3 - bne $a2, $a4, .LBB7_66 + bne $a2, $a5, .LBB7_66 # %bb.70: # %check_FP.exit lu12i.w $a0, 7 ori $a0, $a0, 3329 @@ -842,10 +838,6 @@ main: # @main pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) ld.d $a0, $a0, 0 - lu12i.w $a1, -487882 - ori $a1, $a1, 2289 - lu32i.d $a1, 325813 - lu52i.d $a1, $a1, 1006 st.d $a1, $sp, 0 movfr2gr.d $a4, $fa1 movfr2gr.d $a7, $fa2 @@ -857,18 +849,18 @@ main: # @main jirl $ra, $ra, 0 ori $a0, $zero, 1 .LBB7_76: - ld.d $s8, $sp, 24 # 8-byte Folded Reload - ld.d $s7, $sp, 32 # 8-byte Folded Reload - ld.d $s6, $sp, 40 # 8-byte Folded Reload - ld.d $s5, $sp, 48 # 8-byte Folded Reload - ld.d $s4, $sp, 56 # 8-byte Folded Reload - ld.d $s3, $sp, 64 # 8-byte Folded Reload - ld.d $s2, $sp, 72 # 8-byte Folded Reload - ld.d $s1, $sp, 80 # 8-byte Folded Reload - ld.d $s0, $sp, 88 # 8-byte Folded Reload - ld.d $fp, $sp, 96 # 8-byte Folded Reload - ld.d $ra, $sp, 104 # 8-byte Folded Reload - addi.d $sp, $sp, 112 + ld.d $s8, $sp, 40 # 8-byte Folded Reload + ld.d $s7, $sp, 48 # 8-byte Folded Reload + ld.d $s6, $sp, 56 # 8-byte Folded Reload + ld.d $s5, $sp, 64 # 8-byte Folded Reload + ld.d $s4, $sp, 72 # 8-byte Folded Reload + ld.d $s3, $sp, 80 # 8-byte Folded Reload + ld.d $s2, $sp, 88 # 8-byte Folded Reload + ld.d $s1, $sp, 96 # 8-byte Folded Reload + ld.d $s0, $sp, 104 # 8-byte Folded Reload + ld.d $fp, $sp, 112 # 8-byte Folded Reload + ld.d $ra, $sp, 120 # 8-byte Folded Reload + addi.d $sp, $sp, 128 ret .LBB7_77: pcalau12i $a0, %got_pc_hi20(stderr) diff --git a/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/durbin/CMakeFiles/durbin.dir/durbin.s b/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/durbin/CMakeFiles/durbin.dir/durbin.s index 2369a4a6..b21ba835 100644 --- a/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/durbin/CMakeFiles/durbin.dir/durbin.s +++ b/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/durbin/CMakeFiles/durbin.dir/durbin.s @@ -106,12 +106,7 @@ polybench_alloc_data: # @polybench_alloc_data .Lfunc_end6: .size polybench_alloc_data, .Lfunc_end6-polybench_alloc_data # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI7_0: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -503,11 +498,14 @@ main: # @main bne $s8, $a0, .LBB7_31 b .LBB7_22 .LBB7_32: # %kernel_durbin_StrictFP.exit - pcalau12i $a0, %pc_hi20(.LCPI7_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI7_0) move $a2, $zero lu12i.w $a0, -4 ori $a0, $a0, 384 + lu12i.w $a1, -487882 + ori $a1, $a1, 2289 + lu32i.d $a1, 325813 + lu52i.d $a6, $a1, 1006 + movgr2fr.d $fa0, $a6 ld.d $a3, $sp, 16 # 8-byte Folded Reload ld.d $s1, $sp, 72 # 8-byte Folded Reload .p2align 4, , 16 @@ -548,10 +546,6 @@ main: # @main movfr2gr.d $a5, $fa2 pcalau12i $a1, %pc_hi20(.L.str.2) addi.d $a1, $a1, %pc_lo12(.L.str.2) - lu12i.w $a4, -487882 - ori $a4, $a4, 2289 - lu32i.d $a4, 325813 - lu52i.d $a6, $a4, 1006 move $a4, $a2 pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 diff --git a/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/gramschmidt/CMakeFiles/gramschmidt.dir/gramschmidt.s b/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/gramschmidt/CMakeFiles/gramschmidt.dir/gramschmidt.s index a6b32f04..e0376189 100644 --- a/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/gramschmidt/CMakeFiles/gramschmidt.dir/gramschmidt.s +++ b/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/gramschmidt/CMakeFiles/gramschmidt.dir/gramschmidt.s @@ -111,12 +111,6 @@ polybench_alloc_data: # @polybench_alloc_data .LCPI7_0: .dword 0 # 0x0 .dword 1 # 0x1 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI7_1: - .dword 0x408f400000000000 # double 1000 -.LCPI7_2: - .dword 0x4059000000000000 # double 100 .text .globl main .p2align 5 @@ -183,24 +177,22 @@ main: # @main lu12i.w $a5, 67108 ori $a5, $a5, 3539 ori $a6, $zero, 1000 - pcalau12i $a7, %pc_hi20(.LCPI7_1) - fld.d $fa0, $a7, %pc_lo12(.LCPI7_1) - pcalau12i $a7, %pc_hi20(.LCPI7_2) - fld.d $fa1, $a7, %pc_lo12(.LCPI7_2) - vldi $vr2, -988 - lu12i.w $a7, 2 - ori $s3, $a7, 1408 - pcalau12i $a7, %pc_hi20(.LCPI7_0) - vld $vr3, $a7, %pc_lo12(.LCPI7_0) ori $a7, $zero, 0 ori $t0, $zero, 0 lu32i.d $t0, -49152 lu52i.d $t0, $t0, 1032 + movgr2fr.d $fa0, $t0 + pcalau12i $t1, %pc_hi20(.LCPI7_0) + vld $vr1, $t1, %pc_lo12(.LCPI7_0) + ori $t1, $zero, 0 + lu32i.d $t1, -458752 + lu52i.d $t1, $t1, 1029 + movgr2fr.d $fa2, $t1 + vldi $vr3, -988 vreplgr2vr.d $vr4, $t0 - ori $t0, $zero, 0 - lu32i.d $t0, -458752 - lu52i.d $t0, $t0, 1029 - vreplgr2vr.d $vr5, $t0 + lu12i.w $t0, 2 + ori $s3, $t0, 1408 + vreplgr2vr.d $vr5, $t1 lu32i.d $a7, 262144 lu52i.d $a7, $a7, 1026 vreplgr2vr.d $vr6, $a7 @@ -238,8 +230,8 @@ main: # @main movgr2fr.d $ft0, $t3 ffint.d.l $ft0, $ft0 fdiv.d $ft0, $ft0, $fa0 - fmul.d $ft0, $ft0, $fa1 - fadd.d $ft0, $ft0, $fa2 + fmul.d $ft0, $ft0, $fa2 + fadd.d $ft0, $ft0, $fa3 add.d $t3, $a7, $t2 fstx.d $ft0, $t3, $s3 add.d $t3, $t0, $t2 @@ -253,7 +245,7 @@ main: # @main # in Loop: Header=BB7_8 Depth=1 vreplgr2vr.d $vr8, $a1 move $t1, $a4 - vori.b $vr9, $vr3, 0 + vori.b $vr9, $vr1, 0 .p2align 4, , 16 .LBB7_12: # %vector.body # Parent Loop BB7_8 Depth=1 diff --git a/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/lu/CMakeFiles/lu.dir/lu.s b/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/lu/CMakeFiles/lu.dir/lu.s index 5be54f72..1313bfbb 100644 --- a/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/lu/CMakeFiles/lu.dir/lu.s +++ b/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/lu/CMakeFiles/lu.dir/lu.s @@ -106,12 +106,7 @@ polybench_alloc_data: # @polybench_alloc_data .Lfunc_end6: .size polybench_alloc_data, .Lfunc_end6-polybench_alloc_data # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI7_0: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -366,39 +361,42 @@ main: # @main .LBB7_30: # %.preheader.i.preheader move $a0, $zero move $a2, $zero - pcalau12i $a1, %pc_hi20(.LCPI7_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI7_0) lu12i.w $s4, -4 - ori $a1, $s4, 384 - ori $a4, $zero, 2000 + ori $a4, $s4, 384 + lu12i.w $a1, -487882 + ori $a1, $a1, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa0, $a1 + ori $a5, $zero, 2000 .p2align 4, , 16 .LBB7_31: # %.preheader.i # =>This Loop Header: Depth=1 # Child Loop BB7_32 Depth 2 move $a3, $zero - add.d $a5, $s0, $a0 - add.d $a6, $fp, $a0 - move $a7, $a1 + add.d $a6, $s0, $a0 + add.d $a7, $fp, $a0 + move $t0, $a4 .p2align 4, , 16 .LBB7_32: # Parent Loop BB7_31 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $t0, $a6, $a7 - fldx.d $fa1, $t0, $s2 - add.d $t0, $a5, $a7 - fldx.d $fa2, $t0, $s2 + add.d $t1, $a7, $t0 + fldx.d $fa1, $t1, $s2 + add.d $t1, $a6, $t0 + fldx.d $fa2, $t1, $s2 fsub.d $fa3, $fa1, $fa2 fabs.d $fa3, $fa3 fcmp.cule.d $fcc0, $fa3, $fa0 bceqz $fcc0, .LBB7_40 # %bb.33: # %.critedge.i # in Loop: Header=BB7_32 Depth=2 - addi.d $a7, $a7, 8 + addi.d $t0, $t0, 8 addi.w $a3, $a3, 1 - bnez $a7, .LBB7_32 + bnez $t0, .LBB7_32 # %bb.34: # in Loop: Header=BB7_31 Depth=1 addi.d $a2, $a2, 1 add.d $a0, $a0, $s2 - bne $a2, $a4, .LBB7_31 + bne $a2, $a5, .LBB7_31 # %bb.35: # %check_FP.exit lu12i.w $s3, 7 ori $a0, $s3, 3329 @@ -484,10 +482,6 @@ main: # @main pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) ld.d $a0, $a0, 0 - lu12i.w $a1, -487882 - ori $a1, $a1, 2289 - lu32i.d $a1, 325813 - lu52i.d $a1, $a1, 1006 st.d $a1, $sp, 0 movfr2gr.d $a4, $fa1 movfr2gr.d $a7, $fa2 @@ -528,12 +522,7 @@ main: # @main .Lfunc_end7: .size main, .Lfunc_end7-main # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function init_array -.LCPI8_0: - .dword 0x409f400000000000 # double 2000 - .text - .p2align 5 + .p2align 5 # -- Begin function init_array .type init_array,@function init_array: # @init_array # %bb.0: @@ -560,21 +549,20 @@ init_array: # @init_array ori $s4, $a0, 3696 ori $s2, $a0, 3712 ori $a4, $zero, 4 - pcalau12i $a0, %pc_hi20(.LCPI8_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI8_0) + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fs0, $a1 vldi $vr4, -912 ori $a5, $zero, 1998 lu52i.d $s3, $zero, 1023 - ori $s7, $zero, 2000 - ori $a0, $zero, 0 lu32i.d $a0, 1 vreplgr2vr.d $vr0, $a0 vst $vr0, $sp, 64 # 16-byte Folded Spill - ori $a0, $zero, 0 + ori $s7, $zero, 2000 vrepli.w $vr5, -2 - lu32i.d $a0, -49152 - lu52i.d $a0, $a0, 1033 - vreplgr2vr.d $vr6, $a0 + vreplgr2vr.d $vr6, $a1 vreplgr2vr.d $vr7, $s3 move $s6, $fp vst $vr5, $sp, 48 # 16-byte Folded Spill diff --git a/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/ludcmp/CMakeFiles/ludcmp.dir/ludcmp.s b/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/ludcmp/CMakeFiles/ludcmp.dir/ludcmp.s index f9675feb..667a2f67 100644 --- a/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/ludcmp/CMakeFiles/ludcmp.dir/ludcmp.s +++ b/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/ludcmp/CMakeFiles/ludcmp.dir/ludcmp.s @@ -106,12 +106,7 @@ polybench_alloc_data: # @polybench_alloc_data .Lfunc_end6: .size polybench_alloc_data, .Lfunc_end6-polybench_alloc_data # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI7_0: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -554,10 +549,13 @@ main: # @main bnez $t0, .LBB7_53 b .LBB7_50 .LBB7_54: # %kernel_ludcmp_StrictFP.exit.preheader - pcalau12i $a0, %pc_hi20(.LCPI7_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI7_0) move $a2, $zero ori $a0, $s6, 384 + lu12i.w $a1, -487882 + ori $a1, $a1, 2289 + lu32i.d $a1, 325813 + lu52i.d $a6, $a1, 1006 + movgr2fr.d $fa0, $a6 .p2align 4, , 16 .LBB7_55: # %kernel_ludcmp_StrictFP.exit # =>This Inner Loop Header: Depth=1 @@ -603,10 +601,6 @@ main: # @main movfr2gr.d $a5, $fa2 pcalau12i $a1, %pc_hi20(.L.str.2) addi.d $a1, $a1, %pc_lo12(.L.str.2) - lu12i.w $a4, -487882 - ori $a4, $a4, 2289 - lu32i.d $a4, 325813 - lu52i.d $a6, $a4, 1006 move $a4, $a2 pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 @@ -645,10 +639,6 @@ main: # @main .LCPI8_0: .dword 0 # 0x0 .dword 1 # 0x1 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI8_1: - .dword 0x409f400000000000 # double 2000 .text .p2align 5 .type init_array,@function @@ -668,62 +658,61 @@ init_array: # @init_array st.d $s8, $sp, 104 # 8-byte Folded Spill fst.d $fs0, $sp, 96 # 8-byte Folded Spill move $fp, $a0 - sub.d $a5, $a3, $a2 - ori $a4, $zero, 32 - lu12i.w $t0, 3 - lu12i.w $a6, -4 - pcalau12i $a0, %pc_hi20(.LCPI8_1) - bltu $a5, $a4, .LBB8_5 + sub.d $a4, $a3, $a2 + ori $a0, $zero, 32 + lu12i.w $a7, 3 + lu12i.w $a5, -4 + bltu $a4, $a0, .LBB8_5 # %bb.1: # %vector.memcheck - sub.d $a5, $a1, $a2 - bltu $a5, $a4, .LBB8_5 + sub.d $a4, $a1, $a2 + bltu $a4, $a0, .LBB8_5 # %bb.2: # %vector.memcheck - sub.d $a4, $a1, $a3 - ori $a5, $zero, 32 - bltu $a4, $a5, .LBB8_5 + sub.d $a0, $a1, $a3 + ori $a4, $zero, 32 + bltu $a0, $a4, .LBB8_5 # %bb.3: # %vector.body.preheader - pcalau12i $a4, %pc_hi20(.LCPI8_0) - vld $vr0, $a4, %pc_lo12(.LCPI8_0) - ori $a4, $a6, 384 + pcalau12i $a0, %pc_hi20(.LCPI8_0) + vld $vr0, $a0, %pc_lo12(.LCPI8_0) + ori $a0, $a5, 384 vrepli.b $vr1, 0 - ori $a5, $t0, 3712 - ori $a6, $t0, 3728 - ori $a7, $zero, 0 - lu32i.d $a7, -49152 - lu52i.d $a7, $a7, 1033 - vreplgr2vr.d $vr2, $a7 - lu52i.d $a7, $zero, 1022 - vreplgr2vr.d $vr3, $a7 - lu52i.d $a7, $zero, 1025 - vreplgr2vr.d $vr4, $a7 + ori $a4, $a7, 3712 + ori $a5, $a7, 3728 + ori $a6, $zero, 0 + lu32i.d $a6, -49152 + lu52i.d $a6, $a6, 1033 + vreplgr2vr.d $vr2, $a6 + lu52i.d $a6, $zero, 1022 + vreplgr2vr.d $vr3, $a6 + lu52i.d $a6, $zero, 1025 + vreplgr2vr.d $vr4, $a6 .p2align 4, , 16 .LBB8_4: # %vector.body # =>This Inner Loop Header: Depth=1 - add.d $a7, $a2, $a4 - vstx $vr1, $a7, $a5 - vstx $vr1, $a7, $a6 - add.d $a7, $a3, $a4 - vstx $vr1, $a7, $a5 - vstx $vr1, $a7, $a6 + add.d $a6, $a2, $a0 + vstx $vr1, $a6, $a4 + vstx $vr1, $a6, $a5 + add.d $a6, $a3, $a0 + vstx $vr1, $a6, $a4 + vstx $vr1, $a6, $a5 vshuf4i.w $vr5, $vr0, 8 vaddi.wu $vr6, $vr5, 1 vaddi.wu $vr5, $vr5, 3 - vpickve2gr.w $a7, $vr6, 1 - bstrpick.d $a7, $a7, 31, 0 - movgr2fr.d $fa7, $a7 + vpickve2gr.w $a6, $vr6, 1 + bstrpick.d $a6, $a6, 31, 0 + movgr2fr.d $fa7, $a6 ffint.d.l $fa7, $fa7 - vpickve2gr.w $a7, $vr6, 0 - bstrpick.d $a7, $a7, 31, 0 - movgr2fr.d $fa6, $a7 + vpickve2gr.w $a6, $vr6, 0 + bstrpick.d $a6, $a6, 31, 0 + movgr2fr.d $fa6, $a6 ffint.d.l $fa6, $fa6 vextrins.d $vr6, $vr7, 16 - vpickve2gr.w $a7, $vr5, 1 - bstrpick.d $a7, $a7, 31, 0 - movgr2fr.d $fa7, $a7 + vpickve2gr.w $a6, $vr5, 1 + bstrpick.d $a6, $a6, 31, 0 + movgr2fr.d $fa7, $a6 ffint.d.l $fa7, $fa7 - vpickve2gr.w $a7, $vr5, 0 - bstrpick.d $a7, $a7, 31, 0 - movgr2fr.d $fa5, $a7 + vpickve2gr.w $a6, $vr5, 0 + bstrpick.d $a6, $a6, 31, 0 + movgr2fr.d $fa5, $a6 ffint.d.l $fa5, $fa5 vextrins.d $vr5, $vr7, 16 vfdiv.d $vr6, $vr6, $vr2 @@ -732,61 +721,64 @@ init_array: # @init_array vfmul.d $vr5, $vr5, $vr3 vfadd.d $vr6, $vr6, $vr4 vfadd.d $vr5, $vr5, $vr4 - add.d $a7, $a1, $a4 - vstx $vr6, $a7, $a5 - vstx $vr5, $a7, $a6 - addi.d $a4, $a4, 32 + add.d $a6, $a1, $a0 + vstx $vr6, $a6, $a4 + vstx $vr5, $a6, $a5 + addi.d $a0, $a0, 32 vaddi.du $vr0, $vr0, 4 - bnez $a4, .LBB8_4 + bnez $a0, .LBB8_4 b .LBB8_7 .LBB8_5: # %scalar.ph.preheader - ori $a4, $zero, 1 - ori $a5, $a6, 384 - fld.d $fa0, $a0, %pc_lo12(.LCPI8_1) + ori $a0, $zero, 1 + ori $a4, $a5, 384 + ori $a5, $zero, 0 + lu32i.d $a5, -49152 + lu52i.d $a5, $a5, 1033 + movgr2fr.d $fa0, $a5 vldi $vr1, -928 vldi $vr2, -1008 - ori $a6, $t0, 3712 + ori $a5, $a7, 3712 .p2align 4, , 16 .LBB8_6: # %scalar.ph # =>This Inner Loop Header: Depth=1 - add.d $a7, $a2, $a5 - stptr.d $zero, $a7, 16000 - add.d $a7, $a3, $a5 - stptr.d $zero, $a7, 16000 - bstrpick.d $a7, $a4, 31, 0 - movgr2fr.d $fa3, $a7 + add.d $a6, $a2, $a4 + stptr.d $zero, $a6, 16000 + add.d $a6, $a3, $a4 + stptr.d $zero, $a6, 16000 + bstrpick.d $a6, $a0, 31, 0 + movgr2fr.d $fa3, $a6 ffint.d.l $fa3, $fa3 fdiv.d $fa3, $fa3, $fa0 fmul.d $fa3, $fa3, $fa1 fadd.d $fa3, $fa3, $fa2 - add.d $a7, $a1, $a5 - fstx.d $fa3, $a7, $a6 - addi.d $a5, $a5, 8 - addi.w $a4, $a4, 1 - bnez $a5, .LBB8_6 + add.d $a6, $a1, $a4 + fstx.d $fa3, $a6, $a5 + addi.d $a4, $a4, 8 + addi.w $a0, $a0, 1 + bnez $a4, .LBB8_6 .LBB8_7: # %.preheader87.preheader move $s0, $zero addi.d $s1, $fp, 16 ori $s6, $zero, 1 - ori $a1, $t0, 3720 - st.d $a1, $sp, 80 # 8-byte Folded Spill - ori $s5, $t0, 3696 - ori $s4, $t0, 3712 + ori $a0, $a7, 3720 + st.d $a0, $sp, 80 # 8-byte Folded Spill + ori $s5, $a7, 3696 + ori $s4, $a7, 3712 ori $a4, $zero, 4 - fld.d $fs0, $a0, %pc_lo12(.LCPI8_1) + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -49152 + lu52i.d $a1, $a1, 1033 + movgr2fr.d $fs0, $a1 vldi $vr4, -912 ori $a5, $zero, 1998 lu52i.d $s2, $zero, 1023 - ori $s8, $zero, 2000 - ori $a0, $zero, 0 lu32i.d $a0, 1 vreplgr2vr.d $vr0, $a0 vst $vr0, $sp, 64 # 16-byte Folded Spill - ori $a0, $zero, 0 + ori $s8, $zero, 2000 vrepli.w $vr5, -2 - lu32i.d $a0, -49152 - lu52i.d $a0, $a0, 1033 - vreplgr2vr.d $vr6, $a0 + vreplgr2vr.d $vr6, $a1 vreplgr2vr.d $vr7, $s2 move $s7, $fp vst $vr5, $sp, 48 # 16-byte Folded Spill diff --git a/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/trisolv/CMakeFiles/trisolv.dir/trisolv.s b/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/trisolv/CMakeFiles/trisolv.dir/trisolv.s index 6ffa9a4d..8c6dd92e 100644 --- a/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/trisolv/CMakeFiles/trisolv.dir/trisolv.s +++ b/results/SingleSource/Benchmarks/Polybench/linear-algebra/solvers/trisolv/CMakeFiles/trisolv.dir/trisolv.s @@ -106,15 +106,9 @@ polybench_alloc_data: # @polybench_alloc_data .Lfunc_end6: .size polybench_alloc_data, .Lfunc_end6-polybench_alloc_data # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI7_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI7_2: - .dword 0x409f400000000000 # double 2000 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI7_1: + .p2align 4, 0x0 # -- Begin function main +.LCPI7_0: .dword 0 # 0x0 .dword 1 # 0x1 .text @@ -169,24 +163,24 @@ main: # @main addi.d $a1, $fp, 16 ori $a2, $zero, 1 ori $a3, $zero, 4002 - ori $t2, $zero, 0 + ori $t0, $zero, 0 ori $a4, $zero, 0 lu32i.d $a4, -51200 lu52i.d $a4, $a4, -1016 lu52i.d $a5, $zero, 1107 - pcalau12i $a6, %pc_hi20(.LCPI7_0) - fld.d $fa0, $a6, %pc_lo12(.LCPI7_0) + lu12i.w $a6, 256 + lu52i.d $a6, $a6, 1107 + movgr2fr.d $fa0, $a6 lu12i.w $a6, 275200 ori $a7, $zero, 4 - pcalau12i $t0, %pc_hi20(.LCPI7_2) - fld.d $fa1, $t0, %pc_lo12(.LCPI7_2) + lu32i.d $t0, -49152 + lu52i.d $t2, $t0, 1033 + movgr2fr.d $fa1, $t2 ori $t0, $zero, 2000 - pcalau12i $t1, %pc_hi20(.LCPI7_1) - vld $vr2, $t1, %pc_lo12(.LCPI7_1) + pcalau12i $t1, %pc_hi20(.LCPI7_0) + vld $vr2, $t1, %pc_lo12(.LCPI7_0) addi.w $t1, $zero, -2 lu32i.d $t1, 0 - lu32i.d $t2, -49152 - lu52i.d $t2, $t2, 1033 vreplgr2vr.d $vr3, $t2 move $t2, $fp b .LBB7_8 diff --git a/results/SingleSource/Benchmarks/Polybench/medley/deriche/CMakeFiles/deriche.dir/deriche.s b/results/SingleSource/Benchmarks/Polybench/medley/deriche/CMakeFiles/deriche.dir/deriche.s index cefd3d9a..ab7b1940 100644 --- a/results/SingleSource/Benchmarks/Polybench/medley/deriche/CMakeFiles/deriche.dir/deriche.s +++ b/results/SingleSource/Benchmarks/Polybench/medley/deriche/CMakeFiles/deriche.dir/deriche.s @@ -114,24 +114,6 @@ polybench_alloc_data: # @polybench_alloc_data .LCPI7_1: .dword 0 # 0x0 .dword 1 # 0x1 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI7_2: - .word 0x3de1b54c # float 0.110209078 -.LCPI7_3: - .word 0xbe4135c4 # float -0.188681662 -.LCPI7_4: - .word 0x3f5744fd # float 0.840896427 -.LCPI7_5: - .word 0xbf1b4598 # float -0.606530666 -.LCPI7_6: - .word 0x3dea6028 # float 0.114441216 -.LCPI7_7: - .word 0xbe3c1714 # float -0.183681786 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI7_8: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 .text .globl main .p2align 5 @@ -208,12 +190,12 @@ main: # @main # %bb.10: # %polybench_alloc_data.exit38 move $a0, $zero ori $a1, $zero, 313 - pcalau12i $a4, %pc_hi20(.LCPI7_0) - vld $vr0, $a4, %pc_lo12(.LCPI7_0) - pcalau12i $a5, %pc_hi20(.LCPI7_1) - vld $vr1, $a5, %pc_lo12(.LCPI7_1) + pcalau12i $a3, %pc_hi20(.LCPI7_0) + vld $vr0, $a3, %pc_lo12(.LCPI7_0) + pcalau12i $a4, %pc_hi20(.LCPI7_1) + vld $vr1, $a4, %pc_lo12(.LCPI7_1) lu12i.w $s6, -3 - ori $a3, $s6, 3648 + ori $a5, $s6, 3648 ori $a2, $zero, 991 vreplgr2vr.d $vr2, $a2 lu12i.w $a2, 15 @@ -231,7 +213,7 @@ main: # @main # Child Loop BB7_12 Depth 2 mul.d $a7, $a0, $a1 vreplgr2vr.d $vr3, $a7 - move $a7, $a3 + move $a7, $a5 vori.b $vr4, $vr1, 0 vori.b $vr5, $vr0, 0 .p2align 4, , 16 @@ -258,40 +240,46 @@ main: # @main add.d $a6, $a6, $s5 bne $a0, $s4, .LBB7_11 # %bb.14: # %.preheader180.i.preheader - pcalau12i $a0, %pc_hi20(.LCPI7_2) - fld.s $fa4, $a0, %pc_lo12(.LCPI7_2) - pcalau12i $a0, %pc_hi20(.LCPI7_3) - fld.s $fa5, $a0, %pc_lo12(.LCPI7_3) - pcalau12i $a0, %pc_hi20(.LCPI7_4) - fld.s $fa0, $a0, %pc_lo12(.LCPI7_4) - pcalau12i $a0, %pc_hi20(.LCPI7_5) - fld.s $fa1, $a0, %pc_lo12(.LCPI7_5) move $a0, $zero move $a1, $zero - movgr2fr.w $fa2, $zero - ori $a3, $s6, 3648 + movgr2fr.w $fa4, $zero + lu12i.w $a5, 253467 + ori $a5, $a5, 1356 + movgr2fr.w $fa2, $a5 + lu12i.w $a5, -269293 + ori $a5, $a5, 1476 + lu32i.d $a5, 0 + movgr2fr.w $fa3, $a5 + lu12i.w $a5, 259444 + ori $a5, $a5, 1277 + movgr2fr.w $fa0, $a5 + lu12i.w $a5, -265804 + ori $a5, $a5, 1432 + lu32i.d $a5, 0 + movgr2fr.w $fa1, $a5 + ori $a5, $s6, 3648 .p2align 4, , 16 .LBB7_15: # %.preheader180.i # =>This Loop Header: Depth=1 # Child Loop BB7_16 Depth 2 add.d $a6, $s2, $a0 add.d $a7, $fp, $a0 - move $t0, $a3 - fmov.s $ft0, $fa2 - fmov.s $fa3, $fa2 - fmov.s $ft1, $fa2 + move $t0, $a5 + fmov.s $ft0, $fa4 + fmov.s $fa5, $fa4 + fmov.s $ft1, $fa4 .p2align 4, , 16 .LBB7_16: # Parent Loop BB7_15 Depth=1 # => This Inner Loop Header: Depth=2 add.d $t1, $a7, $t0 fldx.s $ft2, $t1, $s5 - fmov.s $ft3, $fa3 - fmul.s $fa3, $ft0, $fa4 - fmadd.s $fa3, $ft2, $fa5, $fa3 - fmadd.s $fa3, $ft3, $fa0, $fa3 - fmadd.s $fa3, $ft1, $fa1, $fa3 + fmov.s $ft3, $fa5 + fmul.s $fa5, $ft0, $fa2 + fmadd.s $fa5, $ft2, $fa3, $fa5 + fmadd.s $fa5, $ft3, $fa0, $fa5 + fmadd.s $fa5, $ft1, $fa1, $fa5 add.d $t2, $a6, $t0 - fstx.s $fa3, $t2, $s5 + fstx.s $fa5, $t2, $s5 fldx.s $ft0, $t1, $s5 addi.d $t0, $t0, 4 fmov.s $ft1, $ft3 @@ -303,13 +291,16 @@ main: # @main # %bb.18: # %.preheader178.i.preheader move $a0, $zero move $a1, $zero - pcalau12i $a3, %pc_hi20(.LCPI7_6) - fld.s $fa2, $a3, %pc_lo12(.LCPI7_6) - pcalau12i $a3, %pc_hi20(.LCPI7_7) - fld.s $fa3, $a3, %pc_lo12(.LCPI7_7) movgr2fr.w $ft0, $zero ori $a6, $a2, 444 - addi.w $a3, $zero, -4 + lu12i.w $a5, 253606 + ori $a5, $a5, 40 + movgr2fr.w $fa4, $a5 + lu12i.w $a5, -269375 + ori $a5, $a5, 1812 + lu32i.d $a5, 0 + movgr2fr.w $fa5, $a5 + addi.w $a5, $zero, -4 .p2align 4, , 16 .LBB7_19: # %.preheader178.i # =>This Loop Header: Depth=1 @@ -325,16 +316,16 @@ main: # @main .LBB7_20: # Parent Loop BB7_19 Depth=1 # => This Inner Loop Header: Depth=2 fmov.s $ft5, $ft2 - fmul.s $ft2, $ft3, $fa2 + fmul.s $ft2, $ft3, $fa4 fmov.s $ft3, $ft1 - fmadd.s $ft1, $ft1, $fa3, $ft2 + fmadd.s $ft1, $ft1, $fa5, $ft2 fmadd.s $ft1, $ft5, $fa0, $ft1 fmadd.s $ft2, $ft4, $fa1, $ft1 fstx.s $ft2, $t0, $t1 fldx.s $ft1, $a7, $t1 addi.d $t1, $t1, -4 fmov.s $ft4, $ft5 - bne $t1, $a3, .LBB7_20 + bne $t1, $a5, .LBB7_20 # %bb.21: # in Loop: Header=BB7_19 Depth=1 addi.d $a1, $a1, 1 add.d $a0, $a0, $s5 @@ -420,8 +411,8 @@ main: # @main # => This Inner Loop Header: Depth=2 fldx.s $ft4, $s0, $t2 fmov.s $ft5, $ft2 - fmul.s $ft1, $ft1, $fa4 - fmadd.s $ft1, $ft4, $fa5, $ft1 + fmul.s $ft1, $ft1, $fa2 + fmadd.s $ft1, $ft4, $fa3, $ft1 fmadd.s $ft1, $ft2, $fa0, $ft1 fmadd.s $ft2, $ft3, $fa1, $ft1 fstx.s $ft2, $s2, $t2 @@ -456,9 +447,9 @@ main: # @main .LBB7_33: # Parent Loop BB7_32 Depth=1 # => This Inner Loop Header: Depth=2 fmov.s $ft5, $ft2 - fmul.s $ft2, $ft3, $fa2 + fmul.s $ft2, $ft3, $fa4 fmov.s $ft3, $ft1 - fmadd.s $ft1, $ft1, $fa3, $ft2 + fmadd.s $ft1, $ft1, $fa5, $ft2 fmadd.s $ft1, $ft5, $fa0, $ft1 fmadd.s $ft2, $ft4, $fa1, $ft1 fstx.s $ft2, $s3, $t5 @@ -529,19 +520,19 @@ main: # @main .LBB7_40: # %.preheader.i43.preheader move $t0, $zero ori $t1, $zero, 313 - vld $vr8, $a4, %pc_lo12(.LCPI7_0) - vld $vr9, $a5, %pc_lo12(.LCPI7_1) - ori $a4, $s6, 3648 - ori $a5, $zero, 991 - vreplgr2vr.d $vr10, $a5 - move $a5, $fp + vld $vr8, $a3, %pc_lo12(.LCPI7_0) + vld $vr9, $a4, %pc_lo12(.LCPI7_1) + ori $a3, $s6, 3648 + ori $a4, $zero, 991 + vreplgr2vr.d $vr10, $a4 + move $a4, $fp .p2align 4, , 16 .LBB7_41: # %.preheader.i43 # =>This Loop Header: Depth=1 # Child Loop BB7_42 Depth 2 mul.d $t2, $t0, $t1 vreplgr2vr.d $vr11, $t2 - move $t2, $a4 + move $t2, $a3 vori.b $vr12, $vr9, 0 vori.b $vr13, $vr8, 0 .p2align 4, , 16 @@ -556,7 +547,7 @@ main: # @main vand.v $vr14, $vr14, $vr6 vffint.s.wu $vr14, $vr14 vfdiv.s $vr14, $vr14, $vr7 - add.d $t3, $a5, $t2 + add.d $t3, $a4, $t2 vstx $vr14, $t3, $s5 vaddi.du $vr12, $vr12, 4 addi.d $t2, $t2, 16 @@ -565,19 +556,19 @@ main: # @main # %bb.43: # %middle.block189 # in Loop: Header=BB7_41 Depth=1 addi.d $t0, $t0, 1 - add.d $a5, $a5, $s5 + add.d $a4, $a4, $s5 bne $t0, $a7, .LBB7_41 # %bb.44: # %.preheader180.i52.preheader + move $a3, $zero move $a4, $zero - move $a5, $zero movgr2fr.w $fa6, $zero ori $t0, $s6, 3648 .p2align 4, , 16 .LBB7_45: # %.preheader180.i52 # =>This Loop Header: Depth=1 # Child Loop BB7_46 Depth 2 - add.d $t1, $s2, $a4 - add.d $t2, $fp, $a4 + add.d $t1, $s2, $a3 + add.d $t2, $fp, $a3 move $t3, $t0 fmov.s $fa7, $fa6 fmov.s $ft0, $fa6 @@ -589,8 +580,8 @@ main: # @main fldx.s $ft2, $t4, $s5 fmul.s $ft1, $ft1, $fa1 fmov.s $ft3, $ft0 - fmul.s $ft0, $ft2, $fa5 - fmul.s $fa7, $fa7, $fa4 + fmul.s $ft0, $ft2, $fa3 + fmul.s $fa7, $fa7, $fa2 fadd.s $fa7, $fa7, $ft0 fmul.s $ft0, $ft3, $fa0 fadd.s $fa7, $ft0, $fa7 @@ -602,20 +593,20 @@ main: # @main fmov.s $ft1, $ft3 bnez $t3, .LBB7_46 # %bb.47: # in Loop: Header=BB7_45 Depth=1 - addi.d $a5, $a5, 1 - add.d $a4, $a4, $s5 - bne $a5, $a7, .LBB7_45 + addi.d $a4, $a4, 1 + add.d $a3, $a3, $s5 + bne $a4, $a7, .LBB7_45 # %bb.48: # %.preheader178.i63.preheader + move $a3, $zero move $a4, $zero - move $a5, $zero movgr2fr.w $fa6, $zero ori $a2, $a2, 444 .p2align 4, , 16 .LBB7_49: # %.preheader178.i63 # =>This Loop Header: Depth=1 # Child Loop BB7_50 Depth 2 - add.d $t0, $fp, $a4 - add.d $t1, $s3, $a4 + add.d $t0, $fp, $a3 + add.d $t1, $s3, $a3 move $t2, $a2 fmov.s $fa7, $fa6 fmov.s $ft1, $fa6 @@ -626,9 +617,9 @@ main: # @main # => This Inner Loop Header: Depth=2 fmul.s $ft2, $ft2, $fa1 fmov.s $ft3, $ft0 - fmul.s $ft0, $ft1, $fa2 + fmul.s $ft0, $ft1, $fa4 fmov.s $ft1, $fa7 - fmul.s $fa7, $fa7, $fa3 + fmul.s $fa7, $fa7, $fa5 fadd.s $fa7, $ft0, $fa7 fmul.s $ft0, $ft3, $fa0 fadd.s $fa7, $ft0, $fa7 @@ -637,11 +628,11 @@ main: # @main fldx.s $fa7, $t0, $t2 addi.d $t2, $t2, -4 fmov.s $ft2, $ft3 - bne $t2, $a3, .LBB7_50 + bne $t2, $a5, .LBB7_50 # %bb.51: # in Loop: Header=BB7_49 Depth=1 - addi.d $a5, $a5, 1 - add.d $a4, $a4, $s5 - bne $a5, $a7, .LBB7_49 + addi.d $a4, $a4, 1 + add.d $a3, $a3, $s5 + bne $a4, $a7, .LBB7_49 # %bb.52: # %.preheader176.i74.preheader move $a3, $zero sub.d $a2, $s1, $s2 @@ -722,8 +713,8 @@ main: # @main fldx.s $ft2, $s1, $t0 fmul.s $ft1, $ft1, $fa1 fmov.s $ft3, $ft0 - fmul.s $ft0, $ft2, $fa5 - fmul.s $fa7, $fa7, $fa4 + fmul.s $ft0, $ft2, $fa3 + fmul.s $fa7, $fa7, $fa2 fadd.s $fa7, $fa7, $ft0 fmul.s $ft0, $ft3, $fa0 fadd.s $fa7, $ft0, $fa7 @@ -741,7 +732,7 @@ main: # @main # %bb.61: # %.preheader172.i95.preheader move $a4, $zero ori $a5, $a6, 3648 - movgr2fr.w $fa4, $zero + movgr2fr.w $fa2, $zero lu12i.w $a6, -1 ori $a3, $s6, 3648 ori $a7, $zero, 2160 @@ -751,24 +742,24 @@ main: # @main # Child Loop BB7_63 Depth 2 move $t0, $a5 move $t1, $a6 - fmov.s $fa5, $fa4 - fmov.s $fa7, $fa4 - fmov.s $fa6, $fa4 - fmov.s $ft0, $fa4 + fmov.s $fa3, $fa2 + fmov.s $fa7, $fa2 + fmov.s $fa6, $fa2 + fmov.s $ft0, $fa2 .p2align 4, , 16 .LBB7_63: # Parent Loop BB7_62 Depth=1 # => This Inner Loop Header: Depth=2 fmul.s $ft0, $ft0, $fa1 fmov.s $ft1, $fa6 - fmul.s $fa6, $fa7, $fa2 - fmov.s $fa7, $fa5 - fmul.s $fa5, $fa5, $fa3 - fadd.s $fa5, $fa6, $fa5 + fmul.s $fa6, $fa7, $fa4 + fmov.s $fa7, $fa3 + fmul.s $fa3, $fa3, $fa5 + fadd.s $fa3, $fa6, $fa3 fmul.s $fa6, $ft1, $fa0 - fadd.s $fa5, $fa6, $fa5 - fadd.s $fa6, $fa5, $ft0 + fadd.s $fa3, $fa6, $fa3 + fadd.s $fa6, $fa3, $ft0 fstx.s $fa6, $s3, $t0 - fldx.s $fa5, $s1, $t0 + fldx.s $fa3, $s1, $t0 fmov.s $ft0, $ft1 addi.d $t1, $t1, 1 add.d $t0, $t0, $a3 @@ -833,26 +824,29 @@ main: # @main bnez $t1, .LBB7_69 b .LBB7_66 .LBB7_70: # %.preheader.i117.preheader - pcalau12i $a0, %pc_hi20(.LCPI7_8) - fld.d $fa0, $a0, %pc_lo12(.LCPI7_8) move $a0, $zero move $a2, $zero - ori $a1, $s6, 3648 + ori $a4, $s6, 3648 + lu12i.w $a1, -487882 + ori $a1, $a1, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa0, $a1 .p2align 4, , 16 .LBB7_71: # %.preheader.i117 # =>This Loop Header: Depth=1 # Child Loop BB7_72 Depth 2 move $a3, $zero - add.d $a4, $s1, $a0 - add.d $a5, $s0, $a0 - move $a6, $a1 + add.d $a5, $s1, $a0 + add.d $a6, $s0, $a0 + move $a7, $a4 .p2align 4, , 16 .LBB7_72: # Parent Loop BB7_71 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $a7, $a5, $a6 - fldx.s $fa1, $a7, $s5 - add.d $a7, $a4, $a6 - fldx.s $fa2, $a7, $s5 + add.d $t0, $a6, $a7 + fldx.s $fa1, $t0, $s5 + add.d $t0, $a5, $a7 + fldx.s $fa2, $t0, $s5 fcvt.d.s $fa1, $fa1 fcvt.d.s $fa2, $fa2 fsub.d $fa3, $fa1, $fa2 @@ -861,9 +855,9 @@ main: # @main bceqz $fcc0, .LBB7_80 # %bb.73: # %.critedge.i # in Loop: Header=BB7_72 Depth=2 - addi.d $a6, $a6, 4 + addi.d $a7, $a7, 4 addi.w $a3, $a3, 1 - bnez $a6, .LBB7_72 + bnez $a7, .LBB7_72 # %bb.74: # in Loop: Header=BB7_71 Depth=1 addi.d $a2, $a2, 1 add.d $a0, $a0, $s5 @@ -966,10 +960,6 @@ main: # @main pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) ld.d $a0, $a0, 0 - lu12i.w $a1, -487882 - ori $a1, $a1, 2289 - lu32i.d $a1, 325813 - lu52i.d $a1, $a1, 1006 st.d $a1, $sp, 0 movfr2gr.d $a4, $fa1 movfr2gr.d $a7, $fa2 diff --git a/results/SingleSource/Benchmarks/Polybench/medley/floyd-warshall/CMakeFiles/floyd-warshall.dir/floyd-warshall.s b/results/SingleSource/Benchmarks/Polybench/medley/floyd-warshall/CMakeFiles/floyd-warshall.dir/floyd-warshall.s index e05d5ca6..15f0a2b2 100644 --- a/results/SingleSource/Benchmarks/Polybench/medley/floyd-warshall/CMakeFiles/floyd-warshall.dir/floyd-warshall.s +++ b/results/SingleSource/Benchmarks/Polybench/medley/floyd-warshall/CMakeFiles/floyd-warshall.dir/floyd-warshall.s @@ -114,10 +114,6 @@ polybench_alloc_data: # @polybench_alloc_data .LCPI7_1: .dword 0 # 0x0 .dword 1 # 0x1 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI7_2: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 .text .globl main .p2align 5 @@ -494,30 +490,33 @@ main: # @main bnez $t4, .LBB7_29 b .LBB7_24 .LBB7_30: # %.preheader.i44.preheader - pcalau12i $a0, %pc_hi20(.LCPI7_2) - fld.d $fa0, $a0, %pc_lo12(.LCPI7_2) move $a0, $zero move $a2, $zero - ori $a1, $s3, 1088 - ori $a4, $zero, 2800 + ori $a4, $s3, 1088 + lu12i.w $a1, -487882 + ori $a1, $a1, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa0, $a1 + ori $a5, $zero, 2800 .p2align 4, , 16 .LBB7_31: # %.preheader.i44 # =>This Loop Header: Depth=1 # Child Loop BB7_32 Depth 2 move $a3, $zero - add.d $a5, $s0, $a0 - add.d $a6, $fp, $a0 - move $a7, $a1 + add.d $a6, $s0, $a0 + add.d $a7, $fp, $a0 + move $t0, $a4 .p2align 4, , 16 .LBB7_32: # Parent Loop BB7_31 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $t0, $a6, $a7 - ldptr.w $t0, $t0, 11200 - add.d $t1, $a5, $a7 + add.d $t1, $a7, $t0 ldptr.w $t1, $t1, 11200 - movgr2fr.w $fa1, $t0 + add.d $t2, $a6, $t0 + ldptr.w $t2, $t2, 11200 + movgr2fr.w $fa1, $t1 ffint.d.w $fa1, $fa1 - movgr2fr.w $fa2, $t1 + movgr2fr.w $fa2, $t2 ffint.d.w $fa2, $fa2 fsub.d $fa3, $fa1, $fa2 fabs.d $fa3, $fa3 @@ -525,13 +524,13 @@ main: # @main bceqz $fcc0, .LBB7_40 # %bb.33: # %.critedge.i # in Loop: Header=BB7_32 Depth=2 - addi.d $a7, $a7, 4 + addi.d $t0, $t0, 4 addi.w $a3, $a3, 1 - bnez $a7, .LBB7_32 + bnez $t0, .LBB7_32 # %bb.34: # in Loop: Header=BB7_31 Depth=1 addi.d $a2, $a2, 1 add.d $a0, $a0, $s2 - bne $a2, $a4, .LBB7_31 + bne $a2, $a5, .LBB7_31 # %bb.35: # %check_FP.exit lu12i.w $s4, 10 ori $a0, $s4, 3841 @@ -620,10 +619,6 @@ main: # @main pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) ld.d $a0, $a0, 0 - lu12i.w $a1, -487882 - ori $a1, $a1, 2289 - lu32i.d $a1, 325813 - lu52i.d $a1, $a1, 1006 st.d $a1, $sp, 0 movfr2gr.d $a4, $fa1 movfr2gr.d $a7, $fa2 diff --git a/results/SingleSource/Benchmarks/Polybench/medley/nussinov/CMakeFiles/nussinov.dir/nussinov.s b/results/SingleSource/Benchmarks/Polybench/medley/nussinov/CMakeFiles/nussinov.dir/nussinov.s index 6f5b0c4f..fe3c475a 100644 --- a/results/SingleSource/Benchmarks/Polybench/medley/nussinov/CMakeFiles/nussinov.dir/nussinov.s +++ b/results/SingleSource/Benchmarks/Polybench/medley/nussinov/CMakeFiles/nussinov.dir/nussinov.s @@ -132,10 +132,6 @@ polybench_alloc_data: # @polybench_alloc_data .LCPI7_7: .dword 0 # 0x0 .dword 1 # 0x1 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI7_8: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 .text .globl main .p2align 5 @@ -507,30 +503,33 @@ main: # @main bnez $t7, .LBB7_27 b .LBB7_23 .LBB7_28: # %.preheader.i.preheader - pcalau12i $a0, %pc_hi20(.LCPI7_8) - fld.d $fa0, $a0, %pc_lo12(.LCPI7_8) move $a0, $zero move $a2, $zero - ori $a1, $s5, 2288 - ori $a4, $zero, 2500 + ori $a4, $s5, 2288 + lu12i.w $a1, -487882 + ori $a1, $a1, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa0, $a1 + ori $a5, $zero, 2500 .p2align 4, , 16 .LBB7_29: # %.preheader.i # =>This Loop Header: Depth=1 # Child Loop BB7_30 Depth 2 move $a3, $zero - add.d $a5, $s1, $a0 - add.d $a6, $s0, $a0 - move $a7, $a1 + add.d $a6, $s1, $a0 + add.d $a7, $s0, $a0 + move $t0, $a4 .p2align 4, , 16 .LBB7_30: # Parent Loop BB7_29 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $t0, $a6, $a7 - ldptr.w $t0, $t0, 10000 - add.d $t1, $a5, $a7 + add.d $t1, $a7, $t0 ldptr.w $t1, $t1, 10000 - movgr2fr.w $fa1, $t0 + add.d $t2, $a6, $t0 + ldptr.w $t2, $t2, 10000 + movgr2fr.w $fa1, $t1 ffint.d.w $fa1, $fa1 - movgr2fr.w $fa2, $t1 + movgr2fr.w $fa2, $t2 ffint.d.w $fa2, $fa2 fsub.d $fa3, $fa1, $fa2 fabs.d $fa3, $fa3 @@ -538,13 +537,13 @@ main: # @main bceqz $fcc0, .LBB7_38 # %bb.31: # %.critedge.i # in Loop: Header=BB7_30 Depth=2 - addi.d $a7, $a7, 4 + addi.d $t0, $t0, 4 addi.w $a3, $a3, 1 - bnez $a7, .LBB7_30 + bnez $t0, .LBB7_30 # %bb.32: # in Loop: Header=BB7_29 Depth=1 addi.d $a2, $a2, 1 add.d $a0, $a0, $s3 - bne $a2, $a4, .LBB7_29 + bne $a2, $a5, .LBB7_29 # %bb.33: # %check_FP.exit lu12i.w $s4, 9 ori $a0, $s4, 3137 @@ -637,10 +636,6 @@ main: # @main pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) ld.d $a0, $a0, 0 - lu12i.w $a1, -487882 - ori $a1, $a1, 2289 - lu32i.d $a1, 325813 - lu52i.d $a1, $a1, 1006 st.d $a1, $sp, 0 movfr2gr.d $a4, $fa1 movfr2gr.d $a7, $fa2 diff --git a/results/SingleSource/Benchmarks/Polybench/stencils/adi/CMakeFiles/adi.dir/adi.s b/results/SingleSource/Benchmarks/Polybench/stencils/adi/CMakeFiles/adi.dir/adi.s index c44fef1e..11fba93a 100644 --- a/results/SingleSource/Benchmarks/Polybench/stencils/adi/CMakeFiles/adi.dir/adi.s +++ b/results/SingleSource/Benchmarks/Polybench/stencils/adi/CMakeFiles/adi.dir/adi.s @@ -111,21 +111,9 @@ polybench_alloc_data: # @polybench_alloc_data .LCPI7_0: .dword 0 # 0x0 .dword 1 # 0x1 -.LCPI7_4: - .dword 0x409f3c0000000001 # double 1999.0000000000002 - .dword 0x408f400000000001 # double 1000.0000000000001 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 .LCPI7_1: - .dword 0x409f400000000001 # double 2000.0000000000002 -.LCPI7_2: - .dword 0x40af420000000001 # double 4001.0000000000005 -.LCPI7_3: + .dword 0x409f3c0000000001 # double 1999.0000000000002 .dword 0x408f400000000001 # double 1000.0000000000001 -.LCPI7_5: - .dword 0x409f440000000001 # double 2001.0000000000002 -.LCPI7_6: - .dword 0xc0af3e0000000001 # double -3999.0000000000005 .text .globl main .p2align 5 @@ -164,7 +152,7 @@ main: # @main pcaddu18i $ra, %call36(posix_memalign) jirl $ra, $ra, 0 ld.d $a1, $sp, 112 - st.d $a1, $sp, 48 # 8-byte Folded Spill + st.d $a1, $sp, 96 # 8-byte Folded Spill beqz $a1, .LBB7_39 # %bb.3: # %polybench_alloc_data.exit bnez $a0, .LBB7_39 @@ -205,7 +193,6 @@ main: # @main ori $a3, $a3, 3920 ori $a4, $zero, 1000 move $a5, $fp - ld.d $t0, $sp, 48 # 8-byte Folded Reload .p2align 4, , 16 .LBB7_9: # %.preheader.i # =>This Loop Header: Depth=1 @@ -238,23 +225,24 @@ main: # @main # %bb.12: # %init_array.exit lu12i.w $a1, 1951 ori $a1, $a1, 704 - add.d $a1, $t0, $a1 - st.d $a1, $sp, 96 # 8-byte Folded Spill + ld.d $a3, $sp, 96 # 8-byte Folded Reload + add.d $a1, $a3, $a1 + st.d $a1, $sp, 88 # 8-byte Folded Spill lu12i.w $a1, 1 ori $a1, $a1, 3912 add.d $a2, $s2, $a1 - st.d $a2, $sp, 80 # 8-byte Folded Spill + st.d $a2, $sp, 72 # 8-byte Folded Spill add.d $a2, $fp, $a1 st.d $a2, $sp, 40 # 8-byte Folded Spill add.d $a2, $s1, $a1 - st.d $a2, $sp, 72 # 8-byte Folded Spill - add.d $a2, $s2, $s4 st.d $a2, $sp, 64 # 8-byte Folded Spill + add.d $a2, $s2, $s4 + st.d $a2, $sp, 56 # 8-byte Folded Spill lu12i.w $a2, 1949 ori $a2, $a2, 904 - add.d $a2, $t0, $a2 + add.d $a2, $a3, $a2 st.d $a2, $sp, 32 # 8-byte Folded Spill - add.d $a1, $t0, $a1 + add.d $a1, $a3, $a1 st.d $a1, $sp, 24 # 8-byte Folded Spill ori $t0, $zero, 1 lu12i.w $s5, 3 @@ -262,12 +250,10 @@ main: # @main st.d $a1, $sp, 16 # 8-byte Folded Spill ori $a1, $s5, 3704 st.d $a1, $sp, 104 # 8-byte Folded Spill - lu52i.d $t3, $zero, 1023 + lu52i.d $t5, $zero, 1023 ori $a1, $a0, 208 - st.d $a1, $sp, 88 # 8-byte Folded Spill + st.d $a1, $sp, 80 # 8-byte Folded Spill ori $a2, $zero, 1 - pcalau12i $t8, %pc_hi20(.LCPI7_3) - pcalau12i $s7, %pc_hi20(.LCPI7_1) .p2align 4, , 16 .LBB7_13: # %.preheader154.i # =>This Loop Header: Depth=1 @@ -279,16 +265,15 @@ main: # @main # Child Loop BB7_29 Depth 3 # Child Loop BB7_27 Depth 3 # Child Loop BB7_31 Depth 3 - st.d $a2, $sp, 56 # 8-byte Folded Spill - move $t6, $zero + st.d $a2, $sp, 48 # 8-byte Folded Spill + move $ra, $zero ld.d $a6, $sp, 32 # 8-byte Folded Reload ld.d $t1, $sp, 16 # 8-byte Folded Reload - ld.d $a5, $sp, 64 # 8-byte Folded Reload - ld.d $a4, $sp, 72 # 8-byte Folded Reload - ld.d $a2, $sp, 40 # 8-byte Folded Reload - ld.d $a3, $sp, 80 # 8-byte Folded Reload + ld.d $a5, $sp, 56 # 8-byte Folded Reload + ld.d $a4, $sp, 64 # 8-byte Folded Reload + ld.d $a3, $sp, 40 # 8-byte Folded Reload + ld.d $a2, $sp, 72 # 8-byte Folded Reload ori $a7, $zero, 1 - ld.d $t2, $sp, 48 # 8-byte Folded Reload .p2align 4, , 16 .LBB7_14: # %.lver.check # Parent Loop BB7_13 Depth=1 @@ -296,55 +281,63 @@ main: # @main # Child Loop BB7_19 Depth 3 # Child Loop BB7_17 Depth 3 # Child Loop BB7_21 Depth 3 - mul.d $t4, $t6, $s4 + mul.d $t4, $ra, $s4 add.d $a1, $t4, $s4 slli.d $s3, $a7, 3 - stx.d $t3, $t2, $s3 - mul.d $t7, $a7, $s4 - stx.d $zero, $s1, $t7 - fldx.d $fa0, $t2, $s3 + ld.d $t3, $sp, 96 # 8-byte Folded Reload + move $t2, $t5 + stx.d $t5, $t3, $s3 + mul.d $t5, $a7, $s4 + stx.d $zero, $s1, $t5 + fldx.d $fa0, $t3, $s3 add.d $s0, $s1, $a1 ld.d $t3, $sp, 104 # 8-byte Folded Reload - add.d $t5, $t4, $t3 - add.d $t3, $s2, $t5 - fstx.d $fa0, $s2, $t7 - lu12i.w $ra, 1 - ori $s6, $ra, 3888 - ori $s8, $ra, 3880 - pcalau12i $t7, %pc_hi20(.LCPI7_2) - pcalau12i $t4, %pc_hi20(.LCPI7_4) - ori $ra, $ra, 3896 + add.d $t4, $t4, $t3 + add.d $t3, $s2, $t4 + fstx.d $fa0, $s2, $t5 + ori $t8, $zero, 1 + lu32i.d $t8, -49152 + lu12i.w $t5, 1 + ori $s6, $t5, 3888 + ori $s7, $t5, 3880 + ori $t6, $zero, 1 + lu32i.d $t6, -48640 + pcalau12i $t7, %pc_hi20(.LCPI7_1) + ori $s8, $t5, 3896 bgeu $s0, $t3, .LBB7_18 # %bb.15: # %.lver.check # in Loop: Header=BB7_14 Depth=2 - add.d $t3, $s1, $t5 - add.d $t5, $s2, $a1 - bgeu $t5, $t3, .LBB7_18 + add.d $t3, $s1, $t4 + add.d $t4, $s2, $a1 + bgeu $t4, $t3, .LBB7_18 # %bb.16: # %.ph.lver.orig.preheader # in Loop: Header=BB7_14 Depth=2 - move $a1, $a2 - ld.d $t5, $sp, 88 # 8-byte Folded Reload + move $a1, $a3 + ld.d $t4, $sp, 80 # 8-byte Folded Reload .p2align 4, , 16 .LBB7_17: # %.ph.lver.orig # Parent Loop BB7_13 Depth=1 # Parent Loop BB7_14 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $t3, $a4, $t5 - fldx.d $fa0, $t3, $s8 - fld.d $fa1, $s7, %pc_lo12(.LCPI7_1) - fld.d $fa2, $t7, %pc_lo12(.LCPI7_2) + add.d $t3, $a4, $t4 + fldx.d $fa0, $t3, $s7 + lu52i.d $t5, $t8, 1033 + movgr2fr.d $fa1, $t5 fmul.d $fa0, $fa0, $fa1 + lu52i.d $t5, $t6, 1034 + movgr2fr.d $fa2, $t5 fsub.d $fa0, $fa2, $fa0 fdiv.d $fa2, $fa1, $fa0 fstx.d $fa2, $t3, $s6 fld.d $fa2, $a1, -8 - fld.d $fa3, $t8, %pc_lo12(.LCPI7_3) - vld $vr4, $a1, 0 - vld $vr5, $t4, %pc_lo12(.LCPI7_4) - fmul.d $fa2, $fa2, $fa3 - vfmul.d $vr3, $vr4, $vr5 + vld $vr3, $a1, 0 + vld $vr4, $t7, %pc_lo12(.LCPI7_1) + lu52i.d $t3, $t8, 1032 + movgr2fr.d $fa5, $t3 + fmul.d $fa2, $fa2, $fa5 + vfmul.d $vr3, $vr3, $vr4 vreplvei.d $vr4, $vr3, 0 - add.d $t3, $a5, $t5 + add.d $t3, $a5, $t4 fldx.d $fa5, $t3, $s6 fsub.d $fa2, $fa2, $fa4 vreplvei.d $vr3, $vr3, 1 @@ -352,75 +345,78 @@ main: # @main fmul.d $fa1, $fa5, $fa1 fadd.d $fa1, $fa2, $fa1 fdiv.d $fa0, $fa1, $fa0 - fstx.d $fa0, $t3, $ra - addi.d $t5, $t5, 8 + fstx.d $fa0, $t3, $s8 + addi.d $t4, $t4, 8 add.d $a1, $a1, $s4 - bnez $t5, .LBB7_17 + bnez $t4, .LBB7_17 b .LBB7_20 .p2align 4, , 16 .LBB7_18: # %.ph # in Loop: Header=BB7_14 Depth=2 - fldx.d $fa0, $s1, $a1 - fldx.d $fa1, $s2, $a1 + fldx.d $fa1, $s1, $a1 + fldx.d $fa0, $s2, $a1 ori $a1, $a0, 208 - move $t5, $a2 + move $t4, $a3 .p2align 4, , 16 .LBB7_19: # Parent Loop BB7_13 Depth=1 # Parent Loop BB7_14 Depth=2 # => This Inner Loop Header: Depth=3 - fld.d $fa2, $s7, %pc_lo12(.LCPI7_1) - fld.d $fa3, $t7, %pc_lo12(.LCPI7_2) - fmul.d $fa0, $fa0, $fa2 - fsub.d $fa3, $fa3, $fa0 - fdiv.d $fa0, $fa2, $fa3 + lu52i.d $t3, $t8, 1033 + movgr2fr.d $fa2, $t3 + fmul.d $fa1, $fa1, $fa2 + lu52i.d $t3, $t6, 1034 + movgr2fr.d $fa3, $t3 + fsub.d $fa3, $fa3, $fa1 + fdiv.d $fa1, $fa2, $fa3 add.d $t3, $a4, $a1 - fstx.d $fa0, $t3, $s6 - fld.d $fa4, $t5, -8 - fld.d $fa5, $t8, %pc_lo12(.LCPI7_3) - vld $vr6, $t5, 0 - vld $vr7, $t4, %pc_lo12(.LCPI7_4) - fmul.d $fa4, $fa4, $fa5 - vfmul.d $vr5, $vr6, $vr7 + fstx.d $fa1, $t3, $s6 + fld.d $fa4, $t4, -8 + vld $vr5, $t4, 0 + vld $vr6, $t7, %pc_lo12(.LCPI7_1) + lu52i.d $t3, $t8, 1032 + movgr2fr.d $fa7, $t3 + fmul.d $fa4, $fa4, $fa7 + vfmul.d $vr5, $vr5, $vr6 vreplvei.d $vr6, $vr5, 0 fsub.d $fa4, $fa4, $fa6 vreplvei.d $vr5, $vr5, 1 fadd.d $fa4, $fa4, $fa5 - fmul.d $fa1, $fa1, $fa2 - fadd.d $fa1, $fa4, $fa1 - fdiv.d $fa1, $fa1, $fa3 - add.d $t3, $a3, $a1 - fstx.d $fa1, $t3, $s6 + fmul.d $fa0, $fa0, $fa2 + fadd.d $fa0, $fa4, $fa0 + fdiv.d $fa0, $fa0, $fa3 + add.d $t3, $a2, $a1 + fstx.d $fa0, $t3, $s6 addi.d $a1, $a1, 8 - add.d $t5, $t5, $s4 + add.d $t4, $t4, $s4 bnez $a1, .LBB7_19 .LBB7_20: # %.loopexit34 # in Loop: Header=BB7_14 Depth=2 - ld.d $a1, $sp, 96 # 8-byte Folded Reload - lu52i.d $t3, $zero, 1023 - stx.d $t3, $a1, $s3 + ld.d $a1, $sp, 88 # 8-byte Folded Reload + move $t5, $t2 + stx.d $t2, $a1, $s3 ori $a1, $zero, 999 move $t4, $a6 - move $t5, $t1 + move $t6, $t1 .p2align 4, , 16 .LBB7_21: # Parent Loop BB7_13 Depth=1 # Parent Loop BB7_14 Depth=2 # => This Inner Loop Header: Depth=3 - fldx.d $fa0, $s1, $t5 + fldx.d $fa0, $s1, $t6 fldx.d $fa1, $t4, $s4 - fldx.d $fa2, $s2, $t5 + fldx.d $fa2, $s2, $t6 fmul.d $fa0, $fa0, $fa1 fadd.d $fa0, $fa0, $fa2 fst.d $fa0, $t4, 0 addi.d $a1, $a1, -1 - addi.d $t5, $t5, -8 + addi.d $t6, $t6, -8 ori $s3, $a0, 192 add.d $t4, $t4, $s3 bltu $t0, $a1, .LBB7_21 # %bb.22: # in Loop: Header=BB7_14 Depth=2 addi.d $a7, $a7, 1 - addi.d $t6, $t6, 1 - add.d $a3, $a3, $s4 - addi.d $a2, $a2, 8 + addi.d $ra, $ra, 1 + add.d $a2, $a2, $s4 + addi.d $a3, $a3, 8 add.d $a4, $a4, $s4 add.d $a5, $a5, $s4 add.d $t1, $t1, $s4 @@ -432,10 +428,10 @@ main: # @main move $a2, $zero ori $a3, $zero, 1 ori $a4, $s5, 3696 - ld.d $a5, $sp, 64 # 8-byte Folded Reload + ld.d $a5, $sp, 56 # 8-byte Folded Reload ld.d $a6, $sp, 24 # 8-byte Folded Reload - ld.d $a7, $sp, 72 # 8-byte Folded Reload - ld.d $t1, $sp, 80 # 8-byte Folded Reload + ld.d $a7, $sp, 64 # 8-byte Folded Reload + ld.d $t1, $sp, 72 # 8-byte Folded Reload .p2align 4, , 16 .LBB7_24: # %.lver.check49 # Parent Loop BB7_13 Depth=1 @@ -443,106 +439,116 @@ main: # @main # Child Loop BB7_29 Depth 3 # Child Loop BB7_27 Depth 3 # Child Loop BB7_31 Depth 3 - mul.d $t6, $a2, $s4 - add.d $t5, $t6, $s4 - mul.d $t7, $a3, $s4 - stx.d $t3, $fp, $t7 - stx.d $zero, $s1, $t7 - fldx.d $fa0, $fp, $t7 - add.d $t3, $s1, $t5 - ld.d $a1, $sp, 104 # 8-byte Folded Reload - add.d $s0, $t6, $a1 - add.d $t2, $s2, $s0 - fstx.d $fa0, $s2, $t7 - pcalau12i $t4, %pc_hi20(.LCPI7_5) - pcalau12i $a1, %pc_hi20(.LCPI7_6) - bgeu $t3, $t2, .LBB7_28 + mul.d $ra, $a2, $s4 + add.d $a1, $ra, $s4 + mul.d $t6, $a3, $s4 + stx.d $t5, $fp, $t6 + stx.d $zero, $s1, $t6 + fldx.d $fa0, $fp, $t6 + add.d $t3, $s1, $a1 + ld.d $t4, $sp, 104 # 8-byte Folded Reload + add.d $t5, $ra, $t4 + add.d $s0, $s2, $t5 + fstx.d $fa0, $s2, $t6 + ori $t7, $zero, 1 + lu32i.d $t7, -48128 + ori $t4, $zero, 1 + lu32i.d $t4, -49664 + bgeu $t3, $s0, .LBB7_28 # %bb.25: # %.lver.check49 # in Loop: Header=BB7_24 Depth=2 - add.d $t2, $s1, $s0 - add.d $t3, $s2, $t5 - bgeu $t3, $t2, .LBB7_28 + add.d $t3, $s1, $t5 + add.d $t5, $s2, $a1 + bgeu $t5, $t3, .LBB7_28 # %bb.26: # %.ph50.lver.orig.preheader # in Loop: Header=BB7_24 Depth=2 - ori $t5, $a0, 208 + ori $a1, $a0, 208 .p2align 4, , 16 .LBB7_27: # %.ph50.lver.orig # Parent Loop BB7_13 Depth=1 # Parent Loop BB7_24 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $t2, $a7, $t5 - fldx.d $fa0, $t2, $s8 - fld.d $fa1, $t8, %pc_lo12(.LCPI7_3) - fld.d $fa2, $t4, %pc_lo12(.LCPI7_5) + add.d $t3, $a7, $a1 + fldx.d $fa0, $t3, $s7 + lu52i.d $t5, $t8, 1032 + movgr2fr.d $fa1, $t5 fmul.d $fa0, $fa0, $fa1 + lu52i.d $t5, $t7, 1033 + movgr2fr.d $fa2, $t5 fsub.d $fa0, $fa2, $fa0 fdiv.d $fa2, $fa1, $fa0 - fstx.d $fa2, $t2, $s6 - add.d $t2, $a6, $t5 - fld.d $fa2, $t2, -16 - fld.d $fa3, $s7, %pc_lo12(.LCPI7_1) - fldx.d $fa4, $t2, $s6 - fld.d $fa5, $a1, %pc_lo12(.LCPI7_6) + fstx.d $fa2, $t3, $s6 + add.d $t3, $a6, $a1 + fld.d $fa2, $t3, -16 + lu52i.d $t5, $t8, 1033 + movgr2fr.d $fa3, $t5 + fldx.d $fa4, $t3, $s6 fmul.d $fa2, $fa2, $fa3 + lu52i.d $t5, $t4, -1014 + movgr2fr.d $fa5, $t5 fmul.d $fa4, $fa4, $fa5 - ori $t3, $s5, 3696 - fldx.d $fa5, $t2, $t3 - add.d $t2, $a5, $t5 - fldx.d $fa6, $t2, $s6 + ori $t5, $s5, 3696 + fldx.d $fa5, $t3, $t5 + add.d $t3, $a5, $a1 + fldx.d $fa6, $t3, $s6 fadd.d $fa2, $fa2, $fa4 fmul.d $fa3, $fa5, $fa3 fadd.d $fa2, $fa2, $fa3 fmul.d $fa1, $fa6, $fa1 fadd.d $fa1, $fa2, $fa1 fdiv.d $fa0, $fa1, $fa0 - addi.d $t5, $t5, 8 - fstx.d $fa0, $t2, $ra - bnez $t5, .LBB7_27 + addi.d $a1, $a1, 8 + fstx.d $fa0, $t3, $s8 + bnez $a1, .LBB7_27 b .LBB7_30 .p2align 4, , 16 .LBB7_28: # %.ph50 # in Loop: Header=BB7_24 Depth=2 - fldx.d $fa0, $s1, $t5 - fldx.d $fa1, $s2, $t5 - ori $t5, $a0, 208 + fldx.d $fa1, $s1, $a1 + fldx.d $fa0, $s2, $a1 + ori $a1, $a0, 208 .p2align 4, , 16 .LBB7_29: # Parent Loop BB7_13 Depth=1 # Parent Loop BB7_24 Depth=2 # => This Inner Loop Header: Depth=3 - fld.d $fa2, $t8, %pc_lo12(.LCPI7_3) - fld.d $fa3, $t4, %pc_lo12(.LCPI7_5) - fmul.d $fa0, $fa0, $fa2 - fsub.d $fa3, $fa3, $fa0 - fdiv.d $fa0, $fa2, $fa3 - add.d $t2, $a7, $t5 - fstx.d $fa0, $t2, $s6 - add.d $t2, $a6, $t5 - fld.d $fa4, $t2, -16 - fldx.d $fa5, $t2, $s6 - fld.d $fa6, $a1, %pc_lo12(.LCPI7_6) - fld.d $fa7, $s7, %pc_lo12(.LCPI7_1) - ori $t3, $s5, 3696 - fldx.d $ft0, $t2, $t3 - fmul.d $fa5, $fa5, $fa6 + lu52i.d $t3, $t8, 1032 + movgr2fr.d $fa2, $t3 + fmul.d $fa1, $fa1, $fa2 + lu52i.d $t3, $t7, 1033 + movgr2fr.d $fa3, $t3 + fsub.d $fa3, $fa3, $fa1 + fdiv.d $fa1, $fa2, $fa3 + add.d $t3, $a7, $a1 + fstx.d $fa1, $t3, $s6 + add.d $t3, $a6, $a1 + fld.d $fa4, $t3, -16 + fldx.d $fa5, $t3, $s6 + ori $t5, $s5, 3696 + fldx.d $fa6, $t3, $t5 + lu52i.d $t3, $t8, 1033 + lu52i.d $t5, $t4, -1014 + movgr2fr.d $fa7, $t5 + fmul.d $fa5, $fa5, $fa7 + movgr2fr.d $fa7, $t3 fmul.d $fa4, $fa4, $fa7 fadd.d $fa4, $fa4, $fa5 - fmul.d $fa5, $ft0, $fa7 + fmul.d $fa5, $fa6, $fa7 fadd.d $fa4, $fa4, $fa5 - fmul.d $fa1, $fa1, $fa2 - fadd.d $fa1, $fa4, $fa1 - fdiv.d $fa1, $fa1, $fa3 - add.d $t2, $t1, $t5 - addi.d $t5, $t5, 8 - fstx.d $fa1, $t2, $s6 - bnez $t5, .LBB7_29 + fmul.d $fa0, $fa0, $fa2 + fadd.d $fa0, $fa4, $fa0 + fdiv.d $fa0, $fa0, $fa3 + add.d $t3, $t1, $a1 + addi.d $a1, $a1, 8 + fstx.d $fa0, $t3, $s6 + bnez $a1, .LBB7_29 .LBB7_30: # %.loopexit51 # in Loop: Header=BB7_24 Depth=2 - add.d $a1, $fp, $t6 - add.d $t2, $fp, $t7 - lu52i.d $t3, $zero, 1023 - stptr.d $t3, $t2, 7992 - ld.d $t2, $sp, 104 # 8-byte Folded Reload - fldx.d $fa0, $a1, $t2 + add.d $a1, $fp, $ra + add.d $t3, $fp, $t6 + move $t5, $t2 + stptr.d $t2, $t3, 7992 + ld.d $t3, $sp, 104 # 8-byte Folded Reload + fldx.d $fa0, $a1, $t3 ori $a1, $zero, 999 move $t4, $a4 .p2align 4, , 16 @@ -568,7 +574,7 @@ main: # @main ori $a1, $zero, 999 bne $a3, $a1, .LBB7_24 # %bb.33: # in Loop: Header=BB7_13 Depth=1 - ld.d $a2, $sp, 56 # 8-byte Folded Reload + ld.d $a2, $sp, 48 # 8-byte Folded Reload addi.w $a2, $a2, 1 ori $a1, $zero, 501 bne $a2, $a1, .LBB7_13 @@ -648,7 +654,7 @@ main: # @main move $a0, $fp pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 - ld.d $a0, $sp, 48 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 move $a0, $s1 diff --git a/results/SingleSource/Benchmarks/Polybench/stencils/fdtd-2d/CMakeFiles/fdtd-2d.dir/fdtd-2d.s b/results/SingleSource/Benchmarks/Polybench/stencils/fdtd-2d/CMakeFiles/fdtd-2d.dir/fdtd-2d.s index 591c8be8..01eb0c66 100644 --- a/results/SingleSource/Benchmarks/Polybench/stencils/fdtd-2d/CMakeFiles/fdtd-2d.dir/fdtd-2d.s +++ b/results/SingleSource/Benchmarks/Polybench/stencils/fdtd-2d/CMakeFiles/fdtd-2d.dir/fdtd-2d.s @@ -111,14 +111,6 @@ polybench_alloc_data: # @polybench_alloc_data .LCPI7_0: .dword 0 # 0x0 .dword 1 # 0x1 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI7_1: - .dword 0x408f400000000000 # double 1000 -.LCPI7_2: - .dword 0x4092c00000000000 # double 1200 -.LCPI7_3: - .dword 0xbfe6666666666666 # double -0.69999999999999996 .text .globl main .p2align 5 @@ -139,10 +131,10 @@ main: # @main st.d $s8, $sp, 136 # 8-byte Folded Spill st.d $zero, $sp, 128 lu12i.w $s6, 2343 - ori $s2, $s6, 3072 + ori $s0, $s6, 3072 lu12i.w $a1, 1 addi.d $a0, $sp, 128 - move $a2, $s2 + move $a2, $s0 pcaddu18i $ra, %call36(posix_memalign) jirl $ra, $ra, 0 ld.d $fp, $sp, 128 @@ -153,7 +145,7 @@ main: # @main st.d $zero, $sp, 128 lu12i.w $a1, 1 addi.d $a0, $sp, 128 - move $a2, $s2 + move $a2, $s0 pcaddu18i $ra, %call36(posix_memalign) jirl $ra, $ra, 0 ld.d $a1, $sp, 128 @@ -165,7 +157,7 @@ main: # @main st.d $zero, $sp, 128 lu12i.w $a1, 1 addi.d $a0, $sp, 128 - move $a2, $s2 + move $a2, $s0 pcaddu18i $ra, %call36(posix_memalign) jirl $ra, $ra, 0 ld.d $s1, $sp, 128 @@ -176,7 +168,7 @@ main: # @main st.d $zero, $sp, 128 lu12i.w $a1, 1 addi.d $a0, $sp, 128 - move $a2, $s2 + move $a2, $s0 pcaddu18i $ra, %call36(posix_memalign) jirl $ra, $ra, 0 ld.d $s5, $sp, 128 @@ -187,7 +179,7 @@ main: # @main st.d $zero, $sp, 128 lu12i.w $a1, 1 addi.d $a0, $sp, 128 - move $a2, $s2 + move $a2, $s0 pcaddu18i $ra, %call36(posix_memalign) jirl $ra, $ra, 0 ld.d $s3, $sp, 128 @@ -198,7 +190,7 @@ main: # @main st.d $zero, $sp, 128 lu12i.w $a1, 1 addi.d $a0, $sp, 128 - move $a2, $s2 + move $a2, $s0 pcaddu18i $ra, %call36(posix_memalign) jirl $ra, $ra, 0 ld.d $s7, $sp, 128 @@ -212,11 +204,12 @@ main: # @main ori $a2, $zero, 4000 pcaddu18i $ra, %call36(posix_memalign) jirl $ra, $ra, 0 - ld.d $s0, $sp, 128 - beqz $s0, .LBB7_93 + ld.d $s2, $sp, 128 + beqz $s2, .LBB7_93 # %bb.13: # %polybench_alloc_data.exit48 bnez $a0, .LBB7_93 # %bb.14: # %polybench_alloc_data.exit50 + st.d $s0, $sp, 32 # 8-byte Folded Spill ori $a0, $zero, 0 lu32i.d $a0, 1 vreplgr2vr.d $vr0, $a0 @@ -224,7 +217,7 @@ main: # @main ori $a0, $a0, 96 ori $a1, $zero, 4000 ori $a2, $zero, 4016 - ld.d $t1, $sp, 120 # 8-byte Folded Reload + ld.d $t3, $sp, 120 # 8-byte Folded Reload .p2align 4, , 16 .LBB7_15: # %vector.body # =>This Inner Loop Header: Depth=1 @@ -247,7 +240,7 @@ main: # @main movgr2fr.d $fa1, $a3 ffint.d.l $fa1, $fa1 vextrins.d $vr1, $vr2, 16 - add.d $a3, $s0, $a0 + add.d $a3, $s2, $a0 vstx $vr3, $a3, $a1 vstx $vr1, $a3, $a2 addi.d $a0, $a0, 32 @@ -258,9 +251,9 @@ main: # @main move $a2, $zero move $a3, $zero move $a4, $zero - sub.d $a0, $t1, $fp + sub.d $a0, $t3, $fp sub.d $a5, $s1, $fp - sub.d $a6, $s1, $t1 + sub.d $a6, $s1, $t3 sltui $a0, $a0, 16 sltui $a5, $a5, 16 or $a0, $a0, $a5 @@ -268,28 +261,32 @@ main: # @main or $a5, $a0, $a5 lu12i.w $a0, -3 ori $a6, $a0, 2688 - pcalau12i $a0, %pc_hi20(.LCPI7_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI7_1) - pcalau12i $a0, %pc_hi20(.LCPI7_2) - fld.d $fa2, $a0, %pc_lo12(.LCPI7_2) + ori $t0, $zero, 0 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a7, $a0, 1032 + movgr2fr.d $fa0, $a7 lu12i.w $a0, 2 ori $s8, $a0, 1408 - ori $a7, $zero, 1000 - move $t0, $fp - move $t2, $s1 - pcalau12i $t8, %pc_hi20(.LCPI7_0) + lu32i.d $t0, 180224 + lu52i.d $t0, $t0, 1033 + movgr2fr.d $fa1, $t0 + ori $t1, $zero, 1000 + move $t2, $fp + move $t4, $s1 + pcalau12i $s4, %pc_hi20(.LCPI7_0) b .LBB7_18 .p2align 4, , 16 .LBB7_17: # %middle.block115 # in Loop: Header=BB7_18 Depth=1 addi.d $a4, $a4, 1 + add.d $t4, $t4, $s8 + add.d $t3, $t3, $s8 add.d $t2, $t2, $s8 - add.d $t1, $t1, $s8 - add.d $t0, $t0, $s8 addi.w $a3, $a3, 3 addi.d $a2, $a2, 1 addi.w $a1, $a1, 2 - beq $a4, $a7, .LBB7_23 + beq $a4, $t1, .LBB7_23 .LBB7_18: # %.preheader.i # =>This Loop Header: Depth=1 # Child Loop BB7_22 Depth 2 @@ -297,78 +294,72 @@ main: # @main beqz $a5, .LBB7_21 # %bb.19: # %scalar.ph108.preheader # in Loop: Header=BB7_18 Depth=1 - move $t4, $zero - move $t3, $a6 + move $t6, $zero + move $t5, $a6 .p2align 4, , 16 .LBB7_20: # %scalar.ph108 # Parent Loop BB7_18 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $t5, $a1, $t4 - add.d $t6, $a3, $t4 - add.w $t4, $a2, $t4 - bstrpick.d $t7, $t4, 31, 0 - movgr2fr.d $fa0, $t7 - ffint.d.l $fa0, $fa0 - fdiv.d $fa0, $fa0, $fa1 - add.d $t7, $t0, $t3 - fstx.d $fa0, $t7, $s8 - bstrpick.d $t5, $t5, 31, 0 - movgr2fr.d $fa0, $t5 - ffint.d.l $fa0, $fa0 - fdiv.d $fa0, $fa0, $fa2 - add.d $t5, $t1, $t3 - fstx.d $fa0, $t5, $s8 - bstrpick.d $t5, $t6, 31, 0 - movgr2fr.d $fa0, $t5 - ffint.d.l $fa0, $fa0 - fdiv.d $fa0, $fa0, $fa1 - add.d $t5, $t2, $t3 - addi.d $t3, $t3, 8 - fstx.d $fa0, $t5, $s8 - bnez $t3, .LBB7_20 + add.d $t7, $a1, $t6 + add.d $t8, $a3, $t6 + add.w $t6, $a2, $t6 + bstrpick.d $s0, $t6, 31, 0 + movgr2fr.d $fa2, $s0 + ffint.d.l $fa2, $fa2 + fdiv.d $fa2, $fa2, $fa0 + add.d $s0, $t2, $t5 + fstx.d $fa2, $s0, $s8 + bstrpick.d $t7, $t7, 31, 0 + movgr2fr.d $fa2, $t7 + ffint.d.l $fa2, $fa2 + fdiv.d $fa2, $fa2, $fa1 + add.d $t7, $t3, $t5 + fstx.d $fa2, $t7, $s8 + bstrpick.d $t7, $t8, 31, 0 + movgr2fr.d $fa2, $t7 + ffint.d.l $fa2, $fa2 + fdiv.d $fa2, $fa2, $fa0 + add.d $t7, $t4, $t5 + addi.d $t5, $t5, 8 + fstx.d $fa2, $t7, $s8 + bnez $t5, .LBB7_20 b .LBB7_17 .p2align 4, , 16 .LBB7_21: # %vector.ph109 # in Loop: Header=BB7_18 Depth=1 - vld $vr3, $t8, %pc_lo12(.LCPI7_0) - vreplgr2vr.d $vr0, $a4 - lu12i.w $t3, -3 - ori $t3, $t3, 2688 + vld $vr3, $s4, %pc_lo12(.LCPI7_0) + vreplgr2vr.d $vr2, $a4 + lu12i.w $t5, -3 + ori $t5, $t5, 2688 .p2align 4, , 16 .LBB7_22: # %vector.body110 # Parent Loop BB7_18 Depth=1 # => This Inner Loop Header: Depth=2 vaddi.du $vr4, $vr3, 1 - vmul.d $vr4, $vr4, $vr0 + vmul.d $vr4, $vr4, $vr2 vffint.d.lu $vr4, $vr4 - ori $t4, $zero, 0 - lu32i.d $t4, -49152 - lu52i.d $t4, $t4, 1032 - vreplgr2vr.d $vr5, $t4 + vreplgr2vr.d $vr5, $a7 vfdiv.d $vr4, $vr4, $vr5 - add.d $t4, $t0, $t3 - vstx $vr4, $t4, $s8 + add.d $t6, $t2, $t5 + vstx $vr4, $t6, $s8 vaddi.du $vr4, $vr3, 3 vaddi.du $vr3, $vr3, 2 - vmul.d $vr6, $vr3, $vr0 + vmul.d $vr6, $vr3, $vr2 vffint.d.lu $vr6, $vr6 - ori $t4, $zero, 0 - lu32i.d $t4, 180224 - lu52i.d $t4, $t4, 1033 - vreplgr2vr.d $vr7, $t4 + vreplgr2vr.d $vr7, $t0 vfdiv.d $vr6, $vr6, $vr7 - add.d $t4, $t1, $t3 - vstx $vr6, $t4, $s8 - vmul.d $vr4, $vr4, $vr0 + add.d $t6, $t3, $t5 + vstx $vr6, $t6, $s8 + vmul.d $vr4, $vr4, $vr2 vffint.d.lu $vr4, $vr4 vfdiv.d $vr4, $vr4, $vr5 - add.d $t4, $t2, $t3 - addi.d $t3, $t3, 16 - vstx $vr4, $t4, $s8 - bnez $t3, .LBB7_22 + add.d $t6, $t4, $t5 + addi.d $t5, $t5, 16 + vstx $vr4, $t6, $s8 + bnez $t5, .LBB7_22 b .LBB7_17 .LBB7_23: # %.preheader80.i.preheader - st.d $t8, $sp, 40 # 8-byte Folded Spill + st.d $s4, $sp, 40 # 8-byte Folded Spill move $t1, $zero lu12i.w $a1, 2341 ori $a2, $a1, 1656 @@ -378,30 +369,30 @@ main: # @main st.d $a1, $sp, 16 # 8-byte Folded Spill add.d $a1, $fp, $a1 ori $a3, $s6, 3064 - ld.d $t4, $sp, 120 # 8-byte Folded Reload + ld.d $t5, $sp, 120 # 8-byte Folded Reload st.d $a3, $sp, 8 # 8-byte Folded Spill - add.d $a3, $t4, $a3 + add.d $a3, $t5, $a3 addi.d $a4, $fp, 8 - add.d $a5, $fp, $s2 - add.d $a6, $s1, $s2 - add.d $a7, $t4, $s8 - st.d $s2, $sp, 32 # 8-byte Folded Spill - add.d $t0, $t4, $s2 + ld.d $t0, $sp, 32 # 8-byte Folded Reload + add.d $a5, $fp, $t0 + add.d $a6, $s1, $t0 + add.d $a7, $t5, $s8 + add.d $t0, $t5, $t0 sltu $a7, $a7, $a6 sltu $t0, $s1, $t0 - and $t5, $a7, $t0 + and $t6, $a7, $t0 sltu $a4, $a4, $a6 sltu $a5, $s1, $a5 - and $t6, $a4, $a5 + and $t7, $a4, $a5 sltu $a1, $s1, $a1 sltu $a4, $fp, $a2 and $a1, $a1, $a4 sltu $a3, $s1, $a3 - sltu $a2, $t4, $a2 + sltu $a2, $t5, $a2 and $a2, $a3, $a2 - or $t7, $a1, $a2 + or $t8, $a1, $a2 ori $a1, $a0, 1424 - add.d $a2, $t4, $a1 + add.d $a2, $t5, $a1 st.d $a2, $sp, 96 # 8-byte Folded Spill add.d $a2, $s1, $a1 st.d $a2, $sp, 88 # 8-byte Folded Spill @@ -418,17 +409,16 @@ main: # @main st.d $a2, $sp, 48 # 8-byte Folded Spill lu12i.w $a2, 4 ori $s6, $a2, 2816 - vldi $vr4, -800 - ori $s2, $zero, 1000 - ori $t8, $zero, 999 + vldi $vr1, -800 + ori $ra, $zero, 1000 lu12i.w $a3, 419430 ori $a3, $a3, 1638 lu32i.d $a3, 419430 lu52i.d $a3, $a3, -1026 - vreplgr2vr.d $vr0, $a3 - lu52i.d $a3, $zero, -1026 - vreplgr2vr.d $vr5, $a3 - st.d $s0, $sp, 112 # 8-byte Folded Spill + movgr2fr.d $fa0, $a3 + lu52i.d $a4, $zero, -1026 + vreplgr2vr.d $vr2, $a4 + st.d $s2, $sp, 112 # 8-byte Folded Spill .p2align 4, , 16 .LBB7_24: # %vector.ph179 # =>This Loop Header: Depth=1 @@ -442,82 +432,82 @@ main: # @main # Child Loop BB7_40 Depth 2 # Child Loop BB7_43 Depth 3 # Child Loop BB7_46 Depth 3 - alsl.d $a3, $t1, $s0, 3 - vldrepl.d $vr3, $a3, 0 - ld.d $a3, $sp, 48 # 8-byte Folded Reload + st.d $t1, $sp, 104 # 8-byte Folded Spill + alsl.d $a4, $t1, $s2, 3 + vldrepl.d $vr3, $a4, 0 + ld.d $a4, $sp, 48 # 8-byte Folded Reload .p2align 4, , 16 .LBB7_25: # %vector.body182 # Parent Loop BB7_24 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $a4, $t4, $a3 - vstx $vr3, $a4, $s8 - addi.d $a3, $a3, 32 - vstx $vr3, $a4, $a1 - bnez $a3, .LBB7_25 + add.d $a5, $t5, $a4 + vstx $vr3, $a5, $s8 + addi.d $a4, $a4, 32 + vstx $vr3, $a5, $a1 + bnez $a4, .LBB7_25 # %bb.26: # %.preheader76.i.preheader # in Loop: Header=BB7_24 Depth=1 - st.d $t1, $sp, 104 # 8-byte Folded Spill - ori $a4, $zero, 1 - move $a5, $t4 - move $a6, $s1 + ori $a5, $zero, 1 + move $a6, $t5 + move $a7, $s1 ld.d $t0, $sp, 88 # 8-byte Folded Reload ld.d $t2, $sp, 96 # 8-byte Folded Reload b .LBB7_28 .p2align 4, , 16 .LBB7_27: # %middle.block176 # in Loop: Header=BB7_28 Depth=2 - addi.d $a4, $a4, 1 + addi.d $a5, $a5, 1 add.d $t2, $t2, $s8 add.d $t0, $t0, $s8 + add.d $a7, $a7, $s8 add.d $a6, $a6, $s8 - add.d $a5, $a5, $s8 - beq $a4, $s2, .LBB7_31 + beq $a5, $ra, .LBB7_31 .LBB7_28: # %.preheader76.i # Parent Loop BB7_24 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB7_30 Depth 3 # Child Loop BB7_29 Depth 3 - ori $a3, $a0, 1392 - lu12i.w $a7, -3 - ori $t1, $a7, 2688 - beqz $t5, .LBB7_30 + ori $a4, $a0, 1392 + lu12i.w $t1, -3 + ori $t3, $t1, 2688 + beqz $t6, .LBB7_30 .p2align 4, , 16 .LBB7_29: # %scalar.ph165 # Parent Loop BB7_24 Depth=1 # Parent Loop BB7_28 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $t3, $a5, $t1 - add.d $s0, $a6, $t1 + add.d $t4, $a6, $t3 + add.d $s0, $a7, $t3 fldx.d $fa3, $s0, $s6 - fldx.d $fa6, $s0, $s8 - fldx.d $fa7, $t3, $s6 - fsub.d $fa3, $fa3, $fa6 - fmadd.d $fa3, $fa3, $fa4, $fa7 - addi.d $t1, $t1, 8 - fstx.d $fa3, $t3, $s6 - bnez $t1, .LBB7_29 + fldx.d $fa4, $s0, $s8 + fldx.d $fa5, $t4, $s6 + fsub.d $fa3, $fa3, $fa4 + fmadd.d $fa3, $fa3, $fa1, $fa5 + addi.d $t3, $t3, 8 + fstx.d $fa3, $t4, $s6 + bnez $t3, .LBB7_29 b .LBB7_27 .p2align 4, , 16 .LBB7_30: # %vector.body167 # Parent Loop BB7_24 Depth=1 # Parent Loop BB7_28 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $t3, $t2, $t1 - add.d $s0, $t0, $t1 - vldx $vr3, $s0, $a3 - vldx $vr6, $s0, $s8 - vld $vr7, $s0, -16 - vldx $vr8, $t0, $t1 - vldx $vr9, $t3, $a3 - vldx $vr10, $t3, $s8 - vfsub.d $vr3, $vr3, $vr7 - vfsub.d $vr6, $vr6, $vr8 - vfmadd.d $vr3, $vr3, $vr5, $vr9 - vfmadd.d $vr6, $vr6, $vr5, $vr10 - vstx $vr3, $t3, $a3 - addi.d $t1, $t1, 32 - vstx $vr6, $t3, $s8 - bnez $t1, .LBB7_30 + add.d $t4, $t2, $t3 + add.d $s0, $t0, $t3 + vldx $vr3, $s0, $a4 + vldx $vr4, $s0, $s8 + vld $vr5, $s0, -16 + vldx $vr6, $t0, $t3 + vldx $vr7, $t4, $a4 + vldx $vr8, $t4, $s8 + vfsub.d $vr3, $vr3, $vr5 + vfsub.d $vr4, $vr4, $vr6 + vfmadd.d $vr3, $vr3, $vr2, $vr7 + vfmadd.d $vr4, $vr4, $vr2, $vr8 + vstx $vr3, $t4, $a4 + addi.d $t3, $t3, 32 + vstx $vr4, $t4, $s8 + bnez $t3, .LBB7_30 b .LBB7_27 .p2align 4, , 16 .LBB7_31: # %.preheader75.i.preheader @@ -525,357 +515,365 @@ main: # @main move $t0, $zero move $t2, $zero ld.d $t3, $sp, 72 # 8-byte Folded Reload - ld.d $s0, $sp, 80 # 8-byte Folded Reload + ld.d $t4, $sp, 80 # 8-byte Folded Reload .p2align 4, , 16 .LBB7_32: # %.preheader75.i # Parent Loop BB7_24 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB7_34 Depth 3 # Child Loop BB7_37 Depth 3 - ori $s4, $zero, 1 - ori $a4, $a0, 1360 - ori $a5, $a0, 1376 - ori $a6, $a0, 1368 - bnez $t6, .LBB7_36 + ori $s2, $zero, 1 + ori $a5, $a0, 1360 + ori $a6, $a0, 1376 + ori $a7, $a0, 1368 + bnez $t7, .LBB7_36 # %bb.33: # %vector.body148.preheader # in Loop: Header=BB7_32 Depth=2 - lu12i.w $a7, -3 - ori $t1, $a7, 2720 + lu12i.w $t1, -3 + ori $s0, $t1, 2720 .p2align 4, , 16 .LBB7_34: # %vector.body148 # Parent Loop BB7_24 Depth=1 # Parent Loop BB7_32 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $s4, $s0, $t1 - add.d $ra, $t3, $t1 - vldx $vr3, $ra, $a4 - vldx $vr6, $ra, $a5 - ori $a7, $a0, 1352 - vldx $vr7, $ra, $a7 - vldx $vr8, $ra, $a6 - vldx $vr9, $s4, $a4 - vldx $vr10, $s4, $a5 - vfsub.d $vr3, $vr3, $vr7 - vfsub.d $vr6, $vr6, $vr8 - vfmadd.d $vr3, $vr3, $vr5, $vr9 - vfmadd.d $vr6, $vr6, $vr5, $vr10 - vstx $vr3, $s4, $a4 - addi.d $t1, $t1, 32 - vstx $vr6, $s4, $a5 - bnez $t1, .LBB7_34 + add.d $s2, $t4, $s0 + add.d $s4, $t3, $s0 + vldx $vr3, $s4, $a5 + vldx $vr4, $s4, $a6 + ori $t5, $a0, 1352 + vldx $vr5, $s4, $t5 + vldx $vr6, $s4, $a7 + vldx $vr7, $s2, $a5 + vldx $vr8, $s2, $a6 + vfsub.d $vr3, $vr3, $vr5 + vfsub.d $vr4, $vr4, $vr6 + vfmadd.d $vr3, $vr3, $vr2, $vr7 + vfmadd.d $vr4, $vr4, $vr2, $vr8 + vstx $vr3, $s2, $a5 + addi.d $s0, $s0, 32 + vstx $vr4, $s2, $a6 + bnez $s0, .LBB7_34 # %bb.35: # in Loop: Header=BB7_32 Depth=2 - ori $s4, $zero, 1197 + ori $s2, $zero, 1197 .LBB7_36: # %scalar.ph146.preheader # in Loop: Header=BB7_32 Depth=2 - addi.d $t1, $s4, -1200 - alsl.d $s4, $s4, $t0, 3 + addi.d $s0, $s2, -1200 + alsl.d $s2, $s2, $t0, 3 .p2align 4, , 16 .LBB7_37: # %scalar.ph146 # Parent Loop BB7_24 Depth=1 # Parent Loop BB7_32 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $a7, $s1, $s4 - fldx.d $fa3, $s1, $s4 - fld.d $fa6, $a7, -8 - fldx.d $fa7, $fp, $s4 - fsub.d $fa3, $fa3, $fa6 - fmadd.d $fa3, $fa3, $fa4, $fa7 - fstx.d $fa3, $fp, $s4 - addi.d $t1, $t1, 1 - addi.d $s4, $s4, 8 - bnez $t1, .LBB7_37 + add.d $t5, $s1, $s2 + fldx.d $fa3, $s1, $s2 + fld.d $fa4, $t5, -8 + fldx.d $fa5, $fp, $s2 + fsub.d $fa3, $fa3, $fa4 + fmadd.d $fa3, $fa3, $fa1, $fa5 + fstx.d $fa3, $fp, $s2 + addi.d $s0, $s0, 1 + addi.d $s2, $s2, 8 + bnez $s0, .LBB7_37 # %bb.38: # in Loop: Header=BB7_32 Depth=2 addi.d $t2, $t2, 1 - add.d $s0, $s0, $s8 + add.d $t4, $t4, $s8 add.d $t3, $t3, $s8 add.d $t0, $t0, $s8 - bne $t2, $s2, .LBB7_32 + bne $t2, $ra, .LBB7_32 # %bb.39: # %.preheader.i54.preheader # in Loop: Header=BB7_24 Depth=1 - move $ra, $zero - move $t0, $zero - move $s0, $t4 - ld.d $t3, $sp, 56 # 8-byte Folded Reload - ld.d $t2, $sp, 64 # 8-byte Folded Reload + move $t2, $zero + move $t3, $zero + ld.d $t5, $sp, 120 # 8-byte Folded Reload + move $s0, $t5 + ld.d $t0, $sp, 56 # 8-byte Folded Reload + ld.d $t4, $sp, 64 # 8-byte Folded Reload .p2align 4, , 16 .LBB7_40: # %.preheader.i54 # Parent Loop BB7_24 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB7_43 Depth 3 # Child Loop BB7_46 Depth 3 - beqz $t7, .LBB7_42 + beqz $t8, .LBB7_42 # %bb.41: # in Loop: Header=BB7_40 Depth=2 - move $t1, $zero + move $s2, $zero b .LBB7_45 .p2align 4, , 16 .LBB7_42: # %vector.body125.preheader # in Loop: Header=BB7_40 Depth=2 - lu12i.w $a7, -3 - ori $s4, $a7, 2720 + lu12i.w $t1, -3 + ori $s4, $t1, 2720 .p2align 4, , 16 .LBB7_43: # %vector.body125 # Parent Loop BB7_24 Depth=1 # Parent Loop BB7_40 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $a7, $t2, $s4 - vldx $vr3, $a7, $a4 - add.d $t1, $t3, $s4 - vldx $vr6, $t1, $a6 - vldx $vr7, $t1, $a4 - ori $t4, $a0, 1384 - vldx $vr8, $t1, $t4 - vldx $vr9, $t1, $a5 - vfsub.d $vr6, $vr6, $vr7 + add.d $t5, $t4, $s4 + vldx $vr3, $t5, $a5 + add.d $s2, $t0, $s4 + vldx $vr4, $s2, $a7 + vldx $vr5, $s2, $a5 + ori $t1, $a0, 1384 + vldx $vr6, $s2, $t1 + vldx $vr7, $s2, $a6 + vfsub.d $vr4, $vr4, $vr5 add.d $t1, $s0, $s4 - ori $t4, $a2, 2784 - vldx $vr7, $t1, $t4 - vfsub.d $vr8, $vr8, $vr9 - ori $t4, $a2, 2800 - vldx $vr9, $t1, $t4 - vfadd.d $vr6, $vr6, $vr7 - vldx $vr7, $t1, $a5 - vldx $vr10, $t1, $a3 - vfadd.d $vr8, $vr8, $vr9 - vldx $vr9, $a7, $a5 + ori $s2, $a2, 2784 + vldx $vr5, $t1, $s2 vfsub.d $vr6, $vr6, $vr7 - vfsub.d $vr7, $vr8, $vr10 - vfmadd.d $vr3, $vr6, $vr0, $vr3 - vfmadd.d $vr6, $vr7, $vr0, $vr9 - vstx $vr3, $a7, $a4 + ori $s2, $a2, 2800 + vldx $vr7, $t1, $s2 + vfadd.d $vr4, $vr4, $vr5 + vldx $vr5, $t1, $a6 + vldx $vr8, $t1, $a4 + vfadd.d $vr6, $vr6, $vr7 + vldx $vr7, $t5, $a6 + vfsub.d $vr4, $vr4, $vr5 + vfsub.d $vr5, $vr6, $vr8 + vreplgr2vr.d $vr6, $a3 + vfmadd.d $vr3, $vr4, $vr6, $vr3 + vfmadd.d $vr4, $vr5, $vr6, $vr7 + vstx $vr3, $t5, $a5 addi.d $s4, $s4, 32 - vstx $vr6, $a7, $a5 + vstx $vr4, $t5, $a6 bnez $s4, .LBB7_43 # %bb.44: # in Loop: Header=BB7_40 Depth=2 - ori $t1, $zero, 1196 - ld.d $t4, $sp, 120 # 8-byte Folded Reload + ori $s2, $zero, 1196 + ld.d $t5, $sp, 120 # 8-byte Folded Reload .LBB7_45: # %scalar.ph123.preheader # in Loop: Header=BB7_40 Depth=2 - alsl.d $s4, $t1, $ra, 3 - addi.d $t1, $t1, -1199 + alsl.d $s4, $s2, $t2, 3 + addi.d $s2, $s2, -1199 .p2align 4, , 16 .LBB7_46: # %scalar.ph123 # Parent Loop BB7_24 Depth=1 # Parent Loop BB7_40 Depth=2 # => This Inner Loop Header: Depth=3 - fldx.d $fa6, $s1, $s4 - add.d $a7, $fp, $s4 - fld.d $fa7, $a7, 8 - fldx.d $ft0, $fp, $s4 - add.d $a7, $t4, $s4 - fldx.d $ft1, $a7, $s8 - fldx.d $ft2, $t4, $s4 - pcalau12i $a7, %pc_hi20(.LCPI7_3) - fld.d $fa3, $a7, %pc_lo12(.LCPI7_3) - fsub.d $fa7, $fa7, $ft0 - fadd.d $fa7, $fa7, $ft1 - fsub.d $fa7, $fa7, $ft2 - fmadd.d $fa6, $fa7, $fa3, $fa6 - fstx.d $fa6, $s1, $s4 - addi.d $t1, $t1, 1 + add.d $t1, $fp, $s4 + fld.d $fa3, $t1, 8 + fldx.d $fa4, $fp, $s4 + add.d $t1, $t5, $s4 + fldx.d $fa5, $t1, $s8 + fldx.d $fa6, $t5, $s4 + fldx.d $fa7, $s1, $s4 + fsub.d $fa3, $fa3, $fa4 + fadd.d $fa3, $fa3, $fa5 + fsub.d $fa3, $fa3, $fa6 + fmadd.d $fa3, $fa3, $fa0, $fa7 + fstx.d $fa3, $s1, $s4 + addi.d $s2, $s2, 1 addi.d $s4, $s4, 8 - bnez $t1, .LBB7_46 + bnez $s2, .LBB7_46 # %bb.47: # in Loop: Header=BB7_40 Depth=2 - addi.d $t0, $t0, 1 - add.d $t2, $t2, $s8 - add.d $t3, $t3, $s8 + addi.d $t3, $t3, 1 + add.d $t4, $t4, $s8 + add.d $t0, $t0, $s8 add.d $s0, $s0, $s8 - add.d $ra, $ra, $s8 - bne $t0, $t8, .LBB7_40 + add.d $t2, $t2, $s8 + ori $t1, $zero, 999 + bne $t3, $t1, .LBB7_40 # %bb.48: # in Loop: Header=BB7_24 Depth=1 ld.d $t1, $sp, 104 # 8-byte Folded Reload addi.d $t1, $t1, 1 - ld.d $s0, $sp, 112 # 8-byte Folded Reload - ori $a7, $zero, 500 - bne $t1, $a7, .LBB7_24 + ld.d $s2, $sp, 112 # 8-byte Folded Reload + ori $t0, $zero, 500 + bne $t1, $t0, .LBB7_24 # %bb.49: # %vector.body188.preheader - ori $a7, $zero, 0 - lu32i.d $a7, 1 - vreplgr2vr.d $vr4, $a7 - lu12i.w $a7, -1 - ori $a7, $a7, 96 - ori $t0, $zero, 4000 - ori $t1, $zero, 4016 + ori $t0, $zero, 0 + lu32i.d $t0, 1 + vreplgr2vr.d $vr1, $t0 + lu12i.w $t0, -1 + ori $t0, $t0, 96 + ori $t1, $zero, 4000 + ori $t2, $zero, 4016 .p2align 4, , 16 .LBB7_50: # %vector.body188 # =>This Inner Loop Header: Depth=1 - vaddi.wu $vr5, $vr4, 2 - vpickve2gr.w $t2, $vr4, 1 - bstrpick.d $t2, $t2, 31, 0 - movgr2fr.d $fa6, $t2 - ffint.d.l $fa6, $fa6 - vpickve2gr.w $t2, $vr4, 0 - bstrpick.d $t2, $t2, 31, 0 - movgr2fr.d $fa7, $t2 - ffint.d.l $fa7, $fa7 - vextrins.d $vr7, $vr6, 16 - vpickve2gr.w $t2, $vr5, 1 - bstrpick.d $t2, $t2, 31, 0 - movgr2fr.d $fa6, $t2 - ffint.d.l $fa6, $fa6 - vpickve2gr.w $t2, $vr5, 0 - bstrpick.d $t2, $t2, 31, 0 - movgr2fr.d $fa5, $t2 - ffint.d.l $fa5, $fa5 - vextrins.d $vr5, $vr6, 16 - add.d $t2, $s0, $a7 - vstx $vr7, $t2, $t0 - vstx $vr5, $t2, $t1 - addi.d $a7, $a7, 32 - vaddi.wu $vr4, $vr4, 4 - bnez $a7, .LBB7_50 + vaddi.wu $vr2, $vr1, 2 + vpickve2gr.w $t3, $vr1, 1 + bstrpick.d $t3, $t3, 31, 0 + movgr2fr.d $fa3, $t3 + ffint.d.l $fa3, $fa3 + vpickve2gr.w $t3, $vr1, 0 + bstrpick.d $t3, $t3, 31, 0 + movgr2fr.d $fa4, $t3 + ffint.d.l $fa4, $fa4 + vextrins.d $vr4, $vr3, 16 + vpickve2gr.w $t3, $vr2, 1 + bstrpick.d $t3, $t3, 31, 0 + movgr2fr.d $fa3, $t3 + ffint.d.l $fa3, $fa3 + vpickve2gr.w $t3, $vr2, 0 + bstrpick.d $t3, $t3, 31, 0 + movgr2fr.d $fa2, $t3 + ffint.d.l $fa2, $fa2 + vextrins.d $vr2, $vr3, 16 + add.d $t3, $s2, $t0 + vstx $vr4, $t3, $t1 + vstx $vr2, $t3, $t2 + addi.d $t0, $t0, 32 + vaddi.wu $vr1, $vr1, 4 + bnez $t0, .LBB7_50 # %bb.51: # %.preheader.i58.preheader - move $a7, $zero move $t0, $zero move $t1, $zero move $t2, $zero - sub.d $t3, $s3, $s5 - sub.d $t4, $s7, $s5 - sub.d $t5, $s7, $s3 - sltui $t3, $t3, 16 + move $t8, $zero + sub.d $t4, $s3, $s5 + sub.d $t5, $s7, $s5 + sub.d $t6, $s7, $s3 sltui $t4, $t4, 16 - or $t3, $t3, $t4 - sltui $t4, $t5, 16 - or $t3, $t3, $t4 - lu12i.w $t4, -3 - ori $t4, $t4, 2688 - move $t6, $s5 - move $t7, $s3 - move $t8, $s7 + sltui $t5, $t5, 16 + or $t4, $t4, $t5 + sltui $t5, $t6, 16 + or $t3, $t4, $t5 + st.d $t3, $sp, 96 # 8-byte Folded Spill + lu12i.w $t3, -3 + ori $t3, $t3, 2688 + st.d $t3, $sp, 88 # 8-byte Folded Spill + ori $t5, $zero, 0 + ori $t6, $zero, 0 + lu32i.d $t6, -49152 + lu52i.d $t6, $t6, 1032 + movgr2fr.d $fa1, $t6 + lu32i.d $t5, 180224 + lu52i.d $t7, $t5, 1033 + movgr2fr.d $fa2, $t7 + move $s0, $s5 + move $ra, $s3 + move $t5, $s7 b .LBB7_53 .p2align 4, , 16 .LBB7_52: # %middle.block210 # in Loop: Header=BB7_53 Depth=1 - addi.d $t2, $t2, 1 - add.d $t8, $t8, $s8 - add.d $t7, $t7, $s8 - add.d $t6, $t6, $s8 - addi.w $t1, $t1, 3 - addi.d $t0, $t0, 1 - addi.w $a7, $a7, 2 - ori $t5, $zero, 1000 - beq $t2, $t5, .LBB7_58 + ld.d $t8, $sp, 104 # 8-byte Folded Reload + addi.d $t8, $t8, 1 + add.d $t5, $t5, $s8 + add.d $ra, $ra, $s8 + add.d $s0, $s0, $s8 + addi.w $t2, $t2, 3 + addi.d $t1, $t1, 1 + addi.w $t0, $t0, 2 + ori $t3, $zero, 1000 + beq $t8, $t3, .LBB7_58 .LBB7_53: # %.preheader.i58 # =>This Loop Header: Depth=1 # Child Loop BB7_57 Depth 2 # Child Loop BB7_55 Depth 2 + ld.d $t3, $sp, 96 # 8-byte Folded Reload + st.d $t8, $sp, 104 # 8-byte Folded Spill beqz $t3, .LBB7_56 # %bb.54: # %scalar.ph201.preheader # in Loop: Header=BB7_53 Depth=1 - move $s2, $zero - move $s0, $t4 + move $s4, $zero + ld.d $s2, $sp, 88 # 8-byte Folded Reload .p2align 4, , 16 .LBB7_55: # %scalar.ph201 # Parent Loop BB7_53 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $s4, $a7, $s2 - add.d $ra, $t1, $s2 - add.w $s2, $t0, $s2 - bstrpick.d $t5, $s2, 31, 0 - movgr2fr.d $fa4, $t5 - ffint.d.l $fa4, $fa4 - fdiv.d $fa4, $fa4, $fa1 - add.d $t5, $t6, $s0 - fstx.d $fa4, $t5, $s8 - bstrpick.d $t5, $s4, 31, 0 - movgr2fr.d $fa4, $t5 - ffint.d.l $fa4, $fa4 - fdiv.d $fa4, $fa4, $fa2 - add.d $t5, $t7, $s0 - fstx.d $fa4, $t5, $s8 - bstrpick.d $t5, $ra, 31, 0 - movgr2fr.d $fa4, $t5 - ffint.d.l $fa4, $fa4 - fdiv.d $fa4, $fa4, $fa1 - add.d $t5, $t8, $s0 - addi.d $s0, $s0, 8 - fstx.d $fa4, $t5, $s8 - bnez $s0, .LBB7_55 + add.d $t8, $t0, $s4 + add.d $t4, $t2, $s4 + add.w $s4, $t1, $s4 + bstrpick.d $t3, $s4, 31, 0 + movgr2fr.d $fa3, $t3 + ffint.d.l $fa3, $fa3 + fdiv.d $fa3, $fa3, $fa1 + add.d $t3, $s0, $s2 + fstx.d $fa3, $t3, $s8 + bstrpick.d $t3, $t8, 31, 0 + movgr2fr.d $fa3, $t3 + ffint.d.l $fa3, $fa3 + fdiv.d $fa3, $fa3, $fa2 + add.d $t3, $ra, $s2 + fstx.d $fa3, $t3, $s8 + bstrpick.d $t3, $t4, 31, 0 + movgr2fr.d $fa3, $t3 + ffint.d.l $fa3, $fa3 + fdiv.d $fa3, $fa3, $fa1 + add.d $t3, $t5, $s2 + addi.d $s2, $s2, 8 + fstx.d $fa3, $t3, $s8 + bnez $s2, .LBB7_55 b .LBB7_52 .p2align 4, , 16 .LBB7_56: # %vector.ph202 # in Loop: Header=BB7_53 Depth=1 - ld.d $t5, $sp, 40 # 8-byte Folded Reload - vld $vr5, $t5, %pc_lo12(.LCPI7_0) - vreplgr2vr.d $vr4, $t2 - lu12i.w $t5, -3 - ori $s0, $t5, 2688 + ld.d $t3, $sp, 40 # 8-byte Folded Reload + vld $vr4, $t3, %pc_lo12(.LCPI7_0) + vreplgr2vr.d $vr3, $t8 + lu12i.w $t3, -3 + ori $s2, $t3, 2688 .p2align 4, , 16 .LBB7_57: # %vector.body205 # Parent Loop BB7_53 Depth=1 # => This Inner Loop Header: Depth=2 - vaddi.du $vr6, $vr5, 1 - vmul.d $vr6, $vr6, $vr4 - vffint.d.lu $vr6, $vr6 - ori $s2, $zero, 0 - lu32i.d $s2, -49152 - lu52i.d $s2, $s2, 1032 - vreplgr2vr.d $vr7, $s2 - vfdiv.d $vr6, $vr6, $vr7 - add.d $s2, $t6, $s0 - vstx $vr6, $s2, $s8 - vaddi.du $vr6, $vr5, 3 - vaddi.du $vr5, $vr5, 2 - vmul.d $vr8, $vr5, $vr4 - vffint.d.lu $vr8, $vr8 - ori $s2, $zero, 0 - lu32i.d $s2, 180224 - lu52i.d $s2, $s2, 1033 - vreplgr2vr.d $vr9, $s2 - vfdiv.d $vr8, $vr8, $vr9 - add.d $s2, $t7, $s0 - vstx $vr8, $s2, $s8 - vmul.d $vr6, $vr6, $vr4 - vffint.d.lu $vr6, $vr6 - vfdiv.d $vr6, $vr6, $vr7 - add.d $s2, $t8, $s0 - addi.d $s0, $s0, 16 - vstx $vr6, $s2, $s8 - bnez $s0, .LBB7_57 + vaddi.du $vr5, $vr4, 1 + vmul.d $vr5, $vr5, $vr3 + vffint.d.lu $vr5, $vr5 + vreplgr2vr.d $vr6, $t6 + vfdiv.d $vr5, $vr5, $vr6 + add.d $s4, $s0, $s2 + vstx $vr5, $s4, $s8 + vaddi.du $vr5, $vr4, 3 + vaddi.du $vr4, $vr4, 2 + vmul.d $vr7, $vr4, $vr3 + vffint.d.lu $vr7, $vr7 + vreplgr2vr.d $vr8, $t7 + vfdiv.d $vr7, $vr7, $vr8 + add.d $s4, $ra, $s2 + vstx $vr7, $s4, $s8 + vmul.d $vr5, $vr5, $vr3 + vffint.d.lu $vr5, $vr5 + vfdiv.d $vr5, $vr5, $vr6 + add.d $s4, $t5, $s2 + addi.d $s2, $s2, 16 + vstx $vr5, $s4, $s8 + bnez $s2, .LBB7_57 b .LBB7_52 .LBB7_58: # %.preheader80.i66.preheader - move $t8, $zero - ld.d $a7, $sp, 24 # 8-byte Folded Reload - add.d $t0, $s7, $a7 - ld.d $a7, $sp, 16 # 8-byte Folded Reload - add.d $t3, $s5, $a7 - ld.d $a7, $sp, 8 # 8-byte Folded Reload - add.d $t4, $s3, $a7 - addi.d $t2, $s5, 8 - ld.d $a7, $sp, 32 # 8-byte Folded Reload - add.d $t5, $s5, $a7 - add.d $t6, $s7, $a7 - add.d $t7, $s3, $s8 - add.d $t1, $s3, $a7 - st.d $t7, $sp, 96 # 8-byte Folded Spill - sltu $t7, $t7, $t6 - sltu $t1, $s7, $t1 - and $t1, $t7, $t1 - sltu $t2, $t2, $t6 - sltu $t5, $s7, $t5 - and $t2, $t2, $t5 - sltu $t3, $s7, $t3 - sltu $t5, $s5, $t0 - and $t3, $t3, $t5 + move $s0, $zero + ld.d $t0, $sp, 24 # 8-byte Folded Reload + add.d $t1, $s7, $t0 + ld.d $t0, $sp, 16 # 8-byte Folded Reload + add.d $t4, $s5, $t0 + ld.d $t0, $sp, 8 # 8-byte Folded Reload + add.d $t5, $s3, $t0 + addi.d $t3, $s5, 8 + ld.d $t0, $sp, 32 # 8-byte Folded Reload + add.d $t6, $s5, $t0 + add.d $t7, $s7, $t0 + add.d $t8, $s3, $s8 + add.d $t2, $s3, $t0 + st.d $t8, $sp, 96 # 8-byte Folded Spill + sltu $t8, $t8, $t7 + sltu $t2, $s7, $t2 + and $t2, $t8, $t2 + sltu $t3, $t3, $t7 + sltu $t6, $s7, $t6 + and $t3, $t3, $t6 sltu $t4, $s7, $t4 - sltu $t0, $s3, $t0 - and $t0, $t4, $t0 - or $t3, $t3, $t0 - add.d $a7, $s3, $a1 - st.d $a7, $sp, 88 # 8-byte Folded Spill - add.d $a7, $s7, $a1 - st.d $a7, $sp, 80 # 8-byte Folded Spill - addi.d $a7, $s5, 24 - st.d $a7, $sp, 72 # 8-byte Folded Spill - addi.d $a7, $s7, 24 - st.d $a7, $sp, 64 # 8-byte Folded Spill - lu12i.w $a7, -3 - ori $a7, $a7, 2688 - st.d $a7, $sp, 56 # 8-byte Folded Spill + sltu $t6, $s5, $t1 + and $t4, $t4, $t6 + sltu $t5, $s7, $t5 + sltu $t1, $s3, $t1 + and $t1, $t5, $t1 + or $t4, $t4, $t1 + add.d $t0, $s3, $a1 + st.d $t0, $sp, 88 # 8-byte Folded Spill + add.d $t0, $s7, $a1 + st.d $t0, $sp, 80 # 8-byte Folded Spill + addi.d $t0, $s5, 24 + st.d $t0, $sp, 72 # 8-byte Folded Spill + addi.d $t0, $s7, 24 + st.d $t0, $sp, 64 # 8-byte Folded Spill + lu12i.w $t0, -3 + ori $t0, $t0, 2688 + st.d $t0, $sp, 56 # 8-byte Folded Spill vldi $vr1, -800 - ori $t5, $zero, 1000 - ori $ra, $zero, 999 - lu52i.d $t0, $zero, 1022 - vreplgr2vr.d $vr2, $t0 + ori $ra, $zero, 1000 + ori $t1, $zero, 999 + lu52i.d $t5, $zero, 1022 + vreplgr2vr.d $vr2, $t5 .p2align 4, , 16 .LBB7_59: # %vector.ph276 # =>This Loop Header: Depth=1 @@ -889,24 +887,24 @@ main: # @main # Child Loop BB7_75 Depth 2 # Child Loop BB7_78 Depth 3 # Child Loop BB7_81 Depth 3 - ld.d $a7, $sp, 112 # 8-byte Folded Reload - st.d $t8, $sp, 104 # 8-byte Folded Spill - alsl.d $t0, $t8, $a7, 3 - vldrepl.d $vr4, $t0, 0 - ld.d $t0, $sp, 56 # 8-byte Folded Reload + ld.d $t0, $sp, 112 # 8-byte Folded Reload + st.d $s0, $sp, 104 # 8-byte Folded Spill + alsl.d $t5, $s0, $t0, 3 + vldrepl.d $vr3, $t5, 0 + ld.d $t5, $sp, 56 # 8-byte Folded Reload .p2align 4, , 16 .LBB7_60: # %vector.body279 # Parent Loop BB7_59 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $t4, $s3, $t0 - vstx $vr4, $t4, $s8 - addi.d $t0, $t0, 32 - vstx $vr4, $t4, $a1 - bnez $t0, .LBB7_60 + add.d $t6, $s3, $t5 + vstx $vr3, $t6, $s8 + addi.d $t5, $t5, 32 + vstx $vr3, $t6, $a1 + bnez $t5, .LBB7_60 # %bb.61: # %.preheader76.i72.preheader # in Loop: Header=BB7_59 Depth=1 - ori $t0, $zero, 1 - move $t4, $s3 + ori $t5, $zero, 1 + move $t6, $s3 move $s0, $s7 ld.d $s2, $sp, 80 # 8-byte Folded Reload ld.d $s4, $sp, 88 # 8-byte Folded Reload @@ -914,66 +912,66 @@ main: # @main .p2align 4, , 16 .LBB7_62: # %middle.block273 # in Loop: Header=BB7_63 Depth=2 - addi.d $t0, $t0, 1 + addi.d $t5, $t5, 1 add.d $s4, $s4, $s8 add.d $s2, $s2, $s8 add.d $s0, $s0, $s8 - add.d $t4, $t4, $s8 - beq $t0, $t5, .LBB7_66 + add.d $t6, $t6, $s8 + beq $t5, $ra, .LBB7_66 .LBB7_63: # %.preheader76.i72 # Parent Loop BB7_59 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB7_65 Depth 3 # Child Loop BB7_64 Depth 3 - lu12i.w $a7, -3 - ori $t6, $a7, 2688 - beqz $t1, .LBB7_65 + lu12i.w $t0, -3 + ori $t7, $t0, 2688 + beqz $t2, .LBB7_65 .p2align 4, , 16 .LBB7_64: # %scalar.ph262 # Parent Loop BB7_59 Depth=1 # Parent Loop BB7_63 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $t7, $s0, $t6 - fldx.d $fa4, $t7, $s6 - fldx.d $fa5, $t7, $s8 - add.d $t7, $t4, $t6 - fldx.d $fa6, $t7, $s6 - fsub.d $fa4, $fa4, $fa5 - fmul.d $fa4, $fa4, $fa1 - fadd.d $fa4, $fa6, $fa4 - addi.d $t6, $t6, 8 - fstx.d $fa4, $t7, $s6 - bnez $t6, .LBB7_64 + add.d $t0, $s0, $t7 + fldx.d $fa3, $t0, $s6 + fldx.d $fa4, $t0, $s8 + add.d $t0, $t6, $t7 + fldx.d $fa5, $t0, $s6 + fsub.d $fa3, $fa3, $fa4 + fmul.d $fa3, $fa3, $fa1 + fadd.d $fa3, $fa5, $fa3 + addi.d $t7, $t7, 8 + fstx.d $fa3, $t0, $s6 + bnez $t7, .LBB7_64 b .LBB7_62 .p2align 4, , 16 .LBB7_65: # %vector.body264 # Parent Loop BB7_59 Depth=1 # Parent Loop BB7_63 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $t7, $s4, $t6 - add.d $t8, $s2, $t6 - vldx $vr4, $t8, $a3 - vldx $vr5, $t8, $s8 - vld $vr6, $t8, -16 - vldx $vr7, $s2, $t6 - vldx $vr8, $t7, $a3 - vldx $vr9, $t7, $s8 + add.d $t8, $s4, $t7 + add.d $t0, $s2, $t7 + vldx $vr3, $t0, $a4 + vldx $vr4, $t0, $s8 + vld $vr5, $t0, -16 + vldx $vr6, $s2, $t7 + vldx $vr7, $t8, $a4 + vldx $vr8, $t8, $s8 + vfsub.d $vr3, $vr3, $vr5 vfsub.d $vr4, $vr4, $vr6 - vfsub.d $vr5, $vr5, $vr7 + vfmul.d $vr3, $vr3, $vr2 vfmul.d $vr4, $vr4, $vr2 - vfmul.d $vr5, $vr5, $vr2 + vfsub.d $vr3, $vr7, $vr3 vfsub.d $vr4, $vr8, $vr4 - vfsub.d $vr5, $vr9, $vr5 - vstx $vr4, $t7, $a3 - addi.d $t6, $t6, 32 - vstx $vr5, $t7, $s8 - bnez $t6, .LBB7_65 + vstx $vr3, $t8, $a4 + addi.d $t7, $t7, 32 + vstx $vr4, $t8, $s8 + bnez $t7, .LBB7_65 b .LBB7_62 .p2align 4, , 16 .LBB7_66: # %.preheader75.i79.preheader # in Loop: Header=BB7_59 Depth=1 - move $t0, $zero - move $t4, $zero + move $t5, $zero + move $t6, $zero ld.d $s0, $sp, 64 # 8-byte Folded Reload ld.d $s2, $sp, 72 # 8-byte Folded Reload .p2align 4, , 16 @@ -982,69 +980,69 @@ main: # @main # => This Loop Header: Depth=2 # Child Loop BB7_69 Depth 3 # Child Loop BB7_72 Depth 3 - ori $t7, $zero, 1 - bnez $t2, .LBB7_71 + ori $t8, $zero, 1 + bnez $t3, .LBB7_71 # %bb.68: # %vector.body244.preheader # in Loop: Header=BB7_67 Depth=2 - lu12i.w $a7, -3 - ori $t6, $a7, 2720 + lu12i.w $t0, -3 + ori $t7, $t0, 2720 .p2align 4, , 16 .LBB7_69: # %vector.body244 # Parent Loop BB7_59 Depth=1 # Parent Loop BB7_67 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $t7, $s2, $t6 - add.d $t8, $s0, $t6 - vldx $vr4, $t8, $a4 - vldx $vr5, $t8, $a5 + add.d $t0, $s2, $t7 + add.d $t8, $s0, $t7 + vldx $vr3, $t8, $a5 + vldx $vr4, $t8, $a6 ori $s4, $a0, 1352 - vldx $vr6, $t8, $s4 - vldx $vr7, $t8, $a6 - vldx $vr8, $t7, $a4 - vldx $vr9, $t7, $a5 + vldx $vr5, $t8, $s4 + vldx $vr6, $t8, $a7 + vldx $vr7, $t0, $a5 + vldx $vr8, $t0, $a6 + vfsub.d $vr3, $vr3, $vr5 vfsub.d $vr4, $vr4, $vr6 - vfsub.d $vr5, $vr5, $vr7 + vfmul.d $vr3, $vr3, $vr2 vfmul.d $vr4, $vr4, $vr2 - vfmul.d $vr5, $vr5, $vr2 + vfsub.d $vr3, $vr7, $vr3 vfsub.d $vr4, $vr8, $vr4 - vfsub.d $vr5, $vr9, $vr5 - vstx $vr4, $t7, $a4 - addi.d $t6, $t6, 32 - vstx $vr5, $t7, $a5 - bnez $t6, .LBB7_69 + vstx $vr3, $t0, $a5 + addi.d $t7, $t7, 32 + vstx $vr4, $t0, $a6 + bnez $t7, .LBB7_69 # %bb.70: # in Loop: Header=BB7_67 Depth=2 - ori $t7, $zero, 1197 + ori $t8, $zero, 1197 .LBB7_71: # %scalar.ph242.preheader # in Loop: Header=BB7_67 Depth=2 - addi.d $t6, $t7, -1200 - alsl.d $t7, $t7, $t0, 3 + addi.d $t7, $t8, -1200 + alsl.d $t8, $t8, $t5, 3 .p2align 4, , 16 .LBB7_72: # %scalar.ph242 # Parent Loop BB7_59 Depth=1 # Parent Loop BB7_67 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $t8, $s7, $t7 - fldx.d $fa4, $s7, $t7 - fld.d $fa5, $t8, -8 - fldx.d $fa6, $s5, $t7 - fsub.d $fa4, $fa4, $fa5 - fmul.d $fa4, $fa4, $fa1 - fadd.d $fa4, $fa6, $fa4 - fstx.d $fa4, $s5, $t7 - addi.d $t6, $t6, 1 - addi.d $t7, $t7, 8 - bnez $t6, .LBB7_72 + add.d $t0, $s7, $t8 + fldx.d $fa3, $s7, $t8 + fld.d $fa4, $t0, -8 + fldx.d $fa5, $s5, $t8 + fsub.d $fa3, $fa3, $fa4 + fmul.d $fa3, $fa3, $fa1 + fadd.d $fa3, $fa5, $fa3 + fstx.d $fa3, $s5, $t8 + addi.d $t7, $t7, 1 + addi.d $t8, $t8, 8 + bnez $t7, .LBB7_72 # %bb.73: # in Loop: Header=BB7_67 Depth=2 - addi.d $t4, $t4, 1 + addi.d $t6, $t6, 1 add.d $s2, $s2, $s8 add.d $s0, $s0, $s8 - add.d $t0, $t0, $s8 - bne $t4, $t5, .LBB7_67 + add.d $t5, $t5, $s8 + bne $t6, $ra, .LBB7_67 # %bb.74: # %.preheader.i86.preheader # in Loop: Header=BB7_59 Depth=1 move $s0, $zero - move $t0, $zero - ld.d $t4, $sp, 96 # 8-byte Folded Reload + move $t5, $zero + ld.d $t6, $sp, 96 # 8-byte Folded Reload move $s2, $s5 move $s4, $s7 .p2align 4, , 16 @@ -1053,101 +1051,102 @@ main: # @main # => This Loop Header: Depth=2 # Child Loop BB7_78 Depth 3 # Child Loop BB7_81 Depth 3 - beqz $t3, .LBB7_77 + beqz $t4, .LBB7_77 # %bb.76: # in Loop: Header=BB7_75 Depth=2 move $t7, $zero b .LBB7_80 .p2align 4, , 16 .LBB7_77: # %vector.body225.preheader # in Loop: Header=BB7_75 Depth=2 - lu12i.w $a7, -3 - ori $t6, $a7, 2704 + lu12i.w $t0, -3 + ori $t7, $t0, 2704 .p2align 4, , 16 .LBB7_78: # %vector.body225 # Parent Loop BB7_59 Depth=1 # Parent Loop BB7_75 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $t7, $s4, $t6 - add.d $t8, $s2, $t6 - ori $a7, $a0, 1400 - vldx $vr4, $t8, $a7 - vldx $vr5, $t8, $a3 - add.d $a7, $t4, $t6 - vldx $vr6, $a7, $a3 - vld $vr7, $a7, -16 - vfsub.d $vr4, $vr4, $vr5 - vldx $vr5, $t7, $a3 - vfadd.d $vr4, $vr4, $vr6 - vfsub.d $vr4, $vr4, $vr7 - vfmul.d $vr4, $vr4, $vr0 - vfadd.d $vr4, $vr5, $vr4 - addi.d $t6, $t6, 16 - vstx $vr4, $t7, $a3 - bnez $t6, .LBB7_78 + add.d $t0, $s2, $t7 + ori $t8, $a0, 1400 + vldx $vr3, $t0, $t8 + vldx $vr4, $t0, $a4 + add.d $t0, $t6, $t7 + vldx $vr5, $t0, $a4 + vfsub.d $vr3, $vr3, $vr4 + vld $vr4, $t0, -16 + add.d $t0, $s4, $t7 + vfadd.d $vr3, $vr3, $vr5 + vldx $vr5, $t0, $a4 + vfsub.d $vr3, $vr3, $vr4 + vreplgr2vr.d $vr4, $a3 + vfmul.d $vr3, $vr3, $vr4 + vfadd.d $vr3, $vr5, $vr3 + addi.d $t7, $t7, 16 + vstx $vr3, $t0, $a4 + bnez $t7, .LBB7_78 # %bb.79: # in Loop: Header=BB7_75 Depth=2 ori $t7, $zero, 1198 .LBB7_80: # %scalar.ph223.preheader # in Loop: Header=BB7_75 Depth=2 - alsl.d $t6, $t7, $s0, 3 + alsl.d $t8, $t7, $s0, 3 addi.d $t7, $t7, -1199 .p2align 4, , 16 .LBB7_81: # %scalar.ph223 # Parent Loop BB7_59 Depth=1 # Parent Loop BB7_75 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $a7, $s5, $t6 - fld.d $fa4, $a7, 8 - fldx.d $fa5, $s5, $t6 - add.d $a7, $s3, $t6 - fldx.d $fa6, $a7, $s8 - fldx.d $fa7, $s3, $t6 - fsub.d $fa4, $fa4, $fa5 - fldx.d $fa5, $s7, $t6 - fadd.d $fa4, $fa4, $fa6 - fsub.d $fa4, $fa4, $fa7 - fmul.d $fa4, $fa4, $fa3 - fadd.d $fa4, $fa5, $fa4 - fstx.d $fa4, $s7, $t6 + add.d $t0, $s5, $t8 + fld.d $fa3, $t0, 8 + fldx.d $fa4, $s5, $t8 + add.d $t0, $s3, $t8 + fldx.d $fa5, $t0, $s8 + fldx.d $fa6, $s3, $t8 + fsub.d $fa3, $fa3, $fa4 + fldx.d $fa4, $s7, $t8 + fadd.d $fa3, $fa3, $fa5 + fsub.d $fa3, $fa3, $fa6 + fmul.d $fa3, $fa3, $fa0 + fadd.d $fa3, $fa4, $fa3 + fstx.d $fa3, $s7, $t8 addi.d $t7, $t7, 1 - addi.d $t6, $t6, 8 + addi.d $t8, $t8, 8 bnez $t7, .LBB7_81 # %bb.82: # in Loop: Header=BB7_75 Depth=2 - addi.d $t0, $t0, 1 + addi.d $t5, $t5, 1 add.d $s4, $s4, $s8 add.d $s2, $s2, $s8 - add.d $t4, $t4, $s8 + add.d $t6, $t6, $s8 add.d $s0, $s0, $s8 - bne $t0, $ra, .LBB7_75 + bne $t5, $t1, .LBB7_75 # %bb.83: # in Loop: Header=BB7_59 Depth=1 - ld.d $t8, $sp, 104 # 8-byte Folded Reload - addi.d $t8, $t8, 1 - ori $a7, $zero, 500 - bne $t8, $a7, .LBB7_59 + ld.d $s0, $sp, 104 # 8-byte Folded Reload + addi.d $s0, $s0, 1 + ori $t0, $zero, 500 + bne $s0, $t0, .LBB7_59 # %bb.84: # %kernel_fdtd_2d_StrictFP.exit ori $a0, $a2, 2817 pcaddu18i $ra, %call36(malloc) jirl $ra, $ra, 0 - move $s2, $a0 + move $s4, $a0 stx.b $zero, $a0, $s6 - addi.d $s4, $a0, 7 + addi.d $a0, $a0, 7 + st.d $a0, $sp, 104 # 8-byte Folded Spill lu12i.w $a0, -3 ori $a0, $a0, 2688 - st.d $a0, $sp, 96 # 8-byte Folded Spill + st.d $a0, $sp, 88 # 8-byte Folded Spill ori $s6, $zero, 3 pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) - st.d $a0, $sp, 104 # 8-byte Folded Spill + st.d $a0, $sp, 96 # 8-byte Folded Spill move $s0, $zero - move $a0, $zero + move $s2, $zero .LBB7_85: # %.preheader.i95 # =>This Loop Header: Depth=1 # Child Loop BB7_86 Depth 2 # Child Loop BB7_88 Depth 2 # Child Loop BB7_90 Depth 2 - st.d $a0, $sp, 88 # 8-byte Folded Spill add.d $a0, $s5, $s0 - move $a1, $s4 - ld.d $a2, $sp, 96 # 8-byte Folded Reload + ld.d $a1, $sp, 104 # 8-byte Folded Reload + ld.d $a2, $sp, 88 # 8-byte Folded Reload .p2align 4, , 16 .LBB7_86: # Parent Loop BB7_85 Depth=1 # => This Inner Loop Header: Depth=2 @@ -1188,14 +1187,14 @@ main: # @main addi.d $a1, $a1, 16 bnez $a2, .LBB7_86 # %bb.87: # in Loop: Header=BB7_85 Depth=1 - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.d $a1, $a0, 0 - move $a0, $s2 + move $a0, $s4 pcaddu18i $ra, %call36(fputs) jirl $ra, $ra, 0 add.d $a0, $s3, $s0 - move $a1, $s4 - ld.d $a2, $sp, 96 # 8-byte Folded Reload + ld.d $a1, $sp, 104 # 8-byte Folded Reload + ld.d $a2, $sp, 88 # 8-byte Folded Reload .p2align 4, , 16 .LBB7_88: # Parent Loop BB7_85 Depth=1 # => This Inner Loop Header: Depth=2 @@ -1236,15 +1235,15 @@ main: # @main addi.d $a1, $a1, 16 bnez $a2, .LBB7_88 # %bb.89: # in Loop: Header=BB7_85 Depth=1 - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.d $a1, $a0, 0 - move $a0, $s2 + move $a0, $s4 pcaddu18i $ra, %call36(fputs) jirl $ra, $ra, 0 add.d $a0, $s7, $s0 lu12i.w $a1, -3 ori $a1, $a1, 2688 - move $a2, $s4 + ld.d $a2, $sp, 104 # 8-byte Folded Reload .p2align 4, , 16 .LBB7_90: # Parent Loop BB7_85 Depth=1 # => This Inner Loop Header: Depth=2 @@ -1285,18 +1284,17 @@ main: # @main addi.d $a2, $a2, 16 bnez $a1, .LBB7_90 # %bb.91: # in Loop: Header=BB7_85 Depth=1 - ld.d $a0, $sp, 104 # 8-byte Folded Reload + ld.d $a0, $sp, 96 # 8-byte Folded Reload ld.d $a1, $a0, 0 - move $a0, $s2 + move $a0, $s4 pcaddu18i $ra, %call36(fputs) jirl $ra, $ra, 0 - ld.d $a0, $sp, 88 # 8-byte Folded Reload - addi.d $a0, $a0, 1 + addi.d $s2, $s2, 1 add.d $s0, $s0, $s8 - ori $a1, $zero, 1000 - bne $a0, $a1, .LBB7_85 + ori $a0, $zero, 1000 + bne $s2, $a0, .LBB7_85 # %bb.92: # %print_array.exit - move $a0, $s2 + move $a0, $s4 pcaddu18i $ra, %call36(free) jirl $ra, $ra, 0 move $a0, $fp diff --git a/results/SingleSource/Benchmarks/Polybench/stencils/heat-3d/CMakeFiles/heat-3d.dir/heat-3d.s b/results/SingleSource/Benchmarks/Polybench/stencils/heat-3d/CMakeFiles/heat-3d.dir/heat-3d.s index 5656cae9..41f65a9f 100644 --- a/results/SingleSource/Benchmarks/Polybench/stencils/heat-3d/CMakeFiles/heat-3d.dir/heat-3d.s +++ b/results/SingleSource/Benchmarks/Polybench/stencils/heat-3d/CMakeFiles/heat-3d.dir/heat-3d.s @@ -111,10 +111,6 @@ polybench_alloc_data: # @polybench_alloc_data .LCPI7_0: .dword 0 # 0x0 .dword 1 # 0x1 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI7_1: - .dword 0x405e000000000000 # double 120 .text .globl main .p2align 5 @@ -173,44 +169,46 @@ main: # @main ori $a3, $zero, 120 ori $a4, $zero, 31 vldi $vr0, -988 - pcalau12i $a5, %pc_hi20(.LCPI7_1) - fld.d $fa1, $a5, %pc_lo12(.LCPI7_1) - ori $a6, $zero, 960 + ori $a5, $zero, 0 + lu32i.d $a5, -131072 + lu52i.d $a6, $a5, 1029 + movgr2fr.d $fa1, $a6 + ori $a7, $zero, 960 lu12i.w $a5, 28 ori $s3, $a5, 512 - move $a7, $s1 - ori $t0, $zero, 120 - move $t1, $fp + move $t0, $s1 + ori $t1, $zero, 120 + move $t2, $fp b .LBB7_8 .p2align 4, , 16 .LBB7_7: # in Loop: Header=BB7_8 Depth=1 addi.d $a0, $a0, 1 add.d $a2, $a2, $s3 - add.d $t1, $t1, $s3 - addi.w $t0, $t0, 1 - add.d $a7, $a7, $s3 + add.d $t2, $t2, $s3 + addi.w $t1, $t1, 1 + add.d $t0, $t0, $s3 beq $a0, $a3, .LBB7_15 .LBB7_8: # %.preheader24.i # =>This Loop Header: Depth=1 # Child Loop BB7_10 Depth 2 # Child Loop BB7_14 Depth 3 # Child Loop BB7_12 Depth 3 - move $t2, $zero - addi.d $t3, $a0, 120 - move $t4, $a7 + move $t3, $zero + addi.d $t4, $a0, 120 move $t5, $t0 move $t6, $t1 - move $t7, $a2 + move $t7, $t2 + move $t8, $a2 b .LBB7_10 .p2align 4, , 16 .LBB7_9: # %middle.block # in Loop: Header=BB7_10 Depth=2 - addi.d $t2, $t2, 1 + addi.d $t3, $t3, 1 + addi.d $t8, $t8, 960 addi.d $t7, $t7, 960 - addi.d $t6, $t6, 960 - addi.w $t5, $t5, 1 - addi.d $t4, $t4, 960 - beq $t2, $a3, .LBB7_7 + addi.w $t6, $t6, 1 + addi.d $t5, $t5, 960 + beq $t3, $a3, .LBB7_7 .LBB7_10: # %.preheader.i # Parent Loop BB7_8 Depth=1 # => This Loop Header: Depth=2 @@ -219,31 +217,31 @@ main: # @main bltu $a4, $a1, .LBB7_13 # %bb.11: # %scalar.ph.preheader # in Loop: Header=BB7_10 Depth=2 - move $t8, $zero - move $s0, $t5 + move $s0, $zero + move $s2, $t6 .p2align 4, , 16 .LBB7_12: # %scalar.ph # Parent Loop BB7_8 Depth=1 # Parent Loop BB7_10 Depth=2 # => This Inner Loop Header: Depth=3 - movgr2fr.w $fa2, $s0 + movgr2fr.w $fa2, $s2 ffint.d.w $fa2, $fa2 fmul.d $fa2, $fa2, $fa0 fdiv.d $fa2, $fa2, $fa1 - fstx.d $fa2, $t4, $t8 - fstx.d $fa2, $t6, $t8 - addi.d $t8, $t8, 8 - addi.w $s0, $s0, -1 - bne $t8, $a6, .LBB7_12 + fstx.d $fa2, $t5, $s0 + fstx.d $fa2, $t7, $s0 + addi.d $s0, $s0, 8 + addi.w $s2, $s2, -1 + bne $s0, $a7, .LBB7_12 b .LBB7_9 .p2align 4, , 16 .LBB7_13: # %vector.ph # in Loop: Header=BB7_10 Depth=2 - pcalau12i $t8, %pc_hi20(.LCPI7_0) - vld $vr2, $t8, %pc_lo12(.LCPI7_0) - move $t8, $zero - add.d $s0, $t3, $t2 - vreplgr2vr.d $vr3, $s0 + pcalau12i $s0, %pc_hi20(.LCPI7_0) + vld $vr2, $s0, %pc_lo12(.LCPI7_0) + move $s0, $zero + add.d $s2, $t4, $t3 + vreplgr2vr.d $vr3, $s2 .p2align 4, , 16 .LBB7_14: # %vector.body # Parent Loop BB7_8 Depth=1 @@ -253,27 +251,24 @@ main: # @main vsubi.du $vr5, $vr4, 2 vffint.d.l $vr4, $vr4 vffint.d.l $vr5, $vr5 - ori $s0, $zero, 0 - lu32i.d $s0, 262144 - lu52i.d $s0, $s0, 1026 - vreplgr2vr.d $vr6, $s0 + ori $s2, $zero, 0 + lu32i.d $s2, 262144 + lu52i.d $s2, $s2, 1026 + vreplgr2vr.d $vr6, $s2 vfmul.d $vr4, $vr4, $vr6 vfmul.d $vr5, $vr5, $vr6 - ori $s0, $zero, 0 - lu32i.d $s0, -131072 - lu52i.d $s0, $s0, 1029 - vreplgr2vr.d $vr6, $s0 + vreplgr2vr.d $vr6, $a6 vfdiv.d $vr4, $vr4, $vr6 vfdiv.d $vr5, $vr5, $vr6 - add.d $s0, $t7, $t8 - vst $vr4, $s0, -16 - vstx $vr5, $t7, $t8 - add.d $s0, $t6, $t8 - vstx $vr4, $t6, $t8 - vst $vr5, $s0, 16 - addi.d $t8, $t8, 32 + add.d $s2, $t8, $s0 + vst $vr4, $s2, -16 + vstx $vr5, $t8, $s0 + add.d $s2, $t7, $s0 + vstx $vr4, $t7, $s0 + vst $vr5, $s2, 16 + addi.d $s0, $s0, 32 vaddi.du $vr2, $vr2, 4 - bne $t8, $a6, .LBB7_14 + bne $s0, $a7, .LBB7_14 b .LBB7_9 .LBB7_15: # %.preheader117.i.preheader lu12i.w $a0, 56 diff --git a/results/SingleSource/Benchmarks/Polybench/stencils/jacobi-1d/CMakeFiles/jacobi-1d.dir/jacobi-1d.s b/results/SingleSource/Benchmarks/Polybench/stencils/jacobi-1d/CMakeFiles/jacobi-1d.dir/jacobi-1d.s index 7d3b1a68..1d72c429 100644 --- a/results/SingleSource/Benchmarks/Polybench/stencils/jacobi-1d/CMakeFiles/jacobi-1d.dir/jacobi-1d.s +++ b/results/SingleSource/Benchmarks/Polybench/stencils/jacobi-1d/CMakeFiles/jacobi-1d.dir/jacobi-1d.s @@ -106,16 +106,7 @@ polybench_alloc_data: # @polybench_alloc_data .Lfunc_end6: .size polybench_alloc_data, .Lfunc_end6-polybench_alloc_data # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI7_0: - .dword 0x409f400000000000 # double 2000 -.LCPI7_1: - .dword 0x3fd555475a31a4be # double 0.33333000000000002 -.LCPI7_2: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -130,38 +121,37 @@ main: # @main st.d $s4, $sp, 40 # 8-byte Folded Spill st.d $s5, $sp, 32 # 8-byte Folded Spill st.d $s6, $sp, 24 # 8-byte Folded Spill - st.d $s7, $sp, 16 # 8-byte Folded Spill - st.d $zero, $sp, 8 + st.d $zero, $sp, 16 lu12i.w $s4, 3 ori $s2, $s4, 3712 lu12i.w $a1, 1 - addi.d $a0, $sp, 8 + addi.d $a0, $sp, 16 move $a2, $s2 pcaddu18i $ra, %call36(posix_memalign) jirl $ra, $ra, 0 - ld.d $fp, $sp, 8 + ld.d $fp, $sp, 16 beqz $fp, .LBB7_52 # %bb.1: bnez $a0, .LBB7_52 # %bb.2: # %polybench_alloc_data.exit - st.d $zero, $sp, 8 + st.d $zero, $sp, 16 lu12i.w $a1, 1 - addi.d $a0, $sp, 8 + addi.d $a0, $sp, 16 move $a2, $s2 pcaddu18i $ra, %call36(posix_memalign) jirl $ra, $ra, 0 - ld.d $s0, $sp, 8 + ld.d $s0, $sp, 16 beqz $s0, .LBB7_52 # %bb.3: # %polybench_alloc_data.exit bnez $a0, .LBB7_52 # %bb.4: # %polybench_alloc_data.exit23 - st.d $zero, $sp, 8 + st.d $zero, $sp, 16 lu12i.w $a1, 1 - addi.d $a0, $sp, 8 + addi.d $a0, $sp, 16 move $a2, $s2 pcaddu18i $ra, %call36(posix_memalign) jirl $ra, $ra, 0 - ld.d $s1, $sp, 8 + ld.d $s1, $sp, 16 beqz $s1, .LBB7_52 # %bb.5: # %polybench_alloc_data.exit23 bnez $a0, .LBB7_52 @@ -169,12 +159,14 @@ main: # @main sub.d $a0, $s1, $fp ori $a1, $zero, 16 lu12i.w $s3, -4 - pcalau12i $a6, %pc_hi20(.LCPI7_0) bgeu $a0, $a1, .LBB7_9 # %bb.7: # %scalar.ph.preheader - fld.d $fa0, $a6, %pc_lo12(.LCPI7_0) ori $a0, $zero, 3 ori $a1, $s3, 384 + ori $a2, $zero, 0 + lu32i.d $a2, -49152 + lu52i.d $a2, $a2, 1033 + movgr2fr.d $fa0, $a2 .p2align 4, , 16 .LBB7_8: # %scalar.ph # =>This Inner Loop Header: Depth=1 @@ -236,32 +228,31 @@ main: # @main vstx $vr2, $a1, $s2 bnez $a0, .LBB7_10 .LBB7_11: # %.preheader26.i.preheader - move $t2, $zero + move $t1, $zero addi.d $a1, $fp, 8 ori $a0, $s4, 3704 add.d $a2, $fp, $a0 - add.d $a7, $s1, $s2 - addi.d $t0, $s1, 8 - add.d $t1, $s1, $a0 + add.d $a6, $s1, $s2 + addi.d $a7, $s1, 8 + add.d $t0, $s1, $a0 add.d $a3, $fp, $s2 - sltu $a3, $t0, $a3 - sltu $a4, $fp, $t1 - and $t3, $a3, $a4 - sltu $a1, $a1, $a7 + sltu $a3, $a7, $a3 + sltu $a4, $fp, $t0 + and $t2, $a3, $a4 + sltu $a1, $a1, $a6 sltu $a2, $s1, $a2 - and $t4, $a1, $a2 + and $t3, $a1, $a2 addi.d $a1, $s1, 24 - addi.d $t5, $fp, 24 - pcalau12i $a2, %pc_hi20(.LCPI7_1) - fld.d $fa0, $a2, %pc_lo12(.LCPI7_1) - ori $t6, $zero, 500 - ori $t7, $s3, 416 + addi.d $t4, $fp, 24 + lu12i.w $a2, 369434 + ori $a2, $a2, 1214 + lu32i.d $a2, 349511 + lu52i.d $a4, $a2, 1021 + movgr2fr.d $fa0, $a4 + ori $t5, $zero, 500 + ori $t6, $s3, 416 ori $a2, $s4, 3680 ori $a3, $s4, 3696 - lu12i.w $a4, 369434 - ori $a4, $a4, 1214 - lu32i.d $a4, 349511 - lu52i.d $a4, $a4, 1021 vreplgr2vr.d $vr1, $a4 ori $a4, $s4, 3688 ori $a5, $s4, 3664 @@ -272,122 +263,125 @@ main: # @main # Child Loop BB7_17 Depth 2 # Child Loop BB7_20 Depth 2 # Child Loop BB7_23 Depth 2 - ori $s6, $zero, 1 - bnez $t3, .LBB7_16 + ori $s5, $zero, 1 + bnez $t2, .LBB7_16 # %bb.13: # %vector.body96.preheader # in Loop: Header=BB7_12 Depth=1 - move $t8, $t7 + move $t7, $t6 .p2align 4, , 16 .LBB7_14: # %vector.body96 # Parent Loop BB7_12 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $s4, $fp, $t8 - vldx $vr2, $s4, $a2 - vldx $vr3, $s4, $a3 - vldx $vr4, $s4, $a4 - vldx $vr5, $s4, $a0 - vldx $vr6, $s4, $s2 + add.d $t8, $fp, $t7 + vldx $vr2, $t8, $a2 + vldx $vr3, $t8, $a3 + vldx $vr4, $t8, $a4 + vldx $vr5, $t8, $a0 + vldx $vr6, $t8, $s2 vfadd.d $vr2, $vr2, $vr4 vfadd.d $vr4, $vr3, $vr5 vfadd.d $vr2, $vr2, $vr3 vfadd.d $vr3, $vr4, $vr6 vfmul.d $vr2, $vr2, $vr1 vfmul.d $vr3, $vr3, $vr1 - add.d $s4, $a1, $t8 - vstx $vr2, $s4, $a5 - addi.d $t8, $t8, 32 - vstx $vr3, $s4, $a2 - bnez $t8, .LBB7_14 + add.d $t8, $a1, $t7 + vstx $vr2, $t8, $a5 + addi.d $t7, $t7, 32 + vstx $vr3, $t8, $a2 + bnez $t7, .LBB7_14 # %bb.15: # in Loop: Header=BB7_12 Depth=1 - ori $s6, $zero, 1997 + ori $s5, $zero, 1997 .LBB7_16: # %scalar.ph94.preheader # in Loop: Header=BB7_12 Depth=1 - move $t8, $zero - slli.d $s4, $s6, 3 - alsl.d $s5, $s6, $s1, 3 - alsl.d $s6, $s6, $fp, 3 + move $t7, $zero + slli.d $t8, $s5, 3 + alsl.d $s4, $s5, $s1, 3 + alsl.d $s5, $s5, $fp, 3 .p2align 4, , 16 .LBB7_17: # %scalar.ph94 # Parent Loop BB7_12 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $s7, $s6, $t8 - fld.d $fa2, $s7, -8 - fldx.d $fa3, $s6, $t8 - fld.d $fa4, $s7, 8 + add.d $s6, $s5, $t7 + fld.d $fa2, $s6, -8 + fldx.d $fa3, $s5, $t7 + fld.d $fa4, $s6, 8 fadd.d $fa2, $fa2, $fa3 fadd.d $fa2, $fa2, $fa4 fmul.d $fa2, $fa2, $fa0 - fstx.d $fa2, $s5, $t8 - addi.d $t8, $t8, 8 - add.d $s7, $s4, $t8 - bne $s7, $a0, .LBB7_17 + fstx.d $fa2, $s4, $t7 + addi.d $t7, $t7, 8 + add.d $s6, $t8, $t7 + bne $s6, $a0, .LBB7_17 # %bb.18: # %vector.memcheck72 # in Loop: Header=BB7_12 Depth=1 - ori $s6, $zero, 1 - bnez $t4, .LBB7_22 + ori $s5, $zero, 1 + bnez $t3, .LBB7_22 # %bb.19: # %vector.body77.preheader # in Loop: Header=BB7_12 Depth=1 - move $t8, $t7 + move $t7, $t6 .p2align 4, , 16 .LBB7_20: # %vector.body77 # Parent Loop BB7_12 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $s4, $s1, $t8 - vldx $vr2, $s4, $a2 - vldx $vr3, $s4, $a3 - vldx $vr4, $s4, $a4 - vldx $vr5, $s4, $a0 - vldx $vr6, $s4, $s2 + add.d $t8, $s1, $t7 + vldx $vr2, $t8, $a2 + vldx $vr3, $t8, $a3 + vldx $vr4, $t8, $a4 + vldx $vr5, $t8, $a0 + vldx $vr6, $t8, $s2 vfadd.d $vr2, $vr2, $vr4 vfadd.d $vr4, $vr3, $vr5 vfadd.d $vr2, $vr2, $vr3 vfadd.d $vr3, $vr4, $vr6 vfmul.d $vr2, $vr2, $vr1 vfmul.d $vr3, $vr3, $vr1 - add.d $s4, $t5, $t8 - vstx $vr2, $s4, $a5 - addi.d $t8, $t8, 32 - vstx $vr3, $s4, $a2 - bnez $t8, .LBB7_20 + add.d $t8, $t4, $t7 + vstx $vr2, $t8, $a5 + addi.d $t7, $t7, 32 + vstx $vr3, $t8, $a2 + bnez $t7, .LBB7_20 # %bb.21: # in Loop: Header=BB7_12 Depth=1 - ori $s6, $zero, 1997 + ori $s5, $zero, 1997 .LBB7_22: # %.preheader.i.preheader # in Loop: Header=BB7_12 Depth=1 - move $t8, $zero - slli.d $s4, $s6, 3 - alsl.d $s5, $s6, $fp, 3 - alsl.d $s6, $s6, $s1, 3 + move $t7, $zero + slli.d $t8, $s5, 3 + alsl.d $s4, $s5, $fp, 3 + alsl.d $s5, $s5, $s1, 3 .p2align 4, , 16 .LBB7_23: # %.preheader.i # Parent Loop BB7_12 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $s7, $s6, $t8 - fld.d $fa2, $s7, -8 - fldx.d $fa3, $s6, $t8 - fld.d $fa4, $s7, 8 + add.d $s6, $s5, $t7 + fld.d $fa2, $s6, -8 + fldx.d $fa3, $s5, $t7 + fld.d $fa4, $s6, 8 fadd.d $fa2, $fa2, $fa3 fadd.d $fa2, $fa2, $fa4 fmul.d $fa2, $fa2, $fa0 - fstx.d $fa2, $s5, $t8 - addi.d $t8, $t8, 8 - add.d $s7, $s4, $t8 - bne $s7, $a0, .LBB7_23 + fstx.d $fa2, $s4, $t7 + addi.d $t7, $t7, 8 + add.d $s6, $t8, $t7 + bne $s6, $a0, .LBB7_23 # %bb.24: # in Loop: Header=BB7_12 Depth=1 - addi.w $t2, $t2, 1 - bne $t2, $t6, .LBB7_12 + addi.w $t1, $t1, 1 + bne $t1, $t5, .LBB7_12 # %bb.25: # %vector.memcheck108 - sub.d $t2, $s1, $s0 - ori $t3, $zero, 16 - bgeu $t2, $t3, .LBB7_28 + sub.d $t1, $s1, $s0 + ori $t2, $zero, 16 + bgeu $t1, $t2, .LBB7_28 # %bb.26: # %kernel_jacobi_1d.exit.preheader - fld.d $fa2, $a6, %pc_lo12(.LCPI7_0) - ori $a6, $zero, 3 + ori $t1, $zero, 3 ori $t2, $s3, 384 + ori $t3, $zero, 0 + lu32i.d $t3, -49152 + lu52i.d $t3, $t3, 1033 + movgr2fr.d $fa2, $t3 .p2align 4, , 16 .LBB7_27: # %kernel_jacobi_1d.exit # =>This Inner Loop Header: Depth=1 - bstrpick.d $t3, $a6, 31, 0 - addi.d $t4, $a6, -1 + bstrpick.d $t3, $t1, 31, 0 + addi.d $t4, $t1, -1 bstrpick.d $t4, $t4, 31, 0 movgr2fr.d $fa3, $t4 ffint.d.l $fa3, $fa3 @@ -400,15 +394,15 @@ main: # @main add.d $t3, $s1, $t2 fstx.d $fa3, $t3, $s2 addi.d $t2, $t2, 8 - addi.w $a6, $a6, 1 + addi.w $t1, $t1, 1 bnez $t2, .LBB7_27 b .LBB7_30 .LBB7_28: # %vector.body112.preheader ori $t2, $zero, 0 - ori $a6, $zero, 0 - lu32i.d $a6, 1 - vreplgr2vr.d $vr3, $a6 - ori $a6, $s3, 384 + ori $t1, $zero, 0 + lu32i.d $t1, 1 + vreplgr2vr.d $vr3, $t1 + ori $t1, $s3, 384 lu32i.d $t2, -49152 lu52i.d $t2, $t2, 1033 vreplgr2vr.d $vr2, $t2 @@ -427,7 +421,7 @@ main: # @main ffint.d.l $fa6, $fa6 vextrins.d $vr6, $vr5, 16 vfdiv.d $vr5, $vr6, $vr2 - add.d $t2, $s0, $a6 + add.d $t2, $s0, $t1 vstx $vr5, $t2, $s2 vpickve2gr.w $t2, $vr4, 1 bstrpick.d $t2, $t2, 31, 0 @@ -439,22 +433,22 @@ main: # @main ffint.d.l $fa4, $fa4 vextrins.d $vr4, $vr5, 16 vfdiv.d $vr4, $vr4, $vr2 - add.d $t2, $s1, $a6 - addi.d $a6, $a6, 16 + add.d $t2, $s1, $t1 + addi.d $t1, $t1, 16 vstx $vr4, $t2, $s2 - bnez $a6, .LBB7_29 + bnez $t1, .LBB7_29 .LBB7_30: # %.preheader26.i33.preheader - move $a6, $zero + move $t1, $zero addi.d $t2, $s0, 8 add.d $t3, $s0, $a0 add.d $t4, $s0, $s2 - sltu $t0, $t0, $t4 - sltu $t1, $s0, $t1 - and $t0, $t0, $t1 - sltu $a7, $t2, $a7 - sltu $t1, $s1, $t3 - and $a7, $a7, $t1 - addi.d $t1, $s0, 24 + sltu $a7, $a7, $t4 + sltu $t0, $s0, $t0 + and $a7, $a7, $t0 + sltu $a6, $t2, $a6 + sltu $t0, $s1, $t3 + and $a6, $a6, $t0 + addi.d $t0, $s0, 24 ori $t2, $zero, 500 ori $t3, $s3, 416 .p2align 4, , 16 @@ -465,7 +459,7 @@ main: # @main # Child Loop BB7_39 Depth 2 # Child Loop BB7_42 Depth 2 ori $t7, $zero, 1 - bnez $t0, .LBB7_35 + bnez $a7, .LBB7_35 # %bb.32: # %vector.body149.preheader # in Loop: Header=BB7_31 Depth=1 move $t4, $t3 @@ -516,7 +510,7 @@ main: # @main # %bb.37: # %vector.memcheck119 # in Loop: Header=BB7_31 Depth=1 ori $t7, $zero, 1 - bnez $a7, .LBB7_41 + bnez $a6, .LBB7_41 # %bb.38: # %vector.body128.preheader # in Loop: Header=BB7_31 Depth=1 move $t4, $t3 @@ -536,7 +530,7 @@ main: # @main vfadd.d $vr3, $vr4, $vr6 vfmul.d $vr2, $vr2, $vr1 vfmul.d $vr3, $vr3, $vr1 - add.d $t5, $t1, $t4 + add.d $t5, $t0, $t4 vstx $vr2, $t5, $a5 addi.d $t4, $t4, 32 vstx $vr3, $t5, $a2 @@ -565,13 +559,16 @@ main: # @main add.d $t8, $t5, $t4 bne $t8, $a0, .LBB7_42 # %bb.43: # in Loop: Header=BB7_31 Depth=1 - addi.w $a6, $a6, 1 - bne $a6, $t2, .LBB7_31 + addi.w $t1, $t1, 1 + bne $t1, $t2, .LBB7_31 # %bb.44: # %kernel_jacobi_1d_StrictFP.exit.preheader - pcalau12i $a0, %pc_hi20(.LCPI7_2) - fld.d $fa0, $a0, %pc_lo12(.LCPI7_2) move $a2, $zero ori $a0, $s3, 384 + lu12i.w $a1, -487882 + ori $a1, $a1, 2289 + lu32i.d $a1, 325813 + lu52i.d $a6, $a1, 1006 + movgr2fr.d $fa0, $a6 .p2align 4, , 16 .LBB7_45: # %kernel_jacobi_1d_StrictFP.exit # =>This Inner Loop Header: Depth=1 @@ -665,16 +662,11 @@ main: # @main movfr2gr.d $a5, $fa2 pcalau12i $a1, %pc_hi20(.L.str.2) addi.d $a1, $a1, %pc_lo12(.L.str.2) - lu12i.w $a4, -487882 - ori $a4, $a4, 2289 - lu32i.d $a4, 325813 - lu52i.d $a6, $a4, 1006 move $a4, $a2 pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 ori $a0, $zero, 1 .LBB7_51: - ld.d $s7, $sp, 16 # 8-byte Folded Reload ld.d $s6, $sp, 24 # 8-byte Folded Reload ld.d $s5, $sp, 32 # 8-byte Folded Reload ld.d $s4, $sp, 40 # 8-byte Folded Reload diff --git a/results/SingleSource/Benchmarks/Polybench/stencils/jacobi-2d/CMakeFiles/jacobi-2d.dir/jacobi-2d.s b/results/SingleSource/Benchmarks/Polybench/stencils/jacobi-2d/CMakeFiles/jacobi-2d.dir/jacobi-2d.s index c1e8b46a..82b22904 100644 --- a/results/SingleSource/Benchmarks/Polybench/stencils/jacobi-2d/CMakeFiles/jacobi-2d.dir/jacobi-2d.s +++ b/results/SingleSource/Benchmarks/Polybench/stencils/jacobi-2d/CMakeFiles/jacobi-2d.dir/jacobi-2d.s @@ -111,14 +111,6 @@ polybench_alloc_data: # @polybench_alloc_data .LCPI7_0: .dword 0 # 0x0 .dword 1 # 0x1 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI7_1: - .dword 0x4094500000000000 # double 1300 -.LCPI7_2: - .dword 0x3fc999999999999a # double 0.20000000000000001 -.LCPI7_3: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 .text .globl main .p2align 5 @@ -180,17 +172,16 @@ main: # @main ori $a6, $zero, 15 lu12i.w $s4, -3 ori $a7, $s4, 1888 - pcalau12i $a3, %pc_hi20(.LCPI7_1) - fld.d $fa0, $a3, %pc_lo12(.LCPI7_1) - lu12i.w $t3, 2 - ori $s3, $t3, 2208 - ori $t0, $zero, 1300 - pcalau12i $a3, %pc_hi20(.LCPI7_0) - st.d $a3, $sp, 40 # 8-byte Folded Spill - vld $vr1, $a3, %pc_lo12(.LCPI7_0) ori $a3, $zero, 0 lu32i.d $a3, 282624 lu52i.d $a3, $a3, 1033 + movgr2fr.d $fa0, $a3 + lu12i.w $t3, 2 + pcalau12i $t0, %pc_hi20(.LCPI7_0) + st.d $t0, $sp, 40 # 8-byte Folded Spill + vld $vr1, $t0, %pc_lo12(.LCPI7_0) + ori $s3, $t3, 2208 + ori $t0, $zero, 1300 vreplgr2vr.d $vr2, $a3 move $t1, $fp move $t2, $s1 @@ -309,14 +300,13 @@ main: # @main add.d $a3, $fp, $t2 add.d $s6, $s1, $t2 ori $s7, $s4, 1904 - ori $s8, $zero, 1299 lu12i.w $a0, -419431 ori $a0, $a0, 2458 lu32i.d $a0, -419431 lu52i.d $a0, $a0, 1020 - vreplgr2vr.d $vr1, $a0 + movgr2fr.d $fa0, $a0 + ori $s8, $zero, 1299 ori $ra, $zero, 500 - pcalau12i $a2, %pc_hi20(.LCPI7_2) b .LBB7_15 .p2align 4, , 16 .LBB7_14: # in Loop: Header=BB7_15 Depth=1 @@ -346,8 +336,8 @@ main: # @main # => This Loop Header: Depth=2 # Child Loop BB7_21 Depth 3 # Child Loop BB7_19 Depth 3 - ori $a0, $t3, 2192 - ori $a1, $s4, 1872 + ori $a1, $t3, 2192 + ori $a2, $s4, 1872 beqz $t7, .LBB7_20 # %bb.18: # %scalar.ph113.preheader # in Loop: Header=BB7_17 Depth=2 @@ -358,20 +348,19 @@ main: # @main # Parent Loop BB7_17 Depth=2 # => This Inner Loop Header: Depth=3 add.d $t1, $s2, $a6 - fld.d $fa2, $t1, -16 - fld.d $fa3, $t1, -24 - fadd.d $fa2, $fa2, $fa3 + fld.d $fa1, $t1, -16 + fld.d $fa2, $t1, -24 fld.d $fa3, $t1, -8 - fldx.d $fa4, $t1, $a0 - fldx.d $fa5, $t1, $a1 - fld.d $fa6, $a2, %pc_lo12(.LCPI7_2) - fadd.d $fa2, $fa2, $fa3 - fadd.d $fa2, $fa2, $fa4 - fadd.d $fa2, $fa2, $fa5 - fmul.d $fa2, $fa2, $fa6 + fldx.d $fa4, $t1, $a1 + fldx.d $fa5, $t1, $a2 + fadd.d $fa1, $fa1, $fa2 + fadd.d $fa1, $fa1, $fa3 + fadd.d $fa1, $fa1, $fa4 + fadd.d $fa1, $fa1, $fa5 + fmul.d $fa1, $fa1, $fa0 add.d $t1, $t6, $a6 addi.d $a6, $a6, 8 - fstx.d $fa2, $t1, $a0 + fstx.d $fa1, $t1, $a1 bnez $a6, .LBB7_19 b .LBB7_16 .p2align 4, , 16 @@ -384,35 +373,36 @@ main: # @main # Parent Loop BB7_17 Depth=2 # => This Inner Loop Header: Depth=3 add.d $t1, $s2, $a6 - vld $vr2, $t1, -16 - vld $vr3, $t1, -24 - vld $vr4, $t1, -8 - vldx $vr5, $t1, $a0 - vldx $vr6, $t1, $a1 - vfadd.d $vr2, $vr2, $vr3 - vfadd.d $vr2, $vr2, $vr4 - vfadd.d $vr2, $vr2, $vr5 - vfadd.d $vr2, $vr2, $vr6 - vfmul.d $vr2, $vr2, $vr1 + vld $vr1, $t1, -16 + vld $vr2, $t1, -24 + vld $vr3, $t1, -8 + vldx $vr4, $t1, $a1 + vldx $vr5, $t1, $a2 + vfadd.d $vr1, $vr1, $vr2 + vfadd.d $vr1, $vr1, $vr3 + vfadd.d $vr1, $vr1, $vr4 + vfadd.d $vr1, $vr1, $vr5 + vreplgr2vr.d $vr2, $a0 + vfmul.d $vr1, $vr1, $vr2 add.d $t1, $t6, $a6 addi.d $a6, $a6, 16 - vstx $vr2, $t1, $a0 + vstx $vr1, $t1, $a1 bnez $a6, .LBB7_21 b .LBB7_16 .p2align 4, , 16 .LBB7_22: # %.preheader.i29.preheader # in Loop: Header=BB7_15 Depth=1 - ori $t6, $zero, 1 - move $s2, $s6 + ori $s2, $zero, 1 + move $t6, $s6 move $s5, $t5 b .LBB7_24 .p2align 4, , 16 .LBB7_23: # %middle.block103 # in Loop: Header=BB7_24 Depth=2 - addi.d $t6, $t6, 1 + addi.d $s2, $s2, 1 add.d $s5, $s5, $s3 - add.d $s2, $s2, $s3 - beq $t6, $s8, .LBB7_14 + add.d $t6, $t6, $s3 + beq $s2, $s8, .LBB7_14 .LBB7_24: # %.preheader.i29 # Parent Loop BB7_15 Depth=1 # => This Loop Header: Depth=2 @@ -425,21 +415,20 @@ main: # @main # Parent Loop BB7_15 Depth=1 # Parent Loop BB7_24 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $t1, $s2, $a6 - fld.d $fa2, $t1, -16 - fld.d $fa3, $t1, -24 - fadd.d $fa2, $fa2, $fa3 + add.d $t1, $t6, $a6 + fld.d $fa1, $t1, -16 + fld.d $fa2, $t1, -24 fld.d $fa3, $t1, -8 - fldx.d $fa4, $t1, $a0 - fldx.d $fa5, $t1, $a1 - fld.d $fa6, $a2, %pc_lo12(.LCPI7_2) - fadd.d $fa2, $fa2, $fa3 - fadd.d $fa2, $fa2, $fa4 - fadd.d $fa2, $fa2, $fa5 - fmul.d $fa2, $fa2, $fa6 + fldx.d $fa4, $t1, $a1 + fldx.d $fa5, $t1, $a2 + fadd.d $fa1, $fa1, $fa2 + fadd.d $fa1, $fa1, $fa3 + fadd.d $fa1, $fa1, $fa4 + fadd.d $fa1, $fa1, $fa5 + fmul.d $fa1, $fa1, $fa0 add.d $t1, $s5, $a6 addi.d $a6, $a6, 8 - fstx.d $fa2, $t1, $a0 + fstx.d $fa1, $t1, $a1 bnez $a6, .LBB7_25 b .LBB7_23 .p2align 4, , 16 @@ -447,20 +436,21 @@ main: # @main # Parent Loop BB7_15 Depth=1 # Parent Loop BB7_24 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $t1, $s2, $a6 - vld $vr2, $t1, -16 - vld $vr3, $t1, -24 - vld $vr4, $t1, -8 - vldx $vr5, $t1, $a0 - vldx $vr6, $t1, $a1 - vfadd.d $vr2, $vr2, $vr3 - vfadd.d $vr2, $vr2, $vr4 - vfadd.d $vr2, $vr2, $vr5 - vfadd.d $vr2, $vr2, $vr6 - vfmul.d $vr2, $vr2, $vr1 + add.d $t1, $t6, $a6 + vld $vr1, $t1, -16 + vld $vr2, $t1, -24 + vld $vr3, $t1, -8 + vldx $vr4, $t1, $a1 + vldx $vr5, $t1, $a2 + vfadd.d $vr1, $vr1, $vr2 + vfadd.d $vr1, $vr1, $vr3 + vfadd.d $vr1, $vr1, $vr4 + vfadd.d $vr1, $vr1, $vr5 + vreplgr2vr.d $vr2, $a0 + vfmul.d $vr1, $vr1, $vr2 add.d $t1, $s5, $a6 addi.d $a6, $a6, 16 - vstx $vr2, $t1, $a0 + vstx $vr1, $t1, $a1 bnez $a6, .LBB7_26 b .LBB7_23 .LBB7_27: # %.preheader.i30.preheader @@ -471,14 +461,15 @@ main: # @main ori $t7, $zero, 2 ori $t8, $zero, 15 ori $s2, $s4, 1888 - ori $s6, $zero, 1300 - ld.d $a3, $sp, 40 # 8-byte Folded Reload - vld $vr2, $a3, %pc_lo12(.LCPI7_0) ori $a3, $zero, 0 lu32i.d $a3, 282624 - lu52i.d $a3, $a3, 1033 - vreplgr2vr.d $vr3, $a3 - move $a3, $s0 + lu52i.d $a4, $a3, 1033 + ld.d $a3, $sp, 40 # 8-byte Folded Reload + vld $vr1, $a3, %pc_lo12(.LCPI7_0) + movgr2fr.d $fa2, $a4 + ori $a3, $zero, 1300 + vreplgr2vr.d $vr3, $a4 + move $s6, $s0 move $s7, $s1 b .LBB7_29 .p2align 4, , 16 @@ -486,11 +477,11 @@ main: # @main # in Loop: Header=BB7_29 Depth=1 addi.d $t4, $t4, 1 add.d $s7, $s7, $s3 - add.d $a3, $a3, $s3 + add.d $s6, $s6, $s3 addi.w $t6, $t6, 3 addi.d $t3, $t3, 1 addi.w $t7, $t7, 2 - beq $t4, $s6, .LBB7_34 + beq $t4, $a3, .LBB7_34 .LBB7_29: # %.preheader.i30 # =>This Loop Header: Depth=1 # Child Loop BB7_33 Depth 2 @@ -508,14 +499,14 @@ main: # @main bstrpick.d $t1, $t1, 31, 0 movgr2fr.d $fa4, $t1 ffint.d.l $fa4, $fa4 - fdiv.d $fa4, $fa4, $fa0 - add.d $t1, $a3, $a6 + fdiv.d $fa4, $fa4, $fa2 + add.d $t1, $s6, $a6 fstx.d $fa4, $t1, $s3 add.d $t1, $t6, $a4 bstrpick.d $t1, $t1, 31, 0 movgr2fr.d $fa4, $t1 ffint.d.l $fa4, $fa4 - fdiv.d $fa4, $fa4, $fa0 + fdiv.d $fa4, $fa4, $fa2 add.d $t1, $s7, $a6 fstx.d $fa4, $t1, $s3 addi.d $a6, $a6, 8 @@ -527,7 +518,7 @@ main: # @main # in Loop: Header=BB7_29 Depth=1 vreplgr2vr.d $vr4, $t4 move $a4, $s2 - vori.b $vr5, $vr2, 0 + vori.b $vr5, $vr1, 0 .p2align 4, , 16 .LBB7_33: # %vector.body132 # Parent Loop BB7_29 Depth=1 @@ -547,7 +538,7 @@ main: # @main ffint.d.l $fa7, $fa7 vextrins.d $vr7, $vr8, 16 vfdiv.d $vr7, $vr7, $vr3 - add.d $a6, $a3, $a4 + add.d $a6, $s6, $a4 vstx $vr7, $a6, $s3 vmul.d $vr6, $vr6, $vr4 vshuf4i.w $vr6, $vr6, 8 @@ -627,20 +618,19 @@ main: # @main # Parent Loop BB7_38 Depth=2 # => This Inner Loop Header: Depth=3 add.d $s2, $t6, $t8 - fld.d $fa0, $s2, -16 + fld.d $fa1, $s2, -16 fld.d $fa2, $s2, -24 - fadd.d $fa0, $fa0, $fa2 - fld.d $fa2, $s2, -8 - fldx.d $fa3, $s2, $a0 + fld.d $fa3, $s2, -8 fldx.d $fa4, $s2, $a1 - fld.d $fa5, $a2, %pc_lo12(.LCPI7_2) - fadd.d $fa0, $fa0, $fa2 - fadd.d $fa0, $fa0, $fa3 - fadd.d $fa0, $fa0, $fa4 - fmul.d $fa0, $fa0, $fa5 + fldx.d $fa5, $s2, $a2 + fadd.d $fa1, $fa1, $fa2 + fadd.d $fa1, $fa1, $fa3 + fadd.d $fa1, $fa1, $fa4 + fadd.d $fa1, $fa1, $fa5 + fmul.d $fa1, $fa1, $fa0 add.d $s2, $t7, $t8 addi.d $t8, $t8, 8 - fstx.d $fa0, $s2, $a0 + fstx.d $fa1, $s2, $a1 bnez $t8, .LBB7_40 b .LBB7_37 .p2align 4, , 16 @@ -653,19 +643,20 @@ main: # @main # Parent Loop BB7_38 Depth=2 # => This Inner Loop Header: Depth=3 add.d $s2, $t6, $t8 - vld $vr0, $s2, -16 + vld $vr1, $s2, -16 vld $vr2, $s2, -24 vld $vr3, $s2, -8 - vldx $vr4, $s2, $a0 - vldx $vr5, $s2, $a1 - vfadd.d $vr0, $vr0, $vr2 - vfadd.d $vr0, $vr0, $vr3 - vfadd.d $vr0, $vr0, $vr4 - vfadd.d $vr0, $vr0, $vr5 - vfmul.d $vr0, $vr0, $vr1 + vldx $vr4, $s2, $a1 + vldx $vr5, $s2, $a2 + vfadd.d $vr1, $vr1, $vr2 + vfadd.d $vr1, $vr1, $vr3 + vfadd.d $vr1, $vr1, $vr4 + vfadd.d $vr1, $vr1, $vr5 + vreplgr2vr.d $vr2, $a0 + vfmul.d $vr1, $vr1, $vr2 add.d $s2, $t7, $t8 addi.d $t8, $t8, 16 - vstx $vr0, $s2, $a0 + vstx $vr1, $s2, $a1 bnez $t8, .LBB7_42 b .LBB7_37 .p2align 4, , 16 @@ -695,20 +686,19 @@ main: # @main # Parent Loop BB7_45 Depth=2 # => This Inner Loop Header: Depth=3 add.d $s2, $t6, $t8 - fld.d $fa0, $s2, -16 + fld.d $fa1, $s2, -16 fld.d $fa2, $s2, -24 - fadd.d $fa0, $fa0, $fa2 - fld.d $fa2, $s2, -8 - fldx.d $fa3, $s2, $a0 + fld.d $fa3, $s2, -8 fldx.d $fa4, $s2, $a1 - fld.d $fa5, $a2, %pc_lo12(.LCPI7_2) - fadd.d $fa0, $fa0, $fa2 - fadd.d $fa0, $fa0, $fa3 - fadd.d $fa0, $fa0, $fa4 - fmul.d $fa0, $fa0, $fa5 + fldx.d $fa5, $s2, $a2 + fadd.d $fa1, $fa1, $fa2 + fadd.d $fa1, $fa1, $fa3 + fadd.d $fa1, $fa1, $fa4 + fadd.d $fa1, $fa1, $fa5 + fmul.d $fa1, $fa1, $fa0 add.d $s2, $t7, $t8 addi.d $t8, $t8, 8 - fstx.d $fa0, $s2, $a0 + fstx.d $fa1, $s2, $a1 bnez $t8, .LBB7_46 b .LBB7_44 .p2align 4, , 16 @@ -717,56 +707,60 @@ main: # @main # Parent Loop BB7_45 Depth=2 # => This Inner Loop Header: Depth=3 add.d $s2, $t6, $t8 - vld $vr0, $s2, -16 + vld $vr1, $s2, -16 vld $vr2, $s2, -24 vld $vr3, $s2, -8 - vldx $vr4, $s2, $a0 - vldx $vr5, $s2, $a1 - vfadd.d $vr0, $vr0, $vr2 - vfadd.d $vr0, $vr0, $vr3 - vfadd.d $vr0, $vr0, $vr4 - vfadd.d $vr0, $vr0, $vr5 - vfmul.d $vr0, $vr0, $vr1 + vldx $vr4, $s2, $a1 + vldx $vr5, $s2, $a2 + vfadd.d $vr1, $vr1, $vr2 + vfadd.d $vr1, $vr1, $vr3 + vfadd.d $vr1, $vr1, $vr4 + vfadd.d $vr1, $vr1, $vr5 + vreplgr2vr.d $vr2, $a0 + vfmul.d $vr1, $vr1, $vr2 add.d $s2, $t7, $t8 addi.d $t8, $t8, 16 - vstx $vr0, $s2, $a0 + vstx $vr1, $s2, $a1 bnez $t8, .LBB7_47 b .LBB7_44 .LBB7_48: # %.preheader.i55.preheader - pcalau12i $a0, %pc_hi20(.LCPI7_3) - fld.d $fa0, $a0, %pc_lo12(.LCPI7_3) move $a0, $zero move $a2, $zero - ori $a1, $s4, 1888 - ori $a4, $zero, 1300 + ori $a4, $s4, 1888 + lu12i.w $a1, -487882 + ori $a1, $a1, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa0, $a1 + ori $a5, $zero, 1300 .p2align 4, , 16 .LBB7_49: # %.preheader.i55 # =>This Loop Header: Depth=1 # Child Loop BB7_50 Depth 2 move $a3, $zero - add.d $a5, $s0, $a0 - add.d $a6, $fp, $a0 - move $a7, $a1 + add.d $a6, $s0, $a0 + add.d $a7, $fp, $a0 + move $t0, $a4 .p2align 4, , 16 .LBB7_50: # Parent Loop BB7_49 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $t0, $a6, $a7 - fldx.d $fa1, $t0, $s3 - add.d $t0, $a5, $a7 - fldx.d $fa2, $t0, $s3 + add.d $t1, $a7, $t0 + fldx.d $fa1, $t1, $s3 + add.d $t1, $a6, $t0 + fldx.d $fa2, $t1, $s3 fsub.d $fa3, $fa1, $fa2 fabs.d $fa3, $fa3 fcmp.cule.d $fcc0, $fa3, $fa0 bceqz $fcc0, .LBB7_58 # %bb.51: # %.critedge.i # in Loop: Header=BB7_50 Depth=2 - addi.d $a7, $a7, 8 + addi.d $t0, $t0, 8 addi.w $a3, $a3, 1 - bnez $a7, .LBB7_50 + bnez $t0, .LBB7_50 # %bb.52: # in Loop: Header=BB7_49 Depth=1 addi.d $a2, $a2, 1 add.d $a0, $a0, $s3 - bne $a2, $a4, .LBB7_49 + bne $a2, $a5, .LBB7_49 # %bb.53: # %check_FP.exit lu12i.w $s5, 5 ori $a0, $s5, 321 @@ -856,10 +850,6 @@ main: # @main pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) ld.d $a0, $a0, 0 - lu12i.w $a1, -487882 - ori $a1, $a1, 2289 - lu32i.d $a1, 325813 - lu52i.d $a1, $a1, 1006 st.d $a1, $sp, 0 movfr2gr.d $a4, $fa1 movfr2gr.d $a7, $fa2 diff --git a/results/SingleSource/Benchmarks/Polybench/stencils/seidel-2d/CMakeFiles/seidel-2d.dir/seidel-2d.s b/results/SingleSource/Benchmarks/Polybench/stencils/seidel-2d/CMakeFiles/seidel-2d.dir/seidel-2d.s index 0b8b8117..c3c8a14a 100644 --- a/results/SingleSource/Benchmarks/Polybench/stencils/seidel-2d/CMakeFiles/seidel-2d.dir/seidel-2d.s +++ b/results/SingleSource/Benchmarks/Polybench/stencils/seidel-2d/CMakeFiles/seidel-2d.dir/seidel-2d.s @@ -111,10 +111,6 @@ polybench_alloc_data: # @polybench_alloc_data .LCPI7_0: .dword 0 # 0x0 .dword 1 # 0x1 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI7_1: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 .text .globl main .p2align 5 @@ -406,39 +402,42 @@ main: # @main addi.w $a5, $a5, 1 bne $a5, $t1, .LBB7_19 # %bb.24: # %.preheader.i50.preheader - pcalau12i $a0, %pc_hi20(.LCPI7_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI7_1) move $a0, $zero move $a2, $zero - ori $a1, $zero, 2000 + lu12i.w $a1, -487882 + ori $a1, $a1, 2289 + lu32i.d $a1, 325813 + lu52i.d $a1, $a1, 1006 + movgr2fr.d $fa0, $a1 + ori $a5, $zero, 2000 .p2align 4, , 16 .LBB7_25: # %.preheader.i50 # =>This Loop Header: Depth=1 # Child Loop BB7_26 Depth 2 move $a3, $zero - add.d $a5, $s0, $a0 - add.d $a6, $fp, $a0 - move $a7, $a4 + add.d $a6, $s0, $a0 + add.d $a7, $fp, $a0 + move $t0, $a4 .p2align 4, , 16 .LBB7_26: # Parent Loop BB7_25 Depth=1 # => This Inner Loop Header: Depth=2 - add.d $t0, $a6, $a7 - fldx.d $fa1, $t0, $s2 - add.d $t0, $a5, $a7 - fldx.d $fa2, $t0, $s2 + add.d $t1, $a7, $t0 + fldx.d $fa1, $t1, $s2 + add.d $t1, $a6, $t0 + fldx.d $fa2, $t1, $s2 fsub.d $fa3, $fa1, $fa2 fabs.d $fa3, $fa3 fcmp.cule.d $fcc0, $fa3, $fa0 bceqz $fcc0, .LBB7_34 # %bb.27: # %.critedge.i # in Loop: Header=BB7_26 Depth=2 - addi.d $a7, $a7, 8 + addi.d $t0, $t0, 8 addi.w $a3, $a3, 1 - bnez $a7, .LBB7_26 + bnez $t0, .LBB7_26 # %bb.28: # in Loop: Header=BB7_25 Depth=1 addi.d $a2, $a2, 1 add.d $a0, $a0, $s2 - bne $a2, $a1, .LBB7_25 + bne $a2, $a5, .LBB7_25 # %bb.29: # %check_FP.exit ori $a0, $s4, 3329 pcaddu18i $ra, %call36(malloc) @@ -523,10 +522,6 @@ main: # @main pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) ld.d $a0, $a0, 0 - lu12i.w $a1, -487882 - ori $a1, $a1, 2289 - lu32i.d $a1, 325813 - lu52i.d $a1, $a1, 1006 st.d $a1, $sp, 0 movfr2gr.d $a4, $fa1 movfr2gr.d $a7, $fa2 diff --git a/results/SingleSource/Benchmarks/Shootout-C++/CMakeFiles/Shootout-C++-heapsort.dir/heapsort.s b/results/SingleSource/Benchmarks/Shootout-C++/CMakeFiles/Shootout-C++-heapsort.dir/heapsort.s index 95bc92a0..c1b26082 100644 --- a/results/SingleSource/Benchmarks/Shootout-C++/CMakeFiles/Shootout-C++-heapsort.dir/heapsort.s +++ b/results/SingleSource/Benchmarks/Shootout-C++/CMakeFiles/Shootout-C++-heapsort.dir/heapsort.s @@ -3,12 +3,8 @@ .globl _ZSt21ios_base_library_initv # End of file scope inline assembly - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z10gen_randomd -.LCPI0_0: - .dword 0x4101160000000000 # double 139968 .text - .globl _Z10gen_randomd + .globl _Z10gen_randomd # -- Begin function _Z10gen_randomd .p2align 5 .type _Z10gen_randomd,@function _Z10gen_randomd: # @_Z10gen_randomd @@ -30,11 +26,13 @@ _Z10gen_randomd: # @_Z10gen_randomd ori $a3, $a3, 704 mul.d $a2, $a2, $a3 sub.d $a1, $a1, $a2 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a2, %pc_lo12(.LCPI0_0) - movgr2fr.d $fa2, $a1 - ffint.d.l $fa2, $fa2 - fmul.d $fa0, $fa0, $fa2 + movgr2fr.d $fa1, $a1 + ffint.d.l $fa1, $fa1 + fmul.d $fa0, $fa0, $fa1 + ori $a2, $zero, 0 + lu32i.d $a2, 71168 + lu52i.d $a2, $a2, 1040 + movgr2fr.d $fa1, $a2 fdiv.d $fa0, $fa0, $fa1 st.d $a1, $a0, %pc_lo12(_ZZ10gen_randomdE4last) ret @@ -121,12 +119,7 @@ _Z8heapsortiPd: # @_Z8heapsortiPd .Lfunc_end1: .size _Z8heapsortiPd, .Lfunc_end1-_Z8heapsortiPd # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI2_0: - .dword 0x4101160000000000 # double 139968 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -168,11 +161,13 @@ main: # @main lu12i.w $a7, -10549 ori $a7, $a7, 3729 lu32i.d $a7, -86783 - pcalau12i $t0, %pc_hi20(.LCPI2_0) - fld.d $fa0, $t0, %pc_lo12(.LCPI2_0) lu52i.d $a7, $a7, 958 lu12i.w $t0, 34 ori $t0, $t0, 704 + ori $t1, $zero, 0 + lu32i.d $t1, 71168 + lu52i.d $t1, $t1, 1040 + movgr2fr.d $fa0, $t1 .p2align 4, , 16 .LBB2_5: # %.lr.ph # =>This Inner Loop Header: Depth=1 diff --git a/results/SingleSource/Benchmarks/Shootout-C++/CMakeFiles/Shootout-C++-moments.dir/moments.s b/results/SingleSource/Benchmarks/Shootout-C++/CMakeFiles/Shootout-C++-moments.dir/moments.s index 4b7918ab..67a51992 100644 --- a/results/SingleSource/Benchmarks/Shootout-C++/CMakeFiles/Shootout-C++-moments.dir/moments.s +++ b/results/SingleSource/Benchmarks/Shootout-C++/CMakeFiles/Shootout-C++-moments.dir/moments.s @@ -274,12 +274,8 @@ GCC_except_table0: .Lcst_end0: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN7momentsIdEC2IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEET_S9_ -.LCPI1_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 .section .text._ZN7momentsIdEC2IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEET_S9_,"axG",@progbits,_ZN7momentsIdEC2IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEET_S9_,comdat - .weak _ZN7momentsIdEC2IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEET_S9_ + .weak _ZN7momentsIdEC2IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEET_S9_ # -- Begin function _ZN7momentsIdEC2IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEET_S9_ .p2align 5 .type _ZN7momentsIdEC2IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEET_S9_,@function _ZN7momentsIdEC2IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEET_S9_: # @_ZN7momentsIdEC2IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEET_S9_ @@ -312,7 +308,7 @@ _ZN7momentsIdEC2IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEET_S9_: # vst $vr0, $a0, 32 vst $vr0, $a0, 16 vst $vr0, $a0, 0 - pcalau12i $a1, %pc_hi20(.LCPI1_0) + lu12i.w $a1, 256 lu12i.w $a0, 275200 bne $a2, $s0, .LBB1_2 # %bb.1: # %_ZSt10accumulateIN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEdET0_T_S8_S7_.exit.thread @@ -340,11 +336,12 @@ _ZN7momentsIdEC2IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEET_S9_: # sub.d $s2, $a2, $s0 srai.d $s3, $s2, 3 srli.d $a3, $s3, 32 - fld.d $fa1, $a1, %pc_lo12(.LCPI1_0) lu52i.d $a4, $zero, 1107 or $a3, $a3, $a4 + movgr2fr.d $fa1, $a3 + lu52i.d $a3, $a1, 1107 movgr2fr.d $fa2, $a3 - fsub.d $fa1, $fa2, $fa1 + fsub.d $fa1, $fa1, $fa2 move $a3, $s3 bstrins.d $a3, $a0, 63, 32 movgr2fr.d $fa2, $a3 @@ -379,11 +376,12 @@ _ZN7momentsIdEC2IN9__gnu_cxx17__normal_iteratorIPdSt6vectorIdSaIdEEEEEET_S9_: # fst.d $fa1, $fp, 16 addi.d $a3, $s3, -1 srli.d $a4, $a3, 32 - fld.d $fa1, $a1, %pc_lo12(.LCPI1_0) - lu52i.d $a1, $zero, 1107 - or $a1, $a4, $a1 + lu52i.d $a5, $zero, 1107 + or $a4, $a4, $a5 + movgr2fr.d $fa1, $a4 + lu52i.d $a1, $a1, 1107 movgr2fr.d $fa2, $a1 - fsub.d $fa1, $fa2, $fa1 + fsub.d $fa1, $fa1, $fa2 bstrins.d $a3, $a0, 63, 32 movgr2fr.d $fa2, $a3 fadd.d $fa1, $fa2, $fa1 diff --git a/results/SingleSource/Benchmarks/Shootout-C++/CMakeFiles/Shootout-C++-random.dir/random.s b/results/SingleSource/Benchmarks/Shootout-C++/CMakeFiles/Shootout-C++-random.dir/random.s index 36eb7da8..c41c9f56 100644 --- a/results/SingleSource/Benchmarks/Shootout-C++/CMakeFiles/Shootout-C++-random.dir/random.s +++ b/results/SingleSource/Benchmarks/Shootout-C++/CMakeFiles/Shootout-C++-random.dir/random.s @@ -3,14 +3,8 @@ .globl _ZSt21ios_base_library_initv # End of file scope inline assembly - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x4059000000000000 # double 100 -.LCPI0_1: - .dword 0x4101160000000000 # double 139968 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -42,10 +36,10 @@ main: # @main ori $a0, $a0, 1024 .LBB0_4: # %.lr.ph.preheader pcalau12i $a1, %pc_hi20(_ZZ10gen_randomdE4last) - ld.d $a2, $a1, %pc_lo12(_ZZ10gen_randomdE4last) - ori $a3, $zero, 3877 - lu12i.w $a4, 7 - ori $a4, $a4, 901 + ld.d $a4, $a1, %pc_lo12(_ZZ10gen_randomdE4last) + ori $a2, $zero, 3877 + lu12i.w $a3, 7 + ori $a3, $a3, 901 lu12i.w $a5, -10549 ori $a5, $a5, 3729 lu32i.d $a5, -86783 @@ -56,24 +50,28 @@ main: # @main .LBB0_5: # %.lr.ph # =>This Inner Loop Header: Depth=1 addi.w $a0, $a0, -1 - mul.d $a2, $a2, $a3 - add.d $a2, $a2, $a4 - mulh.d $a7, $a2, $a5 + mul.d $a4, $a4, $a2 + add.d $a4, $a4, $a3 + mulh.d $a7, $a4, $a5 srli.d $t0, $a7, 63 srai.d $a7, $a7, 15 add.d $a7, $a7, $t0 mul.d $a7, $a7, $a6 - sub.d $a2, $a2, $a7 + sub.d $a4, $a4, $a7 bnez $a0, .LBB0_5 # %bb.6: # %._crit_edge - st.d $a2, $a1, %pc_lo12(_ZZ10gen_randomdE4last) - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_0) - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_1) - movgr2fr.d $fa2, $a2 - ffint.d.l $fa2, $fa2 - fmul.d $fa0, $fa2, $fa0 + st.d $a4, $a1, %pc_lo12(_ZZ10gen_randomdE4last) + movgr2fr.d $fa0, $a4 + ffint.d.l $fa0, $fa0 + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -458752 + lu52i.d $a1, $a1, 1029 + movgr2fr.d $fa1, $a1 + fmul.d $fa0, $fa0, $fa1 + lu32i.d $a0, 71168 + lu52i.d $a0, $a0, 1040 + movgr2fr.d $fa1, $a0 fdiv.d $fa0, $fa0, $fa1 .LBB0_7: pcalau12i $a0, %got_pc_hi20(_ZSt4cout) diff --git a/results/SingleSource/Benchmarks/Shootout/CMakeFiles/Shootout-heapsort.dir/heapsort.s b/results/SingleSource/Benchmarks/Shootout/CMakeFiles/Shootout-heapsort.dir/heapsort.s index 3c723dab..fc9aa90d 100644 --- a/results/SingleSource/Benchmarks/Shootout/CMakeFiles/Shootout-heapsort.dir/heapsort.s +++ b/results/SingleSource/Benchmarks/Shootout/CMakeFiles/Shootout-heapsort.dir/heapsort.s @@ -1,10 +1,6 @@ .file "heapsort.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function gen_random -.LCPI0_0: - .dword 0x4101160000000000 # double 139968 .text - .globl gen_random + .globl gen_random # -- Begin function gen_random .p2align 5 .type gen_random,@function gen_random: # @gen_random @@ -26,11 +22,13 @@ gen_random: # @gen_random ori $a3, $a3, 704 mul.d $a2, $a2, $a3 sub.d $a1, $a1, $a2 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a2, %pc_lo12(.LCPI0_0) - movgr2fr.d $fa2, $a1 - ffint.d.l $fa2, $fa2 - fmul.d $fa0, $fa0, $fa2 + movgr2fr.d $fa1, $a1 + ffint.d.l $fa1, $fa1 + fmul.d $fa0, $fa0, $fa1 + ori $a2, $zero, 0 + lu32i.d $a2, 71168 + lu52i.d $a2, $a2, 1040 + movgr2fr.d $fa1, $a2 fdiv.d $fa0, $fa0, $fa1 st.d $a1, $a0, %pc_lo12(gen_random.last) ret @@ -117,12 +115,7 @@ benchmark_heapsort: # @benchmark_heapsort .Lfunc_end1: .size benchmark_heapsort, .Lfunc_end1-benchmark_heapsort # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI2_0: - .dword 0x4101160000000000 # double 139968 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -164,11 +157,13 @@ main: # @main lu12i.w $a7, -10549 ori $a7, $a7, 3729 lu32i.d $a7, -86783 - pcalau12i $t0, %pc_hi20(.LCPI2_0) - fld.d $fa0, $t0, %pc_lo12(.LCPI2_0) lu52i.d $a7, $a7, 958 lu12i.w $t0, 34 ori $t0, $t0, 704 + ori $t1, $zero, 0 + lu32i.d $t1, 71168 + lu52i.d $t1, $t1, 1040 + movgr2fr.d $fa0, $t1 .p2align 4, , 16 .LBB2_5: # %.lr.ph # =>This Inner Loop Header: Depth=1 diff --git a/results/SingleSource/Benchmarks/Shootout/CMakeFiles/Shootout-random.dir/random.s b/results/SingleSource/Benchmarks/Shootout/CMakeFiles/Shootout-random.dir/random.s index 4d4a9a6d..b9e6defc 100644 --- a/results/SingleSource/Benchmarks/Shootout/CMakeFiles/Shootout-random.dir/random.s +++ b/results/SingleSource/Benchmarks/Shootout/CMakeFiles/Shootout-random.dir/random.s @@ -1,10 +1,6 @@ .file "random.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x4101160000000000 # double 139968 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -68,12 +64,14 @@ main: # @main sub.d $a0, $a0, $a1 st.d $a0, $fp, %pc_lo12(gen_random.last) ori $a1, $zero, 100 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI0_0) mul.d $a0, $a0, $a1 + movgr2fr.d $fa0, $a0 + ffint.d.l $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, 71168 + lu52i.d $a0, $a0, 1040 movgr2fr.d $fa1, $a0 - ffint.d.l $fa1, $fa1 - fdiv.d $fa0, $fa1, $fa0 + fdiv.d $fa0, $fa0, $fa1 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str) addi.d $a0, $a0, %pc_lo12(.L.str) diff --git a/results/SingleSource/Benchmarks/SmallPT/CMakeFiles/smallpt.dir/smallpt.s b/results/SingleSource/Benchmarks/SmallPT/CMakeFiles/smallpt.dir/smallpt.s index 0b337788..aded61aa 100644 --- a/results/SingleSource/Benchmarks/SmallPT/CMakeFiles/smallpt.dir/smallpt.s +++ b/results/SingleSource/Benchmarks/SmallPT/CMakeFiles/smallpt.dir/smallpt.s @@ -1,22 +1,6 @@ .file "smallpt.cpp" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z8radianceRK3RayiPt -.LCPI0_0: - .dword 0x4415af1d78b58c40 # double 1.0E+20 -.LCPI0_1: - .dword 0x3f1a36e2eb1c432d # double 1.0E-4 -.LCPI0_2: - .dword 0x401921fb54442d18 # double 6.2831853071795862 -.LCPI0_3: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI0_4: - .dword 0x3fe5555555555555 # double 0.66666666666666663 -.LCPI0_5: - .dword 0x3feeb851eb851eb8 # double 0.95999999999999996 -.LCPI0_6: - .dword 0x3fa47ae147ae147b # double 0.040000000000000001 .text - .globl _Z8radianceRK3RayiPt + .globl _Z8radianceRK3RayiPt # -- Begin function _Z8radianceRK3RayiPt .p2align 5 .type _Z8radianceRK3RayiPt,@function _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt @@ -63,36 +47,42 @@ _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt fmadd.d $fa0, $fa2, $fa2, $fa0 fmsub.d $fa0, $fs7, $fs7, $fa0 fmadd.d $fa0, $fa1, $fa1, $fa0 - pcalau12i $s1, %pc_hi20(.LCPI0_0) + lu12i.w $a0, 494424 + ori $a0, $a0, 3136 + lu32i.d $a0, 372509 + lu52i.d $s1, $a0, 1089 movgr2fr.d $fs3, $zero fcmp.clt.d $fcc0, $fa0, $fs3 - pcalau12i $s2, %pc_hi20(.LCPI0_1) + lu12i.w $s2, -85564 bceqz $fcc0, .LBB0_2 # %bb.1: - fld.d $fs7, $s1, %pc_lo12(.LCPI0_0) move $s3, $zero + movgr2fr.d $fs7, $s1 b .LBB0_4 .LBB0_2: # %_ZNK6Sphere9intersectERK3Ray.exit.i fsqrt.d $fa1, $fa0 fcmp.cor.d $fcc0, $fa1, $fa1 bceqz $fcc0, .LBB0_54 .LBB0_3: # %_ZNK6Sphere9intersectERK3Ray.exit.i.split - fld.d $fa0, $s2, %pc_lo12(.LCPI0_1) - fsub.d $fa2, $fs7, $fa1 + fsub.d $fa0, $fs7, $fa1 fadd.d $fa1, $fs7, $fa1 - fcmp.clt.d $fcc0, $fa0, $fa1 - fcmp.clt.d $fcc1, $fa0, $fa2 - fld.d $fa0, $s1, %pc_lo12(.LCPI0_0) + ori $a0, $s2, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa2, $a0 + fcmp.clt.d $fcc0, $fa2, $fa1 fsel $fa1, $fs3, $fa1, $fcc0 - fsel $fa1, $fa1, $fa2, $fcc1 - fcmp.cune.d $fcc0, $fa1, $fs3 - fcmp.clt.d $fcc1, $fa1, $fa0 + fcmp.clt.d $fcc0, $fa2, $fa0 + fsel $fa0, $fa1, $fa0, $fcc0 + fcmp.cune.d $fcc0, $fa0, $fs3 + movgr2fr.d $fa1, $s1 + fcmp.clt.d $fcc1, $fa0, $fa1 movcf2gr $a0, $fcc0 movcf2gr $a4, $fcc1 and $a0, $a0, $a4 slli.d $s3, $a0, 3 movgr2cf $fcc0, $a0 - fsel $fs7, $fa0, $fa1, $fcc0 + fsel $fs7, $fa1, $fa0, $fcc0 .LBB0_4: # %_ZNK6Sphere9intersectERK3Ray.exit.thread.i fld.d $fa0, $s0, 624 fld.d $fa1, $s0, 632 @@ -108,21 +98,24 @@ _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt fmadd.d $fs0, $fa2, $fs1, $fa3 fmadd.d $fa0, $fa2, $fa2, $fa0 fmsub.d $fa0, $fs0, $fs0, $fa0 - fmadd.d $fa1, $fa1, $fa1, $fa0 - fcmp.clt.d $fcc0, $fa1, $fs3 + fmadd.d $fa0, $fa1, $fa1, $fa0 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB0_7 # %bb.5: # %_ZNK6Sphere9intersectERK3Ray.exit.i.1 - fsqrt.d $fa0, $fa1 - fcmp.cor.d $fcc0, $fa0, $fa0 + fsqrt.d $fa1, $fa0 + fcmp.cor.d $fcc0, $fa1, $fa1 bceqz $fcc0, .LBB0_55 .LBB0_6: # %_ZNK6Sphere9intersectERK3Ray.exit.i.1.split - fld.d $fa1, $s2, %pc_lo12(.LCPI0_1) - fsub.d $fa2, $fs0, $fa0 - fadd.d $fa0, $fs0, $fa0 - fcmp.clt.d $fcc0, $fa1, $fa0 - fsel $fa0, $fs3, $fa0, $fcc0 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa0, $fa0, $fa2, $fcc0 + fsub.d $fa0, $fs0, $fa1 + fadd.d $fa1, $fs0, $fa1 + ori $a0, $s2, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa2, $a0 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fs3, $fa1, $fcc0 + fcmp.clt.d $fcc0, $fa2, $fa0 + fsel $fa0, $fa1, $fa0, $fcc0 fcmp.cune.d $fcc0, $fa0, $fs3 fcmp.clt.d $fcc1, $fa0, $fs7 movcf2gr $a0, $fcc0 @@ -149,21 +142,24 @@ _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt fmadd.d $fs0, $fa2, $fs1, $fa3 fmadd.d $fa0, $fa2, $fa2, $fa0 fmsub.d $fa0, $fs0, $fs0, $fa0 - fmadd.d $fa1, $fa1, $fa1, $fa0 - fcmp.clt.d $fcc0, $fa1, $fs3 + fmadd.d $fa0, $fa1, $fa1, $fa0 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB0_10 # %bb.8: # %_ZNK6Sphere9intersectERK3Ray.exit.i.2 - fsqrt.d $fa0, $fa1 - fcmp.cor.d $fcc0, $fa0, $fa0 + fsqrt.d $fa1, $fa0 + fcmp.cor.d $fcc0, $fa1, $fa1 bceqz $fcc0, .LBB0_56 .LBB0_9: # %_ZNK6Sphere9intersectERK3Ray.exit.i.2.split - fld.d $fa1, $s2, %pc_lo12(.LCPI0_1) - fsub.d $fa2, $fs0, $fa0 - fadd.d $fa0, $fs0, $fa0 - fcmp.clt.d $fcc0, $fa1, $fa0 - fsel $fa0, $fs3, $fa0, $fcc0 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa0, $fa0, $fa2, $fcc0 + fsub.d $fa0, $fs0, $fa1 + fadd.d $fa1, $fs0, $fa1 + ori $a0, $s2, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa2, $a0 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fs3, $fa1, $fcc0 + fcmp.clt.d $fcc0, $fa2, $fa0 + fsel $fa0, $fa1, $fa0, $fcc0 fcmp.cune.d $fcc0, $fa0, $fs3 fcmp.clt.d $fcc1, $fa0, $fs7 movcf2gr $a0, $fcc0 @@ -190,21 +186,24 @@ _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt fmadd.d $fs0, $fa2, $fs1, $fa3 fmadd.d $fa0, $fa2, $fa2, $fa0 fmsub.d $fa0, $fs0, $fs0, $fa0 - fmadd.d $fa1, $fa1, $fa1, $fa0 - fcmp.clt.d $fcc0, $fa1, $fs3 + fmadd.d $fa0, $fa1, $fa1, $fa0 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB0_13 # %bb.11: # %_ZNK6Sphere9intersectERK3Ray.exit.i.3 - fsqrt.d $fa0, $fa1 - fcmp.cor.d $fcc0, $fa0, $fa0 + fsqrt.d $fa1, $fa0 + fcmp.cor.d $fcc0, $fa1, $fa1 bceqz $fcc0, .LBB0_57 .LBB0_12: # %_ZNK6Sphere9intersectERK3Ray.exit.i.3.split - fld.d $fa1, $s2, %pc_lo12(.LCPI0_1) - fsub.d $fa2, $fs0, $fa0 - fadd.d $fa0, $fs0, $fa0 - fcmp.clt.d $fcc0, $fa1, $fa0 - fsel $fa0, $fs3, $fa0, $fcc0 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa0, $fa0, $fa2, $fcc0 + fsub.d $fa0, $fs0, $fa1 + fadd.d $fa1, $fs0, $fa1 + ori $a0, $s2, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa2, $a0 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fs3, $fa1, $fcc0 + fcmp.clt.d $fcc0, $fa2, $fa0 + fsel $fa0, $fa1, $fa0, $fcc0 fcmp.cune.d $fcc0, $fa0, $fs3 fcmp.clt.d $fcc1, $fa0, $fs7 movcf2gr $a0, $fcc0 @@ -231,21 +230,24 @@ _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt fmadd.d $fs0, $fa2, $fs1, $fa3 fmadd.d $fa0, $fa2, $fa2, $fa0 fmsub.d $fa0, $fs0, $fs0, $fa0 - fmadd.d $fa1, $fa1, $fa1, $fa0 - fcmp.clt.d $fcc0, $fa1, $fs3 + fmadd.d $fa0, $fa1, $fa1, $fa0 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB0_16 # %bb.14: # %_ZNK6Sphere9intersectERK3Ray.exit.i.4 - fsqrt.d $fa0, $fa1 - fcmp.cor.d $fcc0, $fa0, $fa0 + fsqrt.d $fa1, $fa0 + fcmp.cor.d $fcc0, $fa1, $fa1 bceqz $fcc0, .LBB0_58 .LBB0_15: # %_ZNK6Sphere9intersectERK3Ray.exit.i.4.split - fld.d $fa1, $s2, %pc_lo12(.LCPI0_1) - fsub.d $fa2, $fs0, $fa0 - fadd.d $fa0, $fs0, $fa0 - fcmp.clt.d $fcc0, $fa1, $fa0 - fsel $fa0, $fs3, $fa0, $fcc0 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa0, $fa0, $fa2, $fcc0 + fsub.d $fa0, $fs0, $fa1 + fadd.d $fa1, $fs0, $fa1 + ori $a0, $s2, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa2, $a0 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fs3, $fa1, $fcc0 + fcmp.clt.d $fcc0, $fa2, $fa0 + fsel $fa0, $fa1, $fa0, $fcc0 fcmp.cune.d $fcc0, $fa0, $fs3 fcmp.clt.d $fcc1, $fa0, $fs7 movcf2gr $a0, $fcc0 @@ -272,21 +274,24 @@ _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt fmadd.d $fs0, $fa2, $fs1, $fa3 fmadd.d $fa0, $fa2, $fa2, $fa0 fmsub.d $fa0, $fs0, $fs0, $fa0 - fmadd.d $fa1, $fa1, $fa1, $fa0 - fcmp.clt.d $fcc0, $fa1, $fs3 + fmadd.d $fa0, $fa1, $fa1, $fa0 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB0_19 # %bb.17: # %_ZNK6Sphere9intersectERK3Ray.exit.i.5 - fsqrt.d $fa0, $fa1 - fcmp.cor.d $fcc0, $fa0, $fa0 + fsqrt.d $fa1, $fa0 + fcmp.cor.d $fcc0, $fa1, $fa1 bceqz $fcc0, .LBB0_59 .LBB0_18: # %_ZNK6Sphere9intersectERK3Ray.exit.i.5.split - fld.d $fa1, $s2, %pc_lo12(.LCPI0_1) - fsub.d $fa2, $fs0, $fa0 - fadd.d $fa0, $fs0, $fa0 - fcmp.clt.d $fcc0, $fa1, $fa0 - fsel $fa0, $fs3, $fa0, $fcc0 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa0, $fa0, $fa2, $fcc0 + fsub.d $fa0, $fs0, $fa1 + fadd.d $fa1, $fs0, $fa1 + ori $a0, $s2, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa2, $a0 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fs3, $fa1, $fcc0 + fcmp.clt.d $fcc0, $fa2, $fa0 + fsel $fa0, $fa1, $fa0, $fcc0 fcmp.cune.d $fcc0, $fa0, $fs3 fcmp.clt.d $fcc1, $fa0, $fs7 movcf2gr $a0, $fcc0 @@ -313,21 +318,24 @@ _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt fmadd.d $fs0, $fa2, $fs1, $fa3 fmadd.d $fa0, $fa2, $fa2, $fa0 fmsub.d $fa0, $fs0, $fs0, $fa0 - fmadd.d $fa1, $fa1, $fa1, $fa0 - fcmp.clt.d $fcc0, $fa1, $fs3 + fmadd.d $fa0, $fa1, $fa1, $fa0 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB0_22 # %bb.20: # %_ZNK6Sphere9intersectERK3Ray.exit.i.6 - fsqrt.d $fa0, $fa1 - fcmp.cor.d $fcc0, $fa0, $fa0 + fsqrt.d $fa1, $fa0 + fcmp.cor.d $fcc0, $fa1, $fa1 bceqz $fcc0, .LBB0_60 .LBB0_21: # %_ZNK6Sphere9intersectERK3Ray.exit.i.6.split - fld.d $fa1, $s2, %pc_lo12(.LCPI0_1) - fsub.d $fa2, $fs0, $fa0 - fadd.d $fa0, $fs0, $fa0 - fcmp.clt.d $fcc0, $fa1, $fa0 - fsel $fa0, $fs3, $fa0, $fcc0 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa0, $fa0, $fa2, $fcc0 + fsub.d $fa0, $fs0, $fa1 + fadd.d $fa1, $fs0, $fa1 + ori $a0, $s2, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa2, $a0 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fs3, $fa1, $fcc0 + fcmp.clt.d $fcc0, $fa2, $fa0 + fsel $fa0, $fa1, $fa0, $fcc0 fcmp.cune.d $fcc0, $fa0, $fs3 fcmp.clt.d $fcc1, $fa0, $fs7 movcf2gr $a0, $fcc0 @@ -354,21 +362,24 @@ _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt fmadd.d $fs0, $fa2, $fs1, $fa3 fmadd.d $fa0, $fa2, $fa2, $fa0 fmsub.d $fa0, $fs0, $fs0, $fa0 - fmadd.d $fa1, $fa1, $fa1, $fa0 - fcmp.clt.d $fcc0, $fa1, $fs3 + fmadd.d $fa0, $fa1, $fa1, $fa0 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB0_25 # %bb.23: # %_ZNK6Sphere9intersectERK3Ray.exit.i.7 - fsqrt.d $fa0, $fa1 - fcmp.cor.d $fcc0, $fa0, $fa0 + fsqrt.d $fa1, $fa0 + fcmp.cor.d $fcc0, $fa1, $fa1 bceqz $fcc0, .LBB0_61 .LBB0_24: # %_ZNK6Sphere9intersectERK3Ray.exit.i.7.split - fld.d $fa1, $s2, %pc_lo12(.LCPI0_1) - fsub.d $fa2, $fs0, $fa0 - fadd.d $fa0, $fs0, $fa0 - fcmp.clt.d $fcc0, $fa1, $fa0 - fsel $fa0, $fs3, $fa0, $fcc0 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa0, $fa0, $fa2, $fcc0 + fsub.d $fa0, $fs0, $fa1 + fadd.d $fa1, $fs0, $fa1 + ori $a0, $s2, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa2, $a0 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fs3, $fa1, $fcc0 + fcmp.clt.d $fcc0, $fa2, $fa0 + fsel $fa0, $fa1, $fa0, $fcc0 fcmp.cune.d $fcc0, $fa0, $fs3 fcmp.clt.d $fcc1, $fa0, $fs7 movcf2gr $a0, $fcc0 @@ -395,21 +406,24 @@ _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt fmadd.d $fs0, $fa2, $fs1, $fa3 fmadd.d $fa0, $fa2, $fa2, $fa0 fmsub.d $fa0, $fs0, $fs0, $fa0 - fmadd.d $fa1, $fa1, $fa1, $fa0 - fcmp.clt.d $fcc0, $fa1, $fs3 + fmadd.d $fa0, $fa1, $fa1, $fa0 + fcmp.clt.d $fcc0, $fa0, $fs3 bcnez $fcc0, .LBB0_28 # %bb.26: # %_ZNK6Sphere9intersectERK3Ray.exit.i.8 - fsqrt.d $fa0, $fa1 - fcmp.cor.d $fcc0, $fa0, $fa0 + fsqrt.d $fa1, $fa0 + fcmp.cor.d $fcc0, $fa1, $fa1 bceqz $fcc0, .LBB0_62 .LBB0_27: # %_ZNK6Sphere9intersectERK3Ray.exit.i.8.split - fld.d $fa1, $s2, %pc_lo12(.LCPI0_1) - fsub.d $fa2, $fs0, $fa0 - fadd.d $fa0, $fs0, $fa0 - fcmp.clt.d $fcc0, $fa1, $fa0 - fsel $fa0, $fs3, $fa0, $fcc0 - fcmp.clt.d $fcc0, $fa1, $fa2 - fsel $fa0, $fa0, $fa2, $fcc0 + fsub.d $fa0, $fs0, $fa1 + fadd.d $fa1, $fs0, $fa1 + ori $a0, $s2, 813 + lu32i.d $a0, -379166 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa2, $a0 + fcmp.clt.d $fcc0, $fa2, $fa1 + fsel $fa1, $fs3, $fa1, $fcc0 + fcmp.clt.d $fcc0, $fa2, $fa0 + fsel $fa0, $fa1, $fa0, $fcc0 fcmp.cune.d $fcc0, $fa0, $fs3 fcmp.clt.d $fcc1, $fa0, $fs7 movcf2gr $a0, $fcc0 @@ -419,7 +433,7 @@ _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt movgr2cf $fcc0, $a0 fsel $fs7, $fs7, $fa0, $fcc0 .LBB0_28: # %_ZNK6Sphere9intersectERK3Ray.exit.thread.i.8 - fld.d $fa0, $s1, %pc_lo12(.LCPI0_0) + movgr2fr.d $fa0, $s1 fcmp.clt.d $fcc0, $fs7, $fa0 bceqz $fcc0, .LBB0_33 # %bb.29: @@ -527,8 +541,11 @@ _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt move $s2, $a3 pcaddu18i $ra, %call36(erand48) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_2) + lu12i.w $a0, 345154 + ori $a0, $a0, 3352 + lu32i.d $a0, -450053 + lu52i.d $a0, $a0, 1025 + movgr2fr.d $fa1, $a0 fmul.d $fs7, $fa0, $fa1 move $s1, $s2 move $a0, $s2 @@ -539,10 +556,13 @@ _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt fcmp.cor.d $fcc0, $fs2, $fs2 bceqz $fcc0, .LBB0_63 .LBB0_38: # %.split - pcalau12i $a0, %pc_hi20(.LCPI0_3) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_3) - fabs.d $fa1, $fs6 - fcmp.clt.d $fcc0, $fa0, $fa1 + fabs.d $fa0, $fs6 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a0, $a0, 1019 + movgr2fr.d $fa1, $a0 + fcmp.clt.d $fcc0, $fa1, $fa0 vldi $vr1, -912 fsel $fa0, $fa1, $fs3, $fcc0 fsel $fa1, $fs3, $fa1, $fcc0 @@ -688,11 +708,14 @@ _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt fst.d $fa4, $sp, 264 fmul.d $fa0, $fs7, $fs1 fmadd.d $fa0, $fs6, $fs0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI0_4) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_4) fmadd.d $fa0, $fs5, $fs4, $fa0 fcmp.clt.d $fcc1, $fs3, $fa0 vldi $vr0, -904 + lu12i.w $a0, 349525 + ori $a0, $a0, 1365 + lu32i.d $a0, 349525 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa4, $a0 fsel $fs2, $fa0, $fa4, $fcc1 fmul.d $fa0, $fs1, $fa1 fmadd.d $fa0, $fa3, $fs0, $fa0 @@ -776,17 +799,23 @@ _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt fmul.d $fa0, $fs7, $fs1 fmadd.d $fa0, $fs2, $fs6, $fa0 fmadd.d $fa0, $fs0, $fs5, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI0_5) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_5) fsel $fa0, $fa0, $fs4, $fcc1 vldi $vr3, -912 fsub.d $fa0, $fa3, $fa0 + lu12i.w $a0, -83887 + ori $a0, $a0, 3768 + lu32i.d $a0, -83887 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa1, $a0 fmul.d $fa1, $fa0, $fa1 - pcalau12i $a0, %pc_hi20(.LCPI0_6) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_6) fmul.d $fa1, $fa0, $fa1 fmul.d $fa1, $fa0, $fa1 fmul.d $fa1, $fa0, $fa1 + lu12i.w $a0, 293601 + ori $a0, $a0, 1147 + lu32i.d $a0, 293601 + lu52i.d $a0, $a0, 1018 + movgr2fr.d $fa2, $a0 fmadd.d $fa2, $fa1, $fa0, $fa2 ori $a0, $zero, 2 fsub.d $fa0, $fa3, $fa2 @@ -909,7 +938,6 @@ _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt fmov.d $fa1, $fa0 b .LBB0_3 .LBB0_55: # %call.sqrt349 - fmov.d $fa0, $fa1 move $s4, $a3 move $s6, $a2 move $s5, $a1 @@ -920,9 +948,9 @@ _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt move $a1, $s5 move $a2, $s6 move $a3, $s4 + fmov.d $fa1, $fa0 b .LBB0_6 .LBB0_56: # %call.sqrt350 - fmov.d $fa0, $fa1 move $s4, $a3 move $s6, $a2 move $s5, $a1 @@ -933,9 +961,9 @@ _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt move $a1, $s5 move $a2, $s6 move $a3, $s4 + fmov.d $fa1, $fa0 b .LBB0_9 .LBB0_57: # %call.sqrt351 - fmov.d $fa0, $fa1 move $s4, $a3 move $s6, $a2 move $s5, $a1 @@ -946,9 +974,9 @@ _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt move $a1, $s5 move $a2, $s6 move $a3, $s4 + fmov.d $fa1, $fa0 b .LBB0_12 .LBB0_58: # %call.sqrt352 - fmov.d $fa0, $fa1 move $s4, $a3 move $s6, $a2 move $s5, $a1 @@ -959,9 +987,9 @@ _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt move $a1, $s5 move $a2, $s6 move $a3, $s4 + fmov.d $fa1, $fa0 b .LBB0_15 .LBB0_59: # %call.sqrt353 - fmov.d $fa0, $fa1 move $s4, $a3 move $s6, $a2 move $s5, $a1 @@ -972,9 +1000,9 @@ _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt move $a1, $s5 move $a2, $s6 move $a3, $s4 + fmov.d $fa1, $fa0 b .LBB0_18 .LBB0_60: # %call.sqrt354 - fmov.d $fa0, $fa1 move $s4, $a3 move $s6, $a2 move $s5, $a1 @@ -985,9 +1013,9 @@ _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt move $a1, $s5 move $a2, $s6 move $a3, $s4 + fmov.d $fa1, $fa0 b .LBB0_21 .LBB0_61: # %call.sqrt355 - fmov.d $fa0, $fa1 move $s4, $a3 move $s6, $a2 move $s5, $a1 @@ -998,9 +1026,9 @@ _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt move $a1, $s5 move $a2, $s6 move $a3, $s4 + fmov.d $fa1, $fa0 b .LBB0_24 .LBB0_62: # %call.sqrt356 - fmov.d $fa0, $fa1 move $s4, $a3 move $s6, $a2 move $s5, $a1 @@ -1011,6 +1039,7 @@ _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt move $a1, $s5 move $a2, $s6 move $a3, $s4 + fmov.d $fa1, $fa0 b .LBB0_27 .LBB0_63: # %call.sqrt357 fld.d $fa0, $sp, 48 # 8-byte Folded Reload @@ -1041,34 +1070,18 @@ _Z8radianceRK3RayiPt: # @_Z8radianceRK3RayiPt .Lfunc_end0: .size _Z8radianceRK3RayiPt, .Lfunc_end0-_Z8radianceRK3RayiPt # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI1_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI1_1: - .dword 0x3f50000000000000 # double 9.765625E-4 -.LCPI1_2: - .dword 0x4088000000000000 # double 768 -.LCPI1_3: - .dword 0xbf9662d746dc5a9d # double -0.021861423206326881 -.LCPI1_7: - .dword 0xbfeff8929a5e7d34 # double -0.99909334325994914 -.LCPI1_8: - .dword 0x4061800000000000 # double 140 -.LCPI1_10: - .dword 0x407279999999999a # double 295.60000000000002 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI1_4: + .p2align 4, 0x0 # -- Begin function main +.LCPI1_0: .dword 0x3fe5e8ca11bfd44f # double 0.68466666666666665 .dword 0x0000000000000000 # double 0 -.LCPI1_5: +.LCPI1_1: .dword 0x0000000000000000 # double 0 .dword 0x3fe06ac72f44be81 # double 0.513034431763984 -.LCPI1_6: +.LCPI1_2: .dword 0x0000000000000000 # double 0 .dword 0xbfa5cc2d1960285f # double -0.042573365542992951 -.LCPI1_9: +.LCPI1_3: .dword 0x4049000000000000 # double 50 .dword 0x404a000000000000 # double 52 .section .text.unlikely.,"ax",@progbits @@ -1119,7 +1132,7 @@ main: # @main .cfi_offset 62, -144 .cfi_offset 63, -152 ori $a2, $zero, 2 - ori $fp, $zero, 1 + ori $s0, $zero, 1 bne $a0, $a2, .LBB1_2 # %bb.1: ld.d $a0, $a1, 8 @@ -1130,123 +1143,133 @@ main: # @main addi.w $a1, $a0, 0 bstrpick.d $a1, $a1, 62, 61 add.w $a0, $a0, $a1 - srai.d $fp, $a0, 2 + srai.d $s0, $a0, 2 .LBB1_2: - lu12i.w $s0, 4608 - move $a0, $s0 + lu12i.w $fp, 4608 + move $a0, $fp pcaddu18i $ra, %call36(_Znam) jirl $ra, $ra, 0 move $s1, $a0 move $a1, $zero - move $a2, $s0 + move $a2, $fp pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 pcalau12i $a0, %got_pc_hi20(stderr) ld.d $a0, $a0, %got_pc_lo12(stderr) ld.d $a0, $a0, 0 - slli.d $a2, $fp, 2 + slli.d $a2, $s0, 2 pcalau12i $a1, %pc_hi20(.L.str) addi.d $a1, $a1, %pc_lo12(.L.str) pcaddu18i $ra, %call36(fprintf) jirl $ra, $ra, 0 lu12i.w $a2, 4602 vrepli.b $vr3, 0 - blez $fp, .LBB1_41 + blez $s0, .LBB1_41 # %bb.3: # %.split148.us.us.preheader - move $a1, $zero move $a3, $zero - movgr2fr.w $fa0, $fp + move $a4, $zero + movgr2fr.w $fa0, $s0 ffint.d.w $fa0, $fa0 frecip.d $fa0, $fa0 vst $vr0, $sp, 272 # 16-byte Folded Spill vreplvei.d $vr0, $vr0, 0 vst $vr0, $sp, 256 # 16-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_0) - fst.d $fa0, $sp, 16 # 8-byte Folded Spill + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 + movgr2fr.d $fa0, $a0 + fst.d $fa0, $sp, 8 # 8-byte Folded Spill ori $a0, $a2, 16 - st.d $a0, $sp, 40 # 8-byte Folded Spill + st.d $a0, $sp, 32 # 8-byte Folded Spill movgr2fr.d $fa0, $zero fst.d $fa0, $sp, 56 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_1) + lu52i.d $a0, $zero, 1013 + movgr2fr.d $fa0, $a0 fst.d $fa0, $sp, 248 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_2) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_2) + ori $a0, $zero, 0 + ori $a1, $zero, 0 + lu32i.d $a1, -524288 + lu52i.d $a1, $a1, 1032 + movgr2fr.d $fa0, $a1 fst.d $fa0, $sp, 240 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_3) - fld.d $fs4, $a0, %pc_lo12(.LCPI1_3) - pcalau12i $a0, %pc_hi20(.LCPI1_4) - vld $vr0, $a0, %pc_lo12(.LCPI1_4) + lu12i.w $a1, 290245 + ori $a1, $a1, 2717 + lu32i.d $a1, 418519 + lu52i.d $a1, $a1, -1031 + movgr2fr.d $fs4, $a1 + pcalau12i $a1, %pc_hi20(.LCPI1_0) + vld $vr0, $a1, %pc_lo12(.LCPI1_0) vst $vr0, $sp, 224 # 16-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_5) - vld $vr0, $a0, %pc_lo12(.LCPI1_5) + pcalau12i $a1, %pc_hi20(.LCPI1_1) + vld $vr0, $a1, %pc_lo12(.LCPI1_1) vst $vr0, $sp, 208 # 16-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_6) - vld $vr0, $a0, %pc_lo12(.LCPI1_6) + pcalau12i $a1, %pc_hi20(.LCPI1_2) + vld $vr0, $a1, %pc_lo12(.LCPI1_2) vst $vr0, $sp, 192 # 16-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_7) - fld.d $fs5, $a0, %pc_lo12(.LCPI1_7) - ori $a0, $zero, 0 + lu12i.w $a1, -416281 + ori $a1, $a1, 3380 + lu32i.d $a1, -1902 + lu52i.d $a1, $a1, -1026 + movgr2fr.d $fs5, $a1 lu32i.d $a0, 98304 - lu52i.d $a0, $a0, 1030 - vreplgr2vr.d $vr0, $a0 + lu52i.d $s7, $a0, 1030 + vreplgr2vr.d $vr0, $s7 vst $vr0, $sp, 176 # 16-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_8) - fld.d $fs6, $a0, %pc_lo12(.LCPI1_8) - pcalau12i $a0, %pc_hi20(.LCPI1_9) - vld $vr0, $a0, %pc_lo12(.LCPI1_9) + pcalau12i $a0, %pc_hi20(.LCPI1_3) + vld $vr0, $a0, %pc_lo12(.LCPI1_3) vst $vr0, $sp, 160 # 16-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_10) - fld.d $fs7, $a0, %pc_lo12(.LCPI1_10) - lu52i.d $s7, $zero, 1023 - lu52i.d $s8, $zero, 1021 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, 162201 + lu52i.d $s8, $a0, 1031 + lu52i.d $s4, $zero, 1023 + lu52i.d $s5, $zero, 1021 vst $vr3, $sp, 64 # 16-byte Folded Spill + st.d $s1, $sp, 40 # 8-byte Folded Spill b .LBB1_5 .p2align 4, , 16 .LBB1_4: # %.split150.us.us # in Loop: Header=BB1_5 Depth=1 - ld.d $a1, $sp, 32 # 8-byte Folded Reload - addi.d $a1, $a1, 1 ld.d $a3, $sp, 24 # 8-byte Folded Reload addi.d $a3, $a3, 1 + ld.d $a4, $sp, 16 # 8-byte Folded Reload + addi.d $a4, $a4, 1 ori $a0, $zero, 768 - beq $a1, $a0, .LBB1_45 + beq $a3, $a0, .LBB1_45 .LBB1_5: # %.split148.us.us # =>This Loop Header: Depth=1 # Child Loop BB1_7 Depth 2 # Child Loop BB1_9 Depth 3 # Child Loop BB1_13 Depth 4 # Child Loop BB1_29 Depth 4 - move $s5, $zero + move $s2, $zero st.w $zero, $sp, 386 - st.d $a3, $sp, 24 # 8-byte Folded Spill - mul.d $a0, $a3, $a3 - mul.d $a0, $a0, $a1 + st.d $a4, $sp, 16 # 8-byte Folded Spill + mul.d $a0, $a4, $a4 + mul.d $a0, $a0, $a3 st.h $a0, $sp, 390 - slli.d $a0, $a1, 10 + slli.d $a0, $a3, 10 st.d $a0, $sp, 48 # 8-byte Folded Spill - srli.d $a0, $a1, 32 - lu52i.d $a3, $zero, 1107 - or $a0, $a0, $a3 + srli.d $a0, $a3, 32 + lu52i.d $a1, $zero, 1107 + or $a0, $a0, $a1 movgr2fr.d $fa0, $a0 - fld.d $fa1, $sp, 16 # 8-byte Folded Reload + fld.d $fa1, $sp, 8 # 8-byte Folded Reload fsub.d $fa0, $fa0, $fa1 - st.d $a1, $sp, 32 # 8-byte Folded Spill - move $a0, $a1 + st.d $a3, $sp, 24 # 8-byte Folded Spill + move $a0, $a3 lu12i.w $a1, 275200 bstrins.d $a0, $a1, 63, 32 movgr2fr.d $fa1, $a0 - fadd.d $fs0, $fa1, $fa0 + fadd.d $fs6, $fa1, $fa0 b .LBB1_7 .p2align 4, , 16 .LBB1_6: # %.split141.us.us.us # in Loop: Header=BB1_7 Depth=2 - addi.d $s5, $s5, 1 - move $s1, $s6 - move $a2, $s3 + addi.d $s2, $s2, 1 + ld.d $s1, $sp, 40 # 8-byte Folded Reload + lu12i.w $a2, 4602 ori $a0, $zero, 1024 - beq $s5, $a0, .LBB1_4 + beq $s2, $a0, .LBB1_4 .LBB1_7: # %.split139.us.us.us # Parent Loop BB1_5 Depth=1 # => This Loop Header: Depth=2 @@ -1254,19 +1277,17 @@ main: # @main # Child Loop BB1_13 Depth 4 # Child Loop BB1_29 Depth 4 ld.d $a0, $sp, 48 # 8-byte Folded Reload - sub.d $a0, $s5, $a0 - bstrpick.d $a1, $s5, 15, 0 + sub.d $a0, $s2, $a0 + bstrpick.d $a1, $s2, 15, 0 movgr2fr.w $fa0, $a1 - ffint.d.w $fs1, $fa0 + ffint.d.w $fs7, $fa0 slli.d $a1, $a0, 4 alsl.d $a0, $a0, $a1, 3 - move $s6, $s1 add.d $a0, $s1, $a0 - move $s3, $a2 - add.d $s2, $a0, $a2 - ld.d $a1, $sp, 40 # 8-byte Folded Reload - add.d $s0, $a0, $a1 - ori $s4, $zero, 1 + add.d $s3, $a0, $a2 + ld.d $a1, $sp, 32 # 8-byte Folded Reload + add.d $fp, $a0, $a1 + ori $s6, $zero, 1 vldi $vr0, -928 vst $vr0, $sp, 288 # 16-byte Folded Spill b .LBB1_9 @@ -1293,12 +1314,12 @@ main: # @main vand.v $vr0, $vr0, $vr1 vld $vr1, $sp, 96 # 16-byte Folded Reload vfadd.d $vr0, $vr0, $vr1 - vst $vr0, $s2, 0 - fst.d $fa2, $s0, 0 - andi $a0, $s4, 1 + vst $vr0, $s3, 0 + fst.d $fa2, $fp, 0 + andi $a0, $s6, 1 vldi $vr0, -904 vst $vr0, $sp, 288 # 16-byte Folded Spill - move $s4, $zero + move $s6, $zero vld $vr3, $sp, 64 # 16-byte Folded Reload beqz $a0, .LBB1_6 .LBB1_9: # %.preheader126.us.us.us @@ -1307,7 +1328,7 @@ main: # @main # => This Loop Header: Depth=3 # Child Loop BB1_13 Depth 4 # Child Loop BB1_29 Depth 4 - move $s1, $fp + move $s1, $s0 fld.d $fs3, $sp, 56 # 8-byte Folded Reload b .LBB1_13 .p2align 4, , 16 @@ -1321,9 +1342,9 @@ main: # @main fadd.d $fa0, $fa1, $fa0 .LBB1_12: # in Loop: Header=BB1_13 Depth=4 vldi $vr2, -928 - fadd.d $fa1, $fs2, $fa2 + fadd.d $fa1, $fs0, $fa2 fmul.d $fa1, $fa1, $fa2 - fadd.d $fa1, $fa1, $fs1 + fadd.d $fa1, $fa1, $fs7 fld.d $fa3, $sp, 248 # 8-byte Folded Reload fmul.d $fa1, $fa1, $fa3 vldi $vr3, -800 @@ -1331,7 +1352,7 @@ main: # @main vld $vr4, $sp, 288 # 16-byte Folded Reload fadd.d $fa0, $fa4, $fa0 fmul.d $fa0, $fa0, $fa2 - fadd.d $fa0, $fa0, $fs0 + fadd.d $fa0, $fa0, $fs6 fld.d $fa2, $sp, 240 # 8-byte Folded Reload fdiv.d $fa0, $fa0, $fa2 fadd.d $fa0, $fa0, $fa3 @@ -1350,10 +1371,12 @@ main: # @main fadd.d $fa1, $fa1, $fs5 vld $vr2, $sp, 176 # 16-byte Folded Reload vfmul.d $vr2, $vr0, $vr2 - fmul.d $fa3, $fa1, $fs6 + movgr2fr.d $fs0, $s7 + fmul.d $fa3, $fa1, $fs0 vld $vr4, $sp, 160 # 16-byte Folded Reload vfadd.d $vr2, $vr2, $vr4 - fadd.d $fa3, $fa3, $fs7 + movgr2fr.d $fs1, $s8 + fadd.d $fa3, $fa3, $fs1 vreplvei.d $vr4, $vr0, 1 fmul.d $fa5, $fa4, $fa4 vreplvei.d $vr0, $vr0, 0 @@ -1404,7 +1427,7 @@ main: # @main .LBB1_15: # %.split210 # in Loop: Header=BB1_13 Depth=4 vldi $vr0, -784 - fadd.d $fs2, $fa1, $fa0 + fadd.d $fs0, $fa1, $fa0 b .LBB1_18 .p2align 4, , 16 .LBB1_16: # in Loop: Header=BB1_13 Depth=4 @@ -1415,7 +1438,7 @@ main: # @main bceqz $fcc0, .LBB1_23 .LBB1_17: # %.split # in Loop: Header=BB1_13 Depth=4 - fsub.d $fs2, $fa2, $fa0 + fsub.d $fs0, $fa2, $fa0 .LBB1_18: # in Loop: Header=BB1_13 Depth=4 addi.d $a0, $sp, 386 pcaddu18i $ra, %call36(erand48) @@ -1465,7 +1488,7 @@ main: # @main .LBB1_25: # %._crit_edge.us.us.us.us # in Loop: Header=BB1_9 Depth=3 vrepli.b $vr4, 0 - vreplgr2vr.d $vr2, $s7 + vreplgr2vr.d $vr2, $s4 vfcmp.clt.d $vr0, $vr2, $vr3 vldi $vr1, -912 fcmp.clt.d $fcc0, $fa1, $fs3 @@ -1474,23 +1497,23 @@ main: # @main fcmp.clt.d $fcc0, $fs3, $fs2 vst $vr2, $sp, 128 # 16-byte Folded Spill vbitsel.v $vr0, $vr3, $vr2, $vr0 - vreplgr2vr.d $vr2, $s8 + vreplgr2vr.d $vr2, $s5 vst $vr2, $sp, 112 # 16-byte Folded Spill vfmul.d $vr0, $vr0, $vr2 - vld $vr2, $s2, 0 + vld $vr2, $s3, 0 vfcmp.cule.d $vr3, $vr4, $vr3 vand.v $vr0, $vr3, $vr0 - fld.d $fa3, $s0, 0 + fld.d $fa3, $fp, 0 vfadd.d $vr2, $vr0, $vr2 vldi $vr0, -944 fmul.d $fa0, $fa1, $fa0 fsel $fa0, $fa0, $fs2, $fcc0 fadd.d $fa0, $fa0, $fa3 vst $vr2, $sp, 96 # 16-byte Folded Spill - vst $vr2, $s2, 0 + vst $vr2, $s3, 0 fst.d $fa0, $sp, 88 # 8-byte Folded Spill - fst.d $fa0, $s0, 0 - move $s1, $fp + fst.d $fa0, $fp, 0 + move $s1, $s0 b .LBB1_29 .p2align 4, , 16 .LBB1_26: # in Loop: Header=BB1_29 Depth=4 @@ -1506,7 +1529,7 @@ main: # @main fadd.d $fa1, $fs3, $fa1 vldi $vr2, -928 fmul.d $fa1, $fa1, $fa2 - fadd.d $fa1, $fa1, $fs1 + fadd.d $fa1, $fa1, $fs7 fld.d $fa3, $sp, 248 # 8-byte Folded Reload fmul.d $fa1, $fa1, $fa3 vldi $vr3, -800 @@ -1514,7 +1537,7 @@ main: # @main vld $vr4, $sp, 288 # 16-byte Folded Reload fadd.d $fa0, $fa4, $fa0 fmul.d $fa0, $fa0, $fa2 - fadd.d $fa0, $fa0, $fs0 + fadd.d $fa0, $fa0, $fs6 fld.d $fa2, $sp, 240 # 8-byte Folded Reload fdiv.d $fa0, $fa0, $fa2 fadd.d $fa0, $fa0, $fa3 @@ -1533,10 +1556,10 @@ main: # @main fadd.d $fa1, $fa1, $fs5 vld $vr2, $sp, 176 # 16-byte Folded Reload vfmul.d $vr2, $vr0, $vr2 - fmul.d $fa3, $fa1, $fs6 + fmul.d $fa3, $fa1, $fs0 vld $vr4, $sp, 160 # 16-byte Folded Reload vfadd.d $vr2, $vr2, $vr4 - fadd.d $fa3, $fa3, $fs7 + fadd.d $fa3, $fa3, $fs1 vreplvei.d $vr4, $vr0, 1 fmul.d $fa5, $fa4, $fa4 vreplvei.d $vr0, $vr0, 0 diff --git a/results/SingleSource/Benchmarks/Stanford/CMakeFiles/Oscar.dir/Oscar.s b/results/SingleSource/Benchmarks/Stanford/CMakeFiles/Oscar.dir/Oscar.s index 38b500e3..711a250f 100644 --- a/results/SingleSource/Benchmarks/Stanford/CMakeFiles/Oscar.dir/Oscar.s +++ b/results/SingleSource/Benchmarks/Stanford/CMakeFiles/Oscar.dir/Oscar.s @@ -31,16 +31,7 @@ Rand: # @Rand .Lfunc_end1: .size Rand, .Lfunc_end1-Rand # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function Cos -.LCPI2_0: - .word 0xc4340000 # float -720 -.LCPI2_1: - .word 0x471d8000 # float 40320 -.LCPI2_2: - .word 0xca5d7c00 # float -3628800 - .text - .globl Cos + .globl Cos # -- Begin function Cos .p2align 5 .type Cos,@function Cos: # @Cos @@ -54,24 +45,27 @@ Cos: # @Cos fmul.s $fa1, $fa0, $fa1 vldi $vr3, -1224 fdiv.s $fa3, $fa1, $fa3 - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.s $fa4, $a0, %pc_lo12(.LCPI2_0) fadd.s $fa2, $fa2, $fa3 fmul.s $fa1, $fa0, $fa1 fmul.s $fa1, $fa0, $fa1 - fdiv.s $fa3, $fa1, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI2_1) - fld.s $fa4, $a0, %pc_lo12(.LCPI2_1) + lu12i.w $a0, -244928 + lu32i.d $a0, 0 + movgr2fr.w $fa3, $a0 + fdiv.s $fa3, $fa1, $fa3 fadd.s $fa2, $fa2, $fa3 fmul.s $fa1, $fa0, $fa1 fmul.s $fa1, $fa0, $fa1 - fdiv.s $fa3, $fa1, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI2_2) - fld.s $fa4, $a0, %pc_lo12(.LCPI2_2) + lu12i.w $a0, 291288 + movgr2fr.w $fa3, $a0 + fdiv.s $fa3, $fa1, $fa3 fadd.s $fa2, $fa2, $fa3 fmul.s $fa1, $fa0, $fa1 fmul.s $fa0, $fa0, $fa1 - fdiv.s $fa0, $fa0, $fa4 + lu12i.w $a0, -219689 + ori $a0, $a0, 3072 + lu32i.d $a0, 0 + movgr2fr.w $fa1, $a0 + fdiv.s $fa0, $fa0, $fa1 fadd.s $fa0, $fa2, $fa0 ret .Lfunc_end2: @@ -153,12 +147,7 @@ Printcomplex: # @Printcomplex .Lfunc_end4: .size Printcomplex, .Lfunc_end4-Printcomplex # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function Uniform11 -.LCPI5_0: - .word 0x39000000 # float 1.22070313E-4 - .text - .globl Uniform11 + .globl Uniform11 # -- Begin function Uniform11 .p2align 5 .type Uniform11,@function Uniform11: # @Uniform11 @@ -169,29 +158,18 @@ Uniform11: # @Uniform11 mul.d $a2, $a2, $a3 addi.d $a2, $a2, 1731 bstrpick.d $a2, $a2, 12, 0 - pcalau12i $a3, %pc_hi20(.LCPI5_0) - fld.s $fa0, $a3, %pc_lo12(.LCPI5_0) st.w $a2, $a0, 0 - movgr2fr.w $fa1, $a2 - ffint.s.w $fa1, $fa1 - fmul.s $fa0, $fa1, $fa0 + movgr2fr.w $fa0, $a2 + ffint.s.w $fa0, $fa0 + lu12i.w $a0, 233472 + movgr2fr.w $fa1, $a0 + fmul.s $fa0, $fa0, $fa1 fst.s $fa0, $a1, 0 ret .Lfunc_end5: .size Uniform11, .Lfunc_end5-Uniform11 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function Exptab -.LCPI6_0: - .word 0x40490fdb # float 3.14159274 -.LCPI6_1: - .word 0xc4340000 # float -720 -.LCPI6_2: - .word 0x471d8000 # float 40320 -.LCPI6_3: - .word 0xca5d7c00 # float -3628800 - .text - .globl Exptab + .globl Exptab # -- Begin function Exptab .p2align 5 .type Exptab,@function Exptab: # @Exptab @@ -199,17 +177,21 @@ Exptab: # @Exptab addi.d $sp, $sp, -112 ori $a2, $zero, 4 vldi $vr0, -1264 - pcalau12i $a3, %pc_hi20(.LCPI6_0) - fld.s $fa1, $a3, %pc_lo12(.LCPI6_0) + lu12i.w $a3, 263312 + ori $a3, $a3, 4059 + movgr2fr.w $fa1, $a3 vldi $vr2, -1056 vldi $vr3, -1168 - pcalau12i $a3, %pc_hi20(.LCPI6_1) - fld.s $fa4, $a3, %pc_lo12(.LCPI6_1) - pcalau12i $a3, %pc_hi20(.LCPI6_2) - fld.s $fa5, $a3, %pc_lo12(.LCPI6_2) - pcalau12i $a3, %pc_hi20(.LCPI6_3) - fld.s $fa6, $a3, %pc_lo12(.LCPI6_3) - vldi $vr7, -1224 + vldi $vr4, -1224 + lu12i.w $a3, -244928 + lu32i.d $a3, 0 + movgr2fr.w $fa5, $a3 + lu12i.w $a3, 291288 + movgr2fr.w $fa6, $a3 + lu12i.w $a3, -219689 + ori $a3, $a3, 3072 + lu32i.d $a3, 0 + movgr2fr.w $fa7, $a3 addi.d $a3, $sp, 8 ori $a4, $zero, 104 .p2align 4, , 16 @@ -220,19 +202,19 @@ Exptab: # @Exptab fadd.s $ft2, $ft2, $fa3 fmul.s $ft1, $ft0, $ft1 fmul.s $ft1, $ft0, $ft1 - fdiv.s $ft3, $ft1, $fa7 + fdiv.s $ft3, $ft1, $fa4 fadd.s $ft2, $ft2, $ft3 fmul.s $ft1, $ft0, $ft1 fmul.s $ft1, $ft0, $ft1 - fdiv.s $ft3, $ft1, $fa4 + fdiv.s $ft3, $ft1, $fa5 fadd.s $ft2, $ft2, $ft3 fmul.s $ft1, $ft0, $ft1 fmul.s $ft1, $ft0, $ft1 - fdiv.s $ft3, $ft1, $fa5 + fdiv.s $ft3, $ft1, $fa6 fadd.s $ft2, $ft2, $ft3 fmul.s $ft1, $ft0, $ft1 fmul.s $ft0, $ft0, $ft1 - fdiv.s $ft0, $ft0, $fa6 + fdiv.s $ft0, $ft0, $fa7 fadd.s $ft0, $ft2, $ft0 fadd.s $ft0, $ft0, $ft0 frecip.s $ft0, $ft0 @@ -489,14 +471,7 @@ Fft: # @Fft .Lfunc_end7: .size Fft, .Lfunc_end7-Fft # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function Oscar -.LCPI8_0: - .word 0x39000000 # float 1.22070313E-4 -.LCPI8_1: - .word 0x3d800000 # float 0.0625 - .text - .globl Oscar + .globl Oscar # -- Begin function Oscar .p2align 5 .type Oscar,@function Oscar: # @Oscar @@ -514,9 +489,9 @@ Oscar: # @Oscar jirl $ra, $ra, 0 lu12i.w $a0, 1 ori $a1, $a0, 1671 - pcalau12i $a2, %pc_hi20(.LCPI8_0) - fld.s $fa0, $a2, %pc_lo12(.LCPI8_0) ori $a0, $a0, 759 + lu12i.w $a2, 233472 + movgr2fr.w $fa0, $a2 vldi $vr1, -1116 vldi $vr2, -1228 pcalau12i $a2, %pc_hi20(z) @@ -555,8 +530,8 @@ Oscar: # @Oscar addi.d $s0, $a0, %pc_lo12(w) pcalau12i $a0, %pc_hi20(e) addi.d $s1, $a0, %pc_lo12(e) - pcalau12i $a0, %pc_hi20(.LCPI8_1) - fld.s $fs0, $a0, %pc_lo12(.LCPI8_1) + lu12i.w $a0, 251904 + movgr2fr.w $fs0, $a0 ori $a0, $zero, 256 move $a1, $fp move $a2, $s0 diff --git a/results/SingleSource/Regression/C/CMakeFiles/Regression-C-casts.dir/casts.s b/results/SingleSource/Regression/C/CMakeFiles/Regression-C-casts.dir/casts.s index cf83de84..4117e0da 100644 --- a/results/SingleSource/Regression/C/CMakeFiles/Regression-C-casts.dir/casts.s +++ b/results/SingleSource/Regression/C/CMakeFiles/Regression-C-casts.dir/casts.s @@ -1,14 +1,6 @@ .file "casts.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function main -.LCPI0_0: - .word 0x5f000000 # float 9.22337203E+18 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI0_1: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -218,32 +210,32 @@ main: # @main move $a1, $zero pcaddu18i $ra, %call36(strtod) jirl $ra, $ra, 0 - fcvt.s.d $fa2, $fa0 + fcvt.s.d $fa1, $fa0 ori $a0, $zero, 5 bge $s0, $a0, .LBB0_10 .LBB0_8: vldi $vr0, -1024 b .LBB0_11 .LBB0_9: - vldi $vr2, -1168 + vldi $vr1, -1168 ori $a0, $zero, 5 blt $s0, $a0, .LBB0_8 .LBB0_10: ld.d $a0, $fp, 32 move $a1, $zero - vst $vr2, $sp, 16 # 16-byte Folded Spill + vst $vr1, $sp, 16 # 16-byte Folded Spill pcaddu18i $ra, %call36(strtod) jirl $ra, $ra, 0 - vld $vr2, $sp, 16 # 16-byte Folded Reload + vld $vr1, $sp, 16 # 16-byte Folded Reload # kill: def $f0_64 killed $f0_64 def $vr0 .LBB0_11: vst $vr0, $sp, 32 # 16-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.s $fa0, $a0, %pc_lo12(.LCPI0_0) - ftintrz.l.s $fa1, $fa2 - movfr2gr.d $s0, $fa1 - fcmp.clt.s $fcc0, $fa2, $fa0 - fsub.s $fa0, $fa2, $fa0 + ftintrz.l.s $fa0, $fa1 + movfr2gr.d $s0, $fa0 + lu12i.w $a0, 389120 + movgr2fr.w $fa0, $a0 + fcmp.clt.s $fcc0, $fa1, $fa0 + fsub.s $fa0, $fa1, $fa0 ftintrz.l.s $fa0, $fa0 movfr2gr.d $a0, $fa0 lu52i.d $fp, $zero, -2048 @@ -252,9 +244,9 @@ main: # @main masknez $a0, $a0, $a1 maskeqz $a1, $s0, $a1 or $s1, $a1, $a0 - ftintrz.w.s $fa0, $fa2 + ftintrz.w.s $fa0, $fa1 movfr2gr.s $s2, $fa0 - fcvt.d.s $fa0, $fa2 + fcvt.d.s $fa0, $fa1 movfr2gr.d $a1, $fa0 pcalau12i $a0, %pc_hi20(.L.str.24) addi.d $a0, $a0, %pc_lo12(.L.str.24) @@ -290,8 +282,8 @@ main: # @main move $a2, $s1 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_1) + lu52i.d $s0, $zero, 1086 + movgr2fr.d $fa0, $s0 vld $vr1, $sp, 32 # 16-byte Folded Reload fcmp.clt.d $fcc0, $fa1, $fa0 fsub.d $fa0, $fa1, $fa0 @@ -301,11 +293,11 @@ main: # @main movcf2gr $a1, $fcc0 masknez $a0, $a0, $a1 ftintrz.l.d $fa0, $fa1 - movfr2gr.d $s1, $fa0 - maskeqz $a1, $s1, $a1 - or $s0, $a1, $a0 + movfr2gr.d $s2, $fa0 + maskeqz $a1, $s2, $a1 + or $s1, $a1, $a0 ftintrz.w.d $fa0, $fa1 - movfr2gr.s $s2, $fa0 + movfr2gr.s $s3, $fa0 movfr2gr.d $a1, $fa1 pcalau12i $a0, %pc_hi20(.L.str.30) addi.d $a0, $a0, %pc_lo12(.L.str.30) @@ -313,29 +305,29 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.31) addi.d $a0, $a0, %pc_lo12(.L.str.31) - move $a1, $s1 - move $a2, $s1 + move $a1, $s2 + move $a2, $s2 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.32) addi.d $a0, $a0, %pc_lo12(.L.str.32) - move $a1, $s2 - move $a2, $s2 + move $a1, $s3 + move $a2, $s3 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.33) addi.d $a0, $a0, %pc_lo12(.L.str.33) - move $a1, $s1 - move $a2, $s1 + move $a1, $s2 + move $a2, $s2 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.34) addi.d $a0, $a0, %pc_lo12(.L.str.34) - move $a1, $s1 - move $a2, $s1 + move $a1, $s2 + move $a2, $s2 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - addi.w $a1, $s1, 0 + addi.w $a1, $s2, 0 pcalau12i $a0, %pc_hi20(.L.str.35) addi.d $a0, $a0, %pc_lo12(.L.str.35) move $a2, $a1 @@ -343,71 +335,71 @@ main: # @main jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.36) addi.d $a0, $a0, %pc_lo12(.L.str.36) - move $a1, $s0 - move $a2, $s0 + move $a1, $s1 + move $a2, $s1 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.37) - addi.d $s0, $a0, %pc_lo12(.L.str.37) + addi.d $s1, $a0, %pc_lo12(.L.str.37) ori $a0, $zero, 0 lu32i.d $a0, -81920 - lu52i.d $s2, $a0, 1029 + lu52i.d $s3, $a0, 1029 ori $a1, $zero, 123 - ori $s4, $zero, 0 - move $a0, $s0 - move $a2, $s2 + ori $s5, $zero, 0 + move $a0, $s1 + move $a2, $s3 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 pcalau12i $a0, %pc_hi20(.L.str.38) - addi.d $s1, $a0, %pc_lo12(.L.str.38) + addi.d $s2, $a0, %pc_lo12(.L.str.38) ori $a1, $zero, 123 - move $a0, $s1 - move $a2, $s2 + move $a0, $s2 + move $a2, $s3 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - addi.w $s2, $zero, -1 + addi.w $s3, $zero, -1 lu52i.d $a2, $zero, -1025 - move $a0, $s0 - move $a1, $s2 - pcaddu18i $ra, %call36(printf) - jirl $ra, $ra, 0 - lu52i.d $s3, $zero, 1087 move $a0, $s1 - move $a1, $s2 - move $a2, $s3 + move $a1, $s3 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - lu32i.d $s4, -262144 - lu52i.d $a2, $s4, -1022 - addi.w $s2, $zero, -14 - move $a0, $s0 - move $a1, $s2 + lu52i.d $s4, $zero, 1087 + move $a0, $s2 + move $a1, $s3 + move $a2, $s4 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 + lu32i.d $s5, -262144 + lu52i.d $a2, $s5, -1022 + addi.w $s3, $zero, -14 move $a0, $s1 - move $a1, $s2 - move $a2, $s3 + move $a1, $s3 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - lu52i.d $s2, $s4, 1026 - ori $a1, $zero, 14 - move $a0, $s0 - move $a2, $s2 + move $a0, $s2 + move $a1, $s3 + move $a2, $s4 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 + lu52i.d $s3, $s5, 1026 ori $a1, $zero, 14 move $a0, $s1 - move $a2, $s2 + move $a2, $s3 + pcaddu18i $ra, %call36(printf) + jirl $ra, $ra, 0 + ori $a1, $zero, 14 + move $a0, $s2 + move $a2, $s3 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 lu52i.d $a2, $zero, -962 - move $a0, $s0 + move $a0, $s1 move $a1, $fp pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - lu52i.d $a2, $zero, 1086 - move $a0, $s1 + move $a0, $s2 move $a1, $fp + move $a2, $s0 pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 move $a0, $zero diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-20060420-1.dir/20060420-1.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-20060420-1.dir/20060420-1.s index 5f7b8a61..9fa59cd1 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-20060420-1.dir/20060420-1.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-20060420-1.dir/20060420-1.s @@ -272,36 +272,6 @@ foo: # @foo .word 0x43290000 # float 169 .word 0x43360000 # float 182 .word 0x43430000 # float 195 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI1_8: - .word 0x42480000 # float 50 -.LCPI1_9: - .word 0x42960000 # float 75 -.LCPI1_10: - .word 0x42c80000 # float 100 -.LCPI1_11: - .word 0x42fa0000 # float 125 -.LCPI1_12: - .word 0x43160000 # float 150 -.LCPI1_13: - .word 0x432f0000 # float 175 -.LCPI1_14: - .word 0x43480000 # float 200 -.LCPI1_15: - .word 0x43610000 # float 225 -.LCPI1_16: - .word 0x437a0000 # float 250 -.LCPI1_17: - .word 0x43898000 # float 275 -.LCPI1_18: - .word 0x43960000 # float 300 -.LCPI1_19: - .word 0x43a28000 # float 325 -.LCPI1_20: - .word 0x43af0000 # float 350 -.LCPI1_21: - .word 0x43bb8000 # float 375 .text .globl main .p2align 5 @@ -363,86 +333,86 @@ main: # @main bcnez $fcc0, .LBB1_17 # %bb.2: fld.s $fa0, $fp, 8 - pcalau12i $a0, %pc_hi20(.LCPI1_8) - fld.s $fa1, $a0, %pc_lo12(.LCPI1_8) + lu12i.w $a0, 271488 + movgr2fr.w $fa1, $a0 fcmp.cune.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB1_17 # %bb.3: fld.s $fa0, $fp, 12 - pcalau12i $a0, %pc_hi20(.LCPI1_9) - fld.s $fa1, $a0, %pc_lo12(.LCPI1_9) + lu12i.w $a0, 272736 + movgr2fr.w $fa1, $a0 fcmp.cune.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB1_17 # %bb.4: fld.s $fa0, $fp, 16 - pcalau12i $a0, %pc_hi20(.LCPI1_10) - fld.s $fa1, $a0, %pc_lo12(.LCPI1_10) + lu12i.w $a0, 273536 + movgr2fr.w $fa1, $a0 fcmp.cune.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB1_17 # %bb.5: fld.s $fa0, $fp, 20 - pcalau12i $a0, %pc_hi20(.LCPI1_11) - fld.s $fa1, $a0, %pc_lo12(.LCPI1_11) + lu12i.w $a0, 274336 + movgr2fr.w $fa1, $a0 fcmp.cune.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB1_17 # %bb.6: fld.s $fa0, $fp, 24 - pcalau12i $a0, %pc_hi20(.LCPI1_12) - fld.s $fa1, $a0, %pc_lo12(.LCPI1_12) + lu12i.w $a0, 274784 + movgr2fr.w $fa1, $a0 fcmp.cune.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB1_17 # %bb.7: fld.s $fa0, $fp, 28 - pcalau12i $a0, %pc_hi20(.LCPI1_13) - fld.s $fa1, $a0, %pc_lo12(.LCPI1_13) + lu12i.w $a0, 275184 + movgr2fr.w $fa1, $a0 fcmp.cune.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB1_17 # %bb.8: fld.s $fa0, $fp, 32 - pcalau12i $a0, %pc_hi20(.LCPI1_14) - fld.s $fa1, $a0, %pc_lo12(.LCPI1_14) + lu12i.w $a0, 275584 + movgr2fr.w $fa1, $a0 fcmp.cune.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB1_17 # %bb.9: fld.s $fa0, $fp, 36 - pcalau12i $a0, %pc_hi20(.LCPI1_15) - fld.s $fa1, $a0, %pc_lo12(.LCPI1_15) + lu12i.w $a0, 275984 + movgr2fr.w $fa1, $a0 fcmp.cune.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB1_17 # %bb.10: fld.s $fa0, $fp, 40 - pcalau12i $a0, %pc_hi20(.LCPI1_16) - fld.s $fa1, $a0, %pc_lo12(.LCPI1_16) + lu12i.w $a0, 276384 + movgr2fr.w $fa1, $a0 fcmp.cune.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB1_17 # %bb.11: fld.s $fa0, $fp, 44 - pcalau12i $a0, %pc_hi20(.LCPI1_17) - fld.s $fa1, $a0, %pc_lo12(.LCPI1_17) + lu12i.w $a0, 276632 + movgr2fr.w $fa1, $a0 fcmp.cune.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB1_17 # %bb.12: fld.s $fa0, $fp, 48 - pcalau12i $a0, %pc_hi20(.LCPI1_18) - fld.s $fa1, $a0, %pc_lo12(.LCPI1_18) + lu12i.w $a0, 276832 + movgr2fr.w $fa1, $a0 fcmp.cune.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB1_17 # %bb.13: fld.s $fa0, $fp, 52 - pcalau12i $a0, %pc_hi20(.LCPI1_19) - fld.s $fa1, $a0, %pc_lo12(.LCPI1_19) + lu12i.w $a0, 277032 + movgr2fr.w $fa1, $a0 fcmp.cune.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB1_17 # %bb.14: fld.s $fa0, $fp, 56 - pcalau12i $a0, %pc_hi20(.LCPI1_20) - fld.s $fa1, $a0, %pc_lo12(.LCPI1_20) + lu12i.w $a0, 277232 + movgr2fr.w $fa1, $a0 fcmp.cune.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB1_17 # %bb.15: fld.s $fa0, $fp, 60 - pcalau12i $a0, %pc_hi20(.LCPI1_21) - fld.s $fa1, $a0, %pc_lo12(.LCPI1_21) + lu12i.w $a0, 277432 + movgr2fr.w $fa1, $a0 fcmp.cune.s $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB1_17 # %bb.16: diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-20071030-1.dir/20071030-1.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-20071030-1.dir/20071030-1.s index 66384829..86fe6b53 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-20071030-1.dir/20071030-1.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-20071030-1.dir/20071030-1.s @@ -1,10 +1,6 @@ .file "20071030-1.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function CalcPing -.LCPI0_0: - .word 0x447a0000 # float 1000 .text - .globl CalcPing + .globl CalcPing # -- Begin function CalcPing .p2align 5 .type CalcPing,@function CalcPing: # @CalcPing @@ -36,11 +32,11 @@ CalcPing: # @CalcPing beqz $a1, .LBB0_6 # %bb.5: bstrpick.d $a0, $a1, 31, 0 - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.s $fa1, $a1, %pc_lo12(.LCPI0_0) - movgr2fr.d $fa2, $a0 - ffint.s.l $fa2, $fa2 - fdiv.s $fa0, $fa0, $fa2 + movgr2fr.d $fa1, $a0 + ffint.s.l $fa1, $fa1 + fdiv.s $fa0, $fa0, $fa1 + lu12i.w $a0, 280480 + movgr2fr.w $fa1, $a0 fmul.s $fa0, $fa0, $fa1 ftintrz.w.s $fa0, $fa0 movfr2gr.s $a0, $fa0 @@ -52,12 +48,7 @@ CalcPing: # @CalcPing .Lfunc_end0: .size CalcPing, .Lfunc_end0-CalcPing # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function main -.LCPI1_0: - .word 0x447a0000 # float 1000 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -92,11 +83,11 @@ main: # @main beqz $a0, .LBB1_5 # %bb.3: # %CalcPing.exit bstrpick.d $a0, $a0, 31, 0 - pcalau12i $a1, %pc_hi20(.LCPI1_0) - fld.s $fa1, $a1, %pc_lo12(.LCPI1_0) - movgr2fr.d $fa2, $a0 - ffint.s.l $fa2, $fa2 - fdiv.s $fa0, $fa0, $fa2 + movgr2fr.d $fa1, $a0 + ffint.s.l $fa1, $fa1 + fdiv.s $fa0, $fa0, $fa1 + lu12i.w $a0, 280480 + movgr2fr.w $fa1, $a0 fmul.s $fa0, $fa0, $fa1 ftintrz.w.s $fa0, $fa0 movfr2gr.s $a0, $fa0 diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-930603-1.dir/930603-1.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-930603-1.dir/930603-1.s index 9b57e0d9..1877f5ff 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-930603-1.dir/930603-1.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-930603-1.dir/930603-1.s @@ -1,18 +1,17 @@ .file "930603-1.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function fx -.LCPI0_0: - .dword 0x40026bb1bbb58975 # double 2.3025850929999998 .text - .globl fx + .globl fx # -- Begin function fx .p2align 5 .type fx,@function fx: # @fx # %bb.0: - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) fcvt.s.d $fa0, $fa0 fcvt.d.s $fa0, $fa0 + lu12i.w $a0, -279720 + ori $a0, $a0, 2421 + lu32i.d $a0, 158641 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 vldi $vr1, -1016 fdiv.d $fa0, $fa1, $fa0 diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-930702-1.dir/930702-1.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-930702-1.dir/930702-1.s index 71c9ce64..bf34c8e7 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-930702-1.dir/930702-1.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-930702-1.dir/930702-1.s @@ -1,16 +1,14 @@ .file "930702-1.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function fp -.LCPI0_0: - .dword 0x4040800000000000 # double 33 .text - .globl fp + .globl fp # -- Begin function fp .p2align 5 .type fp,@function fp: # @fp # %bb.0: - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI0_0) + ori $a1, $zero, 0 + lu32i.d $a1, 32768 + lu52i.d $a1, $a1, 1028 + movgr2fr.d $fa1, $a1 fcmp.cune.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB0_3 # %bb.1: diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-980709-1.dir/980709-1.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-980709-1.dir/980709-1.s index 7a3e4baf..85b94a7b 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-980709-1.dir/980709-1.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-980709-1.dir/980709-1.s @@ -1,16 +1,6 @@ .file "980709-1.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x3fd5555555555555 # double 0.33333333333333331 -.LCPI0_1: - .dword 0x3fb999999999999a # double 0.10000000000000001 -.LCPI0_2: - .dword 0xbfb999999999999a # double -0.10000000000000001 -.LCPI0_3: - .dword 0x400965fe974a3401 # double 3.1748020000000001 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -20,20 +10,29 @@ main: # @main lu52i.d $a0, $zero, 1028 st.d $a0, $sp, 0 fld.d $fa0, $sp, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) + lu12i.w $a0, 349525 + ori $a0, $a0, 1365 + lu32i.d $a0, 349525 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa1, $a0 pcaddu18i $ra, %call36(pow) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_1) - pcalau12i $a0, %pc_hi20(.LCPI0_3) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_3) - fadd.d $fa2, $fa0, $fa2 + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, -419431 + lu52i.d $a1, $a0, 1019 + movgr2fr.d $fa1, $a1 + fadd.d $fa2, $fa0, $fa1 + lu12i.w $a1, -428893 + ori $a1, $a1, 1025 + lu32i.d $a1, -432642 + lu52i.d $a1, $a1, 1024 + movgr2fr.d $fa1, $a1 fcmp.cule.d $fcc0, $fa2, $fa1 bcnez $fcc0, .LBB0_3 # %bb.1: - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_2) + lu52i.d $a0, $a0, -1029 + movgr2fr.d $fa2, $a0 fadd.d $fa0, $fa0, $fa2 fcmp.cule.d $fcc0, $fa1, $fa0 bcnez $fcc0, .LBB0_3 diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-990127-2.dir/990127-2.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-990127-2.dir/990127-2.s index 32773233..b45357ee 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-990127-2.dir/990127-2.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-990127-2.dir/990127-2.s @@ -15,25 +15,23 @@ fpEq: # @fpEq .Lfunc_end0: .size fpEq, .Lfunc_end0-fpEq # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function fpTest -.LCPI1_0: - .dword 0x4059000000000000 # double 100 -.LCPI1_1: - .dword 0x4053d55555555556 # double 79.333333333333343 - .text - .globl fpTest + .globl fpTest # -- Begin function fpTest .p2align 5 .type fpTest,@function fpTest: # @fpTest # %bb.0: - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI1_0) - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fa3, $a0, %pc_lo12(.LCPI1_1) + ori $a0, $zero, 0 + lu32i.d $a0, -458752 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa2, $a0 fmul.d $fa0, $fa0, $fa2 fdiv.d $fa0, $fa0, $fa1 - fcmp.ceq.d $fcc0, $fa0, $fa3 + lu12i.w $a0, 349525 + ori $a0, $a0, 1366 + lu32i.d $a0, 251221 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa1, $a0 + fcmp.ceq.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB1_2 # %bb.1: # %fpEq.exit ret diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-align-2.dir/align-2.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-align-2.dir/align-2.s index b861df5b..5ad6cb62 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-align-2.dir/align-2.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-align-2.dir/align-2.s @@ -1,20 +1,13 @@ .file "align-2.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function main -.LCPI0_0: - .word 0x42000000 # float 32 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI0_1: - .dword 0x4041000000000000 # double 34 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main # %bb.0: addi.d $sp, $sp, -16 st.d $ra, $sp, 8 # 8-byte Folded Spill + st.d $fp, $sp, 0 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(s_c_s) addi.d $a0, $a0, %pc_lo12(s_c_s) ld.bu $a1, $a0, 0 @@ -163,8 +156,8 @@ main: # @main pcalau12i $a0, %pc_hi20(s_f_ld) addi.d $a1, $a0, %pc_lo12(s_f_ld) fld.s $fa0, $a1, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI0_0) + lu12i.w $a0, 270336 + movgr2fr.w $fa1, $a0 fcmp.ceq.s $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB0_29 # %bb.25: @@ -173,6 +166,7 @@ main: # @main ori $a2, $zero, 0 lu32i.d $a2, 264192 lu52i.d $a3, $a2, 1024 + ori $fp, $zero, 0 move $a2, $zero pcaddu18i $ra, %call36(__eqtf2) jirl $ra, $ra, 0 @@ -181,8 +175,9 @@ main: # @main pcalau12i $a0, %pc_hi20(s_d_ld) addi.d $a1, $a0, %pc_lo12(s_d_ld) fld.d $fa0, $a1, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_1) + lu32i.d $fp, 65536 + lu52i.d $a0, $fp, 1028 + movgr2fr.d $fa1, $a0 fcmp.ceq.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB0_29 # %bb.27: @@ -197,6 +192,7 @@ main: # @main bnez $a0, .LBB0_29 # %bb.28: move $a0, $zero + ld.d $fp, $sp, 0 # 8-byte Folded Reload ld.d $ra, $sp, 8 # 8-byte Folded Reload addi.d $sp, $sp, 16 ret diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-cbrt.dir/cbrt.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-cbrt.dir/cbrt.s index 52dae574..04d97ffa 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-cbrt.dir/cbrt.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-cbrt.dir/cbrt.s @@ -1,22 +1,6 @@ .file "cbrt.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function cbrtl -.LCPI0_0: - .dword 0x4350000000000000 # double 18014398509481984 -.LCPI0_1: - .dword 0x3fe15f15f15f15f1 # double 0.54285714285714282 -.LCPI0_2: - .dword 0x3ff6a0ea0ea0ea0f # double 1.4142857142857144 -.LCPI0_3: - .dword 0xbfe691de2532c834 # double -0.70530612244897961 -.LCPI0_4: - .dword 0x3ff9b6db6db6db6e # double 1.6071428571428572 -.LCPI0_5: - .dword 0x3fd6db6db6db6db7 # double 0.35714285714285715 -.LCPI0_6: - .dword 0xbfd5555555555555 # double -0.33333333333333331 .text - .globl cbrtl + .globl cbrtl # -- Begin function cbrtl .p2align 5 .type cbrtl,@function cbrtl: # @cbrtl @@ -38,8 +22,8 @@ cbrtl: # @cbrtl fabs.d $fa1, $fa0 bnez $a2, .LBB0_5 # %bb.4: - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.d $fa2, $a1, %pc_lo12(.LCPI0_0) + lu52i.d $a1, $zero, 1077 + movgr2fr.d $fa2, $a1 fmul.d $fa2, $fa0, $fa2 movfr2gr.d $a1, $fa2 srli.d $a2, $a1, 32 @@ -70,24 +54,39 @@ cbrtl: # @cbrtl .LBB0_6: add.d $a1, $a1, $a2 movgr2fr.d $fa2, $a1 - pcalau12i $a1, %pc_hi20(.LCPI0_1) - fld.d $fa3, $a1, %pc_lo12(.LCPI0_1) - pcalau12i $a1, %pc_hi20(.LCPI0_2) - fld.d $fa4, $a1, %pc_lo12(.LCPI0_2) - fmul.d $fa5, $fa2, $fa2 - fdiv.d $fa5, $fa5, $fa1 - fmadd.d $fa3, $fa5, $fa2, $fa3 + fmul.d $fa3, $fa2, $fa2 + fdiv.d $fa3, $fa3, $fa1 + lu12i.w $a1, -59919 + ori $a1, $a1, 1521 + lu32i.d $a1, 89877 + lu52i.d $a1, $a1, 1022 + movgr2fr.d $fa4, $a1 + fmadd.d $fa3, $fa3, $fa2, $fa4 + lu12i.w $a1, 59918 + ori $a1, $a1, 2575 + lu32i.d $a1, 434410 + lu52i.d $a1, $a1, 1023 + movgr2fr.d $fa4, $a1 fadd.d $fa4, $fa3, $fa4 - pcalau12i $a1, %pc_hi20(.LCPI0_3) - fld.d $fa5, $a1, %pc_lo12(.LCPI0_3) - pcalau12i $a1, %pc_hi20(.LCPI0_4) - fld.d $fa6, $a1, %pc_lo12(.LCPI0_4) - pcalau12i $a1, %pc_hi20(.LCPI0_5) - fld.d $fa7, $a1, %pc_lo12(.LCPI0_5) + lu12i.w $a1, 152364 + ori $a1, $a1, 2100 + lu32i.d $a1, 430558 + lu52i.d $a1, $a1, -1026 + movgr2fr.d $fa5, $a1 fdiv.d $fa3, $fa5, $fa3 fadd.d $fa3, $fa4, $fa3 - fdiv.d $fa3, $fa6, $fa3 - fadd.d $fa3, $fa3, $fa7 + lu12i.w $a1, 449389 + ori $a1, $a1, 2926 + lu32i.d $a1, -411941 + lu52i.d $a1, $a1, 1023 + movgr2fr.d $fa4, $a1 + fdiv.d $fa3, $fa4, $fa3 + lu12i.w $a1, -299594 + ori $a1, $a1, 3511 + lu32i.d $a1, 449389 + lu52i.d $a1, $a1, 1021 + movgr2fr.d $fa4, $a1 + fadd.d $fa3, $fa3, $fa4 fmul.d $fa2, $fa3, $fa2 movfr2gr.d $a1, $fa2 bstrins.d $a1, $zero, 31, 0 @@ -106,11 +105,14 @@ cbrtl: # @cbrtl bstrins.d $a0, $zero, 62, 0 or $a0, $a0, $a1 movgr2fr.d $fa1, $a0 - pcalau12i $a0, %pc_hi20(.LCPI0_6) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_6) - fmul.d $fa3, $fa1, $fa1 - fdiv.d $fa0, $fa0, $fa3 + fmul.d $fa2, $fa1, $fa1 + fdiv.d $fa0, $fa0, $fa2 fsub.d $fa0, $fa1, $fa0 + lu12i.w $a0, 349525 + ori $a0, $a0, 1365 + lu32i.d $a0, 349525 + lu52i.d $a0, $a0, -1027 + movgr2fr.d $fa2, $a0 fmadd.d $fa0, $fa0, $fa2, $fa1 .LBB0_7: ret diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-conversion.dir/conversion.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-conversion.dir/conversion.s index d93f40c2..c670e35c 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-conversion.dir/conversion.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-conversion.dir/conversion.s @@ -76,12 +76,7 @@ s2ld: # @s2ld .Lfunc_end5: .size s2ld, .Lfunc_end5-s2ld # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function fnear -.LCPI6_0: - .word 0x49742400 # float 1.0E+6 - .text - .globl fnear + .globl fnear # -- Begin function fnear .p2align 5 .type fnear,@function fnear: # @fnear @@ -94,21 +89,17 @@ fnear: # @fnear # %bb.1: ret .LBB6_2: - pcalau12i $a0, %pc_hi20(.LCPI6_0) - fld.s $fa2, $a0, %pc_lo12(.LCPI6_0) fdiv.s $fa0, $fa0, $fa1 - fcmp.clt.s $fcc0, $fa2, $fa0 + lu12i.w $a0, 300866 + ori $a0, $a0, 1024 + movgr2fr.w $fa1, $a0 + fcmp.clt.s $fcc0, $fa1, $fa0 movcf2gr $a0, $fcc0 ret .Lfunc_end6: .size fnear, .Lfunc_end6-fnear # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function dnear -.LCPI7_0: - .dword 0x42d6bcc41e900000 # double 1.0E+14 - .text - .globl dnear + .globl dnear # -- Begin function dnear .p2align 5 .type dnear,@function dnear: # @dnear @@ -121,10 +112,12 @@ dnear: # @dnear # %bb.1: ret .LBB7_2: - pcalau12i $a0, %pc_hi20(.LCPI7_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI7_0) fdiv.d $fa0, $fa0, $fa1 - fcmp.clt.d $fcc0, $fa2, $fa0 + lu12i.w $a0, 125184 + lu32i.d $a0, 441540 + lu52i.d $a0, $a0, 1069 + movgr2fr.d $fa1, $a0 + fcmp.clt.d $fcc0, $fa1, $fa0 movcf2gr $a0, $fcc0 ret .Lfunc_end7: @@ -210,23 +203,19 @@ ull2f: # @ull2f .Lfunc_end10: .size ull2f, .Lfunc_end10-ull2f # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function ull2d -.LCPI11_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 - .text - .globl ull2d + .globl ull2d # -- Begin function ull2d .p2align 5 .type ull2d,@function ull2d: # @ull2d # %bb.0: srli.d $a1, $a0, 32 - pcalau12i $a2, %pc_hi20(.LCPI11_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI11_0) lu52i.d $a2, $zero, 1107 or $a1, $a1, $a2 + movgr2fr.d $fa0, $a1 + lu12i.w $a1, 256 + lu52i.d $a1, $a1, 1107 movgr2fr.d $fa1, $a1 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 lu12i.w $a1, 275200 bstrins.d $a0, $a1, 63, 32 movgr2fr.d $fa1, $a0 @@ -384,18 +373,13 @@ test_float_to_integer: # @test_float_to_integer .Lfunc_end23: .size test_float_to_integer, .Lfunc_end23-test_float_to_integer # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function f2ull -.LCPI24_0: - .word 0x5f000000 # float 9.22337203E+18 - .text - .globl f2ull + .globl f2ull # -- Begin function f2ull .p2align 5 .type f2ull,@function f2ull: # @f2ull # %bb.0: - pcalau12i $a0, %pc_hi20(.LCPI24_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI24_0) + lu12i.w $a0, 389120 + movgr2fr.w $fa1, $a0 fcmp.clt.s $fcc0, $fa0, $fa1 fsub.s $fa1, $fa0, $fa1 ftintrz.l.s $fa1, $fa1 @@ -412,18 +396,13 @@ f2ull: # @f2ull .Lfunc_end24: .size f2ull, .Lfunc_end24-f2ull # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function d2ull -.LCPI25_0: - .dword 0x43e0000000000000 # double 9.2233720368547758E+18 - .text - .globl d2ull + .globl d2ull # -- Begin function d2ull .p2align 5 .type d2ull,@function d2ull: # @d2ull # %bb.0: - pcalau12i $a0, %pc_hi20(.LCPI25_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI25_0) + lu52i.d $a0, $zero, 1086 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 fsub.d $fa1, $fa0, $fa1 ftintrz.l.d $fa1, $fa1 diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-floatunsisf-1.dir/floatunsisf-1.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-floatunsisf-1.dir/floatunsisf-1.s index 6ca3d05c..78851a0b 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-floatunsisf-1.dir/floatunsisf-1.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-floatunsisf-1.dir/floatunsisf-1.s @@ -1,10 +1,6 @@ .file "floatunsisf-1.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function main -.LCPI0_0: - .word 0x4f000001 # float 2.1474839E+9 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -15,12 +11,13 @@ main: # @main ld.wu $a0, $a0, %pc_lo12(u) movgr2fr.d $fa0, $a0 ffint.s.l $fa0, $fa0 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI0_0) pcalau12i $a0, %pc_hi20(f1) fst.s $fa0, $a0, %pc_lo12(f1) pcalau12i $a1, %pc_hi20(f2) - fst.s $fa1, $a1, %pc_lo12(f2) + lu12i.w $a2, 323584 + ori $a2, $a2, 1 + movgr2fr.w $fa0, $a2 + fst.s $fa0, $a1, %pc_lo12(f2) fld.s $fa0, $a0, %pc_lo12(f1) fld.s $fa1, $a1, %pc_lo12(f2) fcmp.ceq.s $fcc0, $fa0, $fa1 diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr23941.dir/pr23941.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr23941.dir/pr23941.s index 5782501e..dabde52c 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr23941.dir/pr23941.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr23941.dir/pr23941.s @@ -1,18 +1,14 @@ .file "pr23941.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x3800000000000000 # double 5.8774717541114375E-39 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main # %bb.0: pcalau12i $a0, %pc_hi20(d) fld.d $fa0, $a0, %pc_lo12(d) - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) + lu52i.d $a0, $zero, 896 + movgr2fr.d $fa1, $a0 fcmp.ceq.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB0_2 # %bb.1: diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr38969.dir/pr38969.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr38969.dir/pr38969.s index d6912a0c..959b6cce 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr38969.dir/pr38969.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr38969.dir/pr38969.s @@ -19,12 +19,7 @@ bar: # @bar .Lfunc_end1: .size bar, .Lfunc_end1-bar # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function main -.LCPI2_0: - .word 0x42280000 # float 42 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -32,8 +27,8 @@ main: # @main addi.d $sp, $sp, -16 st.d $ra, $sp, 8 # 8-byte Folded Spill fst.d $fs0, $sp, 0 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.s $fs0, $a0, %pc_lo12(.LCPI2_0) + lu12i.w $a0, 270976 + movgr2fr.w $fs0, $a0 vldi $vr0, -1246 fmov.s $fa1, $fs0 pcaddu18i $ra, %call36(bar) diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr42691.dir/pr42691.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr42691.dir/pr42691.s index c1b69c0b..25331bb8 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr42691.dir/pr42691.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr42691.dir/pr42691.s @@ -41,10 +41,6 @@ add: # @add .LCPI1_0: .dword 0x7ff0000000000000 # double +Inf .dword 0x4037000000000000 # double 23 - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 -.LCPI1_1: - .dword 0x7ff0000000000000 # double +Inf .text .globl main .p2align 5 @@ -55,9 +51,9 @@ main: # @main pcalau12i $a0, %pc_hi20(.LCPI1_0) vld $vr0, $a0, %pc_lo12(.LCPI1_0) vst $vr0, $sp, 0 - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_1) ori $a0, $zero, 1 + lu52i.d $a1, $zero, 2047 + movgr2fr.d $fa1, $a1 addi.d $a1, $sp, 0 vldi $vr0, -969 .p2align 4, , 16 diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr44942.dir/pr44942.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr44942.dir/pr44942.s index f90022eb..b19fee65 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr44942.dir/pr44942.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr44942.dir/pr44942.s @@ -40,12 +40,7 @@ test2: # @test2 .Lfunc_end1: .size test2, .Lfunc_end1-test2 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function test3 -.LCPI2_0: - .dword 0x4093480000000000 # double 1234 - .text - .globl test3 + .globl test3 # -- Begin function test3 .p2align 5 .type test3,@function test3: # @test3 @@ -60,8 +55,10 @@ test3: # @test3 addi.d $a0, $sp, 24 st.d $a0, $sp, 8 fld.d $fa0, $sp, 16 - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI2_0) + ori $a0, $zero, 0 + lu32i.d $a0, 215040 + lu52i.d $a0, $a0, 1033 + movgr2fr.d $fa1, $a0 fcmp.ceq.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB2_2 # %bb.1: @@ -73,12 +70,7 @@ test3: # @test3 .Lfunc_end2: .size test3, .Lfunc_end2-test3 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function test4 -.LCPI3_0: - .dword 0x4093480000000000 # double 1234 - .text - .globl test4 + .globl test4 # -- Begin function test4 .p2align 5 .type test4,@function test4: # @test4 @@ -87,8 +79,10 @@ test4: # @test4 addi.d $a0, $sp, 40 st.d $a0, $sp, 8 fld.d $fa0, $sp, 32 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI3_0) + ori $a0, $zero, 0 + lu32i.d $a0, 215040 + lu52i.d $a0, $a0, 1033 + movgr2fr.d $fa1, $a0 fcmp.ceq.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB3_2 # %bb.1: diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr47538.dir/pr47538.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr47538.dir/pr47538.s index d970eda7..46f80afd 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr47538.dir/pr47538.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr47538.dir/pr47538.s @@ -1,10 +1,6 @@ .file "pr47538.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function foo -.LCPI0_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 .text - .globl foo + .globl foo # -- Begin function foo .p2align 5 .type foo,@function foo: # @foo @@ -40,11 +36,12 @@ foo: # @foo ld.d $a0, $a0, 16 movgr2fr.d $fa1, $zero ori $a7, $zero, 1 - pcalau12i $a4, %pc_hi20(.LCPI0_0) - fld.d $fa3, $a4, %pc_lo12(.LCPI0_0) vldi $vr2, -912 - lu52i.d $a5, $zero, 1107 - lu12i.w $a4, 275200 + lu52i.d $a4, $zero, 1107 + lu12i.w $a5, 256 + lu52i.d $a5, $a5, 1107 + movgr2fr.d $fa3, $a5 + lu12i.w $a5, 275200 .p2align 4, , 16 .LBB0_5: # =>This Inner Loop Header: Depth=1 add.d $t0, $a1, $a6 @@ -54,10 +51,10 @@ foo: # @foo fsub.d $fa4, $fa4, $fa5 fmul.d $fa4, $fa0, $fa4 srli.d $t1, $a7, 32 - or $t1, $t1, $a5 + or $t1, $t1, $a4 movgr2fr.d $fa5, $t1 fsub.d $fa5, $fa5, $fa3 - bstrins.d $a7, $a4, 63, 32 + bstrins.d $a7, $a5, 63, 32 movgr2fr.d $fa6, $a7 fadd.d $fa5, $fa6, $fa5 fdiv.d $fa4, $fa4, $fa5 @@ -74,10 +71,10 @@ foo: # @foo slli.d $a1, $a2, 3 fmul.d $fa0, $fa0, $fa4 srli.d $a2, $a3, 32 - or $a2, $a2, $a5 + or $a2, $a2, $a4 movgr2fr.d $fa4, $a2 fsub.d $fa3, $fa4, $fa3 - bstrins.d $a3, $a4, 63, 32 + bstrins.d $a3, $a5, 63, 32 movgr2fr.d $fa4, $a3 fadd.d $fa3, $fa4, $fa3 vldi $vr4, -784 diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr58574.dir/pr58574.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr58574.dir/pr58574.s index 16da9140..5766c425 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr58574.dir/pr58574.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr58574.dir/pr58574.s @@ -1,1000 +1,6 @@ .file "pr58574.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function foo -.LCPI0_0: - .dword 0xc067600000000000 # double -187 -.LCPI0_1: - .dword 0x3dd0e94bd6e965b5 # double 6.1522999999999998E-11 -.LCPI0_2: - .dword 0x3d4e1c5c72814664 # double 2.1395000000000001E-13 -.LCPI0_3: - .dword 0x3e4a7d3ceb3a9a89 # double 1.2334999999999999E-8 -.LCPI0_4: - .dword 0x3ebea679caf3e3fb # double 1.8269E-6 -.LCPI0_5: - .dword 0x3f2a78514a756f18 # double 2.0195E-4 -.LCPI0_6: - .dword 0x3f90a99b6f5caf2d # double 0.016271999999999998 -.LCPI0_7: - .dword 0x3fe7ff6d330941c8 # double 0.74992999999999999 -.LCPI0_8: - .dword 0xc067200000000000 # double -185 -.LCPI0_9: - .dword 0x3dd0371f61e9bda6 # double 5.8991000000000003E-11 -.LCPI0_10: - .dword 0x3d4d471215b73735 # double 2.0803E-13 -.LCPI0_11: - .dword 0x3e4931bc36a06157 # double 1.1732E-8 -.LCPI0_12: - .dword 0x3ebd094cc631711f # double 1.7307E-6 -.LCPI0_13: - .dword 0x3f29124ab0526db6 # double 1.9128E-4 -.LCPI0_14: - .dword 0x3f8fb71fbc5de9c0 # double 0.015486 -.LCPI0_15: - .dword 0x3fe6fb549f94855e # double 0.71818000000000004 -.LCPI0_16: - .dword 0xc066e00000000000 # double -183 -.LCPI0_17: - .dword 0x3dcf13e3e53e4f7e # double 5.6529999999999998E-11 -.LCPI0_18: - .dword 0x3d4c710f4142f5d0 # double 2.0209E-13 -.LCPI0_19: - .dword 0x3e47f486aebf1d72 # double 1.1155E-8 -.LCPI0_20: - .dword 0x3ebb804f75d2f8b2 # double 1.6391999999999999E-6 -.LCPI0_21: - .dword 0x3f27bf0e733556cf # double 1.8117E-4 -.LCPI0_22: - .dword 0x3f8e308787485e3e # double 0.014741000000000001 -.LCPI0_23: - .dword 0x3fe603afb7e90ff9 # double 0.68794999999999995 -.LCPI0_24: - .dword 0xc066a00000000000 # double -181 -.LCPI0_25: - .dword 0x3dcdc38712134803 # double 5.414E-11 -.LCPI0_26: - .dword 0x3d4b9b68a8a3cd86 # double 1.9615999999999999E-13 -.LCPI0_27: - .dword 0x3e46c3f61d32b28e # double 1.0601E-8 -.LCPI0_28: - .dword 0x3eba0a37ff5a4498 # double 1.5520999999999999E-6 -.LCPI0_29: - .dword 0x3f267df0c6a718de # double 1.716E-4 -.LCPI0_30: - .dword 0x3f8cbee807bbb624 # double 0.014036 -.LCPI0_31: - .dword 0x3fe51800a7c5ac47 # double 0.65917999999999999 -.LCPI0_32: - .dword 0xc066600000000000 # double -179 -.LCPI0_33: - .dword 0x3dcc7d4c51b1a2a8 # double 5.1822E-11 -.LCPI0_34: - .dword 0x3d4ac67a87aed773 # double 1.9025E-13 -.LCPI0_35: - .dword 0x3e45a123fb933389 # double 1.0072000000000001E-8 -.LCPI0_36: - .dword 0x3eb8a7745646bc30 # double 1.4695000000000001E-6 -.LCPI0_37: - .dword 0x3f254deff7f5199d # double 1.6254E-4 -.LCPI0_38: - .dword 0x3f8b60ae9680e065 # double 0.013368 -.LCPI0_39: - .dword 0x3fe4378ab0c88a48 # double 0.63178000000000001 -.LCPI0_40: - .dword 0xc066200000000000 # double -177 -.LCPI0_41: - .dword 0x3dcb40eb955ae3d0 # double 4.9573999999999999E-11 -.LCPI0_42: - .dword 0x3d49f1e8a28efa7b # double 1.8434999999999999E-13 -.LCPI0_43: - .dword 0x3e448a78265db839 # double 9.5651000000000004E-9 -.LCPI0_44: - .dword 0x3eb755deb91b5a9e # double 1.3909000000000001E-6 -.LCPI0_45: - .dword 0x3f242e0a546cbec5 # double 1.5396E-4 -.LCPI0_46: - .dword 0x3f8a14cec41dd1a2 # double 0.012735 -.LCPI0_47: - .dword 0x3fe361cffeb074a7 # double 0.60568999999999995 -.LCPI0_48: - .dword 0xc065e00000000000 # double -175 -.LCPI0_49: - .dword 0x3dca0e88e46e494a # double 4.7396999999999999E-11 -.LCPI0_50: - .dword 0x3d491f23e8989b0c # double 1.785E-13 -.LCPI0_51: - .dword 0x3e437ff29d92409f # double 9.0803000000000006E-9 -.LCPI0_52: - .dword 0x3eb615e51b578741 # double 1.3164E-6 -.LCPI0_53: - .dword 0x3f231d940f96f6d2 # double 1.4584E-4 -.LCPI0_54: - .dword 0x3f88da3c21187e7c # double 0.012135 -.LCPI0_55: - .dword 0x3fe29613d31b9b67 # double 0.58082 -.LCPI0_56: - .dword 0xc065a00000000000 # double -173 -.LCPI0_57: - .dword 0x3dc8e600378c9547 # double 4.5289999999999999E-11 -.LCPI0_58: - .dword 0x3d484d73e22186ef # double 1.7268E-13 -.LCPI0_59: - .dword 0x3e428130dd085fb9 # double 8.6168999999999999E-9 -.LCPI0_60: - .dword 0x3eb4e5cfaefda49e # double 1.2456E-6 -.LCPI0_61: - .dword 0x3f221b8b76c1277d # double 1.3815E-4 -.LCPI0_62: - .dword 0x3f87b0f6ad70e6f3 # double 0.011568 -.LCPI0_63: - .dword 0x3fe1d3ed527e5215 # double 0.55711999999999995 -.LCPI0_64: - .dword 0xc065600000000000 # double -171 -.LCPI0_65: - .dword 0x3dc7c72d875689f8 # double 4.3251999999999997E-11 -.LCPI0_66: - .dword 0x3d477ded42a90976 # double 1.6692000000000001E-13 -.LCPI0_67: - .dword 0x3e418dde7378dcac # double 8.1743000000000006E-9 -.LCPI0_68: - .dword 0x3eb3c530808e4b56 # double 1.1784E-6 -.LCPI0_69: - .dword 0x3f21279aa3afc804 # double 1.3087999999999999E-4 -.LCPI0_70: - .dword 0x3f8696e58a32f449 # double 0.01103 -.LCPI0_71: - .dword 0x3fe11adea897635e # double 0.53452999999999995 -.LCPI0_72: - .dword 0xc065200000000000 # double -169 -.LCPI0_73: - .dword 0x3dc6b210d3cc275e # double 4.1282999999999998E-11 -.LCPI0_74: - .dword 0x3d46b0900a2f22a0 # double 1.6121999999999999E-13 -.LCPI0_75: - .dword 0x3e40a58ac9da1650 # double 7.7516999999999992E-9 -.LCPI0_76: - .dword 0x3eb2b3999c8a140a # double 1.1147000000000001E-6 -.LCPI0_77: - .dword 0x3f2040bfe3b03e21 # double 1.2400000000000001E-4 -.LCPI0_78: - .dword 0x3f858b827fa1a0cf # double 0.01052 -.LCPI0_79: - .dword 0x3fe06a550870110a # double 0.51297999999999999 -.LCPI0_80: - .dword 0xc064e00000000000 # double -167 -.LCPI0_81: - .dword 0x3dc5a6aa1ced6d78 # double 3.9382999999999997E-11 -.LCPI0_82: - .dword 0x3d45e5b87488eb8a # double 1.5559E-13 -.LCPI0_83: - .dword 0x3e3f8fa6b8073f4d # double 7.3484E-9 -.LCPI0_84: - .dword 0x3eb1b09d0f71975a # double 1.0544E-6 -.LCPI0_85: - .dword 0x3f1ecd4aa10e0221 # double 1.175E-4 -.LCPI0_86: - .dword 0x3f848e4755ffe6d6 # double 0.010037000000000001 -.LCPI0_87: - .dword 0x3fdf83f91e646f15 # double 0.49242999999999998 -.LCPI0_88: - .dword 0xc064a00000000000 # double -165 -.LCPI0_89: - .dword 0x3dc4a48d4c9ca2db # double 3.7549E-11 -.LCPI0_90: - .dword 0x3d451d6681b66433 # double 1.5002999999999999E-13 -.LCPI0_91: - .dword 0x3e3de8c7715c7fa3 # double 6.9638000000000003E-9 -.LCPI0_92: - .dword 0x3eb0bac503c6dc37 # double 9.9715999999999991E-7 -.LCPI0_93: - .dword 0x3f1d30926f02ed1a # double 1.1135E-4 -.LCPI0_94: - .dword 0x3f839ea06997734f # double 0.0095799000000000005 -.LCPI0_95: - .dword 0x3fde42aed1394318 # double 0.47282000000000002 -.LCPI0_96: - .dword 0xc064600000000000 # double -163 -.LCPI0_97: - .dword 0x3dc3abde6a390555 # double 3.5782000000000002E-11 -.LCPI0_98: - .dword 0x3d4457f66d8ca5b7 # double 1.4455E-13 -.LCPI0_99: - .dword 0x3e3c55b2b76313a0 # double 6.5972000000000003E-9 -.LCPI0_100: - .dword 0x3eafa3b4ff945de5 # double 9.4292999999999999E-7 -.LCPI0_101: - .dword 0x3f1ba9ff98511a24 # double 1.0553E-4 -.LCPI0_102: - .dword 0x3f82bb4b9b090562 # double 0.0091462999999999996 -.LCPI0_103: - .dword 0x3fdd0fcf80dc3372 # double 0.45408999999999999 -.LCPI0_104: - .dword 0xc064200000000000 # double -161 -.LCPI0_105: - .dword 0x3dc2bc315fa4db79 # double 3.4078999999999998E-11 -.LCPI0_106: - .dword 0x3d439620afb5e24c # double 1.3917E-13 -.LCPI0_107: - .dword 0x3e3ad5bfa78c898b # double 6.2479999999999999E-9 -.LCPI0_108: - .dword 0x3eadea712c78e8fa # double 8.9156000000000002E-7 -.LCPI0_109: - .dword 0x3f1a383a840a6635 # double 1.0001999999999999E-4 -.LCPI0_110: - .dword 0x3f81e3c2b2979761 # double 0.0087352000000000002 -.LCPI0_111: - .dword 0x3fdbeadd590c0ad0 # double 0.43620999999999999 -.LCPI0_112: - .dword 0xc063e00000000000 # double -159 -.LCPI0_113: - .dword 0x3dc1d5aa343f6318 # double 3.2441000000000002E-11 -.LCPI0_114: - .dword 0x3d42d72cd087e7bb # double 1.3386999999999999E-13 -.LCPI0_115: - .dword 0x3e39680d13c59f19 # double 5.9153999999999997E-9 -.LCPI0_116: - .dword 0x3eac488ab13d0509 # double 8.4290999999999997E-7 -.LCPI0_117: - .dword 0x3f18dbbb74822a5f # double 9.4827000000000005E-5 -.LCPI0_118: - .dword 0x3f81177f7886239b # double 0.0083455999999999998 -.LCPI0_119: - .dword 0x3fdad330941c8217 # double 0.41914000000000001 -.LCPI0_120: - .dword 0xc063a00000000000 # double -157 -.LCPI0_121: - .dword 0x3dc0f800d94a2092 # double 3.0865999999999997E-11 -.LCPI0_122: - .dword 0x3d421c2f83820157 # double 1.2867999999999999E-13 -.LCPI0_123: - .dword 0x3e380c0e3f424adb # double 5.5988999999999998E-9 -.LCPI0_124: - .dword 0x3eaabd0fa96201dc # double 7.9686999999999996E-7 -.LCPI0_125: - .dword 0x3f1791b0dbc45040 # double 8.9908999999999996E-5 -.LCPI0_126: - .dword 0x3f8055d3712bbc46 # double 0.0079761999999999993 -.LCPI0_127: - .dword 0x3fd9c7cd898b2e9d # double 0.40282000000000001 -.LCPI0_128: - .dword 0xc063600000000000 # double -155 -.LCPI0_129: - .dword 0x3dc022ed4006984c # double 2.9351999999999999E-11 -.LCPI0_130: - .dword 0x3d416528c8a42f20 # double 1.236E-13 -.LCPI0_131: - .dword 0x3e36c11a47741b18 # double 5.2979000000000001E-9 -.LCPI0_132: - .dword 0x3ea946b63a69a956 # double 7.5328999999999999E-7 -.LCPI0_133: - .dword 0x3f1659a2777d7ecb # double 8.5259000000000005E-5 -.LCPI0_134: - .dword 0x3f7f3c70c996b767 # double 0.007626 -.LCPI0_135: - .dword 0x3fd8c8366516db0e # double 0.38722000000000001 -.LCPI0_136: - .dword 0xc063200000000000 # double -153 -.LCPI0_137: - .dword 0x3dbeacded0e9948a # double 2.7899E-11 -.LCPI0_138: - .dword 0x3d40b1bc641957fa # double 1.1862E-13 -.LCPI0_139: - .dword 0x3e35866c240a35cd # double 5.0117E-9 -.LCPI0_140: - .dword 0x3ea7e48c7fd54b3f # double 7.1205999999999998E-7 -.LCPI0_141: - .dword 0x3f1532b0f112ec05 # double 8.0864000000000004E-5 -.LCPI0_142: - .dword 0x3f7de01a876ac2ec # double 0.0072937999999999996 -.LCPI0_143: - .dword 0x3fd7d3c36113404f # double 0.37230000000000002 -.LCPI0_144: - .dword 0xc062e00000000000 # double -151 -.LCPI0_145: - .dword 0x3dbd247e87ac75bf # double 2.6505E-11 -.LCPI0_146: - .dword 0x3d4002a2cd8bae1c # double 1.1376000000000001E-13 -.LCPI0_147: - .dword 0x3e345b5af2762942 # double 4.7397000000000001E-9 -.LCPI0_148: - .dword 0x3ea6958a97a655e7 # double 6.7306000000000002E-7 -.LCPI0_149: - .dword 0x3f141bebc3dde5cf # double 7.6710000000000002E-5 -.LCPI0_150: - .dword 0x3f7c95b2844c2a7b # double 0.0069787 -.LCPI0_151: - .dword 0x3fd6e9f6a93f290b # double 0.35803000000000001 -.LCPI0_152: - .dword 0xc062a00000000000 # double -149 -.LCPI0_153: - .dword 0x3dbbac2986d8dcfd # double 2.5168000000000001E-11 -.LCPI0_154: - .dword 0x3d3eaeff924c30d3 # double 1.0900999999999999E-13 -.LCPI0_155: - .dword 0x3e333f59f5ebec07 # double 4.4813999999999998E-9 -.LCPI0_156: - .dword 0x3ea558d49addfa8f # double 6.3618999999999998E-7 -.LCPI0_157: - .dword 0x3f1314626b37ba09 # double 7.2782999999999998E-5 -.LCPI0_158: - .dword 0x3f7b5c4728b37d70 # double 0.0066797999999999996 -.LCPI0_159: - .dword 0x3fd60a5269595fee # double 0.34438000000000002 -.LCPI0_160: - .dword 0xc062600000000000 # double -147 -.LCPI0_161: - .dword 0x3dba43dfce6eca43 # double 2.3888E-11 -.LCPI0_162: - .dword 0x3d3d62179d259236 # double 1.0439E-13 -.LCPI0_163: - .dword 0x3e3231c04bdd0c64 # double 4.2361999999999999E-9 -.LCPI0_164: - .dword 0x3ea42d62a77da788 # double 6.0133000000000005E-7 -.LCPI0_165: - .dword 0x3f121b57ec9d6f09 # double 6.9072000000000005E-5 -.LCPI0_166: - .dword 0x3f7a32e6dd194b2b # double 0.0063962000000000003 -.LCPI0_167: - .dword 0x3fd53404ea4a8c15 # double 0.33129999999999998 -.LCPI0_168: - .dword 0xc062200000000000 # double -145 -.LCPI0_169: - .dword 0x3db8eac93232caba # double 2.2662E-11 -.LCPI0_170: - .dword 0x3d3c1de7b6571ffb # double 9.9891000000000001E-14 -.LCPI0_171: - .dword 0x3e3131e511bb18a0 # double 4.0035000000000001E-9 -.LCPI0_172: - .dword 0x3ea31242d906ac99 # double 5.6837000000000005E-7 -.LCPI0_173: - .dword 0x3f112fecf1743ad4 # double 6.5563999999999998E-5 -.LCPI0_174: - .dword 0x3f7918a009f62307 # double 0.0061269999999999996 -.LCPI0_175: - .dword 0x3fd466e43aa79bbb # double 0.31878000000000001 -.LCPI0_176: - .dword 0xc061e00000000000 # double -143 -.LCPI0_177: - .dword 0x3db7a0e5b224de62 # double 2.1490000000000002E-11 -.LCPI0_178: - .dword 0x3d3ae26fdde0da22 # double 9.5513000000000004E-14 -.LCPI0_179: - .dword 0x3e303f1f64f79f02 # double 3.7827000000000002E-9 -.LCPI0_180: - .dword 0x3ea206db40f9df70 # double 5.3723999999999996E-7 -.LCPI0_181: - .dword 0x3f1051647f3923c1 # double 6.2248000000000004E-5 -.LCPI0_182: - .dword 0x3f780c9befb52f21 # double 0.0058713999999999997 -.LCPI0_183: - .dword 0x3fd3a272862f598a # double 0.30679000000000001 -.LCPI0_184: - .dword 0xc061a00000000000 # double -141 -.LCPI0_185: - .dword 0x3db6655d22099262 # double 2.0369000000000001E-11 -.LCPI0_186: - .dword 0x3d39b01ec1f5ab98 # double 9.1262E-14 -.LCPI0_187: - .dword 0x3e2eb235a896cd5b # double 3.5735000000000001E-9 -.LCPI0_188: - .dword 0x3ea10a23fd58ae5e # double 5.0781999999999995E-7 -.LCPI0_189: - .dword 0x3f0efe0336d26046 # double 5.9113000000000001E-5 -.LCPI0_190: - .dword 0x3f770e397ea6cf0c # double 0.0056287999999999998 -.LCPI0_191: - .dword 0x3fd2e60807357e67 # double 0.29529 -.LCPI0_192: - .dword 0xc061600000000000 # double -139 -.LCPI0_193: - .dword 0x3db5382f81e0e6ba # double 1.9298999999999998E-11 -.LCPI0_194: - .dword 0x3d388706d4f36630 # double 8.7138999999999998E-14 -.LCPI0_195: - .dword 0x3e2cfe24aecb2b41 # double 3.3752000000000001E-9 -.LCPI0_196: - .dword 0x3ea01b6d22240d98 # double 4.8003000000000004E-7 -.LCPI0_197: - .dword 0x3f0d70534f326d3b # double 5.6150000000000003E-5 -.LCPI0_198: - .dword 0x3f761c871f439226 # double 0.0053983 -.LCPI0_199: - .dword 0x3fd23150dae3e6c5 # double 0.28426000000000001 -.LCPI0_200: - .dword 0xc061200000000000 # double -137 -.LCPI0_201: - .dword 0x3db41884a56f6894 # double 1.8277000000000001E-11 -.LCPI0_202: - .dword 0x3d37672816da09ea # double 8.3143999999999998E-14 -.LCPI0_203: - .dword 0x3e2b612aae79156a # double 3.1874E-9 -.LCPI0_204: - .dword 0x3e9e740d86b9e2a1 # double 4.5378999999999999E-7 -.LCPI0_205: - .dword 0x3f0bf8840abc1ba5 # double 5.3350000000000003E-5 -.LCPI0_206: - .dword 0x3f7536e3c1dbd803 # double 0.0051793000000000004 -.LCPI0_207: - .dword 0x3fd184230fcf80dc # double 0.27368999999999999 -.LCPI0_208: - .dword 0xc060e00000000000 # double -135 -.LCPI0_209: - .dword 0x3db3065c8cb517ee # double 1.7303000000000001E-11 -.LCPI0_210: - .dword 0x3d365094fa076898 # double 7.9277999999999998E-14 -.LCPI0_211: - .dword 0x3e29d9f5e283a865 # double 3.0095E-9 -.LCPI0_212: - .dword 0x3e9cca55ef08d88a # double 4.2901E-7 -.LCPI0_213: - .dword 0x3f0a951b7469782d # double 5.0701999999999997E-5 -.LCPI0_214: - .dword 0x3f745cc92eb29af2 # double 0.0049712999999999997 -.LCPI0_215: - .dword 0x3fd0ddd6e04c0592 # double 0.26354 -.LCPI0_216: - .dword 0xc060a00000000000 # double -133 -.LCPI0_217: - .dword 0x3db200df0b7681f0 # double 1.6373999999999999E-11 -.LCPI0_218: - .dword 0x3d35434d7e7b823a # double 7.5540999999999996E-14 -.LCPI0_219: - .dword 0x3e2867a51cd7a1e6 # double 2.8411E-9 -.LCPI0_220: - .dword 0x3e9b3853a536e553 # double 4.0560999999999999E-7 -.LCPI0_221: - .dword 0x3f0945290793d0b5 # double 4.8198999999999997E-5 -.LCPI0_222: - .dword 0x3f738d60a6330510 # double 0.0047735 -.LCPI0_223: - .dword 0x3fd03e1869835159 # double 0.25379000000000002 -.LCPI0_224: - .dword 0xc060600000000000 # double -131 -.LCPI0_225: - .dword 0x3db107c412f52afe # double 1.5489000000000001E-11 -.LCPI0_226: - .dword 0x3d343f51a43656d1 # double 7.1933000000000005E-14 -.LCPI0_227: - .dword 0x3e27098f7ae69034 # double 2.6819E-9 -.LCPI0_228: - .dword 0x3e99bcd2cc45b459 # double 3.8351999999999998E-7 -.LCPI0_229: - .dword 0x3f0807778764d281 # double 4.5831999999999998E-5 -.LCPI0_230: - .dword 0x3f72c83ec892ab69 # double 0.0045855000000000002 -.LCPI0_231: - .dword 0x3fcf49cf56eac860 # double 0.24443999999999999 -.LCPI0_232: - .dword 0xc060200000000000 # double -129 -.LCPI0_233: - .dword 0x3db01ac394729779 # double 1.4646999999999999E-11 -.LCPI0_234: - .dword 0x3d33448ef8da1489 # double 6.8453000000000004E-14 -.LCPI0_235: - .dword 0x3e25be2aec0ebf4b # double 2.5312000000000001E-9 -.LCPI0_236: - .dword 0x3e9856cb8236b3ec # double 3.6268000000000002E-7 -.LCPI0_237: - .dword 0x3f06db166f35cb72 # double 4.3594000000000003E-5 -.LCPI0_238: - .dword 0x3f720cc28621ed91 # double 0.0044067000000000004 -.LCPI0_239: - .dword 0x3fce2339c0ebedfa # double 0.23544999999999999 -.LCPI0_240: - .dword 0xc05fc00000000000 # double -127 -.LCPI0_241: - .dword 0x3dae729ae4e3a050 # double 1.3846000000000001E-11 -.LCPI0_242: - .dword 0x3d3252f30a08e990 # double 6.5099999999999995E-14 -.LCPI0_243: - .dword 0x3e248506d9468e04 # double 2.3887999999999999E-9 -.LCPI0_244: - .dword 0x3e9704b1f40c0981 # double 3.4299999999999999E-7 -.LCPI0_245: - .dword 0x3f05bef2de483919 # double 4.1477000000000003E-5 -.LCPI0_246: - .dword 0x3f715a65a723c5d8 # double 0.0042366000000000001 -.LCPI0_247: - .dword 0x3fcd07c84b5dcc64 # double 0.2268 -.LCPI0_248: - .dword 0xc05f400000000000 # double -125 -.LCPI0_249: - .dword 0x3dacc5a31eebbb9e # double 1.3084E-11 -.LCPI0_250: - .dword 0x3d316a6b65650415 # double 6.1872999999999995E-14 -.LCPI0_251: - .dword 0x3e235d09c8f5e982 # double 2.2541999999999999E-9 -.LCPI0_252: - .dword 0x3e95c5aa3ac6e65c # double 3.2443E-7 -.LCPI0_253: - .dword 0x3f04b261082509f2 # double 3.9475999999999999E-5 -.LCPI0_254: - .dword 0x3f70b0a1f3db2e8f # double 0.0040746999999999997 -.LCPI0_255: - .dword 0x3fcbf77af640639d # double 0.21848999999999999 -.LCPI0_256: - .dword 0xc05ec00000000000 # double -123 -.LCPI0_257: - .dword 0x3dab2e9fd6fd80dd # double 1.2361E-11 -.LCPI0_258: - .dword 0x3d308ad32632c073 # double 5.8769999999999998E-14 -.LCPI0_259: - .dword 0x3e2245528d098f79 # double 2.1270000000000002E-9 -.LCPI0_260: - .dword 0x3e9498ac7468b8cb # double 3.0690999999999999E-7 -.LCPI0_261: - .dword 0x3f03b42baff5eb43 # double 3.7582E-5 -.LCPI0_262: - .dword 0x3f700f0c0c7dbcc4 # double 0.0039205999999999998 -.LCPI0_263: - .dword 0x3fcaf1a9fbe76c8b # double 0.21049999999999999 -.LCPI0_264: - .dword 0xc05e400000000000 # double -121 -.LCPI0_265: - .dword 0x3da9ab5097251322 # double 1.1673E-11 -.LCPI0_266: - .dword 0x3d2f682fb42899af # double 5.5790000000000001E-14 -.LCPI0_267: - .dword 0x3e213cfff76e3d9c # double 2.0068000000000001E-9 -.LCPI0_268: - .dword 0x3e937cb0bef2ef1e # double 2.9037999999999998E-7 -.LCPI0_269: - .dword 0x3f02c3c9655b9bd4 # double 3.5790999999999997E-5 -.LCPI0_270: - .dword 0x3f6eea7122820b08 # double 0.0037739000000000002 -.LCPI0_271: - .dword 0x3fc9f5ad96a6a012 # double 0.20280999999999999 -.LCPI0_272: - .dword 0xc05dc00000000000 # double -119 -.LCPI0_273: - .dword 0x3da83c457cdf69a8 # double 1.1021E-11 -.LCPI0_274: - .dword 0x3d2dcc29389c0b3b # double 5.2930999999999998E-14 -.LCPI0_275: - .dword 0x3e2043a1711a52c6 # double 1.8934000000000001E-9 -.LCPI0_276: - .dword 0x3e9270db3366ba97 # double 2.7478999999999999E-7 -.LCPI0_277: - .dword 0x3f01e049a3af6987 # double 3.4096E-5 -.LCPI0_278: - .dword 0x3f6dc57844b53bb7 # double 0.0036342000000000002 -.LCPI0_279: - .dword 0x3fc902de00d1b717 # double 0.19539999999999999 -.LCPI0_280: - .dword 0xc058c00000000000 # double -99 -.LCPI0_281: - .dword 0x3d9af109a3630d2e # double 6.1257999999999996E-12 -.LCPI0_282: - .dword 0x3d211ed4c2f43d7e # double 3.0412E-14 -.LCPI0_283: - .dword 0x3e122f550d281614 # double 1.0585E-9 -.LCPI0_284: - .dword 0x3e85782f0a3274a4 # double 1.5996000000000001E-7 -.LCPI0_285: - .dword 0x3ef66c7e028f516c # double 2.1384999999999999E-5 -.LCPI0_286: - .dword 0x3f64de48f6131734 # double 0.0025474 -.LCPI0_287: - .dword 0x3fc1350092ccf6be # double 0.13442999999999999 -.LCPI0_288: - .dword 0xc053c00000000000 # double -79 -.LCPI0_289: - .dword 0x3d8d9aa84ed5f7f8 # double 3.3656000000000001E-12 -.LCPI0_290: - .dword 0x3d12ee9801a347ab # double 1.6814999999999999E-14 -.LCPI0_291: - .dword 0x3e0487d76cb7622a # double 5.9752000000000002E-10 -.LCPI0_292: - .dword 0x3e79a613c8cbadfc # double 9.5549000000000004E-8 -.LCPI0_293: - .dword 0x3eed281dc526a9fd # double 1.3903000000000001E-5 -.LCPI0_294: - .dword 0x3f5e61ead6a30f64 # double 0.0018544 -.LCPI0_295: - .dword 0x3fb745bf26f1dc51 # double 0.090908000000000003 -.LCPI0_296: - .dword 0xc04d800000000000 # double -59 -.LCPI0_297: - .dword 0x3d805189fcd8287b # double 1.8552E-12 -.LCPI0_298: - .dword 0x3d04870426dcdb0e # double 9.1160000000000006E-15 -.LCPI0_299: - .dword 0x3df7a62cc6986c28 # double 3.4414000000000001E-10 -.LCPI0_300: - .dword 0x3e6f9cae3284854e # double 5.8881999999999997E-8 -.LCPI0_301: - .dword 0x3ee3a73b6897e136 # double 9.3713999999999997E-6 -.LCPI0_302: - .dword 0x3f56e01655acdabf # double 0.0013962 -.LCPI0_303: - .dword 0x3fae0e30446b69db # double 0.058701999999999997 -.LCPI0_304: - .dword 0xc049800000000000 # double -51 -.LCPI0_305: - .dword 0x3d79d095040f681c # double 1.4674E-12 -.LCPI0_306: - .dword 0x3d000b39a7a160d0 # double 7.1249000000000003E-15 -.LCPI0_307: - .dword 0x3df31acdbb7ee971 # double 2.7801E-10 -.LCPI0_308: - .dword 0x3e6a4a3f844e2f75 # double 4.8969E-8 -.LCPI0_309: - .dword 0x3ee0f2ab2899438c # double 8.0814000000000002E-6 -.LCPI0_310: - .dword 0x3f5497d2193ce7e8 # double 0.0012569 -.LCPI0_311: - .dword 0x3fa8a0f4d7add15f # double 0.048103 -.LCPI0_312: - .dword 0xc048800000000000 # double -49 -.LCPI0_313: - .dword 0x3d785b3bd2b88744 # double 1.3845E-12 -.LCPI0_314: - .dword 0x3cfe2c1ce7d17156 # double 6.6996000000000002E-15 -.LCPI0_315: - .dword 0x3df21ff066d70de7 # double 2.6374999999999999E-10 -.LCPI0_316: - .dword 0x3e69208e2ab83a80 # double 4.6802999999999999E-8 -.LCPI0_317: - .dword 0x3ee0586cf27f6074 # double 7.7941000000000005E-6 -.LCPI0_318: - .dword 0x3f5412711bcc0e61 # double 0.0012251 -.LCPI0_319: - .dword 0x3fa75ba2be0589ad # double 0.045621000000000002 -.LCPI0_320: - .dword 0xc047800000000000 # double -47 -.LCPI0_321: - .dword 0x3d76fbf3f21de835 # double 1.3064999999999999E-12 -.LCPI0_322: - .dword 0x3cfc5f67cd792795 # double 6.2999999999999998E-15 -.LCPI0_323: - .dword 0x3df13352fc9a645b # double 2.503E-10 -.LCPI0_324: - .dword 0x3e6805fb190d4900 # double 4.4746999999999997E-8 -.LCPI0_325: - .dword 0x3edf8a006bd80cbe # double 7.5194999999999997E-6 -.LCPI0_326: - .dword 0x3f5392189bd8383b # double 0.0011945 -.LCPI0_327: - .dword 0x3fa61e71044f1a1a # double 0.043201000000000003 -.LCPI0_328: - .dword 0xc046800000000000 # double -45 -.LCPI0_329: - .dword 0x3d75b1d6ccaacc2c # double 1.2332E-12 -.LCPI0_330: - .dword 0x3cfaae99476e38a8 # double 5.9246000000000001E-15 -.LCPI0_331: - .dword 0x3df054144eb5aa81 # double 2.3761000000000002E-10 -.LCPI0_332: - .dword 0x3e66f9d6634e4f2b # double 4.2796000000000002E-8 -.LCPI0_333: - .dword 0x3ede70097b9f75b6 # double 7.2568999999999999E-6 -.LCPI0_334: - .dword 0x3f53165d3996fa83 # double 0.001165 -.LCPI0_335: - .dword 0x3fa4e93e1c9b413a # double 0.040842000000000003 -.LCPI0_336: - .dword 0xc045800000000000 # double -43 -.LCPI0_337: - .dword 0x3d747b173735b59f # double 1.1642E-12 -.LCPI0_338: - .dword 0x3cf91831a4779845 # double 5.5721E-15 -.LCPI0_339: - .dword 0x3def02a65e2b3c19 # double 2.2563E-10 -.LCPI0_340: - .dword 0x3e65fb29bf163c7c # double 4.0942999999999998E-8 -.LCPI0_341: - .dword 0x3edd626ba3f5ba98 # double 7.0057999999999999E-6 -.LCPI0_342: - .dword 0x3f529e6835737f54 # double 0.0011364000000000001 -.LCPI0_343: - .dword 0x3fa3bb83cf2cf95d # double 0.038539999999999998 -.LCPI0_344: - .dword 0xc044800000000000 # double -41 -.LCPI0_345: - .dword 0x3d735741e6f4452c # double 1.0993999999999999E-12 -.LCPI0_346: - .dword 0x3cf79a58a8004aff # double 5.2408999999999996E-15 -.LCPI0_347: - .dword 0x3ded745cdf4df966 # double 2.1431000000000001E-10 -.LCPI0_348: - .dword 0x3e6509686f990786 # double 3.9184000000000002E-8 -.LCPI0_349: - .dword 0x3edc604afddc0ca6 # double 6.7653999999999996E-6 -.LCPI0_350: - .dword 0x3f522b104f029c92 # double 0.0011088999999999999 -.LCPI0_351: - .dword 0x3fa295421c044285 # double 0.036295000000000001 -.LCPI0_352: - .dword 0xc043800000000000 # double -39 -.LCPI0_353: - .dword 0x3d724489b0bcfd4c # double 1.0384000000000001E-12 -.LCPI0_354: - .dword 0x3cf633e72c2b33b3 # double 4.93E-15 -.LCPI0_355: - .dword 0x3debfc3de9893d59 # double 2.0361999999999999E-10 -.LCPI0_356: - .dword 0x3e64239c2a719fc4 # double 3.7511999999999999E-8 -.LCPI0_357: - .dword 0x3edb695512b2de5a # double 6.5354000000000001E-6 -.LCPI0_358: - .dword 0x3f51bb7ec6af7c5a # double 0.0010823 -.LCPI0_359: - .dword 0x3fa176145953586d # double 0.034104000000000002 -.LCPI0_360: - .dword 0xc042800000000000 # double -37 -.LCPI0_361: - .dword 0x3d71421f0df0657f # double 9.8101999999999999E-13 -.LCPI0_362: - .dword 0x3cf4e35d7fbf4617 # double 4.6381000000000002E-15 -.LCPI0_363: - .dword 0x3dea993b4592b866 # double 1.9353E-10 -.LCPI0_364: - .dword 0x3e63495b6206fe24 # double 3.5923999999999998E-8 -.LCPI0_365: - .dword 0x3eda7cc9785b3acc # double 6.3150999999999999E-6 -.LCPI0_366: - .dword 0x3f514fb39c7a1eaa # double 0.0010566 -.LCPI0_367: - .dword 0x3fa05db76b3bb83d # double 0.031965 -.LCPI0_368: - .dword 0xc041800000000000 # double -35 -.LCPI0_369: - .dword 0x3d704ef8d289d598 # double 9.2702999999999994E-13 -.LCPI0_370: - .dword 0x3cf3a73bf18375e2 # double 4.3638999999999998E-15 -.LCPI0_371: - .dword 0x3de949929743e5f4 # double 1.8399E-10 -.LCPI0_372: - .dword 0x3e6279d2fb27147f # double 3.4414E-8 -.LCPI0_373: - .dword 0x3ed99a3a3b55ba9e # double 6.1040999999999997E-6 -.LCPI0_374: - .dword 0x3f50e7aed0628383 # double 0.0010318 -.LCPI0_375: - .dword 0x3f9e9813879c4114 # double 0.029877000000000001 -.LCPI0_376: - .dword 0xc040800000000000 # double -33 -.LCPI0_377: - .dword 0x3d6ed449c2f3d75f # double 8.7621999999999998E-13 -.LCPI0_378: - .dword 0x3cf27e96632d455f # double 4.1066000000000001E-15 -.LCPI0_379: - .dword 0x3de80c8fb9c090fa # double 1.7498E-10 -.LCPI0_380: - .dword 0x3e61b4996838dbc1 # double 3.2979E-8 -.LCPI0_381: - .dword 0x3ed8c1396822f672 # double 5.9020000000000004E-6 -.LCPI0_382: - .dword 0x3f508305029e3ff2 # double 0.0010078000000000001 -.LCPI0_383: - .dword 0x3f9c814d72799a20 # double 0.027837000000000001 -.LCPI0_384: - .dword 0x3d6d2590594d1848 # double 8.2839999999999998E-13 -.LCPI0_385: - .dword 0x3cf167ed2383a844 # double 3.8649000000000001E-15 -.LCPI0_386: - .dword 0x3de6e0ca63504f66 # double 1.6646000000000001E-10 -.LCPI0_387: - .dword 0x3e60f8db8e0a45c3 # double 3.1613E-8 -.LCPI0_388: - .dword 0x3ed7f1221183d337 # double 5.7081999999999998E-6 -.LCPI0_389: - .dword 0x3f5021ab7665e2de # double 9.8459000000000003E-4 -.LCPI0_390: - .dword 0x3f9a771c970f7b9e # double 0.025845 -.LCPI0_391: - .dword 0x3d6b903d69d5c337 # double 7.8339999999999997E-13 -.LCPI0_392: - .dword 0x3cf0627198057091 # double 3.6380999999999997E-15 -.LCPI0_393: - .dword 0x3de5c5345ca8d1a8 # double 1.5839999999999999E-10 -.LCPI0_394: - .dword 0x3e6046530e354dce # double 3.0314000000000003E-8 -.LCPI0_395: - .dword 0x3ed729bd3db89d40 # double 5.5225000000000001E-6 -.LCPI0_396: - .dword 0x3f4f86ee71374fcd # double 9.6212999999999995E-4 -.LCPI0_397: - .dword 0x3f9878b7a1c25d07 # double 0.023897999999999999 -.LCPI0_398: - .dword 0x3d6a129ad859a0eb # double 7.4103000000000002E-13 -.LCPI0_399: - .dword 0x3ceed9be2e1862d9 # double 3.4251E-15 -.LCPI0_400: - .dword 0x3de4b91980ede2b9 # double 1.5078E-10 -.LCPI0_401: - .dword 0x3e5f38e657dbd4e3 # double 2.9078000000000001E-8 -.LCPI0_402: - .dword 0x3ed66a65ff82397d # double 5.3442999999999998E-6 -.LCPI0_403: - .dword 0x3f4ed0a59f6159b7 # double 9.4039999999999998E-4 -.LCPI0_404: - .dword 0x3f96861e92923e5c # double 0.021996000000000002 -.LCPI0_405: - .dword 0x3d68ab4ec479933c # double 7.0113999999999998E-13 -.LCPI0_406: - .dword 0x3ced0cce0c2d79ab # double 3.2252E-15 -.LCPI0_407: - .dword 0x3de3bb6b98d5330a # double 1.4357E-10 -.LCPI0_408: - .dword 0x3e5df517f66a1fc6 # double 2.7899999999999998E-8 -.LCPI0_409: - .dword 0x3ed5b2e55d20f440 # double 5.1734000000000003E-6 -.LCPI0_410: - .dword 0x3f4e2026910e5ab7 # double 9.1936000000000003E-4 -.LCPI0_411: - .dword 0x3f949e8815e39714 # double 0.020136000000000001 -.LCPI0_412: - .dword 0x3d6758ff4dd67c05 # double 6.6357999999999999E-13 -.LCPI0_413: - .dword 0x3ceb5bff86228abe # double 3.0375000000000002E-15 -.LCPI0_414: - .dword 0x3de2cb767f828d91 # double 1.3674999999999999E-10 -.LCPI0_415: - .dword 0x3e5cc0f499af778f # double 2.6779000000000001E-8 -.LCPI0_416: - .dword 0x3ed502cd63156628 # double 5.0093999999999998E-6 -.LCPI0_417: - .dword 0x3f4d755bccaf709b # double 8.9899999999999995E-4 -.LCPI0_418: - .dword 0x3f92c1f42bb66730 # double 0.018318000000000001 -.LCPI0_419: - .dword 0x3d661a5294113d1f # double 6.2819999999999999E-13 -.LCPI0_420: - .dword 0x3ce9c57a5f629aa4 # double 2.8612E-15 -.LCPI0_421: - .dword 0x3de1e8861019bd46 # double 1.303E-10 -.LCPI0_422: - .dword 0x3e5b9b62c813c95d # double 2.5711000000000001E-8 -.LCPI0_423: - .dword 0x3ed459cb9ac001b0 # double 4.8520000000000003E-6 -.LCPI0_424: - .dword 0x3f4ccfef6c0912a3 # double 8.7927999999999997E-4 -.LCPI0_425: - .dword 0x3f90efdc9c4da900 # double 0.016539999999999999 -.LCPI0_426: - .dword 0x3d64ee05c5bffeaa # double 5.9485999999999995E-13 -.LCPI0_427: - .dword 0x3ce847dc6a7deccc # double 2.6957E-15 -.LCPI0_428: - .dword 0x3de1113200e25815 # double 1.2418E-10 -.LCPI0_429: - .dword 0x3e5a83d5c4cb0bc1 # double 2.4693999999999999E-8 -.LCPI0_430: - .dword 0x3ed3b77210a15f77 # double 4.7008E-6 -.LCPI0_431: - .dword 0x3f4c2fb67bfd7c6d # double 8.6017999999999999E-4 -.LCPI0_432: - .dword 0x3f8e4f765fd8adac # double 0.014800000000000001 -.LCPI0_433: - .dword 0x3d63d3324d4e01e3 # double 5.6346E-13 -.LCPI0_434: - .dword 0x3ce6e18872722536 # double 2.5402999999999999E-15 -.LCPI0_435: - .dword 0x3de0457a51dc5dfe # double 1.1838999999999999E-10 -.LCPI0_436: - .dword 0x3e5978edb7d72726 # double 2.3723E-8 -.LCPI0_437: - .dword 0x3ed31b6e4e19f1f7 # double 4.5554999999999998E-6 -.LCPI0_438: - .dword 0x3f4b94708fe00767 # double 8.4166999999999998E-4 -.LCPI0_439: - .dword 0x3f8ad3a604e1e710 # double 0.013099 -.LCPI0_440: - .dword 0x3d62c89559516ee9 # double 5.3386000000000001E-13 -.LCPI0_441: - .dword 0x3ce5911c49cf8751 # double 2.3944E-15 -.LCPI0_442: - .dword 0x3ddf0955bc5733f2 # double 1.1291E-10 -.LCPI0_443: - .dword 0x3e587aaaa1381b8b # double 2.2798E-8 -.LCPI0_444: - .dword 0x3ed285a4d649df58 # double 4.4159999999999997E-6 -.LCPI0_445: - .dword 0x3f4afddd3b040d00 # double 8.2372000000000001E-4 -.LCPI0_446: - .dword 0x3f876a2f48c2e771 # double 0.011433 -.LCPI0_447: - .dword 0x3d61cd31454040b1 # double 5.0594999999999998E-13 -.LCPI0_448: - .dword 0x3ce454fabb93b71c # double 2.2572999999999998E-15 -.LCPI0_449: - .dword 0x3ddd9b6add0b78ed # double 1.0771E-10 -.LCPI0_450: - .dword 0x3e57883965bbdac9 # double 2.1915999999999999E-8 -.LCPI0_451: - .dword 0x3ed1f5a7b5b1c03b # double 4.2818999999999996E-6 -.LCPI0_452: - .dword 0x3f4a6bfc7d698d37 # double 8.0632999999999995E-4 -.LCPI0_453: - .dword 0x3f8414112efc6cce # double 0.0098039000000000008 -.LCPI0_454: - .dword 0x3d60e04d99704505 # double 4.7964999999999998E-13 -.LCPI0_455: - .dword 0x3ce32bfca1e19775 # double 2.1285000000000002E-15 -.LCPI0_456: - .dword 0x3ddc407fe0f955e6 # double 1.0278000000000001E-10 -.LCPI0_457: - .dword 0x3e56a0c6ea3056ba # double 2.1074000000000001E-8 -.LCPI0_458: - .dword 0x3ed16b2475b20719 # double 4.1528999999999997E-6 -.LCPI0_459: - .dword 0x3f49de7870d4ff4b # double 7.8945999999999997E-4 -.LCPI0_460: - .dword 0x3f80cf75f478e341 # double 0.0082082000000000005 -.LCPI0_461: - .dword 0x3d6000d5a2623093 # double 4.5484000000000001E-13 -.LCPI0_462: - .dword 0x3ce21535de6eaaa3 # double 2.0076000000000001E-15 -.LCPI0_463: - .dword 0x3ddaf85ebd11ee25 # double 9.8117000000000006E-11 -.LCPI0_464: - .dword 0x3e55c40cd02f8aa5 # double 2.0271E-8 -.LCPI0_465: - .dword 0x3ed0e5ff996ada1a # double 4.0288999999999997E-6 -.LCPI0_466: - .dword 0x3f49553b9bb7810b # double 7.7309999999999998E-4 -.LCPI0_467: - .dword 0x3f7b3885828b601b # double 0.0066457 -.LCPI0_468: - .dword 0x3d5e5bf3b2ed15ba # double 4.3143000000000001E-13 -.LCPI0_469: - .dword 0x3ce10f093c3894a7 # double 1.8938999999999999E-15 -.LCPI0_470: - .dword 0x3dd9c0a2f40226f1 # double 9.3687000000000001E-11 -.LCPI0_471: - .dword 0x3e54f137fc876864 # double 1.9504E-8 -.LCPI0_472: - .dword 0x3ed065e6aa3cabb7 # double 3.9095999999999998E-6 -.LCPI0_473: - .dword 0x3f48d00591646be5 # double 7.5721999999999996E-4 -.LCPI0_474: - .dword 0x3f74f3e2bb4b9b09 # double 0.0051154 -.LCPI0_475: - .dword 0x3d5cce31abf0cfe7 # double 4.0935000000000002E-13 -.LCPI0_476: - .dword 0x3ce01900ac1a16a7 # double 1.7871999999999999E-15 -.LCPI0_477: - .dword 0x3dd898e06fac46df # double 8.9484000000000006E-11 -.LCPI0_478: - .dword 0x3e5427bbb26be687 # double 1.8771E-8 -.LCPI0_479: - .dword 0x3ecfd5455ccf9081 # double 3.7948000000000001E-6 -.LCPI0_480: - .dword 0x3f484ed651dbbfda # double 7.4182000000000002E-4 -.LCPI0_481: - .dword 0x3f6da059a73b42cc # double 0.0036164999999999999 -.LCPI0_482: - .dword 0x3d5b56f4407b2b3f # double 3.8851999999999999E-13 -.LCPI0_483: - .dword 0x3cde62fdf221a945 # double 1.6867999999999999E-15 -.LCPI0_484: - .dword 0x3dd7803f03d4db15 # double 8.5495999999999993E-11 -.LCPI0_485: - .dword 0x3e53675193770057 # double 1.8071E-8 -.LCPI0_486: - .dword 0x3ecee7f95858f80d # double 3.6843E-6 -.LCPI0_487: - .dword 0x3f47d157f6e1f426 # double 7.2685999999999996E-4 -.LCPI0_488: - .dword 0x3f61987908299a2d # double 0.0021478999999999999 -.LCPI0_489: - .dword 0x3d59f49c634d36c8 # double 3.6884999999999999E-13 -.LCPI0_490: - .dword 0x3cdcac6baec528a3 # double 1.5917E-15 -.LCPI0_491: - .dword 0x3dd675d48090d1d6 # double 8.1709999999999995E-11 -.LCPI0_492: - .dword 0x3e52afb34142b11c # double 1.7403E-8 -.LCPI0_493: - .dword 0x3ece037b539626b4 # double 3.5779000000000001E-6 -.LCPI0_494: - .dword 0x3f47578a807708cb # double 7.1234E-4 -.LCPI0_495: - .dword 0x3f4739ad75c47d48 # double 7.0878E-4 .text - .globl foo + .globl foo # -- Begin function foo .p2align 5 .type foo,@function foo: # @foo @@ -1002,7 +8,7 @@ foo: # @foo ftintrz.w.d $fa1, $fa0 movfr2gr.s $a0, $fa1 ori $a1, $zero, 93 - bltu $a1, $a0, .LBB0_67 + bltu $a1, $a0, .LBB0_66 # %bb.1: slli.d $a0, $a0, 2 pcalau12i $a1, %pc_hi20(.LJTI0_0) @@ -1012,1402 +18,3093 @@ foo: # @foo jr $a0 .LBB0_2: vldi $vr1, -784 - pcalau12i $a0, %pc_hi20(.LCPI0_489) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_489) - pcalau12i $a0, %pc_hi20(.LCPI0_490) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_490) - pcalau12i $a0, %pc_hi20(.LCPI0_491) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_491) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_492) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_492) - pcalau12i $a0, %pc_hi20(.LCPI0_493) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_493) - pcalau12i $a0, %pc_hi20(.LCPI0_494) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_494) - pcalau12i $a0, %pc_hi20(.LCPI0_495) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_495) - b .LBB0_66 + vldi $vr2, -1024 + fmadd.d $fa0, $fa0, $fa2, $fa1 + lu12i.w $a0, 406739 + ori $a0, $a0, 1736 + lu32i.d $a0, -396132 + lu52i.d $a0, $a0, 981 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -332718 + ori $a0, $a0, 2211 + lu32i.d $a0, -218005 + lu52i.d $a0, $a0, 973 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -521971 + ori $a0, $a0, 470 + lu32i.d $a0, 423380 + lu52i.d $a0, $a0, 989 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 267307 + ori $a0, $a0, 284 + lu32i.d $a0, 176051 + lu52i.d $a0, $a0, 997 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 342370 + ori $a0, $a0, 1716 + lu32i.d $a0, -130181 + lu52i.d $a0, $a0, 1004 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -522384 + ori $a0, $a0, 2251 + lu32i.d $a0, 481162 + lu52i.d $a0, $a0, 1012 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 482375 + ori $a0, $a0, 3400 + lu32i.d $a0, 473517 + lu52i.d $a0, $a0, 1012 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_3: - pcalau12i $a0, %pc_hi20(.LCPI0_160) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_160) - pcalau12i $a0, %pc_hi20(.LCPI0_161) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_161) - pcalau12i $a0, %pc_hi20(.LCPI0_162) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_162) - pcalau12i $a0, %pc_hi20(.LCPI0_163) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_163) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_164) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_164) - pcalau12i $a0, %pc_hi20(.LCPI0_165) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_165) - pcalau12i $a0, %pc_hi20(.LCPI0_166) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_166) - pcalau12i $a0, %pc_hi20(.LCPI0_167) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_167) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 155648 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -203028 + ori $a0, $a0, 2627 + lu32i.d $a0, -375841 + lu52i.d $a0, $a0, 987 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -404903 + ori $a0, $a0, 566 + lu32i.d $a0, -171497 + lu52i.d $a0, $a0, 979 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 310736 + ori $a0, $a0, 3172 + lu32i.d $a0, 143808 + lu52i.d $a0, $a0, 995 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -362534 + ori $a0, $a0, 1928 + lu32i.d $a0, 273762 + lu52i.d $a0, $a0, 1002 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -79402 + ori $a0, $a0, 3849 + lu32i.d $a0, 138071 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -142956 + ori $a0, $a0, 2859 + lu32i.d $a0, -380186 + lu52i.d $a0, $a0, 1015 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -88920 + ori $a0, $a0, 3093 + lu32i.d $a0, 340996 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_4: - pcalau12i $a0, %pc_hi20(.LCPI0_104) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_104) - pcalau12i $a0, %pc_hi20(.LCPI0_105) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_105) - pcalau12i $a0, %pc_hi20(.LCPI0_106) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_106) - pcalau12i $a0, %pc_hi20(.LCPI0_107) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_107) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_108) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_108) - pcalau12i $a0, %pc_hi20(.LCPI0_109) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_109) - pcalau12i $a0, %pc_hi20(.LCPI0_110) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_110) - pcalau12i $a0, %pc_hi20(.LCPI0_111) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_111) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 270336 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, 391757 + ori $a0, $a0, 2937 + lu32i.d $a0, 179249 + lu52i.d $a0, $a0, 988 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -328866 + ori $a0, $a0, 588 + lu32i.d $a0, 235040 + lu52i.d $a0, $a0, 980 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -362296 + ori $a0, $a0, 2443 + lu32i.d $a0, -338497 + lu52i.d $a0, $a0, 995 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 182158 + ori $a0, $a0, 2298 + lu32i.d $a0, -136591 + lu52i.d $a0, $a0, 1002 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -507738 + ori $a0, $a0, 1589 + lu32i.d $a0, -378822 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -317063 + ori $a0, $a0, 1889 + lu32i.d $a0, 123842 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 364736 + ori $a0, $a0, 2768 + lu32i.d $a0, -267555 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_5: - pcalau12i $a0, %pc_hi20(.LCPI0_56) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_56) - pcalau12i $a0, %pc_hi20(.LCPI0_57) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_57) - pcalau12i $a0, %pc_hi20(.LCPI0_58) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_58) - pcalau12i $a0, %pc_hi20(.LCPI0_59) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_59) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_60) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_60) - pcalau12i $a0, %pc_hi20(.LCPI0_61) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_61) - pcalau12i $a0, %pc_hi20(.LCPI0_62) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_62) - pcalau12i $a0, %pc_hi20(.LCPI0_63) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_63) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 368640 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, 227529 + ori $a0, $a0, 1351 + lu32i.d $a0, -465408 + lu52i.d $a0, $a0, 988 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -122344 + ori $a0, $a0, 1775 + lu32i.d $a0, -504461 + lu52i.d $a0, $a0, 980 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -143227 + ori $a0, $a0, 4025 + lu32i.d $a0, 164144 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -331814 + ori $a0, $a0, 1182 + lu32i.d $a0, 320975 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 486418 + ori $a0, $a0, 1917 + lu32i.d $a0, 138123 + lu52i.d $a0, $a0, 1010 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -338162 + ori $a0, $a0, 1779 + lu32i.d $a0, 504054 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 337893 + ori $a0, $a0, 533 + lu32i.d $a0, 119789 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_6: - pcalau12i $a0, %pc_hi20(.LCPI0_80) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_80) - pcalau12i $a0, %pc_hi20(.LCPI0_81) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_81) - pcalau12i $a0, %pc_hi20(.LCPI0_82) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_82) - pcalau12i $a0, %pc_hi20(.LCPI0_83) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_83) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_84) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_84) - pcalau12i $a0, %pc_hi20(.LCPI0_85) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_85) - pcalau12i $a0, %pc_hi20(.LCPI0_86) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_86) - pcalau12i $a0, %pc_hi20(.LCPI0_87) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_87) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 319488 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, 118486 + ori $a0, $a0, 3448 + lu32i.d $a0, 370346 + lu52i.d $a0, $a0, 988 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 477326 + ori $a0, $a0, 2954 + lu32i.d $a0, 386488 + lu52i.d $a0, $a0, 980 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -294797 + ori $a0, $a0, 3917 + lu32i.d $a0, -28762 + lu52i.d $a0, $a0, 995 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 63257 + ori $a0, $a0, 1882 + lu32i.d $a0, 110749 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -388896 + ori $a0, $a0, 545 + lu32i.d $a0, -78518 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 352254 + ori $a0, $a0, 1750 + lu32i.d $a0, 298567 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 124486 + ori $a0, $a0, 3861 + lu32i.d $a0, -31751 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_7: - pcalau12i $a0, %pc_hi20(.LCPI0_112) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_112) - pcalau12i $a0, %pc_hi20(.LCPI0_113) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_113) - pcalau12i $a0, %pc_hi20(.LCPI0_114) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_114) - pcalau12i $a0, %pc_hi20(.LCPI0_115) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_115) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_116) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_116) - pcalau12i $a0, %pc_hi20(.LCPI0_117) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_117) - pcalau12i $a0, %pc_hi20(.LCPI0_118) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_118) - pcalau12i $a0, %pc_hi20(.LCPI0_119) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_119) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 253952 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, 214006 + ori $a0, $a0, 792 + lu32i.d $a0, 120234 + lu52i.d $a0, $a0, 988 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -194434 + ori $a0, $a0, 1979 + lu32i.d $a0, 186156 + lu52i.d $a0, $a0, 980 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 80985 + ori $a0, $a0, 3865 + lu32i.d $a0, -432115 + lu52i.d $a0, $a0, 995 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -322608 + ori $a0, $a0, 1289 + lu32i.d $a0, -243574 + lu52i.d $a0, $a0, 1002 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 477218 + ori $a0, $a0, 2655 + lu32i.d $a0, -468037 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 493666 + ori $a0, $a0, 923 + lu32i.d $a0, 71551 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -441912 + ori $a0, $a0, 535 + lu32i.d $a0, -339152 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_8: - pcalau12i $a0, %pc_hi20(.LCPI0_192) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_192) - pcalau12i $a0, %pc_hi20(.LCPI0_193) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_193) - pcalau12i $a0, %pc_hi20(.LCPI0_194) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_194) - pcalau12i $a0, %pc_hi20(.LCPI0_195) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_195) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_196) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_196) - pcalau12i $a0, %pc_hi20(.LCPI0_197) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_197) - pcalau12i $a0, %pc_hi20(.LCPI0_198) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_198) - pcalau12i $a0, %pc_hi20(.LCPI0_199) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_199) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 90112 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -516594 + ori $a0, $a0, 1722 + lu32i.d $a0, 342063 + lu52i.d $a0, $a0, 987 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -176330 + ori $a0, $a0, 1584 + lu32i.d $a0, -489722 + lu52i.d $a0, $a0, 979 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -332622 + ori $a0, $a0, 2881 + lu32i.d $a0, -197084 + lu52i.d $a0, $a0, 994 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 139840 + ori $a0, $a0, 3480 + lu32i.d $a0, 7021 + lu52i.d $a0, $a0, 1002 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 324390 + ori $a0, $a0, 3387 + lu32i.d $a0, -167853 + lu52i.d $a0, $a0, 1008 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 128057 + ori $a0, $a0, 550 + lu32i.d $a0, 400519 + lu52i.d $a0, $a0, 1015 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -152002 + ori $a0, $a0, 1733 + lu32i.d $a0, 143696 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_9: - pcalau12i $a0, %pc_hi20(.LCPI0_200) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_200) - pcalau12i $a0, %pc_hi20(.LCPI0_201) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_201) - pcalau12i $a0, %pc_hi20(.LCPI0_202) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_202) - pcalau12i $a0, %pc_hi20(.LCPI0_203) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_203) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_204) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_204) - pcalau12i $a0, %pc_hi20(.LCPI0_205) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_205) - pcalau12i $a0, %pc_hi20(.LCPI0_206) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_206) - pcalau12i $a0, %pc_hi20(.LCPI0_207) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_207) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 73728 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -370954 + ori $a0, $a0, 2196 + lu32i.d $a0, 268420 + lu52i.d $a0, $a0, 987 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 93600 + ori $a0, $a0, 2538 + lu32i.d $a0, 485160 + lu52i.d $a0, $a0, 979 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -333935 + ori $a0, $a0, 1386 + lu32i.d $a0, -302806 + lu52i.d $a0, $a0, 994 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -496738 + ori $a0, $a0, 673 + lu32i.d $a0, -101363 + lu52i.d $a0, $a0, 1001 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 43969 + ori $a0, $a0, 2981 + lu32i.d $a0, -264060 + lu52i.d $a0, $a0, 1008 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -254531 + ori $a0, $a0, 2051 + lu32i.d $a0, 341731 + lu52i.d $a0, $a0, 1015 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 64760 + ori $a0, $a0, 220 + lu32i.d $a0, 99363 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_10: - pcalau12i $a0, %pc_hi20(.LCPI0_216) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_216) - pcalau12i $a0, %pc_hi20(.LCPI0_217) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_217) - pcalau12i $a0, %pc_hi20(.LCPI0_218) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_218) - pcalau12i $a0, %pc_hi20(.LCPI0_219) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_219) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_220) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_220) - pcalau12i $a0, %pc_hi20(.LCPI0_221) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_221) - pcalau12i $a0, %pc_hi20(.LCPI0_222) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_222) - pcalau12i $a0, %pc_hi20(.LCPI0_223) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_223) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 40960 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, 46952 + ori $a0, $a0, 496 + lu32i.d $a0, 131295 + lu52i.d $a0, $a0, 987 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 518072 + ori $a0, $a0, 570 + lu32i.d $a0, 344909 + lu52i.d $a0, $a0, 979 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 118138 + ori $a0, $a0, 486 + lu32i.d $a0, -497755 + lu52i.d $a0, $a0, 994 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -371858 + ori $a0, $a0, 1363 + lu32i.d $a0, -313261 + lu52i.d $a0, $a0, 1001 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 31037 + ori $a0, $a0, 181 + lu32i.d $a0, -441047 + lu52i.d $a0, $a0, 1008 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -367824 + ori $a0, $a0, 1296 + lu32i.d $a0, 232800 + lu52i.d $a0, $a0, 1015 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 432181 + ori $a0, $a0, 345 + lu32i.d $a0, 15896 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_11: - pcalau12i $a0, %pc_hi20(.LCPI0_152) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_152) - pcalau12i $a0, %pc_hi20(.LCPI0_153) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_153) - pcalau12i $a0, %pc_hi20(.LCPI0_154) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_154) - pcalau12i $a0, %pc_hi20(.LCPI0_155) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_155) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_156) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_156) - pcalau12i $a0, %pc_hi20(.LCPI0_157) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_157) - pcalau12i $a0, %pc_hi20(.LCPI0_158) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_158) - pcalau12i $a0, %pc_hi20(.LCPI0_159) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_159) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 172032 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -496243 + ori $a0, $a0, 3325 + lu32i.d $a0, -283607 + lu52i.d $a0, $a0, 987 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -449341 + ori $a0, $a0, 211 + lu32i.d $a0, -86273 + lu52i.d $a0, $a0, 979 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -41282 + ori $a0, $a0, 3079 + lu32i.d $a0, 212825 + lu52i.d $a0, $a0, 995 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -414241 + ori $a0, $a0, 2703 + lu32i.d $a0, 350420 + lu52i.d $a0, $a0, 1002 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 439163 + ori $a0, $a0, 2569 + lu32i.d $a0, 201826 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 166711 + ori $a0, $a0, 3440 + lu32i.d $a0, -304057 + lu52i.d $a0, $a0, 1015 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 431509 + ori $a0, $a0, 4078 + lu32i.d $a0, 395858 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_12: - pcalau12i $a0, %pc_hi20(.LCPI0_360) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_360) - pcalau12i $a0, %pc_hi20(.LCPI0_361) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_361) - pcalau12i $a0, %pc_hi20(.LCPI0_362) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_362) - pcalau12i $a0, %pc_hi20(.LCPI0_363) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_363) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_364) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_364) - pcalau12i $a0, %pc_hi20(.LCPI0_365) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_365) - pcalau12i $a0, %pc_hi20(.LCPI0_366) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_366) - pcalau12i $a0, %pc_hi20(.LCPI0_367) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_367) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 163840 + lu52i.d $a0, $a0, -1020 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, 57094 + ori $a0, $a0, 1407 + lu32i.d $a0, 82463 + lu52i.d $a0, $a0, 983 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 523252 + ori $a0, $a0, 1559 + lu32i.d $a0, 320349 + lu52i.d $a0, $a0, 975 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 284971 + ori $a0, $a0, 2150 + lu32i.d $a0, -353989 + lu52i.d $a0, $a0, 990 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 401519 + ori $a0, $a0, 3620 + lu32i.d $a0, 215387 + lu52i.d $a0, $a0, 998 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 492979 + ori $a0, $a0, 2764 + lu32i.d $a0, -361271 + lu52i.d $a0, $a0, 1005 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -407647 + ori $a0, $a0, 3754 + lu32i.d $a0, 85939 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 439227 + ori $a0, $a0, 2109 + lu32i.d $a0, 23991 + lu52i.d $a0, $a0, 1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_13: - pcalau12i $a0, %pc_hi20(.LCPI0_336) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_336) - pcalau12i $a0, %pc_hi20(.LCPI0_337) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_337) - pcalau12i $a0, %pc_hi20(.LCPI0_338) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_338) - pcalau12i $a0, %pc_hi20(.LCPI0_339) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_339) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_340) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_340) - pcalau12i $a0, %pc_hi20(.LCPI0_341) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_341) - pcalau12i $a0, %pc_hi20(.LCPI0_342) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_342) - pcalau12i $a0, %pc_hi20(.LCPI0_343) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_343) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 360448 + lu52i.d $a0, $a0, -1020 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, 226139 + ori $a0, $a0, 1439 + lu32i.d $a0, 293655 + lu52i.d $a0, $a0, 983 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -374919 + ori $a0, $a0, 2117 + lu32i.d $a0, -452559 + lu52i.d $a0, $a0, 975 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 385715 + ori $a0, $a0, 3097 + lu32i.d $a0, -64858 + lu52i.d $a0, $a0, 990 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -265885 + ori $a0, $a0, 3196 + lu32i.d $a0, 391977 + lu52i.d $a0, $a0, 998 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -376997 + ori $a0, $a0, 2712 + lu32i.d $a0, -171413 + lu52i.d $a0, $a0, 1005 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 218935 + ori $a0, $a0, 3924 + lu32i.d $a0, 171624 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -199985 + ori $a0, $a0, 2397 + lu32i.d $a0, 244611 + lu52i.d $a0, $a0, 1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_14: vldi $vr1, -837 - pcalau12i $a0, %pc_hi20(.LCPI0_398) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_398) - pcalau12i $a0, %pc_hi20(.LCPI0_399) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_399) - pcalau12i $a0, %pc_hi20(.LCPI0_400) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_400) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_401) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_401) - pcalau12i $a0, %pc_hi20(.LCPI0_402) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_402) - pcalau12i $a0, %pc_hi20(.LCPI0_403) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_403) - pcalau12i $a0, %pc_hi20(.LCPI0_404) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_404) - b .LBB0_66 + vldi $vr2, -1024 + fmadd.d $fa0, $fa0, $fa2, $fa1 + lu12i.w $a0, -162406 + ori $a0, $a0, 235 + lu32i.d $a0, -388454 + lu52i.d $a0, $a0, 982 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 188806 + ori $a0, $a0, 729 + lu32i.d $a0, -75330 + lu52i.d $a0, $a0, 974 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -520482 + ori $a0, $a0, 697 + lu32i.d $a0, 309529 + lu52i.d $a0, $a0, 990 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 359869 + ori $a0, $a0, 1251 + lu32i.d $a0, -50970 + lu52i.d $a0, $a0, 997 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -2013 + ori $a0, $a0, 2429 + lu32i.d $a0, 420453 + lu52i.d $a0, $a0, 1005 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -395755 + ori $a0, $a0, 2487 + lu32i.d $a0, -77659 + lu52i.d $a0, $a0, 1012 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -448221 + ori $a0, $a0, 3676 + lu32i.d $a0, 427550 + lu52i.d $a0, $a0, 1017 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_15: - pcalau12i $a0, %pc_hi20(.LCPI0_304) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_304) - pcalau12i $a0, %pc_hi20(.LCPI0_305) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_305) - pcalau12i $a0, %pc_hi20(.LCPI0_306) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_306) - pcalau12i $a0, %pc_hi20(.LCPI0_307) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_307) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_308) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_308) - pcalau12i $a0, %pc_hi20(.LCPI0_309) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_309) - pcalau12i $a0, %pc_hi20(.LCPI0_310) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_310) - pcalau12i $a0, %pc_hi20(.LCPI0_311) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_311) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, -425984 + lu52i.d $a0, $a0, -1020 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, 16630 + ori $a0, $a0, 2076 + lu32i.d $a0, -405355 + lu52i.d $a0, $a0, 983 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -361962 + ori $a0, $a0, 208 + lu32i.d $a0, 2873 + lu52i.d $a0, $a0, 976 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -280594 + ori $a0, $a0, 2417 + lu32i.d $a0, 203469 + lu52i.d $a0, $a0, 991 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -506654 + ori $a0, $a0, 3957 + lu32i.d $a0, -374209 + lu52i.d $a0, $a0, 998 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 166292 + ori $a0, $a0, 908 + lu32i.d $a0, 62123 + lu52i.d $a0, $a0, 1006 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 103374 + ori $a0, $a0, 2024 + lu32i.d $a0, 301010 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -165155 + ori $a0, $a0, 351 + lu32i.d $a0, -483084 + lu52i.d $a0, $a0, 1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_16: - pcalau12i $a0, %pc_hi20(.LCPI0_320) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_320) - pcalau12i $a0, %pc_hi20(.LCPI0_321) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_321) - pcalau12i $a0, %pc_hi20(.LCPI0_322) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_322) - pcalau12i $a0, %pc_hi20(.LCPI0_323) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_323) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_324) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_324) - pcalau12i $a0, %pc_hi20(.LCPI0_325) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_325) - pcalau12i $a0, %pc_hi20(.LCPI0_326) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_326) - pcalau12i $a0, %pc_hi20(.LCPI0_327) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_327) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 491520 + lu52i.d $a0, $a0, -1020 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -56866 + ori $a0, $a0, 2101 + lu32i.d $a0, 457715 + lu52i.d $a0, $a0, 983 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -206958 + ori $a0, $a0, 1941 + lu32i.d $a0, -237721 + lu52i.d $a0, $a0, 975 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -13914 + ori $a0, $a0, 1115 + lu32i.d $a0, 78674 + lu52i.d $a0, $a0, 991 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 102612 + ori $a0, $a0, 2304 + lu32i.d $a0, -522757 + lu52i.d $a0, $a0, 998 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 441728 + ori $a0, $a0, 3262 + lu32i.d $a0, -30208 + lu52i.d $a0, $a0, 1005 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -410237 + ori $a0, $a0, 2107 + lu32i.d $a0, 234008 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 17649 + ori $a0, $a0, 2586 + lu32i.d $a0, 401009 + lu52i.d $a0, $a0, 1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_17: vldi $vr1, -843 - pcalau12i $a0, %pc_hi20(.LCPI0_419) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_419) - pcalau12i $a0, %pc_hi20(.LCPI0_420) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_420) - pcalau12i $a0, %pc_hi20(.LCPI0_421) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_421) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_422) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_422) - pcalau12i $a0, %pc_hi20(.LCPI0_423) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_423) - pcalau12i $a0, %pc_hi20(.LCPI0_424) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_424) - pcalau12i $a0, %pc_hi20(.LCPI0_425) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_425) - b .LBB0_66 + vldi $vr2, -1024 + fmadd.d $fa0, $fa0, $fa2, $fa1 + lu12i.w $a0, -442093 + ori $a0, $a0, 3359 + lu32i.d $a0, 399954 + lu52i.d $a0, $a0, 982 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 390697 + ori $a0, $a0, 2724 + lu32i.d $a0, -408198 + lu52i.d $a0, $a0, 974 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 65947 + ori $a0, $a0, 3398 + lu32i.d $a0, 125062 + lu52i.d $a0, $a0, 990 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -229060 + ori $a0, $a0, 2397 + lu32i.d $a0, -287902 + lu52i.d $a0, $a0, 997 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -414720 + ori $a0, $a0, 432 + lu32i.d $a0, 285131 + lu52i.d $a0, $a0, 1005 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 442513 + ori $a0, $a0, 675 + lu32i.d $a0, -208913 + lu52i.d $a0, $a0, 1012 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -408358 + ori $a0, $a0, 2304 + lu32i.d $a0, 61404 + lu52i.d $a0, $a0, 1017 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_18: - pcalau12i $a0, %pc_hi20(.LCPI0_240) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_240) - pcalau12i $a0, %pc_hi20(.LCPI0_241) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_241) - pcalau12i $a0, %pc_hi20(.LCPI0_242) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_242) - pcalau12i $a0, %pc_hi20(.LCPI0_243) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_243) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_244) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_244) - pcalau12i $a0, %pc_hi20(.LCPI0_245) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_245) - pcalau12i $a0, %pc_hi20(.LCPI0_246) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_246) - pcalau12i $a0, %pc_hi20(.LCPI0_247) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_247) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, -16384 + lu52i.d $a0, $a0, -1019 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -111046 + ori $a0, $a0, 80 + lu32i.d $a0, -101734 + lu52i.d $a0, $a0, 986 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 41102 + ori $a0, $a0, 2448 + lu32i.d $a0, 152307 + lu52i.d $a0, $a0, 979 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -158616 + ori $a0, $a0, 3588 + lu32i.d $a0, 296198 + lu52i.d $a0, $a0, 994 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -48960 + ori $a0, $a0, 2433 + lu32i.d $a0, 459953 + lu52i.d $a0, $a0, 1001 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -138109 + ori $a0, $a0, 2329 + lu32i.d $a0, 376562 + lu52i.d $a0, $a0, 1008 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -363972 + ori $a0, $a0, 1496 + lu32i.d $a0, 88677 + lu52i.d $a0, $a0, 1015 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 308700 + ori $a0, $a0, 3172 + lu32i.d $a0, -194616 + lu52i.d $a0, $a0, 1020 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_19: - pcalau12i $a0, %pc_hi20(.LCPI0_280) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_280) - pcalau12i $a0, %pc_hi20(.LCPI0_281) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_281) - pcalau12i $a0, %pc_hi20(.LCPI0_282) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_282) - pcalau12i $a0, %pc_hi20(.LCPI0_283) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_283) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_284) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_284) - pcalau12i $a0, %pc_hi20(.LCPI0_285) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_285) - pcalau12i $a0, %pc_hi20(.LCPI0_286) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_286) - pcalau12i $a0, %pc_hi20(.LCPI0_287) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_287) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, -475136 + lu52i.d $a0, $a0, -1019 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -379344 + ori $a0, $a0, 3374 + lu32i.d $a0, -331511 + lu52i.d $a0, $a0, 985 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -250045 + ori $a0, $a0, 3454 + lu32i.d $a0, 73428 + lu52i.d $a0, $a0, 978 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 53889 + ori $a0, $a0, 1556 + lu32i.d $a0, 143189 + lu52i.d $a0, $a0, 993 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 41767 + ori $a0, $a0, 1188 + lu32i.d $a0, 358447 + lu52i.d $a0, $a0, 1000 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 10485 + ori $a0, $a0, 364 + lu32i.d $a0, 420990 + lu52i.d $a0, $a0, 1007 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -40655 + ori $a0, $a0, 1844 + lu32i.d $a0, 319048 + lu52i.d $a0, $a0, 1014 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -447281 + ori $a0, $a0, 1726 + lu32i.d $a0, 79104 + lu52i.d $a0, $a0, 1020 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_20: - pcalau12i $a0, %pc_hi20(.LCPI0_184) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_184) - pcalau12i $a0, %pc_hi20(.LCPI0_185) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_185) - pcalau12i $a0, %pc_hi20(.LCPI0_186) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_186) - pcalau12i $a0, %pc_hi20(.LCPI0_187) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_187) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_188) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_188) - pcalau12i $a0, %pc_hi20(.LCPI0_189) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_189) - pcalau12i $a0, %pc_hi20(.LCPI0_190) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_190) - pcalau12i $a0, %pc_hi20(.LCPI0_191) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_191) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 106496 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, 139417 + ori $a0, $a0, 610 + lu32i.d $a0, 419165 + lu52i.d $a0, $a0, 987 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -254118 + ori $a0, $a0, 2968 + lu32i.d $a0, -413666 + lu52i.d $a0, $a0, 979 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -358036 + ori $a0, $a0, 3419 + lu32i.d $a0, -85451 + lu52i.d $a0, $a0, 994 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -10870 + ori $a0, $a0, 3678 + lu32i.d $a0, 68131 + lu52i.d $a0, $a0, 1002 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 224550 + ori $a0, $a0, 70 + lu32i.d $a0, -66045 + lu52i.d $a0, $a0, 1008 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 518764 + ori $a0, $a0, 3852 + lu32i.d $a0, 462393 + lu52i.d $a0, $a0, 1015 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 29527 + ori $a0, $a0, 3687 + lu32i.d $a0, 189960 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_21: - pcalau12i $a0, %pc_hi20(.LCPI0_328) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_328) - pcalau12i $a0, %pc_hi20(.LCPI0_329) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_329) - pcalau12i $a0, %pc_hi20(.LCPI0_330) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_330) - pcalau12i $a0, %pc_hi20(.LCPI0_331) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_331) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_332) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_332) - pcalau12i $a0, %pc_hi20(.LCPI0_333) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_333) - pcalau12i $a0, %pc_hi20(.LCPI0_334) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_334) - pcalau12i $a0, %pc_hi20(.LCPI0_335) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_335) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 425984 + lu52i.d $a0, $a0, -1020 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -210260 + ori $a0, $a0, 3116 + lu32i.d $a0, 373206 + lu52i.d $a0, $a0, 983 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 292579 + ori $a0, $a0, 2216 + lu32i.d $a0, -348519 + lu52i.d $a0, $a0, 975 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 322394 + ori $a0, $a0, 2689 + lu32i.d $a0, 21524 + lu52i.d $a0, $a0, 991 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 406756 + ori $a0, $a0, 3883 + lu32i.d $a0, 457174 + lu52i.d $a0, $a0, 998 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 506359 + ori $a0, $a0, 1462 + lu32i.d $a0, -102391 + lu52i.d $a0, $a0, 1005 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 235887 + ori $a0, $a0, 2691 + lu32i.d $a0, 202333 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 117172 + ori $a0, $a0, 314 + lu32i.d $a0, 321854 + lu52i.d $a0, $a0, 1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_22: vldi $vr1, -839 - pcalau12i $a0, %pc_hi20(.LCPI0_405) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_405) - pcalau12i $a0, %pc_hi20(.LCPI0_406) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_406) - pcalau12i $a0, %pc_hi20(.LCPI0_407) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_407) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_408) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_408) - pcalau12i $a0, %pc_hi20(.LCPI0_409) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_409) - pcalau12i $a0, %pc_hi20(.LCPI0_410) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_410) - pcalau12i $a0, %pc_hi20(.LCPI0_411) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_411) - b .LBB0_66 + vldi $vr2, -1024 + fmadd.d $fa0, $fa0, $fa2, $fa1 + lu12i.w $a0, -243815 + ori $a0, $a0, 828 + lu32i.d $a0, -480434 + lu52i.d $a0, $a0, 982 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 49879 + ori $a0, $a0, 2475 + lu32i.d $a0, -193330 + lu52i.d $a0, $a0, 974 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -422573 + ori $a0, $a0, 778 + lu32i.d $a0, 244587 + lu52i.d $a0, $a0, 990 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -39263 + ori $a0, $a0, 4038 + lu32i.d $a0, -133865 + lu52i.d $a0, $a0, 997 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 381455 + ori $a0, $a0, 1088 + lu32i.d $a0, 373477 + lu52i.d $a0, $a0, 1005 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -454427 + ori $a0, $a0, 2743 + lu32i.d $a0, -122842 + lu52i.d $a0, $a0, 1012 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 89657 + ori $a0, $a0, 1812 + lu32i.d $a0, 302728 + lu52i.d $a0, $a0, 1017 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_23: - pcalau12i $a0, %pc_hi20(.LCPI0_352) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_352) - pcalau12i $a0, %pc_hi20(.LCPI0_353) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_353) - pcalau12i $a0, %pc_hi20(.LCPI0_354) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_354) - pcalau12i $a0, %pc_hi20(.LCPI0_355) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_355) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_356) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_356) - pcalau12i $a0, %pc_hi20(.LCPI0_357) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_357) - pcalau12i $a0, %pc_hi20(.LCPI0_358) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_358) - pcalau12i $a0, %pc_hi20(.LCPI0_359) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_359) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 229376 + lu52i.d $a0, $a0, -1020 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -324657 + ori $a0, $a0, 3404 + lu32i.d $a0, 148617 + lu52i.d $a0, $a0, 983 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 180915 + ori $a0, $a0, 947 + lu32i.d $a0, 406503 + lu52i.d $a0, $a0, 975 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -92013 + ori $a0, $a0, 3417 + lu32i.d $a0, -263107 + lu52i.d $a0, $a0, 990 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 173849 + ori $a0, $a0, 4036 + lu32i.d $a0, 271260 + lu52i.d $a0, $a0, 998 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 76589 + ori $a0, $a0, 3674 + lu32i.d $a0, -300715 + lu52i.d $a0, $a0, 1005 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -234761 + ori $a0, $a0, 3162 + lu32i.d $a0, 113534 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 365877 + ori $a0, $a0, 2157 + lu32i.d $a0, 95764 + lu52i.d $a0, $a0, 1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_24: vldi $vr1, -876 - pcalau12i $a0, %pc_hi20(.LCPI0_475) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_475) - pcalau12i $a0, %pc_hi20(.LCPI0_476) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_476) - pcalau12i $a0, %pc_hi20(.LCPI0_477) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_477) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_478) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_478) - pcalau12i $a0, %pc_hi20(.LCPI0_479) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_479) - pcalau12i $a0, %pc_hi20(.LCPI0_480) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_480) - pcalau12i $a0, %pc_hi20(.LCPI0_481) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_481) - b .LBB0_66 + vldi $vr2, -1024 + fmadd.d $fa0, $fa0, $fa2, $fa1 + lu12i.w $a0, -344308 + ori $a0, $a0, 4071 + lu32i.d $a0, -209359 + lu52i.d $a0, $a0, 981 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -343647 + ori $a0, $a0, 1703 + lu32i.d $a0, 6400 + lu52i.d $a0, $a0, 974 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 457412 + ori $a0, $a0, 1759 + lu32i.d $a0, -485152 + lu52i.d $a0, $a0, 989 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -317762 + ori $a0, $a0, 1671 + lu32i.d $a0, 272315 + lu52i.d $a0, $a0, 997 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 380153 + ori $a0, $a0, 129 + lu32i.d $a0, -10939 + lu52i.d $a0, $a0, 1004 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 335291 + ori $a0, $a0, 4058 + lu32i.d $a0, -504106 + lu52i.d $a0, $a0, 1012 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -363596 + ori $a0, $a0, 716 + lu32i.d $a0, -155559 + lu52i.d $a0, $a0, 1014 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_25: - pcalau12i $a0, %pc_hi20(.LCPI0_272) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_272) - pcalau12i $a0, %pc_hi20(.LCPI0_273) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_273) - pcalau12i $a0, %pc_hi20(.LCPI0_274) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_274) - pcalau12i $a0, %pc_hi20(.LCPI0_275) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_275) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_276) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_276) - pcalau12i $a0, %pc_hi20(.LCPI0_277) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_277) - pcalau12i $a0, %pc_hi20(.LCPI0_278) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_278) - pcalau12i $a0, %pc_hi20(.LCPI0_279) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_279) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, -147456 + lu52i.d $a0, $a0, -1019 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, 511478 + ori $a0, $a0, 2472 + lu32i.d $a0, -508859 + lu52i.d $a0, $a0, 986 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 231872 + ori $a0, $a0, 2875 + lu32i.d $a0, -144343 + lu52i.d $a0, $a0, 978 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 463269 + ori $a0, $a0, 710 + lu32i.d $a0, 17313 + lu52i.d $a0, $a0, 994 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 210539 + ori $a0, $a0, 2711 + lu32i.d $a0, 159963 + lu52i.d $a0, $a0, 1001 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -378122 + ori $a0, $a0, 2439 + lu32i.d $a0, 122953 + lu52i.d $a0, $a0, 1008 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 281427 + ori $a0, $a0, 2999 + lu32i.d $a0, -146056 + lu52i.d $a0, $a0, 1014 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 3355 + ori $a0, $a0, 1815 + lu32i.d $a0, -458018 + lu52i.d $a0, $a0, 1020 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_26: - pcalau12i $a0, %pc_hi20(.LCPI0_256) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_256) - pcalau12i $a0, %pc_hi20(.LCPI0_257) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_257) - pcalau12i $a0, %pc_hi20(.LCPI0_258) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_258) - pcalau12i $a0, %pc_hi20(.LCPI0_259) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_259) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_260) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_260) - pcalau12i $a0, %pc_hi20(.LCPI0_261) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_261) - pcalau12i $a0, %pc_hi20(.LCPI0_262) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_262) - pcalau12i $a0, %pc_hi20(.LCPI0_263) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_263) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, -81920 + lu52i.d $a0, $a0, -1019 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -167976 + ori $a0, $a0, 221 + lu32i.d $a0, -315745 + lu52i.d $a0, $a0, 986 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 156460 + ori $a0, $a0, 115 + lu32i.d $a0, 35539 + lu52i.d $a0, $a0, 979 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -470888 + ori $a0, $a0, 3961 + lu32i.d $a0, 148818 + lu52i.d $a0, $a0, 994 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 476811 + ori $a0, $a0, 2251 + lu32i.d $a0, 301228 + lu52i.d $a0, $a0, 1001 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -327842 + ori $a0, $a0, 2883 + lu32i.d $a0, 242731 + lu52i.d $a0, $a0, 1008 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 51163 + ori $a0, $a0, 3268 + lu32i.d $a0, 3852 + lu52i.d $a0, $a0, 1015 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -16778 + ori $a0, $a0, 3211 + lu32i.d $a0, -331351 + lu52i.d $a0, $a0, 1020 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_27: vldi $vr1, -858 - pcalau12i $a0, %pc_hi20(.LCPI0_454) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_454) - pcalau12i $a0, %pc_hi20(.LCPI0_455) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_455) - pcalau12i $a0, %pc_hi20(.LCPI0_456) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_456) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_457) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_457) - pcalau12i $a0, %pc_hi20(.LCPI0_458) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_458) - pcalau12i $a0, %pc_hi20(.LCPI0_459) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_459) - pcalau12i $a0, %pc_hi20(.LCPI0_460) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_460) - b .LBB0_66 + vldi $vr2, -1024 + fmadd.d $fa0, $fa0, $fa2, $fa1 + lu12i.w $a0, -420092 + ori $a0, $a0, 1285 + lu32i.d $a0, 57421 + lu52i.d $a0, $a0, 982 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -385511 + ori $a0, $a0, 1909 + lu32i.d $a0, 207868 + lu52i.d $a0, $a0, 974 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -127083 + ori $a0, $a0, 1510 + lu32i.d $a0, -245633 + lu52i.d $a0, $a0, 989 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -89339 + ori $a0, $a0, 1722 + lu32i.d $a0, 434374 + lu52i.d $a0, $a0, 997 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 482080 + ori $a0, $a0, 1817 + lu32i.d $a0, 92964 + lu52i.d $a0, $a0, 1005 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 462159 + ori $a0, $a0, 3915 + lu32i.d $a0, -401800 + lu52i.d $a0, $a0, 1012 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -47218 + ori $a0, $a0, 833 + lu32i.d $a0, 53109 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_28: vldi $vr1, -841 - pcalau12i $a0, %pc_hi20(.LCPI0_412) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_412) - pcalau12i $a0, %pc_hi20(.LCPI0_413) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_413) - pcalau12i $a0, %pc_hi20(.LCPI0_414) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_414) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_415) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_415) - pcalau12i $a0, %pc_hi20(.LCPI0_416) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_416) - pcalau12i $a0, %pc_hi20(.LCPI0_417) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_417) - pcalau12i $a0, %pc_hi20(.LCPI0_418) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_418) - b .LBB0_66 + vldi $vr2, -1024 + fmadd.d $fa0, $fa0, $fa2, $fa1 + lu12i.w $a0, 318823 + ori $a0, $a0, 3077 + lu32i.d $a0, 481535 + lu52i.d $a0, $a0, 982 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -499160 + ori $a0, $a0, 2750 + lu32i.d $a0, -304129 + lu52i.d $a0, $a0, 974 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 522280 + ori $a0, $a0, 3473 + lu32i.d $a0, 183158 + lu52i.d $a0, $a0, 990 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -419081 + ori $a0, $a0, 1935 + lu32i.d $a0, -212748 + lu52i.d $a0, $a0, 997 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 405846 + ori $a0, $a0, 1576 + lu32i.d $a0, 328397 + lu52i.d $a0, $a0, 1005 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -210185 + ori $a0, $a0, 155 + lu32i.d $a0, -166565 + lu52i.d $a0, $a0, 1012 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 179046 + ori $a0, $a0, 1840 + lu32i.d $a0, 180724 + lu52i.d $a0, $a0, 1017 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_29: - pcalau12i $a0, %pc_hi20(.LCPI0_312) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_312) - pcalau12i $a0, %pc_hi20(.LCPI0_313) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_313) - pcalau12i $a0, %pc_hi20(.LCPI0_314) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_314) - pcalau12i $a0, %pc_hi20(.LCPI0_315) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_315) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_316) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_316) - pcalau12i $a0, %pc_hi20(.LCPI0_317) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_317) - pcalau12i $a0, %pc_hi20(.LCPI0_318) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_318) - pcalau12i $a0, %pc_hi20(.LCPI0_319) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_319) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, -491520 + lu52i.d $a0, $a0, -1020 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -185464 + ori $a0, $a0, 1860 + lu32i.d $a0, -500933 + lu52i.d $a0, $a0, 983 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -99049 + ori $a0, $a0, 342 + lu32i.d $a0, -119780 + lu52i.d $a0, $a0, 975 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 421232 + ori $a0, $a0, 3559 + lu32i.d $a0, 139248 + lu52i.d $a0, $a0, 991 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 174979 + ori $a0, $a0, 2688 + lu32i.d $a0, -450418 + lu52i.d $a0, $a0, 998 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -55306 + ori $a0, $a0, 116 + lu32i.d $a0, 22636 + lu52i.d $a0, $a0, 1006 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 113856 + ori $a0, $a0, 3681 + lu32i.d $a0, 266865 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -270248 + ori $a0, $a0, 2477 + lu32i.d $a0, 482210 + lu52i.d $a0, $a0, 1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_30: - pcalau12i $a0, %pc_hi20(.LCPI0_176) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_176) - pcalau12i $a0, %pc_hi20(.LCPI0_177) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_177) - pcalau12i $a0, %pc_hi20(.LCPI0_178) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_178) - pcalau12i $a0, %pc_hi20(.LCPI0_179) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_179) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_180) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_180) - pcalau12i $a0, %pc_hi20(.LCPI0_181) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_181) - pcalau12i $a0, %pc_hi20(.LCPI0_182) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_182) - pcalau12i $a0, %pc_hi20(.LCPI0_183) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_183) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 122880 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -318899 + ori $a0, $a0, 3682 + lu32i.d $a0, 499941 + lu52i.d $a0, $a0, 987 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -139763 + ori $a0, $a0, 2594 + lu32i.d $a0, -335249 + lu52i.d $a0, $a0, 979 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 413561 + ori $a0, $a0, 3842 + lu32i.d $a0, 16159 + lu52i.d $a0, $a0, 995 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 266141 + ori $a0, $a0, 3952 + lu32i.d $a0, 132827 + lu52i.d $a0, $a0, 1002 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 521106 + ori $a0, $a0, 961 + lu32i.d $a0, 20836 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -66734 + ori $a0, $a0, 3873 + lu32i.d $a0, -521061 + lu52i.d $a0, $a0, 1015 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -498955 + ori $a0, $a0, 2442 + lu32i.d $a0, 238194 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_31: vldi $vr1, -888 - pcalau12i $a0, %pc_hi20(.LCPI0_482) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_482) - pcalau12i $a0, %pc_hi20(.LCPI0_483) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_483) - pcalau12i $a0, %pc_hi20(.LCPI0_484) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_484) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_485) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_485) - pcalau12i $a0, %pc_hi20(.LCPI0_486) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_486) - pcalau12i $a0, %pc_hi20(.LCPI0_487) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_487) - pcalau12i $a0, %pc_hi20(.LCPI0_488) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_488) - b .LBB0_66 + vldi $vr2, -1024 + fmadd.d $fa0, $fa0, $fa2, $fa1 + lu12i.w $a0, 264114 + ori $a0, $a0, 2879 + lu32i.d $a0, -305420 + lu52i.d $a0, $a0, 981 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -56806 + ori $a0, $a0, 2373 + lu32i.d $a0, -105731 + lu52i.d $a0, $a0, 973 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 15693 + ori $a0, $a0, 2837 + lu32i.d $a0, 491583 + lu52i.d $a0, $a0, 989 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -444560 + ori $a0, $a0, 87 + lu32i.d $a0, 223057 + lu52i.d $a0, $a0, 997 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 361871 + ori $a0, $a0, 2061 + lu32i.d $a0, -71687 + lu52i.d $a0, $a0, 1004 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -37345 + ori $a0, $a0, 1062 + lu32i.d $a0, 512343 + lu52i.d $a0, $a0, 1012 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 33433 + ori $a0, $a0, 2605 + lu32i.d $a0, 104569 + lu52i.d $a0, $a0, 1014 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_32: vldi $vr1, -854 - pcalau12i $a0, %pc_hi20(.LCPI0_447) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_447) - pcalau12i $a0, %pc_hi20(.LCPI0_448) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_448) - pcalau12i $a0, %pc_hi20(.LCPI0_449) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_449) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_450) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_450) - pcalau12i $a0, %pc_hi20(.LCPI0_451) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_451) - pcalau12i $a0, %pc_hi20(.LCPI0_452) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_452) - pcalau12i $a0, %pc_hi20(.LCPI0_453) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_453) - b .LBB0_66 + vldi $vr2, -1024 + fmadd.d $fa0, $fa0, $fa2, $fa1 + lu12i.w $a0, 283652 + ori $a0, $a0, 177 + lu32i.d $a0, 118065 + lu52i.d $a0, $a0, 982 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -280261 + ori $a0, $a0, 1820 + lu32i.d $a0, 283898 + lu52i.d $a0, $a0, 974 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -143177 + ori $a0, $a0, 2285 + lu32i.d $a0, -156822 + lu52i.d $a0, $a0, 989 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 416701 + ori $a0, $a0, 2761 + lu32i.d $a0, 493625 + lu52i.d $a0, $a0, 997 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -304356 + ori $a0, $a0, 59 + lu32i.d $a0, 128423 + lu52i.d $a0, $a0, 1005 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 513688 + ori $a0, $a0, 3383 + lu32i.d $a0, -365572 + lu52i.d $a0, $a0, 1012 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 192454 + ori $a0, $a0, 3278 + lu32i.d $a0, 267281 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_33: vldi $vr1, -850 - pcalau12i $a0, %pc_hi20(.LCPI0_440) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_440) - pcalau12i $a0, %pc_hi20(.LCPI0_441) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_441) - pcalau12i $a0, %pc_hi20(.LCPI0_442) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_442) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_443) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_443) - pcalau12i $a0, %pc_hi20(.LCPI0_444) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_444) - pcalau12i $a0, %pc_hi20(.LCPI0_445) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_445) - pcalau12i $a0, %pc_hi20(.LCPI0_446) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_446) - b .LBB0_66 + vldi $vr2, -1024 + fmadd.d $fa0, $fa0, $fa2, $fa1 + lu12i.w $a0, 365846 + ori $a0, $a0, 3817 + lu32i.d $a0, 182421 + lu52i.d $a0, $a0, 982 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 302328 + ori $a0, $a0, 1873 + lu32i.d $a0, 364828 + lu52i.d $a0, $a0, 974 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -277133 + ori $a0, $a0, 1010 + lu32i.d $a0, -63147 + lu52i.d $a0, $a0, 989 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -388223 + ori $a0, $a0, 2955 + lu32i.d $a0, -492886 + lu52i.d $a0, $a0, 997 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -170851 + ori $a0, $a0, 3928 + lu32i.d $a0, 165284 + lu52i.d $a0, $a0, 1005 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 241728 + ori $a0, $a0, 3328 + lu32i.d $a0, -328227 + lu52i.d $a0, $a0, 1012 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 298030 + ori $a0, $a0, 1905 + lu32i.d $a0, 485935 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_34: - pcalau12i $a0, %pc_hi20(.LCPI0_144) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_144) - pcalau12i $a0, %pc_hi20(.LCPI0_145) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_145) - pcalau12i $a0, %pc_hi20(.LCPI0_146) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_146) - pcalau12i $a0, %pc_hi20(.LCPI0_147) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_147) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_148) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_148) - pcalau12i $a0, %pc_hi20(.LCPI0_149) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_149) - pcalau12i $a0, %pc_hi20(.LCPI0_150) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_150) - pcalau12i $a0, %pc_hi20(.LCPI0_151) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_151) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 188416 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -492857 + ori $a0, $a0, 1471 + lu32i.d $a0, -187266 + lu52i.d $a0, $a0, 987 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -206662 + ori $a0, $a0, 3612 + lu32i.d $a0, 674 + lu52i.d $a0, $a0, 980 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -55454 + ori $a0, $a0, 2370 + lu32i.d $a0, 285530 + lu52i.d $a0, $a0, 995 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -427419 + ori $a0, $a0, 1511 + lu32i.d $a0, 431498 + lu52i.d $a0, $a0, 1002 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -246306 + ori $a0, $a0, 1487 + lu32i.d $a0, 269291 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -506686 + ori $a0, $a0, 2683 + lu32i.d $a0, -223822 + lu52i.d $a0, $a0, 1015 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -355342 + ori $a0, $a0, 2315 + lu32i.d $a0, 453110 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_35: vldi $vr1, -835 - pcalau12i $a0, %pc_hi20(.LCPI0_391) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_391) - pcalau12i $a0, %pc_hi20(.LCPI0_392) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_392) - pcalau12i $a0, %pc_hi20(.LCPI0_393) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_393) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_394) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_394) - pcalau12i $a0, %pc_hi20(.LCPI0_395) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_395) - pcalau12i $a0, %pc_hi20(.LCPI0_396) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_396) - pcalau12i $a0, %pc_hi20(.LCPI0_397) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_397) - b .LBB0_66 + vldi $vr2, -1024 + fmadd.d $fa0, $fa0, $fa2, $fa1 + lu12i.w $a0, 433500 + ori $a0, $a0, 823 + lu32i.d $a0, -290755 + lu52i.d $a0, $a0, 982 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -425897 + ori $a0, $a0, 145 + lu32i.d $a0, 25201 + lu52i.d $a0, $a0, 975 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 379533 + ori $a0, $a0, 424 + lu32i.d $a0, 378164 + lu52i.d $a0, $a0, 990 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 58196 + ori $a0, $a0, 3534 + lu32i.d $a0, 18003 + lu52i.d $a0, $a0, 998 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 252809 + ori $a0, $a0, 3392 + lu32i.d $a0, 469437 + lu52i.d $a0, $a0, 1005 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 463732 + ori $a0, $a0, 4045 + lu32i.d $a0, -30994 + lu52i.d $a0, $a0, 1012 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -386011 + ori $a0, $a0, 3335 + lu32i.d $a0, -493385 + lu52i.d $a0, $a0, 1017 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_36: vldi $vr1, -833 - pcalau12i $a0, %pc_hi20(.LCPI0_384) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_384) - pcalau12i $a0, %pc_hi20(.LCPI0_385) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_385) - pcalau12i $a0, %pc_hi20(.LCPI0_386) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_386) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_387) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_387) - pcalau12i $a0, %pc_hi20(.LCPI0_388) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_388) - pcalau12i $a0, %pc_hi20(.LCPI0_389) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_389) - pcalau12i $a0, %pc_hi20(.LCPI0_390) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_390) - b .LBB0_66 + vldi $vr2, -1024 + fmadd.d $fa0, $fa0, $fa2, $fa1 + lu12i.w $a0, 365777 + ori $a0, $a0, 2120 + lu32i.d $a0, -186992 + lu52i.d $a0, $a0, 982 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 145466 + ori $a0, $a0, 2116 + lu32i.d $a0, 92141 + lu52i.d $a0, $a0, 975 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 406788 + ori $a0, $a0, 3942 + lu32i.d $a0, 450762 + lu52i.d $a0, $a0, 990 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -466780 + ori $a0, $a0, 1475 + lu32i.d $a0, 63707 + lu52i.d $a0, $a0, 998 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 71741 + ori $a0, $a0, 823 + lu32i.d $a0, 520482 + lu52i.d $a0, $a0, 1005 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 484958 + ori $a0, $a0, 734 + lu32i.d $a0, 8619 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -429833 + ori $a0, $a0, 2974 + lu32i.d $a0, -362724 + lu52i.d $a0, $a0, 1017 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_37: - pcalau12i $a0, %pc_hi20(.LCPI0_120) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_120) - pcalau12i $a0, %pc_hi20(.LCPI0_121) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_121) - pcalau12i $a0, %pc_hi20(.LCPI0_122) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_122) - pcalau12i $a0, %pc_hi20(.LCPI0_123) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_123) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_124) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_124) - pcalau12i $a0, %pc_hi20(.LCPI0_125) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_125) - pcalau12i $a0, %pc_hi20(.LCPI0_126) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_126) - pcalau12i $a0, %pc_hi20(.LCPI0_127) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_127) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 237568 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -158558 + ori $a0, $a0, 146 + lu32i.d $a0, 63488 + lu52i.d $a0, $a0, 988 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -509920 + ori $a0, $a0, 343 + lu32i.d $a0, 138287 + lu52i.d $a0, $a0, 980 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 259108 + ori $a0, $a0, 2779 + lu32i.d $a0, -521202 + lu52i.d $a0, $a0, 995 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -354784 + ori $a0, $a0, 476 + lu32i.d $a0, -344817 + lu52i.d $a0, $a0, 1002 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -148411 + ori $a0, $a0, 64 + lu32i.d $a0, 496048 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 463547 + ori $a0, $a0, 3142 + lu32i.d $a0, 21971 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -485198 + ori $a0, $a0, 3741 + lu32i.d $a0, -407603 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_38: - pcalau12i $a0, %pc_hi20(.LCPI0_264) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_264) - pcalau12i $a0, %pc_hi20(.LCPI0_265) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_265) - pcalau12i $a0, %pc_hi20(.LCPI0_266) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_266) - pcalau12i $a0, %pc_hi20(.LCPI0_267) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_267) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_268) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_268) - pcalau12i $a0, %pc_hi20(.LCPI0_269) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_269) - pcalau12i $a0, %pc_hi20(.LCPI0_270) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_270) - pcalau12i $a0, %pc_hi20(.LCPI0_271) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_271) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, -114688 + lu52i.d $a0, $a0, -1019 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -429487 + ori $a0, $a0, 802 + lu32i.d $a0, -414896 + lu52i.d $a0, $a0, 986 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -310647 + ori $a0, $a0, 2479 + lu32i.d $a0, -38865 + lu52i.d $a0, $a0, 978 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -35101 + ori $a0, $a0, 3484 + lu32i.d $a0, 81151 + lu52i.d $a0, $a0, 994 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -266450 + ori $a0, $a0, 3870 + lu32i.d $a0, 228528 + lu52i.d $a0, $a0, 1001 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 415161 + ori $a0, $a0, 3028 + lu32i.d $a0, 181193 + lu52i.d $a0, $a0, 1008 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 141344 + ori $a0, $a0, 2824 + lu32i.d $a0, -71055 + lu52i.d $a0, $a0, 1014 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -431510 + ori $a0, $a0, 18 + lu32i.d $a0, -395859 + lu52i.d $a0, $a0, 1020 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_39: vldi $vr1, -868 - pcalau12i $a0, %pc_hi20(.LCPI0_468) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_468) - pcalau12i $a0, %pc_hi20(.LCPI0_469) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_469) - pcalau12i $a0, %pc_hi20(.LCPI0_470) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_470) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_471) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_471) - pcalau12i $a0, %pc_hi20(.LCPI0_472) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_472) - pcalau12i $a0, %pc_hi20(.LCPI0_473) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_473) - pcalau12i $a0, %pc_hi20(.LCPI0_474) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_474) - b .LBB0_66 + vldi $vr2, -1024 + fmadd.d $fa0, $fa0, $fa2, $fa1 + lu12i.w $a0, -315695 + ori $a0, $a0, 1466 + lu32i.d $a0, -107533 + lu52i.d $a0, $a0, 981 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 246665 + ori $a0, $a0, 1191 + lu32i.d $a0, 69385 + lu52i.d $a0, $a0, 974 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -49118 + ori $a0, $a0, 1777 + lu32i.d $a0, -409438 + lu52i.d $a0, $a0, 989 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -14218 + ori $a0, $a0, 2148 + lu32i.d $a0, 323895 + lu52i.d $a0, $a0, 997 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -351286 + ori $a0, $a0, 2999 + lu32i.d $a0, 26086 + lu52i.d $a0, $a0, 1005 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -453050 + ori $a0, $a0, 3045 + lu32i.d $a0, -471035 + lu52i.d $a0, $a0, 1012 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -281415 + ori $a0, $a0, 2825 + lu32i.d $a0, 324578 + lu52i.d $a0, $a0, 1015 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_40: - pcalau12i $a0, %pc_hi20(.LCPI0_136) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_136) - pcalau12i $a0, %pc_hi20(.LCPI0_137) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_137) - pcalau12i $a0, %pc_hi20(.LCPI0_138) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_138) - pcalau12i $a0, %pc_hi20(.LCPI0_139) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_139) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_140) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_140) - pcalau12i $a0, %pc_hi20(.LCPI0_141) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_141) - pcalau12i $a0, %pc_hi20(.LCPI0_142) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_142) - pcalau12i $a0, %pc_hi20(.LCPI0_143) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_143) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 204800 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -192871 + ori $a0, $a0, 1162 + lu32i.d $a0, -86818 + lu52i.d $a0, $a0, 987 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 410005 + ori $a0, $a0, 2042 + lu32i.d $a0, 45500 + lu52i.d $a0, $a0, 980 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 147619 + ori $a0, $a0, 1485 + lu32i.d $a0, 362092 + lu52i.d $a0, $a0, 995 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 523604 + ori $a0, $a0, 2879 + lu32i.d $a0, 517260 + lu52i.d $a0, $a0, 1002 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -61138 + ori $a0, $a0, 3077 + lu32i.d $a0, 340656 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -493908 + ori $a0, $a0, 748 + lu32i.d $a0, -139238 + lu52i.d $a0, $a0, 1015 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 397620 + ori $a0, $a0, 79 + lu32i.d $a0, 512963 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_41: vldi $vr1, -862 - pcalau12i $a0, %pc_hi20(.LCPI0_461) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_461) - pcalau12i $a0, %pc_hi20(.LCPI0_462) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_462) - pcalau12i $a0, %pc_hi20(.LCPI0_463) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_463) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_464) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_464) - pcalau12i $a0, %pc_hi20(.LCPI0_465) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_465) - pcalau12i $a0, %pc_hi20(.LCPI0_466) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_466) - pcalau12i $a0, %pc_hi20(.LCPI0_467) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_467) - b .LBB0_66 + vldi $vr2, -1024 + fmadd.d $fa0, $fa0, $fa2, $fa1 + lu12i.w $a0, -383453 + ori $a0, $a0, 147 + lu32i.d $a0, 213 + lu52i.d $a0, $a0, 982 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -137494 + ori $a0, $a0, 2723 + lu32i.d $a0, 136501 + lu52i.d $a0, $a0, 974 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -274146 + ori $a0, $a0, 3621 + lu32i.d $a0, -329634 + lu52i.d $a0, $a0, 989 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -195848 + ori $a0, $a0, 2725 + lu32i.d $a0, 377868 + lu52i.d $a0, $a0, 997 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -420179 + ori $a0, $a0, 2586 + lu32i.d $a0, 58879 + lu52i.d $a0, $a0, 1005 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -410760 + ori $a0, $a0, 267 + lu32i.d $a0, -436933 + lu52i.d $a0, $a0, 1012 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -513866 + ori $a0, $a0, 27 + lu32i.d $a0, -313211 + lu52i.d $a0, $a0, 1015 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_42: - pcalau12i $a0, %pc_hi20(.LCPI0_208) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_208) - pcalau12i $a0, %pc_hi20(.LCPI0_209) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_209) - pcalau12i $a0, %pc_hi20(.LCPI0_210) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_210) - pcalau12i $a0, %pc_hi20(.LCPI0_211) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_211) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_212) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_212) - pcalau12i $a0, %pc_hi20(.LCPI0_213) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_213) - pcalau12i $a0, %pc_hi20(.LCPI0_214) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_214) - pcalau12i $a0, %pc_hi20(.LCPI0_215) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_215) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 57344 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -472239 + ori $a0, $a0, 2030 + lu32i.d $a0, 198236 + lu52i.d $a0, $a0, 987 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -24458 + ori $a0, $a0, 2200 + lu32i.d $a0, 413844 + lu52i.d $a0, $a0, 979 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -120774 + ori $a0, $a0, 2149 + lu32i.d $a0, -402955 + lu52i.d $a0, $a0, 994 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -69491 + ori $a0, $a0, 2186 + lu32i.d $a0, -210347 + lu52i.d $a0, $a0, 1001 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 476823 + ori $a0, $a0, 2093 + lu32i.d $a0, -355045 + lu52i.d $a0, $a0, 1008 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 191273 + ori $a0, $a0, 2802 + lu32i.d $a0, 285897 + lu52i.d $a0, $a0, 1015 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -129856 + ori $a0, $a0, 1426 + lu32i.d $a0, 56790 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_43: vldi $vr1, -845 - pcalau12i $a0, %pc_hi20(.LCPI0_426) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_426) - pcalau12i $a0, %pc_hi20(.LCPI0_427) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_427) - pcalau12i $a0, %pc_hi20(.LCPI0_428) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_428) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_429) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_429) - pcalau12i $a0, %pc_hi20(.LCPI0_430) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_430) - pcalau12i $a0, %pc_hi20(.LCPI0_431) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_431) - pcalau12i $a0, %pc_hi20(.LCPI0_432) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_432) - b .LBB0_66 + vldi $vr2, -1024 + fmadd.d $fa0, $fa0, $fa2, $fa1 + lu12i.w $a0, -238593 + ori $a0, $a0, 3754 + lu32i.d $a0, 323077 + lu52i.d $a0, $a0, 982 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 436190 + ori $a0, $a0, 3276 + lu32i.d $a0, -505892 + lu52i.d $a0, $a0, 974 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 3621 + ori $a0, $a0, 2069 + lu32i.d $a0, 69938 + lu52i.d $a0, $a0, 990 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -242512 + ori $a0, $a0, 3009 + lu32i.d $a0, -359467 + lu52i.d $a0, $a0, 997 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 68117 + ori $a0, $a0, 3959 + lu32i.d $a0, 243570 + lu52i.d $a0, $a0, 1005 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 507863 + ori $a0, $a0, 3181 + lu32i.d $a0, -249930 + lu52i.d $a0, $a0, 1012 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 392586 + ori $a0, $a0, 3500 + lu32i.d $a0, -110730 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_44: - pcalau12i $a0, %pc_hi20(.LCPI0_72) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_72) - pcalau12i $a0, %pc_hi20(.LCPI0_73) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_73) - pcalau12i $a0, %pc_hi20(.LCPI0_74) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_74) - pcalau12i $a0, %pc_hi20(.LCPI0_75) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_75) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_76) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_76) - pcalau12i $a0, %pc_hi20(.LCPI0_77) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_77) - pcalau12i $a0, %pc_hi20(.LCPI0_78) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_78) - pcalau12i $a0, %pc_hi20(.LCPI0_79) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_79) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 335872 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -181054 + ori $a0, $a0, 1886 + lu32i.d $a0, 438800 + lu52i.d $a0, $a0, 988 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 41714 + ori $a0, $a0, 672 + lu32i.d $a0, 438416 + lu52i.d $a0, $a0, 980 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -221791 + ori $a0, $a0, 1616 + lu32i.d $a0, 42378 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -407391 + ori $a0, $a0, 1034 + lu32i.d $a0, 177049 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -115965 + ori $a0, $a0, 3617 + lu32i.d $a0, 16575 + lu52i.d $a0, $a0, 1010 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 522778 + ori $a0, $a0, 207 + lu32i.d $a0, 363394 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 34561 + ori $a0, $a0, 266 + lu32i.d $a0, 27221 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_45: vldi $vr1, -847 - pcalau12i $a0, %pc_hi20(.LCPI0_433) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_433) - pcalau12i $a0, %pc_hi20(.LCPI0_434) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_434) - pcalau12i $a0, %pc_hi20(.LCPI0_435) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_435) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_436) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_436) - pcalau12i $a0, %pc_hi20(.LCPI0_437) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_437) - pcalau12i $a0, %pc_hi20(.LCPI0_438) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_438) - pcalau12i $a0, %pc_hi20(.LCPI0_439) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_439) - b .LBB0_66 + vldi $vr2, -1024 + fmadd.d $fa0, $fa0, $fa2, $fa1 + lu12i.w $a0, 316640 + ori $a0, $a0, 483 + lu32i.d $a0, 250674 + lu52i.d $a0, $a0, 982 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 468770 + ori $a0, $a0, 1334 + lu32i.d $a0, 450952 + lu52i.d $a0, $a0, 974 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 335301 + ori $a0, $a0, 3582 + lu32i.d $a0, 17786 + lu52i.d $a0, $a0, 990 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -295566 + ori $a0, $a0, 1830 + lu32i.d $a0, -427795 + lu52i.d $a0, $a0, 997 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 319903 + ori $a0, $a0, 503 + lu32i.d $a0, 203630 + lu52i.d $a0, $a0, 1005 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -459264 + ori $a0, $a0, 1895 + lu32i.d $a0, -289680 + lu52i.d $a0, $a0, 1012 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 19998 + ori $a0, $a0, 1808 + lu32i.d $a0, -339034 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_46: - pcalau12i $a0, %pc_hi20(.LCPI0_368) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_368) - pcalau12i $a0, %pc_hi20(.LCPI0_369) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_369) - pcalau12i $a0, %pc_hi20(.LCPI0_370) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_370) - pcalau12i $a0, %pc_hi20(.LCPI0_371) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_371) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_372) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_372) - pcalau12i $a0, %pc_hi20(.LCPI0_373) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_373) - pcalau12i $a0, %pc_hi20(.LCPI0_374) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_374) - pcalau12i $a0, %pc_hi20(.LCPI0_375) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_375) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 98304 + lu52i.d $a0, $a0, -1020 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -186211 + ori $a0, $a0, 1432 + lu32i.d $a0, 20216 + lu52i.d $a0, $a0, 983 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -59337 + ori $a0, $a0, 1506 + lu32i.d $a0, 239419 + lu52i.d $a0, $a0, 975 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -428994 + ori $a0, $a0, 1524 + lu32i.d $a0, -439918 + lu52i.d $a0, $a0, 990 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -19855 + ori $a0, $a0, 1151 + lu32i.d $a0, 162258 + lu52i.d $a0, $a0, 998 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 243035 + ori $a0, $a0, 2718 + lu32i.d $a0, -419270 + lu52i.d $a0, $a0, 1005 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -195032 + ori $a0, $a0, 899 + lu32i.d $a0, 59310 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -493116 + ori $a0, $a0, 276 + lu32i.d $a0, -92141 + lu52i.d $a0, $a0, 1017 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_47: - pcalau12i $a0, %pc_hi20(.LCPI0_232) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_232) - pcalau12i $a0, %pc_hi20(.LCPI0_233) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_233) - pcalau12i $a0, %pc_hi20(.LCPI0_234) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_234) - pcalau12i $a0, %pc_hi20(.LCPI0_235) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_235) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_236) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_236) - pcalau12i $a0, %pc_hi20(.LCPI0_237) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_237) - pcalau12i $a0, %pc_hi20(.LCPI0_238) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_238) - pcalau12i $a0, %pc_hi20(.LCPI0_239) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_239) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 8192 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -440535 + ori $a0, $a0, 1913 + lu32i.d $a0, 6851 + lu52i.d $a0, $a0, 987 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -29279 + ori $a0, $a0, 1161 + lu32i.d $a0, 214158 + lu52i.d $a0, $a0, 979 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -81685 + ori $a0, $a0, 3915 + lu32i.d $a0, 376362 + lu52i.d $a0, $a0, 994 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -515221 + ori $a0, $a0, 1004 + lu32i.d $a0, -502069 + lu52i.d $a0, $a0, 1001 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 455516 + ori $a0, $a0, 2930 + lu32i.d $a0, 449302 + lu52i.d $a0, $a0, 1008 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -499170 + ori $a0, $a0, 3473 + lu32i.d $a0, 134338 + lu52i.d $a0, $a0, 1015 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -258370 + ori $a0, $a0, 3578 + lu32i.d $a0, -122055 + lu52i.d $a0, $a0, 1020 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_48: - pcalau12i $a0, %pc_hi20(.LCPI0_376) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_376) - pcalau12i $a0, %pc_hi20(.LCPI0_377) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_377) - pcalau12i $a0, %pc_hi20(.LCPI0_378) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_378) - pcalau12i $a0, %pc_hi20(.LCPI0_379) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_379) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_380) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_380) - pcalau12i $a0, %pc_hi20(.LCPI0_381) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_381) - pcalau12i $a0, %pc_hi20(.LCPI0_382) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_382) - pcalau12i $a0, %pc_hi20(.LCPI0_383) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_383) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 32768 + lu52i.d $a0, $a0, -1020 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -250051 + ori $a0, $a0, 1887 + lu32i.d $a0, -76727 + lu52i.d $a0, $a0, 982 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 406228 + ori $a0, $a0, 1375 + lu32i.d $a0, 163478 + lu52i.d $a0, $a0, 975 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -287735 + ori $a0, $a0, 250 + lu32i.d $a0, -521073 + lu52i.d $a0, $a0, 990 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 426893 + ori $a0, $a0, 3009 + lu32i.d $a0, 111769 + lu52i.d $a0, $a0, 998 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 426543 + ori $a0, $a0, 1650 + lu32i.d $a0, -474823 + lu52i.d $a0, $a0, 1005 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 10723 + ori $a0, $a0, 4082 + lu32i.d $a0, 33541 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 468889 + ori $a0, $a0, 2592 + lu32i.d $a0, -229043 + lu52i.d $a0, $a0, 1017 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_49: - pcalau12i $a0, %pc_hi20(.LCPI0_64) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_64) - pcalau12i $a0, %pc_hi20(.LCPI0_65) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_65) - pcalau12i $a0, %pc_hi20(.LCPI0_66) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_66) - pcalau12i $a0, %pc_hi20(.LCPI0_67) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_67) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_68) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_68) - pcalau12i $a0, %pc_hi20(.LCPI0_69) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_69) - pcalau12i $a0, %pc_hi20(.LCPI0_70) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_70) - pcalau12i $a0, %pc_hi20(.LCPI0_71) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_71) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 352256 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -494232 + ori $a0, $a0, 2552 + lu32i.d $a0, 509741 + lu52i.d $a0, $a0, 988 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 273040 + ori $a0, $a0, 2422 + lu32i.d $a0, 490989 + lu52i.d $a0, $a0, 980 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 472973 + ori $a0, $a0, 3244 + lu32i.d $a0, 101854 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -522012 + ori $a0, $a0, 2902 + lu32i.d $a0, 247088 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -378116 + ori $a0, $a0, 2052 + lu32i.d $a0, 75674 + lu52i.d $a0, $a0, 1010 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -482513 + ori $a0, $a0, 1097 + lu32i.d $a0, 431845 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -358026 + ori $a0, $a0, 862 + lu32i.d $a0, 72414 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_50: - pcalau12i $a0, %pc_hi20(.LCPI0_168) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_168) - pcalau12i $a0, %pc_hi20(.LCPI0_169) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_169) - pcalau12i $a0, %pc_hi20(.LCPI0_170) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_170) - pcalau12i $a0, %pc_hi20(.LCPI0_171) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_171) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_172) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_172) - pcalau12i $a0, %pc_hi20(.LCPI0_173) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_173) - pcalau12i $a0, %pc_hi20(.LCPI0_174) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_174) - pcalau12i $a0, %pc_hi20(.LCPI0_175) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_175) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 139264 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, 205612 + ori $a0, $a0, 2746 + lu32i.d $a0, -464183 + lu52i.d $a0, $a0, 987 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -301711 + ori $a0, $a0, 4091 + lu32i.d $a0, -254489 + lu52i.d $a0, $a0, 979 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 72625 + ori $a0, $a0, 2208 + lu32i.d $a0, 78309 + lu52i.d $a0, $a0, 995 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -159638 + ori $a0, $a0, 3225 + lu32i.d $a0, 201282 + lu52i.d $a0, $a0, 1002 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -59581 + ori $a0, $a0, 2772 + lu32i.d $a0, 77804 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 40802 + ori $a0, $a0, 775 + lu32i.d $a0, -452448 + lu52i.d $a0, $a0, 1015 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 240249 + ori $a0, $a0, 3003 + lu32i.d $a0, 288484 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_51: - pcalau12i $a0, %pc_hi20(.LCPI0_344) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_344) - pcalau12i $a0, %pc_hi20(.LCPI0_345) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_345) - pcalau12i $a0, %pc_hi20(.LCPI0_346) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_346) - pcalau12i $a0, %pc_hi20(.LCPI0_347) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_347) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_348) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_348) - pcalau12i $a0, %pc_hi20(.LCPI0_349) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_349) - pcalau12i $a0, %pc_hi20(.LCPI0_350) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_350) - pcalau12i $a0, %pc_hi20(.LCPI0_351) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_351) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 294912 + lu52i.d $a0, $a0, -1020 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -102588 + ori $a0, $a0, 1324 + lu32i.d $a0, 218945 + lu52i.d $a0, $a0, 983 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -360444 + ori $a0, $a0, 2815 + lu32i.d $a0, 498264 + lu52i.d $a0, $a0, 975 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -133921 + ori $a0, $a0, 2406 + lu32i.d $a0, -166820 + lu52i.d $a0, $a0, 990 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 457104 + ori $a0, $a0, 1926 + lu32i.d $a0, 330088 + lu52i.d $a0, $a0, 998 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -8768 + ori $a0, $a0, 3238 + lu32i.d $a0, -237494 + lu52i.d $a0, $a0, 1005 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 323625 + ori $a0, $a0, 3218 + lu32i.d $a0, 142096 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 114756 + ori $a0, $a0, 645 + lu32i.d $a0, 169282 + lu52i.d $a0, $a0, 1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_52: - pcalau12i $a0, %pc_hi20(.LCPI0_40) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_40) - pcalau12i $a0, %pc_hi20(.LCPI0_41) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_41) - pcalau12i $a0, %pc_hi20(.LCPI0_42) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_42) - pcalau12i $a0, %pc_hi20(.LCPI0_43) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_43) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_44) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_44) - pcalau12i $a0, %pc_hi20(.LCPI0_45) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_45) - pcalau12i $a0, %pc_hi20(.LCPI0_46) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_46) - pcalau12i $a0, %pc_hi20(.LCPI0_47) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_47) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 401408 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -436818 + ori $a0, $a0, 976 + lu32i.d $a0, -311061 + lu52i.d $a0, $a0, 988 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -382737 + ori $a0, $a0, 2683 + lu32i.d $a0, -396824 + lu52i.d $a0, $a0, 980 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 157147 + ori $a0, $a0, 2105 + lu32i.d $a0, 297592 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -290379 + ori $a0, $a0, 2718 + lu32i.d $a0, 480734 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 345803 + ori $a0, $a0, 3781 + lu32i.d $a0, 273930 + lu52i.d $a0, $a0, 1010 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -245283 + ori $a0, $a0, 418 + lu32i.d $a0, -387890 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -5369 + ori $a0, $a0, 1191 + lu32i.d $a0, 221647 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_53: - pcalau12i $a0, %pc_hi20(.LCPI0_8) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_8) - pcalau12i $a0, %pc_hi20(.LCPI0_9) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_9) - pcalau12i $a0, %pc_hi20(.LCPI0_10) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_10) - pcalau12i $a0, %pc_hi20(.LCPI0_11) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_11) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_12) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_12) - pcalau12i $a0, %pc_hi20(.LCPI0_13) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_13) - pcalau12i $a0, %pc_hi20(.LCPI0_14) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_14) - pcalau12i $a0, %pc_hi20(.LCPI0_15) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_15) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 466944 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, 401051 + ori $a0, $a0, 3494 + lu32i.d $a0, 14111 + lu52i.d $a0, $a0, 989 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 88947 + ori $a0, $a0, 1845 + lu32i.d $a0, -178414 + lu52i.d $a0, $a0, 980 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 223750 + ori $a0, $a0, 343 + lu32i.d $a0, -446020 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -236777 + ori $a0, $a0, 287 + lu32i.d $a0, -194228 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -326362 + ori $a0, $a0, 3510 + lu32i.d $a0, -454070 + lu52i.d $a0, $a0, 1010 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -277026 + ori $a0, $a0, 2496 + lu32i.d $a0, -18657 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -394936 + ori $a0, $a0, 1374 + lu32i.d $a0, 457556 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_54: - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_1) - pcalau12i $a0, %pc_hi20(.LCPI0_2) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_2) - pcalau12i $a0, %pc_hi20(.LCPI0_3) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_3) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_4) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_4) - pcalau12i $a0, %pc_hi20(.LCPI0_5) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_5) - pcalau12i $a0, %pc_hi20(.LCPI0_6) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_6) - pcalau12i $a0, %pc_hi20(.LCPI0_7) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_7) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 483328 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -168298 + ori $a0, $a0, 1461 + lu32i.d $a0, 59723 + lu52i.d $a0, $a0, 989 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 469012 + ori $a0, $a0, 1636 + lu32i.d $a0, -123812 + lu52i.d $a0, $a0, 980 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -85079 + ori $a0, $a0, 2697 + lu32i.d $a0, -361156 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -217282 + ori $a0, $a0, 1019 + lu32i.d $a0, -88455 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 304982 + ori $a0, $a0, 3864 + lu32i.d $a0, -362415 + lu52i.d $a0, $a0, 1010 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 456138 + ori $a0, $a0, 3885 + lu32i.d $a0, 43419 + lu52i.d $a0, $a0, 1017 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 209044 + ori $a0, $a0, 456 + lu32i.d $a0, 524141 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_55: - pcalau12i $a0, %pc_hi20(.LCPI0_48) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_48) - pcalau12i $a0, %pc_hi20(.LCPI0_49) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_49) - pcalau12i $a0, %pc_hi20(.LCPI0_50) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_50) - pcalau12i $a0, %pc_hi20(.LCPI0_51) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_51) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_52) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_52) - pcalau12i $a0, %pc_hi20(.LCPI0_53) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_53) - pcalau12i $a0, %pc_hi20(.LCPI0_54) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_54) - pcalau12i $a0, %pc_hi20(.LCPI0_55) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_55) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 385024 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -112924 + ori $a0, $a0, 2378 + lu32i.d $a0, -389496 + lu52i.d $a0, $a0, 988 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -95863 + ori $a0, $a0, 2828 + lu32i.d $a0, -450781 + lu52i.d $a0, $a0, 980 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -403164 + ori $a0, $a0, 159 + lu32i.d $a0, 229362 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 111992 + ori $a0, $a0, 1857 + lu32i.d $a0, 398821 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 63855 + ori $a0, $a0, 1746 + lu32i.d $a0, 204180 + lu52i.d $a0, $a0, 1010 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 135559 + ori $a0, $a0, 3708 + lu32i.d $a0, -468420 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -183879 + ori $a0, $a0, 2919 + lu32i.d $a0, 169491 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_56: - pcalau12i $a0, %pc_hi20(.LCPI0_288) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_288) - pcalau12i $a0, %pc_hi20(.LCPI0_289) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_289) - pcalau12i $a0, %pc_hi20(.LCPI0_290) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_290) - pcalau12i $a0, %pc_hi20(.LCPI0_291) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_291) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_292) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_292) - pcalau12i $a0, %pc_hi20(.LCPI0_293) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_293) - pcalau12i $a0, %pc_hi20(.LCPI0_294) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_294) - pcalau12i $a0, %pc_hi20(.LCPI0_295) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_295) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 245760 + lu52i.d $a0, $a0, -1019 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, 322911 + ori $a0, $a0, 2040 + lu32i.d $a0, -157016 + lu52i.d $a0, $a0, 984 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 6708 + ori $a0, $a0, 1963 + lu32i.d $a0, 192152 + lu52i.d $a0, $a0, 977 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 445302 + ori $a0, $a0, 554 + lu32i.d $a0, 296919 + lu52i.d $a0, $a0, 992 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -226118 + ori $a0, $a0, 3580 + lu32i.d $a0, -416237 + lu52i.d $a0, $a0, 999 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -241046 + ori $a0, $a0, 2557 + lu32i.d $a0, -186339 + lu52i.d $a0, $a0, 1006 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -169424 + ori $a0, $a0, 3940 + lu32i.d $a0, -106006 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 159517 + ori $a0, $a0, 3153 + lu32i.d $a0, 476607 + lu52i.d $a0, $a0, 1019 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_57: - pcalau12i $a0, %pc_hi20(.LCPI0_296) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_296) - pcalau12i $a0, %pc_hi20(.LCPI0_297) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_297) - pcalau12i $a0, %pc_hi20(.LCPI0_298) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_298) - pcalau12i $a0, %pc_hi20(.LCPI0_299) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_299) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_300) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_300) - pcalau12i $a0, %pc_hi20(.LCPI0_301) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_301) - pcalau12i $a0, %pc_hi20(.LCPI0_302) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_302) - pcalau12i $a0, %pc_hi20(.LCPI0_303) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_303) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, -163840 + lu52i.d $a0, $a0, -1020 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -12926 + ori $a0, $a0, 2171 + lu32i.d $a0, 20873 + lu52i.d $a0, $a0, 984 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 159181 + ori $a0, $a0, 2830 + lu32i.d $a0, 296708 + lu52i.d $a0, $a0, 976 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -235130 + ori $a0, $a0, 3112 + lu32i.d $a0, 501292 + lu52i.d $a0, $a0, 991 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 206920 + ori $a0, $a0, 1358 + lu32i.d $a0, -25426 + lu52i.d $a0, $a0, 998 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 428414 + ori $a0, $a0, 310 + lu32i.d $a0, 239419 + lu52i.d $a0, $a0, 1006 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 350925 + ori $a0, $a0, 2751 + lu32i.d $a0, 450582 + lu52i.d $a0, $a0, 1013 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 280246 + ori $a0, $a0, 2523 + lu32i.d $a0, -127440 + lu52i.d $a0, $a0, 1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_58: - pcalau12i $a0, %pc_hi20(.LCPI0_88) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_88) - pcalau12i $a0, %pc_hi20(.LCPI0_89) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_89) - pcalau12i $a0, %pc_hi20(.LCPI0_90) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_90) - pcalau12i $a0, %pc_hi20(.LCPI0_91) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_91) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_92) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_92) - pcalau12i $a0, %pc_hi20(.LCPI0_93) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_93) - pcalau12i $a0, %pc_hi20(.LCPI0_94) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_94) - pcalau12i $a0, %pc_hi20(.LCPI0_95) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_95) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 303104 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, 313802 + ori $a0, $a0, 731 + lu32i.d $a0, 304269 + lu52i.d $a0, $a0, 988 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -517274 + ori $a0, $a0, 1075 + lu32i.d $a0, 335206 + lu52i.d $a0, $a0, 980 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 464327 + ori $a0, $a0, 4003 + lu32i.d $a0, -137017 + lu52i.d $a0, $a0, 995 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 15469 + ori $a0, $a0, 3127 + lu32i.d $a0, 47813 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 454702 + ori $a0, $a0, 3354 + lu32i.d $a0, -184174 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 432503 + ori $a0, $a0, 847 + lu32i.d $a0, 237216 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -191596 + ori $a0, $a0, 792 + lu32i.d $a0, -114002 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_59: - pcalau12i $a0, %pc_hi20(.LCPI0_224) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_224) - pcalau12i $a0, %pc_hi20(.LCPI0_225) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_225) - pcalau12i $a0, %pc_hi20(.LCPI0_226) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_226) - pcalau12i $a0, %pc_hi20(.LCPI0_227) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_227) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_228) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_228) - pcalau12i $a0, %pc_hi20(.LCPI0_229) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_229) - pcalau12i $a0, %pc_hi20(.LCPI0_230) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_230) - pcalau12i $a0, %pc_hi20(.LCPI0_231) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_231) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 24576 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, 77650 + ori $a0, $a0, 2814 + lu32i.d $a0, 67524 + lu52i.d $a0, $a0, 987 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -375963 + ori $a0, $a0, 1745 + lu32i.d $a0, 278353 + lu52i.d $a0, $a0, 979 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 503401 + ori $a0, $a0, 52 + lu32i.d $a0, 461199 + lu52i.d $a0, $a0, 994 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -211877 + ori $a0, $a0, 1113 + lu32i.d $a0, -410414 + lu52i.d $a0, $a0, 1001 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -494003 + ori $a0, $a0, 641 + lu32i.d $a0, -522377 + lu52i.d $a0, $a0, 1008 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -227030 + ori $a0, $a0, 2921 + lu32i.d $a0, 182334 + lu52i.d $a0, $a0, 1015 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 356012 + ori $a0, $a0, 2144 + lu32i.d $a0, -46641 + lu52i.d $a0, $a0, 1020 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_60: - pcalau12i $a0, %pc_hi20(.LCPI0_248) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_248) - pcalau12i $a0, %pc_hi20(.LCPI0_249) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_249) - pcalau12i $a0, %pc_hi20(.LCPI0_250) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_250) - pcalau12i $a0, %pc_hi20(.LCPI0_251) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_251) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_252) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_252) - pcalau12i $a0, %pc_hi20(.LCPI0_253) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_253) - pcalau12i $a0, %pc_hi20(.LCPI0_254) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_254) - pcalau12i $a0, %pc_hi20(.LCPI0_255) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_255) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, -1019 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, 126651 + ori $a0, $a0, 2974 + lu32i.d $a0, -211549 + lu52i.d $a0, $a0, 986 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 415312 + ori $a0, $a0, 1045 + lu32i.d $a0, 92779 + lu52i.d $a0, $a0, 979 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -225442 + ori $a0, $a0, 2434 + lu32i.d $a0, 220425 + lu52i.d $a0, $a0, 994 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 240750 + ori $a0, $a0, 1628 + lu32i.d $a0, 378282 + lu52i.d $a0, $a0, 1001 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 33360 + ori $a0, $a0, 2546 + lu32i.d $a0, 307809 + lu52i.d $a0, $a0, 1008 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -49742 + ori $a0, $a0, 3727 + lu32i.d $a0, 45217 + lu52i.d $a0, $a0, 1015 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -39930 + ori $a0, $a0, 925 + lu32i.d $a0, -264326 + lu52i.d $a0, $a0, 1020 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_61: - pcalau12i $a0, %pc_hi20(.LCPI0_128) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_128) - pcalau12i $a0, %pc_hi20(.LCPI0_129) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_129) - pcalau12i $a0, %pc_hi20(.LCPI0_130) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_130) - pcalau12i $a0, %pc_hi20(.LCPI0_131) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_131) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_132) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_132) - pcalau12i $a0, %pc_hi20(.LCPI0_133) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_133) - pcalau12i $a0, %pc_hi20(.LCPI0_134) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_134) - pcalau12i $a0, %pc_hi20(.LCPI0_135) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_135) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 221184 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, 262249 + ori $a0, $a0, 2124 + lu32i.d $a0, 8941 + lu52i.d $a0, $a0, 988 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -226750 + ori $a0, $a0, 3872 + lu32i.d $a0, 91432 + lu52i.d $a0, $a0, 980 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 292673 + ori $a0, $a0, 2840 + lu32i.d $a0, 442650 + lu52i.d $a0, $a0, 995 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 239258 + ori $a0, $a0, 2390 + lu32i.d $a0, -440650 + lu52i.d $a0, $a0, 1002 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 489431 + ori $a0, $a0, 3787 + lu32i.d $a0, 416162 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -222869 + ori $a0, $a0, 1895 + lu32i.d $a0, -50064 + lu52i.d $a0, $a0, 1015 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 414061 + ori $a0, $a0, 2830 + lu32i.d $a0, -473034 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_62: - pcalau12i $a0, %pc_hi20(.LCPI0_24) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_24) - pcalau12i $a0, %pc_hi20(.LCPI0_25) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_25) - pcalau12i $a0, %pc_hi20(.LCPI0_26) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_26) - pcalau12i $a0, %pc_hi20(.LCPI0_27) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_27) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_28) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_28) - pcalau12i $a0, %pc_hi20(.LCPI0_29) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_29) - pcalau12i $a0, %pc_hi20(.LCPI0_30) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_30) - pcalau12i $a0, %pc_hi20(.LCPI0_31) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_31) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 434176 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, 74036 + ori $a0, $a0, 2051 + lu32i.d $a0, -146553 + lu52i.d $a0, $a0, 988 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -357828 + ori $a0, $a0, 3462 + lu32i.d $a0, -287896 + lu52i.d $a0, $a0, 980 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, 119595 + ori $a0, $a0, 654 + lu32i.d $a0, 443382 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -2652 + ori $a0, $a0, 1176 + lu32i.d $a0, -390601 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -234895 + ori $a0, $a0, 2270 + lu32i.d $a0, 425456 + lu52i.d $a0, $a0, 1010 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 31675 + ori $a0, $a0, 1572 + lu32i.d $a0, -213272 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -361382 + ori $a0, $a0, 3143 + lu32i.d $a0, 333824 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_63: - pcalau12i $a0, %pc_hi20(.LCPI0_96) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_96) - pcalau12i $a0, %pc_hi20(.LCPI0_97) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_97) - pcalau12i $a0, %pc_hi20(.LCPI0_98) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_98) - pcalau12i $a0, %pc_hi20(.LCPI0_99) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_99) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_100) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_100) - pcalau12i $a0, %pc_hi20(.LCPI0_101) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_101) - pcalau12i $a0, %pc_hi20(.LCPI0_102) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_102) - pcalau12i $a0, %pc_hi20(.LCPI0_103) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_103) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 286720 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, 435088 + ori $a0, $a0, 1365 + lu32i.d $a0, 240606 + lu52i.d $a0, $a0, 988 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 448714 + ori $a0, $a0, 1463 + lu32i.d $a0, 284662 + lu52i.d $a0, $a0, 980 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -297423 + ori $a0, $a0, 928 + lu32i.d $a0, -240206 + lu52i.d $a0, $a0, 995 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -1723 + ori $a0, $a0, 3557 + lu32i.d $a0, -23628 + lu52i.d $a0, $a0, 1002 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -424687 + ori $a0, $a0, 2596 + lu32i.d $a0, -284161 + lu52i.d $a0, $a0, 1009 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -413552 + ori $a0, $a0, 1378 + lu32i.d $a0, 179019 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -520765 + ori $a0, $a0, 882 + lu32i.d $a0, -192561 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_64: - pcalau12i $a0, %pc_hi20(.LCPI0_16) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_16) - pcalau12i $a0, %pc_hi20(.LCPI0_17) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_17) - pcalau12i $a0, %pc_hi20(.LCPI0_18) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_18) - pcalau12i $a0, %pc_hi20(.LCPI0_19) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_19) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_20) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_20) - pcalau12i $a0, %pc_hi20(.LCPI0_21) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_21) - pcalau12i $a0, %pc_hi20(.LCPI0_22) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_22) - pcalau12i $a0, %pc_hi20(.LCPI0_23) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_23) - b .LBB0_66 + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 450560 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, -109596 + ori $a0, $a0, 3966 + lu32i.d $a0, -60445 + lu52i.d $a0, $a0, 988 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, 267311 + ori $a0, $a0, 1488 + lu32i.d $a0, -233201 + lu52i.d $a0, $a0, 980 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -332815 + ori $a0, $a0, 3442 + lu32i.d $a0, 521350 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 482607 + ori $a0, $a0, 2226 + lu32i.d $a0, -294833 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 471893 + ori $a0, $a0, 1743 + lu32i.d $a0, 507662 + lu52i.d $a0, $a0, 1010 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -494459 + ori $a0, $a0, 3646 + lu32i.d $a0, -118649 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -295280 + ori $a0, $a0, 4089 + lu32i.d $a0, 394159 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 + ret .LBB0_65: - pcalau12i $a0, %pc_hi20(.LCPI0_32) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_32) - pcalau12i $a0, %pc_hi20(.LCPI0_33) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_33) - pcalau12i $a0, %pc_hi20(.LCPI0_34) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_34) - pcalau12i $a0, %pc_hi20(.LCPI0_35) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_35) - vldi $vr5, -1024 - fmadd.d $fa0, $fa0, $fa5, $fa1 - fmadd.d $fa1, $fa0, $fa3, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa4 - pcalau12i $a0, %pc_hi20(.LCPI0_36) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_36) - pcalau12i $a0, %pc_hi20(.LCPI0_37) - fld.d $fa3, $a0, %pc_lo12(.LCPI0_37) - pcalau12i $a0, %pc_hi20(.LCPI0_38) - fld.d $fa4, $a0, %pc_lo12(.LCPI0_38) - pcalau12i $a0, %pc_hi20(.LCPI0_39) - fld.d $fa5, $a0, %pc_lo12(.LCPI0_39) -.LBB0_66: + vldi $vr1, -1024 + ori $a0, $zero, 0 + lu32i.d $a0, 417792 + lu52i.d $a0, $a0, -1018 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa0, $fa1, $fa2 + lu12i.w $a0, 334618 + ori $a0, $a0, 680 + lu32i.d $a0, -230068 + lu52i.d $a0, $a0, 988 + movgr2fr.d $fa1, $a0 + lu12i.w $a0, -492819 + ori $a0, $a0, 1907 + lu32i.d $a0, -342406 + lu52i.d $a0, $a0, 980 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa0, $fa2, $fa1 + lu12i.w $a0, -18125 + ori $a0, $a0, 905 + lu32i.d $a0, 368931 + lu52i.d $a0, $a0, 996 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, 353387 + ori $a0, $a0, 3120 + lu32i.d $a0, -481420 + lu52i.d $a0, $a0, 1003 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -32943 + ori $a0, $a0, 2461 + lu32i.d $a0, 347631 + lu52i.d $a0, $a0, 1010 + movgr2fr.d $fa2, $a0 fmadd.d $fa1, $fa1, $fa0, $fa2 - fmadd.d $fa1, $fa1, $fa0, $fa3 - fmadd.d $fa1, $fa1, $fa0, $fa4 - fmadd.d $fa0, $fa1, $fa0, $fa5 + lu12i.w $a0, -432114 + ori $a0, $a0, 101 + lu32i.d $a0, -302930 + lu52i.d $a0, $a0, 1016 + movgr2fr.d $fa2, $a0 + fmadd.d $fa1, $fa1, $fa0, $fa2 + lu12i.w $a0, -324472 + ori $a0, $a0, 2632 + lu32i.d $a0, 276362 + lu52i.d $a0, $a0, 1022 + movgr2fr.d $fa2, $a0 + fmadd.d $fa0, $fa1, $fa0, $fa2 ret -.LBB0_67: +.LBB0_66: vldi $vr0, -912 ret .Lfunc_end0: @@ -2442,39 +3139,39 @@ foo: # @foo .word .LBB0_29-.LJTI0_0 .word .LBB0_15-.LJTI0_0 .word .LBB0_57-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 .word .LBB0_56-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 .word .LBB0_19-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 .word .LBB0_25-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 - .word .LBB0_67-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 + .word .LBB0_66-.LJTI0_0 .word .LBB0_38-.LJTI0_0 .word .LBB0_26-.LJTI0_0 .word .LBB0_60-.LJTI0_0 @@ -2510,33 +3207,34 @@ foo: # @foo .word .LBB0_53-.LJTI0_0 .word .LBB0_54-.LJTI0_0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI1_0: - .dword 0x405399999999999a # double 78.400000000000006 -.LCPI1_1: - .dword 0x3fd851eb851eb852 # double 0.38 -.LCPI1_2: - .dword 0x3fdae147ae147ae1 # double 0.41999999999999998 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main # %bb.0: addi.d $sp, $sp, -16 st.d $ra, $sp, 8 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_0) + lu12i.w $a0, -419431 + ori $a0, $a0, 2458 + lu32i.d $a0, 235929 + lu52i.d $a0, $a0, 1029 + movgr2fr.d $fa0, $a0 pcaddu18i $ra, %call36(foo) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_1) + lu12i.w $a0, -503317 + ori $a0, $a0, 2130 + lu32i.d $a0, -503317 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa1, $a0 fcmp.clt.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB1_3 # %bb.1: - pcalau12i $a0, %pc_hi20(.LCPI1_2) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_2) + lu12i.w $a0, -335545 + ori $a0, $a0, 2785 + lu32i.d $a0, -335545 + lu52i.d $a0, $a0, 1021 + movgr2fr.d $fa1, $a0 fcmp.cule.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB1_3 # %bb.2: diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr67929_1.dir/pr67929_1.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr67929_1.dir/pr67929_1.s index 2613924f..3ef7a4dc 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr67929_1.dir/pr67929_1.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-pr67929_1.dir/pr67929_1.s @@ -1,16 +1,13 @@ .file "pr67929_1.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function foo -.LCPI0_0: - .word 0x409ccccd # float 4.9000001 .text - .globl foo + .globl foo # -- Begin function foo .p2align 5 .type foo,@function foo: # @foo # %bb.0: - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI0_0) + lu12i.w $a0, 264652 + ori $a0, $a0, 3277 + movgr2fr.w $fa1, $a0 fmul.s $fa0, $fa0, $fa1 ftintrz.w.s $fa0, $fa0 movfr2gr.s $a0, $fa0 diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-stdarg-1.dir/stdarg-1.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-stdarg-1.dir/stdarg-1.s index efdd4fc4..eb1d2e75 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-stdarg-1.dir/stdarg-1.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-stdarg-1.dir/stdarg-1.s @@ -800,12 +800,7 @@ f8: # @f8 .Lfunc_end10: .size f8, .Lfunc_end10-f8 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI11_0: - .dword 0x4060600000000000 # double 131 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -823,41 +818,41 @@ main: # @main lu52i.d $a0, $a0, 1027 st.d $a0, $s1, %pc_lo12(d) ori $a1, $zero, 28 - ori $s0, $zero, 28 + ori $fp, $zero, 28 pcaddu18i $ra, %call36(f2) jirl $ra, $ra, 0 pcalau12i $s2, %pc_hi20(bar_arg) ld.w $a0, $s2, %pc_lo12(bar_arg) - bne $a0, $s0, .LBB11_11 + bne $a0, $fp, .LBB11_11 # %bb.1: - pcalau12i $fp, %pc_hi20(x) - ld.d $a0, $fp, %pc_lo12(x) - bne $a0, $s0, .LBB11_11 + pcalau12i $s0, %pc_hi20(x) + ld.d $a0, $s0, %pc_lo12(x) + bne $a0, $fp, .LBB11_11 # %bb.2: ori $a0, $zero, 0 lu32i.d $a0, 24576 - lu52i.d $a1, $a0, 1030 + lu52i.d $fp, $a0, 1030 + move $a1, $fp pcaddu18i $ra, %call36(f3) jirl $ra, $ra, 0 fld.d $fa0, $s1, %pc_lo12(d) - pcalau12i $a0, %pc_hi20(.LCPI11_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI11_0) + movgr2fr.d $fa1, $fp fcmp.ceq.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB11_11 # %bb.3: lu52i.d $a1, $zero, 1027 ori $a0, $zero, 5 ori $a2, $zero, 128 - ori $s0, $zero, 128 + ori $fp, $zero, 128 pcaddu18i $ra, %call36(f4) jirl $ra, $ra, 0 - ld.d $a0, $fp, %pc_lo12(x) + ld.d $a0, $s0, %pc_lo12(x) ori $a1, $zero, 16 bne $a0, $a1, .LBB11_11 # %bb.4: pcalau12i $a0, %pc_hi20(foo_arg) ld.w $a0, $a0, %pc_lo12(foo_arg) - bne $a0, $s0, .LBB11_11 + bne $a0, $fp, .LBB11_11 # %bb.5: lu12i.w $s0, 4 ori $fp, $s0, 6 diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-stdarg-3.dir/stdarg-3.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-stdarg-3.dir/stdarg-3.s index 108275fd..aa246aa4 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-stdarg-3.dir/stdarg-3.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-stdarg-3.dir/stdarg-3.s @@ -423,31 +423,18 @@ f8: # @f8 .Lfunc_end8: .size f8, .Lfunc_end8-f8 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI9_0: - .dword 0x4040000000000000 # double 32 -.LCPI9_1: - .dword 0x4066400000000000 # double 178 -.LCPI9_2: - .dword 0x4067e00000000000 # double 191 -.LCPI9_3: - .dword 0x4066000000000000 # double 176 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main # %bb.0: - addi.d $sp, $sp, -176 - st.d $ra, $sp, 168 # 8-byte Folded Spill - st.d $fp, $sp, 160 # 8-byte Folded Spill - st.d $s0, $sp, 152 # 8-byte Folded Spill - st.d $s1, $sp, 144 # 8-byte Folded Spill - st.d $s2, $sp, 136 # 8-byte Folded Spill - st.d $s3, $sp, 128 # 8-byte Folded Spill - fst.d $fs0, $sp, 120 # 8-byte Folded Spill - fst.d $fs1, $sp, 112 # 8-byte Folded Spill + addi.d $sp, $sp, -160 + st.d $ra, $sp, 152 # 8-byte Folded Spill + st.d $fp, $sp, 144 # 8-byte Folded Spill + st.d $s0, $sp, 136 # 8-byte Folded Spill + st.d $s1, $sp, 128 # 8-byte Folded Spill + st.d $s2, $sp, 120 # 8-byte Folded Spill + st.d $s3, $sp, 112 # 8-byte Folded Spill ori $a0, $zero, 13 st.d $a0, $sp, 0 ori $a0, $zero, 7 @@ -470,15 +457,15 @@ main: # @main lu52i.d $a3, $zero, 1025 lu52i.d $a4, $zero, 1026 lu52i.d $a5, $zero, 1027 - lu52i.d $a6, $zero, 1028 + lu52i.d $fp, $zero, 1028 lu52i.d $a7, $zero, 1029 ori $a0, $zero, 6 + move $a6, $fp pcaddu18i $ra, %call36(f2) jirl $ra, $ra, 0 pcalau12i $s1, %pc_hi20(d) fld.d $fa0, $s1, %pc_lo12(d) - pcalau12i $a0, %pc_hi20(.LCPI9_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI9_0) + movgr2fr.d $fa1, $fp fcmp.ceq.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB9_26 # %bb.2: @@ -557,35 +544,34 @@ main: # @main bcnez $fcc0, .LBB9_26 # %bb.9: fld.d $fa0, $s0, 24 - pcalau12i $a0, %pc_hi20(.LCPI9_1) - fld.d $fs0, $a0, %pc_lo12(.LCPI9_1) - fcmp.ceq.d $fcc0, $fa0, $fs0 + ori $a0, $zero, 0 + lu32i.d $a0, 409600 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa1, $a0 + fcmp.ceq.d $fcc0, $fa0, $fa1 + ori $a1, $zero, 0 bceqz $fcc0, .LBB9_26 # %bb.10: ori $s2, $zero, 131 st.w $s2, $sp, 80 - ori $a0, $zero, 0 - ori $a1, $zero, 0 - lu32i.d $a1, -131072 - lu52i.d $a1, $a1, 1026 - st.d $a1, $sp, 88 + ori $a2, $zero, 0 + lu32i.d $a2, -131072 + lu52i.d $a2, $a2, 1026 + st.d $a2, $sp, 88 ori $s1, $zero, 251 st.w $s1, $sp, 96 - ori $a2, $zero, 0 - lu32i.d $a2, 516096 - lu52i.d $a2, $a2, 1030 - st.d $a2, $sp, 104 + lu32i.d $a1, 516096 + lu52i.d $a1, $a1, 1030 + st.d $a1, $sp, 104 st.w $s2, $sp, 48 - st.d $a1, $sp, 56 + st.d $a2, $sp, 56 ori $a3, $zero, 254 st.w $a3, $sp, 64 - lu32i.d $a0, 409600 - lu52i.d $a0, $a0, 1030 st.d $a0, $sp, 72 st.w $s2, $sp, 16 - st.d $a1, $sp, 24 + st.d $a2, $sp, 24 st.w $s1, $sp, 32 - st.d $a2, $sp, 40 + st.d $a1, $sp, 40 ori $a0, $zero, 3 addi.d $a1, $sp, 80 addi.d $a2, $sp, 48 @@ -604,12 +590,14 @@ main: # @main bcnez $fcc0, .LBB9_26 # %bb.13: fld.d $fa0, $s0, 24 - pcalau12i $a0, %pc_hi20(.LCPI9_2) - fld.d $fa1, $a0, %pc_lo12(.LCPI9_2) + ori $a0, $zero, 0 + lu32i.d $a0, 516096 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa1, $a0 fcmp.ceq.d $fcc0, $fa0, $fa1 + ori $a0, $zero, 0 bceqz $fcc0, .LBB9_26 # %bb.14: - ori $a0, $zero, 0 lu32i.d $a0, 393216 lu52i.d $a3, $a0, 1030 lu52i.d $a1, $zero, 1027 @@ -627,14 +615,13 @@ main: # @main bne $a0, $s2, .LBB9_26 # %bb.15: fld.d $fa0, $s1, 0 - pcalau12i $a0, %pc_hi20(.LCPI9_3) - fld.d $fs1, $a0, %pc_lo12(.LCPI9_3) - fcmp.ceq.d $fcc0, $fa0, $fs1 - bceqz $fcc0, .LBB9_26 -# %bb.16: ori $a0, $zero, 0 lu32i.d $a0, 393216 lu52i.d $a3, $a0, 1030 + movgr2fr.d $fa1, $a3 + fcmp.ceq.d $fcc0, $fa0, $fa1 + bceqz $fcc0, .LBB9_26 +# %bb.16: lu52i.d $a1, $zero, 1027 ori $a0, $zero, 3 ori $a2, $zero, 138 @@ -694,7 +681,11 @@ main: # @main bcnez $fcc0, .LBB9_26 # %bb.21: fld.d $fa0, $s0, 24 - fcmp.ceq.d $fcc0, $fa0, $fs0 + ori $a0, $zero, 0 + lu32i.d $a0, 409600 + lu52i.d $a0, $a0, 1030 + movgr2fr.d $fa1, $a0 + fcmp.ceq.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB9_26 # %bb.22: ld.w $a0, $fp, %pc_lo12(bar_arg) @@ -703,33 +694,33 @@ main: # @main # %bb.23: ori $a0, $zero, 0 lu32i.d $a0, 393216 - lu52i.d $a1, $a0, 1030 + lu52i.d $fp, $a0, 1030 lu52i.d $a3, $zero, 1027 ori $a0, $zero, 3 ori $a2, $zero, 257 ori $a4, $zero, 138 ori $a6, $zero, 138 - ori $fp, $zero, 257 + ori $s0, $zero, 257 + move $a1, $fp move $a5, $a3 pcaddu18i $ra, %call36(f8) jirl $ra, $ra, 0 ld.d $a0, $s1, 8 - bne $a0, $fp, .LBB9_26 + bne $a0, $s0, .LBB9_26 # %bb.24: fld.d $fa0, $s1, 0 - fcmp.ceq.d $fcc0, $fa0, $fs1 + movgr2fr.d $fa1, $fp + fcmp.ceq.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB9_26 # %bb.25: move $a0, $zero - fld.d $fs1, $sp, 112 # 8-byte Folded Reload - fld.d $fs0, $sp, 120 # 8-byte Folded Reload - ld.d $s3, $sp, 128 # 8-byte Folded Reload - ld.d $s2, $sp, 136 # 8-byte Folded Reload - ld.d $s1, $sp, 144 # 8-byte Folded Reload - ld.d $s0, $sp, 152 # 8-byte Folded Reload - ld.d $fp, $sp, 160 # 8-byte Folded Reload - ld.d $ra, $sp, 168 # 8-byte Folded Reload - addi.d $sp, $sp, 176 + ld.d $s3, $sp, 112 # 8-byte Folded Reload + ld.d $s2, $sp, 120 # 8-byte Folded Reload + ld.d $s1, $sp, 128 # 8-byte Folded Reload + ld.d $s0, $sp, 136 # 8-byte Folded Reload + ld.d $fp, $sp, 144 # 8-byte Folded Reload + ld.d $ra, $sp, 152 # 8-byte Folded Reload + addi.d $sp, $sp, 160 ret .LBB9_26: pcaddu18i $ra, %call36(abort) diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-va-arg-16.dir/va-arg-16.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-va-arg-16.dir/va-arg-16.s index 567ba9ab..52d85816 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-va-arg-16.dir/va-arg-16.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-va-arg-16.dir/va-arg-16.s @@ -1,12 +1,6 @@ .file "va-arg-16.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function vafunction -.LCPI0_0: - .dword 0x408bc00000000000 # double 888 -.LCPI0_1: - .dword 0x408f380000000000 # double 999 .text - .globl vafunction + .globl vafunction # -- Begin function vafunction .p2align 5 .type vafunction,@function vafunction: # @vafunction @@ -18,17 +12,21 @@ vafunction: # @vafunction st.d $a4, $sp, 48 st.d $a3, $sp, 40 st.d $a2, $sp, 32 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.d $fa2, $a2, %pc_lo12(.LCPI0_0) st.d $a1, $sp, 24 st.d $a0, $sp, 16 addi.d $a0, $sp, 16 - fcmp.ceq.d $fcc0, $fa0, $fa2 st.d $a0, $sp, 8 + ori $a0, $zero, 0 + lu32i.d $a0, -278528 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa2, $a0 + fcmp.ceq.d $fcc0, $fa0, $fa2 + ori $a0, $zero, 0 bceqz $fcc0, .LBB0_12 # %bb.1: - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_1) + lu32i.d $a0, -51200 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa0, $a0 fcmp.ceq.d $fcc0, $fa1, $fa0 bceqz $fcc0, .LBB0_12 # %bb.2: @@ -104,14 +102,7 @@ vafunction: # @vafunction .Lfunc_end0: .size vafunction, .Lfunc_end0-vafunction # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI1_0: - .dword 0x408bc00000000000 # double 888 -.LCPI1_1: - .dword 0x408f380000000000 # double 999 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -122,10 +113,6 @@ main: # @main ori $a1, $zero, 0 lu32i.d $a1, 131072 lu52i.d $t0, $a1, 1026 - pcalau12i $a1, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a1, %pc_lo12(.LCPI1_0) - pcalau12i $a1, %pc_hi20(.LCPI1_1) - fld.d $fa1, $a1, %pc_lo12(.LCPI1_1) ori $a1, $zero, 0 lu32i.d $a1, -524288 lu52i.d $a2, $a1, 1024 @@ -133,8 +120,16 @@ main: # @main lu32i.d $a3, 262144 lu52i.d $a4, $a3, 1025 lu52i.d $a5, $a1, 1025 - lu32i.d $a0, -262144 - lu52i.d $a6, $a0, 1025 + ori $a1, $zero, 0 + lu32i.d $a1, -262144 + lu52i.d $a6, $a1, 1025 + ori $a1, $zero, 0 + lu32i.d $a1, -278528 + lu52i.d $a1, $a1, 1032 + movgr2fr.d $fa0, $a1 + lu32i.d $a0, -51200 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa1, $a0 lu52i.d $a0, $zero, 1023 lu52i.d $a1, $zero, 1024 lu52i.d $a3, $zero, 1025 diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-va-arg-5.dir/va-arg-5.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-va-arg-5.dir/va-arg-5.s index 687b42f8..f59c83ab 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-va-arg-5.dir/va-arg-5.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-va-arg-5.dir/va-arg-5.s @@ -1,16 +1,6 @@ .file "va-arg-5.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function va_double -.LCPI0_0: - .dword 0x400921fafc8b007a # double 3.1415920000000002 -.LCPI0_1: - .dword 0x4005bf04577d9557 # double 2.71827 -.LCPI0_2: - .dword 0x4001e3779131154c # double 2.2360679000000001 -.LCPI0_3: - .dword 0x40012e0be1b5921e # double 2.1474836000000002 .text - .globl va_double + .globl va_double # -- Begin function va_double .p2align 5 .type va_double,@function va_double: # @va_double @@ -26,34 +16,46 @@ va_double: # @va_double addi.d $a0, $sp, 32 st.d $a0, $sp, 8 fld.d $fa0, $sp, 24 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) + lu12i.w $a0, -14160 + ori $a0, $a0, 122 + lu32i.d $a0, -450054 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fa1, $a0 fcmp.ceq.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB0_5 # %bb.1: addi.d $a0, $sp, 24 - fld.d $fa0, $a0, 8 - pcalau12i $a1, %pc_hi20(.LCPI0_1) - fld.d $fa1, $a1, %pc_lo12(.LCPI0_1) addi.d $a1, $a0, 16 + fld.d $fa0, $a0, 8 + lu12i.w $a2, 358361 + ori $a2, $a2, 1367 + lu32i.d $a2, 376580 + lu52i.d $a2, $a2, 1024 + movgr2fr.d $fa1, $a2 fcmp.ceq.d $fcc0, $fa0, $fa1 st.d $a1, $sp, 8 bceqz $fcc0, .LBB0_5 # %bb.2: - fld.d $fa0, $a0, 16 - pcalau12i $a1, %pc_hi20(.LCPI0_2) - fld.d $fa1, $a1, %pc_lo12(.LCPI0_2) addi.d $a1, $a0, 24 + fld.d $fa0, $a0, 16 + lu12i.w $a2, -453871 + ori $a2, $a2, 1356 + lu32i.d $a2, 123767 + lu52i.d $a2, $a2, 1024 + movgr2fr.d $fa1, $a2 fcmp.ceq.d $fcc0, $fa0, $fa1 st.d $a1, $sp, 8 bceqz $fcc0, .LBB0_5 # %bb.3: + addi.d $a1, $a0, 32 fld.d $fa0, $a0, 24 - pcalau12i $a1, %pc_hi20(.LCPI0_3) - fld.d $fa1, $a1, %pc_lo12(.LCPI0_3) - addi.d $a0, $a0, 32 + lu12i.w $a0, -124071 + ori $a0, $a0, 542 + lu32i.d $a0, 77323 + lu52i.d $a0, $a0, 1024 + movgr2fr.d $fa1, $a0 fcmp.ceq.d $fcc0, $fa0, $fa1 - st.d $a0, $sp, 8 + st.d $a1, $sp, 8 bceqz $fcc0, .LBB0_5 # %bb.4: move $a0, $zero diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-va-arg-6.dir/va-arg-6.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-va-arg-6.dir/va-arg-6.s index 407887cd..4c4ffd68 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-va-arg-6.dir/va-arg-6.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-va-arg-6.dir/va-arg-6.s @@ -1,10 +1,6 @@ .file "va-arg-6.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function f -.LCPI0_0: - .dword 0x4005c28f5c28f5c3 # double 2.7200000000000002 .text - .globl f + .globl f # -- Begin function f .p2align 5 .type f,@function f: # @f @@ -87,10 +83,13 @@ f: # @f st.d $a1, $sp, 8 bne $a0, $a2, .LBB0_10 # %bb.8: - fld.d $fa0, $fp, 48 - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) addi.d $a0, $fp, 56 + fld.d $fa0, $fp, 48 + lu12i.w $a1, 377487 + ori $a1, $a1, 1475 + lu32i.d $a1, 377487 + lu52i.d $a1, $a1, 1024 + movgr2fr.d $fa1, $a1 fcmp.ceq.d $fcc0, $fa0, $fa1 st.d $a0, $sp, 8 bceqz $fcc0, .LBB0_10 diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-20010114-2.dir/20010114-2.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-20010114-2.dir/20010114-2.s index f6eda1c0..d18a0a47 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-20010114-2.dir/20010114-2.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-20010114-2.dir/20010114-2.s @@ -1,19 +1,13 @@ .file "20010114-2.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function rintf -.LCPI0_0: - .word 0x4b000000 # float 8388608 -.LCPI0_1: - .word 0xcb000000 # float -8388608 .text - .globl rintf + .globl rintf # -- Begin function rintf .p2align 5 .type rintf,@function rintf: # @rintf # %bb.0: - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI0_0) fabs.s $fa2, $fa0 + lu12i.w $a0, 307200 + movgr2fr.w $fa1, $a0 fcmp.cule.s $fcc0, $fa1, $fa2 bcnez $fcc0, .LBB0_5 # %bb.1: @@ -21,19 +15,21 @@ rintf: # @rintf fcmp.cule.s $fcc0, $fa0, $fa2 bcnez $fcc0, .LBB0_3 # %bb.2: - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.s $fa2, $a0, %pc_lo12(.LCPI0_1) fadd.s $fa0, $fa0, $fa1 - fadd.s $fa0, $fa0, $fa2 + lu12i.w $a0, -217088 + lu32i.d $a0, 0 + movgr2fr.w $fa1, $a0 + fadd.s $fa0, $fa0, $fa1 ret .LBB0_3: fcmp.cule.s $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB0_5 # %bb.4: - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.s $fa2, $a0, %pc_lo12(.LCPI0_1) fsub.s $fa0, $fa1, $fa0 - fadd.s $fa0, $fa0, $fa2 + lu12i.w $a0, -217088 + lu32i.d $a0, 0 + movgr2fr.w $fa1, $a0 + fadd.s $fa0, $fa0, $fa1 fneg.s $fa0, $fa0 .LBB0_5: ret diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-20030331-1.dir/20030331-1.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-20030331-1.dir/20030331-1.s index e30041c8..e56334e0 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-20030331-1.dir/20030331-1.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-20030331-1.dir/20030331-1.s @@ -1,21 +1,15 @@ .file "20030331-1.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function rintf -.LCPI0_0: - .word 0x4b000000 # float 8388608 -.LCPI0_1: - .word 0xcb000000 # float -8388608 .text - .globl rintf + .globl rintf # -- Begin function rintf .p2align 5 .type rintf,@function rintf: # @rintf # %bb.0: pcalau12i $a0, %pc_hi20(x) fld.s $fa0, $a0, %pc_lo12(x) - pcalau12i $a1, %pc_hi20(.LCPI0_0) - fld.s $fa1, $a1, %pc_lo12(.LCPI0_0) fabs.s $fa2, $fa0 + lu12i.w $a1, 307200 + movgr2fr.w $fa1, $a1 fcmp.cule.s $fcc0, $fa1, $fa2 bcnez $fcc0, .LBB0_6 # %bb.1: @@ -23,19 +17,21 @@ rintf: # @rintf fcmp.cule.s $fcc0, $fa0, $fa2 bcnez $fcc0, .LBB0_3 # %bb.2: - pcalau12i $a1, %pc_hi20(.LCPI0_1) - fld.s $fa2, $a1, %pc_lo12(.LCPI0_1) fadd.s $fa0, $fa0, $fa1 - fadd.s $fa0, $fa0, $fa2 + lu12i.w $a1, -217088 + lu32i.d $a1, 0 + movgr2fr.w $fa1, $a1 + fadd.s $fa0, $fa0, $fa1 b .LBB0_5 .LBB0_3: fcmp.cule.s $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB0_6 # %bb.4: - pcalau12i $a1, %pc_hi20(.LCPI0_1) - fld.s $fa2, $a1, %pc_lo12(.LCPI0_1) fsub.s $fa0, $fa1, $fa0 - fadd.s $fa0, $fa0, $fa2 + lu12i.w $a1, -217088 + lu32i.d $a1, 0 + movgr2fr.w $fa1, $a1 + fadd.s $fa0, $fa0, $fa1 fneg.s $fa0, $fa0 .LBB0_5: # %.sink.split fst.s $fa0, $a0, %pc_lo12(x) @@ -44,14 +40,7 @@ rintf: # @rintf .Lfunc_end0: .size rintf, .Lfunc_end0-rintf # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function main -.LCPI1_0: - .word 0x4b000000 # float 8388608 -.LCPI1_1: - .word 0xcb000000 # float -8388608 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -60,9 +49,9 @@ main: # @main st.d $ra, $sp, 8 # 8-byte Folded Spill pcalau12i $a0, %pc_hi20(x) fld.s $fa0, $a0, %pc_lo12(x) - pcalau12i $a1, %pc_hi20(.LCPI1_0) - fld.s $fa1, $a1, %pc_lo12(.LCPI1_0) fabs.s $fa2, $fa0 + lu12i.w $a1, 307200 + movgr2fr.w $fa1, $a1 fcmp.cule.s $fcc0, $fa1, $fa2 bcnez $fcc0, .LBB1_6 # %bb.1: @@ -70,19 +59,21 @@ main: # @main fcmp.cule.s $fcc0, $fa0, $fa2 bcnez $fcc0, .LBB1_3 # %bb.2: - pcalau12i $a1, %pc_hi20(.LCPI1_1) - fld.s $fa2, $a1, %pc_lo12(.LCPI1_1) fadd.s $fa0, $fa0, $fa1 - fadd.s $fa0, $fa0, $fa2 + lu12i.w $a1, -217088 + lu32i.d $a1, 0 + movgr2fr.w $fa1, $a1 + fadd.s $fa0, $fa0, $fa1 b .LBB1_5 .LBB1_3: fcmp.cule.s $fcc0, $fa2, $fa0 bcnez $fcc0, .LBB1_6 # %bb.4: - pcalau12i $a1, %pc_hi20(.LCPI1_1) - fld.s $fa2, $a1, %pc_lo12(.LCPI1_1) fsub.s $fa0, $fa1, $fa0 - fadd.s $fa0, $fa0, $fa2 + lu12i.w $a1, -217088 + lu32i.d $a1, 0 + movgr2fr.w $fa1, $a1 + fadd.s $fa0, $fa0, $fa1 fneg.s $fa0, $fa0 .LBB1_5: # %.sink.split.i fst.s $fa0, $a0, %pc_lo12(x) diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-20041213-1.dir/20041213-1.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-20041213-1.dir/20041213-1.s index 45b0ad37..f44659c5 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-20041213-1.dir/20041213-1.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-20041213-1.dir/20041213-1.s @@ -1,10 +1,6 @@ .file "20041213-1.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function foo -.LCPI0_0: - .dword 0x7ff8000000000000 # double NaN .text - .globl foo + .globl foo # -- Begin function foo .p2align 5 .type foo,@function foo: # @foo @@ -15,8 +11,10 @@ foo: # @foo st.w $a2, $a0, %pc_lo12(once) bnez $a1, .LBB0_2 # %bb.1: - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI0_0) + ori $a0, $zero, 0 + lu32i.d $a0, -524288 + lu52i.d $a0, $a0, 2047 + movgr2fr.d $fa0, $a0 ret .LBB0_2: pcaddu18i $ra, %call36(abort) @@ -24,12 +22,7 @@ foo: # @foo .Lfunc_end0: .size foo, .Lfunc_end0-foo # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI1_0: - .dword 0x7ff8000000000000 # double NaN - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -40,8 +33,10 @@ main: # @main st.w $a2, $a0, %pc_lo12(once) bnez $a1, .LBB1_2 # %bb.1: # %foo.exit - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_0) + ori $a0, $zero, 0 + lu32i.d $a0, -524288 + lu52i.d $a0, $a0, 2047 + movgr2fr.d $fa0, $a0 fsqrt.d $fa0, $fa0 pcalau12i $a0, %pc_hi20(x) fst.d $fa0, $a0, %pc_lo12(x) diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-acc1.dir/acc1.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-acc1.dir/acc1.s index 3ee1e819..0a1a1a03 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-acc1.dir/acc1.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-acc1.dir/acc1.s @@ -26,12 +26,7 @@ func: # @func .Lfunc_end0: .size func, .Lfunc_end0-func # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI1_0: - .dword 0x2af665bf1d3e6a8d # double 1.0000000000000001E-101 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -42,8 +37,11 @@ main: # @main addi.d $a0, $a0, %pc_lo12(.L__const.main.values) pcaddu18i $ra, %call36(func) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_0) + lu12i.w $a0, 119782 + ori $a0, $a0, 2701 + lu32i.d $a0, 419263 + lu52i.d $a0, $a0, 687 + movgr2fr.d $fa1, $a0 fcmp.ceq.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB1_2 # %bb.1: diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-acc2.dir/acc2.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-acc2.dir/acc2.s index 25977d2f..df7ed791 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-acc2.dir/acc2.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-acc2.dir/acc2.s @@ -31,12 +31,7 @@ func: # @func .Lfunc_end0: .size func, .Lfunc_end0-func # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI1_0: - .dword 0x7fefffffffffffff # double 1.7976931348623157E+308 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -47,8 +42,9 @@ main: # @main addi.d $a0, $a0, %pc_lo12(.L__const.main.values) pcaddu18i $ra, %call36(func) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_0) + addi.w $a0, $zero, -1 + lu52i.d $a0, $a0, 2046 + movgr2fr.d $fa1, $a0 fcmp.ceq.d $fcc0, $fa0, $fa1 bcnez $fcc0, .LBB1_2 # %bb.1: diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-inf-2.dir/inf-2.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-inf-2.dir/inf-2.s index f99aba65..ae49e506 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-inf-2.dir/inf-2.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-inf-2.dir/inf-2.s @@ -1,23 +1,17 @@ .file "inf-2.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function test -.LCPI0_0: - .dword 0x7ff0000000000000 # double +Inf -.LCPI0_1: - .dword 0xfff0000000000000 # double -Inf .text - .globl test + .globl test # -- Begin function test .p2align 5 .type test,@function test: # @test # %bb.0: - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_0) + lu52i.d $a0, $zero, 2047 + movgr2fr.d $fa2, $a0 fcmp.cune.d $fcc0, $fa0, $fa2 bceqz $fcc0, .LBB0_5 # %bb.1: - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_1) + lu52i.d $a0, $zero, -1 + movgr2fr.d $fa2, $a0 fcmp.cune.d $fcc0, $fa0, $fa2 bceqz $fcc0, .LBB0_5 # %bb.2: @@ -38,25 +32,19 @@ test: # @test .Lfunc_end0: .size test, .Lfunc_end0-test # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function testf -.LCPI1_0: - .word 0x7f800000 # float +Inf -.LCPI1_1: - .word 0xff800000 # float -Inf - .text - .globl testf + .globl testf # -- Begin function testf .p2align 5 .type testf,@function testf: # @testf # %bb.0: - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.s $fa2, $a0, %pc_lo12(.LCPI1_0) + lu12i.w $a0, 522240 + movgr2fr.w $fa2, $a0 fcmp.cune.s $fcc0, $fa0, $fa2 bceqz $fcc0, .LBB1_5 # %bb.1: - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.s $fa2, $a0, %pc_lo12(.LCPI1_1) + lu12i.w $a0, -2048 + lu32i.d $a0, 0 + movgr2fr.w $fa2, $a0 fcmp.cune.s $fcc0, $fa0, $fa2 bceqz $fcc0, .LBB1_5 # %bb.2: diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-inf-3.dir/inf-3.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-inf-3.dir/inf-3.s index 51adcdfb..efc4640d 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-inf-3.dir/inf-3.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-inf-3.dir/inf-3.s @@ -1,23 +1,17 @@ .file "inf-3.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function test -.LCPI0_0: - .dword 0x7ff0000000000000 # double +Inf -.LCPI0_1: - .dword 0xfff0000000000000 # double -Inf .text - .globl test + .globl test # -- Begin function test .p2align 5 .type test,@function test: # @test # %bb.0: - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_0) + lu52i.d $a0, $zero, 2047 + movgr2fr.d $fa2, $a0 fcmp.cune.d $fcc0, $fa0, $fa2 bceqz $fcc0, .LBB0_5 # %bb.1: - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_1) + lu52i.d $a0, $zero, -1 + movgr2fr.d $fa2, $a0 fcmp.cune.d $fcc0, $fa0, $fa2 bceqz $fcc0, .LBB0_5 # %bb.2: @@ -38,25 +32,19 @@ test: # @test .Lfunc_end0: .size test, .Lfunc_end0-test # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function testf -.LCPI1_0: - .word 0x7f800000 # float +Inf -.LCPI1_1: - .word 0xff800000 # float -Inf - .text - .globl testf + .globl testf # -- Begin function testf .p2align 5 .type testf,@function testf: # @testf # %bb.0: - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.s $fa2, $a0, %pc_lo12(.LCPI1_0) + lu12i.w $a0, 522240 + movgr2fr.w $fa2, $a0 fcmp.cune.s $fcc0, $fa0, $fa2 bceqz $fcc0, .LBB1_5 # %bb.1: - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.s $fa2, $a0, %pc_lo12(.LCPI1_1) + lu12i.w $a0, -2048 + lu32i.d $a0, 0 + movgr2fr.w $fa2, $a0 fcmp.cune.s $fcc0, $fa0, $fa2 bceqz $fcc0, .LBB1_5 # %bb.2: diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-pr30704.dir/pr30704.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-pr30704.dir/pr30704.s index 67b81b4d..4721c3b6 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-pr30704.dir/pr30704.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-pr30704.dir/pr30704.s @@ -11,18 +11,16 @@ f1: # @f1 .Lfunc_end0: .size f1, .Lfunc_end0-f1 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function f2 -.LCPI1_0: - .dword 0x000fedcba9876543 # double 2.2151846413582804E-308 - .text - .globl f2 + .globl f2 # -- Begin function f2 .p2align 5 .type f2,@function f2: # @f2 # %bb.0: - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa0, $a0, %pc_lo12(.LCPI1_0) + lu12i.w $a0, -354186 + ori $a0, $a0, 1347 + lu32i.d $a0, -4661 + lu52i.d $a0, $a0, 0 + movgr2fr.d $fa0, $a0 ret .Lfunc_end1: .size f2, .Lfunc_end1-f2 diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-pr67218.dir/pr67218.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-pr67218.dir/pr67218.s index b046ea97..1f1b1343 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-pr67218.dir/pr67218.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-pr67218.dir/pr67218.s @@ -16,12 +16,7 @@ foo: # @foo .Lfunc_end0: .size foo, .Lfunc_end0-foo # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI1_0: - .dword 0x41efffe000000000 # double 4294901760 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -31,8 +26,10 @@ main: # @main ori $a0, $zero, 1 pcaddu18i $ra, %call36(foo) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI1_0) + ori $a0, $zero, 0 + lu32i.d $a0, -32 + lu52i.d $a0, $a0, 1054 + movgr2fr.d $fa1, $a0 fcmp.ceq.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB1_2 # %bb.1: diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-rbug.dir/rbug.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-rbug.dir/rbug.s index 5cf569ef..aa865fbb 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-rbug.dir/rbug.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-rbug.dir/rbug.s @@ -1,21 +1,18 @@ .file "rbug.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function d -.LCPI0_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 .text - .globl d + .globl d # -- Begin function d .p2align 5 .type d,@function d: # @d # %bb.0: srli.d $a1, $a0, 32 - pcalau12i $a2, %pc_hi20(.LCPI0_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI0_0) lu52i.d $a2, $zero, 1107 or $a1, $a1, $a2 + movgr2fr.d $fa0, $a1 + lu12i.w $a1, 256 + lu52i.d $a1, $a1, 1107 movgr2fr.d $fa1, $a1 - fsub.d $fa0, $fa1, $fa0 + fsub.d $fa0, $fa0, $fa1 lu12i.w $a1, 275200 bstrins.d $a0, $a1, 63, 32 movgr2fr.d $fa1, $a0 diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-unsafe-fp-assoc-1.dir/unsafe-fp-assoc-1.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-unsafe-fp-assoc-1.dir/unsafe-fp-assoc-1.s index 5d752c71..c6bfb601 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-unsafe-fp-assoc-1.dir/unsafe-fp-assoc-1.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-unsafe-fp-assoc-1.dir/unsafe-fp-assoc-1.s @@ -1,12 +1,6 @@ .file "unsafe-fp-assoc-1.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function func -.LCPI0_0: - .dword 0xc330000000000000 # double -4503599627370496 -.LCPI0_1: - .dword 0x4330000000000000 # double 4503599627370496 .text - .globl func + .globl func # -- Begin function func .p2align 5 .type func,@function func: # @func @@ -16,20 +10,20 @@ func: # @func ori $a1, $zero, 1074 bltu $a1, $a0, .LBB0_3 # %bb.1: - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.d $fa2, $a0, %pc_lo12(.LCPI0_1) - fadd.d $fa3, $fa0, $fa1 - fadd.d $fa3, $fa3, $fa2 - fcmp.ceq.d $fcc0, $fa3, $fa0 + lu52i.d $a0, $zero, -973 + movgr2fr.d $fa1, $a0 + fadd.d $fa2, $fa0, $fa1 + lu52i.d $a0, $zero, 1075 + movgr2fr.d $fa3, $a0 + fadd.d $fa2, $fa2, $fa3 + fcmp.ceq.d $fcc0, $fa2, $fa0 bceqz $fcc0, .LBB0_4 # %bb.2: vldi $vr4, -800 fadd.d $fa0, $fa0, $fa4 fadd.d $fa0, $fa0, $fa1 - fadd.d $fa0, $fa0, $fa2 - fcmp.cune.d $fcc0, $fa0, $fa3 + fadd.d $fa0, $fa0, $fa3 + fcmp.cune.d $fcc0, $fa0, $fa2 bceqz $fcc0, .LBB0_4 .LBB0_3: ret diff --git a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-unsafe-fp-assoc.dir/unsafe-fp-assoc.s b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-unsafe-fp-assoc.dir/unsafe-fp-assoc.s index 818a0b69..f48e01eb 100644 --- a/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-unsafe-fp-assoc.dir/unsafe-fp-assoc.s +++ b/results/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeFiles/GCC-C-execute-ieee-unsafe-fp-assoc.dir/unsafe-fp-assoc.s @@ -1,16 +1,13 @@ .file "unsafe-fp-assoc.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function foo -.LCPI0_0: - .dword 0x7fefffffffffffff # double 1.7976931348623157E+308 .text - .globl foo + .globl foo # -- Begin function foo .p2align 5 .type foo,@function foo: # @foo # %bb.0: - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI0_0) + addi.w $a0, $zero, -1 + lu52i.d $a0, $a0, 2046 + movgr2fr.d $fa1, $a0 fmul.d $fa0, $fa0, $fa1 fmul.d $fa0, $fa0, $fa1 fmul.d $fa0, $fa0, $fa1 diff --git a/results/SingleSource/UnitTests/CMakeFiles/2005-07-17-INT-To-FP.dir/2005-07-17-INT-To-FP.s b/results/SingleSource/UnitTests/CMakeFiles/2005-07-17-INT-To-FP.dir/2005-07-17-INT-To-FP.s index eedb28a2..604375ec 100644 --- a/results/SingleSource/UnitTests/CMakeFiles/2005-07-17-INT-To-FP.dir/2005-07-17-INT-To-FP.s +++ b/results/SingleSource/UnitTests/CMakeFiles/2005-07-17-INT-To-FP.dir/2005-07-17-INT-To-FP.s @@ -1,10 +1,6 @@ .file "2005-07-17-INT-To-FP.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -28,8 +24,9 @@ main: # @main move $s1, $zero move $s2, $zero move $s3, $zero - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.d $fs0, $a0, %pc_lo12(.LCPI0_0) + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 + movgr2fr.d $fs0, $a0 lu52i.d $s6, $zero, 64 ori $s7, $zero, 64 .p2align 4, , 16 diff --git a/results/SingleSource/UnitTests/CMakeFiles/2009-12-07-StructReturn.dir/2009-12-07-StructReturn.s b/results/SingleSource/UnitTests/CMakeFiles/2009-12-07-StructReturn.dir/2009-12-07-StructReturn.s index b5e6960b..192bd6d7 100644 --- a/results/SingleSource/UnitTests/CMakeFiles/2009-12-07-StructReturn.dir/2009-12-07-StructReturn.s +++ b/results/SingleSource/UnitTests/CMakeFiles/2009-12-07-StructReturn.dir/2009-12-07-StructReturn.s @@ -27,12 +27,7 @@ foobify: # @foobify .Lfunc_end1: .size foobify, .Lfunc_end1-foobify # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function main -.LCPI2_0: - .word 0x430e0000 # float 142 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -51,8 +46,8 @@ main: # @main st.d $a0, $a2, 0 st.w $a1, $a2, 8 fld.s $fa0, $fp, 0 - pcalau12i $a0, %pc_hi20(.LCPI2_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI2_0) + lu12i.w $a0, 274656 + movgr2fr.w $fa1, $a0 fcmp.ceq.s $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB2_2 # %bb.1: diff --git a/results/SingleSource/UnitTests/CMakeFiles/FloatPrecision.dir/FloatPrecision.s b/results/SingleSource/UnitTests/CMakeFiles/FloatPrecision.dir/FloatPrecision.s index 89a7514c..e3b9eff8 100644 --- a/results/SingleSource/UnitTests/CMakeFiles/FloatPrecision.dir/FloatPrecision.s +++ b/results/SingleSource/UnitTests/CMakeFiles/FloatPrecision.dir/FloatPrecision.s @@ -1,12 +1,6 @@ .file "FloatPrecision.c" - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function print -.LCPI0_0: - .word 0x4f000000 # float 2.14748365E+9 -.LCPI0_1: - .word 0x5f000000 # float 9.22337203E+18 .text - .globl print + .globl print # -- Begin function print .p2align 5 .type print,@function print: # @print @@ -14,8 +8,8 @@ print: # @print addi.d $sp, $sp, -16 st.d $ra, $sp, 8 # 8-byte Folded Spill fst.d $fs0, $sp, 0 # 8-byte Folded Spill - pcalau12i $a0, %pc_hi20(.LCPI0_0) - fld.s $fa1, $a0, %pc_lo12(.LCPI0_0) + lu12i.w $a0, 323584 + movgr2fr.w $fa1, $a0 fmul.s $fs0, $fa0, $fa1 fcvt.d.s $fa0, $fs0 movfr2gr.d $a1, $fa0 @@ -23,8 +17,8 @@ print: # @print addi.d $a0, $a0, %pc_lo12(.L.str) pcaddu18i $ra, %call36(printf) jirl $ra, $ra, 0 - pcalau12i $a0, %pc_hi20(.LCPI0_1) - fld.s $fa0, $a0, %pc_lo12(.LCPI0_1) + lu12i.w $a0, 389120 + movgr2fr.w $fa0, $a0 fcmp.clt.s $fcc0, $fs0, $fa0 ftintrz.l.s $fa1, $fs0 movfr2gr.d $a0, $fa1 diff --git a/results/SingleSource/UnitTests/CMakeFiles/matrix-types-spec.dir/matrix-types-spec.s b/results/SingleSource/UnitTests/CMakeFiles/matrix-types-spec.dir/matrix-types-spec.s index b81b8801..8871dff8 100644 --- a/results/SingleSource/UnitTests/CMakeFiles/matrix-types-spec.dir/matrix-types-spec.s +++ b/results/SingleSource/UnitTests/CMakeFiles/matrix-types-spec.dir/matrix-types-spec.s @@ -74,14 +74,8 @@ main: # @main .size main, .Lfunc_end1-main .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13testTransposeIdLj3ELj3EEvv -.LCPI2_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI2_1: - .dword 0x41dfffffff800000 # double 2147483646 .section .text._Z13testTransposeIdLj3ELj3EEvv,"axG",@progbits,_Z13testTransposeIdLj3ELj3EEvv,comdat - .weak _Z13testTransposeIdLj3ELj3EEvv + .weak _Z13testTransposeIdLj3ELj3EEvv # -- Begin function _Z13testTransposeIdLj3ELj3EEvv .p2align 5 .type _Z13testTransposeIdLj3ELj3EEvv,@function _Z13testTransposeIdLj3ELj3EEvv: # @_Z13testTransposeIdLj3ELj3EEvv @@ -149,13 +143,15 @@ _Z13testTransposeIdLj3ELj3EEvv: # @_Z13testTransposeIdLj3ELj3EEvv lu12i.w $a0, 4 ori $s0, $a0, 423 ori $a0, $zero, 5 - pcalau12i $a1, %pc_hi20(.LCPI2_0) - fld.d $fs0, $a1, %pc_lo12(.LCPI2_0) - pcalau12i $a1, %pc_hi20(.LCPI2_1) - fld.d $fs1, $a1, %pc_lo12(.LCPI2_1) lu32i.d $a0, 2 lu52i.d $s1, $zero, 1107 + lu12i.w $a1, 256 + lu52i.d $a1, $a1, 1107 + movgr2fr.d $fs0, $a1 lu12i.w $s2, 275200 + lu12i.w $a1, -2048 + lu52i.d $a1, $a1, 1053 + movgr2fr.d $fs1, $a1 move $a1, $fp .p2align 4, , 16 .LBB2_1: # %select.unfold.i.i.i.i.i @@ -659,14 +655,8 @@ _Z13testTransposeIdLj3ELj3EEvv: # @_Z13testTransposeIdLj3ELj3EEvv .size _Z13testTransposeIdLj3ELj3EEvv, .Lfunc_end2-_Z13testTransposeIdLj3ELj3EEvv .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13testTransposeIdLj3ELj10EEvv -.LCPI3_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI3_1: - .dword 0x41dfffffff800000 # double 2147483646 .section .text._Z13testTransposeIdLj3ELj10EEvv,"axG",@progbits,_Z13testTransposeIdLj3ELj10EEvv,comdat - .weak _Z13testTransposeIdLj3ELj10EEvv + .weak _Z13testTransposeIdLj3ELj10EEvv # -- Begin function _Z13testTransposeIdLj3ELj10EEvv .p2align 5 .type _Z13testTransposeIdLj3ELj10EEvv,@function _Z13testTransposeIdLj3ELj10EEvv: # @_Z13testTransposeIdLj3ELj10EEvv @@ -734,11 +724,13 @@ _Z13testTransposeIdLj3ELj10EEvv: # @_Z13testTransposeIdLj3ELj10EEvv ori $s2, $zero, 5 lu32i.d $s2, 2 lu52i.d $s4, $zero, 1107 - pcalau12i $a0, %pc_hi20(.LCPI3_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI3_0) + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 + movgr2fr.d $fs1, $a0 lu12i.w $s5, 275200 - pcalau12i $a0, %pc_hi20(.LCPI3_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI3_1) + lu12i.w $a0, -2048 + lu52i.d $a0, $a0, 1053 + movgr2fr.d $fs2, $a0 vldi $vr4, -912 vldi $vr5, -860 vldi $vr6, -972 @@ -963,14 +955,8 @@ _Z13testTransposeIdLj3ELj10EEvv: # @_Z13testTransposeIdLj3ELj10EEvv .size _Z13testTransposeIdLj3ELj10EEvv, .Lfunc_end3-_Z13testTransposeIdLj3ELj10EEvv .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z13testTransposeIdLj4ELj3EEvv -.LCPI4_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI4_1: - .dword 0x41dfffffff800000 # double 2147483646 .section .text._Z13testTransposeIdLj4ELj3EEvv,"axG",@progbits,_Z13testTransposeIdLj4ELj3EEvv,comdat - .weak _Z13testTransposeIdLj4ELj3EEvv + .weak _Z13testTransposeIdLj4ELj3EEvv # -- Begin function _Z13testTransposeIdLj4ELj3EEvv .p2align 5 .type _Z13testTransposeIdLj4ELj3EEvv,@function _Z13testTransposeIdLj4ELj3EEvv: # @_Z13testTransposeIdLj4ELj3EEvv @@ -1038,11 +1024,13 @@ _Z13testTransposeIdLj4ELj3EEvv: # @_Z13testTransposeIdLj4ELj3EEvv ori $s2, $zero, 5 lu32i.d $s2, 2 lu52i.d $s4, $zero, 1107 - pcalau12i $a0, %pc_hi20(.LCPI4_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI4_0) + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 + movgr2fr.d $fs1, $a0 lu12i.w $s5, 275200 - pcalau12i $a0, %pc_hi20(.LCPI4_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI4_1) + lu12i.w $a0, -2048 + lu52i.d $a0, $a0, 1053 + movgr2fr.d $fs2, $a0 vldi $vr4, -912 vldi $vr5, -860 vldi $vr6, -972 @@ -5268,14 +5256,8 @@ _Z13testTransposeIiLj8ELj7EEvv: # @_Z13testTransposeIiLj8ELj7EEvv .size _Z13testTransposeIiLj8ELj7EEvv, .Lfunc_end6-_Z13testTransposeIiLj8ELj7EEvv .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z12testMultiplyIdLj3ELj3ELj3EEvv -.LCPI7_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI7_1: - .dword 0x41dfffffff800000 # double 2147483646 .section .text._Z12testMultiplyIdLj3ELj3ELj3EEvv,"axG",@progbits,_Z12testMultiplyIdLj3ELj3ELj3EEvv,comdat - .weak _Z12testMultiplyIdLj3ELj3ELj3EEvv + .weak _Z12testMultiplyIdLj3ELj3ELj3EEvv # -- Begin function _Z12testMultiplyIdLj3ELj3ELj3EEvv .p2align 5 .type _Z12testMultiplyIdLj3ELj3ELj3EEvv,@function _Z12testMultiplyIdLj3ELj3ELj3EEvv: # @_Z12testMultiplyIdLj3ELj3ELj3EEvv @@ -5343,13 +5325,15 @@ _Z12testMultiplyIdLj3ELj3ELj3EEvv: # @_Z12testMultiplyIdLj3ELj3ELj3EEvv lu12i.w $a0, 4 ori $s0, $a0, 423 ori $a0, $zero, 5 - pcalau12i $a1, %pc_hi20(.LCPI7_0) - fld.d $fs1, $a1, %pc_lo12(.LCPI7_0) - pcalau12i $a1, %pc_hi20(.LCPI7_1) - fld.d $fs2, $a1, %pc_lo12(.LCPI7_1) lu32i.d $a0, 2 lu52i.d $s1, $zero, 1107 + lu12i.w $a1, 256 + lu52i.d $a1, $a1, 1107 + movgr2fr.d $fs1, $a1 lu12i.w $s2, 275200 + lu12i.w $a1, -2048 + lu52i.d $a1, $a1, 1053 + movgr2fr.d $fs2, $a1 move $a1, $fp .p2align 4, , 16 .LBB7_1: # %select.unfold.i.i.i.i.i @@ -6587,14 +6571,8 @@ _Z12testMultiplyIdLj3ELj3ELj3EEvv: # @_Z12testMultiplyIdLj3ELj3ELj3EEvv .size _Z12testMultiplyIdLj3ELj3ELj3EEvv, .Lfunc_end7-_Z12testMultiplyIdLj3ELj3ELj3EEvv .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z12testMultiplyIdLj10ELj21ELj23EEvv -.LCPI8_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI8_1: - .dword 0x41dfffffff800000 # double 2147483646 .section .text._Z12testMultiplyIdLj10ELj21ELj23EEvv,"axG",@progbits,_Z12testMultiplyIdLj10ELj21ELj23EEvv,comdat - .weak _Z12testMultiplyIdLj10ELj21ELj23EEvv + .weak _Z12testMultiplyIdLj10ELj21ELj23EEvv # -- Begin function _Z12testMultiplyIdLj10ELj21ELj23EEvv .p2align 5 .type _Z12testMultiplyIdLj10ELj21ELj23EEvv,@function _Z12testMultiplyIdLj10ELj21ELj23EEvv: # @_Z12testMultiplyIdLj10ELj21ELj23EEvv @@ -6678,11 +6656,13 @@ _Z12testMultiplyIdLj10ELj21ELj23EEvv: # @_Z12testMultiplyIdLj10ELj21ELj23EEvv ori $s4, $zero, 5 lu32i.d $s4, 2 lu52i.d $s1, $zero, 1107 - pcalau12i $a0, %pc_hi20(.LCPI8_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI8_0) + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 + movgr2fr.d $fs1, $a0 lu12i.w $s2, 275200 - pcalau12i $a0, %pc_hi20(.LCPI8_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI8_1) + lu12i.w $a0, -2048 + lu52i.d $a0, $a0, 1053 + movgr2fr.d $fs2, $a0 vldi $vr4, -912 vldi $vr5, -860 vldi $vr6, -972 @@ -20097,14 +20077,8 @@ _Z12testMultiplyIdLj10ELj21ELj23EEvv: # @_Z12testMultiplyIdLj10ELj21ELj23EEvv .size _Z12testMultiplyIdLj10ELj21ELj23EEvv, .Lfunc_end8-_Z12testMultiplyIdLj10ELj21ELj23EEvv .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z12testMultiplyIdLj25ELj19ELj11EEvv -.LCPI9_0: - .dword 0x4530000000100000 # double 1.9342813118337666E+25 -.LCPI9_1: - .dword 0x41dfffffff800000 # double 2147483646 .section .text._Z12testMultiplyIdLj25ELj19ELj11EEvv,"axG",@progbits,_Z12testMultiplyIdLj25ELj19ELj11EEvv,comdat - .weak _Z12testMultiplyIdLj25ELj19ELj11EEvv + .weak _Z12testMultiplyIdLj25ELj19ELj11EEvv # -- Begin function _Z12testMultiplyIdLj25ELj19ELj11EEvv .p2align 5 .type _Z12testMultiplyIdLj25ELj19ELj11EEvv,@function _Z12testMultiplyIdLj25ELj19ELj11EEvv: # @_Z12testMultiplyIdLj25ELj19ELj11EEvv @@ -20186,11 +20160,13 @@ _Z12testMultiplyIdLj25ELj19ELj11EEvv: # @_Z12testMultiplyIdLj25ELj19ELj11EEvv ori $s4, $zero, 5 lu32i.d $s4, 2 lu52i.d $s1, $zero, 1107 - pcalau12i $a0, %pc_hi20(.LCPI9_0) - fld.d $fs1, $a0, %pc_lo12(.LCPI9_0) + lu12i.w $a0, 256 + lu52i.d $a0, $a0, 1107 + movgr2fr.d $fs1, $a0 lu12i.w $s2, 275200 - pcalau12i $a0, %pc_hi20(.LCPI9_1) - fld.d $fs2, $a0, %pc_lo12(.LCPI9_1) + lu12i.w $a0, -2048 + lu52i.d $a0, $a0, 1053 + movgr2fr.d $fs2, $a0 vldi $vr4, -912 vldi $vr5, -860 vldi $vr6, -972 @@ -38792,14 +38768,8 @@ _Z12testMultiplyIdLj25ELj19ELj11EEvv: # @_Z12testMultiplyIdLj25ELj19ELj11EEvv .size _Z12testMultiplyIdLj25ELj19ELj11EEvv, .Lfunc_end9-_Z12testMultiplyIdLj25ELj19ELj11EEvv .cfi_endproc # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14expectMatrixEQIdTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typeELi0EEvPS1_S4_jj -.LCPI10_0: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI10_1: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 .section .text._Z14expectMatrixEQIdTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typeELi0EEvPS1_S4_jj,"axG",@progbits,_Z14expectMatrixEQIdTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typeELi0EEvPS1_S4_jj,comdat - .weak _Z14expectMatrixEQIdTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typeELi0EEvPS1_S4_jj + .weak _Z14expectMatrixEQIdTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typeELi0EEvPS1_S4_jj # -- Begin function _Z14expectMatrixEQIdTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typeELi0EEvPS1_S4_jj .p2align 5 .type _Z14expectMatrixEQIdTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typeELi0EEvPS1_S4_jj,@function _Z14expectMatrixEQIdTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typeELi0EEvPS1_S4_jj: # @_Z14expectMatrixEQIdTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typeELi0EEvPS1_S4_jj @@ -38818,13 +38788,19 @@ _Z14expectMatrixEQIdTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typ beqz $a3, .LBB10_11 # %bb.2: # %.preheader.us.preheader move $fp, $zero - pcalau12i $a4, %pc_hi20(.LCPI10_0) - fld.d $fa0, $a4, %pc_lo12(.LCPI10_0) - pcalau12i $a4, %pc_hi20(.LCPI10_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI10_1) bstrpick.d $a3, $a3, 31, 0 - movgr2fr.d $fa2, $zero - vldi $vr3, -784 + lu12i.w $a4, -390306 + ori $a4, $a4, 3469 + lu32i.d $a4, 50935 + lu52i.d $a4, $a4, 1003 + movgr2fr.d $fa0, $a4 + movgr2fr.d $fa1, $zero + vldi $vr2, -784 + lu12i.w $a4, -487882 + ori $a4, $a4, 2289 + lu32i.d $a4, 325813 + lu52i.d $a4, $a4, 1006 + movgr2fr.d $fa3, $a4 .p2align 4, , 16 .LBB10_3: # %.preheader.us # =>This Loop Header: Depth=1 @@ -38836,9 +38812,9 @@ _Z14expectMatrixEQIdTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typ .p2align 4, , 16 .LBB10_4: # in Loop: Header=BB10_5 Depth=2 fdiv.d $fa4, $fa4, $fa5 - fadd.d $fa4, $fa4, $fa3 + fadd.d $fa4, $fa4, $fa2 fabs.d $fa4, $fa4 - fcmp.clt.d $fcc0, $fa1, $fa4 + fcmp.clt.d $fcc0, $fa3, $fa4 bceqz $fcc0, .LBB10_9 b .LBB10_12 .p2align 4, , 16 @@ -38854,16 +38830,16 @@ _Z14expectMatrixEQIdTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typ fcmp.cule.d $fcc0, $fa6, $fa0 bcnez $fcc0, .LBB10_9 # %bb.6: # in Loop: Header=BB10_5 Depth=2 - fcmp.cune.d $fcc0, $fa5, $fa2 + fcmp.cune.d $fcc0, $fa5, $fa1 bcnez $fcc0, .LBB10_4 # %bb.7: # in Loop: Header=BB10_5 Depth=2 - fcmp.ceq.d $fcc0, $fa4, $fa2 + fcmp.ceq.d $fcc0, $fa4, $fa1 bcnez $fcc0, .LBB10_9 # %bb.8: # in Loop: Header=BB10_5 Depth=2 fdiv.d $fa4, $fa5, $fa4 - fadd.d $fa4, $fa4, $fa3 + fadd.d $fa4, $fa4, $fa2 fabs.d $fa4, $fa4 - fcmp.clt.d $fcc0, $fa1, $fa4 + fcmp.clt.d $fcc0, $fa3, $fa4 bcnez $fcc0, .LBB10_12 .p2align 4, , 16 .LBB10_9: # %_Z5fpcmpdddd.exit.us @@ -182098,14 +182074,8 @@ _Z13transposeSpecIfLj31ELj17EEvPT_S1_: # @_Z13transposeSpecIfLj31ELj17EEvPT_S1_ .Lfunc_end11: .size _Z13transposeSpecIfLj31ELj17EEvPT_S1_, .Lfunc_end11-_Z13transposeSpecIfLj31ELj17EEvPT_S1_ # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z14expectMatrixEQIfTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typeELi0EEvPS1_S4_jj -.LCPI12_0: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI12_1: - .dword 0x3ee4f8b588e368f1 # double 1.0000000000000001E-5 .section .text._Z14expectMatrixEQIfTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typeELi0EEvPS1_S4_jj,"axG",@progbits,_Z14expectMatrixEQIfTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typeELi0EEvPS1_S4_jj,comdat - .weak _Z14expectMatrixEQIfTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typeELi0EEvPS1_S4_jj + .weak _Z14expectMatrixEQIfTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typeELi0EEvPS1_S4_jj # -- Begin function _Z14expectMatrixEQIfTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typeELi0EEvPS1_S4_jj .p2align 5 .type _Z14expectMatrixEQIfTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typeELi0EEvPS1_S4_jj,@function _Z14expectMatrixEQIfTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typeELi0EEvPS1_S4_jj: # @_Z14expectMatrixEQIfTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typeELi0EEvPS1_S4_jj @@ -182124,13 +182094,19 @@ _Z14expectMatrixEQIfTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typ beqz $a3, .LBB12_11 # %bb.2: # %.preheader.us.preheader move $fp, $zero - pcalau12i $a4, %pc_hi20(.LCPI12_0) - fld.d $fa0, $a4, %pc_lo12(.LCPI12_0) - pcalau12i $a4, %pc_hi20(.LCPI12_1) - fld.d $fa1, $a4, %pc_lo12(.LCPI12_1) bstrpick.d $a3, $a3, 31, 0 - movgr2fr.w $fa2, $zero - vldi $vr3, -784 + lu12i.w $a4, -390306 + ori $a4, $a4, 3469 + lu32i.d $a4, 50935 + lu52i.d $a4, $a4, 1003 + movgr2fr.d $fa0, $a4 + movgr2fr.w $fa1, $zero + vldi $vr2, -784 + lu12i.w $a4, -487882 + ori $a4, $a4, 2289 + lu32i.d $a4, 325813 + lu52i.d $a4, $a4, 1006 + movgr2fr.d $fa3, $a4 .p2align 4, , 16 .LBB12_3: # %.preheader.us # =>This Loop Header: Depth=1 @@ -182142,9 +182118,9 @@ _Z14expectMatrixEQIfTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typ .p2align 4, , 16 .LBB12_4: # in Loop: Header=BB12_5 Depth=2 fdiv.d $fa4, $fa4, $fa5 - fadd.d $fa4, $fa4, $fa3 + fadd.d $fa4, $fa4, $fa2 fabs.d $fa4, $fa4 - fcmp.clt.d $fcc0, $fa1, $fa4 + fcmp.clt.d $fcc0, $fa3, $fa4 bceqz $fcc0, .LBB12_9 b .LBB12_12 .p2align 4, , 16 @@ -182162,16 +182138,16 @@ _Z14expectMatrixEQIfTnNSt9enable_ifIXsr3std17is_floating_pointIT_EE5valueEiE4typ fcmp.cule.d $fcc0, $ft0, $fa0 bcnez $fcc0, .LBB12_9 # %bb.6: # in Loop: Header=BB12_5 Depth=2 - fcmp.cune.s $fcc0, $fa7, $fa2 + fcmp.cune.s $fcc0, $fa7, $fa1 bcnez $fcc0, .LBB12_4 # %bb.7: # in Loop: Header=BB12_5 Depth=2 - fcmp.ceq.s $fcc0, $fa6, $fa2 + fcmp.ceq.s $fcc0, $fa6, $fa1 bcnez $fcc0, .LBB12_9 # %bb.8: # in Loop: Header=BB12_5 Depth=2 fdiv.d $fa4, $fa5, $fa4 - fadd.d $fa4, $fa4, $fa3 + fadd.d $fa4, $fa4, $fa2 fabs.d $fa4, $fa4 - fcmp.clt.d $fcc0, $fa1, $fa4 + fcmp.clt.d $fcc0, $fa3, $fa4 bcnez $fcc0, .LBB12_12 .p2align 4, , 16 .LBB12_9: # %_Z5fpcmpdddd.exit.us diff --git a/results/SingleSource/UnitTests/Float/CMakeFiles/classify.dir/classify.s b/results/SingleSource/UnitTests/Float/CMakeFiles/classify.dir/classify.s index 0a8bcf9c..e34fdb38 100644 --- a/results/SingleSource/UnitTests/Float/CMakeFiles/classify.dir/classify.s +++ b/results/SingleSource/UnitTests/Float/CMakeFiles/classify.dir/classify.s @@ -1446,12 +1446,7 @@ test_fcPosInf_float: # @test_fcPosInf_float .Lfunc_end3: .size test_fcPosInf_float, .Lfunc_end3-test_fcPosInf_float # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function test_fcNegInf_float -.LCPI4_0: - .word 0x7f800000 # float +Inf - .text - .globl test_fcNegInf_float + .globl test_fcNegInf_float # -- Begin function test_fcNegInf_float .p2align 5 .type test_fcNegInf_float,@function test_fcNegInf_float: # @test_fcNegInf_float @@ -1468,8 +1463,8 @@ test_fcNegInf_float: # @test_fcNegInf_float fcmp.cor.s $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB4_6 # %bb.2: - pcalau12i $a1, %pc_hi20(.LCPI4_0) - fld.s $fa1, $a1, %pc_lo12(.LCPI4_0) + lu12i.w $a1, 522240 + movgr2fr.w $fa1, $a1 fcmp.cune.s $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB4_7 # %bb.3: @@ -1532,14 +1527,7 @@ test_fcNegInf_float: # @test_fcNegInf_float .Lfunc_end4: .size test_fcNegInf_float, .Lfunc_end4-test_fcNegInf_float # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function test_fcPosNormal_float -.LCPI5_0: - .word 0x7f800000 # float +Inf -.LCPI5_1: - .word 0xff800000 # float -Inf - .text - .globl test_fcPosNormal_float + .globl test_fcPosNormal_float # -- Begin function test_fcPosNormal_float .p2align 5 .type test_fcPosNormal_float,@function test_fcPosNormal_float: # @test_fcPosNormal_float @@ -1556,13 +1544,14 @@ test_fcPosNormal_float: # @test_fcPosNormal_float fcmp.cor.s $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB5_7 # %bb.2: - pcalau12i $a1, %pc_hi20(.LCPI5_0) - fld.s $fa1, $a1, %pc_lo12(.LCPI5_0) + lu12i.w $a1, 522240 + movgr2fr.w $fa1, $a1 fcmp.cune.s $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB5_8 # %bb.3: - pcalau12i $a1, %pc_hi20(.LCPI5_1) - fld.s $fa1, $a1, %pc_lo12(.LCPI5_1) + lu12i.w $a1, -2048 + lu32i.d $a1, 0 + movgr2fr.w $fa1, $a1 fcmp.cune.s $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB5_9 # %bb.4: @@ -1637,14 +1626,7 @@ test_fcPosNormal_float: # @test_fcPosNormal_float .Lfunc_end5: .size test_fcPosNormal_float, .Lfunc_end5-test_fcPosNormal_float # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function test_fcNegNormal_float -.LCPI6_0: - .word 0x7f800000 # float +Inf -.LCPI6_1: - .word 0xff800000 # float -Inf - .text - .globl test_fcNegNormal_float + .globl test_fcNegNormal_float # -- Begin function test_fcNegNormal_float .p2align 5 .type test_fcNegNormal_float,@function test_fcNegNormal_float: # @test_fcNegNormal_float @@ -1661,13 +1643,14 @@ test_fcNegNormal_float: # @test_fcNegNormal_float fcmp.cor.s $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB6_8 # %bb.2: - pcalau12i $a1, %pc_hi20(.LCPI6_0) - fld.s $fa1, $a1, %pc_lo12(.LCPI6_0) + lu12i.w $a1, 522240 + movgr2fr.w $fa1, $a1 fcmp.cune.s $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB6_9 # %bb.3: - pcalau12i $a1, %pc_hi20(.LCPI6_1) - fld.s $fa1, $a1, %pc_lo12(.LCPI6_1) + lu12i.w $a1, -2048 + lu32i.d $a1, 0 + movgr2fr.w $fa1, $a1 fcmp.cune.s $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB6_10 # %bb.4: @@ -1759,14 +1742,7 @@ test_fcNegNormal_float: # @test_fcNegNormal_float .Lfunc_end6: .size test_fcNegNormal_float, .Lfunc_end6-test_fcNegNormal_float # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function test_fcPosSubnormal_float -.LCPI7_0: - .word 0x7f800000 # float +Inf -.LCPI7_1: - .word 0xff800000 # float -Inf - .text - .globl test_fcPosSubnormal_float + .globl test_fcPosSubnormal_float # -- Begin function test_fcPosSubnormal_float .p2align 5 .type test_fcPosSubnormal_float,@function test_fcPosSubnormal_float: # @test_fcPosSubnormal_float @@ -1783,13 +1759,14 @@ test_fcPosSubnormal_float: # @test_fcPosSubnormal_float fcmp.cor.s $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB7_9 # %bb.2: - pcalau12i $a1, %pc_hi20(.LCPI7_0) - fld.s $fa1, $a1, %pc_lo12(.LCPI7_0) + lu12i.w $a1, 522240 + movgr2fr.w $fa1, $a1 fcmp.cune.s $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB7_10 # %bb.3: - pcalau12i $a1, %pc_hi20(.LCPI7_1) - fld.s $fa1, $a1, %pc_lo12(.LCPI7_1) + lu12i.w $a1, -2048 + lu32i.d $a1, 0 + movgr2fr.w $fa1, $a1 fcmp.cune.s $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB7_11 # %bb.4: @@ -1898,14 +1875,7 @@ test_fcPosSubnormal_float: # @test_fcPosSubnormal_float .Lfunc_end7: .size test_fcPosSubnormal_float, .Lfunc_end7-test_fcPosSubnormal_float # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function test_fcNegSubnormal_float -.LCPI8_0: - .word 0x7f800000 # float +Inf -.LCPI8_1: - .word 0xff800000 # float -Inf - .text - .globl test_fcNegSubnormal_float + .globl test_fcNegSubnormal_float # -- Begin function test_fcNegSubnormal_float .p2align 5 .type test_fcNegSubnormal_float,@function test_fcNegSubnormal_float: # @test_fcNegSubnormal_float @@ -1922,13 +1892,14 @@ test_fcNegSubnormal_float: # @test_fcNegSubnormal_float fcmp.cor.s $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB8_10 # %bb.2: - pcalau12i $a1, %pc_hi20(.LCPI8_0) - fld.s $fa1, $a1, %pc_lo12(.LCPI8_0) + lu12i.w $a1, 522240 + movgr2fr.w $fa1, $a1 fcmp.cune.s $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB8_11 # %bb.3: - pcalau12i $a1, %pc_hi20(.LCPI8_1) - fld.s $fa1, $a1, %pc_lo12(.LCPI8_1) + lu12i.w $a1, -2048 + lu32i.d $a1, 0 + movgr2fr.w $fa1, $a1 fcmp.cune.s $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB8_12 # %bb.4: @@ -2054,14 +2025,7 @@ test_fcNegSubnormal_float: # @test_fcNegSubnormal_float .Lfunc_end8: .size test_fcNegSubnormal_float, .Lfunc_end8-test_fcNegSubnormal_float # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function test_fcPosZero_float -.LCPI9_0: - .word 0x7f800000 # float +Inf -.LCPI9_1: - .word 0xff800000 # float -Inf - .text - .globl test_fcPosZero_float + .globl test_fcPosZero_float # -- Begin function test_fcPosZero_float .p2align 5 .type test_fcPosZero_float,@function test_fcPosZero_float: # @test_fcPosZero_float @@ -2078,13 +2042,14 @@ test_fcPosZero_float: # @test_fcPosZero_float fcmp.cor.s $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB9_11 # %bb.2: - pcalau12i $a1, %pc_hi20(.LCPI9_0) - fld.s $fa1, $a1, %pc_lo12(.LCPI9_0) + lu12i.w $a1, 522240 + movgr2fr.w $fa1, $a1 fcmp.cune.s $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB9_12 # %bb.3: - pcalau12i $a1, %pc_hi20(.LCPI9_1) - fld.s $fa1, $a1, %pc_lo12(.LCPI9_1) + lu12i.w $a1, -2048 + lu32i.d $a1, 0 + movgr2fr.w $fa1, $a1 fcmp.cune.s $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB9_13 # %bb.4: @@ -2226,14 +2191,7 @@ test_fcPosZero_float: # @test_fcPosZero_float .Lfunc_end9: .size test_fcPosZero_float, .Lfunc_end9-test_fcPosZero_float # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function test_fcNegZero_float -.LCPI10_0: - .word 0x7f800000 # float +Inf -.LCPI10_1: - .word 0xff800000 # float -Inf - .text - .globl test_fcNegZero_float + .globl test_fcNegZero_float # -- Begin function test_fcNegZero_float .p2align 5 .type test_fcNegZero_float,@function test_fcNegZero_float: # @test_fcNegZero_float @@ -2250,13 +2208,14 @@ test_fcNegZero_float: # @test_fcNegZero_float fcmp.cor.s $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB10_11 # %bb.2: - pcalau12i $a1, %pc_hi20(.LCPI10_0) - fld.s $fa1, $a1, %pc_lo12(.LCPI10_0) + lu12i.w $a1, 522240 + movgr2fr.w $fa1, $a1 fcmp.cune.s $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB10_12 # %bb.3: - pcalau12i $a1, %pc_hi20(.LCPI10_1) - fld.s $fa1, $a1, %pc_lo12(.LCPI10_1) + lu12i.w $a1, -2048 + lu32i.d $a1, 0 + movgr2fr.w $fa1, $a1 fcmp.cune.s $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB10_13 # %bb.4: @@ -4462,12 +4421,7 @@ test_fcPosInf_double: # @test_fcPosInf_double .Lfunc_end15: .size test_fcPosInf_double, .Lfunc_end15-test_fcPosInf_double # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function test_fcNegInf_double -.LCPI16_0: - .dword 0x7ff0000000000000 # double +Inf - .text - .globl test_fcNegInf_double + .globl test_fcNegInf_double # -- Begin function test_fcNegInf_double .p2align 5 .type test_fcNegInf_double,@function test_fcNegInf_double: # @test_fcNegInf_double @@ -4484,8 +4438,8 @@ test_fcNegInf_double: # @test_fcNegInf_double fcmp.cor.d $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB16_6 # %bb.2: - pcalau12i $a1, %pc_hi20(.LCPI16_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI16_0) + lu52i.d $a1, $zero, 2047 + movgr2fr.d $fa1, $a1 fcmp.cune.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB16_7 # %bb.3: @@ -4548,14 +4502,7 @@ test_fcNegInf_double: # @test_fcNegInf_double .Lfunc_end16: .size test_fcNegInf_double, .Lfunc_end16-test_fcNegInf_double # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function test_fcPosNormal_double -.LCPI17_0: - .dword 0x7ff0000000000000 # double +Inf -.LCPI17_1: - .dword 0xfff0000000000000 # double -Inf - .text - .globl test_fcPosNormal_double + .globl test_fcPosNormal_double # -- Begin function test_fcPosNormal_double .p2align 5 .type test_fcPosNormal_double,@function test_fcPosNormal_double: # @test_fcPosNormal_double @@ -4572,13 +4519,13 @@ test_fcPosNormal_double: # @test_fcPosNormal_double fcmp.cor.d $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB17_7 # %bb.2: - pcalau12i $a1, %pc_hi20(.LCPI17_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI17_0) + lu52i.d $a1, $zero, 2047 + movgr2fr.d $fa1, $a1 fcmp.cune.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB17_8 # %bb.3: - pcalau12i $a1, %pc_hi20(.LCPI17_1) - fld.d $fa1, $a1, %pc_lo12(.LCPI17_1) + lu52i.d $a1, $zero, -1 + movgr2fr.d $fa1, $a1 fcmp.cune.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB17_9 # %bb.4: @@ -4653,14 +4600,7 @@ test_fcPosNormal_double: # @test_fcPosNormal_double .Lfunc_end17: .size test_fcPosNormal_double, .Lfunc_end17-test_fcPosNormal_double # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function test_fcNegNormal_double -.LCPI18_0: - .dword 0x7ff0000000000000 # double +Inf -.LCPI18_1: - .dword 0xfff0000000000000 # double -Inf - .text - .globl test_fcNegNormal_double + .globl test_fcNegNormal_double # -- Begin function test_fcNegNormal_double .p2align 5 .type test_fcNegNormal_double,@function test_fcNegNormal_double: # @test_fcNegNormal_double @@ -4677,13 +4617,13 @@ test_fcNegNormal_double: # @test_fcNegNormal_double fcmp.cor.d $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB18_8 # %bb.2: - pcalau12i $a1, %pc_hi20(.LCPI18_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI18_0) + lu52i.d $a1, $zero, 2047 + movgr2fr.d $fa1, $a1 fcmp.cune.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB18_9 # %bb.3: - pcalau12i $a1, %pc_hi20(.LCPI18_1) - fld.d $fa1, $a1, %pc_lo12(.LCPI18_1) + lu52i.d $a1, $zero, -1 + movgr2fr.d $fa1, $a1 fcmp.cune.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB18_10 # %bb.4: @@ -4775,14 +4715,7 @@ test_fcNegNormal_double: # @test_fcNegNormal_double .Lfunc_end18: .size test_fcNegNormal_double, .Lfunc_end18-test_fcNegNormal_double # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function test_fcPosSubnormal_double -.LCPI19_0: - .dword 0x7ff0000000000000 # double +Inf -.LCPI19_1: - .dword 0xfff0000000000000 # double -Inf - .text - .globl test_fcPosSubnormal_double + .globl test_fcPosSubnormal_double # -- Begin function test_fcPosSubnormal_double .p2align 5 .type test_fcPosSubnormal_double,@function test_fcPosSubnormal_double: # @test_fcPosSubnormal_double @@ -4799,13 +4732,13 @@ test_fcPosSubnormal_double: # @test_fcPosSubnormal_double fcmp.cor.d $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB19_9 # %bb.2: - pcalau12i $a1, %pc_hi20(.LCPI19_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI19_0) + lu52i.d $a1, $zero, 2047 + movgr2fr.d $fa1, $a1 fcmp.cune.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB19_10 # %bb.3: - pcalau12i $a1, %pc_hi20(.LCPI19_1) - fld.d $fa1, $a1, %pc_lo12(.LCPI19_1) + lu52i.d $a1, $zero, -1 + movgr2fr.d $fa1, $a1 fcmp.cune.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB19_11 # %bb.4: @@ -4914,14 +4847,7 @@ test_fcPosSubnormal_double: # @test_fcPosSubnormal_double .Lfunc_end19: .size test_fcPosSubnormal_double, .Lfunc_end19-test_fcPosSubnormal_double # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function test_fcNegSubnormal_double -.LCPI20_0: - .dword 0x7ff0000000000000 # double +Inf -.LCPI20_1: - .dword 0xfff0000000000000 # double -Inf - .text - .globl test_fcNegSubnormal_double + .globl test_fcNegSubnormal_double # -- Begin function test_fcNegSubnormal_double .p2align 5 .type test_fcNegSubnormal_double,@function test_fcNegSubnormal_double: # @test_fcNegSubnormal_double @@ -4938,13 +4864,13 @@ test_fcNegSubnormal_double: # @test_fcNegSubnormal_double fcmp.cor.d $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB20_10 # %bb.2: - pcalau12i $a1, %pc_hi20(.LCPI20_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI20_0) + lu52i.d $a1, $zero, 2047 + movgr2fr.d $fa1, $a1 fcmp.cune.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB20_11 # %bb.3: - pcalau12i $a1, %pc_hi20(.LCPI20_1) - fld.d $fa1, $a1, %pc_lo12(.LCPI20_1) + lu52i.d $a1, $zero, -1 + movgr2fr.d $fa1, $a1 fcmp.cune.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB20_12 # %bb.4: @@ -5070,14 +4996,7 @@ test_fcNegSubnormal_double: # @test_fcNegSubnormal_double .Lfunc_end20: .size test_fcNegSubnormal_double, .Lfunc_end20-test_fcNegSubnormal_double # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function test_fcPosZero_double -.LCPI21_0: - .dword 0x7ff0000000000000 # double +Inf -.LCPI21_1: - .dword 0xfff0000000000000 # double -Inf - .text - .globl test_fcPosZero_double + .globl test_fcPosZero_double # -- Begin function test_fcPosZero_double .p2align 5 .type test_fcPosZero_double,@function test_fcPosZero_double: # @test_fcPosZero_double @@ -5094,13 +5013,13 @@ test_fcPosZero_double: # @test_fcPosZero_double fcmp.cor.d $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB21_11 # %bb.2: - pcalau12i $a1, %pc_hi20(.LCPI21_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI21_0) + lu52i.d $a1, $zero, 2047 + movgr2fr.d $fa1, $a1 fcmp.cune.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB21_12 # %bb.3: - pcalau12i $a1, %pc_hi20(.LCPI21_1) - fld.d $fa1, $a1, %pc_lo12(.LCPI21_1) + lu52i.d $a1, $zero, -1 + movgr2fr.d $fa1, $a1 fcmp.cune.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB21_13 # %bb.4: @@ -5242,14 +5161,7 @@ test_fcPosZero_double: # @test_fcPosZero_double .Lfunc_end21: .size test_fcPosZero_double, .Lfunc_end21-test_fcPosZero_double # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function test_fcNegZero_double -.LCPI22_0: - .dword 0x7ff0000000000000 # double +Inf -.LCPI22_1: - .dword 0xfff0000000000000 # double -Inf - .text - .globl test_fcNegZero_double + .globl test_fcNegZero_double # -- Begin function test_fcNegZero_double .p2align 5 .type test_fcNegZero_double,@function test_fcNegZero_double: # @test_fcNegZero_double @@ -5266,13 +5178,13 @@ test_fcNegZero_double: # @test_fcNegZero_double fcmp.cor.d $fcc0, $fa0, $fa0 bceqz $fcc0, .LBB22_11 # %bb.2: - pcalau12i $a1, %pc_hi20(.LCPI22_0) - fld.d $fa1, $a1, %pc_lo12(.LCPI22_0) + lu52i.d $a1, $zero, 2047 + movgr2fr.d $fa1, $a1 fcmp.cune.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB22_12 # %bb.3: - pcalau12i $a1, %pc_hi20(.LCPI22_1) - fld.d $fa1, $a1, %pc_lo12(.LCPI22_1) + lu52i.d $a1, $zero, -1 + movgr2fr.d $fa1, $a1 fcmp.cune.d $fcc0, $fa0, $fa1 bceqz $fcc0, .LBB22_13 # %bb.4: diff --git a/results/SingleSource/UnitTests/Vector/CMakeFiles/Vector-simple.dir/simple.s b/results/SingleSource/UnitTests/Vector/CMakeFiles/Vector-simple.dir/simple.s index 75af15aa..67b72372 100644 --- a/results/SingleSource/UnitTests/Vector/CMakeFiles/Vector-simple.dir/simple.s +++ b/results/SingleSource/UnitTests/Vector/CMakeFiles/Vector-simple.dir/simple.s @@ -12,13 +12,9 @@ .LCPI0_2: .word 0x411e0419 # float 9.8760004 .word 0x40f705bc # float 7.71944999 -.LCPI0_4: +.LCPI0_3: .word 0x3f9df3b6 # float 1.23399997 .word 0x401874d1 # float 2.38212991 - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 -.LCPI0_3: - .word 0x3f8e353f # float 1.11099994 .text .globl main .p2align 5 @@ -56,16 +52,17 @@ main: # @main pcalau12i $a1, %pc_hi20(.LCPI0_2) addi.d $a1, $a1, %pc_lo12(.LCPI0_2) fldx.s $fa0, $a1, $a0 - pcalau12i $a0, %pc_hi20(.LCPI0_3) - addi.d $a0, $a0, %pc_lo12(.LCPI0_3) - vldrepl.w $vr1, $a0, 0 + lu12i.w $a0, 260323 + ori $a0, $a0, 1343 + movgr2fr.w $fa1, $a0 + vreplvei.w $vr1, $vr1, 0 vextrins.w $vr1, $vr0, 0 vextrins.w $vr1, $vr0, 16 vfadd.s $vr1, $vr1, $vr1 vst $vr1, $sp, 48 # 16-byte Folded Spill slli.d $a0, $a3, 2 - pcalau12i $a1, %pc_hi20(.LCPI0_4) - addi.d $a1, $a1, %pc_lo12(.LCPI0_4) + pcalau12i $a1, %pc_hi20(.LCPI0_3) + addi.d $a1, $a1, %pc_lo12(.LCPI0_3) fldx.s $fa1, $a1, $a0 vfmul.s $vr2, $vr1, $vr1 vextrins.w $vr1, $vr1, 16 diff --git a/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/any-of.dir/any-of.s b/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/any-of.dir/any-of.s index 11a43eeb..3202855e 100644 --- a/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/any-of.dir/any-of.s +++ b/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/any-of.dir/any-of.s @@ -6860,33 +6860,26 @@ _ZNSt17_Function_handlerIFiPiS0_jEZ4mainE3$_1E10_M_managerERSt9_Any_dataRKS4_St1 .size _ZNSt17_Function_handlerIFiPiS0_jEZ4mainE3$_1E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation, .Lfunc_end13-_ZNSt17_Function_handlerIFiPiS0_jEZ4mainE3$_1E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZL9init_dataIfEvRKSt10unique_ptrIA_T_St14default_deleteIS2_EEj -.LCPI14_0: - .word 0x4f800000 # float 4.2949673E+9 -.LCPI14_1: - .word 0x00800000 # float 1.17549435E-38 -.LCPI14_2: - .word 0x7f7fffff # float 3.40282347E+38 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZL9init_dataIfEvRKSt10unique_ptrIA_T_St14default_deleteIS2_EEj .type _ZL9init_dataIfEvRKSt10unique_ptrIA_T_St14default_deleteIS2_EEj,@function _ZL9init_dataIfEvRKSt10unique_ptrIA_T_St14default_deleteIS2_EEj: # @_ZL9init_dataIfEvRKSt10unique_ptrIA_T_St14default_deleteIS2_EEj # %bb.0: - addi.d $sp, $sp, -224 - st.d $ra, $sp, 216 # 8-byte Folded Spill - st.d $fp, $sp, 208 # 8-byte Folded Spill - st.d $s0, $sp, 200 # 8-byte Folded Spill - st.d $s1, $sp, 192 # 8-byte Folded Spill - st.d $s2, $sp, 184 # 8-byte Folded Spill - st.d $s3, $sp, 176 # 8-byte Folded Spill - st.d $s4, $sp, 168 # 8-byte Folded Spill - st.d $s5, $sp, 160 # 8-byte Folded Spill - st.d $s6, $sp, 152 # 8-byte Folded Spill - st.d $s7, $sp, 144 # 8-byte Folded Spill - st.d $s8, $sp, 136 # 8-byte Folded Spill - fst.d $fs0, $sp, 128 # 8-byte Folded Spill - fst.d $fs1, $sp, 120 # 8-byte Folded Spill + addi.d $sp, $sp, -240 + st.d $ra, $sp, 232 # 8-byte Folded Spill + st.d $fp, $sp, 224 # 8-byte Folded Spill + st.d $s0, $sp, 216 # 8-byte Folded Spill + st.d $s1, $sp, 208 # 8-byte Folded Spill + st.d $s2, $sp, 200 # 8-byte Folded Spill + st.d $s3, $sp, 192 # 8-byte Folded Spill + st.d $s4, $sp, 184 # 8-byte Folded Spill + st.d $s5, $sp, 176 # 8-byte Folded Spill + st.d $s6, $sp, 168 # 8-byte Folded Spill + st.d $s7, $sp, 160 # 8-byte Folded Spill + st.d $s8, $sp, 152 # 8-byte Folded Spill + fst.d $fs0, $sp, 144 # 8-byte Folded Spill + fst.d $fs1, $sp, 136 # 8-byte Folded Spill + fst.d $fs2, $sp, 128 # 8-byte Folded Spill + fst.d $fs3, $sp, 120 # 8-byte Folded Spill move $fp, $a0 ori $a0, $zero, 0 lu32i.d $a0, -65536 @@ -6945,10 +6938,15 @@ _ZL9init_dataIfEvRKSt10unique_ptrIA_T_St14default_deleteIS2_EEj: # @_ZL9init_dat ori $s2, $a0, 1664 lu32i.d $s2, 0 lu12i.w $s3, -66464 - pcalau12i $a0, %pc_hi20(.LCPI14_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI14_0) lu32i.d $s3, 0 + lu12i.w $a0, 325632 + movgr2fr.w $fs1, $a0 vldi $vr11, -1168 + lu12i.w $a0, 2048 + movgr2fr.w $fs2, $a0 + lu12i.w $a0, 522239 + ori $a0, $a0, 4095 + movgr2fr.w $fs3, $a0 ori $t2, $zero, 1000 .p2align 4, , 16 .LBB14_1: # =>This Loop Header: Depth=1 @@ -7089,11 +7087,7 @@ _ZL9init_dataIfEvRKSt10unique_ptrIA_T_St14default_deleteIS2_EEj: # @_ZL9init_dat bceqz $fcc0, .LBB14_11 .LBB14_10: # %_ZNSt25uniform_real_distributionIfEclISt23mersenne_twister_engineImLm32ELm624ELm397ELm31ELm2567483615ELm11ELm4294967295ELm7ELm2636928640ELm15ELm4022730752ELm18ELm1812433253EEEEfRT_.exit # in Loop: Header=BB14_1 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI14_1) - fld.s $fa1, $a0, %pc_lo12(.LCPI14_1) - pcalau12i $a0, %pc_hi20(.LCPI14_2) - fld.s $fa2, $a0, %pc_lo12(.LCPI14_2) - fmadd.s $fa0, $fa0, $fa2, $fa1 + fmadd.s $fa0, $fa0, $fs3, $fs2 slli.d $a0, $a7, 2 addi.d $a7, $a7, 1 fstx.s $fa0, $a5, $a0 @@ -7127,20 +7121,22 @@ _ZL9init_dataIfEvRKSt10unique_ptrIA_T_St14default_deleteIS2_EEj: # @_ZL9init_dat ld.d $a5, $sp, 112 # 8-byte Folded Reload b .LBB14_10 .LBB14_12: - fld.d $fs1, $sp, 120 # 8-byte Folded Reload - fld.d $fs0, $sp, 128 # 8-byte Folded Reload - ld.d $s8, $sp, 136 # 8-byte Folded Reload - ld.d $s7, $sp, 144 # 8-byte Folded Reload - ld.d $s6, $sp, 152 # 8-byte Folded Reload - ld.d $s5, $sp, 160 # 8-byte Folded Reload - ld.d $s4, $sp, 168 # 8-byte Folded Reload - ld.d $s3, $sp, 176 # 8-byte Folded Reload - ld.d $s2, $sp, 184 # 8-byte Folded Reload - ld.d $s1, $sp, 192 # 8-byte Folded Reload - ld.d $s0, $sp, 200 # 8-byte Folded Reload - ld.d $fp, $sp, 208 # 8-byte Folded Reload - ld.d $ra, $sp, 216 # 8-byte Folded Reload - addi.d $sp, $sp, 224 + fld.d $fs3, $sp, 120 # 8-byte Folded Reload + fld.d $fs2, $sp, 128 # 8-byte Folded Reload + fld.d $fs1, $sp, 136 # 8-byte Folded Reload + fld.d $fs0, $sp, 144 # 8-byte Folded Reload + ld.d $s8, $sp, 152 # 8-byte Folded Reload + ld.d $s7, $sp, 160 # 8-byte Folded Reload + ld.d $s6, $sp, 168 # 8-byte Folded Reload + ld.d $s5, $sp, 176 # 8-byte Folded Reload + ld.d $s4, $sp, 184 # 8-byte Folded Reload + ld.d $s3, $sp, 192 # 8-byte Folded Reload + ld.d $s2, $sp, 200 # 8-byte Folded Reload + ld.d $s1, $sp, 208 # 8-byte Folded Reload + ld.d $s0, $sp, 216 # 8-byte Folded Reload + ld.d $fp, $sp, 224 # 8-byte Folded Reload + ld.d $ra, $sp, 232 # 8-byte Folded Reload + addi.d $sp, $sp, 240 ret .Lfunc_end14: .size _ZL9init_dataIfEvRKSt10unique_ptrIA_T_St14default_deleteIS2_EEj, .Lfunc_end14-_ZL9init_dataIfEvRKSt10unique_ptrIA_T_St14default_deleteIS2_EEj diff --git a/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/find-last.dir/find-last.s b/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/find-last.dir/find-last.s index a5fad6b6..73204192 100644 --- a/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/find-last.dir/find-last.s +++ b/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/find-last.dir/find-last.s @@ -5969,33 +5969,26 @@ _ZNSt17_Function_handlerIFiPiS0_iEZ4mainE3$_1E10_M_managerERSt9_Any_dataRKS4_St1 .size _ZNSt17_Function_handlerIFiPiS0_iEZ4mainE3$_1E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation, .Lfunc_end10-_ZNSt17_Function_handlerIFiPiS0_iEZ4mainE3$_1E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZL9init_dataIfEvRKSt10unique_ptrIA_T_St14default_deleteIS2_EEj -.LCPI11_0: - .word 0x4f800000 # float 4.2949673E+9 -.LCPI11_1: - .word 0x00800000 # float 1.17549435E-38 -.LCPI11_2: - .word 0x7f7fffff # float 3.40282347E+38 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZL9init_dataIfEvRKSt10unique_ptrIA_T_St14default_deleteIS2_EEj .type _ZL9init_dataIfEvRKSt10unique_ptrIA_T_St14default_deleteIS2_EEj,@function _ZL9init_dataIfEvRKSt10unique_ptrIA_T_St14default_deleteIS2_EEj: # @_ZL9init_dataIfEvRKSt10unique_ptrIA_T_St14default_deleteIS2_EEj # %bb.0: - addi.d $sp, $sp, -224 - st.d $ra, $sp, 216 # 8-byte Folded Spill - st.d $fp, $sp, 208 # 8-byte Folded Spill - st.d $s0, $sp, 200 # 8-byte Folded Spill - st.d $s1, $sp, 192 # 8-byte Folded Spill - st.d $s2, $sp, 184 # 8-byte Folded Spill - st.d $s3, $sp, 176 # 8-byte Folded Spill - st.d $s4, $sp, 168 # 8-byte Folded Spill - st.d $s5, $sp, 160 # 8-byte Folded Spill - st.d $s6, $sp, 152 # 8-byte Folded Spill - st.d $s7, $sp, 144 # 8-byte Folded Spill - st.d $s8, $sp, 136 # 8-byte Folded Spill - fst.d $fs0, $sp, 128 # 8-byte Folded Spill - fst.d $fs1, $sp, 120 # 8-byte Folded Spill + addi.d $sp, $sp, -240 + st.d $ra, $sp, 232 # 8-byte Folded Spill + st.d $fp, $sp, 224 # 8-byte Folded Spill + st.d $s0, $sp, 216 # 8-byte Folded Spill + st.d $s1, $sp, 208 # 8-byte Folded Spill + st.d $s2, $sp, 200 # 8-byte Folded Spill + st.d $s3, $sp, 192 # 8-byte Folded Spill + st.d $s4, $sp, 184 # 8-byte Folded Spill + st.d $s5, $sp, 176 # 8-byte Folded Spill + st.d $s6, $sp, 168 # 8-byte Folded Spill + st.d $s7, $sp, 160 # 8-byte Folded Spill + st.d $s8, $sp, 152 # 8-byte Folded Spill + fst.d $fs0, $sp, 144 # 8-byte Folded Spill + fst.d $fs1, $sp, 136 # 8-byte Folded Spill + fst.d $fs2, $sp, 128 # 8-byte Folded Spill + fst.d $fs3, $sp, 120 # 8-byte Folded Spill move $fp, $a0 ori $a0, $zero, 0 lu32i.d $a0, -65536 @@ -6054,10 +6047,15 @@ _ZL9init_dataIfEvRKSt10unique_ptrIA_T_St14default_deleteIS2_EEj: # @_ZL9init_dat ori $s2, $a0, 1664 lu32i.d $s2, 0 lu12i.w $s3, -66464 - pcalau12i $a0, %pc_hi20(.LCPI11_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI11_0) lu32i.d $s3, 0 + lu12i.w $a0, 325632 + movgr2fr.w $fs1, $a0 vldi $vr11, -1168 + lu12i.w $a0, 2048 + movgr2fr.w $fs2, $a0 + lu12i.w $a0, 522239 + ori $a0, $a0, 4095 + movgr2fr.w $fs3, $a0 ori $t2, $zero, 1000 .p2align 4, , 16 .LBB11_1: # =>This Loop Header: Depth=1 @@ -6198,11 +6196,7 @@ _ZL9init_dataIfEvRKSt10unique_ptrIA_T_St14default_deleteIS2_EEj: # @_ZL9init_dat bceqz $fcc0, .LBB11_11 .LBB11_10: # %_ZNSt25uniform_real_distributionIfEclISt23mersenne_twister_engineImLm32ELm624ELm397ELm31ELm2567483615ELm11ELm4294967295ELm7ELm2636928640ELm15ELm4022730752ELm18ELm1812433253EEEEfRT_.exit # in Loop: Header=BB11_1 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI11_1) - fld.s $fa1, $a0, %pc_lo12(.LCPI11_1) - pcalau12i $a0, %pc_hi20(.LCPI11_2) - fld.s $fa2, $a0, %pc_lo12(.LCPI11_2) - fmadd.s $fa0, $fa0, $fa2, $fa1 + fmadd.s $fa0, $fa0, $fs3, $fs2 slli.d $a0, $a7, 2 addi.d $a7, $a7, 1 fstx.s $fa0, $a5, $a0 @@ -6236,20 +6230,22 @@ _ZL9init_dataIfEvRKSt10unique_ptrIA_T_St14default_deleteIS2_EEj: # @_ZL9init_dat ld.d $a5, $sp, 112 # 8-byte Folded Reload b .LBB11_10 .LBB11_12: - fld.d $fs1, $sp, 120 # 8-byte Folded Reload - fld.d $fs0, $sp, 128 # 8-byte Folded Reload - ld.d $s8, $sp, 136 # 8-byte Folded Reload - ld.d $s7, $sp, 144 # 8-byte Folded Reload - ld.d $s6, $sp, 152 # 8-byte Folded Reload - ld.d $s5, $sp, 160 # 8-byte Folded Reload - ld.d $s4, $sp, 168 # 8-byte Folded Reload - ld.d $s3, $sp, 176 # 8-byte Folded Reload - ld.d $s2, $sp, 184 # 8-byte Folded Reload - ld.d $s1, $sp, 192 # 8-byte Folded Reload - ld.d $s0, $sp, 200 # 8-byte Folded Reload - ld.d $fp, $sp, 208 # 8-byte Folded Reload - ld.d $ra, $sp, 216 # 8-byte Folded Reload - addi.d $sp, $sp, 224 + fld.d $fs3, $sp, 120 # 8-byte Folded Reload + fld.d $fs2, $sp, 128 # 8-byte Folded Reload + fld.d $fs1, $sp, 136 # 8-byte Folded Reload + fld.d $fs0, $sp, 144 # 8-byte Folded Reload + ld.d $s8, $sp, 152 # 8-byte Folded Reload + ld.d $s7, $sp, 160 # 8-byte Folded Reload + ld.d $s6, $sp, 168 # 8-byte Folded Reload + ld.d $s5, $sp, 176 # 8-byte Folded Reload + ld.d $s4, $sp, 184 # 8-byte Folded Reload + ld.d $s3, $sp, 192 # 8-byte Folded Reload + ld.d $s2, $sp, 200 # 8-byte Folded Reload + ld.d $s1, $sp, 208 # 8-byte Folded Reload + ld.d $s0, $sp, 216 # 8-byte Folded Reload + ld.d $fp, $sp, 224 # 8-byte Folded Reload + ld.d $ra, $sp, 232 # 8-byte Folded Reload + addi.d $sp, $sp, 240 ret .Lfunc_end11: .size _ZL9init_dataIfEvRKSt10unique_ptrIA_T_St14default_deleteIS2_EEj, .Lfunc_end11-_ZL9init_dataIfEvRKSt10unique_ptrIA_T_St14default_deleteIS2_EEj diff --git a/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/fmax-reduction.dir/fmax-reduction.s b/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/fmax-reduction.dir/fmax-reduction.s index ad4c27ac..c999869f 100644 --- a/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/fmax-reduction.dir/fmax-reduction.s +++ b/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/fmax-reduction.dir/fmax-reduction.s @@ -1488,22 +1488,14 @@ GCC_except_table0: .Lttbase0: .p2align 2, 0x0 # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc -.LCPI1_0: - .word 0x4f800000 # float 4.2949673E+9 -.LCPI1_1: - .word 0x00800000 # float 1.17549435E-38 -.LCPI1_2: - .word 0x7f7fffff # float 3.40282347E+38 .section .rodata.cst16,"aM",@progbits,16 - .p2align 4, 0x0 -.LCPI1_3: + .p2align 4, 0x0 # -- Begin function _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc +.LCPI1_0: .word 1 # 0x1 .word 2 # 0x2 .word 3 # 0x3 .word 4 # 0x4 -.LCPI1_4: +.LCPI1_1: .word 0 # 0x0 .word 1 # 0x1 .word 2 # 0x2 @@ -1517,22 +1509,23 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun .cfi_personality 155, DW.ref.__gxx_personality_v0 .cfi_lsda 27, .Lexception1 # %bb.0: - addi.d $sp, $sp, -1216 - .cfi_def_cfa_offset 1216 - st.d $ra, $sp, 1208 # 8-byte Folded Spill - st.d $fp, $sp, 1200 # 8-byte Folded Spill - st.d $s0, $sp, 1192 # 8-byte Folded Spill - st.d $s1, $sp, 1184 # 8-byte Folded Spill - st.d $s2, $sp, 1176 # 8-byte Folded Spill - st.d $s3, $sp, 1168 # 8-byte Folded Spill - st.d $s4, $sp, 1160 # 8-byte Folded Spill - st.d $s5, $sp, 1152 # 8-byte Folded Spill - st.d $s6, $sp, 1144 # 8-byte Folded Spill - st.d $s7, $sp, 1136 # 8-byte Folded Spill - st.d $s8, $sp, 1128 # 8-byte Folded Spill - fst.d $fs0, $sp, 1120 # 8-byte Folded Spill - fst.d $fs1, $sp, 1112 # 8-byte Folded Spill - fst.d $fs2, $sp, 1104 # 8-byte Folded Spill + addi.d $sp, $sp, -1232 + .cfi_def_cfa_offset 1232 + st.d $ra, $sp, 1224 # 8-byte Folded Spill + st.d $fp, $sp, 1216 # 8-byte Folded Spill + st.d $s0, $sp, 1208 # 8-byte Folded Spill + st.d $s1, $sp, 1200 # 8-byte Folded Spill + st.d $s2, $sp, 1192 # 8-byte Folded Spill + st.d $s3, $sp, 1184 # 8-byte Folded Spill + st.d $s4, $sp, 1176 # 8-byte Folded Spill + st.d $s5, $sp, 1168 # 8-byte Folded Spill + st.d $s6, $sp, 1160 # 8-byte Folded Spill + st.d $s7, $sp, 1152 # 8-byte Folded Spill + st.d $s8, $sp, 1144 # 8-byte Folded Spill + fst.d $fs0, $sp, 1136 # 8-byte Folded Spill + fst.d $fs1, $sp, 1128 # 8-byte Folded Spill + fst.d $fs2, $sp, 1120 # 8-byte Folded Spill + fst.d $fs3, $sp, 1112 # 8-byte Folded Spill .cfi_offset 1, -8 .cfi_offset 22, -16 .cfi_offset 23, -24 @@ -1547,6 +1540,7 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun .cfi_offset 56, -96 .cfi_offset 57, -104 .cfi_offset 58, -112 + .cfi_offset 59, -120 move $fp, $a2 move $s0, $a1 move $s1, $a0 @@ -1587,11 +1581,11 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun ori $fp, $zero, 1 pcaddu18i $ra, %call36(_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l) jirl $ra, $ra, 0 - lu12i.w $s7, 1 - move $a0, $s7 + lu12i.w $s5, 1 + move $a0, $s5 pcaddu18i $ra, %call36(_Znam) jirl $ra, $ra, 0 - st.d $a0, $sp, 184 # 8-byte Folded Spill + st.d $a0, $sp, 192 # 8-byte Folded Spill ori $a0, $zero, 0 lu32i.d $a0, -65536 lu52i.d $a1, $a0, 1025 @@ -1619,13 +1613,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun masknez $a1, $fp, $a1 or $a5, $a0, $a1 pcalau12i $a0, %pc_hi20(_ZL3rng) - addi.d $s6, $a0, %pc_lo12(_ZL3rng) + addi.d $s8, $a0, %pc_lo12(_ZL3rng) move $a6, $zero - ldptr.d $fp, $s6, 4992 + ldptr.d $fp, $s8, 4992 movgr2fr.w $fs0, $zero ori $a7, $zero, 624 lu12i.w $a0, -524288 - st.d $a0, $sp, 152 # 8-byte Folded Spill + st.d $a0, $sp, 168 # 8-byte Folded Spill vreplgr2vr.d $vr6, $a0 lu12i.w $a0, 524287 ori $a0, $a0, 4094 @@ -1637,20 +1631,26 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun ori $t0, $a0, 223 lu32i.d $t0, 0 vreplgr2vr.d $vr10, $t0 - ori $s5, $zero, 1808 + ori $s6, $zero, 1808 lu12i.w $a0, -1 ori $t1, $a0, 928 - ori $s4, $s7, 896 - ori $s8, $zero, 3168 - ori $s7, $s7, 888 + ori $s4, $s5, 896 + ori $s7, $zero, 3168 + ori $s5, $s5, 888 lu12i.w $a0, -404795 ori $t2, $a0, 1664 lu32i.d $t2, 0 lu12i.w $s3, -66464 - pcalau12i $a0, %pc_hi20(.LCPI1_0) - fld.s $fs1, $a0, %pc_lo12(.LCPI1_0) lu32i.d $s3, 0 + lu12i.w $a0, 325632 + movgr2fr.w $fs1, $a0 vldi $vr11, -1168 + lu12i.w $a0, 2048 + movgr2fr.w $fs2, $a0 + lu12i.w $a0, 522239 + ori $a0, $a0, 4095 + st.d $a0, $sp, 144 # 8-byte Folded Spill + movgr2fr.w $fs3, $a0 ori $t3, $zero, 1024 .p2align 4, , 16 .LBB1_4: # =>This Loop Header: Depth=1 @@ -1666,8 +1666,8 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun # in Loop: Header=BB1_6 Depth=2 slli.d $a1, $fp, 3 addi.d $fp, $fp, 1 - stptr.d $fp, $s6, 4992 - ldx.d $a1, $s6, $a1 + stptr.d $fp, $s8, 4992 + ldx.d $a1, $s8, $a1 bstrpick.d $a2, $a1, 42, 11 xor $a1, $a2, $a1 slli.d $a2, $a1, 7 @@ -1701,7 +1701,7 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun bltu $fp, $a7, .LBB1_5 # %bb.7: # %vector.ph1229 # in Loop: Header=BB1_6 Depth=2 - ld.d $a2, $s6, 0 + ld.d $a2, $s8, 0 move $a1, $zero vinsgr2vr.d $vr2, $a2, 1 .p2align 4, , 16 @@ -1710,7 +1710,7 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun # Parent Loop BB1_6 Depth=2 # => This Inner Loop Header: Depth=3 vori.b $vr3, $vr2, 0 - add.d $a2, $s6, $a1 + add.d $a2, $s8, $a1 vld $vr2, $a2, 8 vshuf4i.d $vr3, $vr2, 9 vand.v $vr3, $vr3, $vr6 @@ -1724,15 +1724,15 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun vxor.v $vr4, $vr4, $vr9 vand.v $vr4, $vr4, $vr10 vxor.v $vr3, $vr3, $vr4 - vstx $vr3, $s6, $a1 + vstx $vr3, $s8, $a1 addi.d $a1, $a1, 16 - bne $a1, $s5, .LBB1_8 + bne $a1, $s6, .LBB1_8 # %bb.9: # %vector.ph # in Loop: Header=BB1_6 Depth=2 - ld.d $a1, $s6, 1816 + ld.d $a1, $s8, 1816 vpickve2gr.d $a2, $vr2, 1 bstrpick.d $a3, $a1, 30, 1 - ldptr.d $a4, $s6, 4984 + ldptr.d $a4, $s8, 4984 slli.d $a3, $a3, 1 bstrins.d $a2, $a3, 30, 0 srli.d $a2, $a2, 1 @@ -1741,7 +1741,7 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun sub.d $a3, $zero, $a3 and $a3, $a3, $t0 xor $a2, $a2, $a3 - st.d $a2, $s6, 1808 + st.d $a2, $s8, 1808 vinsgr2vr.d $vr2, $a1, 1 move $a1, $t1 .p2align 4, , 16 @@ -1749,11 +1749,11 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun # Parent Loop BB1_4 Depth=1 # Parent Loop BB1_6 Depth=2 # => This Inner Loop Header: Depth=3 - add.d $a2, $s6, $a1 + add.d $a2, $s8, $a1 vldx $vr3, $a2, $s4 vshuf4i.d $vr2, $vr3, 9 vand.v $vr2, $vr2, $vr6 - vldx $vr4, $a2, $s8 + vldx $vr4, $a2, $s7 vand.v $vr5, $vr3, $vr7 vor.v $vr2, $vr5, $vr2 vsrli.d $vr2, $vr2, 1 @@ -1764,16 +1764,16 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun vand.v $vr4, $vr4, $vr10 vxor.v $vr2, $vr2, $vr4 addi.d $a1, $a1, 16 - vstx $vr2, $a2, $s7 + vstx $vr2, $a2, $s5 vori.b $vr2, $vr3, 0 bnez $a1, .LBB1_10 # %bb.11: # %_ZNSt23mersenne_twister_engineImLm32ELm624ELm397ELm31ELm2567483615ELm11ELm4294967295ELm7ELm2636928640ELm15ELm4022730752ELm18ELm1812433253EE11_M_gen_randEv.exit.i.i # in Loop: Header=BB1_6 Depth=2 - ld.d $a1, $s6, 0 + ld.d $a1, $s8, 0 move $fp, $zero - ldptr.d $a2, $s6, 4984 + ldptr.d $a2, $s8, 4984 bstrpick.d $a3, $a1, 30, 1 - ldptr.d $a4, $s6, 3168 + ldptr.d $a4, $s8, 3168 slli.d $a3, $a3, 1 bstrins.d $a2, $a3, 30, 0 srli.d $a2, $a2, 1 @@ -1782,7 +1782,7 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun sub.d $a1, $zero, $a1 and $a1, $a1, $t0 xor $a1, $a2, $a1 - stptr.d $a1, $s6, 4984 + stptr.d $a1, $s8, 4984 b .LBB1_5 .p2align 4, , 16 .LBB1_12: # in Loop: Header=BB1_4 Depth=1 @@ -1791,21 +1791,17 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun bceqz $fcc0, .LBB1_14 .LBB1_13: # %_ZNSt25uniform_real_distributionIfEclISt23mersenne_twister_engineImLm32ELm624ELm397ELm31ELm2567483615ELm11ELm4294967295ELm7ELm2636928640ELm15ELm4022730752ELm18ELm1812433253EEEEfRT_.exit.i # in Loop: Header=BB1_4 Depth=1 - pcalau12i $a0, %pc_hi20(.LCPI1_1) - fld.s $fa1, $a0, %pc_lo12(.LCPI1_1) - pcalau12i $a0, %pc_hi20(.LCPI1_2) - fld.s $fa2, $a0, %pc_lo12(.LCPI1_2) - fmadd.s $fa0, $fa0, $fa2, $fa1 + fmadd.s $fa0, $fa0, $fs3, $fs2 slli.d $a0, $a6, 2 addi.d $a6, $a6, 1 - ld.d $a1, $sp, 184 # 8-byte Folded Reload + ld.d $a1, $sp, 192 # 8-byte Folded Reload fstx.s $fa0, $a1, $a0 bne $a6, $t3, .LBB1_4 b .LBB1_15 .LBB1_14: # in Loop: Header=BB1_4 Depth=1 vldi $vr0, -1168 fmov.s $fa1, $fs0 - st.d $a5, $sp, 160 # 8-byte Folded Spill + st.d $a5, $sp, 176 # 8-byte Folded Spill st.d $a6, $sp, 128 # 8-byte Folded Spill vst $vr6, $sp, 112 # 16-byte Folded Spill vst $vr7, $sp, 96 # 16-byte Folded Spill @@ -1829,10 +1825,10 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun vld $vr6, $sp, 112 # 16-byte Folded Reload ori $a7, $zero, 624 ld.d $a6, $sp, 128 # 8-byte Folded Reload - ld.d $a5, $sp, 160 # 8-byte Folded Reload + ld.d $a5, $sp, 176 # 8-byte Folded Reload b .LBB1_13 .LBB1_15: - ld.d $fp, $sp, 184 # 8-byte Folded Reload + ld.d $fp, $sp, 192 # 8-byte Folded Reload lu12i.w $s2, 1 add.d $s3, $fp, $s2 .Ltmp195: # EH_LABEL @@ -1854,71 +1850,71 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun # %bb.17: # %_ZSt4sortIPfEvT_S1_.exit ld.d $a3, $s1, 16 vrepli.b $vr0, 0 - vst $vr0, $sp, 1072 - vst $vr0, $sp, 160 # 16-byte Folded Spill - vst $vr0, $sp, 1056 + vst $vr0, $sp, 1080 + vst $vr0, $sp, 176 # 16-byte Folded Spill + vst $vr0, $sp, 1064 lu12i.w $s3, -1 beqz $a3, .LBB1_20 # %bb.18: .Ltmp200: # EH_LABEL - addi.d $a0, $sp, 1056 + addi.d $a0, $sp, 1064 ori $a2, $zero, 2 move $a1, $s1 jirl $ra, $a3, 0 .Ltmp201: # EH_LABEL # %bb.19: vld $vr0, $s1, 16 - vst $vr0, $sp, 1072 + vst $vr0, $sp, 1080 .LBB1_20: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit ld.d $a3, $s0, 16 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 1040 - vst $vr0, $sp, 1024 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 1048 + vst $vr0, $sp, 1032 beqz $a3, .LBB1_23 # %bb.21: .Ltmp206: # EH_LABEL - addi.d $a0, $sp, 1024 + addi.d $a0, $sp, 1032 ori $a2, $zero, 2 move $a1, $s0 jirl $ra, $a3, 0 .Ltmp207: # EH_LABEL # %bb.22: vld $vr0, $s0, 16 - vst $vr0, $sp, 1040 + vst $vr0, $sp, 1048 .LBB1_23: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit220 .Ltmp212: # EH_LABEL pcalau12i $a0, %pc_hi20(.L.str.15) addi.d $a3, $a0, %pc_lo12(.L.str.15) - addi.d $a0, $sp, 1056 - addi.d $a1, $sp, 1024 - ld.d $a2, $sp, 184 # 8-byte Folded Reload + addi.d $a0, $sp, 1064 + addi.d $a1, $sp, 1032 + ld.d $a2, $sp, 192 # 8-byte Folded Reload pcaddu18i $ra, %call36(_ZL5checkIfEvSt8functionIFT_PS1_jEES4_PfjPKc) jirl $ra, $ra, 0 .Ltmp213: # EH_LABEL # %bb.24: - ld.d $a3, $sp, 1040 + ld.d $a3, $sp, 1048 beqz $a3, .LBB1_26 # %bb.25: .Ltmp221: # EH_LABEL - addi.d $a0, $sp, 1024 - addi.d $a1, $sp, 1024 + addi.d $a0, $sp, 1032 + addi.d $a1, $sp, 1032 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp222: # EH_LABEL .LBB1_26: # %_ZNSt14_Function_baseD2Ev.exit - ld.d $a3, $sp, 1072 + ld.d $a3, $sp, 1080 beqz $a3, .LBB1_28 # %bb.27: .Ltmp224: # EH_LABEL - addi.d $a0, $sp, 1056 - addi.d $a1, $sp, 1056 + addi.d $a0, $sp, 1064 + addi.d $a1, $sp, 1064 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp225: # EH_LABEL .LBB1_28: # %vector.body1240.preheader ori $a0, $zero, 4080 ori $a1, $zero, 2032 - ld.d $a3, $sp, 184 # 8-byte Folded Reload + ld.d $a3, $sp, 192 # 8-byte Folded Reload move $a2, $a3 .p2align 4, , 16 .LBB1_29: # %vector.body1240 @@ -1934,72 +1930,71 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun bne $a0, $a1, .LBB1_29 # %bb.30: # %_ZSt7reverseIPfEvT_S1_.exit ld.d $a3, $s1, 16 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 1008 - vst $vr0, $sp, 992 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 1016 + vst $vr0, $sp, 1000 beqz $a3, .LBB1_33 # %bb.31: .Ltmp227: # EH_LABEL - addi.d $a0, $sp, 992 + addi.d $a0, $sp, 1000 ori $a2, $zero, 2 move $a1, $s1 jirl $ra, $a3, 0 .Ltmp228: # EH_LABEL # %bb.32: vld $vr0, $s1, 16 - vst $vr0, $sp, 1008 + vst $vr0, $sp, 1016 .LBB1_33: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit229 ld.d $a3, $s0, 16 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 976 - vst $vr0, $sp, 960 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 984 + vst $vr0, $sp, 968 beqz $a3, .LBB1_36 # %bb.34: .Ltmp233: # EH_LABEL - addi.d $a0, $sp, 960 + addi.d $a0, $sp, 968 ori $a2, $zero, 2 move $a1, $s0 jirl $ra, $a3, 0 .Ltmp234: # EH_LABEL # %bb.35: vld $vr0, $s0, 16 - vst $vr0, $sp, 976 + vst $vr0, $sp, 984 .LBB1_36: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit235 .Ltmp239: # EH_LABEL pcalau12i $a0, %pc_hi20(.L.str.16) addi.d $a3, $a0, %pc_lo12(.L.str.16) - addi.d $a0, $sp, 992 - addi.d $a1, $sp, 960 - ld.d $a2, $sp, 184 # 8-byte Folded Reload + addi.d $a0, $sp, 1000 + addi.d $a1, $sp, 968 + ld.d $a2, $sp, 192 # 8-byte Folded Reload pcaddu18i $ra, %call36(_ZL5checkIfEvSt8functionIFT_PS1_jEES4_PfjPKc) jirl $ra, $ra, 0 .Ltmp240: # EH_LABEL # %bb.37: - ld.d $a3, $sp, 976 + ld.d $a3, $sp, 984 beqz $a3, .LBB1_39 # %bb.38: .Ltmp248: # EH_LABEL - addi.d $a0, $sp, 960 - addi.d $a1, $sp, 960 + addi.d $a0, $sp, 968 + addi.d $a1, $sp, 968 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp249: # EH_LABEL .LBB1_39: # %_ZNSt14_Function_baseD2Ev.exit237 - ld.d $a3, $sp, 1008 + ld.d $a3, $sp, 1016 beqz $a3, .LBB1_41 # %bb.40: .Ltmp251: # EH_LABEL - addi.d $a0, $sp, 992 - addi.d $a1, $sp, 992 + addi.d $a0, $sp, 1000 + addi.d $a1, $sp, 1000 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp252: # EH_LABEL .LBB1_41: # %vector.body1252.preheader - lu12i.w $a0, 522239 - ori $a0, $a0, 4095 + ld.d $a0, $sp, 144 # 8-byte Folded Reload vreplgr2vr.w $vr0, $a0 ori $fp, $s2, 16 - ld.d $a1, $sp, 184 # 8-byte Folded Reload + ld.d $a1, $sp, 192 # 8-byte Folded Reload .p2align 4, , 16 .LBB1_42: # %vector.body1252 # =>This Inner Loop Header: Depth=1 @@ -2010,63 +2005,63 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun bnez $s3, .LBB1_42 # %bb.43: # %middle.block1255 ld.d $a3, $s1, 16 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 944 - vst $vr0, $sp, 928 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 952 + vst $vr0, $sp, 936 beqz $a3, .LBB1_46 # %bb.44: .Ltmp254: # EH_LABEL - addi.d $a0, $sp, 928 + addi.d $a0, $sp, 936 ori $a2, $zero, 2 move $a1, $s1 jirl $ra, $a3, 0 .Ltmp255: # EH_LABEL # %bb.45: vld $vr0, $s1, 16 - vst $vr0, $sp, 944 + vst $vr0, $sp, 952 .LBB1_46: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit245 ld.d $a3, $s0, 16 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 912 - vst $vr0, $sp, 896 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 920 + vst $vr0, $sp, 904 beqz $a3, .LBB1_49 # %bb.47: .Ltmp260: # EH_LABEL - addi.d $a0, $sp, 896 + addi.d $a0, $sp, 904 ori $a2, $zero, 2 move $a1, $s0 jirl $ra, $a3, 0 .Ltmp261: # EH_LABEL # %bb.48: vld $vr0, $s0, 16 - vst $vr0, $sp, 912 + vst $vr0, $sp, 920 .LBB1_49: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit259 .Ltmp266: # EH_LABEL pcalau12i $a0, %pc_hi20(.L.str.17) addi.d $a3, $a0, %pc_lo12(.L.str.17) - addi.d $a0, $sp, 928 - addi.d $a1, $sp, 896 - ld.d $a2, $sp, 184 # 8-byte Folded Reload + addi.d $a0, $sp, 936 + addi.d $a1, $sp, 904 + ld.d $a2, $sp, 192 # 8-byte Folded Reload pcaddu18i $ra, %call36(_ZL5checkIfEvSt8functionIFT_PS1_jEES4_PfjPKc) jirl $ra, $ra, 0 .Ltmp267: # EH_LABEL # %bb.50: - ld.d $a3, $sp, 912 + ld.d $a3, $sp, 920 beqz $a3, .LBB1_52 # %bb.51: .Ltmp275: # EH_LABEL - addi.d $a0, $sp, 896 - addi.d $a1, $sp, 896 + addi.d $a0, $sp, 904 + addi.d $a1, $sp, 904 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp276: # EH_LABEL .LBB1_52: # %_ZNSt14_Function_baseD2Ev.exit261 - ld.d $a3, $sp, 944 + ld.d $a3, $sp, 952 beqz $a3, .LBB1_54 # %bb.53: .Ltmp278: # EH_LABEL - addi.d $a0, $sp, 928 - addi.d $a1, $sp, 928 + addi.d $a0, $sp, 936 + addi.d $a1, $sp, 936 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp279: # EH_LABEL @@ -2074,7 +2069,7 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun lu12i.w $a0, -1 lu12i.w $a1, 2048 vreplgr2vr.w $vr0, $a1 - ld.d $a2, $sp, 184 # 8-byte Folded Reload + ld.d $a2, $sp, 192 # 8-byte Folded Reload .p2align 4, , 16 .LBB1_55: # %vector.body1258 # =>This Inner Loop Header: Depth=1 @@ -2085,72 +2080,72 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun bnez $a0, .LBB1_55 # %bb.56: # %middle.block1261 ld.d $a3, $s1, 16 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 880 - vst $vr0, $sp, 864 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 888 + vst $vr0, $sp, 872 beqz $a3, .LBB1_59 # %bb.57: .Ltmp281: # EH_LABEL - addi.d $a0, $sp, 864 + addi.d $a0, $sp, 872 ori $a2, $zero, 2 move $a1, $s1 jirl $ra, $a3, 0 .Ltmp282: # EH_LABEL # %bb.58: vld $vr0, $s1, 16 - vst $vr0, $sp, 880 + vst $vr0, $sp, 888 .LBB1_59: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit269 ld.d $a3, $s0, 16 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 848 - vst $vr0, $sp, 832 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 856 + vst $vr0, $sp, 840 beqz $a3, .LBB1_62 # %bb.60: .Ltmp287: # EH_LABEL - addi.d $a0, $sp, 832 + addi.d $a0, $sp, 840 ori $a2, $zero, 2 move $a1, $s0 jirl $ra, $a3, 0 .Ltmp288: # EH_LABEL # %bb.61: vld $vr0, $s0, 16 - vst $vr0, $sp, 848 + vst $vr0, $sp, 856 .LBB1_62: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit279 .Ltmp293: # EH_LABEL pcalau12i $a0, %pc_hi20(.L.str.18) addi.d $a3, $a0, %pc_lo12(.L.str.18) - addi.d $a0, $sp, 864 - addi.d $a1, $sp, 832 - ld.d $a2, $sp, 184 # 8-byte Folded Reload + addi.d $a0, $sp, 872 + addi.d $a1, $sp, 840 + ld.d $a2, $sp, 192 # 8-byte Folded Reload pcaddu18i $ra, %call36(_ZL5checkIfEvSt8functionIFT_PS1_jEES4_PfjPKc) jirl $ra, $ra, 0 .Ltmp294: # EH_LABEL # %bb.63: - ld.d $a3, $sp, 848 + ld.d $a3, $sp, 856 beqz $a3, .LBB1_65 # %bb.64: .Ltmp302: # EH_LABEL - addi.d $a0, $sp, 832 - addi.d $a1, $sp, 832 + addi.d $a0, $sp, 840 + addi.d $a1, $sp, 840 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp303: # EH_LABEL .LBB1_65: # %_ZNSt14_Function_baseD2Ev.exit281 - ld.d $a3, $sp, 880 + ld.d $a3, $sp, 888 beqz $a3, .LBB1_67 # %bb.66: .Ltmp305: # EH_LABEL - addi.d $a0, $sp, 864 - addi.d $a1, $sp, 864 + addi.d $a0, $sp, 872 + addi.d $a1, $sp, 872 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp306: # EH_LABEL .LBB1_67: # %_ZNSt14_Function_baseD2Ev.exit283 ori $a0, $zero, 1 - ld.d $a4, $sp, 184 # 8-byte Folded Reload + ld.d $a4, $sp, 192 # 8-byte Folded Reload st.w $a0, $a4, 0 - pcalau12i $a0, %pc_hi20(.LCPI1_3) - vld $vr0, $a0, %pc_lo12(.LCPI1_3) + pcalau12i $a0, %pc_hi20(.LCPI1_0) + vld $vr0, $a0, %pc_lo12(.LCPI1_0) lu12i.w $s7, -1 ori $a0, $s7, 16 vrepli.w $vr1, 1 @@ -2169,131 +2164,131 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun ld.d $a3, $s1, 16 stptr.d $zero, $a4, 4084 stptr.w $zero, $a4, 4092 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 800 - vst $vr0, $sp, 816 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 808 + vst $vr0, $sp, 824 beqz $a3, .LBB1_72 # %bb.70: .Ltmp308: # EH_LABEL - addi.d $a0, $sp, 800 + addi.d $a0, $sp, 808 ori $a2, $zero, 2 move $a1, $s1 jirl $ra, $a3, 0 .Ltmp309: # EH_LABEL # %bb.71: vld $vr0, $s1, 16 - vst $vr0, $sp, 816 + vst $vr0, $sp, 824 .LBB1_72: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit289 ld.d $a3, $s0, 16 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 784 - vst $vr0, $sp, 768 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 792 + vst $vr0, $sp, 776 beqz $a3, .LBB1_75 # %bb.73: .Ltmp314: # EH_LABEL - addi.d $a0, $sp, 768 + addi.d $a0, $sp, 776 ori $a2, $zero, 2 move $a1, $s0 jirl $ra, $a3, 0 .Ltmp315: # EH_LABEL # %bb.74: vld $vr0, $s0, 16 - vst $vr0, $sp, 784 + vst $vr0, $sp, 792 .LBB1_75: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit299 .Ltmp320: # EH_LABEL pcalau12i $a0, %pc_hi20(.L.str.19) addi.d $a3, $a0, %pc_lo12(.L.str.19) - addi.d $a0, $sp, 800 - addi.d $a1, $sp, 768 - ld.d $a2, $sp, 184 # 8-byte Folded Reload + addi.d $a0, $sp, 808 + addi.d $a1, $sp, 776 + ld.d $a2, $sp, 192 # 8-byte Folded Reload pcaddu18i $ra, %call36(_ZL5checkIfEvSt8functionIFT_PS1_jEES4_PfjPKc) jirl $ra, $ra, 0 .Ltmp321: # EH_LABEL # %bb.76: - ld.d $a3, $sp, 784 + ld.d $a3, $sp, 792 beqz $a3, .LBB1_78 # %bb.77: .Ltmp329: # EH_LABEL - addi.d $a0, $sp, 768 - addi.d $a1, $sp, 768 + addi.d $a0, $sp, 776 + addi.d $a1, $sp, 776 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp330: # EH_LABEL .LBB1_78: # %_ZNSt14_Function_baseD2Ev.exit301 - ld.d $a3, $sp, 816 + ld.d $a3, $sp, 824 beqz $a3, .LBB1_80 # %bb.79: .Ltmp332: # EH_LABEL - addi.d $a0, $sp, 800 - addi.d $a1, $sp, 800 + addi.d $a0, $sp, 808 + addi.d $a1, $sp, 808 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp333: # EH_LABEL .LBB1_80: # %_ZNSt14_Function_baseD2Ev.exit303 lu12i.w $s2, 1 - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload move $a1, $zero move $a2, $s2 pcaddu18i $ra, %call36(memset) jirl $ra, $ra, 0 ld.d $a3, $s1, 16 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 736 - vst $vr0, $sp, 752 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 744 + vst $vr0, $sp, 760 beqz $a3, .LBB1_83 # %bb.81: .Ltmp335: # EH_LABEL - addi.d $a0, $sp, 736 + addi.d $a0, $sp, 744 ori $a2, $zero, 2 move $a1, $s1 jirl $ra, $a3, 0 .Ltmp336: # EH_LABEL # %bb.82: vld $vr0, $s1, 16 - vst $vr0, $sp, 752 + vst $vr0, $sp, 760 .LBB1_83: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit309 ld.d $a3, $s0, 16 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 720 - vst $vr0, $sp, 704 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 728 + vst $vr0, $sp, 712 beqz $a3, .LBB1_86 # %bb.84: .Ltmp341: # EH_LABEL - addi.d $a0, $sp, 704 + addi.d $a0, $sp, 712 ori $a2, $zero, 2 move $a1, $s0 jirl $ra, $a3, 0 .Ltmp342: # EH_LABEL # %bb.85: vld $vr0, $s0, 16 - vst $vr0, $sp, 720 + vst $vr0, $sp, 728 .LBB1_86: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit319 .Ltmp347: # EH_LABEL pcalau12i $a0, %pc_hi20(.L.str.20) addi.d $a3, $a0, %pc_lo12(.L.str.20) - addi.d $a0, $sp, 736 - addi.d $a1, $sp, 704 - ld.d $a2, $sp, 184 # 8-byte Folded Reload + addi.d $a0, $sp, 744 + addi.d $a1, $sp, 712 + ld.d $a2, $sp, 192 # 8-byte Folded Reload pcaddu18i $ra, %call36(_ZL5checkIfEvSt8functionIFT_PS1_jEES4_PfjPKc) jirl $ra, $ra, 0 .Ltmp348: # EH_LABEL # %bb.87: - ld.d $a3, $sp, 720 + ld.d $a3, $sp, 728 beqz $a3, .LBB1_89 # %bb.88: .Ltmp356: # EH_LABEL - addi.d $a0, $sp, 704 - addi.d $a1, $sp, 704 + addi.d $a0, $sp, 712 + addi.d $a1, $sp, 712 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp357: # EH_LABEL .LBB1_89: # %_ZNSt14_Function_baseD2Ev.exit321 - ld.d $a3, $sp, 752 + ld.d $a3, $sp, 760 beqz $a3, .LBB1_91 # %bb.90: .Ltmp359: # EH_LABEL - addi.d $a0, $sp, 736 - addi.d $a1, $sp, 736 + addi.d $a0, $sp, 744 + addi.d $a1, $sp, 744 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp360: # EH_LABEL @@ -2301,7 +2296,7 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun ori $fp, $zero, 3 lu12i.w $a0, 273536 vreplgr2vr.w $vr0, $a0 - vst $vr0, $sp, 128 # 16-byte Folded Spill + vst $vr0, $sp, 144 # 16-byte Folded Spill ori $s5, $s2, 16 pcalau12i $a0, %pc_hi20(.L.str.21) addi.d $s3, $a0, %pc_lo12(.L.str.21) @@ -2316,9 +2311,9 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun # =>This Loop Header: Depth=1 # Child Loop BB1_94 Depth 2 move $a0, $s7 - ld.d $a2, $sp, 184 # 8-byte Folded Reload + ld.d $a2, $sp, 192 # 8-byte Folded Reload lu12i.w $a3, 1 - vld $vr0, $sp, 128 # 16-byte Folded Reload + vld $vr0, $sp, 144 # 16-byte Folded Reload .p2align 4, , 16 .LBB1_94: # %vector.body1272 # Parent Loop BB1_93 Depth=1 @@ -2334,79 +2329,79 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun slli.d $a0, $fp, 2 lu12i.w $a1, 523264 stx.w $a1, $a2, $a0 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 672 - vst $vr0, $sp, 688 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 680 + vst $vr0, $sp, 696 beqz $a3, .LBB1_98 # %bb.96: # in Loop: Header=BB1_93 Depth=1 .Ltmp362: # EH_LABEL - addi.d $a0, $sp, 672 + addi.d $a0, $sp, 680 ori $a2, $zero, 2 move $a1, $s1 jirl $ra, $a3, 0 .Ltmp363: # EH_LABEL # %bb.97: # in Loop: Header=BB1_93 Depth=1 vld $vr0, $s1, 16 - vst $vr0, $sp, 688 + vst $vr0, $sp, 696 .LBB1_98: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit333 # in Loop: Header=BB1_93 Depth=1 ld.d $a3, $s0, 16 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 656 - vst $vr0, $sp, 640 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 664 + vst $vr0, $sp, 648 beqz $a3, .LBB1_101 # %bb.99: # in Loop: Header=BB1_93 Depth=1 .Ltmp368: # EH_LABEL - addi.d $a0, $sp, 640 + addi.d $a0, $sp, 648 ori $a2, $zero, 2 move $a1, $s0 jirl $ra, $a3, 0 .Ltmp369: # EH_LABEL # %bb.100: # in Loop: Header=BB1_93 Depth=1 vld $vr0, $s0, 16 - vst $vr0, $sp, 656 + vst $vr0, $sp, 664 .LBB1_101: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit339 # in Loop: Header=BB1_93 Depth=1 .Ltmp374: # EH_LABEL - addi.d $a0, $sp, 672 - addi.d $a1, $sp, 640 - ld.d $a2, $sp, 184 # 8-byte Folded Reload + addi.d $a0, $sp, 680 + addi.d $a1, $sp, 648 + ld.d $a2, $sp, 192 # 8-byte Folded Reload move $a3, $s3 pcaddu18i $ra, %call36(_ZL5checkIfEvSt8functionIFT_PS1_jEES4_PfjPKc) jirl $ra, $ra, 0 .Ltmp375: # EH_LABEL # %bb.102: # in Loop: Header=BB1_93 Depth=1 - ld.d $a3, $sp, 656 + ld.d $a3, $sp, 664 beqz $a3, .LBB1_104 # %bb.103: # in Loop: Header=BB1_93 Depth=1 .Ltmp383: # EH_LABEL - addi.d $a0, $sp, 640 - addi.d $a1, $sp, 640 + addi.d $a0, $sp, 648 + addi.d $a1, $sp, 648 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp384: # EH_LABEL .LBB1_104: # %_ZNSt14_Function_baseD2Ev.exit341 # in Loop: Header=BB1_93 Depth=1 - ld.d $a3, $sp, 688 + ld.d $a3, $sp, 696 beqz $a3, .LBB1_92 # %bb.105: # in Loop: Header=BB1_93 Depth=1 .Ltmp386: # EH_LABEL - addi.d $a0, $sp, 672 - addi.d $a1, $sp, 672 + addi.d $a0, $sp, 680 + addi.d $a1, $sp, 680 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp387: # EH_LABEL b .LBB1_92 .LBB1_106: # %.preheader715 move $fp, $zero - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload addi.d $s3, $a0, 4 lu12i.w $a0, -264192 vreplgr2vr.w $vr0, $a0 - vst $vr0, $sp, 128 # 16-byte Folded Spill - ld.d $a0, $sp, 152 # 8-byte Folded Reload + vst $vr0, $sp, 144 # 16-byte Folded Spill + ld.d $a0, $sp, 168 # 8-byte Folded Reload lu32i.d $a0, 0 - st.d $a0, $sp, 152 # 8-byte Folded Spill + st.d $a0, $sp, 168 # 8-byte Folded Spill ori $s6, $zero, 1024 ori $s7, $zero, 124 movgr2fr.w $fs2, $zero @@ -2423,8 +2418,8 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun # Child Loop BB1_109 Depth 2 # Child Loop BB1_112 Depth 2 lu12i.w $a0, -1 - ld.d $a2, $sp, 184 # 8-byte Folded Reload - vld $vr0, $sp, 128 # 16-byte Folded Reload + ld.d $a2, $sp, 192 # 8-byte Folded Reload + vld $vr0, $sp, 144 # 16-byte Folded Reload .p2align 4, , 16 .LBB1_109: # %vector.body1278 # Parent Loop BB1_108 Depth=1 @@ -2447,16 +2442,16 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun .LBB1_112: # Parent Loop BB1_108 Depth=1 # => This Inner Loop Header: Depth=2 ld.d $a3, $s1, 16 - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a0, $sp, 168 # 8-byte Folded Reload st.w $a0, $s8, 0 stx.w $zero, $s3, $s4 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 608 - vst $vr0, $sp, 624 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 616 + vst $vr0, $sp, 632 beqz $a3, .LBB1_115 # %bb.113: # in Loop: Header=BB1_112 Depth=2 .Ltmp389: # EH_LABEL - addi.d $a0, $sp, 608 + addi.d $a0, $sp, 616 ori $a2, $zero, 2 move $a1, $s1 jirl $ra, $a3, 0 @@ -2464,7 +2459,7 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun # %bb.114: # in Loop: Header=BB1_112 Depth=2 vld $vr0, $s1, 16 ld.d $a0, $s1, 16 - vst $vr0, $sp, 624 + vst $vr0, $sp, 632 b .LBB1_116 .p2align 4, , 16 .LBB1_115: # in Loop: Header=BB1_112 Depth=2 @@ -2472,51 +2467,51 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun .LBB1_116: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit353 # in Loop: Header=BB1_112 Depth=2 ld.d $a3, $s0, 16 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 592 - vst $vr0, $sp, 576 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 600 + vst $vr0, $sp, 584 beqz $a3, .LBB1_119 # %bb.117: # in Loop: Header=BB1_112 Depth=2 .Ltmp395: # EH_LABEL - addi.d $a0, $sp, 576 + addi.d $a0, $sp, 584 ori $a2, $zero, 2 move $a1, $s0 jirl $ra, $a3, 0 .Ltmp396: # EH_LABEL # %bb.118: # in Loop: Header=BB1_112 Depth=2 vld $vr0, $s0, 16 - ld.d $a0, $sp, 624 - vst $vr0, $sp, 592 + ld.d $a0, $sp, 632 + vst $vr0, $sp, 600 .LBB1_119: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit359 # in Loop: Header=BB1_112 Depth=2 - ld.d $a1, $sp, 184 # 8-byte Folded Reload - st.d $a1, $sp, 1096 - st.w $s6, $sp, 1092 + ld.d $a1, $sp, 192 # 8-byte Folded Reload + st.d $a1, $sp, 1104 + st.w $s6, $sp, 1100 beqz $a0, .LBB1_309 # %bb.120: # %_ZNKSt8functionIFfPfjEEclES0_j.exit.i # in Loop: Header=BB1_112 Depth=2 - ld.d $a3, $sp, 632 + ld.d $a3, $sp, 640 .Ltmp401: # EH_LABEL - addi.d $a0, $sp, 608 - addi.d $a1, $sp, 1096 - addi.d $a2, $sp, 1092 + addi.d $a0, $sp, 616 + addi.d $a1, $sp, 1104 + addi.d $a2, $sp, 1100 jirl $ra, $a3, 0 .Ltmp402: # EH_LABEL # %bb.121: # %.noexc362 # in Loop: Header=BB1_112 Depth=2 - ld.d $a0, $sp, 592 - ld.d $a1, $sp, 184 # 8-byte Folded Reload - st.d $a1, $sp, 1096 - st.w $s6, $sp, 1092 + ld.d $a0, $sp, 600 + ld.d $a1, $sp, 192 # 8-byte Folded Reload + st.d $a1, $sp, 1104 + st.w $s6, $sp, 1100 beqz $a0, .LBB1_309 # %bb.122: # %_ZNKSt8functionIFfPfjEEclES0_j.exit9.i # in Loop: Header=BB1_112 Depth=2 fmov.s $fs1, $fa0 - ld.d $a3, $sp, 600 + ld.d $a3, $sp, 608 .Ltmp403: # EH_LABEL - addi.d $a0, $sp, 576 - addi.d $a1, $sp, 1096 - addi.d $a2, $sp, 1092 + addi.d $a0, $sp, 584 + addi.d $a1, $sp, 1104 + addi.d $a2, $sp, 1100 jirl $ra, $a3, 0 .Ltmp404: # EH_LABEL # %bb.123: # %.noexc364 @@ -2542,23 +2537,23 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun bceqz $fcc0, .LBB1_134 .LBB1_128: # %_ZL5checkIfEvSt8functionIFT_PS1_jEES4_PfjPKc.exit # in Loop: Header=BB1_112 Depth=2 - ld.d $a3, $sp, 592 + ld.d $a3, $sp, 600 beqz $a3, .LBB1_130 # %bb.129: # in Loop: Header=BB1_112 Depth=2 .Ltmp420: # EH_LABEL - addi.d $a0, $sp, 576 - addi.d $a1, $sp, 576 + addi.d $a0, $sp, 584 + addi.d $a1, $sp, 584 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp421: # EH_LABEL .LBB1_130: # %_ZNSt14_Function_baseD2Ev.exit373 # in Loop: Header=BB1_112 Depth=2 - ld.d $a3, $sp, 624 + ld.d $a3, $sp, 632 beqz $a3, .LBB1_111 # %bb.131: # in Loop: Header=BB1_112 Depth=2 .Ltmp423: # EH_LABEL - addi.d $a0, $sp, 608 - addi.d $a1, $sp, 608 + addi.d $a0, $sp, 616 + addi.d $a1, $sp, 616 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp424: # EH_LABEL @@ -2636,7 +2631,7 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun b .LBB1_307 .LBB1_141: # %.preheader707 move $fp, $zero - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload addi.d $s3, $a0, 4 ori $s6, $zero, 1024 ori $s7, $zero, 124 @@ -2652,8 +2647,8 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun # Child Loop BB1_144 Depth 2 # Child Loop BB1_147 Depth 2 lu12i.w $a0, -1 - ld.d $a2, $sp, 184 # 8-byte Folded Reload - vld $vr0, $sp, 128 # 16-byte Folded Reload + ld.d $a2, $sp, 192 # 8-byte Folded Reload + vld $vr0, $sp, 144 # 16-byte Folded Reload .p2align 4, , 16 .LBB1_144: # %vector.body1284 # Parent Loop BB1_143 Depth=1 @@ -2677,15 +2672,15 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun # => This Inner Loop Header: Depth=2 ld.d $a3, $s1, 16 st.w $zero, $s8, 0 - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a0, $sp, 168 # 8-byte Folded Reload stx.w $a0, $s3, $s4 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 544 - vst $vr0, $sp, 560 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 552 + vst $vr0, $sp, 568 beqz $a3, .LBB1_150 # %bb.148: # in Loop: Header=BB1_147 Depth=2 .Ltmp426: # EH_LABEL - addi.d $a0, $sp, 544 + addi.d $a0, $sp, 552 ori $a2, $zero, 2 move $a1, $s1 jirl $ra, $a3, 0 @@ -2693,7 +2688,7 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun # %bb.149: # in Loop: Header=BB1_147 Depth=2 vld $vr0, $s1, 16 ld.d $a0, $s1, 16 - vst $vr0, $sp, 560 + vst $vr0, $sp, 568 b .LBB1_151 .p2align 4, , 16 .LBB1_150: # in Loop: Header=BB1_147 Depth=2 @@ -2701,51 +2696,51 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun .LBB1_151: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit385 # in Loop: Header=BB1_147 Depth=2 ld.d $a3, $s0, 16 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 528 - vst $vr0, $sp, 512 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 536 + vst $vr0, $sp, 520 beqz $a3, .LBB1_154 # %bb.152: # in Loop: Header=BB1_147 Depth=2 .Ltmp432: # EH_LABEL - addi.d $a0, $sp, 512 + addi.d $a0, $sp, 520 ori $a2, $zero, 2 move $a1, $s0 jirl $ra, $a3, 0 .Ltmp433: # EH_LABEL # %bb.153: # in Loop: Header=BB1_147 Depth=2 vld $vr0, $s0, 16 - ld.d $a0, $sp, 560 - vst $vr0, $sp, 528 + ld.d $a0, $sp, 568 + vst $vr0, $sp, 536 .LBB1_154: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit391 # in Loop: Header=BB1_147 Depth=2 - ld.d $a1, $sp, 184 # 8-byte Folded Reload - st.d $a1, $sp, 1096 - st.w $s6, $sp, 1092 + ld.d $a1, $sp, 192 # 8-byte Folded Reload + st.d $a1, $sp, 1104 + st.w $s6, $sp, 1100 beqz $a0, .LBB1_311 # %bb.155: # %_ZNKSt8functionIFfPfjEEclES0_j.exit.i393 # in Loop: Header=BB1_147 Depth=2 - ld.d $a3, $sp, 568 + ld.d $a3, $sp, 576 .Ltmp438: # EH_LABEL - addi.d $a0, $sp, 544 - addi.d $a1, $sp, 1096 - addi.d $a2, $sp, 1092 + addi.d $a0, $sp, 552 + addi.d $a1, $sp, 1104 + addi.d $a2, $sp, 1100 jirl $ra, $a3, 0 .Ltmp439: # EH_LABEL # %bb.156: # %.noexc401 # in Loop: Header=BB1_147 Depth=2 - ld.d $a0, $sp, 528 - ld.d $a1, $sp, 184 # 8-byte Folded Reload - st.d $a1, $sp, 1096 - st.w $s6, $sp, 1092 + ld.d $a0, $sp, 536 + ld.d $a1, $sp, 192 # 8-byte Folded Reload + st.d $a1, $sp, 1104 + st.w $s6, $sp, 1100 beqz $a0, .LBB1_311 # %bb.157: # %_ZNKSt8functionIFfPfjEEclES0_j.exit9.i395 # in Loop: Header=BB1_147 Depth=2 fmov.s $fs1, $fa0 - ld.d $a3, $sp, 536 + ld.d $a3, $sp, 544 .Ltmp440: # EH_LABEL - addi.d $a0, $sp, 512 - addi.d $a1, $sp, 1096 - addi.d $a2, $sp, 1092 + addi.d $a0, $sp, 520 + addi.d $a1, $sp, 1104 + addi.d $a2, $sp, 1100 jirl $ra, $a3, 0 .Ltmp441: # EH_LABEL # %bb.158: # %.noexc403 @@ -2771,23 +2766,23 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun bceqz $fcc0, .LBB1_169 .LBB1_163: # %_ZL5checkIfEvSt8functionIFT_PS1_jEES4_PfjPKc.exit411 # in Loop: Header=BB1_147 Depth=2 - ld.d $a3, $sp, 528 + ld.d $a3, $sp, 536 beqz $a3, .LBB1_165 # %bb.164: # in Loop: Header=BB1_147 Depth=2 .Ltmp457: # EH_LABEL - addi.d $a0, $sp, 512 - addi.d $a1, $sp, 512 + addi.d $a0, $sp, 520 + addi.d $a1, $sp, 520 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp458: # EH_LABEL .LBB1_165: # %_ZNSt14_Function_baseD2Ev.exit413 # in Loop: Header=BB1_147 Depth=2 - ld.d $a3, $sp, 560 + ld.d $a3, $sp, 568 beqz $a3, .LBB1_146 # %bb.166: # in Loop: Header=BB1_147 Depth=2 .Ltmp460: # EH_LABEL - addi.d $a0, $sp, 544 - addi.d $a1, $sp, 544 + addi.d $a0, $sp, 552 + addi.d $a1, $sp, 552 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp461: # EH_LABEL @@ -2864,14 +2859,14 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun .Ltmp456: # EH_LABEL b .LBB1_307 .LBB1_176: # %.preheader699 - pcalau12i $a0, %pc_hi20(.LCPI1_4) - st.d $a0, $sp, 80 # 8-byte Folded Spill - vld $vr0, $a0, %pc_lo12(.LCPI1_4) - vst $vr0, $sp, 112 # 16-byte Folded Spill + pcalau12i $a0, %pc_hi20(.LCPI1_1) + st.d $a0, $sp, 96 # 8-byte Folded Spill + vld $vr0, $a0, %pc_lo12(.LCPI1_1) + vst $vr0, $sp, 128 # 16-byte Folded Spill ori $s4, $zero, 4 pcalau12i $a0, %pc_hi20(.L.str.23) addi.d $a0, $a0, %pc_lo12(.L.str.23) - st.d $a0, $sp, 96 # 8-byte Folded Spill + st.d $a0, $sp, 112 # 8-byte Folded Spill move $s8, $zero ori $s3, $zero, 1023 ori $s6, $zero, 1024 @@ -2880,7 +2875,7 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun .p2align 4, , 16 .LBB1_177: # in Loop: Header=BB1_178 Depth=1 addi.d $s8, $s8, 1 - ld.d $s4, $sp, 152 # 8-byte Folded Reload + ld.d $s4, $sp, 168 # 8-byte Folded Reload addi.d $s4, $s4, 4 beq $s8, $s6, .LBB1_224 .LBB1_178: # %vector.ph1289 @@ -2888,8 +2883,8 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun # Child Loop BB1_179 Depth 2 # Child Loop BB1_193 Depth 2 lu12i.w $a0, -1 - vld $vr0, $sp, 112 # 16-byte Folded Reload - ld.d $a2, $sp, 184 # 8-byte Folded Reload + vld $vr0, $sp, 128 # 16-byte Folded Reload + ld.d $a2, $sp, 192 # 8-byte Folded Reload .p2align 4, , 16 .LBB1_179: # %vector.body1290 # Parent Loop BB1_178 Depth=1 @@ -2909,72 +2904,72 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun slli.d $a0, $s8, 2 lu12i.w $a1, 280578 stx.w $a1, $a2, $a0 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 480 - vst $vr0, $sp, 496 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 488 + vst $vr0, $sp, 504 beqz $a3, .LBB1_183 # %bb.181: # in Loop: Header=BB1_178 Depth=1 .Ltmp463: # EH_LABEL - addi.d $a0, $sp, 480 + addi.d $a0, $sp, 488 ori $a2, $zero, 2 move $a1, $s1 jirl $ra, $a3, 0 .Ltmp464: # EH_LABEL # %bb.182: # in Loop: Header=BB1_178 Depth=1 vld $vr0, $s1, 16 - vst $vr0, $sp, 496 + vst $vr0, $sp, 504 .LBB1_183: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit425 # in Loop: Header=BB1_178 Depth=1 ld.d $a3, $s0, 16 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 464 - vst $vr0, $sp, 448 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 472 + vst $vr0, $sp, 456 beqz $a3, .LBB1_186 # %bb.184: # in Loop: Header=BB1_178 Depth=1 .Ltmp469: # EH_LABEL - addi.d $a0, $sp, 448 + addi.d $a0, $sp, 456 ori $a2, $zero, 2 move $a1, $s0 jirl $ra, $a3, 0 .Ltmp470: # EH_LABEL # %bb.185: # in Loop: Header=BB1_178 Depth=1 vld $vr0, $s0, 16 - vst $vr0, $sp, 464 + vst $vr0, $sp, 472 .LBB1_186: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit431 # in Loop: Header=BB1_178 Depth=1 .Ltmp475: # EH_LABEL - addi.d $a0, $sp, 480 - addi.d $a1, $sp, 448 - ld.d $a2, $sp, 184 # 8-byte Folded Reload - ld.d $a3, $sp, 96 # 8-byte Folded Reload + addi.d $a0, $sp, 488 + addi.d $a1, $sp, 456 + ld.d $a2, $sp, 192 # 8-byte Folded Reload + ld.d $a3, $sp, 112 # 8-byte Folded Reload pcaddu18i $ra, %call36(_ZL5checkIfEvSt8functionIFT_PS1_jEES4_PfjPKc) jirl $ra, $ra, 0 .Ltmp476: # EH_LABEL # %bb.187: # in Loop: Header=BB1_178 Depth=1 - ld.d $a3, $sp, 464 + ld.d $a3, $sp, 472 beqz $a3, .LBB1_189 # %bb.188: # in Loop: Header=BB1_178 Depth=1 .Ltmp484: # EH_LABEL - addi.d $a0, $sp, 448 - addi.d $a1, $sp, 448 + addi.d $a0, $sp, 456 + addi.d $a1, $sp, 456 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp485: # EH_LABEL .LBB1_189: # %_ZNSt14_Function_baseD2Ev.exit433 # in Loop: Header=BB1_178 Depth=1 - ld.d $a3, $sp, 496 + ld.d $a3, $sp, 504 beqz $a3, .LBB1_191 # %bb.190: # in Loop: Header=BB1_178 Depth=1 .Ltmp487: # EH_LABEL - addi.d $a0, $sp, 480 - addi.d $a1, $sp, 480 + addi.d $a0, $sp, 488 + addi.d $a1, $sp, 488 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp488: # EH_LABEL .LBB1_191: # %_ZNSt14_Function_baseD2Ev.exit435.preheader # in Loop: Header=BB1_178 Depth=1 ori $fp, $zero, 1 - st.d $s4, $sp, 152 # 8-byte Folded Spill + st.d $s4, $sp, 168 # 8-byte Folded Spill b .LBB1_193 .p2align 4, , 16 .LBB1_192: # %_ZNSt14_Function_baseD2Ev.exit475 @@ -2988,18 +2983,18 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun add.d $a0, $s8, $fp bltu $s3, $a0, .LBB1_195 # %bb.194: # in Loop: Header=BB1_193 Depth=2 - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload lu12i.w $a1, 280578 stx.w $a1, $a0, $s4 .LBB1_195: # in Loop: Header=BB1_193 Depth=2 ld.d $a3, $s1, 16 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 432 - vst $vr0, $sp, 416 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 440 + vst $vr0, $sp, 424 beqz $a3, .LBB1_198 # %bb.196: # in Loop: Header=BB1_193 Depth=2 .Ltmp490: # EH_LABEL - addi.d $a0, $sp, 416 + addi.d $a0, $sp, 424 ori $a2, $zero, 2 move $a1, $s1 jirl $ra, $a3, 0 @@ -3007,7 +3002,7 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun # %bb.197: # in Loop: Header=BB1_193 Depth=2 vld $vr0, $s1, 16 ld.d $a0, $s1, 16 - vst $vr0, $sp, 432 + vst $vr0, $sp, 440 b .LBB1_199 .p2align 4, , 16 .LBB1_198: # in Loop: Header=BB1_193 Depth=2 @@ -3015,51 +3010,51 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun .LBB1_199: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit445 # in Loop: Header=BB1_193 Depth=2 ld.d $a3, $s0, 16 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 400 - vst $vr0, $sp, 384 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 408 + vst $vr0, $sp, 392 beqz $a3, .LBB1_202 # %bb.200: # in Loop: Header=BB1_193 Depth=2 .Ltmp496: # EH_LABEL - addi.d $a0, $sp, 384 + addi.d $a0, $sp, 392 ori $a2, $zero, 2 move $a1, $s0 jirl $ra, $a3, 0 .Ltmp497: # EH_LABEL # %bb.201: # in Loop: Header=BB1_193 Depth=2 vld $vr0, $s0, 16 - ld.d $a0, $sp, 432 - vst $vr0, $sp, 400 + ld.d $a0, $sp, 440 + vst $vr0, $sp, 408 .LBB1_202: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit451 # in Loop: Header=BB1_193 Depth=2 - ld.d $a1, $sp, 184 # 8-byte Folded Reload - st.d $a1, $sp, 1096 - st.w $s6, $sp, 1092 + ld.d $a1, $sp, 192 # 8-byte Folded Reload + st.d $a1, $sp, 1104 + st.w $s6, $sp, 1100 beqz $a0, .LBB1_313 # %bb.203: # %_ZNKSt8functionIFfPfjEEclES0_j.exit.i453 # in Loop: Header=BB1_193 Depth=2 - ld.d $a3, $sp, 440 + ld.d $a3, $sp, 448 .Ltmp502: # EH_LABEL - addi.d $a0, $sp, 416 - addi.d $a1, $sp, 1096 - addi.d $a2, $sp, 1092 + addi.d $a0, $sp, 424 + addi.d $a1, $sp, 1104 + addi.d $a2, $sp, 1100 jirl $ra, $a3, 0 .Ltmp503: # EH_LABEL # %bb.204: # %.noexc461 # in Loop: Header=BB1_193 Depth=2 - ld.d $a0, $sp, 400 - ld.d $a1, $sp, 184 # 8-byte Folded Reload - st.d $a1, $sp, 1096 - st.w $s6, $sp, 1092 + ld.d $a0, $sp, 408 + ld.d $a1, $sp, 192 # 8-byte Folded Reload + st.d $a1, $sp, 1104 + st.w $s6, $sp, 1100 beqz $a0, .LBB1_313 # %bb.205: # %_ZNKSt8functionIFfPfjEEclES0_j.exit9.i455 # in Loop: Header=BB1_193 Depth=2 fmov.s $fs1, $fa0 - ld.d $a3, $sp, 408 + ld.d $a3, $sp, 416 .Ltmp504: # EH_LABEL - addi.d $a0, $sp, 384 - addi.d $a1, $sp, 1096 - addi.d $a2, $sp, 1092 + addi.d $a0, $sp, 392 + addi.d $a1, $sp, 1104 + addi.d $a2, $sp, 1100 jirl $ra, $a3, 0 .Ltmp505: # EH_LABEL # %bb.206: # %.noexc463 @@ -3085,23 +3080,23 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun bceqz $fcc0, .LBB1_217 .LBB1_211: # %_ZL5checkIfEvSt8functionIFT_PS1_jEES4_PfjPKc.exit471 # in Loop: Header=BB1_193 Depth=2 - ld.d $a3, $sp, 400 + ld.d $a3, $sp, 408 beqz $a3, .LBB1_213 # %bb.212: # in Loop: Header=BB1_193 Depth=2 .Ltmp521: # EH_LABEL - addi.d $a0, $sp, 384 - addi.d $a1, $sp, 384 + addi.d $a0, $sp, 392 + addi.d $a1, $sp, 392 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp522: # EH_LABEL .LBB1_213: # %_ZNSt14_Function_baseD2Ev.exit473 # in Loop: Header=BB1_193 Depth=2 - ld.d $a3, $sp, 432 + ld.d $a3, $sp, 440 beqz $a3, .LBB1_192 # %bb.214: # in Loop: Header=BB1_193 Depth=2 .Ltmp524: # EH_LABEL - addi.d $a0, $sp, 416 - addi.d $a1, $sp, 416 + addi.d $a0, $sp, 424 + addi.d $a1, $sp, 424 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp525: # EH_LABEL @@ -3178,13 +3173,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun .Ltmp520: # EH_LABEL b .LBB1_307 .LBB1_224: # %.preheader692 - ld.d $a0, $sp, 80 # 8-byte Folded Reload - vld $vr0, $a0, %pc_lo12(.LCPI1_4) - vst $vr0, $sp, 112 # 16-byte Folded Spill + ld.d $a0, $sp, 96 # 8-byte Folded Reload + vld $vr0, $a0, %pc_lo12(.LCPI1_1) + vst $vr0, $sp, 128 # 16-byte Folded Spill ori $s4, $zero, 4 pcalau12i $a0, %pc_hi20(.L.str.24) addi.d $a0, $a0, %pc_lo12(.L.str.24) - st.d $a0, $sp, 96 # 8-byte Folded Spill + st.d $a0, $sp, 112 # 8-byte Folded Spill move $s8, $zero ori $s7, $zero, 1023 ori $s6, $zero, 1024 @@ -3193,7 +3188,7 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun .p2align 4, , 16 .LBB1_225: # in Loop: Header=BB1_226 Depth=1 addi.d $s8, $s8, 1 - ld.d $s4, $sp, 152 # 8-byte Folded Reload + ld.d $s4, $sp, 168 # 8-byte Folded Reload addi.d $s4, $s4, 4 beq $s8, $s6, .LBB1_272 .LBB1_226: # %vector.ph1297 @@ -3201,8 +3196,8 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun # Child Loop BB1_227 Depth 2 # Child Loop BB1_241 Depth 2 lu12i.w $a0, -1 - vld $vr0, $sp, 112 # 16-byte Folded Reload - ld.d $a2, $sp, 184 # 8-byte Folded Reload + vld $vr0, $sp, 128 # 16-byte Folded Reload + ld.d $a2, $sp, 192 # 8-byte Folded Reload .p2align 4, , 16 .LBB1_227: # %vector.body1298 # Parent Loop BB1_226 Depth=1 @@ -3222,72 +3217,72 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun slli.d $a0, $s8, 2 lu12i.w $a1, 523264 stx.w $a1, $a2, $a0 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 352 - vst $vr0, $sp, 368 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 360 + vst $vr0, $sp, 376 beqz $a3, .LBB1_231 # %bb.229: # in Loop: Header=BB1_226 Depth=1 .Ltmp527: # EH_LABEL - addi.d $a0, $sp, 352 + addi.d $a0, $sp, 360 ori $a2, $zero, 2 move $a1, $s1 jirl $ra, $a3, 0 .Ltmp528: # EH_LABEL # %bb.230: # in Loop: Header=BB1_226 Depth=1 vld $vr0, $s1, 16 - vst $vr0, $sp, 368 + vst $vr0, $sp, 376 .LBB1_231: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit485 # in Loop: Header=BB1_226 Depth=1 ld.d $a3, $s0, 16 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 336 - vst $vr0, $sp, 320 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 344 + vst $vr0, $sp, 328 beqz $a3, .LBB1_234 # %bb.232: # in Loop: Header=BB1_226 Depth=1 .Ltmp533: # EH_LABEL - addi.d $a0, $sp, 320 + addi.d $a0, $sp, 328 ori $a2, $zero, 2 move $a1, $s0 jirl $ra, $a3, 0 .Ltmp534: # EH_LABEL # %bb.233: # in Loop: Header=BB1_226 Depth=1 vld $vr0, $s0, 16 - vst $vr0, $sp, 336 + vst $vr0, $sp, 344 .LBB1_234: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit491 # in Loop: Header=BB1_226 Depth=1 .Ltmp539: # EH_LABEL - addi.d $a0, $sp, 352 - addi.d $a1, $sp, 320 - ld.d $a2, $sp, 184 # 8-byte Folded Reload - ld.d $a3, $sp, 96 # 8-byte Folded Reload + addi.d $a0, $sp, 360 + addi.d $a1, $sp, 328 + ld.d $a2, $sp, 192 # 8-byte Folded Reload + ld.d $a3, $sp, 112 # 8-byte Folded Reload pcaddu18i $ra, %call36(_ZL5checkIfEvSt8functionIFT_PS1_jEES4_PfjPKc) jirl $ra, $ra, 0 .Ltmp540: # EH_LABEL # %bb.235: # in Loop: Header=BB1_226 Depth=1 - ld.d $a3, $sp, 336 + ld.d $a3, $sp, 344 beqz $a3, .LBB1_237 # %bb.236: # in Loop: Header=BB1_226 Depth=1 .Ltmp548: # EH_LABEL - addi.d $a0, $sp, 320 - addi.d $a1, $sp, 320 + addi.d $a0, $sp, 328 + addi.d $a1, $sp, 328 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp549: # EH_LABEL .LBB1_237: # %_ZNSt14_Function_baseD2Ev.exit493 # in Loop: Header=BB1_226 Depth=1 - ld.d $a3, $sp, 368 + ld.d $a3, $sp, 376 beqz $a3, .LBB1_239 # %bb.238: # in Loop: Header=BB1_226 Depth=1 .Ltmp551: # EH_LABEL - addi.d $a0, $sp, 352 - addi.d $a1, $sp, 352 + addi.d $a0, $sp, 360 + addi.d $a1, $sp, 360 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp552: # EH_LABEL .LBB1_239: # %_ZNSt14_Function_baseD2Ev.exit495.preheader # in Loop: Header=BB1_226 Depth=1 ori $fp, $zero, 1 - st.d $s4, $sp, 152 # 8-byte Folded Spill + st.d $s4, $sp, 168 # 8-byte Folded Spill b .LBB1_241 .p2align 4, , 16 .LBB1_240: # %_ZNSt14_Function_baseD2Ev.exit535 @@ -3301,18 +3296,18 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun add.d $a0, $s8, $fp bltu $s7, $a0, .LBB1_243 # %bb.242: # in Loop: Header=BB1_241 Depth=2 - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload lu12i.w $a1, 523264 stx.w $a1, $a0, $s4 .LBB1_243: # in Loop: Header=BB1_241 Depth=2 ld.d $a3, $s1, 16 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 304 - vst $vr0, $sp, 288 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 312 + vst $vr0, $sp, 296 beqz $a3, .LBB1_246 # %bb.244: # in Loop: Header=BB1_241 Depth=2 .Ltmp554: # EH_LABEL - addi.d $a0, $sp, 288 + addi.d $a0, $sp, 296 ori $a2, $zero, 2 move $a1, $s1 jirl $ra, $a3, 0 @@ -3320,7 +3315,7 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun # %bb.245: # in Loop: Header=BB1_241 Depth=2 vld $vr0, $s1, 16 ld.d $a0, $s1, 16 - vst $vr0, $sp, 304 + vst $vr0, $sp, 312 b .LBB1_247 .p2align 4, , 16 .LBB1_246: # in Loop: Header=BB1_241 Depth=2 @@ -3328,51 +3323,51 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun .LBB1_247: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit505 # in Loop: Header=BB1_241 Depth=2 ld.d $a3, $s0, 16 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 272 - vst $vr0, $sp, 256 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 280 + vst $vr0, $sp, 264 beqz $a3, .LBB1_250 # %bb.248: # in Loop: Header=BB1_241 Depth=2 .Ltmp560: # EH_LABEL - addi.d $a0, $sp, 256 + addi.d $a0, $sp, 264 ori $a2, $zero, 2 move $a1, $s0 jirl $ra, $a3, 0 .Ltmp561: # EH_LABEL # %bb.249: # in Loop: Header=BB1_241 Depth=2 vld $vr0, $s0, 16 - ld.d $a0, $sp, 304 - vst $vr0, $sp, 272 + ld.d $a0, $sp, 312 + vst $vr0, $sp, 280 .LBB1_250: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit511 # in Loop: Header=BB1_241 Depth=2 - ld.d $a1, $sp, 184 # 8-byte Folded Reload - st.d $a1, $sp, 1096 - st.w $s6, $sp, 1092 + ld.d $a1, $sp, 192 # 8-byte Folded Reload + st.d $a1, $sp, 1104 + st.w $s6, $sp, 1100 beqz $a0, .LBB1_315 # %bb.251: # %_ZNKSt8functionIFfPfjEEclES0_j.exit.i513 # in Loop: Header=BB1_241 Depth=2 - ld.d $a3, $sp, 312 + ld.d $a3, $sp, 320 .Ltmp566: # EH_LABEL - addi.d $a0, $sp, 288 - addi.d $a1, $sp, 1096 - addi.d $a2, $sp, 1092 + addi.d $a0, $sp, 296 + addi.d $a1, $sp, 1104 + addi.d $a2, $sp, 1100 jirl $ra, $a3, 0 .Ltmp567: # EH_LABEL # %bb.252: # %.noexc521 # in Loop: Header=BB1_241 Depth=2 - ld.d $a0, $sp, 272 - ld.d $a1, $sp, 184 # 8-byte Folded Reload - st.d $a1, $sp, 1096 - st.w $s6, $sp, 1092 + ld.d $a0, $sp, 280 + ld.d $a1, $sp, 192 # 8-byte Folded Reload + st.d $a1, $sp, 1104 + st.w $s6, $sp, 1100 beqz $a0, .LBB1_315 # %bb.253: # %_ZNKSt8functionIFfPfjEEclES0_j.exit9.i515 # in Loop: Header=BB1_241 Depth=2 fmov.s $fs1, $fa0 - ld.d $a3, $sp, 280 + ld.d $a3, $sp, 288 .Ltmp568: # EH_LABEL - addi.d $a0, $sp, 256 - addi.d $a1, $sp, 1096 - addi.d $a2, $sp, 1092 + addi.d $a0, $sp, 264 + addi.d $a1, $sp, 1104 + addi.d $a2, $sp, 1100 jirl $ra, $a3, 0 .Ltmp569: # EH_LABEL # %bb.254: # %.noexc523 @@ -3398,23 +3393,23 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun bceqz $fcc0, .LBB1_265 .LBB1_259: # %_ZL5checkIfEvSt8functionIFT_PS1_jEES4_PfjPKc.exit531 # in Loop: Header=BB1_241 Depth=2 - ld.d $a3, $sp, 272 + ld.d $a3, $sp, 280 beqz $a3, .LBB1_261 # %bb.260: # in Loop: Header=BB1_241 Depth=2 .Ltmp585: # EH_LABEL - addi.d $a0, $sp, 256 - addi.d $a1, $sp, 256 + addi.d $a0, $sp, 264 + addi.d $a1, $sp, 264 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp586: # EH_LABEL .LBB1_261: # %_ZNSt14_Function_baseD2Ev.exit533 # in Loop: Header=BB1_241 Depth=2 - ld.d $a3, $sp, 304 + ld.d $a3, $sp, 312 beqz $a3, .LBB1_240 # %bb.262: # in Loop: Header=BB1_241 Depth=2 .Ltmp588: # EH_LABEL - addi.d $a0, $sp, 288 - addi.d $a1, $sp, 288 + addi.d $a0, $sp, 296 + addi.d $a1, $sp, 296 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp589: # EH_LABEL @@ -3491,8 +3486,8 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun .Ltmp584: # EH_LABEL b .LBB1_307 .LBB1_272: # %.preheader685 - st.d $zero, $sp, 152 # 8-byte Folded Spill - ld.d $a0, $sp, 184 # 8-byte Folded Reload + st.d $zero, $sp, 168 # 8-byte Folded Spill + ld.d $a0, $sp, 192 # 8-byte Folded Reload addi.d $s3, $a0, 4 lu12i.w $s6, -2048 lu32i.d $s6, 0 @@ -3502,11 +3497,11 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun b .LBB1_274 .p2align 4, , 16 .LBB1_273: # in Loop: Header=BB1_274 Depth=1 - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a0, $sp, 168 # 8-byte Folded Reload addi.d $a0, $a0, 1 addi.d $s3, $s3, 4 lu12i.w $s2, 1 - st.d $a0, $sp, 152 # 8-byte Folded Spill + st.d $a0, $sp, 168 # 8-byte Folded Spill ori $a1, $zero, 64 beq $a0, $a1, .LBB1_308 .LBB1_274: # %vector.ph1306 @@ -3514,8 +3509,8 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun # Child Loop BB1_275 Depth 2 # Child Loop BB1_278 Depth 2 lu12i.w $a0, -1 - ld.d $a2, $sp, 184 # 8-byte Folded Reload - vld $vr0, $sp, 128 # 16-byte Folded Reload + ld.d $a2, $sp, 192 # 8-byte Folded Reload + vld $vr0, $sp, 144 # 16-byte Folded Reload .p2align 4, , 16 .LBB1_275: # %vector.body1307 # Parent Loop BB1_274 Depth=1 @@ -3528,7 +3523,7 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun # %bb.276: # %.preheader # in Loop: Header=BB1_274 Depth=1 move $fp, $zero - ld.d $a0, $sp, 152 # 8-byte Folded Reload + ld.d $a0, $sp, 168 # 8-byte Folded Reload alsl.d $s2, $a0, $a2, 2 b .LBB1_278 .p2align 4, , 16 @@ -3541,13 +3536,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun ld.d $a3, $s1, 16 st.w $s6, $s2, 0 stx.w $s7, $s3, $fp - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 224 - vst $vr0, $sp, 240 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 232 + vst $vr0, $sp, 248 beqz $a3, .LBB1_281 # %bb.279: # in Loop: Header=BB1_278 Depth=2 .Ltmp591: # EH_LABEL - addi.d $a0, $sp, 224 + addi.d $a0, $sp, 232 ori $a2, $zero, 2 move $a1, $s1 jirl $ra, $a3, 0 @@ -3555,7 +3550,7 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun # %bb.280: # in Loop: Header=BB1_278 Depth=2 vld $vr0, $s1, 16 ld.d $a0, $s1, 16 - vst $vr0, $sp, 240 + vst $vr0, $sp, 248 b .LBB1_282 .p2align 4, , 16 .LBB1_281: # in Loop: Header=BB1_278 Depth=2 @@ -3563,51 +3558,51 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun .LBB1_282: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit546 # in Loop: Header=BB1_278 Depth=2 ld.d $a3, $s0, 16 - vld $vr0, $sp, 160 # 16-byte Folded Reload - vst $vr0, $sp, 208 - vst $vr0, $sp, 192 + vld $vr0, $sp, 176 # 16-byte Folded Reload + vst $vr0, $sp, 216 + vst $vr0, $sp, 200 beqz $a3, .LBB1_285 # %bb.283: # in Loop: Header=BB1_278 Depth=2 .Ltmp597: # EH_LABEL - addi.d $a0, $sp, 192 + addi.d $a0, $sp, 200 ori $a2, $zero, 2 move $a1, $s0 jirl $ra, $a3, 0 .Ltmp598: # EH_LABEL # %bb.284: # in Loop: Header=BB1_278 Depth=2 vld $vr0, $s0, 16 - ld.d $a0, $sp, 240 - vst $vr0, $sp, 208 + ld.d $a0, $sp, 248 + vst $vr0, $sp, 216 .LBB1_285: # %_ZNSt8functionIFfPfjEEC2ERKS2_.exit552 # in Loop: Header=BB1_278 Depth=2 - ld.d $a1, $sp, 184 # 8-byte Folded Reload - st.d $a1, $sp, 1096 - st.w $s8, $sp, 1092 + ld.d $a1, $sp, 192 # 8-byte Folded Reload + st.d $a1, $sp, 1104 + st.w $s8, $sp, 1100 beqz $a0, .LBB1_317 # %bb.286: # %_ZNKSt8functionIFfPfjEEclES0_j.exit.i554 # in Loop: Header=BB1_278 Depth=2 - ld.d $a3, $sp, 248 + ld.d $a3, $sp, 256 .Ltmp603: # EH_LABEL - addi.d $a0, $sp, 224 - addi.d $a1, $sp, 1096 - addi.d $a2, $sp, 1092 + addi.d $a0, $sp, 232 + addi.d $a1, $sp, 1104 + addi.d $a2, $sp, 1100 jirl $ra, $a3, 0 .Ltmp604: # EH_LABEL # %bb.287: # %.noexc562 # in Loop: Header=BB1_278 Depth=2 - ld.d $a0, $sp, 208 - ld.d $a1, $sp, 184 # 8-byte Folded Reload - st.d $a1, $sp, 1096 - st.w $s8, $sp, 1092 + ld.d $a0, $sp, 216 + ld.d $a1, $sp, 192 # 8-byte Folded Reload + st.d $a1, $sp, 1104 + st.w $s8, $sp, 1100 beqz $a0, .LBB1_317 # %bb.288: # %_ZNKSt8functionIFfPfjEEclES0_j.exit9.i556 # in Loop: Header=BB1_278 Depth=2 fmov.s $fs1, $fa0 - ld.d $a3, $sp, 216 + ld.d $a3, $sp, 224 .Ltmp605: # EH_LABEL - addi.d $a0, $sp, 192 - addi.d $a1, $sp, 1096 - addi.d $a2, $sp, 1092 + addi.d $a0, $sp, 200 + addi.d $a1, $sp, 1104 + addi.d $a2, $sp, 1100 jirl $ra, $a3, 0 .Ltmp606: # EH_LABEL # %bb.289: # %.noexc564 @@ -3633,23 +3628,23 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun bceqz $fcc0, .LBB1_300 .LBB1_294: # %_ZL5checkIfEvSt8functionIFT_PS1_jEES4_PfjPKc.exit572 # in Loop: Header=BB1_278 Depth=2 - ld.d $a3, $sp, 208 + ld.d $a3, $sp, 216 beqz $a3, .LBB1_296 # %bb.295: # in Loop: Header=BB1_278 Depth=2 .Ltmp622: # EH_LABEL - addi.d $a0, $sp, 192 - addi.d $a1, $sp, 192 + addi.d $a0, $sp, 200 + addi.d $a1, $sp, 200 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp623: # EH_LABEL .LBB1_296: # %_ZNSt14_Function_baseD2Ev.exit574 # in Loop: Header=BB1_278 Depth=2 - ld.d $a3, $sp, 240 + ld.d $a3, $sp, 248 beqz $a3, .LBB1_277 # %bb.297: # in Loop: Header=BB1_278 Depth=2 .Ltmp625: # EH_LABEL - addi.d $a0, $sp, 224 - addi.d $a1, $sp, 224 + addi.d $a0, $sp, 232 + addi.d $a1, $sp, 232 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp626: # EH_LABEL @@ -3729,24 +3724,25 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun pcaddu18i $ra, %call36(exit) jirl $ra, $ra, 0 .LBB1_308: # %_ZNSt10unique_ptrIA_fSt14default_deleteIS0_EED2Ev.exit - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload pcaddu18i $ra, %call36(_ZdaPv) jirl $ra, $ra, 0 - fld.d $fs2, $sp, 1104 # 8-byte Folded Reload - fld.d $fs1, $sp, 1112 # 8-byte Folded Reload - fld.d $fs0, $sp, 1120 # 8-byte Folded Reload - ld.d $s8, $sp, 1128 # 8-byte Folded Reload - ld.d $s7, $sp, 1136 # 8-byte Folded Reload - ld.d $s6, $sp, 1144 # 8-byte Folded Reload - ld.d $s5, $sp, 1152 # 8-byte Folded Reload - ld.d $s4, $sp, 1160 # 8-byte Folded Reload - ld.d $s3, $sp, 1168 # 8-byte Folded Reload - ld.d $s2, $sp, 1176 # 8-byte Folded Reload - ld.d $s1, $sp, 1184 # 8-byte Folded Reload - ld.d $s0, $sp, 1192 # 8-byte Folded Reload - ld.d $fp, $sp, 1200 # 8-byte Folded Reload - ld.d $ra, $sp, 1208 # 8-byte Folded Reload - addi.d $sp, $sp, 1216 + fld.d $fs3, $sp, 1112 # 8-byte Folded Reload + fld.d $fs2, $sp, 1120 # 8-byte Folded Reload + fld.d $fs1, $sp, 1128 # 8-byte Folded Reload + fld.d $fs0, $sp, 1136 # 8-byte Folded Reload + ld.d $s8, $sp, 1144 # 8-byte Folded Reload + ld.d $s7, $sp, 1152 # 8-byte Folded Reload + ld.d $s6, $sp, 1160 # 8-byte Folded Reload + ld.d $s5, $sp, 1168 # 8-byte Folded Reload + ld.d $s4, $sp, 1176 # 8-byte Folded Reload + ld.d $s3, $sp, 1184 # 8-byte Folded Reload + ld.d $s2, $sp, 1192 # 8-byte Folded Reload + ld.d $s1, $sp, 1200 # 8-byte Folded Reload + ld.d $s0, $sp, 1208 # 8-byte Folded Reload + ld.d $fp, $sp, 1216 # 8-byte Folded Reload + ld.d $ra, $sp, 1224 # 8-byte Folded Reload + addi.d $sp, $sp, 1232 ret .LBB1_309: # %.invoke .Ltmp664: # EH_LABEL @@ -3788,13 +3784,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_321: .Ltmp343: # EH_LABEL - ld.d $a3, $sp, 720 + ld.d $a3, $sp, 728 move $s0, $a0 beqz $a3, .LBB1_369 # %bb.322: .Ltmp344: # EH_LABEL - addi.d $a0, $sp, 704 - addi.d $a1, $sp, 704 + addi.d $a0, $sp, 712 + addi.d $a1, $sp, 712 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp345: # EH_LABEL @@ -3805,13 +3801,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_324: .Ltmp337: # EH_LABEL - ld.d $a3, $sp, 752 + ld.d $a3, $sp, 760 move $s0, $a0 beqz $a3, .LBB1_524 # %bb.325: .Ltmp338: # EH_LABEL - addi.d $a0, $sp, 736 - addi.d $a1, $sp, 736 + addi.d $a0, $sp, 744 + addi.d $a1, $sp, 744 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp339: # EH_LABEL @@ -3830,13 +3826,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_329: .Ltmp316: # EH_LABEL - ld.d $a3, $sp, 784 + ld.d $a3, $sp, 792 move $s0, $a0 beqz $a3, .LBB1_375 # %bb.330: .Ltmp317: # EH_LABEL - addi.d $a0, $sp, 768 - addi.d $a1, $sp, 768 + addi.d $a0, $sp, 776 + addi.d $a1, $sp, 776 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp318: # EH_LABEL @@ -3847,13 +3843,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_332: .Ltmp310: # EH_LABEL - ld.d $a3, $sp, 816 + ld.d $a3, $sp, 824 move $s0, $a0 beqz $a3, .LBB1_524 # %bb.333: .Ltmp311: # EH_LABEL - addi.d $a0, $sp, 800 - addi.d $a1, $sp, 800 + addi.d $a0, $sp, 808 + addi.d $a1, $sp, 808 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp312: # EH_LABEL @@ -3872,13 +3868,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_337: .Ltmp289: # EH_LABEL - ld.d $a3, $sp, 848 + ld.d $a3, $sp, 856 move $s0, $a0 beqz $a3, .LBB1_381 # %bb.338: .Ltmp290: # EH_LABEL - addi.d $a0, $sp, 832 - addi.d $a1, $sp, 832 + addi.d $a0, $sp, 840 + addi.d $a1, $sp, 840 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp291: # EH_LABEL @@ -3889,13 +3885,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_340: .Ltmp283: # EH_LABEL - ld.d $a3, $sp, 880 + ld.d $a3, $sp, 888 move $s0, $a0 beqz $a3, .LBB1_524 # %bb.341: .Ltmp284: # EH_LABEL - addi.d $a0, $sp, 864 - addi.d $a1, $sp, 864 + addi.d $a0, $sp, 872 + addi.d $a1, $sp, 872 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp285: # EH_LABEL @@ -3914,13 +3910,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_345: .Ltmp262: # EH_LABEL - ld.d $a3, $sp, 912 + ld.d $a3, $sp, 920 move $s0, $a0 beqz $a3, .LBB1_387 # %bb.346: .Ltmp263: # EH_LABEL - addi.d $a0, $sp, 896 - addi.d $a1, $sp, 896 + addi.d $a0, $sp, 904 + addi.d $a1, $sp, 904 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp264: # EH_LABEL @@ -3931,13 +3927,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_348: .Ltmp256: # EH_LABEL - ld.d $a3, $sp, 944 + ld.d $a3, $sp, 952 move $s0, $a0 beqz $a3, .LBB1_524 # %bb.349: .Ltmp257: # EH_LABEL - addi.d $a0, $sp, 928 - addi.d $a1, $sp, 928 + addi.d $a0, $sp, 936 + addi.d $a1, $sp, 936 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp258: # EH_LABEL @@ -3956,13 +3952,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_353: .Ltmp235: # EH_LABEL - ld.d $a3, $sp, 976 + ld.d $a3, $sp, 984 move $s0, $a0 beqz $a3, .LBB1_393 # %bb.354: .Ltmp236: # EH_LABEL - addi.d $a0, $sp, 960 - addi.d $a1, $sp, 960 + addi.d $a0, $sp, 968 + addi.d $a1, $sp, 968 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp237: # EH_LABEL @@ -3973,13 +3969,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_356: .Ltmp229: # EH_LABEL - ld.d $a3, $sp, 1008 + ld.d $a3, $sp, 1016 move $s0, $a0 beqz $a3, .LBB1_524 # %bb.357: .Ltmp230: # EH_LABEL - addi.d $a0, $sp, 992 - addi.d $a1, $sp, 992 + addi.d $a0, $sp, 1000 + addi.d $a1, $sp, 1000 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp231: # EH_LABEL @@ -3998,13 +3994,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_361: .Ltmp208: # EH_LABEL - ld.d $a3, $sp, 1040 + ld.d $a3, $sp, 1048 move $s0, $a0 beqz $a3, .LBB1_399 # %bb.362: .Ltmp209: # EH_LABEL - addi.d $a0, $sp, 1024 - addi.d $a1, $sp, 1024 + addi.d $a0, $sp, 1032 + addi.d $a1, $sp, 1032 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp210: # EH_LABEL @@ -4015,13 +4011,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_364: .Ltmp202: # EH_LABEL - ld.d $a3, $sp, 1072 + ld.d $a3, $sp, 1080 move $s0, $a0 beqz $a3, .LBB1_524 # %bb.365: .Ltmp203: # EH_LABEL - addi.d $a0, $sp, 1056 - addi.d $a1, $sp, 1056 + addi.d $a0, $sp, 1064 + addi.d $a1, $sp, 1064 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp204: # EH_LABEL @@ -4032,23 +4028,23 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_367: .Ltmp349: # EH_LABEL - ld.d $a3, $sp, 720 + ld.d $a3, $sp, 728 move $s0, $a0 beqz $a3, .LBB1_369 # %bb.368: .Ltmp350: # EH_LABEL - addi.d $a0, $sp, 704 - addi.d $a1, $sp, 704 + addi.d $a0, $sp, 712 + addi.d $a1, $sp, 712 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp351: # EH_LABEL .LBB1_369: # %.body317 - ld.d $a3, $sp, 752 + ld.d $a3, $sp, 760 beqz $a3, .LBB1_524 # %bb.370: .Ltmp353: # EH_LABEL - addi.d $a0, $sp, 736 - addi.d $a1, $sp, 736 + addi.d $a0, $sp, 744 + addi.d $a1, $sp, 744 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp354: # EH_LABEL @@ -4063,23 +4059,23 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_373: .Ltmp322: # EH_LABEL - ld.d $a3, $sp, 784 + ld.d $a3, $sp, 792 move $s0, $a0 beqz $a3, .LBB1_375 # %bb.374: .Ltmp323: # EH_LABEL - addi.d $a0, $sp, 768 - addi.d $a1, $sp, 768 + addi.d $a0, $sp, 776 + addi.d $a1, $sp, 776 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp324: # EH_LABEL .LBB1_375: # %.body297 - ld.d $a3, $sp, 816 + ld.d $a3, $sp, 824 beqz $a3, .LBB1_524 # %bb.376: .Ltmp326: # EH_LABEL - addi.d $a0, $sp, 800 - addi.d $a1, $sp, 800 + addi.d $a0, $sp, 808 + addi.d $a1, $sp, 808 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp327: # EH_LABEL @@ -4094,23 +4090,23 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_379: .Ltmp295: # EH_LABEL - ld.d $a3, $sp, 848 + ld.d $a3, $sp, 856 move $s0, $a0 beqz $a3, .LBB1_381 # %bb.380: .Ltmp296: # EH_LABEL - addi.d $a0, $sp, 832 - addi.d $a1, $sp, 832 + addi.d $a0, $sp, 840 + addi.d $a1, $sp, 840 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp297: # EH_LABEL .LBB1_381: # %.body277 - ld.d $a3, $sp, 880 + ld.d $a3, $sp, 888 beqz $a3, .LBB1_524 # %bb.382: .Ltmp299: # EH_LABEL - addi.d $a0, $sp, 864 - addi.d $a1, $sp, 864 + addi.d $a0, $sp, 872 + addi.d $a1, $sp, 872 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp300: # EH_LABEL @@ -4125,23 +4121,23 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_385: .Ltmp268: # EH_LABEL - ld.d $a3, $sp, 912 + ld.d $a3, $sp, 920 move $s0, $a0 beqz $a3, .LBB1_387 # %bb.386: .Ltmp269: # EH_LABEL - addi.d $a0, $sp, 896 - addi.d $a1, $sp, 896 + addi.d $a0, $sp, 904 + addi.d $a1, $sp, 904 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp270: # EH_LABEL .LBB1_387: # %.body257 - ld.d $a3, $sp, 944 + ld.d $a3, $sp, 952 beqz $a3, .LBB1_524 # %bb.388: .Ltmp272: # EH_LABEL - addi.d $a0, $sp, 928 - addi.d $a1, $sp, 928 + addi.d $a0, $sp, 936 + addi.d $a1, $sp, 936 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp273: # EH_LABEL @@ -4156,23 +4152,23 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_391: .Ltmp241: # EH_LABEL - ld.d $a3, $sp, 976 + ld.d $a3, $sp, 984 move $s0, $a0 beqz $a3, .LBB1_393 # %bb.392: .Ltmp242: # EH_LABEL - addi.d $a0, $sp, 960 - addi.d $a1, $sp, 960 + addi.d $a0, $sp, 968 + addi.d $a1, $sp, 968 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp243: # EH_LABEL .LBB1_393: # %.body233 - ld.d $a3, $sp, 1008 + ld.d $a3, $sp, 1016 beqz $a3, .LBB1_524 # %bb.394: .Ltmp245: # EH_LABEL - addi.d $a0, $sp, 992 - addi.d $a1, $sp, 992 + addi.d $a0, $sp, 1000 + addi.d $a1, $sp, 1000 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp246: # EH_LABEL @@ -4187,23 +4183,23 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_397: .Ltmp214: # EH_LABEL - ld.d $a3, $sp, 1040 + ld.d $a3, $sp, 1048 move $s0, $a0 beqz $a3, .LBB1_399 # %bb.398: .Ltmp215: # EH_LABEL - addi.d $a0, $sp, 1024 - addi.d $a1, $sp, 1024 + addi.d $a0, $sp, 1032 + addi.d $a1, $sp, 1032 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp216: # EH_LABEL .LBB1_399: # %.body218 - ld.d $a3, $sp, 1072 + ld.d $a3, $sp, 1080 beqz $a3, .LBB1_524 # %bb.400: .Ltmp218: # EH_LABEL - addi.d $a0, $sp, 1056 - addi.d $a1, $sp, 1056 + addi.d $a0, $sp, 1064 + addi.d $a1, $sp, 1064 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp219: # EH_LABEL @@ -4219,7 +4215,7 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun .LBB1_403: .Ltmp199: # EH_LABEL move $s0, $a0 - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload pcaddu18i $ra, %call36(_ZdaPv) jirl $ra, $ra, 0 move $a0, $s0 @@ -4227,13 +4223,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_404: .Ltmp529: # EH_LABEL - ld.d $a3, $sp, 368 + ld.d $a3, $sp, 376 move $s0, $a0 beqz $a3, .LBB1_524 # %bb.405: .Ltmp530: # EH_LABEL - addi.d $a0, $sp, 352 - addi.d $a1, $sp, 352 + addi.d $a0, $sp, 360 + addi.d $a1, $sp, 360 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp531: # EH_LABEL @@ -4248,13 +4244,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_408: .Ltmp535: # EH_LABEL - ld.d $a3, $sp, 336 + ld.d $a3, $sp, 344 move $s0, $a0 beqz $a3, .LBB1_430 # %bb.409: .Ltmp536: # EH_LABEL - addi.d $a0, $sp, 320 - addi.d $a1, $sp, 320 + addi.d $a0, $sp, 328 + addi.d $a1, $sp, 328 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp537: # EH_LABEL @@ -4269,13 +4265,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_412: .Ltmp465: # EH_LABEL - ld.d $a3, $sp, 496 + ld.d $a3, $sp, 504 move $s0, $a0 beqz $a3, .LBB1_524 # %bb.413: .Ltmp466: # EH_LABEL - addi.d $a0, $sp, 480 - addi.d $a1, $sp, 480 + addi.d $a0, $sp, 488 + addi.d $a1, $sp, 488 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp467: # EH_LABEL @@ -4290,13 +4286,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_416: .Ltmp471: # EH_LABEL - ld.d $a3, $sp, 464 + ld.d $a3, $sp, 472 move $s0, $a0 beqz $a3, .LBB1_436 # %bb.417: .Ltmp472: # EH_LABEL - addi.d $a0, $sp, 448 - addi.d $a1, $sp, 448 + addi.d $a0, $sp, 456 + addi.d $a1, $sp, 456 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp473: # EH_LABEL @@ -4315,13 +4311,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_421: .Ltmp364: # EH_LABEL - ld.d $a3, $sp, 688 + ld.d $a3, $sp, 696 move $s0, $a0 beqz $a3, .LBB1_524 # %bb.422: .Ltmp365: # EH_LABEL - addi.d $a0, $sp, 672 - addi.d $a1, $sp, 672 + addi.d $a0, $sp, 680 + addi.d $a1, $sp, 680 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp366: # EH_LABEL @@ -4336,13 +4332,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_425: .Ltmp370: # EH_LABEL - ld.d $a3, $sp, 656 + ld.d $a3, $sp, 664 move $s0, $a0 beqz $a3, .LBB1_442 # %bb.426: .Ltmp371: # EH_LABEL - addi.d $a0, $sp, 640 - addi.d $a1, $sp, 640 + addi.d $a0, $sp, 648 + addi.d $a1, $sp, 648 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp372: # EH_LABEL @@ -4353,23 +4349,23 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_428: .Ltmp541: # EH_LABEL - ld.d $a3, $sp, 336 + ld.d $a3, $sp, 344 move $s0, $a0 beqz $a3, .LBB1_430 # %bb.429: .Ltmp542: # EH_LABEL - addi.d $a0, $sp, 320 - addi.d $a1, $sp, 320 + addi.d $a0, $sp, 328 + addi.d $a1, $sp, 328 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp543: # EH_LABEL .LBB1_430: # %.body489 - ld.d $a3, $sp, 368 + ld.d $a3, $sp, 376 beqz $a3, .LBB1_524 # %bb.431: .Ltmp545: # EH_LABEL - addi.d $a0, $sp, 352 - addi.d $a1, $sp, 352 + addi.d $a0, $sp, 360 + addi.d $a1, $sp, 360 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp546: # EH_LABEL @@ -4384,23 +4380,23 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_434: .Ltmp477: # EH_LABEL - ld.d $a3, $sp, 464 + ld.d $a3, $sp, 472 move $s0, $a0 beqz $a3, .LBB1_436 # %bb.435: .Ltmp478: # EH_LABEL - addi.d $a0, $sp, 448 - addi.d $a1, $sp, 448 + addi.d $a0, $sp, 456 + addi.d $a1, $sp, 456 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp479: # EH_LABEL .LBB1_436: # %.body429 - ld.d $a3, $sp, 496 + ld.d $a3, $sp, 504 beqz $a3, .LBB1_524 # %bb.437: .Ltmp481: # EH_LABEL - addi.d $a0, $sp, 480 - addi.d $a1, $sp, 480 + addi.d $a0, $sp, 488 + addi.d $a1, $sp, 488 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp482: # EH_LABEL @@ -4415,23 +4411,23 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_440: .Ltmp376: # EH_LABEL - ld.d $a3, $sp, 656 + ld.d $a3, $sp, 664 move $s0, $a0 beqz $a3, .LBB1_442 # %bb.441: .Ltmp377: # EH_LABEL - addi.d $a0, $sp, 640 - addi.d $a1, $sp, 640 + addi.d $a0, $sp, 648 + addi.d $a1, $sp, 648 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp378: # EH_LABEL .LBB1_442: # %.body337 - ld.d $a3, $sp, 688 + ld.d $a3, $sp, 696 beqz $a3, .LBB1_524 # %bb.443: .Ltmp380: # EH_LABEL - addi.d $a0, $sp, 672 - addi.d $a1, $sp, 672 + addi.d $a0, $sp, 680 + addi.d $a1, $sp, 680 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp381: # EH_LABEL @@ -4450,13 +4446,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_447: .Ltmp593: # EH_LABEL - ld.d $a3, $sp, 240 + ld.d $a3, $sp, 248 move $s0, $a0 beqz $a3, .LBB1_524 # %bb.448: .Ltmp594: # EH_LABEL - addi.d $a0, $sp, 224 - addi.d $a1, $sp, 224 + addi.d $a0, $sp, 232 + addi.d $a1, $sp, 232 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp595: # EH_LABEL @@ -4471,13 +4467,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_451: .Ltmp599: # EH_LABEL - ld.d $a3, $sp, 208 + ld.d $a3, $sp, 216 move $s0, $a0 beqz $a3, .LBB1_494 # %bb.452: .Ltmp600: # EH_LABEL - addi.d $a0, $sp, 192 - addi.d $a1, $sp, 192 + addi.d $a0, $sp, 200 + addi.d $a1, $sp, 200 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp601: # EH_LABEL @@ -4492,13 +4488,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_455: .Ltmp556: # EH_LABEL - ld.d $a3, $sp, 304 + ld.d $a3, $sp, 312 move $s0, $a0 beqz $a3, .LBB1_524 # %bb.456: .Ltmp557: # EH_LABEL - addi.d $a0, $sp, 288 - addi.d $a1, $sp, 288 + addi.d $a0, $sp, 296 + addi.d $a1, $sp, 296 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp558: # EH_LABEL @@ -4513,13 +4509,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_459: .Ltmp562: # EH_LABEL - ld.d $a3, $sp, 272 + ld.d $a3, $sp, 280 move $s0, $a0 beqz $a3, .LBB1_501 # %bb.460: .Ltmp563: # EH_LABEL - addi.d $a0, $sp, 256 - addi.d $a1, $sp, 256 + addi.d $a0, $sp, 264 + addi.d $a1, $sp, 264 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp564: # EH_LABEL @@ -4534,13 +4530,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_463: .Ltmp492: # EH_LABEL - ld.d $a3, $sp, 432 + ld.d $a3, $sp, 440 move $s0, $a0 beqz $a3, .LBB1_524 # %bb.464: .Ltmp493: # EH_LABEL - addi.d $a0, $sp, 416 - addi.d $a1, $sp, 416 + addi.d $a0, $sp, 424 + addi.d $a1, $sp, 424 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp494: # EH_LABEL @@ -4555,13 +4551,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_467: .Ltmp498: # EH_LABEL - ld.d $a3, $sp, 400 + ld.d $a3, $sp, 408 move $s0, $a0 beqz $a3, .LBB1_508 # %bb.468: .Ltmp499: # EH_LABEL - addi.d $a0, $sp, 384 - addi.d $a1, $sp, 384 + addi.d $a0, $sp, 392 + addi.d $a1, $sp, 392 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp500: # EH_LABEL @@ -4576,13 +4572,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_471: .Ltmp428: # EH_LABEL - ld.d $a3, $sp, 560 + ld.d $a3, $sp, 568 move $s0, $a0 beqz $a3, .LBB1_524 # %bb.472: .Ltmp429: # EH_LABEL - addi.d $a0, $sp, 544 - addi.d $a1, $sp, 544 + addi.d $a0, $sp, 552 + addi.d $a1, $sp, 552 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp430: # EH_LABEL @@ -4597,13 +4593,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_475: .Ltmp434: # EH_LABEL - ld.d $a3, $sp, 528 + ld.d $a3, $sp, 536 move $s0, $a0 beqz $a3, .LBB1_515 # %bb.476: .Ltmp435: # EH_LABEL - addi.d $a0, $sp, 512 - addi.d $a1, $sp, 512 + addi.d $a0, $sp, 520 + addi.d $a1, $sp, 520 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp436: # EH_LABEL @@ -4618,13 +4614,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_479: .Ltmp391: # EH_LABEL - ld.d $a3, $sp, 624 + ld.d $a3, $sp, 632 move $s0, $a0 beqz $a3, .LBB1_524 # %bb.480: .Ltmp392: # EH_LABEL - addi.d $a0, $sp, 608 - addi.d $a1, $sp, 608 + addi.d $a0, $sp, 616 + addi.d $a1, $sp, 616 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp393: # EH_LABEL @@ -4639,13 +4635,13 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun jirl $ra, $ra, 0 .LBB1_483: .Ltmp397: # EH_LABEL - ld.d $a3, $sp, 592 + ld.d $a3, $sp, 600 move $s0, $a0 beqz $a3, .LBB1_522 # %bb.484: .Ltmp398: # EH_LABEL - addi.d $a0, $sp, 576 - addi.d $a1, $sp, 576 + addi.d $a0, $sp, 584 + addi.d $a1, $sp, 584 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp399: # EH_LABEL @@ -4673,22 +4669,22 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun .Ltmp630: # EH_LABEL .LBB1_492: move $s0, $a0 - ld.d $a3, $sp, 208 + ld.d $a3, $sp, 216 beqz $a3, .LBB1_494 # %bb.493: .Ltmp631: # EH_LABEL - addi.d $a0, $sp, 192 - addi.d $a1, $sp, 192 + addi.d $a0, $sp, 200 + addi.d $a1, $sp, 200 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp632: # EH_LABEL .LBB1_494: # %.body550 - ld.d $a3, $sp, 240 + ld.d $a3, $sp, 248 beqz $a3, .LBB1_524 # %bb.495: .Ltmp634: # EH_LABEL - addi.d $a0, $sp, 224 - addi.d $a1, $sp, 224 + addi.d $a0, $sp, 232 + addi.d $a1, $sp, 232 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp635: # EH_LABEL @@ -4705,22 +4701,22 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun .Ltmp639: # EH_LABEL .LBB1_499: move $s0, $a0 - ld.d $a3, $sp, 272 + ld.d $a3, $sp, 280 beqz $a3, .LBB1_501 # %bb.500: .Ltmp640: # EH_LABEL - addi.d $a0, $sp, 256 - addi.d $a1, $sp, 256 + addi.d $a0, $sp, 264 + addi.d $a1, $sp, 264 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp641: # EH_LABEL .LBB1_501: # %.body509 - ld.d $a3, $sp, 304 + ld.d $a3, $sp, 312 beqz $a3, .LBB1_524 # %bb.502: .Ltmp643: # EH_LABEL - addi.d $a0, $sp, 288 - addi.d $a1, $sp, 288 + addi.d $a0, $sp, 296 + addi.d $a1, $sp, 296 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp644: # EH_LABEL @@ -4737,22 +4733,22 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun .Ltmp648: # EH_LABEL .LBB1_506: move $s0, $a0 - ld.d $a3, $sp, 400 + ld.d $a3, $sp, 408 beqz $a3, .LBB1_508 # %bb.507: .Ltmp649: # EH_LABEL - addi.d $a0, $sp, 384 - addi.d $a1, $sp, 384 + addi.d $a0, $sp, 392 + addi.d $a1, $sp, 392 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp650: # EH_LABEL .LBB1_508: # %.body449 - ld.d $a3, $sp, 432 + ld.d $a3, $sp, 440 beqz $a3, .LBB1_524 # %bb.509: .Ltmp652: # EH_LABEL - addi.d $a0, $sp, 416 - addi.d $a1, $sp, 416 + addi.d $a0, $sp, 424 + addi.d $a1, $sp, 424 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp653: # EH_LABEL @@ -4769,22 +4765,22 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun .Ltmp657: # EH_LABEL .LBB1_513: move $s0, $a0 - ld.d $a3, $sp, 528 + ld.d $a3, $sp, 536 beqz $a3, .LBB1_515 # %bb.514: .Ltmp658: # EH_LABEL - addi.d $a0, $sp, 512 - addi.d $a1, $sp, 512 + addi.d $a0, $sp, 520 + addi.d $a1, $sp, 520 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp659: # EH_LABEL .LBB1_515: # %.body389 - ld.d $a3, $sp, 560 + ld.d $a3, $sp, 568 beqz $a3, .LBB1_524 # %bb.516: .Ltmp661: # EH_LABEL - addi.d $a0, $sp, 544 - addi.d $a1, $sp, 544 + addi.d $a0, $sp, 552 + addi.d $a1, $sp, 552 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp662: # EH_LABEL @@ -4801,27 +4797,27 @@ _ZL19checkVectorFunctionIfEvSt8functionIFT_PS1_jEES4_PKc: # @_ZL19checkVectorFun .Ltmp666: # EH_LABEL .LBB1_520: move $s0, $a0 - ld.d $a3, $sp, 592 + ld.d $a3, $sp, 600 beqz $a3, .LBB1_522 # %bb.521: .Ltmp667: # EH_LABEL - addi.d $a0, $sp, 576 - addi.d $a1, $sp, 576 + addi.d $a0, $sp, 584 + addi.d $a1, $sp, 584 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp668: # EH_LABEL .LBB1_522: # %.body357 - ld.d $a3, $sp, 624 + ld.d $a3, $sp, 632 beqz $a3, .LBB1_524 # %bb.523: .Ltmp670: # EH_LABEL - addi.d $a0, $sp, 608 - addi.d $a1, $sp, 608 + addi.d $a0, $sp, 616 + addi.d $a1, $sp, 616 ori $a2, $zero, 3 jirl $ra, $a3, 0 .Ltmp671: # EH_LABEL .LBB1_524: # %_ZNSt10unique_ptrIA_fSt14default_deleteIS0_EED2Ev.exit583 - ld.d $a0, $sp, 184 # 8-byte Folded Reload + ld.d $a0, $sp, 192 # 8-byte Folded Reload pcaddu18i $ra, %call36(_ZdaPv) jirl $ra, $ra, 0 move $a0, $s0 @@ -6521,19 +6517,14 @@ _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_1E10_M_managerERSt9_Any_dataRKS4_St18_M .size _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_1E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation, .Lfunc_end10-_ZNSt17_Function_handlerIFfPfjEZ4mainE3$_1E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_2E9_M_invokeERKSt9_Any_dataOS0_Oj -.LCPI11_0: - .word 0x00800000 # float 1.17549435E-38 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_2E9_M_invokeERKSt9_Any_dataOS0_Oj .type _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_2E9_M_invokeERKSt9_Any_dataOS0_Oj,@function _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_2E9_M_invokeERKSt9_Any_dataOS0_Oj: # @"_ZNSt17_Function_handlerIFfPfjEZ4mainE3$_2E9_M_invokeERKSt9_Any_dataOS0_Oj" # %bb.0: ld.d $a0, $a1, 0 - pcalau12i $a1, %pc_hi20(.LCPI11_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI11_0) lu12i.w $a1, -1 + lu12i.w $a2, 2048 + movgr2fr.w $fa0, $a2 lu12i.w $a2, 1 .p2align 4, , 16 .LBB11_1: # =>This Inner Loop Header: Depth=1 @@ -6691,19 +6682,14 @@ _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_3E10_M_managerERSt9_Any_dataRKS4_St18_M .size _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_3E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation, .Lfunc_end14-_ZNSt17_Function_handlerIFfPfjEZ4mainE3$_3E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_4E9_M_invokeERKSt9_Any_dataOS0_Oj -.LCPI15_0: - .word 0x00000001 # float 1.40129846E-45 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_4E9_M_invokeERKSt9_Any_dataOS0_Oj .type _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_4E9_M_invokeERKSt9_Any_dataOS0_Oj,@function _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_4E9_M_invokeERKSt9_Any_dataOS0_Oj: # @"_ZNSt17_Function_handlerIFfPfjEZ4mainE3$_4E9_M_invokeERKSt9_Any_dataOS0_Oj" # %bb.0: ld.d $a0, $a1, 0 - pcalau12i $a1, %pc_hi20(.LCPI15_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI15_0) lu12i.w $a1, -1 + ori $a2, $zero, 1 + movgr2fr.w $fa0, $a2 lu12i.w $a2, 1 .p2align 4, , 16 .LBB15_1: # =>This Inner Loop Header: Depth=1 @@ -6860,19 +6846,14 @@ _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_5E10_M_managerERSt9_Any_dataRKS4_St18_M .size _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_5E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation, .Lfunc_end18-_ZNSt17_Function_handlerIFfPfjEZ4mainE3$_5E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_6E9_M_invokeERKSt9_Any_dataOS0_Oj -.LCPI19_0: - .word 0x7fc00000 # float NaN - .text - .p2align 5 + .p2align 5 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_6E9_M_invokeERKSt9_Any_dataOS0_Oj .type _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_6E9_M_invokeERKSt9_Any_dataOS0_Oj,@function _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_6E9_M_invokeERKSt9_Any_dataOS0_Oj: # @"_ZNSt17_Function_handlerIFfPfjEZ4mainE3$_6E9_M_invokeERKSt9_Any_dataOS0_Oj" # %bb.0: ld.d $a0, $a1, 0 - pcalau12i $a1, %pc_hi20(.LCPI19_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI19_0) lu12i.w $a1, -1 + lu12i.w $a2, 523264 + movgr2fr.w $fa0, $a2 lu12i.w $a2, 1 .p2align 4, , 16 .LBB19_1: # =>This Inner Loop Header: Depth=1 @@ -7120,19 +7101,14 @@ _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_9E10_M_managerERSt9_Any_dataRKS4_St18_M .size _ZNSt17_Function_handlerIFfPfjEZ4mainE3$_9E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation, .Lfunc_end26-_ZNSt17_Function_handlerIFfPfjEZ4mainE3$_9E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_10E9_M_invokeERKSt9_Any_dataOS0_Oj -.LCPI27_0: - .word 0x00800000 # float 1.17549435E-38 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_10E9_M_invokeERKSt9_Any_dataOS0_Oj .type _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_10E9_M_invokeERKSt9_Any_dataOS0_Oj,@function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_10E9_M_invokeERKSt9_Any_dataOS0_Oj: # @"_ZNSt17_Function_handlerIFfPfjEZ4mainE4$_10E9_M_invokeERKSt9_Any_dataOS0_Oj" # %bb.0: ld.d $a0, $a1, 0 - pcalau12i $a1, %pc_hi20(.LCPI27_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI27_0) lu12i.w $a1, -1 + lu12i.w $a2, 2048 + movgr2fr.w $fa0, $a2 lu12i.w $a2, 1 .p2align 4, , 16 .LBB27_1: # =>This Inner Loop Header: Depth=1 @@ -7170,19 +7146,14 @@ _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_10E10_M_managerERSt9_Any_dataRKS4_St18_ .size _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_10E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation, .Lfunc_end28-_ZNSt17_Function_handlerIFfPfjEZ4mainE4$_10E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_11E9_M_invokeERKSt9_Any_dataOS0_Oj -.LCPI29_0: - .word 0x00800000 # float 1.17549435E-38 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_11E9_M_invokeERKSt9_Any_dataOS0_Oj .type _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_11E9_M_invokeERKSt9_Any_dataOS0_Oj,@function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_11E9_M_invokeERKSt9_Any_dataOS0_Oj: # @"_ZNSt17_Function_handlerIFfPfjEZ4mainE4$_11E9_M_invokeERKSt9_Any_dataOS0_Oj" # %bb.0: ld.d $a0, $a1, 0 - pcalau12i $a1, %pc_hi20(.LCPI29_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI29_0) lu12i.w $a1, -1 + lu12i.w $a2, 2048 + movgr2fr.w $fa0, $a2 lu12i.w $a2, 1 .p2align 4, , 16 .LBB29_1: # =>This Inner Loop Header: Depth=1 @@ -7220,18 +7191,13 @@ _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_11E10_M_managerERSt9_Any_dataRKS4_St18_ .size _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_11E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation, .Lfunc_end30-_ZNSt17_Function_handlerIFfPfjEZ4mainE4$_11E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_12E9_M_invokeERKSt9_Any_dataOS0_Oj -.LCPI31_0: - .word 0x00000001 # float 1.40129846E-45 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_12E9_M_invokeERKSt9_Any_dataOS0_Oj .type _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_12E9_M_invokeERKSt9_Any_dataOS0_Oj,@function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_12E9_M_invokeERKSt9_Any_dataOS0_Oj: # @"_ZNSt17_Function_handlerIFfPfjEZ4mainE4$_12E9_M_invokeERKSt9_Any_dataOS0_Oj" # %bb.0: ld.d $a0, $a1, 0 - pcalau12i $a1, %pc_hi20(.LCPI31_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI31_0) + ori $a1, $zero, 1 + movgr2fr.w $fa0, $a1 lu12i.w $a1, -2 ori $a1, $a1, 4092 lu12i.w $a2, 1 @@ -7272,18 +7238,13 @@ _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_12E10_M_managerERSt9_Any_dataRKS4_St18_ .size _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_12E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation, .Lfunc_end32-_ZNSt17_Function_handlerIFfPfjEZ4mainE4$_12E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_13E9_M_invokeERKSt9_Any_dataOS0_Oj -.LCPI33_0: - .word 0x00000001 # float 1.40129846E-45 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_13E9_M_invokeERKSt9_Any_dataOS0_Oj .type _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_13E9_M_invokeERKSt9_Any_dataOS0_Oj,@function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_13E9_M_invokeERKSt9_Any_dataOS0_Oj: # @"_ZNSt17_Function_handlerIFfPfjEZ4mainE4$_13E9_M_invokeERKSt9_Any_dataOS0_Oj" # %bb.0: ld.d $a0, $a1, 0 - pcalau12i $a1, %pc_hi20(.LCPI33_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI33_0) + ori $a1, $zero, 1 + movgr2fr.w $fa0, $a1 lu12i.w $a1, -2 ori $a1, $a1, 4092 lu12i.w $a2, 1 @@ -7324,18 +7285,13 @@ _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_13E10_M_managerERSt9_Any_dataRKS4_St18_ .size _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_13E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation, .Lfunc_end34-_ZNSt17_Function_handlerIFfPfjEZ4mainE4$_13E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_14E9_M_invokeERKSt9_Any_dataOS0_Oj -.LCPI35_0: - .word 0x7fc00000 # float NaN - .text - .p2align 5 + .p2align 5 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_14E9_M_invokeERKSt9_Any_dataOS0_Oj .type _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_14E9_M_invokeERKSt9_Any_dataOS0_Oj,@function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_14E9_M_invokeERKSt9_Any_dataOS0_Oj: # @"_ZNSt17_Function_handlerIFfPfjEZ4mainE4$_14E9_M_invokeERKSt9_Any_dataOS0_Oj" # %bb.0: ld.d $a0, $a1, 0 - pcalau12i $a1, %pc_hi20(.LCPI35_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI35_0) + lu12i.w $a1, 523264 + movgr2fr.w $fa0, $a1 lu12i.w $a1, -2 ori $a1, $a1, 4092 lu12i.w $a2, 1 @@ -7376,18 +7332,13 @@ _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_14E10_M_managerERSt9_Any_dataRKS4_St18_ .size _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_14E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation, .Lfunc_end36-_ZNSt17_Function_handlerIFfPfjEZ4mainE4$_14E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_15E9_M_invokeERKSt9_Any_dataOS0_Oj -.LCPI37_0: - .word 0x7fc00000 # float NaN - .text - .p2align 5 + .p2align 5 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_15E9_M_invokeERKSt9_Any_dataOS0_Oj .type _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_15E9_M_invokeERKSt9_Any_dataOS0_Oj,@function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_15E9_M_invokeERKSt9_Any_dataOS0_Oj: # @"_ZNSt17_Function_handlerIFfPfjEZ4mainE4$_15E9_M_invokeERKSt9_Any_dataOS0_Oj" # %bb.0: ld.d $a0, $a1, 0 - pcalau12i $a1, %pc_hi20(.LCPI37_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI37_0) + lu12i.w $a1, 523264 + movgr2fr.w $fa0, $a1 lu12i.w $a1, -2 ori $a1, $a1, 4092 lu12i.w $a2, 1 @@ -7698,19 +7649,14 @@ _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_21E10_M_managerERSt9_Any_dataRKS4_St18_ .size _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_21E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation, .Lfunc_end50-_ZNSt17_Function_handlerIFfPfjEZ4mainE4$_21E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_22E9_M_invokeERKSt9_Any_dataOS0_Oj -.LCPI51_0: - .word 0x00000001 # float 1.40129846E-45 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_22E9_M_invokeERKSt9_Any_dataOS0_Oj .type _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_22E9_M_invokeERKSt9_Any_dataOS0_Oj,@function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_22E9_M_invokeERKSt9_Any_dataOS0_Oj: # @"_ZNSt17_Function_handlerIFfPfjEZ4mainE4$_22E9_M_invokeERKSt9_Any_dataOS0_Oj" # %bb.0: ld.d $a0, $a1, 0 - pcalau12i $a1, %pc_hi20(.LCPI51_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI51_0) lu12i.w $a1, -1 + ori $a2, $zero, 1 + movgr2fr.w $fa0, $a2 lu12i.w $a2, 1 .p2align 4, , 16 .LBB51_1: # =>This Inner Loop Header: Depth=1 @@ -7748,19 +7694,14 @@ _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_22E10_M_managerERSt9_Any_dataRKS4_St18_ .size _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_22E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation, .Lfunc_end52-_ZNSt17_Function_handlerIFfPfjEZ4mainE4$_22E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_23E9_M_invokeERKSt9_Any_dataOS0_Oj -.LCPI53_0: - .word 0x00000001 # float 1.40129846E-45 - .text - .p2align 5 + .p2align 5 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_23E9_M_invokeERKSt9_Any_dataOS0_Oj .type _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_23E9_M_invokeERKSt9_Any_dataOS0_Oj,@function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_23E9_M_invokeERKSt9_Any_dataOS0_Oj: # @"_ZNSt17_Function_handlerIFfPfjEZ4mainE4$_23E9_M_invokeERKSt9_Any_dataOS0_Oj" # %bb.0: ld.d $a0, $a1, 0 - pcalau12i $a1, %pc_hi20(.LCPI53_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI53_0) lu12i.w $a1, -1 + ori $a2, $zero, 1 + movgr2fr.w $fa0, $a2 lu12i.w $a2, 1 .p2align 4, , 16 .LBB53_1: # =>This Inner Loop Header: Depth=1 @@ -7798,19 +7739,14 @@ _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_23E10_M_managerERSt9_Any_dataRKS4_St18_ .size _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_23E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation, .Lfunc_end54-_ZNSt17_Function_handlerIFfPfjEZ4mainE4$_23E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_24E9_M_invokeERKSt9_Any_dataOS0_Oj -.LCPI55_0: - .word 0x7fc00000 # float NaN - .text - .p2align 5 + .p2align 5 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_24E9_M_invokeERKSt9_Any_dataOS0_Oj .type _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_24E9_M_invokeERKSt9_Any_dataOS0_Oj,@function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_24E9_M_invokeERKSt9_Any_dataOS0_Oj: # @"_ZNSt17_Function_handlerIFfPfjEZ4mainE4$_24E9_M_invokeERKSt9_Any_dataOS0_Oj" # %bb.0: ld.d $a0, $a1, 0 - pcalau12i $a1, %pc_hi20(.LCPI55_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI55_0) lu12i.w $a1, -1 + lu12i.w $a2, 523264 + movgr2fr.w $fa0, $a2 lu12i.w $a2, 1 .p2align 4, , 16 .LBB55_1: # =>This Inner Loop Header: Depth=1 @@ -7848,19 +7784,14 @@ _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_24E10_M_managerERSt9_Any_dataRKS4_St18_ .size _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_24E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation, .Lfunc_end56-_ZNSt17_Function_handlerIFfPfjEZ4mainE4$_24E10_M_managerERSt9_Any_dataRKS4_St18_Manager_operation .cfi_endproc # -- End function - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2, 0x0 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_25E9_M_invokeERKSt9_Any_dataOS0_Oj -.LCPI57_0: - .word 0x7fc00000 # float NaN - .text - .p2align 5 + .p2align 5 # -- Begin function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_25E9_M_invokeERKSt9_Any_dataOS0_Oj .type _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_25E9_M_invokeERKSt9_Any_dataOS0_Oj,@function _ZNSt17_Function_handlerIFfPfjEZ4mainE4$_25E9_M_invokeERKSt9_Any_dataOS0_Oj: # @"_ZNSt17_Function_handlerIFfPfjEZ4mainE4$_25E9_M_invokeERKSt9_Any_dataOS0_Oj" # %bb.0: ld.d $a0, $a1, 0 - pcalau12i $a1, %pc_hi20(.LCPI57_0) - fld.s $fa0, $a1, %pc_lo12(.LCPI57_0) lu12i.w $a1, -1 + lu12i.w $a2, 523264 + movgr2fr.w $fa0, $a2 lu12i.w $a2, 1 .p2align 4, , 16 .LBB57_1: # =>This Inner Loop Header: Depth=1 diff --git a/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/gcc-loops.dir/gcc-loops.s b/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/gcc-loops.dir/gcc-loops.s index 8e2e60de..6affcbc0 100644 --- a/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/gcc-loops.dir/gcc-loops.s +++ b/results/SingleSource/UnitTests/Vectorizer/CMakeFiles/gcc-loops.dir/gcc-loops.s @@ -1626,28 +1626,26 @@ _Z11init_memoryPvS_: # @_Z11init_memoryPvS_ .Lfunc_end21: .size _Z11init_memoryPvS_, .Lfunc_end21-_Z11init_memoryPvS_ # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _Z17init_memory_floatPfS_ -.LCPI22_0: - .dword 0x3ff199999999999a # double 1.1000000000000001 - .text - .globl _Z17init_memory_floatPfS_ + .globl _Z17init_memory_floatPfS_ # -- Begin function _Z17init_memory_floatPfS_ .p2align 5 .type _Z17init_memory_floatPfS_,@function _Z17init_memory_floatPfS_: # @_Z17init_memory_floatPfS_ # %bb.0: beq $a0, $a1, .LBB22_3 # %bb.1: # %.lr.ph.preheader - pcalau12i $a2, %pc_hi20(.LCPI22_0) - fld.d $fa0, $a2, %pc_lo12(.LCPI22_0) - vldi $vr1, -1168 + vldi $vr0, -1168 + lu12i.w $a2, -419431 + ori $a2, $a2, 2458 + lu32i.d $a2, 104857 + lu52i.d $a2, $a2, 1023 + movgr2fr.d $fa1, $a2 .p2align 4, , 16 .LBB22_2: # %.lr.ph # =>This Inner Loop Header: Depth=1 - fcvt.d.s $fa1, $fa1 - fmul.d $fa1, $fa1, $fa0 - fcvt.s.d $fa1, $fa1 - fst.s $fa1, $a0, 0 + fcvt.d.s $fa0, $fa0 + fmul.d $fa0, $fa0, $fa1 + fcvt.s.d $fa0, $fa0 + fst.s $fa0, $a0, 0 addi.d $a0, $a0, 4 bne $a0, $a1, .LBB22_2 .LBB22_3: # %._crit_edge @@ -1679,12 +1677,7 @@ _Z13digest_memoryPvS_: # @_Z13digest_memoryPvS_ .Lfunc_end23: .size _Z13digest_memoryPvS_, .Lfunc_end23-_Z13digest_memoryPvS_ # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI24_0: - .dword 0x3ff199999999999a # double 1.1000000000000001 - .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -1932,8 +1925,11 @@ main: # @main vldi $vr1, -1168 pcalau12i $a2, %pc_hi20(fa) addi.d $a2, $a2, %pc_lo12(fa) - pcalau12i $a3, %pc_hi20(.LCPI24_0) - fld.d $fa0, $a3, %pc_lo12(.LCPI24_0) + lu12i.w $a3, -419431 + ori $a3, $a3, 2458 + lu32i.d $a3, 104857 + lu52i.d $a3, $a3, 1023 + movgr2fr.d $fa0, $a3 .p2align 4, , 16 .LBB24_27: # %.lr.ph.i139 # =>This Inner Loop Header: Depth=1 @@ -3551,12 +3547,8 @@ GCC_except_table24: .Lcst_end0: .p2align 2, 0x0 # -- End function - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function _ZN5TimerD2Ev -.LCPI25_0: - .dword 0x408f400000000000 # double 1000 .section .text._ZN5TimerD2Ev,"axG",@progbits,_ZN5TimerD2Ev,comdat - .weak _ZN5TimerD2Ev + .weak _ZN5TimerD2Ev # -- Begin function _ZN5TimerD2Ev .p2align 5 .type _ZN5TimerD2Ev,@function _ZN5TimerD2Ev: # @_ZN5TimerD2Ev @@ -3635,12 +3627,14 @@ _ZN5TimerD2Ev: # @_ZN5TimerD2Ev ori $a2, $zero, 1000 mul.d $a0, $a0, $a2 movgr2fr.d $fa0, $a0 - pcalau12i $a0, %pc_hi20(.LCPI25_0) - fld.d $fa1, $a0, %pc_lo12(.LCPI25_0) ffint.d.l $fa0, $fa0 - movgr2fr.d $fa2, $a1 - ffint.d.l $fa2, $fa2 - fdiv.d $fa1, $fa2, $fa1 + movgr2fr.d $fa1, $a1 + ffint.d.l $fa1, $fa1 + ori $a0, $zero, 0 + lu32i.d $a0, -49152 + lu52i.d $a0, $a0, 1032 + movgr2fr.d $fa2, $a0 + fdiv.d $fa1, $fa1, $fa2 fadd.d $fa0, $fa1, $fa0 vldi $vr1, -928 fadd.d $fa0, $fa0, $fa1 diff --git a/results/tools/CMakeFiles/timeit-target.dir/timeit.s b/results/tools/CMakeFiles/timeit-target.dir/timeit.s index 50f970ad..36875db4 100644 --- a/results/tools/CMakeFiles/timeit-target.dir/timeit.s +++ b/results/tools/CMakeFiles/timeit-target.dir/timeit.s @@ -1,12 +1,6 @@ .file "timeit.c" - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3, 0x0 # -- Begin function main -.LCPI0_0: - .dword 0x3eb0c6f7a0b5ed8d # double 9.9999999999999995E-7 -.LCPI0_1: - .dword 0x412e848000000000 # double 1.0E+6 .text - .globl main + .globl main # -- Begin function main .p2align 5 .type main,@function main: # @main @@ -421,11 +415,14 @@ main: # @main movgr2fr.d $fa0, $s1 ffint.d.l $fs0, $fa0 movgr2fr.d $fa0, $s0 - pcalau12i $a1, %pc_hi20(.LCPI0_0) + ffint.d.l $fs2, $fa0 + lu12i.w $a1, -390306 + ori $a1, $a1, 3469 + lu32i.d $a1, 50935 ld.d $s0, $sp, 128 # 8-byte Folded Reload ld.w $a2, $s0, %pc_lo12(g_timeout_in_seconds) - fld.d $fs1, $a1, %pc_lo12(.LCPI0_0) - ffint.d.l $fs2, $fa0 + lu52i.d $a1, $a1, 1003 + movgr2fr.d $fs1, $a1 pcalau12i $fp, %pc_hi20(g_monitored_pid) st.w $a0, $fp, %pc_lo12(g_monitored_pid) beqz $a2, .LBB0_61 @@ -480,31 +477,33 @@ main: # @main # %bb.66: movgr2fr.d $fa0, $s0 ffint.d.l $fa0, $fa0 - ld.d $a0, $sp, 160 movgr2fr.d $fa1, $fp + ld.d $a0, $sp, 160 ffint.d.l $fa1, $fa1 + ld.d $a1, $sp, 168 fmadd.d $fs1, $fa1, $fs1, $fa0 movgr2fr.d $fa0, $a0 - ld.d $a0, $sp, 168 ffint.d.l $fs2, $fa0 - pcalau12i $a1, %pc_hi20(.LCPI0_1) - fld.d $fa0, $a1, %pc_lo12(.LCPI0_1) - movgr2fr.d $fa1, $a0 - ld.d $a0, $sp, 176 - ffint.d.l $fa1, $fa1 - fdiv.d $fs3, $fa1, $fa0 - ld.d $a1, $sp, 184 + movgr2fr.d $fa0, $a1 + ffint.d.l $fa0, $fa0 + ori $a0, $zero, 0 + lu32i.d $a0, -97152 + lu52i.d $a0, $a0, 1042 + ld.d $a1, $sp, 176 movgr2fr.d $fa1, $a0 + fdiv.d $fs3, $fa0, $fa1 + ld.d $a2, $sp, 184 + movgr2fr.d $fa0, $a1 ld.wu $a0, $sp, 144 - ffint.d.l $fs4, $fa1 - movgr2fr.d $fa1, $a1 - ffint.d.l $fa1, $fa1 + ffint.d.l $fs4, $fa0 + movgr2fr.d $fa0, $a2 + ffint.d.l $fa0, $fa0 andi $a3, $a0, 127 slli.d $a1, $a3, 24 addu16i.d $a1, $a1, 256 addi.w $a1, $a1, 0 lu12i.w $a2, 8192 - fdiv.d $fs6, $fa1, $fa0 + fdiv.d $fs6, $fa0, $fa1 bge $a1, $a2, .LBB0_111 # %bb.67: ori $s3, $zero, 66